%{
#include <stdio.h>
#include <string.h>
/* Scanner-wide state shared by the helpers, the rule actions and main(). */
int lines = 1;   /* current input line number; counting starts at 1 */
int tokens = 0;  /* incremented by print_token() */
int errors = 0;  /* incremented by report_error() */
FILE *cleaned;   /* output stream; main() opens it on "cleaned.c" */
/*
 * Print one row of the token listing to stdout -- the current line number,
 * the token class and the matched text -- and count the token.
 *
 *   type    token class label, e.g. "KEYWORD"
 *   lexeme  matched text to display
 */
void print_token(const char *type, const char *lexeme) {
    tokens += 1;
    fprintf(stdout, "Line %-4d %-15s %s\n", lines, type, lexeme);
}
/*
 * Write a diagnostic for the current line to stderr and count the error.
 *
 *   msg     short description of the problem
 *   lexeme  offending text, printed after the description
 */
void report_error(const char *msg, const char *lexeme) {
    errors += 1;
    fprintf(stderr, "Error (Line %d): %s -> %s\n", lines, msg, lexeme);
}
%}
/* Definitions: named patterns, referenced as {NAME} in the rules section. */
/* C keywords. flex reads a definition from one line, so the whole alternation is kept on a single line. */
KEYWORD     (auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|inline|int|long|register|restrict|return|short|signed|sizeof|static|struct|switch|typedef|union|unsigned|void|volatile|while|_Bool|_Complex|_Imaginary)
/* Identifier: a letter or underscore followed by any number of letters, digits or underscores. */
IDENT       [A-Za-z_][A-Za-z0-9_]*
/* Decimal number: digits, an optional fraction and an optional exponent. */
NUM         ([0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?)
/* Double-quoted string on one line; a backslash escapes the character after it. */
STR         \"([^\"\\\n]|\\.)*\"
/* Character constant holding exactly one character or one backslash escape. */
CHAR        \'([^\'\\\n]|\\.)\'
/* Digits immediately followed by identifier characters, e.g. 9lives. */
INV_ID      [0-9]+[A-Za-z_][A-Za-z0-9_]*
/* String or character constant that reaches the end of the line without a closing quote; the match includes the newline. */
STR_BAD     \"([^\"\\\n]|\\.)*\n
CHAR_BAD    \'([^\'\\\n]|\\.)*\n
/* Operators and punctuation. The slash of the divide-assign operator is escaped: outside a character class an unescaped slash is the flex trailing-context operator. */
OP          (\+\+|--|==|!=|<=|>=|&&|\|\||\+=|-=|\*=|\/=|%=|&=|\|=|\^=|->|<<|>>|[+\-*/%&|^~!=<>?:;,.\[\]\(\)\{\}])
/* Run of blanks and tabs; newlines are handled by their own rule. */
WS          [ \t]+
/* Line comment up to (not including) the newline, or a block comment that may span lines. Slashes are escaped for the same trailing-context reason. */
COMMENT     (\/\/[^\n]*|\/\*([^*]|\*+[^*/])*\*+\/)
%%
{WS}        { /* Collapse a run of blanks/tabs to one space in the cleaned file. */
              fputc(' ', cleaned); }
{COMMENT}   {
              /* Comments are left out of the cleaned file.  A block comment
               * may span several lines and the newline rule never sees the
               * newlines inside it, so count them here to keep the reported
               * line numbers correct. */
              const char *p;
              for (p = yytext; *p != '\0'; p++) {
                  if (*p == '\n') {
                      lines++;
                  }
              }
            }
{KEYWORD}   { /* Placed before IDENT: for matches of equal length flex picks the earlier rule. */
              print_token("KEYWORD", yytext); fputs(yytext, cleaned); fputc(' ', cleaned); }
{IDENT}     { print_token("IDENTIFIER", yytext); fputs(yytext, cleaned); fputc(' ', cleaned); }
{NUM}       { /* Placed before INV_ID so that text such as 1e5, which both patterns match in full, is a number. */
              print_token("NUMBER", yytext); fputs(yytext, cleaned); fputc(' ', cleaned); }
{STR}       { print_token("STRING", yytext); fputs(yytext, cleaned); fputc(' ', cleaned); }
{CHAR}      { print_token("CHAR", yytext); fputs(yytext, cleaned); fputc(' ', cleaned); }
{INV_ID}    { /* Reported only; invalid identifiers are not copied to the cleaned file. */
              print_token("INVALID_ID", yytext); report_error("Invalid identifier", yytext); }
{STR_BAD}   {
              /* The pattern swallows the terminating newline.  Push it back
               * so the lexeme is printed without a line break and the newline
               * rule counts the line and copies the newline to the cleaned file. */
              yyless(yyleng - 1);
              print_token("BAD_STRING", yytext); report_error("Unterminated string", yytext);
            }
{CHAR_BAD}  {
              /* Same newline push-back as for unterminated strings. */
              yyless(yyleng - 1);
              print_token("BAD_CHAR", yytext); report_error("Unterminated char", yytext);
            }
{OP}        { print_token("OPERATOR", yytext); fputs(yytext, cleaned); fputc(' ', cleaned); }
\n          { /* Line structure is preserved in the cleaned file. */
              lines++; fputc('\n', cleaned); }
.           { /* Anything no other rule accepts: report it, but still copy it through. */
              print_token("UNKNOWN", yytext); report_error("Unknown character", yytext); fputs(yytext, cleaned); }
%%
/*
 * Entry point: open cleaned.c for writing, run the scanner, then print the
 * line/token/error totals to stdout.
 *
 * yyin is not assigned here, so the scanner reads from flex's default input
 * (stdin).
 *
 * Returns 0 on success, 1 if cleaned.c cannot be opened or cannot be
 * completely written.
 */
int main(void) {
    int status = 0;

    cleaned = fopen("cleaned.c", "w");
    if (!cleaned) {
        perror("cleaned.c");
        return 1;
    }

    yylex();

    printf("\n=== SUMMARY ===\n");
    printf("Lines : %d\n", lines);
    printf("Tokens : %d\n", tokens);
    printf("Errors : %d\n", errors);

    /* fclose flushes buffered output; a failure here means cleaned.c is
     * incomplete, so report it instead of exiting successfully. */
    if (fclose(cleaned) != 0) {
        perror("cleaned.c");
        status = 1;
    }
    return status;
}
/*
 * End-of-input hook called by the flex scanner.  Always returns 1, which
 * tells the scanner there is no further input to switch to.
 */
int yywrap() {
    enum { NO_MORE_INPUT = 1 };
    return NO_MORE_INPUT;
}
JXsKI2luY2x1ZGUgPHN0ZGlvLmg+CiNpbmNsdWRlIDxzdHJpbmcuaD4KCmludCBsaW5lcyA9IDEsIHRva2VucyA9IDAsIGVycm9ycyA9IDA7CkZJTEUgKmNsZWFuZWQ7CgovKiBIZWxwZXI6IHByaW50IHRva2VuIGluZm8gKi8Kdm9pZCBwcmludF90b2tlbihjb25zdCBjaGFyICp0eXBlLCBjb25zdCBjaGFyICpsZXhlbWUpIHsKICAgIHByaW50ZigiTGluZSAlLTRkICUtMTVzICVzXG4iLCBsaW5lcywgdHlwZSwgbGV4ZW1lKTsKICAgIHRva2VucysrOwp9CgovKiBIZWxwZXI6IHJlcG9ydCBlcnJvciAqLwp2b2lkIHJlcG9ydF9lcnJvcihjb25zdCBjaGFyICptc2csIGNvbnN0IGNoYXIgKmxleGVtZSkgewogICAgZnByaW50ZihzdGRlcnIsICJFcnJvciAoTGluZSAlZCk6ICVzIC0+ICVzXG4iLCBsaW5lcywgbXNnLCBsZXhlbWUpOwogICAgZXJyb3JzKys7Cn0KJX0KCi8qIERlZmluaXRpb25zICovCktFWVdPUkQgICAgIChhdXRvfGJyZWFrfGNhc2V8Y2hhcnxjb25zdHxjb250aW51ZXxkZWZhdWx0fGRvfGRvdWJsZXxlbHNlfGVudW18ZXh0ZXJufGZsb2F0fGZvcnxnb3RvfGlmfGlubGluZXxpbnR8bG9uZ3xyZWdpc3RlcnxyZXN0cmljdHxyZXR1cm58c2hvcnR8c2lnbmVkfHNpemVvZnxzdGF0aWN8c3RydWN0fHN3aXRjaHx0eXBlZGVmfHVuaW9ufHVuc2lnbmVkfHZvaWR8dm9sYXRpbGV8d2hpbGV8X0Jvb2x8X0NvbXBsZXh8X0ltYWdpbmFyeSkKSURFTlQgICAgICAgW0EtWmEtel9dW0EtWmEtejAtOV9dKgpOVU0gICAgICAgICAoWzAtOV0rKFwuWzAtOV0rKT8oW2VFXVsrLV0/WzAtOV0rKT8pClNUUiAgICAgICAgIFwiKFteXCJcXFxuXXxcXC4pKlwiCkNIQVIgICAgICAgIFwnKFteXCdcXFxuXXxcXC4pXCcKSU5WX0lEICAgICAgWzAtOV0rW0EtWmEtel9dW0EtWmEtejAtOV9dKgpTVFJfQkFEICAgICBcIihbXlwiXFxcbl18XFwuKSpcbgpDSEFSX0JBRCAgICBcJyhbXlwnXFxcbl18XFwuKSpcbgpPUCAgICAgICAgICAoXCtcK3wtLXw9PXwhPXw8PXw+PXwmJnxcfFx8fFwrPXwtPXxcKj18Lz18JT18Jj18XHw9fFxePXwtPnw8PHw+PnxbK1wtKi8lJnxefiE9PD4/OjssLlxbXF1cKFwpXHtcfV0pCldTICAgICAgICAgIFsgXHRdKwpDT01NRU5UICAgICAoLy9bXlxuXSp8L1wqKFteKl18XCorW14qL10pKlwqKy8pCgolJQoKe1dTfSAgICAgICAgeyBmcHV0YygnICcsIGNsZWFuZWQpOyB9CntDT01NRU5UfSAgIHsgLyogc2tpcCBjb21tZW50cyBpbiBjbGVhbmVkIGZpbGUgKi8gfQp7S0VZV09SRH0gICB7IHByaW50X3Rva2VuKCJLRVlXT1JEIiwgeXl0ZXh0KTsgZnB1dHMoeXl0ZXh0LCBjbGVhbmVkKTsgZnB1dGMoJyAnLCBjbGVhbmVkKTsgfQp7SURFTlR9ICAgICB7IHByaW50X3Rva2VuKCJJREVOVElGSUVSIiwgeXl0ZXh0KTsgZnB1dHMoeXl0ZXh0LCBjbGVhbmVkKTsgZnB1dGMoJyAnLCBjbGVhbmVkKTsgfQp7TlVNfSAgICAgICB7IHByaW50X3Rva2VuKCJOVU1CRVIiLCB5eXRleHQpOyBmcHV0cyh5eXRleHQsIGNsZWFuZWQpOyBmcHV0
YygnICcsIGNsZWFuZWQpOyB9CntTVFJ9ICAgICAgIHsgcHJpbnRfdG9rZW4oIlNUUklORyIsIHl5dGV4dCk7IGZwdXRzKHl5dGV4dCwgY2xlYW5lZCk7IGZwdXRjKCcgJywgY2xlYW5lZCk7IH0Ke0NIQVJ9ICAgICAgeyBwcmludF90b2tlbigiQ0hBUiIsIHl5dGV4dCk7IGZwdXRzKHl5dGV4dCwgY2xlYW5lZCk7IGZwdXRjKCcgJywgY2xlYW5lZCk7IH0Ke0lOVl9JRH0gICAgeyBwcmludF90b2tlbigiSU5WQUxJRF9JRCIsIHl5dGV4dCk7IHJlcG9ydF9lcnJvcigiSW52YWxpZCBpZGVudGlmaWVyIiwgeXl0ZXh0KTsgfQp7U1RSX0JBRH0gICB7IHByaW50X3Rva2VuKCJCQURfU1RSSU5HIiwgeXl0ZXh0KTsgcmVwb3J0X2Vycm9yKCJVbnRlcm1pbmF0ZWQgc3RyaW5nIiwgeXl0ZXh0KTsgbGluZXMrKzsgfQp7Q0hBUl9CQUR9ICB7IHByaW50X3Rva2VuKCJCQURfQ0hBUiIsIHl5dGV4dCk7IHJlcG9ydF9lcnJvcigiVW50ZXJtaW5hdGVkIGNoYXIiLCB5eXRleHQpOyBsaW5lcysrOyB9CntPUH0gICAgICAgIHsgcHJpbnRfdG9rZW4oIk9QRVJBVE9SIiwgeXl0ZXh0KTsgZnB1dHMoeXl0ZXh0LCBjbGVhbmVkKTsgZnB1dGMoJyAnLCBjbGVhbmVkKTsgfQpcbiAgICAgICAgICB7IGxpbmVzKys7IGZwdXRjKCdcbicsIGNsZWFuZWQpOyB9Ci4gICAgICAgICAgIHsgcHJpbnRfdG9rZW4oIlVOS05PV04iLCB5eXRleHQpOyByZXBvcnRfZXJyb3IoIlVua25vd24gY2hhcmFjdGVyIiwgeXl0ZXh0KTsgZnB1dHMoeXl0ZXh0LCBjbGVhbmVkKTsgfQoKJSUKCmludCBtYWluKCkgewogICAgY2xlYW5lZCA9IGZvcGVuKCJjbGVhbmVkLmMiLCAidyIpOwogICAgaWYgKCFjbGVhbmVkKSB7CiAgICAgICAgcGVycm9yKCJjbGVhbmVkLmMiKTsKICAgICAgICByZXR1cm4gMTsKICAgIH0KCiAgICB5eWxleCgpOwoKICAgIHByaW50ZigiXG49PT0gU1VNTUFSWSA9PT1cbiIpOwogICAgcHJpbnRmKCJMaW5lcyAgOiAlZFxuIiwgbGluZXMpOwogICAgcHJpbnRmKCJUb2tlbnMgOiAlZFxuIiwgdG9rZW5zKTsKICAgIHByaW50ZigiRXJyb3JzIDogJWRcbiIsIGVycm9ycyk7CgogICAgZmNsb3NlKGNsZWFuZWQpOwogICAgcmV0dXJuIDA7Cn0KCmludCB5eXdyYXAoKSB7IHJldHVybiAxOyB9Cg==