#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
/* Maximum number of distinct symbols the table can hold. */
#define MAX_SYMBOLS 200

/* One symbol-table entry: the token text and the category it was recorded with. */
struct Symbol {
    char name[100]; /* NUL-terminated token text */
    char type[30];  /* NUL-terminated category label */
};

/* The symbol table; entries [0, symbolCount) are in use, in insertion order. */
struct Symbol symbolTable[MAX_SYMBOLS];
int symbolCount = 0;

/*
 * Append a symbol to the table unless one with the same name already exists.
 *
 * name: token text; stored truncated to fit Symbol.name if it is longer.
 * type: category label; stored truncated to fit Symbol.type if it is longer.
 *
 * The first type recorded for a name wins; later calls with the same name
 * are ignored. When the table already holds MAX_SYMBOLS entries the symbol
 * is dropped and a diagnostic is written to stderr.
 */
void addSymbol(const char *name, const char *type) {
    /* Longest name that fits in an entry, excluding the terminator. */
    const size_t maxName = sizeof symbolTable[0].name - 1;

    for (int i = 0; i < symbolCount; ++i) {
        /* Bounded compare so an over-long name matches its truncated copy. */
        if (strncmp(symbolTable[i].name, name, maxName) == 0)
            return;
    }

    if (symbolCount >= MAX_SYMBOLS) {
        fprintf(stderr, "Symbol table full (%d entries); dropping \"%s\"\n",
                MAX_SYMBOLS, name);
        return;
    }

    struct Symbol *slot = &symbolTable[symbolCount];
    /* snprintf always NUL-terminates and never writes past the field. */
    snprintf(slot->name, sizeof slot->name, "%s", name);
    snprintf(slot->type, sizeof slot->type, "%s", type);
    symbolCount++;
}
/*
 * Split a regular-expression string into tokens and record each one in the
 * symbol table through addSymbol().
 *
 * input: NUL-terminated text to scan.
 *
 * Recognised tokens, in the order they are tried:
 *   - '(' and ')'                       -> "PARENTHESIS"
 *   - one of | * + . -                  -> "OPERATOR"
 *   - \n, \t or \\ (two characters)     -> "ESCAPE_SEQUENCE"
 *   - a single byte in single quotes    -> "LITERAL"
 *   - UTF-8 encoded epsilon (CE B5)     -> "EMPTY_STRING"
 *   - the whole words "lambda" / "null" -> "EMPTY_STRING"
 *   - other [A-Za-z_][A-Za-z0-9_]*      -> "IDENTIFIER"
 *   - a run of decimal digits           -> "NUMBER"
 * Whitespace is skipped. Any other byte is reported on stdout as an unknown
 * token and skipped. Identifiers and numbers longer than the token buffer
 * are consumed in full but stored truncated.
 */
void tokenize(const char *input) {
    int i = 0;
    char token[100];
    /* Longest token text that fits, excluding the terminator. */
    const size_t maxLen = sizeof token - 1;

    while (input[i] != '\0') {
        unsigned char c = (unsigned char)input[i];

        if (isspace(c)) {
            i++;
            continue;
        }

        if (input[i] == '(' || input[i] == ')') {
            token[0] = input[i];
            token[1] = '\0';
            addSymbol(token, "PARENTHESIS");
            i++;
            continue;
        }

        if (strchr("|*+.-", input[i])) {
            token[0] = input[i];
            token[1] = '\0';
            addSymbol(token, "OPERATOR");
            i++;
            continue;
        }

        /* A backslash followed by anything else falls through to "unknown". */
        if (input[i] == '\\' &&
            (input[i + 1] == 'n' || input[i + 1] == 't' || input[i + 1] == '\\')) {
            token[0] = '\\';
            token[1] = input[i + 1];
            token[2] = '\0';
            addSymbol(token, "ESCAPE_SEQUENCE");
            i += 2;
            continue;
        }

        /* Check input[i + 1] first so input[i + 2] is never read past the
         * terminator when the string ends right after an opening quote. */
        if (input[i] == '\'' && input[i + 1] != '\0' && input[i + 2] == '\'') {
            token[0] = input[i];
            token[1] = input[i + 1];
            token[2] = input[i + 2];
            token[3] = '\0';
            addSymbol(token, "LITERAL");
            i += 3;
            continue;
        }

        /* Epsilon is matched by its UTF-8 byte pair; a single char can never
         * equal the multi-byte character, so no one-byte comparison is made. */
        if (c == 0xCE && (unsigned char)input[i + 1] == 0xB5) {
            addSymbol("\xCE\xB5", "EMPTY_STRING");
            i += 2;
            continue;
        }

        if (isalpha(c) || input[i] == '_') {
            size_t len = 0;
            while (isalnum((unsigned char)input[i]) || input[i] == '_') {
                if (len < maxLen)
                    token[len++] = input[i];
                i++;
            }
            token[len] = '\0';
            /* Classify after scanning the whole word so that e.g. "nullable"
             * stays one identifier instead of splitting into "null" + "able". */
            if (strcmp(token, "lambda") == 0 || strcmp(token, "null") == 0)
                addSymbol(token, "EMPTY_STRING");
            else
                addSymbol(token, "IDENTIFIER");
            continue;
        }

        if (isdigit(c)) {
            size_t len = 0;
            while (isdigit((unsigned char)input[i])) {
                if (len < maxLen)
                    token[len++] = input[i];
                i++;
            }
            token[len] = '\0';
            addSymbol(token, "NUMBER");
            continue;
        }

        token[0] = input[i];
        token[1] = '\0';
        printf("Unknown token: %s\n", token);
        i++;
    }
}
/*
 * Read one line containing a regular expression from stdin, tokenize it, and
 * print the resulting symbol table: token text, category, and the address of
 * the table entry.
 *
 * Returns 0 on success, 1 if no input could be read.
 */
int main(void) {
    static const char rule[] =
        "-------------------------------------------------------------------";
    char input[1024];

    printf("Enter a regular expression:\n");
    fflush(stdout); /* make the prompt visible before blocking on input */
    if (fgets(input, sizeof(input), stdin) == NULL) {
        fprintf(stderr, "Error reading input.\n");
        return 1;
    }

    tokenize(input);

    printf("\nSymbol Table:\n");
    printf("%s\n", rule);
    printf("%-20s %-20s %-20s\n", "Token", "Type", "Address");
    printf("%s\n", rule);
    for (int i = 0; i < symbolCount; ++i) {
        printf("%-20s %-20s %p\n", symbolTable[i].name, symbolTable[i].type,
               (void *)&symbolTable[i]);
    }
    printf("%s\n", rule);
    return 0;
}