A comprehensive JSON compiler implementation with syntax validation, pretty printing, token tracking, and statistical analysis using Lex and Yacc/Bison.
Validates JSON syntax and reports errors with line and column numbers.
Formats JSON with consistent indentation, normalizing irregular whitespace from the input.
Displays the type, value, line, and column of each token as it is scanned.
Counts objects, arrays, strings, numbers, booleans, and null values.
Demonstrates compiler phases: lexical analysis, parsing, and validation.
Generates clean, human-readable formatted output.
%{
/*
 * Lexical analyzer for the JSON compiler.
 *
 * Splits the input into JSON tokens, prints a trace line for every token,
 * tracks the current line/column for error reporting, and counts how many
 * objects, arrays, strings, numbers, booleans and nulls were seen.
 *
 * Every token that carries text (strings, numbers, true/false/null) hands a
 * freshly strdup()'ed copy of the lexeme to the parser through yylval.str;
 * the parser is responsible for that memory.
 */
#include "json.tab.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Position of the next unread character (1-based); read by the parser. */
int line_number = 1;
int column_number = 1;

/* Element counters reported by print_statistics(). */
int object_count = 0;
int array_count = 0;
int string_count = 0;
int number_count = 0;
int boolean_count = 0;
int null_count = 0;

void print_token(const char *token_type, const char *value);
void update_position(void);
void print_statistics(void);

/*
 * Pattern notes (RFC 8259):
 *  - NUMBER: optional minus, an integer part without leading zeros, an
 *    optional fraction and an optional exponent.
 *  - STRING: a raw newline is not allowed inside a string, which also keeps
 *    line_number accurate; only the escapes defined by JSON are accepted.
 */
%}

%option noyywrap

DIGIT       [0-9]
HEX         [0-9a-fA-F]
INT         "-"?(0|[1-9]{DIGIT}*)
FRAC        \.{DIGIT}+
EXP         [eE][+-]?{DIGIT}+
NUMBER      {INT}{FRAC}?{EXP}?
ESCAPE      \\([\"\\/bfnrt]|u{HEX}{4})
STRING      \"([^\\\"\n]|{ESCAPE})*\"
WHITESPACE  [ \t\r]
NEWLINE     \n

%%

{WHITESPACE}+   { column_number += yyleng; }
{NEWLINE}       { line_number++; column_number = 1; }

"{"             { print_token("LBRACE", "{"); update_position(); object_count++; return LBRACE; }
"}"             { print_token("RBRACE", "}"); update_position(); return RBRACE; }
"["             { print_token("LBRACKET", "["); update_position(); array_count++; return LBRACKET; }
"]"             { print_token("RBRACKET", "]"); update_position(); return RBRACKET; }
":"             { print_token("COLON", ":"); update_position(); return COLON; }
","             { print_token("COMMA", ","); update_position(); return COMMA; }

"true"          { print_token("TRUE", "true"); update_position(); boolean_count++; yylval.str = strdup(yytext); return TRUE_TOKEN; }
"false"         { print_token("FALSE", "false"); update_position(); boolean_count++; yylval.str = strdup(yytext); return FALSE_TOKEN; }
"null"          { print_token("NULL", "null"); update_position(); null_count++; yylval.str = strdup(yytext); return NULL_TOKEN; }

{STRING}        { print_token("STRING", yytext); update_position(); string_count++; yylval.str = strdup(yytext); return STRING_TOKEN; }
{NUMBER}        { print_token("NUMBER", yytext); update_position(); number_count++; yylval.str = strdup(yytext); return NUMBER_TOKEN; }

.               { fprintf(stderr, "Unexpected character '%c' at line %d, column %d\n", yytext[0], line_number, column_number); exit(1); }

%%

/*
 * Print one trace line for a token. Called before update_position(), so the
 * reported column is the column where the token starts.
 */
void print_token(const char *token_type, const char *value)
{
    printf("Token: %-10s | Value: %-10s | Line: %d | Column: %d\n",
           token_type, value, line_number, column_number);
}

/* Advance the column counter past the token that was just matched. */
void update_position(void)
{
    column_number += yyleng;
}

/* Print how many elements of each kind the scanner has seen so far. */
void print_statistics(void)
{
    printf("\n=== ELEMENT COUNT STATISTICS ===\n");
    printf("Objects: %d\n", object_count);
    printf("Arrays: %d\n", array_count);
    printf("Strings: %d\n", string_count);
    printf("Numbers: %d\n", number_count);
    printf("Booleans: %d\n", boolean_count);
    printf("Nulls: %d\n", null_count);
    printf("================================\n");
}
%{
/*
 * Parser and pretty printer for the JSON compiler.
 *
 * Each grammar symbol carries a heap-allocated string holding the
 * pretty-printed text of what it matched. Ownership rule: an action owns the
 * strings of its right-hand-side symbols and must either pass them on as $$
 * or free them. Tokens arrive from the lexer already heap-allocated, so
 * scalar values are passed through without copying.
 *
 * Nested containers are indented by re-indenting the already formatted child
 * text when it is wrapped in its parent, so every nesting level gains one
 * INDENT_UNIT.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Whitespace added per nesting level in the pretty-printed output. */
#define INDENT_UNIT "  "

void yyerror(const char *s);
int yylex(void);

extern int line_number;
extern int column_number;
void print_statistics(void);

FILE *output_file;

/* Declared by the original file but not used or defined here; retained so
 * that any external references keep compiling. */
int indent_level = 0;
void print_indent();
void pretty_print(char *str);

static void *xmalloc(size_t size);
static char *copy_text(const char *text);
static char *join_owned(char *left, const char *sep, char *right);
static char *wrap_block(const char *open, char *body, const char *close);
%}

%union {
    char *str;
}

%token <str> STRING_TOKEN NUMBER_TOKEN TRUE_TOKEN FALSE_TOKEN NULL_TOKEN
%token LBRACE RBRACE LBRACKET RBRACKET COLON COMMA

%type <str> value object array members member elements

%%

json:
    value {
        /* Write first, report success afterwards, so the message is only
         * printed when the file really was saved. */
        int write_failed = fprintf(output_file, "%s\n", $1) < 0;
        if (fclose(output_file) != 0) {
            write_failed = 1;
        }
        output_file = NULL;
        free($1);
        if (write_failed) {
            fprintf(stderr, "Error: Cannot write output file\n");
            exit(1);
        }
        printf("\n=== JSON SYNTAX VALIDATION: SUCCESS ===\n");
        printf("JSON is syntactically valid!\n");
        printf("Pretty-printed output saved to 'output.json'\n");
        print_statistics();
    }
    ;

value:
      STRING_TOKEN  { $$ = $1; }
    | NUMBER_TOKEN  { $$ = $1; }
    | TRUE_TOKEN    { $$ = $1; }
    | FALSE_TOKEN   { $$ = $1; }
    | NULL_TOKEN    { $$ = $1; }
    | object        { $$ = $1; }
    | array         { $$ = $1; }
    ;

object:
      LBRACE RBRACE          { $$ = copy_text("{}"); }
    | LBRACE members RBRACE  { $$ = wrap_block("{", $2, "}"); }
    ;

members:
      member                { $$ = $1; }
    | members COMMA member  { $$ = join_owned($1, ",\n", $3); }
    ;

member:
    STRING_TOKEN COLON value  { $$ = join_owned($1, ": ", $3); }
    ;

array:
      LBRACKET RBRACKET           { $$ = copy_text("[]"); }
    | LBRACKET elements RBRACKET  { $$ = wrap_block("[", $2, "]"); }
    ;

elements:
      value                 { $$ = $1; }
    | elements COMMA value  { $$ = join_owned($1, ",\n", $3); }
    ;

%%

/* malloc() that terminates the program instead of returning NULL. */
static void *xmalloc(size_t size)
{
    void *ptr = malloc(size);
    if (ptr == NULL) {
        fprintf(stderr, "Error: Out of memory\n");
        exit(1);
    }
    return ptr;
}

/* Return a heap-allocated copy of text. */
static char *copy_text(const char *text)
{
    size_t size = strlen(text) + 1;
    char *copy = xmalloc(size);
    memcpy(copy, text, size);
    return copy;
}

/*
 * Return a new string in which every line of text is prefixed with
 * INDENT_UNIT. Newlines inside string literals are left alone so that the
 * content of a JSON string is never modified.
 */
static char *indent_lines(const char *text)
{
    const size_t unit = strlen(INDENT_UNIT);
    size_t line_count = 1;
    const char *src;
    char *out;
    char *dst;
    int in_string = 0;
    int escaped = 0;

    /* Counting every newline gives an upper bound on inserted indents. */
    for (src = text; *src != '\0'; src++) {
        if (*src == '\n') {
            line_count++;
        }
    }

    out = xmalloc(strlen(text) + line_count * unit + 1);
    dst = out;
    memcpy(dst, INDENT_UNIT, unit);
    dst += unit;

    for (src = text; *src != '\0'; src++) {
        char c = *src;
        *dst++ = c;
        if (in_string) {
            if (escaped) {
                escaped = 0;
            } else if (c == '\\') {
                escaped = 1;
            } else if (c == '"') {
                in_string = 0;
            }
        } else if (c == '"') {
            in_string = 1;
        } else if (c == '\n') {
            memcpy(dst, INDENT_UNIT, unit);
            dst += unit;
        }
    }
    *dst = '\0';
    return out;
}

/*
 * Build "<left><sep><right>" as a new string. Takes ownership of left and
 * right and frees them.
 */
static char *join_owned(char *left, const char *sep, char *right)
{
    size_t size = strlen(left) + strlen(sep) + strlen(right) + 1;
    char *out = xmalloc(size);
    snprintf(out, size, "%s%s%s", left, sep, right);
    free(left);
    free(right);
    return out;
}

/*
 * Build a container: the opening bracket, the body indented one level on
 * its own lines, then the closing bracket. Takes ownership of body.
 */
static char *wrap_block(const char *open, char *body, const char *close)
{
    char *indented = indent_lines(body);
    size_t size = strlen(open) + strlen(indented) + strlen(close) + 3;
    char *out = xmalloc(size);
    snprintf(out, size, "%s\n%s\n%s", open, indented, close);
    free(indented);
    free(body);
    return out;
}

/*
 * Report a syntax error with the scanner's current position and terminate.
 * No error recovery is attempted.
 */
void yyerror(const char *s)
{
    fprintf(stderr, "\n=== SYNTAX ERROR ===\n");
    fprintf(stderr, "Error: %s at line %d, column %d\n", s, line_number, column_number);
    fprintf(stderr, "====================\n");
    exit(1);
}

/*
 * Entry point: opens output.json, then parses JSON from the scanner's input.
 * Returns 0 on success and 1 if the output file cannot be created or the
 * parse fails.
 */
int main(void)
{
    printf("=== JSON COMPILER USING LEX AND YACC ===\n");
    printf("Starting compilation process...\n\n");

    output_file = fopen("output.json", "w");
    if (!output_file) {
        fprintf(stderr, "Error: Cannot create output file\n");
        return 1;
    }

    printf("=== TOKEN ANALYSIS ===\n");
    return yyparse() == 0 ? 0 : 1;
}