JSON Compiler using Lex and Yacc

A comprehensive JSON compiler implementation with syntax validation, pretty printing, token tracking, and statistical analysis using Lex and Yacc/Bison.

Features

Syntax Validation

Validates JSON syntax and reports errors with line and column numbers.

Pretty Printing

Re-emits valid JSON with indentation and normalized whitespace, writing the result to output.json.

Token Tracking

Displays the type, value, line, and column of each token as it is scanned.

Element Statistics

Counts objects, arrays, strings, numbers, booleans, and null values.

Enhanced Learning

Demonstrates compiler phases: lexical analysis, parsing, and validation.

User-Friendly Output

Generates clean, human-readable formatted output.

Interactive JSON Compiler Demo

Token Analysis

Element Statistics

Pretty Printed JSON (output.json)

json.l (Lexical Analyzer)

%{
/*
 * Scanner prologue: headers, shared scanner state, and prototypes for the
 * helpers defined in the user-code section at the bottom of this file.
 */

/* Presumably the parser-generated header supplying the token codes and
 * yylval used by the rules below -- confirm against the build. */
#include "json.tab.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Current scan position. Both start at 1; the newline rule bumps
 * line_number and resets column_number to 1. */
int line_number = 1;
int column_number = 1;

/* Running totals, each incremented by the rule that matches the
 * corresponding token ("{" for objects, "[" for arrays, and so on). */
int object_count = 0;
int array_count = 0;
int string_count = 0;
int number_count = 0;
int boolean_count = 0;
int null_count = 0;

/* Prints one token row using the current line_number/column_number. */
void print_token(char* token_type, char* value);
/* Advances column_number by the length of the token just matched. */
void update_position();
%}

%option noyywrap

/*
 * Named patterns, following the JSON grammar of RFC 8259.
 *
 * Numbers: optional minus sign, an integer part with no leading zeros,
 * then an optional fraction and an optional exponent.  A leading "+",
 * a bare "." and leading zeros are not valid JSON and do not match.
 */
DIGIT       [0-9]
INTEGER     (0|[1-9]{DIGIT}*)
FRACTION    (\.{DIGIT}+)
EXPONENT    ([eE][+-]?{DIGIT}+)
NUMBER      -?{INTEGER}{FRACTION}?{EXPONENT}?

/*
 * Strings: raw control characters (including newline) must be escaped,
 * so they are excluded from the unescaped class; this also keeps a
 * string on one line so line/column tracking stays correct.  Only the
 * escapes defined by JSON are accepted.
 */
HEX         [0-9a-fA-F]
ESCAPE      (\\([\"\\/bfnrt]|u{HEX}{HEX}{HEX}{HEX}))
STRING      \"([^\"\\\x00-\x1F]|{ESCAPE})*\"

WHITESPACE  [ \t\r]
NEWLINE     \n

%%

{NEWLINE}       { ++line_number; column_number = 1; }
{WHITESPACE}+   { column_number += yyleng; }

    /* Structural punctuation: report the token, then step past it. */
"{"             { ++object_count; print_token("LBRACE", "{"); column_number += yyleng; return LBRACE; }
"}"             { print_token("RBRACE", "}"); column_number += yyleng; return RBRACE; }
"["             { ++array_count; print_token("LBRACKET", "["); column_number += yyleng; return LBRACKET; }
"]"             { print_token("RBRACKET", "]"); column_number += yyleng; return RBRACKET; }
":"             { print_token("COLON", ":"); column_number += yyleng; return COLON; }
","             { print_token("COMMA", ","); column_number += yyleng; return COMMA; }

    /* Literals and scalars: the matched text is duplicated for the parser. */
"true"          {
                    yylval.str = strdup(yytext);
                    ++boolean_count;
                    print_token("TRUE", "true");
                    column_number += yyleng;
                    return TRUE_TOKEN;
                }
"false"         {
                    yylval.str = strdup(yytext);
                    ++boolean_count;
                    print_token("FALSE", "false");
                    column_number += yyleng;
                    return FALSE_TOKEN;
                }
"null"          {
                    yylval.str = strdup(yytext);
                    ++null_count;
                    print_token("NULL", "null");
                    column_number += yyleng;
                    return NULL_TOKEN;
                }
{STRING}        {
                    yylval.str = strdup(yytext);
                    ++string_count;
                    print_token("STRING", yytext);
                    column_number += yyleng;
                    return STRING_TOKEN;
                }
{NUMBER}        {
                    yylval.str = strdup(yytext);
                    ++number_count;
                    print_token("NUMBER", yytext);
                    column_number += yyleng;
                    return NUMBER_TOKEN;
                }

    /* Anything else is a lexical error: report its position and stop. */
.               {
                    fprintf(stderr, "Unexpected character '%c' at line %d, column %d\n",
                            yytext[0], line_number, column_number);
                    exit(1);
                }

%%

/*
 * Write one row of the token table to standard output.
 *
 * token_type  name of the token class
 * value       text shown in the Value column
 *
 * The line and column shown are the scanner's current position, so this
 * is called before the position is advanced past the token.
 */
void print_token(char* token_type, char* value) {
    static const char row_format[] =
        "Token: %-10s | Value: %-10s | Line: %d | Column: %d\n";

    fprintf(stdout, row_format, token_type, value, line_number, column_number);
}

/* Move the column counter past the token the scanner has just matched. */
void update_position() {
    column_number = column_number + yyleng;
}

/*
 * Print the per-kind element totals gathered by the scanner, framed by a
 * header and footer line, to standard output.
 */
void print_statistics() {
    /* One row per counter, in display order. */
    static const struct {
        const char *format;
        const int  *count;
    } rows[] = {
        { "Objects:  %d\n", &object_count  },
        { "Arrays:   %d\n", &array_count   },
        { "Strings:  %d\n", &string_count  },
        { "Numbers:  %d\n", &number_count  },
        { "Booleans: %d\n", &boolean_count },
        { "Nulls:    %d\n", &null_count    },
    };
    size_t row;

    fputs("\n=== ELEMENT COUNT STATISTICS ===\n", stdout);
    for (row = 0; row < sizeof rows / sizeof rows[0]; ++row) {
        printf(rows[row].format, *rows[row].count);
    }
    fputs("================================\n", stdout);
}

json.y (Parser)

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

void yyerror(const char *s);
int yylex();
extern int line_number;
extern int column_number;
extern void print_statistics();

FILE *output_file;
int indent_level = 0;

void print_indent();
void pretty_print(char *str);

/*
 * Ownership convention used by the grammar actions below: every <str>
 * semantic value is a heap-allocated string.  An action that builds a new
 * string from its operands frees the operands, so nothing is leaked as
 * the parse tree is folded into the final text.
 */

/* Allocate size bytes or terminate with a diagnostic; never returns NULL. */
static char *xalloc(size_t size) {
    char *block = malloc(size);
    if (block == NULL) {
        fprintf(stderr, "Error: out of memory\n");
        exit(1);
    }
    return block;
}

/* Return a heap copy of text. */
static char *copy_text(const char *text) {
    char *copy = xalloc(strlen(text) + 1);
    strcpy(copy, text);
    return copy;
}

/* Return left + sep + right as a new string; frees left and right. */
static char *join_owned(char *left, const char *sep, char *right) {
    size_t size = strlen(left) + strlen(sep) + strlen(right) + 1;
    char *joined = xalloc(size);
    snprintf(joined, size, "%s%s%s", left, sep, right);
    free(left);
    free(right);
    return joined;
}

/*
 * Return a new string in which every line of text is prefixed with two
 * spaces.  Newlines inside double-quoted string literals are left alone
 * so string contents are never altered.
 */
static char *indent_block(const char *text) {
    size_t line_count = 1;
    int in_string = 0;
    const char *src;
    char *indented;
    char *dst;

    for (src = text; *src != '\0'; ++src) {
        if (*src == '\n') {
            ++line_count;
        }
    }

    /* Upper bound: original text plus two spaces per line plus NUL. */
    indented = xalloc(strlen(text) + 2 * line_count + 1);
    dst = indented;
    *dst++ = ' ';
    *dst++ = ' ';

    for (src = text; *src != '\0'; ++src) {
        *dst++ = *src;
        if (in_string) {
            if (*src == '\\' && src[1] != '\0') {
                *dst++ = *++src;        /* keep the escaped character as-is */
            } else if (*src == '"') {
                in_string = 0;
            }
        } else if (*src == '"') {
            in_string = 1;
        } else if (*src == '\n') {
            *dst++ = ' ';
            *dst++ = ' ';
        }
    }
    *dst = '\0';
    return indented;
}

/*
 * Build "<open>\n<body indented one level>\n<close>"; frees body.
 * Because the whole body is re-indented, nested containers end up one
 * level deeper for each enclosing object or array.
 */
static char *wrap_block(char open, char *body, char close) {
    char *indented = indent_block(body);
    size_t size = strlen(indented) + 5;   /* open, 2 newlines, close, NUL */
    char *wrapped = xalloc(size);
    snprintf(wrapped, size, "%c\n%s\n%c", open, indented, close);
    free(indented);
    free(body);
    return wrapped;
}
%}

%union {
    char *str;
}

%token <str> STRING_TOKEN NUMBER_TOKEN TRUE_TOKEN FALSE_TOKEN NULL_TOKEN
%token LBRACE RBRACE LBRACKET RBRACKET COLON COMMA

%type <str> json value object array members member elements

%%

/* Start symbol: a single JSON value; report success and emit the result. */
json: value {
        printf("\n=== JSON SYNTAX VALIDATION: SUCCESS ===\n");
        printf("JSON is syntactically valid!\n");
        printf("Pretty-printed output saved to 'output.json'\n");
        print_statistics();
        fprintf(output_file, "%s", $1);
        fclose(output_file);
        free($1);
        $$ = NULL;
    }
    ;

/* Scalar token text is already a heap string, so it is passed through. */
value: STRING_TOKEN {
        $$ = $1;
    }
    | NUMBER_TOKEN {
        $$ = $1;
    }
    | TRUE_TOKEN {
        $$ = $1;
    }
    | FALSE_TOKEN {
        $$ = $1;
    }
    | NULL_TOKEN {
        $$ = $1;
    }
    | object {
        $$ = $1;
    }
    | array {
        $$ = $1;
    }
    ;

object: LBRACE RBRACE {
        $$ = copy_text("{}");
    }
    | LBRACE members RBRACE {
        $$ = wrap_block('{', $2, '}');
    }
    ;

/* Members are kept unindented here; the enclosing object indents them. */
members: member {
        $$ = $1;
    }
    | members COMMA member {
        $$ = join_owned($1, ",\n", $3);
    }
    ;

member: STRING_TOKEN COLON value {
        $$ = join_owned($1, ": ", $3);
    }
    ;

array: LBRACKET RBRACKET {
        $$ = copy_text("[]");
    }
    | LBRACKET elements RBRACKET {
        $$ = wrap_block('[', $2, ']');
    }
    ;

/* Elements are kept unindented here; the enclosing array indents them. */
elements: value {
        $$ = $1;
    }
    | elements COMMA value {
        $$ = join_owned($1, ",\n", $3);
    }
    ;

%%

/*
 * Parser error callback: print a framed diagnostic containing the message
 * and the scanner's current line and column to stderr, then exit with
 * status 1.
 */
void yyerror(const char *s) {
    fputs("\n=== SYNTAX ERROR ===\n", stderr);
    fprintf(stderr, "Error: %s at line %d, column %d\n",
            s, line_number, column_number);
    fputs("====================\n", stderr);
    exit(1);
}

/*
 * Entry point: print the banner, open output.json for writing, then run
 * the parser.  Returns 1 if the output file cannot be created and 0 once
 * yyparse() returns.
 */
int main() {
    fputs("=== JSON COMPILER USING LEX AND YACC ===\n", stdout);
    fputs("Starting compilation process...\n\n", stdout);

    output_file = fopen("output.json", "w");
    if (output_file == NULL) {
        fprintf(stderr, "Error: Cannot create output file\n");
        return 1;
    }

    fputs("=== TOKEN ANALYSIS ===\n", stdout);
    yyparse();
    return 0;
}