This is a very silly problem. There are no errors in the grammar rules afaik but its not giving the right output. I have been staring at it but the mistake is not visible to me.
What tools are available to me to help me see what is going on in a parse? My attempts to insert tracing code are a lot of work and don't seem to be helping me much.
parser.y
%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include "SymbolTable.h"
#include "SymbolInfo.h"
#include "ScopeTable.h"
int yyparse(void);
int yylex(void);
extern char* yytext;
extern FILE * yyin;
extern int tableSize;
FILE *logout;
extern int line_count;
extern char *arr[100];
extern char *final_arr[100];
SymbolTable *table;
void yyerror (const char *s)
{
fprintf(stderr,"%s\n",s);
return;
}
%}
%union {
class SymbolInfo* sym;
char *s;
float f;
}
%error-verbose
%verbose
%token COMMA INT ID SEMICOLON FLOAT VOID LCURL RCURL RETURN NOT IF FOR WHILE PRINTLN LPAREN RPAREN
%token CONST_INT CONST_FLOAT LTHIRD RTHIRD
%token ADDOP MULOP INCOP DECOP RELOP LOGICOP ASSIGNOP
%token <f> DOUBLE
//%expect 1
%precedence THEN
%precedence ELSE
%left "<" ">" "<=" ">=" "=" "!="
%left "+" "-"
%left "*" "/"
%left UMINUS
%%
start : program { printf("start -> program\n");
fprintf(logout,"%d : start -> program\n",line_count);
}
;
program : program unit {
printf("program -> program unit\n");
fprintf(logout,"%d : program -> program unit\n\n",line_count);
for(int j = 0; final_arr[j] != NULL; j++)
{
fprintf(logout,"%s",final_arr[j]);
}
fprintf(logout,"\n\n");
}
| unit {
printf("program -> unit\n");
fprintf(logout,"%d : program -> unit\n\n",line_count);
for(int j = 0; final_arr[j] != NULL; j++)
{
fprintf(logout,"%s",final_arr[j]);
}
fprintf(logout,"\n\n");
}
;
unit : var_dec {
printf("unit -> var_dec\n");
fprintf(logout,"%d : unit -> var_dec\n\n",line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
|func_declaration {
fprintf(logout,"%d : unit -> func_declaration\n\n",line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
|func_definition {
fprintf(logout,"%d : unit -> func_definition\n\n",line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
;
;
func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON {
printf("func_declaration -> type_specifier id LPAREN parameter_list RPAREN SEMICOLON\n");
fprintf(logout,"%d : func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON\n\n", line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
| type_specifier ID LPAREN RPAREN SEMICOLON {
printf("func_declaration -> type_specifier id LPAREN RPAREN SEMICOLON\n");
fprintf(logout,"%d : func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON\n\n", line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
;
func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement {
printf("func_definition -> type_specifier ID LPAREN parameter_list RPAREN compound_statement\n");
fprintf(logout,"%d : func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement\n\n", line_count);
}
| type_specifier ID LPAREN RPAREN compound_statement {
printf("func_definition -> type_specifier id LPAREN RPAREN compound_statement\n");
fprintf(logout,"%d : func_definition : type_specifier ID LPAREN RPAREN compound_statement\n\n", line_count);
}
;
parameter_list : parameter_list COMMA type_specifier ID {
printf("parameter_list -> parameter_list COMMA type_specifier ID\n");
fprintf(logout,"%d : parameter_list : parameter_list COMMA type_specifier ID\n\n", line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
| parameter_list COMMA type_specifier {
printf("parameter_list -> parameter_list COMMA type_specifier\n");
fprintf(logout,"%d : parameter_list : parameter_list COMMA type_specifier\n\n", line_count);
}
| type_specifier ID {
printf("parameter_list -> type_specifier ID\n");
fprintf(logout,"%d : parameter_list : type_specifier ID\n\n", line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
| type_specifier {
printf("parameter_list -> type_specifier\n");
fprintf(logout,"%d : parameter_list : type_specifier \n\n", line_count);
}
;
compound_statement : LCURL statements RCURL {
printf("compound_statement -> LCURL statements RCURL\n");
fprintf(logout,"compound_statement : LCURL statements RCURL\n\n");
}
| LCURL RCURL
;
var_dec: type_specifier declaration_list SEMICOLON {
printf("var_dec -> type_specifier declaration_list SEMICOLON \n");
fprintf(logout,"%d : var_dec: type_specifier declaration_list SEMICOLON \n\n", line_count);
for(int j = 0; arr[j] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
;
type_specifier : INT {printf("type_specifier -> INT\n");
fprintf(logout,"%d : type_specifier-> INT\n\n%s\n\n", line_count,yytext);
}
| FLOAT {printf("type_specifier ->FLOAT\n");
fprintf(logout,"%d : type_specifier-> FLOAT\n\n%s\n\n",line_count, yytext);
}
| VOID {printf("type_specifier -> VOID\n");
fprintf(logout,"%d : type_specifier-> VOID\n\n%s\n\n",line_count, yytext);
}
;
declaration_list : declaration_list COMMA ID {
printf("declaration_list -> declaration_list COMMA ID\n");
fprintf(logout,"%d : declaration_list -> declaration_list COMMA ID\n\n",line_count);
for(int j = 1; arr[j+1] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
| declaration_list COMMA ID LTHIRD CONST_INT RTHIRD {
printf("declaration_list -> declaration_list COMMA ID LTHIRD CONST_INT RTHIRD\n");
fprintf(logout,"%d : declaration_list -> declaration_list COMMA ID LTHIRD CONST_INT RTHIRD\n",line_count);
for(int j = 1; arr[j+1] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
|ID {
printf("declaration_list -> ID\n");
fprintf(logout,"%d : declaration_list -> ID\n\n",line_count);
for(int j = 1; arr[j+1] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
|ID LTHIRD CONST_INT RTHIRD {
printf("declaration_list -> ID LTHIRD CONST_INT RTHIRD\n");
fprintf(logout,"%d : declaration_list -> ID LTHIRD CONST_INT RTHIRD\n",line_count);
for(int j = 1; arr[j+1] != NULL; j++)
{
fprintf(logout,"%s",arr[j]);
}
fprintf(logout,"\n\n");
}
;
statements : statement {
printf("statements -> statement\n");
fprintf(logout,"%d : statements : statement\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| statements statement
;
statement : var_dec
| expression_statement
| compound_statement
| FOR LPAREN expression_statement expression_statement expression RPAREN statement
| IF LPAREN expression RPAREN statement
| WHILE LPAREN expression RPAREN statement
| PRINTLN LPAREN ID RPAREN SEMICOLON
| RETURN expression SEMICOLON {
printf("statement -> RETURN expression SEMICOLON\n");
fprintf(logout,"%d : statement : RETURN expression SEMICOLON\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
;
expression_statement : SEMICOLON
| expression SEMICOLON
;
variable : ID {
printf("variable -> ID\n");
fprintf(logout,"%d : variable : ID\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| ID LTHIRD expression RTHIRD
;
expression : logic_expression {
printf("expression -> logic_expression\n");
fprintf(logout,"%d : expression : logic_expression\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| variable ASSIGNOP logic_expression
;
logic_expression : rel_expression
| rel_expression LOGICOP rel_expression
;
rel_expression : simple_expression {
printf("rel_expression -> simple_expression \n");
fprintf(logout,"%d : rel_expression : simple_expression\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| simple_expression RELOP simple_expression
;
simple_expression : term {
printf("simple_expression -> term\n");
fprintf(logout,"%d : simple_expression : term \n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| simple_expression ADDOP term {
printf("simple_expression -> simple_expression ADDOP term\n");
fprintf(logout,"simple_expression : simple_expression ADDOP term \n\n");
fprintf(logout, "%s\n\n",yytext);
}
;
term : unary_expression {
printf("term -> unary_expression\n");
fprintf(logout,"%d : term : unary_expression\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| term MULOP unary_expression
;
unary_expression : ADDOP unary_expression
| NOT unary_expression
| factor {
printf("unary_expression -> factor\n");
fprintf(logout,"%d : unary_expression : factor\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
;
factor : variable {
printf("factor -> variable\n");
fprintf(logout,"%d : factor : variable\n\n",line_count);
fprintf(logout, "%s\n\n",yytext);
}
| ID LPAREN argument_list RPAREN
| LPAREN expression RPAREN
| CONST_INT
| CONST_FLOAT
| variable INCOP
| variable DECOP
;
argument_list : arguments
|
;
arguments : arguments COMMA logic_expression
| logic_expression
;
%%
int main(int argc, char *argv[])
{
FILE *fp ;
int token = 0;
if((fp = fopen(argv[1],"r")) == NULL)
{
fprintf(logout,"cannot open file");
exit(1);
}
logout = fopen("log.txt","w");
yyin = fp;
yyparse();
fclose(fp);
fclose(logout);
return 0;
}
input.txt
int var(int a, int b){
return a+b;
}
output I'm getting :
type_specifier -> INT
type_specifier -> INT
parameter_list -> type_specifier ID
type_specifier -> INT
parameter_list -> parameter_list COMMA type_specifier ID
variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression -> term
rel_expression -> simple_expression
expression -> logic_expression
syntax error, unexpected ID, expecting SEMICOLON
expected output is :
type_specifier -> INT
type_specifier -> INT
parameter_list -> type_specifier ID
type_specifier -> INT
parameter_list -> parameter_list COMMA type_specifier ID
variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression -> term
variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression : simple_expression ADDOP term
rel_expression -> simple_expression
logic_expression : rel_expression
expression -> logic_expression
statement : RETURN expression SEMICOLON
statements : statement
compound_statement : LCURL statements RCURL
func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement
unit : func_definition
program : program unit
start : program
Adding the flex file just in case
%option noyywrap
%{
#include<stdlib.h>
#include<stdio.h>
#include "y.tab.h"
#include "SymbolTable.h"
#include "SymbolInfo.h"
#include "ScopeTable.h"
void yyerror (char *);
extern YYSTYPE yylval;
extern SymbolTable *table;
extern FILE *logout;
char *arr[100];
char *final_arr[100];
int k; //final_arr count
int i = 0; //arr count
int line_count = 1;
%}
id [a-z]*
DOUBLE (([0-9]+(\.[0-9]*)?)|([0-9]*\.[0-9]+))
newline \n
%%
{newline} {
arr[i] = "\n",final_arr[k] = arr[i];
i++; k++;
line_count++;
}
[ \t]+ {}
(([0-9]+(\.[0-9]*)?)|([0-9]*\.[0-9]+)) {
yylval.f = atof(yytext);
return DOUBLE;
}
"int" {
memset(&arr,NULL,sizeof(arr)); i = 0;
arr[i] = "int ";
final_arr[k] = "int ";
i++; k++;
return INT;
}
"float" {
memset(&arr,NULL,sizeof(arr)); i = 0;
arr[i] = "float "; final_arr[k] = "float ";
i++; k++;
return FLOAT;
}
"void" {
memset(&arr,NULL,sizeof(arr)); i = 0;
arr[i] = "void "; final_arr[k] = "void ";
i++; k++;
return VOID;
}
";" {
arr[i] = ";";final_arr[k] = ";";
i++; k++;
return SEMICOLON;}
"," {
arr[i] = ","; final_arr[k] = ",";
i++; k++;
return COMMA;
}
"(" {
arr[i] = "(";final_arr[k] = "(";
i++; k++;
return LPAREN;}
")" {
arr[i] = ")";final_arr[k] = ")";
i++; k++;
return RPAREN;}
"{" {return LCURL;}
"}" {return RCURL;}
{id} {
yylval.s = strdup(yytext);
arr[i] = strdup(yytext); final_arr[k] = strdup(yytext);
k++; i++;
for(int j = 1; arr[j] != NULL; j++)
{
//fprintf(logout,"%s", arr[j]);
//fprintf(logout,"arr [%d] %s\n ",j,arr[j]);
}
//fprintf(logout,"\n\n");
return ID;
}
%%
If you define a symbol (with the #define directive) named in the declarations section and set the variable yydebug to a nonzero value, your parser displays a good deal of debugging information as it parses input.
The Bison parser detects a syntax error (or parse error) whenever it reads a token which cannot satisfy any syntax rule. An action in the grammar can also explicitly proclaim an error, using the macro YYERROR (see Special Features for Use in Actions).
You seem to have spent an awful lot of effort trying to implement a way of tracing what's going on in your parser, and to little effect since the problem here is simply a missing lexer keyword rule.
You would be much better off using the built-in debugging features of flex and bison. Then your grammar and lexer would be much simpler and easier to read, and the debugging output would be more complete (and would let you trace the behaviour through the state table).
Here's a quick summary. It's a snap, really.
Add --debug
to your bison command. That will cause bison to generate code to trace your parse. (If you're lazy, you can use -t
-- for trace -- which is the Posix standard command-line option, and should also work with yacc, byacc, btyacc, etc., etc.)
Add the following three lines at the beginning of main
, assuming that main
is in your .y
file:
#ifdef YYDEBUG
yydebug = 1;
#endif
For additional bonus points, you could make this assignment conditional on some command line flag.
Once you do that, you will receive the following trace output:
... snip ... Pick up the trace at the ) at the end of the parameter list
Reading a token: Next token is token RPAREN ()
Shifting token RPAREN ()
Entering state 28
Reading a token: Next token is token LCURL ()
Shifting token LCURL ()
Entering state 25
Reading a token: Next token is token ID ()
Shifting token ID ()
Entering state 44
Reading a token: Next token is token ID ()
... snip ...
Note that two ID
s were returned after the curly bracket, corresponding to the tokens return
and a
.
You can also enable tracing in flex with flex --debug
(or -d
). This causes the scanner to produce an output line of the form
--accepting rule at line 85 ("return")
for every accepted token (and some other lines). You need to check the line numbers against your source code, unfortunately, but in this case you might have noticed the similarity between the above and
--accepting rule at line 85 ("b")
For additional debugging simplicity, it's worth getting into the habit writing your scanner in a way that it can be compiled independently of the parser. Then you can test your scanner by compiling it separately using the main()
implementation in -lfl
.
References and more debugging information:
%printer
declaration.If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With