Intermediate code generation using LEX &YACC for Control Flow and Switch Case statements

Title:Intermediate code generation using LEX &YACC for Control Flow and Switch Case statements
---------------------------------------------
YACC PROGRAM
---------------------------------------------
/*(Yacc Program : intar.y)*/
%token ID NUM
%right '='
%left '+' '-'
%left '*' '/'
%left UMINUS
%%
S:ID{push();} '='{push();} E{codegen_assign();}
;
E:E'+'{push();} T{codegen();}
|E'-'{push();} T{codegen();}
| T
;
T:T'*'{push();} F{codegen();}
|T'/'{push();} F{codegen();}
|F
;
F:'(' E ')'
|'-'{push();} F{codegen_umin();} %prec UMINUS
|ID{push();}
|NUM{push();}
;
%%
#include "lex.yy.c"
#include<ctype.h>
char st[100][10];
int top=0;
char i_[2]="0";
char temp[2]="t";
main()
{
printf("Enter the expression : ");
yyparse();
}
push()
{
strcpy(st[++top],yytext);
}
codegen()
{
strcpy(temp,"t");
strcat(temp,i_);
printf("%s = %s %s %s\n",temp,st[top-2],st[top-1],st[top]);
top-=2;
strcpy(st[top],temp);
i_[0]++;
}
codegen_umin()
{
strcpy(temp,"t");
strcat(temp,i_);
printf("%s = -%s\n",temp,st[top]);
top--;
strcpy(st[top],temp);
i_[0]++;
}
codegen_assign()
{
printf("%s = %s\n",st[top-2],st[top]);
top-=2;
}

/* output 

[email protected]:~/Desktop/ic/ic final$ lex intar.l
[email protected]:~/Desktop/ic/ic final$ yacc intar.y
[email protected]:~/Desktop/ic/ic final$ gcc y.tab.c -ll -ly
[email protected]:~/Desktop/ic/ic final$ ./a.out
Enter the expression : a=(k+8)*(c-s)
t0 = k + 8
t1 = c - s
t2 = t0 * t1
a = t2
 */
-------------------------------------------
LEX PROGRAM
-------------------------------------------
/*(Lex Program : intar.l)*/

ALPHA [A-Za-z]
DIGIT [0-9]
%%

{ALPHA}({ALPHA}|{DIGIT})* return ID;
{DIGIT}+ {yylval=atoi(yytext); return NUM;}
[\n\t] yyterminate();
. return yytext[0];
%%
/*
Output:
[email protected] ~ $ lex intar.l
[email protected] ~ $ yacc intar.y
[email protected] ~ $ gcc y.tab.c -ll -ly
[email protected] ~ $ ./a.out
Enter the expression : a=(k+8)*(c-s)
t0 = k + 8
t1 = c - s
t2 = t0 * t1
a = t2
*/


Intermediate code generation using LEX &YACC for Control Flow (IF LOOP)
-------------------------------------
YACC Program using IF LOOP
-------------------------------------
%token ID NUM IF THEN ELSE
%right '='
%left '+' '-'
%left '*' '/'
%left UMINUS
%%

S : IF '(' E ')'{lab1();} THEN E ';'{lab2();} ELSE E ';'{lab3();}
  ;
E :V '='{push();} E{codegen_assign();}
  | E '+'{push();} E{codegen();}
  | E '-'{push();} E{codegen();}
  | E '*'{push();} E{codegen();}
  | E '/'{push();} E{codegen();}
  | '(' E ')'
  | '-'{push();} E{codegen_umin();} %prec UMINUS
  | V
  | NUM{push();}
  ;
V : ID {push();}
  ;
%%

#include "lex.yy.c"
#include<ctype.h>
char st[100][10];
int top=0;
char i_[2]="0";
char temp[2]="t";

int label[20];
int lnum=0;
int ltop=0;

int main()
 {
 printf("Enter the expression : ");
 yyparse();
return 0;
 }

push()
 {
  strcpy(st[++top],yytext);
 }

codegen()
 {
 strcpy(temp,"t");
 strcat(temp,i_);
  printf("%s = %s %s %s\n",temp,st[top-2],st[top-1],st[top]);
  top-=2;
 strcpy(st[top],temp);
 i_[0]++;
 }

codegen_umin()
 {
 strcpy(temp,"t");
 strcat(temp,i_);
 printf("%s = -%s\n",temp,st[top]);
 top--;
 strcpy(st[top],temp);
 i_[0]++;
 }

codegen_assign()
 {
 printf("%s = %s\n",st[top-2],st[top]);
 top-=2;
 }

lab1()
{
 lnum++;
 strcpy(temp,"t");
 strcat(temp,i_);
 printf("%s = not %s\n",temp,st[top]);
 printf("if %s goto L%d\n",temp,lnum);
 i_[0]++;
 label[++ltop]=lnum;
}

lab2()
{
int x;
lnum++;
x=label[ltop--];
printf("goto L%d\n",lnum);
printf("L%d: \n",x);
label[++ltop]=lnum;
}

lab3()
{
int y;
y=label[ltop--];
printf("L%d: \n",y);
}

/* output 
[email protected]:~/ajit if$ lex if.l
[email protected]:~/ajit if$ yacc if.y
[email protected]:~/ajit if$ gcc y.tab.c -ll -ly
[email protected]:~/ajit if$ ./a.out
Enter the expression : if(k+8) then k=18;else c=s;
t0 = k + 8
t1 = not t0
if t1 goto L1
k = 18
goto L2
L1: 
c = s
L2: 
*/
----------------------------------------------------
LEX Program using If Loop
----------------------------------------------------
ALPHA [A-Za-z]
DIGIT [0-9]
%%
if                 return IF;
then                 return THEN;
else                 return ELSE;
{ALPHA}({ALPHA}|{DIGIT})*    return ID;
{DIGIT}+             {yylval=atoi(yytext); return NUM;}
[ \t]                 ;
\n                yyterminate();
.                 return yytext[0];
%%


Intermediate code generation using LEX &YACC for Control Flow (WHILE LOOP)
----------------------------------------------
YACC PROGRAM
----------------------------------------------
%token ID NUM WHILE
%right '='
%left '+' '-'
%left '*' '/'
%left UMINUS
%%

S : WHILE{lab1();} '(' E ')'{lab2();} E ';'{lab3();}
  ;
E :V '='{push();} E{codegen_assign();}
  | E '+'{push();} E{codegen();}
  | E '-'{push();} E{codegen();}
  | E '*'{push();} E{codegen();}
  | E '/'{push();} E{codegen();}
  | '(' E ')'
  | '-'{push();} E{codegen_umin();} %prec UMINUS
  | V
  | NUM{push();}
  ;
V : ID {push();}
  ;
%%

#include "lex.yy.c"
#include<ctype.h>
char st[100][10];
int top=0;
char i_[2]="0";
char temp[2]="t";

int lnum=1;
int start=1;
main()
 {
 printf("Enter the expression : ");
 yyparse();
 }

push()
 {
  strcpy(st[++top],yytext);
 }

codegen()
 {
 strcpy(temp,"t");
 strcat(temp,i_);
  printf("%s = %s %s %s\n",temp,st[top-2],st[top-1],st[top]);
  top-=2;
 strcpy(st[top],temp);
 i_[0]++;
 }

codegen_umin()
 {
 strcpy(temp,"t");
 strcat(temp,i_);
 printf("%s = -%s\n",temp,st[top]);
 top--;
 strcpy(st[top],temp);
 i_[0]++;
 }

codegen_assign()
 {
 printf("%s = %s\n",st[top-2],st[top]);
 top-=2;
 }

lab1()
{
printf("L%d: \n",lnum++);
}

lab2()
{
 strcpy(temp,"t");
 strcat(temp,i_);
 printf("%s = not %s\n",temp,st[top]);
 printf("if %s goto L%d\n",temp,lnum);
 i_[0]++;
 }

lab3()
{
printf("goto L%d \n",start);
printf("L%d: \n",lnum);
}
/* output
[email protected]:~/Desktop/ic/while final$ lex while.l
[email protected]:~/Desktop/ic/while final$ yacc while.y
[email protected]:~/Desktop/ic/while final$ gcc y.tab.c -ll -ly
[email protected]:~/Desktop/ic/while final$ ./a.out
Enter the expression : while(k=c/s)k=k*c+d;
L1: 
t0 = c / s
k = t0
t1 = not k
if t1 goto L0
t2 = k * c
t3 = t2 + d
k = t3
goto L1 
L0: 
*/

----------------------------------------------
LEX PROGRAM
----------------------------------------------
ALPHA [A-Za-z]
DIGIT [0-9]
%%
while                return WHILE;
{ALPHA}({ALPHA}|{DIGIT})*    return ID;
{DIGIT}+             {yylval=atoi(yytext); return NUM;}
[ \t]                 ;
\n                yyterminate();
.                 return yytext[0];
%%
TITLE: LEX and YACC program to generate Intermediate Code

PROBLEM STATEMENT:

  1. Write a LEX and YACC program to generate Intermediate Code for arithmetic expression
  2. Write a LEX and YACC program to generate Intermediate Code for subset of C (If)statement.
OBJECTIVES:

  • To understand fourth phase of compiler: Intermediate code generation.
  • To learn and use compiler writing tools.
  • To learn how to write three address code for given statement.

THEORY:

Introduction
In the analysis-synthesis model of a compiler, the front end analyzes a source program and creates an intermediate representation, from which the back end generates target code. Ideally, details of the source language are confined to the front end, and details of the target machine to the back end. The front end translates a source program into an intermediate representation from which the back end generates target code.With a suitably defined intermediate representation, a compiler for language i and machine j can then be built by combining the front end for language i with the back end for machine j. This approach to creating suite of compilers can save a considerable amount of effort: m x n compilers can be built by writing just m front ends and n back ends.

Benefits of using a machine-independent intermediate form are:

  1. Retargeting is facilitated. That is, a compiler for a different machine can be created by attaching a back end for the new machine to an existing front end.
  2. A machine-independent code optimizer can be applied to the intermediate representation.

Intermediate Languages
Three ways of intermediate representation:

  • Syntax tree
  • Postfix notation
  • Three address code
The semantic rules for generating three-address code from common programming language constructs are similar to those for constructing syntax trees or for generating postfix notation.

Graphical Representations:

1.Syntax tree:
A syntax tree depicts the natural hierarchical structure of a source program. A dag (Directed Acyclic Graph) gives the same information but in a more compact way because common subexpressions are identified. A syntax tree and dag for the assignment statement a : = b * - c + b * - c are as follows:
2.Postfix notation:
Postfix notation is a linearized representation of a syntax tree; it is a list of the nodes of the tree in which a node appears immediately after its children. The postfix notation for the syntax tree given above is a b c uminus * b c uminus * + assign
3. Three-Address Code:
Three-address code is a sequence of statements of the general form x : = y op z where x, y and z are names, constants, or compiler-generated temporaries; op stands for any operator, such as a fixed- or floating-point arithmetic operator, or a logical operator on Boolean valued data. Thus a source language expression like x+ y*z might be translated into a sequence
t1 : = y * z
t2 : = x + t1
where t1 and t2 are compiler-generated temporary names.
The reason for the term “three-address code” is that each statement usually contains three addresses, two for the operands and one for the result.
Advantages of three-address code:

  • The unraveling of complicated arithmetic expressions and of nested flow-of-control statements makes three-address code desirable for target code generation and optimization.
  • The use of names for the intermediate values computed by a program allows three address code to be easily rearranged – unlike postfix notation.
  • Three-address code is a liberalized representation of a syntax tree or a dag in which explicit names correspond to the interior nodes of the graph. The syntax tree and dag are represented by the three-address code sequences. Variable names can appear directly in three address statements.

Types of Three-Address Statements:
The common three-address statements are:

  • Assignment statements of the form x : = y op z, where op is a binary arithmetic or logical operation.
  • Assignment instructions of the form x : = op y, where op is a unary operation. Essential 
  • unary operations include unary minus, logical negation, shift operators, and conversion operators that, for example, convert a fixed-point number to a floating-point number.
  • Copy statements of the form x : = y where the value of y is assigned to x.
  • The unconditional jump goto L. The three-address statement with label L is the next to be executed.
  • Conditional jumps such as if x relop y goto L. This instruction applies a relational operator (<, =, >=, etc. ) to x and y, and executes the statement with label L next if x stands in relation relop to y. If not, the three-address statement following if x relop y goto L is executed next, as in the usual sequence.
  • param x and call p, n for procedure calls and return y, where y representing a returned value is optional.
  •  For example,
  • param x1
  • param x2
  • .......
  • param xn
  • call p,n
  • generated as part of a call of the procedure p(x1, x2, …. ,xn ).
  • Indexed assignments of the form x : = y[i] and x[i] : = y.
  • Address and pointer assignments of the form x : = &y , x : = *y, and *x : = y.

Implementation of Three-Address Statements:
A three-address statement is an abstract form of intermediate code. In a compiler, these statements can be implemented as records with fields for the operator and the operands.

Three such representations are: Quadruples, Triples, Indirect triples.

A. Quadruples:

  • A quadruple is a record structure with four fields, which are, op, arg1, arg2 and result.
  • The op field contains an internal code for the operator. The 3 address statement x = y op z is represented by placing y in arg1, z in arg2 and x in result.
  • The contents of fields arg1, arg2 and result are normally pointers to the symbol-table entries for the names represented by these fields. If so, temporary names must be entered into the symbol table as they are created.
  • Fig a) shows quadruples for the assignment a : b *

B. Triples:

  • To avoid entering temporary names into the symbol table, we might refer to a temporary value by the position of the statement that computes it.
  • If we do so, three-address statements can be represented by records with only three fields:
  • op, arg1 and arg2.
  • c+b*
  • c
  • The fields arg1 and arg2, for the arguments of op, are either pointers to the symbol table or pointers into the triple structure ( for temporary values ).
  • Since three fields are used, this intermediate code format is known as triples.
  • Fig b) shows the triples for the assignment statement a: = b *

Steps to execute the program

  1. $ lex filename.l
  2. $ yacc -d filename.y
  3. $cc lex.yy.c y.tab.c –ll –ly -lm
  4. $./a .out
  5. (eg: comp.l)
  6. (eg: comp.y)

ALGORITHM:
Write a LEX and YACC program to generate Intermediate Code for arithmetic expression LEX program

  1. Declaration of header files specially y.tab.h which contains declaration for Letter, Digit, expr.
  2. End declaration section by %%
  3. Match regular expression.
  4. If match found then convert it into char and store it in yylval.p where p is pointer declared in YACC
  5. Return token
  6. If input contains new line character (\n) then return 0
  7. If input contains „.‟ then return yytext[0]
  8. End rule-action section by %%
  9. Declare main function
  10. a. open file given at command line
  11. b.if any error occurs then print error and exit
  12. c. assign file pointer fp to yyin
  13. d.call function yylex until file ends
  14. End

YACC program:

  1. Declaration of header files
  2. Declare structure for threeaddresscode representation having fields of argument1, argument2, operator, result.
  3. Declare pointer of char type in union.
  4. Declare token expr of type pointer p.
  5. Give precedence to „*‟,‟/‟.
  6. Give precedence to „+‟,‟-‟.
  7. End of declaration section by %%.
  8. If final expression evaluates then add it to the table of three address code.
  9. If input type is expression of the form.
  10. a. exp‟+‟exp then add to table the argument1, argument2, operator.
  11. b.exp‟-‟exp then add to table the argument1, argument2, operator.
  12. c. exp‟*‟exp then add to table the argument1, argument2, operator.
  13. d.exp‟/‟exp then add to table the argument1, argument2, operator.
  14. e. „(„exp‟)‟ then assign $2 to $$.
  15. f. Digit OR Letter then assigns $1 to $$.
  16. End the section by %%.
  17. Declare file *yyin externally.
  18. Declare main function and call yyparse function untill yyin ends
  19. Declare yyerror for if any error occurs.
  20. Declare char pointer s to print error.
  21. Print error message.
  22. End of the program.

Addtotable function will add the argument1, argument2, operator and temporary variable to the structure array of threeaddresscode. Threeaddresscode function will print the values from the structure in the form first temporary variable, argument1, operator, argument2 
Quadruple Function will print the values from the structure in the form first operator, argument1, argument2, result field
Triple Function will print the values from the structure in the form first argument1, argument2, and operator. The temporary variables in this form are integer / index instead of variables.

4 Comments

  1. This comment has been removed by the author.

    ReplyDelete
  2. JCECE Admit Card will be available to download in the 2nd Week of May,2018. Details about JCECE 2018 Admit Card are given in this article.

    Assam CEE admit card to be released on 9th April 2018. Click Here to know more. Step by step guide to view and download the admit card. Important instructions to be followed on the exam day. contents of Hall Ticket

    GUJCET Admit Card will be available to download in the Month of May. Get the latest information on GUJCET 2018 Admit Card here. The procedure to view and download the GUJCET 2018 admit card is also given in the article. GUJCET 2018 Hall Ticket link will be provided here as soon as it officially released.

    Get the detailed information about the Admit Card of Uttarakhand State Entrance Examination UKSEE for the academic session 2018-19 here UKSEE admit card

    GCET Admit card will be available from 27th April 2018. Get the latest information on GCET 2018 Admit Card. The admit card for GCET 2018 will be handed over to the applicant as soon as the candidate submit the application form at the application reception center.

    OJEE Admit Card Important Dates Related To OJEE Admit Card 2018, How To Download OJEE 2018 Admit Card, Information In OJEE 2018 Admit Card, OJEE 2018 Admit Card Important Highlights. Also Find Complete details.

    Get the latest information on IPU CET Admit Card. Step by step guide is provided to view and download the IPU CET admit card is also provided in the article. IPU CET 2018 Admit Card Download link will be made available here as soon as GGSIPU Releases the Admit Card.

    ReplyDelete
  3. Very Bad...Worst.....Code not at all working....If ur code doesn't work, Y are u even posting it and faking us...?

    ReplyDelete