Leonidas Fegaras. Parsing #1. Parser. A parser recognizes sequences of tokens according to some grammar and generates Abstract Syntax Trees (ASTs). A context-free grammar (CFG) has a finite set of terminals (tokens) and a finite set of nonterminals, one of which is the start symbol.
Download Policy: Content on the Website is provided to you AS IS for your information and personal use and may not be sold / licensed / shared on other websites without getting consent from its author. While downloading, if for some reason you are not able to download a presentation, the publisher may have deleted the file from their server.
Leonidas Fegaras
A ::= X1 X2 ... Xn
where A is a nonterminal and each Xi is either a terminal or nonterminal symbol
get token
get next character
AST
scanner
parser
source file
token
E ::= E + T
    | E - T
    | T
T ::= T * F
    | T / F
    | F
F ::= num
    | id
E ::= E + T
E ::= E - T
E ::= T
T ::= T * F
T ::= T / F
T ::= F
F ::= num
F ::= id
A ::= X1 X2 ... Xn
the form aAb => aX1 X2 ... Xnb is called a derivation
T / F + 1 - x => T * F / F + 1 - x
E => E + T              use E ::= E + T
  => E + T * F          use T ::= T * F
  => T + T * F          use E ::= T
  => T + F * F          use T ::= F
  => T + num * F        use F ::= num
  => F + num * F        use T ::= F
  => id + num * F       use F ::= id
  => id + num * id      use F ::= id
id(x) + num(2) * id(y)
<= id(x) + num(2) * F   use F ::= id
<= id(x) + F * F        use F ::= num
<= id(x) + T * F        use T ::= F
<= id(x) + T            use T ::= T * F
<= F + T                use F ::= id
<= T + T                use T ::= F
<= E + T                use E ::= T
<= E                    use E ::= E + T
E
E T
T T F
F F
id(x) + num(2) * id(y)
E => E + T
=> E + T * F
=> T + T * F
=> T + F * F
=> T + num * F
=> F + num * F
=> id + num * F
=> id + num * id
E ::= T + E
    | T - E
    | T
T ::= F * T
    | F / T
    | F
F ::= num
    | id
E
T E
F T E
F T
F
id(x) + id(y) + id(z)
E ::= E + E
    | E - E
    | E * E
    | E / E
    | num
    | id
E
E E
E E
id(x) * id(y) + id(z)
E
E E
E E
id(x) * id(y) + id(z)
A ::= A a
    | b
recognizes the regular expression ba*.
A ::= b A'
A' ::= a A'
     | ε
ie, A' parses the RE a*.
That is:
X ::= X a1
...
X ::= X an
where a and b are symbol sequences.
X ::= b1 X'
...
X ::= bm X'
X ::= b1
...
X ::= bm
X' ::= a1 X'
...
X' ::= an X'
X' ::=
E ::= T E'
E' ::= + T E'
     | - T E'
     | ε
T ::= F T'
T' ::= * F T'
     | / F T'
     | ε
F ::= num
    | id
E ::= E + T
    | E - T
    | T
T ::= T * F
    | T / F
    | F
F ::= num
    | id
R ::= R R
    | R bar R
    | R *
    | ( R )
    | char
R ::= ( R ) R'
    | char R'
R' ::= R R'
     | bar R R'
     | * R'
     | ε
X ::= a b1
...
X ::= a bn
X ::= a X'
X' ::= b1
...
X' ::= bn
E ::= T + E
    | T - E
    | T
E ::= T E'
E' ::= + E
     | - E
     | ε
/**
 * Recognizes an expression using the production E ::= T E'.
 * Calls T() first and then Eprime() for the remainder.
 */
static void E () {
    T();
    Eprime();
}
/**
 * Recognizes the tail of an expression: E' ::= + T E' | - T E' | (empty).
 * When the current token is PLUS or MINUS it is consumed, a T is parsed and
 * Eprime() recurses; for any other token nothing is consumed (empty production).
 */
static void Eprime () {
    boolean atAdditiveOperator = current_token == PLUS || current_token == MINUS;
    if (!atAdditiveOperator)
        return;             // empty alternative: leave the token for the caller
    read_next_token();      // consume the + or -
    T();
    Eprime();
}
/**
 * Recognizes a term using the production T ::= F T'.
 * Calls F() first and then Tprime() for the remainder.
 */
static void T () {
    F();
    Tprime();
}
/**
 * Recognizes the tail of a term: T' ::= * F T' | / F T' | (empty).
 * When the current token is TIMES or DIV it is consumed, an F is parsed and
 * Tprime() recurses; for any other token nothing is consumed (empty production).
 */
static void Tprime() {
    boolean atMultiplicativeOperator = current_token == TIMES || current_token == DIV;
    if (!atMultiplicativeOperator)
        return;             // empty alternative: leave the token for the caller
    read_next_token();      // consume the * or /
    F();
    Tprime();
}
static void F () {
if (current_token == NUM  current_token == ID)
read_next_token();
else error();
}
E ::= T E'
E' ::= + T E'
     | - T E'
     | ε
T ::= F T'
T' ::= * F T'
     | / F T'
     | ε
F ::= num
    | id
push(S);
read_next_token();
repeat
X = pop();
if (X is a terminal or '$')
if (X == current_token)
read_next_token();
else error();
else if (M[X,current_token]
== "X ::= Y1 Y2 ... Yk")
{ push(Yk);
...
push(Y1);
}
else error();
until X == '$';
1) E ::= T E' $
2) E' ::= + T E'
3)      | - T E'
4)      | ε
5) T ::= F T'
6) T' ::= * F T'
7)      | / F T'
8)      | ε
9) F ::= num
10)    | id
      num   id    +     -     *     /     $
E     1     1
E'                2     3                 4
T     5     5
T'                8     8     6     7     8
F     9     10
top
Stack           current_token   Rule
E               x               M[E,id] = 1 (using E ::= T E' $)
$ E' T          x               M[T,id] = 5 (using T ::= F T')
$ E' T' F       x               M[F,id] = 10 (using F ::= id)
$ E' T' id      x               read_next_token
$ E' T'         -               M[T',-] = 8 (using T' ::= ε)
$ E'            -               M[E',-] = 3 (using E' ::= - T E')
$ E' T -        -               read_next_token
$ E' T          2               M[T,num] = 5 (using T ::= F T')
$ E' T' F       2               M[F,num] = 9 (using F ::= num)
$ E' T' num     2               read_next_token
$ E' T'         *               M[T',*] = 6 (using T' ::= * F T')
$ E' T' F *     *               read_next_token
$ E' T' F       y               M[F,id] = 10 (using F ::= id)
$ E' T' id      y               read_next_token
$ E' T'         $               M[T',$] = 8 (using T' ::= ε)
$ E'            $               M[E',$] = 4 (using E' ::= ε)
$               $               stop (accept)
1) E ::= T E' $
2) E' ::= + T E'
3)      | - T E'
4)      | ε
5) T ::= F T'
6) T' ::= * F T'
7)      | / F T'
8)      | ε
9) F ::= num
10)    | id
      FIRST      FOLLOW
E     {num,id}   {}
E'    {+,-}      {$}
T     {num,id}   {+,-,$}
T'    {*,/}      {+,-,$}
F     {num,id}   {+,-,*,/,$}
      FIRST      FOLLOW
E     {num,id}   {}
E'    {+,-}      {$}
T     {num,id}   {+,-,$}
T'    {*,/}      {+,-,$}
F     {num,id}   {+,-,*,/,$}
1) E ::= T E' $
2) E' ::= + T E'
3)      | - T E'
4)      | ε
5) T ::= F T'
6) T' ::= * F T'
7)      | / F T'
8)      | ε
9) F ::= num
10)    | id
      num   id    +     -     *     /     $
E     1     1
E'                2     3                 4
T     5     5
T'                8     8     6     7     8
F     9     10
0) G ::= S $
1) S ::= ( L )
2) S ::= a
3) L ::= S L'
4) L' ::= , S L'
5) L' ::=
G ::= S $
S ::= ( L )
    | a
L ::= L , S
    | S
      (     )     a     ,     $
G     0           0
S     1           2
L     3           3
L'          5           4