advanced programming andrew black and tim sheard n.
Download
Skip this Video
Loading SlideShow in 5 Seconds..
Advanced Programming Andrew Black and Tim Sheard PowerPoint Presentation
Download Presentation
Advanced Programming Andrew Black and Tim Sheard

Loading in 2 Seconds...

play fullscreen
1 / 29

Advanced Programming Andrew Black and Tim Sheard - PowerPoint PPT Presentation


  • 87 Views
  • Uploaded on

Advanced Programming Andrew Black and Tim Sheard. Lecture 11 Parsing Combinators. Type of a Parser. data Parser a = Parser (String -> [(a,String)]) A function inside a data definition. The output is a list of successful parses. This type can be made into a monad

loader
I am the owner, or an agent authorized to act on behalf of the owner, of the copyrighted work described.
capcha
Download Presentation

Advanced Programming Andrew Black and Tim Sheard


An Image/Link below is provided (as is) to download presentation

Download Policy: Content on the Website is provided to you AS IS for your information and personal use and may not be sold / licensed / shared on other websites without getting consent from its author.While downloading, if for some reason you are not able to download a presentation, the publisher may have deleted the file from their server.


- - - - - - - - - - - - - - - - - - - - - - - - - - E N D - - - - - - - - - - - - - - - - - - - - - - - - - -
Presentation Transcript
type of a parser
Type of a Parser

data Parser a =

Parser (String -> [(a,String)])

  • A function inside a data definition.
  • The output is a list of successful parses.
  • This type can be made into a monad
    • A monad is the sequencing operator in Haskell.
  • Also be made into a Monad with zero and (++) or plus.
defining the monad
Defining the Monad

Technical details, can be ignored when using combinators

instance Monad Parser where

return v = Parser (\inp -> [(v,inp)])

p >>= f =

Parser (\inp -> concat

[applyP (f v) out

| (v,out) <- applyP p inp])

instance MonadPlus Parser where

mzero = Parser (\inp -> [])

mplus (Parser p) (Parser q)

= Parser(\inp -> p inp ++ q inp)

instance Functor Parser where . . .

  • where applyP undoes the constructor
      • applyP (Parser f) x = f x

Note the comprehension syntax

typical parser
Typical Parser
  • Because the parser is a monad we can use the Do syntax .

do { x1 <- p1

; x2 <- p2

; ...

; xn <- pn

; f x1 x2 ... Xn

}

running the parser
Running the Parser
  • Running Parsers

papply :: Parser a -> String -> [(a,String)]

papply p = applyP (do {junk; p})

  • junk skips over white space and comments. We'll see how to define it later
simple primitives
Simple Primitives

applyP :: Parser a -> String -> [(a,String)]

applyP (Parser p) = p

item :: Parser Char

item = Parser (\inp -> case inp of

"" -> []

(x:xs) -> [(x,xs)])

sat :: (Char -> Bool) -> Parser Char

sat p = do {x <- item;

if p x then return x else mzero}

? papply item "abc"

[('a',"bc")]

examples
Examples

? papply item "abc"

[('a',"bc")]

? papply (sat isDigit) "123"

[('1',"23")]

? parse (sat isDigit) "abc"

[]

useful parsers
Useful Parsers

char :: Char -> Parser Char

char x = sat (x ==)

digit :: Parser Int

digit = do { x <- sat isDigit

; return (ord x - ord '0') }

lower :: Parser Char

lower = sat isLower

upper :: Parser Char

upper = sat isUpper

examples1
Examples

char x = sat (x ==)

? papply (char 'z') "abc"

[]

? papply (char 'a') "abc"

[('a',"bc")]

? papply digit "123"

[(1,"23")]

? papply upper "ABC"

[('A',"BC")]

? papply lower "ABC"

[]

more useful parsers
More Useful Parsers
    • letter :: Parser Char
    • letter = sat isAlpha
  • Can even use recursion
    • string :: String -> Parser String
    • string "" = return ""
    • string (x:xs) =
    • do {char x; string xs; return (x:xs) }
  • Helps define even more useful parsers
    • identifier :: Parser String
    • identifier = do {x <- lower
    • ; xs <- many alphanum
    • ; return (x:xs)}
  • What do you think many does?
examples2
Examples

? papply (string "tim") "tim is red"

[("tim"," is red")]

? papply identifier "tim is blue"

[("tim"," is blue")]

? papply identifier "x5W3 = 12"

[("x5W3"," = 12")]

choice 1 parser or another
Choice -- 1 parser or another
  • Note that the ++ operator (from MonadPlus) gives non-deterministic choice.
    • instance MonadPlus Parser where
    • (Parser p) ++ (Parser q)
    • = Parser(\inp -> p inp ++ q inp)
  • Sometimes we’d like to prefer one choice over another, and take the second only if the first fails
  • We don’t we need an explicit sequencing operator because the monad sequencing plays that role.
efficiency
Efficiency

force :: Parser a -> Parser a

force p =

Parser (\ inp ->

let x = applyP p inp

in (fst (head x), snd (head x))

: (tail x) )

Deterministic Choice

(+++) :: Parser a -> Parser a -> Parser a

p +++ q =

Parser(\inp ->

case applyP (p `mplus` q) inp of

[] -> []

(x:xs) -> [x])

example
Example
  • ? papply (string "x" +++ string "b") "abc"
  • []
  • ? papply (string "x" +++ string "b") "bcd"
  • [("b","cd")]
sequences more recursion
Sequences (more recursion)

many :: Parser a -> Parser [a]

many p = force (many1 p +++ return [])

many1 :: Parser a -> Parser [a]

many1 p = do {x <- p

; xs <- many p

; return (x:xs)}

sepby :: Parser a -> Parser b -> Parser [a]

p `sepby` sep = (p `sepby1` sep) +++ return []

sepby1 :: Parser a -> Parser b -> Parser [a]

p `sepby1` sep =

do { x <- p

; xs <- many (do {sep; p})

; return (x:xs) }

example1
Example

? papply (many (char 'z')) "zzz234"

[("zzz","234")]

? papply (sepby (char 'z') spaceP) "z z z 34"

[("zzz"," 34")]

sequences separated by operators
Sequences separated by operators

chainl :: Parser a -> Parser (a -> a -> a) -> a -> Parser a

chainl p op v = (p `chainl1` op) +++ return v

chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a

p `chainl1` op = do {x <- p; rest x }

where rest x =

do {f <- op; y <- p; rest (f x y)} +++ return x

? papply (chainl int (return (+)) 0) "1 3 4 abc"

[(8,"abc")]

tokens and lexical issues
Tokens and Lexical Issues

spaceP :: Parser ()

spaceP = do {many1 (sat isSpace); return ()}

comment :: Parser ()

comment = do{string "--"; many (sat p); return ()}

where p x = x /= '\n'

junk :: Parser ()

junk = do {many (spaceP +++ comment); return ()}

  • A Token is any parser followed by optional white space or a comment

token :: Parser a -> Parser a

token p = do {v <- p; junk; return v}

using tokens
Using Tokens

symb :: String -> Parser String

symb xs = token (string xs)

ident :: [String] -> Parser String

ident ks =

do { x <- token identifier

; if (not (elem x ks))

then return x else zero }

nat :: Parser Int

nat = token natural

natural :: Parser Int

natural = digit `chainl1` return (\m n -> 10*m + n)

example2
Example

? papply (token (char 'z')) "z 123"

[('z',"123")]

? papply (symb "tim") "tim is cold"

[("tim","is cold")]

? papply natural "123 abc"

[(123," abc")]

? papply (many identifier) "x d3 23"

[(["x"]," d3 23")]

? papply (many (token identifier)) "x d3 23"

[(["x", "d3"],"23")]

more parsers
More Parsers

int :: Parser Int

int = token integer

integer :: Parser Int

integer = (do {char '-’

; n <- natural

; return (-n)})

+++ nat

example parsing expressions
Example: Parsing Expressions

data Term = Add Term Term

| Sub Term Term

| Mult Term Term

| Div Term Term

| Const Int

addop:: Parser(Term -> Term -> Term)

addop = do {symb "+"; return Add} +++

do {symb "-"; return Sub}

mulop:: Parser(Term -> Term -> Term)

mulop = do {symb "*"; return Mult} +++

do {symb "/"; return Div}

constructing a parse tree
Constructing a Parse tree

expr :: Parser Term

addop :: Parser (Term -> Term -> Term)

mulop :: Parser (Term -> Term -> Term)

expr = term `chainl1` addop

term = factor `chainl1` mulop

factor = (do { n <- token digit

; return (Const n)}) +++

(do {symb "(“ ; n <- expr

; symb ")“ ; return n})

? papply expr "5 abc"

[(Const 5,"abc")]

? papply expr "4 + 5 - 2"

[(Sub (Add (Const 4) (Const 5))(Const 2),[])]

array based parsers
Array Based Parsers

type Subword = (Int,Int)

newtype P a = P (Array Int Char -> Subword -> [a])

unP (P z) = z

emptyP :: P ()

emptyP = P f

where f z (i,j) = [() | i == j]

notchar :: Char -> P Char

notchar s = P f

where f z (i,j) = [z!j | i+1 == j, z!j /= s]

charP :: Char -> P Char

charP c = P f

where f z (i,j) = [c | i+1 == j, z!j == c]

slide25
anychar :: P Char

anychar = P f

where f z (i,j) = [z!j | i+1 == j]

anystring :: P(Int,Int)

anystring = P f

where f z (i,j) = [(i,j) | i <= j]

symbol :: String -> P (Int,Int)

symbol s = P f

where f z (i,j) =

if j-i == length s

then [(i,j)| and [z!(i+k) == s!!(k-1)

| k <-[1..(j-i)]]]

else []

combinators
Combinators

infixr 6 |||

(|||) :: P b -> P b -> P b

(|||) (P r) (P q) = P f

where f z (i,j) = r z (i,j) ++ q z (i,j)

infix 8 <<<

(<<<) :: (b -> c) -> P b -> P c

(<<<) f (P q) = P h

where h z (i,j) = map f (q z (i,j))

infixl 7 ~~~

(~~~) :: P(b -> c) -> P b -> P c

(~~~) (P r) (P q) = P f

where f z (i,j) =

[f y | k <- [i..j], f <- r z (i,k), y <- q z (k,j)]

slide27
run :: String -> P b -> [b]

run s (P ax) = ax (s2a s) (0,length s)

s2a s = (array bounds (zip [1..] s))

where bounds = (1,length s)

instance Monad P where

return x =

P(\ z (i,j) -> if i==j then [x] else [])

(>>=) (P f) g = P h

where h z (i,j) =

concat[ unP (g a) z (k,j)

| k <- [i..j] , a <- f z (i,k)]

examples3
Examples

p1 = do { symbol "tim"; c <- anychar

; symbol "tom"; return c}

ex4 = run "tim5tom" p1

ex5 = run "timtom" p1

Main> ex4

"5"

Main> ex5

""

exercise in class
Exercise in class
  • Write a parser for regular expressions