advanced programming andrew black and tim sheard n.
Download
Skip this Video
Loading SlideShow in 5 Seconds..
Advanced Programming Andrew Black and Tim Sheard PowerPoint Presentation
Download Presentation
Advanced Programming Andrew Black and Tim Sheard

Loading in 2 Seconds...

play fullscreen
1 / 29

Advanced Programming Andrew Black and Tim Sheard - PowerPoint PPT Presentation


  • 86 Views
  • Uploaded on

Advanced Programming Andrew Black and Tim Sheard. Lecture 11 Parsing Combinators. Type of a Parser. data Parser a = Parser (String -> [(a,String)]) A function inside a data definition. The output is a list of successful parses. This type can be made into a monad

loader
I am the owner, or an agent authorized to act on behalf of the owner, of the copyrighted work described.
capcha
Download Presentation

PowerPoint Slideshow about 'Advanced Programming Andrew Black and Tim Sheard' - zagiri


An Image/Link below is provided (as is) to download presentation

Download Policy: Content on the Website is provided to you AS IS for your information and personal use and may not be sold / licensed / shared on other websites without getting consent from its author.While downloading, if for some reason you are not able to download a presentation, the publisher may have deleted the file from their server.


- - - - - - - - - - - - - - - - - - - - - - - - - - E N D - - - - - - - - - - - - - - - - - - - - - - - - - -
Presentation Transcript
type of a parser
Type of a Parser

data Parser a =

Parser (String -> [(a,String)])

  • A function inside a data definition.
  • The output is a list of successful parses.
  • This type can be made into a monad
    • A monad is the sequencing operator in Haskell.
  • Also be made into a Monad with zero and (++) or plus.
defining the monad
Defining the Monad

Technical details, can be ignored when using combinators

instance Monad Parser where

return v = Parser (\inp -> [(v,inp)])

p >>= f =

Parser (\inp -> concat

[applyP (f v) out

| (v,out) <- applyP p inp])

instance MonadPlus Parser where

mzero = Parser (\inp -> [])

mplus (Parser p) (Parser q)

= Parser(\inp -> p inp ++ q inp)

instance Functor Parser where . . .

  • where applyP undoes the constructor
      • applyP (Parser f) x = f x

Note the comprehension syntax

typical parser
Typical Parser
  • Because the parser is a monad we can use the Do syntax .

do { x1 <- p1

; x2 <- p2

; ...

; xn <- pn

; f x1 x2 ... Xn

}

running the parser
Running the Parser
  • Running Parsers

papply :: Parser a -> String -> [(a,String)]

papply p = applyP (do {junk; p})

  • junk skips over white space and comments. We'll see how to define it later
simple primitives
Simple Primitives

applyP :: Parser a -> String -> [(a,String)]

applyP (Parser p) = p

item :: Parser Char

item = Parser (\inp -> case inp of

"" -> []

(x:xs) -> [(x,xs)])

sat :: (Char -> Bool) -> Parser Char

sat p = do {x <- item;

if p x then return x else mzero}

? papply item "abc"

[('a',"bc")]

examples
Examples

? papply item "abc"

[('a',"bc")]

? papply (sat isDigit) "123"

[('1',"23")]

? parse (sat isDigit) "abc"

[]

useful parsers
Useful Parsers

char :: Char -> Parser Char

char x = sat (x ==)

digit :: Parser Int

digit = do { x <- sat isDigit

; return (ord x - ord '0') }

lower :: Parser Char

lower = sat isLower

upper :: Parser Char

upper = sat isUpper

examples1
Examples

char x = sat (x ==)

? papply (char 'z') "abc"

[]

? papply (char 'a') "abc"

[('a',"bc")]

? papply digit "123"

[(1,"23")]

? papply upper "ABC"

[('A',"BC")]

? papply lower "ABC"

[]

more useful parsers
More Useful Parsers
    • letter :: Parser Char
    • letter = sat isAlpha
  • Can even use recursion
    • string :: String -> Parser String
    • string "" = return ""
    • string (x:xs) =
    • do {char x; string xs; return (x:xs) }
  • Helps define even more useful parsers
    • identifier :: Parser String
    • identifier = do {x <- lower
    • ; xs <- many alphanum
    • ; return (x:xs)}
  • What do you think many does?
examples2
Examples

? papply (string "tim") "tim is red"

[("tim"," is red")]

? papply identifier "tim is blue"

[("tim"," is blue")]

? papply identifier "x5W3 = 12"

[("x5W3"," = 12")]

choice 1 parser or another
Choice -- 1 parser or another
  • Note that the ++ operator (from MonadPlus) gives non-deterministic choice.
    • instance MonadPlus Parser where
    • (Parser p) ++ (Parser q)
    • = Parser(\inp -> p inp ++ q inp)
  • Sometimes we’d like to prefer one choice over another, and take the second only if the first fails
  • We don’t we need an explicit sequencing operator because the monad sequencing plays that role.
efficiency
Efficiency

force :: Parser a -> Parser a

force p =

Parser (\ inp ->

let x = applyP p inp

in (fst (head x), snd (head x))

: (tail x) )

Deterministic Choice

(+++) :: Parser a -> Parser a -> Parser a

p +++ q =

Parser(\inp ->

case applyP (p `mplus` q) inp of

[] -> []

(x:xs) -> [x])

example
Example
  • ? papply (string "x" +++ string "b") "abc"
  • []
  • ? papply (string "x" +++ string "b") "bcd"
  • [("b","cd")]
sequences more recursion
Sequences (more recursion)

many :: Parser a -> Parser [a]

many p = force (many1 p +++ return [])

many1 :: Parser a -> Parser [a]

many1 p = do {x <- p

; xs <- many p

; return (x:xs)}

sepby :: Parser a -> Parser b -> Parser [a]

p `sepby` sep = (p `sepby1` sep) +++ return []

sepby1 :: Parser a -> Parser b -> Parser [a]

p `sepby1` sep =

do { x <- p

; xs <- many (do {sep; p})

; return (x:xs) }

example1
Example

? papply (many (char 'z')) "zzz234"

[("zzz","234")]

? papply (sepby (char 'z') spaceP) "z z z 34"

[("zzz"," 34")]

sequences separated by operators
Sequences separated by operators

chainl :: Parser a -> Parser (a -> a -> a) -> a -> Parser a

chainl p op v = (p `chainl1` op) +++ return v

chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a

p `chainl1` op = do {x <- p; rest x }

where rest x =

do {f <- op; y <- p; rest (f x y)} +++ return x

? papply (chainl int (return (+)) 0) "1 3 4 abc"

[(8,"abc")]

tokens and lexical issues
Tokens and Lexical Issues

spaceP :: Parser ()

spaceP = do {many1 (sat isSpace); return ()}

comment :: Parser ()

comment = do{string "--"; many (sat p); return ()}

where p x = x /= '\n'

junk :: Parser ()

junk = do {many (spaceP +++ comment); return ()}

  • A Token is any parser followed by optional white space or a comment

token :: Parser a -> Parser a

token p = do {v <- p; junk; return v}

using tokens
Using Tokens

symb :: String -> Parser String

symb xs = token (string xs)

ident :: [String] -> Parser String

ident ks =

do { x <- token identifier

; if (not (elem x ks))

then return x else zero }

nat :: Parser Int

nat = token natural

natural :: Parser Int

natural = digit `chainl1` return (\m n -> 10*m + n)

example2
Example

? papply (token (char 'z')) "z 123"

[('z',"123")]

? papply (symb "tim") "tim is cold"

[("tim","is cold")]

? papply natural "123 abc"

[(123," abc")]

? papply (many identifier) "x d3 23"

[(["x"]," d3 23")]

? papply (many (token identifier)) "x d3 23"

[(["x", "d3"],"23")]

more parsers
More Parsers

int :: Parser Int

int = token integer

integer :: Parser Int

integer = (do {char '-’

; n <- natural

; return (-n)})

+++ nat

example parsing expressions
Example: Parsing Expressions

data Term = Add Term Term

| Sub Term Term

| Mult Term Term

| Div Term Term

| Const Int

addop:: Parser(Term -> Term -> Term)

addop = do {symb "+"; return Add} +++

do {symb "-"; return Sub}

mulop:: Parser(Term -> Term -> Term)

mulop = do {symb "*"; return Mult} +++

do {symb "/"; return Div}

constructing a parse tree
Constructing a Parse tree

expr :: Parser Term

addop :: Parser (Term -> Term -> Term)

mulop :: Parser (Term -> Term -> Term)

expr = term `chainl1` addop

term = factor `chainl1` mulop

factor = (do { n <- token digit

; return (Const n)}) +++

(do {symb "(“ ; n <- expr

; symb ")“ ; return n})

? papply expr "5 abc"

[(Const 5,"abc")]

? papply expr "4 + 5 - 2"

[(Sub (Add (Const 4) (Const 5))(Const 2),[])]

array based parsers
Array Based Parsers

type Subword = (Int,Int)

newtype P a = P (Array Int Char -> Subword -> [a])

unP (P z) = z

emptyP :: P ()

emptyP = P f

where f z (i,j) = [() | i == j]

notchar :: Char -> P Char

notchar s = P f

where f z (i,j) = [z!j | i+1 == j, z!j /= s]

charP :: Char -> P Char

charP c = P f

where f z (i,j) = [c | i+1 == j, z!j == c]

slide25
anychar :: P Char

anychar = P f

where f z (i,j) = [z!j | i+1 == j]

anystring :: P(Int,Int)

anystring = P f

where f z (i,j) = [(i,j) | i <= j]

symbol :: String -> P (Int,Int)

symbol s = P f

where f z (i,j) =

if j-i == length s

then [(i,j)| and [z!(i+k) == s!!(k-1)

| k <-[1..(j-i)]]]

else []

combinators
Combinators

infixr 6 |||

(|||) :: P b -> P b -> P b

(|||) (P r) (P q) = P f

where f z (i,j) = r z (i,j) ++ q z (i,j)

infix 8 <<<

(<<<) :: (b -> c) -> P b -> P c

(<<<) f (P q) = P h

where h z (i,j) = map f (q z (i,j))

infixl 7 ~~~

(~~~) :: P(b -> c) -> P b -> P c

(~~~) (P r) (P q) = P f

where f z (i,j) =

[f y | k <- [i..j], f <- r z (i,k), y <- q z (k,j)]

slide27
run :: String -> P b -> [b]

run s (P ax) = ax (s2a s) (0,length s)

s2a s = (array bounds (zip [1..] s))

where bounds = (1,length s)

instance Monad P where

return x =

P(\ z (i,j) -> if i==j then [x] else [])

(>>=) (P f) g = P h

where h z (i,j) =

concat[ unP (g a) z (k,j)

| k <- [i..j] , a <- f z (i,k)]

examples3
Examples

p1 = do { symbol "tim"; c <- anychar

; symbol "tom"; return c}

ex4 = run "tim5tom" p1

ex5 = run "timtom" p1

Main> ex4

"5"

Main> ex5

""

exercise in class
Exercise in class
  • Write a parser for regular expressions