module Parsing where import ParserLib import Data.Char import Control.Applicative -- Simplistic abstract syntax tree we aim for. data Expr = Num Integer | Var String | Prim2 Op2 Expr Expr -- Prim2 op operand operand | Let [(String, Expr)] Expr -- Let [(name, rhs), ...] body deriving (Eq, Show) data Op2 = Add | Mul deriving (Eq, Show) {- Lesson 1a: Number literals and * muls ::= natural { "*" natural } where * associates to the right. My code ends up implemented this rule instead: muls ::= natural [ "*" muls ] -} mulsRv1 :: Parser Expr mulsRv1 = liftA2 link (fmap Num natural) (optional (liftA2 (,) (operator "*" *> pure (Prim2 Mul)) mulsRv1)) where link x Nothing = x link x (Just (op,y)) = op x y -- In practice, chainr1 generalizes that, so use chainr1 instead. mulsRv2 :: Parser Expr mulsRv2 = chainr1 (fmap Num natural) (operator "*" *> pure (Prim2 Mul)) {- Lesson 1b: * associates to the left. Main idea: Use muls ::= natural { "*" natural } then use foldl. -} mulsLv1 :: Parser Expr mulsLv1 = liftA2 link (fmap Num natural) (many (liftA2 (,) (operator "*" *> pure (Prim2 Mul)) (fmap Num natural))) where link x opys = foldl (\accum (op,y) -> op accum y) x opys -- In practice, chainl1 generalizes that, so use chainl1 instead. mulsLv2 :: Parser Expr mulsLv2 = chainl1 (fmap Num natural) (operator "*" *> pure (Prim2 Mul)) {- Lesson 2: Allow and skip whitespaces everywhere. Disallow trailing junk. For best sanity and least redundancy, follow this convention: 0. Have a "main" (entry point) parser. ("lesson2" here.) 1. main parser skips leading whitespaces before real work. Note that henceforth all remaining whitespaces can be considered as trailing, therefore: 2. Other parsers skip only trailing whitespaces. 3. main parser checks for no-junk after real work. E.g., when parsing " 4 * 3 ": 1. main parser skips the space before "4". 2a. natural parser skips the space after "4". 2b. parser for "*" skips the space after "*". 2a. natural parser skips the space after "3". 3. main parser checks that there is no junk after. -} lesson2 :: Parser Expr lesson2 = whitespaces *> muls <* eof where muls = chainl1 (fmap Num natural) (operator "*" *> pure (Prim2 Mul)) {- Lesson 3: Include +. + has lower precedence than *. Both associate to the left. Also support parentheses e.g., (0+1)*2 adds ::= muls { "+" muls } muls ::= atom { "*" atom } atom ::= natural | "(" adds ")" -} lesson3 :: Parser Expr lesson3 = whitespaces *> adds <* eof where adds = chainl1 muls (operator "+" *> pure (Prim2 Add)) muls = chainl1 atom (operator "*" *> pure (Prim2 Mul)) atom = fmap Num natural <|> (openParen *> adds <* closeParen) {- Lesson 4: Include let-in and variables. expr ::= local | adds local ::= "let" { var "=" expr ";" } "in" expr adds ::= muls { "+" muls } muls ::= atom { "*" atom } atom ::= natural | var | "(" expr ")" Problem: What happens to "let inn+4" under a naive parser? It thinks you mean "let in n+4" rather than a syntax error. Solution: To parse a keyword like "let" and "in", don't just read those letters. Pretend to read an identifier (read as many alphanums as possible), then check whether it equals the keyword you want. -} lesson4 :: Parser Expr lesson4 = whitespaces *> expr <* eof where expr = local <|> adds local = pure (\_ eqns _ e -> Let eqns e) <*> keyword "let" <*> many equation <*> keyword "in" <*> expr -- Basically a liftA4. -- Could also be implemented in monadic style, like equation below. equation = var >>= \v -> operator "=" >> expr >>= \e -> semicolon >> return (v, e) -- Basically a liftA4. -- Could also be implemented in applicative style, like local above. semicolon = char ';' *> whitespaces adds = chainl1 muls (operator "+" *> pure (Prim2 Add)) muls = chainl1 atom (operator "*" *> pure (Prim2 Mul)) atom = fmap Num natural <|> fmap Var var <|> (openParen *> expr <* closeParen) var = identifier ["let", "in"]