module Parsing where import Data.Char import Prelude hiding (fmap, pure, (<*>), (<*), (*>), (>>=)) import ParserLib -- Example of using fmap anyCode :: Parser Int anyCode = fmap ord anyChar -- Try: runParser anyCode "AB", runParser anyCode "" -- Example of using liftA2 ldeV1 :: Parser String ldeV1 = liftA2 (\x y -> [x,y]) (satisfy isAlpha) (satisfy isDigit) <* (char '!') -- Try: runParser ldeV1 "B6!", runParser ldeV1 "B6a" -- Example of using >>= ldeV2 :: Parser String ldeV2 = satisfy isAlpha >>= (\x -> satisfy isDigit >>= (\y -> char '!' >>= (\_ -> pure [x, y]))) -- Note: Those parentheses are redundant. -- Try: runParser ldeV2 "B6!", runParser ldeV2 "B6a" -- Example of using empty satisfyV2 pred = anyChar >>= \c -> if pred c then pure c else empty -- Example of using <|> ab01 :: Parser String ab01 = liftA2 (\x y -> [x,y]) (char 'A' <|> char 'B') (char '0' <|> char '1') -- Simplistic abstract syntax tree we aim for. data Expr = Num Integer | Var String | Prim2 Op2 Expr Expr -- Prim2 op operand operand | Let [(String, Expr)] Expr -- Let [(name, rhs), ...] body deriving (Eq, Show) data Op2 = Add | Sub | Mul | Pow deriving (Eq, Show) {- Lesson 1a: Number literals and * pows ::= natural { "^" natural } where ^ associates to the right. My code uses the right recursive version: pows ::= natural [ "^" pows ] -} powsV1 :: Parser Expr powsV1 = fmap Num natural >>= \x -> ((operator "^" *> pure (Prim2 Pow)) >>= \op -> powsV1 >>= \y -> pure (op x y)) <|> pure x -- In practice, chainr1 generalizes that, so use chainr1 instead. powsV2 :: Parser Expr powsV2 = chainr1 (fmap Num natural) (operator "^" *> pure (Prim2 Pow)) {- Lesson 1b: * associates to the left. Main idea: Use muls ::= natural { "*" natural } then use foldl. -} mulsV1 :: Parser Expr mulsV1 = fmap Num natural >>= \x -> many (liftA2 (,) (operator "*" *> pure (Prim2 Mul)) (fmap Num natural)) >>= \opys -> pure (foldl (\accum (op,y) -> op accum y) x opys) -- In practice, chainl1 generalizes that, so use chainl1 instead. mulsV2 :: Parser Expr mulsV2 = chainl1 (fmap Num natural) (operator "*" *> pure (Prim2 Mul)) {- Lesson 2: Allow and skip whitespaces everywhere. Disallow trailing junk. For best sanity and least redundancy, follow this convention: 0. Have a "main" (entry point) parser. ("lesson2" here.) 1. main parser skips leading whitespaces before real work. Note that henceforth all remaining whitespaces can be considered as trailing, therefore: 2. Other parsers skip only trailing whitespaces. 3. main parser checks for no-junk after real work. E.g., when parsing " 4 * 3 ": 1. main parser skips the space before "4". 2a. natural parser skips the space after "4". 2b. parser for "*" skips the space after "*". 2a. natural parser skips the space after "3". 3. main parser checks that there is no junk after. -} lesson2 :: Parser Expr lesson2 = whitespaces *> muls <* eof where muls = chainl1 (fmap Num natural) (operator "*" *> pure (Prim2 Mul)) {- Lesson 3: * has lower precedence than ^. Also support parentheses e.g. (3*4)^2. (start symbol: muls) muls ::= pows { "*" pows } pows ::= atom { "^" atom } atom ::= natural | "(" muls ")" -} lesson3 :: Parser Expr lesson3 = whitespaces *> muls <* eof where muls = chainl1 pows (operator "*" *> pure (Prim2 Mul)) pows = chainr1 atom (operator "^" *> pure (Prim2 Pow)) atom = fmap Num natural <|> (openParen *> muls <* closeParen) {- Lesson 4: Include let-in and variables. (start symbol: expr) expr ::= local | muls local ::= "let" { var "=" expr ";" } "in" expr muls ::= pows { "*" pows } pows ::= atom { "^" atom } atom ::= natural | var | "(" expr ")" Problem: What happens to "let inn+4" under a naive parser? It thinks you mean "let in n+4" rather than a syntax error. Solution: To parse a keyword like "let" and "in", don't just read those letters. Pretend to read an identifier (read as many alphanums as possible), then check whether it equals the keyword you want. -} lesson4 :: Parser Expr lesson4 = whitespaces *> expr <* eof where expr = local <|> muls local = pure (\_ eqns _ e -> Let eqns e) <*> keyword "let" <*> many equation <*> keyword "in" <*> expr -- Basically a liftA4. -- Could also be implemented with (>>=), like equation below. equation = var >>= \v -> operator "=" *> expr >>= \e -> semicolon *> pure (v, e) -- Basically a liftA4. -- Could also be implemented with (<*>), like local above. semicolon = char ';' *> whitespaces muls = chainl1 pows (operator "*" *> pure (Prim2 Mul)) pows = chainr1 atom (operator "^" *> pure (Prim2 Pow)) atom = fmap Num natural <|> fmap Var var <|> (openParen *> expr <* closeParen) var = identifier ["let", "in"]