module Parsing where

import ParserLib
import Data.Char
import Control.Applicative

-- Simplistic abstract syntax tree we aim for.
data Expr
    = Num Integer
    | Var String
    | Prim2 Op2 Expr Expr       -- Prim2 op operand operand
    | Let [(String, Expr)] Expr -- Let [(name, rhs), ...] body
    deriving (Eq, Show)

data Op2 = Add | Mul
    deriving (Eq, Show)

{-
Lesson 1a: Number literals and *

    muls ::= natural { "*" natural }

where * associates to the right.

My code ends up implemented this rule instead:

    muls ::= natural [ "*" muls ]
-}

mulsRv1 :: Parser Expr
mulsRv1 = liftA2 link
          (fmap Num natural)
          (optional (liftA2 (,)
                      (operator "*" *> pure (Prim2 Mul))
                      mulsRv1))
  where
    link x Nothing = x
    link x (Just (op,y)) = op x y

-- In practice, chainr1 generalizes that, so use chainr1 instead.
mulsRv2 :: Parser Expr
mulsRv2 = chainr1 (fmap Num natural) (operator "*" *> pure (Prim2 Mul))


{-
Lesson 1b: * associates to the left.

Main idea: Use

    muls ::= natural { "*" natural }

then use foldl.
-}

mulsLv1 :: Parser Expr
mulsLv1 = liftA2 link
          (fmap Num natural)
          (many (liftA2 (,)
                  (operator "*" *> pure (Prim2 Mul))
                  (fmap Num natural)))
  
  where
    link x opys = foldl (\accum (op,y) -> op accum y) x opys

-- In practice, chainl1 generalizes that, so use chainl1 instead.
mulsLv2 :: Parser Expr
mulsLv2 = chainl1 (fmap Num natural) (operator "*" *> pure (Prim2 Mul))


{-
Lesson 2: Allow and skip whitespaces everywhere. Disallow trailing junk.

For best sanity and least redundancy, follow this convention:

0. Have a "main" (entry point) parser.  ("lesson2" here.)

1. main parser skips leading whitespaces before real work.  Note that henceforth
   all remaining whitespaces can be considered as trailing, therefore:

2. Other parsers skip only trailing whitespaces.

3. main parser checks for no-junk after real work.

E.g., when parsing " 4 * 3 ":

1. main parser skips the space before "4".
2a. natural parser skips the space after "4".
2b. parser for "*" skips the space after "*".
2a. natural parser skips the space after "3".
3. main parser checks that there is no junk after.
-}

lesson2 :: Parser Expr
lesson2 = whitespaces *> muls <* eof
  where
    muls = chainl1 (fmap Num natural) (operator "*" *> pure (Prim2 Mul))


{-
Lesson 3: Include +.  + has lower precedence than *.  Both associate to the left.
Also support parentheses e.g., (0+1)*2 

    adds ::= muls { "+" muls }
    muls ::= atom { "*" atom }
    atom ::= natural | "(" adds ")"
-}

lesson3 :: Parser Expr
lesson3 = whitespaces *> adds <* eof
  where
    adds = chainl1 muls (operator "+" *> pure (Prim2 Add))
    muls = chainl1 atom (operator "*" *> pure (Prim2 Mul))
    atom = fmap Num natural <|> (openParen *> adds <* closeParen)


{-
Lesson 4: Include let-in and variables.

    expr ::= local | adds
    local ::= "let" { var "=" expr ";" } "in" expr
    adds ::= muls { "+" muls }
    muls ::= atom { "*" atom }
    atom ::= natural | var | "(" expr ")"

Problem: What happens to "let inn+4" under a naive parser?

It thinks you mean "let in n+4" rather than a syntax error.

Solution: To parse a keyword like "let" and "in", don't just read those letters.
Pretend to read an identifier (read as many alphanums as possible), then check
whether it equals the keyword you want.
-}

lesson4 :: Parser Expr
lesson4 = whitespaces *> expr <* eof
  where
    expr = local <|> adds

    local = pure (\_ eqns _ e -> Let eqns e)
            <*> keyword "let"
            <*> many equation
            <*> keyword "in"
            <*> expr
    -- Basically a liftA4.
    -- Could also be implemented in monadic style, like equation below.

    equation = var
               >>= \v -> operator "="
               >> expr
               >>= \e -> semicolon
               >> return (v, e)
    -- Basically a liftA4.
    -- Could also be implemented in applicative style, like local above.

    semicolon = char ';' *> whitespaces
    adds = chainl1 muls (operator "+" *> pure (Prim2 Add))
    muls = chainl1 atom (operator "*" *> pure (Prim2 Mul))
    atom = fmap Num natural
           <|> fmap Var var
           <|> (openParen *> expr <* closeParen)
    var = identifier ["let", "in"]