module Parsing where

import Data.Char
import Prelude hiding (fmap, pure, (<*>), (<*), (*>), (>>=))

import ParserLib

-- Example of using fmap

anyCode :: Parser Int
anyCode = fmap ord anyChar
-- Try: runParser anyCode "AB", runParser anyCode ""

-- Example of using liftA2

ldeV1 :: Parser String
ldeV1 = liftA2 (\x y -> [x,y]) (satisfy isAlpha) (satisfy isDigit)
        <*
        (char '!')
-- Try: runParser ldeV1 "B6!", runParser ldeV1 "B6a"

-- Example of using >>=
ldeV2 :: Parser String
ldeV2 = satisfy isAlpha
        >>= (\x -> satisfy isDigit
        >>= (\y -> char '!'
        >>= (\_ -> pure [x, y])))
        -- Note: Those parentheses are redundant.
-- Try: runParser ldeV2 "B6!", runParser ldeV2 "B6a"

-- Example of using empty
satisfyV2 pred = anyChar >>= \c -> if pred c then pure c else empty

-- Example of using <|>
ab01 :: Parser String
ab01 = liftA2 (\x y -> [x,y]) (char 'A' <|> char 'B') (char '0' <|> char '1')


-- Simplistic abstract syntax tree we aim for.
data Expr
    = Num Integer
    | Var String
    | Prim2 Op2 Expr Expr       -- Prim2 op operand operand
    | Let [(String, Expr)] Expr -- Let [(name, rhs), ...] body
    deriving (Eq, Show)

data Op2 = Add | Sub | Mul | Pow
    deriving (Eq, Show)

{-
Lesson 1a: Number literals and *

    pows ::= natural { "^" natural }

where ^ associates to the right.

My code uses the right recursive version:

    pows ::= natural [ "^" pows ]
-}

powsV1 :: Parser Expr
powsV1 = fmap Num natural
         >>= \x ->     ((operator "^" *> pure (Prim2 Pow))
                        >>= \op -> powsV1
                        >>= \y -> pure (op x y))
                   <|>
                       pure x

-- In practice, chainr1 generalizes that, so use chainr1 instead.
powsV2 :: Parser Expr
powsV2 = chainr1 (fmap Num natural) (operator "^" *> pure (Prim2 Pow))


{-
Lesson 1b: * associates to the left.

Main idea: Use

    muls ::= natural { "*" natural }

then use foldl.
-}

mulsV1 :: Parser Expr
mulsV1 = fmap Num natural
         >>= \x -> many (liftA2 (,)
                                (operator "*" *> pure (Prim2 Mul))
                                (fmap Num natural))
         >>= \opys -> pure (foldl (\accum (op,y) -> op accum y) x opys)

-- In practice, chainl1 generalizes that, so use chainl1 instead.
mulsV2 :: Parser Expr
mulsV2 = chainl1 (fmap Num natural) (operator "*" *> pure (Prim2 Mul))


{-
Lesson 2: Allow and skip whitespaces everywhere. Disallow trailing junk.

For best sanity and least redundancy, follow this convention:

0. Have a "main" (entry point) parser.  ("lesson2" here.)

1. main parser skips leading whitespaces before real work.  Note that henceforth
   all remaining whitespaces can be considered as trailing, therefore:

2. Other parsers skip only trailing whitespaces.

3. main parser checks for no-junk after real work.

E.g., when parsing " 4 * 3 ":

1. main parser skips the space before "4".
2a. natural parser skips the space after "4".
2b. parser for "*" skips the space after "*".
2a. natural parser skips the space after "3".
3. main parser checks that there is no junk after.
-}

lesson2 :: Parser Expr
lesson2 = whitespaces *> muls <* eof
  where
    muls = chainl1 (fmap Num natural) (operator "*" *> pure (Prim2 Mul))


{-
Lesson 3: * has lower precedence than ^. Also support parentheses e.g. (3*4)^2.

    (start symbol: muls)
    muls ::= pows { "*" pows }
    pows ::= atom { "^" atom }
    atom ::= natural | "(" muls ")"
-}

lesson3 :: Parser Expr
lesson3 = whitespaces *> muls <* eof
  where
    muls = chainl1 pows (operator "*" *> pure (Prim2 Mul))
    pows = chainr1 atom (operator "^" *> pure (Prim2 Pow))
    atom = fmap Num natural <|> (openParen *> muls <* closeParen)


{-
Lesson 4: Include let-in and variables.

    (start symbol: expr)
    expr ::= local | muls
    local ::= "let" { var "=" expr ";" } "in" expr
    muls ::= pows { "*" pows }
    pows ::= atom { "^" atom }
    atom ::= natural | var | "(" expr ")"

Problem: What happens to "let inn+4" under a naive parser?

It thinks you mean "let in n+4" rather than a syntax error.

Solution: To parse a keyword like "let" and "in", don't just read those letters.
Pretend to read an identifier (read as many alphanums as possible), then check
whether it equals the keyword you want.
-}

lesson4 :: Parser Expr
lesson4 = whitespaces *> expr <* eof
  where
    expr = local <|> muls

    local = pure (\_ eqns _ e -> Let eqns e)
            <*> keyword "let"
            <*> many equation
            <*> keyword "in"
            <*> expr
    -- Basically a liftA4.
    -- Could also be implemented with (>>=), like equation below.

    equation = var
               >>= \v -> operator "="
               *> expr
               >>= \e -> semicolon
               *> pure (v, e)
    -- Basically a liftA4.
    -- Could also be implemented with (<*>), like local above.

    semicolon = char ';' *> whitespaces
    muls = chainl1 pows (operator "*" *> pure (Prim2 Mul))
    pows = chainr1 atom (operator "^" *> pure (Prim2 Pow))
    atom = fmap Num natural
           <|> fmap Var var
           <|> (openParen *> expr <* closeParen)
    var = identifier ["let", "in"]