-- | Library of parser definition and operations.
module ParserLib (
    -- * Representation
    Parser, unParser, runParser, traceP,
    -- * Character-Level Primitives
    anyChar, char, satisfy, eof,
    -- * Connectives
    fmap, liftA2, (<*), (*>), (<*>), liftA3, pure, (>>=),
    (<|>), empty, optional, many, some,
    -- * Token-Level Primitives
    whitespace, whitespaces,
    natural, identifier, keyword, anyOperator, operator,
    openParen, closeParen,
    -- * Right/Left Associative Infix Operators
    chainr1, chainl1,
) where

import Data.Char
import Prelude hiding (fmap, pure, (<*>), (<*), (*>), (>>=))
-- In the future, we will learn that they are methods of some standard type
-- classes.  For now, I don't want to talk about those classes, I pretend they
-- are specific to parsing.


-----------------
-- Representation
-----------------

-- | Internal representation: Function from input string to:
--
-- * Nothing, if failure (syntax error);
-- * Just (unconsumed input, answer), if success.
data Parser a = MkParser (String -> Maybe (String, a))

-- | Unwrapper.
unParser :: Parser a -> String -> Maybe (String, a)
unParser (MkParser sf) = sf

-- | Using a parser on an input string.
runParser :: Parser a -> String -> Maybe a
runParser (MkParser sf) inp = case sf inp of
                                Nothing -> Nothing
                                Just (_, a) -> Just a

-- | This version of traceP is silent.
traceP :: Show a => String -> Parser a -> Parser a
traceP _ p = p


-----------------------------
-- Character-level primitives
-----------------------------

-- | Read a character. Failure if input is empty.
anyChar :: Parser Char
anyChar = MkParser sf
  where
    sf "" = Nothing
    sf (c:cs) = Just (cs, c)

-- | Read a character and check against the given character.
char :: Char -> Parser Char
char wanted = MkParser sf
  where
    sf (c:cs) | c == wanted = Just (cs, c)
    sf _ = Nothing

-- | Read a character and check against the given predicate.
satisfy :: (Char -> Bool) -> Parser Char
satisfy pred = MkParser sf
  where
    sf (c:cs) | pred c = Just (cs, c)
    sf _ = Nothing

-- | Expect the input to be empty.
eof :: Parser ()
eof = MkParser sf
  where
    sf "" = Just ("", ())
    sf _ = Nothing


--------------
-- Connectives
--------------

-- But you have to compose smaller parsers to build larger parsers and to return
-- more interesting answers, e.g., abstract syntax trees.

-- | Convert parser's answer by a given function.
fmap :: (a -> b) -> Parser a -> Parser b
fmap f (MkParser sf) = MkParser sfb
  where
    sfb inp = case sf inp of
                Nothing -> Nothing
                Just (rest, a) -> Just (rest, f a)

-- | Compose two parsers sequentially (unconsumed input from 1st is fed to 2nd).
-- Combine their answers by a binary operator/function.
liftA2 :: (a -> b -> c) -> Parser a -> Parser b -> Parser c
liftA2 op (MkParser sf1) p2 = MkParser g
  where
    g inp = case sf1 inp of
              Nothing -> Nothing
              Just (middle, a) ->
                  case unParser p2 middle of
                    Nothing -> Nothing
                    Just (rest, b) -> Just (rest, op a b)

-- | Compose two parsers sequentially.  Yield 1st parser's answer.
(<*) :: Parser a -> Parser b -> Parser a
pa <* pb = liftA2 (\a _ -> a) pa pb

-- | Compose two parsers sequentially.  Yield 2nd parser's answer.
(*>) :: Parser a -> Parser b -> Parser b
pa *> pb = liftA2 (\_ b -> b) pa pb

infixl 4 <*, *>

-- | Compose two parsers sequentially.  1st parser yields a function, apply it
-- to the answer of 2nd parser.
(<*>) :: Parser (a -> b) -> Parser a -> Parser b
pf <*> pa = liftA2 (\f a -> f a) pf pa

infixl 4 <*>

-- | Compose 3 parsers sequentially. Like liftA2 but 3.  In general every liftAn
-- can be done by pure and <*>.
liftA3 :: (a -> b -> c -> d) -> Parser a -> Parser b -> Parser c -> Parser d
liftA3 f pa pb pc = ((pure f <*> pa) <*> pb) <*> pc
-- Note: Those parentheses are redundant, <*> is left-associative.

-- | A "trivial" parser that yields the specified answer without consuming input.
--
-- Kind of identity of liftA2 and (<*>):
--
-- liftA2 op (pure a) pb = fmap (\b -> op a b) pb
-- liftA2 op pa (pure b) = fmap (\a -> op a b) pa
-- pure f <*> pa = fmap f pa
-- pf <*> pure a = fmap (\f -> f a) pf
pure :: a -> Parser a
pure a = MkParser (\inp -> Just (inp, a))

-- | Compose two parsers sequentially.  2nd parser varies over answer from 1st
-- parser.
--
-- pure is kind of identity too:
--
-- pure a >>= k = k a
-- k >>= pure = k
(>>=) :: Parser a -> (a -> Parser b) -> Parser b
MkParser sf1 >>= k = MkParser g
  where
    g inp = case sf1 inp of
              Nothing -> Nothing
              Just (rest, a) -> unParser (k a) rest

infixl 1 >>=

-- | Alternation. Try 1st parser. If success, done; if failure, 2nd parser.
(<|>) :: Parser a -> Parser a -> Parser a
MkParser sf1 <|> p2 = MkParser g
  where
    g inp = case sf1 inp of
              Nothing -> unParser p2 inp
              j -> j        -- the Just case

infixl 3 <|>

-- | Always fail. Identity of <|>.
empty :: Parser a
empty = MkParser (\_ -> Nothing)

-- | 0 or 1 time.
optional :: Parser a -> Parser (Maybe a)
optional p = fmap Just p <|> pure Nothing
-- Try 1 time. If success, tag with Just; if failure, answer Nothing.

-- | 0 or more times, maximum munch, collect the answers into a list.
many :: Parser a -> Parser [a]
many p = some p <|> pure []
-- Explanation: To repeat 0 or more times, first try 1 or more
-- times!  If that fails, then we know it's 0 times, and the answer is the
-- empty list.

-- | 1 or more times, maximum munch, collect the answers into a list.
some :: Parser a -> Parser [a]
some p = liftA2 (:) p (many p)
-- Explanation: To repeat 1 or more times, do 1 time, then 0 or more times!


-------------------------
-- Token-level primitives
-------------------------

-- | Space or tab or newline (unix and windows).
whitespace :: Parser Char
whitespace = satisfy (\c -> c `elem` ['\t', '\n', '\r', ' '])

-- | Consume zero or more whitespaces, maximum munch.
whitespaces :: Parser String
whitespaces = many whitespace

-- | Read a natural number (non-negative integer), then skip trailing spaces.
natural :: Parser Integer
natural = fmap read (some (satisfy isDigit)) <* whitespaces
-- read :: Read a => String -> a
-- For converting string to your data type, assuming valid string.  Integer
-- is an instance of Read, and our string is valid, so we can use read.

-- | Read an identifier, then skip trailing spaces.  The list is a list of
-- reserved words to avoid.
identifier :: [String] -> Parser String
identifier keywords =
    satisfy isAlpha
    >>= \c -> many (satisfy isAlphaNum)
    >>= \cs -> whitespaces
    *> let str = c:cs
    in if str `elem` keywords then empty else pure str

-- | Read the wanted reserved word, then skip trailing spaces.
keyword :: String -> Parser String
keyword wanted =
    satisfy isAlpha
    >>= \c -> many (satisfy isAlphaNum)
    >>= \cs -> whitespaces
    *> if c:cs == wanted then pure wanted else empty

-- | Read something that looks like an operator, then skip trailing spaces.
anyOperator :: Parser String
anyOperator = some (satisfy symChar) <* whitespaces
  where
    symChar c = c `elem` "!#$%&*+-./:<=>?@\\^|~"

-- | Read the wanted operator, then skip trailing spaces.
operator :: String -> Parser String
operator wanted =
    anyOperator
    >>= \sym -> if sym == wanted then pure wanted else empty

-- | Open and close parentheses.
openParen, closeParen :: Parser Char
openParen = char '(' <* whitespaces
closeParen = char ')' <* whitespaces


---------------------------------------
-- Canned solutions for infix operators
---------------------------------------

-- | One or more operands separated by an operator. Apply the operator(s) in a
-- right-associating way.
chainr1 :: Parser a               -- ^ operand parser
        -> Parser (a -> a -> a)   -- ^ operator parser
        -> Parser a               -- ^ whole answer
chainr1 getArg getOp = getArg
                       >>= \x ->     (getOp
                                      >>= \op -> chainr1 getArg getOp
                                      >>= \y -> pure (op x y))
                                 <|>
                                     pure x

-- | One or more operands separated by an operator. Apply the operator(s) in a
-- left-associating way.
chainl1 :: Parser a               -- ^ operand parser
        -> Parser (a -> a -> a)   -- ^ operator parser
        -> Parser a               -- ^ whole answer
chainl1 getArg getOp = getArg
                       >>= \x -> many (liftA2 (,) getOp getArg)
                       >>= \opys -> pure (foldl (\accum (op,y) -> op accum y) x opys)