I'm building a parser for expression.
Here's my grammar rule:
expr ::= term (+ expr | - expr | null)
term ::= expo (* term | / term | null)
expo ::= factor (^ expo | null)
factor ::= (expr) | int
and corresponding code:
expr :: Parser Int
expr = do t <- term
do _ <- symbol "+"
e <- expr
return (t + e)
<|> do _ <- symbol "-"
e <- expr
return (t - e)
<|> return t
term :: Parser Int
term = do ep <- expo
do _ <- symbol "*"
t <- term
return (ep * t)
<|> do _ <- symbol "/"
t <- term
return (ep `div` t)
<|> return ep
expo :: Parser Int
expo = do f <- factor
do _ <- symbol "^"
e <- expo
return (f ^ e)
<|> return f
factor :: Parser Int
factor = do _ <- symbol "("
e <- expr
_ <- symbol ")"
return e
<|> integer
It works well for most case but fail for certain:
$ eval "3*1/3"
0
since 3 * 1 / 3
is parsed to 3 * (1 / 3)
(*)
/ \
3 (/)
/ \
1 3
and
$ eval "4-3-2"
3
since 4 - 3 - 2
is parsed to 4 - (3 - 2)
(-)
/ \
4 (-)
/ \
3 2
I realize it's all about parsing direction (right associativity).
What I expect is (4 - 3) - 2
(-)
/ \
(-) 2
/ \
4 3
which means I would parse right-left
and interpret it left-right
(or parse it recursively).
I have no idea how to achieve so. Nothing but foldl
comes to my mind so far.
Could someone suggest what should I do to fix it?
total program:
{-# OPTIONS_GHC -Wall #-}
import Control.Applicative
import Data.Char
newtype Parser a = P (String -> [(a, String)])
parse :: Parser a -> String -> [(a, String)]
parse (P p) inp = p inp
instance Functor Parser where
fmap g p = P (\inp -> case parse p inp of
[] -> []
[(v, out)] -> [(g v, out)]
_ -> undefined)
instance Applicative Parser where
pure v = P (\inp -> [(v, inp)])
pg <*> px = P (\inp -> case parse pg inp of
[] -> []
[(g, out)] -> parse (fmap g px) out
_ -> undefined)
instance Monad Parser where
p >>= f = P (\inp -> case parse p inp of
[] -> []
[(v, out)] -> parse (f v) out
_ -> undefined)
instance Alternative Parser where
empty = P (\_ -> [])
p <|> q = P (\inp -> case parse p inp of
[] -> parse q inp
[(v, out)] -> [(v, out)]
_ -> undefined)
many x = some x <|> pure []
some x = pure (:) <*> x <*> many x
item :: Parser Char
item = P (\inp -> case inp of
[] -> []
(x : xs) -> [(x, xs)])
sat :: (Char -> Bool) -> Parser Char
sat p = do x <- item
if p x
then return x
else empty
digit :: Parser Char
digit = sat isDigit
char :: Char -> Parser Char
char x = sat (== x)
string :: String -> Parser String
string [] = return []
string (x : xs) = do _ <- char x
_ <- string xs
return (x : xs)
space :: Parser ()
space = do _ <- many (sat isSpace)
return ()
nat :: Parser Int
nat = do xs <- some digit
return (read xs)
int :: Parser Int
int = do _ <- char '-'
n <- nat
return (-n)
<|> nat
token :: Parser a -> Parser a
token p = do _ <- space
v <- p
_ <- space
return v
integer :: Parser Int
integer = token int
symbol :: String -> Parser String
symbol = token . string
expr :: Parser Int
expr = do t <- term
do _ <- symbol "+"
e <- expr
return (t + e)
<|> do _ <- symbol "-"
e <- expr
return (t - e)
<|> return t
term :: Parser Int
term = do ep <- expo
do _ <- symbol "*"
t <- term
return (ep * t)
<|> do _ <- symbol "/"
t <- term
return (ep `div` t)
<|> return ep
expo :: Parser Int
expo = do f <- factor
do _ <- symbol "^"
e <- expo
return (f ^ e)
<|> return f
factor :: Parser Int
factor = do _ <- symbol "("
e <- expr
_ <- symbol ")"
return e
<|> integer
eval :: String -> Int
eval xs = case (parse expr xs) of
[(n, [])] -> n
[(_, out)] -> error ("Unused input " ++ out)
[] -> error "Invalid input"
_ -> undefined
You can implement parser combinators like these:
chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a
chainl1 p op = p >>= rest
where
rest x = do{ f <- op
; y <- p
; rest (f x y)
}
<|> pure x
chainr1 :: Parsec a -> Parsec (a -> a -> a) -> Parsec a
chainr1 p op = scan
where
scan = p >>= rest
rest x = (\f y -> f x y) <$> op <*> scan <|> pure x
Then you can implement your grammar rules like these:
expr = term `chainl1` addop
term = expo `chainl1` mulop
expo = factor `chainr1` expop
factor = parens expr <|> integer
addop = (+) <$ symbol "+" <|> (-) <$ symbol "-"
mulop = (*) <$ symbol "*" <|> (div) <$ symbol "/"
expop = (^) <$ symbol "^"
parens p = symbol "(" *> p <* symbol ")"
But I recommend you to use parser library like package parsec.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With