-- H. Conrad Cunningham
-- Dept. of Computer and Info. Science, U. of Mississippi
-- CSci 555, Fall 1994, Homework #3
-- A recognizer for simple mathematical expressions
-- 14 November 1994
-- BNF for expression
-- expression ::= term | term addOp expression
-- term ::= factor | factor mulOp term
-- factor ::= number | identifier | ( expression )
-- Lexical symbols
-- addOp; char "+" or "-"
-- mulOp: char "*" or "/"
-- identifier: first char alphabetic, all alphanumerics and
-- underscores that follow
-- number: first char numeric, all numerics that follow
-- space: space characters (blanks, tabs, newlines, etc.)
-- occur anywhere except in identifiers and numbers
type Token = String
-- For testing purposes, function "test" takes a string and shows
-- both the "Bool" and "[Token]" returns from "expr".
test :: String -> (Bool,[Token])
test xs = expr (lex xs)
-- Function "valid" takes a string and returns "True" if and only if
-- the string is an acceptable expression.
valid :: String -> Bool
valid xs = exprOk && rest == []
where (exprOk,rest) = expr (lex xs)
-- Function "lex" takes a string and returns the corresponding list of
-- lexical tokens. Except for spaces, identifiers, and numbers, each
-- character is considered a token.
lex :: String -> [Token]
lex [] = []
lex xs@(x:xs')
| isSpace x = lex xs'
| isAlpha x = let (id,rest) = span p xs
p z = isAlphanum z || z == '_'
in id : lex rest
| isDigit x = let (num,rest) = span isDigit xs
in num : lex rest
| otherwise = [x] : lex xs'
-- Function "expr" takes a token list and returns a tuple. The first
-- component of the tuple is "True" if and only if an expression is
-- recognized at the beginning of the token list. If the first
-- component is "True", then the second component is the token list
-- remaining after the expression is removed. Otherwise, the second
-- component is the token list remaining at the point an error is discovered.
expr :: [Token] -> (Bool,[Token])
expr [] = (False,[])
expr xs
| rest == [] = (termOk,[])
| termOk && next == ")" = (True,rest)
| termOk && addOk next = expr aft
| otherwise = (False,rest)
where (termOk,rest) = term xs
(next:aft) = rest
-- Function "term" takes a token list and returns a tuple. The first
-- component of the tuple is "True" if and only if a term is recognized at
-- the beginning of the token list. If the first component is "True", then
-- the second component is the token list remaining after the term is
-- removed. Otherwise, the second component is the token list remaining
-- at the point an error is discovered.
term :: [Token] -> (Bool,[Token])
term [] = (False,[])
term xs
| rest == [] = (factorOk,[])
| factorOk && next == ")" = (True,rest)
| factorOk && addOk next = (True,rest)
| factorOk && mulOk next = term aft
| otherwise = (False,rest)
where (factorOk,rest) = factor xs
(next:aft) = rest
-- Function "factor" takes a token list and returns a tuple. The
-- first component of the tuple is "True" if and only if a factor is
-- recognized at the beginning of the token list. If the first component
-- is "True", then the second component is the token list remaining after
-- the factor is removed. Otherwise, the second component is the token
-- list remaining at the point an error is discovered. Function "factor2"
-- recognizes a nested expression and its closing parenthesis.
factor :: [Token] -> (Bool,[Token])
factor [] = (False,[])
factor xs@(x:xs')
| ident x = (True,xs')
| numb x = (True,xs')
| x == "(" = factor2 xs'
| otherwise = (False,xs)
factor2 :: [Token] -> (Bool,[Token])
factor2 [] = (False,[])
factor2 xs
| rest == [] = (False,[])
| exprOk && next == ")" = (True,aft)
| otherwise = (False,rest)
where (exprOk,rest) = expr xs
(next:aft) = rest
-- Functions "ident", "numb", "addOk", and "mulOk" take a token and
-- return "True" if and only if the token is an identifier, number,
-- addOp, or mulOp, respectively. Functions "ident" and "numb" assume
-- that "lex" has worked correctly.
ident :: Token -> Bool
ident [] = False
ident (x:xs) = isAlpha x
numb :: Token -> Bool
numb [] = False
numb (x:xs) = isDigit x
addOk :: Token -> Bool
addOk x = (x == "+" || x == "-")
mulOk :: Token -> Bool
mulOk x = (x == "*" || x == "/")