import Prelude hiding (seq)
import Data.Char
import Control.Monad.Error
data RegEx c = Null
| Eps
| Sym (c -> Bool)
| Alt (RegEx c) (RegEx c)
| Seq (RegEx c) (RegEx c)
| Star (RegEx c)
notNull :: RegEx c -> Bool
notNull Null = False
notNull _ = True
-- show
-- Some helpful utility functions
word :: String -> RegEx Char
word s = foldr seq Eps (map symC s)
symC :: (Eq c) => c -> RegEx c
symC c = Sym (==c)
alt :: RegEx c -> RegEx c -> RegEx c
alt Null r = r
alt r Null = r
alt r1 r2 = Alt r1 r2
seq :: RegEx c -> RegEx c -> RegEx c
seq Null _ = Null
seq _ Null = Null
seq Eps r = r
seq r Eps = r
seq r1 r2 = Seq r1 r2
star :: RegEx c -> RegEx c
star Null = Eps
star r = Star r
-- /show
empty :: RegEx c -> Bool
empty Null = False
empty Eps = True
empty (Sym _) = False
empty (Alt r1 r2) = empty r1 || empty r2
empty (Seq r1 r2) = empty r1 && empty r2
empty (Star _) = True
fromBool :: Bool -> RegEx c
fromBool True = Eps
fromBool False = Null
derivative :: c -> RegEx c -> RegEx c
derivative _ Null = Null
derivative _ Eps = Null
derivative c (Sym f) = if f c then Eps else Null
derivative c (Alt r1 r2) = alt (derivative c r1) (derivative c r2)
derivative c (Seq r1 r2) = alt (seq (fromBool $ empty r1) (derivative c r2))
(seq (derivative c r1) (r2))
derivative c (Star r) = seq (derivative c r) (star r)
match :: RegEx Char -> String -> Bool
match r [] = empty r
match r (c:cs) = match (derivative c r) cs
type Regex = RegEx Char
mapFst :: (a->t) -> [(a,b)] -> [(t,b)]
mapFst f ps = map (\(x,y) ->(f x, y)) ps
lexS :: String -> Either String [Token]
lexS s = lexer rules s
lexer :: [(Regex,String -> t)] -> String -> Either String [t]
lexer rules s = lexAt 0 0 rules s where
lexAt line char rules s = do
(t,s',line',char') <- tokenize line char rules s
case s' of
[] -> return [t]
str -> do
rest <- lexAt line' char' rules str
return $ t:rest
tok :: [(Regex,String->t)] -> String -> Either String (t,String,Int,Int)
tok = tokenize 0 0
tokenize :: Int -> Int -> [(Regex,String->t)] -> String -> Either String (t,String,Int,Int)
tokenize line char rules string = go line char rules string [] where
go line char [] _ _ = Left "Error: Empty rule list."
go line char rules [] s = let rules' = filter ((\(a,b) -> empty a)) rules in
case rules' of
[] -> Left $ "Line: " ++ show line ++ " Char: " ++ show char ++ "\nError: Empty string unexpected"
(r:rs) -> Right (snd r $ reverse s,[],line,char)
go line char rules (c:cs) s = let char' = char + 1
line' = line + if c == '\n' then 1 else 0
step = mapFst (derivative c) rules in
case filter (\(a,b) -> notNull a) step of
(r:rs) -> go line' char' step cs (c:s)
[] -> case filter (\(a,b) -> empty a) rules of
[] -> Left $ "Line: " ++ show line ++ " Char: " ++ show char ++ "\nError: No matching regular expressions."
(r:rs) -> Right $ (snd r $ reverse s,(c:cs),line,char)
-- show
data Token = LParenT
| RParenT
| SpaceT
deriving(Show)
rules = [ (Sym $ isSpace, const SpaceT)
, (symC '(', const LParenT)
, (symC ')', const RParenT)
]
program = "(let (x 5) (+ x 3))"
-- /show
main = putStrLn $ show $ lexS program
Lexing
As of March 2020, School of Haskell has been switched to read-only mode.