From 2ec1c389b882e62d309eb7d02c519575a23a5e78 Mon Sep 17 00:00:00 2001 From: Joachim Breitner Date: Thu, 7 Sep 2023 14:39:00 -0700 Subject: [PATCH] More stuff --- Parser.hs | 2 +- ParserParser.hs | 89 ++++++++++++++++++++++++++++++++++++++++++++++ RParser.hs | 9 +++-- RParser2.hs | 93 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 190 insertions(+), 3 deletions(-) create mode 100644 ParserParser.hs create mode 100644 RParser2.hs diff --git a/Parser.hs b/Parser.hs index 154b6c6..a34ec3c 100644 --- a/Parser.hs +++ b/Parser.hs @@ -152,4 +152,4 @@ memoise key p = P $ \input i k -> do Just (Value rs ks) -> do insertTable (Value rs (k' : ks)) concat . concat <$> - mapM (\(i, rs) -> mapM (k i) (map from rs)) (IntMap.toList rs) + mapM (\(i, rs) -> mapM (k i . from) rs) (IntMap.toList rs) diff --git a/ParserParser.hs b/ParserParser.hs new file mode 100644 index 0000000..2dbf386 --- /dev/null +++ b/ParserParser.hs @@ -0,0 +1,89 @@ +module ParserParser where + +import Control.Applicative +import Data.Foldable +import Data.Functor +import Data.Char +import qualified Data.Map as M + +import RParser + +-- A type for simple grammars + +type Ident = String +type RuleRhs = [Seq] +type Seq = [Atom] +data Atom = Lit String | NonTerm Ident deriving Show +type Rule = (Ident, RuleRhs) +type BNF = [Rule] + +-- a parser for simple BNF syntax + +type P = Parser Char + +l :: P a -> P a +l p = p' where -- NB: Sharing! + p' = p <|> p' <* sat isSpace +quote :: P Char +quote = tok '\'' +quoted :: P a -> P a +quoted p = quote *> p <* quote +str :: P String +str = some (sat (not . (== '\''))) +ident :: P Ident +ident = some (sat (\c -> isAlphaNum c && isAscii c)) +atom :: P Atom +atom = Lit <$> l (quoted str) + <|> NonTerm <$> l ident +eps :: P () +eps = void $ l (tok 'ε') +sep :: P () +sep = void $ some (sat isSpace) +sq :: P Seq +sq = [] <$ eps + <|> (:) <$> atom <* sep <*> sq + <|> pure <$> atom +ruleRhs :: P RuleRhs +ruleRhs = pure <$> sq <* l (tok ';') + <|> (:) <$> sq <* l (tok '|') <*> ruleRhs +rule :: P Rule +rule = (,) <$> l ident <* l (tok ':' *> tok '=') <*> ruleRhs +bnf :: P BNF +bnf = liftA2 (:) rule bnf <|> pure [] + +-- Interpreting a BNF, producing a parse tree, i.e. noting which +-- non-terminal fired + +interp :: BNF -> P String +interp bnf = parsers M.! start + where + start :: Ident + start = fst (head bnf) + + parsers :: M.Map Ident (P String) + parsers = M.fromList [ (i, parseRule i rhs) | (i, rhs) <- bnf ] + + parseRule :: Ident -> RuleRhs -> P String + parseRule ident rhs = trace <$> asum (map parseSeq rhs) + where trace s = ident ++ "(" ++ s ++ ")" + + parseSeq :: Seq -> P String + parseSeq = fmap concat . traverse parseAtom + + parseAtom :: Atom -> P String + parseAtom (Lit s) = traverse tok s + parseAtom (NonTerm i) = parsers M.! i + +-- An example + +numExp :: String +numExp = unlines + [ "term := sum;" + , "pdigit := '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9';" + , "digit := '0' | pdigit;" + , "pnum := pdigit | pnum digit;" + , "num := '0' | pnum;" + , "prod := atom | atom '*' prod;" + , "sum := prod | prod '+' sum;" + , "atom := num | '(' term ')';" + ] diff --git a/RParser.hs b/RParser.hs index 5e0ec6d..0b6317d 100644 --- a/RParser.hs +++ b/RParser.hs @@ -4,6 +4,7 @@ import Data.Unique import Data.Typeable import System.IO.Unsafe import Control.Applicative +import Data.Maybe import qualified Parser as P @@ -14,8 +15,12 @@ withMemo p = unsafePerformIO $ do u <- newUnique pure $ MkP $ P.memoise u p -parse :: Parser tok a -> [tok] -> [a] -parse (MkP p) = P.parse p +parses :: Parser tok a -> [tok] -> [a] +parses (MkP p) = P.parse p + +parse :: Parser tok a -> [tok] -> Maybe a +parse p = listToMaybe . parses p + sat' :: Typeable a => (tok -> Maybe a) -> Parser tok a sat' p = MkP (P.sat' p) diff --git a/RParser2.hs b/RParser2.hs new file mode 100644 index 0000000..ddd89e6 --- /dev/null +++ b/RParser2.hs @@ -0,0 +1,93 @@ +module RParser2 (Parser, parse, parses, sat', sat, token, tok) where + +import Data.Unique +import Data.Typeable +import System.IO.Unsafe +import Control.Applicative +import Data.Maybe + +import qualified Data.Recursive.Set as RS +import qualified Data.Set as S + +import qualified Parser as P + +import Debug.Trace + +data Parser tok a = MkP + { unP :: P.Parser Unique tok a + , refs :: RS.RSet Unique } + +lift0 :: P.Parser Unique tok a -> Parser tok a +lift0 p = MkP p RS.empty + +mkMemoP r p = withUnique $ \u -> MkP + (if u `S.member` RS.get r + then -- traceShow (hashUnique u) $ + P.memoise u p + else p) + (RS.insert u r) + +lift1 :: (P.Parser Unique tok a -> P.Parser Unique tok b) + -> (Parser tok a -> Parser tok b) +lift1 pf ~(MkP p1 r1) = + mkMemoP r1 (pf p1) + +lift2 :: (P.Parser Unique tok a -> P.Parser Unique tok b -> P.Parser Unique tok c) + -> (Parser tok a -> Parser tok b -> Parser tok c) +lift2 pf ~(MkP p1 r1) ~(MkP p2 r2) = + mkMemoP (r1 `RS.union` r2) (pf p1 p2) + +withUnique :: (Unique -> a) -> a +withUnique f = unsafePerformIO $ f <$> newUnique + +parses :: Parser tok a -> [tok] -> [a] +parses (MkP p _) = P.parse p + +parse :: Parser tok a -> [tok] -> Maybe a +parse p = listToMaybe . parses p + +sat' :: Typeable a => (tok -> Maybe a) -> Parser tok a +sat' p = lift0 (P.sat' p) + +sat :: Typeable tok => (tok -> Bool) -> Parser tok tok +sat p = lift0 (P.sat p) + +token :: Typeable tok => Parser tok tok +token = lift0 P.token + +tok :: Eq tok => tok -> Parser tok tok +tok t = lift0 (P.tok t) + +instance Functor (Parser tok) where + fmap f = lift1 (fmap f) + +instance Applicative (Parser tok) where + pure x = lift0 (pure x) + (<*>) = lift2 (<*>) + +{- +instance Monad (Parser tok) where + return = pure + p1 >>= f = withMemo $ unP p1 >>= unP . f +-} + +instance Alternative (Parser tok) where + empty = lift0 empty + (<|>) = lift2 (<|>) + +-- The large example from https://okmij.org/ftp/Haskell/LeftRecursion.hs + +(>>>) :: Monoid a => Parser tok a -> Parser tok a -> Parser tok a +p1 >>> p2 = liftA2 (<>) p1 p2 + +char :: Char -> Parser Char String +char a = pure <$> tok a + +s,a,b,c :: Parser Char String +s = s >>> a >>> c <|> c +a = b <|> char 'a' >>> c >>> char 'a' +b = id <$> b +c = char 'b' <|> c >>> a + +-- ghci> parse s "babababa" +-- ["babababa"]