From 4215d4e2e6c53bdb86846a8a5672821ae36d04be Mon Sep 17 00:00:00 2001 From: "Anton Vl. Kalinin" Date: Thu, 4 Aug 2022 14:41:03 +0300 Subject: [PATCH] [ haskell, #423 ]: structured errors in the Haskell backend A new option "--errors" is introduced, which can change the parser failure type from 'String' to a record type. --- source/src/BNFC/Backend/Haskell.hs | 4 +- source/src/BNFC/Backend/Haskell/CFtoAlex3.hs | 7 +- source/src/BNFC/Backend/Haskell/CFtoHappy.hs | 121 +++++++++++++++---- source/src/BNFC/Backend/Haskell/MkErrM.hs | 11 +- source/src/BNFC/Backend/HaskellGADT.hs | 4 +- source/src/BNFC/Options.hs | 24 +++- 6 files changed, 140 insertions(+), 31 deletions(-) diff --git a/source/src/BNFC/Backend/Haskell.hs b/source/src/BNFC/Backend/Haskell.hs index 93d3600f..2c0f2d95 100644 --- a/source/src/BNFC/Backend/Haskell.hs +++ b/source/src/BNFC/Backend/Haskell.hs @@ -68,12 +68,12 @@ makeHaskell opts cf = do -- Generate Happy parser and matching test program. do mkfile (happyFile opts) commentWithEmacsModeHint $ - cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) (functor opts) cf + cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) (functor opts) (errorType opts) cf -- liftIO $ printf "%s Tested with Happy 1.15\n" (happyFile opts) mkfile (tFile opts) comment $ testfile opts cf -- Both Happy parser and skeleton (template) rely on Err. - mkfile (errFile opts) comment $ mkErrM errMod + mapM_ (mkfile (errFile opts) comment) $ mkErrM errMod (errorType opts) mkfile (templateFile opts) comment $ cf2Template (templateFileM opts) absMod (functor opts) cf -- Generate txt2tags documentation. diff --git a/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs b/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs index 65d0dbf5..019ddf7b 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs @@ -268,8 +268,11 @@ restOfAlex tokenText cf = concat , "-- A modified \"posn\" wrapper." , "-------------------------------------------------------------------" , "" - , "data Posn = Pn !Int !Int !Int" - , " deriving (Eq, Show, Ord)" + , "data Posn = Pn" + , " { pnAbsolute :: !Int" + , " , pnLine :: !Int" + , " , pnColumn :: !Int" + , " } deriving (Eq, Show, Ord)" , "" , "alexStartPos :: Posn" , "alexStartPos = Pn 0 1 1" diff --git a/source/src/BNFC/Backend/Haskell/CFtoHappy.hs b/source/src/BNFC/Backend/Haskell/CFtoHappy.hs index f21f9aba..558e90fe 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoHappy.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoHappy.hs @@ -17,7 +17,7 @@ import Data.List (intersperse) import BNFC.CF import BNFC.Backend.Common.StrUtils (escapeChars) import BNFC.Backend.Haskell.Utils -import BNFC.Options (HappyMode(..), TokenText(..)) +import BNFC.Options (HappyMode(..), TokenText(..), ErrorType(..)) import BNFC.PrettyPrint import BNFC.Utils @@ -42,9 +42,10 @@ cf2Happy -> HappyMode -- ^ Happy mode. -> TokenText -- ^ Use @ByteString@ or @Text@? -> Bool -- ^ AST is a functor? + -> ErrorType -- ^ The error type in the parser result type. -> CF -- ^ Grammar. -> String -- ^ Generated code. -cf2Happy name absName lexName mode tokenText functor cf = unlines +cf2Happy name absName lexName mode tokenText functor errorType cf = unlines [ header name absName lexName tokenText eps , render $ declarations mode functor eps , render $ tokens cf functor @@ -52,7 +53,7 @@ cf2Happy name absName lexName mode tokenText functor cf = unlines , specialRules absName functor tokenText cf , render $ prRules absName functor (rulesForHappy absName functor cf) , "" - , footer absName tokenText functor eps cf + , footer absName tokenText functor eps errorType cf ] where eps = toList $ allEntryPoints cf @@ -66,7 +67,13 @@ header modName absName lexName tokenText eps = unlines $ concat , "{-# LANGUAGE PatternSynonyms #-}" , "" , "module " ++ modName - , " ( happyError" + , " ( Failure(..)" + , " , InvalidTokenFailure(..)" + , " , UnexpectedTokenFailure(..)" + , " , UnexpectedEofFailure(..)" + -- TODO: maybe we should stop exporting happyError, since there is no reason + -- to use it outside and its type can vary? + , " , happyError" , " , myLexer" ] , map ((" , " ++) . render . parserName) eps @@ -91,6 +98,8 @@ header modName absName lexName tokenText eps = unlines $ concat -- -- no lexer declaration -- %monad { Err } { (>>=) } { return } -- %tokentype {Token} +-- %errorhandlertype explist +-- %error { happyError } -- -- >>> declarations Standard True [Cat "A", Cat "B", ListCat (Cat "B")] -- %name pA_internal A @@ -99,14 +108,18 @@ header modName absName lexName tokenText eps = unlines $ concat -- -- no lexer declaration -- %monad { Err } { (>>=) } { return } -- %tokentype {Token} +-- %errorhandlertype explist +-- %error { happyError } declarations :: HappyMode -> Bool -> [Cat] -> Doc declarations mode functor ns = vcat [ vcat $ map generateP ns , case mode of Standard -> "-- no lexer declaration" - GLR -> "%lexer { myLexer } { Err _ }", - "%monad { Err } { (>>=) } { return }", - "%tokentype" <+> braces (text tokenName) + GLR -> "%lexer { myLexer } { Err _ }" + , "%monad { Err } { (>>=) } { return }" + , "%tokentype" <+> braces (text tokenName) + , "%errorhandlertype explist" + , "%error { happyError }" ] where generateP n = "%name" <+> parserName n <> (if functor then "_internal" else "") <+> text (identCat n) @@ -255,24 +268,88 @@ prRules absM functor = vsep . map prOne -- Finally, some haskell code. -footer :: ModuleName -> TokenText -> Bool -> [Cat] -> CF -> String -footer absName tokenText functor eps _cf = unlines $ concat +footer :: ModuleName -> TokenText -> Bool -> [Cat] -> ErrorType -> CF -> String +footer absName tokenText functor eps errorType _cf = unlines $ concat [ [ "{" , "" - , "type Err = Either String" + , "-- | The parser failure type." + , "--" + , "-- It can contain fields of more specific failure record types, so that they" + , "-- could easily be extended with new fields." + , "data Failure" + , " = FailureInvalidToken !InvalidTokenFailure" + , " | FailureUnexpectedToken !UnexpectedTokenFailure" + , " | FailureUnexpectedEof !UnexpectedEofFailure" + , " deriving (Show, Eq)" , "" - , "happyError :: [" ++ tokenName ++ "] -> Err a" - , "happyError ts = Left $" - , " \"syntax error at \" ++ tokenPos ts ++ " - , " case ts of" - , " [] -> []" - , " [Err _] -> \" due to lexer error\"" - , unwords - [ " t:_ -> \" before `\" ++" - , "(prToken t)" - -- , tokenTextUnpack tokenText "(prToken t)" - , "++ \"'\"" - ] + , "-- | The lexer error type." + , "newtype InvalidTokenFailure = InvalidTokenFailure" + , " { itfPosn :: Posn -- ^ The position of the beginning of an invalid token." + , " } deriving (Show, Eq)" + , "" + , "-- | The parser error: no production is found to match a token." + , "data UnexpectedTokenFailure = UnexpectedTokenFailure" + , " { utfPosn :: !Posn -- ^ The position of the beginning of the unexpected token." + , " , utfTokenText :: !(" ++ tokenTextType tokenText ++ ")" + , " , utfExpectedTokens :: [String] -- ^ Names of possible tokens at this position according to the grammar." + , " } deriving (Show, Eq)" + , "" + , "-- | The parser error: the end of file is encountered but a token is expected." + , "newtype UnexpectedEofFailure = UnexpectedEofFailure" + , " { ueofExpectedTokens :: [String] -- ^ Names of possible tokens at this position according to the grammar." + , " } deriving (Show, Eq)" + , "" + ] + , case errorType of + ErrorTypeStructured -> + [ "type Err = Either Failure" + , "" + , "happyError :: ([" ++ tokenName ++ "], [String]) -> Err a" + , "happyError = Left . uncurry mkFailure" + ] + ErrorTypeString -> + [ "type Err = Either String" + , "" + , "happyError :: ([" ++ tokenName ++ "], [String]) -> Err a" + , "happyError = Left . failureToString . uncurry mkFailure" + , "" + , "failureToString :: Failure -> String" + , "failureToString f =" + , " \"syntax error at \" ++ pos ++ " + , " case f of" + , " FailureUnexpectedEof _ -> []" + , " FailureInvalidToken _ -> \" due to lexer error\"" + , unwords + [ " FailureUnexpectedToken ut -> \" before `\" ++" + , tokenTextUnpack tokenText "(utfTokenText ut)" + , "++ \"'\"" + ] + , " where" + , " pos = case f of" + , " FailureInvalidToken it -> printPosn (itfPosn it)" + , " FailureUnexpectedToken ut -> printPosn (utfPosn ut)" + , " FailureUnexpectedEof _ -> \"end of file\"" + ] + , [ "" + , "mkFailure :: [" ++ tokenName ++ "] -> [String] -> Failure" + , "mkFailure ts expectedTokens = case ts of" + , " [] ->" + , " FailureUnexpectedEof" + , " UnexpectedEofFailure" + , " { ueofExpectedTokens = expectedTokens" + , " }" + , " [Err pos] ->" + , " FailureInvalidToken" + , " InvalidTokenFailure" + , " { itfPosn = pos" + , " }" + , " t : _ ->" + , " FailureUnexpectedToken" + , " UnexpectedTokenFailure" + , " { utfPosn = tokenPosn t" + , " , utfTokenText = tokenText t" + , " , utfExpectedTokens = expectedTokens" + , " }" , "" , "myLexer :: " ++ tokenTextType tokenText ++ " -> [" ++ tokenName ++ "]" , "myLexer = tokens" diff --git a/source/src/BNFC/Backend/Haskell/MkErrM.hs b/source/src/BNFC/Backend/Haskell/MkErrM.hs index 6c452577..b9f099fe 100644 --- a/source/src/BNFC/Backend/Haskell/MkErrM.hs +++ b/source/src/BNFC/Backend/Haskell/MkErrM.hs @@ -11,9 +11,16 @@ module BNFC.Backend.Haskell.MkErrM where import BNFC.PrettyPrint +import BNFC.Options (ErrorType(..)) -mkErrM :: String -> Doc -mkErrM errMod = vcat +-- | Creates @ErrM.hs@ file if needed. +-- +-- It returns 'Nothing' if there is no need to create it. +mkErrM :: String -> ErrorType -> Maybe Doc +mkErrM _ ErrorTypeStructured = Nothing + -- ErrM.hs is only for backward compatibility with old code using string + -- errors, so that we don't create it in case of structured errors. +mkErrM errMod ErrorTypeString = Just $ vcat [ "{-# LANGUAGE CPP #-}" , "" , "#if __GLASGOW_HASKELL__ >= 708" diff --git a/source/src/BNFC/Backend/HaskellGADT.hs b/source/src/BNFC/Backend/HaskellGADT.hs index 40a65ce2..a0da0999 100644 --- a/source/src/BNFC/Backend/HaskellGADT.hs +++ b/source/src/BNFC/Backend/HaskellGADT.hs @@ -43,14 +43,14 @@ makeHaskellGadt opts cf = do mkHsFileHint (alexFile opts) $ cf2alex3 lexMod (tokenText opts) cf liftIO $ putStrLn " (Use Alex 3 to compile.)" mkHsFileHint (happyFile opts) $ - cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) False cf + cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) False (errorType opts) cf liftIO $ putStrLn " (Tested with Happy 1.15 - 1.20)" mkHsFile (templateFile opts) $ cf2Template (templateFileM opts) absMod cf mkHsFile (printerFile opts) $ cf2Printer StringToken False True prMod absMod cf when (hasLayout cf) $ mkHsFile (layoutFile opts) $ cf2Layout layMod lexMod cf mkHsFile (tFile opts) $ Haskell.testfile opts cf - mkHsFile (errFile opts) $ mkErrM errMod + mapM_ (mkHsFile (errFile opts)) $ mkErrM errMod (errorType opts) Makefile.mkMakefile opts $ Haskell.makefile opts cf case xml opts of 2 -> makeXML opts True cf diff --git a/source/src/BNFC/Options.hs b/source/src/BNFC/Options.hs index 95b0ebae..933ec017 100644 --- a/source/src/BNFC/Options.hs +++ b/source/src/BNFC/Options.hs @@ -13,7 +13,7 @@ module BNFC.Options , SharedOptions(..) , defaultOptions, isDefault, printOptions , AlexVersion(..), HappyMode(..), OCamlParser(..), JavaLexerParser(..) - , RecordPositions(..), TokenText(..) + , RecordPositions(..), TokenText(..), ErrorType(..) , InPackage , removedIn290 , translateOldOptions @@ -82,6 +82,12 @@ instance Show Target where show TargetPygments = "Pygments" show TargetCheck = "Check LBNF file" +-- | Which error type to use in the generated parser result? +data ErrorType + = ErrorTypeString -- ^ Errors are plain strings. + | ErrorTypeStructured -- ^ Errors are values of a record/structure type. + deriving (Show,Eq,Ord) + -- | Which version of Alex is targeted? data AlexVersion = Alex3 deriving (Show,Eq,Ord,Bounded,Enum) @@ -132,6 +138,7 @@ data SharedOptions = Options , glr :: HappyMode -- ^ Happy option @--glr@. , xml :: Int -- ^ Options @--xml@, generate DTD and XML printers. , agda :: Bool -- ^ Option @--agda@. Create bindings for Agda? + , errorType :: ErrorType -- ^ An error type to use in the parser result. --- OCaml specific , ocamlParser :: OCamlParser -- ^ Option @--menhir@ to switch to @Menhir@. --- Java specific @@ -165,6 +172,7 @@ defaultOptions = Options , glr = Standard , xml = 0 , agda = False + , errorType = ErrorTypeString -- OCaml specific , ocamlParser = OCamlYacc -- Java specific @@ -224,6 +232,9 @@ printOptions opts = unwords . concat $ , [ "--xml" | xml opts == 1 ] , [ "--xmlt" | xml opts == 2 ] , [ "--agda" | agda opts ] + , case errorType opts of + ErrorTypeString -> [] + ErrorTypeStructured -> [ "--errors=structured" ] -- C# options: , [ "--vs" | visualStudio opts ] , [ "--wfc" | wcf opts ] @@ -363,6 +374,9 @@ specificOptions = , ( Option [] ["generic"] (NoArg (\o -> pure o {generic = True})) "Derive Data, Generic, and Typeable instances for AST types" , haskellTargets ) + , ( Option [] ["errors"] (ReqArg parseAndSetErrorType "TYPE") + "Set the parser error type. Valid values are `string' (default) and `structured'" + , [TargetHaskell] ) , ( Option [] ["xml"] (NoArg (\o -> pure o {xml = 1})) "Also generate a DTD and an XML printer" , haskellTargets ) @@ -374,6 +388,14 @@ specificOptions = "Also generate Agda bindings for the abstract syntax" , [TargetHaskell] ) ] + where + parseAndSetErrorType arg o = (\t -> o {errorType = t}) <$> parseErrorType arg + + parseErrorType s = case s of + "string" -> pure ErrorTypeString + "structured" -> pure ErrorTypeStructured + _ -> Left $ "Wrong error type: " ++ show s + -- | The list of specific options for a target. specificOptions' :: Target -> [OptDescr (SharedOptions -> Either String SharedOptions)]