From 25047d10e2805a8bad3fe7d304a60f3ef705639a Mon Sep 17 00:00:00 2001 From: Dan Doel Date: Fri, 24 Jan 2025 15:32:00 -0500 Subject: [PATCH 1/3] Rework data pattern matching to use default cases Previously, we were always expanding data type matching to explicitly match against every case. This was necessary because we would eagerly dump the entire data contents to the stack and analyze the tag from there. Different constructors with different numbrs of arguments would result in a different stack layout, so default cases could not be uniform. However, a while ago we started analyzing data type tags directly, to avoid putting them on the stack. This change goes the final step. It selects the branch before deciding to dump the data type to the stack, and only does so in specific case branches. Nothing is added to the stack in default branches, so they can be shared among all constructors. On the other end, the pattern matching compiler has been changed to preserve default cases when possible. When splitting on a data type, the actual constructors matched in the source are used as the explicit branches, and a default case is used if it exists in the source. The translation will still sometimes duplicate branches, but not nearly as much as before (and mainly due to complicated pattern matching). --- unison-runtime/src/Unison/Runtime/Machine.hs | 98 +++++++++++++------- unison-runtime/src/Unison/Runtime/Pattern.hs | 70 ++++++++++++-- unison-runtime/src/Unison/Runtime/Stack.hs | 11 ++- 3 files changed, 137 insertions(+), 42 deletions(-) diff --git a/unison-runtime/src/Unison/Runtime/Machine.hs b/unison-runtime/src/Unison/Runtime/Machine.hs index 75a13ad384..cd0d835a4c 100644 --- a/unison-runtime/src/Unison/Runtime/Machine.hs +++ b/unison-runtime/src/Unison/Runtime/Machine.hs @@ -683,14 +683,13 @@ eval env !denv !activeThreads !stk !k r (Match i br) = do n <- peekOffN stk i eval env denv activeThreads stk k r $ selectBranch n br eval env !denv !activeThreads !stk !k r (DMatch mr i br) = do - (t, stk) <- dumpDataNoTag mr stk =<< peekOff stk i - eval env denv activeThreads stk k r $ - selectBranch (maskTags t) br + (nx, stk) <- dataBranch mr stk br =<< peekOff stk i + eval env denv activeThreads stk k r nx eval env !denv !activeThreads !stk !k r (NMatch _mr i br) = do n <- peekOffN stk i eval env denv activeThreads stk k r $ selectBranch n br eval env !denv !activeThreads !stk !k r (RMatch i pu br) = do - (t, stk) <- dumpDataNoTag Nothing stk =<< peekOff stk i + (t, stk) <- dumpDataValNoTag stk =<< peekOff stk i if t == TT.pureEffectTag then eval env denv activeThreads stk k r pu else case ANF.unpackTags t of @@ -1000,46 +999,41 @@ buildData !stk !r !t (VArgV i) = do l = fsize stk - i {-# INLINE buildData #-} +dumpDataValNoTag :: + Stack -> + Val -> + IO (PackedTag, Stack) +dumpDataValNoTag stk (BoxedVal c) = + (closureTag c,) <$> dumpDataNoTag Nothing stk c +dumpDataValNoTag _ v = + die $ "dumpDataValNoTag: unboxed val: " ++ show v +{-# inline dumpDataValNoTag #-} + -- Dumps a data type closure to the stack without writing its tag. -- Instead, the tag is returned for direct case analysis. dumpDataNoTag :: Maybe Reference -> Stack -> - Val -> - IO (PackedTag, Stack) + Closure -> + IO Stack dumpDataNoTag !mr !stk = \case -- Normally we want to avoid dumping unboxed values since it's unnecessary, but sometimes we don't know the type of -- the incoming value and end up dumping unboxed values, so we just push them back to the stack as-is. e.g. in type-casts/coercions - val@(UnboxedVal _ t) -> do + Enum _ _ -> pure stk + Data1 _ _ x -> do stk <- bump stk - poke stk val - pure (unboxedPackedTag t, stk) - BoxedVal clos -> case clos of - (Enum _ t) -> pure (t, stk) - (Data1 _ t x) -> do - stk <- bump stk - poke stk x - pure (t, stk) - (Data2 _ t x y) -> do - stk <- bumpn stk 2 - pokeOff stk 1 y - poke stk x - pure (t, stk) - (DataG _ t seg) -> do - stk <- dumpSeg stk seg S - pure (t, stk) - clo -> - die $ - "dumpDataNoTag: bad closure: " - ++ show clo - ++ maybe "" (\r -> "\nexpected type: " ++ show r) mr - where - unboxedPackedTag :: UnboxedTypeTag -> PackedTag - unboxedPackedTag = \case - CharTag -> TT.charTag - FloatTag -> TT.floatTag - IntTag -> TT.intTag - NatTag -> TT.natTag + poke stk x + pure stk + Data2 _ _ x y -> do + stk <- bumpn stk 2 + pokeOff stk 1 y + stk <$ poke stk x + DataG _ _ seg -> dumpSeg stk seg S + clo -> + die $ + "dumpDataNoTag: bad closure: " + ++ show clo + ++ maybe "" (\r -> "\nexpected type: " ++ show r) mr {-# INLINE dumpDataNoTag #-} -- Note: although the representation allows it, it is impossible @@ -1995,6 +1989,40 @@ selectBranch t (TestW df cs) = lookupWithDefault df t cs selectBranch _ (TestT {}) = error "impossible" {-# INLINE selectBranch #-} +selectDataBranch :: Tag -> MBranch -> Either MSection MSection +selectDataBranch t (Test1 u cu df) + | t == u = Left cu + | otherwise = Right df +selectDataBranch t (Test2 u cu v cv df) + | t == u = Left cu + | t == v = Left cv + | otherwise = Right df +selectDataBranch t (TestW df bs) + | Just ca <- EC.lookup t bs = Left ca + | otherwise = Right df +selectDataBranch _ _ = + throw $ Panic "selectDataBranch: bad branches" Nothing +{-# inline selectDataBranch #-} + +-- Combined branch selection and field dumping function for data types. +-- Fields should only be dumped on _matches_, not default cases, because +-- default cases potentially cover many constructors which could result +-- in a variable number of values being put on the stack. Default cases +-- uniformly expect _no_ values to be added to the stack. +dataBranch + :: Maybe Reference -> Stack -> MBranch -> Val -> IO (MSection, Stack) +dataBranch mrf stk br (BoxedVal c) = case selectDataBranch t br of + Left ca -> (ca,) <$> dumpDataNoTag mrf stk c + Right df -> pure (df, stk) + where + t = maskTags $ closureTag c +dataBranch mrf _ _ v = + die $ + "dataBranch: unboxed value: " + ++ show v + ++ maybe "" (\r -> "\nexpected type: " ++ show r) mrf +{-# inline dataBranch #-} + -- Splits off a portion of the continuation up to a given prompt. -- -- The main procedure walks along the 'code' stack `k`, keeping track of how diff --git a/unison-runtime/src/Unison/Runtime/Pattern.hs b/unison-runtime/src/Unison/Runtime/Pattern.hs index 610a456d3a..899f2686c7 100644 --- a/unison-runtime/src/Unison/Runtime/Pattern.hs +++ b/unison-runtime/src/Unison/Runtime/Pattern.hs @@ -13,7 +13,7 @@ module Unison.Runtime.Pattern where import Control.Monad.State (State, evalState, modify, runState, state) -import Data.List (transpose) +import Data.List (nub, transpose) import Data.Map.Strict ( fromListWith, insertWith, @@ -92,6 +92,11 @@ builtinDataSpec = Map.fromList decls | (_, x, y) <- builtinEffectDecls ] +findPattern :: Eq v => v -> PatternRow v -> Maybe (Pattern v) +findPattern v (PR ms _ _) + | (_, p : _) <- break ((== v) . loc) ms = Just p + | otherwise = Nothing + -- A pattern compilation matrix is just a list of rows. There is -- no need for the rows to have uniform length; the variable -- annotations on the patterns in the rows keep track of what @@ -125,8 +130,11 @@ refutable (P.Unbound _) = False refutable (P.Var _) = False refutable _ = True -rowIrrefutable :: PatternRow v -> Bool -rowIrrefutable (PR ps _ _) = null ps +noMatches :: PatternRow v -> Bool +noMatches (PR ps _ _) = null ps + +rowRefutable :: PatternRow v -> Bool +rowRefutable (PR ps g _) = isJust g || not (null ps) firstRow :: ([P.Pattern v] -> Maybe v) -> Heuristic v firstRow f (PM (r : _)) = f $ matches r @@ -481,6 +489,19 @@ splitMatrix v rf cons (PM rs) = where mmap = fmap (\(t, fs) -> (t, splitRow v rf t fs =<< rs)) cons +-- Eliminates a variable from a matrix, keeping the rows that are +-- _not_ specific matches on that variable (so, would potentially +-- occur in a default case). +antiSplitMatrix :: + (Var v) => + v -> + PatternMatrix v -> + PatternMatrix v +antiSplitMatrix v (PM rs) = PM (f =<< rs) + where + -- keep rows that do not have a refutable pattern for v + f r = [ r | isNothing $ findPattern v r ] + -- Monad for pattern preparation. It is a state monad carrying a fresh -- variable source, the list of variables bound the pattern being -- prepared, and a variable renaming mapping. @@ -596,7 +617,7 @@ compile _ _ (PM []) = apps' bu [text () "pattern match failure"] where bu = ref () (Builtin "bug") compile spec ctx m@(PM (r : rs)) - | rowIrrefutable r = + | noMatches r = case guard r of Nothing -> body r Just g -> iff mempty g (body r) $ compile spec ctx (PM rs) @@ -614,8 +635,11 @@ compile spec ctx m@(PM (r : rs)) case lookupData rf spec of Right cons -> match () (var () v) $ - buildCase spec rf False cons ctx - <$> splitMatrix v (Just rf) (numberCons cons) m + (buildCase spec rf False cons ctx + <$> splitMatrix v (Just rf) ncons m) + ++ buildDefaultCase spec False needDefault ctx dm + where + needDefault = length ncons < length cons Left err -> internalBug err | PReq rfs <- ty = match () (var () v) $ @@ -631,7 +655,29 @@ compile spec ctx m@(PM (r : rs)) internalBug "unknown pattern compilation type" where v = choose heuristics m + ncons = relevantConstructors m v ty = Map.findWithDefault Unknown v ctx + dm = antiSplitMatrix v m + +-- Calculates the data constructors—with their arities—that should be +-- matched on when splitting a matrix on a given variable. This +-- includes +relevantConstructors :: Eq v => PatternMatrix v -> v -> [(Int, Int)] +relevantConstructors (PM rows) v = search [] rows + where + search acc (row : rows) + | rowRefutable row = case findPattern v row of + Just (P.Constructor _ (ConstructorReference _ t) sps) -> + search ((fromIntegral t, length sps) : acc) rows + Just (P.Boolean _ b) -> + search ((if b then 1 else 0, 0) : acc) rows + Just p -> + internalBug $ "unexpected data pattern: " ++ show p + -- if the pattern is not found, it must have been irrefutable, + -- so contributes no relevant constructor. + _ -> search acc rows + -- irrefutable row, or no rows left + search acc _ = nub $ reverse acc buildCaseBuiltin :: (Var v) => @@ -677,6 +723,18 @@ buildCase spec r eff cons ctx0 (t, vts, m) = vs = ((),) . fst <$> vts ctx = Map.fromList vts <> ctx0 +buildDefaultCase :: + (Var v) => + DataSpec -> + Bool -> + Bool -> + Ctx v -> + PatternMatrix v -> + [MatchCase () (Term v)] +buildDefaultCase spec _eff needed ctx pm + | needed = [MatchCase (Unbound ()) Nothing $ compile spec ctx pm] + | otherwise = [] + mkRow :: (Var v) => v -> diff --git a/unison-runtime/src/Unison/Runtime/Stack.hs b/unison-runtime/src/Unison/Runtime/Stack.hs index 164a4591f3..1b16004fae 100644 --- a/unison-runtime/src/Unison/Runtime/Stack.hs +++ b/unison-runtime/src/Unison/Runtime/Stack.hs @@ -22,6 +22,7 @@ module Unison.Runtime.Stack BlackHole, UnboxedTypeTag ), + closureTag, UnboxedTypeTag (..), unboxedTypeTagToInt, unboxedTypeTagFromInt, @@ -153,7 +154,7 @@ module Unison.Runtime.Stack ) where -import Control.Exception (throwIO) +import Control.Exception (throw, throwIO) import Control.Monad.Primitive import Data.Char qualified as Char import Data.IORef (IORef) @@ -371,6 +372,14 @@ splitData = \case (DataG r t seg) -> Just (r, t, segToList seg) _ -> Nothing +closureTag :: Closure -> PackedTag +closureTag (Enum _ t) = t +closureTag (Data1 _ t _) = t +closureTag (Data2 _ t _ _) = t +closureTag (DataG _ t _) = t +closureTag c = + throw $ Panic "closureTag: unexpected closure" (Just $ BoxedVal c) + -- | Converts a list of integers representing an unboxed segment back into the -- appropriate segment. Segments are stored backwards in the runtime, so this -- reverses the list. From 83718f916bab11c26e4d77b8355187aade4e7567 Mon Sep 17 00:00:00 2001 From: Dan Doel Date: Wed, 29 Jan 2025 15:37:55 -0500 Subject: [PATCH 2/3] Rewrite `dataBranch` to perform a bit better. --- unison-runtime/src/Unison/Runtime/Machine.hs | 108 ++++++++++++++----- unison-runtime/src/Unison/Runtime/Stack.hs | 1 + 2 files changed, 82 insertions(+), 27 deletions(-) diff --git a/unison-runtime/src/Unison/Runtime/Machine.hs b/unison-runtime/src/Unison/Runtime/Machine.hs index cd0d835a4c..8976269e17 100644 --- a/unison-runtime/src/Unison/Runtime/Machine.hs +++ b/unison-runtime/src/Unison/Runtime/Machine.hs @@ -683,7 +683,7 @@ eval env !denv !activeThreads !stk !k r (Match i br) = do n <- peekOffN stk i eval env denv activeThreads stk k r $ selectBranch n br eval env !denv !activeThreads !stk !k r (DMatch mr i br) = do - (nx, stk) <- dataBranch mr stk br =<< peekOff stk i + (nx, stk) <- dataBranch mr stk br =<< bpeekOff stk i eval env denv activeThreads stk k r nx eval env !denv !activeThreads !stk !k r (NMatch _mr i br) = do n <- peekOffN stk i @@ -1989,40 +1989,94 @@ selectBranch t (TestW df cs) = lookupWithDefault df t cs selectBranch _ (TestT {}) = error "impossible" {-# INLINE selectBranch #-} -selectDataBranch :: Tag -> MBranch -> Either MSection MSection -selectDataBranch t (Test1 u cu df) - | t == u = Left cu - | otherwise = Right df -selectDataBranch t (Test2 u cu v cv df) - | t == u = Left cu - | t == v = Left cv - | otherwise = Right df -selectDataBranch t (TestW df bs) - | Just ca <- EC.lookup t bs = Left ca - | otherwise = Right df -selectDataBranch _ _ = - throw $ Panic "selectDataBranch: bad branches" Nothing -{-# inline selectDataBranch #-} - -- Combined branch selection and field dumping function for data types. -- Fields should only be dumped on _matches_, not default cases, because -- default cases potentially cover many constructors which could result -- in a variable number of values being put on the stack. Default cases -- uniformly expect _no_ values to be added to the stack. dataBranch - :: Maybe Reference -> Stack -> MBranch -> Val -> IO (MSection, Stack) -dataBranch mrf stk br (BoxedVal c) = case selectDataBranch t br of - Left ca -> (ca,) <$> dumpDataNoTag mrf stk c - Right df -> pure (df, stk) - where - t = maskTags $ closureTag c -dataBranch mrf _ _ v = - die $ - "dataBranch: unboxed value: " - ++ show v - ++ maybe "" (\r -> "\nexpected type: " ++ show r) mrf + :: Maybe Reference -> Stack -> MBranch -> Closure -> IO (MSection, Stack) +dataBranch mrf stk (Test1 u cu df) = \case + Enum _ t + | maskTags t == u -> pure (cu, stk) + | otherwise -> pure (df, stk) + Data1 _ t x + | maskTags t == u -> do + stk <- bump stk + (cu, stk) <$ poke stk x + | otherwise -> pure (df, stk) + Data2 _ t x y + | maskTags t == u -> do + stk <- bumpn stk 2 + pokeOff stk 1 y + (cu, stk) <$ poke stk x + | otherwise -> pure (df, stk) + DataG _ t seg + | maskTags t == u -> (cu,) <$> dumpSeg stk seg S + | otherwise -> pure (df, stk) + clo -> dataBranchClosureError mrf clo +dataBranch mrf stk (Test2 u cu v cv df) = \case + Enum _ t + | maskTags t == u -> pure (cu, stk) + | maskTags t == v -> pure (cv, stk) + | otherwise -> pure (df, stk) + Data1 _ t x + | maskTags t == u -> do + stk <- bump stk + (cu, stk) <$ poke stk x + | maskTags t == v -> do + stk <- bump stk + (cv, stk) <$ poke stk x + | otherwise -> pure (df, stk) + Data2 _ t x y + | maskTags t == u -> do + stk <- bumpn stk 2 + pokeOff stk 1 y + (cu, stk) <$ poke stk x + | maskTags t == v -> do + stk <- bumpn stk 2 + pokeOff stk 1 y + (cv, stk) <$ poke stk x + | otherwise -> pure (df, stk) + DataG _ t seg + | maskTags t == u -> (cu,) <$> dumpSeg stk seg S + | maskTags t == v -> (cv,) <$> dumpSeg stk seg S + | otherwise -> pure (df, stk) + clo -> dataBranchClosureError mrf clo +dataBranch mrf stk (TestW df bs) = \case + Enum _ t + | Just ca <- EC.lookup (maskTags t) bs -> pure (ca, stk) + | otherwise -> pure (df, stk) + Data1 _ t x + | Just ca <- EC.lookup (maskTags t) bs -> do + stk <- bump stk + (ca, stk) <$ poke stk x + | otherwise -> pure (df, stk) + Data2 _ t x y + | Just ca <- EC.lookup (maskTags t) bs -> do + stk <- bumpn stk 2 + pokeOff stk 1 y + (ca, stk) <$ poke stk x + | otherwise -> pure (df, stk) + DataG _ t seg + | Just ca <- EC.lookup (maskTags t) bs -> + (ca,) <$> dumpSeg stk seg S + | otherwise -> pure (df, stk) + clo -> dataBranchClosureError mrf clo +dataBranch _ _ br = \_ -> + dataBranchBranchError br {-# inline dataBranch #-} +dataBranchClosureError :: Maybe Reference -> Closure -> IO a +dataBranchClosureError mrf clo = + die $ "dataBranch: bad closure: " + ++ show clo + ++ maybe "" (\ r -> "\nexpected type: " ++ show r) mrf + +dataBranchBranchError :: MBranch -> IO a +dataBranchBranchError br = + die $ "dataBranch: unexpected branch: " ++ show br + -- Splits off a portion of the continuation up to a given prompt. -- -- The main procedure walks along the 'code' stack `k`, keeping track of how diff --git a/unison-runtime/src/Unison/Runtime/Stack.hs b/unison-runtime/src/Unison/Runtime/Stack.hs index 1b16004fae..730f5351f0 100644 --- a/unison-runtime/src/Unison/Runtime/Stack.hs +++ b/unison-runtime/src/Unison/Runtime/Stack.hs @@ -379,6 +379,7 @@ closureTag (Data2 _ t _ _) = t closureTag (DataG _ t _) = t closureTag c = throw $ Panic "closureTag: unexpected closure" (Just $ BoxedVal c) +{-# inline closureTag #-} -- | Converts a list of integers representing an unboxed segment back into the -- appropriate segment. Segments are stored backwards in the runtime, so this From cd8171312b2df34dd8aef7bdd7d8b6c0cf74cf76 Mon Sep 17 00:00:00 2001 From: Dan Doel Date: Wed, 29 Jan 2025 15:55:32 -0500 Subject: [PATCH 3/3] Use `nubOrd` instead of `nub` --- unison-runtime/src/Unison/Runtime/Pattern.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/unison-runtime/src/Unison/Runtime/Pattern.hs b/unison-runtime/src/Unison/Runtime/Pattern.hs index 899f2686c7..b783e9bf50 100644 --- a/unison-runtime/src/Unison/Runtime/Pattern.hs +++ b/unison-runtime/src/Unison/Runtime/Pattern.hs @@ -13,7 +13,8 @@ module Unison.Runtime.Pattern where import Control.Monad.State (State, evalState, modify, runState, state) -import Data.List (nub, transpose) +import Data.Containers.ListUtils (nubOrd) +import Data.List (transpose) import Data.Map.Strict ( fromListWith, insertWith, @@ -662,7 +663,7 @@ compile spec ctx m@(PM (r : rs)) -- Calculates the data constructors—with their arities—that should be -- matched on when splitting a matrix on a given variable. This -- includes -relevantConstructors :: Eq v => PatternMatrix v -> v -> [(Int, Int)] +relevantConstructors :: Ord v => PatternMatrix v -> v -> [(Int, Int)] relevantConstructors (PM rows) v = search [] rows where search acc (row : rows) @@ -677,7 +678,7 @@ relevantConstructors (PM rows) v = search [] rows -- so contributes no relevant constructor. _ -> search acc rows -- irrefutable row, or no rows left - search acc _ = nub $ reverse acc + search acc _ = nubOrd $ reverse acc buildCaseBuiltin :: (Var v) =>