diff --git a/.travis.yml b/.travis.yml index 040ea2fc..b8048c6f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,16 +30,16 @@ matrix: include: - compiler: "ghc-7.8.4" # env: TEST=--disable-tests BENCH=--disable-benchmarks - addons: {apt: {packages: [ghc-ppa-tools,cabal-install-2.0,ghc-7.8.4], sources: [hvr-ghc]}} + addons: {apt: {packages: [ghc-ppa-tools,cabal-install-2.4,ghc-7.8.4], sources: [hvr-ghc]}} - compiler: "ghc-7.10.3" # env: TEST=--disable-tests BENCH=--disable-benchmarks - addons: {apt: {packages: [ghc-ppa-tools,cabal-install-2.0,ghc-7.10.3], sources: [hvr-ghc]}} + addons: {apt: {packages: [ghc-ppa-tools,cabal-install-2.4,ghc-7.10.3], sources: [hvr-ghc]}} - compiler: "ghc-8.0.2" # env: TEST=--disable-tests BENCH=--disable-benchmarks - addons: {apt: {packages: [ghc-ppa-tools,cabal-install-2.0,ghc-8.0.2], sources: [hvr-ghc]}} + addons: {apt: {packages: [ghc-ppa-tools,cabal-install-2.4,ghc-8.0.2], sources: [hvr-ghc]}} - compiler: "ghc-8.2.2" # env: TEST=--disable-tests BENCH=--disable-benchmarks - addons: {apt: {packages: [ghc-ppa-tools,cabal-install-2.0,ghc-8.2.2], sources: [hvr-ghc]}} + addons: {apt: {packages: [ghc-ppa-tools,cabal-install-2.4,ghc-8.2.2], sources: [hvr-ghc]}} - compiler: "ghc-8.4.1" env: GHCHEAD=true addons: {apt: {packages: [ghc-ppa-tools,cabal-install-head,ghc-8.4.1], sources: [hvr-ghc]}} @@ -87,7 +87,7 @@ install: (cd "."; autoreconf -i); fi - rm -f cabal.project.freeze - - cabal new-build -w ${HC} ${TEST} ${BENCH} --project-file="cabal.project" --dep -j2 all + - cabal new-build -w ${HC} ${TEST} ${BENCH} --project-file="cabal.project" --dep -j2 all --reorder-goals --max-backjumps=-1 - cabal new-build -w ${HC} --disable-tests --disable-benchmarks --project-file="cabal.project" --dep -j2 all - rm -rf "."/.ghc.environment.* "."/dist - DISTDIR=$(mktemp -d /tmp/dist-test.XXXX) diff --git a/Data/HashMap/Base.hs b/Data/HashMap/Base.hs index 553d2b3a..3753f05a 100644 --- a/Data/HashMap/Base.hs +++ b/Data/HashMap/Base.hs @@ -23,6 +23,10 @@ module Data.HashMap.Base -- * Basic interface , null , size + , equal1 + , equal2 + , cmp1 + , cmp2 , member , lookup , lookupDefault @@ -76,12 +80,15 @@ module Data.HashMap.Base , fromList , fromListWith - -- Internals used by the strict version + -- Internals used by the strict version or tests , Hash + , Salt , Bitmap , bitmapIndexedOrFull , collision - , hash + , hashWithSalt + , defaultSalt + , nextSalt , mask , index , bitsPerSubkey @@ -97,7 +104,7 @@ module Data.HashMap.Base , filterMapAux , equalKeys , equalKeys1 - , lookupRecordCollision + , lookupWithRes , LookupRes(..) , insert' , delete' @@ -106,12 +113,17 @@ module Data.HashMap.Base , insertKeyExists , deleteKeyExists , insertModifying + , unconsA + , UnconsA(..) + , UnconsHM(..) + , unconsHM , ptrEq , adjust# + , isLeafOrCollision ) where #if __GLASGOW_HASKELL__ < 710 -import Control.Applicative ((<$>), Applicative(pure)) +import Control.Applicative ((<$>), (<*>), Applicative(pure)) import Data.Monoid (Monoid(mempty, mappend)) import Data.Traversable (Traversable(..)) import Data.Word (Word) @@ -149,7 +161,7 @@ import qualified Data.Hashable.Lifted as H #endif #if __GLASGOW_HASKELL__ >= 802 -import GHC.Exts (TYPE, Int (..), Int#) +import GHC.Exts (TYPE) #endif #if MIN_VERSION_base(4,8,0) @@ -157,16 +169,17 @@ import Data.Functor.Identity (Identity (..)) #endif import Control.Applicative (Const (..)) import Data.Coerce (coerce) +import Data.Function (on) -- | A set of values. A set cannot contain duplicate values. ------------------------------------------------------------------------ --- | Convenience function. Compute a hash value for the given value. -hash :: H.Hashable a => a -> Hash -hash = fromIntegral . H.hash +-- | Convenience function. Compute a hash value for the given value with a given salt. +hashWithSalt :: H.Hashable a => Salt -> a -> Hash +hashWithSalt s v = fromIntegral $ H.hashWithSalt s v data Leaf k v = L !k v - deriving (Eq) + deriving (Show, Eq) instance (NFData k, NFData v) => NFData (Leaf k v) where rnf (L k v) = rnf k `seq` rnf v @@ -181,17 +194,17 @@ data HashMap k v | BitmapIndexed !Bitmap !(A.Array (HashMap k v)) | Leaf !Hash !(Leaf k v) | Full !(A.Array (HashMap k v)) - | Collision !Hash !(A.Array (Leaf k v)) + | Collision !Hash !(Leaf k v) !(Leaf k v) !(HashMap k v) deriving (Typeable) type role HashMap nominal representational instance (NFData k, NFData v) => NFData (HashMap k v) where - rnf Empty = () - rnf (BitmapIndexed _ ary) = rnf ary - rnf (Leaf _ l) = rnf l - rnf (Full ary) = rnf ary - rnf (Collision _ ary) = rnf ary + rnf Empty = () + rnf (BitmapIndexed _ ary) = rnf ary + rnf (Leaf _ l) = rnf l + rnf (Full ary) = rnf ary + rnf (Collision _ l1 l2 ary) = rnf l1 `seq` rnf l2 `seq` rnf ary instance Functor (HashMap k) where fmap = map @@ -231,6 +244,7 @@ hashMapDataType :: DataType hashMapDataType = mkDataType "Data.HashMap.Base.HashMap" [fromListConstr] type Hash = Word +type Salt = Int type Bitmap = Word type Shift = Int @@ -273,17 +287,17 @@ instance Traversable (HashMap k) where instance Eq2 HashMap where liftEq2 = equal2 -instance Eq k => Eq1 (HashMap k) where +instance (Eq k, Hashable k) => Eq1 (HashMap k) where liftEq = equal1 #endif -instance (Eq k, Eq v) => Eq (HashMap k v) where +instance (Eq k, Hashable k, Eq v) => Eq (HashMap k v) where (==) = equal1 (==) -- We rely on there being no Empty constructors in the tree! -- This ensures that two equal HashMaps will have the same -- shape, modulo the order of entries in Collisions. -equal1 :: Eq k +equal1 :: (Eq k, Hashable k) => (v -> v' -> Bool) -> HashMap k v -> HashMap k v' -> Bool equal1 eq = go @@ -293,8 +307,10 @@ equal1 eq = go = bm1 == bm2 && A.sameArray1 go ary1 ary2 go (Leaf h1 l1) (Leaf h2 l2) = h1 == h2 && leafEq l1 l2 go (Full ary1) (Full ary2) = A.sameArray1 go ary1 ary2 - go (Collision h1 ary1) (Collision h2 ary2) - = h1 == h2 && isPermutationBy leafEq (A.toList ary1) (A.toList ary2) + go t1@(Collision h1 _ _ _) t2@(Collision h2 _ _ _) = + h1 == h2 + && size t1 == size t2 + && foldrWithKey (\k v r -> maybe False (eq v) (lookup k t2) && r) True t1 go _ _ = False leafEq (L k1 v1) (L k2 v2) = k1 == k2 && eq v1 v2 @@ -307,14 +323,16 @@ equal2 eqk eqv t1 t2 = go (toList' t1 []) (toList' t2 []) -- 'Collision's read from left to right should be the same (modulo the -- order of elements in 'Collision'). - go (Leaf k1 l1 : tl1) (Leaf k2 l2 : tl2) - | k1 == k2 && + go (Leaf h1 l1 : tl1) (Leaf h2 l2 : tl2) + | h1 == h2 && leafEq l1 l2 = go tl1 tl2 - go (Collision k1 ary1 : tl1) (Collision k2 ary2 : tl2) - | k1 == k2 && - A.length ary1 == A.length ary2 && - isPermutationBy leafEq (A.toList ary1) (A.toList ary2) + go (Collision h1 (L k11 v11) (L k12 v12) hm1 : tl1) (Collision h2 (L k21 v21) (L k22 v22) hm2 : tl2) + | h1 == h2 && + (size hm1 == size hm2) && + isPermutationBy (\(k1, v1) (k2, v2) -> eqk k1 k2 && eqv v1 v2) + ((k11, v11) : (k12, v12) : toList hm1) + ((k21, v21) : (k22, v22) : toList hm2) = go tl1 tl2 go [] [] = True go _ _ = False @@ -323,10 +341,10 @@ equal2 eqk eqv t1 t2 = go (toList' t1 []) (toList' t2 []) #if MIN_VERSION_base(4,9,0) instance Ord2 HashMap where - liftCompare2 = cmp + liftCompare2 = cmp2 -instance Ord k => Ord1 (HashMap k) where - liftCompare = cmp compare +instance (Ord k, Hashable k) => Ord1 (HashMap k) where + liftCompare = cmp1 #endif -- | The order is total. @@ -334,28 +352,65 @@ instance Ord k => Ord1 (HashMap k) where -- /Note:/ Because the hash is not guaranteed to be stable across library -- versions, OSes, or architectures, neither is an actual order of elements in -- 'HashMap' or an result of `compare`.is stable. -instance (Ord k, Ord v) => Ord (HashMap k v) where - compare = cmp compare compare +instance (Ord k, Hashable k, Ord v) => Ord (HashMap k v) where + compare = cmp1 compare -cmp :: (k -> k' -> Ordering) -> (v -> v' -> Ordering) - -> HashMap k v -> HashMap k' v' -> Ordering -cmp cmpk cmpv t1 t2 = go (toList' t1 []) (toList' t2 []) +cmp1 :: (Ord k, Hashable k) => (v -> v' -> Ordering) + -> HashMap k v -> HashMap k v' -> Ordering +cmp1 cmpv t1 t2 = go (toList' t1 []) (toList' t2 []) where go (Leaf k1 l1 : tl1) (Leaf k2 l2 : tl2) = compare k1 k2 `mappend` leafCompare l1 l2 `mappend` go tl1 tl2 - go (Collision k1 ary1 : tl1) (Collision k2 ary2 : tl2) + go (Collision h1 (L k11 v11) (L k12 v12) hm1 : tl1) (Collision h2 (L k21 v21) (L k22 v22) hm2 : tl2) + = compare h1 h2 `mappend` + compare (size hm1) (size hm2) `mappend` + liftCompareList pairCompare + -- We don't use sortOn because fst is cheap. + (L.sortBy (compare `on` fst) $ + (k11, v11) : (k12, v12) : toList hm1) + (L.sortBy (compare `on` fst) $ + (k21, v21) : (k22, v22) : toList hm2) `mappend` + go tl1 tl2 + go (Leaf _ _ : _) (Collision _ _ _ _ : _) = LT + go (Collision _ _ _ _ : _) (Leaf _ _ : _) = GT + go [] [] = EQ + go [] _ = LT + go _ [] = GT + go _ _ = error "cmp2: Should never happen, toList' includes non Leaf / Collision" + + leafCompare (L k v) (L k' v') = compare k k' `mappend` cmpv v v' + pairCompare (k1, v1) (k2, v2) = (k1 `compare` k2) `mappend` (v1 `cmpv` v2) + +-- Our own copy of liftCompare for lists. +liftCompareList :: (a -> b -> Ordering) -> [a] -> [b] -> Ordering +liftCompareList _ [] [] = EQ +liftCompareList _ [] _ = LT +liftCompareList _ _ [] = GT +liftCompareList cmp (x : xs) (y : ys) = cmp x y `mappend` liftCompareList cmp xs ys + +cmp2 :: (k -> k' -> Ordering) -> (v -> v' -> Ordering) + -> HashMap k v -> HashMap k' v' -> Ordering +cmp2 cmpk cmpv t1 t2 = go (toList' t1 []) (toList' t2 []) + where + go (Leaf k1 l1 : tl1) (Leaf k2 l2 : tl2) = compare k1 k2 `mappend` - compare (A.length ary1) (A.length ary2) `mappend` - unorderedCompare leafCompare (A.toList ary1) (A.toList ary2) `mappend` + leafCompare l1 l2 `mappend` go tl1 tl2 - go (Leaf _ _ : _) (Collision _ _ : _) = LT - go (Collision _ _ : _) (Leaf _ _ : _) = GT + go (Collision h1 (L k11 v11) (L k12 v12) hm1 : tl1) (Collision h2 (L k21 v21) (L k22 v22) hm2 : tl2) + = compare h1 h2 `mappend` + compare (size hm1) (size hm2) `mappend` + unorderedCompare (\(k1, v1) (k2, v2) -> (k1 `cmpk` k2) `mappend` (v1 `cmpv` v2)) + ((k11, v11) : (k12, v12) : toList hm1) + ((k21, v21) : (k22, v22) : toList hm2) `mappend` + go tl1 tl2 + go (Leaf _ _ : _) (Collision _ _ _ _ : _) = LT + go (Collision _ _ _ _ : _) (Leaf _ _ : _) = GT go [] [] = EQ go [] _ = LT go _ [] = GT - go _ _ = error "cmp: Should never happend, toList' includes non Leaf / Collision" + go _ _ = error "cmp2: Should never happen, toList' includes non Leaf / Collision" leafCompare (L k v) (L k' v') = cmpk k k' `mappend` cmpv v v' @@ -366,9 +421,12 @@ equalKeys1 eq t1 t2 = go (toList' t1 []) (toList' t2 []) go (Leaf k1 l1 : tl1) (Leaf k2 l2 : tl2) | k1 == k2 && leafEq l1 l2 = go tl1 tl2 - go (Collision k1 ary1 : tl1) (Collision k2 ary2 : tl2) - | k1 == k2 && A.length ary1 == A.length ary2 && - isPermutationBy leafEq (A.toList ary1) (A.toList ary2) + go (Collision h1 (L k11 _) (L k12 _) hm1 : tl1) (Collision h2 (L k21 _) (L k22 _) hm2 : tl2) + | h1 == h2 && + (size hm1 == size hm2) && + isPermutationBy eq + (k11 : k12 : keys hm1) + (k21 : k22 : keys hm2) = go tl1 tl2 go [] [] = True go _ _ = False @@ -385,8 +443,10 @@ equalKeys = go = bm1 == bm2 && A.sameArray1 go ary1 ary2 go (Leaf h1 l1) (Leaf h2 l2) = h1 == h2 && leafEq l1 l2 go (Full ary1) (Full ary2) = A.sameArray1 go ary1 ary2 - go (Collision h1 ary1) (Collision h2 ary2) - = h1 == h2 && isPermutationBy leafEq (A.toList ary1) (A.toList ary2) + go t1@(Collision h1 _ _ _) t2@(Collision h2 _ _ _) + = h1 == h2 + && size t1 == size t2 + && L.foldr (\k r -> (k `elem` (keys t2)) && r) True (keys t1) go _ _ = False leafEq (L k1 _) (L k2 _) = k1 == k2 @@ -401,20 +461,18 @@ instance H.Hashable2 HashMap where = s `hashLeafWithSalt` l `go` tl -- For collisions we hashmix hash value -- and then array of values' hashes sorted - go s (Collision h a : tl) - = (s `H.hashWithSalt` h) `hashCollisionWithSalt` a `go` tl + go s (Collision _ l1 l2 hm' : tl) + = collisionHash l1 l2 hm' s `go` tl go s (_ : tl) = s `go` tl + collisionHash (L k1 v1) (L k2 v2) hm' s + = L.foldl' H.hashWithSalt s + $ L.sort . L.map (\(k, v) -> (s `hk` k) `hv` v) + $ (k1, v1) : (k2, v2) : toList hm' + -- hashLeafWithSalt :: Int -> Leaf k v -> Int hashLeafWithSalt s (L k v) = (s `hk` k) `hv` v - -- hashCollisionWithSalt :: Int -> A.Array (Leaf k v) -> Int - hashCollisionWithSalt s - = L.foldl' H.hashWithSalt s . arrayHashesSorted s - - -- arrayHashesSorted :: Int -> A.Array (Leaf k v) -> [Int] - arrayHashesSorted s = L.sort . L.map (hashLeafWithSalt s) . A.toList - instance (Hashable k) => H.Hashable1 (HashMap k) where liftHashWithSalt = H.liftHashWithSalt2 H.hashWithSalt #endif @@ -428,33 +486,32 @@ instance (Hashable k, Hashable v) => Hashable (HashMap k v) where = s `hashLeafWithSalt` l `go` tl -- For collisions we hashmix hash value -- and then array of values' hashes sorted - go s (Collision h a : tl) - = (s `H.hashWithSalt` h) `hashCollisionWithSalt` a `go` tl + go s (Collision _ l1 l2 hm' : tl) + = collisionHash l1 l2 hm' s `go` tl go s (_ : tl) = s `go` tl + collisionHash :: Leaf k v -> Leaf k v -> HashMap k v -> Int -> Int + collisionHash (L k1 v1) (L k2 v2) hm' s + = L.foldl' H.hashWithSalt s + $ L.sort . L.map (H.hashWithSalt s) + $ (k1, v1) : (k2, v2) : toList hm' + hashLeafWithSalt :: Int -> Leaf k v -> Int hashLeafWithSalt s (L k v) = s `H.hashWithSalt` k `H.hashWithSalt` v - hashCollisionWithSalt :: Int -> A.Array (Leaf k v) -> Int - hashCollisionWithSalt s - = L.foldl' H.hashWithSalt s . arrayHashesSorted s - - arrayHashesSorted :: Int -> A.Array (Leaf k v) -> [Int] - arrayHashesSorted s = L.sort . L.map (hashLeafWithSalt s) . A.toList - - -- Helper to get 'Leaf's and 'Collision's as a list. + -- Helper to get 'Leaf's as a list, leave collisions as-is. toList' :: HashMap k v -> [HashMap k v] -> [HashMap k v] -toList' (BitmapIndexed _ ary) a = A.foldr toList' a ary -toList' (Full ary) a = A.foldr toList' a ary -toList' l@(Leaf _ _) a = l : a -toList' c@(Collision _ _) a = c : a -toList' Empty a = a +toList' (BitmapIndexed _ ary) a = A.foldr toList' a ary +toList' (Full ary) a = A.foldr toList' a ary +toList' l@(Leaf _ _) a = l : a +toList' (Collision h l1 l2 hm) a = Collision h l1 l2 hm : a +toList' Empty a = a -- Helper function to detect 'Leaf's and 'Collision's. isLeafOrCollision :: HashMap k v -> Bool -isLeafOrCollision (Leaf _ _) = True -isLeafOrCollision (Collision _ _) = True -isLeafOrCollision _ = False +isLeafOrCollision (Leaf {}) = True +isLeafOrCollision (Collision {}) = True +isLeafOrCollision _ = False ------------------------------------------------------------------------ -- * Construction @@ -465,7 +522,7 @@ empty = Empty -- | /O(1)/ Construct a map with a single element. singleton :: (Hashable k) => k -> v -> HashMap k v -singleton k v = Leaf (hash k) (L k v) +singleton k v = Leaf (hashWithSalt defaultSalt k) (L k v) ------------------------------------------------------------------------ -- * Basic interface @@ -483,7 +540,7 @@ size t = go t 0 go (Leaf _ _) n = n + 1 go (BitmapIndexed _ ary) n = A.foldl' (flip go) n ary go (Full ary) n = A.foldl' (flip go) n ary - go (Collision _ ary) n = n + A.length ary + go (Collision _ _ _ hm) n = go hm (n + 2) -- | /O(log n)/ Return 'True' if the specified key is present in the -- map, 'False' otherwise. @@ -506,18 +563,18 @@ lookup k m = case lookup# k m of {-# INLINE lookup #-} lookup# :: (Eq k, Hashable k) => k -> HashMap k v -> (# (# #) | v #) -lookup# k m = lookupCont (\_ -> (# (# #) | #)) (\v _i -> (# | v #)) (hash k) k m +lookup# k m = lookupCont (\_ -> (# (# #) | #)) (\v -> (# | v #)) (hashWithSalt defaultSalt k) defaultSalt k m {-# INLINABLE lookup# #-} #else -lookup k m = lookupCont (\_ -> Nothing) (\v _i -> Just v) (hash k) k m +lookup k m = lookupCont (\_ -> Nothing) (\v -> Just v) (hashWithSalt defaultSalt k) defaultSalt k m {-# INLINABLE lookup #-} #endif -- | lookup' is a version of lookup that takes the hash separately. -- It is used to implement alterF. -lookup' :: Eq k => Hash -> k -> HashMap k v -> Maybe v +lookup' :: (Eq k, Hashable k) => Hash -> Salt -> k -> HashMap k v -> Maybe v #if __GLASGOW_HASKELL__ >= 802 -- GHC does not yet perform a worker-wrapper transformation on -- unboxed sums automatically. That seems likely to happen at some @@ -525,18 +582,18 @@ lookup' :: Eq k => Hash -> k -> HashMap k v -> Maybe v -- lookup' would probably prefer to be implemented in terms of its own -- lookup'#, but it's not important enough and we don't want too much -- code. -lookup' h k m = case lookupRecordCollision# h k m of +lookup' h s k m = case lookupWithRes# h s k m of (# (# #) | #) -> Nothing - (# | (# a, _i #) #) -> Just a + (# | a #) -> Just a {-# INLINE lookup' #-} #else -lookup' h k m = lookupCont (\_ -> Nothing) (\v _i -> Just v) h k m +lookup' h s k m = lookupCont (\_ -> Nothing) (\v -> Just v) h s k m {-# INLINABLE lookup' #-} #endif -- The result of a lookup, keeping track of if a hash collision occured. -- If a collision did not occur then it will have the Int value (-1). -data LookupRes a = Absent | Present a !Int +data LookupRes a = Absent | Present a -- Internal helper for lookup. This version takes the precomputed hash so -- that functions that make multiple calls to lookup and related functions @@ -549,34 +606,33 @@ data LookupRes a = Absent | Present a !Int -- -- Outcomes: -- Key not in map => Absent --- Key in map, no collision => Present v (-1) --- Key in map, collision => Present v position -lookupRecordCollision :: Eq k => Hash -> k -> HashMap k v -> LookupRes v +-- Key in map => Present v +-- +-- TODO(syd): We could add some optimisation here where we optimise for the common case +-- where a collision is in the first two elements and not in the recursive HashMap. +-- This could mean turning 'Present a' into 'Present a ColPos' with +-- 'data ColPos = NoCollision | CollisionFirst | CollisionSecond | CollisionRest' +lookupWithRes :: (Eq k, Hashable k) => Hash -> Salt -> k -> HashMap k v -> LookupRes v #if __GLASGOW_HASKELL__ >= 802 -lookupRecordCollision h k m = case lookupRecordCollision# h k m of +lookupWithRes h s k m = case lookupWithRes# h s k m of (# (# #) | #) -> Absent - (# | (# a, i #) #) -> Present a (I# i) -- GHC will eliminate the I# -{-# INLINE lookupRecordCollision #-} - --- Why do we produce an Int# instead of an Int? Unfortunately, GHC is not --- yet any good at unboxing things *inside* products, let alone sums. That --- may be changing in GHC 8.6 or so (there is some work in progress), but --- for now we use Int# explicitly here. We don't need to push the Int# --- into lookupCont because inlining takes care of that. -lookupRecordCollision# :: Eq k => Hash -> k -> HashMap k v -> (# (# #) | (# v, Int# #) #) -lookupRecordCollision# h k m = - lookupCont (\_ -> (# (# #) | #)) (\v (I# i) -> (# | (# v, i #) #)) h k m + (# | a #) -> Present a +{-# INLINE lookupWithRes #-} + +lookupWithRes# :: (Eq k, Hashable k) => Hash -> Salt -> k -> HashMap k v -> (# (# #) | v #) +lookupWithRes# h s k m = + lookupCont (\_ -> (# (# #) | #)) (\v -> (# | v #)) h s k m -- INLINABLE to specialize to the Eq instance. -{-# INLINABLE lookupRecordCollision# #-} +{-# INLINABLE lookupWithRes# #-} #else /* GHC < 8.2 so there are no unboxed sums */ -lookupRecordCollision h k m = lookupCont (\_ -> Absent) Present h k m -{-# INLINABLE lookupRecordCollision #-} +lookupWithRes h s k m = lookupCont (\_ -> Absent) Present h s k m +{-# INLINABLE lookupWithRes #-} #endif --- A two-continuation version of lookupRecordCollision. This lets us --- share source code between lookup and lookupRecordCollision without +-- A two-continuation version of lookupWithRes. This lets us +-- share source code between lookup and lookupWithRes without -- risking any performance degradation. -- -- The absent continuation has type @((# #) -> r)@ instead of just @r@ @@ -589,27 +645,31 @@ lookupCont :: #else forall r k v. #endif - Eq k + (Eq k, Hashable k) => ((# #) -> r) -- Absent continuation - -> (v -> Int -> r) -- Present continuation + -> (v -> r) -- Present continuation -> Hash -- The hash of the key + -> Salt -- The salt that was used to obtain that hash -> k -> HashMap k v -> r -lookupCont absent present !h0 !k0 !m0 = go h0 k0 0 m0 +lookupCont absent present !h0 !s0 !k0 !m0 = go h0 s0 k0 0 m0 where - go :: Eq k => Hash -> k -> Int -> HashMap k v -> r - go !_ !_ !_ Empty = absent (# #) - go h k _ (Leaf hx (L kx x)) - | h == hx && k == kx = present x (-1) + go :: Eq k => Hash -> Salt -> k -> Int -> HashMap k v -> r + go !_ !_ !_ !_ Empty = absent (# #) + go h _ k _ (Leaf hx (L kx x)) + | h == hx && k == kx = present x | otherwise = absent (# #) - go h k s (BitmapIndexed b v) + go h s k bs (BitmapIndexed b v) | b .&. m == 0 = absent (# #) | otherwise = - go h k (s+bitsPerSubkey) (A.index v (sparseIndex b m)) - where m = mask h s - go h k s (Full v) = - go h k (s+bitsPerSubkey) (A.index v (index h s)) - go h k _ (Collision hx v) - | h == hx = lookupInArrayCont absent present k v + go h s k (bs+bitsPerSubkey) (A.index v (sparseIndex b m)) + where m = mask h bs + go h s k bs (Full v) = + go h s k (bs+bitsPerSubkey) (A.index v (index h bs)) + go h s k _ (Collision hx (L k1 v1) (L k2 v2) hmx) + | h == hx = case () of + () | k == k1 -> present v1 + | k == k2 -> present v2 + | otherwise -> go (hashWithSalt (nextSalt s) k) (nextSalt s) k 0 hmx | otherwise = absent (# #) {-# INLINE lookupCont #-} @@ -634,14 +694,22 @@ lookupDefault def k t = case lookup k t of infixl 9 ! -- | Create a 'Collision' value with two 'Leaf' values. -collision :: Hash -> Leaf k v -> Leaf k v -> HashMap k v -collision h !e1 !e2 = - let v = A.run $ do mary <- A.new 2 e1 - A.write mary 1 e2 - return mary - in Collision h v +collision :: (Eq k, Hashable k) => Hash -> Salt -> Leaf k v -> Leaf k v -> HashMap k v +collision h _ l1 l2 = Collision h l1 l2 Empty {-# INLINE collision #-} +-- These are taken from wikipedia: +-- https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV_hash_parameters +defaultSalt :: Int +#if WORD_SIZE_IN_BITS == 64 +defaultSalt = 14695981039346656037 -- 0xcbf29ce484222325 in hex +#else +defaultSalt = 2166136261 -- 0x811c9dc5 in hex +#endif + +nextSalt :: Salt -> Salt +nextSalt = (+1) + -- | Create a 'BitmapIndexed' or 'Full' node. bitmapIndexedOrFull :: Bitmap -> A.Array (HashMap k v) -> HashMap k v bitmapIndexedOrFull b ary @@ -653,42 +721,48 @@ bitmapIndexedOrFull b ary -- key in this map. If this map previously contained a mapping for -- the key, the old value is replaced. insert :: (Eq k, Hashable k) => k -> v -> HashMap k v -> HashMap k v -insert k v m = insert' (hash k) k v m +insert k v m = insert' (hashWithSalt defaultSalt k) defaultSalt k v m {-# INLINABLE insert #-} -insert' :: Eq k => Hash -> k -> v -> HashMap k v -> HashMap k v -insert' h0 k0 v0 m0 = go h0 k0 v0 0 m0 +insert' :: (Eq k, Hashable k) => Hash -> Salt -> k -> v -> HashMap k v -> HashMap k v +insert' h0 s0 k0 v0 m0 = go h0 s0 k0 v0 0 m0 where - go !h !k x !_ Empty = Leaf h (L k x) - go h k x s t@(Leaf hy l@(L ky y)) + go :: (Eq k, Hashable k) => Hash -> Salt -> k -> v -> Int -> HashMap k v -> HashMap k v + go !h !_ !k x !_ Empty = Leaf h (L k x) + go h s k x bs t@(Leaf hy l@(L ky y)) | hy == h = if ky == k then if x `ptrEq` y then t else Leaf h (L k x) - else collision h l (L k x) - | otherwise = runST (two s h k x hy ky y) - go h k x s t@(BitmapIndexed b ary) + else collision h s l (L k x) + | otherwise = runST (two bs h k x hy ky y) + go h s k x bs t@(BitmapIndexed b ary) | b .&. m == 0 = let !ary' = A.insert ary i $! Leaf h (L k x) in bitmapIndexedOrFull (b .|. m) ary' | otherwise = let !st = A.index ary i - !st' = go h k x (s+bitsPerSubkey) st + !st' = go h s k x (bs+bitsPerSubkey) st in if st' `ptrEq` st then t else BitmapIndexed b (A.update ary i st') - where m = mask h s + where m = mask h bs i = sparseIndex b m - go h k x s t@(Full ary) = + go h s k x bs t@(Full ary) = let !st = A.index ary i - !st' = go h k x (s+bitsPerSubkey) st + !st' = go h s k x (bs+bitsPerSubkey) st in if st' `ptrEq` st then t else Full (update16 ary i st') - where i = index h s - go h k x s t@(Collision hy v) - | h == hy = Collision h (updateOrSnocWith const k x v) - | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) + where i = index h bs + go h s k x bs t@(Collision hx l1@(L k1 v1) l2@(L k2 v2) hmx) + | h == hx = + let go' + | k == k1 = if x `ptrEq` v1 then t else Collision hx (L k x) l2 hmx + | k == k2 = if x `ptrEq` v2 then t else Collision hx l1 (L k x) hmx + | otherwise = Collision hx l1 l2 $ go (hashWithSalt (nextSalt s) k) (nextSalt s) k x 0 hmx + in go' + | otherwise = go h s k x bs $ BitmapIndexed (mask hx bs) (A.singleton t) {-# INLINABLE insert' #-} -- Insert optimized for the case when we know the key is not in the map. @@ -698,120 +772,106 @@ insert' h0 k0 v0 m0 = go h0 k0 v0 0 m0 -- We can skip: -- - the key equality check on a Leaf -- - check for its existence in the array for a hash collision -insertNewKey :: Hash -> k -> v -> HashMap k v -> HashMap k v -insertNewKey !h0 !k0 x0 !m0 = go h0 k0 x0 0 m0 +insertNewKey :: (Eq k, Hashable k) => Hash -> Salt -> k -> v -> HashMap k v -> HashMap k v +insertNewKey !h0 !s0 !k0 x0 !m0 = go h0 s0 k0 x0 0 m0 where - go !h !k x !_ Empty = Leaf h (L k x) - go h k x s (Leaf hy l@(L ky y)) - | hy == h = collision h l (L k x) - | otherwise = runST (two s h k x hy ky y) - go h k x s (BitmapIndexed b ary) + go !h _ !k x !_ Empty = Leaf h (L k x) + go h s k x bs (Leaf hy l@(L ky y)) + | hy == h = collision h s l (L k x) + | otherwise = runST (two bs h k x hy ky y) + go h s k x bs (BitmapIndexed b ary) | b .&. m == 0 = let !ary' = A.insert ary i $! Leaf h (L k x) in bitmapIndexedOrFull (b .|. m) ary' | otherwise = let !st = A.index ary i - !st' = go h k x (s+bitsPerSubkey) st + !st' = go h s k x (bs+bitsPerSubkey) st in BitmapIndexed b (A.update ary i st') - where m = mask h s + where m = mask h bs i = sparseIndex b m - go h k x s (Full ary) = + go h s k x bs (Full ary) = let !st = A.index ary i - !st' = go h k x (s+bitsPerSubkey) st + !st' = go h s k x (bs+bitsPerSubkey) st in Full (update16 ary i st') - where i = index h s - go h k x s t@(Collision hy v) - | h == hy = Collision h (snocNewLeaf (L k x) v) + where i = index h bs + go h s k x bs t@(Collision hx l1 l2 hmx) + | h == hx = Collision hx l1 l2 $ go (hashWithSalt (nextSalt s) k) (nextSalt s) k x 0 hmx | otherwise = - go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) - where - snocNewLeaf :: Leaf k v -> A.Array (Leaf k v) -> A.Array (Leaf k v) - snocNewLeaf leaf ary = A.run $ do - let n = A.length ary - mary <- A.new_ (n + 1) - A.copy ary 0 mary 0 n - A.write mary n leaf - return mary + go h s k x bs $ BitmapIndexed (mask hx bs) (A.singleton t) {-# NOINLINE insertNewKey #-} -- Insert optimized for the case when we know the key is in the map. -- --- It is only valid to call this when the key exists in the map and you know the --- hash collision position if there was one. This information can be obtained --- from 'lookupRecordCollision'. If there is no collision pass (-1) as collPos --- (first argument). --- -- We can skip the key equality check on a Leaf because we know the leaf must be --- for this key. -insertKeyExists :: Int -> Hash -> k -> v -> HashMap k v -> HashMap k v -insertKeyExists !collPos0 !h0 !k0 x0 !m0 = go collPos0 h0 k0 x0 0 m0 +-- for this key. We still need to do the equality check in case of collisions. +insertKeyExists :: (Eq k, Hashable k) => Hash -> Salt -> k -> v -> HashMap k v -> HashMap k v +insertKeyExists !h0 !s0 !k0 x0 !m0 = go h0 s0 k0 x0 0 m0 where - go !_collPos !h !k x !_s (Leaf _hy _kx) + go !h _ !k x !_s (Leaf _hy _kx) = Leaf h (L k x) - go collPos h k x s (BitmapIndexed b ary) + go h s k x bs (BitmapIndexed b ary) | b .&. m == 0 = let !ary' = A.insert ary i $ Leaf h (L k x) in bitmapIndexedOrFull (b .|. m) ary' | otherwise = let !st = A.index ary i - !st' = go collPos h k x (s+bitsPerSubkey) st + !st' = go h s k x (bs+bitsPerSubkey) st in BitmapIndexed b (A.update ary i st') - where m = mask h s + where m = mask h bs i = sparseIndex b m - go collPos h k x s (Full ary) = + go h s k x bs (Full ary) = let !st = A.index ary i - !st' = go collPos h k x (s+bitsPerSubkey) st + !st' = go h s k x (bs+bitsPerSubkey) st in Full (update16 ary i st') - where i = index h s - go collPos h k x _s (Collision _hy v) - | collPos >= 0 = Collision h (setAtPosition collPos k x v) - | otherwise = Empty -- error "Internal error: go {collPos negative}" + where i = index h bs + go _ s k x _ t@(Collision hx l1@(L k1 v1) l2@(L k2 v2) hmx) + | k == k1 = if x `ptrEq` v1 then t else Collision hx (L k x) l2 hmx + | k == k2 = if x `ptrEq` v2 then t else Collision hx l1 (L k x) hmx + | otherwise = Collision hx l1 l2 $ go (hashWithSalt (nextSalt s) k) (nextSalt s) k x 0 hmx go _ _ _ _ _ Empty = Empty -- error "Internal error: go Empty" {-# NOINLINE insertKeyExists #-} --- Replace the ith Leaf with Leaf k v. --- --- This does not check that @i@ is within bounds of the array. -setAtPosition :: Int -> k -> v -> A.Array (Leaf k v) -> A.Array (Leaf k v) -setAtPosition i k x ary = A.update ary i (L k x) -{-# INLINE setAtPosition #-} - - -- | In-place update version of insert unsafeInsert :: (Eq k, Hashable k) => k -> v -> HashMap k v -> HashMap k v -unsafeInsert k0 v0 m0 = runST (go h0 k0 v0 0 m0) +unsafeInsert k0 v0 m0 = runST (go h0 s0 k0 v0 0 m0) where - h0 = hash k0 - go !h !k x !_ Empty = return $! Leaf h (L k x) - go h k x s t@(Leaf hy l@(L ky y)) + h0 = hashWithSalt s0 k0 + s0 = defaultSalt + go !h !_ !k x !_ Empty = return $! Leaf h (L k x) + go h s k x bs t@(Leaf hy l@(L ky y)) | hy == h = if ky == k then if x `ptrEq` y then return t else return $! Leaf h (L k x) - else return $! collision h l (L k x) - | otherwise = two s h k x hy ky y - go h k x s t@(BitmapIndexed b ary) + else return $! collision h s l (L k x) + | otherwise = two bs h k x hy ky y + go h s k x bs t@(BitmapIndexed b ary) | b .&. m == 0 = do ary' <- A.insertM ary i $! Leaf h (L k x) return $! bitmapIndexedOrFull (b .|. m) ary' | otherwise = do st <- A.indexM ary i - st' <- go h k x (s+bitsPerSubkey) st + st' <- go h s k x (bs+bitsPerSubkey) st A.unsafeUpdateM ary i st' return t - where m = mask h s + where m = mask h bs i = sparseIndex b m - go h k x s t@(Full ary) = do + go h s k x bs t@(Full ary) = do st <- A.indexM ary i - st' <- go h k x (s+bitsPerSubkey) st + st' <- go h s k x (bs+bitsPerSubkey) st A.unsafeUpdateM ary i st' return t - where i = index h s - go h k x s t@(Collision hy v) - | h == hy = return $! Collision h (updateOrSnocWith const k x v) - | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) + where i = index h bs + go h s k x bs t@(Collision hx l1@(L k1 v1) l2@(L k2 v2) hmx) + | h == hx = + let go' + | k == k1 = return $! if x `ptrEq` v1 then t else Collision hx (L k x) l2 hmx + | k == k2 = return $! if x `ptrEq` v2 then t else Collision hx l1 (L k x) hmx + | otherwise = Collision hx l1 l2 <$> go (hashWithSalt (nextSalt s) k) (nextSalt s) k x 0 hmx + in go' + | otherwise = go h s k x bs $ BitmapIndexed (mask hx bs) (A.singleton t) {-# INLINABLE unsafeInsert #-} -- | Create a map from two key-value pairs which hashes don't collide. @@ -856,121 +916,115 @@ insertWith f k new m = insertModifying new (\old -> (# f new old #)) k m -- thunks in the tree. insertModifying :: (Eq k, Hashable k) => v -> (v -> (# v #)) -> k -> HashMap k v -> HashMap k v -insertModifying x f k0 m0 = go h0 k0 0 m0 +insertModifying x f k0 m0 = go h0 s0 k0 0 m0 where - !h0 = hash k0 - go !h !k !_ Empty = Leaf h (L k x) - go h k s t@(Leaf hy l@(L ky y)) + !h0 = hashWithSalt s0 k0 + !s0 = defaultSalt + go !h _ !k !_ Empty = Leaf h (L k x) + go h s k bs t@(Leaf hy l@(L ky y)) | hy == h = if ky == k then case f y of (# v' #) | ptrEq y v' -> t | otherwise -> Leaf h (L k (v')) - else collision h l (L k x) - | otherwise = runST (two s h k x hy ky y) - go h k s t@(BitmapIndexed b ary) + else collision h s l (L k x) + | otherwise = runST (two bs h k x hy ky y) + go h s k bs t@(BitmapIndexed b ary) | b .&. m == 0 = let ary' = A.insert ary i $! Leaf h (L k x) in bitmapIndexedOrFull (b .|. m) ary' | otherwise = let !st = A.index ary i - !st' = go h k (s+bitsPerSubkey) st + !st' = go h s k (bs+bitsPerSubkey) st ary' = A.update ary i $! st' in if ptrEq st st' then t else BitmapIndexed b ary' - where m = mask h s + where m = mask h bs i = sparseIndex b m - go h k s t@(Full ary) = + go h s k bs t@(Full ary) = let !st = A.index ary i - !st' = go h k (s+bitsPerSubkey) st + !st' = go h s k (bs+bitsPerSubkey) st ary' = update16 ary i $! st' in if ptrEq st st' then t else Full ary' - where i = index h s - go h k s t@(Collision hy v) - | h == hy = - let !v' = insertModifyingArr x f k v - in if A.unsafeSameArray v v' - then t - else Collision h v' - | otherwise = go h k s $ BitmapIndexed (mask hy s) (A.singleton t) + where i = index h bs + go h s k bs t@(Collision hx l1@(L k1 v1) l2@(L k2 v2) hmx) + | h == hx = + let go' + | k == k1 = + case f v1 of + (# v' #) | ptrEq v1 v' -> t + | otherwise -> Collision hx (L k v') l2 hmx + | k == k2 = + case f v2 of + (# v' #) | ptrEq v2 v' -> t + | otherwise -> Collision hx l1 (L k v') hmx + | otherwise = Collision hx l1 l2 $ go (hashWithSalt (nextSalt s) k) (nextSalt s) k 0 hmx + in go' + | otherwise = go h s k bs $ BitmapIndexed (mask hx bs) (A.singleton t) {-# INLINABLE insertModifying #-} --- Like insertModifying for arrays; used to implement insertModifying -insertModifyingArr :: Eq k => v -> (v -> (# v #)) -> k -> A.Array (Leaf k v) - -> A.Array (Leaf k v) -insertModifyingArr x f k0 ary0 = go k0 ary0 0 (A.length ary0) - where - go !k !ary !i !n - | i >= n = A.run $ do - -- Not found, append to the end. - mary <- A.new_ (n + 1) - A.copy ary 0 mary 0 n - A.write mary n (L k x) - return mary - | otherwise = case A.index ary i of - (L kx y) | k == kx -> case f y of - (# y' #) -> if ptrEq y y' - then ary - else A.update ary i (L k y') - | otherwise -> go k ary (i+1) n -{-# INLINE insertModifyingArr #-} - -- | In-place update version of insertWith unsafeInsertWith :: forall k v. (Eq k, Hashable k) => (v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v -unsafeInsertWith f k0 v0 m0 = runST (go h0 k0 v0 0 m0) +unsafeInsertWith f k0 v0 m0 = runST (go h0 s0 k0 v0 0 m0) where - h0 = hash k0 - go :: Hash -> k -> v -> Shift -> HashMap k v -> ST s (HashMap k v) - go !h !k x !_ Empty = return $! Leaf h (L k x) - go h k x s (Leaf hy l@(L ky y)) + h0 = hashWithSalt s0 k0 + s0 = defaultSalt + go :: Hash -> Salt -> k -> v -> Shift -> HashMap k v -> ST s (HashMap k v) + go !h _ !k x !_ Empty = return $! Leaf h (L k x) + go h s k x bs (Leaf hy l@(L ky y)) | hy == h = if ky == k then return $! Leaf h (L k (f x y)) - else return $! collision h l (L k x) - | otherwise = two s h k x hy ky y - go h k x s t@(BitmapIndexed b ary) + else return $! collision h s l (L k x) + | otherwise = two bs h k x hy ky y + go h s k x bs t@(BitmapIndexed b ary) | b .&. m == 0 = do ary' <- A.insertM ary i $! Leaf h (L k x) return $! bitmapIndexedOrFull (b .|. m) ary' | otherwise = do st <- A.indexM ary i - st' <- go h k x (s+bitsPerSubkey) st + st' <- go h s k x (bs+bitsPerSubkey) st A.unsafeUpdateM ary i st' return t - where m = mask h s + where m = mask h bs i = sparseIndex b m - go h k x s t@(Full ary) = do + go h s k x bs t@(Full ary) = do st <- A.indexM ary i - st' <- go h k x (s+bitsPerSubkey) st + st' <- go h s k x (bs+bitsPerSubkey) st A.unsafeUpdateM ary i st' return t - where i = index h s - go h k x s t@(Collision hy v) - | h == hy = return $! Collision h (updateOrSnocWith f k x v) - | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) + where i = index h bs + go h s k x bs t@(Collision hx l1@(L k1 v1) l2@(L k2 v2) hmx) + | h == hx = + let go' + | k == k1 = return $! Collision hx (L k (f x v1)) l2 hmx + | k == k2 = return $! Collision hx l1 (L k (f x v2)) hmx + | otherwise = Collision hx l1 l2 <$> go (hashWithSalt (nextSalt s) k) (nextSalt s) k x 0 hmx + in go' + | otherwise = go h s k x bs $ BitmapIndexed (mask hx bs) (A.singleton t) {-# INLINABLE unsafeInsertWith #-} -- | /O(log n)/ Remove the mapping for the specified key from this map -- if present. delete :: (Eq k, Hashable k) => k -> HashMap k v -> HashMap k v -delete k m = delete' (hash k) k m +delete k m = delete' (hashWithSalt defaultSalt k) defaultSalt k m {-# INLINABLE delete #-} -delete' :: Eq k => Hash -> k -> HashMap k v -> HashMap k v -delete' h0 k0 m0 = go h0 k0 0 m0 +delete' :: (Eq k, Hashable k) => Hash -> Salt -> k -> HashMap k v -> HashMap k v +delete' h0 s0 k0 m0 = go h0 s0 k0 0 m0 where - go !_ !_ !_ Empty = Empty - go h k _ t@(Leaf hy (L ky _)) + go !_ _ !_ !_ Empty = Empty + go h _ k _ t@(Leaf hy (L ky _)) | hy == h && ky == k = Empty | otherwise = t - go h k s t@(BitmapIndexed b ary) + go h s k bs t@(BitmapIndexed b ary) | b .&. m == 0 = t | otherwise = let !st = A.index ary i - !st' = go h k (s+bitsPerSubkey) st + !st' = go h s k (bs+bitsPerSubkey) st in if st' `ptrEq` st then t else case st' of @@ -985,11 +1039,11 @@ delete' h0 k0 m0 = go h0 k0 0 m0 bIndexed = BitmapIndexed (b .&. complement m) (A.delete ary i) l | isLeafOrCollision l && A.length ary == 1 -> l _ -> BitmapIndexed b (A.update ary i st') - where m = mask h s + where m = mask h bs i = sparseIndex b m - go h k s t@(Full ary) = + go h s k bs t@(Full ary) = let !st = A.index ary i - !st' = go h k (s+bitsPerSubkey) st + !st' = go h s k (bs+bitsPerSubkey) st in if st' `ptrEq` st then t else case st' of @@ -998,35 +1052,35 @@ delete' h0 k0 m0 = go h0 k0 0 m0 bm = fullNodeMask .&. complement (1 `unsafeShiftL` i) in BitmapIndexed bm ary' _ -> Full (A.update ary i st') - where i = index h s - go h k _ t@(Collision hy v) - | h == hy = case indexOf k v of - Just i - | A.length v == 2 -> - if i == 0 - then Leaf h (A.index v 1) - else Leaf h (A.index v 0) - | otherwise -> Collision h (A.delete v i) - Nothing -> t + where i = index h bs + go h s k _ t@(Collision hx l1@(L k1 _) l2@(L k2 _) hmx) + | h == hx = + let go' + | k == k1 = case unconsHM hmx of + UnconsEmptyHM -> Leaf hx l2 + UnconsedHM l3 hmx' -> Collision hx l2 l3 hmx' + | k == k2 = case unconsHM hmx of + UnconsEmptyHM -> Leaf hx l1 + UnconsedHM l3 hmx' -> Collision hx l1 l3 hmx' + | otherwise = Collision hx l1 l2 $ go (hashWithSalt (nextSalt s) k) (nextSalt s) k 0 hmx + in go' | otherwise = t {-# INLINABLE delete' #-} -- | Delete optimized for the case when we know the key is in the map. -- --- It is only valid to call this when the key exists in the map and you know the --- hash collision position if there was one. This information can be obtained --- from 'lookupRecordCollision'. If there is no collision pass (-1) as collPos. --- -- We can skip: -- - the key equality check on the leaf, if we reach a leaf it must be the key -deleteKeyExists :: Int -> Hash -> k -> HashMap k v -> HashMap k v -deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 0 m0 +-- +-- We still need to do the equality check for collisions. +deleteKeyExists :: (Eq k, Hashable k) => Hash -> Salt -> k -> HashMap k v -> HashMap k v +deleteKeyExists !h0 s0 !k0 !m0 = go h0 s0 k0 0 m0 where - go :: Int -> Hash -> k -> Int -> HashMap k v -> HashMap k v - go !_collPos !_h !_k !_s (Leaf _ _) = Empty - go collPos h k s (BitmapIndexed b ary) = + go :: (Eq k, Hashable k) => Hash -> Salt -> k -> Int -> HashMap k v -> HashMap k v + go !_h !_s !_k !_bs (Leaf _ _) = Empty + go h s k bs (BitmapIndexed b ary) = let !st = A.index ary i - !st' = go collPos h k (s+bitsPerSubkey) st + !st' = go h s k (bs+bitsPerSubkey) st in case st' of Empty | A.length ary == 1 -> Empty | A.length ary == 2 -> @@ -1039,24 +1093,30 @@ deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 0 m0 bIndexed = BitmapIndexed (b .&. complement m) (A.delete ary i) l | isLeafOrCollision l && A.length ary == 1 -> l _ -> BitmapIndexed b (A.update ary i st') - where m = mask h s + where m = mask h bs i = sparseIndex b m - go collPos h k s (Full ary) = + go h s k bs (Full ary) = let !st = A.index ary i - !st' = go collPos h k (s+bitsPerSubkey) st + !st' = go h s k (bs+bitsPerSubkey) st in case st' of Empty -> let ary' = A.delete ary i bm = fullNodeMask .&. complement (1 `unsafeShiftL` i) in BitmapIndexed bm ary' _ -> Full (A.update ary i st') - where i = index h s - go collPos h _ _ (Collision _hy v) - | A.length v == 2 - = if collPos == 0 - then Leaf h (A.index v 1) - else Leaf h (A.index v 0) - | otherwise = Collision h (A.delete v collPos) + where i = index h bs + go h s k _ (Collision hx l1@(L k1 _) l2@(L k2 _) hmx) + | h == hx = + let go' + | k == k1 = case unconsHM hmx of + UnconsEmptyHM -> Leaf hx l2 + UnconsedHM l3 hmx' -> Collision hx l2 l3 hmx' + | k == k2 = case unconsHM hmx of + UnconsEmptyHM -> Leaf hx l1 + UnconsedHM l3 hmx' -> Collision hx l1 l3 hmx' + | otherwise = Collision hx l1 l2 $ go (hashWithSalt (nextSalt s) k) (nextSalt s) k 0 hmx + in go' + | otherwise = Empty -- error "Internal error: unexpected collision" go !_ !_ !_ !_ Empty = Empty -- error "Internal error: deleteKeyExists empty" {-# NOINLINE deleteKeyExists #-} @@ -1075,38 +1135,45 @@ adjust f k m = adjust# (\v -> (# f v #)) k m -- | Much like 'adjust', but not inherently leaky. adjust# :: (Eq k, Hashable k) => (v -> (# v #)) -> k -> HashMap k v -> HashMap k v -adjust# f k0 m0 = go h0 k0 0 m0 +adjust# f k0 m0 = go h0 s0 k0 0 m0 where - h0 = hash k0 - go !_ !_ !_ Empty = Empty - go h k _ t@(Leaf hy (L ky y)) + h0 = hashWithSalt s0 k0 + s0 = defaultSalt + go !_ !_ !_ !_ Empty = Empty + go h _ k _ t@(Leaf hy (L ky y)) | hy == h && ky == k = case f y of (# y' #) | ptrEq y y' -> t | otherwise -> Leaf h (L k y') | otherwise = t - go h k s t@(BitmapIndexed b ary) + go h s k bs t@(BitmapIndexed b ary) | b .&. m == 0 = t | otherwise = let !st = A.index ary i - !st' = go h k (s+bitsPerSubkey) st + !st' = go h s k (bs+bitsPerSubkey) st ary' = A.update ary i $! st' in if ptrEq st st' then t else BitmapIndexed b ary' - where m = mask h s + where m = mask h bs i = sparseIndex b m - go h k s t@(Full ary) = - let i = index h s + go h s k bs t@(Full ary) = + let i = index h bs !st = A.index ary i - !st' = go h k (s+bitsPerSubkey) st + !st' = go h s k (bs+bitsPerSubkey) st ary' = update16 ary i $! st' in if ptrEq st st' then t else Full ary' - go h k _ t@(Collision hy v) - | h == hy = let !v' = updateWith# f k v - in if A.unsafeSameArray v v' - then t - else Collision h v' + go h s k _ t@(Collision hx l1@(L k1 v1) l2@(L k2 v2) hmx) + | h == hx = + let go' + | k == k1 = case f v1 of + (# v1' #) | ptrEq v1 v1' -> t + | otherwise -> Collision hx (L k v1') l2 hmx + | k == k2 = case f v2 of + (# v2' #) | ptrEq v2 v2' -> t + | otherwise -> Collision hx l1 (L k v2') hmx + | otherwise = Collision hx l1 l2 $ go (hashWithSalt (nextSalt s) k) (nextSalt s) k 0 hmx + in go' | otherwise = t {-# INLINABLE adjust# #-} @@ -1146,12 +1213,13 @@ alterF :: (Functor f, Eq k, Hashable k) -- @f@ and a functor that is similar to Const but not actually Const. alterF f = \ !k !m -> let - !h = hash k - mv = lookup' h k m + !h = hashWithSalt s k + s = defaultSalt + mv = lookup' h s k m in (<$> f mv) $ \fres -> case fres of - Nothing -> delete' h k m - Just v' -> insert' h k v' m + Nothing -> delete' h s k m + Just v' -> insert' h s k v' m -- We unconditionally rewrite alterF in RULES, but we expose an -- unfolding just in case it's used in some way that prevents the @@ -1252,28 +1320,29 @@ alterFEager f !k m = (<$> f mv) $ \fres -> Absent -> m -- Key did exist - Present _ collPos -> deleteKeyExists collPos h k m + Present _ -> deleteKeyExists h s k m ------------------------------ -- Update value Just v' -> case lookupRes of -- Key did not exist before, insert v' under a new key - Absent -> insertNewKey h k v' m + Absent -> insertNewKey h s k v' m -- Key existed before - Present v collPos -> + Present v -> if v `ptrEq` v' -- If the value is identical, no-op then m -- If the value changed, update the value. - else insertKeyExists collPos h k v' m + else insertKeyExists h s k v' m - where !h = hash k - !lookupRes = lookupRecordCollision h k m + where !h = hashWithSalt s k + !s = defaultSalt + !lookupRes = lookupWithRes h s k m !mv = case lookupRes of Absent -> Nothing - Present v _ -> Just v + Present v -> Just v {-# INLINABLE alterFEager #-} #endif @@ -1300,86 +1369,97 @@ unionWith f = unionWithKey (const f) -- result. unionWithKey :: (Eq k, Hashable k) => (k -> v -> v -> v) -> HashMap k v -> HashMap k v -> HashMap k v -unionWithKey f = go 0 +unionWithKey f = go 0 s0 where + s0 = defaultSalt -- empty vs. anything - go !_ t1 Empty = t1 - go _ Empty t2 = t2 + go !_ !_ t1 Empty = t1 + go _ _ Empty t2 = t2 -- leaf vs. leaf - go s t1@(Leaf h1 l1@(L k1 v1)) t2@(Leaf h2 l2@(L k2 v2)) + go bs s t1@(Leaf h1 l1@(L k1 v1)) t2@(Leaf h2 l2@(L k2 v2)) | h1 == h2 = if k1 == k2 then Leaf h1 (L k1 (f k1 v1 v2)) - else collision h1 l1 l2 - | otherwise = goDifferentHash s h1 h2 t1 t2 - go s t1@(Leaf h1 (L k1 v1)) t2@(Collision h2 ls2) - | h1 == h2 = Collision h1 (updateOrSnocWithKey f k1 v1 ls2) - | otherwise = goDifferentHash s h1 h2 t1 t2 - go s t1@(Collision h1 ls1) t2@(Leaf h2 (L k2 v2)) - | h1 == h2 = Collision h1 (updateOrSnocWithKey (flip . f) k2 v2 ls1) - | otherwise = goDifferentHash s h1 h2 t1 t2 - go s t1@(Collision h1 ls1) t2@(Collision h2 ls2) - | h1 == h2 = Collision h1 (updateOrConcatWithKey f ls1 ls2) - | otherwise = goDifferentHash s h1 h2 t1 t2 + else collision h1 s l1 l2 + | otherwise = goDifferentHash bs s h1 h2 t1 t2 + go bs s t1@(Leaf h1 l1@(L k1 v1)) t2@(Collision h2 l21@(L k21 v21) l22@(L k22 v22) hm2) + | h1 == h2 = + let go' + | k1 == k21 = Collision h1 (L k1 (f k1 v1 v21)) l22 hm2 + | k1 == k22 = Collision h1 l21 (L k1 (f k1 v1 v22)) hm2 + | otherwise = Collision h1 l21 l22 $ go 0 (nextSalt s) (Leaf (hashWithSalt (nextSalt s) k1) l1) hm2 + in go' + | otherwise = goDifferentHash bs s h1 h2 t1 t2 + go bs s t1@(Collision h1 l11@(L k11 v11) l12@(L k12 v12) hm1) t2@(Leaf h2 l2@(L k2 v2)) + | h1 == h2 = + let go' + | k2 == k11 = Collision h1 (L k2 (f k2 v11 v2)) l12 hm1 + | k2 == k12 = Collision h1 l11 (L k2 (f k2 v12 v2)) hm1 + | otherwise = Collision h1 l11 l12 $ go 0 (nextSalt s) hm1 (Leaf (hashWithSalt (nextSalt s) k2) l2) + in go' + | otherwise = goDifferentHash bs s h1 h2 t1 t2 + go bs s t1@(Collision h1 _ _ _) t2@(Collision h2 _ _ _) + | h1 == h2 = L.foldl' (\h (k,v) -> go 0 s h (Leaf (hashWithSalt s k) (L k v))) t1 $ toList t2 -- Nothing better we can do, unfortunately. + | otherwise = goDifferentHash bs s h1 h2 t1 t2 -- branch vs. branch - go s (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = + go bs s (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = let b' = b1 .|. b2 - ary' = unionArrayBy (go (s+bitsPerSubkey)) b1 b2 ary1 ary2 + ary' = unionArrayBy (go (bs+bitsPerSubkey) s) b1 b2 ary1 ary2 in bitmapIndexedOrFull b' ary' - go s (BitmapIndexed b1 ary1) (Full ary2) = - let ary' = unionArrayBy (go (s+bitsPerSubkey)) b1 fullNodeMask ary1 ary2 + go bs s (BitmapIndexed b1 ary1) (Full ary2) = + let ary' = unionArrayBy (go (bs+bitsPerSubkey) s) b1 fullNodeMask ary1 ary2 in Full ary' - go s (Full ary1) (BitmapIndexed b2 ary2) = - let ary' = unionArrayBy (go (s+bitsPerSubkey)) fullNodeMask b2 ary1 ary2 + go bs s (Full ary1) (BitmapIndexed b2 ary2) = + let ary' = unionArrayBy (go (bs+bitsPerSubkey) s) fullNodeMask b2 ary1 ary2 in Full ary' - go s (Full ary1) (Full ary2) = - let ary' = unionArrayBy (go (s+bitsPerSubkey)) fullNodeMask fullNodeMask + go bs s (Full ary1) (Full ary2) = + let ary' = unionArrayBy (go (bs+bitsPerSubkey) s) fullNodeMask fullNodeMask ary1 ary2 in Full ary' -- leaf vs. branch - go s (BitmapIndexed b1 ary1) t2 + go bs s (BitmapIndexed b1 ary1) t2 | b1 .&. m2 == 0 = let ary' = A.insert ary1 i t2 b' = b1 .|. m2 in bitmapIndexedOrFull b' ary' | otherwise = let ary' = A.updateWith' ary1 i $ \st1 -> - go (s+bitsPerSubkey) st1 t2 + go (bs+bitsPerSubkey) s st1 t2 in BitmapIndexed b1 ary' where h2 = leafHashCode t2 - m2 = mask h2 s + m2 = mask h2 bs i = sparseIndex b1 m2 - go s t1 (BitmapIndexed b2 ary2) + go bs s t1 (BitmapIndexed b2 ary2) | b2 .&. m1 == 0 = let ary' = A.insert ary2 i $! t1 b' = b2 .|. m1 in bitmapIndexedOrFull b' ary' | otherwise = let ary' = A.updateWith' ary2 i $ \st2 -> - go (s+bitsPerSubkey) t1 st2 + go (bs+bitsPerSubkey) s t1 st2 in BitmapIndexed b2 ary' where h1 = leafHashCode t1 - m1 = mask h1 s + m1 = mask h1 bs i = sparseIndex b2 m1 - go s (Full ary1) t2 = + go bs s (Full ary1) t2 = let h2 = leafHashCode t2 - i = index h2 s - ary' = update16With' ary1 i $ \st1 -> go (s+bitsPerSubkey) st1 t2 + i = index h2 bs + ary' = update16With' ary1 i $ \st1 -> go (bs+bitsPerSubkey) s st1 t2 in Full ary' - go s t1 (Full ary2) = + go bs s t1 (Full ary2) = let h1 = leafHashCode t1 - i = index h1 s - ary' = update16With' ary2 i $ \st2 -> go (s+bitsPerSubkey) t1 st2 + i = index h1 bs + ary' = update16With' ary2 i $ \st2 -> go (bs+bitsPerSubkey) s t1 st2 in Full ary' leafHashCode (Leaf h _) = h - leafHashCode (Collision h _) = h + leafHashCode (Collision h _ _ _) = h leafHashCode _ = error "leafHashCode" - goDifferentHash s h1 h2 t1 t2 - | m1 == m2 = BitmapIndexed m1 (A.singleton $! go (s+bitsPerSubkey) t1 t2) + goDifferentHash bs s h1 h2 t1 t2 + | m1 == m2 = BitmapIndexed m1 (A.singleton $! go (bs+bitsPerSubkey) s t1 t2) | m1 < m2 = BitmapIndexed (m1 .|. m2) (A.pair t1 t2) | otherwise = BitmapIndexed (m1 .|. m2) (A.pair t2 t1) where - m1 = mask h1 s - m2 = mask h2 s + m1 = mask h1 bs + m2 = mask h2 bs {-# INLINE unionWithKey #-} -- | Strict in the result of @f@. @@ -1432,8 +1512,7 @@ mapWithKey f = go go (Full ary) = Full $ A.map go ary -- Why map strictly over collision arrays? Because there's no -- point suspending the O(1) work this does for each leaf. - go (Collision h ary) = Collision h $ - A.map' (\ (L k v) -> L k (f k v)) ary + go (Collision h (L k1 v1) (L k2 v2) hm) = Collision h (L k1 (f k2 v1)) (L k2 (f k2 v2)) $ go hm {-# INLINE mapWithKey #-} -- | /O(n)/ Transform this map by applying a function to every value. @@ -1461,8 +1540,8 @@ traverseWithKey f = go go (Leaf h (L k v)) = Leaf h . L k <$> f k v go (BitmapIndexed b ary) = BitmapIndexed b <$> A.traverse go ary go (Full ary) = Full <$> A.traverse go ary - go (Collision h ary) = - Collision h <$> A.traverse' (\ (L k v) -> L k <$> f k v) ary + go (Collision h (L k1 v1) (L k2 v2) hm) = + Collision h <$> (L k1 <$> f k1 v1) <*> (L k2 <$> f k2 v2) <*> go hm {-# INLINE traverseWithKey #-} ------------------------------------------------------------------------ @@ -1530,7 +1609,7 @@ intersectionWithKey f a b = foldlWithKey' go empty a -- | /O(n)/ Reduce this map by applying a binary operator to all -- elements, using the given starting value (typically the -- left-identity of the operator). Each application of the operator --- is evaluated before using the result in the next application. +-- is evaluated before using the result in the next application. -- This function is strict in the starting value. foldl' :: (a -> v -> a) -> a -> HashMap k v -> a foldl' f = foldlWithKey' (\ z _ v -> f z v) @@ -1539,7 +1618,7 @@ foldl' f = foldlWithKey' (\ z _ v -> f z v) -- | /O(n)/ Reduce this map by applying a binary operator to all -- elements, using the given starting value (typically the -- left-identity of the operator). Each application of the operator --- is evaluated before using the result in the next application. +-- is evaluated before using the result in the next application. -- This function is strict in the starting value. foldlWithKey' :: (a -> k -> v -> a) -> a -> HashMap k v -> a foldlWithKey' f = go @@ -1548,7 +1627,7 @@ foldlWithKey' f = go go z (Leaf _ (L k v)) = f z k v go z (BitmapIndexed _ ary) = A.foldl' go z ary go z (Full ary) = A.foldl' go z ary - go z (Collision _ ary) = A.foldl' (\ z' (L k v) -> f z' k v) z ary + go z (Collision _ (L k1 v1) (L k2 v2) hm) = go (f (f z k1 v1) k2 v2) hm {-# INLINE foldlWithKey' #-} -- | /O(n)/ Reduce this map by applying a binary operator to all @@ -1568,7 +1647,7 @@ foldrWithKey f = go go z (Leaf _ (L k v)) = f k v z go z (BitmapIndexed _ ary) = A.foldr (flip go) z ary go z (Full ary) = A.foldr (flip go) z ary - go z (Collision _ ary) = A.foldr (\ (L k v) z' -> f k v z') z ary + go z (Collision _ (L k1 v1) (L k2 v2) hm) = f k1 v1 (f k2 v2 (go z hm)) {-# INLINE foldrWithKey #-} ------------------------------------------------------------------------ @@ -1577,12 +1656,9 @@ foldrWithKey f = go -- | /O(n)/ Transform this map by applying a function to every value -- and retaining only some of them. mapMaybeWithKey :: (k -> v1 -> Maybe v2) -> HashMap k v1 -> HashMap k v2 -mapMaybeWithKey f = filterMapAux onLeaf onColl +mapMaybeWithKey f = filterMapAux onLeaf where onLeaf (Leaf h (L k v)) | Just v' <- f k v = Just (Leaf h (L k v')) onLeaf _ = Nothing - - onColl (L k v) | Just v' <- f k v = Just (L k v') - | otherwise = Nothing {-# INLINE mapMaybeWithKey #-} -- | /O(n)/ Transform this map by applying a function to every value @@ -1594,12 +1670,9 @@ mapMaybe f = mapMaybeWithKey (const f) -- | /O(n)/ Filter this map by retaining only elements satisfying a -- predicate. filterWithKey :: forall k v. (k -> v -> Bool) -> HashMap k v -> HashMap k v -filterWithKey pred = filterMapAux onLeaf onColl +filterWithKey pred = filterMapAux onLeaf where onLeaf t@(Leaf _ (L k v)) | pred k v = Just t onLeaf _ = Nothing - - onColl el@(L k v) | pred k v = Just el - onColl _ = Nothing {-# INLINE filterWithKey #-} @@ -1607,10 +1680,9 @@ filterWithKey pred = filterMapAux onLeaf onColl -- allowing the former to former to reuse terms. filterMapAux :: forall k v1 v2 . (HashMap k v1 -> Maybe (HashMap k v2)) - -> (Leaf k v1 -> Maybe (Leaf k v2)) -> HashMap k v1 -> HashMap k v2 -filterMapAux onLeaf onColl = go +filterMapAux onLeaf = go where go Empty = Empty go t@Leaf{} @@ -1618,7 +1690,20 @@ filterMapAux onLeaf onColl = go | otherwise = Empty go (BitmapIndexed b ary) = filterA ary b go (Full ary) = filterA ary fullNodeMask - go (Collision h ary) = filterC ary h + go (Collision h l1 l2 hm) = case (onLeaf (Leaf h l1), onLeaf (Leaf h l2)) of + (Just (Leaf _ l1'), Just (Leaf _ l2')) -> Collision h l1' l2' $ go hm + (Just (Leaf _ l1'), Nothing) -> go1 l1' + (Nothing, Just (Leaf _ l2')) -> go1 l2' + (Nothing, Nothing) -> case unconsHM (go hm) of + UnconsEmptyHM -> Empty + UnconsedHM l1' hm' -> case unconsHM hm' of + UnconsEmptyHM -> Leaf h l1' + UnconsedHM l2' hm'' -> Collision h l1' l2' hm'' + _ -> error "Should not happen, can be fixed with refactoring I think." + where + go1 l1' = case unconsHM (go hm) of + UnconsEmptyHM -> Leaf h l1' + UnconsedHM l2' hm' -> Collision h l1' l2' hm' filterA ary0 b0 = let !n = A.length ary0 @@ -1648,28 +1733,6 @@ filterMapAux onLeaf onColl = go (bi `unsafeShiftL` 1) n t -> do A.write mary j t step ary mary b (i+1) (j+1) (bi `unsafeShiftL` 1) n - - filterC ary0 h = - let !n = A.length ary0 - in runST $ do - mary <- A.new_ n - step ary0 mary 0 0 n - where - step :: A.Array (Leaf k v1) -> A.MArray s (Leaf k v2) - -> Int -> Int -> Int - -> ST s (HashMap k v2) - step !ary !mary i !j n - | i >= n = case j of - 0 -> return Empty - 1 -> do l <- A.read mary 0 - return $! Leaf h l - _ | i == j -> do ary2 <- A.unsafeFreeze mary - return $! Collision h ary2 - | otherwise -> do ary2 <- A.trim mary j - return $! Collision h ary2 - | Just el <- onColl $! A.index ary i - = A.write mary j el >> step ary mary (i+1) (j+1) n - | otherwise = step ary mary (i+1) j n {-# INLINE filterMapAux #-} -- | /O(n)/ Filter this map by retaining only elements which values @@ -1720,26 +1783,6 @@ fromListWith f = L.foldl' (\ m (k, v) -> unsafeInsertWith f k v m) empty ------------------------------------------------------------------------ -- Array operations --- | /O(n)/ Look up the value associated with the given key in an --- array. -lookupInArrayCont :: -#if __GLASGOW_HASKELL__ >= 802 - forall rep (r :: TYPE rep) k v. -#else - forall r k v. -#endif - Eq k => ((# #) -> r) -> (v -> Int -> r) -> k -> A.Array (Leaf k v) -> r -lookupInArrayCont absent present k0 ary0 = go k0 ary0 0 (A.length ary0) - where - go :: Eq k => k -> A.Array (Leaf k v) -> Int -> Int -> r - go !k !ary !i !n - | i >= n = absent (# #) - | otherwise = case A.index ary i of - (L kx v) - | k == kx -> present v i - | otherwise -> go k ary (i+1) n -{-# INLINE lookupInArrayCont #-} - -- | /O(n)/ Lookup the value associated with the given key in this -- array. Returns 'Nothing' if the key wasn't found. indexOf :: Eq k => k -> A.Array (Leaf k v) -> Maybe Int @@ -1753,40 +1796,6 @@ indexOf k0 ary0 = go k0 ary0 0 (A.length ary0) | otherwise -> go k ary (i+1) n {-# INLINABLE indexOf #-} -updateWith# :: Eq k => (v -> (# v #)) -> k -> A.Array (Leaf k v) -> A.Array (Leaf k v) -updateWith# f k0 ary0 = go k0 ary0 0 (A.length ary0) - where - go !k !ary !i !n - | i >= n = ary - | otherwise = case A.index ary i of - (L kx y) | k == kx -> case f y of - (# y' #) - | ptrEq y y' -> ary - | otherwise -> A.update ary i (L k y') - | otherwise -> go k ary (i+1) n -{-# INLINABLE updateWith# #-} - -updateOrSnocWith :: Eq k => (v -> v -> v) -> k -> v -> A.Array (Leaf k v) - -> A.Array (Leaf k v) -updateOrSnocWith f = updateOrSnocWithKey (const f) -{-# INLINABLE updateOrSnocWith #-} - -updateOrSnocWithKey :: Eq k => (k -> v -> v -> v) -> k -> v -> A.Array (Leaf k v) - -> A.Array (Leaf k v) -updateOrSnocWithKey f k0 v0 ary0 = go k0 v0 ary0 0 (A.length ary0) - where - go !k v !ary !i !n - | i >= n = A.run $ do - -- Not found, append to the end. - mary <- A.new_ (n + 1) - A.copy ary 0 mary 0 n - A.write mary n (L k v) - return mary - | otherwise = case A.index ary i of - (L kx y) | k == kx -> A.update ary i (L k (f k v y)) - | otherwise -> go k v ary (i+1) n -{-# INLINABLE updateOrSnocWithKey #-} - updateOrConcatWith :: Eq k => (v -> v -> v) -> A.Array (Leaf k v) -> A.Array (Leaf k v) -> A.Array (Leaf k v) updateOrConcatWith f = updateOrConcatWithKey (const f) {-# INLINABLE updateOrConcatWith #-} @@ -1822,6 +1831,80 @@ updateOrConcatWithKey f ary1 ary2 = A.run $ do return mary {-# INLINABLE updateOrConcatWithKey #-} +-- Helper functions for very rare cases. + +-- Remove (any) one element from a non-empty array +-- of non-empty hashmaps. +unconsA :: A.Array (HashMap k v) -> UnconsA k v +unconsA ary = + case unconsHM (A.index ary 0) of + UnconsEmptyHM -> + error "Data.HashMap internal error: empty hashmap in array." + UnconsedHM lf Empty + | A.length ary == 1 -> NowEmptyA lf + | otherwise -> RemovedFirstA lf (A.delete ary 0) + UnconsedHM lf hm' -> UnconsedFromFirstA lf (A.update ary 0 hm') + +data UnconsA k v + = NowEmptyA !(Leaf k v) + -- Got an entry from the array without changing its length + | UnconsedFromFirstA + !(Leaf k v) -- The leaf that was un-consed + !(A.Array (HashMap k v)) -- The leftover array + -- Got an entry from the array, shortening it by one element. + | RemovedFirstA + !(Leaf k v) -- The leaf that was un-consed + !(A.Array (HashMap k v)) -- The leftover array + deriving Show + +data UnconsHM k v + = UnconsEmptyHM + | UnconsedHM !(Leaf k v) !(HashMap k v) + deriving Show + +-- Remove (any) one element from a hashmap +unconsHM :: HashMap k v -> UnconsHM k v +unconsHM hm = case hm of + Empty -> UnconsEmptyHM + Leaf _ l -> UnconsedHM l Empty + BitmapIndexed bm ary' -> case unconsA ary' of + NowEmptyA l -> UnconsedHM l Empty + UnconsedFromFirstA l a' -> UnconsedHM l (collapse_maybe bm a') + RemovedFirstA l a' -> UnconsedHM l (collapse_maybe (shorten_bm bm) a') + Full ary' -> case unconsA ary' of + NowEmptyA l -> + UnconsedHM l Empty + UnconsedFromFirstA l a' -> + UnconsedHM l (Full a') + RemovedFirstA l a' -> + UnconsedHM l $ BitmapIndexed (shorten_bm fullNodeMask) a' + Collision h l1 l2 hm' -> UnconsedHM l1 $ case unconsHM hm' of + UnconsEmptyHM -> Leaf h l2 + UnconsedHM l3 hm'' -> Collision h l2 l3 hm'' + where + -- When the array is shortened, we need to delete the + -- first element (the lowest set bit) from the bitmap. + shorten_bm bm = bm .&. (bm - 1) + + -- We must never create a BitmapIndexed with exactly + -- one element whose sole element is a leaf or collision. + -- In such cases, we instead collapse a level, removing + -- the BitmapIndexed. This "smart constructor" takes care + -- of potential collapse. + -- + -- Note: we *could* arrange to catch this condition earlier, + -- as delete' does, preventing the allocation of an unnecessary + -- one-element array. But to reach this code we must be dealing with a + -- collision bucket that has at least four elements. That puts us + -- well off the fast path already. + collapse_maybe bm ar + | popCount bm == 1 -- The array has exactly one element. + , let the_elem = A.index ar 0 + , isLeafOrCollision the_elem + = the_elem + | otherwise + = BitmapIndexed bm ar + ------------------------------------------------------------------------ -- Manually unrolled loops diff --git a/Data/HashMap/Strict/Base.hs b/Data/HashMap/Strict/Base.hs index 890d18ff..f22d7ccc 100644 --- a/Data/HashMap/Strict/Base.hs +++ b/Data/HashMap/Strict/Base.hs @@ -148,99 +148,115 @@ insert k !v = HM.insert k v -- > where f new old = new + old insertWith :: (Eq k, Hashable k) => (v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v -insertWith f k0 v0 m0 = go h0 k0 v0 0 m0 +insertWith f k0 v0 m0 = go h0 s0 k0 v0 0 m0 where - h0 = hash k0 - go !h !k x !_ Empty = leaf h k x - go h k x s (Leaf hy l@(L ky y)) + h0 = hashWithSalt s0 k0 + s0 = defaultSalt + go !h _ !k x !_ Empty = leaf h k x + go h s k x bs (Leaf hy lf@(L ky y)) | hy == h = if ky == k then leaf h k (f x y) - else x `seq` (collision h l (L k x)) - | otherwise = x `seq` runST (two s h k x hy ky y) - go h k x s (BitmapIndexed b ary) + else collision h s lf (l k x) + | otherwise = x `seq` runST (two bs h k x hy ky y) + go h s k x bs (BitmapIndexed b ary) | b .&. m == 0 = let ary' = A.insert ary i $! leaf h k x in bitmapIndexedOrFull (b .|. m) ary' | otherwise = let st = A.index ary i - st' = go h k x (s+bitsPerSubkey) st + st' = go h s k x (bs+bitsPerSubkey) st ary' = A.update ary i $! st' in BitmapIndexed b ary' - where m = mask h s + where m = mask h bs i = sparseIndex b m - go h k x s (Full ary) = + go h s k x bs (Full ary) = let st = A.index ary i - st' = go h k x (s+bitsPerSubkey) st + st' = go h s k x (bs+bitsPerSubkey) st ary' = update16 ary i $! st' in Full ary' - where i = index h s - go h k x s t@(Collision hy v) - | h == hy = Collision h (updateOrSnocWith f k x v) - | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) + where i = index h bs + go h s k x bs t@(Collision hx l1@(L k1 v1) l2@(L k2 v2) hm) + | h == hx = + let go' + | k == k1 = Collision hx (l k (f x v1)) l2 hm + | k == k2 = Collision hx l1 (l k (f x v2)) hm + | otherwise = Collision hx l1 l2 $ go (hashWithSalt (nextSalt s) k) (nextSalt s) k x 0 hm + in go' + | otherwise = go h s k x bs $ BitmapIndexed (mask hx bs) (A.singleton t) {-# INLINABLE insertWith #-} -- | In-place update version of insertWith unsafeInsertWith :: (Eq k, Hashable k) => (v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v -unsafeInsertWith f k0 v0 m0 = runST (go h0 k0 v0 0 m0) +unsafeInsertWith f k0 v0 m0 = runST (go h0 s0 k0 v0 0 m0) where - h0 = hash k0 - go !h !k x !_ Empty = return $! leaf h k x - go h k x s (Leaf hy l@(L ky y)) + h0 = hashWithSalt s0 k0 + s0 = defaultSalt + go !h !_ !k x !_ Empty = return $! leaf h k x + go h s k x bs (Leaf hy lf@(L ky y)) | hy == h = if ky == k then return $! leaf h k (f x y) - else do - let l' = x `seq` (L k x) - return $! collision h l l' - | otherwise = x `seq` two s h k x hy ky y - go h k x s t@(BitmapIndexed b ary) + else return $! collision h s lf (l k x) + | otherwise = x `seq` two bs h k x hy ky y + go h s k x bs t@(BitmapIndexed b ary) | b .&. m == 0 = do ary' <- A.insertM ary i $! leaf h k x return $! bitmapIndexedOrFull (b .|. m) ary' | otherwise = do st <- A.indexM ary i - st' <- go h k x (s+bitsPerSubkey) st + st' <- go h s k x (bs+bitsPerSubkey) st A.unsafeUpdateM ary i st' return t - where m = mask h s + where m = mask h bs i = sparseIndex b m - go h k x s t@(Full ary) = do + go h s k x bs t@(Full ary) = do st <- A.indexM ary i - st' <- go h k x (s+bitsPerSubkey) st + st' <- go h s k x (bs+bitsPerSubkey) st A.unsafeUpdateM ary i st' return t - where i = index h s - go h k x s t@(Collision hy v) - | h == hy = return $! Collision h (updateOrSnocWith f k x v) - | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) + where i = index h bs + go h s k x bs t@(Collision hx l1@(L k1 v1) l2@(L k2 v2) hm) + | h == hx = + let go' + | k == k1 = return $! Collision hx (l k (f x v1)) l2 hm + | k == k2 = return $! Collision hx l1 (l k (f x v2)) hm + | otherwise = Collision hx l1 l2 <$> go (hashWithSalt (nextSalt s) k) (nextSalt s) k x 0 hm + in go' + | otherwise = go h s k x bs $ BitmapIndexed (mask hx bs) (A.singleton t) {-# INLINABLE unsafeInsertWith #-} -- | /O(log n)/ Adjust the value tied to a given key in this map only -- if it is present. Otherwise, leave the map alone. adjust :: (Eq k, Hashable k) => (v -> v) -> k -> HashMap k v -> HashMap k v -adjust f k0 m0 = go h0 k0 0 m0 +adjust f k0 m0 = go h0 s0 k0 0 m0 where - h0 = hash k0 - go !_ !_ !_ Empty = Empty - go h k _ t@(Leaf hy (L ky y)) + h0 = hashWithSalt s0 k0 + s0 = defaultSalt + go !_ !_ !_ !_ Empty = Empty + go h _ k _ t@(Leaf hy (L ky y)) | hy == h && ky == k = leaf h k (f y) | otherwise = t - go h k s t@(BitmapIndexed b ary) + go h s k bs t@(BitmapIndexed b ary) | b .&. m == 0 = t | otherwise = let st = A.index ary i - st' = go h k (s+bitsPerSubkey) st + st' = go h s k (bs+bitsPerSubkey) st ary' = A.update ary i $! st' in BitmapIndexed b ary' - where m = mask h s + where m = mask h bs i = sparseIndex b m - go h k s (Full ary) = - let i = index h s + go h s k bs (Full ary) = + let i = index h bs st = A.index ary i - st' = go h k (s+bitsPerSubkey) st + st' = go h s k (bs+bitsPerSubkey) st ary' = update16 ary i $! st' in Full ary' - go h k _ t@(Collision hy v) - | h == hy = Collision h (updateWith f k v) + go h s k _ t@(Collision hx l1@(L k1 v1) l2@(L k2 v2) hm) + | h == hx = + let go' + | k == k1 = Collision hx (L k $! f v1) l2 hm + | k == k2 = Collision hx l1 (L k $! f v2) hm + | otherwise = Collision hx l1 l2 $ go (hashWithSalt (nextSalt s) k) (nextSalt s) k 0 hm + in go' | otherwise = t {-# INLINABLE adjust #-} @@ -277,12 +293,13 @@ alterF :: (Functor f, Eq k, Hashable k) -- version; otherwise someone could tell the difference using a lazy -- @f@ and a functor that is similar to Const but not actually Const. alterF f = \ !k !m -> - let !h = hash k - mv = lookup' h k m + let !h = hashWithSalt s k + s = defaultSalt + mv = lookup' h s k m in (<$> f mv) $ \fres -> case fres of - Nothing -> delete' h k m - Just !v' -> insert' h k v' m + Nothing -> delete' h s k m + Just !v' -> insert' h s k v' m -- We rewrite this function unconditionally in RULES, but we expose -- an unfolding just in case it's used in a context where the rules @@ -361,28 +378,29 @@ alterFEager f !k !m = (<$> f mv) $ \fres -> Absent -> m -- Key did exist, no collision - Present _ collPos -> deleteKeyExists collPos h k m + Present _ -> deleteKeyExists h s k m ------------------------------ -- Update value Just v' -> case lookupRes of -- Key did not exist before, insert v' under a new key - Absent -> insertNewKey h k v' m + Absent -> insertNewKey h s k v' m -- Key existed before, no hash collision - Present v collPos -> v' `seq` + Present v -> v' `seq` if v `ptrEq` v' -- If the value is identical, no-op then m -- If the value changed, update the value. - else insertKeyExists collPos h k v' m + else insertKeyExists h s k v' m - where !h = hash k - !lookupRes = lookupRecordCollision h k m + where !h = hashWithSalt s k + !s = defaultSalt + !lookupRes = lookupWithRes h s k m !mv = case lookupRes of Absent -> Nothing - Present v _ -> Just v + Present v -> Just v {-# INLINABLE alterFEager #-} #endif @@ -400,86 +418,96 @@ unionWith f = unionWithKey (const f) -- the provided function (first argument) will be used to compute the result. unionWithKey :: (Eq k, Hashable k) => (k -> v -> v -> v) -> HashMap k v -> HashMap k v -> HashMap k v -unionWithKey f = go 0 +unionWithKey f = go 0 s0 where + s0 = defaultSalt -- empty vs. anything - go !_ t1 Empty = t1 - go _ Empty t2 = t2 + go !_ !_ t1 Empty = t1 + go _ _ Empty t2 = t2 -- leaf vs. leaf - go s t1@(Leaf h1 l1@(L k1 v1)) t2@(Leaf h2 l2@(L k2 v2)) + go bs s t1@(Leaf h1 l1@(L k1 v1)) t2@(Leaf h2 l2@(L k2 v2)) | h1 == h2 = if k1 == k2 then leaf h1 k1 (f k1 v1 v2) - else collision h1 l1 l2 - | otherwise = goDifferentHash s h1 h2 t1 t2 - go s t1@(Leaf h1 (L k1 v1)) t2@(Collision h2 ls2) - | h1 == h2 = Collision h1 (updateOrSnocWithKey f k1 v1 ls2) - | otherwise = goDifferentHash s h1 h2 t1 t2 - go s t1@(Collision h1 ls1) t2@(Leaf h2 (L k2 v2)) - | h1 == h2 = Collision h1 (updateOrSnocWithKey (flip . f) k2 v2 ls1) - | otherwise = goDifferentHash s h1 h2 t1 t2 - go s t1@(Collision h1 ls1) t2@(Collision h2 ls2) - | h1 == h2 = Collision h1 (updateOrConcatWithKey f ls1 ls2) - | otherwise = goDifferentHash s h1 h2 t1 t2 - -- branch vs. branch - go s (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = + else collision h1 s l1 l2 + | otherwise = goDifferentHash bs s h1 h2 t1 t2 + go bs s t1@(Leaf h1 l1@(L k1 v1)) t2@(Collision h2 l21@(L k21 v21) l22@(L k22 v22) hm2) + | h1 == h2 = + let go' + | k1 == k21 = Collision h1 (L k1 (f k1 v1 v21)) l22 hm2 + | k1 == k22 = Collision h1 l21 (L k1 (f k1 v1 v22)) hm2 + | otherwise = Collision h1 l21 l22 $ go 0 (nextSalt s) (Leaf (hashWithSalt (nextSalt s) k1) l1) hm2 + in go' + | otherwise = goDifferentHash bs s h1 h2 t1 t2 + go bs s t1@(Collision h1 l11@(L k11 v11) l12@(L k12 v12) hm1) t2@(Leaf h2 l2@(L k2 v2)) + | h1 == h2 = + let go' + | k2 == k11 = Collision h1 (L k2 (f k2 v11 v2)) l12 hm1 + | k2 == k12 = Collision h1 l11 (L k2 (f k2 v12 v2)) hm1 + | otherwise = Collision h1 l11 l12 $ go 0 (nextSalt s) hm1 (Leaf (hashWithSalt (nextSalt s) k2) l2) + in go' + | otherwise = goDifferentHash bs s h1 h2 t1 t2 + go bs s t1@(Collision h1 _ _ _) t2@(Collision h2 _ _ _) + | h1 == h2 = L.foldl' (\h (k,v) -> go 0 s h (Leaf (hashWithSalt s k) (L k v))) t1 $ toList t2 -- Nothing better we can do, unfortunately. + | otherwise = goDifferentHash bs s h1 h2 t1 t2 + go bs s (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = let b' = b1 .|. b2 - ary' = unionArrayBy (go (s+bitsPerSubkey)) b1 b2 ary1 ary2 + ary' = unionArrayBy (go (bs+bitsPerSubkey) s) b1 b2 ary1 ary2 in bitmapIndexedOrFull b' ary' - go s (BitmapIndexed b1 ary1) (Full ary2) = - let ary' = unionArrayBy (go (s+bitsPerSubkey)) b1 fullNodeMask ary1 ary2 + go bs s (BitmapIndexed b1 ary1) (Full ary2) = + let ary' = unionArrayBy (go (bs+bitsPerSubkey) s) b1 fullNodeMask ary1 ary2 in Full ary' - go s (Full ary1) (BitmapIndexed b2 ary2) = - let ary' = unionArrayBy (go (s+bitsPerSubkey)) fullNodeMask b2 ary1 ary2 + go bs s (Full ary1) (BitmapIndexed b2 ary2) = + let ary' = unionArrayBy (go (bs+bitsPerSubkey) s) fullNodeMask b2 ary1 ary2 in Full ary' - go s (Full ary1) (Full ary2) = - let ary' = unionArrayBy (go (s+bitsPerSubkey)) fullNodeMask fullNodeMask + go bs s (Full ary1) (Full ary2) = + let ary' = unionArrayBy (go (bs+bitsPerSubkey) s) fullNodeMask fullNodeMask ary1 ary2 in Full ary' -- leaf vs. branch - go s (BitmapIndexed b1 ary1) t2 + go bs s (BitmapIndexed b1 ary1) t2 | b1 .&. m2 == 0 = let ary' = A.insert ary1 i t2 b' = b1 .|. m2 in bitmapIndexedOrFull b' ary' | otherwise = let ary' = A.updateWith' ary1 i $ \st1 -> - go (s+bitsPerSubkey) st1 t2 + go (bs+bitsPerSubkey) s st1 t2 in BitmapIndexed b1 ary' where h2 = leafHashCode t2 - m2 = mask h2 s + m2 = mask h2 bs i = sparseIndex b1 m2 - go s t1 (BitmapIndexed b2 ary2) + go bs s t1 (BitmapIndexed b2 ary2) | b2 .&. m1 == 0 = let ary' = A.insert ary2 i $! t1 b' = b2 .|. m1 in bitmapIndexedOrFull b' ary' | otherwise = let ary' = A.updateWith' ary2 i $ \st2 -> - go (s+bitsPerSubkey) t1 st2 + go (bs+bitsPerSubkey) s t1 st2 in BitmapIndexed b2 ary' where h1 = leafHashCode t1 - m1 = mask h1 s + m1 = mask h1 bs i = sparseIndex b2 m1 - go s (Full ary1) t2 = + go bs s (Full ary1) t2 = let h2 = leafHashCode t2 - i = index h2 s - ary' = update16With' ary1 i $ \st1 -> go (s+bitsPerSubkey) st1 t2 + i = index h2 bs + ary' = update16With' ary1 i $ \st1 -> go (bs+bitsPerSubkey) s st1 t2 in Full ary' - go s t1 (Full ary2) = + go bs s t1 (Full ary2) = let h1 = leafHashCode t1 - i = index h1 s - ary' = update16With' ary2 i $ \st2 -> go (s+bitsPerSubkey) t1 st2 + i = index h1 bs + ary' = update16With' ary2 i $ \st2 -> go (bs+bitsPerSubkey) s t1 st2 in Full ary' leafHashCode (Leaf h _) = h - leafHashCode (Collision h _) = h + leafHashCode (Collision h _ _ _) = h leafHashCode _ = error "leafHashCode" - goDifferentHash s h1 h2 t1 t2 - | m1 == m2 = BitmapIndexed m1 (A.singleton $! go (s+bitsPerSubkey) t1 t2) + goDifferentHash bs s h1 h2 t1 t2 + | m1 == m2 = BitmapIndexed m1 (A.singleton $! go (bs+bitsPerSubkey) s t1 t2) | m1 < m2 = BitmapIndexed (m1 .|. m2) (A.pair t1 t2) | otherwise = BitmapIndexed (m1 .|. m2) (A.pair t2 t1) where - m1 = mask h1 s - m2 = mask h2 s + m1 = mask h1 bs + m2 = mask h2 bs {-# INLINE unionWithKey #-} ------------------------------------------------------------------------ @@ -493,8 +521,8 @@ mapWithKey f = go go (Leaf h (L k v)) = leaf h k (f k v) go (BitmapIndexed b ary) = BitmapIndexed b $ A.map' go ary go (Full ary) = Full $ A.map' go ary - go (Collision h ary) = - Collision h $ A.map' (\ (L k v) -> let !v' = f k v in L k v') ary + go (Collision h (L k1 v1) (L k2 v2) hm) = + Collision h (v1 `seq` (L k1 (f k1 v1))) (v2 `seq` (L k2 (f k2 v2))) (go hm) {-# INLINE mapWithKey #-} -- | /O(n)/ Transform this map by applying a function to every value. @@ -509,12 +537,9 @@ map f = mapWithKey (const f) -- | /O(n)/ Transform this map by applying a function to every value -- and retaining only some of them. mapMaybeWithKey :: (k -> v1 -> Maybe v2) -> HashMap k v1 -> HashMap k v2 -mapMaybeWithKey f = filterMapAux onLeaf onColl +mapMaybeWithKey f = filterMapAux onLeaf where onLeaf (Leaf h (L k v)) | Just v' <- f k v = Just (leaf h k v') onLeaf _ = Nothing - - onColl (L k v) | Just v' <- f k v = Just (L k v') - | otherwise = Nothing {-# INLINE mapMaybeWithKey #-} -- | /O(n)/ Transform this map by applying a function to every value @@ -545,8 +570,8 @@ traverseWithKey f = go go (Leaf h (L k v)) = leaf h k <$> f k v go (BitmapIndexed b ary) = BitmapIndexed b <$> A.traverse' go ary go (Full ary) = Full <$> A.traverse' go ary - go (Collision h ary) = - Collision h <$> A.traverse' (\ (L k v) -> (L k $!) <$> f k v) ary + go (Collision h (L k1 v1) (L k2 v2) hm) = + Collision h <$> (L k1 <$> f k1 v1) <*> (L k2 <$> f k2 v2) <*> go hm {-# INLINE traverseWithKey #-} ------------------------------------------------------------------------ @@ -615,51 +640,6 @@ fromListWith :: (Eq k, Hashable k) => (v -> v -> v) -> [(k, v)] -> HashMap k v fromListWith f = L.foldl' (\ m (k, v) -> unsafeInsertWith f k v m) empty {-# INLINE fromListWith #-} ------------------------------------------------------------------------- --- Array operations - -updateWith :: Eq k => (v -> v) -> k -> A.Array (Leaf k v) -> A.Array (Leaf k v) -updateWith f k0 ary0 = go k0 ary0 0 (A.length ary0) - where - go !k !ary !i !n - | i >= n = ary - | otherwise = case A.index ary i of - (L kx y) | k == kx -> let !v' = f y in A.update ary i (L k v') - | otherwise -> go k ary (i+1) n -{-# INLINABLE updateWith #-} - --- | Append the given key and value to the array. If the key is --- already present, instead update the value of the key by applying --- the given function to the new and old value (in that order). The --- value is always evaluated to WHNF before being inserted into the --- array. -updateOrSnocWith :: Eq k => (v -> v -> v) -> k -> v -> A.Array (Leaf k v) - -> A.Array (Leaf k v) -updateOrSnocWith f = updateOrSnocWithKey (const f) -{-# INLINABLE updateOrSnocWith #-} - --- | Append the given key and value to the array. If the key is --- already present, instead update the value of the key by applying --- the given function to the new and old value (in that order). The --- value is always evaluated to WHNF before being inserted into the --- array. -updateOrSnocWithKey :: Eq k => (k -> v -> v -> v) -> k -> v -> A.Array (Leaf k v) - -> A.Array (Leaf k v) -updateOrSnocWithKey f k0 v0 ary0 = go k0 v0 ary0 0 (A.length ary0) - where - go !k v !ary !i !n - | i >= n = A.run $ do - -- Not found, append to the end. - mary <- A.new_ (n + 1) - A.copy ary 0 mary 0 n - let !l = v `seq` (L k v) - A.write mary n l - return mary - | otherwise = case A.index ary i of - (L kx y) | k == kx -> let !v' = f k v y in A.update ary i (L k v') - | otherwise -> go k v ary (i+1) n -{-# INLINABLE updateOrSnocWithKey #-} - ------------------------------------------------------------------------ -- Smart constructors -- @@ -667,5 +647,8 @@ updateOrSnocWithKey f k0 v0 ary0 = go k0 v0 ary0 0 (A.length ary0) -- inserted into the constructor. leaf :: Hash -> k -> v -> HashMap k v -leaf h k = \ !v -> Leaf h (L k v) +leaf h k = \ !v -> Leaf h (l k v) + +l :: k -> v -> Leaf k v +l k = \ !v -> L k v {-# INLINE leaf #-} diff --git a/Data/HashSet/Base.hs b/Data/HashSet/Base.hs index 09294203..bbc37744 100644 --- a/Data/HashSet/Base.hs +++ b/Data/HashSet/Base.hs @@ -128,7 +128,7 @@ instance Eq1 HashSet where liftEq eq (HashSet a) (HashSet b) = equalKeys1 eq a b #endif -instance (Ord a) => Ord (HashSet a) where +instance (Ord a, Hashable a) => Ord (HashSet a) where compare (HashSet a) (HashSet b) = compare a b {-# INLINE compare #-} diff --git a/docs/developer-guide.md b/docs/developer-guide.md index 75f688c1..3d04a148 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -21,7 +21,7 @@ motivate its existance. ## A note on hash functions -While the [hashable](http://hackage.haskell.org/package/containers) package is a +While the [hashable](http://hackage.haskell.org/package/hashable) package is a separate package, it was co-designed with this package. Its main role is to support this package and not to provide good general purpose hash functions (e.g. to use when fingerprinting a text file). @@ -84,15 +84,13 @@ data HashMap k v | BitmapIndexed !Bitmap !(A.Array (HashMap k v)) | Leaf !Hash !(Leaf k v) | Full !(A.Array (HashMap k v)) - | Collision !Hash !(A.Array (Leaf k v)) + | Collision !Hash !(Leaf k v) !(Leaf k v) !(HashMap k v) ``` Here's a quick overview in order of simplicty: * `Empty` -- The empty map. * `Leaf` -- A key-value pair. - * `Collision` -- An array of key-value pairs where the keys have identical hash - values. Element order doesn't matter. * `Full` -- An array of *2^B* children. Given a key you can find the child it is part of by taking *B* bits of the hash value for the key and indexing into the key. Which bits to use depends on the tree level. @@ -101,6 +99,8 @@ Here's a quick overview in order of simplicty: used to convert from the index taken from the hash value, just like above, to the actual index in the array. This node gets upgraded to a `Full` node when it contains *2^B* elements. + * `Collision` -- Twe key-value pairs that collide on the given hash, and a + recursive hashmap where keys are hashed with a different salt. The number of bits of the hash value to use at each level of the tree, *B*, is a compiled time constant (i.e. 4). In general a larger *B* improves lookup @@ -111,6 +111,19 @@ updating the spine of the tree). indexing, faster copying on modification (given that its size is statically know), and lower memory use. +`Collision` is a special case. The previous version used `Collision !Hash +!(A.Array (HashMap k v))` instead of `Collision !Hash !(Leaf k v) !(Leaf k v) +!(HashMap k v)`. As a result, hash-collisions would cause `O(number of +collisions)` insertion time. Because the currently used hash function is not +collision-resistant, it is cheap to make enough collisions to have this cause a +denial of service exploit. However, we can use the fact that `hashable` supports +salted hashing by default to alleviate at least some of this problem. In the +current version, insertion time is not `O(number of collisions)` but `O(number +of consequtive levels of collision)`. While this is not better in case all the +collisions also collide on all subsequent salts, it is likely that a collision +using one salt will not collide again on the next salt, so this approach is +faster in the common case of collisions. + ## Why things are fast Performance is largely dominated by memory layout and allocation. The code has diff --git a/helper.hs b/helper.hs new file mode 100644 index 00000000..79715b0e --- /dev/null +++ b/helper.hs @@ -0,0 +1,22 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# OPTIONS_GHC -fno-warn-type-defaults #-} + +module Main + ( module Main + , module Data.HashMap.Base + , ppShow + ) where + +import Data.HashMap.Base +import Data.Hashable +import Test.QuickCheck +import Text.Show.Pretty + +newtype Key = K + { unK :: Int + } deriving (Arbitrary, Eq, Ord, Read, Show) + +instance Hashable Key where + hashWithSalt salt k = Data.Hashable.hashWithSalt salt (unK k) `mod` 20 + +main = pure () diff --git a/stack.yaml b/stack.yaml new file mode 100644 index 00000000..d082516b --- /dev/null +++ b/stack.yaml @@ -0,0 +1,3 @@ +resolver: lts-10.10 +packages: +- . diff --git a/tests/HashMapProperties.hs b/tests/HashMapProperties.hs index ded3be6d..ec96b841 100644 --- a/tests/HashMapProperties.hs +++ b/tests/HashMapProperties.hs @@ -3,7 +3,7 @@ -- | Tests for the 'Data.HashMap.Lazy' module. We test functions by -- comparing them to a simpler model, an association list. -module Main (main) where +module Main where import Control.Monad ( guard ) import qualified Data.Foldable as Foldable @@ -27,6 +27,9 @@ import Data.Functor.Identity (Identity (..)) import Control.Applicative (Const (..)) import Test.QuickCheck.Function (Fun, apply) import Test.QuickCheck.Poly (A, B) +#if MIN_VERSION_base(4,9,0) +import Data.Functor.Classes (eq2, compare2) +#endif -- Key type that generates more hash collisions. newtype Key = K { unK :: Int } @@ -44,6 +47,17 @@ instance Hashable Key where pEq :: [(Key, Int)] -> [(Key, Int)] -> Bool pEq xs = (M.fromList xs ==) `eq` (HM.fromList xs ==) +#if MIN_VERSION_base(4,9,0) +-- Caution: this is a rather weak test. Fortunately, the "Permutation" +-- properties in the HashSet test suite should catch most of what this +-- doesn't. +pEq2 :: [(Key, Int)] -> [(Key, Int)] -> Property +pEq2 xs ys = (x == y) === (x `eq2` y) + where + x = HM.fromList xs + y = HM.fromList ys +#endif + pNeq :: [(Key, Int)] -> [(Key, Int)] -> Bool pNeq xs = (M.fromList xs /=) `eq` (HM.fromList xs /=) @@ -76,6 +90,14 @@ pOrd3 xs ys = case (compare x y, compare y x) of x = HM.fromList xs y = HM.fromList ys +#if MIN_VERSION_base(4,9,0) +pOrd4 :: [(Key, Int)] -> [(Key, Int)] -> Property +pOrd4 xs ys = compare2 x y === compare x y + where + x = HM.fromList xs + y = HM.fromList ys +#endif + pOrdEq :: [(Key, Int)] -> [(Key, Int)] -> Bool pOrdEq xs ys = case (compare x y, x == y) of (EQ, True) -> True @@ -133,14 +155,43 @@ newtype AlwaysCollide = AC Int instance Hashable AlwaysCollide where hashWithSalt _ _ = 1 + +pDeleteAlwaysCollision :: AlwaysCollide -> AlwaysCollide -> AlwaysCollide -> Int -> Property +pDeleteAlwaysCollision k1 k2 k3 idx = (k1 /= k2) && (k2 /= k3) && (k1 /= k3) ==> + HM.member toKeep $ HM.delete toDelete $ + HM.fromList [(k1, 1 :: Int), (k2, 2), (k3, 3)] + where + which = idx `mod` 3 + toDelete + | which == 0 = k1 + | which == 1 = k2 + | which == 2 = k3 + | otherwise = error "Impossible" + toKeep + | which == 0 = k2 + | which == 1 = k3 + | which == 2 = k1 + | otherwise = error "Impossible" + + + +newtype MostlyCollide = MC Int + deriving (Arbitrary, Eq, Ord, Show) + +instance Hashable MostlyCollide where + hashWithSalt s (MC i) = (hashWithSalt s i) `mod` 3 -- White-box test that tests the case of deleting one of two keys from -- a map, where the keys' hash values collide. -pDeleteCollision :: AlwaysCollide -> AlwaysCollide -> AlwaysCollide -> Int +pDeleteMostlyCollision :: MostlyCollide -> MostlyCollide -> MostlyCollide -> Int -> Property -pDeleteCollision k1 k2 k3 idx = (k1 /= k2) && (k2 /= k3) && (k1 /= k3) ==> - HM.member toKeep $ HM.delete toDelete $ - HM.fromList [(k1, 1 :: Int), (k2, 2), (k3, 3)] +pDeleteMostlyCollision k1 k2 k3 idx = + (k1 /= k2) && (k2 /= k3) && (k1 /= k3) && + (hashWithSalt defaultSalt k1 == hashWithSalt defaultSalt k2) && + (hashWithSalt defaultSalt k2 == hashWithSalt defaultSalt k3) && + (hashWithSalt defaultSalt k1 == hashWithSalt defaultSalt k3) ==> + HM.member toKeep $ HM.delete toDelete $ + HM.fromList [(k1, 1 :: Int), (k2, 2), (k3, 3)] where which = idx `mod` 3 toDelete @@ -153,6 +204,7 @@ pDeleteCollision k1 k2 k3 idx = (k1 /= k2) && (k2 /= k3) && (k1 /= k3) ==> | which == 1 = k3 | which == 2 = k1 | otherwise = error "Impossible" + defaultSalt = -2578643520546668380 -- TODO what do do with this test? pInsertWith :: Key -> [(Key, Int)] -> Bool pInsertWith k = M.insertWith (+) k 1 `eq_` HM.insertWith (+) k 1 @@ -349,9 +401,15 @@ tests = testGroup "instances" [ testProperty "==" pEq , testProperty "/=" pNeq +#if MIN_VERSION_base(4,9,0) + , testProperty "eq2 = (==)" pEq2 +#endif , testProperty "compare reflexive" pOrd1 , testProperty "compare transitive" pOrd2 , testProperty "compare antisymmetric" pOrd3 +#if MIN_VERSION_base(4,9,0) + , testProperty "compare2 = compare" pOrd4 +#endif , testProperty "Ord => Eq" pOrdEq , testProperty "Read/Show" pReadShow , testProperty "Functor" pFunctor @@ -365,7 +423,8 @@ tests = , testProperty "lookup" pLookup , testProperty "insert" pInsert , testProperty "delete" pDelete - , testProperty "deleteCollision" pDeleteCollision + , testProperty "deleteAlwaysCollision" pDeleteAlwaysCollision + , testProperty "deleteMostlyCollision" pDeleteMostlyCollision , testProperty "insertWith" pInsertWith , testProperty "adjust" pAdjust , testProperty "updateAdjust" pUpdateAdjust diff --git a/tests/HashSetProperties.hs b/tests/HashSetProperties.hs index 130c1dc4..abdf8d5e 100644 --- a/tests/HashSetProperties.hs +++ b/tests/HashSetProperties.hs @@ -14,6 +14,9 @@ import Data.Ord (comparing) import Test.QuickCheck (Arbitrary, Property, (==>), (===)) import Test.Framework (Test, defaultMain, testGroup) import Test.Framework.Providers.QuickCheck2 (testProperty) +#if MIN_VERSION_base(4,9,0) +import Data.Functor.Classes (eq1, compare1) +#endif -- Key type that generates more hash collisions. newtype Key = K { unK :: Int } @@ -80,13 +83,33 @@ pFoldable :: [Int] -> Bool pFoldable = (L.sort . Foldable.foldr (:) []) `eq` (L.sort . Foldable.foldr (:) []) -pPermutationEq :: [Key] -> [Int] -> Bool -pPermutationEq xs is = S.fromList xs == S.fromList ys +pPermutationEq :: [(Key, Int)] -> Bool +pPermutationEq xs = S.fromList x == S.fromList y where - ys = shuffle is xs - shuffle idxs = L.map snd - . L.sortBy (comparing fst) - . L.zip (idxs ++ [L.maximum (0:is) + 1 ..]) + x = L.map fst xs + y = L.map fst . L.sortBy (comparing snd) $ xs + +#if MIN_VERSION_base(4,9,0) +pPermutationEq1 :: [(Key, Int)] -> Bool +pPermutationEq1 xs = S.fromList x `eq1` S.fromList y + where + x = L.map fst xs + y = L.map fst . L.sortBy (comparing snd) $ xs +#endif + +pPermutationOrd :: [(Key, Int)] -> Bool +pPermutationOrd xs = (S.fromList x `compare` S.fromList y) == EQ + where + x = L.map fst xs + y = L.map fst . L.sortBy (comparing snd) $ xs + +#if MIN_VERSION_base(4,9,0) +pPermutationOrd1 :: [(Key, Int)] -> Bool +pPermutationOrd1 xs = (S.fromList x `compare1` S.fromList y) == EQ + where + x = L.map fst xs + y = L.map fst . L.sortBy (comparing snd) $ xs +#endif pHashable :: [Key] -> [Int] -> Int -> Property pHashable xs is salt = @@ -174,6 +197,13 @@ tests = testGroup "instances" [ testProperty "==" pEq , testProperty "Permutation ==" pPermutationEq +#if MIN_VERSION_base(4,9,0) + , testProperty "Permutation eq1" pPermutationEq1 +#endif + , testProperty "Permutation compare" pPermutationOrd +#if MIN_VERSION_base(4,9,0) + , testProperty "Permutation compare1" pPermutationOrd1 +#endif , testProperty "/=" pNeq , testProperty "compare reflexive" pOrd1 , testProperty "compare transitive" pOrd2 diff --git a/tests/Validity.hs b/tests/Validity.hs new file mode 100644 index 00000000..dc3870f7 --- /dev/null +++ b/tests/Validity.hs @@ -0,0 +1,625 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Main + ( main + ) where + +import GHC.Generics (Generic) +#if __GLASGOW_HASKELL__ < 710 +import Control.Applicative (Applicative(pure), (<$>), (<*>)) +import Data.Monoid (Monoid(mappend, mempty)) +import Data.Traversable (Traversable(..)) +import Data.Word (Word) +#endif +import Data.Bits (popCount) +import Data.Functor +import Data.Hashable (Hashable(hashWithSalt)) +import Data.List (nub) +import Data.Validity +#if defined(STRICT) +import qualified Data.HashMap.Strict as HM +#else +import qualified Data.HashMap.Lazy as HM +#endif +import qualified Data.HashMap.Array as A +import Data.HashMap.Base (HashMap(..), Leaf(..), UnconsHM(..)) +import qualified Data.HashMap.Base as HMB +import qualified Data.HashSet as HS +import Data.HashSet (HashSet) + +import Data.GenValidity (GenUnchecked(..), GenValid(..), genSplit, genSplit4) +import Data.Validity (Validity) +import Test.Framework (Test, defaultMain, testGroup) +import Test.Framework.Providers.QuickCheck2 (testProperty) +import Test.QuickCheck + ( Arbitrary + , CoArbitrary + , Gen + , Property + , oneof + , resize + , sized + ) +import Test.QuickCheck.Function + ( (:->) + , Fun(..) + , Function(..) + , apply + , functionMap + ) +import Test.Validity.Functions + ( producesValidsOnValids + , producesValidsOnValids2 + , producesValidsOnValids3 + ) +import Test.Validity.GenValidity.Property (genGeneratesValid) +#if !MIN_VERSION_QuickCheck(2,10,0) +-- | Extracts the value of a binary function. +-- +-- 'Fn2' is the pattern equivalent of this function. +-- +-- > prop_zipWith :: Fun (Int, Bool) Char -> [Int] -> [Bool] -> Bool +-- > prop_zipWith f xs ys = zipWith (applyFun2 f) xs ys == [ applyFun2 f x y | (x, y) <- zip xs ys] +-- +applyFun2 :: Fun (a, b) c -> (a -> b -> c) +applyFun2 (Fun _ f) a b = f (a, b) + +-- | Extracts the value of a ternary function. 'Fn3' is the +-- pattern equivalent of this function. +applyFun3 :: Fun (a, b, c) d -> (a -> b -> c -> d) +applyFun3 (Fun _ f) a b c = f (a, b, c) + +-- | Provides a 'Function' instance for types with 'Integral'. +functionIntegral :: Integral a => (a -> b) -> (a :-> b) +functionIntegral = functionMap fromIntegral fromInteger + +instance Function Word where + function = functionIntegral +#else +import Test.QuickCheck.Function (applyFun2, applyFun3, functionIntegral) +#endif +instance (Validity k, Validity v) => Validity (Leaf k v) where + validate (L k v) = mconcat [annotate k "key", annotate v "value"] + +instance Validity a => Validity (A.Array a) where + validate a = annotate (A.toList a) "The array elements" + +instance (Eq k, Hashable k, Validity k, Validity v) => + Validity (UnconsHM k v) where + validate UnconsEmptyHM = decorate "UnconsEmptyHM" mempty + validate (UnconsedHM l hm) = + decorate "UnconsedHM" $ + mconcat [decorate "Leaf" $ validate l, decorate "Map" $ validate hm] + +instance (Eq k, Hashable k, Validity k, Validity v) => + Validity (HashMap k v) where + validate hm = + mconcat + [ go HMB.defaultSalt 0 hm + , check + (HM.fromList (HM.toList (hm $> ())) == (hm $> ())) + "fromList and toList are inverses for this hashmap." + ] + where + go s bs hm_ = + case hm_ of + Empty -> mempty + (Leaf h l@(L k _)) -> + decorate "Leaf" $ + mconcat + [ annotate h "Hash" + , annotate l "Leaf" + , check + (HMB.hashWithSalt s k == h) + "The hash is correct." + ] + (BitmapIndexed bm a) -> + decorate "BitmapIndexed" $ + mconcat + [ annotate bm "Bitmap" + , decorate "Array" $ + decorateList (zip [0 ..] $ A.toList a) $ \(ix, hm__) -> + mconcat + [ go s (bs + HMB.bitsPerSubkey) hm__ + , check + (not $ HM.null hm__) + "The sub HashMap is not empty" + , decorateList (HM.keys hm__) $ \k -> + flip + check + "The hash, when masked with the current bitmask, of the key, produces the index in the array where the hashmap is found" $ + let h = HMB.hashWithSalt s k + in HMB.sparseIndex bm (HMB.mask h bs) == + ix + ] + , check + (A.length a == popCount bm) + "There are values in the array equal to popCount bm." + , check + (uniques $ concatMap HM.keys $ A.toList a) + "The keys are unique." + , check + (A.length a >= 1) + "The array has at least one element." + , check + (A.length a > 1 || + not (HMB.isLeafOrCollision (A.index a 0))) $ + "If the array has only one element, then that element is" ++ + "not a leaf or collision." + ] + (Full a) -> + decorate "Full" $ + mconcat + [ decorate "Array" $ + decorateList (zip [0 ..] $ A.toList a) $ \(ix, hm__) -> + mconcat + [ go s (bs + HMB.bitsPerSubkey) hm__ + , check + (not $ HM.null hm__) + "The sub HashMap is not empty" + , decorateList (HM.keys hm__) $ \k -> + flip + check + "The hash, when masked with the current bitmask, of the key, produces the index in the array where the hashmap is found" $ + let h = HMB.hashWithSalt s k + in HMB.index h bs == ix + ] + , check + (A.length a == 2 ^ HMB.bitsPerSubkey) + "There are 2 ^ bitsPerSubkey values in the array." + , check + (uniques $ concatMap HM.keys $ A.toList a) + "The keys are unique." + ] + (Collision h l1@(L k1 _) l2@(L k2 _) hm') -> + decorate "Collision" $ + mconcat + [ annotate h "Hash" + , annotate l1 "The first collision" + , annotate l2 "The second collision" + , check + (HMB.hashWithSalt s k1 == h) + "The hash of the first collision is correct." + , check + (HMB.hashWithSalt s k2 == h) + "The hash of the second collision is correct." + , check + (k1 /= k2) + "The keys within the collision are not equal." + , check + (uniques (k1 : k2 : HM.keys hm')) + "The keys are unique." + , decorate "The recursive HashMap" $ + go (HMB.nextSalt s) 0 hm' + , check + (all (\k -> HMB.hashWithSalt s k == h) + (HM.keys hm')) + "All recursive keys hash to the hash within the collision, using the salt at this level." + ] + +instance (Eq k, Hashable k, Validity k) => Validity (HashSet k) where + validate hs = decorate "asMap" $ validate $ HS.toMap hs + +uniques :: Eq a => [a] -> Bool +uniques l = length (nub l) == length l +#if !MIN_VERSION_validity(0,6,0) +-- | Decorate a validation with a location +decorate :: String -> Validation -> Validation +decorate = flip annotateValidation + +annotateValidation :: Validation -> String -> Validation +annotateValidation val s = + case val of + Validation errs -> Validation $ map (Location s) errs +#endif + +#if !MIN_VERSION_validity(0,8,0) +-- | Decorate a piecewise validation of a list with their location in the list +decorateList :: [a] -> (a -> Validation) -> Validation +decorateList as func = + mconcat $ + flip map (zip [0 ..] as) $ \(i, a) -> + decorate + (unwords + ["The element at index", show (i :: Integer), "in the list"]) $ + func a +#endif +instance (GenUnchecked k, GenUnchecked v) => GenUnchecked (Leaf k v) where + genUnchecked = + sized $ \n -> do + (a, b) <- genSplit n + k <- resize a genUnchecked + v <- resize b genUnchecked + pure $ L k v + shrinkUnchecked (L k v) = [L k' v' | (k', v') <- shrinkUnchecked (k, v)] + +instance (GenValid k, GenValid v) => GenValid (Leaf k v) where + genValid = + sized $ \n -> do + (a, b) <- genSplit n + k <- resize a genValid + v <- resize b genValid + pure $ L k v + +instance GenUnchecked a => GenUnchecked (A.Array a) where + genUnchecked = do + l <- genUnchecked + pure $ A.fromList (length l) l + shrinkUnchecked = + fmap (\l -> A.fromList (length l) l) . shrinkUnchecked . A.toList + +instance GenValid a => GenValid (A.Array a) where + genValid = do + l <- genValid + pure $ A.fromList (length l) l + +instance (GenUnchecked k, GenUnchecked v) => GenUnchecked (HashMap k v) where + genUnchecked = + sized $ \n -> + case n of + 0 -> pure Empty + _ -> + oneof + [ do (a, b) <- genSplit n + BitmapIndexed <$> resize a genUnchecked <*> + resize b genUnchecked + , do (a, b) <- genSplit n + Leaf <$> resize a genUnchecked <*> + resize b genUnchecked + , Full <$> genUnchecked + , do (a, b, c, d) <- genSplit4 n + Collision <$> resize a genUnchecked <*> + resize b genUnchecked <*> + resize c genUnchecked <*> + resize d genUnchecked + ] + shrinkUnchecked hm = + case hm of + Empty -> [] + Leaf h l -> [Leaf h' l' | (h', l') <- shrinkUnchecked (h, l)] + BitmapIndexed bm a -> + [BitmapIndexed bm' a' | (bm', a') <- shrinkUnchecked (bm, a)] + Full a -> Full <$> shrinkUnchecked a + Collision h l1 l2 hm_ -> + [ Collision h' l1' l2' hm_' + | (h', l1', l2', hm_') <- shrinkUnchecked (h, l1, l2, hm_) + ] + +-- It turns out it's almost impossible to write this instance without using internals. +-- This is good-enough for now. +instance (Eq k, Hashable k, GenValid k, GenValid v) => + GenValid (HashMap k v) where + genValid = HM.fromList <$> genValid + shrinkValid hm = HM.fromList <$> shrinkValid (HM.toList hm) + -- TODO improve the shrinking, maybe. + +-- Key type that generates more hash collisions. +newtype Key = K + { unK :: Int + } deriving ( Arbitrary + , CoArbitrary + , Validity + , GenUnchecked + , GenValid + , Eq + , Ord + , Read + , Show + , Integral + , Real + , Num + , Enum + , Generic + ) + +instance Hashable Key where + hashWithSalt salt k = hashWithSalt salt (unK k) `mod` 20 + +instance Function Key where + function = functionIntegral + +pSingleton :: Property +pSingleton = + producesValidsOnValids2 (HM.singleton :: Key -> Int -> HM.HashMap Key Int) + +pNull :: Property +pNull = producesValidsOnValids (HM.null :: HM.HashMap Key Int -> Bool) + +pSize :: Property +pSize = producesValidsOnValids (HM.size :: HM.HashMap Key Int -> Int) + +-- These four tests will ensure that `equal1`, etc do not segfault and succesfully +-- evaluate to a `Bool` (or `Ordering`, respectively) +pEqual1 :: Fun (Int, Int) Bool -> Property +pEqual1 f = + producesValidsOnValids2 + (HMB.equal1 (applyFun2 f) :: HM.HashMap Key Int -> HM.HashMap Key Int -> Bool) + +pEqual2 :: Fun (Key, Key) Bool -> Fun (Int, Int) Bool -> Property +pEqual2 f g = + producesValidsOnValids2 + (HMB.equal2 (applyFun2 f) (applyFun2 g) :: HM.HashMap Key Int -> HM.HashMap Key Int -> Bool) + +pCmp1 :: Fun (Int, Int) Ordering -> Property +pCmp1 f = + producesValidsOnValids2 + (HMB.cmp1 (applyFun2 f) :: HM.HashMap Key Int -> HM.HashMap Key Int -> Ordering) + +pCmp2 :: Fun (Key, Key) Ordering -> Fun (Int, Int) Ordering -> Property +pCmp2 f g = + producesValidsOnValids2 + (HMB.cmp2 (applyFun2 f) (applyFun2 g) :: HM.HashMap Key Int -> HM.HashMap Key Int -> Ordering) + +pMember :: Property +pMember = + producesValidsOnValids2 (HM.member :: Key -> HM.HashMap Key Int -> Bool) + +pLookup :: Property +pLookup = + producesValidsOnValids2 + (HM.lookup :: Key -> HM.HashMap Key Int -> Maybe Int) + +pLookupDefault :: Property +pLookupDefault = + producesValidsOnValids3 + (HM.lookupDefault :: Int -> Key -> HM.HashMap Key Int -> Int) + +pInsert :: Property +pInsert = + producesValidsOnValids3 + (HM.insert :: Key -> Int -> HM.HashMap Key Int -> HM.HashMap Key Int) + +pInsertWith :: Property +pInsertWith = + producesValidsOnValids3 + (HM.insertWith (+) :: Key -> Int -> HM.HashMap Key Int -> HM.HashMap Key Int) + +pDelete :: Property +pDelete = + producesValidsOnValids2 + (HM.delete :: Key -> HM.HashMap Key Int -> HM.HashMap Key Int) + +pAdjust :: Fun Int Int -> Property +pAdjust f = + producesValidsOnValids2 + (HM.adjust (apply f) :: Key -> HM.HashMap Key Int -> HM.HashMap Key Int) + +pUpdate :: (Fun Int (Maybe Int)) -> Property +pUpdate f = + producesValidsOnValids2 + (HM.update (apply f) :: Key -> HM.HashMap Key Int -> HM.HashMap Key Int) + +pAlter :: Property +pAlter = + producesValidsOnValids2 + (HM.alter (fmap succ) :: Key -> HM.HashMap Key Int -> HM.HashMap Key Int) + +pAlterF :: Fun (Maybe Int) [Maybe Int] -> Property +pAlterF f = + producesValidsOnValids2 + (HM.alterF (apply f) :: Key -> HM.HashMap Key Int -> [HM.HashMap Key Int]) + +pUnion :: Property +pUnion = + producesValidsOnValids2 + (HM.union :: HashMap Key Int -> HashMap Key Int -> HashMap Key Int) + +pUnionWith :: Fun (Int, Int) Int -> Property +pUnionWith f = + producesValidsOnValids2 + (HM.unionWith (applyFun2 f) :: HashMap Key Int -> HashMap Key Int -> HashMap Key Int) + +pUnionWithKey :: Fun (Key, Int, Int) Int -> Property +pUnionWithKey f = + producesValidsOnValids2 + (HM.unionWithKey (applyFun3 f) :: HashMap Key Int -> HashMap Key Int -> HashMap Key Int) + +pUnions :: Property +pUnions = + producesValidsOnValids (HM.unions :: [HashMap Key Int] -> HashMap Key Int) + +pMap :: Fun Int Int -> Property +pMap f = + producesValidsOnValids + (HM.map (apply f) :: HashMap Key Int -> HashMap Key Int) + +pMapWithKey :: Fun (Key, Int) Int -> Property +pMapWithKey f = + producesValidsOnValids + (HM.mapWithKey (applyFun2 f) :: HashMap Key Int -> HashMap Key Int) + +pTraverseWithKey :: Fun (Key, Int) (Maybe Int) -> Property +pTraverseWithKey f = + producesValidsOnValids + (HM.traverseWithKey (applyFun2 f) :: HashMap Key Int -> Maybe (HashMap Key Int)) + +pDifference :: Property +pDifference = + producesValidsOnValids2 + (HM.difference :: HashMap Key Int -> HashMap Key Int -> HashMap Key Int) + +pDifferenceWith :: Fun (Int, Int) (Maybe Int) -> Property +pDifferenceWith f = + producesValidsOnValids2 + (HM.differenceWith (applyFun2 f) :: HashMap Key Int -> HashMap Key Int -> HashMap Key Int) + +pIntersection :: Property +pIntersection = + producesValidsOnValids2 + (HM.intersection :: HashMap Key Int -> HashMap Key Int -> HashMap Key Int) + +pIntersectionWith :: Fun (Int, Int) Int -> Property +pIntersectionWith f = + producesValidsOnValids2 + (HM.intersectionWith (applyFun2 f) :: HashMap Key Int -> HashMap Key Int -> HashMap Key Int) + +pIntersectionWithKey :: Fun (Key, Int, Int) Int -> Property +pIntersectionWithKey f = + producesValidsOnValids2 + (HM.intersectionWithKey (applyFun3 f) :: HashMap Key Int -> HashMap Key Int -> HashMap Key Int) + +pFoldl' :: Fun (Word, Int) Word -> Property +pFoldl' f = + producesValidsOnValids2 + (HM.foldl' (applyFun2 f) :: Word -> HashMap Key Int -> Word) + +pFoldlWithKey' :: Fun (Word, Key, Int) Word -> Property +pFoldlWithKey' f = + producesValidsOnValids2 + (HM.foldlWithKey' (applyFun3 f) :: Word -> HashMap Key Int -> Word) + +pFoldr :: Fun (Int, Word) Word -> Property +pFoldr f = + producesValidsOnValids2 + (HM.foldr (applyFun2 f) :: Word -> HashMap Key Int -> Word) + +pFoldrWithKey :: Fun (Key, Int, Word) Word -> Property +pFoldrWithKey f = + producesValidsOnValids2 + (HM.foldrWithKey (applyFun3 f) :: Word -> HashMap Key Int -> Word) + +pFilter :: Fun Int Bool -> Property +pFilter f = + producesValidsOnValids + (HM.filter (apply f) :: HashMap Key Int -> HashMap Key Int) + +pFilterWithKey :: Fun (Key, Int) Bool -> Property +pFilterWithKey f = + producesValidsOnValids + (HM.filterWithKey (applyFun2 f) :: HashMap Key Int -> HashMap Key Int) + +pMapMaybe :: Fun Int (Maybe Int) -> Property +pMapMaybe f = + producesValidsOnValids + (HM.mapMaybe (apply f) :: HashMap Key Int -> HashMap Key Int) + +pMapMaybeWithKey :: Fun (Key, Int) (Maybe Int) -> Property +pMapMaybeWithKey f = + producesValidsOnValids + (HM.mapMaybeWithKey (applyFun2 f) :: HashMap Key Int -> HashMap Key Int) + +pKeys :: Property +pKeys = producesValidsOnValids (HM.keys :: HashMap Key Int -> [Key]) + +pElems :: Property +pElems = producesValidsOnValids (HM.elems :: HashMap Key Int -> [Int]) + +pToList :: Property +pToList = producesValidsOnValids (HM.toList :: HashMap Key Int -> [(Key, Int)]) + +pFromList :: Property +pFromList = + producesValidsOnValids (HM.fromList :: [(Key, Int)] -> HashMap Key Int) + +pFromListWith :: Fun (Int, Int) Int -> Property +pFromListWith f = + producesValidsOnValids + (HM.fromListWith (applyFun2 f) :: [(Key, Int)] -> HashMap Key Int) + +pKeysSet :: Property +pKeysSet = producesValidsOnValids (HM.keysSet :: HashMap Key Int -> HashSet Key) + +pUnconsHM :: Property +pUnconsHM = + producesValidsOnValids (HMB.unconsHM :: HashMap Key Int -> UnconsHM Key Int) + +-- There isn't currently a pUnconsA because I (David Feuer) don't know +-- the right way to tell genvalidity-hspec that we only want cases +-- where the array is non-empty and all of its elements are non-empty. +------------------------------------------------------------------------ +-- * Test list +tests :: [Test] +tests = + let genValidHelper gen = genGeneratesValid gen (const []) + -- Basic interface + in [ testGroup + "HashMap" + [ testProperty "genValid generates valid values for Leaf" $ + genValidHelper (genValid :: Gen (Leaf Key Int)) + , testProperty "genValid generates valid values for Array" $ + genValidHelper (genValid :: Gen (A.Array Key)) + , testProperty "genValid generates valid values for HashMap" $ + genValidHelper (genValid :: Gen (HashMap Key Int)) + ] + , testGroup + "HashMap" + [ testProperty "singleton produces valid HashMaps" pSingleton + , testProperty "null produces valid Bools" pNull + , testProperty "size produces valid HashMaps" pSize + , testProperty "equal1 produce valid Bools" pEqual1 + , testProperty "equal2 produce valid Bools" pEqual2 + , testProperty "cmp1 produce valid Orderings" pCmp1 + , testProperty "cmp2 produce valid Orderings" pCmp2 + , testProperty "member produces valid HashMaps" pMember + , testProperty "lookup produces valid HashMaps" pLookup + , testProperty + "lookupDefault produces valid HashMaps" + pLookupDefault + , testProperty "insert produces valid HashMaps" pInsert + , testProperty "insertWith produces valid HashMaps" pInsertWith + , testProperty "delete produces valid HashMaps" pDelete + , testProperty "adjust produces valid HashMaps" pAdjust + , testProperty "update produces valid HashMaps" pUpdate + , testProperty "alter produces valid HashMaps" pAlter + , testProperty "alterF produces valid HashMaps" pAlterF + , testProperty "union produces valid HashMaps" pUnion + , testProperty "unionWith produces valid HashMaps" pUnionWith + , testProperty + "unionWithKey produces valid HashMaps" + pUnionWithKey + , testProperty "unions produces valid HashMaps" pUnions + , testProperty "map produces valid HashMaps" pMap + , testProperty "mapWithKey produces valid HashMaps" pMapWithKey + , testProperty + "traverseWithKey produces valid HashMaps" + pTraverseWithKey + , testProperty "difference produces valid HashMaps" pDifference + , testProperty + "differenceWith produces valid HashMaps" + pDifferenceWith + , testProperty + "intersection produces valid HashMaps" + pIntersection + , testProperty + "intersectionWith produces valid HashMaps" + pIntersectionWith + , testProperty + "intersectionWithKey produces valid HashMaps" + pIntersectionWithKey + , testProperty "foldl' produces valid HashMaps" pFoldl' + , testProperty + "foldlWithKey' produces valid HashMaps" + pFoldlWithKey' + , testProperty "foldr produces valid HashMaps" pFoldr + , testProperty + "foldrWithKey produces valid HashMaps" + pFoldrWithKey + , testProperty "filter produces valid HashMaps" pFilter + , testProperty + "filterWithKey produces valid HashMaps" + pFilterWithKey + , testProperty "mapMaybe produces valid HashMaps" pMapMaybe + , testProperty + "mapMaybeWithKey produces valid HashMaps" + pMapMaybeWithKey + , testProperty "keys produces valid lists" pKeys + , testProperty "elems produces valid lists" pElems + , testProperty "toList produces valid lists" pToList + , testProperty "fromList produces valid HashMaps" pFromList + , testProperty + "fromListWith produces valid HashMaps" + pFromListWith + , testProperty "unconsHM produces valid HashMaps" pUnconsHM + , testProperty "keysSet produces valid HashSets" pKeysSet + ] + ] + +main :: IO () +main = defaultMain tests diff --git a/unordered-containers.cabal b/unordered-containers.cabal index 95f84581..c0b9138b 100644 --- a/unordered-containers.cabal +++ b/unordered-containers.cabal @@ -28,13 +28,13 @@ flag debug library exposed-modules: + Data.HashMap.Array + Data.HashMap.Base Data.HashMap.Lazy Data.HashMap.Strict + Data.HashMap.Strict.Base Data.HashSet other-modules: - Data.HashMap.Array - Data.HashMap.Base - Data.HashMap.Strict.Base Data.HashMap.List Data.HashMap.Unsafe Data.HashMap.UnsafeShift @@ -64,6 +64,46 @@ library if flag(debug) cpp-options: -DASSERTS +test-suite validity-lazy + hs-source-dirs: tests + main-is: Validity.hs + type: exitcode-stdio-1.0 + + build-depends: + base, + hashable >= 1.0.1.1, + QuickCheck >= 2.4.0.1, + test-framework >= 0.3.3, + test-framework-quickcheck2 >= 0.2.9, + validity >= 0.4.0.4, + genvalidity >= 0.4.0.4, + genvalidity-property >= 0.1.0.0, + unordered-containers + + default-language: Haskell2010 + ghc-options: -Wall + cpp-options: -DASSERTS + +test-suite validity-strict + hs-source-dirs: tests + main-is: Validity.hs + type: exitcode-stdio-1.0 + + build-depends: + base, + hashable >= 1.0.1.1, + QuickCheck >= 2.4.0.1, + test-framework >= 0.3.3, + test-framework-quickcheck2 >= 0.2.9, + validity >= 0.4.0.4, + genvalidity >= 0.4.0.4, + genvalidity-property >= 0.1.0.0, + unordered-containers + + default-language: Haskell2010 + ghc-options: -Wall + cpp-options: -DASSERTS -DSTRICT + test-suite hashmap-lazy-properties hs-source-dirs: tests main-is: HashMapProperties.hs