Skip to content

Commit

Permalink
Build Set and Map more efficiently
Browse files Browse the repository at this point in the history
Use "Builder"s to implement some Set and Map construction functions.
As a result, some have become good consumers in terms of list fusion,
and all are now O(n) for non-decreasing input.

                     Fusible  Fusible  O(n) for     O(n) for
                     before   after    before       after
Set.fromList         No       Yes      Strict incr  Non-decr
Set.map              -        -        Strict incr  Non-decr
Map.fromList         No       Yes      Strict incr  Non-decr
Map.fromListWith     Yes      Yes      Never        Non-decr
Map.fromListWithKey  Yes      Yes      Never        Non-decr
Map.mapKeys          -        -        Strict incr  Non-decr
Map.mapKeysWith      -        -        Never        Non-decr
  • Loading branch information
meooow25 committed Sep 17, 2024
1 parent 1395671 commit b7d639b
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 150 deletions.
119 changes: 65 additions & 54 deletions containers/src/Data/Map/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,9 @@ module Data.Map.Internal (
, Identity(..)
, Stack(..)
, foldl'Stack
, MapBuilder(..)
, emptyB
, finishB

-- Used by Map.Merge.Lazy
, mapWhenMissing
Expand All @@ -387,7 +390,6 @@ import Data.Semigroup (Semigroup((<>)))
#endif
import Control.Applicative (Const (..))
import Control.DeepSeq (NFData(rnf))
import Data.Bits (shiftL, shiftR)
import qualified Data.Foldable as Foldable
import Data.Bifoldable
import Utils.Containers.Internal.Prelude hiding
Expand Down Expand Up @@ -3242,7 +3244,7 @@ mapAccumRWithKey f a (Bin sx kx x l r) =
-- > mapKeys (\ _ -> 3) (fromList [(1,"b"), (2,"a"), (3,"d"), (4,"c")]) == singleton 3 "c"

mapKeys :: Ord k2 => (k1->k2) -> Map k1 a -> Map k2 a
mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) []
mapKeys f = finishB . foldlWithKey' (\b kx x -> insertB (f kx) x b) emptyB
#if __GLASGOW_HASKELL__
{-# INLINABLE mapKeys #-}
#endif
Expand All @@ -3261,7 +3263,8 @@ mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) []
-- Also see the performance note on 'fromListWith'.

mapKeysWith :: Ord k2 => (a -> a -> a) -> (k1->k2) -> Map k1 a -> Map k2 a
mapKeysWith c f = fromListWith c . foldrWithKey (\k x xs -> (f k, x) : xs) []
mapKeysWith c f =
finishB . foldlWithKey' (\b kx x -> insertWithB c (f kx) x b) emptyB
#if __GLASGOW_HASKELL__
{-# INLINABLE mapKeysWith #-}
#endif
Expand Down Expand Up @@ -3510,46 +3513,9 @@ instance (Ord k) => GHCExts.IsList (Map k v) where
-- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")]
-- > fromList [(5,"c"), (3,"b"), (5, "a")] == fromList [(5,"a"), (3,"b")]

-- For some reason, when 'singleton' is used in fromList or in
-- create, it is not inlined, so we inline it manually.
fromList :: Ord k => [(k,a)] -> Map k a
fromList [] = Tip
fromList [(kx, x)] = Bin 1 kx x Tip Tip
fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = fromList' (Bin 1 kx0 x0 Tip Tip) xs0
| otherwise = go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
where
not_ordered _ [] = False
not_ordered kx ((ky,_) : _) = kx >= ky
{-# INLINE not_ordered #-}

fromList' t0 xs = Foldable.foldl' ins t0 xs
where ins t (k,x) = insert k x t

go !_ t [] = t
go _ t [(kx, x)] = insertMax kx x t
go s l xs@((kx, x) : xss) | not_ordered kx xss = fromList' l xs
| otherwise = case create s xss of
(r, ys, []) -> go (s `shiftL` 1) (link kx x l r) ys
(r, _, ys) -> fromList' (link kx x l r) ys

-- The create is returning a triple (tree, xs, ys). Both xs and ys
-- represent not yet processed elements and only one of them can be nonempty.
-- If ys is nonempty, the keys in ys are not ordered with respect to tree
-- and must be inserted using fromList'. Otherwise the keys have been
-- ordered so far.
create !_ [] = (Tip, [], [])
create s xs@(xp : xss)
| s == 1 = case xp of (kx, x) | not_ordered kx xss -> (Bin 1 kx x Tip Tip, [], xss)
| otherwise -> (Bin 1 kx x Tip Tip, xss, [])
| otherwise = case create (s `shiftR` 1) xs of
res@(_, [], _) -> res
(l, [(ky, y)], zs) -> (insertMax ky y l, [], zs)
(l, ys@((ky, y):yss), _) | not_ordered ky yss -> (l, [], ys)
| otherwise -> case create (s `shiftR` 1) yss of
(r, zs, ws) -> (link ky y l r, zs, ws)
#if __GLASGOW_HASKELL__
{-# INLINABLE fromList #-}
#endif
fromList xs = finishB (Foldable.foldl' (\b (kx, x) -> insertB kx x b) emptyB xs)
{-# INLINE fromList #-} -- INLINE for fusion

-- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'.
--
Expand Down Expand Up @@ -3588,11 +3554,9 @@ fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = fromList' (Bin 1 kx0 x0 Tip T
-- > fromListWith (++) $ reverse $ map (\(k, v) -> (k, [v])) someListOfTuples

fromListWith :: Ord k => (a -> a -> a) -> [(k,a)] -> Map k a
fromListWith f xs
= fromListWithKey (\_ x y -> f x y) xs
#if __GLASGOW_HASKELL__
{-# INLINABLE fromListWith #-}
#endif
fromListWith f xs =
finishB (Foldable.foldl' (\b (kx, x) -> insertWithB f kx x b) emptyB xs)
{-# INLINE fromListWith #-} -- INLINE for fusion

-- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWithKey'.
--
Expand All @@ -3603,13 +3567,9 @@ fromListWith f xs
-- Also see the performance note on 'fromListWith'.

fromListWithKey :: Ord k => (k -> a -> a -> a) -> [(k,a)] -> Map k a
fromListWithKey f xs
= Foldable.foldl' ins empty xs
where
ins t (k,x) = insertWithKey f k x t
#if __GLASGOW_HASKELL__
{-# INLINABLE fromListWithKey #-}
#endif
fromListWithKey f xs =
finishB (Foldable.foldl' (\b (kx, x) -> insertWithB (f kx) kx x b) emptyB xs)
{-# INLINE fromListWithKey #-} -- INLINE for fusion

-- | \(O(n)\). Convert the map to a list of key\/value pairs. Subject to list fusion.
--
Expand Down Expand Up @@ -4004,6 +3964,57 @@ splitMember k0 m = case go k0 m of

data StrictTriple a b c = StrictTriple !a !b !c

{--------------------------------------------------------------------
MapBuilder
--------------------------------------------------------------------}

-- See Note [SetBuilder] in Data.Set.Internal

data MapBuilder k a
= BAsc !(Stack k a)
| BMap !(Map k a)

-- Empty builder.
emptyB :: MapBuilder k a
emptyB = BAsc Nada

-- Insert a key and value. Replaces the old value if one already exists for
-- the key.
insertB :: Ord k => k -> a -> MapBuilder k a -> MapBuilder k a
insertB !ky y b = case b of
BAsc stk -> case stk of
Push kx x l stk' -> case compare ky kx of
LT -> BMap (insert ky y (ascLinkAll stk))
EQ -> BAsc (Push ky y l stk')
GT -> case l of
Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y)
Bin{} -> BAsc (Push ky y Tip stk)
Nada -> BAsc (Push ky y Tip Nada)
BMap m -> BMap (insert ky y m)
{-# INLINE insertB #-}

-- Insert a key and value. The new value is combined with the old value if one
-- already exists for the key.
insertWithB
:: Ord k => (a -> a -> a) -> k -> a -> MapBuilder k a -> MapBuilder k a
insertWithB f !ky y b = case b of
BAsc stk -> case stk of
Push kx x l stk' -> case compare ky kx of
LT -> BMap (insertWith f ky y (ascLinkAll stk))
EQ -> BAsc (Push ky (f y x) l stk')
GT -> case l of
Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y)
Bin{} -> BAsc (Push ky y Tip stk)
Nada -> BAsc (Push ky y Tip Nada)
BMap m -> BMap (insertWith f ky y m)
{-# INLINE insertWithB #-}

-- Finalize the builder into a Map.
finishB :: MapBuilder k a -> Map k a
finishB (BAsc stk) = ascLinkAll stk
finishB (BMap m) = m
{-# INLINABLE finishB #-}

{--------------------------------------------------------------------
Utility functions that maintain the balance properties of the tree.
All constructors assume that all values in [l] < [k] and all values
Expand Down
104 changes: 50 additions & 54 deletions containers/src/Data/Map/Strict/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,9 @@ import Data.Map.Internal
, descLinkTop
, descLinkAll
, Stack (..)
, MapBuilder(..)
, emptyB
, finishB
, (!)
, (!?)
, (\\)
Expand Down Expand Up @@ -375,7 +378,6 @@ import Data.Map.Internal
, foldrWithKey
, foldrWithKey'
, glue
, insertMax
, intersection
, isProperSubmapOf
, isProperSubmapOfBy
Expand Down Expand Up @@ -433,7 +435,6 @@ import qualified Data.Set.Internal as Set
import qualified Data.Map.Internal as L
import Utils.Containers.Internal.StrictPair

import Data.Bits (shiftL, shiftR)
#ifdef __GLASGOW_HASKELL__
import Data.Coerce
#endif
Expand Down Expand Up @@ -1451,7 +1452,8 @@ mapAccumRWithKey f a (Bin sx kx x l r) =
-- Also see the performance note on 'fromListWith'.

mapKeysWith :: Ord k2 => (a -> a -> a) -> (k1->k2) -> Map k1 a -> Map k2 a
mapKeysWith c f = fromListWith c . foldrWithKey (\k x xs -> (f k, x) : xs) []
mapKeysWith c f =
finishB . foldlWithKey' (\b kx x -> insertWithB c (f kx) x b) emptyB
#if __GLASGOW_HASKELL__
{-# INLINABLE mapKeysWith #-}
#endif
Expand Down Expand Up @@ -1492,46 +1494,9 @@ fromArgSet (Set.Bin sz (Arg x v) l r) = v `seq` Bin sz x v (fromArgSet l) (fromA
-- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")]
-- > fromList [(5,"c"), (3,"b"), (5, "a")] == fromList [(5,"a"), (3,"b")]

-- For some reason, when 'singleton' is used in fromList or in
-- create, it is not inlined, so we inline it manually.
fromList :: Ord k => [(k,a)] -> Map k a
fromList [] = Tip
fromList [(kx, x)] = x `seq` Bin 1 kx x Tip Tip
fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = x0 `seq` fromList' (Bin 1 kx0 x0 Tip Tip) xs0
| otherwise = x0 `seq` go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
where
not_ordered _ [] = False
not_ordered kx ((ky,_) : _) = kx >= ky
{-# INLINE not_ordered #-}

fromList' t0 xs = Foldable.foldl' ins t0 xs
where ins t (k,x) = insert k x t

go !_ t [] = t
go _ t [(kx, x)] = x `seq` insertMax kx x t
go s l xs@((kx, x) : xss) | not_ordered kx xss = fromList' l xs
| otherwise = case create s xss of
(r, ys, []) -> x `seq` go (s `shiftL` 1) (link kx x l r) ys
(r, _, ys) -> x `seq` fromList' (link kx x l r) ys

-- The create is returning a triple (tree, xs, ys). Both xs and ys
-- represent not yet processed elements and only one of them can be nonempty.
-- If ys is nonempty, the keys in ys are not ordered with respect to tree
-- and must be inserted using fromList'. Otherwise the keys have been
-- ordered so far.
create !_ [] = (Tip, [], [])
create s xs@(xp : xss)
| s == 1 = case xp of (kx, x) | not_ordered kx xss -> x `seq` (Bin 1 kx x Tip Tip, [], xss)
| otherwise -> x `seq` (Bin 1 kx x Tip Tip, xss, [])
| otherwise = case create (s `shiftR` 1) xs of
res@(_, [], _) -> res
(l, [(ky, y)], zs) -> y `seq` (insertMax ky y l, [], zs)
(l, ys@((ky, y):yss), _) | not_ordered ky yss -> (l, [], ys)
| otherwise -> case create (s `shiftR` 1) yss of
(r, zs, ws) -> y `seq` (link ky y l r, zs, ws)
#if __GLASGOW_HASKELL__
{-# INLINABLE fromList #-}
#endif
fromList xs = finishB (Foldable.foldl' (\b (kx, x) -> insertB kx x b) emptyB xs)
{-# INLINE fromList #-} -- INLINE for fusion

-- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'.
--
Expand Down Expand Up @@ -1570,11 +1535,9 @@ fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = x0 `seq` fromList' (Bin 1 kx0
-- > fromListWith (++) $ reverse $ map (\(k, v) -> (k, [v])) someListOfTuples

fromListWith :: Ord k => (a -> a -> a) -> [(k,a)] -> Map k a
fromListWith f xs
= fromListWithKey (\_ x y -> f x y) xs
#if __GLASGOW_HASKELL__
{-# INLINABLE fromListWith #-}
#endif
fromListWith f xs =
finishB (Foldable.foldl' (\b (kx, x) -> insertWithB f kx x b) emptyB xs)
{-# INLINE fromListWith #-} -- INLINE for fusion

-- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWithKey'.
--
Expand All @@ -1585,13 +1548,9 @@ fromListWith f xs
-- Also see the performance note on 'fromListWith'.

fromListWithKey :: Ord k => (k -> a -> a -> a) -> [(k,a)] -> Map k a
fromListWithKey f xs
= Foldable.foldl' ins empty xs
where
ins t (k,x) = insertWithKey f k x t
#if __GLASGOW_HASKELL__
{-# INLINABLE fromListWithKey #-}
#endif
fromListWithKey f xs =
finishB (Foldable.foldl' (\b (kx, x) -> insertWithB (f kx) kx x b) emptyB xs)
{-# INLINE fromListWithKey #-} -- INLINE for fusion

{--------------------------------------------------------------------
Building trees from ascending/descending lists can be done in linear time.
Expand Down Expand Up @@ -1756,3 +1715,40 @@ fromDistinctDescList = descLinkAll . Foldable.foldl' next Nada
next (Push ky y Tip stk) (!kx, !x) = descLinkTop kx x 1 (singleton ky y) stk
next stk (!ky, !y) = Push ky y Tip stk
{-# INLINE fromDistinctDescList #-} -- INLINE for fusion

{--------------------------------------------------------------------
MapBuilder
--------------------------------------------------------------------}

-- Insert a key and value. Replaces the old value if one already exists for
-- the key. Strict in the value.
insertB :: Ord k => k -> a -> MapBuilder k a -> MapBuilder k a
insertB !ky !y b = case b of
BAsc stk -> case stk of
Push kx x l stk' -> case compare ky kx of
LT -> BMap (insert ky y (ascLinkAll stk))
EQ -> BAsc (Push ky y l stk')
GT -> case l of
Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y)
Bin{} -> BAsc (Push ky y Tip stk)
Nada -> BAsc (Push ky y Tip Nada)
BMap m -> BMap (insert ky y m)
{-# INLINE insertB #-}

-- Insert a key and value. The new value is combined with the old value if one
-- already exists for the key. Strict in the inserted value.
insertWithB
:: Ord k => (a -> a -> a) -> k -> a -> MapBuilder k a -> MapBuilder k a
insertWithB f !ky y b = case b of
BAsc stk -> case stk of
Push kx x l stk' -> case compare ky kx of
LT -> BMap (insertWith f ky y (ascLinkAll stk))
EQ -> BAsc (push ky (f y x) l stk')
GT -> case l of
Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y)
Bin{} -> BAsc (push ky y Tip stk)
Nada -> BAsc (push ky y Tip Nada)
BMap m -> BMap (insertWith f ky y m)
where
push kx !x = Push kx x
{-# INLINE insertWithB #-}
Loading

0 comments on commit b7d639b

Please sign in to comment.