From be7b0e0fe67e8c09a81a97e5f14ff00e736b32f5 Mon Sep 17 00:00:00 2001 From: Pepe Iborra Date: Mon, 20 Sep 2021 13:50:43 +0200 Subject: [PATCH 1/6] Revert "Inline Text.Fuzzy to add INLINABLE pragmas (#2215)" This reverts commit 2869077e19406c5d4e72b432e07025732d60023a. --- ghcide/ghcide.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ghcide/ghcide.cabal b/ghcide/ghcide.cabal index 9e7ba63dff..f807faa227 100644 --- a/ghcide/ghcide.cabal +++ b/ghcide/ghcide.cabal @@ -2,7 +2,7 @@ cabal-version: 2.4 build-type: Simple category: Development name: ghcide -version: 1.4.2.1 +version: 1.4.2.0 license: Apache-2.0 license-file: LICENSE author: Digital Asset and Ghcide contributors From 979bacf76e635d3f09c9347736f016bef7751cba Mon Sep 17 00:00:00 2001 From: Pepe Iborra Date: Mon, 20 Sep 2021 20:43:54 +0200 Subject: [PATCH 2/6] Fuzz in parallel --- ghcide/ghcide.cabal | 5 +- .../IDE/Plugin/Completions/Logic.hs | 12 ++-- ghcide/src/Text/Fuzzy/Parallel.hs | 60 +++++++++++++++++++ 3 files changed, 72 insertions(+), 5 deletions(-) create mode 100644 ghcide/src/Text/Fuzzy/Parallel.hs diff --git a/ghcide/ghcide.cabal b/ghcide/ghcide.cabal index f807faa227..80048f94d9 100644 --- a/ghcide/ghcide.cabal +++ b/ghcide/ghcide.cabal @@ -2,7 +2,7 @@ cabal-version: 2.4 build-type: Simple category: Development name: ghcide -version: 1.4.2.0 +version: 1.4.2.1 license: Apache-2.0 license-file: LICENSE author: Digital Asset and Ghcide contributors @@ -64,6 +64,7 @@ library hiedb == 0.4.1.*, lsp-types >= 1.3.0.1 && < 1.4, lsp == 1.2.*, + monoid-subclasses, mtl, network-uri, optparse-applicative, @@ -208,6 +209,8 @@ library Development.IDE.Plugin.Completions.Logic Development.IDE.Session.VersionCheck Development.IDE.Types.Action + Text.Fuzzy.Parallel + ghc-options: -Wall -Wno-name-shadowing -Wincomplete-uni-patterns -Wno-unticked-promoted-constructors if flag(ghc-patched-unboxed-bytecode) diff --git a/ghcide/src/Development/IDE/Plugin/Completions/Logic.hs b/ghcide/src/Development/IDE/Plugin/Completions/Logic.hs index 3510429a90..789a2a7922 100644 --- a/ghcide/src/Development/IDE/Plugin/Completions/Logic.hs +++ b/ghcide/src/Development/IDE/Plugin/Completions/Logic.hs @@ -24,7 +24,7 @@ import Data.Maybe (fromMaybe, isJust, listToMaybe, mapMaybe) import qualified Data.Text as T -import qualified Text.Fuzzy as Fuzzy +import qualified Text.Fuzzy.Parallel as Fuzzy import Control.Monad import Data.Aeson (ToJSON (toJSON)) @@ -53,6 +53,10 @@ import Language.LSP.Types import Language.LSP.Types.Capabilities import qualified Language.LSP.VFS as VFS +-- Chunk size used for parallelizing fuzzy matching +chunkSize :: Int +chunkSize = 1000 + -- From haskell-ide-engine/hie-plugin-api/Haskell/Ide/Engine/Context.hs -- | A context of a declaration in the program @@ -538,9 +542,9 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu filtModNameCompls = map mkModCompl $ mapMaybe (T.stripPrefix enteredQual) - $ Fuzzy.simpleFilter fullPrefix allModNamesAsNS + $ Fuzzy.simpleFilter chunkSize fullPrefix allModNamesAsNS - filtCompls = map Fuzzy.original $ Fuzzy.filter prefixText ctxCompls "" "" label False + filtCompls = map Fuzzy.original $ Fuzzy.filter chunkSize prefixText ctxCompls "" "" label False where mcc = case maybe_parsed of @@ -587,7 +591,7 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu filtListWith f list = [ f label - | label <- Fuzzy.simpleFilter fullPrefix list + | label <- Fuzzy.simpleFilter chunkSize fullPrefix list , enteredQual `T.isPrefixOf` label ] diff --git a/ghcide/src/Text/Fuzzy/Parallel.hs b/ghcide/src/Text/Fuzzy/Parallel.hs new file mode 100644 index 0000000000..8fb27f8ce4 --- /dev/null +++ b/ghcide/src/Text/Fuzzy/Parallel.hs @@ -0,0 +1,60 @@ +-- | Parallel versions of 'filter' and 'simpleFilter' +module Text.Fuzzy.Parallel +( + filter, + simpleFilter, + -- reexports + Fuzzy(..), + match +) where + +import Control.Parallel.Strategies (Eval, evalTraversable, + parListChunk, rseq, using) +import Data.List (sortOn) +import Data.Maybe (catMaybes) +import Data.Monoid.Textual (TextualMonoid) +import Data.Ord (Down (Down)) +import Prelude hiding (filter) +import Text.Fuzzy (Fuzzy (..), match) + +-- | Evaluation that forces the 'score' field +forceScore :: TextualMonoid s => Fuzzy t s -> Eval(Fuzzy t s) +forceScore it@Fuzzy{score} = do + score' <- rseq score + return it{score = score'} + +-- | The function to filter a list of values by fuzzy search on the text extracted from them. +-- +-- >>> filter "ML" [("Standard ML", 1990),("OCaml",1996),("Scala",2003)] "<" ">" fst False +-- [Fuzzy {original = ("Standard ML",1990), rendered = "standard ", score = 4},Fuzzy {original = ("OCaml",1996), rendered = "oca", score = 4}] +{-# INLINABLE filter #-} +filter :: (TextualMonoid s) + => Int -- ^ Chunk size. 1000 works well. + -> s -- ^ Pattern. + -> [t] -- ^ The list of values containing the text to search in. + -> s -- ^ The text to add before each match. + -> s -- ^ The text to add after each match. + -> (t -> s) -- ^ The function to extract the text from the container. + -> Bool -- ^ Case sensitivity. + -> [Fuzzy t s] -- ^ The list of results, sorted, highest score first. +filter chunkSize pattern ts pre post extract caseSen = + sortOn (Down . score) + (catMaybes + (map (\t -> match pattern t pre post extract caseSen) ts + `using` + parListChunk chunkSize (evalTraversable forceScore))) + +-- | Return all elements of the list that have a fuzzy +-- match against the pattern. Runs with default settings where +-- nothing is added around the matches, as case insensitive. +-- +-- >>> simpleFilter "vm" ["vim", "emacs", "virtual machine"] +-- ["vim","virtual machine"] +{-# INLINABLE simpleFilter #-} +simpleFilter :: (TextualMonoid s) + => Int -- ^ Chunk size. 1000 works well. + -> s -- ^ Pattern to look for. + -> [s] -- ^ List of texts to check. + -> [s] -- ^ The ones that match. +simpleFilter chunk pattern xs = + map original $ filter chunk pattern xs mempty mempty id False From 72684d4dcb780de0a6ea75105ea7fb7c3f0c461c Mon Sep 17 00:00:00 2001 From: Pepe Iborra Date: Tue, 21 Sep 2021 11:58:25 +0200 Subject: [PATCH 3/6] Efficiently with vectors --- ghcide/ghcide.cabal | 1 + .../IDE/Plugin/Completions/Logic.hs | 14 ++-- .../IDE/Plugin/Completions/Types.hs | 9 ++- ghcide/src/Text/Fuzzy/Parallel.hs | 71 ++++++++++++++----- 4 files changed, 68 insertions(+), 27 deletions(-) diff --git a/ghcide/ghcide.cabal b/ghcide/ghcide.cabal index 80048f94d9..aebe7c8bd0 100644 --- a/ghcide/ghcide.cabal +++ b/ghcide/ghcide.cabal @@ -87,6 +87,7 @@ library unordered-containers >= 0.2.10.0, utf8-string, vector, + vector-algorithms, hslogger, Diff ^>=0.4.0, vector, diff --git a/ghcide/src/Development/IDE/Plugin/Completions/Logic.hs b/ghcide/src/Development/IDE/Plugin/Completions/Logic.hs index 789a2a7922..a345e24889 100644 --- a/ghcide/src/Development/IDE/Plugin/Completions/Logic.hs +++ b/ghcide/src/Development/IDE/Plugin/Completions/Logic.hs @@ -494,14 +494,14 @@ ppr :: Outputable a => a -> T.Text ppr = T.pack . prettyPrint toggleSnippets :: ClientCapabilities -> CompletionsConfig -> CompletionItem -> CompletionItem -toggleSnippets ClientCapabilities {_textDocument} (CompletionsConfig with _) = - removeSnippetsWhen (not $ with && supported) +toggleSnippets ClientCapabilities {_textDocument} CompletionsConfig{..} = + removeSnippetsWhen (not $ enableSnippets && supported) where supported = Just True == (_textDocument >>= _completion >>= _completionItem >>= _snippetSupport) toggleAutoExtend :: CompletionsConfig -> CompItem -> CompItem -toggleAutoExtend (CompletionsConfig _ False) x = x {additionalTextEdits = Nothing} +toggleAutoExtend CompletionsConfig{enableAutoExtend=False} x = x {additionalTextEdits = Nothing} toggleAutoExtend _ x = x removeSnippetsWhen :: Bool -> CompletionItem -> CompletionItem @@ -539,12 +539,14 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu -} pos = VFS.cursorPos prefixInfo + maxC = maxCompletions config + filtModNameCompls = map mkModCompl $ mapMaybe (T.stripPrefix enteredQual) - $ Fuzzy.simpleFilter chunkSize fullPrefix allModNamesAsNS + $ Fuzzy.simpleFilter chunkSize maxC fullPrefix allModNamesAsNS - filtCompls = map Fuzzy.original $ Fuzzy.filter chunkSize prefixText ctxCompls "" "" label False + filtCompls = map Fuzzy.original $ Fuzzy.filter chunkSize maxC prefixText ctxCompls "" "" label False where mcc = case maybe_parsed of @@ -591,7 +593,7 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu filtListWith f list = [ f label - | label <- Fuzzy.simpleFilter chunkSize fullPrefix list + | label <- Fuzzy.simpleFilter chunkSize maxC fullPrefix list , enteredQual `T.isPrefixOf` label ] diff --git a/ghcide/src/Development/IDE/Plugin/Completions/Types.hs b/ghcide/src/Development/IDE/Plugin/Completions/Types.hs index 3eea61d146..414f3048ca 100644 --- a/ghcide/src/Development/IDE/Plugin/Completions/Types.hs +++ b/ghcide/src/Development/IDE/Plugin/Completions/Types.hs @@ -12,12 +12,13 @@ import qualified Data.Text as T import Data.Aeson (FromJSON, ToJSON) import Data.Text (Text) -import Development.IDE.Spans.Common import Development.IDE.GHC.Compat +import Development.IDE.Spans.Common import GHC.Generics (Generic) import Ide.Plugin.Config (Config) +import qualified Ide.Plugin.Config as Config import Ide.Plugin.Properties -import Ide.PluginUtils (usePropertyLsp) +import Ide.PluginUtils (getClientConfig, usePropertyLsp) import Ide.Types (PluginId) import Language.LSP.Server (MonadLsp) import Language.LSP.Types (CompletionItemKind (..), Uri) @@ -46,11 +47,13 @@ getCompletionsConfig pId = CompletionsConfig <$> usePropertyLsp #snippetsOn pId properties <*> usePropertyLsp #autoExtendOn pId properties + <*> (Config.maxCompletions <$> getClientConfig) data CompletionsConfig = CompletionsConfig { enableSnippets :: Bool, - enableAutoExtend :: Bool + enableAutoExtend :: Bool, + maxCompletions :: Int } data ExtendImport = ExtendImport diff --git a/ghcide/src/Text/Fuzzy/Parallel.hs b/ghcide/src/Text/Fuzzy/Parallel.hs index 8fb27f8ce4..7a7bc699ba 100644 --- a/ghcide/src/Text/Fuzzy/Parallel.hs +++ b/ghcide/src/Text/Fuzzy/Parallel.hs @@ -8,28 +8,28 @@ module Text.Fuzzy.Parallel match ) where -import Control.Parallel.Strategies (Eval, evalTraversable, - parListChunk, rseq, using) +import Control.Monad.ST (runST) +import Control.Parallel.Strategies (Eval, Strategy, evalTraversable, + parListChunk, parTraversable, + rseq, using) +import Data.Function (on) import Data.List (sortOn) import Data.Maybe (catMaybes) import Data.Monoid.Textual (TextualMonoid) import Data.Ord (Down (Down)) +import Data.Vector (Vector, (!)) +import qualified Data.Vector as V +import qualified Data.Vector.Algorithms.Heap as VA import Prelude hiding (filter) import Text.Fuzzy (Fuzzy (..), match) --- | Evaluation that forces the 'score' field -forceScore :: TextualMonoid s => Fuzzy t s -> Eval(Fuzzy t s) -forceScore it@Fuzzy{score} = do - score' <- rseq score - return it{score = score'} - -- | The function to filter a list of values by fuzzy search on the text extracted from them. -- --- >>> filter "ML" [("Standard ML", 1990),("OCaml",1996),("Scala",2003)] "<" ">" fst False --- [Fuzzy {original = ("Standard ML",1990), rendered = "standard ", score = 4},Fuzzy {original = ("OCaml",1996), rendered = "oca", score = 4}] -{-# INLINABLE filter #-} +-- >>> length $ filter 1000 200 "ML" (concat $ replicate 10000 [("Standard ML", 1990),("OCaml",1996),("Scala",2003)]) "<" ">" fst False +-- 200 filter :: (TextualMonoid s) => Int -- ^ Chunk size. 1000 works well. + -> Int -- ^ Max results -> s -- ^ Pattern. -> [t] -- ^ The list of values containing the text to search in. -> s -- ^ The text to add before each match. @@ -37,12 +37,15 @@ filter :: (TextualMonoid s) -> (t -> s) -- ^ The function to extract the text from the container. -> Bool -- ^ Case sensitivity. -> [Fuzzy t s] -- ^ The list of results, sorted, highest score first. -filter chunkSize pattern ts pre post extract caseSen = - sortOn (Down . score) - (catMaybes - (map (\t -> match pattern t pre post extract caseSen) ts +filter chunkSize maxRes pattern ts pre post extract caseSen = runST $ do + let v = (V.catMaybes + (V.map (\t -> match pattern t pre post extract caseSen) (V.fromList ts) `using` - parListChunk chunkSize (evalTraversable forceScore))) + parVectorChunk chunkSize (evalTraversable forceScore))) + v' <- V.unsafeThaw v + VA.partialSortBy (compare `on` (Down . score)) v' maxRes + v'' <- V.unsafeFreeze v' + return $ take maxRes $ V.toList v'' -- | Return all elements of the list that have a fuzzy -- match against the pattern. Runs with default settings where @@ -53,8 +56,40 @@ filter chunkSize pattern ts pre post extract caseSen = {-# INLINABLE simpleFilter #-} simpleFilter :: (TextualMonoid s) => Int -- ^ Chunk size. 1000 works well. + -> Int -- ^ Max results -> s -- ^ Pattern to look for. -> [s] -- ^ List of texts to check. -> [s] -- ^ The ones that match. -simpleFilter chunk pattern xs = - map original $ filter chunk pattern xs mempty mempty id False +simpleFilter chunk maxRes pattern xs = + map original $ filter chunk maxRes pattern xs mempty mempty id False + +-------------------------------------------------------------------------------- + +-- | Divides a vector in chunks, applies the strategy in parallel to each chunk. +parVectorChunk :: Int -> Strategy a -> Vector a -> Eval (Vector a) +parVectorChunk chunkSize st v = + V.concat <$> parTraversable (evalTraversable st) (chunkVector chunkSize v) + +-- >>> chunkVector 3 (V.fromList [0..10]) +-- >>> chunkVector 3 (V.fromList [0..11]) +-- >>> chunkVector 3 (V.fromList [0..12]) +-- [[0,1,2],[3,4,5],[6,7,8],[9,10]] +-- [[0,1,2],[3,4,5],[6,7,8],[9,10,11]] +-- [[0,1,2],[3,4,5],[6,7,8],[9,10,11],[12]] +chunkVector :: Int -> Vector a -> [Vector a] +chunkVector chunkSize v = do + let indices = pairwise $ [0, chunkSize .. l-1] ++ [l] + l = V.length v + [V.fromListN (h-l) [v ! j | j <- [l .. h-1]] + | (l,h) <- indices] + +pairwise :: [a] -> [(a,a)] +pairwise [] = [] +pairwise [_] = [] +pairwise (x:y:xs) = (x,y) : pairwise (y:xs) + +-- | Evaluation that forces the 'score' field +forceScore :: TextualMonoid s => Fuzzy t s -> Eval(Fuzzy t s) +forceScore it@Fuzzy{score} = do + score' <- rseq score + return it{score = score'} From 5c4063baf2dcfe7c7683ec18b60d3da5e0aa9a3b Mon Sep 17 00:00:00 2001 From: Pepe Iborra Date: Tue, 21 Sep 2021 12:08:11 +0200 Subject: [PATCH 4/6] use mapMaybe for compat. with older versions --- ghcide/src/Text/Fuzzy/Parallel.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ghcide/src/Text/Fuzzy/Parallel.hs b/ghcide/src/Text/Fuzzy/Parallel.hs index 7a7bc699ba..5b3bcfcce7 100644 --- a/ghcide/src/Text/Fuzzy/Parallel.hs +++ b/ghcide/src/Text/Fuzzy/Parallel.hs @@ -38,7 +38,7 @@ filter :: (TextualMonoid s) -> Bool -- ^ Case sensitivity. -> [Fuzzy t s] -- ^ The list of results, sorted, highest score first. filter chunkSize maxRes pattern ts pre post extract caseSen = runST $ do - let v = (V.catMaybes + let v = (V.mapMaybe id (V.map (\t -> match pattern t pre post extract caseSen) (V.fromList ts) `using` parVectorChunk chunkSize (evalTraversable forceScore))) From 3cdd4497d1c45956b662db907817c3fe63443be9 Mon Sep 17 00:00:00 2001 From: Pepe Iborra Date: Tue, 21 Sep 2021 20:54:14 +0200 Subject: [PATCH 5/6] switch to stable sort --- ghcide/src/Text/Fuzzy/Parallel.hs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/ghcide/src/Text/Fuzzy/Parallel.hs b/ghcide/src/Text/Fuzzy/Parallel.hs index 5b3bcfcce7..2d0fae49db 100644 --- a/ghcide/src/Text/Fuzzy/Parallel.hs +++ b/ghcide/src/Text/Fuzzy/Parallel.hs @@ -1,7 +1,6 @@ -- | Parallel versions of 'filter' and 'simpleFilter' module Text.Fuzzy.Parallel -( - filter, +( filter, simpleFilter, -- reexports Fuzzy(..), @@ -10,16 +9,14 @@ module Text.Fuzzy.Parallel import Control.Monad.ST (runST) import Control.Parallel.Strategies (Eval, Strategy, evalTraversable, - parListChunk, parTraversable, - rseq, using) + parTraversable, rseq, using) import Data.Function (on) -import Data.List (sortOn) -import Data.Maybe (catMaybes) import Data.Monoid.Textual (TextualMonoid) import Data.Ord (Down (Down)) import Data.Vector (Vector, (!)) import qualified Data.Vector as V -import qualified Data.Vector.Algorithms.Heap as VA +-- need to use a stable sort +import qualified Data.Vector.Algorithms.Tim as VA import Prelude hiding (filter) import Text.Fuzzy (Fuzzy (..), match) @@ -43,7 +40,7 @@ filter chunkSize maxRes pattern ts pre post extract caseSen = runST $ do `using` parVectorChunk chunkSize (evalTraversable forceScore))) v' <- V.unsafeThaw v - VA.partialSortBy (compare `on` (Down . score)) v' maxRes + VA.sortBy (compare `on` (Down . score)) v' v'' <- V.unsafeFreeze v' return $ take maxRes $ V.toList v'' From 2ea06d1bf4d1e74026aff3f97b84218bef9b54a0 Mon Sep 17 00:00:00 2001 From: Pepe Iborra Date: Wed, 22 Sep 2021 10:37:02 +0200 Subject: [PATCH 6/6] clean ups --- .../IDE/Plugin/Completions/Logic.hs | 9 ++-- ghcide/src/Text/Fuzzy/Parallel.hs | 41 ++++++++++++------- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/ghcide/src/Development/IDE/Plugin/Completions/Logic.hs b/ghcide/src/Development/IDE/Plugin/Completions/Logic.hs index a345e24889..eff74b5de3 100644 --- a/ghcide/src/Development/IDE/Plugin/Completions/Logic.hs +++ b/ghcide/src/Development/IDE/Plugin/Completions/Logic.hs @@ -544,9 +544,9 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu filtModNameCompls = map mkModCompl $ mapMaybe (T.stripPrefix enteredQual) - $ Fuzzy.simpleFilter chunkSize maxC fullPrefix allModNamesAsNS + $ Fuzzy.simpleFilter chunkSize fullPrefix allModNamesAsNS - filtCompls = map Fuzzy.original $ Fuzzy.filter chunkSize maxC prefixText ctxCompls "" "" label False + filtCompls = map Fuzzy.original $ Fuzzy.filter chunkSize prefixText ctxCompls "" "" label False where mcc = case maybe_parsed of @@ -593,7 +593,7 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu filtListWith f list = [ f label - | label <- Fuzzy.simpleFilter chunkSize maxC fullPrefix list + | label <- Fuzzy.simpleFilter chunkSize fullPrefix list , enteredQual `T.isPrefixOf` label ] @@ -621,7 +621,8 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu -> return [] | otherwise -> do -- assumes that nubOrdBy is stable - let uniqueFiltCompls = nubOrdBy uniqueCompl filtCompls + -- nubOrd is very slow - take 10x the maximum configured + let uniqueFiltCompls = nubOrdBy uniqueCompl $ take (maxC*10) filtCompls let compls = map (mkCompl plId ideOpts) uniqueFiltCompls return $ filtModNameCompls ++ filtKeywordCompls diff --git a/ghcide/src/Text/Fuzzy/Parallel.hs b/ghcide/src/Text/Fuzzy/Parallel.hs index 2d0fae49db..7af9b40547 100644 --- a/ghcide/src/Text/Fuzzy/Parallel.hs +++ b/ghcide/src/Text/Fuzzy/Parallel.hs @@ -16,6 +16,7 @@ import Data.Ord (Down (Down)) import Data.Vector (Vector, (!)) import qualified Data.Vector as V -- need to use a stable sort +import Data.Bifunctor (second) import qualified Data.Vector.Algorithms.Tim as VA import Prelude hiding (filter) import Text.Fuzzy (Fuzzy (..), match) @@ -26,7 +27,6 @@ import Text.Fuzzy (Fuzzy (..), match) -- 200 filter :: (TextualMonoid s) => Int -- ^ Chunk size. 1000 works well. - -> Int -- ^ Max results -> s -- ^ Pattern. -> [t] -- ^ The list of values containing the text to search in. -> s -- ^ The text to add before each match. @@ -34,7 +34,7 @@ filter :: (TextualMonoid s) -> (t -> s) -- ^ The function to extract the text from the container. -> Bool -- ^ Case sensitivity. -> [Fuzzy t s] -- ^ The list of results, sorted, highest score first. -filter chunkSize maxRes pattern ts pre post extract caseSen = runST $ do +filter chunkSize pattern ts pre post extract caseSen = runST $ do let v = (V.mapMaybe id (V.map (\t -> match pattern t pre post extract caseSen) (V.fromList ts) `using` @@ -42,7 +42,7 @@ filter chunkSize maxRes pattern ts pre post extract caseSen = runST $ do v' <- V.unsafeThaw v VA.sortBy (compare `on` (Down . score)) v' v'' <- V.unsafeFreeze v' - return $ take maxRes $ V.toList v'' + return $ V.toList v'' -- | Return all elements of the list that have a fuzzy -- match against the pattern. Runs with default settings where @@ -53,12 +53,19 @@ filter chunkSize maxRes pattern ts pre post extract caseSen = runST $ do {-# INLINABLE simpleFilter #-} simpleFilter :: (TextualMonoid s) => Int -- ^ Chunk size. 1000 works well. - -> Int -- ^ Max results -> s -- ^ Pattern to look for. -> [s] -- ^ List of texts to check. -> [s] -- ^ The ones that match. -simpleFilter chunk maxRes pattern xs = - map original $ filter chunk maxRes pattern xs mempty mempty id False +simpleFilter chunk pattern xs = + map original $ filter chunk pattern xs mempty mempty id False + +-------------------------------------------------------------------------------- + +-- | Evaluation that forces the 'score' field +forceScore :: TextualMonoid s => Fuzzy t s -> Eval(Fuzzy t s) +forceScore it@Fuzzy{score} = do + score' <- rseq score + return it{score = score'} -------------------------------------------------------------------------------- @@ -75,18 +82,24 @@ parVectorChunk chunkSize st v = -- [[0,1,2],[3,4,5],[6,7,8],[9,10,11],[12]] chunkVector :: Int -> Vector a -> [Vector a] chunkVector chunkSize v = do - let indices = pairwise $ [0, chunkSize .. l-1] ++ [l] + let indices = chunkIndices chunkSize (0,l) l = V.length v - [V.fromListN (h-l) [v ! j | j <- [l .. h-1]] + [V.fromListN (h-l+1) [v ! j | j <- [l .. h]] | (l,h) <- indices] +-- >>> chunkIndices 3 (0,9) +-- >>> chunkIndices 3 (0,10) +-- >>> chunkIndices 3 (0,11) +-- [(0,2),(3,5),(6,8)] +-- [(0,2),(3,5),(6,8),(9,9)] +-- [(0,2),(3,5),(6,8),(9,10)] +chunkIndices :: Int -> (Int,Int) -> [(Int,Int)] +chunkIndices chunkSize (from,to) = + map (second pred) $ + pairwise $ + [from, from+chunkSize .. to-1] ++ [to] + pairwise :: [a] -> [(a,a)] pairwise [] = [] pairwise [_] = [] pairwise (x:y:xs) = (x,y) : pairwise (y:xs) - --- | Evaluation that forces the 'score' field -forceScore :: TextualMonoid s => Fuzzy t s -> Eval(Fuzzy t s) -forceScore it@Fuzzy{score} = do - score' <- rseq score - return it{score = score'}