@@ -8,41 +8,44 @@ module Text.Fuzzy.Parallel
88 match
99) where
1010
11- import Control.Parallel.Strategies (Eval , evalTraversable ,
12- parListChunk , rseq , using )
11+ import Control.Monad.ST (runST )
12+ import Control.Parallel.Strategies (Eval , Strategy , evalTraversable ,
13+ parListChunk , parTraversable ,
14+ rseq , using )
15+ import Data.Function (on )
1316import Data.List (sortOn )
1417import Data.Maybe (catMaybes )
1518import Data.Monoid.Textual (TextualMonoid )
1619import Data.Ord (Down (Down ))
20+ import Data.Vector (Vector , (!) )
21+ import qualified Data.Vector as V
22+ import qualified Data.Vector.Algorithms.Heap as VA
1723import Prelude hiding (filter )
1824import Text.Fuzzy (Fuzzy (.. ), match )
1925
20- -- | Evaluation that forces the 'score' field
21- forceScore :: TextualMonoid s => Fuzzy t s -> Eval (Fuzzy t s )
22- forceScore it@ Fuzzy {score} = do
23- score' <- rseq score
24- return it{score = score'}
25-
2626-- | The function to filter a list of values by fuzzy search on the text extracted from them.
2727--
28- -- >>> filter "ML" [("Standard ML", 1990),("OCaml",1996),("Scala",2003)] "<" ">" fst False
29- -- [Fuzzy {original = ("Standard ML",1990), rendered = "standard <m><l>", score = 4},Fuzzy {original = ("OCaml",1996), rendered = "oca<m><l>", score = 4}]
30- {-# INLINABLE filter #-}
28+ -- >>> length $ filter 1000 200 "ML" (concat $ replicate 10000 [("Standard ML", 1990),("OCaml",1996),("Scala",2003)]) "<" ">" fst False
29+ -- 200
3130filter :: (TextualMonoid s )
3231 => Int -- ^ Chunk size. 1000 works well.
32+ -> Int -- ^ Max results
3333 -> s -- ^ Pattern.
3434 -> [t ] -- ^ The list of values containing the text to search in.
3535 -> s -- ^ The text to add before each match.
3636 -> s -- ^ The text to add after each match.
3737 -> (t -> s ) -- ^ The function to extract the text from the container.
3838 -> Bool -- ^ Case sensitivity.
3939 -> [Fuzzy t s ] -- ^ The list of results, sorted, highest score first.
40- filter chunkSize pattern ts pre post extract caseSen =
41- sortOn (Down . score)
42- (catMaybes
43- (map (\ t -> match pattern t pre post extract caseSen) ts
40+ filter chunkSize maxRes pattern ts pre post extract caseSen = runST $ do
41+ let v = (V. catMaybes
42+ (V. map (\ t -> match pattern t pre post extract caseSen) (V. fromList ts)
4443 `using`
45- parListChunk chunkSize (evalTraversable forceScore)))
44+ parVectorChunk chunkSize (evalTraversable forceScore)))
45+ v' <- V. unsafeThaw v
46+ VA. partialSortBy (compare `on` (Down . score)) v' maxRes
47+ v'' <- V. unsafeFreeze v'
48+ return $ take maxRes $ V. toList v''
4649
4750-- | Return all elements of the list that have a fuzzy
4851-- match against the pattern. Runs with default settings where
@@ -53,8 +56,40 @@ filter chunkSize pattern ts pre post extract caseSen =
5356{-# INLINABLE simpleFilter #-}
5457simpleFilter :: (TextualMonoid s )
5558 => Int -- ^ Chunk size. 1000 works well.
59+ -> Int -- ^ Max results
5660 -> s -- ^ Pattern to look for.
5761 -> [s ] -- ^ List of texts to check.
5862 -> [s ] -- ^ The ones that match.
59- simpleFilter chunk pattern xs =
60- map original $ filter chunk pattern xs mempty mempty id False
63+ simpleFilter chunk maxRes pattern xs =
64+ map original $ filter chunk maxRes pattern xs mempty mempty id False
65+
66+ --------------------------------------------------------------------------------
67+
68+ -- | Divides a vector in chunks, applies the strategy in parallel to each chunk.
69+ parVectorChunk :: Int -> Strategy a -> Vector a -> Eval (Vector a )
70+ parVectorChunk chunkSize st v =
71+ V. concat <$> parTraversable (evalTraversable st) (chunkVector chunkSize v)
72+
73+ -- >>> chunkVector 3 (V.fromList [0..10])
74+ -- >>> chunkVector 3 (V.fromList [0..11])
75+ -- >>> chunkVector 3 (V.fromList [0..12])
76+ -- [[0,1,2],[3,4,5],[6,7,8],[9,10]]
77+ -- [[0,1,2],[3,4,5],[6,7,8],[9,10,11]]
78+ -- [[0,1,2],[3,4,5],[6,7,8],[9,10,11],[12]]
79+ chunkVector :: Int -> Vector a -> [Vector a ]
80+ chunkVector chunkSize v = do
81+ let indices = pairwise $ [0 , chunkSize .. l- 1 ] ++ [l]
82+ l = V. length v
83+ [V. fromListN (h- l) [v ! j | j <- [l .. h- 1 ]]
84+ | (l,h) <- indices]
85+
86+ pairwise :: [a ] -> [(a ,a )]
87+ pairwise [] = []
88+ pairwise [_] = []
89+ pairwise (x: y: xs) = (x,y) : pairwise (y: xs)
90+
91+ -- | Evaluation that forces the 'score' field
92+ forceScore :: TextualMonoid s => Fuzzy t s -> Eval (Fuzzy t s )
93+ forceScore it@ Fuzzy {score} = do
94+ score' <- rseq score
95+ return it{score = score'}
0 commit comments