@@ -10,14 +10,12 @@ module Text.Fuzzy.Parallel
1010import Control.Monad.ST (runST )
1111import Control.Parallel.Strategies (Eval , Strategy , evalTraversable ,
1212 parTraversable , rseq , using )
13- import Data.Function (on )
1413import Data.Monoid.Textual (TextualMonoid )
15- import Data.Ord (Down (Down ))
1614import Data.Vector (Vector , (!) )
1715import qualified Data.Vector as V
1816-- need to use a stable sort
1917import Data.Bifunctor (second )
20- import qualified Data.Vector.Algorithms.Tim as VA
18+ import qualified Data.Monoid.Factorial as T
2119import Prelude hiding (filter )
2220import Text.Fuzzy (Fuzzy (.. ), match )
2321
@@ -27,22 +25,20 @@ import Text.Fuzzy (Fuzzy (..), match)
2725-- 200
2826filter :: (TextualMonoid s )
2927 => Int -- ^ Chunk size. 1000 works well.
28+ -> Int -- ^ Max. number of results wanted
3029 -> s -- ^ Pattern.
3130 -> [t ] -- ^ The list of values containing the text to search in.
3231 -> s -- ^ The text to add before each match.
3332 -> s -- ^ The text to add after each match.
3433 -> (t -> s ) -- ^ The function to extract the text from the container.
3534 -> Bool -- ^ Case sensitivity.
3635 -> [Fuzzy t s ] -- ^ The list of results, sorted, highest score first.
37- filter chunkSize pattern ts pre post extract caseSen = runST $ do
36+ filter chunkSize maxRes pattern ts pre post extract caseSen = runST $ do
3837 let v = (V. mapMaybe id
3938 (V. map (\ t -> match pattern t pre post extract caseSen) (V. fromList ts)
4039 `using`
4140 parVectorChunk chunkSize (evalTraversable forceScore)))
42- v' <- V. unsafeThaw v
43- VA. sortBy (compare `on` (Down . score)) v'
44- v'' <- V. unsafeFreeze v'
45- return $ V. toList v''
41+ return $ partialSortByAscScore maxRes (T. length pattern ) v
4642
4743-- | Return all elements of the list that have a fuzzy
4844-- match against the pattern. Runs with default settings where
@@ -53,11 +49,12 @@ filter chunkSize pattern ts pre post extract caseSen = runST $ do
5349{-# INLINABLE simpleFilter #-}
5450simpleFilter :: (TextualMonoid s )
5551 => Int -- ^ Chunk size. 1000 works well.
52+ -> Int -- ^ Max. number of results wanted
5653 -> s -- ^ Pattern to look for.
5754 -> [s ] -- ^ List of texts to check.
5855 -> [s ] -- ^ The ones that match.
59- simpleFilter chunk pattern xs =
60- map original $ filter chunk pattern xs mempty mempty id False
56+ simpleFilter chunk maxRes pattern xs =
57+ map original $ filter chunk maxRes pattern xs mempty mempty id False
6158
6259--------------------------------------------------------------------------------
6360
@@ -103,3 +100,35 @@ pairwise :: [a] -> [(a,a)]
103100pairwise [] = []
104101pairwise [_] = []
105102pairwise (x: y: xs) = (x,y) : pairwise (y: xs)
103+
104+ -- | A stable partial sort ascending by score. O(N) best case, O(wanted*N) worst case
105+ --- >>> partialSortByAscScore 3 5 $ V.fromList $ map (\x -> Fuzzy x x (length x)) ["A","B","ABCDE","ABBC"]
106+ -- [Fuzzy {original = "ABCDE", rendered = "ABCDE", score = 5},Fuzzy {original = "ABBC", rendered = "ABBC", score = 4},Fuzzy {original = "A", rendered = "A", score = 1}]
107+ partialSortByAscScore :: TextualMonoid s
108+ => Int -- ^ Number of items needed
109+ -> Int -- ^ Value of a perfect score
110+ -> Vector (Fuzzy t s )
111+ -> [Fuzzy t s ]
112+ partialSortByAscScore wantedCount perfectScore v = loop 0 (SortState minBound perfectScore 0 ) [] where
113+ l = V. length v
114+ loop index st@ SortState {.. } acc
115+ | foundCount == wantedCount = reverse acc
116+ | index == l
117+ = if bestScoreSeen < scoreWanted
118+ then loop 0 st{scoreWanted = bestScoreSeen, bestScoreSeen = minBound } acc
119+ else reverse acc
120+ | otherwise =
121+ case v! index of
122+ x | score x == scoreWanted
123+ -> loop (index+ 1 ) st{foundCount = foundCount+ 1 } (x: acc)
124+ | score x < scoreWanted && score x > bestScoreSeen
125+ -> loop (index+ 1 ) st{bestScoreSeen = score x} acc
126+ | otherwise
127+ -> loop (index+ 1 ) st acc
128+
129+ data SortState a = SortState
130+ { bestScoreSeen :: ! Int
131+ , scoreWanted :: ! Int
132+ , foundCount :: ! Int
133+ }
134+ deriving Show
0 commit comments