11-- | Parallel versions of 'filter' and 'simpleFilter'
22
33module Text.Fuzzy.Parallel
4- ( filter ,
5- simpleFilter,
6- match,
4+ ( filter , filter',
5+ simpleFilter, simpleFilter',
6+ match, defChunkSize, defMaxResults,
77 Scored (.. )
88) where
99
@@ -29,7 +29,6 @@ data Scored a = Scored {score :: !Int, original:: !a}
2929-- Just 5
3030--
3131{-# INLINABLE match #-}
32-
3332match :: T. Text -- ^ Pattern in lowercase except for first character
3433 -> T. Text -- ^ The text to search in.
3534 -> Maybe Int -- ^ The score
@@ -70,22 +69,13 @@ match (T.Text pArr pOff pLen) (T.Text sArr sOff sLen) = go 0 1 pOff sOff
7069
7170 toLowerAscii w = if (w - 65 ) < 26 then w .|. 0x20 else w
7271
73- -- | The function to filter a list of values by fuzzy search on the text extracted from them.
74- filter :: Int -- ^ Chunk size. 1000 works well.
75- -> Int -- ^ Max. number of results wanted
76- -> T. Text -- ^ Pattern.
77- -> [t ] -- ^ The list of values containing the text to search in.
78- -> (t -> T. Text ) -- ^ The function to extract the text from the container.
79- -> [Scored t ] -- ^ The list of results, sorted, highest score first.
80- filter chunkSize maxRes pattern ts extract = partialSortByAscScore maxRes perfectScore (concat vss)
81- where
82- -- Preserve case for the first character, make all others lowercase
83- pattern' = case T. uncons pattern of
84- Just (c, rest) -> T. cons c (T. toLower rest)
85- _ -> pattern
86- vss = map (mapMaybe (\ t -> flip Scored t <$> match pattern' (extract t))) (chunkList chunkSize ts)
87- `using` parList (evalList rseq)
88- perfectScore = fromMaybe (error $ T. unpack pattern ) $ match pattern' pattern'
-- | Default chunk size: a sensible value to pass as the chunk-size
-- argument when calling 'simpleFilter'.
defChunkSize :: Int
defChunkSize = 1000
75+
-- | Default cap on the number of results: a sensible value to pass as the
-- max-results argument when calling 'simpleFilter'.
defMaxResults :: Int
defMaxResults = 10
8979
9080-- | Return all elements of the list that have a fuzzy
9181-- match against the pattern. Runs with default settings where
@@ -102,6 +92,52 @@ simpleFilter :: Int -- ^ Chunk size. 1000 works well.
10292simpleFilter chunk maxRes pattern xs =
10393 filter chunk maxRes pattern xs id
10494
95+
-- | Filter a list of values by fuzzy search on the text extracted from them,
-- using a caller-supplied matching function to score how close each
-- candidate is to the pattern.
--
-- Before matching, the pattern is normalised: its first character is kept
-- as is and all remaining characters are lowercased.
--
-- The score obtained by matching the normalised pattern against itself is
-- handed to 'partialSortByAscScore' as the perfect score. If the scoring
-- function returns 'Nothing' for that self-match, evaluating the perfect
-- score calls 'error'.
filter' :: Int           -- ^ Chunk size. 1000 works well.
        -> Int           -- ^ Max. number of results wanted
        -> T.Text        -- ^ Pattern.
        -> [t]           -- ^ The list of values containing the text to search in.
        -> (t -> T.Text) -- ^ The function to extract the text from the container.
        -> (T.Text -> T.Text -> Maybe Int)
        -- ^ Custom scoring function to use for calculating how close words are.
        -- When the function returns 'Nothing', this means the values are incomparable.
        -> [Scored t]    -- ^ The list of results, sorted, highest score first.
filter' chunkSize maxRes pattern ts extract match' =
    partialSortByAscScore maxRes perfectScore (concat vss)
  where
    -- Preserve case for the first character, make all others lowercase
    pattern' = case T.uncons pattern of
      Just (c, rest) -> T.cons c (T.toLower rest)
      _              -> pattern
    -- Score the input chunk by chunk, dropping values the scorer rejects;
    -- the chunks are evaluated under the 'parList' strategy.
    vss = map (mapMaybe (\t -> flip Scored t <$> match' pattern' (extract t))) (chunkList chunkSize ts)
            `using` parList (evalList rseq)
    perfectScore = fromMaybe selfMatchFailure (match' pattern' pattern')
    -- A custom scorer may declare the pattern incomparable with itself;
    -- fail with a message that says so instead of just echoing the pattern.
    selfMatchFailure = error $
      "Text.Fuzzy.Parallel.filter': scoring function returned Nothing when matching the pattern against itself: "
        ++ show (T.unpack pattern)
116+
-- | Filter a list of values by fuzzy search on the text extracted from them,
-- scoring candidates with the default 'match' function.
--
-- This is 'filter'' with 'match' supplied as the scoring function; use
-- 'filter'' directly to plug in a different one.
filter :: Int           -- ^ Chunk size. 1000 works well.
       -> Int           -- ^ Max. number of results wanted
       -> T.Text        -- ^ Pattern.
       -> [t]           -- ^ The list of values containing the text to search in.
       -> (t -> T.Text) -- ^ The function to extract the text from the container.
       -> [Scored t]    -- ^ The list of results, sorted, highest score first.
filter chunkSize maxRes pattern ts extract =
  filter' chunkSize maxRes pattern ts extract match
127+
-- | Return the texts that fuzzily match the pattern, where closeness is
-- decided by the scoring function passed in.
--
-- This is 'filter'' applied to a plain list of texts: each element is
-- matched as is, with 'id' as the extraction function.
{-# INLINABLE simpleFilter' #-}
simpleFilter' :: Int      -- ^ Chunk size. 1000 works well.
              -> Int      -- ^ Max. number of results wanted
              -> T.Text   -- ^ Pattern to look for.
              -> [T.Text] -- ^ List of texts to check.
              -> (T.Text -> T.Text -> Maybe Int)
              -- ^ Custom scoring function to use for calculating how close words are
              -> [Scored T.Text] -- ^ The ones that match.
simpleFilter' chunkSize limit needle candidates scorer =
  filter' chunkSize limit needle candidates id scorer
105141--------------------------------------------------------------------------------
106142
107143chunkList :: Int -> [a ] -> [[a ]]
0 commit comments