11{-# LANGUAGE OverloadedRecordDot #-}
22{-# LANGUAGE OverloadedStrings #-}
33
4- module Ide.Plugin.SemanticTokens.Tokenize (hieAstSpanIdentifiers ) where
4+ module Ide.Plugin.SemanticTokens.Tokenize (computeRangeHsSemanticTokenTypeList ) where
55
66import Control.Lens (Identity (runIdentity ))
7- import Control.Monad (forM_ , guard )
7+ import Control.Monad (foldM , guard )
88import Control.Monad.State.Strict (MonadState (get ),
99 MonadTrans (lift ),
10- execStateT , modify , put )
11- import Control.Monad.Trans.State.Strict (StateT )
10+ evalStateT , modify , put )
11+ import Control.Monad.Trans.State.Strict (StateT , runStateT )
1212import Data.Char (isAlphaNum )
13+ import Data.DList (DList )
14+ import qualified Data.DList as DL
1315import qualified Data.Map.Strict as M
1416import qualified Data.Map.Strict as Map
15- import qualified Data.Set as S
1617import Data.Text (Text )
1718import qualified Data.Text as T
1819import qualified Data.Text.Rope as Char
@@ -22,95 +23,100 @@ import Data.Text.Utf16.Rope.Mixed (Rope)
2223import qualified Data.Text.Utf16.Rope.Mixed as Rope
2324import Development.IDE.GHC.Compat
2425import Development.IDE.GHC.Error (realSrcSpanToCodePointRange )
25- import Ide.Plugin.SemanticTokens.Types (RangeIdSetMap )
26+ import Ide.Plugin.SemanticTokens.Types (HsSemanticTokenType (TModule ),
27+ RangeHsSemanticTokenTypes (.. ))
2628import Language.LSP.Protocol.Types (Position (Position ),
2729 Range (Range ), UInt , mkRange )
2830import Language.LSP.VFS hiding (line )
2931import Prelude hiding (length , span )
3032
3133type Tokenizer m a = StateT PTokenState m a
34+ type HsSemanticLookup = Identifier -> Maybe HsSemanticTokenType
3235
3336
3437data PTokenState = PTokenState
35- { rangeIdSetMap :: ! RangeIdSetMap ,
36- rope :: ! Rope , -- the remains of rope we are working on
37- cursor :: ! Char. Position, -- the cursor position of the current rope to the start of the original file in code point position
38- columnsInUtf16 :: ! UInt -- the column of the start of the current rope in utf16
38+ {
39+ rope :: ! Rope -- the remains of rope we are working on
40+ , cursor :: ! Char. Position -- the cursor position of the current rope to the start of the original file in code point position
41+ , columnsInUtf16 :: ! UInt -- the column of the start of the current rope in utf16
3942 }
4043
41- runTokenizer :: (Monad m ) => Tokenizer m a -> PTokenState -> m RangeIdSetMap
42- runTokenizer p st = rangeIdSetMap <$> execStateT p st
43-
4444data SplitResult
4545 = NoSplit (Text , Range ) -- does not need to split, token text, token range
4646 | Split (Text , Range , Range ) -- token text, prefix range(module range), token range
4747 deriving (Show )
4848
49+ getSplitTokenText :: SplitResult -> Text
50+ getSplitTokenText (NoSplit (t, _)) = t
51+ getSplitTokenText (Split (t, _, _)) = t
52+
4953
5054mkPTokenState :: VirtualFile -> PTokenState
5155mkPTokenState vf =
5256 PTokenState
53- { rangeIdSetMap = mempty ,
57+ {
5458 rope = Rope. fromText $ toText vf. _file_text,
5559 cursor = Char. Position 0 0 ,
5660 columnsInUtf16 = 0
5761 }
5862
59- addRangeIdSetMap :: (Monad m ) => Range -> Identifier -> Tokenizer m ()
60- addRangeIdSetMap r i = modify $ \ s -> s {rangeIdSetMap = Map. insertWith (<>) r (S. singleton i) $ rangeIdSetMap s}
61-
62- -- lift a Tokenizer Maybe () to Tokenizer m (),
63- -- if the Maybe is Nothing, do nothing, recover the state
64- -- if the Maybe is Just (), do the action, and keep the state
65- liftMaybeM :: (Monad m ) => Tokenizer Maybe () -> Tokenizer m ()
63+ -- lift a Tokenizer Maybe a to Tokenizer m a,
64+ -- if the Maybe is Nothing, do nothing, recover the state, and return the mempty value
65+ -- if the Maybe is Just x, do the action, and keep the state, and return x
66+ liftMaybeM :: (Monad m , Monoid a ) => Tokenizer Maybe a -> Tokenizer m a
6667liftMaybeM p = do
6768 st <- get
68- forM_ (execStateT p st) put
69+ maybe ( return mempty ) ( \ (ans, st') -> put st' >> return ans) $ runStateT p st
6970
70- hieAstSpanIdentifiers :: VirtualFile -> HieAST a -> RangeIdSetMap
71- hieAstSpanIdentifiers vf ast = runIdentity $ runTokenizer (foldAst ast) (mkPTokenState vf)
71+ foldMapM :: ( Monad m , Monoid b , Foldable t ) => ( a -> m b ) -> t a -> m b
72+ foldMapM f ta = foldM ( \ b a -> mappend b <$> f a) mempty ta
7273
74+ computeRangeHsSemanticTokenTypeList :: HsSemanticLookup -> VirtualFile -> HieAST a -> RangeHsSemanticTokenTypes
75+ computeRangeHsSemanticTokenTypeList lookupHsTokenType vf ast =
76+ RangeHsSemanticTokenTypes $ DL. toList $ runIdentity $ evalStateT (foldAst lookupHsTokenType ast) (mkPTokenState vf)
7377-- | foldAst
7478-- visit every leaf node in the ast in depth first order
75- foldAst :: (Monad m ) => HieAST t -> Tokenizer m ()
76- foldAst ast = if null (nodeChildren ast)
77- then liftMaybeM (visitLeafIds ast)
78- else mapM_ foldAst $ nodeChildren ast
79+ foldAst :: (Monad m ) => HsSemanticLookup -> HieAST t -> Tokenizer m (DList ( Range , HsSemanticTokenType ) )
80+ foldAst lookupHsTokenType ast = if null (nodeChildren ast)
81+ then liftMaybeM (visitLeafIds lookupHsTokenType ast)
82+ else foldMapM ( foldAst lookupHsTokenType) $ nodeChildren ast
7983
80- visitLeafIds :: HieAST t -> Tokenizer Maybe ()
81- visitLeafIds leaf = liftMaybeM $ do
84+ visitLeafIds :: HsSemanticLookup -> HieAST t -> Tokenizer Maybe (DList ( Range , HsSemanticTokenType ) )
85+ visitLeafIds lookupHsTokenType leaf = liftMaybeM $ do
8286 let span = nodeSpan leaf
8387 (ran, token) <- focusTokenAt leaf
8488 -- if `focusTokenAt` succeed, we can safely assume we have shift the cursor correctly
8589 -- we do not need to recover the cursor state, even if the following computation failed
8690 liftMaybeM $ do
8791 -- only handle the leaf node with single column token
8892 guard $ srcSpanStartLine span == srcSpanEndLine span
89- splitResult <- lift $ splitRangeByText token ran
90- mapM_ (combineNodeIds ran splitResult) $ Map. filterWithKey (\ k _ -> k == SourceInfo ) $ getSourcedNodeInfo $ sourcedNodeInfo leaf
93+ splitResult <- lift $ splitRangeByText token ran
94+ foldMapM (combineNodeIds lookupHsTokenType ran splitResult) $ Map. filterWithKey (\ k _ -> k == SourceInfo ) $ getSourcedNodeInfo $ sourcedNodeInfo leaf
9195 where
92- combineNodeIds :: (Monad m ) => Range -> SplitResult -> NodeInfo a -> Tokenizer m ()
93- combineNodeIds ran ranSplit (NodeInfo _ _ bd) = mapM_ (getIdentifier ran ranSplit) (M. keys bd)
94- getIdentifier :: (Monad m ) => Range -> SplitResult -> Identifier -> Tokenizer m ()
95- getIdentifier ran ranSplit idt = liftMaybeM $ do
96+ combineNodeIds :: (Monad m ) => HsSemanticLookup -> Range -> SplitResult -> NodeInfo a -> Tokenizer m (DList (Range , HsSemanticTokenType ))
97+ combineNodeIds lookupHsTokenType ran ranSplit (NodeInfo _ _ bd) =
98+ case (maybeTokenType, ranSplit) of
99+ (Nothing , _) -> return mempty
100+ (Just TModule , _) -> return $ DL. singleton (ran, TModule )
101+ (Just tokenType, NoSplit (_, tokenRan)) -> return $ DL. singleton (tokenRan, tokenType)
102+ (Just tokenType, Split (_, ranPrefix, tokenRan)) -> return $ DL. fromList [(ranPrefix, TModule ),(tokenRan, tokenType)]
103+ where maybeTokenType = foldMap (getIdentifier lookupHsTokenType ranSplit) (M. keys bd)
104+
105+ getIdentifier :: HsSemanticLookup -> SplitResult -> Identifier -> Maybe HsSemanticTokenType
106+ getIdentifier lookupHsTokenType ranSplit idt = do
96107 case idt of
97- Left _moduleName -> addRangeIdSetMap ran idt
108+ Left _moduleName -> Just TModule
98109 Right name -> do
99- occStr <- lift $ T. pack <$> case (occNameString . nameOccName) name of
110+ occStr <- T. pack <$> case (occNameString . nameOccName) name of
100111 -- the generated selector name with {-# LANGUAGE DuplicateRecordFields #-}
101112 ' $' : ' s' : ' e' : ' l' : ' :' : xs -> Just $ takeWhile (/= ' :' ) xs
102113 -- other generated names that should not be visible
103114 ' $' : c : _ | isAlphaNum c -> Nothing
104115 c : ' :' : _ | isAlphaNum c -> Nothing
105116 ns -> Just ns
106- case ranSplit of
107- (NoSplit (tk, r)) -> do
108- guard $ tk == occStr
109- addRangeIdSetMap r idt
110- (Split (tk, r1, r2)) -> do
111- guard $ tk == occStr
112- addRangeIdSetMap r1 (Left $ mkModuleName " " )
113- addRangeIdSetMap r2 idt
117+ guard $ getSplitTokenText ranSplit == occStr
118+ lookupHsTokenType idt
119+
114120
115121focusTokenAt ::
116122 -- | leaf node we want to focus on
0 commit comments