1
1
from __future__ import annotations
2
2
3
- from typing import List , TYPE_CHECKING , DefaultDict , Any , Optional
3
+ from typing import List , TYPE_CHECKING , DefaultDict , Any , Optional , Tuple , Dict
4
4
5
5
from operator import attrgetter
6
- from collections import defaultdict
6
+ from collections import defaultdict , OrderedDict
7
7
import numpy as np
8
8
9
9
if TYPE_CHECKING :
@@ -65,13 +65,18 @@ def last_group(self) -> str:
65
65
66
66
class FeatureExtractor :
67
67
68
+ feature_cache : Dict [Tuple [SingleAnalysis , ...], DefaultDict [Any , np .int32 ]] = dict ()
69
+
68
70
def __init__ (self , use_cache : bool ):
69
71
self .use_cache = use_cache
70
72
71
73
def extract_from_trigram (self , trigram : List [SingleAnalysis ]) -> DefaultDict [Any , np .int32 ]:
72
74
73
75
if self .use_cache :
74
- raise ValueError (f"feature cache for FeatureExtractor has not been implemented yet!" )
76
+ # raise ValueError(f"feature cache for FeatureExtractor has not been implemented yet!")
77
+ cached = self .feature_cache .get (tuple (trigram ))
78
+ if cached is not None :
79
+ return cached
75
80
76
81
feats = defaultdict (np .int32 )
77
82
@@ -89,7 +94,7 @@ def extract_from_trigram(self, trigram: List[SingleAnalysis]) -> DefaultDict[Any
89
94
r2 : str = w2 .lemma
90
95
r3 : str = w3 .lemma
91
96
92
- ig1 : str = '+' .join (w1 .igs )
97
+ # ig1: str = '+'.join(w1.igs)
93
98
ig2 : str = '+' .join (w2 .igs )
94
99
ig3 : str = '+' .join (w3 .igs )
95
100
@@ -118,8 +123,12 @@ def extract_from_trigram(self, trigram: List[SingleAnalysis]) -> DefaultDict[Any
118
123
119
124
feats [f"22:{ trigram [2 ].group_boundaries .shape [0 ]} " ] += 1
120
125
121
- for k in feats .keys ():
122
- feats [k ] = np .int32 (feats [k ])
126
+ # do this outside
127
+ # for k in feats.keys():
128
+ # feats[k] = np.int32(feats[k])
129
+
130
+ if self .use_cache :
131
+ self .feature_cache [tuple (trigram )] = feats
123
132
124
133
return feats
125
134
@@ -138,12 +147,23 @@ def best_path(self, sentence: List[WordAnalysis]) -> 'PerceptronAmbiguityResolve
138
147
PerceptronAmbiguityResolver .sentence_begin ,
139
148
PerceptronAmbiguityResolver .sentence_begin ,
140
149
previous = None ,
141
- score = 0
150
+ score = np . float32 ( 0 )
142
151
)
143
152
]
153
+ # current_list: OrderedDict['PerceptronAmbiguityResolver.Hypothesis', np.float32] = OrderedDict(
154
+ # [
155
+ # (PerceptronAmbiguityResolver.Hypothesis(
156
+ # PerceptronAmbiguityResolver.sentence_begin,
157
+ # PerceptronAmbiguityResolver.sentence_begin,
158
+ # previous=None,
159
+ # score=np.float32(0)
160
+ # ), np.float32(0))
161
+ # ]
162
+ # )
144
163
145
164
for analysis_data in sentence :
146
165
next_list : List ['PerceptronAmbiguityResolver.Hypothesis' ] = []
166
+ # next_list: OrderedDict['PerceptronAmbiguityResolver.Hypothesis', np.float32] = OrderedDict()
147
167
148
168
analyses : List [SingleAnalysis ] = list (analysis_data .analysis_results )
149
169
@@ -157,15 +177,26 @@ def best_path(self, sentence: List[WordAnalysis]) -> 'PerceptronAmbiguityResolve
157
177
158
178
trigram_score = np .float32 (0 )
159
179
for key in features .keys ():
160
- trigram_score += self .model .get_ (key ) * features .get (key )
180
+ trigram_score += np . float32 ( self .model .get_ (key ) * np . float32 ( features .get (key )) )
161
181
162
182
new_hyp = PerceptronAmbiguityResolver .Hypothesis (
163
183
h .current ,
164
184
analysis ,
165
185
h ,
166
- score = h .score + trigram_score
186
+ score = np . float32 ( h .score + trigram_score )
167
187
)
168
- next_list .append (new_hyp )
188
+
189
+ i , found = next (((i , c ) for i , c in enumerate (next_list ) if new_hyp == c ), (None , None ))
190
+
191
+ if found is not None and new_hyp .score > found .score :
192
+ next_list [i ] = new_hyp
193
+ elif found is None :
194
+ next_list .append (new_hyp )
195
+ # if new_hyp in next_list:
196
+ # new_hyp.score = max(next_list[new_hyp], new_hyp.score)
197
+
198
+ # next_list[new_hyp] = new_hyp.score
199
+ # next_list.append(new_hyp)
169
200
170
201
current_list = next_list
171
202
@@ -175,7 +206,7 @@ def best_path(self, sentence: List[WordAnalysis]) -> 'PerceptronAmbiguityResolve
175
206
176
207
trigram_score = np .float32 (0 )
177
208
for key in features .keys ():
178
- trigram_score += self .model .get_ (key ) * features .get (key )
209
+ trigram_score += np . float32 ( self .model .get_ (key ) * np . float32 ( features .get (key )) )
179
210
180
211
h .score += trigram_score
181
212
@@ -189,10 +220,8 @@ def best_path(self, sentence: List[WordAnalysis]) -> 'PerceptronAmbiguityResolve
189
220
190
221
return PerceptronAmbiguityResolver .DecodeResult (list (reversed (result )), best_score )
191
222
192
-
193
-
194
223
class DecodeResult :
195
- def __init__ (self , best_parse : List [SingleAnalysis ], score : float ):
224
+ def __init__ (self , best_parse : List [SingleAnalysis ], score : np . float32 ):
196
225
self .best_parse = best_parse
197
226
self .score = score
198
227
@@ -202,7 +231,7 @@ def __init__(
202
231
prev : SingleAnalysis ,
203
232
current : SingleAnalysis ,
204
233
previous : Optional ['PerceptronAmbiguityResolver.Hypothesis' ],
205
- score : float
234
+ score : np . float32
206
235
):
207
236
self .prev = prev
208
237
self .current = current
0 commit comments