stanza/models/common/chuliu_edmonds.py (148 changes: 40 additions & 108 deletions)
@@ -46,75 +46,46 @@ def maybe_pop_cycle(i):
             stack.pop()
             onstack[i] = False
             cycle[i] = True
-            if cycle.sum() > 1:
+            if np.count_nonzero(cycle) > 1:
                 cycles.append(cycle)

     def initialize_strong_connect(i):
         _index[0] += 1
-        index = _index[-1]
+        index = _index[0]
         indices[i] = lowlinks[i] = index - 1
         stack.append(i)
         onstack[i] = True

     def strong_connect(i):
-        # this ridiculous atrocity is because somehow people keep
-        # coming up with graphs which overflow python's call stack
-        # so instead we make our own call stack and turn the recursion
-        # into a loop
-        # see for example
-        # https://github.com/stanfordnlp/stanza/issues/962
-        # https://github.com/spraakbanken/sparv-pipeline/issues/166
-        # in an ideal world this block of code would look like this
-        # initialize_strong_connect(i)
-        # dependents = iter(np.where(np.equal(tree, i))[0])
-        # for j in dependents:
-        #     if indices[j] == -1:
-        #         strong_connect(j)
-        #         lowlinks[i] = min(lowlinks[i], lowlinks[j])
-        #     elif onstack[j]:
-        #         lowlinks[i] = min(lowlinks[i], indices[j])
-        #
-        # maybe_pop_cycle(i)
         call_stack = [(i, None, None)]
-        while len(call_stack) > 0:
+        while call_stack:
             i, dependents_iterator, j = call_stack.pop()
-            if dependents_iterator is None: # first time getting here for this i
+            if dependents_iterator is None:
                 initialize_strong_connect(i)
-                dependents_iterator = iter(np.where(np.equal(tree, i))[0])
-            else: # been here before. j was the dependent we were just considering
+                dependents_arr = np.where(tree == i)[0]
+                n_dependents = len(dependents_arr)
+                # Faster than building and using iterator repeatedly
+                dependents_iterator = (dependents_arr, 0)
+            else:
+                dependents_arr, idx = dependents_iterator
                 lowlinks[i] = min(lowlinks[i], lowlinks[j])
-            for j in dependents_iterator:
+
+            dependents_arr, idx = dependents_iterator
+
+            while idx < len(dependents_arr):
+                j = dependents_arr[idx]
+                idx += 1
                 if indices[j] == -1:
-                    # have to remember where we were...
-                    # put the current iterator & its state on the "call stack"
-                    # we will come back to it later
-                    call_stack.append((i, dependents_iterator, j))
-                    # also, this is what we do next...
+                    # push current iterator & state to call_stack
+                    call_stack.append((i, (dependents_arr, idx), j))
                     call_stack.append((j, None, None))
-                    # this will break this iterator for now
-                    # the next time through, we will continue progressing this iterator
                     break
                 elif onstack[j]:
                     lowlinks[i] = min(lowlinks[i], indices[j])
             else:
-                # this is an intended use of for/else
-                # please stop filing git issues on obscure language features
-                # we finished iterating without a break
-                # and can finally resolve any possible cycles
+                # If completed, maybe cycle
                 maybe_pop_cycle(i)
-            # at this point, there are two cases:
-            #
-            # we iterated all the way through an iterator (the else in the for/else)
-            # and have resolved any possible cycles. can then proceed to the previous
-            # iterator we were considering (or finish, if there are no others)
-            # OR
-            # we have hit a break in the iteration over the dependents
-            # for a node
-            # and we need to dig deeper into the graph and resolve the dependent's dependents
-            # before we can continue the previous node
-            #
-            # either way, we check to see if there are unfinished subtrees
-            # when that is finally done, we can return
+            # If break, will revisit as above

     #-------------------------------------------------------------
     for i in range(len(tree)):
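Note: the frames on call_stack stand in for recursive calls; each frame remembers how far through a node's dependents it got, so popping it resumes exactly where it left off. A minimal standalone sketch of that save-and-resume pattern on the same tree-as-head-array representation (the function name and the plain depth-first walk are illustrative only, not the Tarjan logic above):

import numpy as np

def iterative_dfs_sketch(tree, start):
    """Toy depth-first walk over a head array, showing the explicit-stack trick.

    tree[d] is the head of d, so the dependents of i are np.where(tree == i)[0].
    Illustrative only; the real strong_connect also tracks indices/lowlinks.
    """
    visited = {start}
    finish_order = []
    # each frame: (node, its dependents, index of the next dependent to try)
    call_stack = [(start, np.where(tree == start)[0], 0)]
    while call_stack:
        node, deps, pos = call_stack.pop()
        descended = False
        while pos < len(deps):
            child = deps[pos]
            pos += 1
            if child not in visited:
                visited.add(child)
                # remember where we were in this node's dependents...
                call_stack.append((node, deps, pos))
                # ...and handle the child first, exactly like a recursive call would
                call_stack.append((child, np.where(tree == child)[0], 0))
                descended = True
                break
        if not descended:
            # all dependents handled: this is the point where maybe_pop_cycle(i) fires above
            finish_order.append(node)
    return finish_order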
@@ -126,38 +97,27 @@ def process_cycle(tree, cycle, scores):
"""
Build a subproblem with one cycle broken
"""
# indices of cycle in original tree; (c) in t
cycle_locs = np.where(cycle)[0]
# heads of cycle in original tree; (c) in t
cycle_subtree = tree[cycle]
# scores of cycle in original tree; (c) in R
cycle_scores = scores[cycle, cycle_subtree]
# total score of cycle; () in R
cycle_score = cycle_scores.sum()
cycle_score = np.sum(cycle_scores)

# locations of noncycle; (t) in [0,1]
noncycle = np.logical_not(cycle)
# indices of noncycle in original tree; (n) in t
noncycle_locs = np.where(noncycle)[0]
#print(cycle_locs, noncycle_locs)

# scores of cycle's potential heads; (c x n) - (c) + () -> (n x c) in R
metanode_head_scores = scores[cycle][:,noncycle] - cycle_scores[:,None] + cycle_score
# scores of cycle's potential dependents; (n x c) in R
metanode_dep_scores = scores[noncycle][:,cycle]
# best noncycle head for each cycle dependent; (n) in c
# Reduce allocations: np.ix_ is faster than double-index for repeated use
metanode_head_scores = scores[np.ix_(cycle, noncycle)] - cycle_scores[:, None] + cycle_score
metanode_dep_scores = scores[np.ix_(noncycle, cycle)]

metanode_heads = np.argmax(metanode_head_scores, axis=0)
# best cycle head for each noncycle dependent; (n) in c
metanode_deps = np.argmax(metanode_dep_scores, axis=1)

# scores of noncycle graph; (n x n) in R
subscores = scores[noncycle][:,noncycle]
# pad to contracted graph; (n+1 x n+1) in R
subscores = np.pad(subscores, ( (0,1) , (0,1) ), 'constant')
# set the contracted graph scores of cycle's potential heads; (c x n)[:, (n) in n] in R -> (n) in R
subscores = scores[np.ix_(noncycle, noncycle)]
subscores = np.pad(subscores, ((0, 1), (0, 1)), 'constant')

subscores[-1, :-1] = metanode_head_scores[metanode_heads, np.arange(len(noncycle_locs))]
# set the contracted graph scores of cycle's potential dependents; (n x c)[(n) in n] in R-> (n) in R
subscores[:-1,-1] = metanode_dep_scores[np.arange(len(noncycle_locs)), metanode_deps]
subscores[:-1, -1] = metanode_dep_scores[np.arange(len(noncycle_locs)), metanode_deps]

return subscores, cycle_locs, noncycle_locs, metanode_heads, metanode_deps


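Note: the np.ix_ rewrite above selects the wanted rows and columns in one indexing operation instead of chaining two fancy-indexing passes, and the result is identical. A quick equivalence check with a made-up scores matrix (illustrative only):

import numpy as np

rng = np.random.default_rng(0)
scores = rng.normal(size=(6, 6))
cycle = np.array([False, True, True, False, False, True])
noncycle = ~cycle

# chained fancy indexing: copies the selected rows first, then selects columns
a = scores[cycle][:, noncycle]
# np.ix_ builds an open mesh, so rows and columns are picked in a single pass
b = scores[np.ix_(cycle, noncycle)]
assert np.array_equal(a, b)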
@@ -166,42 +126,31 @@ def expand_contracted_tree(tree, contracted_tree, cycle_locs, noncycle_locs, met
     Given a partially solved tree with a cycle and a solved subproblem
     for the cycle, build a larger solution without the cycle
     """
-    # head of the cycle; () in n
-    #print(contracted_tree)
     cycle_head = contracted_tree[-1]
-    # fixed tree: (n) in n+1
     contracted_tree = contracted_tree[:-1]
-    # initialize new tree; (t) in 0
+
     new_tree = -np.ones_like(tree)
-    #print(0, new_tree)
-    # fixed tree with no heads coming from the cycle: (n) in [0,1]
     contracted_subtree = contracted_tree < len(contracted_tree)
-    # add the nodes to the new tree (t)[(n)[(n) in [0,1]] in t] in t = (n)[(n)[(n) in [0,1]] in n] in t
+
+    # Assign: non-cycle node heads from non-cycle parents
     new_tree[noncycle_locs[contracted_subtree]] = noncycle_locs[contracted_tree[contracted_subtree]]
-    #print(1, new_tree)
-    # fixed tree with heads coming from the cycle: (n) in [0,1]
-    contracted_subtree = np.logical_not(contracted_subtree)
-    # add the nodes to the tree (t)[(n)[(n) in [0,1]] in t] in t = (c)[(n)[(n) in [0,1]] in c] in t
-    new_tree[noncycle_locs[contracted_subtree]] = cycle_locs[metanode_deps[contracted_subtree]]
-    #print(2, new_tree)
-    # add the old cycle to the tree; (t)[(c) in t] in t = (t)[(c) in t] in t
+    # Assign: non-cycle node heads from cycle parents
+    contracted_not_subtree = ~contracted_subtree
+    new_tree[noncycle_locs[contracted_not_subtree]] = cycle_locs[metanode_deps[contracted_not_subtree]]
+    # Cycle part: assign original cycle tree relationships
     new_tree[cycle_locs] = tree[cycle_locs]
-    #print(3, new_tree)
-    # root of the cycle; (n)[() in n] in c = () in c
+
     cycle_root = metanode_heads[cycle_head]
-    # add the root of the cycle to the new tree; (t)[(c)[() in c] in t] = (c)[() in c]
     new_tree[cycle_locs[cycle_root]] = noncycle_locs[cycle_head]
-    #print(4, new_tree)
     return new_tree

 def prepare_scores(scores):
     """
     Alter the scores matrix to avoid self loops and handle the root
     """
-    # prevent self-loops, set up the root location
     np.fill_diagonal(scores, -float('inf')) # prevent self-loops
     scores[0] = -float('inf')
-    scores[0,0] = 0
+    scores[0, 0] = 0

 def chuliu_edmonds(scores):
     subtree_stack = []
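Note: because prepare_scores masks the matrix in place, a plain row-wise argmax already respects "no token heads itself" and "node 0 is the root". A tiny worked example with a made-up 3x3 matrix (illustrative only):

import numpy as np

scores = np.array([[1.0, 5.0, 2.0],
                   [9.0, 7.0, 3.0],
                   [4.0, 8.0, 6.0]])
np.fill_diagonal(scores, -float('inf'))  # no token may head itself
scores[0] = -float('inf')                # nothing may be the root's head...
scores[0, 0] = 0                         # ...except the root itself
tree = np.argmax(scores, axis=1)
print(tree)  # [0 0 1]: node 0 points at itself, word 1 attaches to the root, word 2 to word 1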
@@ -210,32 +159,15 @@ def chuliu_edmonds(scores):
     tree = np.argmax(scores, axis=1)
     cycles = tarjan(tree)
-
-    #print(scores)
-    #print(cycles)
-
-    # recursive implementation:
-    #if cycles:
-    #    # t = len(tree); c = len(cycle); n = len(noncycle)
-    #    # cycles.pop(): locations of cycle; (t) in [0,1]
-    #    subscores, cycle_locs, noncycle_locs, metanode_heads, metanode_deps = process_cycle(tree, cycles.pop(), scores)
-    #    # MST with contraction; (n+1) in n+1
-    #    contracted_tree = chuliu_edmonds(subscores)
-    #    tree = expand_contracted_tree(tree, contracted_tree, cycle_locs, noncycle_locs, metanode_heads, metanode_deps)
-    # unfortunately, while the recursion is simpler to understand, it can get too deep for python's stack limit
-    # so instead we make our own recursion, with blackjack and (you know how it goes)
-
     while cycles:
-        # t = len(tree); c = len(cycle); n = len(noncycle)
-        # cycles.pop(): locations of cycle; (t) in [0,1]
         subscores, cycle_locs, noncycle_locs, metanode_heads, metanode_deps = process_cycle(tree, cycles.pop(), scores)
         subtree_stack.append((tree, cycles, scores, subscores, cycle_locs, noncycle_locs, metanode_heads, metanode_deps))

         scores = subscores
         prepare_scores(scores)
         tree = np.argmax(scores, axis=1)
         cycles = tarjan(tree)

-    while len(subtree_stack) > 0:
+    while subtree_stack:
         contracted_tree = tree
         (tree, cycles, scores, subscores, cycle_locs, noncycle_locs, metanode_heads, metanode_deps) = subtree_stack.pop()
         tree = expand_contracted_tree(tree, contracted_tree, cycle_locs, noncycle_locs, metanode_heads, metanode_deps)
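Note: an end-to-end sketch of calling this module on a small made-up scores matrix, where scores[dep, head] holds attachment scores and index 0 is reserved for the artificial root. The import path is assumed from the file location above, and the printed result assumes chuliu_edmonds returns the head vector as in the stanza source:

import numpy as np
# Assumed import path; in the stanza repo this module lives at
# stanza/models/common/chuliu_edmonds.py.
from stanza.models.common.chuliu_edmonds import chuliu_edmonds

# Made-up scores for a 3-word sentence plus the root at index 0.
scores = np.array([
    [0.0, 0.0, 0.0, 0.0],
    [9.0, 0.0, 6.0, 3.0],
    [2.0, 8.0, 0.0, 5.0],
    [1.0, 2.0, 7.0, 0.0],
])
tree = chuliu_edmonds(scores.copy())  # prepare_scores mutates its argument in place
print(tree)  # expected [0 0 1 2] here: word 1 attaches to the root, 2 to 1, 3 to 2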