Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 22 additions & 53 deletions stanza/models/common/chuliu_edmonds.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,25 @@ def tarjan(tree):
has an output of
[np.array([False, True, True, True])]
"""
indices = -np.ones_like(tree)
lowlinks = -np.ones_like(tree)
onstack = np.zeros_like(tree, dtype=bool)
stack = list()
N = len(tree)
indices = np.full(N, -1, dtype=int)
lowlinks = np.full(N, -1, dtype=int)
onstack = np.zeros(N, dtype=bool)
stack = []
_index = [0]
cycles = []
#-------------------------------------------------------------

# Precompute the dependents of each node once, so the search does not rescan tree for every node
# dependents_map[i] is the list of node indices whose head is i (self-loops are left out)
dependents_map = [[] for _ in range(N)]
for idx, head in enumerate(tree):
if head >= 0 and head < N and idx != head: # avoid self-loop in precompute
dependents_map[head].append(idx)

def maybe_pop_cycle(i):
if lowlinks[i] == indices[i]:
# There's a cycle!
cycle = np.zeros_like(indices, dtype=bool)
cycle = np.zeros(N, dtype=bool)
while stack[-1] != i:
j = stack.pop()
onstack[j] = False
Expand All @@ -57,67 +65,28 @@ def initialize_strong_connect(i):
onstack[i] = True

def strong_connect(i):
    """Run the strong-connect step of Tarjan's algorithm from node ``i``.

    The natural recursive form overflows Python's call stack on deep
    graphs, see for example
    https://github.com/stanfordnlp/stanza/issues/962
    https://github.com/spraakbanken/sparv-pipeline/issues/166
    so the recursion is unrolled into a loop over an explicit stack.
    The recursive code this loop emulates is:

        initialize_strong_connect(i)
        for j in dependents_map[i]:
            if indices[j] == -1:
                strong_connect(j)
                lowlinks[i] = min(lowlinks[i], lowlinks[j])
            elif onstack[j]:
                lowlinks[i] = min(lowlinks[i], indices[j])
        maybe_pop_cycle(i)

    Reads and updates ``indices``, ``lowlinks`` and ``onstack`` from the
    enclosing scope, reads ``dependents_map``, and calls
    ``initialize_strong_connect`` and ``maybe_pop_cycle``.
    """
    # Each frame is (node, iterator over its dependents, dependent that
    # was being explored when the frame was suspended).  A frame whose
    # iterator is None is a node that has not been started yet.
    call_stack = [(i, None, None)]
    while call_stack:
        node, dependents_iterator, child = call_stack.pop()
        if dependents_iterator is None:
            # First visit of this node.
            initialize_strong_connect(node)
            dependents_iterator = iter(dependents_map[node])
        else:
            # Resuming after finishing `child`: this is the statement
            # that follows the recursive call in the recursive form.
            lowlinks[node] = min(lowlinks[node], lowlinks[child])
        for child in dependents_iterator:
            if indices[child] == -1:
                # Unvisited dependent: suspend this node, keeping the
                # partially consumed iterator so the scan continues
                # where it stopped, and process the dependent next.
                call_stack.append((node, dependents_iterator, child))
                call_stack.append((child, None, None))
                break
            elif onstack[child]:
                lowlinks[node] = min(lowlinks[node], indices[child])
        else:
            # for/else: the loop ended without a break, so every
            # dependent of this node is resolved and any cycle rooted
            # here can be popped.
            maybe_pop_cycle(node)

#-------------------------------------------------------------
for i in range(len(tree)):
# Main loop
for i in range(N):
if indices[i] == -1:
strong_connect(i)
return cycles
Expand Down