@@ -378,7 +378,7 @@ where
378378fn remove_cycle (
379379 query_map : & QueryMap ,
380380 jobs : & mut Vec < QueryJobId > ,
381- wakelist : & mut Vec < Arc < QueryWaiter > > ,
381+ wakelist : & Mutex < Vec < Arc < QueryWaiter > > > ,
382382) -> bool {
383383 let mut visited = FxHashSet :: default ( ) ;
384384 let mut stack = Vec :: new ( ) ;
@@ -466,7 +466,7 @@ fn remove_cycle(
466466 * waiter. cycle . lock ( ) = Some ( error) ;
467467
468468 // Put the waiter on the list of things to resume
469- wakelist. push ( waiter) ;
469+ wakelist. lock ( ) . push ( waiter) ;
470470
471471 true
472472 } else {
@@ -478,36 +478,37 @@ fn remove_cycle(
478478/// If a query cycle is found it will break the cycle by finding an edge which
479479/// uses a query latch and then resuming that waiter.
480480/// There may be multiple cycles involved in a deadlock, so this searches
481- /// all active queries for cycles before finally resuming all the waiters at once.
481+ /// all active queries for cycles. But only one waiter will be resumed at once.
482482pub fn break_query_cycles ( query_map : QueryMap , registry : & rayon_core:: Registry ) {
483- let mut wakelist = Vec :: new ( ) ;
483+ static WAKELIST : Mutex < Vec < Arc < QueryWaiter > > > = Mutex :: new ( Vec :: new ( ) ) ;
484484 let mut jobs: Vec < QueryJobId > = query_map. keys ( ) . cloned ( ) . collect ( ) ;
485485
486486 let mut found_cycle = false ;
487487
488488 while jobs. len ( ) > 0 {
489- if remove_cycle ( & query_map, & mut jobs, & mut wakelist ) {
489+ if remove_cycle ( & query_map, & mut jobs, & WAKELIST ) {
490490 found_cycle = true ;
491491 }
492492 }
493-
493+ let mut wake = WAKELIST . lock ( ) ;
494494 // Check that a cycle was found. It is possible for a deadlock to occur without
495495 // a query cycle if a query which can be waited on uses Rayon to do multithreading
496496 // internally. Such a query (X) may be executing on 2 threads (A and B) and A may
497497 // wait using Rayon on B. Rayon may then switch to executing another query (Y)
498498 // which in turn will wait on X causing a deadlock. We have a false dependency from
499499 // X to Y due to Rayon waiting and a true dependency from Y to X. The algorithm here
500500 // only considers the true dependency and won't detect a cycle.
501- if !found_cycle {
501+ if !found_cycle && wake . is_empty ( ) {
502502 panic ! (
503503 "deadlock detected as we're unable to find a query cycle to break\n \
504504 current query map:\n {:#?}",
505505 query_map
506506 ) ;
507507 }
508508
509- // FIXME: Ensure this won't cause a deadlock before we return
510- for waiter in wakelist. into_iter ( ) {
509+ // Only one waiter is resumed at a time to avoid waking up multiple
510+ // waiters at the same time and causing deadlock due to thread grabbing.
511+ if let Some ( waiter) = wake. pop ( ) {
511512 waiter. notify ( registry) ;
512513 }
513514}
0 commit comments