@@ -378,7 +378,7 @@ where
378378fn remove_cycle (
379379 query_map : & QueryMap ,
380380 jobs : & mut Vec < QueryJobId > ,
381- wakelist : & mut Vec < Arc < QueryWaiter > > ,
381+ wakelist : & Mutex < Vec < Arc < QueryWaiter > > > ,
382382) -> bool {
383383 let mut visited = FxHashSet :: default ( ) ;
384384 let mut stack = Vec :: new ( ) ;
@@ -466,7 +466,7 @@ fn remove_cycle(
466466 * waiter. cycle . lock ( ) = Some ( error) ;
467467
468468 // Put the waiter on the list of things to resume
469- wakelist. push ( waiter) ;
469+ wakelist. lock ( ) . push ( waiter) ;
470470
471471 true
472472 } else {
@@ -478,36 +478,40 @@ fn remove_cycle(
478478/// If a query cycle is found it will break the cycle by finding an edge which
479479/// uses a query latch and then resuming that waiter.
480480/// There may be multiple cycles involved in a deadlock, so this searches
481- /// all active queries for cycles before finally resuming all the waiters at once.
481+ /// all active queries for cycles. However, only one waiter is resumed per call.
482482pub fn break_query_cycles ( query_map : QueryMap , registry : & rayon_core:: Registry ) {
483- let mut wakelist = Vec :: new ( ) ;
483+ // FIXME: change `remove_cycle` so that it returns one waiter per call,
484+ // so that we can avoid using the global list here.
485+ static WAKELIST : Mutex < Vec < Arc < QueryWaiter > > > = Mutex :: new ( Vec :: new ( ) ) ;
486+
484487 let mut jobs: Vec < QueryJobId > = query_map. keys ( ) . cloned ( ) . collect ( ) ;
485488
486489 let mut found_cycle = false ;
487490
488491 while jobs. len ( ) > 0 {
489- if remove_cycle ( & query_map, & mut jobs, & mut wakelist ) {
492+ if remove_cycle ( & query_map, & mut jobs, & WAKELIST ) {
490493 found_cycle = true ;
491494 }
492495 }
493-
496+ let mut wake = WAKELIST . lock ( ) ;
494497 // Check that a cycle was found. It is possible for a deadlock to occur without
495498 // a query cycle if a query which can be waited on uses Rayon to do multithreading
496499 // internally. Such a query (X) may be executing on 2 threads (A and B) and A may
497500 // wait using Rayon on B. Rayon may then switch to executing another query (Y)
498501 // which in turn will wait on X causing a deadlock. We have a false dependency from
499502 // X to Y due to Rayon waiting and a true dependency from Y to X. The algorithm here
500503 // only considers the true dependency and won't detect a cycle.
501- if !found_cycle {
504+ if !found_cycle && wake . is_empty ( ) {
502505 panic ! (
503506 "deadlock detected as we're unable to find a query cycle to break\n \
504507 current query map:\n {:#?}",
505508 query_map
506509 ) ;
507510 }
508511
509- // FIXME: Ensure this won't cause a deadlock before we return
510- for waiter in wakelist. into_iter ( ) {
512+ // Only one waiter is resumed at a time, to avoid waking up multiple
513+ // waiters simultaneously and causing a deadlock due to thread grabbing.
514+ if let Some ( waiter) = wake. pop ( ) {
511515 waiter. notify ( registry) ;
512516 }
513517}
0 commit comments