@@ -409,9 +409,33 @@ JL_DLLEXPORT void jl_install_sigint_handler(void)
409409    SetConsoleCtrlHandler ((PHANDLER_ROUTINE )sigint_handler ,1 );
410410}
411411
412- static  volatile  HANDLE  hBtThread  =  0 ;
412+ static  TIMECAPS  timecaps ; // timer capabilities; assigned from the timeGetDevCaps() result in jl_profile_start_timer
413+ static  HANDLE  hBtThread  =  0 ; // handle of the profile thread; stays 0/NULL until jl_profile_start_timer allocates it
414+ static  uv_cond_t  bt_data_prof_cond  =  CONDITION_VARIABLE_INIT ; // profile_bt waits on this (holding bt_data_prof_lock) while !profile_running; jl_profile_start_timer broadcasts it
415+ 
416+ #ifdef  _CPU_X86_64_ 
417+ // Data handed to profile_timeout_cb (as its lpParam) when profile_bt registers a stackwalk timeout 
418+ typedef  struct  {
419+     _Atomic (int ) * abort_ptr ; // flag that the callback atomically sets to 2 when the timeout fires
420+     int  tid ; // thread being sampled; the callback writes -1 here if it resumed that thread itself
421+ } profile_timeout_data_t ;
422+ 
423+ static  void  CALLBACK  profile_timeout_cb (PVOID  lpParam , BOOLEAN  TimerOrWaitFired ) // wait callback registered by profile_bt; lpParam points to a profile_timeout_data_t
424+ {
425+     profile_timeout_data_t  * data  =  (profile_timeout_data_t * )lpParam ;
426+     if  (TimerOrWaitFired  &&  data  !=  NULL  &&  data -> abort_ptr  !=  NULL ) { // TimerOrWaitFired is TRUE when the wait timed out rather than the event being signaled
427+         // Timeout reached, signal an abort should occur 
428+         // jl_safe_fprintf(ios_safe_stderr, "profile_timeout_cb called.\n"); 
429+         if  (jl_atomic_exchange (data -> abort_ptr , 2 ) ==  1 ) { // always stores 2; a previous value of 1 means this callback must resume the thread (NOTE(review): the code that stores 1 is outside this view)
430+             // jl_safe_fprintf(ios_safe_stderr, "profile_timeout_cb jl_thread_resume.\n"); 
431+             jl_thread_resume (data -> tid );
432+             data -> tid  =  -1 ; // profile_bt checks for -1 so it does not resume the thread a second time
433+         }
434+     }
435+ }
436+ #endif 
413437
414- int  jl_thread_suspend_and_get_state (int  tid , int  timeout , bt_context_t  * ctx )
438+ static   int  jl_thread_suspend_and_get_state (int  tid , int  timeout , bt_context_t  * ctx )
415439{
416440    (void )timeout ;
417441    jl_ptls_t  ptls2  =  jl_atomic_load_relaxed (& jl_all_tls_states )[tid ];
@@ -421,8 +445,10 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
421445    if  (ct2  ==  NULL ) // this thread is already dead 
422446        return  0 ;
423447    HANDLE  hThread  =  ptls2 -> system_id ;
424-     if  ((DWORD )- 1  ==  SuspendThread (hThread ))
448+     if  ((DWORD )- 1  ==  SuspendThread (hThread )) {
449+         // jl_safe_fprintf(ios_safe_stderr, "failed to suspend thread %d: %lu\n", tid, GetLastError()); 
425450        return  0 ;
451+     }
426452    assert (sizeof (* ctx ) ==  sizeof (CONTEXT ));
427453    memset (ctx , 0 , sizeof (CONTEXT ));
428454    ctx -> ContextFlags  =  CONTEXT_CONTROL  | CONTEXT_INTEGER ;
@@ -439,90 +465,127 @@ void jl_thread_resume(int tid)
439465    jl_ptls_t  ptls2  =  jl_atomic_load_relaxed (& jl_all_tls_states )[tid ];
440466    HANDLE  hThread  =  ptls2 -> system_id ;
441467    if  ((DWORD )- 1  ==  ResumeThread (hThread )) {
442-         fputs ( "failed to resume main thread! aborting."  ,  stderr );
468+         jl_safe_fprintf ( ios_safe_stderr ,  "failed to resume main thread! aborting.\n"  );
443469        abort ();
444470    }
445471}
446472
447473int  jl_thread_suspend (int16_t  tid , bt_context_t  * ctx ) // suspend thread `tid` and fill `ctx` via jl_thread_suspend_and_get_state while holding the profile and stackwalk locks
448474{
449-     uv_mutex_lock (& jl_in_stackwalk );
450-     jl_lock_profile ();
451-     ULONG_PTR  lock_cookie  =  0 ;
452-     LdrLockLoaderLock (0x1 , NULL , & lock_cookie );
475+     jl_lock_profile (); // prevent concurrent mutation 
476+     uv_mutex_lock (& jl_in_stackwalk ); // prevent multi-threaded dbghelp calls 
453477    int  success  =  jl_thread_suspend_and_get_state (tid , 0 , ctx ); // the timeout argument (0) is ignored by the callee
454-     LdrUnlockLoaderLock (0x1 , lock_cookie );
455-     jl_unlock_profile ();
456478    uv_mutex_unlock (& jl_in_stackwalk ); // locks are released in the reverse order of acquisition
479+     jl_unlock_profile ();
457480    return  success ; // 0 when the thread is already dead or SuspendThread failed
458481}
459482
460483static  DWORD  WINAPI  profile_bt ( LPVOID  lparam  ) // body of the profile thread: sleeps one sampling interval, then records backtraces while profile_running is set
461484{
462485    // Note: illegal to use jl_* functions from this thread except for profiling-specific functions 
486+     // Dummy event, never signaled in this function; it exists only so RegisterWaitForSingleObject can deliver a timeout callback 
487+     HANDLE  hProfileEvent  =  CreateEvent (NULL , TRUE, FALSE, NULL ); // manual-reset, initially non-signaled, unnamed
488+     if  (hProfileEvent  ==  NULL ) {
489+         jl_safe_fprintf (ios_safe_stderr , "failed to create profile event.\n" );
490+         abort ();
491+     }
463492    while  (1 ) {
464493        DWORD  timeout_ms  =  nsecprof  / (GIGA  / 1000 ); // sampling interval in milliseconds (assumes nsecprof is in ns and GIGA is 1e9 -- TODO confirm)
465494        Sleep (timeout_ms  >  0  ? timeout_ms  : 1 ); // always sleep at least 1 ms
466-         if  (profile_running ) {
467-             if  (jl_profile_is_buffer_full ()) {
468-                 jl_profile_stop_timer (); // does not change the thread state 
469-                 SuspendThread (GetCurrentThread ());
470-                 continue ;
471-             }
472-             else  if  (profile_all_tasks ) {
473-                 // Don't take the stackwalk lock here since it's already taken in `jl_rec_backtrace` 
474-                 jl_profile_task ();
475-             }
476-             else  {
477-                 // TODO: bring this up to parity with other OS by adding loop over tid here 
478-                 bt_context_t  c ;
479-                 if  (!jl_thread_suspend (0 , & c )) {
480-                     fputs ("failed to suspend main thread. aborting profiling." , stderr );
495+         if  (jl_profile_is_buffer_full ())
496+             jl_profile_stop_timer (); // does not change the thread state 
497+         if  (!profile_running ) { // park here until jl_profile_start_timer sets profile_running and broadcasts the condition
498+             uv_mutex_lock (& bt_data_prof_lock );
499+             while  (!profile_running ) // re-check the flag after every wakeup
500+                 uv_cond_wait (& bt_data_prof_cond , & bt_data_prof_lock );
501+             uv_mutex_unlock (& bt_data_prof_lock );
502+         }
503+         else  if  (profile_all_tasks ) {
504+             // Don't take the stackwalk lock here since it's already taken in `jl_rec_backtrace` 
505+             jl_profile_task ();
506+         }
507+         else  {
508+             // Profile all threads, similar to Unix implementation 
509+             bt_context_t  c ;
510+             int  nthreads  =  jl_atomic_load_acquire (& jl_n_threads );
511+             int  * randperm  =  profile_get_randperm (nthreads ); // NOTE(review): presumably a random ordering of thread ids -- helper is not visible here
512+             for  (int  idx  =  nthreads ; idx --  >  0 ; ) { // walk the array from the last entry down to index 0
513+                 int  tid  =  randperm [idx ];
514+                 if  (!profile_running )
515+                     break ;
516+                 if  (jl_profile_is_buffer_full ()) {
481517                    jl_profile_stop_timer ();
482518                    break ;
483519                }
520+                 if  (!jl_thread_suspend (tid , & c ))
521+                     continue ; // thread is dead or could not be suspended; skip it
522+                 jl_ptls_t  ptls  =  jl_atomic_load_relaxed (& jl_all_tls_states )[tid ];
523+                 jl_task_t  * t2  =  jl_atomic_load_relaxed (& ptls -> current_task ); // task and sleep state are read before the thread is resumed
524+                 int  state  =  jl_atomic_load_relaxed (& ptls -> sleep_check_state ) ==  0  ? PROFILE_STATE_THREAD_NOT_SLEEPING  : PROFILE_STATE_THREAD_SLEEPING ;
525+ 
526+                 // Set up timeout handler for stackwalk 
527+ #ifdef  _CPU_X86_64_ 
528+                 _Atomic(int ) abort_profiling  =  0 ; // fresh flag per sample; published through jl_set_profile_abort_ptr and handed to the timeout callback
529+                 profile_timeout_data_t  timeout_data ;
530+                 timeout_data .abort_ptr  =  & abort_profiling ;
531+                 timeout_data .tid  =  tid ;
532+                 jl_set_profile_abort_ptr (& abort_profiling );
533+                 HANDLE  hWaitHandle  =  NULL ;
534+                 if  (!RegisterWaitForSingleObject (& hWaitHandle , hProfileEvent , profile_timeout_cb ,
535+                                                  & timeout_data , 100 , WT_EXECUTEONLYONCE  | WT_EXECUTEINWAITTHREAD )) { // one-shot wait with a 100 ms timeout
536+                     // Failed to register wait, proceed without timeout protection 
537+                     hWaitHandle  =  NULL ;
538+                 }
539+ #endif 
484540                // Get backtrace data 
485541                profile_bt_size_cur  +=  rec_backtrace_ctx ((jl_bt_element_t * )profile_bt_data_prof  +  profile_bt_size_cur ,
486542                        profile_bt_size_max  -  profile_bt_size_cur  -  1 , & c , NULL );
487- 
488-                 jl_ptls_t  ptls  =  jl_atomic_load_relaxed (& jl_all_tls_states )[0 ]; // given only profiling hMainThread 
543+ #ifdef  _CPU_X86_64_ 
544+                 // Clear abort pointer from TLS 
545+                 jl_set_profile_abort_ptr (NULL );
546+                 // Wait for callback to complete or cancel before continuing 
547+                 if  (hWaitHandle  !=  NULL )
548+                     UnregisterWaitEx (hWaitHandle , INVALID_HANDLE_VALUE ); // INVALID_HANDLE_VALUE makes this call block until any running callback has finished
549+                 if  (timeout_data .tid  !=  -1 ) // -1 means the timeout callback already resumed the thread
550+ #endif 
551+                     jl_thread_resume (tid );
489552
490553                // META_OFFSET_THREADID stores the thread id plus 1, since 0 is reserved to indicate the end of a block 
491-                 profile_bt_data_prof [profile_bt_size_cur ++ ].uintptr  =  ptls -> tid  +  1 ;
554+                 profile_bt_data_prof [profile_bt_size_cur ++ ].uintptr  =  tid  +  1 ;
492555
493556                // META_OFFSET_TASKID store task id (never null) 
494-                 profile_bt_data_prof [profile_bt_size_cur ++ ].jlvalue  =  (jl_value_t * )jl_atomic_load_relaxed ( & ptls -> current_task ) ;
557+                 profile_bt_data_prof [profile_bt_size_cur ++ ].jlvalue  =  (jl_value_t * )t2 ;
495558
496559                // META_OFFSET_CPUCYCLECLOCK store cpu cycle clock 
497560                profile_bt_data_prof [profile_bt_size_cur ++ ].uintptr  =  cycleclock ();
498561
499562                // store whether the thread is sleeping (never encode a state as `0`, since 0 is reserved to indicate the end of a block) 
500-                 int  state  =  jl_atomic_load_relaxed (& ptls -> sleep_check_state ) ==  0  ? PROFILE_STATE_THREAD_NOT_SLEEPING  : PROFILE_STATE_THREAD_SLEEPING ;
501563                profile_bt_data_prof [profile_bt_size_cur ++ ].uintptr  =  state ;
502564
503565                // Mark the end of this block with two 0's 
504566                profile_bt_data_prof [profile_bt_size_cur ++ ].uintptr  =  0 ;
505567                profile_bt_data_prof [profile_bt_size_cur ++ ].uintptr  =  0 ;
506-                 jl_thread_resume (0 );
507-                 jl_check_profile_autostop ();
508568            }
569+             jl_check_profile_autostop (); // called once per sampling pass, after the per-thread loop
509570        }
510571    }
511-     uv_mutex_unlock ( & jl_in_stackwalk ); 
512-     jl_profile_stop_timer ( );
572+     // this is unreachable, but would be the relevant cleanup 
573+     uv_mutex_lock ( & bt_data_prof_lock );
513574    hBtThread  =  NULL ;
575+     uv_mutex_unlock (& bt_data_prof_lock );
576+     jl_profile_stop_timer ();
577+     CloseHandle (hProfileEvent );
514578    return  0 ;
515579}
516580
517- static  volatile  TIMECAPS  timecaps ;
518- 
519581JL_DLLEXPORT  int  jl_profile_start_timer (uint8_t  all_tasks )
520582{
583+     uv_mutex_lock (& bt_data_prof_lock );
521584    if  (hBtThread  ==  NULL ) {
522- 
523585        TIMECAPS  _timecaps ;
524586        if  (MMSYSERR_NOERROR  !=  timeGetDevCaps (& _timecaps , sizeof (_timecaps ))) {
525-             fputs ("failed to get timer resolution" , stderr );
587+             uv_mutex_unlock (& bt_data_prof_lock );
588+             jl_safe_fprintf (ios_safe_stderr , "failed to get timer resolution.\n" );
526589            return  -2 ;
527590        }
528591        timecaps  =  _timecaps ;
@@ -534,15 +597,12 @@ JL_DLLEXPORT int jl_profile_start_timer(uint8_t all_tasks)
534597            0 ,                      // argument to thread function 
535598            0 ,                      // use default creation flags 
536599            0 );                     // returns the thread identifier 
537-         if  (hBtThread  ==  NULL )
600+         if  (hBtThread  ==  NULL ) {
601+             uv_mutex_unlock (& bt_data_prof_lock );
602+             jl_safe_fprintf (ios_safe_stderr , "failed to allocate profile thread.\n" );
538603            return  -1 ;
539-         (void )SetThreadPriority (hBtThread , THREAD_PRIORITY_ABOVE_NORMAL );
540-     }
541-     else  {
542-         if  ((DWORD )- 1  ==  ResumeThread (hBtThread )) {
543-             fputs ("failed to resume profiling thread." , stderr );
544-             return  -2 ;
545604        }
605+         (void )SetThreadPriority (hBtThread , THREAD_PRIORITY_ABOVE_NORMAL );
546606    }
547607    if  (profile_running  ==  0 ) {
548608        // Failure to change the timer resolution is not fatal. However, it is important to 
@@ -552,6 +612,8 @@ JL_DLLEXPORT int jl_profile_start_timer(uint8_t all_tasks)
552612    }
553613    profile_all_tasks  =  all_tasks ;
554614    profile_running  =  1 ; // set `profile_running` finally 
615+     uv_cond_broadcast (& bt_data_prof_cond );
616+     uv_mutex_unlock (& bt_data_prof_lock );
555617    return  0 ;
556618}
557619JL_DLLEXPORT  void  jl_profile_stop_timer (void )
0 commit comments