Fork from other than the main thread causes wrong pthread condition on NetBSD

Issue #2724 has been updated by naruse (Yui NARUSE).

Description updated


Bug #2724: fork from other than the main thread causes wrong pthread
condition on NetBSD
https://bugs.ruby-lang.org/issues/2724#change-26135

Author: naruse (Yui NARUSE)
Status: Third Party’s Issue
Priority: Normal
Assignee:
Category: core
Target version:
ruby -v: ruby 1.9.2dev (2010-02-07 trunk 26615) [i386-netbsdelf5.0.1]

NetBSD 5.0.[01] において、main thread 以外の pthread から fork すると、
pthread とカーネルスレッド (lwp) との関連が壊れるという現象が確認されています。

後述のパッチがあまりにアレなのでこの問題は Third Party’s Issue とし、
Ruby 側では修正を入れない事としますが、情報の共有と記録のために
ここにチケットを切っておきます。

なお、この workaround の作成には @_enamiさんの助けがありました。

追記:
NetBSD 側では kern/42772 として報告、修正されています。
http://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=42772

Index: thread_pthread.c

--- thread_pthread.c (revision 26615)
+++ thread_pthread.c (working copy)
@@ -17,6 +17,93 @@
#include <sys/resource.h>
#endif

+#if defined(__NetBSD_Version__) && __NetBSD_Version__ >= 500000000
+/* Hack for NetBSD 5.0.x's broken pthread->pt_lid */
+/* Copied from /src/lib/libpthread/pthread_int.h */
+#define BROKEN_PTHREAD_T_PT_LID
+#include <lwp.h>
+#include <pthread_queue.h>
+#include <sys/tree.h>
+
+#define PTHREAD_KEYS_MAX 256
+#define PTHREAD__UNPARK_MAX 32
+
+/*
+ * The size of this structure needs to be no larger than struct
+ * __pthread_cleanup_store, defined in pthread.h.
+ */
+struct pt_clean_t {
+    PTQ_ENTRY(pt_clean_t)   ptc_next;
+    void    (*ptc_cleanup)(void *);
+    void    *ptc_arg;
+};
+
+struct pthread_lock_ops {
+    void    (*plo_init)(__cpu_simple_lock_t *);
+    int     (*plo_try)(__cpu_simple_lock_t *);
+    void    (*plo_unlock)(__cpu_simple_lock_t *);
+    void    (*plo_lock)(__cpu_simple_lock_t *);
+};
+
+struct __pthread_st {
+    pthread_t       pt_self;        /* Must be first. */
+    unsigned int    pt_magic;       /* Magic number */
+    int             pt_state;       /* running, blocked, etc. */
+    pthread_mutex_t pt_lock;        /* lock on state */
+    int             pt_flags;       /* see PT_FLAG_* below */
+    int             pt_cancel;      /* Deferred cancellation */
+    int             pt_errno;       /* Thread-specific errno. */
+    stack_t         pt_stack;       /* Our stack */
+    void            *pt_exitval;    /* Read by pthread_join() */
+    char            *pt_name;       /* Thread's name, set by the app. */
+    int             pt_willpark;    /* About to park */
+    lwpid_t         pt_unpark;      /* Unpark this when parking */
+    struct pthread_lock_ops pt_lockops;/* Cached to avoid PIC overhead */
+    pthread_mutex_t *pt_droplock;   /* Drop this lock if cancelled */
+    pthread_cond_t  pt_joiners;     /* Threads waiting to join. */
+
+    /* Threads to defer waking, usually until pthread_mutex_unlock(). */
+    lwpid_t         pt_waiters[PTHREAD__UNPARK_MAX];
+    size_t          pt_nwaiters;
+
+    /* Stack of cancellation cleanup handlers and their arguments */
+    PTQ_HEAD(, pt_clean_t)  pt_cleanup_stack;
+
+    /* LWP ID and entry on the list of all threads. */
+    lwpid_t         pt_lid;
+    RB_ENTRY(__pthread_st) pt_alltree;
+    PTQ_ENTRY(__pthread_st) pt_allq;
+    PTQ_ENTRY(__pthread_st) pt_deadq;
+
+    /*
+     * General synchronization data.  We try to align, as threads
+     * on other CPUs will access this data frequently.
+     */
+    int             pt_dummy1 __aligned(128);
+    struct lwpctl   *pt_lwpctl;     /* Kernel/user comms area */
+    volatile int    pt_blocking;    /* Blocking in userspace */
+    volatile int    pt_rwlocked;    /* Handed rwlock successfully */
+    volatile int    pt_signalled;   /* Received pthread_cond_signal() */
+    volatile int    pt_mutexwait;   /* Waiting to acquire mutex */
+    void * volatile pt_mutexnext;   /* Next thread in chain */
+    void * volatile pt_sleepobj;    /* Object slept on */
+    PTQ_ENTRY(__pthread_st) pt_sleep;
+    void            (*pt_early)(void *);
+    int             pt_dummy2 __aligned(128);
+
+    /* Thread-specific data.  Large so it sits close to the end. */
+    int             pt_havespecific;
+    void            *pt_specific[PTHREAD_KEYS_MAX];
+
+    /*
+     * Context for thread creation.  At the end as it's cached
+     * and then only ever passed to _lwp_create().
+     */
+    ucontext_t      pt_uc;
+};
+#endif /* NetBSD */
+
+
static void native_mutex_lock(pthread_mutex_t *lock);
static void native_mutex_unlock(pthread_mutex_t *lock);
static int native_mutex_trylock(pthread_mutex_t *lock);
@@ -833,6 +920,9 @@
native_reset_timer_thread(void)
{
timer_thread_id = 0;
+#ifdef BROKEN_PTHREAD_T_PT_LID
+    ((struct __pthread_st *)pthread_self())->pt_lid = _lwp_self();
+#endif
}

#ifdef HAVE_SIGALTSTACK