Percona Server moved to https://jira.percona.com/projects/PS

Bug #1319904
Comment #8

Comment 8 for bug 1319904

Revision history for this message

markus_albe (markus-albe) wrote on 2014-11-26: Re: mysqld daemon with installed handlersocket plugin did not stop

Customer reproduced with 5.6.21 and I verified through code inspection:

The error log showed:

2014-11-25 05:16:14 31200 [Note] /usr/sbin/mysqld: Normal shutdown
2014-11-25 05:16:14 31200 [Note] Giving 24 client threads a chance to die gracefully
2014-11-25 05:16:14 31200 [Note] Event Scheduler: Purging the queue. 0 events
2014-11-25 05:16:14 31200 [Note] Shutting down slave threads
2014-11-25 05:16:16 31200 [Note] Forcefully disconnecting 24 remaining clients

Then pmp showed:
23 __lll_lock_wait(libpthread.so.0),_L_lock_995(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),dena::dbcontext::term_thread(handlersocket.so),dena::hstcpsvr_worker::run(handlersocket.so),dena::thread::thread_main(handlersocket.so),start_thread(libpthread.so.0),clone(libc.so.6)

1 __lll_lock_wait(libpthread.so.0),_L_lock_995(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),remove_global_thread,dena::dbcontext::term_thread(handlersocket.so),dena::hstcpsvr_worker::run(handlersocket.so),dena::thread::thread_main(handlersocket.so),start_thread(libpthread.so.0),clone(libc.so.6)

Definition of dena::dbcontext::term_thread (from https://github.com/DeNA/HandlerSocket-Plugin-for-MySQL/blob/master/handlersocket/database.cpp#L346):

dbcontext::term_thread()
{
DBG_THR(fprintf(stderr, "HNDSOCK thread end %p\n", thd));
unlock_tables_if();
my_pthread_setspecific_ptr(THR_THD, 0);
{
pthread_mutex_lock(&LOCK_thread_count);
#if MYSQL_VERSION_ID >= 50600
remove_global_thread(thd);
#else
--thread_count;
#endif
delete thd;
thd = 0;
pthread_mutex_unlock(&LOCK_thread_count);
my_thread_end();
}
}

And the definition of remove_global_thread (from 5.6.21/sql/mysqld.cc@892):

void remove_global_thread(THD *thd)
{
  DBUG_PRINT("info", ("remove_global_thread %p current_linfo %p",
                      thd, thd->current_linfo));
  mysql_mutex_lock(&LOCK_thd_remove);
  mysql_mutex_lock(&LOCK_thread_count);
  DBUG_ASSERT(thd->release_resources_done());
  /*
    Used by binlog_reset_master. It would be cleaner to use
    DEBUG_SYNC here, but that's not possible because the THD's debug
    sync feature has been shut down at this point.
   */
  DBUG_EXECUTE_IF("sleep_after_lock_thread_count_before_delete_thd",
                  sleep(5););

  const size_t num_erased= global_thread_list->erase(thd);
  if (num_erased == 1)
    --global_thread_count;
  // Removing a THD that was never added is an error.
  DBUG_ASSERT(1 == num_erased);

  mysql_mutex_unlock(&LOCK_thd_remove);
  mysql_cond_broadcast(&COND_thread_count);
  mysql_mutex_unlock(&LOCK_thread_count);
}

So my understanding is that term_thread already holds LOCK_thread_count when it calls remove_global_thread, and the second attempt to acquire the same mutex there (mysql_mutex_lock(&LOCK_thread_count)) will wait forever, because LOCK_thread_count is not a recursive mutex (thanks to Vlad for the input on that):

fgrep -RnI --exclude-dir=build* --exclude-dir=mysql-test --exclude=tags --exclude=*.P* --exclude=*.T* "LOCK_thread_count" . | grep init
./tests/thread_test.c:196: pthread_mutex_init(&LOCK_thread_count,MY_MUTEX_INIT_FAST);
./sql/mysqld.cc:3638: mysql_mutex_init(key_LOCK_thread_count, &LOCK_thread_count, MY_MUTEX_INIT_FAST);
./mysys/thr_lock.c:1608: if ((error= mysql_mutex_init(0, &LOCK_thread_count, MY_MUTEX_INIT_FAST)))
./mysys/thr_alarm.c:883: mysql_mutex_init(0, &LOCK_thread_count, MY_MUTEX_INIT_FAST);

About MY_MUTEX_INIT_FAST:

#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
extern pthread_mutexattr_t my_fast_mutexattr;
#define MY_MUTEX_INIT_FAST &my_fast_mutexattr
#else
#define MY_MUTEX_INIT_FAST NULL
#endif
...
#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
  /*
    Set mutex type to "fast" a.k.a "adaptive"
    In this case the thread may steal the mutex from some other thread
    that is waiting for the same mutex. This will save us some
    context switches but may cause a thread to 'starve forever' while
    waiting for the mutex (not likely if the code within the mutex is
    short).
  */
  pthread_mutexattr_init(&my_fast_mutexattr);
  pthread_mutexattr_settype(&my_fast_mutexattr,
                            PTHREAD_MUTEX_ADAPTIVE_NP);
#endif

Customer reproduced with 5.6.21 and I verified through code inspection:

The error log showed:

Definition of dena::dbcontext::term_thread (from https://github.com/DeNA/HandlerSocket-Plugin-for-MySQL/blob/master/handlersocket/database.cpp#L346):

And the definition of remove_global_thread (from 5.6.21/sql/mysqld.cc@892):

void remove_global_thread(THD *thd)
{
  DBUG_PRINT("info", ("remove_global_thread %p current_linfo %p",
                      thd, thd->current_linfo));
  mysql_mutex_lock(&LOCK_thd_remove);
  mysql_mutex_lock(&LOCK_thread_count);
  DBUG_ASSERT(thd->release_resources_done());
  /*
    Used by binlog_reset_master.  It would be cleaner to use
    DEBUG_SYNC here, but that's not possible because the THD's debug
    sync feature has been shut down at this point.
   */
  DBUG_EXECUTE_IF("sleep_after_lock_thread_count_before_delete_thd",
                  sleep(5););

const size_t num_erased= global_thread_list->erase(thd);
  if (num_erased == 1)
    --global_thread_count;
  // Removing a THD that was never added is an error.
  DBUG_ASSERT(1 == num_erased);

mysql_mutex_unlock(&LOCK_thd_remove);
  mysql_cond_broadcast(&COND_thread_count);
  mysql_mutex_unlock(&LOCK_thread_count);
}

So my understanding is that term_thread already holds LOCK_thread_count when it calls remove_global_thread, and the second attempt to acquire the same mutex there (mysql_mutex_lock(&LOCK_thread_count)) will wait forever, because LOCK_thread_count is not a recursive mutex (thanks to Vlad for the input on that):

fgrep -RnI --exclude-dir=build* --exclude-dir=mysql-test --exclude=tags --exclude=*.P* --exclude=*.T* "LOCK_thread_count" . | grep init
./tests/thread_test.c:196:  pthread_mutex_init(&LOCK_thread_count,MY_MUTEX_INIT_FAST);
./sql/mysqld.cc:3638:  mysql_mutex_init(key_LOCK_thread_count, &LOCK_thread_count, MY_MUTEX_INIT_FAST);
./mysys/thr_lock.c:1608:  if ((error= mysql_mutex_init(0, &LOCK_thread_count, MY_MUTEX_INIT_FAST)))
./mysys/thr_alarm.c:883:  mysql_mutex_init(0, &LOCK_thread_count, MY_MUTEX_INIT_FAST);

About MY_MUTEX_INIT_FAST:

#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
extern pthread_mutexattr_t my_fast_mutexattr;
#define MY_MUTEX_INIT_FAST &my_fast_mutexattr
#else
#define MY_MUTEX_INIT_FAST   NULL
#endif
...
#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
  /*
    Set mutex type to "fast" a.k.a "adaptive"
    In this case the thread may steal the mutex from some other thread
    that is waiting for the same mutex.  This will save us some
    context switches but may cause a thread to 'starve forever' while
    waiting for the mutex (not likely if the code within the mutex is
    short).
  */
  pthread_mutexattr_init(&my_fast_mutexattr);
  pthread_mutexattr_settype(&my_fast_mutexattr,
                            PTHREAD_MUTEX_ADAPTIVE_NP);
#endif