Customer reproduced with 5.6.21 and I verified through code inspection:
Error log showed
2014-11-25 05:16:14 31200 [Note] /usr/sbin/mysqld: Normal shutdown
2014-11-25 05:16:14 31200 [Note] Giving 24 client threads a chance to die gracefully
2014-11-25 05:16:14 31200 [Note] Event Scheduler: Purging the queue. 0 events
2014-11-25 05:16:14 31200 [Note] Shutting down slave threads
2014-11-25 05:16:16 31200 [Note] Forcefully disconnecting 24 remaining clients
Then pmp showed:
23 __lll_lock_wait(libpthread.so.0),_L_lock_995(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),dena::dbcontext::term_thread(handlersocket.so),dena::hstcpsvr_worker::run(handlersocket.so),dena::thread::thread_main(handlersocket.so),start_thread(libpthread.so.0),clone(libc.so.6)
And definition of remove_global_thread (from 5.6.21/sql/mysqld.cc@892):
void remove_global_thread(THD *thd)
{
DBUG_PRINT("info", ("remove_global_thread %p current_linfo %p", thd, thd->current_linfo));
mysql_mutex_lock(&LOCK_thd_remove);
mysql_mutex_lock(&LOCK_thread_count);
DBUG_ASSERT(thd->release_resources_done());
/*
Used by binlog_reset_master. It would be cleaner to use
DEBUG_SYNC here, but that's not possible because the THD's debug
sync feature has been shut down at this point.
*/
DBUG_EXECUTE_IF("sleep_after_lock_thread_count_before_delete_thd", sleep(5););
const size_t num_erased= global_thread_list->erase(thd);
if (num_erased == 1)
--global_thread_count;
// Removing a THD that was never added is an error.
DBUG_ASSERT(1 == num_erased);
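So my understanding is that dena::dbcontext::term_thread already holds LOCK_thread_count when it calls remove_global_thread, so the second attempt by the same thread to acquire the mutex (the mysql_mutex_lock(&LOCK_thread_count) call above) will wait forever, because LOCK_thread_count is not a recursive mutex (thanks to Vlad for the input on that):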
#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
extern pthread_mutexattr_t my_fast_mutexattr;
#define MY_MUTEX_INIT_FAST &my_fast_mutexattr
#else
#define MY_MUTEX_INIT_FAST NULL
#endif
...
#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
/*
Set mutex type to "fast" a.k.a "adaptive"
In this case the thread may steal the mutex from some other thread
that is waiting for the same mutex. This will save us some
context switches but may cause a thread to 'starve forever' while
waiting for the mutex (not likely if the code within the mutex is
short).
*/
pthread_mutexattr_init(&my_fast_mutexattr);
pthread_mutexattr_settype(&my_fast_mutexattr, PTHREAD_MUTEX_ADAPTIVE_NP);
#endif
Customer reproduced with 5.6.21 and I verified through code inspection:
Error log showed
2014-11-25 05:16:14 31200 [Note] /usr/sbin/mysqld: Normal shutdown
2014-11-25 05:16:14 31200 [Note] Giving 24 client threads a chance to die gracefully
2014-11-25 05:16:14 31200 [Note] Event Scheduler: Purging the queue. 0 events
2014-11-25 05:16:14 31200 [Note] Shutting down slave threads
2014-11-25 05:16:16 31200 [Note] Forcefully disconnecting 24 remaining clients
Then pmp showed:
23 __lll_lock_wait(libpthread.so.0),_L_lock_995(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),dena::dbcontext::term_thread(handlersocket.so),dena::hstcpsvr_worker::run(handlersocket.so),dena::thread::thread_main(handlersocket.so),start_thread(libpthread.so.0),clone(libc.so.6)
1 __lll_lock_wait(libpthread.so.0),_L_lock_995(libpthread.so.0),pthread_mutex_lock(libpthread.so.0),remove_global_thread,dena::dbcontext::term_thread(handlersocket.so),dena::hstcpsvr_worker::run(handlersocket.so),dena::thread::thread_main(handlersocket.so),start_thread(libpthread.so.0),clone(libc.so.6)
Definition of dena::dbcontext::term_thread (from https://github.com/DeNA/HandlerSocket-Plugin-for-MySQL/blob/master/handlersocket/database.cpp#L346):
dbcontext::term_thread()
{
DBG_THR(fprintf(stderr, "HNDSOCK thread end %p\n", thd));
unlock_tables_if();
my_pthread_setspecific_ptr(THR_THD, 0);
{
pthread_mutex_lock(&LOCK_thread_count);
#if MYSQL_VERSION_ID >= 50600
remove_global_thread(thd);
#else
--thread_count;
#endif
delete thd;
thd = 0;
pthread_mutex_unlock(&LOCK_thread_count);
my_thread_end();
}
}
And definition of remove_global_thread (from 5.6.21/sql/mysqld.cc@892):
void remove_global_thread(THD *thd)
{
DBUG_PRINT("info", ("remove_global_thread %p current_linfo %p",
thd, thd->current_linfo));
mysql_mutex_lock(&LOCK_thd_remove);
mysql_mutex_lock(&LOCK_thread_count);
DBUG_ASSERT(thd->release_resources_done());
/*
Used by binlog_reset_master. It would be cleaner to use
DEBUG_SYNC here, but that's not possible because the THD's debug
sync feature has been shut down at this point.
*/
DBUG_EXECUTE_IF("sleep_after_lock_thread_count_before_delete_thd",
sleep(5););
const size_t num_erased= global_thread_list->erase(thd);
if (num_erased == 1)
--global_thread_count;
// Removing a THD that was never added is an error.
DBUG_ASSERT(1 == num_erased);
mysql_mutex_unlock(&LOCK_thd_remove);
mysql_cond_broadcast(&COND_thread_count);
mysql_mutex_unlock(&LOCK_thread_count);
}
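So my understanding is that dena::dbcontext::term_thread already holds LOCK_thread_count when it calls remove_global_thread, so the second attempt by the same thread to acquire the mutex (mysql_mutex_lock(&LOCK_thread_count) inside remove_global_thread) will wait forever, because LOCK_thread_count is not a recursive mutex (thanks to Vlad for the input on that):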
fgrep -RnI --exclude-dir=build* --exclude-dir=mysql-test --exclude=tags --exclude=*.P* --exclude=*.T* "LOCK_thread_count" . | grep init
./tests/thread_test.c:196: pthread_mutex_init(&LOCK_thread_count,MY_MUTEX_INIT_FAST);
./sql/mysqld.cc:3638: mysql_mutex_init(key_LOCK_thread_count, &LOCK_thread_count, MY_MUTEX_INIT_FAST);
./mysys/thr_lock.c:1608: if ((error= mysql_mutex_init(0, &LOCK_thread_count, MY_MUTEX_INIT_FAST)))
./mysys/thr_alarm.c:883: mysql_mutex_init(0, &LOCK_thread_count, MY_MUTEX_INIT_FAST);
About MY_MUTEX_INIT_FAST:
#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
extern pthread_mutexattr_t my_fast_mutexattr;
#define MY_MUTEX_INIT_FAST &my_fast_mutexattr
#else
#define MY_MUTEX_INIT_FAST NULL
#endif
...
#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
/*
Set mutex type to "fast" a.k.a "adaptive"
In this case the thread may steal the mutex from some other thread
that is waiting for the same mutex. This will save us some
context switches but may cause a thread to 'starve forever' while
waiting for the mutex (not likely if the code within the mutex is
short).
*/
pthread_mutexattr_init(&my_fast_mutexattr);
pthread_mutexattr_settype(&my_fast_mutexattr,
PTHREAD_MUTEX_ADAPTIVE_NP);
#endif