PBXT

Code review comment for lp:~vkolesnikov/pbxt/pbxt-07-low-disk-index-flush

pbxt-07-low-disk-index-flush
Merge into 1.0.07-rc

Revision history for this message

Paul McCullagh (paul-mccullagh) wrote on 2009-08-17:

Hi Vlad,

There is a problem with this change. Actually a serious bug.

See below:

On Aug 14, 2009, at 11:30 AM, Vladimir Kolesnikov wrote:

> Vladimir Kolesnikov has proposed merging lp:~vkolesnikov/pbxt/
> pbxt-07-low-disk-index-flush into lp:pbxt/1.0.07-rc.
>
> Requested reviews:
> PBXT Core (pbxt-core)
> --
> https://code.launchpad.net/~vkolesnikov/pbxt/pbxt-07-low-disk-index-
> flush/+merge/10141
> Your team PBXT Core is subscribed to branch lp:pbxt/1.0.07-rc.
> === modified file 'ChangeLog'
> --- ChangeLog 2009-08-11 09:17:53 +0000
> +++ ChangeLog 2009-08-14 09:23:20 +0000
> @@ -1,6 +1,10 @@
> PBXT Release Notes
> ==================
>
> +------- 1.0.07r RC - 2009-08-14
> +
> +RN267: Fixed an assertion failure: if checkpointer failed to
> successully flush indices (e.g. due to low disk state) then next
> time xt_flush_indices is called it would generate an assertion failure
> +
> RN266: Fixed a crash: when initialization failed with an exception
> THD structure was released twice
>
> ------- 1.0.07q RC - 2009-08-09
>
> === modified file 'src/index_xt.cc'
> --- src/index_xt.cc 2009-07-07 14:21:07 +0000
> +++ src/index_xt.cc 2009-08-13 13:49:21 +0000
> @@ -2785,7 +2785,6 @@
> *bytes_flushed += (dirty_blocks * XT_INDEX_PAGE_SIZE);
>
> curr_flush_seq = tab->tab_ind_flush_seq;
> - tab->tab_ind_flush_seq++;
>
> /* Write the dirty pages: */
> indp = tab->tab_dic.dic_keys;
> @@ -2981,6 +2980,13 @@
> }
> }
>
> + /* At this point all dirty blocks should have been successully
> flushed to disk,
> + * otherwise we must not increment this value as an assertion
> above in this
> + * function ASSERT_NS(block->cp_flush_seq == curr_flush_seq)
> + * will fail.
> + */
> + tab->tab_ind_flush_seq++;
> +
> indp = tab->tab_dic.dic_keys;
> for (i=0; i<tab->tab_dic.dic_key_count; i++, indp++) {
> ind = *indp;

I think you have moved tab_ind_flush_seq++ too far down. All locks on
the index are released before the if (wrote_something) block:

indp = tab->tab_dic.dic_keys;
for (i=0; i<tab->tab_dic.dic_key_count; i++, indp++) {
ind = *indp;
XT_INDEX_UNLOCK(ind, ot);
}

if (wrote_something) {
....

After that point, the index can be updated again. Then it is important
that the updates use a different flush sequence number. If not, the
page will be freed by the code that occurred just above this point:

  /* Free up flushed pages: */
  indp = tab->tab_dic.dic_keys;
  for (i=0; i<tab->tab_dic.dic_key_count; i++, indp++) {
                    ...

And the changes in the cache will be lost!

>
> === modified file 'src/lock_xt.h'
> --- src/lock_xt.h 2008-11-17 10:14:17 +0000
> +++ src/lock_xt.h 2009-08-13 13:49:21 +0000
> @@ -234,6 +234,38 @@
> return val;
> }
>
> +inline void xt_atomic_inc4(volatile xtWord4 *mptr)
> +{
> +#ifdef XT_ATOMIC_WIN32_X86
> + __asm MOV ECX, mptr
> + __asm LOCK INC DWORD PTR [ECX]
> +#elif defined(XT_ATOMIC_GNUC_X86)
> + asm volatile ("lock; incl %0" : : "m" (*mptr) : "memory");
> +#elif defined(XT_ATOMIC_GCC_OPS)
> + __sync_fetch_and_add(mptr, 1);
> +#elif defined(XT_ATOMIC_SOLARIS_LIB)
> + atomic_inc_32_nv(mptr);
> +#else
> + ++(*mptr);
> +#endif
> +}
> +
> +inline void xt_atomic_dec4(volatile xtWord4 *mptr)
> +{
> +#ifdef XT_ATOMIC_WIN32_X86
> + __asm MOV ECX, mptr
> + __asm LOCK DEC DWORD PTR [ECX]
> +#elif defined(XT_ATOMIC_GNUC_X86)
> + asm volatile ("lock; decl %0" : : "m" (*mptr) : "memory");
> +#elif defined(XT_ATOMIC_GCC_OPS)
> + __sync_fetch_and_sub(mptr, 1);
> +#elif defined(XT_ATOMIC_SOLARIS_LIB)
> + atomic_dec_32_nv(mptr);
> +#else
> + --(*mptr);
> +#endif
> +}
> +

Please check your merge, I have blank lines between each line in the
code above.

>
> inline void xt_atomic_set4(volatile xtWord4 *mptr, xtWord4 val)
> {
> #ifdef XT_SPL_WIN32_ASM
>
> === modified file 'src/strutil_xt.cc'
> --- src/strutil_xt.cc 2009-08-09 15:46:45 +0000
> +++ src/strutil_xt.cc 2009-08-14 09:23:20 +0000
> @@ -367,7 +367,7 @@
>
> xtPublic c_char *xt_get_version(void)
> {
> - return "1.0.07q RC";
> + return "1.0.07r RC";
> }
>
> /* Copy and URL decode! */
>
> === modified file 'src/tabcache_xt.cc'
> --- src/tabcache_xt.cc 2009-01-30 10:23:54 +0000
> +++ src/tabcache_xt.cc 2009-08-13 13:49:21 +0000
> @@ -345,7 +345,7 @@
> if (!tc_fetch(file, ref_id, &seg, &page, offset, thread))
> return FAILED;
> #endif
> - page->tcp_lock_count++;
> + xt_atomic_inc4(&page->tcp_lock_count);

An atomic operation is not required here because the page is locked.

Question: is it required because of on chip cache co-ordination
problems?

> xt_rwmutex_unlock(&seg->tcs_lock, thread->t_id);
> *ret_page = page;
> return OK;
> @@ -376,7 +376,7 @@
> #endif
>
> if (page->tcp_lock_count > 0)
> - page->tcp_lock_count--;
> + xt_atomic_dec4(&page->tcp_lock_count);
>
> xt_rwmutex_unlock(&seg->tcs_lock, thread->t_id);
> }
>

--
Paul McCullagh
PrimeBase Technologies
www.primebase.org
www.blobstreaming.org
pbxt.blogspot.com

Hi Vlad,

There is a problem with this change. Actually a serious bug.

See below:

On Aug 14, 2009, at 11:30 AM, Vladimir Kolesnikov wrote:

> Vladimir Kolesnikov has proposed merging lp:~vkolesnikov/pbxt/ 
> pbxt-07-low-disk-index-flush into lp:pbxt/1.0.07-rc.
>
> Requested reviews:
>    PBXT Core (pbxt-core)
> -- 
> https://code.launchpad.net/~vkolesnikov/pbxt/pbxt-07-low-disk-index- 
> flush/+merge/10141
> Your team PBXT Core is subscribed to branch lp:pbxt/1.0.07-rc.
> === modified file 'ChangeLog'
> --- ChangeLog	2009-08-11 09:17:53 +0000
> +++ ChangeLog	2009-08-14 09:23:20 +0000
> @@ -1,6 +1,10 @@
> PBXT Release Notes
> ==================
>
> +------- 1.0.07r RC - 2009-08-14
> +
> +RN267: Fixed an assertion failure: if checkpointer failed to  
> successully flush indices (e.g. due to low disk state) then next  
> time xt_flush_indices is called it would generate an assertion failure
> +
> RN266: Fixed a crash: when initialization failed with an exception  
> THD structure was released twice
>
> ------- 1.0.07q RC - 2009-08-09
>
> === modified file 'src/index_xt.cc'
> --- src/index_xt.cc	2009-07-07 14:21:07 +0000
> +++ src/index_xt.cc	2009-08-13 13:49:21 +0000
> @@ -2785,7 +2785,6 @@
> 		*bytes_flushed += (dirty_blocks * XT_INDEX_PAGE_SIZE);
>
> 	curr_flush_seq = tab->tab_ind_flush_seq;
> -	tab->tab_ind_flush_seq++;
>
> 	/* Write the dirty pages: */
> 	indp = tab->tab_dic.dic_keys;
> @@ -2981,6 +2980,13 @@
> 			}
> 		}
>
> +		/* At this point all dirty blocks should have been successully  
> flushed to disk,
> +		 * otherwise we must not increment this value as an assertion  
> above in this
> +		 * function ASSERT_NS(block->cp_flush_seq == curr_flush_seq)
> +		 * will fail.
> +		 */
> +		tab->tab_ind_flush_seq++;
> +
> 		indp = tab->tab_dic.dic_keys;
> 		for (i=0; i<tab->tab_dic.dic_key_count; i++, indp++) {
> 			ind = *indp;

I think you have moved tab_ind_flush_seq++ too far down. All locks on  
the index are released before the if (wrote_something) block:

indp = tab->tab_dic.dic_keys;
	for (i=0; i<tab->tab_dic.dic_key_count; i++, indp++) {
		ind = *indp;
		XT_INDEX_UNLOCK(ind, ot);
	}

if (wrote_something) {
            ....

After that point, the index can be updated again. Then it is important  
that the updates use a different flush sequence number. If not, the  
page will be freed by the code that occurred just above this point:

/* Free up flushed pages: */
		indp = tab->tab_dic.dic_keys;
		for (i=0; i<tab->tab_dic.dic_key_count; i++, indp++) {
                    ...

And the changes in the cache will be lost!

>
> === modified file 'src/lock_xt.h'
> --- src/lock_xt.h	2008-11-17 10:14:17 +0000
> +++ src/lock_xt.h	2009-08-13 13:49:21 +0000
> @@ -234,6 +234,38 @@
> 	return val;
> }
>
> +inline void xt_atomic_inc4(volatile xtWord4 *mptr)
> +{
> +#ifdef XT_ATOMIC_WIN32_X86
> +	__asm MOV  ECX, mptr
> +	__asm LOCK INC	DWORD PTR [ECX]
> +#elif defined(XT_ATOMIC_GNUC_X86)
> +	asm volatile ("lock; incl %0" : : "m" (*mptr) : "memory");
> +#elif defined(XT_ATOMIC_GCC_OPS)
> +	__sync_fetch_and_add(mptr, 1);
> +#elif defined(XT_ATOMIC_SOLARIS_LIB)
> +	atomic_inc_32_nv(mptr);
> +#else
> +	++(*mptr);
> +#endif
> +}
> +
> +inline void xt_atomic_dec4(volatile xtWord4 *mptr)
> +{
> +#ifdef XT_ATOMIC_WIN32_X86
> +	__asm MOV  ECX, mptr
> +	__asm LOCK DEC	DWORD PTR [ECX]
> +#elif defined(XT_ATOMIC_GNUC_X86)
> +	asm volatile ("lock; decl %0" : : "m" (*mptr) : "memory");
> +#elif defined(XT_ATOMIC_GCC_OPS)
> +	__sync_fetch_and_sub(mptr, 1);
> +#elif defined(XT_ATOMIC_SOLARIS_LIB)
> +	atomic_dec_32_nv(mptr);
> +#else
> +	--(*mptr);
> +#endif
> +}
> +

Please check your merge, I have blank lines between each line in the  
code above.

>
> inline void xt_atomic_set4(volatile xtWord4 *mptr, xtWord4 val)
> {
> #ifdef XT_SPL_WIN32_ASM
>
> === modified file 'src/strutil_xt.cc'
> --- src/strutil_xt.cc	2009-08-09 15:46:45 +0000
> +++ src/strutil_xt.cc	2009-08-14 09:23:20 +0000
> @@ -367,7 +367,7 @@
>
> xtPublic c_char *xt_get_version(void)
> {
> -	return "1.0.07q RC";
> +	return "1.0.07r RC";
> }
>
> /* Copy and URL decode! */
>
> === modified file 'src/tabcache_xt.cc'
> --- src/tabcache_xt.cc	2009-01-30 10:23:54 +0000
> +++ src/tabcache_xt.cc	2009-08-13 13:49:21 +0000
> @@ -345,7 +345,7 @@
> 	if (!tc_fetch(file, ref_id, &seg, &page, offset, thread))
> 		return FAILED;
> #endif
> -	page->tcp_lock_count++;
> +	xt_atomic_inc4(&page->tcp_lock_count);

An atomic operation is not required here because the page is locked.

Question: is it required because of on chip cache co-ordination  
problems?

> 	xt_rwmutex_unlock(&seg->tcs_lock, thread->t_id);
> 	*ret_page = page;
> 	return OK;
> @@ -376,7 +376,7 @@
> #endif
>
> 	if (page->tcp_lock_count > 0)
> -		page->tcp_lock_count--;
> +		xt_atomic_dec4(&page->tcp_lock_count);
>
> 	xt_rwmutex_unlock(&seg->tcs_lock, thread->t_id);
> }
>

--
Paul McCullagh
PrimeBase Technologies
www.primebase.org
www.blobstreaming.org
pbxt.blogspot.com

« Back to merge proposal