(In reply to H.J. Lu from comment #5) > (In reply to Sunil Pandey from comment #4) > > Created attachment 12601 [details] > > strncmp_avx2 patch for pr25933 > > > > Tested attached patch on > > > > https://gitlab.com/x86-glibc/glibc/-/commits/users/hjl/pr25933/master > > Looks good. Please try this > > diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S > b/sysdeps/x86_64/multiarch/strcmp-avx2.S > index 48d03a9f46..dabc3e7590 100644 > --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S > +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S > @@ -256,6 +256,11 @@ L(next_3_vectors): > vpmovmskb %ymm0, %ecx > testl %ecx, %ecx > jne L(return_3_vec_size) > +# ifdef USE_AS_STRNCMP > + /* Check if VEC_SIZE * 4 already exceeded max compare count %r11 */ > + cmpq $(VEC_SIZE * 4), %r11 > + jbe L(zero) > +# endif > L(main_loop_header): > leaq (VEC_SIZE * 4)(%rdi), %rdx > movl $PAGE_SIZE, %ecx
It fixes the issue on my setup as expected.
$ ./test-strncmp simple_strncmp stupid_strncmp __strncmp_avx2 __strncmp_sse42 __strncmp_ssse3 __strncmp_sse2 $ echo $? 0
$ git diff diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S index 48d03a9f46..84ffe2cd5c 100644 --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S @@ -256,6 +256,11 @@ L(next_3_vectors): vpmovmskb %ymm0, %ecx testl %ecx, %ecx jne L(return_3_vec_size) +# ifdef USE_AS_STRNCMP + /* Check if VEC_SIZE * 4 already exceeded max compare count %r11 */ + cmpq $(VEC_SIZE * 4), %r11 + jbe L(zero) +# endif L(main_loop_header): leaq (VEC_SIZE * 4)(%rdi), %rdx movl $PAGE_SIZE, %ecx
(In reply to H.J. Lu from comment #5)
> (In reply to Sunil Pandey from comment #4)
> > Created attachment 12601 [details]
> > strncmp_avx2 patch for pr25933
> >
> > Tested attached patch on
> >
> > https://gitlab.com/x86-glibc/glibc/-/commits/users/hjl/pr25933/master
>
> Looks good. Please try this
>
> diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S
> b/sysdeps/x86_64/multiarch/strcmp-avx2.S
> index 48d03a9f46..dabc3e7590 100644
> --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
> +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
> @@ -256,6 +256,11 @@ L(next_3_vectors):
> vpmovmskb %ymm0, %ecx
> testl %ecx, %ecx
> jne L(return_3_vec_size)
> +# ifdef USE_AS_STRNCMP
> + /* Check if VEC_SIZE * 4 already exceeded max compare count %r11 */
> + cmpq $(VEC_SIZE * 4), %r11
> + jbe L(zero)
> +# endif
> L(main_loop_header):
> leaq (VEC_SIZE * 4)(%rdi), %rdx
> movl $PAGE_SIZE, %ecx
It fixes the issue on my setup as expected.
$ ./test-strncmp
simple_strncmp stupid_strncmp __strncmp_avx2 __strncmp_sse42 __strncmp_ssse3 __strncmp_sse2
$ echo $?
0
$ git diff
diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
index 48d03a9f46..84ffe2cd5c 100644
--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
@@ -256,6 +256,11 @@ L(next_3_vectors):
vpmovmskb %ymm0, %ecx
testl %ecx, %ecx
jne L(return_3_vec_size)
+# ifdef USE_AS_STRNCMP
+ /* Check if VEC_SIZE * 4 already exceeded max compare count %r11 */
+ cmpq $(VEC_SIZE * 4), %r11
+ jbe L(zero)
+# endif
L(main_loop_header):
leaq (VEC_SIZE * 4)(%rdi), %rdx
movl $PAGE_SIZE, %ecx