Merge lp:~akopytov/percona-server/bug832528 into lp:percona-server/5.1

Proposed by Alexey Kopytov
Status: Merged
Merged at revision: 279
Proposed branch: lp:~akopytov/percona-server/bug832528
Merge into: lp:percona-server/5.1
Diff against target: 422 lines (+410/-0)
2 files modified
patches/bug53761.patch (+409/-0)
patches/series (+1/-0)
To merge this branch: bzr merge lp:~akopytov/percona-server/bug832528
Reviewer Review Type Date Requested Status
Yasufumi Kinoshita (community) Approve
Review via email: mp+72843@code.launchpad.net

Description of the change

Backport of the fix for MySQL bug #53761 to 5.1. There is no test case, because:

1. The bug can only be triggered on a fairly large dataset (verified the fix manually on customer's data dump).
2. The original fix did not contain a test case.

To post a comment you must log in.
Revision history for this message
Alexey Kopytov (akopytov) wrote :
Revision history for this message
Yasufumi Kinoshita (yasufumi-kinoshita) wrote :

I think no conflict with other XtraDB patches.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added file 'patches/bug53761.patch'
2--- patches/bug53761.patch 1970-01-01 00:00:00 +0000
3+++ patches/bug53761.patch 2011-08-25 09:13:51 +0000
4@@ -0,0 +1,409 @@
5+# name : bug53761.patch
6+# maintainer : Alexey
7+#
8+# Backport of the fix for MySQL bug #53761 to 5.1
9+#
10+--- a/storage/innodb_plugin/btr/btr0cur.c
11++++ b/storage/innodb_plugin/btr/btr0cur.c
12+@@ -3117,6 +3117,7 @@
13+ {
14+ btr_path_t* slot;
15+ rec_t* rec;
16++ page_t* page;
17+
18+ ut_a(cursor->path_arr);
19+
20+@@ -3139,8 +3140,155 @@
21+
22+ slot = cursor->path_arr + (root_height - height);
23+
24++ page = page_align(rec);
25++
26+ slot->nth_rec = page_rec_get_n_recs_before(rec);
27+- slot->n_recs = page_get_n_recs(page_align(rec));
28++ slot->n_recs = page_get_n_recs(page);
29++ slot->page_no = page_get_page_no(page);
30++ slot->page_level = btr_page_get_level_low(page);
31++}
32++
33++/*******************************************************************//**
34++Estimate the number of rows between slot1 and slot2 for any level on a
35++B-tree. This function starts from slot1->page and reads a few pages to
36++the right, counting their records. If we reach slot2->page quickly then
37++we know exactly how many records there are between slot1 and slot2 and
38++we set is_n_rows_exact to TRUE. If we cannot reach slot2->page quickly
39++then we calculate the average number of records in the pages scanned
40++so far and assume that all pages that we did not scan up to slot2->page
41++contain the same number of records, then we multiply that average to
42++the number of pages between slot1->page and slot2->page (which is
43++n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE.
44++@return number of rows (exact or estimated) */
45++static
46++ib_int64_t
47++btr_estimate_n_rows_in_range_on_level(
48++/*==================================*/
49++ dict_index_t* index, /*!< in: index */
50++ btr_path_t* slot1, /*!< in: left border */
51++ btr_path_t* slot2, /*!< in: right border */
52++ ib_int64_t n_rows_on_prev_level, /*!< in: number of rows
53++ on the previous level for the
54++ same descend paths; used to
55++ determine the numbe of pages
56++ on this level */
57++ ibool* is_n_rows_exact) /*!< out: TRUE if the returned
58++ value is exact i.e. not an
59++ estimation */
60++{
61++ ulint space;
62++ ib_int64_t n_rows;
63++ ulint n_pages_read;
64++ ulint page_no;
65++ ulint zip_size;
66++ ulint level;
67++
68++ space = dict_index_get_space(index);
69++
70++ n_rows = 0;
71++ n_pages_read = 0;
72++
73++ /* Assume by default that we will scan all pages between
74++ slot1->page_no and slot2->page_no */
75++ *is_n_rows_exact = TRUE;
76++
77++ /* add records from slot1->page_no which are to the right of
78++ the record which serves as a left border of the range, if any */
79++ if (slot1->nth_rec < slot1->n_recs) {
80++ n_rows += slot1->n_recs - slot1->nth_rec;
81++ }
82++
83++ /* add records from slot2->page_no which are to the left of
84++ the record which servers as a right border of the range, if any */
85++ if (slot2->nth_rec > 1) {
86++ n_rows += slot2->nth_rec - 1;
87++ }
88++
89++ /* count the records in the pages between slot1->page_no and
90++ slot2->page_no (non inclusive), if any */
91++
92++ zip_size = fil_space_get_zip_size(space);
93++
94++ /* Do not read more than this number of pages in order not to hurt
95++ performance with this code which is just an estimation. If we read
96++ this many pages before reaching slot2->page_no then we estimate the
97++ average from the pages scanned so far */
98++ #define N_PAGES_READ_LIMIT 10
99++
100++ page_no = slot1->page_no;
101++ level = slot1->page_level;
102++
103++ do {
104++ mtr_t mtr;
105++ page_t* page;
106++ buf_block_t* block;
107++
108++ mtr_start(&mtr);
109++
110++ /* fetch the page */
111++ block = buf_page_get(space, zip_size, page_no, RW_S_LATCH,
112++ &mtr);
113++
114++ page = buf_block_get_frame(block);
115++
116++ /* It is possible that the tree has been reorganized in the
117++ meantime and this is a different page. If this happens the
118++ calculated estimate will be bogus, which is not fatal as
119++ this is only an estimate. We are sure that a page with
120++ page_no exists because InnoDB never frees pages, only
121++ reuses them. */
122++ if (fil_page_get_type(page) != FIL_PAGE_INDEX
123++ || ut_dulint_cmp(btr_page_get_index_id(page), index->id)
124++ || btr_page_get_level_low(page) != level) {
125++
126++ /* The page got reused for something else */
127++ goto inexact;
128++ }
129++
130++ n_pages_read++;
131++
132++ if (page_no != slot1->page_no) {
133++ /* Do not count the records on slot1->page_no,
134++ we already counted them before this loop. */
135++ n_rows += page_get_n_recs(page);
136++ }
137++
138++ page_no = btr_page_get_next(page, &mtr);
139++
140++ mtr_commit(&mtr);
141++
142++ if (n_pages_read == N_PAGES_READ_LIMIT
143++ || page_no == FIL_NULL) {
144++ /* Either we read too many pages or
145++ we reached the end of the level without passing
146++ through slot2->page_no, the tree must have changed
147++ in the meantime */
148++ goto inexact;
149++ }
150++
151++ } while (page_no != slot2->page_no);
152++
153++ return(n_rows);
154++
155++inexact:
156++
157++ *is_n_rows_exact = FALSE;
158++
159++ /* We did interrupt before reaching slot2->page */
160++
161++ if (n_pages_read > 0) {
162++ /* The number of pages on this level is
163++ n_rows_on_prev_level, multiply it by the
164++ average number of recs per page so far */
165++ n_rows = n_rows_on_prev_level
166++ * n_rows / n_pages_read;
167++ } else {
168++ /* The tree changed before we could even
169++ start with slot1->page_no */
170++ n_rows = 10;
171++ }
172++
173++ return(n_rows);
174+ }
175+
176+ /*******************************************************************//**
177+@@ -3165,6 +3313,7 @@
178+ ibool diverged_lot;
179+ ulint divergence_level;
180+ ib_int64_t n_rows;
181++ ibool is_n_rows_exact;
182+ ulint i;
183+ mtr_t mtr;
184+
185+@@ -3207,6 +3356,7 @@
186+ /* We have the path information for the range in path1 and path2 */
187+
188+ n_rows = 1;
189++ is_n_rows_exact = TRUE;
190+ diverged = FALSE; /* This becomes true when the path is not
191+ the same any more */
192+ diverged_lot = FALSE; /* This becomes true when the paths are
193+@@ -3222,7 +3372,7 @@
194+ if (slot1->nth_rec == ULINT_UNDEFINED
195+ || slot2->nth_rec == ULINT_UNDEFINED) {
196+
197+- if (i > divergence_level + 1) {
198++ if (i > divergence_level + 1 && !is_n_rows_exact) {
199+ /* In trees whose height is > 1 our algorithm
200+ tends to underestimate: multiply the estimate
201+ by 2: */
202+@@ -3234,7 +3384,9 @@
203+ to over 1 / 2 of the estimated rows in the whole
204+ table */
205+
206+- if (n_rows > index->table->stat_n_rows / 2) {
207++ if (n_rows > index->table->stat_n_rows / 2
208++ && !is_n_rows_exact) {
209++
210+ n_rows = index->table->stat_n_rows / 2;
211+
212+ /* If there are just 0 or 1 rows in the table,
213+@@ -3260,10 +3412,15 @@
214+ divergence_level = i;
215+ }
216+ } else {
217+- /* Maybe the tree has changed between
218+- searches */
219+-
220+- return(10);
221++ /* It is possible that
222++ slot1->nth_rec >= slot2->nth_rec
223++ if, for example, we have a single page
224++ tree which contains (inf, 5, 6, supr)
225++ and we select where x > 20 and x < 30;
226++ in this case slot1->nth_rec will point
227++ to the supr record and slot2->nth_rec
228++ will point to 6 */
229++ n_rows = 0;
230+ }
231+
232+ } else if (diverged && !diverged_lot) {
233+@@ -3287,8 +3444,9 @@
234+ }
235+ } else if (diverged_lot) {
236+
237+- n_rows = (n_rows * (slot1->n_recs + slot2->n_recs))
238+- / 2;
239++ n_rows = btr_estimate_n_rows_in_range_on_level(
240++ index, slot1, slot2, n_rows,
241++ &is_n_rows_exact);
242+ }
243+ }
244+ }
245+--- a/storage/innodb_plugin/include/btr0cur.h
246++++ b/storage/innodb_plugin/include/btr0cur.h
247+@@ -652,6 +652,11 @@
248+ order); value ULINT_UNDEFINED
249+ denotes array end */
250+ ulint n_recs; /*!< number of records on the page */
251++ ulint page_no; /*!< no of the page containing the record */
252++ ulint page_level; /*!< level of the page, if later we fetch
253++ the page under page_no and it is no different
254++ level then we know that the tree has been
255++ reorganized */
256+ };
257+
258+ #define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */
259+--- a/mysql-test/suite/innodb_plugin/r/innodb_gis.result
260++++ b/mysql-test/suite/innodb_plugin/r/innodb_gis.result
261+@@ -572,7 +572,7 @@
262+ EXPLAIN
263+ SELECT COUNT(*) FROM t2 WHERE p=POINTFROMTEXT('POINT(1 2)');
264+ id select_type table type possible_keys key key_len ref rows Extra
265+-1 SIMPLE t2 ref p p 28 const 1 Using where
266++1 SIMPLE t2 ref p p 28 const 2 Using where
267+ SELECT COUNT(*) FROM t2 WHERE p=POINTFROMTEXT('POINT(1 2)');
268+ COUNT(*)
269+ 2
270+--- a/mysql-test/suite/innodb_plugin/r/innodb_mysql.result
271++++ b/mysql-test/suite/innodb_plugin/r/innodb_mysql.result
272+@@ -889,13 +889,13 @@
273+ id 1
274+ select_type SIMPLE
275+ table t1
276+-type range
277++type index
278+ possible_keys bkey
279+-key bkey
280+-key_len 5
281++key PRIMARY
282++key_len 4
283+ ref NULL
284+-rows 16
285+-Extra Using where; Using index; Using filesort
286++rows 32
287++Extra Using where
288+ SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY a;
289+ a b
290+ 1 2
291+@@ -934,12 +934,12 @@
292+ id 1
293+ select_type SIMPLE
294+ table t1
295+-type range
296++type index
297+ possible_keys bkey
298+ key bkey
299+ key_len 5
300+ ref NULL
301+-rows 16
302++rows 32
303+ Extra Using where; Using index
304+ SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY b,a;
305+ a b
306+@@ -989,7 +989,7 @@
307+ key bkey
308+ key_len 5
309+ ref const
310+-rows 8
311++rows 16
312+ Extra Using where; Using index; Using filesort
313+ SELECT * FROM t2 WHERE b=1 ORDER BY a;
314+ a b c
315+@@ -1018,7 +1018,7 @@
316+ key bkey
317+ key_len 10
318+ ref const,const
319+-rows 8
320++rows 16
321+ Extra Using where; Using index
322+ SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY a;
323+ a b c
324+@@ -1047,7 +1047,7 @@
325+ key bkey
326+ key_len 10
327+ ref const,const
328+-rows 8
329++rows 16
330+ Extra Using where; Using index
331+ SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY b,c,a;
332+ a b c
333+@@ -1076,7 +1076,7 @@
334+ key bkey
335+ key_len 10
336+ ref const,const
337+-rows 8
338++rows 16
339+ Extra Using where; Using index
340+ SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY c,a;
341+ a b c
342+@@ -1211,7 +1211,7 @@
343+ key b
344+ key_len 5
345+ ref const
346+-rows 1
347++rows 2
348+ Extra Using where; Using index
349+ SELECT * FROM t1 WHERE b=2 ORDER BY a ASC;
350+ a b
351+@@ -1226,7 +1226,7 @@
352+ key b
353+ key_len 5
354+ ref const
355+-rows 1
356++rows 2
357+ Extra Using where; Using index
358+ SELECT * FROM t1 WHERE b=2 ORDER BY a DESC;
359+ a b
360+@@ -1370,7 +1370,7 @@
361+ INSERT INTO t1 (a,b,c) SELECT a+4,b,c FROM t1;
362+ EXPLAIN SELECT a, b, c FROM t1 WHERE b = 1 ORDER BY a DESC LIMIT 5;
363+ id select_type table type possible_keys key key_len ref rows Extra
364+-1 SIMPLE t1 index t1_b PRIMARY 4 NULL 8 Using where
365++1 SIMPLE t1 range t1_b t1_b 5 NULL 8 Using where
366+ SELECT a, b, c FROM t1 WHERE b = 1 ORDER BY a DESC LIMIT 5;
367+ a b c
368+ 8 1 1
369+@@ -1729,7 +1729,7 @@
370+ FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
371+ id select_type table type possible_keys key key_len ref rows Extra
372+ 1 PRIMARY <derived2> system NULL NULL NULL NULL 1
373+-2 DERIVED t1 index c3,c2 c2 10 NULL 5
374++2 DERIVED t1 ALL c3,c2 c3 5 5 Using filesort
375+ DROP TABLE t1;
376+ CREATE TABLE t1 (c1 REAL, c2 REAL, c3 REAL, KEY (c3), KEY (c2, c3))
377+ ENGINE=InnoDB;
378+@@ -1743,7 +1743,7 @@
379+ FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
380+ id select_type table type possible_keys key key_len ref rows Extra
381+ 1 PRIMARY <derived2> system NULL NULL NULL NULL 1
382+-2 DERIVED t1 index c3,c2 c2 18 NULL 5
383++2 DERIVED t1 ALL c3,c2 c3 9 5 Using filesort
384+ DROP TABLE t1;
385+ CREATE TABLE t1 (c1 DECIMAL(12,2), c2 DECIMAL(12,2), c3 DECIMAL(12,2),
386+ KEY (c3), KEY (c2, c3))
387+@@ -1758,7 +1758,7 @@
388+ FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
389+ id select_type table type possible_keys key key_len ref rows Extra
390+ 1 PRIMARY <derived2> system NULL NULL NULL NULL 1
391+-2 DERIVED t1 index c3,c2 c2 14 NULL 5
392++2 DERIVED t1 ALL c3,c2 c3 7 5 Using filesort
393+ DROP TABLE t1;
394+ End of 5.1 tests
395+ drop table if exists t1, t2, t3;
396+@@ -1834,7 +1834,7 @@
397+ key b
398+ key_len 5
399+ ref NULL
400+-rows 3
401++rows 5
402+ Extra Using where; Using index
403+ EXPLAIN SELECT c FROM bar WHERE c>2;;
404+ id 1
405+@@ -2430,7 +2430,7 @@
406+ WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t;
407+ id select_type table type possible_keys key key_len ref rows Extra
408+ 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
409+-2 DERIVED t1 index_merge PRIMARY,idx idx,PRIMARY 5,4 NULL 3537 Using sort_union(idx,PRIMARY); Using where
410++2 DERIVED t1 index_merge PRIMARY,idx idx,PRIMARY 5,4 NULL 1536 Using sort_union(idx,PRIMARY); Using where
411+ SELECT COUNT(*) FROM
412+ (SELECT * FROM t1 FORCE INDEX (idx,PRIMARY)
413+ WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t;
414
415=== modified file 'patches/series'
416--- patches/series 2011-08-01 21:45:17 +0000
417+++ patches/series 2011-08-25 09:13:51 +0000
418@@ -60,3 +60,4 @@
419 bug813587.patch
420 innodb_bug47167_test_fix.patch
421 disable_query_cache_28249_test_sporadic_failure.patch
422+bug53761.patch

Subscribers

People subscribed via source and target branches