Merge lp:~akopytov/percona-server/bug832528 into lp:percona-server/5.1

Proposed by Alexey Kopytov
Status: Merged
Merged at revision: 279
Proposed branch: lp:~akopytov/percona-server/bug832528
Merge into: lp:percona-server/5.1
Diff against target: 422 lines (+410/-0)
2 files modified
patches/bug53761.patch (+409/-0)
patches/series (+1/-0)
To merge this branch: bzr merge lp:~akopytov/percona-server/bug832528
Reviewer Review Type Date Requested Status
Yasufumi Kinoshita (community) Approve
Review via email: mp+72843@code.launchpad.net

Description of the change

Backport of the fix for MySQL bug #53761 to 5.1. There is no test case, because:

1. The bug can only be triggered on a fairly large dataset (verified the fix manually on customer's data dump).
2. The original fix did not contain a test case.

To post a comment you must log in.
Revision history for this message
Alexey Kopytov (akopytov) wrote :
Revision history for this message
Yasufumi Kinoshita (yasufumi-kinoshita) wrote :

I think no conflict with other XtraDB patches.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== added file 'patches/bug53761.patch'
--- patches/bug53761.patch 1970-01-01 00:00:00 +0000
+++ patches/bug53761.patch 2011-08-25 09:13:51 +0000
@@ -0,0 +1,409 @@
1# name : bug53761.patch
2# maintainer : Alexey
3#
4# Backport of the fix for MySQL bug #53761 to 5.1
5#
6--- a/storage/innodb_plugin/btr/btr0cur.c
7+++ b/storage/innodb_plugin/btr/btr0cur.c
8@@ -3117,6 +3117,7 @@
9 {
10 btr_path_t* slot;
11 rec_t* rec;
12+ page_t* page;
13
14 ut_a(cursor->path_arr);
15
16@@ -3139,8 +3140,155 @@
17
18 slot = cursor->path_arr + (root_height - height);
19
20+ page = page_align(rec);
21+
22 slot->nth_rec = page_rec_get_n_recs_before(rec);
23- slot->n_recs = page_get_n_recs(page_align(rec));
24+ slot->n_recs = page_get_n_recs(page);
25+ slot->page_no = page_get_page_no(page);
26+ slot->page_level = btr_page_get_level_low(page);
27+}
28+
29+/*******************************************************************//**
30+Estimate the number of rows between slot1 and slot2 for any level on a
31+B-tree. This function starts from slot1->page and reads a few pages to
32+the right, counting their records. If we reach slot2->page quickly then
33+we know exactly how many records there are between slot1 and slot2 and
34+we set is_n_rows_exact to TRUE. If we cannot reach slot2->page quickly
35+then we calculate the average number of records in the pages scanned
36+so far and assume that all pages that we did not scan up to slot2->page
37+contain the same number of records, then we multiply that average to
38+the number of pages between slot1->page and slot2->page (which is
39+n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE.
40+@return number of rows (exact or estimated) */
41+static
42+ib_int64_t
43+btr_estimate_n_rows_in_range_on_level(
44+/*==================================*/
45+ dict_index_t* index, /*!< in: index */
46+ btr_path_t* slot1, /*!< in: left border */
47+ btr_path_t* slot2, /*!< in: right border */
48+ ib_int64_t n_rows_on_prev_level, /*!< in: number of rows
49+ on the previous level for the
50+ same descend paths; used to
51+ determine the numbe of pages
52+ on this level */
53+ ibool* is_n_rows_exact) /*!< out: TRUE if the returned
54+ value is exact i.e. not an
55+ estimation */
56+{
57+ ulint space;
58+ ib_int64_t n_rows;
59+ ulint n_pages_read;
60+ ulint page_no;
61+ ulint zip_size;
62+ ulint level;
63+
64+ space = dict_index_get_space(index);
65+
66+ n_rows = 0;
67+ n_pages_read = 0;
68+
69+ /* Assume by default that we will scan all pages between
70+ slot1->page_no and slot2->page_no */
71+ *is_n_rows_exact = TRUE;
72+
73+ /* add records from slot1->page_no which are to the right of
74+ the record which serves as a left border of the range, if any */
75+ if (slot1->nth_rec < slot1->n_recs) {
76+ n_rows += slot1->n_recs - slot1->nth_rec;
77+ }
78+
79+ /* add records from slot2->page_no which are to the left of
80+ the record which servers as a right border of the range, if any */
81+ if (slot2->nth_rec > 1) {
82+ n_rows += slot2->nth_rec - 1;
83+ }
84+
85+ /* count the records in the pages between slot1->page_no and
86+ slot2->page_no (non inclusive), if any */
87+
88+ zip_size = fil_space_get_zip_size(space);
89+
90+ /* Do not read more than this number of pages in order not to hurt
91+ performance with this code which is just an estimation. If we read
92+ this many pages before reaching slot2->page_no then we estimate the
93+ average from the pages scanned so far */
94+ #define N_PAGES_READ_LIMIT 10
95+
96+ page_no = slot1->page_no;
97+ level = slot1->page_level;
98+
99+ do {
100+ mtr_t mtr;
101+ page_t* page;
102+ buf_block_t* block;
103+
104+ mtr_start(&mtr);
105+
106+ /* fetch the page */
107+ block = buf_page_get(space, zip_size, page_no, RW_S_LATCH,
108+ &mtr);
109+
110+ page = buf_block_get_frame(block);
111+
112+ /* It is possible that the tree has been reorganized in the
113+ meantime and this is a different page. If this happens the
114+ calculated estimate will be bogus, which is not fatal as
115+ this is only an estimate. We are sure that a page with
116+ page_no exists because InnoDB never frees pages, only
117+ reuses them. */
118+ if (fil_page_get_type(page) != FIL_PAGE_INDEX
119+ || ut_dulint_cmp(btr_page_get_index_id(page), index->id)
120+ || btr_page_get_level_low(page) != level) {
121+
122+ /* The page got reused for something else */
123+ goto inexact;
124+ }
125+
126+ n_pages_read++;
127+
128+ if (page_no != slot1->page_no) {
129+ /* Do not count the records on slot1->page_no,
130+ we already counted them before this loop. */
131+ n_rows += page_get_n_recs(page);
132+ }
133+
134+ page_no = btr_page_get_next(page, &mtr);
135+
136+ mtr_commit(&mtr);
137+
138+ if (n_pages_read == N_PAGES_READ_LIMIT
139+ || page_no == FIL_NULL) {
140+ /* Either we read too many pages or
141+ we reached the end of the level without passing
142+ through slot2->page_no, the tree must have changed
143+ in the meantime */
144+ goto inexact;
145+ }
146+
147+ } while (page_no != slot2->page_no);
148+
149+ return(n_rows);
150+
151+inexact:
152+
153+ *is_n_rows_exact = FALSE;
154+
155+ /* We did interrupt before reaching slot2->page */
156+
157+ if (n_pages_read > 0) {
158+ /* The number of pages on this level is
159+ n_rows_on_prev_level, multiply it by the
160+ average number of recs per page so far */
161+ n_rows = n_rows_on_prev_level
162+ * n_rows / n_pages_read;
163+ } else {
164+ /* The tree changed before we could even
165+ start with slot1->page_no */
166+ n_rows = 10;
167+ }
168+
169+ return(n_rows);
170 }
171
172 /*******************************************************************//**
173@@ -3165,6 +3313,7 @@
174 ibool diverged_lot;
175 ulint divergence_level;
176 ib_int64_t n_rows;
177+ ibool is_n_rows_exact;
178 ulint i;
179 mtr_t mtr;
180
181@@ -3207,6 +3356,7 @@
182 /* We have the path information for the range in path1 and path2 */
183
184 n_rows = 1;
185+ is_n_rows_exact = TRUE;
186 diverged = FALSE; /* This becomes true when the path is not
187 the same any more */
188 diverged_lot = FALSE; /* This becomes true when the paths are
189@@ -3222,7 +3372,7 @@
190 if (slot1->nth_rec == ULINT_UNDEFINED
191 || slot2->nth_rec == ULINT_UNDEFINED) {
192
193- if (i > divergence_level + 1) {
194+ if (i > divergence_level + 1 && !is_n_rows_exact) {
195 /* In trees whose height is > 1 our algorithm
196 tends to underestimate: multiply the estimate
197 by 2: */
198@@ -3234,7 +3384,9 @@
199 to over 1 / 2 of the estimated rows in the whole
200 table */
201
202- if (n_rows > index->table->stat_n_rows / 2) {
203+ if (n_rows > index->table->stat_n_rows / 2
204+ && !is_n_rows_exact) {
205+
206 n_rows = index->table->stat_n_rows / 2;
207
208 /* If there are just 0 or 1 rows in the table,
209@@ -3260,10 +3412,15 @@
210 divergence_level = i;
211 }
212 } else {
213- /* Maybe the tree has changed between
214- searches */
215-
216- return(10);
217+ /* It is possible that
218+ slot1->nth_rec >= slot2->nth_rec
219+ if, for example, we have a single page
220+ tree which contains (inf, 5, 6, supr)
221+ and we select where x > 20 and x < 30;
222+ in this case slot1->nth_rec will point
223+ to the supr record and slot2->nth_rec
224+ will point to 6 */
225+ n_rows = 0;
226 }
227
228 } else if (diverged && !diverged_lot) {
229@@ -3287,8 +3444,9 @@
230 }
231 } else if (diverged_lot) {
232
233- n_rows = (n_rows * (slot1->n_recs + slot2->n_recs))
234- / 2;
235+ n_rows = btr_estimate_n_rows_in_range_on_level(
236+ index, slot1, slot2, n_rows,
237+ &is_n_rows_exact);
238 }
239 }
240 }
241--- a/storage/innodb_plugin/include/btr0cur.h
242+++ b/storage/innodb_plugin/include/btr0cur.h
243@@ -652,6 +652,11 @@
244 order); value ULINT_UNDEFINED
245 denotes array end */
246 ulint n_recs; /*!< number of records on the page */
247+ ulint page_no; /*!< no of the page containing the record */
248+ ulint page_level; /*!< level of the page, if later we fetch
249+ the page under page_no and it is no different
250+ level then we know that the tree has been
251+ reorganized */
252 };
253
254 #define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */
255--- a/mysql-test/suite/innodb_plugin/r/innodb_gis.result
256+++ b/mysql-test/suite/innodb_plugin/r/innodb_gis.result
257@@ -572,7 +572,7 @@
258 EXPLAIN
259 SELECT COUNT(*) FROM t2 WHERE p=POINTFROMTEXT('POINT(1 2)');
260 id select_type table type possible_keys key key_len ref rows Extra
261-1 SIMPLE t2 ref p p 28 const 1 Using where
262+1 SIMPLE t2 ref p p 28 const 2 Using where
263 SELECT COUNT(*) FROM t2 WHERE p=POINTFROMTEXT('POINT(1 2)');
264 COUNT(*)
265 2
266--- a/mysql-test/suite/innodb_plugin/r/innodb_mysql.result
267+++ b/mysql-test/suite/innodb_plugin/r/innodb_mysql.result
268@@ -889,13 +889,13 @@
269 id 1
270 select_type SIMPLE
271 table t1
272-type range
273+type index
274 possible_keys bkey
275-key bkey
276-key_len 5
277+key PRIMARY
278+key_len 4
279 ref NULL
280-rows 16
281-Extra Using where; Using index; Using filesort
282+rows 32
283+Extra Using where
284 SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY a;
285 a b
286 1 2
287@@ -934,12 +934,12 @@
288 id 1
289 select_type SIMPLE
290 table t1
291-type range
292+type index
293 possible_keys bkey
294 key bkey
295 key_len 5
296 ref NULL
297-rows 16
298+rows 32
299 Extra Using where; Using index
300 SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY b,a;
301 a b
302@@ -989,7 +989,7 @@
303 key bkey
304 key_len 5
305 ref const
306-rows 8
307+rows 16
308 Extra Using where; Using index; Using filesort
309 SELECT * FROM t2 WHERE b=1 ORDER BY a;
310 a b c
311@@ -1018,7 +1018,7 @@
312 key bkey
313 key_len 10
314 ref const,const
315-rows 8
316+rows 16
317 Extra Using where; Using index
318 SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY a;
319 a b c
320@@ -1047,7 +1047,7 @@
321 key bkey
322 key_len 10
323 ref const,const
324-rows 8
325+rows 16
326 Extra Using where; Using index
327 SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY b,c,a;
328 a b c
329@@ -1076,7 +1076,7 @@
330 key bkey
331 key_len 10
332 ref const,const
333-rows 8
334+rows 16
335 Extra Using where; Using index
336 SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY c,a;
337 a b c
338@@ -1211,7 +1211,7 @@
339 key b
340 key_len 5
341 ref const
342-rows 1
343+rows 2
344 Extra Using where; Using index
345 SELECT * FROM t1 WHERE b=2 ORDER BY a ASC;
346 a b
347@@ -1226,7 +1226,7 @@
348 key b
349 key_len 5
350 ref const
351-rows 1
352+rows 2
353 Extra Using where; Using index
354 SELECT * FROM t1 WHERE b=2 ORDER BY a DESC;
355 a b
356@@ -1370,7 +1370,7 @@
357 INSERT INTO t1 (a,b,c) SELECT a+4,b,c FROM t1;
358 EXPLAIN SELECT a, b, c FROM t1 WHERE b = 1 ORDER BY a DESC LIMIT 5;
359 id select_type table type possible_keys key key_len ref rows Extra
360-1 SIMPLE t1 index t1_b PRIMARY 4 NULL 8 Using where
361+1 SIMPLE t1 range t1_b t1_b 5 NULL 8 Using where
362 SELECT a, b, c FROM t1 WHERE b = 1 ORDER BY a DESC LIMIT 5;
363 a b c
364 8 1 1
365@@ -1729,7 +1729,7 @@
366 FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
367 id select_type table type possible_keys key key_len ref rows Extra
368 1 PRIMARY <derived2> system NULL NULL NULL NULL 1
369-2 DERIVED t1 index c3,c2 c2 10 NULL 5
370+2 DERIVED t1 ALL c3,c2 c3 5 5 Using filesort
371 DROP TABLE t1;
372 CREATE TABLE t1 (c1 REAL, c2 REAL, c3 REAL, KEY (c3), KEY (c2, c3))
373 ENGINE=InnoDB;
374@@ -1743,7 +1743,7 @@
375 FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
376 id select_type table type possible_keys key key_len ref rows Extra
377 1 PRIMARY <derived2> system NULL NULL NULL NULL 1
378-2 DERIVED t1 index c3,c2 c2 18 NULL 5
379+2 DERIVED t1 ALL c3,c2 c3 9 5 Using filesort
380 DROP TABLE t1;
381 CREATE TABLE t1 (c1 DECIMAL(12,2), c2 DECIMAL(12,2), c3 DECIMAL(12,2),
382 KEY (c3), KEY (c2, c3))
383@@ -1758,7 +1758,7 @@
384 FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
385 id select_type table type possible_keys key key_len ref rows Extra
386 1 PRIMARY <derived2> system NULL NULL NULL NULL 1
387-2 DERIVED t1 index c3,c2 c2 14 NULL 5
388+2 DERIVED t1 ALL c3,c2 c3 7 5 Using filesort
389 DROP TABLE t1;
390 End of 5.1 tests
391 drop table if exists t1, t2, t3;
392@@ -1834,7 +1834,7 @@
393 key b
394 key_len 5
395 ref NULL
396-rows 3
397+rows 5
398 Extra Using where; Using index
399 EXPLAIN SELECT c FROM bar WHERE c>2;;
400 id 1
401@@ -2430,7 +2430,7 @@
402 WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t;
403 id select_type table type possible_keys key key_len ref rows Extra
404 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
405-2 DERIVED t1 index_merge PRIMARY,idx idx,PRIMARY 5,4 NULL 3537 Using sort_union(idx,PRIMARY); Using where
406+2 DERIVED t1 index_merge PRIMARY,idx idx,PRIMARY 5,4 NULL 1536 Using sort_union(idx,PRIMARY); Using where
407 SELECT COUNT(*) FROM
408 (SELECT * FROM t1 FORCE INDEX (idx,PRIMARY)
409 WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t;
0410
=== modified file 'patches/series'
--- patches/series 2011-08-01 21:45:17 +0000
+++ patches/series 2011-08-25 09:13:51 +0000
@@ -60,3 +60,4 @@
60bug813587.patch60bug813587.patch
61innodb_bug47167_test_fix.patch61innodb_bug47167_test_fix.patch
62disable_query_cache_28249_test_sporadic_failure.patch62disable_query_cache_28249_test_sporadic_failure.patch
63bug53761.patch

Subscribers

People subscribed via source and target branches