Merge lp:~akopytov/percona-server/bug832528 into lp:percona-server/5.1
- bug832528
- Merge into 5.1
Proposed by
Alexey Kopytov
Status: | Merged | ||||
---|---|---|---|---|---|
Merged at revision: | 279 | ||||
Proposed branch: | lp:~akopytov/percona-server/bug832528 | ||||
Merge into: | lp:percona-server/5.1 | ||||
Diff against target: |
422 lines (+410/-0) 2 files modified
patches/bug53761.patch (+409/-0) patches/series (+1/-0) |
||||
To merge this branch: | bzr merge lp:~akopytov/percona-server/bug832528 | ||||
Related bugs: |
|
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Yasufumi Kinoshita (community) | Approve | ||
Review via email: mp+72843@code.launchpad.net |
Commit message
Description of the change
Backport of the fix for MySQL bug #53761 to 5.1. There is no test case, because:
1. The bug can only be triggered on a fairly large dataset (verified the fix manually on customer's data dump).
2. The original fix did not contain a test case.
To post a comment you must log in.
Revision history for this message
Alexey Kopytov (akopytov) wrote : | # |
Revision history for this message
Yasufumi Kinoshita (yasufumi-kinoshita) wrote : | # |
I think no conflict with other XtraDB patches.
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === added file 'patches/bug53761.patch' |
2 | --- patches/bug53761.patch 1970-01-01 00:00:00 +0000 |
3 | +++ patches/bug53761.patch 2011-08-25 09:13:51 +0000 |
4 | @@ -0,0 +1,409 @@ |
5 | +# name : bug53761.patch |
6 | +# maintainer : Alexey |
7 | +# |
8 | +# Backport of the fix for MySQL bug #53761 to 5.1 |
9 | +# |
10 | +--- a/storage/innodb_plugin/btr/btr0cur.c |
11 | ++++ b/storage/innodb_plugin/btr/btr0cur.c |
12 | +@@ -3117,6 +3117,7 @@ |
13 | + { |
14 | + btr_path_t* slot; |
15 | + rec_t* rec; |
16 | ++ page_t* page; |
17 | + |
18 | + ut_a(cursor->path_arr); |
19 | + |
20 | +@@ -3139,8 +3140,155 @@ |
21 | + |
22 | + slot = cursor->path_arr + (root_height - height); |
23 | + |
24 | ++ page = page_align(rec); |
25 | ++ |
26 | + slot->nth_rec = page_rec_get_n_recs_before(rec); |
27 | +- slot->n_recs = page_get_n_recs(page_align(rec)); |
28 | ++ slot->n_recs = page_get_n_recs(page); |
29 | ++ slot->page_no = page_get_page_no(page); |
30 | ++ slot->page_level = btr_page_get_level_low(page); |
31 | ++} |
32 | ++ |
33 | ++/*******************************************************************//** |
34 | ++Estimate the number of rows between slot1 and slot2 for any level on a |
35 | ++B-tree. This function starts from slot1->page and reads a few pages to |
36 | ++the right, counting their records. If we reach slot2->page quickly then |
37 | ++we know exactly how many records there are between slot1 and slot2 and |
38 | ++we set is_n_rows_exact to TRUE. If we cannot reach slot2->page quickly |
39 | ++then we calculate the average number of records in the pages scanned |
40 | ++so far and assume that all pages that we did not scan up to slot2->page |
41 | ++contain the same number of records, then we multiply that average to |
42 | ++the number of pages between slot1->page and slot2->page (which is |
43 | ++n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE. |
44 | ++@return number of rows (exact or estimated) */ |
45 | ++static |
46 | ++ib_int64_t |
47 | ++btr_estimate_n_rows_in_range_on_level( |
48 | ++/*==================================*/ |
49 | ++ dict_index_t* index, /*!< in: index */ |
50 | ++ btr_path_t* slot1, /*!< in: left border */ |
51 | ++ btr_path_t* slot2, /*!< in: right border */ |
52 | ++ ib_int64_t n_rows_on_prev_level, /*!< in: number of rows |
53 | ++ on the previous level for the |
54 | ++ same descend paths; used to |
55 | ++ determine the numbe of pages |
56 | ++ on this level */ |
57 | ++ ibool* is_n_rows_exact) /*!< out: TRUE if the returned |
58 | ++ value is exact i.e. not an |
59 | ++ estimation */ |
60 | ++{ |
61 | ++ ulint space; |
62 | ++ ib_int64_t n_rows; |
63 | ++ ulint n_pages_read; |
64 | ++ ulint page_no; |
65 | ++ ulint zip_size; |
66 | ++ ulint level; |
67 | ++ |
68 | ++ space = dict_index_get_space(index); |
69 | ++ |
70 | ++ n_rows = 0; |
71 | ++ n_pages_read = 0; |
72 | ++ |
73 | ++ /* Assume by default that we will scan all pages between |
74 | ++ slot1->page_no and slot2->page_no */ |
75 | ++ *is_n_rows_exact = TRUE; |
76 | ++ |
77 | ++ /* add records from slot1->page_no which are to the right of |
78 | ++ the record which serves as a left border of the range, if any */ |
79 | ++ if (slot1->nth_rec < slot1->n_recs) { |
80 | ++ n_rows += slot1->n_recs - slot1->nth_rec; |
81 | ++ } |
82 | ++ |
83 | ++ /* add records from slot2->page_no which are to the left of |
84 | ++ the record which servers as a right border of the range, if any */ |
85 | ++ if (slot2->nth_rec > 1) { |
86 | ++ n_rows += slot2->nth_rec - 1; |
87 | ++ } |
88 | ++ |
89 | ++ /* count the records in the pages between slot1->page_no and |
90 | ++ slot2->page_no (non inclusive), if any */ |
91 | ++ |
92 | ++ zip_size = fil_space_get_zip_size(space); |
93 | ++ |
94 | ++ /* Do not read more than this number of pages in order not to hurt |
95 | ++ performance with this code which is just an estimation. If we read |
96 | ++ this many pages before reaching slot2->page_no then we estimate the |
97 | ++ average from the pages scanned so far */ |
98 | ++ #define N_PAGES_READ_LIMIT 10 |
99 | ++ |
100 | ++ page_no = slot1->page_no; |
101 | ++ level = slot1->page_level; |
102 | ++ |
103 | ++ do { |
104 | ++ mtr_t mtr; |
105 | ++ page_t* page; |
106 | ++ buf_block_t* block; |
107 | ++ |
108 | ++ mtr_start(&mtr); |
109 | ++ |
110 | ++ /* fetch the page */ |
111 | ++ block = buf_page_get(space, zip_size, page_no, RW_S_LATCH, |
112 | ++ &mtr); |
113 | ++ |
114 | ++ page = buf_block_get_frame(block); |
115 | ++ |
116 | ++ /* It is possible that the tree has been reorganized in the |
117 | ++ meantime and this is a different page. If this happens the |
118 | ++ calculated estimate will be bogus, which is not fatal as |
119 | ++ this is only an estimate. We are sure that a page with |
120 | ++ page_no exists because InnoDB never frees pages, only |
121 | ++ reuses them. */ |
122 | ++ if (fil_page_get_type(page) != FIL_PAGE_INDEX |
123 | ++ || ut_dulint_cmp(btr_page_get_index_id(page), index->id) |
124 | ++ || btr_page_get_level_low(page) != level) { |
125 | ++ |
126 | ++ /* The page got reused for something else */ |
127 | ++ goto inexact; |
128 | ++ } |
129 | ++ |
130 | ++ n_pages_read++; |
131 | ++ |
132 | ++ if (page_no != slot1->page_no) { |
133 | ++ /* Do not count the records on slot1->page_no, |
134 | ++ we already counted them before this loop. */ |
135 | ++ n_rows += page_get_n_recs(page); |
136 | ++ } |
137 | ++ |
138 | ++ page_no = btr_page_get_next(page, &mtr); |
139 | ++ |
140 | ++ mtr_commit(&mtr); |
141 | ++ |
142 | ++ if (n_pages_read == N_PAGES_READ_LIMIT |
143 | ++ || page_no == FIL_NULL) { |
144 | ++ /* Either we read too many pages or |
145 | ++ we reached the end of the level without passing |
146 | ++ through slot2->page_no, the tree must have changed |
147 | ++ in the meantime */ |
148 | ++ goto inexact; |
149 | ++ } |
150 | ++ |
151 | ++ } while (page_no != slot2->page_no); |
152 | ++ |
153 | ++ return(n_rows); |
154 | ++ |
155 | ++inexact: |
156 | ++ |
157 | ++ *is_n_rows_exact = FALSE; |
158 | ++ |
159 | ++ /* We did interrupt before reaching slot2->page */ |
160 | ++ |
161 | ++ if (n_pages_read > 0) { |
162 | ++ /* The number of pages on this level is |
163 | ++ n_rows_on_prev_level, multiply it by the |
164 | ++ average number of recs per page so far */ |
165 | ++ n_rows = n_rows_on_prev_level |
166 | ++ * n_rows / n_pages_read; |
167 | ++ } else { |
168 | ++ /* The tree changed before we could even |
169 | ++ start with slot1->page_no */ |
170 | ++ n_rows = 10; |
171 | ++ } |
172 | ++ |
173 | ++ return(n_rows); |
174 | + } |
175 | + |
176 | + /*******************************************************************//** |
177 | +@@ -3165,6 +3313,7 @@ |
178 | + ibool diverged_lot; |
179 | + ulint divergence_level; |
180 | + ib_int64_t n_rows; |
181 | ++ ibool is_n_rows_exact; |
182 | + ulint i; |
183 | + mtr_t mtr; |
184 | + |
185 | +@@ -3207,6 +3356,7 @@ |
186 | + /* We have the path information for the range in path1 and path2 */ |
187 | + |
188 | + n_rows = 1; |
189 | ++ is_n_rows_exact = TRUE; |
190 | + diverged = FALSE; /* This becomes true when the path is not |
191 | + the same any more */ |
192 | + diverged_lot = FALSE; /* This becomes true when the paths are |
193 | +@@ -3222,7 +3372,7 @@ |
194 | + if (slot1->nth_rec == ULINT_UNDEFINED |
195 | + || slot2->nth_rec == ULINT_UNDEFINED) { |
196 | + |
197 | +- if (i > divergence_level + 1) { |
198 | ++ if (i > divergence_level + 1 && !is_n_rows_exact) { |
199 | + /* In trees whose height is > 1 our algorithm |
200 | + tends to underestimate: multiply the estimate |
201 | + by 2: */ |
202 | +@@ -3234,7 +3384,9 @@ |
203 | + to over 1 / 2 of the estimated rows in the whole |
204 | + table */ |
205 | + |
206 | +- if (n_rows > index->table->stat_n_rows / 2) { |
207 | ++ if (n_rows > index->table->stat_n_rows / 2 |
208 | ++ && !is_n_rows_exact) { |
209 | ++ |
210 | + n_rows = index->table->stat_n_rows / 2; |
211 | + |
212 | + /* If there are just 0 or 1 rows in the table, |
213 | +@@ -3260,10 +3412,15 @@ |
214 | + divergence_level = i; |
215 | + } |
216 | + } else { |
217 | +- /* Maybe the tree has changed between |
218 | +- searches */ |
219 | +- |
220 | +- return(10); |
221 | ++ /* It is possible that |
222 | ++ slot1->nth_rec >= slot2->nth_rec |
223 | ++ if, for example, we have a single page |
224 | ++ tree which contains (inf, 5, 6, supr) |
225 | ++ and we select where x > 20 and x < 30; |
226 | ++ in this case slot1->nth_rec will point |
227 | ++ to the supr record and slot2->nth_rec |
228 | ++ will point to 6 */ |
229 | ++ n_rows = 0; |
230 | + } |
231 | + |
232 | + } else if (diverged && !diverged_lot) { |
233 | +@@ -3287,8 +3444,9 @@ |
234 | + } |
235 | + } else if (diverged_lot) { |
236 | + |
237 | +- n_rows = (n_rows * (slot1->n_recs + slot2->n_recs)) |
238 | +- / 2; |
239 | ++ n_rows = btr_estimate_n_rows_in_range_on_level( |
240 | ++ index, slot1, slot2, n_rows, |
241 | ++ &is_n_rows_exact); |
242 | + } |
243 | + } |
244 | + } |
245 | +--- a/storage/innodb_plugin/include/btr0cur.h |
246 | ++++ b/storage/innodb_plugin/include/btr0cur.h |
247 | +@@ -652,6 +652,11 @@ |
248 | + order); value ULINT_UNDEFINED |
249 | + denotes array end */ |
250 | + ulint n_recs; /*!< number of records on the page */ |
251 | ++ ulint page_no; /*!< no of the page containing the record */ |
252 | ++ ulint page_level; /*!< level of the page, if later we fetch |
253 | ++ the page under page_no and it is no different |
254 | ++ level then we know that the tree has been |
255 | ++ reorganized */ |
256 | + }; |
257 | + |
258 | + #define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */ |
259 | +--- a/mysql-test/suite/innodb_plugin/r/innodb_gis.result |
260 | ++++ b/mysql-test/suite/innodb_plugin/r/innodb_gis.result |
261 | +@@ -572,7 +572,7 @@ |
262 | + EXPLAIN |
263 | + SELECT COUNT(*) FROM t2 WHERE p=POINTFROMTEXT('POINT(1 2)'); |
264 | + id select_type table type possible_keys key key_len ref rows Extra |
265 | +-1 SIMPLE t2 ref p p 28 const 1 Using where |
266 | ++1 SIMPLE t2 ref p p 28 const 2 Using where |
267 | + SELECT COUNT(*) FROM t2 WHERE p=POINTFROMTEXT('POINT(1 2)'); |
268 | + COUNT(*) |
269 | + 2 |
270 | +--- a/mysql-test/suite/innodb_plugin/r/innodb_mysql.result |
271 | ++++ b/mysql-test/suite/innodb_plugin/r/innodb_mysql.result |
272 | +@@ -889,13 +889,13 @@ |
273 | + id 1 |
274 | + select_type SIMPLE |
275 | + table t1 |
276 | +-type range |
277 | ++type index |
278 | + possible_keys bkey |
279 | +-key bkey |
280 | +-key_len 5 |
281 | ++key PRIMARY |
282 | ++key_len 4 |
283 | + ref NULL |
284 | +-rows 16 |
285 | +-Extra Using where; Using index; Using filesort |
286 | ++rows 32 |
287 | ++Extra Using where |
288 | + SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY a; |
289 | + a b |
290 | + 1 2 |
291 | +@@ -934,12 +934,12 @@ |
292 | + id 1 |
293 | + select_type SIMPLE |
294 | + table t1 |
295 | +-type range |
296 | ++type index |
297 | + possible_keys bkey |
298 | + key bkey |
299 | + key_len 5 |
300 | + ref NULL |
301 | +-rows 16 |
302 | ++rows 32 |
303 | + Extra Using where; Using index |
304 | + SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY b,a; |
305 | + a b |
306 | +@@ -989,7 +989,7 @@ |
307 | + key bkey |
308 | + key_len 5 |
309 | + ref const |
310 | +-rows 8 |
311 | ++rows 16 |
312 | + Extra Using where; Using index; Using filesort |
313 | + SELECT * FROM t2 WHERE b=1 ORDER BY a; |
314 | + a b c |
315 | +@@ -1018,7 +1018,7 @@ |
316 | + key bkey |
317 | + key_len 10 |
318 | + ref const,const |
319 | +-rows 8 |
320 | ++rows 16 |
321 | + Extra Using where; Using index |
322 | + SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY a; |
323 | + a b c |
324 | +@@ -1047,7 +1047,7 @@ |
325 | + key bkey |
326 | + key_len 10 |
327 | + ref const,const |
328 | +-rows 8 |
329 | ++rows 16 |
330 | + Extra Using where; Using index |
331 | + SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY b,c,a; |
332 | + a b c |
333 | +@@ -1076,7 +1076,7 @@ |
334 | + key bkey |
335 | + key_len 10 |
336 | + ref const,const |
337 | +-rows 8 |
338 | ++rows 16 |
339 | + Extra Using where; Using index |
340 | + SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY c,a; |
341 | + a b c |
342 | +@@ -1211,7 +1211,7 @@ |
343 | + key b |
344 | + key_len 5 |
345 | + ref const |
346 | +-rows 1 |
347 | ++rows 2 |
348 | + Extra Using where; Using index |
349 | + SELECT * FROM t1 WHERE b=2 ORDER BY a ASC; |
350 | + a b |
351 | +@@ -1226,7 +1226,7 @@ |
352 | + key b |
353 | + key_len 5 |
354 | + ref const |
355 | +-rows 1 |
356 | ++rows 2 |
357 | + Extra Using where; Using index |
358 | + SELECT * FROM t1 WHERE b=2 ORDER BY a DESC; |
359 | + a b |
360 | +@@ -1370,7 +1370,7 @@ |
361 | + INSERT INTO t1 (a,b,c) SELECT a+4,b,c FROM t1; |
362 | + EXPLAIN SELECT a, b, c FROM t1 WHERE b = 1 ORDER BY a DESC LIMIT 5; |
363 | + id select_type table type possible_keys key key_len ref rows Extra |
364 | +-1 SIMPLE t1 index t1_b PRIMARY 4 NULL 8 Using where |
365 | ++1 SIMPLE t1 range t1_b t1_b 5 NULL 8 Using where |
366 | + SELECT a, b, c FROM t1 WHERE b = 1 ORDER BY a DESC LIMIT 5; |
367 | + a b c |
368 | + 8 1 1 |
369 | +@@ -1729,7 +1729,7 @@ |
370 | + FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x; |
371 | + id select_type table type possible_keys key key_len ref rows Extra |
372 | + 1 PRIMARY <derived2> system NULL NULL NULL NULL 1 |
373 | +-2 DERIVED t1 index c3,c2 c2 10 NULL 5 |
374 | ++2 DERIVED t1 ALL c3,c2 c3 5 5 Using filesort |
375 | + DROP TABLE t1; |
376 | + CREATE TABLE t1 (c1 REAL, c2 REAL, c3 REAL, KEY (c3), KEY (c2, c3)) |
377 | + ENGINE=InnoDB; |
378 | +@@ -1743,7 +1743,7 @@ |
379 | + FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x; |
380 | + id select_type table type possible_keys key key_len ref rows Extra |
381 | + 1 PRIMARY <derived2> system NULL NULL NULL NULL 1 |
382 | +-2 DERIVED t1 index c3,c2 c2 18 NULL 5 |
383 | ++2 DERIVED t1 ALL c3,c2 c3 9 5 Using filesort |
384 | + DROP TABLE t1; |
385 | + CREATE TABLE t1 (c1 DECIMAL(12,2), c2 DECIMAL(12,2), c3 DECIMAL(12,2), |
386 | + KEY (c3), KEY (c2, c3)) |
387 | +@@ -1758,7 +1758,7 @@ |
388 | + FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x; |
389 | + id select_type table type possible_keys key key_len ref rows Extra |
390 | + 1 PRIMARY <derived2> system NULL NULL NULL NULL 1 |
391 | +-2 DERIVED t1 index c3,c2 c2 14 NULL 5 |
392 | ++2 DERIVED t1 ALL c3,c2 c3 7 5 Using filesort |
393 | + DROP TABLE t1; |
394 | + End of 5.1 tests |
395 | + drop table if exists t1, t2, t3; |
396 | +@@ -1834,7 +1834,7 @@ |
397 | + key b |
398 | + key_len 5 |
399 | + ref NULL |
400 | +-rows 3 |
401 | ++rows 5 |
402 | + Extra Using where; Using index |
403 | + EXPLAIN SELECT c FROM bar WHERE c>2;; |
404 | + id 1 |
405 | +@@ -2430,7 +2430,7 @@ |
406 | + WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; |
407 | + id select_type table type possible_keys key key_len ref rows Extra |
408 | + 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away |
409 | +-2 DERIVED t1 index_merge PRIMARY,idx idx,PRIMARY 5,4 NULL 3537 Using sort_union(idx,PRIMARY); Using where |
410 | ++2 DERIVED t1 index_merge PRIMARY,idx idx,PRIMARY 5,4 NULL 1536 Using sort_union(idx,PRIMARY); Using where |
411 | + SELECT COUNT(*) FROM |
412 | + (SELECT * FROM t1 FORCE INDEX (idx,PRIMARY) |
413 | + WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; |
414 | |
415 | === modified file 'patches/series' |
416 | --- patches/series 2011-08-01 21:45:17 +0000 |
417 | +++ patches/series 2011-08-25 09:13:51 +0000 |
418 | @@ -60,3 +60,4 @@ |
419 | bug813587.patch |
420 | innodb_bug47167_test_fix.patch |
421 | disable_query_cache_28249_test_sporadic_failure.patch |
422 | +bug53761.patch |
http:// jenkins. percona. com/view/ Percona% 20Server% 205.1/job/ percona- server- 5.1-param/ 126/