Merge lp:~maria-captains/maria/maria-xtradb into lp:~maria-captains/maria/5.1-converting

Proposed by Percona
Status: Rejected
Rejected by: Sergei Golubchik
Proposed branch: lp:~maria-captains/maria/maria-xtradb
Merge into: lp:~maria-captains/maria/5.1-converting
Diff against target: None lines
To merge this branch: bzr merge lp:~maria-captains/maria/maria-xtradb
Reviewer Review Type Date Requested Status
Maria-captains Pending
Review via email: mp+5149@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Percona (percona-team) wrote :

Proposal to merge replacement InnoDB->XtraDB

Revision history for this message
Sergei Golubchik (sergii) wrote :

proposal for the old lp:~maria-captains/maria/5.1-converting tree.
if the proposal is still relevant, please resubmit for the current tree

Unmerged revisions

2689. By Vadim Tkachenko

replace InnoDB by XtraDB release 4

2688. By Michael Widenius

Merge with trunk

2687. By Michael Widenius

Apply patch by Antony Dovgal:
- Move SAFE_MUTEX to be stored in config.h by configure.in (not as a flag used with compiler command line)
- Generate my_config.h in configure

2686. By Michael Widenius

Merge with trunk

2685. By Michael Widenius

Avoid compiler warnings on windows

2684. By Michael Widenius

Ignore generated file event-config.h

2683. By Michael Widenius

Added mariadb and mariadb-version as my.conf option tags
Fixed compiler error when configuring without --lib-event

2682. By Kristian Nielsen

Fix build error in some configs.

Remove non-source file from bzr.

2681. By Michael Widenius

Added missing fix from last commit

2680. By Michael Widenius

Added "pool-of-threads" handling (with libevent)
This is a backport of code from MySQL 6.0 with cleanups and extensions

The following new options are supported
configure options:
  --with-libevent ; Enable use of libevent, which is needed for pool of threads

mysqld options:
--thread-handling=pool-of-threads ; Use a pool of threads to handle queries
--thread-pool-size=# ; Define how many threads should be created to handle all queries
--extra-port=# ; Extra tcp port that uses the old one-thread-per-connection method
--extra-max-connections=# ; Number of connections to accept to 'extra-port'
--test-ignore-wrong-options ; Ignore setting an enum value to a wrong option (for mysql-test-run)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'storage/innobase/CMakeLists.txt' (properties changed: +x to -x)
2--- storage/innobase/CMakeLists.txt 2008-05-22 22:25:21 +0000
3+++ storage/innobase/CMakeLists.txt 2009-03-31 04:19:17 +0000
4@@ -15,7 +15,7 @@
5
6 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
7 SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
8-ADD_DEFINITIONS(-DMYSQL_SERVER -D_WIN32 -D_LIB)
9+ADD_DEFINITIONS(-D_WIN32 -D_LIB)
10
11 # Bug 19424 - InnoDB: Possibly a memory overrun of the buffer being freed (64-bit Visual C)
12 # Removing Win64 compiler optimizations for all innodb/mem/* files.
13@@ -33,7 +33,7 @@
14 ${CMAKE_SOURCE_DIR}/extra/yassl/include)
15
16 SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
17- buf/buf0buf.c buf/buf0flu.c buf/buf0lru.c buf/buf0rea.c
18+ buf/buf0buddy.c buf/buf0buf.c buf/buf0flu.c buf/buf0lru.c buf/buf0rea.c
19 data/data0data.c data/data0type.c
20 dict/dict0boot.c dict/dict0crea.c dict/dict0dict.c dict/dict0load.c dict/dict0mem.c
21 dyn/dyn0dyn.c
22@@ -41,30 +41,57 @@
23 fil/fil0fil.c
24 fsp/fsp0fsp.c
25 fut/fut0fut.c fut/fut0lst.c
26- ha/ha0ha.c ha/hash0hash.c
27+ ha/ha0ha.c ha/hash0hash.c ha/ha0storage.c
28 ibuf/ibuf0ibuf.c
29 pars/lexyy.c pars/pars0grm.c pars/pars0opt.c pars/pars0pars.c pars/pars0sym.c
30- lock/lock0lock.c
31+ lock/lock0lock.c lock/lock0iter.c
32 log/log0log.c log/log0recv.c
33 mach/mach0data.c
34 mem/mem0mem.c mem/mem0pool.c
35 mtr/mtr0log.c mtr/mtr0mtr.c
36 os/os0file.c os/os0proc.c os/os0sync.c os/os0thread.c
37- page/page0cur.c page/page0page.c
38+ page/page0cur.c page/page0page.c page/page0zip.c
39 que/que0que.c
40- handler/ha_innodb.cc
41+ handler/ha_innodb.cc handler/handler0alter.cc handler/i_s.cc handler/mysql_addons.cc
42 read/read0read.c
43 rem/rem0cmp.c rem/rem0rec.c
44- row/row0ins.c row/row0mysql.c row/row0purge.c row/row0row.c row/row0sel.c row/row0uins.c
45+ row/row0ext.c row/row0ins.c row/row0merge.c row/row0mysql.c
46+ row/row0purge.c row/row0row.c row/row0sel.c row/row0uins.c
47 row/row0umod.c row/row0undo.c row/row0upd.c row/row0vers.c
48 srv/srv0que.c srv/srv0srv.c srv/srv0start.c
49 sync/sync0arr.c sync/sync0rw.c sync/sync0sync.c
50 thr/thr0loc.c
51- trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
52+ trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c
53+ trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
54 usr/usr0sess.c
55 ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0list.c ut/ut0wqueue.c)
56
57 IF(NOT SOURCE_SUBLIBS)
58 ADD_LIBRARY(innobase ${INNOBASE_SOURCES})
59 ADD_DEPENDENCIES(innobase GenError)
60+ SET_TARGET_PROPERTIES(innobase PROPERTIES COMPILE_FLAGS "-DMYSQL_SERVER")
61+
62+ IF(INNODB_DYNAMIC_PLUGIN)
63+ # The dynamic plugin requires CMake 2.6.0 or later. Otherwise, the /DELAYLOAD property
64+ # will not be set
65+ CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR)
66+ ADD_LIBRARY(ha_innodb SHARED ${INNOBASE_SOURCES} ha_innodb.def handler/win_delay_loader.cc)
67+ ADD_DEPENDENCIES(ha_innodb GenError mysqld)
68+ # If build type is not specified as Release, default to Debug
69+ # This is a workaround to a problem in CMake 2.6, which does not
70+ # set the path of mysqld.lib correctly
71+ IF(CMAKE_BUILD_TYPE MATCHES Release)
72+ SET(CMAKE_BUILD_TYPE "Release")
73+ ELSE(CMAKE_BUILD_TYPE MATCHES Release)
74+ SET(CMAKE_BUILD_TYPE "Debug")
75+ ENDIF(CMAKE_BUILD_TYPE MATCHES Release)
76+ TARGET_LINK_LIBRARIES(ha_innodb strings zlib)
77+ TARGET_LINK_LIBRARIES(ha_innodb ${CMAKE_SOURCE_DIR}/sql/${CMAKE_BUILD_TYPE}/mysqld.lib)
78+ SET_TARGET_PROPERTIES(ha_innodb PROPERTIES OUTPUT_NAME ha_innodb)
79+ SET_TARGET_PROPERTIES(ha_innodb PROPERTIES LINK_FLAGS "/MAP /MAPINFO:EXPORTS")
80+ SET_TARGET_PROPERTIES(ha_innodb PROPERTIES LINK_FLAGS "/ENTRY:\"_DllMainCRTStartup@12\"")
81+ SET_TARGET_PROPERTIES(ha_innodb PROPERTIES COMPILE_FLAGS "-DMYSQL_DYNAMIC_PLUGIN")
82+ SET_TARGET_PROPERTIES(ha_innodb PROPERTIES LINK_FLAGS "/DELAYLOAD:mysqld.exe")
83+ ENDIF(INNODB_DYNAMIC_PLUGIN)
84+
85 ENDIF(NOT SOURCE_SUBLIBS)
86
87=== added file 'storage/innobase/COPYING.Google'
88--- storage/innobase/COPYING.Google 1970-01-01 00:00:00 +0000
89+++ storage/innobase/COPYING.Google 2009-03-31 04:19:17 +0000
90@@ -0,0 +1,30 @@
91+Portions of this software contain modifications contributed by Google, Inc.
92+These contributions are used with the following license:
93+
94+Copyright (c) 2008, Google Inc. All rights reserved.
95+
96+Redistribution and use in source and binary forms, with or without
97+modification, are permitted provided that the following conditions
98+are met:
99+ * Redistributions of source code must retain the above copyright
100+ notice, this list of conditions and the following disclaimer.
101+ * Redistributions in binary form must reproduce the above
102+ copyright notice, this list of conditions and the following
103+ disclaimer in the documentation and/or other materials
104+ provided with the distribution.
105+ * Neither the name of the Google Inc. nor the names of its
106+ contributors may be used to endorse or promote products
107+ derived from this software without specific prior written
108+ permission.
109+
110+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
111+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
112+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
113+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
114+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
115+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
116+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
117+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
118+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
119+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
120+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
121
122=== added file 'storage/innobase/ChangeLog'
123--- storage/innobase/ChangeLog 1970-01-01 00:00:00 +0000
124+++ storage/innobase/ChangeLog 2009-03-31 04:19:17 +0000
125@@ -0,0 +1,775 @@
126+2009-03-05 The InnoDB Team
127+
128+ * handler/ha_innodb.cc, mysql-test/innodb-autoinc.result,
129+ mysql-test/innodb-autoinc.test:
130+ Fix Bug#43203 Overflow from auto incrementing causes server segv
131+
132+2009-02-25 The InnoDB Team
133+
134+ * handler/ha_innodb.cc, mysql-test/innodb-autoinc.result,
135+ mysql-test/innodb-autoinc.test:
136+ Fix Bug#42714 AUTO_INCREMENT errors in 5.1.31
137+
138+2009-02-23 The InnoDB Team
139+
140+ * btr/btr0cur.c:
141+ Fix Bug#43043 Crash on BLOB delete operation
142+
143+2009-02-20 The InnoDB Team
144+
145+ * handler/ha_innodb.cc:
146+ Make innodb_use_sys_malloc=ON the default.
147+
148+2009-02-20 The InnoDB Team
149+
150+ * handler/ha_innodb.cc, mysql-test/innodb-autoinc.result,
151+ mysql-test/innodb-autoinc.test:
152+ Fix Bug#42400 InnoDB autoinc code can't handle floating-point columns
153+
154+2009-02-18 The InnoDB Team
155+
156+ * include/ut0mem.h, os/os0proc.c, ut/ut0mem.c:
157+ Protect ut_total_allocated_memory with ut_list_mutex in
158+ os_mem_alloc_large() and os_mem_free_large(). The lack of this mutex
159+ protection could cause an assertion failure during fast index
160+ creation. Also, add UNIV_MEM_ALLOC and UNIV_MEM_FREE instrumentation
161+ to os_mem_alloc_large() and os_mem_free_large(), so that Valgrind can
162+ detect more errors.
163+
164+2009-02-11 The InnoDB Team
165+
166+ * handler/ha_innodb.cc:
167+ Make innodb_thread_concurrency=0 the default. The old default value
168+ was 8. A non-zero setting may be useful when InnoDB is showing severe
169+ scalability problems under multiple concurrent connections.
170+
171+2009-02-10 The InnoDB Team
172+
173+ * handler/ha_innodb.cc, handler/ha_innodb.h:
174+ Fix Bug#41676 Table names are case insensitive in locking
175+
176+2009-02-10 The InnoDB Team
177+
178+ * mem/mem0dbg.c, mem/mem0mem.c, mem/mem0pool.c:
179+ When innodb_use_sys_malloc is set, ignore
180+ innodb_additional_mem_pool_size, because nothing will be allocated
181+ from mem_comm_pool.
182+
183+2009-02-10 The InnoDB Team
184+
185+ * ut/ut0mem.c:
186+ Map ut_malloc_low(), ut_realloc(), and ut_free() directly to malloc(),
187+ realloc(), and free() when innodb_use_sys_malloc is set. As a side
188+ effect, ut_total_allocated_memory ("Total memory allocated" in the
189+ "BUFFER POOL AND MEMORY" section of SHOW ENGINE INNODB STATUS) will
190+ exclude any memory allocated by these functions when
191+ innodb_use_sys_malloc is set.
192+
193+2009-02-10 The InnoDB Team
194+
195+ * btr/btr0cur.c, btr/btr0sea.c, buf/buf0buf.c, handler/ha_innodb.cc,
196+ include/buf0buf.ic, include/os0sync.h, include/srv0srv.h,
197+ include/sync0rw.h, include/sync0rw.ic, include/sync0sync.h,
198+ include/sync0sync.ic, include/univ.i, row/row0sel.c, srv/srv0srv.c,
199+ srv/srv0start.c, sync/sync0arr.c, sync/sync0rw.c, sync/sync0sync.c:
200+ On those platforms that support it, implement the synchronization
201+ primitives of InnoDB mutexes and read/write locks with GCC atomic
202+ builtins instead of Pthreads mutexes and InnoDB mutexes. These changes
203+ are based on a patch supplied by Mark Callaghan of Google under a BSD
204+ license.
205+
206+2009-01-30 The InnoDB Team
207+
208+ * btr/btr0cur.c, btr/btr0sea.c, buf/buf0buf.c, handler/ha_innodb.cc,
209+ include/btr0sea.h, include/buf0buf.h, include/sync0sync.h,
210+ sync/sync0sync.c:
211+ Make the configuration parameter innodb_adaptive_hash_index dynamic,
212+ so that it can be changed at runtime.
213+
214+2009-01-29 The InnoDB Team
215+
216+ * handler/ha_innodb.cc, ibuf/ibuf0ibuf.c, include/ibuf0ibuf.h,
217+ include/ibuf0ibuf.ic:
218+ Implement the settable global variable innodb_change_buffering,
219+ with the allowed values 'none' and 'inserts'. The default value
220+ 'inserts' enables the buffering of inserts to non-unique secondary
221+ index trees when the B-tree leaf page is not in the buffer pool.
222+
223+2009-01-27 The InnoDB Team
224+
225+ * buf/buf0lru.c:
226+ Fix a race condition in buf_LRU_invalidate_tablespace(): The
227+ compressed page size (zip_size) was read while the block descriptor
228+ was no longer protected by a mutex. This could lead to corruption
229+ when a table is dropped on a busy system that contains compressed
230+ tables.
231+
232+2009-01-26 The InnoDB Team
233+
234+ * btr/btr0sea.c, buf/buf0buf.c, include/buf0buf.h, include/buf0buf.ic,
235+ include/mtr0log.ic, include/row0upd.ic, mtr/mtr0mtr.c:
236+ Implement buf_block_align() with pointer arithmetics, as it is in the
237+ built-in InnoDB distributed with MySQL. Do not acquire the buffer pool
238+ mutex before buf_block_align(). This removes a scalability bottleneck
239+ in the adaptive hash index lookup. In CHECK TABLE, check that
240+ buf_pool->page_hash is consistent with buf_block_align().
241+
242+2009-01-23 The InnoDB Team
243+
244+ * btr/btr0sea.c:
245+ Fix Bug#42279 Race condition in btr_search_drop_page_hash_when_freed()
246+
247+2009-01-23 The InnoDB Team
248+
249+ * buf/buf0buf.c, include/buf0buf.h:
250+ Remove the unused mode BUF_GET_NOWAIT of buf_page_get_gen()
251+
252+2009-01-20 The InnoDB Team
253+
254+ * include/rem0rec.h, include/rem0rec.ic:
255+ Fix Bug#41571 MySQL segfaults after innodb recovery
256+
257+2009-01-20 The InnoDB Team
258+
259+ * lock/lock0lock.c:
260+ Fix Bug#42152 Race condition in lock_is_table_exclusive()
261+
262+2009-01-14 The InnoDB Team
263+
264+ * include/trx0roll.h, trx/trx0roll.c, trx/trx0trx.c:
265+ Fix Bug#38187 Error 153 when creating savepoints
266+
267+2009-01-14 The InnoDB Team
268+
269+ * dict/dict0load.c:
270+ Fix Bug#42075 dict_load_indexes failure in dict_load_table will
271+ corrupt the dictionary cache
272+
273+2009-01-13 The InnoDB Team
274+
275+ * buf/buf0buddy.c, dict/dict0dict.c, dict/dict0mem.c, fil/fil0fil.c,
276+ ha/ha0storage.c, handler/ha_innodb.cc, handler/win_delay_loader.cc,
277+ include/buf0buf.ic, include/dict0dict.ic, include/hash0hash.h,
278+ thr/thr0loc.c, trx/trx0i_s.c:
279+ Add the parameter ASSERTION to HASH_SEARCH() macro, and use it for
280+ light validation of the traversed items in hash table lookups when
281+ UNIV_DEBUG is enabled.
282+
283+2009-01-09 The InnoDB Team
284+
285+ * buf/buf0flu.c, include/buf0flu.h, include/buf0flu.ic:
286+ Remove unused code from the functions
287+ buf_flush_insert_into_flush_list() and
288+ buf_flush_insert_sorted_into_flush_list().
289+
290+2009-01-09 The InnoDB Team
291+
292+ * buf/buf0flu.c:
293+ Simplify the functions buf_flush_try_page() and buf_flush_batch(). Add
294+ debug assertions and an explanation to buf_flush_write_block_low().
295+
296+2009-01-07 The InnoDB Team
297+
298+ * row/row0merge.c:
299+ Fix a bug in recovery when dropping temporary indexes.
300+
301+2009-01-07 The InnoDB Team
302+
303+ * handler/ha_innodb.cc, handler/ha_innodb.h, handler/handler0alter.cc:
304+ Fix Bug#41680 calls to trx_allocate_for_mysql are not consistent
305+
306+2009-01-07 The InnoDB Team
307+
308+ * mysql-test/innodb_bug41904.result, mysql-test/innodb_bug41904.test,
309+ row/row0merge.c:
310+ Fix Bug#41904 create unique index problem
311+
312+2009-01-02 The InnoDB Team
313+
314+ * handler/ha_innodb.cc, include/srv0srv.h, mem/mem0pool.c,
315+ mysql-test/innodb-use-sys-malloc-master.opt,
316+ mysql-test/innodb-use-sys-malloc.result,
317+ mysql-test/innodb-use-sys-malloc.test, srv/srv0srv.c, srv/srv0start.c:
318+ Implement the configuration parameter innodb_use_sys_malloc (false by
319+ default), for disabling InnoDB's internal memory allocator and using
320+ system malloc/free instead. The "BUFFER POOL AND MEMORY" section of
321+ SHOW ENGINE INNODB STATUS will report "in additional pool allocated
322+ 0" when innodb_use_sys_malloc is set.
323+
324+2008-12-30 The InnoDB Team
325+
326+ * btr/btr0btr.c:
327+ When setting the PAGE_LEVEL of a compressed B-tree page from or to 0,
328+ compress the page at the same time. This is necessary, because the
329+ column information stored on the compressed page will differ between
330+ leaf and non-leaf pages. Leaf pages are identified by PAGE_LEVEL=0.
331+ This bug can make InnoDB crash when all rows of a compressed table are
332+ deleted.
333+
334+2008-12-17 The InnoDB Team
335+
336+ * include/row0sel.h, include/row0upd.h, pars/pars0pars.c,
337+ row/row0mysql.c, row/row0sel.c, row/row0upd.c:
338+ Remove update-in-place select from the internal SQL interpreter. It
339+ was only used for updating the InnoDB internal data dictionary when
340+ renaming or dropping tables. It could have caused deadlocks when
341+ acquiring latches on insert buffer bitmap pages.
342+
343+2008-12-17 The InnoDB Team
344+
345+ * btr/btr0sea.c, buf/buf0buf.c, buf/buf0lru.c, ha/ha0ha.c,
346+ ha/hash0hash.c, include/buf0buf.h, include/ha0ha.h, include/ha0ha.ic,
347+ include/hash0hash.h, include/univ.i:
348+ Introduce the preprocessor symbol UNIV_AHI_DEBUG for enabling adaptive
349+ hash index debugging independently of UNIV_DEBUG.
350+
351+2008-12-16 The InnoDB Team
352+
353+ * btr/btr0cur.c:
354+ Do not update the free bits in the insert buffer bitmap when inserting
355+ or deleting from the insert buffer B-tree. Assert that records in the
356+ insert buffer B-tree are never updated.
357+
358+2008-12-12 The InnoDB Team
359+
360+ * buf/buf0buf.c, fil/fil0fil.c, fsp/fsp0fsp.c, ibuf/ibuf0ibuf.c,
361+ include/fil0fil.h, include/ibuf0ibuf.h, include/ibuf0ibuf.ic,
362+ include/ibuf0types.h:
363+ Clean up the insert buffer subsystem so that only one insert
364+ buffer B-tree exists.
365+ Originally, there were provisions in InnoDB for multiple insert
366+ buffer B-trees, apparently one for each tablespace.
367+ When Heikki Tuuri implemented multiple InnoDB tablespaces in
368+ MySQL/InnoDB 4.1, he made the insert buffer live only in the
369+ system tablespace (space 0) but left the provisions in the code.
370+
371+2008-12-11 The InnoDB Team
372+
373+ * include/srv0srv.h, os/os0proc.c, srv/srv0srv.c:
374+ Fix the issue that the InnoDB plugin fails if innodb_buffer_pool_size
375+ is defined bigger than 4096M on 64-bit Windows. This bug should not
376+ have affected other 64-bit systems.
377+
378+2008-12-09 The InnoDB Team
379+
380+ * handler/ha_innodb.cc:
381+ Fix Bug#40386 Not flushing query cache after truncate.
382+
383+2008-12-09 The InnoDB Team
384+
385+ * handler/ha_innodb.cc, srv/srv0srv.c, trx/trx0trx.c:
386+ Fix Bug#40760 "set global innodb_thread_concurrency = 0;" is not safe
387+
388+2008-12-04 The InnoDB Team
389+
390+ * handler/ha_innodb.cc, handler/mysql_addons.cc,
391+ include/mysql_addons.h, trx/trx0i_s.c, win-plugin/win-plugin.diff:
392+ Remove dependencies on MySQL internals (defining MYSQL_SERVER).
393+
394+2008-12-02 The InnoDB Team
395+
396+ * page/page0cur.c:
397+ When allocating space for a record from the free list of previously
398+ purged records, zero out the DB_TRX_ID and DB_ROLL_PTR of the purged
399+ record if the new record would not overwrite these fields. This fixes
400+ a harmless content mismatch reported by page_zip_validate().
401+
402+2008-12-02 The InnoDB Team
403+
404+ * row/row0merge.c:
405+ Replace the WHILE 1 with WHILE 1=1 in the SQL procedure, so that the
406+ loop will actually be entered and temporary indexes be dropped during
407+ crash recovery.
408+
409+2008-12-01 The InnoDB Team
410+
411+ InnoDB Plugin 1.0.2 released
412+
413+2008-10-31 The InnoDB Team
414+
415+ * dict/dict0mem.c, include/dict0mem.h, include/lock0lock.h,
416+ include/row0mysql.h, include/trx0trx.h, include/univ.i,
417+ include/ut0vec.h, include/ut0vec.ic, lock/lock0lock.c,
418+ row/row0mysql.c, trx/trx0trx.c:
419+ Fix Bug#26316 Triggers create duplicate entries on auto-increment
420+ columns
421+
422+2008-10-30 The InnoDB Team
423+
424+ * handler/ha_innodb.cc, handler/handler0vars.h,
425+ handler/win_delay_loader.cc, mysql-test/innodb_bug40360.result,
426+ mysql-test/innodb_bug40360.test:
427+ Fix Bug#40360 Binlog related errors with binlog off
428+
429+2008-10-29 The InnoDB Team
430+
431+ * include/data0type.ic:
432+ Fix Bug#40369 dtype_get_sql_null_size() returns 0 or 1, not the size
433+
434+2008-10-29 The InnoDB Team
435+
436+ * handler/ha_innodb.cc, include/srv0srv.h, srv/srv0srv.c:
437+ Fix Bug#38189 innodb_stats_on_metadata missing
438+
439+2008-10-28 The InnoDB Team
440+
441+ * CMakeLists.txt, ha_innodb.def, handler/ha_innodb.cc,
442+ handler/handler0alter.cc, handler/handler0vars.h, handler/i_s.cc,
443+ handler/win_delay_loader.cc, win-plugin/*:
444+ Implemented the delayloading of externals for the plugin on Windows.
445+ This makes it possible to build a dynamic plugin (ha_innodb.dll) on
446+ Windows.
447+
448+2008-10-27 The InnoDB Team
449+
450+ * CMakeLists.txt:
451+ Fix Bug#19424 InnoDB: Possibly a memory overrun of the buffer being
452+ freed (64-bit Visual C)
453+
454+2008-10-23 The InnoDB Team
455+
456+ * ibuf/ibuf0ibuf.c:
457+ ibuf_delete_rec(): When the cursor to the insert buffer record
458+ cannot be restored, do not complain if the tablespace does not
459+ exist, because the insert buffer record may have been discarded by
460+ some other thread. This bug has existed in MySQL/InnoDB since
461+ version 4.1, when innodb_file_per_table was implemented.
462+ This may fix Bug#27276 InnoDB Error: ibuf cursor restoration fails.
463+
464+2008-10-22 The InnoDB Team
465+
466+ * dict/dict0dict.c, dict/dict0mem.c, handler/ha_innodb.cc,
467+ handler/ha_innodb.h, include/dict0dict.h, include/dict0mem.h,
468+ row/row0mysql.c:
469+ Fix Bug#39830 Table autoinc value not updated on first insert
470+ Fix Bug#35498 Cannot get table test/table1 auto-inccounter value in
471+ ::info
472+ Fix Bug#36411 "Failed to read auto-increment value from storage
473+ engine" in 5.1.24 auto-inc
474+
475+2008-10-22 The InnoDB Team
476+
477+ * handler/ha_innodb.cc, include/row0mysql.h, row/row0mysql.c:
478+ Fix Bug#40224 New AUTOINC changes mask reporting of deadlock/timeout
479+ errors
480+
481+2008-10-16 The InnoDB Team
482+
483+ * dict/dict0dict.c, mysql-test/innodb-index.result,
484+ mysql-test/innodb-index.test:
485+ Skip the undo log size check when creating REDUNDANT and COMPACT
486+ tables. In ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED, column
487+ prefix indexes require that prefixes of externally stored columns
488+ be written to the undo log. This may make the undo log record
489+ bigger than the record on the B-tree page. The maximum size of an
490+ undo log record is the page size. That must be checked for, in
491+ dict_index_add_to_cache(). However, this restriction must not
492+ be enforced on REDUNDANT or COMPACT tables.
493+
494+2008-10-15 The InnoDB Team
495+
496+ * btr/btr0cur.c, include/btr0cur.h, row/row0ext.c, row/row0sel.c,
497+ row/row0upd.c:
498+ When the server crashes while freeing an externally stored column
499+ of a compressed table, the BTR_EXTERN_LEN field in the BLOB
500+ pointer will be written as 0. Tolerate this in the functions that
501+ deal with externally stored columns. This fixes problems after
502+ crash recovery, in the rollback of incomplete transactions, and in
503+ the purge of delete-marked records.
504+
505+2008-10-15 The InnoDB Team
506+
507+ * btr/btr0btr.c, include/page0zip.h, page/page0zip.c, include/univ.i:
508+ When a B-tree node of a compressed table is split or merged, the
509+ compression may fail. In this case, the entire compressed page
510+ will be copied and the excess records will be deleted. However,
511+ page_zip_copy(), now renamed to page_zip_copy_recs(), copied too
512+ many fields in the page header, overwriting PAGE_BTR_SEG_LEAF and
513+ PAGE_BTR_SEG_TOP when splitting the B-tree root. This caused
514+ corruption of compressed tables. Furthermore, the lock table and
515+ the adaptive hash index would be corrupted, because we forgot to
516+ update them when invoking page_zip_copy_recs().
517+
518+ Introduce the symbol UNIV_ZIP_DEBUG for triggering the copying of
519+ compressed pages more often, for debugging purposes.
520+
521+2008-10-10 The InnoDB Team
522+
523+ * handler/handler0alter.cc, include/row0merge.h, row/row0merge.c,
524+ row/row0mysql.c:
525+ Fix some locking issues, mainly in fast index creation. The
526+ InnoDB data dictionary cache should be latched whenever a
527+ transaction is holding locks on any data dictionary tables.
528+ Otherwise, lock waits or deadlocks could occur. Furthermore, the
529+ data dictionary transaction must be committed (and the locks
530+ released) before the data dictionary latch is released.
531+
532+ ha_innobase::add_index(): Lock the data dictionary before renaming
533+ or dropping the created indexes, because neither operation will
534+ commit the data dictionary transaction.
535+
536+ ha_innobase::final_drop_index(): Commit the transactions before
537+ unlocking the data dictionary.
538+
539+2008-10-09 The InnoDB Team
540+
541+ * buf/buf0lru.c:
542+ Fix Bug#39939 DROP TABLE/DISCARD TABLESPACE takes long time in
543+ buf_LRU_invalidate_tablespace()
544+
545+2008-10-08 The InnoDB Team
546+
547+ * dict/dict0crea.c, trx/trx0roll.c, include/row0mysql.h,
548+ row/row0merge.c, row/row0mysql.c:
549+ When dropping a table, hold the data dictionary latch until the
550+ transaction has been committed. The data dictionary latch is
551+ supposed to prevent lock waits and deadlocks in the data
552+ dictionary tables. Due to this bug, DROP TABLE could cause a
553+ deadlock or hang. Note that because of Bug#33650 and Bug#39833,
554+ MySQL may also drop a (temporary) table when executing CREATE INDEX
555+ or ALTER TABLE ... ADD INDEX.
556+
557+2008-10-04 The InnoDB Team
558+
559+ * handler/ha_innodb.cc, mysql-test/innodb_bug39438-master.opt,
560+ mysql-test/innodb_bug39438.result, mysql-test/innodb_bug39438.test:
561+ Fix Bug#39438 Testcase for Bug#39436 crashes on 5.1 in
562+ fil_space_get_latch
563+
564+2008-10-04 The InnoDB Team
565+
566+ * include/lock0lock.h, lock/lock0lock.c,
567+ mysql-test/innodb_bug38231.result, mysql-test/innodb_bug38231.test,
568+ row/row0mysql.c:
569+ Fix Bug#38231 Innodb crash in lock_reset_all_on_table() on TRUNCATE +
570+ LOCK / UNLOCK
571+
572+2008-10-04 The InnoDB Team
573+
574+ * handler/ha_innodb.cc:
575+ Fix Bug#35498 Cannot get table test/table1 auto-inccounter value in
576+ ::info
577+
578+2008-10-04 The InnoDB Team
579+
580+ * handler/ha_innodb.cc, handler/ha_innodb.h:
581+ Fix Bug#37788 InnoDB Plugin: AUTO_INCREMENT wrong for compressed
582+ tables
583+
584+2008-10-04 The InnoDB Team
585+
586+ * dict/dict0dict.c, handler/ha_innodb.cc, handler/ha_innodb.h,
587+ include/dict0dict.h, include/dict0mem.h, row/row0mysql.c:
588+ Fix Bug#39830 Table autoinc value not updated on first insert
589+
590+2008-10-03 The InnoDB Team
591+
592+ * mysql-test/innodb-index.test, mysql-test/innodb-index.result,
593+ mysql-test/innodb-timeout.test, mysql-test/innodb-timeout.result,
594+ srv/srv0srv.c, include/srv0srv.h, handler/ha_innodb.cc,
595+ include/ha_prototypes.h:
596+ Fix Bug#36285 innodb_lock_wait_timeout is not dynamic, not per session
597+
598+2008-09-19 The InnoDB Team
599+
600+ * os/os0proc.c:
601+ Fix a memory leak on Windows. The memory leak was due to wrong
602+ parameters passed into VirtualFree() call. As the result, the
603+ call fails with Windows error 87.
604+
605+2008-09-17 The InnoDB Team
606+
607+ * mysql-test/innodb.result, mysql-test/innodb-zip.result,
608+ mysql-test/innodb-zip.test, mysql-test/innodb.test, ibuf/ibuf0ibuf.c,
609+ dict/dict0crea.c, dict/dict0load.c, dict/dict0boot.c,
610+ include/dict0dict.h, include/trx0trx.h, dict/dict0dict.c,
611+ trx/trx0trx.c, include/ha_prototypes.h, handler/ha_innodb.cc:
612+ When creating an index in innodb_strict_mode, check that the
613+ maximum record size will never exceed the B-tree page size limit.
614+ For uncompressed tables, there should always be enough space for
615+ two records in an empty B-tree page. For compressed tables, there
616+ should be enough space for storing two node pointer records or one
617+ data record in an empty page in uncompressed format.
618+ The purpose of this check is to guarantee that INSERT or UPDATE
619+ will never fail due to too big record size.
620+
621+2008-09-17 The InnoDB Team
622+
623+ * btr/btr0cur.c, data/data0data.c, include/page0zip.h,
624+ include/page0zip.ic, page/page0zip.c, mysql-test/innodb_bug36172.test:
625+ Prevent infinite B-tree page splits in compressed tables by
626+ ensuring that there will always be enough space for two node
627+ pointer records in an empty B-tree page. Also, require that at
628+ least one data record will fit in an empty compressed page. This
629+ will reduce the maximum size of records in compressed tables.
630+
631+2008-09-09 The InnoDB Team
632+
633+ * mysql-test/innodb.result:
634+ Fix the failing innodb test by merging changes that MySQL made to
635+ that file (r2646.12.1 in MySQL BZR repository)
636+
637+2008-09-09 The InnoDB Team
638+
639+ * handler/ha_innodb.cc, mysql-test/innodb-autoinc.result,
640+ mysql-test/innodb-autoinc.test:
641+ Fix Bug#38839 auto increment does not work properly with InnoDB after
642+ update
643+
644+2008-09-09 The InnoDB Team
645+
646+ * dict/dict0dict.c, handler/handler0alter.cc, include/dict0dict.h,
647+ mysql-test/innodb-index.result, mysql-test/innodb-index.test:
648+ Fix Bug#38786 InnoDB plugin crashes on drop table/create table with FK
649+
650+2008-08-21 The InnoDB Team
651+
652+ * handler/ha_innodb.cc, include/ha_prototypes.h, row/row0sel.c:
653+ Fix Bug#37885 row_search_for_mysql may gap lock unnecessarily with SQL
654+ comments in query
655+
656+2008-08-21 The InnoDB Team
657+
658+ * handler/ha_innodb.cc:
659+ Fix Bug#38185 ha_innobase::info can hold locks even when called with
660+ HA_STATUS_NO_LOCK
661+
662+2008-08-18 The InnoDB Team
663+
664+ * buf/buf0buf.c, buf/buf0lru.c, include/buf0buf.ic, include/univ.i:
665+ Introduce UNIV_LRU_DEBUG for debugging the LRU buffer pool cache
666+
667+2008-08-08 The InnoDB Team
668+
669+ * buf/buf0lru.c, include/buf0buf.h:
670+ Fix two recovery bugs that could lead to a crash in debug builds with
671+ small buffer size
672+
673+2008-08-07 The InnoDB Team
674+
675+ * btr/btr0cur.c, handler/ha_innodb.cc, include/srv0srv.h,
676+ srv/srv0srv.c:
677+ Add a parameter innodb_stats_sample_pages to allow users to control
678+ the number of index dives when InnoDB estimates the cardinality of
679+ an index (ANALYZE TABLE, SHOW TABLE STATUS etc)
680+
681+2008-08-07 The InnoDB Team
682+
683+ * trx/trx0i_s.c:
684+ Fix a bug that would lead to a crash if a SELECT was issued from the
685+ INFORMATION_SCHEMA tables and there are rolling back transactions at
686+ the same time
687+
688+2008-08-06 The InnoDB Team
689+
690+ * btr/btr0btr.c, btr/btr0cur.c, ibuf/ibuf0ibuf.c, include/btr0cur.h,
691+ include/trx0roll.h, include/trx0types.h, row/row0purge.c,
692+ row/row0uins.c, row/row0umod.c, trx/trx0roll.c:
693+ In the rollback of incomplete transactions after crash recovery,
694+ tolerate clustered index records whose externally stored columns
695+ have not been written.
696+
697+2008-07-30 The InnoDB Team
698+
699+ * trx/trx0trx.c:
700+ Fixes a race in recovery where the recovery thread recovering a
701+ PREPARED trx and the background rollback thread can both try
702+ to free the trx after its status is set to COMMITTED_IN_MEMORY.
703+
704+2008-07-29 The InnoDB Team
705+
706+ * include/trx0rec.h, row/row0purge.c, row/row0vers.c, trx/trx0rec.c:
707+ Fix a BLOB corruption bug
708+
709+2008-07-15 The InnoDB Team
710+
711+ * btr/btr0sea.c, dict/dict0dict.c, include/btr0sea.h:
712+ Fixed a timing hole where a thread dropping an index can free the
713+ in-memory index struct while another thread is still using that
714+ structure to remove entries from adaptive hash index belonging
715+ to one of the pages that belongs to the index being dropped.
716+
717+2008-07-04 The InnoDB Team
718+
719+ * mysql-test/innodb-index.result:
720+ Fix the failing innodb-index test by adjusting the result to a new
721+ MySQL behavior (the change occurred in BZR-r2667)
722+
723+2008-07-03 The InnoDB Team
724+
725+ * mysql-test/innodb-zip.result, mysql-test/innodb-zip.test:
726+ Remove the negative test cases that produce warnings
727+
728+2008-07-02 The InnoDB Team
729+
730+ * mysql-test/innodb-replace.result, mysql-test/innodb-index.test:
731+ Disable part of innodb-index test because MySQL changed its behavior
732+ and is not calling ::add_index() anymore when adding primary index on
733+ non-NULL column
734+
735+2008-07-01 The InnoDB Team
736+
737+ * mysql-test/innodb-replace.result, mysql-test/innodb-replace.test:
738+ Fix the failing innodb-replace test by merging changes that MySQL
739+ made to that file (r2659 in MySQL BZR repository)
740+
741+2008-07-01 The InnoDB Team
742+
743+ * lock/lock0lock.c:
744+ Fix Bug#36942 Performance problem in lock_get_n_rec_locks (SHOW INNODB
745+ STATUS)
746+
747+2008-07-01 The InnoDB Team
748+
749+ * ha/ha0ha.c:
750+ Fix Bug#36941 Performance problem in ha_print_info (SHOW INNODB
751+ STATUS)
752+
753+2008-07-01 The InnoDB Team
754+
755+ * handler/ha_innodb.cc, mysql-test/innodb-autoinc.result,
756+ mysql-test/innodb-autoinc.test:
757+ Fix Bug#37531 After truncate, auto_increment behaves incorrectly for
758+ InnoDB
759+
760+2008-06-19 The InnoDB Team
761+
762+ * handler/ha_innodb.cc:
763+ Rewrite the function innodb_plugin_init() to support parameters in
764+ different order (in static and dynamic InnoDB) and to support more
765+ parameters in the static InnoDB
766+
767+2008-06-19 The InnoDB Team
768+
769+ * handler/handler0alter.cc:
770+ Fix a bug in ::add_index() which set the transaction state to "active"
771+ but never restored it to the original value. This bug caused warnings
772+ to be printed by the rpl.rpl_ddl mysql-test.
773+
774+2008-06-19 The InnoDB Team
775+
776+ * mysql-test/patches:
777+ Add a directory which contains patches, which need to be applied to
778+ MySQL source in order to get some mysql-tests to succeed. The patches
779+ cannot be committed in MySQL repository because they are specific to
780+ the InnoDB plugin.
781+
782+2008-06-19 The InnoDB Team
783+
784+ * mysql-test/innodb-zip.result, mysql-test/innodb-zip.test,
785+ row/row0row.c:
786+ Fix an anomaly when updating a record with BLOB prefix
787+
788+2008-06-18 The InnoDB Team
789+
790+ * include/trx0sys.h, srv/srv0start.c, trx/trx0sys.c:
791+ Fix a bug in recovery which was a side effect of the file_format_check
792+ changes
793+
794+2008-06-09 The InnoDB Team
795+
796+ * mysql-test/innodb.result:
797+ Fix the failing innodb test by merging changes that MySQL made to that
798+ file
799+
800+2008-06-06 The InnoDB Team
801+
802+ * buf/buf0buf.c, handler/ha_innodb.cc, include/buf0buf.h,
803+ include/srv0srv.h, srv/srv0srv.c:
804+ Fix Bug#36600 SHOW STATUS takes a lot of CPU in
805+ buf_get_latched_pages_number
806+
807+ * handler/ha_innodb.cc, os/os0file.c:
808+ Fix Bug#11894 innodb_file_per_table crashes w/ Windows .sym symbolic
809+ link hack
810+
811+ * include/ut0ut.h, srv/srv0srv.c, ut/ut0ut.c:
812+ Fix Bug#36819 ut_usectime does not handle errors from gettimeofday
813+
814+ * handler/ha_innodb.cc:
815+ Fix Bug#35602 Failed to read auto-increment value from storage engine
816+
817+ * srv/srv0start.c:
818+ Fix Bug#36149 Read buffer overflow in srv0start.c found during "make
819+ test"
820+
821+2008-05-08 The InnoDB Team
822+
823+ * btr/btr0btr.c, mysql-test/innodb_bug36172.result,
824+ mysql-test/innodb_bug36172.test:
825+ Fix Bug#36172 insert into compressed innodb table crashes
826+
827+2008-05-08 The InnoDB Team
828+
829+ InnoDB Plugin 1.0.1 released
830+
831+2008-05-06 The InnoDB Team
832+
833+ * handler/ha_innodb.cc, include/srv0srv.h, include/sync0sync.h,
834+ include/trx0sys.h, mysql-test/innodb-zip.result,
835+ mysql-test/innodb-zip.test, srv/srv0srv.c, srv/srv0start.c,
836+ sync/sync0sync.c, trx/trx0sys.c:
837+ Implement the system tablespace tagging
838+
839+ * handler/ha_innodb.cc, handler/i_s.cc, include/univ.i,
840+ srv/srv0start.c:
841+ Add InnoDB version in INFORMATION_SCHEMA.PLUGINS.PLUGIN_VERSION,
842+ in the startup message and in a server variable innodb_version.
843+
844+ * sync/sync0sync.c:
845+ Fix a bug in the sync debug code where a lock with level
846+ SYNC_LEVEL_VARYING would cause an assertion failure when a thread
847+ tried to release it.
848+
849+2008-04-30 The InnoDB Team
850+
851+ * Makefile.am:
852+ Fix Bug#36434 ha_innodb.so is installed in the wrong directory
853+
854+ * handler/ha_innodb.cc:
855+ Merge change from MySQL (Fix Bug#35406 5.1-opt crashes on select from
856+ I_S.REFERENTIAL_CONSTRAINTS):
857+ ChangeSet@1.2563, 2008-03-18 19:42:04+04:00, gluh@mysql.com +1 -0
858+
859+ * scripts/install_innodb_plugins.sql:
860+ Added
861+
862+ * mysql-test/innodb.result:
863+ Merge change from MySQL (this fixes the failing innodb test):
864+ ChangeSet@1.1810.3601.4, 2008-02-07 02:33:21+04:00
865+
866+ * row/row0sel.c:
867+ Fix Bug#35226 RBR event crashes slave
868+
869+ * handler/ha_innodb.cc:
870+ Change the fix for Bug#32440 to show bytes instead of kilobytes in
871+ INFORMATION_SCHEMA.TABLES.DATA_FREE
872+
873+ * handler/ha_innodb.cc, mysql-test/innodb.result,
874+ mysql-test/innodb.test:
875+ Fix Bug#29507 TRUNCATE shows to many rows effected
876+
877+ * handler/ha_innodb.cc, mysql-test/innodb.result,
878+ mysql-test/innodb.test:
879+ Fix Bug#35537 Innodb doesn't increment handler_update and
880+ handler_delete
881+
882+2008-04-29 The InnoDB Team
883+
884+ * handler/i_s.cc, include/srv0start.h, srv/srv0start.c:
885+ Fix Bug#36310 InnoDB plugin crash
886+
887+2008-04-23 The InnoDB Team
888+
889+ * mysql-test/innodb_bug36169.result, mysql-test/innodb_bug36169.test,
890+ row/row0mysql.c:
891+ Fix Bug#36169 create innodb compressed table with too large row size
892+ crashed
893+
894+ * (outside the source tree):
895+ Fix Bug#36222 New InnoDB plugin 1.0 has wrong MKDIR_P defined in
896+ Makefile.in
897+
898+2008-04-15 The InnoDB Team
899+
900+ InnoDB Plugin 1.0.0 released
901
902=== modified file 'storage/innobase/Makefile.am'
903--- storage/innobase/Makefile.am 2008-12-14 18:54:01 +0000
904+++ storage/innobase/Makefile.am 2009-03-31 04:19:17 +0000
905@@ -33,7 +33,8 @@
906 include/btr0cur.h include/btr0cur.ic \
907 include/btr0pcur.h include/btr0pcur.ic \
908 include/btr0sea.h include/btr0sea.ic \
909- include/btr0types.h include/buf0buf.h \
910+ include/btr0types.h include/buf0buddy.h \
911+ include/buf0buddy.ic include/buf0buf.h \
912 include/buf0buf.ic include/buf0flu.h \
913 include/buf0flu.ic include/buf0lru.h \
914 include/buf0lru.ic include/buf0rea.h \
915@@ -53,7 +54,10 @@
916 include/fsp0fsp.ic include/fut0fut.h \
917 include/fut0fut.ic include/fut0lst.h \
918 include/fut0lst.ic include/ha0ha.h \
919- include/ha0ha.ic include/hash0hash.h \
920+ include/ha0ha.ic \
921+ include/ha0storage.h \
922+ include/ha0storage.ic \
923+ include/hash0hash.h \
924 include/hash0hash.ic include/ibuf0ibuf.h \
925 include/ibuf0ibuf.ic include/ibuf0types.h \
926 include/lock0iter.h \
927@@ -68,12 +72,15 @@
928 include/mem0pool.h include/mem0pool.ic \
929 include/mtr0log.h include/mtr0log.ic \
930 include/mtr0mtr.h include/mtr0mtr.ic \
931- include/mtr0types.h include/os0file.h \
932+ include/mtr0types.h \
933+ include/mysql_addons.h \
934+ include/os0file.h \
935 include/os0proc.h include/os0proc.ic \
936 include/os0sync.h include/os0sync.ic \
937 include/os0thread.h include/os0thread.ic \
938 include/page0cur.h include/page0cur.ic \
939 include/page0page.h include/page0page.ic \
940+ include/page0zip.h include/page0zip.ic \
941 include/page0types.h include/pars0grm.h \
942 include/pars0opt.h include/pars0opt.ic \
943 include/pars0pars.h include/pars0pars.ic \
944@@ -84,7 +91,9 @@
945 include/read0types.h include/rem0cmp.h \
946 include/rem0cmp.ic include/rem0rec.h \
947 include/rem0rec.ic include/rem0types.h \
948+ include/row0ext.h include/row0ext.ic \
949 include/row0ins.h include/row0ins.ic \
950+ include/row0merge.h \
951 include/row0mysql.h include/row0mysql.ic \
952 include/row0purge.h include/row0purge.ic \
953 include/row0row.h include/row0row.ic \
954@@ -101,6 +110,7 @@
955 include/sync0rw.ic include/sync0sync.h \
956 include/sync0sync.ic include/sync0types.h \
957 include/thr0loc.h include/thr0loc.ic \
958+ include/trx0i_s.h \
959 include/trx0purge.h include/trx0purge.ic \
960 include/trx0rec.h include/trx0rec.ic \
961 include/trx0roll.h include/trx0roll.ic \
962@@ -119,41 +129,52 @@
963 include/ut0ut.ic include/ut0vec.h \
964 include/ut0vec.ic include/ut0list.h \
965 include/ut0list.ic include/ut0wqueue.h \
966- include/ha_prototypes.h handler/ha_innodb.h
967+ include/ha_prototypes.h handler/ha_innodb.h \
968+ include/handler0alter.h \
969+ handler/i_s.h handler/innodb_patch_info.h
970
971 EXTRA_LIBRARIES= libinnobase.a
972 noinst_LIBRARIES= @plugin_innobase_static_target@
973 libinnobase_a_SOURCES= btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c \
974- btr/btr0sea.c buf/buf0buf.c buf/buf0flu.c \
975+ btr/btr0sea.c buf/buf0buddy.c \
976+ buf/buf0buf.c buf/buf0flu.c \
977 buf/buf0lru.c buf/buf0rea.c data/data0data.c \
978 data/data0type.c dict/dict0boot.c \
979 dict/dict0crea.c dict/dict0dict.c \
980 dict/dict0load.c dict/dict0mem.c dyn/dyn0dyn.c \
981 eval/eval0eval.c eval/eval0proc.c \
982 fil/fil0fil.c fsp/fsp0fsp.c fut/fut0fut.c \
983- fut/fut0lst.c ha/ha0ha.c ha/hash0hash.c \
984+ fut/fut0lst.c ha/ha0ha.c \
985+ ha/ha0storage.c \
986+ ha/hash0hash.c \
987 ibuf/ibuf0ibuf.c lock/lock0iter.c \
988 lock/lock0lock.c \
989 log/log0log.c log/log0recv.c mach/mach0data.c \
990 mem/mem0mem.c mem/mem0pool.c mtr/mtr0log.c \
991 mtr/mtr0mtr.c os/os0file.c os/os0proc.c \
992 os/os0sync.c os/os0thread.c page/page0cur.c \
993- page/page0page.c pars/lexyy.c pars/pars0grm.c \
994+ page/page0page.c page/page0zip.c \
995+ pars/lexyy.c pars/pars0grm.c \
996 pars/pars0opt.c pars/pars0pars.c \
997 pars/pars0sym.c que/que0que.c read/read0read.c \
998- rem/rem0cmp.c rem/rem0rec.c row/row0ins.c \
999+ rem/rem0cmp.c rem/rem0rec.c row/row0ext.c \
1000+ row/row0ins.c row/row0merge.c \
1001 row/row0mysql.c row/row0purge.c row/row0row.c \
1002 row/row0sel.c row/row0uins.c row/row0umod.c \
1003 row/row0undo.c row/row0upd.c row/row0vers.c \
1004 srv/srv0que.c srv/srv0srv.c srv/srv0start.c \
1005 sync/sync0arr.c sync/sync0rw.c \
1006- sync/sync0sync.c thr/thr0loc.c trx/trx0purge.c \
1007+ sync/sync0sync.c thr/thr0loc.c \
1008+ trx/trx0i_s.c \
1009+ trx/trx0purge.c \
1010 trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c \
1011 trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c \
1012 usr/usr0sess.c ut/ut0byte.c ut/ut0dbg.c \
1013 ut/ut0list.c ut/ut0mem.c ut/ut0rnd.c \
1014 ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c \
1015- handler/ha_innodb.cc
1016+ handler/ha_innodb.cc handler/handler0alter.cc \
1017+ handler/i_s.cc \
1018+ handler/mysql_addons.cc
1019
1020 libinnobase_a_CXXFLAGS= $(AM_CFLAGS)
1021 libinnobase_a_CFLAGS= $(AM_CFLAGS)
1022
1023=== added file 'storage/innobase/README'
1024--- storage/innobase/README 1970-01-01 00:00:00 +0000
1025+++ storage/innobase/README 2009-03-31 04:19:17 +0000
1026@@ -0,0 +1,29 @@
1027+This is the source of the InnoDB Plugin 1.0.3 for MySQL 5.1
1028+===========================================================
1029+
1030+Instructions for compiling the plugin:
1031+--------------------------------------
1032+
1033+1. Get the latest MySQL 5.1 sources from
1034+ http://dev.mysql.com/downloads/mysql/5.1.html#source
1035+
1036+2. Replace the contents of the mysql-5.1.N/storage/innobase/ directory
1037+ with the contents of this directory.
1038+
1039+3. Optional (only necessary if you are going to run tests from the
1040+ mysql-test suite): cd into the innobase directory and run ./setup.sh
1041+
1042+4. Compile MySQL as usual.
1043+
1044+5. Enjoy!
1045+
1046+See the online documentation for more detailed instructions:
1047+http://www.innodb.com/doc/innodb_plugin-1.0/innodb-plugin-installation.html
1048+
1049+For more information about InnoDB visit
1050+http://www.innodb.com
1051+
1052+Please report any problems or issues with the plugin in the InnoDB Forums
1053+http://forums.innodb.com/ or in the MySQL Bugs database http://bugs.mysql.com
1054+
1055+Thank you for using the InnoDB plugin!
1056
1057=== modified file 'storage/innobase/btr/btr0btr.c'
1058--- storage/innobase/btr/btr0btr.c 2007-07-10 14:34:21 +0000
1059+++ storage/innobase/btr/btr0btr.c 2009-03-31 04:19:17 +0000
1060@@ -1,8 +1,24 @@
1061+/*****************************************************************************
1062+
1063+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
1064+
1065+This program is free software; you can redistribute it and/or modify it under
1066+the terms of the GNU General Public License as published by the Free Software
1067+Foundation; version 2 of the License.
1068+
1069+This program is distributed in the hope that it will be useful, but WITHOUT
1070+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
1071+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
1072+
1073+You should have received a copy of the GNU General Public License along with
1074+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
1075+Place, Suite 330, Boston, MA 02111-1307 USA
1076+
1077+*****************************************************************************/
1078+
1079 /******************************************************
1080 The B-tree
1081
1082-(c) 1994-1996 Innobase Oy
1083-
1084 Created 6/2/1994 Heikki Tuuri
1085 *******************************************************/
1086
1087@@ -14,6 +30,7 @@
1088
1089 #include "fsp0fsp.h"
1090 #include "page0page.h"
1091+#include "page0zip.h"
1092 #include "btr0cur.h"
1093 #include "btr0sea.h"
1094 #include "btr0pcur.h"
1095@@ -77,47 +94,65 @@
1096 we allocate pages for the non-leaf levels of the tree.
1097 */
1098
1099-/****************************************************************
1100-Returns the upper level node pointer to a page. It is assumed that
1101-mtr holds an x-latch on the tree. */
1102-static
1103-rec_t*
1104-btr_page_get_father_node_ptr(
1105-/*=========================*/
1106- /* out: pointer to node pointer record */
1107+#ifdef UNIV_BTR_DEBUG
1108+/******************************************************************
1109+Checks a file segment header within a B-tree root page. */
1110+static
1111+ibool
1112+btr_root_fseg_validate(
1113+/*===================*/
1114+ /* out: TRUE if valid */
1115+ const fseg_header_t* seg_header, /* in: segment header */
1116+ ulint space) /* in: tablespace identifier */
1117+{
1118+ ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
1119+
1120+ ut_a(mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space);
1121+ ut_a(offset >= FIL_PAGE_DATA);
1122+ ut_a(offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
1123+ return(TRUE);
1124+}
1125+#endif /* UNIV_BTR_DEBUG */
1126+
1127+/******************************************************************
1128+Gets the root node of a tree and x-latches it. */
1129+static
1130+buf_block_t*
1131+btr_root_block_get(
1132+/*===============*/
1133+ /* out: root page, x-latched */
1134 dict_index_t* index, /* in: index tree */
1135- page_t* page, /* in: page: must contain at least one
1136- user record */
1137- mtr_t* mtr); /* in: mtr */
1138-/*****************************************************************
1139-Empties an index page. */
1140-static
1141-void
1142-btr_page_empty(
1143-/*===========*/
1144- page_t* page, /* in: page to be emptied */
1145- mtr_t* mtr); /* in: mtr */
1146-/*****************************************************************
1147-Returns TRUE if the insert fits on the appropriate half-page
1148-with the chosen split_rec. */
1149-static
1150-ibool
1151-btr_page_insert_fits(
1152-/*=================*/
1153- /* out: TRUE if fits */
1154- btr_cur_t* cursor, /* in: cursor at which insert
1155- should be made */
1156- rec_t* split_rec, /* in: suggestion for first record
1157- on upper half-page, or NULL if
1158- tuple should be first */
1159- const ulint* offsets, /* in: rec_get_offsets(
1160- split_rec, cursor->index) */
1161- dtuple_t* tuple, /* in: tuple to insert */
1162- mem_heap_t* heap); /* in: temporary memory heap */
1163+ mtr_t* mtr) /* in: mtr */
1164+{
1165+ ulint space;
1166+ ulint zip_size;
1167+ ulint root_page_no;
1168+ buf_block_t* block;
1169+
1170+ space = dict_index_get_space(index);
1171+ zip_size = dict_table_zip_size(index->table);
1172+ root_page_no = dict_index_get_page(index);
1173+
1174+ block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
1175+ ut_a((ibool)!!page_is_comp(buf_block_get_frame(block))
1176+ == dict_table_is_comp(index->table));
1177+#ifdef UNIV_BTR_DEBUG
1178+ if (!dict_index_is_ibuf(index)) {
1179+ const page_t* root = buf_block_get_frame(block);
1180+
1181+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
1182+ + root, space));
1183+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
1184+ + root, space));
1185+ }
1186+#endif /* UNIV_BTR_DEBUG */
1187+
1188+ return(block);
1189+}
1190
1191 /******************************************************************
1192 Gets the root node of a tree and x-latches it. */
1193-
1194+UNIV_INTERN
1195 page_t*
1196 btr_root_get(
1197 /*=========*/
1198@@ -125,23 +160,13 @@
1199 dict_index_t* index, /* in: index tree */
1200 mtr_t* mtr) /* in: mtr */
1201 {
1202- ulint space;
1203- ulint root_page_no;
1204- page_t* root;
1205-
1206- space = dict_index_get_space(index);
1207- root_page_no = dict_index_get_page(index);
1208-
1209- root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
1210- ut_a((ibool)!!page_is_comp(root) == dict_table_is_comp(index->table));
1211-
1212- return(root);
1213+ return(buf_block_get_frame(btr_root_block_get(index, mtr)));
1214 }
1215
1216 /*****************************************************************
1217 Gets pointer to the previous user record in the tree. It is assumed that
1218 the caller has appropriate latches on the page and its neighbor. */
1219-
1220+UNIV_INTERN
1221 rec_t*
1222 btr_get_prev_user_rec(
1223 /*==================*/
1224@@ -153,7 +178,6 @@
1225 page_t* page;
1226 page_t* prev_page;
1227 ulint prev_page_no;
1228- ulint space;
1229
1230 if (!page_rec_is_infimum(rec)) {
1231
1232@@ -165,23 +189,30 @@
1233 }
1234 }
1235
1236- page = buf_frame_align(rec);
1237+ page = page_align(rec);
1238 prev_page_no = btr_page_get_prev(page, mtr);
1239- space = buf_frame_get_space_id(page);
1240
1241 if (prev_page_no != FIL_NULL) {
1242
1243- prev_page = buf_page_get_with_no_latch(space, prev_page_no,
1244- mtr);
1245+ ulint space;
1246+ ulint zip_size;
1247+ buf_block_t* prev_block;
1248+
1249+ space = page_get_space_id(page);
1250+ zip_size = fil_space_get_zip_size(space);
1251+
1252+ prev_block = buf_page_get_with_no_latch(space, zip_size,
1253+ prev_page_no, mtr);
1254+ prev_page = buf_block_get_frame(prev_block);
1255 /* The caller must already have a latch to the brother */
1256- ut_ad((mtr_memo_contains(mtr, buf_block_align(prev_page),
1257- MTR_MEMO_PAGE_S_FIX))
1258- || (mtr_memo_contains(mtr, buf_block_align(prev_page),
1259- MTR_MEMO_PAGE_X_FIX)));
1260+ ut_ad(mtr_memo_contains(mtr, prev_block,
1261+ MTR_MEMO_PAGE_S_FIX)
1262+ || mtr_memo_contains(mtr, prev_block,
1263+ MTR_MEMO_PAGE_X_FIX));
1264+#ifdef UNIV_BTR_DEBUG
1265 ut_a(page_is_comp(prev_page) == page_is_comp(page));
1266-#ifdef UNIV_BTR_DEBUG
1267 ut_a(btr_page_get_next(prev_page, mtr)
1268- == buf_frame_get_page_no(page));
1269+ == page_get_page_no(page));
1270 #endif /* UNIV_BTR_DEBUG */
1271
1272 return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
1273@@ -193,7 +224,7 @@
1274 /*****************************************************************
1275 Gets pointer to the next user record in the tree. It is assumed that the
1276 caller has appropriate latches on the page and its neighbor. */
1277-
1278+UNIV_INTERN
1279 rec_t*
1280 btr_get_next_user_rec(
1281 /*==================*/
1282@@ -205,7 +236,6 @@
1283 page_t* page;
1284 page_t* next_page;
1285 ulint next_page_no;
1286- ulint space;
1287
1288 if (!page_rec_is_supremum(rec)) {
1289
1290@@ -217,25 +247,30 @@
1291 }
1292 }
1293
1294- page = buf_frame_align(rec);
1295+ page = page_align(rec);
1296 next_page_no = btr_page_get_next(page, mtr);
1297- space = buf_frame_get_space_id(page);
1298
1299 if (next_page_no != FIL_NULL) {
1300-
1301- next_page = buf_page_get_with_no_latch(space, next_page_no,
1302- mtr);
1303+ ulint space;
1304+ ulint zip_size;
1305+ buf_block_t* next_block;
1306+
1307+ space = page_get_space_id(page);
1308+ zip_size = fil_space_get_zip_size(space);
1309+
1310+ next_block = buf_page_get_with_no_latch(space, zip_size,
1311+ next_page_no, mtr);
1312+ next_page = buf_block_get_frame(next_block);
1313 /* The caller must already have a latch to the brother */
1314- ut_ad((mtr_memo_contains(mtr, buf_block_align(next_page),
1315- MTR_MEMO_PAGE_S_FIX))
1316- || (mtr_memo_contains(mtr, buf_block_align(next_page),
1317- MTR_MEMO_PAGE_X_FIX)));
1318+ ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX)
1319+ || mtr_memo_contains(mtr, next_block,
1320+ MTR_MEMO_PAGE_X_FIX));
1321 #ifdef UNIV_BTR_DEBUG
1322+ ut_a(page_is_comp(next_page) == page_is_comp(page));
1323 ut_a(btr_page_get_prev(next_page, mtr)
1324- == buf_frame_get_page_no(page));
1325+ == page_get_page_no(page));
1326 #endif /* UNIV_BTR_DEBUG */
1327
1328- ut_a(page_is_comp(next_page) == page_is_comp(page));
1329 return(page_rec_get_next(page_get_infimum_rec(next_page)));
1330 }
1331
1332@@ -244,37 +279,49 @@
1333
1334 /******************************************************************
1335 Creates a new index page (not the root, and also not
1336-used in page reorganization). */
1337+used in page reorganization). @see btr_page_empty(). */
1338 static
1339 void
1340 btr_page_create(
1341 /*============*/
1342- page_t* page, /* in: page to be created */
1343+ buf_block_t* block, /* in/out: page to be created */
1344+ page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
1345 dict_index_t* index, /* in: index */
1346+ ulint level, /* in: the B-tree level of the page */
1347 mtr_t* mtr) /* in: mtr */
1348 {
1349- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
1350- MTR_MEMO_PAGE_X_FIX));
1351- page_create(page, mtr, dict_table_is_comp(index->table));
1352- buf_block_align(page)->check_index_page_at_flush = TRUE;
1353-
1354- btr_page_set_index_id(page, index->id, mtr);
1355+ page_t* page = buf_block_get_frame(block);
1356+
1357+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1358+
1359+ if (UNIV_LIKELY_NULL(page_zip)) {
1360+ page_create_zip(block, index, level, mtr);
1361+ } else {
1362+ page_create(block, mtr, dict_table_is_comp(index->table));
1363+ /* Set the level of the new index page */
1364+ btr_page_set_level(page, NULL, level, mtr);
1365+ }
1366+
1367+ block->check_index_page_at_flush = TRUE;
1368+
1369+ btr_page_set_index_id(page, page_zip, index->id, mtr);
1370 }
1371
1372 /******************************************************************
1373 Allocates a new file page to be used in an ibuf tree. Takes the page from
1374 the free list of the tree, which must contain pages! */
1375 static
1376-page_t*
1377+buf_block_t*
1378 btr_page_alloc_for_ibuf(
1379 /*====================*/
1380- /* out: new allocated page, x-latched */
1381+ /* out: new allocated block, x-latched */
1382 dict_index_t* index, /* in: index tree */
1383 mtr_t* mtr) /* in: mtr */
1384 {
1385 fil_addr_t node_addr;
1386 page_t* root;
1387 page_t* new_page;
1388+ buf_block_t* new_block;
1389
1390 root = btr_root_get(index, mtr);
1391
1392@@ -282,11 +329,11 @@
1393 + PAGE_BTR_IBUF_FREE_LIST, mtr);
1394 ut_a(node_addr.page != FIL_NULL);
1395
1396- new_page = buf_page_get(dict_index_get_space(index), node_addr.page,
1397- RW_X_LATCH, mtr);
1398-#ifdef UNIV_SYNC_DEBUG
1399- buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
1400-#endif /* UNIV_SYNC_DEBUG */
1401+ new_block = buf_page_get(dict_index_get_space(index),
1402+ dict_table_zip_size(index->table),
1403+ node_addr.page, RW_X_LATCH, mtr);
1404+ new_page = buf_block_get_frame(new_block);
1405+ buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
1406
1407 flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1408 new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
1409@@ -294,17 +341,17 @@
1410 ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1411 mtr));
1412
1413- return(new_page);
1414+ return(new_block);
1415 }
1416
1417 /******************************************************************
1418 Allocates a new file page to be used in an index tree. NOTE: we assume
1419 that the caller has made the reservation for free extents! */
1420-
1421-page_t*
1422+UNIV_INTERN
1423+buf_block_t*
1424 btr_page_alloc(
1425 /*===========*/
1426- /* out: new allocated page, x-latched;
1427+ /* out: new allocated block, x-latched;
1428 NULL if out of space */
1429 dict_index_t* index, /* in: index */
1430 ulint hint_page_no, /* in: hint of a good page */
1431@@ -316,10 +363,10 @@
1432 {
1433 fseg_header_t* seg_header;
1434 page_t* root;
1435- page_t* new_page;
1436+ buf_block_t* new_block;
1437 ulint new_page_no;
1438
1439- if (index->type & DICT_IBUF) {
1440+ if (dict_index_is_ibuf(index)) {
1441
1442 return(btr_page_alloc_for_ibuf(index, mtr));
1443 }
1444@@ -343,18 +390,17 @@
1445 return(NULL);
1446 }
1447
1448- new_page = buf_page_get(dict_index_get_space(index), new_page_no,
1449- RW_X_LATCH, mtr);
1450-#ifdef UNIV_SYNC_DEBUG
1451- buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
1452-#endif /* UNIV_SYNC_DEBUG */
1453+ new_block = buf_page_get(dict_index_get_space(index),
1454+ dict_table_zip_size(index->table),
1455+ new_page_no, RW_X_LATCH, mtr);
1456+ buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
1457
1458- return(new_page);
1459+ return(new_block);
1460 }
1461
1462 /******************************************************************
1463 Gets the number of pages in a B-tree. */
1464-
1465+UNIV_INTERN
1466 ulint
1467 btr_get_size(
1468 /*=========*/
1469@@ -404,17 +450,17 @@
1470 btr_page_free_for_ibuf(
1471 /*===================*/
1472 dict_index_t* index, /* in: index tree */
1473- page_t* page, /* in: page to be freed, x-latched */
1474+ buf_block_t* block, /* in: block to be freed, x-latched */
1475 mtr_t* mtr) /* in: mtr */
1476 {
1477 page_t* root;
1478
1479- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
1480- MTR_MEMO_PAGE_X_FIX));
1481+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1482 root = btr_root_get(index, mtr);
1483
1484 flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1485- page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
1486+ buf_block_get_frame(block)
1487+ + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
1488
1489 ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1490 mtr));
1491@@ -424,30 +470,27 @@
1492 Frees a file page used in an index tree. Can be used also to (BLOB)
1493 external storage pages, because the page level 0 can be given as an
1494 argument. */
1495-
1496+UNIV_INTERN
1497 void
1498 btr_page_free_low(
1499 /*==============*/
1500 dict_index_t* index, /* in: index tree */
1501- page_t* page, /* in: page to be freed, x-latched */
1502+ buf_block_t* block, /* in: block to be freed, x-latched */
1503 ulint level, /* in: page level */
1504 mtr_t* mtr) /* in: mtr */
1505 {
1506 fseg_header_t* seg_header;
1507 page_t* root;
1508- ulint space;
1509- ulint page_no;
1510
1511- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
1512- MTR_MEMO_PAGE_X_FIX));
1513+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1514 /* The page gets invalid for optimistic searches: increment the frame
1515 modify clock */
1516
1517- buf_frame_modify_clock_inc(page);
1518-
1519- if (index->type & DICT_IBUF) {
1520-
1521- btr_page_free_for_ibuf(index, page, mtr);
1522+ buf_block_modify_clock_inc(block);
1523+
1524+ if (dict_index_is_ibuf(index)) {
1525+
1526+ btr_page_free_for_ibuf(index, block, mtr);
1527
1528 return;
1529 }
1530@@ -460,30 +503,27 @@
1531 seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
1532 }
1533
1534- space = buf_frame_get_space_id(page);
1535- page_no = buf_frame_get_page_no(page);
1536-
1537- fseg_free_page(seg_header, space, page_no, mtr);
1538+ fseg_free_page(seg_header,
1539+ buf_block_get_space(block),
1540+ buf_block_get_page_no(block), mtr);
1541 }
1542
1543 /******************************************************************
1544 Frees a file page used in an index tree. NOTE: cannot free field external
1545 storage pages because the page must contain info on its level. */
1546-
1547+UNIV_INTERN
1548 void
1549 btr_page_free(
1550 /*==========*/
1551 dict_index_t* index, /* in: index tree */
1552- page_t* page, /* in: page to be freed, x-latched */
1553+ buf_block_t* block, /* in: block to be freed, x-latched */
1554 mtr_t* mtr) /* in: mtr */
1555 {
1556 ulint level;
1557
1558- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
1559- MTR_MEMO_PAGE_X_FIX));
1560- level = btr_page_get_level(page, mtr);
1561+ level = btr_page_get_level(buf_block_get_frame(block), mtr);
1562
1563- btr_page_free_low(index, page, level, mtr);
1564+ btr_page_free_low(index, block, level, mtr);
1565 }
1566
1567 /******************************************************************
1568@@ -493,6 +533,8 @@
1569 btr_node_ptr_set_child_page_no(
1570 /*===========================*/
1571 rec_t* rec, /* in: node pointer record */
1572+ page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
1573+ part will be updated, or NULL */
1574 const ulint* offsets,/* in: array returned by rec_get_offsets() */
1575 ulint page_no,/* in: child node address */
1576 mtr_t* mtr) /* in: mtr */
1577@@ -501,92 +543,99 @@
1578 ulint len;
1579
1580 ut_ad(rec_offs_validate(rec, NULL, offsets));
1581- ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr));
1582+ ut_ad(!page_is_leaf(page_align(rec)));
1583 ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
1584
1585 /* The child address is in the last field */
1586 field = rec_get_nth_field(rec, offsets,
1587 rec_offs_n_fields(offsets) - 1, &len);
1588
1589- ut_ad(len == 4);
1590+ ut_ad(len == REC_NODE_PTR_SIZE);
1591
1592- mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
1593+ if (UNIV_LIKELY_NULL(page_zip)) {
1594+ page_zip_write_node_ptr(page_zip, rec,
1595+ rec_offs_data_size(offsets),
1596+ page_no, mtr);
1597+ } else {
1598+ mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
1599+ }
1600 }
1601
1602 /****************************************************************
1603 Returns the child page of a node pointer and x-latches it. */
1604 static
1605-page_t*
1606+buf_block_t*
1607 btr_node_ptr_get_child(
1608 /*===================*/
1609 /* out: child page, x-latched */
1610- rec_t* node_ptr,/* in: node pointer */
1611+ const rec_t* node_ptr,/* in: node pointer */
1612+ dict_index_t* index, /* in: index */
1613 const ulint* offsets,/* in: array returned by rec_get_offsets() */
1614 mtr_t* mtr) /* in: mtr */
1615 {
1616 ulint page_no;
1617 ulint space;
1618- page_t* page;
1619
1620- ut_ad(rec_offs_validate(node_ptr, NULL, offsets));
1621- space = buf_frame_get_space_id(node_ptr);
1622+ ut_ad(rec_offs_validate(node_ptr, index, offsets));
1623+ space = page_get_space_id(page_align(node_ptr));
1624 page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
1625
1626- page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
1627-
1628- return(page);
1629+ return(btr_block_get(space, dict_table_zip_size(index->table),
1630+ page_no, RW_X_LATCH, mtr));
1631 }
1632
1633 /****************************************************************
1634 Returns the upper level node pointer to a page. It is assumed that mtr holds
1635 an x-latch on the tree. */
1636 static
1637-rec_t*
1638-btr_page_get_father_for_rec(
1639-/*========================*/
1640- /* out: pointer to node pointer record,
1641+ulint*
1642+btr_page_get_father_node_ptr(
1643+/*=========================*/
1644+ /* out: rec_get_offsets() of the
1645+ node pointer record */
1646+ ulint* offsets,/* in: work area for the return value */
1647+ mem_heap_t* heap, /* in: memory heap to use */
1648+ btr_cur_t* cursor, /* in: cursor pointing to user record,
1649+ out: cursor on node pointer record,
1650 its page x-latched */
1651- dict_index_t* index, /* in: index tree */
1652- page_t* page, /* in: page: must contain at least one
1653- user record */
1654- rec_t* user_rec,/* in: user_record on page */
1655 mtr_t* mtr) /* in: mtr */
1656 {
1657- mem_heap_t* heap;
1658 dtuple_t* tuple;
1659- btr_cur_t cursor;
1660+ rec_t* user_rec;
1661 rec_t* node_ptr;
1662- ulint offsets_[REC_OFFS_NORMAL_SIZE];
1663- ulint* offsets = offsets_;
1664- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
1665+ ulint level;
1666+ ulint page_no;
1667+ dict_index_t* index;
1668+
1669+ page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
1670+ index = btr_cur_get_index(cursor);
1671
1672 ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
1673 MTR_MEMO_X_LOCK));
1674+
1675+ ut_ad(dict_index_get_page(index) != page_no);
1676+
1677+ level = btr_page_get_level(btr_cur_get_page(cursor), mtr);
1678+ user_rec = btr_cur_get_rec(cursor);
1679 ut_a(page_rec_is_user_rec(user_rec));
1680-
1681- ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page));
1682-
1683- heap = mem_heap_create(100);
1684-
1685- tuple = dict_index_build_node_ptr(index, user_rec, 0, heap,
1686- btr_page_get_level(page, mtr));
1687-
1688- btr_cur_search_to_nth_level(index,
1689- btr_page_get_level(page, mtr) + 1,
1690- tuple, PAGE_CUR_LE,
1691- BTR_CONT_MODIFY_TREE, &cursor, 0, mtr);
1692-
1693- node_ptr = btr_cur_get_rec(&cursor);
1694+ tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);
1695+
1696+ btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
1697+ BTR_CONT_MODIFY_TREE, cursor, 0, mtr);
1698+
1699+ node_ptr = btr_cur_get_rec(cursor);
1700+ ut_ad(!page_rec_is_comp(node_ptr)
1701+ || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
1702 offsets = rec_get_offsets(node_ptr, index, offsets,
1703 ULINT_UNDEFINED, &heap);
1704
1705 if (UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr, offsets)
1706- != buf_frame_get_page_no(page))) {
1707+ != page_no)) {
1708 rec_t* print_rec;
1709 fputs("InnoDB: Dump of the child page:\n", stderr);
1710- buf_page_print(buf_frame_align(page));
1711+ buf_page_print(page_align(user_rec), 0);
1712 fputs("InnoDB: Dump of the parent page:\n", stderr);
1713- buf_page_print(buf_frame_align(node_ptr));
1714+ buf_page_print(page_align(node_ptr), 0);
1715
1716 fputs("InnoDB: Corruption of an index tree: table ", stderr);
1717 ut_print_name(stderr, NULL, TRUE, index->table_name);
1718@@ -596,8 +645,9 @@
1719 "InnoDB: father ptr page no %lu, child page no %lu\n",
1720 (ulong)
1721 btr_node_ptr_get_child_page_no(node_ptr, offsets),
1722- (ulong) buf_frame_get_page_no(page));
1723- print_rec = page_rec_get_next(page_get_infimum_rec(page));
1724+ (ulong) page_no);
1725+ print_rec = page_rec_get_next(
1726+ page_get_infimum_rec(page_align(user_rec)));
1727 offsets = rec_get_offsets(print_rec, index,
1728 offsets, ULINT_UNDEFINED, &heap);
1729 page_rec_print(print_rec, offsets);
1730@@ -613,51 +663,82 @@
1731 "forcing-recovery.html about\n"
1732 "InnoDB: forcing recovery. "
1733 "Then dump + drop + reimport.\n", stderr);
1734+
1735+ ut_error;
1736 }
1737
1738- ut_a(btr_node_ptr_get_child_page_no(node_ptr, offsets)
1739- == buf_frame_get_page_no(page));
1740+ return(offsets);
1741+}
1742+
1743+/****************************************************************
1744+Returns the upper level node pointer to a page. It is assumed that mtr holds
1745+an x-latch on the tree. */
1746+static
1747+ulint*
1748+btr_page_get_father_block(
1749+/*======================*/
1750+ /* out: rec_get_offsets() of the
1751+ node pointer record */
1752+ ulint* offsets,/* in: work area for the return value */
1753+ mem_heap_t* heap, /* in: memory heap to use */
1754+ dict_index_t* index, /* in: b-tree index */
1755+ buf_block_t* block, /* in: child page in the index */
1756+ mtr_t* mtr, /* in: mtr */
1757+ btr_cur_t* cursor) /* out: cursor on node pointer record,
1758+ its page x-latched */
1759+{
1760+ rec_t* rec
1761+ = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
1762+ block)));
1763+ btr_cur_position(index, rec, block, cursor);
1764+ return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
1765+}
1766+
1767+/****************************************************************
1768+Seeks to the upper level node pointer to a page.
1769+It is assumed that mtr holds an x-latch on the tree. */
1770+static
1771+void
1772+btr_page_get_father(
1773+/*================*/
1774+ dict_index_t* index, /* in: b-tree index */
1775+ buf_block_t* block, /* in: child page in the index */
1776+ mtr_t* mtr, /* in: mtr */
1777+ btr_cur_t* cursor) /* out: cursor on node pointer record,
1778+ its page x-latched */
1779+{
1780+ mem_heap_t* heap;
1781+ rec_t* rec
1782+ = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
1783+ block)));
1784+ btr_cur_position(index, rec, block, cursor);
1785+
1786+ heap = mem_heap_create(100);
1787+ btr_page_get_father_node_ptr(NULL, heap, cursor, mtr);
1788 mem_heap_free(heap);
1789-
1790- return(node_ptr);
1791-}
1792-
1793-/****************************************************************
1794-Returns the upper level node pointer to a page. It is assumed that
1795-mtr holds an x-latch on the tree. */
1796-static
1797-rec_t*
1798-btr_page_get_father_node_ptr(
1799-/*=========================*/
1800- /* out: pointer to node pointer record */
1801- dict_index_t* index, /* in: index tree */
1802- page_t* page, /* in: page: must contain at least one
1803- user record */
1804- mtr_t* mtr) /* in: mtr */
1805-{
1806- return(btr_page_get_father_for_rec(
1807- index, page,
1808- page_rec_get_next(page_get_infimum_rec(page)), mtr));
1809 }
1810
1811 /****************************************************************
1812 Creates the root node for a new index tree. */
1813-
1814+UNIV_INTERN
1815 ulint
1816 btr_create(
1817 /*=======*/
1818- /* out: page number of the created root, FIL_NULL if
1819- did not succeed */
1820- ulint type, /* in: type of the index */
1821- ulint space, /* in: space where created */
1822- dulint index_id,/* in: index id */
1823- ulint comp, /* in: nonzero=compact page format */
1824- mtr_t* mtr) /* in: mini-transaction handle */
1825+ /* out: page number of the created root,
1826+ FIL_NULL if did not succeed */
1827+ ulint type, /* in: type of the index */
1828+ ulint space, /* in: space where created */
1829+ ulint zip_size,/* in: compressed page size in bytes
1830+ or 0 for uncompressed pages */
1831+ dulint index_id,/* in: index id */
1832+ dict_index_t* index, /* in: index */
1833+ mtr_t* mtr) /* in: mini-transaction handle */
1834 {
1835 ulint page_no;
1836- buf_frame_t* ibuf_hdr_frame;
1837+ buf_block_t* block;
1838 buf_frame_t* frame;
1839 page_t* page;
1840+ page_zip_des_t* page_zip;
1841
1842 /* Create the two new segments (one, in the case of an ibuf tree) for
1843 the index tree; the segment headers are put on the allocated root page
1844@@ -666,39 +747,41 @@
1845
1846 if (type & DICT_IBUF) {
1847 /* Allocate first the ibuf header page */
1848- ibuf_hdr_frame = fseg_create(
1849- space, 0, IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
1850-
1851-#ifdef UNIV_SYNC_DEBUG
1852- buf_page_dbg_add_level(ibuf_hdr_frame, SYNC_TREE_NODE_NEW);
1853-#endif /* UNIV_SYNC_DEBUG */
1854- ut_ad(buf_frame_get_page_no(ibuf_hdr_frame)
1855+ buf_block_t* ibuf_hdr_block = fseg_create(
1856+ space, 0,
1857+ IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
1858+
1859+ buf_block_dbg_add_level(ibuf_hdr_block, SYNC_TREE_NODE_NEW);
1860+
1861+ ut_ad(buf_block_get_page_no(ibuf_hdr_block)
1862 == IBUF_HEADER_PAGE_NO);
1863 /* Allocate then the next page to the segment: it will be the
1864 tree root page */
1865
1866- page_no = fseg_alloc_free_page(ibuf_hdr_frame + IBUF_HEADER
1867+ page_no = fseg_alloc_free_page(buf_block_get_frame(
1868+ ibuf_hdr_block)
1869+ + IBUF_HEADER
1870 + IBUF_TREE_SEG_HEADER,
1871 IBUF_TREE_ROOT_PAGE_NO,
1872 FSP_UP, mtr);
1873 ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
1874
1875- frame = buf_page_get(space, page_no, RW_X_LATCH, mtr);
1876+ block = buf_page_get(space, zip_size, page_no,
1877+ RW_X_LATCH, mtr);
1878 } else {
1879- frame = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP,
1880- mtr);
1881+ block = fseg_create(space, 0,
1882+ PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
1883 }
1884
1885- if (frame == NULL) {
1886+ if (block == NULL) {
1887
1888 return(FIL_NULL);
1889 }
1890
1891- page_no = buf_frame_get_page_no(frame);
1892+ page_no = buf_block_get_page_no(block);
1893+ frame = buf_block_get_frame(block);
1894
1895-#ifdef UNIV_SYNC_DEBUG
1896- buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
1897-#endif /* UNIV_SYNC_DEBUG */
1898+ buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
1899
1900 if (type & DICT_IBUF) {
1901 /* It is an insert buffer tree: initialize the free list */
1902@@ -709,34 +792,41 @@
1903 } else {
1904 /* It is a non-ibuf tree: create a file segment for leaf
1905 pages */
1906- fseg_create(space, page_no, PAGE_HEADER + PAGE_BTR_SEG_LEAF,
1907- mtr);
1908+ fseg_create(space, page_no,
1909+ PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr);
1910 /* The fseg create acquires a second latch on the page,
1911 therefore we must declare it: */
1912-#ifdef UNIV_SYNC_DEBUG
1913- buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
1914-#endif /* UNIV_SYNC_DEBUG */
1915+ buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
1916 }
1917
1918 /* Create a new index page on the allocated segment page */
1919- page = page_create(frame, mtr, comp);
1920- buf_block_align(page)->check_index_page_at_flush = TRUE;
1921+ page_zip = buf_block_get_page_zip(block);
1922+
1923+ if (UNIV_LIKELY_NULL(page_zip)) {
1924+ page = page_create_zip(block, index, 0, mtr);
1925+ } else {
1926+ page = page_create(block, mtr,
1927+ dict_table_is_comp(index->table));
1928+ /* Set the level of the new index page */
1929+ btr_page_set_level(page, NULL, 0, mtr);
1930+ }
1931+
1932+ block->check_index_page_at_flush = TRUE;
1933
1934 /* Set the index id of the page */
1935- btr_page_set_index_id(page, index_id, mtr);
1936-
1937- /* Set the level of the new index page */
1938- btr_page_set_level(page, 0, mtr);
1939+ btr_page_set_index_id(page, page_zip, index_id, mtr);
1940
1941 /* Set the next node and previous node fields */
1942- btr_page_set_next(page, FIL_NULL, mtr);
1943- btr_page_set_prev(page, FIL_NULL, mtr);
1944+ btr_page_set_next(page, page_zip, FIL_NULL, mtr);
1945+ btr_page_set_prev(page, page_zip, FIL_NULL, mtr);
1946
1947 /* We reset the free bits for the page to allow creation of several
1948 trees in the same mtr, otherwise the latch on a bitmap page would
1949 prevent it because of the latching order */
1950
1951- ibuf_reset_free_bits_with_type(type, page);
1952+ if (!(type & DICT_CLUSTERED)) {
1953+ ibuf_reset_free_bits(block);
1954+ }
1955
1956 /* In the following assertion we test that two records of maximum
1957 allowed size fit on the root page: this fact is needed to ensure
1958@@ -750,11 +840,13 @@
1959 /****************************************************************
1960 Frees a B-tree except the root page, which MUST be freed after this
1961 by calling btr_free_root. */
1962-
1963+UNIV_INTERN
1964 void
1965 btr_free_but_not_root(
1966 /*==================*/
1967 ulint space, /* in: space where created */
1968+ ulint zip_size, /* in: compressed page size in bytes
1969+ or 0 for uncompressed pages */
1970 ulint root_page_no) /* in: root page number */
1971 {
1972 ibool finished;
1973@@ -764,7 +856,13 @@
1974 leaf_loop:
1975 mtr_start(&mtr);
1976
1977- root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
1978+ root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
1979+#ifdef UNIV_BTR_DEBUG
1980+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
1981+ + root, space));
1982+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
1983+ + root, space));
1984+#endif /* UNIV_BTR_DEBUG */
1985
1986 /* NOTE: page hash indexes are dropped when a page is freed inside
1987 fsp0fsp. */
1988@@ -780,7 +878,11 @@
1989 top_loop:
1990 mtr_start(&mtr);
1991
1992- root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
1993+ root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
1994+#ifdef UNIV_BTR_DEBUG
1995+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
1996+ + root, space));
1997+#endif /* UNIV_BTR_DEBUG */
1998
1999 finished = fseg_free_step_not_header(
2000 root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
2001@@ -794,33 +896,36 @@
2002
2003 /****************************************************************
2004 Frees the B-tree root page. The rest of the tree MUST already have been freed. */
2005-
2006+UNIV_INTERN
2007 void
2008 btr_free_root(
2009 /*==========*/
2010 ulint space, /* in: space where created */
2011+ ulint zip_size, /* in: compressed page size in bytes
2012+ or 0 for uncompressed pages */
2013 ulint root_page_no, /* in: root page number */
2014 mtr_t* mtr) /* in: a mini-transaction which has already
2015 been started */
2016 {
2017- ibool finished;
2018- page_t* root;
2019-
2020- root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
2021-
2022- btr_search_drop_page_hash_index(root);
2023-top_loop:
2024- finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
2025- if (!finished) {
2026-
2027- goto top_loop;
2028- }
2029+ buf_block_t* block;
2030+ fseg_header_t* header;
2031+
2032+ block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
2033+
2034+ btr_search_drop_page_hash_index(block);
2035+
2036+ header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
2037+#ifdef UNIV_BTR_DEBUG
2038+ ut_a(btr_root_fseg_validate(header, space));
2039+#endif /* UNIV_BTR_DEBUG */
2040+
2041+ while (!fseg_free_step(header, mtr));
2042 }
2043
2044 /*****************************************************************
2045 Reorganizes an index page. */
2046 static
2047-void
2048+ibool
2049 btr_page_reorganize_low(
2050 /*====================*/
2051 ibool recovery,/* in: TRUE if called in recovery:
2052@@ -828,20 +933,26 @@
2053 there cannot exist locks on the
2054 page, and a hash index should not be
2055 dropped: it cannot exist */
2056- page_t* page, /* in: page to be reorganized */
2057+ buf_block_t* block, /* in: page to be reorganized */
2058 dict_index_t* index, /* in: record descriptor */
2059 mtr_t* mtr) /* in: mtr */
2060 {
2061- page_t* new_page;
2062- ulint log_mode;
2063- ulint data_size1;
2064- ulint data_size2;
2065- ulint max_ins_size1;
2066- ulint max_ins_size2;
2067+ page_t* page = buf_block_get_frame(block);
2068+ page_zip_des_t* page_zip = buf_block_get_page_zip(block);
2069+ buf_block_t* temp_block;
2070+ page_t* temp_page;
2071+ ulint log_mode;
2072+ ulint data_size1;
2073+ ulint data_size2;
2074+ ulint max_ins_size1;
2075+ ulint max_ins_size2;
2076+ ibool success = FALSE;
2077
2078- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
2079- MTR_MEMO_PAGE_X_FIX));
2080+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2081 ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
2082+#ifdef UNIV_ZIP_DEBUG
2083+ ut_a(!page_zip || page_zip_validate(page_zip, page));
2084+#endif /* UNIV_ZIP_DEBUG */
2085 data_size1 = page_get_data_size(page);
2086 max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
2087
2088@@ -853,41 +964,53 @@
2089 /* Turn logging off */
2090 log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
2091
2092- new_page = buf_frame_alloc();
2093+ temp_block = buf_block_alloc(0);
2094+ temp_page = temp_block->frame;
2095
2096 /* Copy the old page to temporary space */
2097- buf_frame_copy(new_page, page);
2098+ buf_frame_copy(temp_page, page);
2099
2100- if (!recovery) {
2101- btr_search_drop_page_hash_index(page);
2102+ if (UNIV_LIKELY(!recovery)) {
2103+ btr_search_drop_page_hash_index(block);
2104 }
2105
2106 /* Recreate the page: note that global data on page (possible
2107 segment headers, next page-field, etc.) is preserved intact */
2108
2109- page_create(page, mtr, page_is_comp(page));
2110- buf_block_align(page)->check_index_page_at_flush = TRUE;
2111+ page_create(block, mtr, dict_table_is_comp(index->table));
2112+ block->check_index_page_at_flush = TRUE;
2113
2114 /* Copy the records from the temporary space to the recreated page;
2115 do not copy the lock bits yet */
2116
2117- page_copy_rec_list_end_no_locks(page, new_page,
2118- page_get_infimum_rec(new_page),
2119+ page_copy_rec_list_end_no_locks(block, temp_block,
2120+ page_get_infimum_rec(temp_page),
2121 index, mtr);
2122 /* Copy max trx id to recreated page */
2123- page_set_max_trx_id(page, page_get_max_trx_id(new_page));
2124-
2125- if (!recovery) {
2126+ page_set_max_trx_id(block, NULL, page_get_max_trx_id(temp_page));
2127+
2128+ if (UNIV_LIKELY_NULL(page_zip)
2129+ && UNIV_UNLIKELY
2130+ (!page_zip_compress(page_zip, page, index, NULL))) {
2131+
2132+ /* Restore the old page and exit. */
2133+ buf_frame_copy(page, temp_page);
2134+
2135+ goto func_exit;
2136+ }
2137+
2138+ if (UNIV_LIKELY(!recovery)) {
2139 /* Update the record lock bitmaps */
2140- lock_move_reorganize_page(page, new_page);
2141+ lock_move_reorganize_page(block, temp_block);
2142 }
2143
2144 data_size2 = page_get_data_size(page);
2145 max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
2146
2147- if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
2148- buf_page_print(page);
2149- buf_page_print(new_page);
2150+ if (UNIV_UNLIKELY(data_size1 != data_size2)
2151+ || UNIV_UNLIKELY(max_ins_size1 != max_ins_size2)) {
2152+ buf_page_print(page, 0);
2153+ buf_page_print(temp_page, 0);
2154 fprintf(stderr,
2155 "InnoDB: Error: page old data size %lu"
2156 " new data size %lu\n"
2157@@ -898,30 +1021,43 @@
2158 (unsigned long) data_size1, (unsigned long) data_size2,
2159 (unsigned long) max_ins_size1,
2160 (unsigned long) max_ins_size2);
2161+ } else {
2162+ success = TRUE;
2163 }
2164
2165- buf_frame_free(new_page);
2166+func_exit:
2167+#ifdef UNIV_ZIP_DEBUG
2168+ ut_a(!page_zip || page_zip_validate(page_zip, page));
2169+#endif /* UNIV_ZIP_DEBUG */
2170+ buf_block_free(temp_block);
2171
2172 /* Restore logging mode */
2173 mtr_set_log_mode(mtr, log_mode);
2174+
2175+ return(success);
2176 }
2177
2178 /*****************************************************************
2179-Reorganizes an index page. */
2180-
2181-void
2182+Reorganizes an index page.
2183+IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
2184+page of a non-clustered index, the caller must update the insert
2185+buffer free bits in the same mini-transaction in such a way that the
2186+modification will be redo-logged. */
2187+UNIV_INTERN
2188+ibool
2189 btr_page_reorganize(
2190 /*================*/
2191- page_t* page, /* in: page to be reorganized */
2192+ /* out: TRUE on success, FALSE on failure */
2193+ buf_block_t* block, /* in: page to be reorganized */
2194 dict_index_t* index, /* in: record descriptor */
2195 mtr_t* mtr) /* in: mtr */
2196 {
2197- btr_page_reorganize_low(FALSE, page, index, mtr);
2198+ return(btr_page_reorganize_low(FALSE, block, index, mtr));
2199 }
2200
2201 /***************************************************************
2202 Parses a redo log record of reorganizing a page. */
2203-
2204+UNIV_INTERN
2205 byte*
2206 btr_parse_page_reorganize(
2207 /*======================*/
2208@@ -930,38 +1066,53 @@
2209 byte* end_ptr __attribute__((unused)),
2210 /* in: buffer end */
2211 dict_index_t* index, /* in: record descriptor */
2212- page_t* page, /* in: page or NULL */
2213+ buf_block_t* block, /* in: page to be reorganized, or NULL */
2214 mtr_t* mtr) /* in: mtr or NULL */
2215 {
2216 ut_ad(ptr && end_ptr);
2217
2218 /* The record is empty, except for the record initial part */
2219
2220- if (page) {
2221- btr_page_reorganize_low(TRUE, page, index, mtr);
2222+ if (UNIV_LIKELY(block != NULL)) {
2223+ btr_page_reorganize_low(TRUE, block, index, mtr);
2224 }
2225
2226 return(ptr);
2227 }
2228
2229 /*****************************************************************
2230-Empties an index page. */
2231+Empties an index page. @see btr_page_create().*/
2232 static
2233 void
2234 btr_page_empty(
2235 /*===========*/
2236- page_t* page, /* in: page to be emptied */
2237- mtr_t* mtr) /* in: mtr */
2238+ buf_block_t* block, /* in: page to be emptied */
2239+ page_zip_des_t* page_zip,/* out: compressed page, or NULL */
2240+ dict_index_t* index, /* in: index of the page */
2241+ ulint level, /* in: the B-tree level of the page */
2242+ mtr_t* mtr) /* in: mtr */
2243 {
2244- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
2245- MTR_MEMO_PAGE_X_FIX));
2246- btr_search_drop_page_hash_index(page);
2247+ page_t* page = buf_block_get_frame(block);
2248+
2249+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2250+ ut_ad(page_zip == buf_block_get_page_zip(block));
2251+#ifdef UNIV_ZIP_DEBUG
2252+ ut_a(!page_zip || page_zip_validate(page_zip, page));
2253+#endif /* UNIV_ZIP_DEBUG */
2254+
2255+ btr_search_drop_page_hash_index(block);
2256
2257 /* Recreate the page: note that global data on page (possible
2258 segment headers, next page-field, etc.) is preserved intact */
2259
2260- page_create(page, mtr, page_is_comp(page));
2261- buf_block_align(page)->check_index_page_at_flush = TRUE;
2262+ if (UNIV_LIKELY_NULL(page_zip)) {
2263+ page_create_zip(block, index, level, mtr);
2264+ } else {
2265+ page_create(block, mtr, dict_table_is_comp(index->table));
2266+ btr_page_set_level(page, NULL, level, mtr);
2267+ }
2268+
2269+ block->check_index_page_at_flush = TRUE;
2270 }
2271
2272 /*****************************************************************
2273@@ -970,7 +1121,7 @@
2274 NOTE that the operation of this function must always succeed,
2275 we cannot reverse it: therefore enough free disk space must be
2276 guaranteed to be available before this function is called. */
2277-
2278+UNIV_INTERN
2279 rec_t*
2280 btr_root_raise_and_insert(
2281 /*======================*/
2282@@ -979,7 +1130,8 @@
2283 on the root page; when the function returns,
2284 the cursor is positioned on the predecessor
2285 of the inserted record */
2286- dtuple_t* tuple, /* in: tuple to insert */
2287+ const dtuple_t* tuple, /* in: tuple to insert */
2288+ ulint n_ext, /* in: number of externally stored columns */
2289 mtr_t* mtr) /* in: mtr */
2290 {
2291 dict_index_t* index;
2292@@ -992,77 +1144,127 @@
2293 ulint level;
2294 rec_t* node_ptr_rec;
2295 page_cur_t* page_cursor;
2296+ page_zip_des_t* root_page_zip;
2297+ page_zip_des_t* new_page_zip;
2298+ buf_block_t* root_block;
2299+ buf_block_t* new_block;
2300
2301 root = btr_cur_get_page(cursor);
2302+ root_block = btr_cur_get_block(cursor);
2303+ root_page_zip = buf_block_get_page_zip(root_block);
2304+#ifdef UNIV_ZIP_DEBUG
2305+ ut_a(!root_page_zip || page_zip_validate(root_page_zip, root));
2306+#endif /* UNIV_ZIP_DEBUG */
2307 index = btr_cur_get_index(cursor);
2308-
2309- ut_ad(dict_index_get_page(index) == buf_frame_get_page_no(root));
2310+#ifdef UNIV_BTR_DEBUG
2311+ if (!dict_index_is_ibuf(index)) {
2312+ ulint space = dict_index_get_space(index);
2313+
2314+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
2315+ + root, space));
2316+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
2317+ + root, space));
2318+ }
2319+
2320+ ut_a(dict_index_get_page(index) == page_get_page_no(root));
2321+#endif /* UNIV_BTR_DEBUG */
2322 ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
2323 MTR_MEMO_X_LOCK));
2324- ut_ad(mtr_memo_contains(mtr, buf_block_align(root),
2325- MTR_MEMO_PAGE_X_FIX));
2326- btr_search_drop_page_hash_index(root);
2327+ ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));
2328
2329 /* Allocate a new page to the tree. Root splitting is done by first
2330 moving the root records to the new page, emptying the root, putting
2331 a node pointer to the new page, and then splitting the new page. */
2332
2333- new_page = btr_page_alloc(index, 0, FSP_NO_DIR,
2334- btr_page_get_level(root, mtr), mtr);
2335-
2336- btr_page_create(new_page, index, mtr);
2337-
2338 level = btr_page_get_level(root, mtr);
2339
2340- /* Set the levels of the new index page and root page */
2341- btr_page_set_level(new_page, level, mtr);
2342- btr_page_set_level(root, level + 1, mtr);
2343+ new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr);
2344+ new_page = buf_block_get_frame(new_block);
2345+ new_page_zip = buf_block_get_page_zip(new_block);
2346+ ut_a(!new_page_zip == !root_page_zip);
2347+ ut_a(!new_page_zip
2348+ || page_zip_get_size(new_page_zip)
2349+ == page_zip_get_size(root_page_zip));
2350+
2351+ btr_page_create(new_block, new_page_zip, index, level, mtr);
2352
2353 /* Set the next node and previous node fields of new page */
2354- btr_page_set_next(new_page, FIL_NULL, mtr);
2355- btr_page_set_prev(new_page, FIL_NULL, mtr);
2356-
2357- /* Move the records from root to the new page */
2358-
2359- page_move_rec_list_end(new_page, root, page_get_infimum_rec(root),
2360- index, mtr);
2361+ btr_page_set_next(new_page, new_page_zip, FIL_NULL, mtr);
2362+ btr_page_set_prev(new_page, new_page_zip, FIL_NULL, mtr);
2363+
2364+ /* Copy the records from root to the new page one by one. */
2365+
2366+ if (0
2367+#ifdef UNIV_ZIP_COPY
2368+ || new_page_zip
2369+#endif /* UNIV_ZIP_COPY */
2370+ || UNIV_UNLIKELY
2371+ (!page_copy_rec_list_end(new_block, root_block,
2372+ page_get_infimum_rec(root),
2373+ index, mtr))) {
2374+ ut_a(new_page_zip);
2375+
2376+ /* Copy the page byte for byte. */
2377+ page_zip_copy_recs(new_page_zip, new_page,
2378+ root_page_zip, root, index, mtr);
2379+
2380+ /* Update the lock table and possible hash index. */
2381+
2382+ lock_move_rec_list_end(new_block, root_block,
2383+ page_get_infimum_rec(root));
2384+
2385+ btr_search_move_or_delete_hash_entries(new_block, root_block,
2386+ index);
2387+ }
2388+
2389 /* If this is a pessimistic insert which is actually done to
2390 perform a pessimistic update then we have stored the lock
2391 information of the record to be inserted on the infimum of the
2392 root page: we cannot discard the lock structs on the root page */
2393
2394- lock_update_root_raise(new_page, root);
2395+ lock_update_root_raise(new_block, root_block);
2396
2397 /* Create a memory heap where the node pointer is stored */
2398 heap = mem_heap_create(100);
2399
2400 rec = page_rec_get_next(page_get_infimum_rec(new_page));
2401- new_page_no = buf_frame_get_page_no(new_page);
2402+ new_page_no = buf_block_get_page_no(new_block);
2403
2404 /* Build the node pointer (= node key and page address) for the
2405 child */
2406
2407 node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
2408 level);
2409- /* Reorganize the root to get free space */
2410- btr_page_reorganize(root, index, mtr);
2411-
2412- page_cursor = btr_cur_get_page_cur(cursor);
2413-
2414- /* Insert node pointer to the root */
2415-
2416- page_cur_set_before_first(root, page_cursor);
2417-
2418- node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
2419- index, mtr);
2420-
2421- ut_ad(node_ptr_rec);
2422-
2423 /* The node pointer must be marked as the predefined minimum record,
2424 as there is no lower alphabetical limit to records in the leftmost
2425 node of a level: */
2426-
2427- btr_set_min_rec_mark(node_ptr_rec, page_is_comp(root), mtr);
2428+ dtuple_set_info_bits(node_ptr,
2429+ dtuple_get_info_bits(node_ptr)
2430+ | REC_INFO_MIN_REC_FLAG);
2431+
2432+ /* Rebuild the root page to get free space */
2433+ btr_page_empty(root_block, root_page_zip, index, level + 1, mtr);
2434+
2435+ /* Set the next node and previous node fields, although
2436+ they should already have been set. The previous node field
2437+ must be FIL_NULL if root_page_zip != NULL, because the
2438+ REC_INFO_MIN_REC_FLAG (of the first user record) will be
2439+ set if and only if btr_page_get_prev() == FIL_NULL. */
2440+ btr_page_set_next(root, root_page_zip, FIL_NULL, mtr);
2441+ btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr);
2442+
2443+ page_cursor = btr_cur_get_page_cur(cursor);
2444+
2445+ /* Insert node pointer to the root */
2446+
2447+ page_cur_set_before_first(root_block, page_cursor);
2448+
2449+ node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
2450+ index, 0, mtr);
2451+
2452+ /* The root page should only contain the node pointer
2453+ to new_page at this point. Thus, the data should fit. */
2454+ ut_a(node_ptr_rec);
2455
2456 /* Free the memory heap */
2457 mem_heap_free(heap);
2458@@ -1070,23 +1272,25 @@
2459 /* We play safe and reset the free bits for the new page */
2460
2461 #if 0
2462- fprintf(stderr, "Root raise new page no %lu\n",
2463- buf_frame_get_page_no(new_page));
2464+ fprintf(stderr, "Root raise new page no %lu\n", new_page_no);
2465 #endif
2466
2467- ibuf_reset_free_bits(index, new_page);
2468+ if (!dict_index_is_clust(index)) {
2469+ ibuf_reset_free_bits(new_block);
2470+ }
2471+
2472 /* Reposition the cursor to the child node */
2473- page_cur_search(new_page, index, tuple,
2474+ page_cur_search(new_block, index, tuple,
2475 PAGE_CUR_LE, page_cursor);
2476
2477 /* Split the child and insert tuple */
2478- return(btr_page_split_and_insert(cursor, tuple, mtr));
2479+ return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
2480 }
2481
2482 /*****************************************************************
2483 Decides if the page should be split at the convergence point of inserts
2484 converging to the left. */
2485-
2486+UNIV_INTERN
2487 ibool
2488 btr_page_get_split_rec_to_left(
2489 /*===========================*/
2490@@ -1131,7 +1335,7 @@
2491 /*****************************************************************
2492 Decides if the page should be split at the convergence point of inserts
2493 converging to the right. */
2494-
2495+UNIV_INTERN
2496 ibool
2497 btr_page_get_split_rec_to_right(
2498 /*============================*/
2499@@ -1194,32 +1398,44 @@
2500 rec_t*
2501 btr_page_get_sure_split_rec(
2502 /*========================*/
2503- /* out: split record, or NULL if
2504- tuple will be the first record on
2505- upper half-page */
2506- btr_cur_t* cursor, /* in: cursor at which insert
2507- should be made */
2508- dtuple_t* tuple) /* in: tuple to insert */
2509+ /* out: split record, or NULL if tuple
2510+ will be the first record on upper half-page */
2511+ btr_cur_t* cursor, /* in: cursor at which insert should be made */
2512+ const dtuple_t* tuple, /* in: tuple to insert */
2513+ ulint n_ext) /* in: number of externally stored columns */
2514 {
2515- page_t* page;
2516- ulint insert_size;
2517- ulint free_space;
2518- ulint total_data;
2519- ulint total_n_recs;
2520- ulint total_space;
2521- ulint incl_data;
2522- rec_t* ins_rec;
2523- rec_t* rec;
2524- rec_t* next_rec;
2525- ulint n;
2526- mem_heap_t* heap;
2527- ulint* offsets;
2528+ page_t* page;
2529+ page_zip_des_t* page_zip;
2530+ ulint insert_size;
2531+ ulint free_space;
2532+ ulint total_data;
2533+ ulint total_n_recs;
2534+ ulint total_space;
2535+ ulint incl_data;
2536+ rec_t* ins_rec;
2537+ rec_t* rec;
2538+ rec_t* next_rec;
2539+ ulint n;
2540+ mem_heap_t* heap;
2541+ ulint* offsets;
2542
2543 page = btr_cur_get_page(cursor);
2544
2545- insert_size = rec_get_converted_size(cursor->index, tuple);
2546+ insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
2547 free_space = page_get_free_space_of_empty(page_is_comp(page));
2548
2549+ page_zip = btr_cur_get_page_zip(cursor);
2550+ if (UNIV_LIKELY_NULL(page_zip)) {
2551+ /* Estimate the free space of an empty compressed page. */
2552+ ulint free_space_zip = page_zip_empty_size(
2553+ cursor->index->n_fields,
2554+ page_zip_get_size(page_zip));
2555+
2556+ if (UNIV_LIKELY(free_space > (ulint) free_space_zip)) {
2557+ free_space = (ulint) free_space_zip;
2558+ }
2559+ }
2560+
2561 /* free_space is now the free space of a created new page */
2562
2563 total_data = page_get_data_size(page) + insert_size;
2564@@ -1242,7 +1458,7 @@
2565 otherwise the last included record will be the first on the right
2566 half page */
2567
2568- for (;;) {
2569+ do {
2570 /* Decide the next record to include */
2571 if (rec == ins_rec) {
2572 rec = NULL; /* NULL denotes that tuple is
2573@@ -1264,38 +1480,34 @@
2574 }
2575
2576 n++;
2577-
2578- if (incl_data + page_dir_calc_reserved_space(n)
2579- >= total_space / 2) {
2580-
2581- if (incl_data + page_dir_calc_reserved_space(n)
2582- <= free_space) {
2583- /* The next record will be the first on
2584- the right half page if it is not the
2585- supremum record of page */
2586-
2587- if (rec == ins_rec) {
2588- rec = NULL;
2589-
2590- goto func_exit;
2591- } else if (rec == NULL) {
2592- next_rec = page_rec_get_next(ins_rec);
2593- } else {
2594- next_rec = page_rec_get_next(rec);
2595- }
2596- ut_ad(next_rec);
2597- if (!page_rec_is_supremum(next_rec)) {
2598- rec = next_rec;
2599- }
2600- }
2601+ } while (incl_data + page_dir_calc_reserved_space(n)
2602+ < total_space / 2);
2603+
2604+ if (incl_data + page_dir_calc_reserved_space(n) <= free_space) {
2605+ /* The next record will be the first on
2606+ the right half page if it is not the
2607+ supremum record of page */
2608+
2609+ if (rec == ins_rec) {
2610+ rec = NULL;
2611+
2612+ goto func_exit;
2613+ } else if (rec == NULL) {
2614+ next_rec = page_rec_get_next(ins_rec);
2615+ } else {
2616+ next_rec = page_rec_get_next(rec);
2617+ }
2618+ ut_ad(next_rec);
2619+ if (!page_rec_is_supremum(next_rec)) {
2620+ rec = next_rec;
2621+ }
2622+ }
2623
2624 func_exit:
2625- if (UNIV_LIKELY_NULL(heap)) {
2626- mem_heap_free(heap);
2627- }
2628- return(rec);
2629- }
2630+ if (UNIV_LIKELY_NULL(heap)) {
2631+ mem_heap_free(heap);
2632 }
2633+ return(rec);
2634 }
2635
2636 /*****************************************************************
2637@@ -1305,25 +1517,26 @@
2638 ibool
2639 btr_page_insert_fits(
2640 /*=================*/
2641- /* out: TRUE if fits */
2642- btr_cur_t* cursor, /* in: cursor at which insert
2643- should be made */
2644- rec_t* split_rec, /* in: suggestion for first record
2645- on upper half-page, or NULL if
2646- tuple to be inserted should be first */
2647- const ulint* offsets, /* in: rec_get_offsets(
2648- split_rec, cursor->index) */
2649- dtuple_t* tuple, /* in: tuple to insert */
2650- mem_heap_t* heap) /* in: temporary memory heap */
2651+ /* out: TRUE if fits */
2652+ btr_cur_t* cursor, /* in: cursor at which insert
2653+ should be made */
2654+ const rec_t* split_rec,/* in: suggestion for first record
2655+ on upper half-page, or NULL if
2656+ tuple to be inserted should be first */
2657+ const ulint* offsets,/* in: rec_get_offsets(
2658+ split_rec, cursor->index) */
2659+ const dtuple_t* tuple, /* in: tuple to insert */
2660+ ulint n_ext, /* in: number of externally stored columns */
2661+ mem_heap_t* heap) /* in: temporary memory heap */
2662 {
2663- page_t* page;
2664- ulint insert_size;
2665- ulint free_space;
2666- ulint total_data;
2667- ulint total_n_recs;
2668- rec_t* rec;
2669- rec_t* end_rec;
2670- ulint* offs;
2671+ page_t* page;
2672+ ulint insert_size;
2673+ ulint free_space;
2674+ ulint total_data;
2675+ ulint total_n_recs;
2676+ const rec_t* rec;
2677+ const rec_t* end_rec;
2678+ ulint* offs;
2679
2680 page = btr_cur_get_page(cursor);
2681
2682@@ -1333,7 +1546,7 @@
2683 ut_ad(!offsets
2684 || rec_offs_validate(split_rec, cursor->index, offsets));
2685
2686- insert_size = rec_get_converted_size(cursor->index, tuple);
2687+ insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
2688 free_space = page_get_free_space_of_empty(page_is_comp(page));
2689
2690 /* free_space is now the free space of a created new page */
2691@@ -1388,7 +1601,7 @@
2692 return(TRUE);
2693 }
2694
2695- rec = page_rec_get_next(rec);
2696+ rec = page_rec_get_next_const(rec);
2697 }
2698
2699 return(FALSE);
2700@@ -1397,7 +1610,7 @@
2701 /***********************************************************
2702 Inserts a data tuple to a tree on a non-leaf level. It is assumed
2703 that mtr holds an x-latch on the tree. */
2704-
2705+UNIV_INTERN
2706 void
2707 btr_insert_on_non_leaf_level(
2708 /*=========================*/
2709@@ -1421,7 +1634,7 @@
2710 | BTR_KEEP_SYS_FLAG
2711 | BTR_NO_UNDO_LOG_FLAG,
2712 &cursor, tuple, &rec,
2713- &dummy_big_rec, NULL, mtr);
2714+ &dummy_big_rec, 0, NULL, mtr);
2715 ut_a(err == DB_SUCCESS);
2716 }
2717
2718@@ -1433,32 +1646,30 @@
2719 btr_attach_half_pages(
2720 /*==================*/
2721 dict_index_t* index, /* in: the index tree */
2722- page_t* page, /* in: page to be split */
2723+ buf_block_t* block, /* in/out: page to be split */
2724 rec_t* split_rec, /* in: first record on upper
2725 half page */
2726- page_t* new_page, /* in: the new half page */
2727+ buf_block_t* new_block, /* in/out: the new half page */
2728 ulint direction, /* in: FSP_UP or FSP_DOWN */
2729 mtr_t* mtr) /* in: mtr */
2730 {
2731 ulint space;
2732- rec_t* node_ptr;
2733- page_t* prev_page;
2734- page_t* next_page;
2735+ ulint zip_size;
2736 ulint prev_page_no;
2737 ulint next_page_no;
2738 ulint level;
2739+ page_t* page = buf_block_get_frame(block);
2740 page_t* lower_page;
2741 page_t* upper_page;
2742 ulint lower_page_no;
2743 ulint upper_page_no;
2744+ page_zip_des_t* lower_page_zip;
2745+ page_zip_des_t* upper_page_zip;
2746 dtuple_t* node_ptr_upper;
2747 mem_heap_t* heap;
2748
2749- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
2750- MTR_MEMO_PAGE_X_FIX));
2751- ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page),
2752- MTR_MEMO_PAGE_X_FIX));
2753- ut_a(page_is_comp(page) == page_is_comp(new_page));
2754+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2755+ ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX));
2756
2757 /* Create a memory heap where the data tuple is stored */
2758 heap = mem_heap_create(1024);
2759@@ -1466,33 +1677,41 @@
2760 /* Based on split direction, decide upper and lower pages */
2761 if (direction == FSP_DOWN) {
2762
2763- lower_page_no = buf_frame_get_page_no(new_page);
2764- upper_page_no = buf_frame_get_page_no(page);
2765- lower_page = new_page;
2766- upper_page = page;
2767+ btr_cur_t cursor;
2768+ ulint* offsets;
2769+
2770+ lower_page = buf_block_get_frame(new_block);
2771+ lower_page_no = buf_block_get_page_no(new_block);
2772+ lower_page_zip = buf_block_get_page_zip(new_block);
2773+ upper_page = buf_block_get_frame(block);
2774+ upper_page_no = buf_block_get_page_no(block);
2775+ upper_page_zip = buf_block_get_page_zip(block);
2776
2777 /* Look up the index for the node pointer to page */
2778- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
2779+ offsets = btr_page_get_father_block(NULL, heap, index,
2780+ block, mtr, &cursor);
2781
2782 /* Replace the address of the old child node (= page) with the
2783 address of the new lower half */
2784
2785- btr_node_ptr_set_child_page_no(node_ptr,
2786- rec_get_offsets(
2787- node_ptr, index,
2788- NULL, ULINT_UNDEFINED,
2789- &heap),
2790- lower_page_no, mtr);
2791+ btr_node_ptr_set_child_page_no(
2792+ btr_cur_get_rec(&cursor),
2793+ btr_cur_get_page_zip(&cursor),
2794+ offsets, lower_page_no, mtr);
2795 mem_heap_empty(heap);
2796 } else {
2797- lower_page_no = buf_frame_get_page_no(page);
2798- upper_page_no = buf_frame_get_page_no(new_page);
2799- lower_page = page;
2800- upper_page = new_page;
2801+ lower_page = buf_block_get_frame(block);
2802+ lower_page_no = buf_block_get_page_no(block);
2803+ lower_page_zip = buf_block_get_page_zip(block);
2804+ upper_page = buf_block_get_frame(new_block);
2805+ upper_page_no = buf_block_get_page_no(new_block);
2806+ upper_page_zip = buf_block_get_page_zip(new_block);
2807 }
2808
2809 /* Get the level of the split pages */
2810- level = btr_page_get_level(page, mtr);
2811+ level = btr_page_get_level(buf_block_get_frame(block), mtr);
2812+ ut_ad(level
2813+ == btr_page_get_level(buf_block_get_frame(new_block), mtr));
2814
2815 /* Build the node pointer (= node key and page address) for the upper
2816 half */
2817@@ -1512,37 +1731,46 @@
2818
2819 prev_page_no = btr_page_get_prev(page, mtr);
2820 next_page_no = btr_page_get_next(page, mtr);
2821- space = buf_frame_get_space_id(page);
2822+ space = buf_block_get_space(block);
2823+ zip_size = buf_block_get_zip_size(block);
2824
2825 /* Update page links of the level */
2826
2827 if (prev_page_no != FIL_NULL) {
2828-
2829- prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
2830- ut_a(page_is_comp(prev_page) == page_is_comp(page));
2831+ buf_block_t* prev_block = btr_block_get(space, zip_size,
2832+ prev_page_no,
2833+ RW_X_LATCH, mtr);
2834 #ifdef UNIV_BTR_DEBUG
2835- ut_a(btr_page_get_next(prev_page, mtr)
2836- == buf_frame_get_page_no(page));
2837+ ut_a(page_is_comp(prev_block->frame) == page_is_comp(page));
2838+ ut_a(btr_page_get_next(prev_block->frame, mtr)
2839+ == buf_block_get_page_no(block));
2840 #endif /* UNIV_BTR_DEBUG */
2841
2842- btr_page_set_next(prev_page, lower_page_no, mtr);
2843+ btr_page_set_next(buf_block_get_frame(prev_block),
2844+ buf_block_get_page_zip(prev_block),
2845+ lower_page_no, mtr);
2846 }
2847
2848 if (next_page_no != FIL_NULL) {
2849-
2850- next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
2851- ut_a(page_is_comp(next_page) == page_is_comp(page));
2852-
2853- btr_page_set_prev(next_page, upper_page_no, mtr);
2854+ buf_block_t* next_block = btr_block_get(space, zip_size,
2855+ next_page_no,
2856+ RW_X_LATCH, mtr);
2857+#ifdef UNIV_BTR_DEBUG
2858+ ut_a(page_is_comp(next_block->frame) == page_is_comp(page));
2859+ ut_a(btr_page_get_prev(next_block->frame, mtr)
2860+ == page_get_page_no(page));
2861+#endif /* UNIV_BTR_DEBUG */
2862+
2863+ btr_page_set_prev(buf_block_get_frame(next_block),
2864+ buf_block_get_page_zip(next_block),
2865+ upper_page_no, mtr);
2866 }
2867
2868- btr_page_set_prev(lower_page, prev_page_no, mtr);
2869- btr_page_set_next(lower_page, upper_page_no, mtr);
2870- btr_page_set_level(lower_page, level, mtr);
2871+ btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr);
2872+ btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);
2873
2874- btr_page_set_prev(upper_page, lower_page_no, mtr);
2875- btr_page_set_next(upper_page, next_page_no, mtr);
2876- btr_page_set_level(upper_page, level, mtr);
2877+ btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr);
2878+ btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
2879 }
2880
2881 /*****************************************************************
2882@@ -1552,7 +1780,7 @@
2883 function must always succeed, we cannot reverse it: therefore
2884 enough free disk space must be guaranteed to be available before
2885 this function is called. */
2886-
2887+UNIV_INTERN
2888 rec_t*
2889 btr_page_split_and_insert(
2890 /*======================*/
2891@@ -1562,23 +1790,30 @@
2892 btr_cur_t* cursor, /* in: cursor at which to insert; when the
2893 function returns, the cursor is positioned
2894 on the predecessor of the inserted record */
2895- dtuple_t* tuple, /* in: tuple to insert */
2896+ const dtuple_t* tuple, /* in: tuple to insert */
2897+ ulint n_ext, /* in: number of externally stored columns */
2898 mtr_t* mtr) /* in: mtr */
2899 {
2900+ buf_block_t* block;
2901 page_t* page;
2902+ page_zip_des_t* page_zip;
2903 ulint page_no;
2904 byte direction;
2905 ulint hint_page_no;
2906+ buf_block_t* new_block;
2907 page_t* new_page;
2908+ page_zip_des_t* new_page_zip;
2909 rec_t* split_rec;
2910- page_t* left_page;
2911- page_t* right_page;
2912+ buf_block_t* left_block;
2913+ buf_block_t* right_block;
2914+ buf_block_t* insert_block;
2915 page_t* insert_page;
2916 page_cur_t* page_cursor;
2917 rec_t* first_rec;
2918 byte* buf = 0; /* remove warning */
2919 rec_t* move_limit;
2920 ibool insert_will_fit;
2921+ ibool insert_left;
2922 ulint n_iterations = 0;
2923 rec_t* rec;
2924 mem_heap_t* heap;
2925@@ -1597,13 +1832,14 @@
2926 ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
2927 #endif /* UNIV_SYNC_DEBUG */
2928
2929- page = btr_cur_get_page(cursor);
2930-
2931- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
2932- MTR_MEMO_PAGE_X_FIX));
2933- ut_ad(page_get_n_recs(page) >= 2);
2934-
2935- page_no = buf_frame_get_page_no(page);
2936+ block = btr_cur_get_block(cursor);
2937+ page = buf_block_get_frame(block);
2938+ page_zip = buf_block_get_page_zip(block);
2939+
2940+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2941+ ut_ad(page_get_n_recs(page) >= 1);
2942+
2943+ page_no = buf_block_get_page_no(block);
2944
2945 /* 1. Decide the split record; split_rec == NULL means that the
2946 tuple to be inserted should be the first record on the upper
2947@@ -1612,7 +1848,7 @@
2948 if (n_iterations > 0) {
2949 direction = FSP_UP;
2950 hint_page_no = page_no + 1;
2951- split_rec = btr_page_get_sure_split_rec(cursor, tuple);
2952+ split_rec = btr_page_get_sure_split_rec(cursor, tuple, n_ext);
2953
2954 } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
2955 direction = FSP_UP;
2956@@ -1624,37 +1860,80 @@
2957 } else {
2958 direction = FSP_UP;
2959 hint_page_no = page_no + 1;
2960- split_rec = page_get_middle_rec(page);
2961+
2962+ if (page_get_n_recs(page) == 1) {
2963+ page_cur_t pcur;
2964+
2965+ /* There is only one record in the index page
2966+ therefore we can't split the node in the middle
2967+ by default. We need to determine whether the
2968+ new record will be inserted to the left or right. */
2969+
2970+ /* Read the first (and only) record in the page. */
2971+ page_cur_set_before_first(block, &pcur);
2972+ page_cur_move_to_next(&pcur);
2973+ first_rec = page_cur_get_rec(&pcur);
2974+
2975+ offsets = rec_get_offsets(
2976+ first_rec, cursor->index, offsets,
2977+ n_uniq, &heap);
2978+
2979+ /* If the new record is less than the existing record
2980+ the split in the middle will copy the existing
2981+ record to the new node. */
2982+ if (cmp_dtuple_rec(tuple, first_rec, offsets) < 0) {
2983+ split_rec = page_get_middle_rec(page);
2984+ } else {
2985+ split_rec = NULL;
2986+ }
2987+ } else {
2988+ split_rec = page_get_middle_rec(page);
2989+ }
2990 }
2991
2992 /* 2. Allocate a new page to the index */
2993- new_page = btr_page_alloc(cursor->index, hint_page_no, direction,
2994- btr_page_get_level(page, mtr), mtr);
2995- btr_page_create(new_page, cursor->index, mtr);
2996+ new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
2997+ btr_page_get_level(page, mtr), mtr);
2998+ new_page = buf_block_get_frame(new_block);
2999+ new_page_zip = buf_block_get_page_zip(new_block);
3000+ btr_page_create(new_block, new_page_zip, cursor->index,
3001+ btr_page_get_level(page, mtr), mtr);
3002
3003 /* 3. Calculate the first record on the upper half-page, and the
3004 first record (move_limit) on original page which ends up on the
3005 upper half */
3006
3007- if (split_rec != NULL) {
3008- first_rec = split_rec;
3009- move_limit = split_rec;
3010+ if (split_rec) {
3011+ first_rec = move_limit = split_rec;
3012+
3013+ offsets = rec_get_offsets(split_rec, cursor->index, offsets,
3014+ n_uniq, &heap);
3015+
3016+ insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;
3017+
3018+ if (UNIV_UNLIKELY(!insert_left && new_page_zip
3019+ && n_iterations > 0)) {
3020+ /* If a compressed page has already been split,
3021+ avoid further splits by inserting the record
3022+ to an empty page. */
3023+ split_rec = NULL;
3024+ goto insert_right;
3025+ }
3026 } else {
3027- buf = mem_alloc(rec_get_converted_size(cursor->index, tuple));
3028+insert_right:
3029+ insert_left = FALSE;
3030+ buf = mem_alloc(rec_get_converted_size(cursor->index,
3031+ tuple, n_ext));
3032
3033- first_rec = rec_convert_dtuple_to_rec(buf,
3034- cursor->index, tuple);
3035+ first_rec = rec_convert_dtuple_to_rec(buf, cursor->index,
3036+ tuple, n_ext);
3037 move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
3038 }
3039
3040 /* 4. Do first the modifications in the tree structure */
3041
3042- btr_attach_half_pages(cursor->index, page, first_rec,
3043- new_page, direction, mtr);
3044-
3045- if (split_rec == NULL) {
3046- mem_free(buf);
3047- }
3048+ btr_attach_half_pages(cursor->index, block,
3049+ first_rec, new_block, direction, mtr);
3050
3051 /* If the split is made on the leaf level and the insert will fit
3052 on the appropriate half-page, we may release the tree x-latch.
3053@@ -1662,19 +1941,17 @@
3054 thus reducing the tree latch contention. */
3055
3056 if (split_rec) {
3057- offsets = rec_get_offsets(split_rec, cursor->index, offsets,
3058- n_uniq, &heap);
3059-
3060- insert_will_fit = btr_page_insert_fits(cursor,
3061- split_rec, offsets,
3062- tuple, heap);
3063+ insert_will_fit = !new_page_zip
3064+ && btr_page_insert_fits(cursor, split_rec,
3065+ offsets, tuple, n_ext, heap);
3066 } else {
3067- insert_will_fit = btr_page_insert_fits(cursor,
3068- NULL, NULL,
3069- tuple, heap);
3070+ mem_free(buf);
3071+ insert_will_fit = !new_page_zip
3072+ && btr_page_insert_fits(cursor, NULL,
3073+ NULL, tuple, n_ext, heap);
3074 }
3075
3076- if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) {
3077+ if (insert_will_fit && page_is_leaf(page)) {
3078
3079 mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
3080 MTR_MEMO_X_LOCK);
3081@@ -1684,100 +1961,184 @@
3082 if (direction == FSP_DOWN) {
3083 /* fputs("Split left\n", stderr); */
3084
3085- page_move_rec_list_start(new_page, page, move_limit,
3086- cursor->index, mtr);
3087- left_page = new_page;
3088- right_page = page;
3089-
3090- lock_update_split_left(right_page, left_page);
3091+ if (0
3092+#ifdef UNIV_ZIP_COPY
3093+ || page_zip
3094+#endif /* UNIV_ZIP_COPY */
3095+ || UNIV_UNLIKELY
3096+ (!page_move_rec_list_start(new_block, block, move_limit,
3097+ cursor->index, mtr))) {
3098+ /* For some reason, compressing new_page failed,
3099+ even though it should contain fewer records than
3100+ the original page. Copy the page byte for byte
3101+ and then delete the records from both pages
3102+ as appropriate. Deleting will always succeed. */
3103+ ut_a(new_page_zip);
3104+
3105+ page_zip_copy_recs(new_page_zip, new_page,
3106+ page_zip, page, cursor->index, mtr);
3107+ page_delete_rec_list_end(move_limit - page + new_page,
3108+ new_block, cursor->index,
3109+ ULINT_UNDEFINED,
3110+ ULINT_UNDEFINED, mtr);
3111+
3112+ /* Update the lock table and possible hash index. */
3113+
3114+ lock_move_rec_list_start(
3115+ new_block, block, move_limit,
3116+ new_page + PAGE_NEW_INFIMUM);
3117+
3118+ btr_search_move_or_delete_hash_entries(
3119+ new_block, block, cursor->index);
3120+
3121+ /* Delete the records from the source page. */
3122+
3123+ page_delete_rec_list_start(move_limit, block,
3124+ cursor->index, mtr);
3125+ }
3126+
3127+ left_block = new_block;
3128+ right_block = block;
3129+
3130+ lock_update_split_left(right_block, left_block);
3131 } else {
3132 /* fputs("Split right\n", stderr); */
3133
3134- page_move_rec_list_end(new_page, page, move_limit,
3135- cursor->index, mtr);
3136- left_page = page;
3137- right_page = new_page;
3138-
3139- lock_update_split_right(right_page, left_page);
3140- }
3141+ if (0
3142+#ifdef UNIV_ZIP_COPY
3143+ || page_zip
3144+#endif /* UNIV_ZIP_COPY */
3145+ || UNIV_UNLIKELY
3146+ (!page_move_rec_list_end(new_block, block, move_limit,
3147+ cursor->index, mtr))) {
3148+ /* For some reason, compressing new_page failed,
3149+ even though it should contain fewer records than
3150+ the original page. Copy the page byte for byte
3151+ and then delete the records from both pages
3152+ as appropriate. Deleting will always succeed. */
3153+ ut_a(new_page_zip);
3154+
3155+ page_zip_copy_recs(new_page_zip, new_page,
3156+ page_zip, page, cursor->index, mtr);
3157+ page_delete_rec_list_start(move_limit - page
3158+ + new_page, new_block,
3159+ cursor->index, mtr);
3160+
3161+ /* Update the lock table and possible hash index. */
3162+
3163+ lock_move_rec_list_end(new_block, block, move_limit);
3164+
3165+ btr_search_move_or_delete_hash_entries(
3166+ new_block, block, cursor->index);
3167+
3168+ /* Delete the records from the source page. */
3169+
3170+ page_delete_rec_list_end(move_limit, block,
3171+ cursor->index,
3172+ ULINT_UNDEFINED,
3173+ ULINT_UNDEFINED, mtr);
3174+ }
3175+
3176+ left_block = block;
3177+ right_block = new_block;
3178+
3179+ lock_update_split_right(right_block, left_block);
3180+ }
3181+
3182+#ifdef UNIV_ZIP_DEBUG
3183+ if (UNIV_LIKELY_NULL(page_zip)) {
3184+ ut_a(page_zip_validate(page_zip, page));
3185+ ut_a(page_zip_validate(new_page_zip, new_page));
3186+ }
3187+#endif /* UNIV_ZIP_DEBUG */
3188+
3189+ /* At this point, split_rec, move_limit and first_rec may point
3190+ to garbage on the old page. */
3191
3192 /* 6. The split and the tree modification is now completed. Decide the
3193 page where the tuple should be inserted */
3194
3195- if (split_rec == NULL) {
3196- insert_page = right_page;
3197-
3198+ if (insert_left) {
3199+ insert_block = left_block;
3200 } else {
3201- offsets = rec_get_offsets(first_rec, cursor->index,
3202- offsets, n_uniq, &heap);
3203-
3204- if (cmp_dtuple_rec(tuple, first_rec, offsets) >= 0) {
3205-
3206- insert_page = right_page;
3207- } else {
3208- insert_page = left_page;
3209- }
3210+ insert_block = right_block;
3211 }
3212
3213+ insert_page = buf_block_get_frame(insert_block);
3214+
3215 /* 7. Reposition the cursor for insert and try insertion */
3216 page_cursor = btr_cur_get_page_cur(cursor);
3217
3218- page_cur_search(insert_page, cursor->index, tuple,
3219+ page_cur_search(insert_block, cursor->index, tuple,
3220 PAGE_CUR_LE, page_cursor);
3221
3222- rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
3223-
3224- if (rec != NULL) {
3225- /* Insert fit on the page: update the free bits for the
3226- left and right pages in the same mtr */
3227-
3228- ibuf_update_free_bits_for_two_pages_low(cursor->index,
3229- left_page,
3230- right_page, mtr);
3231- /* fprintf(stderr, "Split and insert done %lu %lu\n",
3232- buf_frame_get_page_no(left_page),
3233- buf_frame_get_page_no(right_page)); */
3234- mem_heap_free(heap);
3235- return(rec);
3236+ rec = page_cur_tuple_insert(page_cursor, tuple,
3237+ cursor->index, n_ext, mtr);
3238+
3239+#ifdef UNIV_ZIP_DEBUG
3240+ {
3241+ page_zip_des_t* insert_page_zip
3242+ = buf_block_get_page_zip(insert_block);
3243+ ut_a(!insert_page_zip
3244+ || page_zip_validate(insert_page_zip, insert_page));
3245+ }
3246+#endif /* UNIV_ZIP_DEBUG */
3247+
3248+ if (UNIV_LIKELY(rec != NULL)) {
3249+
3250+ goto func_exit;
3251 }
3252
3253 /* 8. If insert did not fit, try page reorganization */
3254
3255- btr_page_reorganize(insert_page, cursor->index, mtr);
3256-
3257- page_cur_search(insert_page, cursor->index, tuple,
3258+ if (UNIV_UNLIKELY
3259+ (!btr_page_reorganize(insert_block, cursor->index, mtr))) {
3260+
3261+ goto insert_failed;
3262+ }
3263+
3264+ page_cur_search(insert_block, cursor->index, tuple,
3265 PAGE_CUR_LE, page_cursor);
3266- rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
3267+ rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
3268+ n_ext, mtr);
3269
3270- if (rec == NULL) {
3271+ if (UNIV_UNLIKELY(rec == NULL)) {
3272 /* The insert did not fit on the page: loop back to the
3273 start of the function for a new split */
3274-
3275+insert_failed:
3276 /* We play safe and reset the free bits for new_page */
3277- ibuf_reset_free_bits(cursor->index, new_page);
3278+ if (!dict_index_is_clust(cursor->index)) {
3279+ ibuf_reset_free_bits(new_block);
3280+ }
3281
3282 /* fprintf(stderr, "Split second round %lu\n",
3283- buf_frame_get_page_no(page)); */
3284+ page_get_page_no(page)); */
3285 n_iterations++;
3286- ut_ad(n_iterations < 2);
3287+ ut_ad(n_iterations < 2
3288+ || buf_block_get_page_zip(insert_block));
3289 ut_ad(!insert_will_fit);
3290
3291 goto func_start;
3292 }
3293
3294+func_exit:
3295 /* Insert fit on the page: update the free bits for the
3296 left and right pages in the same mtr */
3297
3298- ibuf_update_free_bits_for_two_pages_low(cursor->index, left_page,
3299- right_page, mtr);
3300+ if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) {
3301+ ibuf_update_free_bits_for_two_pages_low(
3302+ buf_block_get_zip_size(left_block),
3303+ left_block, right_block, mtr);
3304+ }
3305+
3306 #if 0
3307 fprintf(stderr, "Split and insert done %lu %lu\n",
3308- buf_frame_get_page_no(left_page),
3309- buf_frame_get_page_no(right_page));
3310+ buf_block_get_page_no(left_block),
3311+ buf_block_get_page_no(right_block));
3312 #endif
3313
3314- ut_ad(page_validate(left_page, cursor->index));
3315- ut_ad(page_validate(right_page, cursor->index));
3316+ ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
3317+ ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
3318
3319 mem_heap_free(heap);
3320 return(rec);
3321@@ -1789,48 +2150,57 @@
3322 void
3323 btr_level_list_remove(
3324 /*==================*/
3325+ ulint space, /* in: space where removed */
3326+ ulint zip_size,/* in: compressed page size in bytes
3327+ or 0 for uncompressed pages */
3328 page_t* page, /* in: page to remove */
3329 mtr_t* mtr) /* in: mtr */
3330 {
3331- ulint space;
3332 ulint prev_page_no;
3333- page_t* prev_page;
3334 ulint next_page_no;
3335- page_t* next_page;
3336
3337 ut_ad(page && mtr);
3338- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
3339- MTR_MEMO_PAGE_X_FIX));
3340+ ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
3341+ ut_ad(space == page_get_space_id(page));
3342 /* Get the previous and next page numbers of page */
3343
3344 prev_page_no = btr_page_get_prev(page, mtr);
3345 next_page_no = btr_page_get_next(page, mtr);
3346- space = buf_frame_get_space_id(page);
3347
3348 /* Update page links of the level */
3349
3350 if (prev_page_no != FIL_NULL) {
3351-
3352- prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
3353+ buf_block_t* prev_block
3354+ = btr_block_get(space, zip_size, prev_page_no,
3355+ RW_X_LATCH, mtr);
3356+ page_t* prev_page
3357+ = buf_block_get_frame(prev_block);
3358+#ifdef UNIV_BTR_DEBUG
3359 ut_a(page_is_comp(prev_page) == page_is_comp(page));
3360-#ifdef UNIV_BTR_DEBUG
3361 ut_a(btr_page_get_next(prev_page, mtr)
3362- == buf_frame_get_page_no(page));
3363+ == page_get_page_no(page));
3364 #endif /* UNIV_BTR_DEBUG */
3365
3366- btr_page_set_next(prev_page, next_page_no, mtr);
3367+ btr_page_set_next(prev_page,
3368+ buf_block_get_page_zip(prev_block),
3369+ next_page_no, mtr);
3370 }
3371
3372 if (next_page_no != FIL_NULL) {
3373-
3374- next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
3375+ buf_block_t* next_block
3376+ = btr_block_get(space, zip_size, next_page_no,
3377+ RW_X_LATCH, mtr);
3378+ page_t* next_page
3379+ = buf_block_get_frame(next_block);
3380+#ifdef UNIV_BTR_DEBUG
3381 ut_a(page_is_comp(next_page) == page_is_comp(page));
3382-#ifdef UNIV_BTR_DEBUG
3383 ut_a(btr_page_get_prev(next_page, mtr)
3384- == buf_frame_get_page_no(page));
3385+ == page_get_page_no(page));
3386 #endif /* UNIV_BTR_DEBUG */
3387
3388- btr_page_set_prev(next_page, prev_page_no, mtr);
3389+ btr_page_set_prev(next_page,
3390+ buf_block_get_page_zip(next_block),
3391+ prev_page_no, mtr);
3392 }
3393 }
3394
3395@@ -1842,11 +2212,10 @@
3396 btr_set_min_rec_mark_log(
3397 /*=====================*/
3398 rec_t* rec, /* in: record */
3399- ulint comp, /* nonzero=compact record format */
3400+ byte type, /* in: MLOG_COMP_REC_MIN_MARK or MLOG_REC_MIN_MARK */
3401 mtr_t* mtr) /* in: mtr */
3402 {
3403- mlog_write_initial_log_record(
3404- rec, comp ? MLOG_COMP_REC_MIN_MARK : MLOG_REC_MIN_MARK, mtr);
3405+ mlog_write_initial_log_record(rec, type, mtr);
3406
3407 /* Write rec offset as a 2-byte ulint */
3408 mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES);
3409@@ -1855,7 +2224,7 @@
3410 /********************************************************************
3411 Parses the redo log record for setting an index record as the predefined
3412 minimum record. */
3413-
3414+UNIV_INTERN
3415 byte*
3416 btr_parse_set_min_rec_mark(
3417 /*=======================*/
3418@@ -1878,7 +2247,7 @@
3419
3420 rec = page + mach_read_from_2(ptr);
3421
3422- btr_set_min_rec_mark(rec, comp, mtr);
3423+ btr_set_min_rec_mark(rec, mtr);
3424 }
3425
3426 return(ptr + 2);
3427@@ -1886,46 +2255,50 @@
3428
3429 /********************************************************************
3430 Sets a record as the predefined minimum record. */
3431-
3432+UNIV_INTERN
3433 void
3434 btr_set_min_rec_mark(
3435 /*=================*/
3436 rec_t* rec, /* in: record */
3437- ulint comp, /* in: nonzero=compact page format */
3438 mtr_t* mtr) /* in: mtr */
3439 {
3440 ulint info_bits;
3441
3442- info_bits = rec_get_info_bits(rec, comp);
3443-
3444- rec_set_info_bits(rec, comp, info_bits | REC_INFO_MIN_REC_FLAG);
3445-
3446- btr_set_min_rec_mark_log(rec, comp, mtr);
3447+ if (UNIV_LIKELY(page_rec_is_comp(rec))) {
3448+ info_bits = rec_get_info_bits(rec, TRUE);
3449+
3450+ rec_set_info_bits_new(rec, info_bits | REC_INFO_MIN_REC_FLAG);
3451+
3452+ btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
3453+ } else {
3454+ info_bits = rec_get_info_bits(rec, FALSE);
3455+
3456+ rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG);
3457+
3458+ btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr);
3459+ }
3460 }
3461
3462 /*****************************************************************
3463 Deletes on the upper level the node pointer to a page. */
3464-
3465+UNIV_INTERN
3466 void
3467 btr_node_ptr_delete(
3468 /*================*/
3469 dict_index_t* index, /* in: index tree */
3470- page_t* page, /* in: page whose node pointer is deleted */
3471+ buf_block_t* block, /* in: page whose node pointer is deleted */
3472 mtr_t* mtr) /* in: mtr */
3473 {
3474- rec_t* node_ptr;
3475 btr_cur_t cursor;
3476 ibool compressed;
3477 ulint err;
3478
3479- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
3480- MTR_MEMO_PAGE_X_FIX));
3481+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3482+
3483 /* Delete node pointer on father page */
3484-
3485- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
3486-
3487- btr_cur_position(index, node_ptr, &cursor);
3488- compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, FALSE,
3489+ btr_page_get_father(index, block, mtr, &cursor);
3490+
3491+ compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
3492 mtr);
3493 ut_a(err == DB_SUCCESS);
3494
3495@@ -1942,80 +2315,118 @@
3496 btr_lift_page_up(
3497 /*=============*/
3498 dict_index_t* index, /* in: index tree */
3499- page_t* page, /* in: page which is the only on its level;
3500+ buf_block_t* block, /* in: page which is the only on its level;
3501 must not be empty: use
3502 btr_discard_only_page_on_level if the last
3503 record from the page should be removed */
3504 mtr_t* mtr) /* in: mtr */
3505 {
3506+ buf_block_t* father_block;
3507 page_t* father_page;
3508- page_t* iter_page;
3509- page_t* pages[BTR_MAX_LEVELS];
3510 ulint page_level;
3511+ page_zip_des_t* father_page_zip;
3512+ page_t* page = buf_block_get_frame(block);
3513 ulint root_page_no;
3514- ulint ancestors;
3515+ buf_block_t* blocks[BTR_MAX_LEVELS];
3516+ ulint n_blocks; /* last used index in blocks[] */
3517 ulint i;
3518
3519 ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
3520 ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
3521- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
3522- MTR_MEMO_PAGE_X_FIX));
3523- father_page = buf_frame_align(
3524- btr_page_get_father_node_ptr(index, page, mtr));
3525+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3526
3527 page_level = btr_page_get_level(page, mtr);
3528 root_page_no = dict_index_get_page(index);
3529
3530- ancestors = 1;
3531- pages[0] = father_page;
3532-
3533- /* Store all ancestor pages so we can reset their levels later on.
3534- We have to do all the searches on the tree now because later on,
3535- after we've replaced the first level, the tree is in an inconsistent
3536- state and can not be searched. */
3537- iter_page = father_page;
3538- for (;;) {
3539- if (buf_block_get_page_no(buf_block_align(iter_page))
3540- == root_page_no) {
3541-
3542- break;
3543+ {
3544+ btr_cur_t cursor;
3545+ mem_heap_t* heap = mem_heap_create(100);
3546+ ulint* offsets;
3547+ buf_block_t* b;
3548+
3549+ offsets = btr_page_get_father_block(NULL, heap, index,
3550+ block, mtr, &cursor);
3551+ father_block = btr_cur_get_block(&cursor);
3552+ father_page_zip = buf_block_get_page_zip(father_block);
3553+ father_page = buf_block_get_frame(father_block);
3554+
3555+ n_blocks = 0;
3556+
3557+ /* Store all ancestor pages so we can reset their
3558+ levels later on. We have to do all the searches on
3559+ the tree now because later on, after we've replaced
3560+ the first level, the tree is in an inconsistent state
3561+ and can not be searched. */
3562+ for (b = father_block;
3563+ buf_block_get_page_no(b) != root_page_no; ) {
3564+ ut_a(n_blocks < BTR_MAX_LEVELS);
3565+
3566+ offsets = btr_page_get_father_block(offsets, heap,
3567+ index, b,
3568+ mtr, &cursor);
3569+
3570+ blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
3571 }
3572
3573- ut_a(ancestors < BTR_MAX_LEVELS);
3574-
3575- iter_page = buf_frame_align(
3576- btr_page_get_father_node_ptr(index, iter_page, mtr));
3577-
3578- pages[ancestors++] = iter_page;
3579+ mem_heap_free(heap);
3580 }
3581
3582- btr_search_drop_page_hash_index(page);
3583+ btr_search_drop_page_hash_index(block);
3584
3585 /* Make the father empty */
3586- btr_page_empty(father_page, mtr);
3587-
3588- /* Move records to the father */
3589- page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page),
3590- index, mtr);
3591- lock_update_copy_and_discard(father_page, page);
3592-
3593- /* Go upward to root page, decreasing levels by one. */
3594- for (i = 0; i < ancestors; i++) {
3595- iter_page = pages[i];
3596-
3597- ut_ad(btr_page_get_level(iter_page, mtr) == (page_level + 1));
3598-
3599- btr_page_set_level(iter_page, page_level, mtr);
3600- page_level++;
3601+ btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
3602+
3603+ /* Copy the records to the father page one by one. */
3604+ if (0
3605+#ifdef UNIV_ZIP_COPY
3606+ || father_page_zip
3607+#endif /* UNIV_ZIP_COPY */
3608+ || UNIV_UNLIKELY
3609+ (!page_copy_rec_list_end(father_block, block,
3610+ page_get_infimum_rec(page),
3611+ index, mtr))) {
3612+ const page_zip_des_t* page_zip
3613+ = buf_block_get_page_zip(block);
3614+ ut_a(father_page_zip);
3615+ ut_a(page_zip);
3616+
3617+ /* Copy the page byte for byte. */
3618+ page_zip_copy_recs(father_page_zip, father_page,
3619+ page_zip, page, index, mtr);
3620+
3621+ /* Update the lock table and possible hash index. */
3622+
3623+ lock_move_rec_list_end(father_block, block,
3624+ page_get_infimum_rec(page));
3625+
3626+ btr_search_move_or_delete_hash_entries(father_block, block,
3627+ index);
3628+ }
3629+
3630+ lock_update_copy_and_discard(father_block, block);
3631+
3632+ /* Go upward to root page, decrementing levels by one. */
3633+ for (i = 0; i < n_blocks; i++, page_level++) {
3634+ page_t* page = buf_block_get_frame(blocks[i]);
3635+ page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
3636+
3637+ ut_ad(btr_page_get_level(page, mtr) == page_level + 1);
3638+
3639+ btr_page_set_level(page, page_zip, page_level, mtr);
3640+#ifdef UNIV_ZIP_DEBUG
3641+ ut_a(!page_zip || page_zip_validate(page_zip, page));
3642+#endif /* UNIV_ZIP_DEBUG */
3643 }
3644
3645 /* Free the file page */
3646- btr_page_free(index, page, mtr);
3647+ btr_page_free(index, block, mtr);
3648
3649- /* We play safe and reset the free bits for the father */
3650- ibuf_reset_free_bits(index, father_page);
3651+ /* We play it safe and reset the free bits for the father */
3652+ if (!dict_index_is_clust(index)) {
3653+ ibuf_reset_free_bits(father_block);
3654+ }
3655 ut_ad(page_validate(father_page, index));
3656- ut_ad(btr_check_node_ptr(index, father_page, mtr));
3657+ ut_ad(btr_check_node_ptr(index, father_block, mtr));
3658 }
3659
3660 /*****************************************************************
3661@@ -2026,12 +2437,12 @@
3662 level lifts the records of the page to the father page, thus reducing the
3663 tree height. It is assumed that mtr holds an x-latch on the tree and on the
3664 page. If cursor is on the leaf level, mtr must also hold x-latches to the
3665-brothers, if they exist. NOTE: it is assumed that the caller has reserved
3666-enough free extents so that the compression will always succeed if done! */
3667-
3668-void
3669+brothers, if they exist. */
3670+UNIV_INTERN
3671+ibool
3672 btr_compress(
3673 /*=========*/
3674+ /* out: TRUE on success */
3675 btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
3676 the page must not be empty: in record delete
3677 use btr_discard_page if the page would become
3678@@ -2040,33 +2451,35 @@
3679 {
3680 dict_index_t* index;
3681 ulint space;
3682+ ulint zip_size;
3683 ulint left_page_no;
3684 ulint right_page_no;
3685+ buf_block_t* merge_block;
3686 page_t* merge_page;
3687- page_t* father_page;
3688+ page_zip_des_t* merge_page_zip;
3689 ibool is_left;
3690+ buf_block_t* block;
3691 page_t* page;
3692- rec_t* orig_pred;
3693- rec_t* orig_succ;
3694- rec_t* node_ptr;
3695+ btr_cur_t father_cursor;
3696+ mem_heap_t* heap;
3697+ ulint* offsets;
3698 ulint data_size;
3699 ulint n_recs;
3700 ulint max_ins_size;
3701 ulint max_ins_size_reorg;
3702 ulint level;
3703- ulint comp;
3704
3705+ block = btr_cur_get_block(cursor);
3706 page = btr_cur_get_page(cursor);
3707 index = btr_cur_get_index(cursor);
3708- comp = page_is_comp(page);
3709- ut_a((ibool)!!comp == dict_table_is_comp(index->table));
3710+ ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table));
3711
3712 ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
3713 MTR_MEMO_X_LOCK));
3714- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
3715- MTR_MEMO_PAGE_X_FIX));
3716+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3717 level = btr_page_get_level(page, mtr);
3718 space = dict_index_get_space(index);
3719+ zip_size = dict_table_zip_size(index->table);
3720
3721 left_page_no = btr_page_get_prev(page, mtr);
3722 right_page_no = btr_page_get_next(page, mtr);
3723@@ -2076,10 +2489,9 @@
3724 left_page_no, right_page_no);
3725 #endif
3726
3727- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
3728- ut_ad(!comp || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
3729- father_page = buf_frame_align(node_ptr);
3730- ut_a(comp == page_is_comp(father_page));
3731+ heap = mem_heap_create(100);
3732+ offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
3733+ &father_cursor);
3734
3735 /* Decide the page to which we try to merge and which will inherit
3736 the locks */
3737@@ -2088,166 +2500,286 @@
3738
3739 if (is_left) {
3740
3741- merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
3742- mtr);
3743+ merge_block = btr_block_get(space, zip_size, left_page_no,
3744+ RW_X_LATCH, mtr);
3745+ merge_page = buf_block_get_frame(merge_block);
3746 #ifdef UNIV_BTR_DEBUG
3747 ut_a(btr_page_get_next(merge_page, mtr)
3748- == buf_frame_get_page_no(page));
3749+ == buf_block_get_page_no(block));
3750 #endif /* UNIV_BTR_DEBUG */
3751 } else if (right_page_no != FIL_NULL) {
3752
3753- merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
3754- mtr);
3755+ merge_block = btr_block_get(space, zip_size, right_page_no,
3756+ RW_X_LATCH, mtr);
3757+ merge_page = buf_block_get_frame(merge_block);
3758 #ifdef UNIV_BTR_DEBUG
3759 ut_a(btr_page_get_prev(merge_page, mtr)
3760- == buf_frame_get_page_no(page));
3761+ == buf_block_get_page_no(block));
3762 #endif /* UNIV_BTR_DEBUG */
3763 } else {
3764 /* The page is the only one on the level, lift the records
3765 to the father */
3766- btr_lift_page_up(index, page, mtr);
3767-
3768- return;
3769+ btr_lift_page_up(index, block, mtr);
3770+ mem_heap_free(heap);
3771+ return(TRUE);
3772 }
3773
3774 n_recs = page_get_n_recs(page);
3775 data_size = page_get_data_size(page);
3776- ut_a(page_is_comp(merge_page) == comp);
3777+#ifdef UNIV_BTR_DEBUG
3778+ ut_a(page_is_comp(merge_page) == page_is_comp(page));
3779+#endif /* UNIV_BTR_DEBUG */
3780
3781 max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
3782 merge_page, n_recs);
3783 if (data_size > max_ins_size_reorg) {
3784
3785 /* No space for merge */
3786+err_exit:
3787+ /* We play it safe and reset the free bits. */
3788+ if (zip_size
3789+ && page_is_leaf(merge_page)
3790+ && !dict_index_is_clust(index)) {
3791+ ibuf_reset_free_bits(merge_block);
3792+ }
3793
3794- return;
3795+ mem_heap_free(heap);
3796+ return(FALSE);
3797 }
3798
3799 ut_ad(page_validate(merge_page, index));
3800
3801 max_ins_size = page_get_max_insert_size(merge_page, n_recs);
3802
3803- if (data_size > max_ins_size) {
3804+ if (UNIV_UNLIKELY(data_size > max_ins_size)) {
3805
3806 /* We have to reorganize merge_page */
3807
3808- btr_page_reorganize(merge_page, index, mtr);
3809+ if (UNIV_UNLIKELY(!btr_page_reorganize(merge_block,
3810+ index, mtr))) {
3811+
3812+ goto err_exit;
3813+ }
3814
3815 max_ins_size = page_get_max_insert_size(merge_page, n_recs);
3816
3817 ut_ad(page_validate(merge_page, index));
3818- ut_ad(page_get_max_insert_size(merge_page, n_recs)
3819- == max_ins_size_reorg);
3820- }
3821-
3822- if (data_size > max_ins_size) {
3823-
3824- /* Add fault tolerance, though this should never happen */
3825-
3826- return;
3827- }
3828-
3829- btr_search_drop_page_hash_index(page);
3830-
3831- /* Remove the page from the level list */
3832- btr_level_list_remove(page, mtr);
3833-
3834+ ut_ad(max_ins_size == max_ins_size_reorg);
3835+
3836+ if (UNIV_UNLIKELY(data_size > max_ins_size)) {
3837+
3838+ /* Add fault tolerance, though this should
3839+ never happen */
3840+
3841+ goto err_exit;
3842+ }
3843+ }
3844+
3845+ merge_page_zip = buf_block_get_page_zip(merge_block);
3846+#ifdef UNIV_ZIP_DEBUG
3847+ if (UNIV_LIKELY_NULL(merge_page_zip)) {
3848+ const page_zip_des_t* page_zip
3849+ = buf_block_get_page_zip(block);
3850+ ut_a(page_zip);
3851+ ut_a(page_zip_validate(merge_page_zip, merge_page));
3852+ ut_a(page_zip_validate(page_zip, page));
3853+ }
3854+#endif /* UNIV_ZIP_DEBUG */
3855+
3856+ /* Move records to the merge page */
3857 if (is_left) {
3858- btr_node_ptr_delete(index, page, mtr);
3859+ rec_t* orig_pred = page_copy_rec_list_start(
3860+ merge_block, block, page_get_supremum_rec(page),
3861+ index, mtr);
3862+
3863+ if (UNIV_UNLIKELY(!orig_pred)) {
3864+ goto err_exit;
3865+ }
3866+
3867+ btr_search_drop_page_hash_index(block);
3868+
3869+ /* Remove the page from the level list */
3870+ btr_level_list_remove(space, zip_size, page, mtr);
3871+
3872+ btr_node_ptr_delete(index, block, mtr);
3873+ lock_update_merge_left(merge_block, orig_pred, block);
3874 } else {
3875- mem_heap_t* heap = NULL;
3876- ulint offsets_[REC_OFFS_NORMAL_SIZE];
3877- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
3878+ rec_t* orig_succ;
3879+#ifdef UNIV_BTR_DEBUG
3880+ byte fil_page_prev[4];
3881+#endif /* UNIV_BTR_DEBUG */
3882+
3883+ if (UNIV_LIKELY_NULL(merge_page_zip)) {
3884+ /* The function page_zip_compress(), which will be
3885+ invoked by page_copy_rec_list_end() below,
3886+ requires that FIL_PAGE_PREV be FIL_NULL.
3887+ Clear the field, but prepare to restore it. */
3888+#ifdef UNIV_BTR_DEBUG
3889+ memcpy(fil_page_prev, merge_page + FIL_PAGE_PREV, 4);
3890+#endif /* UNIV_BTR_DEBUG */
3891+#if FIL_NULL != 0xffffffff
3892+# error "FIL_NULL != 0xffffffff"
3893+#endif
3894+ memset(merge_page + FIL_PAGE_PREV, 0xff, 4);
3895+ }
3896+
3897+ orig_succ = page_copy_rec_list_end(merge_block, block,
3898+ page_get_infimum_rec(page),
3899+ cursor->index, mtr);
3900+
3901+ if (UNIV_UNLIKELY(!orig_succ)) {
3902+ ut_a(merge_page_zip);
3903+#ifdef UNIV_BTR_DEBUG
3904+ /* FIL_PAGE_PREV was restored from merge_page_zip. */
3905+ ut_a(!memcmp(fil_page_prev,
3906+ merge_page + FIL_PAGE_PREV, 4));
3907+#endif /* UNIV_BTR_DEBUG */
3908+ goto err_exit;
3909+ }
3910+
3911+ btr_search_drop_page_hash_index(block);
3912+
3913+#ifdef UNIV_BTR_DEBUG
3914+ if (UNIV_LIKELY_NULL(merge_page_zip)) {
3915+ /* Restore FIL_PAGE_PREV in order to avoid an assertion
3916+ failure in btr_level_list_remove(), which will set
3917+ the field again to FIL_NULL. Even though this makes
3918+ merge_page and merge_page_zip inconsistent for a
3919+ split second, it is harmless, because the pages
3920+ are X-latched. */
3921+ memcpy(merge_page + FIL_PAGE_PREV, fil_page_prev, 4);
3922+ }
3923+#endif /* UNIV_BTR_DEBUG */
3924+
3925+ /* Remove the page from the level list */
3926+ btr_level_list_remove(space, zip_size, page, mtr);
3927+
3928 /* Replace the address of the old child node (= page) with the
3929 address of the merge page to the right */
3930
3931- btr_node_ptr_set_child_page_no(node_ptr,
3932- rec_get_offsets(
3933- node_ptr, index,
3934- offsets_,
3935- ULINT_UNDEFINED,
3936- &heap),
3937- right_page_no, mtr);
3938- if (UNIV_LIKELY_NULL(heap)) {
3939- mem_heap_free(heap);
3940+ btr_node_ptr_set_child_page_no(
3941+ btr_cur_get_rec(&father_cursor),
3942+ btr_cur_get_page_zip(&father_cursor),
3943+ offsets, right_page_no, mtr);
3944+ btr_node_ptr_delete(index, merge_block, mtr);
3945+
3946+ lock_update_merge_right(merge_block, orig_succ, block);
3947+ }
3948+
3949+ mem_heap_free(heap);
3950+
3951+ if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
3952+ /* Update the free bits of the B-tree page in the
3953+ insert buffer bitmap. This has to be done in a
3954+ separate mini-transaction that is committed before the
3955+ main mini-transaction. We cannot update the insert
3956+ buffer bitmap in this mini-transaction, because
3957+ btr_compress() can be invoked recursively without
3958+ committing the mini-transaction in between. Since
3959+ insert buffer bitmap pages have a lower rank than
3960+ B-tree pages, we must not access other pages in the
3961+ same mini-transaction after accessing an insert buffer
3962+ bitmap page. */
3963+
3964+ /* The free bits in the insert buffer bitmap must
3965+ never exceed the free space on a page. It is safe to
3966+ decrement or reset the bits in the bitmap in a
3967+ mini-transaction that is committed before the
3968+ mini-transaction that affects the free space. */
3969+
3970+ /* It is unsafe to increment the bits in a separately
3971+ committed mini-transaction, because in crash recovery,
3972+ the free bits could momentarily be set too high. */
3973+
3974+ if (zip_size) {
3975+ /* Because the free bits may be incremented
3976+ and we cannot update the insert buffer bitmap
3977+ in the same mini-transaction, the only safe
3978+ thing we can do here is the pessimistic
3979+ approach: reset the free bits. */
3980+ ibuf_reset_free_bits(merge_block);
3981+ } else {
3982+ /* On uncompressed pages, the free bits will
3983+ never increase here. Thus, it is safe to
3984+ write the bits accurately in a separate
3985+ mini-transaction. */
3986+ ibuf_update_free_bits_if_full(merge_block,
3987+ UNIV_PAGE_SIZE,
3988+ ULINT_UNDEFINED);
3989 }
3990- btr_node_ptr_delete(index, merge_page, mtr);
3991- }
3992-
3993- /* Move records to the merge page */
3994- if (is_left) {
3995- orig_pred = page_rec_get_prev(
3996- page_get_supremum_rec(merge_page));
3997- page_copy_rec_list_start(merge_page, page,
3998- page_get_supremum_rec(page),
3999- index, mtr);
4000-
4001- lock_update_merge_left(merge_page, orig_pred, page);
4002- } else {
4003- orig_succ = page_rec_get_next(
4004- page_get_infimum_rec(merge_page));
4005- page_copy_rec_list_end(merge_page, page,
4006- page_get_infimum_rec(page),
4007- index, mtr);
4008-
4009- lock_update_merge_right(orig_succ, page);
4010- }
4011-
4012- /* We have added new records to merge_page: update its free bits */
4013- ibuf_update_free_bits_if_full(index, merge_page,
4014- UNIV_PAGE_SIZE, ULINT_UNDEFINED);
4015+ }
4016
4017 ut_ad(page_validate(merge_page, index));
4018+#ifdef UNIV_ZIP_DEBUG
4019+ ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page));
4020+#endif /* UNIV_ZIP_DEBUG */
4021
4022 /* Free the file page */
4023- btr_page_free(index, page, mtr);
4024+ btr_page_free(index, block, mtr);
4025
4026- ut_ad(btr_check_node_ptr(index, merge_page, mtr));
4027+ ut_ad(btr_check_node_ptr(index, merge_block, mtr));
4028+ return(TRUE);
4029 }
4030
4031 /*****************************************************************
4032-Discards a page that is the only page on its level. */
4033+Discards a page that is the only page on its level. This will empty
4034+the whole B-tree, leaving just an empty root page. This function
4035+should never be reached, because btr_compress(), which is invoked in
4036+delete operations, calls btr_lift_page_up() to flatten the B-tree. */
4037 static
4038 void
4039 btr_discard_only_page_on_level(
4040 /*===========================*/
4041 dict_index_t* index, /* in: index tree */
4042- page_t* page, /* in: page which is the only on its level */
4043+ buf_block_t* block, /* in: page which is the only on its level */
4044 mtr_t* mtr) /* in: mtr */
4045 {
4046- rec_t* node_ptr;
4047- page_t* father_page;
4048- ulint page_level;
4049-
4050- ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
4051- ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
4052- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
4053- MTR_MEMO_PAGE_X_FIX));
4054- btr_search_drop_page_hash_index(page);
4055-
4056- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
4057- father_page = buf_frame_align(node_ptr);
4058-
4059- page_level = btr_page_get_level(page, mtr);
4060-
4061- lock_update_discard(page_get_supremum_rec(father_page), page);
4062-
4063- btr_page_set_level(father_page, page_level, mtr);
4064-
4065- /* Free the file page */
4066- btr_page_free(index, page, mtr);
4067-
4068- if (buf_frame_get_page_no(father_page) == dict_index_get_page(index)) {
4069- /* The father is the root page */
4070-
4071- btr_page_empty(father_page, mtr);
4072-
4073- /* We play safe and reset the free bits for the father */
4074- ibuf_reset_free_bits(index, father_page);
4075- } else {
4076- ut_ad(page_get_n_recs(father_page) == 1);
4077-
4078- btr_discard_only_page_on_level(index, father_page, mtr);
4079+ ulint page_level = 0;
4080+
4081+ while (buf_block_get_page_no(block) != dict_index_get_page(index)) {
4082+ btr_cur_t cursor;
4083+ buf_block_t* father;
4084+ const page_t* page = buf_block_get_frame(block);
4085+
4086+ ut_a(page_get_n_recs(page) == 1);
4087+ ut_a(page_level == btr_page_get_level(page, mtr));
4088+ ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
4089+ ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
4090+
4091+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4092+ btr_search_drop_page_hash_index(block);
4093+
4094+ btr_page_get_father(index, block, mtr, &cursor);
4095+ father = btr_cur_get_block(&cursor);
4096+
4097+ lock_update_discard(father, PAGE_HEAP_NO_SUPREMUM, block);
4098+
4099+ /* Free the file page */
4100+ btr_page_free(index, block, mtr);
4101+
4102+ block = father;
4103+ page_level++;
4104+ }
4105+
4106+ /* block is the root page, which must be empty, except
4107+ for the node pointer to the (now discarded) block(s). */
4108+
4109+#ifdef UNIV_BTR_DEBUG
4110+ if (!dict_index_is_ibuf(index)) {
4111+ const page_t* root = buf_block_get_frame(block);
4112+ const ulint space = dict_index_get_space(index);
4113+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
4114+ + root, space));
4115+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
4116+ + root, space));
4117+ }
4118+#endif /* UNIV_BTR_DEBUG */
4119+
4120+ btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr);
4121+
4122+ /* We play it safe and reset the free bits for the root */
4123+ if (!dict_index_is_clust(index)) {
4124+ ibuf_reset_free_bits(block);
4125 }
4126 }
4127
4128@@ -2255,7 +2787,7 @@
4129 Discards a page from a B-tree. This is used to remove the last record from
4130 a B-tree page: the whole page must be removed at the same time. This cannot
4131 be used for the root page, which is allowed to be empty. */
4132-
4133+UNIV_INTERN
4134 void
4135 btr_discard_page(
4136 /*=============*/
4137@@ -2265,85 +2797,104 @@
4138 {
4139 dict_index_t* index;
4140 ulint space;
4141+ ulint zip_size;
4142 ulint left_page_no;
4143 ulint right_page_no;
4144+ buf_block_t* merge_block;
4145 page_t* merge_page;
4146+ buf_block_t* block;
4147 page_t* page;
4148 rec_t* node_ptr;
4149
4150- page = btr_cur_get_page(cursor);
4151+ block = btr_cur_get_block(cursor);
4152 index = btr_cur_get_index(cursor);
4153
4154- ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page));
4155+ ut_ad(dict_index_get_page(index) != buf_block_get_page_no(block));
4156 ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
4157 MTR_MEMO_X_LOCK));
4158- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
4159- MTR_MEMO_PAGE_X_FIX));
4160+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4161 space = dict_index_get_space(index);
4162+ zip_size = dict_table_zip_size(index->table);
4163
4164 /* Decide the page which will inherit the locks */
4165
4166- left_page_no = btr_page_get_prev(page, mtr);
4167- right_page_no = btr_page_get_next(page, mtr);
4168+ left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr);
4169+ right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
4170
4171 if (left_page_no != FIL_NULL) {
4172- merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
4173- mtr);
4174+ merge_block = btr_block_get(space, zip_size, left_page_no,
4175+ RW_X_LATCH, mtr);
4176+ merge_page = buf_block_get_frame(merge_block);
4177 #ifdef UNIV_BTR_DEBUG
4178 ut_a(btr_page_get_next(merge_page, mtr)
4179- == buf_frame_get_page_no(page));
4180+ == buf_block_get_page_no(block));
4181 #endif /* UNIV_BTR_DEBUG */
4182 } else if (right_page_no != FIL_NULL) {
4183- merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
4184- mtr);
4185+ merge_block = btr_block_get(space, zip_size, right_page_no,
4186+ RW_X_LATCH, mtr);
4187+ merge_page = buf_block_get_frame(merge_block);
4188 #ifdef UNIV_BTR_DEBUG
4189 ut_a(btr_page_get_prev(merge_page, mtr)
4190- == buf_frame_get_page_no(page));
4191+ == buf_block_get_page_no(block));
4192 #endif /* UNIV_BTR_DEBUG */
4193 } else {
4194- btr_discard_only_page_on_level(index, page, mtr);
4195+ btr_discard_only_page_on_level(index, block, mtr);
4196
4197 return;
4198 }
4199
4200+ page = buf_block_get_frame(block);
4201 ut_a(page_is_comp(merge_page) == page_is_comp(page));
4202- btr_search_drop_page_hash_index(page);
4203+ btr_search_drop_page_hash_index(block);
4204
4205- if (left_page_no == FIL_NULL && btr_page_get_level(page, mtr) > 0) {
4206+ if (left_page_no == FIL_NULL && !page_is_leaf(page)) {
4207
4208 /* We have to mark the leftmost node pointer on the right
4209 side page as the predefined minimum record */
4210-
4211 node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
4212
4213 ut_ad(page_rec_is_user_rec(node_ptr));
4214
4215- btr_set_min_rec_mark(node_ptr, page_is_comp(merge_page), mtr);
4216+ /* This will make page_zip_validate() fail on merge_page
4217+ until btr_level_list_remove() completes. This is harmless,
4218+ because everything will take place within a single
4219+ mini-transaction and because writing to the redo log
4220+ is an atomic operation (performed by mtr_commit()). */
4221+ btr_set_min_rec_mark(node_ptr, mtr);
4222 }
4223
4224- btr_node_ptr_delete(index, page, mtr);
4225+ btr_node_ptr_delete(index, block, mtr);
4226
4227 /* Remove the page from the level list */
4228- btr_level_list_remove(page, mtr);
4229+ btr_level_list_remove(space, zip_size, page, mtr);
4230+#ifdef UNIV_ZIP_DEBUG
4231+ {
4232+ page_zip_des_t* merge_page_zip
4233+ = buf_block_get_page_zip(merge_block);
4234+ ut_a(!merge_page_zip
4235+ || page_zip_validate(merge_page_zip, merge_page));
4236+ }
4237+#endif /* UNIV_ZIP_DEBUG */
4238
4239 if (left_page_no != FIL_NULL) {
4240- lock_update_discard(page_get_supremum_rec(merge_page), page);
4241+ lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
4242+ block);
4243 } else {
4244- lock_update_discard(page_rec_get_next(
4245- page_get_infimum_rec(merge_page)),
4246- page);
4247+ lock_update_discard(merge_block,
4248+ lock_get_min_heap_no(merge_block),
4249+ block);
4250 }
4251
4252 /* Free the file page */
4253- btr_page_free(index, page, mtr);
4254+ btr_page_free(index, block, mtr);
4255
4256- ut_ad(btr_check_node_ptr(index, merge_page, mtr));
4257+ ut_ad(btr_check_node_ptr(index, merge_block, mtr));
4258 }
4259
4260 #ifdef UNIV_BTR_PRINT
4261 /*****************************************************************
4262 Prints size info of a B-tree. */
4263-
4264+UNIV_INTERN
4265 void
4266 btr_print_size(
4267 /*===========*/
4268@@ -2353,7 +2904,7 @@
4269 fseg_header_t* seg;
4270 mtr_t mtr;
4271
4272- if (index->type & DICT_IBUF) {
4273+ if (dict_index_is_ibuf(index)) {
4274 fputs("Sorry, cannot print info of an ibuf tree:"
4275 " use ibuf functions\n", stderr);
4276
4277@@ -2387,51 +2938,53 @@
4278 btr_print_recursive(
4279 /*================*/
4280 dict_index_t* index, /* in: index tree */
4281- page_t* page, /* in: index page */
4282+ buf_block_t* block, /* in: index page */
4283 ulint width, /* in: print this many entries from start
4284 and end */
4285 mem_heap_t** heap, /* in/out: heap for rec_get_offsets() */
4286 ulint** offsets,/* in/out: buffer for rec_get_offsets() */
4287 mtr_t* mtr) /* in: mtr */
4288 {
4289+ const page_t* page = buf_block_get_frame(block);
4290 page_cur_t cursor;
4291 ulint n_recs;
4292 ulint i = 0;
4293 mtr_t mtr2;
4294- rec_t* node_ptr;
4295- page_t* child;
4296
4297- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
4298- MTR_MEMO_PAGE_X_FIX));
4299+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4300 fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
4301 (ulong) btr_page_get_level(page, mtr),
4302- (ulong) buf_frame_get_page_no(page));
4303+ (ulong) buf_block_get_page_no(block));
4304
4305- page_print(page, index, width, width);
4306+ page_print(block, index, width, width);
4307
4308 n_recs = page_get_n_recs(page);
4309
4310- page_cur_set_before_first(page, &cursor);
4311+ page_cur_set_before_first(block, &cursor);
4312 page_cur_move_to_next(&cursor);
4313
4314 while (!page_cur_is_after_last(&cursor)) {
4315
4316- if (0 == btr_page_get_level(page, mtr)) {
4317+ if (page_is_leaf(page)) {
4318
4319 /* If this is the leaf level, do nothing */
4320
4321 } else if ((i <= width) || (i >= n_recs - width)) {
4322
4323+ const rec_t* node_ptr;
4324+
4325 mtr_start(&mtr2);
4326
4327 node_ptr = page_cur_get_rec(&cursor);
4328
4329 *offsets = rec_get_offsets(node_ptr, index, *offsets,
4330 ULINT_UNDEFINED, heap);
4331- child = btr_node_ptr_get_child(node_ptr,
4332- *offsets, &mtr2);
4333- btr_print_recursive(index, child, width,
4334- heap, offsets, &mtr2);
4335+ btr_print_recursive(index,
4336+ btr_node_ptr_get_child(node_ptr,
4337+ index,
4338+ *offsets,
4339+ &mtr2),
4340+ width, heap, offsets, &mtr2);
4341 mtr_commit(&mtr2);
4342 }
4343
4344@@ -2442,7 +2995,7 @@
4345
4346 /******************************************************************
4347 Prints directories and other info of all nodes in the tree. */
4348-
4349+UNIV_INTERN
4350 void
4351 btr_print_index(
4352 /*============*/
4353@@ -2451,18 +3004,18 @@
4354 and end */
4355 {
4356 mtr_t mtr;
4357- page_t* root;
4358+ buf_block_t* root;
4359 mem_heap_t* heap = NULL;
4360 ulint offsets_[REC_OFFS_NORMAL_SIZE];
4361 ulint* offsets = offsets_;
4362- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
4363+ rec_offs_init(offsets_);
4364
4365 fputs("--------------------------\n"
4366 "INDEX TREE PRINT\n", stderr);
4367
4368 mtr_start(&mtr);
4369
4370- root = btr_root_get(index, &mtr);
4371+ root = btr_root_block_get(index, &mtr);
4372
4373 btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
4374 if (UNIV_LIKELY_NULL(heap)) {
4375@@ -2478,43 +3031,42 @@
4376 #ifdef UNIV_DEBUG
4377 /****************************************************************
4378 Checks that the node pointer to a page is appropriate. */
4379-
4380+UNIV_INTERN
4381 ibool
4382 btr_check_node_ptr(
4383 /*===============*/
4384 /* out: TRUE */
4385 dict_index_t* index, /* in: index tree */
4386- page_t* page, /* in: index page */
4387+ buf_block_t* block, /* in: index page */
4388 mtr_t* mtr) /* in: mtr */
4389 {
4390 mem_heap_t* heap;
4391- rec_t* node_ptr;
4392- dtuple_t* node_ptr_tuple;
4393-
4394- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
4395- MTR_MEMO_PAGE_X_FIX));
4396- if (dict_index_get_page(index) == buf_frame_get_page_no(page)) {
4397-
4398- return(TRUE);
4399- }
4400-
4401- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
4402-
4403- if (btr_page_get_level(page, mtr) == 0) {
4404+ dtuple_t* tuple;
4405+ ulint* offsets;
4406+ btr_cur_t cursor;
4407+ page_t* page = buf_block_get_frame(block);
4408+
4409+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4410+ if (dict_index_get_page(index) == buf_block_get_page_no(block)) {
4411
4412 return(TRUE);
4413 }
4414
4415 heap = mem_heap_create(256);
4416-
4417- node_ptr_tuple = dict_index_build_node_ptr(
4418+ offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
4419+ &cursor);
4420+
4421+ if (page_is_leaf(page)) {
4422+
4423+ goto func_exit;
4424+ }
4425+
4426+ tuple = dict_index_build_node_ptr(
4427 index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
4428 btr_page_get_level(page, mtr));
4429
4430- ut_a(!cmp_dtuple_rec(node_ptr_tuple, node_ptr,
4431- rec_get_offsets(node_ptr, index,
4432- NULL, ULINT_UNDEFINED, &heap)));
4433-
4434+ ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
4435+func_exit:
4436 mem_heap_free(heap);
4437
4438 return(TRUE);
4439@@ -2527,40 +3079,40 @@
4440 void
4441 btr_index_rec_validate_report(
4442 /*==========================*/
4443- page_t* page, /* in: index page */
4444- rec_t* rec, /* in: index record */
4445- dict_index_t* index) /* in: index */
4446+ const page_t* page, /* in: index page */
4447+ const rec_t* rec, /* in: index record */
4448+ const dict_index_t* index) /* in: index */
4449 {
4450 fputs("InnoDB: Record in ", stderr);
4451 dict_index_name_print(stderr, NULL, index);
4452 fprintf(stderr, ", page %lu, at offset %lu\n",
4453- buf_frame_get_page_no(page), (ulint)(rec - page));
4454+ page_get_page_no(page), (ulint) page_offset(rec));
4455 }
4456
4457 /****************************************************************
4458 Checks the size and number of fields in a record based on the definition of
4459 the index. */
4460-
4461+UNIV_INTERN
4462 ibool
4463 btr_index_rec_validate(
4464 /*===================*/
4465- /* out: TRUE if ok */
4466- rec_t* rec, /* in: index record */
4467- dict_index_t* index, /* in: index */
4468- ibool dump_on_error) /* in: TRUE if the function
4469- should print hex dump of record
4470- and page on error */
4471+ /* out: TRUE if ok */
4472+ const rec_t* rec, /* in: index record */
4473+ const dict_index_t* index, /* in: index */
4474+ ibool dump_on_error) /* in: TRUE if the function
4475+ should print hex dump of record
4476+ and page on error */
4477 {
4478 ulint len;
4479 ulint n;
4480 ulint i;
4481- page_t* page;
4482+ const page_t* page;
4483 mem_heap_t* heap = NULL;
4484 ulint offsets_[REC_OFFS_NORMAL_SIZE];
4485 ulint* offsets = offsets_;
4486- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
4487+ rec_offs_init(offsets_);
4488
4489- page = buf_frame_align(rec);
4490+ page = page_align(rec);
4491
4492 if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
4493 /* The insert buffer index tree can contain records from any
4494@@ -2589,7 +3141,7 @@
4495 (ulong) rec_get_n_fields_old(rec), (ulong) n);
4496
4497 if (dump_on_error) {
4498- buf_page_print(page);
4499+ buf_page_print(page, 0);
4500
4501 fputs("InnoDB: corrupt record ", stderr);
4502 rec_print_old(stderr, rec);
4503@@ -2604,7 +3156,7 @@
4504 ulint fixed_size = dict_col_get_fixed_size(
4505 dict_index_get_nth_col(index, i));
4506
4507- rec_get_nth_field(rec, offsets, i, &len);
4508+ rec_get_nth_field_offs(offsets, i, &len);
4509
4510 /* Note that if fixed_size != 0, it equals the
4511 length of a fixed-size column in the clustered index.
4512@@ -2627,7 +3179,7 @@
4513 (ulong) i, (ulong) len, (ulong) fixed_size);
4514
4515 if (dump_on_error) {
4516- buf_page_print(page);
4517+ buf_page_print(page, 0);
4518
4519 fputs("InnoDB: corrupt record ", stderr);
4520 rec_print_new(stderr, rec, offsets);
4521@@ -2654,13 +3206,13 @@
4522 btr_index_page_validate(
4523 /*====================*/
4524 /* out: TRUE if ok */
4525- page_t* page, /* in: index page */
4526+ buf_block_t* block, /* in: index page */
4527 dict_index_t* index) /* in: index */
4528 {
4529 page_cur_t cur;
4530 ibool ret = TRUE;
4531
4532- page_cur_set_before_first(page, &cur);
4533+ page_cur_set_before_first(block, &cur);
4534 page_cur_move_to_next(&cur);
4535
4536 for (;;) {
4537@@ -2686,13 +3238,13 @@
4538 void
4539 btr_validate_report1(
4540 /*=================*/
4541- /* out: TRUE if ok */
4542- dict_index_t* index, /* in: index */
4543- ulint level, /* in: B-tree level */
4544- page_t* page) /* in: index page */
4545+ /* out: TRUE if ok */
4546+ dict_index_t* index, /* in: index */
4547+ ulint level, /* in: B-tree level */
4548+ const buf_block_t* block) /* in: index page */
4549 {
4550 fprintf(stderr, "InnoDB: Error in page %lu of ",
4551- buf_frame_get_page_no(page));
4552+ buf_block_get_page_no(block));
4553 dict_index_name_print(stderr, NULL, index);
4554 if (level) {
4555 fprintf(stderr, ", index tree level %lu", level);
4556@@ -2706,15 +3258,15 @@
4557 void
4558 btr_validate_report2(
4559 /*=================*/
4560- /* out: TRUE if ok */
4561- dict_index_t* index, /* in: index */
4562- ulint level, /* in: B-tree level */
4563- page_t* page1, /* in: first index page */
4564- page_t* page2) /* in: second index page */
4565+ /* out: TRUE if ok */
4566+ const dict_index_t* index, /* in: index */
4567+ ulint level, /* in: B-tree level */
4568+ const buf_block_t* block1, /* in: first index page */
4569+ const buf_block_t* block2) /* in: second index page */
4570 {
4571 fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
4572- buf_frame_get_page_no(page1),
4573- buf_frame_get_page_no(page2));
4574+ buf_block_get_page_no(block1),
4575+ buf_block_get_page_no(block2));
4576 dict_index_name_print(stderr, NULL, index);
4577 if (level) {
4578 fprintf(stderr, ", index tree level %lu", level);
4579@@ -2734,12 +3286,14 @@
4580 ulint level) /* in: level number */
4581 {
4582 ulint space;
4583+ ulint zip_size;
4584+ buf_block_t* block;
4585 page_t* page;
4586+ buf_block_t* right_block = 0; /* remove warning */
4587 page_t* right_page = 0; /* remove warning */
4588 page_t* father_page;
4589- page_t* right_father_page;
4590- rec_t* node_ptr;
4591- rec_t* right_node_ptr;
4592+ btr_cur_t node_cur;
4593+ btr_cur_t right_node_cur;
4594 rec_t* rec;
4595 ulint right_page_no;
4596 ulint left_page_no;
4597@@ -2750,26 +3304,39 @@
4598 mem_heap_t* heap = mem_heap_create(256);
4599 ulint* offsets = NULL;
4600 ulint* offsets2= NULL;
4601+#ifdef UNIV_ZIP_DEBUG
4602+ page_zip_des_t* page_zip;
4603+#endif /* UNIV_ZIP_DEBUG */
4604
4605 mtr_start(&mtr);
4606
4607 mtr_x_lock(dict_index_get_lock(index), &mtr);
4608
4609- page = btr_root_get(index, &mtr);
4610+ block = btr_root_block_get(index, &mtr);
4611+ page = buf_block_get_frame(block);
4612
4613- space = buf_frame_get_space_id(page);
4614+ space = dict_index_get_space(index);
4615+ zip_size = dict_table_zip_size(index->table);
4616
4617 while (level != btr_page_get_level(page, &mtr)) {
4618-
4619- ut_a(btr_page_get_level(page, &mtr) > 0);
4620-
4621- page_cur_set_before_first(page, &cursor);
4622+ const rec_t* node_ptr;
4623+
4624+ ut_a(space == buf_block_get_space(block));
4625+ ut_a(space == page_get_space_id(page));
4626+#ifdef UNIV_ZIP_DEBUG
4627+ page_zip = buf_block_get_page_zip(block);
4628+ ut_a(!page_zip || page_zip_validate(page_zip, page));
4629+#endif /* UNIV_ZIP_DEBUG */
4630+ ut_a(!page_is_leaf(page));
4631+
4632+ page_cur_set_before_first(block, &cursor);
4633 page_cur_move_to_next(&cursor);
4634
4635 node_ptr = page_cur_get_rec(&cursor);
4636 offsets = rec_get_offsets(node_ptr, index, offsets,
4637 ULINT_UNDEFINED, &heap);
4638- page = btr_node_ptr_get_child(node_ptr, offsets, &mtr);
4639+ block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr);
4640+ page = buf_block_get_frame(block);
4641 }
4642
4643 /* Now we are on the desired level. Loop through the pages on that
4644@@ -2784,17 +3351,22 @@
4645 offsets = offsets2 = NULL;
4646 mtr_x_lock(dict_index_get_lock(index), &mtr);
4647
4648+#ifdef UNIV_ZIP_DEBUG
4649+ page_zip = buf_block_get_page_zip(block);
4650+ ut_a(!page_zip || page_zip_validate(page_zip, page));
4651+#endif /* UNIV_ZIP_DEBUG */
4652+
4653 /* Check ordering etc. of records */
4654
4655 if (!page_validate(page, index)) {
4656- btr_validate_report1(index, level, page);
4657+ btr_validate_report1(index, level, block);
4658
4659 ret = FALSE;
4660 } else if (level == 0) {
4661 /* We are on level 0. Check that the records have the right
4662 number of fields, and field lengths are right. */
4663
4664- if (!btr_index_page_validate(page, index)) {
4665+ if (!btr_index_page_validate(block, index)) {
4666
4667 ret = FALSE;
4668 }
4669@@ -2805,32 +3377,32 @@
4670 right_page_no = btr_page_get_next(page, &mtr);
4671 left_page_no = btr_page_get_prev(page, &mtr);
4672
4673- ut_a((page_get_n_recs(page) > 0)
4674- || ((level == 0)
4675- && (buf_frame_get_page_no(page)
4676- == dict_index_get_page(index))));
4677+ ut_a(page_get_n_recs(page) > 0 || (level == 0
4678+ && page_get_page_no(page)
4679+ == dict_index_get_page(index)));
4680
4681 if (right_page_no != FIL_NULL) {
4682- rec_t* right_rec;
4683- right_page = btr_page_get(space, right_page_no, RW_X_LATCH,
4684- &mtr);
4685+ const rec_t* right_rec;
4686+ right_block = btr_block_get(space, zip_size, right_page_no,
4687+ RW_X_LATCH, &mtr);
4688+ right_page = buf_block_get_frame(right_block);
4689 if (UNIV_UNLIKELY(btr_page_get_prev(right_page, &mtr)
4690- != buf_frame_get_page_no(page))) {
4691- btr_validate_report2(index, level, page, right_page);
4692+ != page_get_page_no(page))) {
4693+ btr_validate_report2(index, level, block, right_block);
4694 fputs("InnoDB: broken FIL_PAGE_NEXT"
4695 " or FIL_PAGE_PREV links\n", stderr);
4696- buf_page_print(page);
4697- buf_page_print(right_page);
4698+ buf_page_print(page, 0);
4699+ buf_page_print(right_page, 0);
4700
4701 ret = FALSE;
4702 }
4703
4704 if (UNIV_UNLIKELY(page_is_comp(right_page)
4705 != page_is_comp(page))) {
4706- btr_validate_report2(index, level, page, right_page);
4707+ btr_validate_report2(index, level, block, right_block);
4708 fputs("InnoDB: 'compact' flag mismatch\n", stderr);
4709- buf_page_print(page);
4710- buf_page_print(right_page);
4711+ buf_page_print(page, 0);
4712+ buf_page_print(right_page, 0);
4713
4714 ret = FALSE;
4715
4716@@ -2848,13 +3420,13 @@
4717 offsets, offsets2,
4718 index) >= 0)) {
4719
4720- btr_validate_report2(index, level, page, right_page);
4721+ btr_validate_report2(index, level, block, right_block);
4722
4723 fputs("InnoDB: records in wrong order"
4724 " on adjacent pages\n", stderr);
4725
4726- buf_page_print(page);
4727- buf_page_print(right_page);
4728+ buf_page_print(page, 0);
4729+ buf_page_print(right_page, 0);
4730
4731 fputs("InnoDB: record ", stderr);
4732 rec = page_rec_get_prev(page_get_supremum_rec(page));
4733@@ -2876,54 +3448,54 @@
4734 page_is_comp(page)));
4735 }
4736
4737- if (buf_frame_get_page_no(page) != dict_index_get_page(index)) {
4738+ if (buf_block_get_page_no(block) != dict_index_get_page(index)) {
4739
4740 /* Check father node pointers */
4741
4742- node_ptr = btr_page_get_father_node_ptr(index, page, &mtr);
4743- father_page = buf_frame_align(node_ptr);
4744- offsets = rec_get_offsets(node_ptr, index,
4745- offsets, ULINT_UNDEFINED, &heap);
4746-
4747- if (btr_node_ptr_get_child_page_no(node_ptr, offsets)
4748- != buf_frame_get_page_no(page)
4749- || node_ptr != btr_page_get_father_for_rec(
4750- index, page,
4751- page_rec_get_prev(page_get_supremum_rec(page)),
4752- &mtr)) {
4753- btr_validate_report1(index, level, page);
4754+ rec_t* node_ptr;
4755+
4756+ offsets = btr_page_get_father_block(offsets, heap, index,
4757+ block, &mtr, &node_cur);
4758+ father_page = btr_cur_get_page(&node_cur);
4759+ node_ptr = btr_cur_get_rec(&node_cur);
4760+
4761+ btr_cur_position(
4762+ index, page_rec_get_prev(page_get_supremum_rec(page)),
4763+ block, &node_cur);
4764+ offsets = btr_page_get_father_node_ptr(offsets, heap,
4765+ &node_cur, &mtr);
4766+
4767+ if (UNIV_UNLIKELY(node_ptr != btr_cur_get_rec(&node_cur))
4768+ || UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr,
4769+ offsets)
4770+ != buf_block_get_page_no(block))) {
4771+
4772+ btr_validate_report1(index, level, block);
4773
4774 fputs("InnoDB: node pointer to the page is wrong\n",
4775 stderr);
4776
4777- buf_page_print(father_page);
4778- buf_page_print(page);
4779+ buf_page_print(father_page, 0);
4780+ buf_page_print(page, 0);
4781
4782 fputs("InnoDB: node ptr ", stderr);
4783- rec_print_new(stderr, node_ptr, offsets);
4784+ rec_print(stderr, node_ptr, index);
4785
4786+ rec = btr_cur_get_rec(&node_cur);
4787 fprintf(stderr, "\n"
4788 "InnoDB: node ptr child page n:o %lu\n",
4789- (unsigned long) btr_node_ptr_get_child_page_no
4790- (node_ptr, offsets));
4791+ (ulong) btr_node_ptr_get_child_page_no(
4792+ rec, offsets));
4793
4794 fputs("InnoDB: record on page ", stderr);
4795- rec = btr_page_get_father_for_rec(
4796- index, page,
4797- page_rec_get_prev(page_get_supremum_rec(page)),
4798- &mtr);
4799- rec_print(stderr, rec, index);
4800+ rec_print_new(stderr, rec, offsets);
4801 putc('\n', stderr);
4802 ret = FALSE;
4803
4804 goto node_ptr_fails;
4805 }
4806
4807- if (btr_page_get_level(page, &mtr) > 0) {
4808- offsets = rec_get_offsets(node_ptr, index,
4809- offsets, ULINT_UNDEFINED,
4810- &heap);
4811-
4812+ if (!page_is_leaf(page)) {
4813 node_ptr_tuple = dict_index_build_node_ptr(
4814 index,
4815 page_rec_get_next(page_get_infimum_rec(page)),
4816@@ -2931,13 +3503,13 @@
4817
4818 if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
4819 offsets)) {
4820- rec_t* first_rec = page_rec_get_next(
4821+ const rec_t* first_rec = page_rec_get_next(
4822 page_get_infimum_rec(page));
4823
4824- btr_validate_report1(index, level, page);
4825+ btr_validate_report1(index, level, block);
4826
4827- buf_page_print(father_page);
4828- buf_page_print(page);
4829+ buf_page_print(father_page, 0);
4830+ buf_page_print(page, 0);
4831
4832 fputs("InnoDB: Error: node ptrs differ"
4833 " on levels > 0\n"
4834@@ -2963,30 +3535,35 @@
4835 page_get_supremum_rec(father_page)));
4836 ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
4837 } else {
4838- right_node_ptr = btr_page_get_father_node_ptr(
4839- index, right_page, &mtr);
4840- if (page_rec_get_next(node_ptr)
4841+ const rec_t* right_node_ptr
4842+ = page_rec_get_next(node_ptr);
4843+
4844+ offsets = btr_page_get_father_block(
4845+ offsets, heap, index, right_block,
4846+ &mtr, &right_node_cur);
4847+ if (right_node_ptr
4848 != page_get_supremum_rec(father_page)) {
4849
4850- if (right_node_ptr
4851- != page_rec_get_next(node_ptr)) {
4852+ if (btr_cur_get_rec(&right_node_cur)
4853+ != right_node_ptr) {
4854 ret = FALSE;
4855 fputs("InnoDB: node pointer to"
4856 " the right page is wrong\n",
4857 stderr);
4858
4859 btr_validate_report1(index, level,
4860- page);
4861+ block);
4862
4863- buf_page_print(father_page);
4864- buf_page_print(page);
4865- buf_page_print(right_page);
4866+ buf_page_print(father_page, 0);
4867+ buf_page_print(page, 0);
4868+ buf_page_print(right_page, 0);
4869 }
4870 } else {
4871- right_father_page = buf_frame_align(
4872- right_node_ptr);
4873+ page_t* right_father_page
4874+ = btr_cur_get_page(&right_node_cur);
4875
4876- if (right_node_ptr != page_rec_get_next(
4877+ if (btr_cur_get_rec(&right_node_cur)
4878+ != page_rec_get_next(
4879 page_get_infimum_rec(
4880 right_father_page))) {
4881 ret = FALSE;
4882@@ -2995,15 +3572,15 @@
4883 stderr);
4884
4885 btr_validate_report1(index, level,
4886- page);
4887+ block);
4888
4889- buf_page_print(father_page);
4890- buf_page_print(right_father_page);
4891- buf_page_print(page);
4892- buf_page_print(right_page);
4893+ buf_page_print(father_page, 0);
4894+ buf_page_print(right_father_page, 0);
4895+ buf_page_print(page, 0);
4896+ buf_page_print(right_page, 0);
4897 }
4898
4899- if (buf_frame_get_page_no(right_father_page)
4900+ if (page_get_page_no(right_father_page)
4901 != btr_page_get_next(father_page, &mtr)) {
4902
4903 ret = FALSE;
4904@@ -3012,12 +3589,12 @@
4905 stderr);
4906
4907 btr_validate_report1(index, level,
4908- page);
4909+ block);
4910
4911- buf_page_print(father_page);
4912- buf_page_print(right_father_page);
4913- buf_page_print(page);
4914- buf_page_print(right_page);
4915+ buf_page_print(father_page, 0);
4916+ buf_page_print(right_father_page, 0);
4917+ buf_page_print(page, 0);
4918+ buf_page_print(right_page, 0);
4919 }
4920 }
4921 }
4922@@ -3032,7 +3609,9 @@
4923 if (right_page_no != FIL_NULL) {
4924 mtr_start(&mtr);
4925
4926- page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr);
4927+ block = btr_block_get(space, zip_size, right_page_no,
4928+ RW_X_LATCH, &mtr);
4929+ page = buf_block_get_frame(block);
4930
4931 goto loop;
4932 }
4933@@ -3043,7 +3622,7 @@
4934
4935 /******************************************************************
4936 Checks the consistency of an index tree. */
4937-
4938+UNIV_INTERN
4939 ibool
4940 btr_validate_index(
4941 /*===============*/
4942
4943=== modified file 'storage/innobase/btr/btr0cur.c'
4944--- storage/innobase/btr/btr0cur.c 2008-12-14 18:13:36 +0000
4945+++ storage/innobase/btr/btr0cur.c 2009-03-31 04:19:17 +0000
4946@@ -1,3 +1,28 @@
4947+/*****************************************************************************
4948+
4949+Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
4950+Copyright (c) 2008, Google Inc.
4951+
4952+Portions of this file contain modifications contributed and copyrighted by
4953+Google, Inc. Those modifications are gratefully acknowledged and are described
4954+briefly in the InnoDB documentation. The contributions by Google are
4955+incorporated with their permission, and subject to the conditions contained in
4956+the file COPYING.Google.
4957+
4958+This program is free software; you can redistribute it and/or modify it under
4959+the terms of the GNU General Public License as published by the Free Software
4960+Foundation; version 2 of the License.
4961+
4962+This program is distributed in the hope that it will be useful, but WITHOUT
4963+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
4964+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
4965+
4966+You should have received a copy of the GNU General Public License along with
4967+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
4968+Place, Suite 330, Boston, MA 02111-1307 USA
4969+
4970+*****************************************************************************/
4971+
4972 /******************************************************
4973 The index tree cursor
4974
4975@@ -12,8 +37,6 @@
4976 if leaf splitting has been started, it is difficult to undo, except
4977 by crashing the database and doing a roll-forward.
4978
4979-(c) 1994-2001 Innobase Oy
4980-
4981 Created 10/16/1994 Heikki Tuuri
4982 *******************************************************/
4983
4984@@ -24,38 +47,38 @@
4985 #endif
4986
4987 #include "page0page.h"
4988+#include "page0zip.h"
4989 #include "rem0rec.h"
4990 #include "rem0cmp.h"
4991+#include "buf0lru.h"
4992 #include "btr0btr.h"
4993 #include "btr0sea.h"
4994 #include "row0upd.h"
4995 #include "trx0rec.h"
4996+#include "trx0roll.h" /* trx_is_recv() */
4997 #include "que0que.h"
4998 #include "row0row.h"
4999 #include "srv0srv.h"
5000 #include "ibuf0ibuf.h"
The diff has been truncated for viewing.

Subscribers

People subscribed via source and target branches