Merge lp:~percona-dev/percona-server/5.1.57-minimize_buf_pool_shm into lp:percona-server/5.1

Proposed by Yasufumi Kinoshita
Status: Merged
Approved by: Stewart Smith
Approved revision: no longer in the source branch.
Merged at revision: 235
Proposed branch: lp:~percona-dev/percona-server/5.1.57-minimize_buf_pool_shm
Merge into: lp:percona-server/5.1
Diff against target: 1236 lines (+30/-1087)
2 files modified
innodb_buffer_pool_shm.patch (+22/-1079)
innodb_fast_shutdown.patch (+8/-8)
To merge this branch: bzr merge lp:~percona-dev/percona-server/5.1.57-minimize_buf_pool_shm
Reviewer Review Type Date Requested Status
Stewart Smith Pending
Review via email: mp+65319@code.launchpad.net

Description of the change

Invalidate the options added by innodb_buffer_pool_shm.patch: they are still accepted, but are now deprecated and have no effect.
The chunk size alignment is retained for compatibility.

To post a comment you must log in.
Revision history for this message
Stewart Smith (stewart) wrote :

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'innodb_buffer_pool_shm.patch'
--- innodb_buffer_pool_shm.patch 2011-05-10 07:31:20 +0000
+++ innodb_buffer_pool_shm.patch 2011-06-21 09:06:00 +0000
@@ -8,300 +8,20 @@
8diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c8diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c
9--- a/storage/innodb_plugin/buf/buf0buf.c 2010-07-14 16:32:49.669501663 +09009--- a/storage/innodb_plugin/buf/buf0buf.c 2010-07-14 16:32:49.669501663 +0900
10+++ b/storage/innodb_plugin/buf/buf0buf.c 2010-07-14 16:40:16.149438645 +090010+++ b/storage/innodb_plugin/buf/buf0buf.c 2010-07-14 16:40:16.149438645 +0900
11@@ -53,6 +53,10 @@11@@ -769,10 +769,12 @@
12 #include "page0zip.h"
13 #include "trx0trx.h"
14 #include "srv0start.h"
15+#include "que0que.h"
16+#include "read0read.h"
17+#include "row0row.h"
18+#include "ha_prototypes.h"
19
20 /* prototypes for new functions added to ha_innodb.cc */
21 trx_t* innobase_get_trx();
22@@ -310,6 +314,30 @@
23 UNIV_INTERN ibool buf_debug_prints = FALSE;
24 #endif /* UNIV_DEBUG */
25
26+/* Buffer pool shared memory segment information */
27+typedef struct buf_shm_info_struct buf_shm_info_t;
28+
29+struct buf_shm_info_struct {
30+ char head_str[8];
31+ ulint binary_id;
32+ ibool is_new; /* during initializing */
33+ ibool clean; /* clean shutdowned and free */
34+ ibool reusable; /* reusable */
35+ ulint buf_pool_size; /* backup value */
36+ ulint page_size; /* backup value */
37+ ulint frame_offset; /* offset of the first frame based on chunk->mem */
38+ ulint zip_hash_offset;
39+ ulint zip_hash_n;
40+
41+ ulint checksum;
42+
43+ buf_pool_t buf_pool_backup;
44+ buf_chunk_t chunk_backup;
45+
46+ ib_uint64_t dummy;
47+};
48+
49+#define BUF_SHM_INFO_HEAD "XTRA_SHM"
50 #endif /* !UNIV_HOTBACKUP */
51
52 /********************************************************************//**
53@@ -756,6 +784,45 @@
54 #endif /* UNIV_SYNC_DEBUG */
55 }
56
57+static
58+void
59+buf_block_reuse(
60+/*============*/
61+ buf_block_t* block,
62+ ptrdiff_t frame_offset)
63+{
64+ /* block_init */
65+ block->frame += frame_offset;
66+
67+ UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block);
68+
69+ block->index = NULL;
70+
71+#ifdef UNIV_DEBUG
72+ /* recreate later */
73+ block->page.in_page_hash = FALSE;
74+ block->page.in_zip_hash = FALSE;
75+#endif /* UNIV_DEBUG */
76+
77+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
78+ block->n_pointers = 0;
79+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
80+
81+ if (block->page.zip.data)
82+ block->page.zip.data += frame_offset;
83+
84+ block->is_hashed = FALSE;
85+
86+ mutex_create(&block->mutex, SYNC_BUF_BLOCK);
87+
88+ rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
89+ ut_ad(rw_lock_validate(&(block->lock)));
90+
91+#ifdef UNIV_SYNC_DEBUG
92+ rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
93+#endif /* UNIV_SYNC_DEBUG */
94+}
95+
96 /********************************************************************//**
97 Allocates a chunk of buffer frames.
98 @return chunk, or NULL on failure */
99@@ -768,26 +835,190 @@
100 {
101 buf_block_t* block;12 buf_block_t* block;
102 byte* frame;13 byte* frame;
103+ ulint zip_hash_n = 0;
104+ ulint zip_hash_mem_size = 0;
105+ hash_table_t* zip_hash_tmp = NULL;
106 ulint i;14 ulint i;
107+ ulint size_target;15+ ulint size_target;
108+ buf_shm_info_t* shm_info = NULL;
109 16
110 /* Round down to a multiple of page size,17 /* Round down to a multiple of page size,
111 although it already should be. */18 although it already should be. */
112 mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);19 mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
113+ size_target = (mem_size / UNIV_PAGE_SIZE) - 1;20+ size_target = (mem_size / UNIV_PAGE_SIZE) - 1;
114+
115+ srv_buffer_pool_shm_is_reused = FALSE;
116+
117+ if (srv_buffer_pool_shm_key) {
118+ /* zip_hash size */
119+ zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2;
120+ zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n)
121+ + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
122+ }
123+
124 /* Reserve space for the block descriptors. */21 /* Reserve space for the block descriptors. */
125 mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)22 mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
126 + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);23 + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
127+ if (srv_buffer_pool_shm_key) {24@@ -810,6 +812,10 @@
128+ mem_size += ut_2pow_round(sizeof(buf_shm_info_t)
129+ + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
130+ mem_size += zip_hash_mem_size;
131+ }
132
133 chunk->mem_size = mem_size;
134+
135+ if (srv_buffer_pool_shm_key) {
136+ ulint binary_id;
137+ ibool is_new;
138+
139+ ut_a(buf_pool->n_chunks == 1);
140+
141+ fprintf(stderr,
142+ "InnoDB: Warning: The innodb_buffer_pool_shm_key option has been specified.\n"
143+ "InnoDB: Do not change the following between restarts of the server while this option is being used:\n"
144+ "InnoDB: * the mysqld executable between restarts of the server.\n"
145+ "InnoDB: * the value of innodb_buffer_pool_size.\n"
146+ "InnoDB: * the value of innodb_page_size.\n"
147+ "InnoDB: * datafiles created by InnoDB during this session.\n"
148+ "InnoDB: Otherwise, data corruption in datafiles may result.\n");
149+
150+ /* FIXME: This is vague id still */
151+ binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get)
152+ + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum)
153+ + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal)
154+ + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str)
155+ + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version)
156+ + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low)
157+ + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func)
158+ + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread)
159+ + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside)
160+ + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity)
161+ + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup);
162+
163+ chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new);
164+
165+ if (UNIV_UNLIKELY(chunk->mem == NULL)) {
166+ return(NULL);
167+ }
168+init_again:
169+#ifdef UNIV_SET_MEM_TO_ZERO
170+ if (is_new) {
171+ memset(chunk->mem, '\0', chunk->mem_size);
172+ }
173+#endif
174+ /* for ut_fold_binary_32(), these values should be 32-bit aligned */
175+ ut_a(sizeof(buf_shm_info_t) % 4 == 0);
176+ ut_a((ulint)chunk->mem % 4 == 0);
177+ ut_a(chunk->mem_size % 4 == 0);
178+
179+ shm_info = chunk->mem;
180+
181+ zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size);
182+
183+ if (is_new) {
184+ strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8);
185+ shm_info->binary_id = binary_id;
186+ shm_info->is_new = TRUE; /* changed to FALSE when the initialization is finished */
187+ shm_info->clean = FALSE; /* changed to TRUE when free the segment. */
188+ shm_info->reusable = FALSE; /* changed to TRUE when validation is finished. */
189+ shm_info->buf_pool_size = srv_buf_pool_size;
190+ shm_info->page_size = srv_page_size;
191+ shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size;
192+ shm_info->zip_hash_n = zip_hash_n;
193+ } else {
194+ ulint checksum;
195+
196+ if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) {
197+ fprintf(stderr,
198+ "InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n");
199+ return(NULL);
200+ }
201+ if (shm_info->binary_id != binary_id) {
202+ fprintf(stderr,
203+ "InnoDB: Error: The shared memory segment seems not to be for this binary.\n");
204+ return(NULL);
205+ }
206+ if (shm_info->is_new) {
207+ fprintf(stderr,
208+ "InnoDB: Error: The shared memory was not initialized yet.\n");
209+ return(NULL);
210+ }
211+ if (shm_info->buf_pool_size != srv_buf_pool_size) {
212+ fprintf(stderr,
213+ "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n",
214+ shm_info->buf_pool_size, srv_buf_pool_size);
215+ return(NULL);
216+ }
217+ if (shm_info->page_size != srv_page_size) {
218+ fprintf(stderr,
219+ "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n",
220+ shm_info->page_size, srv_page_size);
221+ return(NULL);
222+ }
223+ if (!shm_info->reusable) {
224+ fprintf(stderr,
225+ "InnoDB: Warning: The shared memory has unrecoverable contents.\n"
226+ "InnoDB: The shared memory segment is initialized.\n");
227+ is_new = TRUE;
228+ goto init_again;
229+ }
230+ if (!shm_info->clean) {
231+ fprintf(stderr,
232+ "InnoDB: Warning: The shared memory was not shut down cleanly.\n"
233+ "InnoDB: The shared memory segment is initialized.\n");
234+ is_new = TRUE;
235+ goto init_again;
236+ }
237+
238+ ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size);
239+ ut_a(shm_info->zip_hash_n == zip_hash_n);
240+
241+ /* check checksum */
242+ if (srv_buffer_pool_shm_checksum) {
243+ checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
244+ chunk->mem_size - sizeof(buf_shm_info_t));
245+ } else {
246+ checksum = BUF_NO_CHECKSUM_MAGIC;
247+ }
248+
249+ if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC
250+ && shm_info->checksum != checksum) {
251+ fprintf(stderr,
252+ "InnoDB: Error: checksum of the shared memory is not match. "
253+ "(stored=%lu calculated=%lu)\n",
254+ shm_info->checksum, checksum);
255+ return(NULL);
256+ }
257+
258+ /* flag to use the segment. */
259+ shm_info->clean = FALSE; /* changed to TRUE when free the segment. */
260+ }
261+
262+ /* init zip_hash contents */
263+ if (is_new) {
264+ hash_create_init(zip_hash_tmp, zip_hash_n);
265+ } else {
266+ /* adjust offset is done later */
267+ hash_create_reuse(zip_hash_tmp);
268+
269+ srv_buffer_pool_shm_is_reused = TRUE;
270+ }
271+ } else {
272 chunk->mem = os_mem_alloc_large(&chunk->mem_size);
273
274 if (UNIV_UNLIKELY(chunk->mem == NULL)) {
275
276 return(NULL);
277 }
278+ }
279
280 /* Allocate the block descriptors from
281 the start of the memory block. */
282+ if (srv_buffer_pool_shm_key) {
283+ chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t));
284+ } else {
285 chunk->blocks = chunk->mem;
286+ }
287
288 /* Align a pointer to the first frame. Note that when
289 os_large_page_size is smaller than UNIV_PAGE_SIZE,
290@@ -795,8 +1026,13 @@
291 it is bigger, we may allocate more blocks than requested. */
292
293 frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
294+ if (srv_buffer_pool_shm_key) {
295+ /* reserve zip_hash space and always -1 for reproductibity */
296+ chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1;
297+ } else {
298 chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
299 - (frame != chunk->mem);
300+ }
301
302 /* Subtract the space needed for block descriptors. */
303 {
304@@ -810,6 +1046,102 @@
305 chunk->size = size;25 chunk->size = size;
306 }26 }
307 27
@@ -309,404 +29,52 @@
309+ chunk->size = size_target;29+ chunk->size = size_target;
310+ }30+ }
311+31+
312+ if (shm_info && !(shm_info->is_new)) {
313+ /* convert the shared memory segment for reuse */
314+ ptrdiff_t phys_offset;
315+ ptrdiff_t logi_offset;
316+ ptrdiff_t blocks_offset;
317+ void* previous_frame_address;
318+
319+ if (chunk->size < shm_info->chunk_backup.size) {
320+ fprintf(stderr,
321+ "InnoDB: Error: The buffer pool became smaller because of allocated address.\n"
322+ "InnoDB: Retrying may avoid this situation.\n");
323+ shm_info->clean = TRUE; /* release the flag for retrying */
324+ return(NULL);
325+ }
326+
327+ chunk->size = shm_info->chunk_backup.size;
328+ phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset);
329+ logi_offset = frame - chunk->blocks[0].frame;
330+ previous_frame_address = chunk->blocks[0].frame;
331+ blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks;
332+
333+ if (phys_offset || logi_offset || blocks_offset) {
334+ fprintf(stderr,
335+ "InnoDB: Buffer pool in the shared memory segment should be converted.\n"
336+ "InnoDB: Previous frames in address : %p\n"
337+ "InnoDB: Previous frames were located : %p\n"
338+ "InnoDB: Current frames should be located: %p\n"
339+ "InnoDB: Pysical offset : %ld (%#lx)\n"
340+ "InnoDB: Logical offset (frames) : %ld (%#lx)\n"
341+ "InnoDB: Logical offset (blocks) : %ld (%#lx)\n",
342+ (byte*)chunk->mem + shm_info->frame_offset,
343+ chunk->blocks[0].frame, frame,
344+ phys_offset, phys_offset, logi_offset, logi_offset,
345+ blocks_offset, blocks_offset);
346+ } else {
347+ fprintf(stderr,
348+ "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n");
349+ }
350+
351+ if (phys_offset) {
352+ fprintf(stderr,
353+ "InnoDB: Aligning physical offset...");
354+
355+ memmove(frame, (byte*)chunk->mem + shm_info->frame_offset,
356+ chunk->size * UNIV_PAGE_SIZE);
357+
358+ fprintf(stderr,
359+ " Done.\n");
360+ }
361+
362+ /* buf_block_t */
363+ block = chunk->blocks;
364+ for (i = chunk->size; i--; ) {
365+ buf_block_reuse(block, logi_offset);
366+ block++;
367+ }
368+
369+ if (logi_offset || blocks_offset) {
370+ fprintf(stderr,
371+ "InnoDB: Aligning logical offset...");
372+
373+
374+ /* buf_pool_t buf_pool_backup */
375+ UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list,
376+ previous_frame_address, logi_offset, blocks_offset);
377+ UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free,
378+ previous_frame_address, logi_offset, blocks_offset);
379+ UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU,
380+ previous_frame_address, logi_offset, blocks_offset);
381+ if (shm_info->buf_pool_backup.LRU_old)
382+ shm_info->buf_pool_backup.LRU_old =
383+ (buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old)
384+ + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
385+ ? logi_offset : blocks_offset));
386+
387+ UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU,
388+ previous_frame_address, logi_offset, blocks_offset);
389+
390+ UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean,
391+ previous_frame_address, logi_offset, blocks_offset);
392+ for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
393+ UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i],
394+ previous_frame_address, logi_offset, blocks_offset);
395+ }
396+
397+ HASH_OFFSET(zip_hash_tmp, buf_page_t, hash,
398+ previous_frame_address, logi_offset, blocks_offset);
399+
400+ fprintf(stderr,
401+ " Done.\n");
402+ }
403+ } else {
404 /* Init block structs and assign frames for them. Then we32 /* Init block structs and assign frames for them. Then we
405 assign the frames to the first blocks (we already mapped the33 assign the frames to the first blocks (we already mapped the
406 memory above). */34 memory above). */
407@@ -833,6 +1165,11 @@
408 block++;
409 frame += UNIV_PAGE_SIZE;
410 }
411+ }
412+
413+ if (shm_info) {
414+ shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem;
415+ }
416
417 return(chunk);
418 }
419@@ -1014,6 +1351,8 @@
420 UNIV_MEM_UNDESC(block);
421 }
422
423+ ut_a(!srv_buffer_pool_shm_key);
424+
425 os_mem_free_large(chunk->mem, chunk->mem_size);
426 }
427
428@@ -1063,7 +1402,10 @@
429 srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
430
431 buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
432+ /* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */
433+ if (!srv_buffer_pool_shm_key) {
434 buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
435+ }
436
437 buf_pool->last_printout_time = time(NULL);
438
439@@ -1078,6 +1420,86 @@
440 --------------------------- */
441 /* All fields are initialized by mem_zalloc(). */
442
443+ if (srv_buffer_pool_shm_key) {
444+ buf_shm_info_t* shm_info;
445+
446+ ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
447+ shm_info = chunk->mem;
448+
449+ buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset);
450+
451+ if(shm_info->is_new) {
452+ shm_info->is_new = FALSE; /* initialization was finished */
453+ } else {
454+ buf_block_t* block = chunk->blocks;
455+ buf_page_t* b;
456+
457+ /* shm_info->buf_pool_backup should be converted */
458+ /* at buf_chunk_init(). So copy simply. */
459+ buf_pool->flush_list = shm_info->buf_pool_backup.flush_list;
460+ buf_pool->freed_page_clock = shm_info->buf_pool_backup.freed_page_clock;
461+ buf_pool->free = shm_info->buf_pool_backup.free;
462+ buf_pool->LRU = shm_info->buf_pool_backup.LRU;
463+ buf_pool->LRU_old = shm_info->buf_pool_backup.LRU_old;
464+ buf_pool->LRU_old_len = shm_info->buf_pool_backup.LRU_old_len;
465+ buf_pool->unzip_LRU = shm_info->buf_pool_backup.unzip_LRU;
466+ buf_pool->zip_clean = shm_info->buf_pool_backup.zip_clean;
467+ for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
468+ buf_pool->zip_free[i] = shm_info->buf_pool_backup.zip_free[i];
469+ }
470+
471+ for (i = 0; i < chunk->size; i++, block++) {
472+ if (buf_block_get_state(block)
473+ == BUF_BLOCK_FILE_PAGE) {
474+ ut_d(block->page.in_page_hash = TRUE);
475+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
476+ buf_page_address_fold(
477+ block->page.space,
478+ block->page.offset),
479+ &block->page);
480+ }
481+ }
482+
483+ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
484+ b = UT_LIST_GET_NEXT(zip_list, b)) {
485+ ut_ad(!b->in_flush_list);
486+ ut_ad(b->in_LRU_list);
487+
488+ ut_d(b->in_page_hash = TRUE);
489+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
490+ buf_page_address_fold(b->space, b->offset), b);
491+ }
492+
493+ for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
494+ b = UT_LIST_GET_NEXT(flush_list, b)) {
495+ ut_ad(b->in_flush_list);
496+ ut_ad(b->in_LRU_list);
497+
498+ switch (buf_page_get_state(b)) {
499+ case BUF_BLOCK_ZIP_DIRTY:
500+ ut_d(b->in_page_hash = TRUE);
501+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
502+ buf_page_address_fold(b->space,
503+ b->offset), b);
504+ break;
505+ case BUF_BLOCK_FILE_PAGE:
506+ /* uncompressed page */
507+ break;
508+ case BUF_BLOCK_ZIP_FREE:
509+ case BUF_BLOCK_ZIP_PAGE:
510+ case BUF_BLOCK_NOT_USED:
511+ case BUF_BLOCK_READY_FOR_USE:
512+ case BUF_BLOCK_MEMORY:
513+ case BUF_BLOCK_REMOVE_HASH:
514+ ut_error;
515+ break;
516+ }
517+ }
518+
519+
520+ }
521+ }
522+
523 mutex_exit(&LRU_list_mutex);
524 rw_lock_x_unlock(&page_hash_latch);
525 buf_pool_mutex_exit();
526@@ -1102,6 +1524,34 @@
527 buf_chunk_t* chunk;
528 buf_chunk_t* chunks;
529
530+ if (srv_buffer_pool_shm_key) {
531+ buf_shm_info_t* shm_info;
532+
533+ ut_a(buf_pool->n_chunks == 1);
534+
535+ chunk = buf_pool->chunks;
536+ shm_info = chunk->mem;
537+ ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
538+
539+ /* validation the shared memory segment doesn't have unrecoverable contents. */
540+ /* Currently, validation became not needed */
541+ shm_info->reusable = TRUE;
542+
543+ memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t));
544+ memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t));
545+
546+ if (srv_fast_shutdown < 2) {
547+ if (srv_buffer_pool_shm_checksum) {
548+ shm_info->checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
549+ chunk->mem_size - sizeof(buf_shm_info_t));
550+ } else {
551+ shm_info->checksum = BUF_NO_CHECKSUM_MAGIC;
552+ }
553+ shm_info->clean = TRUE;
554+ }
555+
556+ os_shm_free(chunk->mem, chunk->mem_size);
557+ } else {
558 chunks = buf_pool->chunks;
559 chunk = chunks + buf_pool->n_chunks;
560
561@@ -1110,10 +1560,13 @@
562 would fail at shutdown. */
563 os_mem_free_large(chunk->mem, chunk->mem_size);
564 }
565+ }
566
567 mem_free(buf_pool->chunks);
568 hash_table_free(buf_pool->page_hash);
569+ if (!srv_buffer_pool_shm_key) {
570 hash_table_free(buf_pool->zip_hash);
571+ }
572 mem_free(buf_pool);
573 buf_pool = NULL;
574 }
575@@ -1308,6 +1761,11 @@
576 //buf_pool_mutex_enter();
577 mutex_enter(&LRU_list_mutex);
578
579+ if (srv_buffer_pool_shm_key) {
580+ /* Cannot support shrink */
581+ goto func_done;
582+ }
583+
584 shrink_again:
585 if (buf_pool->n_chunks <= 1) {
586
587@@ -1551,6 +2009,11 @@
588 buf_pool_resize(void)
589 /*=================*/
590 {
591+ if (srv_buffer_pool_shm_key) {
592+ /* Cannot support resize */
593+ return;
594+ }
595+
596 //buf_pool_mutex_enter();
597 mutex_enter(&LRU_list_mutex);
598
599diff -ruN a/storage/innodb_plugin/ha/hash0hash.c b/storage/innodb_plugin/ha/hash0hash.c
600--- a/storage/innodb_plugin/ha/hash0hash.c 2010-06-04 00:49:59.000000000 +0900
601+++ b/storage/innodb_plugin/ha/hash0hash.c 2010-07-14 16:40:16.150438366 +0900
602@@ -128,6 +128,70 @@
603 }
604
605 /*************************************************************//**
606+*/
607+UNIV_INTERN
608+ulint
609+hash_create_needed(
610+/*===============*/
611+ ulint n)
612+{
613+ ulint prime;
614+ ulint offset;
615+
616+ prime = ut_find_prime(n);
617+
618+ offset = (sizeof(hash_table_t) + 7) / 8;
619+ offset *= 8;
620+
621+ return(offset + sizeof(hash_cell_t) * prime);
622+}
623+
624+UNIV_INTERN
625+void
626+hash_create_init(
627+/*=============*/
628+ hash_table_t* table,
629+ ulint n)
630+{
631+ ulint prime;
632+ ulint offset;
633+
634+ prime = ut_find_prime(n);
635+
636+ offset = (sizeof(hash_table_t) + 7) / 8;
637+ offset *= 8;
638+
639+ table->array = (hash_cell_t*)(((byte*)table) + offset);
640+ table->n_cells = prime;
641+# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
642+ table->adaptive = FALSE;
643+# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
644+ table->n_mutexes = 0;
645+ table->mutexes = NULL;
646+ table->heaps = NULL;
647+ table->heap = NULL;
648+ ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
649+
650+ /* Initialize the cell array */
651+ hash_table_clear(table);
652+}
653+
654+UNIV_INTERN
655+void
656+hash_create_reuse(
657+/*==============*/
658+ hash_table_t* table)
659+{
660+ ulint offset;
661+
662+ offset = (sizeof(hash_table_t) + 7) / 8;
663+ offset *= 8;
664+
665+ table->array = (hash_cell_t*)(((byte*)table) + offset);
666+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
667+}
668+
669+/*************************************************************//**
670 Frees a hash table. */
671 UNIV_INTERN
672 void
673diff -ruN a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc35diff -ruN a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc
674--- a/storage/innodb_plugin/handler/ha_innodb.cc 2010-07-14 16:34:18.597725479 +090036--- a/storage/innodb_plugin/handler/ha_innodb.cc 2010-07-14 16:34:18.597725479 +0900
675+++ b/storage/innodb_plugin/handler/ha_innodb.cc 2010-07-14 16:40:16.159323612 +090037+++ b/storage/innodb_plugin/handler/ha_innodb.cc 2010-07-14 16:40:16.159323612 +0900
676@@ -198,6 +198,7 @@38@@ -198,6 +198,8 @@
677 static my_bool innobase_create_status_file = FALSE;39 static my_bool innobase_create_status_file = FALSE;
678 static my_bool innobase_stats_on_metadata = TRUE;40 static my_bool innobase_stats_on_metadata = TRUE;
679 static my_bool innobase_use_sys_stats_table = FALSE;41 static my_bool innobase_use_sys_stats_table = FALSE;
680+static my_bool innobase_buffer_pool_shm_checksum = TRUE;42+static my_bool innobase_buffer_pool_shm_checksum = TRUE;
43+static uint innobase_buffer_pool_shm_key = 0;
681 44
682 static char* internal_innobase_data_file_path = NULL;45 static char* internal_innobase_data_file_path = NULL;
683 46
684@@ -2476,6 +2477,7 @@47@@ -2460,6 +2462,12 @@
685 srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;48
686 srv_use_checksums = (ibool) innobase_use_checksums;49 srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
687 srv_fast_checksum = (ibool) innobase_fast_checksum;50
688+ srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum;51+ if (innobase_buffer_pool_shm_key) {
689 52+ fprintf(stderr,
690 #ifdef HAVE_LARGE_PAGES53+ "InnoDB: Warning: innodb_buffer_pool_shm_key is deprecated function.\n"
691 if ((os_use_large_pages = (ibool) my_use_large_pages))54+ "InnoDB: innodb_buffer_pool_shm_key was ignored.\n");
692@@ -11476,6 +11478,16 @@55+ }
56+
57 srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
58
59 srv_n_file_io_threads = (ulint) innobase_file_io_threads;
60@@ -11476,6 +11484,16 @@
693 "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",61 "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
694 NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);62 NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);
695 63
696+static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key,64+static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, innobase_buffer_pool_shm_key,
697+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,65+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
698+ "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.",66+ "[Deprecated option] no effect",
699+ NULL, NULL, 0, 0, INT_MAX32, 0);67+ NULL, NULL, 0, 0, INT_MAX32, 0);
700+68+
701+static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum,69+static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum,
702+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,70+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
703+ "Enable buffer_pool_shm checksum validation (enabled by default).",71+ "[Deprecated option] no effect",
704+ NULL, NULL, TRUE);72+ NULL, NULL, TRUE);
705+73+
706 static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,74 static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
707 PLUGIN_VAR_RQCMDARG,75 PLUGIN_VAR_RQCMDARG,
708 "Helps in performance tuning in heavily concurrent environments.",76 "Helps in performance tuning in heavily concurrent environments.",
709@@ -11764,6 +11776,8 @@77@@ -11764,6 +11782,8 @@
710 MYSQL_SYSVAR(additional_mem_pool_size),78 MYSQL_SYSVAR(additional_mem_pool_size),
711 MYSQL_SYSVAR(autoextend_increment),79 MYSQL_SYSVAR(autoextend_increment),
712 MYSQL_SYSVAR(buffer_pool_size),80 MYSQL_SYSVAR(buffer_pool_size),
@@ -715,428 +83,3 @@
715 MYSQL_SYSVAR(checksums),83 MYSQL_SYSVAR(checksums),
716 MYSQL_SYSVAR(fast_checksum),84 MYSQL_SYSVAR(fast_checksum),
717 MYSQL_SYSVAR(commit_concurrency),85 MYSQL_SYSVAR(commit_concurrency),
718diff -ruN a/storage/innodb_plugin/handler/innodb_patch_info.h b/storage/innodb_plugin/handler/innodb_patch_info.h
719--- a/storage/innodb_plugin/handler/innodb_patch_info.h 2010-07-14 16:34:18.603733950 +0900
720+++ b/storage/innodb_plugin/handler/innodb_patch_info.h 2010-07-14 16:40:16.164323927 +0900
721@@ -47,5 +47,6 @@
722 {"innodb_fast_checksum","Using the checksum on 32bit-unit calculation","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"},
723 {"innodb_files_extend","allow >4GB transaction log files, and can vary universal page size of datafiles","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"},
724 {"innodb_sys_tables_sys_indexes","Expose InnoDB SYS_TABLES and SYS_INDEXES schema tables","","http://www.percona.com/docs/wiki/percona-xtradb"},
725+{"innodb_buffer_pool_shm","Put buffer pool contents to shared memory segment and reuse it at clean restart [experimental]","","http://www.percona.com/docs/wiki/percona-xtradb"},
726 {NULL, NULL, NULL, NULL}
727 };
728diff -ruN a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h
729--- a/storage/innodb_plugin/include/buf0buf.h 2010-07-14 16:33:23.823323393 +0900
730+++ b/storage/innodb_plugin/include/buf0buf.h 2010-07-14 16:40:16.166323436 +0900
731@@ -36,6 +36,7 @@
732 #include "ut0rbt.h"
733 #ifndef UNIV_HOTBACKUP
734 #include "os0proc.h"
735+#include "srv0srv.h"
736
737 /** @name Modes for buf_page_get_gen */
738 /* @{ */
739@@ -1302,7 +1303,10 @@
740 /**********************************************************************//**
741 Compute the hash fold value for blocks in buf_pool->zip_hash. */
742 /* @{ */
743-#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
744+/* the fold should be relative when srv_buffer_pool_shm_key is enabled */
745+#define BUF_POOL_ZIP_FOLD_PTR(ptr) (!srv_buffer_pool_shm_key\
746+ ?((ulint) (ptr) / UNIV_PAGE_SIZE)\
747+ :((ulint) ((byte*)ptr - (byte*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE))
748 #define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
749 #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
750 /* @} */
751diff -ruN a/storage/innodb_plugin/include/hash0hash.h b/storage/innodb_plugin/include/hash0hash.h
752--- a/storage/innodb_plugin/include/hash0hash.h 2010-06-04 00:49:59.000000000 +0900
753+++ b/storage/innodb_plugin/include/hash0hash.h 2010-07-14 16:40:16.168323262 +0900
754@@ -49,6 +49,28 @@
755 hash_create(
756 /*========*/
757 ulint n); /*!< in: number of array cells */
758+
759+/*************************************************************//**
760+*/
761+UNIV_INTERN
762+ulint
763+hash_create_needed(
764+/*===============*/
765+ ulint n);
766+
767+UNIV_INTERN
768+void
769+hash_create_init(
770+/*=============*/
771+ hash_table_t* table,
772+ ulint n);
773+
774+UNIV_INTERN
775+void
776+hash_create_reuse(
777+/*==============*/
778+ hash_table_t* table);
779+
780 #ifndef UNIV_HOTBACKUP
781 /*************************************************************//**
782 Creates a mutex array to protect a hash table. */
783@@ -327,6 +349,33 @@
784 }\
785 }\
786 } while (0)
787+
788+/********************************************************************//**
789+Align nodes with moving location.*/
790+#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \
791+do {\
792+ ulint i2222;\
793+ ulint cell_count2222;\
794+\
795+ cell_count2222 = hash_get_n_cells(TABLE);\
796+\
797+ for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
798+ NODE_TYPE* node2222;\
799+\
800+ if ((TABLE)->array[i2222].node) \
801+ (TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \
802+ + (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\
803+ node2222 = HASH_GET_FIRST((TABLE), i2222);\
804+\
805+ while (node2222) {\
806+ if (node2222->PTR_NAME) \
807+ node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \
808+ + ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\
809+\
810+ node2222 = node2222->PTR_NAME;\
811+ }\
812+ }\
813+} while (0)
814
815 /************************************************************//**
816 Gets the mutex index for a fold value in a hash table.
817diff -ruN a/storage/innodb_plugin/include/os0proc.h b/storage/innodb_plugin/include/os0proc.h
818--- a/storage/innodb_plugin/include/os0proc.h 2010-06-04 00:49:59.000000000 +0900
819+++ b/storage/innodb_plugin/include/os0proc.h 2010-07-14 16:40:16.169321536 +0900
820@@ -32,6 +32,11 @@
821 #ifdef UNIV_LINUX
822 #include <sys/ipc.h>
823 #include <sys/shm.h>
824+#else
825+# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
826+#include <sys/ipc.h>
827+#include <sys/shm.h>
828+# endif
829 #endif
830
831 typedef void* os_process_t;
832@@ -70,6 +75,29 @@
833 ulint size); /*!< in: size returned by
834 os_mem_alloc_large() */
835
836+
837+/****************************************************************//**
838+Allocates or attaches and reuses shared memory segment.
839+The content is not cleared automatically.
840+@return allocated memory */
841+UNIV_INTERN
842+void*
843+os_shm_alloc(
844+/*=========*/
845+ ulint* n, /*!< in/out: number of bytes */
846+ uint key,
847+ ibool* is_new);
848+
849+/****************************************************************//**
850+Detach shared memory segment. */
851+UNIV_INTERN
852+void
853+os_shm_free(
854+/*========*/
855+ void *ptr, /*!< in: pointer returned by
856+ os_shm_alloc() */
857+ ulint size); /*!< in: size returned by
858+ os_shm_alloc() */
859 #ifndef UNIV_NONINL
860 #include "os0proc.ic"
861 #endif
862diff -ruN a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h
863--- a/storage/innodb_plugin/include/srv0srv.h 2010-07-14 16:32:49.695323045 +0900
864+++ b/storage/innodb_plugin/include/srv0srv.h 2010-07-14 16:40:16.171325784 +0900
865@@ -156,6 +156,10 @@
866 extern ulint srv_mem_pool_size;
867 extern ulint srv_lock_table_size;
868
869+extern uint srv_buffer_pool_shm_key;
870+extern ibool srv_buffer_pool_shm_is_reused;
871+extern ibool srv_buffer_pool_shm_checksum;
872+
873 extern ibool srv_thread_concurrency_timer_based;
874
875 extern ulint srv_n_file_io_threads;
876diff -ruN a/storage/innodb_plugin/include/ut0lst.h b/storage/innodb_plugin/include/ut0lst.h
877--- a/storage/innodb_plugin/include/ut0lst.h 2010-06-04 00:49:59.000000000 +0900
878+++ b/storage/innodb_plugin/include/ut0lst.h 2010-07-14 16:40:16.172321547 +0900
879@@ -257,5 +257,48 @@
880 ut_a(ut_list_node_313 == NULL); \
881 } while (0)
882
883+/********************************************************************//**
884+Align nodes with moving location.
885+@param NAME the name of the list
886+@param TYPE node type
887+@param BASE base node (not a pointer to it)
888+@param OFFSET offset moved */
889+#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET) \
890+do { \
891+ ulint ut_list_i_313; \
892+ TYPE* ut_list_node_313; \
893+ \
894+ if ((BASE).start) \
895+ (BASE).start = (void*)((byte*)((BASE).start) \
896+ + (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\
897+ if ((BASE).end) \
898+ (BASE).end = (void*)((byte*)((BASE).end) \
899+ + (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\
900+ \
901+ ut_list_node_313 = (BASE).start; \
902+ \
903+ for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
904+ ut_a(ut_list_node_313); \
905+ if ((ut_list_node_313->NAME).prev) \
906+ (ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\
907+ + (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\
908+ if ((ut_list_node_313->NAME).next) \
909+ (ut_list_node_313->NAME).next = (void*)((byte*)((ut_list_node_313->NAME).next)\
910+ + (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\
911+ ut_list_node_313 = (ut_list_node_313->NAME).next; \
912+ } \
913+ \
914+ ut_a(ut_list_node_313 == NULL); \
915+ \
916+ ut_list_node_313 = (BASE).end; \
917+ \
918+ for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
919+ ut_a(ut_list_node_313); \
920+ ut_list_node_313 = (ut_list_node_313->NAME).prev; \
921+ } \
922+ \
923+ ut_a(ut_list_node_313 == NULL); \
924+} while (0)
925+
926 #endif
927
928diff -ruN a/storage/innodb_plugin/log/log0recv.c b/storage/innodb_plugin/log/log0recv.c
929--- a/storage/innodb_plugin/log/log0recv.c 2010-10-01 15:25:27.106299166 +0900
930+++ b/storage/innodb_plugin/log/log0recv.c 2010-10-01 15:26:33.689261436 +0900
931@@ -2899,6 +2899,7 @@
932 /*==========================*/
933 {
934 ut_a(!recv_needed_recovery);
935+ ut_a(!srv_buffer_pool_shm_is_reused);
936
937 recv_needed_recovery = TRUE;
938
939diff -ruN a/storage/innodb_plugin/os/os0proc.c b/storage/innodb_plugin/os/os0proc.c
940--- a/storage/innodb_plugin/os/os0proc.c 2010-06-04 00:49:59.000000000 +0900
941+++ b/storage/innodb_plugin/os/os0proc.c 2010-07-14 16:40:16.174322953 +0900
942@@ -229,3 +229,173 @@
943 }
944 #endif
945 }
946+
947+/****************************************************************//**
948+Allocates or attaches and reuses shared memory segment.
949+The content is not cleared automatically.
950+@return allocated memory */
951+UNIV_INTERN
952+void*
953+os_shm_alloc(
954+/*=========*/
955+ ulint* n, /*!< in/out: number of bytes */
956+ uint key,
957+ ibool* is_new)
958+{
959+ void* ptr;
960+#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
961+ ulint size;
962+ int shmid;
963+
964+ *is_new = FALSE;
965+ fprintf(stderr,
966+ "InnoDB: The shared memory segment containing the buffer pool is: key %#x (%d).\n",
967+ key, key);
968+# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
969+ if (!os_use_large_pages || !os_large_page_size) {
970+ goto skip;
971+ }
972+
973+ /* Align block size to os_large_page_size */
974+ ut_ad(ut_is_2pow(os_large_page_size));
975+ size = ut_2pow_round(*n + (os_large_page_size - 1),
976+ os_large_page_size);
977+
978+ shmid = shmget((key_t)key, (size_t)size,
979+ IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W);
980+ if (shmid < 0) {
981+ if (errno == EEXIST) {
982+ fprintf(stderr,
983+ "InnoDB: HugeTLB: The shared memory segment exists.\n");
984+ shmid = shmget((key_t)key, (size_t)size,
985+ SHM_HUGETLB | SHM_R | SHM_W);
986+ if (shmid < 0) {
987+ fprintf(stderr,
988+ "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
989+ size, errno);
990+ goto skip;
991+ } else {
992+ fprintf(stderr,
993+ "InnoDB: HugeTLB: The existent shared memory segment is used.\n");
994+ }
995+ } else {
996+ fprintf(stderr,
997+ "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
998+ size, errno);
999+ goto skip;
1000+ }
1001+ } else {
1002+ *is_new = TRUE;
1003+ fprintf(stderr,
1004+ "InnoDB: HugeTLB: A new shared memory segment has been created .\n");
1005+ }
1006+
1007+ ptr = shmat(shmid, NULL, 0);
1008+ if (ptr == (void *)-1) {
1009+ fprintf(stderr,
1010+ "InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n",
1011+ errno);
1012+ ptr = NULL;
1013+ }
1014+
1015+ if (ptr) {
1016+ *n = size;
1017+ os_fast_mutex_lock(&ut_list_mutex);
1018+ ut_total_allocated_memory += size;
1019+ os_fast_mutex_unlock(&ut_list_mutex);
1020+ UNIV_MEM_ALLOC(ptr, size);
1021+ return(ptr);
1022+ }
1023+skip:
1024+ *is_new = FALSE;
1025+# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */
1026+# ifdef HAVE_GETPAGESIZE
1027+ size = getpagesize();
1028+# else
1029+ size = UNIV_PAGE_SIZE;
1030+# endif
1031+ /* Align block size to system page size */
1032+ ut_ad(ut_is_2pow(size));
1033+ size = *n = ut_2pow_round(*n + (size - 1), size);
1034+
1035+ shmid = shmget((key_t)key, (size_t)size,
1036+ IPC_CREAT | IPC_EXCL | SHM_R | SHM_W);
1037+ if (shmid < 0) {
1038+ if (errno == EEXIST) {
1039+ fprintf(stderr,
1040+ "InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n");
1041+ shmid = shmget((key_t)key, (size_t)size,
1042+ SHM_R | SHM_W);
1043+ if (shmid < 0) {
1044+ fprintf(stderr,
1045+ "InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
1046+ size, errno);
1047+ ptr = NULL;
1048+ goto end;
1049+ } else {
1050+ fprintf(stderr,
1051+ "InnoDB: The existent shared memory segment is used.\n");
1052+ }
1053+ } else {
1054+ fprintf(stderr,
1055+ "InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
1056+ size, errno);
1057+ ptr = NULL;
1058+ goto end;
1059+ }
1060+ } else {
1061+ *is_new = TRUE;
1062+ fprintf(stderr,
1063+ "InnoDB: A new shared memory segment has been created.\n");
1064+ }
1065+
1066+ ptr = shmat(shmid, NULL, 0);
1067+ if (ptr == (void *)-1) {
1068+ fprintf(stderr,
1069+ "InnoDB: Warning: Failed to attach shared memory segment, errno %d\n",
1070+ errno);
1071+ ptr = NULL;
1072+ }
1073+
1074+ if (ptr) {
1075+ *n = size;
1076+ os_fast_mutex_lock(&ut_list_mutex);
1077+ ut_total_allocated_memory += size;
1078+ os_fast_mutex_unlock(&ut_list_mutex);
1079+ UNIV_MEM_ALLOC(ptr, size);
1080+ }
1081+end:
1082+#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1083+ fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
1084+ ptr = NULL;
1085+#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1086+ return(ptr);
1087+}
1088+
1089+/****************************************************************//**
1090+Detach shared memory segment. */
1091+UNIV_INTERN
1092+void
1093+os_shm_free(
1094+/*========*/
1095+ void *ptr, /*!< in: pointer returned by
1096+ os_shm_alloc() */
1097+ ulint size) /*!< in: size returned by
1098+ os_shm_alloc() */
1099+{
1100+ os_fast_mutex_lock(&ut_list_mutex);
1101+ ut_a(ut_total_allocated_memory >= size);
1102+ os_fast_mutex_unlock(&ut_list_mutex);
1103+
1104+#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
1105+ if (!shmdt(ptr)) {
1106+ os_fast_mutex_lock(&ut_list_mutex);
1107+ ut_a(ut_total_allocated_memory >= size);
1108+ ut_total_allocated_memory -= size;
1109+ os_fast_mutex_unlock(&ut_list_mutex);
1110+ UNIV_MEM_FREE(ptr, size);
1111+ }
1112+#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1113+ fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
1114+#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1115+}
1116diff -ruN a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c
1117--- a/storage/innodb_plugin/srv/srv0srv.c 2010-07-14 16:33:23.848391648 +0900
1118+++ b/storage/innodb_plugin/srv/srv0srv.c 2010-07-14 16:40:16.177323553 +0900
1119@@ -211,6 +211,11 @@
1120 UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
1121 UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
1122
1123+/* key value for shm */
1124+UNIV_INTERN uint srv_buffer_pool_shm_key = 0;
1125+UNIV_INTERN ibool srv_buffer_pool_shm_is_reused = FALSE;
1126+UNIV_INTERN ibool srv_buffer_pool_shm_checksum = TRUE;
1127+
1128 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
1129 instead. */
1130 UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
1131diff -ruN a/storage/innodb_plugin/srv/srv0start.c b/storage/innodb_plugin/srv/srv0start.c
1132--- a/storage/innodb_plugin/srv/srv0start.c 2010-07-14 16:33:23.851391514 +0900
1133+++ b/storage/innodb_plugin/srv/srv0start.c 2010-07-14 16:40:16.180321173 +0900
1134@@ -1750,6 +1750,8 @@
1135 Note that this is not as heavy weight as it seems. At
1136 this point there will be only ONE page in the buf_LRU
1137 and there must be no page in the buf_flush list. */
1138+ /* buffer_pool_shm should not be reused when recovery was needed. */
1139+ if (!srv_buffer_pool_shm_is_reused)
1140 buf_pool_invalidate();
1141
1142 /* We always try to do a recovery, even if the database had
114386
=== modified file 'innodb_fast_shutdown.patch'
--- innodb_fast_shutdown.patch 2011-05-12 11:00:37 +0000
+++ innodb_fast_shutdown.patch 2011-06-21 09:06:00 +0000
@@ -165,7 +165,7 @@
165diff -ruN a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c165diff -ruN a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c
166--- a/storage/innodb_plugin/srv/srv0srv.c 2010-11-16 21:33:00.000000000 +0300166--- a/storage/innodb_plugin/srv/srv0srv.c 2010-11-16 21:33:00.000000000 +0300
167+++ b/storage/innodb_plugin/srv/srv0srv.c 2010-11-16 21:34:06.000000000 +0300167+++ b/storage/innodb_plugin/srv/srv0srv.c 2010-11-16 21:34:06.000000000 +0300
168@@ -713,6 +713,8 @@168@@ -708,6 +708,8 @@
169 169
170 UNIV_INTERN os_event_t srv_lock_timeout_thread_event;170 UNIV_INTERN os_event_t srv_lock_timeout_thread_event;
171 171
@@ -174,7 +174,7 @@
174 UNIV_INTERN srv_sys_t* srv_sys = NULL;174 UNIV_INTERN srv_sys_t* srv_sys = NULL;
175 175
176 /* padding to prevent other memory update hotspots from residing on176 /* padding to prevent other memory update hotspots from residing on
177@@ -1018,6 +1020,7 @@177@@ -1013,6 +1015,7 @@
178 }178 }
179 179
180 srv_lock_timeout_thread_event = os_event_create(NULL);180 srv_lock_timeout_thread_event = os_event_create(NULL);
@@ -182,7 +182,7 @@
182 182
183 for (i = 0; i < SRV_MASTER + 1; i++) {183 for (i = 0; i < SRV_MASTER + 1; i++) {
184 srv_n_threads_active[i] = 0;184 srv_n_threads_active[i] = 0;
185@@ -2245,7 +2248,7 @@185@@ -2240,7 +2243,7 @@
186 /* Wake up every 5 seconds to see if we need to print186 /* Wake up every 5 seconds to see if we need to print
187 monitor information. */187 monitor information. */
188 188
@@ -191,7 +191,7 @@
191 191
192 current_time = time(NULL);192 current_time = time(NULL);
193 193
194@@ -2387,7 +2390,7 @@194@@ -2382,7 +2385,7 @@
195 /* When someone is waiting for a lock, we wake up every second195 /* When someone is waiting for a lock, we wake up every second
196 and check if a timeout has passed for a lock wait */196 and check if a timeout has passed for a lock wait */
197 197
@@ -200,7 +200,7 @@
200 200
201 srv_lock_timeout_active = TRUE;201 srv_lock_timeout_active = TRUE;
202 202
203@@ -2561,7 +2564,7 @@203@@ -2556,7 +2559,7 @@
204 204
205 fflush(stderr);205 fflush(stderr);
206 206
@@ -209,7 +209,7 @@
209 209
210 if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {210 if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
211 211
212@@ -2605,7 +2608,7 @@212@@ -2600,7 +2603,7 @@
213 last_dump_time = time(NULL);213 last_dump_time = time(NULL);
214 214
215 loop:215 loop:
@@ -218,7 +218,7 @@
218 218
219 if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {219 if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
220 goto exit_func;220 goto exit_func;
221@@ -2788,7 +2791,7 @@221@@ -2783,7 +2786,7 @@
222 if (!skip_sleep) {222 if (!skip_sleep) {
223 if (next_itr_time > cur_time) {223 if (next_itr_time > cur_time) {
224 224
@@ -227,7 +227,7 @@
227 srv_main_sleeps++;227 srv_main_sleeps++;
228 228
229 /*229 /*
230@@ -3495,9 +3498,10 @@230@@ -3490,9 +3493,10 @@
231 mutex_exit(&kernel_mutex);231 mutex_exit(&kernel_mutex);
232 232
233 sleep_ms = 10;233 sleep_ms = 10;

Subscribers

People subscribed via source and target branches