Merge lp:~stewart/drizzle/bug646898-functionengine-mem-usage-position into lp:drizzle/7.0

Proposed by Stewart Smith
Status: Merged
Approved by: Brian Aker
Approved revision: 2189
Merged at revision: 2196
Proposed branch: lp:~stewart/drizzle/bug646898-functionengine-mem-usage-position
Merge into: lp:drizzle/7.0
Diff against target: 137 lines (+68/-11)
2 files modified
plugin/function_engine/cursor.cc (+62/-9)
plugin/function_engine/cursor.h (+6/-2)
To merge this branch: bzr merge lp:~stewart/drizzle/bug646898-functionengine-mem-usage-position
Reviewer Review Type Date Requested Status
Drizzle Developers Pending
Review via email: mp+50845@code.launchpad.net

Description of the change

Greatly reduce the memory usage of function_engine tables when ::position() is used.
By storing packed rows instead of the full row image, we can decrease memory usage by more than an order of magnitude in some situations.

  Replace the FunctionCursor row_cache of full rows with one that holds
  packed rows (using routines similar to those ARCHIVE uses before
  compression). This means we use minimal memory for each row. The ref
  for ::position() is now an offset into the row_cache buffer instead of
  an incrementing index into an array.

  This change GREATLY reduces the memory required for various queries on
  table function tables.

  e.g. (measured using valgrind --tool=massif) running the
  information_schema_dictionary test suite
  BEFORE: peaked at 77.5MB heap usage
  AFTER: peaked at 33.7MB heap usage

  for schema_dictionary suite:
  BEFORE: peaked at 782.6MB heap usage
  AFTER: peaked at 31.05MB heap usage

  (i.e. memory requirements can be reduced by more than an order of
  magnitude)

  A future patch may want to also overflow to disk if needed.

http://hudson.drizzle.org/view/Drizzle-param/job/drizzle-param/753/

To post a comment you must log in.
Revision history for this message
Brian Aker (brianaker) wrote :

Thanks, in general I think we need to find a generic solution to this problem at some point.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'plugin/function_engine/cursor.cc'
2--- plugin/function_engine/cursor.cc 2011-02-17 00:14:13 +0000
3+++ plugin/function_engine/cursor.cc 2011-02-23 01:11:55 +0000
4@@ -23,6 +23,7 @@
5 #include <plugin/function_engine/cursor.h>
6 #include <drizzled/session.h>
7 #include <drizzled/internal/my_sys.h>
8+#include <drizzled/field/blob.h>
9
10 #include <unistd.h>
11 #include <fcntl.h>
12@@ -48,7 +49,7 @@
13 tool= static_cast<Function *>(getEngine())->getFunction(name);
14 // assert(tool);
15
16- record_id= 0;
17+ row_cache_position= 0;
18
19 if (not tool)
20 return HA_ERR_NO_SUCH_TABLE;
21@@ -100,15 +101,55 @@
22 return more_rows ? 0 : HA_ERR_END_OF_FILE;
23 }
24
25+uint32_t FunctionCursor::max_row_length()
26+{
27+ uint32_t length= (uint32_t)(getTable()->getRecordLength() + getTable()->sizeFields()*2);
28+
29+ uint32_t *ptr, *end;
30+ for (ptr= getTable()->getBlobField(), end=ptr + getTable()->sizeBlobFields();
31+ ptr != end ;
32+ ptr++)
33+ {
34+ length += 2 + ((Field_blob*)getTable()->getField(*ptr))->get_length();
35+ }
36+
37+ return length;
38+}
39+
40+unsigned int FunctionCursor::pack_row(const unsigned char *record)
41+{
42+ unsigned char *ptr;
43+
44+ record_buffer.resize(max_row_length());
45+
46+ /* Copy null bits */
47+ memcpy(&record_buffer[0], record, getTable()->getShare()->null_bytes);
48+ ptr= &record_buffer[0] + getTable()->getShare()->null_bytes;
49+
50+ for (Field **field=getTable()->getFields() ; *field ; field++)
51+ {
52+ if (!((*field)->is_null()))
53+ ptr= (*field)->pack(ptr, record + (*field)->offset(record));
54+ }
55+
56+ return((unsigned int) (ptr - &record_buffer[0]));
57+}
58+
59 void FunctionCursor::position(const unsigned char *record)
60 {
61- if (row_cache.size() <= record_id * getTable()->getShare()->getRecordLength())
62+ uint32_t max_length= max_row_length();
63+
64+ if (row_cache.size() <= row_cache_position + max_length)
65 {
66- row_cache.resize(row_cache.size() + getTable()->getShare()->getRecordLength() * 100); // Hardwired at adding an additional 100 rows of storage
67+ row_cache.resize(row_cache.size() + max_length);
68 }
69- memcpy(&row_cache[record_id * getTable()->getShare()->getRecordLength()], record, getTable()->getShare()->getRecordLength());
70- internal::my_store_ptr(ref, ref_length, record_id);
71- record_id++;
72+
73+ unsigned int r_pack_length;
74+ r_pack_length= pack_row(record);
75+ internal::my_store_ptr(ref, ref_length, row_cache_position);
76+
77+ memcpy(&row_cache[row_cache_position], &record_buffer[0], r_pack_length);
78+ row_cache_position+= r_pack_length;
79 }
80
81
82@@ -118,7 +159,7 @@
83 estimate_of_rows= rows_returned;
84
85 row_cache.clear();
86- record_id= 0;
87+ row_cache_position= 0;
88 }
89
90 int FunctionCursor::extra(enum ha_extra_function operation)
91@@ -151,8 +192,20 @@
92 ha_statistic_increment(&system_status_var::ha_read_rnd_count);
93 size_t position_id= (size_t)internal::my_get_ptr(pos, ref_length);
94
95- assert(position_id * getTable()->getShare()->getRecordLength() < row_cache.size());
96- memcpy(buf, &row_cache[position_id * getTable()->getShare()->getRecordLength()], getTable()->getShare()->getRecordLength());
97+ const unsigned char *ptr;
98+ ptr= &row_cache[position_id];
99+
100+ /* Copy null bits */
101+ memcpy(buf, ptr, getTable()->getNullBytes());
102+ ptr+= getTable()->getNullBytes();
103+ // and copy fields
104+ for (Field **field= getTable()->getFields() ; *field ; field++)
105+ {
106+ if (!((*field)->is_null()))
107+ {
108+ ptr= (*field)->unpack(buf + (*field)->offset(getTable()->getInsertRecord()), ptr);
109+ }
110+ }
111
112 return 0;
113 }
114
115=== modified file 'plugin/function_engine/cursor.h'
116--- plugin/function_engine/cursor.h 2010-12-18 04:43:40 +0000
117+++ plugin/function_engine/cursor.h 2011-02-23 01:11:55 +0000
118@@ -30,13 +30,17 @@
119 {
120 drizzled::plugin::TableFunction *tool;
121 drizzled::plugin::TableFunction::Generator *generator;
122- size_t record_id;
123- std::vector<unsigned char *> row_cache;
124+ size_t row_cache_position;
125+ std::vector<unsigned char> row_cache;
126 drizzled::ha_rows estimate_of_rows;
127 drizzled::ha_rows rows_returned;
128
129 void wipeCache();
130
131+ std::vector <unsigned char> record_buffer; // for pack_row
132+ uint32_t max_row_length();
133+ unsigned int pack_row(const unsigned char *record);
134+
135 public:
136 FunctionCursor(drizzled::plugin::StorageEngine &engine,
137 drizzled::Table &table_arg);

Subscribers

People subscribed via source and target branches