Merge lp:~drizzle-pbxt/drizzle/drizzle-pbxt-6 into lp:~drizzle-trunk/drizzle/development

Proposed by Paul McCullagh
Status: Merged
Merged at revision: not available
Proposed branch: lp:~drizzle-pbxt/drizzle/drizzle-pbxt-6
Merge into: lp:~drizzle-trunk/drizzle/development
Diff against target: 152031 lines (+151248/-0)
155 files modified
plugin/pbxt/AUTHORS (+4/-0)
plugin/pbxt/COPYING (+340/-0)
plugin/pbxt/ChangeLog (+918/-0)
plugin/pbxt/Makefile.am (+3/-0)
plugin/pbxt/README (+19/-0)
plugin/pbxt/TODO (+195/-0)
plugin/pbxt/bin/xtstat_xt.cc (+825/-0)
plugin/pbxt/plugin.am (+75/-0)
plugin/pbxt/plugin.ini (+25/-0)
plugin/pbxt/src/Makefile.am (+51/-0)
plugin/pbxt/src/backup_xt.cc (+802/-0)
plugin/pbxt/src/backup_xt.h (+34/-0)
plugin/pbxt/src/bsearch_xt.cc (+66/-0)
plugin/pbxt/src/bsearch_xt.h (+32/-0)
plugin/pbxt/src/cache_xt.cc (+1717/-0)
plugin/pbxt/src/cache_xt.h (+188/-0)
plugin/pbxt/src/ccutils_xt.cc (+69/-0)
plugin/pbxt/src/ccutils_xt.h (+220/-0)
plugin/pbxt/src/database_xt.cc (+1975/-0)
plugin/pbxt/src/database_xt.h (+301/-0)
plugin/pbxt/src/datadic_xt.cc (+3021/-0)
plugin/pbxt/src/datadic_xt.h (+301/-0)
plugin/pbxt/src/datalog_xt.cc (+2162/-0)
plugin/pbxt/src/datalog_xt.h (+229/-0)
plugin/pbxt/src/discover_xt.cc (+1694/-0)
plugin/pbxt/src/discover_xt.h (+80/-0)
plugin/pbxt/src/filesys_xt.cc (+2768/-0)
plugin/pbxt/src/filesys_xt.h (+300/-0)
plugin/pbxt/src/ha_pbxt.cc (+6483/-0)
plugin/pbxt/src/ha_pbxt.h (+392/-0)
plugin/pbxt/src/ha_xtsys.cc (+258/-0)
plugin/pbxt/src/ha_xtsys.h (+113/-0)
plugin/pbxt/src/hashtab_xt.cc (+264/-0)
plugin/pbxt/src/hashtab_xt.h (+78/-0)
plugin/pbxt/src/heap_xt.cc (+168/-0)
plugin/pbxt/src/heap_xt.h (+74/-0)
plugin/pbxt/src/index_xt.cc (+6120/-0)
plugin/pbxt/src/index_xt.h (+707/-0)
plugin/pbxt/src/iotest_xt.c (+1417/-0)
plugin/pbxt/src/linklist_xt.cc (+224/-0)
plugin/pbxt/src/linklist_xt.h (+77/-0)
plugin/pbxt/src/lock_xt.cc (+2012/-0)
plugin/pbxt/src/lock_xt.h (+566/-0)
plugin/pbxt/src/locklist_xt.cc (+154/-0)
plugin/pbxt/src/locklist_xt.h (+93/-0)
plugin/pbxt/src/memory_xt.cc (+1164/-0)
plugin/pbxt/src/memory_xt.h (+131/-0)
plugin/pbxt/src/myxt_xt.cc (+3535/-0)
plugin/pbxt/src/myxt_xt.h (+101/-0)
plugin/pbxt/src/pbms.h (+745/-0)
plugin/pbxt/src/pbms_enabled.cc (+244/-0)
plugin/pbxt/src/pbms_enabled.h (+103/-0)
plugin/pbxt/src/pthread_xt.cc (+796/-0)
plugin/pbxt/src/pthread_xt.h (+297/-0)
plugin/pbxt/src/restart_xt.cc (+4049/-0)
plugin/pbxt/src/restart_xt.h (+184/-0)
plugin/pbxt/src/sortedlist_xt.cc (+521/-0)
plugin/pbxt/src/sortedlist_xt.h (+104/-0)
plugin/pbxt/src/strutil_xt.cc (+690/-0)
plugin/pbxt/src/strutil_xt.h (+200/-0)
plugin/pbxt/src/systab_xt.cc (+666/-0)
plugin/pbxt/src/systab_xt.h (+155/-0)
plugin/pbxt/src/tabcache_xt.cc (+1528/-0)
plugin/pbxt/src/tabcache_xt.h (+296/-0)
plugin/pbxt/src/table_xt.cc (+5958/-0)
plugin/pbxt/src/table_xt.h (+737/-0)
plugin/pbxt/src/thread_xt.cc (+2751/-0)
plugin/pbxt/src/thread_xt.h (+833/-0)
plugin/pbxt/src/trace_xt.cc (+404/-0)
plugin/pbxt/src/trace_xt.h (+76/-0)
plugin/pbxt/src/util_xt.cc (+431/-0)
plugin/pbxt/src/util_xt.h (+124/-0)
plugin/pbxt/src/win_inttypes.h (+259/-0)
plugin/pbxt/src/xaction_xt.cc (+3016/-0)
plugin/pbxt/src/xaction_xt.h (+226/-0)
plugin/pbxt/src/xactlog_xt.cc (+3082/-0)
plugin/pbxt/src/xactlog_xt.h (+508/-0)
plugin/pbxt/src/xt_config.h (+132/-0)
plugin/pbxt/src/xt_defs.h (+965/-0)
plugin/pbxt/src/xt_errno.h (+137/-0)
tests/r/pbxt/alter_table.result (+888/-0)
tests/r/pbxt/alter_table_basic.result (+55/-0)
tests/r/pbxt/analyze.result (+48/-0)
tests/r/pbxt/auto_increment.result (+364/-0)
tests/r/pbxt/bench_count_distinct.result (+11/-0)
tests/r/pbxt/binary.result (+155/-0)
tests/r/pbxt/compare.result (+88/-0)
tests/r/pbxt/create.result (+1555/-0)
tests/r/pbxt/csv.result (+5112/-0)
tests/r/pbxt/ctype_utf8.result (+1376/-0)
tests/r/pbxt/data_dictionary_like_info.result (+408/-0)
tests/r/pbxt/delete.result (+221/-0)
tests/r/pbxt/derived.result (+345/-0)
tests/r/pbxt/distinct.result (+680/-0)
tests/r/pbxt/drizzlecheck.result (+38/-0)
tests/r/pbxt/drizzledump.result (+1218/-0)
tests/r/pbxt/drop.result (+96/-0)
tests/r/pbxt/endspace.result (+220/-0)
tests/r/pbxt/func_gconcat.result (+869/-0)
tests/r/pbxt/func_group.result (+1281/-0)
tests/r/pbxt/func_in.result (+538/-0)
tests/r/pbxt/func_like.result (+58/-0)
tests/r/pbxt/func_str.result (+1790/-0)
tests/r/pbxt/greedy_optimizer.result (+657/-0)
tests/r/pbxt/group_by.result (+1518/-0)
tests/r/pbxt/group_min_max.result (+2279/-0)
tests/r/pbxt/heap.result (+725/-0)
tests/r/pbxt/innodb.result (+2739/-0)
tests/r/pbxt/insert.result (+264/-0)
tests/r/pbxt/insert_select.result (+795/-0)
tests/r/pbxt/insert_update.result (+407/-0)
tests/r/pbxt/join.result (+979/-0)
tests/r/pbxt/join_nested.result (+1518/-0)
tests/r/pbxt/join_outer.result (+1213/-0)
tests/r/pbxt/key.result (+565/-0)
tests/r/pbxt/key_diff.result (+52/-0)
tests/r/pbxt/key_primary.result (+20/-0)
tests/r/pbxt/myisam.result (+1790/-0)
tests/r/pbxt/mysql_protocol.result (+18/-0)
tests/r/pbxt/mysqldump-max.result (+141/-0)
tests/r/pbxt/negation_elimination.result (+391/-0)
tests/r/pbxt/null.result (+256/-0)
tests/r/pbxt/null_key.result (+427/-0)
tests/r/pbxt/order_by.result (+2325/-0)
tests/r/pbxt/pool_of_threads.result (+2136/-0)
tests/r/pbxt/range.result (+1153/-0)
tests/r/pbxt/row.result (+445/-0)
tests/r/pbxt/schema.result (+13/-0)
tests/r/pbxt/select.result (+3716/-0)
tests/r/pbxt/select_found.result (+282/-0)
tests/r/pbxt/select_safe.result (+93/-0)
tests/r/pbxt/show_check.result (+606/-0)
tests/r/pbxt/statement_boundaries.result (+85/-0)
tests/r/pbxt/status.result (+95/-0)
tests/r/pbxt/subselect.result (+4686/-0)
tests/r/pbxt/subselect3.result (+782/-0)
tests/r/pbxt/subselect_mat.result (+970/-0)
tests/r/pbxt/subselect_no_mat.result (+4696/-0)
tests/r/pbxt/subselect_no_mat_and_semi_join.result (+4696/-0)
tests/r/pbxt/subselect_no_opts.result (+4694/-0)
tests/r/pbxt/subselect_no_semijoin.result (+4694/-0)
tests/r/pbxt/subselect_sj.result (+82/-0)
tests/r/pbxt/subselect_sj2.result (+362/-0)
tests/r/pbxt/type_blob.result (+673/-0)
tests/r/pbxt/type_enum.result (+1734/-0)
tests/r/pbxt/type_float.result (+359/-0)
tests/r/pbxt/type_newdecimal.result (+1214/-0)
tests/r/pbxt/type_ranges.result (+254/-0)
tests/r/pbxt/type_varchar.result (+445/-0)
tests/r/pbxt/union.result (+1456/-0)
tests/r/pbxt/update.result (+404/-0)
tests/r/pbxt/user_var.result (+357/-0)
tests/r/pbxt/varbinary.result (+51/-0)
tests/r/pbxt/variables.result (+591/-0)
tests/r/pbxt/warnings.result (+169/-0)
To merge this branch: bzr merge lp:~drizzle-pbxt/drizzle/drizzle-pbxt-6
Reviewer Review Type Date Requested Status
Jay Pipes (community) Approve
Review via email: mp+23178@code.launchpad.net

Description of the change

This patch includes the PBXT engine, and changes to the tests/r/pbxt/*.result files.

The only change to Drizzle code is in the lp:~drizzle-pbxt/drizzle/drizzle-mod-for-pbxt patch.

The drizzle-mod-for-pbxt patch should be merged before this patch.

No changes were made to Drizzle test scripts or Drizzle test results.

./test-run --engine=pbxt now completes with 93% of the tests successful.

To post a comment you must log in.
Revision history for this message
Jay Pipes (jaypipes) wrote :

Cool with me only if (besides the current removal of the checksum table statement code) you can verify that this patch ONLY imports the PBXT plugin and no other changes to the kernel have been made?

review: Needs Information
1464. By Paul McCullagh

Removed the PBXT system/virtual tables, this meant that the call to mysql_prepare_create_table() is no longer required

1465. By Paul McCullagh

Merged with trunk

Revision history for this message
Paul McCullagh (paul-mccullagh) wrote :

As my comment to lp:~drizzle-pbxt/drizzle/drizzle-mod-for-pbxt indicated.

I have removed the PBXT system/virtual tables, and therefore also the dependency on the change in the drizzle-mod-for-pbxt patch, in this branch.

I have also merged with the trunk.

So, this patch now makes _no_ changes to any Drizzle code or test scripts.

As before 93% of the tests pass.

Revision history for this message
Jay Pipes (jaypipes) wrote :

Awesomeness :)

review: Approve
Revision history for this message
Paul McCullagh (paul-mccullagh) wrote :

Just fixed 2 compile errors I found on param build:

plugin/pbxt/src/index_xt.cc:37:19: error: bzlib.h: No such file or directory

and

plugin/pbxt/src/ha_pbxt.cc:4484: warning: variable 'err' might be clobbered by 'longjmp' or 'vfork'

Cause problems on some systems.

1466. By Paul McCullagh

bzlib.h header not available on suse-11-amd64 or ubuntu-9.10-amd64

1467. By Paul McCullagh

Some compilers complained that: variable 'err' might be clobbered by 'longjmp' or 'vfork'

1468. By Paul McCullagh

Merged with trunk

Revision history for this message
Stewart Smith (stewart) wrote :

On Sun, 11 Apr 2010 17:22:54 -0000, Paul McCullagh <email address hidden> wrote:
> So, this patch now makes _no_ changes to and Drizzle code or test
> scripts.

Awesome.

> As before 93% of the tests pass.

so I guess we have to work out what we want to do about this... maybe
merge it and have 1 builder run it and then get all the tests going.

--
Stewart Smith

Revision history for this message
Paul McCullagh (paul-mccullagh) wrote :

On Apr 12, 2010, at 1:19 AM, Stewart Smith wrote:

> On Sun, 11 Apr 2010 17:22:54 -0000, Paul McCullagh <<email address hidden>
> > wrote:
>> So, this patch now makes _no_ changes to and Drizzle code or test
>> scripts.
>
> Awesome.
>
>> As before 93% of the tests pass.
>
> so I guess we have to work out what we want to do about this... maybe
> merge it and have 1 builder run it and then get all the tests going.

It would be great if we can proceed that way. Note that the tests I am
talking about are when run with the --engine=pbxt option.

So in the current default build, PBXT is not tested at all, so you
should see no failures (actually my last test on param build showed
one test failure, which just looked like the result file needs to be
adjusted).

Anyway, once we have 100% PBXT tests running, we can maybe add a
test run with PBXT to the default.

>
> --
> Stewart Smith

--
Paul McCullagh
PrimeBase Technologies
www.primebase.org
www.blobstreaming.org
pbxt.blogspot.com

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added directory 'plugin/pbxt'
2=== added file 'plugin/pbxt/AUTHORS'
3--- plugin/pbxt/AUTHORS 1970-01-01 00:00:00 +0000
4+++ plugin/pbxt/AUTHORS 2010-04-11 18:56:24 +0000
5@@ -0,0 +1,4 @@
6+Paul McCullagh
7+paul.mccullagh@primebase.org
8+http://www.primebase.org
9+http://pbxt.blogspot.com
10
11=== added file 'plugin/pbxt/COPYING'
12--- plugin/pbxt/COPYING 1970-01-01 00:00:00 +0000
13+++ plugin/pbxt/COPYING 2010-04-11 18:56:24 +0000
14@@ -0,0 +1,340 @@
15+ GNU GENERAL PUBLIC LICENSE
16+ Version 2, June 1991
17+
18+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
19+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20+ Everyone is permitted to copy and distribute verbatim copies
21+ of this license document, but changing it is not allowed.
22+
23+ Preamble
24+
25+ The licenses for most software are designed to take away your
26+freedom to share and change it. By contrast, the GNU General Public
27+License is intended to guarantee your freedom to share and change free
28+software--to make sure the software is free for all its users. This
29+General Public License applies to most of the Free Software
30+Foundation's software and to any other program whose authors commit to
31+using it. (Some other Free Software Foundation software is covered by
32+the GNU Library General Public License instead.) You can apply it to
33+your programs, too.
34+
35+ When we speak of free software, we are referring to freedom, not
36+price. Our General Public Licenses are designed to make sure that you
37+have the freedom to distribute copies of free software (and charge for
38+this service if you wish), that you receive source code or can get it
39+if you want it, that you can change the software or use pieces of it
40+in new free programs; and that you know you can do these things.
41+
42+ To protect your rights, we need to make restrictions that forbid
43+anyone to deny you these rights or to ask you to surrender the rights.
44+These restrictions translate to certain responsibilities for you if you
45+distribute copies of the software, or if you modify it.
46+
47+ For example, if you distribute copies of such a program, whether
48+gratis or for a fee, you must give the recipients all the rights that
49+you have. You must make sure that they, too, receive or can get the
50+source code. And you must show them these terms so they know their
51+rights.
52+
53+ We protect your rights with two steps: (1) copyright the software, and
54+(2) offer you this license which gives you legal permission to copy,
55+distribute and/or modify the software.
56+
57+ Also, for each author's protection and ours, we want to make certain
58+that everyone understands that there is no warranty for this free
59+software. If the software is modified by someone else and passed on, we
60+want its recipients to know that what they have is not the original, so
61+that any problems introduced by others will not reflect on the original
62+authors' reputations.
63+
64+ Finally, any free program is threatened constantly by software
65+patents. We wish to avoid the danger that redistributors of a free
66+program will individually obtain patent licenses, in effect making the
67+program proprietary. To prevent this, we have made it clear that any
68+patent must be licensed for everyone's free use or not licensed at all.
69+
70+ The precise terms and conditions for copying, distribution and
71+modification follow.
72+
73
74+ GNU GENERAL PUBLIC LICENSE
75+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
76+
77+ 0. This License applies to any program or other work which contains
78+a notice placed by the copyright holder saying it may be distributed
79+under the terms of this General Public License. The "Program", below,
80+refers to any such program or work, and a "work based on the Program"
81+means either the Program or any derivative work under copyright law:
82+that is to say, a work containing the Program or a portion of it,
83+either verbatim or with modifications and/or translated into another
84+language. (Hereinafter, translation is included without limitation in
85+the term "modification".) Each licensee is addressed as "you".
86+
87+Activities other than copying, distribution and modification are not
88+covered by this License; they are outside its scope. The act of
89+running the Program is not restricted, and the output from the Program
90+is covered only if its contents constitute a work based on the
91+Program (independent of having been made by running the Program).
92+Whether that is true depends on what the Program does.
93+
94+ 1. You may copy and distribute verbatim copies of the Program's
95+source code as you receive it, in any medium, provided that you
96+conspicuously and appropriately publish on each copy an appropriate
97+copyright notice and disclaimer of warranty; keep intact all the
98+notices that refer to this License and to the absence of any warranty;
99+and give any other recipients of the Program a copy of this License
100+along with the Program.
101+
102+You may charge a fee for the physical act of transferring a copy, and
103+you may at your option offer warranty protection in exchange for a fee.
104+
105+ 2. You may modify your copy or copies of the Program or any portion
106+of it, thus forming a work based on the Program, and copy and
107+distribute such modifications or work under the terms of Section 1
108+above, provided that you also meet all of these conditions:
109+
110+ a) You must cause the modified files to carry prominent notices
111+ stating that you changed the files and the date of any change.
112+
113+ b) You must cause any work that you distribute or publish, that in
114+ whole or in part contains or is derived from the Program or any
115+ part thereof, to be licensed as a whole at no charge to all third
116+ parties under the terms of this License.
117+
118+ c) If the modified program normally reads commands interactively
119+ when run, you must cause it, when started running for such
120+ interactive use in the most ordinary way, to print or display an
121+ announcement including an appropriate copyright notice and a
122+ notice that there is no warranty (or else, saying that you provide
123+ a warranty) and that users may redistribute the program under
124+ these conditions, and telling the user how to view a copy of this
125+ License. (Exception: if the Program itself is interactive but
126+ does not normally print such an announcement, your work based on
127+ the Program is not required to print an announcement.)
128+
129
130+These requirements apply to the modified work as a whole. If
131+identifiable sections of that work are not derived from the Program,
132+and can be reasonably considered independent and separate works in
133+themselves, then this License, and its terms, do not apply to those
134+sections when you distribute them as separate works. But when you
135+distribute the same sections as part of a whole which is a work based
136+on the Program, the distribution of the whole must be on the terms of
137+this License, whose permissions for other licensees extend to the
138+entire whole, and thus to each and every part regardless of who wrote it.
139+
140+Thus, it is not the intent of this section to claim rights or contest
141+your rights to work written entirely by you; rather, the intent is to
142+exercise the right to control the distribution of derivative or
143+collective works based on the Program.
144+
145+In addition, mere aggregation of another work not based on the Program
146+with the Program (or with a work based on the Program) on a volume of
147+a storage or distribution medium does not bring the other work under
148+the scope of this License.
149+
150+ 3. You may copy and distribute the Program (or a work based on it,
151+under Section 2) in object code or executable form under the terms of
152+Sections 1 and 2 above provided that you also do one of the following:
153+
154+ a) Accompany it with the complete corresponding machine-readable
155+ source code, which must be distributed under the terms of Sections
156+ 1 and 2 above on a medium customarily used for software interchange; or,
157+
158+ b) Accompany it with a written offer, valid for at least three
159+ years, to give any third party, for a charge no more than your
160+ cost of physically performing source distribution, a complete
161+ machine-readable copy of the corresponding source code, to be
162+ distributed under the terms of Sections 1 and 2 above on a medium
163+ customarily used for software interchange; or,
164+
165+ c) Accompany it with the information you received as to the offer
166+ to distribute corresponding source code. (This alternative is
167+ allowed only for noncommercial distribution and only if you
168+ received the program in object code or executable form with such
169+ an offer, in accord with Subsection b above.)
170+
171+The source code for a work means the preferred form of the work for
172+making modifications to it. For an executable work, complete source
173+code means all the source code for all modules it contains, plus any
174+associated interface definition files, plus the scripts used to
175+control compilation and installation of the executable. However, as a
176+special exception, the source code distributed need not include
177+anything that is normally distributed (in either source or binary
178+form) with the major components (compiler, kernel, and so on) of the
179+operating system on which the executable runs, unless that component
180+itself accompanies the executable.
181+
182+If distribution of executable or object code is made by offering
183+access to copy from a designated place, then offering equivalent
184+access to copy the source code from the same place counts as
185+distribution of the source code, even though third parties are not
186+compelled to copy the source along with the object code.
187+
188
189+ 4. You may not copy, modify, sublicense, or distribute the Program
190+except as expressly provided under this License. Any attempt
191+otherwise to copy, modify, sublicense or distribute the Program is
192+void, and will automatically terminate your rights under this License.
193+However, parties who have received copies, or rights, from you under
194+this License will not have their licenses terminated so long as such
195+parties remain in full compliance.
196+
197+ 5. You are not required to accept this License, since you have not
198+signed it. However, nothing else grants you permission to modify or
199+distribute the Program or its derivative works. These actions are
200+prohibited by law if you do not accept this License. Therefore, by
201+modifying or distributing the Program (or any work based on the
202+Program), you indicate your acceptance of this License to do so, and
203+all its terms and conditions for copying, distributing or modifying
204+the Program or works based on it.
205+
206+ 6. Each time you redistribute the Program (or any work based on the
207+Program), the recipient automatically receives a license from the
208+original licensor to copy, distribute or modify the Program subject to
209+these terms and conditions. You may not impose any further
210+restrictions on the recipients' exercise of the rights granted herein.
211+You are not responsible for enforcing compliance by third parties to
212+this License.
213+
214+ 7. If, as a consequence of a court judgment or allegation of patent
215+infringement or for any other reason (not limited to patent issues),
216+conditions are imposed on you (whether by court order, agreement or
217+otherwise) that contradict the conditions of this License, they do not
218+excuse you from the conditions of this License. If you cannot
219+distribute so as to satisfy simultaneously your obligations under this
220+License and any other pertinent obligations, then as a consequence you
221+may not distribute the Program at all. For example, if a patent
222+license would not permit royalty-free redistribution of the Program by
223+all those who receive copies directly or indirectly through you, then
224+the only way you could satisfy both it and this License would be to
225+refrain entirely from distribution of the Program.
226+
227+If any portion of this section is held invalid or unenforceable under
228+any particular circumstance, the balance of the section is intended to
229+apply and the section as a whole is intended to apply in other
230+circumstances.
231+
232+It is not the purpose of this section to induce you to infringe any
233+patents or other property right claims or to contest validity of any
234+such claims; this section has the sole purpose of protecting the
235+integrity of the free software distribution system, which is
236+implemented by public license practices. Many people have made
237+generous contributions to the wide range of software distributed
238+through that system in reliance on consistent application of that
239+system; it is up to the author/donor to decide if he or she is willing
240+to distribute software through any other system and a licensee cannot
241+impose that choice.
242+
243+This section is intended to make thoroughly clear what is believed to
244+be a consequence of the rest of this License.
245+
246
247+ 8. If the distribution and/or use of the Program is restricted in
248+certain countries either by patents or by copyrighted interfaces, the
249+original copyright holder who places the Program under this License
250+may add an explicit geographical distribution limitation excluding
251+those countries, so that distribution is permitted only in or among
252+countries not thus excluded. In such case, this License incorporates
253+the limitation as if written in the body of this License.
254+
255+ 9. The Free Software Foundation may publish revised and/or new versions
256+of the General Public License from time to time. Such new versions will
257+be similar in spirit to the present version, but may differ in detail to
258+address new problems or concerns.
259+
260+Each version is given a distinguishing version number. If the Program
261+specifies a version number of this License which applies to it and "any
262+later version", you have the option of following the terms and conditions
263+either of that version or of any later version published by the Free
264+Software Foundation. If the Program does not specify a version number of
265+this License, you may choose any version ever published by the Free Software
266+Foundation.
267+
268+ 10. If you wish to incorporate parts of the Program into other free
269+programs whose distribution conditions are different, write to the author
270+to ask for permission. For software which is copyrighted by the Free
271+Software Foundation, write to the Free Software Foundation; we sometimes
272+make exceptions for this. Our decision will be guided by the two goals
273+of preserving the free status of all derivatives of our free software and
274+of promoting the sharing and reuse of software generally.
275+
276+ NO WARRANTY
277+
278+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
279+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
280+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
281+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
282+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
283+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
284+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
285+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
286+REPAIR OR CORRECTION.
287+
288+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
289+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
290+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
291+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
292+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
293+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
294+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
295+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
296+POSSIBILITY OF SUCH DAMAGES.
297+
298+ END OF TERMS AND CONDITIONS
299+
300
301+ How to Apply These Terms to Your New Programs
302+
303+ If you develop a new program, and you want it to be of the greatest
304+possible use to the public, the best way to achieve this is to make it
305+free software which everyone can redistribute and change under these terms.
306+
307+ To do so, attach the following notices to the program. It is safest
308+to attach them to the start of each source file to most effectively
309+convey the exclusion of warranty; and each file should have at least
310+the "copyright" line and a pointer to where the full notice is found.
311+
312+ <one line to give the program's name and a brief idea of what it does.>
313+ Copyright (C) <year> <name of author>
314+
315+ This program is free software; you can redistribute it and/or modify
316+ it under the terms of the GNU General Public License as published by
317+ the Free Software Foundation; either version 2 of the License, or
318+ (at your option) any later version.
319+
320+ This program is distributed in the hope that it will be useful,
321+ but WITHOUT ANY WARRANTY; without even the implied warranty of
322+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
323+ GNU General Public License for more details.
324+
325+ You should have received a copy of the GNU General Public License
326+ along with this program; if not, write to the Free Software
327+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
328+
329+
330+Also add information on how to contact you by electronic and paper mail.
331+
332+If the program is interactive, make it output a short notice like this
333+when it starts in an interactive mode:
334+
335+ Gnomovision version 69, Copyright (C) year name of author
336+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
337+ This is free software, and you are welcome to redistribute it
338+ under certain conditions; type `show c' for details.
339+
340+The hypothetical commands `show w' and `show c' should show the appropriate
341+parts of the General Public License. Of course, the commands you use may
342+be called something other than `show w' and `show c'; they could even be
343+mouse-clicks or menu items--whatever suits your program.
344+
345+You should also get your employer (if you work as a programmer) or your
346+school, if any, to sign a "copyright disclaimer" for the program, if
347+necessary. Here is a sample; alter the names:
348+
349+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
350+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
351+
352+ <signature of Ty Coon>, 1 April 1989
353+ Ty Coon, President of Vice
354+
355+This General Public License does not permit incorporating your program into
356+proprietary programs. If your program is a subroutine library, you may
357+consider it more useful to permit linking proprietary applications with the
358+library. If this is what you want to do, use the GNU Library General
359+Public License instead of this License.
360
361=== added file 'plugin/pbxt/ChangeLog'
362--- plugin/pbxt/ChangeLog 1970-01-01 00:00:00 +0000
363+++ plugin/pbxt/ChangeLog 2010-04-11 18:56:24 +0000
364@@ -0,0 +1,918 @@
365+PBXT Release Notes
366+==================
367+
368+------- 1.1.01 Beta - Not yet released
369+
370+RN1/17: Fixed bug #430637: 'ERROR -86: Too many threads' reported during concurrecy test. I have removed the maximum thread limit. The system variable pbxt_max_threads has been depricated, and will no longer be supported in 1.1.
371+
372+RN1/16: Added system variable pbxt_index_dirty_threshold. This is a percentage value. When the number of dirty pages in the index cache reaches this level, all indices are flushed. The default value is 80. The value 0 indicates that the indices should only be flushed when no cache pages can be freed.
373+
374+RN1/15: Implemented re-write flushing option. Re-write flushing can be more efficient because it causes more sequential writing and less random seek operations on file flush.
375+
376+RN1/14: Implemented asynchronous flushing of files during checkpoint. This allows multiple files (current limit 10) to be flushed at the same time, which increases the total throughput on the disk.
377+
378+RN1/13: Added the pbxt_record_write_threshold system parameter. This value determines when the writer will start to transfer data from the transaction log to the record pointer (.xtr) and handle data (.xtd) files. The default is 4MB. If the value is set to zero then the parameter is ignored. The writer will also begin to transfer data when the transaction log cache is exhausted, or the record cache is full.
379+
380+RN1/12: Added option to buffer and sort data written by the writer thread. This allows writes to the handle data file (.xtd) to be sorted and consolidated.
381+
382+RN1/11: Corrected a problem that sometimes caused a pause in activity when the record cache was full.
383+
384+RN1/10: Fixed a bug in the record cache that caused PBXT to think it had run out of cache memory. The effect was that PBXT used less and less cache over time. The bug occurs during heavy concurrent access on the record cache. The affect is the PBXT gets slower and slower.
385+
386+RN1/9: PBXT now commits every 10000 rows during an import. Import is done during ALTER TABLE, CREATE INDEX, LOAD DATA INFILE, REPAIR TABLE, OPTIMIZE TABLE and DROP INDEX statements.
387+
388+RN1/8: Temporary tables changes: Index files of temporary tables are no longer flushed, on recovery temporary tables are ignored.
389+
390+RN1/7: Indexes are no longer locked during index flush. This is done by copying index pages before modification, if a flush is in progress.
391+
392+RN1/6: Added a compile option to compact indexes where data is mainly appended to the end of the index.
393+
394+RN1/5: Changes to index flushing:
395+- Implemented flushing of index as a background task. Several threads can wait for the index flush to complete. Threads can be notified when it is safe to continue (i.e. when all pages have been marked clean).
396+- Increased the size of the page sort buffer used to sort pages to be written to the index file.
397+- Added an option to fill pages (in the case of consecutive pages) written to the index. An option also allows the block size written to the index cache to be changed. Note that performance tests (on Mac) show no overall improvement here, flush is faster, but more data is written.
398+- Flush the index file after dirty index pages have been marked clean, this allows work to continue during flushing, in low-memory situations.
399+- Added an option to write only the area of an index page that has changed.
400+- When out of index cache, we now check other tables to see if the index cache needs to be flushed. This is not done if more than 1/4 of the entire index cache is dirty due to the current index. Tables are sorted, the one with the most dirty index pages is flushed first.
401+- When out of index cache, if less than 1/4 of the entire index cache is dirty then no flushing is done. This can happen if another thread has cleaned the cache in the meantime.
402+
403+RN1/4: Added try lock variation for R/W locks (required to fix deadlock RN246).
404+
405+RN1/3: Fixed a deadlock that could occur during low index cache situations and added some checks for index corruption.
406+
407+RN1/2: Added thread pool for executing asynchronous tasks.
408+
409+RN1/1: Added memory resident tables. You can create a memory resident table using "STORAGE MEMORY", for example:
410+
411+CREATE TABLE t1 (...) ENGINE=PBXT STORAGE MEMORY
412+
413+------- 1.0.10i RC4 - 2010-03-17
414+
415+RN312: Fixed bug #534361: Valgrind error: write of uninitialised bytes in xt_flush_indices()
416+
417+RN311: Fixed ilog corruption when running out of disk space during an index flush operation, which led to corruption of the index.
418+
419+------- 1.0.10h RC4 - 2010-02-25
420+
421+RN310: Fixed Windows atomic INC/DEC operations, which led to the atomic R/W lock not working correctly. The result was that some index entries were not found.
422+
423+RN309: Fixed a bug that caused a crash when the index was corrupted. The crash occurs if the index page is not completely written, and an item in the index has a bad length.
424+
425+RN308: Fixed bug #509803: can't run tpcc (cannot compare FKs that rely on indexes of different length).
426+
427+------- 1.0.10g RC4 - 2010-02-11
428+
429+RN307: 2010-02-15: Set the internal version number 1.0.10g.
430+
431+RN306: All tests now run with MySQL 5.1.42.
432+
433+RN305: Fixed a bug that could cause a crash in filesort. The problem was that the return row estimate was incorrect, which caused the result of estimate_rows_upper_bound() to overflow to zero. Row estimate has been changed, and no longer takes into account deleted rows (so the row estimate is now a maximum).
434+
435+RN304: Fixed bug #513012: On a table with a trigger the same record is updated more than once in one statement
436+
437+------- 1.0.10f RC4 - 2010-01-29
438+
439+RN303: Fix back-ported from 1.1: Fixed a bug in the record cache that caused PBXT to think it had run out of cache memory. The effect was that PBXT used less and less cache over time. The bug occurs during heavy concurrent access on the record cache. The effect is that PBXT gets slower and slower.
440+
441+RN302: Fix back-ported from 1.1: Corrected a problem that sometimes caused a pause in activity when the record cache was full.
442+
443+------- 1.0.10e RC4 - 2010-01-25
444+
445+RN301: Fixed index statistics calculation. This bug led to the wrong indices being selected by the optimizer because all indices returned the same cost.
446+
447+RN300: Fixed bug #509968: START TRANSACTION WITH CONSISTENT SNAPSHOT breaks transactional flow.
448+
449+RN299: Fixed bug #509218: Server asserts with Assertion `mutex->__data.__owner == 0' failed on high concurrency OLTP test.
450+
451+------- 1.0.10d RC4 - 2010-01-11
452+
453+RN298: Fixed a bug that caused huge amounts of transaction log to be written when pbxt_flush_log_at_trx_commit = 2.
454+
455+------- 1.0.10c RC4 - 2009-12-29
456+
457+RN297: Updated "LOCK TABLES ... READ LOCAL" behavior to be more restrictive and compatible with InnoDB
458+
459+RN296: Fixed bug #499026: START TRANSACTION WITH CONSISTENT SNAPSHOT does not work for PBXT
460+
461+------- 1.0.10 RC4 - 2009-12-18
462+
463+RN295: PBXT tests now all run with MySQL 5.1.41.
464+
465+RN294: Fixed bug #483714: a broken table can prevent other tables from opening
466+
467+RN293: Added system variable pbxt_flush_log_at_trx_commit. The value of this variable determines whether the transaction log is written and/or flushed when a transaction is ended. A value of 0 means don't write or flush the transaction log, 1 means write and flush and 2 means write, but do not flush. No matter which setting is chosen, the transaction log is written and flushed at least once per second.
468+
469+------- 1.0.09g RC3 - 2009-12-16
470+
471+RN292: Fixed a bug that resulted in 2-phase commit not being used between PBXT and the binlog. This bug was a result of a hack which was added to solve a problem in a pre-release version of MySQL 5.1. The hack was removed.
472+
473+------- 1.0.09f RC3 - 2009-11-30
474+
475+RN291: Fixed bug #489088: On shutdown MySQL reports: [Warning] Plugin 'PBXT' will be forced to shutdown.
476+
477+RN290: Fixed bug #345524: pbxt does not compile on 64 bit windows. Currently atomic operations are not supported on this platform.
478+
479+RN286: Fixed a bug introduced in RN281, which could cause an index scan to hang. The original change was to prevent a warning in Valgrind.
480+
481+RN285: Merged changes required to compile with Drizzle.
482+
483+RN284: Fixed a bug that caused the error "[ERROR] Invalid (old?) table or database name 'mysqld.1'", when running temp_table.test under MariaDB (thanks to Monty for his initial bug fix). Added a fix for partition table names as well.
484+
485+RN283: Added win_inttypes.h to the distribution. This file is only required for the Windows build.
486+
487+RN282: Fixed bug #451101: jump or move depends on uninitialised value in myxt_get_key_length
488+
489+RN281: Fixed bug #451080: Uninitialised memory write in XTDatabaseLog::xlog_append
490+
491+RN280: Fixed bug #451085: jump or move depends on uninitialised value in my_type_to_string
492+
493+RN279: Fixed bug #441000: xtstat crashes with segmentation fault on startup if max_pbxt_threads exceeded.
494+
495+------- 1.0.09e RC3 - 2009-11-20
496+
497+RN278: Fixed compile error with MySQL 5.1.41.
498+
499+------- 1.0.09d RC3 - 2009-09-30
500+
501+RN277: Added r/o flag to pbxt_max_threads server variable (this fix is related to bug #430637)
502+
503+RN276: Added test case for replication on tables w/o PKs (see bug #430716)
504+
505+RN275: Fixed bug #430600: 'Failed to read auto-increment value from storage engine' error.
506+
507+RN274: Fixed bug #431240: xtstat fails if no PBXT table has been created. xtstat now accepts --database=information_schema or --database=pbxt. Depending on this setting PBXT will either use the information_schema.pbxt_statistics or the pbxt.statistics table. If information_schema is used, then the statistics are displayed even when no PBXT table exists. Recovery activity is also displayed, unless pbxt_support_xa=1, in which case MySQL will wait for PBXT recovery to complete before allowing connections.
508+
509+RN273: Fixed bug #430633: XA_RBDEADLOCK is not returned on XA END after the transaction ended with a deadlock.
510+
511+RN272: Fixed bug #430596: Backup/restore does not work well even on a basic PBXT table with auto-increment.
512+
513+------- 1.0.09c RC3 - 2009-09-16
514+
515+RN271: Windows build update: now you can simply put the pbxt directory under <mysql-root>/storage and build the PBXT engine as a part of the source tree. The engine will be linked statically. Be sure to specify the WITH_PBXT_STORAGE_ENGINE option when running win\configure.js
516+
517+RN270: Correctly disabled PBMS so that this version now compiles under Windows. If PBMS_ENABLED is defined, PBXT will not compile under Windows because of a getpid() call in pbms.h.
518+
519+------- 1.0.09 RC3 - 2009-09-09
520+
521+RN269: Implemented online backup. A native online backup driver now performs BACKUP and RESTORE DATABASE operations for PBXT. NOTE: This feature is only supported by MySQL 6.0.9 or later.
522+
523+RN268: Implemented XA support. PBXT now supports all XA related MySQL statements. The variable pbxt_support_xa determines if XA support is enabled. Note: due to MySQL bug #47134, enabling XA support could lead to a crash.
524+
525+------- 1.0.08d RC2 - 2009-09-02
526+
527+RN267: Fixed a bug that caused MySQL to crash on shutdown, after an incorrect command line parameter was given. The crash occurred because the background recovery task was not cleaned up before the PBXT engine was de-initialized.
528+
529+------- 1.0.08c RC2 - 2009-08-18
530+
531+RN266: Updated BLOB streaming glue, used with the PBMS engine. The glue code is now identical to the "1.0.08-rc-pbms" version of PBXT available from http://blobstreaming.org/download.
532+
533+RN265: Changes the sequential reading of data log files to skip gaps, instead of returning EOF. This ensures that extended data records are preserved even when something goes wrong with the way the file is written.
534+
535+RN264: Fixed a bug that caused a "Data log not found" error after an out of disk space error on a log file. This bug is similar to RN262 in that it allows "gaps" to appear in the data logs.
536+
537+RN263: Updated xtstat to compile on Windows/MS Visual C++.
538+
539+RN262: Merged changes for PBMS version 0.5.09.
540+
541+RN261: Concerning bug #377788: Cannot find index for FK. Fixed buffer overflow which occurred when the error was reported.
542+
543+RN260: Fixed bug #377788: Cannot find index for FK. PBXT now correctly uses prefix of an index to support FK references (e.g. if key = (c1, c2) then an index on (c1, c2, c3) will work). Also fixed buffer overflow, which occurred when reporting the error.
544+
545+RN259: Fixed bug #309424: xtstat doesn't use my.cnf. You can now add an [xtstat] section to my.cnf, for use with xtstat.
546+
547+RN258: updated xt_p_join implementation for Windows to check if a thread has already exited or has not yet started
548+
549+RN257: Removed false assertion that could fail during restore if a transaction log page was zero-filled
550+
551+RN256: Update datalog eof pointer only if write operations were successful
552+
553+RN255: Added re-allocation of the filemap if allocating the new map failed. This often happens if there's not enough space on disk.
554+
555+RN254: When a table with a corrupted index is detected, PBXT creates a file called 'repair-pending' in the pbxt directory, with the name of the table in it. Each table in the file is listed on a line by itself (the last line has no trailing \n). When the table is repaired (using the REPAIR TABLE command), this entry is removed from the file.
556+
557+RN253: Use fcntl(F_FULLFSYNC) instead of fsync on platforms that support it. Improper fsync operation was presumably the reason of index corruption on Mac OS X.
558+
559+RN252: Fixed bug #368692: PBXT not reporting data size correctly in information_schema.
560+
561+------- 1.0.08 RC2 - 2009-06-30
562+
563+RN251: A Windows-specific test update, also removed false assertion that failed on Windows.
564+
565+RN250: Fixed a bug that caused recovery to fail when the transaction log ID exceeded 255. The problem was a checksum failed in the log record.
566+
567+RN249: Fixed bug #313176: Test case timeout. This happened because record cache pages were not properly freed and as soon as the cache filled up the performance degraded.
568+
569+RN248: PBXT now compiles and runs with MySQL 5.1.35. All tests pass.
570+
571+RN247: Fixed bug #369086: Inconsistent/Incorrect Truncate behavior
572+
573+RN246: Fixed bug #378222: Drop sakila causes error: Cannot delete or update a parent row: a foreign key constraint fails
574+
575+RN245: Fixed bug #379315: Inconsistent behavior of DELETE IGNORE and FK constraint.
576+
577+RN244: Fixed a recovery problem: during the recovery of "record modified" action the table was updated before the old index entries were removed; then the xres_remove_index_entries was supplied the new record which led to incorrect index update.
578+
579+RN243: Fixed a bug that caused a recovery failure if partitioned pbxt tables were present. This happened because the recovery used a MySQL function to open tables and the PBXT handler was not yet registered
580+
581+RN242: Fixed a bug that caused a deadlock if pbxt initialization failed. This happened because pbxt cleanup was done from pbxt_init() with PLUGIN_lock being held by MySQL which led to a deadlock in the freeer thread
582+
583+RN241: Fixed a heap corruption bug (writing to a freed memory location). It happened only when memory mapped files were used leading to heap inconsistency and program crash or termination by heap checker. Likely to happen right after or during DROP TABLE but possible in other cases too.
584+
585+RN240: Load the record cache on read when not using memory mapped files.
586+
587+RN239: Added PBXT variable pbxt_max_threads. This is the maximum number of threads that can be created by PBXT. By default this value is set to 0 which means the number of threads is derived from the MySQL variable max_connections. The value used is max_connections+7. Under Drizzle the default value is 500.
588+
589+RN238: Added an option to wait for the sweeper to clean up old transactions on a particular connection. This prevents the sweeper from getting too far behind.
590+
591+RN237: Added an option to lazy delete fixed length index entries. This means the index entries are just marked for deletion, instead of removing the items from the index page. This has the advantage that an exclusive lock is not always required for deletion.
592+
593+RN236: Fixed bug #349177: a bug in configure.in script.
594+
595+RN235: Fixed bug 349176: a compiler warning.
596+
597+RN234: Completed Drizzle integration. All Drizzle tests now run with PBXT.
598+
599+RN233: Fixed bugs which occur when PBXT is used together with PBMS (BLOB Streaming engine).
600+
601+RN232: Merged Drizzle-specific changes into the main tree.
602+
603+RN231: Fixed a bug that caused bad performance as the number of threads increased. This occurred when the number of open table handles exceeded 'table_open_cache', and MySQL started closing open table handlers. PBXT was flushing a table when all table handlers were closed. PBXT will now only do this when the FLUSH TABLES statement is used.
604+
605+RN230: Improved efficiency of conflict resolution: Implemented a queue for threads waiting for a lock. Threads no longer poll to take a lock. If a temp lock is granted because of an update, then the thread granted the temp lock will also wait for the transaction that did the update to quit.
606+
607+RN229: Fixed bug #313391: LOAD DATA ... REPLACE broken.
608+
609+RN228: Fixed bug #341115: 'Out of memory' error (a bug in key comparison algorithm).
610+
611+RN227: Changed conflict handling to use spin locks and improve efficiency.
612+
613+RN226: Fixed bug #340316: Issue with bigint unsigned auto-increment field.
614+
615+RN225: Fixed bug #308557: UPDATE fails to match all rows in a transactional scenario.
616+
617+RN224: Fixed a deadlock which could occur during table scans.
618+
619+RN223: Index scans now use handles to cache buffers instead of making a copy of the index page. The handles are "copy-on-write".
620+
621+RN222: Fixed a bug that caused the server to hang on startup if PBXT ran out of record cache while waiting for the sweeper to complete.
622+
623+RN221: Fixed an index recovery bug. This occurred if the server crashed after operating in low index cache situations.
624+
625+RN220: Improved index selectivity estimation: added scanning from the end of index backwards.
626+
627+RN219: Fixed a problem: during intersected range scan not all fields were returned by engine to MySQL.
628+
629+RN218: Changed the way row locking (used by SELECT FOR UPDATE) works. Previously we locked a group of rows at once (although there were many groups). However, this caused conflicts even when the same rows were not locked. We now lock individual rows.
630+
631+RN217: Fixed bug #315564: Rollbacked inserts remain permanently in table.
632+
633+RN216: Added lock tracing. In DEBUG mode, each thread has a list of locks (semaphores, mutexes, r/w locks that it holds).
634+
635+RN215: Fixed a bug that caused a crash during restart if an index file was flushed during recovery.
636+
637+RN214: Fixed bug #310184: Deadlock when trying to wake up transactions
638+
639+RN213: Fixed an index corruption bug on SPARC Solaris. Note this error will occur on any machine that does not use the x86 (little endian) byte order.
640+
641+------- 1.0.07 RC - 2008-12-15
642+
643+RN212: Fixed build problems on NetBSD.
644+
645+RN211: Fixed build problems on FreeBSD.
646+
647+RN210: Fixed build problems on OpenSolaris.
648+
649+RN209: Added handling of the foreign_key_checks system flag.
650+
651+RN208: xtstat will now automatically reconnect if the connection to server is lost.
652+
653+RN207: Foreign key references are now checked on CREATE TABLE.
654+
655+RN206: Fixed a crash if inserting into a table that has an FK that references a column that has no index on it.
656+
657+RN205: Added processing of foreign key action SET DEFAULT.
658+
659+RN204: Fixed an index recovery problem: unswept index entries were not recovered correctly
660+
661+RN203: Fixed foreign key bug: REPLACE fails with 'on delete cascade'
662+
663+RN202: Fixes and updates to tests, now all tests pass on windows and linux.
664+
665+RN201: Fixed ref-counting for mmapped files.
666+
667+RN200: Fixed an index recovery problem: unswept index entries were not recovered correctly.
668+
669+RN199: Recovery now takes place on plug-in startup. Previously recovery occurred when the first PBXT table was accessed.
670+
671+RN198: Fixed a recovery bug that caused index entries to get out of sync with the data file.
672+
673+RN197: Improved the efficiency of group commit.
674+
675+RN196: Changed checkpointing so that it now works during idle time. Every record, row or index file flush now also contributes to the checkpoint (fuzzy checkpointing). Checkpointing is forced to complete after about 50% of the checkpoint threshold in order to ensure the correct maximum for log reading on recovery.
676+
677+RN195: Fixed scheduling bug that caused sweeper to get behind with the cleanup, which caused performance problems in high conflict situations. Foreground threads will now wait if the sweeper gets too far behind.
678+
679+RN194: Created the xtstat program which monitors the internal performance of PBXT. Run xtstat --help for more detailed information on the output.
680+
681+RN193: Implemented the pbxt.statistics virtual table. The statistics table returns information about the internal activity of the engine. This includes I/O byte counts, cache hit counts and usage, commit count, etc.
682+
683+RN192: Due to timing issues in the engine API it could happen that the client received an OK for a committed transaction before the transaction was actually committed. This problem has been fixed.
684+
685+RN191: Fixed a bug that caused a hang when conflicts occurred while reading a covering index.
686+
687+RN190: Previously the sweeper delayed deletion of transaction structures until all transactions that were running during sweeping have quit. This is now handled by the same code that fixed the bug in RN189.
688+
689+RN189: Fixed a bug that could cause a row to go missing due to a visibility issue.
690+
691+RN188: Fixed a bug which occurred when using CREATE TABLE ... AVG_ROW_LENGTH=x, and the table contained BLOBs. In this case, alter table corrupted the table.
692+
693+RN187: Windows now stores paths in the location file in UNIX format by converting all '\' characters to '/'. Note that the location file is only cross-platform if the paths are relative (which is the default).
694+
695+RN186: Set version number to 1.0.07.
696+
697+------- 1.0.06 Beta 2 - 2008-11-06
698+
699+RN185: Disabled support for INSERT DELAYED because of MySQL bug #40505
700+
701+RN184: Implemented info(flag == HA_STATUS_AUTO) engine API call. This call returns the next value that will be assigned as auto-increment value on the table.
702+
703+RN183: Turned off streaming on Windows (see XT_STREAMING macro in sources)
704+
705+RN182: Switch code base to the latest version of BLOB streaming engine (PBMS): www.blobstreaming.org.
706+
707+RN181: Updated pbxt-test-run default parameters (--force is on, --default-storage-engine is pbxt, --base-dir is set according to config)
708+
709+RN180: PBXT can now cope with a missing .xti file (the file that contains the table indexes). This file can be regenerated using REPAIR TABLE.
710+
711+RN179: On recovery PBXT now creates a file called 'recovery-progress' in the pbxt database. The recovery percentage complete is written to this file as recovery progresses. Note that this file will not be created if no recovery is necessary or if PBXT estimates that it will read less than 10MB to do recovery.
712+
713+RN178: Fixed a problem in CHECK TABLE that caused memory corruption for fixed-size records
714+
715+RN177: Added "crash debugging". When enabled, crash debugging does the following:
716+ - Create a core dump on Windows if the server crashes.
717+ - Make a backup copy of the datadir directory before recovery if the server crashes.
718+ - Keep at least 5 of the previous transaction logs.
719+Currently crash debugging is disabled by default. To disable, create a file called 'no-debug' in the pbxt database folder, and restart the server. When crash debugging is disabled by default, it can be enabled by creating a file called 'crash-debug' in the pbxt database folder.
720+
721+RN176: Fixed a bug: a lock was not released appropriately
722+
723+RN175: Fixed some debug assertions
724+
725+RN174: Fixed some of test/mysql-test tests
726+
727+RN173: Fixed a RENAME TABLE bug, that prevented index files from being properly recreated
728+
729+RN172: Added the file ./pbxt/lock-pid. This file is locked while the server is running, and contains the process ID of the server. PBXT will return an error on startup if the file is locked or the process is still running in order to prevent a second server from being started.
730+
731+RN171: Implemented the AVG_ROW_LENGTH table attribute. When set, this value determines the size of the fixed length data component of a record. Normally this size is estimated depending on the column definitions. The command CHECK TABLE dumps the current average row length to the log. This can be used to find a suitable value for AVG_ROW_LENGTH.
732+
733+RN170: Changed configure so that debug/optimize flags set for building the engine override the flags set for MySQL. If --with-debug is not specified, then the engine will use the flags set when building MySQL. If MySQL was built with --with-debug=full, the DEBUG will be defined for the engine. When building the engine, the following flags can be set:
734+ yes - Debug symbols enabled, no optimization, DEBUG not defined.
735+ full - Debug symbols enabled, no optimization, DEBUG defined.
736+ only - Debug symbols enabled, MySQL flags used, DEBUG not defined.
737+ prof - Profile code enabled, optimization on, DEBUG not defined.
738+ no - No debug symbols, optimization on, DEBUG not defined.
739+
740+RN169: Used MySQL root Makefile instead of config.status in order to extract settings (such as CFLAGS and CXXFLAGS) for the PBXT build.
741+
742+RN168: Fixed Windows build after merging changes for Drizzle.
743+
744+RN167: Fixed "This table requires primary key" error in sql-bench.
745+
746+RN166: Fixed threading problems that caused crashes in sql-bench.
747+
748+RN165: Added sql-bench to pbxt source tree.
749+
750+RN164: Ported PBXT to Drizzle. To compile for Drizzle DRIZZLED must be defined on the command line. The -drz.am and -drz.in files must be used when PBXT is embedded in Drizzle.
751+
752+RN163: Added "make test" build step. Running "make test" from the root of pbxt source tree will launch test/mysql-test/pbxt-test-run.pl with appropriate options to execute the pbxt functional test suite. On Windows where
753+pbxt is statically linked into mysql server binary pbxt testing works by going to test/mysql-test directory and running ./pbxt-test-run.pl with --base-dir argument pointing to a mysql source tree (mysql binaries are taken
754+from there) and passing the rest of usual arguments (--force --mysqld=--default-storage-engine=pbxt)
755+
756+RN162: The 'pbxt' database must now be dropped explicitly. It is automatically created when the first PBXT table is created. After that, the pbxt database can be dropped once all PBXT tables have been dropped. Dropping the pbxt database will also cause all transaction (pbxt/system directory) and data logs (pbxt/data directory) to also be deleted.
757+
758+RN161: Added pbxt.location system table. This table can only be dropped when all PBXT tables have been deleted. Dropping the system table will cause all transaction (pbxt/system directory) and data logs (pbxt/data directory) to also be deleted.
759+
760+RN160: Made changes to run with MySQL 6.0.6.
761+
762+RN159: Changes to configure: added --with-plugindir=<path>, which should be used to specify the plugin directory. This means that --libdir should no longer be used. For backwards compatibility configure will still recognize this option if the path ends with 'plugin'.
763+
764+Also updated --help, to include all options, and better descriptions of the options.
765+
766+The configure options are now as follows:
767+
768+--with-mysql=<path> - (Required) It specifies the path to the MySQL source tree. The source should already be built. All other options will be taken from the MySQL build by default.
769+--with-debug=yes/no - (Optional) Specify if the engine should be built with different debug options to the MySQL source tree.
770+--with-plugindir=<path> - (Optional) Specify an alternative installation directory for the plugin. By default it will be installed in the plugin directory of the MySQL installation.
771+
772+
773+RN158: Added support for core dumps on Windows. This can be enabled by defining XT_COREDUMP. On by default at the moment. If the server crashes a file called PBXTCore00000001.dmp will be created in the data directory. This file can be opened using MS VS.
774+
775+RN157: Fixed a compile problem with tv_nsec which is not supported on all platforms.
776+
777+RN156: Updated tests to run with MySQL 5.1.28.
778+
779+RN155: Errors during cascade update of VARCHAR values with trailing spaces
780+
781+RN154: Fixed a bug: impossible to create a foreign key that referenced an ENUM or SET column
782+
783+RN153: Fixed a bug that caused the following problems: #1. Foreign keys: crash if update cascade and autocommit=0 #2. Foreign keys: crash if update cascade and multi-level recursion
784+
785+RN152: Fixed missing information about foreign keys in I_S.table_constraints and I_S.referential_constraints
786+
787+------- 1.0.05 Beta - 2008-08-30
788+
789+RN151: "Quick config": It is now possible to configure the engine by just specifying the mysql source code tree (the --with-mysql option). The --libdir and --with-debug setting will be deduced automatically.
790+
791+RN150: Added system variable pbxt_sweeper_priority, 0 = low (default), 1 = normal (same as user threads), 2 = high. The sweeper cleans up deleted records (deleted records also result from an update). If allowed to accumulate, these records can slow searches. Higher priority for the sweeper is recommended on systems with 4 or more cores.
792+
793+RN149: Record cleanup is now initiated if a deleted record is found, and the transaction that deleted the record has ended. Since waking up the sweeper is an expensive operation, normally the sweeper will run every 1/10th of a second.
794+
795+RN148: Fixed a bug which caused transaction starvation (one transaction was constantly locked out) during high conflict updates. This led to cleanup of records not being done, which led to a general slow down.
796+
797+RN147: Fixed a problem with TRUNCATE TABLE: a failed TRUNCATE TABLE could put the engine into an invalid state that later caused a crash
798+
799+RN146: Fixed a bug that caused the error: "-49: Record format unknown, either corrupted or upgrade required".
800+
801+RN145: Added pbxt_db_offline_log_function system variable, 0 = recycle logs (default), 1 = delete logs (default on Mac OS X), 2 = keep logs.
802+
803+------- 1.0.04 Alpha - 2008-08-02
804+
805+RN144: Completed port and testing of Windows version.
806+
807+RN143: Fixed a bug which caused the free-er thread to hang. This was a result of an invalid operation ID, which was the result of the checkpointer flushing the table at the same time as a foreground thread.
808+
809+RN142: The fast RW/mutex lock can now handle nested calls. This is possible during a sequential scan.
810+
811+RN141: The normal behavior in MySQL is that an auto-increment value will be re-issued if you delete the row containing the current maximum auto-increment value and then restart the server. To prevent this you can use ALTER TABLE my_table AUTO_INCREMENT = <current-max-auto-increment> + 1, before deleting the current maximum auto-increment value.
812+
813+A new system variable, pbxt_auto_increment_mode, has been added so that this work around is not necessary. When set to 0 (the default), auto-increment works as described above. When set to 1, the AUTO_INCREMENT value of the table is automatically adjusted to prevent previously issued auto-increment values being returned.
814+
815+However, if the server crashes, a gap of up to 100 unique values can result, because the table AUTO_INCREMENT value is incremented in steps of 100.
816+
817+RN140: Index statistics are now automatically recalculated when the table row count exceeds 200.
818+
819+RN139: Fixed a bug that caused index corruption, error: "int idx_push(index_xt.cc:172) -2: Core B-tree too deep".
820+
821+RN138: Handle startup and recovery when an index is corrupted.
822+
823+RN137: Fixed a bug in the zero wait R/W lock that caused the lock to fail (the state is extremely volatile, and must be written to memory after increment).
824+
825+RN136: Fixed a bug that caused the error "int xt_pwrite_file(filesys_xt.cc:789) errno (14): Bad address".
826+
827+RN135: Fixed TRUNCATE TABLE that did not work correctly when the table contained BLOBs stored in the BLOB streaming engine (www.blobstreaming.org).
828+
829+RN134: Fixed a bug that caused duplicate rows to be returned from an index scan (using a SELECT FOR UPDATE) if a concurrent update was done.
830+
831+RN133: Optimised PBXT for multi-processor scale-up. This mostly involved using different types of locks instead of the standard pthread mutex and reader/writer locks [TODO: 0038].
832+
833+------- 1.0.03 Alpha - 2008-05-30
834+
835+RN132: Fixed bug when using PBXT in conjunction with the BLOB streaming engine (www.blobstreaming.org). Uploaded BLOBs could not be inserted into a table.
836+
837+RN131: Fixed wait for background processes on shutdown. Shutdown will wait a maximum of 16 seconds for each process.
838+
839+RN130: Fixed calculation of bytes to be read for recovery.
840+
841+RN129: Fixed bug in cleanup of unterminated transactions.
842+
843+RN128: The writer will now start working when one of the following is true:
844+- it is time for a checkpoint,
845+- the log cache is almost full,
846+- the free'er is waiting for the writer,
847+- there is no other activity.
848+
849+RN127: Fixed checkpoint frequency. Checkpointing is now done correctly after 'pbxt_checkpoint_frequency' bytes.
850+
851+RN126: Implemented index consistent write [TODO: 0050].
852+
853+RN125: Implemented memory mapping for row pointer (.xtr) and handle data files (.xtd).
854+
855+RN124: Index files now use direct I/O.
856+
857+------- 1.0.02 Alpha - 2008-04-25
858+
859+RN123: Fixed compile errors with MySQL 5.1.24.
860+
861+------- 1.0.01 Alpha - 2008-03-28
862+
863+RN122: ++++ NOTE: This version is not compatible with older versions of PBXT ++++.
864+
865+RN121: Transaction logs are now global so that multi-database statements are now possible. This also makes it possible to work with PBXT temporary tables.
866+
867+RN120: Transaction logs pre-allocated and recycled.
868+
869+RN119: Transaction log writes on 512 byte boundaries only.
870+
871+------- 1.0.00 Alpha - 2008-03-10
872+
873+This version has alpha status because of the large number of changes done for full durability.
874+
875+RN118: ++++ NOTE: This version is incompatible to older versions of PBXT ++++.
876+
877+RN117: Documentation now available at http://www.primebase.org/documentation.
878+
879+RN116: Corrected the plug.in file so that PBXT compiles when dropped into the storage directory in the MySQL source tree.
880+
881+RN115: Compiled and tested with MySQL 5.1.23.
882+
883+RN114: Increased index block size. Minimum is now 4K. Default is 16K.
884+
885+RN113: Calculate index selectivity to return a more accurate value from records_in_range(). NOTE: FLUSH TABLES will update the index statistics, after data has been inserted or updated.
886+
887+RN112: Optimized table storage, saving 8 bytes per row.
888+
889+RN111: Optimized search on keys containing 2 or 3 not null integer values.
890+
891+RN110: Optimization: store the row ID in the index so that an index entry can be verified as current without loading the record. This is necessary to optimize an access with index coverage.
892+
893+RN109: Optimization: only load the record extended data if required.
894+
895+RN108: Implemented SHOW ENGINE PBXT STATUS;
896+
897+RN107: Added the following system variables:
898+
899+pbxt_index_cache_size - The amount of memory allocated to the index cache, used only to cache index data
900+pbxt_record_cache_size - The amount of memory allocated to the record cache used to cache table data
901+pbxt_log_cache_size - The amount of memory allocated to the transaction log cache used to cache transaction log data
902+pbxt_log_file_threshold - The size of a transaction log before rollover, and a new log is created
903+pbxt_transaction_buffer_size - The size of the global transaction log buffer (the engine allocates 2 buffers of this size)
904+pbxt_log_buffer_size - The size of the buffer used to cache data from transaction and data logs during sequential scans, or when writing a data log
905+pbxt_checkpoint_frequency - The amount of data written to the transaction log before a checkpoint is performed
906+pbxt_data_log_threshold - The maximum size of a data log file
907+pbxt_garbage_threshold - The percentage of garbage in a data log file before it is compacted
908+
909+RN106: PBXT now compiles for MySQL 6.0.3.
910+
911+RN104: Updates now lock a record temporarily. This prevents most "record changed" errors, however, it makes UPDATE statements a type of "committed read". This means that you may update a different value to that which you selected in repeatable read mode. To avoid this, use SELECT FOR UPDATE if you plan to UPDATE records after reading.
912+
913+RN103: Implemented SELECT FOR UPDATE. This is implemented by turning SELECT FOR UPDATE into a type of "committed read". This means that, if you do a SELECT followed by a SELECT FOR UPDATE you can get different results, even in repeatable read mode.
914+
915+RN102: Implemented recovery of index entries. Note: indexes are not yet fully consistent. This means that an index can become corrupted due to a crash. Data, however, cannot be lost. The indices can be rebuilt using REPAIR TABLE.
916+
917+RN101: Writing and flushing of a single transaction write-ahead log.
918+
919+RN100: Automatic rollover of transaction logs as they become full.
920+
921+RN99: Implementation of the transaction log cache.
922+
923+RN98: Group commit.
924+
925+RN97: Implementation of the writer thread that applies changes in the transaction log to the database.
926+
927+RN96: Implementation of the checkpointer thread that periodically flushes the database and writes a checkpoint which determines the recovery start point.
928+
929+RN95: Implementation of the free'er thread that is responsible for keeping the record cache at a preset level.
930+
931+RN94: Modifications to the record cache so that rows are stored in pages, in order to speed up sequence access.
932+
933+RN93: Implemented the recovery process which applies changes written to the log that are not in the database, on startup.
934+
935+RN92: Modification of the sweeper thread which cleans up rolled-back transactions and deleted data, to use the new transaction log format.
936+
937+RN91: Modifications to the data logs so that they use the same record structure as the transaction logs.
938+
939+RN90: The data logs are now managed "per database" in order to minimize the work done to flush and commit a transaction.
940+
941+RN89: Implementation of a file handle pool for the data logs.
942+
943+------- 0.9.91 Beta - 2007-10-30
944+
945+RN88: The format of the URL generated by MyBS has been changed. The format of the BLOB URLs is now as follows:
946+
947+'~*' <db-name> '/' <type-char> <table-id> '-' <blob-id> '-' <access-code> '-' <server-id>
948+
949+Where <type-char> is '_' or '~'.
950+
951+Examples: ~*test/_11-128-fbd590b-0, ~*test/~1-524-3dc45b09-0
952+
953+In other words, the character '>' has been replaced by '*', '^' has been replaced by '_' and ':' has been replaced by '~'. The reason for this is that the characters '>' and '^' are not allowed in URLs, and must be URL-encoded. The character ':' is reserved, but allowed.
954+
955+NOTE: This change makes this version incompatible with previous versions of MyBS. If you have a table with BLOB URLs, you can upgrade the URLs as follows:
956+
957+UPDATE blob_table SET blob_col = REPLACE(REPLACE(blob_col, '~>', '~*'), '/:', '/~');
958+
959+Replacing '^' is not necessary because BLOB URLs with '^' should not appear in tables.
960+
961+------- 0.9.90 Beta - 2007-10-17
962+
963+RN87: Corrected stack trace of errors passed through the BLOB streaming API.
964+
965+RN86: Added new engine API accessor functions that appeared in 5.1.21 (thanks Stewart).
966+
967+RN85: Added plug.in file. PBXT now compiles when dropped into the storage directory of the MySQL build tree. However, you have to rebuild configure. For example:
968+
969+rm -rf autom4te.cache/
970+aclocal
971+autoconf
972+autoheader
973+automake -a
974+./configure --help
975+./configure --with-plugins=max --without-innodb --prefix=/usr/local/mysql --with-debug=full
976+
977+NOTE: ./configure --help should show that the PBXT has been included.
978+
979+RN84: Fixed several problems with shutdown of PBXT in combination with MyBS.
980+
981+------- 0.9.89 Beta - 2007-08-17
982+
983+RN83 (2007-08-21): Fixed a crash due to a compiler bug that does not like the construct *((xtWordPS *) &(v)) = (xtWordPS) (x) (macro allocr_() and alloczr_()).
984+
985+RN82: It is now possible to insert non-URL values into a LONGBLOB field; in the previous version this generated an "Invalid URL" error. Such values can be retrieved as a stream using a field reference.
986+
987+RN81: Fixed a bug that caused PBXT to crash during certain operations when MyBS was not installed.
988+
989+RN80: Set engine as capable of row-level replication, but not as statement replication. Statement replication does not work because MVCC is not serializable.
990+
991+------- 0.9.88 Beta - 2007-07-25
992+
993+RN79: Made some corrections in order to compile with MySQL 5.1.20.
994+
995+RN78: Support for the features of the MyBS BLOB Streaming engine, version 0.5 Alpha.
996+
997+RN77: Bugfix: The server crashes during BLOB data handling. The reason is the table field structure is shared, and may not be changed.
998+
999+------- 0.9.87 Beta - 2007-06-19
1000+
1001+RN76: The major feature of this release is support for the BLOB Streaming Engine. The current version enables the download of specific BLOB columns via the Streaming Engine. For example:
1002+
1003+use test;
1004+CREATE TABLE notes_tab (
1005+ n_id INTEGER PRIMARY KEY,
1006+ n_text BLOB
1007+) ENGINE=pbxt;
1008+INSERT notes_tab VALUES (1, "This is a BLOB streaming test!");
1009+
1010+The URL:
1011+
1012+http://localhost:8080/test/notes_tab/n_text/n_id=1
1013+
1014+will return the value "This is a BLOB streaming test!"
1015+
1016+RN75: Bugfix: MySQL prints error: "Plugin 'PBXT' will be forced to shutdown". This error was caused by the plug-in having a reference to itself.
1017+
1018+RN74: Added system variables pbxt_index_cache_size and pbxt_record_cache_size. These variables can now be set on the mysqld command line (for example: --pbxt_record_cache_size=50MB). The values are also displayed by SHOW VARIABLES.
1019+
1020+------- 0.9.86 Beta - 2007-04-07
1021+
1022+RN74: ++++ NOTE: This version is incompatible to older versions of PBXT ++++.
1023+
1024+In order to upgrade, install the older version of PBXT. Convert all tables to MyISAM using ALTER TABLE t1 ENGINE=MyISAM. Then install the new version of PBXT and convert back using ALTER TABLE t1 ENGINE=PBXT.
1025+
1026+RN73: Each table will now use a maximum of 4 data log files. This means a maximum of 7 files per table. The minimum is 3 for tables that do not have a variable field that exceeds about 40 bytes in size. This means that under Linux PBXT requires a maximum of 7 file handles per table used. Windows' lack of pread/pwrite (atomic seek and read/write) functions means it requires a file handler per file per open table handler. [TODO: 0044]
1027+
1028+RN72: All threads now write to the same data log file. Recovery and compaction take this fact into account. Each thread still writes its own transaction log.
1029+
1030+RN71: Removed all directory scans when creating and dropping table. Increased the table limit to 10000.
1031+
1032+RN70: Changed locking to avoid a deadlock when TRUNCATE TABLE is used together with other DML.
1033+
1034+RN69: procedures and functions are now considered atomic, and execute in a single transaction.
1035+
1036+RN68: Bug fixed: all files are now correctly flushed before commit.
1037+
1038+------- 0.9.85 Beta - 2007-03-15
1039+
1040+RN67: Changed the implementation of the pushsr_ and allocr_ macros because "*((void **) &(v) = " caused a crash due to a compiler error on some platforms (thanks Luciano for your help on this one and RN66).
1041+
1042+RN66: Fixed a bug that caused PBXT to corrupt the index file when the size exceeded 4GB. [TODO: 0031]
1043+
1044+RN65: PBXT now runs under Windows. This source tree must be placed in the MySQL source storage directory in order to compile. Further details of how to build are in the windows-readme.txt file. [TODO: 0027]
1045+
1046+RN64: Improved speed of table lookup by ID after a table has been deleted. The sweeper needs to ignore these records. Scanning the directory each time was too slow.
1047+
1048+RN63: Added checking for repeat update of a record in a statement.
1049+
1050+RN62: Committed read no longer blocks due to a change made by another transaction (the XT_REPEATABLE_READ_BLOCKS define, turns blocking on).
1051+
1052+RN61: Avoid checking for duplicates if an index is not modified by an update.
1053+
1054+RN60: Records updated repeatedly by a transaction are now updated in place. [TODO: 0040]
1055+
1056+------- 0.9.8 Beta - 2007-01-30
1057+
1058+RN59: Reduced the number of file handles used to a maximum of one per file. This assumes that pread() and pwrite() allows multiple threads to use the same file handle (according to my tests, this is the case).
1059+
1060+RN58: Added the configure flag --with-debug=only which compiles a version of the plug-in with debug symbols that will link to a non-debug MySQL server.
1061+
1062+RN57: Changed error number returned on lock from 1205 (lock timeout) to 1020 (optimistic lock failure).
1063+
1064+RN56: Added UNIX environment variable for PBXT system parameters. These must be set before starting mysqld, for example:
1065+
1066+setenv pbxt_index_cache_size 400MB
1067+setenv pbxt_record_cache_size "1 GB"
1068+
1069+Values are in bytes unless one of the following units is specified: GB, MB, Kb
1070+
1071+RN55: Fixed a bug which prevented VARCHAR values from being compressed correctly when stored in variable length rows.
1072+
1073+RN54: Fixed a bug which caused a crash when PBXT was used with MySQL 5.1.14. This bug also caused data to be corrupted on insert.
1074+
1075+RN53: Set query caching mode to transactional. [TODO: 0027]
1076+
1077+RN52: Added conditions so that the engine compiles with MySQL 5.1.14 and 5.1.13.
1078+
1079+------- 0.9.74 Beta - 2006-12-14
1080+
1081+RN51: DELETE FROM <table>; is no longer implemented by re-creating the table. This statement now works by deleting all rows. TRUNCATE is implemented as before, by re-creating the table.
1082+
1083+RN50: The test scripts innodb.test and innodb-mysql.test have been modified to run with PBXT.
1084+
1085+RN49: [TODO: 0020] Implemented foreign keys. Functionality is identical to InnoDB with 2 exceptions:
1086+
1087+* Data types of referenced columns must be an exact match (e.g. you cannot mix VARCHAR and CHAR values).
1088+* Currently an exact matching index is required on referenced columns (i.e. the index may not have more columns than the columns used in the foreign key definition).
1089+
1090+Also note the following:
1091+
1092+* It is possible to create foreign keys that reference non-existent tables or columns. An error will occur when updating a table with an incorrect foreign key declaration.
1093+* If you alter the data-type of a column referenced by a foreign key, you need to set foreign_key_checks=0; or an error will occur.
1094+
1095+RN48: Fixed a bug in the implementation of indexes on ENUM and SET types.
1096+
1097+RN47: Fixed a bug that caused a crash when an index was placed on a BLOB column, and data was retrieved from the index directly.
1098+
1099+------- 0.9.73 Beta - 2006-10-31
1100+
1101+RN46: Updated test scripts to run with MySQL 5.1.13.
1102+
1103+------- 0.9.72 Beta - 2006-10-19
1104+
1105+RN45: Corrected compilation errors that occurred due to a change to struct st_mysql_plugin.
1106+
1107+------- 0.9.71 Beta - 2006-10-04
1108+
1109+RN44: Corrected compilation errors that occurred due to changes in the storage engine API.
1110+
1111+------- 0.9.7 Beta - 2006-09-20
1112+
1113+RN43: This is the first Beta release of PrimeBase XT. It has been integrated into MySQL 4.1.21 and is available as a plug-in for MySQL 5.1.12, or later. This version has been extensively tested using mysql-test-run, on various Linux and Mac OS X platforms.
1114+
1115+RN42: ++++ NOTE: This version is incompatible to older versions of PBXT ++++. Files created by older versions cannot be opened by version 0.9.7.
1116+
1117+RN41: Renaming or deleting a table while using a name with different case to the original created name did not work.
1118+
1119+RN40: Fixed a bug when grouping and searching on indexed columns that contain a null.
1120+
1121+RN39: Fixed bugs related to trailing spaces on VARCHAR values. Values that only vary by the number of trailing spaces (for example "aa" and "aa "), are now correctly handled as identical.
1122+
1123+RN38: The default AUTO_INCREMENT value was not correctly preserved during ALTER TABLE.
1124+
1125+RN37: Created a MySQL 5.1 Plugin version of PBXT. [TODO: 0017]
1126+
1127+RN36: Fixed a race condition in the row cache which had the effect that inserted rows disappeared after cleanup because the cache was out of date. I was only able to reproduce this error on multi-processor machines.
1128+
1129+------- 0.9.6 - 2006-08-05
1130+
1131+RN35: ++++ NOTE: This version is incompatible to older versions of PBXT ++++.
1132+
1133+The disk format of tables and log files has changed slightly in this version. As a result, files created by older versions cannot be opened by version 0.9.6. An error will be generated. If you have data you wish to preserve, first start the older version of XT and convert all tables to MyISAM. Then stop the server and remove all transaction log files (files of the form xtlog-*.xt). Then start the new version and convert tables back to XT.
1134+
1135+RN34: Implemented READ COMMITTED transaction mode. XT now supports READ COMMITTED and SERIALIZABLE transaction modes. NOTE: if the mode is set to REPEATABLE READ, SERIALIZABLE is used. If the mode is set to READ UNCOMMITTED, READ COMMITTED is used.
1136+
1137+RN33: The implementation of AUTO_INCREMENT on a partial index is non-standard. A unique value is generated without regard to the value of the index prefix. For example, assume we have the following table: CREATE TABLE t1 (c1 CHAR(10) not null, c2 INT not null AUTO_INCREMENT, PRIMARY KEY(c1, c2));
1138+
1139+With the following contents: c1 c2
1140+ A 8
1141+ B 1
1142+
1143+After executing the following statement: insert into t1 (c1) values ('B');
1144+
1145+This is the result using PBXT: c1 c2
1146+ A 8
1147+ B 1
1148+ B 9
1149+
1150+The standard result would be: c1 c2
1151+ A 8
1152+ B 1
1153+ B 2
1154+
1155+RN32: PBXT does not permit access to multiple databases within a single transaction. For example:
1156+
1157+begin;
1158+update database_1.t1 set a=10;
1159+update database_2.t2 set d=10;
1160+commit;
1161+
1162+In this case the following error is returned: 1015: Can't lock file (errno: -1)
1163+
1164+RN31: The implementation of COUNT(*) has changed. For efficiency, rows are not counted. The information is taken from the header of the record (.xtr) files. This information is only 100% accurate after transaction cleanup has completed, which basically means only when PBXT is idle. ANALYZE TABLE waits for all background activity to stop, so the statement may be executed before a COUNT(*) to ensure an accurate result. NOTE: Other than waiting for background processes, ANALYZE TABLE is not implemented.
1165+
1166+RN30: Two concurrency bugs have been fixed: a shared lock was used instead of an exclusive lock when deleting from a transaction list, and the transaction segment semaphore was not initialized. XT now runs correctly in a multi-processor environment. The test used was sysbench on a dual-processor, dual-core, AMD 64-bit machine running SUSE Linux 10.0.
1167+
1168+RN29: PBXT compiles and runs under 64-bit Linux. [TODO: 0009]
1169+
1170+RN28: ./mysql-test-run --force --mysqld=--default-storage-engine=pbxt will now execute most tests successfully. Changes to the tests and the result have been documented in http://www.primebase.com/xt/download/pbxt-test-run-changes.txt. [TODO: 0004, 0019]
1171+
1172+RN27: Fixed a bug that caused the server to crash when using table locks and transactions. For example: LOCK TABLES, BEGIN, COMMIT, SELECT. This sequence now returns an error. The correct sequence is:
1173+
1174+LOCK TABLES, BEGIN, COMMIT, UNLOCK TABLES, SELECT
1175+or
1176+LOCK TABLES, BEGIN, COMMIT, BEGIN, SELECT, COMMIT, UNLOCK TABLES
1177+
1178+RN26: Fixed a concurrency problem which caused a number of threads to hang during the sysbench test - see RN30 above (bug reported by Vadim).
1179+
1180+RN25: Fixed a bug that caused the server to hang when ha_pbxt::create() and ha_pbxt::ha_open() were given different, but equivalent paths for a particular table.
1181+
1182+RN24: Fixed bug in the indexing of blob columns, for example: create table t1(name_id int, name blob, INDEX name_idx (name(5)));
1183+
1184+RN23: When a duplicate key error occurs in auto-commit mode, the transaction is now rolled back.
1185+
1186+RN22: Fixed incorrect duplicate key error. In the case of a unique key which allows NULLs, duplicates are allowed if the inserted key contains a NULL. For example:
1187+
1188+create table t1 (id int not null, str char(10), unique(str));
1189+insert into t1 values (1, null),(2, null),(3, "foo"),(4, "bar");
1190+
1191+RN21: PBXT now returns the correct error code on duplicate key: 1062 instead of 1022.
1192+
1193+RN19: Implemented AUTO_INCREMENT on partial keys. However, the XT implementation is non-standard. Increment of partial index works, but the ID generated is incremented like a non-partial index. For example:
1194+
1195+create table t1 (c1 char(10) not null, c2 int not null auto_increment, primary key(c1, c2));
1196+select * from t1;
1197+c1 c2
1198+A 8
1199+B 1
1200+
1201+insert into t1 (c1) values ('B');
1202+select * from t1;
1203+c1 c2
1204+A 8
1205+B 1
1206+B 9
1207+
1208+The standard result would be:
1209+c1 c2
1210+A 8
1211+B 1
1212+B 2
1213+
1214+RN18: Implemented TRUNCATE TABLE and DELETE FROM <table>; (i.e. a DELETE without WHERE clause). Previously DELETE FROM <table>; did not cause an error, but no rows were deleted (TRUNCATE TABLE returned an error). [TODO: 0012, 0022]
1215+
1216+RN17: Implemented CREATE TABLE (...) auto_increment=<value>;
1217+
1218+------- 0.9.51 - 2006-07-06
1219+
1220+RN16: Fixed crash which could occur when creating the first table in a database (bug reported by Hakan).
1221+
1222+------- 0.9.5 - 2006-07-03
1223+
1224+RN15: This version concludes the re-structuring of the PBXT implementation. I have made a number of major changes, including:
1225+
1226+- All files except the transaction logs are now associated with a particular table. All table related files begin with the name of the table. The extension indicates the function.
1227+
1228+- I have merged the handle and the fixed length row data for performance reasons.
1229+
1230+- Only the variable size component of a row is stored in the data log files. As a result the data logs can now be considered as a type of "overflow" area.
1231+
1232+- Memory mapped files are no longer used because it is not possible to flush changes to the disk.
1233+
1234+RN14: File names have the following forms:
1235+
1236+[table-name]-[table-id].xtr - These files contain the table row pointers. Each row pointer occupies 8 bytes and refers to a list of records. The file name also contains the table ID. This is a unique number which is used internally by XT to identify the table.
1237+
1238+[table-name].xtd - This file contains the fixed length data of a table. Each data item includes a handle and a record. The handle references a record in the data log file if the table contains variable length records.
1239+
1240+[table-name].xti - This file contains the index data of the table.
1241+
1242+[table-name]-[log-id].xtl - This is a data log file. It contains the variable length data of the table. A table may have any number of data log files, each with a unique ID.
1243+
1244+xtlog-[log-id].xt - These files are the transaction logs. Log entries that specify updates reference a data file record. Each active thread has its own transaction log in order to avoid contention.
1245+
1246+RN13: Fixed the bug "Hang on DROP DATABASE". [TODO: 0016]
1247+
1248+RN12: PBXT currently only supports the "Serializable" transaction isolation level. This is the highest isolation level possible and includes the "repeatable-read" functionality [TODO: 0015]. This is implemented by giving every transaction a snapshot of the database at the point when the transaction is started.
1249+
1250+If the transaction tries to update a record that was updated by some other transaction after the snapshot was taken, a locked error is returned. A deadlock can occur if 2 transactions update the same record in a different order. PBXT can detect all deadlocks.
1251+
1252+RN11: I have implemented write buffering on the table data files. [TODO: 0013]
1253+
1254+RN10: The unique constraint (UNIQUE INDEX/PRIMARY KEY) is now checked correctly. [TODO: 0008]
1255+
1256+RN9: I have implemented a conventional B-tree algorithm for the indices (instead of the Lehman and Yao B*-link tree). Although this reduces concurrency it improves the performance of queries significantly because of the simplicity of the algorithm. Deletion is also implemented in a very simple manner. [TODO: 0007]
1257+
1258+RN8: PBXT now has only 2 caches [TODO: 0006]:
1259+
1260+The Index Cache (pbxt_index_cache_size): This is the amount of memory the PBXT storage engine uses to cache index data and row pointers. This is all the data in the files with the extensions '.xti' and '.xtr'. This cache is managed in blocks of 2K.
1261+
1262+The Record Cache (pbxt_record_cache_size): This is the amount of memory the PBXT storage engine uses to cache table row data (handles and records). This is all the data in the files with the extension '.xtd'.
1263+
1264+The sizes of the caches are determined by the values of the system variables pbxt_index_cache_size and pbxt_record_cache_size. By default these values are set to 32MB.
1265+
1266+RN7: Auto-increment is now implemented in memory. This is done by doing a MAX() select when a table is first opened to get the high value. After that, the high value is incremented in memory on INSERT. On UPDATE (or INSERT) the value in memory is adjusted if necessary. This method also makes it possible for rows to be inserted simultaneously on the same table. [TODO: 0005, 0014]
1267+
1268+RN6: ./run-all-tests --create-options=TYPE=PBXT succeeds. [TODO: 0004]
1269+
1270+RN5: Using sql-bench and my own Java based test I have confirmed that PBXT behaves correctly during multi-threaded access. [PARTIALLY TODO: 0002]
1271+
1272+RN4: Load/Stability test. Using sql-bench I have tested PBXT under load over a long period of time. [PARTIALLY TODO: 0001]
1273+
1274+------- 0.9.2 - 2006-04-01
1275+
1276+RN3: Fixed a bug that caused the error "-6: Handle is out of range: [0:0]".
1277+
1278+RN2: Implemented SET, ENUM and YEAR data types.
1279+
1280+RN1: Fixed a bug in the error reporting when a table is created with a datatype that is not supported. [TODO: 0011]
1281+
1282+
1283
1284=== added file 'plugin/pbxt/Makefile.am'
1285--- plugin/pbxt/Makefile.am 1970-01-01 00:00:00 +0000
1286+++ plugin/pbxt/Makefile.am 2010-04-11 18:56:24 +0000
1287@@ -0,0 +1,3 @@
1288+SUBDIRS = src
1289+
1290+EXTRA_DIST = plug.ini
1291
1292=== added file 'plugin/pbxt/NEWS'
1293=== added file 'plugin/pbxt/README'
1294--- plugin/pbxt/README 1970-01-01 00:00:00 +0000
1295+++ plugin/pbxt/README 2010-04-11 18:56:24 +0000
1296@@ -0,0 +1,19 @@
1297+PrimeBase XT for MySQL 5.1
1298+==========================
1299+
1300+This is the PrimeBase XT (PBXT) transactional storage engine for MySQL. PBXT is "pluggable", which means that it can be loaded dynamically by MySQL at runtime. It uses a unique "write-once" update strategy and MVCC (multi-version concurrency control) to provide optimal performance over a wide range of tasks.
1301+
1302+This package includes the complete source code for the engine. Although this is a standalone project it must be built against a compiled version of the MySQL 5.1 source tree, because it references header files used internally by the server.
1303+
1304+Details about how to build PBXT under UNIX or Windows, as a standalone plug-in or as part of the MySQL source code, are described in the documentation, which is available online at:
1305+
1306+http://www.primebase.org/documentation
1307+
1308+Bug reports, questions and comments can be sent directly to me.
1309+
1310+Thanks for your support!
1311+
1312+Paul McCullagh
1313+SNAP Innovation GmbH
1314+paul.mccullagh@primebase.org
1315+
1316
1317=== added file 'plugin/pbxt/TODO'
1318--- plugin/pbxt/TODO 1970-01-01 00:00:00 +0000
1319+++ plugin/pbxt/TODO 2010-04-11 18:56:24 +0000
1320@@ -0,0 +1,195 @@
1321+PBXT To-Do List
1322+===============
1323+
1324+My thanks to all who have downloaded and tested PBXT. If an issue you reported before the date below is not on this list, please e-mail me again.
1325+
1326+------- 2008-12-09
1327+
1328+0063: The option for not using memory mapped files must be fixed.
1329+
1330+0062: Dynamic option for using memory mapping on a table (Dimitri).
1331+
1332+------- 2008-09-12
1333+
1334+0061: Add records per key result to ha_pbxt:info() call (Mark).
1335+
1336+------- 2008-08-31
1337+
1338+0060: Add table option to determine if a table should be memory mapped or not (also requested by Dimitri).
1339+
1340+0059: Add table options:
1341+ AVG_ROW_LENGTH [=] value
1342+ DATA DIRECTORY [=] 'absolute path to directory'
1343+ INDEX DIRECTORY [=] 'absolute path to directory'
1344+ MAX_ROWS [=] value
1345+
1346+------- 2008-03-28
1347+
1348+0058: Consolidate writes when changes in the log are applied to the database.
1349+
1350+------- 2008-03-07
1351+
1352+0057: Cluster updates onto a single page.
1353+
1354+0056: Add checksum to index and data pages.
1355+
1356+0055: When no index cache is available, the complete index must be flushed (not just single pages).
1357+
1358+0054: Optimize indexes by not creating indexes that are a complete sub-set of some other index. In this case we must be able to identify part of an index as unique. For example: primary key (a, b), index (a, b, c). Here we would just create index (a, b, c), and specify that the part (a, b) must be unique. Operations on (a, b) will be directed to index (a, b, c).
1359+
1360+0053: Check and test lock tables.
1361+
1362+0052: Cache data log data in the handle data cache. Must be purged when a handle data record is written.
1363+
1364+0051: Write data log data alternatively to the transaction log. The compactor must then compact transaction logs.
1365+
1366+0050: [RESOLVED: RN126] Implement consistent write for indexes.
1367+
1368+0049: [RESOLVED: RN114] Set the index block size to 4K, or 16K as used by InnoDB.
1369+
1370+0048: [RESOLVED: RN110] Add row ID to indexes. This should only be set once the row is cleaned by the sweeper. Then the row ID can be used to make a quick check if the row is the most recent version.
1371+
1372+------- 2007-06-19
1373+
1374+0047: Test build with ./configure --with-innodb under Linux (Vadim).
1375+
1376+0046: [RESOLVED: RN85] Add plug.in file to enable drop in compile under Linux.
1377+
1378+0045: Provide libstdc++.so.6 binaries (Vadim).
1379+
1380+0044: [RESOLVED: RN73] Limit number of file handles used per table (Brian).
1381+
1382+0043: XA (two-phase commit) support (Peter).
1383+
1384+------- 2007-03-13
1385+
1386+0042: [RESOLVED: RN108] Implement STATUS commands.
1387+
1388+0041: Implement index prefix compression.
1389+
1390+------- 2007-03-07
1391+
1392+0040: [RESOLVED: RN60] Update in-place when a transaction updates the same record more than once.
1393+
1394+0039: Set the number and size of the segments dynamically according to the amount of memory in the cache (and the number of CPUs?) (as discussed with: Peter & Vadim).
1395+
1396+0038: [RESOLVED: RN133] Improve the efficiency of the locks by using atomic compare and swap (Peter & Vadim).
1397+
1398+0037: [RESOLVED: RN133] Instead of a global LRU list, use a LRU list for each segment of the cache (Peter & Vadim). [ Note: a global list using a TAS lock and change time (so that LRU is not always updated) is most efficient].
1399+
1400+0036: Add support for deferred foreign key checking (requested by: Mark).
1401+
1402+0035: [RESOLVED: RN71] Remove the 2000 table limit (reported by: Hakan).
1403+
1404+------- 2007-02-28
1405+
1406+0035: [RESOLVED: RN74, RN107] Build in the PBXT system parameters (currently they must be set using environment variables).
1407+
1408+0034: [RESOLVED: RN117] Initial documentation (yes, it must be done!)
1409+
1410+0033: Make the error code returned on lock error configurable.
1411+
1412+0032: [RESOLVED: RN65] Create a source code pluggable version for Windows.
1413+
1414+0031: [RESOLVED: RN66] PBXT corrupts the index file when the size exceeds 4 GB (reported by: Luciano)
1415+
1416+0030: [RESOLVED: RN102] Implement pbxt_index_flush_delay. Postpones index writing in order to speed up imports. [Resolution uses the fact that index entries that are missing are added during recovery. As a result, index flushing can be delayed.]
1417+
1418+0029: [RESOLVED: RN103] Implement SELECT ... FOR UPDATE (recommended by: Robin).
1419+
1420+------- 2007-02-14
1421+
1422+0028: Implement CREATE TABLE ... DATA/INDEX DIRECTORY (suggested by: Robin).
1423+
1424+------- 2006-12-06
1425+
1426+0027: [RESOLVED: RN53] Bug in pbxt with query caching (reported by: Giuseppe) caused violation of transaction isolation.
1427+
1428+------- 2006-08-05
1429+
1430+0026: Implement BACKUP and RESTORE table (planned for the first post release version).
1431+
1432+0025: Implement DISABLE/ENABLE KEYS. Works for FOREIGN KEYs, currently no plans to implement for disabling indexes.
1433+
1434+0024: Implement ANALYZE TABLE (planned for the first post release version).
1435+
1436+0023: Implement CHECK TABLE (planned for the first release candidate).
1437+
1438+0022: [RESOLVED: RN18] Implement TRUNCATE TABLE and DELETE FROM <table>; (i.e. a DELETE without WHERE clause). Currently this function does not cause an error, but no rows are deleted.
1439+
1440+------- 2006-07-06
1441+
1442+0021: [RESOLVED: RN28] .../mysql-test/mysql-test-run --force --mysqld=--default-storage-engine=pbxt produces a number of errors (reported by: Hakan): As far as I can tell some failures are unnecessary but others are bugs. All need to be checked.
1443+
1444+------- 2006-07-03
1445+
1446+0020: [RESOLVED: RN49] Implement referential integrity (planned for the first release candidate).
1447+
1448+------- 2006-04-01
1449+
1450+0019: [RESOLVED: RN28] mysql-test-run hangs on alter table (reported by: Hakan): Running a test like ./mysql-test-run.pl --mysqld=--default-storage-engine=pbxt, hangs on ALTER TABLE.
1451+
1452+0018: Implement GEOMETRY data type. Note: There are currently no plans to implement this feature.
1453+
1454+------- 2006-03-31
1455+
1456+0017: [RESOLVED: RN37] MySQL 5.x Version (reported by: Ronald, Giuseppe).
1457+
1458+0016: [RESOLVED: RN13] Hang on "DROP DATABASE" (reported by: Giuseppe). Load the world database (http://downloads.mysql.com/docs/world.sql) and convert all tables into PBXT. Then, the drop database command hangs.
1459+
1460+0015: [RESOLVED: RN12] Implement isolation level "repeatable read" (reported by: Giuseppe). Current PBXT only supports isolation level "committed read". This means committed data can be seen no matter when it was committed. Use SELECT ... FOR UPDATE to guarantee repeatable read, on data already read.
1461+
1462+0014: [RESOLVED: RN7] Two transactions cannot insert simultaneously if they use auto_increment (reported by: Giuseppe). See also 0005.
1463+
1464+0013: [RESOLVED: RN11] Implement buffered write (reported by: Giuseppe): Lack of buffered write leads to bad performance in operations such as ALTER TABLE ENGINE = PBXT and INSERT ... SELECT.
1465+
1466+0012: [RESOLVED: RN18] TRUNCATE does not work (reported by: Giuseppe)
1467+
1468+0011: [RESOLVED: RN2] Load Sakila Sample Database (reported by: Ronald): ALTER TABLE film ENGINE=PBXT; fails
1469+
1470+0010: [RESOLVED: RN6] sql-bench (reported by: Dmitry): ./run-all-tests --create-options=TYPE=PBXT fails.
1471+
1472+0009: [RESOLVED: RN29] 64-bit Linux (reported by: Hakan): PBXT currently does not compile under 64-bit Linux.
1473+
1474+------- 2006-03-16
1475+
1476+0008: [RESOLVED: RN10] Enforcing the unique index constraint:
1477+
1478+An index declared as "unique" must return a "duplicate unique key" error when inserting a duplicate value. The difficult part of implementing this in PBXT is that we may encounter a duplicate value that has not yet been committed. The index reading thread must then wait for the transaction to commit or abort.
1479+
1480+0007: [RESOLVED: RN9] Cleaning up empty index nodes:
1481+
1482+The Lehman and Yao algorithm used for indexing does not describe a way of cleaning up empty index nodes on-the-fly. A search of the relevant literature for an algorithm also comes up empty-handed (periodic "reorg" is mostly suggested). I have subsequently devised an algorithm that will do the job. This needs to be implemented.
1483+
1484+0006: [RESOLVED: RN8] Cache Balancing:
1485+
1486+PBXT uses a number of small caches in order to improve concurrency (rather than one large cache). A process is required to manage the amount of cache memory used as a whole. The process must distribute the overall amount of memory available for caching over the small caches, according to demand.
1487+
1488+0005: [RESOLVED: RN7] Implement a faster auto-increment method
1489+
1490+Currently the auto-increment is handled by the default method used in MySQL. This is done by performing a "fetch-last" on the index for each insert to find the highest key value. This works well unless there are a large number of empty index nodes due to the problem described in (2) above.
1491+
1492+PBXT Testing To-Do List
1493+
1494+This is my first take on what still must be tested. My thanks to Ronald Bradford who is working on a generic testing framework that can be used to test PBXT.
1495+
1496+0004: [RESOLVED: RN6, RN28] MySQL Tests:
1497+
1498+Several tests (for mysql-test-run) written for other engines can be adapted and used to test PBXT.
1499+
1500+0003: [RESOLVED: RN30] Multi-processor Test:
1501+
1502+There is a difference between preemptive multitasking and true multitasking, which you have on a multi-processor (or dual core) machine. I don't expect any fundamental problems here, but it must be tested.
1503+
1504+0002: [RESOLVED: RN5, RN30, RN43] Multi-user/locking Test:
1505+
1506+How does the engine perform with a number of concurrent users running various transactions on a number of different tables?
1507+This is a difficult test to write because it needs to simulate a production situation. At least 2 or 3 machines are required for the test. The idea is not to use too much data so that a lot of conflicts may occur.
1508+
1509+0001: [RESOLVED: RN4, RN43] Load/Stability Test:
1510+
1511+How does the engine perform under heavy load over a long period of time? How stable is the engine on power outage, etc?
1512+
1513+The test could use a variation of the test program written for test (3) above. At least 3 test machines would be required. The test must be modified to cause as much activity as possible. The test should monitor the performance under load.
1514+
1515+
1516
1517=== added directory 'plugin/pbxt/bin'
1518=== added file 'plugin/pbxt/bin/xtstat_xt.cc'
1519--- plugin/pbxt/bin/xtstat_xt.cc 1970-01-01 00:00:00 +0000
1520+++ plugin/pbxt/bin/xtstat_xt.cc 2010-04-11 18:56:24 +0000
1521@@ -0,0 +1,825 @@
1522+/* Copyright (c) 2005 PrimeBase Technologies GmbH
1523+ *
1524+ * PrimeBase XT
1525+ *
1526+ * This program is free software; you can redistribute it and/or modify
1527+ * it under the terms of the GNU General Public License as published by
1528+ * the Free Software Foundation; either version 2 of the License, or
1529+ * (at your option) any later version.
1530+ *
1531+ * This program is distributed in the hope that it will be useful,
1532+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1533+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1534+ * GNU General Public License for more details.
1535+ *
1536+ * You should have received a copy of the GNU General Public License
1537+ * along with this program; if not, write to the Free Software
1538+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1539+ *
1540+ * 2008-11-19 Paul McCullagh
1541+ *
1542+ * H&G2JCtL
1543+ */
1544+
1545+#include "xt_config.h"
1546+
1547+#include <mysql.h>
1548+#include <stdio.h>
1549+#include <stdlib.h>
1550+#include <ctype.h>
1551+#include <string.h>
1552+
1553+#include "strutil_xt.h"
1554+#include "util_xt.h"
1555+
1556+//#define DEBUG_INTERRUPT
1557+
1558+#define OPT_NONE -1
1559+#define OPT_HELP 0
1560+#define OPT_HOST 1
1561+#define OPT_USER 2
1562+#define OPT_PASSWORD 3
1563+#define OPT_DATABASE 4
1564+#define OPT_PORT 5
1565+#define OPT_SOCKET 6
1566+#define OPT_DELAY 7
1567+#define OPT_PROTOCOL 8
1568+#define OPT_DISPLAY 9
1569+
1570+#define OPT_HAS_VALUE 1
1571+#define OPT_OPTIONAL 2
1572+#define OPT_INTEGER 4
1573+
1574+llong record_cache_size;
1575+llong index_cache_size;
1576+llong log_cache_size;
1577+
1578+llong accumulative_values[XT_STAT_CURRENT_MAX];
1579+int columns_used;
1580+int use_i_s = 0;
1581+
1582+struct DisplayOrder { /* One entry per displayed column, in display order. */
1583+ int do_statistic; /* Statistic ID, as passed to xt_get_stat_meta_data(). */
1584+ bool do_combo; /* True when this column is printed combined with the column that follows it. */
1585+} display_order[XT_STAT_CURRENT_MAX];
1586+
1587+struct Options { /* One command-line option; options[] is indexed by the OPT_* identifiers. */
1588+ int opt_id;
1589+ const char opt_char; /* Short option character, 0 if there is none. */
1590+ const char *opt_name; /* Long option name, without the leading "--". */
1591+ int opt_flags; /* Combination of OPT_HAS_VALUE, OPT_OPTIONAL and OPT_INTEGER. */
1592+ const char *opt_desc;
1593+ const char *opt_value_str; /* Default value; replaced by the value given on the command line. */
1594+ int opt_value_int; /* Integer value, set from opt_value_str for OPT_INTEGER options. */
1595+ bool opt_value_bool; /* Set to true when the option appears on the command line. */
1596+} options[] = {
1597+ { OPT_HELP, '?', "help", 0,
1598+ "Prints help text", NULL, 0, false },
1599+ { OPT_HOST, 'h', "host", OPT_HAS_VALUE,
1600+ "Connect to host", NULL, 0, false },
1601+ { OPT_USER, 'u', "user", OPT_HAS_VALUE,
1602+ "User for login if not current user", NULL, 0, false },
1603+ { OPT_PASSWORD, 'p', "password", OPT_HAS_VALUE | OPT_OPTIONAL,
1604+ "Password to use when connecting to server. If password is not given it's asked from the tty", NULL, 0, false },
1605+ { OPT_DATABASE, 'd', "database", OPT_HAS_VALUE,
1606+ "Database to be used (pbxt or information_schema required), default is information_schema", "information_schema", 0, false },
1607+ { OPT_PORT, 'P', "port", OPT_HAS_VALUE | OPT_INTEGER,
1608+ "Port number to use for connection", NULL, 3306, false },
1609+ { OPT_SOCKET, 'S', "socket", OPT_HAS_VALUE,
1610+ "Socket file to use for connection", NULL, 0, false },
1611+ { OPT_DELAY, 'D', "delay", OPT_HAS_VALUE | OPT_INTEGER,
1612+ "Delay in seconds between polls of the database", NULL, 1, false },
1613+ { OPT_PROTOCOL, 0, "protocol", OPT_HAS_VALUE,
1614+ "Connection protocol to use: default/tcp/socket/pipe/memory", "default", MYSQL_PROTOCOL_DEFAULT, false },
1615+ { OPT_DISPLAY, 0, "display", OPT_HAS_VALUE,
1616+ "Columns to display: use short names separated by commas, partial match allowed", "time-msec,commt,row-ins,rec,ind,ilog,xlog,data,to,dirty", 0, false },
1617+ { OPT_NONE, 0, NULL, 0, NULL, NULL, 0, false }
1618+};
1619+
1620+#ifdef XT_WIN
1621+#define atoll _atoi64
1622+#endif
1623+/* Append 'stat' as the next display column, unless it is already present in display_order[]. */
1624+void add_statistic(int stat)
1625+{
1626+ /* Check if column has already been added: */
1627+ for (int i=0; i<columns_used; i++) {
1628+ if (display_order[i].do_statistic == stat)
1629+ return;
1630+ }
1631+ display_order[columns_used].do_statistic = stat;
1632+ display_order[columns_used].do_combo = false; /* Combo flags are set at the end of determine_display_order(). */
1633+ columns_used++;
1634+}
1635+/* Build display_order[] from the --display option value; exits with an error if a name matches no statistic. */
1636+void determine_display_order()
1637+{
1638+ const char *cols = options[OPT_DISPLAY].opt_value_str;
1639+ char column_1[21], column_2[21];
1640+ int i;
1641+ bool add, added, add_combo;
1642+ XTStatMetaDataPtr meta, meta2;
1643+
1644+ if (strcmp(cols, "all") == 0)
1645+ cols = "time,xact,stat,rec,ind,ilog,xlog,data,to,sweep,scan,row";
1646+ columns_used = 0;
1647+ while (*cols) { /* One iteration per comma-separated entry of the form "name" or "name1-name2". */
1648+ i = 0;
1649+ while (*cols && *cols != '-' && *cols != ',') {
1650+ if (i < 20) { /* Characters beyond the 20th are silently dropped. */
1651+ column_1[i] = *cols;
1652+ i++;
1653+ }
1654+ cols++;
1655+ }
1656+ column_1[i] = 0;
1657+
1658+ i = 0;
1659+ if (*cols == '-') {
1660+ cols++;
1661+ while (*cols && *cols != '-' && *cols != ',') {
1662+ if (i < 20) {
1663+ column_2[i] = *cols;
1664+ i++;
1665+ }
1666+ cols++;
1667+ }
1668+ }
1669+ column_2[i] = 0; /* Empty when the entry has no "-name2" part. */
1670+
1671+ if (*cols == ',')
1672+ cols++;
1673+
1674+ if (strcmp(column_1, "ms") == 0)
1675+ strcpy(column_1, "msec");
1676+ if (strcmp(column_2, "ms") == 0)
1677+ strcpy(column_2, "msec");
1678+ add_combo = false;
1679+ if (strcmp(column_1, "syncs/ms") == 0) {
1680+ strcpy(column_1, "syncs");
1681+ add_combo = true;
1682+ }
1683+ if (strcmp(column_2, "syncs/ms") == 0) {
1684+ strcpy(column_2, "syncs");
1685+ add_combo = true;
1686+ }
1687+
1688+ added = false;
1689+ for (i=0; i<XT_STAT_MAXIMUM; i++) {
1690+ meta = xt_get_stat_meta_data(xt_get_stat_meta_order(i));
1691+ add = false;
1692+ if (strcmp(meta->sm_short_line_1, column_1) == 0) {
1693+ if (column_2[0]) {
1694+ if (strcmp(meta->sm_short_line_2, column_2) == 0)
1695+ add = true;
1696+ }
1697+ else {
1698+ if (xt_get_stat_meta_order(i) != XT_STAT_XLOG_CACHE_USAGE)
1699+ add = true;
1700+ }
1701+ }
1702+ else if (!column_2[0]) {
1703+ if (strcmp(meta->sm_short_line_2, column_1) == 0) {
1704+ /* XT_STAT_XLOG_CACHE_USAGE is ignored, unless explicitly listed! */
1705+ if (xt_get_stat_meta_order(i) != XT_STAT_XLOG_CACHE_USAGE)
1706+ add = true;
1707+ }
1708+ }
1709+ if (add) {
1710+ added = true;
1711+ add_statistic(xt_get_stat_meta_order(i));
1712+ if (add_combo)
1713+ add_statistic(xt_get_stat_meta_order(i+1)); /* "syncs/ms": also add the statistic that follows in meta order. */
1714+ }
1715+ }
1716+ if (!added) {
1717+ if (column_2[0])
1718+ fprintf(stderr, "ERROR: No statistic matches display option: '%s-%s'\n", column_1, column_2);
1719+ else
1720+ fprintf(stderr, "ERROR: No statistic matches display option: '%s'\n", column_1);
1721+ fprintf(stderr, "Display options: %s\n", options[OPT_DISPLAY].opt_value_str);
1722+ exit(1);
1723+ }
1724+ }
1725+
1726+ /* Setup "combo" fields: a COMBO_FIELD column directly followed by its COMBO_FIELD_2 partner with the same first heading line. */
1727+ for (i=0; i<columns_used; i++) {
1728+ meta = xt_get_stat_meta_data(display_order[i].do_statistic);
1729+ if (meta->sm_flags & XT_STAT_COMBO_FIELD) {
1730+ if (i+1 < columns_used) {
1731+ meta2 = xt_get_stat_meta_data(display_order[i+1].do_statistic);
1732+ if (meta2->sm_flags & XT_STAT_COMBO_FIELD_2) {
1733+ if (strcmp(meta->sm_short_line_1, meta2->sm_short_line_1) == 0)
1734+ display_order[i].do_combo = true;
1735+ }
1736+ }
1737+ }
1738+ }
1739+}
1740+/* Write 'value' as a percentage of 'perc' into 'buffer', cut to at most 4 characters. */
1741+void format_percent_value(char *buffer, double value, double perc)
1742+{
1743+ value = value * (double) 100 / (double) perc;
1744+ if (value >= 100)
1745+ sprintf(buffer, "%.0f", value);
1746+ else
1747+ sprintf(buffer, "%.1f", value);
1748+ buffer[4] = 0; /* Keep at most 4 characters. */
1749+ if (buffer[3] == '.') /* Drop a trailing decimal point left by the cut (e.g. "100." from "100.0"). */
1750+ buffer[3] = 0;
1751+}
1752+
1753+#define XT_1_K ((double) 1024)
1754+#define XT_1_M ((double) 1024 * (double) 1024)
1755+#define XT_1_G ((double) 1024 * (double) 1024 * (double) 1024)
1756+#define XT_1_T ((double) 1024 * (double) 1024 * (double) 1024 * (double) 1024)
1757+#define XT_10000_K ((double) 10000 * XT_1_K)
1758+#define XT_10000_M ((double) 10000 * XT_1_M)
1759+#define XT_10000_G ((double) 10000 * XT_1_G)
1760+/* Write a byte count into 'buffer': plain digits below 100000, otherwise up to 4 characters plus a K/M/G/T suffix. */
1761+void format_byte_value(char *buffer, double value)
1762+{
1763+ double dval;
1764+ char string[100];
1765+ char ch;
1766+
1767+ if (value < (double) 100000) {
1768+ /* byte value from 0 to 99999: */
1769+ sprintf(buffer, "%.0f", value);
1770+ return;
1771+ }
1772+
1773+ if (value < XT_10000_K) {
1774+ dval = value / XT_1_K;
1775+ ch = 'K';
1776+ }
1777+ else if (value < XT_10000_M) {
1778+ dval = value / XT_1_M;
1779+ ch = 'M';
1780+ }
1781+ else if (value < XT_10000_G) {
1782+ dval = value / XT_1_G;
1783+ ch = 'G';
1784+ }
1785+ else {
1786+ dval = value / XT_1_T;
1787+ ch = 'T';
1788+ }
1789+
1790+ if (dval < (double) 10.0)
1791+ sprintf(string, "%.2f", dval);
1792+ else if (dval < (double) 100.0)
1793+ sprintf(string, "%.1f", dval);
1794+ else
1795+ sprintf(string, "%.0f", dval > 9999.0 ? 9999.0 : dval); /* Clamp: a 5-digit result such as "10000" would be cut to "1000" below. */
1796+ if (string[3] == '.') /* Keep 4 characters, or 3 if the 4th would be a dangling decimal point. */
1797+ string[3] = 0;
1798+ else
1799+ string[4] = 0;
1800+ sprintf(buffer, "%s%c", string, ch);
1801+}
1802+
1803+/*
1804+ * Write a count into 'buffer' in 2 characters, using the suffixes:
1805+ * t = thousands
1806+ * m = millions
1807+ * b = billions
1808+ */
1809+void format_mini_count_value(char *buffer, double value)
1810+{
1811+ double dval;
1812+ char string[100];
1813+ char ch;
1814+
1815+ if (value < (double) 100) {
1816+ /* Value from 0 to 99: */
1817+ sprintf(buffer, "%.0f", value);
1818+ return;
1819+ }
1820+
1821+ if (value < (double) 1000) {
1822+ sprintf(buffer, "<t");
1823+ return;
1824+ }
1825+
1826+ if (value < (double) 10000) {
1827+ /* Value is from 1,000 to 9,999: one digit of thousands */
1828+ dval = value / (double) 1000.0;
1829+ ch = 't';
1830+ }
1831+ else if (value < (double) 1000000) {
1832+ sprintf(buffer, "<m");
1833+ return;
1834+ }
1835+ else if (value < (double) 10000000) {
1836+ /* Value is from 1,000,000 to 9,999,999: one digit of millions */
1837+ dval = value / (double) 1000000.0;
1838+ ch = 'm';
1839+ }
1840+ else if (value < (double) 1000000000) {
1841+ sprintf(buffer, "<b");
1842+ return;
1843+ }
1844+ else {
1845+ /* Value is greater than 1 billion */
1846+ dval = value / (double) 1000000000.0;
1847+ ch = 'b';
1848+ }
1849+
1850+ sprintf(string, "%1.0f", dval < 9.0 ? dval : 9.0); /* Clamp: "10" (e.g. from 9.999) would be cut to "1" below. */
1851+ string[1] = 0;
1852+ sprintf(buffer, "%s%c", string, ch);
1853+}
1854+
1855+#define XT_1_THOUSAND ((double) 1000)
1856+#define XT_1_MILLION ((double) 1000 * (double) 1000)
1857+#define XT_1_BILLION ((double) 1000 * (double) 1000 * (double) 1000)
1858+#define XT_1_TRILLION ((double) 1000 * (double) 1000 * (double) 1000 * (double) 1000)
1859+#define XT_10_THOUSAND ((double) 10 * (double) 1000)
1860+#define XT_10_MILLION ((double) 10 * (double) 1000 * (double) 1000)
1861+#define XT_10_BILLION ((double) 10 * (double) 1000 * (double) 1000 * (double) 1000)
1862+#define XT_10_TRILLION ((double) 10 * (double) 1000 * (double) 1000 * (double) 1000 * (double) 1000)
1863+/* Write a count into 'buffer': plain digits below 10000, otherwise up to 4 characters plus a t/m/b suffix; negative values print as "0". */
1864+void format_count_value(char *buffer, double value)
1865+{
1866+ double dval;
1867+ char string[100];
1868+ char ch;
1869+
1870+ if (value < (double) 0) {
1871+ strcpy(buffer, "0");
1872+ return;
1873+ }
1874+
1875+ if (value < XT_10_THOUSAND) {
1876+ /* Plain count from 0 to 9999: */
1877+ sprintf(buffer, "%.0f", value);
1878+ return;
1879+ }
1880+
1881+ if (value < XT_10_MILLION) {
1882+ /* Value is less than 10 million */
1883+ dval = value / XT_1_THOUSAND;
1884+ ch = 't';
1885+ }
1886+ else if (value < XT_10_BILLION) {
1887+ /* Value is less than 10 billion */
1888+ dval = value / XT_1_MILLION;
1889+ ch = 'm';
1890+ }
1891+ else if (value < XT_10_TRILLION) {
1892+ /* Value is less than 10 trillion */
1893+ dval = value / XT_1_BILLION;
1894+ ch = 'b';
1895+ }
1896+ else {
1897+ dval = value / XT_1_TRILLION;
1898+ ch = 't'; /* NOTE(review): same suffix as thousands - confirm this is intended. */
1899+ }
1900+
1901+ if (dval < (double) 10.0)
1902+ sprintf(string, "%.2f", dval);
1903+ else if (dval < (double) 100.0)
1904+ sprintf(string, "%.1f", dval);
1905+ else
1906+ sprintf(string, "%.0f", dval > 9999.0 ? 9999.0 : dval); /* Clamp: a 5-digit result such as "10000" would be cut to "1000" below. */
1907+ if (string[3] == '.') /* Keep 4 characters, or 3 if the 4th would be a dangling decimal point. */
1908+ string[3] = 0;
1909+ else
1910+ string[4] = 0;
1911+ sprintf(buffer, "%s%c", string, ch);
1912+}
1913+/* Print the usage line followed by one line per entry of options[]. */
1914+void print_help()
1915+{
1916+ struct Options *opt;
1917+ char command[100];
1918+
1919+ printf("Usage: xtstat [ options ]\n");
1920+ printf("e.g. xtstat -D10 : Poll every 10 seconds\n");
1921+ opt = options;
1922+ printf("Options :-\n");
1923+ while (opt->opt_id != OPT_NONE) { /* The table ends with an OPT_NONE entry. */
1924+ strcpy(command, opt->opt_name);
1925+ if (opt->opt_flags & OPT_HAS_VALUE) {
1926+ if (opt->opt_flags & OPT_OPTIONAL)
1927+ strcat(command, "[=value]");
1928+ else
1929+ strcat(command, "=value");
1930+ }
1931+ if (opt->opt_char)
1932+ printf("-%c, --%-16s %s.\n", opt->opt_char, command, opt->opt_desc);
1933+ else
1934+ printf(" --%-16s %s.\n", command, opt->opt_desc); /* No short form for this option. */
1935+ opt++;
1936+ }
1937+}
1938+/* Print the legend for the unit suffixes used in the statistics output. */
1939+void print_stat_key()
1940+{
1941+ printf("Key :-\n");
1942+ printf("K = Kilobytes (1,024 bytes)\n");
1943+ printf("M = Megabytes (1,048,576 bytes)\n");
1944+ printf("G = Gigabytes (1,073,741,824 bytes)\n");
1945+ printf("T = Terabytes (1,099,511,627,776 bytes)\n");
1946+ printf("t = thousands (1,000s)\n");
1947+ printf("m = millions (1,000,000s)\n");
1948+ printf("b = billions (1,000,000,000s)\n");
1949+}
1950+/* Print one line per statistic: its two short heading names, its full name and its description. */
1951+void print_stat_info()
1952+{
1953+ XTStatMetaDataPtr meta;
1954+ char buffer[40];
1955+ char desc[400];
1956+
1957+ printf("Statistics :-\n");
1958+ for (int i=0; i<XT_STAT_CURRENT_MAX; i++) {
1959+ meta = xt_get_stat_meta_data(i);
1960+ sprintf(desc, meta->sm_description, "milli"); /* sm_description is used as a format string; presumably it may hold a %s for the time unit - confirm. */
1961+ sprintf(buffer, "%s-%s", meta->sm_short_line_1, meta->sm_short_line_2);
1962+ if (meta->sm_flags & XT_STAT_COMBO_FIELD) {
1963+ /* Combine next 2 fields: the following statistic is skipped and not listed separately. */
1964+ i++;
1965+ strcat(buffer, "/ms");
1966+ strcat(desc, "/time taken in milliseconds");
1967+ }
1968+ printf("%-13s %-21s - %s.\n", buffer, meta->sm_name, desc);
1969+ }
1970+}
1971+/* Match argument text 'what' against option name 'opt'; on success *value is the text after '=' or NULL if there is none. */
1972+bool match_arg(char *what, const char *opt, char **value)
1973+{
1974+ while (*what && *opt && isalpha((unsigned char) *what)) { /* Cast: isalpha() is undefined for negative char values. */
1975+ if (*what != *opt)
1976+ return false;
1977+ what++;
1978+ opt++;
1979+ }
1980+ if (*opt) /* The option name must be matched in full. */
1981+ return false;
1982+ if (*what == '=')
1983+ *value = what + 1;
1984+ else if (*what) /* Trailing characters other than "=value" mean no match. */
1985+ return false;
1986+ else
1987+ *value = NULL;
1988+ return true;
1989+}
1990+/* Parse the command line into options[]; prints a message and exits on an unknown option, a bad value, or --help. */
1991+void parse_args(int argc, char **argv)
1992+{
1993+ char *ptr;
1994+ char *value;
1995+ int i = 1;
1996+ struct Options *opt;
1997+ bool found;
1998+
1999+ while (i < argc) {
2000+ ptr = argv[i];
2001+ found = false;
2002+ if (*ptr == '-') {
2003+ ptr++;
2004+ if (*ptr == '-') {
2005+ ptr++; /* Long form: --name or --name=value */
2006+ opt = options;
2007+ while (opt->opt_id != OPT_NONE) {
2008+ if (match_arg(ptr, opt->opt_name, &value)) {
2009+ found = true;
2010+ opt->opt_value_str = value;
2011+ opt->opt_value_bool = true;
2012+ break;
2013+ }
2014+ opt++;
2015+ }
2016+ }
2017+ else {
2018+ opt = options; /* Short form: -cVALUE, or -c followed by a separate VALUE argument */
2019+ while (opt->opt_id != OPT_NONE) {
2020+ if (*ptr == opt->opt_char) {
2021+ ptr++;
2022+ if (*ptr)
2023+ opt->opt_value_str = ptr;
2024+ else {
2025+ opt->opt_value_str = NULL;
2026+ if (i+1 < argc) {
2027+ ptr = argv[i+1];
2028+ if (*ptr != '-') { /* The next argument is the value unless it looks like another option. */
2029+ opt->opt_value_str = ptr;
2030+ i++;
2031+ }
2032+ }
2033+ }
2034+ found = true;
2035+ opt->opt_value_bool = true;
2036+ break;
2037+ }
2038+ opt++;
2039+ }
2040+ }
2041+ }
2042+
2043+ if (!found) {
2044+ fprintf(stderr, "Unknown option: %s\n", argv[i]);
2045+ print_help();
2046+ exit(1);
2047+ }
2048+
2049+ if (opt->opt_flags & OPT_HAS_VALUE) {
2050+ if (!(opt->opt_flags & OPT_OPTIONAL)) {
2051+ if (!opt->opt_value_str) {
2052+ fprintf(stderr, "Option requires a value: %s\n", argv[i]);
2053+ fprintf(stderr, "Use --help for help on commands and usage\n");
2054+ exit(1);
2055+ }
2056+ }
2057+ }
2058+ else {
2059+ if (opt->opt_value_str) {
2060+ fprintf(stderr, "Option does not accept a value: %s\n", argv[i]);
2061+ fprintf(stderr, "Use --help for help on commands and usage\n");
2062+ exit(1);
2063+ }
2064+ }
2065+
2066+ if (opt->opt_value_str && (opt->opt_flags & OPT_INTEGER))
2067+ opt->opt_value_int = atoi(opt->opt_value_str);
2068+
2069+ if (opt->opt_id == OPT_HELP) {
2070+ print_help();
2071+ print_stat_key();
2072+ print_stat_info();
2073+ exit(1);
2074+ }
2075+
2076+ i++;
2077+ }
2078+}
2079+/* Debug only: run "show engine pbxt status" and discard its result; exits if the query fails. */
2080+#ifdef DEBUG_INTERRUPT
2081+void interrupt_pbxt(MYSQL *conn)
2082+{
2083+ MYSQL_RES *res;
2084+
2085+ if (mysql_query(conn, "show engine pbxt status")) {
2086+ fprintf(stderr, "%s\n", mysql_error(conn));
2087+ exit(1);
2088+ }
2089+
2090+ res = mysql_use_result(conn);
2091+ mysql_free_result(res); /* The rows are not read; only the result is released. */
2092+}
2093+#endif
2094+/* Print the server's pbxt_* variables, record the three cache sizes and reset accumulative_values[]; returns false if the query fails. */
2095+static bool display_parameters(MYSQL *conn)
2096+{
2097+ MYSQL_RES *res;
2098+ MYSQL_ROW row;
2099+
2100+ /* send SQL query */
2101+ if (mysql_query(conn, "show variables like 'pbxt_%'"))
2102+ return false;
2103+
2104+ if (!(res = mysql_use_result(conn)))
2105+ return false;
2106+
2107+ /* Print every variable; the cache sizes are kept for the percentage columns in main(). */
2108+ printf("-- PBXT System Variables --\n");
2109+ while ((row = mysql_fetch_row(res)) != NULL) {
2110+ if (strcmp(row[0], "pbxt_index_cache_size") == 0)
2111+ index_cache_size = xt_byte_size_to_int8(row[1]);
2112+ else if (strcmp(row[0], "pbxt_record_cache_size") == 0)
2113+ record_cache_size = xt_byte_size_to_int8(row[1]);
2114+ else if (strcmp(row[0], "pbxt_log_cache_size") == 0)
2115+ log_cache_size = xt_byte_size_to_int8(row[1]);
2116+ printf("%-29s= %s\n", row[0], row[1]);
2117+ }
2118+
2119+ mysql_free_result(res);
2120+
2121+ for (int i=0; i<XT_STAT_CURRENT_MAX; i++)
2122+ accumulative_values[i] = 0;
2123+
2124+ printf("Display options: %s\n", options[OPT_DISPLAY].opt_value_str);
2125+ return true;
2126+}
2127+/* Apply the protocol and option-group settings to 'conn' and connect using the values in options[]; returns false on failure. */
2128+static bool connect(MYSQL *conn)
2129+{
2130+ unsigned int type;
2131+
2132+ if (strcasecmp(options[OPT_PROTOCOL].opt_value_str, "tcp") == 0)
2133+ type = MYSQL_PROTOCOL_TCP;
2134+ else if (strcasecmp(options[OPT_PROTOCOL].opt_value_str, "socket") == 0)
2135+ type = MYSQL_PROTOCOL_SOCKET;
2136+ else if (strcasecmp(options[OPT_PROTOCOL].opt_value_str, "pipe") == 0)
2137+ type = MYSQL_PROTOCOL_PIPE;
2138+ else if (strcasecmp(options[OPT_PROTOCOL].opt_value_str, "memory") == 0)
2139+ type = MYSQL_PROTOCOL_MEMORY;
2140+ else
2141+ type = MYSQL_PROTOCOL_DEFAULT; /* "default" and any unrecognised name */
2142+
2143+ if (mysql_options(conn, MYSQL_OPT_PROTOCOL, (char *) &type))
2144+ return false;
2145+
2146+ if (mysql_options(conn, MYSQL_READ_DEFAULT_GROUP, "xtstat"))
2147+ return false;
2148+
2149+ if (strcasecmp(options[OPT_DATABASE].opt_value_str, "pbxt") == 0)
2150+ use_i_s = FALSE; /* main() then reads pbxt.statistics */
2151+ else if (strcasecmp(options[OPT_DATABASE].opt_value_str, "information_schema") == 0)
2152+ use_i_s = TRUE;
2153+ else
2154+ use_i_s = TRUE; /* Any other database name also selects information_schema.pbxt_statistics. */
2155+
2156+ /* Connect to database */
2157+ if (!mysql_real_connect(conn,
2158+ options[OPT_HOST].opt_value_str,
2159+ options[OPT_USER].opt_value_str,
2160+ options[OPT_PASSWORD].opt_value_str,
2161+ options[OPT_DATABASE].opt_value_str,
2162+ options[OPT_PORT].opt_value_int,
2163+ options[OPT_SOCKET].opt_value_str,
2164+ 0))
2165+ return false;
2166+
2167+ return true;
2168+}
2169+
2170+int main(int argc, char **argv)
2171+{
2172+ MYSQL *conn;
2173+ MYSQL_RES *res;
2174+ MYSQL_ROW row;
2175+ llong current_values[XT_STAT_CURRENT_MAX];
2176+ double value;
2177+ char str_value[100];
2178+ XTStatMetaDataPtr meta;
2179+ int len;
2180+ int stat;
2181+ int err;
2182+ bool select_worked = true;
2183+
2184+ xt_set_time_unit("msec");
2185+ parse_args(argc, argv);
2186+
2187+ determine_display_order();
2188+
2189+ if (!(conn = mysql_init(NULL))) {
2190+ fprintf(stderr, "Insufficient memory\n");
2191+ exit(1);
2192+ }
2193+
2194+ if (!connect(conn) || !display_parameters(conn)) {
2195+ fprintf(stderr, "%s\n", mysql_error(conn));
2196+ exit(1);
2197+ }
2198+
2199+ retry:
2200+ for (int loop = 0; ; loop++) {
2201+ if (use_i_s)
2202+ err = mysql_query(conn, "select id, Value from information_schema.pbxt_statistics order by ID");
2203+ else
2204+ err = mysql_query(conn, "select id, Value from pbxt.statistics order by ID");
2205+ if (err)
2206+ goto reconnect;
2207+
2208+ if (!(res = mysql_use_result(conn)))
2209+ goto reconnect;
2210+ select_worked = true;
2211+
2212+ while ((row = mysql_fetch_row(res)) != NULL) {
2213+ stat = atoi(row[0])-1;
2214+ current_values[stat] = atoll(row[1]);
2215+ }
2216+ mysql_free_result(res);
2217+
2218+#ifdef DEBUG_INTERRUPT
2219+ if (current_values[XT_STAT_COMMITS] - accumulative_values[XT_STAT_COMMITS] == 0 &&
2220+ current_values[XT_STAT_STAT_READS] - accumulative_values[XT_STAT_STAT_READS] == 0 &&
2221+#ifdef XT_TIME_DISK_WRITES
2222+ current_values[XT_STAT_REC_WRITE_TIME] - accumulative_values[XT_STAT_REC_WRITE_TIME] == 0 &&
2223+ current_values[XT_STAT_IND_WRITE_TIME] - accumulative_values[XT_STAT_IND_WRITE_TIME] == 0 &&
2224+ current_values[XT_STAT_ILOG_WRITE_TIME] - accumulative_values[XT_STAT_ILOG_WRITE_TIME] == 0 &&
2225+#endif
2226+ current_values[XT_STAT_STAT_WRITES] - accumulative_values[XT_STAT_STAT_WRITES] == 0)
2227+ interrupt_pbxt(conn);
2228+#endif
2229+
2230+ if ((loop % 25) == 0) {
2231+ for (int column=0; column<columns_used; column++) {
2232+ len = 5;
2233+ meta = xt_get_stat_meta_data(display_order[column].do_statistic);
2234+ strcpy(str_value, meta->sm_short_line_1);
2235+ if (display_order[column].do_combo) {
2236+ /* Combine next 2 fields: */
2237+ len = 8;
2238+ column++;
2239+ }
2240+ else if (meta->sm_flags & XT_STAT_PERCENTAGE)
2241+ len = 4;
2242+ else if (meta->sm_flags & XT_STAT_DATE)
2243+ len = 15;
2244+ printf("%*s ", len, str_value);
2245+ }
2246+ printf("\n");
2247+ for (int column=0; column<columns_used; column++) {
2248+ len = 5;
2249+ meta = xt_get_stat_meta_data(display_order[column].do_statistic);
2250+ strcpy(str_value, meta->sm_short_line_2);
2251+ if (display_order[column].do_combo) {
2252+ /* Combine next 2 fields: */
2253+ len = 8;
2254+ column++;
2255+ strcat(str_value, "/ms");
2256+ }
2257+ else if (meta->sm_flags & XT_STAT_PERCENTAGE)
2258+ len = 4;
2259+ else if (meta->sm_flags & XT_STAT_DATE)
2260+ len = 15;
2261+ printf("%*s ", len, str_value);
2262+ }
2263+ printf("\n");
2264+ }
2265+
2266+ for (int column=0; column<columns_used; column++) {
2267+ len = 5;
2268+ stat = display_order[column].do_statistic;
2269+ meta = xt_get_stat_meta_data(stat);
2270+ if (meta->sm_flags & XT_STAT_ACCUMULATIVE) {
2271+ /* Take care of overflow! */
2272+ if (current_values[stat] < accumulative_values[stat])
2273+ value = (double) (0xFFFFFFFF - (accumulative_values[stat] - current_values[stat]));
2274+ else
2275+ value = (double) (current_values[stat] - accumulative_values[stat]);
2276+ }
2277+ else
2278+ value = (double) current_values[stat];
2279+ accumulative_values[stat] = current_values[stat];
2280+ if (meta->sm_flags & XT_STAT_TIME_VALUE)
2281+ value = value / (double) 1000;
2282+ if (display_order[column].do_combo) {
2283+ format_mini_count_value(str_value, value);
2284+ strcat(str_value, "/");
2285+ column++;
2286+ stat = display_order[column].do_statistic;
2287+ value = (double) (current_values[stat] - accumulative_values[stat]);
2288+ accumulative_values[stat] = current_values[stat];
2289+ value = value / (double) 1000;
2290+ format_count_value(&str_value[strlen(str_value)], value);
2291+ len = 8;
2292+ }
2293+ else if (meta->sm_flags & XT_STAT_PERCENTAGE) {
2294+ double perc = 100;
2295+ switch (stat) {
2296+ case XT_STAT_REC_CACHE_USAGE: perc = (double) record_cache_size; break;
2297+ case XT_STAT_IND_CACHE_USAGE: perc = (double) index_cache_size; break;
2298+ case XT_STAT_IND_CACHE_DIRTY: perc = (double) index_cache_size; break;
2299+ case XT_STAT_XLOG_CACHE_USAGE: perc = (double) log_cache_size; break;
2300+ }
2301+ format_percent_value(str_value, value, perc);
2302+ len = 4;
2303+ }
2304+ else if (meta->sm_flags & XT_STAT_DATE) {
2305+ time_t ticks = (time_t) value;
2306+ const struct tm *ltime = localtime(&ticks);
2307+ strftime(str_value, 99, "%y%m%d %H:%M:%S", ltime);
2308+ len = 15;
2309+ }
2310+ else if (meta->sm_flags & XT_STAT_BYTE_COUNT)
2311+ format_byte_value(str_value, value);
2312+ else
2313+ format_count_value(str_value, value);
2314+ if (column == columns_used-1)
2315+ printf("%*s\n", len, str_value);
2316+ else
2317+ printf("%*s ", len, str_value);
2318+ }
2319+
2320+ sleep(options[OPT_DELAY].opt_value_int);
2321+ }
2322+
2323+ /* close connection */
2324+ mysql_close(conn);
2325+ return 0;
2326+
2327+ reconnect:
2328+ /* Reconnect... */
2329+ if (select_worked) {
2330+ /* Only print message if the SELECT worked.
2331+ * or we will get a screen full of messages:
2332+ */
2333+ fprintf(stderr, "%s\n", mysql_error(conn));
2334+ printf("Reconnecting...\n");
2335+ }
2336+ mysql_close(conn);
2337+ if (!(conn = mysql_init(NULL))) {
2338+ fprintf(stderr, "Insufficient memory\n");
2339+ exit(1);
2340+ }
2341+ do {
2342+ sleep(2);
2343+ } while (!connect(conn));
2344+ select_worked = false;
2345+ goto retry;
2346+}
2347
2348=== added file 'plugin/pbxt/plugin.am'
2349--- plugin/pbxt/plugin.am 1970-01-01 00:00:00 +0000
2350+++ plugin/pbxt/plugin.am 2010-04-11 18:56:24 +0000
2351@@ -0,0 +1,75 @@
2352+# Used to build Makefile.in
2353+
2354+noinst_LTLIBRARIES+= plugin/pbxt/libpbxt.la
2355+
2356+noinst_HEADERS+= \
2357+ plugin/pbxt/src/bsearch_xt.h \
2358+ plugin/pbxt/src/cache_xt.h \
2359+ plugin/pbxt/src/ccutils_xt.h \
2360+ plugin/pbxt/src/database_xt.h \
2361+ plugin/pbxt/src/datadic_xt.h \
2362+ plugin/pbxt/src/datalog_xt.h \
2363+ plugin/pbxt/src/filesys_xt.h \
2364+ plugin/pbxt/src/hashtab_xt.h \
2365+ plugin/pbxt/src/ha_pbxt.h \
2366+ plugin/pbxt/src/heap_xt.h \
2367+ plugin/pbxt/src/index_xt.h \
2368+ plugin/pbxt/src/linklist_xt.h \
2369+ plugin/pbxt/src/memory_xt.h \
2370+ plugin/pbxt/src/myxt_xt.h \
2371+ plugin/pbxt/src/pthread_xt.h \
2372+ plugin/pbxt/src/restart_xt.h \
2373+ plugin/pbxt/src/sortedlist_xt.h \
2374+ plugin/pbxt/src/strutil_xt.h \
2375+ plugin/pbxt/src/tabcache_xt.h \
2376+ plugin/pbxt/src/table_xt.h \
2377+ plugin/pbxt/src/trace_xt.h \
2378+ plugin/pbxt/src/thread_xt.h \
2379+ plugin/pbxt/src/util_xt.h \
2380+ plugin/pbxt/src/xaction_xt.h \
2381+ plugin/pbxt/src/xactlog_xt.h \
2382+ plugin/pbxt/src/lock_xt.h \
2383+ plugin/pbxt/src/systab_xt.h \
2384+ plugin/pbxt/src/ha_xtsys.h \
2385+ plugin/pbxt/src/discover_xt.h \
2386+ plugin/pbxt/src/pbms.h \
2387+ plugin/pbxt/src/xt_config.h \
2388+ plugin/pbxt/src/xt_defs.h \
2389+ plugin/pbxt/src/xt_errno.h
2390+
2391+
2392+plugin_pbxt_libpbxt_la_CXXFLAGS= ${AM_CXXFLAGS} -DDRIZZLED -Wno-long-long -Wno-overloaded-virtual -Wno-sign-compare -Wno-unused-function
2393+plugin_pbxt_libpbxt_la_CFLAGS= ${AM_CFLAGS} -DDRIZZLED -std=c99
2394+
2395+plugin_pbxt_libpbxt_la_SOURCES= \
2396+ plugin/pbxt/src/bsearch_xt.cc \
2397+ plugin/pbxt/src/cache_xt.cc \
2398+ plugin/pbxt/src/ccutils_xt.cc \
2399+ plugin/pbxt/src/database_xt.cc \
2400+ plugin/pbxt/src/datadic_xt.cc \
2401+ plugin/pbxt/src/datalog_xt.cc \
2402+ plugin/pbxt/src/filesys_xt.cc \
2403+ plugin/pbxt/src/hashtab_xt.cc \
2404+ plugin/pbxt/src/heap_xt.cc \
2405+ plugin/pbxt/src/index_xt.cc \
2406+ plugin/pbxt/src/linklist_xt.cc \
2407+ plugin/pbxt/src/memory_xt.cc \
2408+ plugin/pbxt/src/myxt_xt.cc \
2409+ plugin/pbxt/src/pthread_xt.cc \
2410+ plugin/pbxt/src/restart_xt.cc \
2411+ plugin/pbxt/src/sortedlist_xt.cc \
2412+ plugin/pbxt/src/strutil_xt.cc \
2413+ plugin/pbxt/src/tabcache_xt.cc \
2414+ plugin/pbxt/src/table_xt.cc \
2415+ plugin/pbxt/src/trace_xt.cc \
2416+ plugin/pbxt/src/thread_xt.cc \
2417+ plugin/pbxt/src/systab_xt.cc \
2418+ plugin/pbxt/src/ha_xtsys.cc \
2419+ plugin/pbxt/src/discover_xt.cc \
2420+ plugin/pbxt/src/util_xt.cc \
2421+ plugin/pbxt/src/xaction_xt.cc \
2422+ plugin/pbxt/src/xactlog_xt.cc \
2423+ plugin/pbxt/src/lock_xt.cc
2424+
2425+
2426+EXTRA_DIST+= CMakeLists.txt
2427
2428=== added file 'plugin/pbxt/plugin.ini'
2429--- plugin/pbxt/plugin.ini 1970-01-01 00:00:00 +0000
2430+++ plugin/pbxt/plugin.ini 2010-04-11 18:56:24 +0000
2431@@ -0,0 +1,25 @@
2432+#
2433+# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
2434+#
2435+# This program is free software; you can redistribute it and/or modify it under
2436+# the terms of the GNU General Public License as published by the Free Software
2437+# Foundation; version 2 of the License.
2438+#
2439+# This program is distributed in the hope that it will be useful, but WITHOUT
2440+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
2441+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
2442+#
2443+# You should have received a copy of the GNU General Public License along with
2444+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
2445+# Place, Suite 330, Boston, MA 02111-1307 USA
2446+#
2447+
2448+[plugin]
2449+name=pbxt
2450+title=PBXT Storage Engine
2451+description=MVCC-based transactional engine
2452+sources=src/ha_pbxt.cc
2453+load_by_default=yes
2454+libs=plugin/pbxt/libpbxt.la
2455+cflags=-DDRIZZLED -std=c99
2456+cxxflags=-DDRIZZLED -Wno-long-long -Wno-overloaded-virtual
2457
2458=== added directory 'plugin/pbxt/src'
2459=== added file 'plugin/pbxt/src/Makefile.am'
2460--- plugin/pbxt/src/Makefile.am 1970-01-01 00:00:00 +0000
2461+++ plugin/pbxt/src/Makefile.am 2010-04-11 18:56:24 +0000
2462@@ -0,0 +1,51 @@
2463+# Used to build Makefile.in
2464+
2465+MYSQLDATAdir = $(localstatedir)
2466+MYSQLSHAREdir = $(pkgdatadir)
2467+MYSQLBASEdir= $(prefix)
2468+MYSQLLIBdir= $(pkglibdir)
2469+pkgplugindir = $(pkglibdir)/plugin
2470+
2471+AM_CPPFLAGS = -I$(top_srcdir)
2472+
2473+#LIBS = -lbz2
2474+LIBS =
2475+
2476+LDADD =
2477+
2478+noinst_HEADERS = bsearch_xt.h cache_xt.h ccutils_xt.h database_xt.h \
2479+ datadic_xt.h datalog_xt.h filesys_xt.h hashtab_xt.h \
2480+ ha_pbxt.h heap_xt.h index_xt.h linklist_xt.h \
2481+ memory_xt.h myxt_xt.h pthread_xt.h restart_xt.h \
2482+ sortedlist_xt.h strutil_xt.h \
2483+ tabcache_xt.h table_xt.h trace_xt.h thread_xt.h \
2484+ util_xt.h xaction_xt.h xactlog_xt.h lock_xt.h \
2485+ systab_xt.h ha_xtsys.h discover_xt.h backup_xt.h \
2486+ pbms.h pbms_enabled.h xt_config.h xt_defs.h xt_errno.h locklist_xt.h
2487+
2488+plugin_LTLIBRARIES = libpbxt.la
2489+
2490+libpbxt_la_SOURCES = bsearch_xt.cc cache_xt.cc ccutils_xt.cc database_xt.cc \
2491+ datadic_xt.cc datalog_xt.cc filesys_xt.cc hashtab_xt.cc \
2492+ ha_pbxt.cc heap_xt.cc index_xt.cc linklist_xt.cc \
2493+ memory_xt.cc myxt_xt.cc pthread_xt.cc restart_xt.cc \
2494+ pbms_enabled.cc sortedlist_xt.cc strutil_xt.cc \
2495+ tabcache_xt.cc table_xt.cc trace_xt.cc thread_xt.cc \
2496+ systab_xt.cc ha_xtsys.cc discover_xt.cc backup_xt.cc \
2497+ util_xt.cc xaction_xt.cc xactlog_xt.cc lock_xt.cc locklist_xt.cc
2498+
2499+libpbxt_la_LDFLAGS = -module
2500+
2501+ # These are the warnings Drizzle uses:
2502+# DRIZZLE_WARNINGS = -W -Wall -Wextra -pedantic -Wundef -Wredundant-decls -Wno-strict-aliasing -Wno-long-long -Wno-unused-parameter
2503+
2504+libpbxt_la_CXXFLAGS = $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN -Wno-overloaded-virtual
2505+libpbxt_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -std=c99
2506+
2507+EXTRA_LIBRARIES = libpbxt.a
2508+noinst_LIBRARIES = libpbxt.a
2509+libpbxt_a_SOURCES = $(libpbxt_la_SOURCES)
2510+libpbxt_a_CXXFLAGS = $(AM_CXXFLAGS) -DDRIZZLED -Wno-long-long -Wno-overloaded-virtual
2511+libpbxt_a_CFLAGS = $(AM_CFLAGS) -DDRIZZLED -std=c99
2512+
2513+EXTRA_DIST = CMakeLists.txt
2514
2515=== added file 'plugin/pbxt/src/backup_xt.cc'
2516--- plugin/pbxt/src/backup_xt.cc 1970-01-01 00:00:00 +0000
2517+++ plugin/pbxt/src/backup_xt.cc 2010-04-11 18:56:24 +0000
2518@@ -0,0 +1,802 @@
2519+/* Copyright (c) 2009 PrimeBase Technologies GmbH
2520+ *
2521+ * PrimeBase XT
2522+ *
2523+ * This program is free software; you can redistribute it and/or modify
2524+ * it under the terms of the GNU General Public License as published by
2525+ * the Free Software Foundation; either version 2 of the License, or
2526+ * (at your option) any later version.
2527+ *
2528+ * This program is distributed in the hope that it will be useful,
2529+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2530+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2531+ * GNU General Public License for more details.
2532+ *
2533+ * You should have received a copy of the GNU General Public License
2534+ * along with this program; if not, write to the Free Software
2535+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2536+ *
2537+ * 2009-09-07 Paul McCullagh
2538+ *
2539+ * H&G2JCtL
2540+ */
2541+
2542+#include "xt_config.h"
2543+
2544+#ifdef MYSQL_SUPPORTS_BACKUP
2545+
2546+#include <string.h>
2547+#include <stdio.h>
2548+#include <stdlib.h>
2549+#include <time.h>
2550+#include <ctype.h>
2551+
2552+#include "mysql_priv.h"
2553+#include <backup/api_types.h>
2554+#include <backup/backup_engine.h>
2555+#include <backup/backup_aux.h> // for build_table_list()
2556+#include <hash.h>
2557+
2558+#include "ha_pbxt.h"
2559+
2560+#include "backup_xt.h"
2561+#include "pthread_xt.h"
2562+#include "filesys_xt.h"
2563+#include "database_xt.h"
2564+#include "strutil_xt.h"
2565+#include "memory_xt.h"
2566+#include "trace_xt.h"
2567+#include "myxt_xt.h"
2568+
2569+#ifdef OK
2570+#undef OK
2571+#endif
2572+
2573+#ifdef byte
2574+#undef byte
2575+#endif
2576+
2577+#ifdef DEBUG
2578+//#define TRACE_BACKUP_CALLS
2579+//#define TEST_SMALL_BLOCK 100000
2580+#endif
2581+
2582+using backup::byte;
2583+using backup::result_t;
2584+using backup::version_t;
2585+using backup::Table_list;
2586+using backup::Table_ref;
2587+using backup::Buffer;
2588+
2589+#ifdef TRACE_BACKUP_CALLS
2590+#define XT_TRACE_CALL() ha_trace_function(__FUNC__, NULL)
2591+#else
2592+#define XT_TRACE_CALL()
2593+#endif
2594+
2595+#define XT_RESTORE_BATCH_SIZE 10000
2596+
2597+#define BUP_STATE_BEFORE_LOCK 0
2598+#define BUP_STATE_AFTER_LOCK 1
2599+
2600+#define BUP_STANDARD_VAR_RECORD 1
2601+#define BUP_RECORD_BLOCK_4_START 2 // Part of a record, with a 4 byte total length, and 4 byte data length
2602+#define BUP_RECORD_BLOCK_4 3 // Part of a record, with a 4 byte length
2603+#define BUP_RECORD_BLOCK_4_END 4 // Last part of a record with a 4 byte length
2604+
2605+/*
2606+ * -----------------------------------------------------------------------
2607+ * UTILITIES
2608+ */
2609+
2610+#ifdef TRACE_BACKUP_CALLS
2611+static void ha_trace_function(const char *function, char *table)
2612+{
2613+ char func_buf[50], *ptr;
2614+ XTThreadPtr thread = xt_get_self();
2615+
2616+ if ((ptr = strchr(function, '('))) {
2617+ ptr--;
2618+ while (ptr > function) {
2619+ if (!(isalnum(*ptr) || *ptr == '_'))
2620+ break;
2621+ ptr--;
2622+ }
2623+ ptr++;
2624+ xt_strcpy(50, func_buf, ptr);
2625+ if ((ptr = strchr(func_buf, '(')))
2626+ *ptr = 0;
2627+ }
2628+ else
2629+ xt_strcpy(50, func_buf, function);
2630+ if (table)
2631+ printf("%s %s (%s)\n", thread ? thread->t_name : "-unknown-", func_buf, table);
2632+ else
2633+ printf("%s %s\n", thread ? thread->t_name : "-unknown-", func_buf);
2634+}
2635+#endif
2636+
2637+/*
2638+ * -----------------------------------------------------------------------
2639+ * BACKUP DRIVER
2640+ */
2641+
2642+class PBXTBackupDriver: public Backup_driver
2643+{
2644+ public:
2645+ PBXTBackupDriver(const Table_list &);
2646+ virtual ~PBXTBackupDriver();
2647+
2648+ virtual size_t size();
2649+ virtual size_t init_size();
2650+ virtual result_t begin(const size_t);
2651+ virtual result_t end();
2652+ virtual result_t get_data(Buffer &);
2653+ virtual result_t prelock();
2654+ virtual result_t lock();
2655+ virtual result_t unlock();
2656+ virtual result_t cancel();
2657+ virtual void free();
2658+ void lock_tables_TL_READ_NO_INSERT();
2659+
2660+ private:
2661+ XTThreadPtr bd_thread;
2662+ int bd_state;
2663+ u_int bd_table_no;
2664+ XTOpenTablePtr bd_ot;
2665+ xtWord1 *bd_row_buf;
2666+
2667+ /* Non-zero if we last returned only part of
2668+ * a row.
2669+ */
2670+ xtWord1 *db_write_block(xtWord1 *buffer, xtWord1 bup_type, size_t *size, xtWord4 row_len);
2671+ xtWord1 *db_write_block(xtWord1 *buffer, xtWord1 bup_type, size_t *size, xtWord4 total_len, xtWord4 row_len);
2672+
2673+ xtWord4 bd_row_offset;
2674+ xtWord4 bd_row_size;
2675+};
2676+
2677+
2678+ PBXTBackupDriver::PBXTBackupDriver(const Table_list &tables): // Start in the "before lock" state with no thread, table or row buffer attached.
2679+ Backup_driver(tables), bd_thread(NULL), // bd_thread is only assigned in begin(); keep it NULL until then
2680+ bd_state(BUP_STATE_BEFORE_LOCK),
2681+ bd_table_no(0),
2682+ bd_ot(NULL),
2683+ bd_row_buf(NULL),
2684+ bd_row_offset(0),
2685+ bd_row_size(0)
2686+ {
2687+ }
2688+
2689+PBXTBackupDriver::~PBXTBackupDriver()
2690+{
2691+}
2692+
2693+/** Estimates total size of backup. @todo improve it */
2694+size_t PBXTBackupDriver::size()
2695+{
2696+ XT_TRACE_CALL();
2697+ return UNKNOWN_SIZE;
2698+}
2699+
2700+/** Estimates size of backup before lock. @todo improve it */
2701+size_t PBXTBackupDriver::init_size()
2702+{
2703+ XT_TRACE_CALL();
2704+ return 0;
2705+}
2706+
2707+result_t PBXTBackupDriver::begin(const size_t)
2708+{
2709+ THD *thd = current_thd;
2710+ XTExceptionRec e;
2711+
2712+ XT_TRACE_CALL();
2713+
2714+ if (!(bd_thread = xt_ha_set_current_thread(thd, &e))) {
2715+ xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
2716+ return backup::ERROR;
2717+ }
2718+
2719+ return backup::OK;
2720+}
2721+
2722+result_t PBXTBackupDriver::end()
2723+{
2724+ XT_TRACE_CALL();
2725+ if (bd_ot) {
2726+ xt_tab_seq_exit(bd_ot);
2727+ xt_db_return_table_to_pool_ns(bd_ot);
2728+ bd_ot = NULL;
2729+ }
2730+ if (bd_thread->st_xact_data) {
2731+ if (!xt_xn_commit(bd_thread))
2732+ return backup::ERROR;
2733+ }
2734+ return backup::OK;
2735+}
2736+
2737+xtWord1 *PBXTBackupDriver::db_write_block(xtWord1 *buffer, xtWord1 bup_type, size_t *ret_size, xtWord4 row_len)
2738+{
2739+ register size_t size = *ret_size;
2740+
2741+ *buffer = bup_type; // Record type identifier.
2742+ buffer++;
2743+ size--;
2744+ memcpy(buffer, bd_ot->ot_row_wbuffer, row_len);
2745+ buffer += row_len;
2746+ size -= row_len;
2747+ *ret_size = size;
2748+ return buffer;
2749+}
2750+
2751+xtWord1 *PBXTBackupDriver::db_write_block(xtWord1 *buffer, xtWord1 bup_type, size_t *ret_size, xtWord4 total_len, xtWord4 row_len)
2752+{
2753+ register size_t size = *ret_size;
2754+
2755+ *buffer = bup_type; // Record type identifier.
2756+ buffer++;
2757+ size--;
2758+ if (bup_type == BUP_RECORD_BLOCK_4_START) {
2759+ XT_SET_DISK_4(buffer, total_len);
2760+ buffer += 4;
2761+ size -= 4;
2762+ }
2763+ XT_SET_DISK_4(buffer, row_len);
2764+ buffer += 4;
2765+ size -= 4;
2766+ memcpy(buffer, bd_ot->ot_row_wbuffer+bd_row_offset, row_len);
2767+ buffer += row_len;
2768+ size -= row_len;
2769+ bd_row_size -= row_len;
2770+ bd_row_offset += row_len;
2771+ *ret_size = size;
2772+ return buffer;
2773+}
2774+
2775+result_t PBXTBackupDriver::get_data(Buffer &buf)
2776+{
2777+ xtBool eof = FALSE;
2778+ size_t size;
2779+ xtWord4 row_len;
2780+ xtWord1 *buffer;
2781+
2782+ XT_TRACE_CALL();
2783+
2784+ if (bd_state == BUP_STATE_BEFORE_LOCK) {
2785+ buf.table_num = 0;
2786+ buf.size = 0;
2787+ buf.last = FALSE;
2788+ return backup::READY;
2789+ }
2790+
2791+ /* Open the backup table: */
2792+ if (!bd_ot) {
2793+ XTThreadPtr self = bd_thread;
2794+ XTTableHPtr tab;
2795+ char path[PATH_MAX];
2796+
2797+ if (bd_table_no == m_tables.count()) {
2798+ buf.size = 0;
2799+ buf.table_num = 0;
2800+ buf.last = TRUE;
2801+ return backup::DONE;
2802+ }
2803+
2804+ m_tables[bd_table_no].internal_name(path, sizeof(path));
2805+ bd_table_no++;
2806+ try_(a) {
2807+ xt_ha_open_database_of_table(self, (XTPathStrPtr) path);
2808+ tab = xt_use_table(self, (XTPathStrPtr) path, FALSE, FALSE);
2809+ pushr_(xt_heap_release, tab);
2810+ if (!(bd_ot = xt_db_open_table_using_tab(tab, bd_thread)))
2811+ xt_throw(self);
2812+ freer_(); // xt_heap_release(tab)
2813+
2814+ /* Prepare the sequential scan: */
2815+ xt_tab_seq_exit(bd_ot);
2816+ if (!xt_tab_seq_init(bd_ot))
2817+ xt_throw(self);
2818+
2819+ if (bd_row_buf) {
2820+ xt_free(self, bd_row_buf);
2821+ bd_row_buf = NULL;
2822+ }
2823+ bd_row_buf = (xtWord1 *) xt_malloc(self, bd_ot->ot_table->tab_dic.dic_mysql_buf_size);
2824+ bd_ot->ot_cols_req = bd_ot->ot_table->tab_dic.dic_no_of_cols;
2825+ }
2826+ catch_(a) {
2827+ ;
2828+ }
2829+ cont_(a);
2830+
2831+ if (!bd_ot)
2832+ goto failed;
2833+ }
2834+
2835+ buf.table_num = bd_table_no;
2836+#ifdef TEST_SMALL_BLOCK
2837+ buf.size = TEST_SMALL_BLOCK;
2838+#endif
2839+ size = buf.size;
2840+ buffer = (xtWord1 *) buf.data;
2841+ ASSERT_NS(size > 9);
2842+
2843+ /* First check if a record was partially written
2844+ * last time.
2845+ */
2846+ write_row:
2847+ if (bd_row_size > 0) {
2848+ row_len = bd_row_size;
2849+ if (bd_row_offset == 0) {
2850+ if (row_len+1 > size) {
2851+ ASSERT_NS(size > 9);
2852+ row_len = size - 9;
2853+ buffer = db_write_block(buffer, BUP_RECORD_BLOCK_4_START, &size, bd_row_size, row_len);
2854+ goto done;
2855+ }
2856+ buffer = db_write_block(buffer, BUP_STANDARD_VAR_RECORD, &size, row_len);
2857+ bd_row_size = 0;
2858+ }
2859+ else {
2860+ if (row_len+5 > size) {
2861+ row_len = size - 5;
2862+ buffer = db_write_block(buffer, BUP_RECORD_BLOCK_4, &size, 0, row_len);
2863+ goto done;
2864+ }
2865+ buffer = db_write_block(buffer, BUP_RECORD_BLOCK_4_END, &size, 0, row_len);
2866+ }
2867+ }
2868+
2869+ /* Now continue with the sequential scan. */
2870+ while (size > 1) {
2871+ if (!xt_tab_seq_next(bd_ot, bd_row_buf, &eof))
2872+ goto failed;
2873+ if (eof) {
2874+ /* We will go to the next table on the next call. */
2875+ xt_tab_seq_exit(bd_ot);
2876+ xt_db_return_table_to_pool_ns(bd_ot);
2877+ bd_ot = NULL;
2878+ break;
2879+ }
2880+ if (!(row_len = myxt_store_row_data(bd_ot, 0, (char *) bd_row_buf)))
2881+ goto failed;
2882+ if (row_len+1 > size) {
2883+ /* Does not fit: */
2884+ bd_row_offset = 0;
2885+ bd_row_size = row_len;
2886+ /* Only add part of the row, if there is still
2887+ * quite a bit of space left:
2888+ */
2889+ if (size >= (32 * 1024))
2890+ goto write_row;
2891+ break;
2892+ }
2893+ buffer = db_write_block(buffer, BUP_STANDARD_VAR_RECORD, &size, row_len);
2894+ }
2895+
2896+ done:
2897+ buf.size = buf.size - size;
2898+ /* This indicates end of data for a table! */
2899+ buf.last = eof;
2900+
2901+ return backup::OK;
2902+
2903+ failed:
2904+ xt_log_and_clear_exception(bd_thread);
2905+ return backup::ERROR;
2906+}
2907+
2908+result_t PBXTBackupDriver::prelock()
2909+{
2910+ XT_TRACE_CALL();
2911+ return backup::READY;
2912+}
2913+
2914+result_t PBXTBackupDriver::lock()
2915+{
2916+ XT_TRACE_CALL();
2917+ bd_thread->st_xact_mode = XT_XACT_COMMITTED_READ;
2918+ bd_thread->st_ignore_fkeys = FALSE;
2919+ bd_thread->st_auto_commit = FALSE;
2920+ bd_thread->st_table_trans = FALSE;
2921+ bd_thread->st_abort_trans = FALSE;
2922+ bd_thread->st_stat_ended = FALSE;
2923+ bd_thread->st_stat_trans = FALSE;
2924+ bd_thread->st_is_update = NULL;
2925+ if (!xt_xn_begin(bd_thread))
2926+ return backup::ERROR;
2927+ bd_state = BUP_STATE_AFTER_LOCK;
2928+ return backup::OK;
2929+}
2930+
2931+result_t PBXTBackupDriver::unlock()
2932+{
2933+ XT_TRACE_CALL();
2934+ return backup::OK;
2935+}
2936+
2937+result_t PBXTBackupDriver::cancel()
2938+{
2939+ XT_TRACE_CALL();
2940+ return backup::OK; // free() will be called and suffice
2941+}
2942+
2943+ void PBXTBackupDriver::free() // Release the open scan and row buffer, roll back any open transaction, then destroy this driver.
2944+ {
2945+ XT_TRACE_CALL();
2946+ if (bd_ot) {
2947+ xt_tab_seq_exit(bd_ot);
2948+ xt_db_return_table_to_pool_ns(bd_ot);
2949+ bd_ot = NULL;
2950+ }
2951+ if (bd_row_buf) {
2952+ xt_free_ns(bd_row_buf);
2953+ bd_row_buf = NULL;
2954+ }
2955+ if (bd_thread && bd_thread->st_xact_data) // begin() leaves bd_thread NULL when it fails, so guard the dereference
2956+ xt_xn_rollback(bd_thread);
2957+ delete this;
2958+ }
2959+
2960+void PBXTBackupDriver::lock_tables_TL_READ_NO_INSERT()
2961+{
2962+ XT_TRACE_CALL();
2963+}
2964+
2965+/*
2966+ * -----------------------------------------------------------------------
2967+ * RESTORE DRIVER
2968+ */
2969+
2970+class PBXTRestoreDriver: public Restore_driver
2971+{
2972+ public:
2973+ PBXTRestoreDriver(const Table_list &tables);
2974+ virtual ~PBXTRestoreDriver();
2975+
2976+ virtual result_t begin(const size_t);
2977+ virtual result_t end();
2978+ virtual result_t send_data(Buffer &buf);
2979+ virtual result_t cancel();
2980+ virtual void free();
2981+
2982+ private:
2983+ XTThreadPtr rd_thread;
2984+ u_int rd_table_no;
2985+ XTOpenTablePtr rd_ot;
2986+ STRUCT_TABLE *rd_my_table;
2987+ xtWord1 *rb_row_buf;
2988+ u_int rb_col_cnt;
2989+ u_int rb_insert_count;
2990+
2991+ /* Long rows are accumulated here: */
2992+ xtWord4 rb_row_len;
2993+ xtWord4 rb_data_size;
2994+ xtWord1 *rb_row_data;
2995+};
2996+
2997+PBXTRestoreDriver::PBXTRestoreDriver(const Table_list &tables):
2998+Restore_driver(tables),
2999+rd_thread(NULL),
3000+rd_table_no(0),
3001+rd_ot(NULL),
3002+rb_row_buf(NULL),
3003+rb_row_len(0),
3004+rb_data_size(0),
3005+rb_row_data(NULL)
3006+{
3007+}
3008+
3009+PBXTRestoreDriver::~PBXTRestoreDriver()
3010+{
3011+}
3012+
3013+result_t PBXTRestoreDriver::begin(const size_t)
3014+{
3015+ THD *thd = current_thd;
3016+ XTExceptionRec e;
3017+
3018+ XT_TRACE_CALL();
3019+
3020+ if (!(rd_thread = xt_ha_set_current_thread(thd, &e))) {
3021+ xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
3022+ return backup::ERROR;
3023+ }
3024+
3025+ return backup::OK;
3026+}
3027+
3028+result_t PBXTRestoreDriver::end()
3029+{
3030+ XT_TRACE_CALL();
3031+ if (rd_ot) {
3032+ xt_db_return_table_to_pool_ns(rd_ot);
3033+ rd_ot = NULL;
3034+ }
3035+ //if (rb_row_buf) {
3036+ // xt_free_ns(rb_row_buf);
3037+ // rb_row_buf = NULL;
3038+ //}
3039+ if (rb_row_data) {
3040+ xt_free_ns(rb_row_data);
3041+ rb_row_data = NULL;
3042+ }
3043+ if (rd_thread->st_xact_data) {
3044+ if (!xt_xn_commit(rd_thread))
3045+ return backup::ERROR;
3046+ }
3047+ return backup::OK;
3048+}
3049+
3050+
3051+result_t PBXTRestoreDriver::send_data(Buffer &buf)
3052+{
3053+ size_t size;
3054+ xtWord1 type;
3055+ xtWord1 *buffer;
3056+ xtWord4 row_len;
3057+ xtWord1 *rec_data;
3058+
3059+ XT_TRACE_CALL();
3060+
3061+ if (buf.table_num != rd_table_no) {
3062+ XTThreadPtr self = rd_thread;
3063+ XTTableHPtr tab;
3064+ char path[PATH_MAX];
3065+
3066+ if (rd_ot) {
3067+ xt_db_return_table_to_pool_ns(rd_ot);
3068+ rd_ot = NULL;
3069+ }
3070+
3071+ if (rd_thread->st_xact_data) {
3072+ if (!xt_xn_commit(rd_thread))
3073+ goto failed;
3074+ }
3075+ if (!xt_xn_begin(rd_thread))
3076+ goto failed;
3077+ rb_insert_count = 0;
3078+
3079+ rd_table_no = buf.table_num;
3080+ m_tables[rd_table_no-1].internal_name(path, sizeof(path));
3081+ try_(a) {
3082+ xt_ha_open_database_of_table(self, (XTPathStrPtr) path);
3083+ tab = xt_use_table(self, (XTPathStrPtr) path, FALSE, FALSE);
3084+ pushr_(xt_heap_release, tab);
3085+ if (!(rd_ot = xt_db_open_table_using_tab(tab, rd_thread)))
3086+ xt_throw(self);
3087+ freer_(); // xt_heap_release(tab)
3088+
3089+ rd_my_table = rd_ot->ot_table->tab_dic.dic_my_table;
3090+ if (rd_my_table->found_next_number_field) {
3091+ rd_my_table->in_use = current_thd;
3092+ rd_my_table->next_number_field = rd_my_table->found_next_number_field;
3093+ rd_my_table->mark_columns_used_by_index_no_reset(rd_my_table->s->next_number_index, rd_my_table->read_set);
3094+ }
3095+
3096+ /* This is safe because only one thread can restore a table at
3097+ * a time!
3098+ */
3099+ rb_row_buf = (xtWord1 *) rd_my_table->record[0];
3100+ //if (rb_row_buf) {
3101+ // xt_free(self, rb_row_buf);
3102+ // rb_row_buf = NULL;
3103+ //}
3104+ //rb_row_buf = (xtWord1 *) xt_malloc(self, rd_ot->ot_table->tab_dic.dic_mysql_buf_size);
3105+
3106+ rb_col_cnt = rd_ot->ot_table->tab_dic.dic_no_of_cols;
3107+
3108+ }
3109+ catch_(a) {
3110+ ;
3111+ }
3112+ cont_(a);
3113+
3114+ if (!rd_ot)
3115+ goto failed;
3116+ }
3117+
3118+ buffer = (xtWord1 *) buf.data;
3119+ size = buf.size;
3120+
3121+ while (size > 0) {
3122+ type = *buffer;
3123+ switch (type) {
3124+ case BUP_STANDARD_VAR_RECORD:
3125+ rec_data = buffer + 1;
3126+ break;
3127+ case BUP_RECORD_BLOCK_4_START:
3128+ buffer++;
3129+ row_len = XT_GET_DISK_4(buffer);
3130+ buffer += 4;
3131+ if (rb_data_size < row_len) {
3132+ if (!xt_realloc_ns((void **) &rb_row_data, row_len))
3133+ goto failed;
3134+ rb_data_size = row_len;
3135+ }
3136+ row_len = XT_GET_DISK_4(buffer);
3137+ buffer += 4;
3138+ ASSERT_NS(row_len <= rb_data_size);
3139+ if (row_len > rb_data_size) {
3140+ xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_BACKUP_FORMAT);
3141+ goto failed;
3142+ }
3143+ memcpy(rb_row_data, buffer, row_len);
3144+ rb_row_len = row_len;
3145+ buffer += row_len;
3146+ if (row_len + 9 > size) {
3147+ xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_BACKUP_FORMAT);
3148+ goto failed;
3149+ }
3150+ size -= row_len + 9;
3151+ continue;
3152+ case BUP_RECORD_BLOCK_4:
3153+ buffer++;
3154+ row_len = XT_GET_DISK_4(buffer);
3155+ buffer += 4;
3156+ ASSERT_NS(rb_row_len + row_len <= rb_data_size);
3157+ if (rb_row_len + row_len > rb_data_size) {
3158+ xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_BACKUP_FORMAT);
3159+ goto failed;
3160+ }
3161+ memcpy(rb_row_data + rb_row_len, buffer, row_len);
3162+ rb_row_len += row_len;
3163+ buffer += row_len;
3164+ if (row_len + 5 > size) {
3165+ xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_BACKUP_FORMAT);
3166+ goto failed;
3167+ }
3168+ size -= row_len + 5;
3169+ continue;
3170+ case BUP_RECORD_BLOCK_4_END:
3171+ buffer++;
3172+ row_len = XT_GET_DISK_4(buffer);
3173+ buffer += 4;
3174+ ASSERT_NS(rb_row_len + row_len <= rb_data_size);
3175+ if (rb_row_len + row_len > rb_data_size) {
3176+ xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_BACKUP_FORMAT);
3177+ goto failed;
3178+ }
3179+ memcpy(rb_row_data + rb_row_len, buffer, row_len);
3180+ buffer += row_len;
3181+ if (row_len + 5 > size) {
3182+ xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_BACKUP_FORMAT);
3183+ goto failed;
3184+ }
3185+ size -= row_len + 5;
3186+ rec_data = rb_row_data;
3187+ break;
3188+ default:
3189+ xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_BACKUP_FORMAT);
3190+ goto failed;
3191+ }
3192+
3193+ if (!(row_len = myxt_load_row_data(rd_ot, rec_data, rb_row_buf, rb_col_cnt)))
3194+ goto failed;
3195+
3196+ if (rd_ot->ot_table->tab_dic.dic_my_table->found_next_number_field)
3197+ ha_set_auto_increment(rd_ot, rd_ot->ot_table->tab_dic.dic_my_table->found_next_number_field);
3198+
3199+ if (!xt_tab_new_record(rd_ot, rb_row_buf))
3200+ goto failed;
3201+
3202+ if (type == BUP_STANDARD_VAR_RECORD) {
3203+ buffer += row_len+1;
3204+ if (row_len + 1 > size) {
3205+ xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_BACKUP_FORMAT);
3206+ goto failed;
3207+ }
3208+ size -= row_len + 1;
3209+ }
3210+
3211+ rb_insert_count++;
3212+ if (rb_insert_count == XT_RESTORE_BATCH_SIZE) {
3213+ if (!xt_xn_commit(rd_thread))
3214+ goto failed;
3215+ if (!xt_xn_begin(rd_thread))
3216+ goto failed;
3217+ rb_insert_count = 0;
3218+ }
3219+ }
3220+
3221+ return backup::OK;
3222+
3223+ failed:
3224+ xt_log_and_clear_exception(rd_thread);
3225+ return backup::ERROR;
3226+}
3227+
3228+
3229+result_t PBXTRestoreDriver::cancel()
3230+{
3231+ XT_TRACE_CALL();
3232+ /* Nothing to do in cancel(); free() will suffice */
3233+ return backup::OK;
3234+}
3235+
3236+ void PBXTRestoreDriver::free() // Return the open table, drop the long-row buffer, roll back any open transaction, then destroy this driver.
3237+ {
3238+ XT_TRACE_CALL();
3239+ if (rd_ot) {
3240+ xt_db_return_table_to_pool_ns(rd_ot);
3241+ rd_ot = NULL;
3242+ }
3243+ //if (rb_row_buf) {
3244+ // xt_free_ns(rb_row_buf);
3245+ // rb_row_buf = NULL;
3246+ //}
3247+ if (rb_row_data) {
3248+ xt_free_ns(rb_row_data);
3249+ rb_row_data = NULL;
3250+ }
3251+ if (rd_thread && rd_thread->st_xact_data) // rd_thread is NULL from construction until begin() succeeds, so guard the dereference
3252+ xt_xn_rollback(rd_thread);
3253+ delete this;
3254+ }
3255+
3256+/*
3257+ * -----------------------------------------------------------------------
3258+ * BACKUP ENGINE FACTORY
3259+ */
3260+
3261+#define PBXT_BACKUP_VERSION 1
3262+
3263+
3264+class PBXTBackupEngine: public Backup_engine
3265+{
3266+ public:
3267+ PBXTBackupEngine() { };
3268+
3269+ virtual version_t version() const {
3270+ return PBXT_BACKUP_VERSION;
3271+ };
3272+
3273+ virtual result_t get_backup(const uint32, const Table_list &, Backup_driver* &);
3274+
3275+ virtual result_t get_restore(const version_t, const uint32, const Table_list &,Restore_driver* &);
3276+
3277+ virtual void free()
3278+ {
3279+ delete this;
3280+ }
3281+};
3282+
3283+result_t PBXTBackupEngine::get_backup(const u_int count, const Table_list &tables, Backup_driver* &drv)
3284+{
3285+ PBXTBackupDriver *ptr = new PBXTBackupDriver(tables);
3286+
3287+ if (!ptr)
3288+ return backup::ERROR;
3289+ drv = ptr;
3290+ return backup::OK;
3291+}
3292+
3293+result_t PBXTBackupEngine::get_restore(const version_t ver, const uint32,
3294+ const Table_list &tables, Restore_driver* &drv)
3295+{
3296+ if (ver > PBXT_BACKUP_VERSION)
3297+ {
3298+ return backup::ERROR;
3299+ }
3300+
3301+ PBXTRestoreDriver *ptr = new PBXTRestoreDriver(tables);
3302+
3303+ if (!ptr)
3304+ return backup::ERROR;
3305+ drv = (Restore_driver *) ptr;
3306+ return backup::OK;
3307+}
3308+
3309+
3310+Backup_result_t pbxt_backup_engine(handlerton *self, Backup_engine* &be)
3311+{
3312+ be = new PBXTBackupEngine();
3313+
3314+ if (!be)
3315+ return backup::ERROR;
3316+
3317+ return backup::OK;
3318+}
3319+
3320+#endif
3321
3322=== added file 'plugin/pbxt/src/backup_xt.h'
3323--- plugin/pbxt/src/backup_xt.h 1970-01-01 00:00:00 +0000
3324+++ plugin/pbxt/src/backup_xt.h 2010-04-11 18:56:24 +0000
3325@@ -0,0 +1,34 @@
3326+/* Copyright (c) 2009 PrimeBase Technologies GmbH
3327+ *
3328+ * PrimeBase XT
3329+ *
3330+ * This program is free software; you can redistribute it and/or modify
3331+ * it under the terms of the GNU General Public License as published by
3332+ * the Free Software Foundation; either version 2 of the License, or
3333+ * (at your option) any later version.
3334+ *
3335+ * This program is distributed in the hope that it will be useful,
3336+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3337+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3338+ * GNU General Public License for more details.
3339+ *
3340+ * You should have received a copy of the GNU General Public License
3341+ * along with this program; if not, write to the Free Software
3342+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
3343+ *
3344+ * 2009-09-07 Paul McCullagh
3345+ *
3346+ * H&G2JCtL
3347+ */
3348+
3349+#ifndef __backup_xt_h__
3350+#define __backup_xt_h__
3351+
3352+#include "xt_defs.h"
3353+
3354+#ifdef MYSQL_SUPPORTS_BACKUP
3355+
3356+Backup_result_t pbxt_backup_engine(handlerton *self, Backup_engine* &be);
3357+
3358+#endif
3359+#endif
3360
3361=== added file 'plugin/pbxt/src/bsearch_xt.cc'
3362--- plugin/pbxt/src/bsearch_xt.cc 1970-01-01 00:00:00 +0000
3363+++ plugin/pbxt/src/bsearch_xt.cc 2010-04-11 18:56:24 +0000
3364@@ -0,0 +1,66 @@
3365+/* Copyright (c) 2005 PrimeBase Technologies GmbH
3366+ *
3367+ * PrimeBase XT
3368+ *
3369+ * This program is free software; you can redistribute it and/or modify
3370+ * it under the terms of the GNU General Public License as published by
3371+ * the Free Software Foundation; either version 2 of the License, or
3372+ * (at your option) any later version.
3373+ *
3374+ * This program is distributed in the hope that it will be useful,
3375+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3376+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3377+ * GNU General Public License for more details.
3378+ *
3379+ * You should have received a copy of the GNU General Public License
3380+ * along with this program; if not, write to the Free Software
3381+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
3382+ *
3383+ * 2004-01-03 Paul McCullagh
3384+ *
3385+ * H&G2JCtL
3386+ */
3387+
3388+#include "xt_config.h"
3389+
3390+#include <stdio.h>
3391+
3392+#include "bsearch_xt.h"
3393+#include "pthread_xt.h"
3394+#include "thread_xt.h"
3395+
3396+/**
3397+ * Binary search an array of 'count' items, with byte size 'size'. This
3398+ * function returns a pointer to the element and the 'index'
3399+ * of the element if found.
3400+ *
3401+ * If not found the index of the insert point of the item
3402+ * is returned (0 <= index <= count).
3403+ *
3404+ * The comparison routine 'compar' may throw an exception.
3405+ * In this case the error details will be stored in 'thread'.
3406+ */
3407+void *xt_bsearch(XTThreadPtr thread, const void *key, register const void *base, size_t count, size_t size, size_t *idx, const void *thunk, XTCompareFunc compar)
3408+{
3409+ register size_t i;
3410+ register size_t guess;
3411+ register int r;
3412+
3413+ i = 0;
3414+ while (i < count) {
3415+ guess = (i + count - 1) >> 1;
3416+ r = (compar)(thread, thunk, key, ((char *) base) + guess * size);
3417+ if (r == 0) {
3418+ *idx = guess;
3419+ return ((char *) base) + guess * size;
3420+ }
3421+ if (r < 0)
3422+ count = guess;
3423+ else
3424+ i = guess + 1;
3425+ }
3426+
3427+ *idx = i;
3428+ return NULL;
3429+}
3430+
3431
3432=== added file 'plugin/pbxt/src/bsearch_xt.h'
3433--- plugin/pbxt/src/bsearch_xt.h 1970-01-01 00:00:00 +0000
3434+++ plugin/pbxt/src/bsearch_xt.h 2010-04-11 18:56:24 +0000
3435@@ -0,0 +1,32 @@
3436+/* Copyright (c) 2005 PrimeBase Technologies GmbH
3437+ *
3438+ * PrimeBase XT
3439+ *
3440+ * This program is free software; you can redistribute it and/or modify
3441+ * it under the terms of the GNU General Public License as published by
3442+ * the Free Software Foundation; either version 2 of the License, or
3443+ * (at your option) any later version.
3444+ *
3445+ * This program is distributed in the hope that it will be useful,
3446+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3447+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3448+ * GNU General Public License for more details.
3449+ *
3450+ * You should have received a copy of the GNU General Public License
3451+ * along with this program; if not, write to the Free Software
3452+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
3453+ *
3454+ * 2004-01-03 Paul McCullagh
3455+ *
3456+ * H&G2JCtL
3457+ */
3458+#ifndef __xt_bsearch_h__
3459+#define __xt_bsearch_h__
3460+
3461+#include "xt_defs.h"
3462+
3463+struct XTThread;
3464+
3465+void *xt_bsearch(struct XTThread *self, const void *key, register const void *base, size_t count, size_t size, size_t *idx, const void *thunk, XTCompareFunc compar);
3466+
3467+#endif
3468
3469=== added file 'plugin/pbxt/src/cache_xt.cc'
3470--- plugin/pbxt/src/cache_xt.cc 1970-01-01 00:00:00 +0000
3471+++ plugin/pbxt/src/cache_xt.cc 2010-04-11 18:56:24 +0000
3472@@ -0,0 +1,1717 @@
3473+/* Copyright (c) 2005 PrimeBase Technologies GmbH, Germany
3474+ *
3475+ * PrimeBase XT
3476+ *
3477+ * This program is free software; you can redistribute it and/or modify
3478+ * it under the terms of the GNU General Public License as published by
3479+ * the Free Software Foundation; either version 2 of the License, or
3480+ * (at your option) any later version.
3481+ *
3482+ * This program is distributed in the hope that it will be useful,
3483+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3484+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3485+ * GNU General Public License for more details.
3486+ *
3487+ * You should have received a copy of the GNU General Public License
3488+ * along with this program; if not, write to the Free Software
3489+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
3490+ *
3491+ * 2005-05-24 Paul McCullagh
3492+ *
3493+ * H&G2JCtL
3494+ */
3495+
3496+#include "xt_config.h"
3497+
3498+#ifdef DRIZZLED
3499+#include <bitset>
3500+#endif
3501+
3502+#ifndef XT_WIN
3503+#include <unistd.h>
3504+#endif
3505+
3506+#include <stdio.h>
3507+#include <time.h>
3508+
3509+#include "pthread_xt.h"
3510+#include "thread_xt.h"
3511+#include "filesys_xt.h"
3512+#include "cache_xt.h"
3513+#include "table_xt.h"
3514+#include "trace_xt.h"
3515+#include "util_xt.h"
3516+
3517+#define XT_TIME_DIFF(start, now) (\
3518+ ((xtWord4) (now) < (xtWord4) (start)) ? \
3519+ ((xtWord4) 0XFFFFFFFF - ((xtWord4) (start) - (xtWord4) (now))) : \
3520+ ((xtWord4) (now) - (xtWord4) (start))) /* Difference between two xtWord4 tick values; if 'now' is numerically below 'start' the result is 0xFFFFFFFF - (start - now). NOTE(review): assuming xtWord4 is a 32-bit unsigned type, that is one less than the modular distance (now - start) - confirm whether this matters to callers. */
3521+
3522+/*
3523+ * -----------------------------------------------------------------------
3524+ * D I S K C A C H E
3525+ */
3526+
3527+#define IDX_CAC_SEGMENT_COUNT ((off_t) 1 << XT_INDEX_CACHE_SEGMENT_SHIFTS) /* Number of cache segments (a power of two). */
3528+#define IDX_CAC_SEGMENT_MASK (IDX_CAC_SEGMENT_COUNT - 1) /* Applied to the hash value to select the segment. */
3529+
3530+#ifdef XT_NO_ATOMICS /* Choose the lock implementation that protects a cache segment. */
3531+#define IDX_CAC_USE_PTHREAD_RW
3532+#else
3533+//#define IDX_CAC_USE_PTHREAD_RW
3534+#define IDX_CAC_USE_XSMUTEX
3535+//#define IDX_CAC_USE_SPINXSLOCK
3536+#endif
3537+
3538+#if defined(IDX_CAC_USE_PTHREAD_RW) /* In the macros below: 's' (init/free) is the calling thread, 'i' is the segment, and the second argument of the lock/unlock macros is the thread doing the locking. */
3539+#define IDX_CAC_LOCK_TYPE xt_rwlock_type
3540+#define IDX_CAC_INIT_LOCK(s, i) xt_init_rwlock_with_autoname(s, &(i)->cs_lock)
3541+#define IDX_CAC_FREE_LOCK(s, i) xt_free_rwlock(&(i)->cs_lock)
3542+#define IDX_CAC_READ_LOCK(i, o) xt_slock_rwlock_ns(&(i)->cs_lock)
3543+#define IDX_CAC_WRITE_LOCK(i, o) xt_xlock_rwlock_ns(&(i)->cs_lock)
3544+#define IDX_CAC_UNLOCK(i, o) xt_unlock_rwlock_ns(&(i)->cs_lock)
3545+#elif defined(IDX_CAC_USE_XSMUTEX)
3546+#define IDX_CAC_LOCK_TYPE XTMutexXSLockRec
3547+#define IDX_CAC_INIT_LOCK(s, i) xt_xsmutex_init_with_autoname(s, &(i)->cs_lock)
3548+#define IDX_CAC_FREE_LOCK(s, i) xt_xsmutex_free(s, &(i)->cs_lock)
3549+#define IDX_CAC_READ_LOCK(i, o) xt_xsmutex_slock(&(i)->cs_lock, (o)->t_id)
3550+#define IDX_CAC_WRITE_LOCK(i, o) xt_xsmutex_xlock(&(i)->cs_lock, (o)->t_id)
3551+#define IDX_CAC_UNLOCK(i, o) xt_xsmutex_unlock(&(i)->cs_lock, (o)->t_id)
3552+#elif defined(IDX_CAC_USE_SPINXSLOCK)
3553+#define IDX_CAC_LOCK_TYPE XTSpinXSLockRec
3554+#define IDX_CAC_INIT_LOCK(s, i) xt_spinxslock_init_with_autoname(s, &(i)->cs_lock)
3555+#define IDX_CAC_FREE_LOCK(s, i) xt_spinxslock_free(s, &(i)->cs_lock)
3556+#define IDX_CAC_READ_LOCK(i, s) xt_spinxslock_slock(&(i)->cs_lock, (s)->t_id)
3557+#define IDX_CAC_WRITE_LOCK(i, s) xt_spinxslock_xlock(&(i)->cs_lock, FALSE, (s)->t_id)
3558+#define IDX_CAC_UNLOCK(i, s) xt_spinxslock_unlock(&(i)->cs_lock, (s)->t_id)
3559+#else
3560+#error Please define the lock type
3561+#endif
3562+
3563+#ifdef XT_NO_ATOMICS /* Choose the lock implementation that protects a handle slot. */
3564+#define ID_HANDLE_USE_PTHREAD_RW
3565+#else
3566+//#define ID_HANDLE_USE_PTHREAD_RW
3567+#define ID_HANDLE_USE_SPINLOCK
3568+#endif
3569+
3570+#if defined(ID_HANDLE_USE_PTHREAD_RW) /* Despite the name, this variant uses a plain mutex (xt_mutex_type), not a read/write lock. */
3571+#define ID_HANDLE_LOCK_TYPE xt_mutex_type
3572+#define ID_HANDLE_INIT_LOCK(s, i) xt_init_mutex_with_autoname(s, i)
3573+#define ID_HANDLE_FREE_LOCK(s, i) xt_free_mutex(i)
3574+#define ID_HANDLE_LOCK(i) xt_lock_mutex_ns(i)
3575+#define ID_HANDLE_UNLOCK(i) xt_unlock_mutex_ns(i)
3576+#elif defined(ID_HANDLE_USE_SPINLOCK)
3577+#define ID_HANDLE_LOCK_TYPE XTSpinLockRec
3578+#define ID_HANDLE_INIT_LOCK(s, i) xt_spinlock_init_with_autoname(s, i)
3579+#define ID_HANDLE_FREE_LOCK(s, i) xt_spinlock_free(s, i)
3580+#define ID_HANDLE_LOCK(i) xt_spinlock_lock(i)
3581+#define ID_HANDLE_UNLOCK(i) xt_spinlock_unlock(i)
3582+#endif
3583+
3584+#define XT_HANDLE_SLOTS 37 /* Number of handle slots; a handle belongs to slot (block address % XT_HANDLE_SLOTS). */
3585+
3586+/*
3587+#ifdef DEBUG
3588+#define XT_INIT_HANDLE_COUNT 0
3589+#define XT_INIT_HANDLE_BLOCKS 0
3590+#else
3591+#define XT_INIT_HANDLE_COUNT 40
3592+#define XT_INIT_HANDLE_BLOCKS 10
3593+#endif
3594+*/
3595+
3596+/* A disk cache segment. The cache is divided into a number of segments
3597+ * to improve concurrency.
3598+ */
3599+typedef struct DcSegment {
3600+ IDX_CAC_LOCK_TYPE cs_lock; /* The cache segment lock. */
3601+ XTIndBlockPtr *cs_hash_table; /* Array of cg_hash_size bucket heads; the blocks of a bucket are chained via cb_next. */
3602+} DcSegmentRec, *DcSegmentPtr;
3603+
3604+typedef struct DcHandleSlot { /* One of XT_HANDLE_SLOTS independent sets of handle lists. */
3605+ ID_HANDLE_LOCK_TYPE hs_handles_lock; /* The list lock: taken before the lists below are read or changed. */
3606+ XTIndHandleBlockPtr hs_free_blocks; /* Handle blocks available for reuse (chained via hb_next). */
3607+ XTIndHandlePtr hs_free_handles; /* Handles available for reuse (chained via ih_next). */
3608+ XTIndHandlePtr hs_used_handles; /* Handles currently in use (linked via ih_next/ih_prev). */
3609+} DcHandleSlotRec, *DcHandleSlotPtr;
3610+
3611+typedef struct DcGlobals {
3612+ xt_mutex_type cg_lock; /* The public cache lock. */
3613+ DcSegmentRec cg_segment[IDX_CAC_SEGMENT_COUNT];
3614+ XTIndBlockPtr cg_blocks; /* Array of cg_block_count cache blocks. */
3615+#ifdef XT_USE_DIRECT_IO_ON_INDEX
3616+ xtWord1 *cg_buffer; /* Page data buffer as allocated (before alignment); this is the pointer that is freed. */
3617+#endif
3618+ XTIndBlockPtr cg_free_list; /* Free blocks, chained via cb_next. */
3619+ xtWord4 cg_free_count; /* Number of blocks on cg_free_list. */
3620+ xtWord4 cg_ru_now; /* A counter as described by Jim Starkey (my thanks) */
3621+ XTIndBlockPtr cg_lru_block; /* Least recently used end of the LRU list. */
3622+ XTIndBlockPtr cg_mru_block; /* Most recently used end of the LRU list. */
3623+ xtWord4 cg_hash_size; /* Number of hash buckets in each segment. */
3624+ xtWord4 cg_block_count; /* Total number of cache blocks. */
3625+ xtWord4 cg_max_free; /* Set by xt_ind_init() to cg_block_count / 10, limited to the range 8 to 128. */
3626+#ifdef DEBUG_CHECK_IND_CACHE
3627+ u_int cg_reserved_by_ots; /* Number of blocks reserved by open tables. */
3628+ u_int cg_read_count; /* Number of blocks being read. */
3629+#endif
3630+
3631+ /* Index cache handles: */
3632+ DcHandleSlotRec cg_handle_slot[XT_HANDLE_SLOTS];
3633+} DcGlobalsRec;
3634+
3635+static DcGlobalsRec ind_cac_globals; /* The one global index cache of this module. */
3636+
3637+#ifdef XT_USE_MYSYS
3638+#ifdef xtPublic
3639+#undef xtPublic
3640+#endif
3641+#include "my_global.h"
3642+#include "my_sys.h"
3643+#include "keycache.h"
3644+KEY_CACHE my_cache;
3645+#undef pthread_rwlock_rdlock
3646+#undef pthread_rwlock_wrlock
3647+#undef pthread_rwlock_try_wrlock
3648+#undef pthread_rwlock_unlock
3649+#undef pthread_mutex_lock
3650+#undef pthread_mutex_unlock
3651+#undef pthread_cond_wait
3652+#undef pthread_cond_broadcast
3653+#undef xt_mutex_type
3654+#define xtPublic
3655+#endif
3656+
3657+/*
3658+ * -----------------------------------------------------------------------
3659+ * INDEX CACHE HANDLES
3660+ */
3661+/* Allocate an index handle with xt_calloc_ns() and initialise its spinlock. Returns NULL if the allocation fails. */
3662+static XTIndHandlePtr ind_alloc_handle()
3663+{
3664+ XTIndHandlePtr handle = (XTIndHandlePtr) xt_calloc_ns(sizeof(XTIndHandleRec));
3665+ /* Only a successfully allocated handle has a lock to set up: */
3666+ if (handle) {
3667+ xt_spinlock_init_with_autoname(NULL, &handle->ih_lock);
3668+ }
3669+ return handle;
3670+}
3671+/* Counterpart of ind_alloc_handle(): free the handle's spinlock, then the handle itself. */
3672+static void ind_free_handle(XTIndHandlePtr handle)
3673+{
3674+ xt_spinlock_free(NULL, &handle->ih_lock);
3675+ xt_free_ns(handle);
3676+}
3677+/* For every handle slot: release all handles still in use, free the cached handle blocks and handles, and free the slot lock. */
3678+static void ind_handle_exit(XTThreadPtr self)
3679+{
3680+ DcHandleSlotPtr hs;
3681+ XTIndHandlePtr handle;
3682+ XTIndHandleBlockPtr hptr;
3683+
3684+ for (int i=0; i<XT_HANDLE_SLOTS; i++) {
3685+ hs = &ind_cac_globals.cg_handle_slot[i];
3686+
3687+ while (hs->hs_used_handles) { /* xt_ind_release_handle() unlinks the handle from hs_used_handles and puts it on hs_free_handles, so this loop terminates. */
3688+ handle = hs->hs_used_handles;
3689+ xt_ind_release_handle(handle, FALSE, self);
3690+ }
3691+
3692+ while (hs->hs_free_blocks) { /* Free the handle blocks that were kept for reuse. */
3693+ hptr = hs->hs_free_blocks;
3694+ hs->hs_free_blocks = hptr->hb_next;
3695+ xt_free(self, hptr);
3696+ }
3697+
3698+ while (hs->hs_free_handles) { /* Free the handles that were kept for reuse (including those just released above). */
3699+ handle = hs->hs_free_handles;
3700+ hs->hs_free_handles = handle->ih_next;
3701+ ind_free_handle(handle);
3702+ }
3703+
3704+ ID_HANDLE_FREE_LOCK(self, &hs->hs_handles_lock);
3705+ }
3706+}
3707+/* Reset every handle slot to empty lists and initialise its list lock. */
3708+static void ind_handle_init(XTThreadPtr self)
3709+{
3710+ DcHandleSlotPtr hs;
3711+
3712+ for (int i=0; i<XT_HANDLE_SLOTS; i++) {
3713+ hs = &ind_cac_globals.cg_handle_slot[i];
3714+ memset(hs, 0, sizeof(DcHandleSlotRec)); /* All list heads start out as NULL. */
3715+ ID_HANDLE_INIT_LOCK(self, &hs->hs_handles_lock);
3716+ }
3717+}
3718+
3719+//#define CHECK_HANDLE_STRUCTS
3720+/* Debug-only consistency checks of the handle lists, compiled only if CHECK_HANDLE_STRUCTS is defined. NOTE(review): ic_check_handle_structs() refers to cg_used_handles, cg_free_blocks and cg_free_handles, but DcGlobals as declared in this file keeps these lists per slot (hs_used_handles, hs_free_blocks, hs_free_handles) - presumably this code no longer compiles when enabled; confirm before use. */
3721+#ifdef CHECK_HANDLE_STRUCTS
3722+static int gdummy = 0;
3723+
3724+static void ic_stop_here() /* Called on every failed check; counts and reports the failure (a convenient place for a breakpoint). */
3725+{
3726+ gdummy = gdummy + 1;
3727+ printf("Nooo %d!\n", gdummy);
3728+}
3729+
3730+static void ic_check_handle_structs() /* Walk the used handles, the free handle blocks and the free handles, checking links, reference counts and list lengths. */
3731+{
3732+ XTIndHandlePtr handle, phandle;
3733+ XTIndHandleBlockPtr hptr, phptr;
3734+ int count = 0;
3735+ int ctest;
3736+
3737+ phandle = NULL;
3738+ handle = ind_cac_globals.cg_used_handles;
3739+ while (handle) {
3740+ if (handle == phandle)
3741+ ic_stop_here();
3742+ if (handle->ih_prev != phandle)
3743+ ic_stop_here();
3744+ if (handle->ih_cache_reference) {
3745+ ctest = handle->x.ih_cache_block->cb_handle_count;
3746+ if (ctest == 0 || ctest > 100)
3747+ ic_stop_here();
3748+ }
3749+ else {
3750+ ctest = handle->x.ih_handle_block->hb_ref_count;
3751+ if (ctest == 0 || ctest > 100)
3752+ ic_stop_here();
3753+ }
3754+ phandle = handle;
3755+ handle = handle->ih_next;
3756+ count++;
3757+ if (count > 1000)
3758+ ic_stop_here();
3759+ }
3760+
3761+ count = 0;
3762+ hptr = ind_cac_globals.cg_free_blocks;
3763+ while (hptr) {
3764+ if (hptr == phptr) /* NOTE(review): phptr has not been assigned when this is evaluated for the first block. */
3765+ ic_stop_here();
3766+ phptr = hptr;
3767+ hptr = hptr->hb_next;
3768+ count++;
3769+ if (count > 1000)
3770+ ic_stop_here();
3771+ }
3772+
3773+ count = 0;
3774+ handle = ind_cac_globals.cg_free_handles;
3775+ while (handle) {
3776+ if (handle == phandle) /* phandle still holds the last handle of the used list here. */
3777+ ic_stop_here();
3778+ phandle = handle;
3779+ handle = handle->ih_next;
3780+ count++;
3781+ if (count > 1000)
3782+ ic_stop_here();
3783+ }
3784+}
3785+#endif
3786+
3787+/*
3788+ * Get a handle to the index block referenced by 'iref'.
3789+ * This function is called by index scanners (readers). The reference 'iref' is released (XT_UNLOCK_READ) before returning, on success and on failure. Returns NULL if no handle could be allocated.
3790+ */
3791+xtPublic XTIndHandlePtr xt_ind_get_handle(XTOpenTablePtr ot, XTIndexPtr ind, XTIndReferencePtr iref)
3792+{
3793+ DcHandleSlotPtr hs;
3794+ XTIndHandlePtr handle;
3795+
3796+ hs = &ind_cac_globals.cg_handle_slot[iref->ir_block->cb_address % XT_HANDLE_SLOTS]; /* The slot is selected by the block address. */
3797+
3798+ ASSERT_NS(iref->ir_xlock == FALSE);
3799+ ASSERT_NS(iref->ir_updated == FALSE);
3800+ ID_HANDLE_LOCK(&hs->hs_handles_lock);
3801+#ifdef CHECK_HANDLE_STRUCTS
3802+ ic_check_handle_structs();
3803+#endif
3804+ if ((handle = hs->hs_free_handles)) /* Reuse a free handle of this slot if there is one, otherwise allocate. */
3805+ hs->hs_free_handles = handle->ih_next;
3806+ else {
3807+ if (!(handle = ind_alloc_handle())) {
3808+ ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
3809+ xt_ind_release(ot, ind, XT_UNLOCK_READ, iref);
3810+ return NULL;
3811+ }
3812+ }
3813+ if (hs->hs_used_handles) /* Link the handle in at the head of the used list. */
3814+ hs->hs_used_handles->ih_prev = handle;
3815+ handle->ih_next = hs->hs_used_handles;
3816+ handle->ih_prev = NULL;
3817+ handle->ih_address = iref->ir_block->cb_address;
3818+ handle->ih_cache_reference = TRUE; /* The handle refers directly to the cache block (until a copy-on-write is done). */
3819+ handle->x.ih_cache_block = iref->ir_block;
3820+ handle->ih_branch = iref->ir_branch;
3821+ /* {HANDLE-COUNT-USAGE}
3822+ * This is safe because:
3823+ *
3824+ * I have an Slock on the cache block, and I have
3825+ * at least an Slock on the index.
3826+ * So this excludes anyone who is reading
3827+ * cb_handle_count in the index.
3828+ * (all cache block writers, and the freer).
3829+ *
3830+ * The increment is safe because I have the list
3831+ * lock (hs_handles_lock), which is required by anyone else
3832+ * who increments or decrements this value.
3833+ */
3834+ iref->ir_block->cb_handle_count++;
3835+ hs->hs_used_handles = handle;
3836+#ifdef CHECK_HANDLE_STRUCTS
3837+ ic_check_handle_structs();
3838+#endif
3839+ ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
3840+ xt_ind_release(ot, ind, XT_UNLOCK_READ, iref);
3841+ return handle;
3842+}
3843+/* Release a handle obtained from xt_ind_get_handle(): drop its reference to the cache block or handle block, and move it from the used list to the free list of its slot. If 'have_lock' is TRUE the caller already holds the handle lock (ih_lock); in both cases the handle lock is released by this function. */
3844+xtPublic void xt_ind_release_handle(XTIndHandlePtr handle, xtBool have_lock, XTThreadPtr thread)
3845+{
3846+ DcHandleSlotPtr hs;
3847+ XTIndBlockPtr block = NULL;
3848+ u_int hash_idx = 0;
3849+ DcSegmentPtr seg = NULL;
3850+ XTIndBlockPtr xblock;
3851+
3852+ /* The lock order is:
3853+ * 1. Cache segment (cs_lock) - This is only by ind_free_block()!
3854+ * 1. S/Slock cache block (cb_lock)
3855+ * 2. List lock (hs_handles_lock).
3856+ * 3. Handle lock (ih_lock)
3857+ */
3858+ if (!have_lock)
3859+ xt_spinlock_lock(&handle->ih_lock);
3860+
3861+ /* Get the lock on the cache page if required: */
3862+ if (handle->ih_cache_reference) {
3863+ u_int file_id;
3864+ xtIndexNodeID address;
3865+
3866+ block = handle->x.ih_cache_block;
3867+
3868+ file_id = block->cb_file_id;
3869+ address = block->cb_address;
3870+ hash_idx = XT_NODE_ID(address) + (file_id * 223); /* Same hash computation as in ind_free_block(). */
3871+ seg = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
3872+ hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
3873+ }
3874+
3875+ xt_spinlock_unlock(&handle->ih_lock);
3876+
3877+ /* Because of the lock order, I have to release the
3878+ * handle before I get a lock on the cache block.
3879+ *
3880+ * But, by doing this, this cache block may be gone!
3881+ */
3882+ if (block) {
3883+ IDX_CAC_READ_LOCK(seg, thread);
3884+ xblock = seg->cs_hash_table[hash_idx];
3885+ while (xblock) {
3886+ if (block == xblock) {
3887+ /* Found the block...
3888+ * {HANDLE-COUNT-SLOCK}
3889+ * 04.05.2009, changed to slock.
3890+ * The xlock causes too much contention
3891+ * on the cache block for read only loads.
3892+ *
3893+ * Is it safe?
3894+ * See below...
3895+ */
3896+ XT_IPAGE_READ_LOCK(&block->cb_lock);
3897+ goto block_found;
3898+ }
3899+ xblock = xblock->cb_next;
3900+ }
3901+ block = NULL; /* Not in the hash chain any more: there is no cache block to lock or unlock. */
3902+ block_found:
3903+ IDX_CAC_UNLOCK(seg, thread);
3904+ }
3905+
3906+ hs = &ind_cac_globals.cg_handle_slot[handle->ih_address % XT_HANDLE_SLOTS];
3907+
3908+ ID_HANDLE_LOCK(&hs->hs_handles_lock);
3909+#ifdef CHECK_HANDLE_STRUCTS
3910+ ic_check_handle_structs();
3911+#endif
3912+
3913+ /* I don't need to lock the handle because I have locked
3914+ * the list, and no other thread can change the
3915+ * handle without first getting a lock on the list.
3916+ *
3917+ * In addition, the caller is the only owner of the
3918+ * handle, and the only thread with an independent
3919+ * reference to the handle.
3920+ * All other access occur over the list.
3921+ */
3922+
3923+ /* Remove the reference to the cache or a handle block: */
3924+ if (handle->ih_cache_reference) {
3925+ ASSERT_NS(block == handle->x.ih_cache_block);
3926+ ASSERT_NS(block && block->cb_handle_count > 0);
3927+ /* {HANDLE-COUNT-USAGE}
3928+ * This is safe here because I have excluded
3929+ * all readers by taking an Xlock on the
3930+ * cache block (CHANGED - see below).
3931+ *
3932+ * {HANDLE-COUNT-SLOCK}
3933+ * 04.05.2009, changed to slock.
3934+ * Should be OK, because:
3935+ * I have a lock on the list lock (hs_handles_lock),
3936+ * which prevents concurrent updates to cb_handle_count.
3937+ *
3938+ * I also have a read lock on the cache block
3939+ * but not a lock on the index. As a result, we cannot
3940+ * exclude all index writers (and readers of
3941+ * cb_handle_count).
3942+ */
3943+ block->cb_handle_count--;
3944+ }
3945+ else {
3946+ XTIndHandleBlockPtr hptr = handle->x.ih_handle_block;
3947+
3948+ ASSERT_NS(!handle->ih_cache_reference);
3949+ ASSERT_NS(hptr->hb_ref_count > 0);
3950+ hptr->hb_ref_count--;
3951+ if (!hptr->hb_ref_count) {
3952+ /* Put it back on the free list: */
3953+ hptr->hb_next = hs->hs_free_blocks;
3954+ hs->hs_free_blocks = hptr;
3955+ }
3956+ }
3957+
3958+ /* Unlink the handle: */
3959+ if (handle->ih_next)
3960+ handle->ih_next->ih_prev = handle->ih_prev;
3961+ if (handle->ih_prev)
3962+ handle->ih_prev->ih_next = handle->ih_next;
3963+ if (hs->hs_used_handles == handle)
3964+ hs->hs_used_handles = handle->ih_next;
3965+
3966+ /* Put it on the free list: */
3967+ handle->ih_next = hs->hs_free_handles;
3968+ hs->hs_free_handles = handle;
3969+
3970+#ifdef CHECK_HANDLE_STRUCTS
3971+ ic_check_handle_structs();
3972+#endif
3973+ ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
3974+
3975+ if (block) /* Release the read lock taken on the cache block above. */
3976+ XT_IPAGE_UNLOCK(&block->cb_lock, FALSE);
3977+}
3978+
3979+/* Call this function before a referenced cache block is modified!
3980+ * This function is called by index updaters. If handles refer to the block, its branch data is copied to a handle block and those handles are redirected to the copy. Returns OK, or FAILED if the handle block cannot be allocated.
3981+ */
3982+xtPublic xtBool xt_ind_copy_on_write(XTIndReferencePtr iref)
3983+{
3984+ DcHandleSlotPtr hs;
3985+ XTIndHandleBlockPtr hptr;
3986+ u_int branch_size;
3987+ XTIndHandlePtr handle;
3988+ u_int i = 0;
3989+
3990+ hs = &ind_cac_globals.cg_handle_slot[iref->ir_block->cb_address % XT_HANDLE_SLOTS];
3991+
3992+ ID_HANDLE_LOCK(&hs->hs_handles_lock);
3993+
3994+ /* {HANDLE-COUNT-USAGE}
3995+ * This is only called by updaters of this index block, or
3996+ * the freer, which holds an Xlock on the index block.
3997+ * These are all mutually exclusive for the index block.
3998+ *
3999+ * {HANDLE-COUNT-SLOCK}
4000+ * Do this check again, after we have the list lock (hs_handles_lock).
4001+ * There is a small chance that the count has changed, since we last
4002+ * checked because xt_ind_release_handle() only holds
4003+ * an slock on the index page.
4004+ *
4005+ * An updater can sometimes have a XLOCK on the index and an slock
4006+ * on the cache block. In this case xt_ind_release_handle()
4007+ * could have run through.
4008+ */
4009+ if (!iref->ir_block->cb_handle_count) {
4010+ ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
4011+ return OK;
4012+ }
4013+
4014+#ifdef CHECK_HANDLE_STRUCTS
4015+ ic_check_handle_structs();
4016+#endif
4017+ if ((hptr = hs->hs_free_blocks)) /* Reuse a free handle block of this slot if there is one, otherwise allocate. */
4018+ hs->hs_free_blocks = hptr->hb_next;
4019+ else {
4020+ if (!(hptr = (XTIndHandleBlockPtr) xt_malloc_ns(sizeof(XTIndHandleBlockRec)))) {
4021+ ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
4022+ return FAILED;
4023+ }
4024+ }
4025+
4026+ branch_size = XT_GET_INDEX_BLOCK_LEN(XT_GET_DISK_2(iref->ir_branch->tb_size_2));
4027+ memcpy(&hptr->hb_branch, iref->ir_branch, branch_size); /* Preserve the current branch contents for the readers. */
4028+ hptr->hb_ref_count = iref->ir_block->cb_handle_count; /* The copy takes over all references that the cache block had. */
4029+
4030+ handle = hs->hs_used_handles;
4031+ while (handle) {
4032+ if (handle->ih_branch == iref->ir_branch) { /* This handle refers to the block being modified: redirect it to the copy. */
4033+ i++;
4034+ xt_spinlock_lock(&handle->ih_lock);
4035+ ASSERT_NS(handle->ih_cache_reference);
4036+ handle->ih_cache_reference = FALSE;
4037+ handle->x.ih_handle_block = hptr;
4038+ handle->ih_branch = &hptr->hb_branch;
4039+ xt_spinlock_unlock(&handle->ih_lock);
4040+#ifndef DEBUG
4041+ if (i == hptr->hb_ref_count) /* All referencing handles found, stop early (debug builds scan the whole list for the assertion below). */
4042+ break;
4043+#endif
4044+ }
4045+ handle = handle->ih_next;
4046+ }
4047+#ifdef DEBUG
4048+ ASSERT_NS(hptr->hb_ref_count == i);
4049+#endif
4050+ /* {HANDLE-COUNT-USAGE}
4051+ * It is safe to modify cb_handle_count when I have the
4052+ * list lock, and I have excluded all readers!
4053+ */
4054+ iref->ir_block->cb_handle_count = 0;
4055+#ifdef CHECK_HANDLE_STRUCTS
4056+ ic_check_handle_structs();
4057+#endif
4058+ ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
4059+
4060+ return OK;
4061+}
4062+/* Take the spinlock (ih_lock) of the given handle. */
4063+xtPublic void xt_ind_lock_handle(XTIndHandlePtr handle)
4064+{
4065+ xt_spinlock_lock(&handle->ih_lock);
4066+}
4067+/* Release the spinlock (ih_lock) of the given handle. */
4068+xtPublic void xt_ind_unlock_handle(XTIndHandlePtr handle)
4069+{
4070+ xt_spinlock_unlock(&handle->ih_lock);
4071+}
4072+
4073+/*
4074+ * -----------------------------------------------------------------------
4075+ * INIT/EXIT
4076+ */
4077+
4078+/*
4079+ * Initialize the disk cache: 'cache_size' is the number of bytes of page data to cache. On an exception xt_ind_exit() is called and the exception is thrown again.
4080+ */
4081+xtPublic void xt_ind_init(XTThreadPtr self, size_t cache_size)
4082+{
4083+ XTIndBlockPtr block;
4084+
4085+#ifdef XT_USE_MYSYS
4086+ init_key_cache(&my_cache, 1024, cache_size, 100, 300);
4087+#endif
4088+ /* Memory is devoted to the page data alone, I no longer count the size of the directory,
4089+ * or the page overhead: */
4090+ ind_cac_globals.cg_block_count = cache_size / XT_INDEX_PAGE_SIZE;
4091+ ind_cac_globals.cg_hash_size = ind_cac_globals.cg_block_count / (IDX_CAC_SEGMENT_COUNT >> 1); /* NOTE(review): this is 0 if cg_block_count < (IDX_CAC_SEGMENT_COUNT >> 1), and the hash index is computed with '% cg_hash_size' - presumably a minimum cache size is enforced by the caller; confirm. */
4092+ ind_cac_globals.cg_max_free = ind_cac_globals.cg_block_count / 10;
4093+ if (ind_cac_globals.cg_max_free < 8)
4094+ ind_cac_globals.cg_max_free = 8;
4095+ if (ind_cac_globals.cg_max_free > 128)
4096+ ind_cac_globals.cg_max_free = 128;
4097+
4098+ try_(a) {
4099+ for (u_int i=0; i<IDX_CAC_SEGMENT_COUNT; i++) {
4100+ ind_cac_globals.cg_segment[i].cs_hash_table = (XTIndBlockPtr *) xt_calloc(self, ind_cac_globals.cg_hash_size * sizeof(XTIndBlockPtr));
4101+ IDX_CAC_INIT_LOCK(self, &ind_cac_globals.cg_segment[i]);
4102+ }
4103+
4104+ block = (XTIndBlockPtr) xt_malloc(self, ind_cac_globals.cg_block_count * sizeof(XTIndBlockRec));
4105+ ind_cac_globals.cg_blocks = block;
4106+ xt_init_mutex_with_autoname(self, &ind_cac_globals.cg_lock);
4107+#ifdef XT_USE_DIRECT_IO_ON_INDEX
4108+ xtWord1 *buffer;
4109+#ifdef XT_WIN
4110+ size_t psize = 512;
4111+#else
4112+ size_t psize = getpagesize();
4113+#endif
4114+ size_t diff;
4115+
4116+ buffer = (xtWord1 *) xt_malloc(self, (ind_cac_globals.cg_block_count * XT_INDEX_PAGE_SIZE));
4117+ diff = (size_t) buffer % psize;
4118+ if (diff != 0) { /* Not aligned to 'psize': allocate again with room to round the start address up. */
4119+ xt_free(self, buffer);
4120+ buffer = (xtWord1 *) xt_malloc(self, (ind_cac_globals.cg_block_count * XT_INDEX_PAGE_SIZE) + psize);
4121+ diff = (size_t) buffer % psize;
4122+ if (diff != 0)
4123+ diff = psize - diff;
4124+ }
4125+ ind_cac_globals.cg_buffer = buffer; /* Remember the allocated (unaligned) pointer, this is what must be freed. */
4126+ buffer += diff;
4127+#endif
4128+
4129+ for (u_int i=0; i<ind_cac_globals.cg_block_count; i++) { /* Put every block on the free list. cg_free_list is not reset here; the global is static and xt_ind_exit() zeroes it. */
4130+ XT_IPAGE_INIT_LOCK(self, &block->cb_lock);
4131+ block->cb_state = IDX_CAC_BLOCK_FREE;
4132+ block->cb_next = ind_cac_globals.cg_free_list;
4133+#ifdef XT_USE_DIRECT_IO_ON_INDEX
4134+ block->cb_data = buffer;
4135+ buffer += XT_INDEX_PAGE_SIZE;
4136+#endif
4137+ ind_cac_globals.cg_free_list = block;
4138+ block++;
4139+ }
4140+ ind_cac_globals.cg_free_count = ind_cac_globals.cg_block_count;
4141+#ifdef DEBUG_CHECK_IND_CACHE
4142+ ind_cac_globals.cg_reserved_by_ots = 0;
4143+#endif
4144+ ind_handle_init(self);
4145+ }
4146+ catch_(a) {
4147+ xt_ind_exit(self);
4148+ throw_();
4149+ }
4150+ cont_(a);
4151+}
4152+/* Shut down the index cache: release all handles, free the segment hash tables and locks, the cache blocks and (with direct I/O) the page buffer, then zero the globals. Also called by xt_ind_init() to clean up after a failure. */
4153+xtPublic void xt_ind_exit(XTThreadPtr self)
4154+{
4155+#ifdef XT_USE_MYSYS
4156+ end_key_cache(&my_cache, 1);
4157+#endif
4158+ /* Must be done before freeing the segments and the blocks! xt_ind_release_handle() locks the cache segment and searches its hash table for a handle that still references a cache block. */
4159+ ind_handle_exit(self);
4160+
4161+ for (u_int i=0; i<IDX_CAC_SEGMENT_COUNT; i++) {
4162+ if (ind_cac_globals.cg_segment[i].cs_hash_table) { /* Only segments that were set up by xt_ind_init() have a table and a lock to free. */
4163+ xt_free(self, ind_cac_globals.cg_segment[i].cs_hash_table);
4164+ ind_cac_globals.cg_segment[i].cs_hash_table = NULL;
4165+ IDX_CAC_FREE_LOCK(self, &ind_cac_globals.cg_segment[i]);
4166+ }
4167+ }
4168+
4169+ if (ind_cac_globals.cg_blocks) {
4170+ xt_free(self, ind_cac_globals.cg_blocks);
4171+ ind_cac_globals.cg_blocks = NULL;
4172+ xt_free_mutex(&ind_cac_globals.cg_lock);
4173+ }
4174+#ifdef XT_USE_DIRECT_IO_ON_INDEX
4175+ if (ind_cac_globals.cg_buffer) {
4176+ xt_free(self, ind_cac_globals.cg_buffer);
4177+ ind_cac_globals.cg_buffer = NULL;
4178+ }
4179+#endif
4180+
4181+ memset(&ind_cac_globals, 0, sizeof(ind_cac_globals)); /* Leave the globals in their initial state, so that xt_ind_init() can be called again. */
4182+}
4183+/* Return the number of bytes of index cache in use: the blocks that are not on the free list, times the index page size. */
4184+xtPublic xtInt8 xt_ind_get_usage()
4185+{
4186+ xtWord4 used_blocks;
4187+
4188+ used_blocks = ind_cac_globals.cg_block_count - ind_cac_globals.cg_free_count;
4189+ return (xtInt8) used_blocks * (xtInt8) XT_INDEX_PAGE_SIZE;
4190+}
4191+/* Return the total size of the index cache in bytes. */
4192+xtPublic xtInt8 xt_ind_get_size()
4193+{
4194+ /* The capacity is the number of cache blocks times
4195+ * the size of an index page:
4196+ */
4197+ return (xtInt8) ind_cac_globals.cg_block_count * (xtInt8) XT_INDEX_PAGE_SIZE;
4198+}
4199+/* Return the total number of blocks in the index cache. */
4200+xtPublic u_int xt_ind_get_blocks()
4201+{
4202+ return ind_cac_globals.cg_block_count;
4203+}
4204+
4205+/*
4206+ * -----------------------------------------------------------------------
4207+ * INDEX CHECKING
4208+ */
4209+/* Consistency check of the cache lists using ASSERT_NS(): the dirty list of 'ind' (if given), the free list and the LRU list. Passing (XTIndex *) 1 instead of an index skips the dirty list check and reports when fewer than 10 blocks are free or clean. */
4210+xtPublic void xt_ind_check_cache(XTIndexPtr ind)
4211+{
4212+ XTIndBlockPtr block;
4213+ u_int free_count, inuse_count, clean_count;
4214+ xtBool check_count = FALSE;
4215+
4216+ if (ind == (XTIndex *) 1) { /* Special value: no index to check, but do the "cache very low" check at the end. */
4217+ ind = NULL;
4218+ check_count = TRUE;
4219+ }
4220+
4221+ // Check the dirty list:
4222+ if (ind) {
4223+ u_int cnt = 0;
4224+
4225+ block = ind->mi_dirty_list;
4226+ while (block) {
4227+ cnt++;
4228+ ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_DIRTY);
4229+ block = block->cb_dirty_next;
4230+ }
4231+ ASSERT_NS(ind->mi_dirty_blocks == cnt);
4232+ }
4233+
4234+ xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
4235+
4236+ // Check the free list:
4237+ free_count = 0;
4238+ block = ind_cac_globals.cg_free_list;
4239+ while (block) {
4240+ free_count++;
4241+ ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_FREE);
4242+ block = block->cb_next;
4243+ }
4244+ ASSERT_NS(ind_cac_globals.cg_free_count == free_count);
4245+
4246+ /* Check the LRU list: */
4247+ XTIndBlockPtr list_block, plist_block;
4248+
4249+ plist_block = NULL;
4250+ list_block = ind_cac_globals.cg_lru_block;
4251+ if (list_block) {
4252+ ASSERT_NS(ind_cac_globals.cg_mru_block != NULL);
4253+ ASSERT_NS(ind_cac_globals.cg_mru_block->cb_mr_used == NULL);
4254+ ASSERT_NS(list_block->cb_lr_used == NULL);
4255+ inuse_count = 0;
4256+ clean_count = 0;
4257+ while (list_block) { /* Walk from the least to the most recently used block. */
4258+ inuse_count++;
4259+ ASSERT_NS(IDX_CAC_NOT_FREE(list_block->cb_state));
4260+ if (list_block->cb_state == IDX_CAC_BLOCK_CLEAN)
4261+ clean_count++;
4262+ ASSERT_NS(block != list_block); /* NOTE(review): 'block' is always NULL here (the free list loop above ran to the end), so this cannot fail. */
4263+ ASSERT_NS(list_block->cb_lr_used == plist_block);
4264+ plist_block = list_block;
4265+ list_block = list_block->cb_mr_used;
4266+ }
4267+ ASSERT_NS(ind_cac_globals.cg_mru_block == plist_block);
4268+ }
4269+ else {
4270+ inuse_count = 0;
4271+ clean_count = 0;
4272+ ASSERT_NS(ind_cac_globals.cg_mru_block == NULL);
4273+ }
4274+
4275+#ifdef DEBUG_CHECK_IND_CACHE
4276+ ASSERT_NS(free_count + inuse_count + ind_cac_globals.cg_reserved_by_ots + ind_cac_globals.cg_read_count == ind_cac_globals.cg_block_count);
4277+#endif
4278+ xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
4279+ if (check_count) {
4280+ /* We have just flushed, check how much is now free/clean. */
4281+ if (free_count + clean_count < 10) {
4282+ /* This could be a problem: */
4283+ printf("Cache very low!\n");
4284+ }
4285+ }
4286+}
4287+
4288+/*
4289+ * -----------------------------------------------------------------------
4290+ * FREEING INDEX CACHE
4291+ */
4292+
4293+/*
4294+ * This function returns TRUE if the block is freed.
4295+ * This function returns FALSE if the block cannot be found, the
4296+ * block is not clean, or the page lock cannot be taken without waiting.
4297+ *
4298+ * We also return FALSE if we cannot copy the block to the handle
4299+ * (if this is required). This will be due to out-of-memory!
4300+ */
4301+static xtBool ind_free_block(XTOpenTablePtr ot, XTIndBlockPtr block)
4302+{
4303+ XTIndBlockPtr xblock, pxblock;
4304+ u_int hash_idx;
4305+ u_int file_id;
4306+ xtIndexNodeID address;
4307+ DcSegmentPtr seg;
4308+
4309+#ifdef DEBUG_CHECK_IND_CACHE
4310+ xt_ind_check_cache(NULL);
4311+#endif
4312+ file_id = block->cb_file_id;
4313+ address = block->cb_address;
4314+
4315+ hash_idx = XT_NODE_ID(address) + (file_id * 223);
4316+ seg = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
4317+ hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
4318+
4319+ IDX_CAC_WRITE_LOCK(seg, ot->ot_thread);
4320+
4321+ pxblock = NULL;
4322+ xblock = seg->cs_hash_table[hash_idx];
4323+ while (xblock) {
4324+ if (block == xblock) {
4325+ /* Found the block... */
4326+ /* It is possible that a thread enters this code holding a
4327+ * lock on a page. This can cause a deadlock:
4328+ *
4329+ * #0 0x91faa2ce in semaphore_wait_signal_trap
4330+ * #1 0x91fb1da5 in pthread_mutex_lock
4331+ * #2 0x00e2ec13 in xt_p_mutex_lock at pthread_xt.cc:544
4332+ * #3 0x00e6c30a in xt_xsmutex_xlock at lock_xt.cc:1547
4333+ * #4 0x00dee402 in ind_free_block at cache_xt.cc:879
4334+ * #5 0x00dee76a in ind_cac_free_lru_blocks at cache_xt.cc:1033
4335+ * #6 0x00def8d1 in xt_ind_reserve at cache_xt.cc:1513
4336+ * #7 0x00e22118 in xt_idx_insert at index_xt.cc:2047
4337+ * #8 0x00e4d7ee in xt_tab_new_record at table_xt.cc:4702
4338+ * #9 0x00e0ff0b in ha_pbxt::write_row at ha_pbxt.cc:2340
4339+ * #10 0x0023a00f in handler::ha_write_row at handler.cc:4570
4340+ * #11 0x001a32c8 in write_record at sql_insert.cc:1568
4341+ * #12 0x001ab635 in mysql_insert at sql_insert.cc:812
4342+ * #13 0x0010e068 in mysql_execute_command at sql_parse.cc:3066
4343+ * #14 0x0011480d in mysql_parse at sql_parse.cc:5787
4344+ * #15 0x00115afb in dispatch_command at sql_parse.cc:1200
4345+ * #16 0x00116de2 in do_command at sql_parse.cc:857
4346+ * #17 0x00101ee4 in handle_one_connection at sql_connect.cc:1115
4347+ * #18 0x91fdb155 in _pthread_start
4348+ * #19 0x91fdb012 in thread_start
4349+ *
4350+ * #0 0x91fb146e in __semwait_signal
4351+ * #1 0x91fb12ef in nanosleep$UNIX2003
4352+ * #2 0x91fb1236 in usleep$UNIX2003
4353+ * #3 0x00e52112 in xt_yield at thread_xt.cc:1274
4354+ * #4 0x00e6c0eb in xt_spinxslock_xlock at lock_xt.cc:1456
4355+ * #5 0x00dee444 in ind_free_block at cache_xt.cc:886
4356+ * #6 0x00dee76a in ind_cac_free_lru_blocks at cache_xt.cc:1033
4357+ * #7 0x00deeaf0 in ind_cac_fetch at cache_xt.cc:1130
4358+ * #8 0x00def604 in xt_ind_fetch at cache_xt.cc:1386
4359+ * #9 0x00e2159a in xt_idx_update_row_id at index_xt.cc:2489
4360+ * #10 0x00e603c8 in xn_sw_clean_indices at xaction_xt.cc:1932
4361+ * #11 0x00e606d4 in xn_sw_cleanup_variation at xaction_xt.cc:2056
4362+ * #12 0x00e60e29 in xn_sw_cleanup_xact at xaction_xt.cc:2276
4363+ * #13 0x00e615ed in xn_sw_main at xaction_xt.cc:2433
4364+ * #14 0x00e61919 in xn_sw_run_thread at xaction_xt.cc:2564
4365+ * #15 0x00e53f80 in thr_main at thread_xt.cc:1017
4366+ * #16 0x91fdb155 in _pthread_start
4367+ * #17 0x91fdb012 in thread_start
4368+ *
4369+ * So we back off if a lock is held!
4370+ */
4371+ if (!XT_IPAGE_WRITE_TRY_LOCK(&block->cb_lock, ot->ot_thread->t_id)) {
4372+ IDX_CAC_UNLOCK(seg, ot->ot_thread);
4373+#ifdef DEBUG_CHECK_IND_CACHE
4374+ xt_ind_check_cache(NULL);
4375+#endif
4376+ return FALSE;
4377+ }
4378+ if (block->cb_state != IDX_CAC_BLOCK_CLEAN) {
4379+ /* This block cannot be freed: */
4380+ XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
4381+ IDX_CAC_UNLOCK(seg, ot->ot_thread);
4382+#ifdef DEBUG_CHECK_IND_CACHE
4383+ xt_ind_check_cache(NULL);
4384+#endif
4385+ return FALSE;
4386+ }
4387+
4388+ goto free_the_block;
4389+ }
4390+ pxblock = xblock;
4391+ xblock = xblock->cb_next;
4392+ }
4393+
4394+ IDX_CAC_UNLOCK(seg, ot->ot_thread);
4395+
4396+ /* Not found (this can happen, if block was freed by another thread) */
4397+#ifdef DEBUG_CHECK_IND_CACHE
4398+ xt_ind_check_cache(NULL);
4399+#endif
4400+ return FALSE;
4401+
4402+ free_the_block:
4403+
4404+ /* If the block is referenced by a handle, then we
4405+ * have to copy the data to the handle before we
4406+ * free the page:
4407+ */
4408+ /* {HANDLE-COUNT-USAGE}
4409+ * This access is safe because:
4410+ *
4411+ * We have an Xlock on the cache block, which excludes
4412+ * all other writers that want to change the cache block
4413+ * and also all readers of the cache block, because
4414+ * they all have at least an Slock on the cache block.
4415+ */
4416+ if (block->cb_handle_count) {
4417+ XTIndReferenceRec iref;
4418+ iref.ir_xlock = TRUE;
4419+ iref.ir_updated = FALSE;
4420+ iref.ir_block = block;
4421+ iref.ir_branch = (XTIdxBranchDPtr) block->cb_data;
4422+ if (!xt_ind_copy_on_write(&iref)) {
4423+ XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
4424+ IDX_CAC_UNLOCK(seg, ot->ot_thread); /* The segment is still write locked here; release it like every other failure path does. */
4425+ return FALSE;
4426+ }
4427+ }
4428+
4429+ /* Block is clean, remove from the hash table: */
4430+ if (pxblock)
4431+ pxblock->cb_next = block->cb_next;
4432+ else
4433+ seg->cs_hash_table[hash_idx] = block->cb_next;
4434+
4435+ xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
4436+
4437+ /* Remove from the MRU list: */
4438+ if (ind_cac_globals.cg_lru_block == block)
4439+ ind_cac_globals.cg_lru_block = block->cb_mr_used;
4440+ if (ind_cac_globals.cg_mru_block == block)
4441+ ind_cac_globals.cg_mru_block = block->cb_lr_used;
4442+
4443+ /* Note, I am updating blocks for which I have no lock
4444+ * here. But I think this is OK because I have a lock
4445+ * for the MRU list.
4446+ */
4447+ if (block->cb_lr_used)
4448+ block->cb_lr_used->cb_mr_used = block->cb_mr_used;
4449+ if (block->cb_mr_used)
4450+ block->cb_mr_used->cb_lr_used = block->cb_lr_used;
4451+
4452+ /* The block is now free: */
4453+ block->cb_next = ind_cac_globals.cg_free_list;
4454+ ind_cac_globals.cg_free_list = block;
4455+ ind_cac_globals.cg_free_count++;
4456+ ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
4457+ block->cb_state = IDX_CAC_BLOCK_FREE;
4458+ IDX_TRACE("%d- f%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(block->cb_data));
4459+
4460+ /* Unlock BEFORE the block is reused! */
4461+ XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
4462+
4463+ xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
4464+
4465+ IDX_CAC_UNLOCK(seg, ot->ot_thread);
4466+
4467+#ifdef DEBUG_CHECK_IND_CACHE
4468+ xt_ind_check_cache(NULL);
4469+#endif
4470+ return TRUE;
4471+}
4472+
4473+#define IND_CACHE_MAX_BLOCKS_TO_FREE 100
4474+
4475+/*
4476+ * Return the number of blocks freed.
4477+ *
4478+ * The idea is to grab a list of blocks to free.
4479+ * The list consists of the LRU blocks that are
4480+ * clean.
4481+ *
4482+ * Free as many as possible (up to max of blocks_required)
4483+ * from the list, even if LRU position has changed
4484+ * (or we have a race if there are too few blocks).
4485+ * However, if the block cannot be found, or is dirty
4486+ * we must skip it.
4487+ *
4488+ * Repeat until we find no blocks for the list, or
4489+ * we have freed 'blocks_required'.
4490+ *
4491+ * 'not_this' is a block that must not be freed because
4492+ * it is locked by the calling thread!
 *
 * Parameters:
 *   ot              - open table, passed through to ind_free_block().
 *   blocks_required - minimum number of blocks the caller wants freed.
 *   not_this        - pointer to the branch data (cb_data) of the block
 *                     to be left alone; ind_cac_fetch() passes NULL.
 *
 * Each pass collects at most IND_CACHE_MAX_BLOCKS_TO_FREE candidates
 * (the size of the local to_free[] array) while holding cg_lock, and
 * then calls ind_free_block() on them after cg_lock has been released.
 *
 * NOTE(review): the early return inside the for loop requires BOTH
 * blocks_freed >= blocks_required AND
 * cg_free_count >= cg_max_free + blocks_required, so more than
 * 'blocks_required' blocks may be freed by one call.
 *
 * NOTE(review): if a pass collects candidates but ind_free_block()
 * fails for all of them, the function jumps back to 'retry' without
 * waiting; whether this can spin for long depends on ind_free_block()
 * and the other threads - confirm.
4493+ */
4494+static u_int ind_cac_free_lru_blocks(XTOpenTablePtr ot, u_int blocks_required, XTIdxBranchDPtr not_this)
4495+{
4496+ register DcGlobalsRec *dcg = &ind_cac_globals;
4497+ XTIndBlockPtr to_free[IND_CACHE_MAX_BLOCKS_TO_FREE];
4498+ int count;
4499+ XTIndBlockPtr block;
4500+ u_int blocks_freed = 0;
4501+ XTIndBlockPtr locked_block;
4502+
4503+#ifdef XT_USE_DIRECT_IO_ON_INDEX
4504+#error This will not work!
4505+#endif
 /* Step back from the cb_data member to the start of the XTIndBlockRec
  * that contains it. NOTE(review): when not_this is NULL this yields a
  * pointer derived from NULL that is only compared, never dereferenced;
  * presumably it matches no real block - confirm.
  */
4506+ locked_block = (XTIndBlockPtr) ((xtWord1 *) not_this - offsetof(XTIndBlockRec, cb_data));
4507+
4508+ retry:
 /* Walk from the least recently used block towards the most recently
  * used one (cb_mr_used links) and remember the clean candidates.
  */
4509+ xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
4510+ block = dcg->cg_lru_block;
4511+ count = 0;
4512+ while (block && count < IND_CACHE_MAX_BLOCKS_TO_FREE) {
4513+ if (block != locked_block && block->cb_state == IDX_CAC_BLOCK_CLEAN) {
4514+ to_free[count] = block;
4515+ count++;
4516+ }
4517+ block = block->cb_mr_used;
4518+ }
4519+ xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
4520+
 /* Nothing clean was found on this pass: give up. */
4521+ if (!count)
4522+ return blocks_freed;
4523+
 /* The candidates are freed without cg_lock held, so their state may
  * have changed since the scan; ind_free_block() reports whether the
  * block was really freed. NOTE(review): cg_free_count is read here
  * without cg_lock.
  */
4524+ for (int i=0; i<count; i++) {
4525+ if (ind_free_block(ot, to_free[i]))
4526+ blocks_freed++;
4527+ if (blocks_freed >= blocks_required &&
4528+ ind_cac_globals.cg_free_count >= ind_cac_globals.cg_max_free + blocks_required)
4529+ return blocks_freed;
4530+ }
4531+
4532+ goto retry;
4533+}
4534+
4535+/*
4536+ * -----------------------------------------------------------------------
4537+ * MAIN CACHE FUNCTIONS
4538+ */
4539+
4540+/*
4541+ * Fetch the block. Note, if we are about to write the block
4542+ * then there is no need to read it from disk!
 *
 * Looks up the cache block for the page 'address' of the index file
 * ot->ot_ind_file. On a miss a free block is taken, optionally filled
 * from the file, and inserted into the cache.
 *
 * Parameters:
 *   ot        - open table; supplies the index file, the calling thread
 *               and the per-table reserve list (ot_ind_res_bufs).
 *   ind       - index; only used here for the lazy-delete count that is
 *               done when a page has just been read.
 *   address   - node ID of the index page.
 *   ret_seg   - receives the cache segment the page hashes to; it is set
 *               on every non-NULL return.
 *   read_data - TRUE: on a miss the page is read with xt_pread_file().
 *               FALSE: the data area of the block is not touched.
 *
 * Returns the block, or NULL if no free block could be obtained (in
 * which case XT_ERR_NO_INDEX_CACHE is registered) or if xt_pread_file()
 * failed.
 *
 * Locking: on a non-NULL return the segment lock is still held (a read
 * lock on a cache hit, a write lock when the miss path was taken); this
 * function does not call IDX_CAC_UNLOCK() on those paths. On a NULL
 * return no segment lock is held.
4543+ */
4544+static XTIndBlockPtr ind_cac_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, DcSegmentPtr *ret_seg, xtBool read_data)
4545+{
4546+ register XTOpenFilePtr file = ot->ot_ind_file;
4547+ register XTIndBlockPtr block, new_block;
4548+ register DcSegmentPtr seg;
4549+ register u_int hash_idx;
4550+ register DcGlobalsRec *dcg = &ind_cac_globals;
4551+ size_t red_size;
4552+
4553+#ifdef DEBUG_CHECK_IND_CACHE
4554+ xt_ind_check_cache(NULL);
4555+#endif
4556+ /* Address, plus file ID multiplied by my favorite prime number! */
4557+ hash_idx = XT_NODE_ID(address) + (file->fr_id * 223);
 /* The low bits select the segment, the remaining bits select the
  * bucket within that segment's hash table.
  */
4558+ seg = &dcg->cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
4559+ hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % dcg->cg_hash_size;
4560+
4561+ IDX_CAC_READ_LOCK(seg, ot->ot_thread);
4562+ block = seg->cs_hash_table[hash_idx];
4563+ while (block) {
4564+ if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
4565+ ASSERT_NS(block->cb_state != IDX_CAC_BLOCK_FREE);
4566+
4567+ /* Check how recently this page has been used: */
 /* The MRU list is only touched (under cg_lock) when the block's
  * cb_ru_time is more than half of cg_block_count behind cg_ru_now.
  */
4568+ if (XT_TIME_DIFF(block->cb_ru_time, dcg->cg_ru_now) > (dcg->cg_block_count >> 1)) {
4569+ xt_lock_mutex_ns(&dcg->cg_lock);
4570+
4571+ /* Move to the front of the MRU list: */
4572+ block->cb_ru_time = ++dcg->cg_ru_now;
4573+ if (dcg->cg_mru_block != block) {
4574+ /* Remove from the MRU list: */
4575+ if (dcg->cg_lru_block == block)
4576+ dcg->cg_lru_block = block->cb_mr_used;
4577+ if (block->cb_lr_used)
4578+ block->cb_lr_used->cb_mr_used = block->cb_mr_used;
4579+ if (block->cb_mr_used)
4580+ block->cb_mr_used->cb_lr_used = block->cb_lr_used;
4581+
4582+ /* Make the block the most recently used: */
4583+ if ((block->cb_lr_used = dcg->cg_mru_block))
4584+ dcg->cg_mru_block->cb_mr_used = block;
4585+ block->cb_mr_used = NULL;
4586+ dcg->cg_mru_block = block;
4587+ if (!dcg->cg_lru_block)
4588+ dcg->cg_lru_block = block;
4589+ }
4590+
4591+ xt_unlock_mutex_ns(&dcg->cg_lock);
4592+ }
4593+
 /* Cache hit: return with the segment read lock still held. */
4594+ *ret_seg = seg;
4595+#ifdef DEBUG_CHECK_IND_CACHE
4596+ xt_ind_check_cache(NULL);
4597+#endif
4598+ ot->ot_thread->st_statistics.st_ind_cache_hit++;
4599+ return block;
4600+ }
4601+ block = block->cb_next;
4602+ }
4603+
4604+ /* Block not found... */
4605+ IDX_CAC_UNLOCK(seg, ot->ot_thread);
4606+
4607+ /* Check the open table reserve list first: */
4608+ if ((new_block = ot->ot_ind_res_bufs)) {
4609+ ot->ot_ind_res_bufs = new_block->cb_next;
4610+ ot->ot_ind_res_count--;
4611+#ifdef DEBUG_CHECK_IND_CACHE
4612+ xt_lock_mutex_ns(&dcg->cg_lock);
4613+ dcg->cg_reserved_by_ots--;
4614+ dcg->cg_read_count++;
4615+ xt_unlock_mutex_ns(&dcg->cg_lock);
4616+#endif
4617+ goto use_free_block;
4618+ }
4619+
4620+ free_some_blocks:
 /* cg_free_list is tested here without cg_lock; the list is checked
  * again under the lock below, and we come back here if it turned
  * out to be empty.
  */
4621+ if (!dcg->cg_free_list) {
4622+ if (!ind_cac_free_lru_blocks(ot, 1, NULL)) {
4623+ if (!dcg->cg_free_list) {
4624+ xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_INDEX_CACHE);
4625+#ifdef DEBUG_CHECK_IND_CACHE
4626+ xt_ind_check_cache(NULL);
4627+#endif
4628+ return NULL;
4629+ }
4630+ }
4631+ }
4632+
4633+ /* Get a free block: */
4634+ xt_lock_mutex_ns(&dcg->cg_lock);
4635+ if (!(new_block = dcg->cg_free_list)) {
4636+ xt_unlock_mutex_ns(&dcg->cg_lock);
4637+ goto free_some_blocks;
4638+ }
4639+ ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_FREE);
4640+ dcg->cg_free_list = new_block->cb_next;
4641+ dcg->cg_free_count--;
4642+#ifdef DEBUG_CHECK_IND_CACHE
4643+ dcg->cg_read_count++;
4644+#endif
4645+ xt_unlock_mutex_ns(&dcg->cg_lock);
4646+
4647+ use_free_block:
 /* Initialise the block; it is not yet in any hash chain or in the
  * MRU list at this point.
  */
4648+ new_block->cb_address = address;
4649+ new_block->cb_file_id = file->fr_id;
4650+ ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_FREE);
4651+ new_block->cb_state = IDX_CAC_BLOCK_CLEAN;
4652+ new_block->cb_handle_count = 0;
4653+ new_block->cp_del_count = 0;
4654+ new_block->cb_dirty_next = NULL;
4655+ new_block->cb_dirty_prev = NULL;
4656+#ifdef IND_OPT_DATA_WRITTEN
4657+ new_block->cb_header = FALSE;
4658+ new_block->cb_min_pos = 0xFFFF;
4659+ new_block->cb_max_pos = 0;
4660+#endif
4661+
4662+ if (read_data) {
4663+ if (!xt_pread_file(file, xt_ind_node_to_offset(ot->ot_table, address), XT_INDEX_PAGE_SIZE, 0, new_block->cb_data, &red_size, &ot->ot_thread->st_statistics.st_ind, ot->ot_thread)) {
 /* Read failed: put the block on the global free list and
  * return NULL.
  */
4664+ xt_lock_mutex_ns(&dcg->cg_lock);
4665+ new_block->cb_next = dcg->cg_free_list;
4666+ dcg->cg_free_list = new_block;
4667+ dcg->cg_free_count++;
4668+#ifdef DEBUG_CHECK_IND_CACHE
4669+ dcg->cg_read_count--;
4670+#endif
4671+ ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_CLEAN);
4672+ new_block->cb_state = IDX_CAC_BLOCK_FREE;
4673+ IDX_TRACE("%d- F%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
4674+ xt_unlock_mutex_ns(&dcg->cg_lock);
4675+#ifdef DEBUG_CHECK_IND_CACHE
4676+ xt_ind_check_cache(NULL);
4677+#endif
4678+ return NULL;
4679+ }
4680+ IDX_TRACE("%d- R%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
4681+ ot->ot_thread->st_statistics.st_ind_cache_miss++;
4682+ }
4683+ else
4684+ red_size = 0;
 /* red_size is only referenced by the disabled memset below. */
4685+ // PMC - I don't think this is required! memset(new_block->cb_data + red_size, 0, XT_INDEX_PAGE_SIZE - red_size);
4686+
 /* The segment lock was released while the block was prepared, so the
  * hash chain is searched again, this time under the write lock.
  */
4687+ IDX_CAC_WRITE_LOCK(seg, ot->ot_thread);
4688+ block = seg->cs_hash_table[hash_idx];
4689+ while (block) {
4690+ if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
4691+ /* Oops, someone else was faster! */
 /* Give new_block back to the global free list and return the
  * block that is already in the cache ('block').
  */
4692+ xt_lock_mutex_ns(&dcg->cg_lock);
4693+ new_block->cb_next = dcg->cg_free_list;
4694+ dcg->cg_free_list = new_block;
4695+ dcg->cg_free_count++;
4696+#ifdef DEBUG_CHECK_IND_CACHE
4697+ dcg->cg_read_count--;
4698+#endif
4699+ ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_CLEAN);
4700+ new_block->cb_state = IDX_CAC_BLOCK_FREE;
4701+ IDX_TRACE("%d- F%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
4702+ xt_unlock_mutex_ns(&dcg->cg_lock);
4703+ goto done_ok;
4704+ }
4705+ block = block->cb_next;
4706+ }
4707+ block = new_block;
4708+
4709+ /* Make the block the most recently used: */
4710+ xt_lock_mutex_ns(&dcg->cg_lock);
4711+ block->cb_ru_time = ++dcg->cg_ru_now;
4712+ if ((block->cb_lr_used = dcg->cg_mru_block))
4713+ dcg->cg_mru_block->cb_mr_used = block;
4714+ block->cb_mr_used = NULL;
4715+ dcg->cg_mru_block = block;
4716+ if (!dcg->cg_lru_block)
4717+ dcg->cg_lru_block = block;
4718+#ifdef DEBUG_CHECK_IND_CACHE
4719+ dcg->cg_read_count--;
4720+#endif
4721+ xt_unlock_mutex_ns(&dcg->cg_lock);
4722+
4723+ /* {LAZY-DEL-INDEX-ITEMS}
4724+ * Conditionally count the number of deleted entries in the index:
4725+ * We do this before other threads can read the block.
4726+ */
4727+ if (ind->mi_lazy_delete && read_data)
4728+ xt_ind_count_deleted_items(ot->ot_table, ind, block);
4729+
4730+ /* Add to the hash table: */
4731+ block->cb_next = seg->cs_hash_table[hash_idx];
4732+ seg->cs_hash_table[hash_idx] = block;
4733+
4734+ done_ok:
 /* Return with the segment write lock still held. */
4735+ *ret_seg = seg;
4736+#ifdef DEBUG_CHECK_IND_CACHE
4737+ xt_ind_check_cache(NULL);
4738+#endif
4739+ return block;
4740+}
4741+
/*
 * Look up the page 'address' of ot->ot_ind_file in the index cache
 * without ever reading from the file or allocating a block.
 *
 * If the block is cached, *ret_seg and *ret_block are set to its segment
 * and the block, and the segment read lock is left held (there is no
 * IDX_CAC_UNLOCK() on that path).
 * If it is not cached, the segment lock is released and both *ret_seg
 * and *ret_block are set to NULL.
 *
 * Both paths return OK; no failure return is visible in this function.
 */
4742+static xtBool ind_cac_get(XTOpenTablePtr ot, xtIndexNodeID address, DcSegmentPtr *ret_seg, XTIndBlockPtr *ret_block)
4743+{
4744+ register XTOpenFilePtr file = ot->ot_ind_file;
4745+ register XTIndBlockPtr block;
4746+ register DcSegmentPtr seg;
4747+ register u_int hash_idx;
4748+ register DcGlobalsRec *dcg = &ind_cac_globals;
4749+
 /* Same hash, segment and bucket computation as in ind_cac_fetch(). */
4750+ hash_idx = XT_NODE_ID(address) + (file->fr_id * 223);
4751+ seg = &dcg->cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
4752+ hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % dcg->cg_hash_size;
4753+
4754+ IDX_CAC_READ_LOCK(seg, ot->ot_thread);
4755+ block = seg->cs_hash_table[hash_idx];
4756+ while (block) {
4757+ if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
4758+ ASSERT_NS(block->cb_state != IDX_CAC_BLOCK_FREE);
4759+
 /* Found: the segment read lock stays held for the caller. */
4760+ *ret_seg = seg;
4761+ *ret_block = block;
4762+ return OK;
4763+ }
4764+ block = block->cb_next;
4765+ }
4766+ IDX_CAC_UNLOCK(seg, ot->ot_thread);
4767+
4768+ /* Block not found: */
4769+ *ret_seg = NULL;
4770+ *ret_block = NULL;
4771+ return OK;
4772+}
4773+
/*
 * Copy 'size' bytes from 'data' into the cache block for the index page
 * 'address' and mark the block dirty.
 *
 * The block is obtained with ind_cac_fetch(..., FALSE), i.e. the page is
 * not read from the file if it is not cached yet.
 *
 * Returns FAILED if ind_cac_fetch() returns NULL, or if the block is in
 * state IDX_CAC_BLOCK_FLUSHING and il_write_block() fails; otherwise OK.
 * The page lock and the segment lock are released before returning on
 * every path that acquired them.
 */
4774+xtPublic xtBool xt_ind_write(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, size_t size, xtWord1 *data)
4775+{
4776+ XTIndBlockPtr block;
4777+ DcSegmentPtr seg;
4778+
4779+ if (!(block = ind_cac_fetch(ot, ind, address, &seg, FALSE)))
4780+ return FAILED;
4781+
4782+ XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
 /* A block in the FLUSHING state is first handed to the index flush
  * log (il_write_block) before its data is overwritten.
  * NOTE(review): presumably this preserves the image being flushed -
  * confirm against il_write_block().
  */
4783+ if (block->cb_state == IDX_CAC_BLOCK_FLUSHING) {
4784+ if (!ot->ot_table->tab_ind_flush_ilog->il_write_block(ot, block)) {
4785+ XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
4786+ IDX_CAC_UNLOCK(seg, ot->ot_thread);
4787+ return FAILED;
4788+ }
4789+ }
4790+#ifdef IND_OPT_DATA_WRITTEN
 /* Record that the header and the range from position 0 up to
  * size - XT_INDEX_PAGE_HEAD_SIZE have been written.
  */
4791+ block->cb_header = TRUE;
4792+ block->cb_min_pos = 0;
4793+ if (size-XT_INDEX_PAGE_HEAD_SIZE > block->cb_max_pos)
4794+ block->cb_max_pos = size-XT_INDEX_PAGE_HEAD_SIZE;
4795+ ASSERT_NS(block->cb_max_pos <= XT_INDEX_PAGE_SIZE-XT_INDEX_PAGE_HEAD_SIZE);
4796+ ASSERT_NS(block->cb_min_pos < block->cb_max_pos);
4797+#endif
4798+ ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
4799+ memcpy(block->cb_data, data, size);
4800+ if (block->cb_state != IDX_CAC_BLOCK_DIRTY) {
 /* NOTE(review): 'offset' is not declared in this function, so
  * TRACK_BLOCK_WRITE presumably expands to nothing in normal
  * builds - confirm.
  */
4801+ TRACK_BLOCK_WRITE(offset);
 /* Push the block onto the front of the index's dirty list. */
4802+ xt_spinlock_lock(&ind->mi_dirty_lock);
4803+ if ((block->cb_dirty_next = ind->mi_dirty_list))
4804+ ind->mi_dirty_list->cb_dirty_prev = block;
4805+ block->cb_dirty_prev = NULL;
4806+ ind->mi_dirty_list = block;
4807+ ind->mi_dirty_blocks++;
4808+ xt_spinlock_unlock(&ind->mi_dirty_lock);
 /* The dirty statistic is only incremented when the block was not
  * in the LOGGED state before.
  */
4809+ if (block->cb_state != IDX_CAC_BLOCK_LOGGED) {
4810+ ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
4811+ ot->ot_thread->st_statistics.st_ind_cache_dirty++;
4812+ }
4813+ block->cb_state = IDX_CAC_BLOCK_DIRTY;
4814+ }
4815+ XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
4816+ IDX_CAC_UNLOCK(seg, ot->ot_thread);
4817+#ifdef XT_TRACK_INDEX_UPDATES
4818+ ot->ot_ind_changed++;
4819+#endif
4820+ return OK;
4821+}
4822+
4823+/*
4824+ * Update the cache, if in RAM.
 *
 * If the page 'address' is currently cached, 'size' bytes from 'data'
 * are copied over the start of the block's data under the page write
 * lock. The block's state is not changed and the block is not added to
 * any dirty list. If the page is not cached nothing is done.
 *
 * Returns FAILED only if ind_cac_get() fails, otherwise OK.
4825+ */
4826+xtPublic xtBool xt_ind_write_cache(XTOpenTablePtr ot, xtIndexNodeID address, size_t size, xtWord1 *data)
4827+{
4828+ XTIndBlockPtr block;
4829+ DcSegmentPtr seg;
4830+
4831+ if (!ind_cac_get(ot, address, &seg, &block))
4832+ return FAILED;
4833+
4834+ if (block) {
4835+ XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
4836+ /* This should only be done to pages that are free, which
4837+ * are not on the dirty list, so they must be clean!
4838+ */
4839+ ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
4840+ memcpy(block->cb_data, data, size);
4841+
4842+ XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 /* Release the segment lock that ind_cac_get() left held. */
4843+ IDX_CAC_UNLOCK(seg, ot->ot_thread);
4844+ }
4845+
4846+ return OK;
4847+}
4848+
/*
 * Get a write-locked reference to the page 'address' if, and only if,
 * it is currently in the cache.
 *
 * If the page is cached, the page write lock is taken, the segment lock
 * is released, and iref->ir_block / iref->ir_branch are set to the block
 * and its data. The page lock is still held when this function returns;
 * no unlock is done here.
 * If the page is not cached, iref->ir_block and iref->ir_branch are set
 * to NULL.
 * In both cases iref->ir_xlock is set to TRUE and iref->ir_updated to
 * FALSE.
 *
 * Returns FAILED only if ind_cac_get() fails, otherwise OK.
 */
4849+xtPublic xtBool xt_ind_get(XTOpenTablePtr ot, xtIndexNodeID address, XTIndReferencePtr iref)
4850+{
4851+ XTIndBlockPtr block;
4852+ DcSegmentPtr seg;
4853+
4854+ if (!ind_cac_get(ot, address, &seg, &block))
4855+ return FAILED;
4856+
4857+ if (block) {
 /* Take the page lock before dropping the segment lock. */
4858+ XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
4859+ ASSERT_NS(IDX_CAC_NOT_FREE(block->cb_state));
4860+ IDX_CAC_UNLOCK(seg, ot->ot_thread);
4861+ iref->ir_block = block;
4862+ iref->ir_branch = (XTIdxBranchDPtr) block->cb_data;
4863+ }
4864+ else {
4865+ iref->ir_block = NULL;
4866+ iref->ir_branch = NULL;
4867+ }
4868+ iref->ir_xlock = TRUE;
4869+ iref->ir_updated = FALSE;
4870+
4871+ return OK;
4872+}
4873+
4874+/*
4875+ * Note, this function may only be called if the block has
4876+ * been freed.
 *
 * If the page 'address' is in the cache and its block is in state
 * IDX_CAC_BLOCK_DIRTY, the block is unlinked from the dirty list of
 * 'ind' and its state is set to IDX_CAC_BLOCK_CLEAN. The block itself
 * stays in the cache; it is not moved to the free list here.
 * If the page is not cached nothing is done.
 *
 * Returns FAILED if ind_cac_get() fails, or if the block is in state
 * IDX_CAC_BLOCK_FLUSHING and il_write_block() fails; otherwise OK.
4877+ */
4878+xtPublic xtBool xt_ind_free_block(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address)
4879+{
4880+ XTIndBlockPtr block;
4881+ DcSegmentPtr seg;
4882+
4883+ if (!ind_cac_get(ot, address, &seg, &block))
4884+ return FAILED;
4885+ if (block) {
4886+ XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
4887+
 /* Same handling of a FLUSHING block as in xt_ind_write(). */
4888+ if (block->cb_state == IDX_CAC_BLOCK_FLUSHING) {
4889+ if (!ot->ot_table->tab_ind_flush_ilog->il_write_block(ot, block)) {
4890+ XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
4891+ IDX_CAC_UNLOCK(seg, ot->ot_thread);
4892+ return FAILED;
4893+ }
4894+ }
4895+
4896+ /* {PAGE-NO-IN-INDEX-FILE}
4897+ * This is the one exception to the rule that a block
4898+ * that is in the IDX_CAC_BLOCK_LOGGED may be released
4899+ * from the cache!
4900+ */
4901+ ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
4902+
4903+ if (block->cb_state == IDX_CAC_BLOCK_DIRTY) {
4904+ /* Take the block off the dirty list: */
4905+ xt_spinlock_lock(&ind->mi_dirty_lock);
4906+ if (block->cb_dirty_next)
4907+ block->cb_dirty_next->cb_dirty_prev = block->cb_dirty_prev;
4908+ if (block->cb_dirty_prev)
4909+ block->cb_dirty_prev->cb_dirty_next = block->cb_dirty_next;
4910+ if (ind->mi_dirty_list == block)
4911+ ind->mi_dirty_list = block->cb_dirty_next;
4912+ ind->mi_dirty_blocks--;
4913+ xt_spinlock_unlock(&ind->mi_dirty_lock);
4914+ block->cb_state = IDX_CAC_BLOCK_CLEAN;
4915+ ot->ot_thread->st_statistics.st_ind_cache_dirty--;
4916+#ifdef IND_OPT_DATA_WRITTEN
 /* Reset the written-range tracking to the same values that
  * ind_cac_fetch() gives a freshly allocated block.
  */
4917+ block->cb_header = FALSE;
4918+ block->cb_min_pos = 0xFFFF;
4919+ block->cb_max_pos = 0;
4920+#endif
4921+ }
4922+ XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
4923+
 /* Release the segment lock that ind_cac_get() left held. */
4924+ IDX_CAC_UNLOCK(seg, ot->ot_thread);
4925+ }
4926+
4927+ return OK;
4928+}
4929+
/*
 * Copy the first 'size' bytes of the index page 'address' into 'data'.
 *
 * The block is obtained with ind_cac_fetch(..., TRUE), so the page is
 * read from the file if it is not in the cache. The copy is done under
 * the page read lock; the page lock and the segment lock are both
 * released before returning.
 *
 * Returns FAILED if ind_cac_fetch() returns NULL, otherwise OK.
 * NOTE(review): 'size' is not checked against XT_INDEX_PAGE_SIZE here;
 * presumably callers guarantee it - confirm.
 */
4930+xtPublic xtBool xt_ind_read_bytes(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, size_t size, xtWord1 *data)
4931+{
4932+ XTIndBlockPtr block;
4933+ DcSegmentPtr seg;
4934+
4935+ if (!(block = ind_cac_fetch(ot, ind, address, &seg, TRUE)))
4936+ return FAILED;
4937+
4938+ XT_IPAGE_READ_LOCK(&block->cb_lock);
4939+ memcpy(data, block->cb_data, size);
4940+ XT_IPAGE_UNLOCK(&block->cb_lock, FALSE);
4941+ IDX_CAC_UNLOCK(seg, ot->ot_thread);
4942+ return OK;
4943+}
4944+
4945+xtPublic xtBool xt_ind_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, XTPageLockType ltype, XTIndReferencePtr iref)
4946+{
4947+ register XTIndBlockPtr block;
4948+ DcSegmentPtr seg;
4949+ xtWord2 branch_size;
4950+ u_int rec_size;
4951+ xtBool xlock = FALSE;
4952+
4953+#ifdef DEBUG
4954+ ASSERT_NS(iref->ir_xlock == 2);
4955+ ASSERT_NS(iref->ir_xlock == 2);
4956+#endif
4957+ if (!(block = ind_cac_fetch(ot, ind, address, &seg, TRUE)))
4958+ return FAILED;
4959+
4960+ branch_size = XT_GET_DISK_2(((XTIdxBranchDPtr) block->cb_data)->tb_size_2);
4961+ rec_size = XT_GET_INDEX_BLOCK_LEN(branch_size);
4962+ if (rec_size < 2 || rec_size > XT_INDEX_PAGE_SIZE)
4963+ goto failed_corrupt;
4964+ if (ind->mi_fix_key) {
4965+ rec_size -= 2;
4966+ if (XT_IS_NODE(branch_size)) {
4967+ if (rec_size != 0) {
4968+ if (rec_size < XT_NODE_REF_SIZE)
4969+ goto failed_corrupt;
4970+ rec_size -= XT_NODE_REF_SIZE;
4971+ if ((rec_size % (ind->mi_key_size + XT_RECORD_REF_SIZE + XT_NODE_REF_SIZE)) != 0)
4972+ goto failed_corrupt;
4973+ }
4974+ }
4975+ else {
4976+ if ((rec_size % (ind->mi_key_size + XT_RECORD_REF_SIZE)) != 0)
4977+ goto failed_corrupt;
4978+ }
4979+ }
4980+
4981+ switch (ltype) {
4982+ case XT_LOCK_READ:
4983+ break;
4984+ case XT_LOCK_WRITE:
4985+ xlock = TRUE;
4986+ break;
4987+ case XT_XLOCK_LEAF:
4988+ if (!XT_IS_NODE(branch_size))
4989+ xlock = TRUE;
4990+ break;
4991+ case XT_XLOCK_DEL_LEAF:
4992+ if (!XT_IS_NODE(branch_size)) {
4993+ if (ot->ot_table->tab_dic.dic_no_lazy_delete)
4994+ xlock = TRUE;
4995+ else {
4996+ /*
4997+ * {LAZY-DEL-INDEX-ITEMS}
4998+ *
4999+ * We are fetch a page for delete purpose.
5000+ * we decide here if we plan to do a lazy delete,
The diff has been truncated for viewing.