Merge lp:~pbeaman/akiban-persistit/bigload into lp:akiban-persistit

Proposed by Peter Beaman
Status: Merged
Approved by: Peter Beaman
Approved revision: 387
Merged at revision: 386
Proposed branch: lp:~pbeaman/akiban-persistit/bigload
Merge into: lp:akiban-persistit
Diff against target: 481 lines (+405/-4)
5 files modified
src/main/java/com/persistit/BufferPool.java (+4/-1)
src/main/java/com/persistit/Key.java (+60/-3)
src/test/java/com/persistit/stress/InsertBigLoad.java (+55/-0)
src/test/java/com/persistit/stress/unit/BigLoad.java (+243/-0)
src/test/java/com/persistit/unit/KeyTest1.java (+43/-0)
To merge this branch: bzr merge lp:~pbeaman/akiban-persistit/bigload
Reviewer Review Type Date Requested Status
Akiban Build User Needs Fixing
Nathan Williams Approve
Review via email: mp+132617@code.launchpad.net

This proposal supersedes a proposal from 2012-11-01.

Description of the change

Add a new stress test that sorts random keys into trees in a temporary volume and then inserts them in key-sort order into the main database. This is primarily a demonstration developed to help an answer from a Persistit evaluator, but since it exercises temporary volumes and potential very large databases it seems appropriate to have the code available in the stress test suite.

This branch also adds a couple of new methods to the Key class. The original version of BigLoad used these. We also have another potential user who may benefit from these and they have some general usefulness so are therefore proposed. New unit tests cover them.

To post a comment you must log in.
Revision history for this message
Peter Beaman (pbeaman) wrote : Posted in a previous version of this proposal

Note: removed a change in the Exchange class subsequent to original merge proposal. Exchange is now unchanged.

Revision history for this message
Peter Beaman (pbeaman) wrote :

Note that appendKeySegment does not check the size of the destination key - an AIOOBE can result. However, that's also true of the other append methods, so this new method behaves no worse.

Revision history for this message
Nathan Williams (nwilliams) wrote :

New test sounds good, though I didn't inspect it too closely.

Mainline additions look good too.

review: Approve
Revision history for this message
Akiban Build User (build-akiban) wrote :

There were 2 failures during build/test:

* job persistit-build failed at build number 470: http://172.16.20.104:8080/job/persistit-build/470/

* view must-pass failed: persistit-build is yellow

review: Needs Fixing
Revision history for this message
Peter Beaman (pbeaman) wrote :

Known sporadic failure in TransactionLifetimeTest. I'm re-Approving this since the changes in this proposal didn't go anywhere near the TLT failure.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'src/main/java/com/persistit/BufferPool.java'
--- src/main/java/com/persistit/BufferPool.java 2012-10-05 14:21:47 +0000
+++ src/main/java/com/persistit/BufferPool.java 2012-11-01 20:16:23 +0000
@@ -653,7 +653,10 @@
653 }653 }
654 }654 }
655 buffer.clearValid();655 buffer.clearValid();
656 buffer.clearDirty();656 if (buffer.isDirty()) {
657 _dirtyPageCount.decrementAndGet();
658 buffer.clearDirty();
659 }
657 buffer.setPageAddressAndVolume(0, null);660 buffer.setPageAddressAndVolume(0, null);
658 }661 }
659662
660663
=== modified file 'src/main/java/com/persistit/Key.java'
--- src/main/java/com/persistit/Key.java 2012-08-24 13:57:19 +0000
+++ src/main/java/com/persistit/Key.java 2012-11-01 20:16:23 +0000
@@ -1146,7 +1146,7 @@
1146 }1146 }
11471147
1148 /**1148 /**
1149 * Compares a bounded subarray of the backing byte array for this1149 * Compare a bounded subarray of the backing byte array for this
1150 * <code>Key</code> with another <code>Key</code>. This method is intended1150 * <code>Key</code> with another <code>Key</code>. This method is intended
1151 * for use within the library and is generally not useful for applications.1151 * for use within the library and is generally not useful for applications.
1152 * 1152 *
@@ -1162,8 +1162,9 @@
1162 * to the corresponding fragment of the supplied <code>Key</code>.1162 * to the corresponding fragment of the supplied <code>Key</code>.
1163 */1163 */
1164 public int compareKeyFragment(final Key key, final int fragmentStart, final int fragmentSize) {1164 public int compareKeyFragment(final Key key, final int fragmentStart, final int fragmentSize) {
1165 if (key == this)1165 if (key == this) {
1166 return 0;1166 return 0;
1167 }
1167 final int size1 = this._size;1168 final int size1 = this._size;
1168 final int size2 = key.getEncodedSize();1169 final int size2 = key.getEncodedSize();
1169 final byte[] bytes1 = this.getEncodedBytes();1170 final byte[] bytes1 = this.getEncodedBytes();
@@ -1178,8 +1179,9 @@
1178 for (int i = fragmentStart; i < size; i++) {1179 for (int i = fragmentStart; i < size; i++) {
1179 final int b1 = bytes1[i] & 0xFF;1180 final int b1 = bytes1[i] & 0xFF;
1180 final int b2 = bytes2[i] & 0xFF;1181 final int b2 = bytes2[i] & 0xFF;
1181 if (b1 != b2)1182 if (b1 != b2) {
1182 return b1 - b2;1183 return b1 - b2;
1184 }
1183 }1185 }
1184 if (size == fragmentSize + fragmentStart)1186 if (size == fragmentSize + fragmentStart)
1185 return 0;1187 return 0;
@@ -1191,6 +1193,42 @@
1191 }1193 }
11921194
1193 /**1195 /**
1196 * Compare the next key segment of this key to the next key segment of the
1197 * supplied Key. The next key segment is determined by the current index of
1198 * the key and can be set using the {@link #setIndex(int)} method. Returns a
1199 * positive integer if the next segment of this <code>Key</code> is larger
1200 * than the next segment of the supplied <code>Key</code>, a negative
1201 * integer if it is smaller, or zero if the segments are equal.
1202 *
1203 * @param key
1204 * @return the comparison result
1205 */
1206 public int compareKeySegment(final Key key) {
1207 if (key == this) {
1208 return 0;
1209 }
1210 final byte[] bytes1 = this.getEncodedBytes();
1211 final byte[] bytes2 = key.getEncodedBytes();
1212 final int index1 = this.getIndex();
1213 final int index2 = key.getIndex();
1214 final int count1 = this.getEncodedSize() - this.getIndex();
1215 final int count2 = key.getEncodedSize() - key.getIndex();
1216 final int count = Math.min(count1, count2);
1217
1218 for (int i = 0; i < count; i++) {
1219 final int b1 = bytes1[i + index1] & 0xFF;
1220 final int b2 = bytes2[i + index2] & 0xFF;
1221 if (b1 != b2) {
1222 return b1 - b2;
1223 }
1224 if (b1 == 0) {
1225 return 0;
1226 }
1227 }
1228 return count1 - count2;
1229 }
1230
1231 /**
1194 * Returns the index of the first encoded byte of this key that is different1232 * Returns the index of the first encoded byte of this key that is different
1195 * than the corresponding byte of the supplied <code>Key</code>. If all1233 * than the corresponding byte of the supplied <code>Key</code>. If all
1196 * bytes match, then returns the encoded length of the shorter key.1234 * bytes match, then returns the encoded length of the shorter key.
@@ -2037,6 +2075,25 @@
2037 }2075 }
20382076
2039 /**2077 /**
2078 * Append the next key segment of the supplied <code>Key</code> to this
2079 * <code>Key</code. The next key segment is determined by the current index
2080 * of the key and can be set using the {@link #setIndex(int)} method.
2081 *
2082 * @param key
2083 */
2084 public void appendKeySegment(final Key key) {
2085 int length = 0;
2086 for (int index = key.getIndex(); index < key.getEncodedSize(); index++) {
2087 length++;
2088 if (key.getEncodedBytes()[index] == 0) {
2089 break;
2090 }
2091 }
2092 System.arraycopy(key.getEncodedBytes(), key.getIndex(), _bytes, _size, length);
2093 _size += length;
2094 }
2095
2096 /**
2040 * Replaces the final key segment with the supplied boolean value. If the2097 * Replaces the final key segment with the supplied boolean value. If the
2041 * key is currently empty, this method simply appends the value. This method2098 * key is currently empty, this method simply appends the value. This method
2042 * is equivalent to invoking {@link #cut} and then {@link #append(boolean)}.2099 * is equivalent to invoking {@link #cut} and then {@link #append(boolean)}.
20432100
=== added file 'src/test/java/com/persistit/stress/InsertBigLoad.java'
--- src/test/java/com/persistit/stress/InsertBigLoad.java 1970-01-01 00:00:00 +0000
+++ src/test/java/com/persistit/stress/InsertBigLoad.java 2012-11-01 20:16:23 +0000
@@ -0,0 +1,55 @@
1/**
2 * Copyright © 2012 Akiban Technologies, Inc. All rights reserved.
3 *
4 * This program and the accompanying materials are made available
5 * under the terms of the Eclipse Public License v1.0 which
6 * accompanies this distribution, and is available at
7 * http://www.eclipse.org/legal/epl-v10.html
8 *
9 * This program may also be available under different license terms.
10 * For more information, see www.akiban.com or contact licensing@akiban.com.
11 *
12 * Contributors:
13 * Akiban Technologies, Inc.
14 */
15
16package com.persistit.stress;
17
18import com.persistit.Configuration;
19import com.persistit.Persistit;
20import com.persistit.Transaction.CommitPolicy;
21import com.persistit.stress.unit.BigLoad;
22
23public class InsertBigLoad extends AbstractSuite {
24
25 static String name() {
26 return InsertBigLoad.class.getSimpleName();
27 }
28
29 public static void main(final String[] args) throws Exception {
30 new InsertBigLoad(args).runTest();
31 }
32
33 public InsertBigLoad(final String[] args) {
34 super(name(), args);
35 }
36
37 @Override
38 public void runTest() throws Exception {
39
40 deleteFiles(substitute("$datapath$/persistit*"));
41
42 add(new BigLoad("records=10000000 buckets=10"));
43
44 final Configuration config = makeConfiguration(16384, "50000", CommitPolicy.SOFT);
45 config.setTmpVolMaxSize(100000000000l);
46 final Persistit persistit = new Persistit();
47 persistit.initialize(config);
48
49 try {
50 execute(persistit);
51 } finally {
52 persistit.close();
53 }
54 }
55}
056
=== added file 'src/test/java/com/persistit/stress/unit/BigLoad.java'
--- src/test/java/com/persistit/stress/unit/BigLoad.java 1970-01-01 00:00:00 +0000
+++ src/test/java/com/persistit/stress/unit/BigLoad.java 2012-11-01 20:16:23 +0000
@@ -0,0 +1,243 @@
1/**
2 * Copyright © 2012 Akiban Technologies, Inc. All rights reserved.
3 *
4 * This program and the accompanying materials are made available
5 * under the terms of the Eclipse Public License v1.0 which
6 * accompanies this distribution, and is available at
7 * http://www.eclipse.org/legal/epl-v10.html
8 *
9 * This program may also be available under different license terms.
10 * For more information, see www.akiban.com or contact licensing@akiban.com.
11 *
12 * Contributors:
13 * Akiban Technologies, Inc.
14 */
15
16package com.persistit.stress.unit;
17
18import java.util.Random;
19import java.util.SortedMap;
20import java.util.TreeMap;
21
22import com.persistit.Exchange;
23import com.persistit.Key;
24import com.persistit.Persistit;
25import com.persistit.Volume;
26import com.persistit.exception.PersistitException;
27import com.persistit.stress.AbstractStressTest;
28import com.persistit.util.ArgParser;
29import com.persistit.util.Util;
30
31/**
32 * <p>
33 * Simulate loading a large set (e.g., 500M) of records with large random keys.
34 * Because straightforward insertion results in highly randomized page access
35 * after the database size has exceed the amount of buffer pool memory space,
36 * this demo creates smaller sorted sets of keys and then merges them to create
37 * the final Tree in sequential order. As a side-effect, the final tree is also
38 * physically coherent in that the logical and physical order of keys disk are
39 * closely aligned.
40 * </p>
41 * <p>
42 * This class can be run stand-alone through its static main method, or within
43 * the stress test suite.
44 * </p>
45 *
46 * @author peter
47 */
48public class BigLoad extends AbstractStressTest {
49
50 private static final Random RANDOM = new Random();
51
52 private int totalRecords;
53 private int recordsPerBucket;
54
55 /**
56 * A Comparable wrapper for an Exchange. An instance of this class may be
57 * held in a SortedMap only if the Key of the Exchange does not change. In
58 * this example, the ComparableExchangeHolder is always removed from the
59 * TreeMap before the Key changes and then reinserted into a new location
60 * after the key has changed.
61 */
62 static class ComparableExchangeHolder implements Comparable<ComparableExchangeHolder> {
63
64 final Exchange exchange;
65
66 ComparableExchangeHolder(final Exchange exchange) {
67 this.exchange = exchange;
68 }
69
70 @Override
71 public int compareTo(final ComparableExchangeHolder ceh) {
72 final Key k1 = exchange.getKey();
73 final Key k2 = ceh.exchange.getKey();
74 return k1.compareTo(k2);
75 }
76 }
77
78 public BigLoad(final int totalRecords, final int buckets) {
79 super("");
80 this.totalRecords = totalRecords;
81 this.recordsPerBucket = totalRecords / buckets;
82 }
83
84 public void load(final Persistit db) throws PersistitException {
85 final long startLoadTime = System.nanoTime();
86 final Volume sortVolume = db.createTemporaryVolume();
87 final Exchange resultExchange = db.getExchange("persistit", "sorted", true);
88 System.out.printf("Loading %,d records into %,d buckets\n", totalRecords, totalRecords / recordsPerBucket);
89 final int bucketCount = loadBuckets(db, sortVolume);
90 final long endLoadTime = System.nanoTime();
91
92 System.out.printf("Merging %,d records from %,d buckets into main database\n", totalRecords, bucketCount);
93 mergeBuckets(db, bucketCount, sortVolume, resultExchange);
94 final long endMergeTime = System.nanoTime();
95 System.out.printf("Merged %,d records in %,dms\n", totalRecords, (endMergeTime - endLoadTime) / Util.NS_PER_MS);
96 sortVolume.close();
97 System.out.printf("Counting keys in main database (100M keys per dot) ");
98 resultExchange.clear().append(Key.BEFORE);
99 long count = 0;
100 while (resultExchange.next()) {
101 count++;
102 if ((count % 100000000) == 0) {
103 System.out.print(".");
104 System.out.flush();
105 }
106 }
107 final long endCountTime = System.nanoTime();
108 System.out.printf("\nCounted %,d keys in the main database in %,dms\n", count, (endCountTime - endMergeTime)
109 / Util.NS_PER_MS);
110 System.out.printf("Total time to load, merge and count %,d records is %,dms", totalRecords,
111 (endCountTime - startLoadTime) / Util.NS_PER_MS);
112 }
113
114 private int loadBuckets(final Persistit db, final Volume volume) throws PersistitException {
115 long bucketStartTime = 0;
116 int bucket = 0;
117 Exchange ex = null;
118 for (int i = 0; i < totalRecords; i++) {
119 if ((i % recordsPerBucket) == 0) {
120 final long now = System.nanoTime();
121 if (i > 0) {
122 System.out.printf("Loaded bucket %,5d in %,12dms\n", bucket, (now - bucketStartTime)
123 / Util.NS_PER_MS);
124 }
125 bucketStartTime = now;
126 bucket++;
127 ex = db.getExchange(volume, "sort" + bucket, true);
128 }
129 ex.clear().append(randomKey());
130 ex.store();
131 }
132 return bucket;
133 }
134
135 private void mergeBuckets(final Persistit db, final int bucketCount, final Volume sortVolume, final Exchange to)
136 throws PersistitException {
137 final long startLoadTime = System.nanoTime();
138 int loaded = 0;
139
140 final SortedMap<ComparableExchangeHolder, Integer> sortMap = new TreeMap<ComparableExchangeHolder, Integer>();
141 /*
142 * Load the sortMap using as keys the first key of each bucket.
143 */
144 for (int bucket = 1; bucket <= bucketCount; bucket++) {
145 final Exchange ex = db.getExchange(sortVolume, "sort" + bucket, false);
146 if (ex.append(Key.BEFORE).next()) {
147 final Integer duplicate = sortMap.put(new ComparableExchangeHolder(ex), bucket);
148 showDuplicate(duplicate, bucket, ex);
149 }
150 }
151
152 while (!sortMap.isEmpty()) {
153 final ComparableExchangeHolder ceh = sortMap.firstKey();
154 final int bucket = sortMap.remove(ceh);
155 ceh.exchange.getKey().copyTo(to.getKey());
156 if (ceh.exchange.next()) {
157 final Integer duplicate = sortMap.put(ceh, bucket);
158 showDuplicate(duplicate, bucket, ceh.exchange);
159 }
160 to.store();
161 if ((++loaded % 10000000) == 0) {
162 System.out.printf("Merged %,d records in %,dms\n", loaded, (System.nanoTime() - startLoadTime)
163 / Util.NS_PER_MS);
164 }
165 }
166 }
167
168 private String randomKey() {
169 return String.format("%020dxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
170 (RANDOM.nextLong() & Long.MAX_VALUE));
171 }
172
173 private void showDuplicate(final Integer bucket1, final int bucket2, final Exchange ex) {
174 if (bucket1 != null) {
175 System.out.printf("Duplicate key %s in buckets %,d and %,d\n", ex.getKey(), bucket1, bucket2);
176 }
177 }
178
179 /**
180 * Arguments:
181 *
182 * records - total records to load
183 *
184 * buckets - number of subdivisions to create while loading
185 *
186 * propertiesPath - path to properties file for Persistit initialization
187 *
188 * @param args
189 * @throws Exception
190 */
191
192 public static void main(final String[] args) throws Exception {
193 final int records = args.length > 0 ? Integer.parseInt(args[0]) : 1000000;
194 final int buckets = args.length > 1 ? Integer.parseInt(args[1]) : 100;
195 final BigLoad bl = new BigLoad(records, buckets);
196 final Persistit db = new Persistit();
197 if (args.length > 2) {
198 db.initialize(args[2]);
199 } else {
200 db.initialize();
201 }
202 try {
203 bl.load(db);
204 } finally {
205 db.close();
206 }
207 }
208
209 /*
210 * ----------------------------------------------------------------------
211 *
212 * Stuff below this line is required to run within the stress test suite
213 *
214 * ----------------------------------------------------------------------
215 */
216
217 public BigLoad(final String argsString) {
218 super(argsString);
219 }
220
221 private final static String[] ARGS_TEMPLATE = { "records|int:1000000:1:1000000000|Total records to create",
222 "buckets|int:100:1:1000000|Number of sort buckets", "tmpdir|string:|Temporary volume path" };
223
224 /**
225 * Method to parse stress test arguments passed by the stress test suite.
226 */
227 @Override
228 public void setUp() throws Exception {
229 super.setUp();
230 final ArgParser ap = new ArgParser("com.persistit.BigLoad", _args, ARGS_TEMPLATE).strict();
231 totalRecords = ap.getIntValue("records");
232 recordsPerBucket = totalRecords / ap.getIntValue("buckets");
233 final String path = ap.getStringValue("tmpdir");
234 if (path != null && !path.isEmpty()) {
235 getPersistit().getConfiguration().setTmpVolDir(path);
236 }
237 }
238
239 @Override
240 protected void executeTest() throws Exception {
241 load(getPersistit());
242 }
243}
0244
=== modified file 'src/test/java/com/persistit/unit/KeyTest1.java'
--- src/test/java/com/persistit/unit/KeyTest1.java 2012-08-24 13:57:19 +0000
+++ src/test/java/com/persistit/unit/KeyTest1.java 2012-11-01 20:16:23 +0000
@@ -945,6 +945,49 @@
945 }945 }
946 }946 }
947947
948 @Test
949 public void testCompareKeySegment() throws Exception {
950 final Key key1 = newKey();
951 final Key key2 = newKey();
952 key1.append(1).append(2).append(3).append("abc");
953 key2.append(3).append(2).append(1).append("abcd");
954 key1.indexTo(1);
955 key2.indexTo(1);
956 assertTrue("Should be == 0", 0 == key1.compareKeySegment(key2));
957 assertTrue("Should be == 0", 0 == key2.compareKeySegment(key1));
958 key1.indexTo(0);
959 assertTrue("Should be < 0", 0 > key1.compareKeySegment(key2));
960 assertTrue("Should be > 0", 0 < key2.compareKeySegment(key1));
961 key1.indexTo(2);
962 assertTrue("Should be > 0", 0 < key1.compareKeySegment(key2));
963 assertTrue("Should be < 0", 0 > key2.compareKeySegment(key1));
964 key1.indexTo(3);
965 assertTrue("Should be > 0", 0 < key1.compareKeySegment(key2));
966 assertTrue("Should be < 0", 0 > key2.compareKeySegment(key1));
967 key2.indexTo(3);
968 assertTrue("Should be < 0", 0 > key1.compareKeySegment(key2));
969 assertTrue("Should be > 0", 0 < key2.compareKeySegment(key1));
970 }
971
972 @Test
973 public void testAppendKeySegment() throws Exception {
974 final Key key1 = newKey();
975 final Key key2 = newKey();
976 key1.append(1).append(2).append(3).append("abc");
977 key1.indexTo(3);
978 key2.appendKeySegment(key1);
979 key1.indexTo(2);
980 key2.appendKeySegment(key1);
981 key1.indexTo(1);
982 key2.appendKeySegment(key1);
983 key1.indexTo(0);
984 key2.appendKeySegment(key1);
985 assertEquals("Key value incorrect", "{\"abc\",3,2,1}", key2.toString());
986 key1.indexTo(3);
987 key1.appendKeySegment(key1);
988 assertEquals("Key value incorrect", "{1,2,3,\"abc\",\"abc\"}", key1.toString());
989 }
990
948 private static boolean doubleEquals(final double f1, final double f2) {991 private static boolean doubleEquals(final double f1, final double f2) {
949 if (Double.isNaN(f1)) {992 if (Double.isNaN(f1)) {
950 return Double.isNaN(f2);993 return Double.isNaN(f2);

Subscribers

People subscribed via source and target branches