Merge lp:~gholt/swift/nowalyesindex into lp:~hudson-openstack/swift/trunk

Proposed by gholt
Status: Merged
Approved by: Jay Payne
Approved revision: 228
Merged at revision: 250
Proposed branch: lp:~gholt/swift/nowalyesindex
Merge into: lp:~hudson-openstack/swift/trunk
Diff against target: 303 lines (+81/-122)
1 file modified
swift/common/db.py (+81/-122)
To merge this branch: bzr merge lp:~gholt/swift/nowalyesindex
Reviewer Review Type Date Requested Status
Jay Payne (community) lab tests Approve
Greg Lange (community) Approve
Review via email: mp+50358@code.launchpad.net

Description of the change

New index code for the dbs; should improve performance for containers with a lot of recent deletes.

To post a comment you must log in.
Revision history for this message
Greg Lange (greglange) wrote :

I didn't test to see if the new index/sql is faster and I don't know what the plan for this branch is.

However, the code looks fine.

review: Approve
lp:~gholt/swift/nowalyesindex updated
225. By gholt

Merged from trunk

226. By gholt

Merged from trunk

227. By gholt

Merge from trunk

228. By gholt

Merged from trunk

Revision history for this message
gholt (gholt) wrote :

letterj is currently testing this branch. things look good so far.

Revision history for this message
Jay Payne (letterj) wrote :

Tested in the Castle Lab. Put requests per second are a little slower for new containers but faster for every other scenario.

review: Approve (lab tests)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'swift/common/db.py'
2--- swift/common/db.py 2011-02-17 17:30:41 +0000
3+++ swift/common/db.py 2011-03-11 19:43:46 +0000
4@@ -165,6 +165,7 @@
5 self.logger = logger or logging.getLogger()
6 self.account = account
7 self.container = container
8+ self._db_version = -1
9
10 def initialize(self, put_timestamp=None):
11 """
12@@ -607,7 +608,7 @@
13 conn.executescript("""
14 CREATE TABLE object (
15 ROWID INTEGER PRIMARY KEY AUTOINCREMENT,
16- name TEXT UNIQUE,
17+ name TEXT,
18 created_at TEXT,
19 size INTEGER,
20 content_type TEXT,
21@@ -615,7 +616,7 @@
22 deleted INTEGER DEFAULT 0
23 );
24
25- CREATE INDEX ix_object_deleted ON object (deleted);
26+ CREATE INDEX ix_object_deleted_name ON object (deleted, name);
27
28 CREATE TRIGGER object_insert AFTER INSERT ON object
29 BEGIN
30@@ -678,6 +679,15 @@
31 ''', (self.account, self.container, normalize_timestamp(time.time()),
32 str(uuid4()), put_timestamp))
33
34+ def get_db_version(self, conn):
35+ if self._db_version == -1:
36+ self._db_version = 0
37+ for row in conn.execute('''
38+ SELECT name FROM sqlite_master
39+ WHERE name = 'ix_object_deleted_name' '''):
40+ self._db_version = 1
41+ return self._db_version
42+
43 def _newid(self, conn):
44 conn.execute('''
45 UPDATE container_stat
46@@ -910,37 +920,6 @@
47 ''', (put_timestamp, delete_timestamp, object_count, bytes_used))
48 conn.commit()
49
50- def get_random_objects(self, max_count=100):
51- """
52- Get random objects from the DB. This is used by the container_auditor
53- when testing random objects for existence.
54-
55- :param max_count: maximum number of objects to get
56-
57- :returns: list of object names
58- """
59- try:
60- self._commit_puts()
61- except LockTimeout:
62- if not self.stale_reads_ok:
63- raise
64- rv = []
65- with self.get() as conn:
66- row = conn.execute('''
67- SELECT ROWID FROM object ORDER BY ROWID DESC LIMIT 1
68- ''').fetchone()
69- if not row:
70- return []
71- max_rowid = row['ROWID']
72- for _junk in xrange(min(max_count, max_rowid)):
73- row = conn.execute('''
74- SELECT name FROM object WHERE ROWID >= ? AND +deleted = 0
75- LIMIT 1
76- ''', (randint(0, max_rowid),)).fetchone()
77- if row:
78- rv.append(row['name'])
79- return list(set(rv))
80-
81 def list_objects_iter(self, limit, marker, end_marker, prefix, delimiter,
82 path=None, format=None):
83 """
84@@ -988,7 +967,11 @@
85 elif prefix:
86 query += ' name >= ? AND'
87 query_args.append(prefix)
88- query += ' +deleted = 0 ORDER BY name LIMIT ?'
89+ if self.get_db_version(conn) < 1:
90+ query += ' +deleted = 0'
91+ else:
92+ query += ' deleted = 0'
93+ query += ' ORDER BY name LIMIT ?'
94 query_args.append(limit - len(results))
95 curs = conn.execute(query, query_args)
96 curs.row_factory = None
97@@ -1035,19 +1018,23 @@
98 with self.get() as conn:
99 max_rowid = -1
100 for rec in item_list:
101- conn.execute('''
102- DELETE FROM object WHERE name = ? AND
103- (created_at < ?)
104- ''', (rec['name'], rec['created_at']))
105- try:
106+ query = '''
107+ DELETE FROM object
108+ WHERE name = ? AND (created_at < ?)
109+ '''
110+ if self.get_db_version(conn) >= 1:
111+ query += ' AND deleted IN (0, 1)'
112+ conn.execute(query, (rec['name'], rec['created_at']))
113+ query = 'SELECT 1 FROM object WHERE name = ?'
114+ if self.get_db_version(conn) >= 1:
115+ query += ' AND deleted IN (0, 1)'
116+ if not conn.execute(query, (rec['name'],)).fetchall():
117 conn.execute('''
118 INSERT INTO object (name, created_at, size,
119 content_type, etag, deleted)
120 VALUES (?, ?, ?, ?, ?, ?)
121 ''', ([rec['name'], rec['created_at'], rec['size'],
122 rec['content_type'], rec['etag'], rec['deleted']]))
123- except sqlite3.IntegrityError:
124- pass
125 if source:
126 max_rowid = max(max_rowid, rec['ROWID'])
127 if source:
128@@ -1091,7 +1078,7 @@
129 conn.executescript("""
130 CREATE TABLE container (
131 ROWID INTEGER PRIMARY KEY AUTOINCREMENT,
132- name TEXT UNIQUE,
133+ name TEXT,
134 put_timestamp TEXT,
135 delete_timestamp TEXT,
136 object_count INTEGER,
137@@ -1099,8 +1086,9 @@
138 deleted INTEGER DEFAULT 0
139 );
140
141- CREATE INDEX ix_container_deleted ON container (deleted);
142- CREATE INDEX ix_container_name ON container (name);
143+ CREATE INDEX ix_container_deleted_name ON
144+ container (deleted, name);
145+
146 CREATE TRIGGER container_insert AFTER INSERT ON container
147 BEGIN
148 UPDATE account_stat
149@@ -1164,6 +1152,15 @@
150 ''', (self.account, normalize_timestamp(time.time()), str(uuid4()),
151 put_timestamp))
152
153+ def get_db_version(self, conn):
154+ if self._db_version == -1:
155+ self._db_version = 0
156+ for row in conn.execute('''
157+ SELECT name FROM sqlite_master
158+ WHERE name = 'ix_container_deleted_name' '''):
159+ self._db_version = 1
160+ return self._db_version
161+
162 def update_put_timestamp(self, timestamp):
163 """
164 Update the put_timestamp. Only modifies it if it is greater than
165@@ -1413,38 +1410,6 @@
166 FROM account_stat
167 ''').fetchone()
168
169- def get_random_containers(self, max_count=100):
170- """
171- Get random containers from the DB. This is used by the
172- account_auditor when testing random containerss for existence.
173-
174- :param max_count: maximum number of containers to get
175-
176- :returns: list of container names
177- """
178- try:
179- self._commit_puts()
180- except LockTimeout:
181- if not self.stale_reads_ok:
182- raise
183- rv = []
184- with self.get() as conn:
185- row = conn.execute('''
186- SELECT ROWID FROM container ORDER BY ROWID DESC LIMIT 1
187- ''').fetchone()
188- if not row:
189- return []
190- max_rowid = row['ROWID']
191- for _junk in xrange(min(max_count, max_rowid)):
192- row = conn.execute('''
193- SELECT name FROM container WHERE
194- ROWID >= ? AND +deleted = 0
195- LIMIT 1
196- ''', (randint(0, max_rowid),)).fetchone()
197- if row:
198- rv.append(row['name'])
199- return list(set(rv))
200-
201 def list_containers_iter(self, limit, marker, end_marker, prefix,
202 delimiter):
203 """
204@@ -1485,7 +1450,11 @@
205 elif prefix:
206 query += ' name >= ? AND'
207 query_args.append(prefix)
208- query += ' +deleted = 0 ORDER BY name LIMIT ?'
209+ if self.get_db_version(conn) < 1:
210+ query += ' +deleted = 0'
211+ else:
212+ query += ' deleted = 0'
213+ query += ' ORDER BY name LIMIT ?'
214 query_args.append(limit - len(results))
215 curs = conn.execute(query, query_args)
216 curs.row_factory = None
217@@ -1529,51 +1498,41 @@
218 record = [rec['name'], rec['put_timestamp'],
219 rec['delete_timestamp'], rec['object_count'],
220 rec['bytes_used'], rec['deleted']]
221- try:
222- conn.execute('''
223- INSERT INTO container (name, put_timestamp,
224- delete_timestamp, object_count, bytes_used,
225- deleted)
226- VALUES (?, ?, ?, ?, ?, ?)
227- ''', record)
228- except sqlite3.IntegrityError:
229- curs = conn.execute('''
230- SELECT name, put_timestamp, delete_timestamp,
231- object_count, bytes_used, deleted
232- FROM container WHERE name = ? AND
233- (put_timestamp < ? OR delete_timestamp < ? OR
234- object_count != ? OR bytes_used != ?)''',
235- (rec['name'], rec['put_timestamp'],
236- rec['delete_timestamp'], rec['object_count'],
237- rec['bytes_used']))
238- curs.row_factory = None
239- row = curs.fetchone()
240- if row:
241- row = list(row)
242- for i in xrange(5):
243- if record[i] is None and row[i] is not None:
244- record[i] = row[i]
245- if row[1] > record[1]: # Keep newest put_timestamp
246- record[1] = row[1]
247- if row[2] > record[2]: # Keep newest delete_timestamp
248- record[2] = row[2]
249- conn.execute('DELETE FROM container WHERE name = ?',
250- (record[0],))
251- # If deleted, mark as such
252- if record[2] > record[1] and \
253- record[3] in (None, '', 0, '0'):
254- record[5] = 1
255- else:
256- record[5] = 0
257- try:
258- conn.execute('''
259- INSERT INTO container (name, put_timestamp,
260- delete_timestamp, object_count, bytes_used,
261- deleted)
262- VALUES (?, ?, ?, ?, ?, ?)
263- ''', record)
264- except sqlite3.IntegrityError:
265- continue
266+ query = '''
267+ SELECT name, put_timestamp, delete_timestamp,
268+ object_count, bytes_used, deleted
269+ FROM container WHERE name = ?
270+ '''
271+ if self.get_db_version(conn) >= 1:
272+ query += ' AND deleted IN (0, 1)'
273+ curs = conn.execute(query, (rec['name'],))
274+ curs.row_factory = None
275+ row = curs.fetchone()
276+ if row:
277+ row = list(row)
278+ for i in xrange(5):
279+ if record[i] is None and row[i] is not None:
280+ record[i] = row[i]
281+ if row[1] > record[1]: # Keep newest put_timestamp
282+ record[1] = row[1]
283+ if row[2] > record[2]: # Keep newest delete_timestamp
284+ record[2] = row[2]
285+ # If deleted, mark as such
286+ if record[2] > record[1] and \
287+ record[3] in (None, '', 0, '0'):
288+ record[5] = 1
289+ else:
290+ record[5] = 0
291+ conn.execute('''
292+ DELETE FROM container WHERE name = ? AND
293+ deleted IN (0, 1)
294+ ''', (record[0],))
295+ conn.execute('''
296+ INSERT INTO container (name, put_timestamp,
297+ delete_timestamp, object_count, bytes_used,
298+ deleted)
299+ VALUES (?, ?, ?, ?, ?, ?)
300+ ''', record)
301 if source:
302 max_rowid = max(max_rowid, rec['ROWID'])
303 if source: