Merge lp:~andrewmccarthy/unity-lens-photos/shotwellspeedup into lp:unity-lens-photos

Proposed by Andrew McCarthy
Status: Merged
Approved by: David Callé
Approved revision: 110
Merged at revision: 109
Proposed branch: lp:~andrewmccarthy/unity-lens-photos/shotwellspeedup
Merge into: lp:unity-lens-photos
Diff against target: 160 lines (+63/-36)
1 file modified
src/shotwell_scope.py (+63/-36)
To merge this branch: bzr merge lp:~andrewmccarthy/unity-lens-photos/shotwellspeedup
Reviewer Review Type Date Requested Status
David Callé (community) Approve
Review via email: mp+130652@code.launchpad.net

Description of the change

This branch is an attempt to increase the speed of searching Shotwell's database. The changes include:

- Calculating the list of "matching" event ids once, before looping through the photos
- Similarly, calculating a list of matching photo thumbnails once, before looping
- Breaking out of the photo loop immediately once the limit is reached
- Moving some code around to reduce duplicated or unnecessary effort

I didn't remove any now-unused methods in case they're of use in the future.

In my case (21500 photos, 1200 events, 185 tags), searches that took about 10-12 seconds now take 2-4 seconds.

Hope it's of some use!

To post a comment you must log in.
Revision history for this message
David Callé (davidc3) wrote :

+1
Thanks a lot Andrew, I can clearly see the speed improvements.
Would you mind opening a bug about Shotwell search being slow and linking it to this branch? I think this could make a nice SRU (Stable Release Update).

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/shotwell_scope.py'
2--- src/shotwell_scope.py 2012-10-15 15:46:12 +0000
3+++ src/shotwell_scope.py 2012-10-19 22:26:21 +0000
4@@ -120,8 +120,7 @@
5 limit = 100
6 for i in self.shotwell(search, date, limit):
7 if not self.cancel_running_search:
8- hex_id = "%x" % i[4]
9- thumb_id = ("thumb" + "0"*(16 - len(hex_id)) + hex_id)
10+ thumb_id = "thumb%016x" % i[4]
11 try:
12 extension = i[1].split('.')[-1].lower ()
13 except:
14@@ -277,8 +276,38 @@
15 data_list = []
16 db = self.getDB ()
17 if db:
18- eventcursor = db.cursor()
19 self.cancel_running_search = False
20+
21+ matching_events = []
22+ matching_tags = set()
23+ search_items = []
24+ if search:
25+ search_items = search.lower().split(" ")
26+ # Make list of event ids for event names matching ALL search items
27+ allevents = self.getAllEventNames(db)
28+ for event in allevents:
29+ match = True
30+ for item in search_items:
31+ if item not in event[1].lower():
32+ match = False
33+ break
34+ if match:
35+ matching_events.append(event[0])
36+ # Make list of thumbs where tags match all items
37+ first = True
38+ for item in search_items:
39+ newset = set()
40+ # Build set of all thumbs with tags matching this item
41+ for tag in self.tagdb:
42+ if item in tag[0].lower():
43+ newset.update(tag[1].split(','))
44+ if first:
45+ matching_tags = newset
46+ first = False
47+ else:
48+ # Keep only thumbs matching this and previous search items
49+ matching_tags.intersection_update(newset)
50+
51 try:
52 photos = self.getPhotos (db, date)
53 except:
54@@ -286,38 +315,30 @@
55 i = 0
56 for photo in photos:
57 if photo[16] != 4 and photo[16] != 8 and not self.cancel_running_search:
58- item_list = []
59- event_id = str(photo[10])
60+ pid = photo[0]
61 uri = photo[1]
62- pid = photo[0]
63- try:
64- event = self.getEventNameForEventId (db, event_id, eventcursor)
65- except:
66- event = ''
67- icon_hint = photo[1]
68+ event_id = photo[10]
69 title = photo[19]
70 if not title:
71 title = uri.split("/")[-1]
72- timestamp = photo[6]
73- if timestamp == 0:
74- timestamp = photo[5]
75- date = datetime.datetime.fromtimestamp(timestamp).strftime('%d %b %Y %H:%M')
76 match = False
77- if not search:
78+ thumb_id = "thumb%016x" % pid
79+ if not search or event_id in matching_events or thumb_id in matching_tags:
80 match = True
81 else:
82- match_list = []
83- search_items = search.split(" ")
84+ match = True
85 for item in search_items:
86- if (title.lower().find(item.lower()) > -1
87- or event.lower().find(item.lower()) > -1
88- or self.isInTagDB (item.lower (), pid)):
89- match_list.append (True)
90- else:
91- match_list.append (False)
92- if all(match_list):
93- match = True
94- if match and i < limit:
95+ if item not in title.lower():
96+ match = False
97+ break
98+ if match:
99+ icon_hint = photo[1]
100+ timestamp = photo[6]
101+ if timestamp == 0:
102+ timestamp = photo[5]
103+ date = datetime.datetime.fromtimestamp(timestamp).strftime('%d %b %Y %H:%M')
104+
105+ item_list = []
106 item_list.append(title)
107 item_list.append(uri)
108 item_list.append(icon_hint)
109@@ -325,20 +346,19 @@
110 item_list.append(pid)
111 data_list.append(item_list)
112 i += 1
113- eventcursor.close ()
114+ if i >= limit:
115+ break
116 return data_list
117
118- def isInTagDB (self, term, photo):
119- for tag in self.tagdb:
120+ def isInTagDB (self, tags, term, photo):
121+ for tag in tags:
122 if tag[0].lower().find(term) > -1:
123- hex_id = "%x" % photo
124- thumb_id = ("thumb" + "0"*(16 - len(hex_id)) + hex_id)
125+ thumb_id = "thumb%016x" % photo
126 if tag[1]:
127- if tag[1].find(thumb_id) > -1:
128+ if thumb_id in tag[1]:
129 return True
130 return False
131
132-
133 def getDB (self):
134 """Check existence of our copy of Shotwell DB"""
135 db = None
136@@ -423,6 +443,14 @@
137 event = raw_event[0]
138 return str(event)
139
140+ def getAllEventNames (self, db):
141+ """Get all non-null event names with ids"""
142+ sql='SELECT id, name FROM EventTable WHERE length(name)>0'
143+ cursor = db.cursor()
144+ events = cursor.execute(sql).fetchall()
145+ cursor.close()
146+ return events
147+
148
149 def getTitle(self, uri, title):
150 """Get date from timestamp in db or from file"""
151@@ -451,8 +479,7 @@
152
153 def getTagsForPhotoId (self, db, photo):
154 """Get all tags related to a photo"""
155- hex_id = "%x" % photo
156- thumb_id = ("thumb" + "0"*(16 - len(hex_id)) + hex_id)
157+ thumb_id = "thumb%016x" % photo
158 sql='SELECT name FROM TagTable WHERE photo_id_list LIKE ?'
159 args=['%'+thumb_id+'%']
160 cursor = db.cursor ()

Subscribers

People subscribed via source and target branches

to all changes: