Merge lp:~jamesh/mediascanner/ranking into lp:mediascanner/v2

Proposed by James Henstridge
Status: Merged
Approved by: James Henstridge
Approved revision: 187
Merged at revision: 185
Proposed branch: lp:~jamesh/mediascanner/ranking
Merge into: lp:mediascanner/v2
Diff against target: 213 lines (+122/-16)
2 files modified
src/mediascanner/MediaStore.cc (+68/-4)
test/test_mediastore.cc (+54/-12)
To merge this branch: bzr merge lp:~jamesh/mediascanner/ranking
Reviewer Review Type Date Requested Status
PS Jenkins bot (community) continuous-integration Approve
Mediascanner Team Pending
Review via email: mp+195964@code.launchpad.net

Commit message

Add a ranking function based on the one in the SQLite documentation (http://sqlite.org/fts3.html#appendix_a), and use it to rank the results of MediaStore.query()

Description of the change

Add a ranking function based on the one in the SQLite documentation (http://sqlite.org/fts3.html#appendix_a), and use it to rank the results of MediaStore.query()

To post a comment you must log in.
Revision history for this message
PS Jenkins bot (ps-jenkins) wrote :
review: Approve (continuous-integration)
lp:~jamesh/mediascanner/ranking updated
187. By James Henstridge

Add a test to show that the ranking is being applied and that matches
in the various fields are weighted correctly.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/mediascanner/MediaStore.cc'
2--- src/mediascanner/MediaStore.cc 2013-11-13 09:07:07 +0000
3+++ src/mediascanner/MediaStore.cc 2013-11-21 05:59:55 +0000
4@@ -24,9 +24,9 @@
5 #include "sqliteutils.hh"
6 #include"utils.hh"
7 #include <sqlite3.h>
8+#include <cstdint>
9 #include <cstdio>
10 #include <stdexcept>
11-#include<cassert>
12
13 using namespace std;
14
15@@ -52,6 +52,65 @@
16 query.step();
17 }
18
19+/* ranking function adapted from http://sqlite.org/fts3.html#appendix_a */
20+static void rankfunc(sqlite3_context *pCtx, int nVal, sqlite3_value **apVal) {
21+ const int32_t *aMatchinfo; /* Return value of matchinfo() */
22+ int32_t nCol; /* Number of columns in the table */
23+ int32_t nPhrase; /* Number of phrases in the query */
24+ int32_t iPhrase; /* Current phrase */
25+ double score = 0.0; /* Value to return */
26+
27+ /* Check that the number of arguments passed to this function is correct.
28+ ** If not, jump to wrong_number_args. Set aMatchinfo to point to the array
29+ ** of unsigned integer values returned by FTS function matchinfo. Set
30+ ** nPhrase to contain the number of reportable phrases in the user's full-text
31+ ** query, and nCol to the number of columns in the table.
32+ */
33+ if( nVal<1 ) goto wrong_number_args;
34+ aMatchinfo = static_cast<const int32_t*>(sqlite3_value_blob(apVal[0]));
35+ nPhrase = aMatchinfo[0];
36+ nCol = aMatchinfo[1];
37+ if( nVal!=(1+nCol) ) goto wrong_number_args;
38+
39+ /* Iterate through each phrase in the user's query. */
40+ for(iPhrase=0; iPhrase<nPhrase; iPhrase++){
41+ int32_t iCol; /* Current column */
42+
43+ /* Now iterate through each column in the user's query. For each column,
44+ ** increment the relevancy score by:
45+ **
46+ ** (<hit count> / <global hit count>) * <column weight>
47+ **
48+ ** aPhraseinfo[] points to the start of the data for phrase iPhrase. So
49+ ** the hit count and global hit counts for each column are found in
50+ ** aPhraseinfo[iCol*3] and aPhraseinfo[iCol*3+1], respectively.
51+ */
52+ const int32_t *aPhraseinfo = &aMatchinfo[2 + iPhrase*nCol*3];
53+ for(iCol=0; iCol<nCol; iCol++){
54+ int32_t nHitCount = aPhraseinfo[3*iCol];
55+ int32_t nGlobalHitCount = aPhraseinfo[3*iCol+1];
56+ double weight = sqlite3_value_double(apVal[iCol+1]);
57+ if( nHitCount>0 ){
58+ score += ((double)nHitCount / (double)nGlobalHitCount) * weight;
59+ }
60+ }
61+ }
62+
63+ sqlite3_result_double(pCtx, score);
64+ return;
65+
66+ /* Jump here if the wrong number of arguments are passed to this function */
67+wrong_number_args:
68+ sqlite3_result_error(pCtx, "wrong number of arguments to function rank()", -1);
69+}
70+
71+static void register_functions(sqlite3 *db) {
72+ if (sqlite3_create_function(db, "rank", -1, SQLITE_ANY, NULL,
73+ rankfunc, NULL, NULL) != SQLITE_OK) {
74+ throw runtime_error(sqlite3_errmsg(db));
75+ }
76+}
77+
78 static void execute_sql(sqlite3 *db, const string &cmd) {
79 char *errmsg = nullptr;
80 if(sqlite3_exec(db, cmd.c_str(), nullptr, nullptr, &errmsg) != SQLITE_OK) {
81@@ -142,6 +201,7 @@
82 throw runtime_error(sqlite3_errmsg(p->db));
83 }
84 register_tokenizer(p->db);
85+ register_functions(p->db);
86 int detectedSchemaVersion = getSchemaVersion(p->db);
87 if(access == MS_READ_WRITE) {
88 if(detectedSchemaVersion != schemaVersion) {
89@@ -221,9 +281,13 @@
90
91 vector<MediaFile> MediaStore::query(const std::string &core_term, MediaType type) {
92 Statement query(p->db, R"(
93-SELECT filename, title, date, artist, album, album_artist, track_number, duration, type FROM media
94-WHERE rowid IN (SELECT docid FROM media_fts WHERE title MATCH ?)
95-AND type == ?
96+SELECT filename, title, date, artist, album, album_artist, track_number, duration, type
97+ FROM media JOIN (
98+ SELECT docid, rank(matchinfo(media_fts), 1.0, 0.5, 0.75) AS rank
99+ FROM media_fts WHERE media_fts MATCH ?
100+ ) AS ranktable ON (media.rowid = ranktable.docid)
101+ WHERE type == ?
102+ ORDER BY ranktable.rank DESC
103 )");
104 query.bind(1, core_term + "*");
105 query.bind(2, (int)type);
106
107=== modified file 'test/test_mediastore.cc'
108--- test/test_mediastore.cc 2013-11-18 07:37:17 +0000
109+++ test/test_mediastore.cc 2013-11-21 05:59:55 +0000
110@@ -55,18 +55,18 @@
111 MediaFile video1("a", "b", "1900", "c", "d", "e", 0, 5, VideoMedia);
112 MediaFile video2("aa", "b", "1900", "c", "d", "e", 0, 5, VideoMedia);
113
114- ASSERT_EQ(audio1, audio1);
115- ASSERT_EQ(video1, video1);
116+ EXPECT_EQ(audio1, audio1);
117+ EXPECT_EQ(video1, video1);
118
119- ASSERT_NE(audio1, audio2);
120- ASSERT_NE(audio1, video1);
121- ASSERT_NE(audio2, video1);
122- ASSERT_NE(audio2, video2);
123+ EXPECT_NE(audio1, audio2);
124+ EXPECT_NE(audio1, video1);
125+ EXPECT_NE(audio2, video1);
126+ EXPECT_NE(audio2, video2);
127 }
128
129 TEST_F(MediaStoreTest, mediafile_uri) {
130 MediaFile media("/path/to/file.ogg");
131- ASSERT_EQ(media.getUri(), "file:///path/to/file.ogg");
132+ EXPECT_EQ(media.getUri(), "file:///path/to/file.ogg");
133 }
134
135 TEST_F(MediaStoreTest, roundtrip) {
136@@ -77,10 +77,52 @@
137 store.insert(video);
138 vector<MediaFile> result = store.query("bbb", AudioMedia);
139 ASSERT_EQ(result.size(), 1);
140- ASSERT_EQ(result[0], audio);
141+ EXPECT_EQ(result[0], audio);
142 result = store.query("bbb", VideoMedia);
143 ASSERT_EQ(result.size(), 1);
144- ASSERT_EQ(result[0], video);
145+ EXPECT_EQ(result[0], video);
146+}
147+
148+TEST_F(MediaStoreTest, query_by_album) {
149+ MediaFile audio("/path/foo.ogg", "title", "1900-01-01", "artist", "album", "albumartist", 3, 5, AudioMedia);
150+ MediaStore store(":memory:", MS_READ_WRITE);
151+ store.insert(audio);
152+
153+ vector<MediaFile> result = store.query("album", AudioMedia);
154+ ASSERT_EQ(result.size(), 1);
155+ EXPECT_EQ(result[0], audio);
156+ }
157+
158+TEST_F(MediaStoreTest, query_by_artist) {
159+ MediaFile audio("/path/foo.ogg", "title", "1900-01-01", "artist", "album", "albumartist", 3, 5, AudioMedia);
160+ MediaStore store(":memory:", MS_READ_WRITE);
161+ store.insert(audio);
162+
163+ vector<MediaFile> result = store.query("artist", AudioMedia);
164+ ASSERT_EQ(result.size(), 1);
165+ EXPECT_EQ(result[0], audio);
166+ }
167+
168+TEST_F(MediaStoreTest, query_ranking) {
169+ MediaFile audio1("/path/foo1.ogg", "title", "1900-01-01", "artist", "album", "albumartist", 3, 5, AudioMedia);
170+ MediaFile audio2("/path/foo2.ogg", "title aaa", "1900-01-01", "artist", "album", "albumartist", 3, 5, AudioMedia);
171+ MediaFile audio3("/path/foo3.ogg", "title", "1900-01-01", "artist aaa", "album", "albumartist", 3, 5, AudioMedia);
172+ MediaFile audio4("/path/foo4.ogg", "title", "1900-01-01", "artist", "album aaa", "albumartist", 3, 5, AudioMedia);
173+ MediaFile audio5("/path/foo5.ogg", "title aaa", "1900-01-01", "artist aaa", "album aaa", "albumartist", 3, 5, AudioMedia);
174+
175+ MediaStore store(":memory:", MS_READ_WRITE);
176+ store.insert(audio1);
177+ store.insert(audio2);
178+ store.insert(audio3);
179+ store.insert(audio4);
180+ store.insert(audio5);
181+
182+ vector<MediaFile> result = store.query("aaa", AudioMedia);
183+ ASSERT_EQ(result.size(), 4);
184+ EXPECT_EQ(result[0], audio5); // Term appears in title, artist and album
185+ EXPECT_EQ(result[1], audio2); // title has highest weighting
186+ EXPECT_EQ(result[2], audio4); // then album
187+ EXPECT_EQ(result[3], audio3); // then artist
188 }
189
190 TEST_F(MediaStoreTest, unmount) {
191@@ -95,7 +137,7 @@
192 store.archiveItems("/media/username");
193 result = store.query("bbb", AudioMedia);
194 ASSERT_EQ(result.size(), 1);
195- ASSERT_EQ(result[0], audio2);
196+ EXPECT_EQ(result[0], audio2);
197
198 store.restoreItems("/media/username");
199 result = store.query("bbb", AudioMedia);
200@@ -106,11 +148,11 @@
201 string source("_a.b(c)[d]{e}f.mp3");
202 string correct = {" a b c d e f"};
203 string result = filenameToTitle(source);
204- ASSERT_EQ(correct, result);
205+ EXPECT_EQ(correct, result);
206
207 string unquoted(R"(It's a living.)");
208 string quoted(R"('It''s a living.')");
209- ASSERT_EQ(sqlQuote(unquoted), quoted);
210+ EXPECT_EQ(sqlQuote(unquoted), quoted);
211 }
212
213 TEST_F(MediaStoreTest, queryAlbums) {

Subscribers

People subscribed via source and target branches