Merge lp:~firgeis/slingshot/fuzzy-text-search into lp:~elementary-pantheon/slingshot/trunk

Proposed by Ezequiel Lewin on 2015-01-18
Status: Rejected
Rejected by: Cody Garver on 2015-05-29
Proposed branch: lp:~firgeis/slingshot/fuzzy-text-search
Merge into: lp:~elementary-pantheon/slingshot/trunk
Diff against target: 202 lines (+167/-2)
3 files modified
lib/synapse-core/CMakeLists.txt (+1/-0)
lib/synapse-core/fuzzy-text-search.vala (+157/-0)
lib/synapse-plugins/desktop-file-plugin.vala (+9/-2)
To merge this branch: bzr merge lp:~firgeis/slingshot/fuzzy-text-search
Reviewer Review Type Date Requested Status
Cody Garver 2015-01-18 Pending
Review via email: mp+246859@code.launchpad.net

Description of the change

Experimental code for better fuzzy text search. Need some input on performance

To post a comment you must log in.

Unmerged revisions

497. By Ezequiel Lewin on 2015-01-18

New text fuzzy matching initial

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/synapse-core/CMakeLists.txt'
2--- lib/synapse-core/CMakeLists.txt 2014-07-03 06:38:42 +0000
3+++ lib/synapse-core/CMakeLists.txt 2015-01-18 23:37:48 +0000
4@@ -26,6 +26,7 @@
5 result-set.vala
6 utils.vala
7 volume-service.vala
8+ fuzzy-text-search.vala
9 )
10
11 set(LINK_MODE STATIC)
12
13=== added file 'lib/synapse-core/fuzzy-text-search.vala'
14--- lib/synapse-core/fuzzy-text-search.vala 1970-01-01 00:00:00 +0000
15+++ lib/synapse-core/fuzzy-text-search.vala 2015-01-18 23:37:48 +0000
16@@ -0,0 +1,157 @@
17+// Copyright (C) 2014 Ezequiel Lewin
18+//
19+// This program is free software: you can redistribute it and/or modify
20+// it under the terms of the GNU General Public License as published by
21+// the Free Software Foundation, either version 3 of the License, or
22+// (at your option) any later version.
23+//
24+// This program is distributed in the hope that it will be useful,
25+// but WITHOUT ANY WARRANTY; without even the implied warranty of
26+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27+// GNU General Public License for more details.
28+//
29+// You should have received a copy of the GNU General Public License
30+// along with this program. If not, see <http://www.gnu.org/licenses/>.
31+//
32+
33+using Math;
34+
35+namespace Synapse
36+{
37+ public class StringMatch {
38+
39+ public int match_strings(string p_search, string p_searchee) {
40+ //Separate by spaces
41+ string[] search = p_search.split(" ");
42+ string[] searchee = p_searchee.split(" ");
43+
44+ //Remove spaces and add to list
45+ var search_list = new GenericArray<string>();
46+ var searchee_list = new GenericArray<string>();
47+
48+ foreach (string s in search)
49+ search_list.add(remove_repeats(s));
50+
51+ foreach (string s in searchee)
52+ searchee_list.add(remove_repeats(s));
53+
54+ //Match for individual words
55+ var scores = new List<int>();
56+ double score = 0;
57+
58+ //If there is only one word search it in the different ones
59+ //else match only correctly spaced words
60+
61+ if (search_list.length > 1) {
62+
63+ int maxlength = (search_list.length > searchee_list.length) ? searchee_list.length : search_list.length;
64+
65+ for (int i = 0; i < maxlength; i++) {
66+ score = get_distance (search_list[i], searchee_list[i]);
67+ scores.append (compute_score (score));
68+ }
69+ }
70+ else {
71+ /*foreach (string s in searchee_list) {
72+ score = get_distance (search_list[0], s);
73+ scores.append (compute_score (score));
74+ }*/
75+
76+ searchee_list.foreach ((s) => {
77+ score = get_distance (search_list[0], s);
78+ scores.append (compute_score (score));
79+ });
80+ }
81+
82+ double final_score = 0;
83+
84+ if (search_list.length > 1) {
85+ //Average for several word score
86+ foreach (int i in scores) {
87+ final_score += i;
88+ }
89+
90+ final_score = final_score / scores.length() ;
91+ }
92+ else {
93+ foreach (int i in scores) {
94+ final_score = (final_score > i) ? final_score : i;
95+ }
96+ }
97+
98+ return int.parse (final_score.to_string());
99+ }
100+
101+ //Removes repeated symbols
102+ public string remove_repeats(string instr) {
103+ var newstring = new StringBuilder ();
104+
105+ for (int i = 0; i < instr.length; i++) {
106+ if (!newstring.str.contains (instr[i].to_string ()))
107+ newstring.append_c (instr[i]);
108+ }
109+
110+ return newstring.str;
111+ }
112+
113+ //Jaro score for string matching
114+ public double get_distance(string s1, string s2) {
115+ double max_distance_d = (Math.fmax(s1.length, s2.length)) /2 - 1;
116+ int max_distance = int.parse(Math.lround(max_distance_d).to_string());
117+ int matches = 0;
118+ var order = new GenericArray<int>();
119+
120+ for (int i = 0; i < s1.length; i++) {
121+ char c = s1[i];
122+
123+ if (i - max_distance > s2.length)
124+ break;
125+
126+ int minlen = (i - max_distance).clamp(0, s2.length);
127+ int maxlen = (i + max_distance).clamp(0, s2.length);
128+
129+ string search = s2[minlen:maxlen];
130+
131+ int indxchar = search.index_of_char (c);
132+
133+ if (indxchar > -1) {
134+ matches++;
135+ order.add(indxchar+minlen);
136+ }
137+ }
138+
139+ if (matches == 0)
140+ return 0;
141+
142+ int trans = 0;
143+ int tmp;
144+ bool swap = false;
145+
146+ do {
147+ swap = false;
148+ for (int i = 1; i <= order.length -1; i++) {
149+ if (order[i-1] > order[i]) {
150+ trans++;
151+ tmp = order[i-1];
152+ order[i-1] = order[i];
153+ order[i] = tmp;
154+ swap = true;
155+ }
156+ }
157+ }
158+ while (swap);
159+
160+ //trans = trans / 2;
161+
162+ double d1 = (double)matches / (double)s1.length;
163+ double d2 = (double)matches / (double)s2.length;
164+ double d3 = ((double)matches - (double)trans) / (double)matches;
165+ return (d1+d2+d3) / 3;
166+ }
167+
168+ public int compute_score (double raw_score) {
169+ return int.parse ((raw_score * 100000).to_string());
170+ }
171+ }
172+
173+}
174
175=== modified file 'lib/synapse-plugins/desktop-file-plugin.vala'
176--- lib/synapse-plugins/desktop-file-plugin.vala 2014-08-31 12:12:11 +0000
177+++ lib/synapse-plugins/desktop-file-plugin.vala 2015-01-18 23:37:48 +0000
178@@ -166,8 +166,8 @@
179 unowned string unaccented_title = dfm.title_unaccented;
180 unowned string comment = dfm.description;
181 unowned string generic_name = dfm.generic_name;
182-
183- bool matched = false;
184+
185+ /*bool matched = false;
186 // FIXME: we need to do much smarter relevancy computation in fuzzy re
187 // "sysmon" matching "System Monitor" is very good as opposed to
188 // "seto" matching "System Monitor"
189@@ -196,6 +196,13 @@
190 {
191 results.add (dfm, compute_relevancy (dfm, dfm.exec == q.query_string ?
192 Match.Score.VERY_GOOD : Match.Score.AVERAGE - Match.Score.INCREMENT_SMALL));
193+ }*/
194+
195+ var searcher = new Synapse.StringMatch();
196+ int score = searcher.match_strings(q.query_string_folded, folded_title);
197+
198+ if (score > Match.Score.AVERAGE) {
199+ results.add(dfm, score);
200 }
201 }
202 }

Subscribers

People subscribed via source and target branches