Merge lp:~max-rabkin/ibid/wordlist into lp:ibid

Proposed by Max Rabkin
Status: Needs review
Proposed branch: lp:~max-rabkin/ibid/wordlist
Merge into: lp:ibid
Diff against target: 116 lines (+91/-2)
1 file modified
ibid/plugins/languages.py (+91/-2)
To merge this branch: bzr merge lp:~max-rabkin/ibid/wordlist
Reviewer Review Type Date Requested Status
Jonathan Hitchcock Needs Fixing
Max Rabkin Needs Fixing
Stefano Rivera Approve
Review via email: mp+69322@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Stefano Rivera (stefanor) wrote :

I'm a little worried that "is ... in ..." is too generic, but we can see how it works.

WARNING:plugins.unicode:Found a non-unicode string: "There's no such word"

It would be nice if it worked out of the box. You suggested /usr/share/dict/words on IRC...

review: Approve
lp:~max-rabkin/ibid/wordlist updated
1033. By Max Rabkin

Unicode string literals

1034. By Max Rabkin

Use /usr/share/dict/words as a wordlist

Revision history for this message
Max Rabkin (max-rabkin) wrote :

This needs some usability work: better syntax, default wordlist, and a way to list wordlists.

review: Needs Fixing
Revision history for this message
Jonathan Hitchcock (vhata) wrote :

Too broad a match. We already have horribly conflicting plugins that match when they shouldn't.

review: Needs Fixing

Unmerged revisions

1034. By Max Rabkin

Use /usr/share/dict/words as a wordlist

1033. By Max Rabkin

Unicode string literals

1032. By Max Rabkin

Wordlist processor with globs and anagrams

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'ibid/plugins/languages.py'
2--- ibid/plugins/languages.py 2010-05-15 14:30:56 +0000
3+++ ibid/plugins/languages.py 2011-07-26 20:51:28 +0000
4@@ -1,6 +1,9 @@
5 # Copyright (c) 2008-2010, Michael Gorven, Stefano Rivera, Max Rabkin
6 # Released under terms of the MIT/X/Expat Licence. See COPYING for details.
7
8+import codecs
9+from collections import defaultdict
10+import fnmatch
11 from random import choice
12 import re
13 from urllib import urlencode
14@@ -9,9 +12,9 @@
15 from dictclient import Connection
16
17 from ibid.plugins import Processor, match
18-from ibid.config import Option, IntOption
19+from ibid.config import Option, IntOption, DictOption
20 from ibid.utils import decode_htmlentities, json_webservice, human_join, \
21- is_url, iri_to_uri
22+ is_url, iri_to_uri, plural
23
24 features = {}
25
26@@ -284,4 +287,90 @@
27 else:
28 raise UnknownLanguageException
29
# Describe the 'wordlist' feature (referenced by Wordlist.features) in the
# module-level features mapping.
features['wordlist'] = dict(
    description=(u'Check whether words exist in a wordlist, '
                 u'and search for anagrams or using wildcards.'),
    categories=('lookup',),
)
35+
class Wordlist(Processor):
    # Looks words up in configured wordlist files: glob matching (via
    # fnmatch) and anagram / sub-anagram searches, where '?' in the query
    # stands for any single letter.
    wordlists = DictOption('wordlists',
                           u"A mapping from wordlist names to files",
                           {'words': '/usr/share/dict/words'})
    usage = u"""is <word> in <wordlist>?
    words matching <glob> in <wordlist>
    [sub]anagrams of <glob> in <wordlist>"""
    features = ('wordlist',)

    def get_wordlist(self, event, wordlist):
        """Open the configured wordlist named `wordlist`.

        The name is compared case-insensitively against the keys of
        self.wordlists.

        Returns an open file object decoding UTF-8, which the caller is
        responsible for closing, or None if the wordlist is unknown or
        cannot be opened (a response explaining why is added to `event`).
        """
        wanted = wordlist.lower()
        for name, filename in self.wordlists.iteritems():
            if name.lower() == wanted:
                break
        else:
            event.addresponse(u"I don't have a wordlist called %s", wordlist)
            return None

        try:
            return codecs.open(filename, 'rU', 'utf-8')
        except IOError:
            event.addresponse(u"I can't open that wordlist")
            return None

    def answer(self, event, words, wordlist):
        """Add a response listing `words` as being in `wordlist`.

        `words` is the list of matching (upper-cased) words; if it is empty
        the response says that there is no such word. Duplicates are
        dropped, and the words are listed longest first, alphabetically
        within each length.
        """
        if not words:
            event.addresponse(u"There's no such word")
            return

        # Upper-casing can map several wordlist entries (e.g. "Polish" and
        # "polish") to the same word, and the file order of the entries is
        # not necessarily alphabetical once upper-cased, so de-duplicate and
        # sort explicitly instead of relying on sort stability.
        words = sorted(set(words), key=lambda w: (-len(w), w))
        event.addresponse(u"%(words)s %(are)s in %(wordlist)s",
                          {'words': human_join(words),
                           'are': plural(len(words), "is", "are"),
                           'wordlist': wordlist})

    @match('(?:is |words matching )?{glob:chunk} in {wordlist:any}')
    def search(self, event, glob, wordlist):
        """Report the words in `wordlist` matching the fnmatch-style `glob`.

        Matching is case-insensitive: both the glob and the words are
        upper-cased before comparison.
        """
        f = self.get_wordlist(event, wordlist)
        if f is None:
            return

        try:
            words = fnmatch.filter((w.strip().upper() for w in f),
                                   glob.upper())
        finally:
            # get_wordlist hands over ownership of the open file
            f.close()
        self.answer(event, words, wordlist)

    @match('(?P<subanagrams>sub)?(?:an|anagrams(?: of)?) {glob:chunk} in {wordlist:any}')
    def anagram(self, event, subanagrams, glob, wordlist):
        """Report the anagrams of `glob` found in `wordlist`.

        Each '?' in `glob` is a blank that can stand for any one character.
        If `subanagrams` is set (the command was prefixed with "sub"), words
        shorter than `glob` that use only some of its letters are reported
        too. Globs containing '[', ']' or '*' are rejected.
        """
        for forbidden in '[]*':
            if forbidden in glob:
                event.addresponse(u'Only letters and ? are allowed in anagram '
                                  'searches')
                return

        f = self.get_wordlist(event, wordlist)
        if f is None:
            return

        glob = glob.upper()
        # How many times each character (including '?') is available
        globcount = defaultdict(int)
        for char in glob:
            globcount[char] += 1

        n = len(glob)
        results = []
        try:
            for word in f:
                word = word.strip().upper()
                l = len(word)
                # Too long to be built from the glob, or too short to be a
                # full anagram when sub-anagrams were not requested
                if l > n or (l < n and not subanagrams):
                    continue

                count = defaultdict(int)
                blanks = 0
                for char in word:
                    count[char] += 1
                    # Every character beyond what the glob supplies has to
                    # be covered by one of the '?' blanks
                    if count[char] > globcount[char]:
                        blanks += 1
                        if blanks > globcount['?']:
                            break
                else:
                    # Never ran out of blanks: the word can be formed
                    results.append(word)
        finally:
            # get_wordlist hands over ownership of the open file
            f.close()

        self.answer(event, results, wordlist)
115+
116 # vi: set et sta sw=4 ts=4:

Subscribers

People subscribed via source and target branches

to all changes: