1
=== removed file 'NOTES'
2
--- NOTES	2009-02-18 10:41:40 +0000
3
+++ NOTES	1970-01-01 00:00:00 +0000
4
@@ -1,22 +0,0 @@
5
1
general
6
2
=======
7
3
- 'object' and 'type' are probably keywords that shouldn't be reused, btw
8
4
- proper exception handling needed all round (and maybe some more error checks?)
9
5
- could a Processor dynamically alter priority?  of itself, or another class?  if so, would it ever want to?  "learning"?
10
6
- the patch on ConfigObj only does one level of list interpolation.  Does ConfigObj support multi-level nested lists?
11
7
12
8
plugins
13
9
=======
14
10
- process() is still a bit biased towards type=message?
15
11
- how about having a lambda decorator like @match(pattern) to allow other types of matching?
16
12
- reload config is a good example of somewhere that error reporting should occur
17
13
- load/reload is a good example of the reloader not being generic enough... I think?
18
14
- is ignore strong enough?  it just sets processed to true - postprocessors won't ignore this.  is that bad?
19
15
- applause to Michael for the Responses processor: that's clever
20
16
  -- that said, not at all convinced by Address processor
21
17
    -- it takes a basestring only, not a dict as well
22
18
    -- do we even want what it does?  cf. the Announce plugin - I think Processors should decide for themselves whether they want to address their replies or not.  (But what about private replies - adding an address in to that is crap)
23
19
- identity plugin:  'identify' is different in different scopes and it's a little confusing - not cool to a first-time reader
24
20
- twisted helper functions (with that whole blocking-wrapper thing) may as well be written
25
21
- sources processor:  needs a bit of error checking, authentication and...  all sorts.
26
22
  -- (no, I know, it was there for debugging, just a note for Future Us)
27
23
0
28
=== renamed file 'dbus-ping.py' => 'attic/dbus-ping.py'
29
=== added directory 'data'
30
=== added file 'data/README'
31
--- data/README	1970-01-01 00:00:00 +0000
32
+++ data/README	2009-07-13 14:23:03 +0000
33
@@ -0,0 +1,5 @@
34
1
These are data files, used by plugins, that don't change very much.
35
2
Thus outdated versions shouldn't be a major issue.
36
3
37
4
Sources:
38
5
http://data.iana.org/TLD/tlds-alpha-by-domain.txt
39
0
6
40
=== added file 'data/tlds-alpha-by-domain.txt'
41
--- data/tlds-alpha-by-domain.txt	1970-01-01 00:00:00 +0000
42
+++ data/tlds-alpha-by-domain.txt	2009-07-13 14:23:03 +0000
43
@@ -0,0 +1,281 @@
44
1
# Version 2009071300, Last Updated Mon Jul 13 07:07:02 2009 UTC
45
2
AC
46
3
AD
47
4
AE
48
5
AERO
49
6
AF
50
7
AG
51
8
AI
52
9
AL
53
10
AM
54
11
AN
55
12
AO
56
13
AQ
57
14
AR
58
15
ARPA
59
16
AS
60
17
ASIA
61
18
AT
62
19
AU
63
20
AW
64
21
AX
65
22
AZ
66
23
BA
67
24
BB
68
25
BD
69
26
BE
70
27
BF
71
28
BG
72
29
BH
73
30
BI
74
31
BIZ
75
32
BJ
76
33
BM
77
34
BN
78
35
BO
79
36
BR
80
37
BS
81
38
BT
82
39
BV
83
40
BW
84
41
BY
85
42
BZ
86
43
CA
87
44
CAT
88
45
CC
89
46
CD
90
47
CF
91
48
CG
92
49
CH
93
50
CI
94
51
CK
95
52
CL
96
53
CM
97
54
CN
98
55
CO
99
56
COM
100
57
COOP
101
58
CR
102
59
CU
103
60
CV
104
61
CX
105
62
CY
106
63
CZ
107
64
DE
108
65
DJ
109
66
DK
110
67
DM
111
68
DO
112
69
DZ
113
70
EC
114
71
EDU
115
72
EE
116
73
EG
117
74
ER
118
75
ES
119
76
ET
120
77
EU
121
78
FI
122
79
FJ
123
80
FK
124
81
FM
125
82
FO
126
83
FR
127
84
GA
128
85
GB
129
86
GD
130
87
GE
131
88
GF
132
89
GG
133
90
GH
134
91
GI
135
92
GL
136
93
GM
137
94
GN
138
95
GOV
139
96
GP
140
97
GQ
141
98
GR
142
99
GS
143
100
GT
144
101
GU
145
102
GW
146
103
GY
147
104
HK
148
105
HM
149
106
HN
150
107
HR
151
108
HT
152
109
HU
153
110
ID
154
111
IE
155
112
IL
156
113
IM
157
114
IN
158
115
INFO
159
116
INT
160
117
IO
161
118
IQ
162
119
IR
163
120
IS
164
121
IT
165
122
JE
166
123
JM
167
124
JO
168
125
JOBS
169
126
JP
170
127
KE
171
128
KG
172
129
KH
173
130
KI
174
131
KM
175
132
KN
176
133
KP
177
134
KR
178
135
KW
179
136
KY
180
137
KZ
181
138
LA
182
139
LB
183
140
LC
184
141
LI
185
142
LK
186
143
LR
187
144
LS
188
145
LT
189
146
LU
190
147
LV
191
148
LY
192
149
MA
193
150
MC
194
151
MD
195
152
ME
196
153
MG
197
154
MH
198
155
MIL
199
156
MK
200
157
ML
201
158
MM
202
159
MN
203
160
MO
204
161
MOBI
205
162
MP
206
163
MQ
207
164
MR
208
165
MS
209
166
MT
210
167
MU
211
168
MUSEUM
212
169
MV
213
170
MW
214
171
MX
215
172
MY
216
173
MZ
217
174
NA
218
175
NAME
219
176
NC
220
177
NE
221
178
NET
222
179
NF
223
180
NG
224
181
NI
225
182
NL
226
183
NO
227
184
NP
228
185
NR
229
186
NU
230
187
NZ
231
188
OM
232
189
ORG
233
190
PA
234
191
PE
235
192
PF
236
193
PG
237
194
PH
238
195
PK
239
196
PL
240
197
PM
241
198
PN
242
199
PR
243
200
PRO
244
201
PS
245
202
PT
246
203
PW
247
204
PY
248
205
QA
249
206
RE
250
207
RO
251
208
RS
252
209
RU
253
210
RW
254
211
SA
255
212
SB
256
213
SC
257
214
SD
258
215
SE
259
216
SG
260
217
SH
261
218
SI
262
219
SJ
263
220
SK
264
221
SL
265
222
SM
266
223
SN
267
224
SO
268
225
SR
269
226
ST
270
227
SU
271
228
SV
272
229
SY
273
230
SZ
274
231
TC
275
232
TD
276
233
TEL
277
234
TF
278
235
TG
279
236
TH
280
237
TJ
281
238
TK
282
239
TL
283
240
TM
284
241
TN
285
242
TO
286
243
TP
287
244
TR
288
245
TRAVEL
289
246
TT
290
247
TV
291
248
TW
292
249
TZ
293
250
UA
294
251
UG
295
252
UK
296
253
US
297
254
UY
298
255
UZ
299
256
VA
300
257
VC
301
258
VE
302
259
VG
303
260
VI
304
261
VN
305
262
VU
306
263
WF
307
264
WS
308
265
XN--0ZWM56D
309
266
XN--11B5BS3A9AJ6G
310
267
XN--80AKHBYKNJ4F
311
268
XN--9T4B11YI5A
312
269
XN--DEBA0AD
313
270
XN--G6W251D
314
271
XN--HGBK6AJ7F53BBA
315
272
XN--HLCJ6AYA9ESC7A
316
273
XN--JXALPDLP
317
274
XN--KGBECHTV
318
275
XN--ZCKZAH
319
276
YE
320
277
YT
321
278
YU
322
279
ZA
323
280
ZM
324
281
ZW
325
0
282
326
=== modified file 'ibid/plugins/http.py'
327
--- ibid/plugins/http.py	2009-05-01 12:17:57 +0000
328
+++ ibid/plugins/http.py	2009-07-13 15:19:39 +0000
329
@@ -17,7 +17,7 @@
330
17
17
331
18
    max_size = IntOption('max_size', 'Only request this many bytes', 500)
18
    max_size = IntOption('max_size', 'Only request this many bytes', 500)
332
19
19
334
20
    @match(r'^(get|head)\s+(.+)$')
20
    @match(r'^(get|head)\s+(\S+\.\S+)$')
335
21
    def handler(self, event, action, url):
21
    def handler(self, event, action, url):
336
22
        if not url.lower().startswith("http://") and not url.lower().startswith("https://"):
22
        if not url.lower().startswith("http://") and not url.lower().startswith("https://"):
337
23
            url = "http://" + url
23
            url = "http://" + url
338
24
24
339
=== modified file 'ibid/plugins/url.py'
340
--- ibid/plugins/url.py	2009-07-10 12:01:48 +0000
341
+++ ibid/plugins/url.py	2009-07-13 15:17:24 +0000
342
@@ -5,6 +5,7 @@
343
5
import logging
5
import logging
344
6
import re
6
import re
345
7
7
346
8
from pkg_resources import resource_exists, resource_stream
347
8
from sqlalchemy import Column, Integer, Unicode, DateTime, UnicodeText, ForeignKey, Table
9
from sqlalchemy import Column, Integer, Unicode, DateTime, UnicodeText, ForeignKey, Table
348
9
10
349
10
import ibid
11
import ibid
350
@@ -112,7 +113,26 @@
351
112
    password  = Option('delicious_password', 'delicious account password')
113
    password  = Option('delicious_password', 'delicious account password')
352
113
    delicious = Delicious()
114
    delicious = Delicious()
353
114
115
355
115
    @match(r'((?:\S+://|(?:www|ftp)\.)\S+|\S+\.(?:com|org|net|za)\S*)')
116
    def setup(self):
356
117
        if resource_exists(__name__, '../../data/tlds-alpha-by-domain.txt'):
357
118
            tlds = [tld.strip().lower() for tld
358
119
                    in resource_stream(__name__, '../../data/tlds-alpha-by-domain.txt')
359
120
                        .readlines()
360
121
                    if not tld.startswith('#')
361
122
            ]
362
123
363
124
        else:
364
125
            log.warning(u"Couldn't open TLD list, falling back to minimal default")
365
126
            tlds = 'com.org.net.za'.split('.')
366
127
367
128
        self.grab.im_func.pattern = re.compile((
368
129
            r'(?:[^@]\b|\A)('               # Match a boundary, but not on an e-mail address
369
130
            r'(?:\w+://|(?:www|ftp)\.)\S+?' # Match an explicit URL or guess by www.
370
131
            r'|[^@\s:]+\.(?:%s)(?:/\S*?)?'  # Guess at the URL based on TLD
371
132
            r')[\[>)\]"\'.]*(?:\s|\Z)'      # End boundary
372
133
        ) % '|'.join(tlds), re.I | re.DOTALL)
373
134
374
135
    @handler
375
116
    def grab(self, event, url):
136
    def grab(self, event, url):
376
117
        if url.find('://') == -1:
137
        if url.find('://') == -1:
377
118
            if url.lower().startswith('ftp'):
138
            if url.lower().startswith('ftp'):
378
@@ -154,7 +174,7 @@
379
154
    ))
174
    ))
380
155
175
381
156
    def setup(self):
176
    def setup(self):
383
157
        self.lengthen.im_func.pattern = re.compile(r'^((?:%s)\S+)$' % '|'.join([re.escape(service) for service in self.services]), re.I)
177
        self.lengthen.im_func.pattern = re.compile(r'^((?:%s)\S+)$' % '|'.join([re.escape(service) for service in self.services]), re.I|re.DOTALL)
384
158
178
385
159
    @handler
179
    @handler
386
160
    def lengthen(self, event, url):
180
    def lengthen(self, event, url):
387
161
181
388
=== modified file 'ibid/test/plugins/test_core.py'
389
--- ibid/test/plugins/test_core.py	2009-03-05 16:33:12 +0000
390
+++ ibid/test/plugins/test_core.py	2009-07-13 14:47:02 +0000
391
@@ -22,15 +22,24 @@
392
22
    def assert_addressed(self, event, addressed, message):
22
    def assert_addressed(self, event, addressed, message):
393
23
        self.assert_(hasattr(event, 'addressed'))
23
        self.assert_(hasattr(event, 'addressed'))
394
24
        self.assertEqual(event.addressed, addressed)
24
        self.assertEqual(event.addressed, addressed)
396
25
        self.assertEqual(event.message.strip(), message)
25
        self.assertEqual(event.message['deaddressed'].strip(), message)
397
26
398
27
    def create_event(self, message, event_type=u'message'):
399
28
        event = Event(u'fakesource', event_type)
400
29
        event.message = {
401
30
            'raw': message,
402
31
            'deaddressed': message,
403
32
            'clean': message,
404
33
            'stripped': message,
405
34
        }
406
35
        return event
407
26
36
408
27
    def test_non_messages(self):
37
    def test_non_messages(self):
409
28
        for event_type in [u'timer', u'rpc']:
38
        for event_type in [u'timer', u'rpc']:
412
29
            event = Event(u'fakesource', event_type)
39
            event = self.create_event(u'bot: foo', event_type)
411
30
            event.message = u'bot: foo'
413
31
            self.processor.process(event)
40
            self.processor.process(event)
414
32
            self.assertFalse(hasattr(event, u'addressed'))
41
            self.assertFalse(hasattr(event, u'addressed'))
416
33
            self.assertEqual(event.message, u'bot: foo')
42
            self.assertEqual(event.message['deaddressed'], u'bot: foo')
417
34
43
418
35
    happy_prefixes = [
44
    happy_prefixes = [
419
36
        (u'bot', u':  '),
45
        (u'bot', u':  '),
420
@@ -40,8 +49,7 @@
421
40
49
422
41
    def test_happy_prefix_names(self):
50
    def test_happy_prefix_names(self):
423
42
        for prefix in self.happy_prefixes:
51
        for prefix in self.happy_prefixes:
426
43
            event = Event(u'fakesource', u'message')
52
            event = self.create_event(u'%s%sfoo' % prefix)
425
44
            event.message = u'%s%sfoo' % prefix
427
45
            self.processor.process(event)
53
            self.processor.process(event)
428
46
            self.assert_addressed(event, prefix[0], u'foo')
54
            self.assert_addressed(event, prefix[0], u'foo')
429
47
55
430
@@ -53,8 +61,7 @@
431
53
61
432
54
    def test_sad_prefix_names(self):
62
    def test_sad_prefix_names(self):
433
55
        for prefix in self.sad_prefixes:
63
        for prefix in self.sad_prefixes:
436
56
            event = Event(u'fakesource', u'message')
64
            event = self.create_event(u'%s%sfoo' % prefix)
435
57
            event.message = u'%s%sfoo' % prefix
437
58
            self.processor.process(event)
65
            self.processor.process(event)
438
59
            self.assert_addressed(event, False, u'%s%sfoo' % prefix)
66
            self.assert_addressed(event, False, u'%s%sfoo' % prefix)
439
60
67
440
@@ -66,8 +73,7 @@
441
66
73
442
67
    def test_happy_suffix_names(self):
74
    def test_happy_suffix_names(self):
443
68
        for suffix in self.happy_suffixes:
75
        for suffix in self.happy_suffixes:
446
69
            event = Event(u'fakesource', u'message')
76
            event = self.create_event(u'foo%s%s' % suffix)
445
70
            event.message = u'foo%s%s' % suffix
447
71
            self.processor.process(event)
77
            self.processor.process(event)
448
72
            self.assert_addressed(event, suffix[1], u'foo')
78
            self.assert_addressed(event, suffix[1], u'foo')
449
73
79
450
@@ -80,8 +86,7 @@
451
80
86
452
81
    def test_sad_suffix_names(self):
87
    def test_sad_suffix_names(self):
453
82
        for suffix in self.sad_suffixes:
88
        for suffix in self.sad_suffixes:
456
83
            event = Event(u'fakesource', u'message')
89
            event = self.create_event(u'foo%s%s' % suffix)
455
84
            event.message = u'foo%s%s' % suffix
457
85
            self.processor.process(event)
90
            self.processor.process(event)
458
86
            self.assert_addressed(event, False, u'foo%s%s' % suffix)
91
            self.assert_addressed(event, False, u'foo%s%s' % suffix)
459
87
92
460
88
93
461
=== added file 'ibid/test/plugins/test_url.py'
462
--- ibid/test/plugins/test_url.py	1970-01-01 00:00:00 +0000
463
+++ ibid/test/plugins/test_url.py	2009-07-13 15:17:19 +0000
464
@@ -0,0 +1,57 @@
465
1
from twisted.trial import unittest
466
2
import ibid.test
467
3
468
4
from ibid.event import Event
469
5
from ibid.plugins import url
470
6
471
7
class TestURLGrabber(unittest.TestCase):
472
8
473
9
    def setUp(self):
474
10
        self.grab = url.Grab(u'testplugin')
475
11
476
12
    good_grabs = [
477
13
        (u'google.com', u'google.com'),
478
14
        (u'http://foo.bar', u'http://foo.bar'),
479
15
        (u'aoeuoeu <www.jar.com>', u'www.jar.com'),
480
16
        (u'aoeuoeu <www.jar.com> def', u'www.jar.com'),
481
17
        (u'<www.jar.com>', u'www.jar.com'),
482
18
        (u'so bar http://foo.bar/baz to jo', u'http://foo.bar/baz'),
483
19
        (u"'http://bar.com'", u'http://bar.com'),
484
20
        (u'Thingie boo.com/a eue', u'boo.com/a'),
485
21
        (u'joe (www.google.com) says foo', u'www.google.com'),
486
22
        (u'http://www.example.net/blog/2008/11/09/debugging-python-regular-expressions/',
487
23
            u'http://www.example.net/blog/2008/11/09/debugging-python-regular-expressions/'),
488
24
        (u'aoeu http://www.example.net/blog/2008/11/09/debugging-python-regular-expressions/ aoeu',
489
25
            u'http://www.example.net/blog/2008/11/09/debugging-python-regular-expressions/'),
490
26
        (u'aoeu http://www.example.net/blog/2008/11/09/debugging-python-regular-expressions/. aoeu',
491
27
            u'http://www.example.net/blog/2008/11/09/debugging-python-regular-expressions/'),
492
28
        (u'http://www.example.net/blog/2008/11/09/debugging-python-regular-expressions/.',
493
29
            u'http://www.example.net/blog/2008/11/09/debugging-python-regular-expressions/'),
494
30
        (u'ouoe <http://www.example.net/blog/2008/11/09/debugging-python-regular-expressions/> aoeuao',
495
31
            u'http://www.example.net/blog/2008/11/09/debugging-python-regular-expressions/'),
496
32
        # We accept that the following are non-optimal
497
33
        (u'http://en.example.org/wiki/Python_(programming_language)',
498
34
            u'http://en.example.org/wiki/Python_(programming_language'),
499
35
        (u'Python <http://en.example.org/wiki/Python_(programming_language)> is a lekker language',
500
36
            u'http://en.example.org/wiki/Python_(programming_language'),
501
37
        (u'Python <URL:http://en.example.org/wiki/Python_(programming_language)> is a lekker language',
502
38
            u'http://en.example.org/wiki/Python_(programming_language'),
503
39
    ]
504
40
505
41
    def test_good_grabs(self):
506
42
        for input, url in self.good_grabs:
507
43
            m = self.grab.grab.im_func.pattern.search(input)
508
44
            self.assertEqual(m.group(1), url)
509
45
510
46
    bad_grabs = [
511
47
        u'joe@bar.com',
512
48
        u'x joe@google.com',
513
49
        u'<joe@bar.com>',
514
50
    ]
515
51
516
52
    def test_bad_grabs(self):
517
53
        for input in self.bad_grabs:
518
54
            m = self.grab.grab.im_func.pattern.search(input)
519
55
            self.assertEqual(m, None)
520
56
521
57
# vi: set et sta sw=4 ts=4:
Reviewer	Date Requested	Status
Jonathan Hitchcock		Approve on 2009-07-13
Michael Gorven	2009-07-13	Approve on 2009-07-13
Review via email: mp+8696@code.launchpad.net