Merge ~iconstantin/usn-tool:master into usn-tool:master
- Git
- lp:~iconstantin/usn-tool
- master
- Merge into master
Proposed by
Ian Constantin
Status: | Merged |
---|---|
Approved by: | Ian Constantin |
Approved revision: | fd3b5e5f520502ccd32f44d742fb8e7af06246be |
Merged at revision: | 1f74414c808a51c07e6cc3afb2d47d9e87f2887d |
Proposed branch: | ~iconstantin/usn-tool:master |
Merge into: | usn-tool:master |
Diff against target: |
494 lines (+482/-0) 2 files modified
usn-term-check.py (+198/-0) woke.yaml (+284/-0) |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Steve Beattie | Approve | ||
Review via email: mp+419967@code.launchpad.net |
Commit message
Initial upload of the usn-term-check.py and woke.yaml which are part of the non-inclusive and inconsistent term check process being added to our USN publication process.
Description of the change
To post a comment you must log in.
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | diff --git a/usn-term-check.py b/usn-term-check.py |
2 | new file mode 100755 |
3 | index 0000000..ca18c13 |
4 | --- /dev/null |
5 | +++ b/usn-term-check.py |
6 | @@ -0,0 +1,198 @@ |
7 | +#!/usr/bin/python3 |
8 | +import pickle, yaml, json, os, sys |
9 | +from usn import load_database |
10 | +from argparse import ArgumentParser |
11 | + |
def parse_options(argv=None):
    ''' Parses the command-line options used by the term check

    argv: optional list of argument strings to parse instead of the
          process arguments; the default (None) makes argparse read
          sys.argv[1:], which is the original behaviour.

    Returns the argparse namespace carrying the required "db", "usn" and
    "path" values. argparse exits the process if a required option is
    missing.
    '''

    parser = ArgumentParser()
    parser.add_argument('--db', action="store", required=True, help="Use specified database file")
    parser.add_argument('--usn', action="store", required=True, help="Current USN")
    parser.add_argument('--path', action="store", required=True, help="Path + usn-tool")
    return parser.parse_args(argv)
19 | + |
def load_yaml(filename):
    ''' Opens provided yaml file to access our non-inclusive and inconsistent terms

    Returns a pair (non_inclusive_terms, consistent_terms) taken from the
    "non-inclusive-terms" and "consistent-terms" top-level keys.

    If the file is missing, or does not contain a mapping at the top level,
    a message is printed and a pair of empty dicts is returned. A key that
    is absent or empty yields an empty list, so callers can always iterate
    over both results.
    '''

    if not os.path.isfile(filename):
        print("YAML file not found:", filename)
        return {}, {}

    with open(filename, 'r', encoding='utf-8') as yaml_file:
        yaml_dict = yaml.safe_load(yaml_file)

    # safe_load returns None for an empty document (and a list/scalar for
    # other shapes); indexing those by key would raise
    if not isinstance(yaml_dict, dict):
        print("YAML file has no term definitions:", filename)
        return {}, {}

    non_inclusive_terms = yaml_dict.get("non-inclusive-terms") or []
    consistent_terms = yaml_dict.get("consistent-terms") or []

    return non_inclusive_terms, consistent_terms
31 | + |
def load_issues(filename):
    ''' Opens/creates a local json file to track term issues

    Returns the recorded issues as a dict. An empty dict is returned when
    the file did not exist (it is created empty), when it cannot be decoded
    as JSON, or when the decoded JSON is not an object.
    '''

    issues = {}

    # If issues file does not exist, create it
    if not os.path.isfile(filename):
        with open(filename, 'w'):
            pass
        return issues

    with open(filename, 'r') as issues_file:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; any other failure is unexpected and should surface
        try:
            loaded = json.load(issues_file)
        except ValueError:
            return issues

    # Valid JSON that is not an object (e.g. a list) cannot be used as the
    # issues mapping, so treat it like an unreadable file
    if isinstance(loaded, dict):
        issues = loaded

    return issues
50 | + |
def record_issues(filename, issues):
    ''' Write out any identified issues to a local json file for temporary persistence

    Raises TypeError/ValueError from the json module if issues cannot be
    serialised; in that case the existing file is left untouched.
    '''

    # Serialise before opening: opening with 'w' truncates the file, so a
    # failure while dumping would otherwise wipe the previously saved state
    payload = json.dumps(issues)

    with open(filename, 'w') as issues_file:
        issues_file.write(payload)
56 | + |
def issues_exist(issues):
    ''' Returns True if "needed" is the recorded state for any text_type+term

    issues maps each text_type to a dict of term -> state.
    '''

    return any("needed" in term_states.values() for term_states in issues.values())
65 | + |
def _normalise_text(text):
    ''' Lower-cases text and collapses every run of whitespace into one space '''

    # Phrases may span two lines or be separated by tabs/multiple spaces;
    # splitting on any whitespace and re-joining leaves one long line in
    # which all words are separated by single spaces (or punctuation)
    return " ".join(text.lower().split())


def _ask_to_ignore(label):
    ''' Prompts the user about an ISSUE/WARNING and returns the state to record '''

    user_choice = input("\nI) to ignore this " + label + " (any to continue): ")

    if user_choice.lower() == 'i':
        return "ignored"
    return "needed"


def check_terms(text_type, text, check_type, term_list, issues):
    ''' Checks for and tracks non-inclusive or inconsistent terms in USN text

    text_type:  label of the text being checked (e.g. "title"); used as the
                key under which states are stored in issues
    text:       the text to check; matching ignores case and is a plain
                substring test on whitespace-normalised text
    check_type: "non-inclusive" or "consistent"; selects which report is
                printed and which keys of each term entry are read
    term_list:  list of term entries, each with a "terms" list plus
                "alternatives" (and optional "security_preferred") for
                non-inclusive checks, or "alternative" for consistent checks
    issues:     dict of text_type -> {term: state}, updated in place with
                the states "needed", "ignored" or "addressed"

    Returns None. Prompts on stdin for every term that is found and was
    not previously ignored.
    '''

    text = _normalise_text(text)

    for term_dict in term_list:
        for term in term_dict['terms']:
            term = term.lower().strip() # ignoring case for text check
            found = term in text

            # Perform some checks if we have previously recorded issues/warnings for this text_type and term
            recorded = issues.get(text_type, {})
            if term in recorded:

                # If we previously ignored this term for the current text_type, skip the check
                if recorded[term] == "ignored":
                    continue

                # Updates state for terms previously in text, now changed/removed (meaning it was addressed)
                if not found and recorded[term] != "addressed":
                    recorded[term] = "addressed"
                    continue

            if not found:
                continue

            # If text_type not in issues dict we will add/initialize (term is added below)
            if text_type not in issues:
                issues[text_type] = {}

            # Logic to handle non-inclusive terms
            if check_type == "non-inclusive":
                print("\n---\n\nISSUE: non-inclusive term <" + term + "> found in: <" + text_type + ">")
                print("\tPlease consider using one of the following alternatives:")

                alternatives = term_dict["alternatives"]

                # If a "security_preferred" alternative exists, highlight it as the first presented alternative
                if "security_preferred" in term_dict:
                    security_preferred = term_dict["security_preferred"][0]
                    print("\t\t--", security_preferred, "** preferred option **")

                    # Filter into a new list so it is not printed twice; the
                    # shared term definitions must not be modified because
                    # they are reused for every text field
                    alternatives = [alt for alt in alternatives if alt != security_preferred]

                # Print out the rest of the alternative term options
                for alternative in alternatives:
                    print("\t\t--", alternative)

                # Providing the user the ability to ignore this term as being an issue
                issues[text_type][term] = _ask_to_ignore("ISSUE")

            # Logic to handle inconsistent terms
            elif check_type == "consistent":
                alternative = term_dict["alternative"][0].lower().strip()

                print("\n---\n\nWARNING: inconsistent term <" + term + "> found in: <" + text_type + ">")
                print("\tRecommended alternative: <" + alternative + ">")

                issues[text_type][term] = _ask_to_ignore("WARNING")

            # NOTE: every phrase of an entry is checked, so overlapping
            # phrases (e.g. "white list" and "white listed") may each be
            # reported for the same text. Breaking out of the inner loop
            # here would report only the first match per entry.

def main():
    ''' Runs the non-inclusive and inconsistent term checks for one pending USN

    Reads the USN text from the database given by --db, checks every text
    field against the terms in woke.yaml, records the resulting states in a
    term_issues_$USN file next to the database and exits with code 7 if any
    term is still marked as "needed".
    '''

    # Grabbing args and defining paths
    options = parse_options()

    pickle_path = options.db    # /tmp/pending-usn/$USN.pickle
    path = options.path         # $PATH:/home/USER/DIR/usn-tool
    usn = options.usn           # $USN

    # The issues file lives beside the pickle. dirname() also copes with a
    # bare filename (issues file goes to the current directory) and with
    # database names that are not exactly "$USN.pickle"
    pending_usn_dir = os.path.dirname(pickle_path)  # /tmp/pending-usn
    usn_tool_dir = path.split(':')[-1]              # /home/USER/DIR/usn-tool

    yaml_path = os.path.join(usn_tool_dir, "woke.yaml")                     # /home/USER/DIR/usn-tool/woke.yaml
    issues_file_path = os.path.join(pending_usn_dir, "term_issues_" + usn)  # /tmp/pending-usn/term_issues_$USN

    # Extracting terms, USN text, and issues dicts
    non_inclusive_terms, consistent_terms = load_yaml(yaml_path)
    usn_db = load_database(pickle_path)
    issues = load_issues(issues_file_path)

    # Parsing USN text fields
    usn_entry = usn_db[usn]
    text_fields = {"title": usn_entry["title"],
                   "summary": usn_entry["summary"],
                   "isummary": usn_entry["isummary"],
                   "description": usn_entry["description"]}

    # Adding source package description(s) to text_fields. The source
    # package name is part of the key so that a release with several source
    # packages has every description checked, not only the last one
    releases = usn_entry["releases"]

    for release in releases:
        sources = releases[release]["sources"]
        for src_pkg in sources:
            text_fields[release + " " + src_pkg + " src-pkg description"] = sources[src_pkg]["description"]

    # Running checks for non-inclusive and inconsistent term use
    for text_type in text_fields:
        check_terms(text_type, text_fields[text_type], "non-inclusive", non_inclusive_terms, issues)
        check_terms(text_type, text_fields[text_type], "consistent", consistent_terms, issues)

    # Write out ISSUES and WARNINGS to local file
    record_issues(issues_file_path, issues)

    if issues_exist(issues):
        # Returning exit code of 7 to indicate that issues exist and that the new-usn bash script should exit
        sys.exit(7)


if __name__ == "__main__":
    main()
204 | + |
205 | diff --git a/woke.yaml b/woke.yaml |
206 | new file mode 100644 |
207 | index 0000000..c8c8b42 |
208 | --- /dev/null |
209 | +++ b/woke.yaml |
210 | @@ -0,0 +1,284 @@ |
211 | +#################################################################################################################### |
212 | +# |
213 | +# Based on Canonical's Guidelines: |
214 | +# https://docs.google.com/document/d/1mJUa1VJHOMWa723dmKmNjSKGW-nlBv9xktzGZZwacVo/edit#heading=h.5efudb237qdb |
215 | +# --- |
216 | +# Original yaml file available here: |
217 | +# https://git.launchpad.net/lp-inclusive-naming/plain/.woke.yaml |
218 | +# --- |
219 | +# |
220 | +# non-inclusive-terms: |
221 | +# security_preferred |
222 | +# - optional |
223 | +# - specified if one alternative is identified as the go-to replacement for security USNs |
224 | +# |
225 | +# --- |
226 | +# |
227 | +# consistent-terms: |
228 | +# alternative |
229 | +# - should be a single option to which the inconsistent term should be changed |
230 | +# |
231 | +#################################################################################################################### |
232 | + |
233 | +non-inclusive-terms: |
234 | + - name: whitelist |
235 | + terms: |
236 | + - whitelist |
237 | + - white-list |
238 | + - whitelisted |
239 | + - white-listed |
240 | + - white list |
241 | + - white listed |
242 | + alternatives: |
243 | + - allowlist |
244 | + - inclusion list |
245 | + security_preferred: |
246 | + - allowlist |
247 | + |
248 | + - name: blacklist |
249 | + terms: |
250 | + - blacklist |
251 | + - black-list |
252 | + - blacklisted |
253 | + - black-listed |
254 | + - black list |
255 | + - black listed |
256 | + alternatives: |
257 | + - revoke (use for certificates) |
258 | + - filtered |
259 | + - denylist |
260 | + - blocklist |
261 | + - exclusion list |
262 | + security_preferred: |
263 | + - blocklist |
264 | + |
265 | + - name: master-slave |
266 | + terms: |
267 | + - master-slave |
268 | + - master/slave |
269 | + - master slave |
270 | + - master and slave |
271 | + alternatives: |
272 | + - leader/follower |
273 | + - main/replica |
274 | + - primary/replica |
275 | + - primary/standby |
276 | + - primary/secondary |
277 | + |
278 | + - name: slave |
279 | + terms: |
280 | + - slave |
281 | + alternatives: |
282 | + - follower |
283 | + - replica |
284 | + - standby |
285 | + |
286 | + - name: grandfathered |
287 | + terms: |
288 | + - grandfathered |
289 | + - grand fathered |
290 | + - grandfather |
291 | + - grand father |
292 | + alternatives: |
293 | + - legacy status |
294 | + - legacied |
295 | + - exempted |
296 | + - carried |
297 | + - brought forward |
298 | + - rolled over |
299 | + |
300 | + - name: man-hours |
301 | + terms: |
302 | + - man hours |
303 | + - man-hours |
304 | + alternatives: |
305 | + - person hours |
306 | + - engineer hours |
307 | + |
308 | + - name: sanity |
309 | + terms: |
310 | + - sanity |
311 | + alternatives: |
312 | + - consistency check |
313 | + - validate |
314 | + - confidence |
315 | + - clarity check |
316 | + - quick check |
317 | + - coherence check |
318 | + |
319 | + - name: dummy |
320 | + terms: |
321 | + - dummy |
322 | + alternatives: |
323 | + - placeholder |
324 | + - sample |
325 | + |
326 | + - name: guys |
327 | + terms: |
328 | + - guys |
329 | + alternatives: |
330 | + - folks |
331 | + - people |
332 | + - you all |
333 | + - y'all |
334 | + - yinz |
335 | + |
336 | + - name: whitebox |
337 | + terms: |
338 | + - white-box |
339 | + - whitebox |
340 | + - white box |
341 | + alternatives: |
342 | + - open-box |
343 | + |
344 | + - name: blackbox |
345 | + terms: |
346 | + - black-box |
347 | + - blackbox |
348 | + - black box |
349 | + alternatives: |
350 | + - closed-box |
351 | + |
352 | + - name: blackhat |
353 | + terms: |
354 | + - blackhat |
355 | + - black-hat |
356 | + - black hat |
357 | + alternatives: |
358 | + - attacker |
359 | + - malicious actor |
360 | + |
361 | + - name: whitehat |
362 | + terms: |
363 | + - whitehat |
364 | + - white-hat |
365 | + - white hat |
366 | + alternatives: |
367 | + - researcher |
368 | + - security specialist |
369 | + |
370 | + - name: illegal characters |
371 | + terms: |
372 | + - illegal characters |
373 | + alternatives: |
374 | + - invalid characters |
375 | + - unsupported characters |
376 | + - special characters |
377 | + security_preferred: |
378 | + - invalid characters |
379 | + |
380 | + - name: native feature |
381 | + terms: |
382 | + - native feature |
383 | + alternatives: |
384 | + - core feature |
385 | + - built-in feature |
386 | + security_preferred: |
387 | + - built-in feature |
388 | + |
389 | + - name: chairman/foreman |
390 | + terms: |
391 | + - chairman |
392 | + - chair man |
393 | + - foreman |
394 | + - fore man |
395 | + alternatives: |
396 | + - chair |
397 | + - foreperson |
398 | + |
399 | + - name: man in the middle |
400 | + terms: |
401 | + - man in the middle |
402 | + - man-in-the-middle |
403 | + - mitm |
404 | + alternatives: |
405 | + - machine-in-the-middle |
406 | + - person-in-the-middle |
407 | + - system-in-the-middle |
408 | + - intermediary attack |
409 | + security_preferred: |
410 | + - machine-in-the-middle |
411 | + |
412 | + - name: middleman |
413 | + terms: |
414 | + - middleman |
415 | + - middle man |
416 | + alternatives: |
417 | + - middleperson |
418 | + - intermediary |
419 | + security_preferred: |
420 | + - intermediary |
421 | + |
422 | + - name: manned |
423 | + terms: |
424 | + - manned |
425 | + alternatives: |
426 | + - crewed |
427 | + - staffed |
428 | + - monitored |
429 | + - human operated |
430 | + |
431 | + - name: mom test / girlfriend test |
432 | + terms: |
433 | + - mom test |
434 | + - mom-test |
435 | + - girlfriend test |
436 | + - girlfriend-test |
437 | + alternatives: |
438 | + - user test |
439 | + - user friendly |
440 | + security_preferred: |
441 | + - user test |
442 | + |
443 | + - name: crazy |
444 | + terms: |
445 | + - crazy |
446 | + alternatives: |
447 | + - baffling |
448 | + - unexplained |
449 | + - errant |
450 | + |
451 | + - name: cripples |
452 | + terms: |
453 | + - cripples |
454 | + alternatives: |
455 | + - slows down |
456 | + - hinders |
457 | + - obstructs |
458 | + |
459 | + - name: crippling |
460 | + terms: |
461 | + - crippling |
462 | + alternatives: |
463 | + - attenuating |
464 | + - incapacitating |
465 | + |
466 | + - name: stonith/stomith |
467 | + terms: |
468 | + - stonith |
469 | + - stomith |
470 | + alternatives: |
471 | + - fence failed nodes |
472 | + - machines |
473 | + |
474 | + - name: demilitarized zone |
475 | + terms: |
476 | + - demilitarized zone |
477 | + - dmz |
478 | + alternatives: |
479 | + - perimeter network |
480 | + - passthrough network |
481 | + |
482 | +consistent-terms: |
483 | + - name: file system |
484 | + terms: |
485 | + - file system |
486 | + - file-system |
487 | + alternative: |
488 | + - filesystem |
489 | + |
490 | + - name: machine in the middle |
491 | + terms: |
492 | + - machine in the middle |
493 | + alternative: |
494 | + - machine-in-the-middle |
Approve with a couple of changes:
1) if we can avoid reimplementing load_database() and instead use the one in usn.py, that would be preferred
2) some minor cleanups of spelling and trailing spaces in the code.
Other than those quibbles, it all looks good to me. Thanks!