Merge ~iconstantin/usn-tool:master into usn-tool:master

Proposed by Ian Constantin
Status: Merged
Approved by: Ian Constantin
Approved revision: fd3b5e5f520502ccd32f44d742fb8e7af06246be
Merged at revision: 1f74414c808a51c07e6cc3afb2d47d9e87f2887d
Proposed branch: ~iconstantin/usn-tool:master
Merge into: usn-tool:master
Diff against target: 494 lines (+482/-0)
2 files modified
usn-term-check.py (+198/-0)
woke.yaml (+284/-0)
Reviewer Review Type Date Requested Status
Steve Beattie Approve
Review via email: mp+419967@code.launchpad.net

Commit message

Initial upload of the usn-term-check.py and woke.yaml which are part of the non-inclusive and inconsistent term check process being added to our USN publication process.

To post a comment you must log in.
Revision history for this message
Steve Beattie (sbeattie) wrote :

Approve with a couple of changes:

1) if we can avoid reimplementing load_database() and instead use the one in usn.py, that would be preferred
2) some minor cleanups of spelling and trailing spaces in the code.

Other than those quibbles, it all looks good to me. Thanks!

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/usn-term-check.py b/usn-term-check.py
2new file mode 100755
3index 0000000..ca18c13
4--- /dev/null
5+++ b/usn-term-check.py
6@@ -0,0 +1,198 @@
7+#!/usr/bin/python3
8+import pickle, yaml, json, os, sys
9+from usn import load_database
10+from argparse import ArgumentParser
11+
def parse_options(argv=None):
    """Parse the command-line options of the term check.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace`` with the ``db``, ``usn`` and
        ``path`` attributes.

    Raises:
        SystemExit: Raised by argparse when a required option is missing.
    """
    parser = ArgumentParser()
    parser.add_argument('--db', action="store", required=True, help="Use specified database file")
    parser.add_argument('--usn', action="store", required=True, help="Current USN")
    parser.add_argument('--path', action="store", required=True, help="Path + usn-tool")
    return parser.parse_args(argv)
19+
def load_yaml(filename):
    """Load the non-inclusive and consistent term definitions from a YAML file.

    Args:
        filename: Path of the YAML file holding the term definitions.

    Returns:
        A ``(non_inclusive_terms, consistent_terms)`` tuple read from the
        top-level "non-inclusive-terms" and "consistent-terms" keys. A key
        that is missing or empty yields an empty list. When the file does not
        exist, or its top level is not a mapping, a message is printed and
        ``({}, {})`` is returned so the caller simply has nothing to check.
    """
    if not os.path.isfile(filename):
        print("YAML file not found:", filename)
        return {}, {}

    with open(filename, 'r') as yaml_file:
        yaml_dict = yaml.safe_load(yaml_file)

    # An empty document loads as None and a malformed one may load as a list
    # or scalar; neither can be indexed by the expected keys.
    if not isinstance(yaml_dict, dict):
        print("YAML file has no term definitions:", filename)
        return {}, {}

    non_inclusive_terms = yaml_dict.get("non-inclusive-terms") or []
    consistent_terms = yaml_dict.get("consistent-terms") or []

    return non_inclusive_terms, consistent_terms
31+
def load_issues(filename):
    """Open (or create) the local JSON file that tracks term issues.

    Args:
        filename: Path of the JSON issues file.

    Returns:
        The issues dict stored in the file. An empty dict is returned when
        the file did not exist (it is created empty), when its content is not
        valid JSON (for example the freshly created empty file), or when the
        JSON document is not an object.
    """
    # If issues file does not exist, create it
    if not os.path.isfile(filename):
        with open(filename, 'w'):
            pass
        return {}

    with open(filename, 'r') as issues_file:
        try:
            issues = json.load(issues_file)
        except ValueError:
            # json.JSONDecodeError (and UnicodeDecodeError) derive from
            # ValueError; an unreadable file is treated as "no issues yet".
            return {}

    # The rest of the script indexes issues by text type, so anything other
    # than a JSON object is unusable.
    if not isinstance(issues, dict):
        return {}

    return issues
50+
def record_issues(filename, issues):
    """Write the identified issues to a local JSON file for temporary persistence.

    Args:
        filename: Path of the JSON issues file; it is overwritten.
        issues: The issues dict to serialize.
    """
    with open(filename, mode='w') as out_file:
        json.dump(issues, fp=out_file)
56+
def issues_exist(issues):
    """Return True if any recorded term is still in the "needed" state.

    Args:
        issues: Dict mapping each text type to a ``{term: state}`` dict.

    Returns:
        True when at least one term of any text type has the state "needed",
        False otherwise (including when ``issues`` is empty).
    """
    return any("needed" in term_states.values() for term_states in issues.values())
65+
def _prompt_ignore(label):
    """Ask the user whether to ignore the reported finding; return the state to record."""
    user_choice = input("\nI) to ignore this " + label + " (any to continue): ")
    return "ignored" if user_choice.lower() == 'i' else "needed"


def check_terms(text_type, text, check_type, term_list, issues):
    """Check USN text for non-inclusive or inconsistent terms and track them.

    Every term is matched case-insensitively as a substring of the text. A
    match is reported on stdout and the user is prompted to either ignore it
    or leave it as needing a fix.

    Args:
        text_type: Name of the text field being checked; used as the key in
            ``issues`` and in the printed report.
        text: The text to check.
        check_type: "non-inclusive" or "consistent"; selects the report
            format and which keys of each term definition are read
            ("alternatives" / optional "security_preferred", or
            "alternative"). Any other value reports nothing.
        term_list: Iterable of term definition dicts, each with a "terms"
            list. The definitions are not modified.
        issues: Dict of ``{text_type: {term: state}}``, updated in place.
            States written here are "ignored", "needed" and "addressed".
    """
    # Ignore case, and collapse every run of whitespace (newlines, CRLF, tabs,
    # repeated spaces) into a single space so that a phrase spanning two lines
    # is matched as if the text were one long line.
    text = " ".join(text.lower().split())

    for term_dict in term_list:
        for term in term_dict['terms']:
            term = term.lower().strip()  # ignoring case for text check

            # Perform some checks if we have previously recorded issues/warnings
            # for this text_type and term
            if (text_type in issues) and (term in issues[text_type]):
                previous_state = issues[text_type][term]

                # If we previously ignored this term for the current text_type, skip the check
                if previous_state == "ignored":
                    continue

                # Term was previously in the text and is now changed/removed (meaning it was addressed)
                if (term not in text) and (previous_state != "addressed"):
                    issues[text_type][term] = "addressed"
                    continue

            if term not in text:
                continue

            # Initialize the entry for this text_type on its first finding
            text_issues = issues.setdefault(text_type, {})

            # Logic to handle non-inclusive terms
            if check_type == "non-inclusive":
                print("\n---\n\nISSUE: non-inclusive term <" + term + "> found in: <" + text_type + ">")
                print("\tPlease consider using one of the following alternatives:")

                # Work on a copy: the same term definitions are reused for
                # every text field, so the caller's list must stay intact.
                alternatives = list(term_dict["alternatives"])

                # If a "security_preferred" alternative exists, highlight it as the first presented alternative
                if "security_preferred" in term_dict:
                    security_preferred = term_dict["security_preferred"][0]

                    # Drop it from the remaining options in order to not print it more than once
                    if security_preferred in alternatives:
                        alternatives.remove(security_preferred)

                    print("\t\t--", security_preferred, "** preferred option **")

                # Print out the rest of the alternative term options
                for alternative in alternatives:
                    print("\t\t--", alternative)

                # Providing the user the ability to ignore this term as being an issue
                text_issues[term] = _prompt_ignore("ISSUE")

            # Logic to handle inconsistent terms
            elif check_type == "consistent":
                alternative = term_dict["alternative"][0].lower().strip()

                print("\n---\n\nWARNING: inconsistent term <" + term + "> found in: <" + text_type + ">")
                print("\tRecommended alternative: <" + alternative + ">")

                text_issues[term] = _prompt_ignore("WARNING")

            # NOTE: there is deliberately no break here, so overlapping phrases
            # of the same definition (e.g. "white list" and "white listed") are
            # each reported.
def main():
    """Run the term checks for one pending USN and record the outcome.

    Loads the term definitions, the USN database and any previously recorded
    issues, checks every text field of the USN (including each source package
    description), writes the updated issues back to the issues file, and
    exits with status 7 when any term is still marked "needed".
    """
    # Grabbing args and defining paths
    options = parse_options()

    pickle_path = options.db  # /tmp/pending-usn/$USN.pickle
    path = options.path       # $PATH:/home/USER/DIR/usn-tool
    usn = options.usn         # $USN

    # The issues file lives next to the database file. Using dirname avoids
    # failing on unexpected database file names and keeps a bare relative
    # file name from resolving to the filesystem root.
    pending_usn_dir = os.path.dirname(pickle_path)  # /tmp/pending-usn
    usn_tool_dir = path.split(':')[-1]              # /home/USER/DIR/usn-tool

    yaml_path = os.path.join(usn_tool_dir, "woke.yaml")                     # /home/USER/DIR/usn-tool/woke.yaml
    issues_file_path = os.path.join(pending_usn_dir, "term_issues_" + usn)  # /tmp/pending-usn/term_issues_$USN

    # Extracting terms, USN text, and issues dicts
    non_inclusive_terms, consistent_terms = load_yaml(yaml_path)
    usn_db = load_database(pickle_path)
    issues = load_issues(issues_file_path)

    # Parsing USN text fields
    usn_entry = usn_db[usn]
    text_fields = {field: usn_entry[field]
                   for field in ("title", "summary", "isummary", "description")}

    # Adding source package description(s) to text_fields. The source package
    # name is part of the key so that several packages in one release do not
    # overwrite each other and all of them get checked.
    for release, release_info in usn_entry["releases"].items():
        for src_pkg, src_info in release_info["sources"].items():
            field_name = release + " " + src_pkg + " src-pkg description"
            text_fields[field_name] = src_info["description"]

    # Running checks for non-inclusive and inconsistent term use
    for text_type, text in text_fields.items():
        check_terms(text_type, text, "non-inclusive", non_inclusive_terms, issues)
        check_terms(text_type, text, "consistent", consistent_terms, issues)

    # Write out ISSUES and WARNINGS to local file
    record_issues(issues_file_path, issues)

    if issues_exist(issues):
        # Returning exit code of 7 to indicate that issues exist and that the new-usn bash script should exit
        sys.exit(7)


if __name__ == "__main__":
    main()
204+
205diff --git a/woke.yaml b/woke.yaml
206new file mode 100644
207index 0000000..c8c8b42
208--- /dev/null
209+++ b/woke.yaml
210@@ -0,0 +1,284 @@
211+####################################################################################################################
212+#
213+# Based on Canonical's Guidelines:
214+# https://docs.google.com/document/d/1mJUa1VJHOMWa723dmKmNjSKGW-nlBv9xktzGZZwacVo/edit#heading=h.5efudb237qdb
215+# ---
216+# Original yaml file available here:
217+# https://git.launchpad.net/lp-inclusive-naming/plain/.woke.yaml
218+# ---
219+#
220+# non-inclusive-terms:
221+# security_preferred
222+# - optional
223+# - specified if one alternative is identified as the go-to replacement for security USNs
224+#
225+# ---
226+#
227+# consistent-terms:
228+# alternative
229+# - should be the single option to which the inconsistent term should be changed
230+#
231+####################################################################################################################
232+
233+non-inclusive-terms:
234+ - name: whitelist
235+ terms:
236+ - whitelist
237+ - white-list
238+ - whitelisted
239+ - white-listed
240+ - white list
241+ - white listed
242+ alternatives:
243+ - allowlist
244+ - inclusion list
245+ security_preferred:
246+ - allowlist
247+
248+ - name: blacklist
249+ terms:
250+ - blacklist
251+ - black-list
252+ - blacklisted
253+ - black-listed
254+ - black list
255+ - black listed
256+ alternatives:
257+ - revoke (use for certificates)
258+ - filtered
259+ - denylist
260+ - blocklist
261+ - exclusion list
262+ security_preferred:
263+ - blocklist
264+
265+ - name: master-slave
266+ terms:
267+ - master-slave
268+ - master/slave
269+ - master slave
270+ - master and slave
271+ alternatives:
272+ - leader/follower
273+ - main/replica
274+ - primary/replica
275+ - primary/standby
276+ - primary/secondary
277+
278+ - name: slave
279+ terms:
280+ - slave
281+ alternatives:
282+ - follower
283+ - replica
284+ - standby
285+
286+ - name: grandfathered
287+ terms:
288+ - grandfathered
289+ - grand fathered
290+ - grandfather
291+ - grand father
292+ alternatives:
293+ - legacy status
294+ - legacied
295+ - exempted
296+ - carried
297+ - brought forward
298+ - rolled over
299+
300+ - name: man-hours
301+ terms:
302+ - man hours
303+ - man-hours
304+ alternatives:
305+ - person hours
306+ - engineer hours
307+
308+ - name: sanity
309+ terms:
310+ - sanity
311+ alternatives:
312+ - consistency check
313+ - validate
314+ - confidence
315+ - clarity check
316+ - quick check
317+ - coherence check
318+
319+ - name: dummy
320+ terms:
321+ - dummy
322+ alternatives:
323+ - placeholder
324+ - sample
325+
326+ - name: guys
327+ terms:
328+ - guys
329+ alternatives:
330+ - folks
331+ - people
332+ - you all
333+ - y'all
334+ - yinz
335+
336+ - name: whitebox
337+ terms:
338+ - white-box
339+ - whitebox
340+ - white box
341+ alternatives:
342+ - open-box
343+
344+ - name: blackbox
345+ terms:
346+ - black-box
347+ - blackbox
348+ - black box
349+ alternatives:
350+ - closed-box
351+
352+ - name: blackhat
353+ terms:
354+ - blackhat
355+ - black-hat
356+ - black hat
357+ alternatives:
358+ - attacker
359+ - malicious actor
360+
361+ - name: whitehat
362+ terms:
363+ - whitehat
364+ - white-hat
365+ - white hat
366+ alternatives:
367+ - researcher
368+ - security specialist
369+
370+ - name: illegal characters
371+ terms:
372+ - illegal characters
373+ alternatives:
374+ - invalid characters
375+ - unsupported characters
376+ - special characters
377+ security_preferred:
378+ - invalid characters
379+
380+ - name: native feature
381+ terms:
382+ - native feature
383+ alternatives:
384+ - core feature
385+ - built-in feature
386+ security_preferred:
387+ - built-in feature
388+
389+ - name: chairman/foreman
390+ terms:
391+ - chairman
392+ - chair man
393+ - foreman
394+ - fore man
395+ alternatives:
396+ - chair
397+ - foreperson
398+
399+ - name: man in the middle
400+ terms:
401+ - man in the middle
402+ - man-in-the-middle
403+ - mitm
404+ alternatives:
405+ - machine-in-the-middle
406+ - person-in-the-middle
407+ - system-in-the-middle
408+ - intermediary attack
409+ security_preferred:
410+ - machine-in-the-middle
411+
412+ - name: middleman
413+ terms:
414+ - middleman
415+ - middle man
416+ alternatives:
417+ - middleperson
418+ - intermediary
419+ security_preferred:
420+ - intermediary
421+
422+ - name: manned
423+ terms:
424+ - manned
425+ alternatives:
426+ - crewed
427+ - staffed
428+ - monitored
429+ - human operated
430+
431+ - name: mom test / girlfriend test
432+ terms:
433+ - mom test
434+ - mom-test
435+ - girlfriend test
436+ - girlfriend-test
437+ alternatives:
438+ - user test
439+ - user friendly
440+ security_preferred:
441+ - user test
442+
443+ - name: crazy
444+ terms:
445+ - crazy
446+ alternatives:
447+ - baffling
448+ - unexplained
449+ - errant
450+
451+ - name: cripples
452+ terms:
453+ - cripples
454+ alternatives:
455+ - slows down
456+ - hinders
457+ - obstructs
458+
459+ - name: crippling
460+ terms:
461+ - crippling
462+ alternatives:
463+ - attenuating
464+ - incapacitating
465+
466+ - name: stonith/stomith
467+ terms:
468+ - stonith
469+ - stomith
470+ alternatives:
471+ - fence failed nodes
472+ - machines
473+
474+ - name: demilitarized zone
475+ terms:
476+ - demilitarized zone
477+ - dmz
478+ alternatives:
479+ - perimeter network
480+ - passthrough network
481+
482+consistent-terms:
483+ - name: file system
484+ terms:
485+ - file system
486+ - file-system
487+ alternative:
488+ - filesystem
489+
490+ - name: machine in the middle
491+ terms:
492+ - machine in the middle
493+ alternative:
494+ - machine-in-the-middle

Subscribers

People subscribed via source and target branches

to all changes: