Merge ~iconstantin/usn-tool:master into usn-tool:master

Proposed by Ian Constantin
Status: Merged
Approved by: Ian Constantin
Approved revision: fd3b5e5f520502ccd32f44d742fb8e7af06246be
Merged at revision: 1f74414c808a51c07e6cc3afb2d47d9e87f2887d
Proposed branch: ~iconstantin/usn-tool:master
Merge into: usn-tool:master
Diff against target: 494 lines (+482/-0)
2 files modified
usn-term-check.py (+198/-0)
woke.yaml (+284/-0)
Reviewer Review Type Date Requested Status
Steve Beattie Approve
Review via email: mp+419967@code.launchpad.net

Commit message

Initial upload of the usn-term-check.py and woke.yaml which are part of the non-inclusive and inconsistent term check process being added to our USN publication process.

To post a comment you must log in.
Revision history for this message
Steve Beattie (sbeattie) wrote :

Approve with a couple of changes:

1) if we can avoid reimplementing load_database() and instead use the one in usn.py, that would be preferred
2) some minor cleanups of spelling and trailing spaces in the code.

Other than those quibbles, it all looks good to me. Thanks!

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/usn-term-check.py b/usn-term-check.py
new file mode 100755
index 0000000..ca18c13
--- /dev/null
+++ b/usn-term-check.py
@@ -0,0 +1,198 @@
1#!/usr/bin/python3
2import pickle, yaml, json, os, sys
3from usn import load_database
4from argparse import ArgumentParser
5
def parse_options():
    """Parse the command-line arguments of the term check script.

    The three options --db, --usn and --path are all mandatory; argparse
    exits with a usage error when any of them is missing.

    Returns:
        The namespace returned by ``ArgumentParser.parse_args()``, with the
        attributes ``db``, ``usn`` and ``path``.
    """
    arg_parser = ArgumentParser()

    # (flag, help text) for every required option, registered in this order
    required_options = (
        ('--db', "Use specified database file"),
        ('--usn', "Current USN"),
        ('--path', "Path + usn-tool"),
    )
    for flag, help_text in required_options:
        arg_parser.add_argument(flag, action="store", required=True, help=help_text)

    return arg_parser.parse_args()
13
def load_yaml(filename):
    """Read the non-inclusive and consistent term lists from a YAML file.

    Args:
        filename: path of the YAML file to read.

    Returns:
        A pair ``(non_inclusive_terms, consistent_terms)`` taken from the
        "non-inclusive-terms" and "consistent-terms" keys of the file. When
        the file does not exist, a notice is printed and a pair of empty
        dicts is returned instead.
    """
    if os.path.isfile(filename):
        with open(filename, 'r') as handle:
            contents = yaml.safe_load(handle)
        return contents["non-inclusive-terms"], contents["consistent-terms"]

    print("YAML file not found:", filename)
    return {}, {}
25
def load_issues(filename):
    """Open (or create) the local JSON file used to track term issues.

    Args:
        filename: path of the issues file.

    Returns:
        The dict stored in the file. An empty dict is returned when the file
        did not exist (it is then created empty), when its content is not
        valid JSON, or when the JSON value is not an object.
    """
    # If issues file does not exist, create it
    if not os.path.isfile(filename):
        with open(filename, 'w'):
            pass
        return {}

    with open(filename, 'r') as issues_file:
        try:
            issues = json.load(issues_file)
        except ValueError:
            # Incorrectly formatted (or empty) json file: json.JSONDecodeError
            # is a ValueError. Anything else (e.g. KeyboardInterrupt) is
            # deliberately left to propagate.
            return {}

    # Callers index the result as {text_type: {term: state}}; any other JSON
    # value is unusable, so treat it like a malformed file.
    if not isinstance(issues, dict):
        return {}

    return issues
44
def record_issues(filename, issues):
    """Write the identified issues to a local JSON file.

    The file gives the issues temporary persistence; any existing content
    of ``filename`` is overwritten.

    Args:
        filename: path of the issues file to write.
        issues: JSON-serializable structure holding the recorded issues.
    """
    with open(filename, 'w') as out_file:
        out_file.write(json.dumps(issues))
50
def issues_exist(issues):
    """Tell whether any recorded term is still in the "needed" state.

    Args:
        issues: dict mapping each text_type to a dict of term -> state.

    Returns:
        True if at least one text_type has a term whose state is "needed",
        False otherwise.
    """
    return any("needed" in term_states.values() for term_states in issues.values())
59
def _prompt_ignore(kind):
    """Ask whether to ignore the reported ISSUE/WARNING; return the state to record."""
    user_choice = input("\nI) to ignore this " + kind + " (any to continue): ")
    return "ignored" if user_choice.lower() == 'i' else "needed"


def check_terms(text_type, text, check_type, term_list, issues):
    """Check for and track non-inclusive or inconsistent terms in USN text.

    Matching is a case-insensitive substring search on the text after it has
    been flattened to a single line with single spaces.

    Args:
        text_type: label of the text being checked; used as the first-level
            key in ``issues`` and shown in the printed messages.
        text: the text to check.
        check_type: "non-inclusive" or "consistent"; selects which report is
            printed. Any other value reports nothing.
        term_list: list of dicts, each with a 'terms' list plus either
            'alternatives' (and optionally 'security_preferred') for
            non-inclusive checks, or 'alternative' for consistent checks.
        issues: dict of text_type -> {term: state}, updated in place. States
            written here are "ignored", "needed" and "addressed".

    For every term found, the user is prompted on stdin and may ignore it.
    """
    # Ignoring case for the text check. To handle phrases that may span two
    # lines, all new line characters are replaced with spaces, effectively
    # making the text we are checking one long line.
    text = text.lower().strip().replace('\n', ' ')

    # The replacement above may introduce double spaces; collapse them so all
    # words are separated by single spaces (or other punctuation).
    while "  " in text:
        text = text.replace("  ", " ")

    for term_dict in term_list:
        for term in term_dict['terms']:
            term = term.lower().strip()  # ignoring case for text check

            # States previously recorded for this text_type (if any)
            recorded = issues.get(text_type, {})

            if term in recorded:
                # If we previously ignored this term for the current text_type, skip the check
                if recorded[term] == "ignored":
                    continue

                # Term previously in text, now changed/removed (meaning it was addressed)
                if term not in text and recorded[term] != "addressed":
                    recorded[term] = "addressed"
                    continue

            if term not in text:
                continue

            # Make sure there is a place to record the state of this term
            issues.setdefault(text_type, {})

            # Logic to handle non-inclusive terms
            if check_type == "non-inclusive":
                print("\n---\n\nISSUE: non-inclusive term <" + term + "> found in: <" + text_type + ">")
                print("\tPlease consider using one of the following alternatives:")

                alternatives = term_dict["alternatives"]

                # If a "security_preferred" alternative exists, highlight it as the first presented alternative
                if "security_preferred" in term_dict:
                    security_preferred = term_dict["security_preferred"][0]

                    # Leave it out of the remaining options so it is not printed
                    # more than once. A filtered copy is used so the caller's
                    # term list is not modified.
                    alternatives = [alt for alt in alternatives if alt != security_preferred]

                    print("\t\t--", security_preferred, "** preferred option **")

                # Print out the rest of the alternative term options
                for alternative in alternatives:
                    print("\t\t--", alternative)

                # Providing the user the ability to ignore this term as being an issue
                issues[text_type][term] = _prompt_ignore("ISSUE")

            # Logic to handle inconsistent terms
            elif check_type == "consistent":
                alternative = term_dict["alternative"][0].lower().strip()

                print("\n---\n\nWARNING: inconsistent term <" + term + "> found in: <" + text_type + ">")
                print("\tRecommended alternative: <" + alternative + ">")

                issues[text_type][term] = _prompt_ignore("WARNING")

            # NOTE: a `break` could be added here (not used by default) to stop
            # after the first hit in a term group. That would avoid potentially
            # redundant/false positive reports on phrases for the same term,
            # e.g. "white list" and "white listed".
def main():
    """Run the non-inclusive and inconsistent term checks for one USN.

    Loads the term lists, the USN database and any previously recorded
    issues, checks every USN text field against both term lists, and writes
    the resulting issues back to the local issues file.

    Exits with code 7 when at least one term is still in the "needed" state,
    to indicate that issues exist and that the new-usn bash script should exit.
    """
    # Grabbing args and defining paths
    options = parse_options()

    pickle_path = options.db                    # /tmp/pending-usn/$USN.pickle
    usn = options.usn                           # $USN

    # The issues file lives next to the pickle. dirname/join are used instead
    # of slicing on "$USN.pickle" so an unexpected pickle file name does not
    # raise, and a bare file name does not resolve to the filesystem root.
    pending_usn_dir = os.path.dirname(pickle_path)      # /tmp/pending-usn
    usn_tool_dir = options.path.split(':')[-1]          # /home/USER/DIR/usn-tool

    yaml_path = os.path.join(usn_tool_dir, "woke.yaml")
    issues_file_path = os.path.join(pending_usn_dir, "term_issues_" + usn)

    # Extracting terms, USN text, and issues dicts
    non_inclusive_terms, consistent_terms = load_yaml(yaml_path)
    usn_db = load_database(pickle_path)
    issues = load_issues(issues_file_path)

    # Parsing USN text fields
    usn_entry = usn_db[usn]
    text_fields = {"title": usn_entry["title"],
                   "summary": usn_entry["summary"],
                   "isummary": usn_entry["isummary"],
                   "description": usn_entry["description"]}

    # Adding source package description(s) to text_fields. The package name
    # is part of the key so that a release with several source packages gets
    # every description checked, not only the last one.
    releases = usn_entry["releases"]

    for release in releases:
        sources = releases[release]["sources"]
        for src_pkg in sources:
            field_name = release + " " + src_pkg + " src-pkg description"
            text_fields[field_name] = sources[src_pkg]["description"]

    # Running checks for non-inclusive and inconsistent term use
    for text_type in text_fields:
        check_terms(text_type, text_fields[text_type], "non-inclusive", non_inclusive_terms, issues)
        check_terms(text_type, text_fields[text_type], "consistent", consistent_terms, issues)

    # Write out ISSUES and WARNINGS to local file
    record_issues(issues_file_path, issues)

    if issues_exist(issues):
        # Returning exit code of 7 to indicate that issues exist and that the new-usn bash script should exit
        sys.exit(7)


if __name__ == "__main__":
    main()
198
diff --git a/woke.yaml b/woke.yaml
new file mode 100644
index 0000000..c8c8b42
--- /dev/null
+++ b/woke.yaml
@@ -0,0 +1,284 @@
1####################################################################################################################
2#
3# Based on Canonical's Guidelines:
4# https://docs.google.com/document/d/1mJUa1VJHOMWa723dmKmNjSKGW-nlBv9xktzGZZwacVo/edit#heading=h.5efudb237qdb
5# ---
6# Original yaml file available here:
7# https://git.launchpad.net/lp-inclusive-naming/plain/.woke.yaml
8# ---
9#
10# non-inclusive-terms:
11# security_preferred
12# - optional
13# - specified if one alternative is identified as the go-to replacement for security USNs
14#
15# ---
16#
17# consistent-terms:
18# alternative
19# - should be a single option to which the inconsistent term should be changed
20#
21####################################################################################################################
22
23non-inclusive-terms:
24 - name: whitelist
25 terms:
26 - whitelist
27 - white-list
28 - whitelisted
29 - white-listed
30 - white list
31 - white listed
32 alternatives:
33 - allowlist
34 - inclusion list
35 security_preferred:
36 - allowlist
37
38 - name: blacklist
39 terms:
40 - blacklist
41 - black-list
42 - blacklisted
43 - black-listed
44 - black list
45 - black listed
46 alternatives:
47 - revoke (use for certificates)
48 - filtered
49 - denylist
50 - blocklist
51 - exclusion list
52 security_preferred:
53 - blocklist
54
55 - name: master-slave
56 terms:
57 - master-slave
58 - master/slave
59 - master slave
60 - master and slave
61 alternatives:
62 - leader/follower
63 - main/replica
64 - primary/replica
65 - primary/standby
66 - primary/secondary
67
68 - name: slave
69 terms:
70 - slave
71 alternatives:
72 - follower
73 - replica
74 - standby
75
76 - name: grandfathered
77 terms:
78 - grandfathered
79 - grand fathered
80 - grandfather
81 - grand father
82 alternatives:
83 - legacy status
84 - legacied
85 - exempted
86 - carried
87 - brought forward
88 - rolled over
89
90 - name: man-hours
91 terms:
92 - man hours
93 - man-hours
94 alternatives:
95 - person hours
96 - engineer hours
97
98 - name: sanity
99 terms:
100 - sanity
101 alternatives:
102 - consistency check
103 - validate
104 - confidence
105 - clarity check
106 - quick check
107 - coherence check
108
109 - name: dummy
110 terms:
111 - dummy
112 alternatives:
113 - placeholder
114 - sample
115
116 - name: guys
117 terms:
118 - guys
119 alternatives:
120 - folks
121 - people
122 - you all
123 - y'all
124 - yinz
125
126 - name: whitebox
127 terms:
128 - white-box
129 - whitebox
130 - white box
131 alternatives:
132 - open-box
133
134 - name: blackbox
135 terms:
136 - black-box
137 - blackbox
138 - black box
139 alternatives:
140 - closed-box
141
142 - name: blackhat
143 terms:
144 - blackhat
145 - black-hat
146 - black hat
147 alternatives:
148 - attacker
149 - malicious actor
150
151 - name: whitehat
152 terms:
153 - whitehat
154 - white-hat
155 - white hat
156 alternatives:
157 - researcher
158 - security specialist
159
160 - name: illegal characters
161 terms:
162 - illegal characters
163 alternatives:
164 - invalid characters
165 - unsupported characters
166 - special characters
167 security_preferred:
168 - invalid characters
169
170 - name: native feature
171 terms:
172 - native feature
173 alternatives:
174 - core feature
175 - built-in feature
176 security_preferred:
177 - built-in feature
178
179 - name: chairman/foreman
180 terms:
181 - chairman
182 - chair man
183 - foreman
184 - fore man
185 alternatives:
186 - chair
187 - foreperson
188
189 - name: man in the middle
190 terms:
191 - man in the middle
192 - man-in-the-middle
193 - mitm
194 alternatives:
195 - machine-in-the-middle
196 - person-in-the-middle
197 - system-in-the-middle
198 - intermediary attack
199 security_preferred:
200 - machine-in-the-middle
201
202 - name: middleman
203 terms:
204 - middleman
205 - middle man
206 alternatives:
207 - middleperson
208 - intermediary
209 security_preferred:
210 - intermediary
211
212 - name: manned
213 terms:
214 - manned
215 alternatives:
216 - crewed
217 - staffed
218 - monitored
219 - human operated
220
221 - name: mom test / girlfriend test
222 terms:
223 - mom test
224 - mom-test
225 - girlfriend test
226 - girlfriend-test
227 alternatives:
228 - user test
229 - user friendly
230 security_preferred:
231 - user test
232
233 - name: crazy
234 terms:
235 - crazy
236 alternatives:
237 - baffling
238 - unexplained
239 - errant
240
241 - name: cripples
242 terms:
243 - cripples
244 alternatives:
245 - slows down
246 - hinders
247 - obstructs
248
249 - name: crippling
250 terms:
251 - crippling
252 alternatives:
253 - attenuating
254 - incapacitating
255
256 - name: stonith/stomith
257 terms:
258 - stonith
259 - stomith
260 alternatives:
261 - fence failed nodes
262 - machines
263
264 - name: demilitarized zone
265 terms:
266 - demilitarized zone
267 - dmz
268 alternatives:
269 - perimeter network
270 - passthrough network
271
272consistent-terms:
273 - name: file system
274 terms:
275 - file system
276 - file-system
277 alternative:
278 - filesystem
279
280 - name: machine in the middle
281 terms:
282 - machine in the middle
283 alternative:
284 - machine-in-the-middle

Subscribers

People subscribed via source and target branches

to all changes: