Merge lp:~widelands-dev/widelands/glossary_checks into lp:widelands

Proposed by GunChleoc
Status: Merged
Merged at revision: 8315
Proposed branch: lp:~widelands-dev/widelands/glossary_checks
Merge into: lp:widelands
Diff against target: 605 lines (+601/-0)
1 file modified
utils/glossary_checks.py (+601/-0)
To merge this branch: bzr merge lp:~widelands-dev/widelands/glossary_checks
Reviewer Review Type Date Requested Status
GunChleoc Needs Resubmitting
Review via email: mp+312430@code.launchpad.net

Commit message

Added a Python script to do automated glossary checks for translations. It enlists the help of Hunspell and 'misuses' the Transifex note field in order to reduce noise. Functionality for translators is documented in the wiki:

https://wl.widelands.org/wiki/TranslatingWidelands/#preparing-your-glossary-for-automated-keyword-checks

Description of the change

After the British English fiasco in Build 19, I decided it would be good to have some glossary checks for translations. We use this kind of check at my workplace, and it helps keep translations consistent on big projects.

Downloading the glossary from Transifex can't be automated, so we have to download it manually each time before we run the checks. That's why I decided against committing it to the code base - we don't want to accidentally check against an outdated glossary.

Translators can hack the glossary's comment fields to provide inflected word forms, so in the long run, the check won't annoy translators with false positive hits. For example, for "worker" = "Arbeiter", "workers" = "Arbeitern" can pass the check if a translator has added the relevant data.
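The comment-field convention works roughly like the sketch below (the function name `translation_variants` is hypothetical; the real parsing lives in utils/glossary_checks.py and uses the same '|' delimiter):

```python
def translation_variants(translation, comment, delimiter='|'):
    """Return the base translation plus any inflected forms a translator
    listed in the Transifex comment field, e.g. 'Arbeiters|Arbeitern'."""
    variants = [translation.strip()]
    if delimiter in comment:
        # Each delimited part is an extra form the check will accept.
        variants.extend(part.strip() for part in comment.split(delimiter)
                        if part.strip())
    return variants

print(translation_variants('Arbeiter', 'Arbeiters|Arbeitern'))
# → ['Arbeiter', 'Arbeiters', 'Arbeitern']
```

A comment without the delimiter is treated as an ordinary note, so only the base translation is matched.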

I am also using the Hunspell stem function to reduce the noise. This is slow, but any entry that a translator doesn't have to look at needlessly is a good entry.
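The stemming step amounts to piping the translation through `hunspell -s` and appending the reported stems before matching. A simplified sketch (the helper name `append_stems` is illustrative; it degrades gracefully when hunspell or the dictionary is unavailable):

```python
from subprocess import Popen, PIPE

def append_stems(hunspell_locale, text):
    """Append hunspell's stem output ('hunspell -s') to the text so a
    glossary term can also match an inflected form in the translation.
    Returns the text unchanged if hunspell is missing or errors out."""
    try:
        process = Popen(['hunspell', '-d', hunspell_locale, '-s'],
                        stdout=PIPE, stdin=PIPE)
        out, _ = process.communicate(text.encode('utf-8'))
        if out:
            # Matching then runs on original text plus stems.
            return ' '.join([text, out.decode('utf-8')])
    except OSError:
        pass
    return text
```

The whole-word check then runs on the combined text, so a glossary stem can match even when only an inflected form appears in the translation.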

These checks will be a service for the translation teams and NOT mandatory - we can't require volunteers to go through them. Some of the translators gladly snapped up my last round of validations though, so some will like using this.

Revision history for this message
bunnybot (widelandsofficial) wrote :

Continuous integration builds have changed state:

Travis build 1708. State: passed. Details: https://travis-ci.org/widelands/widelands/builds/181170109.
Appveyor build 1548. State: success. Details: https://ci.appveyor.com/project/widelands-dev/widelands/build/_widelands_dev_widelands_glossary_checks-1548.

Revision history for this message
bunnybot (widelandsofficial) wrote :

Continuous integration builds have changed state:

Travis build 1728. State: errored. Details: https://travis-ci.org/widelands/widelands/builds/182020101.
Appveyor build 1568. State: success. Details: https://ci.appveyor.com/project/widelands-dev/widelands/build/_widelands_dev_widelands_glossary_checks-1568.

Revision history for this message
bunnybot (widelandsofficial) wrote :

Continuous integration builds have changed state:

Travis build 1731. State: passed. Details: https://travis-ci.org/widelands/widelands/builds/182101596.
Appveyor build 1571. State: success. Details: https://ci.appveyor.com/project/widelands-dev/widelands/build/_widelands_dev_widelands_glossary_checks-1571.

Revision history for this message
GunChleoc (gunchleoc) wrote :

I guess I had quite a few new ideas after submitting this merge request.... should be done now. I have already dogfooded this with my own locale and fixed up a number of translations thanks to this check :)

Will create a zip of the results for the translators so they can check it out.

review: Needs Resubmitting
Revision history for this message
bunnybot (widelandsofficial) wrote :

Bunnybot encountered an error while working on this merge proposal:

HTTP Error 500: Internal Server Error

Revision history for this message
bunnybot (widelandsofficial) wrote :

Continuous integration builds have changed state:

Travis build 1757. State: passed. Details: https://travis-ci.org/widelands/widelands/builds/182873856.
Appveyor build 1597. State: success. Details: https://ci.appveyor.com/project/widelands-dev/widelands/build/_widelands_dev_widelands_glossary_checks-1597.

Revision history for this message
bunnybot (widelandsofficial) wrote :

Bunnybot encountered an error while working on this merge proposal:

('The read operation timed out',)

Revision history for this message
bunnybot (widelandsofficial) wrote :

Bunnybot encountered an error while working on this merge proposal:

HTTP Error 500: Internal Server Error

Revision history for this message
bunnybot (widelandsofficial) wrote :

Continuous integration builds have changed state:

Travis build 1864. State: failed. Details: https://travis-ci.org/widelands/widelands/builds/194595810.
Appveyor build 1700. State: success. Details: https://ci.appveyor.com/project/widelands-dev/widelands/build/_widelands_dev_widelands_glossary_checks-1700.

Revision history for this message
GunChleoc (gunchleoc) wrote :

It's getting a bit annoying to run these from a separate branch. Since none of this affects the Widelands code or translations directly, I'm gonna merge this now.

@bunnybot merge

Revision history for this message
bunnybot (widelandsofficial) wrote :

Continuous integration builds have changed state:

Travis build 2031. State: passed. Details: https://travis-ci.org/widelands/widelands/builds/207665942.
Appveyor build 1700. State: success. Details: https://ci.appveyor.com/project/widelands-dev/widelands/build/_widelands_dev_widelands_glossary_checks-1700.

Preview Diff

=== added file 'utils/glossary_checks.py'
--- utils/glossary_checks.py 1970-01-01 00:00:00 +0000
+++ utils/glossary_checks.py 2017-03-04 12:24:08 +0000
@@ -0,0 +1,601 @@
1#!/usr/bin/env python
2# encoding: utf-8
3
4"""Runs a glossary check on all po files and writes the check results to
5po_validation/glossary.
6
7You will need to have the Translate Toolkit installed in order for the checks to work:
8http://toolkit.translatehouse.org/
9
10This script also uses hunspell to reduce the number of false positive hits, so
11install as many of the needed hunspell dictionaries as you can find. This script
12will inform you about missing hunspell locales.
13
14For Debian-based Linux: sudo apt-get install translate-toolkit hunspell hunspell-ar hunspell-bg hunspell-br hunspell-ca hunspell-cs hunspell-da hunspell-de-de hunspell-el hunspell-en-ca hunspell-en-gb hunspell-en-us hunspell-eu hunspell-fr hunspell-gd hunspell-gl hunspell-he hunspell-hr hunspell-hu hunspell-it hunspell-ko hunspell-lt hunspell-nl hunspell-no hunspell-pl hunspell-pt-br hunspell-pt-pt hunspell-ro hunspell-ru hunspell-si hunspell-sk hunspell-sl hunspell-sr hunspell-sv hunspell-uk hunspell-vi
15
16You will need to provide an export of the Transifex glossary and specify it at
17the command line. Make sure to select "Include glossary notes in file" when
18exporting the csv from Transifex.
19
20Translators can 'misuse' their languages' comment field on Transifex to add
21inflected forms of their glossary translations. We use the delimiter '|' to
22signal that the field has inflected forms in it. Examples:
23
24Source Translation Comment Translation will be matched against
25------ ----------- ---------------- -----------------------------------
26sheep sheep Nice, fluffy! 'sheep'
27ax axe axes| 'axe', 'axes'
28click click clicking|clicked 'click', 'clicking', 'clicked'
29click click clicking | clicked 'click', 'clicking', 'clicked'
30
31"""
32
33from collections import defaultdict
34from subprocess import call, CalledProcessError, Popen, PIPE
35import csv
36import os.path
37import re
38import subprocess
39import sys
40import time
41import traceback
42
43#############################################################################
44# Data Containers #
45#############################################################################
46
47
48class GlossaryEntry:
49 """An entry in our parsed glossaries."""
50
51 def __init__(self):
52 # Base form of the term, followed by any inflected forms
53 self.terms = []
54 # Base form of the translation, followed by any inflected forms
55 self.translations = []
56
57
58class FailedTranslation:
59 """Information about a translation that failed a check."""
60
61 def __init__(self):
62 # The locale where the check failed
63 self.locale = ''
64 # The po file containing the failed translation
65 self.po_file = ''
66 # Source text
67 self.source = ''
68 # Target text
69 self.target = ''
70 # Location in the source code
71 self.location = ''
72 # The glossary term that failed the check
73 self.term = ''
74 # The base form of the translated glossary term
75 self.translation = ''
76
77
78class HunspellLocale:
79 """A specific locale for Hunspell, plus whether its dictionary is
80 installed."""
81
82 def __init__(self, locale):
83 # Specific language/country code for Hunspell, e.g. el_GR
84 self.locale = locale
85 # Whether a dictionary has been found for the locale
86 self.is_available = False
87
88hunspell_locales = defaultdict(list)
89""" Hunspell needs specific locales"""
90
91#############################################################################
92# File System Functions #
93#############################################################################
94
95
96def read_csv_file(filepath):
97 """Parses a CSV file into a 2-dimensional array."""
98 result = []
99 with open(filepath) as csvfile:
100 csvreader = csv.reader(csvfile, delimiter=',', quotechar='"')
101 for row in csvreader:
102 result.append(row)
103 return result
104
105
106def make_path(base_path, subdir):
107 """Creates the correct form of the path and makes sure that it exists."""
108 result = os.path.abspath(os.path.join(base_path, subdir))
109 if not os.path.exists(result):
110 os.makedirs(result)
111 return result
112
113
114def delete_path(path):
115 """Deletes the directory specified by 'path' and all its subdirectories and
116 file contents."""
117 if os.path.exists(path) and not os.path.isfile(path):
118 files = sorted(os.listdir(path), key=str.lower)
119 for deletefile in files:
120 deleteme = os.path.abspath(os.path.join(path, deletefile))
121 if os.path.isfile(deleteme):
122 try:
123 os.remove(deleteme)
124 except Exception:
125 print('Failed to delete file ' + deleteme)
126 else:
127 delete_path(deleteme)
128 try:
129 os.rmdir(path)
130 except Exception:
131             print('Failed to delete path ' + path)
132
133#############################################################################
134# Glossary Loading #
135#############################################################################
136
137
138def set_has_hunspell_locale(hunspell_locale):
139 """Tries calling hunspell with the given locale and returns false if it has
140 failed."""
141 try:
142 process = Popen(['hunspell', '-d', hunspell_locale.locale,
143 '-s'], stderr=PIPE, stdout=PIPE, stdin=PIPE)
144 hunspell_result = process.communicate('foo')
145         if not hunspell_result[1]:
146 hunspell_locale.is_available = True
147 return True
148 else:
149 print('Error loading Hunspell dictionary for locale ' +
150 hunspell_locale.locale + ': ' + hunspell_result[1])
151 return False
152
153 except CalledProcessError:
154 print('Failed to run hunspell for locale: ' + hunspell_locale.locale)
155 return False
156
157
158def get_hunspell_locale(locale):
159 """Returns the corresponding Hunspell locale for this locale, or empty
160 string if not available."""
161 if len(hunspell_locales[locale]) == 1 and hunspell_locales[locale][0].is_available:
162 return hunspell_locales[locale][0].locale
163 return ''
164
165
166def load_hunspell_locales(locale):
167 """Registers locales for Hunspell.
168
169 Maps a list of generic locales to specific locales and checks which
170 dictionaries are available. If locale != "all", load only the
171 dictionary for the given locale.
172
173 """
174 hunspell_locales['bg'].append(HunspellLocale('bg_BG'))
175 hunspell_locales['br'].append(HunspellLocale('br_FR'))
176 hunspell_locales['ca'].append(HunspellLocale('ca_ES'))
177 hunspell_locales['da'].append(HunspellLocale('da_DK'))
178 hunspell_locales['cs'].append(HunspellLocale('cs_CZ'))
179 hunspell_locales['de'].append(HunspellLocale('de_DE'))
180 hunspell_locales['el'].append(HunspellLocale('el_GR'))
181 hunspell_locales['en_CA'].append(HunspellLocale('en_CA'))
182 hunspell_locales['en_GB'].append(HunspellLocale('en_GB'))
183 hunspell_locales['en_US'].append(HunspellLocale('en_US'))
184 hunspell_locales['eo'].append(HunspellLocale('eo'))
185 hunspell_locales['es'].append(HunspellLocale('es_ES'))
186 hunspell_locales['et'].append(HunspellLocale('et_EE'))
187 hunspell_locales['eu'].append(HunspellLocale('eu_ES'))
188 hunspell_locales['fa'].append(HunspellLocale('fa_IR'))
189 hunspell_locales['fi'].append(HunspellLocale('fi_FI'))
190 hunspell_locales['fr'].append(HunspellLocale('fr_FR'))
191 hunspell_locales['gd'].append(HunspellLocale('gd_GB'))
192 hunspell_locales['gl'].append(HunspellLocale('gl_ES'))
193 hunspell_locales['he'].append(HunspellLocale('he_IL'))
194 hunspell_locales['hr'].append(HunspellLocale('hr_HR'))
195 hunspell_locales['hu'].append(HunspellLocale('hu_HU'))
196 hunspell_locales['ia'].append(HunspellLocale('ia'))
197 hunspell_locales['id'].append(HunspellLocale('id_ID'))
198 hunspell_locales['it'].append(HunspellLocale('it_IT'))
199 hunspell_locales['ja'].append(HunspellLocale('ja_JP'))
200 hunspell_locales['jv'].append(HunspellLocale('jv_ID'))
201 hunspell_locales['ka'].append(HunspellLocale('ka_GE'))
202 hunspell_locales['ko'].append(HunspellLocale('ko_KR'))
203 hunspell_locales['krl'].append(HunspellLocale('krl_RU'))
204 hunspell_locales['la'].append(HunspellLocale('la'))
205 hunspell_locales['lt'].append(HunspellLocale('lt_LT'))
206 hunspell_locales['mr'].append(HunspellLocale('mr_IN'))
207 hunspell_locales['ms'].append(HunspellLocale('ms_MY'))
208 hunspell_locales['my'].append(HunspellLocale('my_MM'))
209 hunspell_locales['nb'].append(HunspellLocale('nb_NO'))
210 hunspell_locales['nds'].append(HunspellLocale('nds_DE'))
211 hunspell_locales['nl'].append(HunspellLocale('nl_NL'))
212 hunspell_locales['nn'].append(HunspellLocale('nn_NO'))
213 hunspell_locales['oc'].append(HunspellLocale('oc_FR'))
214 hunspell_locales['pl'].append(HunspellLocale('pl_PL'))
215 hunspell_locales['pt'].append(HunspellLocale('pt_PT'))
216 hunspell_locales['ro'].append(HunspellLocale('ro_RO'))
217 hunspell_locales['ru'].append(HunspellLocale('ru_RU'))
218 hunspell_locales['rw'].append(HunspellLocale('rw_RW'))
219 hunspell_locales['si'].append(HunspellLocale('si_LK'))
220 hunspell_locales['sk'].append(HunspellLocale('sk_SK'))
221 hunspell_locales['sl'].append(HunspellLocale('sl_SI'))
222 hunspell_locales['sr'].append(HunspellLocale('sr_RS'))
223 hunspell_locales['sv'].append(HunspellLocale('sv_SE'))
224 hunspell_locales['tr'].append(HunspellLocale('tr_TR'))
225 hunspell_locales['uk'].append(HunspellLocale('uk_UA'))
226 hunspell_locales['vi'].append(HunspellLocale('vi_VN'))
227 hunspell_locales['zh_CN'].append(HunspellLocale('zh_CN'))
228 hunspell_locales['zh_TW'].append(HunspellLocale('zh_TW'))
229 if locale == 'all':
230 print('Looking for Hunspell dictionaries')
231 for locale in hunspell_locales:
232 set_has_hunspell_locale(hunspell_locales[locale][0])
233 else:
234 print('Looking for Hunspell dictionary')
235 set_has_hunspell_locale(hunspell_locales[locale][0])
236
237
238def is_vowel(character):
239 """Helper function for creating inflections of English words."""
240 return character == 'a' or character == 'e' or character == 'i' \
241 or character == 'o' or character == 'u' or character == 'y'
242
243
244def make_english_plural(word):
245 """Create plural forms for nouns.
246
247 This will create a few nonsense entries for irregular plurals, but
248 it's good enough for our purpose. Glossary contains pluralized
249 terms, so we don't add any plural forms for strings ending in 's'.
250
251 """
252 result = ''
253 if not word.endswith('s'):
254 if word.endswith('y') and not is_vowel(word[-2:-1]):
255 result = word[0:-1] + 'ies'
256 elif word.endswith('z') or word.endswith('x') or word.endswith('ch') or word.endswith('sh') or word.endswith('o'):
257 result = word + 'es'
258 else:
259 result = word + 's'
260 return result
261
262
263def make_english_verb_forms(word):
264 """Create inflected forms of an English verb: -ed and -ing forms.
265
266 Will create nonsense for irregular verbs.
267
268 """
269 result = []
270 if word.endswith('e'):
271 result.append(word[0:-1] + 'ing')
272 result.append(word + 'd')
273 elif is_vowel(word[-2:-1]) and not is_vowel(word[-1]):
274 # The consonant is duplicated here if the last syllable is stressed.
275 # We can't detect stress, so we add both variants.
276 result.append(word + word[-1] + 'ing')
277 result.append(word + 'ing')
278 result.append(word + word[-1] + 'ed')
279 result.append(word + 'ed')
280 elif word.endswith('y') and not is_vowel(word[-2:-1]):
281 result.append(word + 'ing')
282 result.append(word[0:-1] + 'ed')
283 else:
284 result.append(word + 'ing')
285 result.append(word + 'ed')
286 # 3rd person s has the same pattern as noun plurals.
287     # We omitted words ending in 's' in the plural, so we add them here.
288 if word.endswith('s'):
289 result.append(word + 'es')
290 else:
291 result.append(make_english_plural(word))
292 return result
293
294
295def load_glossary(glossary_file, locale):
296 """Build a glossary from the given Transifex glossary csv file for the
297 given locale."""
298 result = []
299 counter = 0
300 term_index = 0
301 term_comment_index = 0
302 wordclass_index = 0
303 translation_index = 0
304 comment_index = 0
305 for row in read_csv_file(glossary_file):
306 # Detect the column indices
307 if counter == 0:
308 colum_counter = 0
309 for header in row:
310 if header == 'term':
311 term_index = colum_counter
312 elif header == 'comment':
313 term_comment_index = colum_counter
314 elif header == 'pos':
315 wordclass_index = colum_counter
316 elif header == 'translation_' + locale or header == locale:
317 translation_index = colum_counter
318 elif header == 'comment_' + locale:
319 comment_index = colum_counter
320 colum_counter = colum_counter + 1
321 # If there is a translation, parse the entry
322 # We also have some obsolete terms in the glossary that we want to
323 # filter out.
324 elif len(row[translation_index].strip()) > 0 and not row[term_comment_index].startswith('OBSOLETE'):
325 if translation_index == 0:
326 raise Exception(
327 'Locale %s is missing from glossary file.' % locale)
328 if comment_index == 0:
329 raise Exception(
330 'Comment field for locale %s is missing from glossary file.' % locale)
331 entry = GlossaryEntry()
332 entry.terms.append(row[term_index].strip())
333 if row[wordclass_index] == 'Noun':
334 plural = make_english_plural(entry.terms[0])
335 if len(plural) > 0:
336 entry.terms.append(plural)
337 elif row[wordclass_index] == 'Verb':
338 verb_forms = make_english_verb_forms(entry.terms[0])
339 for verb_form in verb_forms:
340 entry.terms.append(verb_form)
341
342 entry.translations.append(row[translation_index].strip())
343
344 # Misuse the comment field to provide a list of inflected forms.
345 # Otherwise, we would get tons of false positive hits in the checks
346 # later on and the translators would have our heads on a platter.
347 delimiter = '|'
348 if len(row[comment_index].strip()) > 1 and delimiter in row[comment_index]:
349 inflections = row[comment_index].split(delimiter)
350 for inflection in inflections:
351 entry.translations.append(inflection.strip())
352
353 result.append(entry)
354 counter = counter + 1
355 return result
356
357
358#############################################################################
359# Term Checking #
360#############################################################################
361
362
363def contains_term(string, term):
364 """Checks whether 'string' contains 'term' as a whole word.
365
366     This check is case-insensitive.
367
368 """
369 result = False
370 # Regex is slow, so we do this preliminary check
371 if term.lower() in string.lower():
372 # Now make sure that it's whole words!
373 # We won't want to match "AI" against "again" etc.
374         regex = re.compile('(^|.*\W)' + term + '(\W.*|$)', re.IGNORECASE)
375 result = regex.match(string)
376 return result
377
378
379def source_contains_term(source_to_check, entry, glossary):
380 """Checks if the source string contains the glossary entry while filtering
381 out superstrings from the glossary, e.g. we don't want to check 'arena'
382 against 'battle arena'."""
383 source_to_check = source_to_check.lower()
384 for term in entry.terms:
385 term = term.lower()
386 if term in source_to_check:
387             source_regex = re.compile('(^|.*[\s,.])' + term + '([\s,.].*|$)')
388 if source_regex.match(source_to_check):
389 for entry2 in glossary:
390 if entry.terms[0] != entry2.terms[0]:
391 for term2 in entry2.terms:
392 term2 = term2.lower()
393 if term2 != term and term in term2 and term2 in source_to_check:
394 source_to_check = source_to_check.replace(
395 term2, '')
396 # Check if the source still contains the term to check
397 return contains_term(source_to_check, term)
398 return False
399
400
401def append_hunspell_stems(hunspell_locale, translation):
402     """Use hunspell to append the stems for the terms found; less work for glossary editors.
403     The effectiveness of this check depends on how good the hunspell data is."""
404 try:
405 process = Popen(['hunspell', '-d', hunspell_locale,
406 '-s'], stdout=PIPE, stdin=PIPE)
407 hunspell_result = process.communicate(translation)
408 if hunspell_result[0] != '':
409 translation = ' '.join([translation, hunspell_result[0]])
410 except CalledProcessError:
411 print('Failed to run hunspell for locale: ' + hunspell_locale)
412 return translation
413
414
415def translation_has_term(entry, target):
416 """Verify the target translation against all translation variations from
417 the glossary."""
418 result = False
419 for translation in entry.translations:
420 if contains_term(target, translation):
421 result = True
422 break
423 return result
424
425
426def check_file(csv_file, glossaries, locale, po_file):
427 """Run the actual check."""
428 translations = read_csv_file(csv_file)
429 source_index = 0
430 target_index = 0
431 location_index = 0
432 hits = []
433 counter = 0
434 has_hunspell = True
435 hunspell_locale = get_hunspell_locale(locale)
436 for row in translations:
437 # Detect the column indices
438 if counter == 0:
439 colum_counter = 0
440 for header in row:
441 if header == 'source':
442 source_index = colum_counter
443 elif header == 'target':
444 target_index = colum_counter
445 elif header == 'location':
446 location_index = colum_counter
447 colum_counter = colum_counter + 1
448 else:
449 for entry in glossaries[locale][0]:
450 # Check if the source text contains the glossary term.
451 # Filter out superstrings, e.g. we don't want to check
452 # "arena" against "battle arena"
453 if source_contains_term(row[source_index], entry, glossaries[locale][0]):
454 # Now verify the translation against all translation
455 # variations from the glossary
456 term_found = translation_has_term(entry, row[target_index])
457 # Add Hunspell stems for better matches and try again
458 # We do it here because the Hunspell manipulation is slow.
459 if not term_found and hunspell_locale != '':
460 target_to_check = append_hunspell_stems(
461 hunspell_locale, row[target_index])
462 term_found = translation_has_term(
463 entry, target_to_check)
464 if not term_found:
465 hit = FailedTranslation()
466 hit.source = row[source_index]
467 hit.target = row[target_index]
468 hit.location = row[location_index]
469 hit.term = entry.terms[0]
470 hit.translation = entry.translations[0]
471 hit.locale = locale
472 hit.po_file = po_file
473 hits.append(hit)
474 counter = counter + 1
475 return hits
476
477
478#############################################################################
479# Main Loop #
480#############################################################################
481
482
483def check_translations_with_glossary(input_path, output_path, glossary_file, only_locale):
484 """Main loop.
485
486 Loads the Transifex and Hunspell glossaries, converts all po files
487 for languages that have glossary entries to temporary csv files,
488 runs the check and then reports any hits to csv files.
489
490 """
491 print('Locale: ' + only_locale)
492 temp_path = make_path(output_path, 'temp_glossary')
493 hits = []
494 locale_list = defaultdict(list)
495
496 glossaries = defaultdict(list)
497 load_hunspell_locales(only_locale)
498
499 source_directories = sorted(os.listdir(input_path), key=str.lower)
500 for dirname in source_directories:
501 dirpath = os.path.join(input_path, dirname)
502 if os.path.isdir(dirpath):
503 source_files = sorted(os.listdir(dirpath), key=str.lower)
504 sys.stdout.write("\nChecking text domain '" + dirname + "': ")
505 sys.stdout.flush()
506 failed = 0
507 for source_filename in source_files:
508 po_file = dirpath + '/' + source_filename
509 if source_filename.endswith('.po'):
510 locale = source_filename[0:-3]
511 if only_locale == 'all' or locale == only_locale:
512 # Load the glossary if we haven't seen this locale
513 # before
514 if len(glossaries[locale]) < 1:
515 sys.stdout.write(
516 '\nLoading glossary for ' + locale)
517 glossaries[locale].append(
518 load_glossary(glossary_file, locale))
519 sys.stdout.write(' - %d entries ' %
520 len(glossaries[locale][0]))
521 sys.stdout.flush()
522 # Only bother with locales that have glossary entries
523 if len(glossaries[locale][0]) > 0:
524 sys.stdout.write(locale + ' ')
525 sys.stdout.flush()
526 if len(locale_list[locale]) < 1:
527 locale_list[locale].append(locale)
528 csv_file = os.path.abspath(os.path.join(
529 temp_path, dirname + '_' + locale + '.csv'))
530 # Convert to csv for easy parsing
531 call(['po2csv', '--progress=none', po_file, csv_file])
532
533 # Now run the actual check
534 current_hits = check_file(
535 csv_file, glossaries, locale, dirname)
536 for hit in current_hits:
537 hits.append(hit)
538
539 # The csv file is no longer needed, delete it.
540 os.remove(csv_file)
541
542 hits = sorted(hits, key=lambda FailedTranslation: [
543 FailedTranslation.locale, FailedTranslation.translation])
544 for locale in locale_list:
545 locale_result = '"glossary_term","glossary_translation","source","target","file","location"\n'
546 counter = 0
547 for hit in hits:
548 if hit.locale == locale:
549 row = '"%s","%s","%s","%s","%s","%s"\n' % (
550 hit.term, hit.translation, hit.source, hit.target, hit.po_file, hit.location)
551 locale_result = locale_result + row
552 counter = counter + 1
553 dest_filepath = output_path + '/glossary_check_' + locale + '.csv'
554 with open(dest_filepath, 'wt') as dest_file:
555 dest_file.write(locale_result)
556 # Uncomment this line to print a statistic of the number of hits for each locale
557 # print("%s\t%d"%(locale, counter))
558
559 delete_path(temp_path)
560 return 0
561
562
563def main():
564 """Checks whether we are in the correct directory and everything's there,
565 then runs a glossary check over all PO files."""
566 if len(sys.argv) == 2 or len(sys.argv) == 3:
567 print('Running glossary checks:')
568 else:
569 print(
570 'Usage: glossary_checks.py <relative-path-to-glossary> [locale]')
571 return 1
572
573 try:
574 print('Current time: %s' % time.ctime())
575 # Prepare the paths
576 glossary_file = os.path.abspath(os.path.join(
577 os.path.dirname(__file__), sys.argv[1]))
578 locale = 'all'
579 if len(sys.argv) == 3:
580 locale = sys.argv[2]
581
582 if (not (os.path.exists(glossary_file) and os.path.isfile(glossary_file))):
583 print('There is no glossary file at ' + glossary_file)
584 return 1
585
586 input_path = os.path.abspath(os.path.join(
587 os.path.dirname(__file__), '../po'))
588 output_path = make_path(os.path.dirname(__file__), '../po_validation')
589 result = check_translations_with_glossary(
590 input_path, output_path, glossary_file, locale)
591 print('Current time: %s' % time.ctime())
592 return result
593
594 except Exception:
595 print('Something went wrong:')
596 traceback.print_exc()
597 delete_path(make_path(output_path, 'temp_glossary'))
598 return 1
599
600if __name__ == '__main__':
601 sys.exit(main())
