# Show translation progress for Forecastie.
#
# The English string XML files are compared against the string XML files of
# every other language directory found next to them.
#
# Each translatable string is put in one of these categories:
# - Translated:     The string is present in both the English and the other
#                   language translations and its value in the other language
#                   translation is different than in English.
# - Not translated: The string is present in both the English and the other
#                   language translations but its value in the other language
#                   translation is the same as in English.
# - Missing:        The string is present only in the English translation and
#                   not in the other language translation.
# - Misplaced:      The string is located in a file with a different name than
#                   the English file that defines it. Both translated and not
#                   translated strings are considered, so a misplaced string
#                   is also counted in one of those two categories.
#
# Strings with the translatable="false" attribute or whose names are in
# _ignored_strings are ignored.
#
# There are two output formats:
# - CSV: Machine readable, can be used in CI scripts. This is the default
#        format. The columns are, in order:
#        - Language        The language code.
#        - Filename        The name of the language's resource directory.
#        - Translated      The number of translated strings.
#        - Not Translated  The number of not translated strings.
#        - Missing         The number of missing strings.
#        - Misplaced       The number of misplaced strings (only if the
#                          respective option is provided).
#        - Completion      The percentage of translated strings.
# - Human readable: This format is not as structured as CSV but is easier for
#                   humans to read and provides some extra information. This
#                   format is used when increasing the verbosity level.

from glob import glob
from os import path
from sys import exit
from typing import Dict, List, Tuple, Union

import argparse
import xml.etree.ElementTree as ET


# These paths are relative to the script and should be changed accordingly if
# the script is moved
_english_dir = '../app/src/main/res/values'
_other_lang_dirs = '../app/src/main/res/values-*'

# The glob pattern used to find string XML files
_string_xml_glob = 'strings*.xml'

# Directories to ignore when searching for other languages
_ignored_lang_dirs = [
    'values-night',
    'values-v21',
    'values-v27',
]

# String XML files to ignore when checking translated strings
_ignored_string_xml = [
    'strings_not_translated.xml',
]

# String names to ignore when checking translated strings
_ignored_strings = [
    'setting_available_simple_string_codes',
    'setting_example_simple_string',
]

# Prefix shared by every language directory name; what follows it is the
# language code (e.g. 'values-de' -> 'de')
_lang_dir_prefix = 'values-'


# A dictionary from string names to tuples of string values and filenames,
# e.g. 'action_search' : ('Search', 'strings_main_graphs_map_about.xml')
StringsXML = Dict[str, Tuple[str, str]]

# A dictionary containing statistics for a single language's translation
# status. The keys 'translated', 'not_translated', 'missing' and 'misplaced'
# map to lists of string names that fall into each category. It may also
# contain a key 'dirname' whose value is a str with the name of the
# directory holding the language's string XML files.
SingleLangStats = Dict[str, Union[List[str], str]]

# A dictionary with language names as keys and SingleLangStats as values. It
# contains all the gathered data about all the languages.
LangStats = Dict[str, SingleLangStats]


def script_dir() -> str:
    """
    Return the path to the directory containing this file.

    :return: The full path to the directory containing this file with a
             trailing slash.
    :rtype: str
    """
    return path.dirname(path.realpath(__file__)) + '/'


def other_language_dirs() -> List[str]:
    """
    Find the directories containing the string XML files for languages other
    than English.

    Entries listed in _ignored_lang_dirs and glob matches that are not
    directories are left out.

    :return: A sorted list of paths to the directories containing the string
             XML files.
    :rtype: List[str]
    """
    candidates = (x.rstrip('/') for x in glob(script_dir() + _other_lang_dirs))
    return sorted(x for x in candidates
                  if path.isdir(x)
                  and path.basename(x) not in _ignored_lang_dirs)


def string_xml_files(dirname: str) -> List[str]:
    """
    Find the language string XML files contained in dirname.

    Files listed in _ignored_string_xml are left out.

    :param str dirname: The directory containing the string XML files.
    :return: A sorted list of paths to the XML files.
    :rtype: List[str]
    """
    filenames = glob(dirname + '/' + _string_xml_glob)
    return sorted(x for x in filenames
                  if path.basename(x) not in _ignored_string_xml)


def english_xml_files() -> List[str]:
    """
    Find the English string XML files.

    :return: A list of paths to the English XML files.
    :rtype: List[str]
    """
    return string_xml_files(script_dir() + _english_dir)


def get_lang_name(dirname: str) -> str:
    """
    Return the language code given the directory containing its string XML
    files.

    :param str dirname: The directory containing the string XML files. Its
                        last component is expected to start with 'values-'.
    :return: The language code extracted from dirname.
    :rtype: str
    """
    return path.basename(dirname.rstrip('/'))[len(_lang_dir_prefix):]


def parse_strings_xml(filenames: List[str]) -> StringsXML:
    """
    Parse the supplied strings XML files into a single dictionary.

    Only <string> elements that are direct children of the root are read.
    Elements marked translatable="false", elements without a name and names
    listed in _ignored_strings are skipped. If a name occurs more than once,
    the last occurrence wins.

    :param List[str] filenames: The paths to the strings XML files.
    :return: A dictionary mapping string names to (value, file basename)
             tuples.
    :rtype: StringsXML
    """
    strings = {}
    for filename in filenames:
        basename = path.basename(filename)
        xml_root = ET.parse(filename).getroot()
        for element in xml_root.findall('string'):
            if element.attrib.get('translatable') == 'false':
                continue
            name = element.attrib.get('name')
            if name is None or name in _ignored_strings:
                continue
            strings[name] = (element.text, basename)
    return strings


def compare_strings_xml(eng: StringsXML, other: StringsXML) -> SingleLangStats:
    """
    Compare the translation status of a language with English.

    :param StringsXML eng: The English strings.
    :param StringsXML other: The strings of the language to compare.
    :return: A dictionary with the keys 'translated', 'not_translated',
             'missing' and 'misplaced', each mapping to a list of string
             names.
    :rtype: SingleLangStats
    """
    result = {'translated': [], 'not_translated': [], 'missing': [],
              'misplaced': []}
    for name, (eng_value, eng_file) in eng.items():
        if name not in other:
            result['missing'].append(name)
            continue
        other_value, other_file = other[name]
        # A string counts as translated only if its value differs from the
        # English one
        if other_value != eng_value:
            result['translated'].append(name)
        else:
            result['not_translated'].append(name)
        # Independently of its value, the string is misplaced if it lives in
        # a file named differently than the English one
        if other_file != eng_file:
            result['misplaced'].append(name)
    return result


def _percent(part: int, whole: int) -> int:
    """
    Return part as a truncated integer percentage of whole.

    :return: The percentage, or 0 when whole is 0 so that languages without
             any comparable strings do not cause a division by zero.
    :rtype: int
    """
    if not whole:
        return 0
    return int(100 * part / whole)


def csv_print(language_stats: LangStats, show_misplaced: bool = False):
    """
    Print language translation status in CSV format.

    :param LangStats language_stats: The data to be printed.
    :param bool show_misplaced: Show data for misplaced strings.
    """
    columns = ['Language', 'Filename', 'Translated', 'Not Translated',
               'Missing']
    if show_misplaced:
        columns.append('Misplaced')
    columns.append('Completion')
    print(','.join(columns))

    for lang, stats in language_stats.items():
        translated = len(stats['translated'])
        not_translated = len(stats['not_translated'])
        missing = len(stats['missing'])
        total = translated + not_translated + missing

        fields = [lang, '"' + stats['dirname'] + '"', str(translated),
                  str(not_translated), str(missing)]
        if show_misplaced:
            fields.append(str(len(stats['misplaced'])))
        fields.append(str(_percent(translated, total)))
        print(','.join(fields))


def detailed_print(language_stats: LangStats, verbosity_level: int = 1,
                   show_misplaced: bool = False):
    """
    Print language translation status in human readable format.

    :param LangStats language_stats: The data to be printed.
    :param int verbosity_level: The amount of detail to show. A value of 1
                                shows only the counts and percentages. A
                                value of 2 will also show the names of
                                strings that are 'not_translated', 'missing'
                                or (if shown) 'misplaced'. A value of 3 or
                                more will also show the names of strings that
                                are 'translated'.
    :param bool show_misplaced: Show data for misplaced strings.
    """
    num_pc_fmt = '{:3d} ({:3d}%)'
    name_indent = '    '

    def print_names(names: List[str], min_verbosity: int):
        # String names are only listed above the given verbosity level
        if verbosity_level > min_verbosity:
            for name in names:
                print(name_indent + name)

    for lang, stats in language_stats.items():
        translated = len(stats['translated'])
        not_translated = len(stats['not_translated'])
        missing = len(stats['missing'])
        total = translated + not_translated + missing
        completion = _percent(translated, total)

        print('Language: ' + lang)
        print('  File:           ' + stats['dirname'])
        print(('  Translated:     ' + num_pc_fmt).format(translated, completion))
        print_names(stats['translated'], 2)
        print(('  Not translated: ' + num_pc_fmt).format(
            not_translated, _percent(not_translated, total)))
        print_names(stats['not_translated'], 1)
        print(('  Missing:        ' + num_pc_fmt).format(
            missing, _percent(missing, total)))
        print_names(stats['missing'], 1)
        if show_misplaced:
            misplaced = len(stats['misplaced'])
            # Only strings present in the other language can be misplaced, so
            # the percentage is relative to those
            present = translated + not_translated
            print(('  Misplaced:      ' + num_pc_fmt).format(
                misplaced, _percent(misplaced, present)))
            print_names(stats['misplaced'], 1)
        print('  Completion:     {:3d}%'.format(completion))


def parse_arguments():
    """
    Parse command line arguments.

    :return: A Namespace containing the arguments and their values.
    :rtype: argparse.Namespace
    """
    parser = argparse.ArgumentParser(
        description='Show translation progress for Forecastie')
    parser.add_argument('--misplaced', '-m', action='store_true',
                        help='Gather and show statistics about misplaced '
                             'strings (strings located in the wrong file).')
    parser.add_argument('--verbose', '-v', action='count', default=0,
                        help='Produce more verbose output. Extra occurrences '
                             'of this option, up to 3 total, increase the '
                             'amount of information shown.')
    parser.add_argument('language', metavar='LANGUAGE', type=str, nargs='?',
                        default=None,
                        help='Only show translation progress for language '
                             'LANGUAGE. LANGUAGE should be one of the '
                             'suffixes of ' + _other_lang_dirs)
    return parser.parse_args()


def main() -> int:
    """
    Gather the translation statistics and print them.

    :return: The process exit status: 0 on success, 1 if no language could
             be found.
    :rtype: int
    """
    args = parse_arguments()

    # Read in the English translation
    english_strings = parse_strings_xml(english_xml_files())

    # Iterate over all the other translations
    language_stats = {}
    for dirname in other_language_dirs():
        language_name = get_lang_name(dirname)

        # If a specific language was specified then skip all others
        if args.language and language_name.lower() != args.language.lower():
            continue

        # Compare this language against the English translation
        other_strings = parse_strings_xml(string_xml_files(dirname))
        res = compare_strings_xml(english_strings, other_strings)

        # Add dirname info to the results and add the results to the language
        # dictionary
        res['dirname'] = path.basename(dirname)
        language_stats[language_name] = res

    if not language_stats:
        # args.language is None when no LANGUAGE was given, so it must not be
        # concatenated into the message in that case
        if args.language:
            print('Error: language ' + args.language + ' could not be found')
        else:
            print('Error: no language directories could be found')
        return 1

    if args.verbose == 0:
        csv_print(language_stats, args.misplaced)
    else:
        detailed_print(language_stats, args.verbose, args.misplaced)
    return 0


if __name__ == '__main__':
    exit(main())