#!/usr/bin/env python3
"""Extract translatable strings from class reference XML files into a .pot catalog."""

import argparse
import os
import re
import shutil
import subprocess
import sys
from collections import OrderedDict

## XML tags whose text content is extracted as translatable messages.
EXTRACT_TAGS = ["description", "brief_description", "member", "constant", "theme_item", "link"]

## Header written verbatim at the top of the generated catalog file.
HEADER = """\
# LANGUAGE translation of the Godot Engine class reference
# Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur.
# Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md).
# This file is distributed under the same license as the Godot source code.
#
# FIRST AUTHOR , YEAR.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: Godot Engine class reference\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8-bit\\n"

"""

## Block the C accelerator so that ElementTree falls back to its Python
## implementation, whose XMLParser exposes the `_start`/`_end` hooks and the
## underlying expat `parser` object used below.
## NOTE: this only has an effect if xml.etree.ElementTree has not been imported
## yet by the time this module is loaded.
sys.modules["_elementtree"] = None
import xml.etree.ElementTree as ET


## override the parser to get the line number
class LineNumberingParser(ET.XMLParser):
    """XMLParser that records the source position of every element it builds."""

    def _start(self, *args, **kwargs):
        ## Here we assume the default XML parser which is expat
        ## and copy its element position attributes into output Elements.
        ## Plain super() is used on purpose: super(self.__class__, self)
        ## recurses forever as soon as this class is subclassed.
        element = super()._start(*args, **kwargs)
        element._start_line_number = self.parser.CurrentLineNumber
        element._start_column_number = self.parser.CurrentColumnNumber
        element._start_byte_index = self.parser.CurrentByteIndex
        return element

    def _end(self, *args, **kwargs):
        element = super()._end(*args, **kwargs)
        element._end_line_number = self.parser.CurrentLineNumber
        element._end_column_number = self.parser.CurrentColumnNumber
        element._end_byte_index = self.parser.CurrentByteIndex
        return element


class Desc:
    """A single extracted message together with where it was found."""

    def __init__(self, line_no, msg, desc_list=None):
        ## line_no : the line number where the desc is
        ## msg : the description string
        ## desc_list : the DescList it belongs to
        self.line_no = line_no
        self.msg = msg
        self.desc_list = desc_list


class DescList:
    """All messages extracted from one XML document."""

    def __init__(self, doc, path):
        ## doc : root xml element of the document
        ## path : file path of the xml document
        ## list : list of Desc objects for this document
        self.doc = doc
        self.path = path
        self.list = []


def print_error(error):
    """Print `error` to stdout with an "ERROR: " prefix."""
    print("ERROR: {}".format(error))


## build classes with xml elements recursively
def _collect_classes_dir(path, classes):
    """Walk `path` recursively and add every .xml file found to `classes`.

    Exits the process with status 1 if `path` is not a directory.
    Non-.xml files are skipped silently.
    """
    if not os.path.isdir(path):
        print_error("Invalid directory path: {}".format(path))
        sys.exit(1)
    for entry_name in os.listdir(path):
        entry = os.path.join(path, entry_name)
        if os.path.isdir(entry):
            _collect_classes_dir(entry, classes)
        elif os.path.isfile(entry) and entry.endswith(".xml"):
            _collect_classes_file(entry, classes)


## opens a file and parse xml add to classes
def _collect_classes_file(path, classes):
    """Parse the XML file at `path` and register it in `classes` by class name.

    Exits the process with status 1 if the path is not an .xml file, the file
    cannot be parsed, or its class name is already present in `classes`.
    Documents whose root has no "name" attribute are reported and skipped.
    """
    if not os.path.isfile(path) or not path.endswith(".xml"):
        print_error("Invalid xml file path: {}".format(path))
        sys.exit(1)
    print("Collecting file: {}".format(os.path.basename(path)))

    try:
        tree = ET.parse(path, parser=LineNumberingParser())
    except ET.ParseError as e:
        print_error("Parse error reading file '{}': {}".format(path, e))
        sys.exit(1)

    doc = tree.getroot()

    if "name" not in doc.attrib:
        print_error("Unknown XML file {}, skipping".format(path))
        return

    ## A missing version is reported but does not stop the collection.
    if "version" not in doc.attrib:
        print_error("Version missing from 'doc', file: {}".format(path))

    name = doc.attrib["name"]
    if name in classes:
        print_error("Duplicate class {} at path {}".format(name, path))
        sys.exit(1)
    classes[name] = DescList(doc, path)


## regions are list of tuples with size 3 (start_index, end_index, indent)
## indication in string where the codeblock starts, ends, and its indent
## if i inside the region returns the indent, else returns -1
def _get_xml_indent(i, regions):
    """Return the xml indent of the region strictly containing index `i`, or -1."""
    for start, end, indent in regions:
        if start < i < end:
            return indent
    return -1


## find and build all regions of codeblock which we need later
def _make_codeblock_regions(desc, path=""):
    """Return a list of (start_index, end_index, xml_indent) for each codeblock.

    start_index is the index of "[codeblock]", end_index the index of the
    matching "[/codeblock]", and xml_indent the number of tabs that directly
    follow the "[codeblock]\\n" opener.

    Exits the process with status 1 if a codeblock is never closed; `path` is
    only used in that error message.
    """
    opener_len = len("[codeblock]\n")
    code_block_index = 0
    code_block_regions = []
    while True:
        code_block_index = desc.find("[codeblock]", code_block_index)
        if code_block_index < 0:
            break
        ## [codeblock] always have a trailing new line and some tabs
        ## those tabs belong to xml indentation, not to the code indent.
        ## The bounds check keeps a description that ends right after the
        ## opener from raising IndexError, so that the unterminated-codeblock
        ## error below is reported instead.
        indent_start = code_block_index + opener_len
        xml_indent = 0
        while indent_start + xml_indent < len(desc) and desc[indent_start + xml_indent] == "\t":
            xml_indent += 1
        end_index = desc.find("[/codeblock]", code_block_index)
        if end_index < 0:
            print_error("Non terminating codeblock: {}".format(path))
            sys.exit(1)
        code_block_regions.append((code_block_index, end_index, xml_indent))
        ## step past this opener so that the next find() locates the next one
        code_block_index += 1
    return code_block_regions


def _strip_and_split_desc(desc, code_block_regions):
    """Return `desc` escaped for use inside a double-quoted msgid string.

    Newlines, double quotes and backslashes are escaped. Tabs outside of a
    codeblock region are dropped; inside a region, the first `xml_indent` tabs
    of each line are dropped and any further tab is emitted as "\\t".
    """
    pieces = []  ## parts of the stripped desc msg
    total_indent = 0  ## code indent = total indent - xml indent
    for i, c in enumerate(desc):
        if c == "\n":
            c = "\\n"
        if c == '"':
            c = '\\"'
        if c == "\\":
            c = "\\\\"  ## a lone backslash is invalid for msgmerge
        if c == "\t":
            xml_indent = _get_xml_indent(i, code_block_regions)
            if xml_indent < 0:
                continue  ## tab outside of any codeblock
            total_indent += 1
            if xml_indent >= total_indent:
                continue  ## tab still belongs to the xml indentation
            c = "\\t"
        pieces.append(c)
        if c == "\\n":
            total_indent = 0
    return "".join(pieces)


## make catalog strings from xml elements
def _make_translation_catalog(classes):
    """Build an OrderedDict mapping each unique message to its Desc objects.

    Every DescList in `classes` also gets its `list` filled with the Desc
    objects extracted from its document.
    """
    unique_msgs = OrderedDict()
    for class_name in classes:
        desc_list = classes[class_name]
        for elem in desc_list.doc.iter():
            if elem.tag not in EXTRACT_TAGS:
                continue
            if not elem.text:
                continue
            ## when the text starts with a new line, it starts one line below the tag
            line_no = elem._start_line_number if elem.text[0] != "\n" else elem._start_line_number + 1
            desc_str = elem.text.strip()
            code_block_regions = _make_codeblock_regions(desc_str, desc_list.path)
            desc_msg = _strip_and_split_desc(desc_str, code_block_regions)
            desc_obj = Desc(line_no, desc_msg, desc_list)
            desc_list.list.append(desc_obj)
            unique_msgs.setdefault(desc_msg, []).append(desc_obj)
    return unique_msgs


## generate the catalog file
def _generate_translation_catalog_file(unique_msgs, output):
    """Write `unique_msgs` to `output` as a gettext template.

    On POSIX systems the file is additionally re-wrapped at 79 characters with
    msgmerge. If msgmerge cannot be run or fails, an error is printed and the
    unwrapped file is left in place.
    """
    with open(output, "w", encoding="utf8") as f:
        f.write(HEADER)
        for msg, descs in unique_msgs.items():
            if not msg:
                continue  ## ignore
            f.write("#:")
            for desc in descs:
                path = desc.desc_list.path.replace("\\", "/")
                if path.startswith("./"):
                    path = path[2:]
                f.write(" {}:{}".format(path, desc.line_no))
            f.write("\n")
            f.write('msgid "{}"\n'.format(msg))
            f.write('msgstr ""\n\n')

    ## TODO: what if 'nt'?
    if os.name == "posix":
        print("Wrapping template at 79 characters for compatibility with Weblate.")
        wrapped = "{}.wrap".format(output)
        ## An argument list (no shell) keeps paths with spaces or shell
        ## metacharacters intact, and -o avoids a shell redirection that would
        ## create an empty .wrap file even when msgmerge is missing.
        try:
            result = subprocess.run(["msgmerge", "-w79", "-o", wrapped, output, output])
        except OSError as e:
            print_error("Could not run msgmerge: {}".format(e))
            return
        if result.returncode != 0:
            print_error("msgmerge failed with exit code {}".format(result.returncode))
            ## never replace the catalog with a partial result
            if os.path.exists(wrapped):
                os.remove(wrapped)
            return
        shutil.move(wrapped, output)


def main():
    """Parse the command line, collect the XML files and write the catalog."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--path", "-p", default=".", help="The directory containing XML files to collect.")
    parser.add_argument("--output", "-o", default="translation_catlog.pot", help="The path to the output file.")
    args = parser.parse_args()

    ## resolve the output path before changing the working directory below
    output = os.path.abspath(args.output)
    if not os.path.isdir(os.path.dirname(output)) or not output.endswith(".pot"):
        print_error("Invalid output path: {}".format(output))
        sys.exit(1)
    if not os.path.isdir(args.path):
        print_error("Invalid working directory path: {}".format(args.path))
        sys.exit(1)

    os.chdir(args.path)
    print("Current working dir: {}\n".format(os.getcwd()))

    classes = OrderedDict()  ## dictionary of key=class_name, value=DescList objects
    _collect_classes_dir(".", classes)
    classes = OrderedDict(sorted(classes.items(), key=lambda kv: kv[0].lower()))
    unique_msgs = _make_translation_catalog(classes)
    _generate_translation_catalog_file(unique_msgs, output)


if __name__ == "__main__":
    main()