diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 706a303..e44fba0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,17 +17,14 @@ jobs: with: source-dir: . build-dir: build + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 - name: Create TeX file run: | mkdir out - build/language > out/ipa.tex - cp data/*.tex out/ - - name: Construct `table.pdf` - uses: xu-cheng/latex-action@v3 - with: - working_directory: out - root_file: table.tex - latexmk_use_xelatex: true + python python/moire_converter.py --input data/text.moi --output ${OUTPUT_DIRECTORY}/text.tex --format tex - name: Construct `text.pdf` uses: xu-cheng/latex-action@v3 with: diff --git a/python/moire/default.py b/python/moire/default.py new file mode 100644 index 0000000..f467452 --- /dev/null +++ b/python/moire/default.py @@ -0,0 +1,169 @@ +import sys +from argparse import ArgumentParser, Namespace +from typing import Any, Dict, List, Set, Tuple +from textwrap import dedent + +from moire.moire import Moire + +__author__ = "Sergey Vartanov" +__email__ = "me@enzet.ru" + +depth = 0 +status = {} +BLOCK_TAGS: Set[str] = { + "block", "body", "code", "title", "number", "list", "image", "table" +} # fmt: skip +Arguments = List[Any] + + +class TagNotImplementedError(NotImplementedError): + """Tag is not implemented in the parser.""" + + def __init__(self, tag: str = "") -> None: + self.tag: str = tag + + def __str__(self) -> str: + return f"Tag \\{self.tag} is not implemented in the parser" + + +class Default(Moire): + """Default tag declaration.""" + + def __init__(self) -> None: + super().__init__() + + def title(self, arg: Arguments) -> str: + """Document title.""" + return "" + + def header(self, arg: Arguments, level: int) -> str: + """Header. + + Arguments:
? + """ + raise TagNotImplementedError("header") + + def m(self, arg: Arguments) -> str: + """Monospaced text.""" + raise TagNotImplementedError("m") + + +class DefaultTeX(Default): + """TeX syntax.""" + + name = "Tex" + id_: str = "tex" + extension = "tex" + + escape_symbols = { + "_": "\\_", + } + block_tags = BLOCK_TAGS + headers: List[str] = [ + "section", "subsection", "subsubsection", "paragraph", "subparagraph" + ] # fmt: skip + + def body(self, arg: Arguments) -> str: + s = dedent( + """\ + \\documentclass[twoside,psfig]{article} + \\usepackage[utf8]{inputenc} + \\usepackage[russian]{babel} + \\usepackage{enumitem} + \\usepackage{float} + \\usepackage[margin=3cm,hmarginratio=1:1,top=32mm,columnsep=20pt] + {geometry} + \\usepackage{graphicx} + \\usepackage{hyperref} + \\usepackage{multicol} + \\begin{document} + """ + ) + s += self.parse(arg[0], in_block=True) + s += "\\end {document}" + return s + + def title(self, arg: Arguments) -> str: + s = f"\\title{{{self.parse(arg[0])}}}\n" + s += "\\maketitle" + return s + + def author(self, arg: Arguments) -> str: + return f"\\author{{{self.parse(arg[0])}}}" + + def header(self, arg: Arguments, number: int) -> str: + if number < 6: + return f"\\{self.headers[number - 1]}{{{self.parse(arg[0])}}}" + return self.parse(arg[0]) + + def table(self, arg: Arguments) -> str: + s = "\\begin{table}[h]\n\\begin{center}\n\\begin{tabular}{|" + max_tds = 0 + for tr in arg: + if isinstance(tr, list): + tds = 0 + for td in tr: + if isinstance(td, list): + tds += 1 + if tds > max_tds: + max_tds = tds + for k in range(max_tds): + s += "l|" + s += "}\n\\hline\n" + for tr in arg: + if isinstance(tr, list): + tds = [] + for td in tr: + if isinstance(td, list): + tds.append(td) + for td in tds[:-1]: + s += self.parse(td) + " & " + s += self.parse(tds[-1]) + s += " \\\\\n\\hline\n" + s += "\\end{tabular}\n\\end{center}\n\\end{table}\n" + return s + + def list__(self, arg: Arguments) -> str: + s = "\\begin{itemize}\n" + for item in arg: + s += f"\\item {self.parse(item)}\n\n" + s += "\\end{itemize}\n" + return s + + def abstract(self, arg: Arguments) -> str: + return ( + "\\begin{abstract}\n\n" + + self.parse(arg[0], in_block=True) + + "\\end{abstract}\n\n" + ) + + def date(self, arg: Arguments) -> str: + pass + + def text(self, arg: Arguments) -> str: + return self.parse(arg[0]) + "\n\n" + + def m(self, arg: Arguments) -> str: + return "{\\tt " + self.parse(arg[0]) + "}" + + +if __name__ == "__main__": + parser: ArgumentParser = ArgumentParser() + + parser.add_argument("-i", "--input", help="Moire input file", required=True) + parser.add_argument("-o", "--output", help="output file", required=True) + parser.add_argument("-f", "--format", help="output format", required=True) + + options: Namespace = parser.parse_args(sys.argv[1:]) + + with open(options.input, "r") as input_file: + converter: Moire = getattr(sys.modules[__name__], options.format)() + output: str = converter.convert(input_file.read()) + + if not output: + print("Fatal: output was no produced.") + sys.exit(1) + + with open(options.output, "w+") as output_file: + output_file.write(output) + print(f"Converted to {options.output}.") diff --git a/python/moire/main.py b/python/moire/main.py new file mode 100755 index 0000000..0b4d3e2 --- /dev/null +++ b/python/moire/main.py @@ -0,0 +1,64 @@ +""" +Command line Python tool for file conversion from Moire markup language to other +formats, such as HTML, TeX, etc. +""" + +import logging +import sys +from argparse import ArgumentParser, Namespace +from pathlib import Path +from typing import List, Optional + +from moire.default import Default +from moire.moire import Moire + +__author__ = "Sergey Vartanov" +__email__ = "me@enzet.ru" + + +def main(arguments: List[str] = None, top_class=None): + if not arguments: + arguments = sys.argv[1:] + if not top_class: + top_class = Default + + logging.basicConfig(level=logging.INFO, format="%(message)s") + + parser: ArgumentParser = ArgumentParser() + + parser.add_argument("-i", "--input", help="Moire input file", required=True) + parser.add_argument("-o", "--output", help="output file") + parser.add_argument("-f", "--format", help="output format", required=True) + parser.add_argument("--wrap", action="store_true", default=True) + + options: Namespace = parser.parse_args(arguments) + + converter: Optional[Moire] = None + for class_ in top_class.__subclasses__(): + if class_.id_ == options.format: + converter = class_() + + if not converter: + logging.fatal( + f"No converter class found for format `{options.format}`." + ) + exit(1) + + with Path(options.input).open() as input_file: + converter.file_name = options.input + output: str = converter.convert(input_file.read(), wrap=options.wrap) + + if not output: + logging.fatal("No output was produced.") + sys.exit(1) + + if options.output: + with open(options.output, "w") as output_file: + output_file.write(output) + logging.info(f"Converted to {options.output}.") + else: + sys.stdout.write(output) + + +if __name__ == "__main__": + main(sys.argv[1:], Default) diff --git a/python/moire/moire.py b/python/moire/moire.py new file mode 100644 index 0000000..619ce21 --- /dev/null +++ b/python/moire/moire.py @@ -0,0 +1,490 @@ +""" +Moire, a simple extensible markup language. + +See http://github.com/enzet/Moire +""" + +import logging +import sys +from dataclasses import dataclass +from io import StringIO + +from typing import Any, Callable, Dict, List, Optional, Tuple + +__author__: str = "Sergey Vartanov" +__email__: str = "me@enzet.ru" + +# Constants + +COMMENT_BEGIN: str = "/*" +COMMENT_END: str = "*/" +TAG_MARKER: str = "\\" +ARGUMENT_START: str = "{" +ARGUMENT_END: str = "}" +PARAGRAPH_DELIMITER: str = "\n\n" + +SPACES: str = " \n\t\r" + + +class Root: + def __init__(self): + self.elements: list = [] + + def add(self, element) -> None: + self.elements.append(element) + + +@dataclass +class Tag: + """Moire tag definition. + + Tag has name and parameters: + {} ... {}. + """ + + id: str + parameters: list + + def __eq__(self, other: "Tag") -> bool: + if not isinstance(other, type(self)): + return False + if self.id != other.id: + return False + if len(self.parameters) != len(other.parameters): + return False + for i in range(len(self.parameters)): + if self.parameters[i] != other.parameters[i]: + return False + return True + + def is_header(self) -> bool: + return self.id in "123456" + + +@dataclass +class Lexeme: + type: str + content: Optional[str] = None + + +class Tree: + def __init__(self, parent, children, element) -> None: + self.element = element + self.parent = parent + self.children = children + self.number = 0 + + def pr(self) -> None: + print(self.element) + for child in self.children: + child.pr() + + def find(self, text: str) -> Optional["Tree"]: + if ( + len(self.element.parameters) > 1 + and self.element.parameters[1][0] == text + ): + return self + for child in self.children: + a = child.find(text) + if a: + return a + return None + + +@dataclass +class Argument: + array: List + spec: Dict[str, Any] + + def __getitem__(self, key: int): + return self.array[key] + + def __len__(self): + return len(self.array) + + +def trim_inside(text: str) -> str: + """Replace all space symbol sequences with one space character.""" + result: str = "" + index: int = 0 + while index < len(text): + if text[index] in SPACES: + result += " " + while index < len(text) and text[index] in SPACES: + index += 1 + continue + else: + result += text[index] + index += 1 + return result + + +def preprocess_comments(text: str): + """Text to text processing: comments removing.""" + preprocessed: str = "" + adding: bool = True + i: int = 0 + while i < len(text): + if text[i : i + len(COMMENT_BEGIN)] == COMMENT_BEGIN: + adding = False + i += 1 + elif text[i : i + len(COMMENT_END)] == COMMENT_END: + adding = True + i += 1 + else: + if adding: + preprocessed += text[i] + i += 1 + return preprocessed + + +def is_letter_or_digit(char: str) -> bool: + return "a" <= char <= "z" or "A" <= char <= "Z" or "0" <= char <= "9" + + +def lexer(text) -> (List[Lexeme], List[int]): + """Parse formatted preprocessed text to a list of lexemes.""" + in_tag: bool = False # Lexer position in tag name + # Lexer position in space between tag name and first "{" + in_space: bool = True + lexemes: List[Lexeme] = [] + positions: List[int] = [] + tag_name: str = "" + word: str = "" + + index: int = 0 + + while index < len(text): + char = text[index] + if char == TAG_MARKER: + if index == len(text) - 1: + logging.error("Backslash at the end of string.") + elif not is_letter_or_digit(text[index + 1]): + if word != "": + lexemes.append(Lexeme("text", word)) + positions.append(index) + word = "" + lexemes.append(Lexeme("symbol", text[index + 1])) + positions.append(index + 1) + index += 1 + else: + if word != "": + lexemes.append(Lexeme("text", word)) + positions.append(index) + word = "" + in_tag = True + tag_name = "" + elif char == ARGUMENT_START: + if in_tag or in_space: + in_tag = False + if tag_name != "": + lexemes.append(Lexeme("tag", tag_name)) + positions.append(index) + lexemes.append(Lexeme("parameter_begin")) + positions.append(index) + tag_name = "" + word = "" + elif char == ARGUMENT_END: + if word != "": + lexemes.append(Lexeme("text", word)) + positions.append(index) + word = "" + lexemes.append(Lexeme("parameter_end")) + positions.append(index) + elif char in SPACES: + if in_tag: + in_tag = False + in_space = True + else: + word += char + else: + if in_tag: + tag_name += char + else: + word += char + index += 1 + if word != "": + lexemes.append(Lexeme("text", word)) + positions.append(index) + + return lexemes, positions + + +def get_intermediate(lexemes, positions, level, index=0): + """Get intermediate representation.""" + tag: Optional[Tag] = None + result = [] + while index < len(lexemes): + item = lexemes[index] + if item.type == "tag": + if tag: + result.append(tag) + tag = Tag(item.content, []) + elif item.type == "parameter_begin": + level += 1 + if not tag: + index += 1 + index, res = get_intermediate(lexemes, positions, level, index) + result.append(res) + else: + index += 1 + index, res = get_intermediate(lexemes, positions, level, index) + tag.parameters.append(res) + index += 1 + continue + elif item.type == "parameter_end": + level -= 1 + if level < 0: + position = positions[index] + logging.error(f"Lexer error at {position}.") + index += 1 + sys.exit(1) + if tag: + result.append(tag) + return index, result + elif item.type == "text": + if tag: + result.append(tag) + tag = None + result.append(item.content) + elif item.type == "symbol": + if tag: + result.append(tag) + tag = None + result.append(item.content) + index += 1 + if tag: + result.append(tag) + return index, result + + +class Moire: + name: str = "Empty format" + block_tags: List[str] = [] + escape_symbols: Dict[str, str] = {} + + def __init__(self, file_name: Optional[str] = None): + self.index: int = 0 + self.status: Dict[str, Any] = {"missing_tags": set()} + self.file_name: Optional[str] = file_name + + def init(self): + """Some preliminary actions.""" + pass + + def finish(self): + """Some finish actions.""" + pass + + def escape(self, text: str) -> str: + for key in self.escape_symbols: + text = text.replace(key, self.escape_symbols[key]) + return text + + def trim(self, text: str) -> str: + if text.startswith("\n"): + text = text[1:] + if text.endswith("\n"): + text = text[:-1] + return text + + def get_ids(self, content: str) -> List[Tuple[str, int]]: + """Get all header identifiers. + + :param content: input content in the Moire format + :return: list of tuples (id, level), level is 0 for labels + """ + ids: List[Tuple[str, int]] = [] + intermediate_representation = self.get_ir(content) + for element in intermediate_representation: + if isinstance(element, Tag): + if element.is_header() and len(element.parameters) >= 2: + ids.append((element.parameters[1][0], int(element.id))) + if element.id == "label": + ids.append((element.parameters[0][0], 0)) + return ids + + def convert( + self, input_data: str, wrap: bool = True, in_block: bool = False + ) -> str: + """Convert Moire text without includes but with comments artifacts to + selected format. + """ + ir = self.get_ir(input_data) + + # Construct content table + + tree: Tree = Tree(None, [], Tag("0", ["_", "_"])) + content_root: Tree = tree + for part in ir: + if not isinstance(part, Tag) or part.id not in "123456": + continue + element: Tree = Tree(tree, [], part) + if int(part.id) > int(tree.element.id): + tree.children.append(element) + element.number = len(tree.children) - 1 + tree = tree.children[-1] + else: + while int(part.id) <= int(tree.element.id): + tree = tree.parent + tree.children.append(element) + element.number = len(tree.children) - 1 + element.parent = tree + tree = tree.children[-1] + self.status["tree"] = content_root + + # Wrap whole text with "body" tag + + if wrap: + ir = Tag("body", [ir, content_root]) + + self.init() + self.parse(ir, mode="pre_") + result: str = self.parse(ir, in_block=in_block) + self.finish() + + return result + + def parse( + self, + text, + in_block: bool = False, + depth: int = 0, + mode: str = "", + spec: Optional[Dict[str, Any]] = None, + ) -> str: + """Element parsing into formatted text. + + Element may be plain text, tag, or list of elements. + """ + if spec is None: + spec = {} + + if not text: + return "" + elif isinstance(text, str): + if "trim" in spec and not spec["trim"]: + return self.escape(text) + else: + return self.escape(trim_inside(text)) + elif isinstance(text, Tag): + key: str = "header" if (text.id in "123456") else text.id + + method: Optional[Callable] = None + try: + method = getattr(self, mode + key) + except AttributeError: + pass + if method is None: + try: + method = getattr(self, mode + key + "__") + except AttributeError: + pass + if method is not None: + arg = Argument(text.parameters, spec) + if key == "header": + return method(arg, int(text.id)) + else: + return method(arg) + else: + if mode == "": + self.status["missing_tags"].add(key) + assert False, ( + f"Unknown tag `{mode}{key}`" + + (f" in `{self.file_name}`" if self.file_name else "") + + "." + ) + else: + return "" + elif isinstance(text, list): + builder = StringIO() + inner_block = [] + for item in text: + if in_block: + if isinstance(item, Tag) and item.id in self.block_tags: + if inner_block: + builder.write(self.process_inner_block(inner_block)) + inner_block = [] + builder.write( + self.parse( + item, + in_block=in_block, + depth=depth + 1, + mode=mode, + spec=spec, + ) + ) + else: + inner_block.append(item) + else: + parsed = self.parse( + item, + in_block=in_block, + depth=depth + 1, + mode=mode, + spec=spec, + ) + if parsed is not None: + builder.write(parsed) + if inner_block: + builder.write(self.process_inner_block(inner_block)) + return builder.getvalue() + else: + assert False, f"Part is of type {type(text)}" + + def clear(self, text) -> str: + if isinstance(text, list): + return self.escape("".join([x for x in text if isinstance(x, str)])) + return self.escape(text) + + def get_ir(self, text: str, offset: int = 0, prefix: str = ""): + """Get intermediate representation.""" + text = preprocess_comments(text) + lexemes, positions = lexer(text) + index, raw_ir = get_intermediate(lexemes, positions, 0) + + resulted_ir = [] + + for item in raw_ir: + if isinstance(item, Tag): + if item.is_header() and (offset or prefix): + new_item = Tag(str(int(item.id) + offset), item.parameters) + resulted_ir.append(new_item) + else: + resulted_ir.append(item) + else: + resulted_ir.append(item) + + return resulted_ir + + def process_inner_block(self, inner_block): + """Wrap parts of inner block element with text tag.""" + if len(inner_block) == 1 and inner_block[0] == "": + return "" + paragraphs = [] + paragraph = [] + for item in inner_block: + if isinstance(item, str): + previous = 0 + delimiter = item.find(PARAGRAPH_DELIMITER) + while delimiter != -1: + content = item[previous:delimiter] + if content != "" or previous == 0: + paragraph.append(content) + paragraphs.append(paragraph) + paragraph = [] + previous = delimiter + len(PARAGRAPH_DELIMITER) + delimiter = item.find(PARAGRAPH_DELIMITER, delimiter + 1) + paragraph.append(item[previous:]) + else: + paragraph.append(item) + paragraphs.append(paragraph) + s = "" + for paragraph in paragraphs: + if isinstance(paragraph[0], str): + paragraph[0] = paragraph[0].lstrip() + if isinstance(paragraph[-1], str): + paragraph[-1] = paragraph[-1].rstrip() + s += str(self.parse(Tag("text", [paragraph]))) + return s diff --git a/python/moire_converter.py b/python/moire_converter.py index ad55f7e..fa06121 100644 --- a/python/moire_converter.py +++ b/python/moire_converter.py @@ -1,4 +1,4 @@ -from moire.default import Default, DefaultTeX, DefaultMarkdown +from moire.default import Default, DefaultTeX from moire.main import main from textwrap import dedent import subprocess @@ -150,10 +150,5 @@ def symbol_table(self, arg) -> str: return proc.stdout.read().decode() -class LanguageMarkdown(Language, DefaultMarkdown): - def ipa(self, arg) -> str: - return self.parse(arg[0]) - - if __name__ == "__main__": main(sys.argv[1:], Language)