diff --git a/README.md b/README.md index 82708b9..415abbf 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ Master document : In SomEnergia, master documents are doc files stored in a given GoogleDrive folder. Deployed document -: An specific file derived from one or many master files, adapted by format, language, styling or utilities for the users to access it in an specific platform +: A specific file derived from one master file, adapted by format, language, styling or utilities for the users to access it in a specific platform : Example: The html page in the website, the html fragment to be embeded inside a web form, the pdf sent by email, the pdf to be signed by signaturit... ### Constituents of a document @@ -121,39 +121,27 @@ About the output: - This is convenient since this improves the diff effectivity but be aware of possible artifacts. - Some languages split or merge the sentences in a different way. -### Review md files after import - -- Compare imported md files to identify real changes and formatting or import errors - - Against previous version in git - - Against same document in other languages -- TODO: List of usual import errors - ### Extracting translation yaml files ```bash -# first time, all languages -legal-text-processor extract mydocument/??.md -# successive, just the reference one -legal-text-processor extract mydocument/es.md +# just the reference one, for example my-document/es.md +legal-text-processor extract my-document/es.md ``` -- Generates `mydocument/??.yaml` containing the translation +- Generates `my-document/??.yaml` containing the translation - This is done by identifying numbered titles and clauses - Translation ID's are based on the title/clause numbering, not the content - Some sentences start with something similar to a clause number generating an extra ID. This has to be corrected by hand. 
-- TODO: let the script check the numbering sequence and report inconsistencies ### Extracting template for resynthesizing md's The template is a file specifying how to compose translated texts to rebuild a translated markdown document. ```bash -# first time, all languages -legal-text-processor template mydocument/??.md -# successive, just the reference one -legal-text-processor template mydocument/es.md +# just the reference one +legal-text-processor template my-document/es.md ``` -- This generates `mydocument/template.md`. +- This generates `my-document/template.md`. - It will trigger colored messages if a previous `template.md` exists and any clause structure change is detected. - This is useful when importing several languages, to spot structure differences among them. @@ -163,21 +151,24 @@ Once translators have translated the new sentences in weblate, the following command: ```bash -legal-text-processor reintegrate my_output +legal-text-processor reintegrate my-document/{lang}.yaml (replace lang by es or ca or ...) ``` It will regenerate markdowns back from the specified translations using the template. ### Generate output documents -TODO: This step is still under heavy development, -this documentation does not reflect reality -and reality will surely change. +This script generates the output document in pdf or html format in directory 'output' for all translations present in my-document directory (translations are yaml files obtained in the previous step). ```bash -legal-text-processor generate .... +legal-text-processor generate ... ``` +Parameters for this script are: + **input_dir**: Input directory of the translations, yamls for each translated language + **output_prefix**: Prefix for output files, by default 'output' + **target_type**: html or pdf, by default 'html' + **with_toc**: To add an index table ONLY for html files, by default False. 
TOC is generated automatically (ONLY if TABLE placeholder is present in the translation yaml) from each section diff --git a/legaltexts/cli.py b/legaltexts/cli.py index 2fb2ce9..658299d 100644 --- a/legaltexts/cli.py +++ b/legaltexts/cli.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import typer -from yamlns import ns +import yaml from pathlib import Path import re import itertools @@ -129,7 +129,7 @@ def diff(old_file: Path, newcontent: list[str]): ) return ''.join(difflines) -def generate_pdf(markdown_file: Path, css_file: Path = "pagedlegaltext.css", output_pdf: Path = "output.pdf"): +def generate_pdf_file(markdown_file: Path, css_file: Path = "pagedlegaltext.css", output_pdf: Path = "output.pdf"): """ Generates pdf from markdown file """ @@ -211,31 +211,39 @@ def template(markdown_file: list[Path]): @app.command() def reintegrate(translation_yaml: list[Path]): """Reconstructs markdown files from translation yamls.""" - for yaml_file in translation_yaml: - ensure_extension(yaml_file, '.yaml') - markdown_file = yaml_file.with_suffix('.md') - template_file = yaml_file.parent/'template.md' - step(f"Generating {markdown_file} from {yaml_file} and {template_file}") - translation = ns.load(yaml_file) - template = (yaml_file.parent/'template.md').read_text() + for yaml_file_name in translation_yaml: + ensure_extension(yaml_file_name, '.yaml') + markdown_file = yaml_file_name.with_suffix('.md') + template_file = yaml_file_name.parent/'template.md' + step(f"Generating {markdown_file} from {yaml_file_name} and {template_file}") + translation = yaml.safe_load(open(yaml_file_name, 'r')) + template = (yaml_file_name.parent/'template.md').read_text() content = template.format(**translation) markdown_file.write_text(content) @app.command() -def generate(target: Annotated[str, typer.Argument()]=''): - if not target or target=='web-pdf': - generate_web_pdf( - master_path=Path('indexed-tariff-specific-conditions'), - output_prefix='web-pdf' +def generate( + input_dir: Annotated[str, 
typer.Argument(help="Input directory (name of weblate directory)")]='', + output_prefix: Annotated[str, typer.Option(help='Optional prefix for output files')]='output', + target_type: Annotated[str, typer.Option(help='html or pdf output')]='html', + with_toc: Annotated[bool, typer.Option("--with_toc")]=False + ): + if target_type=='pdf': + generate_pdf( + Path(input_dir), + output_prefix ) - if not target or target=='webforms': - generate_webforms_html( - master_path=Path('general-conditions'), - output_prefix='webforms' + if target_type=='html': + generate_html( + Path(input_dir), + output_prefix, + with_toc ) + if not input_dir: + print(f"Input directory should be especified") -def generate_web_pdf(master_path: Path, output_prefix: str): - """Generates a pdf for the website""" +def generate_pdf(master_path: Path, output_prefix: str): + """Generates a pdf""" document = master_path.name output_dir.mkdir(exist_ok=True) for markdown_file in master_path.glob('??.md'): @@ -243,10 +251,10 @@ def generate_web_pdf(master_path: Path, output_prefix: str): output_template = f'{output_prefix}-{document}-{lang}.pdf' target = output_dir / output_template step(f"Generating {target}...") - generate_pdf(markdown_file, 'pagedlegaltext.css', target) + generate_pdf_file(markdown_file, 'pagedlegaltext.css', target) -def generate_webforms_html(master_path: Path, output_prefix: str): - """Generates an html fragment to be included in webforms LegalText view""" +def generate_html(master_path: Path, output_prefix: str, with_toc: bool = False): + """Generates an html fragment""" document = master_path.name output_dir.mkdir(exist_ok=True) for markdown_file in master_path.glob('??.md'): @@ -258,24 +266,29 @@ def generate_webforms_html(master_path: Path, output_prefix: str): step(f" Reading {markdown_file}...") markdown_content = markdown_file.read_text() - step(f" Generating TOC") - markdown_with_toc = add_markdown_toc( - markdown_content, - place_holder='[TABLE]', - title=tr(lang, 
'TOC_TITLE'), - top_level=2, - ) - - step(f" Generating html...") - html = md_to_html_fragment(markdown_with_toc) - - step(f" Adding up-links...") - top="\n\n" - final_content = top+add_links_to_toc( - html, - text=f"{tr(lang, 'TOC_GO_TO_TOC')} ↑", - target="#top", - ) + if with_toc: + step(f" Generating TOC") + markdown_with_toc = add_markdown_toc( + markdown_content, + place_holder='[TABLE]', + title=tr(lang, 'TOC_TITLE'), + top_level=2, + ) + step(f" Generating html...") + html = md_to_html_fragment(markdown_with_toc) + else: + step(f" Generating html...") + html = md_to_html_fragment(markdown_content) + + final_content = html + if with_toc: + step(f" Adding up-links...") + top=f"\n\n" + final_content = top+add_links_to_toc( + html, + text=f"{tr(lang, 'TOC_GO_TO_TOC')} ↑", + target=f"#{document}-top", + ) step(f" Writing output") target.write_text(final_content) diff --git a/legaltexts/translate.py b/legaltexts/translate.py index 831fc2c..74aaec8 100644 --- a/legaltexts/translate.py +++ b/legaltexts/translate.py @@ -1,15 +1,15 @@ -from yamlns import ns +import yaml from pathlib import Path from importlib.resources import files as package_files def build_translations(): if hasattr(build_translations, "translations"): return build_translations.translations - translations = ns() - for translation_file in package_files('legaltexts.i18n').iterdir(): - if translation_file.suffix != '.yaml': continue - lang = translation_file.stem - translations[lang] = ns.loads(translation_file.read_text()) + translations = {} + for translation_file_name in package_files('legaltexts.i18n').iterdir(): + if translation_file_name.suffix != '.yaml': continue + lang = translation_file_name.stem + translations[lang] = yaml.safe_load(open(translation_file_name, 'r')) build_translations.translations = translations return build_translations.translations diff --git a/pyproject.toml b/pyproject.toml index cdbee78..9632a5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ 
classifiers = [ 'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)', ] dependencies = [ - 'yamlns', + 'PyYAML', 'typer', 'consolemsg', 'somutils',