♻ Make generation process generic

Replace yamlns by PyYAML Improve README
Som-Energia · Dec 12, 2024 · a2822d6 · a2822d6
1 parent c4bd30a
commit a2822d6
Show file tree

Hide file tree

Showing 4 changed files with 76 additions and 72 deletions.
diff --git a/README.md b/README.md
@@ -46,7 +46,7 @@ Master document
 : In SomEnergia, master documents are doc files stored in a given GoogleDrive folder.
 
 Deployed document
-: An specific file derived from one or many master files, adapted by format, language, styling or utilities for the users to access it in an specific platform
+: A specific file derived from one master file, adapted by format, language, styling or utilities for the users to access it on a specific platform
 : Example: The html page in the website, the html fragment to be embeded inside a web form, the pdf sent by email, the pdf to be signed by signaturit...
 
 ### Constituents of a document
@@ -121,39 +121,27 @@ About the output:
     - This is convenient since this improves the diff effectivity but be aware of possible artifacts.
     - Some languages split or merge the sentences in a different way.
 
-### Review md files after import
-
-- Compare imported md files to identify real changes and formatting or import errors
-    - Against previous version in git
-    - Against same document in other languages
-- TODO: List of usual import errors
-
 ### Extracting translation yaml files
 
 ```bash
-# first time, all languages
-legal-text-processor extract mydocument/??.md
-# successive, just the reference one
-legal-text-processor extract mydocument/es.md
+# just the reference one, for example my-document/es.md
+legal-text-processor extract my-document/es.md
 ```
 
-- Generates `mydocument/??.yaml` containing the translation
+- Generates `my-document/??.yaml` containing the translation
 - This is done by identifying numbered titles and clauses
 - Translation ID's are based on the title/clause numbering, not the content
 - Some sentences start with something similar to a clause number generating an extra ID. This has to be corrected by hand.
-- TODO: let the script check the numbering sequence and report inconsistencies
 
 ### Extracting template for resynthesizing md's
 
 The template is a file specifying how to compose translated texts to rebuild a translated markdown document.
 
 ```bash
-# first time, all languages
-legal-text-processor template mydocument/??.md
-# successive, just the reference one
-legal-text-processor template mydocument/es.md
+# just the reference one
+legal-text-processor template my-document/es.md
 ```
-- This generates `mydocument/template.md`.
+- This generates `my-document/template.md`.
 - It will trigger colored messages if a previous `template.md` exists and any clause structure change is detected.
 - This is useful when importing several languages, to spot structure differences among them.
 
@@ -163,21 +151,24 @@ Once translators have translated the new sentences in weblate,
 the following command:
 
 ```bash
-legal-text-processor reintegrate my_output
+legal-text-processor reintegrate my-document/{lang}.yaml  # replace {lang} with es, ca, ...
 ```
 
 It will regenerate markdowns back from the specified translations using the template.
 
 ### Generate output documents
 
-TODO: This step is still under heavy development,
-this documentation does not reflect reality
-and reality will surely change.
+This script generates the output document in pdf or html format in the directory 'output' for all translations present in the my-document directory (translations are the yaml files obtained in the previous step).
 
 ```bash
-legal-text-processor generate ....
+legal-text-processor generate ...
 ```
 
+Parameters for this script are:
+    **input_dir**: Input directory of the translations, containing one yaml for each translated language
+    **output_prefix**: Prefix for output files, by default 'output'
+    **target_type**: html or pdf, by default 'html'
+    **with_toc**: Adds a table of contents (TOC), ONLY for html files, by default False. The TOC is generated automatically from each section, and ONLY if the `[TABLE]` placeholder is present in the translation yaml
 
 
 
diff --git a/legaltexts/cli.py b/legaltexts/cli.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import typer
-from yamlns import ns
+import yaml
 from pathlib import Path
 import re
 import itertools
@@ -129,7 +129,7 @@ def diff(old_file: Path, newcontent: list[str]):
     )
     return ''.join(difflines)
 
-def generate_pdf(markdown_file: Path, css_file: Path = "pagedlegaltext.css", output_pdf: Path = "output.pdf"):
+def generate_pdf_file(markdown_file: Path, css_file: Path = "pagedlegaltext.css", output_pdf: Path = "output.pdf"):
     """
     Generates pdf from markdown file
     """
@@ -211,42 +211,50 @@ def template(markdown_file: list[Path]):
 @app.command()
 def reintegrate(translation_yaml: list[Path]):
     """Reconstructs markdown files from translation yamls."""
-    for yaml_file in translation_yaml:
-        ensure_extension(yaml_file, '.yaml')
-        markdown_file = yaml_file.with_suffix('.md')
-        template_file = yaml_file.parent/'template.md'
-        step(f"Generating {markdown_file} from {yaml_file} and {template_file}")
-        translation = ns.load(yaml_file)
-        template = (yaml_file.parent/'template.md').read_text()
+    for yaml_file_name in translation_yaml:
+        ensure_extension(yaml_file_name, '.yaml')
+        markdown_file = yaml_file_name.with_suffix('.md')
+        template_file = yaml_file_name.parent/'template.md'
+        step(f"Generating {markdown_file} from {yaml_file_name} and {template_file}")
+        translation = yaml.safe_load(open(yaml_file_name, 'r'))
+        template = (yaml_file_name.parent/'template.md').read_text()
         content = template.format(**translation)
         markdown_file.write_text(content)
 
 @app.command()
-def generate(target: Annotated[str, typer.Argument()]=''):
-    if not target or target=='web-pdf':
-        generate_web_pdf(
-            master_path=Path('indexed-tariff-specific-conditions'),
-            output_prefix='web-pdf'
+def generate(
+    input_dir: Annotated[str, typer.Argument(help="Input directory (name of weblate directory)")]='',
+    output_prefix: Annotated[str, typer.Option(help='Optional prefix for output files')]='output',
+    target_type: Annotated[str, typer.Option(help='html or pdf output')]='html',
+    with_toc: Annotated[bool, typer.Option("--with_toc")]=False
+    ):
+    if target_type=='pdf':
+        generate_pdf(
+            Path(input_dir),
+            output_prefix
         )
-    if not target or target=='webforms':
-        generate_webforms_html(
-            master_path=Path('general-conditions'),
-            output_prefix='webforms'
+    if target_type=='html':
+        generate_html(
+            Path(input_dir),
+            output_prefix,
+            with_toc
         )
+    if not input_dir:
+        print(f"Input directory should be especified")
 
-def generate_web_pdf(master_path: Path, output_prefix: str):
-    """Generates a pdf for the website"""
+def generate_pdf(master_path: Path, output_prefix: str):
+    """Generates a pdf"""
     document = master_path.name
     output_dir.mkdir(exist_ok=True)
     for markdown_file in master_path.glob('??.md'):
         lang = markdown_file.stem
         output_template = f'{output_prefix}-{document}-{lang}.pdf'
         target = output_dir / output_template
         step(f"Generating {target}...")
-        generate_pdf(markdown_file, 'pagedlegaltext.css', target)
+        generate_pdf_file(markdown_file, 'pagedlegaltext.css', target)
 
-def generate_webforms_html(master_path: Path, output_prefix: str):
-    """Generates an html fragment to be included in webforms LegalText view"""
+def generate_html(master_path: Path, output_prefix: str, with_toc: bool = False):
+    """Generates an html fragment"""
     document = master_path.name
     output_dir.mkdir(exist_ok=True)
     for markdown_file in master_path.glob('??.md'):
@@ -258,24 +266,29 @@ def generate_webforms_html(master_path: Path, output_prefix: str):
         step(f"  Reading {markdown_file}...")
         markdown_content = markdown_file.read_text()
 
-        step(f"  Generating TOC")
-        markdown_with_toc = add_markdown_toc(
-            markdown_content,
-            place_holder='[TABLE]',
-            title=tr(lang, 'TOC_TITLE'),
-            top_level=2,
-        )
-
-        step(f"  Generating html...")
-        html = md_to_html_fragment(markdown_with_toc)
-
-        step(f"  Adding up-links...")
-        top="<span id='top'></span>\n\n"
-        final_content = top+add_links_to_toc(
-            html,
-            text=f"{tr(lang, 'TOC_GO_TO_TOC')} ↑",
-            target="#top",
-        )
+        if with_toc:
+            step(f"  Generating TOC")
+            markdown_with_toc = add_markdown_toc(
+                markdown_content,
+                place_holder='[TABLE]',
+                title=tr(lang, 'TOC_TITLE'),
+                top_level=2,
+            )
+            step(f"  Generating html...")
+            html = md_to_html_fragment(markdown_with_toc)
+        else:
+            step(f"  Generating html...")
+            html = md_to_html_fragment(markdown_content)
+
+        final_content = html
+        if with_toc:
+            step(f"  Adding up-links...")
+            top=f"<span id={document}-top></span>\n\n"
+            final_content = top+add_links_to_toc(
+                html,
+                text=f"{tr(lang, 'TOC_GO_TO_TOC')} ↑",
+                target=f"#{document}-top",
+            )
 
         step(f"  Writing output")
         target.write_text(final_content)

diff --git a/legaltexts/translate.py b/legaltexts/translate.py
@@ -1,15 +1,15 @@
-from yamlns import ns
+import yaml
 from pathlib import Path
 from importlib.resources import files as package_files
 
 def build_translations():
     if hasattr(build_translations, "translations"):
         return build_translations.translations
-    translations = ns()
-    for translation_file in package_files('legaltexts.i18n').iterdir():
-        if translation_file.suffix != '.yaml': continue
-        lang = translation_file.stem
-        translations[lang] = ns.loads(translation_file.read_text())
+    translations = {}
+    for translation_file_name in package_files('legaltexts.i18n').iterdir():
+        if translation_file_name.suffix != '.yaml': continue
+        lang = translation_file_name.stem
+        translations[lang] = yaml.safe_load(open(translation_file_name, 'r'))
     build_translations.translations = translations
     return build_translations.translations
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -31,7 +31,7 @@ classifiers = [
     'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
 ]
 dependencies = [
-    'yamlns',
+    'PyYAML',
     'typer',
     'consolemsg',
     'somutils',