diff --git a/README.md b/README.md
index 82708b9..415abbf 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ Master document
: In SomEnergia, master documents are doc files stored in a given GoogleDrive folder.
Deployed document
-: An specific file derived from one or many master files, adapted by format, language, styling or utilities for the users to access it in an specific platform
+: A specific file derived from one master file, adapted by format, language, styling or utilities for the users to access it on a specific platform
: Example: The html page in the website, the html fragment to be embeded inside a web form, the pdf sent by email, the pdf to be signed by signaturit...
### Constituents of a document
@@ -121,39 +121,27 @@ About the output:
- This is convenient since this improves the diff effectivity but be aware of possible artifacts.
- Some languages split or merge the sentences in a different way.
-### Review md files after import
-
-- Compare imported md files to identify real changes and formatting or import errors
- - Against previous version in git
- - Against same document in other languages
-- TODO: List of usual import errors
-
### Extracting translation yaml files
```bash
-# first time, all languages
-legal-text-processor extract mydocument/??.md
-# successive, just the reference one
-legal-text-processor extract mydocument/es.md
+# just the reference one, for example my-document/es.md
+legal-text-processor extract my-document/es.md
```
-- Generates `mydocument/??.yaml` containing the translation
+- Generates `my-document/??.yaml` containing the translation
- This is done by identifying numbered titles and clauses
- Translation ID's are based on the title/clause numbering, not the content
- Some sentences start with something similar to a clause number generating an extra ID. This has to be corrected by hand.
-- TODO: let the script check the numbering sequence and report inconsistencies
### Extracting template for resynthesizing md's
The template is a file specifying how to compose translated texts to rebuild a translated markdown document.
```bash
-# first time, all languages
-legal-text-processor template mydocument/??.md
-# successive, just the reference one
-legal-text-processor template mydocument/es.md
+# just the reference one
+legal-text-processor template my-document/es.md
```
-- This generates `mydocument/template.md`.
+- This generates `my-document/template.md`.
- It will trigger colored messages if a previous `template.md` exists and any clause structure change is detected.
- This is useful when importing several languages, to spot structure differences among them.
@@ -163,21 +151,24 @@ Once translators have translated the new sentences in weblate,
the following command:
```bash
-legal-text-processor reintegrate my_output
+legal-text-processor reintegrate my-document/{lang}.yaml  # replace {lang} with es, ca, ...
```
It will regenerate markdowns back from the specified translations using the template.
### Generate output documents
-TODO: This step is still under heavy development,
-this documentation does not reflect reality
-and reality will surely change.
+This script generates the output documents in pdf or html format in the 'output' directory for all translations present in the my-document directory (the `??.md` files regenerated from the translation yamls in the previous step).
```bash
-legal-text-processor generate ....
+legal-text-processor generate ...
```
+Parameters for this script are:
+- **input_dir**: Input directory of the translations, one file for each translated language
+- **output_prefix**: Prefix for output files, by default 'output'
+- **target_type**: html or pdf, by default 'html'
+- **with_toc**: Adds a table of contents (html output ONLY), by default False. The TOC is generated automatically from the sections, ONLY if the `[TABLE]` placeholder is present in the translation yaml
diff --git a/legaltexts/cli.py b/legaltexts/cli.py
index 2fb2ce9..658299d 100644
--- a/legaltexts/cli.py
+++ b/legaltexts/cli.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
import typer
-from yamlns import ns
+import yaml
from pathlib import Path
import re
import itertools
@@ -129,7 +129,7 @@ def diff(old_file: Path, newcontent: list[str]):
)
return ''.join(difflines)
-def generate_pdf(markdown_file: Path, css_file: Path = "pagedlegaltext.css", output_pdf: Path = "output.pdf"):
+def generate_pdf_file(markdown_file: Path, css_file: Path = "pagedlegaltext.css", output_pdf: Path = "output.pdf"):
"""
Generates pdf from markdown file
"""
@@ -211,31 +211,39 @@ def template(markdown_file: list[Path]):
@app.command()
def reintegrate(translation_yaml: list[Path]):
"""Reconstructs markdown files from translation yamls."""
- for yaml_file in translation_yaml:
- ensure_extension(yaml_file, '.yaml')
- markdown_file = yaml_file.with_suffix('.md')
- template_file = yaml_file.parent/'template.md'
- step(f"Generating {markdown_file} from {yaml_file} and {template_file}")
- translation = ns.load(yaml_file)
- template = (yaml_file.parent/'template.md').read_text()
+ for yaml_file_name in translation_yaml:
+ ensure_extension(yaml_file_name, '.yaml')
+ markdown_file = yaml_file_name.with_suffix('.md')
+ template_file = yaml_file_name.parent/'template.md'
+ step(f"Generating {markdown_file} from {yaml_file_name} and {template_file}")
+ translation = yaml.safe_load(open(yaml_file_name, 'r'))
+ template = (yaml_file_name.parent/'template.md').read_text()
content = template.format(**translation)
markdown_file.write_text(content)
@app.command()
-def generate(target: Annotated[str, typer.Argument()]=''):
- if not target or target=='web-pdf':
- generate_web_pdf(
- master_path=Path('indexed-tariff-specific-conditions'),
- output_prefix='web-pdf'
+def generate(
+ input_dir: Annotated[str, typer.Argument(help="Input directory (name of weblate directory)")]='',
+ output_prefix: Annotated[str, typer.Option(help='Optional prefix for output files')]='output',
+ target_type: Annotated[str, typer.Option(help='html or pdf output')]='html',
+ with_toc: Annotated[bool, typer.Option("--with_toc")]=False
+ ):
+ if target_type=='pdf':
+ generate_pdf(
+ Path(input_dir),
+ output_prefix
)
- if not target or target=='webforms':
- generate_webforms_html(
- master_path=Path('general-conditions'),
- output_prefix='webforms'
+ if target_type=='html':
+ generate_html(
+ Path(input_dir),
+ output_prefix,
+ with_toc
)
+ if not input_dir:
+ print(f"Input directory should be especified")
-def generate_web_pdf(master_path: Path, output_prefix: str):
- """Generates a pdf for the website"""
+def generate_pdf(master_path: Path, output_prefix: str):
+ """Generates a pdf"""
document = master_path.name
output_dir.mkdir(exist_ok=True)
for markdown_file in master_path.glob('??.md'):
@@ -243,10 +251,10 @@ def generate_web_pdf(master_path: Path, output_prefix: str):
output_template = f'{output_prefix}-{document}-{lang}.pdf'
target = output_dir / output_template
step(f"Generating {target}...")
- generate_pdf(markdown_file, 'pagedlegaltext.css', target)
+ generate_pdf_file(markdown_file, 'pagedlegaltext.css', target)
-def generate_webforms_html(master_path: Path, output_prefix: str):
- """Generates an html fragment to be included in webforms LegalText view"""
+def generate_html(master_path: Path, output_prefix: str, with_toc: bool = False):
+ """Generates an html fragment"""
document = master_path.name
output_dir.mkdir(exist_ok=True)
for markdown_file in master_path.glob('??.md'):
@@ -258,24 +266,29 @@ def generate_webforms_html(master_path: Path, output_prefix: str):
step(f" Reading {markdown_file}...")
markdown_content = markdown_file.read_text()
- step(f" Generating TOC")
- markdown_with_toc = add_markdown_toc(
- markdown_content,
- place_holder='[TABLE]',
- title=tr(lang, 'TOC_TITLE'),
- top_level=2,
- )
-
- step(f" Generating html...")
- html = md_to_html_fragment(markdown_with_toc)
-
- step(f" Adding up-links...")
- top="\n\n"
- final_content = top+add_links_to_toc(
- html,
- text=f"{tr(lang, 'TOC_GO_TO_TOC')} ↑",
- target="#top",
- )
+ if with_toc:
+ step(f" Generating TOC")
+ markdown_with_toc = add_markdown_toc(
+ markdown_content,
+ place_holder='[TABLE]',
+ title=tr(lang, 'TOC_TITLE'),
+ top_level=2,
+ )
+ step(f" Generating html...")
+ html = md_to_html_fragment(markdown_with_toc)
+ else:
+ step(f" Generating html...")
+ html = md_to_html_fragment(markdown_content)
+
+ final_content = html
+ if with_toc:
+ step(f" Adding up-links...")
+ top=f"\n\n"
+ final_content = top+add_links_to_toc(
+ html,
+ text=f"{tr(lang, 'TOC_GO_TO_TOC')} ↑",
+ target=f"#{document}-top",
+ )
step(f" Writing output")
target.write_text(final_content)
diff --git a/legaltexts/translate.py b/legaltexts/translate.py
index 831fc2c..74aaec8 100644
--- a/legaltexts/translate.py
+++ b/legaltexts/translate.py
@@ -1,15 +1,15 @@
-from yamlns import ns
+import yaml
from pathlib import Path
from importlib.resources import files as package_files
def build_translations():
if hasattr(build_translations, "translations"):
return build_translations.translations
- translations = ns()
- for translation_file in package_files('legaltexts.i18n').iterdir():
- if translation_file.suffix != '.yaml': continue
- lang = translation_file.stem
- translations[lang] = ns.loads(translation_file.read_text())
+ translations = {}
+ for translation_file_name in package_files('legaltexts.i18n').iterdir():
+ if translation_file_name.suffix != '.yaml': continue
+ lang = translation_file_name.stem
+ translations[lang] = yaml.safe_load(open(translation_file_name, 'r'))
build_translations.translations = translations
return build_translations.translations
diff --git a/pyproject.toml b/pyproject.toml
index cdbee78..9632a5c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,7 +31,7 @@ classifiers = [
'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
]
dependencies = [
- 'yamlns',
+ 'PyYAML',
'typer',
'consolemsg',
'somutils',