Loading .gitlab-ci.yml +9 −0 Original line number Diff line number Diff line Loading @@ -51,6 +51,15 @@ Build generateBaseline docker image: - generateBaseline/postprocessing.py - generateBaseline/file_helper.py Build docx-field-refresh docker image: stage: build script: - docker build --tag forge.etsi.org:5050/cti/md-specs-dev/tools/docx-field-refresh:$CI_COMMIT_BRANCH -f generateBaseline/dockerfile.soffice . - docker push forge.etsi.org:5050/cti/md-specs-dev/tools/docx-field-refresh:$CI_COMMIT_REF_NAME rules: - if: $CI_COMMIT_BRANCH && $CI_PROJECT_NAME == "tools" changes: - generateBaseline/dockerfile.soffice Build generateSpecWebSite docker image: stage: build Loading generateBaseline/dockerfile.soffice 0 → 100644 +15 −0 Original line number Diff line number Diff line FROM ubuntu:22.04 ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y \ libreoffice \ libreoffice-writer \ fonts-dejavu-core \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* # Working directory where files will be mounted WORKDIR /data # Default command: update fields in input.docx -> output.docx ENTRYPOINT ["/usr/bin/soffice"] generateBaseline/postprocessing.py +62 −1 Original line number Diff line number Diff line Loading @@ -9,7 +9,7 @@ import tempfile import shutil from typing import Union import argparse import subprocess #import win32com.client #pip install pywin32 from docx import Document #pip install python-docx Loading Loading @@ -114,6 +114,67 @@ def update_word_fields(config_path: Union[dict, str]): finally: word.Quit() def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> str: """ Refreshes fields in a DOCX file using LibreOffice inside a Docker container. The refreshed file overwrites the input file. Parameters ---------- input_path : str Path to the input .docx file. image : str, optional Name of the Docker image (default: 'docx-field-refresh'). Returns ------- str Path to the refreshed (overwritten) .docx file. 
""" input_path = Path(input_path).resolve() if not input_path.exists() or input_path.suffix.lower() != ".docx": raise FileNotFoundError(f"Invalid DOCX path: {input_path}") with tempfile.TemporaryDirectory() as tmpdir: tmpdir_path = Path(tmpdir) temp_input = tmpdir_path / input_path.name shutil.copy2(input_path, temp_input) # Run LibreOffice container on this specific file cmd = [ "docker", "run", "--rm", "-v", f"{tmpdir_path}:/data", image, "--headless", "--convert-to", "docx", "--infilter=writer8", "--outdir", "/data", f"/data/{input_path.name}", ] print(f"Running command:\n{' '.join(cmd)}\n") subprocess.run(cmd, check=True) generated = tmpdir_path / f"{input_path.stem}.docx" if not generated.exists(): raise RuntimeError("LibreOffice did not produce an output file.") # Overwrite the original file shutil.copy2(generated, input_path) print(f"✅ Refreshed DOCX updated in place: {input_path}") return str(input_path) def refresh_docx_fields_cli(): parser = argparse.ArgumentParser(description="Refresh DOCX fields using LibreOffice in Docker (in-place).") parser.add_argument("input", help="Path to input DOCX file.") parser.add_argument("--image", default="docx-field-refresh", help="Docker image name (default: docx-field-refresh)") args = parser.parse_args() refresh_docx_fields(args.input, args.image) def insert_page_break_before_long_tables(config): docx_path = config.get("output_docx") output_path = config.get("output_docx") Loading generateBaseline/setup.py +1 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,7 @@ setup( "check_multipage_tables=postprocessing:insert_page_break_before_long_tables", #"apply_etsi_styling: postprocessing:postprocess_etsi_styles", "update_toc=postprocessing:update_toc_cli", "refresh_docx_fields=postprocessing:refresh_docx_fields_cli", ] } Loading Loading
# .gitlab-ci.yml +9 −0
# @@ -51,6 +51,15 @@ Build generateBaseline docker image:
# (unchanged diff context: tail of the preceding job's file list, truncated in this view)
#       - generateBaseline/postprocessing.py
#       - generateBaseline/file_helper.py

# Builds the LibreOffice ("soffice") helper image from
# generateBaseline/dockerfile.soffice and pushes it to the forge.etsi.org
# registry, tagged with the branch name.
Build docx-field-refresh docker image:
  stage: build
  script:
    # NOTE(review): the build tags with $CI_COMMIT_BRANCH while the push uses
    # $CI_COMMIT_REF_NAME. These presumably resolve to the same value in the
    # branch pipelines selected by the rule below -- confirm, and consider
    # using a single variable for both commands.
    - docker build --tag forge.etsi.org:5050/cti/md-specs-dev/tools/docx-field-refresh:$CI_COMMIT_BRANCH -f generateBaseline/dockerfile.soffice .
    - docker push forge.etsi.org:5050/cti/md-specs-dev/tools/docx-field-refresh:$CI_COMMIT_REF_NAME
  rules:
    # Run only for branch pipelines of the "tools" project, and only when the
    # Dockerfile itself changed.
    - if: $CI_COMMIT_BRANCH && $CI_PROJECT_NAME == "tools"
      changes:
        - generateBaseline/dockerfile.soffice

# (unchanged diff context: head of the following job, truncated in this view)
# Build generateSpecWebSite docker image:
#   stage: build
# generateBaseline/dockerfile.soffice 0 → 100644 +15 −0
#
# Image that packages LibreOffice so that `soffice` can be run inside a
# container against files mounted at /data.
FROM ubuntu:22.04

# Keep apt/debconf from prompting during the image build.
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install -y \
    libreoffice \
    libreoffice-writer \
    fonts-dejavu-core \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Working directory where files will be mounted
WORKDIR /data

# The entrypoint is the bare soffice binary with no default arguments:
# callers must pass every option (e.g. --headless, --convert-to, --outdir and
# the input file) as arguments to `docker run`.
ENTRYPOINT ["/usr/bin/soffice"]
# generateBaseline/postprocessing.py +62 −1
# Diff-viewer context, preserved as comments (these lines belong to parts of
# the file that are only partially visible in this view):
#
# @@ -9,7 +9,7 @@
#   import tempfile
#   import shutil
#   from typing import Union
#   import argparse
#   import subprocess
#   #import win32com.client #pip install pywin32
#   from docx import Document #pip install python-docx
#
# @@ -114,6 +114,67 @@ def update_word_fields(config_path: Union[dict, str]):
#       finally:
#           word.Quit()


def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> str:
    """
    Refreshes fields in a DOCX file using LibreOffice inside a Docker container.

    The file is copied into a temporary directory, which is mounted into the
    container at ``/data``. LibreOffice is asked to convert the copy to DOCX,
    writing the result into a separate ``out`` subdirectory, and the converted
    file then overwrites the original input file.

    Parameters
    ----------
    input_path : str
        Path to the input .docx file.
    image : str, optional
        Name of the Docker image (default: 'docx-field-refresh'). Its
        entrypoint receives the LibreOffice command-line options built here.

    Returns
    -------
    str
        Path to the refreshed (overwritten) .docx file.

    Raises
    ------
    FileNotFoundError
        If ``input_path`` is not an existing regular file with a ``.docx``
        suffix.
    subprocess.CalledProcessError
        If the ``docker run`` command exits with a non-zero status.
    RuntimeError
        If the container finished but no converted file was produced.
    """
    input_path = Path(input_path).resolve()
    # is_file() (rather than exists()) rejects directories that merely carry a
    # .docx suffix, so they fail here with a clear message instead of later
    # inside shutil.copy2.
    if not input_path.is_file() or input_path.suffix.lower() != ".docx":
        raise FileNotFoundError(f"Invalid DOCX path: {input_path}")

    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir_path = Path(tmpdir)
        temp_input = tmpdir_path / input_path.name
        shutil.copy2(input_path, temp_input)

        # Convert into a dedicated output directory. Writing next to the input
        # would give the result the same path as the input copy, so the
        # existence check below could never detect a failed conversion.
        out_dirname = "out"
        out_dir = tmpdir_path / out_dirname
        out_dir.mkdir()

        # Run LibreOffice container on this specific file
        cmd = [
            "docker", "run", "--rm",
            "-v", f"{tmpdir_path}:/data",
            image,
            "--headless",
            "--convert-to", "docx",
            # NOTE(review): "writer8" looks like LibreOffice's ODF text filter
            # name rather than a DOCX import filter -- confirm it is intended
            # for .docx input. Kept unchanged to preserve existing behaviour.
            "--infilter=writer8",
            "--outdir", f"/data/{out_dirname}",
            f"/data/{input_path.name}",
        ]

        print(f"Running command:\n{' '.join(cmd)}\n")
        subprocess.run(cmd, check=True)

        generated = out_dir / f"{input_path.stem}.docx"
        if not generated.is_file():
            raise RuntimeError("LibreOffice did not produce an output file.")

        # Overwrite the original file
        shutil.copy2(generated, input_path)

    print(f"✅ Refreshed DOCX updated in place: {input_path}")
    return str(input_path)


def refresh_docx_fields_cli():
    """
    Command-line entry point for :func:`refresh_docx_fields`.

    Parses the positional ``input`` path and the optional ``--image`` name
    from ``sys.argv`` and refreshes the given DOCX file in place.
    """
    parser = argparse.ArgumentParser(
        description="Refresh DOCX fields using LibreOffice in Docker (in-place)."
    )
    parser.add_argument("input", help="Path to input DOCX file.")
    parser.add_argument(
        "--image",
        default="docx-field-refresh",
        help="Docker image name (default: docx-field-refresh)",
    )
    args = parser.parse_args()
    refresh_docx_fields(args.input, args.image)


# Diff-viewer context continues (definition truncated in this view):
#
# def insert_page_break_before_long_tables(config):
#     docx_path = config.get("output_docx")
#     output_path = config.get("output_docx")
generateBaseline/setup.py +1 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,7 @@ setup( "check_multipage_tables=postprocessing:insert_page_break_before_long_tables", #"apply_etsi_styling: postprocessing:postprocess_etsi_styles", "update_toc=postprocessing:update_toc_cli", "refresh_docx_fields=postprocessing:refresh_docx_fields_cli", ] } Loading