Commit 82490058 authored by Jonas Schüppen's avatar Jonas Schüppen
Browse files

Added some more postprocessing functions

parent 9c968275
Loading
Loading
Loading
Loading
Loading
+43 −1
Original line number Diff line number Diff line
@@ -16,6 +16,9 @@ from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm

from postprocessing_styling import postprocess_etsi_styles


def parse_input(args_to_parse=None, description = None):
    """
    args_to_parse: list of arguments, e.g. ["input", "output"]
@@ -117,6 +120,16 @@ def table_width_adjustment_cli():
                                                 description="Set the width of table columns according to values in json or if non there to be equal.")
    table_widths_adjustment(docx_input, docx_output, tables_folder)

def postprocess_etsi_styles_cli():
    """
    Command line entry point for the ETSI style postprocessing.

    Reads the input and output docx paths via parse_input and passes them
    on to postprocess_etsi_styles.
    """
    cli_description = "Update styles in document according to etsi styles (currently not ready, just a few styles)."
    source_path, target_path = parse_input(description=cli_description)
    postprocess_etsi_styles(source_path, target_path)

def remove_docx_metadata_cli():
    """
    Command line entry point for removing metadata from a docx file.

    Reads the input and output docx paths via parse_input and passes them
    on to remove_docx_metadata.
    """
    docx_input, docx_output = parse_input(
        description="Remove metadata from docx.")
    # Previously this called postprocess_etsi_styles (copy-paste slip), so the
    # "remove_metadata" command restyled the document instead of cleaning it.
    remove_docx_metadata(docx_input, docx_output)


def apply_standard_style_to_unformatted_paragraphs(docx_input, docx_output, standard_style_name = "Normal"):
    # Filter warning
    warnings.filterwarnings(
@@ -486,3 +499,32 @@ def table_widths_adjustment(docx_input, docx_output, tables_folder):
                cell = row.cells[i]
                cell.width = width
    doc.save(docx_output)

def remove_docx_metadata(docx_input, docx_output):
    """
    Remove metadata from a Word (.docx) file.

    The core properties (author, last modified by, title, subject, comments,
    keywords, category) are blanked with python-docx, the document is saved,
    and the archive is then rewritten without the parts "docProps/core.xml"
    and "docProps/app.xml".

    docx_input: path of the docx file to read
    docx_output: path the cleaned docx file is written to
    """
    # 1. Clear the core properties with python-docx.
    # Empty strings are used on purpose: python-docx converts non-string
    # values with str(), so assigning None would store the text "None".
    doc = Document(docx_input)
    props = doc.core_properties
    for prop_name in ("author", "last_modified_by", "title", "subject",
                      "comments", "keywords", "category"):
        setattr(props, prop_name, "")
    doc.save(docx_output)

    # 2. Drop the metadata parts from the ZIP archive.
    # The filtered archive is built in a temporary file and then moved over
    # the output, so a failure never leaves a half-written docx behind.
    # NOTE(review): the package relationships / content types still reference
    # the removed parts - confirm that all target consumers accept this.
    metadata_parts = ("docProps/core.xml", "docProps/app.xml")
    tmp_path = f"{docx_output}.tmp"  # f-string also accepts pathlib.Path
    try:
        with zipfile.ZipFile(docx_output, "r") as zin:
            with zipfile.ZipFile(tmp_path, "w") as zout:
                for item in zin.infolist():
                    if item.filename not in metadata_parts:
                        # Passing the ZipInfo keeps the entry's own
                        # compression type and timestamps.
                        zout.writestr(item, zin.read(item))
        os.replace(tmp_path, docx_output)
    finally:
        # Only still present if something above failed before the replace.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    print(f"Successfully removed metadata from {docx_input} and wrote it to {docx_output}")
 No newline at end of file
+53 −0
Original line number Diff line number Diff line
import re

from docx import Document
from docx.shared import Cm


def change_annex_heading_formats(doc):
    """
    Restyle annex headings.

    Every paragraph styled "Heading 1" whose first word starts with "annex"
    (case-insensitive) is switched to the style "Heading 8". Paragraphs
    without any text are left alone.

    doc: document object providing paragraphs and styles
    returns: the same doc object (modified in place)
    """
    top_level_headings = (p for p in doc.paragraphs if p.style.name == "Heading 1")
    for heading in top_level_headings:
        words = heading.text.split()
        # Conditional restyle: only headings that begin with "Annex..."
        if words and words[0].lower().startswith("annex"):
            heading.style = doc.styles['Heading 8']
    return doc

# Label at the start of a line: EXAMPLE or NOTE, optionally followed by a
# number, then a colon. The non-capturing group makes the optional number
# apply to both keywords; without it the alternation reads
# "EXAMPLE" | "NOTE<number>" and "EXAMPLE 1:" is never matched.
_LABEL_COLON_PATTERN = re.compile(r'^(\s*(?:EXAMPLE|NOTE)\s*\d*)\s*:\s*', re.MULTILINE)


def _tab_after_label(text):
    """Return text with exactly one tab after a leading "NOTE n:" / "EXAMPLE n:" label."""
    return _LABEL_COLON_PATTERN.sub(r'\1:\t', text)


def intent_example_and_note(doc):
    """
    Apply a hanging indent to NOTE and EXAMPLE paragraphs and put a tab
    after the label's colon.

    Paragraphs whose stripped text starts with "NOTE" get a left indent of
    2 cm with a first line indent of -1.5 cm; paragraphs starting with
    "EXAMPLE" get 2.5 cm and -2 cm. In both cases whitespace around the
    colon of a leading "NOTE:", "NOTE 1:", "EXAMPLE:" or "EXAMPLE 1:" label
    is replaced by a single tab after the colon.

    doc: document object providing paragraphs; modified in place
    """
    for para in doc.paragraphs:
        stripped = para.text.strip()
        if stripped.startswith("NOTE"):
            left_indent, first_line_indent = Cm(2), Cm(-1.5)
        elif stripped.startswith("EXAMPLE"):
            left_indent, first_line_indent = Cm(2.5), Cm(-2)
        else:
            continue

        # Increase the indent and pull the first line back (hanging indent).
        para.paragraph_format.left_indent = left_indent
        para.paragraph_format.first_line_indent = first_line_indent

        # Assigning para.text replaces all runs by a single one and drops
        # run-level formatting, so only write when the text really changes.
        original_text = para.text
        tabbed_text = _tab_after_label(original_text)
        if tabbed_text != original_text:
            para.text = tabbed_text




def postprocess_etsi_styles(docx_input, docx_output):
    """
    Apply the ETSI style postprocessing steps to a docx file.

    Opens docx_input, runs change_annex_heading_formats and then
    intent_example_and_note on the document and saves the result to
    docx_output.
    """
    document = Document(docx_input)
    for styling_step in (change_annex_heading_formats, intent_example_and_note):
        styling_step(document)
    document.save(docx_output)
 No newline at end of file
+2 −0
Original line number Diff line number Diff line
@@ -22,6 +22,8 @@ setup(
        						#"apply_etsi_styling: postprocessing:postprocess_etsi_styles",
        						"update_toc=postprocessing:update_toc_cli",
								"refresh_docx_fields=postprocessing:refresh_docx_fields_cli",
                                "apply_etsi_styling=postprocessing:postprocess_etsi_styles_cli",
                                "remove_metadata=postprocessing:remove_docx_metadata_cli"
			]
            }