Commit a22580c1 authored by Miguel Angel Reina Ortega's avatar Miguel Angel Reina Ortega
Browse files

Adding new features

parent 898feabc
Loading
Loading
Loading
Loading
Loading
+280 −0
Original line number Diff line number Diff line
import json
import os
import re
import warnings
from pathlib import Path

import win32com.client #pip install pywin32

from docx import Document  #pip install python-docx
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm

from errors import ErrorHandler, Level, Mode
from file_helper import get_all_files_from_dir


def apply_standard_style_to_unformatted_paragraphs(config):
    """Re-style every "Body Text" paragraph of the output document.

    The document path is read from ``config["output_docx"]`` and the
    replacement style name from ``config["standard_style_name"]`` (default
    ``"etsi_standard"``). Each paragraph whose current style is named
    "Body Text" gets the replacement style, the number of changed paragraphs
    is reported through ``ErrorHandler`` and the document is saved back to
    the same path.

    Args:
        config: Mapping-like object providing ``get``.
    """
    target_path = config.get("output_docx")
    replacement_style = config.get("standard_style_name", "etsi_standard")

    # Suppress the UserWarning about the deprecated style lookup by style_id.
    # Note that this installs a process-wide warning filter.
    deprecation_text = "style lookup by style_id is deprecated. Use style name as key instead."
    warnings.filterwarnings(
        "ignore",
        category=UserWarning,
        message=re.escape(deprecation_text),
    )

    document = Document(target_path)

    # Collect the matching paragraphs first, then re-style them.
    body_text_paragraphs = [
        paragraph
        for paragraph in document.paragraphs
        if paragraph.style and paragraph.style.name == "Body Text"
    ]
    for paragraph in body_text_paragraphs:
        paragraph.style = replacement_style

    ErrorHandler()(f"Changed style to '{replacement_style}' for {len(body_text_paragraphs)} paragraphs.")
    document.save(target_path)

def rotate_cell_text(cell):
    """Set the text direction of a table cell to ``btLr``.

    Ensures the cell element has a ``<w:tcPr>`` child (inserted as first child
    when missing) and that it contains a ``<w:textDirection>`` element whose
    ``w:val`` attribute is ``btLr`` (bottom-to-top, left-to-right).

    Args:
        cell: Table cell object exposing its XML element as ``_element``.
    """
    tc_element = cell._element

    # Fetch the cell properties element, creating it when it does not exist.
    cell_props = tc_element.find(qn('w:tcPr'))
    if cell_props is None:
        cell_props = OxmlElement('w:tcPr')
        tc_element.insert(0, cell_props)

    # Reuse an existing text-direction element or append a new one.
    direction = cell_props.find(qn('w:textDirection'))
    if direction is None:
        direction = OxmlElement('w:textDirection')
        cell_props.append(direction)

    direction.set(qn('w:val'), 'btLr')

def clean_and_set_text(cell, new_text):
    """Replace the whole content of a cell with one paragraph of text.

    Args:
        cell: Table cell whose paragraphs are removed.
        new_text: Text of the single paragraph added afterwards.
    """
    # Detach every existing paragraph element from its parent.
    for paragraph in cell.paragraphs:
        element = paragraph._element
        element.getparent().remove(element)

    # Add one fresh paragraph carrying the new text.
    cell.add_paragraph(new_text)

def postprocess_table_content(config):
    """Rotate the text of all table cells marked with ``[rotate]``.

    Opens the document at ``config["output_docx"]``, visits every cell of
    every table and, for each cell whose stripped text starts with
    ``[rotate]``, removes all occurrences of the marker, replaces the cell
    content with the cleaned text and rotates the cell text. The document is
    saved back to the same path.

    Args:
        config: Mapping-like object providing ``get``.
    """
    target_path = config.get("output_docx")
    document = Document(target_path)
    marker = "[rotate]"

    # Lazily walk all cells in document order (table -> row -> cell).
    all_cells = (
        cell
        for table in document.tables
        for row in table.rows
        for cell in row.cells
    )
    for cell in all_cells:
        if not cell.text.strip().startswith(marker):
            continue
        # Drop the marker and write the remaining text back into the cell.
        cleaned_text = cell.text.replace(marker, "").strip()
        clean_and_set_text(cell, cleaned_text)
        rotate_cell_text(cell)

    document.save(target_path)


def update_word_fields(config_path: dict|str):
    """Update all fields of a Word document through Word automation and save it.

    Args:
        config_path: Either the path of the .docx file (``str`` or path-like)
            or a mapping-like configuration whose ``"output_docx"`` entry
            holds that path.

    The path is resolved to an absolute path before it is handed to Word.
    If no path is configured or the file does not exist, the problem is
    reported through ``ErrorHandler`` with ``Level.ERROR`` and Word is not
    started.
    """
    # Decide by type first: os.path.isfile() raises TypeError for a dict, and
    # a string that is not an existing file must not be treated as a config.
    if isinstance(config_path, (str, os.PathLike)):
        docx_path = config_path
    else:
        docx_path = config_path.get("output_docx")

    if not docx_path:
        ErrorHandler()("File not found: no 'output_docx' path configured", Level.ERROR)
        return

    # Check that the file exists
    docx_absolute_path = Path(docx_path).resolve()
    if not docx_absolute_path.is_file():
        ErrorHandler()(f"File not found: {docx_absolute_path}", Level.ERROR)
        # Stop here in case the handler only reports: Word cannot open a missing file.
        return

    # Start Word invisibly in the background
    word = win32com.client.Dispatch("Word.Application")
    word.Visible = False

    try:
        doc = word.Documents.Open(str(docx_absolute_path))
        try:
            # Update every field in the document's Fields collection
            for field in doc.Fields:
                field.Update()
            doc.Save()
        finally:
            # 0 = wdDoNotSaveChanges: the document was already saved on success;
            # on failure this avoids a save prompt blocking the hidden Word instance.
            doc.Close(0)
        ErrorHandler()(f"Fields in '{docx_absolute_path}' updated and saved")
    finally:
        word.Quit()

def insert_page_break_before_long_tables(config):
    """Insert a page break before every table that crosses a page boundary.

    Opens the document at ``config["output_docx"]`` in a hidden Word
    instance, compares the page number at the start and at the end of each
    table and, when they differ, inserts a page break at the table start.
    The document is saved back to the same path.

    Args:
        config: Mapping-like object providing ``get``.
    """
    docx_path = config.get("output_docx")
    # Word does not resolve relative paths against the Python working
    # directory, so hand it an absolute path (as update_word_fields does).
    absolute_path = str(Path(docx_path).resolve())

    word = win32com.client.Dispatch("Word.Application")
    word.Visible = False

    try:
        doc = word.Documents.Open(absolute_path)
        try:
            for i, table in enumerate(doc.Tables):
                # Character range covered by the table
                start = table.Range.Start
                end = table.Range.End

                # Page numbers at both ends of the table
                start_page = doc.Range(start, start).Information(3)  # wdActiveEndPageNumber = 3
                end_page = doc.Range(end - 1, end - 1).Information(3)

                if end_page > start_page:
                    ErrorHandler()(f"Table {i + 1} is on a page break: {start_page} -> {end_page}", Level.INFO)
                    # Insert the page break at the table start
                    break_position = doc.Range(start, start)
                    break_position.InsertBreak(7)  # wdPageBreak = 7

            # Save back to the same file
            doc.SaveAs(absolute_path)
        finally:
            # 0 = wdDoNotSaveChanges: already saved on success; on failure this
            # avoids a save prompt blocking the hidden Word instance.
            doc.Close(0)
    finally:
        # Always shut Word down so no hidden process is left behind.
        word.Quit()

def update_toc_level(config):
    """Make the first table of contents cover heading levels 1 to 9.

    Opens the document at ``config["output_docx"]`` in a hidden Word
    instance. If the document has no table of contents, one is added at the
    very beginning. The first table of contents is then set to heading levels
    1-9 and updated, and the document is saved back to the same path.

    Args:
        config: Mapping-like object providing ``get``.
    """
    docx_path = config.get("output_docx")
    # Word does not resolve relative paths against the Python working
    # directory, so hand it an absolute path (as update_word_fields does).
    absolute_path = str(Path(docx_path).resolve())

    word = win32com.client.Dispatch("Word.Application")
    word.Visible = False

    try:
        doc = word.Documents.Open(absolute_path)
        try:
            # Add a table of contents at the document start if none exists yet
            if doc.TablesOfContents.Count == 0:
                doc.TablesOfContents.Add(
                    Range=doc.Range(0, 0),
                    UseHeadingStyles=True,
                    UpperHeadingLevel=1,
                    LowerHeadingLevel=9,  # down to Heading 9
                    UseHyperlinks=True,
                    HidePageNumbersInWeb=False,
                    UseOutlineLevels=True
                )

            # Adjust the (now guaranteed) first table of contents
            toc = doc.TablesOfContents(1)
            # TODO: styling the first TOC paragraph as "Heading 1"
            # (toc.Range.Paragraphs(1).Style = doc.Styles("Heading 1"))
            # did not work and is skipped for the moment.
            toc.UpperHeadingLevel = 1
            toc.LowerHeadingLevel = 9
            toc.Update()

            doc.SaveAs(absolute_path)
        finally:
            # 0 = wdDoNotSaveChanges: already saved on success; on failure this
            # avoids a save prompt blocking the hidden Word instance.
            doc.Close(0)
    finally:
        # Always shut Word down so no hidden process is left behind.
        word.Quit()

def table_widths_adjustment(config):
    """Set the column widths of every table in the output document.

    Table descriptions are read from the JSON files found in
    ``config["tables_folder"]``. Each usable description is a JSON object with
    a non-empty ``"caption"``, a non-empty list of numeric ``"column_width"``
    values (relative weights) and an optional ``"total_width"`` in cm
    (default 16.88). A document table is matched to a description when its
    ``w:tblCaption`` equals or ends with the description's caption. Tables
    without a match get equally wide columns over the default total width.
    The document at ``config["output_docx"]`` is modified and saved in place.

    Args:
        config: Mapping-like object providing ``get``.
    """
    tables_dir = config.get("tables_folder")
    docx_path = config.get("output_docx")
    default_total_width_cm = 16.88
    doc = Document(docx_path)

    def get_table_caption(table):
        """Return the caption stored in ``<w:tblCaption>`` of a table, or None."""
        tblPr = table._tbl.tblPr
        if tblPr is None:
            return None  # table has no properties element

        caption_el = tblPr.find(qn("w:tblCaption"))
        if caption_el is None:
            return None

        # Prefer the w:val attribute; fall back to the element text for
        # documents that store the caption as element content.
        return caption_el.get(qn("w:val")) or caption_el.text or None

    def percent_to_cm(width_percentages, total_width_cm):
        """Convert relative column weights (e.g. [30, 40, 30]) to widths.

        Args:
            width_percentages (list[float]): Relative weights; their sum may be
                any positive value.
            total_width_cm (float): Total table width in cm.

        Returns:
            list: One ``Cm`` length per column.
        """
        total_percent = sum(width_percentages)
        if total_percent == 0:
            ErrorHandler()("sum of percent-values is 0.")
            # Avoid a division by zero: distribute the width equally instead.
            width_percentages = [1] * len(width_percentages)
            total_percent = len(width_percentages)

        return [Cm((p / total_percent) * total_width_cm) for p in width_percentages]

    def get_json_data(json_data_array, value, matching_json_field):
        """Return the first dict whose ``matching_json_field`` matches ``value``.

        A match is exact equality. For the ``"caption"`` field, ``value``
        ending with the stored caption also counts as a match.

        Args:
            json_data_array (list[dict]): JSON objects to search.
            value (str | None): Value looked for.
            matching_json_field (str): Name of the JSON field to compare.

        Returns:
            dict | None: The matching object, or None when there is no match.
        """
        for item in json_data_array:
            if not isinstance(item, dict):
                continue  # skip invalid entries
            candidate = item.get(matching_json_field)
            if candidate == value:
                return item
            if (
                matching_json_field == "caption"
                and isinstance(value, str)
                and isinstance(candidate, str)
                and candidate
                and value.endswith(candidate)
            ):
                return item
        return None

    def is_valid_description(data):
        """Check that a loaded JSON value has a usable caption and widths."""
        if not isinstance(data, dict) or not data.get("caption"):
            return False
        widths = data.get("column_width")
        return (
            isinstance(widths, (list, tuple))
            and len(widths) > 0
            and all(isinstance(w, (int, float)) for w in widths)
        )

    # Collect the table descriptions (caption and widths) from the JSON files.
    json_tables = []
    for json_path in get_all_files_from_dir(tables_dir, "json"):
        with open(json_path, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
            except json.JSONDecodeError as e:
                ErrorHandler()(f"Skipped {json_path}. Error in reading file: {e}")
                continue

        if not is_valid_description(data):
            ErrorHandler()(f"Skipping {json_path} – as caption and/or column_width are not set as expected")
            continue

        json_tables.append(data)

    for table in doc.tables:
        if len(table.rows) == 0:
            continue  # nothing to size

        # Find the JSON description belonging to this document table.
        data = get_json_data(json_tables, get_table_caption(table), "caption")
        if data is not None:
            total_width = data.get("total_width", default_total_width_cm)
            col_widths_percent = data.get("column_width")
        else:
            total_width = default_total_width_cm
            col_widths_percent = [1] * len(table.rows[0].cells)

        col_widths = percent_to_cm(col_widths_percent, total_width)

        # Apply the width column by column; zip() stops at the shorter side, so
        # a row with fewer cells than configured widths no longer raises.
        for row in table.rows:
            for cell, width in zip(row.cells, col_widths):
                cell.width = width

    doc.save(docx_path)
 No newline at end of file
+7 −0
Original line number Diff line number Diff line
@@ -14,6 +14,13 @@ setup(
            'console_scripts' : ['pandocFilter=pandocFilter:main',
								'generateTOC=generateTOC:main',
								'svg2png=svg2png:main',
								"update_references=postprocessing:update_word_fields",
        						"update_formats=postprocessing:apply_standard_style_to_unformatted_paragraphs",
        						"turn_table_contents=postprocessing:postprocess_table_content",
        						"table_width_adjustment=postprocessing:table_widths_adjustment",
        						"check_multipage_tables=postprocessing:insert_page_break_before_long_tables",
        						#"apply_etsi_styling: postprocessing:postprocess_etsi_styles",
        						"update_toc_level=postprocessing:update_toc_level",
			]
            }