# File: generateBaseline/postprocessing.py
"""Post-processing steps applied to the generated Word (.docx) document.

Every public step reads the document path from ``config["output_docx"]`` and
writes the result back to that same file. Steps built on python-docx edit the
file directly; steps built on Word automation (pywin32) start the
``Word.Application`` COM server and therefore need Microsoft Word.
"""
import json
import os
import re
import warnings
from contextlib import contextmanager
from pathlib import Path

import win32com.client  # pip install pywin32
from docx import Document  # pip install python-docx
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm

from errors import ErrorHandler, Level, Mode
from file_helper import get_all_files_from_dir

# Word object-model enum values used by the automation steps below.
WD_DO_NOT_SAVE_CHANGES = 0  # WdSaveOptions.wdDoNotSaveChanges
WD_ACTIVE_END_PAGE_NUMBER = 3  # WdInformation.wdActiveEndPageNumber
WD_PAGE_BREAK = 7  # WdBreakType.wdPageBreak

# Table width (cm) used when a table has no JSON description or the
# description carries no "total_width".
DEFAULT_TABLE_WIDTH_CM = 16.88


@contextmanager
def _open_word_document(docx_path):
    """Open *docx_path* in a hidden Word instance and yield the COM document.

    The path is made absolute before it is handed to Word, because Word
    resolves relative paths against its own working directory rather than
    the working directory of this process. The document is closed and Word
    is quit even when the body of the ``with`` statement raises, so no hidden
    Word process is left behind.
    """
    absolute_path = str(Path(docx_path).resolve())
    word = win32com.client.Dispatch("Word.Application")
    try:
        word.Visible = False  # keep Word in the background
        doc = word.Documents.Open(absolute_path)
        try:
            yield doc
        finally:
            # Callers save explicitly; whatever is unsaved at this point
            # belongs to a failed run and is discarded without a prompt.
            doc.Close(WD_DO_NOT_SAVE_CHANGES)
    finally:
        word.Quit()


def apply_standard_style_to_unformatted_paragraphs(config):
    """Restyle every "Body Text" paragraph with the configured standard style.

    Args:
        config: Mapping with ``"output_docx"`` (document that is read and
            overwritten) and optionally ``"standard_style_name"``
            (defaults to ``"etsi_standard"``).
    """
    docx_path = config.get("output_docx")
    output_path = config.get("output_docx")
    standard_style_name = config.get("standard_style_name", "etsi_standard")

    # Silence python-docx's deprecation warning about style lookup by id.
    warnings.filterwarnings(
        "ignore",
        category=UserWarning,
        message=re.escape("style lookup by style_id is deprecated. Use style name as key instead."),
    )

    doc = Document(docx_path)
    changed = 0
    for p in doc.paragraphs:
        current_style = p.style.name if p.style else None
        # Only paragraphs that still carry the "Body Text" style are restyled.
        if current_style == "Body Text":
            p.style = standard_style_name
            changed += 1

    ErrorHandler()(f"Changed style to '{standard_style_name}' for {changed} paragraphs.")
    doc.save(output_path)


def rotate_cell_text(cell):
    """Set the text direction of a table cell to ``btLr``.

    Args:
        cell: python-docx table cell whose XML is modified in place.
    """
    # Fetch or create <w:tcPr>.
    tcPr = cell._element.find(qn('w:tcPr'))
    if tcPr is None:
        tcPr = OxmlElement('w:tcPr')
        cell._element.insert(0, tcPr)

    # Fetch or create <w:textDirection>, then force w:val="btLr".
    text_dir = tcPr.find(qn('w:textDirection'))
    if text_dir is None:
        text_dir = OxmlElement('w:textDirection')
        tcPr.append(text_dir)
    text_dir.set(qn('w:val'), 'btLr')  # bottom-to-top, left-to-right


def clean_and_set_text(cell, new_text):
    """Replace all paragraphs of *cell* with one paragraph holding *new_text*."""
    # Remove every existing paragraph ...
    for p in cell.paragraphs:
        p._element.getparent().remove(p._element)
    # ... and add a single new paragraph with the cleaned text.
    cell.add_paragraph(new_text)


def postprocess_table_content(config):
    """Rotate the text of every table cell that starts with ``[rotate]``.

    The marker is stripped from the cell text before the rotation is applied.

    Args:
        config: Mapping with ``"output_docx"`` (document that is read and
            overwritten).
    """
    docx_path = config.get("output_docx")
    output_path = config.get("output_docx")
    doc = Document(docx_path)

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                cell_text = cell.text
                if cell_text.strip().startswith("[rotate]"):
                    # Drop the marker and write the remaining text back.
                    new_text = cell_text.replace("[rotate]", "").strip()
                    clean_and_set_text(cell, new_text)
                    rotate_cell_text(cell)

    doc.save(output_path)


def update_word_fields(config_path: dict | str):
    """Update all fields of the document through Word and save it.

    Args:
        config_path: Either the path of the .docx file itself, or a config
            mapping whose ``"output_docx"`` entry holds that path.
    """
    # Decide by type instead of probing the file system: a dict is not a
    # valid argument for os.path.isfile, and a path that does not exist
    # must not be treated as a config mapping.
    if isinstance(config_path, (str, os.PathLike)):
        docx_path = config_path
    else:
        docx_path = config_path.get("output_docx")

    docx_absolute_path = Path(docx_path).resolve()
    if not os.path.isfile(docx_absolute_path):
        ErrorHandler()(f"File not found: {docx_absolute_path}", Level.ERROR)
        # NOTE(review): ErrorHandler may already abort at Level.ERROR (not
        # visible here); return so Word is never started for a missing file.
        return

    with _open_word_document(docx_absolute_path) as doc:
        for field in doc.Fields:
            field.Update()
        doc.Save()

    ErrorHandler()(f"Fields in '{docx_absolute_path}' updated and saved")


def insert_page_break_before_long_tables(config):
    """Insert a page break before every table that spans a page boundary.

    A table is treated as spanning a boundary when the page number at its
    end is greater than the page number at its start.

    Args:
        config: Mapping with ``"output_docx"`` (document that is read and
            overwritten).
    """
    docx_path = config.get("output_docx")

    with _open_word_document(docx_path) as doc:
        for number, table in enumerate(doc.Tables, start=1):
            # Character positions of the table inside the document.
            start = table.Range.Start
            end = table.Range.End

            # Page numbers at the first and at the last position of the table.
            start_page = doc.Range(start, start).Information(WD_ACTIVE_END_PAGE_NUMBER)
            end_page = doc.Range(end - 1, end - 1).Information(WD_ACTIVE_END_PAGE_NUMBER)

            if end_page > start_page:
                ErrorHandler()(f"Table {number} is on a page break: {start_page} -> {end_page}", Level.INFO)
                doc.Range(start, start).InsertBreak(WD_PAGE_BREAK)

        # Input and output are the same file, so a plain save is sufficient.
        doc.Save()


def update_toc_level(config):
    """Make the first table of contents cover heading levels 1 to 9.

    A table of contents is inserted at the start of the document when the
    document has none.

    Args:
        config: Mapping with ``"output_docx"`` (document that is read and
            overwritten).
    """
    docx_path = config.get("output_docx")

    with _open_word_document(docx_path) as doc:
        if doc.TablesOfContents.Count == 0:
            # No table of contents yet: add one at the very beginning.
            doc.TablesOfContents.Add(
                Range=doc.Range(0, 0),
                UseHeadingStyles=True,
                UpperHeadingLevel=1,
                LowerHeadingLevel=9,  # include everything down to Heading 9
                UseHyperlinks=True,
                HidePageNumbersInWeb=False,
                UseOutlineLevels=True,
            )

        toc = doc.TablesOfContents(1)

        # TODO: format the TOC heading. Assigning doc.Styles("Heading 1") to
        # the first paragraph of toc.Range did not work, so it is skipped.

        # Set the level range to 1-9 and rebuild the table of contents.
        toc.UpperHeadingLevel = 1
        toc.LowerHeadingLevel = 9
        toc.Update()

        doc.Save()


def _get_table_caption(table):
    """Return the text stored in the table's ``<w:tblCaption>``, or None.

    The ``w:val`` attribute is preferred; the element text is used when the
    attribute is missing or empty.
    """
    tblPr = table._tbl.tblPr
    if tblPr is None:
        return None  # table has no properties element

    caption_el = tblPr.find(qn("w:tblCaption"))
    if caption_el is None:
        return None
    return caption_el.get(qn("w:val")) or caption_el.text or None


def _percent_to_cm(width_percentages, total_width_cm):
    """Convert relative column weights (e.g. ``[30, 40, 30]``) to widths.

    Args:
        width_percentages: Relative weights; they are normalised by their
            sum, so the sum may be any positive value.
        total_width_cm: Total table width in centimetres.

    Returns:
        One ``docx.shared.Cm`` length per weight.
    """
    total_percent = sum(width_percentages)
    if total_percent == 0:
        ErrorHandler()("sum of percent-values is 0.")
        # Weights cannot be normalised; share the width equally instead of
        # failing with a division by zero.
        width_percentages = [1] * len(width_percentages)
        total_percent = len(width_percentages)

    return [Cm((p / total_percent) * total_width_cm) for p in width_percentages]


def _get_json_data(json_data_array, value, matching_json_field):
    """Find the first JSON object whose *matching_json_field* matches *value*.

    For the ``"caption"`` field a suffix match is accepted as well: the
    object matches when *value* ends with the object's caption. All other
    fields must be equal to *value*.

    Args:
        json_data_array: List of JSON objects (dicts); other entries are
            skipped.
        value: Value to look for; may be None when a table has no caption.
        matching_json_field: Name of the JSON field to compare.

    Returns:
        The matching dict, or None when nothing matches.
    """
    for item in json_data_array:
        if not isinstance(item, dict):
            continue  # skip invalid entries

        candidate = item.get(matching_json_field)
        if (
            matching_json_field == "caption"
            and isinstance(value, str)
            and isinstance(candidate, str)
            and candidate  # an empty caption would be a suffix of anything
            and value.endswith(candidate)
        ):
            return item
        if candidate == value:
            return item
    return None


def _load_table_descriptions(tables_folder):
    """Load every usable table description (JSON) found in *tables_folder*.

    Files that are not valid JSON, are not JSON objects, or lack a
    ``"caption"`` or ``"column_width"`` value are reported and skipped.
    """
    descriptions = []
    for json_path in get_all_files_from_dir(tables_folder, "json"):
        with open(json_path, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
            except json.JSONDecodeError as e:
                ErrorHandler()(f"Skipped {json_path}. Error in reading file: {e}")
                continue

        if not isinstance(data, dict) or not data.get("caption") or not data.get("column_width"):
            ErrorHandler()(f"Skipping {json_path} – as caption and/or column_width are not set as expected")
            continue

        descriptions.append(data)
    return descriptions


def table_widths_adjustment(config):
    """Set the column widths of every table in the document.

    Each table is matched by caption against the JSON descriptions in
    ``config["tables_folder"]``. A matching description supplies
    ``"column_width"`` (relative weights) and optionally ``"total_width"``
    (cm). Tables without a match get equally wide columns over the default
    total width.

    Args:
        config: Mapping with ``"tables_folder"`` and ``"output_docx"``
            (document that is read and overwritten).
    """
    tables_folder = config.get("tables_folder")
    docx_path = config.get("output_docx")
    doc = Document(docx_path)

    json_tables = _load_table_descriptions(tables_folder)

    for table in doc.tables:
        if len(table.rows) == 0:
            continue  # nothing to size

        data = _get_json_data(json_tables, _get_table_caption(table), "caption")
        if data is not None:
            total_width = data.get("total_width", DEFAULT_TABLE_WIDTH_CM)
            col_widths_percent = data.get("column_width")
        else:
            total_width = DEFAULT_TABLE_WIDTH_CM
            col_widths_percent = [1] * len(table.rows[0].cells)

        col_widths = _percent_to_cm(col_widths_percent, total_width)

        # Apply the width to every cell of the respective column. zip() stops
        # at the shorter side, so a row with fewer cells than configured
        # widths is handled, and row.cells is built only once per row.
        for row in table.rows:
            for cell, width in zip(row.cells, col_widths):
                cell.width = width

    doc.save(docx_path)


# ---------------------------------------------------------------------------
# Companion change shown in the same diff: generateBaseline/setup.py (+7 -0,
# hunk @@ -14,6 +14,13 @@). The 'console_scripts' list then reads:
#     'pandocFilter=pandocFilter:main',
#     'generateTOC=generateTOC:main',
#     'svg2png=svg2png:main',
#     "update_references=postprocessing:update_word_fields",
#     "update_formats=postprocessing:apply_standard_style_to_unformatted_paragraphs",
#     "turn_table_contents=postprocessing:postprocess_table_content",
#     "table_width_adjustment=postprocessing:table_widths_adjustment",
#     "check_multipage_tables=postprocessing:insert_page_break_before_long_tables",
#     #"apply_etsi_styling: postprocessing:postprocess_etsi_styles",
#     "update_toc_level=postprocessing:update_toc_level",
# NOTE(review): console_scripts entry points are invoked without arguments,
# while every function above requires a config/path argument -- confirm how
# the configuration is supposed to reach these entry points.
# File: generateBaseline/postprocessing.py
"""Post-processing steps applied to the generated Word (.docx) document.

Every public step reads the document path from ``config["output_docx"]`` and
writes the result back to that same file. Steps built on python-docx edit the
file directly; steps built on Word automation (pywin32) start the
``Word.Application`` COM server and therefore need Microsoft Word.
"""
import json
import os
import re
import warnings
from contextlib import contextmanager
from pathlib import Path

import win32com.client  # pip install pywin32
from docx import Document  # pip install python-docx
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm

from errors import ErrorHandler, Level, Mode
from file_helper import get_all_files_from_dir

# Word object-model enum values used by the automation steps below.
WD_DO_NOT_SAVE_CHANGES = 0  # WdSaveOptions.wdDoNotSaveChanges
WD_ACTIVE_END_PAGE_NUMBER = 3  # WdInformation.wdActiveEndPageNumber
WD_PAGE_BREAK = 7  # WdBreakType.wdPageBreak

# Table width (cm) used when a table has no JSON description or the
# description carries no "total_width".
DEFAULT_TABLE_WIDTH_CM = 16.88


@contextmanager
def _open_word_document(docx_path):
    """Open *docx_path* in a hidden Word instance and yield the COM document.

    The path is made absolute before it is handed to Word, because Word
    resolves relative paths against its own working directory rather than
    the working directory of this process. The document is closed and Word
    is quit even when the body of the ``with`` statement raises, so no hidden
    Word process is left behind.
    """
    absolute_path = str(Path(docx_path).resolve())
    word = win32com.client.Dispatch("Word.Application")
    try:
        word.Visible = False  # keep Word in the background
        doc = word.Documents.Open(absolute_path)
        try:
            yield doc
        finally:
            # Callers save explicitly; whatever is unsaved at this point
            # belongs to a failed run and is discarded without a prompt.
            doc.Close(WD_DO_NOT_SAVE_CHANGES)
    finally:
        word.Quit()


def apply_standard_style_to_unformatted_paragraphs(config):
    """Restyle every "Body Text" paragraph with the configured standard style.

    Args:
        config: Mapping with ``"output_docx"`` (document that is read and
            overwritten) and optionally ``"standard_style_name"``
            (defaults to ``"etsi_standard"``).
    """
    docx_path = config.get("output_docx")
    output_path = config.get("output_docx")
    standard_style_name = config.get("standard_style_name", "etsi_standard")

    # Silence python-docx's deprecation warning about style lookup by id.
    warnings.filterwarnings(
        "ignore",
        category=UserWarning,
        message=re.escape("style lookup by style_id is deprecated. Use style name as key instead."),
    )

    doc = Document(docx_path)
    changed = 0
    for p in doc.paragraphs:
        current_style = p.style.name if p.style else None
        # Only paragraphs that still carry the "Body Text" style are restyled.
        if current_style == "Body Text":
            p.style = standard_style_name
            changed += 1

    ErrorHandler()(f"Changed style to '{standard_style_name}' for {changed} paragraphs.")
    doc.save(output_path)


def rotate_cell_text(cell):
    """Set the text direction of a table cell to ``btLr``.

    Args:
        cell: python-docx table cell whose XML is modified in place.
    """
    # Fetch or create <w:tcPr>.
    tcPr = cell._element.find(qn('w:tcPr'))
    if tcPr is None:
        tcPr = OxmlElement('w:tcPr')
        cell._element.insert(0, tcPr)

    # Fetch or create <w:textDirection>, then force w:val="btLr".
    text_dir = tcPr.find(qn('w:textDirection'))
    if text_dir is None:
        text_dir = OxmlElement('w:textDirection')
        tcPr.append(text_dir)
    text_dir.set(qn('w:val'), 'btLr')  # bottom-to-top, left-to-right


def clean_and_set_text(cell, new_text):
    """Replace all paragraphs of *cell* with one paragraph holding *new_text*."""
    # Remove every existing paragraph ...
    for p in cell.paragraphs:
        p._element.getparent().remove(p._element)
    # ... and add a single new paragraph with the cleaned text.
    cell.add_paragraph(new_text)


def postprocess_table_content(config):
    """Rotate the text of every table cell that starts with ``[rotate]``.

    The marker is stripped from the cell text before the rotation is applied.

    Args:
        config: Mapping with ``"output_docx"`` (document that is read and
            overwritten).
    """
    docx_path = config.get("output_docx")
    output_path = config.get("output_docx")
    doc = Document(docx_path)

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                cell_text = cell.text
                if cell_text.strip().startswith("[rotate]"):
                    # Drop the marker and write the remaining text back.
                    new_text = cell_text.replace("[rotate]", "").strip()
                    clean_and_set_text(cell, new_text)
                    rotate_cell_text(cell)

    doc.save(output_path)


def update_word_fields(config_path: dict | str):
    """Update all fields of the document through Word and save it.

    Args:
        config_path: Either the path of the .docx file itself, or a config
            mapping whose ``"output_docx"`` entry holds that path.
    """
    # Decide by type instead of probing the file system: a dict is not a
    # valid argument for os.path.isfile, and a path that does not exist
    # must not be treated as a config mapping.
    if isinstance(config_path, (str, os.PathLike)):
        docx_path = config_path
    else:
        docx_path = config_path.get("output_docx")

    docx_absolute_path = Path(docx_path).resolve()
    if not os.path.isfile(docx_absolute_path):
        ErrorHandler()(f"File not found: {docx_absolute_path}", Level.ERROR)
        # NOTE(review): ErrorHandler may already abort at Level.ERROR (not
        # visible here); return so Word is never started for a missing file.
        return

    with _open_word_document(docx_absolute_path) as doc:
        for field in doc.Fields:
            field.Update()
        doc.Save()

    ErrorHandler()(f"Fields in '{docx_absolute_path}' updated and saved")


def insert_page_break_before_long_tables(config):
    """Insert a page break before every table that spans a page boundary.

    A table is treated as spanning a boundary when the page number at its
    end is greater than the page number at its start.

    Args:
        config: Mapping with ``"output_docx"`` (document that is read and
            overwritten).
    """
    docx_path = config.get("output_docx")

    with _open_word_document(docx_path) as doc:
        for number, table in enumerate(doc.Tables, start=1):
            # Character positions of the table inside the document.
            start = table.Range.Start
            end = table.Range.End

            # Page numbers at the first and at the last position of the table.
            start_page = doc.Range(start, start).Information(WD_ACTIVE_END_PAGE_NUMBER)
            end_page = doc.Range(end - 1, end - 1).Information(WD_ACTIVE_END_PAGE_NUMBER)

            if end_page > start_page:
                ErrorHandler()(f"Table {number} is on a page break: {start_page} -> {end_page}", Level.INFO)
                doc.Range(start, start).InsertBreak(WD_PAGE_BREAK)

        # Input and output are the same file, so a plain save is sufficient.
        doc.Save()


def update_toc_level(config):
    """Make the first table of contents cover heading levels 1 to 9.

    A table of contents is inserted at the start of the document when the
    document has none.

    Args:
        config: Mapping with ``"output_docx"`` (document that is read and
            overwritten).
    """
    docx_path = config.get("output_docx")

    with _open_word_document(docx_path) as doc:
        if doc.TablesOfContents.Count == 0:
            # No table of contents yet: add one at the very beginning.
            doc.TablesOfContents.Add(
                Range=doc.Range(0, 0),
                UseHeadingStyles=True,
                UpperHeadingLevel=1,
                LowerHeadingLevel=9,  # include everything down to Heading 9
                UseHyperlinks=True,
                HidePageNumbersInWeb=False,
                UseOutlineLevels=True,
            )

        toc = doc.TablesOfContents(1)

        # TODO: format the TOC heading. Assigning doc.Styles("Heading 1") to
        # the first paragraph of toc.Range did not work, so it is skipped.

        # Set the level range to 1-9 and rebuild the table of contents.
        toc.UpperHeadingLevel = 1
        toc.LowerHeadingLevel = 9
        toc.Update()

        doc.Save()


def _get_table_caption(table):
    """Return the text stored in the table's ``<w:tblCaption>``, or None.

    The ``w:val`` attribute is preferred; the element text is used when the
    attribute is missing or empty.
    """
    tblPr = table._tbl.tblPr
    if tblPr is None:
        return None  # table has no properties element

    caption_el = tblPr.find(qn("w:tblCaption"))
    if caption_el is None:
        return None
    return caption_el.get(qn("w:val")) or caption_el.text or None


def _percent_to_cm(width_percentages, total_width_cm):
    """Convert relative column weights (e.g. ``[30, 40, 30]``) to widths.

    Args:
        width_percentages: Relative weights; they are normalised by their
            sum, so the sum may be any positive value.
        total_width_cm: Total table width in centimetres.

    Returns:
        One ``docx.shared.Cm`` length per weight.
    """
    total_percent = sum(width_percentages)
    if total_percent == 0:
        ErrorHandler()("sum of percent-values is 0.")
        # Weights cannot be normalised; share the width equally instead of
        # failing with a division by zero.
        width_percentages = [1] * len(width_percentages)
        total_percent = len(width_percentages)

    return [Cm((p / total_percent) * total_width_cm) for p in width_percentages]


def _get_json_data(json_data_array, value, matching_json_field):
    """Find the first JSON object whose *matching_json_field* matches *value*.

    For the ``"caption"`` field a suffix match is accepted as well: the
    object matches when *value* ends with the object's caption. All other
    fields must be equal to *value*.

    Args:
        json_data_array: List of JSON objects (dicts); other entries are
            skipped.
        value: Value to look for; may be None when a table has no caption.
        matching_json_field: Name of the JSON field to compare.

    Returns:
        The matching dict, or None when nothing matches.
    """
    for item in json_data_array:
        if not isinstance(item, dict):
            continue  # skip invalid entries

        candidate = item.get(matching_json_field)
        if (
            matching_json_field == "caption"
            and isinstance(value, str)
            and isinstance(candidate, str)
            and candidate  # an empty caption would be a suffix of anything
            and value.endswith(candidate)
        ):
            return item
        if candidate == value:
            return item
    return None


def _load_table_descriptions(tables_folder):
    """Load every usable table description (JSON) found in *tables_folder*.

    Files that are not valid JSON, are not JSON objects, or lack a
    ``"caption"`` or ``"column_width"`` value are reported and skipped.
    """
    descriptions = []
    for json_path in get_all_files_from_dir(tables_folder, "json"):
        with open(json_path, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
            except json.JSONDecodeError as e:
                ErrorHandler()(f"Skipped {json_path}. Error in reading file: {e}")
                continue

        if not isinstance(data, dict) or not data.get("caption") or not data.get("column_width"):
            ErrorHandler()(f"Skipping {json_path} – as caption and/or column_width are not set as expected")
            continue

        descriptions.append(data)
    return descriptions


def table_widths_adjustment(config):
    """Set the column widths of every table in the document.

    Each table is matched by caption against the JSON descriptions in
    ``config["tables_folder"]``. A matching description supplies
    ``"column_width"`` (relative weights) and optionally ``"total_width"``
    (cm). Tables without a match get equally wide columns over the default
    total width.

    Args:
        config: Mapping with ``"tables_folder"`` and ``"output_docx"``
            (document that is read and overwritten).
    """
    tables_folder = config.get("tables_folder")
    docx_path = config.get("output_docx")
    doc = Document(docx_path)

    json_tables = _load_table_descriptions(tables_folder)

    for table in doc.tables:
        if len(table.rows) == 0:
            continue  # nothing to size

        data = _get_json_data(json_tables, _get_table_caption(table), "caption")
        if data is not None:
            total_width = data.get("total_width", DEFAULT_TABLE_WIDTH_CM)
            col_widths_percent = data.get("column_width")
        else:
            total_width = DEFAULT_TABLE_WIDTH_CM
            col_widths_percent = [1] * len(table.rows[0].cells)

        col_widths = _percent_to_cm(col_widths_percent, total_width)

        # Apply the width to every cell of the respective column. zip() stops
        # at the shorter side, so a row with fewer cells than configured
        # widths is handled, and row.cells is built only once per row.
        for row in table.rows:
            for cell, width in zip(row.cells, col_widths):
                cell.width = width

    doc.save(docx_path)
generateBaseline/setup.py +7 −0 Original line number Diff line number Diff line Loading @@ -14,6 +14,13 @@ setup( 'console_scripts' : ['pandocFilter=pandocFilter:main', 'generateTOC=generateTOC:main', 'svg2png=svg2png:main', "update_references=postprocessing:update_word_fields", "update_formats=postprocessing:apply_standard_style_to_unformatted_paragraphs", "turn_table_contents=postprocessing:postprocess_table_content", "table_width_adjustment=postprocessing:table_widths_adjustment", "check_multipage_tables=postprocessing:insert_page_break_before_long_tables", #"apply_etsi_styling: postprocessing:postprocess_etsi_styles", "update_toc_level=postprocessing:update_toc_level", ] } Loading