Adding new features (a22580c1) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

generateBaseline/postprocessing.py

0 → 100644

+280 −0

Original line number	Diff line number	Diff line
		import json
		import os
		import re
		import warnings
		from pathlib import Path

		import win32com.client #pip install pywin32

		from docx import Document #pip install python-docx
		from docx.oxml import OxmlElement
		from docx.oxml.ns import qn
		from docx.shared import Cm

		from errors import ErrorHandler, Level, Mode
		from file_helper import get_all_files_from_dir


		def apply_standard_style_to_unformatted_paragraphs(config):
		docx_path = config.get("output_docx")
		output_path = config.get("output_docx")
		standard_style_name = config.get("standard_style_name", "etsi_standard")
		# Filter warning
		warnings.filterwarnings(
		"ignore",
		category=UserWarning,
		message=re.escape("style lookup by style_id is deprecated. Use style name as key instead.")
		)


		doc = Document(docx_path)
		changed = 0

		for p in doc.paragraphs:
		current_style = p.style.name if p.style else None
		# Prüfe, ob Stil nicht 'Standard' ist
		if current_style == "Body Text":
		p.style = standard_style_name
		changed += 1

		ErrorHandler()(f"Changed style to '{standard_style_name}' for {changed} paragraphs.")
		doc.save(output_path)

		def rotate_cell_text(cell):
		# Hole oder erstelle <w:tcPr>
		tcPr = cell._element.find(qn('w:tcPr'))
		if tcPr is None:
		tcPr = OxmlElement('w:tcPr')
		cell._element.insert(0, tcPr)

		# Erstelle oder ersetze <w:textDirection w:val="btLr"/>
		text_dir = tcPr.find(qn('w:textDirection'))
		if text_dir is None:
		text_dir = OxmlElement('w:textDirection')
		tcPr.append(text_dir)
		text_dir.set(qn('w:val'), 'btLr') # bottom-to-top, left-to-right

		def clean_and_set_text(cell, new_text):
		# Entferne alle vorhandenen Paragraphen
		for p in cell.paragraphs:
		p._element.getparent().remove(p._element)

		# Füge neuen Absatz mit dem bereinigten Text hinzu
		cell.add_paragraph(new_text)

		def postprocess_table_content(config):
		docx_path = config.get("output_docx")
		output_path = config.get("output_docx")
		doc = Document(docx_path)
		for table in doc.tables:
		for row in table.rows:
		for cell in row.cells:
		if cell.text.strip().startswith("[rotate]"):
		# Entferne den Marker und setze neuen Text
		new_text = cell.text.replace("[rotate]", "").strip()
		clean_and_set_text(cell, new_text)
		rotate_cell_text(cell)
		doc.save(output_path)


		def update_word_fields(config_path: dict\|str):
		if os.path.isfile(config_path):
		docx_path = config_path
		else:
		docx_path = config_path.get("output_docx")
		# Prüfen, ob Datei existiert
		relativer_pfad = Path(docx_path)
		docx_absolute_path = relativer_pfad.resolve()
		if not os.path.isfile(docx_absolute_path):
		ErrorHandler()(f"File not found: {docx_absolute_path}", Level.ERROR)

		# Word starten
		word = win32com.client.Dispatch("Word.Application")
		word.Visible = False # unsichtbar im Hintergrund

		try:
		# Dokument öffnen
		doc = word.Documents.Open(str(docx_absolute_path))

		# Alle Felder im Dokument aktualisieren
		for field in doc.Fields:
		field.Update()

		# Dokument speichern
		doc.Save()

		# Schließen
		doc.Close()
		ErrorHandler()(f"Fields in '{docx_absolute_path}' updated and saved")
		finally:
		word.Quit()

		def insert_page_break_before_long_tables(config):
		docx_path = config.get("output_docx")
		output_path = config.get("output_docx")

		word = win32com.client.Dispatch("Word.Application")
		word.Visible = False

		doc = word.Documents.Open(docx_path)

		for i, table in enumerate(doc.Tables):
		# Tabellenbereich abrufen
		start = table.Range.Start
		end = table.Range.End

		# Seitenzahl berechnen
		start_page = doc.Range(start, start).Information(3) # wdActiveEndPageNumber = 3
		end_page = doc.Range(end - 1, end - 1).Information(3)

		if end_page > start_page:
		ErrorHandler()(f"Table {i + 1} is on a page break: {start_page} -> {end_page}", Level.INFO)
		# Seitenumbruch einfügen
		para = doc.Range(start, start)
		para.InsertBreak(7) # wdPageBreak = 7

		# Speichern unter neuem Namen
		doc.SaveAs(output_path)
		doc.Close()
		word.Quit()

		def update_toc_level(config):
		docx_path = config.get("output_docx")
		word = win32com.client.Dispatch("Word.Application")
		word.Visible = False

		doc = word.Documents.Open(docx_path)

		# Wenn kein TOC vorhanden ist, kannst du eins hinzufügen:
		if doc.TablesOfContents.Count == 0:
		# Inhaltsverzeichnis am Anfang des Dokuments einfügen
		doc.TablesOfContents.Add(
		Range=doc.Range(0, 0),
		UseHeadingStyles=True,
		UpperHeadingLevel=1,
		LowerHeadingLevel=9, # 👉 bis Heading 9
		UseHyperlinks=True,
		HidePageNumbersInWeb=False,
		UseOutlineLevels=True
		)
		# Vorhandenes TOC anpassen
		toc = doc.TablesOfContents(1)
		#Formating heading -> ToDo: last line not working so skipped for the moment
		#toc_range = toc.Range
		#heading_para = toc_range.Paragraphs(1)
		#heading_para.Style = doc.Styles("Heading 1")
		#set level range from 1-9
		toc.UpperHeadingLevel = 1
		toc.LowerHeadingLevel = 9
		toc.Update()

		doc.SaveAs(docx_path)
		doc.Close()
		word.Quit()

		def table_widths_adjustment(config):
		table_path = config.get("tables_folder")
		docx_path = config.get("output_docx")
		doc = Document(docx_path)
		def get_table_caption(table):
		"""Liest den Alternativtext-Titel (Caption) aus einer Tabelle."""
		tbl = table._tbl
		tblPr = tbl.tblPr

		if tblPr is None:
		return None # Tabelle hat keine Eigenschaften

		# Suche nach <w:tblCaption w:val="...">
		caption_el = tblPr.find(qn("w:tblCaption"))
		if caption_el is not None:
		return caption_el.get(qn("w:val"))

		# Manche Word-Versionen speichern den Text als direktes Element ohne w:val
		for el in tblPr:
		if el.tag == qn("w:tblCaption"):
		return el.text or None

		return None

		def percent_to_cm(width_percentages, total_width_cm):
		"""
		Wandelt Prozentangaben (z. B. [30, 40, 30]) in absolute cm-Werte um.

		Args:
		width_percentages (list[float]): Prozentwerte (Summe kann ein beliebiger positiver wert (>0) sein)
		total_width_cm (float): Gesamtbreite der Tabelle in cm

		Returns:
		list[float]: Spaltenbreiten in cm
		"""
		# Falls Prozente als Ganzzahlen angegeben sind, normalisieren wir sie
		total_percent = sum(width_percentages)
		if total_percent == 0:
		ErrorHandler()("sum of percent-values is 0.")

		# Umrechnen auf relative cm-Werte
		widths_cm = [Cm((p / total_percent) * total_width_cm) for p in width_percentages]
		return widths_cm

		def get_json_data(json_data_array, value, matching_json_field):
		"""
		Sucht in einer Liste von JSON-Dictionaries nach einem bestimmten Feldwert.

		Args:
		json_data_array (list[dict]): Liste von JSON-Objekten
		value (str): Der gesuchte Wert
		matching_json_field (str): Name des JSON-Feldes, in dem gesucht werden soll

		Returns:
		dict \| None: Das gefundene JSON-Objekt oder None, falls kein Treffer
		"""
		for item in json_data_array:
		if not isinstance(item, dict):
		continue # Überspringt ungültige Einträge
		if(matching_json_field==caption):
		if value.endswith(item.get(matching_json_field)):
		return item
		if item.get(matching_json_field) == value:
		return item
		return None


		# preparing json_table list -> getting caption and width from table-json files
		table_list = get_all_files_from_dir(table_path, "json")
		json_tables = []
		for table_path in table_list:
		with open(table_path, "r", encoding="utf-8") as f:
		try:
		data = json.load(f)
		except json.JSONDecodeError as e:
		ErrorHandler()(f"Skipped {table_path}. Error in reading file: {e}")
		continue

		caption = data.get("caption")
		widths = data.get("column_width")

		if not caption or not widths:
		ErrorHandler()(f"Skipping {table_path} – as caption and/or column_width are not set as expected")
		continue

		json_tables.append(data)
		for table in doc.tables:
		# Get matching of table in docx and json
		docx_caption=get_table_caption(table)
		data = get_json_data(json_tables, docx_caption, "caption")
		#found json
		if(data is not None):
		total_width = data.get("total_width", 16.88)
		col_widths_percent = data.get("column_width")
		else:
		total_width = 16.88
		col_widths_percent = [1] * len(table.rows[0].cells)

		col_widths = percent_to_cm(col_widths_percent, total_width)

		# Jede Zelle in der jeweiligen Spalte auf Breite setzen
		for row in table.rows:
		for i, width in enumerate(col_widths):
		cell = row.cells[i]
		cell.width = width
		doc.save(docx_path)
		No newline at end of file

generateBaseline/setup.py

+7 −0

Original line number	Diff line number	Diff line
		@@ -14,6 +14,13 @@ setup(
		'console_scripts' : ['pandocFilter=pandocFilter:main',
		'generateTOC=generateTOC:main',
		'svg2png=svg2png:main',
		"update_references=postprocessing:update_word_fields",
		"update_formats=postprocessing:apply_standard_style_to_unformatted_paragraphs",
		"turn_table_contents=postprocessing:postprocess_table_content",
		"table_width_adjustment=postprocessing:table_widths_adjustment",
		"check_multipage_tables=postprocessing:insert_page_break_before_long_tables",
		#"apply_etsi_styling: postprocessing:postprocess_etsi_styles",
		"update_toc_level=postprocessing:update_toc_level",
		]
		}