Added some more postprocessing functions (82490058) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

generateBaseline/postprocessing.py

+43 −1

Original line number	Diff line number	Diff line
		@@ -16,6 +16,9 @@ from docx.oxml import OxmlElement
		from docx.oxml.ns import qn
		from docx.shared import Cm

		from postprocessing_styling import postprocess_etsi_styles


		def parse_input(args_to_parse=None, description = None):
		"""
		args_to_parse: list of arguments, e.g. ["input", "output"]
		@@ -117,6 +120,16 @@ def table_width_adjustment_cli():
		description="Set the width of table columns according to values in json or if non there to be equal.")
		table_widths_adjustment(docx_input, docx_output, tables_folder)

		def postprocess_etsi_styles_cli():
		    """Command-line entry point for the ETSI style post-processing.

		    Obtains the input and output docx paths from ``parse_input`` and hands
		    them to ``postprocess_etsi_styles``.
		    """
		    help_text = "Update styles in document according to etsi styles (currently not ready, just a few styles)."
		    source_docx, target_docx = parse_input(description=help_text)
		    postprocess_etsi_styles(source_docx, target_docx)

		def remove_docx_metadata_cli():
		    """Command-line entry point for stripping metadata from a docx file.

		    Obtains the input and output docx paths from ``parse_input`` and hands
		    them to ``remove_docx_metadata``.
		    """
		    docx_input, docx_output = parse_input(
		        description="Remove metadata from docx.")
		    # Must dispatch to the metadata remover, not to the ETSI styling step.
		    remove_docx_metadata(docx_input, docx_output)


		def apply_standard_style_to_unformatted_paragraphs(docx_input, docx_output, standard_style_name = "Normal"):
		# Filter warning
		warnings.filterwarnings(
		@@ -486,3 +499,32 @@ def table_widths_adjustment(docx_input, docx_output, tables_folder):
		cell = row.cells[i]
		cell.width = width
		doc.save(docx_output)

		def remove_docx_metadata(docx_input, docx_output):
		    """
		    Remove metadata from a Word file.

		    Loads ``docx_input`` with python-docx, blanks the core properties
		    (author, last modified by, title, subject, comments, keywords,
		    category) and writes ``docx_output`` as a copy of the saved package
		    without its ``docProps/core.xml`` and ``docProps/app.xml`` entries.

		    docx_input: docx file to read (passed to ``Document``).
		    docx_output: path string of the docx file to write. The scratch file
		        ``docx_output + ".tmp"`` is created next to it and always deleted
		        again, even when rewriting the archive fails.
		    """
		    # 1. Clear the properties with python-docx.
		    doc = Document(docx_input)
		    props = doc.core_properties
		    # Use empty strings rather than None: python-docx converts non-str
		    # values with str(), so None would be stored as the text "None".
		    for prop_name in ("author", "last_modified_by", "title", "subject",
		                      "comments", "keywords", "category"):
		        setattr(props, prop_name, "")

		    # 2. Delete the metadata files from the ZIP.
		    # NOTE(review): relationship and content-type entries that point at the
		    # removed parts are left untouched - confirm consumers tolerate that.
		    metadata_parts = ("docProps/core.xml", "docProps/app.xml")
		    tmp_path = docx_output + ".tmp"
		    try:
		        # Save straight to the scratch file; the final archive is produced
		        # by the filtered copy below, so no intermediate copy is needed.
		        doc.save(tmp_path)
		        with zipfile.ZipFile(tmp_path, "r") as zin:
		            with zipfile.ZipFile(docx_output, "w") as zout:
		                for item in zin.infolist():
		                    if item.filename not in metadata_parts:
		                        # Passing the ZipInfo keeps each entry's own
		                        # compression method and timestamps.
		                        zout.writestr(item, zin.read(item))
		    finally:
		        if os.path.exists(tmp_path):
		            os.remove(tmp_path)
		    print(f"Successfully removed metadata from {docx_input} and wrote it to {docx_output}")
		No newline at end of file

generateBaseline/postprocessing_styling.py

0 → 100644

+53 −0

Original line number	Diff line number	Diff line
		import re

		from docx import Document
		from docx.shared import Cm


		def change_annex_heading_formats(doc):
		    """Restyle top-level annex headings.

		    Every paragraph styled "Heading 1" whose first word starts with
		    "annex" (case-insensitive) is switched to the document's "Heading 8"
		    style. Paragraphs with other styles or without text are left alone.

		    doc: document object exposing ``paragraphs`` and ``styles``.
		    Returns the same ``doc`` object.
		    """
		    for paragraph in doc.paragraphs:
		        if paragraph.style.name != "Heading 1":
		            continue

		        # split() without arguments ignores surrounding whitespace, so an
		        # empty or blank heading simply yields no words.
		        words = paragraph.text.split()
		        if words and words[0].lower().startswith("annex"):
		            paragraph.style = doc.styles['Heading 8']
		    return doc

		def intent_example_and_note(doc):
		    """Indent NOTE/EXAMPLE paragraphs and put a tab after their label.

		    For every paragraph of ``doc`` whose stripped text starts with "NOTE"
		    or "EXAMPLE":

		    * NOTE paragraphs get a left indent of 2 cm and a first-line indent of
		      -1.5 cm (hanging indent).
		    * EXAMPLE paragraphs get a left indent of 2.5 cm and a first-line
		      indent of -2 cm.
		    * A leading label such as "NOTE", "NOTE 1" or "EXAMPLE 12", followed
		      by optional whitespace, a colon and optional whitespace, is
		      rewritten to the label, a colon and a single tab.

		    doc: document object exposing ``paragraphs``; modified in place.
		    Returns None.
		    """
		    # List of keywords that introduce a labelled paragraph.
		    keywords = ["EXAMPLE", "NOTE"]

		    # Build the regex once: keyword, optionally followed by a number, then
		    # the colon. The alternation is wrapped in its own group so the number
		    # applies to every keyword, and the match is anchored at the start so
		    # only the paragraph's own label is touched.
		    label_pattern = re.compile(
		        r'^(\s*(?:' + '|'.join(re.escape(word) for word in keywords)
		        + r')(?:\s\d+)?)\s*:\s*'
		    )

		    for para in doc.paragraphs:
		        text = para.text
		        stripped = text.strip()
		        if not stripped.startswith(("NOTE", "EXAMPLE")):
		            continue

		        paragraph_format = para.paragraph_format
		        if stripped.startswith("NOTE"):
		            # Increase the indent and set a hanging indent.
		            paragraph_format.left_indent = Cm(2)
		            paragraph_format.first_line_indent = Cm(-1.5)
		        else:
		            # Increase the indent and set a hanging indent.
		            paragraph_format.left_indent = Cm(2.5)
		            paragraph_format.first_line_indent = Cm(-2)

		        # Set a tab after ":". Only write back when the text changed:
		        # assigning para.text replaces the paragraph's runs with a single
		        # run, which would drop inline formatting for no reason.
		        new_text = label_pattern.sub(r'\1:\t', text, count=1)
		        if new_text != text:
		            para.text = new_text




		def postprocess_etsi_styles(docx_input, docx_output):
		    """Apply the ETSI styling steps to a docx file.

		    Opens ``docx_input``, runs ``change_annex_heading_formats`` followed by
		    ``intent_example_and_note`` on the document and saves the result to
		    ``docx_output``.
		    """
		    document = Document(docx_input)
		    # The steps run in this fixed order on the same document object.
		    for styling_step in (change_annex_heading_formats, intent_example_and_note):
		        styling_step(document)
		    document.save(docx_output)
		No newline at end of file

generateBaseline/setup.py

+2 −0

Original line number	Diff line number	Diff line
		@@ -22,6 +22,8 @@ setup(
		#"apply_etsi_styling: postprocessing:postprocess_etsi_styles",
		"update_toc=postprocessing:update_toc_cli",
		"refresh_docx_fields=postprocessing:refresh_docx_fields_cli",
		"apply_etsi_styling=postprocessing:postprocess_etsi_styles_cli",
		"remove_metadata=postprocessing:remove_docx_metadata_cli"
		]
		}