new update toc function (ea6db6ba) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

generateBaseline/pandocFilter.py

+10 −6

Original line number	Diff line number	Diff line
		@@ -211,21 +211,25 @@ def correctTableSeparators(progress: Progress, mdLines: list[str]) -> list[str]:
		return _lines


		def process(document:str, outDirectory:str) -> None:
		def process(args) -> None:
		with Progress(TextColumn('{task.description}'), TimeElapsedColumn()) as progress:
		mdLines = readMDFile(progress, document)
		mdLines = readMDFile(progress, args.document)
		mdLines = correctTOC(progress, mdLines)
		mdLines = replaceTableCaptions(progress, mdLines)
		mdLines = replaceFigureCaptions(progress, mdLines)
		if args.figure_paths:
		mdLines = replaceFiguresPathSvgToPng(progress, mdLines)
		mdLines = replaceLineBreaks(progress, mdLines)
		if args.table_separators:
		mdLines = correctTableSeparators(progress, mdLines)
		writeMDFile(progress, mdLines, document, outDirectory)
		writeMDFile(progress, mdLines, args.document, args.outDirectory)


		def main(args=None):
		# Parse command line arguments
		parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
		parser.add_argument('-ts', '--table-separators', action='store_true', required=False, default=False, help="Correct table separators")
		parser.add_argument('-fp', '--figure-paths', action='store_true', required=False, default=False, help="Replace figure paths")
		parser.add_argument('--outdir', '-o', action='store', dest='outDirectory', default = 'out', metavar = '<output directory>', help = 'specify output directory')
		parser.add_argument('document', help = 'document to parse')
		args = parser.parse_args()
		@@ -233,7 +237,7 @@ def main(args=None):
		# Process documents and print output
		os.makedirs(args.outDirectory, exist_ok = True)

		process(args.document, args.outDirectory)
		process(args)

		if __name__ == '__main__':
		sys.exit(main())
		No newline at end of file

generateBaseline/postprocessing.py

+99 −36

Original line number	Diff line number	Diff line
		@@ -3,6 +3,10 @@ import os
		import re
		import warnings
		from pathlib import Path
		import zipfile
		from lxml import etree
		import tempfile
		import shutil

		import win32com.client #pip install pywin32

		@@ -11,7 +15,6 @@ from docx.oxml import OxmlElement
		from docx.oxml.ns import qn
		from docx.shared import Cm

		from errors import ErrorHandler, Level, Mode
		from file_helper import get_all_files_from_dir


		@@ -37,7 +40,7 @@ def apply_standard_style_to_unformatted_paragraphs(config):
		p.style = standard_style_name
		changed += 1

		ErrorHandler()(f"Changed style to '{standard_style_name}' for {changed} paragraphs.")
		print(f'Changed style to {standard_style_name} for {changed} paragraphs.')
		doc.save(output_path)

		def rotate_cell_text(cell):
		@@ -86,7 +89,7 @@ def update_word_fields(config_path: dict\|str):
		relativer_pfad = Path(docx_path)
		docx_absolute_path = relativer_pfad.resolve()
		if not os.path.isfile(docx_absolute_path):
		ErrorHandler()(f"File not found: {docx_absolute_path}", Level.ERROR)
		print(f'File not found: {docx_absolute_path}')

		# Word starten
		word = win32com.client.Dispatch("Word.Application")
		@@ -105,7 +108,7 @@ def update_word_fields(config_path: dict\|str):

		# Schließen
		doc.Close()
		ErrorHandler()(f"Fields in '{docx_absolute_path}' updated and saved")
		print(f'Fields in {docx_absolute_path} updated and saved')
		finally:
		word.Quit()

		@@ -128,7 +131,7 @@ def insert_page_break_before_long_tables(config):
		end_page = doc.Range(end - 1, end - 1).Information(3)

		if end_page > start_page:
		ErrorHandler()(f"Table {i + 1} is on a page break: {start_page} -> {end_page}", Level.INFO)
		print(f'Table {i + 1} is on a page break: {start_page} -> {end_page}')
		# Seitenumbruch einfügen
		para = doc.Range(start, start)
		para.InsertBreak(7) # wdPageBreak = 7
		@@ -138,39 +141,99 @@ def insert_page_break_before_long_tables(config):
		doc.Close()
		word.Quit()

		def update_toc_level(config):
		docx_path = config.get("output_docx")
		word = win32com.client.Dispatch("Word.Application")
		word.Visible = False
		def format_toc_header(xml_data, ns = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}):
		root = etree.fromstring(xml_data)

		doc = word.Documents.Open(docx_path)
		counter = 0
		# Find <w:pStyle w:val="TOCHeading">
		for pstyle in root.xpath('.//w:pStyle[@w:val="TOCHeading"]', namespaces=ns):
		# Change it to be <w:pStyle w:val="TT">
		old_text = pstyle
		pstyle.set(f"{{{ns['w']}}}val", "TT")
		counter+=1
		print(f'Changed Style "TOCHeading" to "TT" {counter} times')
		return etree.tostring(root, xml_declaration=True, encoding="UTF-8", standalone="yes")

		# Wenn kein TOC vorhanden ist, kannst du eins hinzufügen:
		if doc.TablesOfContents.Count == 0:
		# Inhaltsverzeichnis am Anfang des Dokuments einfügen
		doc.TablesOfContents.Add(
		Range=doc.Range(0, 0),
		UseHeadingStyles=True,
		UpperHeadingLevel=1,
		LowerHeadingLevel=9, # 👉 bis Heading 9
		UseHyperlinks=True,
		HidePageNumbersInWeb=False,
		UseOutlineLevels=True
		)
		# Vorhandenes TOC anpassen
		toc = doc.TablesOfContents(1)
		#Formating heading -> ToDo: last line not working so skipped for the moment
		#toc_range = toc.Range
		#heading_para = toc_range.Paragraphs(1)
		#heading_para.Style = doc.Styles("Heading 1")
		#set level range from 1-9
		toc.UpperHeadingLevel = 1
		toc.LowerHeadingLevel = 9
		toc.Update()

		doc.SaveAs(docx_path)
		doc.Close()
		word.Quit()
		def update_toc_level(xml_data, ns=None):
		    r"""Strip the explicit heading-level range from every TOC field instruction.

		    Each ``w:instrText`` element whose text contains ``TOC`` has the quoted
		    range that directly follows the ``\o `` switch (for example ``"1-3"``)
		    removed, together with any whitespace after it. The switch itself stays.
		    NOTE(review): Word is documented to list all heading levels when ``\o``
		    carries no range -- confirm this is the intended result.

		    Args:
		        xml_data: Serialized ``word/document.xml`` content accepted by
		            ``etree.fromstring``.
		        ns: Prefix-to-URI mapping used for the XPath lookup. Defaults to the
		            WordprocessingML main namespace bound to the prefix ``w``.

		    Returns:
		        The re-serialized document as UTF-8 encoded XML with an XML
		        declaration marked ``standalone="yes"``.
		    """
		    if ns is None:
		        # Built per call so no mutable default is shared between invocations.
		        ns = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}

		    root = etree.fromstring(xml_data)
		    # Regex for \o "x-y" with x and y being numbers; the lookbehind keeps
		    # the `\o ` switch in place and drops only the quoted range.
		    pattern = re.compile(r'(?<=\\o )"\d+-\d+"\s*')

		    # Loop over all field instructions to find the ones describing a TOC.
		    for elem in root.xpath('.//w:instrText', namespaces=ns):
		        old_text = elem.text
		        # An empty <w:instrText/> has text None; skip it instead of failing.
		        if not old_text or 'TOC' not in old_text:
		            continue

		        new_text = pattern.sub('', old_text)
		        if new_text != old_text:
		            elem.text = new_text
		            # Report only instructions that were actually rewritten.
		            print(f'Changed TOC: {old_text} → {new_text}')

		    return etree.tostring(root, xml_declaration=True, encoding="UTF-8", standalone="yes")

		def update_toc(docx_input, docx_output):
		    """Rewrite the TOC field and TOC heading style of a .docx package.

		    ``word/document.xml`` is read from ``docx_input``, passed through
		    ``update_toc_level`` and ``format_toc_header``, and the package is then
		    rebuilt in a temporary file that is finally moved to ``docx_output``.
		    Every other archive member is copied unchanged.

		    Args:
		        docx_input: Path of the .docx archive to read.
		        docx_output: Path the rewritten archive is moved to. It may be the
		            same as ``docx_input``, because the input archive is closed
		            before the move.

		    Raises:
		        KeyError: If the archive has no ``word/document.xml`` member.
		    """
		    document_part = "word/document.xml"

		    # Read and transform the main document part first, so a missing part
		    # fails before any temporary file is created.
		    with zipfile.ZipFile(docx_input, 'r') as zin:
		        xml_data = zin.read(document_part)

		    xml_data = update_toc_level(xml_data)
		    xml_data = format_toc_header(xml_data)

		    # Create the temp file; the descriptor is closed right away because the
		    # file is only accessed through zipfile afterwards.
		    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".docx")
		    os.close(tmp_fd)

		    try:
		        # Write the new docx to the temp file.
		        with zipfile.ZipFile(docx_input, 'r') as zin, \
		                zipfile.ZipFile(tmp_path, 'w', zipfile.ZIP_DEFLATED) as zout:
		            for item in zin.infolist():
		                if item.filename == document_part:
		                    payload = xml_data
		                else:
		                    payload = zin.read(item)
		                # Passing the ZipInfo (not just the name) keeps the member's
		                # timestamp, compression type and attributes, and writing
		                # inside the loop keeps the original member order.
		                zout.writestr(item, payload)

		        # Both archives are closed here, so the move also works when the
		        # output path equals the input path.
		        shutil.move(tmp_path, docx_output)

		    finally:
		        # Delete the temp file if it still exists (i.e. the move did not happen).
		        if os.path.exists(tmp_path):
		            os.remove(tmp_path)

		#def update_toc_level(config):
		# docx_path = config.get("output_docx")
		# word = win32com.client.Dispatch("Word.Application")
		# word.Visible = False
		#
		# doc = word.Documents.Open(docx_path)
		#
		# # Wenn kein TOC vorhanden ist, kannst du eins hinzufügen:
		# if doc.TablesOfContents.Count == 0:
		# # Inhaltsverzeichnis am Anfang des Dokuments einfügen
		# doc.TablesOfContents.Add(
		# Range=doc.Range(0, 0),
		# UseHeadingStyles=True,
		# UpperHeadingLevel=1,
		# LowerHeadingLevel=9, # 👉 bis Heading 9
		# UseHyperlinks=True,
		# HidePageNumbersInWeb=False,
		# UseOutlineLevels=True
		# )
		# # Vorhandenes TOC anpassen
		# toc = doc.TablesOfContents(1)
		# #Formating heading -> ToDo: last line not working so skipped for the moment
		# #toc_range = toc.Range
		# #heading_para = toc_range.Paragraphs(1)
		# #heading_para.Style = doc.Styles("Heading 1")
		# #set level range from 1-9
		# toc.UpperHeadingLevel = 1
		# toc.LowerHeadingLevel = 9
		# toc.Update()

		# doc.SaveAs(docx_path)
		# doc.Close()
		# word.Quit()

		def table_widths_adjustment(config):
		table_path = config.get("tables_folder")

generateBaseline/setup.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -20,7 +20,7 @@ setup(
		"table_width_adjustment=postprocessing:table_widths_adjustment",
		"check_multipage_tables=postprocessing:insert_page_break_before_long_tables",
		#"apply_etsi_styling: postprocessing:postprocess_etsi_styles",
		"update_toc_level=postprocessing:update_toc_level",
		"update_toc=postprocessing:update_toc",
		]
		}