Merge branch 'styles_adjustment' into newfeatures_adjustment (14e11bdc) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

.gitlab-ci.yml

+9 −2

Original line number	Diff line number	Diff line
		@@ -49,6 +49,7 @@ Build generateBaseline docker image:
		- generateBaseline/generateTOC.py
		- generateBaseline/svg2png.py
		- generateBaseline/postprocessing.py
		- generateBaseline/postprocessing_styling.py
		- generateBaseline/file_helper.py

		Build docx-field-refresh docker image:
		@@ -75,10 +76,16 @@ Build generateSpecWebSite docker image:
		- generateSpecWebSite/gridTableFilter.py
		- generateSpecWebSite/gridTableTools.py
		- generateSpecWebSite/indexDownload.md
		- generateSpecWebSite/indexReleaseNotes.md
		- generateSpecWebSite/markdownTools.py
		- generateSpecWebSite/regexMatches.py
		- generateSpecWebSite/spec_on_pages.sh
		- generateSpecWebSite/toMkdocs.py
		- generateSpecWebSite/create_frontmatter_table.py
		- markdownTools/dockerfile
		- markdownTools/setup.py
		- markdownTools/requirements.txt
		- markdownTools/processMDSpec.py

		Build markdownTools docker image:
		stage: build

generateBaseline/postprocessing.py

+56 −7

Original line number	Diff line number	Diff line
		@@ -16,7 +16,7 @@ from docx.oxml import OxmlElement
		from docx.oxml.ns import qn
		from docx.shared import Cm

		from postprocessing_styling import postprocess_etsi_styles
		from postprocessing_styling import *


		def parse_input(args_to_parse=None, description = None):
		@@ -120,14 +120,10 @@ def table_width_adjustment_cli():
		description="Set the width of table columns according to values in json or if non there to be equal.")
		table_widths_adjustment(docx_input, docx_output, tables_folder)

		def postprocess_etsi_styles_cli():
		docx_input, docx_output = parse_input(description="Update styles in document according to etsi styles (currently not ready, just a few styles).")
		postprocess_etsi_styles(docx_input, docx_output)

		def remove_docx_metadata_cli():
		docx_input, docx_output = parse_input(
		description="Remove metadata from docx.")
		postprocess_etsi_styles(docx_input, docx_output)
		#ToDo


		def apply_standard_style_to_unformatted_paragraphs(docx_input, docx_output, standard_style_name = "Normal"):
		@@ -528,3 +524,56 @@ def remove_docx_metadata(docx_input, docx_output):

		os.remove(tmp_path)
		print(f"Successfully removed metadata from {docx_input} and wrote it to {docx_output}")


		def update_format_styles_cli():
		    """Command-line entry point: apply every format-style update to a DOCX file.

		    Parses two positional arguments (``docx_input`` and ``docx_output``),
		    runs the styling steps listed below one after another, and finally
		    re-writes ``word/document.xml`` of the output file after passing it
		    through ``sanitize_document_xml``.

		    The output file is replaced via a temporary file, which is removed again
		    if anything fails before the move.
		    """
		    parser = argparse.ArgumentParser(description="Update format styles in a DOCX file.")
		    parser.add_argument("docx_input", help="Path to input DOCX file")
		    parser.add_argument("docx_output", help="Path to output DOCX file")
		    args = parser.parse_args()

		    # Styling steps in the order they must run. These names are not defined
		    # in this function; presumably they come from postprocessing_styling.
		    styling_steps = (
		        update_figure_captions,
		        update_heading_styles,
		        update_figure_style,
		        update_unnumbered_lists,
		        update_table_captions,
		        update_abbreviations,
		        update_table_rows,
		        update_notes,
		        update_references,
		        update_body_text_style,
		        add_no_break_hyphens,
		        update_references_style,
		        update_source_code_style,
		        update_equation_style,
		        correct_quotes_docx,
		    )

		    # Chain the steps: only the first one reads the original input, every
		    # later one continues from the file the previous step produced.
		    # Passing docx_input to every step would make each step start over from
		    # the unmodified document whenever input and output paths differ.
		    # NOTE(review): assumes each step reads its first argument and writes its
		    # second, and tolerates both being the same path - confirm against
		    # postprocessing_styling.
		    current_source = args.docx_input
		    for step in styling_steps:
		        step(current_source, args.docx_output)
		        current_source = args.docx_output

		    # Final sanitization: remove invalid XML characters
		    ns = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}
		    with zipfile.ZipFile(args.docx_output, 'r') as zin:
		        xml_data = zin.read("word/document.xml")

		    root = etree.fromstring(xml_data)
		    sanitize_document_xml(root, ns)

		    xml_data = etree.tostring(root, xml_declaration=True, encoding="UTF-8", standalone="yes")

		    # Write sanitized document back through a temporary archive so the
		    # output is only replaced once the new archive is complete.
		    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".docx")
		    os.close(tmp_fd)  # only the path is needed; zipfile reopens it

		    try:
		        with zipfile.ZipFile(args.docx_output, 'r') as zin, \
		                zipfile.ZipFile(tmp_path, 'w', zipfile.ZIP_DEFLATED) as zout:
		            # Copy every member except the main document part unchanged ...
		            for item in zin.infolist():
		                if item.filename != "word/document.xml":
		                    zout.writestr(item.filename, zin.read(item.filename))
		            # ... and add the sanitized document part in its place.
		            zout.writestr("word/document.xml", xml_data)

		        shutil.move(tmp_path, args.docx_output)
		        # mkstemp creates the file readable/writable by the owner only;
		        # restore regular file permissions on the final output.
		        os.chmod(args.docx_output, 0o644)
		        print('Sanitized document: removed invalid XML characters')
		    finally:
		        # After a successful move the temp path no longer exists; this only
		        # cleans up when something failed before the move.
		        if os.path.exists(tmp_path):
		            os.remove(tmp_path)
		No newline at end of file

generateBaseline/postprocessing_styling.py

+1896 −35

File changed.

Preview size limit exceeded, changes collapsed.

generateBaseline/setup.py

+3 −4

Original line number	Diff line number	Diff line
		@@ -14,15 +14,14 @@ setup(
		'console_scripts' : ['pandocFilter=pandocFilter:main',
		'generateTOC=generateTOC:main',
		'svg2png=svg2png:main',
		#"update_references=postprocessing:update_word_fields",
		"update_references=postprocessing:update_word_fields",
		"update_formats=postprocessing:update_formats_cli",
		"turn_table_contents=postprocessing:turn_table_contents_cli",
		"table_width_adjustment=postprocessing:table_width_adjustment_cli",
		#"check_multipage_tables=postprocessing:insert_page_break_before_long_tables",
		#"apply_etsi_styling: postprocessing:postprocess_etsi_styles",
		"check_multipage_tables=postprocessing:insert_page_break_before_long_tables",
		"update_toc=postprocessing:update_toc_cli",
		"update_format_styles=postprocessing:update_format_styles_cli",
		"refresh_docx_fields=postprocessing:refresh_docx_fields_cli",
		"apply_etsi_styling=postprocessing:postprocess_etsi_styles_cli",
		"remove_metadata=postprocessing:remove_docx_metadata_cli"
		]
		}

generateSpecWebSite/create_frontmatter_table.py

0 → 100644

+100 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3
		#
		# create_frontmatter_table.py
		#
		# Creates a markdown table from frontmatter data
		# - Removes headings
		# - <br> tag removal is available via remove_br_tags() but is currently not applied to table values
		# - Formats as a clean markdown table
		#
		# (c) 2025 by Miguel Angel Reina Ortega
		# License: BSD 3-Clause License. See the LICENSE file for further details.

		import sys
		import yaml
		import re

		def remove_br_tags(text):
		    """Replace HTML line-break tags with spaces and normalise whitespace.

		    Args:
		        text: String to clean. Falsy values (``None``, ``""``) are returned
		            unchanged.

		    Returns:
		        The text with every ``<br>``, ``<br/>`` or ``<br />`` tag (any letter
		        case) replaced by a single space, every run of whitespace collapsed
		        to one space, and leading/trailing whitespace stripped.
		    """
		    if not text:
		        return text
		    # A tag becomes a space (not nothing) so the words it separated stay apart.
		    without_tags = re.sub(r'<br\s*/?>', ' ', text, flags=re.IGNORECASE)
		    # Collapse the whitespace runs left behind; this also folds newlines.
		    return re.sub(r'\s+', ' ', without_tags).strip()

		def create_table_from_frontmatter(frontmatter_file, output_file=None):
		    """Create a two-column markdown table from a YAML frontmatter file.

		    Lines whose stripped text ends with ``.md:`` are dropped, the remaining
		    text is parsed with ``yaml.safe_load`` and every top-level key becomes
		    one ``| key | value |`` row. The table has an empty header row.

		    Args:
		        frontmatter_file: Path to the frontmatter file (read as UTF-8).
		        output_file: Optional path of the file to write (UTF-8). When it is
		            ``None`` or empty the table is printed to stdout instead.

		    Raises:
		        ValueError: If the parsed YAML is non-empty but not a mapping.
		    """
		    with open(frontmatter_file, 'r', encoding='utf-8') as f:
		        content = f.read()

		    # Drop filename marker lines (anything ending in ".md:"); every such
		    # line is skipped, not only the first one.
		    yaml_lines = [
		        line for line in content.split('\n')
		        if not line.strip().endswith('.md:')
		    ]

		    frontmatter = yaml.safe_load('\n'.join(yaml_lines))
		    if not frontmatter:
		        frontmatter = {}
		    elif not isinstance(frontmatter, dict):
		        # A top-level list or scalar has no key/value pairs to tabulate.
		        raise ValueError(
		            f"Frontmatter in {frontmatter_file} must be a YAML mapping, "
		            f"got {type(frontmatter).__name__}"
		        )

		    def escape_cell(cell_text):
		        # A bare pipe inside a cell would be read as a column separator.
		        return cell_text.replace('|', '\\|')

		    # Empty header row plus left-aligned delimiter row. The separators must
		    # be plain pipes: a backslash-escaped pipe is rendered as literal text
		    # and the block would no longer be recognised as a table.
		    table_lines = ["", "|  |  |", "|:-|:-|"]

		    for key, value in frontmatter.items():
		        if value is None:
		            value_str = ""
		        elif isinstance(value, (list, dict)):
		            # Nested structures are shown as their block-style YAML text.
		            value_str = yaml.dump(value, default_flow_style=False).strip()
		        else:
		            value_str = str(value)

		        # NOTE: <br> tags in values are left untouched; remove_br_tags() is
		        # not applied here.
		        table_lines.append(f"| {escape_cell(str(key))} | {escape_cell(value_str)} |")

		    # Leading and trailing empty entries surround the table with newlines.
		    table_lines.append("")
		    table_content = '\n'.join(table_lines)

		    if output_file:
		        with open(output_file, 'w', encoding='utf-8') as f:
		            f.write(table_content)
		    else:
		        print(table_content)

		if __name__ == '__main__':
		    # Script entry point: <frontmatter_file> is required, [output_file] is
		    # optional (without it the table is printed to stdout).
		    if len(sys.argv) < 2:
		        # Usage goes to stderr so it never mixes with table output on stdout.
		        print("Usage: create_frontmatter_table.py <frontmatter_file> [output_file]", file=sys.stderr)
		        sys.exit(1)

		    frontmatter_file = sys.argv[1]
		    output_file = sys.argv[2] if len(sys.argv) > 2 else None

		    create_table_from_frontmatter(frontmatter_file, output_file)