Skip Abbreviations clause when reformatting examples (81ed0daa) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

generateBaseline/postprocessing.py

+44 −4

Original line number	Diff line number	Diff line
		@@ -2403,11 +2403,34 @@ def add_break_after_code_blocks_and_tables(docx_input, docx_output):
		w_before = f"{{{ns['w']}}}before"
		w_after = f"{{{ns['w']}}}after"

		def _has_target_block_style(elem):
		def _paragraph_style_val(elem):
		if elem is None or elem.tag != w_p:
		return False
		return None
		style_elems = elem.xpath('./w:pPr/w:pStyle', namespaces=ns)
		return bool(style_elems and style_elems[0].get(w_val) in {"PL", "EW"})
		if not style_elems:
		return None
		return style_elems[0].get(w_val)

		def _paragraph_text(elem):
		if elem is None or elem.tag != w_p:
		return ""
		# Build text from runs to respect how headings are created in update_heading_styles:
		# number + <w:tab/> + title in separate run parts.
		parts = []
		for run in elem.xpath('./w:r', namespaces=ns):
		for child in run:
		if child.tag == f"{{{ns['w']}}}t" and child.text:
		parts.append(child.text)
		elif child.tag in (f"{{{ns['w']}}}tab", f"{{{ns['w']}}}br"):
		parts.append(" ")
		# Fallback for paragraphs containing text in non-direct run descendants.
		if not parts:
		parts = elem.xpath('.//w:t/text()', namespaces=ns)
		return ''.join(parts).strip()

		def _normalize_ws(text):
		# Normalize tabs/non-breaking spaces/multiple spaces for robust heading match.
		return re.sub(r'\s+', ' ', text.replace('\u00A0', ' ')).strip()

		def _is_empty_paragraph(elem):
		if elem is None or elem.tag != w_p:
		@@ -2417,9 +2440,26 @@ def add_break_after_code_blocks_and_tables(docx_input, docx_output):

		code_break_paras = 0
		table_break_paras = 0
		in_abbreviations_clause = False
		for elem in list(body):
		# Track whether we are currently inside an "Abbreviations" top-level section.
		# Heading style has already been normalized to "Heading1" before this step.
		if elem.tag == w_p:
		style_val = _paragraph_style_val(elem)
		if style_val == "Heading1":
		heading_text = _normalize_ws(_paragraph_text(elem))
		in_abbreviations_clause = bool(
		re.search(r'\babbreviations\b', heading_text, flags=re.IGNORECASE)
		)

		# After PL/EW paragraphs: insert one empty paragraph with zero spacing.
		if _has_target_block_style(elem):
		style_val = _paragraph_style_val(elem)
		is_target_style = style_val in {"PL", "EW"}
		if is_target_style:
		# EW inside "Abbreviations" clause must not be modified.
		if style_val == "EW" and in_abbreviations_clause:
		continue

		nxt = elem.getnext()
		if _is_empty_paragraph(nxt):
		continue