Add fix for editor's note containing a reference (7ae0a0ea) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

generateBaseline/postprocessing.py

+30 −7

Original line number	Diff line number	Diff line
		@@ -1897,9 +1897,27 @@ def update_references(docx_input, docx_output):
		new_style = "EX"
		counter = 0

		def _paragraph_text_from_runs(para):
		    """Return the text of *para* assembled from its runs.

		    Every ``w:r`` descendant is visited; the descendant axis also
		    reaches runs nested deeper in the paragraph, such as those inside
		    hyperlinks. Each ``w:t`` child contributes its text, and each
		    ``w:tab`` or ``w:br`` child contributes a single space. The joined
		    string is returned with leading/trailing whitespace stripped.
		    """
		    # Qualified tag names are loop-invariant, so build them once.
		    w_uri = ns['w']
		    text_tag = f"{{{w_uri}}}t"
		    spacer_tags = (f"{{{w_uri}}}tab", f"{{{w_uri}}}br")

		    pieces = []
		    for run_elem in para.xpath('.//w:r', namespaces=ns):
		        for node in run_elem:
		            node_tag = node.tag
		            if node_tag == text_tag and node.text:
		                pieces.append(node.text)
		            elif node_tag in spacer_tags:
		                # Tabs and breaks separate words visually; keep a space
		                # so adjacent text does not get glued together.
		                pieces.append(" ")
		    return ''.join(pieces).strip()

		# Loop over all paragraphs with style "BodyText" or "FirstParagraph" and change to "EX"
		for para in root.xpath('.//w:p[w:pPr/w:pStyle[@w:val="BodyText" or @w:val="FirstParagraph"]]', namespaces=ns):
		old_val = para.get(f"{{{ns['w']}}}val")

		# Keep Editor's Notes untouched here; they are formatted later in
		# update_body_text_style (including highlighting).
		para_text = _paragraph_text_from_runs(para)
		if re.search(r'Editor[\'’]s\s+Note:', para_text, re.IGNORECASE):
		continue

		# A reference is a paragraph that contains:
		# - A pPr with pStyle "BodyText" or "FirstParagraph" (already checked in loop)
		# - A bookmarkStart and a bookmarkEnd
		@@ -2001,6 +2019,17 @@ def update_body_text_style(docx_input, docx_output):
		counter = 0
		h6_counter = 0

		def _paragraph_text_from_runs(para):
		    """Return the text of *para* assembled from its runs.

		    Every ``w:r`` descendant is visited; the descendant axis also
		    reaches runs nested deeper in the paragraph, such as those inside
		    hyperlinks. Each ``w:t`` child contributes its text, and each
		    ``w:tab`` or ``w:br`` child contributes a single space. The joined
		    string is returned with leading/trailing whitespace stripped.
		    """
		    # Qualified tag names are loop-invariant, so build them once.
		    w_uri = ns['w']
		    text_tag = f"{{{w_uri}}}t"
		    spacer_tags = (f"{{{w_uri}}}tab", f"{{{w_uri}}}br")

		    pieces = []
		    for run_elem in para.xpath('.//w:r', namespaces=ns):
		        for node in run_elem:
		            node_tag = node.tag
		            if node_tag == text_tag and node.text:
		                pieces.append(node.text)
		            elif node_tag in spacer_tags:
		                # Tabs and breaks separate words visually; keep a space
		                # so adjacent text does not get glued together.
		                pieces.append(" ")
		    return ''.join(pieces).strip()

		# Loop over all elements to find "BodyText" and "FirstParagraph" and change to "Normal"
		# Combine both XPath queries
		body_text_elems = root.xpath('.//w:pStyle[@w:val="BodyText"]', namespaces=ns)
		@@ -2008,7 +2037,6 @@ def update_body_text_style(docx_input, docx_output):
		all_elems = body_text_elems + first_para_elems

		for elem in all_elems:
		full_text = ""
		# elem is w:pStyle, its parent is w:pPr, and w:pPr's parent is w:p
		pPr = elem.getparent()
		if pPr is None:
		@@ -2016,12 +2044,7 @@ def update_body_text_style(docx_input, docx_output):
		para = pPr.getparent()
		if para is None:
		continue
		runs = para.xpath('./w:r', namespaces=ns)
		for run in runs:
		text_elems = run.xpath('.//w:t', namespaces=ns)
		for text_elem in text_elems:
		if text_elem.text:
		full_text += text_elem.text
		full_text = _paragraph_text_from_runs(para)

		# Strip whitespace and check if text matches "Essential patents" or "Trademarks"
		full_text_stripped = full_text.strip()