Fixes for lists postprocessing (c9231aaa) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

generateBaseline/postprocessing.py

+54 −17

Original line number	Diff line number	Diff line
		@@ -828,11 +828,11 @@ def update_heading_styles(docx_input, docx_output):
		os.remove(tmp_path)


		def update_unnumbered_lists(docx_input, docx_output):
		def update_lists(docx_input, docx_output):
		"""
		Updates unnumbered list items (starting with "- ") in tables to appear as bulleted lists.
		For list items in tables: removes "- " prefix and creates separate paragraphs with FP style and numPr.
		For list items outside tables: removes "- " prefix and adds B1 style.
		Updates list items (starting with "- " or "1. ") in tables to appear as bulleted/numbered lists.
		For list items in tables: removes prefix and creates separate paragraphs with FP style and numPr.
		For list items outside tables: removes prefix and adds B1 (bullet) or BN (numbered) style.

		Parameters
		----------
		@@ -978,10 +978,20 @@ def update_unnumbered_lists(docx_input, docx_output):

		def is_list_item_para(para):
		    """Return True if any line of the paragraph starts with a list marker.

		    The paragraph text is rebuilt from its ``w:t`` elements, with each
		    ``w:tab`` mapped to a tab character and each ``w:br``/``w:cr`` mapped
		    to a newline, so a marker that follows a line break inside the
		    paragraph is detected as well as one at the very beginning.

		    A line counts as a list item when, after leading whitespace is
		    dropped, it starts with ``"- "`` (bullet) or with one or more digits,
		    a dot and a whitespace character (e.g. ``"1. "``).

		    Uses ``ns`` (the namespace prefix map providing ``w``) and ``re``
		    from the enclosing scope.

		    Parameters
		    ----------
		    para : element
		        Paragraph element exposing ``xpath(expr, namespaces=...)``.

		    Returns
		    -------
		    bool
		        True if at least one line starts with a list marker.
		    """
		    pieces = []
		    # One union query so text, tabs and breaks come back interleaved
		    # (assumes the XPath engine returns node-sets in document order,
		    # as lxml does).
		    for node in para.xpath('.//w:t | .//w:br | .//w:cr | .//w:tab', namespaces=ns):
		        tag = node.tag
		        if tag.endswith('}t'):
		            if node.text:
		                pieces.append(node.text)
		        elif tag.endswith('}tab'):
		            pieces.append('\t')
		        else:  # w:br or w:cr
		            pieces.append('\n')

		    for line in ''.join(pieces).split('\n'):
		        line = line.lstrip()
		        if line.startswith('- ') or re.match(r'^\d+\.\s', line):
		            return True
		    return False

		@@ -1139,15 +1149,31 @@ def update_unnumbered_lists(docx_input, docx_output):
		if not runs:
		continue

		# Find ALL list item runs (runs starting with "- ")
		# Find ALL list item runs (runs starting with "- " at start of line)
		list_item_runs = []
		all_children = list(para)
		is_start_of_line = True

		for idx, child in enumerate(all_children):
		if child.tag == f"{{{ns['w']}}}r":
		text_elem = child.find('.//w:t', namespaces=ns)
		if text_elem is not None and text_elem.text and text_elem.text.startswith('- '):

		if is_start_of_line and text_elem is not None and text_elem.text:
		text_stripped = text_elem.text.lstrip()
		if text_stripped.startswith('- ') or re.match(r'^\d+\.\s', text_stripped):
		list_item_runs.append((idx, child, text_elem))

		# Update state
		for node in child:
		tag = node.tag
		if tag == f"{{{ns['w']}}}br" or tag == f"{{{ns['w']}}}cr":
		is_start_of_line = True
		elif tag == f"{{{ns['w']}}}t":
		if node.text and node.text.strip():
		is_start_of_line = False
		elif child.tag == f"{{{ns['w']}}}hyperlink":
		is_start_of_line = False

		# If we found list items, process each one separately
		if list_item_runs:
		# Get the parent element (usually the document body or table cell)
		@@ -1182,8 +1208,16 @@ def update_unnumbered_lists(docx_input, docx_output):
		# Process each list item run separately
		insert_offset = 0 # Track where to insert new paragraphs
		for list_idx, (run_idx, list_item_run, list_item_text_elem) in enumerate(list_item_runs):
		# Remove the "- " prefix
		list_item_text_elem.text = list_item_text_elem.text[2:]

		# Determine list type
		text = list_item_text_elem.text.lstrip()
		is_numbered_manual = bool(re.match(r'^\d+\.', text))

		# Remove the prefix
		if is_numbered_manual:
		list_item_text_elem.text = re.sub(r'^\s*\d+\.\s*', '', list_item_text_elem.text)
		else:
		list_item_text_elem.text = re.sub(r'^\s*-\s+', '', list_item_text_elem.text)

		# Create a new paragraph for this list item
		new_para = OxmlElement('w:p')
		@@ -1235,10 +1269,13 @@ def update_unnumbered_lists(docx_input, docx_output):
		else:
		# Simple structure for regular list items (outside tables)
		pStyle = OxmlElement('w:pStyle')
		if is_numbered_manual:
		pStyle.set(f"{{{ns['w']}}}val", "BN")
		counter_numbered += 1
		else:
		pStyle.set(f"{{{ns['w']}}}val", "B1")
		pPr.append(pStyle)

		counter_regular += 1
		pPr.append(pStyle)

		new_para.append(pPr)

		@@ -2475,7 +2512,7 @@ def update_format_styles_cli():
		update_table_rows(args.docx_input, args.docx_output)
		update_notes(args.docx_input, args.docx_output)
		update_references(args.docx_input, args.docx_output)
		update_unnumbered_lists(args.docx_input, args.docx_output)
		update_lists(args.docx_input, args.docx_output)
		update_body_text_style(args.docx_input, args.docx_output)
		add_no_break_hyphens(args.docx_input, args.docx_output)
		update_references_style(args.docx_input, args.docx_output)