Another try (ead1c788) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

generateBaseline/postprocessing.py

+39 −2

Original line number	Diff line number	Diff line
		@@ -1089,6 +1089,43 @@ def update_lists(docx_input, docx_output):
		for i, r in enumerate(new_runs):
		para.insert(index + i, r)

		def split_run_by_regex(para, run, pattern):
		    """Replace ``run`` in ``para`` with one run per non-empty text piece.

		    The text of the first ``w:t`` element found under ``run`` is split
		    with ``re.split(pattern, ...)``. Every non-empty piece becomes a new
		    ``w:r`` element holding a deep copy of the original run's ``w:rPr``
		    (when one exists) and a ``w:t`` with ``xml:space="preserve"``. The new
		    runs are inserted at the position the original run occupied.

		    The function returns ``None`` and leaves the paragraph untouched when:
		    - ``run`` is not a direct child of ``para``;
		    - the run has no ``w:t`` element, or its text is empty;
		    - the pattern does not split the text (a single part comes back).

		    Note: only the run properties and the split text are carried over to
		    the new runs; any other children of the original run are dropped with
		    it. Whether matched separators survive follows ``re.split`` semantics
		    (they are kept only if ``pattern`` contains capture groups).
		    """
		    import copy

		    # Locate the run among the paragraph's direct children.
		    try:
		        position = list(para).index(run)
		    except ValueError:
		        return

		    w_ns = ns['w']
		    text_elem = run.find(f".//{{{w_ns}}}t")
		    source_text = None if text_elem is None else text_elem.text
		    if not source_text:
		        return

		    pieces = re.split(pattern, source_text)
		    if len(pieces) < 2:
		        # Pattern never matched: nothing to split.
		        return

		    formatting = run.find(f".//{{{w_ns}}}rPr")

		    def build_run(content):
		        # Fresh run carrying the original formatting and the given text.
		        new_run = OxmlElement('w:r')
		        if formatting is not None:
		            new_run.append(copy.deepcopy(formatting))
		        text_node = OxmlElement('w:t')
		        text_node.set(f"{{{ns['xml']}}}space", "preserve")
		        text_node.text = content
		        new_run.append(text_node)
		        return new_run

		    # Empty (or None) pieces produced by re.split are skipped.
		    replacements = [build_run(piece) for piece in pieces if piece]

		    # Swap the original run for its replacements, preserving order.
		    para.remove(run)
		    for offset, new_run in enumerate(replacements, start=position):
		        para.insert(offset, new_run)

		for para in paragraphs:
		# Skip if already processed
		if id(para) in processed_paras:
		@@ -1203,12 +1240,12 @@ def update_lists(docx_input, docx_output):
		if child.tag == f"{{{ns['w']}}}r":
		text_elem = child.find('.//w:t', namespaces=ns)

		if text_elem is not None and text_elem.text:
		if is_start_of_line and text_elem is not None and text_elem.text:
		text_stripped = text_elem.text.lstrip()
		is_numbered_candidate = bool(re.match(r'^\d+\.\s', text_stripped))
		is_bullet_candidate = text_stripped.startswith('- ')

		if is_numbered_candidate or (is_bullet_candidate and is_start_of_line):
		if is_numbered_candidate or is_bullet_candidate:
		list_item_runs.append((idx, child, text_elem))

		# Update state