def split_run_by_regex(para, run, pattern):
    """Replace ``run`` in ``para`` with one new run per non-empty text piece.

    The text of the first ``w:t`` element found under ``run`` is split with
    ``re.split(pattern, text)``. Each non-empty piece becomes a fresh ``w:r``
    element that carries a deep copy of the original run's ``w:rPr`` (when
    one is found) and a ``w:t`` child marked ``xml:space="preserve"``. The
    new runs are inserted at the position the original run occupied, and the
    original run is removed.

    Nothing is changed when ``run`` is not a direct child of ``para``, when
    the run has no non-empty text, or when the pattern does not split the
    text at all.

    Args:
        para: Parent element whose children include ``run``.
        run: The ``w:r`` element to split.
        pattern: Regular expression handed to ``re.split``.

    Returns:
        None. ``para`` is modified in place.

    NOTE(review): only the first ``w:t`` under ``run`` is read, and the
    rebuilt runs contain only ``w:rPr`` and ``w:t``; any other children of
    the original run are not carried over - confirm callers rely on that.
    NOTE(review): text matched by ``pattern`` is kept only if the pattern
    uses capturing groups (standard ``re.split`` behaviour).
    """
    import copy

    siblings = list(para)
    if run not in siblings:
        return
    position = siblings.index(run)

    text_elem = run.find(f".//{{{ns['w']}}}t")
    original_text = None if text_elem is None else text_elem.text
    if not original_text:
        return

    pieces = re.split(pattern, original_text)
    if len(pieces) == 1:
        # The pattern never matched, so there is nothing to split.
        return

    formatting = run.find(f".//{{{ns['w']}}}rPr")

    # Build every replacement run before touching the paragraph.
    replacements = []
    for piece in pieces:
        if not piece:
            # re.split can yield empty strings (or None for unused groups).
            continue
        new_run = OxmlElement('w:r')
        if formatting is not None:
            # Each new run needs its own copy; an element has one parent.
            new_run.append(copy.deepcopy(formatting))
        text_child = OxmlElement('w:t')
        text_child.set(f"{{{ns['xml']}}}space", "preserve")
        text_child.text = piece
        new_run.append(text_child)
        replacements.append(new_run)

    para.remove(run)
    for offset, new_run in enumerate(replacements):
        para.insert(position + offset, new_run)