Loading generateBaseline/postprocessing_styling.py +43 −15 Original line number Diff line number Diff line Loading @@ -475,13 +475,12 @@ def update_unnumbered_lists(docx_input, docx_output): paragraphs = root.xpath('.//w:p', namespaces=ns) def is_list_item_para(para): """Check if paragraph contains a list item (starts with '- ')""" runs = para.xpath('./w:r', namespaces=ns) for run in runs: text_elem = run.find('.//w:t', namespaces=ns) if text_elem is not None and text_elem.text and text_elem.text.startswith('- '): return True """Check if paragraph starts with '- ' across all runs (formula-safe)""" texts = [t.text for t in para.xpath('.//w:t', namespaces=ns) if t.text] if not texts: return False full_text = ''.join(texts).lstrip() return full_text.startswith('- ') def is_blank_para(para): """Check if paragraph is blank (empty or only whitespace)""" Loading @@ -507,6 +506,33 @@ def update_unnumbered_lists(docx_input, docx_output): text += text_elem.text return text def remove_dash_prefix(para): """Remove '- ' prefix across runs safely""" remaining = 2 # length of "- " for t in para.xpath('.//w:t', namespaces=ns): if not t.text: continue text = t.text stripped = text.lstrip() # Skip leading whitespace first leading_ws_len = len(text) - len(text.lstrip()) if leading_ws_len > 0: continue if remaining <= 0: break if len(text) <= remaining: remaining -= len(text) t.text = '' else: t.text = text[remaining:] remaining = 0 break for para in paragraphs: # Skip if already processed if id(para) in processed_paras: Loading Loading @@ -622,7 +648,7 @@ def update_unnumbered_lists(docx_input, docx_output): insert_offset = 0 # Track where to insert new paragraphs for list_idx, (run_idx, list_item_run, list_item_text_elem) in enumerate(list_item_runs): # Remove the "- " prefix list_item_text_elem.text = list_item_text_elem.text[2:] remove_dash_prefix(para) # Create a new paragraph for this list item new_para = OxmlElement('w:p') Loading Loading @@ -687,17 +713,19 @@ def update_unnumbered_lists(docx_input, docx_output): end_idx = list_item_runs[list_idx + 1][0] if list_idx + 1 < len(list_item_runs) else len(all_children) # Move runs for this list item to the new paragraph runs_to_move = [] content_to_move = [] for idx in range(start_idx, end_idx): child = all_children[idx] if child.tag != f"{{{ns['w']}}}pPr": runs_to_move.append(child) # Skip paragraph properties if child.tag == f"{{{ns['w']}}}pPr": continue content_to_move.append(child) # Remove from original and add to new paragraph for run in runs_to_move: if run in para: para.remove(run) new_para.append(run) for node in content_to_move: if node in para: para.remove(node) new_para.append(node) # Now look at subsequent paragraphs in the same parent and merge them # until we hit another list item, blank line, or end of parent Loading Loading
generateBaseline/postprocessing_styling.py +43 −15 Original line number Diff line number Diff line Loading @@ -475,13 +475,12 @@ def update_unnumbered_lists(docx_input, docx_output): paragraphs = root.xpath('.//w:p', namespaces=ns) def is_list_item_para(para): """Check if paragraph contains a list item (starts with '- ')""" runs = para.xpath('./w:r', namespaces=ns) for run in runs: text_elem = run.find('.//w:t', namespaces=ns) if text_elem is not None and text_elem.text and text_elem.text.startswith('- '): return True """Check if paragraph starts with '- ' across all runs (formula-safe)""" texts = [t.text for t in para.xpath('.//w:t', namespaces=ns) if t.text] if not texts: return False full_text = ''.join(texts).lstrip() return full_text.startswith('- ') def is_blank_para(para): """Check if paragraph is blank (empty or only whitespace)""" Loading @@ -507,6 +506,33 @@ def update_unnumbered_lists(docx_input, docx_output): text += text_elem.text return text def remove_dash_prefix(para): """Remove '- ' prefix across runs safely""" remaining = 2 # length of "- " for t in para.xpath('.//w:t', namespaces=ns): if not t.text: continue text = t.text stripped = text.lstrip() # Skip leading whitespace first leading_ws_len = len(text) - len(text.lstrip()) if leading_ws_len > 0: continue if remaining <= 0: break if len(text) <= remaining: remaining -= len(text) t.text = '' else: t.text = text[remaining:] remaining = 0 break for para in paragraphs: # Skip if already processed if id(para) in processed_paras: Loading Loading @@ -622,7 +648,7 @@ def update_unnumbered_lists(docx_input, docx_output): insert_offset = 0 # Track where to insert new paragraphs for list_idx, (run_idx, list_item_run, list_item_text_elem) in enumerate(list_item_runs): # Remove the "- " prefix list_item_text_elem.text = list_item_text_elem.text[2:] remove_dash_prefix(para) # Create a new paragraph for this list item new_para = OxmlElement('w:p') Loading Loading @@ -687,17 +713,19 @@ def update_unnumbered_lists(docx_input, docx_output): end_idx = list_item_runs[list_idx + 1][0] if list_idx + 1 < len(list_item_runs) else len(all_children) # Move runs for this list item to the new paragraph runs_to_move = [] content_to_move = [] for idx in range(start_idx, end_idx): child = all_children[idx] if child.tag != f"{{{ns['w']}}}pPr": runs_to_move.append(child) # Skip paragraph properties if child.tag == f"{{{ns['w']}}}pPr": continue content_to_move.append(child) # Remove from original and add to new paragraph for run in runs_to_move: if run in para: para.remove(run) new_para.append(run) for node in content_to_move: if node in para: para.remove(node) new_para.append(node) # Now look at subsequent paragraphs in the same parent and merge them # until we hit another list item, blank line, or end of parent Loading