Loading generateBaseline/postprocessing.py +54 −17 Original line number Diff line number Diff line Loading @@ -828,11 +828,11 @@ def update_heading_styles(docx_input, docx_output): os.remove(tmp_path) def update_unnumbered_lists(docx_input, docx_output): def update_lists(docx_input, docx_output): """ Updates unnumbered list items (starting with "- ") in tables to appear as bulleted lists. For list items in tables: removes "- " prefix and creates separate paragraphs with FP style and numPr. For list items outside tables: removes "- " prefix and adds B1 style. Updates list items (starting with "- " or "1. ") in tables to appear as bulleted/numbered lists. For list items in tables: removes prefix and creates separate paragraphs with FP style and numPr. For list items outside tables: removes prefix and adds B1 (bullet) or BN (numbered) style. Parameters ---------- Loading Loading @@ -978,10 +978,20 @@ def update_unnumbered_lists(docx_input, docx_output): def is_list_item_para(para): """Check if paragraph contains a list item (starts with '- ')""" runs = para.xpath('./w:r', namespaces=ns) for run in runs: text_elem = run.find('.//w:t', namespaces=ns) if text_elem is not None and text_elem.text and text_elem.text.startswith('- '): full_text = '' for node in para.xpath('.//w:t | .//w:br | .//w:cr | .//w:tab', namespaces=ns): tag = node.tag if tag.endswith('}t'): if node.text: full_text += node.text elif tag.endswith('}tab'): full_text += '\t' else: # br or cr full_text += '\n' for line in full_text.split('\n'): line = line.lstrip() if line.startswith('- ') or re.match(r'^\d+\.\s', line): return True return False Loading Loading @@ -1139,15 +1149,31 @@ def update_unnumbered_lists(docx_input, docx_output): if not runs: continue # Find ALL list item runs (runs starting with "- ") # Find ALL list item runs (runs starting with "- " at start of line) list_item_runs = [] all_children = list(para) is_start_of_line = True for idx, child in enumerate(all_children): if child.tag == f"{{{ns['w']}}}r": text_elem = child.find('.//w:t', namespaces=ns) if text_elem is not None and text_elem.text and text_elem.text.startswith('- '): if is_start_of_line and text_elem is not None and text_elem.text: text_stripped = text_elem.text.lstrip() if text_stripped.startswith('- ') or re.match(r'^\d+\.\s', text_stripped): list_item_runs.append((idx, child, text_elem)) # Update state for node in child: tag = node.tag if tag == f"{{{ns['w']}}}br" or tag == f"{{{ns['w']}}}cr": is_start_of_line = True elif tag == f"{{{ns['w']}}}t": if node.text and node.text.strip(): is_start_of_line = False elif child.tag == f"{{{ns['w']}}}hyperlink": is_start_of_line = False # If we found list items, process each one separately if list_item_runs: # Get the parent element (usually the document body or table cell) Loading Loading @@ -1182,8 +1208,16 @@ def update_unnumbered_lists(docx_input, docx_output): # Process each list item run separately insert_offset = 0 # Track where to insert new paragraphs for list_idx, (run_idx, list_item_run, list_item_text_elem) in enumerate(list_item_runs): # Remove the "- " prefix list_item_text_elem.text = list_item_text_elem.text[2:] # Determine list type text = list_item_text_elem.text.lstrip() is_numbered_manual = bool(re.match(r'^\d+\.', text)) # Remove the prefix if is_numbered_manual: list_item_text_elem.text = re.sub(r'^\s*\d+\.\s*', '', list_item_text_elem.text) else: list_item_text_elem.text = re.sub(r'^\s*-\s+', '', list_item_text_elem.text) # Create a new paragraph for this list item new_para = OxmlElement('w:p') Loading Loading @@ -1235,10 +1269,13 @@ def update_unnumbered_lists(docx_input, docx_output): else: # Simple structure for regular list items (outside tables) pStyle = OxmlElement('w:pStyle') if is_numbered_manual: pStyle.set(f"{{{ns['w']}}}val", "BN") counter_numbered += 1 else: pStyle.set(f"{{{ns['w']}}}val", "B1") pPr.append(pStyle) counter_regular += 1 pPr.append(pStyle) new_para.append(pPr) Loading Loading @@ -2475,7 +2512,7 @@ def update_format_styles_cli(): update_table_rows(args.docx_input, args.docx_output) update_notes(args.docx_input, args.docx_output) update_references(args.docx_input, args.docx_output) update_unnumbered_lists(args.docx_input, args.docx_output) update_lists(args.docx_input, args.docx_output) update_body_text_style(args.docx_input, args.docx_output) add_no_break_hyphens(args.docx_input, args.docx_output) update_references_style(args.docx_input, args.docx_output) Loading Loading
generateBaseline/postprocessing.py +54 −17 Original line number Diff line number Diff line Loading @@ -828,11 +828,11 @@ def update_heading_styles(docx_input, docx_output): os.remove(tmp_path) def update_unnumbered_lists(docx_input, docx_output): def update_lists(docx_input, docx_output): """ Updates unnumbered list items (starting with "- ") in tables to appear as bulleted lists. For list items in tables: removes "- " prefix and creates separate paragraphs with FP style and numPr. For list items outside tables: removes "- " prefix and adds B1 style. Updates list items (starting with "- " or "1. ") in tables to appear as bulleted/numbered lists. For list items in tables: removes prefix and creates separate paragraphs with FP style and numPr. For list items outside tables: removes prefix and adds B1 (bullet) or BN (numbered) style. Parameters ---------- Loading Loading @@ -978,10 +978,20 @@ def update_unnumbered_lists(docx_input, docx_output): def is_list_item_para(para): """Check if paragraph contains a list item (starts with '- ')""" runs = para.xpath('./w:r', namespaces=ns) for run in runs: text_elem = run.find('.//w:t', namespaces=ns) if text_elem is not None and text_elem.text and text_elem.text.startswith('- '): full_text = '' for node in para.xpath('.//w:t | .//w:br | .//w:cr | .//w:tab', namespaces=ns): tag = node.tag if tag.endswith('}t'): if node.text: full_text += node.text elif tag.endswith('}tab'): full_text += '\t' else: # br or cr full_text += '\n' for line in full_text.split('\n'): line = line.lstrip() if line.startswith('- ') or re.match(r'^\d+\.\s', line): return True return False Loading Loading @@ -1139,15 +1149,31 @@ def update_unnumbered_lists(docx_input, docx_output): if not runs: continue # Find ALL list item runs (runs starting with "- ") # Find ALL list item runs (runs starting with "- " at start of line) list_item_runs = [] all_children = list(para) is_start_of_line = True for idx, child in enumerate(all_children): if child.tag == f"{{{ns['w']}}}r": text_elem = child.find('.//w:t', namespaces=ns) if text_elem is not None and text_elem.text and text_elem.text.startswith('- '): if is_start_of_line and text_elem is not None and text_elem.text: text_stripped = text_elem.text.lstrip() if text_stripped.startswith('- ') or re.match(r'^\d+\.\s', text_stripped): list_item_runs.append((idx, child, text_elem)) # Update state for node in child: tag = node.tag if tag == f"{{{ns['w']}}}br" or tag == f"{{{ns['w']}}}cr": is_start_of_line = True elif tag == f"{{{ns['w']}}}t": if node.text and node.text.strip(): is_start_of_line = False elif child.tag == f"{{{ns['w']}}}hyperlink": is_start_of_line = False # If we found list items, process each one separately if list_item_runs: # Get the parent element (usually the document body or table cell) Loading Loading @@ -1182,8 +1208,16 @@ def update_unnumbered_lists(docx_input, docx_output): # Process each list item run separately insert_offset = 0 # Track where to insert new paragraphs for list_idx, (run_idx, list_item_run, list_item_text_elem) in enumerate(list_item_runs): # Remove the "- " prefix list_item_text_elem.text = list_item_text_elem.text[2:] # Determine list type text = list_item_text_elem.text.lstrip() is_numbered_manual = bool(re.match(r'^\d+\.', text)) # Remove the prefix if is_numbered_manual: list_item_text_elem.text = re.sub(r'^\s*\d+\.\s*', '', list_item_text_elem.text) else: list_item_text_elem.text = re.sub(r'^\s*-\s+', '', list_item_text_elem.text) # Create a new paragraph for this list item new_para = OxmlElement('w:p') Loading Loading @@ -1235,10 +1269,13 @@ def update_unnumbered_lists(docx_input, docx_output): else: # Simple structure for regular list items (outside tables) pStyle = OxmlElement('w:pStyle') if is_numbered_manual: pStyle.set(f"{{{ns['w']}}}val", "BN") counter_numbered += 1 else: pStyle.set(f"{{{ns['w']}}}val", "B1") pPr.append(pStyle) counter_regular += 1 pPr.append(pStyle) new_para.append(pPr) Loading Loading @@ -2475,7 +2512,7 @@ def update_format_styles_cli(): update_table_rows(args.docx_input, args.docx_output) update_notes(args.docx_input, args.docx_output) update_references(args.docx_input, args.docx_output) update_unnumbered_lists(args.docx_input, args.docx_output) update_lists(args.docx_input, args.docx_output) update_body_text_style(args.docx_input, args.docx_output) add_no_break_hyphens(args.docx_input, args.docx_output) update_references_style(args.docx_input, args.docx_output) Loading