Loading generateBaseline/postprocessing.py +23 −18 Original line number Diff line number Diff line Loading @@ -815,23 +815,23 @@ def update_unnumbered_lists(docx_input, docx_output): - numbering_root: lxml root element of numbering.xml (or None if not available) Returns: - True if numbered list (decimal, etc.), False if unnumbered (bullet, etc.), None if cannot determine - Tuple (bool, str): (True, num_fmt) if numbered list, (False, num_fmt) if unnumbered, (None, None) if cannot determine """ if numbering_root is None: return None # Cannot determine without numbering.xml return (None, None) # Cannot determine without numbering.xml # Get numId and ilvl from paragraph numId_elem = para.xpath('./w:pPr/w:numPr/w:numId', namespaces=ns) ilvl_elem = para.xpath('./w:pPr/w:numPr/w:ilvl', namespaces=ns) if not numId_elem or not ilvl_elem: return None return (None, None) numId = numId_elem[0].get(f"{{{ns['w']}}}val") ilvl = ilvl_elem[0].get(f"{{{ns['w']}}}val") if numId is None or ilvl is None: return None return (None, None) try: # Find the num element with this numId Loading @@ -840,16 +840,16 @@ def update_unnumbered_lists(docx_input, docx_output): namespaces=ns ) if not num_elem: return None return (None, None) # Get the abstractNumId abstract_num_id_elem = num_elem[0].xpath('./w:abstractNumId', namespaces=ns) if not abstract_num_id_elem: return None return (None, None) abstract_num_id = abstract_num_id_elem[0].get(f"{{{ns['w']}}}val") if abstract_num_id is None: return None return (None, None) # Find the abstractNum abstract_num = numbering_root.xpath( Loading @@ -857,7 +857,7 @@ def update_unnumbered_lists(docx_input, docx_output): namespaces=ns ) if not abstract_num: return None return (None, None) # Check the format for this level lvl = abstract_num[0].xpath( Loading @@ -865,11 +865,11 @@ def update_unnumbered_lists(docx_input, docx_output): namespaces=ns ) if not lvl: return None return (None, None) num_fmt = lvl[0].get(f"{{{ns['w']}}}val") if num_fmt is None: return None return (None, None) # Numbered formats numbered_formats = ['decimal', 'lowerLetter', 'upperLetter', Loading @@ -896,16 +896,16 @@ def update_unnumbered_lists(docx_input, docx_output): 'check', 'arrow', 'arrowhead', 'rtArrow', 'hyphen'] if num_fmt in numbered_formats: return True return (True, num_fmt) # Return (True, format string) elif num_fmt in unnumbered_formats: return False return (False, num_fmt) # Return (False, format string) else: # Unknown format, default to numbered return False # Unknown format, default to unnumbered return (False, num_fmt) except Exception as e: # If any error occurs, return None to fall back to heuristic return None return (None, None) counter_regular = 0 counter_b1 = 0 counter_b2 = 0 Loading Loading @@ -959,12 +959,17 @@ def update_unnumbered_lists(docx_input, docx_output): continue if para.xpath('./w:pPr/w:pStyle[@w:val="Compact"]', namespaces=ns) and para.xpath('./w:pPr/w:numPr', namespaces=ns): # Check if it is a numbered list if is_numbered_list(para, numbering_root): # Check if it is a numbered list and get the format is_numbered, num_format = is_numbered_list(para, numbering_root) if is_numbered: # is_numbered is True if numbered, False if unnumbered, None if cannot determine # Get pPr element to add pStyle to it pPr = para.xpath('./w:pPr', namespaces=ns)[0] pStyle = OxmlElement('w:pStyle') # If format is decimal (numbers), use BN; otherwise use BL if num_format == 'decimal': pStyle.set(f"{{{ns['w']}}}val", "BN") else: pStyle.set(f"{{{ns['w']}}}val", "BL") pPr.append(pStyle) counter_numbered += 1 else: Loading Loading
generateBaseline/postprocessing.py +23 −18 Original line number Diff line number Diff line Loading @@ -815,23 +815,23 @@ def update_unnumbered_lists(docx_input, docx_output): - numbering_root: lxml root element of numbering.xml (or None if not available) Returns: - True if numbered list (decimal, etc.), False if unnumbered (bullet, etc.), None if cannot determine - Tuple (bool, str): (True, num_fmt) if numbered list, (False, num_fmt) if unnumbered, (None, None) if cannot determine """ if numbering_root is None: return None # Cannot determine without numbering.xml return (None, None) # Cannot determine without numbering.xml # Get numId and ilvl from paragraph numId_elem = para.xpath('./w:pPr/w:numPr/w:numId', namespaces=ns) ilvl_elem = para.xpath('./w:pPr/w:numPr/w:ilvl', namespaces=ns) if not numId_elem or not ilvl_elem: return None return (None, None) numId = numId_elem[0].get(f"{{{ns['w']}}}val") ilvl = ilvl_elem[0].get(f"{{{ns['w']}}}val") if numId is None or ilvl is None: return None return (None, None) try: # Find the num element with this numId Loading @@ -840,16 +840,16 @@ def update_unnumbered_lists(docx_input, docx_output): namespaces=ns ) if not num_elem: return None return (None, None) # Get the abstractNumId abstract_num_id_elem = num_elem[0].xpath('./w:abstractNumId', namespaces=ns) if not abstract_num_id_elem: return None return (None, None) abstract_num_id = abstract_num_id_elem[0].get(f"{{{ns['w']}}}val") if abstract_num_id is None: return None return (None, None) # Find the abstractNum abstract_num = numbering_root.xpath( Loading @@ -857,7 +857,7 @@ def update_unnumbered_lists(docx_input, docx_output): namespaces=ns ) if not abstract_num: return None return (None, None) # Check the format for this level lvl = abstract_num[0].xpath( Loading @@ -865,11 +865,11 @@ def update_unnumbered_lists(docx_input, docx_output): namespaces=ns ) if not lvl: return None return (None, None) num_fmt = lvl[0].get(f"{{{ns['w']}}}val") if num_fmt is None: return None return (None, None) # Numbered formats numbered_formats = ['decimal', 'lowerLetter', 'upperLetter', Loading @@ -896,16 +896,16 @@ def update_unnumbered_lists(docx_input, docx_output): 'check', 'arrow', 'arrowhead', 'rtArrow', 'hyphen'] if num_fmt in numbered_formats: return True return (True, num_fmt) # Return (True, format string) elif num_fmt in unnumbered_formats: return False return (False, num_fmt) # Return (False, format string) else: # Unknown format, default to numbered return False # Unknown format, default to unnumbered return (False, num_fmt) except Exception as e: # If any error occurs, return None to fall back to heuristic return None return (None, None) counter_regular = 0 counter_b1 = 0 counter_b2 = 0 Loading Loading @@ -959,12 +959,17 @@ def update_unnumbered_lists(docx_input, docx_output): continue if para.xpath('./w:pPr/w:pStyle[@w:val="Compact"]', namespaces=ns) and para.xpath('./w:pPr/w:numPr', namespaces=ns): # Check if it is a numbered list if is_numbered_list(para, numbering_root): # Check if it is a numbered list and get the format is_numbered, num_format = is_numbered_list(para, numbering_root) if is_numbered: # is_numbered is True if numbered, False if unnumbered, None if cannot determine # Get pPr element to add pStyle to it pPr = para.xpath('./w:pPr', namespaces=ns)[0] pStyle = OxmlElement('w:pStyle') # If format is decimal (numbers), use BN; otherwise use BL if num_format == 'decimal': pStyle.set(f"{{{ns['w']}}}val", "BN") else: pStyle.set(f"{{{ns['w']}}}val", "BL") pPr.append(pStyle) counter_numbered += 1 else: Loading