Commit 6d9bdea1 authored by Miguel Angel Reina Ortega
Browse files

Fix for restarting numbering in lists

parent a991a4ca
Loading
Loading
Loading
Loading
Loading
+78 −14
Original line number Original line Diff line number Diff line
@@ -852,6 +852,13 @@ def update_unnumbered_lists(docx_input, docx_output):
        except KeyError:
        except KeyError:
            numbering_root = None
            numbering_root = None


        # Try to read styles.xml, if it doesn't exist, styles_root will be None
        try:
            styles_data = zin.read("word/styles.xml")
            styles_root = etree.fromstring(styles_data)
        except KeyError:
            styles_root = None
            
    root = etree.fromstring(xml_data)
    root = etree.fromstring(xml_data)
    
    
    def is_numbered_list(para, numbering_root):
    def is_numbered_list(para, numbering_root):
@@ -888,16 +895,16 @@ def update_unnumbered_lists(docx_input, docx_output):
                namespaces=ns
                namespaces=ns
            )
            )
            if not num_elem:
            if not num_elem:
                return (None, None)
                return (None, None, None)
            
            
            # Get the abstractNumId
            # Get the abstractNumId
            abstract_num_id_elem = num_elem[0].xpath('./w:abstractNumId', namespaces=ns)
            abstract_num_id_elem = num_elem[0].xpath('./w:abstractNumId', namespaces=ns)
            if not abstract_num_id_elem:
            if not abstract_num_id_elem:
                return (None, None)
                return (None, None, None)
            
            
            abstract_num_id = abstract_num_id_elem[0].get(f"{{{ns['w']}}}val")
            abstract_num_id = abstract_num_id_elem[0].get(f"{{{ns['w']}}}val")
            if abstract_num_id is None:
            if abstract_num_id is None:
                return (None, None)
                return (None, None, None)
            
            
            # Find the abstractNum
            # Find the abstractNum
            abstract_num = numbering_root.xpath(
            abstract_num = numbering_root.xpath(
@@ -905,7 +912,7 @@ def update_unnumbered_lists(docx_input, docx_output):
                namespaces=ns
                namespaces=ns
            )
            )
            if not abstract_num:
            if not abstract_num:
                return (None, None)
                return (None, None, None)
            
            
            # Check the format for this level
            # Check the format for this level
            lvl = abstract_num[0].xpath(
            lvl = abstract_num[0].xpath(
@@ -913,11 +920,11 @@ def update_unnumbered_lists(docx_input, docx_output):
                namespaces=ns
                namespaces=ns
            )
            )
            if not lvl:
            if not lvl:
                return (None, None)
                return (None, None, None)
            
            
            num_fmt = lvl[0].get(f"{{{ns['w']}}}val")
            num_fmt = lvl[0].get(f"{{{ns['w']}}}val")
            if num_fmt is None:
            if num_fmt is None:
                return (None, None)
                return (None, None, None)
            
            
            # Numbered formats
            # Numbered formats
            numbered_formats = ['decimal', 'lowerLetter', 'upperLetter', 
            numbered_formats = ['decimal', 'lowerLetter', 'upperLetter', 
@@ -944,16 +951,16 @@ def update_unnumbered_lists(docx_input, docx_output):
                                'check', 'arrow', 'arrowhead', 'rtArrow', 'hyphen']
                                'check', 'arrow', 'arrowhead', 'rtArrow', 'hyphen']
            
            
            if num_fmt in numbered_formats:
            if num_fmt in numbered_formats:
                return (True, num_fmt)  # Return (True, format string)
                return (True, num_fmt, abstract_num_id)  # Return (True, format string)
            elif num_fmt in unnumbered_formats:
            elif num_fmt in unnumbered_formats:
                return (False, num_fmt)  # Return (False, format string)
                return (False, num_fmt, abstract_num_id)  # Return (False, format string)
            else:
            else:
                # Unknown format, default to unnumbered
                # Unknown format, default to unnumbered
                return (False, num_fmt)
                return (False, num_fmt, abstract_num_id)
                
                
        except Exception as e:
        except Exception as e:
            # If any error occurs, return None to fall back to heuristic
            # If any error occurs, return None to fall back to heuristic
            return (None, None)
            return (None, None, None)
    counter_regular = 0
    counter_regular = 0
    counter_b1 = 0
    counter_b1 = 0
    counter_b2 = 0
    counter_b2 = 0
@@ -1001,6 +1008,33 @@ def update_unnumbered_lists(docx_input, docx_output):
                    text += text_elem.text
                    text += text_elem.text
        return text
        return text


    def get_style_abstract_id(styles_root, numbering_root, style_id):
        """
        Return the abstractNumId behind the numbering of the given style.

        A style only stores a ``w:numId`` (the list-instance id). That id is
        resolved through the matching ``w:num`` entry of ``numbering_root`` to
        obtain the id of the abstract numbering definition.

        Side effect: when the style carries a ``w:numId``, every ``w:numPr``
        below the style element is removed from ``styles_root``. A later call
        for the same style will therefore normally find no ``w:numId`` and
        return None.

        Args:
            styles_root: Parsed root of word/styles.xml, or None when the
                document has no styles part.
            numbering_root: Parsed root of the numbering part, or None when
                the document has none.
            style_id: Value of the ``w:styleId`` attribute to look up.

        Returns:
            The abstractNumId as a string, or None when the style, its
            ``w:numId`` or the matching ``w:num`` entry cannot be found.
        """
        # styles.xml is optional; the caller stores None when it is missing.
        if styles_root is None:
            return None

        # An XPath variable avoids building the expression by string
        # interpolation, so quotes in style_id cannot break the query.
        matching_styles = styles_root.xpath(
            '//w:style[@w:styleId=$style_id]',
            namespaces=ns,
            style_id=style_id,
        )
        if not matching_styles:
            # Unknown style: report "nothing to link" instead of IndexError.
            return None
        style = matching_styles[0]

        num_id_el = style.xpath('.//w:numId', namespaces=ns)
        if not num_id_el:
            return None
        num_id = num_id_el[0].get(qn('w:val'))

        # Remove numPr from the style so the style itself no longer carries
        # numbering properties.
        for el in style.xpath('.//w:numPr', namespaces=ns):
            el.getparent().remove(el)

        if num_id is None or numbering_root is None:
            return None

        # Resolve the list instance (w:num) to its abstract definition; the
        # callers write the result into w:abstractNumId/@w:val.
        abstract_ref = numbering_root.xpath(
            '//w:num[@w:numId=$num_id]/w:abstractNumId',
            namespaces=ns,
            num_id=num_id,
        )
        if not abstract_ref:
            return None
        return abstract_ref[0].get(qn('w:val'))

    def update_list_instance_to_style(numbering_root, abstract_num_id, abstract_id_bn):
        """
        Re-point list instances from one abstract numbering to another.

        Every ``w:num`` whose ``w:abstractNumId`` child has ``w:val`` equal to
        ``abstract_num_id`` gets that value replaced by ``abstract_id_bn``.
        ``numbering_root`` is modified in place.

        Args:
            numbering_root: Parsed root of the numbering part, or None.
            abstract_num_id: abstractNumId (string) currently referenced by
                the list instances to update.
            abstract_id_bn: abstractNumId (string) the instances should
                reference afterwards.

        Returns:
            None.
        """
        if numbering_root is None or abstract_num_id is None or abstract_id_bn is None:
            return

        # Do not create a dangling reference: only re-point when the target
        # abstract numbering definition actually exists.
        # NOTE(review): confirm callers pass an abstractNumId here and not a
        # style's w:numId value.
        target_definition = numbering_root.xpath(
            '//w:abstractNum[@w:abstractNumId=$target_id]',
            namespaces=ns,
            target_id=abstract_id_bn,
        )
        if not target_definition:
            return

        # The abstract id is stored in the w:val attribute of the
        # w:abstractNumId child element, not as an attribute of w:num, so the
        # match has to be made on the child.
        references = numbering_root.xpath(
            '//w:num/w:abstractNumId[@w:val=$current_id]',
            namespaces=ns,
            current_id=abstract_num_id,
        )
        for abstract in references:
            abstract.set(qn('w:val'), abstract_id_bn)

    for para in paragraphs:
    for para in paragraphs:
        # Skip if already processed
        # Skip if already processed
        if id(para) in processed_paras:
        if id(para) in processed_paras:
@@ -1017,25 +1051,39 @@ def update_unnumbered_lists(docx_input, docx_output):
            else:
            else:
                compact_style = OxmlElement('w:pStyle')
                compact_style = OxmlElement('w:pStyle')
           # Check if it is a numbered list and get the format
           # Check if it is a numbered list and get the format
            is_numbered, num_format = is_numbered_list(para, numbering_root)
            is_numbered, num_format, abstract_num_id = is_numbered_list(para, numbering_root)
            if is_numbered:  # is_numbered is True if numbered, False if unnumbered, None if cannot determine
            if is_numbered:  # is_numbered is True if numbered, False if unnumbered, None if cannot determine
                #pStyle = OxmlElement('w:pStyle')
                #pStyle = OxmlElement('w:pStyle')
                # If format is decimal (numbers), use BN; otherwise use BL
                # If format is decimal (numbers), use BN; otherwise use BL
                if num_format == 'decimal':
                if num_format == 'decimal':
                    abstract_id_bn = get_style_abstract_id(styles_root, numbering_root, "BN")
                    if abstract_id_bn is not None and abstract_num_id is not None:
                        update_list_instance_to_style(numbering_root, abstract_num_id, abstract_id_bn)
                    compact_style.set(f"{{{ns['w']}}}val", "BN")
                    compact_style.set(f"{{{ns['w']}}}val", "BN")
                else:
                else:
                    abstract_id_bl = get_style_abstract_id(styles_root, numbering_root, "BL")
                    if abstract_id_bl is not None and abstract_num_id is not None:
                        update_list_instance_to_style(numbering_root, abstract_num_id, abstract_id_bl)
                    compact_style.set(f"{{{ns['w']}}}val", "BL")
                    compact_style.set(f"{{{ns['w']}}}val", "BL")
                pPr.insert(0, compact_style)
                pPr.insert(0, compact_style)
                # Remove numId from numPr
                # Remove numId from numPr
                numPr = pPr.xpath('./w:numPr', namespaces=ns)[0]
                #numPr = pPr.xpath('./w:numPr', namespaces=ns)[0]
                numPr.remove(numPr.xpath('./w:numId', namespaces=ns)[0])
                #numPr.remove(numPr.xpath('./w:numId', namespaces=ns)[0])
                counter_numbered += 1
                counter_numbered += 1
            else:                
            else:                
                if para.xpath('./w:pPr/w:numPr/w:ilvl[@w:val="0"]', namespaces=ns):
                if para.xpath('./w:pPr/w:numPr/w:ilvl[@w:val="0"]', namespaces=ns):
                    if para.xpath('ancestor::w:tbl', namespaces=ns):
                    if para.xpath('ancestor::w:tbl', namespaces=ns):
                        abstract_id_tb1 = get_style_abstract_id(styles_root, numbering_root, "TB1")
                        if abstract_id_tb1 is not None and abstract_num_id is not None: 
                            update_list_instance_to_style(numbering_root, abstract_num_id, abstract_id_tb1)

                        compact_style.set(f"{{{ns['w']}}}val", "TB1")
                        compact_style.set(f"{{{ns['w']}}}val", "TB1")
                        counter_table += 1
                        counter_table += 1
                    else:
                    else:
                        abstract_id_b1 = get_style_abstract_id(styles_root, numbering_root, "B1")
                        if abstract_id_b1 is not None and abstract_num_id is not None: 
                            update_list_instance_to_style(numbering_root, abstract_num_id, abstract_id_b1)

                        compact_style.set(f"{{{ns['w']}}}val", "B1")
                        compact_style.set(f"{{{ns['w']}}}val", "B1")
                    #Remove numPr from pPr
                    #Remove numPr from pPr
                    numPr = pPr.xpath('./w:numPr', namespaces=ns)[0]
                    numPr = pPr.xpath('./w:numPr', namespaces=ns)[0]
@@ -1043,9 +1091,17 @@ def update_unnumbered_lists(docx_input, docx_output):
                    counter_b1 += 1
                    counter_b1 += 1
                elif para.xpath('./w:pPr/w:numPr/w:ilvl[@w:val="1"]', namespaces=ns):
                elif para.xpath('./w:pPr/w:numPr/w:ilvl[@w:val="1"]', namespaces=ns):
                    if para.xpath('ancestor::w:tbl', namespaces=ns):
                    if para.xpath('ancestor::w:tbl', namespaces=ns):
                        abstract_id_tb2 = get_style_abstract_id(styles_root, numbering_root, "TB2")
                        if abstract_id_tb2 is not None and abstract_num_id is not None: 
                            update_list_instance_to_style(numbering_root, abstract_num_id, abstract_id_tb2)

                        compact_style.set(f"{{{ns['w']}}}val", "TB2")
                        compact_style.set(f"{{{ns['w']}}}val", "TB2")
                        counter_table += 1
                        counter_table += 1
                    else:
                    else:
                        abstract_id_b2 = get_style_abstract_id(styles_root, numbering_root, "B2")
                        if abstract_id_b2 is not None and abstract_num_id is not None: 
                            update_list_instance_to_style(numbering_root, abstract_num_id, abstract_id_b2)

                        compact_style.set(f"{{{ns['w']}}}val", "B2")
                        compact_style.set(f"{{{ns['w']}}}val", "B2")
                    #Remove numPr from pPr
                    #Remove numPr from pPr
                    numPr = pPr.xpath('./w:numPr', namespaces=ns)[0]
                    numPr = pPr.xpath('./w:numPr', namespaces=ns)[0]
@@ -1053,9 +1109,17 @@ def update_unnumbered_lists(docx_input, docx_output):
                    counter_b2 += 1 
                    counter_b2 += 1 
                elif para.xpath('./w:pPr/w:numPr/w:ilvl[@w:val="2"]', namespaces=ns):
                elif para.xpath('./w:pPr/w:numPr/w:ilvl[@w:val="2"]', namespaces=ns):
                    if para.xpath('ancestor::w:tbl', namespaces=ns):
                    if para.xpath('ancestor::w:tbl', namespaces=ns):
                        abstract_id_tb3 = get_style_abstract_id(styles_root, numbering_root, "TB3")
                        if abstract_id_tb3 is not None and abstract_num_id is not None: 
                            update_list_instance_to_style(numbering_root, abstract_num_id, abstract_id_tb3)

                        compact_style.set(f"{{{ns['w']}}}val", "TB3")
                        compact_style.set(f"{{{ns['w']}}}val", "TB3")
                        counter_table += 1
                        counter_table += 1
                    else:
                    else:
                        abstract_id_b3 = get_style_abstract_id(styles_root, numbering_root, "B3")
                        if abstract_id_b3 is not None and abstract_num_id is not None: 
                            update_list_instance_to_style(numbering_root, abstract_num_id, abstract_id_b3)

                        compact_style.set(f"{{{ns['w']}}}val", "B3")
                        compact_style.set(f"{{{ns['w']}}}val", "B3")
                    #Remove numPr from pPr
                    #Remove numPr from pPr
                    numPr = pPr.xpath('./w:numPr', namespaces=ns)[0]
                    numPr = pPr.xpath('./w:numPr', namespaces=ns)[0]