Commit 3318715b authored by Miguel Angel Reina Ortega
Browse files

Adding a soft carriage return for text belonging to a list element

parent bb8f054d
Loading
Loading
Loading
Loading
Loading
+103 −36
Original line number Diff line number Diff line
@@ -913,20 +913,23 @@ def update_unnumbered_lists(docx_input, docx_output):
                namespaces=ns
            )
            if not abstract_num:
                return (None, None, None)
                return (None, None, None, None)
            
            # Check the format for this level
            lvl = abstract_num[0].xpath(
            numFmt_elem = abstract_num[0].xpath(
                f'./w:lvl[@w:ilvl="{ilvl}"]/w:numFmt',
                namespaces=ns
            )
            if not lvl:
                return (None, None, None)
            if not numFmt_elem:
                return (None, None, None, None)
            
            num_fmt = lvl[0].get(f"{{{ns['w']}}}val")
            if num_fmt is None:
                return (None, None, None)
            numFmt = numFmt_elem[0].get(f"{{{ns['w']}}}val")

            lvl_text_elem = abstract_num[0].xpath(f'./w:lvl[@w:ilvl="{ilvl}"]/w:lvlText', namespaces=ns)
            if not lvl_text_elem:
                lvl_text = None
            
            lvl_text = lvl_text_elem[0].get(f"{{{ns['w']}}}val")
            # Numbered formats
            numbered_formats = ['decimal', 'lowerLetter', 'upperLetter', 
                             'lowerRoman', 'upperRoman', 'arabic', 'ordinal',
@@ -951,17 +954,17 @@ def update_unnumbered_lists(docx_input, docx_output):
            unnumbered_formats = ['bullet', 'circle', 'square', 'dash', 'diamond',
                                'check', 'arrow', 'arrowhead', 'rtArrow', 'hyphen']
            
            if num_fmt in numbered_formats:
                return (True, num_fmt, abstract_num_id)  # Return (True, format string)
            elif num_fmt in unnumbered_formats:
                return (False, num_fmt, abstract_num_id)  # Return (False, format string)
            if numFmt in numbered_formats:
                return (True, numFmt, lvl_text, abstract_num_id)  # Return (True, format string)
            elif numFmt in unnumbered_formats:
                return (False, numFmt, lvl_text, abstract_num_id)  # Return (False, format string)
            else:
                # Unknown format, default to unnumbered
                return (False, num_fmt, abstract_num_id)
                return (False, numFmt, lvl_text, abstract_num_id)
                
        except Exception as e:
            # If any error occurs, return None to fall back to heuristic
            return (None, None, None)
            return (None, None, None, None)
    counter_regular = 0
    counter_b1 = 0
    counter_b2 = 0
@@ -1041,6 +1044,7 @@ def update_unnumbered_lists(docx_input, docx_output):
            abstract.set(qn('w:val'), abstract_id_bn)

    for para in paragraphs:
        para_removed = False
        # Skip if already processed
        if id(para) in processed_paras:
            continue
@@ -1056,7 +1060,7 @@ def update_unnumbered_lists(docx_input, docx_output):
            else:
                compact_style = OxmlElement('w:pStyle')
           # Check if it is a numbered list and get the format
            is_numbered, num_format, abstract_num_id = is_numbered_list(para, numbering_root)
            is_numbered, num_format, lvl_text, abstract_num_id = is_numbered_list(para, numbering_root)
            if is_numbered:  # is_numbered is True if numbered, False if unnumbered, None if cannot determine
                #pStyle = OxmlElement('w:pStyle')
                # If format is decimal (numbers), use BN; otherwise use BL
@@ -1076,6 +1080,8 @@ def update_unnumbered_lists(docx_input, docx_output):
                #numPr.remove(numPr.xpath('./w:numId', namespaces=ns)[0])
                counter_numbered += 1
            else:
                #print(f'The lvl_text is {lvl_text} and the abstract_num_id is {abstract_num_id}')
                
                if para.xpath('./w:pPr/w:numPr/w:ilvl[@w:val="0"]', namespaces=ns):
                    if para.xpath('ancestor::w:tbl', namespaces=ns):
                        abstract_id_tb1 = get_style_abstract_id(styles_root, numbering_root, "TB1")
@@ -1085,12 +1091,32 @@ def update_unnumbered_lists(docx_input, docx_output):
                        compact_style.set(f"{{{ns['w']}}}val", "TB1")
                        counter_table += 1
                    else:
                        if not lvl_text == " ":
                            abstract_id_b1 = get_style_abstract_id(styles_root, numbering_root, "B1")
                            if abstract_id_b1 is not None and abstract_num_id is not None: 
                                update_list_instance_to_style(numbering_root, abstract_num_id, abstract_id_b1)

                            compact_style.set(f"{{{ns['w']}}}val", "B1")
                        else:
                            #print(f'Found text belonging to a numbered list item')
                            # Take the text from the paragraph
                            text = get_para_text(para)
                            #  Insert it as new run to the previous paragraph. New run starting by <w:br/>
                            previous_para = para.getprevious()
                            if previous_para is not None:
                                new_run = OxmlElement('w:r')
                                new_run.append(OxmlElement('w:br'))
                                text_t = OxmlElement('w:t')
                                text_t.text = text
                                new_run.append(text_t)
                                previous_para.append(new_run)
                            # Remove the original paragraph
                            para.getparent().remove(para)
                            para_removed = True
                            #print(f'Removed the original paragraph')
                            #compact_style.set(f"{{{ns['w']}}}val", "FP")
                    #Remove numPr from pPr
                    if not para_removed:
                        numPr = pPr.xpath('./w:numPr', namespaces=ns)[0]
                        pPr.remove(numPr)
                    counter_b1 += 1
@@ -1103,12 +1129,32 @@ def update_unnumbered_lists(docx_input, docx_output):
                        compact_style.set(f"{{{ns['w']}}}val", "TB2")
                        counter_table += 1
                    else:
                        if not lvl_text == " ":
                            abstract_id_b2 = get_style_abstract_id(styles_root, numbering_root, "B2")
                            if abstract_id_b2 is not None and abstract_num_id is not None: 
                                update_list_instance_to_style(numbering_root, abstract_num_id, abstract_id_b2)

                            compact_style.set(f"{{{ns['w']}}}val", "B2")
                        else:
                            #print(f'Found text belonging to a numbered list item')
                            # Take the text from the paragraph
                            text = get_para_text(para)
                            #  Insert it as new run to the previous paragraph. New run starting by <w:br/>
                            previous_para = para.getprevious()
                            if previous_para is not None:
                                new_run = OxmlElement('w:r')
                                new_run.append(OxmlElement('w:br'))
                                text_t = OxmlElement('w:t')
                                text_t.text = text
                                new_run.append(text_t)
                                previous_para.append(new_run)
                            # Remove the original paragraph
                            para.getparent().remove(para)
                            para_removed = True
                            #print(f'Removed the original paragraph')
                            #compact_style.set(f"{{{ns['w']}}}val", "FP")
                    #Remove numPr from pPr
                    if not para_removed:
                        numPr = pPr.xpath('./w:numPr', namespaces=ns)[0]
                        pPr.remove(numPr)
                    counter_b2 += 1 
@@ -1121,15 +1167,36 @@ def update_unnumbered_lists(docx_input, docx_output):
                        compact_style.set(f"{{{ns['w']}}}val", "TB3")
                        counter_table += 1
                    else:
                        if not lvl_text == " ":
                            abstract_id_b3 = get_style_abstract_id(styles_root, numbering_root, "B3")
                            if abstract_id_b3 is not None and abstract_num_id is not None: 
                                update_list_instance_to_style(numbering_root, abstract_num_id, abstract_id_b3)

                            compact_style.set(f"{{{ns['w']}}}val", "B3")
                        else:
                            #print(f'Found text belonging to a numbered list item')
                            # Take the text from the paragraph
                            text = get_para_text(para)
                            #  Insert it as new run to the previous paragraph. New run starting by <w:br/>
                            previous_para = para.getprevious()
                            if previous_para is not None:
                                new_run = OxmlElement('w:r')
                                new_run.append(OxmlElement('w:br'))
                                text_t = OxmlElement('w:t')
                                text_t.text = text
                                new_run.append(text_t)
                                previous_para.append(new_run)
                            # Remove the original paragraph
                            para.getparent().remove(para)
                            para_removed = True
                            #print(f'Removed the original paragraph')
                            #compact_style.set(f"{{{ns['w']}}}val", "FP")
                    #Remove numPr from pPr
                    if not para_removed:
                        numPr = pPr.xpath('./w:numPr', namespaces=ns)[0]
                        pPr.remove(numPr)
                    counter_b3 += 1
                
                pPr.insert(0, compact_style)
                counter_compact += 1
                continue