Commit ead1c788 authored by Miguel Angel Reina Ortega
Browse files

Another try

parent 65745d38
Loading
Loading
Loading
Loading
Loading
+39 −2
Original line number Diff line number Diff line
@@ -1089,6 +1089,43 @@ def update_lists(docx_input, docx_output):
        for i, r in enumerate(new_runs):
            para.insert(index + i, r)

    def split_run_by_regex(para, run, pattern):
        """Replace ``run`` in ``para`` with one run per piece of its text.

        The text of the first ``w:t`` found under ``run`` is split with
        ``re.split(pattern, ...)``. Every non-empty piece becomes a fresh
        ``w:r`` holding a deep copy of the original ``w:rPr`` (when one is
        present) and a ``w:t`` marked ``xml:space="preserve"``. The new runs
        are inserted where the original run used to be, in order.

        The function returns without changing anything when ``run`` is not a
        direct child of ``para``, when no ``w:t`` with text is found, or when
        the split yields a single piece.

        Note: only the run properties and the text are carried over to the
        new runs; any other children of the original run are not copied.

        Args:
            para: Parent element that directly contains ``run``.
            run: The ``w:r`` element to split.
            pattern: Regular expression handed to ``re.split``.
        """
        import copy

        # Only a direct child of the paragraph can be swapped in place.
        try:
            position = list(para).index(run)
        except ValueError:
            return

        source_text = run.find(f".//{{{ns['w']}}}t")
        if source_text is None or not source_text.text:
            return

        pieces = re.split(pattern, source_text.text)
        if len(pieces) < 2:
            # A single piece means the pattern never matched.
            return

        formatting = run.find(f".//{{{ns['w']}}}rPr")

        replacements = []
        for piece in pieces:
            # re.split may yield empty strings (or None for unmatched
            # groups); those produce no run.
            if not piece:
                continue

            fresh_run = OxmlElement('w:r')
            if formatting is not None:
                fresh_run.append(copy.deepcopy(formatting))

            fresh_text = OxmlElement('w:t')
            fresh_text.set(f"{{{ns['xml']}}}space", "preserve")
            fresh_text.text = piece
            fresh_run.append(fresh_text)

            replacements.append(fresh_run)

        para.remove(run)
        for slot, fresh_run in enumerate(replacements, start=position):
            para.insert(slot, fresh_run)

    for para in paragraphs:
        # Skip if already processed
        if id(para) in processed_paras:
@@ -1203,12 +1240,12 @@ def update_lists(docx_input, docx_output):
            if child.tag == f"{{{ns['w']}}}r":
                text_elem = child.find('.//w:t', namespaces=ns)
                
                if text_elem is not None and text_elem.text:
                if is_start_of_line and text_elem is not None and text_elem.text:
                    text_stripped = text_elem.text.lstrip()
                    is_numbered_candidate = bool(re.match(r'^\d+\.\s', text_stripped))
                    is_bullet_candidate = text_stripped.startswith('- ')
                    
                    if is_numbered_candidate or (is_bullet_candidate and is_start_of_line):
                    if is_numbered_candidate or is_bullet_candidate:
                        list_item_runs.append((idx, child, text_elem))
                
                # Update state