Commit 7ae0a0ea authored by Miguel Angel Reina Ortega
Browse files

Add fix for editor's note containing a reference

parent 81ed0daa
Loading
Loading
Loading
Loading
Loading
+30 −7
Original line number Diff line number Diff line
@@ -1897,9 +1897,27 @@ def update_references(docx_input, docx_output):
    new_style = "EX"
    counter = 0
    
    def _paragraph_text_from_runs(para):
        """Return the text of *para* assembled from its runs.

        All ``w:r`` descendants are visited, so runs nested inside wrapper
        elements (e.g. hyperlinks) are included. Each non-empty ``w:t`` child
        contributes its text; each ``w:tab`` or ``w:br`` child contributes a
        single space. The joined result has surrounding whitespace stripped.
        """
        run_nodes = para.xpath('.//w:r', namespaces=ns)

        # Build the qualified tag names once instead of once per child.
        w_uri = ns['w']
        text_tag = f"{{{w_uri}}}t"
        spacer_tags = (f"{{{w_uri}}}tab", f"{{{w_uri}}}br")

        pieces = []
        for run_node in run_nodes:
            for node in run_node:
                tag = node.tag
                if tag == text_tag:
                    # Empty or missing text contributes nothing.
                    if node.text:
                        pieces.append(node.text)
                    continue
                if tag in spacer_tags:
                    # Tabs and breaks act as word separators.
                    pieces.append(" ")
        return ''.join(pieces).strip()

    # Loop over all paragraphs with style "BodyText" or "FirstParagraph" and change to "EX"
    for para in root.xpath('.//w:p[w:pPr/w:pStyle[@w:val="BodyText" or @w:val="FirstParagraph"]]', namespaces=ns):
        old_val = para.get(f"{{{ns['w']}}}val")
        
        # Keep Editor's Notes untouched here; they are formatted later in
        # update_body_text_style (including highlighting).
        para_text = _paragraph_text_from_runs(para)
        if re.search(r'Editor[\'’]s\s+Note:', para_text, re.IGNORECASE):
            continue

        # A reference is a paragraph that contains:
        # - A pPr with pStyle "BodyText" or "FirstParagraph" (already checked in loop)
        # - A bookmarkStart and a bookmarkEnd
@@ -2001,6 +2019,17 @@ def update_body_text_style(docx_input, docx_output):
    counter = 0
    h6_counter = 0
    
    def _paragraph_text_from_runs(para):
        """Collect the text carried by the runs of *para* into one string.

        The search covers every ``w:r`` descendant, which also picks up runs
        wrapped in other elements such as hyperlinks. Text comes from ``w:t``
        children that have non-empty text, while ``w:tab`` and ``w:br``
        children are rendered as one space each. Leading and trailing
        whitespace is removed from the final string.
        """
        found_runs = para.xpath('.//w:r', namespaces=ns)

        # Qualified names are loop-invariant, so compute them up front.
        namespace = ns['w']
        tag_text = f"{{{namespace}}}t"
        tags_as_space = (f"{{{namespace}}}tab", f"{{{namespace}}}br")

        def _fragment(node):
            # Map one run child to the text it contributes ('' for none).
            if node.tag == tag_text:
                return node.text or ''
            if node.tag in tags_as_space:
                return " "
            return ''

        fragments = [_fragment(node) for run in found_runs for node in run]
        return ''.join(fragments).strip()

    # Loop over all elements to find "BodyText" and "FirstParagraph" and change to "Normal"
    # Combine both XPath queries
    body_text_elems = root.xpath('.//w:pStyle[@w:val="BodyText"]', namespaces=ns)
@@ -2008,7 +2037,6 @@ def update_body_text_style(docx_input, docx_output):
    all_elems = body_text_elems + first_para_elems
    
    for elem in all_elems:
        full_text = ""
        # elem is w:pStyle, its parent is w:pPr, and w:pPr's parent is w:p
        pPr = elem.getparent()
        if pPr is None:
@@ -2016,12 +2044,7 @@ def update_body_text_style(docx_input, docx_output):
        para = pPr.getparent()
        if para is None:
            continue
        runs = para.xpath('./w:r', namespaces=ns)
        for run in runs:
            text_elems = run.xpath('.//w:t', namespaces=ns)
            for text_elem in text_elems:
                if text_elem.text:
                    full_text += text_elem.text
        full_text = _paragraph_text_from_runs(para)
        
        # Strip whitespace and check if text matches "Essential patents" or "Trademarks"
        full_text_stripped = full_text.strip()