Commit 81ed0daa authored by Miguel Angel Reina Ortega
Browse files

Skip Abbreviations clause when reformatting examples

parent 56d86df3
Loading
Loading
Loading
Loading
Loading
+44 −4
Original line number Diff line number Diff line
@@ -2403,11 +2403,34 @@ def add_break_after_code_blocks_and_tables(docx_input, docx_output):
    w_before = f"{{{ns['w']}}}before"
    w_after = f"{{{ns['w']}}}after"

    # NOTE(review): this view is a diff with the +/- markers stripped, so old and new
    # lines are interleaved. Given the "−4" count in the header, the removed lines here
    # appear to be the `_has_target_block_style` def, `return False`, and the
    # `return bool(...)` line — confirm against the raw commit.
    def _has_target_block_style(elem):
    # New helper: yields the paragraph's style id instead of a PL/EW boolean.
    def _paragraph_style_val(elem):
        # Only w:p elements are inspected; None or any other tag yields the "no style" result.
        if elem is None or elem.tag != w_p:
            return False
            return None
        # Look up the paragraph's w:pStyle children under its w:pPr.
        style_elems = elem.xpath('./w:pPr/w:pStyle', namespaces=ns)
        return bool(style_elems and style_elems[0].get(w_val) in {"PL", "EW"})
        # No w:pStyle present -> None.
        if not style_elems:
            return None
        # Otherwise the w:val attribute of the first w:pStyle (None if the attribute is absent).
        return style_elems[0].get(w_val)

    def _paragraph_text(elem):
        """Return the stripped text of paragraph ``elem``.

        Returns "" when ``elem`` is None or is not a ``w:p`` element. Text is
        collected from the paragraph's direct ``w:r`` children: every non-empty
        ``w:t`` contributes its text, and every ``w:tab`` / ``w:br`` contributes
        a single space. If that yields nothing at all, every descendant ``w:t``
        text node is used instead.
        """
        if elem is None or elem.tag != w_p:
            return ""

        # Qualified tag names are loop-invariant, so build them once.
        w_ns = ns['w']
        text_tag = f"{{{w_ns}}}t"
        spacer_tags = (f"{{{w_ns}}}tab", f"{{{w_ns}}}br")

        # Walk the runs child by child so the heading layout produced by
        # update_heading_styles (number + <w:tab/> + title as separate run
        # parts) keeps a separator between the number and the title.
        pieces = []
        for run in elem.xpath('./w:r', namespaces=ns):
            for node in run:
                tag = node.tag
                if tag == text_tag:
                    if node.text:
                        pieces.append(node.text)
                elif tag in spacer_tags:
                    pieces.append(" ")

        # Nothing found in direct runs: fall back to text nested deeper in the paragraph.
        if not pieces:
            pieces = elem.xpath('.//w:t/text()', namespaces=ns)

        joined = ''.join(pieces)
        return joined.strip()

    def _normalize_ws(text):
        # Normalize tabs/non-breaking spaces/multiple spaces for robust heading match.
        return re.sub(r'\s+', ' ', text.replace('\u00A0', ' ')).strip()

    def _is_empty_paragraph(elem):
        if elem is None or elem.tag != w_p:
@@ -2417,9 +2440,26 @@ def add_break_after_code_blocks_and_tables(docx_input, docx_output):

    code_break_paras = 0
    table_break_paras = 0
    in_abbreviations_clause = False
    for elem in list(body):
        # Track whether we are currently inside an "Abbreviations" top-level section.
        # Heading style has already been normalized to "Heading1" before this step.
        if elem.tag == w_p:
            style_val = _paragraph_style_val(elem)
            if style_val == "Heading1":
                heading_text = _normalize_ws(_paragraph_text(elem))
                in_abbreviations_clause = bool(
                    re.search(r'\babbreviations\b', heading_text, flags=re.IGNORECASE)
                )

        # After PL/EW paragraphs: insert one empty paragraph with zero spacing.
        if _has_target_block_style(elem):
        style_val = _paragraph_style_val(elem)
        is_target_style = style_val in {"PL", "EW"}
        if is_target_style:
            # EW inside "Abbreviations" clause must not be modified.
            if style_val == "EW" and in_abbreviations_clause:
                continue

            nxt = elem.getnext()
            if _is_empty_paragraph(nxt):
                continue