Loading generateBaseline/postprocessing.py +44 −4 Original line number Diff line number Diff line Loading @@ -2403,11 +2403,34 @@ def add_break_after_code_blocks_and_tables(docx_input, docx_output): w_before = f"{{{ns['w']}}}before" w_after = f"{{{ns['w']}}}after" def _has_target_block_style(elem): def _paragraph_style_val(elem): if elem is None or elem.tag != w_p: return False return None style_elems = elem.xpath('./w:pPr/w:pStyle', namespaces=ns) return bool(style_elems and style_elems[0].get(w_val) in {"PL", "EW"}) if not style_elems: return None return style_elems[0].get(w_val) def _paragraph_text(elem): if elem is None or elem.tag != w_p: return "" # Build text from runs to respect how headings are created in update_heading_styles: # number + <w:tab/> + title in separate run parts. parts = [] for run in elem.xpath('./w:r', namespaces=ns): for child in run: if child.tag == f"{{{ns['w']}}}t" and child.text: parts.append(child.text) elif child.tag in (f"{{{ns['w']}}}tab", f"{{{ns['w']}}}br"): parts.append(" ") # Fallback for paragraphs containing text in non-direct run descendants. if not parts: parts = elem.xpath('.//w:t/text()', namespaces=ns) return ''.join(parts).strip() def _normalize_ws(text): # Normalize tabs/non-breaking spaces/multiple spaces for robust heading match. return re.sub(r'\s+', ' ', text.replace('\u00A0', ' ')).strip() def _is_empty_paragraph(elem): if elem is None or elem.tag != w_p: Loading @@ -2417,9 +2440,26 @@ def add_break_after_code_blocks_and_tables(docx_input, docx_output): code_break_paras = 0 table_break_paras = 0 in_abbreviations_clause = False for elem in list(body): # Track whether we are currently inside an "Abbreviations" top-level section. # Heading style has already been normalized to "Heading1" before this step. if elem.tag == w_p: style_val = _paragraph_style_val(elem) if style_val == "Heading1": heading_text = _normalize_ws(_paragraph_text(elem)) in_abbreviations_clause = bool( re.search(r'\babbreviations\b', heading_text, flags=re.IGNORECASE) ) # After PL/EW paragraphs: insert one empty paragraph with zero spacing. if _has_target_block_style(elem): style_val = _paragraph_style_val(elem) is_target_style = style_val in {"PL", "EW"} if is_target_style: # EW inside "Abbreviations" clause must not be modified. if style_val == "EW" and in_abbreviations_clause: continue nxt = elem.getnext() if _is_empty_paragraph(nxt): continue Loading Loading
generateBaseline/postprocessing.py +44 −4 Original line number Diff line number Diff line Loading @@ -2403,11 +2403,34 @@ def add_break_after_code_blocks_and_tables(docx_input, docx_output): w_before = f"{{{ns['w']}}}before" w_after = f"{{{ns['w']}}}after" def _has_target_block_style(elem): def _paragraph_style_val(elem): if elem is None or elem.tag != w_p: return False return None style_elems = elem.xpath('./w:pPr/w:pStyle', namespaces=ns) return bool(style_elems and style_elems[0].get(w_val) in {"PL", "EW"}) if not style_elems: return None return style_elems[0].get(w_val) def _paragraph_text(elem): if elem is None or elem.tag != w_p: return "" # Build text from runs to respect how headings are created in update_heading_styles: # number + <w:tab/> + title in separate run parts. parts = [] for run in elem.xpath('./w:r', namespaces=ns): for child in run: if child.tag == f"{{{ns['w']}}}t" and child.text: parts.append(child.text) elif child.tag in (f"{{{ns['w']}}}tab", f"{{{ns['w']}}}br"): parts.append(" ") # Fallback for paragraphs containing text in non-direct run descendants. if not parts: parts = elem.xpath('.//w:t/text()', namespaces=ns) return ''.join(parts).strip() def _normalize_ws(text): # Normalize tabs/non-breaking spaces/multiple spaces for robust heading match. return re.sub(r'\s+', ' ', text.replace('\u00A0', ' ')).strip() def _is_empty_paragraph(elem): if elem is None or elem.tag != w_p: Loading @@ -2417,9 +2440,26 @@ def add_break_after_code_blocks_and_tables(docx_input, docx_output): code_break_paras = 0 table_break_paras = 0 in_abbreviations_clause = False for elem in list(body): # Track whether we are currently inside an "Abbreviations" top-level section. # Heading style has already been normalized to "Heading1" before this step. if elem.tag == w_p: style_val = _paragraph_style_val(elem) if style_val == "Heading1": heading_text = _normalize_ws(_paragraph_text(elem)) in_abbreviations_clause = bool( re.search(r'\babbreviations\b', heading_text, flags=re.IGNORECASE) ) # After PL/EW paragraphs: insert one empty paragraph with zero spacing. if _has_target_block_style(elem): style_val = _paragraph_style_val(elem) is_target_style = style_val in {"PL", "EW"} if is_target_style: # EW inside "Abbreviations" clause must not be modified. if style_val == "EW" and in_abbreviations_clause: continue nxt = elem.getnext() if _is_empty_paragraph(nxt): continue Loading