Loading generateBaseline/postprocessing.py +43 −1 Original line number Diff line number Diff line Loading @@ -16,6 +16,9 @@ from docx.oxml import OxmlElement from docx.oxml.ns import qn from docx.shared import Cm from postprocessing_styling import postprocess_etsi_styles def parse_input(args_to_parse=None, description = None): """ args_to_parse: list of arguments, e.g. ["input", "output"] Loading Loading @@ -117,6 +120,16 @@ def table_width_adjustment_cli(): description="Set the width of table columns according to values in json or if non there to be equal.") table_widths_adjustment(docx_input, docx_output, tables_folder) def postprocess_etsi_styles_cli(): docx_input, docx_output = parse_input(description="Update styles in document according to etsi styles (currently not ready, just a few styles).") postprocess_etsi_styles(docx_input, docx_output) def remove_docx_metadata_cli(): docx_input, docx_output = parse_input( description="Remove metadata from docx.") postprocess_etsi_styles(docx_input, docx_output) def apply_standard_style_to_unformatted_paragraphs(docx_input, docx_output, standard_style_name = "Normal"): # Filter warning warnings.filterwarnings( Loading Loading @@ -486,3 +499,32 @@ def table_widths_adjustment(docx_input, docx_output, tables_folder): cell = row.cells[i] cell.width = width doc.save(docx_output) def remove_docx_metadata(docx_input, docx_output): """ removes metadata from word file. """ # 1. Eigenschaften mit python-docx leeren doc = Document(docx_input) props = doc.core_properties props.author = None props.last_modified_by = None props.title = None props.subject = None props.comments = None props.keywords = None props.category = None doc.save(docx_output) # 2. 
Metadaten-Dateien aus dem ZIP löschen tmp_path = docx_output + ".tmp" shutil.copy(docx_output, tmp_path) with zipfile.ZipFile(tmp_path, "r") as zin: with zipfile.ZipFile(docx_output, "w") as zout: for item in zin.infolist(): if item.filename not in ["docProps/core.xml", "docProps/app.xml"]: zout.writestr(item, zin.read(item)) os.remove(tmp_path) print(f"Successfully removed metadata from {docx_input} and wrote it to {docx_output}") No newline at end of file generateBaseline/postprocessing_styling.py 0 → 100644 +53 −0 Original line number Diff line number Diff line import re from docx import Document from docx.shared import Cm def change_annex_heading_formats(doc): for para in doc.paragraphs: if para.style.name == "Heading 1": text = para.text.strip() if not text: continue first_word = text.split()[0] # Beispiel: Bedingte Formatänderung if first_word.lower().startswith("annex"): para.style = doc.styles['Heading 8'] return doc def intent_example_and_note(doc): for para in doc.paragraphs: if para.text.strip().startswith("NOTE") or para.text.strip().startswith("EXAMPLE"): if para.text.strip().startswith("NOTE"): # Einzug vergrößern para.paragraph_format.left_indent = Cm(2) # Hängender Einzug setzen para.paragraph_format.first_line_indent = Cm(-1.5) else: # Einzug vergrößern para.paragraph_format.left_indent = Cm(2.5) # Hängender Einzug setzen para.paragraph_format.first_line_indent = Cm(-2) # set Tab after ":" # Liste der Keywords keywords = ["EXAMPLE", "NOTE"] # Regex bauen: (EXAMPLE|NOTE)\s*\d* → Keyword optional mit Zahl pattern = re.compile(r'(' + '|'.join(keywords) + r'\s*\d*)\s*:\s*(.*)') para.text = pattern.sub(r'\1:\t\2', para.text) def postprocess_etsi_styles(docx_input, docx_output): doc = Document(docx_input) change_annex_heading_formats(doc) intent_example_and_note(doc) doc.save(docx_output) No newline at end of file generateBaseline/setup.py +2 −0 Original line number Diff line number Diff line Loading @@ -22,6 +22,8 @@ setup( #"apply_etsi_styling: 
postprocessing:postprocess_etsi_styles", "update_toc=postprocessing:update_toc_cli", "refresh_docx_fields=postprocessing:refresh_docx_fields_cli", "apply_etsi_styling=postprocessing:postprocess_etsi_styles_cli", "remove_metadata=postprocessing:remove_docx_metadata_cli" ] } Loading Loading
generateBaseline/postprocessing.py +43 −1 Original line number Diff line number Diff line Loading @@ -16,6 +16,9 @@ from docx.oxml import OxmlElement from docx.oxml.ns import qn from docx.shared import Cm from postprocessing_styling import postprocess_etsi_styles def parse_input(args_to_parse=None, description = None): """ args_to_parse: list of arguments, e.g. ["input", "output"] Loading Loading @@ -117,6 +120,16 @@ def table_width_adjustment_cli(): description="Set the width of table columns according to values in json or if non there to be equal.") table_widths_adjustment(docx_input, docx_output, tables_folder) def postprocess_etsi_styles_cli(): docx_input, docx_output = parse_input(description="Update styles in document according to etsi styles (currently not ready, just a few styles).") postprocess_etsi_styles(docx_input, docx_output) def remove_docx_metadata_cli(): docx_input, docx_output = parse_input( description="Remove metadata from docx.") postprocess_etsi_styles(docx_input, docx_output) def apply_standard_style_to_unformatted_paragraphs(docx_input, docx_output, standard_style_name = "Normal"): # Filter warning warnings.filterwarnings( Loading Loading @@ -486,3 +499,32 @@ def table_widths_adjustment(docx_input, docx_output, tables_folder): cell = row.cells[i] cell.width = width doc.save(docx_output) def remove_docx_metadata(docx_input, docx_output): """ removes metadata from word file. """ # 1. Eigenschaften mit python-docx leeren doc = Document(docx_input) props = doc.core_properties props.author = None props.last_modified_by = None props.title = None props.subject = None props.comments = None props.keywords = None props.category = None doc.save(docx_output) # 2. 
Metadaten-Dateien aus dem ZIP löschen tmp_path = docx_output + ".tmp" shutil.copy(docx_output, tmp_path) with zipfile.ZipFile(tmp_path, "r") as zin: with zipfile.ZipFile(docx_output, "w") as zout: for item in zin.infolist(): if item.filename not in ["docProps/core.xml", "docProps/app.xml"]: zout.writestr(item, zin.read(item)) os.remove(tmp_path) print(f"Successfully removed metadata from {docx_input} and wrote it to {docx_output}") No newline at end of file
# generateBaseline/postprocessing_styling.py  (new file, +53 −0)
# Style post-processing that moves a generated .docx closer to the ETSI layout.
import re

from docx import Document
from docx.shared import Cm

# A NOTE/EXAMPLE label at the start of a paragraph: the keyword, an optional
# number and the terminating colon with any whitespace around it.
# The keywords sit in their own group so that the optional number applies to
# both of them ("EXAMPLE 1:" as well as "NOTE 1:").
_LABEL_RE = re.compile(r'^(\s*(?:EXAMPLE|NOTE)(?:\s*\d+)?)\s*:\s*')


def change_annex_heading_formats(doc):
    """
    Restyle "Heading 1" paragraphs whose first word starts with "annex".

    Matching paragraphs (case-insensitive) are given the document's
    "Heading 8" style; looking that style up raises KeyError if the document
    does not define it.

    doc: python-docx Document, modified in place
    returns: the same doc object
    """
    for para in doc.paragraphs:
        if para.style.name != "Heading 1":
            continue
        text = para.text.strip()
        if not text:
            continue
        first_word = text.split()[0]
        if first_word.lower().startswith("annex"):
            para.style = doc.styles['Heading 8']
    return doc


def intent_example_and_note(doc):
    """
    Give NOTE/EXAMPLE paragraphs a hanging indent and a tab after the label.

    Paragraphs whose stripped text starts with "NOTE" get a 2 cm left indent
    with a -1.5 cm first-line indent; those starting with "EXAMPLE" get 2.5 cm
    and -2 cm. A leading label such as "NOTE:", "NOTE 2 :" or "EXAMPLE 1:" is
    normalised to "<label>:<TAB>".

    doc: python-docx Document, modified in place
    """
    for para in doc.paragraphs:
        text = para.text
        stripped = text.strip()
        # NOTE(review): a plain prefix test also matches words such as "NOTES";
        # kept as in the original.
        if stripped.startswith("NOTE"):
            left_indent, first_line_indent = Cm(2), Cm(-1.5)
        elif stripped.startswith("EXAMPLE"):
            left_indent, first_line_indent = Cm(2.5), Cm(-2)
        else:
            continue

        # Larger left indent combined with a negative first-line indent gives
        # the hanging indent.
        para.paragraph_format.left_indent = left_indent
        para.paragraph_format.first_line_indent = first_line_indent

        # Put a tab after the label's colon. Assigning para.text replaces all
        # runs of the paragraph, so only assign when the text really changes.
        new_text = _LABEL_RE.sub(r'\1:\t', text, count=1)
        if new_text != text:
            para.text = new_text


def postprocess_etsi_styles(docx_input, docx_output):
    """
    Load docx_input, apply the ETSI style adjustments and save to docx_output.

    docx_input: path of the source .docx file
    docx_output: path of the .docx file to write
    """
    doc = Document(docx_input)
    change_annex_heading_formats(doc)
    intent_example_and_note(doc)
    doc.save(docx_output)
generateBaseline/setup.py +2 −0 Original line number Diff line number Diff line Loading @@ -22,6 +22,8 @@ setup( #"apply_etsi_styling: postprocessing:postprocess_etsi_styles", "update_toc=postprocessing:update_toc_cli", "refresh_docx_fields=postprocessing:refresh_docx_fields_cli", "apply_etsi_styling=postprocessing:postprocess_etsi_styles_cli", "remove_metadata=postprocessing:remove_docx_metadata_cli" ] } Loading