Commit 14e11bdc authored by Jonas Schüppen
Browse files

Merge branch 'styles_adjustment' into newfeatures_adjustment

# Conflicts:
#	.gitlab-ci.yml
#	generateBaseline/postprocessing.py
#	generateBaseline/postprocessing_styling.py
#	generateBaseline/setup.py
parents 7f4fac29 12f85914
Loading
Loading
Loading
Loading
+9 −2
Original line number Diff line number Diff line
@@ -49,6 +49,7 @@ Build generateBaseline docker image:
        - generateBaseline/generateTOC.py
        - generateBaseline/svg2png.py
        - generateBaseline/postprocessing.py
        - generateBaseline/postprocessing_styling.py
        - generateBaseline/file_helper.py

Build docx-field-refresh docker image:
@@ -75,10 +76,16 @@ Build generateSpecWebSite docker image:
        - generateSpecWebSite/gridTableFilter.py
        - generateSpecWebSite/gridTableTools.py
        - generateSpecWebSite/indexDownload.md
        - generateSpecWebSite/indexReleaseNotes.md
        - generateSpecWebSite/markdownTools.py
        - generateSpecWebSite/regexMatches.py
        - generateSpecWebSite/spec_on_pages.sh
        - generateSpecWebSite/toMkdocs.py
        - generateSpecWebSite/create_frontmatter_table.py
        - markdownTools/dockerfile
        - markdownTools/setup.py
        - markdownTools/requirements.txt
        - markdownTools/processMDSpec.py

Build markdownTools docker image:
  stage: build
+56 −7
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@ from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm

from postprocessing_styling import postprocess_etsi_styles
from postprocessing_styling import *


def parse_input(args_to_parse=None, description = None):
@@ -120,14 +120,10 @@ def table_width_adjustment_cli():
                                                 description="Set the width of table columns according to values in json or if non there to be equal.")
    table_widths_adjustment(docx_input, docx_output, tables_folder)

def postprocess_etsi_styles_cli():
    """Command-line entry point for the ETSI style post-processing pass.

    The input and output DOCX paths come from ``parse_input``; both are
    handed straight to ``postprocess_etsi_styles``.
    """
    cli_description = "Update styles in document according to etsi styles (currently not ready, just a few styles)."
    source_docx, target_docx = parse_input(description=cli_description)
    postprocess_etsi_styles(source_docx, target_docx)

def remove_docx_metadata_cli():
    """Command-line entry point that removes metadata from a DOCX file.

    The input and output DOCX paths come from ``parse_input``; the actual
    work is delegated to ``remove_docx_metadata`` from this module.
    """
    docx_input, docx_output = parse_input(
        description="Remove metadata from docx.")
    # Call the metadata remover, not the ETSI styling pass: this entry point
    # is the one exposed for metadata removal.
    remove_docx_metadata(docx_input, docx_output)


def apply_standard_style_to_unformatted_paragraphs(docx_input, docx_output, standard_style_name = "Normal"):
@@ -528,3 +524,56 @@ def remove_docx_metadata(docx_input, docx_output):

    os.remove(tmp_path)
    print(f"Successfully removed metadata from {docx_input} and wrote it to {docx_output}")


def update_format_styles_cli():
    """Command-line entry point: run all format-style passes, then sanitize.

    Parses two positional arguments (input and output DOCX paths), calls each
    styling pass with that pair of paths in a fixed order, and finally reads
    ``word/document.xml`` back from the output file, runs
    ``sanitize_document_xml`` on it and rewrites the archive with the
    sanitized part.
    """
    cli = argparse.ArgumentParser(description="Update format styles in a DOCX file.")
    cli.add_argument("docx_input", help="Path to input DOCX file")
    cli.add_argument("docx_output", help="Path to output DOCX file")
    options = cli.parse_args()
    source_docx = options.docx_input
    target_docx = options.docx_output

    # NOTE(review): every pass receives the same (input, output) pair. Whether
    # the passes build on each other depends on how each one treats an already
    # existing output file, which is not visible here -- confirm.
    update_figure_captions(source_docx, target_docx)
    update_heading_styles(source_docx, target_docx)
    update_figure_style(source_docx, target_docx)
    update_unnumbered_lists(source_docx, target_docx)
    update_table_captions(source_docx, target_docx)
    update_abbreviations(source_docx, target_docx)
    update_table_rows(source_docx, target_docx)
    update_notes(source_docx, target_docx)
    update_references(source_docx, target_docx)
    update_body_text_style(source_docx, target_docx)
    add_no_break_hyphens(source_docx, target_docx)
    update_references_style(source_docx, target_docx)
    update_source_code_style(source_docx, target_docx)
    update_equation_style(source_docx, target_docx)
    correct_quotes_docx(source_docx, target_docx)

    # Final sanitization: remove invalid XML characters
    document_part = "word/document.xml"
    namespaces = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}
    with zipfile.ZipFile(target_docx, 'r') as archive:
        raw_xml = archive.read(document_part)

    tree_root = etree.fromstring(raw_xml)
    sanitize_document_xml(tree_root, namespaces)
    sanitized_xml = etree.tostring(tree_root, xml_declaration=True, encoding="UTF-8", standalone="yes")

    # Rebuild the archive in a scratch file, then move it over the output.
    handle, scratch_path = tempfile.mkstemp(suffix=".docx")
    os.close(handle)

    try:
        with zipfile.ZipFile(target_docx, 'r') as archive:
            with zipfile.ZipFile(scratch_path, 'w', zipfile.ZIP_DEFLATED) as rebuilt:
                for entry in archive.infolist():
                    # The main document part is appended last, sanitized.
                    if entry.filename == document_part:
                        continue
                    rebuilt.writestr(entry.filename, archive.read(entry.filename))
                rebuilt.writestr(document_part, sanitized_xml)

        shutil.move(scratch_path, target_docx)
        os.chmod(target_docx, 0o644)
        print('Sanitized document: removed invalid XML characters')
    finally:
        # Only present if the move above did not happen.
        if os.path.exists(scratch_path):
            os.remove(scratch_path)
 No newline at end of file
+1896 −35

File changed.

Preview size limit exceeded, changes collapsed.

+3 −4
Original line number Diff line number Diff line
@@ -14,15 +14,14 @@ setup(
            'console_scripts' : ['pandocFilter=pandocFilter:main',
								'generateTOC=generateTOC:main',
								'svg2png=svg2png:main',
								#"update_references=postprocessing:update_word_fields",
								"update_references=postprocessing:update_word_fields",
        						"update_formats=postprocessing:update_formats_cli",
        						"turn_table_contents=postprocessing:turn_table_contents_cli",
        						"table_width_adjustment=postprocessing:table_width_adjustment_cli",
        						#"check_multipage_tables=postprocessing:insert_page_break_before_long_tables",
        						#"apply_etsi_styling: postprocessing:postprocess_etsi_styles",
        						"check_multipage_tables=postprocessing:insert_page_break_before_long_tables",
        						"update_toc=postprocessing:update_toc_cli",
								"update_format_styles=postprocessing:update_format_styles_cli",
								"refresh_docx_fields=postprocessing:refresh_docx_fields_cli",
                                "apply_etsi_styling=postprocessing:postprocess_etsi_styles_cli",
                                "remove_metadata=postprocessing:remove_docx_metadata_cli"
			]
            }
+100 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
#
#	create_frontmatter_table.py
#
#	Creates a markdown table from frontmatter data
#	- Skips file-name lines (lines ending in ".md:")
#	- Keeps <br> tags in values (their removal is currently disabled)
#	- Formats as a clean markdown table
#
#	(c) 2025 by Miguel Angel Reina Ortega
#	License: BSD 3-Clause License. See the LICENSE file for further details.

import sys
import yaml
import re

def remove_br_tags(text):
    """Strip HTML line-break tags from *text* and normalise its whitespace.

    Falsy input (``None`` or an empty string) is returned unchanged.
    Otherwise every ``<br>``, ``<br/>`` or ``<br />`` (any letter case)
    becomes a space, runs of whitespace collapse to a single space, and the
    result is trimmed at both ends.
    """
    if text:
        without_breaks = re.sub(r'<br\s*/?>', ' ', text, flags=re.IGNORECASE)
        collapsed = re.sub(r'\s+', ' ', without_breaks)
        return collapsed.strip()
    return text

def _to_table_cell(text):
    """Make *text* safe to embed in a single markdown table cell."""
    # A literal pipe would end the cell early, so it is escaped.
    escaped = text.replace('|', '\\|')
    # A table row must stay on one line; line breaks become <br> tags.
    return '<br>'.join(escaped.splitlines())


def create_table_from_frontmatter(frontmatter_file, output_file=None):
    """
    Create a markdown table from frontmatter data.

    Every line that ends in ``.md:`` (ignoring surrounding whitespace) is
    treated as a file-name marker and dropped; the remaining text is parsed
    as YAML. Each top-level key becomes one table row with the key in bold
    and the value next to it. Lists and dicts are rendered as block-style
    YAML. Pipes are escaped and line breaks are turned into ``<br>`` so that
    every row stays on a single line.

    Args:
        frontmatter_file: Path to the frontmatter.md file
        output_file: Optional path to output file (if None, prints to stdout)

    Raises:
        ValueError: If the parsed YAML is non-empty but not a mapping.
    """

    # Read frontmatter
    with open(frontmatter_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Drop file-name marker lines (any line ending in ".md:"), keep the rest
    frontmatter_content = [
        line for line in content.split('\n')
        if not line.strip().endswith('.md:')
    ]

    # Parse YAML
    frontmatter = yaml.safe_load('\n'.join(frontmatter_content))
    if not frontmatter:
        frontmatter = {}
    if not isinstance(frontmatter, dict):
        # A scalar or list document has no key/value pairs to tabulate.
        raise ValueError(
            f"Frontmatter in {frontmatter_file} is not a YAML mapping "
            f"(got {type(frontmatter).__name__})"
        )

    # Generate markdown table (the header row only holds non-breaking spaces)
    table_lines = [
        "",
        "|&nbsp; |&nbsp; |",
        "|:-|:-|",
    ]
    # Process each field dynamically
    for key, value in frontmatter.items():
        # Convert value to string
        if value is None:
            value_str = ""
        elif isinstance(value, (list, dict)):
            # For lists and dicts, convert to YAML string
            value_str = yaml.dump(value, default_flow_style=False).strip()
        else:
            value_str = str(value)

        # <br> tags already present in the value are kept on purpose.
        table_lines.append(
            f"| **{_to_table_cell(str(key))}** | {_to_table_cell(value_str)} |"
        )

    # Join all lines
    table_lines.append("")
    table_content = '\n'.join(table_lines)

    # Output
    if output_file:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(table_content)
    else:
        print(table_content)

if __name__ == '__main__':
    # Script mode: the first argument is required, the second is optional.
    arg_count = len(sys.argv)
    if arg_count < 2:
        print("Usage: create_frontmatter_table.py <frontmatter_file> [output_file]")
        sys.exit(1)

    frontmatter_file = sys.argv[1]
    # Without a second argument the table is printed to stdout.
    if arg_count > 2:
        output_file = sys.argv[2]
    else:
        output_file = None

    create_table_from_frontmatter(frontmatter_file, output_file)
Loading