Commit ea6db6ba authored by Miguel Angel Reina Ortega's avatar Miguel Angel Reina Ortega
Browse files

new update toc function

parent 37746c11
Loading
Loading
Loading
Loading
Loading
+10 −6
Original line number Diff line number Diff line
@@ -211,21 +211,25 @@ def correctTableSeparators(progress: Progress, mdLines: list[str]) -> list[str]:
	return _lines


def process(args) -> None:
	"""Run the markdown processing steps on one document and write the result.

	The steps run in a fixed order; two of them are optional and controlled
	by flags on ``args``.

	Args:
		args: Parsed command-line namespace. The attributes read here are
			``document`` (input file), ``outDirectory`` (output directory),
			``figure_paths`` (enable the SVG-to-PNG figure path replacement)
			and ``table_separators`` (enable the table separator correction).
	"""
	with Progress(TextColumn('{task.description}'),  TimeElapsedColumn()) as progress:
		mdLines = readMDFile(progress, args.document)
		mdLines = correctTOC(progress, mdLines)
		mdLines = replaceTableCaptions(progress, mdLines)
		mdLines = replaceFigureCaptions(progress, mdLines)
		# Optional step, only when -fp / --figure-paths was given
		if args.figure_paths:
			mdLines = replaceFiguresPathSvgToPng(progress, mdLines)
		mdLines = replaceLineBreaks(progress, mdLines)
		# Optional step, only when -ts / --table-separators was given
		if args.table_separators:
			mdLines = correctTableSeparators(progress, mdLines)
		writeMDFile(progress, mdLines, args.document, args.outDirectory)


def main(args=None):
	# Parse command line arguments
	parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
	parser.add_argument('-ts', '--table-separators',        action='store_true', required=False, default=False, help="Correct table separators")
	parser.add_argument('-fp', '--figure-paths',        action='store_true', required=False, default=False, help="Replace figure paths")
	parser.add_argument('--outdir', '-o', action='store', dest='outDirectory', default = 'out', metavar = '<output directory>',  help = 'specify output directory')
	parser.add_argument('document',  help = 'document to parse')
	args = parser.parse_args()
@@ -233,7 +237,7 @@ def main(args=None):
	# Process documents and print output
	os.makedirs(args.outDirectory, exist_ok = True)

	process(args.document, args.outDirectory)
	process(args)

if __name__ == '__main__':
    sys.exit(main())
 No newline at end of file
+99 −36
Original line number Diff line number Diff line
@@ -3,6 +3,10 @@ import os
import re
import warnings
from pathlib import Path
import zipfile
from lxml import etree
import tempfile
import shutil

import win32com.client #pip install pywin32

@@ -11,7 +15,6 @@ from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm

from errors import ErrorHandler, Level, Mode
from file_helper import get_all_files_from_dir


@@ -37,7 +40,7 @@ def apply_standard_style_to_unformatted_paragraphs(config):
            p.style = standard_style_name
            changed += 1

    ErrorHandler()(f"Changed style to '{standard_style_name}' for {changed} paragraphs.")
    print(f'Changed style to {standard_style_name} for {changed} paragraphs.')
    doc.save(output_path)

def rotate_cell_text(cell):
@@ -86,7 +89,7 @@ def update_word_fields(config_path: dict|str):
    relativer_pfad = Path(docx_path)
    docx_absolute_path = relativer_pfad.resolve()
    if not os.path.isfile(docx_absolute_path):
        ErrorHandler()(f"File not found: {docx_absolute_path}", Level.ERROR)
        print(f'File not found: {docx_absolute_path}')

    # Word starten
    word = win32com.client.Dispatch("Word.Application")
@@ -105,7 +108,7 @@ def update_word_fields(config_path: dict|str):

        # Schließen
        doc.Close()
        ErrorHandler()(f"Fields in '{docx_absolute_path}' updated and saved")
        print(f'Fields in {docx_absolute_path} updated and saved')
    finally:
        word.Quit()

@@ -128,7 +131,7 @@ def insert_page_break_before_long_tables(config):
        end_page = doc.Range(end - 1, end - 1).Information(3)

        if end_page > start_page:
            ErrorHandler()(f"Table {i + 1} is on a page break: {start_page} -> {end_page}", Level.INFO)
            print(f'Table {i + 1} is on a page break: {start_page} -> {end_page}')
            # Seitenumbruch einfügen
            para = doc.Range(start, start)
            para.InsertBreak(7)  # wdPageBreak = 7
@@ -138,39 +141,99 @@ def insert_page_break_before_long_tables(config):
    doc.Close()
    word.Quit()

def update_toc_level(config):
    docx_path = config.get("output_docx")
    word = win32com.client.Dispatch("Word.Application")
    word.Visible = False
def format_toc_header(xml_data, ns=None):
    """Switch every paragraph style reference "TOCHeading" to "TT".

    Args:
        xml_data: Serialized XML in a form accepted by ``etree.fromstring``;
            ``update_toc`` passes the bytes of ``word/document.xml``.
        ns: Prefix-to-URI map used for the XPath query; it must contain a
            ``"w"`` entry. Defaults to the WordprocessingML main namespace.

    Returns:
        The modified tree serialized as UTF-8 with an XML declaration.
    """
    if ns is None:
        ns = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}

    root = etree.fromstring(xml_data)

    # Attributes are addressed in Clark notation: "{namespace-uri}localname"
    val_attr = f"{{{ns['w']}}}val"

    # Find <w:pStyle w:val="TOCHeading"> and turn it into <w:pStyle w:val="TT">
    toc_headings = root.xpath('.//w:pStyle[@w:val="TOCHeading"]', namespaces=ns)
    for pstyle in toc_headings:
        pstyle.set(val_attr, "TT")

    print(f'Changed Style "TOCHeading" to "TT" {len(toc_headings)} times')
    return etree.tostring(root, xml_declaration=True, encoding="UTF-8", standalone="yes")

    # Wenn kein TOC vorhanden ist, kannst du eins hinzufügen:
    if doc.TablesOfContents.Count == 0:
        # Inhaltsverzeichnis am Anfang des Dokuments einfügen
        doc.TablesOfContents.Add(
            Range=doc.Range(0, 0),
            UseHeadingStyles=True,
            UpperHeadingLevel=1,
            LowerHeadingLevel=9,  # 👉 bis Heading 9
            UseHyperlinks=True,
            HidePageNumbersInWeb=False,
            UseOutlineLevels=True
        )
    # Vorhandenes TOC anpassen
    toc = doc.TablesOfContents(1)
    #Formating heading -> ToDo: last line not working so skipped for the moment
    #toc_range = toc.Range
    #heading_para = toc_range.Paragraphs(1)
    #heading_para.Style = doc.Styles("Heading 1")
    #set level range from 1-9
    toc.UpperHeadingLevel = 1
    toc.LowerHeadingLevel = 9
    toc.Update()

    doc.SaveAs(docx_path)
    doc.Close()
    word.Quit()
def update_toc_level(xml_data, ns=None):
    r"""Remove the explicit heading-level range from TOC field instructions.

    In every ``<w:instrText>`` element whose text contains ``TOC``, a quoted
    range such as ``"1-3"`` that directly follows ``\o `` is deleted together
    with any whitespace after it; the ``\o`` switch itself is kept.
    NOTE(review): presumably Word then lists all heading levels for a bare
    ``\o`` switch -- confirm against the TOC field code documentation.

    Args:
        xml_data: Serialized XML in a form accepted by ``etree.fromstring``;
            ``update_toc`` passes the bytes of ``word/document.xml``.
        ns: Prefix-to-URI map used for the XPath query; it must contain a
            ``"w"`` entry. Defaults to the WordprocessingML main namespace.

    Returns:
        The modified tree serialized as UTF-8 with an XML declaration.
    """
    if ns is None:
        ns = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}

    root = etree.fromstring(xml_data)

    # Matches "x-y" (x, y numbers) plus trailing whitespace, only when it is
    # preceded by `\o ` (backslash, letter o, exactly one space).
    pattern = re.compile(r'(?<=\\o )"\d+-\d+"\s*')

    for elem in root.xpath('.//w:instrText', namespaces=ns):
        old_text = elem.text
        # An empty <w:instrText/> has text None; skip it instead of raising.
        if not old_text or 'TOC' not in old_text:
            continue

        new_text = pattern.sub('', old_text)
        if new_text != old_text:
            elem.text = new_text
            print(f'Changed TOC: {old_text!r} -> {new_text!r}')

    return etree.tostring(root, xml_declaration=True, encoding="UTF-8", standalone="yes")

def update_toc(docx_input, docx_output):
    """Rewrite ``word/document.xml`` of a .docx and save the result.

    The document part is passed through ``update_toc_level`` and then
    ``format_toc_header``. All other archive members are copied unchanged.
    The new archive is built in a temporary file and moved to
    ``docx_output`` only after it was written completely, so ``docx_input``
    and ``docx_output`` may be the same path.

    Args:
        docx_input: Path of the .docx file to read.
        docx_output: Path the rewritten .docx is moved to.

    Raises:
        KeyError: If the archive has no ``word/document.xml`` member
            (raised by ``ZipFile.read``).
    """
    document_part = "word/document.xml"

    # Read and transform the main document part
    with zipfile.ZipFile(docx_input, 'r') as zin:
        xml_data = zin.read(document_part)

    xml_data = update_toc_level(xml_data)
    xml_data = format_toc_header(xml_data)

    # Create the temp file; the descriptor is closed right away because the
    # file is only ever opened through zipfile.
    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".docx")
    os.close(tmp_fd)

    try:
        # Write the new docx to the temp file. Passing the ZipInfo object
        # (not just the name) keeps each member's timestamp, attributes and
        # compression type, and replacing the document part inside the loop
        # keeps the original member order.
        with zipfile.ZipFile(docx_input, 'r') as zin, zipfile.ZipFile(tmp_path, 'w', zipfile.ZIP_DEFLATED) as zout:
            for item in zin.infolist():
                if item.filename == document_part:
                    payload = xml_data
                else:
                    payload = zin.read(item.filename)
                zout.writestr(item, payload)

        # Move the finished archive to the output path
        shutil.move(tmp_path, docx_output)

    finally:
        # Delete the temp file if it still exists (i.e. the move did not happen)
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

#def update_toc_level(config):
#    docx_path = config.get("output_docx")
#    word = win32com.client.Dispatch("Word.Application")
#    word.Visible = False
#
#    doc = word.Documents.Open(docx_path)
#
#    # If no TOC exists yet, add one:
#    if doc.TablesOfContents.Count == 0:
#        # Insert the table of contents at the start of the document
#        doc.TablesOfContents.Add(
#            Range=doc.Range(0, 0),
#            UseHeadingStyles=True,
#            UpperHeadingLevel=1,
#            LowerHeadingLevel=9,  # up to Heading 9
#            UseHyperlinks=True,
#            HidePageNumbersInWeb=False,
#            UseOutlineLevels=True
#        )
#    # Adjust the existing TOC
#    toc = doc.TablesOfContents(1)
#    #Formatting heading -> TODO: last line not working so skipped for the moment
#    #toc_range = toc.Range
#    #heading_para = toc_range.Paragraphs(1)
#    #heading_para.Style = doc.Styles("Heading 1")
#    #set level range from 1-9
#    toc.UpperHeadingLevel = 1
#    toc.LowerHeadingLevel = 9
#    toc.Update()

#    doc.SaveAs(docx_path)
#    doc.Close()
#    word.Quit()

def table_widths_adjustment(config):
    table_path = config.get("tables_folder")
+1 −1
Original line number Diff line number Diff line
@@ -20,7 +20,7 @@ setup(
        						"table_width_adjustment=postprocessing:table_widths_adjustment",
        						"check_multipage_tables=postprocessing:insert_page_break_before_long_tables",
        						#"apply_etsi_styling: postprocessing:postprocess_etsi_styles",
        						"update_toc_level=postprocessing:update_toc_level",
        						"update_toc=postprocessing:update_toc",
			]
            }