Commit 480c3901 authored by Serafino's avatar Serafino
Browse files

fixed inconsistencies

parent 802f8b27
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
import re
import sys
from os import path
from dataclasses import dataclass

import bs4
@@ -393,7 +392,7 @@ def handle_tag(HTML_tag : bs4.element.Tag):
            # if we are inside a table we use the cell width instead, but it needs to be adjusted because the python-docx image width works in a strange way
            scale_factor = .8 #80% of the width looks better than 100%
            width = cell.width*scale_factor if cell else Emu(5486400*scale_factor) 
            run.add_picture(path.join(path.dirname(path.abspath(__file__)),HTML_tag.attrs["src"]), width = width) #use .png pictures that will later be changed with .emf
            run.add_picture(HTML_tag.attrs["src"], width = width) #use .png pictures that will later be changed with .emf
            run = None # this makes sure we are not adding text to the same run that contains the picture
        elif HTML_tag.name == "span": #this is a run with a different style
            if "class" in HTML_tag.attrs:
+2 −2
Original line number Diff line number Diff line
@@ -27,7 +27,7 @@ foldername = filename[:-5] # same as filename but without extension .docx
media_folder = sys.argv[2]

#EXTRACT docx
os.system(f"7zz x {filename} -o{foldername}")
os.system(f"7z x {filename} -o{foldername}")

#PARSE rels
path = f"{foldername}/word/_rels/document.xml.rels"
@@ -59,7 +59,7 @@ with open(path,"w") as file:
    doc.writexml(file, encoding="UTF-8")

#rebuild docx
os.system(f"7zz a {foldername+'_fixed'}.docx ./{foldername}/*")
os.system(f"7z a {foldername+'_fixed'}.docx ./{foldername}/*")

#delete temp folder
os.system(f"rm -r ./{foldername}")