Loading md_to_docx_converter/customized_reference.docx +192 B (27.9 KiB) File changed.No diff preview for this file type. View original file View changed file md_to_docx_converter/src/to_docx/postprocessing.py +51 −78 Original line number Diff line number Diff line Loading @@ -277,10 +277,12 @@ def format_examples_and_notes(doc: Doc): paragraph.text = f"\t{paragraph.text}" elif paragraph.style.name in STYLES_FOR_EXAMPLES_NOTES: if paragraph.style.name == "Source Code": paragraph.style = "EX Source Code" # paragraph.style = "EX Source Code" paragraph.paragraph_format.left_indent = Pt(85.04) # 3 cm in points if paragraph.style.name == "Compact": paragraph.style = "EX Compact" # paragraph.style = "EX Compact" paragraph.paragraph_format.left_indent = Pt(103.68) # 3.66 cm in points else: # No longer in example or note in_example_or_note = False Loading Loading @@ -465,33 +467,8 @@ def add_tagged_styles_and_formatting(doc: Doc): and start_match.group(1) == end_match.group(1) ) # if has_both_tags: # style_name = start_match.group(1) # # Separate text portions according to whether they should be styled or not # text_before_start = text[: start_match.start()] # text_to_style = text[start_match.end() : end_match.start()] # text_after_end = text[end_match.end() :] # run.text = text_before_start # Nothing should happen to the text before the tag # styled_run = paragraph.add_run(text_to_style) # styled_run = apply_formatting_and_styling(styled_run, style_name) # if text_after_end.strip(): # Add any remaining text as another run # paragraph.add_run(text_after_end) # runs = ( # paragraph.runs # ) # Refresh runs now that a new run has been added # i += 1 # continue if has_both_tags: style_name = start_match.group(1) start_tag = "{{{" + style_name + "}}}" end_tag = "{{{/" + style_name + "}}}" text = text.replace(start_tag + start_tag, start_tag).replace(end_tag + end_tag, end_tag) # Separate text portions according to whether they should be styled or not text_before_start = text[: start_match.start()] Loading @@ -502,75 +479,69 @@ def add_tagged_styles_and_formatting(doc: Doc): styled_run = paragraph.add_run(text_to_style) styled_run = apply_formatting_and_styling(styled_run, style_name) i += 1 paragraph._p.insert( i+1, styled_run._element ) # Insert the styled run after the current run if text_after_end.strip(): # Add any remaining text as another run paragraph.add_run(text_after_end) i += 1 paragraph._p.insert( i+1, paragraph.runs[-1]._element ) runs = ( paragraph.runs ) # Refresh runs now that a new run has been added i += 1 continue if not has_start_tag and has_end_tag: # Separate text portions according to whether they should be styled or not text_before_end = text[: end_match.start()] text_after_end = text[end_match.end() :] # if not has_start_tag and has_end_tag: # # Separate text portions according to whether they should be styled or not # text_before_end = text[: end_match.start()] # text_after_end = text[end_match.end() :] # Keep track of the text to style and this run text_to_style += text_before_end style_runs.append(run) # # Keep track of the text to style and this run # text_to_style += text_before_end # style_runs.append(run) # Reset the old runs and create a new styled run with the accumulated text to style for style_run in style_runs: style_run.text = "" # # Reset the old runs and create a new styled run with the accumulated text to style # for style_run in style_runs: # style_run.text = "" styled_run = paragraph.add_run(text_to_style) styled_run = apply_formatting_and_styling(styled_run, active_style) # styled_run = paragraph.add_run(text_to_style) # styled_run = apply_formatting_and_styling(styled_run, active_style) if text_after_end.strip(): # Add any remaining text as another run paragraph.add_run(text_after_end) runs = ( paragraph.runs ) # Refresh runs now that a new run has been added # if text_after_end.strip(): # Add any remaining text as another run # paragraph.add_run(text_after_end) # runs = ( # paragraph.runs # ) # Refresh runs now that a new run has been added # Prepare for other tags active_style = None style_runs = [] text_to_style = "" # # Prepare for other tags # active_style = None # style_runs = [] # text_to_style = "" i += 1 continue # i += 1 # continue if not active_style and has_start_tag and not has_end_tag: active_style = start_match.group(1) # if not active_style and has_start_tag and not has_end_tag: # active_style = start_match.group(1) # Separate text portions according to whether they should be styled or not text_before_start = text[: start_match.start()] text_after_start = text[start_match.end() :] # # Separate text portions according to whether they should be styled or not # text_before_start = text[: start_match.start()] # text_after_start = text[start_match.end() :] # Keep just the text before the the tag and start keeping track of the text to style run.text = text_before_start text_to_style += text_after_start # # Keep just the text before the the tag and start keeping track of the text to style # run.text = text_before_start # text_to_style += text_after_start style_runs.append(run) # style_runs.append(run) i += 1 continue # i += 1 # continue if active_style: # Inside a tag # Simply keep track of this run and its text text_to_style += text style_runs.append(run) # if active_style: # Inside a tag # # Simply keep track of this run and its text # text_to_style += text # style_runs.append(run) i += 1 continue # i += 1 # continue i += 1 # No tag here, so just go on to the next run Loading @@ -582,6 +553,8 @@ def set_keep_with_next_false(doc: Doc): for paragraph in iter_paragraphs(doc): paragraph.paragraph_format.keep_with_next = False paragraph.paragraph_format.space_after = Pt(12) return doc Loading md_to_docx_converter/src/to_docx/preprocessing.py +1 −0 Original line number Diff line number Diff line Loading @@ -432,6 +432,7 @@ def handle_examples_and_notes(soup: BeautifulSoup): grandchild.clear() grandchild.append(tagged_text) grandchild.unwrap() return soup Loading md_to_docx_converter/src/to_html/postprocessing.py +13 −0 Original line number Diff line number Diff line Loading @@ -638,6 +638,18 @@ def fix_custom_tags(soup: BeautifulSoup): a.string = a.string.replace("root", new_id_prefix) return soup def fix_lists(soup: BeautifulSoup): """ Fix lists that have been improperly nested due to markdown conversion. """ listitems = soup.find_all(["li"]) for item in listitems: children = list(item.children) if len(children) == 1 and children[0].name in ["p"]: children[0].unwrap() return soup def extract_images_from_html(soup: BeautifulSoup) -> dict: """ Loading Loading @@ -818,6 +830,7 @@ def postprocess(html_dir: str): soup = replace_dash_characters(soup) soup = move_figure_id_to_FL_elements(soup) soup = fix_custom_tags(soup) soup = fix_lists(soup) images, soup = extract_images_from_html(soup) for image_id, image_src in images.items(): Loading Loading
md_to_docx_converter/customized_reference.docx +192 B (27.9 KiB) File changed.No diff preview for this file type. View original file View changed file
md_to_docx_converter/src/to_docx/postprocessing.py +51 −78 Original line number Diff line number Diff line Loading @@ -277,10 +277,12 @@ def format_examples_and_notes(doc: Doc): paragraph.text = f"\t{paragraph.text}" elif paragraph.style.name in STYLES_FOR_EXAMPLES_NOTES: if paragraph.style.name == "Source Code": paragraph.style = "EX Source Code" # paragraph.style = "EX Source Code" paragraph.paragraph_format.left_indent = Pt(85.04) # 3 cm in points if paragraph.style.name == "Compact": paragraph.style = "EX Compact" # paragraph.style = "EX Compact" paragraph.paragraph_format.left_indent = Pt(103.68) # 3.66 cm in points else: # No longer in example or note in_example_or_note = False Loading Loading @@ -465,33 +467,8 @@ def add_tagged_styles_and_formatting(doc: Doc): and start_match.group(1) == end_match.group(1) ) # if has_both_tags: # style_name = start_match.group(1) # # Separate text portions according to whether they should be styled or not # text_before_start = text[: start_match.start()] # text_to_style = text[start_match.end() : end_match.start()] # text_after_end = text[end_match.end() :] # run.text = text_before_start # Nothing should happen to the text before the tag # styled_run = paragraph.add_run(text_to_style) # styled_run = apply_formatting_and_styling(styled_run, style_name) # if text_after_end.strip(): # Add any remaining text as another run # paragraph.add_run(text_after_end) # runs = ( # paragraph.runs # ) # Refresh runs now that a new run has been added # i += 1 # continue if has_both_tags: style_name = start_match.group(1) start_tag = "{{{" + style_name + "}}}" end_tag = "{{{/" + style_name + "}}}" text = text.replace(start_tag + start_tag, start_tag).replace(end_tag + end_tag, end_tag) # Separate text portions according to whether they should be styled or not text_before_start = text[: start_match.start()] Loading @@ -502,75 +479,69 @@ def add_tagged_styles_and_formatting(doc: Doc): styled_run = paragraph.add_run(text_to_style) styled_run = apply_formatting_and_styling(styled_run, style_name) i += 1 paragraph._p.insert( i+1, styled_run._element ) # Insert the styled run after the current run if text_after_end.strip(): # Add any remaining text as another run paragraph.add_run(text_after_end) i += 1 paragraph._p.insert( i+1, paragraph.runs[-1]._element ) runs = ( paragraph.runs ) # Refresh runs now that a new run has been added i += 1 continue if not has_start_tag and has_end_tag: # Separate text portions according to whether they should be styled or not text_before_end = text[: end_match.start()] text_after_end = text[end_match.end() :] # if not has_start_tag and has_end_tag: # # Separate text portions according to whether they should be styled or not # text_before_end = text[: end_match.start()] # text_after_end = text[end_match.end() :] # Keep track of the text to style and this run text_to_style += text_before_end style_runs.append(run) # # Keep track of the text to style and this run # text_to_style += text_before_end # style_runs.append(run) # Reset the old runs and create a new styled run with the accumulated text to style for style_run in style_runs: style_run.text = "" # # Reset the old runs and create a new styled run with the accumulated text to style # for style_run in style_runs: # style_run.text = "" styled_run = paragraph.add_run(text_to_style) styled_run = apply_formatting_and_styling(styled_run, active_style) # styled_run = paragraph.add_run(text_to_style) # styled_run = apply_formatting_and_styling(styled_run, active_style) if text_after_end.strip(): # Add any remaining text as another run paragraph.add_run(text_after_end) runs = ( paragraph.runs ) # Refresh runs now that a new run has been added # if text_after_end.strip(): # Add any remaining text as another run # paragraph.add_run(text_after_end) # runs = ( # paragraph.runs # ) # Refresh runs now that a new run has been added # Prepare for other tags active_style = None style_runs = [] text_to_style = "" # # Prepare for other tags # active_style = None # style_runs = [] # text_to_style = "" i += 1 continue # i += 1 # continue if not active_style and has_start_tag and not has_end_tag: active_style = start_match.group(1) # if not active_style and has_start_tag and not has_end_tag: # active_style = start_match.group(1) # Separate text portions according to whether they should be styled or not text_before_start = text[: start_match.start()] text_after_start = text[start_match.end() :] # # Separate text portions according to whether they should be styled or not # text_before_start = text[: start_match.start()] # text_after_start = text[start_match.end() :] # Keep just the text before the the tag and start keeping track of the text to style run.text = text_before_start text_to_style += text_after_start # # Keep just the text before the the tag and start keeping track of the text to style # run.text = text_before_start # text_to_style += text_after_start style_runs.append(run) # style_runs.append(run) i += 1 continue # i += 1 # continue if active_style: # Inside a tag # Simply keep track of this run and its text text_to_style += text style_runs.append(run) # if active_style: # Inside a tag # # Simply keep track of this run and its text # text_to_style += text # style_runs.append(run) i += 1 continue # i += 1 # continue i += 1 # No tag here, so just go on to the next run Loading @@ -582,6 +553,8 @@ def set_keep_with_next_false(doc: Doc): for paragraph in iter_paragraphs(doc): paragraph.paragraph_format.keep_with_next = False paragraph.paragraph_format.space_after = Pt(12) return doc Loading
md_to_docx_converter/src/to_docx/preprocessing.py +1 −0 Original line number Diff line number Diff line Loading @@ -432,6 +432,7 @@ def handle_examples_and_notes(soup: BeautifulSoup): grandchild.clear() grandchild.append(tagged_text) grandchild.unwrap() return soup Loading
md_to_docx_converter/src/to_html/postprocessing.py +13 −0 Original line number Diff line number Diff line Loading @@ -638,6 +638,18 @@ def fix_custom_tags(soup: BeautifulSoup): a.string = a.string.replace("root", new_id_prefix) return soup def fix_lists(soup: BeautifulSoup): """ Fix lists that have been improperly nested due to markdown conversion. """ listitems = soup.find_all(["li"]) for item in listitems: children = list(item.children) if len(children) == 1 and children[0].name in ["p"]: children[0].unwrap() return soup def extract_images_from_html(soup: BeautifulSoup) -> dict: """ Loading Loading @@ -818,6 +830,7 @@ def postprocess(html_dir: str): soup = replace_dash_characters(soup) soup = move_figure_id_to_FL_elements(soup) soup = fix_custom_tags(soup) soup = fix_lists(soup) images, soup = extract_images_from_html(soup) for image_id, image_src in images.items(): Loading