Loading md_to_docx_converter/customized_reference.docx +111 B (27.7 KiB) File changed.No diff preview for this file type. View original file View changed file md_to_docx_converter/src/constants.py +2 −1 Original line number Diff line number Diff line Loading @@ -61,7 +61,8 @@ REFERENCE_DOC = "customized_reference.docx" OUTPUT_DOC_NAME = "document.docx" # Classes for examples and notes EXAMPLE_NOTE_CLASSES = ["EX", "NO", "TAN", "Source Code"] EXAMPLE_NOTE_CLASSES = ["EX", "NO", "TAN"] STYLES_FOR_EXAMPLES_NOTES = ["Source Code", "Compact"] # HTML tags to look for nested in examples and notes - Pandoc doesn't handle these well, so they need to be handled BOLD_TAGS = ["strong", "b"] Loading md_to_docx_converter/src/to_docx/postprocessing.py +64 −11 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ from docx.opc.constants import RELATIONSHIP_TYPE as RT from src.constants import ( BOLD_TAGS, EXAMPLE_NOTE_CLASSES, STYLES_FOR_EXAMPLES_NOTES, HANDLE_UNDERSCORE_CLASSES, HTML_BASIC_FORMAT_TAGS, ITALIC_TAGS, Loading Loading @@ -232,6 +233,24 @@ def format_tables(doc: Doc): return doc def raise_outline_level(paragraph: Paragraph): pPr = paragraph._element.get_or_add_pPr() numPr = pPr.find(qn('w:numPr')) if numPr is not None: ilvl = numPr.find(qn('w:ilvl')) if ilvl is not None: current_level = ilvl.get(qn('w:val')) if current_level is not None: new_level = str(int(current_level) + 1) ilvl.set(qn('w:val'), new_level) else: # If no current level, set to 0 ilvl = OxmlElement('w:ilvl') ilvl.set(qn('w:val'), '0') numPr.insert(0, ilvl) return paragraph def format_examples_and_notes(doc: Doc): """Add tabs in to examples and notes to ensure their bodies are indented properly""" Loading @@ -255,15 +274,18 @@ def format_examples_and_notes(doc: Doc): if in_example_or_note: # Continue example or note if paragraph.style.name in EXAMPLE_NOTE_CLASSES: if not paragraph.text.startswith("\t") and paragraph.style.name != "Source Code": paragraph.text = f"\t{paragraph.text}" elif paragraph.style.name in STYLES_FOR_EXAMPLES_NOTES: if paragraph.style.name == "Source Code": paragraph.style = "EX Source Code" if paragraph.style.name == "Compact": paragraph.style = "EX Compact" else: # No longer in example or note in_example_or_note = False if paragraph.style.name == "Source Code": if in_example_or_note == True: paragraph.style = "EX Source Code" if paragraph.style.name == "Source Code" or paragraph.style.name == "EX Source Code": # apply to all its runs the HTML-Sample style with no space suffix for run in paragraph.runs: run.style = "HTML-Sample" Loading Loading @@ -404,7 +426,7 @@ def add_tagged_styles_and_formatting(doc: Doc): else: # Apply the style to it by default run.style = style_name para, style_name = handle_code_block_tags_with_suffixes(para, style_name) # para, style_name = handle_code_block_tags_with_suffixes(para, style_name) # Apply bolding and/or italicization as necessary handled_style_name = style_name Loading Loading @@ -443,8 +465,33 @@ def add_tagged_styles_and_formatting(doc: Doc): and start_match.group(1) == end_match.group(1) ) # if has_both_tags: # style_name = start_match.group(1) # # Separate text portions according to whether they should be styled or not # text_before_start = text[: start_match.start()] # text_to_style = text[start_match.end() : end_match.start()] # text_after_end = text[end_match.end() :] # run.text = text_before_start # Nothing should happen to the text before the tag # styled_run = paragraph.add_run(text_to_style) # styled_run = apply_formatting_and_styling(styled_run, style_name) # if text_after_end.strip(): # Add any remaining text as another run # paragraph.add_run(text_after_end) # runs = ( # paragraph.runs # ) # Refresh runs now that a new run has been added # i += 1 # continue if has_both_tags: style_name = start_match.group(1) start_tag = "{{{" + style_name + "}}}" end_tag = "{{{/" + style_name + "}}}" text = text.replace(start_tag + start_tag, start_tag).replace(end_tag + end_tag, end_tag) # Separate text portions according to whether they should be styled or not text_before_start = text[: start_match.start()] Loading @@ -455,14 +502,20 @@ def add_tagged_styles_and_formatting(doc: Doc): styled_run = paragraph.add_run(text_to_style) styled_run = apply_formatting_and_styling(styled_run, style_name) i += 1 paragraph._p.insert( i+1, styled_run._element ) # Insert the styled run after the current run if text_after_end.strip(): # Add any remaining text as another run paragraph.add_run(text_after_end) i += 1 paragraph._p.insert( i+1, paragraph.runs[-1]._element ) runs = ( paragraph.runs ) # Refresh runs now that a new run has been added i += 1 continue if not has_start_tag and has_end_tag: Loading Loading
md_to_docx_converter/customized_reference.docx +111 B (27.7 KiB) File changed.No diff preview for this file type. View original file View changed file
md_to_docx_converter/src/constants.py +2 −1 Original line number Diff line number Diff line Loading @@ -61,7 +61,8 @@ REFERENCE_DOC = "customized_reference.docx" OUTPUT_DOC_NAME = "document.docx" # Classes for examples and notes EXAMPLE_NOTE_CLASSES = ["EX", "NO", "TAN", "Source Code"] EXAMPLE_NOTE_CLASSES = ["EX", "NO", "TAN"] STYLES_FOR_EXAMPLES_NOTES = ["Source Code", "Compact"] # HTML tags to look for nested in examples and notes - Pandoc doesn't handle these well, so they need to be handled BOLD_TAGS = ["strong", "b"] Loading
md_to_docx_converter/src/to_docx/postprocessing.py +64 −11 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ from docx.opc.constants import RELATIONSHIP_TYPE as RT from src.constants import ( BOLD_TAGS, EXAMPLE_NOTE_CLASSES, STYLES_FOR_EXAMPLES_NOTES, HANDLE_UNDERSCORE_CLASSES, HTML_BASIC_FORMAT_TAGS, ITALIC_TAGS, Loading Loading @@ -232,6 +233,24 @@ def format_tables(doc: Doc): return doc def raise_outline_level(paragraph: Paragraph): pPr = paragraph._element.get_or_add_pPr() numPr = pPr.find(qn('w:numPr')) if numPr is not None: ilvl = numPr.find(qn('w:ilvl')) if ilvl is not None: current_level = ilvl.get(qn('w:val')) if current_level is not None: new_level = str(int(current_level) + 1) ilvl.set(qn('w:val'), new_level) else: # If no current level, set to 0 ilvl = OxmlElement('w:ilvl') ilvl.set(qn('w:val'), '0') numPr.insert(0, ilvl) return paragraph def format_examples_and_notes(doc: Doc): """Add tabs in to examples and notes to ensure their bodies are indented properly""" Loading @@ -255,15 +274,18 @@ def format_examples_and_notes(doc: Doc): if in_example_or_note: # Continue example or note if paragraph.style.name in EXAMPLE_NOTE_CLASSES: if not paragraph.text.startswith("\t") and paragraph.style.name != "Source Code": paragraph.text = f"\t{paragraph.text}" elif paragraph.style.name in STYLES_FOR_EXAMPLES_NOTES: if paragraph.style.name == "Source Code": paragraph.style = "EX Source Code" if paragraph.style.name == "Compact": paragraph.style = "EX Compact" else: # No longer in example or note in_example_or_note = False if paragraph.style.name == "Source Code": if in_example_or_note == True: paragraph.style = "EX Source Code" if paragraph.style.name == "Source Code" or paragraph.style.name == "EX Source Code": # apply to all its runs the HTML-Sample style with no space suffix for run in paragraph.runs: run.style = "HTML-Sample" Loading Loading @@ -404,7 +426,7 @@ def add_tagged_styles_and_formatting(doc: Doc): else: # Apply the style to it by default run.style = style_name para, style_name = handle_code_block_tags_with_suffixes(para, style_name) # para, style_name = handle_code_block_tags_with_suffixes(para, style_name) # Apply bolding and/or italicization as necessary handled_style_name = style_name Loading Loading @@ -443,8 +465,33 @@ def add_tagged_styles_and_formatting(doc: Doc): and start_match.group(1) == end_match.group(1) ) # if has_both_tags: # style_name = start_match.group(1) # # Separate text portions according to whether they should be styled or not # text_before_start = text[: start_match.start()] # text_to_style = text[start_match.end() : end_match.start()] # text_after_end = text[end_match.end() :] # run.text = text_before_start # Nothing should happen to the text before the tag # styled_run = paragraph.add_run(text_to_style) # styled_run = apply_formatting_and_styling(styled_run, style_name) # if text_after_end.strip(): # Add any remaining text as another run # paragraph.add_run(text_after_end) # runs = ( # paragraph.runs # ) # Refresh runs now that a new run has been added # i += 1 # continue if has_both_tags: style_name = start_match.group(1) start_tag = "{{{" + style_name + "}}}" end_tag = "{{{/" + style_name + "}}}" text = text.replace(start_tag + start_tag, start_tag).replace(end_tag + end_tag, end_tag) # Separate text portions according to whether they should be styled or not text_before_start = text[: start_match.start()] Loading @@ -455,14 +502,20 @@ def add_tagged_styles_and_formatting(doc: Doc): styled_run = paragraph.add_run(text_to_style) styled_run = apply_formatting_and_styling(styled_run, style_name) i += 1 paragraph._p.insert( i+1, styled_run._element ) # Insert the styled run after the current run if text_after_end.strip(): # Add any remaining text as another run paragraph.add_run(text_after_end) i += 1 paragraph._p.insert( i+1, paragraph.runs[-1]._element ) runs = ( paragraph.runs ) # Refresh runs now that a new run has been added i += 1 continue if not has_start_tag and has_end_tag: Loading