fix: prevent custom styles from being applied twice (c3a058de) · Commits · CIM - Context Information Management / NGSI-LD API

md_to_docx_converter/customized_reference.docx

+192 B (27.9 KiB)

File changed.

No diff preview for this file type.

md_to_docx_converter/src/to_docx/postprocessing.py

+51 −78

Original line number	Diff line number	Diff line
		@@ -277,10 +277,12 @@ def format_examples_and_notes(doc: Doc):
		paragraph.text = f"\t{paragraph.text}"
		elif paragraph.style.name in STYLES_FOR_EXAMPLES_NOTES:
		if paragraph.style.name == "Source Code":
		paragraph.style = "EX Source Code"
		# paragraph.style = "EX Source Code"
		paragraph.paragraph_format.left_indent = Pt(85.04) # 3 cm in points

		if paragraph.style.name == "Compact":
		paragraph.style = "EX Compact"
		# paragraph.style = "EX Compact"
		paragraph.paragraph_format.left_indent = Pt(103.68) # 3.66 cm in points
		else:
		# No longer in example or note
		in_example_or_note = False
		@@ -465,33 +467,8 @@ def add_tagged_styles_and_formatting(doc: Doc):
		and start_match.group(1) == end_match.group(1)
		)

		# if has_both_tags:
		# style_name = start_match.group(1)

		# # Separate text portions according to whether they should be styled or not
		# text_before_start = text[: start_match.start()]
		# text_to_style = text[start_match.end() : end_match.start()]
		# text_after_end = text[end_match.end() :]

		# run.text = text_before_start # Nothing should happen to the text before the tag

		# styled_run = paragraph.add_run(text_to_style)
		# styled_run = apply_formatting_and_styling(styled_run, style_name)

		# if text_after_end.strip(): # Add any remaining text as another run
		# paragraph.add_run(text_after_end)
		# runs = (
		# paragraph.runs
		# ) # Refresh runs now that a new run has been added

		# i += 1
		# continue

		if has_both_tags:
		style_name = start_match.group(1)
		start_tag = "{{{" + style_name + "}}}"
		end_tag = "{{{/" + style_name + "}}}"
		text = text.replace(start_tag + start_tag, start_tag).replace(end_tag + end_tag, end_tag)

		# Separate text portions according to whether they should be styled or not
		text_before_start = text[: start_match.start()]
		@@ -502,75 +479,69 @@ def add_tagged_styles_and_formatting(doc: Doc):

		styled_run = paragraph.add_run(text_to_style)
		styled_run = apply_formatting_and_styling(styled_run, style_name)
		i += 1
		paragraph._p.insert(
		i+1, styled_run._element
		) # Insert the styled run after the current run

		if text_after_end.strip(): # Add any remaining text as another run
		paragraph.add_run(text_after_end)
		i += 1
		paragraph._p.insert(
		i+1, paragraph.runs[-1]._element
		)
		runs = (
		paragraph.runs
		) # Refresh runs now that a new run has been added

		i += 1
		continue

		if not has_start_tag and has_end_tag:
		# Separate text portions according to whether they should be styled or not
		text_before_end = text[: end_match.start()]
		text_after_end = text[end_match.end() :]
		# if not has_start_tag and has_end_tag:
		# # Separate text portions according to whether they should be styled or not
		# text_before_end = text[: end_match.start()]
		# text_after_end = text[end_match.end() :]

		# Keep track of the text to style and this run
		text_to_style += text_before_end
		style_runs.append(run)
		# # Keep track of the text to style and this run
		# text_to_style += text_before_end
		# style_runs.append(run)

		# Reset the old runs and create a new styled run with the accumulated text to style
		for style_run in style_runs:
		style_run.text = ""
		# # Reset the old runs and create a new styled run with the accumulated text to style
		# for style_run in style_runs:
		# style_run.text = ""

		styled_run = paragraph.add_run(text_to_style)
		styled_run = apply_formatting_and_styling(styled_run, active_style)
		# styled_run = paragraph.add_run(text_to_style)
		# styled_run = apply_formatting_and_styling(styled_run, active_style)

		if text_after_end.strip(): # Add any remaining text as another run
		paragraph.add_run(text_after_end)
		runs = (
		paragraph.runs
		) # Refresh runs now that a new run has been added
		# if text_after_end.strip(): # Add any remaining text as another run
		# paragraph.add_run(text_after_end)
		# runs = (
		# paragraph.runs
		# ) # Refresh runs now that a new run has been added

		# Prepare for other tags
		active_style = None
		style_runs = []
		text_to_style = ""
		# # Prepare for other tags
		# active_style = None
		# style_runs = []
		# text_to_style = ""

		i += 1
		continue
		# i += 1
		# continue

		if not active_style and has_start_tag and not has_end_tag:
		active_style = start_match.group(1)
		# if not active_style and has_start_tag and not has_end_tag:
		# active_style = start_match.group(1)

		# Separate text portions according to whether they should be styled or not
		text_before_start = text[: start_match.start()]
		text_after_start = text[start_match.end() :]
		# # Separate text portions according to whether they should be styled or not
		# text_before_start = text[: start_match.start()]
		# text_after_start = text[start_match.end() :]

		# Keep just the text before the the tag and start keeping track of the text to style
		run.text = text_before_start
		text_to_style += text_after_start
		# # Keep just the text before the the tag and start keeping track of the text to style
		# run.text = text_before_start
		# text_to_style += text_after_start

		style_runs.append(run)
		# style_runs.append(run)

		i += 1
		continue
		# i += 1
		# continue

		if active_style: # Inside a tag
		# Simply keep track of this run and its text
		text_to_style += text
		style_runs.append(run)
		# if active_style: # Inside a tag
		# # Simply keep track of this run and its text
		# text_to_style += text
		# style_runs.append(run)

		i += 1
		continue
		# i += 1
		# continue

		i += 1 # No tag here, so just go on to the next run

		@@ -582,6 +553,8 @@ def set_keep_with_next_false(doc: Doc):

		for paragraph in iter_paragraphs(doc):
		paragraph.paragraph_format.keep_with_next = False
		paragraph.paragraph_format.space_after = Pt(12)


		return doc

md_to_docx_converter/src/to_docx/preprocessing.py

+1 −0

Original line number	Diff line number	Diff line
		@@ -432,6 +432,7 @@ def handle_examples_and_notes(soup: BeautifulSoup):

		grandchild.clear()
		grandchild.append(tagged_text)
		grandchild.unwrap()

		return soup

md_to_docx_converter/src/to_html/postprocessing.py

+13 −0

Original line number	Diff line number	Diff line
		@@ -638,6 +638,18 @@ def fix_custom_tags(soup: BeautifulSoup):
		a.string = a.string.replace("root", new_id_prefix)
		return soup

		def fix_lists(soup: BeautifulSoup):
		    """
		    Unwrap the paragraph of every list item that holds nothing but that paragraph.

		    Each ``<li>`` in ``soup`` is inspected: when it has exactly one direct
		    child and that child is a ``<p>`` tag, the ``<p>`` is unwrapped so its
		    contents sit directly inside the ``<li>``. List items with any other
		    number of direct children, or whose single child is not a ``<p>``, are
		    left untouched.

		    The soup is modified in place and also returned.
		    """
		    for list_item in soup.find_all(["li"]):
		        direct_children = list(list_item.children)

		        # Only a lone child qualifies; anything else is left as it is.
		        if len(direct_children) != 1:
		            continue

		        (only_child,) = direct_children
		        if only_child.name == "p":
		            only_child.unwrap()

		    return soup

		def extract_images_from_html(soup: BeautifulSoup) -> dict:
		"""
		@@ -818,6 +830,7 @@ def postprocess(html_dir: str):
		soup = replace_dash_characters(soup)
		soup = move_figure_id_to_FL_elements(soup)
		soup = fix_custom_tags(soup)
		soup = fix_lists(soup)

		images, soup = extract_images_from_html(soup)
		for image_id, image_src in images.items():