fix: prevent pandoc from HTML-encoding < and > in JSON blocks (efaf5545) · Commits · CIM - Context Information Management / NGSI-LD API

md_to_docx_converter/src/to_html/postprocessing.py

+12 −4

Original line number	Diff line number	Diff line
		@@ -73,9 +73,17 @@ def unwrap_gt_lt_code_tags(soup: BeautifulSoup):
		During preprocessing, sections of text marked by a beginning `<` and an ending `>` needed to be enclosed in code blocks for Pandoc to preserve the text.
		"""
		# codes = soup.find_all("code", lambda tag: tag.parent and tag.parent.name != "pre")
		codes = soup.select("code:not(pre > code):not(em > code)")
		codes = soup.select("code:not(em > code)")

		for code in codes:
		if code.parent and code.parent.name == "pre":
		span_text_only_children = code.find_all(
		lambda tag: isinstance(tag, Tag) and tag.name == "span" and len(tag.contents) == 1 and isinstance(tag.contents[0], NavigableString)
		)
		for child in span_text_only_children:
		text = NavigableString(html.unescape(child.get_text()))
		child.contents[0].replace_with(text)
		else:
		text = NavigableString(html.unescape(code.get_text()))
		code.insert_before(text)
		code.decompose()