feat: improve HTML diff handling with enhanced element cloning and output formatting (2fbaa0a9) · Commits · CIM - Context Information Management / NGSI-LD API

md_to_docx_converter/customCSS.css

+4 −20

Original line number	Diff line number	Diff line
		@@ -54,7 +54,7 @@ body {
		}

		#TOC a.diff-changes-enable {
		position: relative;
		/* position: relative; */
		overflow: visible;
		display: block;
		background-color: red;
		@@ -76,7 +76,7 @@ body {
		color: white;
		}

		#TOC a.diff-changes-enable:hover::before {
		/* #TOC a.diff-changes-enable:hover::before {
		content: "Total Changes: " attr(data-diff-count);
		position: absolute;
		left: 2%;
		@@ -91,7 +91,7 @@ body {
		white-space: nowrap;
		z-index: 1000;
		box-shadow: 0 2px 6px rgba(0, 0, 0, 0.2);
		}
		} */

		#TOC h1 {
		background-color: #02488d;
		@@ -2403,19 +2403,3 @@ ins:has(>li),
		del:has(>li) {
		display: block;
		}
		No newline at end of file

		/*
		Rule to show deleted lines as strikethrough with a red dashed underline in simple diff mode
		div[data-original-content]>del.diff-view-simple {
		display: block !important;
		color: transparent !important;
		border-left: 3px solid #b00020 !important;
		padding-left: 2px !important;
		min-height: 1.2em !important;
		position: relative !important;
		background: repeating-linear-gradient(to right,
		#b00020 0,
		#b00020 8px,
		transparent 8px,
		transparent 12px) center/100% 2px no-repeat !important;
		} */
		No newline at end of file

md_to_docx_converter/html_diff.py

+46 −30

Original line number	Diff line number	Diff line
		@@ -10,6 +10,7 @@ from bs4 import (
		MarkupResemblesLocatorWarning,
		)
		import warnings
		import html

		warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)

		@@ -17,24 +18,34 @@ warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
		def clone_element(elem, soup):
		"""
		Clones an element preserving whitespace and structure.
		More reliable than serializing and re-parsing.
		Recursively copies without serializing/parsing to avoid entity issues.
		"""
		if isinstance(elem, NavigableString):
		if isinstance(elem, Comment):
		return Comment(str(elem))
		# Create a new string in the target soup
		return soup.new_string(str(elem))

		# For Tag elements, serialize with formatter=None to preserve all whitespace
		# then re-parse with html.parser
		elem_str = elem.decode(formatter=None)
		cloned = BeautifulSoup(elem_str, "html.parser").contents[0]
		return cloned
		# For Tag elements, create a new tag and recursively copy children
		if isinstance(elem, Tag):
		new_tag = soup.new_tag(elem.name)
		# Copy attributes
		for key, value in elem.attrs.items():
		new_tag[key] = value
		# Recursively copy children
		for child in elem.children:
		new_child = clone_element(child, soup)
		new_tag.append(new_child)
		return new_tag

		# Fallback to string representation
		return soup.new_string(str(elem))


		def diff_text_content(old_text: str, new_text: str) -> str:
		"""Diffs two text strings and returns HTML with track changes."""
		def diff_text_content(old_text: str, new_text: str, soup):
		"""Diffs two text strings and returns list of BeautifulSoup elements with track changes."""
		if not old_text and not new_text:
		return ""
		return []

		dmp = diff_match_patch()
		dmp.Diff_Timeout = 0
		@@ -45,13 +56,19 @@ def diff_text_content(old_text: str, new_text: str) -> str:
		result = []
		for op, text in diffs:
		if op == -1: # Deletion
		result.append(f"<del class='diff-view-all'>{text}</del>")
		del_tag = soup.new_tag("del")
		del_tag["class"] = ["diff-view-all"]
		del_tag.string = text
		result.append(del_tag)
		elif op == 1: # Insertion
		result.append(f"<ins class='diff-view-all'>{text}</ins>")
		ins_tag = soup.new_tag("ins")
		ins_tag["class"] = ["diff-view-all"]
		ins_tag.string = text
		result.append(ins_tag)
		else: # No change
		result.append(text)
		result.append(soup.new_string(text))

		return "".join(result)
		return result


		def is_whitespace_only(elem):
		@@ -551,28 +568,22 @@ def diff_elements(old_elem, new_elem, parent_result, soup):
		parent_result.append(soup.new_string(preserved_text))
		return

		diff_html = diff_text_content(old_text, new_text)
		if diff_html:
		# Parse the diff result
		diff_soup = BeautifulSoup(diff_html, "html.parser")

		for child in diff_soup.children:
		if isinstance(child, NavigableString):
		text = str(child)
		diff_elements_list = diff_text_content(old_text, new_text, soup)
		if diff_elements_list:
		for elem in diff_elements_list:
		if isinstance(elem, NavigableString):
		text = str(elem)
		# If inside sourceCode, preserve spaces with non-breaking spaces
		if in_source_code:
		text = text.replace(" ", "\u00a0")
		parent_result.append(soup.new_string(text))
		else:
		# Create a new tag in the result soup instead of reusing the parsed one
		new_tag = soup.new_tag(child.name)
		if child.string:
		text = child.string
		# elem is already a proper BeautifulSoup tag
		if elem.string and in_source_code:
		# If inside sourceCode, preserve spaces with non-breaking spaces
		if in_source_code:
		text = text.replace(" ", "\u00a0")
		new_tag.string = text
		parent_result.append(new_tag)
		text = elem.string.replace(" ", "\u00a0")
		elem.string = text
		parent_result.append(elem)
		return

		# One is a tag, the other is a string: they are different
		@@ -774,6 +785,10 @@ def make_trackchanges_diff(
		except Exception as e:
		new_html = "<html><body><div id='editor'></div></body></html>"

		if "annex-c" in out_path.lower():
		print(f"Processing Annex C: {out_path}")
		print(f"Content: {new_html}") # Print the full content for debugging

		# Parse with BeautifulSoup
		old_soup = BeautifulSoup(old_html, "html.parser")
		new_soup = BeautifulSoup(new_html, "html.parser")
		@@ -866,7 +881,8 @@ def make_trackchanges_diff(
		if "diff-view-all" not in row.get("class", []):
		row["class"].append("diff-view-all")

		Path(out_path).write_text(str(result_soup), encoding="utf-8")
		# Use formatter="html" to properly handle HTML entities
		Path(out_path).write_text(result_soup.decode(formatter="html"), encoding="utf-8")
		# print(f"Created diff: {out_path}")
		return total_changes