Loading md_to_docx_converter/customCSS.css +4 −20 Original line number Diff line number Diff line Loading @@ -54,7 +54,7 @@ body { } #TOC a.diff-changes-enable { position: relative; /* position: relative; */ overflow: visible; display: block; background-color: red; Loading @@ -76,7 +76,7 @@ body { color: white; } #TOC a.diff-changes-enable:hover::before { /* #TOC a.diff-changes-enable:hover::before { content: "Total Changes: " attr(data-diff-count); position: absolute; left: 2%; Loading @@ -91,7 +91,7 @@ body { white-space: nowrap; z-index: 1000; box-shadow: 0 2px 6px rgba(0, 0, 0, 0.2); } } */ #TOC h1 { background-color: #02488d; Loading Loading @@ -2403,19 +2403,3 @@ ins:has(>li), del:has(>li) { display: block; } No newline at end of file /* Rule to show deleted lines as strikethrough with a red dashed underline in simple diff mode div[data-original-content]>del.diff-view-simple { display: block !important; color: transparent !important; border-left: 3px solid #b00020 !important; padding-left: 2px !important; min-height: 1.2em !important; position: relative !important; background: repeating-linear-gradient(to right, #b00020 0, #b00020 8px, transparent 8px, transparent 12px) center/100% 2px no-repeat !important; } */ No newline at end of file md_to_docx_converter/html_diff.py +46 −30 Original line number Diff line number Diff line Loading @@ -10,6 +10,7 @@ from bs4 import ( MarkupResemblesLocatorWarning, ) import warnings import html warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning) Loading @@ -17,24 +18,34 @@ warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning) def clone_element(elem, soup): """ Clones an element preserving whitespace and structure. More reliable than serializing and re-parsing. Recursively copies without serializing/parsing to avoid entity issues. """ if isinstance(elem, NavigableString): if isinstance(elem, Comment): return Comment(str(elem)) # Create a new string in the target soup return soup.new_string(str(elem)) # For Tag elements, serialize with formatter=None to preserve all whitespace # then re-parse with html.parser elem_str = elem.decode(formatter=None) cloned = BeautifulSoup(elem_str, "html.parser").contents[0] return cloned # For Tag elements, create a new tag and recursively copy children if isinstance(elem, Tag): new_tag = soup.new_tag(elem.name) # Copy attributes for key, value in elem.attrs.items(): new_tag[key] = value # Recursively copy children for child in elem.children: new_child = clone_element(child, soup) new_tag.append(new_child) return new_tag # Fallback to string representation return soup.new_string(str(elem)) def diff_text_content(old_text: str, new_text: str) -> str: """Diffs two text strings and returns HTML with track changes.""" def diff_text_content(old_text: str, new_text: str, soup): """Diffs two text strings and returns list of BeautifulSoup elements with track changes.""" if not old_text and not new_text: return "" return [] dmp = diff_match_patch() dmp.Diff_Timeout = 0 Loading @@ -45,13 +56,19 @@ def diff_text_content(old_text: str, new_text: str) -> str: result = [] for op, text in diffs: if op == -1: # Deletion result.append(f"<del class='diff-view-all'>{text}</del>") del_tag = soup.new_tag("del") del_tag["class"] = ["diff-view-all"] del_tag.string = text result.append(del_tag) elif op == 1: # Insertion result.append(f"<ins class='diff-view-all'>{text}</ins>") ins_tag = soup.new_tag("ins") ins_tag["class"] = ["diff-view-all"] ins_tag.string = text result.append(ins_tag) else: # No change result.append(text) result.append(soup.new_string(text)) return "".join(result) return result def is_whitespace_only(elem): Loading Loading @@ -551,28 +568,22 @@ def diff_elements(old_elem, new_elem, parent_result, soup): parent_result.append(soup.new_string(preserved_text)) return diff_html = diff_text_content(old_text, new_text) if diff_html: # Parse the diff result diff_soup = BeautifulSoup(diff_html, "html.parser") for child in diff_soup.children: if isinstance(child, NavigableString): text = str(child) diff_elements_list = diff_text_content(old_text, new_text, soup) if diff_elements_list: for elem in diff_elements_list: if isinstance(elem, NavigableString): text = str(elem) # If inside sourceCode, preserve spaces with non-breaking spaces if in_source_code: text = text.replace(" ", "\u00a0") parent_result.append(soup.new_string(text)) else: # Create a new tag in the result soup instead of reusing the parsed one new_tag = soup.new_tag(child.name) if child.string: text = child.string # elem is already a proper BeautifulSoup tag if elem.string and in_source_code: # If inside sourceCode, preserve spaces with non-breaking spaces if in_source_code: text = text.replace(" ", "\u00a0") new_tag.string = text parent_result.append(new_tag) text = elem.string.replace(" ", "\u00a0") elem.string = text parent_result.append(elem) return # One is a tag, the other is a string: they are different Loading Loading @@ -774,6 +785,10 @@ def make_trackchanges_diff( except Exception as e: new_html = "<html><body><div id='editor'></div></body></html>" if "annex-c" in out_path.lower(): print(f"Processing Annex C: {out_path}") print(f"Content: {new_html}") # Print the beginning of the content for debugging # Parse with BeautifulSoup old_soup = BeautifulSoup(old_html, "html.parser") new_soup = BeautifulSoup(new_html, "html.parser") Loading Loading @@ -866,7 +881,8 @@ def make_trackchanges_diff( if "diff-view-all" not in row.get("class", []): row["class"].append("diff-view-all") Path(out_path).write_text(str(result_soup), encoding="utf-8") # Use formatter="html" to properly handle HTML entities Path(out_path).write_text(result_soup.decode(formatter="html"), encoding="utf-8") # print(f"Created diff: {out_path}") return total_changes Loading Loading
md_to_docx_converter/customCSS.css +4 −20 Original line number Diff line number Diff line Loading @@ -54,7 +54,7 @@ body { } #TOC a.diff-changes-enable { position: relative; /* position: relative; */ overflow: visible; display: block; background-color: red; Loading @@ -76,7 +76,7 @@ body { color: white; } #TOC a.diff-changes-enable:hover::before { /* #TOC a.diff-changes-enable:hover::before { content: "Total Changes: " attr(data-diff-count); position: absolute; left: 2%; Loading @@ -91,7 +91,7 @@ body { white-space: nowrap; z-index: 1000; box-shadow: 0 2px 6px rgba(0, 0, 0, 0.2); } } */ #TOC h1 { background-color: #02488d; Loading Loading @@ -2403,19 +2403,3 @@ ins:has(>li), del:has(>li) { display: block; } No newline at end of file /* Rule to show deleted lines as strikethrough with a red dashed underline in simple diff mode div[data-original-content]>del.diff-view-simple { display: block !important; color: transparent !important; border-left: 3px solid #b00020 !important; padding-left: 2px !important; min-height: 1.2em !important; position: relative !important; background: repeating-linear-gradient(to right, #b00020 0, #b00020 8px, transparent 8px, transparent 12px) center/100% 2px no-repeat !important; } */ No newline at end of file
md_to_docx_converter/html_diff.py +46 −30 Original line number Diff line number Diff line Loading @@ -10,6 +10,7 @@ from bs4 import ( MarkupResemblesLocatorWarning, ) import warnings import html warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning) Loading @@ -17,24 +18,34 @@ warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning) def clone_element(elem, soup): """ Clones an element preserving whitespace and structure. More reliable than serializing and re-parsing. Recursively copies without serializing/parsing to avoid entity issues. """ if isinstance(elem, NavigableString): if isinstance(elem, Comment): return Comment(str(elem)) # Create a new string in the target soup return soup.new_string(str(elem)) # For Tag elements, serialize with formatter=None to preserve all whitespace # then re-parse with html.parser elem_str = elem.decode(formatter=None) cloned = BeautifulSoup(elem_str, "html.parser").contents[0] return cloned # For Tag elements, create a new tag and recursively copy children if isinstance(elem, Tag): new_tag = soup.new_tag(elem.name) # Copy attributes for key, value in elem.attrs.items(): new_tag[key] = value # Recursively copy children for child in elem.children: new_child = clone_element(child, soup) new_tag.append(new_child) return new_tag # Fallback to string representation return soup.new_string(str(elem)) def diff_text_content(old_text: str, new_text: str) -> str: """Diffs two text strings and returns HTML with track changes.""" def diff_text_content(old_text: str, new_text: str, soup): """Diffs two text strings and returns list of BeautifulSoup elements with track changes.""" if not old_text and not new_text: return "" return [] dmp = diff_match_patch() dmp.Diff_Timeout = 0 Loading @@ -45,13 +56,19 @@ def diff_text_content(old_text: str, new_text: str) -> str: result = [] for op, text in diffs: if op == -1: # Deletion result.append(f"<del class='diff-view-all'>{text}</del>") del_tag = soup.new_tag("del") del_tag["class"] = ["diff-view-all"] del_tag.string = text result.append(del_tag) elif op == 1: # Insertion result.append(f"<ins class='diff-view-all'>{text}</ins>") ins_tag = soup.new_tag("ins") ins_tag["class"] = ["diff-view-all"] ins_tag.string = text result.append(ins_tag) else: # No change result.append(text) result.append(soup.new_string(text)) return "".join(result) return result def is_whitespace_only(elem): Loading Loading @@ -551,28 +568,22 @@ def diff_elements(old_elem, new_elem, parent_result, soup): parent_result.append(soup.new_string(preserved_text)) return diff_html = diff_text_content(old_text, new_text) if diff_html: # Parse the diff result diff_soup = BeautifulSoup(diff_html, "html.parser") for child in diff_soup.children: if isinstance(child, NavigableString): text = str(child) diff_elements_list = diff_text_content(old_text, new_text, soup) if diff_elements_list: for elem in diff_elements_list: if isinstance(elem, NavigableString): text = str(elem) # If inside sourceCode, preserve spaces with non-breaking spaces if in_source_code: text = text.replace(" ", "\u00a0") parent_result.append(soup.new_string(text)) else: # Create a new tag in the result soup instead of reusing the parsed one new_tag = soup.new_tag(child.name) if child.string: text = child.string # elem is already a proper BeautifulSoup tag if elem.string and in_source_code: # If inside sourceCode, preserve spaces with non-breaking spaces if in_source_code: text = text.replace(" ", "\u00a0") new_tag.string = text parent_result.append(new_tag) text = elem.string.replace(" ", "\u00a0") elem.string = text parent_result.append(elem) return # One is a tag, the other is a string: they are different Loading Loading @@ -774,6 +785,10 @@ def make_trackchanges_diff( except Exception as e: new_html = "<html><body><div id='editor'></div></body></html>" if "annex-c" in out_path.lower(): print(f"Processing Annex C: {out_path}") print(f"Content: {new_html}") # Print the beginning of the content for debugging # Parse with BeautifulSoup old_soup = BeautifulSoup(old_html, "html.parser") new_soup = BeautifulSoup(new_html, "html.parser") Loading Loading @@ -866,7 +881,8 @@ def make_trackchanges_diff( if "diff-view-all" not in row.get("class", []): row["class"].append("diff-view-all") Path(out_path).write_text(str(result_soup), encoding="utf-8") # Use formatter="html" to properly handle HTML entities Path(out_path).write_text(result_soup.decode(formatter="html"), encoding="utf-8") # print(f"Created diff: {out_path}") return total_changes Loading