def remove_trailing_punctuation(a: Tag, needle: str) -> "tuple[str | None, str | None]":
    """Split off whatever follows the last ``needle`` in the link's href.

    Despite the name, everything after the final occurrence of ``needle``
    is treated as trailing text, not only punctuation characters.

    When the href contains ``needle`` followed by extra characters, those
    characters are inserted into the tree as a text node right after ``a``.
    The href and the link text are NOT modified here; the trimmed values
    are returned so the caller can apply them (the call sites assign the
    returned text to ``a.string`` and continue with the returned href).

    Args:
        a: The anchor tag to inspect.
        needle: Marker such as ``"+++below"`` or ``"+++above"``.

    Returns:
        ``(new_href, new_a_text)``. Both are ``None`` when the href has
        nothing after the last ``needle`` (or does not contain it at all).
        ``new_a_text`` alone is ``None`` when the tag has no single string
        child or its text does not contain ``needle``; in that case the
        link text should be left untouched.
    """
    if not needle:
        # str.rpartition rejects an empty separator; nothing to trim anyway.
        return None, None

    href = a.get("href") or ""
    head, marker, trailing = href.rpartition(needle)
    if not marker or not trailing:
        # Needle absent, or the href already ends with it.
        return None, None

    new_href = head + marker

    # a.string is None when the tag is empty or has several children;
    # a missing needle must not truncate the text to an arbitrary prefix.
    new_a_text = None
    text = a.string
    if text is not None:
        text_head, text_marker, _ = text.rpartition(needle)
        if text_marker:
            new_a_text = text_head + text_marker

    # Keep the stripped characters in the document, just outside the link.
    a.insert_after(NavigableString(trailing))
    return new_href, new_a_text