Loading md_to_docx_converter/md_to_html_2.lua +4 −1 Original line number Diff line number Diff line Loading @@ -131,7 +131,10 @@ function MultipleClauses(el) --this may be the start of a list of multiple clauses if elem.text:lower():find("clauses") then clausesFound = true newContent:insert(elem) -- Replace plain "clauses" text with a span containing it local clausesSpan = pandoc.Span({pandoc.Str(elem.text)}) clausesSpan.classes = pandoc.List({"clauses-marker"}) newContent:insert(clausesSpan) goto continue end Loading md_to_docx_converter/src/to_html/postprocessing.py +40 −1 Original line number Diff line number Diff line Loading @@ -508,7 +508,6 @@ def fix_custom_tags(soup: BeautifulSoup): print(p_error(f"Error: No previous element found for '{a.string}'. There are not any figures/tables above this tag.")) os._exit(1) elif href.find("#") != -1 and href.find("root") != -1 and notAnImage(href): # when root is used in md print(href) new_id_prefix = f"{h1_tag['id']}" a["href"] = href.replace("root", new_id_prefix) a.string = a.string.replace("root", new_id_prefix) Loading Loading @@ -564,6 +563,45 @@ def add_custom_link_to_images(soup: BeautifulSoup, images_mapping: dict) -> Beau return soup def fix_capitalization_in_links(soup: BeautifulSoup) -> BeautifulSoup: """ Ensures that the capitalization in the link text matches the capitalization in the href attribute. 
""" a_tags = soup.find_all("a") span_clauses_tags = soup.find_all("span", class_="clauses-marker") for a in a_tags: text = a.get_text() if not text: continue if not text.startswith(("figure", "table", "clause", "annex")): continue # First case: it is the first word in a sentence if a.parent and a.parent.contents[0] == a: capitalized_text = text.capitalize() a.string = capitalized_text # Second case: it is after a period elif a.previous_sibling and isinstance(a.previous_sibling, NavigableString): prev_text = a.previous_sibling.strip() if prev_text.endswith(".") or prev_text.endswith("!") or prev_text.endswith("?"): capitalized_text = text.capitalize() a.string = capitalized_text for span in span_clauses_tags: text = span.get_text() if not text: continue if span.parent and span.parent.contents[0] == span: capitalized_text = text.capitalize() span.string = capitalized_text elif span.previous_sibling and isinstance(span.previous_sibling, NavigableString): prev_text = span.previous_sibling.strip() if prev_text.endswith(".") or prev_text.endswith("!") or prev_text.endswith("?"): capitalized_text = text.capitalize() span.string = capitalized_text return soup def postprocess(html_dir: str): """ Loading Loading @@ -637,6 +675,7 @@ def postprocess(html_dir: str): try: soup = add_custom_link_to_images(soup, images_mapping) soup = fix_capitalization_in_links(soup) except ValueError as e: print(p_error(f"Error in file {filename}:")) print(p_error(str(e))) Loading Loading
md_to_docx_converter/md_to_html_2.lua +4 −1 Original line number Diff line number Diff line Loading @@ -131,7 +131,10 @@ function MultipleClauses(el) --this may be the start of a list of multiple clauses if elem.text:lower():find("clauses") then clausesFound = true newContent:insert(elem) -- Replace plain "clauses" text with a span containing it local clausesSpan = pandoc.Span({pandoc.Str(elem.text)}) clausesSpan.classes = pandoc.List({"clauses-marker"}) newContent:insert(clausesSpan) goto continue end Loading
# Lower-case reference words that open the text of a cross-reference link.
_REFERENCE_PREFIXES = ("figure", "table", "clause", "annex")

# Punctuation marks that close a sentence.
_SENTENCE_ENDINGS = (".", "!", "?")


def _starts_sentence(tag) -> bool:
    """Return True if *tag* opens its parent or follows sentence-ending punctuation."""
    parent = tag.parent
    # Identity, not equality: bs4 tags compare equal when their name,
    # attributes and contents match, so ``==`` would also accept a later
    # duplicate of the parent's first child.
    if parent is not None and parent.contents and parent.contents[0] is tag:
        return True

    previous = tag.previous_sibling
    if isinstance(previous, NavigableString):
        return previous.strip().endswith(_SENTENCE_ENDINGS)
    return False


def _capitalize_first(text: str) -> str:
    """Upper-case only the first character of *text*, leaving the rest as is."""
    # ``str.capitalize()`` is deliberately avoided: it lower-cases the rest
    # of the string, turning "clause A.1" into "Clause a.1".
    return text[:1].upper() + text[1:]


def _capitalize_at_sentence_start(tag) -> None:
    """Capitalize the text of *tag* in place when the tag begins a sentence."""
    text = tag.get_text()
    if not text or not _starts_sentence(tag):
        return

    capitalized = _capitalize_first(text)
    if capitalized != text:
        # NOTE: assigning ``.string`` replaces every child of the tag with a
        # single text node, so it is only done when the text really changes.
        tag.string = capitalized


def fix_capitalization_in_links(soup: BeautifulSoup) -> BeautifulSoup:
    """
    Capitalize cross-reference text that begins a sentence.

    Two kinds of elements are handled:

    * ``<a>`` tags whose text starts with a lower-case "figure", "table",
      "clause" or "annex";
    * ``<span class="clauses-marker">`` tags.

    An element is considered to begin a sentence when it is the first child
    of its parent, or when the text node immediately before it ends with
    ".", "!" or "?" (ignoring surrounding whitespace). Only the first
    character is upper-cased; the remainder of the text keeps its original
    casing.

    Args:
        soup: Parsed HTML document; it is modified in place.

    Returns:
        The same ``soup`` object, so the call can be chained or reassigned.
    """
    for link in soup.find_all("a"):
        # An empty link text never matches a prefix, so it is skipped here.
        if link.get_text().startswith(_REFERENCE_PREFIXES):
            _capitalize_at_sentence_start(link)

    for marker in soup.find_all("span", class_="clauses-marker"):
        _capitalize_at_sentence_start(marker)

    return soup