Commit b9a56316 authored by Marco Cavalli's avatar Marco Cavalli
Browse files

feat: capitalize words in links when needed

parent 20656421
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -131,7 +131,10 @@ function MultipleClauses(el)
            --this may be the start of a list of multiple clauses
            if elem.text:lower():find("clauses") then
                clausesFound = true
                newContent:insert(elem)
                -- Replace plain "clauses" text with a span containing it
                local clausesSpan = pandoc.Span({pandoc.Str(elem.text)})
                clausesSpan.classes = pandoc.List({"clauses-marker"})
                newContent:insert(clausesSpan)
                goto continue
            end

+40 −1
Original line number Diff line number Diff line
@@ -508,7 +508,6 @@ def fix_custom_tags(soup: BeautifulSoup):
                print(p_error(f"Error: No previous element found for '{a.string}'. There are not any figures/tables above this tag."))
                os._exit(1)
        elif href.find("#") != -1 and href.find("root") != -1 and notAnImage(href): # when root is used in md
            print(href)
            new_id_prefix = f"{h1_tag['id']}"
            a["href"] = href.replace("root", new_id_prefix)
            a.string = a.string.replace("root", new_id_prefix)
@@ -564,6 +563,45 @@ def add_custom_link_to_images(soup: BeautifulSoup, images_mapping: dict) -> Beau

    return soup

# Lower-case words that introduce a cross-reference link (e.g. "clause 5.1").
_REFERENCE_PREFIXES = ("figure", "table", "clause", "annex")
# Punctuation that ends a sentence in the text preceding a reference.
_SENTENCE_ENDINGS = (".", "!", "?")


def _capitalize_first(text: str) -> str:
    """Return *text* with only its first character upper-cased.

    Unlike ``str.capitalize()``, the rest of the string is left untouched,
    so identifiers such as "annex B" become "Annex B" rather than "Annex b".
    """
    return text[:1].upper() + text[1:]


def _starts_sentence(node) -> bool:
    """Return True if *node* opens its parent or follows sentence-ending punctuation."""
    parent = node.parent
    # Identity, not equality: Beautiful Soup's ``==`` compares markup, so a
    # later element identical to the first child would otherwise match too.
    if parent is not None and parent.contents[0] is node:
        return True

    previous = node.previous_sibling
    if isinstance(previous, NavigableString):
        return previous.strip().endswith(_SENTENCE_ENDINGS)
    return False


def _capitalize_if_sentence_start(node, required_prefixes=None) -> None:
    """Capitalize the text of *node* in place when it begins a sentence.

    Nodes with empty text are skipped. When *required_prefixes* is given,
    nodes whose text does not start with one of them are skipped as well.
    """
    text = node.get_text()
    if not text:
        return
    if required_prefixes is not None and not text.startswith(required_prefixes):
        return
    if _starts_sentence(node):
        # Assigning ``.string`` replaces the node's children with one text node.
        node.string = _capitalize_first(text)


def fix_capitalization_in_links(soup: BeautifulSoup) -> BeautifulSoup:
    """
    Capitalize reference words that start a sentence.

    Two kinds of elements are handled:

    * ``<a>`` tags whose text starts with "figure", "table", "clause" or
      "annex";
    * ``<span class="clauses-marker">`` tags, whatever their text.

    An element is considered to start a sentence when it is the first child
    of its parent, or when the text node immediately before it ends with
    ".", "!" or "?" (ignoring surrounding whitespace). Only the first
    character is upper-cased; the remainder of the text keeps its case.

    The soup is modified in place and returned for convenience.
    """
    for link in soup.find_all("a"):
        _capitalize_if_sentence_start(link, _REFERENCE_PREFIXES)

    for marker in soup.find_all("span", class_="clauses-marker"):
        _capitalize_if_sentence_start(marker)

    return soup


def postprocess(html_dir: str):
    """
@@ -637,6 +675,7 @@ def postprocess(html_dir: str):
            
            try:
                soup = add_custom_link_to_images(soup, images_mapping)
                soup = fix_capitalization_in_links(soup)
            except ValueError as e:
                print(p_error(f"Error in file {filename}:"))
                print(p_error(str(e)))