Commit 8dd82672 authored by Marco Cavalli's avatar Marco Cavalli
Browse files

fix: tag all references when there are multiple occurrences in the same paragraph

parent cfd1e5d3
Loading
Loading
Loading
Loading
+33 −26
Original line number Diff line number Diff line
@@ -319,23 +319,34 @@ def add_links_to_references_in_text(soup):
    REF_REGEX_N = r"(?<!\[)\[(n\.[A-Za-z0-9]+)\]"

    def insert_link_with_reference(
        content, tag_contents, internal_match, is_informative
        content, is_informative
    ):
        if content.parent is None:
            return

        before_text = str(content).split(tag_contents)[0]
        after_text = str(content).split(tag_contents)[1]
        tag_contents = tag_contents.replace("[n.", "[") # normative can't have a prefix, this won't affect informative
        opening_bracket_index = content.find("[")
        closing_bracket_index = content.find("]") + 1

        if opening_bracket_index > 0:
            before_text = content[:opening_bracket_index]
        else:
            before_text = ""
        
        if closing_bracket_index < len(content):
            after_text = content[closing_bracket_index:]
        else:
            after_text = ""
        
        internal_text = content[opening_bracket_index+1:closing_bracket_index-1]

        # prepare the new <a> tag
        link = (
            f"{informative_file}.html#{internal_match}"
            f"{informative_file}.html#{internal_text}"
            if is_informative
            else f"{normative_file}.html#{internal_match}"
            else f"{normative_file}.html#{internal_text}"
        )
        a = soup.new_tag("a", attrs={"href": link})
        a.append(f"{tag_contents}")
        a.append(f"[{internal_text.replace('n.', '')}]")
        content.replace_with(a)

        # Add any remaining text after the <a> tag
@@ -345,25 +356,21 @@ def add_links_to_references_in_text(soup):
    def process_text_nodes(element):
        for content in list(element.contents):
            if isinstance(content, NavigableString):
                informative_match = re.search(REF_REGEX_I, str(content))
                normative_match = re.search(REF_REGEX_N, str(content))
                if informative_match:
                    # get the content
                    tag_contents = informative_match.group(0)
                    internal_match = informative_match.group(1)
                split_content = content.split(" ")
                for part in split_content:
                    element = NavigableString(part + " ")
                    content.insert_before(element)
                    if re.match(REF_REGEX_I, part):
                        insert_link_with_reference(
                        content, tag_contents, internal_match, is_informative=True
                            element, is_informative=True
                        )

                if normative_match:
                    # get the content
                    tag_contents = normative_match.group(0)
                    internal_match = normative_match.group(1)
                    elif re.match(REF_REGEX_N, part):
                        insert_link_with_reference(
                        content, tag_contents, internal_match, is_informative=False
                            element, is_informative=False
                        )
                content.extract()

            elif isinstance(content, Tag):
            elif isinstance(content, Tag) and not content.name in ["a", "code"]:
                process_text_nodes(content)

    for element in soup.find_all(["p", "div"]):