Loading md_to_docx_converter/src/to_html/postprocessing.py +33 −26 Original line number Diff line number Diff line Loading @@ -319,23 +319,34 @@ def add_links_to_references_in_text(soup): REF_REGEX_N = r"(?<!\[)\[(n\.[A-Za-z0-9]+)\]" def insert_link_with_reference( content, tag_contents, internal_match, is_informative content, is_informative ): if content.parent is None: return before_text = str(content).split(tag_contents)[0] after_text = str(content).split(tag_contents)[1] tag_contents = tag_contents.replace("[n.", "[") # normative can't have a prefix, this won't affex informative opening_bracket_index = content.find("[") closing_bracket_index = content.find("]") + 1 if opening_bracket_index > 0: before_text = content[:opening_bracket_index] else: before_text = "" if closing_bracket_index < len(content): after_text = content[closing_bracket_index:] else: after_text = "" internal_text = content[opening_bracket_index+1:closing_bracket_index-1] # prepare the new <a> tag link = ( f"{informative_file}.html#{internal_match}" f"{informative_file}.html#{internal_text}" if is_informative else f"{normative_file}.html#{internal_match}" else f"{normative_file}.html#{internal_text}" ) a = soup.new_tag("a", attrs={"href": link}) a.append(f"{tag_contents}") a.append(f"[{internal_text.replace('n.', '')}]") content.replace_with(a) # Add any remaining text after the <a> tag Loading @@ -345,25 +356,21 @@ def add_links_to_references_in_text(soup): def process_text_nodes(element): for content in list(element.contents): if isinstance(content, NavigableString): informative_match = re.search(REF_REGEX_I, str(content)) normative_match = re.search(REF_REGEX_N, str(content)) if informative_match: # get the content tag_contents = informative_match.group(0) internal_match = informative_match.group(1) split_content = content.split(" ") for part in split_content: element = NavigableString(part + " ") content.insert_before(element) if re.match(REF_REGEX_I, part): insert_link_with_reference( content, tag_contents, internal_match, is_informative=True element, is_informative=True ) if normative_match: # get the content tag_contents = normative_match.group(0) internal_match = normative_match.group(1) elif re.match(REF_REGEX_N, part): insert_link_with_reference( content, tag_contents, internal_match, is_informative=False element, is_informative=False ) content.extract() elif isinstance(content, Tag): elif isinstance(content, Tag) and not content.name in ["a", "code"]: process_text_nodes(content) for element in soup.find_all(["p", "div"]): Loading Loading
md_to_docx_converter/src/to_html/postprocessing.py +33 −26 Original line number Diff line number Diff line Loading @@ -319,23 +319,34 @@ def add_links_to_references_in_text(soup): REF_REGEX_N = r"(?<!\[)\[(n\.[A-Za-z0-9]+)\]" def insert_link_with_reference( content, tag_contents, internal_match, is_informative content, is_informative ): if content.parent is None: return before_text = str(content).split(tag_contents)[0] after_text = str(content).split(tag_contents)[1] tag_contents = tag_contents.replace("[n.", "[") # normative can't have a prefix, this won't affex informative opening_bracket_index = content.find("[") closing_bracket_index = content.find("]") + 1 if opening_bracket_index > 0: before_text = content[:opening_bracket_index] else: before_text = "" if closing_bracket_index < len(content): after_text = content[closing_bracket_index:] else: after_text = "" internal_text = content[opening_bracket_index+1:closing_bracket_index-1] # prepare the new <a> tag link = ( f"{informative_file}.html#{internal_match}" f"{informative_file}.html#{internal_text}" if is_informative else f"{normative_file}.html#{internal_match}" else f"{normative_file}.html#{internal_text}" ) a = soup.new_tag("a", attrs={"href": link}) a.append(f"{tag_contents}") a.append(f"[{internal_text.replace('n.', '')}]") content.replace_with(a) # Add any remaining text after the <a> tag Loading @@ -345,25 +356,21 @@ def add_links_to_references_in_text(soup): def process_text_nodes(element): for content in list(element.contents): if isinstance(content, NavigableString): informative_match = re.search(REF_REGEX_I, str(content)) normative_match = re.search(REF_REGEX_N, str(content)) if informative_match: # get the content tag_contents = informative_match.group(0) internal_match = informative_match.group(1) split_content = content.split(" ") for part in split_content: element = NavigableString(part + " ") content.insert_before(element) if re.match(REF_REGEX_I, part): insert_link_with_reference( content, tag_contents, internal_match, is_informative=True element, is_informative=True ) if normative_match: # get the content tag_contents = normative_match.group(0) internal_match = normative_match.group(1) elif re.match(REF_REGEX_N, part): insert_link_with_reference( content, tag_contents, internal_match, is_informative=False element, is_informative=False ) content.extract() elif isinstance(content, Tag): elif isinstance(content, Tag) and not content.name in ["a", "code"]: process_text_nodes(content) for element in soup.find_all(["p", "div"]): Loading