feat: capitalize words in links when needed (b9a56316) · Commits · CIM - Context Information Management / NGSI-LD API

md_to_docx_converter/md_to_html_2.lua

+4 −1

Original line number	Diff line number	Diff line
		@@ -131,7 +131,10 @@ function MultipleClauses(el)
		--this may be the start of a list of multiple clauses
		if elem.text:lower():find("clauses") then
		clausesFound = true
		newContent:insert(elem)
		-- Replace plain "clauses" text with a span containing it
		local clausesSpan = pandoc.Span({pandoc.Str(elem.text)})
		clausesSpan.classes = pandoc.List({"clauses-marker"})
		newContent:insert(clausesSpan)
		goto continue
		end

md_to_docx_converter/src/to_html/postprocessing.py

+40 −1

Original line number	Diff line number	Diff line
		@@ -508,7 +508,6 @@ def fix_custom_tags(soup: BeautifulSoup):
		print(p_error(f"Error: No previous element found for '{a.string}'. There are not any figures/tables above this tag."))
		os._exit(1)
		elif href.find("#") != -1 and href.find("root") != -1 and notAnImage(href): # when root is used in md
		print(href)
		new_id_prefix = f"{h1_tag['id']}"
		a["href"] = href.replace("root", new_id_prefix)
		a.string = a.string.replace("root", new_id_prefix)
		@@ -564,6 +563,45 @@ def add_custom_link_to_images(soup: BeautifulSoup, images_mapping: dict) -> Beau

		return soup

		def _capitalize_first(text: str) -> str:
		    """
		    Upper-case only the first character of ``text``.

		    ``str.capitalize()`` is deliberately avoided: it also lower-cases the
		    remainder, which would turn "annex A" into "Annex a".
		    """
		    return text[:1].upper() + text[1:]


		def _starts_sentence(tag) -> bool:
		    """
		    Tell whether ``tag`` (a BeautifulSoup element) opens a sentence.

		    A tag opens a sentence when it is the very first child of its parent,
		    or when the text node immediately before it ends with ".", "!" or "?"
		    (surrounding whitespace ignored).
		    """
		    parent = tag.parent
		    # Identity check on purpose: BeautifulSoup's ``==`` compares tags
		    # structurally, so a later link with the same href/text as the first
		    # child would otherwise be mistaken for the first child.
		    if parent is not None and parent.contents and parent.contents[0] is tag:
		        return True

		    previous = tag.previous_sibling
		    if isinstance(previous, NavigableString):
		        return previous.strip().endswith((".", "!", "?"))
		    return False


		def fix_capitalization_in_links(soup: BeautifulSoup) -> BeautifulSoup:
		    """
		    Capitalizes cross-reference words that open a sentence.

		    Two kinds of elements are handled:

		    * ``<a>`` tags whose text starts with "figure", "table", "clause" or
		      "annex" (lower case);
		    * ``<span class="clauses-marker">`` tags, whatever their text.

		    When such an element is the first child of its parent, or directly
		    follows text ending in ".", "!" or "?", the first character of its text
		    is upper-cased. The rest of the text is left untouched so identifiers
		    such as "annex A" keep their original case.

		    :param soup: parsed document, modified in place.
		    :return: the same ``soup`` object.
		    """
		    reference_prefixes = ("figure", "table", "clause", "annex")

		    # Collect both lists before mutating anything, as the original did.
		    a_tags = soup.find_all("a")
		    span_clauses_tags = soup.find_all("span", class_="clauses-marker")

		    for a in a_tags:
		        text = a.get_text()
		        # An empty string never matches a prefix, so this also skips
		        # links without text.
		        if not text.startswith(reference_prefixes):
		            continue
		        if _starts_sentence(a):
		            # Assigning ``.string`` replaces every child of the tag with
		            # a single text node.
		            a.string = _capitalize_first(text)

		    for span in span_clauses_tags:
		        text = span.get_text()
		        if not text:
		            continue
		        if _starts_sentence(span):
		            span.string = _capitalize_first(text)

		    return soup


		def postprocess(html_dir: str):
		"""
		@@ -637,6 +675,7 @@ def postprocess(html_dir: str):

		try:
		soup = add_custom_link_to_images(soup, images_mapping)
		soup = fix_capitalization_in_links(soup)
		except ValueError as e:
		print(p_error(f"Error in file {filename}:"))
		print(p_error(str(e)))