Removing html tags from clause titles and anchors (9caf2dbd) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

toMkdocs/toMkdocs.py

+6 −0

Original line number	Diff line number	Diff line
		@@ -98,6 +98,7 @@ _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
		_markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
		_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
		_htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
		_htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE)
		_matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE)


		@@ -172,6 +173,7 @@ def analyseMarkdown(filename:str) -> list[Clause]:
		if (m := _matchHeader.match(line)):
		# Add a new clause
		clauseTitle = m.groups()[1].strip()
		clauseTitle = re.sub(_htmlTag, '', clauseTitle)
		headerNumber = _matchHeaderNumber.search(clauseTitle)
		outClauses.append(Clause(len(m.groups()[0]), # level
		headerNumber.group() if headerNumber else shortHash(clauseTitle, 6),
		@@ -278,12 +280,16 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]:
		# Find all headers in the clause
		for line in clause.lines:
		if (m := _matchHeader.match(line.text)):

		# convert the header to anchor format and add it to the dictionary
		# Remove special characters
		# TODO move perhaps to an own function
		anchor = m.groups()[1].strip().casefold().replace(' ', '-')
		for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
		anchor = anchor.replace(c, '')
		# remove html tags from the anchor
		anchor = re.sub(_htmlTag, '', anchor)

		linkTargets[f'#{anchor}'] = clause
		if veryVerbose:
		print(f'[dim]Added Markdown anchor "{anchor}"')