Commit 9caf2dbd authored by ankraft
Browse files

Remove HTML tags from clause titles and anchors

parent 001d4d9e
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -98,6 +98,7 @@ _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
# Markdown link to an in-document anchor, e.g. "[text](#anchor)"; group 1 is
# the "#anchor" target. The leading [^!] rejects image syntax "![...](...)"
# and always consumes one character, so a link at the very start of the
# searched string is not matched.
_markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
# HTML link of the exact form '<a href="...">text</a>'; group 1 is the href
# value, which may contain neither double nor single quotes. The link text
# must not contain "<".
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
# HTML anchor definition of the exact form '<a name="...">text</a>'; group 1
# is the anchor name.
_htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
# Any single HTML-style tag: "<", any run of characters other than ">", then
# ">". Only the tag itself is matched; text between tags is left in place.
_htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE)
# Start of a blockquote line: optional whitespace, ">", then an optional
# "note" (case-insensitive) and an optional colon, e.g. "> Note: ".
_matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE)


@@ -172,6 +173,7 @@ def analyseMarkdown(filename:str) -> list[Clause]:
		if (m := _matchHeader.match(line)):
			# Add a new clause
			clauseTitle = m.groups()[1].strip()
			clauseTitle = re.sub(_htmlTag, '', clauseTitle)
			headerNumber = _matchHeaderNumber.search(clauseTitle)
			outClauses.append(Clause(len(m.groups()[0]), # level
						  		   headerNumber.group() if headerNumber else shortHash(clauseTitle, 6),
@@ -278,12 +280,16 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]:
		# Find all headers in the clause
		for line in clause.lines:
			if (m := _matchHeader.match(line.text)):
				
				# convert the header to anchor format and add it to the dictionary
				# Remove special characters
				# TODO: perhaps move this into its own function
				anchor = m.groups()[1].strip().casefold().replace(' ', '-')
				for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
					anchor = anchor.replace(c, '')
				# remove html tags from the anchor
				anchor = re.sub(_htmlTag, '', anchor)

				linkTargets[f'#{anchor}'] = clause
				if veryVerbose:
					print(f'[dim]Added Markdown anchor "{anchor}"')