Added html and markdown internal links and anchors. Added configurable... (ddee5f58) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

toMkdocs/toMkdocs.py

+60 −48

Original line number	Diff line number	Diff line
		@@ -7,6 +7,7 @@
		# directory structure.
		#

		from typing import Tuple
		import argparse, re, os, shutil
		from dataclasses import dataclass
		from rich import print
		@@ -19,10 +20,15 @@ class Clause:
		lines:list[str]
		onlyNav:bool = False

		# Number of digits used for the zero-padded file names of the generated clause files
		fnLength = 4

		# Markdown header: group 1 is the run of '#' characters, group 2 is the header text
		_matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
		# Code fence line: optional indentation, the fence, then an optional info string
		_matchCodefence = re.compile(r'\s*```\s?.*', re.IGNORECASE)
		# List item that is indented by exactly two white spaces
		_match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
		# Internal markdown link "[text](#anchor)". Group 1 is the target including the '#'.
		# The look-behind skips images ("![alt](...)") without consuming a character, so
		# links at the very start of a line are found as well. "[^)]*" keeps the target
		# from running into a second link on the same line.
		_markdownLink = re.compile(r'(?<!!)\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
		# HTML link '<a href="target">text</a>'. Group 1 is the href value.
		_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
		# HTML anchor '<a name="anchor">text</a>'. Group 1 is the anchor name (without '#').
		_htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)


		# TODO handle multiple nav levels (left bar) better (make configurable)
		# TODO Update links in the markdown files to the new structure
		@@ -37,7 +43,7 @@ def analyseMarkdown(filename:str) -> list[Clause]:
		The list of clauses.
		"""

		print(f'[gray]Analyzing file "{filename}"')
		print(f'[green]Analyzing "{filename}"')

		with open(filename, 'r') as file:
		inLines = file.readlines()
		@@ -165,12 +171,13 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
		continue

		# write to single files
		with open(f'{os.path.dirname(filename)}/{navTitle}/{i}.md', 'w') as file:
		print(f'[green]Writing "{i:0{fnLength}}.md" - "{f.title}"')
		with open(f'{os.path.dirname(filename)}/{navTitle}/{i:0{fnLength}}.md', 'w') as file:
		file.writelines(f.lines)
		print(f'[green]File "{i}.md" written - "{f.title}"')


		# write nav.yml file
		print(f'[green]Writing "_nav.yml"')
		with open(f'{os.path.dirname(filename)}/_nav.yml', 'w') as file:
		file.write(f' - {navTitle}:\n')
		for i, f in enumerate(outLines):
		@@ -179,16 +186,13 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
		else:
		if len(f.lines) == 0:
		continue
		file.write(f" {' '*f.level}- '{f.title}': '{navTitle}/{i}.md'\n")
		print(f'[green]File "_nav.yml" written')

		file.write(f" {' '*f.level}- '{f.title}': '{navTitle}/{i:0{fnLength}}.md'\n")

		# NOTE(review): `_markdownLink` and `_htmlLink` are already assigned earlier in this
		# listing (next to `_matchHeader`); the assignments below rebind those names.
		# As written, none of the `.` / character classes carries a quantifier, so the link
		# text and the captured target each match exactly one character -- confirm against
		# the real file whether quantifiers were lost.
		_markdownLink = re.compile(r'\[.\]\((.)\)', re.IGNORECASE)
		_htmlLink = re.compile(r'<a\s+href="([^"\'])">[^<]</a>', re.IGNORECASE)
		_anchorLink = re.compile(r'<a\s+name="([^"])">[^<]</a>', re.IGNORECASE)

		def updateLinks(clauses:list[Clause]) -> list[Clause]:
			"""	Update the links in the clauses to the new structure. This is done by
				creating a dictionary of all links and their targets and then replacing
				the links in the clauses.

				Link targets are the Markdown headers (converted to anchor format) and
				the HTML anchors found in the clauses. An internal link "#anchor" whose
				target is known is rewritten to "../<clause index>/#anchor", with the
				clause index zero-padded to `fnLength` digits. Links with unknown
				targets are left untouched.

				Args:
					clauses: The list of clauses. The clauses' lines are updated in place.

				Returns:
					The list of clauses.
			"""
			print(f'[green]Updating links in clauses')

			# Build the link target dictionary. Mapping anchor -> (clause index, clause)
			linkTargets:dict[str, Tuple[int, Clause]] = {}

			# Find all Markdown headers in the clauses and convert them to anchor format
			for i, clause in enumerate(clauses):
				for line in clause.lines:
					if (m := _matchHeader.match(line)):
						# convert the header to anchor format and add it to the dictionary
						# TODO move perhaps to an own function
						anchor = m.groups()[1].strip().casefold().replace(' ', '-').replace('.', '')
						linkTargets[f'#{anchor}'] = (i, clause)

			# Find all HTML anchors in the clauses and add them to the dictionary.
			# This stays a separate, second pass so that an HTML anchor still takes
			# precedence over a header-derived anchor with the same name.
			for i, clause in enumerate(clauses):
				for line in clause.lines:
					for a in _htmlAnchorLink.findall(line):
						linkTargets[f'#{a}'] = (i, clause)

			def _retarget(match:re.Match) -> str:
				"""	Return the matched link with its target (group 1) pointing to the
					file of the clause that contains the target.
				"""
				lnk = match.group(1)
				if lnk not in linkTargets:
					return match.group(0)
				whole = match.group(0)
				start = match.start(1) - match.start(0)
				end = match.end(1) - match.start(0)
				# All dictionary keys start with '#', so `lnk` already is the "#anchor" part.
				return f'{whole[:start]}../{linkTargets[lnk][0]:0{fnLength}}/{lnk}{whole[end:]}'

			# Replace the html links and the markdown links. Only the target inside a
			# matched link is rewritten, each rewrite builds on the previous one, and
			# other occurrences of the anchor text in the line are not touched.
			for clause in clauses:
				for i, line in enumerate(clause.lines):
					line = _htmlLink.sub(_retarget, line)
					clause.lines[i] = _markdownLink.sub(_retarget, line)

			return clauses


		def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None:
		""" Copy media files from the source directory to the target directory.

		@@ -247,19 +254,27 @@ def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') ->
		targetDirectory = f'{os.path.dirname(filename)}/{navTitle}/{mediaDirectory}'

		if os.path.exists(sourceDirectory):
		print(f'[green]Copying media files from "{sourceDirectory}" to "{targetDirectory}"')
		shutil.copytree(sourceDirectory, targetDirectory, dirs_exist_ok = True)
		print(f'[green]Copied media files from "{sourceDirectory}" to "{targetDirectory}"')
		else:
		print(f'[red]Media directory "{sourceDirectory}" does not exist')


		def processDocument(args:argparse.Namespace) -> None:
			"""	Process a markdown document: analyse it, split it into clauses, update
				the links, prepare the clauses for mkdocs, write the clause files and
				copy the media files.

				Args:
					args: The parsed command line arguments. The attributes `document`,
						`filename_length`, `ignore_clause`, `split_level`, `title` and
						`media_directory` are read.
			"""
			global fnLength

			document = os.path.abspath(args.document)
			# A value given on the command line arrives as a string when the option
			# is declared without a type, so normalize it once for all later users.
			fnLength = int(args.filename_length)

			# Analyse the markdown file
			clauses = analyseMarkdown(document)
			clauses = splitMarkdownDocument(clauses, [ t.casefold() for t in args.ignore_clause ], args.split_level)
			clauses = updateLinks(clauses)
			clauses = prepareForMkdocs(clauses)

			# Write the clauses to files
			writeClauses(clauses, document, args.title)

			# Copy the media files
			copyMediaFiles(document, args.title, args.media_directory)


		@@ -267,14 +282,11 @@ if __name__ == '__main__':
		# Command line interface: define the options, parse them and process the document
		parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter)

		parser.add_argument('--title', '-t', metavar = 'title', required = True, help = 'mkdocs navigation title')
		# Registered only once: adding the same option string a second time makes
		# argparse fail with a conflicting option error. Both short forms are accepted.
		parser.add_argument('--ignore-clause', '-ic', '-i', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document')
		parser.add_argument('--split-level', '-sl', metavar = 'level', type = int, default = 2, help = 'split clauses on which level')
		parser.add_argument('--media-directory', '-md', metavar = 'media-directory', default = 'media', help = 'directory name where media files are stored')
		# type = int so that a value from the command line is validated and has the same type as the default
		parser.add_argument('--filename-length', '-fl', metavar = 'length', type = int, default = 4, help = 'length of the filename with leading zeros')
		parser.add_argument('document', type = str, help = 'a oneM2M markdown specification document to process')

		args = parser.parse_args()

		processDocument(args)