Commit ddee5f58 authored by ankraft's avatar ankraft
Browse files

Added html and markdown internal links and anchors. Added configurable...

Added html and markdown internal links and anchors. Added configurable filename length. Renamed -i to -ic to avoid confusion
parent 0ab633db
Loading
Loading
Loading
Loading
+60 −48
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#	directory structure.
#

from typing import Tuple
import argparse, re, os, shutil
from dataclasses import dataclass
from rich import print
@@ -19,10 +20,15 @@ class Clause:
	lines:list[str]
	onlyNav:bool = False

# Number of digits (zero-padded) used for the generated clause file names.
fnLength = 4

# Markdown header line: group 1 is the run of '#', group 2 the header text.
_matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
# A code fence line (``` with optional leading whitespace and trailing text).
_matchCodefence = re.compile(r'\s*```\s?.*', re.IGNORECASE)
# A list item that starts with two whitespace characters followed by '-'.
_match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
# Internal markdown link "[text](#anchor)"; group 1 is the target including '#'.
# - The negative look-behind skips image links ("![alt](...)") without consuming
#   a character, so links at the very start of a line are found as well.
# - The target stops at the closing parenthesis of its own link (one level of
#   nested parentheses inside the anchor is allowed), so several links on one
#   line are reported separately instead of being merged into one match.
_markdownLink = re.compile(r'(?<!!)\[[^\]]*\]\((#(?:[^()]|\([^()]*\))*)\)', re.IGNORECASE)
# HTML link '<a href="...">text</a>'; group 1 is the href value.
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
# HTML anchor '<a name="...">text</a>'; group 1 is the anchor name.
_htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)


# TODO handle multiple nav levels (left bar) better (make configurable)
# TODO Update links in the markdown files to the new structure
@@ -37,7 +43,7 @@ def analyseMarkdown(filename:str) -> list[Clause]:
			The list of clauses.
	"""

	print(f'[gray]Analyzing file "{filename}"')
	print(f'[green]Analyzing "{filename}"')

	with open(filename, 'r') as file:
		inLines = file.readlines()
@@ -165,12 +171,13 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
			continue
	
		# write to single files
		with open(f'{os.path.dirname(filename)}/{navTitle}/{i}.md', 'w') as file:
		print(f'[green]Writing "{i:0{fnLength}}.md" - "{f.title}"')
		with open(f'{os.path.dirname(filename)}/{navTitle}/{i:0{fnLength}}.md', 'w') as file:
			file.writelines(f.lines)
			print(f'[green]File "{i}.md" written - "{f.title}"')

	
	# write nav.yml file
	print(f'[green]Writing "_nav.yml"')
	with open(f'{os.path.dirname(filename)}/_nav.yml', 'w') as file:
		file.write(f'  - {navTitle}:\n')
		for i, f in enumerate(outLines):
@@ -179,16 +186,13 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
			else:
				if len(f.lines) == 0:
					continue
				file.write(f"  {'  '*f.level}- '{f.title}': '{navTitle}/{i}.md'\n")
	print(f'[green]File "_nav.yml" written')

				file.write(f"  {'  '*f.level}- '{f.title}': '{navTitle}/{i:0{fnLength}}.md'\n")

_markdownLink = re.compile(r'\[.*\]\((.*)\)', re.IGNORECASE)
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
_anchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)

def _headerToAnchor(title:str) -> str:
	"""	Convert a markdown header text to the anchor format used for link targets.

		Args:
			title: The text of the header (without the leading '#' characters).

		Returns:
			The case-folded title with spaces replaced by '-' and dots removed.
	"""
	return title.strip().casefold().replace(' ', '-').replace('.', '')


def updateLinks(clauses:list[Clause]) -> list[Clause]:
	"""	Update the links in the clauses to the new structure. This is done by
		creating a dictionary of all link targets (markdown headers and html
		anchors) and then rewriting the html and markdown links that point
		to one of these targets.

		The lines of the clauses are modified in place.

		Args:
			clauses: The list of clauses.

		Returns:
			The list of clauses.
	"""
	print(f'[green]Updating links in clauses')

	# Build the link target dictionary. Mapping '#anchor' -> index of the
	# clause that contains the anchor.
	linkTargets:dict[str, int] = {}

	# Find all markdown headers in the clauses and register them in anchor format
	for index, clause in enumerate(clauses):
		for line in clause.lines:
			if (m := _matchHeader.match(line)):
				linkTargets[f'#{_headerToAnchor(m.group(2))}'] = index

	# Find all html anchors in the clauses. This is a separate pass so that an
	# explicit html anchor always wins over a header with the same anchor name.
	for index, clause in enumerate(clauses):
		for line in clause.lines:
			for name in _htmlAnchorLink.findall(line):
				linkTargets[f'#{name}'] = index

	def _rewriteLink(match:re.Match) -> str:
		"""	Return the matched link with its target (group 1) redirected to the
			file of the clause that contains the target. Links to unknown
			targets are returned unchanged.
		"""
		target = match.group(1)
		if (index := linkTargets.get(target)) is None:
			return match.group(0)
		# Only replace the target inside the match, keep the rest of the link
		text = match.group(0)
		start = match.start(1) - match.start()
		end = match.end(1) - match.start()
		return f'{text[:start]}../{index:0{fnLength}}/#{target[1:]}{text[end:]}'

	# Replace the html and the markdown links. Every link in a line is rewritten
	# on the already updated line, so that lines with multiple links are handled.
	for clause in clauses:
		for i, line in enumerate(clause.lines):
			line = _htmlLink.sub(_rewriteLink, line)
			clause.lines[i] = _markdownLink.sub(_rewriteLink, line)

	return clauses


def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None:
	"""	Copy media files from the source directory to the target directory.

@@ -247,19 +254,27 @@ def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') ->
	targetDirectory = f'{os.path.dirname(filename)}/{navTitle}/{mediaDirectory}'

	if os.path.exists(sourceDirectory):
		print(f'[green]Copying media files from "{sourceDirectory}" to "{targetDirectory}"')
		shutil.copytree(sourceDirectory, targetDirectory, dirs_exist_ok = True)
		print(f'[green]Copied media files from "{sourceDirectory}" to "{targetDirectory}"')
	else:
		print(f'[red]Media directory "{sourceDirectory}" does not exist')

	
def processDocument(args:argparse.Namespace) -> None:
	"""	Convert a single markdown document: analyse and split it into clauses,
		update the links, prepare the clauses for mkdocs, write the result
		files and copy the media files.

		The module-level *fnLength* is set from the parsed arguments.

		Args:
			args: The parsed command line arguments. The attributes *document*,
				*filename_length*, *ignore_clause*, *split_level*, *title* and
				*media_directory* are read.
	"""
	global fnLength

	sourcePath = os.path.abspath(args.document)
	fnLength = args.filename_length

	# Analyse the markdown file and split it into clauses
	parsed = analyseMarkdown(sourcePath)
	ignoredTitles = [ title.casefold() for title in args.ignore_clause ]
	parsed = splitMarkdownDocument(parsed, ignoredTitles, args.split_level)

	# Rewrite the links and adapt the clauses for mkdocs
	parsed = prepareForMkdocs(updateLinks(parsed))

	# Write the clauses to files
	writeClauses(parsed, sourcePath, args.title)

	# Copy the media files
	copyMediaFiles(sourcePath, args.title, args.media_directory)


@@ -267,14 +282,11 @@ if __name__ == '__main__':
	# Command line interface: parse the arguments and process the document.
	parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter)

	parser.add_argument('--title', '-t', metavar = 'title', required = True, help = 'mkdocs navigation tile')
	# '--ignore-clause' must only be registered once (short form '-ic'), otherwise
	# argparse raises an error because of the conflicting option string.
	parser.add_argument('--ignore-clause', '-ic', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document')
	parser.add_argument('--split-level', '-sl', metavar = 'level', type = int, default = 2, help = 'split clauses on which level')
	parser.add_argument('--media-directory', '-md', metavar = 'media-directory', default = 'media', help = 'directory name where media files are stored')
	# 'type = int' lets argparse reject non-numeric values and hands an int
	# (like the default) to the file name formatting.
	parser.add_argument('--filename-length', '-fl', metavar = 'length', type = int, default = 4, help = 'length of the filename with leading zeros')
	parser.add_argument('document', type = str, help = 'a oneM2M markdown specification document to process')

	args = parser.parse_args()

	processDocument(args)