Commit 2b62a735 authored by ankraft's avatar ankraft
Browse files

Corrected _nav generation when dangling clauses are present in the document

parent c47b9d6a
Loading
Loading
Loading
Loading
+82 −45
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@
#	This script converts a oneM2M specification markdown file to a
#	MkDocs-compatible directory structure.
#

from __future__ import annotations
from enum import Enum, auto
import argparse, re, os, shutil, hashlib, base64
from dataclasses import dataclass
@@ -25,6 +25,7 @@ class LineType(Enum):
	LIST = auto()
	NOTE = auto()


@dataclass
class Line:
	"""	Represents a line in the markdown file. """
@@ -40,7 +41,35 @@ class Clause:
	clauseNumber:str
	title:str
	lines:list[Line]
	onlyNav:bool = False


	@property
	def linesCount(self) -> int:
		"""	Number of line entries currently held by this clause.

			Returns:
				The length of the clause's `lines` list.
		"""
		heldLines = self.lines
		return len(heldLines)
	

	def append(self, line:Line) -> None:
		"""	Add a single line at the end of this clause.

			Args:
				line: The line that becomes the new last entry of `lines`.
		"""
		ownLines = self.lines
		ownLines.append(line)
	

	def extend(self, clause:Clause) -> None:
		"""	Append all lines of another clause to this clause.

			Args:
				clause: The clause whose lines are added, in order, at the end
					of this clause's `lines` list.
		"""
		additionalLines = clause.lines
		self.lines.extend(additionalLines)


	def asStringList(self) -> list[str]:
		"""	Return the clause as a list of strings. 
@@ -51,6 +80,15 @@ class Clause:
		return [ l.text for l in self.lines ]


	def __len__(self) -> int:
		"""	Total size of the clause's text, counted in characters.

			Note that this is the sum of the lengths of all line texts,
			not the number of lines.

			Returns:
				The summed length of the `text` of every line in the clause.
		"""
		return sum(len(entry.text) for entry in self.lines)


_matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
_matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
_matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
@@ -98,53 +136,58 @@ def analyseMarkdown(filename:str) -> list[Clause]:
	with open(filename, 'r') as file:
		inLines = file.readlines()
	
	outLines:list[Clause] = [Clause(0, '', '', [])]
	outClauses:list[Clause] = [Clause(0, '', '', [])]

	# Go through the lines and detect headers and codefences
	inCodefence = False
	for line in inLines:

		# Detect codefences
		if _matchCodefenceStart.match(line):
		# Detect and handle codefences
		# For the moment we support only codefences that start and end
		# with 3 backticks. This is the most common way to define codefences.
		# Note, that longer codefences are allowed by the markdown specification.
  
		if _matchCodefenceStart.match(line) and not inCodefence:
			inCodefence = True
			outLines[-1].lines.append(Line(line, LineType.CODEFENCESTART))
			outClauses[-1].append(Line(line, LineType.CODEFENCESTART))
			continue

		if _matchCodefenceEnd.match(line):
			inCodefence = False
			outLines[-1].lines.append(Line(line, LineType.CODEFENCEND))
			outClauses[-1].append(Line(line, LineType.CODEFENCEEND))
			continue

		if inCodefence:
			outLines[-1].lines.append(Line(line, LineType.CODE))
			outClauses[-1].append(Line(line, LineType.CODE))
			continue
	
		# Detect notes
  		# Notes are lines that start with a '>'.
		if _matchNote.match(line):
			outLines[-1].lines.append(Line(line, LineType.NOTE))
			outClauses[-1].append(Line(line, LineType.NOTE))
			continue
  
		# Detect headers
		_lineType = LineType.TEXT
		if (m := _matchHeader.match(line)):
			# Add a new clause
			clauseTitle = m.groups()[1].strip()
			headerNumber = _matchHeaderNumber.search(clauseTitle)
			outLines.append(Clause(len(m.groups()[0]), # level
			outClauses.append(Clause(len(m.groups()[0]), # level
						  		   headerNumber.group() if headerNumber else shortHash(clauseTitle, 6),
								   clauseTitle, 
								   []))
			_lineType = LineType.HEADING

		outLines[-1].lines.append(Line(line, _lineType))
		# Just add the line to the current clause as text
		outClauses[-1].append(Line(line, _lineType))

	return outLines
	return outClauses


def splitMarkdownDocument(clauses:list[Clause], 
						  ignoreTitles:list[str] = [], 
						  splitLevel:int = 1,
						  ignoreUntilFirstHeading:bool = True) -> list[Clause]:
	"""	Split the clauses at a certain level. This is used to create separate
	"""	Split the clauses at a certain level. This is used to create the separate
		markdown files for MkDocs.

		Args:
@@ -156,7 +199,7 @@ def splitMarkdownDocument(clauses:list[Clause],
		Returns:
			The list of clauses.
	"""
	outLines:list[Clause] = [Clause(0, '', '', [])]
	outClauses:list[Clause] = [Clause(0, '', '', [])]

	for clause in clauses:
		level = clause.level
@@ -168,17 +211,17 @@ def splitMarkdownDocument(clauses:list[Clause],
		# Add a new output clause if the current clause's level is 
  		# equal or less than the split level
		if clause.level <= splitLevel:
			outLines.append(Clause(level, clause.clauseNumber, clause.title, []))
			outClauses.append(Clause(level, clause.clauseNumber, clause.title, []))
		
		# Add the lines to the output clause
		outLines[-1].lines.extend(clause.lines)
		outClauses[-1].extend(clause)
	
	# Remove the first clause if it has no title
	if ignoreUntilFirstHeading:
		while len(outLines[0].title) == 0:
			outLines.pop(0)
		while len(outClauses[0].title) == 0:
			outClauses.pop(0)

	return outLines
	return outClauses


def prepareForMkdocs(clauses:list[Clause]) -> list[Clause]:
@@ -196,19 +239,12 @@ def prepareForMkdocs(clauses:list[Clause]) -> list[Clause]:
	# in the clause. This is done because MkDocs repeats the heading when
	# displaying the page.
	for clause in clauses:
		if len(clause.lines) > 0:
		if clause.linesCount > 0:
			clause.lines.pop(0)
			# Also, remove the first empty lines if they exist
			while len(clause.lines) > 0 and clause.lines[0].text.strip() == '':
			while clause.linesCount > 0 and clause.lines[0].text.strip() == '':
				clause.lines.pop(0)
	
	# Mark the whole clause if it is the first AND NOT only clause
	# for a parent clause. Then it is usually empty except the heading.
	# We still need it for navigation, so we mark it as onlyNav
	for clause in clauses:
		if len(''.join(clause.asStringList()).strip()) == 0 and clause.level > 0:
			clause.onlyNav = True

	# Repair wrong markdown for indented lines.
	# Add 2 spaces to existing 2-space indentions
	for clause in clauses:
@@ -318,26 +354,22 @@ def updateNotes(clauses:list[Clause]) -> list[Clause]:
	return clauses


def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
def writeClauses(outClauses:list[Clause], filename:str, navTitle:str) -> None:
	"""	Write the clauses to separate files and create a navigation file.

		Args:
			outLines: The list of clauses.
			outClauses: The list of clauses.
			filename: The name of the original markdown file.
			navTitle: The title of the navigation entry. This is used to determine the directories.
	"""

	print(f'[green]Writing clauses to files')
	# Write the files
	# create directory first
	os.makedirs(f'{os.path.dirname(filename)}/{navTitle}', exist_ok = True)
	for i, f in enumerate(outLines):
		if len(f.lines) == 0 or f.onlyNav:	# ignore empty clauses or clauses that are only for navigation
			if verbose:
				print(f'[dim]Navigation only - "{f.title}"')
			continue
	
		# write to single files
	# Write the files
	for i, f in enumerate(outClauses):
		# write to single files, even empty ones
		if verbose:
			print(f'[dim]Writing "{f.clauseNumber}.md" - "{f.title}"')
		with open(f'{os.path.dirname(filename)}/{navTitle}/{f.clauseNumber}.md', 'w') as file:
@@ -350,13 +382,18 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
		if veryVerbose:
			print(f'[dim]Writing navigation file')
		file.write(f'  - {navTitle}:\n')
		for i, f in enumerate(outLines):
			if f.onlyNav:
				file.write(f"  {'  '*f.level}- '{f.title}':\n")
		for i, f in enumerate(outClauses):

			# TODO handle if the next clause is more than one level deeper
   
			_title = f.title.replace("'", '"')
			nextClause = outClauses[i+1] if i+1 < len(outClauses) else None
			if nextClause is None or nextClause.level <= f.level:
				file.write(f"  {'  '*f.level}- '{_title}': '{navTitle}/{f.clauseNumber}.md'\n")
			else:
				if len(f.lines) == 0:
					continue
				file.write(f"  {'  '*f.level}- '{f.title}': '{navTitle}/{f.clauseNumber}.md'\n")
				file.write(f"  {'  '*f.level}- '{_title}':\n")
				if len(f) > 0:
					file.write(f"  {'  '*nextClause.level}- 'Introduction': '{navTitle}/{f.clauseNumber}.md'\n")


def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None: