Commit 2b62a735 authored by ankraft's avatar ankraft
Browse files

Corrected _nav generation when dangling clauses are present in the document

parent c47b9d6a
Loading
Loading
Loading
Loading
+82 −45
Original line number Original line Diff line number Diff line
@@ -6,7 +6,7 @@
#	This script converts oneM2M spec markdown file to a mkdocs compatible
#	This script converts oneM2M spec markdown file to a mkdocs compatible
#	directory structure.
#	directory structure.
#
#

from __future__ import annotations
from enum import Enum, auto
from enum import Enum, auto
import argparse, re, os, shutil, hashlib, base64
import argparse, re, os, shutil, hashlib, base64
from dataclasses import dataclass
from dataclasses import dataclass
@@ -25,6 +25,7 @@ class LineType(Enum):
	LIST = auto()
	LIST = auto()
	NOTE = auto()
	NOTE = auto()



@dataclass
@dataclass
class Line:
class Line:
	"""	Represents a line in the markdown file. """
	"""	Represents a line in the markdown file. """
@@ -40,7 +41,35 @@ class Clause:
	clauseNumber:str
	clauseNumber:str
	title:str
	title:str
	lines:list[Line]
	lines:list[Line]
	onlyNav:bool = False


	@property
	def linesCount(self) -> int:
		"""	Number of `Line` entries currently held by this clause.

			Returns:
				The length of the clause's `lines` list.
		"""
		lineTotal = len(self.lines)
		return lineTotal
	

	def append(self, line:Line) -> None:
		"""	Add a single line at the end of this clause.

			Args:
				line: The `Line` object to add to the clause's `lines` list.
		"""
		clauseLines = self.lines
		clauseLines.append(line)
	

	def extend(self, clause:Clause) -> None:
		"""	Add all lines of another clause at the end of this clause.

			Only the `lines` list of the other clause is read; none of its
			other attributes are copied.

			Args:
				clause: The clause whose lines are appended to this one.
		"""
		additionalLines = clause.lines
		self.lines.extend(additionalLines)



	def asStringList(self) -> list[str]:
	def asStringList(self) -> list[str]:
		"""	Return the clause as a list of strings. 
		"""	Return the clause as a list of strings. 
@@ -51,6 +80,15 @@ class Clause:
		return [ l.text for l in self.lines ]
		return [ l.text for l in self.lines ]




	def __len__(self) -> int:
		"""	Return the number of characters in the clause.

			Returns:
				The number of characters in the clause.
		"""
		return sum([ len(l.text) for l in self.lines ])


_matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
_matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
_matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
_matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
_matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
_matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
@@ -98,53 +136,58 @@ def analyseMarkdown(filename:str) -> list[Clause]:
	with open(filename, 'r') as file:
	with open(filename, 'r') as file:
		inLines = file.readlines()
		inLines = file.readlines()
	
	
	outLines:list[Clause] = [Clause(0, '', '', [])]
	outClauses:list[Clause] = [Clause(0, '', '', [])]


	# Go through the lines and detect headers and codefences
	# Go through the lines and detect headers and codefences
	inCodefence = False
	inCodefence = False
	for line in inLines:
	for line in inLines:


		# Detect codefences
		# Detect and handle codefences
		if _matchCodefenceStart.match(line):
		# For the moment we support only codefences that start and end
		# with 3 backticks. This is the most common way to define codefences.
		# Note that longer codefences are allowed by the markdown specification.
  
		if _matchCodefenceStart.match(line) and not inCodefence:
			inCodefence = True
			inCodefence = True
			outLines[-1].lines.append(Line(line, LineType.CODEFENCESTART))
			outClauses[-1].append(Line(line, LineType.CODEFENCESTART))
			continue
			continue

		if _matchCodefenceEnd.match(line):
		if _matchCodefenceEnd.match(line):
			inCodefence = False
			inCodefence = False
			outLines[-1].lines.append(Line(line, LineType.CODEFENCEND))
			outClauses[-1].append(Line(line, LineType.CODEFENCEEND))
			continue
			continue

		if inCodefence:
		if inCodefence:
			outLines[-1].lines.append(Line(line, LineType.CODE))
			outClauses[-1].append(Line(line, LineType.CODE))
			continue
			continue
	
	
		# Detect notes
		# Detect notes
  		# Notes are lines that start with a '>'.
		if _matchNote.match(line):
		if _matchNote.match(line):
			outLines[-1].lines.append(Line(line, LineType.NOTE))
			outClauses[-1].append(Line(line, LineType.NOTE))
			continue
			continue
  
  
		# Detect headers
		# Detect headers
		_lineType = LineType.TEXT
		_lineType = LineType.TEXT
		if (m := _matchHeader.match(line)):
		if (m := _matchHeader.match(line)):
			# Add a new clause
			clauseTitle = m.groups()[1].strip()
			clauseTitle = m.groups()[1].strip()
			headerNumber = _matchHeaderNumber.search(clauseTitle)
			headerNumber = _matchHeaderNumber.search(clauseTitle)
			outLines.append(Clause(len(m.groups()[0]), # level
			outClauses.append(Clause(len(m.groups()[0]), # level
						  		   headerNumber.group() if headerNumber else shortHash(clauseTitle, 6),
						  		   headerNumber.group() if headerNumber else shortHash(clauseTitle, 6),
								   clauseTitle, 
								   clauseTitle, 
								   []))
								   []))
			_lineType = LineType.HEADING
			_lineType = LineType.HEADING


		outLines[-1].lines.append(Line(line, _lineType))
		# Just add the line to the current clause as text
		outClauses[-1].append(Line(line, _lineType))


	return outLines
	return outClauses




def splitMarkdownDocument(clauses:list[Clause], 
def splitMarkdownDocument(clauses:list[Clause], 
						  ignoreTitles:list[str] = [], 
						  ignoreTitles:list[str] = [], 
						  splitLevel:int = 1,
						  splitLevel:int = 1,
						  ignoreUntilFirstHeading:bool = True) -> list[Clause]:
						  ignoreUntilFirstHeading:bool = True) -> list[Clause]:
	"""	Split the clauses at a certain level. This is used to create separate
	"""	Split the clauses at a certain level. This is used to create the separate
		markdown files for MkDocs.
		markdown files for MkDocs.


		Args:
		Args:
@@ -156,7 +199,7 @@ def splitMarkdownDocument(clauses:list[Clause],
		Returns:
		Returns:
			The list of clauses.
			The list of clauses.
	"""
	"""
	outLines:list[Clause] = [Clause(0, '', '', [])]
	outClauses:list[Clause] = [Clause(0, '', '', [])]


	for clause in clauses:
	for clause in clauses:
		level = clause.level
		level = clause.level
@@ -168,17 +211,17 @@ def splitMarkdownDocument(clauses:list[Clause],
		# Add a new output clause if the current clause's level is 
		# Add a new output clause if the current clause's level is 
  		# equal or less than the split level
  		# equal or less than the split level
		if clause.level <= splitLevel:
		if clause.level <= splitLevel:
			outLines.append(Clause(level, clause.clauseNumber, clause.title, []))
			outClauses.append(Clause(level, clause.clauseNumber, clause.title, []))
		
		
		# Add the lines to the output clause
		# Add the lines to the output clause
		outLines[-1].lines.extend(clause.lines)
		outClauses[-1].extend(clause)
	
	
	# Remove the first clause if it has no title
	# Remove the first clause if it has no title
	if ignoreUntilFirstHeading:
	if ignoreUntilFirstHeading:
		while len(outLines[0].title) == 0:
		while len(outClauses[0].title) == 0:
			outLines.pop(0)
			outClauses.pop(0)


	return outLines
	return outClauses




def prepareForMkdocs(clauses:list[Clause]) -> list[Clause]:
def prepareForMkdocs(clauses:list[Clause]) -> list[Clause]:
@@ -196,19 +239,12 @@ def prepareForMkdocs(clauses:list[Clause]) -> list[Clause]:
	# in the clause. This is done because MkDocs repeats the heading when
	# in the clause. This is done because MkDocs repeats the heading when
	# displaying the page.
	# displaying the page.
	for clause in clauses:
	for clause in clauses:
		if len(clause.lines) > 0:
		if clause.linesCount > 0:
			clause.lines.pop(0)
			clause.lines.pop(0)
			# Also, remove the first empty lines if they exist
			# Also, remove the first empty lines if they exist
			while len(clause.lines) > 0 and clause.lines[0].text.strip() == '':
			while clause.linesCount > 0 and clause.lines[0].text.strip() == '':
				clause.lines.pop(0)
				clause.lines.pop(0)
	
	
	# Mark the whole clause if it is the first AND NOT only clause
	# for a parent clause. Then it is usually empty except the heading.
	# We still need it for navigation, so we mark it as onlyNav
	for clause in clauses:
		if len(''.join(clause.asStringList()).strip()) == 0 and clause.level > 0:
			clause.onlyNav = True

	# Repair wrong markdown for indented lines.
	# Repair wrong markdown for indented lines.
	# Add 2 spaces to existing 2-space indentions
	# Add 2 spaces to existing 2-space indentions
	for clause in clauses:
	for clause in clauses:
@@ -318,26 +354,22 @@ def updateNotes(clauses:list[Clause]) -> list[Clause]:
	return clauses
	return clauses




def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
def writeClauses(outClauses:list[Clause], filename:str, navTitle:str) -> None:
	"""	Write the clauses to separate files and create a navigation file.
	"""	Write the clauses to separate files and create a navigation file.


		Args:
		Args:
			outLines: The list of clauses.
			outClauses: The list of clauses.
			filename: The name of the original markdown file.
			filename: The name of the original markdown file.
			navTitle: The title of the navigation entry. This is used to determine the directories.
			navTitle: The title of the navigation entry. This is used to determine the directories.
	"""
	"""


	print(f'[green]Writing clauses to files')
	print(f'[green]Writing clauses to files')
	# Write the files
	# create directory first
	# create directory first
	os.makedirs(f'{os.path.dirname(filename)}/{navTitle}', exist_ok = True)
	os.makedirs(f'{os.path.dirname(filename)}/{navTitle}', exist_ok = True)
	for i, f in enumerate(outLines):
		if len(f.lines) == 0 or f.onlyNav:	# ignore empty clauses or clauses that are only for navigation
			if verbose:
				print(f'[dim]Navigation only - "{f.title}"')
			continue
	
	
		# write to single files
	# Write the files
	for i, f in enumerate(outClauses):
		# write to single files, even empty ones
		if verbose:
		if verbose:
			print(f'[dim]Writing "{f.clauseNumber}.md" - "{f.title}"')
			print(f'[dim]Writing "{f.clauseNumber}.md" - "{f.title}"')
		with open(f'{os.path.dirname(filename)}/{navTitle}/{f.clauseNumber}.md', 'w') as file:
		with open(f'{os.path.dirname(filename)}/{navTitle}/{f.clauseNumber}.md', 'w') as file:
@@ -350,13 +382,18 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
		if veryVerbose:
		if veryVerbose:
			print(f'[dim]Writing navigation file')
			print(f'[dim]Writing navigation file')
		file.write(f'  - {navTitle}:\n')
		file.write(f'  - {navTitle}:\n')
		for i, f in enumerate(outLines):
		for i, f in enumerate(outClauses):
			if f.onlyNav:

				file.write(f"  {'  '*f.level}- '{f.title}':\n")
			# TODO handle if the next clause is more than one level deeper
   
			_title = f.title.replace("'", '"')
			nextClause = outClauses[i+1] if i+1 < len(outClauses) else None
			if nextClause is None or nextClause.level <= f.level:
				file.write(f"  {'  '*f.level}- '{_title}': '{navTitle}/{f.clauseNumber}.md'\n")
			else:
			else:
				if len(f.lines) == 0:
				file.write(f"  {'  '*f.level}- '{_title}':\n")
					continue
				if len(f) > 0:
				file.write(f"  {'  '*f.level}- '{f.title}': '{navTitle}/{f.clauseNumber}.md'\n")
					file.write(f"  {'  '*nextClause.level}- 'Introduction': '{navTitle}/{f.clauseNumber}.md'\n")




def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None:
def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None: