Loading toMkdocs/toMkdocs.py +268 −231 Original line number Diff line number Diff line Loading @@ -91,123 +91,43 @@ class Clause: return sum([ len(l.text) for l in self.lines ]) _matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE) _matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE) _matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE) _matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE) _matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE) _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE) _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE) _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE) _htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE) _htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE) _matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE) # TODO handle multiple nav levels (left bar) better (make conifgurable) def shortHash(value:str, length:int) -> str: """ Generate a short hash of a string value. Args: value: The value to hash. length: The length of the hash. Returns: The hash. """ return base64.b64encode( hashlib.sha256( value.encode() ).digest() ).decode()[:length] def analyseMarkdown(filename:str) -> list[Clause]: """ Analyse the markdown file and split it into clauses. class Footnote: """ Represents a footnote in the markdown file. """ def __init__(self, id:str, line:Line) -> None: self.id = id self.line = line Args: filename: The name of the markdown file. Returns: The list of clauses. """ print(f'[green]Analyzing "{filename}"') # Read the file. # Note: We use utf-8 and replace errors to avoid problems with special or unknown characters. with open(filename, 'r', encoding = 'utf-8', errors = 'replace') as file: inLines = file.readlines() outClauses:list[Clause] = [Clause(0, '', '', [])] class Document: """ Represents the document object. """ clauses:list[Clause] = [] footnotes:list[Footnote] = [] # Go through the lines and detect headers and codefences inCodefence = False for line in inLines: def __init__(self, clauses:list[Clause], footnotes:list[Footnote]) -> None: self.clauses = clauses self.footnotes = footnotes # Detect and handle codefences # For the moment we support only codefences that start and end # with 3 backticks. This is the most common way to define codefences. # Note, that longer codefences are allowed by the markdown specification. if _matchCodefenceStart.match(line) and not inCodefence: inCodefence = True outClauses[-1].append(Line(line, LineType.CODEFENCESTART)) continue if _matchCodefenceEnd.match(line): inCodefence = False outClauses[-1].append(Line(line, LineType.CODEFENCEEND)) continue if inCodefence: outClauses[-1].append(Line(line, LineType.CODE)) continue # Detect notes # Notes are lines that start with a '>'. if _matchNote.match(line): outClauses[-1].append(Line(line, LineType.NOTE)) continue # Detect headers _lineType = LineType.TEXT if (m := _matchHeader.match(line)): # Add a new clause clauseTitle = m.groups()[1].strip() clauseTitle = re.sub(_htmlTag, '', clauseTitle) headerNumber = _matchHeaderNumber.search(clauseTitle) outClauses.append(Clause(len(m.groups()[0]), # level headerNumber.group() if headerNumber else shortHash(clauseTitle, 6), clauseTitle, [])) _lineType = LineType.HEADING # Just add the line to the current clause as text outClauses[-1].append(Line(line, _lineType)) return outClauses def splitMarkdownDocument(clauses:list[Clause], def splitMarkdownDocument(self, ignoreTitles:list[str] = [], splitLevel:int = 1, ignoreUntilFirstHeading:bool = True) -> list[Clause]: ignoreUntilFirstHeading:bool = True) -> None: """ Split the clauses at a certain level. This is used to create the separate markdown files for MkDocs. After the split, the clauses are stored in the document object. Args: clauses: The list of clauses. ignoreTitles: A list of titles that should be ignored. They are not included in the output. splitLevel: The level at which the clauses should be split. ignoreUntilFirstHeader: Ignore all clauses until the first heading. Returns: The list of clauses. """ outClauses:list[Clause] = [Clause(0, '', '', [])] result:list[Clause] = [Clause(0, '', '', [])] ignoreTitles = [ t.casefold() for t in ignoreTitles ] # convert to lower case for clause in clauses: for clause in self.clauses: level = clause.level # Check if the current clause should be ignored Loading @@ -217,60 +137,51 @@ def splitMarkdownDocument(clauses:list[Clause], # Add a new output clause if the current clause's level is # equal or less than the split level if clause.level <= splitLevel: outClauses.append(Clause(level, clause.clauseNumber, clause.title, [])) result.append(Clause(level, clause.clauseNumber, clause.title, [])) # Add the lines to the output clause outClauses[-1].extend(clause) result[-1].extend(clause) # Remove the first clause if it has no title if ignoreUntilFirstHeading: while len(outClauses[0].title) == 0: outClauses.pop(0) while len(result[0].title) == 0: result.pop(0) return outClauses self.clauses = result def prepareForMkdocs(clauses:list[Clause]) -> list[Clause]: """ Prepare the clauses for MkDocs. This includes removing the heading from the clauses and marking the clauses that are only for navigation. def insertFootnotes(self) -> None: """ Insert footnotes into the clauses. Args: clauses: The list of clauses. After the insertion, the clauses are stored in the document object. Returns: The list of clauses. """ print(f'[green]Adding footnotes to clauses') # Remove the heading from the lines. The heading is the first line # in the clause. This is done because MkDocs repeats the heading when # displaying the page. for clause in clauses: if clause.linesCount > 0: clause.lines.pop(0) # Also, remove the first empty lines if they exist while clause.linesCount > 0 and clause.lines[0].text.strip() == '': clause.lines.pop(0) for clause in self.clauses: foundFootnotes:list[Footnote] = [] for line in clause.lines: # ATTN: Only footnotes in normal text lines are checked # Repair wrong markdown for indented lines. # Add 2 spaces to existing 2-space indentions for clause in clauses: for i, line in enumerate(clause.lines): if _match2spaceListIndention.match(line.text): clause.lines[i].text = ' ' + line.text if line.lineType == LineType.TEXT and (fn := _inlineFootnote.search(line.text)): # Find the footnote in the list of footnotes for f in self.footnotes: if f.id == fn.groups()[0]: foundFootnotes.append(f) return clauses # Insert the footnotes at the end of the clause if len(foundFootnotes) > 0: clause.append(Line('\n', LineType.TEXT)) for f in foundFootnotes: clause.append(f.line) def updateLinks(clauses:list[Clause]) -> list[Clause]: def updateLinks(self) -> None: """ Update the links in the clauses to the new structure. This is done by creating a dictionary of all links and their targets and then replacing the links in the clauses. Args: clauses: The list of clauses. Returns: The list of clauses. After the update, the clauses are stored in the document object. """ print(f'[green]Updating links in clauses') Loading @@ -278,7 +189,7 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]: linkTargets:dict[str, Clause] = {} # Find all Markdown headers in the clauses and convert them to anchor format for i, clause in enumerate(clauses): for i, clause in enumerate(self.clauses): # Find all headers in the clause for line in clause.lines: if (m := _matchHeader.match(line.text)): Loading @@ -297,7 +208,7 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]: print(f'[dim]Added Markdown anchor "{anchor}"') # Find all HTML anchors in the clauses and add them to the dictionary for i, clause in enumerate(clauses): for i, clause in enumerate(self.clauses): for line in clause.lines: if (anchors := _htmlAnchorLink.findall(line.text)): for a in anchors: Loading @@ -306,7 +217,7 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]: print(f'[dim]Found HTML anchor "{a}" in clause "{clause.title}"') # Replace the html links for clause in clauses: for clause in self.clauses: for i, line in enumerate(clause.lines): if (links := _htmlLink.findall(line.text)): for lnk in links: Loading @@ -316,7 +227,7 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]: print(f'[dim]Updated HTML link "{lnk}" in clause "{clause.title}"') # Replace the markdown links for clause in clauses: for clause in self.clauses: for i, line in enumerate(clause.lines): if (links := _markdownLink.findall(line.text)): # Replace the old link targets with converted Loading @@ -328,21 +239,15 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]: if veryVerbose: print(f'[dim]Updated Markdown link "{lnk}" in clause "{clause.title}"') return clauses def updateNotes(clauses:list[Clause]) -> list[Clause]: def updateNotes(self) -> None: """ Update the notes in the clauses to the mkDocs notes version. Args: clauses: The list of clauses. Returns: The list of clauses. After the update, the clauses are stored in the document object. """ print(f'[green]Updating notes in clauses') for clause in clauses: for clause in self.clauses: lines:list[Line] = [] inNote = False for line in clause.lines: Loading @@ -360,14 +265,37 @@ def updateNotes(clauses:list[Clause]) -> list[Clause]: inNote = False lines.append(line) clause.lines = lines return clauses def writeClauses(outClauses:list[Clause], filename:str, navTitle:str) -> None: def prepareForMkdocs(self) -> None: """ Prepare the clauses for MkDocs. This includes removing the heading from the clauses and marking the clauses that are only for navigation. After the preparation, the clauses are stored in the document object. """ # Remove the heading from the lines. The heading is the first line # in the clause. This is done because MkDocs repeats the heading when # displaying the page. for clause in self.clauses: if clause.linesCount > 0: clause.lines.pop(0) # Also, remove the first empty lines if they exist while clause.linesCount > 0 and clause.lines[0].text.strip() == '': clause.lines.pop(0) # Repair wrong markdown for indented lines. # Add 2 spaces to existing 2-space indentions for clause in self.clauses: for i, line in enumerate(clause.lines): if _match2spaceListIndention.match(line.text): clause.lines[i].text = ' ' + line.text def writeClausesMkDocs(self, filename:str, navTitle:str) -> None: """ Write the clauses to separate files and create a navigation file. Args: outClauses: The list of clauses. filename: The name of the original markdown file. navTitle: The title of the navigation entry. This is used to determine the directories. """ Loading @@ -377,7 +305,7 @@ def writeClauses(outClauses:list[Clause], filename:str, navTitle:str) -> None: os.makedirs(f'{os.path.dirname(filename)}/{navTitle}', exist_ok = True) # Write the files for i, f in enumerate(outClauses): for i, f in enumerate(self.clauses): # write to single files, even empty ones if verbose: print(f'[dim]Writing "{f.clauseNumber}.md" - "{f.title}"') Loading @@ -395,12 +323,12 @@ def writeClauses(outClauses:list[Clause], filename:str, navTitle:str) -> None: if veryVerbose: print(f'[dim]Writing navigation file') file.write(f' - {navTitle}:\n') for i, f in enumerate(outClauses): for i, f in enumerate(self.clauses): # TODO handle if the next clause is more than one level deeper _title = f.title.replace("'", '"') nextClause = outClauses[i+1] if i+1 < len(outClauses) else None nextClause = self.clauses[i+1] if i+1 < len(self.clauses) else None if nextClause is None or nextClause.level <= f.level: file.write(f" {' '*f.level}- '{_title}': '{navTitle}/{f.clauseNumber}.md'\n") else: Loading @@ -409,6 +337,114 @@ def writeClauses(outClauses:list[Clause], filename:str, navTitle:str) -> None: file.write(f" {' '*nextClause.level}- 'Introduction': '{navTitle}/{f.clauseNumber}.md'\n") _matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE) _matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE) _matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE) _matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE) _matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE) _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE) _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE) _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE) _htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE) _htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE) _matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE) _footnote = re.compile(r'\[\^([^\]]*)\]:', re.IGNORECASE) _inlineFootnote = re.compile(r'\[\^([^\]]*)\]', re.IGNORECASE) # TODO handle multiple nav levels (left bar) better (make conifgurable) def shortHash(value:str, length:int) -> str: """ Generate a short hash of a string value. Args: value: The value to hash. length: The length of the hash. Returns: The hash. """ return base64.b64encode( hashlib.sha256( value.encode() ).digest() ).decode()[:length] def analyseMarkdown(filename:str) -> Document: """ Analyse the markdown file and split it into clauses. Args: filename: The name of the markdown file. Returns: The document object. """ print(f'[green]Analyzing "{filename}"') # Read the file. # Note: We use utf-8 and replace errors to avoid problems with special or unknown characters. with open(filename, 'r', encoding = 'utf-8', errors = 'replace') as file: inLines = file.readlines() outClauses:list[Clause] = [Clause(0, '', '', [])] footnotes:list[Footnote] = [] # Go through the lines and detect headers and codefences inCodefence = False for line in inLines: # Detect and handle codefences # For the moment we support only codefences that start and end # with 3 backticks. This is the most common way to define codefences. # Note, that longer codefences are allowed by the markdown specification. if _matchCodefenceStart.match(line) and not inCodefence: inCodefence = True outClauses[-1].append(Line(line, LineType.CODEFENCESTART)) continue if _matchCodefenceEnd.match(line): inCodefence = False outClauses[-1].append(Line(line, LineType.CODEFENCEEND)) continue if inCodefence: outClauses[-1].append(Line(line, LineType.CODE)) continue # Detect notes # Notes are lines that start with a '>'. if _matchNote.match(line): outClauses[-1].append(Line(line, LineType.NOTE)) continue # Detect footnotes # Footnotes are lines that start with a '^' if (_fn := _footnote.match(line)): footnotes.append(Footnote(_fn.groups()[0], Line(line, LineType.TEXT))) continue # Detect headers _lineType = LineType.TEXT if (m := _matchHeader.match(line)): # Add a new clause clauseTitle = m.groups()[1].strip() clauseTitle = re.sub(_htmlTag, '', clauseTitle) headerNumber = _matchHeaderNumber.search(clauseTitle) outClauses.append(Clause(len(m.groups()[0]), # level headerNumber.group() if headerNumber else shortHash(clauseTitle, 6), clauseTitle, [])) _lineType = LineType.HEADING # Just add the line to the current clause as text outClauses[-1].append(Line(line, _lineType)) return Document(outClauses, footnotes) def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None: """ Copy media files from the source directory to the target directory. Loading @@ -429,24 +465,25 @@ def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> def processDocument(args:argparse.Namespace) -> None: global verbose, veryVerbose document = os.path.abspath(args.document) inDocumentFilename = os.path.abspath(args.document) veryVerbose = args.very_verbose verbose = args.verbose if veryVerbose: verbose = True # Analyse the markdown file clauses = analyseMarkdown(document) clauses = splitMarkdownDocument(clauses, [ t.casefold() for t in args.ignore_clause ], args.split_level) clauses = updateLinks(clauses) clauses = updateNotes(clauses) clauses = prepareForMkdocs(clauses) document = analyseMarkdown(inDocumentFilename) document.splitMarkdownDocument(args.ignore_clause, args.split_level) document.insertFootnotes() document.updateLinks() document.updateNotes() document.prepareForMkdocs() # Write the clauses to files writeClauses(clauses, document, args.title) document.writeClausesMkDocs(inDocumentFilename, args.title) # Copy the media files copyMediaFiles(document, args.title, args.media_directory) copyMediaFiles(inDocumentFilename, args.title, args.media_directory) if __name__ == '__main__': Loading Loading
toMkdocs/toMkdocs.py +268 −231 Original line number Diff line number Diff line Loading @@ -91,123 +91,43 @@ class Clause: return sum([ len(l.text) for l in self.lines ]) _matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE) _matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE) _matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE) _matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE) _matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE) _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE) _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE) _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE) _htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE) _htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE) _matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE) # TODO handle multiple nav levels (left bar) better (make conifgurable) def shortHash(value:str, length:int) -> str: """ Generate a short hash of a string value. Args: value: The value to hash. length: The length of the hash. Returns: The hash. """ return base64.b64encode( hashlib.sha256( value.encode() ).digest() ).decode()[:length] def analyseMarkdown(filename:str) -> list[Clause]: """ Analyse the markdown file and split it into clauses. class Footnote: """ Represents a footnote in the markdown file. """ def __init__(self, id:str, line:Line) -> None: self.id = id self.line = line Args: filename: The name of the markdown file. Returns: The list of clauses. """ print(f'[green]Analyzing "{filename}"') # Read the file. # Note: We use utf-8 and replace errors to avoid problems with special or unknown characters. with open(filename, 'r', encoding = 'utf-8', errors = 'replace') as file: inLines = file.readlines() outClauses:list[Clause] = [Clause(0, '', '', [])] class Document: """ Represents the document object. """ clauses:list[Clause] = [] footnotes:list[Footnote] = [] # Go through the lines and detect headers and codefences inCodefence = False for line in inLines: def __init__(self, clauses:list[Clause], footnotes:list[Footnote]) -> None: self.clauses = clauses self.footnotes = footnotes # Detect and handle codefences # For the moment we support only codefences that start and end # with 3 backticks. This is the most common way to define codefences. # Note, that longer codefences are allowed by the markdown specification. if _matchCodefenceStart.match(line) and not inCodefence: inCodefence = True outClauses[-1].append(Line(line, LineType.CODEFENCESTART)) continue if _matchCodefenceEnd.match(line): inCodefence = False outClauses[-1].append(Line(line, LineType.CODEFENCEEND)) continue if inCodefence: outClauses[-1].append(Line(line, LineType.CODE)) continue # Detect notes # Notes are lines that start with a '>'. if _matchNote.match(line): outClauses[-1].append(Line(line, LineType.NOTE)) continue # Detect headers _lineType = LineType.TEXT if (m := _matchHeader.match(line)): # Add a new clause clauseTitle = m.groups()[1].strip() clauseTitle = re.sub(_htmlTag, '', clauseTitle) headerNumber = _matchHeaderNumber.search(clauseTitle) outClauses.append(Clause(len(m.groups()[0]), # level headerNumber.group() if headerNumber else shortHash(clauseTitle, 6), clauseTitle, [])) _lineType = LineType.HEADING # Just add the line to the current clause as text outClauses[-1].append(Line(line, _lineType)) return outClauses def splitMarkdownDocument(clauses:list[Clause], def splitMarkdownDocument(self, ignoreTitles:list[str] = [], splitLevel:int = 1, ignoreUntilFirstHeading:bool = True) -> list[Clause]: ignoreUntilFirstHeading:bool = True) -> None: """ Split the clauses at a certain level. This is used to create the separate markdown files for MkDocs. After the split, the clauses are stored in the document object. Args: clauses: The list of clauses. ignoreTitles: A list of titles that should be ignored. They are not included in the output. splitLevel: The level at which the clauses should be split. ignoreUntilFirstHeader: Ignore all clauses until the first heading. Returns: The list of clauses. """ outClauses:list[Clause] = [Clause(0, '', '', [])] result:list[Clause] = [Clause(0, '', '', [])] ignoreTitles = [ t.casefold() for t in ignoreTitles ] # convert to lower case for clause in clauses: for clause in self.clauses: level = clause.level # Check if the current clause should be ignored Loading @@ -217,60 +137,51 @@ def splitMarkdownDocument(clauses:list[Clause], # Add a new output clause if the current clause's level is # equal or less than the split level if clause.level <= splitLevel: outClauses.append(Clause(level, clause.clauseNumber, clause.title, [])) result.append(Clause(level, clause.clauseNumber, clause.title, [])) # Add the lines to the output clause outClauses[-1].extend(clause) result[-1].extend(clause) # Remove the first clause if it has no title if ignoreUntilFirstHeading: while len(outClauses[0].title) == 0: outClauses.pop(0) while len(result[0].title) == 0: result.pop(0) return outClauses self.clauses = result def prepareForMkdocs(clauses:list[Clause]) -> list[Clause]: """ Prepare the clauses for MkDocs. This includes removing the heading from the clauses and marking the clauses that are only for navigation. def insertFootnotes(self) -> None: """ Insert footnotes into the clauses. Args: clauses: The list of clauses. After the insertion, the clauses are stored in the document object. Returns: The list of clauses. """ print(f'[green]Adding footnotes to clauses') # Remove the heading from the lines. The heading is the first line # in the clause. This is done because MkDocs repeats the heading when # displaying the page. for clause in clauses: if clause.linesCount > 0: clause.lines.pop(0) # Also, remove the first empty lines if they exist while clause.linesCount > 0 and clause.lines[0].text.strip() == '': clause.lines.pop(0) for clause in self.clauses: foundFootnotes:list[Footnote] = [] for line in clause.lines: # ATTN: Only footnotes in normal text lines are checked # Repair wrong markdown for indented lines. # Add 2 spaces to existing 2-space indentions for clause in clauses: for i, line in enumerate(clause.lines): if _match2spaceListIndention.match(line.text): clause.lines[i].text = ' ' + line.text if line.lineType == LineType.TEXT and (fn := _inlineFootnote.search(line.text)): # Find the footnote in the list of footnotes for f in self.footnotes: if f.id == fn.groups()[0]: foundFootnotes.append(f) return clauses # Insert the footnotes at the end of the clause if len(foundFootnotes) > 0: clause.append(Line('\n', LineType.TEXT)) for f in foundFootnotes: clause.append(f.line) def updateLinks(clauses:list[Clause]) -> list[Clause]: def updateLinks(self) -> None: """ Update the links in the clauses to the new structure. This is done by creating a dictionary of all links and their targets and then replacing the links in the clauses. Args: clauses: The list of clauses. Returns: The list of clauses. After the update, the clauses are stored in the document object. """ print(f'[green]Updating links in clauses') Loading @@ -278,7 +189,7 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]: linkTargets:dict[str, Clause] = {} # Find all Markdown headers in the clauses and convert them to anchor format for i, clause in enumerate(clauses): for i, clause in enumerate(self.clauses): # Find all headers in the clause for line in clause.lines: if (m := _matchHeader.match(line.text)): Loading @@ -297,7 +208,7 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]: print(f'[dim]Added Markdown anchor "{anchor}"') # Find all HTML anchors in the clauses and add them to the dictionary for i, clause in enumerate(clauses): for i, clause in enumerate(self.clauses): for line in clause.lines: if (anchors := _htmlAnchorLink.findall(line.text)): for a in anchors: Loading @@ -306,7 +217,7 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]: print(f'[dim]Found HTML anchor "{a}" in clause "{clause.title}"') # Replace the html links for clause in clauses: for clause in self.clauses: for i, line in enumerate(clause.lines): if (links := _htmlLink.findall(line.text)): for lnk in links: Loading @@ -316,7 +227,7 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]: print(f'[dim]Updated HTML link "{lnk}" in clause "{clause.title}"') # Replace the markdown links for clause in clauses: for clause in self.clauses: for i, line in enumerate(clause.lines): if (links := _markdownLink.findall(line.text)): # Replace the old link targets with converted Loading @@ -328,21 +239,15 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]: if veryVerbose: print(f'[dim]Updated Markdown link "{lnk}" in clause "{clause.title}"') return clauses def updateNotes(clauses:list[Clause]) -> list[Clause]: def updateNotes(self) -> None: """ Update the notes in the clauses to the mkDocs notes version. Args: clauses: The list of clauses. Returns: The list of clauses. After the update, the clauses are stored in the document object. """ print(f'[green]Updating notes in clauses') for clause in clauses: for clause in self.clauses: lines:list[Line] = [] inNote = False for line in clause.lines: Loading @@ -360,14 +265,37 @@ def updateNotes(clauses:list[Clause]) -> list[Clause]: inNote = False lines.append(line) clause.lines = lines return clauses def writeClauses(outClauses:list[Clause], filename:str, navTitle:str) -> None: def prepareForMkdocs(self) -> None: """ Prepare the clauses for MkDocs. This includes removing the heading from the clauses and marking the clauses that are only for navigation. After the preparation, the clauses are stored in the document object. """ # Remove the heading from the lines. The heading is the first line # in the clause. This is done because MkDocs repeats the heading when # displaying the page. for clause in self.clauses: if clause.linesCount > 0: clause.lines.pop(0) # Also, remove the first empty lines if they exist while clause.linesCount > 0 and clause.lines[0].text.strip() == '': clause.lines.pop(0) # Repair wrong markdown for indented lines. # Add 2 spaces to existing 2-space indentions for clause in self.clauses: for i, line in enumerate(clause.lines): if _match2spaceListIndention.match(line.text): clause.lines[i].text = ' ' + line.text def writeClausesMkDocs(self, filename:str, navTitle:str) -> None: """ Write the clauses to separate files and create a navigation file. Args: outClauses: The list of clauses. filename: The name of the original markdown file. navTitle: The title of the navigation entry. This is used to determine the directories. """ Loading @@ -377,7 +305,7 @@ def writeClauses(outClauses:list[Clause], filename:str, navTitle:str) -> None: os.makedirs(f'{os.path.dirname(filename)}/{navTitle}', exist_ok = True) # Write the files for i, f in enumerate(outClauses): for i, f in enumerate(self.clauses): # write to single files, even empty ones if verbose: print(f'[dim]Writing "{f.clauseNumber}.md" - "{f.title}"') Loading @@ -395,12 +323,12 @@ def writeClauses(outClauses:list[Clause], filename:str, navTitle:str) -> None: if veryVerbose: print(f'[dim]Writing navigation file') file.write(f' - {navTitle}:\n') for i, f in enumerate(outClauses): for i, f in enumerate(self.clauses): # TODO handle if the next clause is more than one level deeper _title = f.title.replace("'", '"') nextClause = outClauses[i+1] if i+1 < len(outClauses) else None nextClause = self.clauses[i+1] if i+1 < len(self.clauses) else None if nextClause is None or nextClause.level <= f.level: file.write(f" {' '*f.level}- '{_title}': '{navTitle}/{f.clauseNumber}.md'\n") else: Loading @@ -409,6 +337,114 @@ def writeClauses(outClauses:list[Clause], filename:str, navTitle:str) -> None: file.write(f" {' '*nextClause.level}- 'Introduction': '{navTitle}/{f.clauseNumber}.md'\n") _matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE) _matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE) _matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE) _matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE) _matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE) _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE) _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE) _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE) _htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE) _htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE) _matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE) _footnote = re.compile(r'\[\^([^\]]*)\]:', re.IGNORECASE) _inlineFootnote = re.compile(r'\[\^([^\]]*)\]', re.IGNORECASE) # TODO handle multiple nav levels (left bar) better (make conifgurable) def shortHash(value:str, length:int) -> str: """ Generate a short hash of a string value. Args: value: The value to hash. length: The length of the hash. Returns: The hash. """ return base64.b64encode( hashlib.sha256( value.encode() ).digest() ).decode()[:length] def analyseMarkdown(filename:str) -> Document: """ Analyse the markdown file and split it into clauses. Args: filename: The name of the markdown file. Returns: The document object. """ print(f'[green]Analyzing "{filename}"') # Read the file. # Note: We use utf-8 and replace errors to avoid problems with special or unknown characters. with open(filename, 'r', encoding = 'utf-8', errors = 'replace') as file: inLines = file.readlines() outClauses:list[Clause] = [Clause(0, '', '', [])] footnotes:list[Footnote] = [] # Go through the lines and detect headers and codefences inCodefence = False for line in inLines: # Detect and handle codefences # For the moment we support only codefences that start and end # with 3 backticks. This is the most common way to define codefences. # Note, that longer codefences are allowed by the markdown specification. if _matchCodefenceStart.match(line) and not inCodefence: inCodefence = True outClauses[-1].append(Line(line, LineType.CODEFENCESTART)) continue if _matchCodefenceEnd.match(line): inCodefence = False outClauses[-1].append(Line(line, LineType.CODEFENCEEND)) continue if inCodefence: outClauses[-1].append(Line(line, LineType.CODE)) continue # Detect notes # Notes are lines that start with a '>'. if _matchNote.match(line): outClauses[-1].append(Line(line, LineType.NOTE)) continue # Detect footnotes # Footnotes are lines that start with a '^' if (_fn := _footnote.match(line)): footnotes.append(Footnote(_fn.groups()[0], Line(line, LineType.TEXT))) continue # Detect headers _lineType = LineType.TEXT if (m := _matchHeader.match(line)): # Add a new clause clauseTitle = m.groups()[1].strip() clauseTitle = re.sub(_htmlTag, '', clauseTitle) headerNumber = _matchHeaderNumber.search(clauseTitle) outClauses.append(Clause(len(m.groups()[0]), # level headerNumber.group() if headerNumber else shortHash(clauseTitle, 6), clauseTitle, [])) _lineType = LineType.HEADING # Just add the line to the current clause as text outClauses[-1].append(Line(line, _lineType)) return Document(outClauses, footnotes) def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None: """ Copy media files from the source directory to the target directory. Loading @@ -429,24 +465,25 @@ def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> def processDocument(args:argparse.Namespace) -> None: global verbose, veryVerbose document = os.path.abspath(args.document) inDocumentFilename = os.path.abspath(args.document) veryVerbose = args.very_verbose verbose = args.verbose if veryVerbose: verbose = True # Analyse the markdown file clauses = analyseMarkdown(document) clauses = splitMarkdownDocument(clauses, [ t.casefold() for t in args.ignore_clause ], args.split_level) clauses = updateLinks(clauses) clauses = updateNotes(clauses) clauses = prepareForMkdocs(clauses) document = analyseMarkdown(inDocumentFilename) document.splitMarkdownDocument(args.ignore_clause, args.split_level) document.insertFootnotes() document.updateLinks() document.updateNotes() document.prepareForMkdocs() # Write the clauses to files writeClauses(clauses, document, args.title) document.writeClausesMkDocs(inDocumentFilename, args.title) # Copy the media files copyMediaFiles(document, args.title, args.media_directory) copyMediaFiles(inDocumentFilename, args.title, args.media_directory) if __name__ == '__main__': Loading