Loading toMkdocs/toMkdocs.py +60 −48 Original line number Diff line number Diff line Loading @@ -7,6 +7,7 @@ # directory structure. # from typing import Tuple import argparse, re, os, shutil from dataclasses import dataclass from rich import print Loading @@ -19,10 +20,15 @@ class Clause: lines:list[str] onlyNav:bool = False fnLength = 4 _matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE) _matchCodefence = re.compile(r'\s*```\s?.*', re.IGNORECASE) _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE) _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#.*)\)', re.IGNORECASE) _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE) _htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE) # TODO handle multiple nav levels (left bar) better (make conifgurable) # TODO Update links in the markdown files to the new structure Loading @@ -37,7 +43,7 @@ def analyseMarkdown(filename:str) -> list[Clause]: The list of clauses. """ print(f'[gray]Analyzing file "{filename}"') print(f'[green]Analyzing "{filename}"') with open(filename, 'r') as file: inLines = file.readlines() Loading Loading @@ -165,12 +171,13 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None: continue # write to single files with open(f'{os.path.dirname(filename)}/{navTitle}/{i}.md', 'w') as file: print(f'[green]Writing "{i:0{fnLength}}.md" - "{f.title}"') with open(f'{os.path.dirname(filename)}/{navTitle}/{i:0{fnLength}}.md', 'w') as file: file.writelines(f.lines) print(f'[green]File "{i}.md" written - "{f.title}"') # write nav.yml file print(f'[green]Writing "_nav.yml"') with open(f'{os.path.dirname(filename)}/_nav.yml', 'w') as file: file.write(f' - {navTitle}:\n') for i, f in enumerate(outLines): Loading @@ -179,16 +186,13 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None: else: if len(f.lines) == 0: continue file.write(f" {' '*f.level}- '{f.title}': '{navTitle}/{i}.md'\n") print(f'[green]File "_nav.yml" written') file.write(f" {' '*f.level}- '{f.title}': '{navTitle}/{i:0{fnLength}}.md'\n") _markdownLink = re.compile(r'\[.*\]\((.*)\)', re.IGNORECASE) _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE) _anchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE) def updateLinks(clauses:list[Clause]) -> list[Clause]: """ Update the links in the clauses to the new structure. """ Update the links in the clauses to the new structure. This is done by creating a dictionary of all links and their targets and then replacing the links in the clauses. Args: clauses: The list of clauses. Loading @@ -196,45 +200,48 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]: Returns: The list of clauses. """ print(f'[green]Updating links in clauses') # Build the link target dictionary linkTargets = {} # Build the link target dictionary. Mapping anchor -> (clause index, clause) linkTargets:dict[str, Tuple[int, str]] = {} # Find all Markdown headers in the clauses and convert them to anchor format for i, clause in enumerate(clauses): # Find all headers in the clause for line in clause.lines: if (m := _matchHeader.match(line)): # convert the header to anchor format and add it to the dictionary # TODO move perhaps to an own function anchor = m.groups()[1].strip().casefold().replace(' ', '-').replace('.', '') linkTargets[f'#{anchor}'] = (i, clause) # Find all HTML anchors in the clauses and add them to the dictionary for i, clause in enumerate(clauses): for line in clause.lines: if (anchors := _htmlAnchorLink.findall(line)): for a in anchors: linkTargets[f'#{a}'] = (i, clause) # Replace the html links for clause in clauses: for i, line in enumerate(clause.lines): if (lnk := _anchorLink.findall(line)): linkTargets[lnk[0]] = clause # # Check if the line contains a link # if not (lnk := _markdownLink.search(line)) and not (lnk := _htmlLink.search(line)) and not (lnk := _anchorLink.search(line)): # continue # print(lnk) # print(lnk.groups()[0]) # Update links in the markdown file for title in [ c.title for c in clauses ]: if title in line: clause.lines[i] = line.replace(title, f'{title}/{title.casefold()}.md') if (links := _htmlLink.findall(line)): for lnk in links: width = 4 if lnk in linkTargets: clause.lines[i] = line.replace(lnk, f'../{linkTargets[lnk][0]:0{width}}/#{lnk[1:]}') # Create a dictionary with the titles and the corresponding clause clauseDict = {} for clause in clauses: clauseDict[clause.title] = clause # Go through the clauses and update the links # Replace the markdown links for clause in clauses: for i, line in enumerate(clause.lines): # Update links in the markdown file for title in clauseDict.keys(): if title in line: clause.lines[i] = line.replace(title, f'{title}/{title.casefold()}.md') if (links := _markdownLink.findall(line)): for lnk in links: if lnk in linkTargets: clause.lines[i] = line.replace(lnk, f'../{linkTargets[lnk][0]:0{fnLength}}/#{lnk[1:]}') return clauses def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None: """ Copy media files from the source directory to the target directory. Loading @@ -247,19 +254,27 @@ def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> targetDirectory = f'{os.path.dirname(filename)}/{navTitle}/{mediaDirectory}' if os.path.exists(sourceDirectory): print(f'[green]Copying media files from "{sourceDirectory}" to "{targetDirectory}"') shutil.copytree(sourceDirectory, targetDirectory, dirs_exist_ok = True) print(f'[green]Copied media files from "{sourceDirectory}" to "{targetDirectory}"') else: print(f'[red]Media directory "{sourceDirectory}" does not exist') def processDocument(args:argparse.Namespace) -> None: global fnLength document = os.path.abspath(args.document) fnLength = args.filename_length # Analyse the markdown file clauses = analyseMarkdown(document) clauses = splitMarkdownDocument(clauses, [ t.casefold() for t in args.ignore_clause ], args.split_level) # clauses = updateLinks(clauses) clauses = updateLinks(clauses) clauses = prepareForMkdocs(clauses) # Write the clauses to files writeClauses(clauses, document, args.title) # Copy the media files copyMediaFiles(document, args.title, args.media_directory) Loading @@ -267,14 +282,11 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--title', '-t', metavar = 'title', required = True, help = 'mkdocs navigation tile') parser.add_argument('--ignore-clause', '-i', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document') parser.add_argument('--ignore-clause', '-ic', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document') parser.add_argument('--split-level', '-sl', metavar = 'level', type = int, default = 2, help = 'split clauses on which level') parser.add_argument('--media-directory', '-md', metavar = 'media-directory', default = 'media', help = 'directory name where media files are stored') parser.add_argument('--filename-length', '-fl', metavar = 'length', default = 4, help = 'length of the filename with leading zeros') parser.add_argument('document', type = str, help = 'a oneM2M markdown specification document to process') args = parser.parse_args() processDocument(args) Loading
toMkdocs/toMkdocs.py +60 −48 Original line number Diff line number Diff line Loading @@ -7,6 +7,7 @@ # directory structure. # from typing import Tuple import argparse, re, os, shutil from dataclasses import dataclass from rich import print Loading @@ -19,10 +20,15 @@ class Clause: lines:list[str] onlyNav:bool = False fnLength = 4 _matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE) _matchCodefence = re.compile(r'\s*```\s?.*', re.IGNORECASE) _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE) _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#.*)\)', re.IGNORECASE) _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE) _htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE) # TODO handle multiple nav levels (left bar) better (make conifgurable) # TODO Update links in the markdown files to the new structure Loading @@ -37,7 +43,7 @@ def analyseMarkdown(filename:str) -> list[Clause]: The list of clauses. """ print(f'[gray]Analyzing file "{filename}"') print(f'[green]Analyzing "{filename}"') with open(filename, 'r') as file: inLines = file.readlines() Loading Loading @@ -165,12 +171,13 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None: continue # write to single files with open(f'{os.path.dirname(filename)}/{navTitle}/{i}.md', 'w') as file: print(f'[green]Writing "{i:0{fnLength}}.md" - "{f.title}"') with open(f'{os.path.dirname(filename)}/{navTitle}/{i:0{fnLength}}.md', 'w') as file: file.writelines(f.lines) print(f'[green]File "{i}.md" written - "{f.title}"') # write nav.yml file print(f'[green]Writing "_nav.yml"') with open(f'{os.path.dirname(filename)}/_nav.yml', 'w') as file: file.write(f' - {navTitle}:\n') for i, f in enumerate(outLines): Loading @@ -179,16 +186,13 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None: else: if len(f.lines) == 0: continue file.write(f" {' '*f.level}- '{f.title}': '{navTitle}/{i}.md'\n") print(f'[green]File "_nav.yml" written') file.write(f" {' '*f.level}- '{f.title}': '{navTitle}/{i:0{fnLength}}.md'\n") _markdownLink = re.compile(r'\[.*\]\((.*)\)', re.IGNORECASE) _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE) _anchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE) def updateLinks(clauses:list[Clause]) -> list[Clause]: """ Update the links in the clauses to the new structure. """ Update the links in the clauses to the new structure. This is done by creating a dictionary of all links and their targets and then replacing the links in the clauses. Args: clauses: The list of clauses. Loading @@ -196,45 +200,48 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]: Returns: The list of clauses. """ print(f'[green]Updating links in clauses') # Build the link target dictionary linkTargets = {} # Build the link target dictionary. Mapping anchor -> (clause index, clause) linkTargets:dict[str, Tuple[int, str]] = {} # Find all Markdown headers in the clauses and convert them to anchor format for i, clause in enumerate(clauses): # Find all headers in the clause for line in clause.lines: if (m := _matchHeader.match(line)): # convert the header to anchor format and add it to the dictionary # TODO move perhaps to an own function anchor = m.groups()[1].strip().casefold().replace(' ', '-').replace('.', '') linkTargets[f'#{anchor}'] = (i, clause) # Find all HTML anchors in the clauses and add them to the dictionary for i, clause in enumerate(clauses): for line in clause.lines: if (anchors := _htmlAnchorLink.findall(line)): for a in anchors: linkTargets[f'#{a}'] = (i, clause) # Replace the html links for clause in clauses: for i, line in enumerate(clause.lines): if (lnk := _anchorLink.findall(line)): linkTargets[lnk[0]] = clause # # Check if the line contains a link # if not (lnk := _markdownLink.search(line)) and not (lnk := _htmlLink.search(line)) and not (lnk := _anchorLink.search(line)): # continue # print(lnk) # print(lnk.groups()[0]) # Update links in the markdown file for title in [ c.title for c in clauses ]: if title in line: clause.lines[i] = line.replace(title, f'{title}/{title.casefold()}.md') if (links := _htmlLink.findall(line)): for lnk in links: width = 4 if lnk in linkTargets: clause.lines[i] = line.replace(lnk, f'../{linkTargets[lnk][0]:0{width}}/#{lnk[1:]}') # Create a dictionary with the titles and the corresponding clause clauseDict = {} for clause in clauses: clauseDict[clause.title] = clause # Go through the clauses and update the links # Replace the markdown links for clause in clauses: for i, line in enumerate(clause.lines): # Update links in the markdown file for title in clauseDict.keys(): if title in line: clause.lines[i] = line.replace(title, f'{title}/{title.casefold()}.md') if (links := _markdownLink.findall(line)): for lnk in links: if lnk in linkTargets: clause.lines[i] = line.replace(lnk, f'../{linkTargets[lnk][0]:0{fnLength}}/#{lnk[1:]}') return clauses def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None: """ Copy media files from the source directory to the target directory. Loading @@ -247,19 +254,27 @@ def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> targetDirectory = f'{os.path.dirname(filename)}/{navTitle}/{mediaDirectory}' if os.path.exists(sourceDirectory): print(f'[green]Copying media files from "{sourceDirectory}" to "{targetDirectory}"') shutil.copytree(sourceDirectory, targetDirectory, dirs_exist_ok = True) print(f'[green]Copied media files from "{sourceDirectory}" to "{targetDirectory}"') else: print(f'[red]Media directory "{sourceDirectory}" does not exist') def processDocument(args:argparse.Namespace) -> None: global fnLength document = os.path.abspath(args.document) fnLength = args.filename_length # Analyse the markdown file clauses = analyseMarkdown(document) clauses = splitMarkdownDocument(clauses, [ t.casefold() for t in args.ignore_clause ], args.split_level) # clauses = updateLinks(clauses) clauses = updateLinks(clauses) clauses = prepareForMkdocs(clauses) # Write the clauses to files writeClauses(clauses, document, args.title) # Copy the media files copyMediaFiles(document, args.title, args.media_directory) Loading @@ -267,14 +282,11 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--title', '-t', metavar = 'title', required = True, help = 'mkdocs navigation tile') parser.add_argument('--ignore-clause', '-i', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document') parser.add_argument('--ignore-clause', '-ic', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document') parser.add_argument('--split-level', '-sl', metavar = 'level', type = int, default = 2, help = 'split clauses on which level') parser.add_argument('--media-directory', '-md', metavar = 'media-directory', default = 'media', help = 'directory name where media files are stored') parser.add_argument('--filename-length', '-fl', metavar = 'length', default = 4, help = 'length of the filename with leading zeros') parser.add_argument('document', type = str, help = 'a oneM2M markdown specification document to process') args = parser.parse_args() processDocument(args)