#
#	processMDSpec.py
#
#	(c) 2025 by Andreas Kraft
#	License: BSD 3-Clause License. See the LICENSE file for further details.
#
""" This script processes markdown specification files. It handles the
    include statements and the front matter. It can also render the markdown
    content on console or output the front matter only.
"""

from __future__ import annotations

from typing import Tuple, Generator
import argparse
from rich import print, markdown
import re, sys, yaml
from contextlib import contextmanager

# Front matter collected from all processed files, keyed by file name.
_frontMatter:dict = {}

# File names of the documents that are currently being processed (outermost first).
_includeStack:list[str] = []

# A front matter delimiter is a line that consists of exactly three dashes.
_frontMatterDelimiter = re.compile(r'^---\s*$')

# A line that opens or closes a fenced code block.
_codeFence = re.compile(r'^\s*```.*')

# An include statement of the form ``::include{file=...}`` at the start of a line.
_includeStatement = re.compile(r'^::include\{\s*file=(.*?)\s*\}')


@contextmanager
def includeStack(filename:str) -> Generator[None, None, None]:
    """ Handle the include stack.

        This is used to detect circular includes and to keep track of the
        include stack. The file name is pushed when the context is entered and
        popped again when it is left, even if the body raised an exception.

        Args:
            filename: The name of the file being processed.

        Raises:
            Exception: If a circular include is detected.

        Returns:
            Generator: A generator that yields nothing.
    """
    if filename in _includeStack:
        print(f'[red]Circular include detected: {filename}', file=sys.stderr)
        raise Exception('Circular include detected')
    _includeStack.append(filename)
    try:
        yield
    finally:
        # Always unwind, otherwise a failed include leaves a stale entry behind
        _includeStack.pop()


def processFrontMatter(lines:list[str], args:argparse.Namespace) -> Tuple[dict, list[str]]:
    """ Process the front matter of a markdown file.

        This includes extracting the front matter information and returning it
        as a dictionary. Currently only YAML front matter is supported. It can
        be extended later.

        A front matter block starts with a ``---`` line as the very first line
        of the file and ends with the next ``---`` line. If there is no closing
        delimiter then the file is regarded as having no front matter and the
        lines are returned unchanged.

        Args:
            lines: The lines of the markdown file.
            args: The command line arguments. Currently unused.

        Raises:
            yaml.YAMLError: If the front matter cannot be parsed as YAML.

        Returns:
            dict: The front matter information as a dictionary.
            list[str]: The lines of the markdown file without the front matter.
    """
    if not lines or not _frontMatterDelimiter.match(lines[0]):
        return {}, lines

    # Look for the closing delimiter
    for index, line in enumerate(lines[1:], start=1):
        if _frontMatterDelimiter.match(line):
            closingIndex = index
            break
    else:
        # No closing delimiter: the leading '---' is ordinary markdown
        return {}, lines

    frontMatterLines = lines[1:closingIndex]
    remainingLines = lines[closingIndex+1:]

    # Parse the front matter as YAML
    try:
        frontMatter = yaml.safe_load(''.join(frontMatterLines))
    except yaml.YAMLError as e:
        print(f'[red]Error parsing front matter: {e}', file=sys.stderr)
        raise

    # An empty front matter block is parsed as None
    if frontMatter is None:
        frontMatter = {}
    return frontMatter, remainingLines


def processFile(args:argparse.Namespace) -> str:
    """ Handle the include statements in the markdown files.

        This includes processing the include statements and replacing them
        with the content of the included files. The front matter of every
        processed file is collected in the module-level *_frontMatter*
        dictionary.

        Args:
            args: The command line arguments. *args.document* is the file to
                start with, *args.doInclude* enables the include processing.

        Raises:
            Exception: If the file cannot be processed.

        Returns:
            The processed markdown content as a string.
    """

    def handleIncludesForFile(filename:str) -> str:
        """ Read a single markdown file and return its content.

            Include statements are resolved recursively, except for those
            inside fenced code blocks.

            Args:
                filename: The name of the file to read.

            Raises:
                FileNotFoundError: If the file cannot be found.

            Returns:
                The content of the file.
        """
        with includeStack(filename):
            try:
                with open(filename, 'r', encoding='utf-8') as f:
                    lines = f.readlines()
            except FileNotFoundError:
                print(f'[red]File not found: {filename}', file=sys.stderr)
                raise

            # Extract front matter information
            fm, lines = processFrontMatter(lines, args)
            if fm:
                _frontMatter[filename] = fm

            if not args.doInclude:
                return ''.join(lines)

            inCodeFence = False
            for index, line in enumerate(lines):

                # Ignore everything between code fences
                if _codeFence.match(line):
                    inCodeFence = not inCodeFence
                    continue
                if inCodeFence:
                    continue

                # Check for ::include{file=...} at the beginning of a line
                match = _includeStatement.search(line.strip())
                if not match:
                    continue

                # Read the included file and replace the include statement with its
                # content. The replacement is done by position because the same
                # statement text may appear elsewhere, e.g. inside a code fence.
                includeContent = handleIncludesForFile(match.group(1))
                if line.endswith('\n') and not includeContent.endswith('\n'):
                    # Keep the following line separate from the included content
                    includeContent += '\n'
                lines[index] = includeContent

            return ''.join(lines)

    return handleIncludesForFile(args.document)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Process markdown specification files.')
    parser.add_argument('--no-include', dest='doInclude', action='store_false', default=True, help="don't process include statements")
    parser.add_argument('--render-markdown', '-md', dest='renderAsMarkdown', action='store_true', help='render output as markdown')
    parser.add_argument('--process-frontmatter', '-fm', dest='outputFrontMatter', action='store_true', help='output front matter in front of the document')
    parser.add_argument('--frontmatter-only', '-fmo', dest='onlyFrontMatter', action='store_true', help='output only front matter')
    parser.add_argument('--verbose', '-v', action='store_true', help='print debug information to stderr.')
    parser.add_argument('document', type = str, help = 'a markdown specification document to process')
    args = parser.parse_args()

    if args.verbose:
        if not args.doInclude:
            print(f'[yellow]Skipping processing include statements', file=sys.stderr)
        else:
            print(f'[green]Processing include statements', file=sys.stderr)

    try:
        content = processFile(args)
    except Exception as e:
        print(f'[red]Error processing file: {e}', file=sys.stderr)
        sys.exit(1)

    # Note: Raw output is written directly to stdout. rich's print() would
    # interpret square brackets as console markup and re-wrap long lines.

    if args.outputFrontMatter or args.onlyFrontMatter:
        # Collect front matter information in the output
        if not args.onlyFrontMatter:
            sys.stdout.write('---\n')
        # The following is a workaround to keep the order of the dictionary
        # see https://stackoverflow.com/a/52621703
        yaml.add_representer(dict, lambda self, data: yaml.representer.SafeRepresenter.represent_dict(self, data.items()))
        sys.stdout.write(yaml.dump(_frontMatter, default_flow_style=False))
        if not args.onlyFrontMatter:
            sys.stdout.write('---\n')

    if not args.onlyFrontMatter:
        if args.renderAsMarkdown:
            # Render the markdown content
            print(markdown.Markdown(content))
        else:
            # Print the raw markdown content
            sys.stdout.write(content)