Added generateTOC script to generate and replace TOC sections in markdown files (9d36ce02) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

generateTOC/README.md

0 → 100644

+32 −0

Original line number	Diff line number	Diff line
		# Generate TOC for Markdown files

		The script will generate a TOC for a Markdown file, based on the headers in the file.

		It generates and prints the TOC to the console, and optionally also inserts it into the original file.
		For the latter (option `--add-content`), it first creates a backup of the file (the original file name with `.bak` appended) and then replaces any section named "# Contents" with the new table of contents.


		## Prerequisites

		- Python 3.8 or higher
		- The `rich` Python package (`pip install rich`)

		## Usage

		```bash
		$ python generateTOC.py <document path>
		```

		## Command Line Options

		```
		usage: generateTOC.py [-h] [--add-content] [--indent <indent>] document

		positional arguments:
		document document to parse

		options:
		-h, --help show this help message and exit
		--add-content, -a add TOC to "# Contents" section in the document (default: False)
		--indent <indent>, -i <indent>
		indent spaces for each level (default: 4)
		```
		No newline at end of file

generateTOC/generateTOC.py

0 → 100644

+101 −0

Original line number	Diff line number	Diff line
		#
		# generateTOC.py
		#
		# Script to generate the table of contents for a markdown file.
		#
		# (c) 2023 by Andreas Kraft
		# License: BSD 3-Clause License. See the LICENSE file for further details.
		#

		from __future__ import annotations
		from typing import Tuple
		import argparse, os, re
		from rich import print


		def backupFile(filename:str) -> None:
			"""	Backup a file by renaming it to `<filename>.bak`.

				Nothing happens if *filename* does not refer to an existing regular file.
				A backup that is left over from a previous run is overwritten.

				Args:
					filename: The filename to backup.
			"""
			if os.path.isfile(filename):
				# os.replace() overwrites an existing backup on every platform, while
				# os.rename() raises FileExistsError on Windows if the target exists.
				os.replace(filename, filename + '.bak')


		def processDocument(args:argparse.Namespace) -> None:
			"""	Generate the table of contents (TOC) for a markdown document.

				The TOC is built from the header lines of the document and printed to the
				console. Lines inside fenced code blocks are not regarded as headers.
				If requested, the document is backed up and rewritten with the content of
				every "# Contents" section replaced by the new TOC.

				Args:
					args: The parsed command line arguments. The following attributes are read:
						*document* (path of the markdown file), *indent* (number of indent
						characters per header level, int or numeric string), and *addContent*
						(whether to write the TOC back to the document).
			"""

			def prepareTOClink(line:str) -> str:
				"""	Prepare a link for the TOC.

					Args:
						line: The header text.

					Return:
						The lower-cased header text with HTML tags removed and special characters
						replaced or removed, for use as a link target.
				"""

				# Remove HTML tags
				line = re.sub(r'<[^<]+?>', '', line)

				# Add more special characters to replace in markdown header links if necessary
				return line.lower()\
					.replace(' ', '-')\
					.replace('.', '')\
					.replace(';', '')\
					.replace('&', '%26')\
					.replace('(', '%28')\
					.replace(')', '%29')\
					.replace('>', '%3E')\
					.replace(':', '%3A')


			def fenceMarker(line:str) -> str:
				"""	Determine the code fence marker at the start of a line.

					Args:
						line: A line, stripped from surrounding white spaces.

					Return:
						The run of three or more backticks or tildes that the line starts with,
						or an empty string if the line is not a code fence.
				"""
				m = re.match(r'`{3,}|~{3,}', line)
				if m is None:
					return ''
				# A backtick fence must not be followed by further backticks on the same line.
				# Such a line is inline code, not a fence.
				if m.group().startswith('`') and '`' in line[m.end():]:
					return ''
				return m.group()


			# Read the document. Read and write explicitly as UTF-8 so that the result
			# does not depend on the platform's default encoding.
			with open(args.document, 'r', encoding='utf-8') as f:
				document = f.readlines()

			# Collect the headers as (title, level) tuples
			headers:list[Tuple[str, int]] = []
			openFence = ''	# marker of the currently open fenced code block, empty outside of one
			for line in document:
				_l = line.strip()
				marker = fenceMarker(_l)
				if openFence:
					# Only a bare fence of the same kind and at least the same length closes the block
					if marker.startswith(openFence) and marker == _l:
						openFence = ''
					continue
				if marker:
					openFence = marker
					continue
				if _l.startswith('#'):
					level = len(_l) - len(_l.lstrip('#')) - 1 # level is number of # - 1
					headers.append((_l.lstrip('#').strip(), level))

			# Prepare and Print the table of contents
			# The indent may still be a string when it was given on the command line
			indent = int(args.indent)
			# Indent with "&nbsp;" because leading plain spaces are not rendered as indentation,
			# and end each entry with two spaces, which is a markdown hard line break.
			entries = [ '&nbsp;' * (level * indent) + f'[{title}](#{prepareTOClink(title)})  \n'
						for title, level in headers ]
			to = re.sub(r'<[^<]+?>', '', '# Contents\n\n' + ''.join(entries))

			# Write the TOC to the console
			print(to)

			# Add the TOC to the document
			# The TOC replaces the old TOC if it exists in the section "# Contents"
			if args.addContent:
				backupFile(args.document)
				with open(args.document, 'w', encoding='utf-8') as f:
					inToc = False
					for line in document:
						# Skip the old TOC when writing. It ends at the next header line.
						if inToc:
							if not line.strip().startswith('#'):
								continue
							inToc = False

						# Write the new TOC
						if line.strip() == '# Contents':
							inToc = True
							f.write(to)
							continue

						# Write the rest
						f.write(line)



		if __name__ == '__main__':

			# Parse command line arguments
			parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
			# The section that is replaced in the document is named "# Contents"
			parser.add_argument('--add-content', '-a', action='store_true', dest='addContent', default = False, help = 'add TOC to "# Contents" section in the document')
			# Convert the indent to int. Without a type a value given on the command line
			# stays a string and cannot be used to calculate the indentation.
			parser.add_argument('--indent', '-i', action='store', dest='indent', default = 4, type = int, metavar = '<indent>', help = 'indent spaces for each level')

			parser.add_argument('document', help = 'document to parse')
			args = parser.parse_args()

			# Generate the TOC and optionally write it back to the document
			processDocument(args)