Commit 9d36ce02 authored by ankraft's avatar ankraft
Browse files

Added generateTOC script to generate and replace TOC sections in markdown files

parent 1ea0f0c9
Loading
Loading
Loading
Loading

generateTOC/README.md

0 → 100644
+32 −0
Original line number Diff line number Diff line
# Generate TOC for Markdown files

The script will generate a TOC for a Markdown file, based on the headers in the file.

It generates and prints the TOC to the console, and optionally also inserts it into the original file.
For the latter, it will first create a backup copy of the file and then replace any section named "# Contents" with the new table of contents.


## Prerequisites

- Python 3.8 or higher

## Usage

```bash
$ python generateTOC.py <document path>
```

## Command Line Options

```
usage: generateTOC.py [-h] [--add-content] [--indent <indent>] document

positional arguments:
  document              document to parse

options:
  -h, --help            show this help message and exit
  --add-content, -a     add TOC to "# Content" section in the document (default: False)
  --indent <indent>, -i <indent>
                        indent spaces for each level (default: 4)
```
 No newline at end of file
+101 −0
Original line number Diff line number Diff line
#
#	generateTOC.py
#
#	Script to generate the table of contents for a markdown file.
#
#	(c) 2023 by Andreas Kraft
#	License: BSD 3-Clause License. See the LICENSE file for further details.
#

from __future__ import annotations
from typing import Tuple
import argparse, os, re
from rich import print


def backupFile(filename:str) -> None:
	"""	Backup a file.

		Args:
			filename: The filename to backup.
	"""
	if os.path.isfile(filename):
		os.rename(filename, filename + '.bak')


def processDocument(args:argparse.Namespace) -> None:

	def prepareTOClink(line:str) -> str:
		"""Prepare a link for the TOC"""

		# Remove HTML tags
		line = re.sub('<[^<]+?>', '', line)

		# Add more special characters to replace in markdown header links if necessary
		return line.lower()\
				   .replace(' ', '-')\
				   .replace('.', '')\
				   .replace(';', '')\
				   .replace('&', '%26')\
				   .replace('(', '%28')\
				   .replace(')', '%29')\
				   .replace('>', '%3E')\
				   .replace(':', '%3A')
	

	# Read the document
	headers:list[Tuple[str, int]] = []
	with open(args.document, 'r') as f:
		document = f.readlines()
		for line in document:
			_l = line.strip()
			if _l.startswith('#'):
				level = len(_l) - len(_l.lstrip('#')) - 1 # level is number of # - 1
				headers.append((_l.lstrip('#').strip(), level))
	
	# Prepare and Print the table of contents
	to = '# Contents\n\n'
	for h in headers:
		to += '&nbsp;' * (h[1] * args.indent) + f'[{h[0]}](#{prepareTOClink(h[0])})  \n'
	to = re.sub('<[^<]+?>', '', to)
	
	# Write the TOC to the console
	print(to)

	# Add the TOC to the document
	# The TOC replaces the old TOC if it exists in the section "# Contents"
	if args.addContent:
		backupFile(args.document)
		# Add the TOC to the document
		with open(args.document, 'w') as f:
			inToc = False
			for line in document:
				# Skip the old TOC when writing
				if inToc:
					if not line.strip().startswith('#'):
						continue
					inToc = False
				
				# Write the new TOC
				if line.strip() == '# Contents':
					inToc = True
					f.write(to)
					continue

				# Write the rest
				f.write(line)
			


if __name__ == '__main__':

	# Parse command line arguments
	parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
	parser.add_argument('--add-content', '-a', action='store_true', dest='addContent', default = False,  help = 'add TOC to "# Content" section in the document')
	parser.add_argument('--indent', '-i', action='store', dest='indent', default = 4, metavar = '<indent>', help = 'indent spaces for each level')

	parser.add_argument('document', help = 'document to parse')
	args = parser.parse_args()

	processDocument(args)