Added support for merging consecutive code paragraphs into a single code block (9328ebdb) · Commits · Centre for Testing and Interoperability / Markdown specifications development / spec2md

README.md

+3 −2

Original line number	Diff line number	Diff line
		@@ -12,8 +12,8 @@ python3 -m pip install -r requirements.txt

		## Usage
		- Create a directory with the Word document in it. The Word document must be in docx format. This can be achieved by opening the document with Word and saving it in docx format to another file.
		- Create a configuration file with the same base name as the Word document + .ini extension. This file may contain different configurations as the standard config.ini file provided.
		- Alternatively, a file named config.ini will apply to all files in that directory.
		- Optional: Create a configuration file config.ini in that directory. This file may contain different configurations than the configuration file in the project's root directory. This configuration file will apply to all files in that directory.
		- Alternatively, create a configuration file with the same base name as the Word document (plus the .ini extension). This configuration file will only apply to the Word document with the same base name.
		- It is only necessary to add the settings that are different from the config.ini file in the project's root directory. That file will always act as a fallback.
		- Run the converter as follows:
		```
		@@ -63,5 +63,6 @@ Lists in table cells are also not possible. One may use html lists for this, but

		## Changes

		- 2024-01-09 - Added support for merging consecutive code paragraphs into a single code block.
		- 2023-08-18 - Improved handling of sometimes broken inline formatting in table cells. Adding more default heading formats.
		- 2023-07-27 - Added converting bold and italic text in paragraphs, headers and tables.
		No newline at end of file

config.ini

+9 −2

Original line number	Diff line number	Diff line
		@@ -7,8 +7,12 @@


		[general]
		# Replace non-breaking spaces in the word document with an HTML space entity.
		replaceNbsp = &nbsp;

		# Replace the less than character in the word document with an HTML entity.
		replaceLt = &lt;

		; Rename EMF/WMF image references to a different file extension.
		; Allowed values: png, svg.
		; If not present, no renaming will happen.
		@@ -18,12 +22,14 @@ renameEMFExtension = svg
		; Default: false
		skipUnreferencedMediaFiles = false

		replaceLt = &lt;
		# Combine code paragraphs into a single markdown code paragraph.
		combineCodeParagraphs = true

		; Add image captions to the markdown's alternate text.
		; Note that the image caption has to follow the image in the document.
		imageCaptions2AltText = true


		[toc]
		addSectionNumbers = false
		excludeFromNumbering =
		@@ -35,7 +41,7 @@ addTocMacro = false

		[paragraphs]
		normal = normal
		h1 = heading 1
		h1 = heading 1, tt
		h2 = heading 2
		h3 = heading 3
		h4 = heading 4
		@@ -48,6 +54,7 @@ a1 = heading 1
		a2 = heading 2
		a3 = heading 3
		note = no
		code = pl
		example = ex, ew
		ul1 = b1, b1+, list paragraph
		ul2 = b2, b2+

spec2md.py

+32 −0

Original line number	Diff line number	Diff line
		@@ -27,6 +27,7 @@ import configparser, zipfile
		from lxml import etree as ET

		class Style(IntEnum):
		code = auto()
		example = auto()
		image = auto()
		imagecaption = auto()
		@@ -145,6 +146,7 @@ class DocumentConfiguration(object):
		self.renameEMFExtension = config.get('general', 'renameEMFExtension', fallback = None)
		self.skipUnreferencedMediaFiles = config.getboolean('general', 'skipUnreferencedMediaFiles', fallback = False)
		self.imageCaptions2AltText = config.getboolean('general', 'imageCaptions2AltText', fallback = True)
		self.combineCodeParagraphs = config.getboolean('general', 'combineCodeParagraphs', fallback = True)

		# Paragraphs
		self.paragraphs = { c : config.getlist('paragraphs', c) # type: ignore [attr-defined]
		@@ -170,6 +172,7 @@ class DocumentConfiguration(object):
		self.ul4 = self.paragraphs['ul4']
		self.ul5 = self.paragraphs['ul5']
		#self.continuedlist = self.paragraphs['continuedlist']
		self.code = self.paragraphs['code']
		self.note = self.paragraphs['note']
		self.example = self.paragraphs['example']
		self.tablecaption = self.paragraphs['tablecaption']
		@@ -653,6 +656,12 @@ def processDocuments(documents:list[str], outDirectory:str, skipImageConversion:
		lines.append('')
		lines.append(text)

		# Code
		elif style in docConfig.code:
		checkSameStyle(Style.code, lambda:lines.append(''))
		for _t in text.split(_linebreak):
		lines.append(f'```{_t if _t else " "}``` ') # at least an empty space. And 2 spaces at the end for newline

		# Example
		elif style in docConfig.example:
		checkSameStyle(Style.example, lambda:lines.append(''))
		@@ -745,6 +754,29 @@ def processDocuments(documents:list[str], outDirectory:str, skipImageConversion:
		lines[i] = line


		#
		# Combine multiple consecutive "code" lines
		#

		if docConfig.combineCodeParagraphs:
		codeblock:list[str] = []
		_lines:list[str] = []
		for i in range(len(lines)):
		line = lines[i]
		if line.startswith('```') and line.endswith('``` '):
		# Store code block
		codeblock.append(line[3:-5])
		elif codeblock:
		# Add whole code block to lines
		_lines.append('```')
		_lines.extend(codeblock)
		_lines.append('```')
		codeblock = []
		else:
		# Add line
		_lines.append(line)
		lines = _lines

		#
		# Insert auto-generated table of contents
		#