Do not treat individual runs (bb8f054d) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

generateBaseline/pandocFilter.py

+38 −0

Original line number	Diff line number	Diff line
		@@ -210,6 +210,43 @@ def correctTableSeparators(progress: Progress, mdLines: list[str]) -> list[str]:
		progress.stop_task(_taskID)
		return _lines

		def instertLineBeforeStartOfList(progress:Progress, mdLines:list[str]) -> list[str]:
			"""	Insert a blank line before the first item of every Markdown list.

				The lines are scanned in order while tracking whether the current
				line belongs to a list. When a list item is found and no list is
				open, a blank line is emitted in front of it.

				Args:
					progress: Progress display on which a task for this step is registered.
					mdLines: The lines of the Markdown document. The blank-line check
						compares against a bare newline, so lines are expected to keep
						their trailing line break.

				Return:
					A new list of lines including the inserted blank lines. The input list is not modified.
			"""
			_taskID = progress.add_task('[blue]Inserting line before start of list', total=0)

			# A list item starts with optional indentation (sublists), followed by a
			# dash, asterisk, plus sign or a number with a dot, followed by whitespace.
			# Raw string: the backslashes must reach the regex engine unchanged, and
			# the alternation must not be escaped into literal pipe characters.
			listregex = re.compile(r'^\s*(?:[-*+]|\d+\.)\s+')

			_inList = False
			_lines:list[str] = []
			for line in mdLines:

				# A line reported as code-block content by checkInCodeBlock() is never
				# part of a list. It is copied unchanged and ends any open list.
				if checkInCodeBlock(line):
					_inList = False
					_lines.append(line)
					continue

				if listregex.match(line):
					if not _inList:
						# Start of a list: put a blank line in front of the first item
						_inList = True
						_lines.append('\n')
				elif not (line.startswith(' ') or line == '\n'):
					# Only non-empty text starting in the first column ends the list.
					# Empty lines and lines starting with a space may still belong to
					# the current list item.
					_inList = False
				_lines.append(line)

			progress.stop_task(_taskID)
			return _lines

		def process(args) -> None:
		with Progress(TextColumn('{task.description}'), TimeElapsedColumn()) as progress:
		@@ -222,6 +259,7 @@ def process(args) -> None:
		mdLines = replaceLineBreaks(progress, mdLines)
		if args.table_separators:
		mdLines = correctTableSeparators(progress, mdLines)
		mdLines = instertLineBeforeStartOfList(progress, mdLines)
		writeMDFile(progress, mdLines, args.document, args.outDirectory)

generateBaseline/postprocessing.py

+2 −1

Original line number	Diff line number	Diff line
		@@ -1134,6 +1134,7 @@ def update_unnumbered_lists(docx_input, docx_output):
		counter_compact += 1
		continue

		"""
		# Get all direct child runs (not nested runs)
		runs = para.xpath('./w:r', namespaces=ns)
		if not runs:
		@@ -1299,7 +1300,7 @@ def update_unnumbered_lists(docx_input, docx_output):
		remaining_runs = [c for c in para if c.tag != f"{{{ns['w']}}}pPr"]
		if not remaining_runs:
		processed_paras.add(id(para))
		parent.remove(para)
		parent.remove(para)"""

		print(f'Updated {counter_b1} B1 style paragraphs, {counter_b2} B2 style paragraphs, {counter_b3} B3 style paragraphs')
		print(f'Updated {counter_compact} Compact style paragraphs, {counter_table} unnumbered list items in tables, {counter_regular} outside tables')