Commit 0d69f8ed authored by Miguel Angel Reina Ortega
Browse files

Do not treat individual runs but prepare lists as a proper input for Pandoc by...

Do not treat individual runs; instead, prepare lists as proper input for Pandoc by adding a blank line before each list starts
parent 3330a59e
Loading
Loading
Loading
Loading
+38 −0
Original line number Diff line number Diff line
@@ -210,6 +210,43 @@ def correctTableSeparators(progress: Progress, mdLines: list[str]) -> list[str]:
	progress.stop_task(_taskID)
	return _lines

def instertLineBeforeStartOfList(progress:Progress, mdLines:list[str]) -> list[str]:
	"""	Insert a blank line before the start of each Markdown list.

		A line counts as a list item when it starts (after optional leading
		whitespace) with a dash, asterisk, plus sign or a number followed by a
		dot, and then at least one whitespace character. The first such line
		after non-list content is treated as the start of a list and gets a
		blank line in front of it, unless the preceding output line is already
		blank or the list opens the document.

		Args:
			progress: Progress display on which a task is registered for this step.
			mdLines: The lines of the Markdown document.

		Return:
			A new list with the lines of the document, including the inserted
			blank lines. *mdLines* itself is not modified.
	"""
	_taskID = progress.add_task('[blue]Inserting line before start of list', total=0)

	# A list item starts with a dash, asterisk, plus or number followed by a space.
	# There might be several leading spaces for sublists.
	# Raw string: '\s', '\d', ... are regex escapes, not Python string escapes.
	listItem = re.compile(r'^\s*(?:-|\*|\+|\d+\.)\s+')

	_inList = False
	_lines:list[str] = []
	for line in mdLines:

		# If we are in a code block, we are not in a list.
		# NOTE(review): checkInCodeBlock() is defined elsewhere and presumably keeps
		# state between calls, so it is still called exactly once for every line.
		if checkInCodeBlock(line):
			_inList = False
			_lines.append(line)
			continue

		if listItem.match(line):
			if not _inList:
				# Start of a list
				_inList = True
				# Insert a blank line before the start of the list, but do not
				# stack a second one on top of an existing blank line or put
				# one at the very top of the document.
				if _lines and _lines[-1].strip():
					_lines.append('\n')
			_lines.append(line)
			continue

		# Not a list item, so maybe not in a list anymore.
		# Only non-empty text starting at the first column ends the list. Blank
		# lines and lines indented with any whitespace (spaces or tabs) may
		# still belong to the current list item.
		if line.strip() and not line[0].isspace():
			_inList = False
		_lines.append(line)

	progress.stop_task(_taskID)
	return _lines

def process(args) -> None:
	with Progress(TextColumn('{task.description}'),  TimeElapsedColumn()) as progress:
@@ -222,6 +259,7 @@ def process(args) -> None:
		mdLines = replaceLineBreaks(progress, mdLines)
		if args.table_separators:
			mdLines = correctTableSeparators(progress, mdLines)
		mdLines = instertLineBeforeStartOfList(progress, mdLines)
		writeMDFile(progress, mdLines, args.document, args.outDirectory)


+2 −1
Original line number Diff line number Diff line
@@ -1134,6 +1134,7 @@ def update_unnumbered_lists(docx_input, docx_output):
                counter_compact += 1
                continue
       
        """
        # Get all direct child runs (not nested runs)
        runs = para.xpath('./w:r', namespaces=ns)
        if not runs:
@@ -1299,7 +1300,7 @@ def update_unnumbered_lists(docx_input, docx_output):
            remaining_runs = [c for c in para if c.tag != f"{{{ns['w']}}}pPr"]
            if not remaining_runs:
                processed_paras.add(id(para))
                parent.remove(para)
                parent.remove(para)"""

    print(f'Updated {counter_b1} B1 style paragraphs, {counter_b2} B2 style paragraphs, {counter_b3} B3 style paragraphs')
    print(f'Updated {counter_compact} Compact style paragraphs, {counter_table} unnumbered list items in tables, {counter_regular} outside tables')