def instertLineBeforeStartOfList(progress: Progress, mdLines: list[str]) -> list[str]:
    """Ensure that every Markdown list is preceded by a blank line.

    The lines are scanned in document order. When the first item of a list
    is found and the previously emitted line is not already blank, an empty
    line is inserted in front of it. Lines for which ``checkInCodeBlock``
    returns a true value are copied unchanged and end any open list.

    A list stays open across blank lines and across lines that start with
    whitespace, because those may be continuation text of a list item. It
    ends at the first non-blank line that starts in the first column and is
    not itself a list item.

    NOTE: the misspelled function name is kept on purpose; callers use it.

    Args:
        progress: Progress display on which a task for this step is
            registered and stopped again.
        mdLines: The lines of the Markdown document. They are assumed to
            carry their trailing newline (the inserted separator is a
            single newline character).

    Returns:
        A new list with the separator lines added. ``mdLines`` itself is
        not modified.
    """
    _taskID = progress.add_task('[blue]Inserting line before start of list', total=0)

    # A list item is optional indentation (sub-lists), then a dash, an
    # asterisk, a plus or "<number>.", followed by at least one whitespace.
    # Raw string: the escapes are meant for the regex engine, not for Python.
    listregex = re.compile(r'^\s*(?:-|\*|\+|\d+\.)\s+')

    _inList = False
    _lines: list[str] = []
    for line in mdLines:
        # checkInCodeBlock() is called exactly once per line and in order,
        # as before. NOTE(review): it presumably tracks fence state between
        # calls - confirm before changing the call pattern.
        if checkInCodeBlock(line):
            # Code is never part of a list.
            _inList = False
            _lines.append(line)
            continue

        if listregex.match(line):
            if not _inList:
                _inList = True
                # Separate the list from the preceding text. Nothing is
                # inserted at the very start of the document or after an
                # already blank line, so running the filter twice does not
                # pile up empty lines.
                if _lines and _lines[-1].strip():
                    _lines.append('\n')
        elif line.strip() and not line[0].isspace():
            # Non-blank text starting in the first column ends the list.
            # Blank or indented (space or tab) lines may still belong to
            # the current list item and leave the state untouched.
            _inList = False
        _lines.append(line)

    progress.stop_task(_taskID)
    return _lines
def instertLineBeforeStartOfList(progress: Progress, mdLines: list[str]) -> list[str]:
    """Ensure that every Markdown list is preceded by a blank line.

    The lines are scanned in document order. When the first item of a list
    is found and the previously emitted line is not already blank, an empty
    line is inserted in front of it. Lines for which ``checkInCodeBlock``
    returns a true value are copied unchanged and end any open list.

    A list stays open across blank lines and across lines that start with
    whitespace, because those may be continuation text of a list item. It
    ends at the first non-blank line that starts in the first column and is
    not itself a list item.

    NOTE: the misspelled function name is kept on purpose; callers use it.

    Args:
        progress: Progress display on which a task for this step is
            registered and stopped again.
        mdLines: The lines of the Markdown document. They are assumed to
            carry their trailing newline (the inserted separator is a
            single newline character).

    Returns:
        A new list with the separator lines added. ``mdLines`` itself is
        not modified.
    """
    _taskID = progress.add_task('[blue]Inserting line before start of list', total=0)

    # A list item is optional indentation (sub-lists), then a dash, an
    # asterisk, a plus or "<number>.", followed by at least one whitespace.
    # Raw string: the escapes are meant for the regex engine, not for Python.
    listregex = re.compile(r'^\s*(?:-|\*|\+|\d+\.)\s+')

    _inList = False
    _lines: list[str] = []
    for line in mdLines:
        # checkInCodeBlock() is called exactly once per line and in order,
        # as before. NOTE(review): it presumably tracks fence state between
        # calls - confirm before changing the call pattern.
        if checkInCodeBlock(line):
            # Code is never part of a list.
            _inList = False
            _lines.append(line)
            continue

        if listregex.match(line):
            if not _inList:
                _inList = True
                # Separate the list from the preceding text. Nothing is
                # inserted at the very start of the document or after an
                # already blank line, so running the filter twice does not
                # pile up empty lines.
                if _lines and _lines[-1].strip():
                    _lines.append('\n')
        elif line.strip() and not line[0].isspace():
            # Non-blank text starting in the first column ends the list.
            # Blank or indented (space or tab) lines may still belong to
            # the current list item and leave the state untouched.
            _inList = False
        _lines.append(line)

    progress.stop_task(_taskID)
    return _lines
generateBaseline/postprocessing.py +2 −1 Original line number Diff line number Diff line Loading @@ -1134,6 +1134,7 @@ def update_unnumbered_lists(docx_input, docx_output): counter_compact += 1 continue """ # Get all direct child runs (not nested runs) runs = para.xpath('./w:r', namespaces=ns) if not runs: Loading Loading @@ -1299,7 +1300,7 @@ def update_unnumbered_lists(docx_input, docx_output): remaining_runs = [c for c in para if c.tag != f"{{{ns['w']}}}pPr"] if not remaining_runs: processed_paras.add(id(para)) parent.remove(para) parent.remove(para)""" print(f'Updated {counter_b1} B1 style paragraphs, {counter_b2} B2 style paragraphs, {counter_b3} B3 style paragraphs') print(f'Updated {counter_compact} Compact style paragraphs, {counter_table} unnumbered list items in tables, {counter_regular} outside tables') Loading