Loading pandocFilter/pandocFilter.py 0 → 100644 +119 −0 Original line number Original line Diff line number Diff line # # pandocFilter.py # # Script to convert a oneM2M Markdown file to pandoc input format # # (c) 2023 by Andreas Kraft, Miguel Ortega # License: BSD 3-Clause License. See the LICENSE file for further details. # import argparse, os, re from rich import print from rich.progress import Progress, TextColumn, TimeElapsedColumn def readMDFile(progress:Progress, document:str) -> list[str]: """ Read the markdown file and return a list of lines. """ _taskID = progress.add_task('[blue]Reading document', start=False, total=0) # Check if file exists if not os.path.exists(document): print(f'File {document} does not exist') exit(1) # Read the file with open(document, 'r', encoding='utf-8', errors = 'replace') as f: progress.stop_task(_taskID) return f.readlines() def writeMDFile(progress:Progress, mdLines:list[str], document:str, outDirectory:str) -> None: """ Write the markdown file. """ _taskID = progress.add_task('[blue]Writing document', start=False, total=0) # Write the file with open(f'{outDirectory}/{os.path.basename(document)}', 'w', encoding='utf-8', errors = 'replace') as f: f.writelines(mdLines) progress.stop_task(_taskID) def correctTOC(progress:Progress, mdLines:list[str], tocSection:str = 'Contents') -> list[str]: """ Correct the TOC to be compatible with pandoc. """ _taskID = progress.add_task('[blue]Correcting TOC', start=False, total=0) _contents = f'# {tocSection}\n' tocregex = re.compile('^(.*\[.*\])') _lines:list[str] = [] _inTOC = False for line in mdLines: # find TOC section first if line == _contents: _inTOC = True _lines.append(line) continue if _inTOC: if line.startswith('#'): # End of TOC? 
_inTOC = False _lines.append(line) continue matches = re.findall(tocregex, line) # Replace entry if matches: _lines.append(f'{matches[0]} \n') continue else: _lines.append(line) progress.stop_task(_taskID) return _lines def replaceTableCaptions(progress:Progress, mdLines:list[str]) -> list[str]: """ Replace table captions with a pandoc table caption. """ _taskID = progress.add_task('[blue]Replacing table captions', start=False, total=0) # progress.update() tableregex = re.compile('^\*\*(Table .*)\*\*') _lines:list[str] = [] for line in mdLines: matches = re.findall(tableregex, line) if matches: # move the caption to the beginning of the table and add a "Table:" prefix _idx = len(_lines) - 1 while _idx >= 0 and _lines[_idx].startswith('|'): _idx -= 1 if _idx > 0: _lines.insert(_idx+1, f'Table: {matches[0]}\n') _lines.insert(_idx+1, f'Table: {matches[0]}\n') else: _lines.append(line) #print(_lines) progress.stop_task(_taskID) return _lines def process(document:str, outDirectory:str) -> None: with Progress(TextColumn('{task.description}'), TimeElapsedColumn()) as progress: mdLines = readMDFile(progress, document) mdLines = correctTOC(progress, mdLines) mdLines = replaceTableCaptions(progress, mdLines) writeMDFile(progress, mdLines, document, outDirectory) if __name__ == '__main__': # Parse command line arguments parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--outdir', '-o', action='store', dest='outDirectory', default = 'out', metavar = '<output directory>', help = 'specify output directory') parser.add_argument('document', help = 'document to parse') args = parser.parse_args() # Process documents and print output os.makedirs(args.outDirectory, exist_ok = True) process(args.document, args.outDirectory) pandocFilter/requirements.txt 0 → 100644 +14 −0 Original line number Original line Diff line number Diff line # # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # 
pip-compile # markdown-it-py==2.2.0 # via rich mdurl==0.1.2 # via markdown-it-py pygments==2.15.1 # via rich rich==13.3.5 # via oneM2M-markdown-to-pandoc-filter (setup.py) pandocFilter/setup.py 0 → 100644 +13 −0 Original line number Original line Diff line number Diff line from setuptools import setup, find_packages setup( name='oneM2M markdown to pandoc filter', version='0.0.1', author='Andreas Kraft, Miguel Ortega', author_email='an.kraft@gmail.com', description='Convert oneM2M Markdown to Pandoc input', packages=find_packages(), install_requires=[ 'rich', ] ) Loading
# File: pandocFilter/pandocFilter.py
#
#	pandocFilter.py
#
#	Script to convert a oneM2M Markdown file to pandoc input format
#
#	(c) 2023 by Andreas Kraft, Miguel Ortega
#	License: BSD 3-Clause License. See the LICENSE file for further details.
#

import argparse
import os
import re

from rich import print
from rich.progress import Progress, TextColumn, TimeElapsedColumn

# Patterns are compiled once at import time. They are raw strings so that
# `\[` and `\*` reach the regex engine as written instead of being parsed
# as (invalid) string escape sequences.
_TOC_ENTRY = re.compile(r'^(.*\[.*\])')
_TABLE_CAPTION = re.compile(r'^\*\*(Table .*)\*\*')


def readMDFile(progress:Progress, document:str) -> list[str]:
    """Read the markdown file and return a list of lines.

    Args:
        progress: Progress display on which the "Reading document" task is registered.
        document: Path of the markdown file to read.

    Returns:
        The lines of the file, line endings included. Bytes that are not
        valid UTF-8 are substituted rather than raising (errors='replace').

    Raises:
        SystemExit: With exit code 1 when *document* does not exist.
    """
    taskID = progress.add_task('[blue]Reading document', start=False, total=0)

    # Check if file exists
    if not os.path.exists(document):
        print(f'File {document} does not exist')
        # Same exception the `exit()` helper raises, without depending on
        # the `site` module having injected that helper.
        raise SystemExit(1)

    # Read the file, and only then mark the task as finished
    with open(document, 'r', encoding='utf-8', errors='replace') as f:
        lines = f.readlines()
    progress.stop_task(taskID)
    return lines


def writeMDFile(progress:Progress, mdLines:list[str], document:str, outDirectory:str) -> None:
    """Write the markdown lines to a file in the output directory.

    The output file keeps the base name of *document*.

    Args:
        progress: Progress display on which the "Writing document" task is registered.
        mdLines: Lines to write; they are written as-is, no line endings are added.
        document: Path of the source document; only its base name is used.
        outDirectory: Directory that receives the file. It must already exist.
    """
    taskID = progress.add_task('[blue]Writing document', start=False, total=0)

    # os.path.join keeps an empty outDirectory relative to the current
    # directory instead of producing a path at the filesystem root.
    target = os.path.join(outDirectory, os.path.basename(document))
    with open(target, 'w', encoding='utf-8', errors='replace') as f:
        f.writelines(mdLines)
    progress.stop_task(taskID)


def correctTOC(progress:Progress, mdLines:list[str], tocSection:str = 'Contents') -> list[str]:
    """Correct the TOC to be compatible with pandoc.

    The TOC starts at the line that is exactly ``# <tocSection>`` followed by
    a newline and ends at the next line starting with ``#``. Inside the TOC,
    a line containing a ``[...]`` part is cut after its last ``]`` and
    re-terminated; any other line inside the TOC is dropped. Lines outside
    the TOC are passed through unchanged.

    Args:
        progress: Progress display on which the "Correcting TOC" task is registered.
        mdLines: Lines of the markdown document.
        tocSection: Title of the level-1 heading that introduces the TOC.

    Returns:
        A new list of lines with the TOC entries rewritten.
    """
    taskID = progress.add_task('[blue]Correcting TOC', start=False, total=0)
    tocHeading = f'# {tocSection}\n'

    result:list[str] = []
    inTOC = False
    for line in mdLines:
        if line == tocHeading:
            # Start of the TOC section
            inTOC = True
            result.append(line)
        elif not inTOC:
            result.append(line)
        elif line.startswith('#'):
            # The next heading ends the TOC
            inTOC = False
            result.append(line)
        elif (entry := _TOC_ENTRY.match(line)):
            # NOTE(review): a Markdown hard line break needs two trailing
            # spaces; only one is visible in this copy - confirm against
            # the repository version of the file.
            result.append(f'{entry.group(1)} \n')
        # Every other line inside the TOC is intentionally not copied.

    progress.stop_task(taskID)
    return result


def replaceTableCaptions(progress:Progress, mdLines:list[str]) -> list[str]:
    """Replace table captions with a pandoc table caption.

    A caption is a line starting with ``**Table ...**``. It is rewritten to
    ``Table: <caption text>`` and moved in front of the run of ``|`` lines
    (the table rows) that directly precedes it. If no table row directly
    precedes the caption, the rewritten caption stays where it was.

    Args:
        progress: Progress display on which the "Replacing table captions" task is registered.
        mdLines: Lines of the markdown document.

    Returns:
        A new list of lines with the captions rewritten and repositioned.
    """
    taskID = progress.add_task('[blue]Replacing table captions', start=False, total=0)

    result:list[str] = []
    for line in mdLines:
        caption = _TABLE_CAPTION.match(line)
        if caption is None:
            result.append(line)
            continue

        # Walk back over the table rows already emitted to find where the
        # table begins.
        idx = len(result) - 1
        while idx >= 0 and result[idx].startswith('|'):
            idx -= 1

        # Insert the caption exactly once. `idx + 1` is also correct when the
        # table is the very first thing in the document (idx == -1).
        result.insert(idx + 1, f'Table: {caption.group(1)}\n')

    progress.stop_task(taskID)
    return result


def process(document:str, outDirectory:str) -> None:
    """Run the complete conversion for a single document.

    Reads *document*, corrects the TOC, replaces the table captions and
    writes the result to *outDirectory*, showing one progress task per step.

    Args:
        document: Path of the markdown file to convert.
        outDirectory: Existing directory that receives the converted file.
    """
    with Progress(TextColumn('{task.description}'), TimeElapsedColumn()) as progress:
        mdLines = readMDFile(progress, document)
        mdLines = correctTOC(progress, mdLines)
        mdLines = replaceTableCaptions(progress, mdLines)
        writeMDFile(progress, mdLines, document, outDirectory)


if __name__ == '__main__':
    # Parse command line arguments
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--outdir', '-o', action='store', dest='outDirectory', default='out', metavar='<output directory>', help='specify output directory')
    parser.add_argument('document', help='document to parse')
    args = parser.parse_args()

    # Process documents and print output
    os.makedirs(args.outDirectory, exist_ok=True)
    process(args.document, args.outDirectory)
pandocFilter/requirements.txt 0 → 100644 +14 −0 Original line number Original line Diff line number Diff line
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
#    pip-compile
#
markdown-it-py==2.2.0
    # via rich
mdurl==0.1.2
    # via markdown-it-py
pygments==2.15.1
    # via rich
rich==13.3.5
    # via oneM2M-markdown-to-pandoc-filter (setup.py)
# File: pandocFilter/setup.py
#
# Packaging script for the oneM2M markdown to pandoc filter.

from setuptools import setup, find_packages

# Runtime dependencies of the filter script.
_REQUIREMENTS = [
    'rich',
]

# Distribution metadata, kept in one place and handed to setup() below.
# NOTE(review): find_packages() only collects directories containing an
# __init__.py; confirm that the single-module script is actually picked up.
_METADATA = {
    'name': 'oneM2M markdown to pandoc filter',
    'version': '0.0.1',
    'author': 'Andreas Kraft, Miguel Ortega',
    'author_email': 'an.kraft@gmail.com',
    'description': 'Convert oneM2M Markdown to Pandoc input',
    'packages': find_packages(),
    'install_requires': _REQUIREMENTS,
}

setup(**_METADATA)