Detecting images and tables in the markdown parser (78b6c6f4) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

toMkdocs/toMkdocs.py

+37 −2

Original line number	Diff line number	Diff line
		@@ -24,6 +24,11 @@ class LineType(Enum):
		CODEFENCEEND = auto()
		LIST = auto()
		NOTE = auto()
		STANDALONEIMAGE = auto()
		TABLEHEADER = auto()
		TABLESEPARATOR = auto()
		TABLEROW = auto()
		TABLELASTROW = auto()


		@dataclass
		@@ -136,6 +141,9 @@ _matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
		_matchCodefenceStart = re.compile(r'\s```\s?.', re.IGNORECASE)
		_matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE)
		_matchNote = re.compile(r'^\s>\s', re.IGNORECASE)
		_matchStandAloneImage = re.compile(r'^\s!\[[^\]]\]$([^)])$\s', re.IGNORECASE)
		_matchTable = re.compile(r'^\s\\|.\\|\s$', re.IGNORECASE)
		_matchTableSeparator = re.compile(r'^\s\\|([-: ]+\\|)+\s$', re.IGNORECASE)
		_match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
		_markdownLink = re.compile(r'[^!]\[[^\]]\]$(#[^)])$', re.IGNORECASE)
		_htmlLink = re.compile(r'<a\s+href="([^"\'])">[^<]</a>', re.IGNORECASE)
		@@ -186,6 +194,8 @@ def analyseMarkdown(filename:str) -> list[Clause]:

		# Go through the lines and detect headers and codefences
		inCodefence = False
		inTable = False
		tableHasSeparator = False
		for line in inLines:

		# Detect and handle codefences
		@@ -205,12 +215,37 @@ def analyseMarkdown(filename:str) -> list[Clause]:
		outClauses[-1].append(Line(line, LineType.CODE))
		continue

		# Detect and handle tables
		if _matchTable.match(line) and not inTable:
		inTable = True
		outClauses[-1].append(Line(line, LineType.TABLEHEADER))
		continue
		if inTable:
		if _matchTableSeparator.match(line) and not tableHasSeparator:
		outClauses[-1].append(Line(line, LineType.TABLESEPARATOR))
		tableHasSeparator = True
		continue
		elif _matchTable.match(line):
		outClauses[-1].append(Line(line, LineType.TABLEROW))
		continue
		else:
		inTable = False
		tableHasSeparator = False
		# Mark the previous line as the last row in the table
		outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
		# continue with other matches

		# Detect notes
		# Notes are lines that start with a '>'.
		if _matchNote.match(line):
		outClauses[-1].append(Line(line, LineType.NOTE))
		continue

		# Detect images on a single line
		if (m := _matchStandAloneImage.match(line)):
		outClauses[-1].append(Line(line, LineType.STANDALONEIMAGE))
		continue

		# Detect headers
		_lineType = LineType.TEXT
		if (m := _matchHeader.match(line)):