Commit 78b6c6f4, authored by ankraft, committed by Miguel Angel Reina Ortega
Browse files

Detecting images and tables in the markdown parser

parent f2a6c36b
Loading
Loading
Loading
Loading
+37 −2
Original line number Diff line number Diff line
@@ -24,6 +24,11 @@ class LineType(Enum):
	CODEFENCEEND = auto()
	LIST = auto()
	NOTE = auto()
	STANDALONEIMAGE = auto()
	TABLEHEADER = auto()
	TABLESEPARATOR = auto()
	TABLEROW = auto()
	TABLELASTROW = auto()


@dataclass
@@ -136,6 +141,9 @@ _matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
# Codefence delimiters: optional leading whitespace followed by three backticks.
# The start pattern additionally accepts arbitrary text after the backticks.
_matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
_matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE)
# Note line: optional leading whitespace followed by '>'.
_matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE)
# Line that starts with a markdown image `![alt](target)`; group 1 is the target.
# NOTE(review): the pattern is not anchored at the end, so text following the
# image on the same line is accepted as well - confirm this is intended.
_matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECASE)
# Table row: starts (after optional whitespace) with '|' and ends with '|'.
# Trailing whitespace is optional (`\s*`), consistent with the separator
# pattern below, so a row is recognised with or without a line terminator and
# with any amount of trailing whitespace (e.g. spaces or '\r\n').
_matchTable = re.compile(r'^\s*\|.*\|\s*$', re.IGNORECASE)
# Table separator row: cells made up only of '-', ':' and spaces, e.g. `|---|:--:|`.
_matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE)
# List item indented by exactly two whitespace characters.
_match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
# Markdown link to an anchor, `[text](#anchor)`, preceded by one character
# other than '!' (which excludes images); group 1 is the target including '#'.
_markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
# HTML link `<a href="target">text</a>`; group 1 is the href value.
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
@@ -186,6 +194,8 @@ def analyseMarkdown(filename:str) -> list[Clause]:

	# Go through the lines and detect headers and codefences
	inCodefence = False
	inTable = False
	tableHasSeparator = False
	for line in inLines:

		# Detect and handle codefences
@@ -205,12 +215,37 @@ def analyseMarkdown(filename:str) -> list[Clause]:
			outClauses[-1].append(Line(line, LineType.CODE))
			continue

		# Detect and handle tables
		if _matchTable.match(line) and not inTable:
			inTable = True
			outClauses[-1].append(Line(line, LineType.TABLEHEADER))
			continue
		if inTable:
			if _matchTableSeparator.match(line) and not tableHasSeparator:
				outClauses[-1].append(Line(line, LineType.TABLESEPARATOR))
				tableHasSeparator = True
				continue
			elif _matchTable.match(line):
				outClauses[-1].append(Line(line, LineType.TABLEROW))
				continue
			else:
				inTable = False
				tableHasSeparator = False
				# Mark the previous line as the last row in the table
				outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
				# continue with other matches

		# Detect notes
  		# Notes are lines that start with a '>'.
		if _matchNote.match(line):
			outClauses[-1].append(Line(line, LineType.NOTE))
			continue

		# Detect images on a single line
		if (m := _matchStandAloneImage.match(line)):
			outClauses[-1].append(Line(line, LineType.STANDALONEIMAGE))
			continue

		# Detect headers
		_lineType = LineType.TEXT
		if (m := _matchHeader.match(line)):