Allow + and | delimiters as part of cell content (39715c63) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

generateSpecWebSite/gridTableTools.py

+74 −9

Original line number	Diff line number	Diff line
		@@ -207,7 +207,9 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
		while delimiterStart == None:
		delimiterStart = row[colI - 1].position if colI > 0 else 0
		colI -= 1
		positions = [line.find(delimiter, delimiterStart + 1) for delimiter in "\|+" if delimiter in line[delimiterStart + 1:]]

		#positions = [line.find(delimiter, delimiterStart + 1) for delimiter in "\|+" if delimiter in line[delimiterStart + 1:]]
		positions = [match.start() for match in matchGridTableDelimiter.finditer(line, delimiterStart + 1)]
		position = min(positions) if positions else -1
		if position > delimiterPositions[j]: # Colspan to be increased
		row[columnIndex].colspan += 1
		@@ -240,7 +242,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
		printDebug(f'\nChecking line: "{line}"')
		printDebug(f'Expected delimiter positions: {delimiterPositions}')

		# For full separator lines (only +)
		""" # For full separator lines (only +)
		if '+' in line and '\|' not in line:
		currentPositions = [i for i, char in enumerate(line) if (char == '+' and i > 0)]
		printDebug(f'Full separator line - Found + at positions: {currentPositions}')
		@@ -255,13 +257,53 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
		for pos in currentPositions)

		# For partial separators (mix of + and \|)
		currentPositions = [i for i, char in enumerate(line) if (char in delimiters and i > 0)]
		if matchGridTableBodySeparator.match(line):
		# Find all delimiter positions that are not escaped
		currentPositions = []
		i = 0
		while i < len(line):
		if i > 0 and line[i-1] == '\\': # Skip escaped delimiters
		i += 1
		continue
		if line[i] in delimiters:
		currentPositions.append(i)
		i += 1

		printDebug(f'Partial separator - Found delimiters at positions: {currentPositions}')
		printDebug(f'Characters at those positions: {[line[pos] for pos in currentPositions]}')
		return all(delimiterPositions[-1] in currentPositions and line.startswith(('+', '\|')) and pos in delimiterPositions
		for pos in currentPositions)

		separatorIndices = [i for i, line in enumerate(lines) if isSeparator(line)]
		# For data lines (starting with \|)
		if line.startswith('\|') and not matchGridTableBodySeparator.match(line):
		# Find all \| positions that are not escaped (+ doesn't need escaping, but \| does)
		currentPositions = []
		i = 0
		while i < len(line):
		if i > 0 and line[i-1] == '\\' and line[i] == '\|': # Skip escaped \|
		i += 1
		continue
		if line[i] == '\|':
		currentPositions.append(i)
		i += 1

		printDebug(f'Data line - Found \| at positions: {currentPositions}')
		return all(delimiterPositions[-1] in currentPositions and line.startswith("\|") and pos in delimiterPositions
		for pos in currentPositions)"""
		#for match in re.finditer(r'(?<!\\)\\|\|(?=\+:?-+:?\+)\+\|(?<![^-:])\+', line):
		currentPositions = []
		for match in matchGridTableDelimiter.finditer(line):
		if match.start() > 0: # Skip the first position
		currentPositions.append(match.start())

		printDebug(f'Partial separator - Found delimiters at positions: {currentPositions}')
		printDebug(f'Characters at those positions: {[line[pos] for pos in currentPositions]}')
		return all(delimiterPositions[-1] in currentPositions and line.startswith(('+', '\|')) and pos in delimiterPositions
		for pos in currentPositions)

		return False

		separatorIndices = [i for i, line in enumerate(lines) if isSeparator(line.rstrip())]

		if not separatorIndices:
		raise ValueError('No valid separators found in the provided grid table.')
		@@ -285,7 +327,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
		dataRows:GridTableRowList = []

		for index in separatorIndices:
		if matchGridTableHeaderSeparator.match(lines[index]):
		if matchGridTableHeaderSeparator.match(lines[index].rstrip()):
		hasHeader = True
		headerSeparatorIndex = index
		parts = re.split(r'\+', lines[index].strip('+'))
		@@ -352,7 +394,15 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
		if not checkDelimiterAlignment(line, delimiterPositions):
		raise ValueError(f'Misaligned delimiters in separator row: {line}')

		parts = re.split(r'\s\+\s', line.strip('+'))
		# Split by delimiters
		parts = []
		matches = list(matchGridTableDelimiter.finditer(line))
		pos = 0
		for match in matches:
		if match.start() > pos:
		parts.append(line[pos+1:match.start()])
		pos = match.start()

		delimiterIndex = 0

		rows.append(GridRow(numberOfColumns))
		@@ -384,7 +434,14 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
		if not checkDelimiterAlignment(line, delimiterPositions):
		raise ValueError(f'Misaligned delimiters in partial separator: {line}')

		parts = re.split(r'[\\|\+]', line.strip('\|').strip('+')) # (?<!\\)[\\|\+]
		# Split by delimiters
		parts = []
		matches = list(matchGridTableDelimiter.finditer(line))
		pos = 0
		for match in matches:
		if match.start() > pos:
		parts.append(line[pos+1:match.start()])
		pos = match.start()

		#Add another row, set delimiters for each cell
		rows.append(GridRow(numberOfColumns))
		@@ -446,7 +503,15 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
		raise ValueError(f'More cells than columns found ({len(parts)} {numberOfColumns})')

		else: # Data row
		cellsContent = re.split(r'\\|', line.strip('\|'))
		#cellsContent = re.split(r'\\|', line.strip('\|'))
		#Split by delimiters
		cellsContent = []
		matches = list(matchGridTableDelimiter.finditer(line))
		pos = 0
		for match in matches:
		if match.start() > pos:
		cellsContent.append(line[pos+1:match.start()])
		pos = match.start()

		# Add delimiter alignment check
		if not checkDelimiterAlignment(line, delimiterPositions):

generateSpecWebSite/regexMatches.py

+5 −5

Original line number	Diff line number	Diff line
		@@ -22,11 +22,11 @@ markdownLink = re.compile(r'[^!]\[[^\]]\]\((#[^)])\)', re.IGNORECASE)
		matchCodefenceStart = re.compile(r'\s```\s?.', re.IGNORECASE)
		matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE)
		matchGridTable = re.compile(r'^\s\+-.\+\s$', re.IGNORECASE)
		matchGridTableBodySeparator = re.compile(r'.\+([:-]+\+)+.$', re.IGNORECASE)
		matchGridTableBodySeparatorLine = re.compile(r'[-:]+$', re.IGNORECASE)
		matchGridTableHeaderSeparator = re.compile(r'.\+([=:]+\+)+.$', re.IGNORECASE)
		matchGridTableSeparator = re.compile(r'\s\+([-:=]+\+)+\s$', re.IGNORECASE)
		matchGridTableBodySeparator = re.compile(r'.\+([:-]+\+)+.$', re.IGNORECASE)
		matchGridTableBodySeparator = re.compile(r'^[+\|].(\+[:-]+\+($\|.[+\|]$))', re.IGNORECASE)
		matchGridTableBodySeparatorLine = re.compile(r'^:?-+:?$', re.IGNORECASE)
		matchGridTableHeaderSeparator = re.compile(r'^\+([=:]+\+)+$', re.IGNORECASE)
		matchGridTableSeparator = re.compile(r'^\+([-:=]+\+)+$', re.IGNORECASE)
		matchGridTableDelimiter = re.compile(r'(?<!\\)\\|\|(?=\+:?[-=]+:?\+)\+\|(?<![^-=:])\+', re.IGNORECASE)
		matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
		matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d(\.\d+)\b', re.IGNORECASE)
		matchListInContent = re.compile(r'^(?:\s(P<marker>[-+]\|\s*\d+\.))\s+(P<content>.+)$', re.IGNORECASE)

Original line number	Diff line number	Diff line
		@@ -22,11 +22,11 @@ markdownLink = re.compile(r'[^!]\[[^\]]\]\((#[^)])\)', re.IGNORECASE)
		matchCodefenceStart = re.compile(r'\s```\s?.', re.IGNORECASE)
		matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE)
		matchGridTable = re.compile(r'^\s\+-.\+\s$', re.IGNORECASE)
		matchGridTableBodySeparator = re.compile(r'.\+([:-]+\+)+.$', re.IGNORECASE)
		matchGridTableBodySeparatorLine = re.compile(r'[-:]+$', re.IGNORECASE)
		matchGridTableHeaderSeparator = re.compile(r'.\+([=:]+\+)+.$', re.IGNORECASE)
		matchGridTableSeparator = re.compile(r'\s\+([-:=]+\+)+\s$', re.IGNORECASE)
		matchGridTableBodySeparator = re.compile(r'.\+([:-]+\+)+.$', re.IGNORECASE)
		matchGridTableBodySeparator = re.compile(r'^[+\|].(\+[:-]+\+($\|.[+\|]$))', re.IGNORECASE)
		matchGridTableBodySeparatorLine = re.compile(r'^:?-+:?$', re.IGNORECASE)
		matchGridTableHeaderSeparator = re.compile(r'^\+([=:]+\+)+$', re.IGNORECASE)
		matchGridTableSeparator = re.compile(r'^\+([-:=]+\+)+$', re.IGNORECASE)
		matchGridTableDelimiter = re.compile(r'(?<!\\)\\|\|(?=\+:?[-=]+:?\+)\+\|(?<![^-=:])\+', re.IGNORECASE)
		matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
		matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d(\.\d+)\b', re.IGNORECASE)
		matchListInContent = re.compile(r'^(?:\s(P<marker>[-+]\|\s*\d+\.))\s+(P<content>.+)$', re.IGNORECASE)