Support for grid tables and equations on mkdocs (bc780760) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

generateChangemarks/.gitlab-ci.yml

+2 −2

Original line number	Diff line number	Diff line
		@@ -197,12 +197,12 @@ pages:
		curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fstylesheets%2Fextra%2Ecss/raw?ref=master" >> extra.css
		- mkdir -p docs/stylesheets && mv extra.css docs/stylesheets/
		- \|
		curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fmkdocs%2Eyml/raw?ref=master" >> mkdocs.yml
		curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fmkdocs%2Eyml/raw?ref=gridtables" >> mkdocs.yml
		- \|
		curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FindexDownload%2Emd/raw?ref=master" >> indexDownload.md
		- mkdir -p docs/download && mv indexDownload.md docs/download/index.md
		- \|
		curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FtoMkdocs%2Epy/raw?ref=master" >> toMkdocs.py
		curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FtoMkdocs%2Epy/raw?ref=gridtables" >> toMkdocs.py
		- \|
		export SPEC_NAME=$(ls \| grep -E "(TS\|TR\|WI).*\.md" \| cut -d'.' -f1)
		- \|

toMkdocs/mkdocs.yml

+6 −0

Original line number	Diff line number	Diff line
		@@ -60,6 +60,8 @@ markdown_extensions:
		pygments_lang_class: true
		- pymdownx.inlinehilite
		- pymdownx.snippets
		- pymdownx.arithmatex:
		generic: true
		- pymdownx.superfences:
		custom_fences:
		- name: mermaid
		@@ -69,6 +71,10 @@ markdown_extensions:
		alternate_style: true
		- tables

		extra_javascript:
		- javascripts/mathjax.js
		- https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js

		##############################################################################

		extra:

toMkdocs/toMkdocs.py

+342 −4

Original line number	Diff line number	Diff line
		@@ -11,6 +11,7 @@ from enum import Enum, auto
		import argparse, re, os, shutil, hashlib, base64
		from dataclasses import dataclass
		from rich import print
		from html import escape

		verbose = False
		veryVerbose = False
		@@ -418,6 +419,9 @@ _matchNote = re.compile(r'^\s>\s', re.IGNORECASE)
		_matchStandAloneImage = re.compile(r'^\s!\[[^\]]\]$([^)])$\s', re.IGNORECASE)
		_matchTable = re.compile(r'^\s\\|.\\|\s$', re.IGNORECASE)
		_matchTableSeparator = re.compile(r'^\s\\|([-: ]+\\|)+\s$', re.IGNORECASE)
		_matchGridTable = re.compile(r'^\s\+-.\+\s$', re.IGNORECASE)
		_matchGridTableBodySeparator = re.compile(r'.\+([-:]+\+)+.$', re.IGNORECASE)
		_matchGridTableHeaderSeparator = re.compile(r'.\+([=:]+\+)+.$', re.IGNORECASE)
		_match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
		_markdownLink = re.compile(r'[^!]\[[^\]]\]$(#[^)])$', re.IGNORECASE)
		_htmlLink = re.compile(r'<a\s+href="([^"\'])">[^<]</a>', re.IGNORECASE)
		@@ -447,6 +451,309 @@ def shortHash(value:str, length:int) -> str:
		).digest()
		).decode()[:length]

		def parse_pandoc_table_with_spans(pandoc_table):
		"""
		Parse a Pandoc-style grid table into a structure for HTML conversion with rowspan and colspan.

		:param pandoc_table: String of the Pandoc-style grid table.
		:return: List of lists representing the table with metadata for spans.
		"""
		# NOTE(review): the ":return:" text above is out of date - the final statement returns the
		# tuple (header_rows, data_rows). Both are lists of rows; every row is a list of cell dicts
		# with the keys "content", "rowspan", "colspan", "colspan_adjusted" and "position".
		# Raises ValueError when no separator line is found, when a line is inconsistent with the
		# column layout, when no rows were collected, or when the final span check fails.
		# NOTE(review): block indentation is missing in this copy of the function; the nesting of
		# the if/else chains below has to be restored before the code can run.

		# Split the input into lines
		# (the table as a whole and every individual line are stripped of surrounding whitespace)
		lines = [line.strip() for line in pandoc_table.strip().split("\n")]

		# Detect separator lines by pattern (it does not take into account partial separators)
		def is_separator(line):
		# NOTE(review): "\s" without a quantifier demands a whitespace character before the first
		# "+" and another one before the end of the line. The lines were stripped above, so this
		# cannot match as written - presumably "\s*" was intended; confirm against upstream.
		# The pattern is also recompiled on every call.
		_matchGridTableSeparator = re.compile(r'\s\+([-:=]+\+)+\s$', re.IGNORECASE)
		return _matchGridTableSeparator.match(line)

		# Matches a split cell that consists solely of "-" / ":" characters, i.e. a separator
		# segment inside a partial separator line.
		_matchGridTableSeparatorLine = re.compile(r'[-:]+$', re.IGNORECASE)
		separator_indices = [i for i, line in enumerate(lines) if is_separator(line)]

		# NOTE(review): debug output left in.
		print(separator_indices)
		if not separator_indices:
		raise ValueError("No valid separators found in the provided Pandoc table.")

		# Calculate max number of columns
		# The separator line with the most "+" signs defines the column count (number of "+" minus
		# one); delimiter_positions is rebuilt for that line.
		delimiter_positions = []
		number_of_columns = 0
		for separator_index in separator_indices:
		if lines[separator_index].count("+") - 1 > number_of_columns:
		number_of_columns = lines[separator_index].count("+") - 1
		delimiter_positions = []
		for j in range(number_of_columns):
		# Search starts one past the previous delimiter (or past index 0 for the first column).
		delimiter_positions_start = delimiter_positions[j - 1] if j != 0 else 0
		# The comprehension iterates over the one-character string "+", so it yields at most one position.
		del_positions = [lines[separator_index].find(delimiter, delimiter_positions_start + 1) for delimiter in "+" if delimiter in lines[separator_index][delimiter_positions_start + 1:]]
		delimiter_positions.append(min(del_positions) if del_positions else -1)
		# Look for a header separator; if several lines match, the last matching index is kept.
		# header_separator_index is only assigned when a match is found.
		has_header = False
		for index in separator_indices:
		if _matchGridTableHeaderSeparator.match(lines[index]):
		has_header = True
		header_separator_index = index
		header_rows = []
		data_rows = []
		# The lines between two consecutive full separator lines are handled as one logical row.
		for row in range(len(separator_indices) - 1):
		table_row = []
		# auxiliar_row collects the cells of an additional row introduced by a partial separator;
		# use_auxiliar_row[i] is set once column i has seen a separator segment, after which
		# content of that column in full-width data lines is stored in auxiliar_row.
		auxiliar_row = []
		use_auxiliar_row = []
		has_merged_cells = False
		in_data_row = False
		start, end = separator_indices[row], separator_indices[row + 1]
		row_lines = lines[start:end] # Lines between separators including separator line start as it gives information about the number of columns of the row
		if row_lines:
		# Combine multiline content into single strings for each cell
		for line in row_lines:
		if is_separator(line) and not in_data_row:
		number_of_columns_row = line.count("+") - 1
		in_data_row = True
		# NOTE(review): after strip("+") a typical "---+---" line has no whitespace around "+",
		# so r"\s\+\s" finds nothing to split on and parts[i] below can raise IndexError -
		# presumably r"\s*\+\s*" was intended; confirm against upstream.
		parts = re.split(r"\s\+\s", line.strip("+"))
		# Add as many cells as columns with span attributes
		delimiter_index = 0
		for i in range(number_of_columns_row):
		delimiter_index += len(parts[i]) + 1
		table_row.append({
		"content": "NOCONTENT",
		"rowspan": 0,
		"colspan": 0,
		"colspan_adjusted": False,
		"position": delimiter_index # Position of cell delimiter +
		})
		# auxiliar_row is sized with the table-wide column count, table_row with this row's own count.
		for i in range(number_of_columns):
		auxiliar_row.append({
		"content": "NOCONTENT",
		"rowspan": 0,
		"colspan": 0,
		"colspan_adjusted": False,
		"position": 0
		})
		use_auxiliar_row.append(False)

		elif in_data_row:
		# Regular data row or partial separator
		if _matchGridTableBodySeparator.match(line): # Partial separator
		has_merged_cells = True
		# NOTE(review): "\|" in a non-raw string is an unrecognised escape, i.e. the two characters
		# backslash and "|"; strip() therefore also removes backslashes. Presumably "|" was meant.
		cells = re.split(r"\s[\\|\+]\s", line.strip("\|").strip("+")) # (?<!\\)[\\|\+]
		if len(cells) < number_of_columns: # Colspan: Positions of \| with respect to + need to be determined
		for i in range(len(cells)):
		if _matchGridTableSeparatorLine.match(cells[i]): # A new row is to be added
		use_auxiliar_row[i] = True
		else:
		if table_row[i]['content'] == "NOCONTENT":
		table_row[i]['rowspan'] += 1
		table_row[i]['colspan'] += 1
		table_row[i]['content'] = cells[i]
		else:
		table_row[i]['content'] += cells[i]
		# Cell which is not separator
		table_row[i]['rowspan'] += 1
		# The colspan of a cell is worked out only once (guarded by 'colspan_adjusted').
		if not table_row[i]['colspan_adjusted']:
		table_row[i]['colspan_adjusted'] = True
		for j in range(i, len(cells)):
		delimiter_start = table_row[j-1]['position'] if j != 0 else 0
		# Iterates over the characters of "\|+" (backslash, "|", "+") and takes the nearest hit.
		positions = [line.find(delimiter, delimiter_start + 1) for delimiter in "\|+" if delimiter in line[delimiter_start + 1:]]
		position = min(positions) if positions else -1
		# NOTE(review): delimiter_positions_start is an int (assigned in the column-count loop
		# above), so subscripting it raises TypeError. The data-row branch further down compares
		# against delimiter_positions[j] - most likely the intended name here as well.
		if position > delimiter_positions_start[j]: # Colspan to add
		table_row[i]['colspan'] += 1
		elif position < delimiter_positions_start[j]:
		raise ValueError("Wrong cell formatting")
		else:
		break
		elif len(cells) == number_of_columns: # Simple row with partial separator, # A new row is to be added
		for i in range(len(cells)):
		if _matchGridTableSeparatorLine.match(cells[i]): # Update cell in new row
		use_auxiliar_row[i] = True
		else:
		if table_row[i]['content'] == "NOCONTENT":
		table_row[i]['rowspan'] += 1
		table_row[i]['colspan'] += 1
		table_row[i]['content'] = cells[i]
		else:
		table_row[i]['content'] += cells[i]
		# Cell which is not separator
		table_row[i]['rowspan'] += 1
		# Not needed, no colspan as number of cells is equal to number of columns
		#for j in range(i, len(cells)):
		# delimiter_start = table_row[j-1]['position'] if j != 0 else 0
		# positions = [line.find(delimiter,delimiter_start+1) for delimiter in "\|+" if delimiter in line[delimiter_start+1:]]
		# position = min(positions) if positions else -1
		# if position > table_row[i]['position']: # Only colspan to be increased
		# table_row[i]['colspan'] += 1
		# elif position + 1 < table_row[i]['position']:
		# raise ValueError("Wrong cell formatting")
		# else:
		# break

		else:
		raise ValueError("More cells than columns found")
		else: # Data row
		# NOTE(review): as a regex, r"\s\\|\s" is an alternation - "whitespace followed by a
		# backslash" OR "a single whitespace" - so this splits on whitespace rather than on "|".
		# Presumably r"\s*\|\s*" was intended; confirm against upstream.
		cells = re.split(r"\s\\|\s", line.strip("\|"))
		if len(cells) < number_of_columns: # Colspan: Positions of \| with respect to + need to be determined
		for i in range(len(cells)):
		if table_row[i]['content'] == "NOCONTENT":
		table_row[i]['rowspan'] += 1
		table_row[i]['colspan'] += 1
		table_row[i]['content'] = cells[i]
		else:
		table_row[i]['content'] += cells[i]
		if not table_row[i]['colspan_adjusted']:
		table_row[i]['colspan_adjusted'] = True
		for j in range(i, len(cells)):
		delimiter_start = table_row[j-1]['position'] if j != 0 else 0
		# NOTE(review): find("\|", ...) looks for the two-character sequence backslash + "|".
		if line.find("\|", delimiter_start+1) > delimiter_positions[j]: # Colspan to be increased
		table_row[i]['colspan'] += 1
		elif line.find("\|", delimiter_start+1) < delimiter_positions[j]:
		raise ValueError("Wrong cell formatting")
		else:

		break

		elif len(cells) == number_of_columns: # Simple row
		for i in range(len(cells)):
		# Columns flagged by an earlier partial separator now feed the auxiliary row.
		if use_auxiliar_row[i]:
		if auxiliar_row[i]['content'] == "NOCONTENT":
		auxiliar_row[i]['rowspan'] += 1
		auxiliar_row[i]['colspan'] += 1
		auxiliar_row[i]['content'] = cells[i]
		else:
		auxiliar_row[i]['content'] += cells[i]
		else:
		if table_row[i]['content'] == "NOCONTENT":
		table_row[i]['rowspan'] += 1
		table_row[i]['colspan'] += 1
		table_row[i]['content'] = cells[i]
		else:
		table_row[i]['content'] += cells[i]
		else:
		raise ValueError("More cells than columns found")
		else:
		raise ValueError("No separator line found for row starting")

		# Rows that start at or after the header separator are body rows, earlier ones header rows.
		# NOTE(review): both branches require has_header, so for a table without a header separator
		# no row is stored at all and the "No valid rows found" error below is raised.
		if has_header and start >= header_separator_index: # table_row and auxiliar_row are part of data_rows
		data_rows.append(table_row)
		if has_merged_cells:
		data_rows.append(auxiliar_row)
		elif has_header and start < header_separator_index: # table_row and auxiliar_row are part of header_rows
		header_rows.append(table_row)
		if has_merged_cells:
		header_rows.append(auxiliar_row)

		#print(header_rows)
		#print(data_rows)
		# Correct newlines characters
		# (every literal backslash in the cell text is replaced by an HTML line break)
		for row in header_rows:
		for cell in row:
		cell['content'] = cell['content'].replace("\\", "<br>")
		for row in data_rows:
		for cell in row:
		cell['content'] = cell['content'].replace("\\", "<br>")
		# Check if there are any data rows
		if not data_rows and not header_rows:
		raise ValueError("No valid rows found in the provided Pandoc table.")

		# Format text
		# Each "**" is replaced in turn by "<strong>" and "</strong>". The toggle is not reset per
		# cell, only once between the header rows and the data rows.
		bold = "<strong>"
		for row in header_rows:
		for cell in row:
		while cell['content'].find("**") != -1:
		cell['content'] = cell['content'].replace("**", bold, 1)
		if bold == "<strong>":
		bold = "</strong>"
		else:
		bold = "<strong>"
		bold = "<strong>"
		for row in data_rows:
		for cell in row:
		while cell['content'].find("**") != -1:
		cell['content'] = cell['content'].replace("**", bold, 1)
		if bold == "<strong>":
		bold = "</strong>"
		else:
		bold = "<strong>"

		# Checking that the grid is correct. Not tested much - need to take into account rowspan of previous rows
		# Per row, the colspans are added up; a zero-colspan cell still covered by a rowspan carried
		# over from an earlier row (forward_rowspan) counts as one column. The total has to equal
		# number_of_columns, otherwise ValueError is raised.
		# NOTE(review): the local name "sum" shadows the builtin within this function.

		forward_rowspan = []
		for row_index in range(len(header_rows)):
		if len(forward_rowspan) == 0:
		forward_rowspan = [0 for _ in range(len(header_rows[row_index]))]
		sum = 0
		for cell_index in range(len(header_rows[row_index])):
		sum += header_rows[row_index][cell_index]['colspan']
		if row_index > 0 and header_rows[row_index][cell_index]['colspan'] == 0:
		if forward_rowspan[cell_index] > 0:
		sum += 1
		forward_rowspan[cell_index] -= 1
		if forward_rowspan[cell_index] == 0 and header_rows[row_index][cell_index]['rowspan'] > 1:
		forward_rowspan[cell_index] = header_rows[row_index][cell_index]['rowspan'] -1
		if not sum == number_of_columns:
		raise ValueError("Grid table not converted properly")
		# Same check for the body rows, starting from a fresh forward_rowspan list.
		forward_rowspan = []
		for row_index in range(len(data_rows)):
		if len(forward_rowspan) == 0:
		forward_rowspan = [0 for _ in range(len(data_rows[row_index]))]
		sum = 0
		for cell_index in range(len(data_rows[row_index])):
		sum += data_rows[row_index][cell_index]['colspan']
		if row_index > 0 and data_rows[row_index][cell_index]['colspan'] == 0:
		if forward_rowspan[cell_index] > 0:
		sum += 1
		forward_rowspan[cell_index] -= 1
		if forward_rowspan[cell_index] == 0 and data_rows[row_index][cell_index]['rowspan'] > 1:
		forward_rowspan[cell_index] = data_rows[row_index][cell_index]['rowspan'] - 1
		if not sum == number_of_columns:
		raise ValueError("Grid table not converted properly")
		#if has_header:
		# table_with_spans = header_rows

		#table_with_spans += data_rows

		#return table_with_spans
		# Header rows and body rows are returned separately, as a 2-tuple.
		return header_rows, data_rows

		def _render_span_rows(rows):
		    """Return the HTML fragments (one <tr> per row, one <td> per visible cell) for *rows*."""
		    fragments = []
		    for row in rows:
		        fragments.append(" <tr>\n")
		        for cell in row:
		            # A cell whose rowspan or colspan is 0 emits no <td> of its own.
		            if cell['rowspan'] == 0 or cell['colspan'] == 0:
		                continue
		            # Span attributes are only written when they differ from the HTML default of 1.
		            rowspan = f" rowspan=\"{cell['rowspan']}\"" if cell["rowspan"] > 1 else ""
		            colspan = f" colspan=\"{cell['colspan']}\"" if cell["colspan"] > 1 else ""
		            fragments.append(f" <td{rowspan}{colspan}>{cell['content']}</td>\n")
		        fragments.append(" </tr>\n")
		    return fragments

		def generate_html_table_with_spans(pandoc_table):
		    """
		    Generate an HTML table from a Pandoc-style grid table with row and column spans.

		    The table text is parsed with parse_pandoc_table_with_spans(); any ValueError raised
		    there propagates to the caller. A <thead> section is emitted only when at least one
		    header cell has a non-zero rowspan and colspan; <tbody> is always emitted. Cell content
		    is inserted as-is, without HTML escaping.

		    :param pandoc_table: String of the Pandoc-style grid table.
		    :return: HTML string.
		    """
		    grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)

		    has_header = any(
		        cell['rowspan'] != 0 and cell['colspan'] != 0
		        for row in grid_header
		        for cell in row
		    )

		    # Collect the fragments in a list and join once instead of growing a string with "+=".
		    fragments = ["<table>\n"]
		    if has_header:
		        fragments.append(" <thead>\n")
		        fragments.extend(_render_span_rows(grid_header))
		        fragments.append(" </thead>\n")

		    fragments.append(" <tbody>\n")
		    fragments.extend(_render_span_rows(grid_body))
		    fragments.append(" </tbody>\n")
		    fragments.append("</table>")
		    return "".join(fragments)

		def analyseMarkdown(filename:str) -> Document:
		""" Analyse the markdown file and split it into clauses.
		@@ -473,6 +780,9 @@ def analyseMarkdown(filename:str) -> Document:
		inCodefence = False
		inTable = False
		tableHasSeparator = False
		inGridTable = False
		gridTableHasSeparator = False
		gridTable = ""
		for line in inLines:

		# Detect and handle codefences
		@@ -493,7 +803,7 @@ def analyseMarkdown(filename:str) -> Document:
		continue

		# Detect and handle tables
		if _matchTable.match(line) and not inTable:
		if _matchTable.match(line) and not inTable and not inGridTable:
		inTable = True
		outClauses[-1].append(Line(line, LineType.TABLEHEADER))
		continue
		@@ -512,6 +822,34 @@ def analyseMarkdown(filename:str) -> Document:
		outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
		# continue with other matches

		#Detect grid tables and convert them to html table
		if _matchGridTable.match(line) and not inGridTable:
		inGridTable = True
		#outClauses[-1].append(Line(line, LineType.TABLEHEADER))
		gridTable += line
		continue
		if inGridTable:
		if _matchGridTableHeaderSeparator.match(line) or _matchGridTableBodySeparator.match(line):
		#outClauses[-1].append(Line(line, LineType.TABLESEPARATOR))
		gridTable += line
		continue
		elif _matchTable.match(line):
		#outClauses[-1].append(Line(line, LineType.TABLEROW))
		gridTable += line
		continue
		else:
		inGridTable = False
		# Mark the previous line as the last row in the table
		#outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
		print(gridTable)
		htmltable = ""
		htmltable = generate_html_table_with_spans(gridTable)
		print(htmltable)
		for row in htmltable:
		outClauses[-1].append(Line(row, LineType.TABLEROW))
		gridTable = ""
		# continue with other matches

		# Detect notes
		# Notes are lines that start with a '>'.
		if _matchNote.match(line):

Admin message