Some cleanup for handling of grid tables (bb284002) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

toMkdocs/toMkdocs.py

+103 −113

Original line number	Diff line number	Diff line
		@@ -7,6 +7,8 @@
		# directory structure.
		#
		from __future__ import annotations

		import logging
		from enum import Enum, auto
		import argparse, re, os, shutil, hashlib, base64
		from dataclasses import dataclass
		@@ -485,6 +487,19 @@ def parse_pandoc_table_with_spans(pandoc_table):
		self.list_flag = False
		self.auxiliar_index = None

		def set_alignment(self):
		    """Set ``self.alignment`` from the header separator column containing this cell.

		    Scans ``header_delimiter_positions`` in order and picks the first
		    entry that is at or beyond ``self.position``; the entry of
		    ``default_alignments`` at the same index becomes ``self.alignment``.
		    Neither list is a parameter or an attribute: both are read from the
		    enclosing scope.

		    Raises:
		        ValueError: if ``self.position`` is greater than every examined
		            header delimiter position.
		    """
		    # Index-based on purpose: only the first len(default_alignments)
		    # delimiter positions are ever considered.
		    for column_index in range(len(default_alignments)):
		        if self.position <= header_delimiter_positions[column_index]:
		            self.alignment = default_alignments[column_index]
		            return
		    raise ValueError("Invalid table formatting")

		class Row():
		""" Represents a row in the markdown file. """
		cells:list[Cell] = []
		@@ -492,6 +507,12 @@ def parse_pandoc_table_with_spans(pandoc_table):
		def __init__(self, length: int = 1) -> None:
		self.cells = [Cell() for _ in range(length)]

		def __getitem__(self, item):
		return self.cells[item]

		def __setitem__(self, key, value):
		self.cells[key] = value

		# Detect separator lines by pattern (it does not take into account partial separators)
		def is_separator(line):
		    """Return the result of ``_matchGridTableSeparator.match(line)``.

		    Presumably ``_matchGridTableSeparator`` is a compiled regex, so the
		    result is truthy when ``line`` is a separator line - verify against
		    its definition.
		    """
		    separator_match = _matchGridTableSeparator.match(line)
		    return separator_match
		@@ -573,7 +594,7 @@ def parse_pandoc_table_with_spans(pandoc_table):
		has_header = True
		header_separator_index = index
		header_rows = []
		parts = re.split(r"\s\+\s", lines[index].strip("+"))
		parts = re.split(r"\+", lines[index].strip("+"))
		default_alignments = []
		#Calculate default alignments and positions of delimiters
		for part_index in range(len(parts)):
		@@ -592,9 +613,6 @@ def parse_pandoc_table_with_spans(pandoc_table):
		for row in range(len(separator_indices) - 1):
		table_row = []
		auxiliar_rows = []
		auxiliar_row = []
		use_auxiliar_row = []
		list_flags = []
		has_merged_cells = False
		in_data_row = False
		start, end = separator_indices[row], separator_indices[row + 1]
		@@ -623,45 +641,31 @@ def parse_pandoc_table_with_spans(pandoc_table):
		table_row = Row(number_of_columns_row)
		for i in range(number_of_columns_row):
		delimiter_index += len(parts[i]) + 1
		table_row.cells[i].alignment = default_alignments[i] if i == 0 else "align=\"center\""
		table_row.cells[i].position = delimiter_index # Position of cell delimiter +
		table_row[i].alignment = default_alignments[i] if i == 0 else "align=\"center\""
		table_row[i].position = delimiter_index # Position of cell delimiter +

		#Set alignment as defined by header separator line
		while header_delimiter_index in range(len(default_alignments)) and table_row.cells[i].position > header_delimiter_positions[header_delimiter_index]:
		header_delimiter_index += 1
		if header_delimiter_index in range(len(default_alignments)):
		if table_row.cells[i].position < header_delimiter_positions[header_delimiter_index]:
		table_row.cells[i].alignment = default_alignments[header_delimiter_index]
		elif table_row.cells[i].position == header_delimiter_positions[header_delimiter_index]:
		table_row.cells[i].alignment = default_alignments[i]
		header_delimiter_index += 1
		else:
		raise ValueError("Invalid table formatting")

		#auxiliar_row = Row(number_of_columns)
		#for i in range(number_of_columns):
		#auxiliar_row.append(default_cell)
		#use_auxiliar_row.append(False)
		#auxiliar_rows.append({'auxiliar_row':auxiliar_row, 'use_auxiliar':use_auxiliar_row, 'list_flags':list_flags})
		table_row[i].set_alignment()

		elif in_data_row:
		# Regular data row or partial separator
		if _matchGridTableBodySeparator.match(line): # Partial separator
		has_merged_cells = True
		cells = re.split(r"[\\|\+]", line.strip("\|").strip("+")) # (?<!\\)[\\|\+]
		#Add auxiliar line, set delimiters for each cell
		auxiliar_rows.append(Row(number_of_columns))
		aux_delimiter_index = 0
		for i in range(number_of_columns_row):
		aux_delimiter_index += len(parts[i]) + 1
		auxiliar_rows[-1].cells[i].position = aux_delimiter_index # Position of cell delimiter +
		for auxiliar_cell_index in range(number_of_columns):
		aux_delimiter_index += len(cells[auxiliar_cell_index]) + 1
		auxiliar_rows[-1][auxiliar_cell_index].position = aux_delimiter_index # Position of cell delimiter +
		auxiliar_rows[-1][i].set_alignment()

		cells = re.split(r"\s[\\|\+]\s", line.strip("\|").strip("+")) # (?<!\\)[\\|\+]
		if len(cells) <= number_of_columns: # Colspan: Positions of \| with respect to + need to be determined
		for i in range(len(cells)):
		if _matchGridTableBodySeparatorLine.match(cells[i]): # A new row is to be added
		#auxiliar_rows[-1]['use_auxiliar_row'][i] = True
		auxiliar_rows[-1].cells[i].list_flag = False
		table_row.cells[i].auxiliar_index = len(auxiliar_rows)-1
		auxiliar_rows[-1][i].list_flag = False
		table_row[i].auxiliar_index = len(auxiliar_rows)-1
		#if cells[i].startswith(":") and not cells[i].endswith(":"):
		# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"left\""
		#elif not cells[i].startswith(":") and cells[i].endswith(":"):
		@@ -670,37 +674,20 @@ def parse_pandoc_table_with_spans(pandoc_table):
		# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"center\""
		else:
		# Handle content of the cell
		if table_row.cells[i].auxiliar_index is not None: # and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
		auxiliar_rows[table_row.cells[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index][i], cells[i])
		if not auxiliar_rows[table_row.cells[i].auxiliar_index][i].colspan_adjusted:
		auxiliar_rows[table_row.cells[i].auxiliar_index][i].colspan_adjusted = True
		if table_row[i].auxiliar_index is not None: # and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
		auxiliar_rows[table_row[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i])
		if not auxiliar_rows[table_row[i].auxiliar_index][i].colspan_adjusted:
		auxiliar_rows[table_row[i].auxiliar_index][i].colspan_adjusted = True
		# TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
		auxiliar_rows[table_row.cells[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row.cells[i].auxiliar_index], i, len(cells), line, number_of_columns, delimiter_positions)
		auxiliar_rows[table_row[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row[i].auxiliar_index], i, len(cells), line, number_of_columns, delimiter_positions)
		else:
		table_row.cells[i] = handling_content(table_row.cells[i], cells[i])
		table_row[i] = handling_content(table_row[i], cells[i])
		# Cell which is not separator
		table_row.cells[i].rowspan += 1
		table_row[i].rowspan += 1
		if not table_row.cells[i].colspan_adjusted:
		table_row.cells[i].colspan_adjusted = True
		table_row[i].colspan_adjusted = True
		#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
		table_row.cells[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions)
		#elif len(cells) == number_of_columns: # Simple row with partial separator, # A new row is to be added
		# for i in range(len(cells)):
		# if _matchGridTableBodySeparatorLine.match(cells[i]): # Update cell in new row
		# use_auxiliar_row[i] = True
		# list_flags[i] = False
		# if cells[i].startswith(":") and not cells[i].endswith(":"):
		# auxiliar_row[i]['alignment'] = "align=\"left\""
		# elif not cells[i].startswith(":") and cells[i].endswith(":"):
		# auxiliar_row[i]['alignment'] = "align=\"right\""
		# else:
		# auxiliar_row[i]['alignment'] = "align=\"center\""
		# else:
		# #Handle content of the cell
		# list_flags[i], table_row[i] = handling_content(table_row[i], cells[i],list_flags[i])
		# # Cell which is not separator
		# table_row[i]['rowspan'] += 1
		# # Adjusting of colspan not needed, no colspan as number of cells is equal to number of columns
		table_row[i] = adjust_colspan(table_row, i, len(cells), line, number_of_columns, delimiter_positions)
		else:
		raise ValueError("More cells than columns found")
		else: # Data row
		@@ -708,30 +695,29 @@ def parse_pandoc_table_with_spans(pandoc_table):
		if len(cells) < number_of_columns: # Colspan: Positions of \| with respect to + need to be determined
		for i in range(len(cells)):
		# Handle content of the cell
		if table_row.cells[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
		auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i], cells[i])
		if table_row[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
		auxiliar_rows[table_row.cells[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i])
		if not auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted:
		auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted = True
		#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
		auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = adjust_colspan(auxiliar_rows[table_row.cells[i].auxiliar_index].cells, i, len(cells), line, number_of_columns, delimiter_positions)
		auxiliar_rows[table_row[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row[i].auxiliar_index].cells, i, len(cells), line, number_of_columns, delimiter_positions)
		else:
		table_row.cells[i] = handling_content(table_row.cells[i], cells[i])
		table_row[i] = handling_content(table_row[i], cells[i])
		if not table_row.cells[i].colspan_adjusted:
		table_row.cells[i].colspan_adjusted = True
		table_row.cells[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions)
		table_row[i].colspan_adjusted = True
		table_row[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions)
		elif len(cells) == number_of_columns: # Simple row
		for i in range(len(cells)):
		if table_row.cells[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
		auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i], cells[i])
		if table_row[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
		auxiliar_rows[table_row[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i])
		else:
		# Handle content of the cell
		table_row.cells[i] = handling_content(table_row.cells[i], cells[i])
		table_row[i] = handling_content(table_row[i], cells[i])
		else:
		raise ValueError("More cells than columns found")
		else:
		raise ValueError("No separator line found for row starting")


		if has_header and start >= header_separator_index: # table_row and auxiliar_row are part of data_rows
		data_rows.append(table_row.cells)
		if has_merged_cells:
		@@ -759,7 +745,7 @@ def parse_pandoc_table_with_spans(pandoc_table):
		for cell in row:
		if cell.content is not None:
		# Replacing "<" by &lt;
		cell.content = cell.content.replace("<", "<")
		#cell.content = cell.content.replace("<", "<")

		#Bold
		for bold_characters in ["**", "__"]:
		@@ -828,8 +814,12 @@ def generate_html_table_with_spans(pandoc_table):
		:param pandoc_table: String of the Pandoc-style grid table.
		:return: HTML string.
		"""
		try:
		grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)

		except:
		logging.ERROR("Grid table could not be generated")
		return "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS"
		else:
		html = "<table>\n"
		has_header = False