Loading generateChangemarks/.gitlab-ci.yml +2 −2 Original line number Diff line number Diff line Loading @@ -197,12 +197,12 @@ pages: curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fstylesheets%2Fextra%2Ecss/raw?ref=master" >> extra.css - mkdir -p docs/stylesheets && mv extra.css docs/stylesheets/ - | curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fmkdocs%2Eyml/raw?ref=master" >> mkdocs.yml curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fmkdocs%2Eyml/raw?ref=gridtables" >> mkdocs.yml - | curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FindexDownload%2Emd/raw?ref=master" >> indexDownload.md - mkdir -p docs/download && mv indexDownload.md docs/download/index.md - | curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FtoMkdocs%2Epy/raw?ref=master" >> toMkdocs.py curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FtoMkdocs%2Epy/raw?ref=gridtables" >> toMkdocs.py - | export SPEC_NAME=$(ls | grep -E "(TS|TR|WI).*\.md" | cut -d'.' 
-f1) - | Loading toMkdocs/mkdocs.yml +6 −0 Original line number Diff line number Diff line Loading @@ -60,6 +60,8 @@ markdown_extensions: pygments_lang_class: true - pymdownx.inlinehilite - pymdownx.snippets - pymdownx.arithmatex: generic: true - pymdownx.superfences: custom_fences: - name: mermaid Loading @@ -69,6 +71,10 @@ markdown_extensions: alternate_style: true - tables extra_javascript: - javascripts/mathjax.js - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js ############################################################################## extra: Loading toMkdocs/toMkdocs.py +342 −4 Original line number Diff line number Diff line Loading @@ -11,6 +11,7 @@ from enum import Enum, auto import argparse, re, os, shutil, hashlib, base64 from dataclasses import dataclass from rich import print from html import escape verbose = False veryVerbose = False Loading Loading @@ -418,6 +419,9 @@ _matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE) _matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECASE) _matchTable = re.compile(r'^\s*\|.*\|\s$', re.IGNORECASE) _matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE) _matchGridTable = re.compile(r'^\s*\+-.*\+\s$', re.IGNORECASE) _matchGridTableBodySeparator = re.compile(r'.*\+([-:]+\+)+.*$', re.IGNORECASE) _matchGridTableHeaderSeparator = re.compile(r'.*\+([=:]+\+)+.*$', re.IGNORECASE) _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE) _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE) _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE) Loading Loading @@ -447,6 +451,309 @@ def shortHash(value:str, length:int) -> str: ).digest() ).decode()[:length] def parse_pandoc_table_with_spans(pandoc_table): """ Parse a Pandoc-style grid table into a structure for HTML conversion with rowspan and colspan. :param pandoc_table: String of the Pandoc-style grid table. :return: List of lists representing the table with metadata for spans. 
""" # Split the input into lines lines = [line.strip() for line in pandoc_table.strip().split("\n")] # Detect separator lines by pattern (it does not take into account partial separators def is_separator(line): _matchGridTableSeparator = re.compile(r'\s*\+([-:=]+\+)+\s*$', re.IGNORECASE) return _matchGridTableSeparator.match(line) _matchGridTableSeparatorLine = re.compile(r'[-:]+$', re.IGNORECASE) separator_indices = [i for i, line in enumerate(lines) if is_separator(line)] print(separator_indices) if not separator_indices: raise ValueError("No valid separators found in the provided Pandoc table.") # Calculate max number of columns delimiter_positions = [] number_of_columns = 0 for separator_index in separator_indices: if lines[separator_index].count("+") - 1 > number_of_columns: number_of_columns = lines[separator_index].count("+") - 1 delimiter_positions = [] for j in range(number_of_columns): delimiter_positions_start = delimiter_positions[j - 1] if j != 0 else 0 del_positions = [lines[separator_index].find(delimiter, delimiter_positions_start + 1) for delimiter in "+" if delimiter in lines[separator_index][delimiter_positions_start + 1:]] delimiter_positions.append(min(del_positions) if del_positions else -1) has_header = False for index in separator_indices: if _matchGridTableHeaderSeparator.match(lines[index]): has_header = True header_separator_index = index header_rows = [] data_rows = [] for row in range(len(separator_indices) - 1): table_row = [] auxiliar_row = [] use_auxiliar_row = [] has_merged_cells = False in_data_row = False start, end = separator_indices[row], separator_indices[row + 1] row_lines = lines[start:end] # Lines between separators including separator line start as it gives information about the number of columns of the row if row_lines: # Combine multiline content into single strings for each cell for line in row_lines: if is_separator(line) and not in_data_row: number_of_columns_row = line.count("+") - 1 in_data_row = True parts = 
re.split(r"\s*\+\s*", line.strip("+")) # Add as many cells as columns with span attributes delimiter_index = 0 for i in range(number_of_columns_row): delimiter_index += len(parts[i]) + 1 table_row.append({ "content": "NOCONTENT", "rowspan": 0, "colspan": 0, "colspan_adjusted": False, "position": delimiter_index # Position of cell delimiter + }) for i in range(number_of_columns): auxiliar_row.append({ "content": "NOCONTENT", "rowspan": 0, "colspan": 0, "colspan_adjusted": False, "position": 0 }) use_auxiliar_row.append(False) elif in_data_row: # Regular data row or partial separator if _matchGridTableBodySeparator.match(line): # Partial separator has_merged_cells = True cells = re.split(r"\s*[\|\+]\s*", line.strip("|").strip("+")) # (?<!\\)[\|\+] if len(cells) < number_of_columns: # Colspan: Positions of | with respect to + need to be determined for i in range(len(cells)): if _matchGridTableSeparatorLine.match(cells[i]): # A new row is to be added use_auxiliar_row[i] = True else: if table_row[i]['content'] == "NOCONTENT": table_row[i]['rowspan'] += 1 table_row[i]['colspan'] += 1 table_row[i]['content'] = cells[i] else: table_row[i]['content'] += cells[i] # Cell which is not separator table_row[i]['rowspan'] += 1 if not table_row[i]['colspan_adjusted']: table_row[i]['colspan_adjusted'] = True for j in range(i, len(cells)): delimiter_start = table_row[j-1]['position'] if j != 0 else 0 positions = [line.find(delimiter, delimiter_start + 1) for delimiter in "|+" if delimiter in line[delimiter_start + 1:]] position = min(positions) if positions else -1 if position > delimiter_positions_start[j]: # Colspan to add table_row[i]['colspan'] += 1 elif position < delimiter_positions_start[j]: raise ValueError("Wrong cell formatting") else: break elif len(cells) == number_of_columns: # Simple row with partial separator, # A new row is to be added for i in range(len(cells)): if _matchGridTableSeparatorLine.match(cells[i]): # Update cell in new row use_auxiliar_row[i] = True else: 
if table_row[i]['content'] == "NOCONTENT": table_row[i]['rowspan'] += 1 table_row[i]['colspan'] += 1 table_row[i]['content'] = cells[i] else: table_row[i]['content'] += cells[i] # Cell which is not separator table_row[i]['rowspan'] += 1 # Not needed, no colspan as number of cells is equal to number of columns #for j in range(i, len(cells)): # delimiter_start = table_row[j-1]['position'] if j != 0 else 0 # positions = [line.find(delimiter,delimiter_start+1) for delimiter in "|+" if delimiter in line[delimiter_start+1:]] # position = min(positions) if positions else -1 # if position > table_row[i]['position']: # Only colspan to be increased # table_row[i]['colspan'] += 1 # elif position + 1 < table_row[i]['position']: # raise ValueError("Wrong cell formatting") # else: # break else: raise ValueError("More cells than columns found") else: # Data row cells = re.split(r"\s*\|\s*", line.strip("|")) if len(cells) < number_of_columns: # Colspan: Positions of | with respect to + need to be determined for i in range(len(cells)): if table_row[i]['content'] == "NOCONTENT": table_row[i]['rowspan'] += 1 table_row[i]['colspan'] += 1 table_row[i]['content'] = cells[i] else: table_row[i]['content'] += cells[i] if not table_row[i]['colspan_adjusted']: table_row[i]['colspan_adjusted'] = True for j in range(i, len(cells)): delimiter_start = table_row[j-1]['position'] if j != 0 else 0 if line.find("|", delimiter_start+1) > delimiter_positions[j]: # Colspan to be increased table_row[i]['colspan'] += 1 elif line.find("|", delimiter_start+1) < delimiter_positions[j]: raise ValueError("Wrong cell formatting") else: break elif len(cells) == number_of_columns: # Simple row for i in range(len(cells)): if use_auxiliar_row[i]: if auxiliar_row[i]['content'] == "NOCONTENT": auxiliar_row[i]['rowspan'] += 1 auxiliar_row[i]['colspan'] += 1 auxiliar_row[i]['content'] = cells[i] else: auxiliar_row[i]['content'] += cells[i] else: if table_row[i]['content'] == "NOCONTENT": table_row[i]['rowspan'] += 1 
table_row[i]['colspan'] += 1 table_row[i]['content'] = cells[i] else: table_row[i]['content'] += cells[i] else: raise ValueError("More cells than columns found") else: raise ValueError("No separator line found for row starting") if has_header and start >= header_separator_index: # table_row and auxiliar_row are part of data_rows data_rows.append(table_row) if has_merged_cells: data_rows.append(auxiliar_row) elif has_header and start < header_separator_index: # table_row and auxiliar_row are part of header_rows header_rows.append(table_row) if has_merged_cells: header_rows.append(auxiliar_row) #print(header_rows) #print(data_rows) # Correct newlines characters for row in header_rows: for cell in row: cell['content'] = cell['content'].replace("\\", "<br>") for row in data_rows: for cell in row: cell['content'] = cell['content'].replace("\\", "<br>") # Check if there are any data rows if not data_rows and not header_rows: raise ValueError("No valid rows found in the provided Pandoc table.") # Format text bold = "<strong>" for row in header_rows: for cell in row: while cell['content'].find("**") != -1: cell['content'] = cell['content'].replace("**", bold, 1) if bold == "<strong>": bold = "</strong>" else: bold = "<strong>" bold = "<strong>" for row in data_rows: for cell in row: while cell['content'].find("**") != -1: cell['content'] = cell['content'].replace("**", bold, 1) if bold == "<strong>": bold = "</strong>" else: bold = "<strong>" # Checking that the grid is correct Not too much tested - need to take into account rowspan of previous rows forward_rowspan = [] for row_index in range(len(header_rows)): if len(forward_rowspan) == 0: forward_rowspan = [0 for _ in range(len(header_rows[row_index]))] sum = 0 for cell_index in range(len(header_rows[row_index])): sum += header_rows[row_index][cell_index]['colspan'] if row_index > 0 and header_rows[row_index][cell_index]['colspan'] == 0: if forward_rowspan[cell_index] > 0: sum += 1 forward_rowspan[cell_index] -= 1 if 
forward_rowspan[cell_index] == 0 and header_rows[row_index][cell_index]['rowspan'] > 1: forward_rowspan[cell_index] = header_rows[row_index][cell_index]['rowspan'] -1 if not sum == number_of_columns: raise ValueError("Grid table not converted properly") forward_rowspan = [] for row_index in range(len(data_rows)): if len(forward_rowspan) == 0: forward_rowspan = [0 for _ in range(len(data_rows[row_index]))] sum = 0 for cell_index in range(len(data_rows[row_index])): sum += data_rows[row_index][cell_index]['colspan'] if row_index > 0 and data_rows[row_index][cell_index]['colspan'] == 0: if forward_rowspan[cell_index] > 0: sum += 1 forward_rowspan[cell_index] -= 1 if forward_rowspan[cell_index] == 0 and data_rows[row_index][cell_index]['rowspan'] > 1: forward_rowspan[cell_index] = data_rows[row_index][cell_index]['rowspan'] - 1 if not sum == number_of_columns: raise ValueError("Grid table not converted properly") #if has_header: # table_with_spans = header_rows #table_with_spans += data_rows #return table_with_spans return header_rows, data_rows def generate_html_table_with_spans(pandoc_table): """ Generate an HTML table from a Pandoc-style grid table with row and column spans. :param pandoc_table: String of the Pandoc-style grid table. :return: HTML string. 
""" grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table) html = "<table>\n" has_header = False for row in grid_header: for cell in row: if cell['rowspan'] != 0 and cell['colspan'] != 0: has_header = True if has_header: html += " <thead>\n" for row in grid_header: html += " <tr>\n" for cell in row: if cell['rowspan'] == 0 or cell['colspan'] == 0: continue else: rowspan = f" rowspan=\"{cell['rowspan']}\"" if cell["rowspan"] > 1 else "" colspan = f" colspan=\"{cell['colspan']}\"" if cell["colspan"] > 1 else "" html += f" <td{rowspan}{colspan}>{cell['content']}</td>\n" html += " </tr>\n" html += " </thead>\n" html += " <tbody>\n" for row in grid_body: html += " <tr>\n" for cell in row: if cell['rowspan'] == 0 or cell['colspan'] == 0: continue else: rowspan = f" rowspan=\"{cell['rowspan']}\"" if cell["rowspan"] > 1 else "" colspan = f" colspan=\"{cell['colspan']}\"" if cell["colspan"] > 1 else "" html += f" <td{rowspan}{colspan}>{cell['content']}</td>\n" html += " </tr>\n" html += " </tbody>\n" html += "</table>" return html def analyseMarkdown(filename:str) -> Document: """ Analyse the markdown file and split it into clauses. 
Loading @@ -473,6 +780,9 @@ def analyseMarkdown(filename:str) -> Document: inCodefence = False inTable = False tableHasSeparator = False inGridTable = False gridTableHasSeparator = False gridTable = "" for line in inLines: # Detect and handle codefences Loading @@ -493,7 +803,7 @@ def analyseMarkdown(filename:str) -> Document: continue # Detect and handle tables if _matchTable.match(line) and not inTable: if _matchTable.match(line) and not inTable and not inGridTable: inTable = True outClauses[-1].append(Line(line, LineType.TABLEHEADER)) continue Loading @@ -512,6 +822,34 @@ def analyseMarkdown(filename:str) -> Document: outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW # continue with other matches #Detect grid tables and convert them to html table if _matchGridTable.match(line) and not inGridTable: inGridTable = True #outClauses[-1].append(Line(line, LineType.TABLEHEADER)) gridTable += line continue if inGridTable: if _matchGridTableHeaderSeparator.match(line) or _matchGridTableBodySeparator.match(line): #outClauses[-1].append(Line(line, LineType.TABLESEPARATOR)) gridTable += line continue elif _matchTable.match(line): #outClauses[-1].append(Line(line, LineType.TABLEROW)) gridTable += line continue else: inGridTable = False # Mark the previous line as the last row in the table #outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW print(gridTable) htmltable = "" htmltable = generate_html_table_with_spans(gridTable) print(htmltable) for row in htmltable: outClauses[-1].append(Line(row, LineType.TABLEROW)) gridTable = "" # continue with other matches # Detect notes # Notes are lines that start with a '>'. if _matchNote.match(line): Loading Loading
generateChangemarks/.gitlab-ci.yml +2 −2 Original line number Diff line number Diff line Loading @@ -197,12 +197,12 @@ pages: curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fstylesheets%2Fextra%2Ecss/raw?ref=master" >> extra.css - mkdir -p docs/stylesheets && mv extra.css docs/stylesheets/ - | curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fmkdocs%2Eyml/raw?ref=master" >> mkdocs.yml curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fmkdocs%2Eyml/raw?ref=gridtables" >> mkdocs.yml - | curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FindexDownload%2Emd/raw?ref=master" >> indexDownload.md - mkdir -p docs/download && mv indexDownload.md docs/download/index.md - | curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FtoMkdocs%2Epy/raw?ref=master" >> toMkdocs.py curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FtoMkdocs%2Epy/raw?ref=gridtables" >> toMkdocs.py - | export SPEC_NAME=$(ls | grep -E "(TS|TR|WI).*\.md" | cut -d'.' -f1) - | Loading
toMkdocs/mkdocs.yml +6 −0 Original line number Diff line number Diff line Loading @@ -60,6 +60,8 @@ markdown_extensions: pygments_lang_class: true - pymdownx.inlinehilite - pymdownx.snippets - pymdownx.arithmatex: generic: true - pymdownx.superfences: custom_fences: - name: mermaid Loading @@ -69,6 +71,10 @@ markdown_extensions: alternate_style: true - tables extra_javascript: - javascripts/mathjax.js - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js ############################################################################## extra: Loading
_matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE)
_matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECASE)
_matchTable = re.compile(r'^\s*\|.*\|\s$', re.IGNORECASE)
_matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE)
_matchGridTable = re.compile(r'^\s*\+-.*\+\s$', re.IGNORECASE)
_matchGridTableBodySeparator = re.compile(r'.*\+([-:]+\+)+.*$', re.IGNORECASE)
_matchGridTableHeaderSeparator = re.compile(r'.*\+([=:]+\+)+.*$', re.IGNORECASE)
_match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
_markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)

# Helper patterns used only by the grid table parser below. They are compiled
# once at import time instead of on every call.
# A full separator line consists solely of '+' joined by runs of '-', ':' or '='.
_matchGridTableFullSeparator = re.compile(r'\s*\+([-:=]+\+)+\s*$')
# A single segment of a partial separator (the text between two delimiters).
_matchGridTableSeparatorSegment = re.compile(r'[-:]+$')
# Markdown bold markers inside one cell.
_matchGridTableBoldText = re.compile(r'\*\*(.*?)\*\*')


def _new_grid_row(boundaries, with_positions):
    """Return a row of placeholder cells, one per grid column.

    A placeholder has the content "NOCONTENT" and zero spans; such cells are
    skipped when the HTML is generated. "position" holds the character index
    of the column's right-hand delimiter when *with_positions* is true and 0
    otherwise (0 is what rows created by a partial separator carry).
    """
    return [
        {
            "content": "NOCONTENT",
            "rowspan": 0,
            "colspan": 0,
            "colspan_adjusted": False,
            "position": boundaries[column + 1] if with_positions else 0,
        }
        for column in range(len(boundaries) - 1)
    ]


def _parse_grid_row(row_lines, boundaries, column_of):
    """Convert the lines between two full separators into HTML-like rows.

    Cells are located by the character position of their delimiters, so a
    cell's starting column and colspan follow directly from the column
    boundaries. Each partial separator line (for example "|     +---+---+")
    opens a new sub-row for the columns it underlines; the cells it does not
    underline continue across it and have their rowspan incremented.

    :param row_lines: Stripped content lines of one logical table row.
    :param boundaries: Character positions of all column delimiters.
    :param column_of: Mapping of delimiter position to column index.
    :return: List of rows (lists of cell dicts), the first one always present.
    :raises ValueError: If a delimiter is not aligned with a column boundary.
    """
    sub_rows = [_new_grid_row(boundaries, True)]
    # Index of the sub-row every column is currently writing into.
    active = [0] * (len(boundaries) - 1)

    for line in row_lines:
        if not line:
            continue
        is_partial = bool(_matchGridTableBodySeparator.match(line))
        delimiters = "|+" if is_partial else "|"
        marks = [position for position, char in enumerate(line) if char in delimiters]
        if len(marks) < 2:
            raise ValueError("Wrong cell formatting")

        new_index = len(sub_rows)
        if is_partial:
            sub_rows.append(_new_grid_row(boundaries, False))

        for left, right in zip(marks, marks[1:]):
            if left not in column_of or right not in column_of:
                raise ValueError("Wrong cell formatting")
            first_column = column_of[left]
            span = column_of[right] - first_column
            text = line[left + 1:right].strip()

            if is_partial and _matchGridTableSeparatorSegment.match(text):
                # Everything below this segment belongs to the new sub-row.
                for column in range(first_column, first_column + span):
                    active[column] = new_index
                continue

            cell = sub_rows[active[first_column]][first_column]
            if cell["content"] == "NOCONTENT":
                cell["content"] = text
                cell["rowspan"] = 1
                cell["colspan"] = span
                cell["colspan_adjusted"] = True
            elif text:
                # Continuation line: a line break inside a cell is a space.
                cell["content"] = f"{cell['content']} {text}" if cell["content"] else text
            if is_partial:
                # The cell is not underlined, so it also covers the new sub-row.
                cell["rowspan"] += 1

    return sub_rows


def _format_grid_cells(rows):
    """Translate the inline markup of every cell in *rows* to HTML, in place.

    Backslashes become "<br>" and "**text**" becomes "<strong>text</strong>".
    Bold markers are matched per cell so an unbalanced "**" cannot leak an
    open tag into the following cells.
    """
    for row in rows:
        for cell in row:
            content = cell["content"].replace("\\", "<br>")
            cell["content"] = _matchGridTableBoldText.sub(r"<strong>\1</strong>", content)


def _check_grid_rows(rows, number_of_columns):
    """Verify that every row covers exactly *number_of_columns* columns.

    The columns still occupied by row-spanning cells of previous rows are
    counted as covered.

    :raises ValueError: If a row is wider or narrower than the grid.
    """
    carried = [0] * number_of_columns  # remaining rows covered from above
    for row in rows:
        width = 0
        for column in range(number_of_columns):
            if carried[column] > 0:
                width += 1
                carried[column] -= 1
        for column, cell in enumerate(row):
            width += cell["colspan"]
            if cell["rowspan"] > 1:
                for covered in range(column, column + cell["colspan"]):
                    carried[covered] = cell["rowspan"] - 1
        if width != number_of_columns:
            raise ValueError("Grid table not converted properly")


def parse_pandoc_table_with_spans(pandoc_table: str) -> tuple:
    """Parse a Pandoc-style grid table into rows of cells with span metadata.

    The column grid is taken from the full separator line with the most '+'
    characters. Rows above the last header separator ("+====+") are header
    rows, all others are data rows; a table without a header separator
    consists of data rows only.

    Every row has one entry per grid column. An entry is a dict with the keys
    "content", "rowspan", "colspan", "colspan_adjusted" and "position". A cell
    is stored at the index of the first column it occupies; columns covered by
    a neighbouring span keep the placeholder content "NOCONTENT" with zero
    spans.

    :param pandoc_table: String of the Pandoc-style grid table.
    :return: Tuple (header_rows, data_rows), each a list of rows.
    :raises ValueError: If the table has no separator lines or no rows, if a
        cell delimiter is not aligned with the column grid, or if a row does
        not cover all columns.
    """
    lines = [line.strip() for line in pandoc_table.strip().split("\n")]

    separator_indices = [
        index for index, line in enumerate(lines)
        if _matchGridTableFullSeparator.match(line)
    ]
    if not separator_indices:
        raise ValueError("No valid separators found in the provided Pandoc table.")

    # The widest separator defines the column grid (first one wins on a tie).
    widest = max((lines[index] for index in separator_indices), key=lambda line: line.count("+"))
    boundaries = [position for position, char in enumerate(widest) if char == "+"]
    column_of = {position: column for column, position in enumerate(boundaries)}
    number_of_columns = len(boundaries) - 1

    header_separator_index = None
    for index in separator_indices:
        if _matchGridTableHeaderSeparator.match(lines[index]):
            header_separator_index = index

    header_rows = []
    data_rows = []
    for start, end in zip(separator_indices, separator_indices[1:]):
        row_lines = lines[start + 1:end]
        if not row_lines:
            continue  # two adjacent separators enclose no row
        sub_rows = _parse_grid_row(row_lines, boundaries, column_of)
        if header_separator_index is not None and start < header_separator_index:
            header_rows.extend(sub_rows)
        else:
            # Also reached when the table has no header separator at all.
            data_rows.extend(sub_rows)

    if not data_rows and not header_rows:
        raise ValueError("No valid rows found in the provided Pandoc table.")

    for rows in (header_rows, data_rows):
        _format_grid_cells(rows)
        _check_grid_rows(rows, number_of_columns)

    return header_rows, data_rows


def _render_grid_rows(rows):
    """Return the "<tr>" markup for *rows* as a list of text fragments."""
    fragments = []
    for row in rows:
        fragments.append("    <tr>\n")
        for cell in row:
            if cell["rowspan"] == 0 or cell["colspan"] == 0:
                continue  # placeholder covered by a neighbouring span
            rowspan = f' rowspan="{cell["rowspan"]}"' if cell["rowspan"] > 1 else ""
            colspan = f' colspan="{cell["colspan"]}"' if cell["colspan"] > 1 else ""
            fragments.append(f"      <td{rowspan}{colspan}>{cell['content']}</td>\n")
        fragments.append("    </tr>\n")
    return fragments


def generate_html_table_with_spans(pandoc_table: str) -> str:
    """Generate an HTML table from a Pandoc-style grid table.

    Row and column spans of the grid table are emitted as "rowspan" and
    "colspan" attributes. A "<thead>" section is only written when the table
    has at least one non-placeholder header cell. Cell content is inserted as
    is and is not HTML-escaped.

    :param pandoc_table: String of the Pandoc-style grid table.
    :return: HTML string.
    :raises ValueError: If the grid table cannot be parsed.
    """
    grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)

    has_header = any(
        cell["rowspan"] != 0 and cell["colspan"] != 0
        for row in grid_header
        for cell in row
    )

    fragments = ["<table>\n"]
    if has_header:
        fragments.append("  <thead>\n")
        fragments.extend(_render_grid_rows(grid_header))
        fragments.append("  </thead>\n")
    fragments.append("  <tbody>\n")
    fragments.extend(_render_grid_rows(grid_body))
    fragments.append("  </tbody>\n")
    fragments.append("</table>")
    return "".join(fragments)
Loading @@ -473,6 +780,9 @@ def analyseMarkdown(filename:str) -> Document: inCodefence = False inTable = False tableHasSeparator = False inGridTable = False gridTableHasSeparator = False gridTable = "" for line in inLines: # Detect and handle codefences Loading @@ -493,7 +803,7 @@ def analyseMarkdown(filename:str) -> Document: continue # Detect and handle tables if _matchTable.match(line) and not inTable: if _matchTable.match(line) and not inTable and not inGridTable: inTable = True outClauses[-1].append(Line(line, LineType.TABLEHEADER)) continue Loading @@ -512,6 +822,34 @@ def analyseMarkdown(filename:str) -> Document: outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW # continue with other matches #Detect grid tables and convert them to html table if _matchGridTable.match(line) and not inGridTable: inGridTable = True #outClauses[-1].append(Line(line, LineType.TABLEHEADER)) gridTable += line continue if inGridTable: if _matchGridTableHeaderSeparator.match(line) or _matchGridTableBodySeparator.match(line): #outClauses[-1].append(Line(line, LineType.TABLESEPARATOR)) gridTable += line continue elif _matchTable.match(line): #outClauses[-1].append(Line(line, LineType.TABLEROW)) gridTable += line continue else: inGridTable = False # Mark the previous line as the last row in the table #outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW print(gridTable) htmltable = "" htmltable = generate_html_table_with_spans(gridTable) print(htmltable) for row in htmltable: outClauses[-1].append(Line(row, LineType.TABLEROW)) gridTable = "" # continue with other matches # Detect notes # Notes are lines that start with a '>'. if _matchNote.match(line): Loading