Loading toMkdocs/toMkdocs.py +164 −117 Original line number Diff line number Diff line Loading @@ -420,6 +420,7 @@ _matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECA _matchTable = re.compile(r'^\s*\|.*\|\s*$', re.IGNORECASE) _matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE) _matchGridTable = re.compile(r'^\s*\+-.*\+\s$', re.IGNORECASE) _matchGridTableSeparator = re.compile(r'\s*\+([-:=]+\+)+\s*$', re.IGNORECASE) _matchGridTableBodySeparator = re.compile(r'.*\+([:-]+\+)+.*$', re.IGNORECASE) _matchGridTableHeaderSeparator = re.compile(r'.*\+([=:]+\+)+.*$', re.IGNORECASE) _matchGridTableBodySeparatorLine = re.compile(r'[-:]+$', re.IGNORECASE) Loading Loading @@ -463,58 +464,85 @@ def parse_pandoc_table_with_spans(pandoc_table): # Split the input into lines lines = [line.strip() for line in pandoc_table.strip().split("\n")] class Cell: """ Represents the document object. """ content: str rowspan: int colspan: int colspan_adjusted: bool alignment: str position: int list_flag: bool auxiliar_index: int def __init__(self): self.content = None self.rowspan = 0 self.colspan = 0 self.colspan_adjusted = False self.alignment = "align=\"center\"" self.position = 0 self.list_flag = False self.auxiliar_index = None class Row(): """ Represents a row in the markdown file. """ cells:list[Cell] = [] def __init__(self, length: int = 1) -> None: self.cells = [Cell() for _ in range(length)] # Detect separator lines by pattern (it does not take into account partial separators def is_separator(line): _matchGridTableSeparator = re.compile(r'\s*\+([-:=]+\+)+\s*$', re.IGNORECASE) return _matchGridTableSeparator.match(line) def handling_content(cell, content, list_flag): if cell['content'] is None: cell['rowspan'] += 1 cell['colspan'] += 1 def handling_content(cell, content): if cell.content is None: cell.rowspan += 1 cell.colspan += 1 if content.strip().startswith("- "): # List list_flag = True print(content) cell['content'] = content.strip() + "\n" # Add newline to know when the list element ends elif list_flag: # any other content when handling list is concatenated to the last list element cell['content'] += content.strip() + "\n" elif cells[i].strip() == "": # separation between list and other paragraph list_flag = False cell['content'] = re.sub(r'\\\s*$', "\n", content) cell.list_flag = True #print(content) cell.content = content.strip() + "\n" # Add newline to know when the list element ends elif cell.list_flag and cells[i].strip() != "": # any other content when handling list is concatenated to the last list element cell.content += content.strip() + "\n" elif cells[i].strip == "": # separation between list and other paragraph cell.list_flag = False cell.content += "\n" #if not cell['content'].endswith("\n") else "" else: cell['content'] = re.sub(r'\\\s*$', "\n", content.strip()) cell.content = re.sub(r'\\\s*$', "\n", content.strip()) else: if content.strip().startswith("- "): # List if not list_flag: cell['content'] += "\n" if not cell.list_flag: cell.content += "\n" #cell['content'] = cell['content'].strip("\n") list_flag = True cell['content'] += content.strip() + "\n" # Add newline to know when the list element ends elif list_flag: # any other content when handling list is concatenated to the last list element cell['content'] = cell['content'].strip("\n") cell['content'] += " " + content.strip() + "\n" cell.list_flag = True cell.content += content.strip() + "\n" # Add newline to know when the list element ends elif cell.list_flag and cells[i].strip() != "": # any other content when handling list is concatenated to the last list element cell.content = cell.content.strip("\n") cell.content += " " + content.strip() + "\n" elif cells[i].strip() == "": # separation between list and other paragraph list_flag = False cell.list_flag = False #content = re.sub(r'\\\s*$', "\n", content.strip()) cell['content'] += "\n" if not cell['content'].endswith("\n") else "" cell.content += "\n" if not cell.content.endswith("\n") else "" else: content = re.sub(r'\\\s*$', "\n", content.strip()) cell['content'] += " " + content cell.content += " " + content #print(cell['content']) return list_flag, cell return cell def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions): for j in range(column_index, number_of_parts): delimiter_start = row[j - 1]['position'] if j != 0 else 0 delimiter_start = row[j - 1].position if j != 0 else 0 positions = [line.find(delimiter, delimiter_start + 1) for delimiter in "|+" if delimiter in line[delimiter_start + 1:]] position = min(positions) if positions else -1 if position > delimiter_positions[j]: # Colspan to be increased row[i]['colspan'] += 1 row[i].colspan += 1 if position == delimiter_positions[len(delimiter_positions) - 1]: # last cell in row, adjust colspan to get max number columns colspan_allocated = 0 for cell_index in range(number_of_parts): colspan_allocated += row[cell_index]['colspan'] row[column_index]['colspan'] += number_of_columns - colspan_allocated colspan_allocated = row[i].colspan #for cell_index in range(number_of_parts): # colspan_allocated += row[cell_index].colspan row[column_index].colspan += number_of_columns - colspan_allocated - column_index elif position < delimiter_positions[j]: raise ValueError("Wrong cell formatting") else: Loading Loading @@ -563,6 +591,7 @@ def parse_pandoc_table_with_spans(pandoc_table): data_rows = [] for row in range(len(separator_indices) - 1): table_row = [] auxiliar_rows = [] auxiliar_row = [] use_auxiliar_row = [] list_flags = [] Loading Loading @@ -591,65 +620,70 @@ def parse_pandoc_table_with_spans(pandoc_table): # else: # alignments.append("align=\"center\"") header_delimiter_index = 0 table_row = Row(number_of_columns_row) for i in range(number_of_columns_row): delimiter_index += len(parts[i]) + 1 table_row.append({ "content": None, "rowspan": 0, "colspan": 0, "colspan_adjusted": False, "alignment": default_alignments[i] if i == 0 else "align=\"center\"", "position": delimiter_index # Position of cell delimiter + }) table_row.cells[i].alignment = default_alignments[i] if i == 0 else "align=\"center\"" table_row.cells[i].position = delimiter_index # Position of cell delimiter + #Set alignment as defined by header separator line while header_delimiter_index in range(len(default_alignments)) and table_row[i]['position'] > header_delimiter_positions[header_delimiter_index]: while header_delimiter_index in range(len(default_alignments)) and table_row.cells[i].position > header_delimiter_positions[header_delimiter_index]: header_delimiter_index += 1 if header_delimiter_index in range(len(default_alignments)): if table_row[i]['position'] < header_delimiter_positions[header_delimiter_index]: table_row[i]['alignment'] = default_alignments[header_delimiter_index] elif table_row[i]['position'] == header_delimiter_positions[header_delimiter_index]: table_row[i]['alignment'] = default_alignments[i] if table_row.cells[i].position < header_delimiter_positions[header_delimiter_index]: table_row.cells[i].alignment = default_alignments[header_delimiter_index] elif table_row.cells[i].position == header_delimiter_positions[header_delimiter_index]: table_row.cells[i].alignment = default_alignments[i] header_delimiter_index += 1 else: raise ValueError("Invalid table formatting") for i in range(number_of_columns): auxiliar_row.append({ "content": None, "rowspan": 0, "colspan": 0, "colspan_adjusted": False, "alignment": "align=\"center\"", "position": 0 }) use_auxiliar_row.append(False) list_flags.append(False) #auxiliar_row = Row(number_of_columns) #for i in range(number_of_columns): #auxiliar_row.append(default_cell) #use_auxiliar_row.append(False) #auxiliar_rows.append({'auxiliar_row':auxiliar_row, 'use_auxiliar':use_auxiliar_row, 'list_flags':list_flags}) elif in_data_row: # Regular data row or partial separator if _matchGridTableBodySeparator.match(line): # Partial separator has_merged_cells = True #Add auxiliar line, set delimiters for each cell auxiliar_rows.append(Row(number_of_columns)) aux_delimiter_index = 0 for i in range(number_of_columns_row): aux_delimiter_index += len(parts[i]) + 1 auxiliar_rows[-1].cells[i].position = aux_delimiter_index # Position of cell delimiter + cells = re.split(r"\s*[\|\+]\s*", line.strip("|").strip("+")) # (?<!\\)[\|\+] if len(cells) <= number_of_columns: # Colspan: Positions of | with respect to + need to be determined for i in range(len(cells)): if _matchGridTableBodySeparatorLine.match(cells[i]): # A new row is to be added use_auxiliar_row[i] = True list_flags[i] = False if cells[i].startswith(":") and not cells[i].endswith(":"): auxiliar_row[i]['alignment'] = "align=\"left\"" elif not cells[i].startswith(":") and cells[i].endswith(":"): auxiliar_row[i]['alignment'] = "align=\"right\"" else: auxiliar_row[i]['alignment'] = "align=\"center\"" #auxiliar_rows[-1]['use_auxiliar_row'][i] = True auxiliar_rows[-1].cells[i].list_flag = False table_row.cells[i].auxiliar_index = len(auxiliar_rows)-1 #if cells[i].startswith(":") and not cells[i].endswith(":"): # auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"left\"" #elif not cells[i].startswith(":") and cells[i].endswith(":"): # auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"right\"" #else: # auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"center\"" else: # Handle content of the cell list_flags[i], table_row[i] = handling_content(table_row[i], cells[i],list_flags[i]) if table_row.cells[i].auxiliar_index is not None: # and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: auxiliar_rows[table_row.cells[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index][i], cells[i]) if not auxiliar_rows[table_row.cells[i].auxiliar_index][i].colspan_adjusted: auxiliar_rows[table_row.cells[i].auxiliar_index][i].colspan_adjusted = True # TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator auxiliar_rows[table_row.cells[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row.cells[i].auxiliar_index], i, len(cells), line, number_of_columns, delimiter_positions) else: table_row.cells[i] = handling_content(table_row.cells[i], cells[i]) # Cell which is not separator table_row[i]['rowspan'] += 1 if not table_row[i]['colspan_adjusted']: table_row[i]['colspan_adjusted'] = True table_row.cells[i].rowspan += 1 if not table_row.cells[i].colspan_adjusted: table_row.cells[i].colspan_adjusted = True #TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator table_row[i] = adjust_colspan(table_row, i, len(cells), line, number_of_columns, delimiter_positions) table_row.cells[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions) #elif len(cells) == number_of_columns: # Simple row with partial separator, # A new row is to be added # for i in range(len(cells)): # if _matchGridTableBodySeparatorLine.match(cells[i]): # Update cell in new row Loading @@ -674,30 +708,42 @@ def parse_pandoc_table_with_spans(pandoc_table): if len(cells) < number_of_columns: # Colspan: Positions of | with respect to + need to be determined for i in range(len(cells)): # Handle content of the cell list_flags[i], table_row[i] = handling_content(table_row[i], cells[i], list_flags[i]) if not table_row[i]['colspan_adjusted']: table_row[i]['colspan_adjusted'] = True table_row[i] = adjust_colspan(table_row, i, len(cells), line, number_of_columns, delimiter_positions) if table_row.cells[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i], cells[i]) if not auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted: auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted = True #TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = adjust_colspan(auxiliar_rows[table_row.cells[i].auxiliar_index].cells, i, len(cells), line, number_of_columns, delimiter_positions) else: table_row.cells[i] = handling_content(table_row.cells[i], cells[i]) if not table_row.cells[i].colspan_adjusted: table_row.cells[i].colspan_adjusted = True table_row.cells[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions) elif len(cells) == number_of_columns: # Simple row for i in range(len(cells)): if use_auxiliar_row[i]: list_flags[i], auxiliar_row[i] = handling_content(auxiliar_row[i], cells[i],list_flags[i]) if table_row.cells[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i], cells[i]) else: # Handle content of the cell list_flags[i], table_row[i] = handling_content(table_row[i], cells[i],list_flags[i]) table_row.cells[i] = handling_content(table_row.cells[i], cells[i]) else: raise ValueError("More cells than columns found") else: raise ValueError("No separator line found for row starting") if has_header and start >= header_separator_index: # table_row and auxiliar_row are part of data_rows data_rows.append(table_row) data_rows.append(table_row.cells) if has_merged_cells: data_rows.append(auxiliar_row) for row in auxiliar_rows: #for i in range(len(row.cells)): # print(row.cells[i].content) data_rows.append(row.cells) elif has_header and start < header_separator_index: # table_row and auxiliar_row are part of header_rows header_rows.append(table_row) header_rows.append(table_row.cells) if has_merged_cells: header_rows.append(auxiliar_row) for row in auxiliar_rows: header_rows.append(row.cells) #print(header_rows) #print(data_rows) Loading @@ -711,35 +757,35 @@ def parse_pandoc_table_with_spans(pandoc_table): italic = "<i>" for row in rows: for cell in row: if cell['content'] is not None: if cell.content is not None: # Replacing "<" by < cell['content'] = cell['content'].replace("<", "<") cell.content = cell.content.replace("<", "<") #Bold for bold_characters in ["**", "__"]: while cell['content'].find(bold_characters) != -1: cell['content'] = cell['content'].replace(bold_characters, bold, 1) while cell.content.find(bold_characters) != -1: cell.content = cell.content.replace(bold_characters, bold, 1) if bold == "<strong>": bold = "</strong>" else: bold = "<strong>" #Italic while cell['content'].find("_") != -1 and cell['content'].find("\_") == -1: cell['content'] = cell['content'].rstrip() .replace("_", italic, 1) while cell.content.find("_") != -1 and cell.content.find("\_") == -1: cell.content = cell.content.rstrip() .replace("_", italic, 1) if italic == "<i>": italic = "</i>" else: italic = "<i>" while cell['content'].find("\_") != -1: cell['content'] = cell['content'].rstrip().replace("\_", "_", 1) while cell.content.find("\_") != -1: cell.content = cell.content.rstrip().replace("\_", "_", 1) # Correct newlines characters for row in header_rows: for cell in row: cell['content'] = cell['content'].replace("\n", "<br />") if cell['content'] is not None else None cell.content = cell.content.replace("\n", "<br />") if cell.content is not None else None for row in data_rows: for cell in row: cell['content'] = cell['content'].replace("\n", "<br />") if cell['content'] is not None else None cell.content = cell.content.replace("\n", "<br />") if cell.content is not None else None # Checking that the grid is correct Not too much tested - need to take into account rowspan of previous rows forward_rowspan = [] Loading @@ -748,13 +794,13 @@ def parse_pandoc_table_with_spans(pandoc_table): forward_rowspan = [0 for _ in range(len(header_rows[row_index]))] sum = 0 for cell_index in range(len(header_rows[row_index])): sum += header_rows[row_index][cell_index]['colspan'] if row_index > 0 and header_rows[row_index][cell_index]['colspan'] == 0: sum += header_rows[row_index][cell_index].colspan if row_index > 0 and header_rows[row_index][cell_index].colspan == 0: if forward_rowspan[cell_index] > 0: sum += 1 forward_rowspan[cell_index] -= 1 if forward_rowspan[cell_index] == 0 and header_rows[row_index][cell_index]['rowspan'] > 1: forward_rowspan[cell_index] = header_rows[row_index][cell_index]['rowspan'] -1 if forward_rowspan[cell_index] == 0 and header_rows[row_index][cell_index].rowspan > 1: forward_rowspan[cell_index] = header_rows[row_index][cell_index].rowspan -1 if not sum == number_of_columns: raise ValueError("Grid table not converted properly") forward_rowspan = [] Loading @@ -763,13 +809,13 @@ def parse_pandoc_table_with_spans(pandoc_table): forward_rowspan = [0 for _ in range(len(data_rows[row_index]))] sum = 0 for cell_index in range(len(data_rows[row_index])): sum += data_rows[row_index][cell_index]['colspan'] if row_index > 0 and data_rows[row_index][cell_index]['colspan'] == 0: sum += data_rows[row_index][cell_index].colspan if row_index > 0 and data_rows[row_index][cell_index].colspan == 0: if forward_rowspan[cell_index] > 0: sum += 1 forward_rowspan[cell_index] -= 1 if forward_rowspan[cell_index] == 0 and data_rows[row_index][cell_index]['rowspan'] > 1: forward_rowspan[cell_index] = data_rows[row_index][cell_index]['rowspan'] - 1 if forward_rowspan[cell_index] == 0 and data_rows[row_index][cell_index].rowspan > 1: forward_rowspan[cell_index] = data_rows[row_index][cell_index].rowspan - 1 if not sum == number_of_columns: raise ValueError("Grid table not converted properly") Loading @@ -789,35 +835,35 @@ def generate_html_table_with_spans(pandoc_table): for row in grid_header: for cell in row: if cell['rowspan'] != 0 and cell['colspan'] != 0: if cell.rowspan != 0 and cell.colspan != 0: has_header = True if has_header: html += " <thead>\n" for row in grid_header: html += " <tr>\n" for cell in row: if cell['rowspan'] == 0 or cell['colspan'] == 0: if cell.rowspan == 0 or cell.colspan == 0: continue else: # Prepare content, in case there's a list #print(cell['content']) #print(cell.content) if matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", cell['content']): # Update cell in new row cell.content): # Update cell in new row #print("MATCHING") list = "<ul>" # Build list the matches for match in matches: list += "<li>" + match[1] + "</li>" list += "</ul>" cell['content'] = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+", list, cell['content']) cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+", list, cell.content) # Enforce left alignment if cell contains a list cell['alignment'] = "align=\"left\"" cell.alignment = "align=\"left\"" #else: # print("NOT MATCHING") rowspan = f" rowspan=\"{cell['rowspan']}\"" if cell["rowspan"] > 1 else "" colspan = f" colspan=\"{cell['colspan']}\"" if cell["colspan"] > 1 else "" html += f" <th{rowspan}{colspan} {cell['alignment']}>{cell['content']}</th>\n" rowspan = f" rowspan=\"{cell.rowspan}\"" if cell.rowspan > 1 else "" colspan = f" colspan=\"{cell.colspan}\"" if cell.colspan > 1 else "" html += f" <th{rowspan}{colspan} {cell.alignment}>{cell.content}</th>\n" html += " </tr>\n" html += " </thead>\n" Loading @@ -825,26 +871,27 @@ def generate_html_table_with_spans(pandoc_table): for row in grid_body: html += " <tr>\n" for cell in row: if cell['rowspan'] == 0 or cell['colspan'] == 0: if cell.rowspan == 0 or cell.colspan == 0: continue else: #Prepare content, in case there's a list #print(cell['content']) if matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", cell['content']): # Update cell in new row #print(cell.content) if matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", cell.content): # Update cell in new row #print("MATCHING") #print(cell.content) list = "<ul>" # Build list the matches for match in matches: list += "<li>" + match[1] + "</li>" list += "</ul>" cell['content'] = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+",list, cell['content']) cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+",list, cell.content) # Enforce left alignment if cell contains a list cell['alignment'] = "align=\"left\"" cell.alignment = "align=\"left\"" #else: #print("NOT MATCHING") rowspan = f" rowspan=\"{cell['rowspan']}\"" if cell["rowspan"] > 1 else "" colspan = f" colspan=\"{cell['colspan']}\"" if cell["colspan"] > 1 else "" html += f" <td{rowspan}{colspan} {cell['alignment']}>{cell['content']}</td>\n" rowspan = f" rowspan=\"{cell.rowspan}\"" if cell.rowspan > 1 else "" colspan = f" colspan=\"{cell.colspan}\"" if cell.colspan > 1 else "" html += f" <td{rowspan}{colspan} {cell.alignment}>{cell.content}</td>\n" html += " </tr>\n" html += " </tbody>\n" Loading Loading
toMkdocs/toMkdocs.py +164 −117 Original line number Diff line number Diff line Loading @@ -420,6 +420,7 @@ _matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECA _matchTable = re.compile(r'^\s*\|.*\|\s*$', re.IGNORECASE) _matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE) _matchGridTable = re.compile(r'^\s*\+-.*\+\s$', re.IGNORECASE) _matchGridTableSeparator = re.compile(r'\s*\+([-:=]+\+)+\s*$', re.IGNORECASE) _matchGridTableBodySeparator = re.compile(r'.*\+([:-]+\+)+.*$', re.IGNORECASE) _matchGridTableHeaderSeparator = re.compile(r'.*\+([=:]+\+)+.*$', re.IGNORECASE) _matchGridTableBodySeparatorLine = re.compile(r'[-:]+$', re.IGNORECASE) Loading Loading @@ -463,58 +464,85 @@ def parse_pandoc_table_with_spans(pandoc_table): # Split the input into lines lines = [line.strip() for line in pandoc_table.strip().split("\n")] class Cell: """ Represents the document object. """ content: str rowspan: int colspan: int colspan_adjusted: bool alignment: str position: int list_flag: bool auxiliar_index: int def __init__(self): self.content = None self.rowspan = 0 self.colspan = 0 self.colspan_adjusted = False self.alignment = "align=\"center\"" self.position = 0 self.list_flag = False self.auxiliar_index = None class Row(): """ Represents a row in the markdown file. """ cells:list[Cell] = [] def __init__(self, length: int = 1) -> None: self.cells = [Cell() for _ in range(length)] # Detect separator lines by pattern (it does not take into account partial separators def is_separator(line): _matchGridTableSeparator = re.compile(r'\s*\+([-:=]+\+)+\s*$', re.IGNORECASE) return _matchGridTableSeparator.match(line) def handling_content(cell, content, list_flag): if cell['content'] is None: cell['rowspan'] += 1 cell['colspan'] += 1 def handling_content(cell, content): if cell.content is None: cell.rowspan += 1 cell.colspan += 1 if content.strip().startswith("- "): # List list_flag = True print(content) cell['content'] = content.strip() + "\n" # Add newline to know when the list element ends elif list_flag: # any other content when handling list is concatenated to the last list element cell['content'] += content.strip() + "\n" elif cells[i].strip() == "": # separation between list and other paragraph list_flag = False cell['content'] = re.sub(r'\\\s*$', "\n", content) cell.list_flag = True #print(content) cell.content = content.strip() + "\n" # Add newline to know when the list element ends elif cell.list_flag and cells[i].strip() != "": # any other content when handling list is concatenated to the last list element cell.content += content.strip() + "\n" elif cells[i].strip == "": # separation between list and other paragraph cell.list_flag = False cell.content += "\n" #if not cell['content'].endswith("\n") else "" else: cell['content'] = re.sub(r'\\\s*$', "\n", content.strip()) cell.content = re.sub(r'\\\s*$', "\n", content.strip()) else: if content.strip().startswith("- "): # List if not list_flag: cell['content'] += "\n" if not cell.list_flag: cell.content += "\n" #cell['content'] = cell['content'].strip("\n") list_flag = True cell['content'] += content.strip() + "\n" # Add newline to know when the list element ends elif list_flag: # any other content when handling list is concatenated to the last list element cell['content'] = cell['content'].strip("\n") cell['content'] += " " + content.strip() + "\n" cell.list_flag = True cell.content += content.strip() + "\n" # Add newline to know when the list element ends elif cell.list_flag and cells[i].strip() != "": # any other content when handling list is concatenated to the last list element cell.content = cell.content.strip("\n") cell.content += " " + content.strip() + "\n" elif cells[i].strip() == "": # separation between list and other paragraph list_flag = False cell.list_flag = False #content = re.sub(r'\\\s*$', "\n", content.strip()) cell['content'] += "\n" if not cell['content'].endswith("\n") else "" cell.content += "\n" if not cell.content.endswith("\n") else "" else: content = re.sub(r'\\\s*$', "\n", content.strip()) cell['content'] += " " + content cell.content += " " + content #print(cell['content']) return list_flag, cell return cell def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions): for j in range(column_index, number_of_parts): delimiter_start = row[j - 1]['position'] if j != 0 else 0 delimiter_start = row[j - 1].position if j != 0 else 0 positions = [line.find(delimiter, delimiter_start + 1) for delimiter in "|+" if delimiter in line[delimiter_start + 1:]] position = min(positions) if positions else -1 if position > delimiter_positions[j]: # Colspan to be increased row[i]['colspan'] += 1 row[i].colspan += 1 if position == delimiter_positions[len(delimiter_positions) - 1]: # last cell in row, adjust colspan to get max number columns colspan_allocated = 0 for cell_index in range(number_of_parts): colspan_allocated += row[cell_index]['colspan'] row[column_index]['colspan'] += number_of_columns - colspan_allocated colspan_allocated = row[i].colspan #for cell_index in range(number_of_parts): # colspan_allocated += row[cell_index].colspan row[column_index].colspan += number_of_columns - colspan_allocated - column_index elif position < delimiter_positions[j]: raise ValueError("Wrong cell formatting") else: Loading Loading @@ -563,6 +591,7 @@ def parse_pandoc_table_with_spans(pandoc_table): data_rows = [] for row in range(len(separator_indices) - 1): table_row = [] auxiliar_rows = [] auxiliar_row = [] use_auxiliar_row = [] list_flags = [] Loading Loading @@ -591,65 +620,70 @@ def parse_pandoc_table_with_spans(pandoc_table): # else: # alignments.append("align=\"center\"") header_delimiter_index = 0 table_row = Row(number_of_columns_row) for i in range(number_of_columns_row): delimiter_index += len(parts[i]) + 1 table_row.append({ "content": None, "rowspan": 0, "colspan": 0, "colspan_adjusted": False, "alignment": default_alignments[i] if i == 0 else "align=\"center\"", "position": delimiter_index # Position of cell delimiter + }) table_row.cells[i].alignment = default_alignments[i] if i == 0 else "align=\"center\"" table_row.cells[i].position = delimiter_index # Position of cell delimiter + #Set alignment as defined by header separator line while header_delimiter_index in range(len(default_alignments)) and table_row[i]['position'] > header_delimiter_positions[header_delimiter_index]: while header_delimiter_index in range(len(default_alignments)) and table_row.cells[i].position > header_delimiter_positions[header_delimiter_index]: header_delimiter_index += 1 if header_delimiter_index in range(len(default_alignments)): if table_row[i]['position'] < header_delimiter_positions[header_delimiter_index]: table_row[i]['alignment'] = default_alignments[header_delimiter_index] elif table_row[i]['position'] == header_delimiter_positions[header_delimiter_index]: table_row[i]['alignment'] = default_alignments[i] if table_row.cells[i].position < header_delimiter_positions[header_delimiter_index]: table_row.cells[i].alignment = default_alignments[header_delimiter_index] elif table_row.cells[i].position == header_delimiter_positions[header_delimiter_index]: table_row.cells[i].alignment = default_alignments[i] header_delimiter_index += 1 else: raise ValueError("Invalid table formatting") for i in range(number_of_columns): auxiliar_row.append({ "content": None, "rowspan": 0, "colspan": 0, "colspan_adjusted": False, "alignment": "align=\"center\"", "position": 0 }) use_auxiliar_row.append(False) list_flags.append(False) #auxiliar_row = Row(number_of_columns) #for i in range(number_of_columns): #auxiliar_row.append(default_cell) #use_auxiliar_row.append(False) #auxiliar_rows.append({'auxiliar_row':auxiliar_row, 'use_auxiliar':use_auxiliar_row, 'list_flags':list_flags}) elif in_data_row: # Regular data row or partial separator if _matchGridTableBodySeparator.match(line): # Partial separator has_merged_cells = True #Add auxiliar line, set delimiters for each cell auxiliar_rows.append(Row(number_of_columns)) aux_delimiter_index = 0 for i in range(number_of_columns_row): aux_delimiter_index += len(parts[i]) + 1 auxiliar_rows[-1].cells[i].position = aux_delimiter_index # Position of cell delimiter + cells = re.split(r"\s*[\|\+]\s*", line.strip("|").strip("+")) # (?<!\\)[\|\+] if len(cells) <= number_of_columns: # Colspan: Positions of | with respect to + need to be determined for i in range(len(cells)): if _matchGridTableBodySeparatorLine.match(cells[i]): # A new row is to be added use_auxiliar_row[i] = True list_flags[i] = False if cells[i].startswith(":") and not cells[i].endswith(":"): auxiliar_row[i]['alignment'] = "align=\"left\"" elif not cells[i].startswith(":") and cells[i].endswith(":"): auxiliar_row[i]['alignment'] = "align=\"right\"" else: auxiliar_row[i]['alignment'] = "align=\"center\"" #auxiliar_rows[-1]['use_auxiliar_row'][i] = True auxiliar_rows[-1].cells[i].list_flag = False table_row.cells[i].auxiliar_index = len(auxiliar_rows)-1 #if cells[i].startswith(":") and not cells[i].endswith(":"): # auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"left\"" #elif not cells[i].startswith(":") and cells[i].endswith(":"): # auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"right\"" #else: # auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"center\"" else: # Handle content of the cell list_flags[i], table_row[i] = handling_content(table_row[i], cells[i],list_flags[i]) if table_row.cells[i].auxiliar_index is not None: # and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: auxiliar_rows[table_row.cells[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index][i], cells[i]) if not auxiliar_rows[table_row.cells[i].auxiliar_index][i].colspan_adjusted: auxiliar_rows[table_row.cells[i].auxiliar_index][i].colspan_adjusted = True # TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator auxiliar_rows[table_row.cells[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row.cells[i].auxiliar_index], i, len(cells), line, number_of_columns, delimiter_positions) else: table_row.cells[i] = handling_content(table_row.cells[i], cells[i]) # Cell which is not separator table_row[i]['rowspan'] += 1 if not table_row[i]['colspan_adjusted']: table_row[i]['colspan_adjusted'] = True table_row.cells[i].rowspan += 1 if not table_row.cells[i].colspan_adjusted: table_row.cells[i].colspan_adjusted = True #TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator table_row[i] = adjust_colspan(table_row, i, len(cells), line, number_of_columns, delimiter_positions) table_row.cells[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions) #elif len(cells) == number_of_columns: # Simple row with partial separator, # A new row is to be added # for i in range(len(cells)): # if _matchGridTableBodySeparatorLine.match(cells[i]): # Update cell in new row Loading @@ -674,30 +708,42 @@ def parse_pandoc_table_with_spans(pandoc_table): if len(cells) < number_of_columns: # Colspan: Positions of | with respect to + need to be determined for i in range(len(cells)): # Handle content of the cell list_flags[i], table_row[i] = handling_content(table_row[i], cells[i], list_flags[i]) if not table_row[i]['colspan_adjusted']: table_row[i]['colspan_adjusted'] = True table_row[i] = adjust_colspan(table_row, i, len(cells), line, number_of_columns, delimiter_positions) if table_row.cells[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i], cells[i]) if not auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted: auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted = True #TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = adjust_colspan(auxiliar_rows[table_row.cells[i].auxiliar_index].cells, i, len(cells), line, number_of_columns, delimiter_positions) else: table_row.cells[i] = handling_content(table_row.cells[i], cells[i]) if not table_row.cells[i].colspan_adjusted: table_row.cells[i].colspan_adjusted = True table_row.cells[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions) elif len(cells) == number_of_columns: # Simple row for i in range(len(cells)): if use_auxiliar_row[i]: list_flags[i], auxiliar_row[i] = handling_content(auxiliar_row[i], cells[i],list_flags[i]) if table_row.cells[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i], cells[i]) else: # Handle content of the cell list_flags[i], table_row[i] = handling_content(table_row[i], cells[i],list_flags[i]) table_row.cells[i] = handling_content(table_row.cells[i], cells[i]) else: raise ValueError("More cells than columns found") else: raise ValueError("No separator line found for row starting") if has_header and start >= header_separator_index: # table_row and auxiliar_row are part of data_rows data_rows.append(table_row) data_rows.append(table_row.cells) if has_merged_cells: data_rows.append(auxiliar_row) for row in auxiliar_rows: #for i in range(len(row.cells)): # print(row.cells[i].content) data_rows.append(row.cells) elif has_header and start < header_separator_index: # table_row and auxiliar_row are part of header_rows header_rows.append(table_row) header_rows.append(table_row.cells) if has_merged_cells: header_rows.append(auxiliar_row) for row in auxiliar_rows: header_rows.append(row.cells) #print(header_rows) #print(data_rows) Loading @@ -711,35 +757,35 @@ def parse_pandoc_table_with_spans(pandoc_table): italic = "<i>" for row in rows: for cell in row: if cell['content'] is not None: if cell.content is not None: # Replacing "<" by < cell['content'] = cell['content'].replace("<", "<") cell.content = cell.content.replace("<", "<") #Bold for bold_characters in ["**", "__"]: while cell['content'].find(bold_characters) != -1: cell['content'] = cell['content'].replace(bold_characters, bold, 1) while cell.content.find(bold_characters) != -1: cell.content = cell.content.replace(bold_characters, bold, 1) if bold == "<strong>": bold = "</strong>" else: bold = "<strong>" #Italic while cell['content'].find("_") != -1 and cell['content'].find("\_") == -1: cell['content'] = cell['content'].rstrip() .replace("_", italic, 1) while cell.content.find("_") != -1 and cell.content.find("\_") == -1: cell.content = cell.content.rstrip() .replace("_", italic, 1) if italic == "<i>": italic = "</i>" else: italic = "<i>" while cell['content'].find("\_") != -1: cell['content'] = cell['content'].rstrip().replace("\_", "_", 1) while cell.content.find("\_") != -1: cell.content = cell.content.rstrip().replace("\_", "_", 1) # Correct newlines characters for row in header_rows: for cell in row: cell['content'] = cell['content'].replace("\n", "<br />") if cell['content'] is not None else None cell.content = cell.content.replace("\n", "<br />") if cell.content is not None else None for row in data_rows: for cell in row: cell['content'] = cell['content'].replace("\n", "<br />") if cell['content'] is not None else None cell.content = cell.content.replace("\n", "<br />") if cell.content is not None else None # Checking that the grid is correct Not too much tested - need to take into account rowspan of previous rows forward_rowspan = [] Loading @@ -748,13 +794,13 @@ def parse_pandoc_table_with_spans(pandoc_table): forward_rowspan = [0 for _ in range(len(header_rows[row_index]))] sum = 0 for cell_index in range(len(header_rows[row_index])): sum += header_rows[row_index][cell_index]['colspan'] if row_index > 0 and header_rows[row_index][cell_index]['colspan'] == 0: sum += header_rows[row_index][cell_index].colspan if row_index > 0 and header_rows[row_index][cell_index].colspan == 0: if forward_rowspan[cell_index] > 0: sum += 1 forward_rowspan[cell_index] -= 1 if forward_rowspan[cell_index] == 0 and header_rows[row_index][cell_index]['rowspan'] > 1: forward_rowspan[cell_index] = header_rows[row_index][cell_index]['rowspan'] -1 if forward_rowspan[cell_index] == 0 and header_rows[row_index][cell_index].rowspan > 1: forward_rowspan[cell_index] = header_rows[row_index][cell_index].rowspan -1 if not sum == number_of_columns: raise ValueError("Grid table not converted properly") forward_rowspan = [] Loading @@ -763,13 +809,13 @@ def parse_pandoc_table_with_spans(pandoc_table): forward_rowspan = [0 for _ in range(len(data_rows[row_index]))] sum = 0 for cell_index in range(len(data_rows[row_index])): sum += data_rows[row_index][cell_index]['colspan'] if row_index > 0 and data_rows[row_index][cell_index]['colspan'] == 0: sum += data_rows[row_index][cell_index].colspan if row_index > 0 and data_rows[row_index][cell_index].colspan == 0: if forward_rowspan[cell_index] > 0: sum += 1 forward_rowspan[cell_index] -= 1 if forward_rowspan[cell_index] == 0 and data_rows[row_index][cell_index]['rowspan'] > 1: forward_rowspan[cell_index] = data_rows[row_index][cell_index]['rowspan'] - 1 if forward_rowspan[cell_index] == 0 and data_rows[row_index][cell_index].rowspan > 1: forward_rowspan[cell_index] = data_rows[row_index][cell_index].rowspan - 1 if not sum == number_of_columns: raise ValueError("Grid table not converted properly") Loading @@ -789,35 +835,35 @@ def generate_html_table_with_spans(pandoc_table): for row in grid_header: for cell in row: if cell['rowspan'] != 0 and cell['colspan'] != 0: if cell.rowspan != 0 and cell.colspan != 0: has_header = True if has_header: html += " <thead>\n" for row in grid_header: html += " <tr>\n" for cell in row: if cell['rowspan'] == 0 or cell['colspan'] == 0: if cell.rowspan == 0 or cell.colspan == 0: continue else: # Prepare content, in case there's a list #print(cell['content']) #print(cell.content) if matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", cell['content']): # Update cell in new row cell.content): # Update cell in new row #print("MATCHING") list = "<ul>" # Build list the matches for match in matches: list += "<li>" + match[1] + "</li>" list += "</ul>" cell['content'] = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+", list, cell['content']) cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+", list, cell.content) # Enforce left alignment if cell contains a list cell['alignment'] = "align=\"left\"" cell.alignment = "align=\"left\"" #else: # print("NOT MATCHING") rowspan = f" rowspan=\"{cell['rowspan']}\"" if cell["rowspan"] > 1 else "" colspan = f" colspan=\"{cell['colspan']}\"" if cell["colspan"] > 1 else "" html += f" <th{rowspan}{colspan} {cell['alignment']}>{cell['content']}</th>\n" rowspan = f" rowspan=\"{cell.rowspan}\"" if cell.rowspan > 1 else "" colspan = f" colspan=\"{cell.colspan}\"" if cell.colspan > 1 else "" html += f" <th{rowspan}{colspan} {cell.alignment}>{cell.content}</th>\n" html += " </tr>\n" html += " </thead>\n" Loading @@ -825,26 +871,27 @@ def generate_html_table_with_spans(pandoc_table): for row in grid_body: html += " <tr>\n" for cell in row: if cell['rowspan'] == 0 or cell['colspan'] == 0: if cell.rowspan == 0 or cell.colspan == 0: continue else: #Prepare content, in case there's a list #print(cell['content']) if matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", cell['content']): # Update cell in new row #print(cell.content) if matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", cell.content): # Update cell in new row #print("MATCHING") #print(cell.content) list = "<ul>" # Build list the matches for match in matches: list += "<li>" + match[1] + "</li>" list += "</ul>" cell['content'] = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+",list, cell['content']) cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+",list, cell.content) # Enforce left alignment if cell contains a list cell['alignment'] = "align=\"left\"" cell.alignment = "align=\"left\"" #else: #print("NOT MATCHING") rowspan = f" rowspan=\"{cell['rowspan']}\"" if cell["rowspan"] > 1 else "" colspan = f" colspan=\"{cell['colspan']}\"" if cell["colspan"] > 1 else "" html += f" <td{rowspan}{colspan} {cell['alignment']}>{cell['content']}</td>\n" rowspan = f" rowspan=\"{cell.rowspan}\"" if cell.rowspan > 1 else "" colspan = f" colspan=\"{cell.colspan}\"" if cell.colspan > 1 else "" html += f" <td{rowspan}{colspan} {cell.alignment}>{cell.content}</td>\n" html += " </tr>\n" html += " </tbody>\n" Loading