From 3491470e6b95a96f976426b1cdb0de18903472d9 Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega Date: Fri, 7 Mar 2025 02:04:31 +0100 Subject: [PATCH 01/13] Gitlab grid table filter --- grid_table_filter.rb | 522 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 522 insertions(+) create mode 100644 grid_table_filter.rb diff --git a/grid_table_filter.rb b/grid_table_filter.rb new file mode 100644 index 0000000..dee837d --- /dev/null +++ b/grid_table_filter.rb @@ -0,0 +1,522 @@ +# frozen_string_literal: true + +# +# GridTableFilter.rb +# +# (c) 2025 by Miguel Angel Reina Ortega & Andreas Kraft +# License: BSD 3-Clause License. See the LICENSE file for further details. +# + +# TODO: This is now a legacy filter, and is only used with the Ruby parser. +# The current markdown parser now properly handles grid table blocks. +# issue: https://gitlab.com/gitlab-org/gitlab/-/issues/460864 +# GridTableFilter.rb +# +# Converts Pandoc-style grid tables to HTML tables with rowspan and colspan support +# + +module Banzai + module Filter + class GridTableFilter < HTML::Pipeline::TextFilter + MARKDOWN_GRID_TABLE_BLOCK_REGEX = %r{ + (? + # Grid table blocks: + # +---+---+---+---+ + # Anything, starting with | blocks which are ignored by this filter + # +---+---+---+---+ + + ^\s*\+-.*\+\s$ # First separator line + (?:.*\n)*? # Any number of rows (non-greedy) + \s*\+-.*\+\s$ # Last separator line + ) + }mx + + require 'logger' + + class Cell + attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position, :list_flag + + def initialize + @content = nil + @rowspan = 0 + @colspan = 0 + @colspan_adjusted = false + @alignment = 'align="center"' + @position = nil + @list_flag = false + end + + def set_alignment(default_alignments, header_delimiter_positions) + header_delimiter_index = 0 + while header_delimiter_index < default_alignments.length && @position > header_delimiter_positions[header_delimiter_index] + header_delimiter_index += 1 + end + + if header_delimiter_index < default_alignments.length + if @position < header_delimiter_positions[header_delimiter_index] + @alignment = default_alignments[header_delimiter_index] + elsif @position == header_delimiter_positions[header_delimiter_index] + @alignment = default_alignments[header_delimiter_index] + header_delimiter_index += 1 + end + else + raise "Invalid table formatting" + end + end + end + + class Row + attr_accessor :cells + + def initialize(length = 1) + @cells = Array.new(length) { Cell.new } + end + + def [](index) + @cells[index] + end + + def []=(index, value) + @cells[index] = value + end + end + + class RowTracker + attr_accessor :row_tracker + + def initialize(items) + @row_tracker = Array.new(items, 0) + end + + def [](index) + @row_tracker[index] + end + + def []=(index, value) + @row_tracker[index] = value + end + end + + # Add these regex constants at the top of the file, after the require statement + GRID_TABLE_SEPARATOR = /\s*\+([-:=]+\+)+\s*$/ + GRID_TABLE_HEADER_SEPARATOR = /.*\+([=:]+\+)+.*$/ + GRID_TABLE_BODY_SEPARATOR = /.*\+([:-]+\+)+.*$/ + GRID_TABLE_BODY_SEPARATOR_LINE = /[-:]+$/ + + def parse_pandoc_table_with_spans(pandoc_table) + # Split the input into lines + lines = pandoc_table.strip.split("\n").map(&:strip) + + # Helper method to detect separator lines + def is_separator(line) + GRID_TABLE_SEPARATOR.match?(line) + end + + # Helper method to handle content in cells + def handling_content(cell, content) + if cell.content.nil? + cell.rowspan += 1 + cell.colspan += 1 + if content.strip.start_with?("- ") # List + cell.list_flag = true + cell.content = "#{content.strip}\n" + elsif cell.list_flag && !content.strip.empty? + cell.content += "#{content.strip}\n" + elsif content.strip == "" + cell.list_flag = false + cell.content = "\n" + else + cell.content = content.strip.gsub(/\\\s*$/, "\n") + end + else + if content.strip.start_with?("- ") + unless cell.list_flag + cell.content += "\n" + end + cell.list_flag = true + cell.content += "#{content.strip}\n" + elsif cell.list_flag && !content.strip.empty? + cell.content = cell.content.strip.chomp("\n") + cell.content += " #{content.strip}\n" + elsif content.strip.empty? + cell.list_flag = false + cell.content += cell.content.end_with?("\n") ? "" : "\n" + else + content = content.strip.gsub(/\\\s*$/, "\n") + cell.content += " #{content}" + end + end + cell + end + + # Helper method to adjust colspan + def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions) + (column_index...number_of_parts).each do |j| + delimiter_start = nil + col_i = column_index + until delimiter_start + delimiter_start = col_i > 0 ? row[col_i - 1].position : 0 + col_i -= 1 + end + + positions = ["|", "+"].map do |delimiter| + pos = line[delimiter_start + 1..-1]&.index(delimiter) + pos ? pos + delimiter_start + 1 : nil + end.compact + + position = positions.min + + if position && position > delimiter_positions[j] + row[column_index].colspan += 1 + if position == delimiter_positions[-1] + colspan_allocated = row[column_index].colspan + row[column_index].colspan += number_of_columns - colspan_allocated - column_index + end + elsif position && position < delimiter_positions[j] + raise "Wrong cell formatting" + else + break + end + end + row[column_index] + end + + separator_indices = lines.each_index.select { |i| is_separator(lines[i]) } + + raise "No valid separators found in the provided Pandoc table." if separator_indices.empty? + + # Calculate max number of columns and delimiter positions + delimiter_positions = [] + number_of_columns = separator_indices.map { |i| lines[i].count("+") - 1 }.max + + separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns } + number_of_columns.times do |j| + start_pos = j.zero? ? 0 : delimiter_positions[j - 1] + pos = lines[separator_index_max_columns][start_pos + 1..-1]&.index("+") + delimiter_positions << (pos ? pos + start_pos + 1 : -1) + end + + # Process header + has_header = false + header_delimiter_positions = [] + default_alignments = [] + header_rows = [] + header_separator_index = nil + + separator_indices.each do |index| + if GRID_TABLE_HEADER_SEPARATOR.match?(lines[index]) + has_header = true + header_separator_index = index + parts = lines[index].strip.delete_prefix("+").split("+") + + parts.each_with_index do |part, part_index| + default_alignments << if part.start_with?(":") && !part.end_with?(":") + 'align="left"' + elsif !part.start_with?(":") && part.end_with?(":") + 'align="right"' + else + 'align="center"' + end + + start_pos = part_index.zero? ? 0 : header_delimiter_positions[part_index - 1] + pos = lines[index][start_pos + 1..-1]&.index("+") + header_delimiter_positions << (pos ? pos + start_pos + 1 : -1) + end + break + end + end + + # Process table body + data_rows = [] + (separator_indices.length - 1).times do |row| + rows = [] + rows_tracker = nil + in_data_row = false + start, end_idx = separator_indices[row], separator_indices[row + 1] + row_lines = lines[start...end_idx] + + next if row_lines.empty? + + row_lines.each do |line| + if is_separator(line) && !in_data_row + in_data_row = true + parts = line.strip.delete_prefix("+").split("+") + delimiter_index = 0 + rows << Row.new(number_of_columns) + rows_tracker = RowTracker.new(number_of_columns) + + i = 0 + parts.each_with_index do |_, j| + next unless i < number_of_columns + + delimiter_index += parts[j].length + 1 + rows[-1][i].position = delimiter_index + rows[-1][i].set_alignment(default_alignments, header_delimiter_positions) + + while delimiter_index > delimiter_positions[i] + i += 1 + end + i += 1 + end + + elsif in_data_row + if GRID_TABLE_BODY_SEPARATOR.match?(line) + cells_content = line.strip.delete_prefix("|").delete_prefix("+") + .delete_suffix("|").delete_suffix("+").split(/[\|\+]/) + + rows << Row.new(number_of_columns) + aux_delimiter_index = 0 + auxiliar_cell_index = 0 + + cells_content.each_with_index do |_, i| + next unless auxiliar_cell_index < number_of_columns + + aux_delimiter_index += cells_content[i].length + 1 + rows[-1][auxiliar_cell_index].position = aux_delimiter_index + rows[-1][auxiliar_cell_index].set_alignment(default_alignments, header_delimiter_positions) + + while aux_delimiter_index > delimiter_positions[auxiliar_cell_index] + auxiliar_cell_index += 1 + end + auxiliar_cell_index += 1 + end + + if cells_content.length <= number_of_columns + column_index = 0 + cells_content.each_with_index do |content, i| + if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) + rows_tracker[column_index] += 1 + rows[rows_tracker[column_index]][column_index].list_flag = false + + column_forward = 0 + (column_index...delimiter_positions.length).each do |del_index| + if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[del_index] + column_forward += 1 + rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1 + end + end + column_index += column_forward + else + rows[rows_tracker[column_index]][column_index] = + handling_content(rows[rows_tracker[column_index]][column_index], content) + rows[rows_tracker[column_index]][column_index].rowspan += 1 + + unless rows[rows_tracker[column_index]][column_index].colspan_adjusted + rows[rows_tracker[column_index]][column_index].colspan_adjusted = true + rows[rows_tracker[column_index]][column_index] = + adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, + line, number_of_columns, delimiter_positions) + end + + if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index] + colspan = rows[rows_tracker[column_index]][column_index].colspan + column_index += colspan.zero? ? 1 : colspan + end + end + end + else + raise "More cells than columns found" + end + else + cells_content = line.strip.delete_prefix("|").split(/\s*\|\s*/) + column_index = 0 + + if cells_content.length < number_of_columns + cells_content.each_with_index do |content, i| + rows[rows_tracker[column_index]][column_index] = + handling_content(rows[rows_tracker[column_index]][column_index], content) + + unless rows[rows_tracker[column_index]][column_index].colspan_adjusted + rows[rows_tracker[column_index]][column_index].colspan_adjusted = true + rows[rows_tracker[column_index]][column_index] = + adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, + line, number_of_columns, delimiter_positions) + end + + if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index] + column_index += rows[rows_tracker[column_index]][column_index].colspan + end + end + elsif cells_content.length == number_of_columns + cells_content.each_with_index do |content, i| + rows[rows_tracker[i]][i] = handling_content(rows[rows_tracker[i]][i], content) + end + else + raise "More cells than columns found" + end + end + else + raise "No separator line found for row starting" + end + end + + if has_header && start >= header_separator_index + rows.each { |body_row| data_rows << body_row.cells } + elsif has_header && start < header_separator_index + rows.each { |header_row| header_rows << header_row.cells } + end + end + + raise "No valid rows found in the provided Pandoc table." if data_rows.empty? && header_rows.empty? + + # Format text (bold and italic) + [header_rows, data_rows].each do |rows| + rows.each do |row| + row.each do |cell| + next if cell.content.nil? + + ["**", "__"].each do |bold_chars| + while cell.content.include?(bold_chars) + cell.content = cell.content.sub(bold_chars, "") + .sub(bold_chars, "") + end + end + + while cell.content.include?("_") && !cell.content.include?("\\_") + cell.content = cell.content.rstrip.sub("_", "").sub("_", "") + end + + while cell.content.include?("\\_") + cell.content = cell.content.rstrip.sub("\\_", "_") + end + end + end + end + + # Convert newlines to HTML breaks + [header_rows, data_rows].each do |rows| + rows.each do |row| + row.each do |cell| + cell.content = cell.content&.gsub("\n", "
") + end + end + end + + # Validate grid correctness + [header_rows, data_rows].each do |rows| + forward_rowspan = [] + + rows.each_with_index do |row, row_index| + forward_rowspan = Array.new(row.length, 0) if forward_rowspan.empty? + sum = 0 + + row.each_with_index do |cell, cell_index| + sum += cell.colspan + if row_index > 0 && cell.colspan.zero? + if forward_rowspan[cell_index].positive? + sum += 1 + end + forward_rowspan[cell_index] -= 1 + end + + if forward_rowspan[cell_index].zero? && cell.rowspan > 1 + forward_rowspan[cell_index] = cell.rowspan - 1 + end + end + + raise "Grid table not converted properly" unless sum == number_of_columns + end + end + + [header_rows, data_rows] + end + + def generate_html_table_with_spans(pandoc_table) + begin + grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table) + rescue StandardError => e + logger = Logger.new(STDOUT) + logger.error("Grid table could not be generated: #{e.message}") + return "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS" + else + html = "\n" + has_header = false + + grid_header.each do |row| + row.each do |cell| + if cell.rowspan != 0 && cell.colspan != 0 + has_header = true + break + end + end + end + + if has_header + html += " \n" + grid_header.each do |row| + html += " \n" + row.each do |cell| + next if cell.rowspan == 0 || cell.colspan == 0 + + # Prepare content, in case there's a list + if matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^<]+?)(?=
|$)/) + list = "" + cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+[^<]+?
)+/, list) + # Enforce left alignment if cell contains a list + cell.alignment = 'align="left"' + end + + rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" + colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" + html += %{ #{cell.content}\n} + end + html += " \n" + end + html += " \n" + end + + html += " \n" + grid_body.each do |row| + html += " \n" + row.each do |cell| + next if cell.rowspan == 0 || cell.colspan == 0 + + if matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^<]+?)(?=
|$)/) + list = "" + cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+[^<]+?
)+/, list) + # Enforce left alignment if cell contains a list + cell.alignment = 'align="left"' + end + + rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" + colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" + html += %{ #{cell.content}\n} + end + html += " \n" + end + + html += " \n" + html += "
" + html + end + end + + def call + return @text if MarkdownFilter.glfm_markdown?(context) + + regex = Gitlab::UntrustedRegexp.new(MARKDOWN_GRID_TABLE_BLOCK_REGEX, multiline: true) + return @text unless regex.match?(@text) + + regex.replace_gsub(@text) do |match| + # Extract the grid table content from the match + grid_table = match[:code] + if grid_table + # Convert grid table to HTML table + generate_html_table_with_spans(grid_table) + else + # Return original text if no grid table found + match.to_s + end + end + end + end # end of class GridTableFilter + end # end of module Filter +end # end of module Banzai \ No newline at end of file -- GitLab From 2afc6b8a4e517eec006544d4e4decb75935e8324 Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega Date: Fri, 7 Mar 2025 02:35:10 +0100 Subject: [PATCH 02/13] Regex corrections + fix for merged rows --- grid_table_filter.rb | 840 ++++++++++++++++++++++--------------------- 1 file changed, 426 insertions(+), 414 deletions(-) diff --git a/grid_table_filter.rb b/grid_table_filter.rb index dee837d..addd34b 100644 --- a/grid_table_filter.rb +++ b/grid_table_filter.rb @@ -25,498 +25,510 @@ module Banzai # Anything, starting with | blocks which are ignored by this filter # +---+---+---+---+ - ^\s*\+-.*\+\s$ # First separator line - (?:.*\n)*? # Any number of rows (non-greedy) - \s*\+-.*\+\s$ # Last separator line + ^\s*\+(-+\+)+$\n # First separator line + (?:^\s*[|+][^\n]*$\n)* + ^\s*\+(-+\+)+$ # Last separator line + ) }mx require 'logger' class Cell - attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position, :list_flag - - def initialize - @content = nil - @rowspan = 0 - @colspan = 0 - @colspan_adjusted = false - @alignment = 'align="center"' - @position = nil - @list_flag = false - end - - def set_alignment(default_alignments, header_delimiter_positions) - header_delimiter_index = 0 - while header_delimiter_index < default_alignments.length && @position > header_delimiter_positions[header_delimiter_index] - header_delimiter_index += 1 + attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position, :list_flag + + def initialize + @content = nil + @rowspan = 0 + @colspan = 0 + @colspan_adjusted = false + @alignment = 'align="center"' + @position = nil + @list_flag = false end - if header_delimiter_index < default_alignments.length - if @position < header_delimiter_positions[header_delimiter_index] - @alignment = default_alignments[header_delimiter_index] - elsif @position == header_delimiter_positions[header_delimiter_index] - @alignment = default_alignments[header_delimiter_index] + def set_alignment(default_alignments, header_delimiter_positions) + header_delimiter_index = 0 + while header_delimiter_index < default_alignments.length && @position > header_delimiter_positions[header_delimiter_index] header_delimiter_index += 1 + end + + if header_delimiter_index < default_alignments.length + if @position < header_delimiter_positions[header_delimiter_index] + @alignment = default_alignments[header_delimiter_index] + elsif @position == header_delimiter_positions[header_delimiter_index] + @alignment = default_alignments[header_delimiter_index] + header_delimiter_index += 1 + end + else + raise "Invalid table formatting" + end end - else - raise "Invalid table formatting" - end - end - end + end # end of class Cell class Row - attr_accessor :cells + attr_accessor :cells - def initialize(length = 1) - @cells = Array.new(length) { Cell.new } - end + def initialize(length = 1) + @cells = Array.new(length) { Cell.new } + end - def [](index) - @cells[index] - end + def [](index) + @cells[index] + end - def []=(index, value) - @cells[index] = value - end - end + def []=(index, value) + @cells[index] = value + end + end # end of class Row class RowTracker - attr_accessor :row_tracker + attr_accessor :row_tracker + + def initialize(items) + @row_tracker = Array.new(items, 0) + end - def initialize(items) - @row_tracker = Array.new(items, 0) - end + def [](index) + @row_tracker[index] + end - def [](index) - @row_tracker[index] - end + def []=(index, value) + @row_tracker[index] = value + end - def []=(index, value) - @row_tracker[index] = value - end - end + def maxValue + @row_tracker.max + end + end # end of class RowTracker # Add these regex constants at the top of the file, after the require statement - GRID_TABLE_SEPARATOR = /\s*\+([-:=]+\+)+\s*$/ - GRID_TABLE_HEADER_SEPARATOR = /.*\+([=:]+\+)+.*$/ - GRID_TABLE_BODY_SEPARATOR = /.*\+([:-]+\+)+.*$/ - GRID_TABLE_BODY_SEPARATOR_LINE = /[-:]+$/ + GRID_TABLE_SEPARATOR = /^\s*\+([-:=]+\+)+\s*$/ + GRID_TABLE_HEADER_SEPARATOR = /^\s*\+([=:]+\+)+\s*$/ + GRID_TABLE_BODY_SEPARATOR = /[^\n]*\+([:-]+\+)+[^\n]*$/ + GRID_TABLE_BODY_SEPARATOR_LINE = /^[-:]+$/ def parse_pandoc_table_with_spans(pandoc_table) - # Split the input into lines - lines = pandoc_table.strip.split("\n").map(&:strip) - - # Helper method to detect separator lines - def is_separator(line) - GRID_TABLE_SEPARATOR.match?(line) - end - - # Helper method to handle content in cells - def handling_content(cell, content) - if cell.content.nil? - cell.rowspan += 1 - cell.colspan += 1 - if content.strip.start_with?("- ") # List - cell.list_flag = true - cell.content = "#{content.strip}\n" - elsif cell.list_flag && !content.strip.empty? - cell.content += "#{content.strip}\n" - elsif content.strip == "" - cell.list_flag = false - cell.content = "\n" - else - cell.content = content.strip.gsub(/\\\s*$/, "\n") + # Split the input into lines + lines = pandoc_table.strip.split("\n").map(&:strip) + + # Helper method to detect separator lines + def is_separator(line) + GRID_TABLE_SEPARATOR.match?(line) end - else - if content.strip.start_with?("- ") - unless cell.list_flag - cell.content += "\n" + + # Helper method to handle content in cells + def handling_content(cell, content) + if cell.content.nil? + cell.rowspan += 1 + cell.colspan += 1 + if content.strip.start_with?("- ") # List + cell.list_flag = true + cell.content = "#{content.strip}\n" + elsif cell.list_flag && !content.strip.empty? + cell.content += "#{content.strip}\n" + elsif content.strip == "" + cell.list_flag = false + cell.content = "\n" + else + cell.content = content.strip.gsub(/\\\s*$/, "\n") + end + else + if content.strip.start_with?("- ") + unless cell.list_flag + cell.content += "\n" + end + cell.list_flag = true + cell.content += "#{content.strip}\n" + elsif cell.list_flag && !content.strip.empty? + cell.content = cell.content.strip.chomp("\n") + cell.content += " #{content.strip}\n" + elsif content.strip.empty? + cell.list_flag = false + cell.content += cell.content.end_with?("\n") ? "" : "\n" + else + content = content.strip.gsub(/\\\s*$/, "\n") + cell.content += " #{content}" + end end - cell.list_flag = true - cell.content += "#{content.strip}\n" - elsif cell.list_flag && !content.strip.empty? - cell.content = cell.content.strip.chomp("\n") - cell.content += " #{content.strip}\n" - elsif content.strip.empty? - cell.list_flag = false - cell.content += cell.content.end_with?("\n") ? "" : "\n" - else - content = content.strip.gsub(/\\\s*$/, "\n") - cell.content += " #{content}" - end - end - cell - end - - # Helper method to adjust colspan - def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions) - (column_index...number_of_parts).each do |j| - delimiter_start = nil - col_i = column_index - until delimiter_start - delimiter_start = col_i > 0 ? row[col_i - 1].position : 0 - col_i -= 1 + cell end - positions = ["|", "+"].map do |delimiter| - pos = line[delimiter_start + 1..-1]&.index(delimiter) - pos ? pos + delimiter_start + 1 : nil - end.compact - - position = positions.min + # Helper method to adjust colspan + def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions) + (column_index...number_of_parts).each do |j| + delimiter_start = nil + col_i = column_index + until delimiter_start + delimiter_start = col_i > 0 ? row[col_i - 1].position : 0 + col_i -= 1 + end + + positions = ["|", "+"].map do |delimiter| + pos = line[delimiter_start + 1..-1]&.index(delimiter) + pos ? pos + delimiter_start + 1 : nil + end.compact + + position = positions.min - if position && position > delimiter_positions[j] - row[column_index].colspan += 1 - if position == delimiter_positions[-1] - colspan_allocated = row[column_index].colspan - row[column_index].colspan += number_of_columns - colspan_allocated - column_index + if position && position > delimiter_positions[j] + row[column_index].colspan += 1 + if position == delimiter_positions[-1] + colspan_allocated = row[column_index].colspan + row[column_index].colspan += number_of_columns - colspan_allocated - column_index + end + elsif position && position < delimiter_positions[j] + raise "Wrong cell formatting" + else + break + end end - elsif position && position < delimiter_positions[j] - raise "Wrong cell formatting" - else - break + row[column_index] end - end - row[column_index] - end - - separator_indices = lines.each_index.select { |i| is_separator(lines[i]) } - - raise "No valid separators found in the provided Pandoc table." if separator_indices.empty? - - # Calculate max number of columns and delimiter positions - delimiter_positions = [] - number_of_columns = separator_indices.map { |i| lines[i].count("+") - 1 }.max - - separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns } - number_of_columns.times do |j| - start_pos = j.zero? ? 0 : delimiter_positions[j - 1] - pos = lines[separator_index_max_columns][start_pos + 1..-1]&.index("+") - delimiter_positions << (pos ? pos + start_pos + 1 : -1) - end - - # Process header - has_header = false - header_delimiter_positions = [] - default_alignments = [] - header_rows = [] - header_separator_index = nil - - separator_indices.each do |index| - if GRID_TABLE_HEADER_SEPARATOR.match?(lines[index]) - has_header = true - header_separator_index = index - parts = lines[index].strip.delete_prefix("+").split("+") - - parts.each_with_index do |part, part_index| - default_alignments << if part.start_with?(":") && !part.end_with?(":") - 'align="left"' - elsif !part.start_with?(":") && part.end_with?(":") - 'align="right"' - else - 'align="center"' - end - start_pos = part_index.zero? ? 0 : header_delimiter_positions[part_index - 1] - pos = lines[index][start_pos + 1..-1]&.index("+") - header_delimiter_positions << (pos ? pos + start_pos + 1 : -1) - end - break + # Retrieve separator indices + separator_indices = lines.each_index.select { |i| is_separator(lines[i]) } + + raise "No valid separators found in the provided Pandoc table." if separator_indices.empty? + + # Calculate max number of columns and delimiter positions + delimiter_positions = [] + number_of_columns = separator_indices.map { |i| lines[i].count("+") - 1 }.max + + # Determine delimiter positions + separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns } + number_of_columns.times do |j| + start_pos = j.zero? ? 0 : delimiter_positions[j - 1] + pos = lines[separator_index_max_columns][start_pos + 1..-1]&.index("+") + delimiter_positions << (pos ? pos + start_pos + 1 : -1) end - end - - # Process table body - data_rows = [] - (separator_indices.length - 1).times do |row| - rows = [] - rows_tracker = nil - in_data_row = false - start, end_idx = separator_indices[row], separator_indices[row + 1] - row_lines = lines[start...end_idx] - - next if row_lines.empty? - - row_lines.each do |line| - if is_separator(line) && !in_data_row - in_data_row = true - parts = line.strip.delete_prefix("+").split("+") - delimiter_index = 0 - rows << Row.new(number_of_columns) - rows_tracker = RowTracker.new(number_of_columns) - - i = 0 - parts.each_with_index do |_, j| - next unless i < number_of_columns - - delimiter_index += parts[j].length + 1 - rows[-1][i].position = delimiter_index - rows[-1][i].set_alignment(default_alignments, header_delimiter_positions) - - while delimiter_index > delimiter_positions[i] - i += 1 - end - i += 1 - end - elsif in_data_row - if GRID_TABLE_BODY_SEPARATOR.match?(line) - cells_content = line.strip.delete_prefix("|").delete_prefix("+") - .delete_suffix("|").delete_suffix("+").split(/[\|\+]/) - - rows << Row.new(number_of_columns) - aux_delimiter_index = 0 - auxiliar_cell_index = 0 - - cells_content.each_with_index do |_, i| - next unless auxiliar_cell_index < number_of_columns - - aux_delimiter_index += cells_content[i].length + 1 - rows[-1][auxiliar_cell_index].position = aux_delimiter_index - rows[-1][auxiliar_cell_index].set_alignment(default_alignments, header_delimiter_positions) + # Process header + has_header = false + header_delimiter_positions = [] + default_alignments = [] + header_rows = [] + header_separator_index = nil + + # Determine header delimiter positions + separator_indices.each do |index| + if GRID_TABLE_HEADER_SEPARATOR.match?(lines[index]) + has_header = true + header_separator_index = index + parts = lines[index].strip.delete_prefix("+").split("+") - while aux_delimiter_index > delimiter_positions[auxiliar_cell_index] - auxiliar_cell_index += 1 + parts.each_with_index do |part, part_index| + default_alignments << if part.start_with?(":") && !part.end_with?(":") + 'align="left"' + elsif !part.start_with?(":") && part.end_with?(":") + 'align="right"' + else + 'align="center"' + end + + start_pos = part_index.zero? ? 0 : header_delimiter_positions[part_index - 1] + pos = lines[index][start_pos + 1..-1]&.index("+") + header_delimiter_positions << (pos ? pos + start_pos + 1 : -1) end - auxiliar_cell_index += 1 + break end + end - if cells_content.length <= number_of_columns - column_index = 0 - cells_content.each_with_index do |content, i| - if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) - rows_tracker[column_index] += 1 - rows[rows_tracker[column_index]][column_index].list_flag = false + # Process table body (including rows belonging to header as they are processed in the same way) + data_rows = [] + (separator_indices.length - 1).times do |row| + rows = [] + rows_tracker = nil + in_data_row = false + start, end_idx = separator_indices[row], separator_indices[row + 1] # Lines between separators including separator line start as it gives information about the number of columns of the row + row_lines = lines[start...end_idx] + + next if row_lines.empty? + + row_lines.each do |line| + # First line (normally a separator) of each block + if is_separator(line) && !in_data_row + in_data_row = true + parts = line.strip.delete_prefix("+").split("+") + delimiter_index = 0 + rows << Row.new(number_of_columns) + rows_tracker = RowTracker.new(number_of_columns) + + i = 0 + parts.each_with_index do |_, j| + next unless i < number_of_columns - column_forward = 0 - (column_index...delimiter_positions.length).each do |del_index| - if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[del_index] - column_forward += 1 - rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1 + delimiter_index += parts[j].length + 1 + rows[-1][i].position = delimiter_index + rows[-1][i].set_alignment(default_alignments, header_delimiter_positions) + + while delimiter_index > delimiter_positions[i] + i += 1 end + i += 1 end - column_index += column_forward - else - rows[rows_tracker[column_index]][column_index] = - handling_content(rows[rows_tracker[column_index]][column_index], content) - rows[rows_tracker[column_index]][column_index].rowspan += 1 + # Lines in a block + elsif in_data_row + # Regular data row or partial separator + if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator + cells_content = line.strip.delete_prefix("|").delete_prefix("+") + .delete_suffix("|").delete_suffix("+").split(/[\|\+]/) - unless rows[rows_tracker[column_index]][column_index].colspan_adjusted - rows[rows_tracker[column_index]][column_index].colspan_adjusted = true - rows[rows_tracker[column_index]][column_index] = - adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, - line, number_of_columns, delimiter_positions) - end + rows << Row.new(number_of_columns) + aux_delimiter_index = 0 + auxiliar_cell_index = 0 + + cells_content.each_with_index do |_, i| + next unless auxiliar_cell_index < number_of_columns + + aux_delimiter_index += cells_content[i].length + 1 + rows[-1][auxiliar_cell_index].position = aux_delimiter_index + rows[-1][auxiliar_cell_index].set_alignment(default_alignments, header_delimiter_positions) + + while aux_delimiter_index > delimiter_positions[auxiliar_cell_index] + auxiliar_cell_index += 1 + end + auxiliar_cell_index += 1 + end + + if cells_content.length <= number_of_columns + column_index = 0 + maxRowTracker = rows_tracker.maxValue + cells_content.each_with_index do |content, i| + if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) # Separator - split row + rows_tracker[column_index] = maxRowTracker + 1 + rows[rows_tracker[column_index]][column_index].list_flag = false + + column_forward = 0 + (column_index...delimiter_positions.length).each do |del_index| + if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[del_index] + column_forward += 1 + #rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1 + end + end + column_index += column_forward + else # Regular cell in Partial separator line + rows[rows_tracker[column_index]][column_index] = + handling_content(rows[rows_tracker[column_index]][column_index], content) + rows[rows_tracker[column_index]][column_index].rowspan += 1 + + unless rows[rows_tracker[column_index]][column_index].colspan_adjusted + rows[rows_tracker[column_index]][column_index].colspan_adjusted = true + rows[rows_tracker[column_index]][column_index] = + adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, + line, number_of_columns, delimiter_positions) + end + + if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index] + colspan = rows[rows_tracker[column_index]][column_index].colspan + column_index += colspan.zero? ? 1 : colspan + end + end + end + else + raise "More cells than columns found" + end + else # Data row + cells_content = line.strip.delete_prefix("|").delete_suffix("|").split(/\|/) + column_index = 0 + + if cells_content.length < number_of_columns + cells_content.each_with_index do |content, i| + rows[rows_tracker[column_index]][column_index] = + handling_content(rows[rows_tracker[column_index]][column_index], content) + + unless rows[rows_tracker[column_index]][column_index].colspan_adjusted + rows[rows_tracker[column_index]][column_index].colspan_adjusted = true + rows[rows_tracker[column_index]][column_index] = + adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, + line, number_of_columns, delimiter_positions) + end - if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index] - colspan = rows[rows_tracker[column_index]][column_index].colspan - column_index += colspan.zero? ? 1 : colspan + if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index] + column_index += rows[rows_tracker[column_index]][column_index].colspan + end + end + elsif cells_content.length == number_of_columns + cells_content.each_with_index do |content, i| + rows[rows_tracker[i]][i] = handling_content(rows[rows_tracker[i]][i], content) + end + else + raise "More cells than columns found" + end end + else + raise "No separator line found for row starting" end - end - else - raise "More cells than columns found" end - else - cells_content = line.strip.delete_prefix("|").split(/\s*\|\s*/) - column_index = 0 - - if cells_content.length < number_of_columns - cells_content.each_with_index do |content, i| - rows[rows_tracker[column_index]][column_index] = - handling_content(rows[rows_tracker[column_index]][column_index], content) - - unless rows[rows_tracker[column_index]][column_index].colspan_adjusted - rows[rows_tracker[column_index]][column_index].colspan_adjusted = true - rows[rows_tracker[column_index]][column_index] = - adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, - line, number_of_columns, delimiter_positions) - end - if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index] - column_index += rows[rows_tracker[column_index]][column_index].colspan - end - end - elsif cells_content.length == number_of_columns - cells_content.each_with_index do |content, i| - rows[rows_tracker[i]][i] = handling_content(rows[rows_tracker[i]][i], content) - end - else - raise "More cells than columns found" - end + if has_header && start >= header_separator_index + rows.each { |body_row| data_rows << body_row.cells } + elsif has_header && start < header_separator_index + rows.each { |header_row| header_rows << header_row.cells } end - else - raise "No separator line found for row starting" - end - end - - if has_header && start >= header_separator_index - rows.each { |body_row| data_rows << body_row.cells } - elsif has_header && start < header_separator_index - rows.each { |header_row| header_rows << header_row.cells } end - end - raise "No valid rows found in the provided Pandoc table." if data_rows.empty? && header_rows.empty? + raise "No valid rows found in the provided Pandoc table." if data_rows.empty? && header_rows.empty? - # Format text (bold and italic) - [header_rows, data_rows].each do |rows| - rows.each do |row| - row.each do |cell| - next if cell.content.nil? + # Format text (bold and italic) + [header_rows, data_rows].each do |rows| + rows.each do |row| + row.each do |cell| + next if cell.content.nil? - ["**", "__"].each do |bold_chars| - while cell.content.include?(bold_chars) - cell.content = cell.content.sub(bold_chars, "") - .sub(bold_chars, "") - end - end + ["**", "__"].each do |bold_chars| + while cell.content.include?(bold_chars) + cell.content = cell.content.sub(bold_chars, "") + .sub(bold_chars, "") + end + end - while cell.content.include?("_") && !cell.content.include?("\\_") - cell.content = cell.content.rstrip.sub("_", "").sub("_", "") + while cell.content.include?("_") && !cell.content.include?("\\_") + cell.content = cell.content.rstrip.sub("_", "").sub("_", "") + end + + while cell.content.include?("\\_") + cell.content = cell.content.rstrip.sub("\\_", "_") + end end - - while cell.content.include?("\\_") - cell.content = cell.content.rstrip.sub("\\_", "_") end end - end - end - # Convert newlines to HTML breaks - [header_rows, data_rows].each do |rows| - rows.each do |row| - row.each do |cell| - cell.content = cell.content&.gsub("\n", "
") - end + # Convert newlines to HTML breaks + [header_rows, data_rows].each do |rows| + rows.each do |row| + row.each do |cell| + cell.content = cell.content&.gsub("\n", "
") + end + end end - end - # Validate grid correctness - [header_rows, data_rows].each do |rows| - forward_rowspan = [] - - rows.each_with_index do |row, row_index| - forward_rowspan = Array.new(row.length, 0) if forward_rowspan.empty? - sum = 0 - - row.each_with_index do |cell, cell_index| - sum += cell.colspan - if row_index > 0 && cell.colspan.zero? - if forward_rowspan[cell_index].positive? - sum += 1 - end - forward_rowspan[cell_index] -= 1 - end + # Validate grid correctness + [header_rows, data_rows].each do |rows| + forward_rowspan = [] - if forward_rowspan[cell_index].zero? && cell.rowspan > 1 - forward_rowspan[cell_index] = cell.rowspan - 1 + rows.each_with_index do |row, row_index| + forward_rowspan = Array.new(row.length, 0) if forward_rowspan.empty? + sum = 0 + + row.each_with_index do |cell, cell_index| + sum += cell.colspan + if row_index > 0 && cell.colspan.zero? + if forward_rowspan[cell_index].positive? + sum += 1 + end + forward_rowspan[cell_index] -= 1 + end + + if forward_rowspan[cell_index].zero? && cell.rowspan > 1 + forward_rowspan[cell_index] = cell.rowspan - 1 + end + end + + raise "Grid table not converted properly" unless sum == number_of_columns end end - - raise "Grid table not converted properly" unless sum == number_of_columns - end - end - [header_rows, data_rows] - end + [header_rows, data_rows] + end # end of parse_pandoc_table_with_spans def generate_html_table_with_spans(pandoc_table) - begin - grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table) - rescue StandardError => e - logger = Logger.new(STDOUT) - logger.error("Grid table could not be generated: #{e.message}") - return "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS" - else - html = "\n" - has_header = false - - grid_header.each do |row| - row.each do |cell| - if cell.rowspan != 0 && cell.colspan != 0 - has_header = true - break - end - end - end + begin + grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table) + rescue StandardError => e + logger = Logger.new(STDOUT) + logger.error("Grid table could not be generated: #{e.message}") + return "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS FILE" + else + html = "
\n" + has_header = false - if has_header - html += " \n" grid_header.each do |row| - html += " \n" row.each do |cell| - next if cell.rowspan == 0 || cell.colspan == 0 - - # Prepare content, in case there's a list - if matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^<]+?)(?=
|$)/) - list = "
    " - matches.each do |match| - list += "
  • #{match[1]}
  • " + if cell.rowspan != 0 && cell.colspan != 0 + has_header = true + break end - list += "
" - cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+[^<]+?
)+/, list) - # Enforce left alignment if cell contains a list - cell.alignment = 'align="left"' end + end - rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" - colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" - html += %{ #{cell.content}\n} + if has_header + html += " \n" + grid_header.each do |row| + html += " \n" + row.each do |cell| + next if cell.rowspan == 0 || cell.colspan == 0 + + # Prepare content, in case there's a list + if matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^<]+?)(?=
|$)/) + list = "
    " + matches.each do |match| + list += "
  • #{match[1]}
  • " + end + list += "
" + cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+[^<]+?
)+/, list) + # Enforce left alignment if cell contains a list + cell.alignment = 'align="left"' + end + + rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" + colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" + html += %{ #{cell.content}\n} + end + html += " \n" end - html += " \n" + html += " \n" end - html += " \n" - end - html += " \n" - grid_body.each do |row| - html += " \n" - row.each do |cell| - next if cell.rowspan == 0 || cell.colspan == 0 - - if matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^<]+?)(?=
|$)/) - list = "
    " - matches.each do |match| - list += "
  • #{match[1]}
  • " + html += "
\n" + grid_body.each do |row| + html += " \n" + row.each do |cell| + next if cell.rowspan == 0 || cell.colspan == 0 + + if matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^<]+?)(?=
|$)/) + list = "
    " + matches.each do |match| + list += "
  • #{match[1]}
  • " + end + list += "
" + cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+[^<]+?
)+/, list) + # Enforce left alignment if cell contains a list + cell.alignment = 'align="left"' end - list += "" - cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+[^<]+?
)+/, list) - # Enforce left alignment if cell contains a list - cell.alignment = 'align="left"' - end - rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" - colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" - html += %{ #{cell.content}\n} + rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" + colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" + html += %{ #{cell.content}\n} + end + html += " \n" end - html += " \n" - end - html += " \n" - html += "
" - html - end - end + html += " \n" + html += "" + html + end + end # end of def generate_html_table_with_spans - def call - return @text if MarkdownFilter.glfm_markdown?(context) - - regex = Gitlab::UntrustedRegexp.new(MARKDOWN_GRID_TABLE_BLOCK_REGEX, multiline: true) - return @text unless regex.match?(@text) - - regex.replace_gsub(@text) do |match| - # Extract the grid table content from the match - grid_table = match[:code] - if grid_table - # Convert grid table to HTML table - generate_html_table_with_spans(grid_table) - else - # Return original text if no grid table found - match.to_s - end - end - end + def call + return @text unless MarkdownFilter.glfm_markdown?(context) + + regex = MARKDOWN_GRID_TABLE_BLOCK_REGEX + return @text unless regex.match?(@text) + + @text.gsub(regex) do + match = Regexp.last_match + # Extract the grid table content from the match + grid_table = match[:code] + if grid_table + # Convert grid table to HTML table + generate_html_table_with_spans(grid_table) + else + # Return original text if no grid table found + match.to_s + end + end + end # end of def call end # end of class GridTableFilter end # end of module Filter -end # end of module Banzai \ No newline at end of file +end # end of module Banzai -- GitLab From 5f9a76d5f9996e7f993ecc3d1794d737ff9ca14e Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega Date: Fri, 7 Mar 2025 03:42:34 +0100 Subject: [PATCH 03/13] Formatting changes --- grid_table_filter.rb | 967 ++++++++++++++++++++++--------------------- 1 file changed, 487 insertions(+), 480 deletions(-) diff --git a/grid_table_filter.rb b/grid_table_filter.rb index addd34b..6e82f53 100644 --- a/grid_table_filter.rb +++ b/grid_table_filter.rb @@ -1,10 +1,10 @@ # frozen_string_literal: true # -# GridTableFilter.rb +# GridTableFilter.rb # -# (c) 2025 by Miguel Angel Reina Ortega & Andreas Kraft -# License: BSD 3-Clause License. See the LICENSE file for further details. +# (c) 2025 by Miguel Angel Reina Ortega & Andreas Kraft +# License: BSD 3-Clause License. See the LICENSE file for further details. # # TODO: This is now a legacy filter, and is only used with the Ruby parser. @@ -16,519 +16,526 @@ # module Banzai - module Filter - class GridTableFilter < HTML::Pipeline::TextFilter - MARKDOWN_GRID_TABLE_BLOCK_REGEX = %r{ - (? - # Grid table blocks: - # +---+---+---+---+ - # Anything, starting with | blocks which are ignored by this filter - # +---+---+---+---+ + module Filter + class GridTableFilter < HTML::Pipeline::TextFilter + MARKDOWN_GRID_TABLE_BLOCK_REGEX = %r{ + (? + # Grid table blocks: + # +---+---+---+---+ + # Anything, starting with | blocks which are ignored by this filter + # +---+---+---+---+ - ^\s*\+(-+\+)+$\n # First separator line - (?:^\s*[|+][^\n]*$\n)* - ^\s*\+(-+\+)+$ # Last separator line + ^\s*\+(-+\+)+$\n # First separator line + (?:^\s*[|+][^\n]*$\n)* + ^\s*\+(-+\+)+$ # Last separator line - ) - }mx + ) + }mx - require 'logger' - - class Cell - attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position, :list_flag - - def initialize - @content = nil - @rowspan = 0 - @colspan = 0 - @colspan_adjusted = false - @alignment = 'align="center"' - @position = nil - @list_flag = false + require 'logger' + + # Add these regex constants at the top of the file, after the require statement + GRID_TABLE_SEPARATOR = /^\s*\+([-:=]+\+)+\s*$/ + GRID_TABLE_HEADER_SEPARATOR = /^\s*\+([=:]+\+)+\s*$/ + GRID_TABLE_BODY_SEPARATOR = /[^\n]*\+([:-]+\+)+[^\n]*$/ + GRID_TABLE_BODY_SEPARATOR_LINE = /^[-:]+$/ + + class Cell + attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position, :list_flag + + def initialize + @content = nil + @rowspan = 0 + @colspan = 0 + @colspan_adjusted = false + @alignment = 'align="center"' + @position = nil + @list_flag = false + end + + def set_alignment(default_alignments, header_delimiter_positions) + header_delimiter_index = 0 + while header_delimiter_index < default_alignments.length && + @position > header_delimiter_positions[header_delimiter_index] + header_delimiter_index += 1 end - def set_alignment(default_alignments, header_delimiter_positions) - header_delimiter_index = 0 - while header_delimiter_index < default_alignments.length && @position > header_delimiter_positions[header_delimiter_index] + raise "Invalid table formatting" unless header_delimiter_index < default_alignments.length + if @position < header_delimiter_positions[header_delimiter_index] + @alignment = default_alignments[header_delimiter_index] + elsif @position == header_delimiter_positions[header_delimiter_index] + @alignment = default_alignments[header_delimiter_index] header_delimiter_index += 1 - end - - if header_delimiter_index < default_alignments.length - if @position < header_delimiter_positions[header_delimiter_index] - @alignment = default_alignments[header_delimiter_index] - elsif @position == header_delimiter_positions[header_delimiter_index] - @alignment = default_alignments[header_delimiter_index] - header_delimiter_index += 1 - end - else - raise "Invalid table formatting" - end - end - end # end of class Cell - - class Row - attr_accessor :cells - - def initialize(length = 1) - @cells = Array.new(length) { Cell.new } end + end + end # end of class Cell + + class Row + attr_accessor :cells + + def initialize(length = 1) + @cells = Array.new(length) { Cell.new } + end + + def [](index) + @cells[index] + end + + def []=(index, value) + @cells[index] = value + end + end # end of class Row + + class RowTracker + attr_accessor :row_tracker + + def initialize(items) + @row_tracker = Array.new(items, 0) + end + + def [](index) + @row_tracker[index] + end + + def []=(index, value) + @row_tracker[index] = value + end + + def maxValue + @row_tracker.max + end + end # end of class RowTracker + + # Helper method to detect separator lines + def separator?(line) + GRID_TABLE_SEPARATOR.match?(line) + end + # Helper method to handle content in cells + def handling_content(cell, content) + if cell.content.nil? + cell.rowspan += 1 + cell.colspan += 1 + if content.strip.start_with?("- ") # List + cell.list_flag = true + cell.content = "#{content.strip}\n" + elsif cell.list_flag && !content.strip.empty? + cell.content += "#{content.strip}\n" + elsif content.strip == "" + cell.list_flag = false + cell.content = "\n" + else + cell.content = content.strip.gsub(/\\\s*$/, "\n") + end + elsif content.strip.start_with?("- ") + cell.content += "\n" unless cell.list_flag + + cell.list_flag = true + cell.content += "#{content.strip}\n" + elsif cell.list_flag && !content.strip.empty? + cell.content = cell.content.strip.chomp("\n") + cell.content += " #{content.strip}\n" + elsif content.strip.empty? + cell.list_flag = false + cell.content += cell.content.end_with?("\n") ? "" : "\n" + else + content = content.strip.gsub(/\\\s*$/, "\n") + cell.content += " #{content}" + end + + cell + end + + # Helper method to adjust colspan + def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions) + (column_index...number_of_parts).each do |j| + delimiter_start = nil + col_i = column_index + + until delimiter_start + delimiter_start = col_i > 0 ? row[col_i - 1].position : 0 + col_i -= 1 + end + + delimiters = ['|', '+'] + positions = delimiters.filter_map do |delimiter| + pos = line[delimiter_start + 1..-1]&.index(delimiter) + pos ? pos + delimiter_start + 1 : nil + end.compact + + position = positions.min + + if position && position > delimiter_positions[j] + row[column_index].colspan += 1 - def [](index) - @cells[index] - end - - def []=(index, value) - @cells[index] = value - end - end # end of class Row - - class RowTracker - attr_accessor :row_tracker - - def initialize(items) - @row_tracker = Array.new(items, 0) - end - - def [](index) - @row_tracker[index] - end - - def []=(index, value) - @row_tracker[index] = value - end - - def maxValue - @row_tracker.max - end - end # end of class RowTracker - - # Add these regex constants at the top of the file, after the require statement - GRID_TABLE_SEPARATOR = /^\s*\+([-:=]+\+)+\s*$/ - GRID_TABLE_HEADER_SEPARATOR = /^\s*\+([=:]+\+)+\s*$/ - GRID_TABLE_BODY_SEPARATOR = /[^\n]*\+([:-]+\+)+[^\n]*$/ - GRID_TABLE_BODY_SEPARATOR_LINE = /^[-:]+$/ - - def parse_pandoc_table_with_spans(pandoc_table) - # Split the input into lines - lines = pandoc_table.strip.split("\n").map(&:strip) - - # Helper method to detect separator lines - def is_separator(line) - GRID_TABLE_SEPARATOR.match?(line) + if position == delimiter_positions[-1] + colspan_allocated = row[column_index].colspan + row[column_index].colspan += number_of_columns - colspan_allocated - column_index end - - # Helper method to handle content in cells - def handling_content(cell, content) - if cell.content.nil? - cell.rowspan += 1 - cell.colspan += 1 - if content.strip.start_with?("- ") # List - cell.list_flag = true - cell.content = "#{content.strip}\n" - elsif cell.list_flag && !content.strip.empty? - cell.content += "#{content.strip}\n" - elsif content.strip == "" - cell.list_flag = false - cell.content = "\n" - else - cell.content = content.strip.gsub(/\\\s*$/, "\n") - end - else - if content.strip.start_with?("- ") - unless cell.list_flag - cell.content += "\n" - end - cell.list_flag = true - cell.content += "#{content.strip}\n" - elsif cell.list_flag && !content.strip.empty? - cell.content = cell.content.strip.chomp("\n") - cell.content += " #{content.strip}\n" - elsif content.strip.empty? - cell.list_flag = false - cell.content += cell.content.end_with?("\n") ? "" : "\n" - else - content = content.strip.gsub(/\\\s*$/, "\n") - cell.content += " #{content}" - end + elsif position && position < delimiter_positions[j] + raise "Wrong cell formatting" + else + break + end + end + + row[column_index] + end + + # rubocop:disable Metrics/AbcSize -- PoC + # rubocop:disable Metrics/CyclomaticComplexity -- PoC + # rubocop:disable Metrics/PerceivedComplexity -- PoC + def parse_pandoc_table_with_spans(pandoc_table) + + # Split the input into lines + lines = pandoc_table.strip.split("\n").map(&:strip) + + # Retrieve separator indices + separator_indices = lines.each_index.select { |i| separator?(lines[i]) } + + raise "No valid separators found in the provided Pandoc table." if separator_indices.empty? + + # Calculate max number of columns and delimiter positions + delimiter_positions = [] + number_of_columns = separator_indices.map { |i| lines[i].count("+") - 1 }.max + + # Determine delimiter positions + separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns } + number_of_columns.times do |j| + start_pos = j == 0 ? 0 : delimiter_positions[j - 1] + pos = lines[separator_index_max_columns][start_pos + 1..-1]&.index("+") + delimiter_positions << (pos ? pos + start_pos + 1 : -1) + end + + # Process header + has_header = false + header_delimiter_positions = [] + default_alignments = [] + header_rows = [] + header_separator_index = nil + + # Determine header delimiter positions + separator_indices.each do |index| + next unless GRID_TABLE_HEADER_SEPARATOR.match?(lines[index]) + + has_header = true + header_separator_index = index + parts = lines[index].strip.delete_prefix("+").split("+") + + parts.each_with_index do |part, part_index| + default_alignments << if part.start_with?(":") && !part.end_with?(":") + 'align="left"' + elsif !part.start_with?(":") && part.end_with?(":") + 'align="right"' + else + 'align="center"' + end + + start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1] + pos = lines[index][start_pos + 1..-1]&.index("+") + header_delimiter_positions << (pos ? pos + start_pos + 1 : -1) + end + break + end + + # Process table body (including rows belonging to header as they are processed in the same way) + data_rows = [] + + (separator_indices.length - 1).times do |row| + rows = [] + rows_tracker = nil + in_data_row = false + start = separator_indices[row] + end_idx = separator_indices[row + 1] + row_lines = lines[start...end_idx] + + next if row_lines.empty? + + row_lines.each do |line| + # First line (normally a separator) of each block + if separator?(line) && !in_data_row + in_data_row = true + parts = line.strip.delete_prefix("+").split("+") + delimiter_index = 0 + rows << Row.new(number_of_columns) + rows_tracker = RowTracker.new(number_of_columns) + + i = 0 + parts.each_with_index do |_, j| + next unless i < number_of_columns + + delimiter_index += parts[j].length + 1 + rows[-1][i].position = delimiter_index + rows[-1][i].set_alignment(default_alignments, header_delimiter_positions) + + i += 1 while delimiter_index > delimiter_positions[i] + i += 1 + end + # Lines in a block + elsif in_data_row + # Regular data row or partial separator + if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator + cells_content = line.strip + .delete_prefix("|") + .delete_prefix("+") + .split(/[\|\+]/) + + rows << Row.new(number_of_columns) + aux_delimiter_index = 0 + auxiliar_cell_index = 0 + + cells_content.each_with_index do |_, i| + next unless auxiliar_cell_index < number_of_columns + + aux_delimiter_index += cells_content[i].length + 1 + rows[-1][auxiliar_cell_index].position = aux_delimiter_index + rows[-1][auxiliar_cell_index].set_alignment(default_alignments, header_delimiter_positions) + + auxiliar_cell_index += 1 while aux_delimiter_index > delimiter_positions[auxiliar_cell_index] + + auxiliar_cell_index += 1 end - cell - end - # Helper method to adjust colspan - def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions) - (column_index...number_of_parts).each do |j| - delimiter_start = nil - col_i = column_index - until delimiter_start - delimiter_start = col_i > 0 ? row[col_i - 1].position : 0 - col_i -= 1 + raise "More cells than columns found" unless cells_content.length <= number_of_columns + + column_index = 0 + maxRowTracker = rows_tracker.maxValue + cells_content.each_with_index do |content, _i| + if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) # Separator - split row + rows_tracker[column_index] = maxRowTracker + 1 + rows[rows_tracker[column_index]][column_index].list_flag = false + + column_forward = 0 + (column_index...delimiter_positions.length).each do |del_index| + if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[del_index] + column_forward += 1 + #rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1 + end end - positions = ["|", "+"].map do |delimiter| - pos = line[delimiter_start + 1..-1]&.index(delimiter) - pos ? pos + delimiter_start + 1 : nil - end.compact - - position = positions.min - - if position && position > delimiter_positions[j] - row[column_index].colspan += 1 - if position == delimiter_positions[-1] - colspan_allocated = row[column_index].colspan - row[column_index].colspan += number_of_columns - colspan_allocated - column_index - end - elsif position && position < delimiter_positions[j] - raise "Wrong cell formatting" - else - break + column_index += column_forward + else # Regular cell in Partial separator line + rows[rows_tracker[column_index]][column_index] = + handling_content(rows[rows_tracker[column_index]][column_index], content) + rows[rows_tracker[column_index]][column_index].rowspan += 1 + + unless rows[rows_tracker[column_index]][column_index].colspan_adjusted + rows[rows_tracker[column_index]][column_index].colspan_adjusted = true + rows[rows_tracker[column_index]][column_index] = + adjust_colspan(rows[rows_tracker[column_index]], + column_index, + number_of_columns, + line, + number_of_columns, + delimiter_positions) end - end - row[column_index] - end - - # Retrieve separator indices - separator_indices = lines.each_index.select { |i| is_separator(lines[i]) } - - raise "No valid separators found in the provided Pandoc table." if separator_indices.empty? - - # Calculate max number of columns and delimiter positions - delimiter_positions = [] - number_of_columns = separator_indices.map { |i| lines[i].count("+") - 1 }.max - - # Determine delimiter positions - separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns } - number_of_columns.times do |j| - start_pos = j.zero? ? 0 : delimiter_positions[j - 1] - pos = lines[separator_index_max_columns][start_pos + 1..-1]&.index("+") - delimiter_positions << (pos ? pos + start_pos + 1 : -1) - end - # Process header - has_header = false - header_delimiter_positions = [] - default_alignments = [] - header_rows = [] - header_separator_index = nil - - # Determine header delimiter positions - separator_indices.each do |index| - if GRID_TABLE_HEADER_SEPARATOR.match?(lines[index]) - has_header = true - header_separator_index = index - parts = lines[index].strip.delete_prefix("+").split("+") - - parts.each_with_index do |part, part_index| - default_alignments << if part.start_with?(":") && !part.end_with?(":") - 'align="left"' - elsif !part.start_with?(":") && part.end_with?(":") - 'align="right"' - else - 'align="center"' - end - - start_pos = part_index.zero? ? 0 : header_delimiter_positions[part_index - 1] - pos = lines[index][start_pos + 1..-1]&.index("+") - header_delimiter_positions << (pos ? pos + start_pos + 1 : -1) + if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index] + colspan = rows[rows_tracker[column_index]][column_index].colspan + column_index += (colspan == 0 ? 1 : colspan) # rubocop:disable Metrics/BlockNesting -- PoC end - break + end end - end - # Process table body (including rows belonging to header as they are processed in the same way) - data_rows = [] - (separator_indices.length - 1).times do |row| - rows = [] - rows_tracker = nil - in_data_row = false - start, end_idx = separator_indices[row], separator_indices[row + 1] # Lines between separators including separator line start as it gives information about the number of columns of the row - row_lines = lines[start...end_idx] - - next if row_lines.empty? - - row_lines.each do |line| - # First line (normally a separator) of each block - if is_separator(line) && !in_data_row - in_data_row = true - parts = line.strip.delete_prefix("+").split("+") - delimiter_index = 0 - rows << Row.new(number_of_columns) - rows_tracker = RowTracker.new(number_of_columns) - - i = 0 - parts.each_with_index do |_, j| - next unless i < number_of_columns - - delimiter_index += parts[j].length + 1 - rows[-1][i].position = delimiter_index - rows[-1][i].set_alignment(default_alignments, header_delimiter_positions) - - while delimiter_index > delimiter_positions[i] - i += 1 - end - i += 1 - end - # Lines in a block - elsif in_data_row - # Regular data row or partial separator - if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator - cells_content = line.strip.delete_prefix("|").delete_prefix("+") - .delete_suffix("|").delete_suffix("+").split(/[\|\+]/) - - rows << Row.new(number_of_columns) - aux_delimiter_index = 0 - auxiliar_cell_index = 0 - - cells_content.each_with_index do |_, i| - next unless auxiliar_cell_index < number_of_columns - - aux_delimiter_index += cells_content[i].length + 1 - rows[-1][auxiliar_cell_index].position = aux_delimiter_index - rows[-1][auxiliar_cell_index].set_alignment(default_alignments, header_delimiter_positions) - - while aux_delimiter_index > delimiter_positions[auxiliar_cell_index] - auxiliar_cell_index += 1 - end - auxiliar_cell_index += 1 - end - - if cells_content.length <= number_of_columns - column_index = 0 - maxRowTracker = rows_tracker.maxValue - cells_content.each_with_index do |content, i| - if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) # Separator - split row - rows_tracker[column_index] = maxRowTracker + 1 - rows[rows_tracker[column_index]][column_index].list_flag = false - - column_forward = 0 - (column_index...delimiter_positions.length).each do |del_index| - if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[del_index] - column_forward += 1 - #rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1 - end - end - column_index += column_forward - else # Regular cell in Partial separator line - rows[rows_tracker[column_index]][column_index] = - handling_content(rows[rows_tracker[column_index]][column_index], content) - rows[rows_tracker[column_index]][column_index].rowspan += 1 - - unless rows[rows_tracker[column_index]][column_index].colspan_adjusted - rows[rows_tracker[column_index]][column_index].colspan_adjusted = true - rows[rows_tracker[column_index]][column_index] = - adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, - line, number_of_columns, delimiter_positions) - end - - if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index] - colspan = rows[rows_tracker[column_index]][column_index].colspan - column_index += colspan.zero? ? 1 : colspan - end - end - end - else - raise "More cells than columns found" - end - else # Data row - cells_content = line.strip.delete_prefix("|").delete_suffix("|").split(/\|/) - column_index = 0 - - if cells_content.length < number_of_columns - cells_content.each_with_index do |content, i| - rows[rows_tracker[column_index]][column_index] = - handling_content(rows[rows_tracker[column_index]][column_index], content) - - unless rows[rows_tracker[column_index]][column_index].colspan_adjusted - rows[rows_tracker[column_index]][column_index].colspan_adjusted = true - rows[rows_tracker[column_index]][column_index] = - adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, - line, number_of_columns, delimiter_positions) - end - - if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index] - column_index += rows[rows_tracker[column_index]][column_index].colspan - end - end - elsif cells_content.length == number_of_columns - cells_content.each_with_index do |content, i| - rows[rows_tracker[i]][i] = handling_content(rows[rows_tracker[i]][i], content) - end - else - raise "More cells than columns found" - end - end - else - raise "No separator line found for row starting" + else # Data row + cells_content = line.strip.delete_prefix("|").split(/\|/) + column_index = 0 + + if cells_content.length < number_of_columns + cells_content.each_with_index do |content, _i| + rows[rows_tracker[column_index]][column_index] = + handling_content(rows[rows_tracker[column_index]][column_index], content) + + unless rows[rows_tracker[column_index]][column_index].colspan_adjusted + rows[rows_tracker[column_index]][column_index].colspan_adjusted = true + rows[rows_tracker[column_index]][column_index] = + adjust_colspan(rows[rows_tracker[column_index]], + column_index, + number_of_columns, + line, + number_of_columns, + delimiter_positions) end - end - if has_header && start >= header_separator_index - rows.each { |body_row| data_rows << body_row.cells } - elsif has_header && start < header_separator_index - rows.each { |header_row| header_rows << header_row.cells } + if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index] + column_index += rows[rows_tracker[column_index]][column_index].colspan + end + end + elsif cells_content.length == number_of_columns + cells_content.each_with_index do |content, i| + rows[rows_tracker[i]][i] = handling_content(rows[rows_tracker[i]][i], content) + end + else + raise "More cells than columns found" end + end + else + raise "No separator line found for row starting" end + end + + if has_header && start >= header_separator_index + rows.each { |body_row| data_rows << body_row.cells } + elsif has_header && start < header_separator_index + rows.each { |header_row| header_rows << header_row.cells } + end + end + + raise "No valid rows found in the provided Pandoc table." if data_rows.empty? && header_rows.empty? + + # Format text (bold and italic) + [header_rows, data_rows].each do |rows| + rows.each do |row| + row.each do |cell| + next if cell.content.nil? + + delimters = ['**', '__'] + delimters.each do |bold_chars| + while cell.content.include?(bold_chars) + cell.content = cell.content.sub(bold_chars, "") + .sub(bold_chars, "") + end + end - raise "No valid rows found in the provided Pandoc table." if data_rows.empty? && header_rows.empty? + while cell.content.include?("_") && cell.content.exclude?("\\_") + cell.content = cell.content.rstrip.sub("_", "").sub("_", "") + end - # Format text (bold and italic) - [header_rows, data_rows].each do |rows| - rows.each do |row| - row.each do |cell| - next if cell.content.nil? + cell.content = cell.content.rstrip.sub("\\_", "_") while cell.content.include?("\\_") - ["**", "__"].each do |bold_chars| - while cell.content.include?(bold_chars) - cell.content = cell.content.sub(bold_chars, "") - .sub(bold_chars, "") - end - end - - while cell.content.include?("_") && !cell.content.include?("\\_") - cell.content = cell.content.rstrip.sub("_", "").sub("_", "") - end - - while cell.content.include?("\\_") - cell.content = cell.content.rstrip.sub("\\_", "_") - end - end - end + # Convert newlines to HTML breaks + cell.content = cell.content&.gsub("\n", "
") end + end - # Convert newlines to HTML breaks - [header_rows, data_rows].each do |rows| - rows.each do |row| - row.each do |cell| - cell.content = cell.content&.gsub("\n", "
") - end - end - end + # Validate grid correctness + forward_rowspan = [] - # Validate grid correctness - [header_rows, data_rows].each do |rows| - forward_rowspan = [] - - rows.each_with_index do |row, row_index| - forward_rowspan = Array.new(row.length, 0) if forward_rowspan.empty? - sum = 0 - - row.each_with_index do |cell, cell_index| - sum += cell.colspan - if row_index > 0 && cell.colspan.zero? - if forward_rowspan[cell_index].positive? - sum += 1 - end - forward_rowspan[cell_index] -= 1 - end - - if forward_rowspan[cell_index].zero? && cell.rowspan > 1 - forward_rowspan[cell_index] = cell.rowspan - 1 - end - end - - raise "Grid table not converted properly" unless sum == number_of_columns - end - end + rows.each_with_index do |row, row_index| + forward_rowspan = Array.new(row.length, 0) if forward_rowspan.empty? + sum = 0 - [header_rows, data_rows] - end # end of parse_pandoc_table_with_spans + row.each_with_index do |cell, cell_index| + sum += cell.colspan - def generate_html_table_with_spans(pandoc_table) - begin - grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table) - rescue StandardError => e - logger = Logger.new(STDOUT) - logger.error("Grid table could not be generated: #{e.message}") - return "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS FILE" - else - html = "\n" - has_header = false - - grid_header.each do |row| - row.each do |cell| - if cell.rowspan != 0 && cell.colspan != 0 - has_header = true - break - end - end - end + if row_index > 0 && cell.colspan == 0 + sum += 1 if forward_rowspan[cell_index] > 0 - if has_header - html += " \n" - grid_header.each do |row| - html += " \n" - row.each do |cell| - next if cell.rowspan == 0 || cell.colspan == 0 - - # Prepare content, in case there's a list - if matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^<]+?)(?=
|$)/) - list = "
    " - matches.each do |match| - list += "
  • #{match[1]}
  • " - end - list += "
" - cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+[^<]+?
)+/, list) - # Enforce left alignment if cell contains a list - cell.alignment = 'align="left"' - end - - rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" - colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" - html += %{ #{cell.content}\n} - end - html += " \n" - end - html += " \n" - end + forward_rowspan[cell_index] -= 1 + end - html += " \n" - grid_body.each do |row| - html += " \n" - row.each do |cell| - next if cell.rowspan == 0 || cell.colspan == 0 - - if matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^<]+?)(?=
|$)/) - list = "
    " - matches.each do |match| - list += "
  • #{match[1]}
  • " - end - list += "
" - cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+[^<]+?
)+/, list) - # Enforce left alignment if cell contains a list - cell.alignment = 'align="left"' - end - - rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" - colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" - html += %{ #{cell.content}\n} - end - html += " \n" - end + forward_rowspan[cell_index] = cell.rowspan - 1 if forward_rowspan[cell_index] == 0 && cell.rowspan > 1 + end - html += " \n" - html += "
" - html + raise "Grid table not converted properly" unless sum == number_of_columns + end + end + + [header_rows, data_rows] + end # end of parse_pandoc_table_with_spans + + def generate_html_table_with_spans(pandoc_table) + begin + grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table) + rescue StandardError => e + logger = Logger.new($stdout) + logger.error("Grid table could not be generated: #{e.message}") + + "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS" + else + html = "\n" + has_header = false + + grid_header.each do |row| + row.each do |cell| + if cell.rowspan != 0 && cell.colspan != 0 + has_header = true + break + end end - end # end of def generate_html_table_with_spans - - def call - return @text unless MarkdownFilter.glfm_markdown?(context) - - regex = MARKDOWN_GRID_TABLE_BLOCK_REGEX - return @text unless regex.match?(@text) - - @text.gsub(regex) do - match = Regexp.last_match - # Extract the grid table content from the match - grid_table = match[:code] - if grid_table - # Convert grid table to HTML table - generate_html_table_with_spans(grid_table) - else - # Return original text if no grid table found - match.to_s + end + + if has_header + html += " \n" + grid_header.each do |row| + html += " \n" + row.each do |cell| + next if cell.rowspan == 0 || cell.colspan == 0 + + # Prepare content, in case there's a list + matches = cell.content&.scan(%r{\s*([-*+]|\d+\.)\s+([^<]+?)(?=
|$)}) + if matches + list = "
    " + matches.each do |match| + list += "
  • #{match[1]}
  • " + end + list += "
" + cell.content = cell.content.gsub(%r{(\s*([-*+]|\d+\.)\s+[^<]+?
)+}, list) + # Enforce left alignment if cell contains a list + cell.alignment = 'align="left"' + end + + rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" + colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" + html += %( #{cell.content}\n) + end + html += " \n" end + html += " \n" + end + + html += " \n" + grid_body.each do |row| + html += " \n" + row.each do |cell| + next if cell.rowspan == 0 || cell.colspan == 0 + + matches = cell.content&.scan(%r{\s*([-*+]|\d+\.)\s+([^<]+?)(?=
|$)}) + if matches + list = "
    " + matches.each do |match| + list += "
  • #{match[1]}
  • " + end + list += "
" + cell.content = cell.content.gsub(%r{(\s*([-*+]|\d+\.)\s+[^<]+?
)+}, list) + # Enforce left alignment if cell contains a list + cell.alignment = 'align="left"' + end + + rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" + colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" + html += %( #{cell.content}\n) end - end # end of def call + html += " \n" + end + + html += " \n" + html += "
" + html + end + end # end of def generate_html_table_with_spans + # rubocop:enable Metrics/PerceivedComplexity + # rubocop:enable Metrics/CyclomaticComplexity + # rubocop:enable Metrics/AbcSize + + def call + return @text unless MarkdownFilter.glfm_markdown?(context) + + regex = MARKDOWN_GRID_TABLE_BLOCK_REGEX + return @text unless regex.match?(@text) + + @text.gsub(regex) do + match = Regexp.last_match + # Extract the grid table content from the match + grid_table = match[:code] + if grid_table + # Convert grid table to HTML table + generate_html_table_with_spans(grid_table) + else + # Return original text if no grid table found + match.to_s + end + end + end # end of def call end # end of class GridTableFilter end # end of module Filter end # end of module Banzai -- GitLab From b621572ffa47c79a2fff9d15531d1819c2f96dd3 Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega Date: Sun, 9 Mar 2025 08:02:49 +0100 Subject: [PATCH 04/13] Support for only body tables --- grid_table_filter.rb | 85 +++++++++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/grid_table_filter.rb b/grid_table_filter.rb index 6e82f53..1a541b0 100644 --- a/grid_table_filter.rb +++ b/grid_table_filter.rb @@ -41,32 +41,33 @@ module Banzai GRID_TABLE_BODY_SEPARATOR_LINE = /^[-:]+$/ class Cell - attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position, :list_flag + attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position_start, :position, :list_flag def initialize - @content = nil - @rowspan = 0 - @colspan = 0 - @colspan_adjusted = false - @alignment = 'align="center"' - @position = nil - @list_flag = false + @content = nil + @rowspan = 0 + @colspan = 0 + @colspan_adjusted = false + @alignment = 'align="center"' + @position_start = nil + @position = nil + @list_flag = false end - def set_alignment(default_alignments, header_delimiter_positions) + def calculateAndSetAlignment(header_delimiter_positions, default_alignments ) + + raise "Cell position must be set before calculating alignment" if @position.nil? || @position_start.nil? + header_delimiter_index = 0 while header_delimiter_index < default_alignments.length && - @position > header_delimiter_positions[header_delimiter_index] + @position_start > header_delimiter_positions[header_delimiter_index] header_delimiter_index += 1 end raise "Invalid table formatting" unless header_delimiter_index < default_alignments.length - if @position < header_delimiter_positions[header_delimiter_index] - @alignment = default_alignments[header_delimiter_index] - elsif @position == header_delimiter_positions[header_delimiter_index] - @alignment = default_alignments[header_delimiter_index] - header_delimiter_index += 1 - end + + @alignment = default_alignments[header_delimiter_index] + end end # end of class Cell @@ -189,7 +190,7 @@ module Banzai # Split the input into lines lines = pandoc_table.strip.split("\n").map(&:strip) - # Retrieve separator indices + # Retrieve separator indices separator_indices = lines.each_index.select { |i| separator?(lines[i]) } raise "No valid separators found in the provided Pandoc table." if separator_indices.empty? @@ -198,7 +199,7 @@ module Banzai delimiter_positions = [] number_of_columns = separator_indices.map { |i| lines[i].count("+") - 1 }.max - # Determine delimiter positions + # Determine delimiter positions separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns } number_of_columns.times do |j| start_pos = j == 0 ? 0 : delimiter_positions[j - 1] @@ -237,6 +238,26 @@ module Banzai break end + unless has_header + # Set default alignments from the first separator which takes the role of header + header_separator_index = 0 + parts = lines[0].strip.delete_prefix("+").split("+") + + parts.each_with_index do |part, part_index| + default_alignments << if part.start_with?(":") && !part.end_with?(":") + 'align="left"' + elsif !part.start_with?(":") && part.end_with?(":") + 'align="right"' + else + 'align="center"' + end + + start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1] + pos = lines[0][start_pos + 1..-1]&.index("+") + header_delimiter_positions << (pos ? pos + start_pos + 1 : -1) + end + end + # Process table body (including rows belonging to header as they are processed in the same way) data_rows = [] @@ -264,20 +285,18 @@ module Banzai next unless i < number_of_columns delimiter_index += parts[j].length + 1 + rows[-1][i].position_start = delimiter_index - parts[j].length rows[-1][i].position = delimiter_index - rows[-1][i].set_alignment(default_alignments, header_delimiter_positions) + rows[-1][i].calculateAndSetAlignment(header_delimiter_positions, default_alignments ) i += 1 while delimiter_index > delimiter_positions[i] i += 1 end - # Lines in a block + # Lines in a block elsif in_data_row # Regular data row or partial separator - if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator - cells_content = line.strip - .delete_prefix("|") - .delete_prefix("+") - .split(/[\|\+]/) + if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator + cells_content = line.strip.gsub(/^(\+|\|)/, '').split(/[\|\+]/) rows << Row.new(number_of_columns) aux_delimiter_index = 0 @@ -287,18 +306,20 @@ module Banzai next unless auxiliar_cell_index < number_of_columns aux_delimiter_index += cells_content[i].length + 1 + rows[-1][auxiliar_cell_index].position_start = aux_delimiter_index - cells_content[i].length rows[-1][auxiliar_cell_index].position = aux_delimiter_index - rows[-1][auxiliar_cell_index].set_alignment(default_alignments, header_delimiter_positions) + rows[-1][auxiliar_cell_index].calculateAndSetAlignment(header_delimiter_positions, default_alignments ) auxiliar_cell_index += 1 while aux_delimiter_index > delimiter_positions[auxiliar_cell_index] auxiliar_cell_index += 1 end - + raise "More cells than columns found" unless cells_content.length <= number_of_columns column_index = 0 maxRowTracker = rows_tracker.maxValue + cells_content.each_with_index do |content, _i| if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) # Separator - split row rows_tracker[column_index] = maxRowTracker + 1 @@ -308,7 +329,6 @@ module Banzai (column_index...delimiter_positions.length).each do |del_index| if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[del_index] column_forward += 1 - #rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1 end end @@ -335,7 +355,6 @@ module Banzai end end end - else # Data row cells_content = line.strip.delete_prefix("|").split(/\|/) column_index = 0 @@ -372,16 +391,18 @@ module Banzai raise "No separator line found for row starting" end end - + if has_header && start >= header_separator_index rows.each { |body_row| data_rows << body_row.cells } elsif has_header && start < header_separator_index rows.each { |header_row| header_rows << header_row.cells } + else + rows.each { |body_row| data_rows << body_row.cells } end + + raise "No valid rows found in the provided Pandoc table." if data_rows.empty? && header_rows.empty? end - raise "No valid rows found in the provided Pandoc table." if data_rows.empty? && header_rows.empty? - # Format text (bold and italic) [header_rows, data_rows].each do |rows| rows.each do |row| -- GitLab From 9cfb8d0d8804f3312f39ebb8be94150ea98b40d6 Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega Date: Sun, 9 Mar 2025 15:46:15 +0100 Subject: [PATCH 05/13] Enhancement of handling content, including lists in cells --- grid_table_filter.rb | 68 ++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/grid_table_filter.rb b/grid_table_filter.rb index 1a541b0..3075179 100644 --- a/grid_table_filter.rb +++ b/grid_table_filter.rb @@ -40,6 +40,8 @@ module Banzai GRID_TABLE_BODY_SEPARATOR = /[^\n]*\+([:-]+\+)+[^\n]*$/ GRID_TABLE_BODY_SEPARATOR_LINE = /^[-:]+$/ + NEXT_ELEMENT_LIST_MARK = "∆" + class Cell attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position_start, :position, :list_flag @@ -113,34 +115,40 @@ module Banzai end # Helper method to handle content in cells def handling_content(cell, content) + _c = content.strip if cell.content.nil? cell.rowspan += 1 cell.colspan += 1 - if content.strip.start_with?("- ") # List + if _c.start_with?("- ") # List cell.list_flag = true - cell.content = "#{content.strip}\n" + _c = _c.gsub(/\\\s*$/, '\n') + cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}" # Add list element end mark to know when the list element ends elsif cell.list_flag && !content.strip.empty? - cell.content += "#{content.strip}\n" - elsif content.strip == "" - cell.list_flag = false + _c = _c.gsub(/\\\s*$/, '\n') + cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}" #add the list element end mark + elsif _c.empty? cell.content = "\n" else - cell.content = content.strip.gsub(/\\\s*$/, "\n") + cell.content = _c.gsub(/\\\s*$/, "\n") end - elsif content.strip.start_with?("- ") + elsif _c.start_with?("- ") cell.content += "\n" unless cell.list_flag - cell.list_flag = true - cell.content += "#{content.strip}\n" - elsif cell.list_flag && !content.strip.empty? - cell.content = cell.content.strip.chomp("\n") - cell.content += " #{content.strip}\n" - elsif content.strip.empty? - cell.list_flag = false + _c = _c.gsub(/\\\s*$/, '\n') + cell.content += "#{_c}#{NEXT_ELEMENT_LIST_MARK}" + elsif cell.list_flag && !_c.empty? + cell.content = cell.content.strip.chomp("#{NEXT_ELEMENT_LIST_MARK}") + _c = _c.gsub(/\\\s*$/, '\n') + cell.content += " #{_c}#{NEXT_ELEMENT_LIST_MARK}" + elsif _c.empty? + if cell.list_flag + cell.list_flag = false + cell.content += "\n\n" + end cell.content += cell.content.end_with?("\n") ? "" : "\n" else - content = content.strip.gsub(/\\\s*$/, "\n") - cell.content += " #{content}" + _c = _c.gsub(/\\\s*$/, "\n") + cell.content += " #{_c}" end cell @@ -188,7 +196,7 @@ module Banzai def parse_pandoc_table_with_spans(pandoc_table) # Split the input into lines - lines = pandoc_table.strip.split("\n").map(&:strip) + lines = pandoc_table.rstrip.split("\n").map(&:rstrip) # Retrieve separator indices separator_indices = lines.each_index.select { |i| separator?(lines[i]) } @@ -272,6 +280,7 @@ module Banzai next if row_lines.empty? row_lines.each do |line| + line = line.rstrip # First line (normally a separator) of each block if separator?(line) && !in_data_row in_data_row = true @@ -409,20 +418,11 @@ module Banzai row.each do |cell| next if cell.content.nil? - delimters = ['**', '__'] - delimters.each do |bold_chars| - while cell.content.include?(bold_chars) - cell.content = cell.content.sub(bold_chars, "") - .sub(bold_chars, "") - end - end - - while cell.content.include?("_") && cell.content.exclude?("\\_") - cell.content = cell.content.rstrip.sub("_", "").sub("_", "") - end + cell.content = cell.content.gsub(/^|\s)(?\*\*|__)(?.+?)\g(?!\w)/, "\\k\\k") + cell.content = cell.content.gsub(/(?^|\s)(?\*|_)(?.+?)\g(?!\w)/, "\\k\\k") # Convert newlines to HTML breaks cell.content = cell.content&.gsub("\n", "
") end @@ -483,14 +483,14 @@ module Banzai next if cell.rowspan == 0 || cell.colspan == 0 # Prepare content, in case there's a list - matches = cell.content&.scan(%r{\s*([-*+]|\d+\.)\s+([^<]+?)(?=
|$)}) + matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/) if matches list = "
    " matches.each do |match| list += "
  • #{match[1]}
  • " end list += "
" - cell.content = cell.content.gsub(%r{(\s*([-*+]|\d+\.)\s+[^<]+?
)+}, list) + cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/, list) # Enforce left alignment if cell contains a list cell.alignment = 'align="left"' end @@ -510,14 +510,14 @@ module Banzai row.each do |cell| next if cell.rowspan == 0 || cell.colspan == 0 - matches = cell.content&.scan(%r{\s*([-*+]|\d+\.)\s+([^<]+?)(?=
|$)}) + matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/) if matches list = "
    " matches.each do |match| list += "
  • #{match[1]}
  • " end - list += "
" - cell.content = cell.content.gsub(%r{(\s*([-*+]|\d+\.)\s+[^<]+?
)+}, list) + puts "List: #{list}" + cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/, list) # Enforce left alignment if cell contains a list cell.alignment = 'align="left"' end -- GitLab From cbe9d1a077b287df0357cf29a27319b986742089 Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega Date: Mon, 10 Mar 2025 10:04:59 +0100 Subject: [PATCH 06/13] Adding separators alignment checking --- grid_table_filter.rb | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/grid_table_filter.rb b/grid_table_filter.rb index 3075179..aa0f19d 100644 --- a/grid_table_filter.rb +++ b/grid_table_filter.rb @@ -190,6 +190,32 @@ module Banzai row[column_index] end + def checkDelimiterAlignment(line, delimiterPositions) + return false if line.empty? || delimiterPositions.empty? + + #puts "\nChecking line: #{line}" + #puts "Expected delimiter positions: #{delimiterPositions}" + + # For any row (only +, only |, mix of + and |) + currentPositions = [] + start_pos = 1 + + while start_pos < line.length + pos = line.index(/[|+]/, start_pos) # Find the next occurrence of | or + starting from start_pos + break if pos.nil? # Exit if no more delimiters are found + + currentPositions << pos + start_pos = pos + 1 # Move to the next character after the found delimiter + end + + #puts "Current positions: #{currentPositions}" + + # Check if the last expected delimiter position is found in currentPositions + return currentPositions.include?(delimiterPositions[-1]) && + line.match?(/\A[|+]/) && # Check if the line starts with | or + + currentPositions.all? { |pos| delimiterPositions.include?(pos) } # Ensure all current positions are in delimiterPositions + end + # rubocop:disable Metrics/AbcSize -- PoC # rubocop:disable Metrics/CyclomaticComplexity -- PoC # rubocop:disable Metrics/PerceivedComplexity -- PoC @@ -266,6 +292,10 @@ module Banzai end end + #Check end table delimiter alignment (not checked during the lines processing) + raise "Misaligned delimiters in table separators: #{lines[-1]}" unless checkDelimiterAlignment(lines[-1], delimiter_positions) + + # Process table body (including rows belonging to header as they are processed in the same way) data_rows = [] @@ -284,6 +314,9 @@ module Banzai # First line (normally a separator) of each block if separator?(line) && !in_data_row in_data_row = true + #Check end table delimiter alignment (not checked during the lines processing) + raise "Misaligned delimiters in separator row: #{line}" unless checkDelimiterAlignment(line, delimiter_positions) + parts = line.strip.delete_prefix("+").split("+") delimiter_index = 0 rows << Row.new(number_of_columns) @@ -305,6 +338,9 @@ module Banzai elsif in_data_row # Regular data row or partial separator if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator + #Check end table delimiter alignment (not checked during the lines processing) + raise "Misaligned delimiters in partial separator: #{line}" unless checkDelimiterAlignment(line, delimiter_positions) + cells_content = line.strip.gsub(/^(\+|\|)/, '').split(/[\|\+]/) rows << Row.new(number_of_columns) @@ -366,6 +402,9 @@ module Banzai end else # Data row cells_content = line.strip.delete_prefix("|").split(/\|/) + #Check end table delimiter alignment (not checked during the lines processing) + raise "Misaligned delimiters in row: #{line}" unless checkDelimiterAlignment(line, delimiter_positions) + column_index = 0 if cells_content.length < number_of_columns @@ -516,7 +555,6 @@ module Banzai matches.each do |match| list += "
  • #{match[1]}
  • " end - puts "List: #{list}" cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/, list) # Enforce left alignment if cell contains a list cell.alignment = 'align="left"' -- GitLab From 403a4b1b041362950489e383387db7691d128ca6 Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega Date: Fri, 14 Mar 2025 16:59:53 +0100 Subject: [PATCH 07/13] Moving grid_table_filter.rb to another folder + adding README for it --- gitlabFilter/README.md | 27 +++++++++++++++++++ .../grid_table_filter.rb | 0 2 files changed, 27 insertions(+) create mode 100644 gitlabFilter/README.md rename grid_table_filter.rb => gitlabFilter/grid_table_filter.rb (100%) diff --git a/gitlabFilter/README.md b/gitlabFilter/README.md new file mode 100644 index 0000000..1357e35 --- /dev/null +++ b/gitlabFilter/README.md @@ -0,0 +1,27 @@ +# Grid Table filter + +The grid_table_filter.rb file is a Ruby script that defines a filter for converting Pandoc-style grid tables into HTML tables with support for rowspan and colspan. Here is a summary of the key components and functionality. + +## Module and Class Definitions + +The script is encapsulated within the Banzai::Filter module. +The main class is GridTableFilter, which inherits from HTML::Pipeline::TextFilter. + +- Regex Constants: Several regex constants are defined to match different parts of the grid table structure, such as separators and body lines. + +- Helper Classes: + - Cell: Represents a cell in the table with attributes like content, rowspan, colspan, alignment, etc. + - Row: Represents a row in the table, containing an array of Cell objects. + - RowTracker: Tracks the number of rows for each column to manage rowspan. + +- Helper Methods: + - separator?: Checks if a line is a separator. + - handling_content: Processes the content of a cell, handling lists and newlines. + - adjust_colspan: Adjusts the colspan of cells based on the delimiter positions. + +- Main Methods: + - parse_pandoc_table_with_spans: Parses the Pandoc table, identifies headers, and processes rows to create a structured representation of the table. + - generate_html_table_with_spans: Converts the parsed table structure into an HTML table. + - call: The main entry point for the filter, which applies the regex to find grid tables and converts them to HTML. + +- Error Handling: The script includes error handling to manage invalid table formats and log errors. diff --git a/grid_table_filter.rb b/gitlabFilter/grid_table_filter.rb similarity index 100% rename from grid_table_filter.rb rename to gitlabFilter/grid_table_filter.rb -- GitLab From ce3607dbcafafe03531c1c50b3f749cc2318656c Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega Date: Wed, 2 Apr 2025 10:48:19 +0900 Subject: [PATCH 08/13] Fixes from Gitlab colleagues to make rubocop checks work --- gitlabFilter/grid_table_filter.rb | 253 ++++++++++++++++-------------- 1 file changed, 136 insertions(+), 117 deletions(-) diff --git a/gitlabFilter/grid_table_filter.rb b/gitlabFilter/grid_table_filter.rb index aa0f19d..ca86fd0 100644 --- a/gitlabFilter/grid_table_filter.rb +++ b/gitlabFilter/grid_table_filter.rb @@ -18,20 +18,22 @@ module Banzai module Filter class GridTableFilter < HTML::Pipeline::TextFilter + # rubocop:disable Lint/MixedRegexpCaptureTypes -- PoC MARKDOWN_GRID_TABLE_BLOCK_REGEX = %r{ (? # Grid table blocks: # +---+---+---+---+ # Anything, starting with | blocks which are ignored by this filter # +---+---+---+---+ - + ^\s*\+(-+\+)+$\n # First separator line (?:^\s*[|+][^\n]*$\n)* ^\s*\+(-+\+)+$ # Last separator line - + ) }mx - + # rubocop:enable Lint/MixedRegexpCaptureTypes + require 'logger' # Add these regex constants at the top of the file, after the require statement @@ -41,37 +43,36 @@ module Banzai GRID_TABLE_BODY_SEPARATOR_LINE = /^[-:]+$/ NEXT_ELEMENT_LIST_MARK = "∆" - + class Cell - attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position_start, :position, :list_flag - - def initialize - @content = nil - @rowspan = 0 - @colspan = 0 - @colspan_adjusted = false - @alignment = 'align="center"' - @position_start = nil - @position = nil - @list_flag = false - end + attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position_start, :position, + :list_flag + + def initialize + @content = nil + @rowspan = 0 + @colspan = 0 + @colspan_adjusted = false + @alignment = 'align="center"' + @position_start = nil + @position = nil + @list_flag = false + end - def calculateAndSetAlignment(header_delimiter_positions, default_alignments ) - - raise "Cell position must be set before calculating alignment" if @position.nil? || @position_start.nil? - - header_delimiter_index = 0 - while header_delimiter_index < default_alignments.length && - @position_start > header_delimiter_positions[header_delimiter_index] - header_delimiter_index += 1 - end + def calculate_and_set_alignment(header_delimiter_positions, default_alignments) + raise "Cell position must be set before calculating alignment" if @position.nil? || @position_start.nil? - raise "Invalid table formatting" unless header_delimiter_index < default_alignments.length - - @alignment = default_alignments[header_delimiter_index] - + header_delimiter_index = 0 + while header_delimiter_index < default_alignments.length && + @position_start > header_delimiter_positions[header_delimiter_index] + header_delimiter_index += 1 end - end # end of class Cell + + raise "Invalid table formatting" unless header_delimiter_index < default_alignments.length + + @alignment = default_alignments[header_delimiter_index] + end + end class Row attr_accessor :cells @@ -87,7 +88,7 @@ module Banzai def []=(index, value) @cells[index] = value end - end # end of class Row + end class RowTracker attr_accessor :row_tracker @@ -104,55 +105,61 @@ module Banzai @row_tracker[index] = value end - def maxValue + def max_value @row_tracker.max end - end # end of class RowTracker + end # Helper method to detect separator lines def separator?(line) GRID_TABLE_SEPARATOR.match?(line) end + # Helper method to handle content in cells + # rubocop:disable Metrics/PerceivedComplexity -- PoC def handling_content(cell, content) - _c = content.strip + modified_content = content.strip if cell.content.nil? cell.rowspan += 1 cell.colspan += 1 - if _c.start_with?("- ") # List + if modified_content.start_with?("- ") # List cell.list_flag = true - _c = _c.gsub(/\\\s*$/, '\n') - cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}" # Add list element end mark to know when the list element ends + modified_content = modified_content.gsub(/\\\s*$/, '\n') + + # Add list element end mark to know when the list element ends + cell.content = "#{modified_content}#{NEXT_ELEMENT_LIST_MARK}" elsif cell.list_flag && !content.strip.empty? - _c = _c.gsub(/\\\s*$/, '\n') - cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}" #add the list element end mark - elsif _c.empty? + modified_content = modified_content.gsub(/\\\s*$/, '\n') + cell.content = "#{modified_content}#{NEXT_ELEMENT_LIST_MARK}" # add the list element end mark + elsif modified_content.empty? cell.content = "\n" else - cell.content = _c.gsub(/\\\s*$/, "\n") + cell.content = modified_content.gsub(/\\\s*$/, "\n") end - elsif _c.start_with?("- ") + elsif modified_content.start_with?("- ") cell.content += "\n" unless cell.list_flag cell.list_flag = true - _c = _c.gsub(/\\\s*$/, '\n') - cell.content += "#{_c}#{NEXT_ELEMENT_LIST_MARK}" - elsif cell.list_flag && !_c.empty? - cell.content = cell.content.strip.chomp("#{NEXT_ELEMENT_LIST_MARK}") - _c = _c.gsub(/\\\s*$/, '\n') - cell.content += " #{_c}#{NEXT_ELEMENT_LIST_MARK}" - elsif _c.empty? + modified_content = modified_content.gsub(/\\\s*$/, '\n') + cell.content += "#{modified_content}#{NEXT_ELEMENT_LIST_MARK}" + elsif cell.list_flag && !modified_content.empty? + cell.content = cell.content.strip.chomp(NEXT_ELEMENT_LIST_MARK.to_s) + modified_content = modified_content.gsub(/\\\s*$/, '\n') + cell.content += " #{modified_content}#{NEXT_ELEMENT_LIST_MARK}" + elsif modified_content.empty? if cell.list_flag cell.list_flag = false cell.content += "\n\n" end + cell.content += cell.content.end_with?("\n") ? "" : "\n" else - _c = _c.gsub(/\\\s*$/, "\n") - cell.content += " #{_c}" + modified_content = modified_content.gsub(/\\\s*$/, "\n") + cell.content += " #{modified_content}" end cell end + # rubocop:enable Metrics/PerceivedComplexity # Helper method to adjust colspan def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions) @@ -167,12 +174,12 @@ module Banzai delimiters = ['|', '+'] positions = delimiters.filter_map do |delimiter| - pos = line[delimiter_start + 1..-1]&.index(delimiter) + pos = line[delimiter_start + 1..]&.index(delimiter) pos ? pos + delimiter_start + 1 : nil end.compact position = positions.min - + if position && position > delimiter_positions[j] row[column_index].colspan += 1 @@ -190,40 +197,42 @@ module Banzai row[column_index] end - def checkDelimiterAlignment(line, delimiterPositions) - return false if line.empty? || delimiterPositions.empty? - - #puts "\nChecking line: #{line}" - #puts "Expected delimiter positions: #{delimiterPositions}" - + def check_delimiter_alignment(line, delimiter_positions) + return false if line.empty? || delimiter_positions.empty? + + # puts "\nChecking line: #{line}" + # puts "Expected delimiter positions: #{delimiter_positions}" + # For any row (only +, only |, mix of + and |) - currentPositions = [] + current_positions = [] start_pos = 1 while start_pos < line.length - pos = line.index(/[|+]/, start_pos) # Find the next occurrence of | or + starting from start_pos - break if pos.nil? # Exit if no more delimiters are found + pos = line.index(/[|+]/, start_pos) # Find the next occurrence of | or + starting from start_pos + break if pos.nil? # Exit if no more delimiters are found - currentPositions << pos - start_pos = pos + 1 # Move to the next character after the found delimiter + current_positions << pos + start_pos = pos + 1 # Move to the next character after the found delimiter end - #puts "Current positions: #{currentPositions}" + # puts "Current positions: #{current_positions}" - # Check if the last expected delimiter position is found in currentPositions - return currentPositions.include?(delimiterPositions[-1]) && - line.match?(/\A[|+]/) && # Check if the line starts with | or + - currentPositions.all? { |pos| delimiterPositions.include?(pos) } # Ensure all current positions are in delimiterPositions + # Check if the last expected delimiter position is found in current_positions + current_positions.include?(delimiter_positions[-1]) && + line.match?(/\A[|+]/) && # Check if the line starts with | or + + # Ensure all current positions are in delimiter_positions + current_positions.all? do |pos| + delimiter_positions.include?(pos) + end end # rubocop:disable Metrics/AbcSize -- PoC # rubocop:disable Metrics/CyclomaticComplexity -- PoC # rubocop:disable Metrics/PerceivedComplexity -- PoC def parse_pandoc_table_with_spans(pandoc_table) - # Split the input into lines lines = pandoc_table.rstrip.split("\n").map(&:rstrip) - + # Retrieve separator indices separator_indices = lines.each_index.select { |i| separator?(lines[i]) } @@ -232,12 +241,12 @@ module Banzai # Calculate max number of columns and delimiter positions delimiter_positions = [] number_of_columns = separator_indices.map { |i| lines[i].count("+") - 1 }.max - + # Determine delimiter positions separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns } number_of_columns.times do |j| start_pos = j == 0 ? 0 : delimiter_positions[j - 1] - pos = lines[separator_index_max_columns][start_pos + 1..-1]&.index("+") + pos = lines[separator_index_max_columns][start_pos + 1..]&.index("+") delimiter_positions << (pos ? pos + start_pos + 1 : -1) end @@ -266,7 +275,7 @@ module Banzai end start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1] - pos = lines[index][start_pos + 1..-1]&.index("+") + pos = lines[index][start_pos + 1..]&.index("+") header_delimiter_positions << (pos ? pos + start_pos + 1 : -1) end break @@ -276,25 +285,25 @@ module Banzai # Set default alignments from the first separator which takes the role of header header_separator_index = 0 parts = lines[0].strip.delete_prefix("+").split("+") - + parts.each_with_index do |part, part_index| default_alignments << if part.start_with?(":") && !part.end_with?(":") - 'align="left"' - elsif !part.start_with?(":") && part.end_with?(":") - 'align="right"' - else - 'align="center"' - end - + 'align="left"' + elsif !part.start_with?(":") && part.end_with?(":") + 'align="right"' + else + 'align="center"' + end + start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1] - pos = lines[0][start_pos + 1..-1]&.index("+") + pos = lines[0][start_pos + 1..]&.index("+") header_delimiter_positions << (pos ? pos + start_pos + 1 : -1) end end - #Check end table delimiter alignment (not checked during the lines processing) - raise "Misaligned delimiters in table separators: #{lines[-1]}" unless checkDelimiterAlignment(lines[-1], delimiter_positions) - + # Check end table delimiter alignment (not checked during the lines processing) + raise "Misaligned delimiters in table separators: #{lines[-1]}" unless check_delimiter_alignment(lines[-1], + delimiter_positions) # Process table body (including rows belonging to header as they are processed in the same way) data_rows = [] @@ -314,9 +323,10 @@ module Banzai # First line (normally a separator) of each block if separator?(line) && !in_data_row in_data_row = true - #Check end table delimiter alignment (not checked during the lines processing) - raise "Misaligned delimiters in separator row: #{line}" unless checkDelimiterAlignment(line, delimiter_positions) - + # Check end table delimiter alignment (not checked during the lines processing) + raise "Misaligned delimiters in separator row: #{line}" unless check_delimiter_alignment(line, + delimiter_positions) + parts = line.strip.delete_prefix("+").split("+") delimiter_index = 0 rows << Row.new(number_of_columns) @@ -329,7 +339,7 @@ module Banzai delimiter_index += parts[j].length + 1 rows[-1][i].position_start = delimiter_index - parts[j].length rows[-1][i].position = delimiter_index - rows[-1][i].calculateAndSetAlignment(header_delimiter_positions, default_alignments ) + rows[-1][i].calculate_and_set_alignment(header_delimiter_positions, default_alignments) i += 1 while delimiter_index > delimiter_positions[i] i += 1 @@ -338,8 +348,9 @@ module Banzai elsif in_data_row # Regular data row or partial separator if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator - #Check end table delimiter alignment (not checked during the lines processing) - raise "Misaligned delimiters in partial separator: #{line}" unless checkDelimiterAlignment(line, delimiter_positions) + # Check end table delimiter alignment (not checked during the lines processing) + raise "Misaligned delimiters in partial separator: #{line}" unless check_delimiter_alignment(line, + delimiter_positions) cells_content = line.strip.gsub(/^(\+|\|)/, '').split(/[\|\+]/) @@ -353,21 +364,22 @@ module Banzai aux_delimiter_index += cells_content[i].length + 1 rows[-1][auxiliar_cell_index].position_start = aux_delimiter_index - cells_content[i].length rows[-1][auxiliar_cell_index].position = aux_delimiter_index - rows[-1][auxiliar_cell_index].calculateAndSetAlignment(header_delimiter_positions, default_alignments ) + rows[-1][auxiliar_cell_index].calculate_and_set_alignment(header_delimiter_positions, + default_alignments) auxiliar_cell_index += 1 while aux_delimiter_index > delimiter_positions[auxiliar_cell_index] auxiliar_cell_index += 1 end - + raise "More cells than columns found" unless cells_content.length <= number_of_columns column_index = 0 - maxRowTracker = rows_tracker.maxValue - + max_row_tracker = rows_tracker.max_value + cells_content.each_with_index do |content, _i| if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) # Separator - split row - rows_tracker[column_index] = maxRowTracker + 1 + rows_tracker[column_index] = max_row_tracker + 1 rows[rows_tracker[column_index]][column_index].list_flag = false column_forward = 0 @@ -401,9 +413,10 @@ module Banzai end end else # Data row - cells_content = line.strip.delete_prefix("|").split(/\|/) - #Check end table delimiter alignment (not checked during the lines processing) - raise "Misaligned delimiters in row: #{line}" unless checkDelimiterAlignment(line, delimiter_positions) + cells_content = line.strip.delete_prefix("|").split("|") + # Check end table delimiter alignment (not checked during the lines processing) + raise "Misaligned delimiters in row: #{line}" unless check_delimiter_alignment( + line, delimiter_positions) column_index = 0 @@ -439,15 +452,13 @@ module Banzai raise "No separator line found for row starting" end end - - if has_header && start >= header_separator_index - rows.each { |body_row| data_rows << body_row.cells } - elsif has_header && start < header_separator_index + + if has_header && start < header_separator_index rows.each { |header_row| header_rows << header_row.cells } else rows.each { |body_row| data_rows << body_row.cells } end - + raise "No valid rows found in the provided Pandoc table." if data_rows.empty? && header_rows.empty? end @@ -459,9 +470,11 @@ module Banzai cell.content = cell.content.gsub(/^|\s)(?\*\*|__)(?.+?)\g(?!\w)/, "\\k\\k") + cell.content = cell.content.gsub(/(?^|\s)(?\*\*|__)(?.+?)\g(?!\w)/, + "\\k\\k") - cell.content = cell.content.gsub(/(?^|\s)(?\*|_)(?.+?)\g(?!\w)/, "\\k\\k") + cell.content = cell.content.gsub(/(?^|\s)(?\*|_)(?.+?)\g(?!\w)/, + "\\k\\k") # Convert newlines to HTML breaks cell.content = cell.content&.gsub("\n", "
    ") end @@ -491,7 +504,7 @@ module Banzai end [header_rows, data_rows] - end # end of parse_pandoc_table_with_spans + end def generate_html_table_with_spans(pandoc_table) begin @@ -522,14 +535,17 @@ module Banzai next if cell.rowspan == 0 || cell.colspan == 0 # Prepare content, in case there's a list - matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/) + matches = cell.content&.scan( + /\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o) + if matches list = "
      " matches.each do |match| list += "
    • #{match[1]}
    • " end list += "
    " - cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/, list) + cell.content = cell.content.gsub( + /(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/o, list) # Enforce left alignment if cell contains a list cell.alignment = 'align="left"' end @@ -549,13 +565,16 @@ module Banzai row.each do |cell| next if cell.rowspan == 0 || cell.colspan == 0 - matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/) + matches = cell.content&.scan( + /\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o) + if matches list = "
      " matches.each do |match| list += "
    • #{match[1]}
    • " end - cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/, list) + cell.content = cell.content.gsub( + /(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/o, list) # Enforce left alignment if cell contains a list cell.alignment = 'align="left"' end @@ -571,7 +590,7 @@ module Banzai html += "" html end - end # end of def generate_html_table_with_spans + end # rubocop:enable Metrics/PerceivedComplexity # rubocop:enable Metrics/CyclomaticComplexity # rubocop:enable Metrics/AbcSize @@ -594,7 +613,7 @@ module Banzai match.to_s end end - end # end of def call - end # end of class GridTableFilter - end # end of module Filter -end # end of module Banzai + end + end + end +end -- GitLab From ba12f69c040539e4721aba9854e1425f34dd9033 Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega Date: Wed, 2 Apr 2025 11:16:39 +0900 Subject: [PATCH 09/13] Fix when checking generated table rows --- gitlabFilter/grid_table_filter.rb | 75 +++++++++++++++++-------------- 1 file changed, 41 insertions(+), 34 deletions(-) diff --git a/gitlabFilter/grid_table_filter.rb b/gitlabFilter/grid_table_filter.rb index ca86fd0..0bbd04a 100644 --- a/gitlabFilter/grid_table_filter.rb +++ b/gitlabFilter/grid_table_filter.rb @@ -48,30 +48,30 @@ module Banzai attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position_start, :position, :list_flag - def initialize - @content = nil - @rowspan = 0 - @colspan = 0 - @colspan_adjusted = false - @alignment = 'align="center"' - @position_start = nil - @position = nil - @list_flag = false - end + def initialize + @content = nil + @rowspan = 0 + @colspan = 0 + @colspan_adjusted = false + @alignment = 'align="center"' + @position_start = nil + @position = nil + @list_flag = false + end def calculate_and_set_alignment(header_delimiter_positions, default_alignments) - raise "Cell position must be set before calculating alignment" if @position.nil? || @position_start.nil? + raise "Cell position must be set before calculating alignment" if @position.nil? || @position_start.nil? - header_delimiter_index = 0 - while header_delimiter_index < default_alignments.length && - @position_start > header_delimiter_positions[header_delimiter_index] - header_delimiter_index += 1 - end + header_delimiter_index = 0 + while header_delimiter_index < default_alignments.length && + @position_start > header_delimiter_positions[header_delimiter_index] + header_delimiter_index += 1 + end - raise "Invalid table formatting" unless header_delimiter_index < default_alignments.length + raise "Invalid table formatting" unless header_delimiter_index < default_alignments.length - @alignment = default_alignments[header_delimiter_index] - end + @alignment = default_alignments[header_delimiter_index] + end end class Row @@ -208,18 +208,18 @@ module Banzai start_pos = 1 while start_pos < line.length - pos = line.index(/[|+]/, start_pos) # Find the next occurrence of | or + starting from start_pos - break if pos.nil? # Exit if no more delimiters are found + pos = line.index(/[|+]/, start_pos) # Find the next occurrence of | or + starting from start_pos + break if pos.nil? # Exit if no more delimiters are found current_positions << pos - start_pos = pos + 1 # Move to the next character after the found delimiter + start_pos = pos + 1 # Move to the next character after the found delimiter end # puts "Current positions: #{current_positions}" # Check if the last expected delimiter position is found in current_positions current_positions.include?(delimiter_positions[-1]) && - line.match?(/\A[|+]/) && # Check if the line starts with | or + + line.match?(/\A[|+]/) && # Check if the line starts with | or + # Ensure all current positions are in delimiter_positions current_positions.all? do |pos| delimiter_positions.include?(pos) @@ -288,12 +288,12 @@ module Banzai parts.each_with_index do |part, part_index| default_alignments << if part.start_with?(":") && !part.end_with?(":") - 'align="left"' - elsif !part.start_with?(":") && part.end_with?(":") - 'align="right"' - else - 'align="center"' - end + 'align="left"' + elsif !part.start_with?(":") && part.end_with?(":") + 'align="right"' + else + 'align="center"' + end start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1] pos = lines[0][start_pos + 1..]&.index("+") @@ -491,12 +491,19 @@ module Banzai sum += cell.colspan if row_index > 0 && cell.colspan == 0 - sum += 1 if forward_rowspan[cell_index] > 0 - + if forward_rowspan[cell_index] > 0 + sum += 1 forward_rowspan[cell_index] -= 1 end - - forward_rowspan[cell_index] = cell.rowspan - 1 if forward_rowspan[cell_index] == 0 && cell.rowspan > 1 + end + if forward_rowspan[cell_index] == 0 && cell.rowspan > 1 + forward_rowspan[cell_index] = cell.rowspan - 1 + colspan = 1 + while cell.colspan > colspan + forward_rowspan[cell_index + colspan] = cell.rowspan - 1 + colspan += 1 + end + end end raise "Grid table not converted properly" unless sum == number_of_columns @@ -513,7 +520,7 @@ module Banzai logger = Logger.new($stdout) logger.error("Grid table could not be generated: #{e.message}") - "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS" + "\n\nHTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOG FILE\n\n#{e.message}\n\nCommit ID: ce3607dbcafafe03531c1c50b3f749cc2318656c\n\n" else html = "\n" has_header = false -- GitLab From 784f8c866fa03b5b130ab523930e8d6308ea434e Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega Date: Fri, 18 Jul 2025 15:56:08 +0200 Subject: [PATCH 10/13] Fix for checking correctness of generated tables + adding the markdown_pipeline file --- gitlabFilter/grid_table_filter.rb | 24 ++++++++++++++---------- gitlabFilter/plain_markdown_pipeline.rb | 21 +++++++++++++++++++++ 2 files changed, 35 insertions(+), 10 deletions(-) create mode 100644 gitlabFilter/plain_markdown_pipeline.rb diff --git a/gitlabFilter/grid_table_filter.rb b/gitlabFilter/grid_table_filter.rb index 0bbd04a..77e6e33 100644 --- a/gitlabFilter/grid_table_filter.rb +++ b/gitlabFilter/grid_table_filter.rb @@ -352,17 +352,17 @@ module Banzai raise "Misaligned delimiters in partial separator: #{line}" unless check_delimiter_alignment(line, delimiter_positions) - cells_content = line.strip.gsub(/^(\+|\|)/, '').split(/[\|\+]/) + parts = line.strip.gsub(/^(\+|\|)/, '').split(/[\|\+]/) rows << Row.new(number_of_columns) aux_delimiter_index = 0 auxiliar_cell_index = 0 - cells_content.each_with_index do |_, i| + parts.each_with_index do |_, i| next unless auxiliar_cell_index < number_of_columns - aux_delimiter_index += cells_content[i].length + 1 - rows[-1][auxiliar_cell_index].position_start = aux_delimiter_index - cells_content[i].length + aux_delimiter_index += parts[i].length + 1 + rows[-1][auxiliar_cell_index].position_start = aux_delimiter_index - parts[i].length rows[-1][auxiliar_cell_index].position = aux_delimiter_index rows[-1][auxiliar_cell_index].calculate_and_set_alignment(header_delimiter_positions, default_alignments) @@ -372,12 +372,12 @@ module Banzai auxiliar_cell_index += 1 end - raise "More cells than columns found" unless cells_content.length <= number_of_columns + raise "More cells than columns found" unless parts.length <= number_of_columns column_index = 0 max_row_tracker = rows_tracker.max_value - cells_content.each_with_index do |content, _i| + parts.each_with_index do |content, _i| if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) # Separator - split row rows_tracker[column_index] = max_row_tracker + 1 rows[rows_tracker[column_index]][column_index].list_flag = false @@ -418,6 +418,10 @@ module Banzai raise "Misaligned delimiters in row: #{line}" unless check_delimiter_alignment( line, delimiter_positions) + raise "Missing delimiters in previous separator line" if parts.length < cells_content.length + + raise "Missing delimiters in row: #{line}: delimiters = #{cells_content.length}, expected delimiters = #{parts.length}" if parts.length > cells_content.length + column_index = 0 if cells_content.length < number_of_columns @@ -486,17 +490,17 @@ module Banzai rows.each_with_index do |row, row_index| forward_rowspan = Array.new(row.length, 0) if forward_rowspan.empty? sum = 0 - + row_forward_rowspan = forward_rowspan.dup row.each_with_index do |cell, cell_index| sum += cell.colspan - if row_index > 0 && cell.colspan == 0 - if forward_rowspan[cell_index] > 0 + if cell.colspan == 0 + if row_forward_rowspan[cell_index] > 0 sum += 1 forward_rowspan[cell_index] -= 1 end end - if forward_rowspan[cell_index] == 0 && cell.rowspan > 1 + if row_forward_rowspan[cell_index] == 0 && cell.rowspan > 1 forward_rowspan[cell_index] = cell.rowspan - 1 colspan = 1 while cell.colspan > colspan diff --git a/gitlabFilter/plain_markdown_pipeline.rb b/gitlabFilter/plain_markdown_pipeline.rb new file mode 100644 index 0000000..c5bb665 --- /dev/null +++ b/gitlabFilter/plain_markdown_pipeline.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module Banzai + module Pipeline + class PlainMarkdownPipeline < BasePipeline + def self.filters + FilterArray[ + Filter::IncludeFilter, + Filter::GridTableFilter, + Filter::MarkdownPreEscapeLegacyFilter, + Filter::DollarMathPreLegacyFilter, + Filter::BlockquoteFenceLegacyFilter, + Filter::MarkdownFilter, + Filter::ConvertTextToDocFilter, + Filter::DollarMathPostLegacyFilter, + Filter::MarkdownPostEscapeLegacyFilter + ] + end + end + end + end \ No newline at end of file -- GitLab From 188b7516780f0e0248441c7ad988d43ed1fd8d3b Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega Date: Fri, 18 Jul 2025 16:43:23 +0200 Subject: [PATCH 11/13] Doing nothing to parse markdown cell content --- gitlabFilter/grid_table_filter.rb | 40 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/gitlabFilter/grid_table_filter.rb b/gitlabFilter/grid_table_filter.rb index 77e6e33..10efc52 100644 --- a/gitlabFilter/grid_table_filter.rb +++ b/gitlabFilter/grid_table_filter.rb @@ -420,7 +420,7 @@ module Banzai raise "Missing delimiters in previous separator line" if parts.length < cells_content.length - raise "Missing delimiters in row: #{line}: delimiters = #{cells_content.length}, expected delimiters = #{parts.length}" if parts.length > cells_content.length + #raise "Missing delimiters in row: #{line}: delimiters = #{cells_content.length}, expected delimiters = #{parts.length}" if parts.length > cells_content.length column_index = 0 @@ -472,13 +472,13 @@ module Banzai row.each do |cell| next if cell.content.nil? - cell.content = cell.content.gsub(/^|\s)(?\*\*|__)(?.+?)\g(?!\w)/, - "\\k\\k") + #cell.content = cell.content.gsub(/(?^|\s)(?\*\*|__)(?.+?)\g(?!\w)/, + # "\\k\\k") - cell.content = cell.content.gsub(/(?^|\s)(?\*|_)(?.+?)\g(?!\w)/, - "\\k\\k") + #cell.content = cell.content.gsub(/(?^|\s)(?\*|_)(?.+?)\g(?!\w)/, + # "\\k\\k") # Convert newlines to HTML breaks cell.content = cell.content&.gsub("\n", "
      ") end @@ -497,9 +497,9 @@ module Banzai if cell.colspan == 0 if row_forward_rowspan[cell_index] > 0 sum += 1 - forward_rowspan[cell_index] -= 1 + forward_rowspan[cell_index] -= 1 + end end - end if row_forward_rowspan[cell_index] == 0 && cell.rowspan > 1 forward_rowspan[cell_index] = cell.rowspan - 1 colspan = 1 @@ -526,7 +526,7 @@ module Banzai "\n\nHTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOG FILE\n\n#{e.message}\n\nCommit ID: ce3607dbcafafe03531c1c50b3f749cc2318656c\n\n" else - html = "
      \n" + html = '
      ' has_header = false grid_header.each do |row| @@ -539,9 +539,9 @@ module Banzai end if has_header - html += " \n" + html += '' grid_header.each do |row| - html += " \n" + html += '' row.each do |cell| next if cell.rowspan == 0 || cell.colspan == 0 @@ -563,16 +563,16 @@ module Banzai rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" - html += %( #{cell.content}\n) + html += %(\n\n#{cell.content}\n\n) end - html += " \n" + html += '' end - html += " \n" + html += '' end - html += " \n" + html += '' grid_body.each do |row| - html += " \n" + html += '' row.each do |cell| next if cell.rowspan == 0 || cell.colspan == 0 @@ -592,13 +592,13 @@ module Banzai rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" - html += %( #{cell.content}\n) + html += %(\n\n#{cell.content}\n\n) end - html += " \n" + html += '' end - html += " \n" - html += "
      " + html += '' + html += '' html end end -- GitLab From 3ac393c0e579441edcad970890a468f09e72ab10 Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega Date: Mon, 25 Aug 2025 16:25:09 +0200 Subject: [PATCH 12/13] Small fix for tables with no header --- gitlabFilter/grid_table_filter.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gitlabFilter/grid_table_filter.rb b/gitlabFilter/grid_table_filter.rb index 10efc52..908125c 100644 --- a/gitlabFilter/grid_table_filter.rb +++ b/gitlabFilter/grid_table_filter.rb @@ -284,7 +284,8 @@ module Banzai unless has_header # Set default alignments from the first separator which takes the role of header header_separator_index = 0 - parts = lines[0].strip.delete_prefix("+").split("+") + line = lines.find { |l| !l.strip.empty? } # first non-blank line + parts = line.strip.delete_prefix("+").split("+") parts.each_with_index do |part, part_index| default_alignments << if part.start_with?(":") && !part.end_with?(":") @@ -296,7 +297,7 @@ module Banzai end start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1] - pos = lines[0][start_pos + 1..]&.index("+") + pos = line[start_pos + 1..]&.index("+") header_delimiter_positions << (pos ? pos + start_pos + 1 : -1) end end -- GitLab From 6a74639dfc9cfd61c9b65bc8a8c234c379d8dd00 Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega Date: Wed, 27 Aug 2025 15:58:28 +0200 Subject: [PATCH 13/13] Trying fixing table cell alignment --- gitlabFilter/grid_table_filter.rb | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/gitlabFilter/grid_table_filter.rb b/gitlabFilter/grid_table_filter.rb index 908125c..10ff1e1 100644 --- a/gitlabFilter/grid_table_filter.rb +++ b/gitlabFilter/grid_table_filter.rb @@ -267,11 +267,11 @@ module Banzai parts.each_with_index do |part, part_index| default_alignments << if part.start_with?(":") && !part.end_with?(":") - 'align="left"' + 'left' elsif !part.start_with?(":") && part.end_with?(":") - 'align="right"' + 'right' else - 'align="center"' + 'center' end start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1] @@ -289,11 +289,11 @@ module Banzai parts.each_with_index do |part, part_index| default_alignments << if part.start_with?(":") && !part.end_with?(":") - 'align="left"' + 'left' elsif !part.start_with?(":") && part.end_with?(":") - 'align="right"' + 'right' else - 'align="center"' + 'center' end start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1] @@ -559,12 +559,12 @@ module Banzai cell.content = cell.content.gsub( /(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/o, list) # Enforce left alignment if cell contains a list - cell.alignment = 'align="left"' + cell.alignment = 'left' end rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" - html += %(\n\n#{cell.content}\n\n) + html += %(\n\n#{cell.content}\n\n) end html += '' end @@ -588,12 +588,12 @@ module Banzai cell.content = cell.content.gsub( /(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/o, list) # Enforce left alignment if cell contains a list - cell.alignment = 'align="left"' + cell.alignment = 'left' end rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : "" colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : "" - html += %(\n\n#{cell.content}\n\n) + html += %(\n\n#{cell.content}\n\n) end html += '' end -- GitLab