# frozen_string_literal: true

#
# GridTableFilter.rb
#
# (c) 2025 by Miguel Angel Reina Ortega & Andreas Kraft
# License: BSD 3-Clause License. See the LICENSE file for further details.
#
# TODO: This is now a legacy filter, and is only used with the Ruby parser.
# The current markdown parser now properly handles grid table blocks.
# issue: https://gitlab.com/gitlab-org/gitlab/-/issues/460864
#
# Converts Pandoc-style grid tables to HTML tables with rowspan and colspan support
#

require 'logger'

module Banzai
  module Filter
    # Text filter that replaces every Pandoc-style grid table block found in the
    # input text with an HTML <table>, honouring cells that span several rows
    # and/or columns.
    class GridTableFilter < HTML::Pipeline::TextFilter
      MARKDOWN_GRID_TABLE_BLOCK_REGEX = %r{
        (?<code>
          # Grid table blocks:
          # +---+---+---+---+
          # Anything, starting with | blocks which are ignored by this filter
          # +---+---+---+---+

          ^\s*\+-.*\+\s$ # First separator line
          (?:.*\n)*? # Any number of rows (non-greedy)
          \s*\+-.*\+\s$ # Last separator line
        )
      }mx

      # A single table cell while the grid is being parsed.
      #
      # rowspan/colspan start at 0; a cell that keeps 0 in either of them is
      # covered by a neighbouring spanning cell and is skipped when rendering.
      # position is the character offset of the delimiter closing the cell.
      class Cell
        attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position, :list_flag

        def initialize
          @content = nil
          @rowspan = 0
          @colspan = 0
          @colspan_adjusted = false
          @alignment = 'align="center"'
          @position = nil
          @list_flag = false
        end

        # Picks the alignment of the first header column whose closing
        # delimiter is at or after this cell's position.
        #
        # @param default_alignments [Array<String>] one align attribute per header column
        # @param header_delimiter_positions [Array<Integer>] closing delimiter offset per header column
        # @raise [RuntimeError] when no header column reaches this cell's position
        #   (this includes the case of an empty default_alignments list)
        def set_alignment(default_alignments, header_delimiter_positions)
          column = (0...default_alignments.length).find do |index|
            @position <= header_delimiter_positions[index]
          end
          raise "Invalid table formatting" unless column

          @alignment = default_alignments[column]
        end
      end

      # One grid line worth of cells, indexable like an array.
      class Row
        attr_accessor :cells

        def initialize(length = 1)
          @cells = Array.new(length) { Cell.new }
        end

        def [](index)
          @cells[index]
        end

        def []=(index, value)
          @cells[index] = value
        end
      end

      # Per-column index of the row that currently receives content.
      class RowTracker
        attr_accessor :row_tracker

        def initialize(items)
          @row_tracker = Array.new(items, 0)
        end

        def [](index)
          @row_tracker[index]
        end

        def []=(index, value)
          @row_tracker[index] = value
        end
      end

      # Full separator line, e.g. "+---+===+:--:+". Anchored at the start so that
      # a line which merely ends with a "+---+" fragment (a partial separator such
      # as "|   +---+") is not mistaken for a full separator; those lines are
      # handled through GRID_TABLE_BODY_SEPARATOR instead.
      GRID_TABLE_SEPARATOR = /\A\s*\+([-:=]+\+)+\s*$/
      # Separator that closes the header: its segments consist of "=" and ":" only.
      GRID_TABLE_HEADER_SEPARATOR = /.*\+([=:]+\+)+.*$/
      # Line that contains at least one "+---+" style segment.
      GRID_TABLE_BODY_SEPARATOR = /.*\+([:-]+\+)+.*$/
      # Fragment made up exclusively of "-" and ":". Anchored at the start so that
      # cell text that only happens to end in ":" or "-" is still treated as text.
      GRID_TABLE_BODY_SEPARATOR_LINE = /\A[-:]+$/

      # Helper method to detect separator lines
      def is_separator(line)
        GRID_TABLE_SEPARATOR.match?(line)
      end

      # Helper method to handle content in cells.
      #
      # Appends one physical line of text to the cell. The first piece of content
      # also bumps rowspan and colspan from 0 to 1. Lines starting with "- " put
      # the cell in list mode, an empty line ends list mode, and a trailing
      # backslash is turned into a line break.
      #
      # @return [Cell] the same cell, mutated
      def handling_content(cell, content)
        text = content.strip

        if cell.content.nil?
          cell.rowspan += 1
          cell.colspan += 1
          if text.start_with?("- ") # List
            cell.list_flag = true
            cell.content = "#{text}\n"
          elsif cell.list_flag && !text.empty?
            cell.content += "#{text}\n"
          elsif text == ""
            cell.list_flag = false
            cell.content = "\n"
          else
            cell.content = text.gsub(/\\\s*$/, "\n")
          end
        elsif text.start_with?("- ")
          # A list that follows plain text starts on its own line
          cell.content += "\n" unless cell.list_flag
          cell.list_flag = true
          cell.content += "#{text}\n"
        elsif cell.list_flag && !text.empty?
          # Continuation of the current list item
          cell.content = cell.content.strip.chomp("\n")
          cell.content += " #{text}\n"
        elsif text.empty?
          cell.list_flag = false
          cell.content += cell.content.end_with?("\n") ? "" : "\n"
        else
          cell.content += " #{text.gsub(/\\\s*$/, "\n")}"
        end

        cell
      end

      # Helper method to adjust colspan.
      #
      # Looks for the first "|" or "+" after the previous cell's delimiter and
      # widens the cell once for every column boundary that delimiter lies beyond.
      #
      # @return [Cell] row[column_index], mutated
      # @raise [RuntimeError] when the delimiter sits before the expected column boundary
      def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions)
        (column_index...number_of_parts).each do |j|
          delimiter_start = nil
          col_i = column_index
          # Walk left until a cell with a known position (or the line start) is found
          until delimiter_start
            delimiter_start = col_i > 0 ? row[col_i - 1].position : 0
            col_i -= 1
          end

          positions = ["|", "+"].map do |delimiter|
            pos = line[(delimiter_start + 1)..-1]&.index(delimiter)
            pos ? pos + delimiter_start + 1 : nil
          end.compact

          position = positions.min

          if position && position > delimiter_positions[j]
            row[column_index].colspan += 1
            if position == delimiter_positions[-1]
              # The cell reaches the last column: take all remaining columns
              colspan_allocated = row[column_index].colspan
              row[column_index].colspan += number_of_columns - colspan_allocated - column_index
            end
          elsif position && position < delimiter_positions[j]
            raise "Wrong cell formatting"
          else
            break
          end
        end

        row[column_index]
      end

      # Parses a grid table into header and body rows.
      #
      # NOTE: rows are only collected, and alignments only defined, when the table
      # has a header separator ("+===+"). Without one, Cell#set_alignment raises.
      #
      # @param pandoc_table [String] the raw grid table block
      # @return [Array(Array<Array<Cell>>, Array<Array<Cell>>)] header rows and body rows
      # @raise [RuntimeError] when the table cannot be interpreted
      def parse_pandoc_table_with_spans(pandoc_table)
        # Split the input into lines
        lines = pandoc_table.strip.split("\n").map(&:strip)

        separator_indices = lines.each_index.select { |i| is_separator(lines[i]) }

        raise "No valid separators found in the provided Pandoc table." if separator_indices.empty?

        # Calculate max number of columns and delimiter positions
        delimiter_positions = []
        number_of_columns = separator_indices.map { |i| lines[i].count("+") - 1 }.max
        separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns }
        number_of_columns.times do |j|
          start_pos = j.zero? ? 0 : delimiter_positions[j - 1]
          pos = lines[separator_index_max_columns][(start_pos + 1)..-1]&.index("+")
          delimiter_positions << (pos ? pos + start_pos + 1 : -1)
        end

        # Process header
        has_header = false
        header_delimiter_positions = []
        default_alignments = []
        header_rows = []
        header_separator_index = nil

        separator_indices.each do |index|
          next unless GRID_TABLE_HEADER_SEPARATOR.match?(lines[index])

          has_header = true
          header_separator_index = index
          parts = lines[index].strip.delete_prefix("+").split("+")
          parts.each_with_index do |part, part_index|
            leading_colon = part.start_with?(":")
            trailing_colon = part.end_with?(":")
            default_alignments << if leading_colon && !trailing_colon
                                    'align="left"'
                                  elsif !leading_colon && trailing_colon
                                    'align="right"'
                                  else
                                    'align="center"'
                                  end

            start_pos = part_index.zero? ? 0 : header_delimiter_positions[part_index - 1]
            pos = lines[index][(start_pos + 1)..-1]&.index("+")
            header_delimiter_positions << (pos ? pos + start_pos + 1 : -1)
          end
          # Only the first header separator is taken into account
          break
        end

        # Process table body
        data_rows = []

        (separator_indices.length - 1).times do |row|
          rows = []
          rows_tracker = nil
          in_data_row = false
          start = separator_indices[row]
          end_idx = separator_indices[row + 1]
          row_lines = lines[start...end_idx]
          next if row_lines.empty?

          row_lines.each do |line|
            if is_separator(line) && !in_data_row
              # Opening separator of this slice: create the row and record where
              # each of its cells ends
              in_data_row = true
              parts = line.strip.delete_prefix("+").split("+")
              delimiter_index = 0
              rows << Row.new(number_of_columns)
              rows_tracker = RowTracker.new(number_of_columns)
              i = 0
              parts.each do |part|
                next unless i < number_of_columns

                delimiter_index += part.length + 1
                rows[-1][i].position = delimiter_index
                rows[-1][i].set_alignment(default_alignments, header_delimiter_positions)
                # Skip the columns swallowed by a segment wider than one column
                i += 1 while delimiter_index > delimiter_positions[i]
                i += 1
              end
            elsif in_data_row
              if GRID_TABLE_BODY_SEPARATOR.match?(line)
                # Partial separator: some columns start a new row, others continue
                cells_content = line.strip.delete_prefix("|").delete_prefix("+")
                                    .delete_suffix("|").delete_suffix("+").split(/[\|\+]/)
                rows << Row.new(number_of_columns)
                aux_delimiter_index = 0
                auxiliar_cell_index = 0
                cells_content.each do |fragment|
                  next unless auxiliar_cell_index < number_of_columns

                  aux_delimiter_index += fragment.length + 1
                  rows[-1][auxiliar_cell_index].position = aux_delimiter_index
                  rows[-1][auxiliar_cell_index].set_alignment(default_alignments, header_delimiter_positions)
                  auxiliar_cell_index += 1 while aux_delimiter_index > delimiter_positions[auxiliar_cell_index]
                  auxiliar_cell_index += 1
                end

                raise "More cells than columns found" if cells_content.length > number_of_columns

                column_index = 0
                cells_content.each do |content|
                  if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content)
                    # This column starts a new row from here on
                    rows_tracker[column_index] += 1
                    new_cell = rows[rows_tracker[column_index]][column_index]
                    new_cell.list_flag = false
                    column_forward = 0
                    (column_index...delimiter_positions.length).each do |del_index|
                      next unless new_cell.position >= delimiter_positions[del_index]

                      column_forward += 1
                      rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1
                    end
                    column_index += column_forward
                  else
                    # This column keeps filling the cell it was already in
                    cell = rows[rows_tracker[column_index]][column_index]
                    handling_content(cell, content)
                    cell.rowspan += 1
                    unless cell.colspan_adjusted
                      cell.colspan_adjusted = true
                      adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, line,
                        number_of_columns, delimiter_positions)
                    end

                    if cell.position >= delimiter_positions[column_index]
                      colspan = cell.colspan
                      column_index += colspan.zero? ? 1 : colspan
                    end
                  end
                end
              else
                # Regular content line
                cells_content = line.strip.delete_prefix("|").split(/\s*\|\s*/)
                column_index = 0
                if cells_content.length < number_of_columns
                  # Fewer cells than columns: at least one cell spans columns
                  cells_content.each do |content|
                    cell = rows[rows_tracker[column_index]][column_index]
                    handling_content(cell, content)
                    unless cell.colspan_adjusted
                      cell.colspan_adjusted = true
                      adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, line,
                        number_of_columns, delimiter_positions)
                    end

                    column_index += cell.colspan if cell.position >= delimiter_positions[column_index]
                  end
                elsif cells_content.length == number_of_columns
                  cells_content.each_with_index do |content, i|
                    handling_content(rows[rows_tracker[i]][i], content)
                  end
                else
                  raise "More cells than columns found"
                end
              end
            else
              raise "No separator line found for row starting"
            end
          end

          if has_header && start >= header_separator_index
            rows.each { |body_row| data_rows << body_row.cells }
          elsif has_header && start < header_separator_index
            rows.each { |header_row| header_rows << header_row.cells }
          end
        end

        raise "No valid rows found in the provided Pandoc table." if data_rows.empty? && header_rows.empty?

        # Format text (bold and italic)
        [header_rows, data_rows].each do |rows|
          rows.each do |row|
            row.each do |cell|
              next if cell.content.nil?

              ["**", "__"].each do |bold_chars|
                while cell.content.include?(bold_chars)
                  cell.content = cell.content.sub(bold_chars, "<strong>")
                                             .sub(bold_chars, "</strong>")
                end
              end

              # Italics are only applied when the content has no escaped underscore
              while cell.content.include?("_") && !cell.content.include?("\\_")
                cell.content = cell.content.rstrip.sub("_", "<i>").sub("_", "</i>")
              end

              while cell.content.include?("\\_")
                cell.content = cell.content.rstrip.sub("\\_", "_")
              end
            end
          end
        end

        # Convert newlines to HTML breaks
        [header_rows, data_rows].each do |rows|
          rows.each do |row|
            row.each do |cell|
              cell.content = cell.content&.gsub("\n", "<br />")
            end
          end
        end

        # Validate grid correctness: in every row, the colspans plus the columns
        # still covered by a rowspan from above must add up to the column count
        [header_rows, data_rows].each do |rows|
          forward_rowspan = []
          rows.each_with_index do |row, row_index|
            forward_rowspan = Array.new(row.length, 0) if forward_rowspan.empty?
            sum = 0
            row.each_with_index do |cell, cell_index|
              sum += cell.colspan
              if row_index > 0 && cell.colspan.zero?
                sum += 1 if forward_rowspan[cell_index].positive?
                forward_rowspan[cell_index] -= 1
              end

              if forward_rowspan[cell_index].zero? && cell.rowspan > 1
                forward_rowspan[cell_index] = cell.rowspan - 1
              end
            end

            raise "Grid table not converted properly" unless sum == number_of_columns
          end
        end

        [header_rows, data_rows]
      end

      # Renders a grid table as HTML.
      #
      # @param pandoc_table [String] the raw grid table block
      # @return [String] the <table> markup, or a fixed placeholder message when
      #   parsing raised (the error message is logged to STDOUT)
      def generate_html_table_with_spans(pandoc_table)
        begin
          grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)
        rescue StandardError => e
          logger = Logger.new(STDOUT)
          logger.error("Grid table could not be generated: #{e.message}")
          return "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS"
        end

        html = +"<table>\n"

        # A header is only emitted when at least one header cell is visible
        has_header = grid_header.any? do |row|
          row.any? { |cell| cell.rowspan != 0 && cell.colspan != 0 }
        end

        if has_header
          html << " <thead>\n"
          grid_header.each do |row|
            html << " <tr>\n"
            row.each do |cell|
              next if cell.rowspan == 0 || cell.colspan == 0

              html << render_cell(cell, "th")
            end
            html << " </tr>\n"
          end
          html << " </thead>\n"
        end

        html << " <tbody>\n"
        grid_body.each do |row|
          html << " <tr>\n"
          row.each do |cell|
            next if cell.rowspan == 0 || cell.colspan == 0

            html << render_cell(cell, "td")
          end
          html << " </tr>\n"
        end

        html << " </tbody>\n"
        html << "</table>"
      end

      def call
        return @text if MarkdownFilter.glfm_markdown?(context)

        regex = Gitlab::UntrustedRegexp.new(MARKDOWN_GRID_TABLE_BLOCK_REGEX, multiline: true)
        return @text unless regex.match?(@text)

        regex.replace_gsub(@text) do |match|
          # Extract the grid table content from the match
          grid_table = match[:code]
          if grid_table
            # Convert grid table to HTML table
            generate_html_table_with_spans(grid_table)
          else
            # Return original text if no grid table found
            match.to_s
          end
        end
      end

      private

      # Renders one visible cell as a <th> or <td> element.
      #
      # Turns list items found in the content into a <ul> and forces left
      # alignment for such cells. Mutates cell.content and cell.alignment.
      #
      # @param cell [Cell] a cell with non-zero rowspan and colspan
      # @param tag [String] "th" or "td"
      # @return [String] the element markup, terminated by a newline
      def render_cell(cell, tag)
        # Prepare content, in case there's a list
        matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^<]+?)(?=<br \/>|$)/)

        # scan returns an empty (but truthy) array when nothing matched, so the
        # emptiness has to be checked explicitly
        if matches&.any?
          list = "<ul>#{matches.map { |match| "<li>#{match[1]}</li>" }.join}</ul>"
          # Block form: backslashes in the item text must not be interpreted as
          # back-references of the replacement string
          cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+[^<]+?<br \/>)+/) { list }
          # Enforce left alignment if cell contains a list
          cell.alignment = 'align="left"'
        end

        rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
        colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
        %( <#{tag}#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</#{tag}>\n)
      end
    end # end of class GridTableFilter
  end # end of module Filter
end # end of module Banzai