Commit 5f9a76d5 authored by Miguel Angel Reina Ortega
Browse files

Formatting changes

parent 2afc6b8a
Loading
Loading
Loading
Loading
+487 −480
Original line number Diff line number Diff line
@@ -34,6 +34,12 @@ module Banzai
  
      require 'logger'

      # Regular expressions used to classify the lines of a grid table.
      # ^ and $ are line anchors in Ruby; the parser applies these to single lines.
      #
      # Full separator line: optional whitespace, a "+", then one or more runs of
      # "-", ":" or "=" each closed by a "+", then optional trailing whitespace.
      GRID_TABLE_SEPARATOR = /^\s*\+([-:=]+\+)+\s*$/
      # Same shape as GRID_TABLE_SEPARATOR, but the runs may only contain "=" or ":".
      # The parser treats the first separator matching this as the header separator.
      GRID_TABLE_HEADER_SEPARATOR = /^\s*\+([=:]+\+)+\s*$/
      # Matches when the text contains a "+" followed by one or more runs of ":" or "-"
      # each closed by a "+"; used on lines inside a block to spot a partial separator.
      GRID_TABLE_BODY_SEPARATOR = /[^\n]*\+([:-]+\+)+[^\n]*$/
      # A fragment made up solely of "-" and ":" characters; applied to the individual
      # pieces of a partial separator line to tell separator pieces from cell content.
      GRID_TABLE_BODY_SEPARATOR_LINE = /^[-:]+$/

      class Cell
          attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position, :list_flag

@@ -49,20 +55,18 @@ module Banzai

          # Chooses this cell's alignment from the header column it falls under.
          #
          # Looks for the first index whose entry in +header_delimiter_positions+ is
          # greater than or equal to the cell's +@position+ and stores the value found
          # at that index of +default_alignments+ in +@alignment+. Only the first
          # +default_alignments.length+ positions are considered.
          #
          # @param default_alignments [Array] one alignment value per header column
          # @param header_delimiter_positions [Array<Integer>] delimiter positions, in
          #   ascending order -- presumably the "+" offsets of the header separator
          #   line; confirm against the caller
          # @return the alignment that was stored in +@alignment+
          # @raise [RuntimeError] "Invalid table formatting" when +@position+ is
          #   greater than every considered delimiter position (or there are none)
          def set_alignment(default_alignments, header_delimiter_positions)
            header_delimiter_index = (0...default_alignments.length).find do |index|
              @position <= header_delimiter_positions[index]
            end

            raise "Invalid table formatting" unless header_delimiter_index

            # Whether the cell ends before the delimiter or exactly on it, the column's
            # alignment is the same, so no further branching is needed.
            @alignment = default_alignments[header_delimiter_index]
          end
      end # end of class Cell

@@ -102,21 +106,10 @@ module Banzai
        end
      end # end of class RowTracker

        # Add these regex constants at the top of the file, after the require statement
        GRID_TABLE_SEPARATOR = /^\s*\+([-:=]+\+)+\s*$/
        GRID_TABLE_HEADER_SEPARATOR = /^\s*\+([=:]+\+)+\s*$/
        GRID_TABLE_BODY_SEPARATOR = /[^\n]*\+([:-]+\+)+[^\n]*$/
        GRID_TABLE_BODY_SEPARATOR_LINE = /^[-:]+$/

        def parse_pandoc_table_with_spans(pandoc_table)
            # Split the input into lines
            lines = pandoc_table.strip.split("\n").map(&:strip)

      # Helper method to detect separator lines
            def is_separator(line)
      # Tells whether +line+ is a full grid-table separator line.
      #
      # @param line [String] a single line of the table
      # @return [Boolean] true when +line+ matches GRID_TABLE_SEPARATOR
      def separator?(line)
        GRID_TABLE_SEPARATOR.match?(line)
      end

      # Helper method to handle content in cells
      def handling_content(cell, content)
        if cell.content.nil?
@@ -133,11 +126,9 @@ module Banzai
          else
            cell.content = content.strip.gsub(/\\\s*$/, "\n")
          end
                else
                    if content.strip.start_with?("- ")
                        unless cell.list_flag
                            cell.content += "\n"
                        end
        elsif content.strip.start_with?("- ")
          cell.content += "\n" unless cell.list_flag

          cell.list_flag = true
          cell.content += "#{content.strip}\n"
        elsif cell.list_flag && !content.strip.empty?
@@ -150,7 +141,7 @@ module Banzai
          content = content.strip.gsub(/\\\s*$/, "\n")
          cell.content += " #{content}"
        end
                end

        cell
      end

@@ -159,12 +150,14 @@ module Banzai
        (column_index...number_of_parts).each do |j|
          delimiter_start = nil
          col_i = column_index

          until delimiter_start
            delimiter_start = col_i > 0 ? row[col_i - 1].position : 0
            col_i -= 1
          end

                    positions = ["|", "+"].map do |delimiter|
          delimiters = ['|', '+']
          positions = delimiters.filter_map do |delimiter|
            pos = line[delimiter_start + 1..-1]&.index(delimiter)
            pos ? pos + delimiter_start + 1 : nil
          end.compact
@@ -173,6 +166,7 @@ module Banzai
      
          if position && position > delimiter_positions[j]
            row[column_index].colspan += 1

            if position == delimiter_positions[-1]
              colspan_allocated = row[column_index].colspan
              row[column_index].colspan += number_of_columns - colspan_allocated - column_index
@@ -183,11 +177,20 @@ module Banzai
            break
          end
        end

        row[column_index]
      end

      # rubocop:disable Metrics/AbcSize -- PoC
      # rubocop:disable Metrics/CyclomaticComplexity -- PoC
      # rubocop:disable Metrics/PerceivedComplexity -- PoC
      def parse_pandoc_table_with_spans(pandoc_table)

        # Split the input into lines
        lines = pandoc_table.strip.split("\n").map(&:strip)
        
		# Retrieve separator indices
            separator_indices = lines.each_index.select { |i| is_separator(lines[i]) }
        separator_indices = lines.each_index.select { |i| separator?(lines[i]) }

        raise "No valid separators found in the provided Pandoc table." if separator_indices.empty?

@@ -198,7 +201,7 @@ module Banzai
		# Determine delimiter positions
        separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns }
        number_of_columns.times do |j|
                start_pos = j.zero? ? 0 : delimiter_positions[j - 1]
          start_pos = j == 0 ? 0 : delimiter_positions[j - 1]
          pos = lines[separator_index_max_columns][start_pos + 1..-1]&.index("+")
          delimiter_positions << (pos ? pos + start_pos + 1 : -1)
        end
@@ -212,7 +215,8 @@ module Banzai

        # Determine header delimiter positions
        separator_indices.each do |index|
                if GRID_TABLE_HEADER_SEPARATOR.match?(lines[index])
          next unless GRID_TABLE_HEADER_SEPARATOR.match?(lines[index])

          has_header = true
          header_separator_index = index
          parts = lines[index].strip.delete_prefix("+").split("+")
@@ -226,28 +230,29 @@ module Banzai
                                    'align="center"'
                                  end

                        start_pos = part_index.zero? ? 0 : header_delimiter_positions[part_index - 1]
            start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1]
            pos = lines[index][start_pos + 1..-1]&.index("+")
            header_delimiter_positions << (pos ? pos + start_pos + 1 : -1)
          end
          break
        end
            end

        # Process table body (including rows belonging to header as they are processed in the same way)
        data_rows = []

        (separator_indices.length - 1).times do |row|
          rows = []
          rows_tracker = nil
          in_data_row = false
                start, end_idx = separator_indices[row], separator_indices[row + 1] # Lines between separators including separator line start as it gives information about the number of columns of the row
          start = separator_indices[row]
          end_idx = separator_indices[row + 1]
          row_lines = lines[start...end_idx]

          next if row_lines.empty?

          row_lines.each do |line|
            # First line (normally a separator) of each block
                    if is_separator(line) && !in_data_row
            if separator?(line) && !in_data_row
              in_data_row = true
              parts = line.strip.delete_prefix("+").split("+")
              delimiter_index = 0
@@ -262,17 +267,17 @@ module Banzai
                rows[-1][i].position = delimiter_index
                rows[-1][i].set_alignment(default_alignments, header_delimiter_positions)

                        while delimiter_index > delimiter_positions[i]
                            i += 1
                        end
                i += 1 while delimiter_index > delimiter_positions[i]
                i += 1
              end
			# Lines in a block
            elsif in_data_row
              # Regular data row or partial separator
                if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator
                            cells_content = line.strip.delete_prefix("|").delete_prefix("+")
                                            .delete_suffix("|").delete_suffix("+").split(/[\|\+]/)
                cells_content = line.strip
                                   .delete_prefix("|")
                                   .delete_prefix("+")
                                   .split(/[\|\+]/)

                rows << Row.new(number_of_columns)
                aux_delimiter_index = 0
@@ -285,16 +290,16 @@ module Banzai
                  rows[-1][auxiliar_cell_index].position = aux_delimiter_index
                  rows[-1][auxiliar_cell_index].set_alignment(default_alignments, header_delimiter_positions)

                                while aux_delimiter_index > delimiter_positions[auxiliar_cell_index]
                                    auxiliar_cell_index += 1
                                end
                  auxiliar_cell_index += 1 while aux_delimiter_index > delimiter_positions[auxiliar_cell_index]

                  auxiliar_cell_index += 1
                end

                            if cells_content.length <= number_of_columns
                raise "More cells than columns found" unless cells_content.length <= number_of_columns

                column_index = 0
                maxRowTracker = rows_tracker.maxValue
                                cells_content.each_with_index do |content, i|
                cells_content.each_with_index do |content, _i|
                  if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) # Separator - split row
                    rows_tracker[column_index] = maxRowTracker + 1
                    rows[rows_tracker[column_index]][column_index].list_flag = false
@@ -306,6 +311,7 @@ module Banzai
                        #rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1
                      end
                    end

                    column_index += column_forward
                  else # Regular cell in Partial separator line
                    rows[rows_tracker[column_index]][column_index] =
@@ -315,33 +321,39 @@ module Banzai
                    unless rows[rows_tracker[column_index]][column_index].colspan_adjusted
                      rows[rows_tracker[column_index]][column_index].colspan_adjusted = true
                      rows[rows_tracker[column_index]][column_index] =
                                                adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, 
                                                            line, number_of_columns, delimiter_positions)
                        adjust_colspan(rows[rows_tracker[column_index]],
                          column_index,
                          number_of_columns,
                          line,
                          number_of_columns,
                          delimiter_positions)
                    end

                    if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index]
                      colspan = rows[rows_tracker[column_index]][column_index].colspan
                                            column_index += colspan.zero? ? 1 : colspan
                      column_index += (colspan == 0 ? 1 : colspan) # rubocop:disable Metrics/BlockNesting -- PoC
                    end
                  end
                end
                            else
                                raise "More cells than columns found"
                            end

              else # Data row
                            cells_content = line.strip.delete_prefix("|").delete_suffix("|").split(/\|/)
                cells_content = line.strip.delete_prefix("|").split(/\|/)
                column_index = 0

                if cells_content.length < number_of_columns
                                cells_content.each_with_index do |content, i|
                  cells_content.each_with_index do |content, _i|
                    rows[rows_tracker[column_index]][column_index] =
                      handling_content(rows[rows_tracker[column_index]][column_index], content)

                    unless rows[rows_tracker[column_index]][column_index].colspan_adjusted
                      rows[rows_tracker[column_index]][column_index].colspan_adjusted = true
                      rows[rows_tracker[column_index]][column_index] =
                                        adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns,
                                                    line, number_of_columns, delimiter_positions)
                        adjust_colspan(rows[rows_tracker[column_index]],
                          column_index,
                          number_of_columns,
                          line,
                          number_of_columns,
                          delimiter_positions)
                    end

                    if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index]
@@ -376,35 +388,26 @@ module Banzai
            row.each do |cell|
              next if cell.content.nil?

                    ["**", "__"].each do |bold_chars|
              delimters = ['**', '__']
              delimters.each do |bold_chars|
                while cell.content.include?(bold_chars)
                  cell.content = cell.content.sub(bold_chars, "<strong>")
                                          .sub(bold_chars, "</strong>")
                end
              end

                    while cell.content.include?("_") && !cell.content.include?("\\_")
              while cell.content.include?("_") && cell.content.exclude?("\\_")
                cell.content = cell.content.rstrip.sub("_", "<i>").sub("_", "</i>")
              end

                    while cell.content.include?("\\_")
                        cell.content = cell.content.rstrip.sub("\\_", "_")
                    end
                end
                end
            end
              cell.content = cell.content.rstrip.sub("\\_", "_") while cell.content.include?("\\_")

              # Convert newlines to HTML breaks
            [header_rows, data_rows].each do |rows|
                rows.each do |row|
                    row.each do |cell|
              cell.content = cell.content&.gsub("\n", "<br />")
            end
          end
            end

          # Validate grid correctness
            [header_rows, data_rows].each do |rows|
          forward_rowspan = []

          rows.each_with_index do |row, row_index|
@@ -413,16 +416,14 @@ module Banzai

            row.each_with_index do |cell, cell_index|
              sum += cell.colspan
                        if row_index > 0 && cell.colspan.zero?
                            if forward_rowspan[cell_index].positive?
                                sum += 1
                            end

              if row_index > 0 && cell.colspan == 0
                sum += 1 if forward_rowspan[cell_index] > 0

                forward_rowspan[cell_index] -= 1
              end

                        if forward_rowspan[cell_index].zero? && cell.rowspan > 1
                            forward_rowspan[cell_index] = cell.rowspan - 1
                        end
              forward_rowspan[cell_index] = cell.rowspan - 1 if forward_rowspan[cell_index] == 0 && cell.rowspan > 1
            end

            raise "Grid table not converted properly" unless sum == number_of_columns
@@ -436,9 +437,10 @@ module Banzai
        begin
          grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)
        rescue StandardError => e
                logger = Logger.new(STDOUT)
          logger = Logger.new($stdout)
          logger.error("Grid table could not be generated: #{e.message}")
                return "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS FILE"

          "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS"
        else
          html = "<table>\n"
          has_header = false
@@ -460,20 +462,21 @@ module Banzai
                next if cell.rowspan == 0 || cell.colspan == 0

                # Prepare content, in case there's a list
                            if matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^<]+?)(?=<br \/>|$)/)
                matches = cell.content&.scan(%r{\s*([-*+]|\d+\.)\s+([^<]+?)(?=<br />|$)})
                if matches
                  list = "<ul>"
                  matches.each do |match|
                    list += "<li>#{match[1]}</li>"
                  end
                  list += "</ul>"
                                cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+[^<]+?<br \/>)+/, list)
                  cell.content = cell.content.gsub(%r{(\s*([-*+]|\d+\.)\s+[^<]+?<br />)+}, list)
                  # Enforce left alignment if cell contains a list
                  cell.alignment = 'align="left"'
                end

                rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
                colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
                            html += %{            <th#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</th>\n}
                html += %(            <th#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</th>\n)
              end
              html += "        </tr>\n"
            end
@@ -486,20 +489,21 @@ module Banzai
            row.each do |cell|
              next if cell.rowspan == 0 || cell.colspan == 0

                        if matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^<]+?)(?=<br \/>|$)/)
              matches = cell.content&.scan(%r{\s*([-*+]|\d+\.)\s+([^<]+?)(?=<br />|$)})
              if matches
                list = "<ul>"
                matches.each do |match|
                  list += "<li>#{match[1]}</li>"
                end
                list += "</ul>"
                            cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+[^<]+?<br \/>)+/, list)
                cell.content = cell.content.gsub(%r{(\s*([-*+]|\d+\.)\s+[^<]+?<br />)+}, list)
                # Enforce left alignment if cell contains a list
                cell.alignment = 'align="left"'
              end

              rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
              colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
                        html += %{            <td#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</td>\n}
              html += %(            <td#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</td>\n)
            end
            html += "        </tr>\n"
          end
@@ -509,6 +513,9 @@ module Banzai
          html
        end
      end # end of def generate_html_table_with_spans
      # rubocop:enable Metrics/PerceivedComplexity
      # rubocop:enable Metrics/CyclomaticComplexity
      # rubocop:enable Metrics/AbcSize

      def call
        return @text unless MarkdownFilter.glfm_markdown?(context)