Commit ce3607db authored by Miguel Angel Reina Ortega
Browse files

Fixes from Gitlab colleagues to make rubocop checks work

parent 403a4b1b
Loading
Loading
Loading
Loading
+136 −117
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
module Banzai
  module Filter
    class GridTableFilter < HTML::Pipeline::TextFilter
      # rubocop:disable Lint/MixedRegexpCaptureTypes -- PoC
      MARKDOWN_GRID_TABLE_BLOCK_REGEX = %r{
        (?<code>
          # Grid table blocks:
@@ -31,6 +32,7 @@ module Banzai

        )
      }mx
      # rubocop:enable Lint/MixedRegexpCaptureTypes

      require 'logger'

@@ -43,7 +45,8 @@ module Banzai
      NEXT_ELEMENT_LIST_MARK = "∆"

      class Cell
          attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position_start, :position, :list_flag
        attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position_start, :position,
          :list_flag

        def initialize
          @content = nil
@@ -56,8 +59,7 @@ module Banzai
          @list_flag = false
        end

          def calculateAndSetAlignment(header_delimiter_positions, default_alignments )
            
        def calculate_and_set_alignment(header_delimiter_positions, default_alignments)
          raise "Cell position must be set before calculating alignment" if @position.nil? || @position_start.nil?

          header_delimiter_index = 0
@@ -69,9 +71,8 @@ module Banzai
          raise "Invalid table formatting" unless header_delimiter_index < default_alignments.length

          @alignment = default_alignments[header_delimiter_index]
            
        end
      end # end of class Cell
      end

      class Row
        attr_accessor :cells
@@ -87,7 +88,7 @@ module Banzai
        # Index-assignment operator for a row: stores +value+ in @cells at
        # position +index+, so callers can write `row[i] = cell`.
        def []=(index, value)
          @cells[index] = value
        end
      end # end of class Row
      end

      class RowTracker
        attr_accessor :row_tracker
@@ -104,55 +105,61 @@ module Banzai
          @row_tracker[index] = value
        end

        # Returns the largest entry of @row_tracker (via #max).
        # NOTE(review): the two `def` lines below are the pre-rename (maxValue)
        # and post-rename (max_value) signatures shown side by side by the diff;
        # only one of them exists in any given revision of the file.
        def maxValue
        def max_value
          @row_tracker.max
        end
      end # end of class RowTracker
      end

      # Helper method to detect separator lines
      #
      # Returns true when +line+ matches GRID_TABLE_SEPARATOR and false
      # otherwise. The pattern itself is defined outside this excerpt.
      def separator?(line)
        GRID_TABLE_SEPARATOR.match?(line)
      end

      # Helper method to handle content in cells
      #
      # Merges one line of raw cell text (+content+) into +cell+ and returns the
      # same +cell+ object. The text is stripped first.
      #
      # * When cell.content is nil (first line seen for this cell) rowspan and
      #   colspan are each incremented by one and the content is initialised.
      # * A line starting with "- " sets cell.list_flag and is stored followed
      #   by NEXT_ELEMENT_LIST_MARK; a non-empty line arriving while list_flag
      #   is set is also terminated with that mark.
      # * An empty line yields "\n" for a fresh cell; for an existing cell it
      #   clears list_flag (appending "\n\n") and then appends "\n" unless the
      #   content already ends with a newline.
      # * Any other line is appended after a single space.
      #
      # A backslash followed by optional whitespace at the end of a line is
      # replaced in every branch.
      # NOTE(review): the list branches pass the single-quoted '\n' (a backslash
      # followed by the letter n) as replacement, while the non-list branches
      # pass "\n" (a real newline) -- confirm which one is intended.
      #
      # NOTE(review): this span shows removed lines (using `_c`) and added lines
      # (using `modified_content`) of the diff interleaved; it is not meant to
      # be read as a single runnable method body.
      # rubocop:disable Metrics/PerceivedComplexity -- PoC
      def handling_content(cell, content)
        _c = content.strip
        modified_content = content.strip
        if cell.content.nil?
          # First line of content for this cell: the cell now occupies one row
          # and one column.
          cell.rowspan += 1
          cell.colspan += 1
          if _c.start_with?("- ") # List
          if modified_content.start_with?("- ") # List
            cell.list_flag = true
            _c = _c.gsub(/\\\s*$/, '\n')
				    cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}"  # Add list element end mark to know when the list element ends
            modified_content = modified_content.gsub(/\\\s*$/, '\n')

            # Add list element end mark to know when the list element ends
            cell.content = "#{modified_content}#{NEXT_ELEMENT_LIST_MARK}"
          elsif cell.list_flag && !content.strip.empty?
            _c = _c.gsub(/\\\s*$/, '\n')
            cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}" #add the list element end mark
          elsif _c.empty?
            modified_content = modified_content.gsub(/\\\s*$/, '\n')
            cell.content = "#{modified_content}#{NEXT_ELEMENT_LIST_MARK}" # add the list element end mark
          elsif modified_content.empty?
            cell.content = "\n"
          else
            cell.content = _c.gsub(/\\\s*$/, "\n")
            cell.content = modified_content.gsub(/\\\s*$/, "\n")
          end
        elsif _c.start_with?("- ")
        elsif modified_content.start_with?("- ")
          # New list item in a cell that already has content; separate the list
          # from preceding non-list text with a newline.
          cell.content += "\n" unless cell.list_flag
          cell.list_flag = true
          _c = _c.gsub(/\\\s*$/, '\n')
          cell.content += "#{_c}#{NEXT_ELEMENT_LIST_MARK}"
        elsif cell.list_flag && !_c.empty?
          cell.content = cell.content.strip.chomp("#{NEXT_ELEMENT_LIST_MARK}")
          _c = _c.gsub(/\\\s*$/, '\n')
          cell.content += " #{_c}#{NEXT_ELEMENT_LIST_MARK}"
        elsif _c.empty?
          modified_content = modified_content.gsub(/\\\s*$/, '\n')
          cell.content += "#{modified_content}#{NEXT_ELEMENT_LIST_MARK}"
        elsif cell.list_flag && !modified_content.empty?
          # Continuation of the current list item: drop the trailing end mark,
          # append the text, then re-add the mark.
          cell.content = cell.content.strip.chomp(NEXT_ELEMENT_LIST_MARK.to_s)
          modified_content = modified_content.gsub(/\\\s*$/, '\n')
          cell.content += " #{modified_content}#{NEXT_ELEMENT_LIST_MARK}"
        elsif modified_content.empty?
          # Blank line: ends any list in progress.
          if cell.list_flag
            cell.list_flag = false
            cell.content += "\n\n"
          end

          # Ensure the content ends with a newline without doubling one up.
          cell.content += cell.content.end_with?("\n") ? "" : "\n"
        else
          _c = _c.gsub(/\\\s*$/, "\n")
          cell.content += " #{_c}"
          modified_content = modified_content.gsub(/\\\s*$/, "\n")
          cell.content += " #{modified_content}"
        end

        cell
      end
      # rubocop:enable Metrics/PerceivedComplexity

      # Helper method to adjust colspan
      def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions)
@@ -167,7 +174,7 @@ module Banzai

          delimiters = ['|', '+']
          positions = delimiters.filter_map do |delimiter|
            pos = line[delimiter_start + 1..-1]&.index(delimiter)
            pos = line[delimiter_start + 1..]&.index(delimiter)
            pos ? pos + delimiter_start + 1 : nil
          end.compact

@@ -190,37 +197,39 @@ module Banzai
        row[column_index]
      end

      # Checks that the column delimiters found in +line+ agree with the
      # expected delimiter positions.
      #
      # Returns false when +line+ or the list of expected positions is empty.
      # Otherwise returns true only if all of the following hold:
      # * the last expected position is among the positions found in +line+,
      # * +line+ starts with "|" or "+",
      # * every "|" or "+" found in +line+ sits at an expected position.
      #
      # The scan starts at index 1, so a delimiter at index 0 is not recorded
      # in the list of found positions; the start-of-line check covers it.
      #
      # NOTE(review): this span shows the removed camelCase lines
      # (checkDelimiterAlignment, delimiterPositions, currentPositions) and the
      # added snake_case lines of the diff interleaved; it is not meant to be
      # read as a single runnable method body.
      def checkDelimiterAlignment(line, delimiterPositions)
        return false if line.empty? || delimiterPositions.empty?
      def check_delimiter_alignment(line, delimiter_positions)
        return false if line.empty? || delimiter_positions.empty?

        # puts "\nChecking line: #{line}"
        #puts "Expected delimiter positions: #{delimiterPositions}"
        # puts "Expected delimiter positions: #{delimiter_positions}"

        # For any row (only +, only |, mix of + and |)
        currentPositions = []
        current_positions = []
        start_pos = 1

        while start_pos < line.length
          pos = line.index(/[|+]/, start_pos) # Find the next occurrence of | or + starting from start_pos
          break if pos.nil? # Exit if no more delimiters are found

          currentPositions << pos
          current_positions << pos
          start_pos = pos + 1 # Move to the next character after the found delimiter
        end

        #puts "Current positions: #{currentPositions}"
        # puts "Current positions: #{current_positions}"

        # Check if the last expected delimiter position is found in currentPositions
        return currentPositions.include?(delimiterPositions[-1]) &&
        # Check if the last expected delimiter position is found in current_positions
        current_positions.include?(delimiter_positions[-1]) &&
          line.match?(/\A[|+]/) && # Check if the line starts with | or +
               currentPositions.all? { |pos| delimiterPositions.include?(pos) }  # Ensure all current positions are in delimiterPositions
          # Ensure all current positions are in delimiter_positions
          current_positions.all? do |pos|
            delimiter_positions.include?(pos)
          end
      end

      # rubocop:disable Metrics/AbcSize -- PoC
      # rubocop:disable Metrics/CyclomaticComplexity -- PoC
      # rubocop:disable Metrics/PerceivedComplexity -- PoC
      def parse_pandoc_table_with_spans(pandoc_table)

        # Split the input into lines
        lines = pandoc_table.rstrip.split("\n").map(&:rstrip)

@@ -237,7 +246,7 @@ module Banzai
        separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns }
        number_of_columns.times do |j|
          start_pos = j == 0 ? 0 : delimiter_positions[j - 1]
          pos = lines[separator_index_max_columns][start_pos + 1..-1]&.index("+")
          pos = lines[separator_index_max_columns][start_pos + 1..]&.index("+")
          delimiter_positions << (pos ? pos + start_pos + 1 : -1)
        end

@@ -266,7 +275,7 @@ module Banzai
                                  end

            start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1]
            pos = lines[index][start_pos + 1..-1]&.index("+")
            pos = lines[index][start_pos + 1..]&.index("+")
            header_delimiter_positions << (pos ? pos + start_pos + 1 : -1)
          end
          break
@@ -287,14 +296,14 @@ module Banzai
                                  end

            start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1]
            pos = lines[0][start_pos + 1..-1]&.index("+")
            pos = lines[0][start_pos + 1..]&.index("+")
            header_delimiter_positions << (pos ? pos + start_pos + 1 : -1)
          end
        end

        # Check end table delimiter alignment (not checked during the lines processing)
	      raise "Misaligned delimiters in table separators: #{lines[-1]}" unless checkDelimiterAlignment(lines[-1], delimiter_positions)

        raise "Misaligned delimiters in table separators: #{lines[-1]}" unless check_delimiter_alignment(lines[-1],
          delimiter_positions)

        # Process table body (including rows belonging to header as they are processed in the same way)
        data_rows = []
@@ -315,7 +324,8 @@ module Banzai
            if separator?(line) && !in_data_row
              in_data_row = true
              # Check end table delimiter alignment (not checked during the lines processing)
	          raise "Misaligned delimiters in separator row: #{line}" unless checkDelimiterAlignment(line, delimiter_positions)
              raise "Misaligned delimiters in separator row: #{line}" unless check_delimiter_alignment(line,
                delimiter_positions)

              parts = line.strip.delete_prefix("+").split("+")
              delimiter_index = 0
@@ -329,7 +339,7 @@ module Banzai
                delimiter_index += parts[j].length + 1
                rows[-1][i].position_start = delimiter_index - parts[j].length
                rows[-1][i].position = delimiter_index
                rows[-1][i].calculateAndSetAlignment(header_delimiter_positions, default_alignments )
                rows[-1][i].calculate_and_set_alignment(header_delimiter_positions, default_alignments)

                i += 1 while delimiter_index > delimiter_positions[i]
                i += 1
@@ -339,7 +349,8 @@ module Banzai
              # Regular data row or partial separator
              if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator
                # Check end table delimiter alignment (not checked during the lines processing)
                raise "Misaligned delimiters in partial separator: #{line}" unless checkDelimiterAlignment(line, delimiter_positions)
                raise "Misaligned delimiters in partial separator: #{line}" unless check_delimiter_alignment(line,
                  delimiter_positions)

                cells_content = line.strip.gsub(/^(\+|\|)/, '').split(/[\|\+]/)

@@ -353,7 +364,8 @@ module Banzai
                  aux_delimiter_index += cells_content[i].length + 1
                  rows[-1][auxiliar_cell_index].position_start = aux_delimiter_index - cells_content[i].length
                  rows[-1][auxiliar_cell_index].position = aux_delimiter_index
                  rows[-1][auxiliar_cell_index].calculateAndSetAlignment(header_delimiter_positions, default_alignments )
                  rows[-1][auxiliar_cell_index].calculate_and_set_alignment(header_delimiter_positions,
                    default_alignments)

                  auxiliar_cell_index += 1 while aux_delimiter_index > delimiter_positions[auxiliar_cell_index]

@@ -363,11 +375,11 @@ module Banzai
                raise "More cells than columns found" unless cells_content.length <= number_of_columns

                column_index = 0
                maxRowTracker = rows_tracker.maxValue
                max_row_tracker = rows_tracker.max_value

                cells_content.each_with_index do |content, _i|
                  if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) # Separator - split row
                    rows_tracker[column_index] = maxRowTracker + 1
                    rows_tracker[column_index] = max_row_tracker + 1
                    rows[rows_tracker[column_index]][column_index].list_flag = false

                    column_forward = 0
@@ -401,9 +413,10 @@ module Banzai
                  end
                end
              else # Data row
                cells_content = line.strip.delete_prefix("|").split(/\|/)
                cells_content = line.strip.delete_prefix("|").split("|")
                # Check end table delimiter alignment (not checked during the lines processing)
	              raise "Misaligned delimiters in row: #{line}" unless checkDelimiterAlignment(line, delimiter_positions)
                raise "Misaligned delimiters in row: #{line}" unless check_delimiter_alignment(
                  line, delimiter_positions)

                column_index = 0

@@ -440,9 +453,7 @@ module Banzai
            end
          end

          if has_header && start >= header_separator_index
            rows.each { |body_row| data_rows << body_row.cells }
          elsif has_header && start < header_separator_index
          if has_header && start < header_separator_index
            rows.each { |header_row| header_rows << header_row.cells }
          else
            rows.each { |body_row| data_rows << body_row.cells }
@@ -459,9 +470,11 @@ module Banzai

              cell.content = cell.content.gsub(/</, "&lt;")

              cell.content = cell.content.gsub(/(?<espace>^|\s)(?<bold>\*\*|__)(?<text>.+?)\g<bold>(?!\w)/, "\\k<espace><strong>\\k<text></strong>")
              cell.content = cell.content.gsub(/(?<espace>^|\s)(?<bold>\*\*|__)(?<text>.+?)\g<bold>(?!\w)/,
                "\\k<espace><strong>\\k<text></strong>")

              cell.content = cell.content.gsub(/(?<espace>^|\s)(?<italic>\*|_)(?<text>.+?)\g<italic>(?!\w)/, "\\k<espace><i>\\k<text></i>")
              cell.content = cell.content.gsub(/(?<espace>^|\s)(?<italic>\*|_)(?<text>.+?)\g<italic>(?!\w)/,
                "\\k<espace><i>\\k<text></i>")
              # Convert newlines to HTML breaks
              cell.content = cell.content&.gsub("\n", "<br />")
            end
@@ -491,7 +504,7 @@ module Banzai
        end

        [header_rows, data_rows]
      end # end of parse_pandoc_table_with_spans
      end

      def generate_html_table_with_spans(pandoc_table)
        begin
@@ -522,14 +535,17 @@ module Banzai
                next if cell.rowspan == 0 || cell.colspan == 0

                # Prepare content, in case there's a list
                matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/)
                matches = cell.content&.scan(
                  /\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o)

                if matches
                  list = "<ul>"
                  matches.each do |match|
                    list += "<li>#{match[1]}</li>"
                  end
                  list += "</ul>"
                  cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/, list)
                  cell.content = cell.content.gsub(
                    /(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/o, list)
                  # Enforce left alignment if cell contains a list
                  cell.alignment = 'align="left"'
                end
@@ -549,13 +565,16 @@ module Banzai
            row.each do |cell|
              next if cell.rowspan == 0 || cell.colspan == 0

              matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/)
              matches = cell.content&.scan(
                /\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o)

              if matches
                list = "<ul>"
                matches.each do |match|
                  list += "<li>#{match[1]}</li>"
                end
                cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/, list)
                cell.content = cell.content.gsub(
                  /(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/o, list)
                # Enforce left alignment if cell contains a list
                cell.alignment = 'align="left"'
              end
@@ -571,7 +590,7 @@ module Banzai
          html += "</table>"
          html
        end
      end # end of def generate_html_table_with_spans
      end
      # rubocop:enable Metrics/PerceivedComplexity
      # rubocop:enable Metrics/CyclomaticComplexity
      # rubocop:enable Metrics/AbcSize
@@ -594,7 +613,7 @@ module Banzai
            match.to_s
          end
        end
      end # end of def call
    end # end of class GridTableFilter
  end # end of module Filter
end # end of module Banzai
      end
    end
  end
end