Hopefully fixing List Numbering (f97745c1) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

generateBaseline/postprocessing.py

+1901 −22

File changed.

Preview size limit exceeded, changes collapsed.

generateSpecWebSite/gridTableTools.py

+51 −19

Original line number	Diff line number	Diff line
		@@ -43,6 +43,7 @@ class GridCell:
		self.position:Optional[int] = None
		self.listFlag:bool = False
		self.auxiliarIndex:int = 0
		self.leadingSpaces:int = 0


		def calculateAndSetAlignment(self,
		@@ -159,14 +160,20 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR

		# Set content on the cell - concatenating multilines, flagging lists
		def handleCellContent(cell:GridCell, content:str) -> None:
		_c = content.strip()
		_c = content.rstrip()
		# Remove backslash from escaped pipes
		_c = _c.replace('\\|', '\|')
		cell.leadingSpaces = len(_c) - len(_c.lstrip())
		_c = _c[cell.leadingSpaces:]

		if cell.content is None: # Previous empty cell
		cell.rowspan += 1
		cell.colspan += 1
		if _c.startswith('- '): # List in a cell

		cell.leadingSpaces = len(_c) - len(_c.lstrip())
		_c = _c[cell.leadingSpaces:]

		if re.match(r'^(?:>?\s(?:[-+]\s\|\d+\.\s\|\d+\)\s).*)', _c.lstrip()): # List in a cell
		cell.listFlag = True
		_c = re.sub(r'\\\s*$', '\n', _c)
		cell.content = _c + _nextListElementMark # Add list element end mark to know when the list element ends
		@@ -179,7 +186,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
		else:
		cell.content = re.sub(r'\\\s*$', '\n', _c)
		else: # Cell has content
		if _c.startswith('- '): # List
		if re.match(r'^(?:>?\s(?:[-+]\s\|\d+\.\s\|\d+\)\s).*)', _c.lstrip()): # List in a cell
		if not cell.listFlag:
		cell.content += '\n'
		#cell['content'] = cell['content'].strip("\n")
		@@ -187,10 +194,13 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
		_c = re.sub(r'\\\s*$', '\n', _c)
		cell.content += _c + _nextListElementMark # Add list element end mark to know when the list element ends
		elif cell.listFlag and len(_c) > 0: # any other content when handling list is concatenated to the last list element
		# cell.content = cell.content.strip(nextListElementMark) #remove list element end mark
		cell.content = cell.content.removesuffix(_nextListElementMark) #remove list element end mark

		_c = re.sub(r'\\\s*$', '\n', _c)
		# cell.content = cell.content.strip(nextListElementMark) #remove list element end mark
		cell.content = cell.content.removesuffix(_nextListElementMark) #remove list element end mark
		if cell.content.endswith('\n'): # a force linebreak is present
		cell.content += _c + _nextListElementMark
		else: # Multiline content is concatenated to the last line with a space
		cell.content += ' ' + _c + _nextListElementMark #add list element end mark
		elif len(_c) == 0: # separation between list and other paragraph
		if cell.listFlag:
		@@ -199,6 +209,9 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
		#content = re.sub(r'\\\s*$', "\n", content.strip())
		cell.content += '\n' if not cell.content.endswith('\n') else ''
		else:
		if cell.content.endswith('\n'): # a force linebreak is present
		cell.content += re.sub(r'\\\s*$', '\n', _c)
		else: # Multiline content is concatenated to the last line with a space
		cell.content += ' ' + re.sub(r'\\\s*$', '\n', _c)

		# Adjust colspan of a cell
		@@ -668,8 +681,11 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
		Returns:
		The HTML table in string format.
		"""
		regex1 = r'\s([-+]\|\s*\d+\.)\s+((?:(?!' + re.escape(_nextListElementMark) + r').)+)' + re.escape(_nextListElementMark)
		regex1 = r'\s([-+]\|\s*\d+\.)\s+([^' + re.escape(_nextListElementMark) + r']+)' + re.escape(_nextListElementMark)
		regex2 = r'(\s([-+]\|\s*\d+\.)\s+(?:(?!∆).)+' + re.escape(_nextListElementMark) + r')+'
		regexLists = r'(?:(?:^\|\s<br\s\/?>)\s(?:(?:[-+]\|\d+\.)\s+[^' + re.escape(_nextListElementMark) + r']+' + re.escape(_nextListElementMark) + r')+(?:(\s<br\s\/?>\s<br\s\/?>\|$)))'
		#regexLists = r'(?:(?:(?:[-+]\|\d+\.)\s+[^' + re.escape(_nextListElementMark) + r']+' + re.escape(_nextListElementMark) + r')+)\s<br\s/?>\s<br\s*/?>'
		#regexLists = r'(?:^\s(?:(?:[-+]\|\d+\.)\s+[^' + re.escape(_nextListElementMark) + r']+' + re.escape(_nextListElementMark) + r')+)'

		try:
		gridHeader, gridBody = parseGridTableWithSpans(gridTable)
		@@ -721,19 +737,35 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
		continue
		else:
		#Prepare content, in case there's a list
		if cell.content is not None and (matches := re.findall(regex1, cell.content)): # Update cell in new row
		list = '<ul>'
		if cell.content is not None:
		def replace_list(match_obj):
		"""Callback function to replace each list match."""
		list_text = match_obj.group(0) # Get full match
		if matches := re.findall(regex1, list_text):
		sub_list = '<ul>'
		# Build list the matches
		for match in matches:
		list += f'<li>{match[1]}</li>'
		list += '</ul>'
		cell.content = re.sub(regex2, list, cell.content)
		sub_list += f'<li>{match[1]}</li>'
		sub_list += '</ul><br />'
		return sub_list
		return match_obj.group(0) # Return original if no matches

		# Replace all list matches using callback
		new_content = re.sub(regexLists, replace_list, cell.content)
		if new_content != cell.content:
		cell.content = new_content
		# Enforce left alignment if cell contains a list
		cell.alignment = _alignLeft

		#Split content into multilines but keeping the separator
		cell.content = '\n\n'.join(re.split(r'(<br\s/?>\s<br\s*/?>)', cell.content))
		#contentWithBreaklines = cell.content
		#contentWithBreaklines = contentWithBreaklines.replace(_nextListElementMark, "<br />")
		#multilineContent = '\n'.join(contentWithBreaklines.split("<br />"))
		rowspan = f' rowspan="{cell.rowspan}"' if cell.rowspan > 1 else ''
		colspan = f' colspan="{cell.colspan}"' if cell.colspan > 1 else ''
		html += f'<td{rowspan}{colspan} {cell.alignment} markdown="1">{cell.content}</td>\n'
		#html = '\n'.join([html, f'<td{rowspan}{colspan} {cell.alignment} markdown="1">','','', multilineContent,'','', '</td>',''])
		html += f'<td{rowspan}{colspan} {cell.alignment} markdown="1">\n\n{cell.content}\n\n</td>\n'
		html += '</tr>\n'

		html += '</tbody>\n'

generateSpecWebSite/spec_on_pages.sh

+11 −1

Original line number	Diff line number	Diff line
		@@ -68,7 +68,7 @@ if [ -f "frontmatter.md" ] && [ "$(cat frontmatter.md \| tr -d ' \n\t')" != "{}"
		cat spec.md
		frontmatterClause=True
		fi
		python3 /markdownTools/processMDSpec.py "./${SPEC_NAME}.md" > combined.md
		python3 /markdownTools/processMDSpec.py --no-expand-paths "./${SPEC_NAME}.md" > combined.md
		echo "------ Generate input for mkdocs --------"
		#python3 $1 -it -ihp --title ${9^^} "./$8.md"
		if [ $frontmatterClause == True ]; then
		@@ -85,6 +85,16 @@ cat _nav.yml
		echo " - 'Home': 'index.md'" >> mkdocs.yml
		cat _nav.yml >> mkdocs.yml
		cp docs/0.md docs/index.md
		repo_url="https://${CI_SERVER_HOST}/${CI_PROJECT_PATH}.git"
		echo ${repo_url}
		sed -i 's/##PROJECT/'${CI_PROJECT_NAME^^}'/g' mkdocs.yml
		repo_url=$(echo ${repo_url} \| sed 's/\//\\\//g')
		sed -i 's/##REPO_URL/'"${repo_url}"'/g' mkdocs.yml
		sed -i 's/##REPO_NAME/'${CI_PROJECT_NAME^^}'/g' mkdocs.yml

		sed --version

		cat mkdocs.yml
		## Create download tab if official baseline version
		if [[ ${TAG_NAME} == v* ]]; then
		echo "\nAdding download tab for version ${TAG_NAME}..."

gitlabFilter/grid_table_filter.rb

+57 −32

Original line number	Diff line number	Diff line
		@@ -47,7 +47,7 @@ module Banzai

		class Cell
		attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position_start, :position,
		:list_flag
		:list_flag, :leading_spaces

		def initialize
		@content = nil
		@@ -58,6 +58,7 @@ module Banzai
		@position_start = nil
		@position = nil
		@list_flag = false
		@leading_spaces = 0
		end

		def calculate_and_set_alignment(header_delimiter_positions, default_alignments)
		@@ -119,12 +120,18 @@ module Banzai
		# Helper method to handle content in cells
		# rubocop:disable Metrics/PerceivedComplexity -- PoC
		def handling_content(cell, content)
		modified_content = content.strip
		modified_content = content.rstrip
		modified_content = modified_content.gsub(/\\\\|/, '\|')
		if cell.leading_spaces <= modified_content[/^\s*/].size
		modified_content = modified_content[cell.leading_spaces..]
		end
		if cell.content.nil?
		cell.rowspan += 1
		cell.colspan += 1
		if modified_content.start_with?("- ") # List

		cell.leading_spaces = modified_content[/^\s*/].size
		modified_content = modified_content[cell.leading_spaces..]
		if modified_content.lstrip.match?(/^(?:>?\s(?:[-+]\|\d+\.\|\d+\)\s).*)/) # List
		cell.list_flag = true
		modified_content = modified_content.gsub(/\\\s*$/, '\n')

		@@ -134,30 +141,39 @@ module Banzai
		modified_content = modified_content.gsub(/\\\s*$/, '\n')
		cell.content = "#{modified_content}#{NEXT_ELEMENT_LIST_MARK}" # add the list element end mark
		elsif modified_content.empty?
		cell.content = "\n"
		cell.content = '\n'
		else
		cell.content = modified_content.gsub(/\\\s*$/, "\n")
		cell.content = modified_content.gsub(/\\\s*$/, '\n')
		end
		elsif modified_content.start_with?("- ")
		cell.content += "\n" unless cell.list_flag
		elsif modified_content.lstrip.match?(/^(?:>?\s(?:[-+]\|\d+\.\|\d+\)\s).*)/) # List
		cell.content += '\n' unless cell.list_flag
		cell.list_flag = true
		modified_content = modified_content.gsub(/\\\s*$/, '\n')
		cell.content += "#{modified_content}#{NEXT_ELEMENT_LIST_MARK}"
		elsif cell.list_flag && !modified_content.empty?
		cell.content = cell.content.strip.chomp(NEXT_ELEMENT_LIST_MARK.to_s)
		modified_content = modified_content.gsub(/\\\s*$/, '\n')
		# cell.content = cell.content.strip(nextListElementMark) #remove list element end mark
		cell.content = cell.content.chomp(NEXT_ELEMENT_LIST_MARK.to_s)
		if cell.content.end_with?('\n') # a force linebreak is present
		cell.content += "#{modified_content}#{NEXT_ELEMENT_LIST_MARK}"
		else # Multiline content is concatenated to the last line with a space
		cell.content += " #{modified_content}#{NEXT_ELEMENT_LIST_MARK}"
		end
		elsif modified_content.empty?
		if cell.list_flag
		cell.list_flag = false
		cell.content += "\n\n"
		cell.content += '\n\n'
		end

		cell.content += cell.content.end_with?("\n") ? "" : "\n"
		cell.content += cell.content.end_with?('\n') ? "" : '\n'
		else
		modified_content = modified_content.gsub(/\\\s*$/, "\n")
		modified_content = modified_content.gsub(/\\\s*$/, '\n')
		if cell.content.end_with?('\n') # a force linebreak is present
		cell.content += modified_content
		else # Multiline content is concatenated to the last line with a space
		cell.content += " #{modified_content}"
		end
		end

		cell
		end
		@@ -511,7 +527,7 @@ module Banzai
		#cell.content = cell.content.gsub(/(?<espace>^\|\s)(?<italic>\*\|_)(?<text>.+?)\g<italic>(?!\w)/,
		# "\\k<espace><i>\\k<text></i>")
		# Convert newlines to HTML breaks
		cell.content = cell.content&.gsub("\n", "<br />")
		cell.content = cell.content&.gsub('\n', "<br />")
		end
		end

		@@ -594,7 +610,7 @@ module Banzai

		rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
		colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
		html += %(<th#{rowspan}#{colspan} markdown="1" style="text-align: #{cell.alignment}"><br /><br />#{cell.content}<br /><br /></th>)
		html += %(<th#{rowspan}#{colspan} markdown="1" style="text-align: #{cell.alignment}">\n\n#{cell.content}\n\n</th>)
		end
		html += '</tr>'
		end
		@@ -607,23 +623,32 @@ module Banzai
		row.each do \|cell\|
		next if cell.rowspan == 0 \|\| cell.colspan == 0

		matches = cell.content&.scan(
		/\s([-+]\|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o)

		if matches
		list = "<ul>"
		matches.each do \|match\|
		list += "<li>#{match[1]}</li>"
		end
		cell.content = cell.content.gsub(
		/(\s([-+]\|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/o, list)
		# Enforce left alignment if cell contains a list
		cell.alignment = 'left'
		end
		lists = cell.content&.scan(
		/(?:(?:^\|\s<br\s\/?>)\s(?:(?:[-+]\|\d+\.)\s+[^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK})+(?:\s<br\s\/?>\s<br\s\/?>\|$))/o)

		#if lists
		# lists.each do \|list\|
		# matches = list&.scan(
		# /\s([-+]\|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}/o)
		# if matches
		# sub_list = "<ul>"
		# matches.each do \|match\|
		# sub_list += "<li>#{match[1]}</li>"
		# end
		# sub_list += "</ul><br />"
		# cell.content = cell.content.gsub(list.to_s, sub_list.to_s)
		# # Enforce left alignment when cell contains a list
		# cell.alignment = 'left'
		# end
		# end
		#end

		rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
		colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
		html += %(<td#{rowspan}#{colspan} markdown="1" style="text-align: #{cell.alignment}">\n\n#{cell.content}\n\n</td>)
		content_with_breaklines = cell.content
		content_with_breaklines = cell.content&.gsub("<br />", "\n")
		content_with_breaklines = content_with_breaklines&.gsub(NEXT_ELEMENT_LIST_MARK.to_s, "\n")
		html += %(<td#{rowspan}#{colspan} markdown="1" style="text-align: #{cell.alignment}">\n\n#{content_with_breaklines}\n\n</td>)
		end
		html += '</tr>'
		end

markdownTools/processMDSpec.py

+11 −7

Original line number	Diff line number	Diff line
		@@ -213,7 +213,7 @@ def processFile(args:argparse.Namespace) -> str:
		The processed markdown content as a string.
		"""

		def handleIncludesForFile(filename:str, currentPath:str) -> str:
		def handleIncludesForFile(filename:str, currentPath:str, args:argparse.Namespace) -> str:
		""" Read a single markdown file and return its content.

		Args:
		@@ -242,10 +242,13 @@ def processFile(args:argparse.Namespace) -> str:
		print(f'[red]File not found: {filename}')
		raise

		if args.expandPaths:

		# Expand the paths in the markdown file

		# extract front matter information
		lines = expandPaths(lines, currentPath, dirname)

		fm, lines = processFrontMatter(lines, args)
		if fm:
		_frontMatter[filename] = fm
		@@ -268,17 +271,18 @@ def processFile(args:argparse.Namespace) -> str:
		if match:
		includeFilename = match.group(1)
		# Read the included file and replace the include statement with its content
		lines[lines.index(line)] = handleIncludesForFile(includeFilename, os.path.dirname(filename))
		lines[lines.index(line)] = handleIncludesForFile(includeFilename, os.path.dirname(filename), args)

		return ''.join(lines)

		return handleIncludesForFile(args.document, os.path.dirname(args.document))
		return handleIncludesForFile(args.document, os.path.dirname(args.document), args)


		def main(args=None):

		parser = argparse.ArgumentParser(description='Process markdown specification files.')
		parser.add_argument('--no-include', dest='doInclude', action='store_false', default=True, help="don't process include statements")
		parser.add_argument('--no-expand-paths', dest='expandPaths', action='store_false', default=True, help="don't expand paths in the markdown file")
		parser.add_argument('--render-markdown', '-md', dest='renderAsMarkdown', action='store_true', help='render output as markdown')
		parser.add_argument('--process-frontmatter', '-fm', dest='outputFrontMatter', action='store_true', help='output front matter only')
		parser.add_argument('--frontmatter-only', '-fmo', dest='onlyFrontMatter', action='store_true', help='output only front matter')