Commit be71662c authored by ankraft's avatar ankraft
Browse files

Fixed some regex

parent 582116cd
Loading
Loading
Loading
Loading
+21 −16
Original line number Diff line number Diff line
@@ -13,6 +13,8 @@ from regexMatches import *
# Alignment attribute strings stored in a cell's `alignment` field
# (`_alignLeft` is enforced for cells that contain a list).
_alignLeft = 'align="left"'
_alignRight = 'align="right"'
_alignCenter = 'align="center"'
# End-of-list-element marker. The parser appends it to the content of each list
# element inside a cell, and the HTML generator builds its list-matching regexes
# from it (via re.escape) to find where every element ends.
# NOTE(review): the comment below requires exactly one character, but the literal
# appears empty in this view - the marker character may have been lost; confirm
# against the repository before relying on this value.
_nextListElementMark = ''	# Marks a continuing list in the line before. !!! Must be a single character


# Output callables for informational and debug messages; both are bound to the
# built-in print here.
printInfo = print
printDebug = print
@@ -157,7 +159,6 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
	defaultAlignments:list[str] = []
	headerDelimiterPositions:list[int] = []
	delimiterPositions:list[int] = []
	nextListElementMark = '@'
	
	# Split the input into lines
	lines:list[str] = [line for line in gridTable.rstrip().split('\n')]
@@ -178,10 +179,10 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
			if _c.startswith('- '):  # List in a cell
				cell.listFlag = True
				_c = re.sub(r'\\\s*$', '\n', _c)
				cell.content = _c + nextListElementMark  # Add list element end mark to know when the list element ends		
				cell.content = _c + _nextListElementMark  # Add list element end mark to know when the list element ends		
			elif cell.listFlag and len(_c) > 0:  # any other content when handling list is concatenated to the last list element
				_c = re.sub(r'\\\s*$', '\n', _c)
				cell.content = _c + nextListElementMark #add the list element end mark
				cell.content = _c + _nextListElementMark #add the list element end mark
			elif not _c:  # empty line. separation between list and other paragraph
				# cell.content = '\n' if not cell.content.endswith('\n') else ""
				cell.content = '\n' # cell content is always empty / None here.
@@ -194,11 +195,13 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
					#cell['content'] = cell['content'].strip("\n")
				cell.listFlag = True
				_c = re.sub(r'\\\s*$', '\n', _c)
				cell.content += _c + nextListElementMark  # Add list element end mark to know when the list element ends
				cell.content += _c + _nextListElementMark  # Add list element end mark to know when the list element ends
			elif cell.listFlag and len(_c) > 0:  # any other content when handling list is concatenated to the last list element
				cell.content = cell.content.strip(nextListElementMark) #remove list element end mark
				# cell.content = cell.content.strip(nextListElementMark) #remove list element end mark
				cell.content = cell.content.removesuffix(_nextListElementMark) #remove list element end mark

				_c = re.sub(r'\\\s*$', '\n', _c)
				cell.content += " " + _c + nextListElementMark #add list element end mark
				cell.content += ' ' + _c + _nextListElementMark #add list element end mark
			elif len(_c) == 0:  # separation between list and other paragraph
				if cell.listFlag:
					cell.listFlag = False
@@ -222,8 +225,6 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
				row[columnIndex].colspan += 1
				if position == delimiterPositions[len(delimiterPositions) - 1]:  # last cell in row, adjust colspan to get max number columns
					colspan_allocated = row[columnIndex].colspan
					#for cell_index in range(number_of_parts):
					#	colspan_allocated += row[cell_index].colspan
					row[columnIndex].colspan += numberOfColumns - colspan_allocated - columnIndex
			elif position < delimiterPositions[j]:
				raise ValueError("Wrong cell formatting")
@@ -435,7 +436,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
									continue

						else:
							raise ValueError('More cells than columns found')
							raise ValueError(f'More cells than columns found ({len(cellsContent)} {numberOfColumns})')
						
					else: # Data row
						cellsContent = re.split(r'\|', line.strip('|'))
@@ -462,7 +463,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
								rowIndex = rowsTracker[columnIndex]
								handleCellContent(rows[rowIndex][columnIndex], content)
						else:
							raise ValueError('More cells than columns found')
							raise ValueError(f'More cells than columns found ({len(cellsContent)} {numberOfColumns})')
				else:
					raise ValueError('No separator line found for row starting')

@@ -489,10 +490,11 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
					# Replacing "<" by &lt;
					cell.content = cell.content.replace('<', '&lt;')

					
					# Bold replacements
					# Regex to detect markdown bold formatting in cell content
					if cell.content is not None:
						cell.content = matchBold.sub(r'<strong>\g<text></strong>', cell.content)
						cell.content = matchBold.sub(r'\1<strong>\g<text></strong>', cell.content)

					# Italic replacements
					# Regex to detect markdown italic formatting in cell content
@@ -560,6 +562,9 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
		Returns:
			The HTML table in string format.
	"""
	regex1 = r'\s*([-*+]|\s*\d+\.)\s+((?:(?!' + re.escape(_nextListElementMark) + r').)+)' + re.escape(_nextListElementMark)
	regex2 = r'(\s*([-*+]|\s*\d+\.)\s+(?:(?!∆).)+' + re.escape(_nextListElementMark) + r')+'

	try:
		gridHeader, gridBody = parseGridTableWithSpans(gridTable)
	except Exception as e:
@@ -585,13 +590,13 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
					continue
				else:
					# Prepare content, in case there's a list
					if cell.content is not None and (matches := re.findall(r'\s*([-*+]|\s*\d+\.)\s+((?:(?!@).)+)@', cell.content)):  # Update cell in new row
					if cell.content is not None and (matches := re.findall(regex1, cell.content)):  # Update cell in new row
						list = '<ul>'
						# Build list the matches
						for match in matches:
							list += '<li>' + match[1] + '</li>'
						list += '</ul>'
						cell.content = re.sub(r'(\s*([-*+]|\s*\d+\.)\s+(?:(?!@).)+@)+', list, cell.content)
						cell.content = re.sub(regex2, list, cell.content)
						# Enforce left alignment if cell contains a list
						cell.alignment = _alignLeft

@@ -610,13 +615,13 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
				continue
			else:
				#Prepare content, in case there's a list
				if cell.content is not None and (matches := re.findall(r'\s*([-*+]|\s*\d+\.)\s+((?:(?!@).)+)@', cell.content)):  # Update cell in new row
				if cell.content is not None and (matches := re.findall(regex1, cell.content)):  # Update cell in new row
					list = '<ul>'
					# Build list the matches
					for match in matches:
						list += f'<li>{match[1]}</li>'
					list += '</ul>'
					cell.content = re.sub(r'(\s*([-*+]|\s*\d+\.)\s+(?:(?!@).)+@)+', list, cell.content)
					cell.content = re.sub(regex2, list, cell.content)
					# Enforce left alignment if cell contains a list
					cell.alignment = _alignLeft

+2 −2
Original line number Diff line number Diff line
@@ -36,5 +36,5 @@ matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECAS
matchTable = re.compile(r'^\s*\|.*\|\s*$', re.IGNORECASE)
matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE)

matchBold = re.compile(r'(?<!\S)(\*\*|__)(?P<text>.+?)(?<!\\)\1(?!\S)')
matchItalic = re.compile(r'(?<!\S)(\*|_)(?P<text>.+?)(?<!\\)\1(?!\S)')
# Markdown bold: a `**` or `__` pair that starts at the beginning of the text or
# after whitespace. Group 1 captures that leading whitespace (empty at the start)
# so a substitution can restore it with \1; group 2 is the opening delimiter and
# the closing delimiter must repeat it (\2) without a word character following.
matchBold =   re.compile(r'(^|\s)(\*\*|__)(?P<text>.+?)\2(?!\w)')
# Markdown italic: a `*` or `_` pair that starts at the beginning of the text or
# after whitespace and is followed by whitespace or the end of the text. An
# escaped closing delimiter (preceded by a backslash) does not terminate the span.
# The closing delimiter back-references group 2 (the opening delimiter): named
# groups are numbered as well, so `text` is group 3 and \3 would require the
# text itself to be repeated instead of the delimiter.
matchItalic = re.compile(r'(^|\s)(\*|_)(?P<text>.+?)(?<!\\)\2(\s|$)')