Loading generateSpecWebSite/gridTableTools.py +74 −9 Original line number Original line Diff line number Diff line Loading @@ -207,7 +207,9 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR while delimiterStart == None: while delimiterStart == None: delimiterStart = row[colI - 1].position if colI > 0 else 0 delimiterStart = row[colI - 1].position if colI > 0 else 0 colI -= 1 colI -= 1 positions = [line.find(delimiter, delimiterStart + 1) for delimiter in "|+" if delimiter in line[delimiterStart + 1:]] #positions = [line.find(delimiter, delimiterStart + 1) for delimiter in "|+" if delimiter in line[delimiterStart + 1:]] positions = [match.start() for match in matchGridTableDelimiter.finditer(line, delimiterStart + 1)] position = min(positions) if positions else -1 position = min(positions) if positions else -1 if position > delimiterPositions[j]: # Colspan to be increased if position > delimiterPositions[j]: # Colspan to be increased row[columnIndex].colspan += 1 row[columnIndex].colspan += 1 Loading Loading @@ -240,7 +242,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR printDebug(f'\nChecking line: "{line}"') printDebug(f'\nChecking line: "{line}"') printDebug(f'Expected delimiter positions: {delimiterPositions}') printDebug(f'Expected delimiter positions: {delimiterPositions}') # For full separator lines (only +) """ # For full separator lines (only +) if '+' in line and '|' not in line: if '+' in line and '|' not in line: currentPositions = [i for i, char in enumerate(line) if (char == '+' and i > 0)] currentPositions = [i for i, char in enumerate(line) if (char == '+' and i > 0)] printDebug(f'Full separator line - Found + at positions: {currentPositions}') printDebug(f'Full separator line - Found + at positions: {currentPositions}') Loading @@ -255,13 +257,53 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR for pos in currentPositions) for pos in currentPositions) # For partial separators (mix of + and |) # For partial separators (mix of + and |) currentPositions = [i for i, char in enumerate(line) if (char in delimiters and i > 0)] if matchGridTableBodySeparator.match(line): # Find all delimiter positions that are not escaped currentPositions = [] i = 0 while i < len(line): if i > 0 and line[i-1] == '\\': # Skip escaped delimiters i += 1 continue if line[i] in delimiters: currentPositions.append(i) i += 1 printDebug(f'Partial separator - Found delimiters at positions: {currentPositions}') printDebug(f'Partial separator - Found delimiters at positions: {currentPositions}') printDebug(f'Characters at those positions: {[line[pos] for pos in currentPositions]}') printDebug(f'Characters at those positions: {[line[pos] for pos in currentPositions]}') return all(delimiterPositions[-1] in currentPositions and line.startswith(('+', '|')) and pos in delimiterPositions return all(delimiterPositions[-1] in currentPositions and line.startswith(('+', '|')) and pos in delimiterPositions for pos in currentPositions) for pos in currentPositions) separatorIndices = [i for i, line in enumerate(lines) if isSeparator(line)] # For data lines (starting with |) if line.startswith('|') and not matchGridTableBodySeparator.match(line): # Find all | positions that are not escaped (+ doesn't need escaping, but | does) currentPositions = [] i = 0 while i < len(line): if i > 0 and line[i-1] == '\\' and line[i] == '|': # Skip escaped | i += 1 continue if line[i] == '|': currentPositions.append(i) i += 1 printDebug(f'Data line - Found | at positions: {currentPositions}') return all(delimiterPositions[-1] in currentPositions and line.startswith("|") and pos in delimiterPositions for pos in currentPositions)""" #for match in re.finditer(r'(?<!\\)\||(?=\+:?-+:?\+)\+|(?<![^-:])\+', line): currentPositions = [] for match in matchGridTableDelimiter.finditer(line): if match.start() > 0: # Skip the first position currentPositions.append(match.start()) printDebug(f'Partial separator - Found delimiters at positions: {currentPositions}') printDebug(f'Characters at those positions: {[line[pos] for pos in currentPositions]}') return all(delimiterPositions[-1] in currentPositions and line.startswith(('+', '|')) and pos in delimiterPositions for pos in currentPositions) return False separatorIndices = [i for i, line in enumerate(lines) if isSeparator(line.rstrip())] if not separatorIndices: if not separatorIndices: raise ValueError('No valid separators found in the provided grid table.') raise ValueError('No valid separators found in the provided grid table.') Loading @@ -285,7 +327,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR dataRows:GridTableRowList = [] dataRows:GridTableRowList = [] for index in separatorIndices: for index in separatorIndices: if matchGridTableHeaderSeparator.match(lines[index]): if matchGridTableHeaderSeparator.match(lines[index].rstrip()): hasHeader = True hasHeader = True headerSeparatorIndex = index headerSeparatorIndex = index parts = re.split(r'\+', lines[index].strip('+')) parts = re.split(r'\+', lines[index].strip('+')) Loading Loading @@ -352,7 +394,15 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR if not checkDelimiterAlignment(line, delimiterPositions): if not checkDelimiterAlignment(line, delimiterPositions): raise ValueError(f'Misaligned delimiters in separator row: {line}') raise ValueError(f'Misaligned delimiters in separator row: {line}') parts = re.split(r'\s*\+\s*', line.strip('+')) # Split by delimiters parts = [] matches = list(matchGridTableDelimiter.finditer(line)) pos = 0 for match in matches: if match.start() > pos: parts.append(line[pos+1:match.start()]) pos = match.start() delimiterIndex = 0 delimiterIndex = 0 rows.append(GridRow(numberOfColumns)) rows.append(GridRow(numberOfColumns)) Loading Loading @@ -384,7 +434,14 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR if not checkDelimiterAlignment(line, delimiterPositions): if not checkDelimiterAlignment(line, delimiterPositions): raise ValueError(f'Misaligned delimiters in partial separator: {line}') raise ValueError(f'Misaligned delimiters in partial separator: {line}') parts = re.split(r'[\|\+]', line.strip('|').strip('+')) # (?<!\\)[\|\+] # Split by delimiters parts = [] matches = list(matchGridTableDelimiter.finditer(line)) pos = 0 for match in matches: if match.start() > pos: parts.append(line[pos+1:match.start()]) pos = match.start() #Add another row, set delimiters for each cell #Add another row, set delimiters for each cell rows.append(GridRow(numberOfColumns)) rows.append(GridRow(numberOfColumns)) Loading Loading @@ -446,7 +503,15 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR raise ValueError(f'More cells than columns found ({len(parts)} {numberOfColumns})') raise ValueError(f'More cells than columns found ({len(parts)} {numberOfColumns})') else: # Data row else: # Data row cellsContent = re.split(r'\|', line.strip('|')) #cellsContent = re.split(r'\|', line.strip('|')) #Split by delimiters cellsContent = [] matches = list(matchGridTableDelimiter.finditer(line)) pos = 0 for match in matches: if match.start() > pos: cellsContent.append(line[pos+1:match.start()]) pos = match.start() # Add delimiter alignment check # Add delimiter alignment check if not checkDelimiterAlignment(line, delimiterPositions): if not checkDelimiterAlignment(line, delimiterPositions): Loading generateSpecWebSite/regexMatches.py +5 −5 Original line number Original line Diff line number Diff line Loading @@ -22,11 +22,11 @@ markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE) matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE) matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE) matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE) matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE) matchGridTable = re.compile(r'^\s*\+-.*\+\s$', re.IGNORECASE) matchGridTable = re.compile(r'^\s*\+-.*\+\s$', re.IGNORECASE) matchGridTableBodySeparator = re.compile(r'.*\+([:-]+\+)+.*$', re.IGNORECASE) matchGridTableBodySeparator = re.compile(r'^[+|].*(\+[:-]+\+($|.*[+|]$))', re.IGNORECASE) matchGridTableBodySeparatorLine = re.compile(r'[-:]+$', re.IGNORECASE) matchGridTableBodySeparatorLine = re.compile(r'^:?-+:?$', re.IGNORECASE) matchGridTableHeaderSeparator = re.compile(r'.*\+([=:]+\+)+.*$', re.IGNORECASE) matchGridTableHeaderSeparator = re.compile(r'^\+([=:]+\+)+$', re.IGNORECASE) matchGridTableSeparator = re.compile(r'\s*\+([-:=]+\+)+\s*$', re.IGNORECASE) matchGridTableSeparator = re.compile(r'^\+([-:=]+\+)+$', re.IGNORECASE) matchGridTableBodySeparator = re.compile(r'.*\+([:-]+\+)+.*$', re.IGNORECASE) matchGridTableDelimiter = re.compile(r'(?<!\\)\||(?=\+:?[-=]+:?\+)\+|(?<![^-=:])\+', re.IGNORECASE) matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE) matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE) matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE) matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE) matchListInContent = re.compile(r'^(?:\s*(P<marker>[-*+]|\s*\d+\.))\s+(P<content>.+)$', re.IGNORECASE) matchListInContent = re.compile(r'^(?:\s*(P<marker>[-*+]|\s*\d+\.))\s+(P<content>.+)$', re.IGNORECASE) Loading Loading
generateSpecWebSite/gridTableTools.py +74 −9 Original line number Original line Diff line number Diff line Loading @@ -207,7 +207,9 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR while delimiterStart == None: while delimiterStart == None: delimiterStart = row[colI - 1].position if colI > 0 else 0 delimiterStart = row[colI - 1].position if colI > 0 else 0 colI -= 1 colI -= 1 positions = [line.find(delimiter, delimiterStart + 1) for delimiter in "|+" if delimiter in line[delimiterStart + 1:]] #positions = [line.find(delimiter, delimiterStart + 1) for delimiter in "|+" if delimiter in line[delimiterStart + 1:]] positions = [match.start() for match in matchGridTableDelimiter.finditer(line, delimiterStart + 1)] position = min(positions) if positions else -1 position = min(positions) if positions else -1 if position > delimiterPositions[j]: # Colspan to be increased if position > delimiterPositions[j]: # Colspan to be increased row[columnIndex].colspan += 1 row[columnIndex].colspan += 1 Loading Loading @@ -240,7 +242,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR printDebug(f'\nChecking line: "{line}"') printDebug(f'\nChecking line: "{line}"') printDebug(f'Expected delimiter positions: {delimiterPositions}') printDebug(f'Expected delimiter positions: {delimiterPositions}') # For full separator lines (only +) """ # For full separator lines (only +) if '+' in line and '|' not in line: if '+' in line and '|' not in line: currentPositions = [i for i, char in enumerate(line) if (char == '+' and i > 0)] currentPositions = [i for i, char in enumerate(line) if (char == '+' and i > 0)] printDebug(f'Full separator line - Found + at positions: {currentPositions}') printDebug(f'Full separator line - Found + at positions: {currentPositions}') Loading @@ -255,13 +257,53 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR for pos in currentPositions) for pos in currentPositions) # For partial separators (mix of + and |) # For partial separators (mix of + and |) currentPositions = [i for i, char in enumerate(line) if (char in delimiters and i > 0)] if matchGridTableBodySeparator.match(line): # Find all delimiter positions that are not escaped currentPositions = [] i = 0 while i < len(line): if i > 0 and line[i-1] == '\\': # Skip escaped delimiters i += 1 continue if line[i] in delimiters: currentPositions.append(i) i += 1 printDebug(f'Partial separator - Found delimiters at positions: {currentPositions}') printDebug(f'Partial separator - Found delimiters at positions: {currentPositions}') printDebug(f'Characters at those positions: {[line[pos] for pos in currentPositions]}') printDebug(f'Characters at those positions: {[line[pos] for pos in currentPositions]}') return all(delimiterPositions[-1] in currentPositions and line.startswith(('+', '|')) and pos in delimiterPositions return all(delimiterPositions[-1] in currentPositions and line.startswith(('+', '|')) and pos in delimiterPositions for pos in currentPositions) for pos in currentPositions) separatorIndices = [i for i, line in enumerate(lines) if isSeparator(line)] # For data lines (starting with |) if line.startswith('|') and not matchGridTableBodySeparator.match(line): # Find all | positions that are not escaped (+ doesn't need escaping, but | does) currentPositions = [] i = 0 while i < len(line): if i > 0 and line[i-1] == '\\' and line[i] == '|': # Skip escaped | i += 1 continue if line[i] == '|': currentPositions.append(i) i += 1 printDebug(f'Data line - Found | at positions: {currentPositions}') return all(delimiterPositions[-1] in currentPositions and line.startswith("|") and pos in delimiterPositions for pos in currentPositions)""" #for match in re.finditer(r'(?<!\\)\||(?=\+:?-+:?\+)\+|(?<![^-:])\+', line): currentPositions = [] for match in matchGridTableDelimiter.finditer(line): if match.start() > 0: # Skip the first position currentPositions.append(match.start()) printDebug(f'Partial separator - Found delimiters at positions: {currentPositions}') printDebug(f'Characters at those positions: {[line[pos] for pos in currentPositions]}') return all(delimiterPositions[-1] in currentPositions and line.startswith(('+', '|')) and pos in delimiterPositions for pos in currentPositions) return False separatorIndices = [i for i, line in enumerate(lines) if isSeparator(line.rstrip())] if not separatorIndices: if not separatorIndices: raise ValueError('No valid separators found in the provided grid table.') raise ValueError('No valid separators found in the provided grid table.') Loading @@ -285,7 +327,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR dataRows:GridTableRowList = [] dataRows:GridTableRowList = [] for index in separatorIndices: for index in separatorIndices: if matchGridTableHeaderSeparator.match(lines[index]): if matchGridTableHeaderSeparator.match(lines[index].rstrip()): hasHeader = True hasHeader = True headerSeparatorIndex = index headerSeparatorIndex = index parts = re.split(r'\+', lines[index].strip('+')) parts = re.split(r'\+', lines[index].strip('+')) Loading Loading @@ -352,7 +394,15 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR if not checkDelimiterAlignment(line, delimiterPositions): if not checkDelimiterAlignment(line, delimiterPositions): raise ValueError(f'Misaligned delimiters in separator row: {line}') raise ValueError(f'Misaligned delimiters in separator row: {line}') parts = re.split(r'\s*\+\s*', line.strip('+')) # Split by delimiters parts = [] matches = list(matchGridTableDelimiter.finditer(line)) pos = 0 for match in matches: if match.start() > pos: parts.append(line[pos+1:match.start()]) pos = match.start() delimiterIndex = 0 delimiterIndex = 0 rows.append(GridRow(numberOfColumns)) rows.append(GridRow(numberOfColumns)) Loading Loading @@ -384,7 +434,14 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR if not checkDelimiterAlignment(line, delimiterPositions): if not checkDelimiterAlignment(line, delimiterPositions): raise ValueError(f'Misaligned delimiters in partial separator: {line}') raise ValueError(f'Misaligned delimiters in partial separator: {line}') parts = re.split(r'[\|\+]', line.strip('|').strip('+')) # (?<!\\)[\|\+] # Split by delimiters parts = [] matches = list(matchGridTableDelimiter.finditer(line)) pos = 0 for match in matches: if match.start() > pos: parts.append(line[pos+1:match.start()]) pos = match.start() #Add another row, set delimiters for each cell #Add another row, set delimiters for each cell rows.append(GridRow(numberOfColumns)) rows.append(GridRow(numberOfColumns)) Loading Loading @@ -446,7 +503,15 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR raise ValueError(f'More cells than columns found ({len(parts)} {numberOfColumns})') raise ValueError(f'More cells than columns found ({len(parts)} {numberOfColumns})') else: # Data row else: # Data row cellsContent = re.split(r'\|', line.strip('|')) #cellsContent = re.split(r'\|', line.strip('|')) #Split by delimiters cellsContent = [] matches = list(matchGridTableDelimiter.finditer(line)) pos = 0 for match in matches: if match.start() > pos: cellsContent.append(line[pos+1:match.start()]) pos = match.start() # Add delimiter alignment check # Add delimiter alignment check if not checkDelimiterAlignment(line, delimiterPositions): if not checkDelimiterAlignment(line, delimiterPositions): Loading
generateSpecWebSite/regexMatches.py +5 −5 Original line number Original line Diff line number Diff line Loading @@ -22,11 +22,11 @@ markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE) matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE) matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE) matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE) matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE) matchGridTable = re.compile(r'^\s*\+-.*\+\s$', re.IGNORECASE) matchGridTable = re.compile(r'^\s*\+-.*\+\s$', re.IGNORECASE) matchGridTableBodySeparator = re.compile(r'.*\+([:-]+\+)+.*$', re.IGNORECASE) matchGridTableBodySeparator = re.compile(r'^[+|].*(\+[:-]+\+($|.*[+|]$))', re.IGNORECASE) matchGridTableBodySeparatorLine = re.compile(r'[-:]+$', re.IGNORECASE) matchGridTableBodySeparatorLine = re.compile(r'^:?-+:?$', re.IGNORECASE) matchGridTableHeaderSeparator = re.compile(r'.*\+([=:]+\+)+.*$', re.IGNORECASE) matchGridTableHeaderSeparator = re.compile(r'^\+([=:]+\+)+$', re.IGNORECASE) matchGridTableSeparator = re.compile(r'\s*\+([-:=]+\+)+\s*$', re.IGNORECASE) matchGridTableSeparator = re.compile(r'^\+([-:=]+\+)+$', re.IGNORECASE) matchGridTableBodySeparator = re.compile(r'.*\+([:-]+\+)+.*$', re.IGNORECASE) matchGridTableDelimiter = re.compile(r'(?<!\\)\||(?=\+:?[-=]+:?\+)\+|(?<![^-=:])\+', re.IGNORECASE) matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE) matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE) matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE) matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE) matchListInContent = re.compile(r'^(?:\s*(P<marker>[-*+]|\s*\d+\.))\s+(P<content>.+)$', re.IGNORECASE) matchListInContent = re.compile(r'^(?:\s*(P<marker>[-*+]|\s*\d+\.))\s+(P<content>.+)$', re.IGNORECASE) Loading