diff --git a/gridTable.py b/gridTable.py
index 35d7f5dec978f2eb99eead2f22dfaa852dd3abfa..02df41e190be4d581601cbdc95cf635eeb7eb752 100644
--- a/gridTable.py
+++ b/gridTable.py
@@ -11,6 +11,8 @@ import re
colspanMarker = '~~COLSPAN~~'
rowspanMarker = '~~ROWSPAN~~'
+MAX_CELL_WIDTH = 80 # Target maximum width for a column
+
def markdownToGrid(markdownLines:list[str]) -> list[str]:
""" Convert a markdown table to a grid table.
Cells containing ~~XX~~ will be merged with the previous cell.
@@ -55,38 +57,75 @@ def markdownToGrid(markdownLines:list[str]) -> list[str]:
while len(row) < maxCols:
row.append('')
+ # Check if header row has vertical merges with content rows
+ # If the first data row (row 2, index 2) contains rowspan markers,
+ # it means header cells are merged vertically with content cells
+ hasHeaderVerticalMerge = False
+ if len(rows) > 2:
+ # Check if any cell in the first data row (index 2) is a rowspan marker
+ # This indicates that a header cell from row 0 spans down
+ hasHeaderVerticalMerge = any(
+ cell.strip() == rowspanMarker
+ for cell in rows[2]
+ if cell is not None
+ )
+
# Generate grid table
result = []
+ # Helper function to detect separator rows (markdown table separator with dashes)
+ def isSeparatorRow(row: list[str]) -> bool:
+ """Check if a row is a markdown table separator row, i.e. every cell is exactly '-' or empty."""
+ return all(cell.strip() == '-' or cell.strip() == '' for cell in row)
+
# Top border
result.append('+' + '+'.join('-' * (w + 2) for w in colWidths) + '+')
- # Header row
- result.append('|' + '|'.join(
- f'{rows[0][i]:<{colWidths[i]}}' for i in range(len(rows[0])) if rows[0][i] is not None
- ) + '|')
-
- # Header separator
- result.append('+:' + '+:'.join('=' * (w + 1) for w in colWidths) + '+')
+ # If header has vertical merges, treat all rows as content rows
+ # Otherwise, create a header row and separator
+ if hasHeaderVerticalMerge:
+ # No header row - all rows are content rows
+ # Start processing from row 0 (the original header row), but skip row 1 (separator row)
+ dataRowsStart = 0
+ else:
+ # Header row
+ result.append('|' + '|'.join(
+ f'{rows[0][i]:<{colWidths[i]}}' for i in range(len(rows[0])) if rows[0][i] is not None
+ ) + '|')
+
+ # Header separator
+ result.append('+:' + '+:'.join('=' * (w + 1) for w in colWidths) + '+')
+
+ # Data rows start from row 2 (after header and separator)
+ dataRowsStart = 2
# Data rows
- for rowIndex, row in enumerate(rows[2:]):
+ for rowIndex, row in enumerate(rows[dataRowsStart:]):
+ # Skip separator rows when processing content rows from the start (hasHeaderVerticalMerge case)
+ if hasHeaderVerticalMerge and isSeparatorRow(row):
+ continue
# The following code detects if cells in the next row have rowspan marker(s)
# If so, it will merge the cells with the current one and remove the rowspan marker
# from that cell
nextRowCellsMerged:list[bool] = []
+
+ # Calculate the actual index in the rows array
+ currentRowIdx = dataRowsStart + rowIndex
+ nextRowIdx = currentRowIdx + 1
- if rowIndex < len(rows)-3:
- for cellIndex, cell in enumerate(rows[rowIndex+3]):
- if cell.strip() == rowspanMarker:
+ if nextRowIdx < len(rows):
+ for cellIndex, cell in enumerate(rows[nextRowIdx]):
+ if cellIndex < len(row) and cell.strip() == rowspanMarker:
nextRowCellsMerged.append(True)
- rows[rowIndex+3][cellIndex] = cell.replace(rowspanMarker, ' '*len(rowspanMarker))
+ rows[nextRowIdx][cellIndex] = cell.replace(rowspanMarker, ' '*len(rowspanMarker))
else:
nextRowCellsMerged.append(False)
- # nextRowCellsMerged = [ cell.strip() == rowspanMarker for cell in rows[rowIndex+3] ]
+ # Ensure we have the right number of entries (match number of columns)
+ while len(nextRowCellsMerged) < len(colWidths):
+ nextRowCellsMerged.append(False)
else:
- nextRowCellsMerged = [ False for _ in rows[rowIndex+2] ]
+ nextRowCellsMerged = [ False for _ in colWidths ]
result.append('|' + '|'.join(
f'{row[i]:<{colWidths[i]}}'
@@ -274,18 +313,59 @@ def handleMultiLineGridTable(lines: list[str]) -> list[str]:
splitCells.append([colspanMarker])
else:
parts = cell.split('
')
+ # Check for overly long lines and split them too
+ new_parts = []
+ for part in parts:
+ if len(part) > MAX_CELL_WIDTH:
+ # Simple splitting by space
+ words = part.split(' ')
+ current_line = []
+ current_length = 0
+ for word in words:
+ if current_length + len(word) + 1 > MAX_CELL_WIDTH:
+ new_parts.append(' '.join(current_line))
+ current_line = [word]
+ current_length = len(word)
+ else:
+ current_line.append(word)
+ current_length += len(word) + 1
+ if current_line:
+ new_parts.append(' '.join(current_line))
+ else:
+ new_parts.append(part)
+ parts = new_parts
+
if len(parts) > 1:
# Found line breaks in cell
# Add "\" to each part except the last
- parts = [ p + '\\' if i < len(parts)-1 else p
- for i, p in enumerate(parts) ]
+ # BUT only if it was originally a line break, or if we want to indicate wrap?
+ # For Markdown grid tables, simple newlines are enough.
+ # But standard Grid Tables don't support multiline cells natively without some trickery?
+ # Actually, Pandoc grid tables just use newlines.
+ # The original code added `\` which might be for a specific renderer?
+ # Standard reStructuredText / Pandoc grid tables treat lines in the same cell as just continuation.
+ # No backslash needed usually.
+ pass
+ # parts = [ p + '\\' if i < len(parts)-1 else p
+ # for i, p in enumerate(parts) ]
splitCells.append(parts)
maxLines = max(maxLines, len(parts))
# If we found line breaks, create multiple content lines
+ # Each row's height is determined by its own content (maxLines for that row)
+ # All cells in a row must have the same number of lines for proper alignment
if maxLines > 1:
- for line_idx in range(maxLines):
+ # Find the last line index that has content in any cell
+ lastContentLineIdx = -1
+ for check_idx in range(maxLines):
+ for cellParts in splitCells:
+ if len(cellParts) > check_idx and cellParts[check_idx].strip() and cellParts[check_idx].strip() != colspanMarker:
+ lastContentLineIdx = check_idx
+ break
+
+ # Only create lines up to and including the last line with content
+ for line_idx in range(lastContentLineIdx + 1):
newCells = []
for cellParts in splitCells:
if len(cellParts) == 1 and cellParts[0].strip() == colspanMarker:
diff --git a/spec2md.py b/spec2md.py
index c2f9d581ec51e7f5d2fcbe129a63be814e6572ee..da8bda52560a5f3ade6c09f4658c0a88a8b18902 100644
--- a/spec2md.py
+++ b/spec2md.py
@@ -322,7 +322,7 @@ def processDocuments(documents:list[str],
headers.append( (level, replaceNL(text)))
- return [ '', f'{"#" * level} {nr}{replaceNL(text)}' if text else '' ]
+ return [ '', f'{"#" * level} {nr}{replaceNL(text)}' if text else '', '' ]
def strippedTag(tag:str) -> str:
@@ -772,6 +772,7 @@ def processDocuments(documents:list[str],
rows:list[list[str]] = []
nrRows = 0
colSpanDetected = False
+ rowSpanDetected = False
for row in elem.rows:
_row = ET.fromstring(row._tr.xml)
cells:list[str] = []
@@ -787,6 +788,7 @@ def processDocuments(documents:list[str],
gridspanElem = cell.find('.//w:tcPr/w:vMerge', namespaces={'w': wns})
if gridspanElem is not None and _val not in gridspanElem.attrib:
cells.append(rowspanMarker)
+ rowSpanDetected = True # Set flag that rowspan was found
else:
@@ -817,10 +819,11 @@ def processDocuments(documents:list[str],
if nrRows == 1:
_print(f'[red]({linenumber(len(lines)+2)}) Single-row table found. Such tables cannot be converted to markdown.[/red]Consider to change the following table in the original document:\n[grey39]{rows[0]}', highlight = False)
- # Warning if a table with colspans is detected
- if colSpanDetected:
+ # Warning if a table with colspans or rowspans is detected
+ if colSpanDetected or rowSpanDetected:
if forceMarkdownTables:
- _print(f'[yellow]({linenumber(len(lines)+2)}) Table with colspans found: [/yellow][grey39]{richString(lastTableCaption)}[/grey39]\nConsider to convert it manually to a grid table', highlight = False)
+ spanType = "colspans" if colSpanDetected else "rowspans"
+ _print(f'[yellow]({linenumber(len(lines)+2)}) Table with {spanType} found: [/yellow][grey39]{richString(lastTableCaption)}[/grey39]\nConsider to convert it manually to a grid table', highlight = False)
tableLines:list[str] = []
@@ -841,10 +844,13 @@ def processDocuments(documents:list[str],
tableLines.append(f'|{"|".join(row)}|'
.replace('\n', _linebreak)) # replace line breaks in cells
- # if colSpanDetected and gridTableForColspan then convert to grid table
- if colSpanDetected and not forceMarkdownTables and not errorDetected:
+ # if colSpanDetected or rowSpanDetected then convert to grid table
+ if (colSpanDetected or rowSpanDetected) and not forceMarkdownTables and not errorDetected:
lines.append('') # Add an empty line before a table
- lines.append('Table with colspans converted to grid table. Please check and adjust manually if necessary.')
+ spanType = "colspans" if colSpanDetected else ("rowspans" if rowSpanDetected else "merged cells")
+ if colSpanDetected and rowSpanDetected:
+ spanType = "colspans and rowspans"
+ lines.append(f'Table with {spanType} converted to grid table. Please check and adjust manually if necessary.')
tableLines = markdownToGrid(tableLines)
lines.append('') # Add an empty line before a table
@@ -958,7 +964,8 @@ def processDocuments(documents:list[str],
for i in range(len(lines)):
line = lines[i]
if (m := _definitionExpression.match(line)) is not None:
- lines[i] = f'- [{m.group(1)}]{m.group(2)}'
+ # Emit the definition as plain "[label]text", without the leading "- " list marker
+ lines[i] = f'[{m.group(1)}]{m.group(2)}'
def _repl(m:re.Match) -> str|None:
if m.group(1) == '"':