diff --git a/gridTable.py b/gridTable.py index 35d7f5dec978f2eb99eead2f22dfaa852dd3abfa..02df41e190be4d581601cbdc95cf635eeb7eb752 100644 --- a/gridTable.py +++ b/gridTable.py @@ -11,6 +11,8 @@ import re colspanMarker = '~~COLSPAN~~' rowspanMarker = '~~ROWSPAN~~' +MAX_CELL_WIDTH = 80 # Target maximum width for a column + def markdownToGrid(markdownLines:list[str]) -> list[str]: """ Convert a markdown table to a grid table. Cells containing ~~XX~~ will be merged with the previous cell. @@ -55,38 +57,75 @@ def markdownToGrid(markdownLines:list[str]) -> list[str]: while len(row) < maxCols: row.append('') + # Check if header row has vertical merges with content rows + # If the first data row (row 2, index 2) contains rowspan markers, + # it means header cells are merged vertically with content cells + hasHeaderVerticalMerge = False + if len(rows) > 2: + # Check if any cell in the first data row (index 2) is a rowspan marker + # This indicates that a header cell from row 0 spans down + hasHeaderVerticalMerge = any( + cell.strip() == rowspanMarker + for cell in rows[2] + if cell is not None + ) + # Generate grid table result = [] + # Helper function to detect separator rows (markdown table separator with dashes) + def isSeparatorRow(row: list[str]) -> bool: + """Check if a row is a markdown table separator row (all dashes).""" + return all(cell.strip() == '-' or cell.strip() == '' for cell in row) + # Top border result.append('+' + '+'.join('-' * (w + 2) for w in colWidths) + '+') - # Header row - result.append('|' + '|'.join( - f'{rows[0][i]:<{colWidths[i]}}' for i in range(len(rows[0])) if rows[0][i] is not None - ) + '|') - - # Header separator - result.append('+:' + '+:'.join('=' * (w + 1) for w in colWidths) + '+') + # If header has vertical merges, treat all rows as content rows + # Otherwise, create a header row and separator + if hasHeaderVerticalMerge: + # No header row - all rows are content rows + # Start processing from row 0 (the original header row), but skip row 1 (separator row) + dataRowsStart = 0 + else: + # Header row + result.append('|' + '|'.join( + f'{rows[0][i]:<{colWidths[i]}}' for i in range(len(rows[0])) if rows[0][i] is not None + ) + '|') + + # Header separator + result.append('+:' + '+:'.join('=' * (w + 1) for w in colWidths) + '+') + + # Data rows start from row 2 (after header and separator) + dataRowsStart = 2 # Data rows - for rowIndex, row in enumerate(rows[2:]): + for rowIndex, row in enumerate(rows[dataRowsStart:]): + # Skip separator rows when processing content rows from the start (hasHeaderVerticalMerge case) + if hasHeaderVerticalMerge and isSeparatorRow(row): + continue # The following code detects if cells in the next row have rowspan marker(s) # If so, it will merge the cells with the current one and remove the rowspan marker # from that cell nextRowCellsMerged:list[bool] = [] + + # Calculate the actual index in the rows array + currentRowIdx = dataRowsStart + rowIndex + nextRowIdx = currentRowIdx + 1 - if rowIndex < len(rows)-3: - for cellIndex, cell in enumerate(rows[rowIndex+3]): - if cell.strip() == rowspanMarker: + if nextRowIdx < len(rows): + for cellIndex, cell in enumerate(rows[nextRowIdx]): + if cellIndex < len(row) and cell.strip() == rowspanMarker: nextRowCellsMerged.append(True) - rows[rowIndex+3][cellIndex] = cell.replace(rowspanMarker, ' '*len(rowspanMarker)) + rows[nextRowIdx][cellIndex] = cell.replace(rowspanMarker, ' '*len(rowspanMarker)) else: nextRowCellsMerged.append(False) - # nextRowCellsMerged = [ cell.strip() == rowspanMarker for cell in rows[rowIndex+3] ] + # Ensure we have the right number of entries (match number of columns) + while len(nextRowCellsMerged) < len(colWidths): + nextRowCellsMerged.append(False) else: - nextRowCellsMerged = [ False for _ in rows[rowIndex+2] ] + nextRowCellsMerged = [ False for _ in colWidths ] result.append('|' + '|'.join( f'{row[i]:<{colWidths[i]}}' @@ -274,18 +313,59 @@ def handleMultiLineGridTable(lines: list[str]) -> list[str]: splitCells.append([colspanMarker]) else: parts = cell.split('
') + # Check for overly long lines and split them too + new_parts = [] + for part in parts: + if len(part) > MAX_CELL_WIDTH: + # Simple splitting by space + words = part.split(' ') + current_line = [] + current_length = 0 + for word in words: + if current_length + len(word) + 1 > MAX_CELL_WIDTH: + new_parts.append(' '.join(current_line)) + current_line = [word] + current_length = len(word) + else: + current_line.append(word) + current_length += len(word) + 1 + if current_line: + new_parts.append(' '.join(current_line)) + else: + new_parts.append(part) + parts = new_parts + if len(parts) > 1: # Found line breaks in cell # Add "\" to each part except the last - parts = [ p + '\\' if i < len(parts)-1 else p - for i, p in enumerate(parts) ] + # BUT only if it was originally a
break, or if we want to indicate wrap? + # For Markdown grid tables, simple newlines are enough. + # But standard Grid Tables don't support multiline cells natively without some trickery? + # Actually, Pandoc grid tables just use newlines. + # The original code added `\` which might be for a specific renderer? + # Standard reStructuredText / Pandoc grid tables treat lines in the same cell as just continuation. + # No backslash needed usually. + pass + # parts = [ p + '\\' if i < len(parts)-1 else p + # for i, p in enumerate(parts) ] splitCells.append(parts) maxLines = max(maxLines, len(parts)) # If we found line breaks, create multiple content lines + # Each row's height is determined by its own content (maxLines for that row) + # All cells in a row must have the same number of lines for proper alignment if maxLines > 1: - for line_idx in range(maxLines): + # Find the last line index that has content in any cell + lastContentLineIdx = -1 + for check_idx in range(maxLines): + for cellParts in splitCells: + if len(cellParts) > check_idx and cellParts[check_idx].strip() and cellParts[check_idx].strip() != colspanMarker: + lastContentLineIdx = check_idx + break + + # Only create lines up to and including the last line with content + for line_idx in range(lastContentLineIdx + 1): newCells = [] for cellParts in splitCells: if len(cellParts) == 1 and cellParts[0].strip() == colspanMarker: diff --git a/spec2md.py b/spec2md.py index c2f9d581ec51e7f5d2fcbe129a63be814e6572ee..da8bda52560a5f3ade6c09f4658c0a88a8b18902 100644 --- a/spec2md.py +++ b/spec2md.py @@ -322,7 +322,7 @@ def processDocuments(documents:list[str], headers.append( (level, replaceNL(text))) - return [ '', f'{"#" * level} {nr}{replaceNL(text)}' if text else '' ] + return [ '', f'{"#" * level} {nr}{replaceNL(text)}' if text else '', '' ] def strippedTag(tag:str) -> str: @@ -772,6 +772,7 @@ def processDocuments(documents:list[str], rows:list[list[str]] = [] nrRows = 0 colSpanDetected = False + rowSpanDetected = False for row in elem.rows: _row = ET.fromstring(row._tr.xml) cells:list[str] = [] @@ -787,6 +788,7 @@ def processDocuments(documents:list[str], gridspanElem = cell.find('.//w:tcPr/w:vMerge', namespaces={'w': wns}) if gridspanElem is not None and _val not in gridspanElem.attrib: cells.append(rowspanMarker) + rowSpanDetected = True # Set flag that rowspan was found else: @@ -817,10 +819,11 @@ def processDocuments(documents:list[str], if nrRows == 1: _print(f'[red]({linenumber(len(lines)+2)}) Single-row table found. Such tables cannot be converted to markdown.[/red]Consider to change the following table in the original document:\n[grey39]{rows[0]}', highlight = False) - # Warning if a table with colspans is detected - if colSpanDetected: + # Warning if a table with colspans or rowspans is detected + if colSpanDetected or rowSpanDetected: if forceMarkdownTables: - _print(f'[yellow]({linenumber(len(lines)+2)}) Table with colspans found: [/yellow][grey39]{richString(lastTableCaption)}[/grey39]\nConsider to convert it manually to a grid table', highlight = False) + spanType = "colspans" if colSpanDetected else "rowspans" + _print(f'[yellow]({linenumber(len(lines)+2)}) Table with {spanType} found: [/yellow][grey39]{richString(lastTableCaption)}[/grey39]\nConsider to convert it manually to a grid table', highlight = False) tableLines:list[str] = [] @@ -841,10 +844,13 @@ def processDocuments(documents:list[str], tableLines.append(f'|{"|".join(row)}|' .replace('\n', _linebreak)) # replace line breaks in cells - # if colSpanDetected and gridTableForColspan then convert to grid table - if colSpanDetected and not forceMarkdownTables and not errorDetected: + # if colSpanDetected or rowSpanDetected then convert to grid table + if (colSpanDetected or rowSpanDetected) and not forceMarkdownTables and not errorDetected: lines.append('') # Add an empty line before a table - lines.append('Table with colspans converted to grid table. Please check and adjust manually if necessary.') + spanType = "colspans" if colSpanDetected else ("rowspans" if rowSpanDetected else "merged cells") + if colSpanDetected and rowSpanDetected: + spanType = "colspans and rowspans" + lines.append(f'Table with {spanType} converted to grid table. Please check and adjust manually if necessary.') tableLines = markdownToGrid(tableLines) lines.append('') # Add an empty line before a table @@ -958,7 +964,8 @@ def processDocuments(documents:list[str], for i in range(len(lines)): line = lines[i] if (m := _definitionExpression.match(line)) is not None: - lines[i] = f'- [{m.group(1)}]{m.group(2)}' + # Use HTML anchor for definitions with span wrapper: [1] + lines[i] = f'[{m.group(1)}]{m.group(2)}' def _repl(m:re.Match) -> str|None: if m.group(1) == '"':