Adding width/height attributes to images so that Pandoc does not redimension them to fit the page (d8b22298) · Commits · Centre for Testing and Interoperability / Markdown specifications development / Specification tools

generateBaseline/pandocFilter.py

+111 −0

Original line number	Diff line number	Diff line
		@@ -10,6 +10,62 @@
		import argparse, os, re, sys
		from rich import print, inspect
		from rich.progress import Progress, TextColumn, TimeElapsedColumn
		from PIL import Image

		#from resizeImage import MAX_HEIGHT_IN, MAX_WIDTH_IN, figure_display_limit, format_inches

		# Markdown inline image of the form ![alt](path){attributes}.
		#   group 1: alt text (may be empty)
		#   group 2: image path
		#   group 3: optional pandoc attribute block, including its braces
		# The `*` quantifiers are required: without them only one-character alt
		# texts and one-character attribute blocks would match.
		figureImageRegex = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)(\{[^}]*\})?')
		# Lower-case file extensions treated as raster images; paths are
		# lower-cased before being compared against this tuple.
		rasterImageExtensions = ('.png', '.jpg', '.jpeg', '.gif', '.webp', '.tif', '.tiff')
		# Pixel density used to convert pixel sizes to inches.
		DPI = 96
		# Largest figure width and height, in inches, passed as the display limits
		# when figure dimensions are computed.
		MAX_WIDTH_IN = 6.75
		MAX_HEIGHT_IN = 9.5

		def format_inches(value: float) -> str:
		    """Render an inch value as a compact decimal string.

		    The value is formatted with four decimal places, after which trailing
		    zeros and a dangling decimal point are removed (e.g. 6.75 -> "6.75",
		    10.0 -> "10").
		    """
		    fixed = format(value, ".4f")
		    without_zeros = fixed.rstrip("0")
		    return without_zeros.rstrip(".")

		def resolve_image_path(document: str, image_path: str) -> str:
		    """Return the filesystem path of an image referenced from a document.

		    Absolute image paths are returned untouched. Relative ones are joined
		    to the directory of the (absolutised) document path and normalised.
		    """
		    if not os.path.isabs(image_path):
		        base_dir = os.path.dirname(os.path.abspath(document))
		        joined = os.path.join(base_dir, image_path)
		        return os.path.normpath(joined)
		    return image_path


		def fitted_figure_limit(width_px, height_px, max_width_in_inches, max_height_in_inches):
		    """Work out which dimension, if any, forces an image to be scaled down.

		    Pixel sizes are converted to inches with the module-level DPI and
		    compared against the given maxima.

		    Returns:
		        (limiting_dimension, limit_inches, scale), where limiting_dimension
		        is "width" or "height", or (None, None, None) when the image
		        already fits within both maxima.
		    """
		    natural_width_in = width_px / DPI
		    natural_height_in = height_px / DPI

		    # A factor of 1.0 means "no shrinking needed" for that dimension.
		    shrink_w = 1.0
		    if natural_width_in > max_width_in_inches:
		        shrink_w = max_width_in_inches / natural_width_in

		    shrink_h = 1.0
		    if natural_height_in > max_height_in_inches:
		        shrink_h = max_height_in_inches / natural_height_in

		    scale = min(shrink_w, shrink_h)
		    if scale >= 1.0:
		        return None, None, None

		    # The dimension that needs the stronger reduction is the limiting one;
		    # ties are reported as "width".
		    if shrink_h < shrink_w:
		        return "height", max_height_in_inches, scale
		    return "width", max_width_in_inches, scale


		def figure_display_limit(image_path, max_width_in_inches, max_height_in_inches):
		    """Open an image, read its pixel size and report the pandoc size limit.

		    The image file is only read, never written.

		    Returns:
		        (limiting_dimension, limit_inches, width_px, height_px, scale).
		        When the image fits within both maxima, limiting_dimension is None,
		        limit_inches is the natural width in inches and scale is 1.0.
		    """
		    with Image.open(image_path) as img:
		        img.load()
		        px_width, px_height = img.size

		    dimension, inches, scale = fitted_figure_limit(
		        px_width, px_height, max_width_in_inches, max_height_in_inches
		    )
		    if dimension is not None:
		        return dimension, inches, px_width, px_height, scale

		    # Within limits: report the natural width so callers can pin it.
		    return None, px_width / DPI, px_width, px_height, 1.0


		def readMDFile(progress:Progress, document:str) -> list[str]:
		""" Read the markdown file and return a list of lines.
		@@ -145,6 +201,60 @@ def replaceFigureCaptions(progress:Progress, mdLines:list[str]) -> list[str]:
		return _lines


		def addFigureDimensions(progress: Progress, mdLines: list[str], document: str) -> list[str]:
		    """Add a pandoc width or height attribute to raster images.

		    For the first image reference on each line, the image file is measured
		    and either the limiting dimension (when the image exceeds
		    MAX_WIDTH_IN x MAX_HEIGHT_IN) or the natural width (when it fits) is
		    written as an attribute. Any attributes already present on the image
		    (for example an identifier or classes) are kept and the dimension is
		    appended to them.

		    Lines are copied unchanged when:
		      * checkInCodeBlock() reports the line as code (presumably it tracks
		        fenced code blocks; it is called once per line, in order),
		      * the line contains no image reference,
		      * the image already has a width= or height= attribute,
		      * the image path does not end with a raster extension,
		      * the resolved file does not exist or cannot be read as an image.

		    Args:
		        progress: Progress instance used to register the task.
		        mdLines: The markdown lines to process.
		        document: Path of the markdown document; relative image paths are
		            resolved against its directory.

		    Returns:
		        The list of lines with dimension attributes added.
		    """
		    _taskID = progress.add_task('[blue]Adding figure dimensions', total=0)

		    _lines: list[str] = []
		    for line in mdLines:
		        if checkInCodeBlock(line):
		            _lines.append(line)
		            continue

		        match = figureImageRegex.search(line)
		        if not match:
		            _lines.append(line)
		            continue

		        image_path = match.group(2).strip()
		        existing_attrs = match.group(3) or ''
		        if 'width=' in existing_attrs or 'height=' in existing_attrs:
		            _lines.append(line)
		            continue

		        if not image_path.lower().endswith(rasterImageExtensions):
		            _lines.append(line)
		            continue

		        full_path = resolve_image_path(document, image_path)
		        if not os.path.isfile(full_path):
		            _lines.append(line)
		            continue

		        try:
		            limiting_dimension, limit_in_inches, width_px, height_px, scale = figure_display_limit(
		                full_path, MAX_WIDTH_IN, MAX_HEIGHT_IN
		            )
		        except OSError as error:
		            # Unreadable or corrupt image (PIL's UnidentifiedImageError is an
		            # OSError): treat it like a missing file instead of aborting.
		            print(f'Could not read image dimensions for {image_path}: {error}')
		            _lines.append(line)
		            continue

		        # Within limits the natural width is pinned; otherwise the limiting
		        # dimension is capped at its maximum.
		        dimension_name = 'width' if limiting_dimension is None else limiting_dimension
		        dimension_attr = f'{dimension_name}={format_inches(limit_in_inches)}in'

		        # Keep attributes that were already on the image.
		        kept_attrs = existing_attrs[1:-1].strip()
		        if kept_attrs:
		            attrs = f'{{{kept_attrs} {dimension_attr}}}'
		        else:
		            attrs = f'{{{dimension_attr}}}'

		        # A replacement function (rather than a template string) keeps
		        # backslashes in the attributes from being read as escapes.
		        new_line = figureImageRegex.sub(
		            lambda found: f'![{found.group(1)}]({found.group(2)}){attrs}',
		            line.rstrip('\n'),
		            count=1,
		        )
		        _lines.append(f'{new_line}\n')
		        print(
		            f'Figure dimensions added for {image_path}: {attrs} '
		            f'(from {width_px}x{height_px} px, scale {scale:.6f}, '
		            f'max {MAX_WIDTH_IN}" x {MAX_HEIGHT_IN}")'
		        )

		    progress.stop_task(_taskID)
		    return _lines


		def replaceFiguresPathSvgToPng(progress: Progress, mdLines: list[str]) -> list[str]:
		""" Replace figure extensions from svg to png.
		"""
		@@ -440,6 +550,7 @@ def process(args) -> None:
		mdLines = replaceFigureCaptions(progress, mdLines)
		if args.figure_paths:
		mdLines = replaceFiguresPathSvgToPng(progress, mdLines)
		mdLines = addFigureDimensions(progress, mdLines, args.document)
		if args.table_separators:
		mdLines = correctTableSeparators(progress, mdLines)
		mdLines = instertLineBeforeStartOfList(progress, mdLines)