Commit 2b5a1c67 authored by Naum Spaseski
Browse files

Corrected cleanupMarkdown function

parent 26eccfc0
Loading
Loading
Loading
Loading
+33 −53
Original line number Diff line number Diff line
@@ -1087,61 +1087,41 @@ def processDocuments(documents:list[str],
			#	Clean up redundant spaces and normalize blank lines
			#
			def cleanupMarkdown(lines: list[str]) -> list[str]:
				"""Collapse runs of blank lines while leaving fenced code blocks untouched.

				Outside of fenced code blocks, every run of consecutive blank
				(empty or whitespace-only) lines is reduced to its first line.
				Lines inside a fenced code block, and the fence lines themselves,
				are copied through verbatim, so blank lines that are part of a
				code sample are preserved.

				Args:
					lines: The Markdown document, one entry per line.

				Returns:
					A new list with the normalized lines. The input list is not
					modified, and individual lines are never altered, only dropped.
				"""
				cleaned_lines: list[str] = []
				in_code_block = False
				consecutive_empty = 0

				for line in lines:
					stripped = line.strip()

					if stripped.startswith('```'):
						# A line such as ```code``` opens and closes on the same line,
						# so it must not flip the fence state. A line made only of
						# backticks (e.g. a four-backtick fence) is a real fence and
						# falls through to the toggle below.
						is_single_line_block = stripped.endswith('```') and bool(stripped.strip('`'))
						if not is_single_line_block:
							in_code_block = not in_code_block
						cleaned_lines.append(line)
						# A fence or inline block ends any preceding run of blank lines.
						consecutive_empty = 0
						continue

					if in_code_block:
						# Code content is preserved verbatim, blank lines included.
						cleaned_lines.append(line)
						continue

					if stripped:
						consecutive_empty = 0
						cleaned_lines.append(line)
						continue

					# Blank line outside a code block: keep only the first of a run.
					consecutive_empty += 1
					if consecutive_empty <= 1:
						cleaned_lines.append(line)

				return cleaned_lines
			
			lines = cleanupMarkdown(lines)