Commit 15d8ac94 authored by Marco Cavalli's avatar Marco Cavalli
Browse files

feat: show changes in toc when generating the html diff

parent 7b7d99ac
Loading
Loading
Loading
Loading
+13 −3
Original line number Diff line number Diff line
@@ -241,12 +241,16 @@ Specify a different directory containing the Markdown files.
Creates a diff version of the HTML by comparing it with `./GENERATED_FILES/{folder_name}-base`.
When using `--diff`, `--src` is required.

`convert.py --frm html --to md --folder {folder_name} --src relative/or/absolute/source/path --diff`
`convert.py --frm md --to html --folder {folder_name} --src relative/or/absolute/source/path --diff`

Creates a diff version of the HTML by comparing it with the branch or commit ID specified in `{target_name}`. If `{target_name}` is not specified, `main` or `master` is used.
When using `--diff-git`, `--diff` is not necessary; if both are present, `--diff` is ignored.

`convert.py --frm html --to md --folder {folder_name} --src relative/or/absolute/source/path --diff-git {target_name}`
`convert.py --frm md --to html --folder {folder_name} --src relative/or/absolute/source/path --diff-git {target_name}`

By default, the generated file requires a web server (e.g., Live Server - a VS Code extension). To generate a diff that can be opened without a web server, use the `--no-lazy-toc` option.

`convert.py --frm md --to html --folder {folder_name} --src relative/or/absolute/source/path --diff --no-lazy-toc`

### 2.2.3 HTML to Docx

@@ -284,3 +288,9 @@ The accepted parameters are the same as those explained in [section 2.2](#22--co
`./convert.bat --parameters [--arch amd64|arm64]`

Where `--parameters` are the same as those explained in [section 2.2](#22--conversion) and `--arch` is an optional parameter to specify the architecture of the Docker image to be built (default is `amd64`).

# 4. Debug

## 4.1 Show time reports

Using the `--time` parameter prints the duration of each operation; it is useful for debugging performance.
 No newline at end of file
+12 −3
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@ set "GIT_BRANCH="
set "REPO_SRC="
set "SRC_SUBFOLDER="
set "REBUILD=false"
set "NO_LAZY_TOC=false"
set "TIMER_ENABLED=false"

:parse_args
@@ -86,6 +87,11 @@ if "%~1"=="--time" (
	shift
	goto parse_args
)
rem --no-lazy-toc is a value-less switch: remember it, consume the argument and
rem keep parsing. The flag is appended to EXTRA_ARGS later in the script.
if "%~1"=="--no-lazy-toc" (
	set "NO_LAZY_TOC=true"
	shift
	goto parse_args
)
if "%~1"=="--help" (
	echo Usage: convert.bat --frm <format> --to <format> --folder <path> [options]
	echo.
@@ -97,11 +103,13 @@ if "%~1"=="--help" (
	echo Optional parameters:
	echo   --src <path>          Path to source files (overrides --folder for source volume)
	echo   --file-order <file>   File specifying the order of conversion
	echo   --diff                 Enable diff conversion using folder-base
	echo   --diff-git <branch>    Enable git-based diff conversion. If specified, uses the input branch
	echo   --arch <arch>         Target architecture (amd64 or arm64^). Default is amd64.
	echo   --rebuild              Rebuild the Docker image before running
	echo   --time                 Enable timing of the conversion process
	rem Closing parentheses and angle brackets are caret-escaped because these
	rem echo lines sit inside the parenthesized IF block of the help branch.
	echo   --time                 (for Debug^) Enable timing of the conversion process
	echo Optional parameters from md to html conversion (only applicable if --frm md --to html^):
	echo   --no-lazy-toc         Disable lazy loading of the Table of Contents (TOC^)
	echo   --diff                 Enable diff conversion using folder-base
	echo   --diff-git ^<branch^>    Enable git-based diff conversion. If specified, uses the input branch
	exit /b 0
)
echo Unknown parameter passed: %~1
@@ -203,6 +211,7 @@ if %GIT%==true (
		set "EXTRA_ARGS=!EXTRA_ARGS! --diff-git"
	)
)
if %NO_LAZY_TOC%==true set "EXTRA_ARGS=!EXTRA_ARGS! --no-lazy-toc"
if %TIMER_ENABLED%==true set "EXTRA_ARGS=!EXTRA_ARGS! --time"

set "APP_DIR=%CD%"
+62 −3
Original line number Diff line number Diff line
# region Imports
import argparse, subprocess, os, sys, shutil, json
from bs4 import BeautifulSoup
from html_diff import make_trackchanges_diff
from src.constants import (
    FILEGEN_DIR,
@@ -35,6 +36,7 @@ from src.to_md.postprocessing import postprocess as postprocess_md

from src.to_html.preprocessing import preprocess as preprocess_html
from src.to_html.postprocessing import postprocess as postprocess_html
from src.to_html.postprocessing import clone_toc_in_file as clone_toc_in_file_html

from src.to_docx.preprocessing import preprocess as preprocess_docx
from src.to_docx.postprocessing import postprocess as postprocess_docx
@@ -89,6 +91,11 @@ parser.add_argument(
    action="store_true",
    help="Optionally, print a timing report at the end of the conversion process, showing the time taken for each section of the process. This can be used to identify any bottlenecks in the conversion process and optimize them for better performance.",
)
# Boolean switch (absent -> False); read back below as ``args.no_lazy_toc``.
parser.add_argument(
    "--no-lazy-toc",
    action="store_true",
    help="Optionally, disable the lazy loading of the Table of Contents (ToC) in the generated HTML files when converting from Markdown to HTML. By default, the ToC is loaded lazily to improve performance, but this option forces the ToC to be cloned in all the HTML files.",
)

args = parser.parse_args()

@@ -117,6 +124,8 @@ GIT_CHECKOUT_NAME: str = (

TIMER_ENABLED: bool = bool(args.time)

NO_LAZY_TOC: bool = bool(args.no_lazy_toc)

if IS_GIT_DIFF and IS_DIFF:
    print("Warning: --diff-git cannot be used together with --diff.")
    print("--diff-git overrides --diff. Continuing with --diff-git behavior.")
@@ -612,8 +621,10 @@ if SRC_TYPE == "html" and DEST_TYPE == "docx":
        postprocess_docx(OUTPUT_DOC_PATH)

if SRC_TYPE == "md" and DEST_TYPE == "html":
    toc_filename = "index.html"
    toc_path = os.path.join(DEST, toc_filename)
    with t.section("Post-process HTML files"):
        postprocess_html(DEST)
        postprocess_html(DEST, NO_LAZY_TOC)
    with t.section("Apply diff to HTML files"):
        if IS_DIFF:
            # default diff_source_dir is GENERATED_FILE/{FOLDER}-base/html
@@ -630,16 +641,64 @@ if SRC_TYPE == "md" and DEST_TYPE == "html":
                diff_source_dir = os.path.join(FILEGEN_DIR, f"{FOLDER}-base", "html")
            print(f"Applying diff from source directory: {diff_source_dir}...")
            # Iterate through new HTML files and create diffs
            filename_count_mapping = {}
            for filename in os.listdir(DEST):
                if filename.endswith(".html"):
                    new_file_path = os.path.join(DEST, filename)
                    comparing_file_path = os.path.join(diff_source_dir, filename)
                    # Create diff and overwrite the new file
                    make_trackchanges_diff(
                    count = make_trackchanges_diff(
                        comparing_file_path, new_file_path, new_file_path
                    )
                    if count and count > 0:
                        filename_count_mapping[filename] = count
            shutil.copy("diffVisualizer.js", DEST)
            print(f"Diff applied.")
            toc_soup = None
            if os.path.exists(toc_path):
                with open(toc_path, "r", encoding="utf-8") as f:
                    toc_soup = BeautifulSoup(f, "html.parser")

            def add_diff_count_to_toc_entry(filename, count):
                """Flag the first TOC link that targets ``filename`` as changed.

                Scans the ``<a href>`` elements of ``toc_soup`` (taken from the
                enclosing scope). The first one whose ``.string`` is set and
                whose ``href`` is ``filename`` or starts with ``filename#``
                gets the ``diff-changes-enable`` class appended and a
                ``data-diff-count`` attribute set to ``count``. Nothing happens
                when ``toc_soup`` is falsy or no link matches.
                """
                if not toc_soup:
                    return
                fragment_prefix = f"{filename}#"
                for anchor in toc_soup.find_all("a", href=True):
                    target = anchor.get("href", "")
                    if not anchor.string:
                        continue
                    if target != filename and not target.startswith(fragment_prefix):
                        continue
                    existing_classes = anchor.get("class", [])
                    anchor["class"] = existing_classes + ["diff-changes-enable"]
                    anchor["data-diff-count"] = count
                    # Only the first matching entry is marked.
                    break

            for filename, count in filename_count_mapping.items():
                add_diff_count_to_toc_entry(filename, count)
            if toc_soup is not None:
                with open(toc_path, "w", encoding="utf-8") as f:
                    f.write(str(toc_soup))
            else:
                print(
                    p_error(
                        f"TOC file '{toc_path}' was not found or could not be parsed; diff counts were not applied to the TOC."
                    )
                )
            print(f"Diff completed.")
    with t.section("Clone TOC in HTML files"):
        # With --no-lazy-toc, the TOC parsed from index.html is cloned into
        # every other generated HTML page.
        if NO_LAZY_TOC:
            toc_soup = None
            if os.path.exists(toc_path):
                with open(toc_path, "r", encoding="utf-8") as f:
                    toc_soup = BeautifulSoup(f, "html.parser")
            if toc_soup is None:
                # Nothing to clone: leave the pages untouched instead of
                # re-parsing and re-writing each one with no change applied.
                print(
                    f"Warning: TOC file '{toc_path}' was not found; the TOC was not cloned into the HTML files."
                )
            else:
                for filename in os.listdir(DEST):
                    # The TOC page itself is never a clone target.
                    if not filename.endswith(".html") or filename == toc_filename:
                        continue
                    file_path = os.path.join(DEST, filename)
                    with open(file_path, "r", encoding="utf-8") as f:
                        soup = BeautifulSoup(f, "html.parser")
                    soup = clone_toc_in_file_html(soup, toc_soup)
                    with open(file_path, "w", encoding="utf-8") as f:
                        f.write(str(soup))

print("Post-processing completed successfully.")
if TIMER_ENABLED:
    print("Timing report:")
+8 −3
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@ FILE_ORDER=""
DIFF=false
GIT=false
GIT_BRANCH=""
NO_LAZY_TOC=false
TIMER_ENABLED=false

while [[ "$#" -gt 0 ]]; do
@@ -47,6 +48,7 @@ while [[ "$#" -gt 0 ]]; do
        --file-order) FILE_ORDER="$2"; shift ;;
        --arch) shift ;; # Already processed
        --diff) DIFF=true ;;
        --no-lazy-toc) NO_LAZY_TOC=true ;;
        --diff-git)
            GIT=true
            if [[ -n "$2" && "$2" != --* ]]; then
@@ -69,11 +71,13 @@ while [[ "$#" -gt 0 ]]; do
            echo "Optional parameters:"
            echo "  --src <path>          Path to source files (overrides --folder for source volume)"
            echo "  --file-order <file>   File specifying the order of conversion"
            echo "  --diff                 Enable diff conversion using folder-base"
            echo "  --diff-git <branch>    Enable git-based diff conversion. If specified, uses the input branch"
            echo "  --arch <arch>         Target architecture (amd64 or arm64). Default is amd64."
            echo "  --rebuild              Rebuild the Docker image before running"
            echo "  --time                 Enable timing of the conversion process"
            echo "  --time                 (for debug) Enable timing of the conversion process"
            # The source-format flag is spelled --frm (as in convert.py), not --from.
            echo "Optional parameters from md to html conversion (only applicable if --frm md --to html):"
            echo "  --no-lazy-toc         Disable lazy loading of the Table of Contents (TOC)"
            echo "  --diff-git <branch>    Enable git-based diff conversion. If specified, uses the input branch"
            echo "  --diff                 Enable diff conversion using folder-base"
            exit 0
            ;;
        *) echo "Unknown parameter passed: $1"; exit 1 ;;
@@ -144,6 +148,7 @@ fi
[[ -n "$FILE_ORDER" ]] && RUN_ARGS+=("--file-order" "$FILE_ORDER")
[[ "$DIFF" == true ]] && RUN_ARGS+=("--diff")
[[ "$GIT" == true ]] && RUN_ARGS+=("--diff-git" "$GIT_BRANCH")
[[ "$NO_LAZY_TOC" == true ]] && RUN_ARGS+=("--no-lazy-toc")
[[ "$TIMER_ENABLED" == true ]] && RUN_ARGS+=("--time")

"${RUN_ARGS[@]}"
+29 −0
Original line number Diff line number Diff line
@@ -53,6 +53,35 @@ body {
  text-overflow: ellipsis;
}

/* TOC links carrying the diff-changes-enable class. position: relative makes
   the link the containing block for the absolutely positioned ::before
   tooltip; overflow: visible presumably keeps that tooltip from being
   clipped. */
#TOC a.diff-changes-enable {
  position: relative;
  overflow: visible;
  display: block;
  width: 100%;
}

/* Green dot appended after a flagged link; applies only to links that are
   direct children of first-level nav list items. */
nav>ul>li>a.diff-changes-enable::after {
  content: " ●";
  color: green;
}

/* Hover tooltip for a flagged link: shows the value of the link's
   data-diff-count attribute, rendered as a dark bubble positioned from the
   link's vertical midpoint upward. */
nav>ul>li>a.diff-changes-enable:hover::before {
  content: "Total Changes: " attr(data-diff-count);
  position: absolute;
  left: 2%;
  bottom: 50%;
  margin-bottom: 6px;
  padding: 4px 8px;
  background-color: #333;
  color: #fff;
  font-size: 12px;
  line-height: 1.5;
  border-radius: 4px;
  /* Keep the tooltip text on a single line. */
  white-space: nowrap;
  z-index: 1000;
  box-shadow: 0 2px 6px rgba(0, 0, 0, 0.2);
}

#TOC h1 {
  background-color: #02488d;
  border-bottom-style: none;
Loading