Loading generateBaseline/postprocessing.py +19 −13 Original line number Original line Diff line number Diff line Loading @@ -141,29 +141,33 @@ def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> s print(f"📁 Input file parent: {input_path.parent}") print(f"📁 Input file parent: {input_path.parent}") print(f"📁 Input file parent name: {input_path.parent.name}") print(f"📁 Input file parent name: {input_path.parent.name}") # Determine mount point and file path in container # Determine mount point (working folder) and file path in container # Mount /data from host to /data in container # If file is in baseline/, mount the parent directory (working folder) mount_point = Path("/data") # Otherwise mount the file's parent directory # Calculate relative path from mount point to file if input_path.parent.name == "baseline": # If file is at /data/baseline/file.docx, relative path is baseline/file.docx mount_point_host = input_path.parent.parent # Working folder containing baseline # If file is at /data/file.docx, relative path is file.docx else: file_path_in_container = f"/data/{input_path.parent.name}/{input_path.name}" mount_point_host = input_path.parent print(f"📂 Mount point: {mount_point}") mount_point_container = "/data" file_path_in_container = f"{mount_point_container}/{input_path.relative_to(mount_point_host)}" print(f"📂 Mount point (host): {mount_point_host}") print(f"📂 Mount point (container): {mount_point_container}") print(f"📂 File path in container: {file_path_in_container}") print(f"📂 File path in container: {file_path_in_container}") # Run LibreOffice to refresh fields (convert docx to docx refreshes fields) # Run LibreOffice to refresh fields (convert docx to docx refreshes fields) # Then run post-processing command in the same container # Then run post-processing command in the same container # Original file is in baseline/, but LibreOffice creates output in /data with just the filename # Original file is in baseline/, but LibreOffice creates output in mount_point with just the filename original_file = file_path_in_container # e.g., /data/baseline/file.docx original_file = file_path_in_container # e.g., /data/baseline/file.docx created_file = f"/data/{input_path.name}" # e.g., /data/file.docx created_file = f"{mount_point_container}/{input_path.name}" # e.g., /data/file.docx print(f"📄 Original file (in container): {original_file}") print(f"📄 Original file (in container): {original_file}") print(f"📄 Created file (in container): {created_file}") print(f"📄 Created file (in container): {created_file}") # First, save the original file's permissions, then convert, then apply to created file # First, save the original file's permissions, then convert, then apply to created file save_perms_cmd = f'ORIG_PERMS=$(stat -c "%u:%g" {original_file})' save_perms_cmd = f'ORIG_PERMS=$(stat -c "%u:%g" {original_file})' soffice_cmd = f"soffice --headless --convert-to docx --outdir /data {original_file}" soffice_cmd = f"soffice --headless --convert-to docx --outdir {mount_point_container} {original_file}" # Apply original file permissions to the created/converted file # Apply original file permissions to the created/converted file post_cmd = f'chown $ORIG_PERMS {created_file}' post_cmd = f'chown $ORIG_PERMS {created_file}' combined_cmd = f"{save_perms_cmd} && {soffice_cmd} && {post_cmd}" combined_cmd = f"{save_perms_cmd} && {soffice_cmd} && {post_cmd}" Loading @@ -175,8 +179,8 @@ def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> s cmd = [ cmd = [ "docker", "run", "--rm", "docker", "run", "--rm", "-v", f"/data:/data", "-v", f"{mount_point_host}:{mount_point_container}", "-e", "HOME=/data", "-e", f"HOME={mount_point_container}", "--entrypoint", "/bin/bash", "--entrypoint", "/bin/bash", image, image, "-c", combined_cmd, "-c", combined_cmd, Loading @@ -184,6 +188,8 @@ def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> s print(f"🐳 Docker command: {' '.join(cmd)}") print(f"🐳 Docker command: {' '.join(cmd)}") subprocess.run("ls -al && pwd && whoami", check=True) subprocess.run(cmd, check=True) subprocess.run(cmd, check=True) return str(input_path) return str(input_path) Loading Loading
generateBaseline/postprocessing.py +19 −13 Original line number Original line Diff line number Diff line Loading @@ -141,29 +141,33 @@ def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> s print(f"📁 Input file parent: {input_path.parent}") print(f"📁 Input file parent: {input_path.parent}") print(f"📁 Input file parent name: {input_path.parent.name}") print(f"📁 Input file parent name: {input_path.parent.name}") # Determine mount point and file path in container # Determine mount point (working folder) and file path in container # Mount /data from host to /data in container # If file is in baseline/, mount the parent directory (working folder) mount_point = Path("/data") # Otherwise mount the file's parent directory # Calculate relative path from mount point to file if input_path.parent.name == "baseline": # If file is at /data/baseline/file.docx, relative path is baseline/file.docx mount_point_host = input_path.parent.parent # Working folder containing baseline # If file is at /data/file.docx, relative path is file.docx else: file_path_in_container = f"/data/{input_path.parent.name}/{input_path.name}" mount_point_host = input_path.parent print(f"📂 Mount point: {mount_point}") mount_point_container = "/data" file_path_in_container = f"{mount_point_container}/{input_path.relative_to(mount_point_host)}" print(f"📂 Mount point (host): {mount_point_host}") print(f"📂 Mount point (container): {mount_point_container}") print(f"📂 File path in container: {file_path_in_container}") print(f"📂 File path in container: {file_path_in_container}") # Run LibreOffice to refresh fields (convert docx to docx refreshes fields) # Run LibreOffice to refresh fields (convert docx to docx refreshes fields) # Then run post-processing command in the same container # Then run post-processing command in the same container # Original file is in baseline/, but LibreOffice creates output in /data with just the filename # Original file is in baseline/, but LibreOffice creates output in mount_point with just the filename original_file = file_path_in_container # e.g., /data/baseline/file.docx original_file = file_path_in_container # e.g., /data/baseline/file.docx created_file = f"/data/{input_path.name}" # e.g., /data/file.docx created_file = f"{mount_point_container}/{input_path.name}" # e.g., /data/file.docx print(f"📄 Original file (in container): {original_file}") print(f"📄 Original file (in container): {original_file}") print(f"📄 Created file (in container): {created_file}") print(f"📄 Created file (in container): {created_file}") # First, save the original file's permissions, then convert, then apply to created file # First, save the original file's permissions, then convert, then apply to created file save_perms_cmd = f'ORIG_PERMS=$(stat -c "%u:%g" {original_file})' save_perms_cmd = f'ORIG_PERMS=$(stat -c "%u:%g" {original_file})' soffice_cmd = f"soffice --headless --convert-to docx --outdir /data {original_file}" soffice_cmd = f"soffice --headless --convert-to docx --outdir {mount_point_container} {original_file}" # Apply original file permissions to the created/converted file # Apply original file permissions to the created/converted file post_cmd = f'chown $ORIG_PERMS {created_file}' post_cmd = f'chown $ORIG_PERMS {created_file}' combined_cmd = f"{save_perms_cmd} && {soffice_cmd} && {post_cmd}" combined_cmd = f"{save_perms_cmd} && {soffice_cmd} && {post_cmd}" Loading @@ -175,8 +179,8 @@ def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> s cmd = [ cmd = [ "docker", "run", "--rm", "docker", "run", "--rm", "-v", f"/data:/data", "-v", f"{mount_point_host}:{mount_point_container}", "-e", "HOME=/data", "-e", f"HOME={mount_point_container}", "--entrypoint", "/bin/bash", "--entrypoint", "/bin/bash", image, image, "-c", combined_cmd, "-c", combined_cmd, Loading @@ -184,6 +188,8 @@ def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> s print(f"🐳 Docker command: {' '.join(cmd)}") print(f"🐳 Docker command: {' '.join(cmd)}") subprocess.run("ls -al && pwd && whoami", check=True) subprocess.run(cmd, check=True) subprocess.run(cmd, check=True) return str(input_path) return str(input_path) Loading