Loading generateBaseline/postprocessing.py +43 −10 Original line number Diff line number Diff line Loading @@ -135,19 +135,50 @@ def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> s if not input_path.exists() or input_path.suffix.lower() != ".docx": raise FileNotFoundError(f"Invalid DOCX path: {input_path}") # Mount the directory containing the file, preserving full path structure # If file is at /path/to/baseline/file.docx, mount the parent and use /data/baseline/file.docx # Mount the actual working folder (directory containing baseline, not baseline itself) # If file is at /working/folder/baseline/file.docx, mount /working/folder to /data # So file path in container is /data/baseline/file.docx # Log input information print(f"📄 Input file path: {input_path}") print(f"📄 Input file name: {input_path.name}") print(f"📁 Input file parent: {input_path.parent}") print(f"📁 Input file parent name: {input_path.parent.name}") # Determine mount point and file path in container # Mount /data from host to /data in container mount_point = Path("/data") # Calculate relative path from mount point to file # If file is at /data/baseline/file.docx, relative path is baseline/file.docx # If file is at /data/file.docx, relative path is file.docx try: relative_path = input_path.relative_to(mount_point) file_path_in_container = f"/data/{relative_path}" except ValueError: # If file is not under /data, use just the filename file_path_in_container = f"/data/{input_path.name}" relative_path = input_path.name print(f"📂 Mount point: {mount_point}") print(f"📂 Relative path: {relative_path}") print(f"📂 File path in container: {file_path_in_container}") # Run LibreOffice to refresh fields (convert docx to docx refreshes fields) # Then run post-processing command in the same container soffice_cmd = f"soffice --headless --convert-to docx --outdir /data /data/baseline/{input_path.name}" # TODO: Add post-processing command here post_cmd = f'chown $(stat -c "%u:%g" /data/baseline/{input_path.name}) /data{input_path.name}' # combined_cmd = f"{soffice_cmd} && {post_cmd}" combined_cmd = f"{soffice_cmd} && {post_cmd}" # Original file is in baseline/, but LibreOffice creates output in /data with just the filename original_file = file_path_in_container # e.g., /data/baseline/file.docx created_file = f"/data/{input_path.name}" # e.g., /data/file.docx print(f"📄 Original file (in container): {original_file}") print(f"📄 Created file (in container): {created_file}") # First, save the original file's permissions, then convert, then apply to created file save_perms_cmd = f'ORIG_PERMS=$(stat -c "%u:%g" {original_file})' soffice_cmd = f"soffice --headless --convert-to docx --outdir /data {original_file}" # Apply original file permissions to the created/converted file post_cmd = f'chown $ORIG_PERMS {created_file}' combined_cmd = f"{save_perms_cmd} && {soffice_cmd} && {post_cmd}" print(f"🔧 save_perms_cmd: {save_perms_cmd}") print(f"🔧 soffice_cmd: {soffice_cmd}") print(f"🔧 post_cmd: {post_cmd}") print(f"🔧 combined_cmd: {combined_cmd}") cmd = [ "docker", "run", "--rm", Loading @@ -158,6 +189,8 @@ def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> s "-c", combined_cmd, ] print(f"🐳 Docker command: {' '.join(cmd)}") subprocess.run(cmd, check=True) return str(input_path) Loading Loading
generateBaseline/postprocessing.py +43 −10 Original line number Diff line number Diff line Loading @@ -135,19 +135,50 @@ def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> s if not input_path.exists() or input_path.suffix.lower() != ".docx": raise FileNotFoundError(f"Invalid DOCX path: {input_path}") # Mount the directory containing the file, preserving full path structure # If file is at /path/to/baseline/file.docx, mount the parent and use /data/baseline/file.docx # Mount the actual working folder (directory containing baseline, not baseline itself) # If file is at /working/folder/baseline/file.docx, mount /working/folder to /data # So file path in container is /data/baseline/file.docx # Log input information print(f"📄 Input file path: {input_path}") print(f"📄 Input file name: {input_path.name}") print(f"📁 Input file parent: {input_path.parent}") print(f"📁 Input file parent name: {input_path.parent.name}") # Determine mount point and file path in container # Mount /data from host to /data in container mount_point = Path("/data") # Calculate relative path from mount point to file # If file is at /data/baseline/file.docx, relative path is baseline/file.docx # If file is at /data/file.docx, relative path is file.docx try: relative_path = input_path.relative_to(mount_point) file_path_in_container = f"/data/{relative_path}" except ValueError: # If file is not under /data, use just the filename file_path_in_container = f"/data/{input_path.name}" relative_path = input_path.name print(f"📂 Mount point: {mount_point}") print(f"📂 Relative path: {relative_path}") print(f"📂 File path in container: {file_path_in_container}") # Run LibreOffice to refresh fields (convert docx to docx refreshes fields) # Then run post-processing command in the same container soffice_cmd = f"soffice --headless --convert-to docx --outdir /data /data/baseline/{input_path.name}" # TODO: Add post-processing command here post_cmd = f'chown $(stat -c "%u:%g" /data/baseline/{input_path.name}) /data{input_path.name}' # combined_cmd = f"{soffice_cmd} && {post_cmd}" combined_cmd = f"{soffice_cmd} && {post_cmd}" # Original file is in baseline/, but LibreOffice creates output in /data with just the filename original_file = file_path_in_container # e.g., /data/baseline/file.docx created_file = f"/data/{input_path.name}" # e.g., /data/file.docx print(f"📄 Original file (in container): {original_file}") print(f"📄 Created file (in container): {created_file}") # First, save the original file's permissions, then convert, then apply to created file save_perms_cmd = f'ORIG_PERMS=$(stat -c "%u:%g" {original_file})' soffice_cmd = f"soffice --headless --convert-to docx --outdir /data {original_file}" # Apply original file permissions to the created/converted file post_cmd = f'chown $ORIG_PERMS {created_file}' combined_cmd = f"{save_perms_cmd} && {soffice_cmd} && {post_cmd}" print(f"🔧 save_perms_cmd: {save_perms_cmd}") print(f"🔧 soffice_cmd: {soffice_cmd}") print(f"🔧 post_cmd: {post_cmd}") print(f"🔧 combined_cmd: {combined_cmd}") cmd = [ "docker", "run", "--rm", Loading @@ -158,6 +189,8 @@ def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> s "-c", combined_cmd, ] print(f"🐳 Docker command: {' '.join(cmd)}") subprocess.run(cmd, check=True) return str(input_path) Loading