Loading generateBaseline/postprocessing.py +21 −90 Original line number Diff line number Diff line Loading @@ -135,100 +135,31 @@ def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> s if not input_path.exists() or input_path.suffix.lower() != ".docx": raise FileNotFoundError(f"Invalid DOCX path: {input_path}") print(f"📄 Input file: {input_path}") print(f"📁 Input parent directory: {input_path.parent}") # Create a temporary home directory for LibreOffice to prevent creating files in /root lo_home = Path(tempfile.mkdtemp(prefix="lo_home_")) # Ensure the directory has proper permissions (readable, writable, executable by owner) lo_home.chmod(0o755) # Create necessary subdirectories that LibreOffice needs lo_cache = lo_home / ".cache" lo_cache.mkdir(mode=0o755, exist_ok=True) dconf_dir = lo_cache / "dconf" dconf_dir.mkdir(mode=0o755, exist_ok=True) # Create .config directory and LibreOffice profile directory lo_config = lo_home / ".config" lo_config.mkdir(mode=0o755, exist_ok=True) lo_profile = lo_config / "libreoffice" lo_profile.mkdir(mode=0o755, exist_ok=True) print(f"🏠 LibreOffice home directory: {lo_home}") print(f"📋 Contents of lo_home (before):") try: items = list(lo_home.iterdir()) if items: for item in sorted(items): print(f" - {item.name} ({'dir' if item.is_dir() else 'file'})") else: print(" (empty)") except Exception as e: print(f" Error listing directory: {e}") try: # Calculate the file path relative to parent for use in container file_path_in_container = f"/data/{input_path.relative_to(input_path.parent)}" print(f"📂 File path in container: {file_path_in_container}") # Mount the directory containing the file, preserving full path structure # If file is at /path/to/baseline/file.docx, mount the parent and use /data/baseline/file.docx # Mount the actual working folder (directory containing baseline, not baseline itself) # If file is at /working/folder/baseline/file.docx, mount /working/folder to /data # So file path in container is /data/baseline/file.docx # Run LibreOffice to refresh fields (convert docx to docx refreshes fields) # Then run post-processing command in the same container soffice_cmd = f"soffice --headless --convert-to docx --outdir /data {input_path}" # TODO: Add post-processing command here post_cmd = f'chown $(stat -c "%u:%g" {input_path}) {input_path.name}' # combined_cmd = f"{soffice_cmd} && {post_cmd}" combined_cmd = f"{soffice_cmd} && {post_cmd}" # Run LibreOffice container on this specific file cmd = [ "docker", "run", "--rm", "-v", f"{input_path.parent}:/data", "-v", f"{lo_home}:/tmp/lo_home", # Mount temp home directory "-u", f"{os.getuid()}:{os.getgid()}", # <--- run as host user "-e", "HOME=/tmp/lo_home", # Tell LibreOffice to use temp directory for its profile "-e", "DISPLAY=", # Disable X11 display "-e", "DCONF_PROFILE=", # Disable dconf to avoid permission issues "-e", "SAL_USE_VCLPLUGIN=gen", # Use generic VCL plugin (headless) "-e", "LIBREOFFICE_PROFILE=/tmp/lo_home/.config/libreoffice", # Explicit profile path "-e", "NO_AT_BRIDGE=1", # Disable accessibility bridge "-v", f"/data:/data", "-e", "HOME=/data", "--entrypoint", "/bin/bash", image, "--headless", "--nodefault", "--norestore", # Don't restore previous session "--nolockcheck", # Don't check for file locks "--invisible", "--nofirststartwizard", # Don't show first start wizard "--convert-to", "docx", "--infilter=writer8", "--outdir", "/data", file_path_in_container, "-c", combined_cmd, ] print(f"🚀 Running command:\n{' '.join(cmd)}\n") subprocess.run(cmd, check=True) print(f"📋 Contents of lo_home (after):") try: items = list(lo_home.iterdir()) if items: for item in sorted(items): print(f" - {item.name} ({'dir' if item.is_dir() else 'file'})") if item.is_dir(): try: for subitem in sorted(item.iterdir()): print(f" - {subitem.name} ({'dir' if subitem.is_dir() else 'file'})") except Exception: pass else: print(" (empty)") except Exception as e: print(f" Error listing directory: {e}") print(f"📋 Files in input parent directory (after):") try: for item in sorted(input_path.parent.iterdir()): print(f" - {item.name} ({'dir' if item.is_dir() else 'file'})") except Exception as e: print(f" Error listing directory: {e}") finally: # Clean up the temporary home directory print(f"🧹 Cleaning up lo_home: {lo_home}") shutil.rmtree(lo_home, ignore_errors=True) print(f"✅ Refreshed DOCX updated in place: {input_path}") return str(input_path) Loading Loading
generateBaseline/postprocessing.py +21 −90 Original line number Diff line number Diff line Loading @@ -135,100 +135,31 @@ def refresh_docx_fields(input_path: str, image: str = "docx-field-refresh") -> s if not input_path.exists() or input_path.suffix.lower() != ".docx": raise FileNotFoundError(f"Invalid DOCX path: {input_path}") print(f"📄 Input file: {input_path}") print(f"📁 Input parent directory: {input_path.parent}") # Create a temporary home directory for LibreOffice to prevent creating files in /root lo_home = Path(tempfile.mkdtemp(prefix="lo_home_")) # Ensure the directory has proper permissions (readable, writable, executable by owner) lo_home.chmod(0o755) # Create necessary subdirectories that LibreOffice needs lo_cache = lo_home / ".cache" lo_cache.mkdir(mode=0o755, exist_ok=True) dconf_dir = lo_cache / "dconf" dconf_dir.mkdir(mode=0o755, exist_ok=True) # Create .config directory and LibreOffice profile directory lo_config = lo_home / ".config" lo_config.mkdir(mode=0o755, exist_ok=True) lo_profile = lo_config / "libreoffice" lo_profile.mkdir(mode=0o755, exist_ok=True) print(f"🏠 LibreOffice home directory: {lo_home}") print(f"📋 Contents of lo_home (before):") try: items = list(lo_home.iterdir()) if items: for item in sorted(items): print(f" - {item.name} ({'dir' if item.is_dir() else 'file'})") else: print(" (empty)") except Exception as e: print(f" Error listing directory: {e}") try: # Calculate the file path relative to parent for use in container file_path_in_container = f"/data/{input_path.relative_to(input_path.parent)}" print(f"📂 File path in container: {file_path_in_container}") # Mount the directory containing the file, preserving full path structure # If file is at /path/to/baseline/file.docx, mount the parent and use /data/baseline/file.docx # Mount the actual working folder (directory containing baseline, not baseline itself) # If file is at /working/folder/baseline/file.docx, mount /working/folder to /data # So file path in container is /data/baseline/file.docx # Run LibreOffice to refresh fields (convert docx to docx refreshes fields) # Then run post-processing command in the same container soffice_cmd = f"soffice --headless --convert-to docx --outdir /data {input_path}" # TODO: Add post-processing command here post_cmd = f'chown $(stat -c "%u:%g" {input_path}) {input_path.name}' # combined_cmd = f"{soffice_cmd} && {post_cmd}" combined_cmd = f"{soffice_cmd} && {post_cmd}" # Run LibreOffice container on this specific file cmd = [ "docker", "run", "--rm", "-v", f"{input_path.parent}:/data", "-v", f"{lo_home}:/tmp/lo_home", # Mount temp home directory "-u", f"{os.getuid()}:{os.getgid()}", # <--- run as host user "-e", "HOME=/tmp/lo_home", # Tell LibreOffice to use temp directory for its profile "-e", "DISPLAY=", # Disable X11 display "-e", "DCONF_PROFILE=", # Disable dconf to avoid permission issues "-e", "SAL_USE_VCLPLUGIN=gen", # Use generic VCL plugin (headless) "-e", "LIBREOFFICE_PROFILE=/tmp/lo_home/.config/libreoffice", # Explicit profile path "-e", "NO_AT_BRIDGE=1", # Disable accessibility bridge "-v", f"/data:/data", "-e", "HOME=/data", "--entrypoint", "/bin/bash", image, "--headless", "--nodefault", "--norestore", # Don't restore previous session "--nolockcheck", # Don't check for file locks "--invisible", "--nofirststartwizard", # Don't show first start wizard "--convert-to", "docx", "--infilter=writer8", "--outdir", "/data", file_path_in_container, "-c", combined_cmd, ] print(f"🚀 Running command:\n{' '.join(cmd)}\n") subprocess.run(cmd, check=True) print(f"📋 Contents of lo_home (after):") try: items = list(lo_home.iterdir()) if items: for item in sorted(items): print(f" - {item.name} ({'dir' if item.is_dir() else 'file'})") if item.is_dir(): try: for subitem in sorted(item.iterdir()): print(f" - {subitem.name} ({'dir' if subitem.is_dir() else 'file'})") except Exception: pass else: print(" (empty)") except Exception as e: print(f" Error listing directory: {e}") print(f"📋 Files in input parent directory (after):") try: for item in sorted(input_path.parent.iterdir()): print(f" - {item.name} ({'dir' if item.is_dir() else 'file'})") except Exception as e: print(f" Error listing directory: {e}") finally: # Clean up the temporary home directory print(f"🧹 Cleaning up lo_home: {lo_home}") shutil.rmtree(lo_home, ignore_errors=True) print(f"✅ Refreshed DOCX updated in place: {input_path}") return str(input_path) Loading