"""Security-critical file operation helpers with path sandboxing.""" from pathlib import Path class PathSecurityError(Exception): """Raised when a file path escapes the allowed workspace root.""" class FileSizeError(Exception): """Raised when a file exceeds the configured size limit.""" class BinaryFileError(Exception): """Raised when an operation is attempted on a binary file.""" def resolve_safe_path(path: str | Path, workspace_root: Path) -> Path: """Resolve a path and verify it is within the workspace root. Args: path: The path to resolve (absolute or relative to workspace_root). workspace_root: The allowed root directory (must be absolute). Returns: The resolved absolute path. Raises: PathSecurityError: If the resolved path is outside workspace_root. """ workspace_root = workspace_root.resolve() resolved = (workspace_root / path).resolve() if not resolved.is_relative_to(workspace_root): raise PathSecurityError( f"Path '{path}' resolves to '{resolved}' which is outside " f"workspace root '{workspace_root}'" ) return resolved def is_binary_file(file_path: Path, sample_size: int = 8192) -> bool: """Detect if a file is binary by checking for null bytes in a sample. Args: file_path: Path to the file to check. sample_size: Number of bytes to read for detection. Returns: True if the file appears to be binary. """ try: with open(file_path, "rb") as f: sample = f.read(sample_size) return b"\x00" in sample except OSError: return False def check_file_size(file_path: Path, max_size_bytes: int) -> None: """Verify that a file does not exceed the size limit. Args: file_path: Path to the file to check. max_size_bytes: Maximum allowed file size in bytes. Raises: FileSizeError: If the file exceeds the size limit. FileNotFoundError: If the file does not exist. """ size = file_path.stat().st_size if size > max_size_bytes: raise FileSizeError( f"File '{file_path}' is {size:,} bytes, exceeding the " f"{max_size_bytes:,} byte limit" ) def safe_read_file( path: str | Path, workspace_root: Path, max_size_bytes: int = 1_048_576, check_binary: bool = True, ) -> str: """Safely read a file with path sandboxing, size, and binary checks. Args: path: Path to the file (relative to workspace_root or absolute). workspace_root: The allowed root directory. max_size_bytes: Maximum file size to read. check_binary: Whether to reject binary files. Returns: The file contents as a string. Raises: PathSecurityError: If the path escapes the workspace. FileSizeError: If the file is too large. BinaryFileError: If the file is binary and check_binary is True. FileNotFoundError: If the file does not exist. """ safe_path = resolve_safe_path(path, workspace_root) if not safe_path.exists(): raise FileNotFoundError(f"File not found: {safe_path}") check_file_size(safe_path, max_size_bytes) if check_binary and is_binary_file(safe_path): raise BinaryFileError(f"File appears to be binary: {safe_path}") return safe_path.read_text(encoding="utf-8") def safe_write_file( path: str | Path, content: str, workspace_root: Path, max_size_bytes: int = 1_048_576, ) -> Path: """Safely write a file with path sandboxing and size checks. Args: path: Path to write to (relative to workspace_root or absolute). content: String content to write. workspace_root: The allowed root directory. max_size_bytes: Maximum allowed content size in bytes. Returns: The resolved path that was written to. Raises: PathSecurityError: If the path escapes the workspace. FileSizeError: If the content exceeds the size limit. """ safe_path = resolve_safe_path(path, workspace_root) content_size = len(content.encode("utf-8")) if content_size > max_size_bytes: raise FileSizeError( f"Content is {content_size:,} bytes, exceeding the " f"{max_size_bytes:,} byte limit" ) # Ensure parent directory exists safe_path.parent.mkdir(parents=True, exist_ok=True) safe_path.write_text(content, encoding="utf-8") return safe_path