Files
SneakyCode/app/utils/file_helpers.py
2026-03-11 07:21:21 -05:00

151 lines
4.4 KiB
Python

"""Security-critical file operation helpers with path sandboxing."""
from pathlib import Path
class PathSecurityError(Exception):
    """Signal that a requested path resolves outside the workspace root."""
class FileSizeError(Exception):
    """Signal that a file or payload is larger than the configured limit."""
class BinaryFileError(Exception):
    """Signal that a text-only operation was attempted on a binary file."""
def resolve_safe_path(path: str | Path, workspace_root: Path) -> Path:
"""Resolve a path and verify it is within the workspace root.
Args:
path: The path to resolve (absolute or relative to workspace_root).
workspace_root: The allowed root directory (must be absolute).
Returns:
The resolved absolute path.
Raises:
PathSecurityError: If the resolved path is outside workspace_root.
"""
workspace_root = workspace_root.resolve()
resolved = (workspace_root / path).resolve()
if not resolved.is_relative_to(workspace_root):
raise PathSecurityError(
f"Path '{path}' resolves to '{resolved}' which is outside "
f"workspace root '{workspace_root}'"
)
return resolved
def is_binary_file(file_path: Path, sample_size: int = 8192) -> bool:
    """Heuristically decide whether a file holds binary data.

    Only the leading ``sample_size`` bytes are inspected; the file counts as
    binary when that sample contains at least one null byte.

    Args:
        file_path: Path to the file to inspect.
        sample_size: How many bytes from the start of the file to examine.

    Returns:
        True if a null byte is found in the sample. False otherwise,
        including when the file cannot be opened or read (OSError).
    """
    try:
        with open(file_path, "rb") as handle:
            head = handle.read(sample_size)
    except OSError:
        # Best-effort detection: an unreadable file is reported as not binary.
        return False
    return head.count(b"\x00") > 0
def check_file_size(file_path: Path, max_size_bytes: int) -> None:
    """Ensure a file on disk is no larger than the permitted size.

    Args:
        file_path: Path to the file whose size is checked.
        max_size_bytes: Largest acceptable size, in bytes (inclusive).

    Raises:
        FileSizeError: If the file is larger than max_size_bytes.
        FileNotFoundError: If the file does not exist.
    """
    actual_size = file_path.stat().st_size
    within_limit = not (actual_size > max_size_bytes)
    if within_limit:
        return

    message = (
        f"File '{file_path}' is {actual_size:,} bytes, exceeding the "
        f"{max_size_bytes:,} byte limit"
    )
    raise FileSizeError(message)
def safe_read_file(
    path: str | Path,
    workspace_root: Path,
    max_size_bytes: int = 1_048_576,
    check_binary: bool = True,
) -> str:
    """Safely read a text file with path sandboxing, size, and binary checks.

    The size limit is enforced twice: once up front from the size reported by
    the filesystem (cheap early rejection), and again on the bytes actually
    read, so the amount of data loaded never exceeds ``max_size_bytes + 1``
    bytes even if the file changes between the check and the read.

    Args:
        path: Path to the file (relative to workspace_root or absolute).
        workspace_root: The allowed root directory.
        max_size_bytes: Maximum file size to read, in bytes.
        check_binary: Whether to reject files that look binary.

    Returns:
        The file contents decoded as UTF-8, with newlines translated the
        same way text-mode reading does.

    Raises:
        PathSecurityError: If the path escapes the workspace.
        FileSizeError: If the file is too large.
        BinaryFileError: If the file is binary and check_binary is True.
        FileNotFoundError: If the file does not exist.
        UnicodeDecodeError: If the contents are not valid UTF-8.
    """
    import io  # local import: only needed for the bounded decode below

    safe_path = resolve_safe_path(path, workspace_root)
    if not safe_path.exists():
        raise FileNotFoundError(f"File not found: {safe_path}")

    # Early rejection based on what the filesystem reports right now.
    check_file_size(safe_path, max_size_bytes)
    if check_binary and is_binary_file(safe_path):
        raise BinaryFileError(f"File appears to be binary: {safe_path}")

    # The stat()-based check above is only a snapshot: the file may grow or be
    # replaced before it is read, and some files report a misleading size.
    # Read one byte more than the limit so an oversized file is detected
    # without ever loading more than max_size_bytes + 1 bytes.
    with open(safe_path, "rb") as handle:
        raw = handle.read(max_size_bytes + 1)
    if len(raw) > max_size_bytes:
        raise FileSizeError(
            f"File '{safe_path}' exceeds the {max_size_bytes:,} byte limit"
        )

    # Decode through TextIOWrapper rather than bytes.decode() so the result
    # keeps text-mode semantics (strict UTF-8, universal newline translation).
    return io.TextIOWrapper(io.BytesIO(raw), encoding="utf-8").read()
def safe_write_file(
    path: str | Path,
    content: str,
    workspace_root: Path,
    max_size_bytes: int = 1_048_576,
) -> Path:
    """Write text to a file inside the workspace, enforcing a size cap.

    The destination is validated against the workspace root first, then the
    UTF-8 encoded size of ``content`` is compared with the limit. Nothing is
    created on disk unless both checks pass.

    Args:
        path: Destination path (relative to workspace_root or absolute).
        content: Text to write.
        workspace_root: The allowed root directory.
        max_size_bytes: Largest permitted UTF-8 encoded size of content.

    Returns:
        The resolved path that was written to.

    Raises:
        PathSecurityError: If the path escapes the workspace.
        FileSizeError: If the encoded content exceeds the size limit.
    """
    target = resolve_safe_path(path, workspace_root)

    encoded_length = len(content.encode("utf-8"))
    if encoded_length > max_size_bytes:
        message = (
            f"Content is {encoded_length:,} bytes, exceeding the "
            f"{max_size_bytes:,} byte limit"
        )
        raise FileSizeError(message)

    # Create any missing intermediate directories before writing.
    destination_dir = target.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    target.write_text(content, encoding="utf-8")
    return target