151 lines
4.4 KiB
Python
151 lines
4.4 KiB
Python
"""Security-critical file operation helpers with path sandboxing."""
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
class PathSecurityError(Exception):
    """Signal that a requested path lies outside the permitted workspace root."""
|
|
|
|
|
|
class FileSizeError(Exception):
    """Signal that a file or payload is larger than the configured byte limit."""
|
|
|
|
|
|
class BinaryFileError(Exception):
    """Signal that a text-only operation was requested on a binary file."""
|
|
|
|
|
|
def resolve_safe_path(path: str | Path, workspace_root: Path) -> Path:
|
|
"""Resolve a path and verify it is within the workspace root.
|
|
|
|
Args:
|
|
path: The path to resolve (absolute or relative to workspace_root).
|
|
workspace_root: The allowed root directory (must be absolute).
|
|
|
|
Returns:
|
|
The resolved absolute path.
|
|
|
|
Raises:
|
|
PathSecurityError: If the resolved path is outside workspace_root.
|
|
"""
|
|
workspace_root = workspace_root.resolve()
|
|
resolved = (workspace_root / path).resolve()
|
|
|
|
if not resolved.is_relative_to(workspace_root):
|
|
raise PathSecurityError(
|
|
f"Path '{path}' resolves to '{resolved}' which is outside "
|
|
f"workspace root '{workspace_root}'"
|
|
)
|
|
|
|
return resolved
|
|
|
|
|
|
def is_binary_file(file_path: Path, sample_size: int = 8192) -> bool:
    """Guess whether a file is binary by scanning its head for NUL bytes.

    Args:
        file_path: Location of the file to inspect.
        sample_size: How many leading bytes to read for the check.

    Returns:
        True when the sampled bytes contain a NUL byte. False otherwise,
        including when the file cannot be opened or read.
    """
    try:
        with open(file_path, "rb") as stream:
            head = stream.read(sample_size)
    except OSError:
        # Unreadable files are reported as "not binary" instead of raising.
        return False
    return b"\x00" in head
|
|
|
|
|
|
def check_file_size(file_path: Path, max_size_bytes: int) -> None:
    """Reject a file whose on-disk size is above ``max_size_bytes``.

    Args:
        file_path: Location of the file to measure.
        max_size_bytes: Largest size, in bytes, that is still accepted.

    Raises:
        FileSizeError: When the size reported by ``stat()`` is greater than
            ``max_size_bytes``.
        FileNotFoundError: When ``file_path`` does not exist.
    """
    actual_size = file_path.stat().st_size
    if actual_size > max_size_bytes:
        message = (
            f"File '{file_path}' is {actual_size:,} bytes, exceeding the "
            f"{max_size_bytes:,} byte limit"
        )
        raise FileSizeError(message)
|
|
|
|
|
|
def safe_read_file(
    path: str | Path,
    workspace_root: Path,
    max_size_bytes: int = 1_048_576,
    check_binary: bool = True,
) -> str:
    """Read a workspace file as UTF-8 text after a series of safety checks.

    The checks run in this order: sandbox containment, existence, size
    limit, and (optionally) binary detection. The file is only read once
    all of them pass.

    Args:
        path: File to read, relative to ``workspace_root`` or absolute.
        workspace_root: Directory the file must live under.
        max_size_bytes: Largest file size, in bytes, that will be read.
        check_binary: When True, files that look binary are refused.

    Returns:
        The decoded text content of the file.

    Raises:
        PathSecurityError: When the path resolves outside the workspace.
        FileNotFoundError: When the resolved path does not exist.
        FileSizeError: When the file is larger than ``max_size_bytes``.
        BinaryFileError: When ``check_binary`` is True and the file looks
            binary.
    """
    target = resolve_safe_path(path, workspace_root)

    # Explicit existence check so the error message carries the resolved path.
    if not target.exists():
        raise FileNotFoundError(f"File not found: {target}")

    check_file_size(target, max_size_bytes)

    if check_binary:
        if is_binary_file(target):
            raise BinaryFileError(f"File appears to be binary: {target}")

    return target.read_text(encoding="utf-8")
|
|
|
|
|
|
def safe_write_file(
    path: str | Path,
    content: str,
    workspace_root: Path,
    max_size_bytes: int = 1_048_576,
) -> Path:
    """Write UTF-8 text to a workspace file after sandbox and size checks.

    The destination is validated against the workspace first, then the
    encoded size of ``content`` is checked. Nothing is created on disk
    unless both checks pass.

    Args:
        path: Destination file, relative to ``workspace_root`` or absolute.
        content: Text to store in the file.
        workspace_root: Directory the destination must live under.
        max_size_bytes: Largest permitted size of ``content`` once encoded
            as UTF-8, in bytes.

    Returns:
        The resolved path of the file that was written.

    Raises:
        PathSecurityError: When the path resolves outside the workspace.
        FileSizeError: When the encoded content is larger than
            ``max_size_bytes``.
    """
    destination = resolve_safe_path(path, workspace_root)

    # The limit applies to encoded bytes, not to the number of characters.
    encoded_length = len(content.encode("utf-8"))
    if encoded_length > max_size_bytes:
        raise FileSizeError(
            f"Content is {encoded_length:,} bytes, exceeding the "
            f"{max_size_bytes:,} byte limit"
        )

    # Create any missing intermediate directories before writing.
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(content, encoding="utf-8")

    return destination
|