- Add SSL/TLS intelligence pipeline:
- crt.sh lookup with expired-certificate filtering and root-domain wildcard resolution
- live TLS version/cipher probe with weak/legacy flags and probe notes (sketched after this list)
- UI: card + matrix rendering, raw JSON toggle, and host/wildcard cert lists
- Front page: checkbox to optionally fetch certificate/CT data
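A minimal sketch of the two lookups, assuming the public crt.sh JSON endpoint and only the stdlib `ssl` module; the helper names (`fetch_ct_certs`, `probe_tls`) and result fields are illustrative, not the repo's actual API:

```python
# Illustrative only: hypothetical helpers approximating the pipeline's two probes.
import json
import socket
import ssl
from datetime import datetime, timezone
from urllib.request import urlopen

LEGACY_VERSIONS = {"SSLv3", "TLSv1", "TLSv1.1"}

def fetch_ct_certs(domain: str) -> list[dict]:
    """Query crt.sh CT logs for a domain, dropping expired certificates."""
    # For wildcard/subdomain coverage, the query can be "%.domain" (URL-encoded "%25.").
    url = f"https://crt.sh/?q={domain}&output=json"
    with urlopen(url, timeout=15) as resp:
        entries = json.load(resp)
    now = datetime.now(timezone.utc)
    live = []
    for e in entries:
        # crt.sh returns naive ISO timestamps, e.g. "2026-01-15T12:34:56"
        not_after = datetime.fromisoformat(e["not_after"]).replace(tzinfo=timezone.utc)
        if not_after >= now:
            live.append(e)
    return live

def probe_tls(host: str, port: int = 443) -> dict:
    """Record the negotiated TLS version/cipher and flag legacy protocols."""
    ctx = ssl.create_default_context()
    # To detect servers that still *accept* legacy protocols, a real probe would
    # also retry with ctx.minimum_version lowered to ssl.TLSVersion.TLSv1.
    with socket.create_connection((host, port), timeout=10) as sock:
        with ctx.wrap_socket(sock, server_hostname=host) as tls:
            version = tls.version()              # e.g. "TLSv1.3"
            cipher_name, _, bits = tls.cipher()  # e.g. ("TLS_AES_256_GCM_SHA384", ..., 256)
    return {"version": version, "cipher": cipher_name, "bits": bits,
            "legacy": version in LEGACY_VERSIONS}
```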
- Introduce `URLNormalizer` with punycode support and typo repair (sketch after this list)
- Auto-prepend `https://` for bare domains (e.g., `google.com`)
- Optional quick HTTPS reachability + `http://` fallback
- Provide singleton via function-cached `@singleton_loader`:
- `get_url_normalizer()` reads defaults from Settings (if present)
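A hedged sketch of the normalizer surface: the class and `get_url_normalizer()` names come from this change, but the internals below are assumptions, and `functools.lru_cache` stands in for the repo's `@singleton_loader`:

```python
# Illustrative only: a plausible shape for URLNormalizer, not the repo's code.
from functools import lru_cache
from urllib.parse import urlsplit, urlunsplit

class URLNormalizer:
    def __init__(self, default_scheme: str = "https"):
        self.default_scheme = default_scheme

    def normalize(self, raw: str) -> str:
        raw = raw.strip()
        if "://" not in raw:                             # bare domain: "google.com"
            raw = f"{self.default_scheme}://{raw}"
        parts = urlsplit(raw)
        host = parts.hostname or ""
        try:
            host = host.encode("idna").decode("ascii")   # punycode for IDN hosts
        except UnicodeError:
            pass                                         # leave as-is; rules can flag it
        netloc = host + (f":{parts.port}" if parts.port else "")
        # Typo repair and the optional HTTPS reachability / http:// fallback
        # checks are omitted from this sketch.
        return urlunsplit((parts.scheme.lower(), netloc, parts.path,
                           parts.query, parts.fragment))

@lru_cache(maxsize=1)   # stand-in for the repo's @singleton_loader
def get_url_normalizer() -> URLNormalizer:
    return URLNormalizer()  # the real version reads defaults from Settings
```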
- Standardize function-rule return shape to `(bool, dict|None)` across `form_*` and `script_*` rules; include structured payloads (`note`, hosts, ext, etc.) as in the example below
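For illustration, a `form_*` rule following the `(bool, dict|None)` convention; the fact keys used here are hypothetical:

```python
# Hypothetical rule showing the standardized (bool, dict | None) return shape.
def form_posts_to_external_host(facts: dict) -> tuple[bool, dict | None]:
    page_host = facts.get("page_host")
    external = [h for h in facts.get("form_action_hosts", [])
                if h and h != page_host]
    if external:
        # Match: structured payload instead of a bare string
        return True, {"note": "form action posts off-site", "hosts": external}
    return False, None  # miss: no payload
```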
- Harden `FunctionRuleAdapter` (coercion sketched below):
- Coerce legacy returns `(bool)`, `(bool, str)` → normalized outputs
- Adapt non-dict inputs to facts (category-aware and via provided adapter)
- Return `(True, dict)` on match, `(False, None)` on miss
- Bind-time logging with file:line + function id for diagnostics
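A sketch of the coercion step only, assuming the legacy shapes listed above; fact adaptation and bind-time logging are elided:

```python
# Illustrative coercion of legacy rule returns into the normalized shape.
def _normalize_result(result) -> tuple[bool, dict | None]:
    if isinstance(result, bool):                       # legacy: bare bool
        return (result, {} if result else None)
    if isinstance(result, tuple) and len(result) == 2:
        matched, payload = result
        if isinstance(payload, str):                   # legacy: (bool, str)
            payload = {"note": payload}
        return (True, payload or {}) if matched else (False, None)
    return (False, None)                               # unknown shape: treat as miss
```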
- `RuleEngine` (sketch below):
- Back rules by private `self._rules`; `rules` property returns copy
- Idempotent `add_rule(replace=False)` with in-place replace and regex (re)compile
- Fix AttributeError from property assignment during `__init__`
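A minimal sketch of that contract; attribute names other than `_rules`, `rules`, and `add_rule` are assumptions:

```python
# Sketch: private backing list, copy-returning property, idempotent add_rule.
import re

class RuleEngine:
    def __init__(self):
        # Assigning to self.rules here would hit the read-only property
        # (the AttributeError this change fixes); write to the backing field.
        self._rules = []

    @property
    def rules(self) -> list:
        return list(self._rules)          # copy: callers cannot mutate engine state

    def add_rule(self, rule, replace: bool = False) -> None:
        if isinstance(getattr(rule, "pattern", None), str):
            rule.compiled = re.compile(rule.pattern)   # (re)compile regex rules
        for i, existing in enumerate(self._rules):
            if existing.name == rule.name:
                if replace:
                    self._rules[i] = rule              # in-place replacement
                return                                 # idempotent: no duplicates
        self._rules.append(rule)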
- Replace hidden singleton factory with explicit builder + global state (sketch below):
- `app/rules/factory.py::build_rules_engine()` builds and logs totals
- `app/state.py` exposes `set_rules_engine()` / `get_rules_engine()` as the single source of truth
- `app/wsgi.py` builds once at preload and publishes via `set_rules_engine()`
- Add lightweight debug hooks (`SS_DEBUG_RULES=1`) to trace engine id and rule counts
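The moving parts, sketched under the module names above (the bodies are assumptions; only the function names, env var, and preload timing come from this change):

```python
# app/state.py -- process-global holder; consumers read only through this.
_rules_engine = None

def set_rules_engine(engine) -> None:
    global _rules_engine
    _rules_engine = engine

def get_rules_engine():
    if _rules_engine is None:
        raise RuntimeError("rules engine not published; build it at preload")
    return _rules_engine

# app/wsgi.py -- build exactly once under gunicorn --preload, then publish.
import os
from app.rules.factory import build_rules_engine
from app.state import set_rules_engine

engine = build_rules_engine()
if os.environ.get("SS_DEBUG_RULES") == "1":
    # Debug hook: trace which engine instance the workers will share
    print(f"[rules] engine id={id(engine)} rule_count={len(engine.rules)}")
set_rules_engine(engine)
```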
- Unify logging wiring (sketch below):
- `wire_logging_once(app)` clears and attaches a single handler chain
- Create two named loggers: `sneakyscope.app` and `sneakyscope.engine`
- Disable propagation to prevent dupes; include pid/logger name in format
- Remove stray/duplicate handlers and import-time logging
- Optional dedup filter for bursty repeats (kept off by default)
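A sketch of the idempotent wiring, assuming a single stdout handler; only the logger names, the pid/name format fields, and the no-propagation rule come from the change itself:

```python
# Illustrative single-chain wiring for the two named loggers plus Flask's.
import logging
import sys

def wire_logging_once(app) -> None:
    fmt = logging.Formatter(
        "%(asctime)s pid=%(process)d %(name)s %(levelname)s: %(message)s")
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(fmt)
    for log in (logging.getLogger("sneakyscope.app"),
                logging.getLogger("sneakyscope.engine"),
                app.logger):
        log.handlers.clear()       # drop stray/duplicate handlers
        log.addHandler(handler)
        log.setLevel(logging.INFO)
        log.propagate = False      # no bubbling to root, so no duplicate lines
```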
- Gunicorn: enable `--preload` in entrypoint to avoid thread races and double registration
- Document the foreground vs background log “double consumer” caveat (attach vs `compose logs`)
- Jinja: replace `{% return %}` with structured `if/elif/else` branches
- Add toggle button to show raw JSON for TLS/CT section
- Consumers should import the rules engine via:
- `from app.state import get_rules_engine`
- Use `build_rules_engine()` **only** during preload/init to construct the instance,
  then publish with `set_rules_engine()`. Do not call old singleton factories. See the snippet below.
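Consumer-side, that reduces to (the `evaluate` call is a hypothetical method name):

```python
from app.state import get_rules_engine

def check(facts: dict):
    engine = get_rules_engine()     # the instance published at preload
    return engine.evaluate(facts)   # hypothetical evaluation entry point
```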
- New/changed modules (high level):
- `app/utils/urltools.py` (+) — URLNormalizer + `get_url_normalizer()`
- `app/rules/function_rules.py` (±) — normalized payload returns
- `engine/function_rule_adapter.py` (±) — coercion, fact adaptation, bind logs
- `app/utils/rules_engine.py` (±) — `_rules`, idempotent `add_rule`, fixes
- `app/rules/factory.py` (±) — pure builder; totals logged post-registration
- `app/state.py` (+) — process-global rules engine
- `app/logging_setup.py` (±) — single chain, two named loggers
- `app/wsgi.py` (±) — preload build + `set_rules_engine()`
- `entrypoint.sh` (±) — add `--preload`
- templates (±) — TLS card, raw toggle; front-page checkbox
Closes: flaky rule-type warnings, duplicate logs, and multi-worker race on rules init.
import json
from pathlib import Path
from datetime import datetime

from app.logging_setup import get_app_logger

logger = get_app_logger()


def safe_write(path: Path | str, content: str, mode="w", encoding="utf-8"):
    """Write content to a file safely with logging."""
    path = Path(path)
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, mode, encoding=encoding) as f:
            f.write(content)
        logger.info(f"[+] Wrote file: {path}")
    except Exception as e:
        logger.error(f"[!] Failed writing {path}: {e}")
        raise


def get_recent_results(storage_dir: Path, limit: int, logger) -> list[dict]:
    """
    Scan the SANDBOX_STORAGE directory for run folders (UUIDs), read each
    run's results.json, and return the most recent N entries by file mtime.

    Args:
        storage_dir (Path): Base path where UUID run directories live.
        limit (int): Maximum number of recent items to return.
        logger: Flask or stdlib logger to record non-fatal issues.

    Returns:
        list[dict]: Each item includes:
            {
                "uuid": str,
                "submitted_url": str | None,
                "final_url": str | None,
                "timestamp": str (ISO 8601),
            }
        Returns an empty list if no runs are found or on error.
    """
    items = []

    try:
        # Ensure the storage dir exists
        storage_dir.mkdir(parents=True, exist_ok=True)

        # Iterate directories directly under storage_dir
        for entry in storage_dir.iterdir():
            try:
                if not entry.is_dir():
                    continue  # skip non-directories

                # Expect results.json inside each UUID directory
                results_path = entry / "results.json"
                if not results_path.exists():
                    continue  # skip folders without results.json

                # Read file metadata (mtime) for sorting and display
                mtime_epoch = results_path.stat().st_mtime
                mtime_iso = datetime.fromtimestamp(mtime_epoch).isoformat(timespec="seconds")

                # Parse a small subset of the JSON for display
                submitted_url = None
                final_url = None
                try:
                    with open(results_path, "r", encoding="utf-8") as f:
                        data = json.load(f)
                    if isinstance(data, dict):
                        submitted_url = data.get("submitted_url")
                        final_url = data.get("final_url")
                except Exception as read_err:
                    # If JSON is malformed or unreadable, log and continue
                    if logger:
                        logger.warning(f"[recent] Failed reading {results_path}: {read_err}")

                items.append((mtime_epoch, {
                    "uuid": entry.name,
                    "submitted_url": submitted_url,
                    "final_url": final_url,
                    "timestamp": mtime_iso,
                }))
            except Exception as inner_err:
                # Keep going; a single bad folder should not break the list
                if logger:
                    logger.warning(f"[recent] Skipping {entry}: {inner_err}")

        # Sort newest first by mtime, then trim to the requested limit
        items.sort(key=lambda t: t[0], reverse=True)
        return [item for _, item in items[:limit]]

    except Exception as outer_err:
        if logger:
            logger.error(f"[recent] Unexpected error while scanning {storage_dir}: {outer_err}")
        return []