# roadmap.yaml
updated: "2025-08-22"

roadmap:
  - id: "p1-analysis-cloudflare"
    priority: 1
    title: "Cloudflare Detection"
    goal: "Detect Cloudflare usage and badge it, with explanation of dual-use (security vs. abuse)."
    tags: ["analysis"]
    milestone: null
    details:
      - "Detection signals: DNS (CNAME to Cloudflare, AS13335), HTTP headers (cf-ray, cf-cache-status), IP ranges, and challenge pages."
      - "UI: add badge + tooltip with a short explainer about legitimate protection vs. abuse evasion."
      - "Edge cases: 'grey-clouded' DNS entries, partial proxy (only some records), and CDN in front of non-HTTP services."
      - "Acceptance: correctly identifies Cloudflare on known test hosts and avoids false positives on non-CF CDNs."

  - id: "p1-analysis-total-score"
    priority: 1
    title: "Total Score"
    goal: "Implement a generalized site “Total Score” (0–10 scale) to give analysts a quick risk snapshot."
    tags: ["analysis"]
    milestone: null
    details:
      - "Inputs: TLS posture, suspicious scripts/forms (severity-weighted), domain/IP reputation, server headers/misconfigs."
      - "Method: weighted components with neutral defaults when data is unavailable; avoid over-penalizing partial signals."
      - "Explainability: always show a breakdown and contribution per component; include a 'Why?' link in the UI."
      - "Calibration: start with heuristic weights, then calibrate on a test set; store weights in settings.yaml."

  - id: "p2-ui-rules-lab"
    priority: 2
    title: "Rules Lab"
    goal: "Build a WYSIWYG Rules Lab (paste, validate, run against sample text)."
    tags: ["ui"]
    milestone: null
    details:
      - "Features: syntax-highlighted editor, rule validation, run against sample payloads, show matches/captures, timing."
      - "Samples: ship a small library of example texts and rules; allow users to save their own samples (local storage)."
      - "Safety: no external network calls; size/time limits to prevent runaway regex; clear error messages."
      - "UX: one-click copy of rule JSON; link to docs on rule schema."

  - id: "p2-ui-usage-page"
    priority: 2
    title: "Usage Page"
    goal: "Create a “Usage” page to explain app functionality."
    tags: ["ui", "docs"]
    milestone: null
    details:
      - "Content: quickstart, supported analyses, cache vs. re-run behavior, artifact locations."
      - "Include: screenshots/GIFs, API curl examples, link to OpenAPI docs."
      - "Notes: clarify privacy, what we store, and retention defaults."

  - id: "move-changelog-into-app"
    priority: 2
    title: "Move Changelog into App"
    goal: "Move the changelog into the app."
    tags: ["ui", "docs"]
    milestone: null
    details:
      - "Notes: makes it much easier for users to see what's happening."
      - "Content: changelog.md already in docs."

  - id: "p2-ui-about-page"
    priority: 2
    title: "About Page"
    goal: "Create an “About” page with project context."
    tags: ["ui", "docs"]
    milestone: null
    details:
      - "Content: project purpose, high-level architecture diagram, technology stack."
      - "Meta: version, commit hash, build date; link to repo and roadmap."
      - "Governance: disclaimer about intended use and limitations."

  - id: "p3-api-core-endpoints"
    priority: 3
    title: "Core Endpoints"
    goal: "Add `/screenshot`, `/source`, and `/analyse` endpoints."
    tags: ["api"]
    milestone: null
    details:
      - "Define request/response schemas; include run_id in responses to tie artifacts together."
      - "Auth: simple token header; rate-limiting per token."
      - "Errors: standardized JSON error body; consistent HTTP codes."
      - "Docs: provide curl examples; note synchronous vs. long-running behavior."

  - id: "p3-api-analyze-script"
    priority: 3
    title: "Analyze Script Endpoint"
    goal: "Add POST /api/analyze_script in OpenAPI and serve /api/openapi.yaml."
    tags: ["api"]
    milestone: null
    details:
      - "Request: raw script text or URL; size cap; optional rule-set selection."
      - "Processing: run rules engine; return matched rule names, severities, and excerpts."
      - "Artifacts: store hashed script with metadata; include reference in response."
      - "Validation: reject binary content; enforce content-type and max size."

  - id: "p3-api-docs-ui"
    priority: 3
    title: "API Docs UI"
    goal: "Provide interactive docs (Swagger UI or Redoc) at /api-docs."
    tags: ["api"]
    milestone: null
    details:
      - "Serve OpenAPI from /api/openapi.yaml; auto-refresh on rebuild."
      - "Swagger UI 'try it out' toggle; disable in prod if needed."
      - "Theming to match app; link to Usage page for context."

  - id: "p3-api-json-errors"
    priority: 3
    title: "JSON Error Consistency"
    goal: "Ensure JSON error consistency across 400–500 responses."
    tags: ["api", "nice-to-have"]
    milestone: null
    details:
      - "Schema: {\"error\": {\"code\": int, \"message\": str, \"details\": object, \"correlation_id\": str}}."
      - "Implement Flask error handlers; return JSON for 400/403/404/405/500."
      - "Log: include correlation_id in logs; surface it in responses for support."

  - id: "p4-ops-retention-policy"
    priority: 4
    title: "Retention Policy"
    goal: "Define retention thresholds for artifacts (age/size)."
    tags: ["ops"]
    milestone: null
    details:
      - "Policy: max age per artifact type; total size caps per workspace."
      - "Configuration: settings.yaml-driven; per-type overrides."
      - "Safety: dry-run mode and deletion preview; minimum free space guard."

  - id: "p4-ops-cleanup-scripts"
    priority: 4
    title: "Cleanup Scripts"
    goal: "Implement cleanup/maintenance scripts, driven by settings.yaml."
    tags: ["ops"]
    milestone: null
    details:
      - "CLI: list, simulate, prune; log summary of bytes reclaimed and items removed."
      - "Scheduling: optional cron/apscheduler task; lock to prevent concurrent runs."
      - "Observability: emit metrics (counts, durations) to logs."

  - id: "p4-ops-results-cache"
    priority: 4
    title: "Results Cache"
    goal: "Add UX toggle: “Re-run analysis” vs. “Load from cache.”"
    tags: ["ops"]
    milestone: null
    details:
      - "Cache key: normalized URL + analysis settings; include versioning to bust on rule changes."
      - "UI: clearly label cached vs. fresh; provide 'Invalidate cache' action."
      - "TTL: setting-driven; guard against stale security results."

  - id: "p5-intel-domain-reputation"
    priority: 5
    title: "Domain Reputation"
    goal: "Build consolidated reputation store (URLHaus, OpenPhish)."
    tags: ["intel"]
    milestone: null
    details:
      - "Ingestion: scheduled pulls; parse feeds; dedupe and normalize indicators."
      - "Storage: compact on-disk DB (e.g., sqlite/duckdb) keyed by domain/URL with timestamps."
      - "Use: query during analysis; add context to findings with source + first_seen/last_seen."

  - id: "p5-intel-threat-connectors"
    priority: 5
    title: "Threat Intel Connectors"
    goal: "Add connectors for VirusTotal, ThreatFox, and future providers (via settings.yaml)."
    tags: ["intel"]
    milestone: null
    details:
      - "Config: enable per-connector with API keys via settings.yaml or env."
      - "Runtime: rate limiting and backoff; cache responses to reduce cost/latency."
      - "Merge: normalize verdicts and confidence; avoid double-counting against Total Score."

backlog:
  - id: "backlog-scan-server-profile"
    title: "Server Profile Scan"
    goal: "Run lightweight nmap scan on web/alt ports, merge with headers for stack inference."
    tags: ["scan"]
    milestone: null
    details:
      - "Scope: common ports (80,443,8000,8080,8443,22); banner grab only; conservative timing."
      - "Inference: combine banners + headers to guess stack (IIS vs. nginx/Apache)."
      - "Controls: opt-in, with time and port limits to avoid noisy scans."

  - id: "backlog-intel-ip-reputation"
    title: "IP Reputation Expansion"
    goal: "Expand reputation checks to IP blocklists and datasets."
    tags: ["intel"]
    milestone: null
    details:
      - "Sources: community blocklists with permissive licenses; document any commercial sources separately."
      - "Model: score IPs with decay over time; avoid permanent penalties for stale abuse."
      - "Integration: surface as context; do not overrule domain-level signals."

open_questions:
  - id: "design-imports-unification"
    title: "Imports Unification"
    goal: "Decide if imports/utilities (e.g., decorators) should be centralized in state.py."
    tags: ["design"]
    milestone: null
    details:
      - "Pros: consistent imports, fewer circular references, easier testing."
      - "Cons: can become a god-module; hidden dependencies."
      - "Proposal: a small 'core/state.py' for app-wide state + 'utils/' packages for helpers."

  - id: "design-score-calibration"
    title: "Score Calibration"
    goal: "Define and calibrate methodology for the Total Score scale."
    tags: ["design"]
    milestone: null
    details:
      - "Dataset: assemble a labeled set of benign/suspicious sites for tuning."
      - "Approach: start with manual weights, then fit via simple regression or grid search."
      - "Outcome: publish thresholds for low/medium/high along with examples."