- API: add `POST /api/analyze_script` (app/blueprints/api.py)
- Fetch one external script to artifacts, run rules, return findings + snippet
- Uses new ExternalScriptFetcher (results_path aware) and job UUID
- Returns: { ok, final_url, status_code, bytes, truncated, sha256, artifact_path, findings[], snippet, snippet_len }
- TODO: document in openapi/openapi.yaml
- Fetcher: update `app/utils/external_fetch.py`
- Constructed with `results_path` (UUID dir); writes to `<results_path>/scripts/fetched/<index>.js`
- Loads settings via `get_settings()`, logs via std logging
- UI (results.html):
- Move “Analyze external script” action into **Content Snippet** column for external rows
- Clicking replaces button with `<details>` snippet, shows rule matches, and adds “open in viewer” link
- Robust fetch handler (checks JSON, shows errors); builds viewer URL from absolute artifact path
- Viewer:
- New route: `GET /view/artifact/<run_uuid>/<path:filename>` (app/blueprints/ui.py)
- New template: Monaco-based read-only code viewer (viewer.html)
- Removes the SRI (`integrity`) attribute from the loader script so the browser's integrity check does not block it; loads the file via `raw_url` and detects the language from the file extension
- Forms:
- Refactor `analyze_forms` to mirror scripts analysis:
- Uses rule engine (`category == "form"`) across regex/function rules
- Emits rows only when matches exist
- Includes `content_snippet`, `action`, `method`, `inputs`, `rules`
- Replace legacy plumbing (`flagged`, `flag_reasons`, `status`) in output
- Normalize form function rules to canonical returns `(bool, Optional[str])`:
- `form_action_missing`
- `form_http_on_https_page`
- `form_submits_to_different_host`
- Add minor hardening (lowercasing hosts, no-op actions, clearer reasons)
- CSS: add `.forms-table` to mirror `.scripts-table` (5 columns)
- Fixed table layout, widths per column, chip/snippet styling, responsive tweaks
- Misc:
- Fix “working outside app context” issue by avoiding `current_app` at import time (left storage logic inside routes)
- Add “View Source” link to open page source in viewer
Refs:
- Roadmap: mark “Source code viewer” done; keep TODO to add `/api/analyze_script` to OpenAPI
127 lines
3.8 KiB
Python
127 lines
3.8 KiB
Python
import logging
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse
|
|
import requests
|
|
import yaml
|
|
import whois
|
|
from datetime import datetime
|
|
from ipaddress import ip_address
|
|
import socket
|
|
|
|
# Local imports
|
|
from app.utils.cache_db import get_cache
|
|
from app.utils.settings import get_settings
|
|
|
|
# Configure logging
# NOTE(review): basicConfig runs at import time and configures the root
# logger for the whole process; kept as-is because callers may rely on it.
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")

# Init cache and settings (both are module-level singletons shared by the
# enrichment functions in this file).
cache = get_cache("/data/cache.db")
settings = get_settings()

# Number of minutes in one day (24 hours * 60 minutes). The TTL constants
# below are therefore "configured days" converted to minutes.
# NOTE(review): presumably cache.create() expects its TTL in minutes -- confirm
# against app.utils.cache_db.
MINUTES_PER_DAY = 24 * 60

# Backward-compatible alias: the original name suggested a day count, but the
# value has always been minutes-per-day. Kept in case other modules import it.
days = MINUTES_PER_DAY

GEOIP_DEFAULT_TTL = settings.cache.geoip_cache_days * MINUTES_PER_DAY
WHOIS_DEFAULT_TTL = settings.cache.whois_cache_days * MINUTES_PER_DAY
|
|
|
|
def enrich_url(url: str) -> dict:
    """Run WHOIS and GeoIP enrichment for the host named by *url*.

    Args:
        url: A full URL (``https://example.com/path``) or a bare host,
            optionally with a port and/or path (``example.com:8080/login``).

    Returns:
        A dict containing whatever ``enrich_whois`` returns (merged in at the
        top level) plus a ``"geoip"`` key holding the result of
        ``enrich_geoip``.
    """
    hostname = urlparse(url).hostname

    if not hostname and "://" not in url:
        # urlparse() only recognises a network location when it is introduced
        # by "//". Scheme-less input such as "example.com/path" would otherwise
        # leave hostname empty and the whole string (path included) would be
        # sent to the WHOIS / DNS lookups. Re-parse with an explicit "//".
        try:
            hostname = urlparse(f"//{url}").hostname
        except ValueError:
            # The retry must never raise where the original code did not.
            hostname = None

    # Last resort: use the input verbatim, as the original implementation did.
    hostname = hostname or url

    result = {}

    # --- WHOIS ---
    result.update(enrich_whois(hostname))

    # --- GeoIP ---
    result["geoip"] = enrich_geoip(hostname)

    return result
|
|
|
|
|
|
# Upper bound, in seconds, for the raw ``whois`` command-line fallback so that
# an unresponsive WHOIS server cannot block the caller indefinitely.
_RAW_WHOIS_TIMEOUT_SECONDS = 15


def _format_whois_value(val):
    """Render a python-whois field (date, list, string or None) as a string."""
    if isinstance(val, list):
        return ", ".join(
            v.strftime("%Y-%m-%d %H:%M:%S") if isinstance(v, datetime) else str(v)
            for v in val
        )
    if isinstance(val, datetime):
        return val.strftime("%Y-%m-%d %H:%M:%S")
    if val is None:
        # A missing field is reported to the user as likely privacy redaction.
        return "Possible Privacy"
    return str(val)


def _raw_whois(hostname):
    """Return the raw output of the system ``whois`` command, or "N/A" on failure."""
    try:
        import subprocess

        # The hostname ultimately comes from a user-supplied URL; never let it
        # be interpreted as a command-line option by the whois binary.
        if hostname.startswith("-"):
            raise ValueError(f"refusing option-like hostname: {hostname!r}")

        return subprocess.check_output(
            ["whois", hostname],
            encoding="utf-8",
            errors="ignore",
            timeout=_RAW_WHOIS_TIMEOUT_SECONDS,
        )
    except Exception as raw_e:
        # Best-effort fallback: any failure (missing binary, non-zero exit,
        # timeout, ...) degrades to a placeholder instead of propagating.
        logging.error(f"Raw WHOIS also failed: {raw_e}")
        return "N/A"


def enrich_whois(hostname: str) -> dict:
    """Fetch WHOIS info for *hostname*, using the cache when possible.

    The lookup is attempted with python-whois first. If that raises, the
    system ``whois`` command is tried and its raw text is returned instead.

    Args:
        hostname: Host or domain name to look up.

    Returns:
        On a successful structured lookup: ``{"whois": {"registrar", 
        "creation_date", "expiration_date", "owner"}}`` with every value
        rendered as a string. On fallback: ``{"whois": {}, "raw_whois": text}``
        where ``text`` is the raw command output, or ``"N/A"`` if the fallback
        failed as well. The result (including the fallback forms) is written
        to the cache with ``WHOIS_DEFAULT_TTL``.
    """
    cache_key = f"whois:{hostname}"
    cached = cache.read(cache_key)
    if cached:
        logging.info(f"[CACHE HIT] for WHOIS: {hostname}")
        return cached

    logging.info(f"[CACHE MISS] for WHOIS: {hostname}")
    result = {}
    try:
        w = whois.whois(hostname)
        result["whois"] = {
            "registrar": _format_whois_value(getattr(w, "registrar", None)),
            "creation_date": _format_whois_value(getattr(w, "creation_date", None)),
            "expiration_date": _format_whois_value(getattr(w, "expiration_date", None)),
            "owner": _format_whois_value(getattr(w, "org", None)),
        }
    except Exception as e:
        logging.warning(f"WHOIS lookup failed for {hostname}: {e}")
        # Fall back to the raw whois text; structured fields stay empty.
        result["whois"] = {}
        result["raw_whois"] = _raw_whois(hostname)

    cache.create(cache_key, result, WHOIS_DEFAULT_TTL)
    return result
|
|
|
|
|
|
def enrich_geoip(hostname: str) -> dict:
    """Resolve *hostname* to IPs and fetch info for each from ip-api.com.

    Args:
        hostname: Host name (or literal IP) to resolve.

    Returns:
        A dict keyed by IP string. Each value is either the JSON payload
        returned by ip-api.com (possibly served from the cache) or
        ``{"error": <message>}`` when the request failed. The dict is empty
        when the hostname does not resolve.

    Only successful payloads are cached. Error results (non-200 status,
    network failure, undecodable body) are returned to the caller but not
    stored, so a transient failure such as rate limiting is retried on the
    next call instead of being served from the cache for the whole TTL.
    """
    geo_info = {}

    for ip in extract_ips_from_url(hostname):
        ip_str = str(ip)
        cache_key = f"geoip:{ip_str}"

        cached = cache.read(cache_key)
        if cached:
            logging.info(f"[CACHE HIT] for GEOIP: {ip}")
            geo_info[ip_str] = cached
            continue

        logging.info(f"[CACHE MISS] for GEOIP: {ip}")
        try:
            # The numeric `fields` value selects which attributes the service
            # returns (see the ip-api.com documentation for the encoding).
            resp = requests.get(f"http://ip-api.com/json/{ip_str}?fields=24313855", timeout=5)
            if resp.status_code != 200:
                geo_info[ip_str] = {"error": f"HTTP {resp.status_code}"}
                continue
            payload = resp.json()
        except Exception as e:
            # Best-effort enrichment: report the failure for this IP and move on.
            geo_info[ip_str] = {"error": str(e)}
            continue

        geo_info[ip_str] = payload
        cache.create(cache_key, payload, GEOIP_DEFAULT_TTL)

    return geo_info
|
|
|
|
|
|
def extract_ips_from_url(hostname: str) -> list:
    """Resolve *hostname* to its unique IP addresses.

    Despite the name, the argument is a host name (or a literal IP), not a
    full URL.

    Args:
        hostname: Host name or literal IP address to resolve.

    Returns:
        A list of ``ipaddress`` address objects without duplicates, in the
        order the resolver reported them. The list is empty when resolution
        fails for any reason.
    """
    try:
        addr_infos = socket.getaddrinfo(hostname, None)
    except Exception:
        # Best-effort: an unresolvable or malformed host simply has no IPs.
        return []

    # getaddrinfo() reports the same address once per socket type, so
    # de-duplicate. A dict is used instead of a set to keep resolver order:
    # set order depends on string hashing and can differ between runs.
    unique_ips = {}
    for addr_info in addr_infos:
        try:
            parsed = ip_address(addr_info[4][0])
        except ValueError:
            # Skip a single address the ipaddress module cannot parse rather
            # than discarding every other result.
            continue
        unique_ips.setdefault(parsed, None)

    return list(unique_ips)
|