From 1a26e7116551be4252fd9f4693264a9639ffee0b Mon Sep 17 00:00:00 2001
From: Phillip Tarrant
Date: Thu, 6 Nov 2025 02:10:05 -0600
Subject: [PATCH] init commit

---
 .dockerignore              |   6 +
 .gitignore                 |  14 ++
 Dockerfile                 |  27 ++++
 LICENSE                    |   7 +
 README.md                  |  73 ++++++++++
 app/config/settings.yaml   |  13 ++
 app/main.py                |   0
 app/tasks/example_task.py  |  29 ++++
 app/utils/cache_db.py      | 128 +++++++++++++++++
 app/utils/common_utils.py  | 202 ++++++++++++++++++++++++++
 app/utils/logging_setup.py |  27 ++++
 app/utils/settings.py      | 127 ++++++++++++++++
 app/utils/tasks_master.py  | 287 +++++++++++++++++++++++++++++++++++++
 build_push_image.sh        | 108 ++++++++++++++
 requirements.txt           |   9 ++
 15 files changed, 1057 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 LICENSE
 create mode 100644 README.md
 create mode 100644 app/config/settings.yaml
 create mode 100644 app/main.py
 create mode 100644 app/tasks/example_task.py
 create mode 100644 app/utils/cache_db.py
 create mode 100644 app/utils/common_utils.py
 create mode 100644 app/utils/logging_setup.py
 create mode 100644 app/utils/settings.py
 create mode 100644 app/utils/tasks_master.py
 create mode 100644 build_push_image.sh
 create mode 100644 requirements.txt

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..9decb12
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,6 @@
+# Ignore everything
+**
+
+# But keep the app directory
+!app/
+!requirements.txt
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fba0162
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Environments
+.venv/
+venv/
+
+# Secrets
+.env
+.env.bak
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b683955
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,27 @@
+# Lightweight Python base
+FROM python:3.12-slim
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PIP_ROOT_USER_ACTION=ignore
+
+# Install only essential system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates curl gnupg tzdata \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# --- Python dependencies ---
+# (the BuildKit cache mount persists pip's download cache between builds,
+# so --no-cache-dir would only defeat it)
+COPY requirements.txt .
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements.txt
+
+# --- Copy app code ---
+COPY app/ /app/
+
+# Running as root by default; add and switch to a non-root user here if you need to drop privileges
+USER root
+
+ENTRYPOINT ["python", "/app/main.py"]
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..c59077d
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,7 @@
+Copyright 2025 Phillip Tarrant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d6caccf
--- /dev/null
+++ b/README.md
@@ -0,0 +1,73 @@
+# Boilerplate
+Because I need a boilerplate for all my Python stuff, obviously
+
+## Settings
+Settings are loaded from `app/config/settings.yaml`. You can define any structure you want there; just update `utils/settings.py` with a dataclass that matches your new section. Defaults declared on the dataclasses fill in any keys missing from the YAML.
+
+### Example Usage:
+```python
+from pathlib import Path
+
+from utils.settings import get_settings
+
+# you can pass a specific config file or use the default (config/settings.yaml)
+config_file = Path("config/settings.yaml")
+settings = get_settings(config_file)
+
+# this uses config/settings.yaml
+settings = get_settings()
+
+header = f"{settings.app.name} - version:{settings.app.version_major}.{settings.app.version_minor}"
+print(header)
+```
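+
+To add your own section, define it in the YAML and mirror it with a dataclass. A minimal sketch (the `scraper` section and its fields are made-up names, not part of this repo):
+
+```yaml
+scraper:
+  base_url: https://example.com
+  timeout: 30
+```
+
+```python
+# in utils/settings.py
+@dataclass
+class ScraperConfig:
+    base_url: str = "https://example.com"
+    timeout: int = 30
+
+# ...and register it as a field on Settings:
+#   scraper: ScraperConfig = field(default_factory=ScraperConfig)
+```
+
+`get_settings().scraper.timeout` then works, with the dataclass defaults merged in for any keys the YAML omits.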
+
+## Logs setup and config
+
+I have migrated to **structlog** because it's awesome and has "extra" support.
+Example of how to use `logging_setup.py`:
+
+```python
+from utils.logging_setup import get_logger
+
+logger = get_logger()
+logger.info("I logged something", foo="bar", extra_dict={"foo": "bar"})
+```
+
+## Cache DB Example Usage
+
+```python
+from utils.cache_db import get_cache
+import time
+
+# Singleton cache with default expiration 30 minutes
+cache = get_cache(default_expiration_minutes=30)
+
+# Create a cache entry (uses default expiration 30 min)
+cache.create("user:1", {"name": "Alice"})
+
+# Create a cache entry that expires in 2 minutes
+cache.create("temp:key", {"value": 42}, expires_in_minutes=2)
+
+# Read entries
+print(cache.read("user:1"))
+print(cache.read("temp:key"))
+
+time.sleep(120)
+print(cache.read("temp:key"))  # None (expired)
+```
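+
+`update`, `delete`, and `clear` round out the CRUD surface (continuing the example above):
+
+```python
+# Replace the value and give the entry a fresh 10-minute expiration
+cache.update("user:1", {"name": "Alice", "role": "admin"}, expires_in_minutes=10)
+
+# Remove a single entry, or wipe the whole table
+cache.delete("user:1")
+cache.clear()
+```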
+
+## Tasks master example usage
+```python
+from utils.tasks_master import get_tasksmaster
+
+# Get the singleton TasksMaster instance
+tasks_master = get_tasksmaster()  # auto-creates and starts a BackgroundScheduler
+
+# The scheduler is already running at this point, but the tasks discovered in
+# app/tasks/ still need to be registered with it:
+tasks_master.run_scheduled_tasks()
+
+# List scheduled jobs
+jobs = tasks_master.list_jobs()
+for job in jobs:
+    print(f"Job {job['id']} named '{job['name']}' next runs at {job['next_run']}")
+```
\ No newline at end of file
diff --git a/app/config/settings.yaml b/app/config/settings.yaml
new file mode 100644
index 0000000..ab8d854
--- /dev/null
+++ b/app/config/settings.yaml
@@ -0,0 +1,13 @@
+app:
+  name: Boilerplate
+  version_major: 1
+  version_minor: 1
+  production: true
+  enabled: true
+  token_expiry: 3600
+
+database:
+  host: localhost
+  port: 5432
+  username: admin
+  password: secret
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/tasks/example_task.py b/app/tasks/example_task.py
new file mode 100644
index 0000000..6ea36de
--- /dev/null
+++ b/app/tasks/example_task.py
@@ -0,0 +1,29 @@
+# tasks/example_task.py
+
+import logging
+
+from utils.settings import get_settings
+
+logger = logging.getLogger(__name__)
+
+# ----------------------
+# TASK CONFIG
+# ----------------------
+TASK_CONFIG = {
+    "name": "ExampleTask",    # Name of the task
+    "cron": "*/1 * * * *",    # Runs every minute (crontab format; note that APScheduler counts day-of-week one lower than standard cron, per the APScheduler docs)
+    "enabled": True,          # If False, task is ignored
+    "run_when_loaded": True   # If True, runs immediately on scheduler start
+}
+
+# ----------------------
+# TASK LOGIC
+# ----------------------
+def main():
+    """
+    This is the entry point of the task.
+    TasksMaster will call this function based on the cron schedule.
+    """
+    settings = get_settings()
+    logger.info(f"ExampleTask is running! - Setting: {settings.app.name} was found!")
+    # Your task logic here
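+    # Illustrative only: tasks can lean on the other utils too, e.g. caching
+    # state between runs (this key name is made up):
+    #   from utils.cache_db import get_cache
+    #   get_cache().create("example_task:last_run", {"ok": True}, expires_in_minutes=5)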
diff --git a/app/utils/cache_db.py b/app/utils/cache_db.py
new file mode 100644
index 0000000..c752bbb
--- /dev/null
+++ b/app/utils/cache_db.py
@@ -0,0 +1,128 @@
+import json
+import time
+import sqlite3
+import threading
+import functools
+from pathlib import Path
+from typing import Any, Optional
+
+
+# ---------- SINGLETON DECORATOR ----------
+T = Any
+
+def singleton_loader(func):
+    """Ensure only one cache instance exists."""
+    cache: dict[str, T] = {}
+    lock = threading.Lock()
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs) -> T:
+        with lock:
+            if func.__name__ not in cache:
+                cache[func.__name__] = func(*args, **kwargs)
+            return cache[func.__name__]
+    return wrapper
+
+# ---------- CACHE CLASS ----------
+class CacheDB:
+    """SQLite-backed cache with expiration in minutes, CRUD, auto-cleanup, singleton support."""
+
+    TABLE_NAME = "cache"
+
+    def __init__(self, db_path: str | Path = "cache.db", default_expiration_minutes: int = 1440):
+        """
+        :param default_expiration_minutes: default expiration in minutes (default 24 hours)
+        """
+        self.db_path = Path(db_path)
+        self.default_expiration = default_expiration_minutes * 60  # convert minutes -> seconds
+
+        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
+        self.conn.row_factory = sqlite3.Row
+        self._lock = threading.Lock()
+        self._create_table()
+
+    def _create_table(self):
+        """Create the cache table if it doesn't exist."""
+        with self._lock:
+            self.conn.execute(f"""
+                CREATE TABLE IF NOT EXISTS {self.TABLE_NAME} (
+                    key TEXT PRIMARY KEY,
+                    value TEXT,
+                    expires_at INTEGER
+                )
+            """)
+            self.conn.commit()
+
+    def _cleanup_expired(self):
+        """Delete expired rows."""
+        now = int(time.time())
+        with self._lock:
+            self.conn.execute(
+                f"DELETE FROM {self.TABLE_NAME} WHERE expires_at IS NOT NULL AND expires_at < ?", (now,)
+            )
+            self.conn.commit()
+
+    # ---------- CRUD ----------
+    def create(self, key: str, value: Any, expires_in_minutes: Optional[int] = None):
+        """Insert or update a cache entry. expires_in_minutes overrides the default expiration."""
+        self._cleanup_expired()
+        if expires_in_minutes is None:
+            expires_in_seconds = self.default_expiration
+        else:
+            expires_in_seconds = expires_in_minutes * 60
+        expires_at = int(time.time()) + expires_in_seconds
+
+        value_json = json.dumps(value)
+        with self._lock:
+            self.conn.execute(
+                f"INSERT OR REPLACE INTO {self.TABLE_NAME} (key, value, expires_at) VALUES (?, ?, ?)",
+                (key, value_json, expires_at)
+            )
+            self.conn.commit()
+
+    def read(self, key: str) -> Optional[Any]:
+        """Read a cache entry. Auto-cleans expired items."""
+        self._cleanup_expired()
+        with self._lock:
+            row = self.conn.execute(
+                f"SELECT * FROM {self.TABLE_NAME} WHERE key = ?", (key,)
+            ).fetchone()
+        if not row:
+            return None
+        return json.loads(row["value"])
+
+    def update(self, key: str, value: Any, expires_in_minutes: Optional[int] = None):
+        """Update a cache entry. Optional expiration in minutes."""
+        if expires_in_minutes is None:
+            expires_in_seconds = self.default_expiration
+        else:
+            expires_in_seconds = expires_in_minutes * 60
+        expires_at = int(time.time()) + expires_in_seconds
+
+        value_json = json.dumps(value)
+        with self._lock:
+            self.conn.execute(
+                f"UPDATE {self.TABLE_NAME} SET value = ?, expires_at = ? WHERE key = ?",
+                (value_json, expires_at, key)
+            )
+            self.conn.commit()
+
+    def delete(self, key: str):
+        with self._lock:
+            self.conn.execute(f"DELETE FROM {self.TABLE_NAME} WHERE key = ?", (key,))
+            self.conn.commit()
+
+    def clear(self):
+        """Delete all rows from the cache table."""
+        with self._lock:
+            self.conn.execute(f"DELETE FROM {self.TABLE_NAME}")
+            self.conn.commit()
+
+    def close(self):
+        self.conn.close()
+
+
+# ---------- SINGLETON INSTANCE ----------
+@singleton_loader
+def get_cache(db_path: str = "cache.db", default_expiration_minutes: int = 1440) -> CacheDB:
+    return CacheDB(db_path=db_path, default_expiration_minutes=default_expiration_minutes)
diff --git a/app/utils/common_utils.py b/app/utils/common_utils.py
new file mode 100644
index 0000000..c4501d0
--- /dev/null
+++ b/app/utils/common_utils.py
@@ -0,0 +1,202 @@
+import re
+import os
+import sys
+import csv
+import json
+import logging
+import zipfile
+import functools
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+try:
+    import requests
+    import yaml
+except ModuleNotFoundError:
+    msg = (
+        "Required modules are not installed. "
+        "Cannot continue with module / application loading.\n"
+        "Install them with: pip install -r requirements.txt"
+    )
+    print(msg, file=sys.stderr)
+    logger.error(msg)
+    sys.exit(1)
+
+
+# ---------- SINGLETON DECORATOR ----------
+def singleton_loader(func):
+    """Decorator to ensure a singleton instance."""
+    cache = {}
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        if func.__name__ not in cache:
+            cache[func.__name__] = func(*args, **kwargs)
+        return cache[func.__name__]
+
+    return wrapper
+
+
+# ---------- UTILITY CLASSES ----------
+class FileUtils:
+    """File and directory utilities."""
+
+    @staticmethod
+    def ensure_directory(path):
+        """Create the directory if it doesn't exist."""
+        dir_path = Path(path)
+        if not dir_path.exists():
+            dir_path.mkdir(parents=True, exist_ok=True)
+            logger.info(f"Created directory: {dir_path}")
+            return True
+        return False
+
+    @staticmethod
+    def create_dir_if_not_exist(dir_to_create):
+        return FileUtils.ensure_directory(dir_to_create)
+
+    @staticmethod
+    def list_files_with_ext(directory="/tmp", ext="docx"):
+        """List all files in a directory with a specific extension."""
+        return [f for f in os.listdir(directory) if f.endswith(ext)]
+
+    @staticmethod
+    def download_file(url, dest_path):
+        """Download a file from a URL to a local path."""
+        response = requests.get(url, stream=True, timeout=60)  # timeout so a dead server can't hang us
+        response.raise_for_status()
+        with open(dest_path, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        logger.info(f"File downloaded to: {dest_path} from {url}")
+
+    @staticmethod
+    def unzip_file(zip_path, extract_to="."):
+        """Unzip a file to the given directory."""
+        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+            zip_ref.extractall(extract_to)
+        logger.info(f"{zip_path} extracted to: {extract_to}")
+
+    @staticmethod
+    def verify_file_exist(filepath: Path, exit_if_false=False):
+        """Verify a file exists."""
+        if not filepath.exists():
+            if exit_if_false:
+                sys.stderr.write(f"[FATAL] File not found: {filepath}\n")
+                sys.exit(1)
+            return False
+        return True
+
+    @staticmethod
+    def read_yaml_file(full_file_path: Path):
+        """Read a YAML file safely."""
+        if not FileUtils.verify_file_exist(full_file_path):
+            logger.error(f"Unable to read yaml - {full_file_path} does not exist")
+            return {}
+        try:
+            with open(full_file_path, 'r') as yfile:
+                return yaml.safe_load(yfile)
+        except Exception as e:
+            logger.error(f"Unable to read yaml due to: {e}")
+            return {}
+
+    @staticmethod
+    def delete_list_of_files(files_to_delete: list):
+        """Delete multiple files safely."""
+        for file_path in files_to_delete:
+            try:
+                os.remove(file_path)
+                logger.info(f"Deleted {file_path}")
+            except FileNotFoundError:
+                logger.warning(f"File not found: {file_path}")
+            except PermissionError:
+                logger.warning(f"Permission denied: {file_path}")
+            except Exception as e:
+                logger.error(f"Error deleting {file_path}: {e}")
+
+
+class TextUtils:
+    """Text parsing and string utilities."""
+
+    @staticmethod
+    def extract_strings(data: bytes, min_length: int = 4):
+        """Extract ASCII and UTF-16LE strings from binary data."""
+        ascii_re = re.compile(rb"[ -~]{%d,}" % min_length)
+        ascii_strings = [match.decode("ascii", errors="ignore") for match in ascii_re.findall(data)]
+
+        wide_re = re.compile(rb"(?:[ -~]\x00){%d,}" % min_length)
+        wide_strings = [match.decode("utf-16le", errors="ignore") for match in wide_re.findall(data)]
+
+        return ascii_strings + wide_strings
+
+    @staticmethod
+    def defang_url(url: str) -> str:
+        """Defang a URL to prevent it from being clickable."""
+        return url.replace('.', '[.]').replace(':', '[:]')
+
+    @staticmethod
+    def load_dirty_json(json_text: str):
+        """Load JSON, return None on error."""
+        try:
+            return json.loads(json_text)
+        except Exception as e:
+            logger.warning(f"Failed to parse JSON: {e}")
+            return None
+
+
+class DataUtils:
+    """Data manipulation utilities (CSV, dict lists)."""
+
+    @staticmethod
+    def sort_dict_list(dict_list, key):
+        """Sort a list of dictionaries by a given key."""
+        return sorted(dict_list, key=lambda x: x[key])
+
+    @staticmethod
+    def write_to_csv(data, headers, filename):
+        """
+        Write a list of dictionaries to a CSV file with the specified headers.
+        Values are written in header order (so headers should match the dict
+        keys); nested dicts/lists are flattened for CSV output.
+        """
+        if not data:
+            logger.warning("No data provided to write to CSV")
+            return
+
+        with open(filename, mode='w', newline='', encoding='utf-8') as file:
+            writer = csv.writer(file)
+            writer.writerow(headers)
+
+            # take values in header order so columns always line up with the header row
+            key_mapping = list(headers)
+            for item in data:
+                row = []
+                for key in key_mapping:
+                    item_value = item.get(key, "")
+                    if isinstance(item_value, list):
+                        entry = ", ".join(str(v) for v in item_value)
+                    elif isinstance(item_value, dict):
+                        entry = json.dumps(item_value)
+                    else:
+                        entry = str(item_value)
+                    row.append(entry)
+                writer.writerow(row)
+
+
+# ---------- SINGLETON FACTORY ----------
+@singleton_loader
+def get_common_utils():
+    """
+    Returns the singleton instance for common utilities.
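+    It simply aggregates FileUtils, TextUtils, and DataUtils onto one object;
+    the classes can also be imported and used directly.
+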
+    Usage:
+        utils = get_common_utils()
+        utils.FileUtils.ensure_directory("/tmp/data")
+        utils.TextUtils.defang_url("http://example.com")
+    """
+    # Aggregate all utility classes into one instance
+    class _CommonUtils:
+        FileUtils = FileUtils
+        TextUtils = TextUtils
+        DataUtils = DataUtils
+
+    return _CommonUtils()
diff --git a/app/utils/logging_setup.py b/app/utils/logging_setup.py
new file mode 100644
index 0000000..2f9c2ec
--- /dev/null
+++ b/app/utils/logging_setup.py
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+import logging
+import structlog
+
+
+def _configure() -> None:
+    processors = [
+        structlog.processors.add_log_level,
+        structlog.processors.TimeStamper(fmt="iso", utc=False),
+        structlog.processors.StackInfoRenderer(),
+        structlog.processors.format_exc_info,
+        structlog.processors.JSONRenderer(sort_keys=True),
+    ]
+
+    structlog.configure(
+        processors=processors,
+        wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
+        cache_logger_on_first_use=True,
+    )
+
+
+_configure()
+
+
+def get_logger() -> structlog.typing.FilteringBoundLogger:
+    return structlog.get_logger()
\ No newline at end of file
diff --git a/app/utils/settings.py b/app/utils/settings.py
new file mode 100644
index 0000000..2a07e07
--- /dev/null
+++ b/app/utils/settings.py
@@ -0,0 +1,127 @@
+#
+# Note: the default settings file path is hardcoded in get_settings() below
+# (config/settings.yaml relative to the working directory).
+#
+# To make a new settings section, just add the section dict to your yaml
+# and then define a matching data class below in the config data classes area.
+#
+# Example use from anywhere - this will always return the same singleton:
+#     from utils.settings import get_settings
+#
+#     def main():
+#         settings = get_settings()
+#         print(settings.database.host)  # Autocomplete works
+#         print(settings.app.production)
+#
+#     if __name__ == "__main__":
+#         main()
+
+import functools
+from pathlib import Path
+from typing import Any, Callable, TypeVar
+from dataclasses import dataclass, fields, is_dataclass, field, MISSING
+
+try:
+    import yaml
+except ModuleNotFoundError:
+    import logging
+    import sys
+
+    logger = logging.getLogger(__name__)
+    msg = (
+        "Required modules are not installed. "
" + "Can not continue with module / application loading.\n" + "Install it with: pip install -r requirements" + ) + print(msg, file=sys.stderr) + logger.error(msg) + exit() + +# ---------- CONFIG DATA CLASSES ---------- +@dataclass +class DatabaseConfig: + host: str = "localhost" + port: int = 5432 + username: str = "root" + password: str = "" + + +@dataclass +class AppConfig: + name: str = "MyApp" + version_major: int = 1 + version_minor: int = 0 + production: bool = False + enabled: bool = True + token_expiry: int = 3600 + + +@dataclass +class Settings: + database: DatabaseConfig = field(default_factory=DatabaseConfig) + app: AppConfig = field(default_factory=AppConfig) + + @classmethod + def from_yaml(cls, path: str | Path) -> "Settings": + """Load settings from YAML file into a Settings object.""" + with open(path, "r", encoding="utf-8") as f: + raw: dict[str, Any] = yaml.safe_load(f) or {} + + init_kwargs = {} + for f_def in fields(cls): + yaml_value = raw.get(f_def.name, None) + + # Determine default value from default_factory or default + if f_def.default_factory is not MISSING: + default_value = f_def.default_factory() + elif f_def.default is not MISSING: + default_value = f_def.default + else: + default_value = None + + # Handle nested dataclasses + if is_dataclass(f_def.type): + if isinstance(yaml_value, dict): + # Merge YAML values with defaults + merged_data = {fld.name: getattr(default_value, fld.name) for fld in fields(f_def.type)} + merged_data.update(yaml_value) + init_kwargs[f_def.name] = f_def.type(**merged_data) + else: + init_kwargs[f_def.name] = default_value + else: + init_kwargs[f_def.name] = yaml_value if yaml_value is not None else default_value + + return cls(**init_kwargs) + + +# ---------- SINGLETON DECORATOR ---------- +T = TypeVar("T") + +def singleton_loader(func: Callable[..., T]) -> Callable[..., T]: + """Ensure the function only runs once, returning the cached value.""" + cache: dict[str, T] = {} + + @functools.wraps(func) + def wrapper(*args, **kwargs) -> T: + if func.__name__ not in cache: + cache[func.__name__] = func(*args, **kwargs) + return cache[func.__name__] + + return wrapper + + +@singleton_loader +def get_settings(config_path: str | Path | None = None) -> Settings: + """ + Returns the singleton Settings instance. + + Args: + config_path: Optional path to the YAML config file. If not provided, + defaults to 'config/settings.yaml' in the current working directory. + """ + DEFAULT_SETTINGS_FILE = Path.cwd() / "config" /"settings.yaml" + + if config_path is None: + config_path = DEFAULT_SETTINGS_FILE + else: + config_path = Path(config_path) + + return Settings.from_yaml(config_path) \ No newline at end of file diff --git a/app/utils/tasks_master.py b/app/utils/tasks_master.py new file mode 100644 index 0000000..a49dab5 --- /dev/null +++ b/app/utils/tasks_master.py @@ -0,0 +1,287 @@ +import os +import sys +import datetime +import logging +import functools +import threading +import importlib +import importlib.util + +logger = logging.getLogger(__file__) + +try: + from apscheduler.schedulers.background import BackgroundScheduler + from apscheduler.triggers.cron import CronTrigger + from apscheduler.events import EVENT_JOB_EXECUTED, EVENT_JOB_ERROR +except ModuleNotFoundError: + msg = ( + "Required modules are not installed. 
" + "Can not continue with module / application loading.\n" + "Install it with: pip install -r requirements" + ) + print(msg, file=sys.stderr) + logger.error(msg) + exit() + + +# ---------- TASKSMASTER CLASS ---------- +class TasksMaster: + + TASK_DEFAULT_CRON = '*/15 * * * *' + TASK_JITTER = 240 + TASKS_FOLDER = "tasks" + + def __init__(self, scheduler: BackgroundScheduler): + self.tasks = self._config_tasks() + self.scheduler = scheduler + self.last_run_times = {} + self.scheduler.add_listener(self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR) + + def _config_tasks(self): + """ + Loads tasks from the TASKS_FOLDER and logs how many were found. + """ + tasks_defined = self._load_tasks_from_folder(self.TASKS_FOLDER) + logger.info(f"Scheduled Tasks Loaded from folder: {self.TASKS_FOLDER}") + return tasks_defined + + def _load_tasks_from_folder(self, folder_path): + """ + Loads and registers task modules from a specified folder. + + This function scans the given folder for Python (.py) files, dynamically + imports each as a module, and looks for two attributes: + - TASK_CONFIG: A dictionary containing task metadata, specifically the + 'name' and 'cron' (cron schedule string). + - main: A callable function that represents the task's execution logic. + + Tasks with both attributes are added to a list with their configuration and + execution function. + + Args: + folder_path (str): Path to the folder containing task scripts. + + Returns: + list[dict]: A list of task definitions with keys: + - 'name' (str): The name of the task. + - 'filename' (str): The file the task was loaded from. + - 'cron' (str): The crontab string for scheduling. + - 'enabled' (bool): Whether the task is enabled. + - 'run_when_loaded' (bool): Whether to run the task immediately. + """ + tasks = [] + + if not os.path.exists(folder_path): + logger.error(f"{folder_path} does not exist! Unable to load tasks!") + return tasks + + # we sort the files so that we have a set order, which helps with debugging + for filename in sorted(os.listdir(folder_path)): + + # skip any non python files, as well as any __pycache__ or .pyc files that might creep in there + if not filename.endswith('.py') or filename.startswith("__"): + continue + + path = os.path.join(folder_path, filename) + module_name = filename[:-3] + spec = importlib.util.spec_from_file_location(f"tasks.{module_name}", path) + module = importlib.util.module_from_spec(spec) + try: + spec.loader.exec_module(module) + sys.modules[f"tasks.{module_name}"] = module + except Exception as e: + logger.error(f"Failed to import {filename}: {e}") + continue + + # if we have a tasks config and a main function, we attempt to schedule it + if hasattr(module, 'TASK_CONFIG') and hasattr(module, 'main'): + + # ensure task_config is a dict + if not isinstance(module.TASK_CONFIG, dict): + logger.error(f"TASK_CONFIG is not a dict in {filename}. 
Skipping task.") + continue + + task_cron = module.TASK_CONFIG.get("cron") or self.TASK_DEFAULT_CRON + task_name = module.TASK_CONFIG.get("name", module_name) + + # ensure the task_cron is a valid cron value + try: + CronTrigger.from_crontab(task_cron) + except ValueError as ve: + logger.error(f"Invalid cron format for task {task_name}: {ve} - Skipping this task") + continue + + task = { + 'name': module.TASK_CONFIG.get('name', module_name), + 'filename': filename, + 'cron': task_cron, + "enabled": module.TASK_CONFIG.get("enabled", False), + "run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False) + } + + tasks.append(task) + + # we are missing things, and we log what's missing + else: + if not hasattr(module, 'TASK_CONFIG'): + logger.warning(f"Missing TASK_CONFIG in {filename}") + elif not hasattr(module, 'main'): + logger.warning(f"Missing main() in {filename}") + + return tasks + + def _add_jobs(self): + # for each task in the tasks config file... + for task_to_run in self.tasks: + + # remember, these tasks, are built from the "load_tasks_from_folder" function, + # if you want to pass data from the TASKS_CONFIG dict, you need to pass it there to get it here. + task_name = task_to_run.get("name") + run_when_loaded = task_to_run.get("run_when_loaded") + module_name = os.path.splitext(task_to_run.get("filename"))[0] + task_enabled = task_to_run.get("enabled", False) + + # if no crontab set for this task, we use 15 as the default. + task_cron = task_to_run.get("cron") or self.TASK_DEFAULT_CRON + + # if task is disabled, skip this one + if not task_enabled: + logger.info(f"{task_name} is disabled in client config. Skipping task") + continue + try: + if os.path.isfile(os.path.join("tasks", task_to_run.get("filename"))): + # schedule the task now that everything has checked out above... + self._schedule_task(task_name, module_name, task_cron, run_when_loaded) + logger.info(f"Scheduled {module_name} cron is set to {task_cron}.", extra={"task": task_to_run}) + else: + logger.info(f"Skipping invalid or unsafe file: {task_to_run.get('filename')}", extra={"task": task_to_run}) + + except Exception as e: + logger.error(f"Error scheduling task: {e}", extra={"tasks": task_to_run}) + + def _schedule_task(self, task_name, module_name, task_cron, run_when_loaded): + try: + # Dynamically import the module + module = importlib.import_module(f"tasks.{module_name}") + + # Check if the module has a 'main' function + if hasattr(module, 'main'): + logger.info(f"Scheduling {task_name} - {module_name} Main Function") + + # unique_job_id + job_identifier = f"{module_name}__{task_name}" + + # little insurance to make sure the cron is set to something and not none + if task_cron is None: + task_cron = self.TASK_DEFAULT_CRON + + trigger = CronTrigger.from_crontab(task_cron) + + # schedule the task / job + if run_when_loaded: + logger.info(f"Task: {task_name} is set to run instantly. 
+                    self.scheduler.add_job(
+                        module.main,
+                        trigger,
+                        id=job_identifier,
+                        name=task_name,
+                        next_run_time=datetime.datetime.now(),
+                        max_instances=1
+                    )
+                else:
+                    self.scheduler.add_job(
+                        module.main,
+                        trigger,
+                        id=job_identifier,
+                        name=task_name,
+                        max_instances=1
+                    )
+            else:
+                logger.error(f"{module_name} does not define a 'main' function.")
+
+        except Exception as e:
+            logger.error(f"Failed to load {module_name}: {e}")
+
+    def job_listener(self, event):
+        job_id = event.job_id
+        self.last_run_times[job_id] = datetime.datetime.now()
+
+        if event.exception:
+            logger.error(f"Job {event.job_id} failed: {event.exception}")
+        else:
+            logger.info(f"Job {event.job_id} completed successfully.")
+
+    def list_jobs(self):
+        scheduled_jobs = self.scheduler.get_jobs()
+        jobs_list = []
+
+        for job in scheduled_jobs:
+            jobs_list.append({
+                "id": job.id,
+                "name": job.name,
+                "next_run": job.next_run_time,
+            })
+        return jobs_list
+
+    def run_scheduled_tasks(self):
+        """
+        Registers and schedules the enabled tasks on the background scheduler.
+
+        This method performs the following:
+        1. Adds jobs to the scheduler from the task configurations loaded at
+           construction time.
+        2. Starts the scheduler (if it is not already running) so tasks begin
+           executing at their defined intervals.
+
+        This ensures the scheduler is running with the loaded task definitions
+        and their enabled status.
+        """
+
+        # Add enabled tasks to the scheduler
+        self._add_jobs()
+
+        # Start the scheduler to begin executing the scheduled tasks (if not already running)
+        if not self.scheduler.running:
+            self.scheduler.start()
+
+
+# ---------- SINGLETON WRAPPER ----------
+def singleton_loader(func):
+    """Decorator to ensure only one instance exists."""
+    cache = {}
+    lock = threading.Lock()
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        with lock:
+            if func.__name__ not in cache:
+                cache[func.__name__] = func(*args, **kwargs)
+            return cache[func.__name__]
+    return wrapper
+
+
+@singleton_loader
+def get_tasksmaster(scheduler: BackgroundScheduler | None = None) -> TasksMaster:
+    """
+    Returns the singleton TasksMaster instance.
+
+    - Automatically creates a BackgroundScheduler if none is provided.
+    - Automatically starts the scheduler when the singleton is created.
+
+    :param scheduler: Optional APScheduler instance. If None, a new BackgroundScheduler will be created.
+    """
+    if scheduler is None:
+        scheduler = BackgroundScheduler()
+
+    tm_instance = TasksMaster(scheduler)
+
+    # Auto-start scheduler if not already running
+    if not scheduler.running:
+        scheduler.start()
+        logger.info("TasksMaster scheduler started automatically with singleton creation.")
+
+    return tm_instance
diff --git a/build_push_image.sh b/build_push_image.sh
new file mode 100644
index 0000000..fb01224
--- /dev/null
+++ b/build_push_image.sh
@@ -0,0 +1,108 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+########################################
+# Config (edit these)
+########################################
+REGISTRY="git.sneakygeek.net"
+NAMESPACE="sneakygeek"
+IMAGE_NAME="sneakyscan"
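+
+# Example of the tag auto-detection below (illustrative): building from branch
+# "Feature/New-Scanner" yields the tag "feature-new-scanner" (lowercased, with
+# unsafe characters replaced by "-").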
+
+# A helpful pointer/reference for your repo URL. Not used by the docker commands,
+# but kept for clarity and future automation if you want to clone/build elsewhere.
+GIT_REPO_URL="https://git.sneakygeek.net/sneakygeek/sneakyscan.git"
+
+# If you prefer to override the tag manually sometimes:
+#   export DOCKER_TAG=my-custom-tag
+########################################
+
+# Colors
+BOLD='\033[1m'; DIM='\033[2m'; GREEN='\033[32m'; YELLOW='\033[33m'; RED='\033[31m'; NC='\033[0m'
+
+# Helpers
+confirm() {
+  # Usage: confirm "Question?" default_yes|default_no
+  local prompt response default
+  if [[ "${2:-default_no}" == "default_yes" ]]; then
+    prompt=" [Y/n] "
+    default="y"
+  else
+    prompt=" [y/N] "
+    default="n"
+  fi
+  read -r -p "$1$prompt" response || true
+  response="${response:-$default}"
+  [[ "$response" =~ ^[Yy]$ ]]
+}
+
+docker_tag_sanitize() {
+  # Docker tags must be <=128 chars from [A-Za-z0-9_.-]; lowercased here for sanity
+  local raw="$1"
+  local lower="${raw,,}"
+  local safe
+  safe="$(printf '%s' "$lower" | tr -c 'a-z0-9_.-' '-')"
+  printf '%.128s' "$safe"
+}
+
+detect_git_tag() {
+  if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+    # Branch name, or fall back to describe if detached
+    local ref
+    ref="$(git rev-parse --abbrev-ref HEAD 2>/dev/null || true)"
+    if [[ "$ref" == "HEAD" || -z "$ref" ]]; then
+      # detached HEAD → use nearest tag or short commit
+      ref="$(git describe --tags --always --dirty 2>/dev/null || git rev-parse --short HEAD 2>/dev/null || echo 'detached')"
+    fi
+    docker_tag_sanitize "$ref"
+  else
+    # Not a git repo → default to "latest" unless DOCKER_TAG is set
+    echo "latest"
+  fi
+}
+
+main() {
+  local image_repo="${REGISTRY}/${NAMESPACE}/${IMAGE_NAME}"
+  local tag latest_tag push_latest
+
+  # Determine tag
+  if [[ -n "${DOCKER_TAG:-}" ]]; then
+    tag="$(docker_tag_sanitize "$DOCKER_TAG")"
+  else
+    tag="$(detect_git_tag)"
+  fi
+
+  echo -e "${BOLD}Image:${NC} ${image_repo}"
+  echo -e "${BOLD}Tag:  ${NC} ${GREEN}${tag}${NC}"
+  echo -e "${DIM}(git: $(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo 'n/a'), repo: ${GIT_REPO_URL})${NC}"
+
+  # Build
+  echo -e "\n${BOLD}==> Building ${image_repo}:${tag}${NC}"
+  docker build -t "${image_repo}:${tag}" .
+
+  # Ask about :latest
+  if confirm "Also tag as :latest?" default_no; then
+    latest_tag="latest"
+    echo -e "${BOLD}Tagging:${NC} ${image_repo}:${latest_tag}"
+    docker tag "${image_repo}:${tag}" "${image_repo}:${latest_tag}"
+    push_latest="yes"
+  else
+    push_latest="no"
+  fi
+
+  # Ask about pushing
+  if confirm "Push the built image(s) now?" default_yes; then
+    echo -e "\n${BOLD}==> Pushing ${image_repo}:${tag}${NC}"
+    docker push "${image_repo}:${tag}"
+    if [[ "$push_latest" == "yes" ]]; then
+      echo -e "${BOLD}==> Pushing ${image_repo}:latest${NC}"
+      docker push "${image_repo}:latest"
+    fi
+    echo -e "${GREEN}Done.${NC}"
+  else
+    echo -e "${YELLOW}Skipping push. Local images created:${NC}"
+    echo "  - ${image_repo}:${tag}"
+    [[ "$push_latest" == "yes" ]] && echo "  - ${image_repo}:latest"
+  fi
+}
+
+main "$@"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..bdc7672
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+APScheduler==3.11.1
+certifi==2025.10.5
+charset-normalizer==3.4.4
+idna==3.11
+PyYAML==6.0.3
+requests==2.32.5
+structlog==25.5.0
+tzlocal==5.3.1
+urllib3==2.5.0