first commit

2025-11-06 03:25:44 -06:00
commit 38a9fb2789
17 changed files with 1153 additions and 0 deletions

app/utils/cache_db.py (new file, 128 lines)

@@ -0,0 +1,128 @@
import json
import time
import sqlite3
import threading
import functools
from pathlib import Path
from typing import Any, Optional
# ---------- SINGLETON DECORATOR ----------
T = Any
def singleton_loader(func):
"""Ensure only one cache instance exists."""
cache: dict[str, T] = {}
lock = threading.Lock()
@functools.wraps(func)
def wrapper(*args, **kwargs) -> T:
with lock:
if func.__name__ not in cache:
cache[func.__name__] = func(*args, **kwargs)
return cache[func.__name__]
return wrapper
# ---------- CACHE CLASS ----------
class CacheDB:
"""SQLite-backed cache with expiration in minutes, CRUD, auto-cleanup, singleton support."""
TABLE_NAME = "cache"
def __init__(self, db_path: str | Path = "cache.db", default_expiration_minutes: int = 1440):
"""
:param default_expiration_minutes: default expiration in minutes (default 24 hours)
"""
self.db_path = Path(db_path)
self.default_expiration = default_expiration_minutes * 60 # convert minutes -> seconds
self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
self.conn.row_factory = sqlite3.Row
self._lock = threading.Lock()
self._create_table()
def _create_table(self):
"""Create the cache table if it doesn't exist."""
with self._lock:
self.conn.execute(f"""
CREATE TABLE IF NOT EXISTS {self.TABLE_NAME} (
key TEXT PRIMARY KEY,
value TEXT,
expires_at INTEGER
)
""")
self.conn.commit()
def _cleanup_expired(self):
"""Delete expired rows."""
now = int(time.time())
with self._lock:
self.conn.execute(
f"DELETE FROM {self.TABLE_NAME} WHERE expires_at IS NOT NULL AND expires_at < ?", (now,)
)
self.conn.commit()
# ---------- CRUD ----------
def create(self, key: str, value: Any, expires_in_minutes: Optional[int] = None):
"""Insert or update a cache entry. expires_in_minutes overrides default expiration."""
self._cleanup_expired()
if expires_in_minutes is None:
expires_in_seconds = self.default_expiration
else:
expires_in_seconds = expires_in_minutes * 60
expires_at = int(time.time()) + expires_in_seconds
value_json = json.dumps(value)
with self._lock:
self.conn.execute(
f"INSERT OR REPLACE INTO {self.TABLE_NAME} (key, value, expires_at) VALUES (?, ?, ?)",
(key, value_json, expires_at)
)
self.conn.commit()
def read(self, key: str) -> Optional[Any]:
"""Read a cache entry. Auto-cleans expired items."""
self._cleanup_expired()
with self._lock:
row = self.conn.execute(
f"SELECT * FROM {self.TABLE_NAME} WHERE key = ?", (key,)
).fetchone()
if not row:
return None
return json.loads(row["value"])
def update(self, key: str, value: Any, expires_in_minutes: Optional[int] = None):
"""Update a cache entry. Optional expiration in minutes."""
if expires_in_minutes is None:
expires_in_seconds = self.default_expiration
else:
expires_in_seconds = expires_in_minutes * 60
expires_at = int(time.time()) + expires_in_seconds
value_json = json.dumps(value)
with self._lock:
self.conn.execute(
f"UPDATE {self.TABLE_NAME} SET value = ?, expires_at = ? WHERE key = ?",
(value_json, expires_at, key)
)
self.conn.commit()
def delete(self, key: str):
with self._lock:
self.conn.execute(f"DELETE FROM {self.TABLE_NAME} WHERE key = ?", (key,))
self.conn.commit()
def clear(self):
"""Delete all rows from the cache table."""
with self._lock:
self.conn.execute(f"DELETE FROM {self.TABLE_NAME}")
self.conn.commit()
def close(self):
self.conn.close()
# ---------- SINGLETON INSTANCE ----------
@singleton_loader
def get_cache(db_path: str = "cache.db", default_expiration_minutes: int = 1440) -> CacheDB:
return CacheDB(db_path=db_path, default_expiration_minutes=default_expiration_minutes)
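
A minimal usage sketch for the cache above, assuming the package is importable as app.utils.cache_db; the import path, key name, and cached values are illustrative only:

# example_cache_usage.py - illustrative, not part of this commit
from app.utils.cache_db import get_cache

cache = get_cache()                                   # singleton: same instance everywhere
cache.create("feed:latest", {"items": [1, 2, 3]}, expires_in_minutes=5)
print(cache.read("feed:latest"))                      # {'items': [1, 2, 3]}
cache.update("feed:latest", {"items": []})
cache.delete("feed:latest")
print(cache.read("feed:latest"))                      # None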

app/utils/common_utils.py (new file, 254 lines)

@@ -0,0 +1,254 @@
import re
import os
import sys
import csv
import json
import base64
import logging
import zipfile
import functools
from pathlib import Path
from typing import List
logger = logging.getLogger(__file__)
try:
import requests
import yaml
except ModuleNotFoundError:
msg = (
"common_utils.py - Required modules are not installed. "
"Can not continue with module / application loading.\n"
"Install it with: pip install -r requirements"
)
print(msg, file=sys.stderr)
logger.error(msg)
    sys.exit(1)
# ---------- SINGLETON DECORATOR ----------
T = type("T", (), {})
def singleton_loader(func):
"""Decorator to ensure a singleton instance."""
cache = {}
@functools.wraps(func)
def wrapper(*args, **kwargs):
if func.__name__ not in cache:
cache[func.__name__] = func(*args, **kwargs)
return cache[func.__name__]
return wrapper
# ---------- UTILITY CLASSES ----------
class FileUtils:
"""File and directory utilities."""
@staticmethod
def ensure_directory(path):
"""Create the directory if it doesn't exist."""
dir_path = Path(path)
if not dir_path.exists():
dir_path.mkdir(parents=True, exist_ok=True)
logger.info(f"Created directory: {dir_path}")
return True
return False
@staticmethod
def create_dir_if_not_exist(dir_to_create):
return FileUtils.ensure_directory(dir_to_create)
@staticmethod
def list_files_with_ext(directory="/tmp", ext="docx"):
"""List all files in a directory with a specific extension."""
return [f for f in os.listdir(directory) if f.endswith(ext)]
@staticmethod
def list_files_in_dir(directory="/tmp"):
"""List all files in a directory with a specific extension."""
return [f for f in os.listdir(directory)]
    @staticmethod
    def list_files_in_dir_w_subs(directory: str) -> List[str]:
"""
Recursively list all files in the given directory and its subdirectories.
Args:
directory (str): The path to the directory to search.
Returns:
List[str]: A list of full file paths.
"""
files = []
for root, _, filenames in os.walk(directory):
for filename in filenames:
files.append(os.path.join(root, filename))
return files
@staticmethod
def download_file(url, dest_path):
"""Download a file from a URL to a local path."""
response = requests.get(url, stream=True)
response.raise_for_status()
with open(dest_path, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
f.write(chunk)
logger.info(f"File Downloaded to: {dest_path} from {url}")
@staticmethod
def unzip_file(zip_path, extract_to="."):
"""Unzip a file to the given directory."""
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(extract_to)
logger.info(f"{zip_path} Extracted to: {extract_to}")
@staticmethod
def verify_file_exist(filepath: Path, exit_if_false=False):
"""Verify a file exists."""
if not filepath.exists():
if exit_if_false:
sys.stderr.write(f"[FATAL] File not found: {filepath}\n")
sys.exit(1)
return False
return True
@staticmethod
def read_yaml_file(full_file_path: Path):
"""Read a YAML file safely."""
if not FileUtils.verify_file_exist(full_file_path):
logger.error(f"Unable to read yaml - {full_file_path} does not exist")
return {}
try:
with open(full_file_path, 'r') as yfile:
return yaml.safe_load(yfile)
except Exception as e:
logger.error(f"Unable to read yaml due to: {e}")
return {}
@staticmethod
def delete_list_of_files(files_to_delete: list):
"""Delete multiple files safely."""
for file_path in files_to_delete:
try:
os.remove(file_path)
logger.info(f"Deleted {file_path}")
except FileNotFoundError:
logger.warning(f"File not found: {file_path}")
except PermissionError:
logger.warning(f"Permission denied: {file_path}")
except Exception as e:
logger.error(f"Error deleting {file_path}: {e}")
class TextUtils:
"""Text parsing and string utilities."""
@staticmethod
def extract_strings(data: bytes, min_length: int = 4):
"""Extract ASCII and UTF-16LE strings from binary data."""
ascii_re = re.compile(rb"[ -~]{%d,}" % min_length)
ascii_strings = [match.decode("ascii", errors="ignore") for match in ascii_re.findall(data)]
wide_re = re.compile(rb"(?:[ -~]\x00){%d,}" % min_length)
wide_strings = [match.decode("utf-16le", errors="ignore") for match in wide_re.findall(data)]
return ascii_strings + wide_strings
@staticmethod
def defang_url(url: str) -> str:
"""Defang a URL to prevent it from being clickable."""
return url.replace('.', '[.]').replace(':', '[:]')
@staticmethod
def load_dirty_json(json_text: str):
"""Load JSON, return None on error."""
try:
return json.loads(json_text)
except Exception as e:
logger.warning(f"Failed to parse JSON: {e}")
return None
@staticmethod
def encode_base64(text: str) -> str:
"""
Encode a string using Base64 and return the encoded result as a string.
Args:
text (str): The input text to encode.
Returns:
str: The Base64-encoded string.
"""
encoded_bytes = base64.b64encode(text.encode("utf-8"))
return encoded_bytes.decode("utf-8")
@staticmethod
def decode_base64(encoded_text: str) -> str:
"""
Decode a Base64-encoded string and return the original text.
Args:
encoded_text (str): The Base64-encoded string.
Returns:
str: The decoded plain text.
"""
decoded_bytes = base64.b64decode(encoded_text.encode("utf-8"))
return decoded_bytes.decode("utf-8")
class DataUtils:
"""Data manipulation utilities (CSV, dict lists)."""
@staticmethod
def sort_dict_list(dict_list, key):
"""Sort a list of dictionaries by a given key."""
return sorted(dict_list, key=lambda x: x[key])
@staticmethod
def write_to_csv(data, headers, filename):
"""
Write a list of dictionaries to a CSV file with specified headers.
Nested dicts/lists are flattened for CSV output.
"""
if not data:
logger.warning("No data provided to write to CSV")
return
with open(filename, mode='w', newline='', encoding='utf-8') as file:
writer = csv.writer(file)
writer.writerow(headers)
key_mapping = list(data[0].keys())
for item in data:
row = []
for key in key_mapping:
item_value = item.get(key, "")
if isinstance(item_value, list):
entry = ", ".join(str(v) for v in item_value)
elif isinstance(item_value, dict):
entry = json.dumps(item_value)
else:
entry = str(item_value)
row.append(entry)
writer.writerow(row)
# ---------- SINGLETON FACTORY ----------
@singleton_loader
def get_common_utils():
"""
Returns the singleton instance for common utilities.
Usage:
utils = get_common_utils()
utils.FileUtils.ensure_directory("/tmp/data")
utils.TextUtils.defang_url("http://example.com")
"""
# Aggregate all utility classes into one instance
class _CommonUtils:
FileUtils = FileUtils
TextUtils = TextUtils
DataUtils = DataUtils
return _CommonUtils()
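
A short usage sketch for the aggregated utilities above, assuming the module is importable as app.utils.common_utils; the paths and URL are placeholders:

# example_common_utils_usage.py - illustrative, not part of this commit
from app.utils.common_utils import get_common_utils

utils = get_common_utils()                                  # singleton aggregate of the classes above
utils.FileUtils.ensure_directory("/tmp/data")               # creates the directory if missing
print(utils.FileUtils.list_files_in_dir("/tmp/data"))
print(utils.TextUtils.defang_url("http://example.com"))     # http[:]//example[.]com
token = utils.TextUtils.encode_base64("hello")
print(utils.TextUtils.decode_base64(token))                 # hello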


@@ -0,0 +1,27 @@
from __future__ import annotations
import logging
import structlog
def _configure() -> None:
processors = [
structlog.processors.add_log_level,
structlog.processors.TimeStamper(fmt="iso", utc=False),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.JSONRenderer(sort_keys=True),
]
structlog.configure(
processors=processors,
wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
cache_logger_on_first_use=True,
)
_configure()
def get_logger() -> structlog.stdlib.BoundLogger:
return structlog.get_logger()
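
A usage sketch for the structlog module above; its file path is not shown in this diff, so the import path below is an assumption, and the event names are placeholders:

# illustrative usage; "app.utils.log" is an assumed module path (the real path is not shown above)
from app.utils.log import get_logger

log = get_logger()
log.info("task_started", task="cleanup", dry_run=False)    # rendered as sorted JSON with an ISO timestamp
try:
    1 / 0
except ZeroDivisionError:
    log.exception("task_failed", task="cleanup")           # format_exc_info adds the traceback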

app/utils/settings.py (new file, 127 lines)

@@ -0,0 +1,127 @@
#
# Note: the default settings file path is defined in get_settings() at the bottom of this module.
#
# To make a new settings section, just add the setting dict to your yaml
# and then define the data class below in the config data classes area.
#
# Example use from anywhere - this will always return the same singleton
# from settings import get_settings
# def main():
# settings = get_settings()
# print(settings.database.host) # Autocomplete works
# print(settings.logging.level)
# if __name__ == "__main__":
# main()
import functools
from pathlib import Path
from typing import Any, Callable, TypeVar
from dataclasses import dataclass, fields, is_dataclass, field, MISSING
try:
import yaml
except ModuleNotFoundError:
import logging
import sys
logger = logging.getLogger(__file__)
msg = (
"Required modules are not installed. "
"Can not continue with module / application loading.\n"
"Install it with: pip install -r requirements"
)
print(msg, file=sys.stderr)
logger.error(msg)
    sys.exit(1)
# ---------- CONFIG DATA CLASSES ----------
@dataclass
class DatabaseConfig:
host: str = "localhost"
port: int = 5432
username: str = "root"
password: str = ""
@dataclass
class AppConfig:
name: str = "MyApp"
version_major: int = 1
version_minor: int = 0
production: bool = False
enabled: bool = True
token_expiry: int = 3600
@dataclass
class Settings:
database: DatabaseConfig = field(default_factory=DatabaseConfig)
app: AppConfig = field(default_factory=AppConfig)
@classmethod
def from_yaml(cls, path: str | Path) -> "Settings":
"""Load settings from YAML file into a Settings object."""
with open(path, "r", encoding="utf-8") as f:
raw: dict[str, Any] = yaml.safe_load(f) or {}
init_kwargs = {}
for f_def in fields(cls):
yaml_value = raw.get(f_def.name, None)
# Determine default value from default_factory or default
if f_def.default_factory is not MISSING:
default_value = f_def.default_factory()
elif f_def.default is not MISSING:
default_value = f_def.default
else:
default_value = None
# Handle nested dataclasses
if is_dataclass(f_def.type):
if isinstance(yaml_value, dict):
# Merge YAML values with defaults
merged_data = {fld.name: getattr(default_value, fld.name) for fld in fields(f_def.type)}
merged_data.update(yaml_value)
init_kwargs[f_def.name] = f_def.type(**merged_data)
else:
init_kwargs[f_def.name] = default_value
else:
init_kwargs[f_def.name] = yaml_value if yaml_value is not None else default_value
return cls(**init_kwargs)
# ---------- SINGLETON DECORATOR ----------
T = TypeVar("T")
def singleton_loader(func: Callable[..., T]) -> Callable[..., T]:
"""Ensure the function only runs once, returning the cached value."""
cache: dict[str, T] = {}
@functools.wraps(func)
def wrapper(*args, **kwargs) -> T:
if func.__name__ not in cache:
cache[func.__name__] = func(*args, **kwargs)
return cache[func.__name__]
return wrapper
@singleton_loader
def get_settings(config_path: str | Path | None = None) -> Settings:
"""
Returns the singleton Settings instance.
Args:
config_path: Optional path to the YAML config file. If not provided,
defaults to 'config/settings.yaml' in the current working directory.
"""
    DEFAULT_SETTINGS_FILE = Path.cwd() / "config" / "settings.yaml"
if config_path is None:
config_path = DEFAULT_SETTINGS_FILE
else:
config_path = Path(config_path)
return Settings.from_yaml(config_path)
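
A sketch of how a matching config/settings.yaml feeds the dataclasses above; the host, port, and app values are examples only:

# example_settings_usage.py - illustrative, not part of this commit
from pathlib import Path
from app.utils.settings import get_settings

example_yaml = """
database:
  host: db.internal      # example values only
  port: 5433
app:
  name: DemoApp
  production: true
"""
Path("config").mkdir(exist_ok=True)
Path("config/settings.yaml").write_text(example_yaml, encoding="utf-8")

settings = get_settings()             # reads config/settings.yaml by default
print(settings.database.host)         # db.internal
print(settings.database.username)     # root (falls back to the dataclass default)
print(settings.app.production)        # True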

app/utils/tasks_master.py (new file, 287 lines)

@@ -0,0 +1,287 @@
import os
import sys
import datetime
import logging
import functools
import threading
import importlib
import importlib.util
logger = logging.getLogger(__file__)
try:
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from apscheduler.events import EVENT_JOB_EXECUTED, EVENT_JOB_ERROR
except ModuleNotFoundError:
msg = (
"Required modules are not installed. "
"Can not continue with module / application loading.\n"
"Install it with: pip install -r requirements"
)
print(msg, file=sys.stderr)
logger.error(msg)
    sys.exit(1)
# ---------- TASKSMASTER CLASS ----------
class TasksMaster:
TASK_DEFAULT_CRON = '*/15 * * * *'
TASK_JITTER = 240
TASKS_FOLDER = "tasks"
def __init__(self, scheduler: BackgroundScheduler):
self.tasks = self._config_tasks()
self.scheduler = scheduler
self.last_run_times = {}
self.scheduler.add_listener(self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)
def _config_tasks(self):
"""
Loads tasks from the TASKS_FOLDER and logs how many were found.
"""
tasks_defined = self._load_tasks_from_folder(self.TASKS_FOLDER)
logger.info(f"Scheduled Tasks Loaded from folder: {self.TASKS_FOLDER}")
return tasks_defined
def _load_tasks_from_folder(self, folder_path):
"""
Loads and registers task modules from a specified folder.
This function scans the given folder for Python (.py) files, dynamically
imports each as a module, and looks for two attributes:
- TASK_CONFIG: A dictionary containing task metadata, specifically the
'name' and 'cron' (cron schedule string).
- main: A callable function that represents the task's execution logic.
Tasks with both attributes are added to a list with their configuration and
execution function.
Args:
folder_path (str): Path to the folder containing task scripts.
Returns:
list[dict]: A list of task definitions with keys:
- 'name' (str): The name of the task.
- 'filename' (str): The file the task was loaded from.
- 'cron' (str): The crontab string for scheduling.
- 'enabled' (bool): Whether the task is enabled.
- 'run_when_loaded' (bool): Whether to run the task immediately.
"""
tasks = []
if not os.path.exists(folder_path):
logger.error(f"{folder_path} does not exist! Unable to load tasks!")
return tasks
# we sort the files so that we have a set order, which helps with debugging
for filename in sorted(os.listdir(folder_path)):
# skip any non python files, as well as any __pycache__ or .pyc files that might creep in there
if not filename.endswith('.py') or filename.startswith("__"):
continue
path = os.path.join(folder_path, filename)
module_name = filename[:-3]
spec = importlib.util.spec_from_file_location(f"tasks.{module_name}", path)
module = importlib.util.module_from_spec(spec)
try:
spec.loader.exec_module(module)
sys.modules[f"tasks.{module_name}"] = module
except Exception as e:
logger.error(f"Failed to import {filename}: {e}")
continue
# if we have a tasks config and a main function, we attempt to schedule it
if hasattr(module, 'TASK_CONFIG') and hasattr(module, 'main'):
# ensure task_config is a dict
if not isinstance(module.TASK_CONFIG, dict):
logger.error(f"TASK_CONFIG is not a dict in {filename}. Skipping task.")
continue
task_cron = module.TASK_CONFIG.get("cron") or self.TASK_DEFAULT_CRON
task_name = module.TASK_CONFIG.get("name", module_name)
# ensure the task_cron is a valid cron value
try:
CronTrigger.from_crontab(task_cron)
except ValueError as ve:
logger.error(f"Invalid cron format for task {task_name}: {ve} - Skipping this task")
continue
task = {
'name': module.TASK_CONFIG.get('name', module_name),
'filename': filename,
'cron': task_cron,
"enabled": module.TASK_CONFIG.get("enabled", False),
"run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False)
}
tasks.append(task)
# we are missing things, and we log what's missing
else:
if not hasattr(module, 'TASK_CONFIG'):
logger.warning(f"Missing TASK_CONFIG in {filename}")
elif not hasattr(module, 'main'):
logger.warning(f"Missing main() in {filename}")
return tasks
def _add_jobs(self):
# for each task in the tasks config file...
for task_to_run in self.tasks:
            # Remember: these tasks are built by _load_tasks_from_folder(); if you want to pass
            # extra data from a module's TASK_CONFIG dict, it has to be added there first.
task_name = task_to_run.get("name")
run_when_loaded = task_to_run.get("run_when_loaded")
module_name = os.path.splitext(task_to_run.get("filename"))[0]
task_enabled = task_to_run.get("enabled", False)
            # if no crontab is set for this task, fall back to the default (every 15 minutes).
task_cron = task_to_run.get("cron") or self.TASK_DEFAULT_CRON
# if task is disabled, skip this one
if not task_enabled:
logger.info(f"{task_name} is disabled in client config. Skipping task")
continue
try:
if os.path.isfile(os.path.join("tasks", task_to_run.get("filename"))):
# schedule the task now that everything has checked out above...
self._schedule_task(task_name, module_name, task_cron, run_when_loaded)
logger.info(f"Scheduled {module_name} cron is set to {task_cron}.", extra={"task": task_to_run})
else:
logger.info(f"Skipping invalid or unsafe file: {task_to_run.get('filename')}", extra={"task": task_to_run})
except Exception as e:
logger.error(f"Error scheduling task: {e}", extra={"tasks": task_to_run})
def _schedule_task(self, task_name, module_name, task_cron, run_when_loaded):
try:
# Dynamically import the module
module = importlib.import_module(f"tasks.{module_name}")
# Check if the module has a 'main' function
if hasattr(module, 'main'):
logger.info(f"Scheduling {task_name} - {module_name} Main Function")
# unique_job_id
job_identifier = f"{module_name}__{task_name}"
# little insurance to make sure the cron is set to something and not none
if task_cron is None:
task_cron = self.TASK_DEFAULT_CRON
trigger = CronTrigger.from_crontab(task_cron)
# schedule the task / job
if run_when_loaded:
logger.info(f"Task: {task_name} is set to run instantly. Scheduling to run on scheduler start")
self.scheduler.add_job(
module.main,
trigger,
id=job_identifier,
jitter=self.TASK_JITTER,
name=task_name,
next_run_time=datetime.datetime.now(),
max_instances=1
)
else:
self.scheduler.add_job(
module.main,
trigger,
id=job_identifier,
jitter=self.TASK_JITTER,
name=task_name,
max_instances=1
)
else:
logger.error(f"{module_name} does not define a 'main' function.")
except Exception as e:
logger.error(f"Failed to load {module_name}: {e}")
def job_listener(self, event):
job_id = event.job_id
self.last_run_times[job_id] = datetime.datetime.now()
if event.exception:
logger.error(f"Job {event.job_id} failed: {event.exception}")
else:
logger.info(f"Job {event.job_id} completed successfully.")
def list_jobs(self):
scheduled_jobs = self.scheduler.get_jobs()
jobs_list = []
for job in scheduled_jobs:
jobs_list.append({
"id": job.id,
"name": job.name,
"next_run": job.next_run_time,
})
return jobs_list
def run_scheduled_tasks(self):
"""
Runs and schedules enabled tasks using the background scheduler.
This method performs the following:
1. Retrieves the current task configurations and updates internal state.
2. Adds new jobs to the scheduler based on the latest configuration.
3. Starts the scheduler to begin executing tasks at their defined intervals.
This ensures the scheduler is always running with the most up-to-date
task definitions and enabled status.
"""
# Add enabled tasks to the scheduler
self._add_jobs()
# Start the scheduler to begin executing the scheduled tasks (if not already running)
if not self.scheduler.running:
self.scheduler.start()
# ---------- SINGLETON WRAPPER ----------
def singleton_loader(func):
    """Decorator to ensure only one instance exists."""
    cache = {}
    lock = threading.Lock()

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        with lock:
            if func.__name__ not in cache:
                cache[func.__name__] = func(*args, **kwargs)
            return cache[func.__name__]
    return wrapper
@singleton_loader
def get_tasksmaster(scheduler: BackgroundScheduler | None = None) -> TasksMaster:
"""
Returns the singleton TasksMaster instance.
- Automatically creates a BackgroundScheduler if none is provided.
- Automatically starts the scheduler when the singleton is created.
:param scheduler: Optional APScheduler instance. If None, a new BackgroundScheduler will be created.
"""
if scheduler is None:
scheduler = BackgroundScheduler()
tm_instance = TasksMaster(scheduler)
# Auto-start scheduler if not already running
if not scheduler.running:
scheduler.start()
logger.info("TasksMaster scheduler started automatically with singleton creation.")
return tm_instance
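
A sketch of a task module the loader above would pick up, plus the bootstrap calls; the file name tasks/example_task.py, the task name, and the schedule are illustrative:

# tasks/example_task.py - illustrative task module, not part of this commit
TASK_CONFIG = {
    "name": "example_task",
    "cron": "*/5 * * * *",       # standard 5-field crontab string
    "enabled": True,             # disabled tasks are skipped by _add_jobs()
    "run_when_loaded": True,     # also fire once right after the scheduler starts
}

def main():
    print("example task ran")

# elsewhere in the application:
# from app.utils.tasks_master import get_tasksmaster
# tm = get_tasksmaster()         # creates and starts a BackgroundScheduler
# tm.run_scheduled_tasks()       # registers the enabled tasks with the running scheduler
# print(tm.list_jobs())          # [{'id': ..., 'name': ..., 'next_run': ...}]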