import os import json import logging from datetime import datetime, timedelta # Note - several things in this file are tied to Jira or other utils that are not included in this file due to proprietary info in those files. # Configure logging once, this is just a basic logger - implement your own and adjust as needed. logging.basicConfig( level=logging.INFO, # or DEBUG for development format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S", ) logger = logging.getLogger(__file__) # External Imports try: import jq import requests from requests.auth import HTTPBasicAuth from requests.packages.urllib3.exceptions import InsecureRequestWarning # type: ignore requests.packages.urllib3.disable_warnings(InsecureRequestWarning) except ImportError as e: print(f"Unable to load external libraries - {e}") print("Run pip install -r requirements.txt") exit(1) class wazuh_dsl_query: def __init__(self, query_start_time_iso:str, query_end_time_iso:str): self.size = 1 self.aggs = {} self.filters = [ {"match_all": {}} ] self.excludes = [] self.add_filter_range("timestamp",query_start_time_iso,query_end_time_iso) pass def set_size(self,new_size:int): self.size = new_size def add_agg_count(self, agg_name:str, agg_field_name:str): agg = { agg_name:{ "value_count":{ "field":agg_field_name } } } self.aggs.update(agg) def add_agg_count_group(self, agg_name:str, agg_field_name:str, max_size=20): agg = { agg_name:{ "terms":{ "field":agg_field_name, "size": max_size } } } self.aggs.update(agg) def add_filter_range(self, field_name:str, greater_than, less_than, timestamp=False): if timestamp: self.filters.append({"range": { field_name: { "gte": greater_than, "lte": less_than, "format": "strict_date_optional_time" } }}) else: self.filters.append({"range": { field_name: { "gte": greater_than, "lte": less_than, } }}) def add_filter_exact_match(self,field_name:str, field_value:str): self.filters.append({ "match_phrase": { field_name: field_value } }) def add_filter_value_in_field(self,field_name:str,field_values_list:list): should_list = [] for field_value in field_values_list: entry = {"match_phrase": {field_name: field_value}} should_list.append(entry) self.filters.append({ "bool": { "should":should_list, "minimum_should_match": 1 }}) def add_filter_field_must_exist(self,field_name:str): self.filters.append({ "exists": { "field": field_name } }) def add_filter_wildcard_match(self,value:str): self.filters.append({ "multi_match": { "type": "best_fields", "query": value, "lenient": True }}) def add_filter_exclude(self,field_name:str,value:str): e = { "match_phrase": { field_name: value } } self.excludes.append(e) def get_query(self,exclude_size=True): query = {} query.update({"filter":self.filters}) if len(self.excludes) > 0: query.update({"must_not": self.excludes}) r = { "query": {"bool":query} } if not exclude_size: r.update({"size":self.size}) # if we have aggrigates if len(self.aggs) > 0: r.update({"aggs": self.aggs}) # if we have excludes return r class wazuh_api: ALERT_LOWER=12 ALERT_HIGHEST=16 def __init__(self, indexer_url:str, analyst_url:str, admin_api_url:str, user:str, password:str, admin_username:str, admin_password:str,snapshot_repo: str = "AWS-S3"): self.base_url = indexer_url self.security_url = admin_api_url self.analyst_url = analyst_url self.user = user self.password = password self.admin_username = admin_username self.admin_password = admin_password self.snapshot_repo = snapshot_repo self.headers = { "Accept": "application/json", "Content-Type": "application/json" } pass def settings_valid(self): if (self.base_url is None) or (self.user is None) or (self.password is None): logger.critical("API settings are not valid. Unable to run Ingestion. Please check config.") return False else: return True def get_time_around_wazuh_alert_time(self, wazuh_alert_time:str, minutes_around=2): # Convert the date string to a datetime object original_date = datetime.strptime(wazuh_alert_time, '%Y-%m-%dT%H:%M:%S.%fZ') original_date = original_date.replace(second=0, microsecond=0) # Calculate the dates x minutes before and after the original date date_before = original_date - timedelta(minutes=minutes_around) date_after = original_date + timedelta(minutes=minutes_around) # Return the results as strings in the same format as the input return date_before.strftime('%Y-%m-%dT%H:%M:%S.%fZ'), date_after.strftime('%Y-%m-%dT%H:%M:%S.%fZ') def search_logs_related_to_alert(self, timestamp:str,minutes_around:int, filter_label:str, filter_value:str): start,end = self.get_time_around_wazuh_alert_time(timestamp,minutes_around) q = wazuh_dsl_query(start,end) q.set_size(50) q.add_filter_exact_match(filter_label,filter_value) query = q.get_query(exclude_size=False) return self.do_wazuh_search(query) def do_wazuh_search(self, query:dict, index="wazuh-alerts-*",hits_only=True): try: search_url = f"{self.base_url}/{index}/_search" logger.info(f"Searching Wazuh - {search_url}",extra={"query":query}) resp = self.__get_json_payload_request(search_url,query) hits = resp.get("hits",{}).get("hits",[]) if hits_only: return hits else: return resp except Exception as e: logger.error(f"Error while searching Wazuh! - {e}", extra={"query": query, "url":search_url}) return [] def do_wazuh_sql_query(self,query:str): logger.info(f"Executing SQL Search in Wazuh",extra={"query":query}) try: sql_url = f"{self.base_url}/_plugins/_sql?format=json" payload = {"query":query} resp = self._post_json_request(sql_url,payload) return resp except Exception as e: logger.error(f"Error while doing SQL Post to Wazuh! - {e}", extra={"query": query, "url":sql_url}) return [] def check_snapshot_exists(self, date:str): url = f"{self.base_url}/_snapshot/{self.snapshot_repo}/{date}" try: resp = self._get_param_request(url,None) snapshots = resp.get("snapshots",[]) if len(snapshots) > 0: state = snapshots[0].get("state") return state return state except Exception as e: return False def do_snapshot(self, date:str): url = f"{self.base_url}/_snapshot/AWS-S3/{date}" payload = { "indices": "wazuh-alerts-*" } try: resp = self._post_json_request(url,payload) return True except Exception as e: return False def get_count_for_wazuh_query(self,query:dict, index="wazuh-alerts-*"): try: search_url = f"{self.base_url}/{index}/_count" logger.info(f"Getting Count From Wazuah - {search_url}",extra={"query":query}) resp = self.__get_json_payload_request(search_url,query) return resp except Exception as e: logger.error(f"Error while searching Wazuh! - {e}", extra={"query": query, "url":search_url}) return [] def get_token(self): url = f"{self.security_url}/security/user/authenticate" payload = None resp = self._post_json_request(url,payload,True) token = resp.get("data",{}).get("token") if token is not None: logger.info("Successfully generated auth token") else: logger.critical(f"Unable to get token for request to url",extra={"url":url,"payload":payload}) return token def get_agent_overview(self): url = f"{self.security_url}/overview/agents" parms = {} resp = self.__get_json_request_w_token(url,parms) data = resp.get("data") return data def get_alerts_count_for_time_range(self,start_date_iso:str,end_date_iso:str): q = wazuh_dsl_query(start_date_iso,end_date_iso) query = q.get_query(exclude_size=True) return self.get_count_for_wazuh_query(query) def parse_aggs_return(self,aggs:dict) -> dict: buckets = {} if not isinstance(aggs,dict): return {} for group_name,dict_values in aggs.items(): items = [] for entry in dict_values.get("buckets",[]): items.append({entry.get("key"):entry.get("doc_count")}) buckets.update({group_name:items}) return buckets def format_cve_list(self, entry): # Step 1: Extract the list of CVEs from the data buckets = entry.get('data.vulnerability.cve', {}).get('buckets', []) cve_items = [] for bucket in buckets: key = bucket.get('key') if key: cve_items.append(key) # Step 2: Format the list into a string that looks like a tuple if cve_items: quoted_items = ['"{}"'.format(cve) for cve in cve_items] cve_string = ', '.join(quoted_items) return f'({cve_string})' else: return '' def get_vuln_report_data_v2(self,start_date_iso:str, end_date_iso:str, limit=False): query = f"""SELECT agent.name FROM wazuh-alerts-* WHERE timestamp >= '{start_date_iso}' AND timestamp <= '{end_date_iso}' AND location = 'vulnerability-detector' AND data.vulnerability.status = 'Active' GROUP BY agent.name, data.vulnerability.cve HAVING SUM(CASE WHEN data.vulnerability.status = 'Solved' THEN 1 ELSE 0 END) = 0""" resp = self.do_wazuh_sql_query(query) agent_groups = resp.get("aggregations",{}).get("agent.name",{}).get("buckets",{}) agents_with_vulns = [] if len(agent_groups) > 0: x=0 for entry in agent_groups: x = x+1 if ( limit and (x >=6) ): return agents_with_vulns agent_name = entry['key'] cve_list = self.format_cve_list(entry) query = f"""SELECT d.agent.name, d.agent.ip, d.data.vulnerability.package.name, d.data.vulnerability.package.version, d.data.vulnerability.severity, d.data.vulnerability.cve, d.data.vulnerability.cvss.cvss3.base_score, d.data.vulnerability.rationale, d.data.vulnerability.package.condition, d.data.vulnerability.references, d.data.vulnerability.updated FROM wazuh-alerts-* d WHERE agent.name = '{agent_name}' AND timestamp >= '{start_date_iso}' AND timestamp <= '{end_date_iso}' AND data.vulnerability.cve IN {cve_list}""" resp = self.do_wazuh_sql_query(query) data = resp.get("hits",{}).get("hits",{}) parsed_data = self.__parse_vuln_record(data) agents_with_vulns.extend(parsed_data) return agents_with_vulns def __parse_vuln_record(self,vuln_record:list): jq_script = """ .[] | { "Machine Name": ._source.agent.name, "IP": ._source.agent.ip, "Application": ._source.data.vulnerability.package.name, "App Version": ._source.data.vulnerability.package.version, "Severity": ._source.data.vulnerability.severity, "CVE ID": ._source.data.vulnerability.cve, "CVE Score": ._source.data.vulnerability.cvss.cvss3.base_score, "CVE Summary": ._source.data.vulnerability.rationale, "CVE Condition": ._source.data.vulnerability.package.condition, "CVE References": (._source.data.vulnerability.references // []) | join(", "), "CVE Updated Date": ._source.data.vulnerability.updated } """ parsed_doc = jq.compile(jq_script).input_value(vuln_record).all() return parsed_doc def get_top_agents_with_alerts(self,start_date_iso:str,end_date_iso:str): q = wazuh_dsl_query(start_date_iso,end_date_iso) q.set_size(0) q.add_filter_range("rule.level",self.ALERT_LOWER,self.ALERT_HIGHEST) q.add_filter_field_must_exist("rule.level") q.add_filter_exclude("agent.id","000") q.add_agg_count_group("agents","agent.name",10) query = q.get_query(exclude_size=False) # do a search (we are not interested in hits, and the size was 0 any, so no records were returned) resp = self.do_wazuh_search(query,hits_only=False) # get the aggregations and parse them aggs = resp.get("aggregations",{}) bucket_data = self.parse_aggs_return(aggs) return bucket_data def get_mitre_attack_data(self,start_date_iso:str,end_date_iso:str): # build a query that searches for alerts, grouping by mitre tactic and technique q = wazuh_dsl_query(start_date_iso,end_date_iso) q.set_size(0) q.add_filter_range("rule.level",self.ALERT_LOWER,self.ALERT_HIGHEST) q.add_filter_field_must_exist("rule.mitre.tactic") q.add_agg_count_group("mitre_tactics","rule.mitre.tactic",10) q.add_agg_count_group("mitre_tech","rule.mitre.technique",10) query = q.get_query(exclude_size=False) # do a search (we are not interested in hits, and the size was 0 any, so no records were returned) resp = self.do_wazuh_search(query,hits_only=False) # get the aggregations and parse them aggs = resp.get("aggregations",{}) bucket_data = self.parse_aggs_return(aggs) return bucket_data def get_alert_by_id(self,alert_id:str,date="2025-02-11"): query = { "size": 1, "query": { "bool": { "must": [], "filter": [ { "match_all": {} }, { "match_phrase": { "_id": f"{alert_id}" } }, { "range": { "timestamp": { "gte": f"{date}T00:00:00.000Z", "lte": f"{date}T23:23:59.999Z", "format": "strict_date_optional_time" } } } ], "should": [], "must_not": [] } } } alert = self.do_wazuh_search(query) return alert def get_alerts(self, index="wazuh-alerts-*", min_alert_level:int=12, max_alert_level:int=16,max_alerts:int=1000,minutes_back:int=60): query = { "size": max_alerts, "query": { "bool": { "must": [], "filter": [ { "bool": { "should": [ { "range": { "rule.level": { "gte": min_alert_level, "lte": max_alert_level } } } ], "minimum_should_match": 1 } }, { "range": { "timestamp": { "gte": f"now-{minutes_back}m", "lte": "now", "format": "strict_date_optional_time" } } } ], "should": [], "must_not": [] } } } return self.do_wazuh_search(query,index) def get_integration_type(self, record:dict): record_integration = record.get("_source",{}).get("data",{}).get("integration").replace("-","_") return record_integration def handle_default_wazuh(self, alert_record:dict) -> dict: # print(json.dumps(alert_record,indent=True)) # exit() jq_script = """ { "timestamp":._source.timestamp, "human_date": (._source.timestamp | split("T")[0]), "human_time": (._source.timestamp | split("T")[1] | split(".")[0]), "priority":._source.rule.level, "summary":._source.rule.description, "mitre_tactics":(._source.rule.mitre.tactic // []), "mitre_techniques":(._source.rule.mitre.technique // []), "rule_id":._source.rule.id, "rule_fired_times":(._source.rule.firedtimes // 1), "integration":(._source.data.integration // "default"), "source_user":(._source.data.srcuser // ""), "source_ip":(._source.data.srcip // ""), } """ jq_script = "{"+jq_script+"}" try: r = jq.compile(jq_script).input_value(alert_record).first() return r except Exception as e: logger.error(f"Unable to parse native Wazuh alert",extra={"alert":alert_record}) return {} def handle_json(self,alert_record:dict) -> dict: integration_type = self.get_integration_type(alert_record) parser = f"parse_{integration_type}_integration" try: if callable(getattr(self,parser)): parsed_data = getattr(self, parser)(alert_record) return parsed_data except AttributeError as e: logger.error(f"No parser found for integration {integration_type}, create one at {parser}") return {} def parse_default_integration(self, alert_record: dict) -> dict: """Fallback parser that just uses the standard handler.""" return self.handle_default_wazuh(alert_record) def _get_param_request(self,url,params): try: if params is None: response = requests.request("GET",url,headers=self.headers,auth=(self.user, self.password), verify=False, timeout=10) else: response = requests.request("GET",url,headers=self.headers,params=params,auth=(self.user, self.password), verify=False, timeout=10) if not response.ok: logger.critical("Error doing Get Request",extra={"api_response":response.text,"status_code":response.status_code,"params":params,"url":url}) return {} else: return response.json() except Exception as error: logger.critical(f"Get Call Not sent {params} - error:{error}") return {} def __get_json_payload_request(self,url, payload): try: if payload is None: response = requests.request("GET",url,headers=self.headers,auth=(self.user, self.password), verify=False, timeout=10) else: response = requests.request("GET",url,headers=self.headers,json=payload,auth=(self.user, self.password), verify=False, timeout=10) if not response.ok: logger.critical("Error doing Get Request",extra={"api_response":response.text,"status_code":response.status_code,"payload":payload,"url":url}) return {} else: return response.json() except Exception as error: logger.critical(f"Get Call Not sent {url} - error:{error}",extra={"payload":payload}) return {} def _post_json_request(self, url, payload,use_admin_creds=False): try: if use_admin_creds: response = requests.post(url, json=payload, headers=self.headers, auth=(self.admin_username, self.admin_password), verify=False, timeout=10) else: response = requests.post(url, json=payload, headers=self.headers, auth=(self.user, self.password), verify=False, timeout=10) if not response.ok: logger.critical("Error posting Request",extra={"api_response":response.text,"status_code":response.status_code,"payload":payload,"url":url}) return {} else: return response.json() except Exception as error: logger.critical(f"Payload not delivered {payload} - error:{error}",extra={"payload":payload,"url":url}) return {} def __post_json_request_w_token(self, url, payload): try: auth_token = self.get_token() if auth_token is None: return {} auth_headers = self.headers auth_headers.update({"Authorization":f"Bearer {auth_token}"}) response = requests.post(url, json=payload, headers=auth_headers, verify=False, timeout=10) if not response.ok: logger.critical("Error posting Request",extra={"api_response":response.text,"status_code":response.status_code,"payload":payload,"url":url}) return {} else: return response.json() except Exception as error: logger.critical(f"Payload not delivered {payload} - error:{error}",extra={"payload":payload,"url":url}) return {} def __get_json_request_w_token(self, url, params:dict): try: auth_token = self.get_token() if auth_token is None: return {} auth_headers = self.headers auth_headers.update({"Authorization":f"Bearer {auth_token}"}) response = requests.get(url, params=params, headers=auth_headers, verify=False, timeout=10) if not response.ok: logger.critical("Error posting Request",extra={"api_response":response.text,"status_code":response.status_code,"params":params,"url":url}) return {} else: return response.json() except Exception as error: logger.critical(f"Unable to get URL {url} - error:{error}",extra={"params":params,"url":url}) return {}