Files
vulncheck/app/routers/scans.py
T
vulncheck 49b55e0066 fix(scans): unify wazuh scan_type → 'WAZUH' for clear source labelling
Tester noticed the Scan Jobs history showed 'Full' for every scheduled
Wazuh run and 'Syscollector' for every autoscan. Operator can't tell
which scanner produced the row at a glance — both labels are opaque
implementation details, not the source.

Consolidates to canonical source-named ScanType.WAZUH:
  - scheduler.py (was FULL)
  - routers/scans.py autoscan (was SYSCOLLECTOR)
Nessus path already uses ScanType.NESSUS so the table now reads
'wazuh' / 'nessus' / 'manual' — matches what the operator expects.

Migration 019:
  1. ALTER TYPE scantype ADD VALUE IF NOT EXISTS 'WAZUH' (uppercase
     to match SQLAlchemy Enum.name binding used by existing values).
  2. UPDATE scans SET scan_type='WAZUH' WHERE scan_type IN
     ('FULL','SYSCOLLECTOR') — historic rows pick up the cleaner
     label too, so the operator doesn't see mixed history.

Old FULL/SYSCOLLECTOR enum values remain in the type — dropping
Postgres enum values is destructive. They're flagged as legacy in
the Python enum comment and never written by new code.

Frontend display is unchanged — already uses {run.scan_type} with
.capitalize, so 'wazuh' → 'Wazuh' renders automatically.
2026-05-20 08:33:06 +02:00

407 lines
13 KiB
Python

"""
Scan Jobs Management Router
"""
from typing import List, Optional
from datetime import datetime, timedelta
from fastapi import APIRouter, Depends, HTTPException, status, Query
from sqlalchemy.orm import Session
from sqlalchemy import func
from pydantic import BaseModel
from app.database import get_db
from app.models.scan import Scan, ScanType, ScanStatus
from app.models.scan_schedule import ScanSchedule, ScheduleInterval
from app.models.asset import Asset
from app.models.user import User
from app.auth.dependencies import get_current_user, RequireEditor
from app.routers.vulnerabilities import sync_agent_vulnerabilities
# Router collecting every endpoint in this module; all routes are served
# under the /api/v1/scans prefix and tagged "Scans".
router = APIRouter(prefix="/api/v1/scans", tags=["Scans"])
# --- Schemas ---
class ScanResponse(BaseModel):
    # Response shape for one per-asset scan record returned by list_scans.

    # --- identity ---
    id: int
    asset_id: int

    # --- classification / lifecycle ---
    scan_type: ScanType
    status: ScanStatus
    started_at: Optional[datetime]
    completed_at: Optional[datetime]

    # --- outcome ---
    vulnerabilities_found: int
    error_message: Optional[str]

    # Display convenience: list_scans fills this in from the scan's asset.
    asset_hostname: Optional[str] = None

    class Config:
        # Let the model be populated from objects via attribute access.
        from_attributes = True
class ScheduleCreateRequest(BaseModel):
    # Request body accepted by both create_schedule and update_schedule.

    name: str
    interval: ScheduleInterval
    # Read by update_schedule / toggle_schedule only when interval is CRON.
    cron_expression: Optional[str] = None
    # Expected values: "wazuh" | "nessus" (not enforced by this schema).
    scanner_type: str = "wazuh"
class ScheduleResponse(BaseModel):
    # Response shape for a scan schedule as returned by the schedule endpoints.

    id: int
    name: str

    # --- timing definition ---
    interval: ScheduleInterval
    cron_expression: Optional[str]

    scanner_type: str = "wazuh"
    enabled: bool

    # --- scheduling state; next_run is set to None by the endpoints when the
    # schedule is disabled or is a CRON schedule awaiting scheduler sync ---
    last_run: Optional[datetime]
    next_run: Optional[datetime]

    # ID of the user that created the schedule (create_schedule stores current_user.id).
    created_by: Optional[int]

    class Config:
        # Let the model be populated from objects via attribute access.
        from_attributes = True
# --- Endpoints ---
@router.get("", response_model=List[ScanResponse])
async def list_scans(
    status: Optional[ScanStatus] = Query(None),
    asset_id: Optional[int] = Query(None),
    limit: int = Query(50, ge=0),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """List scans, newest first, with optional filtering.

    Args:
        status: When provided, only scans with this status are returned.
        asset_id: When provided, only scans belonging to this asset are returned.
        limit: Maximum number of rows returned; negative values are rejected
            by request validation instead of reaching the database.
        db: Database session supplied by the ``get_db`` dependency.
        current_user: Result of the ``get_current_user`` dependency. Not read
            in the body; presumably declared so unauthenticated requests are
            rejected (verify against ``get_current_user``).

    Returns:
        A list of dicts, one per scan, carrying the ``ScanResponse`` fields.
        ``asset_hostname`` is the related asset's hostname, or ``"Unknown"``
        when the scan has no asset.
    """
    query = db.query(Scan)

    # Compare against None so that a falsy-but-valid value (asset_id=0)
    # still applies its filter instead of silently returning every scan.
    if status is not None:
        query = query.filter(Scan.status == status)
    if asset_id is not None:
        query = query.filter(Scan.asset_id == asset_id)

    # Order by newest first
    scans = query.order_by(Scan.created_at.desc()).limit(limit).all()

    # Build each row from named attributes rather than ``s.__dict__``: the
    # instance dict also holds SQLAlchemy's ``_sa_instance_state`` and lacks
    # any attribute that is expired or not yet loaded, whereas attribute
    # access always yields the current value.
    return [
        {
            "id": s.id,
            "asset_id": s.asset_id,
            "scan_type": s.scan_type,
            "status": s.status,
            "started_at": s.started_at,
            "completed_at": s.completed_at,
            "vulnerabilities_found": s.vulnerabilities_found,
            "error_message": s.error_message,
            # Enrich with asset hostname for UI
            "asset_hostname": s.asset.hostname if s.asset else "Unknown",
        }
        for s in scans
    ]
class ScanRunSummary(BaseModel):
    # Aggregated view of one "run": a group of per-asset scans that
    # list_scan_summaries considers to have been triggered together.

    # IDs of all scans in this run
    scan_ids: List[int]

    # Earliest start / latest completion among the run's scans
    # (completed_at is None when no scan in the run has completed_at set).
    started_at: datetime
    completed_at: Optional[datetime]

    scan_type: ScanType

    # --- per-status counters ---
    total_assets: int
    completed: int
    failed: int
    running: int

    # Sum of vulnerabilities_found over the completed scans of the run.
    total_vulns_found: int

    # One entry per failed scan: [{asset_hostname, error_message}]
    failed_assets: List[dict]

    class Config:
        # Let the model be populated from objects via attribute access.
        from_attributes = True
def _group_scans_into_runs(scans, window_seconds=5):
    """Split time-ordered scans into runs.

    A scan joins the current run when it has the same ``scan_type`` as the
    run's first scan and its ``started_at`` lies within ``window_seconds`` of
    that first scan; otherwise it opens a new run.

    Args:
        scans: Scans already sorted by ``started_at``; none may have a
            ``started_at`` of ``None``.
        window_seconds: Maximum distance, in seconds, from the first scan of
            a run for a later scan to still belong to it.

    Returns:
        A list of runs, each a non-empty list of scans, in input order.
    """
    runs = []
    for scan in scans:
        if runs:
            # The window is anchored on the first scan of the run, not on
            # the most recently appended one.
            anchor = runs[-1][0]
            gap = abs((anchor.started_at - scan.started_at).total_seconds())
            if gap <= window_seconds and scan.scan_type == anchor.scan_type:
                runs[-1].append(scan)
                continue
        runs.append([scan])
    return runs


def _summarize_run(run):
    """Collapse the scans of one run into a single ScanRunSummary."""
    failed_assets = []
    completed_count = 0
    failed_count = 0
    running_count = 0
    total_vulns = 0

    for s in run:
        if s.status == ScanStatus.COMPLETED:
            completed_count += 1
            total_vulns += s.vulnerabilities_found or 0
        elif s.status == ScanStatus.FAILED:
            failed_count += 1
            hostname = s.asset.hostname if s.asset else f"Asset #{s.asset_id}"
            failed_assets.append({
                "asset_hostname": hostname,
                "error_message": s.error_message or "Unknown error"
            })
        elif s.status == ScanStatus.RUNNING:
            running_count += 1
        # Any other status only contributes to total_assets.

    completed_ats = [s.completed_at for s in run if s.completed_at]
    return ScanRunSummary(
        scan_ids=[s.id for s in run],
        started_at=min(s.started_at for s in run),
        completed_at=max(completed_ats) if completed_ats else None,
        scan_type=run[0].scan_type,
        total_assets=len(run),
        completed=completed_count,
        failed=failed_count,
        running=running_count,
        total_vulns_found=total_vulns,
        failed_assets=failed_assets,
    )


@router.get("/summary", response_model=List[ScanRunSummary])
async def list_scan_summaries(
    limit: int = Query(20, ge=0),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Returns scan runs grouped by time window (scans triggered within 5 seconds = one run).
    Shows summary per run instead of individual per-asset rows.

    Args:
        limit: Maximum number of runs to return; negative values are rejected
            by request validation rather than slicing from the end.
        db: Database session supplied by the ``get_db`` dependency.
        current_user: Result of the ``get_current_user`` dependency; not read
            in the body.

    Returns:
        Up to ``limit`` ScanRunSummary objects, newest run first. Only the
        500 most recently started scans are considered.
    """
    # Scans without a start time cannot be placed on the timeline: comparing
    # or taking min() over a None started_at raises, so they are excluded.
    scans = (
        db.query(Scan)
        .filter(Scan.started_at.isnot(None))
        .order_by(Scan.started_at.desc())
        .limit(500)
        .all()
    )

    runs = _group_scans_into_runs(scans, window_seconds=5)
    return [_summarize_run(run) for run in runs[:limit]]
class ScanDeleteRequest(BaseModel):
    # Request body for clear_scans. The selectors are checked in the order
    # below and the first one that is set wins.

    # Delete specific scans by ID
    scan_ids: Optional[List[int]] = None
    # Delete by status: "failed", "completed", etc.
    status_filter: Optional[str] = None
    # Delete all scans
    all: bool = False
@router.post("/clear", response_model=dict)
async def clear_scans(
    body: ScanDeleteRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(RequireEditor)
):
    """Delete scan history selected by IDs, by status, or wholesale.

    Selector precedence: a non-empty ``scan_ids`` wins, then a non-empty
    ``status_filter``, then ``all``. With none of them set the request is
    rejected.

    Returns:
        ``{"deleted": <number of rows removed>}``.

    Raises:
        HTTPException: 400 when ``status_filter`` is not a valid ScanStatus
            value, or when no selector was provided.
    """
    targets = db.query(Scan)

    if body.scan_ids:
        targets = targets.filter(Scan.id.in_(body.scan_ids))
    elif body.status_filter:
        wanted = body.status_filter.lower()
        try:
            parsed_status = ScanStatus(wanted)
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid status: {body.status_filter}")
        targets = targets.filter(Scan.status == parsed_status)
    elif not body.all:
        raise HTTPException(status_code=400, detail="Provide scan_ids, status_filter, or set all=true")

    # Bulk delete; the session is not synchronised with the removed rows.
    removed = targets.delete(synchronize_session=False)
    db.commit()
    return {"deleted": removed}
# --- Autoscan Endpoint ---
import json
from app.integrations.wazuh_client import WazuhClient, WazuhAPIError
from app.models.setting import Setting
@router.post("/autoscan", response_model=dict)
async def trigger_autoscan(
    db: Session = Depends(get_db),
    current_user: User = Depends(RequireEditor)
):
    """
    Trigger automated scans for ALL assets connected to Wazuh.

    For every asset that has a ``wazuh_agent_id`` a Scan row of type WAZUH is
    created, ``sync_agent_vulnerabilities`` is run for it, and the row is
    marked COMPLETED or FAILED. A failure on one asset does not stop the run.

    Returns:
        A dict with ``message`` and ``scans_triggered`` (number of assets
        synced successfully) and, when at least one eligible asset exists,
        ``errors`` (one "<hostname>: <error>" string per failed asset).

    Raises:
        HTTPException: 400 when the Wazuh configuration is missing or
            incomplete; 500 when it is not a JSON object or when the run as
            a whole fails (e.g. the Wazuh client cannot be set up).
    """
    import logging
    from app.auth.setting_crypto import read_setting_value

    logger = logging.getLogger(__name__)

    # 1. Get Wazuh Configuration from DB (transparently decrypted)
    raw_wazuh = read_setting_value(db, "wazuh_config")
    if not raw_wazuh:
        raise HTTPException(status_code=400, detail="Wazuh configuration not found. Please configure settings first.")

    try:
        config = json.loads(raw_wazuh)
    except json.JSONDecodeError:
        raise HTTPException(status_code=500, detail="Invalid Wazuh configuration format.")
    # Valid JSON that is not an object (list, string, number) has no .get();
    # report it as a malformed configuration instead of crashing.
    if not isinstance(config, dict):
        raise HTTPException(status_code=500, detail="Invalid Wazuh configuration format.")

    api_url = config.get("api_url")
    username = config.get("username")
    password = config.get("password")
    if not all([api_url, username, password]):
        raise HTTPException(status_code=400, detail="Incomplete Wazuh configuration.")

    indexer_url = config.get("indexer_url")
    indexer_username = config.get("indexer_username")
    indexer_password = config.get("indexer_password")

    # 2. Find eligible assets
    assets = db.query(Asset).filter(Asset.wazuh_agent_id.isnot(None)).all()
    if not assets:
        return {"message": "No assets found with linked Wazuh Agent IDs.", "scans_triggered": 0}

    # 3. Initialize Wazuh Client and sync each asset
    triggered_count = 0
    errors = []
    # Every Scan created in this request. Tracked explicitly because
    # ``db.new`` only lists objects that have not been flushed yet.
    run_scans = []

    try:
        with WazuhClient(
            base_url=api_url,
            username=username,
            password=password,
            indexer_url=indexer_url,
            indexer_username=indexer_username,
            indexer_password=indexer_password,
            # NOTE(review): TLS verification is off unless the stored config
            # enables it explicitly.
            verify_ssl=config.get("verify_ssl", False)
        ) as client:
            for asset in assets:
                # Create the record before the per-asset try block so the
                # handler below can never see an unbound ``scan`` or the
                # previous asset's record.
                scan = Scan(
                    asset_id=asset.id,
                    scan_type=ScanType.WAZUH,  # source-labelled (was SYSCOLLECTOR)
                    status=ScanStatus.RUNNING,
                    started_at=datetime.now()
                )
                db.add(scan)
                run_scans.append(scan)

                try:
                    # Sync vulnerabilities from Wazuh
                    sync_agent_vulnerabilities(db, client, asset.wazuh_agent_id, asset)
                except Exception as e:
                    # Best effort per asset: record the failure and carry on.
                    logger.exception("Wazuh sync failed for asset %s", asset.hostname)
                    scan.status = ScanStatus.FAILED
                    scan.completed_at = datetime.now()
                    scan.error_message = str(e)
                    errors.append(f"{asset.hostname}: {str(e)}")
                else:
                    scan.status = ScanStatus.COMPLETED
                    scan.completed_at = datetime.now()
                    triggered_count += 1

            db.commit()
    except Exception as e:
        # The response tells the operator to check the server logs, so the
        # underlying error has to be logged here.
        logger.exception("Wazuh autoscan aborted")

        # Mark all scans that never reached a final state as FAILED
        for scan_obj in run_scans:
            if scan_obj.status == ScanStatus.RUNNING:
                scan_obj.status = ScanStatus.FAILED
                scan_obj.error_message = f"Wazuh connection failed: {str(e)}"
                scan_obj.completed_at = datetime.now()
        try:
            db.commit()
        except Exception:
            # Do not let a secondary database error replace the HTTP error.
            logger.exception("Could not persist failed autoscan records")
            db.rollback()
        raise HTTPException(status_code=500, detail="Failed to connect to Wazuh. Check server logs for details.")

    return {
        "message": f"Triggered scans for {triggered_count} assets.",
        "scans_triggered": triggered_count,
        "errors": errors
    }
# --- Schedule Endpoints ---
# Fixed-period schedule intervals mapped to the timedelta the schedule
# endpoints add to datetime.now() when computing next_run.
# ScheduleInterval.CRON has no entry; lookups go through
# .get(..., timedelta(days=1)), so a missing key falls back to one day.
INTERVAL_DELTA_MAP = {
    ScheduleInterval.EVERY_HOUR: timedelta(hours=1),
    ScheduleInterval.EVERY_6_HOURS: timedelta(hours=6),
    ScheduleInterval.EVERY_12_HOURS: timedelta(hours=12),
    ScheduleInterval.DAILY: timedelta(days=1),
    ScheduleInterval.WEEKLY: timedelta(weeks=1),
}
@router.get("/schedules", response_model=List[ScheduleResponse])
async def list_schedules(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return every scan schedule, most recently created first."""
    newest_first = ScanSchedule.created_at.desc()
    return db.query(ScanSchedule).order_by(newest_first).all()
@router.post("/schedules", response_model=ScheduleResponse, status_code=status.HTTP_201_CREATED)
async def create_schedule(
    data: ScheduleCreateRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(RequireEditor)
):
    """Creates a new automated scan schedule.

    The schedule is stored enabled and owned by the calling user.

    Args:
        data: Name, interval, optional cron expression and scanner type.
        db: Database session supplied by the ``get_db`` dependency.
        current_user: Result of the ``RequireEditor`` dependency; its ``id``
            is stored as ``created_by``.

    Returns:
        The persisted schedule, refreshed from the database.
    """
    # Same rule update_schedule and toggle_schedule apply: a CRON schedule
    # with an expression gets no next_run here (those endpoints leave it to
    # the scheduler sync), instead of the one-day fallback that the interval
    # map would otherwise yield for CRON.
    if data.interval == ScheduleInterval.CRON and data.cron_expression:
        next_run = None
    else:
        next_run = datetime.now() + INTERVAL_DELTA_MAP.get(data.interval, timedelta(days=1))

    schedule = ScanSchedule(
        name=data.name,
        interval=data.interval,
        scanner_type=data.scanner_type or "wazuh",
        enabled=True,
        created_by=current_user.id,
        cron_expression=data.cron_expression,
        next_run=next_run
    )
    db.add(schedule)
    db.commit()
    db.refresh(schedule)
    return schedule
@router.put("/schedules/{schedule_id}", response_model=ScheduleResponse)
async def update_schedule(
    schedule_id: int,
    data: ScheduleCreateRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(RequireEditor)
):
    """Overwrite the editable fields of an existing scan schedule.

    ``next_run`` is recomputed only while the schedule is enabled; a disabled
    schedule keeps whatever value it had.

    Raises:
        HTTPException: 404 when no schedule has the given ID.
    """
    lookup = db.query(ScanSchedule).filter(ScanSchedule.id == schedule_id)
    schedule = lookup.first()
    if not schedule:
        raise HTTPException(status_code=404, detail="Schedule not found")

    schedule.name = data.name
    schedule.interval = data.interval
    schedule.cron_expression = data.cron_expression
    schedule.scanner_type = data.scanner_type or "wazuh"

    if schedule.enabled:
        cron_driven = schedule.interval == ScheduleInterval.CRON and schedule.cron_expression
        # CRON schedules are reset to None so the scheduler sync picks them
        # up; everything else is pushed one interval into the future.
        schedule.next_run = (
            None
            if cron_driven
            else datetime.now() + INTERVAL_DELTA_MAP.get(data.interval, timedelta(days=1))
        )

    db.commit()
    db.refresh(schedule)
    return schedule
@router.patch("/schedules/{schedule_id}", response_model=ScheduleResponse)
async def toggle_schedule(
    schedule_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(RequireEditor)
):
    """Flip a scan schedule between enabled and disabled.

    Disabling clears ``next_run``. Enabling sets it one interval ahead,
    except for CRON schedules with an expression, which are left at None.

    Raises:
        HTTPException: 404 when no schedule has the given ID.
    """
    lookup = db.query(ScanSchedule).filter(ScanSchedule.id == schedule_id)
    schedule = lookup.first()
    if not schedule:
        raise HTTPException(status_code=404, detail="Schedule not found")

    schedule.enabled = not schedule.enabled

    upcoming = None  # disabled, or CRON waiting to be picked up by sync
    if schedule.enabled and not (
        schedule.interval == ScheduleInterval.CRON and schedule.cron_expression
    ):
        upcoming = datetime.now() + INTERVAL_DELTA_MAP.get(schedule.interval, timedelta(days=1))
    schedule.next_run = upcoming

    db.commit()
    db.refresh(schedule)
    return schedule
@router.delete("/schedules/{schedule_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_schedule(
    schedule_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(RequireEditor)
):
    """Remove a scan schedule permanently.

    Raises:
        HTTPException: 404 when no schedule has the given ID.
    """
    matching = db.query(ScanSchedule).filter(ScanSchedule.id == schedule_id)
    doomed = matching.first()
    if not doomed:
        raise HTTPException(status_code=404, detail="Schedule not found")

    db.delete(doomed)
    db.commit()