Add operation metrics with logging integration in metrics UI
This commit is contained in:
@@ -16,6 +16,7 @@ from flask_wtf.csrf import CSRFError
|
|||||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||||
|
|
||||||
from .access_logging import AccessLoggingService
|
from .access_logging import AccessLoggingService
|
||||||
|
from .operation_metrics import OperationMetricsCollector, classify_endpoint
|
||||||
from .compression import GzipMiddleware
|
from .compression import GzipMiddleware
|
||||||
from .acl import AclService
|
from .acl import AclService
|
||||||
from .bucket_policies import BucketPolicyStore
|
from .bucket_policies import BucketPolicyStore
|
||||||
@@ -187,6 +188,15 @@ def create_app(
|
|||||||
app.extensions["notifications"] = notification_service
|
app.extensions["notifications"] = notification_service
|
||||||
app.extensions["access_logging"] = access_logging_service
|
app.extensions["access_logging"] = access_logging_service
|
||||||
|
|
||||||
|
operation_metrics_collector = None
|
||||||
|
if app.config.get("OPERATION_METRICS_ENABLED", False):
|
||||||
|
operation_metrics_collector = OperationMetricsCollector(
|
||||||
|
storage_root,
|
||||||
|
interval_minutes=app.config.get("OPERATION_METRICS_INTERVAL_MINUTES", 5),
|
||||||
|
retention_hours=app.config.get("OPERATION_METRICS_RETENTION_HOURS", 24),
|
||||||
|
)
|
||||||
|
app.extensions["operation_metrics"] = operation_metrics_collector
|
||||||
|
|
||||||
@app.errorhandler(500)
|
@app.errorhandler(500)
|
||||||
def internal_error(error):
|
def internal_error(error):
|
||||||
return render_template('500.html'), 500
|
return render_template('500.html'), 500
|
||||||
@@ -356,6 +366,7 @@ def _configure_logging(app: Flask) -> None:
|
|||||||
def _log_request_start() -> None:
|
def _log_request_start() -> None:
|
||||||
g.request_id = uuid.uuid4().hex
|
g.request_id = uuid.uuid4().hex
|
||||||
g.request_started_at = time.perf_counter()
|
g.request_started_at = time.perf_counter()
|
||||||
|
g.request_bytes_in = request.content_length or 0
|
||||||
app.logger.info(
|
app.logger.info(
|
||||||
"Request started",
|
"Request started",
|
||||||
extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
|
extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
|
||||||
@@ -377,4 +388,21 @@ def _configure_logging(app: Flask) -> None:
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
|
response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
|
||||||
|
|
||||||
|
operation_metrics = app.extensions.get("operation_metrics")
|
||||||
|
if operation_metrics:
|
||||||
|
bytes_in = getattr(g, "request_bytes_in", 0)
|
||||||
|
bytes_out = response.content_length or 0
|
||||||
|
error_code = getattr(g, "s3_error_code", None)
|
||||||
|
endpoint_type = classify_endpoint(request.path)
|
||||||
|
operation_metrics.record_request(
|
||||||
|
method=request.method,
|
||||||
|
endpoint_type=endpoint_type,
|
||||||
|
status_code=response.status_code,
|
||||||
|
latency_ms=duration_ms,
|
||||||
|
bytes_in=bytes_in,
|
||||||
|
bytes_out=bytes_out,
|
||||||
|
error_code=error_code,
|
||||||
|
)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|||||||
@@ -87,6 +87,9 @@ class AppConfig:
|
|||||||
metrics_history_enabled: bool
|
metrics_history_enabled: bool
|
||||||
metrics_history_retention_hours: int
|
metrics_history_retention_hours: int
|
||||||
metrics_history_interval_minutes: int
|
metrics_history_interval_minutes: int
|
||||||
|
operation_metrics_enabled: bool
|
||||||
|
operation_metrics_interval_minutes: int
|
||||||
|
operation_metrics_retention_hours: int
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
|
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
|
||||||
@@ -186,6 +189,9 @@ class AppConfig:
|
|||||||
metrics_history_enabled = str(_get("METRICS_HISTORY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
metrics_history_enabled = str(_get("METRICS_HISTORY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||||
metrics_history_retention_hours = int(_get("METRICS_HISTORY_RETENTION_HOURS", 24))
|
metrics_history_retention_hours = int(_get("METRICS_HISTORY_RETENTION_HOURS", 24))
|
||||||
metrics_history_interval_minutes = int(_get("METRICS_HISTORY_INTERVAL_MINUTES", 5))
|
metrics_history_interval_minutes = int(_get("METRICS_HISTORY_INTERVAL_MINUTES", 5))
|
||||||
|
operation_metrics_enabled = str(_get("OPERATION_METRICS_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||||
|
operation_metrics_interval_minutes = int(_get("OPERATION_METRICS_INTERVAL_MINUTES", 5))
|
||||||
|
operation_metrics_retention_hours = int(_get("OPERATION_METRICS_RETENTION_HOURS", 24))
|
||||||
|
|
||||||
return cls(storage_root=storage_root,
|
return cls(storage_root=storage_root,
|
||||||
max_upload_size=max_upload_size,
|
max_upload_size=max_upload_size,
|
||||||
@@ -227,7 +233,10 @@ class AppConfig:
|
|||||||
lifecycle_interval_seconds=lifecycle_interval_seconds,
|
lifecycle_interval_seconds=lifecycle_interval_seconds,
|
||||||
metrics_history_enabled=metrics_history_enabled,
|
metrics_history_enabled=metrics_history_enabled,
|
||||||
metrics_history_retention_hours=metrics_history_retention_hours,
|
metrics_history_retention_hours=metrics_history_retention_hours,
|
||||||
metrics_history_interval_minutes=metrics_history_interval_minutes)
|
metrics_history_interval_minutes=metrics_history_interval_minutes,
|
||||||
|
operation_metrics_enabled=operation_metrics_enabled,
|
||||||
|
operation_metrics_interval_minutes=operation_metrics_interval_minutes,
|
||||||
|
operation_metrics_retention_hours=operation_metrics_retention_hours)
|
||||||
|
|
||||||
def validate_and_report(self) -> list[str]:
|
def validate_and_report(self) -> list[str]:
|
||||||
"""Validate configuration and return a list of warnings/issues.
|
"""Validate configuration and return a list of warnings/issues.
|
||||||
@@ -359,4 +368,7 @@ class AppConfig:
|
|||||||
"METRICS_HISTORY_ENABLED": self.metrics_history_enabled,
|
"METRICS_HISTORY_ENABLED": self.metrics_history_enabled,
|
||||||
"METRICS_HISTORY_RETENTION_HOURS": self.metrics_history_retention_hours,
|
"METRICS_HISTORY_RETENTION_HOURS": self.metrics_history_retention_hours,
|
||||||
"METRICS_HISTORY_INTERVAL_MINUTES": self.metrics_history_interval_minutes,
|
"METRICS_HISTORY_INTERVAL_MINUTES": self.metrics_history_interval_minutes,
|
||||||
|
"OPERATION_METRICS_ENABLED": self.operation_metrics_enabled,
|
||||||
|
"OPERATION_METRICS_INTERVAL_MINUTES": self.operation_metrics_interval_minutes,
|
||||||
|
"OPERATION_METRICS_RETENTION_HOURS": self.operation_metrics_retention_hours,
|
||||||
}
|
}
|
||||||
|
|||||||
271
app/operation_metrics.py
Normal file
271
app/operation_metrics.py
Normal file
@@ -0,0 +1,271 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class OperationStats:
|
||||||
|
count: int = 0
|
||||||
|
success_count: int = 0
|
||||||
|
error_count: int = 0
|
||||||
|
latency_sum_ms: float = 0.0
|
||||||
|
latency_min_ms: float = float("inf")
|
||||||
|
latency_max_ms: float = 0.0
|
||||||
|
bytes_in: int = 0
|
||||||
|
bytes_out: int = 0
|
||||||
|
|
||||||
|
def record(self, latency_ms: float, success: bool, bytes_in: int = 0, bytes_out: int = 0) -> None:
|
||||||
|
self.count += 1
|
||||||
|
if success:
|
||||||
|
self.success_count += 1
|
||||||
|
else:
|
||||||
|
self.error_count += 1
|
||||||
|
self.latency_sum_ms += latency_ms
|
||||||
|
if latency_ms < self.latency_min_ms:
|
||||||
|
self.latency_min_ms = latency_ms
|
||||||
|
if latency_ms > self.latency_max_ms:
|
||||||
|
self.latency_max_ms = latency_ms
|
||||||
|
self.bytes_in += bytes_in
|
||||||
|
self.bytes_out += bytes_out
|
||||||
|
|
||||||
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
|
avg_latency = self.latency_sum_ms / self.count if self.count > 0 else 0.0
|
||||||
|
min_latency = self.latency_min_ms if self.latency_min_ms != float("inf") else 0.0
|
||||||
|
return {
|
||||||
|
"count": self.count,
|
||||||
|
"success_count": self.success_count,
|
||||||
|
"error_count": self.error_count,
|
||||||
|
"latency_avg_ms": round(avg_latency, 2),
|
||||||
|
"latency_min_ms": round(min_latency, 2),
|
||||||
|
"latency_max_ms": round(self.latency_max_ms, 2),
|
||||||
|
"bytes_in": self.bytes_in,
|
||||||
|
"bytes_out": self.bytes_out,
|
||||||
|
}
|
||||||
|
|
||||||
|
def merge(self, other: "OperationStats") -> None:
|
||||||
|
self.count += other.count
|
||||||
|
self.success_count += other.success_count
|
||||||
|
self.error_count += other.error_count
|
||||||
|
self.latency_sum_ms += other.latency_sum_ms
|
||||||
|
if other.latency_min_ms < self.latency_min_ms:
|
||||||
|
self.latency_min_ms = other.latency_min_ms
|
||||||
|
if other.latency_max_ms > self.latency_max_ms:
|
||||||
|
self.latency_max_ms = other.latency_max_ms
|
||||||
|
self.bytes_in += other.bytes_in
|
||||||
|
self.bytes_out += other.bytes_out
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class MetricsSnapshot:
|
||||||
|
timestamp: datetime
|
||||||
|
window_seconds: int
|
||||||
|
by_method: Dict[str, Dict[str, Any]]
|
||||||
|
by_endpoint: Dict[str, Dict[str, Any]]
|
||||||
|
by_status_class: Dict[str, int]
|
||||||
|
error_codes: Dict[str, int]
|
||||||
|
totals: Dict[str, Any]
|
||||||
|
|
||||||
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
|
return {
|
||||||
|
"timestamp": self.timestamp.isoformat(),
|
||||||
|
"window_seconds": self.window_seconds,
|
||||||
|
"by_method": self.by_method,
|
||||||
|
"by_endpoint": self.by_endpoint,
|
||||||
|
"by_status_class": self.by_status_class,
|
||||||
|
"error_codes": self.error_codes,
|
||||||
|
"totals": self.totals,
|
||||||
|
}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_dict(cls, data: Dict[str, Any]) -> "MetricsSnapshot":
|
||||||
|
return cls(
|
||||||
|
timestamp=datetime.fromisoformat(data["timestamp"]),
|
||||||
|
window_seconds=data.get("window_seconds", 300),
|
||||||
|
by_method=data.get("by_method", {}),
|
||||||
|
by_endpoint=data.get("by_endpoint", {}),
|
||||||
|
by_status_class=data.get("by_status_class", {}),
|
||||||
|
error_codes=data.get("error_codes", {}),
|
||||||
|
totals=data.get("totals", {}),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class OperationMetricsCollector:
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
storage_root: Path,
|
||||||
|
interval_minutes: int = 5,
|
||||||
|
retention_hours: int = 24,
|
||||||
|
):
|
||||||
|
self.storage_root = storage_root
|
||||||
|
self.interval_seconds = interval_minutes * 60
|
||||||
|
self.retention_hours = retention_hours
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
self._by_method: Dict[str, OperationStats] = {}
|
||||||
|
self._by_endpoint: Dict[str, OperationStats] = {}
|
||||||
|
self._by_status_class: Dict[str, int] = {}
|
||||||
|
self._error_codes: Dict[str, int] = {}
|
||||||
|
self._totals = OperationStats()
|
||||||
|
self._window_start = time.time()
|
||||||
|
self._shutdown = threading.Event()
|
||||||
|
self._snapshots: List[MetricsSnapshot] = []
|
||||||
|
|
||||||
|
self._load_history()
|
||||||
|
|
||||||
|
self._snapshot_thread = threading.Thread(
|
||||||
|
target=self._snapshot_loop, name="operation-metrics-snapshot", daemon=True
|
||||||
|
)
|
||||||
|
self._snapshot_thread.start()
|
||||||
|
|
||||||
|
def _config_path(self) -> Path:
|
||||||
|
return self.storage_root / ".myfsio.sys" / "config" / "operation_metrics.json"
|
||||||
|
|
||||||
|
def _load_history(self) -> None:
|
||||||
|
config_path = self._config_path()
|
||||||
|
if not config_path.exists():
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
data = json.loads(config_path.read_text(encoding="utf-8"))
|
||||||
|
snapshots_data = data.get("snapshots", [])
|
||||||
|
self._snapshots = [MetricsSnapshot.from_dict(s) for s in snapshots_data]
|
||||||
|
self._prune_old_snapshots()
|
||||||
|
except (json.JSONDecodeError, OSError, KeyError) as e:
|
||||||
|
logger.warning(f"Failed to load operation metrics history: {e}")
|
||||||
|
|
||||||
|
def _save_history(self) -> None:
|
||||||
|
config_path = self._config_path()
|
||||||
|
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
try:
|
||||||
|
data = {"snapshots": [s.to_dict() for s in self._snapshots]}
|
||||||
|
config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
|
||||||
|
except OSError as e:
|
||||||
|
logger.warning(f"Failed to save operation metrics history: {e}")
|
||||||
|
|
||||||
|
def _prune_old_snapshots(self) -> None:
|
||||||
|
if not self._snapshots:
|
||||||
|
return
|
||||||
|
cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600)
|
||||||
|
self._snapshots = [
|
||||||
|
s for s in self._snapshots if s.timestamp.timestamp() > cutoff
|
||||||
|
]
|
||||||
|
|
||||||
|
def _snapshot_loop(self) -> None:
|
||||||
|
while not self._shutdown.is_set():
|
||||||
|
self._shutdown.wait(timeout=self.interval_seconds)
|
||||||
|
if not self._shutdown.is_set():
|
||||||
|
self._take_snapshot()
|
||||||
|
|
||||||
|
def _take_snapshot(self) -> None:
|
||||||
|
with self._lock:
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
window_seconds = int(time.time() - self._window_start)
|
||||||
|
|
||||||
|
snapshot = MetricsSnapshot(
|
||||||
|
timestamp=now,
|
||||||
|
window_seconds=window_seconds,
|
||||||
|
by_method={k: v.to_dict() for k, v in self._by_method.items()},
|
||||||
|
by_endpoint={k: v.to_dict() for k, v in self._by_endpoint.items()},
|
||||||
|
by_status_class=dict(self._by_status_class),
|
||||||
|
error_codes=dict(self._error_codes),
|
||||||
|
totals=self._totals.to_dict(),
|
||||||
|
)
|
||||||
|
|
||||||
|
self._snapshots.append(snapshot)
|
||||||
|
self._prune_old_snapshots()
|
||||||
|
self._save_history()
|
||||||
|
|
||||||
|
self._by_method.clear()
|
||||||
|
self._by_endpoint.clear()
|
||||||
|
self._by_status_class.clear()
|
||||||
|
self._error_codes.clear()
|
||||||
|
self._totals = OperationStats()
|
||||||
|
self._window_start = time.time()
|
||||||
|
|
||||||
|
def record_request(
|
||||||
|
self,
|
||||||
|
method: str,
|
||||||
|
endpoint_type: str,
|
||||||
|
status_code: int,
|
||||||
|
latency_ms: float,
|
||||||
|
bytes_in: int = 0,
|
||||||
|
bytes_out: int = 0,
|
||||||
|
error_code: Optional[str] = None,
|
||||||
|
) -> None:
|
||||||
|
success = 200 <= status_code < 400
|
||||||
|
status_class = f"{status_code // 100}xx"
|
||||||
|
|
||||||
|
with self._lock:
|
||||||
|
if method not in self._by_method:
|
||||||
|
self._by_method[method] = OperationStats()
|
||||||
|
self._by_method[method].record(latency_ms, success, bytes_in, bytes_out)
|
||||||
|
|
||||||
|
if endpoint_type not in self._by_endpoint:
|
||||||
|
self._by_endpoint[endpoint_type] = OperationStats()
|
||||||
|
self._by_endpoint[endpoint_type].record(latency_ms, success, bytes_in, bytes_out)
|
||||||
|
|
||||||
|
self._by_status_class[status_class] = self._by_status_class.get(status_class, 0) + 1
|
||||||
|
|
||||||
|
if error_code:
|
||||||
|
self._error_codes[error_code] = self._error_codes.get(error_code, 0) + 1
|
||||||
|
|
||||||
|
self._totals.record(latency_ms, success, bytes_in, bytes_out)
|
||||||
|
|
||||||
|
def get_current_stats(self) -> Dict[str, Any]:
|
||||||
|
with self._lock:
|
||||||
|
window_seconds = int(time.time() - self._window_start)
|
||||||
|
return {
|
||||||
|
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||||
|
"window_seconds": window_seconds,
|
||||||
|
"by_method": {k: v.to_dict() for k, v in self._by_method.items()},
|
||||||
|
"by_endpoint": {k: v.to_dict() for k, v in self._by_endpoint.items()},
|
||||||
|
"by_status_class": dict(self._by_status_class),
|
||||||
|
"error_codes": dict(self._error_codes),
|
||||||
|
"totals": self._totals.to_dict(),
|
||||||
|
}
|
||||||
|
|
||||||
|
def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]:
|
||||||
|
with self._lock:
|
||||||
|
snapshots = list(self._snapshots)
|
||||||
|
|
||||||
|
if hours:
|
||||||
|
cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)
|
||||||
|
snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff]
|
||||||
|
|
||||||
|
return [s.to_dict() for s in snapshots]
|
||||||
|
|
||||||
|
def shutdown(self) -> None:
|
||||||
|
self._shutdown.set()
|
||||||
|
self._take_snapshot()
|
||||||
|
self._snapshot_thread.join(timeout=5.0)
|
||||||
|
|
||||||
|
|
||||||
|
def classify_endpoint(path: str) -> str:
|
||||||
|
if not path or path == "/":
|
||||||
|
return "service"
|
||||||
|
|
||||||
|
path = path.rstrip("/")
|
||||||
|
|
||||||
|
if path.startswith("/ui"):
|
||||||
|
return "ui"
|
||||||
|
|
||||||
|
if path.startswith("/kms"):
|
||||||
|
return "kms"
|
||||||
|
|
||||||
|
if path.startswith("/myfsio"):
|
||||||
|
return "service"
|
||||||
|
|
||||||
|
parts = path.lstrip("/").split("/")
|
||||||
|
if len(parts) == 0:
|
||||||
|
return "service"
|
||||||
|
elif len(parts) == 1:
|
||||||
|
return "bucket"
|
||||||
|
else:
|
||||||
|
return "object"
|
||||||
@@ -88,6 +88,7 @@ def _xml_response(element: Element, status: int = 200) -> Response:
|
|||||||
|
|
||||||
|
|
||||||
def _error_response(code: str, message: str, status: int) -> Response:
|
def _error_response(code: str, message: str, status: int) -> Response:
|
||||||
|
g.s3_error_code = code
|
||||||
error = Element("Error")
|
error = Element("Error")
|
||||||
SubElement(error, "Code").text = code
|
SubElement(error, "Code").text = code
|
||||||
SubElement(error, "Message").text = message
|
SubElement(error, "Message").text = message
|
||||||
|
|||||||
51
app/ui.py
51
app/ui.py
@@ -141,6 +141,10 @@ def _acl() -> AclService:
|
|||||||
return current_app.extensions["acl"]
|
return current_app.extensions["acl"]
|
||||||
|
|
||||||
|
|
||||||
|
def _operation_metrics():
|
||||||
|
return current_app.extensions.get("operation_metrics")
|
||||||
|
|
||||||
|
|
||||||
def _format_bytes(num: int) -> str:
|
def _format_bytes(num: int) -> str:
|
||||||
step = 1024
|
step = 1024
|
||||||
units = ["B", "KB", "MB", "GB", "TB", "PB"]
|
units = ["B", "KB", "MB", "GB", "TB", "PB"]
|
||||||
@@ -2196,6 +2200,7 @@ def metrics_dashboard():
|
|||||||
"uptime_days": uptime_days,
|
"uptime_days": uptime_days,
|
||||||
},
|
},
|
||||||
metrics_history_enabled=current_app.config.get("METRICS_HISTORY_ENABLED", False),
|
metrics_history_enabled=current_app.config.get("METRICS_HISTORY_ENABLED", False),
|
||||||
|
operation_metrics_enabled=current_app.config.get("OPERATION_METRICS_ENABLED", False),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -2329,6 +2334,52 @@ def metrics_settings():
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@ui_bp.get("/metrics/operations")
|
||||||
|
def metrics_operations():
|
||||||
|
principal = _current_principal()
|
||||||
|
|
||||||
|
try:
|
||||||
|
_iam().authorize(principal, None, "iam:list_users")
|
||||||
|
except IamError:
|
||||||
|
return jsonify({"error": "Access denied"}), 403
|
||||||
|
|
||||||
|
collector = _operation_metrics()
|
||||||
|
if not collector:
|
||||||
|
return jsonify({
|
||||||
|
"enabled": False,
|
||||||
|
"stats": None,
|
||||||
|
})
|
||||||
|
|
||||||
|
return jsonify({
|
||||||
|
"enabled": True,
|
||||||
|
"stats": collector.get_current_stats(),
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@ui_bp.get("/metrics/operations/history")
|
||||||
|
def metrics_operations_history():
|
||||||
|
principal = _current_principal()
|
||||||
|
|
||||||
|
try:
|
||||||
|
_iam().authorize(principal, None, "iam:list_users")
|
||||||
|
except IamError:
|
||||||
|
return jsonify({"error": "Access denied"}), 403
|
||||||
|
|
||||||
|
collector = _operation_metrics()
|
||||||
|
if not collector:
|
||||||
|
return jsonify({
|
||||||
|
"enabled": False,
|
||||||
|
"history": [],
|
||||||
|
})
|
||||||
|
|
||||||
|
hours = request.args.get("hours", type=int)
|
||||||
|
return jsonify({
|
||||||
|
"enabled": True,
|
||||||
|
"history": collector.get_history(hours),
|
||||||
|
"interval_minutes": current_app.config.get("OPERATION_METRICS_INTERVAL_MINUTES", 5),
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
@ui_bp.route("/buckets/<bucket_name>/lifecycle", methods=["GET", "POST", "DELETE"])
|
@ui_bp.route("/buckets/<bucket_name>/lifecycle", methods=["GET", "POST", "DELETE"])
|
||||||
def bucket_lifecycle(bucket_name: str):
|
def bucket_lifecycle(bucket_name: str):
|
||||||
principal = _current_principal()
|
principal = _current_principal()
|
||||||
|
|||||||
@@ -268,6 +268,121 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{% if operation_metrics_enabled %}
|
||||||
|
<div class="row g-4 mt-2">
|
||||||
|
<div class="col-12">
|
||||||
|
<div class="card shadow-sm border-0">
|
||||||
|
<div class="card-header bg-transparent border-0 pt-4 px-4 d-flex justify-content-between align-items-center">
|
||||||
|
<h5 class="card-title mb-0 fw-semibold">API Operations</h5>
|
||||||
|
<div class="d-flex align-items-center gap-3">
|
||||||
|
<span class="small text-muted" id="opStatus">Loading...</span>
|
||||||
|
<button class="btn btn-outline-secondary btn-sm" id="resetOpMetricsBtn" title="Reset current window">
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="bi bi-arrow-counterclockwise" viewBox="0 0 16 16">
|
||||||
|
<path fill-rule="evenodd" d="M8 3a5 5 0 1 1-4.546 2.914.5.5 0 0 0-.908-.417A6 6 0 1 0 8 2v1z"/>
|
||||||
|
<path d="M8 4.466V.534a.25.25 0 0 0-.41-.192L5.23 2.308a.25.25 0 0 0 0 .384l2.36 1.966A.25.25 0 0 0 8 4.466z"/>
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="card-body p-4">
|
||||||
|
<div class="row g-3 mb-4">
|
||||||
|
<div class="col-6 col-md-4 col-lg-2">
|
||||||
|
<div class="text-center p-3 bg-light rounded h-100">
|
||||||
|
<h4 class="fw-bold mb-1" id="opTotalRequests">0</h4>
|
||||||
|
<small class="text-muted">Requests</small>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-6 col-md-4 col-lg-2">
|
||||||
|
<div class="text-center p-3 bg-light rounded h-100">
|
||||||
|
<h4 class="fw-bold mb-1 text-success" id="opSuccessRate">0%</h4>
|
||||||
|
<small class="text-muted">Success</small>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-6 col-md-4 col-lg-2">
|
||||||
|
<div class="text-center p-3 bg-light rounded h-100">
|
||||||
|
<h4 class="fw-bold mb-1 text-danger" id="opErrorCount">0</h4>
|
||||||
|
<small class="text-muted">Errors</small>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-6 col-md-4 col-lg-2">
|
||||||
|
<div class="text-center p-3 bg-light rounded h-100">
|
||||||
|
<h4 class="fw-bold mb-1 text-info" id="opAvgLatency">0ms</h4>
|
||||||
|
<small class="text-muted">Latency</small>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-6 col-md-4 col-lg-2">
|
||||||
|
<div class="text-center p-3 bg-light rounded h-100">
|
||||||
|
<h4 class="fw-bold mb-1 text-primary" id="opBytesIn">0 B</h4>
|
||||||
|
<small class="text-muted">Bytes In</small>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-6 col-md-4 col-lg-2">
|
||||||
|
<div class="text-center p-3 bg-light rounded h-100">
|
||||||
|
<h4 class="fw-bold mb-1 text-secondary" id="opBytesOut">0 B</h4>
|
||||||
|
<small class="text-muted">Bytes Out</small>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="row g-4">
|
||||||
|
<div class="col-lg-6">
|
||||||
|
<div class="bg-light rounded p-3">
|
||||||
|
<h6 class="text-muted small fw-bold text-uppercase mb-3">Requests by Method</h6>
|
||||||
|
<div style="height: 220px; display: flex; align-items: center; justify-content: center;">
|
||||||
|
<canvas id="methodChart"></canvas>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-lg-6">
|
||||||
|
<div class="bg-light rounded p-3">
|
||||||
|
<h6 class="text-muted small fw-bold text-uppercase mb-3">Requests by Status</h6>
|
||||||
|
<div style="height: 220px;">
|
||||||
|
<canvas id="statusChart"></canvas>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="row g-4 mt-1">
|
||||||
|
<div class="col-lg-6">
|
||||||
|
<div class="bg-light rounded p-3">
|
||||||
|
<h6 class="text-muted small fw-bold text-uppercase mb-3">Requests by Endpoint</h6>
|
||||||
|
<div style="height: 180px;">
|
||||||
|
<canvas id="endpointChart"></canvas>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-lg-6">
|
||||||
|
<div class="bg-light rounded p-3 h-100 d-flex flex-column">
|
||||||
|
<div class="d-flex justify-content-between align-items-start mb-3">
|
||||||
|
<h6 class="text-muted small fw-bold text-uppercase mb-0">S3 Error Codes</h6>
|
||||||
|
<span class="badge bg-secondary-subtle text-secondary" style="font-size: 0.65rem;" title="Tracks S3 API errors like NoSuchKey, AccessDenied, etc.">API Only</span>
|
||||||
|
</div>
|
||||||
|
<div class="flex-grow-1 d-flex flex-column" style="min-height: 150px;">
|
||||||
|
<div class="d-flex border-bottom pb-2 mb-2" style="font-size: 0.75rem;">
|
||||||
|
<div class="text-muted fw-semibold" style="flex: 1;">Code</div>
|
||||||
|
<div class="text-muted fw-semibold text-end" style="width: 60px;">Count</div>
|
||||||
|
<div class="text-muted fw-semibold text-end" style="width: 100px;">Distribution</div>
|
||||||
|
</div>
|
||||||
|
<div id="errorCodesContainer" class="flex-grow-1" style="overflow-y: auto;">
|
||||||
|
<div id="errorCodesBody">
|
||||||
|
<div class="text-muted small text-center py-4">
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="currentColor" class="bi bi-check-circle mb-2 text-success" viewBox="0 0 16 16">
|
||||||
|
<path d="M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z"/>
|
||||||
|
<path d="M10.97 4.97a.235.235 0 0 0-.02.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-1.071-1.05z"/>
|
||||||
|
</svg>
|
||||||
|
<div>No S3 API errors</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
{% if metrics_history_enabled %}
|
{% if metrics_history_enabled %}
|
||||||
<div class="row g-4 mt-2">
|
<div class="row g-4 mt-2">
|
||||||
<div class="col-12">
|
<div class="col-12">
|
||||||
@@ -307,7 +422,7 @@
|
|||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
||||||
{% block extra_scripts %}
|
{% block extra_scripts %}
|
||||||
{% if metrics_history_enabled %}
|
{% if metrics_history_enabled or operation_metrics_enabled %}
|
||||||
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
|
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<script>
|
<script>
|
||||||
@@ -413,6 +528,237 @@
|
|||||||
startPolling();
|
startPolling();
|
||||||
})();
|
})();
|
||||||
|
|
||||||
|
{% if operation_metrics_enabled %}
|
||||||
|
(function() {
|
||||||
|
var methodChart = null;
|
||||||
|
var statusChart = null;
|
||||||
|
var endpointChart = null;
|
||||||
|
var opStatus = document.getElementById('opStatus');
|
||||||
|
var opTimer = null;
|
||||||
|
var methodColors = {
|
||||||
|
'GET': '#0d6efd',
|
||||||
|
'PUT': '#198754',
|
||||||
|
'POST': '#ffc107',
|
||||||
|
'DELETE': '#dc3545',
|
||||||
|
'HEAD': '#6c757d',
|
||||||
|
'OPTIONS': '#0dcaf0'
|
||||||
|
};
|
||||||
|
var statusColors = {
|
||||||
|
'2xx': '#198754',
|
||||||
|
'3xx': '#0dcaf0',
|
||||||
|
'4xx': '#ffc107',
|
||||||
|
'5xx': '#dc3545'
|
||||||
|
};
|
||||||
|
var endpointColors = {
|
||||||
|
'object': '#0d6efd',
|
||||||
|
'bucket': '#198754',
|
||||||
|
'ui': '#6c757d',
|
||||||
|
'service': '#0dcaf0',
|
||||||
|
'kms': '#ffc107'
|
||||||
|
};
|
||||||
|
|
||||||
|
function formatBytes(bytes) {
|
||||||
|
if (bytes === 0) return '0 B';
|
||||||
|
var k = 1024;
|
||||||
|
var sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
|
||||||
|
var i = Math.floor(Math.log(bytes) / Math.log(k));
|
||||||
|
return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
function initOpCharts() {
|
||||||
|
var methodCtx = document.getElementById('methodChart');
|
||||||
|
var statusCtx = document.getElementById('statusChart');
|
||||||
|
var endpointCtx = document.getElementById('endpointChart');
|
||||||
|
|
||||||
|
if (methodCtx) {
|
||||||
|
methodChart = new Chart(methodCtx, {
|
||||||
|
type: 'doughnut',
|
||||||
|
data: {
|
||||||
|
labels: [],
|
||||||
|
datasets: [{
|
||||||
|
data: [],
|
||||||
|
backgroundColor: []
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
options: {
|
||||||
|
responsive: true,
|
||||||
|
maintainAspectRatio: false,
|
||||||
|
animation: false,
|
||||||
|
plugins: {
|
||||||
|
legend: { position: 'right', labels: { boxWidth: 12, font: { size: 11 } } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (statusCtx) {
|
||||||
|
statusChart = new Chart(statusCtx, {
|
||||||
|
type: 'bar',
|
||||||
|
data: {
|
||||||
|
labels: [],
|
||||||
|
datasets: [{
|
||||||
|
data: [],
|
||||||
|
backgroundColor: []
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
options: {
|
||||||
|
responsive: true,
|
||||||
|
maintainAspectRatio: false,
|
||||||
|
animation: false,
|
||||||
|
plugins: { legend: { display: false } },
|
||||||
|
scales: {
|
||||||
|
y: { beginAtZero: true, ticks: { stepSize: 1 } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (endpointCtx) {
|
||||||
|
endpointChart = new Chart(endpointCtx, {
|
||||||
|
type: 'bar',
|
||||||
|
data: {
|
||||||
|
labels: [],
|
||||||
|
datasets: [{
|
||||||
|
data: [],
|
||||||
|
backgroundColor: []
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
options: {
|
||||||
|
responsive: true,
|
||||||
|
maintainAspectRatio: false,
|
||||||
|
indexAxis: 'y',
|
||||||
|
animation: false,
|
||||||
|
plugins: { legend: { display: false } },
|
||||||
|
scales: {
|
||||||
|
x: { beginAtZero: true, ticks: { stepSize: 1 } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch the latest operation-metrics snapshot from the UI endpoint and refresh
// every dashboard widget: summary counters, the method/status/endpoint charts,
// and the top-10 S3 error-code table. No-ops while the tab is hidden (the
// visibilitychange handler also pauses the polling timer in that state).
function updateOpMetrics() {
    if (document.hidden) return;
    fetch('/ui/metrics/operations')
        .then(function(r) { return r.json(); })
        .then(function(data) {
            // Backend reports enabled=false (or no stats yet) when the
            // collector is not configured; surface that instead of zeros.
            if (!data.enabled || !data.stats) {
                if (opStatus) opStatus.textContent = 'Operation metrics not available';
                return;
            }
            var stats = data.stats;
            var totals = stats.totals || {};

            // Summary counter elements; each guarded below so a partial
            // template still updates whatever widgets are present.
            var totalEl = document.getElementById('opTotalRequests');
            var successEl = document.getElementById('opSuccessRate');
            var errorEl = document.getElementById('opErrorCount');
            var latencyEl = document.getElementById('opAvgLatency');
            var bytesInEl = document.getElementById('opBytesIn');
            var bytesOutEl = document.getElementById('opBytesOut');

            if (totalEl) totalEl.textContent = totals.count || 0;
            if (successEl) {
                // Avoid divide-by-zero: rate falls back to 0 when no requests.
                var rate = totals.count > 0 ? ((totals.success_count / totals.count) * 100).toFixed(1) : 0;
                successEl.textContent = rate + '%';
            }
            if (errorEl) errorEl.textContent = totals.error_count || 0;
            if (latencyEl) latencyEl.textContent = (totals.latency_avg_ms || 0).toFixed(1) + 'ms';
            if (bytesInEl) bytesInEl.textContent = formatBytes(totals.bytes_in || 0);
            if (bytesOutEl) bytesOutEl.textContent = formatBytes(totals.bytes_out || 0);

            // Requests-by-HTTP-method chart; unknown methods get a grey fill.
            if (methodChart && stats.by_method) {
                var methods = Object.keys(stats.by_method);
                var methodData = methods.map(function(m) { return stats.by_method[m].count; });
                var methodBg = methods.map(function(m) { return methodColors[m] || '#6c757d'; });
                methodChart.data.labels = methods;
                methodChart.data.datasets[0].data = methodData;
                methodChart.data.datasets[0].backgroundColor = methodBg;
                methodChart.update('none'); // 'none' skips animation on refresh
            }

            // Status-class (2xx/3xx/4xx/5xx) chart; sorted for stable ordering.
            if (statusChart && stats.by_status_class) {
                var statuses = Object.keys(stats.by_status_class).sort();
                var statusData = statuses.map(function(s) { return stats.by_status_class[s]; });
                var statusBg = statuses.map(function(s) { return statusColors[s] || '#6c757d'; });
                statusChart.data.labels = statuses;
                statusChart.data.datasets[0].data = statusData;
                statusChart.data.datasets[0].backgroundColor = statusBg;
                statusChart.update('none');
            }

            // Requests-by-endpoint-type chart (bucket/object/ui/kms/service).
            if (endpointChart && stats.by_endpoint) {
                var endpoints = Object.keys(stats.by_endpoint);
                var endpointData = endpoints.map(function(e) { return stats.by_endpoint[e].count; });
                var endpointBg = endpoints.map(function(e) { return endpointColors[e] || '#6c757d'; });
                endpointChart.data.labels = endpoints;
                endpointChart.data.datasets[0].data = endpointData;
                endpointChart.data.datasets[0].backgroundColor = endpointBg;
                endpointChart.update('none');
            }

            // Error-code breakdown: top 10 codes by count, with a percentage
            // bar relative to ALL errors (total computed before the slice).
            var errorBody = document.getElementById('errorCodesBody');
            if (errorBody && stats.error_codes) {
                var errorCodes = Object.entries(stats.error_codes);
                errorCodes.sort(function(a, b) { return b[1] - a[1]; });
                var totalErrors = errorCodes.reduce(function(sum, e) { return sum + e[1]; }, 0);
                errorCodes = errorCodes.slice(0, 10);
                if (errorCodes.length === 0) {
                    // Happy path: green check icon when no S3 errors recorded.
                    errorBody.innerHTML = '<div class="text-muted small text-center py-4">' +
                        '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="currentColor" class="bi bi-check-circle mb-2 text-success" viewBox="0 0 16 16">' +
                        '<path d="M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z"/>' +
                        '<path d="M10.97 4.97a.235.235 0 0 0-.02.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-1.071-1.05z"/>' +
                        '</svg><div>No S3 API errors</div></div>';
                } else {
                    // NOTE(review): e[0] (the error code) is interpolated into
                    // innerHTML unescaped — presumably codes are server-defined
                    // identifiers like "NoSuchKey"; confirm they can never
                    // carry user-controlled markup.
                    errorBody.innerHTML = errorCodes.map(function(e) {
                        var pct = totalErrors > 0 ? ((e[1] / totalErrors) * 100).toFixed(0) : 0;
                        return '<div class="d-flex align-items-center py-1" style="font-size: 0.8rem;">' +
                            '<div style="flex: 1;"><code class="text-danger">' + e[0] + '</code></div>' +
                            '<div class="text-end fw-semibold" style="width: 60px;">' + e[1] + '</div>' +
                            '<div style="width: 100px; padding-left: 10px;"><div class="progress" style="height: 6px;"><div class="progress-bar bg-danger" style="width: ' + pct + '%"></div></div></div>' +
                            '</div>';
                    }).join('');
                }
            }

            // Footer status line: measurement window ("3m 12s" / "45s") plus
            // the local time of this refresh.
            var windowMins = Math.floor(stats.window_seconds / 60);
            var windowSecs = stats.window_seconds % 60;
            var windowStr = windowMins > 0 ? windowMins + 'm ' + windowSecs + 's' : windowSecs + 's';
            if (opStatus) opStatus.textContent = 'Window: ' + windowStr + ' | ' + new Date().toLocaleTimeString();
        })
        .catch(function(err) {
            console.error('Operation metrics fetch error:', err);
            if (opStatus) opStatus.textContent = 'Failed to load';
        });
}
|
||||||
|
|
||||||
|
// (Re)start the 5-second metrics polling loop, clearing any previous timer
// first so repeated calls never stack multiple intervals.
function startOpPolling() {
    if (opTimer) clearInterval(opTimer);
    opTimer = setInterval(updateOpMetrics, 5000);
}
|
||||||
|
|
||||||
|
// Wire the "reset" button. NOTE(review): the handler only re-fetches the
// current stats — it does not call any server-side reset endpoint; confirm
// whether a POST to clear the collector was intended here.
var resetBtn = document.getElementById('resetOpMetricsBtn');
if (resetBtn) {
    resetBtn.addEventListener('click', function() {
        updateOpMetrics();
    });
}
|
||||||
|
|
||||||
|
// Pause polling while the tab is hidden to avoid wasted requests, and do an
// immediate refresh plus restart of the timer when it becomes visible again.
document.addEventListener('visibilitychange', function() {
    if (document.hidden) {
        if (opTimer) clearInterval(opTimer);
        opTimer = null;
    } else {
        updateOpMetrics();
        startOpPolling();
    }
});
|
||||||
|
|
||||||
|
// Bootstrap: build the empty charts, populate them once immediately, then
// begin the periodic refresh. Closes the operation-metrics IIFE.
initOpCharts();
updateOpMetrics();
startOpPolling();
})();
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
{% if metrics_history_enabled %}
|
{% if metrics_history_enabled %}
|
||||||
(function() {
|
(function() {
|
||||||
var cpuChart = null;
|
var cpuChart = null;
|
||||||
|
|||||||
297
tests/test_operation_metrics.py
Normal file
297
tests/test_operation_metrics.py
Normal file
@@ -0,0 +1,297 @@
|
|||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.operation_metrics import (
|
||||||
|
OperationMetricsCollector,
|
||||||
|
OperationStats,
|
||||||
|
classify_endpoint,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestOperationStats:
    """Unit tests for the OperationStats accumulator (counts, latency
    min/max/sum, byte totals, dict serialization, and merging)."""

    def test_initial_state(self):
        """A fresh accumulator starts with all counters zeroed."""
        stats = OperationStats()
        assert stats.count == 0
        assert stats.success_count == 0
        assert stats.error_count == 0
        assert stats.latency_sum_ms == 0.0
        assert stats.bytes_in == 0
        assert stats.bytes_out == 0

    def test_record_success(self):
        """One successful record updates count, latency bounds, and bytes."""
        stats = OperationStats()
        stats.record(latency_ms=50.0, success=True, bytes_in=100, bytes_out=200)

        assert stats.count == 1
        assert stats.success_count == 1
        assert stats.error_count == 0
        assert stats.latency_sum_ms == 50.0
        # A single sample is simultaneously the min and the max.
        assert stats.latency_min_ms == 50.0
        assert stats.latency_max_ms == 50.0
        assert stats.bytes_in == 100
        assert stats.bytes_out == 200

    def test_record_error(self):
        """success=False increments error_count, not success_count."""
        stats = OperationStats()
        stats.record(latency_ms=100.0, success=False, bytes_in=50, bytes_out=0)

        assert stats.count == 1
        assert stats.success_count == 0
        assert stats.error_count == 1

    def test_latency_min_max(self):
        """Min/max track the extremes across multiple samples; sum adds up."""
        stats = OperationStats()
        stats.record(latency_ms=50.0, success=True)
        stats.record(latency_ms=10.0, success=True)
        stats.record(latency_ms=100.0, success=True)

        assert stats.latency_min_ms == 10.0
        assert stats.latency_max_ms == 100.0
        assert stats.latency_sum_ms == 160.0

    def test_to_dict(self):
        """to_dict() reports totals plus a derived average latency."""
        stats = OperationStats()
        stats.record(latency_ms=50.0, success=True, bytes_in=100, bytes_out=200)
        stats.record(latency_ms=100.0, success=False, bytes_in=50, bytes_out=0)

        result = stats.to_dict()
        assert result["count"] == 2
        assert result["success_count"] == 1
        assert result["error_count"] == 1
        # avg = (50 + 100) / 2
        assert result["latency_avg_ms"] == 75.0
        assert result["latency_min_ms"] == 50.0
        assert result["latency_max_ms"] == 100.0
        assert result["bytes_in"] == 150
        assert result["bytes_out"] == 200

    def test_to_dict_empty(self):
        """With no samples, derived fields are 0.0 rather than NaN/error."""
        stats = OperationStats()
        result = stats.to_dict()
        assert result["count"] == 0
        assert result["latency_avg_ms"] == 0.0
        assert result["latency_min_ms"] == 0.0

    def test_merge(self):
        """merge() folds another accumulator in: counts and bytes add,
        latency min/max take the combined extremes."""
        stats1 = OperationStats()
        stats1.record(latency_ms=50.0, success=True, bytes_in=100, bytes_out=200)

        stats2 = OperationStats()
        stats2.record(latency_ms=10.0, success=True, bytes_in=50, bytes_out=100)
        stats2.record(latency_ms=100.0, success=False, bytes_in=25, bytes_out=50)

        stats1.merge(stats2)

        assert stats1.count == 3
        assert stats1.success_count == 2
        assert stats1.error_count == 1
        assert stats1.latency_min_ms == 10.0
        assert stats1.latency_max_ms == 100.0
        assert stats1.bytes_in == 175
        assert stats1.bytes_out == 350
|
||||||
|
|
||||||
|
|
||||||
|
class TestClassifyEndpoint:
    """classify_endpoint() buckets a raw request path into one of the
    endpoint categories: service, ui, kms, bucket, or object."""

    def test_root_path(self):
        """The bare root (or an empty path) counts as the service endpoint."""
        for path in ("/", ""):
            assert classify_endpoint(path) == "service"

    def test_ui_paths(self):
        """Anything under /ui is the management UI, regardless of depth."""
        for path in ("/ui", "/ui/buckets", "/ui/metrics"):
            assert classify_endpoint(path) == "ui"

    def test_kms_paths(self):
        """The /kms prefix maps to the KMS category, with or without a key."""
        for path in ("/kms", "/kms/keys"):
            assert classify_endpoint(path) == "kms"

    def test_service_paths(self):
        """Internal service routes (e.g. health checks) are 'service'."""
        assert classify_endpoint("/myfsio/health") == "service"

    def test_bucket_paths(self):
        """A single path segment — trailing slash or not — is a bucket op."""
        for path in ("/mybucket", "/mybucket/"):
            assert classify_endpoint(path) == "bucket"

    def test_object_paths(self):
        """Bucket plus a key (possibly nested) is an object operation."""
        for path in ("/mybucket/mykey", "/mybucket/folder/nested/key.txt"):
            assert classify_endpoint(path) == "object"
|
||||||
|
|
||||||
|
|
||||||
|
class TestOperationMetricsCollector:
    """Integration-style tests for OperationMetricsCollector: recording,
    aggregation, thread safety, snapshotting, and on-disk persistence.
    Every test shuts the collector down in a finally block so its
    background snapshot thread never outlives the test."""

    def test_record_and_get_stats(self, tmp_path: Path):
        """Recorded requests show up in totals and in the by-method,
        by-endpoint, by-status-class, and error_codes breakdowns."""
        collector = OperationMetricsCollector(
            storage_root=tmp_path,
            interval_minutes=60,
            retention_hours=24,
        )

        try:
            collector.record_request(
                method="GET",
                endpoint_type="bucket",
                status_code=200,
                latency_ms=50.0,
                bytes_in=0,
                bytes_out=1000,
            )

            collector.record_request(
                method="PUT",
                endpoint_type="object",
                status_code=201,
                latency_ms=100.0,
                bytes_in=500,
                bytes_out=0,
            )

            # A 404 with an S3 error code counts as an error, not a success.
            collector.record_request(
                method="GET",
                endpoint_type="object",
                status_code=404,
                latency_ms=25.0,
                bytes_in=0,
                bytes_out=0,
                error_code="NoSuchKey",
            )

            stats = collector.get_current_stats()

            assert stats["totals"]["count"] == 3
            assert stats["totals"]["success_count"] == 2
            assert stats["totals"]["error_count"] == 1

            assert "GET" in stats["by_method"]
            assert stats["by_method"]["GET"]["count"] == 2
            assert "PUT" in stats["by_method"]
            assert stats["by_method"]["PUT"]["count"] == 1

            assert "bucket" in stats["by_endpoint"]
            assert "object" in stats["by_endpoint"]
            assert stats["by_endpoint"]["object"]["count"] == 2

            assert stats["by_status_class"]["2xx"] == 2
            assert stats["by_status_class"]["4xx"] == 1

            assert stats["error_codes"]["NoSuchKey"] == 1
        finally:
            collector.shutdown()

    def test_thread_safety(self, tmp_path: Path):
        """Concurrent record_request calls from several threads must not
        lose any counts (500 total across 5 threads)."""
        collector = OperationMetricsCollector(
            storage_root=tmp_path,
            interval_minutes=60,
            retention_hours=24,
        )

        try:
            num_threads = 5
            requests_per_thread = 100
            threads = []

            def record_requests():
                for _ in range(requests_per_thread):
                    collector.record_request(
                        method="GET",
                        endpoint_type="object",
                        status_code=200,
                        latency_ms=10.0,
                    )

            for _ in range(num_threads):
                t = threading.Thread(target=record_requests)
                threads.append(t)
                t.start()

            for t in threads:
                t.join()

            stats = collector.get_current_stats()
            assert stats["totals"]["count"] == num_threads * requests_per_thread
        finally:
            collector.shutdown()

    def test_status_class_categorization(self, tmp_path: Path):
        """Status codes bucket into 2xx/3xx/4xx/5xx classes."""
        collector = OperationMetricsCollector(
            storage_root=tmp_path,
            interval_minutes=60,
            retention_hours=24,
        )

        try:
            collector.record_request("GET", "object", 200, 10.0)
            collector.record_request("GET", "object", 204, 10.0)
            collector.record_request("GET", "object", 301, 10.0)
            collector.record_request("GET", "object", 304, 10.0)
            collector.record_request("GET", "object", 400, 10.0)
            collector.record_request("GET", "object", 403, 10.0)
            collector.record_request("GET", "object", 404, 10.0)
            collector.record_request("GET", "object", 500, 10.0)
            collector.record_request("GET", "object", 503, 10.0)

            stats = collector.get_current_stats()
            assert stats["by_status_class"]["2xx"] == 2
            assert stats["by_status_class"]["3xx"] == 2
            assert stats["by_status_class"]["4xx"] == 3
            assert stats["by_status_class"]["5xx"] == 2
        finally:
            collector.shutdown()

    def test_error_code_tracking(self, tmp_path: Path):
        """S3 error codes are tallied individually across methods/endpoints."""
        collector = OperationMetricsCollector(
            storage_root=tmp_path,
            interval_minutes=60,
            retention_hours=24,
        )

        try:
            collector.record_request("GET", "object", 404, 10.0, error_code="NoSuchKey")
            collector.record_request("GET", "object", 404, 10.0, error_code="NoSuchKey")
            collector.record_request("GET", "bucket", 403, 10.0, error_code="AccessDenied")
            collector.record_request("PUT", "object", 500, 10.0, error_code="InternalError")

            stats = collector.get_current_stats()
            assert stats["error_codes"]["NoSuchKey"] == 2
            assert stats["error_codes"]["AccessDenied"] == 1
            assert stats["error_codes"]["InternalError"] == 1
        finally:
            collector.shutdown()

    def test_history_persistence(self, tmp_path: Path):
        """Taking a snapshot (via the private _take_snapshot hook) appends to
        in-memory history AND writes the JSON file under .myfsio.sys/config."""
        collector = OperationMetricsCollector(
            storage_root=tmp_path,
            interval_minutes=60,
            retention_hours=24,
        )

        try:
            collector.record_request("GET", "object", 200, 10.0)
            collector._take_snapshot()

            history = collector.get_history()
            assert len(history) == 1
            assert history[0]["totals"]["count"] == 1

            config_path = tmp_path / ".myfsio.sys" / "config" / "operation_metrics.json"
            assert config_path.exists()
        finally:
            collector.shutdown()

    def test_get_history_with_hours_filter(self, tmp_path: Path):
        """An hours= filter never returns MORE entries than the full history.
        (Only a weak inequality is asserted since both snapshots are fresh.)"""
        collector = OperationMetricsCollector(
            storage_root=tmp_path,
            interval_minutes=60,
            retention_hours=24,
        )

        try:
            collector.record_request("GET", "object", 200, 10.0)
            collector._take_snapshot()

            history_all = collector.get_history()
            history_recent = collector.get_history(hours=1)

            assert len(history_all) >= len(history_recent)
        finally:
            collector.shutdown()
|
||||||
Reference in New Issue
Block a user