Files
MyFSIO/crates/myfsio-server/src/services/metrics.rs

369 lines
11 KiB
Rust

use chrono::{DateTime, Utc};
use parking_lot::Mutex;
use rand::Rng;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
const MAX_LATENCY_SAMPLES: usize = 5000;
pub struct MetricsConfig {
pub interval_minutes: u64,
pub retention_hours: u64,
}
impl Default for MetricsConfig {
fn default() -> Self {
Self {
interval_minutes: 5,
retention_hours: 24,
}
}
}
#[derive(Debug, Clone)]
struct OperationStats {
count: u64,
success_count: u64,
error_count: u64,
latency_sum_ms: f64,
latency_min_ms: f64,
latency_max_ms: f64,
bytes_in: u64,
bytes_out: u64,
latency_samples: Vec<f64>,
}
impl Default for OperationStats {
fn default() -> Self {
Self {
count: 0,
success_count: 0,
error_count: 0,
latency_sum_ms: 0.0,
latency_min_ms: f64::INFINITY,
latency_max_ms: 0.0,
bytes_in: 0,
bytes_out: 0,
latency_samples: Vec::new(),
}
}
}
impl OperationStats {
fn record(&mut self, latency_ms: f64, success: bool, bytes_in: u64, bytes_out: u64) {
self.count += 1;
if success {
self.success_count += 1;
} else {
self.error_count += 1;
}
self.latency_sum_ms += latency_ms;
if latency_ms < self.latency_min_ms {
self.latency_min_ms = latency_ms;
}
if latency_ms > self.latency_max_ms {
self.latency_max_ms = latency_ms;
}
self.bytes_in += bytes_in;
self.bytes_out += bytes_out;
if self.latency_samples.len() < MAX_LATENCY_SAMPLES {
self.latency_samples.push(latency_ms);
} else {
let mut rng = rand::thread_rng();
let j = rng.gen_range(0..self.count as usize);
if j < MAX_LATENCY_SAMPLES {
self.latency_samples[j] = latency_ms;
}
}
}
fn compute_percentile(sorted: &[f64], p: f64) -> f64 {
if sorted.is_empty() {
return 0.0;
}
let k = (sorted.len() - 1) as f64 * (p / 100.0);
let f = k.floor() as usize;
let c = (f + 1).min(sorted.len() - 1);
let d = k - f as f64;
sorted[f] + d * (sorted[c] - sorted[f])
}
fn to_json(&self) -> Value {
let avg = if self.count > 0 {
self.latency_sum_ms / self.count as f64
} else {
0.0
};
let min = if self.latency_min_ms.is_infinite() {
0.0
} else {
self.latency_min_ms
};
let mut sorted = self.latency_samples.clone();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
json!({
"count": self.count,
"success_count": self.success_count,
"error_count": self.error_count,
"latency_avg_ms": round2(avg),
"latency_min_ms": round2(min),
"latency_max_ms": round2(self.latency_max_ms),
"latency_p50_ms": round2(Self::compute_percentile(&sorted, 50.0)),
"latency_p95_ms": round2(Self::compute_percentile(&sorted, 95.0)),
"latency_p99_ms": round2(Self::compute_percentile(&sorted, 99.0)),
"bytes_in": self.bytes_in,
"bytes_out": self.bytes_out,
})
}
}
fn round2(v: f64) -> f64 {
(v * 100.0).round() / 100.0
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricsSnapshot {
pub timestamp: DateTime<Utc>,
pub window_seconds: u64,
pub by_method: HashMap<String, Value>,
pub by_endpoint: HashMap<String, Value>,
pub by_status_class: HashMap<String, u64>,
pub error_codes: HashMap<String, u64>,
pub totals: Value,
}
struct Inner {
by_method: HashMap<String, OperationStats>,
by_endpoint: HashMap<String, OperationStats>,
by_status_class: HashMap<String, u64>,
error_codes: HashMap<String, u64>,
totals: OperationStats,
window_start: f64,
snapshots: Vec<MetricsSnapshot>,
}
pub struct MetricsService {
config: MetricsConfig,
inner: Arc<Mutex<Inner>>,
snapshots_path: PathBuf,
}
impl MetricsService {
pub fn new(storage_root: &Path, config: MetricsConfig) -> Self {
let snapshots_path = storage_root
.join(".myfsio.sys")
.join("config")
.join("operation_metrics.json");
let mut snapshots: Vec<MetricsSnapshot> = if snapshots_path.exists() {
std::fs::read_to_string(&snapshots_path)
.ok()
.and_then(|s| serde_json::from_str::<Value>(&s).ok())
.and_then(|v| {
v.get("snapshots").and_then(|s| {
serde_json::from_value::<Vec<MetricsSnapshot>>(s.clone()).ok()
})
})
.unwrap_or_default()
} else {
Vec::new()
};
let cutoff = now_secs() - (config.retention_hours * 3600) as f64;
snapshots.retain(|s| s.timestamp.timestamp() as f64 > cutoff);
Self {
config,
inner: Arc::new(Mutex::new(Inner {
by_method: HashMap::new(),
by_endpoint: HashMap::new(),
by_status_class: HashMap::new(),
error_codes: HashMap::new(),
totals: OperationStats::default(),
window_start: now_secs(),
snapshots,
})),
snapshots_path,
}
}
pub fn record_request(
&self,
method: &str,
endpoint_type: &str,
status_code: u16,
latency_ms: f64,
bytes_in: u64,
bytes_out: u64,
error_code: Option<&str>,
) {
let success = (200..400).contains(&status_code);
let status_class = format!("{}xx", status_code / 100);
let mut inner = self.inner.lock();
inner
.by_method
.entry(method.to_string())
.or_default()
.record(latency_ms, success, bytes_in, bytes_out);
inner
.by_endpoint
.entry(endpoint_type.to_string())
.or_default()
.record(latency_ms, success, bytes_in, bytes_out);
*inner.by_status_class.entry(status_class).or_insert(0) += 1;
if let Some(code) = error_code {
*inner.error_codes.entry(code.to_string()).or_insert(0) += 1;
}
inner
.totals
.record(latency_ms, success, bytes_in, bytes_out);
}
pub fn get_current_stats(&self) -> Value {
let inner = self.inner.lock();
let window_seconds = (now_secs() - inner.window_start).max(0.0) as u64;
let by_method: HashMap<String, Value> = inner
.by_method
.iter()
.map(|(k, v)| (k.clone(), v.to_json()))
.collect();
let by_endpoint: HashMap<String, Value> = inner
.by_endpoint
.iter()
.map(|(k, v)| (k.clone(), v.to_json()))
.collect();
json!({
"timestamp": Utc::now().to_rfc3339(),
"window_seconds": window_seconds,
"by_method": by_method,
"by_endpoint": by_endpoint,
"by_status_class": inner.by_status_class,
"error_codes": inner.error_codes,
"totals": inner.totals.to_json(),
})
}
pub fn get_history(&self, hours: Option<u64>) -> Vec<MetricsSnapshot> {
let inner = self.inner.lock();
let mut snapshots = inner.snapshots.clone();
if let Some(h) = hours {
let cutoff = now_secs() - (h * 3600) as f64;
snapshots.retain(|s| s.timestamp.timestamp() as f64 > cutoff);
}
snapshots
}
pub fn snapshot(&self) -> Value {
let current = self.get_current_stats();
let history = self.get_history(None);
json!({
"enabled": true,
"current": current,
"snapshots": history,
})
}
fn take_snapshot(&self) {
let snapshot = {
let mut inner = self.inner.lock();
let window_seconds = (now_secs() - inner.window_start).max(0.0) as u64;
let by_method: HashMap<String, Value> = inner
.by_method
.iter()
.map(|(k, v)| (k.clone(), v.to_json()))
.collect();
let by_endpoint: HashMap<String, Value> = inner
.by_endpoint
.iter()
.map(|(k, v)| (k.clone(), v.to_json()))
.collect();
let snap = MetricsSnapshot {
timestamp: Utc::now(),
window_seconds,
by_method,
by_endpoint,
by_status_class: inner.by_status_class.clone(),
error_codes: inner.error_codes.clone(),
totals: inner.totals.to_json(),
};
inner.snapshots.push(snap.clone());
let cutoff = now_secs() - (self.config.retention_hours * 3600) as f64;
inner
.snapshots
.retain(|s| s.timestamp.timestamp() as f64 > cutoff);
inner.by_method.clear();
inner.by_endpoint.clear();
inner.by_status_class.clear();
inner.error_codes.clear();
inner.totals = OperationStats::default();
inner.window_start = now_secs();
snap
};
let _ = snapshot;
self.save_snapshots();
}
fn save_snapshots(&self) {
let snapshots = { self.inner.lock().snapshots.clone() };
if let Some(parent) = self.snapshots_path.parent() {
let _ = std::fs::create_dir_all(parent);
}
let data = json!({ "snapshots": snapshots });
let _ = std::fs::write(
&self.snapshots_path,
serde_json::to_string_pretty(&data).unwrap_or_default(),
);
}
pub fn start_background(self: Arc<Self>) -> tokio::task::JoinHandle<()> {
let interval = std::time::Duration::from_secs(self.config.interval_minutes * 60);
tokio::spawn(async move {
let mut timer = tokio::time::interval(interval);
timer.tick().await;
loop {
timer.tick().await;
self.take_snapshot();
}
})
}
}
pub fn classify_endpoint(path: &str) -> &'static str {
if path.is_empty() || path == "/" {
return "service";
}
let trimmed = path.trim_end_matches('/');
if trimmed.starts_with("/ui") {
return "ui";
}
if trimmed.starts_with("/kms") {
return "kms";
}
if trimmed.starts_with("/myfsio") {
return "service";
}
let parts: Vec<&str> = trimmed.trim_start_matches('/').split('/').collect();
match parts.len() {
0 => "service",
1 => "bucket",
_ => "object",
}
}
fn now_secs() -> f64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_secs_f64())
.unwrap_or(0.0)
}