Add replication failure tracking and lifecycle execution history

This commit is contained in:
2026-01-05 00:18:08 +08:00
parent 9ab750650c
commit 93a5aa6618
6 changed files with 720 additions and 9 deletions

View File

@@ -124,7 +124,7 @@ def create_app(
)
connections = ConnectionStore(connections_path)
replication = ReplicationManager(storage, connections, replication_rules_path)
replication = ReplicationManager(storage, connections, replication_rules_path, storage_root)
encryption_config = {
"encryption_enabled": app.config.get("ENCRYPTION_ENABLED", False),
@@ -156,6 +156,7 @@ def create_app(
lifecycle_manager = LifecycleManager(
base_storage,
interval_seconds=app.config.get("LIFECYCLE_INTERVAL_SECONDS", 3600),
storage_root=storage_root,
)
lifecycle_manager.start()

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import json
import logging
import threading
import time
@@ -23,13 +24,104 @@ class LifecycleResult:
execution_time_seconds: float = 0.0
@dataclass
class LifecycleExecutionRecord:
    """Outcome of one lifecycle enforcement run for a single bucket.

    ``timestamp`` is the wall-clock time (``time.time()``) at which the
    record was created from a result; the remaining fields mirror the
    counters and error list of the result it was built from.
    """

    timestamp: float
    bucket_name: str
    objects_deleted: int
    versions_deleted: int
    uploads_aborted: int
    errors: List[str]
    execution_time_seconds: float

    def to_dict(self) -> dict:
        """Return the record as a plain dict (same key order as the fields)."""
        field_names = (
            "timestamp",
            "bucket_name",
            "objects_deleted",
            "versions_deleted",
            "uploads_aborted",
            "errors",
            "execution_time_seconds",
        )
        return {name: getattr(self, name) for name in field_names}

    @classmethod
    def from_dict(cls, data: dict) -> "LifecycleExecutionRecord":
        """Rebuild a record from ``to_dict`` output.

        Every key except ``errors`` is required (``KeyError`` when missing);
        a missing ``errors`` key yields an empty list.
        """
        required_keys = (
            "timestamp",
            "bucket_name",
            "objects_deleted",
            "versions_deleted",
            "uploads_aborted",
            "execution_time_seconds",
        )
        kwargs = {key: data[key] for key in required_keys}
        kwargs["errors"] = data.get("errors", [])
        return cls(**kwargs)

    @classmethod
    def from_result(cls, result: "LifecycleResult") -> "LifecycleExecutionRecord":
        """Create a record stamped with the current time from *result*.

        The error list is copied so later changes to the result do not
        alter the stored record.
        """
        created_at = time.time()
        return cls(
            created_at,
            result.bucket_name,
            result.objects_deleted,
            result.versions_deleted,
            result.uploads_aborted,
            result.errors.copy(),
            result.execution_time_seconds,
        )
class LifecycleHistoryStore:
    """Persist a bounded, newest-first execution history per bucket.

    Each bucket's history is a JSON document of the form
    ``{"executions": [...]}`` stored at
    ``<storage_root>/.myfsio.sys/buckets/<bucket>/lifecycle_history.json``.
    All failures are logged and swallowed: history is best-effort and must
    never break lifecycle enforcement itself.
    """

    MAX_HISTORY_PER_BUCKET = 50

    def __init__(self, storage_root: Path) -> None:
        self.storage_root = storage_root
        # Guards the read-modify-write cycle in add_record for this instance.
        self._lock = threading.Lock()

    def _get_history_path(self, bucket_name: str) -> Path:
        """Return the history file path for *bucket_name*."""
        return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "lifecycle_history.json"

    def load_history(self, bucket_name: str) -> "List[LifecycleExecutionRecord]":
        """Return all stored records for the bucket, newest first.

        A missing file, a null/absent ``executions`` entry, or an unreadable
        or malformed file all yield an empty list.
        """
        path = self._get_history_path(bucket_name)
        if not path.exists():
            return []
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
            if not isinstance(data, dict):
                raise ValueError("history file does not contain a JSON object")
            # ``"executions": null`` is treated like an empty history.
            entries = data.get("executions") or []
            return [LifecycleExecutionRecord.from_dict(d) for d in entries]
        except (OSError, ValueError, KeyError, TypeError, AttributeError) as e:
            # TypeError/AttributeError cover structurally wrong (but valid) JSON.
            logger.error(f"Failed to load lifecycle history for {bucket_name}: {e}")
            return []

    def save_history(self, bucket_name: str, records: "List[LifecycleExecutionRecord]") -> None:
        """Write at most ``MAX_HISTORY_PER_BUCKET`` leading records to disk.

        The document is written to a sibling temp file and renamed over the
        target, so an interrupted write cannot leave a truncated history.
        Callers that write concurrently should go through ``add_record``,
        which holds the lock; the temp file name is shared.
        """
        path = self._get_history_path(bucket_name)
        data = {"executions": [r.to_dict() for r in records[:self.MAX_HISTORY_PER_BUCKET]]}
        tmp_path = path.with_name(path.name + ".tmp")
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
            tmp_path.replace(path)
        except OSError as e:
            logger.error(f"Failed to save lifecycle history for {bucket_name}: {e}")

    def add_record(self, bucket_name: str, record: "LifecycleExecutionRecord") -> None:
        """Prepend *record* to the bucket's history and persist it."""
        with self._lock:
            records = self.load_history(bucket_name)
            records.insert(0, record)
            self.save_history(bucket_name, records)

    def get_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> "List[LifecycleExecutionRecord]":
        """Return up to *limit* records starting at *offset* (newest first).

        Negative values are clamped to zero so they are never interpreted
        as from-the-end slice indices.
        """
        records = self.load_history(bucket_name)
        start = max(offset, 0)
        count = max(limit, 0)
        return records[start:start + count]
class LifecycleManager:
def __init__(self, storage: ObjectStorage, interval_seconds: int = 3600):
def __init__(self, storage: ObjectStorage, interval_seconds: int = 3600, storage_root: Optional[Path] = None):
self.storage = storage
self.interval_seconds = interval_seconds
self.storage_root = storage_root
self._timer: Optional[threading.Timer] = None
self._shutdown = False
self._lock = threading.Lock()
self.history_store = LifecycleHistoryStore(storage_root) if storage_root else None
def start(self) -> None:
if self._timer is not None:
@@ -98,12 +190,15 @@ class LifecycleManager:
logger.error(f"Lifecycle enforcement error for {bucket_name}: {e}")
result.execution_time_seconds = time.time() - start_time
if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0:
if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0 or result.errors:
logger.info(
f"Lifecycle enforcement for {bucket_name}: "
f"deleted={result.objects_deleted}, versions={result.versions_deleted}, "
f"aborted={result.uploads_aborted}, time={result.execution_time_seconds:.2f}s"
)
if self.history_store:
record = LifecycleExecutionRecord.from_result(result)
self.history_store.add_record(bucket_name, record)
return result
def _enforce_expiration(
@@ -233,3 +328,8 @@ class LifecycleManager:
if bucket_name:
return {bucket_name: self.enforce_rules(bucket_name)}
return self.enforce_all_buckets()
def get_execution_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> "List[LifecycleExecutionRecord]":
    """Return a page of stored execution records for *bucket_name*.

    Yields an empty list when no history store is configured.
    """
    store = self.history_store
    if store:
        return store.get_history(bucket_name, limit, offset)
    return []

View File

@@ -8,7 +8,7 @@ import time
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional
import boto3
from botocore.config import Config
@@ -87,6 +87,40 @@ class ReplicationStats:
)
@dataclass
class ReplicationFailure:
    """A recorded replication failure for one object of a bucket.

    ``action`` is the replication action that failed (for example
    ``"delete"``); ``last_error_code`` is optional and defaults to ``None``.
    """

    object_key: str
    error_message: str
    timestamp: float
    failure_count: int
    bucket_name: str
    action: str
    last_error_code: Optional[str] = None

    def to_dict(self) -> dict:
        """Return the failure as a plain dict (same key order as the fields)."""
        field_names = (
            "object_key",
            "error_message",
            "timestamp",
            "failure_count",
            "bucket_name",
            "action",
            "last_error_code",
        )
        return {name: getattr(self, name) for name in field_names}

    @classmethod
    def from_dict(cls, data: dict) -> "ReplicationFailure":
        """Rebuild a failure from ``to_dict`` output.

        All keys are required (``KeyError`` when missing) except
        ``last_error_code``, which falls back to ``None``.
        """
        required_keys = (
            "object_key",
            "error_message",
            "timestamp",
            "failure_count",
            "bucket_name",
            "action",
        )
        required = {key: data[key] for key in required_keys}
        return cls(**required, last_error_code=data.get("last_error_code"))
@dataclass
class ReplicationRule:
bucket_name: str
@@ -120,15 +154,86 @@ class ReplicationRule:
return rule
class ReplicationFailureStore:
    """Persist a bounded, most-recent-first list of replication failures per bucket.

    Each bucket's failures are a JSON document of the form
    ``{"failures": [...]}`` stored at
    ``<storage_root>/.myfsio.sys/buckets/<bucket>/replication_failures.json``.
    I/O problems are logged and swallowed so that failure bookkeeping never
    raises into the replication path.
    """

    MAX_FAILURES_PER_BUCKET = 50

    def __init__(self, storage_root: Path) -> None:
        self.storage_root = storage_root
        # Guards the read-modify-write cycles performed by this instance.
        self._lock = threading.Lock()

    def _get_failures_path(self, bucket_name: str) -> Path:
        """Return the failures file path for *bucket_name*."""
        return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "replication_failures.json"

    def load_failures(self, bucket_name: str) -> "List[ReplicationFailure]":
        """Return all stored failures for the bucket.

        A missing file, a null/absent ``failures`` entry, or an unreadable
        or malformed file all yield an empty list.
        """
        path = self._get_failures_path(bucket_name)
        if not path.exists():
            return []
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
            if not isinstance(data, dict):
                raise ValueError("failures file does not contain a JSON object")
            # ``"failures": null`` is treated like an empty list.
            entries = data.get("failures") or []
            return [ReplicationFailure.from_dict(d) for d in entries]
        except (OSError, ValueError, KeyError, TypeError, AttributeError) as e:
            # TypeError/AttributeError cover structurally wrong (but valid) JSON.
            logger.error(f"Failed to load replication failures for {bucket_name}: {e}")
            return []

    def save_failures(self, bucket_name: str, failures: "List[ReplicationFailure]") -> None:
        """Write at most ``MAX_FAILURES_PER_BUCKET`` leading failures to disk.

        The document is written to a sibling temp file and renamed over the
        target, so an interrupted write cannot leave a truncated file.
        """
        path = self._get_failures_path(bucket_name)
        data = {"failures": [f.to_dict() for f in failures[:self.MAX_FAILURES_PER_BUCKET]]}
        tmp_path = path.with_name(path.name + ".tmp")
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
            tmp_path.replace(path)
        except OSError as e:
            logger.error(f"Failed to save replication failures for {bucket_name}: {e}")

    def add_failure(self, bucket_name: str, failure: "ReplicationFailure") -> None:
        """Record *failure*, merging with an existing entry for the same key.

        A repeat failure bumps ``failure_count`` and takes over the latest
        timestamp, message, error code and action. The action must follow the
        latest attempt, otherwise a retry would replay a stale operation.
        The touched entry is moved to the front so truncation to
        ``MAX_FAILURES_PER_BUCKET`` always drops the least recently failing
        objects.
        """
        with self._lock:
            failures = self.load_failures(bucket_name)
            existing = next((f for f in failures if f.object_key == failure.object_key), None)
            if existing:
                existing.failure_count += 1
                existing.timestamp = failure.timestamp
                existing.error_message = failure.error_message
                existing.last_error_code = failure.last_error_code
                existing.action = failure.action
                failures = [existing] + [f for f in failures if f is not existing]
            else:
                failures.insert(0, failure)
            self.save_failures(bucket_name, failures)

    def remove_failure(self, bucket_name: str, object_key: str) -> bool:
        """Drop the entry for *object_key*; return True if one was removed."""
        with self._lock:
            failures = self.load_failures(bucket_name)
            remaining = [f for f in failures if f.object_key != object_key]
            if len(remaining) < len(failures):
                self.save_failures(bucket_name, remaining)
                return True
            return False

    def clear_failures(self, bucket_name: str) -> None:
        """Delete the bucket's failures file; a missing file is not an error."""
        with self._lock:
            path = self._get_failures_path(bucket_name)
            try:
                path.unlink()
            except FileNotFoundError:
                # Nothing recorded (or already cleared) - nothing to do.
                pass
            except OSError as e:
                logger.error(f"Failed to clear replication failures for {bucket_name}: {e}")

    def get_failure(self, bucket_name: str, object_key: str) -> "Optional[ReplicationFailure]":
        """Return the stored failure for *object_key*, or None."""
        failures = self.load_failures(bucket_name)
        return next((f for f in failures if f.object_key == object_key), None)

    def get_failure_count(self, bucket_name: str) -> int:
        """Return the number of stored failures for the bucket."""
        return len(self.load_failures(bucket_name))
class ReplicationManager:
def __init__(self, storage: ObjectStorage, connections: ConnectionStore, rules_path: Path) -> None:
def __init__(self, storage: ObjectStorage, connections: ConnectionStore, rules_path: Path, storage_root: Path) -> None:
self.storage = storage
self.connections = connections
self.rules_path = rules_path
self.storage_root = storage_root
self._rules: Dict[str, ReplicationRule] = {}
self._stats_lock = threading.Lock()
self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
self._shutdown = False
self.failure_store = ReplicationFailureStore(storage_root)
self.reload_rules()
def shutdown(self, wait: bool = True) -> None:
@@ -331,8 +436,19 @@ class ReplicationManager:
s3.delete_object(Bucket=rule.target_bucket, Key=object_key)
logger.info(f"Replicated DELETE {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
self._update_last_sync(bucket_name, object_key)
self.failure_store.remove_failure(bucket_name, object_key)
except ClientError as e:
error_code = e.response.get('Error', {}).get('Code')
logger.error(f"Replication DELETE failed for {bucket_name}/{object_key}: {e}")
self.failure_store.add_failure(bucket_name, ReplicationFailure(
object_key=object_key,
error_message=str(e),
timestamp=time.time(),
failure_count=1,
bucket_name=bucket_name,
action="delete",
last_error_code=error_code,
))
return
try:
@@ -405,9 +521,89 @@ class ReplicationManager:
logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
self._update_last_sync(bucket_name, object_key)
self.failure_store.remove_failure(bucket_name, object_key)
except (ClientError, OSError, ValueError) as e:
error_code = None
if isinstance(e, ClientError):
error_code = e.response.get('Error', {}).get('Code')
logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}")
except Exception:
self.failure_store.add_failure(bucket_name, ReplicationFailure(
object_key=object_key,
error_message=str(e),
timestamp=time.time(),
failure_count=1,
bucket_name=bucket_name,
action=action,
last_error_code=error_code,
))
except Exception as e:
logger.exception(f"Unexpected error during replication for {bucket_name}/{object_key}")
self.failure_store.add_failure(bucket_name, ReplicationFailure(
object_key=object_key,
error_message=str(e),
timestamp=time.time(),
failure_count=1,
bucket_name=bucket_name,
action=action,
last_error_code=None,
))
def get_failed_items(self, bucket_name: str, limit: int = 50, offset: int = 0) -> "List[ReplicationFailure]":
    """Return up to *limit* recorded failures for the bucket, starting at *offset*.

    Negative values are clamped to zero; otherwise they would be applied
    as from-the-end slice indices and return an unrelated window.
    """
    failures = self.failure_store.load_failures(bucket_name)
    start = max(offset, 0)
    count = max(limit, 0)
    return failures[start:start + count]
def get_failure_count(self, bucket_name: str) -> int:
    """Return how many failures the failure store holds for *bucket_name*."""
    store = self.failure_store
    return store.get_failure_count(bucket_name)
def retry_failed_item(self, bucket_name: str, object_key: str) -> bool:
    """Queue a new replication attempt for one recorded failure.

    Returns False without submitting anything when there is no recorded
    failure for the key, the bucket has no enabled rule, the rule's
    connection is unknown, or the endpoint health check fails. Returns
    True once the task has been handed to the executor, reusing the
    action stored on the failure record.
    """
    recorded = self.failure_store.get_failure(bucket_name, object_key)
    if not recorded:
        return False

    rule = self.get_rule(bucket_name)
    if not (rule and rule.enabled):
        return False

    target = self.connections.get(rule.target_connection_id)
    if not target:
        logger.warning(f"Cannot retry: Connection {rule.target_connection_id} not found")
        return False

    healthy = self.check_endpoint_health(target)
    if not healthy:
        logger.warning(f"Cannot retry: Endpoint {target.name} is not reachable")
        return False

    self._executor.submit(
        self._replicate_task,
        bucket_name,
        object_key,
        rule,
        target,
        recorded.action,
    )
    return True
def retry_all_failed(self, bucket_name: str) -> Dict[str, int]:
    """Queue a new replication attempt for every recorded failure of a bucket.

    Returns ``{"submitted": n, "skipped": m}``. Everything is reported as
    skipped when the bucket has no enabled rule, the rule's connection is
    unknown, or the endpoint health check fails; otherwise every recorded
    failure is submitted with its stored action.
    """
    pending = self.failure_store.load_failures(bucket_name)
    if not pending:
        return {"submitted": 0, "skipped": 0}

    rule = self.get_rule(bucket_name)
    if not (rule and rule.enabled):
        return {"submitted": 0, "skipped": len(pending)}

    target = self.connections.get(rule.target_connection_id)
    if not target:
        logger.warning(f"Cannot retry: Connection {rule.target_connection_id} not found")
        return {"submitted": 0, "skipped": len(pending)}

    healthy = self.check_endpoint_health(target)
    if not healthy:
        logger.warning(f"Cannot retry: Endpoint {target.name} is not reachable")
        return {"submitted": 0, "skipped": len(pending)}

    queued = 0
    for item in pending:
        self._executor.submit(
            self._replicate_task, bucket_name, item.object_key, rule, target, item.action
        )
        queued += 1
    return {"submitted": queued, "skipped": 0}
def dismiss_failure(self, bucket_name: str, object_key: str) -> bool:
    """Remove the recorded failure for *object_key*; True if one was removed."""
    removed = self.failure_store.remove_failure(bucket_name, object_key)
    return removed
def clear_failures(self, bucket_name: str) -> None:
    """Discard every recorded replication failure for *bucket_name*."""
    store = self.failure_store
    store.clear_failures(bucket_name)

109
app/ui.py
View File

@@ -1590,6 +1590,84 @@ def get_replication_status(bucket_name: str):
})
@ui_bp.get("/buckets/<bucket_name>/replication/failures")
def get_replication_failures(bucket_name: str):
    """Return a JSON page of recorded replication failures for a bucket.

    Query parameters: ``limit`` (default 50) and ``offset`` (default 0).
    Responds 403 when the caller lacks the ``replication`` permission.
    """
    principal = _current_principal()
    try:
        _authorize_ui(principal, bucket_name, "replication")
    except IamError:
        return jsonify({"error": "Access denied"}), 403

    # Clamp client-supplied paging values: negative numbers would otherwise
    # be applied as from-the-end slice indices further down.
    limit = max(request.args.get("limit", 50, type=int), 0)
    offset = max(request.args.get("offset", 0, type=int), 0)

    failures = _replication().get_failed_items(bucket_name, limit, offset)
    total = _replication().get_failure_count(bucket_name)

    return jsonify({
        "failures": [f.to_dict() for f in failures],
        "total": total,
        "limit": limit,
        "offset": offset,
    })
@ui_bp.post("/buckets/<bucket_name>/replication/failures/<path:object_key>/retry")
def retry_replication_failure(bucket_name: str, object_key: str):
    """Queue a retry for one failed object.

    Responds 403 without the ``replication`` permission and 400 when the
    replication manager declines to submit the retry.
    """
    principal = _current_principal()
    try:
        _authorize_ui(principal, bucket_name, "replication")
    except IamError:
        return jsonify({"error": "Access denied"}), 403

    submitted = _replication().retry_failed_item(bucket_name, object_key)
    if not submitted:
        return jsonify({"error": "Failed to submit retry"}), 400
    return jsonify({"status": "submitted", "object_key": object_key})
@ui_bp.post("/buckets/<bucket_name>/replication/failures/retry-all")
def retry_all_replication_failures(bucket_name: str):
    """Queue retries for every recorded failure of a bucket.

    Responds 403 without the ``replication`` permission; otherwise reports
    the submitted and skipped counts returned by the replication manager.
    """
    principal = _current_principal()
    try:
        _authorize_ui(principal, bucket_name, "replication")
    except IamError:
        return jsonify({"error": "Access denied"}), 403

    outcome = _replication().retry_all_failed(bucket_name)
    payload = {
        "status": "submitted",
        "submitted": outcome["submitted"],
        "skipped": outcome["skipped"],
    }
    return jsonify(payload)
@ui_bp.delete("/buckets/<bucket_name>/replication/failures/<path:object_key>")
def dismiss_replication_failure(bucket_name: str, object_key: str):
    """Remove one recorded failure without retrying it.

    Responds 403 without the ``replication`` permission and 404 when no
    failure is recorded for the key.
    """
    principal = _current_principal()
    try:
        _authorize_ui(principal, bucket_name, "replication")
    except IamError:
        return jsonify({"error": "Access denied"}), 403

    dismissed = _replication().dismiss_failure(bucket_name, object_key)
    if not dismissed:
        return jsonify({"error": "Failure not found"}), 404
    return jsonify({"status": "dismissed", "object_key": object_key})
@ui_bp.delete("/buckets/<bucket_name>/replication/failures")
def clear_replication_failures(bucket_name: str):
    """Discard every recorded replication failure for a bucket.

    Responds 403 without the ``replication`` permission.
    """
    principal = _current_principal()
    try:
        _authorize_ui(principal, bucket_name, "replication")
    except IamError:
        return jsonify({"error": "Access denied"}), 403

    manager = _replication()
    manager.clear_failures(bucket_name)
    return jsonify({"status": "cleared"})
@ui_bp.get("/connections/<connection_id>/health")
def check_connection_health(connection_id: str):
"""Check if a connection endpoint is reachable."""
@@ -1742,6 +1820,37 @@ def bucket_lifecycle(bucket_name: str):
return jsonify({"status": "ok", "message": "Lifecycle configuration saved", "rules": validated_rules})
@ui_bp.get("/buckets/<bucket_name>/lifecycle/history")
def get_lifecycle_history(bucket_name: str):
    """Return a JSON page of lifecycle execution history for a bucket.

    Query parameters: ``limit`` (default 50) and ``offset`` (default 0).
    Responds 403 without the ``policy`` permission. When no lifecycle
    manager is registered on the app, an empty page with
    ``"enabled": False`` is returned.
    """
    principal = _current_principal()
    try:
        _authorize_ui(principal, bucket_name, "policy")
    except IamError:
        return jsonify({"error": "Access denied"}), 403

    # Clamp client-supplied paging values so negatives are never used as
    # from-the-end slice indices.
    limit = max(request.args.get("limit", 50, type=int), 0)
    offset = max(request.args.get("offset", 0, type=int), 0)

    lifecycle_manager = current_app.extensions.get("lifecycle")
    if not lifecycle_manager:
        return jsonify({
            "executions": [],
            "total": 0,
            "limit": limit,
            "offset": offset,
            "enabled": False,
        })

    # Read the history once and page in memory: this halves the disk reads
    # and keeps "total" consistent with the page even if a lifecycle run
    # appends a record between two reads. The 1000 cap matches the window
    # previously used to compute the total.
    all_records = lifecycle_manager.get_execution_history(bucket_name, 1000, 0)
    records = all_records[offset:offset + limit]
    return jsonify({
        "executions": [r.to_dict() for r in records],
        "total": len(all_records),
        "limit": limit,
        "offset": offset,
        "enabled": True,
    })
@ui_bp.route("/buckets/<bucket_name>/cors", methods=["GET", "POST", "DELETE"])
def bucket_cors(bucket_name: str):
principal = _current_principal()

View File

@@ -1184,11 +1184,65 @@
<path d="M8 3.5a.5.5 0 0 0-1 0V9a.5.5 0 0 0 .252.434l3.5 2a.5.5 0 0 0 .496-.868L8 8.71V3.5z"/>
<path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm7-8A7 7 0 1 1 1 8a7 7 0 0 1 14 0z"/>
</svg>
<strong>Last sync:</strong>
<strong>Last sync:</strong>
<span data-stat="last-sync-time"></span>
<span data-stat="last-sync-key"></span>
</div>
<div class="card border mb-4" id="replication-failures-card" style="display: none;"
data-failures-endpoint="{{ url_for('ui.get_replication_failures', bucket_name=bucket_name) }}"
data-retry-endpoint="{{ url_for('ui.retry_replication_failure', bucket_name=bucket_name, object_key='__KEY__') }}"
data-retry-all-endpoint="{{ url_for('ui.retry_all_replication_failures', bucket_name=bucket_name) }}"
data-dismiss-endpoint="{{ url_for('ui.dismiss_replication_failure', bucket_name=bucket_name, object_key='__KEY__') }}"
data-clear-endpoint="{{ url_for('ui.clear_replication_failures', bucket_name=bucket_name) }}">
<div class="card-header d-flex justify-content-between align-items-center py-2">
<div class="d-flex align-items-center">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="text-danger me-2" viewBox="0 0 16 16">
<path d="M8.982 1.566a1.13 1.13 0 0 0-1.96 0L.165 13.233c-.457.778.091 1.767.98 1.767h13.713c.889 0 1.438-.99.98-1.767L8.982 1.566zM8 5c.535 0 .954.462.9.995l-.35 3.507a.552.552 0 0 1-1.1 0L7.1 5.995A.905.905 0 0 1 8 5zm.002 6a1 1 0 1 1 0 2 1 1 0 0 1 0-2z"/>
</svg>
<span class="fw-semibold small">Failed Replications</span>
<span class="badge bg-danger ms-2" id="replication-failure-count">0</span>
</div>
<div class="btn-group btn-group-sm">
<button class="btn btn-outline-primary btn-sm" id="retry-all-failures-btn" title="Retry All">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M8 3a5 5 0 1 1-4.546 2.914.5.5 0 0 0-.908-.417A6 6 0 1 0 8 2v1z"/>
<path d="M8 4.466V.534a.25.25 0 0 0-.41-.192L5.23 2.308a.25.25 0 0 0 0 .384l2.36 1.966A.25.25 0 0 0 8 4.466z"/>
</svg>
Retry All
</button>
<button class="btn btn-outline-secondary btn-sm" id="clear-failures-btn" title="Clear All">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
<path d="M5.5 5.5A.5.5 0 0 1 6 6v6a.5.5 0 0 1-1 0V6a.5.5 0 0 1 .5-.5zm2.5 0a.5.5 0 0 1 .5.5v6a.5.5 0 0 1-1 0V6a.5.5 0 0 1 .5-.5zm3 .5a.5.5 0 0 0-1 0v6a.5.5 0 0 0 1 0V6z"/>
<path fill-rule="evenodd" d="M14.5 3a1 1 0 0 1-1 1H13v9a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V4h-.5a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1H6a1 1 0 0 1 1-1h2a1 1 0 0 1 1 1h3.5a1 1 0 0 1 1 1v1zM4.118 4 4 4.059V13a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1V4.059L11.882 4H4.118zM2.5 3V2h11v1h-11z"/>
</svg>
Clear
</button>
</div>
</div>
<div class="card-body p-0">
<div class="table-responsive">
<table class="table table-sm table-hover mb-0">
<thead class="table-light">
<tr>
<th class="ps-3">Object Key</th>
<th>Error</th>
<th>Last Attempt</th>
<th class="text-center">Attempts</th>
<th class="text-end pe-3">Actions</th>
</tr>
</thead>
<tbody id="replication-failures-body">
</tbody>
</table>
</div>
<div class="px-3 py-2 border-top" id="replication-failures-pagination" style="display: none;">
<button class="btn btn-link btn-sm p-0" id="show-more-failures">Show more...</button>
<span class="text-muted small ms-2" id="failures-shown-count"></span>
</div>
</div>
</div>
<h6 class="text-muted text-uppercase small mb-3">Replication Target</h6>
<div class="card border mb-4">
<div class="card-body">
@@ -1526,6 +1580,46 @@
</div>
</div>
</div>
<div class="card shadow-sm mt-4" id="lifecycle-history-card"
data-history-endpoint="{{ url_for('ui.get_lifecycle_history', bucket_name=bucket_name) }}">
<div class="card-header d-flex align-items-center">
<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" fill="currentColor" class="text-primary me-2" viewBox="0 0 16 16">
<path d="M8.515 1.019A7 7 0 0 0 8 1V0a8 8 0 0 1 .589.022l-.074.997zm2.004.45a7.003 7.003 0 0 0-.985-.299l.219-.976c.383.086.76.2 1.126.342l-.36.933zm1.37.71a7.01 7.01 0 0 0-.439-.27l.493-.87a8.025 8.025 0 0 1 .979.654l-.615.789a6.996 6.996 0 0 0-.418-.302zm1.834 1.79a6.99 6.99 0 0 0-.653-.796l.724-.69c.27.285.52.59.747.91l-.818.576zm.744 1.352a7.08 7.08 0 0 0-.214-.468l.893-.45a7.976 7.976 0 0 1 .45 1.088l-.95.313a7.023 7.023 0 0 0-.179-.483zm.53 2.507a6.991 6.991 0 0 0-.1-1.025l.985-.17c.067.386.106.778.116 1.17l-1 .025zm-.131 1.538c.033-.17.06-.339.081-.51l.993.123a7.957 7.957 0 0 1-.23 1.155l-.964-.267c.046-.165.086-.332.12-.501zm-.952 2.379c.184-.29.346-.594.486-.908l.914.405c-.16.36-.345.706-.555 1.038l-.845-.535zm-.964 1.205c.122-.122.239-.248.35-.378l.758.653a8.073 8.073 0 0 1-.401.432l-.707-.707z"/>
<path d="M8 1a7 7 0 1 0 4.95 11.95l.707.707A8.001 8.001 0 1 1 8 0v1z"/>
<path d="M7.5 3a.5.5 0 0 1 .5.5v5.21l3.248 1.856a.5.5 0 0 1-.496.868l-3.5-2A.5.5 0 0 1 7 9V3.5a.5.5 0 0 1 .5-.5z"/>
</svg>
<span class="fw-semibold">Execution History</span>
</div>
<div class="card-body">
<p class="text-muted small mb-3">Lifecycle rules are evaluated automatically (default: every hour). Recent executions are shown below.</p>
<div class="table-responsive">
<table class="table table-sm align-middle mb-0">
<thead class="table-light">
<tr>
<th>Executed</th>
<th class="text-center">Deleted</th>
<th class="text-center">Versions</th>
<th class="text-center">Aborted</th>
<th class="text-center">Status</th>
</tr>
</thead>
<tbody id="lifecycle-history-body">
<tr>
<td colspan="5" class="text-center text-muted py-4">
<div class="spinner-border spinner-border-sm me-2" role="status"></div>
Loading...
</td>
</tr>
</tbody>
</table>
</div>
<div class="mt-2" id="lifecycle-history-pagination" style="display: none;">
<button class="btn btn-link btn-sm p-0" id="show-more-history">Show more...</button>
<span class="text-muted small ms-2" id="history-shown-count"></span>
</div>
</div>
</div>
</div>
<div class="col-lg-4">
<div class="card bg-body-tertiary border-0 mb-3">
@@ -5018,7 +5112,139 @@
if (orphanedEl) orphanedEl.innerHTML = '<span class="spinner-border spinner-border-sm" role="status"></span>';
if (bytesEl) bytesEl.innerHTML = '<span class="spinner-border spinner-border-sm" role="status"></span>';
loadReplicationStats();
loadReplicationFailures();
});
// Elements of the "Failed Replications" card; any lookup may be null when
// the card is not present on the page.
const failuresCard = document.getElementById('replication-failures-card');
const failuresBody = document.getElementById('replication-failures-body');
const failureCountBadge = document.getElementById('replication-failure-count');
const retryAllBtn = document.getElementById('retry-all-failures-btn');
const clearFailuresBtn = document.getElementById('clear-failures-btn');
const showMoreFailuresBtn = document.getElementById('show-more-failures');
const failuresPagination = document.getElementById('replication-failures-pagination');
const failuresShownCount = document.getElementById('failures-shown-count');
// Whether the expanded list is shown; flipped by the "Show more" button.
let failuresExpanded = false;
// Failures returned by the most recent fetch.
let currentFailures = [];
// Fetch the current page of replication failures and refresh the card.
// The card is hidden entirely when the bucket has no recorded failures.
const loadReplicationFailures = async () => {
  if (!failuresCard) return;
  const COLLAPSED_LIMIT = 5;
  const EXPANDED_LIMIT = 50;
  const endpoint = failuresCard.dataset.failuresEndpoint;
  const limit = failuresExpanded ? EXPANDED_LIMIT : COLLAPSED_LIMIT;
  try {
    const resp = await fetch(`${endpoint}?limit=${limit}`);
    if (!resp.ok) throw new Error('Failed to fetch failures');
    const data = await resp.json();
    currentFailures = data.failures || [];
    const total = data.total || 0;

    if (total <= 0) {
      failuresCard.style.display = 'none';
      return;
    }

    failuresCard.style.display = '';
    if (failureCountBadge) failureCountBadge.textContent = total;
    renderFailures(currentFailures);

    if (failuresPagination) {
      // Keep the toggle visible while expanded too; hiding it there made
      // "Show less" unreachable once the list had been expanded.
      if (total > COLLAPSED_LIMIT) {
        failuresPagination.style.display = '';
        if (failuresShownCount) {
          failuresShownCount.textContent = `Showing ${currentFailures.length} of ${total}`;
        }
      } else {
        failuresPagination.style.display = 'none';
      }
    }
  } catch (err) {
    console.error('Failed to load replication failures:', err);
  }
};
// Render one table row per failure and wire up its Retry / Dismiss buttons.
//
// Object keys are user-controlled. They must not be interpolated into inline
// `onclick="...('KEY')"` handlers: the HTML parser decodes entities in the
// attribute before the script runs, so an escaped quote still terminates the
// JS string literal. Handlers are therefore attached from script, closing
// over the raw key.
const renderFailures = (failures) => {
  if (!failuresBody) return;
  const rows = Array.isArray(failures) ? failures : [];

  failuresBody.innerHTML = rows.map(f => {
    const message = String(f.error_message || '');
    const shortMessage = message.length > 60 ? message.substring(0, 60) + '...' : message;
    return `
      <tr>
        <td class="ps-3 text-break" style="max-width: 200px;">
          <code class="small">${escapeHtml(f.object_key)}</code>
        </td>
        <td class="small text-muted text-break" style="max-width: 250px;" title="${escapeHtml(message)}">
          ${escapeHtml(shortMessage)}
        </td>
        <td class="small text-muted">${new Date(f.timestamp * 1000).toLocaleString()}</td>
        <td class="text-center"><span class="badge bg-secondary">${Number(f.failure_count) || 0}</span></td>
        <td class="text-end pe-3">
          <button class="btn btn-sm btn-outline-primary py-0 px-2" data-failure-action="retry" title="Retry">
            <svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" viewBox="0 0 16 16">
              <path fill-rule="evenodd" d="M8 3a5 5 0 1 1-4.546 2.914.5.5 0 0 0-.908-.417A6 6 0 1 0 8 2v1z"/>
              <path d="M8 4.466V.534a.25.25 0 0 0-.41-.192L5.23 2.308a.25.25 0 0 0 0 .384l2.36 1.966A.25.25 0 0 0 8 4.466z"/>
            </svg>
          </button>
          <button class="btn btn-sm btn-outline-secondary py-0 px-2" data-failure-action="dismiss" title="Dismiss">
            <svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" viewBox="0 0 16 16">
              <path d="M4.646 4.646a.5.5 0 0 1 .708 0L8 7.293l2.646-2.647a.5.5 0 0 1 .708.708L8.707 8l2.647 2.646a.5.5 0 0 1-.708.708L8 8.707l-2.646 2.647a.5.5 0 0 1-.708-.708L7.293 8 4.646 5.354a.5.5 0 0 1 0-.708z"/>
            </svg>
          </button>
        </td>
      </tr>
    `;
  }).join('');

  // The <tr> children of the body are in the same order as `rows`.
  Array.from(failuresBody.children).forEach((row, idx) => {
    const key = rows[idx].object_key;
    const retryBtn = row.querySelector('[data-failure-action="retry"]');
    const dismissBtn = row.querySelector('[data-failure-action="dismiss"]');
    if (retryBtn) retryBtn.addEventListener('click', () => window.retryFailure(key));
    if (dismissBtn) dismissBtn.addEventListener('click', () => window.dismissFailure(key));
  });
};
// Ask the server to retry replication of one object, then refresh the list.
window.retryFailure = async (objectKey) => {
  const template = failuresCard.dataset.retryEndpoint;
  const url = template.replace('__KEY__', encodeURIComponent(objectKey));
  try {
    const { ok } = await fetch(url, { method: 'POST' });
    if (!ok) return;
    loadReplicationFailures();
  } catch (err) {
    console.error('Failed to retry:', err);
  }
};
// Drop one failure record on the server without retrying, then refresh the list.
window.dismissFailure = async (objectKey) => {
  const template = failuresCard.dataset.dismissEndpoint;
  const url = template.replace('__KEY__', encodeURIComponent(objectKey));
  try {
    const { ok } = await fetch(url, { method: 'DELETE' });
    if (!ok) return;
    loadReplicationFailures();
  } catch (err) {
    console.error('Failed to dismiss:', err);
  }
};
// "Retry All": resubmit every recorded failure, then refresh the table.
if (retryAllBtn) {
  retryAllBtn.addEventListener('click', async () => {
    const url = failuresCard.dataset.retryAllEndpoint;
    try {
      const { ok } = await fetch(url, { method: 'POST' });
      if (ok) loadReplicationFailures();
    } catch (err) {
      console.error('Failed to retry all:', err);
    }
  });
}

// "Clear": after confirmation, delete every failure record, then refresh.
if (clearFailuresBtn) {
  clearFailuresBtn.addEventListener('click', async () => {
    const confirmed = confirm('Clear all failure records?');
    if (!confirmed) return;
    const url = failuresCard.dataset.clearEndpoint;
    try {
      const { ok } = await fetch(url, { method: 'DELETE' });
      if (ok) loadReplicationFailures();
    } catch (err) {
      console.error('Failed to clear failures:', err);
    }
  });
}

// Toggle between the collapsed and expanded list and reload it.
if (showMoreFailuresBtn) {
  showMoreFailuresBtn.addEventListener('click', () => {
    failuresExpanded = !failuresExpanded;
    const label = failuresExpanded ? 'Show less' : 'Show more...';
    showMoreFailuresBtn.textContent = label;
    loadReplicationFailures();
  });
}

// Initial population of the failures card.
loadReplicationFailures();
}
const algoAes256Radio = document.getElementById('algo_aes256');
@@ -5657,6 +5883,83 @@
};
if (lifecycleCard) loadLifecycleRules();
// Elements of the lifecycle "Execution History" card; any lookup may be
// null when the card is not present on the page.
const lifecycleHistoryCard = document.getElementById('lifecycle-history-card');
const lifecycleHistoryBody = document.getElementById('lifecycle-history-body');
const lifecycleHistoryPagination = document.getElementById('lifecycle-history-pagination');
const showMoreHistoryBtn = document.getElementById('show-more-history');
const historyShownCount = document.getElementById('history-shown-count');
// Whether the expanded list is shown; flipped by the "Show more" button.
let historyExpanded = false;
// Fetch recent lifecycle executions for the bucket and render them into the
// history table, showing a placeholder row while loading, when lifecycle
// enforcement is disabled, when there is no history, or on error.
const loadLifecycleHistory = async () => {
  if (!lifecycleHistoryCard || !lifecycleHistoryBody) return;
  const COLLAPSED_LIMIT = 5;
  const EXPANDED_LIMIT = 50;
  const endpoint = lifecycleHistoryCard.dataset.historyEndpoint;
  const limit = historyExpanded ? EXPANDED_LIMIT : COLLAPSED_LIMIT;

  // The pagination elements are optional; never dereference them unguarded.
  const setPaginationVisible = (visible) => {
    if (lifecycleHistoryPagination) lifecycleHistoryPagination.style.display = visible ? '' : 'none';
  };

  lifecycleHistoryBody.innerHTML = '<tr><td colspan="5" class="text-center text-muted py-4"><div class="spinner-border spinner-border-sm me-2" role="status"></div>Loading...</td></tr>';

  try {
    const resp = await fetch(`${endpoint}?limit=${limit}`);
    if (!resp.ok) throw new Error('Failed to fetch history');
    const data = await resp.json();

    if (!data.enabled) {
      lifecycleHistoryBody.innerHTML = '<tr><td colspan="5" class="text-center text-muted py-4">Lifecycle enforcement is not enabled</td></tr>';
      setPaginationVisible(false);
      return;
    }

    const executions = data.executions || [];
    const total = data.total || 0;

    if (executions.length === 0) {
      lifecycleHistoryBody.innerHTML = '<tr><td colspan="5" class="text-center text-muted py-4">No executions recorded yet</td></tr>';
      setPaginationVisible(false);
      return;
    }

    lifecycleHistoryBody.innerHTML = executions.map(e => {
      const date = new Date(e.timestamp * 1000);
      // Coerce counters to numbers so only digits are ever written into the markup.
      const objectsDeleted = Number(e.objects_deleted) || 0;
      const versionsDeleted = Number(e.versions_deleted) || 0;
      const uploadsAborted = Number(e.uploads_aborted) || 0;
      const hasErrors = Array.isArray(e.errors) && e.errors.length > 0;
      const hasActivity = objectsDeleted > 0 || versionsDeleted > 0 || uploadsAborted > 0;
      let statusBadge;
      if (hasErrors) {
        statusBadge = '<span class="badge bg-danger">Errors</span>';
      } else if (hasActivity) {
        statusBadge = '<span class="badge bg-success">Success</span>';
      } else {
        statusBadge = '<span class="badge bg-secondary">No action</span>';
      }
      const errorTooltip = hasErrors ? ` title="${escapeHtml(e.errors.join('; '))}"` : '';
      return `<tr${errorTooltip}>
        <td class="small">${date.toLocaleString()}</td>
        <td class="text-center"><span class="badge bg-danger-subtle text-danger">${objectsDeleted}</span></td>
        <td class="text-center"><span class="badge bg-warning-subtle text-warning">${versionsDeleted}</span></td>
        <td class="text-center"><span class="badge bg-secondary">${uploadsAborted}</span></td>
        <td class="text-center">${statusBadge}</td>
      </tr>`;
    }).join('');

    // Keep the toggle visible while expanded too; hiding it there made
    // "Show less" unreachable once the list had been expanded.
    if (total > COLLAPSED_LIMIT) {
      setPaginationVisible(true);
      if (historyShownCount) {
        historyShownCount.textContent = `Showing ${executions.length} of ${total}`;
      }
    } else {
      setPaginationVisible(false);
    }
  } catch (err) {
    console.error('Failed to load lifecycle history:', err);
    lifecycleHistoryBody.innerHTML = '<tr><td colspan="5" class="text-center text-danger py-4">Failed to load history</td></tr>';
  }
};
// Toggle between the collapsed and expanded history list and reload it.
if (showMoreHistoryBtn) {
  showMoreHistoryBtn.addEventListener('click', () => {
    historyExpanded = !historyExpanded;
    const label = historyExpanded ? 'Show less' : 'Show more...';
    showMoreHistoryBtn.textContent = label;
    loadLifecycleHistory();
  });
}

// Initial population of the history card, when it is on the page.
if (lifecycleHistoryCard) {
  loadLifecycleHistory();
}
if (corsCard) loadCorsRules();
if (aclCard) loadAcl();
</script>

View File

@@ -43,7 +43,9 @@ def connections(tmp_path: Path):
@pytest.fixture
def replication_manager(storage, connections, tmp_path):
rules_path = tmp_path / "replication_rules.json"
manager = ReplicationManager(storage, connections, rules_path)
storage_root = tmp_path / "data"
storage_root.mkdir(exist_ok=True)
manager = ReplicationManager(storage, connections, rules_path, storage_root)
yield manager
manager.shutdown(wait=False)