from __future__ import annotations

import json
import logging
import threading
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional

from .storage import ObjectStorage, StorageError

logger = logging.getLogger(__name__)


@dataclass
class LifecycleResult:
    bucket_name: str
    objects_deleted: int = 0
    versions_deleted: int = 0
    uploads_aborted: int = 0
    errors: List[str] = field(default_factory=list)
    execution_time_seconds: float = 0.0


@dataclass
class LifecycleExecutionRecord:
    timestamp: float
    bucket_name: str
    objects_deleted: int
    versions_deleted: int
    uploads_aborted: int
    errors: List[str]
    execution_time_seconds: float

    def to_dict(self) -> dict:
        return {
            "timestamp": self.timestamp,
            "bucket_name": self.bucket_name,
            "objects_deleted": self.objects_deleted,
            "versions_deleted": self.versions_deleted,
            "uploads_aborted": self.uploads_aborted,
            "errors": self.errors,
            "execution_time_seconds": self.execution_time_seconds,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "LifecycleExecutionRecord":
        return cls(
            timestamp=data["timestamp"],
            bucket_name=data["bucket_name"],
            objects_deleted=data["objects_deleted"],
            versions_deleted=data["versions_deleted"],
            uploads_aborted=data["uploads_aborted"],
            errors=data.get("errors", []),
            execution_time_seconds=data["execution_time_seconds"],
        )

    @classmethod
    def from_result(cls, result: LifecycleResult) -> "LifecycleExecutionRecord":
        return cls(
            timestamp=time.time(),
            bucket_name=result.bucket_name,
            objects_deleted=result.objects_deleted,
            versions_deleted=result.versions_deleted,
            uploads_aborted=result.uploads_aborted,
            errors=result.errors.copy(),
            execution_time_seconds=result.execution_time_seconds,
        )


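# Illustrative sketch of the history document that LifecycleHistoryStore
# (below) reads and writes at
# <storage_root>/.myfsio.sys/buckets/<bucket>/lifecycle_history.json.
# The field names mirror LifecycleExecutionRecord.to_dict(); the values here
# are made up for illustration only:
#
#   {
#     "executions": [
#       {
#         "timestamp": 1700000000.0,
#         "bucket_name": "example-bucket",
#         "objects_deleted": 3,
#         "versions_deleted": 1,
#         "uploads_aborted": 0,
#         "errors": [],
#         "execution_time_seconds": 0.42
#       }
#     ]
#   }
#
# Newest records are stored first, and the list is capped at
# MAX_HISTORY_PER_BUCKET entries per bucket.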
class LifecycleHistoryStore:
    MAX_HISTORY_PER_BUCKET = 50

    def __init__(self, storage_root: Path) -> None:
        self.storage_root = storage_root
        self._lock = threading.Lock()

    def _get_history_path(self, bucket_name: str) -> Path:
        return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "lifecycle_history.json"

    def load_history(self, bucket_name: str) -> List[LifecycleExecutionRecord]:
        path = self._get_history_path(bucket_name)
        if not path.exists():
            return []
        try:
            with open(path, "r") as f:
                data = json.load(f)
            return [LifecycleExecutionRecord.from_dict(d) for d in data.get("executions", [])]
        except (OSError, ValueError, KeyError) as e:
            logger.error(f"Failed to load lifecycle history for {bucket_name}: {e}")
            return []

    def save_history(self, bucket_name: str, records: List[LifecycleExecutionRecord]) -> None:
        path = self._get_history_path(bucket_name)
        path.parent.mkdir(parents=True, exist_ok=True)
        data = {"executions": [r.to_dict() for r in records[:self.MAX_HISTORY_PER_BUCKET]]}
        try:
            with open(path, "w") as f:
                json.dump(data, f, indent=2)
        except OSError as e:
            logger.error(f"Failed to save lifecycle history for {bucket_name}: {e}")

    def add_record(self, bucket_name: str, record: LifecycleExecutionRecord) -> None:
        with self._lock:
            records = self.load_history(bucket_name)
            records.insert(0, record)
            self.save_history(bucket_name, records)

    def get_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[LifecycleExecutionRecord]:
        records = self.load_history(bucket_name)
        return records[offset:offset + limit]


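# LifecycleManager (below) runs enforcement on a fixed interval using a chain
# of daemon threading.Timer instances: each run re-arms the next timer in a
# finally block, so a failed sweep does not stop the schedule. Enforcement can
# also be triggered synchronously with run_now(), and per-bucket execution
# history is recorded through LifecycleHistoryStore when a storage_root is
# provided.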
class LifecycleManager:
    def __init__(self, storage: ObjectStorage, interval_seconds: int = 3600, storage_root: Optional[Path] = None):
        self.storage = storage
        self.interval_seconds = interval_seconds
        self.storage_root = storage_root
        self._timer: Optional[threading.Timer] = None
        self._shutdown = False
        self._lock = threading.Lock()
        self.history_store = LifecycleHistoryStore(storage_root) if storage_root else None

    def start(self) -> None:
        if self._timer is not None:
            return
        self._shutdown = False
        self._schedule_next()
        logger.info(f"Lifecycle manager started with interval {self.interval_seconds}s")

    def stop(self) -> None:
        self._shutdown = True
        if self._timer:
            self._timer.cancel()
            self._timer = None
        logger.info("Lifecycle manager stopped")

    def _schedule_next(self) -> None:
        if self._shutdown:
            return
        self._timer = threading.Timer(self.interval_seconds, self._run_enforcement)
        self._timer.daemon = True
        self._timer.start()

    def _run_enforcement(self) -> None:
        if self._shutdown:
            return
        try:
            self.enforce_all_buckets()
        except Exception as e:
            logger.error(f"Lifecycle enforcement failed: {e}")
        finally:
            self._schedule_next()

    def enforce_all_buckets(self) -> Dict[str, LifecycleResult]:
        results = {}
        try:
            buckets = self.storage.list_buckets()
            for bucket in buckets:
                result = self.enforce_rules(bucket.name)
                if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0:
                    results[bucket.name] = result
        except StorageError as e:
            logger.error(f"Failed to list buckets for lifecycle: {e}")
        return results

    def enforce_rules(self, bucket_name: str) -> LifecycleResult:
        start_time = time.time()
        result = LifecycleResult(bucket_name=bucket_name)

        try:
            lifecycle = self.storage.get_bucket_lifecycle(bucket_name)
            if not lifecycle:
                return result

            for rule in lifecycle:
                if rule.get("Status") != "Enabled":
                    continue
                rule_id = rule.get("ID", "unknown")
                prefix = rule.get("Prefix", rule.get("Filter", {}).get("Prefix", ""))

                self._enforce_expiration(bucket_name, rule, prefix, result)
                self._enforce_noncurrent_expiration(bucket_name, rule, prefix, result)
                self._enforce_abort_multipart(bucket_name, rule, result)

        except StorageError as e:
            result.errors.append(str(e))
            logger.error(f"Lifecycle enforcement error for {bucket_name}: {e}")

        result.execution_time_seconds = time.time() - start_time
        if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0 or result.errors:
            logger.info(
                f"Lifecycle enforcement for {bucket_name}: "
                f"deleted={result.objects_deleted}, versions={result.versions_deleted}, "
                f"aborted={result.uploads_aborted}, time={result.execution_time_seconds:.2f}s"
            )
        if self.history_store:
            record = LifecycleExecutionRecord.from_result(result)
            self.history_store.add_record(bucket_name, record)
        return result

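    # Illustrative rule shape handled by the helpers below. Only the keys that
    # are actually read here are shown; the values are examples, not defaults:
    #
    #   [
    #     {
    #       "ID": "expire-old-logs",
    #       "Status": "Enabled",
    #       "Prefix": "logs/",                  # or {"Filter": {"Prefix": "logs/"}}
    #       "Expiration": {"Days": 30},          # or {"Date": "2024-01-01T00:00:00Z"}
    #       "NoncurrentVersionExpiration": {"NoncurrentDays": 7},
    #       "AbortIncompleteMultipartUpload": {"DaysAfterInitiation": 2},
    #     }
    #   ]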
    def _enforce_expiration(
        self, bucket_name: str, rule: Dict[str, Any], prefix: str, result: LifecycleResult
    ) -> None:
        expiration = rule.get("Expiration", {})
        if not expiration:
            return

        days = expiration.get("Days")
        date_str = expiration.get("Date")

        if days:
            cutoff = datetime.now(timezone.utc) - timedelta(days=days)
        elif date_str:
            try:
                cutoff = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
            except ValueError:
                return
        else:
            return

        try:
            objects = self.storage.list_objects_all(bucket_name)
            for obj in objects:
                if prefix and not obj.key.startswith(prefix):
                    continue
                if obj.last_modified < cutoff:
                    try:
                        self.storage.delete_object(bucket_name, obj.key)
                        result.objects_deleted += 1
                    except StorageError as e:
                        result.errors.append(f"Failed to delete {obj.key}: {e}")
        except StorageError as e:
            result.errors.append(f"Failed to list objects: {e}")

    def _enforce_noncurrent_expiration(
        self, bucket_name: str, rule: Dict[str, Any], prefix: str, result: LifecycleResult
    ) -> None:
        noncurrent = rule.get("NoncurrentVersionExpiration", {})
        noncurrent_days = noncurrent.get("NoncurrentDays")
        if not noncurrent_days:
            return

        cutoff = datetime.now(timezone.utc) - timedelta(days=noncurrent_days)

        try:
            objects = self.storage.list_objects_all(bucket_name)
            for obj in objects:
                if prefix and not obj.key.startswith(prefix):
                    continue
                try:
                    versions = self.storage.list_object_versions(bucket_name, obj.key)
                    for version in versions:
                        archived_at_str = version.get("archived_at", "")
                        if not archived_at_str:
                            continue
                        try:
                            archived_at = datetime.fromisoformat(archived_at_str.replace("Z", "+00:00"))
                            if archived_at < cutoff:
                                version_id = version.get("version_id")
                                if version_id:
                                    self.storage.delete_object_version(bucket_name, obj.key, version_id)
                                    result.versions_deleted += 1
                        except (ValueError, StorageError) as e:
                            result.errors.append(f"Failed to process version: {e}")
                except StorageError:
                    pass
        except StorageError as e:
            result.errors.append(f"Failed to list objects: {e}")

        try:
            orphaned = self.storage.list_orphaned_objects(bucket_name)
            for item in orphaned:
                obj_key = item.get("key", "")
                if prefix and not obj_key.startswith(prefix):
                    continue
                try:
                    versions = self.storage.list_object_versions(bucket_name, obj_key)
                    for version in versions:
                        archived_at_str = version.get("archived_at", "")
                        if not archived_at_str:
                            continue
                        try:
                            archived_at = datetime.fromisoformat(archived_at_str.replace("Z", "+00:00"))
                            if archived_at < cutoff:
                                version_id = version.get("version_id")
                                if version_id:
                                    self.storage.delete_object_version(bucket_name, obj_key, version_id)
                                    result.versions_deleted += 1
                        except (ValueError, StorageError) as e:
                            result.errors.append(f"Failed to process orphaned version: {e}")
                except StorageError:
                    pass
        except StorageError as e:
            result.errors.append(f"Failed to list orphaned objects: {e}")

    def _enforce_abort_multipart(
        self, bucket_name: str, rule: Dict[str, Any], result: LifecycleResult
    ) -> None:
        abort_config = rule.get("AbortIncompleteMultipartUpload", {})
        days_after = abort_config.get("DaysAfterInitiation")
        if not days_after:
            return

        cutoff = datetime.now(timezone.utc) - timedelta(days=days_after)

        try:
            uploads = self.storage.list_multipart_uploads(bucket_name)
            for upload in uploads:
                created_at_str = upload.get("created_at", "")
                if not created_at_str:
                    continue
                try:
                    created_at = datetime.fromisoformat(created_at_str.replace("Z", "+00:00"))
                    if created_at < cutoff:
                        upload_id = upload.get("upload_id")
                        if upload_id:
                            self.storage.abort_multipart_upload(bucket_name, upload_id)
                            result.uploads_aborted += 1
                except (ValueError, StorageError) as e:
                    result.errors.append(f"Failed to abort upload: {e}")
        except StorageError as e:
            result.errors.append(f"Failed to list multipart uploads: {e}")

    def run_now(self, bucket_name: Optional[str] = None) -> Dict[str, LifecycleResult]:
        if bucket_name:
            return {bucket_name: self.enforce_rules(bucket_name)}
        return self.enforce_all_buckets()

    def get_execution_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[LifecycleExecutionRecord]:
        if not self.history_store:
            return []
        return self.history_store.get_history(bucket_name, limit, offset)

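# Minimal usage sketch (illustrative). How the ObjectStorage instance is
# constructed is application-specific and not shown here; the paths and
# interval are example values only:
#
#   storage = ...  # an ObjectStorage instance from .storage
#   manager = LifecycleManager(storage, interval_seconds=3600, storage_root=Path("/srv/myfsio"))
#   manager.start()                    # periodic enforcement on a background timer
#   results = manager.run_now()        # or enforce immediately, optionally per bucket
#   history = manager.get_execution_history("example-bucket", limit=10)
#   manager.stop()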