Cache replication failures, batch UI auth checks, add a bulk download size limit, move empty-parent cleanup to a background thread

2026-02-09 18:23:45 +08:00
parent 6e6d6d32bf
commit aa6d7c4d28
4 changed files with 66 additions and 60 deletions

View File

@@ -309,6 +309,18 @@ class IamService:
        if not self._is_allowed(principal, normalized, action):
            raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'")

    def check_permissions(self, principal: Principal, bucket_name: str | None, actions: Iterable[str]) -> Dict[str, bool]:
        """Evaluate several actions in one pass and return a mapping of each original action name to allowed/denied."""
        self._maybe_reload()
        bucket_name = (bucket_name or "*").lower() if bucket_name != "*" else (bucket_name or "*")
        normalized_actions = {a: self._normalize_action(a) for a in actions}
        results: Dict[str, bool] = {}
        for original, canonical in normalized_actions.items():
            if canonical not in ALLOWED_ACTIONS:
                results[original] = False
            else:
                results[original] = self._is_allowed(principal, bucket_name, canonical)
        return results

    def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]:
        return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")]
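A rough usage sketch of the new batch call (the iam, principal, and bucket values below are placeholders, not part of this commit): instead of calling authorize() once per action and catching IamError each time, a caller asks for all of its flags in one pass.

perms = iam.check_permissions(principal, "my-bucket", ["policy", "lifecycle", "cors"])
can_edit_policy = perms.get("policy", False)  # False when denied
can_manage_cors = perms.get("cors", False)    # also False for actions outside ALLOWED_ACTIONS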

View File

@@ -176,11 +176,12 @@ class ReplicationFailureStore:
        self.storage_root = storage_root
        self.max_failures_per_bucket = max_failures_per_bucket
        self._lock = threading.Lock()
        self._cache: Dict[str, List[ReplicationFailure]] = {}  # per-bucket in-memory copy of the persisted failures

    def _get_failures_path(self, bucket_name: str) -> Path:
        return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "replication_failures.json"

    def load_failures(self, bucket_name: str) -> List[ReplicationFailure]:
    def _load_from_disk(self, bucket_name: str) -> List[ReplicationFailure]:
        path = self._get_failures_path(bucket_name)
        if not path.exists():
            return []
@@ -192,7 +193,7 @@ class ReplicationFailureStore:
logger.error(f"Failed to load replication failures for {bucket_name}: {e}")
return []
def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
def _save_to_disk(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
path = self._get_failures_path(bucket_name)
path.parent.mkdir(parents=True, exist_ok=True)
data = {"failures": [f.to_dict() for f in failures[:self.max_failures_per_bucket]]}
@@ -202,6 +203,18 @@ class ReplicationFailureStore:
        except OSError as e:
            logger.error(f"Failed to save replication failures for {bucket_name}: {e}")

    def load_failures(self, bucket_name: str) -> List[ReplicationFailure]:
        if bucket_name in self._cache:
            return list(self._cache[bucket_name])
        failures = self._load_from_disk(bucket_name)
        self._cache[bucket_name] = failures
        return list(failures)

    def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
        trimmed = failures[:self.max_failures_per_bucket]
        self._cache[bucket_name] = trimmed
        self._save_to_disk(bucket_name, trimmed)

    def add_failure(self, bucket_name: str, failure: ReplicationFailure) -> None:
        with self._lock:
            failures = self.load_failures(bucket_name)
@@ -227,6 +240,7 @@ class ReplicationFailureStore:
    def clear_failures(self, bucket_name: str) -> None:
        with self._lock:
            self._cache.pop(bucket_name, None)
            path = self._get_failures_path(bucket_name)
            if path.exists():
                path.unlink()
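A quick behavioural sketch of the cache, assuming the constructor takes the storage root and per-bucket cap shown in __init__ above (values here are placeholders):

store = ReplicationFailureStore(storage_root, max_failures_per_bucket=100)
first = store.load_failures("photos")   # first call reads replication_failures.json and fills the cache
second = store.load_failures("photos")  # served from memory; list(...) hands back a copy each time
store.save_failures("photos", first)    # write-through: trims to the cap, updates the cache, persists to disk

Returning copies keeps callers from mutating the cached list in place, and clear_failures() drops both the cache entry and the JSON file.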

View File

@@ -11,6 +11,7 @@ import time
import unicodedata
import uuid
from collections import OrderedDict
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timezone
@@ -187,6 +188,7 @@ class ObjectStorage:
        self._object_cache_max_size = object_cache_max_size
        self._object_key_max_length_bytes = object_key_max_length_bytes
        self._sorted_key_cache: Dict[str, tuple[list[str], int]] = {}
        self._cleanup_executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ParentCleanup")

    def _get_bucket_lock(self, bucket_id: str) -> threading.Lock:
        """Get or create a lock for a specific bucket. Reduces global lock contention."""
@@ -544,11 +546,14 @@ class ObjectStorage:
        return self._read_metadata(bucket_path.name, safe_key) or {}

    def _cleanup_empty_parents(self, path: Path, stop_at: Path) -> None:
        """Remove empty parent directories up to (but not including) stop_at.
        """Remove empty parent directories in a background thread.
        On Windows/OneDrive, directories may be locked briefly after file deletion.
        This method retries with a small delay to handle that case.
        Running this in the background avoids blocking the request thread with retries.
        """
        self._cleanup_executor.submit(self._do_cleanup_empty_parents, path, stop_at)

    def _do_cleanup_empty_parents(self, path: Path, stop_at: Path) -> None:
        for parent in path.parents:
            if parent == stop_at:
                break
@@ -556,7 +561,7 @@ class ObjectStorage:
                try:
                    if parent.exists() and not any(parent.iterdir()):
                        parent.rmdir()
                        break
                    break
                except OSError:
                    if attempt < 2:
                        time.sleep(0.1)
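Since the rmdir retries now run on a single worker thread, an explicit shutdown hook makes teardown deterministic (handy in tests that delete the storage root immediately after use). A minimal sketch, assuming an ObjectStorage close() method that is not part of this commit:

    def close(self) -> None:
        # Wait for queued _do_cleanup_empty_parents jobs before tearing the storage down.
        self._cleanup_executor.shutdown(wait=True)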

View File

@@ -423,57 +423,25 @@ def bucket_detail(bucket_name: str):
        },
        indent=2,
    )
    can_edit_policy = False
    if principal:
        try:
            _iam().authorize(principal, bucket_name, "policy")
            can_edit_policy = True
        except IamError:
            can_edit_policy = False
    # Batch the bucket-scoped and admin-level permission checks into two IAM calls.
    iam = _iam()
    bucket_perms = iam.check_permissions(
        principal, bucket_name, ["policy", "lifecycle", "cors", "write", "replication"],
    ) if principal else {}
    admin_perms = iam.check_permissions(
        principal, None, ["iam:list_users"],
    ) if principal else {}
    can_manage_lifecycle = False
    if principal:
        try:
            _iam().authorize(principal, bucket_name, "lifecycle")
            can_manage_lifecycle = True
        except IamError:
            can_manage_lifecycle = False
    can_manage_cors = False
    if principal:
        try:
            _iam().authorize(principal, bucket_name, "cors")
            can_manage_cors = True
        except IamError:
            can_manage_cors = False
    can_edit_policy = bucket_perms.get("policy", False)
    can_manage_lifecycle = bucket_perms.get("lifecycle", False)
    can_manage_cors = bucket_perms.get("cors", False)
    can_manage_versioning = bucket_perms.get("write", False)
    can_manage_replication = bucket_perms.get("replication", False)
    is_replication_admin = admin_perms.get("iam:list_users", False)
    try:
        versioning_enabled = storage.is_versioning_enabled(bucket_name)
    except StorageError:
        versioning_enabled = False
    can_manage_versioning = False
    if principal:
        try:
            _iam().authorize(principal, bucket_name, "write")
            can_manage_versioning = True
        except IamError:
            can_manage_versioning = False
    can_manage_replication = False
    if principal:
        try:
            _iam().authorize(principal, bucket_name, "replication")
            can_manage_replication = True
        except IamError:
            can_manage_replication = False
    is_replication_admin = False
    if principal:
        try:
            _iam().authorize(principal, None, "iam:list_users")
            is_replication_admin = True
        except IamError:
            is_replication_admin = False
    replication_rule = _replication().get_rule(bucket_name)
    connections = _connections().list() if (is_replication_admin or replication_rule) else []
@@ -489,12 +457,7 @@ def bucket_detail(bucket_name: str):
    bucket_quota = storage.get_bucket_quota(bucket_name)
    bucket_stats = storage.bucket_stats(bucket_name)
    can_manage_quota = False
    try:
        _iam().authorize(principal, None, "iam:list_users")
        can_manage_quota = True
    except IamError:
        pass
    # Quota management is gated on the same admin permission already checked above.
    can_manage_quota = is_replication_admin
    objects_api_url = url_for("ui.list_bucket_objects", bucket_name=bucket_name)
    objects_stream_url = url_for("ui.stream_bucket_objects", bucket_name=bucket_name)
@@ -1003,21 +966,33 @@ def bulk_download_objects(bucket_name: str):
    unique_keys = list(dict.fromkeys(cleaned))
    storage = _storage()
    try:
        _authorize_ui(principal, bucket_name, "read")
    except IamError as exc:
        return jsonify({"error": str(exc)}), 403
    # Pre-compute the total payload size and reject oversized selections before building the zip.
    max_total_bytes = current_app.config.get("BULK_DOWNLOAD_MAX_BYTES", 1024 * 1024 * 1024)
    total_size = 0
    for key in unique_keys:
        try:
            path = storage.get_object_path(bucket_name, key)
            total_size += path.stat().st_size
        except (StorageError, OSError):
            continue
    if total_size > max_total_bytes:
        limit_mb = max_total_bytes // (1024 * 1024)
        return jsonify({"error": f"Total download size exceeds {limit_mb} MB limit. Select fewer objects."}), 400
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
        for key in unique_keys:
            try:
                _authorize_ui(principal, bucket_name, "read", object_key=key)
                metadata = storage.get_object_metadata(bucket_name, key)
                is_encrypted = "x-amz-server-side-encryption" in metadata
                if is_encrypted and hasattr(storage, 'get_object_data'):
                    data, _ = storage.get_object_data(bucket_name, key)
                    zf.writestr(key, data)
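The cap itself comes from Flask config with a 1 GiB default (1024 * 1024 * 1024 bytes). A deployment-side sketch, assuming a standard app-factory setup (only the BULK_DOWNLOAD_MAX_BYTES key name is taken from this diff; the rest is illustrative):

app.config["BULK_DOWNLOAD_MAX_BYTES"] = 256 * 1024 * 1024  # cap bulk downloads at 256 MiB

Keys whose size cannot be stat()ed are skipped during the pre-check rather than failing the request, so the 400 response only fires when the readable objects alone exceed the cap.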