Add new tests; fix a typo and tighten validations

2026-01-03 23:29:07 +08:00
parent 2d60e36fbf
commit b9cfc45aa2
14 changed files with 1970 additions and 125 deletions

View File

@@ -289,17 +289,17 @@ def _configure_logging(app: Flask) -> None:
formatter = logging.Formatter(
"%(asctime)s | %(levelname)s | %(request_id)s | %(method)s %(path)s | %(message)s"
)
# Stream Handler (stdout) - Primary for Docker
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setFormatter(formatter)
stream_handler.addFilter(_RequestContextFilter())
logger = app.logger
for handler in logger.handlers[:]:
handler.close()
logger.handlers.clear()
logger.addHandler(stream_handler)
# File Handler (optional, if configured)
if app.config.get("LOG_TO_FILE"):
log_file = Path(app.config["LOG_FILE"])
log_file.parent.mkdir(parents=True, exist_ok=True)
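The format string above references %(request_id)s, %(method)s and %(path)s, so every record reaching the handler must carry those attributes; that is what _RequestContextFilter supplies. A minimal sketch of such a filter, assuming Flask's request context and a per-request g.request_id (the project's actual filter may differ):

import logging
from flask import g, has_request_context, request

class RequestContextFilter(logging.Filter):
    """Hypothetical stand-in for _RequestContextFilter."""
    def filter(self, record: logging.LogRecord) -> bool:
        if has_request_context():
            record.request_id = getattr(g, "request_id", "-")
            record.method = request.method
            record.path = request.path
        else:
            record.request_id = record.method = record.path = "-"
        return True  # annotate only, never drop records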

View File

@@ -196,18 +196,21 @@ class AccessLoggingService:
)
target_key = f"{config.target_bucket}:{config.target_prefix}"
should_flush = False
with self._buffer_lock:
if target_key not in self._buffer:
self._buffer[target_key] = []
self._buffer[target_key].append(entry)
should_flush = len(self._buffer[target_key]) >= self.max_buffer_size
if len(self._buffer[target_key]) >= self.max_buffer_size:
self._flush_buffer(target_key)
if should_flush:
self._flush_buffer(target_key)
def _flush_loop(self) -> None:
while not self._shutdown.is_set():
time.sleep(self.flush_interval)
self._flush_all()
self._shutdown.wait(timeout=self.flush_interval)
if not self._shutdown.is_set():
self._flush_all()
def _flush_all(self) -> None:
with self._buffer_lock:
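Two fixes are visible in this hunk: the flush decision is recorded while the buffer lock is held, but the flush itself now runs after the lock is released, so _flush_buffer can take the same lock without deadlocking and a slow write no longer blocks other writers; and the flush loop waits on the shutdown event instead of sleeping, so shutdown is prompt and no flush runs once shutdown has been signalled. A condensed sketch of the append-then-flush pattern (a simplification, not the service itself):

import threading

class BufferedLog:
    """Illustrative only; mirrors the locking shape of AccessLoggingService."""
    def __init__(self, max_buffer_size: int = 100) -> None:
        self.max_buffer_size = max_buffer_size
        self._buffer: dict[str, list[str]] = {}
        self._buffer_lock = threading.Lock()

    def append(self, target_key: str, entry: str) -> None:
        with self._buffer_lock:
            self._buffer.setdefault(target_key, []).append(entry)
            should_flush = len(self._buffer[target_key]) >= self.max_buffer_size
        if should_flush:  # outside the lock: _flush_buffer may re-acquire it
            self._flush_buffer(target_key)

    def _flush_buffer(self, target_key: str) -> None:
        with self._buffer_lock:
            entries = self._buffer.pop(target_key, [])
        # ... write `entries` to the target bucket/prefix here ...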

View File

@@ -84,7 +84,7 @@ class AppConfig:
return overrides.get(name, os.getenv(name, default))
storage_root = Path(_get("STORAGE_ROOT", PROJECT_ROOT / "data")).resolve()
max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024)) # 1 GiB default
max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024))
ui_page_size = int(_get("UI_PAGE_SIZE", 100))
auth_max_attempts = int(_get("AUTH_MAX_ATTEMPTS", 5))
auth_lockout_minutes = int(_get("AUTH_LOCKOUT_MINUTES", 15))
@@ -108,6 +108,10 @@ class AppConfig:
try:
secret_file.parent.mkdir(parents=True, exist_ok=True)
secret_file.write_text(generated)
try:
os.chmod(secret_file, 0o600)
except OSError:
pass
secret_key = generated
except OSError:
secret_key = generated
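The nested try/except keeps startup working on filesystems without POSIX permissions while restricting the generated secret to the owner elsewhere. An alternative sketch that sets the mode at creation time, closing the brief window in which the file exists with default permissions (hypothetical helper, not the project's code):

import os
from pathlib import Path

def write_secret(path: Path, secret: str) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    # O_CREAT with mode 0o600 applies the permission atomically at creation
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        os.write(fd, secret.encode("utf-8"))
    finally:
        os.close(fd)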

View File

@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
REPLICATION_CONNECT_TIMEOUT = 5
REPLICATION_READ_TIMEOUT = 30
STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024 # 10 MiB - use streaming for larger files
STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024
REPLICATION_MODE_NEW_ONLY = "new_only"
REPLICATION_MODE_ALL = "all"
@@ -307,7 +307,6 @@ class ReplicationManager:
if self._shutdown:
return
# Re-check if rule is still enabled (may have been paused after task was submitted)
current_rule = self.get_rule(bucket_name)
if not current_rule or not current_rule.enabled:
logger.debug(f"Replication skipped for {bucket_name}/{object_key}: rule disabled or removed")
@@ -358,7 +357,6 @@ class ReplicationManager:
extra_args["ContentType"] = content_type
if file_size >= STREAMING_THRESHOLD_BYTES:
# Use multipart upload for large files
s3.upload_file(
str(path),
rule.target_bucket,
@@ -366,7 +364,6 @@ class ReplicationManager:
ExtraArgs=extra_args if extra_args else None,
)
else:
# Read small files into memory
file_content = path.read_bytes()
put_kwargs = {
"Bucket": rule.target_bucket,

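The comments removed above described a split the code still performs: files at or above STREAMING_THRESHOLD_BYTES go through boto3's managed upload_file, which streams from disk and switches to multipart automatically, while smaller files are sent with a single put_object and an in-memory body. A stripped-down sketch of that split, assuming a boto3 S3 client named s3; bucket and key names are placeholders:

from pathlib import Path

STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024  # 10 MiB

def upload(s3, path: Path, bucket: str, key: str, content_type: str | None = None) -> None:
    extra_args = {"ContentType": content_type} if content_type else {}
    if path.stat().st_size >= STREAMING_THRESHOLD_BYTES:
        # managed transfer: streams from disk, multipart for large files
        s3.upload_file(str(path), bucket, key, ExtraArgs=extra_args or None)
    else:
        # small object: one PutObject call with the body held in memory
        s3.put_object(Bucket=bucket, Key=key, Body=path.read_bytes(), **extra_args)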
View File

@@ -25,7 +25,7 @@ from .iam import IamError, Principal
from .notifications import NotificationService, NotificationConfiguration, WebhookDestination
from .object_lock import ObjectLockService, ObjectLockRetention, ObjectLockConfig, ObjectLockError, RetentionMode
from .replication import ReplicationManager
from .storage import ObjectStorage, StorageError, QuotaExceededError
from .storage import ObjectStorage, StorageError, QuotaExceededError, BucketNotFoundError, ObjectNotFoundError
logger = logging.getLogger(__name__)
@@ -217,7 +217,6 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
if not hmac.compare_digest(calculated_signature, signature):
# Only log detailed signature debug info if DEBUG_SIGV4 is enabled
if current_app.config.get("DEBUG_SIGV4"):
logger.warning(
"SigV4 signature mismatch",
@@ -260,7 +259,13 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
raise IamError("Invalid Date format")
now = datetime.now(timezone.utc)
if now > req_time + timedelta(seconds=int(expires)):
try:
expires_seconds = int(expires)
if expires_seconds <= 0:
raise IamError("Invalid Expires value: must be positive")
except ValueError:
raise IamError("Invalid Expires value: must be an integer")
if now > req_time + timedelta(seconds=expires_seconds):
raise IamError("Request expired")
secret_key = _iam().get_secret_key(access_key)
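Expires is now parsed once, rejected when non-numeric or non-positive, and only then used in the expiry comparison, so a malformed query string yields an IamError rather than an unhandled ValueError. A self-contained sketch of the check; the one-week ceiling mirrors AWS's documented X-Amz-Expires limit and is an extra assumption, not something this commit enforces:

from datetime import datetime, timedelta, timezone

class IamError(Exception):
    """Stand-in for the project's IamError."""

MAX_PRESIGN_EXPIRES = 7 * 24 * 3600  # AWS caps X-Amz-Expires at one week

def check_presign_expiry(req_time: datetime, expires: str) -> None:
    try:
        expires_seconds = int(expires)
    except ValueError:
        raise IamError("Invalid Expires value: must be an integer")
    if not 0 < expires_seconds <= MAX_PRESIGN_EXPIRES:
        raise IamError("Invalid Expires value: out of range")
    if datetime.now(timezone.utc) > req_time + timedelta(seconds=expires_seconds):
        raise IamError("Request expired")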
@@ -1036,21 +1041,23 @@ def _object_tagging_handler(bucket_name: str, object_key: str) -> Response:
if request.method == "GET":
try:
tags = storage.get_object_tags(bucket_name, object_key)
except BucketNotFoundError as exc:
return _error_response("NoSuchBucket", str(exc), 404)
except ObjectNotFoundError as exc:
return _error_response("NoSuchKey", str(exc), 404)
except StorageError as exc:
message = str(exc)
if "Bucket" in message:
return _error_response("NoSuchBucket", message, 404)
return _error_response("NoSuchKey", message, 404)
return _error_response("InternalError", str(exc), 500)
return _xml_response(_render_tagging_document(tags))
if request.method == "DELETE":
try:
storage.delete_object_tags(bucket_name, object_key)
except BucketNotFoundError as exc:
return _error_response("NoSuchBucket", str(exc), 404)
except ObjectNotFoundError as exc:
return _error_response("NoSuchKey", str(exc), 404)
except StorageError as exc:
message = str(exc)
if "Bucket" in message:
return _error_response("NoSuchBucket", message, 404)
return _error_response("NoSuchKey", message, 404)
return _error_response("InternalError", str(exc), 500)
current_app.logger.info("Object tags deleted", extra={"bucket": bucket_name, "key": object_key})
return Response(status=204)
@@ -1063,11 +1070,12 @@ def _object_tagging_handler(bucket_name: str, object_key: str) -> Response:
return _error_response("InvalidTag", "A maximum of 10 tags is supported for objects", 400)
try:
storage.set_object_tags(bucket_name, object_key, tags)
except BucketNotFoundError as exc:
return _error_response("NoSuchBucket", str(exc), 404)
except ObjectNotFoundError as exc:
return _error_response("NoSuchKey", str(exc), 404)
except StorageError as exc:
message = str(exc)
if "Bucket" in message:
return _error_response("NoSuchBucket", message, 404)
return _error_response("NoSuchKey", message, 404)
return _error_response("InternalError", str(exc), 500)
current_app.logger.info("Object tags updated", extra={"bucket": bucket_name, "key": object_key, "tags": len(tags)})
return Response(status=204)
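Matching substrings of StorageError messages was fragile; the GET, DELETE and PUT branches now catch the typed subclasses first and reserve InternalError for anything else. Because both subclasses derive from StorageError, the order of the except clauses matters. The repeated mapping could also live in one helper (illustrative sketch reusing the module's _error_response):

def _storage_error_response(exc: StorageError):
    # typed subclasses first; a plain StorageError means an unexpected failure
    if isinstance(exc, BucketNotFoundError):
        return _error_response("NoSuchBucket", str(exc), 404)
    if isinstance(exc, ObjectNotFoundError):
        return _error_response("NoSuchKey", str(exc), 404)
    return _error_response("InternalError", str(exc), 500)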
@@ -1283,7 +1291,10 @@ def _bucket_list_versions_handler(bucket_name: str) -> Response:
prefix = request.args.get("prefix", "")
delimiter = request.args.get("delimiter", "")
max_keys = min(int(request.args.get("max-keys", 1000)), 1000)
try:
max_keys = max(1, min(int(request.args.get("max-keys", 1000)), 1000))
except ValueError:
return _error_response("InvalidArgument", "max-keys must be an integer", 400)
key_marker = request.args.get("key-marker", "")
if prefix:
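max-keys is now clamped to the 1-1000 range and a non-integer value returns InvalidArgument instead of surfacing as a 500. The same parsing is repeated in bucket_handler further down; a shared helper would keep the two call sites in sync (hypothetical, names are illustrative):

def parse_max_keys(raw: str | None, default: int = 1000, ceiling: int = 1000) -> int:
    """Clamp max-keys to 1..ceiling; ValueError propagates for non-integer input."""
    if not raw:
        return min(default, ceiling)
    return max(1, min(int(raw), ceiling))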
@@ -1476,7 +1487,10 @@ def _parse_lifecycle_config(payload: bytes) -> list:
expiration: dict = {}
days_el = exp_el.find("{*}Days") or exp_el.find("Days")
if days_el is not None and days_el.text:
expiration["Days"] = int(days_el.text.strip())
days_val = int(days_el.text.strip())
if days_val <= 0:
raise ValueError("Expiration Days must be a positive integer")
expiration["Days"] = days_val
date_el = exp_el.find("{*}Date") or exp_el.find("Date")
if date_el is not None and date_el.text:
expiration["Date"] = date_el.text.strip()
@@ -1491,7 +1505,10 @@ def _parse_lifecycle_config(payload: bytes) -> list:
nve: dict = {}
days_el = nve_el.find("{*}NoncurrentDays") or nve_el.find("NoncurrentDays")
if days_el is not None and days_el.text:
nve["NoncurrentDays"] = int(days_el.text.strip())
noncurrent_days = int(days_el.text.strip())
if noncurrent_days <= 0:
raise ValueError("NoncurrentDays must be a positive integer")
nve["NoncurrentDays"] = noncurrent_days
if nve:
rule["NoncurrentVersionExpiration"] = nve
@@ -1500,7 +1517,10 @@ def _parse_lifecycle_config(payload: bytes) -> list:
aimu: dict = {}
days_el = aimu_el.find("{*}DaysAfterInitiation") or aimu_el.find("DaysAfterInitiation")
if days_el is not None and days_el.text:
aimu["DaysAfterInitiation"] = int(days_el.text.strip())
days_after = int(days_el.text.strip())
if days_after <= 0:
raise ValueError("DaysAfterInitiation must be a positive integer")
aimu["DaysAfterInitiation"] = days_after
if aimu:
rule["AbortIncompleteMultipartUpload"] = aimu
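Expiration Days, NoncurrentDays and DaysAfterInitiation (the three hunks above) all gain the same positive-integer check. A shared parser would state the rule once; this sketch mirrors the checks added here rather than code from the commit:

def parse_positive_days(text: str, field: str) -> int:
    value = int(text.strip())  # ValueError propagates for non-integer input
    if value <= 0:
        raise ValueError(f"{field} must be a positive integer")
    return value

# e.g. expiration["Days"] = parse_positive_days(days_el.text, "Expiration Days")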
@@ -2086,7 +2106,10 @@ def bucket_handler(bucket_name: str) -> Response:
list_type = request.args.get("list-type")
prefix = request.args.get("prefix", "")
delimiter = request.args.get("delimiter", "")
max_keys = min(int(request.args.get("max-keys", current_app.config["UI_PAGE_SIZE"])), 1000)
try:
max_keys = max(1, min(int(request.args.get("max-keys", current_app.config["UI_PAGE_SIZE"])), 1000))
except ValueError:
return _error_response("InvalidArgument", "max-keys must be an integer", 400)
marker = request.args.get("marker", "") # ListObjects v1
continuation_token = request.args.get("continuation-token", "") # ListObjectsV2
@@ -2099,7 +2122,7 @@ def bucket_handler(bucket_name: str) -> Response:
if continuation_token:
try:
effective_start = base64.urlsafe_b64decode(continuation_token.encode()).decode("utf-8")
except Exception:
except (ValueError, UnicodeDecodeError):
effective_start = continuation_token
elif start_after:
effective_start = start_after
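Narrowing the bare except Exception is safe here: base64.urlsafe_b64decode raises binascii.Error, a ValueError subclass, on malformed input, and .decode("utf-8") raises UnicodeDecodeError, so everything previously swallowed is still handled while unrelated bugs are no longer hidden. A round-trip sketch, assuming the continuation token is URL-safe base64 of the last key returned:

import base64

def encode_token(last_key: str) -> str:
    return base64.urlsafe_b64encode(last_key.encode("utf-8")).decode("ascii")

def decode_token(token: str) -> str:
    try:
        return base64.urlsafe_b64decode(token.encode("ascii")).decode("utf-8")
    except (ValueError, UnicodeDecodeError):
        # binascii.Error subclasses ValueError, so bad padding lands here too;
        # fall back to treating the token as a literal start-after key
        return token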
@@ -2742,7 +2765,7 @@ class AwsChunkedDecoder:
def __init__(self, stream):
self.stream = stream
self._read_buffer = bytearray() # Performance: Pre-allocated buffer
self._read_buffer = bytearray()
self.chunk_remaining = 0
self.finished = False
@@ -2753,20 +2776,15 @@ class AwsChunkedDecoder:
"""
line = bytearray()
while True:
# Check if we have data in buffer
if self._read_buffer:
# Look for CRLF in buffer
idx = self._read_buffer.find(b"\r\n")
if idx != -1:
# Found CRLF - extract line and update buffer
line.extend(self._read_buffer[: idx + 2])
del self._read_buffer[: idx + 2]
return bytes(line)
# No CRLF yet - consume entire buffer
line.extend(self._read_buffer)
self._read_buffer.clear()
# Read more data in larger chunks (64 bytes is enough for chunk headers)
chunk = self.stream.read(64)
if not chunk:
return bytes(line) if line else b""
@@ -2775,14 +2793,11 @@ class AwsChunkedDecoder:
def _read_exact(self, n: int) -> bytes:
"""Read exactly n bytes, using buffer first."""
result = bytearray()
# Use buffered data first
if self._read_buffer:
take = min(len(self._read_buffer), n)
result.extend(self._read_buffer[:take])
del self._read_buffer[:take]
n -= take
# Read remaining directly from stream
if n > 0:
data = self.stream.read(n)
if data:
@@ -2794,7 +2809,7 @@ class AwsChunkedDecoder:
if self.finished:
return b""
result = bytearray() # Performance: Use bytearray for building result
result = bytearray()
while size == -1 or len(result) < size:
if self.chunk_remaining > 0:
to_read = self.chunk_remaining
@@ -2828,7 +2843,6 @@ class AwsChunkedDecoder:
if chunk_size == 0:
self.finished = True
# Skip trailing headers
while True:
trailer = self._read_line()
if trailer == b"\r\n" or not trailer:
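The lines removed across the AwsChunkedDecoder hunks were explanatory comments; the buffered line reads, exact reads and trailer skipping are unchanged. A hypothetical pytest sketch of the framing behaviour, assuming read() defaults to draining the stream and that the decoder does not verify per-chunk signatures itself:

import io

def test_aws_chunked_decoder_reassembles_payload():
    sig = b"0" * 64  # dummy chunk-signature; only the framing is exercised
    payload = (
        b"5;chunk-signature=" + sig + b"\r\n"
        + b"hello\r\n"
        + b"3;chunk-signature=" + sig + b"\r\n"
        + b"wor\r\n"
        + b"0;chunk-signature=" + sig + b"\r\n"
        + b"\r\n"
    )
    decoder = AwsChunkedDecoder(io.BytesIO(payload))  # imported from the API module
    assert decoder.read() == b"hellowor"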
@@ -2969,10 +2983,11 @@ def _abort_multipart_upload(bucket_name: str, object_key: str) -> Response:
try:
_storage().abort_multipart_upload(bucket_name, upload_id)
except BucketNotFoundError as exc:
return _error_response("NoSuchBucket", str(exc), 404)
except StorageError as exc:
if "Bucket does not exist" in str(exc):
return _error_response("NoSuchBucket", str(exc), 404)
current_app.logger.warning(f"Error aborting multipart upload: {exc}")
return Response(status=204)
@@ -2984,13 +2999,15 @@ def resolve_principal():
(request.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256"):
g.principal = _verify_sigv4(request)
return
except Exception:
pass
except IamError as exc:
logger.debug(f"SigV4 authentication failed: {exc}")
except (ValueError, KeyError) as exc:
logger.debug(f"SigV4 parsing error: {exc}")
access_key = request.headers.get("X-Access-Key")
secret_key = request.headers.get("X-Secret-Key")
if access_key and secret_key:
try:
g.principal = _iam().authenticate(access_key, secret_key)
except Exception:
pass
except IamError as exc:
logger.debug(f"Header authentication failed: {exc}")

View File

@@ -76,6 +76,14 @@ class StorageError(RuntimeError):
"""Raised when the storage layer encounters an unrecoverable problem."""
class BucketNotFoundError(StorageError):
"""Raised when the bucket does not exist."""
class ObjectNotFoundError(StorageError):
"""Raised when the object does not exist."""
class QuotaExceededError(StorageError):
"""Raised when an operation would exceed bucket quota limits."""
@@ -106,7 +114,7 @@ class ListObjectsResult:
objects: List[ObjectMeta]
is_truncated: bool
next_continuation_token: Optional[str]
total_count: Optional[int] = None # Total objects in bucket (from stats cache)
total_count: Optional[int] = None
def _utcnow() -> datetime:
@@ -130,22 +138,18 @@ class ObjectStorage:
MULTIPART_MANIFEST = "manifest.json"
BUCKET_CONFIG_FILE = ".bucket.json"
KEY_INDEX_CACHE_TTL = 30
OBJECT_CACHE_MAX_SIZE = 100 # Maximum number of buckets to cache
OBJECT_CACHE_MAX_SIZE = 100
def __init__(self, root: Path) -> None:
self.root = Path(root)
self.root.mkdir(parents=True, exist_ok=True)
self._ensure_system_roots()
# LRU cache for object metadata with thread-safe access
self._object_cache: OrderedDict[str, tuple[Dict[str, ObjectMeta], float]] = OrderedDict()
self._cache_lock = threading.Lock() # Global lock for cache structure
# Performance: Per-bucket locks to reduce contention
self._cache_lock = threading.Lock()
self._bucket_locks: Dict[str, threading.Lock] = {}
# Cache version counter for detecting stale reads
self._cache_version: Dict[str, int] = {}
# Performance: Bucket config cache with TTL
self._bucket_config_cache: Dict[str, tuple[dict[str, Any], float]] = {}
self._bucket_config_cache_ttl = 30.0 # 30 second TTL
self._bucket_config_cache_ttl = 30.0
def _get_bucket_lock(self, bucket_id: str) -> threading.Lock:
"""Get or create a lock for a specific bucket. Reduces global lock contention."""
@@ -170,6 +174,11 @@ class ObjectStorage:
def bucket_exists(self, bucket_name: str) -> bool:
return self._bucket_path(bucket_name).exists()
def _require_bucket_exists(self, bucket_path: Path) -> None:
"""Raise BucketNotFoundError if bucket does not exist."""
if not bucket_path.exists():
raise BucketNotFoundError("Bucket does not exist")
def _validate_bucket_name(self, bucket_name: str) -> None:
if len(bucket_name) < 3 or len(bucket_name) > 63:
raise StorageError("Bucket name must be between 3 and 63 characters")
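BucketNotFoundError and ObjectNotFoundError both derive from StorageError, so pre-existing broad except StorageError handlers keep working while the API layer can now map the specific cases to NoSuchBucket and NoSuchKey; the new _require_bucket_exists helper centralises the raise. A quick illustration of that compatibility (storage is an ObjectStorage instance; the handler body is a placeholder):

assert issubclass(BucketNotFoundError, StorageError)
assert issubclass(ObjectNotFoundError, StorageError)

try:
    storage.get_object_tags("missing-bucket", "key.txt")
except StorageError:
    pass  # the new, more specific exceptions are still caught here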
@@ -188,14 +197,14 @@ class ObjectStorage:
def bucket_stats(self, bucket_name: str, cache_ttl: int = 60) -> dict[str, int]:
"""Return object count and total size for the bucket (cached).
Args:
bucket_name: Name of the bucket
cache_ttl: Cache time-to-live in seconds (default 60)
"""
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
raise BucketNotFoundError("Bucket does not exist")
cache_path = self._system_bucket_root(bucket_name) / "stats.json"
if cache_path.exists():
@@ -257,8 +266,7 @@ class ObjectStorage:
def delete_bucket(self, bucket_name: str) -> None:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
# Performance: Single check instead of three separate traversals
raise BucketNotFoundError("Bucket does not exist")
has_objects, has_versions, has_multipart = self._check_bucket_contents(bucket_path)
if has_objects:
raise StorageError("Bucket not empty")
@@ -291,7 +299,7 @@ class ObjectStorage:
"""
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
object_cache = self._get_object_cache(bucket_id, bucket_path)
@@ -352,7 +360,7 @@ class ObjectStorage:
) -> ObjectMeta:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
@@ -409,7 +417,6 @@ class ObjectStorage:
self._invalidate_bucket_stats_cache(bucket_id)
# Performance: Lazy update - only update the affected key instead of invalidating whole cache
obj_meta = ObjectMeta(
key=safe_key.as_posix(),
size=stat.st_size,
@@ -424,7 +431,7 @@ class ObjectStorage:
def get_object_path(self, bucket_name: str, object_key: str) -> Path:
path = self._object_path(bucket_name, object_key)
if not path.exists():
raise StorageError("Object not found")
raise ObjectNotFoundError("Object not found")
return path
def get_object_metadata(self, bucket_name: str, object_key: str) -> Dict[str, str]:
@@ -467,7 +474,6 @@ class ObjectStorage:
self._delete_metadata(bucket_id, rel)
self._invalidate_bucket_stats_cache(bucket_id)
# Performance: Lazy update - only remove the affected key instead of invalidating whole cache
self._update_object_cache_entry(bucket_id, safe_key.as_posix(), None)
self._cleanup_empty_parents(path, bucket_path)
@@ -490,14 +496,13 @@ class ObjectStorage:
shutil.rmtree(legacy_version_dir, ignore_errors=True)
self._invalidate_bucket_stats_cache(bucket_id)
# Performance: Lazy update - only remove the affected key instead of invalidating whole cache
self._update_object_cache_entry(bucket_id, rel.as_posix(), None)
self._cleanup_empty_parents(target, bucket_path)
def is_versioning_enabled(self, bucket_name: str) -> bool:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
raise BucketNotFoundError("Bucket does not exist")
return self._is_versioning_enabled(bucket_path)
def set_bucket_versioning(self, bucket_name: str, enabled: bool) -> None:
@@ -689,11 +694,11 @@ class ObjectStorage:
"""Get tags for an object."""
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
raise BucketNotFoundError("Bucket does not exist")
safe_key = self._sanitize_object_key(object_key)
object_path = bucket_path / safe_key
if not object_path.exists():
raise StorageError("Object does not exist")
raise ObjectNotFoundError("Object does not exist")
for meta_file in (self._metadata_file(bucket_path.name, safe_key), self._legacy_metadata_file(bucket_path.name, safe_key)):
if not meta_file.exists():
@@ -712,11 +717,11 @@ class ObjectStorage:
"""Set tags for an object."""
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
raise BucketNotFoundError("Bucket does not exist")
safe_key = self._sanitize_object_key(object_key)
object_path = bucket_path / safe_key
if not object_path.exists():
raise StorageError("Object does not exist")
raise ObjectNotFoundError("Object does not exist")
meta_file = self._metadata_file(bucket_path.name, safe_key)
@@ -750,7 +755,7 @@ class ObjectStorage:
def list_object_versions(self, bucket_name: str, object_key: str) -> List[Dict[str, Any]]:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
version_dir = self._version_dir(bucket_id, safe_key)
@@ -774,7 +779,7 @@ class ObjectStorage:
def restore_object_version(self, bucket_name: str, object_key: str, version_id: str) -> ObjectMeta:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
version_dir = self._version_dir(bucket_id, safe_key)
@@ -811,7 +816,7 @@ class ObjectStorage:
def delete_object_version(self, bucket_name: str, object_key: str, version_id: str) -> None:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
version_dir = self._version_dir(bucket_id, safe_key)
@@ -834,7 +839,7 @@ class ObjectStorage:
def list_orphaned_objects(self, bucket_name: str) -> List[Dict[str, Any]]:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
version_roots = [self._bucket_versions_root(bucket_id), self._legacy_versions_root(bucket_id)]
if not any(root.exists() for root in version_roots):
@@ -902,7 +907,7 @@ class ObjectStorage:
) -> str:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
upload_id = uuid.uuid4().hex
@@ -929,8 +934,8 @@ class ObjectStorage:
Uses file locking to safely update the manifest and handle concurrent uploads.
"""
if part_number < 1:
raise StorageError("part_number must be >= 1")
if part_number < 1 or part_number > 10000:
raise StorageError("part_number must be between 1 and 10000")
bucket_path = self._bucket_path(bucket_name)
upload_root = self._multipart_dir(bucket_path.name, upload_id)
@@ -939,7 +944,6 @@ class ObjectStorage:
if not upload_root.exists():
raise StorageError("Multipart upload not found")
# Write part to temporary file first, then rename atomically
checksum = hashlib.md5()
part_filename = f"part-{part_number:05d}.part"
part_path = upload_root / part_filename
@@ -948,11 +952,8 @@ class ObjectStorage:
try:
with temp_path.open("wb") as target:
shutil.copyfileobj(_HashingReader(stream, checksum), target)
# Atomic rename (or replace on Windows)
temp_path.replace(part_path)
except OSError:
# Clean up temp file on failure
try:
temp_path.unlink(missing_ok=True)
except OSError:
@@ -968,7 +969,6 @@ class ObjectStorage:
manifest_path = upload_root / self.MULTIPART_MANIFEST
lock_path = upload_root / ".manifest.lock"
# Retry loop for handling transient lock/read failures
max_retries = 3
for attempt in range(max_retries):
try:
@@ -1151,10 +1151,10 @@ class ObjectStorage:
"""List all active multipart uploads for a bucket."""
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
uploads = []
multipart_root = self._bucket_multipart_root(bucket_id)
multipart_root = self._multipart_bucket_root(bucket_id)
if multipart_root.exists():
for upload_dir in multipart_root.iterdir():
if not upload_dir.is_dir():
@@ -1171,7 +1171,7 @@ class ObjectStorage:
})
except (OSError, json.JSONDecodeError):
continue
legacy_root = self._legacy_multipart_root(bucket_id)
legacy_root = self._legacy_multipart_bucket_root(bucket_id)
if legacy_root.exists():
for upload_dir in legacy_root.iterdir():
if not upload_dir.is_dir():
@@ -1394,7 +1394,6 @@ class ObjectStorage:
"""
now = time.time()
# Quick check with global lock (brief)
with self._cache_lock:
cached = self._object_cache.get(bucket_id)
if cached:
@@ -1404,10 +1403,8 @@ class ObjectStorage:
return objects
cache_version = self._cache_version.get(bucket_id, 0)
# Use per-bucket lock for cache building (allows parallel builds for different buckets)
bucket_lock = self._get_bucket_lock(bucket_id)
with bucket_lock:
# Double-check cache after acquiring per-bucket lock
with self._cache_lock:
cached = self._object_cache.get(bucket_id)
if cached:
@@ -1415,17 +1412,12 @@ class ObjectStorage:
if now - timestamp < self.KEY_INDEX_CACHE_TTL:
self._object_cache.move_to_end(bucket_id)
return objects
# Build cache with per-bucket lock held (prevents duplicate work)
objects = self._build_object_cache(bucket_path)
with self._cache_lock:
# Check if cache was invalidated while we were building
current_version = self._cache_version.get(bucket_id, 0)
if current_version != cache_version:
objects = self._build_object_cache(bucket_path)
# Evict oldest entries if cache is full
while len(self._object_cache) >= self.OBJECT_CACHE_MAX_SIZE:
self._object_cache.popitem(last=False)
@@ -1459,12 +1451,9 @@ class ObjectStorage:
if cached:
objects, timestamp = cached
if meta is None:
# Delete operation - remove key from cache
objects.pop(key, None)
else:
# Put operation - update/add key in cache
objects[key] = meta
# Keep same timestamp - don't reset TTL for single key updates
def _ensure_system_roots(self) -> None:
for path in (
@@ -1485,13 +1474,12 @@ class ObjectStorage:
return self._system_bucket_root(bucket_name) / self.BUCKET_CONFIG_FILE
def _read_bucket_config(self, bucket_name: str) -> dict[str, Any]:
# Performance: Check cache first
now = time.time()
cached = self._bucket_config_cache.get(bucket_name)
if cached:
config, cached_time = cached
if now - cached_time < self._bucket_config_cache_ttl:
return config.copy() # Return copy to prevent mutation
return config.copy()
config_path = self._bucket_config_path(bucket_name)
if not config_path.exists():
@@ -1510,7 +1498,6 @@ class ObjectStorage:
config_path = self._bucket_config_path(bucket_name)
config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(json.dumps(payload), encoding="utf-8")
# Performance: Update cache immediately after write
self._bucket_config_cache[bucket_name] = (payload.copy(), time.time())
def _set_bucket_config_entry(self, bucket_name: str, key: str, value: Any | None) -> None:
@@ -1636,7 +1623,6 @@ class ObjectStorage:
def _check_bucket_contents(self, bucket_path: Path) -> tuple[bool, bool, bool]:
"""Check bucket for objects, versions, and multipart uploads in a single pass.
Performance optimization: Combines three separate rglob traversals into one.
Returns (has_visible_objects, has_archived_versions, has_active_multipart_uploads).
Uses early exit when all three are found.
"""
@@ -1645,7 +1631,6 @@ class ObjectStorage:
has_multipart = False
bucket_name = bucket_path.name
# Check visible objects in bucket
for path in bucket_path.rglob("*"):
if has_objects:
break
@@ -1656,7 +1641,6 @@ class ObjectStorage:
continue
has_objects = True
# Check archived versions (only if needed)
for version_root in (
self._bucket_versions_root(bucket_name),
self._legacy_versions_root(bucket_name),
@@ -1669,7 +1653,6 @@ class ObjectStorage:
has_versions = True
break
# Check multipart uploads (only if needed)
for uploads_root in (
self._multipart_bucket_root(bucket_name),
self._legacy_multipart_bucket_root(bucket_name),
@@ -1703,7 +1686,7 @@ class ObjectStorage:
try:
os.chmod(target_path, stat.S_IRWXU)
func(target_path)
except Exception as exc: # pragma: no cover - fallback failure
except Exception as exc:
raise StorageError(f"Unable to delete bucket contents: {exc}") from exc
try:

View File

@@ -371,7 +371,7 @@ def bucket_detail(bucket_name: str):
kms_keys = kms_manager.list_keys() if kms_manager else []
kms_enabled = current_app.config.get("KMS_ENABLED", False)
encryption_enabled = current_app.config.get("ENCRYPTION_ENABLED", False)
can_manage_encryption = can_manage_versioning # Same as other bucket properties
can_manage_encryption = can_manage_versioning
bucket_quota = storage.get_bucket_quota(bucket_name)
bucket_stats = storage.bucket_stats(bucket_name)
@@ -450,8 +450,6 @@ def list_bucket_objects(bucket_name: str):
except StorageError:
versioning_enabled = False
# Pre-compute URL templates once (not per-object) for performance
# Frontend will construct actual URLs by replacing KEY_PLACEHOLDER
preview_template = url_for("ui.object_preview", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
delete_template = url_for("ui.delete_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
presign_template = url_for("ui.object_presign", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
@@ -527,8 +525,6 @@ def upload_object(bucket_name: str):
try:
_authorize_ui(principal, bucket_name, "write")
_storage().put_object(bucket_name, object_key, file.stream, metadata=metadata)
# Trigger replication
_replication().trigger_replication(bucket_name, object_key)
message = f"Uploaded '{object_key}'"
@@ -765,20 +761,18 @@ def bulk_download_objects(bucket_name: str):
if not cleaned:
return jsonify({"error": "Select at least one object to download"}), 400
MAX_KEYS = current_app.config.get("BULK_DELETE_MAX_KEYS", 500) # Reuse same limit for now
MAX_KEYS = current_app.config.get("BULK_DELETE_MAX_KEYS", 500)
if len(cleaned) > MAX_KEYS:
return jsonify({"error": f"A maximum of {MAX_KEYS} objects can be downloaded per request"}), 400
unique_keys = list(dict.fromkeys(cleaned))
storage = _storage()
# Verify permission to read bucket contents
try:
_authorize_ui(principal, bucket_name, "read")
except IamError as exc:
return jsonify({"error": str(exc)}), 403
# Create ZIP archive of selected objects
buffer = io.BytesIO()
with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
for key in unique_keys:
@@ -795,7 +789,6 @@ def bulk_download_objects(bucket_name: str):
path = storage.get_object_path(bucket_name, key)
zf.write(path, arcname=key)
except (StorageError, IamError):
# Skip objects that can't be accessed
continue
buffer.seek(0)
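The comments dropped in this hunk restated what the code does. Note that the archive is assembled entirely in memory via BytesIO; for very large selections a tempfile.SpooledTemporaryFile would cap memory use. A condensed sketch of the ZIP assembly (hypothetical helper, not the route itself):

import io
import zipfile

def build_zip(paths_by_key: dict[str, str]) -> io.BytesIO:
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
        for key, path in paths_by_key.items():
            zf.write(path, arcname=key)  # arcname preserves the object-key layout
    buffer.seek(0)
    return buffer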
@@ -846,7 +839,6 @@ def object_preview(bucket_name: str, object_key: str) -> Response:
download = request.args.get("download") == "1"
# Check if object is encrypted and needs decryption
is_encrypted = "x-amz-server-side-encryption" in metadata
if is_encrypted and hasattr(storage, 'get_object_data'):
try:
@@ -882,7 +874,6 @@ def object_presign(bucket_name: str, object_key: str):
encoded_key = quote(object_key, safe="/")
url = f"{api_base}/presign/{bucket_name}/{encoded_key}"
# Use API base URL for forwarded headers so presigned URLs point to API, not UI
parsed_api = urlparse(api_base)
headers = _api_headers()
headers["X-Forwarded-Host"] = parsed_api.netloc or "127.0.0.1:5000"
@@ -1027,7 +1018,6 @@ def update_bucket_quota(bucket_name: str):
"""Update bucket quota configuration (admin only)."""
principal = _current_principal()
# Quota management is admin-only
is_admin = False
try:
_iam().authorize(principal, None, "iam:list_users")
@@ -1049,7 +1039,6 @@ def update_bucket_quota(bucket_name: str):
flash(_friendly_error_message(exc), "danger")
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
# Parse quota values
max_mb_str = request.form.get("max_mb", "").strip()
max_objects_str = request.form.get("max_objects", "").strip()
@@ -1061,7 +1050,7 @@ def update_bucket_quota(bucket_name: str):
max_mb = int(max_mb_str)
if max_mb < 1:
raise ValueError("Size must be at least 1 MB")
max_bytes = max_mb * 1024 * 1024 # Convert MB to bytes
max_bytes = max_mb * 1024 * 1024
except ValueError as exc:
flash(f"Invalid size value: {exc}", "danger")
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
@@ -1114,7 +1103,6 @@ def update_bucket_encryption(bucket_name: str):
flash("Invalid encryption algorithm", "danger")
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
# Build encryption configuration in AWS S3 format
encryption_config: dict[str, Any] = {
"Rules": [
{
@@ -1505,7 +1493,6 @@ def update_bucket_replication(bucket_name: str):
if rule:
rule.enabled = True
_replication().set_rule(rule)
# When resuming, sync any pending objects that accumulated while paused
if rule.mode == REPLICATION_MODE_ALL:
_replication().replicate_existing_objects(bucket_name)
flash("Replication resumed. Syncing pending objects in background.", "success")