Add new tests; fix method-name typos and tighten input validation

2026-01-03 23:29:07 +08:00
parent 2d60e36fbf
commit b9cfc45aa2
14 changed files with 1970 additions and 125 deletions
--- a/app/init.py
+++ b/app/init.py
@@ -289,17 +289,17 @@ def _configure_logging(app: Flask) -> None:
    formatter = logging.Formatter(
        "%(asctime)s | %(levelname)s | %(request_id)s | %(method)s %(path)s | %(message)s"
    )
-    
-    # Stream Handler (stdout) - Primary for Docker
+
    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setFormatter(formatter)
    stream_handler.addFilter(_RequestContextFilter())

    logger = app.logger
+    for handler in logger.handlers[:]:
+        handler.close()
    logger.handlers.clear()
    logger.addHandler(stream_handler)

-    # File Handler (optional, if configured)
    if app.config.get("LOG_TO_FILE"):
        log_file = Path(app.config["LOG_FILE"])
        log_file.parent.mkdir(parents=True, exist_ok=True)
--- a/app/access_logging.py
+++ b/app/access_logging.py
@@ -196,18 +196,21 @@ class AccessLoggingService:
        )

        target_key = f"{config.target_bucket}:{config.target_prefix}"
+        should_flush = False
        with self._buffer_lock:
            if target_key not in self._buffer:
                self._buffer[target_key] = []
            self._buffer[target_key].append(entry)
+            should_flush = len(self._buffer[target_key]) >= self.max_buffer_size

-            if len(self._buffer[target_key]) >= self.max_buffer_size:
-                self._flush_buffer(target_key)
+        if should_flush:
+            self._flush_buffer(target_key)

    def _flush_loop(self) -> None:
        while not self._shutdown.is_set():
-            time.sleep(self.flush_interval)
-            self._flush_all()
+            self._shutdown.wait(timeout=self.flush_interval)
+            if not self._shutdown.is_set():
+                self._flush_all()

    def _flush_all(self) -> None:
        with self._buffer_lock:
--- a/app/config.py
+++ b/app/config.py
@@ -84,7 +84,7 @@ class AppConfig:
            return overrides.get(name, os.getenv(name, default))

        storage_root = Path(_get("STORAGE_ROOT", PROJECT_ROOT / "data")).resolve()
-        max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024))  # 1 GiB default
+        max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024)) 
        ui_page_size = int(_get("UI_PAGE_SIZE", 100))
        auth_max_attempts = int(_get("AUTH_MAX_ATTEMPTS", 5))
        auth_lockout_minutes = int(_get("AUTH_LOCKOUT_MINUTES", 15))
@@ -108,6 +108,10 @@ class AppConfig:
                try:
                    secret_file.parent.mkdir(parents=True, exist_ok=True)
                    secret_file.write_text(generated)
+                    try:
+                        os.chmod(secret_file, 0o600)
+                    except OSError:
+                        pass
                    secret_key = generated
                except OSError:
                    secret_key = generated
--- a/app/replication.py
+++ b/app/replication.py
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
 REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
 REPLICATION_CONNECT_TIMEOUT = 5
 REPLICATION_READ_TIMEOUT = 30
-STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024  # 10 MiB - use streaming for larger files
+STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024 

 REPLICATION_MODE_NEW_ONLY = "new_only"
 REPLICATION_MODE_ALL = "all"
@@ -307,7 +307,6 @@ class ReplicationManager:
        if self._shutdown:
            return

-        # Re-check if rule is still enabled (may have been paused after task was submitted)
        current_rule = self.get_rule(bucket_name)
        if not current_rule or not current_rule.enabled:
            logger.debug(f"Replication skipped for {bucket_name}/{object_key}: rule disabled or removed")
@@ -358,7 +357,6 @@ class ReplicationManager:
                    extra_args["ContentType"] = content_type

                if file_size >= STREAMING_THRESHOLD_BYTES:
-                    # Use multipart upload for large files
                    s3.upload_file(
                        str(path),
                        rule.target_bucket,
@@ -366,7 +364,6 @@ class ReplicationManager:
                        ExtraArgs=extra_args if extra_args else None,
                    )
                else:
-                    # Read small files into memory
                    file_content = path.read_bytes()
                    put_kwargs = {
                        "Bucket": rule.target_bucket,
--- a/app/s3_api.py
+++ b/app/s3_api.py
@@ -25,7 +25,7 @@ from .iam import IamError, Principal
 from .notifications import NotificationService, NotificationConfiguration, WebhookDestination
 from .object_lock import ObjectLockService, ObjectLockRetention, ObjectLockConfig, ObjectLockError, RetentionMode
 from .replication import ReplicationManager
-from .storage import ObjectStorage, StorageError, QuotaExceededError
+from .storage import ObjectStorage, StorageError, QuotaExceededError, BucketNotFoundError, ObjectNotFoundError

 logger = logging.getLogger(__name__)

@@ -217,7 +217,6 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
    calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()

    if not hmac.compare_digest(calculated_signature, signature):
-        # Only log detailed signature debug info if DEBUG_SIGV4 is enabled
        if current_app.config.get("DEBUG_SIGV4"):
            logger.warning(
                "SigV4 signature mismatch",
@@ -260,7 +259,13 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
        raise IamError("Invalid Date format")
    
    now = datetime.now(timezone.utc)
-    if now > req_time + timedelta(seconds=int(expires)):
+    try:
+        expires_seconds = int(expires)
+        if expires_seconds <= 0:
+            raise IamError("Invalid Expires value: must be positive")
+    except ValueError:
+        raise IamError("Invalid Expires value: must be an integer")
+    if now > req_time + timedelta(seconds=expires_seconds):
        raise IamError("Request expired")

    secret_key = _iam().get_secret_key(access_key)
@@ -1036,21 +1041,23 @@ def _object_tagging_handler(bucket_name: str, object_key: str) -> Response:
    if request.method == "GET":
        try:
            tags = storage.get_object_tags(bucket_name, object_key)
+        except BucketNotFoundError as exc:
+            return _error_response("NoSuchBucket", str(exc), 404)
+        except ObjectNotFoundError as exc:
+            return _error_response("NoSuchKey", str(exc), 404)
        except StorageError as exc:
-            message = str(exc)
-            if "Bucket" in message:
-                return _error_response("NoSuchBucket", message, 404)
-            return _error_response("NoSuchKey", message, 404)
+            return _error_response("InternalError", str(exc), 500)
        return _xml_response(_render_tagging_document(tags))
    
    if request.method == "DELETE":
        try:
            storage.delete_object_tags(bucket_name, object_key)
+        except BucketNotFoundError as exc:
+            return _error_response("NoSuchBucket", str(exc), 404)
+        except ObjectNotFoundError as exc:
+            return _error_response("NoSuchKey", str(exc), 404)
        except StorageError as exc:
-            message = str(exc)
-            if "Bucket" in message:
-                return _error_response("NoSuchBucket", message, 404)
-            return _error_response("NoSuchKey", message, 404)
+            return _error_response("InternalError", str(exc), 500)
        current_app.logger.info("Object tags deleted", extra={"bucket": bucket_name, "key": object_key})
        return Response(status=204)

@@ -1063,11 +1070,12 @@ def _object_tagging_handler(bucket_name: str, object_key: str) -> Response:
        return _error_response("InvalidTag", "A maximum of 10 tags is supported for objects", 400)
    try:
        storage.set_object_tags(bucket_name, object_key, tags)
+    except BucketNotFoundError as exc:
+        return _error_response("NoSuchBucket", str(exc), 404)
+    except ObjectNotFoundError as exc:
+        return _error_response("NoSuchKey", str(exc), 404)
    except StorageError as exc:
-        message = str(exc)
-        if "Bucket" in message:
-            return _error_response("NoSuchBucket", message, 404)
-        return _error_response("NoSuchKey", message, 404)
+        return _error_response("InternalError", str(exc), 500)
    current_app.logger.info("Object tags updated", extra={"bucket": bucket_name, "key": object_key, "tags": len(tags)})
    return Response(status=204)

@@ -1283,7 +1291,10 @@ def _bucket_list_versions_handler(bucket_name: str) -> Response:
    
    prefix = request.args.get("prefix", "")
    delimiter = request.args.get("delimiter", "")
-    max_keys = min(int(request.args.get("max-keys", 1000)), 1000)
+    try:
+        max_keys = max(1, min(int(request.args.get("max-keys", 1000)), 1000))
+    except ValueError:
+        return _error_response("InvalidArgument", "max-keys must be an integer", 400)
    key_marker = request.args.get("key-marker", "")
    
    if prefix:
@@ -1476,7 +1487,10 @@ def _parse_lifecycle_config(payload: bytes) -> list:
            expiration: dict = {}
            days_el = exp_el.find("{*}Days") or exp_el.find("Days")
            if days_el is not None and days_el.text:
-                expiration["Days"] = int(days_el.text.strip())
+                days_val = int(days_el.text.strip())
+                if days_val <= 0:
+                    raise ValueError("Expiration Days must be a positive integer")
+                expiration["Days"] = days_val
            date_el = exp_el.find("{*}Date") or exp_el.find("Date")
            if date_el is not None and date_el.text:
                expiration["Date"] = date_el.text.strip()
@@ -1491,7 +1505,10 @@ def _parse_lifecycle_config(payload: bytes) -> list:
            nve: dict = {}
            days_el = nve_el.find("{*}NoncurrentDays") or nve_el.find("NoncurrentDays")
            if days_el is not None and days_el.text:
-                nve["NoncurrentDays"] = int(days_el.text.strip())
+                noncurrent_days = int(days_el.text.strip())
+                if noncurrent_days <= 0:
+                    raise ValueError("NoncurrentDays must be a positive integer")
+                nve["NoncurrentDays"] = noncurrent_days
            if nve:
                rule["NoncurrentVersionExpiration"] = nve
        
@@ -1500,7 +1517,10 @@ def _parse_lifecycle_config(payload: bytes) -> list:
            aimu: dict = {}
            days_el = aimu_el.find("{*}DaysAfterInitiation") or aimu_el.find("DaysAfterInitiation")
            if days_el is not None and days_el.text:
-                aimu["DaysAfterInitiation"] = int(days_el.text.strip())
+                days_after = int(days_el.text.strip())
+                if days_after <= 0:
+                    raise ValueError("DaysAfterInitiation must be a positive integer")
+                aimu["DaysAfterInitiation"] = days_after
            if aimu:
                rule["AbortIncompleteMultipartUpload"] = aimu
        
@@ -2086,7 +2106,10 @@ def bucket_handler(bucket_name: str) -> Response:
    list_type = request.args.get("list-type")
    prefix = request.args.get("prefix", "")
    delimiter = request.args.get("delimiter", "")
-    max_keys = min(int(request.args.get("max-keys", current_app.config["UI_PAGE_SIZE"])), 1000)
+    try:
+        max_keys = max(1, min(int(request.args.get("max-keys", current_app.config["UI_PAGE_SIZE"])), 1000))
+    except ValueError:
+        return _error_response("InvalidArgument", "max-keys must be an integer", 400)
    
    marker = request.args.get("marker", "")  # ListObjects v1
    continuation_token = request.args.get("continuation-token", "")  # ListObjectsV2
@@ -2099,7 +2122,7 @@ def bucket_handler(bucket_name: str) -> Response:
        if continuation_token:
            try:
                effective_start = base64.urlsafe_b64decode(continuation_token.encode()).decode("utf-8")
-            except Exception:
+            except (ValueError, UnicodeDecodeError):
                effective_start = continuation_token
        elif start_after:
            effective_start = start_after
@@ -2742,7 +2765,7 @@ class AwsChunkedDecoder:

    def __init__(self, stream):
        self.stream = stream
-        self._read_buffer = bytearray()  # Performance: Pre-allocated buffer
+        self._read_buffer = bytearray()
        self.chunk_remaining = 0
        self.finished = False

@@ -2753,20 +2776,15 @@ class AwsChunkedDecoder:
        """
        line = bytearray()
        while True:
-            # Check if we have data in buffer
            if self._read_buffer:
-                # Look for CRLF in buffer
                idx = self._read_buffer.find(b"\r\n")
                if idx != -1:
-                    # Found CRLF - extract line and update buffer
                    line.extend(self._read_buffer[: idx + 2])
                    del self._read_buffer[: idx + 2]
                    return bytes(line)
-                # No CRLF yet - consume entire buffer
                line.extend(self._read_buffer)
                self._read_buffer.clear()

-            # Read more data in larger chunks (64 bytes is enough for chunk headers)
            chunk = self.stream.read(64)
            if not chunk:
                return bytes(line) if line else b""
@@ -2775,14 +2793,11 @@ class AwsChunkedDecoder:
    def _read_exact(self, n: int) -> bytes:
        """Read exactly n bytes, using buffer first."""
        result = bytearray()
-        # Use buffered data first
        if self._read_buffer:
            take = min(len(self._read_buffer), n)
            result.extend(self._read_buffer[:take])
            del self._read_buffer[:take]
            n -= take
-
-        # Read remaining directly from stream
        if n > 0:
            data = self.stream.read(n)
            if data:
@@ -2794,7 +2809,7 @@ class AwsChunkedDecoder:
        if self.finished:
            return b""

-        result = bytearray()  # Performance: Use bytearray for building result
+        result = bytearray()
        while size == -1 or len(result) < size:
            if self.chunk_remaining > 0:
                to_read = self.chunk_remaining
@@ -2828,7 +2843,6 @@ class AwsChunkedDecoder:

                if chunk_size == 0:
                    self.finished = True
-                    # Skip trailing headers
                    while True:
                        trailer = self._read_line()
                        if trailer == b"\r\n" or not trailer:
@@ -2969,10 +2983,11 @@ def _abort_multipart_upload(bucket_name: str, object_key: str) -> Response:

    try:
        _storage().abort_multipart_upload(bucket_name, upload_id)
+    except BucketNotFoundError as exc:
+        return _error_response("NoSuchBucket", str(exc), 404)
    except StorageError as exc:
-        if "Bucket does not exist" in str(exc):
-            return _error_response("NoSuchBucket", str(exc), 404)
-            
+        current_app.logger.warning(f"Error aborting multipart upload: {exc}")
+
    return Response(status=204)


@@ -2984,13 +2999,15 @@ def resolve_principal():
           (request.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256"):
            g.principal = _verify_sigv4(request)
            return
-    except Exception:
-        pass
-    
+    except IamError as exc:
+        logger.debug(f"SigV4 authentication failed: {exc}")
+    except (ValueError, KeyError) as exc:
+        logger.debug(f"SigV4 parsing error: {exc}")
+
    access_key = request.headers.get("X-Access-Key")
    secret_key = request.headers.get("X-Secret-Key")
    if access_key and secret_key:
        try:
            g.principal = _iam().authenticate(access_key, secret_key)
-        except Exception:
-            pass
+        except IamError as exc:
+            logger.debug(f"Header authentication failed: {exc}")
--- a/app/storage.py
+++ b/app/storage.py
@@ -76,6 +76,14 @@ class StorageError(RuntimeError):
    """Raised when the storage layer encounters an unrecoverable problem."""


+class BucketNotFoundError(StorageError):
+    """Raised when the bucket does not exist."""
+
+
+class ObjectNotFoundError(StorageError):
+    """Raised when the object does not exist."""
+
+
 class QuotaExceededError(StorageError):
    """Raised when an operation would exceed bucket quota limits."""
    
@@ -106,7 +114,7 @@ class ListObjectsResult:
    objects: List[ObjectMeta]
    is_truncated: bool
    next_continuation_token: Optional[str]
-    total_count: Optional[int] = None  # Total objects in bucket (from stats cache)
+    total_count: Optional[int] = None


 def _utcnow() -> datetime:
@@ -130,22 +138,18 @@ class ObjectStorage:
    MULTIPART_MANIFEST = "manifest.json"
    BUCKET_CONFIG_FILE = ".bucket.json"
    KEY_INDEX_CACHE_TTL = 30
-    OBJECT_CACHE_MAX_SIZE = 100  # Maximum number of buckets to cache
+    OBJECT_CACHE_MAX_SIZE = 100

    def __init__(self, root: Path) -> None:
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        self._ensure_system_roots()
-        # LRU cache for object metadata with thread-safe access
        self._object_cache: OrderedDict[str, tuple[Dict[str, ObjectMeta], float]] = OrderedDict()
-        self._cache_lock = threading.Lock()  # Global lock for cache structure
-        # Performance: Per-bucket locks to reduce contention
+        self._cache_lock = threading.Lock()
        self._bucket_locks: Dict[str, threading.Lock] = {}
-        # Cache version counter for detecting stale reads
        self._cache_version: Dict[str, int] = {}
-        # Performance: Bucket config cache with TTL
        self._bucket_config_cache: Dict[str, tuple[dict[str, Any], float]] = {}
-        self._bucket_config_cache_ttl = 30.0  # 30 second TTL
+        self._bucket_config_cache_ttl = 30.0

    def _get_bucket_lock(self, bucket_id: str) -> threading.Lock:
        """Get or create a lock for a specific bucket. Reduces global lock contention."""
@@ -170,6 +174,11 @@ class ObjectStorage:
    def bucket_exists(self, bucket_name: str) -> bool:
        return self._bucket_path(bucket_name).exists()

+    def _require_bucket_exists(self, bucket_path: Path) -> None:
+        """Raise BucketNotFoundError if bucket does not exist."""
+        if not bucket_path.exists():
+            raise BucketNotFoundError("Bucket does not exist")
+
    def _validate_bucket_name(self, bucket_name: str) -> None:
        if len(bucket_name) < 3 or len(bucket_name) > 63:
            raise StorageError("Bucket name must be between 3 and 63 characters")
@@ -188,14 +197,14 @@ class ObjectStorage:

    def bucket_stats(self, bucket_name: str, cache_ttl: int = 60) -> dict[str, int]:
        """Return object count and total size for the bucket (cached).
-        
+
        Args:
            bucket_name: Name of the bucket
            cache_ttl: Cache time-to-live in seconds (default 60)
        """
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
-            raise StorageError("Bucket does not exist")
+            raise BucketNotFoundError("Bucket does not exist")

        cache_path = self._system_bucket_root(bucket_name) / "stats.json"
        if cache_path.exists():
@@ -257,8 +266,7 @@ class ObjectStorage:
    def delete_bucket(self, bucket_name: str) -> None:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
-            raise StorageError("Bucket does not exist")
-        # Performance: Single check instead of three separate traversals
+            raise BucketNotFoundError("Bucket does not exist")
        has_objects, has_versions, has_multipart = self._check_bucket_contents(bucket_path)
        if has_objects:
            raise StorageError("Bucket not empty")
@@ -291,7 +299,7 @@ class ObjectStorage:
        """
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
-            raise StorageError("Bucket does not exist")
+            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name

        object_cache = self._get_object_cache(bucket_id, bucket_path)
@@ -352,7 +360,7 @@ class ObjectStorage:
    ) -> ObjectMeta:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
-            raise StorageError("Bucket does not exist")
+            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name

        safe_key = self._sanitize_object_key(object_key)
@@ -409,7 +417,6 @@ class ObjectStorage:

        self._invalidate_bucket_stats_cache(bucket_id)

-        # Performance: Lazy update - only update the affected key instead of invalidating whole cache
        obj_meta = ObjectMeta(
            key=safe_key.as_posix(),
            size=stat.st_size,
@@ -424,7 +431,7 @@ class ObjectStorage:
    def get_object_path(self, bucket_name: str, object_key: str) -> Path:
        path = self._object_path(bucket_name, object_key)
        if not path.exists():
-            raise StorageError("Object not found")
+            raise ObjectNotFoundError("Object not found")
        return path

    def get_object_metadata(self, bucket_name: str, object_key: str) -> Dict[str, str]:
@@ -467,7 +474,6 @@ class ObjectStorage:
        self._delete_metadata(bucket_id, rel)

        self._invalidate_bucket_stats_cache(bucket_id)
-        # Performance: Lazy update - only remove the affected key instead of invalidating whole cache
        self._update_object_cache_entry(bucket_id, safe_key.as_posix(), None)
        self._cleanup_empty_parents(path, bucket_path)

@@ -490,14 +496,13 @@ class ObjectStorage:
            shutil.rmtree(legacy_version_dir, ignore_errors=True)

        self._invalidate_bucket_stats_cache(bucket_id)
-        # Performance: Lazy update - only remove the affected key instead of invalidating whole cache
        self._update_object_cache_entry(bucket_id, rel.as_posix(), None)
        self._cleanup_empty_parents(target, bucket_path)

    def is_versioning_enabled(self, bucket_name: str) -> bool:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
-            raise StorageError("Bucket does not exist")
+            raise BucketNotFoundError("Bucket does not exist")
        return self._is_versioning_enabled(bucket_path)

    def set_bucket_versioning(self, bucket_name: str, enabled: bool) -> None:
@@ -689,11 +694,11 @@ class ObjectStorage:
        """Get tags for an object."""
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
-            raise StorageError("Bucket does not exist")
+            raise BucketNotFoundError("Bucket does not exist")
        safe_key = self._sanitize_object_key(object_key)
        object_path = bucket_path / safe_key
        if not object_path.exists():
-            raise StorageError("Object does not exist")
+            raise ObjectNotFoundError("Object does not exist")
        
        for meta_file in (self._metadata_file(bucket_path.name, safe_key), self._legacy_metadata_file(bucket_path.name, safe_key)):
            if not meta_file.exists():
@@ -712,11 +717,11 @@ class ObjectStorage:
        """Set tags for an object."""
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
-            raise StorageError("Bucket does not exist")
+            raise BucketNotFoundError("Bucket does not exist")
        safe_key = self._sanitize_object_key(object_key)
        object_path = bucket_path / safe_key
        if not object_path.exists():
-            raise StorageError("Object does not exist")
+            raise ObjectNotFoundError("Object does not exist")
        
        meta_file = self._metadata_file(bucket_path.name, safe_key)
        
@@ -750,7 +755,7 @@ class ObjectStorage:
    def list_object_versions(self, bucket_name: str, object_key: str) -> List[Dict[str, Any]]:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
-            raise StorageError("Bucket does not exist")
+            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name
        safe_key = self._sanitize_object_key(object_key)
        version_dir = self._version_dir(bucket_id, safe_key)
@@ -774,7 +779,7 @@ class ObjectStorage:
    def restore_object_version(self, bucket_name: str, object_key: str, version_id: str) -> ObjectMeta:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
-            raise StorageError("Bucket does not exist")
+            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name
        safe_key = self._sanitize_object_key(object_key)
        version_dir = self._version_dir(bucket_id, safe_key)
@@ -811,7 +816,7 @@ class ObjectStorage:
    def delete_object_version(self, bucket_name: str, object_key: str, version_id: str) -> None:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
-            raise StorageError("Bucket does not exist")
+            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name
        safe_key = self._sanitize_object_key(object_key)
        version_dir = self._version_dir(bucket_id, safe_key)
@@ -834,7 +839,7 @@ class ObjectStorage:
    def list_orphaned_objects(self, bucket_name: str) -> List[Dict[str, Any]]:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
-            raise StorageError("Bucket does not exist")
+            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name
        version_roots = [self._bucket_versions_root(bucket_id), self._legacy_versions_root(bucket_id)]
        if not any(root.exists() for root in version_roots):
@@ -902,7 +907,7 @@ class ObjectStorage:
    ) -> str:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
-            raise StorageError("Bucket does not exist")
+            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name
        safe_key = self._sanitize_object_key(object_key)
        upload_id = uuid.uuid4().hex
@@ -929,8 +934,8 @@ class ObjectStorage:

        Uses file locking to safely update the manifest and handle concurrent uploads.
        """
-        if part_number < 1:
-            raise StorageError("part_number must be >= 1")
+        if part_number < 1 or part_number > 10000:
+            raise StorageError("part_number must be between 1 and 10000")
        bucket_path = self._bucket_path(bucket_name)

        upload_root = self._multipart_dir(bucket_path.name, upload_id)
@@ -939,7 +944,6 @@ class ObjectStorage:
        if not upload_root.exists():
            raise StorageError("Multipart upload not found")

-        # Write part to temporary file first, then rename atomically
        checksum = hashlib.md5()
        part_filename = f"part-{part_number:05d}.part"
        part_path = upload_root / part_filename
@@ -948,11 +952,8 @@ class ObjectStorage:
        try:
            with temp_path.open("wb") as target:
                shutil.copyfileobj(_HashingReader(stream, checksum), target)
-
-            # Atomic rename (or replace on Windows)
            temp_path.replace(part_path)
        except OSError:
-            # Clean up temp file on failure
            try:
                temp_path.unlink(missing_ok=True)
            except OSError:
@@ -968,7 +969,6 @@ class ObjectStorage:
        manifest_path = upload_root / self.MULTIPART_MANIFEST
        lock_path = upload_root / ".manifest.lock"

-        # Retry loop for handling transient lock/read failures
        max_retries = 3
        for attempt in range(max_retries):
            try:
@@ -1151,10 +1151,10 @@ class ObjectStorage:
        """List all active multipart uploads for a bucket."""
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
-            raise StorageError("Bucket does not exist")
+            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name
        uploads = []
-        multipart_root = self._bucket_multipart_root(bucket_id)
+        multipart_root = self._multipart_bucket_root(bucket_id)
        if multipart_root.exists():
            for upload_dir in multipart_root.iterdir():
                if not upload_dir.is_dir():
@@ -1171,7 +1171,7 @@ class ObjectStorage:
                    })
                except (OSError, json.JSONDecodeError):
                    continue
-        legacy_root = self._legacy_multipart_root(bucket_id)
+        legacy_root = self._legacy_multipart_bucket_root(bucket_id)
        if legacy_root.exists():
            for upload_dir in legacy_root.iterdir():
                if not upload_dir.is_dir():
@@ -1394,7 +1394,6 @@ class ObjectStorage:
        """
        now = time.time()

-        # Quick check with global lock (brief)
        with self._cache_lock:
            cached = self._object_cache.get(bucket_id)
            if cached:
@@ -1404,10 +1403,8 @@ class ObjectStorage:
                    return objects
            cache_version = self._cache_version.get(bucket_id, 0)

-        # Use per-bucket lock for cache building (allows parallel builds for different buckets)
        bucket_lock = self._get_bucket_lock(bucket_id)
        with bucket_lock:
-            # Double-check cache after acquiring per-bucket lock
            with self._cache_lock:
                cached = self._object_cache.get(bucket_id)
                if cached:
@@ -1415,17 +1412,12 @@ class ObjectStorage:
                    if now - timestamp < self.KEY_INDEX_CACHE_TTL:
                        self._object_cache.move_to_end(bucket_id)
                        return objects
-
-            # Build cache with per-bucket lock held (prevents duplicate work)
            objects = self._build_object_cache(bucket_path)

            with self._cache_lock:
-                # Check if cache was invalidated while we were building
                current_version = self._cache_version.get(bucket_id, 0)
                if current_version != cache_version:
                    objects = self._build_object_cache(bucket_path)
-
-                # Evict oldest entries if cache is full
                while len(self._object_cache) >= self.OBJECT_CACHE_MAX_SIZE:
                    self._object_cache.popitem(last=False)

@@ -1459,12 +1451,9 @@ class ObjectStorage:
            if cached:
                objects, timestamp = cached
                if meta is None:
-                    # Delete operation - remove key from cache
                    objects.pop(key, None)
                else:
-                    # Put operation - update/add key in cache
                    objects[key] = meta
-                # Keep same timestamp - don't reset TTL for single key updates

    def _ensure_system_roots(self) -> None:
        for path in (
@@ -1485,13 +1474,12 @@ class ObjectStorage:
        return self._system_bucket_root(bucket_name) / self.BUCKET_CONFIG_FILE

    def _read_bucket_config(self, bucket_name: str) -> dict[str, Any]:
-        # Performance: Check cache first
        now = time.time()
        cached = self._bucket_config_cache.get(bucket_name)
        if cached:
            config, cached_time = cached
            if now - cached_time < self._bucket_config_cache_ttl:
-                return config.copy()  # Return copy to prevent mutation
+                return config.copy()

        config_path = self._bucket_config_path(bucket_name)
        if not config_path.exists():
@@ -1510,7 +1498,6 @@ class ObjectStorage:
        config_path = self._bucket_config_path(bucket_name)
        config_path.parent.mkdir(parents=True, exist_ok=True)
        config_path.write_text(json.dumps(payload), encoding="utf-8")
-        # Performance: Update cache immediately after write
        self._bucket_config_cache[bucket_name] = (payload.copy(), time.time())

    def _set_bucket_config_entry(self, bucket_name: str, key: str, value: Any | None) -> None:
@@ -1636,7 +1623,6 @@ class ObjectStorage:
    def _check_bucket_contents(self, bucket_path: Path) -> tuple[bool, bool, bool]:
        """Check bucket for objects, versions, and multipart uploads in a single pass.

-        Performance optimization: Combines three separate rglob traversals into one.
        Returns (has_visible_objects, has_archived_versions, has_active_multipart_uploads).
        Uses early exit when all three are found.
        """
@@ -1645,7 +1631,6 @@ class ObjectStorage:
        has_multipart = False
        bucket_name = bucket_path.name

-        # Check visible objects in bucket
        for path in bucket_path.rglob("*"):
            if has_objects:
                break
@@ -1656,7 +1641,6 @@ class ObjectStorage:
                continue
            has_objects = True

-        # Check archived versions (only if needed)
        for version_root in (
            self._bucket_versions_root(bucket_name),
            self._legacy_versions_root(bucket_name),
@@ -1669,7 +1653,6 @@ class ObjectStorage:
                        has_versions = True
                        break

-        # Check multipart uploads (only if needed)
        for uploads_root in (
            self._multipart_bucket_root(bucket_name),
            self._legacy_multipart_bucket_root(bucket_name),
@@ -1703,7 +1686,7 @@ class ObjectStorage:
            try:
                os.chmod(target_path, stat.S_IRWXU)
                func(target_path)
-            except Exception as exc:  # pragma: no cover - fallback failure
+            except Exception as exc:
                raise StorageError(f"Unable to delete bucket contents: {exc}") from exc

        try:
--- a/app/ui.py
+++ b/app/ui.py
@@ -371,7 +371,7 @@ def bucket_detail(bucket_name: str):
    kms_keys = kms_manager.list_keys() if kms_manager else []
    kms_enabled = current_app.config.get("KMS_ENABLED", False)
    encryption_enabled = current_app.config.get("ENCRYPTION_ENABLED", False)
-    can_manage_encryption = can_manage_versioning  # Same as other bucket properties
+    can_manage_encryption = can_manage_versioning

    bucket_quota = storage.get_bucket_quota(bucket_name)
    bucket_stats = storage.bucket_stats(bucket_name)
@@ -450,8 +450,6 @@ def list_bucket_objects(bucket_name: str):
    except StorageError:
        versioning_enabled = False

-    # Pre-compute URL templates once (not per-object) for performance
-    # Frontend will construct actual URLs by replacing KEY_PLACEHOLDER
    preview_template = url_for("ui.object_preview", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
    delete_template = url_for("ui.delete_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
    presign_template = url_for("ui.object_presign", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
@@ -527,8 +525,6 @@ def upload_object(bucket_name: str):
    try:
        _authorize_ui(principal, bucket_name, "write")
        _storage().put_object(bucket_name, object_key, file.stream, metadata=metadata)
-        
-        # Trigger replication
        _replication().trigger_replication(bucket_name, object_key)
        
        message = f"Uploaded '{object_key}'"
@@ -765,20 +761,18 @@ def bulk_download_objects(bucket_name: str):
    if not cleaned:
        return jsonify({"error": "Select at least one object to download"}), 400

-    MAX_KEYS = current_app.config.get("BULK_DELETE_MAX_KEYS", 500)  # Reuse same limit for now
+    MAX_KEYS = current_app.config.get("BULK_DELETE_MAX_KEYS", 500)
    if len(cleaned) > MAX_KEYS:
        return jsonify({"error": f"A maximum of {MAX_KEYS} objects can be downloaded per request"}), 400

    unique_keys = list(dict.fromkeys(cleaned))
    storage = _storage()
    
-    # Verify permission to read bucket contents
    try:
        _authorize_ui(principal, bucket_name, "read")
    except IamError as exc:
        return jsonify({"error": str(exc)}), 403

-    # Create ZIP archive of selected objects
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
        for key in unique_keys:
@@ -795,7 +789,6 @@ def bulk_download_objects(bucket_name: str):
                    path = storage.get_object_path(bucket_name, key)
                    zf.write(path, arcname=key)
            except (StorageError, IamError):
-                # Skip objects that can't be accessed
                continue
    
    buffer.seek(0)
@@ -846,7 +839,6 @@ def object_preview(bucket_name: str, object_key: str) -> Response:
    
    download = request.args.get("download") == "1"
    
-    # Check if object is encrypted and needs decryption
    is_encrypted = "x-amz-server-side-encryption" in metadata
    if is_encrypted and hasattr(storage, 'get_object_data'):
        try:
@@ -882,7 +874,6 @@ def object_presign(bucket_name: str, object_key: str):
    encoded_key = quote(object_key, safe="/")
    url = f"{api_base}/presign/{bucket_name}/{encoded_key}"
    
-    # Use API base URL for forwarded headers so presigned URLs point to API, not UI
    parsed_api = urlparse(api_base)
    headers = _api_headers()
    headers["X-Forwarded-Host"] = parsed_api.netloc or "127.0.0.1:5000"
@@ -1027,7 +1018,6 @@ def update_bucket_quota(bucket_name: str):
    """Update bucket quota configuration (admin only)."""
    principal = _current_principal()
    
-    # Quota management is admin-only
    is_admin = False
    try:
        _iam().authorize(principal, None, "iam:list_users")
@@ -1049,7 +1039,6 @@ def update_bucket_quota(bucket_name: str):
            flash(_friendly_error_message(exc), "danger")
        return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
    
-    # Parse quota values
    max_mb_str = request.form.get("max_mb", "").strip()
    max_objects_str = request.form.get("max_objects", "").strip()
    
@@ -1061,7 +1050,7 @@ def update_bucket_quota(bucket_name: str):
            max_mb = int(max_mb_str)
            if max_mb < 1:
                raise ValueError("Size must be at least 1 MB")
-            max_bytes = max_mb * 1024 * 1024  # Convert MB to bytes
+            max_bytes = max_mb * 1024 * 1024 
        except ValueError as exc:
            flash(f"Invalid size value: {exc}", "danger")
            return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
@@ -1114,7 +1103,6 @@ def update_bucket_encryption(bucket_name: str):
        flash("Invalid encryption algorithm", "danger")
        return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
    
-    # Build encryption configuration in AWS S3 format
    encryption_config: dict[str, Any] = {
        "Rules": [
            {
@@ -1505,7 +1493,6 @@ def update_bucket_replication(bucket_name: str):
        if rule:
            rule.enabled = True
            _replication().set_rule(rule)
-            # When resuming, sync any pending objects that accumulated while paused
            if rule.mode == REPLICATION_MODE_ALL:
                _replication().replicate_existing_objects(bucket_name)
                flash("Replication resumed. Syncing pending objects in background.", "success")