MyFSIO/app/storage.py

from __future__ import annotations

import hashlib
import json
import os
import re
import shutil
import stat
import threading
import time
import unicodedata
import uuid
from collections import OrderedDict
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path, PurePosixPath
from typing import Any, BinaryIO, Dict, Generator, List, Optional

try:
    import myfsio_core as _rc
    _HAS_RUST = True
except ImportError:
    _rc = None
    _HAS_RUST = False

# Platform-specific file locking
if os.name == "nt":
    import msvcrt

    @contextmanager
    def _file_lock(file_handle) -> Generator[None, None, None]:
        """Acquire an exclusive lock on a file (Windows)."""
        try:
            msvcrt.locking(file_handle.fileno(), msvcrt.LK_NBLCK, 1)
            yield
        finally:
            try:
                file_handle.seek(0)
                msvcrt.locking(file_handle.fileno(), msvcrt.LK_UNLCK, 1)
            except OSError:
                pass
else:
    import fcntl  # type: ignore

    @contextmanager
    def _file_lock(file_handle) -> Generator[None, None, None]:
        """Acquire an exclusive lock on a file (Unix)."""
        try:
            fcntl.flock(file_handle.fileno(), fcntl.LOCK_EX)
            yield
        finally:
            fcntl.flock(file_handle.fileno(), fcntl.LOCK_UN)


@contextmanager
def _atomic_lock_file(lock_path: Path, max_retries: int = 10, base_delay: float = 0.1) -> Generator[None, None, None]:
    """Atomically acquire a lock file with exponential backoff.

    Uses O_EXCL to ensure atomic creation of the lock file.
    """
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    fd = None
    for attempt in range(max_retries):
        try:
            fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_WRONLY)
            break
        except FileExistsError:
            if attempt == max_retries - 1:
                raise BlockingIOError("Another upload to this key is in progress")
            delay = base_delay * (2 ** attempt)
            time.sleep(min(delay, 2.0))
    try:
        yield
    finally:
        if fd is not None:
            os.close(fd)
        try:
            lock_path.unlink(missing_ok=True)
        except OSError:
            pass


WINDOWS_RESERVED_NAMES = {
    "CON",
    "PRN",
    "AUX",
    "NUL",
    "COM1",
    "COM2",
    "COM3",
    "COM4",
    "COM5",
    "COM6",
    "COM7",
    "COM8",
    "COM9",
    "LPT1",
    "LPT2",
    "LPT3",
    "LPT4",
    "LPT5",
    "LPT6",
    "LPT7",
    "LPT8",
    "LPT9",
}


class StorageError(RuntimeError):
    """Raised when the storage layer encounters an unrecoverable problem."""


class BucketNotFoundError(StorageError):
    """Raised when the bucket does not exist."""


class ObjectNotFoundError(StorageError):
    """Raised when the object does not exist."""


class QuotaExceededError(StorageError):
    """Raised when an operation would exceed bucket quota limits."""

    def __init__(self, message: str, quota: Dict[str, Any], usage: Dict[str, int]):
        super().__init__(message)
        self.quota = quota
        self.usage = usage


@dataclass
class ObjectMeta:
    key: str
    size: int
    last_modified: datetime
    etag: Optional[str] = None
    metadata: Optional[Dict[str, str]] = None


@dataclass
class BucketMeta:
    name: str
    created_at: datetime


@dataclass
class ListObjectsResult:
    """Paginated result for object listing."""
    objects: List[ObjectMeta]
    is_truncated: bool
    next_continuation_token: Optional[str]
    total_count: Optional[int] = None


@dataclass
class ShallowListResult:
    """Result for delimiter-aware directory-level listing."""
    objects: List[ObjectMeta]
    common_prefixes: List[str]
    is_truncated: bool
    next_continuation_token: Optional[str]


def _utcnow() -> datetime:
    return datetime.now(timezone.utc)


def _utc_isoformat() -> str:
    return _utcnow().isoformat().replace("+00:00", "Z")


class ObjectStorage:
    """Very small filesystem wrapper implementing the bare S3 primitives."""

    INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
    SYSTEM_ROOT = ".myfsio.sys"
    SYSTEM_BUCKETS_DIR = "buckets"
    SYSTEM_MULTIPART_DIR = "multipart"
    SYSTEM_TMP_DIR = "tmp"
    BUCKET_META_DIR = "meta"
    BUCKET_VERSIONS_DIR = "versions"
    MULTIPART_MANIFEST = "manifest.json"
    BUCKET_CONFIG_FILE = ".bucket.json"

    def __init__(
        self,
        root: Path,
        cache_ttl: int = 5,
        object_cache_max_size: int = 100,
        bucket_config_cache_ttl: float = 30.0,
        object_key_max_length_bytes: int = 1024,
    ) -> None:
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        self._ensure_system_roots()
        self._object_cache: OrderedDict[str, tuple[Dict[str, ObjectMeta], float, float]] = OrderedDict()
        self._obj_cache_lock = threading.Lock()
        self._meta_cache_lock = threading.Lock()
        self._registry_lock = threading.Lock()
        self._bucket_locks: Dict[str, threading.Lock] = {}
        self._cache_version: Dict[str, int] = {}
        self._bucket_config_cache: Dict[str, tuple[dict[str, Any], float]] = {}
        self._bucket_config_cache_ttl = bucket_config_cache_ttl
        self._cache_ttl = cache_ttl
        self._object_cache_max_size = object_cache_max_size
        self._object_key_max_length_bytes = object_key_max_length_bytes
        self._sorted_key_cache: Dict[str, tuple[list[str], int]] = {}
        self._meta_index_locks: Dict[str, threading.Lock] = {}
        self._meta_read_cache: OrderedDict[tuple, Optional[Dict[str, Any]]] = OrderedDict()
        self._meta_read_cache_max = 2048
        self._cleanup_executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ParentCleanup")
        self._stats_mem: Dict[str, Dict[str, int]] = {}
        self._stats_serial: Dict[str, int] = {}
        self._stats_mem_time: Dict[str, float] = {}
        self._stats_lock = threading.Lock()
        self._stats_dirty: set[str] = set()
        self._stats_flush_timer: Optional[threading.Timer] = None
        self._etag_index_dirty: set[str] = set()
        self._etag_index_flush_timer: Optional[threading.Timer] = None

    def _get_bucket_lock(self, bucket_id: str) -> threading.Lock:
        with self._registry_lock:
            if bucket_id not in self._bucket_locks:
                self._bucket_locks[bucket_id] = threading.Lock()
            return self._bucket_locks[bucket_id]

    def list_buckets(self) -> List[BucketMeta]:
        buckets: List[BucketMeta] = []
        for bucket in sorted(self.root.iterdir()):
            if bucket.is_dir() and bucket.name != self.SYSTEM_ROOT:
                stat = bucket.stat()
                buckets.append(
                    BucketMeta(
                        name=bucket.name,
                        created_at=datetime.fromtimestamp(stat.st_ctime, timezone.utc),
                    )
                )
        return buckets

    def bucket_exists(self, bucket_name: str) -> bool:
        return self._bucket_path(bucket_name).exists()

    def _require_bucket_exists(self, bucket_path: Path) -> None:
        """Raise BucketNotFoundError if bucket does not exist."""
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")

    def _validate_bucket_name(self, bucket_name: str) -> None:
        if _HAS_RUST:
            error = _rc.validate_bucket_name(bucket_name)
            if error:
                raise StorageError(error)
            return
        if len(bucket_name) < 3 or len(bucket_name) > 63:
            raise StorageError("Bucket name must be between 3 and 63 characters")
        if not re.match(r"^[a-z0-9][a-z0-9.-]*[a-z0-9]$", bucket_name):
            raise StorageError("Bucket name must consist of lowercase letters, numbers, periods, and hyphens, and must start and end with a letter or number")
        if ".." in bucket_name:
            raise StorageError("Bucket name must not contain consecutive periods")
        if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", bucket_name):
            raise StorageError("Bucket name must not be formatted as an IP address")

    def create_bucket(self, bucket_name: str) -> None:
        self._validate_bucket_name(bucket_name)
        bucket_path = self._bucket_path(bucket_name)
        bucket_path.mkdir(parents=True, exist_ok=False)
        self._system_bucket_root(bucket_path.name).mkdir(parents=True, exist_ok=True)

    def bucket_stats(self, bucket_name: str, cache_ttl: int = 60) -> dict[str, int]:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")

        with self._stats_lock:
            if bucket_name in self._stats_mem:
                cached_at = self._stats_mem_time.get(bucket_name, 0.0)
                if (time.monotonic() - cached_at) < cache_ttl:
                    return dict(self._stats_mem[bucket_name])
                self._stats_mem.pop(bucket_name, None)
                self._stats_mem_time.pop(bucket_name, None)

        cache_path = self._system_bucket_root(bucket_name) / "stats.json"
        cached_stats = None

        if cache_path.exists():
            try:
                cached_stats = json.loads(cache_path.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError):
                pass

        object_count = 0
        total_bytes = 0
        version_count = 0
        version_bytes = 0

        internal = self.INTERNAL_FOLDERS
        bucket_str = str(bucket_path)

        try:
            if _HAS_RUST:
                versions_root = str(self._bucket_versions_root(bucket_name))
                object_count, total_bytes, version_count, version_bytes = _rc.bucket_stats_scan(
                    bucket_str, versions_root
                )
            else:
                stack = [bucket_str]
                while stack:
                    current = stack.pop()
                    try:
                        with os.scandir(current) as it:
                            for entry in it:
                                if current == bucket_str and entry.name in internal:
                                    continue
                                if entry.is_dir(follow_symlinks=False):
                                    stack.append(entry.path)
                                elif entry.is_file(follow_symlinks=False):
                                    object_count += 1
                                    total_bytes += entry.stat(follow_symlinks=False).st_size
                    except PermissionError:
                        continue

                versions_root = self._bucket_versions_root(bucket_name)
                if versions_root.exists():
                    v_stack = [str(versions_root)]
                    while v_stack:
                        v_current = v_stack.pop()
                        try:
                            with os.scandir(v_current) as it:
                                for entry in it:
                                    if entry.is_dir(follow_symlinks=False):
                                        v_stack.append(entry.path)
                                    elif entry.is_file(follow_symlinks=False) and entry.name.endswith(".bin"):
                                        version_count += 1
                                        version_bytes += entry.stat(follow_symlinks=False).st_size
                        except PermissionError:
                            continue
        except OSError:
            if cached_stats is not None:
                return cached_stats
            raise

        existing_serial = 0
        if cached_stats is not None:
            existing_serial = cached_stats.get("_cache_serial", 0)

        stats = {
            "objects": object_count,
            "bytes": total_bytes,
            "version_count": version_count,
            "version_bytes": version_bytes,
            "total_objects": object_count + version_count,
            "total_bytes": total_bytes + version_bytes,
            "_cache_serial": existing_serial,
        }

        with self._stats_lock:
            self._stats_mem[bucket_name] = stats
            self._stats_mem_time[bucket_name] = time.monotonic()
            self._stats_serial[bucket_name] = existing_serial

        try:
            cache_path.parent.mkdir(parents=True, exist_ok=True)
            self._atomic_write_json(cache_path, stats)
        except OSError:
            pass

        return stats

    def _invalidate_bucket_stats_cache(self, bucket_id: str) -> None:
        with self._stats_lock:
            self._stats_mem.pop(bucket_id, None)
            self._stats_mem_time.pop(bucket_id, None)
            self._stats_serial[bucket_id] = self._stats_serial.get(bucket_id, 0) + 1
            self._stats_dirty.discard(bucket_id)
        cache_path = self._system_bucket_root(bucket_id) / "stats.json"
        try:
            cache_path.unlink(missing_ok=True)
        except OSError:
            pass

    def _update_bucket_stats_cache(
        self,
        bucket_id: str,
        *,
        bytes_delta: int = 0,
        objects_delta: int = 0,
        version_bytes_delta: int = 0,
        version_count_delta: int = 0,
    ) -> None:
        with self._stats_lock:
            if bucket_id not in self._stats_mem:
                self._stats_mem[bucket_id] = {
                    "objects": 0, "bytes": 0, "version_count": 0,
                    "version_bytes": 0, "total_objects": 0, "total_bytes": 0,
                    "_cache_serial": 0,
                }
            data = self._stats_mem[bucket_id]
            data["objects"] = max(0, data["objects"] + objects_delta)
            data["bytes"] = max(0, data["bytes"] + bytes_delta)
            data["version_count"] = max(0, data["version_count"] + version_count_delta)
            data["version_bytes"] = max(0, data["version_bytes"] + version_bytes_delta)
            data["total_objects"] = max(0, data["total_objects"] + objects_delta + version_count_delta)
            data["total_bytes"] = max(0, data["total_bytes"] + bytes_delta + version_bytes_delta)
            data["_cache_serial"] = data["_cache_serial"] + 1
            self._stats_serial[bucket_id] = self._stats_serial.get(bucket_id, 0) + 1
            self._stats_mem_time[bucket_id] = time.monotonic()
            self._stats_dirty.add(bucket_id)
            self._schedule_stats_flush()

    def _schedule_stats_flush(self) -> None:
        if self._stats_flush_timer is None or not self._stats_flush_timer.is_alive():
            self._stats_flush_timer = threading.Timer(3.0, self._flush_stats)
            self._stats_flush_timer.daemon = True
            self._stats_flush_timer.start()

    def _flush_stats(self) -> None:
        with self._stats_lock:
            dirty = list(self._stats_dirty)
            self._stats_dirty.clear()
            snapshots = {b: dict(self._stats_mem[b]) for b in dirty if b in self._stats_mem}
        for bucket_id, data in snapshots.items():
            cache_path = self._system_bucket_root(bucket_id) / "stats.json"
            try:
                cache_path.parent.mkdir(parents=True, exist_ok=True)
                self._atomic_write_json(cache_path, data)
            except OSError:
                pass

    def shutdown_stats(self) -> None:
        if self._stats_flush_timer is not None:
            self._stats_flush_timer.cancel()
        self._flush_stats()
        if self._etag_index_flush_timer is not None:
            self._etag_index_flush_timer.cancel()
        self._flush_etag_indexes()

    def delete_bucket(self, bucket_name: str) -> None:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")
        has_objects, has_versions, has_multipart = self._check_bucket_contents(bucket_path)
        if has_objects:
            raise StorageError("Bucket not empty")
        if has_versions:
            raise StorageError("Bucket contains archived object versions")
        if has_multipart:
            raise StorageError("Bucket has active multipart uploads")
        bucket_id = bucket_path.name
        self._remove_tree(bucket_path)
        self._remove_tree(self._system_bucket_root(bucket_id))
        self._remove_tree(self._multipart_bucket_root(bucket_id))
        self._bucket_config_cache.pop(bucket_id, None)
        with self._obj_cache_lock:
            self._object_cache.pop(bucket_id, None)
            self._cache_version.pop(bucket_id, None)
            self._sorted_key_cache.pop(bucket_id, None)
        with self._meta_cache_lock:
            stale = [k for k in self._meta_read_cache if k[0] == bucket_id]
            for k in stale:
                del self._meta_read_cache[k]
        with self._stats_lock:
            self._stats_mem.pop(bucket_id, None)
            self._stats_mem_time.pop(bucket_id, None)
            self._stats_serial.pop(bucket_id, None)
            self._stats_dirty.discard(bucket_id)
        self._etag_index_dirty.discard(bucket_id)

    def list_objects(
        self,
        bucket_name: str,
        *,
        max_keys: int = 1000,
        continuation_token: Optional[str] = None,
        prefix: Optional[str] = None,
    ) -> ListObjectsResult:
        """List objects in a bucket with pagination support.

        Args:
            bucket_name: Name of the bucket
            max_keys: Maximum number of objects to return (default 1000)
            continuation_token: Token from previous request for pagination
            prefix: Filter objects by key prefix

        Returns:
            ListObjectsResult with objects, truncation status, and continuation token
        """
        import bisect

        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name

        object_cache = self._get_object_cache(bucket_id, bucket_path)

        cache_version = self._cache_version.get(bucket_id, 0)
        cached_entry = self._sorted_key_cache.get(bucket_id)
        if cached_entry and cached_entry[1] == cache_version:
            all_keys = cached_entry[0]
        else:
            all_keys = sorted(object_cache.keys())
            self._sorted_key_cache[bucket_id] = (all_keys, cache_version)

        if prefix:
            lo = bisect.bisect_left(all_keys, prefix)
            hi = len(all_keys)
            for i in range(lo, len(all_keys)):
                if not all_keys[i].startswith(prefix):
                    hi = i
                    break
            all_keys = all_keys[lo:hi]

        total_count = len(all_keys)
        start_index = 0
        if continuation_token:
            start_index = bisect.bisect_right(all_keys, continuation_token)
            if start_index >= total_count:
                return ListObjectsResult(
                    objects=[],
                    is_truncated=False,
                    next_continuation_token=None,
                    total_count=total_count,
                )

        end_index = start_index + max_keys
        keys_slice = all_keys[start_index:end_index]
        is_truncated = end_index < total_count

        objects: List[ObjectMeta] = []
        for key in keys_slice:
            obj = object_cache.get(key)
            if obj:
                objects.append(obj)

        next_token = keys_slice[-1] if is_truncated and keys_slice else None

        return ListObjectsResult(
            objects=objects,
            is_truncated=is_truncated,
            next_continuation_token=next_token,
            total_count=total_count,
        )

    def list_objects_all(self, bucket_name: str) -> List[ObjectMeta]:
        """List all objects in a bucket (no pagination). Use with caution for large buckets."""
        result = self.list_objects(bucket_name, max_keys=100000)
        return result.objects

    def list_objects_shallow(
        self,
        bucket_name: str,
        *,
        prefix: str = "",
        delimiter: str = "/",
        max_keys: int = 1000,
        continuation_token: Optional[str] = None,
    ) -> ShallowListResult:
        import bisect

        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name

        if delimiter != "/" or (prefix and not prefix.endswith(delimiter)):
            return self._shallow_via_full_scan(
                bucket_name, prefix=prefix, delimiter=delimiter,
                max_keys=max_keys, continuation_token=continuation_token,
            )

        target_dir = bucket_path
        if prefix:
            safe_prefix_path = Path(prefix.rstrip("/"))
            if ".." in safe_prefix_path.parts:
                return ShallowListResult(
                    objects=[], common_prefixes=[],
                    is_truncated=False, next_continuation_token=None,
                )
            target_dir = bucket_path / safe_prefix_path
            try:
                resolved = target_dir.resolve()
                bucket_resolved = bucket_path.resolve()
                if not str(resolved).startswith(str(bucket_resolved) + os.sep) and resolved != bucket_resolved:
                    return ShallowListResult(
                        objects=[], common_prefixes=[],
                        is_truncated=False, next_continuation_token=None,
                    )
            except (OSError, ValueError):
                return ShallowListResult(
                    objects=[], common_prefixes=[],
                    is_truncated=False, next_continuation_token=None,
                )

        if not target_dir.exists() or not target_dir.is_dir():
            return ShallowListResult(
                objects=[], common_prefixes=[],
                is_truncated=False, next_continuation_token=None,
            )

        etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
        meta_cache: Dict[str, str] = {}
        if etag_index_path.exists():
            try:
                with open(etag_index_path, 'r', encoding='utf-8') as f:
                    meta_cache = json.load(f)
            except (OSError, json.JSONDecodeError):
                pass

        entries_files: list[tuple[str, int, float, Optional[str]]] = []
        entries_dirs: list[str] = []

        if _HAS_RUST:
            try:
                raw = _rc.shallow_scan(str(target_dir), prefix, json.dumps(meta_cache))
                entries_files = []
                for key, size, mtime, etag in raw["files"]:
                    if etag is None:
                        safe_key = PurePosixPath(key)
                        meta = self._read_metadata(bucket_id, Path(safe_key))
                        etag = meta.get("__etag__") if meta else None
                    entries_files.append((key, size, mtime, etag))
                entries_dirs = raw["dirs"]
                all_items = raw["merged_keys"]
            except OSError:
                return ShallowListResult(
                    objects=[], common_prefixes=[],
                    is_truncated=False, next_continuation_token=None,
                )
        else:
            try:
                with os.scandir(str(target_dir)) as it:
                    for entry in it:
                        name = entry.name
                        if name in self.INTERNAL_FOLDERS:
                            continue
                        if entry.is_dir(follow_symlinks=False):
                            cp = prefix + name + delimiter
                            entries_dirs.append(cp)
                        elif entry.is_file(follow_symlinks=False):
                            key = prefix + name
                            try:
                                st = entry.stat()
                                etag = meta_cache.get(key)
                                if etag is None:
                                    safe_key = PurePosixPath(key)
                                    meta = self._read_metadata(bucket_id, Path(safe_key))
                                    etag = meta.get("__etag__") if meta else None
                                entries_files.append((key, st.st_size, st.st_mtime, etag))
                            except OSError:
                                pass
            except OSError:
                return ShallowListResult(
                    objects=[], common_prefixes=[],
                    is_truncated=False, next_continuation_token=None,
                )

            entries_dirs.sort()
            entries_files.sort(key=lambda x: x[0])

            all_items: list[tuple[str, bool]] = []
            fi, di = 0, 0
            while fi < len(entries_files) and di < len(entries_dirs):
                if entries_files[fi][0] <= entries_dirs[di]:
                    all_items.append((entries_files[fi][0], False))
                    fi += 1
                else:
                    all_items.append((entries_dirs[di], True))
                    di += 1
            while fi < len(entries_files):
                all_items.append((entries_files[fi][0], False))
                fi += 1
            while di < len(entries_dirs):
                all_items.append((entries_dirs[di], True))
                di += 1

        files_map = {e[0]: e for e in entries_files}

        start_index = 0
        if continuation_token:
            all_keys = [item[0] for item in all_items]
            start_index = bisect.bisect_right(all_keys, continuation_token)

        selected = all_items[start_index:start_index + max_keys]
        is_truncated = (start_index + max_keys) < len(all_items)

        result_objects: list[ObjectMeta] = []
        result_prefixes: list[str] = []
        for item_key, is_dir in selected:
            if is_dir:
                result_prefixes.append(item_key)
            else:
                fdata = files_map[item_key]
                result_objects.append(ObjectMeta(
                    key=fdata[0],
                    size=fdata[1],
                    last_modified=datetime.fromtimestamp(fdata[2], timezone.utc),
                    etag=fdata[3],
                    metadata=None,
                ))

        next_token = None
        if is_truncated and selected:
            next_token = selected[-1][0]

        return ShallowListResult(
            objects=result_objects,
            common_prefixes=result_prefixes,
            is_truncated=is_truncated,
            next_continuation_token=next_token,
        )

    def _shallow_via_full_scan(
        self,
        bucket_name: str,
        *,
        prefix: str = "",
        delimiter: str = "/",
        max_keys: int = 1000,
        continuation_token: Optional[str] = None,
    ) -> ShallowListResult:
        list_result = self.list_objects(
            bucket_name,
            max_keys=max_keys * 10,
            continuation_token=continuation_token,
            prefix=prefix or None,
        )

        common_prefixes: list[str] = []
        filtered_objects: list[ObjectMeta] = []
        seen_prefixes: set[str] = set()

        for obj in list_result.objects:
            key_after_prefix = obj.key[len(prefix):] if prefix else obj.key
            if delimiter in key_after_prefix:
                cp = prefix + key_after_prefix.split(delimiter)[0] + delimiter
                if cp not in seen_prefixes:
                    seen_prefixes.add(cp)
                    common_prefixes.append(cp)
            else:
                filtered_objects.append(obj)

        common_prefixes.sort()
        total_items = len(filtered_objects) + len(common_prefixes)
        is_truncated = total_items > max_keys or list_result.is_truncated

        if len(filtered_objects) >= max_keys:
            filtered_objects = filtered_objects[:max_keys]
            common_prefixes = []
        else:
            remaining = max_keys - len(filtered_objects)
            common_prefixes = common_prefixes[:remaining]

        next_token = None
        if is_truncated:
            if filtered_objects:
                next_token = filtered_objects[-1].key
            elif common_prefixes:
                next_token = common_prefixes[-1].rstrip(delimiter) if delimiter else common_prefixes[-1]

        return ShallowListResult(
            objects=filtered_objects,
            common_prefixes=common_prefixes,
            is_truncated=is_truncated,
            next_continuation_token=next_token,
        )

    def search_objects(
        self,
        bucket_name: str,
        query: str,
        *,
        prefix: str = "",
        limit: int = 500,
    ) -> Dict[str, Any]:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.is_dir():
            raise BucketNotFoundError("Bucket does not exist")

        if prefix:
            search_root = bucket_path / prefix.replace("/", os.sep)
            if not search_root.is_dir():
                return {"results": [], "truncated": False}
            resolved = search_root.resolve()
            if not str(resolved).startswith(str(bucket_path.resolve())):
                return {"results": [], "truncated": False}
        else:
            search_root = bucket_path

        if _HAS_RUST:
            raw = _rc.search_objects_scan(
                str(bucket_path), str(search_root), query, limit
            )
            results = [
                {
                    "key": k,
                    "size": s,
                    "last_modified": datetime.fromtimestamp(
                        m, tz=timezone.utc
                    ).strftime("%Y-%m-%dT%H:%M:%S.000Z"),
                }
                for k, s, m in raw["results"]
            ]
            return {"results": results, "truncated": raw["truncated"]}

        query_lower = query.lower()
        results: list[Dict[str, Any]] = []
        internal = self.INTERNAL_FOLDERS
        bucket_str = str(bucket_path)
        bucket_len = len(bucket_str) + 1
        meta_root = self._bucket_meta_root(bucket_name)
        scan_limit = limit * 4

        matched = 0
        scanned = 0
        search_str = str(search_root)
        stack = [search_str]
        while stack:
            current = stack.pop()
            try:
                with os.scandir(current) as it:
                    for entry in it:
                        if current == bucket_str and entry.name in internal:
                            continue
                        if entry.is_dir(follow_symlinks=False):
                            stack.append(entry.path)
                        elif entry.is_file(follow_symlinks=False):
                            scanned += 1
                            key = entry.path[bucket_len:].replace(os.sep, "/")
                            if query_lower in key.lower():
                                st = entry.stat(follow_symlinks=False)
                                meta_path = meta_root / (key + ".meta.json")
                                last_modified = ""
                                try:
                                    if meta_path.exists():
                                        md = json.loads(meta_path.read_text(encoding="utf-8"))
                                        last_modified = md.get("last_modified", "")
                                except (OSError, json.JSONDecodeError):
                                    pass
                                if not last_modified:
                                    last_modified = datetime.fromtimestamp(
                                        st.st_mtime, tz=timezone.utc
                                    ).strftime("%Y-%m-%dT%H:%M:%S.000Z")
                                results.append({
                                    "key": key,
                                    "size": st.st_size,
                                    "last_modified": last_modified,
                                })
                                matched += 1
                            if matched >= scan_limit:
                                break
            except PermissionError:
                continue
            if matched >= scan_limit:
                break

        results.sort(key=lambda r: r["key"])
        truncated = len(results) > limit
        return {"results": results[:limit], "truncated": truncated}

    def put_object(
        self,
        bucket_name: str,
        object_key: str,
        stream: BinaryIO,
        *,
        metadata: Optional[Dict[str, str]] = None,
        enforce_quota: bool = True,
    ) -> ObjectMeta:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name

        safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
        destination = bucket_path / safe_key
        destination.parent.mkdir(parents=True, exist_ok=True)

        is_overwrite = destination.exists()
        existing_size = destination.stat().st_size if is_overwrite else 0

        tmp_dir = self._system_root_path() / self.SYSTEM_TMP_DIR
        tmp_dir.mkdir(parents=True, exist_ok=True)

        if _HAS_RUST:
            tmp_path = None
            try:
                tmp_path_str, etag, new_size = _rc.stream_to_file_with_md5(
                    stream, str(tmp_dir)
                )
                tmp_path = Path(tmp_path_str)

                size_delta = new_size - existing_size
                object_delta = 0 if is_overwrite else 1

                if enforce_quota:
                    quota_check = self.check_quota(
                        bucket_name,
                        additional_bytes=max(0, size_delta),
                        additional_objects=object_delta,
                    )
                    if not quota_check["allowed"]:
                        raise QuotaExceededError(
                            quota_check["message"] or "Quota exceeded",
                            quota_check["quota"],
                            quota_check["usage"],
                        )
            except BaseException:
                if tmp_path:
                    try:
                        tmp_path.unlink(missing_ok=True)
                    except OSError:
                        pass
                raise
        else:
            tmp_path = tmp_dir / f"{uuid.uuid4().hex}.tmp"
            try:
                checksum = hashlib.md5()
                with tmp_path.open("wb") as target:
                    shutil.copyfileobj(_HashingReader(stream, checksum), target)
                    target.flush()
                    os.fsync(target.fileno())

                new_size = tmp_path.stat().st_size
                size_delta = new_size - existing_size
                object_delta = 0 if is_overwrite else 1

                if enforce_quota:
                    quota_check = self.check_quota(
                        bucket_name,
                        additional_bytes=max(0, size_delta),
                        additional_objects=object_delta,
                    )
                    if not quota_check["allowed"]:
                        raise QuotaExceededError(
                            quota_check["message"] or "Quota exceeded",
                            quota_check["quota"],
                            quota_check["usage"],
                        )

                etag = checksum.hexdigest()
            except BaseException:
                try:
                    tmp_path.unlink(missing_ok=True)
                except OSError:
                    pass
                raise

        lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock"
        try:
            with _atomic_lock_file(lock_file_path):
                archived_version_size = 0
                if self._is_versioning_enabled(bucket_path) and is_overwrite:
                    archived_version_size = existing_size
                    self._archive_current_version(bucket_id, safe_key, reason="overwrite")

                shutil.move(str(tmp_path), str(destination))
                tmp_path = None

                stat = destination.stat()

                internal_meta = {"__etag__": etag, "__size__": str(stat.st_size), "__last_modified__": str(stat.st_mtime)}
                combined_meta = {**internal_meta, **(metadata or {})}
                self._write_metadata(bucket_id, safe_key, combined_meta)
        except BlockingIOError:
            try:
                if tmp_path:
                    tmp_path.unlink(missing_ok=True)
            except OSError:
                pass
            raise StorageError("Another upload to this key is in progress")
        finally:
            if tmp_path:
                try:
                    tmp_path.unlink(missing_ok=True)
                except OSError:
                    pass

        self._update_bucket_stats_cache(
            bucket_id,
            bytes_delta=size_delta,
            objects_delta=object_delta,
            version_bytes_delta=archived_version_size,
            version_count_delta=1 if archived_version_size > 0 else 0,
        )

        obj_meta = ObjectMeta(
            key=safe_key.as_posix(),
            size=stat.st_size,
            last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
            etag=etag,
            metadata=metadata,
        )
        self._update_object_cache_entry(bucket_id, safe_key.as_posix(), obj_meta)

        return obj_meta

    def get_object_path(self, bucket_name: str, object_key: str) -> Path:
        path = self._object_path(bucket_name, object_key)
        if not path.is_file():
            raise ObjectNotFoundError("Object not found")
        return path

    def get_object_metadata(self, bucket_name: str, object_key: str) -> Dict[str, str]:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            return {}
        safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
        return self._read_metadata(bucket_path.name, safe_key) or {}

    def _cleanup_empty_parents(self, path: Path, stop_at: Path) -> None:
        """Remove empty parent directories in a background thread.

        On Windows/OneDrive, directories may be locked briefly after file deletion.
        Running this in the background avoids blocking the request thread with retries.
        """
        self._cleanup_executor.submit(self._do_cleanup_empty_parents, path, stop_at)

    def _do_cleanup_empty_parents(self, path: Path, stop_at: Path) -> None:
        for parent in path.parents:
            if parent == stop_at:
                break
            for attempt in range(3):
                try:
                    if parent.exists() and not any(parent.iterdir()):
                        parent.rmdir()
                        break
                except OSError:
                    if attempt < 2:
                        time.sleep(0.1)
                    break

    def delete_object(self, bucket_name: str, object_key: str) -> None:
        bucket_path = self._bucket_path(bucket_name)
        path = self._object_path(bucket_name, object_key)
        if not path.exists():
            return
        deleted_size = path.stat().st_size
        safe_key = path.relative_to(bucket_path)
        bucket_id = bucket_path.name
        archived_version_size = 0
        if self._is_versioning_enabled(bucket_path):
            archived_version_size = deleted_size
            self._archive_current_version(bucket_id, safe_key, reason="delete")
        rel = path.relative_to(bucket_path)
        self._safe_unlink(path)
        self._delete_metadata(bucket_id, rel)

        self._update_bucket_stats_cache(
            bucket_id,
            bytes_delta=-deleted_size,
            objects_delta=-1,
            version_bytes_delta=archived_version_size,
            version_count_delta=1 if archived_version_size > 0 else 0,
        )
        self._update_object_cache_entry(bucket_id, safe_key.as_posix(), None)
        self._cleanup_empty_parents(path, bucket_path)

    def purge_object(self, bucket_name: str, object_key: str) -> None:
        bucket_path = self._bucket_path(bucket_name)
        target = self._object_path(bucket_name, object_key)
        bucket_id = bucket_path.name
        if target.exists():
            rel = target.relative_to(bucket_path)
            self._safe_unlink(target)
            self._delete_metadata(bucket_id, rel)
        else:
            rel = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
            self._delete_metadata(bucket_id, rel)
        version_dir = self._version_dir(bucket_id, rel)
        if version_dir.exists():
            shutil.rmtree(version_dir, ignore_errors=True)
        legacy_version_dir = self._legacy_version_dir(bucket_id, rel)
        if legacy_version_dir.exists():
            shutil.rmtree(legacy_version_dir, ignore_errors=True)

        self._invalidate_bucket_stats_cache(bucket_id)
        self._update_object_cache_entry(bucket_id, rel.as_posix(), None)
        self._cleanup_empty_parents(target, bucket_path)

    def is_versioning_enabled(self, bucket_name: str) -> bool:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")
        return self._is_versioning_enabled(bucket_path)

    def set_bucket_versioning(self, bucket_name: str, enabled: bool) -> None:
        bucket_path = self._require_bucket_path(bucket_name)
        config = self._read_bucket_config(bucket_path.name)
        config["versioning_enabled"] = bool(enabled)
        self._write_bucket_config(bucket_path.name, config)

    def get_bucket_tags(self, bucket_name: str) -> List[Dict[str, str]]:
        bucket_path = self._require_bucket_path(bucket_name)
        config = self._read_bucket_config(bucket_path.name)
        raw_tags = config.get("tags")
        if not isinstance(raw_tags, list):
            return []
        tags: List[Dict[str, str]] = []
        for entry in raw_tags:
            if not isinstance(entry, dict):
                continue
            key = str(entry.get("Key", "")).strip()
            if not key:
                continue
            value = str(entry.get("Value", ""))
            tags.append({"Key": key, "Value": value})
        return tags

    def set_bucket_tags(self, bucket_name: str, tags: Optional[List[Dict[str, str]]]) -> None:
        bucket_path = self._require_bucket_path(bucket_name)
        if not tags:
            self._set_bucket_config_entry(bucket_path.name, "tags", None)
            return
        clean: List[Dict[str, str]] = []
        for entry in tags:
            if not isinstance(entry, dict):
                continue
            key = str(entry.get("Key", "")).strip()
            if not key:
                continue
            clean.append({"Key": key, "Value": str(entry.get("Value", ""))})
        self._set_bucket_config_entry(bucket_path.name, "tags", clean or None)

    def get_bucket_cors(self, bucket_name: str) -> List[Dict[str, Any]]:
        bucket_path = self._require_bucket_path(bucket_name)
        config = self._read_bucket_config(bucket_path.name)
        cors_rules = config.get("cors")
        return cors_rules if isinstance(cors_rules, list) else []

    def set_bucket_cors(self, bucket_name: str, rules: Optional[List[Dict[str, Any]]]) -> None:
        bucket_path = self._require_bucket_path(bucket_name)
        self._set_bucket_config_entry(bucket_path.name, "cors", rules or None)

    def get_bucket_encryption(self, bucket_name: str) -> Dict[str, Any]:
        bucket_path = self._require_bucket_path(bucket_name)
        config = self._read_bucket_config(bucket_path.name)
        payload = config.get("encryption")
        return payload if isinstance(payload, dict) else {}

    def set_bucket_encryption(self, bucket_name: str, config_payload: Optional[Dict[str, Any]]) -> None:
        bucket_path = self._require_bucket_path(bucket_name)
        self._set_bucket_config_entry(bucket_path.name, "encryption", config_payload or None)

    def get_bucket_lifecycle(self, bucket_name: str) -> Optional[List[Dict[str, Any]]]:
        """Get lifecycle configuration for bucket."""
        bucket_path = self._require_bucket_path(bucket_name)
        config = self._read_bucket_config(bucket_path.name)
        lifecycle = config.get("lifecycle")
        return lifecycle if isinstance(lifecycle, list) else None

    def set_bucket_lifecycle(self, bucket_name: str, rules: Optional[List[Dict[str, Any]]]) -> None:
        bucket_path = self._require_bucket_path(bucket_name)
        self._set_bucket_config_entry(bucket_path.name, "lifecycle", rules)

    def get_bucket_website(self, bucket_name: str) -> Optional[Dict[str, Any]]:
        bucket_path = self._require_bucket_path(bucket_name)
        config = self._read_bucket_config(bucket_path.name)
        website = config.get("website")
        return website if isinstance(website, dict) else None

    def set_bucket_website(self, bucket_name: str, website_config: Optional[Dict[str, Any]]) -> None:
        bucket_path = self._require_bucket_path(bucket_name)
        self._set_bucket_config_entry(bucket_path.name, "website", website_config)

    def get_bucket_quota(self, bucket_name: str) -> Dict[str, Any]:
        """Get quota configuration for bucket.

        Returns:
            Dict with 'max_bytes' and 'max_objects' (None if unlimited).
        """
        bucket_path = self._require_bucket_path(bucket_name)
        config = self._read_bucket_config(bucket_path.name)
        quota = config.get("quota")
        if isinstance(quota, dict):
            return {
                "max_bytes": quota.get("max_bytes"),
                "max_objects": quota.get("max_objects"),
            }
        return {"max_bytes": None, "max_objects": None}

    def set_bucket_quota(
        self,
        bucket_name: str,
        *,
        max_bytes: Optional[int] = None,
        max_objects: Optional[int] = None,
    ) -> None:
        """Set quota limits for a bucket.

        Args:
            bucket_name: Name of the bucket
            max_bytes: Maximum total size in bytes (None to remove limit)
            max_objects: Maximum number of objects (None to remove limit)
        """
        bucket_path = self._require_bucket_path(bucket_name)

        if max_bytes is None and max_objects is None:
            self._set_bucket_config_entry(bucket_path.name, "quota", None)
            return

        quota: Dict[str, Any] = {}
        if max_bytes is not None:
            if max_bytes < 0:
                raise StorageError("max_bytes must be non-negative")
            quota["max_bytes"] = max_bytes
        if max_objects is not None:
            if max_objects < 0:
                raise StorageError("max_objects must be non-negative")
            quota["max_objects"] = max_objects

        self._set_bucket_config_entry(bucket_path.name, "quota", quota)

    def check_quota(
        self,
        bucket_name: str,
        additional_bytes: int = 0,
        additional_objects: int = 0,
    ) -> Dict[str, Any]:
        """Check if an operation would exceed bucket quota.

        Args:
            bucket_name: Name of the bucket
            additional_bytes: Bytes that would be added
            additional_objects: Objects that would be added

        Returns:
            Dict with 'allowed' (bool), 'quota' (current limits),
            'usage' (current usage), and 'message' (if not allowed).
        """
        quota = self.get_bucket_quota(bucket_name)
        if not quota:
            return {
                "allowed": True,
                "quota": None,
                "usage": None,
                "message": None,
            }

        stats = self.bucket_stats(bucket_name)
        current_bytes = stats.get("total_bytes", stats.get("bytes", 0))
        current_objects = stats.get("total_objects", stats.get("objects", 0))

        result = {
            "allowed": True,
            "quota": quota,
            "usage": {
                "bytes": current_bytes,
                "objects": current_objects,
                "version_count": stats.get("version_count", 0),
                "version_bytes": stats.get("version_bytes", 0),
            },
            "message": None,
        }

        max_bytes_limit = quota.get("max_bytes")
        max_objects = quota.get("max_objects")

        if max_bytes_limit is not None:
            projected_bytes = current_bytes + additional_bytes
            if projected_bytes > max_bytes_limit:
                result["allowed"] = False
                result["message"] = (
                    f"Quota exceeded: adding {additional_bytes} bytes would result in "
                    f"{projected_bytes} bytes, exceeding limit of {max_bytes_limit} bytes"
                )
                return result

        if max_objects is not None:
            projected_objects = current_objects + additional_objects
            if projected_objects > max_objects:
                result["allowed"] = False
                result["message"] = (
                    f"Quota exceeded: adding {additional_objects} objects would result in "
                    f"{projected_objects} objects, exceeding limit of {max_objects} objects"
                )
                return result

        return result

    def get_object_tags(self, bucket_name: str, object_key: str) -> List[Dict[str, str]]:
        """Get tags for an object."""
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")
        safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
        object_path = bucket_path / safe_key
        if not object_path.exists():
            raise ObjectNotFoundError("Object does not exist")

        entry = self._read_index_entry(bucket_path.name, safe_key)
        if entry is not None:
            tags = entry.get("tags")
            return tags if isinstance(tags, list) else []
        for meta_file in (self._metadata_file(bucket_path.name, safe_key), self._legacy_metadata_file(bucket_path.name, safe_key)):
            if not meta_file.exists():
                continue
            try:
                payload = json.loads(meta_file.read_text(encoding="utf-8"))
                tags = payload.get("tags")
                if isinstance(tags, list):
                    return tags
                return []
            except (OSError, json.JSONDecodeError):
                return []
        return []

    def set_object_tags(self, bucket_name: str, object_key: str, tags: Optional[List[Dict[str, str]]]) -> None:
        """Set tags for an object."""
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")
        safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
        object_path = bucket_path / safe_key
        if not object_path.exists():
            raise ObjectNotFoundError("Object does not exist")

        bucket_id = bucket_path.name
        existing_entry = self._read_index_entry(bucket_id, safe_key) or {}
        if not existing_entry:
            meta_file = self._metadata_file(bucket_id, safe_key)
            if meta_file.exists():
                try:
                    existing_entry = json.loads(meta_file.read_text(encoding="utf-8"))
                except (OSError, json.JSONDecodeError):
                    pass

        if tags:
            existing_entry["tags"] = tags
        else:
            existing_entry.pop("tags", None)

        if existing_entry.get("metadata") or existing_entry.get("tags"):
            self._write_index_entry(bucket_id, safe_key, existing_entry)
        else:
            self._delete_index_entry(bucket_id, safe_key)
        old_meta = self._metadata_file(bucket_id, safe_key)
        try:
            if old_meta.exists():
                old_meta.unlink()
        except OSError:
            pass

    def delete_object_tags(self, bucket_name: str, object_key: str) -> None:
        """Delete all tags from an object."""
        self.set_object_tags(bucket_name, object_key, None)

    def list_object_versions(self, bucket_name: str, object_key: str) -> List[Dict[str, Any]]:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name
        safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
        version_dir = self._version_dir(bucket_id, safe_key)
        if not version_dir.exists():
            version_dir = self._legacy_version_dir(bucket_id, safe_key)
            if not version_dir.exists():
                return []
        versions: List[Dict[str, Any]] = []
        for meta_file in version_dir.glob("*.json"):
            try:
                payload = json.loads(meta_file.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError):
                continue
            if not isinstance(payload, dict):
                continue
            payload.setdefault("version_id", meta_file.stem)
            versions.append(payload)
        versions.sort(key=lambda item: item.get("archived_at") or "1970-01-01T00:00:00Z", reverse=True)
        return versions

    def restore_object_version(self, bucket_name: str, object_key: str, version_id: str) -> ObjectMeta:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name
        safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
        version_dir = self._version_dir(bucket_id, safe_key)
        data_path = version_dir / f"{version_id}.bin"
        meta_path = version_dir / f"{version_id}.json"
        if not data_path.exists() or not meta_path.exists():
            raise StorageError("Version not found")
        try:
            payload = json.loads(meta_path.read_text(encoding="utf-8"))
        except (OSError, json.JSONDecodeError):
            payload = {}
        metadata = payload.get("metadata") if isinstance(payload, dict) else {}
        if not isinstance(metadata, dict):
            metadata = {}
        destination = bucket_path / safe_key
        restored_size = data_path.stat().st_size
        is_overwrite = destination.exists()
        existing_size = destination.stat().st_size if is_overwrite else 0
        archived_version_size = 0
        if self._is_versioning_enabled(bucket_path) and is_overwrite:
            archived_version_size = existing_size
            self._archive_current_version(bucket_id, safe_key, reason="restore-overwrite")
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(data_path, destination)
        if metadata:
            self._write_metadata(bucket_id, safe_key, metadata)
        else:
            self._delete_metadata(bucket_id, safe_key)
        stat = destination.stat()
        self._update_bucket_stats_cache(
            bucket_id,
            bytes_delta=restored_size - existing_size,
            objects_delta=0 if is_overwrite else 1,
            version_bytes_delta=archived_version_size,
            version_count_delta=1 if archived_version_size > 0 else 0,
        )
        etag = self._compute_etag(destination)
        internal_meta = {"__etag__": etag, "__size__": str(stat.st_size)}
        combined_meta = {**internal_meta, **(metadata or {})}
        self._write_metadata(bucket_id, safe_key, combined_meta)
        obj_meta = ObjectMeta(
            key=safe_key.as_posix(),
            size=stat.st_size,
            last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
            etag=etag,
            metadata=metadata or None,
        )
        self._update_object_cache_entry(bucket_id, safe_key.as_posix(), obj_meta)
        return obj_meta

    def delete_object_version(self, bucket_name: str, object_key: str, version_id: str) -> None:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name
        safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
        version_dir = self._version_dir(bucket_id, safe_key)
        data_path = version_dir / f"{version_id}.bin"
        meta_path = version_dir / f"{version_id}.json"
        if not data_path.exists() and not meta_path.exists():
            legacy_version_dir = self._legacy_version_dir(bucket_id, safe_key)
            data_path = legacy_version_dir / f"{version_id}.bin"
            meta_path = legacy_version_dir / f"{version_id}.json"
        if not data_path.exists() and not meta_path.exists():
            raise StorageError(f"Version {version_id} not found")
        deleted_version_size = data_path.stat().st_size if data_path.exists() else 0
        if data_path.exists():
            data_path.unlink()
        if meta_path.exists():
            meta_path.unlink()
        parent = data_path.parent
        if parent.exists() and not any(parent.iterdir()):
            parent.rmdir()
        if deleted_version_size > 0:
            self._update_bucket_stats_cache(
                bucket_id,
                version_bytes_delta=-deleted_version_size,
                version_count_delta=-1,
            )

    def list_orphaned_objects(self, bucket_name: str) -> List[Dict[str, Any]]:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name
        version_roots = [self._bucket_versions_root(bucket_id), self._legacy_versions_root(bucket_id)]
        if not any(root.exists() for root in version_roots):
            return []
        aggregated: Dict[str, Dict[str, Any]] = {}
        skipped: set[str] = set()
        for version_root in version_roots:
            if not version_root.exists():
                continue
            for meta_file in version_root.glob("**/*.json"):
                if not meta_file.is_file():
                    continue
                rel = meta_file.parent.relative_to(version_root)
                rel_key = rel.as_posix()
                if rel_key in skipped:
                    continue
                object_path = bucket_path / rel
                if object_path.exists():
                    skipped.add(rel_key)
                    continue
                try:
                    payload = json.loads(meta_file.read_text(encoding="utf-8"))
                except (OSError, json.JSONDecodeError):
                    payload = {}
                version_id = payload.get("version_id") or meta_file.stem
                archived_at = payload.get("archived_at") or "1970-01-01T00:00:00Z"
                size = int(payload.get("size") or 0)
                reason = payload.get("reason") or "update"
                record = aggregated.setdefault(
                    rel_key,
                    {
                        "key": rel_key,
                        "versions": 0,
                        "total_size": 0,
                        "latest": None,
                        "_latest_sort": None,
                    },
                )
                record["versions"] += 1
                record["total_size"] += size
                candidate = {
                    "version_id": version_id,
                    "archived_at": archived_at,
                    "size": size,
                    "reason": reason,
                }
                sort_key = (
                    archived_at,
                    meta_file.stat().st_mtime,
                )
                current_sort = record.get("_latest_sort")
                if current_sort is None or sort_key > current_sort:
                    record["_latest_sort"] = sort_key
                    record["latest"] = candidate
        for record in aggregated.values():
            record.pop("_latest_sort", None)
        return sorted(aggregated.values(), key=lambda item: item["key"])

    def initiate_multipart_upload(
        self,
        bucket_name: str,
        object_key: str,
        *,
        metadata: Optional[Dict[str, str]] = None,
    ) -> str:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name
        safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
        upload_id = uuid.uuid4().hex
        upload_root = self._multipart_dir(bucket_id, upload_id)
        upload_root.mkdir(parents=True, exist_ok=False)
        manifest = {
            "upload_id": upload_id,
            "object_key": safe_key.as_posix(),
            "metadata": self._normalize_metadata(metadata),
            "parts": {},
            "created_at": _utc_isoformat(),
        }
        self._write_multipart_manifest(upload_root, manifest)
        return upload_id

    def upload_multipart_part(
        self,
        bucket_name: str,
        upload_id: str,
        part_number: int,
        stream: BinaryIO,
    ) -> str:
        """Upload a part for a multipart upload.

        Uses file locking to safely update the manifest and handle concurrent uploads.
        """
        if part_number < 1 or part_number > 10000:
            raise StorageError("part_number must be between 1 and 10000")
        bucket_path = self._bucket_path(bucket_name)

        upload_root = self._multipart_dir(bucket_path.name, upload_id)
        if not upload_root.exists():
            upload_root = self._legacy_multipart_dir(bucket_path.name, upload_id)
        if not upload_root.exists():
            raise StorageError("Multipart upload not found")

        part_filename = f"part-{part_number:05d}.part"
        part_path = upload_root / part_filename
        temp_path = upload_root / f".{part_filename}.tmp"

        try:
            if _HAS_RUST:
                with temp_path.open("wb") as target:
                    shutil.copyfileobj(stream, target)
                part_etag = _rc.md5_file(str(temp_path))
            else:
                checksum = hashlib.md5()
                with temp_path.open("wb") as target:
                    shutil.copyfileobj(_HashingReader(stream, checksum), target)
                    target.flush()
                    os.fsync(target.fileno())
                part_etag = checksum.hexdigest()
            temp_path.replace(part_path)
        except OSError:
            try:
                temp_path.unlink(missing_ok=True)
            except OSError:
                pass
            raise

        record = {
            "etag": part_etag,
            "size": part_path.stat().st_size,
            "filename": part_filename,
        }

        manifest_path = upload_root / self.MULTIPART_MANIFEST
        lock_path = upload_root / ".manifest.lock"

        max_retries = 3
        for attempt in range(max_retries):
            try:
                with lock_path.open("w") as lock_file:
                    with _file_lock(lock_file):
                        try:
                            manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
                        except (OSError, json.JSONDecodeError) as exc:
                            if attempt < max_retries - 1:
                                time.sleep(0.1 * (attempt + 1))
                                continue
                            raise StorageError("Multipart manifest unreadable") from exc

                        parts = manifest.setdefault("parts", {})
                        parts[str(part_number)] = record
                        self._atomic_write_json(manifest_path, manifest)
                break
            except OSError as exc:
                if attempt < max_retries - 1:
                    time.sleep(0.1 * (attempt + 1))
                    continue
                raise StorageError(f"Failed to update multipart manifest: {exc}") from exc

        return record["etag"]

    def upload_part_copy(
        self,
        bucket_name: str,
        upload_id: str,
        part_number: int,
        source_bucket: str,
        source_key: str,
        start_byte: Optional[int] = None,
        end_byte: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Copy a range from an existing object as a multipart part."""
        if part_number < 1 or part_number > 10000:
            raise StorageError("part_number must be between 1 and 10000")

        source_path = self.get_object_path(source_bucket, source_key)
        source_size = source_path.stat().st_size

        if start_byte is None:
            start_byte = 0
        if end_byte is None:
            end_byte = source_size - 1

        if start_byte < 0 or end_byte >= source_size or start_byte > end_byte:
            raise StorageError("Invalid byte range")

        bucket_path = self._bucket_path(bucket_name)
        upload_root = self._multipart_dir(bucket_path.name, upload_id)
        if not upload_root.exists():
            upload_root = self._legacy_multipart_dir(bucket_path.name, upload_id)
        if not upload_root.exists():
            raise StorageError("Multipart upload not found")

        checksum = hashlib.md5()
        part_filename = f"part-{part_number:05d}.part"
        part_path = upload_root / part_filename
        temp_path = upload_root / f".{part_filename}.tmp"

        try:
            with source_path.open("rb") as src:
                src.seek(start_byte)
                bytes_to_copy = end_byte - start_byte + 1
                with temp_path.open("wb") as target:
                    remaining = bytes_to_copy
                    while remaining > 0:
                        chunk_size = min(65536, remaining)
                        chunk = src.read(chunk_size)
                        if not chunk:
                            break
                        checksum.update(chunk)
                        target.write(chunk)
                        remaining -= len(chunk)
            temp_path.replace(part_path)
        except OSError:
            try:
                temp_path.unlink(missing_ok=True)
            except OSError:
                pass
            raise

        record = {
            "etag": checksum.hexdigest(),
            "size": part_path.stat().st_size,
            "filename": part_filename,
        }

        manifest_path = upload_root / self.MULTIPART_MANIFEST
        lock_path = upload_root / ".manifest.lock"

        max_retries = 3
        for attempt in range(max_retries):
            try:
                with lock_path.open("w") as lock_file:
                    with _file_lock(lock_file):
                        try:
                            manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
                        except (OSError, json.JSONDecodeError) as exc:
                            if attempt < max_retries - 1:
                                time.sleep(0.1 * (attempt + 1))
                                continue
                            raise StorageError("Multipart manifest unreadable") from exc

                        parts = manifest.setdefault("parts", {})
                        parts[str(part_number)] = record
                        self._atomic_write_json(manifest_path, manifest)
                break
            except OSError as exc:
                if attempt < max_retries - 1:
                    time.sleep(0.1 * (attempt + 1))
                    continue
                raise StorageError(f"Failed to update multipart manifest: {exc}") from exc

        return {
            "etag": record["etag"],
            "last_modified": datetime.fromtimestamp(part_path.stat().st_mtime, timezone.utc),
        }

    def complete_multipart_upload(
        self,
        bucket_name: str,
        upload_id: str,
        ordered_parts: List[Dict[str, Any]],
        enforce_quota: bool = True,
    ) -> ObjectMeta:
        if not ordered_parts:
            raise StorageError("parts list required")
        bucket_path = self._bucket_path(bucket_name)
        bucket_id = bucket_path.name
        manifest, upload_root = self._load_multipart_manifest(bucket_id, upload_id)
        parts_map = manifest.get("parts") or {}
        if not parts_map:
            raise StorageError("No uploaded parts found")
        validated: List[tuple[int, Dict[str, Any]]] = []
        total_size = 0
        for part in ordered_parts:
            raw_number = part.get("part_number")
            if raw_number is None:
                raw_number = part.get("PartNumber")
            try:
                number = int(raw_number)
            except (TypeError, ValueError) as exc:
                raise StorageError("Each part must include part_number") from exc
            if number < 1:
                raise StorageError("part numbers must be >= 1")
            key = str(number)
            record = parts_map.get(key)
            if not record:
                raise StorageError(f"Part {number} missing from upload")
            raw_etag = part.get("etag", part.get("ETag", ""))
            supplied_etag = str(raw_etag).strip() or record.get("etag")
            if supplied_etag and record.get("etag") and supplied_etag.strip('"') != record["etag"]:
                raise StorageError(f"ETag mismatch for part {number}")
            validated.append((number, record))
            total_size += record.get("size", 0)
        validated.sort(key=lambda entry: entry[0])

        safe_key = self._sanitize_object_key(manifest["object_key"], self._object_key_max_length_bytes)
        destination = bucket_path / safe_key

        is_overwrite = destination.exists()
        existing_size = destination.stat().st_size if is_overwrite else 0
        size_delta = total_size - existing_size
        object_delta = 0 if is_overwrite else 1
        versioning_enabled = self._is_versioning_enabled(bucket_path)

        if enforce_quota:
            quota_check = self.check_quota(
                bucket_name,
                additional_bytes=max(0, size_delta),
                additional_objects=object_delta,
            )
            if not quota_check["allowed"]:
                raise QuotaExceededError(
                    quota_check["message"] or "Quota exceeded",
                    quota_check["quota"],
                    quota_check["usage"],
                )

        destination.parent.mkdir(parents=True, exist_ok=True)

        lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock"

        archived_version_size = 0
        try:
            with _atomic_lock_file(lock_file_path):
                if versioning_enabled and destination.exists():
                    archived_version_size = destination.stat().st_size
                    self._archive_current_version(bucket_id, safe_key, reason="overwrite")
                if _HAS_RUST:
                    part_paths = []
                    for _, record in validated:
                        pp = upload_root / record["filename"]
                        if not pp.exists():
                            raise StorageError(f"Missing part file {record['filename']}")
                        part_paths.append(str(pp))
                    checksum_hex = _rc.assemble_parts_with_md5(part_paths, str(destination))
                else:
                    checksum = hashlib.md5()
                    with destination.open("wb") as target:
                        for _, record in validated:
                            part_path = upload_root / record["filename"]
                            if not part_path.exists():
                                raise StorageError(f"Missing part file {record['filename']}")
                            with part_path.open("rb") as chunk:
                                while True:
                                    data = chunk.read(1024 * 1024)
                                    if not data:
                                        break
                                    checksum.update(data)
                                    target.write(data)
                        target.flush()
                        os.fsync(target.fileno())
                    checksum_hex = checksum.hexdigest()
        except BlockingIOError:
            raise StorageError("Another upload to this key is in progress")

        shutil.rmtree(upload_root, ignore_errors=True)

        self._update_bucket_stats_cache(
            bucket_id,
            bytes_delta=size_delta,
            objects_delta=object_delta,
            version_bytes_delta=archived_version_size,
            version_count_delta=1 if archived_version_size > 0 else 0,
        )

        stat = destination.stat()
        etag = checksum_hex
        metadata = manifest.get("metadata")

        internal_meta = {"__etag__": etag, "__size__": str(stat.st_size), "__last_modified__": str(stat.st_mtime)}
        combined_meta = {**internal_meta, **(metadata or {})}
        self._write_metadata(bucket_id, safe_key, combined_meta)

        obj_meta = ObjectMeta(
            key=safe_key.as_posix(),
            size=stat.st_size,
            last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
            etag=etag,
            metadata=metadata,
        )
        self._update_object_cache_entry(bucket_id, safe_key.as_posix(), obj_meta)

        return obj_meta

    def abort_multipart_upload(self, bucket_name: str, upload_id: str) -> None:
        bucket_path = self._bucket_path(bucket_name)
        upload_root = self._multipart_dir(bucket_path.name, upload_id)
        if upload_root.exists():
            shutil.rmtree(upload_root, ignore_errors=True)
            return
        legacy_root = self._legacy_multipart_dir(bucket_path.name, upload_id)
        if legacy_root.exists():
            shutil.rmtree(legacy_root, ignore_errors=True)

    def list_multipart_parts(self, bucket_name: str, upload_id: str) -> List[Dict[str, Any]]:
        """List uploaded parts for a multipart upload."""
        bucket_path = self._bucket_path(bucket_name)
        manifest, upload_root = self._load_multipart_manifest(bucket_path.name, upload_id)

        parts = []
        parts_map = manifest.get("parts", {})
        for part_num_str, record in parts_map.items():
            part_num = int(part_num_str)
            part_filename = record.get("filename")
            if not part_filename:
                continue
            part_path = upload_root / part_filename
            if not part_path.exists():
                continue

            stat = part_path.stat()
            parts.append({
                "PartNumber": part_num,
                "Size": stat.st_size,
                "ETag": record.get("etag"),
                "LastModified": datetime.fromtimestamp(stat.st_mtime, timezone.utc)
            })

        parts.sort(key=lambda x: x["PartNumber"])
        return parts

    def list_multipart_uploads(self, bucket_name: str, include_orphaned: bool = False) -> List[Dict[str, Any]]:
        """List all active multipart uploads for a bucket.

        Args:
            bucket_name: The bucket to list uploads for.
            include_orphaned: If True, also include upload directories that have
                files but no valid manifest.json (orphaned/interrupted uploads).
        """
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise BucketNotFoundError("Bucket does not exist")
        bucket_id = bucket_path.name
        uploads = []

        for multipart_root in (
            self._multipart_bucket_root(bucket_id),
            self._legacy_multipart_bucket_root(bucket_id),
        ):
            if not multipart_root.exists():
                continue
            for upload_dir in multipart_root.iterdir():
                if not upload_dir.is_dir():
                    continue
                manifest_path = upload_dir / "manifest.json"
                if manifest_path.exists():
                    try:
                        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
                        uploads.append({
                            "upload_id": manifest.get("upload_id", upload_dir.name),
                            "object_key": manifest.get("object_key", ""),
                            "created_at": manifest.get("created_at", ""),
                        })
                    except (OSError, json.JSONDecodeError):
                        if include_orphaned:
                            has_files = any(upload_dir.rglob("*"))
                            if has_files:
                                uploads.append({
                                    "upload_id": upload_dir.name,
                                    "object_key": "(unknown)",
                                    "created_at": "",
                                    "orphaned": True,
                                })
                elif include_orphaned:
                    has_files = any(f.is_file() for f in upload_dir.rglob("*"))
                    if has_files:
                        uploads.append({
                            "upload_id": upload_dir.name,
                            "object_key": "(unknown)",
                            "created_at": "",
                            "orphaned": True,
                        })
        return uploads

    def _bucket_path(self, bucket_name: str) -> Path:
        safe_name = self._sanitize_bucket_name(bucket_name)
        return self.root / safe_name

    def _require_bucket_path(self, bucket_name: str) -> Path:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise StorageError("Bucket does not exist")
        return bucket_path

    def _object_path(self, bucket_name: str, object_key: str) -> Path:
        bucket_path = self._bucket_path(bucket_name)
        safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
        return bucket_path / safe_key

    def _system_root_path(self) -> Path:
        return self.root / self.SYSTEM_ROOT

    def _system_buckets_root(self) -> Path:
        return self._system_root_path() / self.SYSTEM_BUCKETS_DIR

    def _system_bucket_root(self, bucket_name: str) -> Path:
        return self._system_buckets_root() / bucket_name

    def _bucket_meta_root(self, bucket_name: str) -> Path:
        return self._system_bucket_root(bucket_name) / self.BUCKET_META_DIR

    def _bucket_versions_root(self, bucket_name: str) -> Path:
        return self._system_bucket_root(bucket_name) / self.BUCKET_VERSIONS_DIR

    def _multipart_root(self) -> Path:
        return self._system_root_path() / self.SYSTEM_MULTIPART_DIR

    def _multipart_bucket_root(self, bucket_name: str) -> Path:
        return self._multipart_root() / bucket_name

    def _legacy_metadata_file(self, bucket_name: str, key: Path) -> Path:
        meta_root = self._legacy_meta_root(bucket_name)
        meta_rel = Path(key.as_posix() + ".meta.json")
        return meta_root / meta_rel

    def _legacy_meta_root(self, bucket_name: str) -> Path:
        return self._bucket_path(bucket_name) / ".meta"

    def _legacy_versions_root(self, bucket_name: str) -> Path:
        return self._bucket_path(bucket_name) / ".versions"

    def _legacy_version_dir(self, bucket_name: str, key: Path) -> Path:
        return self._legacy_versions_root(bucket_name) / key

    def _legacy_multipart_bucket_root(self, bucket_name: str) -> Path:
        return self._bucket_path(bucket_name) / ".multipart"

    def _legacy_multipart_dir(self, bucket_name: str, upload_id: str) -> Path:
        return self._legacy_multipart_bucket_root(bucket_name) / upload_id

    def _fast_list_keys(self, bucket_path: Path) -> List[str]:
        """Fast directory walk using os.scandir instead of pathlib.rglob.

        This is significantly faster for large directories (10K+ files).
        Returns just the keys (for backward compatibility).
        """
        return list(self._build_object_cache(bucket_path).keys())

    def _build_object_cache(self, bucket_path: Path) -> Dict[str, ObjectMeta]:
        from concurrent.futures import ThreadPoolExecutor

        bucket_id = bucket_path.name
        objects: Dict[str, ObjectMeta] = {}
        bucket_str = str(bucket_path)
        bucket_len = len(bucket_str) + 1

        if _HAS_RUST:
            etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
            raw = _rc.build_object_cache(
                bucket_str,
                str(self._bucket_meta_root(bucket_id)),
                str(etag_index_path),
            )
            if raw["etag_cache_changed"] and raw["etag_cache"]:
                try:
                    etag_index_path.parent.mkdir(parents=True, exist_ok=True)
                    with open(etag_index_path, 'w', encoding='utf-8') as f:
                        json.dump(raw["etag_cache"], f)
                except OSError:
                    pass
            for key, size, mtime, etag in raw["objects"]:
                objects[key] = ObjectMeta(
                    key=key,
                    size=size,
                    last_modified=datetime.fromtimestamp(mtime, timezone.utc),
                    etag=etag,
                    metadata=None,
                )
            return objects

        etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
        meta_cache: Dict[str, str] = {}
        index_mtime: float = 0

        if etag_index_path.exists():
            try:
                index_mtime = etag_index_path.stat().st_mtime
                with open(etag_index_path, 'r', encoding='utf-8') as f:
                    meta_cache = json.load(f)
            except (OSError, json.JSONDecodeError):
                meta_cache = {}

        meta_root = self._bucket_meta_root(bucket_id)
        needs_rebuild = False

        if meta_root.exists() and index_mtime > 0:
            def check_newer(dir_path: str) -> bool:
                try:
                    with os.scandir(dir_path) as it:
                        for entry in it:
                            if entry.is_dir(follow_symlinks=False):
                                if check_newer(entry.path):
                                    return True
                            elif entry.is_file(follow_symlinks=False) and (entry.name.endswith('.meta.json') or entry.name == '_index.json'):
                                if entry.stat().st_mtime > index_mtime:
                                    return True
                except OSError:
                    pass
                return False
            needs_rebuild = check_newer(str(meta_root))
        elif not meta_cache:
            needs_rebuild = True

        if needs_rebuild and meta_root.exists():
            meta_str = str(meta_root)
            meta_len = len(meta_str) + 1
            meta_files: list[tuple[str, str]] = []
            index_files: list[str] = []

            def collect_meta_files(dir_path: str) -> None:
                try:
                    with os.scandir(dir_path) as it:
                        for entry in it:
                            if entry.is_dir(follow_symlinks=False):
                                collect_meta_files(entry.path)
                            elif entry.is_file(follow_symlinks=False):
                                if entry.name == '_index.json':
                                    index_files.append(entry.path)
                                elif entry.name.endswith('.meta.json'):
                                    rel = entry.path[meta_len:]
                                    key = rel[:-10].replace(os.sep, '/')
                                    meta_files.append((key, entry.path))
                except OSError:
                    pass

            collect_meta_files(meta_str)

            meta_cache = {}

            for idx_path in index_files:
                try:
                    with open(idx_path, 'r', encoding='utf-8') as f:
                        idx_data = json.load(f)
                    rel_dir = idx_path[meta_len:]
                    rel_dir = rel_dir.replace(os.sep, '/')
                    if rel_dir.endswith('/_index.json'):
                        dir_prefix = rel_dir[:-len('/_index.json')]
                    else:
                        dir_prefix = ''
                    for entry_name, entry_data in idx_data.items():
                        if dir_prefix:
                            key = f"{dir_prefix}/{entry_name}"
                        else:
                            key = entry_name
                        meta = entry_data.get("metadata", {})
                        etag = meta.get("__etag__")
                        if etag:
                            meta_cache[key] = etag
                except (OSError, json.JSONDecodeError):
                    pass

            def read_meta_file(item: tuple[str, str]) -> tuple[str, str | None]:
                key, path = item
                try:
                    with open(path, 'rb') as f:
                        content = f.read()
                    etag_marker = b'"__etag__"'
                    idx = content.find(etag_marker)
                    if idx != -1:
                        start = content.find(b'"', idx + len(etag_marker) + 1)
                        if start != -1:
                            end = content.find(b'"', start + 1)
                            if end != -1:
                                return key, content[start+1:end].decode('utf-8')
                    return key, None
                except (OSError, UnicodeDecodeError):
                    return key, None

            legacy_meta_files = [(k, p) for k, p in meta_files if k not in meta_cache]
            if legacy_meta_files:
                max_workers = min((os.cpu_count() or 4) * 2, len(legacy_meta_files), 16)
                with ThreadPoolExecutor(max_workers=max_workers) as executor:
                    for key, etag in executor.map(read_meta_file, legacy_meta_files):
                        if etag:
                            meta_cache[key] = etag

            if meta_cache:
                try:
                    etag_index_path.parent.mkdir(parents=True, exist_ok=True)
                    with open(etag_index_path, 'w', encoding='utf-8') as f:
                        json.dump(meta_cache, f)
                except OSError:
                    pass

        def scan_dir(dir_path: str) -> None:
            try:
                with os.scandir(dir_path) as it:
                    for entry in it:
                        if entry.is_dir(follow_symlinks=False):
                            rel_start = entry.path[bucket_len:].split(os.sep)[0] if len(entry.path) > bucket_len else entry.name
                            if rel_start in self.INTERNAL_FOLDERS:
                                continue
                            scan_dir(entry.path)
                        elif entry.is_file(follow_symlinks=False):
                            rel = entry.path[bucket_len:]
                            first_part = rel.split(os.sep)[0] if os.sep in rel else rel
                            if first_part in self.INTERNAL_FOLDERS:
                                continue

                            key = rel.replace(os.sep, '/')
                            try:
                                stat = entry.stat()

                                etag = meta_cache.get(key)

                                objects[key] = ObjectMeta(
                                    key=key,
                                    size=stat.st_size,
                                    last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
                                    etag=etag,
                                    metadata=None,
                                )
                            except OSError:
                                pass
            except OSError:
                pass

        scan_dir(bucket_str)
        return objects

    def _get_object_cache(self, bucket_id: str, bucket_path: Path) -> Dict[str, ObjectMeta]:
        """Get cached object metadata for a bucket, refreshing if stale.

        Uses LRU eviction to prevent unbounded cache growth.
        Thread-safe with per-bucket locks to reduce contention.
        Checks stats.json for cross-process cache invalidation.
        """
        now = time.time()
        current_stats_mtime = self._get_cache_marker_mtime(bucket_id)

        with self._obj_cache_lock:
            cached = self._object_cache.get(bucket_id)
            if cached:
                objects, timestamp, cached_stats_mtime = cached
                if now - timestamp < self._cache_ttl and current_stats_mtime == cached_stats_mtime:
                    self._object_cache.move_to_end(bucket_id)
                    return objects

        bucket_lock = self._get_bucket_lock(bucket_id)
        with bucket_lock:
            now = time.time()
            current_stats_mtime = self._get_cache_marker_mtime(bucket_id)
            with self._obj_cache_lock:
                cached = self._object_cache.get(bucket_id)
                if cached:
                    objects, timestamp, cached_stats_mtime = cached
                    if now - timestamp < self._cache_ttl and current_stats_mtime == cached_stats_mtime:
                        self._object_cache.move_to_end(bucket_id)
                        return objects

            objects = self._build_object_cache(bucket_path)
            new_stats_mtime = self._get_cache_marker_mtime(bucket_id)

            with self._obj_cache_lock:
                while len(self._object_cache) >= self._object_cache_max_size:
                    self._object_cache.popitem(last=False)

                self._object_cache[bucket_id] = (objects, time.time(), new_stats_mtime)
                self._object_cache.move_to_end(bucket_id)
                self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1
                self._sorted_key_cache.pop(bucket_id, None)

        return objects

    def _invalidate_object_cache(self, bucket_id: str) -> None:
        with self._obj_cache_lock:
            self._object_cache.pop(bucket_id, None)
            self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1

        self._etag_index_dirty.discard(bucket_id)
        etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
        try:
            etag_index_path.unlink(missing_ok=True)
        except OSError:
            pass

    def _get_cache_marker_mtime(self, bucket_id: str) -> float:
        return float(self._stats_serial.get(bucket_id, 0))

    def _update_object_cache_entry(self, bucket_id: str, key: str, meta: Optional[ObjectMeta]) -> None:
        with self._obj_cache_lock:
            cached = self._object_cache.get(bucket_id)
            if cached:
                objects, timestamp, stats_mtime = cached
                if meta is None:
                    objects.pop(key, None)
                else:
                    objects[key] = meta
            self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1
            self._sorted_key_cache.pop(bucket_id, None)

        self._etag_index_dirty.add(bucket_id)
        self._schedule_etag_index_flush()

    def _schedule_etag_index_flush(self) -> None:
        if self._etag_index_flush_timer is None or not self._etag_index_flush_timer.is_alive():
            self._etag_index_flush_timer = threading.Timer(5.0, self._flush_etag_indexes)
            self._etag_index_flush_timer.daemon = True
            self._etag_index_flush_timer.start()

    def _flush_etag_indexes(self) -> None:
        dirty = set(self._etag_index_dirty)
        self._etag_index_dirty.clear()
        for bucket_id in dirty:
            with self._obj_cache_lock:
                cached = self._object_cache.get(bucket_id)
                if not cached:
                    continue
                objects = cached[0]
                index = {k: v.etag for k, v in objects.items() if v.etag}
            etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
            try:
                etag_index_path.parent.mkdir(parents=True, exist_ok=True)
                with open(etag_index_path, 'w', encoding='utf-8') as f:
                    json.dump(index, f)
            except OSError:
                pass

    def warm_cache(self, bucket_names: Optional[List[str]] = None) -> None:
        """Pre-warm the object cache for specified buckets or all buckets.

        This is called on startup to ensure the first request is fast.
        """
        if bucket_names is None:
            bucket_names = [b.name for b in self.list_buckets()]

        for bucket_name in bucket_names:
            try:
                bucket_path = self._bucket_path(bucket_name)
                if bucket_path.exists():
                    self._get_object_cache(bucket_path.name, bucket_path)
            except Exception:
                pass

    def warm_cache_async(self, bucket_names: Optional[List[str]] = None) -> threading.Thread:
        """Start cache warming in a background thread.

        Returns the thread object so caller can optionally wait for it.
        """
        thread = threading.Thread(
            target=self.warm_cache,
            args=(bucket_names,),
            daemon=True,
            name="cache-warmer",
        )
        thread.start()
        return thread

    def _ensure_system_roots(self) -> None:
        for path in (
            self._system_root_path(),
            self._system_buckets_root(),
            self._multipart_root(),
            self._system_root_path() / self.SYSTEM_TMP_DIR,
        ):
            path.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _atomic_write_json(path: Path, data: Any) -> None:
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = path.with_suffix(".tmp")
        try:
            with tmp_path.open("w", encoding="utf-8") as f:
                json.dump(data, f)
                f.flush()
                os.fsync(f.fileno())
            tmp_path.replace(path)
        except BaseException:
            try:
                tmp_path.unlink(missing_ok=True)
            except OSError:
                pass
            raise

    def _multipart_dir(self, bucket_name: str, upload_id: str) -> Path:
        return self._multipart_bucket_root(bucket_name) / upload_id

    def _version_dir(self, bucket_name: str, key: Path) -> Path:
        return self._bucket_versions_root(bucket_name) / key

    def _bucket_config_path(self, bucket_name: str) -> Path:
        return self._system_bucket_root(bucket_name) / self.BUCKET_CONFIG_FILE

    def _read_bucket_config(self, bucket_name: str) -> dict[str, Any]:
        now = time.time()
        config_path = self._bucket_config_path(bucket_name)
        cached = self._bucket_config_cache.get(bucket_name)
        if cached:
            config, cached_time, cached_mtime = cached
            if now - cached_time < self._bucket_config_cache_ttl:
                return config.copy()

        if not config_path.exists():
            self._bucket_config_cache[bucket_name] = ({}, now, 0.0)
            return {}
        try:
            data = json.loads(config_path.read_text(encoding="utf-8"))
            config = data if isinstance(data, dict) else {}
            mtime = config_path.stat().st_mtime
            self._bucket_config_cache[bucket_name] = (config, now, mtime)
            return config.copy()
        except (OSError, json.JSONDecodeError):
            self._bucket_config_cache[bucket_name] = ({}, now, 0.0)
            return {}

    def _write_bucket_config(self, bucket_name: str, payload: dict[str, Any]) -> None:
        config_path = self._bucket_config_path(bucket_name)
        config_path.parent.mkdir(parents=True, exist_ok=True)
        self._atomic_write_json(config_path, payload)
        try:
            mtime = config_path.stat().st_mtime
        except OSError:
            mtime = 0.0
        self._bucket_config_cache[bucket_name] = (payload.copy(), time.time(), mtime)

    def _set_bucket_config_entry(self, bucket_name: str, key: str, value: Any | None) -> None:
        config = self._read_bucket_config(bucket_name)
        if value is None:
            config.pop(key, None)
        else:
            config[key] = value
        self._write_bucket_config(bucket_name, config)

    def _is_versioning_enabled(self, bucket_path: Path) -> bool:
        config = self._read_bucket_config(bucket_path.name)
        return bool(config.get("versioning_enabled"))

    def _load_multipart_manifest(self, bucket_name: str, upload_id: str) -> tuple[dict[str, Any], Path]:
        upload_root = self._multipart_dir(bucket_name, upload_id)
        if not upload_root.exists():
            upload_root = self._legacy_multipart_dir(bucket_name, upload_id)
        manifest_path = upload_root / self.MULTIPART_MANIFEST
        if not manifest_path.exists():
            raise StorageError("Multipart upload not found")
        try:
            manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
        except (OSError, json.JSONDecodeError) as exc:
            raise StorageError("Multipart manifest unreadable") from exc
        return manifest, upload_root

    def _write_multipart_manifest(self, upload_root: Path, manifest: dict[str, Any]) -> None:
        manifest_path = upload_root / self.MULTIPART_MANIFEST
        self._atomic_write_json(manifest_path, manifest)

    def _metadata_file(self, bucket_name: str, key: Path) -> Path:
        meta_root = self._bucket_meta_root(bucket_name)
        meta_rel = Path(key.as_posix() + ".meta.json")
        return meta_root / meta_rel

    def _index_file_for_key(self, bucket_name: str, key: Path) -> tuple[Path, str]:
        meta_root = self._bucket_meta_root(bucket_name)
        parent = key.parent
        entry_name = key.name
        if parent == Path("."):
            return meta_root / "_index.json", entry_name
        return meta_root / parent / "_index.json", entry_name

    def _get_meta_index_lock(self, index_path: str) -> threading.Lock:
        with self._registry_lock:
            if index_path not in self._meta_index_locks:
                self._meta_index_locks[index_path] = threading.Lock()
            return self._meta_index_locks[index_path]

    def _read_index_entry(self, bucket_name: str, key: Path) -> Optional[Dict[str, Any]]:
        cache_key = (bucket_name, str(key))
        with self._meta_cache_lock:
            hit = self._meta_read_cache.get(cache_key)
            if hit is not None:
                self._meta_read_cache.move_to_end(cache_key)
                cached = hit[0]
                return dict(cached) if cached is not None else None

        index_path, entry_name = self._index_file_for_key(bucket_name, key)
        if _HAS_RUST:
            result = _rc.read_index_entry(str(index_path), entry_name)
        else:
            if not index_path.exists():
                result = None
            else:
                try:
                    index_data = json.loads(index_path.read_text(encoding="utf-8"))
                    result = index_data.get(entry_name)
                except (OSError, json.JSONDecodeError):
                    result = None

        with self._meta_cache_lock:
            while len(self._meta_read_cache) >= self._meta_read_cache_max:
                self._meta_read_cache.popitem(last=False)
            self._meta_read_cache[cache_key] = (dict(result) if result is not None else None,)

        return result

    def _invalidate_meta_read_cache(self, bucket_name: str, key: Path) -> None:
        cache_key = (bucket_name, str(key))
        with self._meta_cache_lock:
            self._meta_read_cache.pop(cache_key, None)

    def _write_index_entry(self, bucket_name: str, key: Path, entry: Dict[str, Any]) -> None:
        index_path, entry_name = self._index_file_for_key(bucket_name, key)
        lock = self._get_meta_index_lock(str(index_path))
        with lock:
            if _HAS_RUST:
                _rc.write_index_entry(str(index_path), entry_name, json.dumps(entry))
            else:
                index_path.parent.mkdir(parents=True, exist_ok=True)
                index_data: Dict[str, Any] = {}
                if index_path.exists():
                    try:
                        index_data = json.loads(index_path.read_text(encoding="utf-8"))
                    except (OSError, json.JSONDecodeError):
                        pass
                index_data[entry_name] = entry
                self._atomic_write_json(index_path, index_data)
        self._invalidate_meta_read_cache(bucket_name, key)

    def _delete_index_entry(self, bucket_name: str, key: Path) -> None:
        index_path, entry_name = self._index_file_for_key(bucket_name, key)
        if not index_path.exists():
            self._invalidate_meta_read_cache(bucket_name, key)
            return
        lock = self._get_meta_index_lock(str(index_path))
        with lock:
            if _HAS_RUST:
                _rc.delete_index_entry(str(index_path), entry_name)
            else:
                try:
                    index_data = json.loads(index_path.read_text(encoding="utf-8"))
                except (OSError, json.JSONDecodeError):
                    self._invalidate_meta_read_cache(bucket_name, key)
                    return
                if entry_name in index_data:
                    del index_data[entry_name]
                    if index_data:
                        self._atomic_write_json(index_path, index_data)
                    else:
                        try:
                            index_path.unlink()
                        except OSError:
                            pass
        self._invalidate_meta_read_cache(bucket_name, key)

    def _normalize_metadata(self, metadata: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
        if not metadata:
            return None
        clean = {str(k).strip(): str(v) for k, v in metadata.items() if str(k).strip()}
        return clean or None

    def _write_metadata(self, bucket_name: str, key: Path, metadata: Dict[str, str]) -> None:
        clean = self._normalize_metadata(metadata)
        if not clean:
            self._delete_metadata(bucket_name, key)
            return
        self._write_index_entry(bucket_name, key, {"metadata": clean})
        old_meta = self._metadata_file(bucket_name, key)
        try:
            if old_meta.exists():
                old_meta.unlink()
        except OSError:
            pass

    def _archive_current_version(self, bucket_name: str, key: Path, *, reason: str) -> None:
        bucket_path = self._bucket_path(bucket_name)
        source = bucket_path / key
        if not source.exists():
            return
        version_dir = self._version_dir(bucket_name, key)
        version_dir.mkdir(parents=True, exist_ok=True)
        now = _utcnow()
        version_id = f"{now.strftime('%Y%m%dT%H%M%S%fZ')}-{uuid.uuid4().hex[:8]}"
        data_path = version_dir / f"{version_id}.bin"
        shutil.copy2(source, data_path)
        metadata = self._read_metadata(bucket_name, key)
        record = {
            "version_id": version_id,
            "key": key.as_posix(),
            "size": source.stat().st_size,
            "archived_at": now.isoformat().replace("+00:00", "Z"),
            "etag": self._compute_etag(source),
            "metadata": metadata or {},
            "reason": reason,
        }
        manifest_path = version_dir / f"{version_id}.json"
        self._atomic_write_json(manifest_path, record)

    def _read_metadata(self, bucket_name: str, key: Path) -> Dict[str, str]:
        entry = self._read_index_entry(bucket_name, key)
        if entry is not None:
            data = entry.get("metadata")
            return data if isinstance(data, dict) else {}
        for meta_file in (self._metadata_file(bucket_name, key), self._legacy_metadata_file(bucket_name, key)):
            if not meta_file.exists():
                continue
            try:
                payload = json.loads(meta_file.read_text(encoding="utf-8"))
                data = payload.get("metadata")
                return data if isinstance(data, dict) else {}
            except (OSError, json.JSONDecodeError):
                return {}
        return {}

    def _safe_unlink(self, path: Path) -> None:
        attempts = 3
        last_error: PermissionError | None = None
        for attempt in range(attempts):
            try:
                path.unlink()
                return
            except FileNotFoundError:
                return
            except PermissionError as exc:
                last_error = exc
                if os.name == "nt":
                    time.sleep(0.15 * (attempt + 1))
            except OSError as exc:
                raise StorageError(f"Unable to delete object: {exc}") from exc
        message = "Object file is currently in use. Close active previews or wait and try again."
        raise StorageError(message) from last_error

    def _delete_metadata(self, bucket_name: str, key: Path) -> None:
        self._delete_index_entry(bucket_name, key)
        locations = (
            (self._metadata_file(bucket_name, key), self._bucket_meta_root(bucket_name)),
            (self._legacy_metadata_file(bucket_name, key), self._legacy_meta_root(bucket_name)),
        )
        for meta_file, meta_root in locations:
            try:
                if meta_file.exists():
                    meta_file.unlink()
                    parent = meta_file.parent
                    while parent != meta_root and parent.exists() and not any(parent.iterdir()):
                        parent.rmdir()
                        parent = parent.parent
            except OSError:
                continue

    def _check_bucket_contents(self, bucket_path: Path) -> tuple[bool, bool, bool]:
        bucket_name = bucket_path.name

        if _HAS_RUST:
            return _rc.check_bucket_contents(
                str(bucket_path),
                [
                    str(self._bucket_versions_root(bucket_name)),
                    str(self._legacy_versions_root(bucket_name)),
                ],
                [
                    str(self._multipart_bucket_root(bucket_name)),
                    str(self._legacy_multipart_bucket_root(bucket_name)),
                ],
            )

        has_objects = False
        has_versions = False
        has_multipart = False

        for path in bucket_path.rglob("*"):
            if has_objects:
                break
            if not path.is_file():
                continue
            rel = path.relative_to(bucket_path)
            if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
                continue
            has_objects = True

        for version_root in (
            self._bucket_versions_root(bucket_name),
            self._legacy_versions_root(bucket_name),
        ):
            if has_versions:
                break
            if version_root.exists():
                for path in version_root.rglob("*"):
                    if path.is_file():
                        has_versions = True
                        break

        for uploads_root in (
            self._multipart_bucket_root(bucket_name),
            self._legacy_multipart_bucket_root(bucket_name),
        ):
            if has_multipart:
                break
            if uploads_root.exists():
                for path in uploads_root.rglob("*"):
                    if path.is_file():
                        has_multipart = True
                        break

        return has_objects, has_versions, has_multipart

    def _has_visible_objects(self, bucket_path: Path) -> bool:
        has_objects, _, _ = self._check_bucket_contents(bucket_path)
        return has_objects

    def _has_archived_versions(self, bucket_path: Path) -> bool:
        _, has_versions, _ = self._check_bucket_contents(bucket_path)
        return has_versions

    def _has_active_multipart_uploads(self, bucket_path: Path) -> bool:
        _, _, has_multipart = self._check_bucket_contents(bucket_path)
        return has_multipart

    def _remove_tree(self, path: Path) -> None:
        if not path.exists():
            return
        def _handle_error(func, target_path, exc_info):
            try:
                os.chmod(target_path, stat.S_IRWXU)
                func(target_path)
            except Exception as exc:
                raise StorageError(f"Unable to delete bucket contents: {exc}") from exc

        try:
            shutil.rmtree(path, onerror=_handle_error)
        except FileNotFoundError:
            return
        except PermissionError as exc:
            raise StorageError("Bucket in use. Close open files and try again") from exc

    @staticmethod
    def _sanitize_bucket_name(bucket_name: str) -> str:
        if not bucket_name:
            raise StorageError("Bucket name required")

        name = bucket_name.lower()
        if len(name) < 3 or len(name) > 63:
            raise StorageError("Bucket name must be between 3 and 63 characters")

        if name.startswith("-") or name.endswith("-"):
            raise StorageError("Bucket name cannot start or end with a hyphen")

        if ".." in name:
            raise StorageError("Bucket name cannot contain consecutive periods")

        if name.startswith("xn--"):
            raise StorageError("Bucket name cannot start with 'xn--'")

        if re.fullmatch(r"\d+\.\d+\.\d+\.\d+", name):
            raise StorageError("Bucket name cannot be formatted like an IP address")

        if not re.fullmatch(r"[a-z0-9][a-z0-9.-]+[a-z0-9]", name):
            raise StorageError("Bucket name can contain lowercase letters, numbers, dots, and hyphens")

        return name

    @staticmethod
    def _sanitize_object_key(object_key: str, max_length_bytes: int = 1024) -> Path:
        if _HAS_RUST:
            error = _rc.validate_object_key(object_key, max_length_bytes, os.name == "nt")
            if error:
                raise StorageError(error)
            normalized = unicodedata.normalize("NFC", object_key)
            candidate = Path(normalized)
            if candidate.is_absolute():
                raise StorageError("Absolute object keys are not allowed")
            if getattr(candidate, "drive", ""):
                raise StorageError("Object key cannot include a drive letter")
            return Path(*candidate.parts) if candidate.parts else candidate

        if not object_key:
            raise StorageError("Object key required")
        if "\x00" in object_key:
            raise StorageError("Object key contains null bytes")
        object_key = unicodedata.normalize("NFC", object_key)
        if len(object_key.encode("utf-8")) > max_length_bytes:
            raise StorageError(f"Object key exceeds maximum length of {max_length_bytes} bytes")
        if object_key.startswith(("/", "\\")):
            raise StorageError("Object key cannot start with a slash")

        candidate = Path(object_key)
        if ".." in candidate.parts:
            raise StorageError("Object key contains parent directory references")

        if candidate.is_absolute():
            raise StorageError("Absolute object keys are not allowed")
        if getattr(candidate, "drive", ""):
            raise StorageError("Object key cannot include a drive letter")
        parts = []
        for part in candidate.parts:
            if part in ("", ".", ".."):
                raise StorageError("Object key contains invalid segments")
            if any(ord(ch) < 32 for ch in part):
                raise StorageError("Object key contains control characters")
            if os.name == "nt":
                if any(ch in part for ch in "<>:\"/\\|?*"):
                    raise StorageError("Object key contains characters not supported on Windows filesystems")
                if part.endswith((" ", ".")):
                    raise StorageError("Object key segments cannot end with spaces or periods on Windows")
                trimmed = part.upper().rstrip(". ")
                if trimmed in WINDOWS_RESERVED_NAMES:
                    raise StorageError(f"Invalid filename segment: {part}")
            parts.append(part)
        if parts:
            top_level = parts[0]
            if top_level in ObjectStorage.INTERNAL_FOLDERS or top_level == ObjectStorage.SYSTEM_ROOT:
                raise StorageError("Object key uses a reserved prefix")
        return Path(*parts)

    @staticmethod
    def _compute_etag(path: Path) -> str:
        if _HAS_RUST:
            return _rc.md5_file(str(path))
        checksum = hashlib.md5()
        with path.open("rb") as handle:
            for chunk in iter(lambda: handle.read(8192), b""):
                checksum.update(chunk)
        return checksum.hexdigest()


class _HashingReader:
    """Wraps a binary stream, updating the checksum as it is read."""

    def __init__(self, stream: BinaryIO, checksum: Any) -> None:
        self.stream = stream
        self.checksum = checksum

    def read(self, size: int = -1) -> bytes:
        data = self.stream.read(size)
        if data:
            self.checksum.update(data)
        return data