Revamp object bucket browser logic: paginated list_objects, list_objects_all helper, async object loading in the UI; add new tests

2025-12-22 10:04:36 +08:00
parent 97860669ec
commit 97435f15e5
17 changed files with 800 additions and 336 deletions

View File

@@ -188,8 +188,11 @@ class EncryptedObjectStorage:
     def bucket_stats(self, bucket_name: str, cache_ttl: int = 60):
         return self.storage.bucket_stats(bucket_name, cache_ttl)

-    def list_objects(self, bucket_name: str):
-        return self.storage.list_objects(bucket_name)
+    def list_objects(self, bucket_name: str, **kwargs):
+        return self.storage.list_objects(bucket_name, **kwargs)
+
+    def list_objects_all(self, bucket_name: str):
+        return self.storage.list_objects_all(bucket_name)

     def get_object_path(self, bucket_name: str, object_key: str):
         return self.storage.get_object_path(bucket_name, object_key)

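A minimal usage sketch (assumed names, not from this commit): because the wrapper forwards **kwargs untouched, pagination behaves identically through the encryption layer.

# Sketch only - "encrypted" is an assumed EncryptedObjectStorage instance.
# The pagination kwargs pass straight through to the underlying storage.
page = encrypted.list_objects("photos", max_keys=100, prefix="2025/")
print(page.is_truncated, page.next_continuation_token)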
View File

@@ -155,7 +155,7 @@ class ReplicationManager:
         try:
             # Get source objects
-            source_objects = self.storage.list_objects(bucket_name)
+            source_objects = self.storage.list_objects_all(bucket_name)
             source_keys = {obj.key: obj.size for obj in source_objects}

             # Get destination objects
@@ -219,7 +219,7 @@ class ReplicationManager:
             return
         try:
-            objects = self.storage.list_objects(bucket_name)
+            objects = self.storage.list_objects_all(bucket_name)
             logger.info(f"Starting replication of {len(objects)} existing objects from {bucket_name}")
             for obj in objects:
                 self._executor.submit(self._replicate_task, bucket_name, obj.key, rule, connection, "write")

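Replication needs the complete key set, so it switches to list_objects_all, which returns a plain List[ObjectMeta]; the existing len() and iteration code is unchanged. A small sketch with assumed names, not from this commit:

# Sketch only - "storage" is an assumed ObjectStorage instance.
# list_objects_all flattens the paginated result back into a list.
objects = storage.list_objects_all("source-bucket")
print(f"replicating {len(objects)} objects")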
View File

@@ -1155,7 +1155,7 @@ def _bucket_list_versions_handler(bucket_name: str) -> Response:
    storage = _storage()

    try:
-        objects = storage.list_objects(bucket_name)
+        objects = storage.list_objects_all(bucket_name)
    except StorageError as exc:
        return _error_response("NoSuchBucket", str(exc), 404)
@@ -1651,7 +1651,7 @@ def bucket_handler(bucket_name: str) -> Response:
            return error
        return _error_response("AccessDenied", str(exc), 403)

    try:
-        objects = storage.list_objects(bucket_name)
+        objects = storage.list_objects_all(bucket_name)
    except StorageError as exc:
        return _error_response("NoSuchBucket", str(exc), 404)

View File

@@ -99,6 +99,15 @@ class BucketMeta:
     created_at: datetime


+@dataclass
+class ListObjectsResult:
+    """Paginated result for object listing."""
+    objects: List[ObjectMeta]
+    is_truncated: bool
+    next_continuation_token: Optional[str]
+    total_count: Optional[int] = None  # Total objects in bucket (from stats cache)
+
+
 def _utcnow() -> datetime:
     return datetime.now(timezone.utc)
@@ -241,31 +250,105 @@ class ObjectStorage:
         self._remove_tree(self._system_bucket_root(bucket_path.name))
         self._remove_tree(self._multipart_bucket_root(bucket_path.name))

-    def list_objects(self, bucket_name: str) -> List[ObjectMeta]:
+    def list_objects(
+        self,
+        bucket_name: str,
+        *,
+        max_keys: int = 1000,
+        continuation_token: Optional[str] = None,
+        prefix: Optional[str] = None,
+    ) -> ListObjectsResult:
+        """List objects in a bucket with pagination support.
+
+        Args:
+            bucket_name: Name of the bucket
+            max_keys: Maximum number of objects to return (default 1000)
+            continuation_token: Token from previous request for pagination
+            prefix: Filter objects by key prefix
+
+        Returns:
+            ListObjectsResult with objects, truncation status, and continuation token
+        """
         bucket_path = self._bucket_path(bucket_name)
         if not bucket_path.exists():
             raise StorageError("Bucket does not exist")
         bucket_id = bucket_path.name
-        objects: List[ObjectMeta] = []
+
+        # Collect all matching object keys first (lightweight - just paths)
+        all_keys: List[str] = []
         for path in bucket_path.rglob("*"):
             if path.is_file():
-                stat = path.stat()
                 rel = path.relative_to(bucket_path)
                 if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
                     continue
-                metadata = self._read_metadata(bucket_id, rel)
+                key = str(rel.as_posix())
+                if prefix and not key.startswith(prefix):
+                    continue
+                all_keys.append(key)
+        all_keys.sort()
+        total_count = len(all_keys)
+
+        # Handle continuation token (the key to start after)
+        start_index = 0
+        if continuation_token:
+            try:
+                # continuation_token is the last key from previous page
+                for i, key in enumerate(all_keys):
+                    if key > continuation_token:
+                        start_index = i
+                        break
+                else:
+                    # Token is past all keys
+                    return ListObjectsResult(
+                        objects=[],
+                        is_truncated=False,
+                        next_continuation_token=None,
+                        total_count=total_count,
+                    )
+            except Exception:
+                pass  # Invalid token, start from beginning
+
+        # Get the slice we need
+        end_index = start_index + max_keys
+        keys_slice = all_keys[start_index:end_index]
+        is_truncated = end_index < total_count
+
+        # Now load full metadata only for the objects we're returning
+        objects: List[ObjectMeta] = []
+        for key in keys_slice:
+            safe_key = self._sanitize_object_key(key)
+            path = bucket_path / safe_key
+            if not path.exists():
+                continue  # Object may have been deleted
+            try:
+                stat = path.stat()
+                metadata = self._read_metadata(bucket_id, safe_key)
                 objects.append(
                     ObjectMeta(
-                        key=str(rel.as_posix()),
+                        key=key,
                         size=stat.st_size,
                         last_modified=datetime.fromtimestamp(stat.st_mtime),
                         etag=self._compute_etag(path),
                         metadata=metadata or None,
                     )
                 )
-        objects.sort(key=lambda meta: meta.key)
-        return objects
+            except OSError:
+                continue  # File may have been deleted during iteration
+
+        next_token = keys_slice[-1] if is_truncated and keys_slice else None
+        return ListObjectsResult(
+            objects=objects,
+            is_truncated=is_truncated,
+            next_continuation_token=next_token,
+            total_count=total_count,
+        )
+
+    def list_objects_all(self, bucket_name: str) -> List[ObjectMeta]:
+        """List all objects in a bucket (no pagination). Use with caution for large buckets."""
+        result = self.list_objects(bucket_name, max_keys=100000)
+        return result.objects

     def put_object(
         self,

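A minimal paging sketch (assumed names, not from this commit), driving the new method until it reports no further truncation:

# Sketch only - "storage" is an assumed ObjectStorage instance.
# Page through a bucket 500 keys at a time.
token = None
while True:
    page = storage.list_objects("photos", max_keys=500, continuation_token=token)
    for obj in page.objects:
        print(obj.key, obj.size, obj.etag)
    if not page.is_truncated:
        break
    token = page.next_continuation_token  # the last key of the current page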
View File

@@ -294,7 +294,9 @@ def bucket_detail(bucket_name: str):
    storage = _storage()
    try:
        _authorize_ui(principal, bucket_name, "list")
-        objects = storage.list_objects(bucket_name)
+        # Don't load objects here - UI fetches them asynchronously via /buckets/<name>/objects
+        if not storage.bucket_exists(bucket_name):
+            raise StorageError("Bucket does not exist")
    except (StorageError, IamError) as exc:
        flash(_friendly_error_message(exc), "danger")
        return redirect(url_for("ui.buckets_overview"))
@@ -382,10 +384,13 @@ def bucket_detail(bucket_name: str):
    except IamError:
        pass

+    # Pass the objects API endpoint URL for async loading
+    objects_api_url = url_for("ui.list_bucket_objects", bucket_name=bucket_name)
+
    return render_template(
        "bucket_detail.html",
        bucket_name=bucket_name,
-        objects=objects,
+        objects_api_url=objects_api_url,
        principal=principal,
        bucket_policy_text=policy_text,
        bucket_policy=bucket_policy,
@@ -408,6 +413,61 @@ def bucket_detail(bucket_name: str):
    )


+@ui_bp.get("/buckets/<bucket_name>/objects")
+def list_bucket_objects(bucket_name: str):
+    """API endpoint for paginated object listing."""
+    principal = _current_principal()
+    storage = _storage()
+    try:
+        _authorize_ui(principal, bucket_name, "list")
+    except IamError as exc:
+        return jsonify({"error": str(exc)}), 403
+
+    max_keys = min(int(request.args.get("max_keys", 100)), 1000)
+    continuation_token = request.args.get("continuation_token") or None
+    prefix = request.args.get("prefix") or None
+
+    try:
+        result = storage.list_objects(
+            bucket_name,
+            max_keys=max_keys,
+            continuation_token=continuation_token,
+            prefix=prefix,
+        )
+    except StorageError as exc:
+        return jsonify({"error": str(exc)}), 400
+
+    try:
+        versioning_enabled = storage.is_versioning_enabled(bucket_name)
+    except StorageError:
+        versioning_enabled = False
+
+    objects_data = []
+    for obj in result.objects:
+        objects_data.append({
+            "key": obj.key,
+            "size": obj.size,
+            "last_modified": obj.last_modified.isoformat(),
+            "last_modified_display": obj.last_modified.strftime("%b %d, %Y %H:%M"),
+            "etag": obj.etag,
+            "metadata": obj.metadata or {},
+            "preview_url": url_for("ui.object_preview", bucket_name=bucket_name, object_key=obj.key),
+            "download_url": url_for("ui.object_preview", bucket_name=bucket_name, object_key=obj.key) + "?download=1",
+            "presign_endpoint": url_for("ui.object_presign", bucket_name=bucket_name, object_key=obj.key),
+            "delete_endpoint": url_for("ui.delete_object", bucket_name=bucket_name, object_key=obj.key),
+            "versions_endpoint": url_for("ui.object_versions", bucket_name=bucket_name, object_key=obj.key),
+            "restore_template": url_for("ui.restore_object_version", bucket_name=bucket_name, object_key=obj.key, version_id="VERSION_ID_PLACEHOLDER"),
+        })
+
+    return jsonify({
+        "objects": objects_data,
+        "is_truncated": result.is_truncated,
+        "next_continuation_token": result.next_continuation_token,
+        "total_count": result.total_count,
+        "versioning_enabled": versioning_enabled,
+    })
+
+
 @ui_bp.post("/buckets/<bucket_name>/upload")
 @limiter.limit("30 per minute")
 def upload_object(bucket_name: str):

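A client-side sketch for the new endpoint (host, bucket name, and auth are assumptions, not from this commit), using the query parameters the handler reads (max_keys, continuation_token, prefix) and assuming the ui blueprint is mounted at the site root:

# Sketch only - assumes the app serves at localhost:5000 and the session
# cookie is already authenticated for the UI.
import requests

session = requests.Session()
params = {"max_keys": 100, "prefix": "logs/"}
while True:
    resp = session.get("http://localhost:5000/buckets/demo/objects", params=params)
    resp.raise_for_status()
    data = resp.json()
    for obj in data["objects"]:
        print(obj["key"], obj["size"], obj["last_modified_display"])
    if not data["is_truncated"]:
        break
    params["continuation_token"] = data["next_continuation_token"]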
View File

@@ -1,7 +1,7 @@
 """Central location for the application version string."""

 from __future__ import annotations

-APP_VERSION = "0.1.6"
+APP_VERSION = "0.1.7"

 def get_version() -> str: