From 97435f15e588186523b6b8a3210f3828601555bc Mon Sep 17 00:00:00 2001 From: kqjy Date: Mon, 22 Dec 2025 10:04:36 +0800 Subject: [PATCH 1/4] Revamp object bucket browser logic; Add new tests --- app/encrypted_storage.py | 7 +- app/replication.py | 4 +- app/s3_api.py | 4 +- app/storage.py | 97 ++++- app/ui.py | 64 ++- app/version.py | 2 +- pytest.ini | 3 + templates/base.html | 11 +- templates/bucket_detail.html | 712 ++++++++++++++++++++------------- templates/buckets.html | 7 +- templates/connections.html | 4 - templates/iam.html | 18 +- tests/test_edge_cases.py | 2 +- tests/test_storage_features.py | 2 +- tests/test_ui_bulk_delete.py | 4 +- tests/test_ui_pagination.py | 183 +++++++++ tests/test_ui_policy.py | 12 +- 17 files changed, 800 insertions(+), 336 deletions(-) create mode 100644 pytest.ini create mode 100644 tests/test_ui_pagination.py diff --git a/app/encrypted_storage.py b/app/encrypted_storage.py index 2acc679..372dd1c 100644 --- a/app/encrypted_storage.py +++ b/app/encrypted_storage.py @@ -188,8 +188,11 @@ class EncryptedObjectStorage: def bucket_stats(self, bucket_name: str, cache_ttl: int = 60): return self.storage.bucket_stats(bucket_name, cache_ttl) - def list_objects(self, bucket_name: str): - return self.storage.list_objects(bucket_name) + def list_objects(self, bucket_name: str, **kwargs): + return self.storage.list_objects(bucket_name, **kwargs) + + def list_objects_all(self, bucket_name: str): + return self.storage.list_objects_all(bucket_name) def get_object_path(self, bucket_name: str, object_key: str): return self.storage.get_object_path(bucket_name, object_key) diff --git a/app/replication.py b/app/replication.py index 6c30a7b..a48296a 100644 --- a/app/replication.py +++ b/app/replication.py @@ -155,7 +155,7 @@ class ReplicationManager: try: # Get source objects - source_objects = self.storage.list_objects(bucket_name) + source_objects = self.storage.list_objects_all(bucket_name) source_keys = {obj.key: obj.size for obj in source_objects} # Get destination objects @@ -219,7 +219,7 @@ class ReplicationManager: return try: - objects = self.storage.list_objects(bucket_name) + objects = self.storage.list_objects_all(bucket_name) logger.info(f"Starting replication of {len(objects)} existing objects from {bucket_name}") for obj in objects: self._executor.submit(self._replicate_task, bucket_name, obj.key, rule, connection, "write") diff --git a/app/s3_api.py b/app/s3_api.py index f67d816..9406206 100644 --- a/app/s3_api.py +++ b/app/s3_api.py @@ -1155,7 +1155,7 @@ def _bucket_list_versions_handler(bucket_name: str) -> Response: storage = _storage() try: - objects = storage.list_objects(bucket_name) + objects = storage.list_objects_all(bucket_name) except StorageError as exc: return _error_response("NoSuchBucket", str(exc), 404) @@ -1651,7 +1651,7 @@ def bucket_handler(bucket_name: str) -> Response: return error return _error_response("AccessDenied", str(exc), 403) try: - objects = storage.list_objects(bucket_name) + objects = storage.list_objects_all(bucket_name) except StorageError as exc: return _error_response("NoSuchBucket", str(exc), 404) diff --git a/app/storage.py b/app/storage.py index 7f91596..4875519 100644 --- a/app/storage.py +++ b/app/storage.py @@ -99,6 +99,15 @@ class BucketMeta: created_at: datetime +@dataclass +class ListObjectsResult: + """Paginated result for object listing.""" + objects: List[ObjectMeta] + is_truncated: bool + next_continuation_token: Optional[str] + total_count: Optional[int] = None # Total objects in bucket (from stats cache) + + def _utcnow() -> datetime: return datetime.now(timezone.utc) @@ -241,31 +250,105 @@ class ObjectStorage: self._remove_tree(self._system_bucket_root(bucket_path.name)) self._remove_tree(self._multipart_bucket_root(bucket_path.name)) - def list_objects(self, bucket_name: str) -> List[ObjectMeta]: + def list_objects( + self, + bucket_name: str, + *, + max_keys: int = 1000, + continuation_token: Optional[str] = None, + prefix: Optional[str] = None, + ) -> ListObjectsResult: + """List objects in a bucket with pagination support. + + Args: + bucket_name: Name of the bucket + max_keys: Maximum number of objects to return (default 1000) + continuation_token: Token from previous request for pagination + prefix: Filter objects by key prefix + + Returns: + ListObjectsResult with objects, truncation status, and continuation token + """ bucket_path = self._bucket_path(bucket_name) if not bucket_path.exists(): raise StorageError("Bucket does not exist") bucket_id = bucket_path.name - objects: List[ObjectMeta] = [] + # Collect all matching object keys first (lightweight - just paths) + all_keys: List[str] = [] for path in bucket_path.rglob("*"): if path.is_file(): - stat = path.stat() rel = path.relative_to(bucket_path) if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS: continue - metadata = self._read_metadata(bucket_id, rel) + key = str(rel.as_posix()) + if prefix and not key.startswith(prefix): + continue + all_keys.append(key) + + all_keys.sort() + total_count = len(all_keys) + + # Handle continuation token (the key to start after) + start_index = 0 + if continuation_token: + try: + # continuation_token is the last key from previous page + for i, key in enumerate(all_keys): + if key > continuation_token: + start_index = i + break + else: + # Token is past all keys + return ListObjectsResult( + objects=[], + is_truncated=False, + next_continuation_token=None, + total_count=total_count, + ) + except Exception: + pass # Invalid token, start from beginning + + # Get the slice we need + end_index = start_index + max_keys + keys_slice = all_keys[start_index:end_index] + is_truncated = end_index < total_count + + # Now load full metadata only for the objects we're returning + objects: List[ObjectMeta] = [] + for key in keys_slice: + safe_key = self._sanitize_object_key(key) + path = bucket_path / safe_key + if not path.exists(): + continue # Object may have been deleted + try: + stat = path.stat() + metadata = self._read_metadata(bucket_id, safe_key) objects.append( ObjectMeta( - key=str(rel.as_posix()), + key=key, size=stat.st_size, last_modified=datetime.fromtimestamp(stat.st_mtime), etag=self._compute_etag(path), metadata=metadata or None, ) ) - objects.sort(key=lambda meta: meta.key) - return objects + except OSError: + continue # File may have been deleted during iteration + + next_token = keys_slice[-1] if is_truncated and keys_slice else None + + return ListObjectsResult( + objects=objects, + is_truncated=is_truncated, + next_continuation_token=next_token, + total_count=total_count, + ) + + def list_objects_all(self, bucket_name: str) -> List[ObjectMeta]: + """List all objects in a bucket (no pagination). Use with caution for large buckets.""" + result = self.list_objects(bucket_name, max_keys=100000) + return result.objects def put_object( self, diff --git a/app/ui.py b/app/ui.py index 96f807a..2d2936f 100644 --- a/app/ui.py +++ b/app/ui.py @@ -294,7 +294,9 @@ def bucket_detail(bucket_name: str): storage = _storage() try: _authorize_ui(principal, bucket_name, "list") - objects = storage.list_objects(bucket_name) + # Don't load objects here - UI fetches them asynchronously via /buckets//objects + if not storage.bucket_exists(bucket_name): + raise StorageError("Bucket does not exist") except (StorageError, IamError) as exc: flash(_friendly_error_message(exc), "danger") return redirect(url_for("ui.buckets_overview")) @@ -382,10 +384,13 @@ def bucket_detail(bucket_name: str): except IamError: pass + # Pass the objects API endpoint URL for async loading + objects_api_url = url_for("ui.list_bucket_objects", bucket_name=bucket_name) + return render_template( "bucket_detail.html", bucket_name=bucket_name, - objects=objects, + objects_api_url=objects_api_url, principal=principal, bucket_policy_text=policy_text, bucket_policy=bucket_policy, @@ -408,6 +413,61 @@ def bucket_detail(bucket_name: str): ) +@ui_bp.get("/buckets//objects") +def list_bucket_objects(bucket_name: str): + """API endpoint for paginated object listing.""" + principal = _current_principal() + storage = _storage() + try: + _authorize_ui(principal, bucket_name, "list") + except IamError as exc: + return jsonify({"error": str(exc)}), 403 + + max_keys = min(int(request.args.get("max_keys", 100)), 1000) + continuation_token = request.args.get("continuation_token") or None + prefix = request.args.get("prefix") or None + + try: + result = storage.list_objects( + bucket_name, + max_keys=max_keys, + continuation_token=continuation_token, + prefix=prefix, + ) + except StorageError as exc: + return jsonify({"error": str(exc)}), 400 + + try: + versioning_enabled = storage.is_versioning_enabled(bucket_name) + except StorageError: + versioning_enabled = False + + objects_data = [] + for obj in result.objects: + objects_data.append({ + "key": obj.key, + "size": obj.size, + "last_modified": obj.last_modified.isoformat(), + "last_modified_display": obj.last_modified.strftime("%b %d, %Y %H:%M"), + "etag": obj.etag, + "metadata": obj.metadata or {}, + "preview_url": url_for("ui.object_preview", bucket_name=bucket_name, object_key=obj.key), + "download_url": url_for("ui.object_preview", bucket_name=bucket_name, object_key=obj.key) + "?download=1", + "presign_endpoint": url_for("ui.object_presign", bucket_name=bucket_name, object_key=obj.key), + "delete_endpoint": url_for("ui.delete_object", bucket_name=bucket_name, object_key=obj.key), + "versions_endpoint": url_for("ui.object_versions", bucket_name=bucket_name, object_key=obj.key), + "restore_template": url_for("ui.restore_object_version", bucket_name=bucket_name, object_key=obj.key, version_id="VERSION_ID_PLACEHOLDER"), + }) + + return jsonify({ + "objects": objects_data, + "is_truncated": result.is_truncated, + "next_continuation_token": result.next_continuation_token, + "total_count": result.total_count, + "versioning_enabled": versioning_enabled, + }) + + @ui_bp.post("/buckets//upload") @limiter.limit("30 per minute") def upload_object(bucket_name: str): diff --git a/app/version.py b/app/version.py index 78d120a..bd1e371 100644 --- a/app/version.py +++ b/app/version.py @@ -1,7 +1,7 @@ """Central location for the application version string.""" from __future__ import annotations -APP_VERSION = "0.1.6" +APP_VERSION = "0.1.7" def get_version() -> str: diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..3271334 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +norecursedirs = data .git __pycache__ .venv diff --git a/templates/base.html b/templates/base.html index 67f71b5..e3c3055 100644 --- a/templates/base.html +++ b/templates/base.html @@ -199,7 +199,7 @@ })();