diff --git a/README.md b/README.md
index 1f6519d..a7ae1f4 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ MyFSIO is a batteries-included, Flask-based recreation of Amazon S3 and IAM work
 - **IAM + access keys:** Users, access keys, key rotation, and bucket-scoped actions (`list/read/write/delete/policy`) now live in `data/.myfsio.sys/config/iam.json` and are editable from the IAM dashboard.
 - **Bucket policies + hot reload:** `data/.myfsio.sys/config/bucket_policies.json` uses AWS' policy grammar (Version `2012-10-17`) with a built-in watcher, so editing the JSON file applies immediately. The UI also ships Public/Private/Custom presets for faster edits.
 - **Presigned URLs everywhere:** Signature Version 4 presigned URLs respect IAM + bucket policies and replace the now-removed "share link" feature for public access scenarios.
-- **Modern UI:** Responsive tables, quick filters, preview sidebar, object-level delete buttons, a presign modal, and an inline JSON policy editor that respects dark mode keep bucket management friendly.
+- **Modern UI:** Responsive tables, quick filters, preview sidebar, object-level delete buttons, a presign modal, and an inline JSON policy editor that respects dark mode keep bucket management friendly. The object browser supports folder navigation, infinite scroll pagination, bulk operations, and automatic retry on load failures.
 - **Tests & health:** `/healthz` for smoke checks and `pytest` coverage for IAM, CRUD, presign, and policy flows.
 
 ## Architecture at a Glance
diff --git a/app/encrypted_storage.py b/app/encrypted_storage.py
index 2acc679..372dd1c 100644
--- a/app/encrypted_storage.py
+++ b/app/encrypted_storage.py
@@ -188,8 +188,11 @@ class EncryptedObjectStorage:
     def bucket_stats(self, bucket_name: str, cache_ttl: int = 60):
         return self.storage.bucket_stats(bucket_name, cache_ttl)
 
-    def list_objects(self, bucket_name: str):
-        return self.storage.list_objects(bucket_name)
+    def list_objects(self, bucket_name: str, **kwargs):
+        return self.storage.list_objects(bucket_name, **kwargs)
+
+    def list_objects_all(self, bucket_name: str):
+        return self.storage.list_objects_all(bucket_name)
 
     def get_object_path(self, bucket_name: str, object_key: str):
         return self.storage.get_object_path(bucket_name, object_key)
diff --git a/app/replication.py b/app/replication.py
index 6c30a7b..a48296a 100644
--- a/app/replication.py
+++ b/app/replication.py
@@ -155,7 +155,7 @@ class ReplicationManager:
         try:
             # Get source objects
-            source_objects = self.storage.list_objects(bucket_name)
+            source_objects = self.storage.list_objects_all(bucket_name)
             source_keys = {obj.key: obj.size for obj in source_objects}
 
             # Get destination objects
@@ -219,7 +219,7 @@ class ReplicationManager:
             return
 
         try:
-            objects = self.storage.list_objects(bucket_name)
+            objects = self.storage.list_objects_all(bucket_name)
             logger.info(f"Starting replication of {len(objects)} existing objects from {bucket_name}")
             for obj in objects:
                 self._executor.submit(self._replicate_task, bucket_name, obj.key, rule, connection, "write")
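
Note: the replication paths switch to `list_objects_all` because sync comparison and initial replication need the complete key set, not a single page. A caller that would rather stream pages could exhaust the paginated API instead; a minimal sketch, using only names introduced in this diff (the helper itself is hypothetical, not part of the change):

```python
from typing import Iterator


def iter_all_objects(storage, bucket_name: str, page_size: int = 1000) -> Iterator:
    """Illustrative only: walk every page of list_objects() instead of
    materializing the whole bucket the way list_objects_all() does."""
    token = None
    while True:
        result = storage.list_objects(
            bucket_name, max_keys=page_size, continuation_token=token
        )
        yield from result.objects
        if not result.is_truncated:
            return
        token = result.next_continuation_token
```
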
return _error_response("AccessDenied", str(exc), 403) try: - objects = storage.list_objects(bucket_name) + objects = storage.list_objects_all(bucket_name) except StorageError as exc: return _error_response("NoSuchBucket", str(exc), 404) diff --git a/app/storage.py b/app/storage.py index 7f91596..4875519 100644 --- a/app/storage.py +++ b/app/storage.py @@ -99,6 +99,15 @@ class BucketMeta: created_at: datetime +@dataclass +class ListObjectsResult: + """Paginated result for object listing.""" + objects: List[ObjectMeta] + is_truncated: bool + next_continuation_token: Optional[str] + total_count: Optional[int] = None # Total objects in bucket (from stats cache) + + def _utcnow() -> datetime: return datetime.now(timezone.utc) @@ -241,31 +250,105 @@ class ObjectStorage: self._remove_tree(self._system_bucket_root(bucket_path.name)) self._remove_tree(self._multipart_bucket_root(bucket_path.name)) - def list_objects(self, bucket_name: str) -> List[ObjectMeta]: + def list_objects( + self, + bucket_name: str, + *, + max_keys: int = 1000, + continuation_token: Optional[str] = None, + prefix: Optional[str] = None, + ) -> ListObjectsResult: + """List objects in a bucket with pagination support. + + Args: + bucket_name: Name of the bucket + max_keys: Maximum number of objects to return (default 1000) + continuation_token: Token from previous request for pagination + prefix: Filter objects by key prefix + + Returns: + ListObjectsResult with objects, truncation status, and continuation token + """ bucket_path = self._bucket_path(bucket_name) if not bucket_path.exists(): raise StorageError("Bucket does not exist") bucket_id = bucket_path.name - objects: List[ObjectMeta] = [] + # Collect all matching object keys first (lightweight - just paths) + all_keys: List[str] = [] for path in bucket_path.rglob("*"): if path.is_file(): - stat = path.stat() rel = path.relative_to(bucket_path) if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS: continue - metadata = self._read_metadata(bucket_id, rel) + key = str(rel.as_posix()) + if prefix and not key.startswith(prefix): + continue + all_keys.append(key) + + all_keys.sort() + total_count = len(all_keys) + + # Handle continuation token (the key to start after) + start_index = 0 + if continuation_token: + try: + # continuation_token is the last key from previous page + for i, key in enumerate(all_keys): + if key > continuation_token: + start_index = i + break + else: + # Token is past all keys + return ListObjectsResult( + objects=[], + is_truncated=False, + next_continuation_token=None, + total_count=total_count, + ) + except Exception: + pass # Invalid token, start from beginning + + # Get the slice we need + end_index = start_index + max_keys + keys_slice = all_keys[start_index:end_index] + is_truncated = end_index < total_count + + # Now load full metadata only for the objects we're returning + objects: List[ObjectMeta] = [] + for key in keys_slice: + safe_key = self._sanitize_object_key(key) + path = bucket_path / safe_key + if not path.exists(): + continue # Object may have been deleted + try: + stat = path.stat() + metadata = self._read_metadata(bucket_id, safe_key) objects.append( ObjectMeta( - key=str(rel.as_posix()), + key=key, size=stat.st_size, last_modified=datetime.fromtimestamp(stat.st_mtime), etag=self._compute_etag(path), metadata=metadata or None, ) ) - objects.sort(key=lambda meta: meta.key) - return objects + except OSError: + continue # File may have been deleted during iteration + + next_token = keys_slice[-1] if is_truncated and keys_slice else 
diff --git a/app/ui.py b/app/ui.py
index 96f807a..2d2936f 100644
--- a/app/ui.py
+++ b/app/ui.py
@@ -294,7 +294,9 @@ def bucket_detail(bucket_name: str):
     storage = _storage()
     try:
         _authorize_ui(principal, bucket_name, "list")
-        objects = storage.list_objects(bucket_name)
+        # Don't load objects here - UI fetches them asynchronously via /buckets/<bucket_name>/objects
+        if not storage.bucket_exists(bucket_name):
+            raise StorageError("Bucket does not exist")
     except (StorageError, IamError) as exc:
         flash(_friendly_error_message(exc), "danger")
         return redirect(url_for("ui.buckets_overview"))
@@ -382,10 +384,13 @@ def bucket_detail(bucket_name: str):
     except IamError:
         pass
 
+    # Pass the objects API endpoint URL for async loading
+    objects_api_url = url_for("ui.list_bucket_objects", bucket_name=bucket_name)
+
     return render_template(
         "bucket_detail.html",
         bucket_name=bucket_name,
-        objects=objects,
+        objects_api_url=objects_api_url,
         principal=principal,
         bucket_policy_text=policy_text,
         bucket_policy=bucket_policy,
@@ -408,6 +413,61 @@ def bucket_detail(bucket_name: str):
     )
 
 
+@ui_bp.get("/buckets/<bucket_name>/objects")
+def list_bucket_objects(bucket_name: str):
+    """API endpoint for paginated object listing."""
+    principal = _current_principal()
+    storage = _storage()
+    try:
+        _authorize_ui(principal, bucket_name, "list")
+    except IamError as exc:
+        return jsonify({"error": str(exc)}), 403
+
+    max_keys = min(int(request.args.get("max_keys", 100)), 1000)
+    continuation_token = request.args.get("continuation_token") or None
+    prefix = request.args.get("prefix") or None
+
+    try:
+        result = storage.list_objects(
+            bucket_name,
+            max_keys=max_keys,
+            continuation_token=continuation_token,
+            prefix=prefix,
+        )
+    except StorageError as exc:
+        return jsonify({"error": str(exc)}), 400
+
+    try:
+        versioning_enabled = storage.is_versioning_enabled(bucket_name)
+    except StorageError:
+        versioning_enabled = False
+
+    objects_data = []
+    for obj in result.objects:
+        objects_data.append({
+            "key": obj.key,
+            "size": obj.size,
+            "last_modified": obj.last_modified.isoformat(),
+            "last_modified_display": obj.last_modified.strftime("%b %d, %Y %H:%M"),
+            "etag": obj.etag,
+            "metadata": obj.metadata or {},
+            "preview_url": url_for("ui.object_preview", bucket_name=bucket_name, object_key=obj.key),
+            "download_url": url_for("ui.object_preview", bucket_name=bucket_name, object_key=obj.key) + "?download=1",
+            "presign_endpoint": url_for("ui.object_presign", bucket_name=bucket_name, object_key=obj.key),
+            "delete_endpoint": url_for("ui.delete_object", bucket_name=bucket_name, object_key=obj.key),
+            "versions_endpoint": url_for("ui.object_versions", bucket_name=bucket_name, object_key=obj.key),
+            "restore_template": url_for("ui.restore_object_version", bucket_name=bucket_name, object_key=obj.key, version_id="VERSION_ID_PLACEHOLDER"),
+        })
+
+    return jsonify({
+        "objects": objects_data,
+        "is_truncated": result.is_truncated,
+        "next_continuation_token": result.next_continuation_token,
+        "total_count": result.total_count,
+        "versioning_enabled": versioning_enabled,
+    })
+
+
 @ui_bp.post("/buckets/<bucket_name>/upload")
 @limiter.limit("30 per minute")
 def upload_object(bucket_name: str):
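
For reference, one page of the new endpoint could be fetched like this; the host, port, and whatever session authentication `_current_principal` expects are assumptions, and the query parameters mirror `list_bucket_objects` above:

```python
import requests

session = requests.Session()  # assumed to already carry a logged-in UI session
resp = session.get(
    "http://127.0.0.1:5000/buckets/photos/objects",
    params={"max_keys": 100, "prefix": "2024/"},
)
resp.raise_for_status()
payload = resp.json()
for obj in payload["objects"]:
    print(obj["key"], obj["size"], obj["last_modified_display"])
if payload["is_truncated"]:
    # Feed this back as ?continuation_token=... to fetch the next page.
    next_token = payload["next_continuation_token"]
```
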
diff --git a/app/version.py b/app/version.py
index 78d120a..bd1e371 100644
--- a/app/version.py
+++ b/app/version.py
@@ -1,7 +1,7 @@
 """Central location for the application version string."""
 from __future__ import annotations
 
-APP_VERSION = "0.1.6"
+APP_VERSION = "0.1.7"
 
 
 def get_version() -> str:
diff --git a/docs.md b/docs.md
index 2f9c3d7..6515b6c 100644
--- a/docs.md
+++ b/docs.md
@@ -341,6 +341,7 @@ Before upgrading across major versions, verify compatibility:
 | From Version | To Version | Breaking Changes | Migration Required |
 |--------------|------------|------------------|-------------------|
 | 0.1.x | 0.2.x | None expected | No |
+| 0.1.6 | 0.1.7 | None | No |
 | < 0.1.0 | >= 0.1.0 | New IAM config format | Yes - run migration script |
 
 **Automatic compatibility detection:**
@@ -634,6 +635,48 @@ curl -X PUT http://127.0.0.1:5000/bucket-policy/test \
 
 The UI will reflect this change as soon as the request completes thanks to the hot reload.
 
+### UI Object Browser
+
+The bucket detail page includes a powerful object browser with the following features:
+
+#### Folder Navigation
+
+Objects with forward slashes (`/`) in their keys are displayed as a folder hierarchy. Click a folder row to navigate into it. A breadcrumb navigation bar shows your current path and allows quick navigation back to parent folders or the root.
+
+#### Pagination & Infinite Scroll
+
+- Objects load in configurable batches (50, 100, 150, 200, or 250 per page)
+- Scroll to the bottom to automatically load more objects (infinite scroll)
+- A **Load more** button is available as a fallback for touch devices or when infinite scroll doesn't trigger
+- The footer shows the current load status (e.g., "Showing 100 of 500 objects")
+
+#### Bulk Operations
+
+- Select multiple objects using checkboxes
+- **Bulk Delete**: Delete multiple objects at once
+- **Bulk Download**: Download selected objects as individual files
+
+#### Search & Filter
+
+Use the search box to filter objects by name in real time. The filter applies to the currently loaded objects.
+
+#### Error Handling
+
+If object loading fails (e.g., network error), a friendly error message is displayed with a **Retry** button to attempt loading again.
+
+#### Object Preview
+
+Click any object row to view its details in the preview sidebar:
+- File size and last modified date
+- ETag (content hash)
+- Custom metadata (if present)
+- Download and presign (share link) buttons
+- Version history (when versioning is enabled)
+
+#### Drag & Drop Upload
+
+Drag files directly onto the objects table to upload them to the current bucket and folder path.
+
 ## 6. Presigned URLs
 
 - Trigger from the UI using the **Presign** button after selecting an object.
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..3271334
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+testpaths = tests
+norecursedirs = data .git __pycache__ .venv
diff --git a/requirements.txt b/requirements.txt
index 544968d..e69aa35 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,10 @@
 Flask>=3.1.2
-Flask-Limiter>=4.1.0
-Flask-Cors>=6.0.1
+Flask-Limiter>=4.1.1
+Flask-Cors>=6.0.2
 Flask-WTF>=1.2.2
-pytest>=9.0.1
+pytest>=9.0.2
 requests>=2.32.5
-boto3>=1.42.1
+boto3>=1.42.14
 waitress>=3.0.2
 psutil>=7.1.3
 cryptography>=46.0.3
\ No newline at end of file
diff --git a/templates/base.html b/templates/base.html
index 67f71b5..e3c3055 100644
--- a/templates/base.html
+++ b/templates/base.html
@@ -199,7 +199,7 @@
     })();
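
A pagination regression test in the spirit of the existing `pytest` suite might look like the sketch below; the `storage` fixture and the `create_bucket`/`put_object` signatures are assumptions, not part of this diff:

```python
def test_list_objects_pagination(storage):  # "storage" fixture is hypothetical
    storage.create_bucket("pages")  # helper signatures assumed for illustration
    for i in range(5):
        storage.put_object("pages", f"obj-{i}.txt", b"x")

    first = storage.list_objects("pages", max_keys=3)
    assert first.is_truncated and len(first.objects) == 3

    rest = storage.list_objects(
        "pages", max_keys=3, continuation_token=first.next_continuation_token
    )
    assert not rest.is_truncated

    keys = [o.key for o in first.objects + rest.objects]
    assert keys == sorted(f"obj-{i}.txt" for i in range(5))
```
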