diff --git a/app/encrypted_storage.py b/app/encrypted_storage.py
index a0d3a58..b64e1d1 100644
--- a/app/encrypted_storage.py
+++ b/app/encrypted_storage.py
@@ -193,6 +193,9 @@ class EncryptedObjectStorage:
     def list_objects_shallow(self, bucket_name: str, **kwargs):
         return self.storage.list_objects_shallow(bucket_name, **kwargs)
 
+    def iter_objects_shallow(self, bucket_name: str, **kwargs):
+        return self.storage.iter_objects_shallow(bucket_name, **kwargs)
+
     def search_objects(self, bucket_name: str, query: str, **kwargs):
         return self.storage.search_objects(bucket_name, query, **kwargs)
 
diff --git a/app/storage.py b/app/storage.py
index 21e920a..fa66332 100644
--- a/app/storage.py
+++ b/app/storage.py
@@ -714,6 +714,73 @@
             next_continuation_token=next_token,
         )
 
+    def iter_objects_shallow(
+        self,
+        bucket_name: str,
+        *,
+        prefix: str = "",
+        delimiter: str = "/",
+    ) -> Generator[tuple[str, ObjectMeta | str], None, None]:
+        bucket_path = self._bucket_path(bucket_name)
+        if not bucket_path.exists():
+            raise BucketNotFoundError("Bucket does not exist")
+        bucket_id = bucket_path.name
+
+        target_dir = bucket_path
+        if prefix:
+            safe_prefix_path = Path(prefix.rstrip("/"))
+            if ".." in safe_prefix_path.parts:
+                return
+            target_dir = bucket_path / safe_prefix_path
+            try:
+                resolved = target_dir.resolve()
+                bucket_resolved = bucket_path.resolve()
+                if not resolved.is_relative_to(bucket_resolved):
+                    return
+            except (OSError, ValueError):
+                return
+
+        if not target_dir.exists() or not target_dir.is_dir():
+            return
+
+        etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
+        meta_cache: dict[str, str] = {}
+        if etag_index_path.exists():
+            try:
+                with open(etag_index_path, 'r', encoding='utf-8') as f:
+                    meta_cache = json.load(f)
+            except (OSError, json.JSONDecodeError):
+                pass
+
+        try:
+            with os.scandir(str(target_dir)) as it:
+                for entry in it:
+                    name = entry.name
+                    if name in self.INTERNAL_FOLDERS:
+                        continue
+                    if entry.is_dir(follow_symlinks=False):
+                        yield ("folder", prefix + name + delimiter)
+                    elif entry.is_file(follow_symlinks=False):
+                        key = prefix + name
+                        try:
+                            st = entry.stat()
+                            etag = meta_cache.get(key)
+                            if etag is None:
+                                safe_key = PurePosixPath(key)
+                                meta = self._read_metadata(bucket_id, Path(safe_key))
+                                etag = meta.get("__etag__") if meta else None
+                            yield ("object", ObjectMeta(
+                                key=key,
+                                size=st.st_size,
+                                last_modified=datetime.fromtimestamp(st.st_mtime, timezone.utc),
+                                etag=etag,
+                                metadata=None,
+                            ))
+                        except OSError:
+                            pass
+        except OSError:
+            return
+
     def _shallow_via_full_scan(
         self,
         bucket_name: str,
diff --git a/app/ui.py b/app/ui.py
index 9fb832b..b1a0947 100644
--- a/app/ui.py
+++ b/app/ui.py
@@ -618,20 +618,77 @@ def stream_bucket_objects(bucket_name: str):
     prefix = request.args.get("prefix") or None
     delimiter = request.args.get("delimiter") or None
 
+    storage = _storage()
     try:
-        client = get_session_s3_client()
-    except (PermissionError, RuntimeError) as exc:
-        return jsonify({"error": str(exc)}), 403
-
-    versioning_enabled = get_versioning_via_s3(client, bucket_name)
+        versioning_enabled = storage.is_versioning_enabled(bucket_name)
+    except StorageError:
+        versioning_enabled = False
     url_templates = build_url_templates(bucket_name)
     display_tz = current_app.config.get("DISPLAY_TIMEZONE", "UTC")
 
+    def generate():
+        yield json.dumps({
+            "type": "meta",
+            "versioning_enabled": versioning_enabled,
+            "url_templates": url_templates,
+        }) + "\n"
+        yield json.dumps({"type": "count", "total_count": 0}) + "\n"
+
+        running_count = 0
+        try:
+            if delimiter:
+                for item_type, item in storage.iter_objects_shallow(
+                    bucket_name, prefix=prefix or "", delimiter=delimiter,
+                ):
+                    if item_type == "folder":
+                        yield json.dumps({"type": "folder", "prefix": item}) + "\n"
+                    else:
+                        last_mod = item.last_modified
+                        yield json.dumps({
+                            "type": "object",
+                            "key": item.key,
+                            "size": item.size,
+                            "last_modified": last_mod.isoformat(),
+                            "last_modified_display": _format_datetime_display(last_mod, display_tz),
+                            "last_modified_iso": _format_datetime_iso(last_mod, display_tz),
+                            "etag": item.etag or "",
+                        }) + "\n"
+                        running_count += 1
+                        if running_count % 1000 == 0:
+                            yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
+            else:
+                continuation_token = None
+                while True:
+                    result = storage.list_objects(
+                        bucket_name,
+                        max_keys=1000,
+                        continuation_token=continuation_token,
+                        prefix=prefix,
+                    )
+                    for obj in result.objects:
+                        last_mod = obj.last_modified
+                        yield json.dumps({
+                            "type": "object",
+                            "key": obj.key,
+                            "size": obj.size,
+                            "last_modified": last_mod.isoformat(),
+                            "last_modified_display": _format_datetime_display(last_mod, display_tz),
+                            "last_modified_iso": _format_datetime_iso(last_mod, display_tz),
+                            "etag": obj.etag or "",
+                        }) + "\n"
+                    running_count += len(result.objects)
+                    yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
+                    if not result.is_truncated:
+                        break
+                    continuation_token = result.next_continuation_token
+        except StorageError as exc:
+            yield json.dumps({"type": "error", "error": str(exc)}) + "\n"
+            return
+        yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
+        yield json.dumps({"type": "done"}) + "\n"
+
     return Response(
-        stream_objects_ndjson(
-            client, bucket_name, prefix, url_templates, display_tz, versioning_enabled,
-            delimiter=delimiter,
-        ),
+        generate(),
         mimetype='application/x-ndjson',
         headers={
             'Cache-Control': 'no-cache',