perf: shallow listing, os.scandir stats, server-side search for large buckets

This commit is contained in:
2026-02-26 17:11:07 +08:00
parent 1c328ee3af
commit a356bb0c4e
4 changed files with 184 additions and 15 deletions

View File

@@ -192,7 +192,10 @@ class EncryptedObjectStorage:
def list_objects_shallow(self, bucket_name: str, **kwargs):
    """Forward a shallow (single-level) listing request to the wrapped backend."""
    backend = self.storage
    return backend.list_objects_shallow(bucket_name, **kwargs)
def search_objects(self, bucket_name: str, query: str, **kwargs):
    """Forward a key-substring search request to the wrapped backend."""
    backend = self.storage
    return backend.search_objects(bucket_name, query, **kwargs)
def list_objects_all(self, bucket_name: str):
    """Forward a full (recursive) listing request to the wrapped backend."""
    backend = self.storage
    return backend.list_objects_all(bucket_name)

View File

@@ -692,6 +692,83 @@ class ObjectStorage:
next_continuation_token=next_token,
)
def search_objects(
    self,
    bucket_name: str,
    query: str,
    *,
    prefix: str = "",
    limit: int = 500,
) -> Dict[str, Any]:
    """Case-insensitive substring search over object keys in a bucket.

    Walks the bucket tree iteratively with ``os.scandir`` (no recursion,
    one ``stat`` per matched entry) so large buckets can be searched
    without materialising a full listing.

    Args:
        bucket_name: Name of the bucket to search.
        query: Substring matched case-insensitively against object keys.
        prefix: Optional key prefix restricting the search subtree.
        limit: Maximum number of results returned.

    Returns:
        Dict with ``results`` — a list of ``{key, size, last_modified}``
        sorted by key and capped at *limit* — and ``truncated`` — True
        when more matches were found than returned.

    Raises:
        BucketNotFoundError: If the bucket directory does not exist.
    """
    bucket_path = self._bucket_path(bucket_name)
    if not bucket_path.is_dir():
        raise BucketNotFoundError("Bucket does not exist")
    if prefix:
        search_root = bucket_path / prefix.replace("/", os.sep)
        if not search_root.is_dir():
            return {"results": [], "truncated": False}
        # Containment check. A plain str.startswith() here would accept a
        # sibling directory whose name merely shares a prefix with the
        # bucket (e.g. ".../data2" starts with ".../data"), letting a
        # "../" prefix escape the bucket; is_relative_to() compares whole
        # path components.
        if not search_root.resolve().is_relative_to(bucket_path.resolve()):
            return {"results": [], "truncated": False}
    else:
        search_root = bucket_path
    query_lower = query.lower()
    results: list[Dict[str, Any]] = []
    internal = self.INTERNAL_FOLDERS
    bucket_str = str(bucket_path)
    bucket_len = len(bucket_str) + 1  # strip "<bucket>/" from entry paths
    meta_root = self._bucket_meta_root(bucket_name)
    # Over-scan up to 4x the limit so the post-sort slice still returns the
    # lexicographically smallest keys even though matches arrive in
    # directory-walk order.
    scan_limit = limit * 4
    matched = 0
    stack = [str(search_root)]
    while stack:
        current = stack.pop()
        try:
            with os.scandir(current) as it:
                for entry in it:
                    # Bookkeeping folders are skipped at the bucket root only.
                    if current == bucket_str and entry.name in internal:
                        continue
                    if entry.is_dir(follow_symlinks=False):
                        stack.append(entry.path)
                    elif entry.is_file(follow_symlinks=False):
                        key = entry.path[bucket_len:].replace(os.sep, "/")
                        if query_lower in key.lower():
                            st = entry.stat(follow_symlinks=False)
                            meta_path = meta_root / (key + ".meta.json")
                            last_modified = ""
                            # Prefer the recorded metadata timestamp; fall
                            # back to the stored file's mtime below.
                            try:
                                if meta_path.exists():
                                    md = json.loads(meta_path.read_text(encoding="utf-8"))
                                    last_modified = md.get("last_modified", "")
                            except (OSError, json.JSONDecodeError):
                                pass
                            if not last_modified:
                                last_modified = datetime.fromtimestamp(
                                    st.st_mtime, tz=timezone.utc
                                ).strftime("%Y-%m-%dT%H:%M:%S.000Z")
                            results.append({
                                "key": key,
                                "size": st.st_size,
                                "last_modified": last_modified,
                            })
                            matched += 1
                            if matched >= scan_limit:
                                break
        except PermissionError:
            # Unreadable directories are skipped, not fatal.
            continue
        if matched >= scan_limit:
            break
    results.sort(key=lambda r: r["key"])
    truncated = len(results) > limit
    return {"results": results[:limit], "truncated": truncated}
def put_object(
self,
bucket_name: str,

View File

@@ -641,6 +641,33 @@ def stream_bucket_objects(bucket_name: str):
)
@ui_bp.get("/buckets/<bucket_name>/objects/search")
@limiter.limit("30 per minute")
def search_bucket_objects(bucket_name: str):
    """UI endpoint: server-side substring search over object keys.

    Query params: ``q`` (search term, required), ``prefix`` (optional key
    prefix), ``limit`` (clamped to 1..1000, default 500). Requires the
    "list" permission on the bucket; 403 on IAM failure, 404 on storage
    errors (e.g. missing bucket).
    """
    principal = _current_principal()
    try:
        _authorize_ui(principal, bucket_name, "list")
    except IamError as exc:
        return jsonify({"error": str(exc)}), 403

    term = request.args.get("q", "").strip()
    if not term:
        # An empty search term is a no-op, not an error.
        return jsonify({"results": [], "truncated": False})

    try:
        capped = int(request.args.get("limit", 500))
    except (ValueError, TypeError):
        capped = 500
    else:
        capped = max(1, min(capped, 1000))

    key_prefix = request.args.get("prefix", "").strip()
    store = _storage()
    try:
        payload = store.search_objects(bucket_name, term, prefix=key_prefix, limit=capped)
    except StorageError as exc:
        return jsonify({"error": str(exc)}), 404
    return jsonify(payload)
@ui_bp.post("/buckets/<bucket_name>/upload")
@limiter.limit("30 per minute")
def upload_object(bucket_name: str):