Fix list performance for large buckets: delimiter-aware shallow listing, cache TTL increase, UI delimiter streaming. header badge shows total bucket objects, fix status bar text concatenation

This commit is contained in:
2026-02-26 16:29:28 +08:00
parent 5bf7962c04
commit 1c328ee3af
9 changed files with 387 additions and 118 deletions

View File

@@ -115,7 +115,7 @@ def create_app(
storage = ObjectStorage(
Path(app.config["STORAGE_ROOT"]),
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 5),
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 60),
object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100),
bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0),
object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024),

View File

@@ -241,7 +241,7 @@ class AppConfig:
cors_expose_headers = _csv(str(_get("CORS_EXPOSE_HEADERS", "*")), ["*"])
session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30))
bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60))
object_cache_ttl = int(_get("OBJECT_CACHE_TTL", 5))
object_cache_ttl = int(_get("OBJECT_CACHE_TTL", 60))
encryption_enabled = str(_get("ENCRYPTION_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
encryption_keys_dir = storage_root / ".myfsio.sys" / "keys"

View File

@@ -189,6 +189,9 @@ class EncryptedObjectStorage:
def list_objects(self, bucket_name: str, **kwargs):
return self.storage.list_objects(bucket_name, **kwargs)
def list_objects_shallow(self, bucket_name: str, **kwargs):
return self.storage.list_objects_shallow(bucket_name, **kwargs)
def list_objects_all(self, bucket_name: str):
return self.storage.list_objects_all(bucket_name)

View File

@@ -2671,54 +2671,43 @@ def bucket_handler(bucket_name: str) -> Response:
else:
effective_start = marker
fetch_keys = max_keys * 10 if delimiter else max_keys
try:
list_result = storage.list_objects(
bucket_name,
max_keys=fetch_keys,
continuation_token=effective_start or None,
prefix=prefix or None,
)
objects = list_result.objects
if delimiter:
shallow_result = storage.list_objects_shallow(
bucket_name,
prefix=prefix,
delimiter=delimiter,
max_keys=max_keys,
continuation_token=effective_start or None,
)
objects = shallow_result.objects
common_prefixes = shallow_result.common_prefixes
is_truncated = shallow_result.is_truncated
next_marker = shallow_result.next_continuation_token or ""
next_continuation_token = ""
if is_truncated and next_marker and list_type == "2":
next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")
else:
list_result = storage.list_objects(
bucket_name,
max_keys=max_keys,
continuation_token=effective_start or None,
prefix=prefix or None,
)
objects = list_result.objects
common_prefixes = []
is_truncated = list_result.is_truncated
next_marker = ""
next_continuation_token = ""
if is_truncated:
if objects:
next_marker = objects[-1].key
if list_type == "2" and next_marker:
next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")
except StorageError as exc:
return _error_response("NoSuchBucket", str(exc), 404)
common_prefixes: list[str] = []
filtered_objects: list = []
if delimiter:
seen_prefixes: set[str] = set()
for obj in objects:
key_after_prefix = obj.key[len(prefix):] if prefix else obj.key
if delimiter in key_after_prefix:
common_prefix = prefix + key_after_prefix.split(delimiter)[0] + delimiter
if common_prefix not in seen_prefixes:
seen_prefixes.add(common_prefix)
common_prefixes.append(common_prefix)
else:
filtered_objects.append(obj)
objects = filtered_objects
common_prefixes = sorted(common_prefixes)
total_items = len(objects) + len(common_prefixes)
is_truncated = total_items > max_keys or list_result.is_truncated
if len(objects) >= max_keys:
objects = objects[:max_keys]
common_prefixes = []
else:
remaining = max_keys - len(objects)
common_prefixes = common_prefixes[:remaining]
next_marker = ""
next_continuation_token = ""
if is_truncated:
if objects:
next_marker = objects[-1].key
elif common_prefixes:
next_marker = common_prefixes[-1].rstrip(delimiter) if delimiter else common_prefixes[-1]
if list_type == "2" and next_marker:
next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")
if list_type == "2":
root = Element("ListBucketResult")

View File

@@ -245,6 +245,7 @@ def stream_objects_ndjson(
url_templates: dict[str, str],
display_tz: str = "UTC",
versioning_enabled: bool = False,
delimiter: Optional[str] = None,
) -> Generator[str, None, None]:
meta_line = json.dumps({
"type": "meta",
@@ -258,11 +259,20 @@ def stream_objects_ndjson(
kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": 1000}
if prefix:
kwargs["Prefix"] = prefix
if delimiter:
kwargs["Delimiter"] = delimiter
running_count = 0
try:
paginator = client.get_paginator("list_objects_v2")
for page in paginator.paginate(**kwargs):
for obj in page.get("Contents", []):
for cp in page.get("CommonPrefixes", []):
yield json.dumps({
"type": "folder",
"prefix": cp["Prefix"],
}) + "\n"
page_contents = page.get("Contents", [])
for obj in page_contents:
last_mod = obj["LastModified"]
yield json.dumps({
"type": "object",
@@ -273,6 +283,8 @@ def stream_objects_ndjson(
"last_modified_iso": format_datetime_iso(last_mod, display_tz),
"etag": obj.get("ETag", "").strip('"'),
}) + "\n"
running_count += len(page_contents)
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
except ClientError as exc:
error_msg = exc.response.get("Error", {}).get("Message", "S3 operation failed")
yield json.dumps({"type": "error", "error": error_msg}) + "\n"

View File

@@ -154,6 +154,15 @@ class ListObjectsResult:
total_count: Optional[int] = None
@dataclass
class ShallowListResult:
"""Result for delimiter-aware directory-level listing."""
objects: List[ObjectMeta]
common_prefixes: List[str]
is_truncated: bool
next_continuation_token: Optional[str]
def _utcnow() -> datetime:
return datetime.now(timezone.utc)
@@ -279,25 +288,41 @@ class ObjectStorage:
version_count = 0
version_bytes = 0
internal = self.INTERNAL_FOLDERS
bucket_str = str(bucket_path)
try:
for path in bucket_path.rglob("*"):
if path.is_file():
rel = path.relative_to(bucket_path)
if not rel.parts:
continue
top_folder = rel.parts[0]
if top_folder not in self.INTERNAL_FOLDERS:
stat = path.stat()
object_count += 1
total_bytes += stat.st_size
stack = [bucket_str]
while stack:
current = stack.pop()
try:
with os.scandir(current) as it:
for entry in it:
if current == bucket_str and entry.name in internal:
continue
if entry.is_dir(follow_symlinks=False):
stack.append(entry.path)
elif entry.is_file(follow_symlinks=False):
object_count += 1
total_bytes += entry.stat(follow_symlinks=False).st_size
except PermissionError:
continue
versions_root = self._bucket_versions_root(bucket_name)
if versions_root.exists():
for path in versions_root.rglob("*.bin"):
if path.is_file():
stat = path.stat()
version_count += 1
version_bytes += stat.st_size
v_stack = [str(versions_root)]
while v_stack:
v_current = v_stack.pop()
try:
with os.scandir(v_current) as it:
for entry in it:
if entry.is_dir(follow_symlinks=False):
v_stack.append(entry.path)
elif entry.is_file(follow_symlinks=False) and entry.name.endswith(".bin"):
version_count += 1
version_bytes += entry.stat(follow_symlinks=False).st_size
except PermissionError:
continue
except OSError:
if cached_stats is not None:
return cached_stats
@@ -471,6 +496,202 @@ class ObjectStorage:
result = self.list_objects(bucket_name, max_keys=100000)
return result.objects
def list_objects_shallow(
self,
bucket_name: str,
*,
prefix: str = "",
delimiter: str = "/",
max_keys: int = 1000,
continuation_token: Optional[str] = None,
) -> ShallowListResult:
import bisect
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
if delimiter != "/" or (prefix and not prefix.endswith(delimiter)):
return self._shallow_via_full_scan(
bucket_name, prefix=prefix, delimiter=delimiter,
max_keys=max_keys, continuation_token=continuation_token,
)
target_dir = bucket_path
if prefix:
safe_prefix_path = Path(prefix.rstrip("/"))
if ".." in safe_prefix_path.parts:
return ShallowListResult(
objects=[], common_prefixes=[],
is_truncated=False, next_continuation_token=None,
)
target_dir = bucket_path / safe_prefix_path
try:
resolved = target_dir.resolve()
bucket_resolved = bucket_path.resolve()
if not str(resolved).startswith(str(bucket_resolved) + os.sep) and resolved != bucket_resolved:
return ShallowListResult(
objects=[], common_prefixes=[],
is_truncated=False, next_continuation_token=None,
)
except (OSError, ValueError):
return ShallowListResult(
objects=[], common_prefixes=[],
is_truncated=False, next_continuation_token=None,
)
if not target_dir.exists() or not target_dir.is_dir():
return ShallowListResult(
objects=[], common_prefixes=[],
is_truncated=False, next_continuation_token=None,
)
etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
meta_cache: Dict[str, str] = {}
if etag_index_path.exists():
try:
with open(etag_index_path, 'r', encoding='utf-8') as f:
meta_cache = json.load(f)
except (OSError, json.JSONDecodeError):
pass
entries_files: list[tuple[str, int, float, Optional[str]]] = []
entries_dirs: list[str] = []
try:
with os.scandir(str(target_dir)) as it:
for entry in it:
name = entry.name
if name in self.INTERNAL_FOLDERS:
continue
if entry.is_dir(follow_symlinks=False):
cp = prefix + name + delimiter
entries_dirs.append(cp)
elif entry.is_file(follow_symlinks=False):
key = prefix + name
try:
st = entry.stat()
etag = meta_cache.get(key)
entries_files.append((key, st.st_size, st.st_mtime, etag))
except OSError:
pass
except OSError:
return ShallowListResult(
objects=[], common_prefixes=[],
is_truncated=False, next_continuation_token=None,
)
entries_dirs.sort()
entries_files.sort(key=lambda x: x[0])
all_items: list[tuple[str, bool]] = []
fi, di = 0, 0
while fi < len(entries_files) and di < len(entries_dirs):
if entries_files[fi][0] <= entries_dirs[di]:
all_items.append((entries_files[fi][0], False))
fi += 1
else:
all_items.append((entries_dirs[di], True))
di += 1
while fi < len(entries_files):
all_items.append((entries_files[fi][0], False))
fi += 1
while di < len(entries_dirs):
all_items.append((entries_dirs[di], True))
di += 1
files_map = {e[0]: e for e in entries_files}
start_index = 0
if continuation_token:
all_keys = [item[0] for item in all_items]
start_index = bisect.bisect_right(all_keys, continuation_token)
selected = all_items[start_index:start_index + max_keys]
is_truncated = (start_index + max_keys) < len(all_items)
result_objects: list[ObjectMeta] = []
result_prefixes: list[str] = []
for item_key, is_dir in selected:
if is_dir:
result_prefixes.append(item_key)
else:
fdata = files_map[item_key]
result_objects.append(ObjectMeta(
key=fdata[0],
size=fdata[1],
last_modified=datetime.fromtimestamp(fdata[2], timezone.utc),
etag=fdata[3],
metadata=None,
))
next_token = None
if is_truncated and selected:
next_token = selected[-1][0]
return ShallowListResult(
objects=result_objects,
common_prefixes=result_prefixes,
is_truncated=is_truncated,
next_continuation_token=next_token,
)
def _shallow_via_full_scan(
self,
bucket_name: str,
*,
prefix: str = "",
delimiter: str = "/",
max_keys: int = 1000,
continuation_token: Optional[str] = None,
) -> ShallowListResult:
list_result = self.list_objects(
bucket_name,
max_keys=max_keys * 10,
continuation_token=continuation_token,
prefix=prefix or None,
)
common_prefixes: list[str] = []
filtered_objects: list[ObjectMeta] = []
seen_prefixes: set[str] = set()
for obj in list_result.objects:
key_after_prefix = obj.key[len(prefix):] if prefix else obj.key
if delimiter in key_after_prefix:
cp = prefix + key_after_prefix.split(delimiter)[0] + delimiter
if cp not in seen_prefixes:
seen_prefixes.add(cp)
common_prefixes.append(cp)
else:
filtered_objects.append(obj)
common_prefixes.sort()
total_items = len(filtered_objects) + len(common_prefixes)
is_truncated = total_items > max_keys or list_result.is_truncated
if len(filtered_objects) >= max_keys:
filtered_objects = filtered_objects[:max_keys]
common_prefixes = []
else:
remaining = max_keys - len(filtered_objects)
common_prefixes = common_prefixes[:remaining]
next_token = None
if is_truncated:
if filtered_objects:
next_token = filtered_objects[-1].key
elif common_prefixes:
next_token = common_prefixes[-1].rstrip(delimiter) if delimiter else common_prefixes[-1]
return ShallowListResult(
objects=filtered_objects,
common_prefixes=common_prefixes,
is_truncated=is_truncated,
next_continuation_token=next_token,
)
def put_object(
self,
bucket_name: str,

View File

@@ -616,6 +616,7 @@ def stream_bucket_objects(bucket_name: str):
return jsonify({"error": str(exc)}), 403
prefix = request.args.get("prefix") or None
delimiter = request.args.get("delimiter") or None
try:
client = get_session_s3_client()
@@ -629,6 +630,7 @@ def stream_bucket_objects(bucket_name: str):
return Response(
stream_objects_ndjson(
client, bucket_name, prefix, url_templates, display_tz, versioning_enabled,
delimiter=delimiter,
),
mimetype='application/x-ndjson',
headers={