Fix list performance for large buckets: delimiter-aware shallow listing, cache TTL increase, UI delimiter streaming. header badge shows total bucket objects, fix status bar text concatenation
This commit is contained in:
@@ -115,7 +115,7 @@ def create_app(
|
||||
|
||||
storage = ObjectStorage(
|
||||
Path(app.config["STORAGE_ROOT"]),
|
||||
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 5),
|
||||
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 60),
|
||||
object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100),
|
||||
bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0),
|
||||
object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024),
|
||||
|
||||
@@ -241,7 +241,7 @@ class AppConfig:
|
||||
cors_expose_headers = _csv(str(_get("CORS_EXPOSE_HEADERS", "*")), ["*"])
|
||||
session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30))
|
||||
bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60))
|
||||
object_cache_ttl = int(_get("OBJECT_CACHE_TTL", 5))
|
||||
object_cache_ttl = int(_get("OBJECT_CACHE_TTL", 60))
|
||||
|
||||
encryption_enabled = str(_get("ENCRYPTION_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
encryption_keys_dir = storage_root / ".myfsio.sys" / "keys"
|
||||
|
||||
@@ -189,6 +189,9 @@ class EncryptedObjectStorage:
|
||||
|
||||
def list_objects(self, bucket_name: str, **kwargs):
|
||||
return self.storage.list_objects(bucket_name, **kwargs)
|
||||
|
||||
def list_objects_shallow(self, bucket_name: str, **kwargs):
|
||||
return self.storage.list_objects_shallow(bucket_name, **kwargs)
|
||||
|
||||
def list_objects_all(self, bucket_name: str):
|
||||
return self.storage.list_objects_all(bucket_name)
|
||||
|
||||
@@ -2671,54 +2671,43 @@ def bucket_handler(bucket_name: str) -> Response:
|
||||
else:
|
||||
effective_start = marker
|
||||
|
||||
fetch_keys = max_keys * 10 if delimiter else max_keys
|
||||
try:
|
||||
list_result = storage.list_objects(
|
||||
bucket_name,
|
||||
max_keys=fetch_keys,
|
||||
continuation_token=effective_start or None,
|
||||
prefix=prefix or None,
|
||||
)
|
||||
objects = list_result.objects
|
||||
if delimiter:
|
||||
shallow_result = storage.list_objects_shallow(
|
||||
bucket_name,
|
||||
prefix=prefix,
|
||||
delimiter=delimiter,
|
||||
max_keys=max_keys,
|
||||
continuation_token=effective_start or None,
|
||||
)
|
||||
objects = shallow_result.objects
|
||||
common_prefixes = shallow_result.common_prefixes
|
||||
is_truncated = shallow_result.is_truncated
|
||||
|
||||
next_marker = shallow_result.next_continuation_token or ""
|
||||
next_continuation_token = ""
|
||||
if is_truncated and next_marker and list_type == "2":
|
||||
next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")
|
||||
else:
|
||||
list_result = storage.list_objects(
|
||||
bucket_name,
|
||||
max_keys=max_keys,
|
||||
continuation_token=effective_start or None,
|
||||
prefix=prefix or None,
|
||||
)
|
||||
objects = list_result.objects
|
||||
common_prefixes = []
|
||||
is_truncated = list_result.is_truncated
|
||||
|
||||
next_marker = ""
|
||||
next_continuation_token = ""
|
||||
if is_truncated:
|
||||
if objects:
|
||||
next_marker = objects[-1].key
|
||||
if list_type == "2" and next_marker:
|
||||
next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")
|
||||
except StorageError as exc:
|
||||
return _error_response("NoSuchBucket", str(exc), 404)
|
||||
|
||||
common_prefixes: list[str] = []
|
||||
filtered_objects: list = []
|
||||
if delimiter:
|
||||
seen_prefixes: set[str] = set()
|
||||
for obj in objects:
|
||||
key_after_prefix = obj.key[len(prefix):] if prefix else obj.key
|
||||
if delimiter in key_after_prefix:
|
||||
common_prefix = prefix + key_after_prefix.split(delimiter)[0] + delimiter
|
||||
if common_prefix not in seen_prefixes:
|
||||
seen_prefixes.add(common_prefix)
|
||||
common_prefixes.append(common_prefix)
|
||||
else:
|
||||
filtered_objects.append(obj)
|
||||
objects = filtered_objects
|
||||
common_prefixes = sorted(common_prefixes)
|
||||
|
||||
total_items = len(objects) + len(common_prefixes)
|
||||
is_truncated = total_items > max_keys or list_result.is_truncated
|
||||
|
||||
if len(objects) >= max_keys:
|
||||
objects = objects[:max_keys]
|
||||
common_prefixes = []
|
||||
else:
|
||||
remaining = max_keys - len(objects)
|
||||
common_prefixes = common_prefixes[:remaining]
|
||||
|
||||
next_marker = ""
|
||||
next_continuation_token = ""
|
||||
if is_truncated:
|
||||
if objects:
|
||||
next_marker = objects[-1].key
|
||||
elif common_prefixes:
|
||||
next_marker = common_prefixes[-1].rstrip(delimiter) if delimiter else common_prefixes[-1]
|
||||
|
||||
if list_type == "2" and next_marker:
|
||||
next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")
|
||||
|
||||
if list_type == "2":
|
||||
root = Element("ListBucketResult")
|
||||
|
||||
@@ -245,6 +245,7 @@ def stream_objects_ndjson(
|
||||
url_templates: dict[str, str],
|
||||
display_tz: str = "UTC",
|
||||
versioning_enabled: bool = False,
|
||||
delimiter: Optional[str] = None,
|
||||
) -> Generator[str, None, None]:
|
||||
meta_line = json.dumps({
|
||||
"type": "meta",
|
||||
@@ -258,11 +259,20 @@ def stream_objects_ndjson(
|
||||
kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": 1000}
|
||||
if prefix:
|
||||
kwargs["Prefix"] = prefix
|
||||
if delimiter:
|
||||
kwargs["Delimiter"] = delimiter
|
||||
|
||||
running_count = 0
|
||||
try:
|
||||
paginator = client.get_paginator("list_objects_v2")
|
||||
for page in paginator.paginate(**kwargs):
|
||||
for obj in page.get("Contents", []):
|
||||
for cp in page.get("CommonPrefixes", []):
|
||||
yield json.dumps({
|
||||
"type": "folder",
|
||||
"prefix": cp["Prefix"],
|
||||
}) + "\n"
|
||||
page_contents = page.get("Contents", [])
|
||||
for obj in page_contents:
|
||||
last_mod = obj["LastModified"]
|
||||
yield json.dumps({
|
||||
"type": "object",
|
||||
@@ -273,6 +283,8 @@ def stream_objects_ndjson(
|
||||
"last_modified_iso": format_datetime_iso(last_mod, display_tz),
|
||||
"etag": obj.get("ETag", "").strip('"'),
|
||||
}) + "\n"
|
||||
running_count += len(page_contents)
|
||||
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
|
||||
except ClientError as exc:
|
||||
error_msg = exc.response.get("Error", {}).get("Message", "S3 operation failed")
|
||||
yield json.dumps({"type": "error", "error": error_msg}) + "\n"
|
||||
|
||||
251
app/storage.py
251
app/storage.py
@@ -154,6 +154,15 @@ class ListObjectsResult:
|
||||
total_count: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ShallowListResult:
|
||||
"""Result for delimiter-aware directory-level listing."""
|
||||
objects: List[ObjectMeta]
|
||||
common_prefixes: List[str]
|
||||
is_truncated: bool
|
||||
next_continuation_token: Optional[str]
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
@@ -279,25 +288,41 @@ class ObjectStorage:
|
||||
version_count = 0
|
||||
version_bytes = 0
|
||||
|
||||
internal = self.INTERNAL_FOLDERS
|
||||
bucket_str = str(bucket_path)
|
||||
|
||||
try:
|
||||
for path in bucket_path.rglob("*"):
|
||||
if path.is_file():
|
||||
rel = path.relative_to(bucket_path)
|
||||
if not rel.parts:
|
||||
continue
|
||||
top_folder = rel.parts[0]
|
||||
if top_folder not in self.INTERNAL_FOLDERS:
|
||||
stat = path.stat()
|
||||
object_count += 1
|
||||
total_bytes += stat.st_size
|
||||
stack = [bucket_str]
|
||||
while stack:
|
||||
current = stack.pop()
|
||||
try:
|
||||
with os.scandir(current) as it:
|
||||
for entry in it:
|
||||
if current == bucket_str and entry.name in internal:
|
||||
continue
|
||||
if entry.is_dir(follow_symlinks=False):
|
||||
stack.append(entry.path)
|
||||
elif entry.is_file(follow_symlinks=False):
|
||||
object_count += 1
|
||||
total_bytes += entry.stat(follow_symlinks=False).st_size
|
||||
except PermissionError:
|
||||
continue
|
||||
|
||||
versions_root = self._bucket_versions_root(bucket_name)
|
||||
if versions_root.exists():
|
||||
for path in versions_root.rglob("*.bin"):
|
||||
if path.is_file():
|
||||
stat = path.stat()
|
||||
version_count += 1
|
||||
version_bytes += stat.st_size
|
||||
v_stack = [str(versions_root)]
|
||||
while v_stack:
|
||||
v_current = v_stack.pop()
|
||||
try:
|
||||
with os.scandir(v_current) as it:
|
||||
for entry in it:
|
||||
if entry.is_dir(follow_symlinks=False):
|
||||
v_stack.append(entry.path)
|
||||
elif entry.is_file(follow_symlinks=False) and entry.name.endswith(".bin"):
|
||||
version_count += 1
|
||||
version_bytes += entry.stat(follow_symlinks=False).st_size
|
||||
except PermissionError:
|
||||
continue
|
||||
except OSError:
|
||||
if cached_stats is not None:
|
||||
return cached_stats
|
||||
@@ -471,6 +496,202 @@ class ObjectStorage:
|
||||
result = self.list_objects(bucket_name, max_keys=100000)
|
||||
return result.objects
|
||||
|
||||
def list_objects_shallow(
|
||||
self,
|
||||
bucket_name: str,
|
||||
*,
|
||||
prefix: str = "",
|
||||
delimiter: str = "/",
|
||||
max_keys: int = 1000,
|
||||
continuation_token: Optional[str] = None,
|
||||
) -> ShallowListResult:
|
||||
import bisect
|
||||
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
bucket_id = bucket_path.name
|
||||
|
||||
if delimiter != "/" or (prefix and not prefix.endswith(delimiter)):
|
||||
return self._shallow_via_full_scan(
|
||||
bucket_name, prefix=prefix, delimiter=delimiter,
|
||||
max_keys=max_keys, continuation_token=continuation_token,
|
||||
)
|
||||
|
||||
target_dir = bucket_path
|
||||
if prefix:
|
||||
safe_prefix_path = Path(prefix.rstrip("/"))
|
||||
if ".." in safe_prefix_path.parts:
|
||||
return ShallowListResult(
|
||||
objects=[], common_prefixes=[],
|
||||
is_truncated=False, next_continuation_token=None,
|
||||
)
|
||||
target_dir = bucket_path / safe_prefix_path
|
||||
try:
|
||||
resolved = target_dir.resolve()
|
||||
bucket_resolved = bucket_path.resolve()
|
||||
if not str(resolved).startswith(str(bucket_resolved) + os.sep) and resolved != bucket_resolved:
|
||||
return ShallowListResult(
|
||||
objects=[], common_prefixes=[],
|
||||
is_truncated=False, next_continuation_token=None,
|
||||
)
|
||||
except (OSError, ValueError):
|
||||
return ShallowListResult(
|
||||
objects=[], common_prefixes=[],
|
||||
is_truncated=False, next_continuation_token=None,
|
||||
)
|
||||
|
||||
if not target_dir.exists() or not target_dir.is_dir():
|
||||
return ShallowListResult(
|
||||
objects=[], common_prefixes=[],
|
||||
is_truncated=False, next_continuation_token=None,
|
||||
)
|
||||
|
||||
etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
|
||||
meta_cache: Dict[str, str] = {}
|
||||
if etag_index_path.exists():
|
||||
try:
|
||||
with open(etag_index_path, 'r', encoding='utf-8') as f:
|
||||
meta_cache = json.load(f)
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
|
||||
entries_files: list[tuple[str, int, float, Optional[str]]] = []
|
||||
entries_dirs: list[str] = []
|
||||
|
||||
try:
|
||||
with os.scandir(str(target_dir)) as it:
|
||||
for entry in it:
|
||||
name = entry.name
|
||||
if name in self.INTERNAL_FOLDERS:
|
||||
continue
|
||||
if entry.is_dir(follow_symlinks=False):
|
||||
cp = prefix + name + delimiter
|
||||
entries_dirs.append(cp)
|
||||
elif entry.is_file(follow_symlinks=False):
|
||||
key = prefix + name
|
||||
try:
|
||||
st = entry.stat()
|
||||
etag = meta_cache.get(key)
|
||||
entries_files.append((key, st.st_size, st.st_mtime, etag))
|
||||
except OSError:
|
||||
pass
|
||||
except OSError:
|
||||
return ShallowListResult(
|
||||
objects=[], common_prefixes=[],
|
||||
is_truncated=False, next_continuation_token=None,
|
||||
)
|
||||
|
||||
entries_dirs.sort()
|
||||
entries_files.sort(key=lambda x: x[0])
|
||||
|
||||
all_items: list[tuple[str, bool]] = []
|
||||
fi, di = 0, 0
|
||||
while fi < len(entries_files) and di < len(entries_dirs):
|
||||
if entries_files[fi][0] <= entries_dirs[di]:
|
||||
all_items.append((entries_files[fi][0], False))
|
||||
fi += 1
|
||||
else:
|
||||
all_items.append((entries_dirs[di], True))
|
||||
di += 1
|
||||
while fi < len(entries_files):
|
||||
all_items.append((entries_files[fi][0], False))
|
||||
fi += 1
|
||||
while di < len(entries_dirs):
|
||||
all_items.append((entries_dirs[di], True))
|
||||
di += 1
|
||||
|
||||
files_map = {e[0]: e for e in entries_files}
|
||||
|
||||
start_index = 0
|
||||
if continuation_token:
|
||||
all_keys = [item[0] for item in all_items]
|
||||
start_index = bisect.bisect_right(all_keys, continuation_token)
|
||||
|
||||
selected = all_items[start_index:start_index + max_keys]
|
||||
is_truncated = (start_index + max_keys) < len(all_items)
|
||||
|
||||
result_objects: list[ObjectMeta] = []
|
||||
result_prefixes: list[str] = []
|
||||
for item_key, is_dir in selected:
|
||||
if is_dir:
|
||||
result_prefixes.append(item_key)
|
||||
else:
|
||||
fdata = files_map[item_key]
|
||||
result_objects.append(ObjectMeta(
|
||||
key=fdata[0],
|
||||
size=fdata[1],
|
||||
last_modified=datetime.fromtimestamp(fdata[2], timezone.utc),
|
||||
etag=fdata[3],
|
||||
metadata=None,
|
||||
))
|
||||
|
||||
next_token = None
|
||||
if is_truncated and selected:
|
||||
next_token = selected[-1][0]
|
||||
|
||||
return ShallowListResult(
|
||||
objects=result_objects,
|
||||
common_prefixes=result_prefixes,
|
||||
is_truncated=is_truncated,
|
||||
next_continuation_token=next_token,
|
||||
)
|
||||
|
||||
def _shallow_via_full_scan(
|
||||
self,
|
||||
bucket_name: str,
|
||||
*,
|
||||
prefix: str = "",
|
||||
delimiter: str = "/",
|
||||
max_keys: int = 1000,
|
||||
continuation_token: Optional[str] = None,
|
||||
) -> ShallowListResult:
|
||||
list_result = self.list_objects(
|
||||
bucket_name,
|
||||
max_keys=max_keys * 10,
|
||||
continuation_token=continuation_token,
|
||||
prefix=prefix or None,
|
||||
)
|
||||
|
||||
common_prefixes: list[str] = []
|
||||
filtered_objects: list[ObjectMeta] = []
|
||||
seen_prefixes: set[str] = set()
|
||||
|
||||
for obj in list_result.objects:
|
||||
key_after_prefix = obj.key[len(prefix):] if prefix else obj.key
|
||||
if delimiter in key_after_prefix:
|
||||
cp = prefix + key_after_prefix.split(delimiter)[0] + delimiter
|
||||
if cp not in seen_prefixes:
|
||||
seen_prefixes.add(cp)
|
||||
common_prefixes.append(cp)
|
||||
else:
|
||||
filtered_objects.append(obj)
|
||||
|
||||
common_prefixes.sort()
|
||||
total_items = len(filtered_objects) + len(common_prefixes)
|
||||
is_truncated = total_items > max_keys or list_result.is_truncated
|
||||
|
||||
if len(filtered_objects) >= max_keys:
|
||||
filtered_objects = filtered_objects[:max_keys]
|
||||
common_prefixes = []
|
||||
else:
|
||||
remaining = max_keys - len(filtered_objects)
|
||||
common_prefixes = common_prefixes[:remaining]
|
||||
|
||||
next_token = None
|
||||
if is_truncated:
|
||||
if filtered_objects:
|
||||
next_token = filtered_objects[-1].key
|
||||
elif common_prefixes:
|
||||
next_token = common_prefixes[-1].rstrip(delimiter) if delimiter else common_prefixes[-1]
|
||||
|
||||
return ShallowListResult(
|
||||
objects=filtered_objects,
|
||||
common_prefixes=common_prefixes,
|
||||
is_truncated=is_truncated,
|
||||
next_continuation_token=next_token,
|
||||
)
|
||||
|
||||
def put_object(
|
||||
self,
|
||||
bucket_name: str,
|
||||
|
||||
@@ -616,6 +616,7 @@ def stream_bucket_objects(bucket_name: str):
|
||||
return jsonify({"error": str(exc)}), 403
|
||||
|
||||
prefix = request.args.get("prefix") or None
|
||||
delimiter = request.args.get("delimiter") or None
|
||||
|
||||
try:
|
||||
client = get_session_s3_client()
|
||||
@@ -629,6 +630,7 @@ def stream_bucket_objects(bucket_name: str):
|
||||
return Response(
|
||||
stream_objects_ndjson(
|
||||
client, bucket_name, prefix, url_templates, display_tz, versioning_enabled,
|
||||
delimiter=delimiter,
|
||||
),
|
||||
mimetype='application/x-ndjson',
|
||||
headers={
|
||||
|
||||
Reference in New Issue
Block a user