From 70b61fd8e66e519d21027419b404953542b1ca2b Mon Sep 17 00:00:00 2001 From: kqjy Date: Thu, 5 Feb 2026 17:45:34 +0800 Subject: [PATCH 01/14] Further optimize CPU usage; Improve security and performance; 4 bug fixes. --- app/admin_api.py | 5 ++ app/iam.py | 14 ++- app/s3_api.py | 7 +- app/secret_store.py | 12 +++ app/storage.py | 192 ++++++++++++++++++++++++++++++----------- app/ui.py | 23 +++-- app/version.py | 2 +- docs.md | 6 +- templates/docs.html | 8 +- templates/metrics.html | 17 +++- 10 files changed, 219 insertions(+), 67 deletions(-) diff --git a/app/admin_api.py b/app/admin_api.py index 8ebc76f..a3d436d 100644 --- a/app/admin_api.py +++ b/app/admin_api.py @@ -1,6 +1,7 @@ from __future__ import annotations import ipaddress +import json import logging import re import socket @@ -354,6 +355,10 @@ def update_peer_site(site_id: str): if region_error: return _json_error("ValidationError", region_error, 400) + if "connection_id" in payload: + if payload["connection_id"] and not _connections().get(payload["connection_id"]): + return _json_error("ValidationError", f"Connection '{payload['connection_id']}' not found", 400) + peer = PeerSite( site_id=site_id, endpoint=payload.get("endpoint", existing.endpoint), diff --git a/app/iam.py b/app/iam.py index 6ab4f38..4dd6b68 100644 --- a/app/iam.py +++ b/app/iam.py @@ -529,11 +529,13 @@ class IamService: return candidate if candidate in ALLOWED_ACTIONS else "" def _write_default(self) -> None: + access_key = secrets.token_hex(12) + secret_key = secrets.token_urlsafe(32) default = { "users": [ { - "access_key": "localadmin", - "secret_key": "localadmin", + "access_key": access_key, + "secret_key": secret_key, "display_name": "Local Admin", "policies": [ {"bucket": "*", "actions": list(ALLOWED_ACTIONS)} @@ -542,6 +544,14 @@ class IamService: ] } self.config_path.write_text(json.dumps(default, indent=2)) + print(f"\n{'='*60}") + print("MYFSIO FIRST RUN - ADMIN CREDENTIALS GENERATED") + print(f"{'='*60}") + print(f"Access 
Key: {access_key}") + print(f"Secret Key: {secret_key}") + print(f"{'='*60}") + print(f"Missed this? Check: {self.config_path}") + print(f"{'='*60}\n") def _generate_access_key(self) -> str: return secrets.token_hex(8) diff --git a/app/s3_api.py b/app/s3_api.py index f5f599e..f2cfa0b 100644 --- a/app/s3_api.py +++ b/app/s3_api.py @@ -1004,7 +1004,8 @@ def _apply_object_headers( response.headers["ETag"] = f'"{etag}"' response.headers["Accept-Ranges"] = "bytes" for key, value in (metadata or {}).items(): - response.headers[f"X-Amz-Meta-{key}"] = value + safe_value = _sanitize_header_value(str(value)) + response.headers[f"X-Amz-Meta-{key}"] = safe_value def _maybe_handle_bucket_subresource(bucket_name: str) -> Response | None: @@ -2342,10 +2343,12 @@ def _post_object(bucket_name: str) -> Response: success_action_redirect = request.form.get("success_action_redirect") if success_action_redirect: allowed_hosts = current_app.config.get("ALLOWED_REDIRECT_HOSTS", []) + if not allowed_hosts: + allowed_hosts = [request.host] parsed = urlparse(success_action_redirect) if parsed.scheme not in ("http", "https"): return _error_response("InvalidArgument", "Redirect URL must use http or https", 400) - if allowed_hosts and parsed.netloc not in allowed_hosts: + if parsed.netloc not in allowed_hosts: return _error_response("InvalidArgument", "Redirect URL host not allowed", 400) redirect_url = f"{success_action_redirect}?bucket={bucket_name}&key={quote(object_key)}&etag={meta.etag}" return Response(status=303, headers={"Location": redirect_url}) diff --git a/app/secret_store.py b/app/secret_store.py index 5c14f6e..903c2fa 100644 --- a/app/secret_store.py +++ b/app/secret_store.py @@ -18,6 +18,18 @@ class EphemeralSecretStore: self._store[token] = (payload, expires_at) return token + def peek(self, token: str | None) -> Any | None: + if not token: + return None + entry = self._store.get(token) + if not entry: + return None + payload, expires_at = entry + if expires_at < time.time(): + 
self._store.pop(token, None) + return None + return payload + def pop(self, token: str | None) -> Any | None: if not token: return None diff --git a/app/storage.py b/app/storage.py index 502a93c..22391be 100644 --- a/app/storage.py +++ b/app/storage.py @@ -186,6 +186,7 @@ class ObjectStorage: self._cache_ttl = cache_ttl self._object_cache_max_size = object_cache_max_size self._object_key_max_length_bytes = object_key_max_length_bytes + self._sorted_key_cache: Dict[str, tuple[list[str], int]] = {} def _get_bucket_lock(self, bucket_id: str) -> threading.Lock: """Get or create a lock for a specific bucket. Reduces global lock contention.""" @@ -243,10 +244,15 @@ class ObjectStorage: raise BucketNotFoundError("Bucket does not exist") cache_path = self._system_bucket_root(bucket_name) / "stats.json" + cached_stats = None + cache_fresh = False + if cache_path.exists(): try: - if time.time() - cache_path.stat().st_mtime < cache_ttl: - return json.loads(cache_path.read_text(encoding="utf-8")) + cache_fresh = time.time() - cache_path.stat().st_mtime < cache_ttl + cached_stats = json.loads(cache_path.read_text(encoding="utf-8")) + if cache_fresh: + return cached_stats except (OSError, json.JSONDecodeError): pass @@ -255,40 +261,45 @@ class ObjectStorage: version_count = 0 version_bytes = 0 - for path in bucket_path.rglob("*"): - if path.is_file(): - rel = path.relative_to(bucket_path) - if not rel.parts: - continue - top_folder = rel.parts[0] - if top_folder not in self.INTERNAL_FOLDERS: - stat = path.stat() - object_count += 1 - total_bytes += stat.st_size - - versions_root = self._bucket_versions_root(bucket_name) - if versions_root.exists(): - for path in versions_root.rglob("*.bin"): + try: + for path in bucket_path.rglob("*"): if path.is_file(): - stat = path.stat() - version_count += 1 - version_bytes += stat.st_size - + rel = path.relative_to(bucket_path) + if not rel.parts: + continue + top_folder = rel.parts[0] + if top_folder not in self.INTERNAL_FOLDERS: + stat = 
path.stat() + object_count += 1 + total_bytes += stat.st_size + + versions_root = self._bucket_versions_root(bucket_name) + if versions_root.exists(): + for path in versions_root.rglob("*.bin"): + if path.is_file(): + stat = path.stat() + version_count += 1 + version_bytes += stat.st_size + except OSError: + if cached_stats is not None: + return cached_stats + raise + stats = { "objects": object_count, "bytes": total_bytes, "version_count": version_count, "version_bytes": version_bytes, "total_objects": object_count + version_count, - "total_bytes": total_bytes + version_bytes, + "total_bytes": total_bytes + version_bytes, } - + try: cache_path.parent.mkdir(parents=True, exist_ok=True) cache_path.write_text(json.dumps(stats), encoding="utf-8") except OSError: pass - + return stats def _invalidate_bucket_stats_cache(self, bucket_id: str) -> None: @@ -299,6 +310,34 @@ class ObjectStorage: except OSError: pass + def _update_bucket_stats_cache( + self, + bucket_id: str, + *, + bytes_delta: int = 0, + objects_delta: int = 0, + version_bytes_delta: int = 0, + version_count_delta: int = 0, + ) -> None: + """Incrementally update cached bucket statistics instead of invalidating. + + This avoids expensive full directory scans on every PUT/DELETE by + adjusting the cached values directly. 
+ """ + cache_path = self._system_bucket_root(bucket_id) / "stats.json" + try: + if cache_path.exists(): + data = json.loads(cache_path.read_text(encoding="utf-8")) + data["objects"] = max(0, data.get("objects", 0) + objects_delta) + data["bytes"] = max(0, data.get("bytes", 0) + bytes_delta) + data["version_count"] = max(0, data.get("version_count", 0) + version_count_delta) + data["version_bytes"] = max(0, data.get("version_bytes", 0) + version_bytes_delta) + data["total_objects"] = max(0, data.get("total_objects", 0) + objects_delta + version_count_delta) + data["total_bytes"] = max(0, data.get("total_bytes", 0) + bytes_delta + version_bytes_delta) + cache_path.write_text(json.dumps(data), encoding="utf-8") + except (OSError, json.JSONDecodeError): + pass + def delete_bucket(self, bucket_name: str) -> None: bucket_path = self._bucket_path(bucket_name) if not bucket_path.exists(): @@ -333,22 +372,35 @@ class ObjectStorage: Returns: ListObjectsResult with objects, truncation status, and continuation token """ + import bisect + bucket_path = self._bucket_path(bucket_name) if not bucket_path.exists(): raise BucketNotFoundError("Bucket does not exist") bucket_id = bucket_path.name object_cache = self._get_object_cache(bucket_id, bucket_path) - - all_keys = sorted(object_cache.keys()) - + + cache_version = self._cache_version.get(bucket_id, 0) + cached_entry = self._sorted_key_cache.get(bucket_id) + if cached_entry and cached_entry[1] == cache_version: + all_keys = cached_entry[0] + else: + all_keys = sorted(object_cache.keys()) + self._sorted_key_cache[bucket_id] = (all_keys, cache_version) + if prefix: - all_keys = [k for k in all_keys if k.startswith(prefix)] - + lo = bisect.bisect_left(all_keys, prefix) + hi = len(all_keys) + for i in range(lo, len(all_keys)): + if not all_keys[i].startswith(prefix): + hi = i + break + all_keys = all_keys[lo:hi] + total_count = len(all_keys) start_index = 0 if continuation_token: - import bisect start_index = 
bisect.bisect_right(all_keys, continuation_token) if start_index >= total_count: return ListObjectsResult( @@ -356,8 +408,8 @@ class ObjectStorage: is_truncated=False, next_continuation_token=None, total_count=total_count, - ) - + ) + end_index = start_index + max_keys keys_slice = all_keys[start_index:end_index] is_truncated = end_index < total_count @@ -403,7 +455,9 @@ class ObjectStorage: is_overwrite = destination.exists() existing_size = destination.stat().st_size if is_overwrite else 0 + archived_version_size = 0 if self._is_versioning_enabled(bucket_path) and is_overwrite: + archived_version_size = existing_size self._archive_current_version(bucket_id, safe_key, reason="overwrite") tmp_dir = self._system_root_path() / self.SYSTEM_TMP_DIR @@ -416,11 +470,10 @@ class ObjectStorage: shutil.copyfileobj(_HashingReader(stream, checksum), target) new_size = tmp_path.stat().st_size - + size_delta = new_size - existing_size + object_delta = 0 if is_overwrite else 1 + if enforce_quota: - size_delta = new_size - existing_size - object_delta = 0 if is_overwrite else 1 - quota_check = self.check_quota( bucket_name, additional_bytes=max(0, size_delta), @@ -432,7 +485,7 @@ class ObjectStorage: quota_check["quota"], quota_check["usage"], ) - + shutil.move(str(tmp_path), str(destination)) finally: @@ -448,7 +501,13 @@ class ObjectStorage: combined_meta = {**internal_meta, **(metadata or {})} self._write_metadata(bucket_id, safe_key, combined_meta) - self._invalidate_bucket_stats_cache(bucket_id) + self._update_bucket_stats_cache( + bucket_id, + bytes_delta=size_delta, + objects_delta=object_delta, + version_bytes_delta=archived_version_size, + version_count_delta=1 if archived_version_size > 0 else 0, + ) obj_meta = ObjectMeta( key=safe_key.as_posix(), @@ -498,15 +557,24 @@ class ObjectStorage: path = self._object_path(bucket_name, object_key) if not path.exists(): return + deleted_size = path.stat().st_size safe_key = path.relative_to(bucket_path) bucket_id = 
bucket_path.name + archived_version_size = 0 if self._is_versioning_enabled(bucket_path): + archived_version_size = deleted_size self._archive_current_version(bucket_id, safe_key, reason="delete") rel = path.relative_to(bucket_path) self._safe_unlink(path) self._delete_metadata(bucket_id, rel) - self._invalidate_bucket_stats_cache(bucket_id) + self._update_bucket_stats_cache( + bucket_id, + bytes_delta=-deleted_size, + objects_delta=-1, + version_bytes_delta=archived_version_size, + version_count_delta=1 if archived_version_size > 0 else 0, + ) self._update_object_cache_entry(bucket_id, safe_key.as_posix(), None) self._cleanup_empty_parents(path, bucket_path) @@ -828,7 +896,12 @@ class ObjectStorage: if not isinstance(metadata, dict): metadata = {} destination = bucket_path / safe_key - if self._is_versioning_enabled(bucket_path) and destination.exists(): + restored_size = data_path.stat().st_size + is_overwrite = destination.exists() + existing_size = destination.stat().st_size if is_overwrite else 0 + archived_version_size = 0 + if self._is_versioning_enabled(bucket_path) and is_overwrite: + archived_version_size = existing_size self._archive_current_version(bucket_id, safe_key, reason="restore-overwrite") destination.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(data_path, destination) @@ -837,7 +910,13 @@ class ObjectStorage: else: self._delete_metadata(bucket_id, safe_key) stat = destination.stat() - self._invalidate_bucket_stats_cache(bucket_id) + self._update_bucket_stats_cache( + bucket_id, + bytes_delta=restored_size - existing_size, + objects_delta=0 if is_overwrite else 1, + version_bytes_delta=archived_version_size, + version_count_delta=1 if archived_version_size > 0 else 0, + ) return ObjectMeta( key=safe_key.as_posix(), size=stat.st_size, @@ -861,6 +940,7 @@ class ObjectStorage: meta_path = legacy_version_dir / f"{version_id}.json" if not data_path.exists() and not meta_path.exists(): raise StorageError(f"Version {version_id} not found") + 
deleted_version_size = data_path.stat().st_size if data_path.exists() else 0 if data_path.exists(): data_path.unlink() if meta_path.exists(): @@ -868,6 +948,12 @@ class ObjectStorage: parent = data_path.parent if parent.exists() and not any(parent.iterdir()): parent.rmdir() + if deleted_version_size > 0: + self._update_bucket_stats_cache( + bucket_id, + version_bytes_delta=-deleted_version_size, + version_count_delta=-1, + ) def list_orphaned_objects(self, bucket_name: str) -> List[Dict[str, Any]]: bucket_path = self._bucket_path(bucket_name) @@ -1164,14 +1250,14 @@ class ObjectStorage: safe_key = self._sanitize_object_key(manifest["object_key"], self._object_key_max_length_bytes) destination = bucket_path / safe_key - + is_overwrite = destination.exists() existing_size = destination.stat().st_size if is_overwrite else 0 - + size_delta = total_size - existing_size + object_delta = 0 if is_overwrite else 1 + versioning_enabled = self._is_versioning_enabled(bucket_path) + if enforce_quota: - size_delta = total_size - existing_size - object_delta = 0 if is_overwrite else 1 - quota_check = self.check_quota( bucket_name, additional_bytes=max(0, size_delta), @@ -1183,14 +1269,16 @@ class ObjectStorage: quota_check["quota"], quota_check["usage"], ) - + destination.parent.mkdir(parents=True, exist_ok=True) lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock" + archived_version_size = 0 try: with _atomic_lock_file(lock_file_path): - if self._is_versioning_enabled(bucket_path) and destination.exists(): + if versioning_enabled and destination.exists(): + archived_version_size = destination.stat().st_size self._archive_current_version(bucket_id, safe_key, reason="overwrite") checksum = hashlib.md5() with destination.open("wb") as target: @@ -1210,7 +1298,13 @@ class ObjectStorage: shutil.rmtree(upload_root, ignore_errors=True) - self._invalidate_bucket_stats_cache(bucket_id) + self._update_bucket_stats_cache( + 
bucket_id, + bytes_delta=size_delta, + objects_delta=object_delta, + version_bytes_delta=archived_version_size, + version_count_delta=1 if archived_version_size > 0 else 0, + ) stat = destination.stat() etag = checksum.hexdigest() @@ -1586,6 +1680,8 @@ class ObjectStorage: objects.pop(key, None) else: objects[key] = meta + self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1 + self._sorted_key_cache.pop(bucket_id, None) def warm_cache(self, bucket_names: Optional[List[str]] = None) -> None: """Pre-warm the object cache for specified buckets or all buckets. diff --git a/app/ui.py b/app/ui.py index 334ba89..1aec8ce 100644 --- a/app/ui.py +++ b/app/ui.py @@ -220,13 +220,16 @@ def _bucket_access_descriptor(policy: dict[str, Any] | None) -> tuple[str, str]: def _current_principal(): - creds = session.get("credentials") + token = session.get("cred_token") + creds = _secret_store().peek(token) if token else None if not creds: return None try: return _iam().authenticate(creds["access_key"], creds["secret_key"]) except IamError: - session.pop("credentials", None) + session.pop("cred_token", None) + if token: + _secret_store().pop(token) return None @@ -251,7 +254,8 @@ def _authorize_ui(principal, bucket_name: str | None, action: str, *, object_key def _api_headers() -> dict[str, str]: - creds = session.get("credentials") or {} + token = session.get("cred_token") + creds = _secret_store().peek(token) or {} return { "X-Access-Key": creds.get("access_key", ""), "X-Secret-Key": creds.get("secret_key", ""), @@ -296,7 +300,9 @@ def login(): except IamError as exc: flash(_friendly_error_message(exc), "danger") return render_template("login.html") - session["credentials"] = {"access_key": access_key, "secret_key": secret_key} + creds = {"access_key": access_key, "secret_key": secret_key} + token = _secret_store().remember(creds, ttl=3600) + session["cred_token"] = token session.permanent = True flash(f"Welcome back, {principal.display_name}", "success") return 
redirect(url_for("ui.buckets_overview")) @@ -305,7 +311,9 @@ def login(): @ui_bp.post("/logout") def logout(): - session.pop("credentials", None) + token = session.pop("cred_token", None) + if token: + _secret_store().pop(token) flash("Signed out", "info") return redirect(url_for("ui.login")) @@ -542,7 +550,10 @@ def list_bucket_objects(bucket_name: str): except IamError as exc: return jsonify({"error": str(exc)}), 403 - max_keys = min(int(request.args.get("max_keys", 1000)), 100000) + try: + max_keys = min(int(request.args.get("max_keys", 1000)), 100000) + except ValueError: + return jsonify({"error": "max_keys must be an integer"}), 400 continuation_token = request.args.get("continuation_token") or None prefix = request.args.get("prefix") or None diff --git a/app/version.py b/app/version.py index be22c33..e4ed325 100644 --- a/app/version.py +++ b/app/version.py @@ -1,6 +1,6 @@ from __future__ import annotations -APP_VERSION = "0.2.5" +APP_VERSION = "0.2.6" def get_version() -> str: diff --git a/docs.md b/docs.md index e582c76..d4917d0 100644 --- a/docs.md +++ b/docs.md @@ -619,13 +619,15 @@ MyFSIO implements a comprehensive Identity and Access Management (IAM) system th ### Getting Started -1. On first boot, `data/.myfsio.sys/config/iam.json` is seeded with `localadmin / localadmin` that has wildcard access. -2. Sign into the UI using those credentials, then open **IAM**: +1. On first boot, `data/.myfsio.sys/config/iam.json` is created with a randomly generated admin user. The access key and secret key are printed to the console during first startup. If you miss it, check the `iam.json` file directly—credentials are stored in plaintext. +2. Sign into the UI using the generated credentials, then open **IAM**: - **Create user**: supply a display name and optional JSON inline policy array. - **Rotate secret**: generates a new secret key; the UI surfaces it once. 
- **Policy editor**: select a user, paste an array of objects (`{"bucket": "*", "actions": ["list", "read"]}`), and submit. Alias support includes AWS-style verbs (e.g., `s3:GetObject`). 3. Wildcard action `iam:*` is supported for admin user definitions. +> **Breaking Change (v0.2.6+):** Previous versions used fixed default credentials (`localadmin/localadmin`). If upgrading from an older version, your existing credentials remain unchanged, but new installations will generate random credentials. + ### Authentication The API expects every request to include authentication headers. The UI persists them in the Flask session after login. diff --git a/templates/docs.html b/templates/docs.html index 2f713d7..66b9baf 100644 --- a/templates/docs.html +++ b/templates/docs.html @@ -451,10 +451,10 @@ sudo journalctl -u myfsio -f # View logs 03

Authenticate & manage IAM

-

MyFSIO seeds data/.myfsio.sys/config/iam.json with localadmin/localadmin. Sign in once, rotate it, then grant least-privilege access to teammates and tools.

+

On first startup, MyFSIO generates random admin credentials and prints them to the console. Missed it? Check data/.myfsio.sys/config/iam.json directly—credentials are stored in plaintext.

    -
  1. Visit /ui/login, enter the bootstrap credentials, and rotate them immediately from the IAM page.
  2. +
  3. Check the console output (or iam.json) for the generated Access Key and Secret Key, then visit /ui/login.
  4. Create additional users with descriptive display names and AWS-style inline policies (for example {"bucket": "*", "actions": ["list", "read"]}).
  5. Rotate secrets when sharing with CI jobs—new secrets display once and persist to data/.myfsio.sys/config/iam.json.
  6. Bucket policies layer on top of IAM. Apply Private/Public presets or paste custom JSON; changes reload instantly.
  7. @@ -2136,8 +2136,8 @@ curl -X PUT "{{ api_base }}/<bucket>?tagging" \ {{ api_base }}
-
Sample user
- localadmin / localadmin +
Initial credentials
+ Generated on first run (check console)
Logs
diff --git a/templates/metrics.html b/templates/metrics.html index 6425531..c77372a 100644 --- a/templates/metrics.html +++ b/templates/metrics.html @@ -398,6 +398,14 @@ +
@@ -817,8 +825,8 @@ var diskChart = null; var historyStatus = document.getElementById('historyStatus'); var timeRangeSelect = document.getElementById('historyTimeRange'); + var maxDataPointsSelect = document.getElementById('maxDataPoints'); var historyTimer = null; - var MAX_DATA_POINTS = 500; function createChart(ctx, label, color) { return new Chart(ctx, { @@ -889,7 +897,8 @@ if (historyStatus) historyStatus.textContent = 'No history data available yet. Data is recorded every ' + (data.interval_minutes || 5) + ' minutes.'; return; } - var history = data.history.slice(-MAX_DATA_POINTS); + var maxPoints = maxDataPointsSelect ? parseInt(maxDataPointsSelect.value, 10) : 500; + var history = maxPoints > 0 ? data.history.slice(-maxPoints) : data.history; var labels = history.map(function(h) { return formatTime(h.timestamp); }); var cpuData = history.map(function(h) { return h.cpu_percent; }); var memData = history.map(function(h) { return h.memory_percent; }); @@ -927,6 +936,10 @@ timeRangeSelect.addEventListener('change', loadHistory); } + if (maxDataPointsSelect) { + maxDataPointsSelect.addEventListener('change', loadHistory); + } + document.addEventListener('visibilitychange', function() { if (document.hidden) { if (historyTimer) clearInterval(historyTimer); From ebe7f6222dd889786d759397dd9ed260ca7e4556 Mon Sep 17 00:00:00 2001 From: kqjy Date: Thu, 5 Feb 2026 19:08:18 +0800 Subject: [PATCH 02/14] Fix hardcoded secret key ttl session --- app/ui.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/ui.py b/app/ui.py index 1aec8ce..9df2131 100644 --- a/app/ui.py +++ b/app/ui.py @@ -301,7 +301,8 @@ def login(): flash(_friendly_error_message(exc), "danger") return render_template("login.html") creds = {"access_key": access_key, "secret_key": secret_key} - token = _secret_store().remember(creds, ttl=3600) + ttl = int(current_app.permanent_session_lifetime.total_seconds()) + token = _secret_store().remember(creds, ttl=ttl) session["cred_token"] = 
token session.permanent = True flash(f"Welcome back, {principal.display_name}", "success") From 4a60cb269a878bc952ab7dd1c11ca79f9d48dea8 Mon Sep 17 00:00:00 2001 From: kqjy Date: Thu, 5 Feb 2026 19:11:00 +0800 Subject: [PATCH 03/14] Update python version in Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 74c6efb..d236349 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # syntax=docker/dockerfile:1.7 -FROM python:3.12.12-slim +FROM python:3.14-slim ENV PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 From cbdf1a27c8bbd784985bff3599c68b39939b46c9 Mon Sep 17 00:00:00 2001 From: kqjy Date: Thu, 5 Feb 2026 19:11:42 +0800 Subject: [PATCH 04/14] Pin dockerfile python version to 3.14.3 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d236349..489c5fe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # syntax=docker/dockerfile:1.7 -FROM python:3.14-slim +FROM python:3.14.3-slim ENV PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 From e76c311231566c6f25ab399d17bc5db2974e306b Mon Sep 17 00:00:00 2001 From: kqjy Date: Thu, 5 Feb 2026 19:21:18 +0800 Subject: [PATCH 05/14] Update install/uninstall scripts with new config options and credential capture --- Dockerfile | 4 -- scripts/install.sh | 121 +++++++++++++++++++++++++++++++++++++------ scripts/uninstall.sh | 14 +++-- 3 files changed, 115 insertions(+), 24 deletions(-) diff --git a/Dockerfile b/Dockerfile index 489c5fe..184f240 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,3 @@ -# syntax=docker/dockerfile:1.7 FROM python:3.14.3-slim ENV PYTHONDONTWRITEBYTECODE=1 \ @@ -6,7 +5,6 @@ ENV PYTHONDONTWRITEBYTECODE=1 \ WORKDIR /app -# Install build deps for any wheels that need compilation, then clean up RUN apt-get update \ && apt-get install -y --no-install-recommends build-essential \ && rm -rf /var/lib/apt/lists/* @@ -16,10 +14,8 @@ RUN pip install --no-cache-dir -r 
requirements.txt COPY . . -# Make entrypoint executable RUN chmod +x docker-entrypoint.sh -# Create data directory and set permissions RUN mkdir -p /app/data \ && useradd -m -u 1000 myfsio \ && chown -R myfsio:myfsio /app diff --git a/scripts/install.sh b/scripts/install.sh index b8b09dc..8fdad2b 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -192,31 +192,86 @@ cat > "$INSTALL_DIR/myfsio.env" << EOF # Generated by install.sh on $(date) # Documentation: https://go.jzwsite.com/myfsio -# Storage paths +# ============================================================================= +# STORAGE PATHS +# ============================================================================= STORAGE_ROOT=$DATA_DIR LOG_DIR=$LOG_DIR -# Network +# ============================================================================= +# NETWORK +# ============================================================================= APP_HOST=0.0.0.0 APP_PORT=$API_PORT -# Security - CHANGE IN PRODUCTION -SECRET_KEY=$SECRET_KEY -CORS_ORIGINS=* - -# Public URL (set this if behind a reverse proxy) +# Public URL (set this if behind a reverse proxy for presigned URLs) $(if [[ -n "$API_URL" ]]; then echo "API_BASE_URL=$API_URL"; else echo "# API_BASE_URL=https://s3.example.com"; fi) -# Logging +# ============================================================================= +# SECURITY +# ============================================================================= +# Secret key for session signing (auto-generated if not set) +SECRET_KEY=$SECRET_KEY + +# CORS settings - restrict in production +CORS_ORIGINS=* + +# Brute-force protection +AUTH_MAX_ATTEMPTS=5 +AUTH_LOCKOUT_MINUTES=15 + +# Reverse proxy settings (set to number of trusted proxies in front) +# NUM_TRUSTED_PROXIES=1 + +# Allow internal admin endpoints (only enable on trusted networks) +# ALLOW_INTERNAL_ENDPOINTS=false + +# Allowed hosts for redirects (comma-separated, empty = only the request's own host is allowed) +# ALLOWED_REDIRECT_HOSTS= + +# 
============================================================================= +# LOGGING +# ============================================================================= LOG_LEVEL=INFO LOG_TO_FILE=true -# Rate limiting +# ============================================================================= +# RATE LIMITING +# ============================================================================= RATE_LIMIT_DEFAULT=200 per minute +# RATE_LIMIT_LIST_BUCKETS=60 per minute +# RATE_LIMIT_BUCKET_OPS=120 per minute +# RATE_LIMIT_OBJECT_OPS=240 per minute +# RATE_LIMIT_ADMIN=60 per minute -# Optional: Encryption (uncomment to enable) +# ============================================================================= +# SERVER TUNING (0 = auto-detect based on system resources) +# ============================================================================= +# SERVER_THREADS=0 +# SERVER_CONNECTION_LIMIT=0 +# SERVER_BACKLOG=0 +# SERVER_CHANNEL_TIMEOUT=120 + +# ============================================================================= +# ENCRYPTION (uncomment to enable) +# ============================================================================= # ENCRYPTION_ENABLED=true # KMS_ENABLED=true + +# ============================================================================= +# SITE SYNC / REPLICATION (for multi-site deployments) +# ============================================================================= +# SITE_ID=site-1 +# SITE_ENDPOINT=https://s3-site1.example.com +# SITE_REGION=us-east-1 +# SITE_SYNC_ENABLED=false + +# ============================================================================= +# OPTIONAL FEATURES +# ============================================================================= +# LIFECYCLE_ENABLED=false +# METRICS_HISTORY_ENABLED=false +# OPERATION_METRICS_ENABLED=false EOF chmod 600 "$INSTALL_DIR/myfsio.env" echo " [OK] Created $INSTALL_DIR/myfsio.env" @@ -308,7 +363,7 @@ if [[ "$SKIP_SYSTEMD" != true ]]; then systemctl start myfsio echo " [OK] 
Service started" echo "" - + read -p "Would you like to enable MyFSIO to start on boot? [Y/n] " -n 1 -r echo if [[ ! $REPLY =~ ^[Nn]$ ]]; then @@ -316,12 +371,37 @@ if [[ "$SKIP_SYSTEMD" != true ]]; then echo " [OK] Service enabled on boot" fi echo "" - - sleep 2 + + echo " Waiting for service initialization..." + sleep 3 + echo " Service Status:" echo " ---------------" if systemctl is-active --quiet myfsio; then echo " [OK] MyFSIO is running" + + IAM_FILE="$DATA_DIR/.myfsio.sys/config/iam.json" + if [[ -f "$IAM_FILE" ]]; then + echo "" + echo " ============================================" + echo " ADMIN CREDENTIALS (save these securely!)" + echo " ============================================" + if command -v jq &>/dev/null; then + ACCESS_KEY=$(jq -r '.users[0].access_key' "$IAM_FILE" 2>/dev/null) + SECRET_KEY=$(jq -r '.users[0].secret_key' "$IAM_FILE" 2>/dev/null) + else + ACCESS_KEY=$(grep -o '"access_key"[[:space:]]*:[[:space:]]*"[^"]*"' "$IAM_FILE" | head -1 | sed 's/.*"\([^"]*\)"$/\1/') + SECRET_KEY=$(grep -o '"secret_key"[[:space:]]*:[[:space:]]*"[^"]*"' "$IAM_FILE" | head -1 | sed 's/.*"\([^"]*\)"$/\1/') + fi + if [[ -n "$ACCESS_KEY" && -n "$SECRET_KEY" ]]; then + echo " Access Key: $ACCESS_KEY" + echo " Secret Key: $SECRET_KEY" + else + echo " [!] Could not parse credentials from $IAM_FILE" + echo " Check the file manually or view service logs." + fi + echo " ============================================" + fi else echo " [WARNING] MyFSIO may not have started correctly" echo " Check logs with: journalctl -u myfsio -f" @@ -346,19 +426,26 @@ echo "Access Points:" echo " API: http://$(hostname -I 2>/dev/null | awk '{print $1}' || echo "localhost"):$API_PORT" echo " UI: http://$(hostname -I 2>/dev/null | awk '{print $1}' || echo "localhost"):$UI_PORT/ui" echo "" -echo "Default Credentials:" -echo " Username: localadmin" -echo " Password: localadmin" -echo " [!] WARNING: Change these immediately after first login!" 
+echo "Credentials:" +echo " Admin credentials were shown above (if service was started)." +echo " You can also find them in: $DATA_DIR/.myfsio.sys/config/iam.json" echo "" echo "Configuration Files:" echo " Environment: $INSTALL_DIR/myfsio.env" echo " IAM Users: $DATA_DIR/.myfsio.sys/config/iam.json" echo " Bucket Policies: $DATA_DIR/.myfsio.sys/config/bucket_policies.json" +echo " Secret Key: $DATA_DIR/.myfsio.sys/config/.secret (auto-generated)" +echo "" +echo "Security Notes:" +echo " - Rate limiting is enabled by default (200 req/min)" +echo " - Brute-force protection: 5 attempts, 15 min lockout" +echo " - Set CORS_ORIGINS to specific domains in production" +echo " - Set NUM_TRUSTED_PROXIES if behind a reverse proxy" echo "" echo "Useful Commands:" echo " Check status: sudo systemctl status myfsio" echo " View logs: sudo journalctl -u myfsio -f" +echo " Validate config: $INSTALL_DIR/myfsio --check-config" echo " Restart: sudo systemctl restart myfsio" echo " Stop: sudo systemctl stop myfsio" echo "" diff --git a/scripts/uninstall.sh b/scripts/uninstall.sh index 49befeb..a920eb2 100644 --- a/scripts/uninstall.sh +++ b/scripts/uninstall.sh @@ -88,7 +88,8 @@ echo "The following items will be removed:" echo "" echo " Install directory: $INSTALL_DIR" if [[ "$KEEP_DATA" != true ]]; then - echo " Data directory: $DATA_DIR (ALL YOUR DATA WILL BE DELETED!)" + echo " Data directory: $DATA_DIR" + echo " [!] ALL DATA, IAM USERS, AND ENCRYPTION KEYS WILL BE DELETED!" 
else echo " Data directory: $DATA_DIR (WILL BE KEPT)" fi @@ -227,8 +228,15 @@ echo "" if [[ "$KEEP_DATA" == true ]]; then echo "Your data has been preserved at: $DATA_DIR" echo "" - echo "To reinstall MyFSIO with existing data, run:" - echo " curl -fsSL https://go.jzwsite.com/myfsio-install | sudo bash" + echo "Preserved files include:" + echo " - All buckets and objects" + echo " - IAM configuration: $DATA_DIR/.myfsio.sys/config/iam.json" + echo " - Bucket policies: $DATA_DIR/.myfsio.sys/config/bucket_policies.json" + echo " - Secret key: $DATA_DIR/.myfsio.sys/config/.secret" + echo " - Encryption keys: $DATA_DIR/.myfsio.sys/keys/ (if encryption was enabled)" + echo "" + echo "To reinstall MyFSIO with existing data:" + echo " ./install.sh --data-dir $DATA_DIR" echo "" fi From 033b8a82be65f68f5c3b759a95b7905b9e68b795 Mon Sep 17 00:00:00 2001 From: kqjy Date: Thu, 5 Feb 2026 20:44:11 +0800 Subject: [PATCH 06/14] Fix error handlers for API mode; distinguish files from directories in object lookup; Fix UI not showing newly uploaded objects by adding Cache-Control headers --- app/__init__.py | 30 ++++++++++++++++++++++++++++-- app/s3_api.py | 28 +++++++++++++++++++++------- app/storage.py | 2 +- app/ui.py | 4 +++- 4 files changed, 53 insertions(+), 11 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index ef13ad4..636dc7c 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -263,11 +263,37 @@ def create_app( @app.errorhandler(500) def internal_error(error): - return render_template('500.html'), 500 + wants_html = request.accept_mimetypes.accept_html + path = request.path or "" + if include_ui and wants_html and (path.startswith("/ui") or path == "/"): + return render_template('500.html'), 500 + error_xml = ( + '' + '' + 'InternalError' + 'An internal server error occurred' + f'{path}' + f'{getattr(g, "request_id", "-")}' + '' + ) + return error_xml, 500, {'Content-Type': 'application/xml'} @app.errorhandler(CSRFError) def handle_csrf_error(e): - return 
render_template('csrf_error.html', reason=e.description), 400 + wants_html = request.accept_mimetypes.accept_html + path = request.path or "" + if include_ui and wants_html and (path.startswith("/ui") or path == "/"): + return render_template('csrf_error.html', reason=e.description), 400 + error_xml = ( + '' + '' + 'CSRFError' + f'{e.description}' + f'{path}' + f'{getattr(g, "request_id", "-")}' + '' + ) + return error_xml, 400, {'Content-Type': 'application/xml'} @app.template_filter("filesizeformat") def filesizeformat(value: int) -> str: diff --git a/app/s3_api.py b/app/s3_api.py index f2cfa0b..12a72d1 100644 --- a/app/s3_api.py +++ b/app/s3_api.py @@ -2776,9 +2776,14 @@ def object_handler(bucket_name: str, object_key: str): except StorageError as exc: return _error_response("InternalError", str(exc), 500) else: - stat = path.stat() - file_size = stat.st_size - etag = storage._compute_etag(path) + try: + stat = path.stat() + file_size = stat.st_size + etag = storage._compute_etag(path) + except PermissionError: + return _error_response("AccessDenied", "Permission denied accessing object", 403) + except OSError as exc: + return _error_response("InternalError", f"Failed to access object: {exc}", 500) if range_header: try: @@ -2819,13 +2824,22 @@ def object_handler(bucket_name: str, object_key: str): except StorageError as exc: return _error_response("InternalError", str(exc), 500) else: - stat = path.stat() - response = Response(status=200) - etag = storage._compute_etag(path) + try: + stat = path.stat() + response = Response(status=200) + etag = storage._compute_etag(path) + except PermissionError: + return _error_response("AccessDenied", "Permission denied accessing object", 403) + except OSError as exc: + return _error_response("InternalError", f"Failed to access object: {exc}", 500) response.headers["Content-Type"] = mimetype logged_bytes = 0 - _apply_object_headers(response, file_stat=path.stat() if not is_encrypted else None, metadata=metadata, etag=etag) + 
try: + file_stat = path.stat() if not is_encrypted else None + except (PermissionError, OSError): + file_stat = None + _apply_object_headers(response, file_stat=file_stat, metadata=metadata, etag=etag) if request.method == "GET": response_overrides = { diff --git a/app/storage.py b/app/storage.py index 22391be..f102949 100644 --- a/app/storage.py +++ b/app/storage.py @@ -522,7 +522,7 @@ class ObjectStorage: def get_object_path(self, bucket_name: str, object_key: str) -> Path: path = self._object_path(bucket_name, object_key) - if not path.exists(): + if not path.is_file(): raise ObjectNotFoundError("Object not found") return path diff --git a/app/ui.py b/app/ui.py index 9df2131..8287259 100644 --- a/app/ui.py +++ b/app/ui.py @@ -594,7 +594,7 @@ def list_bucket_objects(bucket_name: str): "etag": obj.etag, }) - return jsonify({ + response = jsonify({ "objects": objects_data, "is_truncated": result.is_truncated, "next_continuation_token": result.next_continuation_token, @@ -613,6 +613,8 @@ def list_bucket_objects(bucket_name: str): "metadata": metadata_template, }, }) + response.headers["Cache-Control"] = "no-store" + return response @ui_bp.get("/buckets//objects/stream") From e9a035827bb5c284e44421464b0baa8d9aca3469 Mon Sep 17 00:00:00 2001 From: kqjy Date: Thu, 5 Feb 2026 20:56:42 +0800 Subject: [PATCH 07/14] Add _touch_cache_marker for UI object delay count issue --- app/storage.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/app/storage.py b/app/storage.py index f102949..992a25c 100644 --- a/app/storage.py +++ b/app/storage.py @@ -1617,14 +1617,16 @@ class ObjectStorage: Uses LRU eviction to prevent unbounded cache growth. Thread-safe with per-bucket locks to reduce contention. + Also checks file-based marker for cross-process cache invalidation. 
""" now = time.time() + marker_mtime = self._get_cache_marker_mtime(bucket_id) with self._cache_lock: cached = self._object_cache.get(bucket_id) if cached: objects, timestamp = cached - if now - timestamp < self._cache_ttl: + if now - timestamp < self._cache_ttl and marker_mtime <= timestamp: self._object_cache.move_to_end(bucket_id) return objects cache_version = self._cache_version.get(bucket_id, 0) @@ -1635,7 +1637,7 @@ class ObjectStorage: cached = self._object_cache.get(bucket_id) if cached: objects, timestamp = cached - if now - timestamp < self._cache_ttl: + if now - timestamp < self._cache_ttl and marker_mtime <= timestamp: self._object_cache.move_to_end(bucket_id) return objects objects = self._build_object_cache(bucket_path) @@ -1656,21 +1658,42 @@ class ObjectStorage: """Invalidate the object cache and etag index for a bucket. Increments version counter to signal stale reads. + Also touches marker file for cross-process invalidation. """ with self._cache_lock: self._object_cache.pop(bucket_id, None) self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1 + self._touch_cache_marker(bucket_id) + etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json" try: etag_index_path.unlink(missing_ok=True) except OSError: pass + def _touch_cache_marker(self, bucket_id: str) -> None: + """Touch the cache marker file to signal other processes that cache is stale.""" + marker_path = self._system_bucket_root(bucket_id) / ".cache_marker" + try: + marker_path.parent.mkdir(parents=True, exist_ok=True) + marker_path.write_text(str(time.time()), encoding="utf-8") + except OSError: + pass + + def _get_cache_marker_mtime(self, bucket_id: str) -> float: + """Get the mtime of the cache marker file, or 0 if it doesn't exist.""" + marker_path = self._system_bucket_root(bucket_id) / ".cache_marker" + try: + return marker_path.stat().st_mtime + except OSError: + return 0 + def _update_object_cache_entry(self, bucket_id: str, key: str, meta: 
Optional[ObjectMeta]) -> None: """Update a single entry in the object cache instead of invalidating the whole cache. This is a performance optimization - lazy update instead of full invalidation. + Also touches a marker file to signal cache invalidation to other processes. """ with self._cache_lock: cached = self._object_cache.get(bucket_id) @@ -1682,6 +1705,7 @@ class ObjectStorage: objects[key] = meta self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1 self._sorted_key_cache.pop(bucket_id, None) + self._touch_cache_marker(bucket_id) def warm_cache(self, bucket_names: Optional[List[str]] = None) -> None: """Pre-warm the object cache for specified buckets or all buckets. From 2643a79121b58eaacecd09d21f2fe70fbaa41c75 Mon Sep 17 00:00:00 2001 From: kqjy Date: Thu, 5 Feb 2026 21:08:18 +0800 Subject: [PATCH 08/14] Debug object browser object count delay --- app/storage.py | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/app/storage.py b/app/storage.py index 992a25c..716bfdd 100644 --- a/app/storage.py +++ b/app/storage.py @@ -322,19 +322,23 @@ class ObjectStorage: """Incrementally update cached bucket statistics instead of invalidating. This avoids expensive full directory scans on every PUT/DELETE by - adjusting the cached values directly. + adjusting the cached values directly. Also signals cross-process cache + invalidation by updating the file mtime. 
""" cache_path = self._system_bucket_root(bucket_id) / "stats.json" try: + cache_path.parent.mkdir(parents=True, exist_ok=True) if cache_path.exists(): data = json.loads(cache_path.read_text(encoding="utf-8")) - data["objects"] = max(0, data.get("objects", 0) + objects_delta) - data["bytes"] = max(0, data.get("bytes", 0) + bytes_delta) - data["version_count"] = max(0, data.get("version_count", 0) + version_count_delta) - data["version_bytes"] = max(0, data.get("version_bytes", 0) + version_bytes_delta) - data["total_objects"] = max(0, data.get("total_objects", 0) + objects_delta + version_count_delta) - data["total_bytes"] = max(0, data.get("total_bytes", 0) + bytes_delta + version_bytes_delta) - cache_path.write_text(json.dumps(data), encoding="utf-8") + else: + data = {"objects": 0, "bytes": 0, "version_count": 0, "version_bytes": 0, "total_objects": 0, "total_bytes": 0} + data["objects"] = max(0, data.get("objects", 0) + objects_delta) + data["bytes"] = max(0, data.get("bytes", 0) + bytes_delta) + data["version_count"] = max(0, data.get("version_count", 0) + version_count_delta) + data["version_bytes"] = max(0, data.get("version_bytes", 0) + version_bytes_delta) + data["total_objects"] = max(0, data.get("total_objects", 0) + objects_delta + version_count_delta) + data["total_bytes"] = max(0, data.get("total_bytes", 0) + bytes_delta + version_bytes_delta) + cache_path.write_text(json.dumps(data), encoding="utf-8") except (OSError, json.JSONDecodeError): pass @@ -1658,34 +1662,27 @@ class ObjectStorage: """Invalidate the object cache and etag index for a bucket. Increments version counter to signal stale reads. - Also touches marker file for cross-process invalidation. + Cross-process invalidation is handled by checking stats.json mtime. 
""" with self._cache_lock: self._object_cache.pop(bucket_id, None) self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1 - self._touch_cache_marker(bucket_id) - etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json" try: etag_index_path.unlink(missing_ok=True) except OSError: pass - def _touch_cache_marker(self, bucket_id: str) -> None: - """Touch the cache marker file to signal other processes that cache is stale.""" - marker_path = self._system_bucket_root(bucket_id) / ".cache_marker" - try: - marker_path.parent.mkdir(parents=True, exist_ok=True) - marker_path.write_text(str(time.time()), encoding="utf-8") - except OSError: - pass - def _get_cache_marker_mtime(self, bucket_id: str) -> float: - """Get the mtime of the cache marker file, or 0 if it doesn't exist.""" - marker_path = self._system_bucket_root(bucket_id) / ".cache_marker" + """Get the mtime of stats.json for cross-process cache invalidation. + + Uses stats.json because it's already updated on every object change + via _update_bucket_stats_cache. + """ + stats_path = self._system_bucket_root(bucket_id) / "stats.json" try: - return marker_path.stat().st_mtime + return stats_path.stat().st_mtime except OSError: return 0 @@ -1693,7 +1690,7 @@ class ObjectStorage: """Update a single entry in the object cache instead of invalidating the whole cache. This is a performance optimization - lazy update instead of full invalidation. - Also touches a marker file to signal cache invalidation to other processes. + Cross-process invalidation is handled by checking stats.json mtime. 
""" with self._cache_lock: cached = self._object_cache.get(bucket_id) @@ -1705,7 +1702,6 @@ class ObjectStorage: objects[key] = meta self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1 self._sorted_key_cache.pop(bucket_id, None) - self._touch_cache_marker(bucket_id) def warm_cache(self, bucket_names: Optional[List[str]] = None) -> None: """Pre-warm the object cache for specified buckets or all buckets. From 147962e1dd43a67857b72a1f3da28006128e2855 Mon Sep 17 00:00:00 2001 From: kqjy Date: Thu, 5 Feb 2026 21:18:35 +0800 Subject: [PATCH 09/14] Further debugging of object browser object count delay --- app/storage.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/app/storage.py b/app/storage.py index 716bfdd..41e307d 100644 --- a/app/storage.py +++ b/app/storage.py @@ -177,7 +177,7 @@ class ObjectStorage: self.root = Path(root) self.root.mkdir(parents=True, exist_ok=True) self._ensure_system_roots() - self._object_cache: OrderedDict[str, tuple[Dict[str, ObjectMeta], float]] = OrderedDict() + self._object_cache: OrderedDict[str, tuple[Dict[str, ObjectMeta], float, float]] = OrderedDict() self._cache_lock = threading.Lock() self._bucket_locks: Dict[str, threading.Lock] = {} self._cache_version: Dict[str, int] = {} @@ -1621,39 +1621,43 @@ class ObjectStorage: Uses LRU eviction to prevent unbounded cache growth. Thread-safe with per-bucket locks to reduce contention. - Also checks file-based marker for cross-process cache invalidation. + Checks stats.json mtime for cross-process cache invalidation. 
""" now = time.time() - marker_mtime = self._get_cache_marker_mtime(bucket_id) + current_stats_mtime = self._get_cache_marker_mtime(bucket_id) with self._cache_lock: cached = self._object_cache.get(bucket_id) if cached: - objects, timestamp = cached - if now - timestamp < self._cache_ttl and marker_mtime <= timestamp: + objects, timestamp, cached_stats_mtime = cached + if now - timestamp < self._cache_ttl and current_stats_mtime == cached_stats_mtime: self._object_cache.move_to_end(bucket_id) return objects cache_version = self._cache_version.get(bucket_id, 0) bucket_lock = self._get_bucket_lock(bucket_id) with bucket_lock: + current_stats_mtime = self._get_cache_marker_mtime(bucket_id) with self._cache_lock: cached = self._object_cache.get(bucket_id) if cached: - objects, timestamp = cached - if now - timestamp < self._cache_ttl and marker_mtime <= timestamp: + objects, timestamp, cached_stats_mtime = cached + if now - timestamp < self._cache_ttl and current_stats_mtime == cached_stats_mtime: self._object_cache.move_to_end(bucket_id) return objects + objects = self._build_object_cache(bucket_path) + new_stats_mtime = self._get_cache_marker_mtime(bucket_id) with self._cache_lock: current_version = self._cache_version.get(bucket_id, 0) if current_version != cache_version: objects = self._build_object_cache(bucket_path) + new_stats_mtime = self._get_cache_marker_mtime(bucket_id) while len(self._object_cache) >= self._object_cache_max_size: self._object_cache.popitem(last=False) - self._object_cache[bucket_id] = (objects, time.time()) + self._object_cache[bucket_id] = (objects, time.time(), new_stats_mtime) self._object_cache.move_to_end(bucket_id) return objects @@ -1695,7 +1699,7 @@ class ObjectStorage: with self._cache_lock: cached = self._object_cache.get(bucket_id) if cached: - objects, timestamp = cached + objects, timestamp, stats_mtime = cached if meta is None: objects.pop(key, None) else: From 07fb1ac7731d62112e3c6f6d460bd8f9b75bd011 Mon Sep 17 00:00:00 2001 
From: kqjy Date: Thu, 5 Feb 2026 21:32:40 +0800 Subject: [PATCH 10/14] Fix cross-process cache invalidation on Windows using version counter instead of mtime --- app/storage.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/app/storage.py b/app/storage.py index 41e307d..3385b8e 100644 --- a/app/storage.py +++ b/app/storage.py @@ -285,6 +285,10 @@ class ObjectStorage: return cached_stats raise + existing_serial = 0 + if cached_stats is not None: + existing_serial = cached_stats.get("_cache_serial", 0) + stats = { "objects": object_count, "bytes": total_bytes, @@ -292,6 +296,7 @@ class ObjectStorage: "version_bytes": version_bytes, "total_objects": object_count + version_count, "total_bytes": total_bytes + version_bytes, + "_cache_serial": existing_serial, } try: @@ -323,7 +328,7 @@ class ObjectStorage: This avoids expensive full directory scans on every PUT/DELETE by adjusting the cached values directly. Also signals cross-process cache - invalidation by updating the file mtime. + invalidation by incrementing _cache_serial. 
""" cache_path = self._system_bucket_root(bucket_id) / "stats.json" try: @@ -331,13 +336,14 @@ class ObjectStorage: if cache_path.exists(): data = json.loads(cache_path.read_text(encoding="utf-8")) else: - data = {"objects": 0, "bytes": 0, "version_count": 0, "version_bytes": 0, "total_objects": 0, "total_bytes": 0} + data = {"objects": 0, "bytes": 0, "version_count": 0, "version_bytes": 0, "total_objects": 0, "total_bytes": 0, "_cache_serial": 0} data["objects"] = max(0, data.get("objects", 0) + objects_delta) data["bytes"] = max(0, data.get("bytes", 0) + bytes_delta) data["version_count"] = max(0, data.get("version_count", 0) + version_count_delta) data["version_bytes"] = max(0, data.get("version_bytes", 0) + version_bytes_delta) data["total_objects"] = max(0, data.get("total_objects", 0) + objects_delta + version_count_delta) data["total_bytes"] = max(0, data.get("total_bytes", 0) + bytes_delta + version_bytes_delta) + data["_cache_serial"] = data.get("_cache_serial", 0) + 1 cache_path.write_text(json.dumps(data), encoding="utf-8") except (OSError, json.JSONDecodeError): pass @@ -1679,15 +1685,16 @@ class ObjectStorage: pass def _get_cache_marker_mtime(self, bucket_id: str) -> float: - """Get the mtime of stats.json for cross-process cache invalidation. + """Get the cache serial from stats.json for cross-process cache invalidation. - Uses stats.json because it's already updated on every object change - via _update_bucket_stats_cache. + Uses _cache_serial field instead of file mtime because Windows filesystem + caching can delay mtime visibility across processes. 
""" stats_path = self._system_bucket_root(bucket_id) / "stats.json" try: - return stats_path.stat().st_mtime - except OSError: + data = json.loads(stats_path.read_text(encoding="utf-8")) + return float(data.get("_cache_serial", 0)) + except (OSError, json.JSONDecodeError): return 0 def _update_object_cache_entry(self, bucket_id: str, key: str, meta: Optional[ObjectMeta]) -> None: From 126657c99f483795b046ba5dc8333e267782cf07 Mon Sep 17 00:00:00 2001 From: kqjy Date: Thu, 5 Feb 2026 21:45:02 +0800 Subject: [PATCH 11/14] Further debugging of object browser object count delay --- app/storage.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/app/storage.py b/app/storage.py index 3385b8e..6839b48 100644 --- a/app/storage.py +++ b/app/storage.py @@ -1685,15 +1685,17 @@ class ObjectStorage: pass def _get_cache_marker_mtime(self, bucket_id: str) -> float: - """Get the cache serial from stats.json for cross-process cache invalidation. + """Get a cache marker combining serial and object count for cross-process invalidation. - Uses _cache_serial field instead of file mtime because Windows filesystem - caching can delay mtime visibility across processes. + Returns a combined value that changes if either _cache_serial or object count changes. + This handles cases where the serial was reset but object count differs. 
""" stats_path = self._system_bucket_root(bucket_id) / "stats.json" try: data = json.loads(stats_path.read_text(encoding="utf-8")) - return float(data.get("_cache_serial", 0)) + serial = data.get("_cache_serial", 0) + count = data.get("objects", 0) + return float(serial * 1000000 + count) except (OSError, json.JSONDecodeError): return 0 From e0dee9db363e622b78ed2b721aa970dfe6a4e4a5 Mon Sep 17 00:00:00 2001 From: kqjy Date: Thu, 5 Feb 2026 22:22:59 +0800 Subject: [PATCH 12/14] Fix UI object browser not showing objects uploaded via S3 API --- app/storage.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/storage.py b/app/storage.py index 6839b48..ba13c09 100644 --- a/app/storage.py +++ b/app/storage.py @@ -1627,7 +1627,7 @@ class ObjectStorage: Uses LRU eviction to prevent unbounded cache growth. Thread-safe with per-bucket locks to reduce contention. - Checks stats.json mtime for cross-process cache invalidation. + Checks stats.json for cross-process cache invalidation. 
""" now = time.time() current_stats_mtime = self._get_cache_marker_mtime(bucket_id) @@ -1635,7 +1635,7 @@ class ObjectStorage: with self._cache_lock: cached = self._object_cache.get(bucket_id) if cached: - objects, timestamp, cached_stats_mtime = cached + objects, timestamp, cached_stats_mtime = cached if now - timestamp < self._cache_ttl and current_stats_mtime == cached_stats_mtime: self._object_cache.move_to_end(bucket_id) return objects @@ -1665,6 +1665,8 @@ class ObjectStorage: self._object_cache[bucket_id] = (objects, time.time(), new_stats_mtime) self._object_cache.move_to_end(bucket_id) + self._cache_version[bucket_id] = current_version + 1 + self._sorted_key_cache.pop(bucket_id, None) return objects From 0f750b9d899632897398e21daa8d347d94ff84f2 Mon Sep 17 00:00:00 2001 From: kqjy Date: Thu, 5 Feb 2026 22:56:00 +0800 Subject: [PATCH 13/14] Optimize object browser for large listings on slow networks --- app/compression.py | 13 ++++++++++-- app/ui.py | 1 + static/js/bucket-detail-main.js | 36 +++++++++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/app/compression.py b/app/compression.py index a0bed7c..28e6899 100644 --- a/app/compression.py +++ b/app/compression.py @@ -36,10 +36,11 @@ class GzipMiddleware: content_type = None content_length = None should_compress = False + is_streaming = False exc_info_holder = [None] def custom_start_response(status: str, headers: List[Tuple[str, str]], exc_info=None): - nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress + nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress, is_streaming response_started = True status_code = int(status.split(' ', 1)[0]) response_headers = list(headers) @@ -54,6 +55,9 @@ class GzipMiddleware: elif name_lower == 'content-encoding': should_compress = False return start_response(status, headers, exc_info) + elif name_lower == 'x-stream-response': + 
is_streaming = True + return start_response(status, headers, exc_info) if content_type and content_type in COMPRESSIBLE_MIMES: if content_length is None or content_length >= self.min_size: @@ -61,7 +65,12 @@ class GzipMiddleware: return None - response_body = b''.join(self.app(environ, custom_start_response)) + app_iter = self.app(environ, custom_start_response) + + if is_streaming: + return app_iter + + response_body = b''.join(app_iter) if not response_started: return [response_body] diff --git a/app/ui.py b/app/ui.py index 8287259..461d006 100644 --- a/app/ui.py +++ b/app/ui.py @@ -711,6 +711,7 @@ def stream_bucket_objects(bucket_name: str): headers={ 'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no', + 'X-Stream-Response': 'true', } ) diff --git a/static/js/bucket-detail-main.js b/static/js/bucket-detail-main.js index 97d047d..48dfbcd 100644 --- a/static/js/bucket-detail-main.js +++ b/static/js/bucket-detail-main.js @@ -182,6 +182,9 @@ let visibleItems = []; let renderedRange = { start: 0, end: 0 }; + let memoizedVisibleItems = null; + let memoizedInputs = { objectCount: -1, prefix: null, filterTerm: null }; + const createObjectRow = (obj, displayKey = null) => { const tr = document.createElement('tr'); tr.dataset.objectRow = ''; @@ -340,7 +343,21 @@ } }; - const computeVisibleItems = () => { + const computeVisibleItems = (forceRecompute = false) => { + const currentInputs = { + objectCount: allObjects.length, + prefix: currentPrefix, + filterTerm: currentFilterTerm + }; + + if (!forceRecompute && + memoizedVisibleItems !== null && + memoizedInputs.objectCount === currentInputs.objectCount && + memoizedInputs.prefix === currentInputs.prefix && + memoizedInputs.filterTerm === currentInputs.filterTerm) { + return memoizedVisibleItems; + } + const items = []; const folders = new Set(); @@ -381,6 +398,8 @@ return aKey.localeCompare(bKey); }); + memoizedVisibleItems = items; + memoizedInputs = currentInputs; return items; }; @@ -533,6 +552,8 @@ 
loadedObjectCount = 0; totalObjectCount = 0; allObjects = []; + memoizedVisibleItems = null; + memoizedInputs = { objectCount: -1, prefix: null, filterTerm: null }; pendingStreamObjects = []; streamAbortController = new AbortController(); @@ -643,6 +664,8 @@ loadedObjectCount = 0; totalObjectCount = 0; allObjects = []; + memoizedVisibleItems = null; + memoizedInputs = { objectCount: -1, prefix: null, filterTerm: null }; } if (append && loadMoreSpinner) { @@ -985,13 +1008,15 @@ }; const navigateToFolder = (prefix) => { + if (streamAbortController) { + streamAbortController.abort(); + streamAbortController = null; + } + currentPrefix = prefix; if (scrollContainer) scrollContainer.scrollTop = 0; - refreshVirtualList(); - renderBreadcrumb(prefix); - selectedRows.clear(); if (typeof updateBulkDeleteState === 'function') { @@ -1001,6 +1026,9 @@ if (previewPanel) previewPanel.classList.add('d-none'); if (previewEmpty) previewEmpty.classList.remove('d-none'); activeRow = null; + + isLoadingObjects = false; + loadObjects(false); }; const renderObjectsView = () => { From 77a46d072555c9f61c99f7c92d396ea4cb9c9b4e Mon Sep 17 00:00:00 2001 From: kqjy Date: Thu, 5 Feb 2026 23:49:36 +0800 Subject: [PATCH 14/14] Binary run fix --- run.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/run.py b/run.py index 013f79c..ae120fe 100644 --- a/run.py +++ b/run.py @@ -5,6 +5,7 @@ import argparse import os import sys import warnings +import multiprocessing from multiprocessing import Process from pathlib import Path @@ -87,6 +88,10 @@ def serve_ui(port: int, prod: bool = False, config: Optional[AppConfig] = None) if __name__ == "__main__": + multiprocessing.freeze_support() + if _is_frozen(): + multiprocessing.set_start_method("spawn", force=True) + parser = argparse.ArgumentParser(description="Run the S3 clone services.") parser.add_argument("--mode", choices=["api", "ui", "both"], default="both") parser.add_argument("--api-port", type=int, default=5000)