diff --git a/app/admin_api.py b/app/admin_api.py index 8ebc76f..a3d436d 100644 --- a/app/admin_api.py +++ b/app/admin_api.py @@ -1,6 +1,7 @@ from __future__ import annotations import ipaddress +import json import logging import re import socket @@ -354,6 +355,10 @@ def update_peer_site(site_id: str): if region_error: return _json_error("ValidationError", region_error, 400) + if "connection_id" in payload: + if payload["connection_id"] and not _connections().get(payload["connection_id"]): + return _json_error("ValidationError", f"Connection '{payload['connection_id']}' not found", 400) + peer = PeerSite( site_id=site_id, endpoint=payload.get("endpoint", existing.endpoint), diff --git a/app/iam.py b/app/iam.py index 6ab4f38..4dd6b68 100644 --- a/app/iam.py +++ b/app/iam.py @@ -529,11 +529,13 @@ class IamService: return candidate if candidate in ALLOWED_ACTIONS else "" def _write_default(self) -> None: + access_key = secrets.token_hex(12) + secret_key = secrets.token_urlsafe(32) default = { "users": [ { - "access_key": "localadmin", - "secret_key": "localadmin", + "access_key": access_key, + "secret_key": secret_key, "display_name": "Local Admin", "policies": [ {"bucket": "*", "actions": list(ALLOWED_ACTIONS)} @@ -542,6 +544,14 @@ class IamService: ] } self.config_path.write_text(json.dumps(default, indent=2)) + print(f"\n{'='*60}") + print("MYFSIO FIRST RUN - ADMIN CREDENTIALS GENERATED") + print(f"{'='*60}") + print(f"Access Key: {access_key}") + print(f"Secret Key: {secret_key}") + print(f"{'='*60}") + print(f"Missed this? 
Check: {self.config_path}") + print(f"{'='*60}\n") def _generate_access_key(self) -> str: return secrets.token_hex(8) diff --git a/app/s3_api.py b/app/s3_api.py index f5f599e..f2cfa0b 100644 --- a/app/s3_api.py +++ b/app/s3_api.py @@ -1004,7 +1004,8 @@ def _apply_object_headers( response.headers["ETag"] = f'"{etag}"' response.headers["Accept-Ranges"] = "bytes" for key, value in (metadata or {}).items(): - response.headers[f"X-Amz-Meta-{key}"] = value + safe_value = _sanitize_header_value(str(value)) + response.headers[f"X-Amz-Meta-{key}"] = safe_value def _maybe_handle_bucket_subresource(bucket_name: str) -> Response | None: @@ -2342,10 +2343,12 @@ def _post_object(bucket_name: str) -> Response: success_action_redirect = request.form.get("success_action_redirect") if success_action_redirect: allowed_hosts = current_app.config.get("ALLOWED_REDIRECT_HOSTS", []) + if not allowed_hosts: + allowed_hosts = [request.host] parsed = urlparse(success_action_redirect) if parsed.scheme not in ("http", "https"): return _error_response("InvalidArgument", "Redirect URL must use http or https", 400) - if allowed_hosts and parsed.netloc not in allowed_hosts: + if parsed.netloc not in allowed_hosts: return _error_response("InvalidArgument", "Redirect URL host not allowed", 400) redirect_url = f"{success_action_redirect}?bucket={bucket_name}&key={quote(object_key)}&etag={meta.etag}" return Response(status=303, headers={"Location": redirect_url}) diff --git a/app/secret_store.py b/app/secret_store.py index 5c14f6e..903c2fa 100644 --- a/app/secret_store.py +++ b/app/secret_store.py @@ -18,6 +18,18 @@ class EphemeralSecretStore: self._store[token] = (payload, expires_at) return token + def peek(self, token: str | None) -> Any | None: + if not token: + return None + entry = self._store.get(token) + if not entry: + return None + payload, expires_at = entry + if expires_at < time.time(): + self._store.pop(token, None) + return None + return payload + def pop(self, token: str | None) -> Any 
| None: if not token: return None diff --git a/app/storage.py b/app/storage.py index 502a93c..22391be 100644 --- a/app/storage.py +++ b/app/storage.py @@ -186,6 +186,7 @@ class ObjectStorage: self._cache_ttl = cache_ttl self._object_cache_max_size = object_cache_max_size self._object_key_max_length_bytes = object_key_max_length_bytes + self._sorted_key_cache: Dict[str, tuple[list[str], int]] = {} def _get_bucket_lock(self, bucket_id: str) -> threading.Lock: """Get or create a lock for a specific bucket. Reduces global lock contention.""" @@ -243,10 +244,15 @@ class ObjectStorage: raise BucketNotFoundError("Bucket does not exist") cache_path = self._system_bucket_root(bucket_name) / "stats.json" + cached_stats = None + cache_fresh = False + if cache_path.exists(): try: - if time.time() - cache_path.stat().st_mtime < cache_ttl: - return json.loads(cache_path.read_text(encoding="utf-8")) + cache_fresh = time.time() - cache_path.stat().st_mtime < cache_ttl + cached_stats = json.loads(cache_path.read_text(encoding="utf-8")) + if cache_fresh: + return cached_stats except (OSError, json.JSONDecodeError): pass @@ -255,40 +261,45 @@ class ObjectStorage: version_count = 0 version_bytes = 0 - for path in bucket_path.rglob("*"): - if path.is_file(): - rel = path.relative_to(bucket_path) - if not rel.parts: - continue - top_folder = rel.parts[0] - if top_folder not in self.INTERNAL_FOLDERS: - stat = path.stat() - object_count += 1 - total_bytes += stat.st_size - - versions_root = self._bucket_versions_root(bucket_name) - if versions_root.exists(): - for path in versions_root.rglob("*.bin"): + try: + for path in bucket_path.rglob("*"): if path.is_file(): - stat = path.stat() - version_count += 1 - version_bytes += stat.st_size - + rel = path.relative_to(bucket_path) + if not rel.parts: + continue + top_folder = rel.parts[0] + if top_folder not in self.INTERNAL_FOLDERS: + stat = path.stat() + object_count += 1 + total_bytes += stat.st_size + + versions_root = 
self._bucket_versions_root(bucket_name) + if versions_root.exists(): + for path in versions_root.rglob("*.bin"): + if path.is_file(): + stat = path.stat() + version_count += 1 + version_bytes += stat.st_size + except OSError: + if cached_stats is not None: + return cached_stats + raise + stats = { "objects": object_count, "bytes": total_bytes, "version_count": version_count, "version_bytes": version_bytes, "total_objects": object_count + version_count, - "total_bytes": total_bytes + version_bytes, + "total_bytes": total_bytes + version_bytes, } - + try: cache_path.parent.mkdir(parents=True, exist_ok=True) cache_path.write_text(json.dumps(stats), encoding="utf-8") except OSError: pass - + return stats def _invalidate_bucket_stats_cache(self, bucket_id: str) -> None: @@ -299,6 +310,34 @@ class ObjectStorage: except OSError: pass + def _update_bucket_stats_cache( + self, + bucket_id: str, + *, + bytes_delta: int = 0, + objects_delta: int = 0, + version_bytes_delta: int = 0, + version_count_delta: int = 0, + ) -> None: + """Incrementally update cached bucket statistics instead of invalidating. + + This avoids expensive full directory scans on every PUT/DELETE by + adjusting the cached values directly. 
+ """ + cache_path = self._system_bucket_root(bucket_id) / "stats.json" + try: + if cache_path.exists(): + data = json.loads(cache_path.read_text(encoding="utf-8")) + data["objects"] = max(0, data.get("objects", 0) + objects_delta) + data["bytes"] = max(0, data.get("bytes", 0) + bytes_delta) + data["version_count"] = max(0, data.get("version_count", 0) + version_count_delta) + data["version_bytes"] = max(0, data.get("version_bytes", 0) + version_bytes_delta) + data["total_objects"] = max(0, data.get("total_objects", 0) + objects_delta + version_count_delta) + data["total_bytes"] = max(0, data.get("total_bytes", 0) + bytes_delta + version_bytes_delta) + cache_path.write_text(json.dumps(data), encoding="utf-8") + except (OSError, json.JSONDecodeError): + pass + def delete_bucket(self, bucket_name: str) -> None: bucket_path = self._bucket_path(bucket_name) if not bucket_path.exists(): @@ -333,22 +372,35 @@ class ObjectStorage: Returns: ListObjectsResult with objects, truncation status, and continuation token """ + import bisect + bucket_path = self._bucket_path(bucket_name) if not bucket_path.exists(): raise BucketNotFoundError("Bucket does not exist") bucket_id = bucket_path.name object_cache = self._get_object_cache(bucket_id, bucket_path) - - all_keys = sorted(object_cache.keys()) - + + cache_version = self._cache_version.get(bucket_id, 0) + cached_entry = self._sorted_key_cache.get(bucket_id) + if cached_entry and cached_entry[1] == cache_version: + all_keys = cached_entry[0] + else: + all_keys = sorted(object_cache.keys()) + self._sorted_key_cache[bucket_id] = (all_keys, cache_version) + if prefix: - all_keys = [k for k in all_keys if k.startswith(prefix)] - + lo = bisect.bisect_left(all_keys, prefix) + hi = len(all_keys) + for i in range(lo, len(all_keys)): + if not all_keys[i].startswith(prefix): + hi = i + break + all_keys = all_keys[lo:hi] + total_count = len(all_keys) start_index = 0 if continuation_token: - import bisect start_index = 
bisect.bisect_right(all_keys, continuation_token) if start_index >= total_count: return ListObjectsResult( @@ -356,8 +408,8 @@ class ObjectStorage: is_truncated=False, next_continuation_token=None, total_count=total_count, - ) - + ) + end_index = start_index + max_keys keys_slice = all_keys[start_index:end_index] is_truncated = end_index < total_count @@ -403,7 +455,9 @@ class ObjectStorage: is_overwrite = destination.exists() existing_size = destination.stat().st_size if is_overwrite else 0 + archived_version_size = 0 if self._is_versioning_enabled(bucket_path) and is_overwrite: + archived_version_size = existing_size self._archive_current_version(bucket_id, safe_key, reason="overwrite") tmp_dir = self._system_root_path() / self.SYSTEM_TMP_DIR @@ -416,11 +470,10 @@ class ObjectStorage: shutil.copyfileobj(_HashingReader(stream, checksum), target) new_size = tmp_path.stat().st_size - + size_delta = new_size - existing_size + object_delta = 0 if is_overwrite else 1 + if enforce_quota: - size_delta = new_size - existing_size - object_delta = 0 if is_overwrite else 1 - quota_check = self.check_quota( bucket_name, additional_bytes=max(0, size_delta), @@ -432,7 +485,7 @@ class ObjectStorage: quota_check["quota"], quota_check["usage"], ) - + shutil.move(str(tmp_path), str(destination)) finally: @@ -448,7 +501,13 @@ class ObjectStorage: combined_meta = {**internal_meta, **(metadata or {})} self._write_metadata(bucket_id, safe_key, combined_meta) - self._invalidate_bucket_stats_cache(bucket_id) + self._update_bucket_stats_cache( + bucket_id, + bytes_delta=size_delta, + objects_delta=object_delta, + version_bytes_delta=archived_version_size, + version_count_delta=1 if archived_version_size > 0 else 0, + ) obj_meta = ObjectMeta( key=safe_key.as_posix(), @@ -498,15 +557,24 @@ class ObjectStorage: path = self._object_path(bucket_name, object_key) if not path.exists(): return + deleted_size = path.stat().st_size safe_key = path.relative_to(bucket_path) bucket_id = 
bucket_path.name + archived_version_size = 0 if self._is_versioning_enabled(bucket_path): + archived_version_size = deleted_size self._archive_current_version(bucket_id, safe_key, reason="delete") rel = path.relative_to(bucket_path) self._safe_unlink(path) self._delete_metadata(bucket_id, rel) - self._invalidate_bucket_stats_cache(bucket_id) + self._update_bucket_stats_cache( + bucket_id, + bytes_delta=-deleted_size, + objects_delta=-1, + version_bytes_delta=archived_version_size, + version_count_delta=1 if archived_version_size > 0 else 0, + ) self._update_object_cache_entry(bucket_id, safe_key.as_posix(), None) self._cleanup_empty_parents(path, bucket_path) @@ -828,7 +896,12 @@ class ObjectStorage: if not isinstance(metadata, dict): metadata = {} destination = bucket_path / safe_key - if self._is_versioning_enabled(bucket_path) and destination.exists(): + restored_size = data_path.stat().st_size + is_overwrite = destination.exists() + existing_size = destination.stat().st_size if is_overwrite else 0 + archived_version_size = 0 + if self._is_versioning_enabled(bucket_path) and is_overwrite: + archived_version_size = existing_size self._archive_current_version(bucket_id, safe_key, reason="restore-overwrite") destination.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(data_path, destination) @@ -837,7 +910,13 @@ class ObjectStorage: else: self._delete_metadata(bucket_id, safe_key) stat = destination.stat() - self._invalidate_bucket_stats_cache(bucket_id) + self._update_bucket_stats_cache( + bucket_id, + bytes_delta=restored_size - existing_size, + objects_delta=0 if is_overwrite else 1, + version_bytes_delta=archived_version_size, + version_count_delta=1 if archived_version_size > 0 else 0, + ) return ObjectMeta( key=safe_key.as_posix(), size=stat.st_size, @@ -861,6 +940,7 @@ class ObjectStorage: meta_path = legacy_version_dir / f"{version_id}.json" if not data_path.exists() and not meta_path.exists(): raise StorageError(f"Version {version_id} not found") + 
deleted_version_size = data_path.stat().st_size if data_path.exists() else 0 if data_path.exists(): data_path.unlink() if meta_path.exists(): @@ -868,6 +948,12 @@ class ObjectStorage: parent = data_path.parent if parent.exists() and not any(parent.iterdir()): parent.rmdir() + if deleted_version_size > 0: + self._update_bucket_stats_cache( + bucket_id, + version_bytes_delta=-deleted_version_size, + version_count_delta=-1, + ) def list_orphaned_objects(self, bucket_name: str) -> List[Dict[str, Any]]: bucket_path = self._bucket_path(bucket_name) @@ -1164,14 +1250,14 @@ class ObjectStorage: safe_key = self._sanitize_object_key(manifest["object_key"], self._object_key_max_length_bytes) destination = bucket_path / safe_key - + is_overwrite = destination.exists() existing_size = destination.stat().st_size if is_overwrite else 0 - + size_delta = total_size - existing_size + object_delta = 0 if is_overwrite else 1 + versioning_enabled = self._is_versioning_enabled(bucket_path) + if enforce_quota: - size_delta = total_size - existing_size - object_delta = 0 if is_overwrite else 1 - quota_check = self.check_quota( bucket_name, additional_bytes=max(0, size_delta), @@ -1183,14 +1269,16 @@ class ObjectStorage: quota_check["quota"], quota_check["usage"], ) - + destination.parent.mkdir(parents=True, exist_ok=True) lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock" + archived_version_size = 0 try: with _atomic_lock_file(lock_file_path): - if self._is_versioning_enabled(bucket_path) and destination.exists(): + if versioning_enabled and destination.exists(): + archived_version_size = destination.stat().st_size self._archive_current_version(bucket_id, safe_key, reason="overwrite") checksum = hashlib.md5() with destination.open("wb") as target: @@ -1210,7 +1298,13 @@ class ObjectStorage: shutil.rmtree(upload_root, ignore_errors=True) - self._invalidate_bucket_stats_cache(bucket_id) + self._update_bucket_stats_cache( + 
bucket_id, + bytes_delta=size_delta, + objects_delta=object_delta, + version_bytes_delta=archived_version_size, + version_count_delta=1 if archived_version_size > 0 else 0, + ) stat = destination.stat() etag = checksum.hexdigest() @@ -1586,6 +1680,8 @@ class ObjectStorage: objects.pop(key, None) else: objects[key] = meta + self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1 + self._sorted_key_cache.pop(bucket_id, None) def warm_cache(self, bucket_names: Optional[List[str]] = None) -> None: """Pre-warm the object cache for specified buckets or all buckets. diff --git a/app/ui.py b/app/ui.py index 334ba89..1aec8ce 100644 --- a/app/ui.py +++ b/app/ui.py @@ -220,13 +220,16 @@ def _bucket_access_descriptor(policy: dict[str, Any] | None) -> tuple[str, str]: def _current_principal(): - creds = session.get("credentials") + token = session.get("cred_token") + creds = _secret_store().peek(token) if token else None if not creds: return None try: return _iam().authenticate(creds["access_key"], creds["secret_key"]) except IamError: - session.pop("credentials", None) + session.pop("cred_token", None) + if token: + _secret_store().pop(token) return None @@ -251,7 +254,8 @@ def _authorize_ui(principal, bucket_name: str | None, action: str, *, object_key def _api_headers() -> dict[str, str]: - creds = session.get("credentials") or {} + token = session.get("cred_token") + creds = _secret_store().peek(token) or {} return { "X-Access-Key": creds.get("access_key", ""), "X-Secret-Key": creds.get("secret_key", ""), @@ -296,7 +300,9 @@ def login(): except IamError as exc: flash(_friendly_error_message(exc), "danger") return render_template("login.html") - session["credentials"] = {"access_key": access_key, "secret_key": secret_key} + creds = {"access_key": access_key, "secret_key": secret_key} + token = _secret_store().remember(creds, ttl=3600) + session["cred_token"] = token session.permanent = True flash(f"Welcome back, {principal.display_name}", "success") return 
redirect(url_for("ui.buckets_overview")) @@ -305,7 +311,9 @@ def login(): @ui_bp.post("/logout") def logout(): - session.pop("credentials", None) + token = session.pop("cred_token", None) + if token: + _secret_store().pop(token) flash("Signed out", "info") return redirect(url_for("ui.login")) @@ -542,7 +550,10 @@ def list_bucket_objects(bucket_name: str): except IamError as exc: return jsonify({"error": str(exc)}), 403 - max_keys = min(int(request.args.get("max_keys", 1000)), 100000) + try: + max_keys = min(int(request.args.get("max_keys", 1000)), 100000) + except ValueError: + return jsonify({"error": "max_keys must be an integer"}), 400 continuation_token = request.args.get("continuation_token") or None prefix = request.args.get("prefix") or None diff --git a/app/version.py b/app/version.py index be22c33..e4ed325 100644 --- a/app/version.py +++ b/app/version.py @@ -1,6 +1,6 @@ from __future__ import annotations -APP_VERSION = "0.2.5" +APP_VERSION = "0.2.6" def get_version() -> str: diff --git a/docs.md b/docs.md index e582c76..d4917d0 100644 --- a/docs.md +++ b/docs.md @@ -619,13 +619,15 @@ MyFSIO implements a comprehensive Identity and Access Management (IAM) system th ### Getting Started -1. On first boot, `data/.myfsio.sys/config/iam.json` is seeded with `localadmin / localadmin` that has wildcard access. -2. Sign into the UI using those credentials, then open **IAM**: +1. On first boot, `data/.myfsio.sys/config/iam.json` is created with a randomly generated admin user. The access key and secret key are printed to the console during first startup. If you miss it, check the `iam.json` file directly—credentials are stored in plaintext. +2. Sign into the UI using the generated credentials, then open **IAM**: - **Create user**: supply a display name and optional JSON inline policy array. - **Rotate secret**: generates a new secret key; the UI surfaces it once. 
- **Policy editor**: select a user, paste an array of objects (`{"bucket": "*", "actions": ["list", "read"]}`), and submit. Alias support includes AWS-style verbs (e.g., `s3:GetObject`). 3. Wildcard action `iam:*` is supported for admin user definitions. +> **Breaking Change (v0.2.6+):** Previous versions used fixed default credentials (`localadmin/localadmin`). If upgrading from an older version, your existing credentials remain unchanged, but new installations will generate random credentials. + ### Authentication The API expects every request to include authentication headers. The UI persists them in the Flask session after login. diff --git a/templates/docs.html index 2f713d7..66b9baf 100644 --- a/templates/docs.html +++ b/templates/docs.html @@ -451,10 +451,10 @@ sudo journalctl -u myfsio -f # View logs 03
MyFSIO seeds data/.myfsio.sys/config/iam.json with localadmin/localadmin. Sign in once, rotate it, then grant least-privilege access to teammates and tools.
On first startup, MyFSIO generates random admin credentials and prints them to the console. Missed it? Check data/.myfsio.sys/config/iam.json directly—credentials are stored in plaintext.
/ui/login, enter the bootstrap credentials, and rotate them immediately from the IAM page.iam.json) for the generated Access Key and Secret Key, then visit /ui/login.{"bucket": "*", "actions": ["list", "read"]}).data/.myfsio.sys/config/iam.json.{{ api_base }}
localadmin / localadmin
+