15 Commits

Author SHA1 Message Date
50fb5aa387 MyFSIO v0.3.9 Release
Reviewed-on: #32
2026-03-14 09:44:14 +00:00
55568d6892 Fix video seekbar in static website hosting by adding HTTP Range request support 2026-03-10 22:21:55 +08:00
a4ae81c77c Add integrity scanner: background detection and healing of corrupted objects, orphaned files, phantom metadata, stale versions, etag cache inconsistencies, and legacy metadata drift 2026-03-10 22:14:39 +08:00
9da7104887 Redesign tags UI: split pills, grid editor with column headers, ghost delete buttons 2026-03-10 17:48:17 +08:00
cc161bf362 MyFSIO v0.3.8 Release
Reviewed-on: #31
2026-03-10 08:31:27 +00:00
de5377e5ac Add garbage collection: background cleanup of orphaned temp files, multipart uploads, lock files, metadata, versions, and empty directories 2026-03-09 17:34:21 +08:00
80b77b64eb Fix bucket dashboard missing created date and incorrect object count badge in folder view 2026-03-09 15:27:08 +08:00
6c912a3d71 Add conditional GET/HEAD headers: If-Match, If-None-Match, If-Modified-Since, If-Unmodified-Since 2026-03-09 15:09:15 +08:00
2a0e77a754 MyFSIO v0.3.7 Release
Reviewed-on: #30
2026-03-09 06:25:50 +00:00
c6e368324a Update docs.md and docs.html for credential expiry, IAM encryption, admin key env vars, and --reset-cred 2026-03-08 13:38:44 +08:00
7b6c096bb7 Remove the check out the documentation paragraph at login page 2026-03-08 13:18:03 +08:00
03353a0aec Add credential expiry support: per-user expires_at with UI management, presets, and badge indicators; Fix IAM card dropdown clipped by overflow: remove gradient bar, allow overflow visible 2026-03-08 13:08:57 +08:00
eb0e435a5a MyFSIO v0.3.6 Release
Reviewed-on: #29
2026-03-08 04:46:31 +00:00
72f5d9d70c Restore data integrity guarantees: Content-MD5 validation, fsync durability, atomic metadata writes, concurrent write protection 2026-03-07 17:54:00 +08:00
be63e27c15 Reduce per-request CPU overhead: eliminate double stat(), cache content type and policy context, gate logging, configurable stat intervals 2026-03-07 14:08:23 +08:00
25 changed files with 3818 additions and 214 deletions

View File

@@ -29,6 +29,8 @@ from .encryption import EncryptionManager
from .extensions import limiter, csrf from .extensions import limiter, csrf
from .iam import IamService from .iam import IamService
from .kms import KMSManager from .kms import KMSManager
from .gc import GarbageCollector
from .integrity import IntegrityChecker
from .lifecycle import LifecycleManager from .lifecycle import LifecycleManager
from .notifications import NotificationService from .notifications import NotificationService
from .object_lock import ObjectLockService from .object_lock import ObjectLockService
@@ -130,6 +132,7 @@ def create_app(
Path(app.config["IAM_CONFIG"]), Path(app.config["IAM_CONFIG"]),
auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5), auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5),
auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15), auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15),
encryption_key=app.config.get("SECRET_KEY"),
) )
bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"])) bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300)) secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))
@@ -220,6 +223,29 @@ def create_app(
) )
lifecycle_manager.start() lifecycle_manager.start()
gc_collector = None
if app.config.get("GC_ENABLED", False):
gc_collector = GarbageCollector(
storage_root=storage_root,
interval_hours=app.config.get("GC_INTERVAL_HOURS", 6.0),
temp_file_max_age_hours=app.config.get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0),
multipart_max_age_days=app.config.get("GC_MULTIPART_MAX_AGE_DAYS", 7),
lock_file_max_age_hours=app.config.get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0),
dry_run=app.config.get("GC_DRY_RUN", False),
)
gc_collector.start()
integrity_checker = None
if app.config.get("INTEGRITY_ENABLED", False):
integrity_checker = IntegrityChecker(
storage_root=storage_root,
interval_hours=app.config.get("INTEGRITY_INTERVAL_HOURS", 24.0),
batch_size=app.config.get("INTEGRITY_BATCH_SIZE", 1000),
auto_heal=app.config.get("INTEGRITY_AUTO_HEAL", False),
dry_run=app.config.get("INTEGRITY_DRY_RUN", False),
)
integrity_checker.start()
app.extensions["object_storage"] = storage app.extensions["object_storage"] = storage
app.extensions["iam"] = iam app.extensions["iam"] = iam
app.extensions["bucket_policies"] = bucket_policies app.extensions["bucket_policies"] = bucket_policies
@@ -231,6 +257,8 @@ def create_app(
app.extensions["kms"] = kms_manager app.extensions["kms"] = kms_manager
app.extensions["acl"] = acl_service app.extensions["acl"] = acl_service
app.extensions["lifecycle"] = lifecycle_manager app.extensions["lifecycle"] = lifecycle_manager
app.extensions["gc"] = gc_collector
app.extensions["integrity"] = integrity_checker
app.extensions["object_lock"] = object_lock_service app.extensions["object_lock"] = object_lock_service
app.extensions["notifications"] = notification_service app.extensions["notifications"] = notification_service
app.extensions["access_logging"] = access_logging_service app.extensions["access_logging"] = access_logging_service
@@ -486,10 +514,6 @@ def _configure_logging(app: Flask) -> None:
g.request_id = f"{os.getpid():x}{next(_request_counter):012x}" g.request_id = f"{os.getpid():x}{next(_request_counter):012x}"
g.request_started_at = time.perf_counter() g.request_started_at = time.perf_counter()
g.request_bytes_in = request.content_length or 0 g.request_bytes_in = request.content_length or 0
app.logger.info(
"Request started",
extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
)
@app.before_request @app.before_request
def _maybe_serve_website(): def _maybe_serve_website():
@@ -538,30 +562,57 @@ def _configure_logging(app: Flask) -> None:
is_encrypted = "x-amz-server-side-encryption" in metadata is_encrypted = "x-amz-server-side-encryption" in metadata
except (StorageError, OSError): except (StorageError, OSError):
pass pass
if request.method == "HEAD":
response = Response(status=200)
if is_encrypted and hasattr(storage, "get_object_data"):
try:
data, _ = storage.get_object_data(bucket, object_key)
response.headers["Content-Length"] = len(data)
except (StorageError, OSError):
return _website_error_response(500, "Internal Server Error")
else:
try:
stat = obj_path.stat()
response.headers["Content-Length"] = stat.st_size
except OSError:
return _website_error_response(500, "Internal Server Error")
response.headers["Content-Type"] = content_type
return response
if is_encrypted and hasattr(storage, "get_object_data"): if is_encrypted and hasattr(storage, "get_object_data"):
try: try:
data, _ = storage.get_object_data(bucket, object_key) data, _ = storage.get_object_data(bucket, object_key)
response = Response(data, mimetype=content_type) file_size = len(data)
response.headers["Content-Length"] = len(data)
return response
except (StorageError, OSError): except (StorageError, OSError):
return _website_error_response(500, "Internal Server Error") return _website_error_response(500, "Internal Server Error")
else:
data = None
try:
stat = obj_path.stat()
file_size = stat.st_size
except OSError:
return _website_error_response(500, "Internal Server Error")
if request.method == "HEAD":
response = Response(status=200)
response.headers["Content-Length"] = file_size
response.headers["Content-Type"] = content_type
response.headers["Accept-Ranges"] = "bytes"
return response
from .s3_api import _parse_range_header
range_header = request.headers.get("Range")
if range_header:
ranges = _parse_range_header(range_header, file_size)
if ranges is None:
return Response(status=416, headers={"Content-Range": f"bytes */{file_size}"})
start, end = ranges[0]
length = end - start + 1
if data is not None:
partial_data = data[start:end + 1]
response = Response(partial_data, status=206, mimetype=content_type)
else:
def _stream_range(file_path, start_pos, length_to_read):
with file_path.open("rb") as f:
f.seek(start_pos)
remaining = length_to_read
while remaining > 0:
chunk = f.read(min(262144, remaining))
if not chunk:
break
remaining -= len(chunk)
yield chunk
response = Response(_stream_range(obj_path, start, length), status=206, mimetype=content_type, direct_passthrough=True)
response.headers["Content-Range"] = f"bytes {start}-{end}/{file_size}"
response.headers["Content-Length"] = length
response.headers["Accept-Ranges"] = "bytes"
return response
if data is not None:
response = Response(data, mimetype=content_type)
response.headers["Content-Length"] = file_size
response.headers["Accept-Ranges"] = "bytes"
return response
def _stream(file_path): def _stream(file_path):
with file_path.open("rb") as f: with file_path.open("rb") as f:
while True: while True:
@@ -569,13 +620,10 @@ def _configure_logging(app: Flask) -> None:
if not chunk: if not chunk:
break break
yield chunk yield chunk
try: response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
stat = obj_path.stat() response.headers["Content-Length"] = file_size
response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True) response.headers["Accept-Ranges"] = "bytes"
response.headers["Content-Length"] = stat.st_size return response
return response
except OSError:
return _website_error_response(500, "Internal Server Error")
def _serve_website_error(storage, bucket, error_doc_key, status_code): def _serve_website_error(storage, bucket, error_doc_key, status_code):
if not error_doc_key: if not error_doc_key:
@@ -620,14 +668,15 @@ def _configure_logging(app: Flask) -> None:
duration_ms = (time.perf_counter() - g.request_started_at) * 1000 duration_ms = (time.perf_counter() - g.request_started_at) * 1000
request_id = getattr(g, "request_id", f"{os.getpid():x}{next(_request_counter):012x}") request_id = getattr(g, "request_id", f"{os.getpid():x}{next(_request_counter):012x}")
response.headers.setdefault("X-Request-ID", request_id) response.headers.setdefault("X-Request-ID", request_id)
app.logger.info( if app.logger.isEnabledFor(logging.INFO):
"Request completed", app.logger.info(
extra={ "Request completed",
"path": request.path, extra={
"method": request.method, "path": request.path,
"remote_addr": request.remote_addr, "method": request.method,
}, "remote_addr": request.remote_addr,
) },
)
response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}" response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
operation_metrics = app.extensions.get("operation_metrics") operation_metrics = app.extensions.get("operation_metrics")

View File

@@ -14,6 +14,8 @@ from flask import Blueprint, Response, current_app, jsonify, request
from .connections import ConnectionStore from .connections import ConnectionStore
from .extensions import limiter from .extensions import limiter
from .gc import GarbageCollector
from .integrity import IntegrityChecker
from .iam import IamError, Principal from .iam import IamError, Principal
from .replication import ReplicationManager from .replication import ReplicationManager
from .site_registry import PeerSite, SiteInfo, SiteRegistry from .site_registry import PeerSite, SiteInfo, SiteRegistry
@@ -776,3 +778,106 @@ def delete_website_domain(domain: str):
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404) return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
logger.info("Website domain mapping deleted: %s", domain) logger.info("Website domain mapping deleted: %s", domain)
return Response(status=204) return Response(status=204)
def _gc() -> Optional[GarbageCollector]:
return current_app.extensions.get("gc")
@admin_api_bp.route("/gc/status", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def gc_status():
principal, error = _require_admin()
if error:
return error
gc = _gc()
if not gc:
return jsonify({"enabled": False, "message": "GC is not enabled. Set GC_ENABLED=true to enable."})
return jsonify(gc.get_status())
@admin_api_bp.route("/gc/run", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def gc_run_now():
principal, error = _require_admin()
if error:
return error
gc = _gc()
if not gc:
return _json_error("InvalidRequest", "GC is not enabled", 400)
payload = request.get_json(silent=True) or {}
original_dry_run = gc.dry_run
if "dry_run" in payload:
gc.dry_run = bool(payload["dry_run"])
try:
result = gc.run_now()
finally:
gc.dry_run = original_dry_run
logger.info("GC manual run by %s", principal.access_key)
return jsonify(result.to_dict())
@admin_api_bp.route("/gc/history", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def gc_history():
principal, error = _require_admin()
if error:
return error
gc = _gc()
if not gc:
return jsonify({"executions": []})
limit = min(int(request.args.get("limit", 50)), 200)
offset = int(request.args.get("offset", 0))
records = gc.get_history(limit=limit, offset=offset)
return jsonify({"executions": records})
def _integrity() -> Optional[IntegrityChecker]:
return current_app.extensions.get("integrity")
@admin_api_bp.route("/integrity/status", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def integrity_status():
principal, error = _require_admin()
if error:
return error
checker = _integrity()
if not checker:
return jsonify({"enabled": False, "message": "Integrity checker is not enabled. Set INTEGRITY_ENABLED=true to enable."})
return jsonify(checker.get_status())
@admin_api_bp.route("/integrity/run", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def integrity_run_now():
principal, error = _require_admin()
if error:
return error
checker = _integrity()
if not checker:
return _json_error("InvalidRequest", "Integrity checker is not enabled", 400)
payload = request.get_json(silent=True) or {}
override_dry_run = payload.get("dry_run")
override_auto_heal = payload.get("auto_heal")
result = checker.run_now(
auto_heal=override_auto_heal if override_auto_heal is not None else None,
dry_run=override_dry_run if override_dry_run is not None else None,
)
logger.info("Integrity manual run by %s", principal.access_key)
return jsonify(result.to_dict())
@admin_api_bp.route("/integrity/history", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def integrity_history():
principal, error = _require_admin()
if error:
return error
checker = _integrity()
if not checker:
return jsonify({"executions": []})
limit = min(int(request.args.get("limit", 50)), 200)
offset = int(request.args.get("offset", 0))
records = checker.get_history(limit=limit, offset=offset)
return jsonify({"executions": records})

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import ipaddress import ipaddress
import json import json
import os
import re import re
import time import time
from dataclasses import dataclass, field from dataclasses import dataclass, field
@@ -268,7 +269,7 @@ class BucketPolicyStore:
self._last_mtime = self._current_mtime() self._last_mtime = self._current_mtime()
# Performance: Avoid stat() on every request # Performance: Avoid stat() on every request
self._last_stat_check = 0.0 self._last_stat_check = 0.0
self._stat_check_interval = 1.0 # Only check mtime every 1 second self._stat_check_interval = float(os.environ.get("BUCKET_POLICY_STAT_CHECK_INTERVAL_SECONDS", "2.0"))
def maybe_reload(self) -> None: def maybe_reload(self) -> None:
# Performance: Skip stat check if we checked recently # Performance: Skip stat check if we checked recently

View File

@@ -150,6 +150,17 @@ class AppConfig:
allowed_redirect_hosts: list[str] allowed_redirect_hosts: list[str]
allow_internal_endpoints: bool allow_internal_endpoints: bool
website_hosting_enabled: bool website_hosting_enabled: bool
gc_enabled: bool
gc_interval_hours: float
gc_temp_file_max_age_hours: float
gc_multipart_max_age_days: int
gc_lock_file_max_age_hours: float
gc_dry_run: bool
integrity_enabled: bool
integrity_interval_hours: float
integrity_batch_size: int
integrity_auto_heal: bool
integrity_dry_run: bool
@classmethod @classmethod
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig": def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
@@ -319,6 +330,17 @@ class AppConfig:
allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()] allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"} allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"} website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
gc_enabled = str(_get("GC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
gc_interval_hours = float(_get("GC_INTERVAL_HOURS", 6.0))
gc_temp_file_max_age_hours = float(_get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0))
gc_multipart_max_age_days = int(_get("GC_MULTIPART_MAX_AGE_DAYS", 7))
gc_lock_file_max_age_hours = float(_get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0))
gc_dry_run = str(_get("GC_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
integrity_enabled = str(_get("INTEGRITY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
integrity_interval_hours = float(_get("INTEGRITY_INTERVAL_HOURS", 24.0))
integrity_batch_size = int(_get("INTEGRITY_BATCH_SIZE", 1000))
integrity_auto_heal = str(_get("INTEGRITY_AUTO_HEAL", "0")).lower() in {"1", "true", "yes", "on"}
integrity_dry_run = str(_get("INTEGRITY_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
return cls(storage_root=storage_root, return cls(storage_root=storage_root,
max_upload_size=max_upload_size, max_upload_size=max_upload_size,
@@ -406,7 +428,18 @@ class AppConfig:
num_trusted_proxies=num_trusted_proxies, num_trusted_proxies=num_trusted_proxies,
allowed_redirect_hosts=allowed_redirect_hosts, allowed_redirect_hosts=allowed_redirect_hosts,
allow_internal_endpoints=allow_internal_endpoints, allow_internal_endpoints=allow_internal_endpoints,
website_hosting_enabled=website_hosting_enabled) website_hosting_enabled=website_hosting_enabled,
gc_enabled=gc_enabled,
gc_interval_hours=gc_interval_hours,
gc_temp_file_max_age_hours=gc_temp_file_max_age_hours,
gc_multipart_max_age_days=gc_multipart_max_age_days,
gc_lock_file_max_age_hours=gc_lock_file_max_age_hours,
gc_dry_run=gc_dry_run,
integrity_enabled=integrity_enabled,
integrity_interval_hours=integrity_interval_hours,
integrity_batch_size=integrity_batch_size,
integrity_auto_heal=integrity_auto_heal,
integrity_dry_run=integrity_dry_run)
def validate_and_report(self) -> list[str]: def validate_and_report(self) -> list[str]:
"""Validate configuration and return a list of warnings/issues. """Validate configuration and return a list of warnings/issues.
@@ -617,4 +650,15 @@ class AppConfig:
"ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts, "ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
"ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints, "ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
"WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled, "WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled,
"GC_ENABLED": self.gc_enabled,
"GC_INTERVAL_HOURS": self.gc_interval_hours,
"GC_TEMP_FILE_MAX_AGE_HOURS": self.gc_temp_file_max_age_hours,
"GC_MULTIPART_MAX_AGE_DAYS": self.gc_multipart_max_age_days,
"GC_LOCK_FILE_MAX_AGE_HOURS": self.gc_lock_file_max_age_hours,
"GC_DRY_RUN": self.gc_dry_run,
"INTEGRITY_ENABLED": self.integrity_enabled,
"INTEGRITY_INTERVAL_HOURS": self.integrity_interval_hours,
"INTEGRITY_BATCH_SIZE": self.integrity_batch_size,
"INTEGRITY_AUTO_HEAL": self.integrity_auto_heal,
"INTEGRITY_DRY_RUN": self.integrity_dry_run,
} }

531
app/gc.py Normal file
View File

@@ -0,0 +1,531 @@
from __future__ import annotations
import json
import logging
import os
import shutil
import threading
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class GCResult:
    """Counters accumulated over a single garbage-collection pass."""

    temp_files_deleted: int = 0
    temp_bytes_freed: int = 0
    multipart_uploads_deleted: int = 0
    multipart_bytes_freed: int = 0
    lock_files_deleted: int = 0
    orphaned_metadata_deleted: int = 0
    orphaned_versions_deleted: int = 0
    orphaned_version_bytes_freed: int = 0
    empty_dirs_removed: int = 0
    errors: List[str] = field(default_factory=list)
    execution_time_seconds: float = 0.0

    def to_dict(self) -> dict:
        """Serialise all counters to a JSON-friendly mapping (key order fixed)."""
        return dict(
            temp_files_deleted=self.temp_files_deleted,
            temp_bytes_freed=self.temp_bytes_freed,
            multipart_uploads_deleted=self.multipart_uploads_deleted,
            multipart_bytes_freed=self.multipart_bytes_freed,
            lock_files_deleted=self.lock_files_deleted,
            orphaned_metadata_deleted=self.orphaned_metadata_deleted,
            orphaned_versions_deleted=self.orphaned_versions_deleted,
            orphaned_version_bytes_freed=self.orphaned_version_bytes_freed,
            empty_dirs_removed=self.empty_dirs_removed,
            errors=self.errors,
            execution_time_seconds=self.execution_time_seconds,
        )

    @property
    def total_bytes_freed(self) -> int:
        """Sum of every byte counter (temp + multipart + orphaned versions)."""
        return sum((
            self.temp_bytes_freed,
            self.multipart_bytes_freed,
            self.orphaned_version_bytes_freed,
        ))

    @property
    def has_work(self) -> bool:
        """True when at least one deletion/removal counter is non-zero."""
        counters = (
            self.temp_files_deleted,
            self.multipart_uploads_deleted,
            self.lock_files_deleted,
            self.orphaned_metadata_deleted,
            self.orphaned_versions_deleted,
            self.empty_dirs_removed,
        )
        return any(count > 0 for count in counters)
@dataclass
class GCExecutionRecord:
    """One archived GC run: when it ran, its result dict, and the dry-run flag."""

    timestamp: float
    result: dict
    dry_run: bool

    def to_dict(self) -> dict:
        """Flatten the record into plain JSON-serialisable types."""
        return {"timestamp": self.timestamp, "result": self.result, "dry_run": self.dry_run}

    @classmethod
    def from_dict(cls, data: dict) -> GCExecutionRecord:
        """Rebuild a record from a mapping produced by :meth:`to_dict`.

        Missing ``dry_run`` (older history files) defaults to ``False``.
        """
        return cls(data["timestamp"], data["result"], data.get("dry_run", False))
class GCHistoryStore:
    """Persists recent GC execution records as JSON under the system config dir.

    Records are stored newest-first and capped at ``max_records``. Writes are
    atomic (temp file + ``os.replace``) so a crash mid-write cannot leave a
    truncated/corrupt history file — consistent with the project's atomic
    metadata-write guarantees.
    """

    def __init__(self, storage_root: Path, max_records: int = 50) -> None:
        self.storage_root = storage_root
        self.max_records = max_records
        # Serialises read-modify-write cycles in add().
        self._lock = threading.Lock()

    def _get_path(self) -> Path:
        """Location of the history file inside the hidden system tree."""
        return self.storage_root / ".myfsio.sys" / "config" / "gc_history.json"

    def load(self) -> List[GCExecutionRecord]:
        """Read all persisted records; returns [] on any read/parse failure."""
        path = self._get_path()
        if not path.exists():
            return []
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
            return [GCExecutionRecord.from_dict(d) for d in data.get("executions", [])]
        except (OSError, ValueError, KeyError) as e:
            logger.error("Failed to load GC history: %s", e)
            return []

    def save(self, records: List[GCExecutionRecord]) -> None:
        """Atomically persist up to ``max_records`` records (newest first)."""
        path = self._get_path()
        path.parent.mkdir(parents=True, exist_ok=True)
        data = {"executions": [r.to_dict() for r in records[: self.max_records]]}
        tmp_path = path.with_suffix(".json.tmp")
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
            # Atomic on POSIX and Windows: readers never see a partial file.
            os.replace(tmp_path, path)
        except OSError as e:
            logger.error("Failed to save GC history: %s", e)

    def add(self, record: GCExecutionRecord) -> None:
        """Prepend ``record`` and persist, under the store lock."""
        with self._lock:
            records = self.load()
            records.insert(0, record)
            self.save(records)

    def get_history(self, limit: int = 50, offset: int = 0) -> List[GCExecutionRecord]:
        """Return a page of records (newest first)."""
        return self.load()[offset : offset + limit]
def _dir_size(path: Path) -> int:
total = 0
try:
for f in path.rglob("*"):
if f.is_file():
try:
total += f.stat().st_size
except OSError:
pass
except OSError:
pass
return total
def _file_age_hours(path: Path) -> float:
try:
mtime = path.stat().st_mtime
return (time.time() - mtime) / 3600.0
except OSError:
return 0.0
class GarbageCollector:
    """Background janitor that periodically removes orphaned storage artefacts:
    stale temp files, abandoned multipart uploads, dead lock files, metadata
    and archived versions whose objects are gone, and empty internal dirs."""

    # Directory layout constants, all relative to the storage root.
    SYSTEM_ROOT = ".myfsio.sys"          # hidden system tree at the storage root
    SYSTEM_TMP_DIR = "tmp"               # temp files (cleaned past temp_file_max_age_hours)
    SYSTEM_MULTIPART_DIR = "multipart"   # system-side multipart upload dirs, one per bucket
    SYSTEM_BUCKETS_DIR = "buckets"       # per-bucket system state ("meta", "locks", "versions")
    BUCKET_META_DIR = "meta"             # indexed object metadata under a system bucket dir
    BUCKET_VERSIONS_DIR = "versions"     # archived object versions under a system bucket dir
    # Legacy hidden folders kept inside the bucket directory itself
    # (pre system-tree layout); still scanned and cleaned.
    INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
def __init__(
self,
storage_root: Path,
interval_hours: float = 6.0,
temp_file_max_age_hours: float = 24.0,
multipart_max_age_days: int = 7,
lock_file_max_age_hours: float = 1.0,
dry_run: bool = False,
max_history: int = 50,
) -> None:
self.storage_root = Path(storage_root)
self.interval_seconds = interval_hours * 3600.0
self.temp_file_max_age_hours = temp_file_max_age_hours
self.multipart_max_age_days = multipart_max_age_days
self.lock_file_max_age_hours = lock_file_max_age_hours
self.dry_run = dry_run
self._timer: Optional[threading.Timer] = None
self._shutdown = False
self._lock = threading.Lock()
self.history_store = GCHistoryStore(storage_root, max_records=max_history)
def start(self) -> None:
if self._timer is not None:
return
self._shutdown = False
self._schedule_next()
logger.info(
"GC started: interval=%.1fh, temp_max_age=%.1fh, multipart_max_age=%dd, lock_max_age=%.1fh, dry_run=%s",
self.interval_seconds / 3600.0,
self.temp_file_max_age_hours,
self.multipart_max_age_days,
self.lock_file_max_age_hours,
self.dry_run,
)
def stop(self) -> None:
self._shutdown = True
if self._timer:
self._timer.cancel()
self._timer = None
logger.info("GC stopped")
def _schedule_next(self) -> None:
if self._shutdown:
return
self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
self._timer.daemon = True
self._timer.start()
def _run_cycle(self) -> None:
if self._shutdown:
return
try:
self.run_now()
except Exception as e:
logger.error("GC cycle failed: %s", e)
finally:
self._schedule_next()
def run_now(self) -> GCResult:
start = time.time()
result = GCResult()
self._clean_temp_files(result)
self._clean_orphaned_multipart(result)
self._clean_stale_locks(result)
self._clean_orphaned_metadata(result)
self._clean_orphaned_versions(result)
self._clean_empty_dirs(result)
result.execution_time_seconds = time.time() - start
if result.has_work or result.errors:
logger.info(
"GC completed in %.2fs: temp=%d (%.1f MB), multipart=%d (%.1f MB), "
"locks=%d, meta=%d, versions=%d (%.1f MB), dirs=%d, errors=%d%s",
result.execution_time_seconds,
result.temp_files_deleted,
result.temp_bytes_freed / (1024 * 1024),
result.multipart_uploads_deleted,
result.multipart_bytes_freed / (1024 * 1024),
result.lock_files_deleted,
result.orphaned_metadata_deleted,
result.orphaned_versions_deleted,
result.orphaned_version_bytes_freed / (1024 * 1024),
result.empty_dirs_removed,
len(result.errors),
" (dry run)" if self.dry_run else "",
)
record = GCExecutionRecord(
timestamp=time.time(),
result=result.to_dict(),
dry_run=self.dry_run,
)
self.history_store.add(record)
return result
def _system_path(self) -> Path:
return self.storage_root / self.SYSTEM_ROOT
def _list_bucket_names(self) -> List[str]:
names = []
try:
for entry in self.storage_root.iterdir():
if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
names.append(entry.name)
except OSError:
pass
return names
def _clean_temp_files(self, result: GCResult) -> None:
tmp_dir = self._system_path() / self.SYSTEM_TMP_DIR
if not tmp_dir.exists():
return
try:
for entry in tmp_dir.iterdir():
if not entry.is_file():
continue
age = _file_age_hours(entry)
if age < self.temp_file_max_age_hours:
continue
try:
size = entry.stat().st_size
if not self.dry_run:
entry.unlink()
result.temp_files_deleted += 1
result.temp_bytes_freed += size
except OSError as e:
result.errors.append(f"temp file {entry.name}: {e}")
except OSError as e:
result.errors.append(f"scan tmp dir: {e}")
def _clean_orphaned_multipart(self, result: GCResult) -> None:
cutoff_hours = self.multipart_max_age_days * 24.0
bucket_names = self._list_bucket_names()
for bucket_name in bucket_names:
for multipart_root in (
self._system_path() / self.SYSTEM_MULTIPART_DIR / bucket_name,
self.storage_root / bucket_name / ".multipart",
):
if not multipart_root.exists():
continue
try:
for upload_dir in multipart_root.iterdir():
if not upload_dir.is_dir():
continue
self._maybe_clean_upload(upload_dir, cutoff_hours, result)
except OSError as e:
result.errors.append(f"scan multipart {bucket_name}: {e}")
def _maybe_clean_upload(self, upload_dir: Path, cutoff_hours: float, result: GCResult) -> None:
manifest_path = upload_dir / "manifest.json"
age = _file_age_hours(manifest_path) if manifest_path.exists() else _file_age_hours(upload_dir)
if age < cutoff_hours:
return
dir_bytes = _dir_size(upload_dir)
try:
if not self.dry_run:
shutil.rmtree(upload_dir, ignore_errors=True)
result.multipart_uploads_deleted += 1
result.multipart_bytes_freed += dir_bytes
except OSError as e:
result.errors.append(f"multipart {upload_dir.name}: {e}")
def _clean_stale_locks(self, result: GCResult) -> None:
buckets_root = self._system_path() / self.SYSTEM_BUCKETS_DIR
if not buckets_root.exists():
return
try:
for bucket_dir in buckets_root.iterdir():
if not bucket_dir.is_dir():
continue
locks_dir = bucket_dir / "locks"
if not locks_dir.exists():
continue
try:
for lock_file in locks_dir.iterdir():
if not lock_file.is_file() or not lock_file.name.endswith(".lock"):
continue
age = _file_age_hours(lock_file)
if age < self.lock_file_max_age_hours:
continue
try:
if not self.dry_run:
lock_file.unlink(missing_ok=True)
result.lock_files_deleted += 1
except OSError as e:
result.errors.append(f"lock {lock_file.name}: {e}")
except OSError as e:
result.errors.append(f"scan locks {bucket_dir.name}: {e}")
except OSError as e:
result.errors.append(f"scan buckets for locks: {e}")
def _clean_orphaned_metadata(self, result: GCResult) -> None:
bucket_names = self._list_bucket_names()
for bucket_name in bucket_names:
legacy_meta = self.storage_root / bucket_name / ".meta"
if legacy_meta.exists():
self._clean_legacy_metadata(bucket_name, legacy_meta, result)
new_meta = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
if new_meta.exists():
self._clean_index_metadata(bucket_name, new_meta, result)
def _clean_legacy_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
bucket_path = self.storage_root / bucket_name
try:
for meta_file in meta_root.rglob("*.meta.json"):
if not meta_file.is_file():
continue
try:
rel = meta_file.relative_to(meta_root)
object_key = rel.as_posix().removesuffix(".meta.json")
object_path = bucket_path / object_key
if not object_path.exists():
if not self.dry_run:
meta_file.unlink(missing_ok=True)
result.orphaned_metadata_deleted += 1
except (OSError, ValueError) as e:
result.errors.append(f"legacy meta {bucket_name}/{meta_file.name}: {e}")
except OSError as e:
result.errors.append(f"scan legacy meta {bucket_name}: {e}")
def _clean_index_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
bucket_path = self.storage_root / bucket_name
try:
for index_file in meta_root.rglob("_index.json"):
if not index_file.is_file():
continue
try:
with open(index_file, "r", encoding="utf-8") as f:
index_data = json.load(f)
except (OSError, json.JSONDecodeError):
continue
keys_to_remove = []
for key in index_data:
rel_dir = index_file.parent.relative_to(meta_root)
if rel_dir == Path("."):
full_key = key
else:
full_key = rel_dir.as_posix() + "/" + key
object_path = bucket_path / full_key
if not object_path.exists():
keys_to_remove.append(key)
if keys_to_remove:
if not self.dry_run:
for k in keys_to_remove:
index_data.pop(k, None)
if index_data:
try:
with open(index_file, "w", encoding="utf-8") as f:
json.dump(index_data, f)
except OSError as e:
result.errors.append(f"write index {bucket_name}: {e}")
continue
else:
try:
index_file.unlink(missing_ok=True)
except OSError:
pass
result.orphaned_metadata_deleted += len(keys_to_remove)
except OSError as e:
result.errors.append(f"scan index meta {bucket_name}: {e}")
def _clean_orphaned_versions(self, result: GCResult) -> None:
    """Purge version artifacts for objects that no longer exist on disk."""
    for bucket_name in self._list_bucket_names():
        bucket_path = self.storage_root / bucket_name
        # Versions may live in the new system tree or the legacy in-bucket dir.
        roots = (
            self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR,
            self.storage_root / bucket_name / ".versions",
        )
        for versions_root in roots:
            if not versions_root.exists():
                continue
            try:
                for key_dir in versions_root.iterdir():
                    if key_dir.is_dir():
                        self._clean_versions_for_key(bucket_path, versions_root, key_dir, result)
            except OSError as e:
                result.errors.append(f"scan versions {bucket_name}: {e}")
def _clean_versions_for_key(
self, bucket_path: Path, versions_root: Path, key_dir: Path, result: GCResult
) -> None:
try:
rel = key_dir.relative_to(versions_root)
except ValueError:
return
object_path = bucket_path / rel
if object_path.exists():
return
version_files = list(key_dir.glob("*.bin")) + list(key_dir.glob("*.json"))
if not version_files:
return
for vf in version_files:
try:
size = vf.stat().st_size if vf.suffix == ".bin" else 0
if not self.dry_run:
vf.unlink(missing_ok=True)
if vf.suffix == ".bin":
result.orphaned_version_bytes_freed += size
result.orphaned_versions_deleted += 1
except OSError as e:
result.errors.append(f"version file {vf.name}: {e}")
def _clean_empty_dirs(self, result: GCResult) -> None:
    """Sweep known system/metadata roots and prune empty subdirectories."""
    roots = [
        self._system_path() / self.SYSTEM_TMP_DIR,
        self._system_path() / self.SYSTEM_MULTIPART_DIR,
        self._system_path() / self.SYSTEM_BUCKETS_DIR,
    ]
    # Legacy in-bucket locations are swept too.
    for bucket_name in self._list_bucket_names():
        base = self.storage_root / bucket_name
        roots.extend((base / ".meta", base / ".versions", base / ".multipart"))
    for root in roots:
        if root.exists():
            self._remove_empty_dirs_recursive(root, root, result)
def _remove_empty_dirs_recursive(self, path: Path, stop_at: Path, result: GCResult) -> bool:
if not path.is_dir():
return False
try:
children = list(path.iterdir())
except OSError:
return False
all_empty = True
for child in children:
if child.is_dir():
if not self._remove_empty_dirs_recursive(child, stop_at, result):
all_empty = False
else:
all_empty = False
if all_empty and path != stop_at:
try:
if not self.dry_run:
path.rmdir()
result.empty_dirs_removed += 1
return True
except OSError:
return False
return all_empty
def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
    """Return recent GC runs as plain dicts for API/UI consumption."""
    return [record.to_dict() for record in self.history_store.get_history(limit, offset)]
def get_status(self) -> dict:
    """Snapshot of scheduler state and effective GC configuration."""
    timer_active = self._timer is not None
    return {
        # NOTE(review): "enabled" is (not shutdown) OR timer-present — the OR
        # is unusual; confirm against the dashboard's expectations.
        "enabled": (not self._shutdown) or timer_active,
        "running": timer_active and not self._shutdown,
        "interval_hours": self.interval_seconds / 3600.0,
        "temp_file_max_age_hours": self.temp_file_max_age_hours,
        "multipart_max_age_days": self.multipart_max_age_days,
        "lock_file_max_age_hours": self.lock_file_max_age_hours,
        "dry_run": self.dry_run,
    }

View File

@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import base64
import hashlib import hashlib
import hmac import hmac
import json import json
@@ -14,6 +15,8 @@ from datetime import datetime, timedelta, timezone
from pathlib import Path from pathlib import Path
from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple
from cryptography.fernet import Fernet, InvalidToken
class IamError(RuntimeError): class IamError(RuntimeError):
"""Raised when authentication or authorization fails.""" """Raised when authentication or authorization fails."""
@@ -107,13 +110,24 @@ class Principal:
policies: List[Policy] policies: List[Policy]
def _derive_fernet_key(secret: str) -> bytes:
raw = hashlib.pbkdf2_hmac("sha256", secret.encode(), b"myfsio-iam-encryption", 100_000)
return base64.urlsafe_b64encode(raw)
_IAM_ENCRYPTED_PREFIX = b"MYFSIO_IAM_ENC:"
class IamService: class IamService:
"""Loads IAM configuration, manages users, and evaluates policies.""" """Loads IAM configuration, manages users, and evaluates policies."""
def __init__(self, config_path: Path, auth_max_attempts: int = 5, auth_lockout_minutes: int = 15) -> None: def __init__(self, config_path: Path, auth_max_attempts: int = 5, auth_lockout_minutes: int = 15, encryption_key: str | None = None) -> None:
self.config_path = Path(config_path) self.config_path = Path(config_path)
self.auth_max_attempts = auth_max_attempts self.auth_max_attempts = auth_max_attempts
self.auth_lockout_window = timedelta(minutes=auth_lockout_minutes) self.auth_lockout_window = timedelta(minutes=auth_lockout_minutes)
self._fernet: Fernet | None = None
if encryption_key:
self._fernet = Fernet(_derive_fernet_key(encryption_key))
self.config_path.parent.mkdir(parents=True, exist_ok=True) self.config_path.parent.mkdir(parents=True, exist_ok=True)
if not self.config_path.exists(): if not self.config_path.exists():
self._write_default() self._write_default()
@@ -125,7 +139,7 @@ class IamService:
self._secret_key_cache: Dict[str, Tuple[str, float]] = {} self._secret_key_cache: Dict[str, Tuple[str, float]] = {}
self._cache_ttl = float(os.environ.get("IAM_CACHE_TTL_SECONDS", "5.0")) self._cache_ttl = float(os.environ.get("IAM_CACHE_TTL_SECONDS", "5.0"))
self._last_stat_check = 0.0 self._last_stat_check = 0.0
self._stat_check_interval = 1.0 self._stat_check_interval = float(os.environ.get("IAM_STAT_CHECK_INTERVAL_SECONDS", "2.0"))
self._sessions: Dict[str, Dict[str, Any]] = {} self._sessions: Dict[str, Dict[str, Any]] = {}
self._session_lock = threading.Lock() self._session_lock = threading.Lock()
self._load() self._load()
@@ -145,6 +159,19 @@ class IamService:
except OSError: except OSError:
pass pass
def _check_expiry(self, access_key: str, record: Dict[str, Any]) -> None:
expires_at = record.get("expires_at")
if not expires_at:
return
try:
exp_dt = datetime.fromisoformat(expires_at)
if exp_dt.tzinfo is None:
exp_dt = exp_dt.replace(tzinfo=timezone.utc)
if datetime.now(timezone.utc) >= exp_dt:
raise IamError(f"Credentials for '{access_key}' have expired")
except (ValueError, TypeError):
pass
def authenticate(self, access_key: str, secret_key: str) -> Principal: def authenticate(self, access_key: str, secret_key: str) -> Principal:
self._maybe_reload() self._maybe_reload()
access_key = (access_key or "").strip() access_key = (access_key or "").strip()
@@ -161,6 +188,7 @@ class IamService:
if not record or not hmac.compare_digest(stored_secret, secret_key): if not record or not hmac.compare_digest(stored_secret, secret_key):
self._record_failed_attempt(access_key) self._record_failed_attempt(access_key)
raise IamError("Invalid credentials") raise IamError("Invalid credentials")
self._check_expiry(access_key, record)
self._clear_failed_attempts(access_key) self._clear_failed_attempts(access_key)
return self._build_principal(access_key, record) return self._build_principal(access_key, record)
@@ -288,12 +316,16 @@ class IamService:
if cached: if cached:
principal, cached_time = cached principal, cached_time = cached
if now - cached_time < self._cache_ttl: if now - cached_time < self._cache_ttl:
record = self._users.get(access_key)
if record:
self._check_expiry(access_key, record)
return principal return principal
self._maybe_reload() self._maybe_reload()
record = self._users.get(access_key) record = self._users.get(access_key)
if not record: if not record:
raise IamError("Unknown access key") raise IamError("Unknown access key")
self._check_expiry(access_key, record)
principal = self._build_principal(access_key, record) principal = self._build_principal(access_key, record)
self._principal_cache[access_key] = (principal, now) self._principal_cache[access_key] = (principal, now)
return principal return principal
@@ -303,6 +335,7 @@ class IamService:
record = self._users.get(access_key) record = self._users.get(access_key)
if not record: if not record:
raise IamError("Unknown access key") raise IamError("Unknown access key")
self._check_expiry(access_key, record)
return record["secret_key"] return record["secret_key"]
def authorize(self, principal: Principal, bucket_name: str | None, action: str) -> None: def authorize(self, principal: Principal, bucket_name: str | None, action: str) -> None:
@@ -347,6 +380,7 @@ class IamService:
{ {
"access_key": access_key, "access_key": access_key,
"display_name": record["display_name"], "display_name": record["display_name"],
"expires_at": record.get("expires_at"),
"policies": [ "policies": [
{"bucket": policy.bucket, "actions": sorted(policy.actions)} {"bucket": policy.bucket, "actions": sorted(policy.actions)}
for policy in record["policies"] for policy in record["policies"]
@@ -362,20 +396,25 @@ class IamService:
policies: Optional[Sequence[Dict[str, Any]]] = None, policies: Optional[Sequence[Dict[str, Any]]] = None,
access_key: str | None = None, access_key: str | None = None,
secret_key: str | None = None, secret_key: str | None = None,
expires_at: str | None = None,
) -> Dict[str, str]: ) -> Dict[str, str]:
access_key = (access_key or self._generate_access_key()).strip() access_key = (access_key or self._generate_access_key()).strip()
if not access_key: if not access_key:
raise IamError("Access key cannot be empty") raise IamError("Access key cannot be empty")
if access_key in self._users: if access_key in self._users:
raise IamError("Access key already exists") raise IamError("Access key already exists")
if expires_at:
self._validate_expires_at(expires_at)
secret_key = secret_key or self._generate_secret_key() secret_key = secret_key or self._generate_secret_key()
sanitized_policies = self._prepare_policy_payload(policies) sanitized_policies = self._prepare_policy_payload(policies)
record = { record: Dict[str, Any] = {
"access_key": access_key, "access_key": access_key,
"secret_key": secret_key, "secret_key": secret_key,
"display_name": display_name or access_key, "display_name": display_name or access_key,
"policies": sanitized_policies, "policies": sanitized_policies,
} }
if expires_at:
record["expires_at"] = expires_at
self._raw_config.setdefault("users", []).append(record) self._raw_config.setdefault("users", []).append(record)
self._save() self._save()
self._load() self._load()
@@ -414,17 +453,43 @@ class IamService:
clear_signing_key_cache() clear_signing_key_cache()
self._load() self._load()
def update_user_expiry(self, access_key: str, expires_at: str | None) -> None:
user = self._get_raw_user(access_key)
if expires_at:
self._validate_expires_at(expires_at)
user["expires_at"] = expires_at
else:
user.pop("expires_at", None)
self._save()
self._principal_cache.pop(access_key, None)
self._secret_key_cache.pop(access_key, None)
self._load()
def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None: def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None:
user = self._get_raw_user(access_key) user = self._get_raw_user(access_key)
user["policies"] = self._prepare_policy_payload(policies) user["policies"] = self._prepare_policy_payload(policies)
self._save() self._save()
self._load() self._load()
def _decrypt_content(self, raw_bytes: bytes) -> str:
if raw_bytes.startswith(_IAM_ENCRYPTED_PREFIX):
if not self._fernet:
raise IamError("IAM config is encrypted but no encryption key provided. Set SECRET_KEY or use 'python run.py reset-cred'.")
try:
encrypted_data = raw_bytes[len(_IAM_ENCRYPTED_PREFIX):]
return self._fernet.decrypt(encrypted_data).decode("utf-8")
except InvalidToken:
raise IamError("Cannot decrypt IAM config. SECRET_KEY may have changed. Use 'python run.py reset-cred' to reset credentials.")
return raw_bytes.decode("utf-8")
def _load(self) -> None: def _load(self) -> None:
try: try:
self._last_load_time = self.config_path.stat().st_mtime self._last_load_time = self.config_path.stat().st_mtime
content = self.config_path.read_text(encoding='utf-8') raw_bytes = self.config_path.read_bytes()
content = self._decrypt_content(raw_bytes)
raw = json.loads(content) raw = json.loads(content)
except IamError:
raise
except FileNotFoundError: except FileNotFoundError:
raise IamError(f"IAM config not found: {self.config_path}") raise IamError(f"IAM config not found: {self.config_path}")
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
@@ -434,33 +499,47 @@ class IamService:
except (OSError, ValueError) as e: except (OSError, ValueError) as e:
raise IamError(f"Failed to load IAM config: {e}") raise IamError(f"Failed to load IAM config: {e}")
was_plaintext = not raw_bytes.startswith(_IAM_ENCRYPTED_PREFIX)
users: Dict[str, Dict[str, Any]] = {} users: Dict[str, Dict[str, Any]] = {}
for user in raw.get("users", []): for user in raw.get("users", []):
policies = self._build_policy_objects(user.get("policies", [])) policies = self._build_policy_objects(user.get("policies", []))
users[user["access_key"]] = { user_record: Dict[str, Any] = {
"secret_key": user["secret_key"], "secret_key": user["secret_key"],
"display_name": user.get("display_name", user["access_key"]), "display_name": user.get("display_name", user["access_key"]),
"policies": policies, "policies": policies,
} }
if user.get("expires_at"):
user_record["expires_at"] = user["expires_at"]
users[user["access_key"]] = user_record
if not users: if not users:
raise IamError("IAM configuration contains no users") raise IamError("IAM configuration contains no users")
self._users = users self._users = users
self._raw_config = { raw_users: List[Dict[str, Any]] = []
"users": [ for entry in raw.get("users", []):
{ raw_entry: Dict[str, Any] = {
"access_key": entry["access_key"], "access_key": entry["access_key"],
"secret_key": entry["secret_key"], "secret_key": entry["secret_key"],
"display_name": entry.get("display_name", entry["access_key"]), "display_name": entry.get("display_name", entry["access_key"]),
"policies": entry.get("policies", []), "policies": entry.get("policies", []),
} }
for entry in raw.get("users", []) if entry.get("expires_at"):
] raw_entry["expires_at"] = entry["expires_at"]
} raw_users.append(raw_entry)
self._raw_config = {"users": raw_users}
if was_plaintext and self._fernet:
self._save()
def _save(self) -> None: def _save(self) -> None:
try: try:
json_text = json.dumps(self._raw_config, indent=2)
temp_path = self.config_path.with_suffix('.json.tmp') temp_path = self.config_path.with_suffix('.json.tmp')
temp_path.write_text(json.dumps(self._raw_config, indent=2), encoding='utf-8') if self._fernet:
encrypted = self._fernet.encrypt(json_text.encode("utf-8"))
temp_path.write_bytes(_IAM_ENCRYPTED_PREFIX + encrypted)
else:
temp_path.write_text(json_text, encoding='utf-8')
temp_path.replace(self.config_path) temp_path.replace(self.config_path)
except (OSError, PermissionError) as e: except (OSError, PermissionError) as e:
raise IamError(f"Cannot save IAM config: {e}") raise IamError(f"Cannot save IAM config: {e}")
@@ -475,9 +554,14 @@ class IamService:
def export_config(self, mask_secrets: bool = True) -> Dict[str, Any]: def export_config(self, mask_secrets: bool = True) -> Dict[str, Any]:
payload: Dict[str, Any] = {"users": []} payload: Dict[str, Any] = {"users": []}
for user in self._raw_config.get("users", []): for user in self._raw_config.get("users", []):
record = dict(user) record: Dict[str, Any] = {
if mask_secrets and "secret_key" in record: "access_key": user["access_key"],
record["secret_key"] = "••••••••••" "secret_key": "••••••••••" if mask_secrets else user["secret_key"],
"display_name": user["display_name"],
"policies": user["policies"],
}
if user.get("expires_at"):
record["expires_at"] = user["expires_at"]
payload["users"].append(record) payload["users"].append(record)
return payload return payload
@@ -546,8 +630,9 @@ class IamService:
return candidate if candidate in ALLOWED_ACTIONS else "" return candidate if candidate in ALLOWED_ACTIONS else ""
def _write_default(self) -> None: def _write_default(self) -> None:
access_key = secrets.token_hex(12) access_key = os.environ.get("ADMIN_ACCESS_KEY", "").strip() or secrets.token_hex(12)
secret_key = secrets.token_urlsafe(32) secret_key = os.environ.get("ADMIN_SECRET_KEY", "").strip() or secrets.token_urlsafe(32)
custom_keys = bool(os.environ.get("ADMIN_ACCESS_KEY", "").strip())
default = { default = {
"users": [ "users": [
{ {
@@ -560,16 +645,37 @@ class IamService:
} }
] ]
} }
self.config_path.write_text(json.dumps(default, indent=2)) json_text = json.dumps(default, indent=2)
if self._fernet:
encrypted = self._fernet.encrypt(json_text.encode("utf-8"))
self.config_path.write_bytes(_IAM_ENCRYPTED_PREFIX + encrypted)
else:
self.config_path.write_text(json_text)
print(f"\n{'='*60}") print(f"\n{'='*60}")
print("MYFSIO FIRST RUN - ADMIN CREDENTIALS GENERATED") print("MYFSIO FIRST RUN - ADMIN CREDENTIALS")
print(f"{'='*60}") print(f"{'='*60}")
print(f"Access Key: {access_key}") if custom_keys:
print(f"Secret Key: {secret_key}") print(f"Access Key: {access_key} (from ADMIN_ACCESS_KEY)")
print(f"Secret Key: {'(from ADMIN_SECRET_KEY)' if os.environ.get('ADMIN_SECRET_KEY', '').strip() else secret_key}")
else:
print(f"Access Key: {access_key}")
print(f"Secret Key: {secret_key}")
print(f"{'='*60}") print(f"{'='*60}")
print(f"Missed this? Check: {self.config_path}") if self._fernet:
print("IAM config is encrypted at rest.")
print("Lost credentials? Run: python run.py reset-cred")
else:
print(f"Missed this? Check: {self.config_path}")
print(f"{'='*60}\n") print(f"{'='*60}\n")
def _validate_expires_at(self, expires_at: str) -> None:
try:
dt = datetime.fromisoformat(expires_at)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
except (ValueError, TypeError):
raise IamError(f"Invalid expires_at format: {expires_at}. Use ISO 8601 (e.g. 2026-12-31T23:59:59Z)")
def _generate_access_key(self) -> str: def _generate_access_key(self) -> str:
return secrets.token_hex(8) return secrets.token_hex(8)
@@ -588,11 +694,15 @@ class IamService:
if cached: if cached:
secret_key, cached_time = cached secret_key, cached_time = cached
if now - cached_time < self._cache_ttl: if now - cached_time < self._cache_ttl:
record = self._users.get(access_key)
if record:
self._check_expiry(access_key, record)
return secret_key return secret_key
self._maybe_reload() self._maybe_reload()
record = self._users.get(access_key) record = self._users.get(access_key)
if record: if record:
self._check_expiry(access_key, record)
secret_key = record["secret_key"] secret_key = record["secret_key"]
self._secret_key_cache[access_key] = (secret_key, now) self._secret_key_cache[access_key] = (secret_key, now)
return secret_key return secret_key
@@ -604,11 +714,15 @@ class IamService:
if cached: if cached:
principal, cached_time = cached principal, cached_time = cached
if now - cached_time < self._cache_ttl: if now - cached_time < self._cache_ttl:
record = self._users.get(access_key)
if record:
self._check_expiry(access_key, record)
return principal return principal
self._maybe_reload() self._maybe_reload()
record = self._users.get(access_key) record = self._users.get(access_key)
if record: if record:
self._check_expiry(access_key, record)
principal = self._build_principal(access_key, record) principal = self._build_principal(access_key, record)
self._principal_cache[access_key] = (principal, now) self._principal_cache[access_key] = (principal, now)
return principal return principal

738
app/integrity.py Normal file
View File

@@ -0,0 +1,738 @@
from __future__ import annotations
import hashlib
import json
import logging
import os
import threading
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
try:
import myfsio_core as _rc
_HAS_RUST = True
except ImportError:
_HAS_RUST = False
logger = logging.getLogger(__name__)
def _compute_etag(path: Path) -> str:
if _HAS_RUST:
return _rc.md5_file(str(path))
checksum = hashlib.md5()
with path.open("rb") as handle:
for chunk in iter(lambda: handle.read(8192), b""):
checksum.update(chunk)
return checksum.hexdigest()
@dataclass
class IntegrityIssue:
    """A single detected inconsistency plus its healing outcome (if any)."""

    issue_type: str
    bucket: str
    key: str
    detail: str
    healed: bool = False
    heal_action: str = ""

    def to_dict(self) -> dict:
        """Serialize for history records and API responses."""
        return dict(
            issue_type=self.issue_type,
            bucket=self.bucket,
            key=self.key,
            detail=self.detail,
            healed=self.healed,
            heal_action=self.heal_action,
        )
@dataclass
class IntegrityResult:
    """Aggregated outcome of one integrity scan."""

    corrupted_objects: int = 0
    orphaned_objects: int = 0
    phantom_metadata: int = 0
    stale_versions: int = 0
    etag_cache_inconsistencies: int = 0
    legacy_metadata_drifts: int = 0
    issues_healed: int = 0
    issues: List[IntegrityIssue] = field(default_factory=list)
    errors: List[str] = field(default_factory=list)
    objects_scanned: int = 0
    buckets_scanned: int = 0
    execution_time_seconds: float = 0.0

    def to_dict(self) -> dict:
        """Serialize, expanding each issue through its own ``to_dict``."""
        return dict(
            corrupted_objects=self.corrupted_objects,
            orphaned_objects=self.orphaned_objects,
            phantom_metadata=self.phantom_metadata,
            stale_versions=self.stale_versions,
            etag_cache_inconsistencies=self.etag_cache_inconsistencies,
            legacy_metadata_drifts=self.legacy_metadata_drifts,
            issues_healed=self.issues_healed,
            issues=[issue.to_dict() for issue in self.issues],
            errors=self.errors,
            objects_scanned=self.objects_scanned,
            buckets_scanned=self.buckets_scanned,
            execution_time_seconds=self.execution_time_seconds,
        )

    @property
    def total_issues(self) -> int:
        """Total count across every issue category."""
        return sum(
            (
                self.corrupted_objects,
                self.orphaned_objects,
                self.phantom_metadata,
                self.stale_versions,
                self.etag_cache_inconsistencies,
                self.legacy_metadata_drifts,
            )
        )

    @property
    def has_issues(self) -> bool:
        """True when any category recorded at least one problem."""
        return self.total_issues > 0
@dataclass
class IntegrityExecutionRecord:
    """One persisted history entry for an integrity run."""

    timestamp: float
    result: dict
    dry_run: bool
    auto_heal: bool

    def to_dict(self) -> dict:
        """Serialize for the JSON history file."""
        return dict(
            timestamp=self.timestamp,
            result=self.result,
            dry_run=self.dry_run,
            auto_heal=self.auto_heal,
        )

    @classmethod
    def from_dict(cls, data: dict) -> IntegrityExecutionRecord:
        """Rebuild a record from its JSON form; missing flags default to False."""
        return cls(
            data["timestamp"],
            data["result"],
            data.get("dry_run", False),
            data.get("auto_heal", False),
        )
class IntegrityHistoryStore:
    """JSON-file-backed list of recent integrity runs, newest first."""

    def __init__(self, storage_root: Path, max_records: int = 50) -> None:
        self.storage_root = storage_root
        self.max_records = max_records
        self._lock = threading.Lock()  # serializes the read-modify-write in add()

    def _get_path(self) -> Path:
        return self.storage_root / ".myfsio.sys" / "config" / "integrity_history.json"

    def load(self) -> List[IntegrityExecutionRecord]:
        """Read all persisted records; an unreadable file yields an empty list."""
        path = self._get_path()
        if not path.exists():
            return []
        try:
            with open(path, "r", encoding="utf-8") as f:
                payload = json.load(f)
            return [IntegrityExecutionRecord.from_dict(item) for item in payload.get("executions", [])]
        except (OSError, ValueError, KeyError) as e:
            logger.error("Failed to load integrity history: %s", e)
            return []

    def save(self, records: List[IntegrityExecutionRecord]) -> None:
        """Persist at most ``max_records`` records, creating parent dirs as needed."""
        path = self._get_path()
        path.parent.mkdir(parents=True, exist_ok=True)
        payload = {"executions": [record.to_dict() for record in records[: self.max_records]]}
        try:
            with open(path, "w", encoding="utf-8") as f:
                json.dump(payload, f, indent=2)
        except OSError as e:
            logger.error("Failed to save integrity history: %s", e)

    def add(self, record: IntegrityExecutionRecord) -> None:
        """Prepend ``record`` and rewrite the file, serialized by the instance lock."""
        with self._lock:
            existing = self.load()
            existing.insert(0, record)
            self.save(existing)

    def get_history(self, limit: int = 50, offset: int = 0) -> List[IntegrityExecutionRecord]:
        """Return a slice of the persisted records."""
        return self.load()[offset : offset + limit]
# Cap on detailed issue records kept per run; category counters keep counting
# past this limit — only the per-issue detail list is truncated (see _add_issue).
MAX_ISSUES = 500
class IntegrityChecker:
    """Background scanner that detects — and optionally heals — storage inconsistencies.

    Runs periodically on a daemon timer and compares bucket contents against
    the index metadata kept under the hidden system tree, within a per-run
    object budget (``batch_size``).
    """

    # Hidden system tree living directly inside storage_root.
    SYSTEM_ROOT = ".myfsio.sys"
    # Per-bucket system data lives under SYSTEM_ROOT/buckets/<bucket>/.
    SYSTEM_BUCKETS_DIR = "buckets"
    # Per-bucket index metadata (_index.json files).
    BUCKET_META_DIR = "meta"
    # Per-bucket version artifacts (.bin data + .json manifests).
    BUCKET_VERSIONS_DIR = "versions"
    # Legacy in-bucket folders excluded when scanning for orphaned objects.
    INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
def __init__(
    self,
    storage_root: Path,
    interval_hours: float = 24.0,
    batch_size: int = 1000,
    auto_heal: bool = False,
    dry_run: bool = False,
    max_history: int = 50,
) -> None:
    """Configure the checker; call :meth:`start` to begin periodic scans.

    ``batch_size`` bounds the number of objects examined per run;
    ``auto_heal``/``dry_run`` set the run defaults (overridable in run_now).
    """
    self.storage_root = Path(storage_root)
    # The timer API works in seconds; the public knob is hours.
    self.interval_seconds = interval_hours * 3600.0
    self.batch_size = batch_size
    self.auto_heal = auto_heal
    self.dry_run = dry_run
    self._timer: Optional[threading.Timer] = None
    self._shutdown = False
    self._lock = threading.Lock()
    self.history_store = IntegrityHistoryStore(storage_root, max_records=max_history)
def start(self) -> None:
    """Begin periodic scans; a no-op when a timer is already armed."""
    if self._timer is not None:
        return  # already running
    self._shutdown = False
    self._schedule_next()
    interval_hours = self.interval_seconds / 3600.0
    logger.info(
        "Integrity checker started: interval=%.1fh, batch_size=%d, auto_heal=%s, dry_run=%s",
        interval_hours,
        self.batch_size,
        self.auto_heal,
        self.dry_run,
    )
def stop(self) -> None:
    """Cancel any pending scan and prevent rescheduling."""
    self._shutdown = True
    pending = self._timer
    if pending:
        pending.cancel()
    self._timer = None
    logger.info("Integrity checker stopped")
def _schedule_next(self) -> None:
if self._shutdown:
return
self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
self._timer.daemon = True
self._timer.start()
def _run_cycle(self) -> None:
if self._shutdown:
return
try:
self.run_now()
except Exception as e:
logger.error("Integrity check cycle failed: %s", e)
finally:
self._schedule_next()
def run_now(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> IntegrityResult:
    """Run one full integrity scan immediately and record it in history.

    Args:
        auto_heal: per-run override of the instance-level auto_heal.
        dry_run: per-run override of the instance-level dry_run.

    Returns:
        The populated IntegrityResult (also persisted via history_store).
    """
    # Per-run overrides fall back to the constructor defaults.
    effective_auto_heal = auto_heal if auto_heal is not None else self.auto_heal
    effective_dry_run = dry_run if dry_run is not None else self.dry_run
    start = time.time()
    result = IntegrityResult()
    bucket_names = self._list_bucket_names()
    for bucket_name in bucket_names:
        # batch_size bounds the objects examined across the whole run;
        # remaining buckets are skipped once the budget is spent.
        if result.objects_scanned >= self.batch_size:
            break
        result.buckets_scanned += 1
        self._check_corrupted_objects(bucket_name, result, effective_auto_heal, effective_dry_run)
        self._check_orphaned_objects(bucket_name, result, effective_auto_heal, effective_dry_run)
        self._check_phantom_metadata(bucket_name, result, effective_auto_heal, effective_dry_run)
        self._check_stale_versions(bucket_name, result, effective_auto_heal, effective_dry_run)
        self._check_etag_cache(bucket_name, result, effective_auto_heal, effective_dry_run)
        self._check_legacy_metadata(bucket_name, result, effective_auto_heal, effective_dry_run)
    result.execution_time_seconds = time.time() - start
    # Log only noteworthy runs so quiet cycles stay silent.
    if result.has_issues or result.errors:
        logger.info(
            "Integrity check completed in %.2fs: corrupted=%d, orphaned=%d, phantom=%d, "
            "stale_versions=%d, etag_cache=%d, legacy_drift=%d, healed=%d, errors=%d%s",
            result.execution_time_seconds,
            result.corrupted_objects,
            result.orphaned_objects,
            result.phantom_metadata,
            result.stale_versions,
            result.etag_cache_inconsistencies,
            result.legacy_metadata_drifts,
            result.issues_healed,
            len(result.errors),
            " (dry run)" if effective_dry_run else "",
        )
    # Every run, even a clean one, is persisted for the history UI.
    record = IntegrityExecutionRecord(
        timestamp=time.time(),
        result=result.to_dict(),
        dry_run=effective_dry_run,
        auto_heal=effective_auto_heal,
    )
    self.history_store.add(record)
    return result
def _system_path(self) -> Path:
return self.storage_root / self.SYSTEM_ROOT
def _list_bucket_names(self) -> List[str]:
names = []
try:
for entry in self.storage_root.iterdir():
if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
names.append(entry.name)
except OSError:
pass
return names
def _add_issue(self, result: IntegrityResult, issue: IntegrityIssue) -> None:
    """Record issue detail, truncating the list at MAX_ISSUES entries."""
    if len(result.issues) >= MAX_ISSUES:
        return  # counters still track the category; only detail is capped
    result.issues.append(issue)
def _check_corrupted_objects(
    self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
) -> None:
    """Compare each indexed object's stored ETag against the file on disk.

    A mismatch is reported as ``corrupted_object``; with ``auto_heal`` (and
    not ``dry_run``) the index entry is rewritten with the actual etag,
    size, and mtime.  Scanning stops once ``batch_size`` objects have been
    examined across the whole run.
    """
    bucket_path = self.storage_root / bucket_name
    meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
    if not meta_root.exists():
        return
    try:
        for index_file in meta_root.rglob("_index.json"):
            if result.objects_scanned >= self.batch_size:
                return
            if not index_file.is_file():
                continue
            try:
                index_data = json.loads(index_file.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError):
                continue  # unreadable index: nothing to verify here
            # The key prefix is fixed per index file; compute it once instead
            # of recomputing it inside the per-key loop (loop-invariant).
            rel_dir = index_file.parent.relative_to(meta_root)
            prefix = "" if rel_dir == Path(".") else rel_dir.as_posix() + "/"
            for key_name, entry in list(index_data.items()):
                if result.objects_scanned >= self.batch_size:
                    return
                full_key = prefix + key_name
                object_path = bucket_path / full_key
                if not object_path.exists():
                    continue  # missing files are the phantom-metadata pass's job
                result.objects_scanned += 1
                meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
                stored_etag = meta.get("__etag__")
                if not stored_etag:
                    continue  # no recorded etag to verify against
                try:
                    actual_etag = _compute_etag(object_path)
                except OSError:
                    continue
                if actual_etag == stored_etag:
                    continue
                result.corrupted_objects += 1
                issue = IntegrityIssue(
                    issue_type="corrupted_object",
                    bucket=bucket_name,
                    key=full_key,
                    detail=f"stored_etag={stored_etag} actual_etag={actual_etag}",
                )
                if auto_heal and not dry_run:
                    try:
                        stat = object_path.stat()
                        meta["__etag__"] = actual_etag
                        meta["__size__"] = str(stat.st_size)
                        meta["__last_modified__"] = str(stat.st_mtime)
                        # NOTE(review): this replaces the whole entry with just
                        # {"metadata": ...}; confirm entries carry no other fields.
                        index_data[key_name] = {"metadata": meta}
                        self._atomic_write_index(index_file, index_data)
                        issue.healed = True
                        issue.heal_action = "updated etag in index"
                        result.issues_healed += 1
                    except OSError as e:
                        result.errors.append(f"heal corrupted {bucket_name}/{full_key}: {e}")
                self._add_issue(result, issue)
    except OSError as e:
        result.errors.append(f"check corrupted {bucket_name}: {e}")
def _check_orphaned_objects(
    self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
) -> None:
    """Find files on disk that have no entry in their directory's _index.json.

    With ``auto_heal`` (and not ``dry_run``) a fresh metadata entry is
    created from the file's actual content (etag, size, mtime).
    """
    bucket_path = self.storage_root / bucket_name
    meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
    try:
        for entry in bucket_path.rglob("*"):
            # Shared scan budget across the whole run.
            # NOTE(review): the budget is checked here but objects_scanned is
            # never incremented in this pass — confirm that is intended.
            if result.objects_scanned >= self.batch_size:
                return
            if not entry.is_file():
                continue
            try:
                rel = entry.relative_to(bucket_path)
            except ValueError:
                continue
            # Skip legacy in-bucket system folders (.meta/.versions/.multipart).
            if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
                continue
            full_key = rel.as_posix()
            key_name = rel.name
            parent = rel.parent
            # Index file sits next to the object's directory in the meta tree.
            if parent == Path("."):
                index_path = meta_root / "_index.json"
            else:
                index_path = meta_root / parent / "_index.json"
            # NOTE(review): the index is re-read for every file; a
            # per-directory cache would avoid repeated parsing on big buckets.
            has_entry = False
            if index_path.exists():
                try:
                    index_data = json.loads(index_path.read_text(encoding="utf-8"))
                    has_entry = key_name in index_data
                except (OSError, json.JSONDecodeError):
                    pass
            if not has_entry:
                result.orphaned_objects += 1
                issue = IntegrityIssue(
                    issue_type="orphaned_object",
                    bucket=bucket_name,
                    key=full_key,
                    detail="file exists without metadata entry",
                )
                if auto_heal and not dry_run:
                    try:
                        # Rebuild the metadata entry from the file itself.
                        etag = _compute_etag(entry)
                        stat = entry.stat()
                        meta = {
                            "__etag__": etag,
                            "__size__": str(stat.st_size),
                            "__last_modified__": str(stat.st_mtime),
                        }
                        # Re-read the index just before writing to minimize
                        # clobbering concurrent updates.
                        index_data = {}
                        if index_path.exists():
                            try:
                                index_data = json.loads(index_path.read_text(encoding="utf-8"))
                            except (OSError, json.JSONDecodeError):
                                pass
                        index_data[key_name] = {"metadata": meta}
                        self._atomic_write_index(index_path, index_data)
                        issue.healed = True
                        issue.heal_action = "created metadata entry"
                        result.issues_healed += 1
                    except OSError as e:
                        result.errors.append(f"heal orphaned {bucket_name}/{full_key}: {e}")
                self._add_issue(result, issue)
    except OSError as e:
        result.errors.append(f"check orphaned {bucket_name}: {e}")
def _check_phantom_metadata(
    self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
) -> None:
    """Find index entries whose backing file is gone ("phantom" metadata).

    With ``auto_heal`` (and not ``dry_run``) the stale entries are removed;
    an index file left empty afterwards is deleted entirely.
    """
    bucket_path = self.storage_root / bucket_name
    meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
    if not meta_root.exists():
        return
    try:
        for index_file in meta_root.rglob("_index.json"):
            if not index_file.is_file():
                continue
            try:
                index_data = json.loads(index_file.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError):
                continue  # unreadable index: skip
            # The directory prefix is identical for every key in this index;
            # compute it once instead of once per key (loop-invariant).
            rel_dir = index_file.parent.relative_to(meta_root)
            prefix = "" if rel_dir == Path(".") else rel_dir.as_posix() + "/"
            keys_to_remove = []
            for key_name in list(index_data.keys()):
                full_key = prefix + key_name
                if (bucket_path / full_key).exists():
                    continue
                result.phantom_metadata += 1
                issue = IntegrityIssue(
                    issue_type="phantom_metadata",
                    bucket=bucket_name,
                    key=full_key,
                    detail="metadata entry without file on disk",
                )
                if auto_heal and not dry_run:
                    keys_to_remove.append(key_name)
                    issue.healed = True
                    issue.heal_action = "removed stale index entry"
                    result.issues_healed += 1
                self._add_issue(result, issue)
            if keys_to_remove and auto_heal and not dry_run:
                try:
                    for k in keys_to_remove:
                        index_data.pop(k, None)
                    if index_data:
                        self._atomic_write_index(index_file, index_data)
                    else:
                        # Every entry was stale: drop the empty index file.
                        index_file.unlink(missing_ok=True)
                except OSError as e:
                    result.errors.append(f"heal phantom {bucket_name}: {e}")
    except OSError as e:
        result.errors.append(f"check phantom {bucket_name}: {e}")
def _check_stale_versions(
    self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
) -> None:
    """Flag (and optionally delete) unpaired version artefacts.

    Each archived version consists of an ``<id>.bin`` data file plus an
    ``<id>.json`` manifest; either one without its counterpart is stale.
    Healing removes the stray file when ``auto_heal`` is set and this is
    not a dry run.
    """
    versions_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR
    if not versions_root.exists():
        return
    try:
        for key_dir in versions_root.rglob("*"):
            if not key_dir.is_dir():
                continue
            data_files = {p.stem: p for p in key_dir.glob("*.bin")}
            manifest_files = {p.stem: p for p in key_dir.glob("*.json")}
            # First pass: data missing its manifest; second pass: manifest
            # missing its data.  Same checks, parameterized.
            passes = (
                (data_files, manifest_files,
                 "version data without manifest", "removed orphaned version data"),
                (manifest_files, data_files,
                 "version manifest without data", "removed orphaned version manifest"),
            )
            for present, counterpart, detail, heal_action in passes:
                for stem, stray in present.items():
                    if stem in counterpart:
                        continue
                    result.stale_versions += 1
                    issue = IntegrityIssue(
                        issue_type="stale_version",
                        bucket=bucket_name,
                        key=f"{key_dir.relative_to(versions_root).as_posix()}/{stray.name}",
                        detail=detail,
                    )
                    if auto_heal and not dry_run:
                        try:
                            stray.unlink(missing_ok=True)
                            issue.healed = True
                            issue.heal_action = heal_action
                            result.issues_healed += 1
                        except OSError as e:
                            result.errors.append(f"heal stale version {stray}: {e}")
                    self._add_issue(result, issue)
    except OSError as e:
        result.errors.append(f"check stale versions {bucket_name}: {e}")
def _check_etag_cache(
    self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
) -> None:
    """Cross-check the bucket's etag cache against the metadata index.

    For every key in ``etag_index.json`` the cached etag is compared with
    the ``__etag__`` stored in the corresponding ``_index.json``.  Any
    mismatch is recorded; when healing, the whole cache file is deleted
    (it will be rebuilt lazily) and all mismatch issues for this bucket
    are marked healed.
    """
    etag_index_path = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / "etag_index.json"
    if not etag_index_path.exists():
        return
    meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
    if not meta_root.exists():
        return
    try:
        etag_cache = json.loads(etag_index_path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return
    found_mismatch = False
    # Many cached keys live in the same directory and therefore share one
    # _index.json; parse each index file at most once instead of re-reading
    # it for every key (the previous implementation was O(keys) file reads).
    parsed_indexes = {}  # index_path -> dict | None (None = unreadable/absent)
    for full_key, cached_etag in etag_cache.items():
        key_path = Path(full_key)
        key_name = key_path.name
        parent = key_path.parent
        if parent == Path("."):
            index_path = meta_root / "_index.json"
        else:
            index_path = meta_root / parent / "_index.json"
        if index_path not in parsed_indexes:
            if not index_path.exists():
                parsed_indexes[index_path] = None
            else:
                try:
                    parsed_indexes[index_path] = json.loads(index_path.read_text(encoding="utf-8"))
                except (OSError, json.JSONDecodeError):
                    parsed_indexes[index_path] = None
        index_data = parsed_indexes[index_path]
        if index_data is None:
            continue
        entry = index_data.get(key_name)
        if not entry:
            continue
        meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
        stored_etag = meta.get("__etag__")
        if stored_etag and cached_etag != stored_etag:
            result.etag_cache_inconsistencies += 1
            found_mismatch = True
            issue = IntegrityIssue(
                issue_type="etag_cache_inconsistency",
                bucket=bucket_name,
                key=full_key,
                detail=f"cached_etag={cached_etag} index_etag={stored_etag}",
            )
            self._add_issue(result, issue)
    if found_mismatch and auto_heal and not dry_run:
        try:
            etag_index_path.unlink(missing_ok=True)
            for issue in result.issues:
                if issue.issue_type == "etag_cache_inconsistency" and issue.bucket == bucket_name and not issue.healed:
                    issue.healed = True
                    issue.heal_action = "deleted etag_index.json"
                    result.issues_healed += 1
        except OSError as e:
            result.errors.append(f"heal etag cache {bucket_name}: {e}")
def _check_legacy_metadata(
    self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
) -> None:
    """Detect legacy per-object ``.meta.json`` files that drifted from the index.

    Two drift cases:
      * key absent from the new index -> heal by migrating the legacy
        metadata into the index and deleting the legacy file;
      * key present but contents differ -> the index is authoritative, so
        heal by deleting the legacy file.
    Healing only runs with ``auto_heal`` and not ``dry_run``.
    """
    legacy_meta_root = self.storage_root / bucket_name / ".meta"
    if not legacy_meta_root.exists():
        return
    meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
    try:
        for meta_file in legacy_meta_root.rglob("*.meta.json"):
            if not meta_file.is_file():
                continue
            try:
                rel = meta_file.relative_to(legacy_meta_root)
            except ValueError:
                continue
            full_key = rel.as_posix().removesuffix(".meta.json")
            key_path = Path(full_key)
            key_name = key_path.name
            parent = key_path.parent
            if parent == Path("."):
                index_path = meta_root / "_index.json"
            else:
                index_path = meta_root / parent / "_index.json"
            try:
                legacy_data = json.loads(meta_file.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError):
                continue
            # Load the index once and reuse it for both the drift check and
            # the heal path (the previous implementation read and parsed the
            # same file a second time while healing).
            index_data = None
            if index_path.exists():
                try:
                    index_data = json.loads(index_path.read_text(encoding="utf-8"))
                except (OSError, json.JSONDecodeError):
                    index_data = None
            index_entry = index_data.get(key_name) if index_data is not None else None
            if index_entry is None:
                result.legacy_metadata_drifts += 1
                issue = IntegrityIssue(
                    issue_type="legacy_metadata_drift",
                    bucket=bucket_name,
                    key=full_key,
                    detail="unmigrated legacy .meta.json",
                )
                if auto_heal and not dry_run:
                    try:
                        merged = index_data if index_data is not None else {}
                        merged[key_name] = {"metadata": legacy_data}
                        self._atomic_write_index(index_path, merged)
                        meta_file.unlink(missing_ok=True)
                        issue.healed = True
                        issue.heal_action = "migrated to index and deleted legacy file"
                        result.issues_healed += 1
                    except OSError as e:
                        result.errors.append(f"heal legacy {bucket_name}/{full_key}: {e}")
                self._add_issue(result, issue)
            else:
                index_meta = index_entry.get("metadata", {}) if isinstance(index_entry, dict) else {}
                if legacy_data != index_meta:
                    result.legacy_metadata_drifts += 1
                    issue = IntegrityIssue(
                        issue_type="legacy_metadata_drift",
                        bucket=bucket_name,
                        key=full_key,
                        detail="legacy .meta.json differs from index entry",
                    )
                    if auto_heal and not dry_run:
                        try:
                            meta_file.unlink(missing_ok=True)
                            issue.healed = True
                            issue.heal_action = "deleted legacy file (index is authoritative)"
                            result.issues_healed += 1
                        except OSError as e:
                            result.errors.append(f"heal legacy drift {bucket_name}/{full_key}: {e}")
                    self._add_issue(result, issue)
    except OSError as e:
        result.errors.append(f"check legacy meta {bucket_name}: {e}")
@staticmethod
def _atomic_write_index(index_path: Path, data: Dict[str, Any]) -> None:
    """Durably write *data* as JSON to *index_path* via atomic rename.

    Serializes into a sibling ``.tmp`` file, flushes and fsyncs it, then
    ``os.replace``s it over the target so readers never observe a partial
    index.  On any failure the temp file is removed and the exception
    re-raised.
    """
    index_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = index_path.with_suffix(".tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f)
            # Flush + fsync before the rename; otherwise a crash after
            # os.replace can leave the index pointing at not-yet-durable
            # data.  Matches ObjectStorage._atomic_write_json elsewhere in
            # this codebase.
            f.flush()
            os.fsync(f.fileno())
        os.replace(str(tmp_path), str(index_path))
    except BaseException:
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError:
            pass
        raise
def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
    """Return up to *limit* past scan records, skipping *offset*, as dicts.

    Pagination is delegated to the underlying history store; each record is
    serialized through its ``to_dict`` method.
    """
    serialized = []
    for record in self.history_store.get_history(limit, offset):
        serialized.append(record.to_dict())
    return serialized
def get_status(self) -> dict:
    """Snapshot the scanner's scheduling state and configuration.

    ``enabled`` means the scanner has not been shut down or still has a
    pending timer; ``running`` requires a live timer AND no shutdown
    request.
    """
    timer_active = self._timer is not None
    shutting_down = self._shutdown
    status = {
        "enabled": (not shutting_down) or timer_active,
        "running": timer_active and not shutting_down,
        "interval_hours": self.interval_seconds / 3600.0,
        "batch_size": self.batch_size,
        "auto_heal": self.auto_heal,
        "dry_run": self.dry_run,
    }
    return status

View File

@@ -85,6 +85,9 @@ def _bucket_policies() -> BucketPolicyStore:
def _build_policy_context() -> Dict[str, Any]: def _build_policy_context() -> Dict[str, Any]:
cached = getattr(g, "_policy_context", None)
if cached is not None:
return cached
ctx: Dict[str, Any] = {} ctx: Dict[str, Any] = {}
if request.headers.get("Referer"): if request.headers.get("Referer"):
ctx["aws:Referer"] = request.headers.get("Referer") ctx["aws:Referer"] = request.headers.get("Referer")
@@ -98,6 +101,7 @@ def _build_policy_context() -> Dict[str, Any]:
ctx["aws:SecureTransport"] = str(request.is_secure).lower() ctx["aws:SecureTransport"] = str(request.is_secure).lower()
if request.headers.get("User-Agent"): if request.headers.get("User-Agent"):
ctx["aws:UserAgent"] = request.headers.get("User-Agent") ctx["aws:UserAgent"] = request.headers.get("User-Agent")
g._policy_context = ctx
return ctx return ctx
@@ -1015,17 +1019,73 @@ def _method_not_allowed(allowed: list[str]) -> Response:
return response return response
def _check_conditional_headers(etag: str, last_modified: float | None) -> Response | None:
from email.utils import parsedate_to_datetime
if_match = request.headers.get("If-Match")
if if_match:
if if_match.strip() != "*":
match_etags = [e.strip().strip('"') for e in if_match.split(",")]
if etag not in match_etags:
return Response(status=412)
if_unmodified = request.headers.get("If-Unmodified-Since")
if not if_match and if_unmodified and last_modified is not None:
try:
dt = parsedate_to_datetime(if_unmodified)
obj_dt = datetime.fromtimestamp(last_modified, timezone.utc)
if obj_dt > dt:
return Response(status=412)
except (TypeError, ValueError):
pass
if_none_match = request.headers.get("If-None-Match")
if if_none_match:
if if_none_match.strip() == "*":
resp = Response(status=304)
resp.headers["ETag"] = f'"{etag}"'
if last_modified is not None:
resp.headers["Last-Modified"] = http_date(last_modified)
return resp
none_match_etags = [e.strip().strip('"') for e in if_none_match.split(",")]
if etag in none_match_etags:
resp = Response(status=304)
resp.headers["ETag"] = f'"{etag}"'
if last_modified is not None:
resp.headers["Last-Modified"] = http_date(last_modified)
return resp
if_modified = request.headers.get("If-Modified-Since")
if not if_none_match and if_modified and last_modified is not None:
try:
dt = parsedate_to_datetime(if_modified)
obj_dt = datetime.fromtimestamp(last_modified, timezone.utc)
if obj_dt <= dt:
resp = Response(status=304)
resp.headers["ETag"] = f'"{etag}"'
resp.headers["Last-Modified"] = http_date(last_modified)
return resp
except (TypeError, ValueError):
pass
return None
def _apply_object_headers( def _apply_object_headers(
response: Response, response: Response,
*, *,
file_stat, file_stat,
metadata: Dict[str, str] | None, metadata: Dict[str, str] | None,
etag: str, etag: str,
size_override: int | None = None,
mtime_override: float | None = None,
) -> None: ) -> None:
if file_stat is not None: effective_size = size_override if size_override is not None else (file_stat.st_size if file_stat is not None else None)
if response.status_code != 206: effective_mtime = mtime_override if mtime_override is not None else (file_stat.st_mtime if file_stat is not None else None)
response.headers["Content-Length"] = str(file_stat.st_size) if effective_size is not None and response.status_code != 206:
response.headers["Last-Modified"] = http_date(file_stat.st_mtime) response.headers["Content-Length"] = str(effective_size)
if effective_mtime is not None:
response.headers["Last-Modified"] = http_date(effective_mtime)
response.headers["ETag"] = f'"{etag}"' response.headers["ETag"] = f'"{etag}"'
response.headers["Accept-Ranges"] = "bytes" response.headers["Accept-Ranges"] = "bytes"
for key, value in (metadata or {}).items(): for key, value in (metadata or {}).items():
@@ -2820,6 +2880,8 @@ def object_handler(bucket_name: str, object_key: str):
if validation_error: if validation_error:
return _error_response("InvalidArgument", validation_error, 400) return _error_response("InvalidArgument", validation_error, 400)
metadata["__content_type__"] = content_type or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
try: try:
meta = storage.put_object( meta = storage.put_object(
bucket_name, bucket_name,
@@ -2834,10 +2896,23 @@ def object_handler(bucket_name: str, object_key: str):
if "Bucket" in message: if "Bucket" in message:
return _error_response("NoSuchBucket", message, 404) return _error_response("NoSuchBucket", message, 404)
return _error_response("InvalidArgument", message, 400) return _error_response("InvalidArgument", message, 400)
current_app.logger.info(
"Object uploaded", content_md5 = request.headers.get("Content-MD5")
extra={"bucket": bucket_name, "key": object_key, "size": meta.size}, if content_md5 and meta.etag:
) try:
expected_md5 = base64.b64decode(content_md5).hex()
except Exception:
storage.delete_object(bucket_name, object_key)
return _error_response("InvalidDigest", "Content-MD5 header is not valid base64", 400)
if expected_md5 != meta.etag:
storage.delete_object(bucket_name, object_key)
return _error_response("BadDigest", "The Content-MD5 you specified did not match what we received", 400)
if current_app.logger.isEnabledFor(logging.INFO):
current_app.logger.info(
"Object uploaded",
extra={"bucket": bucket_name, "key": object_key, "size": meta.size},
)
response = Response(status=200) response = Response(status=200)
if meta.etag: if meta.etag:
response.headers["ETag"] = f'"{meta.etag}"' response.headers["ETag"] = f'"{meta.etag}"'
@@ -2871,10 +2946,27 @@ def object_handler(bucket_name: str, object_key: str):
except StorageError as exc: except StorageError as exc:
return _error_response("NoSuchKey", str(exc), 404) return _error_response("NoSuchKey", str(exc), 404)
metadata = storage.get_object_metadata(bucket_name, object_key) metadata = storage.get_object_metadata(bucket_name, object_key)
mimetype = mimetypes.guess_type(object_key)[0] or "application/octet-stream" mimetype = metadata.get("__content_type__") or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
is_encrypted = "x-amz-server-side-encryption" in metadata is_encrypted = "x-amz-server-side-encryption" in metadata
cond_etag = metadata.get("__etag__")
if not cond_etag and not is_encrypted:
try:
cond_etag = storage._compute_etag(path)
except OSError:
cond_etag = None
if cond_etag:
cond_mtime = float(metadata["__last_modified__"]) if "__last_modified__" in metadata else None
if cond_mtime is None:
try:
cond_mtime = path.stat().st_mtime
except OSError:
pass
cond_resp = _check_conditional_headers(cond_etag, cond_mtime)
if cond_resp:
return cond_resp
if request.method == "GET": if request.method == "GET":
range_header = request.headers.get("Range") range_header = request.headers.get("Range")
@@ -2963,10 +3055,7 @@ def object_handler(bucket_name: str, object_key: str):
response.headers["Content-Type"] = mimetype response.headers["Content-Type"] = mimetype
logged_bytes = 0 logged_bytes = 0
try: file_stat = stat if not is_encrypted else None
file_stat = path.stat() if not is_encrypted else None
except (PermissionError, OSError):
file_stat = None
_apply_object_headers(response, file_stat=file_stat, metadata=metadata, etag=etag) _apply_object_headers(response, file_stat=file_stat, metadata=metadata, etag=etag)
if request.method == "GET": if request.method == "GET":
@@ -2983,8 +3072,9 @@ def object_handler(bucket_name: str, object_key: str):
if value: if value:
response.headers[header] = _sanitize_header_value(value) response.headers[header] = _sanitize_header_value(value)
action = "Object read" if request.method == "GET" else "Object head" if current_app.logger.isEnabledFor(logging.INFO):
current_app.logger.info(action, extra={"bucket": bucket_name, "key": object_key, "bytes": logged_bytes}) action = "Object read" if request.method == "GET" else "Object head"
current_app.logger.info(action, extra={"bucket": bucket_name, "key": object_key, "bytes": logged_bytes})
return response return response
if "uploadId" in request.args: if "uploadId" in request.args:
@@ -3002,7 +3092,8 @@ def object_handler(bucket_name: str, object_key: str):
storage.delete_object(bucket_name, object_key) storage.delete_object(bucket_name, object_key)
lock_service.delete_object_lock_metadata(bucket_name, object_key) lock_service.delete_object_lock_metadata(bucket_name, object_key)
current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key}) if current_app.logger.isEnabledFor(logging.INFO):
current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key})
principal, _ = _require_principal() principal, _ = _require_principal()
_notifications().emit_object_removed( _notifications().emit_object_removed(
@@ -3343,12 +3434,30 @@ def head_object(bucket_name: str, object_key: str) -> Response:
_authorize_action(principal, bucket_name, "read", object_key=object_key) _authorize_action(principal, bucket_name, "read", object_key=object_key)
path = _storage().get_object_path(bucket_name, object_key) path = _storage().get_object_path(bucket_name, object_key)
metadata = _storage().get_object_metadata(bucket_name, object_key) metadata = _storage().get_object_metadata(bucket_name, object_key)
stat = path.stat()
etag = metadata.get("__etag__") or _storage()._compute_etag(path) etag = metadata.get("__etag__") or _storage()._compute_etag(path)
response = Response(status=200) head_mtime = float(metadata["__last_modified__"]) if "__last_modified__" in metadata else None
_apply_object_headers(response, file_stat=stat, metadata=metadata, etag=etag) if head_mtime is None:
response.headers["Content-Type"] = mimetypes.guess_type(object_key)[0] or "application/octet-stream" try:
head_mtime = path.stat().st_mtime
except OSError:
pass
cond_resp = _check_conditional_headers(etag, head_mtime)
if cond_resp:
return cond_resp
cached_size = metadata.get("__size__")
cached_mtime = metadata.get("__last_modified__")
if cached_size is not None and cached_mtime is not None:
size_val = int(cached_size)
mtime_val = float(cached_mtime)
response = Response(status=200)
_apply_object_headers(response, file_stat=None, metadata=metadata, etag=etag, size_override=size_val, mtime_override=mtime_val)
else:
stat = path.stat()
response = Response(status=200)
_apply_object_headers(response, file_stat=stat, metadata=metadata, etag=etag)
response.headers["Content-Type"] = metadata.get("__content_type__") or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
return response return response
except (StorageError, FileNotFoundError): except (StorageError, FileNotFoundError):
return _error_response("NoSuchKey", "Object not found", 404) return _error_response("NoSuchKey", "Object not found", 404)
@@ -3578,6 +3687,8 @@ def _initiate_multipart_upload(bucket_name: str, object_key: str) -> Response:
return error return error
metadata = _extract_request_metadata() metadata = _extract_request_metadata()
content_type = request.headers.get("Content-Type")
metadata["__content_type__"] = content_type or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
try: try:
upload_id = _storage().initiate_multipart_upload( upload_id = _storage().initiate_multipart_upload(
bucket_name, bucket_name,
@@ -3630,6 +3741,15 @@ def _upload_part(bucket_name: str, object_key: str) -> Response:
return _error_response("NoSuchUpload", str(exc), 404) return _error_response("NoSuchUpload", str(exc), 404)
return _error_response("InvalidArgument", str(exc), 400) return _error_response("InvalidArgument", str(exc), 400)
content_md5 = request.headers.get("Content-MD5")
if content_md5 and etag:
try:
expected_md5 = base64.b64decode(content_md5).hex()
except Exception:
return _error_response("InvalidDigest", "Content-MD5 header is not valid base64", 400)
if expected_md5 != etag:
return _error_response("BadDigest", "The Content-MD5 you specified did not match what we received", 400)
response = Response(status=200) response = Response(status=200)
response.headers["ETag"] = f'"{etag}"' response.headers["ETag"] = f'"{etag}"'
return response return response

View File

@@ -361,7 +361,7 @@ class ObjectStorage:
try: try:
cache_path.parent.mkdir(parents=True, exist_ok=True) cache_path.parent.mkdir(parents=True, exist_ok=True)
cache_path.write_text(json.dumps(stats), encoding="utf-8") self._atomic_write_json(cache_path, stats)
except OSError: except OSError:
pass pass
@@ -423,7 +423,7 @@ class ObjectStorage:
cache_path = self._system_bucket_root(bucket_id) / "stats.json" cache_path = self._system_bucket_root(bucket_id) / "stats.json"
try: try:
cache_path.parent.mkdir(parents=True, exist_ok=True) cache_path.parent.mkdir(parents=True, exist_ok=True)
cache_path.write_text(json.dumps(data), encoding="utf-8") self._atomic_write_json(cache_path, data)
except OSError: except OSError:
pass pass
@@ -879,11 +879,6 @@ class ObjectStorage:
is_overwrite = destination.exists() is_overwrite = destination.exists()
existing_size = destination.stat().st_size if is_overwrite else 0 existing_size = destination.stat().st_size if is_overwrite else 0
archived_version_size = 0
if self._is_versioning_enabled(bucket_path) and is_overwrite:
archived_version_size = existing_size
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
tmp_dir = self._system_root_path() / self.SYSTEM_TMP_DIR tmp_dir = self._system_root_path() / self.SYSTEM_TMP_DIR
tmp_dir.mkdir(parents=True, exist_ok=True) tmp_dir.mkdir(parents=True, exist_ok=True)
@@ -910,19 +905,21 @@ class ObjectStorage:
quota_check["quota"], quota_check["quota"],
quota_check["usage"], quota_check["usage"],
) )
except BaseException:
shutil.move(str(tmp_path), str(destination))
finally:
if tmp_path: if tmp_path:
try: try:
tmp_path.unlink(missing_ok=True) tmp_path.unlink(missing_ok=True)
except OSError: except OSError:
pass pass
raise
else: else:
tmp_path = tmp_dir / f"{uuid.uuid4().hex}.tmp" tmp_path = tmp_dir / f"{uuid.uuid4().hex}.tmp"
try: try:
checksum = hashlib.md5()
with tmp_path.open("wb") as target: with tmp_path.open("wb") as target:
shutil.copyfileobj(stream, target) shutil.copyfileobj(_HashingReader(stream, checksum), target)
target.flush()
os.fsync(target.fileno())
new_size = tmp_path.stat().st_size new_size = tmp_path.stat().st_size
size_delta = new_size - existing_size size_delta = new_size - existing_size
@@ -941,27 +938,43 @@ class ObjectStorage:
quota_check["usage"], quota_check["usage"],
) )
checksum = hashlib.md5()
with tmp_path.open("rb") as f:
while True:
chunk = f.read(1048576)
if not chunk:
break
checksum.update(chunk)
etag = checksum.hexdigest() etag = checksum.hexdigest()
except BaseException:
shutil.move(str(tmp_path), str(destination))
finally:
try: try:
tmp_path.unlink(missing_ok=True) tmp_path.unlink(missing_ok=True)
except OSError: except OSError:
pass pass
raise
stat = destination.stat() lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock"
try:
with _atomic_lock_file(lock_file_path):
archived_version_size = 0
if self._is_versioning_enabled(bucket_path) and is_overwrite:
archived_version_size = existing_size
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
internal_meta = {"__etag__": etag, "__size__": str(stat.st_size)} shutil.move(str(tmp_path), str(destination))
combined_meta = {**internal_meta, **(metadata or {})} tmp_path = None
self._write_metadata(bucket_id, safe_key, combined_meta)
stat = destination.stat()
internal_meta = {"__etag__": etag, "__size__": str(stat.st_size), "__last_modified__": str(stat.st_mtime)}
combined_meta = {**internal_meta, **(metadata or {})}
self._write_metadata(bucket_id, safe_key, combined_meta)
except BlockingIOError:
try:
if tmp_path:
tmp_path.unlink(missing_ok=True)
except OSError:
pass
raise StorageError("Another upload to this key is in progress")
finally:
if tmp_path:
try:
tmp_path.unlink(missing_ok=True)
except OSError:
pass
self._update_bucket_stats_cache( self._update_bucket_stats_cache(
bucket_id, bucket_id,
@@ -1553,18 +1566,16 @@ class ObjectStorage:
temp_path = upload_root / f".{part_filename}.tmp" temp_path = upload_root / f".{part_filename}.tmp"
try: try:
with temp_path.open("wb") as target:
shutil.copyfileobj(stream, target)
if _HAS_RUST: if _HAS_RUST:
with temp_path.open("wb") as target:
shutil.copyfileobj(stream, target)
part_etag = _rc.md5_file(str(temp_path)) part_etag = _rc.md5_file(str(temp_path))
else: else:
checksum = hashlib.md5() checksum = hashlib.md5()
with temp_path.open("rb") as f: with temp_path.open("wb") as target:
while True: shutil.copyfileobj(_HashingReader(stream, checksum), target)
chunk = f.read(1048576) target.flush()
if not chunk: os.fsync(target.fileno())
break
checksum.update(chunk)
part_etag = checksum.hexdigest() part_etag = checksum.hexdigest()
temp_path.replace(part_path) temp_path.replace(part_path)
except OSError: except OSError:
@@ -1598,7 +1609,7 @@ class ObjectStorage:
parts = manifest.setdefault("parts", {}) parts = manifest.setdefault("parts", {})
parts[str(part_number)] = record parts[str(part_number)] = record
manifest_path.write_text(json.dumps(manifest), encoding="utf-8") self._atomic_write_json(manifest_path, manifest)
break break
except OSError as exc: except OSError as exc:
if attempt < max_retries - 1: if attempt < max_retries - 1:
@@ -1691,7 +1702,7 @@ class ObjectStorage:
parts = manifest.setdefault("parts", {}) parts = manifest.setdefault("parts", {})
parts[str(part_number)] = record parts[str(part_number)] = record
manifest_path.write_text(json.dumps(manifest), encoding="utf-8") self._atomic_write_json(manifest_path, manifest)
break break
except OSError as exc: except OSError as exc:
if attempt < max_retries - 1: if attempt < max_retries - 1:
@@ -1797,6 +1808,8 @@ class ObjectStorage:
break break
checksum.update(data) checksum.update(data)
target.write(data) target.write(data)
target.flush()
os.fsync(target.fileno())
checksum_hex = checksum.hexdigest() checksum_hex = checksum.hexdigest()
except BlockingIOError: except BlockingIOError:
raise StorageError("Another upload to this key is in progress") raise StorageError("Another upload to this key is in progress")
@@ -1815,7 +1828,7 @@ class ObjectStorage:
etag = checksum_hex etag = checksum_hex
metadata = manifest.get("metadata") metadata = manifest.get("metadata")
internal_meta = {"__etag__": etag, "__size__": str(stat.st_size)} internal_meta = {"__etag__": etag, "__size__": str(stat.st_size), "__last_modified__": str(stat.st_mtime)}
combined_meta = {**internal_meta, **(metadata or {})} combined_meta = {**internal_meta, **(metadata or {})}
self._write_metadata(bucket_id, safe_key, combined_meta) self._write_metadata(bucket_id, safe_key, combined_meta)
@@ -2303,6 +2316,23 @@ class ObjectStorage:
): ):
path.mkdir(parents=True, exist_ok=True) path.mkdir(parents=True, exist_ok=True)
@staticmethod
def _atomic_write_json(path: Path, data: Any) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
tmp_path = path.with_suffix(".tmp")
try:
with tmp_path.open("w", encoding="utf-8") as f:
json.dump(data, f)
f.flush()
os.fsync(f.fileno())
tmp_path.replace(path)
except BaseException:
try:
tmp_path.unlink(missing_ok=True)
except OSError:
pass
raise
def _multipart_dir(self, bucket_name: str, upload_id: str) -> Path: def _multipart_dir(self, bucket_name: str, upload_id: str) -> Path:
return self._multipart_bucket_root(bucket_name) / upload_id return self._multipart_bucket_root(bucket_name) / upload_id
@@ -2337,7 +2367,7 @@ class ObjectStorage:
def _write_bucket_config(self, bucket_name: str, payload: dict[str, Any]) -> None: def _write_bucket_config(self, bucket_name: str, payload: dict[str, Any]) -> None:
config_path = self._bucket_config_path(bucket_name) config_path = self._bucket_config_path(bucket_name)
config_path.parent.mkdir(parents=True, exist_ok=True) config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(json.dumps(payload), encoding="utf-8") self._atomic_write_json(config_path, payload)
try: try:
mtime = config_path.stat().st_mtime mtime = config_path.stat().st_mtime
except OSError: except OSError:
@@ -2371,8 +2401,7 @@ class ObjectStorage:
def _write_multipart_manifest(self, upload_root: Path, manifest: dict[str, Any]) -> None: def _write_multipart_manifest(self, upload_root: Path, manifest: dict[str, Any]) -> None:
manifest_path = upload_root / self.MULTIPART_MANIFEST manifest_path = upload_root / self.MULTIPART_MANIFEST
manifest_path.parent.mkdir(parents=True, exist_ok=True) self._atomic_write_json(manifest_path, manifest)
manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
def _metadata_file(self, bucket_name: str, key: Path) -> Path: def _metadata_file(self, bucket_name: str, key: Path) -> Path:
meta_root = self._bucket_meta_root(bucket_name) meta_root = self._bucket_meta_root(bucket_name)
@@ -2442,7 +2471,7 @@ class ObjectStorage:
except (OSError, json.JSONDecodeError): except (OSError, json.JSONDecodeError):
pass pass
index_data[entry_name] = entry index_data[entry_name] = entry
index_path.write_text(json.dumps(index_data), encoding="utf-8") self._atomic_write_json(index_path, index_data)
self._invalidate_meta_read_cache(bucket_name, key) self._invalidate_meta_read_cache(bucket_name, key)
def _delete_index_entry(self, bucket_name: str, key: Path) -> None: def _delete_index_entry(self, bucket_name: str, key: Path) -> None:
@@ -2463,7 +2492,7 @@ class ObjectStorage:
if entry_name in index_data: if entry_name in index_data:
del index_data[entry_name] del index_data[entry_name]
if index_data: if index_data:
index_path.write_text(json.dumps(index_data), encoding="utf-8") self._atomic_write_json(index_path, index_data)
else: else:
try: try:
index_path.unlink() index_path.unlink()
@@ -2512,7 +2541,7 @@ class ObjectStorage:
"reason": reason, "reason": reason,
} }
manifest_path = version_dir / f"{version_id}.json" manifest_path = version_dir / f"{version_id}.json"
manifest_path.write_text(json.dumps(record), encoding="utf-8") self._atomic_write_json(manifest_path, record)
def _read_metadata(self, bucket_name: str, key: Path) -> Dict[str, str]: def _read_metadata(self, bucket_name: str, key: Path) -> Dict[str, str]:
entry = self._read_index_entry(bucket_name, key) entry = self._read_index_entry(bucket_name, key)

View File

@@ -1754,6 +1754,10 @@ def iam_dashboard():
users = iam_service.list_users() if not locked else [] users = iam_service.list_users() if not locked else []
config_summary = iam_service.config_summary() config_summary = iam_service.config_summary()
config_document = json.dumps(iam_service.export_config(mask_secrets=True), indent=2) config_document = json.dumps(iam_service.export_config(mask_secrets=True), indent=2)
from datetime import datetime as _dt, timedelta as _td, timezone as _tz
_now = _dt.now(_tz.utc)
now_iso = _now.isoformat()
soon_iso = (_now + _td(days=7)).isoformat()
return render_template( return render_template(
"iam.html", "iam.html",
users=users, users=users,
@@ -1763,6 +1767,8 @@ def iam_dashboard():
config_summary=config_summary, config_summary=config_summary,
config_document=config_document, config_document=config_document,
disclosed_secret=disclosed_secret, disclosed_secret=disclosed_secret,
now_iso=now_iso,
soon_iso=soon_iso,
) )
@@ -1782,6 +1788,8 @@ def create_iam_user():
return jsonify({"error": "Display name must be 64 characters or fewer"}), 400 return jsonify({"error": "Display name must be 64 characters or fewer"}), 400
flash("Display name must be 64 characters or fewer", "danger") flash("Display name must be 64 characters or fewer", "danger")
return redirect(url_for("ui.iam_dashboard")) return redirect(url_for("ui.iam_dashboard"))
custom_access_key = request.form.get("access_key", "").strip() or None
custom_secret_key = request.form.get("secret_key", "").strip() or None
policies_text = request.form.get("policies", "").strip() policies_text = request.form.get("policies", "").strip()
policies = None policies = None
if policies_text: if policies_text:
@@ -1792,8 +1800,21 @@ def create_iam_user():
return jsonify({"error": f"Invalid JSON: {exc}"}), 400 return jsonify({"error": f"Invalid JSON: {exc}"}), 400
flash(f"Invalid JSON: {exc}", "danger") flash(f"Invalid JSON: {exc}", "danger")
return redirect(url_for("ui.iam_dashboard")) return redirect(url_for("ui.iam_dashboard"))
expires_at = request.form.get("expires_at", "").strip() or None
if expires_at:
try:
from datetime import datetime as _dt, timezone as _tz
exp_dt = _dt.fromisoformat(expires_at)
if exp_dt.tzinfo is None:
exp_dt = exp_dt.replace(tzinfo=_tz.utc)
expires_at = exp_dt.isoformat()
except (ValueError, TypeError):
if _wants_json():
return jsonify({"error": "Invalid expiry date format"}), 400
flash("Invalid expiry date format", "danger")
return redirect(url_for("ui.iam_dashboard"))
try: try:
created = _iam().create_user(display_name=display_name, policies=policies) created = _iam().create_user(display_name=display_name, policies=policies, access_key=custom_access_key, secret_key=custom_secret_key, expires_at=expires_at)
except IamError as exc: except IamError as exc:
if _wants_json(): if _wants_json():
return jsonify({"error": str(exc)}), 400 return jsonify({"error": str(exc)}), 400
@@ -1967,6 +1988,45 @@ def update_iam_policies(access_key: str):
return redirect(url_for("ui.iam_dashboard")) return redirect(url_for("ui.iam_dashboard"))
@ui_bp.post("/iam/users/<access_key>/expiry")
def update_iam_expiry(access_key: str):
    """Set or clear the credential expiry for an IAM user.

    Form field ``expires_at`` holds an ISO 8601 timestamp; an empty value
    clears the expiry ("never expires").  Naive timestamps are assumed to be
    UTC.  Requires the ``iam:update_policy`` permission.  Responds with JSON
    or a flash+redirect depending on ``_wants_json()``.
    """
    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:update_policy")
    except IamError as exc:
        if _wants_json():
            return jsonify({"error": str(exc)}), 403
        flash(str(exc), "danger")
        return redirect(url_for("ui.iam_dashboard"))
    expires_at = request.form.get("expires_at", "").strip() or None
    if expires_at:
        try:
            from datetime import datetime as _dt, timezone as _tz
            # Accept a trailing "Z" (UTC designator, as shown in the docs'
            # curl examples): datetime.fromisoformat only understands it
            # natively on Python 3.11+, so normalise it to "+00:00" first.
            iso_text = expires_at[:-1] + "+00:00" if expires_at.endswith(("Z", "z")) else expires_at
            exp_dt = _dt.fromisoformat(iso_text)
            if exp_dt.tzinfo is None:
                # Naive input: treat as UTC so comparisons are well-defined.
                exp_dt = exp_dt.replace(tzinfo=_tz.utc)
            # Store a fully-qualified (timezone-aware) ISO string.
            expires_at = exp_dt.isoformat()
        except (ValueError, TypeError):
            if _wants_json():
                return jsonify({"error": "Invalid expiry date format"}), 400
            flash("Invalid expiry date format", "danger")
            return redirect(url_for("ui.iam_dashboard"))
    try:
        _iam().update_user_expiry(access_key, expires_at)
        if _wants_json():
            return jsonify({"success": True, "message": f"Updated expiry for {access_key}", "expires_at": expires_at})
        label = expires_at if expires_at else "never"
        flash(f"Expiry for {access_key} set to {label}", "success")
    except IamError as exc:
        if _wants_json():
            return jsonify({"error": str(exc)}), 400
        flash(str(exc), "danger")
    return redirect(url_for("ui.iam_dashboard"))
@ui_bp.post("/connections") @ui_bp.post("/connections")
def create_connection(): def create_connection():
principal = _current_principal() principal = _current_principal()

View File

@@ -1,6 +1,6 @@
from __future__ import annotations from __future__ import annotations
APP_VERSION = "0.3.5" APP_VERSION = "0.3.9"
def get_version() -> str: def get_version() -> str:

152
docs.md
View File

@@ -145,13 +145,15 @@ All configuration is done via environment variables. The table below lists every
| Variable | Default | Notes | | Variable | Default | Notes |
| --- | --- | --- | | --- | --- | --- |
| `IAM_CONFIG` | `data/.myfsio.sys/config/iam.json` | Stores users, secrets, and inline policies. | | `IAM_CONFIG` | `data/.myfsio.sys/config/iam.json` | Stores users, secrets, and inline policies. Encrypted at rest when `SECRET_KEY` is set. |
| `BUCKET_POLICY_PATH` | `data/.myfsio.sys/config/bucket_policies.json` | Bucket policy store (auto hot-reload). | | `BUCKET_POLICY_PATH` | `data/.myfsio.sys/config/bucket_policies.json` | Bucket policy store (auto hot-reload). |
| `AUTH_MAX_ATTEMPTS` | `5` | Failed login attempts before lockout. | | `AUTH_MAX_ATTEMPTS` | `5` | Failed login attempts before lockout. |
| `AUTH_LOCKOUT_MINUTES` | `15` | Lockout duration after max failed attempts. | | `AUTH_LOCKOUT_MINUTES` | `15` | Lockout duration after max failed attempts. |
| `SESSION_LIFETIME_DAYS` | `30` | How long UI sessions remain valid. | | `SESSION_LIFETIME_DAYS` | `30` | How long UI sessions remain valid. |
| `SECRET_TTL_SECONDS` | `300` | TTL for ephemeral secrets (presigned URLs). | | `SECRET_TTL_SECONDS` | `300` | TTL for ephemeral secrets (presigned URLs). |
| `UI_ENFORCE_BUCKET_POLICIES` | `false` | Whether the UI should enforce bucket policies. | | `UI_ENFORCE_BUCKET_POLICIES` | `false` | Whether the UI should enforce bucket policies. |
| `ADMIN_ACCESS_KEY` | (none) | Custom access key for the admin user on first run or credential reset. If unset, a random key is generated. |
| `ADMIN_SECRET_KEY` | (none) | Custom secret key for the admin user on first run or credential reset. If unset, a random key is generated. |
### CORS (Cross-Origin Resource Sharing) ### CORS (Cross-Origin Resource Sharing)
@@ -250,6 +252,60 @@ Once enabled, configure lifecycle rules via:
</LifecycleConfiguration> </LifecycleConfiguration>
``` ```
## Garbage Collection
The garbage collector (GC) automatically cleans up orphaned data that accumulates over time: stale temporary files from failed uploads, abandoned multipart uploads, stale lock files, orphaned metadata entries, orphaned version files, and empty directories.
### Enabling GC
By default, GC is disabled. Enable it by setting:
```bash
GC_ENABLED=true python run.py
```
Or in your `myfsio.env` file:
```
GC_ENABLED=true
GC_INTERVAL_HOURS=6 # Run every 6 hours (default)
GC_TEMP_FILE_MAX_AGE_HOURS=24 # Delete temp files older than 24h
GC_MULTIPART_MAX_AGE_DAYS=7 # Delete orphaned multipart uploads older than 7 days
GC_LOCK_FILE_MAX_AGE_HOURS=1 # Delete stale lock files older than 1h
GC_DRY_RUN=false # Set to true to log without deleting
```
### What Gets Cleaned
| Type | Location | Condition |
|------|----------|-----------|
| **Temp files** | `.myfsio.sys/tmp/` | Older than `GC_TEMP_FILE_MAX_AGE_HOURS` |
| **Orphaned multipart uploads** | `.myfsio.sys/multipart/` and `<bucket>/.multipart/` | Older than `GC_MULTIPART_MAX_AGE_DAYS` |
| **Stale lock files** | `.myfsio.sys/buckets/<bucket>/locks/` | Older than `GC_LOCK_FILE_MAX_AGE_HOURS` |
| **Orphaned metadata** | `.myfsio.sys/buckets/<bucket>/meta/` and `<bucket>/.meta/` | Object file no longer exists |
| **Orphaned versions** | `.myfsio.sys/buckets/<bucket>/versions/` and `<bucket>/.versions/` | Main object no longer exists |
| **Empty directories** | Various internal directories | Directory is empty after cleanup |
### Admin API
All GC endpoints require admin (`iam:*`) permissions.
| Method | Route | Description |
|--------|-------|-------------|
| `GET` | `/admin/gc/status` | Get GC status and configuration |
| `POST` | `/admin/gc/run` | Trigger a manual GC run (body: `{"dry_run": true}` for preview) |
| `GET` | `/admin/gc/history` | Get GC execution history (query: `?limit=50&offset=0`) |
### Dry Run Mode
Set `GC_DRY_RUN=true` to log what would be deleted without actually removing anything. You can also trigger a one-time dry run via the admin API:
```bash
curl -X POST "http://localhost:5000/admin/gc/run" \
-H "X-Access-Key: <key>" -H "X-Secret-Key: <secret>" \
-H "Content-Type: application/json" \
-d '{"dry_run": true}'
```
### Performance Tuning ### Performance Tuning
| Variable | Default | Notes | | Variable | Default | Notes |
@@ -277,13 +333,14 @@ API responses for JSON, XML, HTML, CSS, and JavaScript are automatically gzip-co
Before deploying to production, ensure you: Before deploying to production, ensure you:
1. **Set `SECRET_KEY`** - Use a strong, unique value (e.g., `openssl rand -base64 32`) 1. **Set `SECRET_KEY`** - Use a strong, unique value (e.g., `openssl rand -base64 32`). This also enables IAM config encryption at rest.
2. **Restrict CORS** - Set `CORS_ORIGINS` to your specific domains instead of `*` 2. **Restrict CORS** - Set `CORS_ORIGINS` to your specific domains instead of `*`
3. **Configure `API_BASE_URL`** - Required for correct presigned URLs behind proxies 3. **Configure `API_BASE_URL`** - Required for correct presigned URLs behind proxies
4. **Enable HTTPS** - Use a reverse proxy (nginx, Cloudflare) with TLS termination 4. **Enable HTTPS** - Use a reverse proxy (nginx, Cloudflare) with TLS termination
5. **Review rate limits** - Adjust `RATE_LIMIT_DEFAULT` based on your needs 5. **Review rate limits** - Adjust `RATE_LIMIT_DEFAULT` based on your needs
6. **Secure master keys** - Back up `ENCRYPTION_MASTER_KEY_PATH` if using encryption 6. **Secure master keys** - Back up `ENCRYPTION_MASTER_KEY_PATH` if using encryption
7. **Use `--prod` flag** - Runs with Waitress instead of Flask dev server 7. **Use `--prod` flag** - Runs with Waitress instead of Flask dev server
8. **Set credential expiry** - Assign `expires_at` to non-admin users for time-limited access
### Proxy Configuration ### Proxy Configuration
@@ -299,6 +356,69 @@ The application automatically trusts these headers to generate correct presigned
| `ALLOWED_REDIRECT_HOSTS` | `""` | Comma-separated whitelist of safe redirect targets. Empty allows only same-host redirects. | | `ALLOWED_REDIRECT_HOSTS` | `""` | Comma-separated whitelist of safe redirect targets. Empty allows only same-host redirects. |
| `ALLOW_INTERNAL_ENDPOINTS` | `false` | Allow connections to internal/private IPs for webhooks and replication targets. **Keep disabled in production unless needed.** | | `ALLOW_INTERNAL_ENDPOINTS` | `false` | Allow connections to internal/private IPs for webhooks and replication targets. **Keep disabled in production unless needed.** |
## Integrity Scanner
The integrity scanner detects and optionally auto-repairs data inconsistencies: corrupted objects (ETag mismatch), orphaned files without metadata, phantom metadata without files, stale version archives, ETag cache drift, and unmigrated legacy `.meta.json` files.
### Enabling Integrity Scanner
By default, the integrity scanner is disabled. Enable it by setting:
```bash
INTEGRITY_ENABLED=true python run.py
```
Or in your `myfsio.env` file:
```
INTEGRITY_ENABLED=true
INTEGRITY_INTERVAL_HOURS=24 # Run every 24 hours (default)
INTEGRITY_BATCH_SIZE=1000 # Max objects to scan per cycle
INTEGRITY_AUTO_HEAL=false # Automatically repair detected issues
INTEGRITY_DRY_RUN=false # Set to true to log without healing
```
### What Gets Checked
| Check | Detection | Heal Action |
|-------|-----------|-------------|
| **Corrupted objects** | File MD5 does not match stored `__etag__` | Update `__etag__` in index (disk data is authoritative) |
| **Orphaned objects** | File exists on disk without metadata entry | Create index entry with computed MD5/size/mtime |
| **Phantom metadata** | Index entry exists but file is missing from disk | Remove stale entry from `_index.json` |
| **Stale versions** | `.json` manifest without `.bin` data or vice versa | Remove orphaned version file |
| **ETag cache inconsistency** | `etag_index.json` entry differs from metadata `__etag__` | Delete `etag_index.json` (auto-rebuilt on next list) |
| **Legacy metadata drift** | Legacy `.meta.json` differs from index or is unmigrated | Migrate to index and delete legacy file |
### Admin API
All integrity endpoints require admin (`iam:*`) permissions.
| Method | Route | Description |
|--------|-------|-------------|
| `GET` | `/admin/integrity/status` | Get scanner status and configuration |
| `POST` | `/admin/integrity/run` | Trigger a manual scan (body: `{"dry_run": true, "auto_heal": true}`) |
| `GET` | `/admin/integrity/history` | Get scan history (query: `?limit=50&offset=0`) |
### Dry Run Mode
Set `INTEGRITY_DRY_RUN=true` to log detected issues without making any changes. You can also trigger a one-time dry run via the admin API:
```bash
curl -X POST "http://localhost:5000/admin/integrity/run" \
-H "X-Access-Key: <key>" -H "X-Secret-Key: <secret>" \
-H "Content-Type: application/json" \
-d '{"dry_run": true, "auto_heal": true}'
```
### Configuration Reference
| Variable | Default | Description |
|----------|---------|-------------|
| `INTEGRITY_ENABLED` | `false` | Enable background integrity scanning |
| `INTEGRITY_INTERVAL_HOURS` | `24` | Hours between scan cycles |
| `INTEGRITY_BATCH_SIZE` | `1000` | Max objects to scan per cycle |
| `INTEGRITY_AUTO_HEAL` | `false` | Automatically repair detected issues |
| `INTEGRITY_DRY_RUN` | `false` | Log issues without healing |
## 4. Upgrading and Updates ## 4. Upgrading and Updates
### Version Checking ### Version Checking
@@ -633,9 +753,10 @@ MyFSIO implements a comprehensive Identity and Access Management (IAM) system th
### Getting Started ### Getting Started
1. On first boot, `data/.myfsio.sys/config/iam.json` is created with a randomly generated admin user. The access key and secret key are printed to the console during first startup. If you miss it, check the `iam.json` file directly—credentials are stored in plaintext. 1. On first boot, `data/.myfsio.sys/config/iam.json` is created with a randomly generated admin user. The access key and secret key are printed to the console during first startup. You can set `ADMIN_ACCESS_KEY` and `ADMIN_SECRET_KEY` environment variables to use custom credentials instead of random ones. If `SECRET_KEY` is configured, the IAM config file is encrypted at rest using AES (Fernet). To reset admin credentials later, run `python run.py --reset-cred`.
2. Sign into the UI using the generated credentials, then open **IAM**: 2. Sign into the UI using the generated credentials, then open **IAM**:
- **Create user**: supply a display name and optional JSON inline policy array. - **Create user**: supply a display name, optional JSON inline policy array, and optional credential expiry date.
- **Set expiry**: assign an expiration date to any user's credentials. Expired credentials are rejected at authentication time. The UI shows expiry badges and preset durations (1h, 24h, 7d, 30d, 90d).
- **Rotate secret**: generates a new secret key; the UI surfaces it once. - **Rotate secret**: generates a new secret key; the UI surfaces it once.
- **Policy editor**: select a user, paste an array of objects (`{"bucket": "*", "actions": ["list", "read"]}`), and submit. Alias support includes AWS-style verbs (e.g., `s3:GetObject`). - **Policy editor**: select a user, paste an array of objects (`{"bucket": "*", "actions": ["list", "read"]}`), and submit. Alias support includes AWS-style verbs (e.g., `s3:GetObject`).
3. Wildcard action `iam:*` is supported for admin user definitions. 3. Wildcard action `iam:*` is supported for admin user definitions.
@@ -653,8 +774,11 @@ The API expects every request to include authentication headers. The UI persists
**Security Features:** **Security Features:**
- **Lockout Protection**: After `AUTH_MAX_ATTEMPTS` (default: 5) failed login attempts, the account is locked for `AUTH_LOCKOUT_MINUTES` (default: 15 minutes). - **Lockout Protection**: After `AUTH_MAX_ATTEMPTS` (default: 5) failed login attempts, the account is locked for `AUTH_LOCKOUT_MINUTES` (default: 15 minutes).
- **Credential Expiry**: Each user can have an optional `expires_at` timestamp (ISO 8601). Once expired, all API requests using those credentials are rejected. Set or clear expiry via the UI or API.
- **IAM Config Encryption**: When `SECRET_KEY` is set, the IAM config file (`iam.json`) is encrypted at rest using Fernet (AES-128-CBC with HMAC-SHA256). Existing plaintext configs are automatically encrypted on next load. - **IAM Config Encryption**: When `SECRET_KEY` is set, the IAM config file (`iam.json`) is encrypted at rest using Fernet (AES-128-CBC with HMAC-SHA256). Existing plaintext configs are automatically encrypted on next load.
- **Session Management**: UI sessions remain valid for `SESSION_LIFETIME_DAYS` (default: 30 days). - **Session Management**: UI sessions remain valid for `SESSION_LIFETIME_DAYS` (default: 30 days).
- **Hot Reload**: IAM configuration changes take effect immediately without restart. - **Hot Reload**: IAM configuration changes take effect immediately without restart.
- **Credential Reset**: Run `python run.py --reset-cred` to reset admin credentials. Supports `ADMIN_ACCESS_KEY` and `ADMIN_SECRET_KEY` env vars for deterministic keys.
### Permission Model ### Permission Model
@@ -814,7 +938,8 @@ curl -X POST http://localhost:5000/iam/users \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \ -H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{ -d '{
"display_name": "New User", "display_name": "New User",
"policies": [{"bucket": "*", "actions": ["list", "read"]}] "policies": [{"bucket": "*", "actions": ["list", "read"]}],
"expires_at": "2026-12-31T23:59:59Z"
}' }'
# Rotate user secret (requires iam:rotate_key) # Rotate user secret (requires iam:rotate_key)
@@ -827,6 +952,18 @@ curl -X PUT http://localhost:5000/iam/users/<access-key>/policies \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \ -H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '[{"bucket": "*", "actions": ["list", "read", "write"]}]' -d '[{"bucket": "*", "actions": ["list", "read", "write"]}]'
# Update credential expiry (requires iam:update_policy)
curl -X POST http://localhost:5000/iam/users/<access-key>/expiry \
-H "Content-Type: application/x-www-form-urlencoded" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d 'expires_at=2026-12-31T23:59:59Z'
# Remove credential expiry (never expires)
curl -X POST http://localhost:5000/iam/users/<access-key>/expiry \
-H "Content-Type: application/x-www-form-urlencoded" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d 'expires_at='
# Delete a user (requires iam:delete_user) # Delete a user (requires iam:delete_user)
curl -X DELETE http://localhost:5000/iam/users/<access-key> \ curl -X DELETE http://localhost:5000/iam/users/<access-key> \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." -H "X-Access-Key: ..." -H "X-Secret-Key: ..."
@@ -838,8 +975,9 @@ When a request is made, permissions are evaluated in this order:
1. **Authentication** Verify the access key and secret key are valid 1. **Authentication** Verify the access key and secret key are valid
2. **Lockout Check** Ensure the account is not locked due to failed attempts 2. **Lockout Check** Ensure the account is not locked due to failed attempts
3. **IAM Policy Check** Verify the user has the required action for the target bucket 3. **Expiry Check** Reject requests if the user's credentials have expired (`expires_at`)
4. **Bucket Policy Check** If a bucket policy exists, verify it allows the action 4. **IAM Policy Check** Verify the user has the required action for the target bucket
5. **Bucket Policy Check** If a bucket policy exists, verify it allows the action
A request is allowed only if: A request is allowed only if:
- The IAM policy grants the action, AND - The IAM policy grants the action, AND

View File

@@ -46,6 +46,8 @@ pub fn stream_to_file_with_md5(
py.check_signals()?; py.check_signals()?;
} }
file.sync_all()
.map_err(|e| PyIOError::new_err(format!("Failed to fsync: {}", e)))?;
Ok(()) Ok(())
})(); })();
@@ -102,6 +104,9 @@ pub fn assemble_parts_with_md5(
} }
} }
target.sync_all()
.map_err(|e| PyIOError::new_err(format!("Failed to fsync: {}", e)))?;
Ok(format!("{:x}", hasher.finalize())) Ok(format!("{:x}", hasher.finalize()))
}) })
} }

103
run.py
View File

@@ -23,6 +23,7 @@ from typing import Optional
from app import create_api_app, create_ui_app from app import create_api_app, create_ui_app
from app.config import AppConfig from app.config import AppConfig
from app.iam import IamService, IamError, ALLOWED_ACTIONS, _derive_fernet_key
def _server_host() -> str: def _server_host() -> str:
@@ -87,21 +88,121 @@ def serve_ui(port: int, prod: bool = False, config: Optional[AppConfig] = None)
app.run(host=_server_host(), port=port, debug=debug) app.run(host=_server_host(), port=port, debug=debug)
def reset_credentials() -> None:
    """Reset (or create) the admin user's credentials in the IAM config.

    Credentials come from the ``ADMIN_ACCESS_KEY`` / ``ADMIN_SECRET_KEY``
    environment variables when set, otherwise random keys are generated.
    The existing IAM config is preserved where possible (only the admin
    user's keys are rewritten); a missing, corrupted, or undecryptable
    config is replaced with a fresh single-admin config.  The file is
    written atomically (temp file + rename) and encrypted with Fernet when
    a ``SECRET_KEY`` is configured.  The new credentials are printed once.
    """
    import json
    import secrets
    from cryptography.fernet import Fernet
    config = AppConfig.from_env()
    iam_path = config.iam_config_path
    encryption_key = config.secret_key
    # Environment overrides win; fall back to freshly generated random keys.
    access_key = os.environ.get("ADMIN_ACCESS_KEY", "").strip() or secrets.token_hex(12)
    secret_key = os.environ.get("ADMIN_SECRET_KEY", "").strip() or secrets.token_urlsafe(32)
    # Used only to adjust the console output below.
    custom_keys = bool(os.environ.get("ADMIN_ACCESS_KEY", "").strip())
    fernet = Fernet(_derive_fernet_key(encryption_key)) if encryption_key else None
    raw_config = None
    if iam_path.exists():
        try:
            raw_bytes = iam_path.read_bytes()
            from app.iam import _IAM_ENCRYPTED_PREFIX
            if raw_bytes.startswith(_IAM_ENCRYPTED_PREFIX):
                if fernet:
                    try:
                        content = fernet.decrypt(raw_bytes[len(_IAM_ENCRYPTED_PREFIX):]).decode("utf-8")
                        raw_config = json.loads(content)
                    except Exception:
                        # Wrong key or corrupted ciphertext: start over with a
                        # fresh config rather than aborting the reset.
                        print("WARNING: Could not decrypt existing IAM config. Creating fresh config.")
                else:
                    print("WARNING: IAM config is encrypted but no SECRET_KEY available. Creating fresh config.")
            else:
                try:
                    raw_config = json.loads(raw_bytes.decode("utf-8"))
                except json.JSONDecodeError:
                    print("WARNING: Existing IAM config is corrupted. Creating fresh config.")
        except OSError:
            # Unreadable file: treat the same as a missing config.
            pass
    if raw_config and raw_config.get("users"):
        # Rewrite keys on the first user holding a wildcard/admin action;
        # if none is found, fall back to the first user in the file.
        admin_user = None
        for user in raw_config["users"]:
            policies = user.get("policies", [])
            for p in policies:
                actions = p.get("actions", [])
                if "iam:*" in actions or "*" in actions:
                    admin_user = user
                    break
            if admin_user:
                break
        if not admin_user:
            admin_user = raw_config["users"][0]
        admin_user["access_key"] = access_key
        admin_user["secret_key"] = secret_key
    else:
        # No usable config: build a minimal one with a single admin user
        # granted every allowed action on every bucket.
        raw_config = {
            "users": [
                {
                    "access_key": access_key,
                    "secret_key": secret_key,
                    "display_name": "Local Admin",
                    "policies": [
                        {"bucket": "*", "actions": list(ALLOWED_ACTIONS)}
                    ],
                }
            ]
        }
    json_text = json.dumps(raw_config, indent=2)
    iam_path.parent.mkdir(parents=True, exist_ok=True)
    # Atomic replace: write a sibling temp file, then rename over the target
    # so a crash mid-write never leaves a half-written iam.json.
    temp_path = iam_path.with_suffix(".json.tmp")
    if fernet:
        from app.iam import _IAM_ENCRYPTED_PREFIX
        encrypted = fernet.encrypt(json_text.encode("utf-8"))
        temp_path.write_bytes(_IAM_ENCRYPTED_PREFIX + encrypted)
    else:
        temp_path.write_text(json_text, encoding="utf-8")
    temp_path.replace(iam_path)
    # Print the credentials exactly once; the secret is not echoed when it
    # came from the environment.
    print(f"\n{'='*60}")
    print("MYFSIO - ADMIN CREDENTIALS RESET")
    print(f"{'='*60}")
    if custom_keys:
        print(f"Access Key: {access_key} (from ADMIN_ACCESS_KEY)")
        print(f"Secret Key: {'(from ADMIN_SECRET_KEY)' if os.environ.get('ADMIN_SECRET_KEY', '').strip() else secret_key}")
    else:
        print(f"Access Key: {access_key}")
        print(f"Secret Key: {secret_key}")
    print(f"{'='*60}")
    if fernet:
        print("IAM config saved (encrypted).")
    else:
        print(f"IAM config saved to: {iam_path}")
    print(f"{'='*60}\n")
if __name__ == "__main__": if __name__ == "__main__":
multiprocessing.freeze_support() multiprocessing.freeze_support()
if _is_frozen(): if _is_frozen():
multiprocessing.set_start_method("spawn", force=True) multiprocessing.set_start_method("spawn", force=True)
parser = argparse.ArgumentParser(description="Run the S3 clone services.") parser = argparse.ArgumentParser(description="Run the S3 clone services.")
parser.add_argument("--mode", choices=["api", "ui", "both"], default="both") parser.add_argument("--mode", choices=["api", "ui", "both", "reset-cred"], default="both")
parser.add_argument("--api-port", type=int, default=5000) parser.add_argument("--api-port", type=int, default=5000)
parser.add_argument("--ui-port", type=int, default=5100) parser.add_argument("--ui-port", type=int, default=5100)
parser.add_argument("--prod", action="store_true", help="Run in production mode using Waitress") parser.add_argument("--prod", action="store_true", help="Run in production mode using Waitress")
parser.add_argument("--dev", action="store_true", help="Force development mode (Flask dev server)") parser.add_argument("--dev", action="store_true", help="Force development mode (Flask dev server)")
parser.add_argument("--check-config", action="store_true", help="Validate configuration and exit") parser.add_argument("--check-config", action="store_true", help="Validate configuration and exit")
parser.add_argument("--show-config", action="store_true", help="Show configuration summary and exit") parser.add_argument("--show-config", action="store_true", help="Show configuration summary and exit")
parser.add_argument("--reset-cred", action="store_true", help="Reset admin credentials and exit")
args = parser.parse_args() args = parser.parse_args()
if args.reset_cred or args.mode == "reset-cred":
reset_credentials()
sys.exit(0)
if args.check_config or args.show_config: if args.check_config or args.show_config:
config = AppConfig.from_env() config = AppConfig.from_env()
config.print_startup_summary() config.print_startup_summary()

View File

@@ -15,6 +15,12 @@
--myfsio-hover-bg: rgba(59, 130, 246, 0.12); --myfsio-hover-bg: rgba(59, 130, 246, 0.12);
--myfsio-accent: #3b82f6; --myfsio-accent: #3b82f6;
--myfsio-accent-hover: #2563eb; --myfsio-accent-hover: #2563eb;
--myfsio-tag-key-bg: #e0e7ff;
--myfsio-tag-key-text: #3730a3;
--myfsio-tag-value-bg: #f0f1fa;
--myfsio-tag-value-text: #4338ca;
--myfsio-tag-border: #c7d2fe;
--myfsio-tag-delete-hover: #ef4444;
} }
[data-theme='dark'] { [data-theme='dark'] {
@@ -34,6 +40,12 @@
--myfsio-hover-bg: rgba(59, 130, 246, 0.2); --myfsio-hover-bg: rgba(59, 130, 246, 0.2);
--myfsio-accent: #60a5fa; --myfsio-accent: #60a5fa;
--myfsio-accent-hover: #3b82f6; --myfsio-accent-hover: #3b82f6;
--myfsio-tag-key-bg: #312e81;
--myfsio-tag-key-text: #c7d2fe;
--myfsio-tag-value-bg: #1e1b4b;
--myfsio-tag-value-text: #a5b4fc;
--myfsio-tag-border: #4338ca;
--myfsio-tag-delete-hover: #f87171;
} }
[data-theme='dark'] body, [data-theme='dark'] body,
@@ -1154,39 +1166,20 @@ html.sidebar-will-collapse .sidebar-user {
position: relative; position: relative;
border: 1px solid var(--myfsio-card-border) !important; border: 1px solid var(--myfsio-card-border) !important;
border-radius: 1rem !important; border-radius: 1rem !important;
overflow: hidden; overflow: visible;
transition: all 0.2s cubic-bezier(0.4, 0, 0.2, 1); transition: all 0.2s cubic-bezier(0.4, 0, 0.2, 1);
} }
.iam-user-card::before {
content: '';
position: absolute;
top: 0;
left: 0;
right: 0;
height: 4px;
background: linear-gradient(90deg, #3b82f6, #8b5cf6);
opacity: 0;
transition: opacity 0.2s ease;
}
.iam-user-card:hover { .iam-user-card:hover {
transform: translateY(-2px); transform: translateY(-2px);
box-shadow: 0 8px 24px -4px rgba(0, 0, 0, 0.12), 0 4px 8px -4px rgba(0, 0, 0, 0.08); box-shadow: 0 8px 24px -4px rgba(0, 0, 0, 0.12), 0 4px 8px -4px rgba(0, 0, 0, 0.08);
border-color: var(--myfsio-accent) !important; border-color: var(--myfsio-accent) !important;
} }
.iam-user-card:hover::before {
opacity: 1;
}
[data-theme='dark'] .iam-user-card:hover { [data-theme='dark'] .iam-user-card:hover {
box-shadow: 0 8px 24px -4px rgba(0, 0, 0, 0.4), 0 4px 8px -4px rgba(0, 0, 0, 0.3); box-shadow: 0 8px 24px -4px rgba(0, 0, 0, 0.4), 0 4px 8px -4px rgba(0, 0, 0, 0.3);
} }
.iam-admin-card::before {
background: linear-gradient(90deg, #f59e0b, #ef4444);
}
.iam-role-badge { .iam-role-badge {
display: inline-flex; display: inline-flex;
@@ -3021,6 +3014,89 @@ body:has(.login-card) .main-wrapper {
padding: 0.375rem 1rem; padding: 0.375rem 1rem;
} }
/* Read-only tag display: a rounded two-segment pill (key | value). */
.tag-pill {
    display: inline-flex;
    border-radius: 9999px;
    border: 1px solid var(--myfsio-tag-border);
    overflow: hidden; /* clip segment backgrounds to the rounded outline */
    font-size: 0.75rem;
    line-height: 1;
}
/* Left segment: the tag key, emphasised. */
.tag-pill-key {
    padding: 0.3rem 0.5rem;
    background: var(--myfsio-tag-key-bg);
    color: var(--myfsio-tag-key-text);
    font-weight: 600;
}
/* Right segment: the tag value, regular weight. */
.tag-pill-value {
    padding: 0.3rem 0.5rem;
    background: var(--myfsio-tag-value-bg);
    color: var(--myfsio-tag-value-text);
    font-weight: 400;
}
/* Container for the inline tag editor. */
.tag-editor-card {
    background: var(--myfsio-preview-bg);
    border-radius: 0.5rem;
    padding: 0.75rem;
}
/* Editor grid: key column, value column, 28px delete-button column. */
.tag-editor-header,
.tag-editor-row {
    display: grid;
    grid-template-columns: 1fr 1fr 28px;
    gap: 0.5rem;
    align-items: center;
}
.tag-editor-header {
    padding-bottom: 0.375rem;
    border-bottom: 1px solid var(--myfsio-card-border);
    margin-bottom: 0.5rem;
}
/* Column labels: small uppercase muted captions. */
.tag-editor-header span {
    font-size: 0.7rem;
    font-weight: 600;
    text-transform: uppercase;
    color: var(--myfsio-muted);
    letter-spacing: 0.05em;
}
.tag-editor-row {
    margin-bottom: 0.375rem;
}
/* Ghost delete button: muted until hovered. */
.tag-editor-delete {
    display: inline-flex;
    align-items: center;
    justify-content: center;
    width: 28px;
    height: 28px;
    border: none;
    background: transparent;
    color: var(--myfsio-muted);
    border-radius: 0.375rem;
    cursor: pointer;
    transition: color 0.15s, background 0.15s;
}
.tag-editor-delete:hover {
    color: var(--myfsio-tag-delete-hover);
    background: rgba(239, 68, 68, 0.1); /* subtle red wash behind the icon */
}
/* Footer row holding Save / Cancel, separated by a rule. */
.tag-editor-actions {
    display: flex;
    align-items: center;
    gap: 0.5rem;
    margin-top: 0.75rem;
    padding-top: 0.5rem;
    border-top: 1px solid var(--myfsio-card-border);
}
@media (prefers-reduced-motion: reduce) { @media (prefers-reduced-motion: reduce) {
*, *,
*::before, *::before,

View File

@@ -702,7 +702,7 @@
flushPendingStreamObjects(); flushPendingStreamObjects();
hasMoreObjects = false; hasMoreObjects = false;
totalObjectCount = loadedObjectCount; totalObjectCount = loadedObjectCount;
if (!currentPrefix) bucketTotalObjects = totalObjectCount; if (!currentPrefix && !useDelimiterMode) bucketTotalObjects = totalObjectCount;
updateObjectCountBadge(); updateObjectCountBadge();
if (objectsLoadingRow && objectsLoadingRow.parentNode) { if (objectsLoadingRow && objectsLoadingRow.parentNode) {
@@ -767,7 +767,7 @@
} }
totalObjectCount = data.total_count || 0; totalObjectCount = data.total_count || 0;
if (!append && !currentPrefix) bucketTotalObjects = totalObjectCount; if (!append && !currentPrefix && !useDelimiterMode) bucketTotalObjects = totalObjectCount;
nextContinuationToken = data.next_continuation_token; nextContinuationToken = data.next_continuation_token;
if (!append && objectsLoadingRow) { if (!append && objectsLoadingRow) {
@@ -3948,6 +3948,7 @@
const cancelTagsButton = document.getElementById('cancelTagsButton'); const cancelTagsButton = document.getElementById('cancelTagsButton');
let currentObjectTags = []; let currentObjectTags = [];
let isEditingTags = false; let isEditingTags = false;
let savedObjectTags = [];
const loadObjectTags = async (row) => { const loadObjectTags = async (row) => {
if (!row || !previewTagsPanel) return; if (!row || !previewTagsPanel) return;
@@ -3976,17 +3977,26 @@
previewTagsEmpty.classList.remove('d-none'); previewTagsEmpty.classList.remove('d-none');
} else { } else {
previewTagsEmpty.classList.add('d-none'); previewTagsEmpty.classList.add('d-none');
previewTagsList.innerHTML = currentObjectTags.map(t => `<span class="badge bg-info-subtle text-info">${escapeHtml(t.Key)}=${escapeHtml(t.Value)}</span>`).join(''); previewTagsList.innerHTML = currentObjectTags.map(t => `<span class="tag-pill"><span class="tag-pill-key">${escapeHtml(t.Key)}</span><span class="tag-pill-value">${escapeHtml(t.Value)}</span></span>`).join('');
} }
}; };
// Pull the current text of every visible key/value input back into
// currentObjectTags, so in-progress edits survive a re-render of the editor.
const syncTagInputs = () => {
    if (!previewTagsInputs) return;
    const rows = previewTagsInputs.querySelectorAll('.tag-editor-row');
    rows.forEach((row, idx) => {
        if (idx >= currentObjectTags.length) return;
        const keyInput = row.querySelector(`[data-tag-key="${idx}"]`);
        const valueInput = row.querySelector(`[data-tag-value="${idx}"]`);
        currentObjectTags[idx].Key = keyInput?.value || '';
        currentObjectTags[idx].Value = valueInput?.value || '';
    });
};
const renderTagEditor = () => { const renderTagEditor = () => {
if (!previewTagsInputs) return; if (!previewTagsInputs) return;
previewTagsInputs.innerHTML = currentObjectTags.map((t, idx) => ` previewTagsInputs.innerHTML = currentObjectTags.map((t, idx) => `
<div class="input-group input-group-sm mb-1"> <div class="tag-editor-row">
<input type="text" class="form-control" placeholder="Key" value="${escapeHtml(t.Key)}" data-tag-key="${idx}"> <input type="text" class="form-control form-control-sm" placeholder="e.g. Environment" value="${escapeHtml(t.Key)}" data-tag-key="${idx}">
<input type="text" class="form-control" placeholder="Value" value="${escapeHtml(t.Value)}" data-tag-value="${idx}"> <input type="text" class="form-control form-control-sm" placeholder="e.g. Production" value="${escapeHtml(t.Value)}" data-tag-value="${idx}">
<button class="btn btn-outline-danger" type="button" onclick="removeTagRow(${idx})"> <button class="tag-editor-delete" type="button" onclick="removeTagRow(${idx})">
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" viewBox="0 0 16 16"><path d="M4.646 4.646a.5.5 0 0 1 .708 0L8 7.293l2.646-2.647a.5.5 0 0 1 .708.708L8.707 8l2.647 2.646a.5.5 0 0 1-.708.708L8 8.707l-2.646 2.647a.5.5 0 0 1-.708-.708L7.293 8 4.646 5.354a.5.5 0 0 1 0-.708z"/></svg> <svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" viewBox="0 0 16 16"><path d="M4.646 4.646a.5.5 0 0 1 .708 0L8 7.293l2.646-2.647a.5.5 0 0 1 .708.708L8.707 8l2.647 2.646a.5.5 0 0 1-.708.708L8 8.707l-2.646 2.647a.5.5 0 0 1-.708-.708L7.293 8 4.646 5.354a.5.5 0 0 1 0-.708z"/></svg>
</button> </button>
</div> </div>
@@ -3994,20 +4004,29 @@
}; };
window.removeTagRow = (idx) => { window.removeTagRow = (idx) => {
syncTagInputs();
currentObjectTags.splice(idx, 1); currentObjectTags.splice(idx, 1);
renderTagEditor(); renderTagEditor();
}; };
editTagsButton?.addEventListener('click', () => { editTagsButton?.addEventListener('click', () => {
savedObjectTags = currentObjectTags.map(t => ({ Key: t.Key, Value: t.Value }));
isEditingTags = true; isEditingTags = true;
previewTagsList.classList.add('d-none'); previewTagsList.classList.add('d-none');
previewTagsEmpty.classList.add('d-none'); previewTagsEmpty.classList.add('d-none');
previewTagsEditor?.classList.remove('d-none'); previewTagsEditor?.classList.remove('d-none');
const card = previewTagsEditor?.querySelector('.tag-editor-card');
if (card) {
card.style.opacity = '0';
card.style.transition = 'opacity 0.2s ease';
requestAnimationFrame(() => { card.style.opacity = '1'; });
}
renderTagEditor(); renderTagEditor();
}); });
cancelTagsButton?.addEventListener('click', () => { cancelTagsButton?.addEventListener('click', () => {
isEditingTags = false; isEditingTags = false;
currentObjectTags = savedObjectTags.map(t => ({ Key: t.Key, Value: t.Value }));
previewTagsEditor?.classList.add('d-none'); previewTagsEditor?.classList.add('d-none');
previewTagsList.classList.remove('d-none'); previewTagsList.classList.remove('d-none');
renderObjectTags(); renderObjectTags();
@@ -4018,6 +4037,7 @@
showMessage({ title: 'Limit reached', body: 'Maximum 10 tags allowed per object.', variant: 'warning' }); showMessage({ title: 'Limit reached', body: 'Maximum 10 tags allowed per object.', variant: 'warning' });
return; return;
} }
syncTagInputs();
currentObjectTags.push({ Key: '', Value: '' }); currentObjectTags.push({ Key: '', Value: '' });
renderTagEditor(); renderTagEditor();
}); });
@@ -4026,7 +4046,7 @@
if (!activeRow) return; if (!activeRow) return;
const tagsUrl = activeRow.dataset.tagsUrl; const tagsUrl = activeRow.dataset.tagsUrl;
if (!tagsUrl) return; if (!tagsUrl) return;
const inputs = previewTagsInputs?.querySelectorAll('.input-group'); const inputs = previewTagsInputs?.querySelectorAll('.tag-editor-row');
const newTags = []; const newTags = [];
inputs?.forEach((group, idx) => { inputs?.forEach((group, idx) => {
const key = group.querySelector(`[data-tag-key="${idx}"]`)?.value?.trim() || ''; const key = group.querySelector(`[data-tag-key="${idx}"]`)?.value?.trim() || '';

View File

@@ -11,9 +11,11 @@ window.IAMManagement = (function() {
var editUserModal = null; var editUserModal = null;
var deleteUserModal = null; var deleteUserModal = null;
var rotateSecretModal = null; var rotateSecretModal = null;
var expiryModal = null;
var currentRotateKey = null; var currentRotateKey = null;
var currentEditKey = null; var currentEditKey = null;
var currentDeleteKey = null; var currentDeleteKey = null;
var currentExpiryKey = null;
var ALL_S3_ACTIONS = ['list', 'read', 'write', 'delete', 'share', 'policy', 'replication', 'lifecycle', 'cors']; var ALL_S3_ACTIONS = ['list', 'read', 'write', 'delete', 'share', 'policy', 'replication', 'lifecycle', 'cors'];
@@ -65,6 +67,7 @@ window.IAMManagement = (function() {
setupEditUserModal(); setupEditUserModal();
setupDeleteUserModal(); setupDeleteUserModal();
setupRotateSecretModal(); setupRotateSecretModal();
setupExpiryModal();
setupFormHandlers(); setupFormHandlers();
setupSearch(); setupSearch();
setupCopyAccessKeyButtons(); setupCopyAccessKeyButtons();
@@ -75,11 +78,13 @@ window.IAMManagement = (function() {
var editModalEl = document.getElementById('editUserModal'); var editModalEl = document.getElementById('editUserModal');
var deleteModalEl = document.getElementById('deleteUserModal'); var deleteModalEl = document.getElementById('deleteUserModal');
var rotateModalEl = document.getElementById('rotateSecretModal'); var rotateModalEl = document.getElementById('rotateSecretModal');
var expiryModalEl = document.getElementById('expiryModal');
if (policyModalEl) policyModal = new bootstrap.Modal(policyModalEl); if (policyModalEl) policyModal = new bootstrap.Modal(policyModalEl);
if (editModalEl) editUserModal = new bootstrap.Modal(editModalEl); if (editModalEl) editUserModal = new bootstrap.Modal(editModalEl);
if (deleteModalEl) deleteUserModal = new bootstrap.Modal(deleteModalEl); if (deleteModalEl) deleteUserModal = new bootstrap.Modal(deleteModalEl);
if (rotateModalEl) rotateSecretModal = new bootstrap.Modal(rotateModalEl); if (rotateModalEl) rotateSecretModal = new bootstrap.Modal(rotateModalEl);
if (expiryModalEl) expiryModal = new bootstrap.Modal(expiryModalEl);
} }
function setupJsonAutoIndent() { function setupJsonAutoIndent() {
@@ -97,6 +102,15 @@ window.IAMManagement = (function() {
}); });
}); });
var accessKeyCopyButton = document.querySelector('[data-access-key-copy]');
if (accessKeyCopyButton) {
accessKeyCopyButton.addEventListener('click', async function() {
var accessKeyInput = document.getElementById('disclosedAccessKeyValue');
if (!accessKeyInput) return;
await window.UICore.copyToClipboard(accessKeyInput.value, accessKeyCopyButton, 'Copy');
});
}
var secretCopyButton = document.querySelector('[data-secret-copy]'); var secretCopyButton = document.querySelector('[data-secret-copy]');
if (secretCopyButton) { if (secretCopyButton) {
secretCopyButton.addEventListener('click', async function() { secretCopyButton.addEventListener('click', async function() {
@@ -143,6 +157,22 @@ window.IAMManagement = (function() {
}); });
} }
function generateSecureHex(byteCount) {
var arr = new Uint8Array(byteCount);
crypto.getRandomValues(arr);
return Array.from(arr).map(function(b) { return b.toString(16).padStart(2, '0'); }).join('');
}
function generateSecureBase64(byteCount) {
var arr = new Uint8Array(byteCount);
crypto.getRandomValues(arr);
var binary = '';
for (var i = 0; i < arr.length; i++) {
binary += String.fromCharCode(arr[i]);
}
return btoa(binary).replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/, '');
}
function setupCreateUserModal() { function setupCreateUserModal() {
var createUserPoliciesEl = document.getElementById('createUserPolicies'); var createUserPoliciesEl = document.getElementById('createUserPolicies');
@@ -151,6 +181,22 @@ window.IAMManagement = (function() {
applyPolicyTemplate(button.dataset.createPolicyTemplate, createUserPoliciesEl); applyPolicyTemplate(button.dataset.createPolicyTemplate, createUserPoliciesEl);
}); });
}); });
var genAccessKeyBtn = document.getElementById('generateAccessKeyBtn');
if (genAccessKeyBtn) {
genAccessKeyBtn.addEventListener('click', function() {
var input = document.getElementById('createUserAccessKey');
if (input) input.value = generateSecureHex(8);
});
}
var genSecretKeyBtn = document.getElementById('generateSecretKeyBtn');
if (genSecretKeyBtn) {
genSecretKeyBtn.addEventListener('click', function() {
var input = document.getElementById('createUserSecretKey');
if (input) input.value = generateSecureBase64(24);
});
}
} }
function setupEditUserModal() { function setupEditUserModal() {
@@ -271,6 +317,77 @@ window.IAMManagement = (function() {
} }
} }
function openExpiryModal(key, expiresAt) {
currentExpiryKey = key;
var label = document.getElementById('expiryUserLabel');
var input = document.getElementById('expiryDateInput');
var form = document.getElementById('expiryForm');
if (label) label.textContent = key;
if (expiresAt) {
try {
var dt = new Date(expiresAt);
var local = new Date(dt.getTime() - dt.getTimezoneOffset() * 60000);
if (input) input.value = local.toISOString().slice(0, 16);
} catch(e) {
if (input) input.value = '';
}
} else {
if (input) input.value = '';
}
if (form) form.action = endpoints.updateExpiry.replace('ACCESS_KEY', key);
var modalEl = document.getElementById('expiryModal');
if (modalEl) {
var modal = bootstrap.Modal.getOrCreateInstance(modalEl);
modal.show();
}
}
function setupExpiryModal() {
document.querySelectorAll('[data-expiry-user]').forEach(function(btn) {
btn.addEventListener('click', function(e) {
e.preventDefault();
openExpiryModal(btn.dataset.expiryUser, btn.dataset.expiresAt || '');
});
});
document.querySelectorAll('[data-expiry-preset]').forEach(function(btn) {
btn.addEventListener('click', function() {
var preset = btn.dataset.expiryPreset;
var input = document.getElementById('expiryDateInput');
if (!input) return;
if (preset === 'clear') {
input.value = '';
return;
}
var now = new Date();
var ms = 0;
if (preset === '1h') ms = 3600000;
else if (preset === '24h') ms = 86400000;
else if (preset === '7d') ms = 7 * 86400000;
else if (preset === '30d') ms = 30 * 86400000;
else if (preset === '90d') ms = 90 * 86400000;
var future = new Date(now.getTime() + ms);
var local = new Date(future.getTime() - future.getTimezoneOffset() * 60000);
input.value = local.toISOString().slice(0, 16);
});
});
var expiryForm = document.getElementById('expiryForm');
if (expiryForm) {
expiryForm.addEventListener('submit', function(e) {
e.preventDefault();
window.UICore.submitFormAjax(expiryForm, {
successMessage: 'Expiry updated',
onSuccess: function() {
var modalEl = document.getElementById('expiryModal');
if (modalEl) bootstrap.Modal.getOrCreateInstance(modalEl).hide();
window.location.reload();
}
});
});
}
}
function createUserCardHtml(accessKey, displayName, policies) { function createUserCardHtml(accessKey, displayName, policies) {
var admin = isAdminUser(policies); var admin = isAdminUser(policies);
var cardClass = 'card h-100 iam-user-card' + (admin ? ' iam-admin-card' : ''); var cardClass = 'card h-100 iam-user-card' + (admin ? ' iam-admin-card' : '');
@@ -324,6 +441,8 @@ window.IAMManagement = (function() {
'<ul class="dropdown-menu dropdown-menu-end">' + '<ul class="dropdown-menu dropdown-menu-end">' +
'<li><button class="dropdown-item" type="button" data-edit-user="' + esc(accessKey) + '" data-display-name="' + esc(displayName) + '">' + '<li><button class="dropdown-item" type="button" data-edit-user="' + esc(accessKey) + '" data-display-name="' + esc(displayName) + '">' +
'<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16"><path d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5 13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5z"/></svg>Edit Name</button></li>' + '<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16"><path d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5 13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5z"/></svg>Edit Name</button></li>' +
'<li><button class="dropdown-item" type="button" data-expiry-user="' + esc(accessKey) + '" data-expires-at="">' +
'<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16"><path d="M8 3.5a.5.5 0 0 0-1 0V9a.5.5 0 0 0 .252.434l3.5 2a.5.5 0 0 0 .496-.868L8 8.71V3.5z"/><path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm7-8A7 7 0 1 1 1 8a7 7 0 0 1 14 0z"/></svg>Set Expiry</button></li>' +
'<li><button class="dropdown-item" type="button" data-rotate-user="' + esc(accessKey) + '">' + '<li><button class="dropdown-item" type="button" data-rotate-user="' + esc(accessKey) + '">' +
'<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16"><path d="M11.534 7h3.932a.25.25 0 0 1 .192.41l-1.966 2.36a.25.25 0 0 1-.384 0l-1.966-2.36a.25.25 0 0 1 .192-.41zm-11 2h3.932a.25.25 0 0 0 .192-.41L2.692 6.23a.25.25 0 0 0-.384 0L.342 8.59A.25.25 0 0 0 .534 9z"/><path fill-rule="evenodd" d="M8 3c-1.552 0-2.94.707-3.857 1.818a.5.5 0 1 1-.771-.636A6.002 6.002 0 0 1 13.917 7H12.9A5.002 5.002 0 0 0 8 3zM3.1 9a5.002 5.002 0 0 0 8.757 2.182.5.5 0 1 1 .771.636A6.002 6.002 0 0 1 2.083 9H3.1z"/></svg>Rotate Secret</button></li>' + '<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16"><path d="M11.534 7h3.932a.25.25 0 0 1 .192.41l-1.966 2.36a.25.25 0 0 1-.384 0l-1.966-2.36a.25.25 0 0 1 .192-.41zm-11 2h3.932a.25.25 0 0 0 .192-.41L2.692 6.23a.25.25 0 0 0-.384 0L.342 8.59A.25.25 0 0 0 .534 9z"/><path fill-rule="evenodd" d="M8 3c-1.552 0-2.94.707-3.857 1.818a.5.5 0 1 1-.771-.636A6.002 6.002 0 0 1 13.917 7H12.9A5.002 5.002 0 0 0 8 3zM3.1 9a5.002 5.002 0 0 0 8.757 2.182.5.5 0 1 1 .771.636A6.002 6.002 0 0 1 2.083 9H3.1z"/></svg>Rotate Secret</button></li>' +
'<li><hr class="dropdown-divider"></li>' + '<li><hr class="dropdown-divider"></li>' +
@@ -379,6 +498,14 @@ window.IAMManagement = (function() {
}); });
} }
var expiryBtn = cardElement.querySelector('[data-expiry-user]');
if (expiryBtn) {
expiryBtn.addEventListener('click', function(e) {
e.preventDefault();
openExpiryModal(accessKey, '');
});
}
var policyBtn = cardElement.querySelector('[data-policy-editor]'); var policyBtn = cardElement.querySelector('[data-policy-editor]');
if (policyBtn) { if (policyBtn) {
policyBtn.addEventListener('click', function() { policyBtn.addEventListener('click', function() {
@@ -428,10 +555,15 @@ window.IAMManagement = (function() {
'</svg>' + '</svg>' +
'<div class="flex-grow-1">' + '<div class="flex-grow-1">' +
'<div class="fw-semibold">New user created: <code>' + window.UICore.escapeHtml(data.access_key) + '</code></div>' + '<div class="fw-semibold">New user created: <code>' + window.UICore.escapeHtml(data.access_key) + '</code></div>' +
'<p class="mb-2 small">This secret is only shown once. Copy it now and store it securely.</p>' + '<p class="mb-2 small">These credentials are only shown once. Copy them now and store them securely.</p>' +
'</div>' + '</div>' +
'<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>' + '<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>' +
'</div>' + '</div>' +
'<div class="input-group mb-2">' +
'<span class="input-group-text"><strong>Access key</strong></span>' +
'<input class="form-control font-monospace" type="text" value="' + window.UICore.escapeHtml(data.access_key) + '" readonly />' +
'<button class="btn btn-outline-primary" type="button" id="copyNewUserAccessKey">Copy</button>' +
'</div>' +
'<div class="input-group">' + '<div class="input-group">' +
'<span class="input-group-text"><strong>Secret key</strong></span>' + '<span class="input-group-text"><strong>Secret key</strong></span>' +
'<input class="form-control font-monospace" type="text" value="' + window.UICore.escapeHtml(data.secret_key) + '" readonly id="newUserSecret" />' + '<input class="form-control font-monospace" type="text" value="' + window.UICore.escapeHtml(data.secret_key) + '" readonly id="newUserSecret" />' +
@@ -440,6 +572,9 @@ window.IAMManagement = (function() {
var container = document.querySelector('.page-header'); var container = document.querySelector('.page-header');
if (container) { if (container) {
container.insertAdjacentHTML('afterend', alertHtml); container.insertAdjacentHTML('afterend', alertHtml);
document.getElementById('copyNewUserAccessKey').addEventListener('click', async function() {
await window.UICore.copyToClipboard(data.access_key, this, 'Copy');
});
document.getElementById('copyNewUserSecret').addEventListener('click', async function() { document.getElementById('copyNewUserSecret').addEventListener('click', async function() {
await window.UICore.copyToClipboard(data.secret_key, this, 'Copy'); await window.UICore.copyToClipboard(data.secret_key, this, 'Copy');
}); });

View File

@@ -292,19 +292,28 @@
Edit Edit
</button> </button>
</div> </div>
<div id="preview-tags-list" class="d-flex flex-wrap gap-1"></div> <div id="preview-tags-list" class="d-flex flex-wrap gap-2"></div>
<div id="preview-tags-empty" class="text-muted small p-2 bg-body-tertiary rounded">No tags</div> <div id="preview-tags-empty" class="text-muted small p-2 bg-body-tertiary rounded">No tags</div>
<div id="preview-tags-editor" class="d-none mt-2"> <div id="preview-tags-editor" class="d-none mt-2">
<div id="preview-tags-inputs" class="mb-2"></div> <div class="tag-editor-card">
<div class="d-flex gap-2"> <div class="tag-editor-header">
<button class="btn btn-sm btn-outline-secondary flex-grow-1" type="button" id="addTagRow"> <span>Key</span>
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" class="me-1" viewBox="0 0 16 16"> <span>Value</span>
<path d="M8 4a.5.5 0 0 1 .5.5v3h3a.5.5 0 0 1 0 1h-3v3a.5.5 0 0 1-1 0v-3h-3a.5.5 0 0 1 0-1h3v-3A.5.5 0 0 1 8 4z"/> <span></span>
</svg> </div>
Add Tag <div id="preview-tags-inputs"></div>
</button> <div class="tag-editor-actions">
<button class="btn btn-sm btn-primary" type="button" id="saveTagsButton">Save</button> <button class="btn btn-sm btn-outline-secondary" type="button" id="addTagRow">
<button class="btn btn-sm btn-outline-secondary" type="button" id="cancelTagsButton">Cancel</button> <svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" class="me-1" viewBox="0 0 16 16">
<path d="M8 4a.5.5 0 0 1 .5.5v3h3a.5.5 0 0 1 0 1h-3v3a.5.5 0 0 1-1 0v-3h-3a.5.5 0 0 1 0-1h3v-3A.5.5 0 0 1 8 4z"/>
</svg>
Add Tag
</button>
<div class="ms-auto d-flex gap-2">
<button class="btn btn-sm btn-outline-secondary" type="button" id="cancelTagsButton">Cancel</button>
<button class="btn btn-sm btn-primary" type="button" id="saveTagsButton">Save</button>
</div>
</div>
</div> </div>
<div class="form-text mt-1">Maximum 10 tags. Keys and values up to 256 characters.</div> <div class="form-text mt-1">Maximum 10 tags. Keys and values up to 256 characters.</div>
</div> </div>

View File

@@ -51,7 +51,7 @@
</div> </div>
<div> <div>
<h5 class="bucket-name text-break">{{ bucket.meta.name }}</h5> <h5 class="bucket-name text-break">{{ bucket.meta.name }}</h5>
<small class="text-muted">Created {{ bucket.meta.created_at | format_datetime }}</small> <small class="text-muted">Created {{ bucket.meta.creation_date | format_datetime }}</small>
</div> </div>
</div> </div>
<span class="badge {{ bucket.access_badge }} bucket-access-badge">{{ bucket.access_label }}</span> <span class="badge {{ bucket.access_badge }} bucket-access-badge">{{ bucket.access_label }}</span>

View File

@@ -40,6 +40,8 @@
<li><a href="#quotas">Bucket Quotas</a></li> <li><a href="#quotas">Bucket Quotas</a></li>
<li><a href="#encryption">Encryption</a></li> <li><a href="#encryption">Encryption</a></li>
<li><a href="#lifecycle">Lifecycle Rules</a></li> <li><a href="#lifecycle">Lifecycle Rules</a></li>
<li><a href="#garbage-collection">Garbage Collection</a></li>
<li><a href="#integrity">Integrity Scanner</a></li>
<li><a href="#metrics">Metrics History</a></li> <li><a href="#metrics">Metrics History</a></li>
<li><a href="#operation-metrics">Operation Metrics</a></li> <li><a href="#operation-metrics">Operation Metrics</a></li>
<li><a href="#troubleshooting">Troubleshooting</a></li> <li><a href="#troubleshooting">Troubleshooting</a></li>
@@ -202,6 +204,16 @@ python run.py --mode ui
<td><code>60 per minute</code></td> <td><code>60 per minute</code></td>
<td>Rate limit for admin API endpoints (<code>/admin/*</code>).</td> <td>Rate limit for admin API endpoints (<code>/admin/*</code>).</td>
</tr> </tr>
<tr>
<td><code>ADMIN_ACCESS_KEY</code></td>
<td>(none)</td>
<td>Custom access key for the admin user on first run or credential reset. Random if unset.</td>
</tr>
<tr>
<td><code>ADMIN_SECRET_KEY</code></td>
<td>(none)</td>
<td>Custom secret key for the admin user on first run or credential reset. Random if unset.</td>
</tr>
<tr class="table-secondary"> <tr class="table-secondary">
<td colspan="3" class="fw-semibold">Server Settings</td> <td colspan="3" class="fw-semibold">Server Settings</td>
</tr> </tr>
@@ -428,7 +440,7 @@ python run.py --mode ui
</table> </table>
</div> </div>
<div class="alert alert-warning mt-3 mb-0 small"> <div class="alert alert-warning mt-3 mb-0 small">
<strong>Production Checklist:</strong> Set <code>SECRET_KEY</code>, restrict <code>CORS_ORIGINS</code>, configure <code>API_BASE_URL</code>, enable HTTPS via reverse proxy, and use <code>--prod</code> flag. <strong>Production Checklist:</strong> Set <code>SECRET_KEY</code> (also enables IAM config encryption at rest), restrict <code>CORS_ORIGINS</code>, configure <code>API_BASE_URL</code>, enable HTTPS via reverse proxy, use <code>--prod</code> flag, and set credential expiry on non-admin users.
</div> </div>
</div> </div>
</article> </article>
@@ -495,11 +507,12 @@ sudo journalctl -u myfsio -f # View logs</code></pre>
<span class="docs-section-kicker">03</span> <span class="docs-section-kicker">03</span>
<h2 class="h4 mb-0">Authenticate &amp; manage IAM</h2> <h2 class="h4 mb-0">Authenticate &amp; manage IAM</h2>
</div> </div>
<p class="text-muted">On first startup, MyFSIO generates random admin credentials and prints them to the console. Missed it? Check <code>data/.myfsio.sys/config/iam.json</code> directly—credentials are stored in plaintext.</p> <p class="text-muted">On first startup, MyFSIO generates random admin credentials and prints them to the console. Set <code>ADMIN_ACCESS_KEY</code> and <code>ADMIN_SECRET_KEY</code> env vars for custom credentials. When <code>SECRET_KEY</code> is configured, the IAM config is encrypted at rest. To reset credentials, run <code>python run.py --reset-cred</code>.</p>
<div class="docs-highlight mb-3"> <div class="docs-highlight mb-3">
<ol class="mb-0"> <ol class="mb-0">
<li>Check the console output (or <code>iam.json</code>) for the generated <code>Access Key</code> and <code>Secret Key</code>, then visit <code>/ui/login</code>.</li> <li>Check the console output for the generated <code>Access Key</code> and <code>Secret Key</code>, then visit <code>/ui/login</code>.</li>
<li>Create additional users with descriptive display names and AWS-style inline policies (for example <code>{"bucket": "*", "actions": ["list", "read"]}</code>).</li> <li>Create additional users with descriptive display names, AWS-style inline policies (for example <code>{"bucket": "*", "actions": ["list", "read"]}</code>), and optional credential expiry dates.</li>
<li>Set credential expiry on users to grant time-limited access. The UI shows expiry badges and provides preset durations (1h, 24h, 7d, 30d, 90d). Expired credentials are rejected at authentication.</li>
<li>Rotate secrets when sharing with CI jobs—new secrets display once and persist to <code>data/.myfsio.sys/config/iam.json</code>.</li> <li>Rotate secrets when sharing with CI jobs—new secrets display once and persist to <code>data/.myfsio.sys/config/iam.json</code>.</li>
<li>Bucket policies layer on top of IAM. Apply Private/Public presets or paste custom JSON; changes reload instantly.</li> <li>Bucket policies layer on top of IAM. Apply Private/Public presets or paste custom JSON; changes reload instantly.</li>
</ol> </ol>
@@ -1616,10 +1629,217 @@ curl "{{ api_base }}/&lt;bucket&gt;?lifecycle" \
</div> </div>
</div> </div>
</article> </article>
<article id="metrics" class="card shadow-sm docs-section"> <article id="garbage-collection" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">14</span> <span class="docs-section-kicker">14</span>
<h2 class="h4 mb-0">Garbage Collection</h2>
</div>
<p class="text-muted">Automatically clean up orphaned data that accumulates over time: stale temp files, abandoned multipart uploads, stale lock files, orphaned metadata, orphaned versions, and empty directories.</p>
<h3 class="h6 text-uppercase text-muted mt-4">Enabling GC</h3>
<p class="small text-muted">Disabled by default. Enable via environment variable:</p>
<pre class="mb-3"><code class="language-bash">GC_ENABLED=true python run.py</code></pre>
<h3 class="h6 text-uppercase text-muted mt-4">Configuration</h3>
<div class="table-responsive mb-3">
<table class="table table-sm table-bordered small">
<thead class="table-light">
<tr>
<th>Variable</th>
<th>Default</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr><td><code>GC_ENABLED</code></td><td><code>false</code></td><td>Enable garbage collection</td></tr>
<tr><td><code>GC_INTERVAL_HOURS</code></td><td><code>6</code></td><td>Hours between GC cycles</td></tr>
<tr><td><code>GC_TEMP_FILE_MAX_AGE_HOURS</code></td><td><code>24</code></td><td>Delete temp files older than this</td></tr>
<tr><td><code>GC_MULTIPART_MAX_AGE_DAYS</code></td><td><code>7</code></td><td>Delete orphaned multipart uploads older than this</td></tr>
<tr><td><code>GC_LOCK_FILE_MAX_AGE_HOURS</code></td><td><code>1</code></td><td>Delete stale lock files older than this</td></tr>
<tr><td><code>GC_DRY_RUN</code></td><td><code>false</code></td><td>Log what would be deleted without removing</td></tr>
</tbody>
</table>
</div>
<h3 class="h6 text-uppercase text-muted mt-4">What Gets Cleaned</h3>
<div class="table-responsive mb-3">
<table class="table table-sm table-bordered small">
<thead class="table-light">
<tr>
<th>Type</th>
<th>Location</th>
<th>Condition</th>
</tr>
</thead>
<tbody>
<tr><td><strong>Temp files</strong></td><td><code>.myfsio.sys/tmp/</code></td><td>Older than configured max age</td></tr>
<tr><td><strong>Orphaned multipart</strong></td><td><code>.myfsio.sys/multipart/</code></td><td>Older than configured max age</td></tr>
<tr><td><strong>Stale lock files</strong></td><td><code>.myfsio.sys/buckets/&lt;bucket&gt;/locks/</code></td><td>Older than configured max age</td></tr>
<tr><td><strong>Orphaned metadata</strong></td><td><code>.myfsio.sys/buckets/&lt;bucket&gt;/meta/</code></td><td>Object file no longer exists</td></tr>
<tr><td><strong>Orphaned versions</strong></td><td><code>.myfsio.sys/buckets/&lt;bucket&gt;/versions/</code></td><td>Main object no longer exists</td></tr>
<tr><td><strong>Empty directories</strong></td><td>Various internal dirs</td><td>Directory is empty after cleanup</td></tr>
</tbody>
</table>
</div>
<h3 class="h6 text-uppercase text-muted mt-4">Admin API</h3>
<div class="table-responsive mb-3">
<table class="table table-sm table-bordered small">
<thead class="table-light">
<tr>
<th>Method</th>
<th>Route</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr><td><code>GET</code></td><td><code>/admin/gc/status</code></td><td>Get GC status and configuration</td></tr>
<tr><td><code>POST</code></td><td><code>/admin/gc/run</code></td><td>Trigger manual GC run</td></tr>
<tr><td><code>GET</code></td><td><code>/admin/gc/history</code></td><td>Get execution history</td></tr>
</tbody>
</table>
</div>
<pre class="mb-3"><code class="language-bash"># Trigger a dry run (preview what would be cleaned)
curl -X POST "{{ api_base }}/admin/gc/run" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;" \
-H "Content-Type: application/json" \
-d '{"dry_run": true}'
# Trigger actual GC
curl -X POST "{{ api_base }}/admin/gc/run" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;"
# Check status
curl "{{ api_base }}/admin/gc/status" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;"
# View history
curl "{{ api_base }}/admin/gc/history?limit=10" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;"</code></pre>
<div class="alert alert-light border mb-0">
<div class="d-flex gap-2">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-info-circle text-muted mt-1 flex-shrink-0" viewBox="0 0 16 16">
<path d="M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z"/>
<path d="m8.93 6.588-2.29.287-.082.38.45.083c.294.07.352.176.288.469l-.738 3.468c-.194.897.105 1.319.808 1.319.545 0 1.178-.252 1.465-.598l.088-.416c-.2.176-.492.246-.686.246-.275 0-.375-.193-.304-.533L8.93 6.588zM9 4.5a1 1 0 1 1-2 0 1 1 0 0 1 2 0z"/>
</svg>
<div>
<strong>Dry Run:</strong> Use <code>GC_DRY_RUN=true</code> or pass <code>{"dry_run": true}</code> to the API to preview what would be deleted without actually removing anything. Check the logs or API response for details.
</div>
</div>
</div>
</div>
</article>
<article id="integrity" class="card shadow-sm docs-section">
<div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">15</span>
<h2 class="h4 mb-0">Integrity Scanner</h2>
</div>
<p class="text-muted">Detect and optionally auto-repair data inconsistencies: corrupted objects, orphaned files, phantom metadata, stale versions, ETag cache drift, and unmigrated legacy metadata.</p>
<h3 class="h6 text-uppercase text-muted mt-4">Enabling Integrity Scanner</h3>
<p class="small text-muted">Disabled by default. Enable via environment variable:</p>
<pre class="mb-3"><code class="language-bash">INTEGRITY_ENABLED=true python run.py</code></pre>
<h3 class="h6 text-uppercase text-muted mt-4">Configuration</h3>
<div class="table-responsive mb-3">
<table class="table table-sm table-bordered small">
<thead class="table-light">
<tr>
<th>Variable</th>
<th>Default</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr><td><code>INTEGRITY_ENABLED</code></td><td><code>false</code></td><td>Enable background integrity scanning</td></tr>
<tr><td><code>INTEGRITY_INTERVAL_HOURS</code></td><td><code>24</code></td><td>Hours between scan cycles</td></tr>
<tr><td><code>INTEGRITY_BATCH_SIZE</code></td><td><code>1000</code></td><td>Max objects to scan per cycle</td></tr>
<tr><td><code>INTEGRITY_AUTO_HEAL</code></td><td><code>false</code></td><td>Automatically repair detected issues</td></tr>
<tr><td><code>INTEGRITY_DRY_RUN</code></td><td><code>false</code></td><td>Log issues without healing</td></tr>
</tbody>
</table>
</div>
<h3 class="h6 text-uppercase text-muted mt-4">What Gets Checked</h3>
<div class="table-responsive mb-3">
<table class="table table-sm table-bordered small">
<thead class="table-light">
<tr>
<th>Check</th>
<th>Detection</th>
<th>Heal Action</th>
</tr>
</thead>
<tbody>
<tr><td><strong>Corrupted objects</strong></td><td>File MD5 does not match stored ETag</td><td>Update ETag in index (disk is authoritative)</td></tr>
<tr><td><strong>Orphaned objects</strong></td><td>File exists without metadata entry</td><td>Create index entry with computed MD5/size/mtime</td></tr>
<tr><td><strong>Phantom metadata</strong></td><td>Index entry exists but file is missing</td><td>Remove stale entry from index</td></tr>
<tr><td><strong>Stale versions</strong></td><td>Manifest without data or vice versa</td><td>Remove orphaned version file</td></tr>
<tr><td><strong>ETag cache</strong></td><td><code>etag_index.json</code> differs from metadata</td><td>Delete cache file (auto-rebuilt)</td></tr>
<tr><td><strong>Legacy metadata</strong></td><td>Legacy <code>.meta.json</code> differs or unmigrated</td><td>Migrate to index, delete legacy file</td></tr>
</tbody>
</table>
</div>
<h3 class="h6 text-uppercase text-muted mt-4">Admin API</h3>
<div class="table-responsive mb-3">
<table class="table table-sm table-bordered small">
<thead class="table-light">
<tr>
<th>Method</th>
<th>Route</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr><td><code>GET</code></td><td><code>/admin/integrity/status</code></td><td>Get scanner status and configuration</td></tr>
<tr><td><code>POST</code></td><td><code>/admin/integrity/run</code></td><td>Trigger manual scan</td></tr>
<tr><td><code>GET</code></td><td><code>/admin/integrity/history</code></td><td>Get scan history</td></tr>
</tbody>
</table>
</div>
<pre class="mb-3"><code class="language-bash"># Trigger a dry run with auto-heal preview
curl -X POST "{{ api_base }}/admin/integrity/run" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;" \
-H "Content-Type: application/json" \
-d '{"dry_run": true, "auto_heal": true}'
# Trigger actual scan with healing
curl -X POST "{{ api_base }}/admin/integrity/run" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;" \
-H "Content-Type: application/json" \
-d '{"auto_heal": true}'
# Check status
curl "{{ api_base }}/admin/integrity/status" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;"
# View history
curl "{{ api_base }}/admin/integrity/history?limit=10" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;"</code></pre>
<div class="alert alert-light border mb-0">
<div class="d-flex gap-2">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-info-circle text-muted mt-1 flex-shrink-0" viewBox="0 0 16 16">
<path d="M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z"/>
<path d="m8.93 6.588-2.29.287-.082.38.45.083c.294.07.352.176.288.469l-.738 3.468c-.194.897.105 1.319.808 1.319.545 0 1.178-.252 1.465-.598l.088-.416c-.2.176-.492.246-.686.246-.275 0-.375-.193-.304-.533L8.93 6.588zM9 4.5a1 1 0 1 1-2 0 1 1 0 0 1 2 0z"/>
</svg>
<div>
<strong>Dry Run:</strong> Use <code>INTEGRITY_DRY_RUN=true</code> or pass <code>{"dry_run": true}</code> to the API to preview detected issues without making any changes. Combine with <code>{"auto_heal": true}</code> to see what would be repaired.
</div>
</div>
</div>
</div>
</article>
<article id="metrics" class="card shadow-sm docs-section">
<div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">16</span>
<h2 class="h4 mb-0">Metrics History</h2> <h2 class="h4 mb-0">Metrics History</h2>
</div> </div>
<p class="text-muted">Track CPU, memory, and disk usage over time with optional metrics history. Disabled by default to minimize overhead.</p> <p class="text-muted">Track CPU, memory, and disk usage over time with optional metrics history. Disabled by default to minimize overhead.</p>
@@ -1703,7 +1923,7 @@ curl -X PUT "{{ api_base | replace('/api', '/ui') }}/metrics/settings" \
<article id="operation-metrics" class="card shadow-sm docs-section"> <article id="operation-metrics" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">15</span> <span class="docs-section-kicker">17</span>
<h2 class="h4 mb-0">Operation Metrics</h2> <h2 class="h4 mb-0">Operation Metrics</h2>
</div> </div>
<p class="text-muted">Track API request statistics including request counts, latency, error rates, and bandwidth usage. Provides real-time visibility into API operations.</p> <p class="text-muted">Track API request statistics including request counts, latency, error rates, and bandwidth usage. Provides real-time visibility into API operations.</p>
@@ -1810,7 +2030,7 @@ curl "{{ api_base | replace('/api', '/ui') }}/metrics/operations/history?hours=6
<article id="troubleshooting" class="card shadow-sm docs-section"> <article id="troubleshooting" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">16</span> <span class="docs-section-kicker">18</span>
<h2 class="h4 mb-0">Troubleshooting &amp; tips</h2> <h2 class="h4 mb-0">Troubleshooting &amp; tips</h2>
</div> </div>
<div class="table-responsive"> <div class="table-responsive">
@@ -1861,7 +2081,7 @@ curl "{{ api_base | replace('/api', '/ui') }}/metrics/operations/history?hours=6
<article id="health-check" class="card shadow-sm docs-section"> <article id="health-check" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">17</span> <span class="docs-section-kicker">19</span>
<h2 class="h4 mb-0">Health Check Endpoint</h2> <h2 class="h4 mb-0">Health Check Endpoint</h2>
</div> </div>
<p class="text-muted">The API exposes a health check endpoint for monitoring and load balancer integration.</p> <p class="text-muted">The API exposes a health check endpoint for monitoring and load balancer integration.</p>
@@ -1883,7 +2103,7 @@ curl {{ api_base }}/myfsio/health
<article id="object-lock" class="card shadow-sm docs-section"> <article id="object-lock" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">18</span> <span class="docs-section-kicker">20</span>
<h2 class="h4 mb-0">Object Lock &amp; Retention</h2> <h2 class="h4 mb-0">Object Lock &amp; Retention</h2>
</div> </div>
<p class="text-muted">Object Lock prevents objects from being deleted or overwritten for a specified retention period.</p> <p class="text-muted">Object Lock prevents objects from being deleted or overwritten for a specified retention period.</p>
@@ -1943,7 +2163,7 @@ curl "{{ api_base }}/&lt;bucket&gt;/&lt;key&gt;?legal-hold" \
<article id="access-logging" class="card shadow-sm docs-section"> <article id="access-logging" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">19</span> <span class="docs-section-kicker">21</span>
<h2 class="h4 mb-0">Access Logging</h2> <h2 class="h4 mb-0">Access Logging</h2>
</div> </div>
<p class="text-muted">Enable S3-style access logging to track all requests to your buckets for audit and analysis.</p> <p class="text-muted">Enable S3-style access logging to track all requests to your buckets for audit and analysis.</p>
@@ -1970,7 +2190,7 @@ curl "{{ api_base }}/&lt;bucket&gt;?logging" \
<article id="notifications" class="card shadow-sm docs-section"> <article id="notifications" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">20</span> <span class="docs-section-kicker">22</span>
<h2 class="h4 mb-0">Notifications &amp; Webhooks</h2> <h2 class="h4 mb-0">Notifications &amp; Webhooks</h2>
</div> </div>
<p class="text-muted">Configure event notifications to trigger webhooks when objects are created or deleted.</p> <p class="text-muted">Configure event notifications to trigger webhooks when objects are created or deleted.</p>
@@ -2033,7 +2253,7 @@ curl -X PUT "{{ api_base }}/&lt;bucket&gt;?notification" \
<article id="select-content" class="card shadow-sm docs-section"> <article id="select-content" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">21</span> <span class="docs-section-kicker">23</span>
<h2 class="h4 mb-0">SelectObjectContent (SQL)</h2> <h2 class="h4 mb-0">SelectObjectContent (SQL)</h2>
</div> </div>
<p class="text-muted">Query CSV, JSON, or Parquet files directly using SQL without downloading the entire object.</p> <p class="text-muted">Query CSV, JSON, or Parquet files directly using SQL without downloading the entire object.</p>
@@ -2078,7 +2298,7 @@ curl -X POST "{{ api_base }}/&lt;bucket&gt;/data.csv?select" \
<article id="advanced-ops" class="card shadow-sm docs-section"> <article id="advanced-ops" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">22</span> <span class="docs-section-kicker">24</span>
<h2 class="h4 mb-0">Advanced S3 Operations</h2> <h2 class="h4 mb-0">Advanced S3 Operations</h2>
</div> </div>
<p class="text-muted">Copy, move, and partially download objects using advanced S3 operations.</p> <p class="text-muted">Copy, move, and partially download objects using advanced S3 operations.</p>
@@ -2152,7 +2372,7 @@ curl "{{ api_base }}/&lt;bucket&gt;/&lt;key&gt;" \
<article id="acls" class="card shadow-sm docs-section"> <article id="acls" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">23</span> <span class="docs-section-kicker">25</span>
<h2 class="h4 mb-0">Access Control Lists (ACLs)</h2> <h2 class="h4 mb-0">Access Control Lists (ACLs)</h2>
</div> </div>
<p class="text-muted">ACLs provide legacy-style permission management for buckets and objects.</p> <p class="text-muted">ACLs provide legacy-style permission management for buckets and objects.</p>
@@ -2206,7 +2426,7 @@ curl -X PUT "{{ api_base }}/&lt;bucket&gt;/&lt;key&gt;" \
<article id="tagging" class="card shadow-sm docs-section"> <article id="tagging" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">24</span> <span class="docs-section-kicker">26</span>
<h2 class="h4 mb-0">Object &amp; Bucket Tagging</h2> <h2 class="h4 mb-0">Object &amp; Bucket Tagging</h2>
</div> </div>
<p class="text-muted">Add metadata tags to buckets and objects for organization, cost allocation, or lifecycle rule filtering.</p> <p class="text-muted">Add metadata tags to buckets and objects for organization, cost allocation, or lifecycle rule filtering.</p>
@@ -2265,7 +2485,7 @@ curl -X PUT "{{ api_base }}/&lt;bucket&gt;?tagging" \
<article id="website-hosting" class="card shadow-sm docs-section"> <article id="website-hosting" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">25</span> <span class="docs-section-kicker">27</span>
<h2 class="h4 mb-0">Static Website Hosting</h2> <h2 class="h4 mb-0">Static Website Hosting</h2>
</div> </div>
<p class="text-muted">Host static websites directly from S3 buckets with custom index and error pages, served via custom domain mapping.</p> <p class="text-muted">Host static websites directly from S3 buckets with custom index and error pages, served via custom domain mapping.</p>
@@ -2358,7 +2578,7 @@ server {
<article id="cors-config" class="card shadow-sm docs-section"> <article id="cors-config" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">26</span> <span class="docs-section-kicker">28</span>
<h2 class="h4 mb-0">CORS Configuration</h2> <h2 class="h4 mb-0">CORS Configuration</h2>
</div> </div>
<p class="text-muted">Configure per-bucket Cross-Origin Resource Sharing rules to control which origins can access your bucket from a browser.</p> <p class="text-muted">Configure per-bucket Cross-Origin Resource Sharing rules to control which origins can access your bucket from a browser.</p>
@@ -2425,7 +2645,7 @@ curl -X DELETE "{{ api_base }}/&lt;bucket&gt;?cors" \
<article id="post-object" class="card shadow-sm docs-section"> <article id="post-object" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">27</span> <span class="docs-section-kicker">29</span>
<h2 class="h4 mb-0">PostObject (HTML Form Upload)</h2> <h2 class="h4 mb-0">PostObject (HTML Form Upload)</h2>
</div> </div>
<p class="text-muted">Upload objects directly from an HTML form using browser-based POST uploads with policy-based authorization.</p> <p class="text-muted">Upload objects directly from an HTML form using browser-based POST uploads with policy-based authorization.</p>
@@ -2467,7 +2687,7 @@ curl -X DELETE "{{ api_base }}/&lt;bucket&gt;?cors" \
<article id="list-objects-v2" class="card shadow-sm docs-section"> <article id="list-objects-v2" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">28</span> <span class="docs-section-kicker">30</span>
<h2 class="h4 mb-0">List Objects API v2</h2> <h2 class="h4 mb-0">List Objects API v2</h2>
</div> </div>
<p class="text-muted">Use the v2 list API for improved pagination with continuation tokens instead of markers.</p> <p class="text-muted">Use the v2 list API for improved pagination with continuation tokens instead of markers.</p>
@@ -2511,7 +2731,7 @@ curl "{{ api_base }}/&lt;bucket&gt;?list-type=2&amp;start-after=photos/2025/" \
<article id="upgrading" class="card shadow-sm docs-section"> <article id="upgrading" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">29</span> <span class="docs-section-kicker">31</span>
<h2 class="h4 mb-0">Upgrading &amp; Updates</h2> <h2 class="h4 mb-0">Upgrading &amp; Updates</h2>
</div> </div>
<p class="text-muted">How to safely update MyFSIO to a new version.</p> <p class="text-muted">How to safely update MyFSIO to a new version.</p>
@@ -2544,7 +2764,7 @@ cp -r logs/ logs-backup/</code></pre>
<article id="api-matrix" class="card shadow-sm docs-section"> <article id="api-matrix" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">30</span> <span class="docs-section-kicker">32</span>
<h2 class="h4 mb-0">Full API Reference</h2> <h2 class="h4 mb-0">Full API Reference</h2>
</div> </div>
<p class="text-muted">Complete list of all S3-compatible, admin, and KMS endpoints.</p> <p class="text-muted">Complete list of all S3-compatible, admin, and KMS endpoints.</p>
@@ -2642,6 +2862,7 @@ POST /kms/generate-random # Generate random bytes</code></pre>
<li><a href="#quotas">Bucket Quotas</a></li> <li><a href="#quotas">Bucket Quotas</a></li>
<li><a href="#encryption">Encryption</a></li> <li><a href="#encryption">Encryption</a></li>
<li><a href="#lifecycle">Lifecycle Rules</a></li> <li><a href="#lifecycle">Lifecycle Rules</a></li>
<li><a href="#garbage-collection">Garbage Collection</a></li>
<li><a href="#metrics">Metrics History</a></li> <li><a href="#metrics">Metrics History</a></li>
<li><a href="#operation-metrics">Operation Metrics</a></li> <li><a href="#operation-metrics">Operation Metrics</a></li>
<li><a href="#troubleshooting">Troubleshooting</a></li> <li><a href="#troubleshooting">Troubleshooting</a></li>

View File

@@ -50,9 +50,20 @@
New user created: <code>{{ disclosed_secret.access_key }}</code> New user created: <code>{{ disclosed_secret.access_key }}</code>
{% endif %} {% endif %}
</div> </div>
<p class="mb-2 small">⚠️ This secret is only shown once. Copy it now and store it securely.</p> <p class="mb-2 small">These credentials are only shown once. Copy them now and store them securely.</p>
</div> </div>
</div> </div>
<div class="input-group mb-2">
<span class="input-group-text"><strong>Access key</strong></span>
<input class="form-control font-monospace" type="text" value="{{ disclosed_secret.access_key }}" readonly id="disclosedAccessKeyValue" />
<button class="btn btn-outline-primary" type="button" data-access-key-copy>
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="bi bi-clipboard" viewBox="0 0 16 16">
<path d="M4 1.5H3a2 2 0 0 0-2 2V14a2 2 0 0 0 2 2h10a2 2 0 0 0 2-2V3.5a2 2 0 0 0-2-2h-1v1h1a1 1 0 0 1 1 1V14a1 1 0 0 1-1 1H3a1 1 0 0 1-1-1V3.5a1 1 0 0 1 1-1h1v-1z"/>
<path d="M9.5 1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-3a.5.5 0 0 1-.5-.5v-1a.5.5 0 0 1 .5-.5h3zm-3-1A1.5 1.5 0 0 0 5 1.5v1A1.5 1.5 0 0 0 6.5 4h3A1.5 1.5 0 0 0 11 2.5v-1A1.5 1.5 0 0 0 9.5 0h-3z"/>
</svg>
Copy
</button>
</div>
<div class="input-group"> <div class="input-group">
<span class="input-group-text"><strong>Secret key</strong></span> <span class="input-group-text"><strong>Secret key</strong></span>
<input class="form-control font-monospace" type="text" value="{{ disclosed_secret.secret_key }}" readonly id="disclosedSecretValue" /> <input class="form-control font-monospace" type="text" value="{{ disclosed_secret.secret_key }}" readonly id="disclosedSecretValue" />
@@ -79,7 +90,7 @@
<pre class="policy-preview mb-0" id="iamConfigPreview">{{ config_document }}</pre> <pre class="policy-preview mb-0" id="iamConfigPreview">{{ config_document }}</pre>
<button class="btn btn-outline-light btn-sm config-copy" type="button" data-copy-target="iamConfigPreview">Copy JSON</button> <button class="btn btn-outline-light btn-sm config-copy" type="button" data-copy-target="iamConfigPreview">Copy JSON</button>
</div> </div>
<p class="text-muted small mt-2 mb-0">Secrets are masked above. Access <code>{{ config_summary.path }}</code> directly to view full credentials.</p> <p class="text-muted small mt-2 mb-0">Secrets are masked above. IAM config is encrypted at rest.</p>
</div> </div>
</div> </div>
</div> </div>
@@ -122,12 +133,20 @@
{% endif %} {% endif %}
<div class="row g-3"> <div class="row g-3">
{% for user in users %} {% for user in users %}
{% set ns = namespace(is_admin=false) %} {% set ns = namespace(is_admin=false, is_expired=false, is_expiring_soon=false) %}
{% for policy in user.policies %} {% for policy in user.policies %}
{% if 'iam:*' in policy.actions or '*' in policy.actions %} {% if 'iam:*' in policy.actions or '*' in policy.actions %}
{% set ns.is_admin = true %} {% set ns.is_admin = true %}
{% endif %} {% endif %}
{% endfor %} {% endfor %}
{% if user.expires_at %}
{% set exp_str = user.expires_at %}
{% if exp_str <= now_iso %}
{% set ns.is_expired = true %}
{% elif exp_str <= soon_iso %}
{% set ns.is_expiring_soon = true %}
{% endif %}
{% endif %}
<div class="col-md-6 col-xl-4 iam-user-item" data-display-name="{{ user.display_name|lower }}" data-access-key-filter="{{ user.access_key|lower }}"> <div class="col-md-6 col-xl-4 iam-user-item" data-display-name="{{ user.display_name|lower }}" data-access-key-filter="{{ user.access_key|lower }}">
<div class="card h-100 iam-user-card{{ ' iam-admin-card' if ns.is_admin else '' }}"> <div class="card h-100 iam-user-card{{ ' iam-admin-card' if ns.is_admin else '' }}">
<div class="card-body"> <div class="card-body">
@@ -146,6 +165,11 @@
{% else %} {% else %}
<span class="iam-role-badge iam-role-user" data-role-badge>User</span> <span class="iam-role-badge iam-role-user" data-role-badge>User</span>
{% endif %} {% endif %}
{% if ns.is_expired %}
<span class="badge text-bg-danger" style="font-size: .65rem">Expired</span>
{% elif ns.is_expiring_soon %}
<span class="badge text-bg-warning" style="font-size: .65rem">Expiring soon</span>
{% endif %}
</div> </div>
<div class="d-flex align-items-center gap-1"> <div class="d-flex align-items-center gap-1">
<code class="small text-muted text-truncate" title="{{ user.access_key }}">{{ user.access_key }}</code> <code class="small text-muted text-truncate" title="{{ user.access_key }}">{{ user.access_key }}</code>
@@ -173,6 +197,15 @@
Edit Name Edit Name
</button> </button>
</li> </li>
<li>
<button class="dropdown-item" type="button" data-expiry-user="{{ user.access_key }}" data-expires-at="{{ user.expires_at or '' }}">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16">
<path d="M8 3.5a.5.5 0 0 0-1 0V9a.5.5 0 0 0 .252.434l3.5 2a.5.5 0 0 0 .496-.868L8 8.71V3.5z"/>
<path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm7-8A7 7 0 1 1 1 8a7 7 0 0 1 14 0z"/>
</svg>
Set Expiry
</button>
</li>
<li> <li>
<button class="dropdown-item" type="button" data-rotate-user="{{ user.access_key }}"> <button class="dropdown-item" type="button" data-rotate-user="{{ user.access_key }}">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16"> <svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16">
@@ -283,6 +316,32 @@
<label class="form-label fw-medium">Display Name</label> <label class="form-label fw-medium">Display Name</label>
<input class="form-control" type="text" name="display_name" placeholder="Analytics Team" required autofocus /> <input class="form-control" type="text" name="display_name" placeholder="Analytics Team" required autofocus />
</div> </div>
<div class="mb-3">
<label class="form-label fw-medium d-flex justify-content-between align-items-center">
Access Key <span class="text-muted fw-normal small">optional</span>
</label>
<div class="input-group">
<input class="form-control font-monospace" type="text" name="access_key" id="createUserAccessKey" placeholder="Leave blank to auto-generate" />
<button class="btn btn-outline-secondary" type="button" id="generateAccessKeyBtn" title="Generate secure access key">Generate</button>
</div>
</div>
<div class="mb-3">
<label class="form-label fw-medium d-flex justify-content-between align-items-center">
Secret Key <span class="text-muted fw-normal small">optional</span>
</label>
<div class="input-group">
<input class="form-control font-monospace" type="text" name="secret_key" id="createUserSecretKey" placeholder="Leave blank to auto-generate" />
<button class="btn btn-outline-secondary" type="button" id="generateSecretKeyBtn" title="Generate secure secret key">Generate</button>
</div>
<div class="form-text">If you set a custom secret key, copy it now. It will be encrypted and cannot be recovered.</div>
</div>
<div class="mb-3">
<label class="form-label fw-medium d-flex justify-content-between align-items-center">
Expiry <span class="text-muted fw-normal small">optional</span>
</label>
<input class="form-control" type="datetime-local" name="expires_at" id="createUserExpiry" />
<div class="form-text">Leave blank for no expiration. Expired users cannot authenticate.</div>
</div>
<div class="mb-3"> <div class="mb-3">
<label class="form-label fw-medium">Initial Policies (JSON)</label> <label class="form-label fw-medium">Initial Policies (JSON)</label>
<textarea class="form-control font-monospace" name="policies" id="createUserPolicies" rows="6" spellcheck="false" placeholder='[ <textarea class="form-control font-monospace" name="policies" id="createUserPolicies" rows="6" spellcheck="false" placeholder='[
@@ -495,6 +554,52 @@
</div> </div>
</div> </div>
<div class="modal fade" id="expiryModal" tabindex="-1" aria-hidden="true">
<div class="modal-dialog modal-dialog-centered">
<div class="modal-content">
<div class="modal-header border-0 pb-0">
<h1 class="modal-title fs-5 fw-semibold">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
<path d="M8 3.5a.5.5 0 0 0-1 0V9a.5.5 0 0 0 .252.434l3.5 2a.5.5 0 0 0 .496-.868L8 8.71V3.5z"/>
<path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm7-8A7 7 0 1 1 1 8a7 7 0 0 1 14 0z"/>
</svg>
Set Expiry
</h1>
<button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
</div>
<form method="post" id="expiryForm">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
<div class="modal-body">
<p class="text-muted small mb-3">Set expiration for <code id="expiryUserLabel"></code></p>
<div class="mb-3">
<label class="form-label fw-medium">Expires at</label>
<input class="form-control" type="datetime-local" name="expires_at" id="expiryDateInput" />
<div class="form-text">Leave blank to remove expiration (never expires).</div>
</div>
<div class="d-flex flex-wrap gap-2">
<span class="text-muted small me-2 align-self-center">Quick presets:</span>
<button class="btn btn-outline-secondary btn-sm" type="button" data-expiry-preset="1h">1 hour</button>
<button class="btn btn-outline-secondary btn-sm" type="button" data-expiry-preset="24h">24 hours</button>
<button class="btn btn-outline-secondary btn-sm" type="button" data-expiry-preset="7d">7 days</button>
<button class="btn btn-outline-secondary btn-sm" type="button" data-expiry-preset="30d">30 days</button>
<button class="btn btn-outline-secondary btn-sm" type="button" data-expiry-preset="90d">90 days</button>
<button class="btn btn-outline-secondary btn-sm text-danger" type="button" data-expiry-preset="clear">Never</button>
</div>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-outline-secondary" data-bs-dismiss="modal">Cancel</button>
<button class="btn btn-primary" type="submit">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1" viewBox="0 0 16 16">
<path d="M10.97 4.97a.75.75 0 0 1 1.07 1.05l-3.99 4.99a.75.75 0 0 1-1.08.02L4.324 8.384a.75.75 0 1 1 1.06-1.06l2.094 2.093 3.473-4.425a.267.267 0 0 1 .02-.022z"/>
</svg>
Save Expiry
</button>
</div>
</form>
</div>
</div>
</div>
<script id="iamUsersJson" type="application/json">{{ users | tojson }}</script> <script id="iamUsersJson" type="application/json">{{ users | tojson }}</script>
{% endblock %} {% endblock %}
@@ -512,7 +617,8 @@
updateUser: "{{ url_for('ui.update_iam_user', access_key='ACCESS_KEY') }}", updateUser: "{{ url_for('ui.update_iam_user', access_key='ACCESS_KEY') }}",
deleteUser: "{{ url_for('ui.delete_iam_user', access_key='ACCESS_KEY') }}", deleteUser: "{{ url_for('ui.delete_iam_user', access_key='ACCESS_KEY') }}",
updatePolicies: "{{ url_for('ui.update_iam_policies', access_key='ACCESS_KEY') }}", updatePolicies: "{{ url_for('ui.update_iam_policies', access_key='ACCESS_KEY') }}",
rotateSecret: "{{ url_for('ui.rotate_iam_secret', access_key='ACCESS_KEY') }}" rotateSecret: "{{ url_for('ui.rotate_iam_secret', access_key='ACCESS_KEY') }}",
updateExpiry: "{{ url_for('ui.update_iam_expiry', access_key='ACCESS_KEY') }}"
} }
}); });
</script> </script>

View File

@@ -73,9 +73,6 @@
</svg> </svg>
</button> </button>
</form> </form>
<div class="text-center mt-4">
<small class="text-muted">Need help? Check the <a href="{{ url_for('ui.docs_page') }}" class="text-decoration-none">documentation</a></small>
</div>
</div> </div>
</div> </div>
</div> </div>

View File

@@ -0,0 +1,156 @@
import hashlib
import time
import pytest
@pytest.fixture()
def bucket(client, signer):
    """Create an empty bucket for the conditional-request tests and return its name."""
    name = "cond-test"
    client.put(f"/{name}", headers=signer("PUT", f"/{name}"))
    return name
@pytest.fixture()
def uploaded(client, signer, bucket):
    """Upload a small object and expose its validators (ETag, Last-Modified)."""
    payload = b"hello conditional"
    put_headers = signer("PUT", f"/{bucket}/obj.txt", body=payload)
    response = client.put(f"/{bucket}/obj.txt", headers=put_headers, data=payload)
    return {
        # Single-part PUT: the ETag is the plain MD5 of the body.
        "etag": hashlib.md5(payload).hexdigest(),
        "last_modified": response.headers.get("Last-Modified"),
    }
class TestIfMatch:
    """If-Match semantics: succeed when any listed ETag (or *) matches, 412 otherwise."""

    def _fetch(self, client, signer, bucket, method, condition):
        # Sign and issue a GET or HEAD carrying the given If-Match header value.
        path = f"/{bucket}/obj.txt"
        signed = signer(method, path, headers={"If-Match": condition})
        send = client.get if method == "GET" else client.head
        return send(path, headers=signed)

    def test_get_matching_etag(self, client, signer, bucket, uploaded):
        resp = self._fetch(client, signer, bucket, "GET", f'"{uploaded["etag"]}"')
        assert resp.status_code == 200

    def test_get_non_matching_etag(self, client, signer, bucket, uploaded):
        resp = self._fetch(client, signer, bucket, "GET", '"wrongetag"')
        assert resp.status_code == 412

    def test_head_matching_etag(self, client, signer, bucket, uploaded):
        resp = self._fetch(client, signer, bucket, "HEAD", f'"{uploaded["etag"]}"')
        assert resp.status_code == 200

    def test_head_non_matching_etag(self, client, signer, bucket, uploaded):
        resp = self._fetch(client, signer, bucket, "HEAD", '"wrongetag"')
        assert resp.status_code == 412

    def test_wildcard_match(self, client, signer, bucket, uploaded):
        # "*" matches any existing representation.
        resp = self._fetch(client, signer, bucket, "GET", "*")
        assert resp.status_code == 200

    def test_multiple_etags_one_matches(self, client, signer, bucket, uploaded):
        listed = f'"bad1", "{uploaded["etag"]}", "bad2"'
        resp = self._fetch(client, signer, bucket, "GET", listed)
        assert resp.status_code == 200

    def test_multiple_etags_none_match(self, client, signer, bucket, uploaded):
        resp = self._fetch(client, signer, bucket, "GET", '"bad1", "bad2"')
        assert resp.status_code == 412
class TestIfNoneMatch:
    """If-None-Match semantics: 304 for GET/HEAD when the ETag (or *) matches."""

    def _fetch(self, client, signer, bucket, method, condition):
        # Sign and issue a GET or HEAD carrying the given If-None-Match header value.
        path = f"/{bucket}/obj.txt"
        signed = signer(method, path, headers={"If-None-Match": condition})
        send = client.get if method == "GET" else client.head
        return send(path, headers=signed)

    def test_get_matching_etag_returns_304(self, client, signer, bucket, uploaded):
        resp = self._fetch(client, signer, bucket, "GET", f'"{uploaded["etag"]}"')
        assert resp.status_code == 304
        # A 304 must still carry the validator so caches can confirm freshness.
        assert uploaded["etag"] in resp.headers.get("ETag", "")

    def test_get_non_matching_etag_returns_200(self, client, signer, bucket, uploaded):
        resp = self._fetch(client, signer, bucket, "GET", '"wrongetag"')
        assert resp.status_code == 200

    def test_head_matching_etag_returns_304(self, client, signer, bucket, uploaded):
        resp = self._fetch(client, signer, bucket, "HEAD", f'"{uploaded["etag"]}"')
        assert resp.status_code == 304

    def test_head_non_matching_etag_returns_200(self, client, signer, bucket, uploaded):
        resp = self._fetch(client, signer, bucket, "HEAD", '"wrongetag"')
        assert resp.status_code == 200

    def test_wildcard_returns_304(self, client, signer, bucket, uploaded):
        # "*" matches any existing representation, so the GET short-circuits to 304.
        resp = self._fetch(client, signer, bucket, "GET", "*")
        assert resp.status_code == 304
class TestIfModifiedSince:
    """If-Modified-Since: 304 when the object has not changed since the given date."""

    # A date far in the future (object cannot be newer) and one far in the past.
    FUTURE_DATE = "Sun, 01 Jan 2034 00:00:00 GMT"
    PAST_DATE = "Sun, 01 Jan 2000 00:00:00 GMT"

    def test_not_modified_returns_304(self, client, signer, bucket, uploaded):
        signed = signer("GET", f"/{bucket}/obj.txt", headers={"If-Modified-Since": self.FUTURE_DATE})
        resp = client.get(f"/{bucket}/obj.txt", headers=signed)
        assert resp.status_code == 304
        assert "ETag" in resp.headers

    def test_modified_returns_200(self, client, signer, bucket, uploaded):
        signed = signer("GET", f"/{bucket}/obj.txt", headers={"If-Modified-Since": self.PAST_DATE})
        resp = client.get(f"/{bucket}/obj.txt", headers=signed)
        assert resp.status_code == 200

    def test_head_not_modified(self, client, signer, bucket, uploaded):
        signed = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Modified-Since": self.FUTURE_DATE})
        resp = client.head(f"/{bucket}/obj.txt", headers=signed)
        assert resp.status_code == 304

    def test_if_none_match_takes_precedence(self, client, signer, bucket, uploaded):
        # RFC 7232 §6: when If-None-Match is present, If-Modified-Since is ignored,
        # so the non-matching ETag forces a full 200 despite the future date.
        signed = signer("GET", f"/{bucket}/obj.txt", headers={
            "If-None-Match": '"wrongetag"',
            "If-Modified-Since": self.FUTURE_DATE,
        })
        resp = client.get(f"/{bucket}/obj.txt", headers=signed)
        assert resp.status_code == 200
class TestIfUnmodifiedSince:
    """If-Unmodified-Since: 412 when the object has changed after the given date."""

    # A date far in the future (object cannot be newer) and one far in the past.
    FUTURE_DATE = "Sun, 01 Jan 2034 00:00:00 GMT"
    PAST_DATE = "Sun, 01 Jan 2000 00:00:00 GMT"

    def test_unmodified_returns_200(self, client, signer, bucket, uploaded):
        signed = signer("GET", f"/{bucket}/obj.txt", headers={"If-Unmodified-Since": self.FUTURE_DATE})
        resp = client.get(f"/{bucket}/obj.txt", headers=signed)
        assert resp.status_code == 200

    def test_modified_returns_412(self, client, signer, bucket, uploaded):
        signed = signer("GET", f"/{bucket}/obj.txt", headers={"If-Unmodified-Since": self.PAST_DATE})
        resp = client.get(f"/{bucket}/obj.txt", headers=signed)
        assert resp.status_code == 412

    def test_head_modified_returns_412(self, client, signer, bucket, uploaded):
        signed = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Unmodified-Since": self.PAST_DATE})
        resp = client.head(f"/{bucket}/obj.txt", headers=signed)
        assert resp.status_code == 412

    def test_if_match_takes_precedence(self, client, signer, bucket, uploaded):
        # RFC 7232 §6: when If-Match is present, If-Unmodified-Since is ignored,
        # so the matching ETag yields 200 despite the stale date.
        signed = signer("GET", f"/{bucket}/obj.txt", headers={
            "If-Match": f'"{uploaded["etag"]}"',
            "If-Unmodified-Since": self.PAST_DATE,
        })
        resp = client.get(f"/{bucket}/obj.txt", headers=signed)
        assert resp.status_code == 200
class TestConditionalWithRange:
    """Range requests still honor If-Match: 206 on match, 412 on mismatch."""

    def _ranged_get(self, client, signer, bucket, etag_value):
        # GET the first five bytes, guarded by the given If-Match value.
        path = f"/{bucket}/obj.txt"
        signed = signer("GET", path, headers={"If-Match": etag_value, "Range": "bytes=0-4"})
        return client.get(path, headers=signed)

    def test_if_match_with_range(self, client, signer, bucket, uploaded):
        resp = self._ranged_get(client, signer, bucket, f'"{uploaded["etag"]}"')
        assert resp.status_code == 206

    def test_if_match_fails_with_range(self, client, signer, bucket, uploaded):
        # The precondition fails before any range processing happens.
        resp = self._ranged_get(client, signer, bucket, '"wrongetag"')
        assert resp.status_code == 412

350
tests/test_gc.py Normal file
View File

@@ -0,0 +1,350 @@
import json
import os
import time
from pathlib import Path
import pytest
from app.gc import GarbageCollector, GCResult
@pytest.fixture
def storage_root(tmp_path):
    """Build a storage layout with the .myfsio.sys control directories GC expects."""
    root = tmp_path / "data"
    sys_dir = root / ".myfsio.sys"
    # mkdir(parents=True) creates root and sys_dir implicitly on the first pass.
    for sub in ("config", "tmp", "multipart", "buckets"):
        (sys_dir / sub).mkdir(parents=True, exist_ok=True)
    return root
@pytest.fixture
def gc(storage_root):
    """A GarbageCollector wired to the test storage root with short retention windows."""
    settings = {
        "storage_root": storage_root,
        "interval_hours": 1.0,
        "temp_file_max_age_hours": 1.0,
        "multipart_max_age_days": 1,
        "lock_file_max_age_hours": 0.5,
        "dry_run": False,
    }
    return GarbageCollector(**settings)
def _make_old(path, hours=48):
old_time = time.time() - hours * 3600
os.utime(path, (old_time, old_time))
class TestTempFileCleanup:
    """GC removal of stale files under .myfsio.sys/tmp."""

    def test_old_temp_files_deleted(self, storage_root, gc):
        """A 48h-old temp file (max age is 1h here) is deleted and its size counted."""
        tmp_dir = storage_root / ".myfsio.sys" / "tmp"
        old_file = tmp_dir / "abc123.tmp"
        old_file.write_bytes(b"x" * 1000)
        _make_old(old_file, hours=48)
        result = gc.run_now()
        assert result.temp_files_deleted == 1
        assert result.temp_bytes_freed == 1000
        assert not old_file.exists()

    def test_recent_temp_files_kept(self, storage_root, gc):
        """A freshly written temp file survives a GC pass."""
        tmp_dir = storage_root / ".myfsio.sys" / "tmp"
        new_file = tmp_dir / "recent.tmp"
        new_file.write_bytes(b"data")
        result = gc.run_now()
        assert result.temp_files_deleted == 0
        assert new_file.exists()

    def test_dry_run_keeps_files(self, storage_root, gc):
        """Dry-run reports the same counters but leaves the files on disk."""
        gc.dry_run = True
        tmp_dir = storage_root / ".myfsio.sys" / "tmp"
        old_file = tmp_dir / "stale.tmp"
        old_file.write_bytes(b"x" * 500)
        _make_old(old_file, hours=48)
        result = gc.run_now()
        assert result.temp_files_deleted == 1
        assert result.temp_bytes_freed == 500
        assert old_file.exists()
class TestMultipartCleanup:
    """GC removal of aged multipart-upload workspaces."""

    def test_old_orphaned_multipart_deleted(self, storage_root, gc):
        """An upload older than multipart_max_age_days is removed entirely."""
        bucket = storage_root / "test-bucket"
        bucket.mkdir()
        mp_root = storage_root / ".myfsio.sys" / "multipart" / "test-bucket"
        mp_root.mkdir(parents=True)
        upload_dir = mp_root / "upload-123"
        upload_dir.mkdir()
        manifest = upload_dir / "manifest.json"
        manifest.write_text(json.dumps({"upload_id": "upload-123", "object_key": "foo.txt"}))
        part = upload_dir / "part-00001.part"
        part.write_bytes(b"x" * 2000)
        # Age every file plus the directory itself so the whole upload looks stale.
        _make_old(manifest, hours=200)
        _make_old(part, hours=200)
        _make_old(upload_dir, hours=200)
        result = gc.run_now()
        assert result.multipart_uploads_deleted == 1
        assert result.multipart_bytes_freed > 0
        assert not upload_dir.exists()

    def test_recent_multipart_kept(self, storage_root, gc):
        """A fresh upload workspace survives a GC pass."""
        bucket = storage_root / "test-bucket"
        bucket.mkdir()
        mp_root = storage_root / ".myfsio.sys" / "multipart" / "test-bucket"
        mp_root.mkdir(parents=True)
        upload_dir = mp_root / "upload-new"
        upload_dir.mkdir()
        manifest = upload_dir / "manifest.json"
        manifest.write_text(json.dumps({"upload_id": "upload-new", "object_key": "bar.txt"}))
        result = gc.run_now()
        assert result.multipart_uploads_deleted == 0
        assert upload_dir.exists()

    def test_legacy_multipart_cleaned(self, storage_root, gc):
        """Old-layout .multipart directories inside the bucket are also collected."""
        bucket = storage_root / "test-bucket"
        bucket.mkdir()
        legacy_mp = bucket / ".multipart" / "upload-old"
        legacy_mp.mkdir(parents=True)
        part = legacy_mp / "part-00001.part"
        part.write_bytes(b"y" * 500)
        _make_old(part, hours=200)
        _make_old(legacy_mp, hours=200)
        result = gc.run_now()
        assert result.multipart_uploads_deleted == 1
class TestLockFileCleanup:
    """GC removal of stale per-object lock files."""

    def test_stale_lock_files_deleted(self, storage_root, gc):
        """A lock older than lock_file_max_age_hours (0.5h here) is removed."""
        locks_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "locks"
        locks_dir.mkdir(parents=True)
        stale_lock = locks_dir / "some_key.lock"
        stale_lock.write_text("")
        _make_old(stale_lock, hours=2)
        outcome = gc.run_now()
        assert outcome.lock_files_deleted == 1
        assert not stale_lock.exists()

    def test_recent_lock_kept(self, storage_root, gc):
        """A freshly created lock file is not collected."""
        locks_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "locks"
        locks_dir.mkdir(parents=True)
        live_lock = locks_dir / "active.lock"
        live_lock.write_text("")
        outcome = gc.run_now()
        assert outcome.lock_files_deleted == 0
        assert live_lock.exists()
class TestOrphanedMetadataCleanup:
    """GC removal of metadata whose backing object file no longer exists."""

    def test_legacy_orphaned_metadata_deleted(self, storage_root, gc):
        """A legacy .meta/<key>.meta.json with no matching object is deleted."""
        bucket = storage_root / "test-bucket"
        bucket.mkdir()
        meta_dir = bucket / ".meta"
        meta_dir.mkdir()
        orphan = meta_dir / "deleted_file.txt.meta.json"
        orphan.write_text(json.dumps({"etag": "abc"}))
        result = gc.run_now()
        assert result.orphaned_metadata_deleted == 1
        assert not orphan.exists()

    def test_valid_metadata_kept(self, storage_root, gc):
        """Legacy metadata whose object file exists is left alone."""
        bucket = storage_root / "test-bucket"
        bucket.mkdir()
        obj = bucket / "exists.txt"
        obj.write_text("hello")
        meta_dir = bucket / ".meta"
        meta_dir.mkdir()
        meta = meta_dir / "exists.txt.meta.json"
        meta.write_text(json.dumps({"etag": "abc"}))
        result = gc.run_now()
        assert result.orphaned_metadata_deleted == 0
        assert meta.exists()

    def test_index_orphaned_entries_cleaned(self, storage_root, gc):
        """Dangling entries are pruned from _index.json while valid ones stay."""
        bucket = storage_root / "test-bucket"
        bucket.mkdir()
        obj = bucket / "keep.txt"
        obj.write_text("hello")
        meta_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "meta"
        meta_dir.mkdir(parents=True)
        index = meta_dir / "_index.json"
        # "gone.txt" has an index entry but no object file on disk.
        index.write_text(json.dumps({"keep.txt": {"etag": "a"}, "gone.txt": {"etag": "b"}}))
        result = gc.run_now()
        assert result.orphaned_metadata_deleted == 1
        updated = json.loads(index.read_text())
        assert "keep.txt" in updated
        assert "gone.txt" not in updated
class TestOrphanedVersionsCleanup:
    """GC removal of version files whose current object was deleted."""

    def test_orphaned_versions_deleted(self, storage_root, gc):
        """Versions of a non-existent object are deleted (both .bin and .json)."""
        bucket = storage_root / "test-bucket"
        bucket.mkdir()
        versions_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "versions" / "deleted_obj.txt"
        versions_dir.mkdir(parents=True)
        v_bin = versions_dir / "v1.bin"
        v_json = versions_dir / "v1.json"
        v_bin.write_bytes(b"old data" * 100)
        v_json.write_text(json.dumps({"version_id": "v1", "size": 800}))
        result = gc.run_now()
        # Two files removed; only the 800 data bytes count as freed.
        assert result.orphaned_versions_deleted == 2
        assert result.orphaned_version_bytes_freed == 800

    def test_active_versions_kept(self, storage_root, gc):
        """Versions belonging to a live object are preserved."""
        bucket = storage_root / "test-bucket"
        bucket.mkdir()
        obj = bucket / "active.txt"
        obj.write_text("current")
        versions_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "versions" / "active.txt"
        versions_dir.mkdir(parents=True)
        v_bin = versions_dir / "v1.bin"
        v_bin.write_bytes(b"old version")
        result = gc.run_now()
        assert result.orphaned_versions_deleted == 0
        assert v_bin.exists()
class TestEmptyDirCleanup:
    """GC pruning of empty directories under the system tree."""

    def test_empty_dirs_removed(self, storage_root, gc):
        """A dangling empty subdirectory is pruned during a GC pass."""
        dangling = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "locks" / "sub"
        dangling.mkdir(parents=True)
        outcome = gc.run_now()
        assert outcome.empty_dirs_removed > 0
        assert not dangling.exists()
class TestHistory:
    """Recording of GC executions in the collector's history."""

    def test_history_recorded(self, storage_root, gc):
        """Each run appends an entry carrying a result payload and a timestamp."""
        gc.run_now()
        history = gc.get_history()
        assert len(history) == 1
        assert "result" in history[0]
        assert "timestamp" in history[0]

    def test_multiple_runs(self, storage_root, gc):
        """Three runs yield three entries, ordered newest-first."""
        gc.run_now()
        gc.run_now()
        gc.run_now()
        history = gc.get_history()
        assert len(history) == 3
        # Newest entry first: timestamps are non-increasing down the list.
        assert history[0]["timestamp"] >= history[1]["timestamp"]
class TestStatus:
    """Shape and content of GarbageCollector.get_status()."""

    def test_get_status(self, storage_root, gc):
        """Status echoes the configuration the collector was constructed with."""
        status = gc.get_status()
        assert status["interval_hours"] == 1.0
        assert status["dry_run"] is False
        assert status["temp_file_max_age_hours"] == 1.0
        assert status["multipart_max_age_days"] == 1
        assert status["lock_file_max_age_hours"] == 0.5
class TestGCResult:
    """Derived properties on the GCResult value object."""

    def test_total_bytes_freed(self):
        """total_bytes_freed sums the temp, multipart, and version byte counters."""
        result = GCResult(temp_bytes_freed=100, multipart_bytes_freed=200, orphaned_version_bytes_freed=300)
        assert result.total_bytes_freed == 600

    def test_has_work(self):
        """has_work is falsy for an empty result and truthy once any counter is set."""
        assert not GCResult().has_work
        for nonempty in (
            GCResult(temp_files_deleted=1),
            GCResult(lock_files_deleted=1),
            GCResult(empty_dirs_removed=1),
        ):
            assert nonempty.has_work
class TestAdminAPI:
    """HTTP admin endpoints for garbage collection (/admin/gc/*)."""

    @pytest.fixture
    def gc_app(self, tmp_path):
        """Flask app with GC enabled and a single admin user holding all actions."""
        from app import create_api_app
        storage_root = tmp_path / "data"
        iam_config = tmp_path / "iam.json"
        bucket_policies = tmp_path / "bucket_policies.json"
        iam_payload = {
            "users": [
                {
                    "access_key": "admin",
                    "secret_key": "adminsecret",
                    "display_name": "Admin",
                    "policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy", "iam:*"]}],
                }
            ]
        }
        iam_config.write_text(json.dumps(iam_payload))
        flask_app = create_api_app({
            "TESTING": True,
            "SECRET_KEY": "testing",
            "STORAGE_ROOT": storage_root,
            "IAM_CONFIG": iam_config,
            "BUCKET_POLICY_PATH": bucket_policies,
            "GC_ENABLED": True,
            "GC_INTERVAL_HOURS": 1.0,
        })
        yield flask_app
        # Stop the background collector so it does not outlive the test.
        gc = flask_app.extensions.get("gc")
        if gc:
            gc.stop()

    def test_gc_status(self, gc_app):
        """GET /admin/gc/status reports enabled=True for admin credentials."""
        client = gc_app.test_client()
        resp = client.get("/admin/gc/status", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"})
        assert resp.status_code == 200
        data = resp.get_json()
        assert data["enabled"] is True

    def test_gc_run(self, gc_app):
        """POST /admin/gc/run executes a pass and returns its counters."""
        client = gc_app.test_client()
        resp = client.post(
            "/admin/gc/run",
            headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"},
            content_type="application/json",
        )
        assert resp.status_code == 200
        data = resp.get_json()
        assert "temp_files_deleted" in data

    def test_gc_dry_run(self, gc_app):
        """POST /admin/gc/run accepts a dry_run flag in the JSON body."""
        client = gc_app.test_client()
        resp = client.post(
            "/admin/gc/run",
            headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"},
            data=json.dumps({"dry_run": True}),
            content_type="application/json",
        )
        assert resp.status_code == 200
        data = resp.get_json()
        assert "temp_files_deleted" in data

    def test_gc_history(self, gc_app):
        """GET /admin/gc/history lists at least one execution after a run."""
        client = gc_app.test_client()
        client.post("/admin/gc/run", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"})
        resp = client.get("/admin/gc/history", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"})
        assert resp.status_code == 200
        data = resp.get_json()
        assert len(data["executions"]) >= 1

    def test_gc_requires_admin(self, gc_app):
        """A freshly created user (no admin policies) gets 403 on GC endpoints."""
        iam = gc_app.extensions["iam"]
        user = iam.create_user(display_name="Regular")
        client = gc_app.test_client()
        resp = client.get(
            "/admin/gc/status",
            headers={"X-Access-Key": user["access_key"], "X-Secret-Key": user["secret_key"]},
        )
        assert resp.status_code == 403

499
tests/test_integrity.py Normal file
View File

@@ -0,0 +1,499 @@
import hashlib
import json
import os
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from app.integrity import IntegrityChecker, IntegrityResult
def _md5(data: bytes) -> str:
return hashlib.md5(data).hexdigest()
def _setup_bucket(storage_root: Path, bucket_name: str, objects: dict[str, bytes]) -> None:
bucket_path = storage_root / bucket_name
bucket_path.mkdir(parents=True, exist_ok=True)
meta_root = storage_root / ".myfsio.sys" / "buckets" / bucket_name / "meta"
meta_root.mkdir(parents=True, exist_ok=True)
bucket_json = storage_root / ".myfsio.sys" / "buckets" / bucket_name / ".bucket.json"
bucket_json.write_text(json.dumps({"created": "2025-01-01"}))
for key, data in objects.items():
obj_path = bucket_path / key
obj_path.parent.mkdir(parents=True, exist_ok=True)
obj_path.write_bytes(data)
etag = _md5(data)
stat = obj_path.stat()
meta = {
"__etag__": etag,
"__size__": str(stat.st_size),
"__last_modified__": str(stat.st_mtime),
}
key_path = Path(key)
parent = key_path.parent
key_name = key_path.name
if parent == Path("."):
index_path = meta_root / "_index.json"
else:
index_path = meta_root / parent / "_index.json"
index_path.parent.mkdir(parents=True, exist_ok=True)
index_data = {}
if index_path.exists():
index_data = json.loads(index_path.read_text())
index_data[key_name] = {"metadata": meta}
index_path.write_text(json.dumps(index_data))
def _issues_of_type(result, issue_type):
return [i for i in result.issues if i.issue_type == issue_type]
@pytest.fixture
def storage_root(tmp_path):
    """Bare data root containing only the .myfsio.sys/config scaffold."""
    data_root = tmp_path / "data"
    # parents=True creates data_root and .myfsio.sys along the way.
    (data_root / ".myfsio.sys" / "config").mkdir(parents=True, exist_ok=True)
    return data_root
@pytest.fixture
def checker(storage_root):
    """An IntegrityChecker over the temp storage root with healing disabled."""
    return IntegrityChecker(
        storage_root=storage_root,
        interval_hours=24.0,
        batch_size=1000,
        auto_heal=False,
        dry_run=False,
    )
class TestCorruptedObjects:
    """Detection and healing of objects whose bytes no longer match their etag."""

    def test_detect_corrupted(self, storage_root, checker):
        """Rewriting an object's bytes without updating metadata is flagged."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello world"})
        (storage_root / "mybucket" / "file.txt").write_bytes(b"corrupted data")
        result = checker.run_now()
        assert result.corrupted_objects == 1
        issues = _issues_of_type(result, "corrupted_object")
        assert len(issues) == 1
        assert issues[0].bucket == "mybucket"
        assert issues[0].key == "file.txt"
        assert not issues[0].healed

    def test_heal_corrupted(self, storage_root, checker):
        """auto_heal marks the issue healed and a follow-up scan is clean."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello world"})
        (storage_root / "mybucket" / "file.txt").write_bytes(b"corrupted data")
        result = checker.run_now(auto_heal=True)
        assert result.corrupted_objects == 1
        assert result.issues_healed == 1
        issues = _issues_of_type(result, "corrupted_object")
        assert issues[0].healed
        result2 = checker.run_now()
        assert result2.corrupted_objects == 0

    def test_valid_objects_pass(self, storage_root, checker):
        """An object matching its recorded etag produces no issues."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello world"})
        result = checker.run_now()
        assert result.corrupted_objects == 0
        assert result.objects_scanned == 1

    def test_corrupted_nested_key(self, storage_root, checker):
        """Corruption is detected for keys nested in subdirectories."""
        _setup_bucket(storage_root, "mybucket", {"sub/dir/file.txt": b"nested content"})
        (storage_root / "mybucket" / "sub" / "dir" / "file.txt").write_bytes(b"bad")
        result = checker.run_now()
        assert result.corrupted_objects == 1
        issues = _issues_of_type(result, "corrupted_object")
        assert issues[0].key == "sub/dir/file.txt"
class TestOrphanedObjects:
    """Data files present on disk with no matching metadata entry."""

    def test_detect_orphaned(self, storage_root, checker):
        """A file dropped into the bucket outside the API is reported."""
        _setup_bucket(storage_root, "mybucket", {})
        (storage_root / "mybucket" / "orphan.txt").write_bytes(b"orphan data")
        result = checker.run_now()
        assert result.orphaned_objects == 1
        issues = _issues_of_type(result, "orphaned_object")
        assert len(issues) == 1

    def test_heal_orphaned(self, storage_root, checker):
        """Healing resolves the orphan; the rescan reports none yet still scans
        at least one object, suggesting the file was indexed rather than deleted
        (NOTE(review): confirm against app.integrity's heal behavior)."""
        _setup_bucket(storage_root, "mybucket", {})
        (storage_root / "mybucket" / "orphan.txt").write_bytes(b"orphan data")
        result = checker.run_now(auto_heal=True)
        assert result.orphaned_objects == 1
        assert result.issues_healed == 1
        issues = _issues_of_type(result, "orphaned_object")
        assert issues[0].healed
        result2 = checker.run_now()
        assert result2.orphaned_objects == 0
        assert result2.objects_scanned >= 1
class TestPhantomMetadata:
    """Metadata index entries whose backing object file has vanished."""

    def test_detect_phantom(self, storage_root, checker):
        """Deleting the data file leaves a phantom index entry the scan reports."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        (storage_root / "mybucket" / "file.txt").unlink()
        outcome = checker.run_now()
        assert outcome.phantom_metadata == 1
        phantoms = _issues_of_type(outcome, "phantom_metadata")
        assert len(phantoms) == 1

    def test_heal_phantom(self, storage_root, checker):
        """With auto_heal the phantom is resolved and a rescan is clean."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        (storage_root / "mybucket" / "file.txt").unlink()
        outcome = checker.run_now(auto_heal=True)
        assert outcome.phantom_metadata == 1
        assert outcome.issues_healed == 1
        rescan = checker.run_now()
        assert rescan.phantom_metadata == 0
class TestStaleVersions:
    """Version entries missing their data or manifest counterpart."""

    def test_manifest_without_data(self, storage_root, checker):
        """A v1.json with no matching v1.bin is reported as stale."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        versions_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "versions" / "file.txt"
        versions_root.mkdir(parents=True)
        (versions_root / "v1.json").write_text(json.dumps({"etag": "abc"}))
        result = checker.run_now()
        assert result.stale_versions == 1
        issues = _issues_of_type(result, "stale_version")
        assert "manifest without data" in issues[0].detail

    def test_data_without_manifest(self, storage_root, checker):
        """A v1.bin with no matching v1.json is reported as stale."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        versions_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "versions" / "file.txt"
        versions_root.mkdir(parents=True)
        (versions_root / "v1.bin").write_bytes(b"old data")
        result = checker.run_now()
        assert result.stale_versions == 1
        issues = _issues_of_type(result, "stale_version")
        assert "data without manifest" in issues[0].detail

    def test_heal_stale_versions(self, storage_root, checker):
        """Healing removes both kinds of unpaired version files."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        versions_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "versions" / "file.txt"
        versions_root.mkdir(parents=True)
        (versions_root / "v1.json").write_text(json.dumps({"etag": "abc"}))
        (versions_root / "v2.bin").write_bytes(b"old data")
        result = checker.run_now(auto_heal=True)
        assert result.stale_versions == 2
        assert result.issues_healed == 2
        assert not (versions_root / "v1.json").exists()
        assert not (versions_root / "v2.bin").exists()

    def test_valid_versions_pass(self, storage_root, checker):
        """A matched .json/.bin pair is not flagged."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        versions_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "versions" / "file.txt"
        versions_root.mkdir(parents=True)
        (versions_root / "v1.json").write_text(json.dumps({"etag": "abc"}))
        (versions_root / "v1.bin").write_bytes(b"old data")
        result = checker.run_now()
        assert result.stale_versions == 0
class TestEtagCache:
    """Consistency between etag_index.json and actual object etags."""

    def test_detect_mismatch(self, storage_root, checker):
        """A cached etag that disagrees with the object is reported."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        etag_path = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "etag_index.json"
        etag_path.write_text(json.dumps({"file.txt": "wrong_etag"}))
        result = checker.run_now()
        assert result.etag_cache_inconsistencies == 1
        issues = _issues_of_type(result, "etag_cache_inconsistency")
        assert len(issues) == 1

    def test_heal_mismatch(self, storage_root, checker):
        """Healing removes the inconsistent cache file entirely."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        etag_path = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "etag_index.json"
        etag_path.write_text(json.dumps({"file.txt": "wrong_etag"}))
        result = checker.run_now(auto_heal=True)
        assert result.etag_cache_inconsistencies == 1
        assert result.issues_healed == 1
        assert not etag_path.exists()
class TestLegacyMetadata:
    """Handling of pre-migration .meta/<key>.meta.json files."""

    def test_detect_unmigrated(self, storage_root, checker):
        """A legacy meta file with no index entry at all counts as unmigrated."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        legacy_meta = storage_root / "mybucket" / ".meta" / "file.txt.meta.json"
        legacy_meta.parent.mkdir(parents=True)
        legacy_meta.write_text(json.dumps({"__etag__": "different_value"}))
        meta_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "meta"
        index_path = meta_root / "_index.json"
        # Remove the new-layout index so only the legacy file remains.
        index_path.unlink()
        result = checker.run_now()
        assert result.legacy_metadata_drifts == 1
        issues = _issues_of_type(result, "legacy_metadata_drift")
        assert len(issues) == 1
        assert issues[0].detail == "unmigrated legacy .meta.json"

    def test_detect_drift(self, storage_root, checker):
        """A legacy meta file disagreeing with the index is flagged as drift."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        legacy_meta = storage_root / "mybucket" / ".meta" / "file.txt.meta.json"
        legacy_meta.parent.mkdir(parents=True)
        legacy_meta.write_text(json.dumps({"__etag__": "different_value"}))
        result = checker.run_now()
        assert result.legacy_metadata_drifts == 1
        issues = _issues_of_type(result, "legacy_metadata_drift")
        assert "differs from index" in issues[0].detail

    def test_heal_unmigrated(self, storage_root, checker):
        """Healing migrates the legacy payload into the index and deletes it."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        legacy_meta = storage_root / "mybucket" / ".meta" / "file.txt.meta.json"
        legacy_meta.parent.mkdir(parents=True)
        legacy_data = {"__etag__": _md5(b"hello"), "__size__": "5"}
        legacy_meta.write_text(json.dumps(legacy_data))
        meta_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "meta"
        index_path = meta_root / "_index.json"
        index_path.unlink()
        result = checker.run_now(auto_heal=True)
        assert result.legacy_metadata_drifts == 1
        legacy_issues = _issues_of_type(result, "legacy_metadata_drift")
        assert len(legacy_issues) == 1
        assert legacy_issues[0].healed
        assert not legacy_meta.exists()
        index_data = json.loads(index_path.read_text())
        assert "file.txt" in index_data
        assert index_data["file.txt"]["metadata"]["__etag__"] == _md5(b"hello")

    def test_heal_drift(self, storage_root, checker):
        """Healing a drifted legacy file removes the legacy copy."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        legacy_meta = storage_root / "mybucket" / ".meta" / "file.txt.meta.json"
        legacy_meta.parent.mkdir(parents=True)
        legacy_meta.write_text(json.dumps({"__etag__": "different_value"}))
        result = checker.run_now(auto_heal=True)
        assert result.legacy_metadata_drifts == 1
        legacy_issues = _issues_of_type(result, "legacy_metadata_drift")
        assert legacy_issues[0].healed
        assert not legacy_meta.exists()
class TestDryRun:
    """dry_run overrides auto_heal: issues are reported but nothing changes."""

    def test_dry_run_no_changes(self, storage_root, checker):
        """Corruption and orphans are counted, yet zero heals and an untouched index."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        (storage_root / "mybucket" / "file.txt").write_bytes(b"corrupted")
        (storage_root / "mybucket" / "orphan.txt").write_bytes(b"orphan")
        result = checker.run_now(auto_heal=True, dry_run=True)
        assert result.corrupted_objects == 1
        assert result.orphaned_objects == 1
        assert result.issues_healed == 0
        # The orphan must NOT have been adopted into the index.
        meta_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "meta"
        index_data = json.loads((meta_root / "_index.json").read_text())
        assert "orphan.txt" not in index_data
class TestBatchSize:
    """batch_size caps how many objects a single pass scans."""

    def test_batch_limits_scan(self, storage_root):
        """With batch_size=3, a 10-object bucket yields at most 3 scanned objects."""
        contents = {f"file{i}.txt": f"data{i}".encode() for i in range(10)}
        _setup_bucket(storage_root, "mybucket", contents)
        limited = IntegrityChecker(
            storage_root=storage_root,
            batch_size=3,
        )
        outcome = limited.run_now()
        assert outcome.objects_scanned <= 3
class TestHistory:
    """Recording and pagination of integrity-scan history."""

    def test_history_recorded(self, storage_root, checker):
        """Each run appends an entry whose result carries the issue counters."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        checker.run_now()
        history = checker.get_history()
        assert len(history) == 1
        assert "corrupted_objects" in history[0]["result"]

    def test_history_multiple(self, storage_root, checker):
        """Three runs yield three history entries."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        checker.run_now()
        checker.run_now()
        checker.run_now()
        history = checker.get_history()
        assert len(history) == 3

    def test_history_pagination(self, storage_root, checker):
        """limit/offset slice the history list."""
        _setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
        for _ in range(5):
            checker.run_now()
        history = checker.get_history(limit=2, offset=1)
        assert len(history) == 2
# Credentials matching the admin user provisioned by the TestAdminAPI fixtures below.
AUTH_HEADERS = {"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"}
class TestAdminAPI:
    """HTTP admin endpoints for the integrity scanner (/admin/integrity/*)."""

    @pytest.fixture
    def integrity_app(self, tmp_path):
        """Flask app with the integrity scanner enabled and one admin user."""
        from app import create_api_app
        storage_root = tmp_path / "data"
        iam_config = tmp_path / "iam.json"
        bucket_policies = tmp_path / "bucket_policies.json"
        iam_payload = {
            "users": [
                {
                    "access_key": "admin",
                    "secret_key": "adminsecret",
                    "display_name": "Admin",
                    "policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy", "iam:*"]}],
                }
            ]
        }
        iam_config.write_text(json.dumps(iam_payload))
        flask_app = create_api_app({
            "TESTING": True,
            "SECRET_KEY": "testing",
            "STORAGE_ROOT": storage_root,
            "IAM_CONFIG": iam_config,
            "BUCKET_POLICY_PATH": bucket_policies,
            "API_BASE_URL": "http://testserver",
            "INTEGRITY_ENABLED": True,
            "INTEGRITY_AUTO_HEAL": False,
            "INTEGRITY_DRY_RUN": False,
        })
        yield flask_app
        # Shut down the storage stats worker and the integrity scanner so
        # neither outlives the test.
        storage = flask_app.extensions.get("object_storage")
        if storage:
            base = getattr(storage, "storage", storage)
            if hasattr(base, "shutdown_stats"):
                base.shutdown_stats()
        ic = flask_app.extensions.get("integrity")
        if ic:
            ic.stop()

    def test_status_endpoint(self, integrity_app):
        """GET /admin/integrity/status reports enabled=True plus config fields."""
        client = integrity_app.test_client()
        resp = client.get("/admin/integrity/status", headers=AUTH_HEADERS)
        assert resp.status_code == 200
        data = resp.get_json()
        assert data["enabled"] is True
        assert "interval_hours" in data

    def test_run_endpoint(self, integrity_app):
        """POST /admin/integrity/run executes a scan and returns its counters."""
        client = integrity_app.test_client()
        resp = client.post("/admin/integrity/run", headers=AUTH_HEADERS, json={})
        assert resp.status_code == 200
        data = resp.get_json()
        assert "corrupted_objects" in data
        assert "objects_scanned" in data

    def test_run_with_overrides(self, integrity_app):
        """The run endpoint accepts dry_run/auto_heal overrides in the body."""
        client = integrity_app.test_client()
        resp = client.post(
            "/admin/integrity/run",
            headers=AUTH_HEADERS,
            json={"dry_run": True, "auto_heal": True},
        )
        assert resp.status_code == 200

    def test_history_endpoint(self, integrity_app):
        """GET /admin/integrity/history lists executions after a run."""
        client = integrity_app.test_client()
        client.post("/admin/integrity/run", headers=AUTH_HEADERS, json={})
        resp = client.get("/admin/integrity/history", headers=AUTH_HEADERS)
        assert resp.status_code == 200
        data = resp.get_json()
        assert "executions" in data
        assert len(data["executions"]) >= 1

    def test_auth_required(self, integrity_app):
        """Requests without credentials are rejected."""
        client = integrity_app.test_client()
        resp = client.get("/admin/integrity/status")
        assert resp.status_code in (401, 403)

    def test_disabled_status(self, tmp_path):
        """With INTEGRITY_ENABLED=False the status endpoint reports enabled=False."""
        from app import create_api_app
        storage_root = tmp_path / "data2"
        iam_config = tmp_path / "iam2.json"
        bucket_policies = tmp_path / "bp2.json"
        iam_payload = {
            "users": [
                {
                    "access_key": "admin",
                    "secret_key": "adminsecret",
                    "display_name": "Admin",
                    "policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy", "iam:*"]}],
                }
            ]
        }
        iam_config.write_text(json.dumps(iam_payload))
        flask_app = create_api_app({
            "TESTING": True,
            "SECRET_KEY": "testing",
            "STORAGE_ROOT": storage_root,
            "IAM_CONFIG": iam_config,
            "BUCKET_POLICY_PATH": bucket_policies,
            "API_BASE_URL": "http://testserver",
            "INTEGRITY_ENABLED": False,
        })
        c = flask_app.test_client()
        resp = c.get("/admin/integrity/status", headers=AUTH_HEADERS)
        assert resp.status_code == 200
        data = resp.get_json()
        assert data["enabled"] is False
        # Manual cleanup since this app is not fixture-managed.
        storage = flask_app.extensions.get("object_storage")
        if storage:
            base = getattr(storage, "storage", storage)
            if hasattr(base, "shutdown_stats"):
                base.shutdown_stats()
class TestMultipleBuckets:
    """Scans cover every bucket under the storage root."""

    def test_scans_multiple_buckets(self, storage_root, checker):
        """Two healthy buckets -> two buckets and two objects scanned, no issues."""
        _setup_bucket(storage_root, "bucket1", {"a.txt": b"aaa"})
        _setup_bucket(storage_root, "bucket2", {"b.txt": b"bbb"})
        outcome = checker.run_now()
        assert outcome.buckets_scanned == 2
        assert outcome.objects_scanned == 2
        assert outcome.corrupted_objects == 0
class TestGetStatus:
    """Field inventory of IntegrityChecker.get_status()."""

    def test_status_fields(self, checker):
        """Status exposes every configuration and runtime field."""
        status = checker.get_status()
        for field in ("enabled", "running", "interval_hours", "batch_size", "auto_heal", "dry_run"):
            assert field in status