24 Commits

Author SHA1 Message Date
cc161bf362 MyFSIO v0.3.8 Release
Reviewed-on: #31
2026-03-10 08:31:27 +00:00
de5377e5ac Add garbage collection: background cleanup of orphaned temp files, multipart uploads, lock files, metadata, versions, and empty directories 2026-03-09 17:34:21 +08:00
80b77b64eb Fix bucket dashboard missing created date and incorrect object count badge in folder view 2026-03-09 15:27:08 +08:00
6c912a3d71 Add conditional GET/HEAD headers: If-Match, If-None-Match, If-Modified-Since, If-Unmodified-Since 2026-03-09 15:09:15 +08:00
2a0e77a754 MyFSIO v0.3.7 Release
Reviewed-on: #30
2026-03-09 06:25:50 +00:00
c6e368324a Update docs.md and docs.html for credential expiry, IAM encryption, admin key env vars, and --reset-cred 2026-03-08 13:38:44 +08:00
7b6c096bb7 Remove the "check out the documentation" paragraph from the login page 2026-03-08 13:18:03 +08:00
03353a0aec Add credential expiry support: per-user expires_at with UI management, presets, and badge indicators; Fix IAM card dropdown clipped by overflow: remove gradient bar, allow overflow visible 2026-03-08 13:08:57 +08:00
eb0e435a5a MyFSIO v0.3.6 Release
Reviewed-on: #29
2026-03-08 04:46:31 +00:00
72f5d9d70c Restore data integrity guarantees: Content-MD5 validation, fsync durability, atomic metadata writes, concurrent write protection 2026-03-07 17:54:00 +08:00
be63e27c15 Reduce per-request CPU overhead: eliminate double stat(), cache content type and policy context, gate logging, configurable stat intervals 2026-03-07 14:08:23 +08:00
7633007a08 MyFSIO v0.3.5 Release
Reviewed-on: #28
2026-03-07 05:53:02 +00:00
81ef0fe4c7 Fix stale object count in bucket header and metrics dashboard after deletes 2026-03-03 19:42:37 +08:00
5f24bd920d Reduce P99 tail latency: defer etag index writes, eliminate double cache rebuild, skip redundant stat() in bucket config 2026-03-02 22:39:37 +08:00
8552f193de Reduce CPU/lock contention under concurrent uploads: split cache lock, in-memory stats, dict copy, lightweight request IDs, defaultdict metrics 2026-03-02 22:05:54 +08:00
de0d869c9f Merge pull request 'MyFSIO v0.3.4 Release' (#27) from next into main
Reviewed-on: #27
2026-03-02 08:31:32 +00:00
5536330aeb Move performance-critical Python functions to Rust: streaming I/O, multipart assembly, and AES-256-GCM encryption 2026-02-27 22:55:20 +08:00
d4657c389d Fix misleading default credentials in README to match actual random generation behavior 2026-02-27 21:58:10 +08:00
3827235232 Reduce CPU usage on heavy uploads: skip SHA256 body hashing in SigV4, use Rust md5_file post-write instead of per-chunk _HashingReader 2026-02-27 21:57:13 +08:00
fdd068feee MyFSIO v0.3.3 Release
Reviewed-on: #26
2026-02-27 04:49:32 +00:00
dfc0058d0d Extend myfsio_core Rust extension with 7 storage hot paths (directory scanning, metadata I/O, object listing, search, bucket stats, cache building) 2026-02-27 12:22:39 +08:00
27aef84311 Fix rclone CopyObject SignatureDoesNotMatch caused by internal metadata leaking as X-Amz-Meta headers 2026-02-26 21:39:43 +08:00
66b7677d2c MyFSIO v0.3.2 Release
Reviewed-on: #25
2026-02-26 10:10:19 +00:00
5003514a3d Fix null ETags in shallow listing by updating etag index on store/delete 2026-02-26 18:09:08 +08:00
32 changed files with 4120 additions and 414 deletions

View File

@@ -80,7 +80,7 @@ python run.py --mode api # API only (port 5000)
 python run.py --mode ui   # UI only (port 5100)
 ```
-**Default Credentials:** `localadmin` / `localadmin`
+**Credentials:** Generated automatically on first run and printed to the console. If missed, check the IAM config file at `<STORAGE_ROOT>/.myfsio.sys/config/iam.json`.
 - **Web Console:** http://127.0.0.1:5100/ui
 - **API Endpoint:** http://127.0.0.1:5000
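
If the first-run console output was missed, the generated admin credentials can be read back from the IAM config while it is still plaintext. A minimal sketch, assuming a hypothetical storage root and the path from the README above (this will not work once the config is encrypted at rest via SECRET_KEY):

```python
import json
from pathlib import Path

# Hypothetical storage root; substitute your actual STORAGE_ROOT.
iam_path = Path("storage") / ".myfsio.sys" / "config" / "iam.json"

config = json.loads(iam_path.read_text(encoding="utf-8"))
for user in config["users"]:
    print(user["access_key"], user["secret_key"])
```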

View File

@@ -1,13 +1,13 @@
 from __future__ import annotations
 import html as html_module
+import itertools
 import logging
 import mimetypes
 import os
 import shutil
 import sys
 import time
-import uuid
 from logging.handlers import RotatingFileHandler
 from pathlib import Path
 from datetime import timedelta
@@ -29,6 +29,7 @@ from .encryption import EncryptionManager
 from .extensions import limiter, csrf
 from .iam import IamService
 from .kms import KMSManager
+from .gc import GarbageCollector
 from .lifecycle import LifecycleManager
 from .notifications import NotificationService
 from .object_lock import ObjectLockService
@@ -39,6 +40,8 @@ from .storage import ObjectStorage, StorageError
 from .version import get_version
 from .website_domains import WebsiteDomainStore

+_request_counter = itertools.count(1)
+

 def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
     """Migrate config file from legacy locations to the active path.
@@ -128,6 +131,7 @@ def create_app(
Path(app.config["IAM_CONFIG"]), Path(app.config["IAM_CONFIG"]),
auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5), auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5),
auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15), auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15),
encryption_key=app.config.get("SECRET_KEY"),
) )
bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"])) bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300)) secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))
@@ -218,6 +222,18 @@ def create_app(
     )
     lifecycle_manager.start()

+    gc_collector = None
+    if app.config.get("GC_ENABLED", False):
+        gc_collector = GarbageCollector(
+            storage_root=storage_root,
+            interval_hours=app.config.get("GC_INTERVAL_HOURS", 6.0),
+            temp_file_max_age_hours=app.config.get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0),
+            multipart_max_age_days=app.config.get("GC_MULTIPART_MAX_AGE_DAYS", 7),
+            lock_file_max_age_hours=app.config.get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0),
+            dry_run=app.config.get("GC_DRY_RUN", False),
+        )
+        gc_collector.start()
+
     app.extensions["object_storage"] = storage
     app.extensions["iam"] = iam
     app.extensions["bucket_policies"] = bucket_policies
@@ -229,6 +245,7 @@ def create_app(
app.extensions["kms"] = kms_manager app.extensions["kms"] = kms_manager
app.extensions["acl"] = acl_service app.extensions["acl"] = acl_service
app.extensions["lifecycle"] = lifecycle_manager app.extensions["lifecycle"] = lifecycle_manager
app.extensions["gc"] = gc_collector
app.extensions["object_lock"] = object_lock_service app.extensions["object_lock"] = object_lock_service
app.extensions["notifications"] = notification_service app.extensions["notifications"] = notification_service
app.extensions["access_logging"] = access_logging_service app.extensions["access_logging"] = access_logging_service
@@ -481,13 +498,9 @@ def _configure_logging(app: Flask) -> None:
     @app.before_request
     def _log_request_start() -> None:
-        g.request_id = uuid.uuid4().hex
+        g.request_id = f"{os.getpid():x}{next(_request_counter):012x}"
         g.request_started_at = time.perf_counter()
         g.request_bytes_in = request.content_length or 0
-        app.logger.info(
-            "Request started",
-            extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
-        )

     @app.before_request
     def _maybe_serve_website():
@@ -616,16 +629,17 @@ def _configure_logging(app: Flask) -> None:
         duration_ms = 0.0
         if hasattr(g, "request_started_at"):
             duration_ms = (time.perf_counter() - g.request_started_at) * 1000
-        request_id = getattr(g, "request_id", uuid.uuid4().hex)
+        request_id = getattr(g, "request_id", f"{os.getpid():x}{next(_request_counter):012x}")
         response.headers.setdefault("X-Request-ID", request_id)
-        app.logger.info(
-            "Request completed",
-            extra={
-                "path": request.path,
-                "method": request.method,
-                "remote_addr": request.remote_addr,
-            },
-        )
+        if app.logger.isEnabledFor(logging.INFO):
+            app.logger.info(
+                "Request completed",
+                extra={
+                    "path": request.path,
+                    "method": request.method,
+                    "remote_addr": request.remote_addr,
+                },
+            )
         response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
         operation_metrics = app.extensions.get("operation_metrics")
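
The replaced request IDs trade UUID randomness for a process-wide counter, which is cheaper per request. A standalone sketch of the same scheme (names mirror the diff):

```python
import itertools
import os

_request_counter = itertools.count(1)

def next_request_id() -> str:
    # PID in hex plus a zero-padded 12-hex-digit counter: unique within a
    # process lifetime and much cheaper than uuid4().
    return f"{os.getpid():x}{next(_request_counter):012x}"

print(next_request_id())  # e.g. '3e84000000000001'
```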

View File

@@ -14,6 +14,7 @@ from flask import Blueprint, Response, current_app, jsonify, request
 from .connections import ConnectionStore
 from .extensions import limiter
+from .gc import GarbageCollector
 from .iam import IamError, Principal
 from .replication import ReplicationManager
 from .site_registry import PeerSite, SiteInfo, SiteRegistry
@@ -776,3 +777,55 @@ def delete_website_domain(domain: str):
         return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
     logger.info("Website domain mapping deleted: %s", domain)
     return Response(status=204)
+
+
+def _gc() -> Optional[GarbageCollector]:
+    return current_app.extensions.get("gc")
+
+
+@admin_api_bp.route("/gc/status", methods=["GET"])
+@limiter.limit(lambda: _get_admin_rate_limit())
+def gc_status():
+    principal, error = _require_admin()
+    if error:
+        return error
+    gc = _gc()
+    if not gc:
+        return jsonify({"enabled": False, "message": "GC is not enabled. Set GC_ENABLED=true to enable."})
+    return jsonify(gc.get_status())
+
+
+@admin_api_bp.route("/gc/run", methods=["POST"])
+@limiter.limit(lambda: _get_admin_rate_limit())
+def gc_run_now():
+    principal, error = _require_admin()
+    if error:
+        return error
+    gc = _gc()
+    if not gc:
+        return _json_error("InvalidRequest", "GC is not enabled", 400)
+    payload = request.get_json(silent=True) or {}
+    original_dry_run = gc.dry_run
+    if "dry_run" in payload:
+        gc.dry_run = bool(payload["dry_run"])
+    try:
+        result = gc.run_now()
+    finally:
+        gc.dry_run = original_dry_run
+    logger.info("GC manual run by %s", principal.access_key)
+    return jsonify(result.to_dict())
+
+
+@admin_api_bp.route("/gc/history", methods=["GET"])
+@limiter.limit(lambda: _get_admin_rate_limit())
+def gc_history():
+    principal, error = _require_admin()
+    if error:
+        return error
+    gc = _gc()
+    if not gc:
+        return jsonify({"executions": []})
+    limit = min(int(request.args.get("limit", 50)), 200)
+    offset = int(request.args.get("offset", 0))
+    records = gc.get_history(limit=limit, offset=offset)
+    return jsonify({"executions": records})

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
 import ipaddress
 import json
+import os
 import re
 import time
 from dataclasses import dataclass, field
@@ -268,7 +269,7 @@ class BucketPolicyStore:
         self._last_mtime = self._current_mtime()
         # Performance: Avoid stat() on every request
         self._last_stat_check = 0.0
-        self._stat_check_interval = 1.0  # Only check mtime every 1 second
+        self._stat_check_interval = float(os.environ.get("BUCKET_POLICY_STAT_CHECK_INTERVAL_SECONDS", "2.0"))

     def maybe_reload(self) -> None:
         # Performance: Skip stat check if we checked recently
View File

@@ -150,6 +150,12 @@ class AppConfig:
     allowed_redirect_hosts: list[str]
     allow_internal_endpoints: bool
     website_hosting_enabled: bool
+    gc_enabled: bool
+    gc_interval_hours: float
+    gc_temp_file_max_age_hours: float
+    gc_multipart_max_age_days: int
+    gc_lock_file_max_age_hours: float
+    gc_dry_run: bool

     @classmethod
     def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
@@ -319,6 +325,12 @@ class AppConfig:
         allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
         allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
         website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
+        gc_enabled = str(_get("GC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
+        gc_interval_hours = float(_get("GC_INTERVAL_HOURS", 6.0))
+        gc_temp_file_max_age_hours = float(_get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0))
+        gc_multipart_max_age_days = int(_get("GC_MULTIPART_MAX_AGE_DAYS", 7))
+        gc_lock_file_max_age_hours = float(_get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0))
+        gc_dry_run = str(_get("GC_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}

         return cls(storage_root=storage_root,
                    max_upload_size=max_upload_size,
@@ -406,7 +418,13 @@ class AppConfig:
                    num_trusted_proxies=num_trusted_proxies,
                    allowed_redirect_hosts=allowed_redirect_hosts,
                    allow_internal_endpoints=allow_internal_endpoints,
-                   website_hosting_enabled=website_hosting_enabled)
+                   website_hosting_enabled=website_hosting_enabled,
+                   gc_enabled=gc_enabled,
+                   gc_interval_hours=gc_interval_hours,
+                   gc_temp_file_max_age_hours=gc_temp_file_max_age_hours,
+                   gc_multipart_max_age_days=gc_multipart_max_age_days,
+                   gc_lock_file_max_age_hours=gc_lock_file_max_age_hours,
+                   gc_dry_run=gc_dry_run)

     def validate_and_report(self) -> list[str]:
         """Validate configuration and return a list of warnings/issues.
@@ -617,4 +635,10 @@ class AppConfig:
             "ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
             "ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
             "WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled,
+            "GC_ENABLED": self.gc_enabled,
+            "GC_INTERVAL_HOURS": self.gc_interval_hours,
+            "GC_TEMP_FILE_MAX_AGE_HOURS": self.gc_temp_file_max_age_hours,
+            "GC_MULTIPART_MAX_AGE_DAYS": self.gc_multipart_max_age_days,
+            "GC_LOCK_FILE_MAX_AGE_HOURS": self.gc_lock_file_max_age_hours,
+            "GC_DRY_RUN": self.gc_dry_run,
         }
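
All six GC knobs are plain environment variables. A minimal sketch of enabling GC in dry-run mode before building the config (the `app.config` import path is an assumption):

```python
import os

# Report what would be reclaimed without deleting anything.
os.environ["GC_ENABLED"] = "true"
os.environ["GC_DRY_RUN"] = "true"
os.environ["GC_INTERVAL_HOURS"] = "1.0"          # scan hourly
os.environ["GC_TEMP_FILE_MAX_AGE_HOURS"] = "24"  # keep temp files a day

from app.config import AppConfig  # import path assumed

cfg = AppConfig.from_env()
print(cfg.gc_enabled, cfg.gc_dry_run)  # True True
```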

View File

@@ -19,6 +19,13 @@ from cryptography.hazmat.primitives import hashes
if sys.platform != "win32": if sys.platform != "win32":
import fcntl import fcntl
try:
import myfsio_core as _rc
_HAS_RUST = True
except ImportError:
_rc = None
_HAS_RUST = False
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -338,6 +345,69 @@ class StreamingEncryptor:
output.seek(0) output.seek(0)
return output return output
def encrypt_file(self, input_path: str, output_path: str) -> EncryptionMetadata:
data_key, encrypted_data_key = self.provider.generate_data_key()
base_nonce = secrets.token_bytes(12)
if _HAS_RUST:
_rc.encrypt_stream_chunked(
input_path, output_path, data_key, base_nonce, self.chunk_size
)
else:
with open(input_path, "rb") as stream:
aesgcm = AESGCM(data_key)
with open(output_path, "wb") as out:
out.write(b"\x00\x00\x00\x00")
chunk_index = 0
while True:
chunk = stream.read(self.chunk_size)
if not chunk:
break
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None)
out.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big"))
out.write(encrypted_chunk)
chunk_index += 1
out.seek(0)
out.write(chunk_index.to_bytes(4, "big"))
return EncryptionMetadata(
algorithm="AES256",
key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local",
nonce=base_nonce,
encrypted_data_key=encrypted_data_key,
)
def decrypt_file(self, input_path: str, output_path: str,
metadata: EncryptionMetadata) -> None:
data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id)
base_nonce = metadata.nonce
if _HAS_RUST:
_rc.decrypt_stream_chunked(input_path, output_path, data_key, base_nonce)
else:
with open(input_path, "rb") as stream:
chunk_count_bytes = stream.read(4)
if len(chunk_count_bytes) < 4:
raise EncryptionError("Invalid encrypted stream: missing header")
chunk_count = int.from_bytes(chunk_count_bytes, "big")
aesgcm = AESGCM(data_key)
with open(output_path, "wb") as out:
for chunk_index in range(chunk_count):
size_bytes = stream.read(self.HEADER_SIZE)
if len(size_bytes) < self.HEADER_SIZE:
raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}")
chunk_size = int.from_bytes(size_bytes, "big")
encrypted_chunk = stream.read(chunk_size)
if len(encrypted_chunk) < chunk_size:
raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}")
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
try:
decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None)
out.write(decrypted_chunk)
except Exception as exc:
raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc
class EncryptionManager: class EncryptionManager:
"""Manages encryption providers and operations.""" """Manages encryption providers and operations."""

app/gc.py (new file, 531 lines)
View File

@@ -0,0 +1,531 @@
from __future__ import annotations

import json
import logging
import os
import shutil
import threading
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)


@dataclass
class GCResult:
    temp_files_deleted: int = 0
    temp_bytes_freed: int = 0
    multipart_uploads_deleted: int = 0
    multipart_bytes_freed: int = 0
    lock_files_deleted: int = 0
    orphaned_metadata_deleted: int = 0
    orphaned_versions_deleted: int = 0
    orphaned_version_bytes_freed: int = 0
    empty_dirs_removed: int = 0
    errors: List[str] = field(default_factory=list)
    execution_time_seconds: float = 0.0

    def to_dict(self) -> dict:
        return {
            "temp_files_deleted": self.temp_files_deleted,
            "temp_bytes_freed": self.temp_bytes_freed,
            "multipart_uploads_deleted": self.multipart_uploads_deleted,
            "multipart_bytes_freed": self.multipart_bytes_freed,
            "lock_files_deleted": self.lock_files_deleted,
            "orphaned_metadata_deleted": self.orphaned_metadata_deleted,
            "orphaned_versions_deleted": self.orphaned_versions_deleted,
            "orphaned_version_bytes_freed": self.orphaned_version_bytes_freed,
            "empty_dirs_removed": self.empty_dirs_removed,
            "errors": self.errors,
            "execution_time_seconds": self.execution_time_seconds,
        }

    @property
    def total_bytes_freed(self) -> int:
        return self.temp_bytes_freed + self.multipart_bytes_freed + self.orphaned_version_bytes_freed

    @property
    def has_work(self) -> bool:
        return (
            self.temp_files_deleted > 0
            or self.multipart_uploads_deleted > 0
            or self.lock_files_deleted > 0
            or self.orphaned_metadata_deleted > 0
            or self.orphaned_versions_deleted > 0
            or self.empty_dirs_removed > 0
        )


@dataclass
class GCExecutionRecord:
    timestamp: float
    result: dict
    dry_run: bool

    def to_dict(self) -> dict:
        return {
            "timestamp": self.timestamp,
            "result": self.result,
            "dry_run": self.dry_run,
        }

    @classmethod
    def from_dict(cls, data: dict) -> GCExecutionRecord:
        return cls(
            timestamp=data["timestamp"],
            result=data["result"],
            dry_run=data.get("dry_run", False),
        )


class GCHistoryStore:
    def __init__(self, storage_root: Path, max_records: int = 50) -> None:
        self.storage_root = storage_root
        self.max_records = max_records
        self._lock = threading.Lock()

    def _get_path(self) -> Path:
        return self.storage_root / ".myfsio.sys" / "config" / "gc_history.json"

    def load(self) -> List[GCExecutionRecord]:
        path = self._get_path()
        if not path.exists():
            return []
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
            return [GCExecutionRecord.from_dict(d) for d in data.get("executions", [])]
        except (OSError, ValueError, KeyError) as e:
            logger.error("Failed to load GC history: %s", e)
            return []

    def save(self, records: List[GCExecutionRecord]) -> None:
        path = self._get_path()
        path.parent.mkdir(parents=True, exist_ok=True)
        data = {"executions": [r.to_dict() for r in records[: self.max_records]]}
        try:
            with open(path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
        except OSError as e:
            logger.error("Failed to save GC history: %s", e)

    def add(self, record: GCExecutionRecord) -> None:
        with self._lock:
            records = self.load()
            records.insert(0, record)
            self.save(records)

    def get_history(self, limit: int = 50, offset: int = 0) -> List[GCExecutionRecord]:
        return self.load()[offset : offset + limit]


def _dir_size(path: Path) -> int:
    total = 0
    try:
        for f in path.rglob("*"):
            if f.is_file():
                try:
                    total += f.stat().st_size
                except OSError:
                    pass
    except OSError:
        pass
    return total


def _file_age_hours(path: Path) -> float:
    try:
        mtime = path.stat().st_mtime
        return (time.time() - mtime) / 3600.0
    except OSError:
        return 0.0


class GarbageCollector:
    SYSTEM_ROOT = ".myfsio.sys"
    SYSTEM_TMP_DIR = "tmp"
    SYSTEM_MULTIPART_DIR = "multipart"
    SYSTEM_BUCKETS_DIR = "buckets"
    BUCKET_META_DIR = "meta"
    BUCKET_VERSIONS_DIR = "versions"
    INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}

    def __init__(
        self,
        storage_root: Path,
        interval_hours: float = 6.0,
        temp_file_max_age_hours: float = 24.0,
        multipart_max_age_days: int = 7,
        lock_file_max_age_hours: float = 1.0,
        dry_run: bool = False,
        max_history: int = 50,
    ) -> None:
        self.storage_root = Path(storage_root)
        self.interval_seconds = interval_hours * 3600.0
        self.temp_file_max_age_hours = temp_file_max_age_hours
        self.multipart_max_age_days = multipart_max_age_days
        self.lock_file_max_age_hours = lock_file_max_age_hours
        self.dry_run = dry_run
        self._timer: Optional[threading.Timer] = None
        self._shutdown = False
        self._lock = threading.Lock()
        self.history_store = GCHistoryStore(storage_root, max_records=max_history)

    def start(self) -> None:
        if self._timer is not None:
            return
        self._shutdown = False
        self._schedule_next()
        logger.info(
            "GC started: interval=%.1fh, temp_max_age=%.1fh, multipart_max_age=%dd, lock_max_age=%.1fh, dry_run=%s",
            self.interval_seconds / 3600.0,
            self.temp_file_max_age_hours,
            self.multipart_max_age_days,
            self.lock_file_max_age_hours,
            self.dry_run,
        )

    def stop(self) -> None:
        self._shutdown = True
        if self._timer:
            self._timer.cancel()
            self._timer = None
        logger.info("GC stopped")

    def _schedule_next(self) -> None:
        if self._shutdown:
            return
        self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
        self._timer.daemon = True
        self._timer.start()

    def _run_cycle(self) -> None:
        if self._shutdown:
            return
        try:
            self.run_now()
        except Exception as e:
            logger.error("GC cycle failed: %s", e)
        finally:
            self._schedule_next()

    def run_now(self) -> GCResult:
        start = time.time()
        result = GCResult()
        self._clean_temp_files(result)
        self._clean_orphaned_multipart(result)
        self._clean_stale_locks(result)
        self._clean_orphaned_metadata(result)
        self._clean_orphaned_versions(result)
        self._clean_empty_dirs(result)
        result.execution_time_seconds = time.time() - start
        if result.has_work or result.errors:
            logger.info(
                "GC completed in %.2fs: temp=%d (%.1f MB), multipart=%d (%.1f MB), "
                "locks=%d, meta=%d, versions=%d (%.1f MB), dirs=%d, errors=%d%s",
                result.execution_time_seconds,
                result.temp_files_deleted,
                result.temp_bytes_freed / (1024 * 1024),
                result.multipart_uploads_deleted,
                result.multipart_bytes_freed / (1024 * 1024),
                result.lock_files_deleted,
                result.orphaned_metadata_deleted,
                result.orphaned_versions_deleted,
                result.orphaned_version_bytes_freed / (1024 * 1024),
                result.empty_dirs_removed,
                len(result.errors),
                " (dry run)" if self.dry_run else "",
            )
        record = GCExecutionRecord(
            timestamp=time.time(),
            result=result.to_dict(),
            dry_run=self.dry_run,
        )
        self.history_store.add(record)
        return result

    def _system_path(self) -> Path:
        return self.storage_root / self.SYSTEM_ROOT

    def _list_bucket_names(self) -> List[str]:
        names = []
        try:
            for entry in self.storage_root.iterdir():
                if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
                    names.append(entry.name)
        except OSError:
            pass
        return names

    def _clean_temp_files(self, result: GCResult) -> None:
        tmp_dir = self._system_path() / self.SYSTEM_TMP_DIR
        if not tmp_dir.exists():
            return
        try:
            for entry in tmp_dir.iterdir():
                if not entry.is_file():
                    continue
                age = _file_age_hours(entry)
                if age < self.temp_file_max_age_hours:
                    continue
                try:
                    size = entry.stat().st_size
                    if not self.dry_run:
                        entry.unlink()
                    result.temp_files_deleted += 1
                    result.temp_bytes_freed += size
                except OSError as e:
                    result.errors.append(f"temp file {entry.name}: {e}")
        except OSError as e:
            result.errors.append(f"scan tmp dir: {e}")

    def _clean_orphaned_multipart(self, result: GCResult) -> None:
        cutoff_hours = self.multipart_max_age_days * 24.0
        bucket_names = self._list_bucket_names()
        for bucket_name in bucket_names:
            for multipart_root in (
                self._system_path() / self.SYSTEM_MULTIPART_DIR / bucket_name,
                self.storage_root / bucket_name / ".multipart",
            ):
                if not multipart_root.exists():
                    continue
                try:
                    for upload_dir in multipart_root.iterdir():
                        if not upload_dir.is_dir():
                            continue
                        self._maybe_clean_upload(upload_dir, cutoff_hours, result)
                except OSError as e:
                    result.errors.append(f"scan multipart {bucket_name}: {e}")

    def _maybe_clean_upload(self, upload_dir: Path, cutoff_hours: float, result: GCResult) -> None:
        manifest_path = upload_dir / "manifest.json"
        age = _file_age_hours(manifest_path) if manifest_path.exists() else _file_age_hours(upload_dir)
        if age < cutoff_hours:
            return
        dir_bytes = _dir_size(upload_dir)
        try:
            if not self.dry_run:
                shutil.rmtree(upload_dir, ignore_errors=True)
            result.multipart_uploads_deleted += 1
            result.multipart_bytes_freed += dir_bytes
        except OSError as e:
            result.errors.append(f"multipart {upload_dir.name}: {e}")

    def _clean_stale_locks(self, result: GCResult) -> None:
        buckets_root = self._system_path() / self.SYSTEM_BUCKETS_DIR
        if not buckets_root.exists():
            return
        try:
            for bucket_dir in buckets_root.iterdir():
                if not bucket_dir.is_dir():
                    continue
                locks_dir = bucket_dir / "locks"
                if not locks_dir.exists():
                    continue
                try:
                    for lock_file in locks_dir.iterdir():
                        if not lock_file.is_file() or not lock_file.name.endswith(".lock"):
                            continue
                        age = _file_age_hours(lock_file)
                        if age < self.lock_file_max_age_hours:
                            continue
                        try:
                            if not self.dry_run:
                                lock_file.unlink(missing_ok=True)
                            result.lock_files_deleted += 1
                        except OSError as e:
                            result.errors.append(f"lock {lock_file.name}: {e}")
                except OSError as e:
                    result.errors.append(f"scan locks {bucket_dir.name}: {e}")
        except OSError as e:
            result.errors.append(f"scan buckets for locks: {e}")

    def _clean_orphaned_metadata(self, result: GCResult) -> None:
        bucket_names = self._list_bucket_names()
        for bucket_name in bucket_names:
            legacy_meta = self.storage_root / bucket_name / ".meta"
            if legacy_meta.exists():
                self._clean_legacy_metadata(bucket_name, legacy_meta, result)
            new_meta = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
            if new_meta.exists():
                self._clean_index_metadata(bucket_name, new_meta, result)

    def _clean_legacy_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
        bucket_path = self.storage_root / bucket_name
        try:
            for meta_file in meta_root.rglob("*.meta.json"):
                if not meta_file.is_file():
                    continue
                try:
                    rel = meta_file.relative_to(meta_root)
                    object_key = rel.as_posix().removesuffix(".meta.json")
                    object_path = bucket_path / object_key
                    if not object_path.exists():
                        if not self.dry_run:
                            meta_file.unlink(missing_ok=True)
                        result.orphaned_metadata_deleted += 1
                except (OSError, ValueError) as e:
                    result.errors.append(f"legacy meta {bucket_name}/{meta_file.name}: {e}")
        except OSError as e:
            result.errors.append(f"scan legacy meta {bucket_name}: {e}")

    def _clean_index_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
        bucket_path = self.storage_root / bucket_name
        try:
            for index_file in meta_root.rglob("_index.json"):
                if not index_file.is_file():
                    continue
                try:
                    with open(index_file, "r", encoding="utf-8") as f:
                        index_data = json.load(f)
                except (OSError, json.JSONDecodeError):
                    continue
                keys_to_remove = []
                for key in index_data:
                    rel_dir = index_file.parent.relative_to(meta_root)
                    if rel_dir == Path("."):
                        full_key = key
                    else:
                        full_key = rel_dir.as_posix() + "/" + key
                    object_path = bucket_path / full_key
                    if not object_path.exists():
                        keys_to_remove.append(key)
                if keys_to_remove:
                    if not self.dry_run:
                        for k in keys_to_remove:
                            index_data.pop(k, None)
                        if index_data:
                            try:
                                with open(index_file, "w", encoding="utf-8") as f:
                                    json.dump(index_data, f)
                            except OSError as e:
                                result.errors.append(f"write index {bucket_name}: {e}")
                                continue
                        else:
                            try:
                                index_file.unlink(missing_ok=True)
                            except OSError:
                                pass
                    result.orphaned_metadata_deleted += len(keys_to_remove)
        except OSError as e:
            result.errors.append(f"scan index meta {bucket_name}: {e}")

    def _clean_orphaned_versions(self, result: GCResult) -> None:
        bucket_names = self._list_bucket_names()
        for bucket_name in bucket_names:
            bucket_path = self.storage_root / bucket_name
            for versions_root in (
                self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR,
                self.storage_root / bucket_name / ".versions",
            ):
                if not versions_root.exists():
                    continue
                try:
                    for key_dir in versions_root.iterdir():
                        if not key_dir.is_dir():
                            continue
                        self._clean_versions_for_key(bucket_path, versions_root, key_dir, result)
                except OSError as e:
                    result.errors.append(f"scan versions {bucket_name}: {e}")

    def _clean_versions_for_key(
        self, bucket_path: Path, versions_root: Path, key_dir: Path, result: GCResult
    ) -> None:
        try:
            rel = key_dir.relative_to(versions_root)
        except ValueError:
            return
        object_path = bucket_path / rel
        if object_path.exists():
            return
        version_files = list(key_dir.glob("*.bin")) + list(key_dir.glob("*.json"))
        if not version_files:
            return
        for vf in version_files:
            try:
                size = vf.stat().st_size if vf.suffix == ".bin" else 0
                if not self.dry_run:
                    vf.unlink(missing_ok=True)
                if vf.suffix == ".bin":
                    result.orphaned_version_bytes_freed += size
                result.orphaned_versions_deleted += 1
            except OSError as e:
                result.errors.append(f"version file {vf.name}: {e}")

    def _clean_empty_dirs(self, result: GCResult) -> None:
        targets = [
            self._system_path() / self.SYSTEM_TMP_DIR,
            self._system_path() / self.SYSTEM_MULTIPART_DIR,
            self._system_path() / self.SYSTEM_BUCKETS_DIR,
        ]
        for bucket_name in self._list_bucket_names():
            targets.append(self.storage_root / bucket_name / ".meta")
            targets.append(self.storage_root / bucket_name / ".versions")
            targets.append(self.storage_root / bucket_name / ".multipart")
        for root in targets:
            if not root.exists():
                continue
            self._remove_empty_dirs_recursive(root, root, result)

    def _remove_empty_dirs_recursive(self, path: Path, stop_at: Path, result: GCResult) -> bool:
        if not path.is_dir():
            return False
        try:
            children = list(path.iterdir())
        except OSError:
            return False
        all_empty = True
        for child in children:
            if child.is_dir():
                if not self._remove_empty_dirs_recursive(child, stop_at, result):
                    all_empty = False
            else:
                all_empty = False
        if all_empty and path != stop_at:
            try:
                if not self.dry_run:
                    path.rmdir()
                result.empty_dirs_removed += 1
                return True
            except OSError:
                return False
        return all_empty

    def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
        records = self.history_store.get_history(limit, offset)
        return [r.to_dict() for r in records]

    def get_status(self) -> dict:
        return {
            "enabled": not self._shutdown or self._timer is not None,
            "running": self._timer is not None and not self._shutdown,
            "interval_hours": self.interval_seconds / 3600.0,
            "temp_file_max_age_hours": self.temp_file_max_age_hours,
            "multipart_max_age_days": self.multipart_max_age_days,
            "lock_file_max_age_hours": self.lock_file_max_age_hours,
            "dry_run": self.dry_run,
        }
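
A minimal standalone sketch of driving the collector above directly (the storage root and import path are hypothetical):

```python
from pathlib import Path

from app.gc import GarbageCollector  # import path assumed

gc = GarbageCollector(
    storage_root=Path("storage"),  # hypothetical storage root
    dry_run=True,                  # count and report, delete nothing
)

result = gc.run_now()  # synchronous one-off pass
print(result.total_bytes_freed, result.to_dict()["errors"])

gc.start()  # or schedule background passes every `interval_hours`
gc.stop()
```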

View File

@@ -1,5 +1,6 @@
 from __future__ import annotations

+import base64
 import hashlib
 import hmac
 import json
@@ -14,6 +15,8 @@ from datetime import datetime, timedelta, timezone
 from pathlib import Path
 from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple

+from cryptography.fernet import Fernet, InvalidToken
+

 class IamError(RuntimeError):
     """Raised when authentication or authorization fails."""
@@ -107,13 +110,24 @@ class Principal:
     policies: List[Policy]


+def _derive_fernet_key(secret: str) -> bytes:
+    raw = hashlib.pbkdf2_hmac("sha256", secret.encode(), b"myfsio-iam-encryption", 100_000)
+    return base64.urlsafe_b64encode(raw)
+
+
+_IAM_ENCRYPTED_PREFIX = b"MYFSIO_IAM_ENC:"
+
+
 class IamService:
     """Loads IAM configuration, manages users, and evaluates policies."""

-    def __init__(self, config_path: Path, auth_max_attempts: int = 5, auth_lockout_minutes: int = 15) -> None:
+    def __init__(self, config_path: Path, auth_max_attempts: int = 5, auth_lockout_minutes: int = 15, encryption_key: str | None = None) -> None:
         self.config_path = Path(config_path)
         self.auth_max_attempts = auth_max_attempts
         self.auth_lockout_window = timedelta(minutes=auth_lockout_minutes)
+        self._fernet: Fernet | None = None
+        if encryption_key:
+            self._fernet = Fernet(_derive_fernet_key(encryption_key))
         self.config_path.parent.mkdir(parents=True, exist_ok=True)
         if not self.config_path.exists():
             self._write_default()
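
Note that the at-rest encryption key is not SECRET_KEY itself: it is stretched through PBKDF2-HMAC-SHA256 with a fixed salt, then urlsafe-base64-encoded, which is exactly the 32-byte key format Fernet expects. A self-contained round-trip sketch of the same derivation:

```python
import base64
import hashlib

from cryptography.fernet import Fernet

def derive_fernet_key(secret: str) -> bytes:
    # Same derivation as the diff: PBKDF2-HMAC-SHA256, fixed salt,
    # 100k iterations, then urlsafe base64 for Fernet.
    raw = hashlib.pbkdf2_hmac("sha256", secret.encode(), b"myfsio-iam-encryption", 100_000)
    return base64.urlsafe_b64encode(raw)

f = Fernet(derive_fernet_key("my-app-secret"))
token = f.encrypt(b'{"users": []}')
assert f.decrypt(token) == b'{"users": []}'
```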
@@ -125,7 +139,7 @@ class IamService:
         self._secret_key_cache: Dict[str, Tuple[str, float]] = {}
         self._cache_ttl = float(os.environ.get("IAM_CACHE_TTL_SECONDS", "5.0"))
         self._last_stat_check = 0.0
-        self._stat_check_interval = 1.0
+        self._stat_check_interval = float(os.environ.get("IAM_STAT_CHECK_INTERVAL_SECONDS", "2.0"))
         self._sessions: Dict[str, Dict[str, Any]] = {}
         self._session_lock = threading.Lock()
         self._load()
@@ -145,6 +159,19 @@ class IamService:
         except OSError:
             pass

+    def _check_expiry(self, access_key: str, record: Dict[str, Any]) -> None:
+        expires_at = record.get("expires_at")
+        if not expires_at:
+            return
+        try:
+            exp_dt = datetime.fromisoformat(expires_at)
+            if exp_dt.tzinfo is None:
+                exp_dt = exp_dt.replace(tzinfo=timezone.utc)
+            if datetime.now(timezone.utc) >= exp_dt:
+                raise IamError(f"Credentials for '{access_key}' have expired")
+        except (ValueError, TypeError):
+            pass
+
     def authenticate(self, access_key: str, secret_key: str) -> Principal:
         self._maybe_reload()
         access_key = (access_key or "").strip()
@@ -161,6 +188,7 @@ class IamService:
         if not record or not hmac.compare_digest(stored_secret, secret_key):
             self._record_failed_attempt(access_key)
             raise IamError("Invalid credentials")
+        self._check_expiry(access_key, record)
         self._clear_failed_attempts(access_key)
         return self._build_principal(access_key, record)
@@ -288,12 +316,16 @@ class IamService:
         if cached:
             principal, cached_time = cached
             if now - cached_time < self._cache_ttl:
+                record = self._users.get(access_key)
+                if record:
+                    self._check_expiry(access_key, record)
                 return principal
         self._maybe_reload()
         record = self._users.get(access_key)
         if not record:
             raise IamError("Unknown access key")
+        self._check_expiry(access_key, record)
         principal = self._build_principal(access_key, record)
         self._principal_cache[access_key] = (principal, now)
         return principal
@@ -303,6 +335,7 @@ class IamService:
         record = self._users.get(access_key)
         if not record:
             raise IamError("Unknown access key")
+        self._check_expiry(access_key, record)
         return record["secret_key"]

     def authorize(self, principal: Principal, bucket_name: str | None, action: str) -> None:
@@ -347,6 +380,7 @@ class IamService:
             {
                 "access_key": access_key,
                 "display_name": record["display_name"],
+                "expires_at": record.get("expires_at"),
                 "policies": [
                     {"bucket": policy.bucket, "actions": sorted(policy.actions)}
                     for policy in record["policies"]
@@ -362,20 +396,25 @@ class IamService:
         policies: Optional[Sequence[Dict[str, Any]]] = None,
         access_key: str | None = None,
         secret_key: str | None = None,
+        expires_at: str | None = None,
     ) -> Dict[str, str]:
         access_key = (access_key or self._generate_access_key()).strip()
         if not access_key:
             raise IamError("Access key cannot be empty")
         if access_key in self._users:
             raise IamError("Access key already exists")
+        if expires_at:
+            self._validate_expires_at(expires_at)
         secret_key = secret_key or self._generate_secret_key()
         sanitized_policies = self._prepare_policy_payload(policies)
-        record = {
+        record: Dict[str, Any] = {
             "access_key": access_key,
             "secret_key": secret_key,
             "display_name": display_name or access_key,
             "policies": sanitized_policies,
         }
+        if expires_at:
+            record["expires_at"] = expires_at
         self._raw_config.setdefault("users", []).append(record)
         self._save()
         self._load()
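
With the new parameter, issuing a time-boxed credential is a single call. A hedged usage sketch; the policy dict shape and config path are assumptions based on this diff:

```python
from pathlib import Path

from app.iam import IamService  # import path assumed

iam = IamService(Path("storage/.myfsio.sys/config/iam.json"))
creds = iam.create_user(
    display_name="ci-bot",
    policies=[{"bucket": "builds", "actions": ["read", "write"]}],  # shape assumed
    expires_at="2026-12-31T23:59:59+00:00",  # ISO 8601; naive values treated as UTC
)
print(creds)  # access/secret key pair for the new user
```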
@@ -414,17 +453,43 @@ class IamService:
         clear_signing_key_cache()
         self._load()

+    def update_user_expiry(self, access_key: str, expires_at: str | None) -> None:
+        user = self._get_raw_user(access_key)
+        if expires_at:
+            self._validate_expires_at(expires_at)
+            user["expires_at"] = expires_at
+        else:
+            user.pop("expires_at", None)
+        self._save()
+        self._principal_cache.pop(access_key, None)
+        self._secret_key_cache.pop(access_key, None)
+        self._load()
+
     def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None:
         user = self._get_raw_user(access_key)
         user["policies"] = self._prepare_policy_payload(policies)
         self._save()
         self._load()

+    def _decrypt_content(self, raw_bytes: bytes) -> str:
+        if raw_bytes.startswith(_IAM_ENCRYPTED_PREFIX):
+            if not self._fernet:
+                raise IamError("IAM config is encrypted but no encryption key provided. Set SECRET_KEY or use 'python run.py reset-cred'.")
+            try:
+                encrypted_data = raw_bytes[len(_IAM_ENCRYPTED_PREFIX):]
+                return self._fernet.decrypt(encrypted_data).decode("utf-8")
+            except InvalidToken:
+                raise IamError("Cannot decrypt IAM config. SECRET_KEY may have changed. Use 'python run.py reset-cred' to reset credentials.")
+        return raw_bytes.decode("utf-8")
+
     def _load(self) -> None:
         try:
             self._last_load_time = self.config_path.stat().st_mtime
-            content = self.config_path.read_text(encoding='utf-8')
+            raw_bytes = self.config_path.read_bytes()
+            content = self._decrypt_content(raw_bytes)
             raw = json.loads(content)
+        except IamError:
+            raise
         except FileNotFoundError:
             raise IamError(f"IAM config not found: {self.config_path}")
         except json.JSONDecodeError as e:
@@ -433,34 +498,48 @@ class IamService:
raise IamError(f"Cannot read IAM config (permission denied): {e}") raise IamError(f"Cannot read IAM config (permission denied): {e}")
except (OSError, ValueError) as e: except (OSError, ValueError) as e:
raise IamError(f"Failed to load IAM config: {e}") raise IamError(f"Failed to load IAM config: {e}")
was_plaintext = not raw_bytes.startswith(_IAM_ENCRYPTED_PREFIX)
users: Dict[str, Dict[str, Any]] = {} users: Dict[str, Dict[str, Any]] = {}
for user in raw.get("users", []): for user in raw.get("users", []):
policies = self._build_policy_objects(user.get("policies", [])) policies = self._build_policy_objects(user.get("policies", []))
users[user["access_key"]] = { user_record: Dict[str, Any] = {
"secret_key": user["secret_key"], "secret_key": user["secret_key"],
"display_name": user.get("display_name", user["access_key"]), "display_name": user.get("display_name", user["access_key"]),
"policies": policies, "policies": policies,
} }
if user.get("expires_at"):
user_record["expires_at"] = user["expires_at"]
users[user["access_key"]] = user_record
if not users: if not users:
raise IamError("IAM configuration contains no users") raise IamError("IAM configuration contains no users")
self._users = users self._users = users
self._raw_config = { raw_users: List[Dict[str, Any]] = []
"users": [ for entry in raw.get("users", []):
{ raw_entry: Dict[str, Any] = {
"access_key": entry["access_key"], "access_key": entry["access_key"],
"secret_key": entry["secret_key"], "secret_key": entry["secret_key"],
"display_name": entry.get("display_name", entry["access_key"]), "display_name": entry.get("display_name", entry["access_key"]),
"policies": entry.get("policies", []), "policies": entry.get("policies", []),
} }
for entry in raw.get("users", []) if entry.get("expires_at"):
] raw_entry["expires_at"] = entry["expires_at"]
} raw_users.append(raw_entry)
self._raw_config = {"users": raw_users}
if was_plaintext and self._fernet:
self._save()
     def _save(self) -> None:
         try:
+            json_text = json.dumps(self._raw_config, indent=2)
             temp_path = self.config_path.with_suffix('.json.tmp')
-            temp_path.write_text(json.dumps(self._raw_config, indent=2), encoding='utf-8')
+            if self._fernet:
+                encrypted = self._fernet.encrypt(json_text.encode("utf-8"))
+                temp_path.write_bytes(_IAM_ENCRYPTED_PREFIX + encrypted)
+            else:
+                temp_path.write_text(json_text, encoding='utf-8')
             temp_path.replace(self.config_path)
         except (OSError, PermissionError) as e:
             raise IamError(f"Cannot save IAM config: {e}")
@@ -475,9 +554,14 @@ class IamService:
     def export_config(self, mask_secrets: bool = True) -> Dict[str, Any]:
         payload: Dict[str, Any] = {"users": []}
         for user in self._raw_config.get("users", []):
-            record = dict(user)
-            if mask_secrets and "secret_key" in record:
-                record["secret_key"] = "••••••••••"
+            record: Dict[str, Any] = {
+                "access_key": user["access_key"],
+                "secret_key": "••••••••••" if mask_secrets else user["secret_key"],
+                "display_name": user["display_name"],
+                "policies": user["policies"],
+            }
+            if user.get("expires_at"):
+                record["expires_at"] = user["expires_at"]
             payload["users"].append(record)
         return payload
@@ -546,8 +630,9 @@ class IamService:
         return candidate if candidate in ALLOWED_ACTIONS else ""

     def _write_default(self) -> None:
-        access_key = secrets.token_hex(12)
-        secret_key = secrets.token_urlsafe(32)
+        access_key = os.environ.get("ADMIN_ACCESS_KEY", "").strip() or secrets.token_hex(12)
+        secret_key = os.environ.get("ADMIN_SECRET_KEY", "").strip() or secrets.token_urlsafe(32)
+        custom_keys = bool(os.environ.get("ADMIN_ACCESS_KEY", "").strip())
         default = {
             "users": [
                 {
@@ -560,16 +645,37 @@ class IamService:
                 }
             ]
         }
-        self.config_path.write_text(json.dumps(default, indent=2))
+        json_text = json.dumps(default, indent=2)
+        if self._fernet:
+            encrypted = self._fernet.encrypt(json_text.encode("utf-8"))
+            self.config_path.write_bytes(_IAM_ENCRYPTED_PREFIX + encrypted)
+        else:
+            self.config_path.write_text(json_text)
         print(f"\n{'='*60}")
-        print("MYFSIO FIRST RUN - ADMIN CREDENTIALS GENERATED")
+        print("MYFSIO FIRST RUN - ADMIN CREDENTIALS")
         print(f"{'='*60}")
-        print(f"Access Key: {access_key}")
-        print(f"Secret Key: {secret_key}")
+        if custom_keys:
+            print(f"Access Key: {access_key} (from ADMIN_ACCESS_KEY)")
+            print(f"Secret Key: {'(from ADMIN_SECRET_KEY)' if os.environ.get('ADMIN_SECRET_KEY', '').strip() else secret_key}")
+        else:
+            print(f"Access Key: {access_key}")
+            print(f"Secret Key: {secret_key}")
         print(f"{'='*60}")
-        print(f"Missed this? Check: {self.config_path}")
+        if self._fernet:
+            print("IAM config is encrypted at rest.")
+            print("Lost credentials? Run: python run.py reset-cred")
+        else:
+            print(f"Missed this? Check: {self.config_path}")
         print(f"{'='*60}\n")
+    def _validate_expires_at(self, expires_at: str) -> None:
+        try:
+            dt = datetime.fromisoformat(expires_at)
+            if dt.tzinfo is None:
+                dt = dt.replace(tzinfo=timezone.utc)
+        except (ValueError, TypeError):
+            raise IamError(f"Invalid expires_at format: {expires_at}. Use ISO 8601 (e.g. 2026-12-31T23:59:59Z)")
+
     def _generate_access_key(self) -> str:
         return secrets.token_hex(8)
@@ -588,11 +694,15 @@ class IamService:
         if cached:
             secret_key, cached_time = cached
             if now - cached_time < self._cache_ttl:
+                record = self._users.get(access_key)
+                if record:
+                    self._check_expiry(access_key, record)
                 return secret_key
         self._maybe_reload()
         record = self._users.get(access_key)
         if record:
+            self._check_expiry(access_key, record)
             secret_key = record["secret_key"]
             self._secret_key_cache[access_key] = (secret_key, now)
         return secret_key
@@ -604,11 +714,15 @@ class IamService:
         if cached:
             principal, cached_time = cached
             if now - cached_time < self._cache_ttl:
+                record = self._users.get(access_key)
+                if record:
+                    self._check_expiry(access_key, record)
                 return principal
         self._maybe_reload()
         record = self._users.get(access_key)
         if record:
+            self._check_expiry(access_key, record)
             principal = self._build_principal(access_key, record)
             self._principal_cache[access_key] = (principal, now)
         return principal
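
Note that `_check_expiry` is deliberately fail-open: a missing or unparseable `expires_at` never locks a user out, and naive timestamps are interpreted as UTC. A standalone sketch of those semantics:

```python
from datetime import datetime, timezone

def is_expired(expires_at: str | None) -> bool:
    """Mirror of _check_expiry's semantics (sketch)."""
    if not expires_at:
        return False  # no expiry set: credential never expires
    try:
        dt = datetime.fromisoformat(expires_at)
    except (ValueError, TypeError):
        return False  # fail open: unparseable values do not lock users out
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)  # naive timestamps treated as UTC
    return datetime.now(timezone.utc) >= dt

print(is_expired("2020-01-01T00:00:00+00:00"))  # True
print(is_expired("not-a-date"))                 # False (fail open)
```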

View File

@@ -5,6 +5,7 @@ import logging
 import random
 import threading
 import time
+from collections import defaultdict
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
@@ -138,8 +139,8 @@ class OperationMetricsCollector:
         self.interval_seconds = interval_minutes * 60
         self.retention_hours = retention_hours
         self._lock = threading.Lock()
-        self._by_method: Dict[str, OperationStats] = {}
-        self._by_endpoint: Dict[str, OperationStats] = {}
+        self._by_method: Dict[str, OperationStats] = defaultdict(OperationStats)
+        self._by_endpoint: Dict[str, OperationStats] = defaultdict(OperationStats)
         self._by_status_class: Dict[str, int] = {}
         self._error_codes: Dict[str, int] = {}
         self._totals = OperationStats()
@@ -211,8 +212,8 @@ class OperationMetricsCollector:
         self._prune_old_snapshots()
         self._save_history()
-        self._by_method.clear()
-        self._by_endpoint.clear()
+        self._by_method = defaultdict(OperationStats)
+        self._by_endpoint = defaultdict(OperationStats)
         self._by_status_class.clear()
         self._error_codes.clear()
         self._totals = OperationStats()
@@ -232,12 +233,7 @@ class OperationMetricsCollector:
         status_class = f"{status_code // 100}xx"
         with self._lock:
-            if method not in self._by_method:
-                self._by_method[method] = OperationStats()
             self._by_method[method].record(latency_ms, success, bytes_in, bytes_out)
-            if endpoint_type not in self._by_endpoint:
-                self._by_endpoint[endpoint_type] = OperationStats()
             self._by_endpoint[endpoint_type].record(latency_ms, success, bytes_in, bytes_out)
             self._by_status_class[status_class] = self._by_status_class.get(status_class, 0) + 1
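
The `defaultdict` swap removes two membership checks from the per-request hot path while keeping the lock. A minimal sketch of the pattern with a stand-in stats class:

```python
from collections import defaultdict

class Stats:
    def __init__(self) -> None:
        self.count = 0

    def record(self) -> None:
        self.count += 1

# Missing keys are materialized on first access; no membership test needed.
by_method: defaultdict[str, Stats] = defaultdict(Stats)
by_method["GET"].record()
by_method["GET"].record()
print(by_method["GET"].count)  # 2
```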

View File

@@ -85,6 +85,9 @@ def _bucket_policies() -> BucketPolicyStore:
 def _build_policy_context() -> Dict[str, Any]:
+    cached = getattr(g, "_policy_context", None)
+    if cached is not None:
+        return cached
     ctx: Dict[str, Any] = {}
     if request.headers.get("Referer"):
         ctx["aws:Referer"] = request.headers.get("Referer")
@@ -98,6 +101,7 @@ def _build_policy_context() -> Dict[str, Any]:
     ctx["aws:SecureTransport"] = str(request.is_secure).lower()
     if request.headers.get("User-Agent"):
         ctx["aws:UserAgent"] = request.headers.get("User-Agent")
+    g._policy_context = ctx
     return ctx
@@ -293,9 +297,7 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
raise IamError("Required headers not signed") raise IamError("Required headers not signed")
canonical_uri = _get_canonical_uri(req) canonical_uri = _get_canonical_uri(req)
payload_hash = req.headers.get("X-Amz-Content-Sha256") payload_hash = req.headers.get("X-Amz-Content-Sha256") or "UNSIGNED-PAYLOAD"
if not payload_hash:
payload_hash = hashlib.sha256(req.get_data()).hexdigest()
if _HAS_RUST: if _HAS_RUST:
query_params = list(req.args.items(multi=True)) query_params = list(req.args.items(multi=True))
@@ -305,16 +307,10 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
header_values, payload_hash, amz_date, date_stamp, region, header_values, payload_hash, amz_date, date_stamp, region,
service, secret_key, signature, service, secret_key, signature,
): ):
if current_app.config.get("DEBUG_SIGV4"):
logger.warning("SigV4 signature mismatch for %s %s", req.method, req.path)
raise IamError("SignatureDoesNotMatch") raise IamError("SignatureDoesNotMatch")
else: else:
method = req.method method = req.method
query_args = [] query_args = sorted(req.args.items(multi=True), key=lambda x: (x[0], x[1]))
for key, value in req.args.items(multi=True):
query_args.append((key, value))
query_args.sort(key=lambda x: (x[0], x[1]))
canonical_query_parts = [] canonical_query_parts = []
for k, v in query_args: for k, v in query_args:
canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}") canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}")
@@ -339,8 +335,6 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
         string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}"
         calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
         if not hmac.compare_digest(calculated_signature, signature):
-            if current_app.config.get("DEBUG_SIGV4"):
-                logger.warning("SigV4 signature mismatch for %s %s", method, req.path)
             raise IamError("SignatureDoesNotMatch")

     session_token = req.headers.get("X-Amz-Security-Token")
@@ -682,7 +676,7 @@ def _extract_request_metadata() -> Dict[str, str]:
     for header, value in request.headers.items():
         if header.lower().startswith("x-amz-meta-"):
             key = header[11:]
-            if key:
+            if key and not (key.startswith("__") and key.endswith("__")):
                 metadata[key] = value
     return metadata
@@ -1025,20 +1019,78 @@ def _method_not_allowed(allowed: list[str]) -> Response:
     return response

+
+def _check_conditional_headers(etag: str, last_modified: float | None) -> Response | None:
+    from email.utils import parsedate_to_datetime
+
+    if_match = request.headers.get("If-Match")
+    if if_match:
+        if if_match.strip() != "*":
+            match_etags = [e.strip().strip('"') for e in if_match.split(",")]
+            if etag not in match_etags:
+                return Response(status=412)
+    if_unmodified = request.headers.get("If-Unmodified-Since")
+    if not if_match and if_unmodified and last_modified is not None:
+        try:
+            dt = parsedate_to_datetime(if_unmodified)
+            obj_dt = datetime.fromtimestamp(last_modified, timezone.utc)
+            if obj_dt > dt:
+                return Response(status=412)
+        except (TypeError, ValueError):
+            pass
+    if_none_match = request.headers.get("If-None-Match")
+    if if_none_match:
+        if if_none_match.strip() == "*":
+            resp = Response(status=304)
+            resp.headers["ETag"] = f'"{etag}"'
+            if last_modified is not None:
+                resp.headers["Last-Modified"] = http_date(last_modified)
+            return resp
+        none_match_etags = [e.strip().strip('"') for e in if_none_match.split(",")]
+        if etag in none_match_etags:
+            resp = Response(status=304)
+            resp.headers["ETag"] = f'"{etag}"'
+            if last_modified is not None:
+                resp.headers["Last-Modified"] = http_date(last_modified)
+            return resp
+    if_modified = request.headers.get("If-Modified-Since")
+    if not if_none_match and if_modified and last_modified is not None:
+        try:
+            dt = parsedate_to_datetime(if_modified)
+            obj_dt = datetime.fromtimestamp(last_modified, timezone.utc)
+            if obj_dt <= dt:
+                resp = Response(status=304)
+                resp.headers["ETag"] = f'"{etag}"'
+                resp.headers["Last-Modified"] = http_date(last_modified)
+                return resp
+        except (TypeError, ValueError):
+            pass
+    return None
def _apply_object_headers(
    response: Response,
    *,
    file_stat,
    metadata: Dict[str, str] | None,
    etag: str,
+    size_override: int | None = None,
+    mtime_override: float | None = None,
) -> None:
-    if file_stat is not None:
-        if response.status_code != 206:
-            response.headers["Content-Length"] = str(file_stat.st_size)
-        response.headers["Last-Modified"] = http_date(file_stat.st_mtime)
+    effective_size = size_override if size_override is not None else (file_stat.st_size if file_stat is not None else None)
+    effective_mtime = mtime_override if mtime_override is not None else (file_stat.st_mtime if file_stat is not None else None)
+    if effective_size is not None and response.status_code != 206:
+        response.headers["Content-Length"] = str(effective_size)
+    if effective_mtime is not None:
+        response.headers["Last-Modified"] = http_date(effective_mtime)
    response.headers["ETag"] = f'"{etag}"'
    response.headers["Accept-Ranges"] = "bytes"
    for key, value in (metadata or {}).items():
+        if key.startswith("__") and key.endswith("__"):
+            continue
        safe_value = _sanitize_header_value(str(value))
        response.headers[f"X-Amz-Meta-{key}"] = safe_value
@@ -2467,7 +2519,7 @@ def _post_object(bucket_name: str) -> Response:
    for field_name, value in request.form.items():
        if field_name.lower().startswith("x-amz-meta-"):
            key = field_name[11:]
-            if key:
+            if key and not (key.startswith("__") and key.endswith("__")):
                metadata[key] = value
    try:
        meta = storage.put_object(bucket_name, object_key, file.stream, metadata=metadata or None)
@@ -2828,6 +2880,8 @@ def object_handler(bucket_name: str, object_key: str):
    if validation_error:
        return _error_response("InvalidArgument", validation_error, 400)
+    metadata["__content_type__"] = content_type or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
    try:
        meta = storage.put_object(
            bucket_name,
@@ -2842,10 +2896,23 @@
        if "Bucket" in message:
            return _error_response("NoSuchBucket", message, 404)
        return _error_response("InvalidArgument", message, 400)
-    current_app.logger.info(
-        "Object uploaded",
-        extra={"bucket": bucket_name, "key": object_key, "size": meta.size},
-    )
+    content_md5 = request.headers.get("Content-MD5")
+    if content_md5 and meta.etag:
+        try:
+            expected_md5 = base64.b64decode(content_md5).hex()
+        except Exception:
+            storage.delete_object(bucket_name, object_key)
+            return _error_response("InvalidDigest", "Content-MD5 header is not valid base64", 400)
+        if expected_md5 != meta.etag:
+            storage.delete_object(bucket_name, object_key)
+            return _error_response("BadDigest", "The Content-MD5 you specified did not match what we received", 400)
+    if current_app.logger.isEnabledFor(logging.INFO):
+        current_app.logger.info(
+            "Object uploaded",
+            extra={"bucket": bucket_name, "key": object_key, "size": meta.size},
+        )
    response = Response(status=200)
    if meta.etag:
        response.headers["ETag"] = f'"{meta.etag}"'
@@ -2879,10 +2946,27 @@
    except StorageError as exc:
        return _error_response("NoSuchKey", str(exc), 404)
    metadata = storage.get_object_metadata(bucket_name, object_key)
-    mimetype = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
+    mimetype = metadata.get("__content_type__") or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
    is_encrypted = "x-amz-server-side-encryption" in metadata
+    cond_etag = metadata.get("__etag__")
+    if not cond_etag and not is_encrypted:
+        try:
+            cond_etag = storage._compute_etag(path)
+        except OSError:
+            cond_etag = None
+    if cond_etag:
+        cond_mtime = float(metadata["__last_modified__"]) if "__last_modified__" in metadata else None
+        if cond_mtime is None:
+            try:
+                cond_mtime = path.stat().st_mtime
+            except OSError:
+                pass
+        cond_resp = _check_conditional_headers(cond_etag, cond_mtime)
+        if cond_resp:
+            return cond_resp
    if request.method == "GET":
        range_header = request.headers.get("Range")
@@ -2971,10 +3055,7 @@
    response.headers["Content-Type"] = mimetype
    logged_bytes = 0
-    try:
-        file_stat = path.stat() if not is_encrypted else None
-    except (PermissionError, OSError):
-        file_stat = None
+    file_stat = stat if not is_encrypted else None
    _apply_object_headers(response, file_stat=file_stat, metadata=metadata, etag=etag)
    if request.method == "GET":
@@ -2991,8 +3072,9 @@
        if value:
            response.headers[header] = _sanitize_header_value(value)
-    action = "Object read" if request.method == "GET" else "Object head"
-    current_app.logger.info(action, extra={"bucket": bucket_name, "key": object_key, "bytes": logged_bytes})
+    if current_app.logger.isEnabledFor(logging.INFO):
+        action = "Object read" if request.method == "GET" else "Object head"
+        current_app.logger.info(action, extra={"bucket": bucket_name, "key": object_key, "bytes": logged_bytes})
    return response
    if "uploadId" in request.args:
@@ -3010,7 +3092,8 @@
    storage.delete_object(bucket_name, object_key)
    lock_service.delete_object_lock_metadata(bucket_name, object_key)
-    current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key})
+    if current_app.logger.isEnabledFor(logging.INFO):
+        current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key})
    principal, _ = _require_principal()
    _notifications().emit_object_removed(
@@ -3351,12 +3434,30 @@ def head_object(bucket_name: str, object_key: str) -> Response:
        _authorize_action(principal, bucket_name, "read", object_key=object_key)
        path = _storage().get_object_path(bucket_name, object_key)
        metadata = _storage().get_object_metadata(bucket_name, object_key)
-        stat = path.stat()
        etag = metadata.get("__etag__") or _storage()._compute_etag(path)
-        response = Response(status=200)
-        _apply_object_headers(response, file_stat=stat, metadata=metadata, etag=etag)
-        response.headers["Content-Type"] = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
+        head_mtime = float(metadata["__last_modified__"]) if "__last_modified__" in metadata else None
+        if head_mtime is None:
+            try:
+                head_mtime = path.stat().st_mtime
+            except OSError:
+                pass
+        cond_resp = _check_conditional_headers(etag, head_mtime)
+        if cond_resp:
+            return cond_resp
+        cached_size = metadata.get("__size__")
+        cached_mtime = metadata.get("__last_modified__")
+        if cached_size is not None and cached_mtime is not None:
+            size_val = int(cached_size)
+            mtime_val = float(cached_mtime)
+            response = Response(status=200)
+            _apply_object_headers(response, file_stat=None, metadata=metadata, etag=etag, size_override=size_val, mtime_override=mtime_val)
+        else:
+            stat = path.stat()
+            response = Response(status=200)
+            _apply_object_headers(response, file_stat=stat, metadata=metadata, etag=etag)
+        response.headers["Content-Type"] = metadata.get("__content_type__") or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
        return response
    except (StorageError, FileNotFoundError):
        return _error_response("NoSuchKey", "Object not found", 404)
@@ -3445,8 +3546,8 @@ def _copy_object(dest_bucket: str, dest_key: str, copy_source: str) -> Response:
        if validation_error:
            return _error_response("InvalidArgument", validation_error, 400)
    else:
-        metadata = source_metadata
+        metadata = {k: v for k, v in source_metadata.items() if not (k.startswith("__") and k.endswith("__"))}
    try:
        with source_path.open("rb") as stream:
            meta = storage.put_object(
@@ -3586,10 +3687,12 @@ def _initiate_multipart_upload(bucket_name: str, object_key: str) -> Response:
        return error
    metadata = _extract_request_metadata()
+    content_type = request.headers.get("Content-Type")
+    metadata["__content_type__"] = content_type or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
    try:
        upload_id = _storage().initiate_multipart_upload(
            bucket_name,
            object_key,
            metadata=metadata or None
        )
    except StorageError as exc:
@@ -3638,6 +3741,15 @@ def _upload_part(bucket_name: str, object_key: str) -> Response:
            return _error_response("NoSuchUpload", str(exc), 404)
        return _error_response("InvalidArgument", str(exc), 400)
+    content_md5 = request.headers.get("Content-MD5")
+    if content_md5 and etag:
+        try:
+            expected_md5 = base64.b64decode(content_md5).hex()
+        except Exception:
+            return _error_response("InvalidDigest", "Content-MD5 header is not valid base64", 400)
+        if expected_md5 != etag:
+            return _error_response("BadDigest", "The Content-MD5 you specified did not match what we received", 400)
    response = Response(status=200)
    response.headers["ETag"] = f'"{etag}"'
    return response
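Taken together, the hunks above add conditional GET/HEAD handling and strict Content-MD5 validation to the object API. A minimal client-side sketch of both behaviors, assuming a local instance, an existing `demo-bucket`, the `requests` package, and the header-style auth used elsewhere in these docs (host, bucket, and keys are illustrative):

```python
import base64
import hashlib
import requests

BASE = "http://localhost:5000"
AUTH = {"X-Access-Key": "<key>", "X-Secret-Key": "<secret>"}

body = b"hello world"
# Content-MD5 is the base64 of the raw MD5 digest; the server base64-decodes
# it, hex-encodes it, and compares it to the computed ETag (BadDigest on mismatch).
content_md5 = base64.b64encode(hashlib.md5(body).digest()).decode("ascii")
put = requests.put(f"{BASE}/demo-bucket/hello.txt", data=body,
                   headers={**AUTH, "Content-MD5": content_md5})
etag = put.headers["ETag"]  # quoted hex MD5, e.g. '"5eb63bbb..."'

# If-None-Match with the current ETag short-circuits to 304 Not Modified.
r = requests.get(f"{BASE}/demo-bucket/hello.txt",
                 headers={**AUTH, "If-None-Match": etag})
assert r.status_code == 304

# If-Match with a stale ETag fails the precondition with 412.
r = requests.get(f"{BASE}/demo-bucket/hello.txt",
                 headers={**AUTH, "If-Match": '"stale-etag"'})
assert r.status_code == 412
```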

File diff suppressed because it is too large

@@ -1754,6 +1754,10 @@ def iam_dashboard():
    users = iam_service.list_users() if not locked else []
    config_summary = iam_service.config_summary()
    config_document = json.dumps(iam_service.export_config(mask_secrets=True), indent=2)
+    from datetime import datetime as _dt, timedelta as _td, timezone as _tz
+    _now = _dt.now(_tz.utc)
+    now_iso = _now.isoformat()
+    soon_iso = (_now + _td(days=7)).isoformat()
    return render_template(
        "iam.html",
        users=users,
@@ -1763,6 +1767,8 @@ def iam_dashboard():
        config_summary=config_summary,
        config_document=config_document,
        disclosed_secret=disclosed_secret,
+        now_iso=now_iso,
+        soon_iso=soon_iso,
    )
@@ -1782,6 +1788,8 @@ def create_iam_user():
            return jsonify({"error": "Display name must be 64 characters or fewer"}), 400
        flash("Display name must be 64 characters or fewer", "danger")
        return redirect(url_for("ui.iam_dashboard"))
+    custom_access_key = request.form.get("access_key", "").strip() or None
+    custom_secret_key = request.form.get("secret_key", "").strip() or None
    policies_text = request.form.get("policies", "").strip()
    policies = None
    if policies_text:
@@ -1792,8 +1800,21 @@ def create_iam_user():
            return jsonify({"error": f"Invalid JSON: {exc}"}), 400
        flash(f"Invalid JSON: {exc}", "danger")
        return redirect(url_for("ui.iam_dashboard"))
+    expires_at = request.form.get("expires_at", "").strip() or None
+    if expires_at:
+        try:
+            from datetime import datetime as _dt, timezone as _tz
+            exp_dt = _dt.fromisoformat(expires_at)
+            if exp_dt.tzinfo is None:
+                exp_dt = exp_dt.replace(tzinfo=_tz.utc)
+            expires_at = exp_dt.isoformat()
+        except (ValueError, TypeError):
+            if _wants_json():
+                return jsonify({"error": "Invalid expiry date format"}), 400
+            flash("Invalid expiry date format", "danger")
+            return redirect(url_for("ui.iam_dashboard"))
    try:
-        created = _iam().create_user(display_name=display_name, policies=policies)
+        created = _iam().create_user(display_name=display_name, policies=policies, access_key=custom_access_key, secret_key=custom_secret_key, expires_at=expires_at)
    except IamError as exc:
        if _wants_json():
            return jsonify({"error": str(exc)}), 400
@@ -1967,6 +1988,45 @@ def update_iam_policies(access_key: str):
    return redirect(url_for("ui.iam_dashboard"))
+@ui_bp.post("/iam/users/<access_key>/expiry")
+def update_iam_expiry(access_key: str):
+    principal = _current_principal()
+    try:
+        _iam().authorize(principal, None, "iam:update_policy")
+    except IamError as exc:
+        if _wants_json():
+            return jsonify({"error": str(exc)}), 403
+        flash(str(exc), "danger")
+        return redirect(url_for("ui.iam_dashboard"))
+    expires_at = request.form.get("expires_at", "").strip() or None
+    if expires_at:
+        try:
+            from datetime import datetime as _dt, timezone as _tz
+            exp_dt = _dt.fromisoformat(expires_at)
+            if exp_dt.tzinfo is None:
+                exp_dt = exp_dt.replace(tzinfo=_tz.utc)
+            expires_at = exp_dt.isoformat()
+        except (ValueError, TypeError):
+            if _wants_json():
+                return jsonify({"error": "Invalid expiry date format"}), 400
+            flash("Invalid expiry date format", "danger")
+            return redirect(url_for("ui.iam_dashboard"))
+    try:
+        _iam().update_user_expiry(access_key, expires_at)
+        if _wants_json():
+            return jsonify({"success": True, "message": f"Updated expiry for {access_key}", "expires_at": expires_at})
+        label = expires_at if expires_at else "never"
+        flash(f"Expiry for {access_key} set to {label}", "success")
+    except IamError as exc:
+        if _wants_json():
+            return jsonify({"error": str(exc)}), 400
+        flash(str(exc), "danger")
+    return redirect(url_for("ui.iam_dashboard"))
@ui_bp.post("/connections")
def create_connection():
    principal = _current_principal()

@@ -1,6 +1,6 @@
from __future__ import annotations
-APP_VERSION = "0.3.1"
+APP_VERSION = "0.3.8"
def get_version() -> str:

docs.md

@@ -145,13 +145,15 @@ All configuration is done via environment variables. The table below lists every
| Variable | Default | Notes |
| --- | --- | --- |
-| `IAM_CONFIG` | `data/.myfsio.sys/config/iam.json` | Stores users, secrets, and inline policies. |
+| `IAM_CONFIG` | `data/.myfsio.sys/config/iam.json` | Stores users, secrets, and inline policies. Encrypted at rest when `SECRET_KEY` is set. |
| `BUCKET_POLICY_PATH` | `data/.myfsio.sys/config/bucket_policies.json` | Bucket policy store (auto hot-reload). |
| `AUTH_MAX_ATTEMPTS` | `5` | Failed login attempts before lockout. |
| `AUTH_LOCKOUT_MINUTES` | `15` | Lockout duration after max failed attempts. |
| `SESSION_LIFETIME_DAYS` | `30` | How long UI sessions remain valid. |
| `SECRET_TTL_SECONDS` | `300` | TTL for ephemeral secrets (presigned URLs). |
| `UI_ENFORCE_BUCKET_POLICIES` | `false` | Whether the UI should enforce bucket policies. |
+| `ADMIN_ACCESS_KEY` | (none) | Custom access key for the admin user on first run or credential reset. If unset, a random key is generated. |
+| `ADMIN_SECRET_KEY` | (none) | Custom secret key for the admin user on first run or credential reset. If unset, a random key is generated. |
### CORS (Cross-Origin Resource Sharing)
@@ -250,6 +252,60 @@ Once enabled, configure lifecycle rules via:
</LifecycleConfiguration>
```
## Garbage Collection
The garbage collector (GC) automatically cleans up orphaned data that accumulates over time: stale temporary files from failed uploads, abandoned multipart uploads, stale lock files, orphaned metadata entries, orphaned version files, and empty directories.
### Enabling GC
By default, GC is disabled. Enable it by setting:
```bash
GC_ENABLED=true python run.py
```
Or in your `myfsio.env` file:
```
GC_ENABLED=true
GC_INTERVAL_HOURS=6 # Run every 6 hours (default)
GC_TEMP_FILE_MAX_AGE_HOURS=24 # Delete temp files older than 24h
GC_MULTIPART_MAX_AGE_DAYS=7 # Delete orphaned multipart uploads older than 7 days
GC_LOCK_FILE_MAX_AGE_HOURS=1 # Delete stale lock files older than 1h
GC_DRY_RUN=false # Set to true to log without deleting
```
### What Gets Cleaned
| Type | Location | Condition |
|------|----------|-----------|
| **Temp files** | `.myfsio.sys/tmp/` | Older than `GC_TEMP_FILE_MAX_AGE_HOURS` |
| **Orphaned multipart uploads** | `.myfsio.sys/multipart/` and `<bucket>/.multipart/` | Older than `GC_MULTIPART_MAX_AGE_DAYS` |
| **Stale lock files** | `.myfsio.sys/buckets/<bucket>/locks/` | Older than `GC_LOCK_FILE_MAX_AGE_HOURS` |
| **Orphaned metadata** | `.myfsio.sys/buckets/<bucket>/meta/` and `<bucket>/.meta/` | Object file no longer exists |
| **Orphaned versions** | `.myfsio.sys/buckets/<bucket>/versions/` and `<bucket>/.versions/` | Main object no longer exists |
| **Empty directories** | Various internal directories | Directory is empty after cleanup |
### Admin API
All GC endpoints require admin (`iam:*`) permissions.
| Method | Route | Description |
|--------|-------|-------------|
| `GET` | `/admin/gc/status` | Get GC status and configuration |
| `POST` | `/admin/gc/run` | Trigger a manual GC run (body: `{"dry_run": true}` for preview) |
| `GET` | `/admin/gc/history` | Get GC execution history (query: `?limit=50&offset=0`) |
### Dry Run Mode
Set `GC_DRY_RUN=true` to log what would be deleted without actually removing anything. You can also trigger a one-time dry run via the admin API:
```bash
curl -X POST "http://localhost:5000/admin/gc/run" \
-H "X-Access-Key: <key>" -H "X-Secret-Key: <secret>" \
-H "Content-Type: application/json" \
-d '{"dry_run": true}'
```
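The same endpoints are easy to drive from scripts. A minimal monitoring sketch in Python, assuming the `requests` package and the header auth shown above (the host, keys, and printed fields are illustrative):

```python
import requests

BASE = "http://localhost:5000"
HEADERS = {"X-Access-Key": "<key>", "X-Secret-Key": "<secret>"}

# Current GC configuration and scheduler state.
status = requests.get(f"{BASE}/admin/gc/status", headers=HEADERS).json()
print(status)

# One-off dry run: reports what would be deleted without removing anything.
preview = requests.post(f"{BASE}/admin/gc/run", headers=HEADERS,
                        json={"dry_run": True}).json()
print(preview)

# Page through execution history, 50 runs at a time.
history = requests.get(f"{BASE}/admin/gc/history", headers=HEADERS,
                       params={"limit": 50, "offset": 0}).json()
```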
### Performance Tuning
| Variable | Default | Notes |
@@ -277,13 +333,14 @@ API responses for JSON, XML, HTML, CSS, and JavaScript are automatically gzip-co
Before deploying to production, ensure you:
-1. **Set `SECRET_KEY`** - Use a strong, unique value (e.g., `openssl rand -base64 32`)
+1. **Set `SECRET_KEY`** - Use a strong, unique value (e.g., `openssl rand -base64 32`). This also enables IAM config encryption at rest.
2. **Restrict CORS** - Set `CORS_ORIGINS` to your specific domains instead of `*`
3. **Configure `API_BASE_URL`** - Required for correct presigned URLs behind proxies
4. **Enable HTTPS** - Use a reverse proxy (nginx, Cloudflare) with TLS termination
5. **Review rate limits** - Adjust `RATE_LIMIT_DEFAULT` based on your needs
6. **Secure master keys** - Back up `ENCRYPTION_MASTER_KEY_PATH` if using encryption
7. **Use `--prod` flag** - Runs with Waitress instead of Flask dev server
+8. **Set credential expiry** - Assign `expires_at` to non-admin users for time-limited access
### Proxy Configuration
@@ -633,9 +690,10 @@ MyFSIO implements a comprehensive Identity and Access Management (IAM) system th
### Getting Started
-1. On first boot, `data/.myfsio.sys/config/iam.json` is created with a randomly generated admin user. The access key and secret key are printed to the console during first startup. If you miss it, check the `iam.json` file directly—credentials are stored in plaintext.
+1. On first boot, `data/.myfsio.sys/config/iam.json` is created with a randomly generated admin user. The access key and secret key are printed to the console during first startup. You can set `ADMIN_ACCESS_KEY` and `ADMIN_SECRET_KEY` environment variables to use custom credentials instead of random ones. If `SECRET_KEY` is configured, the IAM config file is encrypted at rest using AES (Fernet). To reset admin credentials later, run `python run.py --reset-cred`.
2. Sign into the UI using the generated credentials, then open **IAM**:
-   - **Create user**: supply a display name and optional JSON inline policy array.
+   - **Create user**: supply a display name, optional JSON inline policy array, and optional credential expiry date.
+   - **Set expiry**: assign an expiration date to any user's credentials. Expired credentials are rejected at authentication time. The UI shows expiry badges and preset durations (1h, 24h, 7d, 30d, 90d).
   - **Rotate secret**: generates a new secret key; the UI surfaces it once.
   - **Policy editor**: select a user, paste an array of objects (`{"bucket": "*", "actions": ["list", "read"]}`), and submit. Alias support includes AWS-style verbs (e.g., `s3:GetObject`).
3. Wildcard action `iam:*` is supported for admin user definitions.
@@ -653,8 +711,11 @@ The API expects every request to include authentication headers. The UI persists
**Security Features:**
- **Lockout Protection**: After `AUTH_MAX_ATTEMPTS` (default: 5) failed login attempts, the account is locked for `AUTH_LOCKOUT_MINUTES` (default: 15 minutes).
+- **Credential Expiry**: Each user can have an optional `expires_at` timestamp (ISO 8601). Once expired, all API requests using those credentials are rejected. Set or clear expiry via the UI or API.
+- **IAM Config Encryption**: When `SECRET_KEY` is set, the IAM config file (`iam.json`) is encrypted at rest using Fernet (AES-256-CBC with HMAC); see the inspection sketch after this list. Existing plaintext configs are automatically encrypted on next load.
- **Session Management**: UI sessions remain valid for `SESSION_LIFETIME_DAYS` (default: 30 days).
- **Hot Reload**: IAM configuration changes take effect immediately without restart.
+- **Credential Reset**: Run `python run.py --reset-cred` to reset admin credentials. Supports `ADMIN_ACCESS_KEY` and `ADMIN_SECRET_KEY` env vars for deterministic keys.
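For operators who need to inspect an encrypted `iam.json` offline, the same helpers the server uses (`_derive_fernet_key` and the `_IAM_ENCRYPTED_PREFIX` magic bytes, both in `app.iam`) can be reused. A minimal read-only sketch, assuming the app package is importable and `SECRET_KEY` matches the server's:

```python
import json
from cryptography.fernet import Fernet
from app.iam import _derive_fernet_key, _IAM_ENCRYPTED_PREFIX

raw = open("data/.myfsio.sys/config/iam.json", "rb").read()
if raw.startswith(_IAM_ENCRYPTED_PREFIX):
    # Same scheme the server uses: Fernet keyed off SECRET_KEY.
    fernet = Fernet(_derive_fernet_key("<your SECRET_KEY>"))
    raw = fernet.decrypt(raw[len(_IAM_ENCRYPTED_PREFIX):])
users = json.loads(raw).get("users", [])
print([(u["access_key"], u.get("expires_at")) for u in users])
```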
### Permission Model
@@ -814,7 +875,8 @@ curl -X POST http://localhost:5000/iam/users \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \ -H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{ -d '{
"display_name": "New User", "display_name": "New User",
"policies": [{"bucket": "*", "actions": ["list", "read"]}] "policies": [{"bucket": "*", "actions": ["list", "read"]}],
"expires_at": "2026-12-31T23:59:59Z"
}' }'
# Rotate user secret (requires iam:rotate_key) # Rotate user secret (requires iam:rotate_key)
@@ -827,6 +889,18 @@ curl -X PUT http://localhost:5000/iam/users/<access-key>/policies \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \ -H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '[{"bucket": "*", "actions": ["list", "read", "write"]}]' -d '[{"bucket": "*", "actions": ["list", "read", "write"]}]'
# Update credential expiry (requires iam:update_policy)
curl -X POST http://localhost:5000/iam/users/<access-key>/expiry \
-H "Content-Type: application/x-www-form-urlencoded" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d 'expires_at=2026-12-31T23:59:59Z'
# Remove credential expiry (never expires)
curl -X POST http://localhost:5000/iam/users/<access-key>/expiry \
-H "Content-Type: application/x-www-form-urlencoded" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d 'expires_at='
# Delete a user (requires iam:delete_user)
curl -X DELETE http://localhost:5000/iam/users/<access-key> \
  -H "X-Access-Key: ..." -H "X-Secret-Key: ..."
@@ -838,8 +912,9 @@ When a request is made, permissions are evaluated in this order:
1. **Authentication** - Verify the access key and secret key are valid
2. **Lockout Check** - Ensure the account is not locked due to failed attempts
-3. **IAM Policy Check** - Verify the user has the required action for the target bucket
-4. **Bucket Policy Check** - If a bucket policy exists, verify it allows the action
+3. **Expiry Check** - Reject requests if the user's credentials have expired (`expires_at`)
+4. **IAM Policy Check** - Verify the user has the required action for the target bucket
+5. **Bucket Policy Check** - If a bucket policy exists, verify it allows the action
A request is allowed only if:
- The IAM policy grants the action, AND

View File

@@ -19,3 +19,6 @@ regex = "1"
lru = "0.14" lru = "0.14"
parking_lot = "0.12" parking_lot = "0.12"
percent-encoding = "2" percent-encoding = "2"
aes-gcm = "0.10"
hkdf = "0.12"
uuid = { version = "1", features = ["v4"] }

myfsio_core/src/crypto.rs (new file)
@@ -0,0 +1,192 @@
use aes_gcm::aead::Aead;
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
use hkdf::Hkdf;
use pyo3::exceptions::{PyIOError, PyValueError};
use pyo3::prelude::*;
use sha2::Sha256;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom, Write};
const DEFAULT_CHUNK_SIZE: usize = 65536;
const HEADER_SIZE: usize = 4;
fn read_exact_chunk(reader: &mut impl Read, buf: &mut [u8]) -> std::io::Result<usize> {
let mut filled = 0;
while filled < buf.len() {
match reader.read(&mut buf[filled..]) {
Ok(0) => break,
Ok(n) => filled += n,
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
}
}
Ok(filled)
}
fn derive_chunk_nonce(base_nonce: &[u8], chunk_index: u32) -> Result<[u8; 12], String> {
let hkdf = Hkdf::<Sha256>::new(Some(base_nonce), b"chunk_nonce");
let mut okm = [0u8; 12];
hkdf.expand(&chunk_index.to_be_bytes(), &mut okm)
.map_err(|e| format!("HKDF expand failed: {}", e))?;
Ok(okm)
}
#[pyfunction]
#[pyo3(signature = (input_path, output_path, key, base_nonce, chunk_size=DEFAULT_CHUNK_SIZE))]
pub fn encrypt_stream_chunked(
py: Python<'_>,
input_path: &str,
output_path: &str,
key: &[u8],
base_nonce: &[u8],
chunk_size: usize,
) -> PyResult<u32> {
if key.len() != 32 {
return Err(PyValueError::new_err(format!(
"Key must be 32 bytes, got {}",
key.len()
)));
}
if base_nonce.len() != 12 {
return Err(PyValueError::new_err(format!(
"Base nonce must be 12 bytes, got {}",
base_nonce.len()
)));
}
let chunk_size = if chunk_size == 0 {
DEFAULT_CHUNK_SIZE
} else {
chunk_size
};
let inp = input_path.to_owned();
let out = output_path.to_owned();
let key_arr: [u8; 32] = key.try_into().unwrap();
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
py.detach(move || {
let cipher = Aes256Gcm::new(&key_arr.into());
let mut infile = File::open(&inp)
.map_err(|e| PyIOError::new_err(format!("Failed to open input: {}", e)))?;
let mut outfile = File::create(&out)
.map_err(|e| PyIOError::new_err(format!("Failed to create output: {}", e)))?;
outfile
.write_all(&[0u8; 4])
.map_err(|e| PyIOError::new_err(format!("Failed to write header: {}", e)))?;
let mut buf = vec![0u8; chunk_size];
let mut chunk_index: u32 = 0;
loop {
let n = read_exact_chunk(&mut infile, &mut buf)
.map_err(|e| PyIOError::new_err(format!("Failed to read: {}", e)))?;
if n == 0 {
break;
}
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)
.map_err(|e| PyValueError::new_err(e))?;
let nonce = Nonce::from_slice(&nonce_bytes);
let encrypted = cipher
.encrypt(nonce, &buf[..n])
.map_err(|e| PyValueError::new_err(format!("Encrypt failed: {}", e)))?;
let size = encrypted.len() as u32;
outfile
.write_all(&size.to_be_bytes())
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk size: {}", e)))?;
outfile
.write_all(&encrypted)
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk: {}", e)))?;
chunk_index += 1;
}
outfile
.seek(SeekFrom::Start(0))
.map_err(|e| PyIOError::new_err(format!("Failed to seek: {}", e)))?;
outfile
.write_all(&chunk_index.to_be_bytes())
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk count: {}", e)))?;
Ok(chunk_index)
})
}
#[pyfunction]
pub fn decrypt_stream_chunked(
py: Python<'_>,
input_path: &str,
output_path: &str,
key: &[u8],
base_nonce: &[u8],
) -> PyResult<u32> {
if key.len() != 32 {
return Err(PyValueError::new_err(format!(
"Key must be 32 bytes, got {}",
key.len()
)));
}
if base_nonce.len() != 12 {
return Err(PyValueError::new_err(format!(
"Base nonce must be 12 bytes, got {}",
base_nonce.len()
)));
}
let inp = input_path.to_owned();
let out = output_path.to_owned();
let key_arr: [u8; 32] = key.try_into().unwrap();
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
py.detach(move || {
let cipher = Aes256Gcm::new(&key_arr.into());
let mut infile = File::open(&inp)
.map_err(|e| PyIOError::new_err(format!("Failed to open input: {}", e)))?;
let mut outfile = File::create(&out)
.map_err(|e| PyIOError::new_err(format!("Failed to create output: {}", e)))?;
let mut header = [0u8; HEADER_SIZE];
infile
.read_exact(&mut header)
.map_err(|e| PyIOError::new_err(format!("Failed to read header: {}", e)))?;
let chunk_count = u32::from_be_bytes(header);
let mut size_buf = [0u8; HEADER_SIZE];
for chunk_index in 0..chunk_count {
infile
.read_exact(&mut size_buf)
.map_err(|e| {
PyIOError::new_err(format!(
"Failed to read chunk {} size: {}",
chunk_index, e
))
})?;
let chunk_size = u32::from_be_bytes(size_buf) as usize;
let mut encrypted = vec![0u8; chunk_size];
infile.read_exact(&mut encrypted).map_err(|e| {
PyIOError::new_err(format!("Failed to read chunk {}: {}", chunk_index, e))
})?;
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)
.map_err(|e| PyValueError::new_err(e))?;
let nonce = Nonce::from_slice(&nonce_bytes);
let decrypted = cipher.decrypt(nonce, encrypted.as_ref()).map_err(|e| {
PyValueError::new_err(format!("Decrypt chunk {} failed: {}", chunk_index, e))
})?;
outfile.write_all(&decrypted).map_err(|e| {
PyIOError::new_err(format!("Failed to write chunk {}: {}", chunk_index, e))
})?;
}
Ok(chunk_count)
})
}
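Once the extension is built, the chunked format above round-trips from Python, and the per-chunk nonce schedule can be mirrored for verification. A minimal sketch, assuming a compiled `myfsio_core` module and the `cryptography` package (file names and key material are illustrative):

```python
import os
import myfsio_core
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.hkdf import HKDF

key = os.urandom(32)         # must be exactly 32 bytes (AES-256)
base_nonce = os.urandom(12)  # must be exactly 12 bytes

with open("plain.bin", "wb") as f:
    f.write(os.urandom(1_000_000))

# Output layout: 4-byte big-endian chunk count, then repeated
# [4-byte ciphertext length || ciphertext] records.
chunks = myfsio_core.encrypt_stream_chunked("plain.bin", "enc.bin", key, base_nonce)
myfsio_core.decrypt_stream_chunked("enc.bin", "dec.bin", key, base_nonce)
assert open("plain.bin", "rb").read() == open("dec.bin", "rb").read()

def chunk_nonce(base: bytes, index: int) -> bytes:
    # Mirrors derive_chunk_nonce: HKDF-SHA256 with salt=base nonce,
    # ikm=b"chunk_nonce", and info=the u32 big-endian chunk index.
    return HKDF(algorithm=hashes.SHA256(), length=12, salt=base,
                info=index.to_bytes(4, "big")).derive(b"chunk_nonce")
```

Deriving a fresh 12-byte nonce per chunk keeps AES-GCM nonces unique under a single object key, which is what makes the chunked layout safe to use for large streams.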


@@ -1,6 +1,9 @@
+mod crypto;
mod hashing;
mod metadata;
mod sigv4;
+mod storage;
+mod streaming;
mod validation;
use pyo3::prelude::*;
@@ -29,6 +32,20 @@ mod myfsio_core {
    m.add_function(wrap_pyfunction!(metadata::read_index_entry, m)?)?;
+    m.add_function(wrap_pyfunction!(storage::write_index_entry, m)?)?;
+    m.add_function(wrap_pyfunction!(storage::delete_index_entry, m)?)?;
+    m.add_function(wrap_pyfunction!(storage::check_bucket_contents, m)?)?;
+    m.add_function(wrap_pyfunction!(storage::shallow_scan, m)?)?;
+    m.add_function(wrap_pyfunction!(storage::bucket_stats_scan, m)?)?;
+    m.add_function(wrap_pyfunction!(storage::search_objects_scan, m)?)?;
+    m.add_function(wrap_pyfunction!(storage::build_object_cache, m)?)?;
+    m.add_function(wrap_pyfunction!(streaming::stream_to_file_with_md5, m)?)?;
+    m.add_function(wrap_pyfunction!(streaming::assemble_parts_with_md5, m)?)?;
+    m.add_function(wrap_pyfunction!(crypto::encrypt_stream_chunked, m)?)?;
+    m.add_function(wrap_pyfunction!(crypto::decrypt_stream_chunked, m)?)?;
    Ok(())
}
}

myfsio_core/src/storage.rs (new file)
@@ -0,0 +1,817 @@
use pyo3::exceptions::PyIOError;
use pyo3::prelude::*;
use pyo3::types::{PyDict, PyList, PyString, PyTuple};
use serde_json::Value;
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use std::time::SystemTime;
const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
fn system_time_to_epoch(t: SystemTime) -> f64 {
t.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs_f64())
.unwrap_or(0.0)
}
fn extract_etag_from_meta_bytes(content: &[u8]) -> Option<String> {
let marker = b"\"__etag__\"";
let idx = content.windows(marker.len()).position(|w| w == marker)?;
let after = &content[idx + marker.len()..];
let start = after.iter().position(|&b| b == b'"')? + 1;
let rest = &after[start..];
let end = rest.iter().position(|&b| b == b'"')?;
std::str::from_utf8(&rest[..end]).ok().map(|s| s.to_owned())
}
fn has_any_file(root: &str) -> bool {
let root_path = Path::new(root);
if !root_path.is_dir() {
return false;
}
let mut stack = vec![root_path.to_path_buf()];
while let Some(current) = stack.pop() {
let entries = match fs::read_dir(&current) {
Ok(e) => e,
Err(_) => continue,
};
for entry_result in entries {
let entry = match entry_result {
Ok(e) => e,
Err(_) => continue,
};
let ft = match entry.file_type() {
Ok(ft) => ft,
Err(_) => continue,
};
if ft.is_file() {
return true;
}
if ft.is_dir() && !ft.is_symlink() {
stack.push(entry.path());
}
}
}
false
}
#[pyfunction]
pub fn write_index_entry(
py: Python<'_>,
path: &str,
entry_name: &str,
entry_data_json: &str,
) -> PyResult<()> {
let path_owned = path.to_owned();
let entry_owned = entry_name.to_owned();
let data_owned = entry_data_json.to_owned();
py.detach(move || -> PyResult<()> {
let entry_value: Value = serde_json::from_str(&data_owned)
.map_err(|e| PyIOError::new_err(format!("Failed to parse entry data: {}", e)))?;
if let Some(parent) = Path::new(&path_owned).parent() {
let _ = fs::create_dir_all(parent);
}
let mut index_data: serde_json::Map<String, Value> = match fs::read_to_string(&path_owned)
{
Ok(content) => serde_json::from_str(&content).unwrap_or_default(),
Err(_) => serde_json::Map::new(),
};
index_data.insert(entry_owned, entry_value);
let serialized = serde_json::to_string(&Value::Object(index_data))
.map_err(|e| PyIOError::new_err(format!("Failed to serialize index: {}", e)))?;
fs::write(&path_owned, serialized)
.map_err(|e| PyIOError::new_err(format!("Failed to write index: {}", e)))?;
Ok(())
})
}
#[pyfunction]
pub fn delete_index_entry(py: Python<'_>, path: &str, entry_name: &str) -> PyResult<bool> {
let path_owned = path.to_owned();
let entry_owned = entry_name.to_owned();
py.detach(move || -> PyResult<bool> {
let content = match fs::read_to_string(&path_owned) {
Ok(c) => c,
Err(_) => return Ok(false),
};
let mut index_data: serde_json::Map<String, Value> =
match serde_json::from_str(&content) {
Ok(v) => v,
Err(_) => return Ok(false),
};
if index_data.remove(&entry_owned).is_none() {
return Ok(false);
}
if index_data.is_empty() {
let _ = fs::remove_file(&path_owned);
return Ok(true);
}
let serialized = serde_json::to_string(&Value::Object(index_data))
.map_err(|e| PyIOError::new_err(format!("Failed to serialize index: {}", e)))?;
fs::write(&path_owned, serialized)
.map_err(|e| PyIOError::new_err(format!("Failed to write index: {}", e)))?;
Ok(false)
})
}
#[pyfunction]
pub fn check_bucket_contents(
py: Python<'_>,
bucket_path: &str,
version_roots: Vec<String>,
multipart_roots: Vec<String>,
) -> PyResult<(bool, bool, bool)> {
let bucket_owned = bucket_path.to_owned();
py.detach(move || -> PyResult<(bool, bool, bool)> {
let mut has_objects = false;
let bucket_p = Path::new(&bucket_owned);
if bucket_p.is_dir() {
let mut stack = vec![bucket_p.to_path_buf()];
'obj_scan: while let Some(current) = stack.pop() {
let is_root = current == bucket_p;
let entries = match fs::read_dir(&current) {
Ok(e) => e,
Err(_) => continue,
};
for entry_result in entries {
let entry = match entry_result {
Ok(e) => e,
Err(_) => continue,
};
let ft = match entry.file_type() {
Ok(ft) => ft,
Err(_) => continue,
};
if is_root {
if let Some(name) = entry.file_name().to_str() {
if INTERNAL_FOLDERS.contains(&name) {
continue;
}
}
}
if ft.is_file() && !ft.is_symlink() {
has_objects = true;
break 'obj_scan;
}
if ft.is_dir() && !ft.is_symlink() {
stack.push(entry.path());
}
}
}
}
let mut has_versions = false;
for root in &version_roots {
if has_versions {
break;
}
has_versions = has_any_file(root);
}
let mut has_multipart = false;
for root in &multipart_roots {
if has_multipart {
break;
}
has_multipart = has_any_file(root);
}
Ok((has_objects, has_versions, has_multipart))
})
}
#[pyfunction]
pub fn shallow_scan(
py: Python<'_>,
target_dir: &str,
prefix: &str,
meta_cache_json: &str,
) -> PyResult<Py<PyAny>> {
let target_owned = target_dir.to_owned();
let prefix_owned = prefix.to_owned();
let cache_owned = meta_cache_json.to_owned();
let result: (
Vec<(String, u64, f64, Option<String>)>,
Vec<String>,
Vec<(String, bool)>,
) = py.detach(move || -> PyResult<(
Vec<(String, u64, f64, Option<String>)>,
Vec<String>,
Vec<(String, bool)>,
)> {
let meta_cache: HashMap<String, String> =
serde_json::from_str(&cache_owned).unwrap_or_default();
let mut files: Vec<(String, u64, f64, Option<String>)> = Vec::new();
let mut dirs: Vec<String> = Vec::new();
let entries = match fs::read_dir(&target_owned) {
Ok(e) => e,
Err(_) => return Ok((files, dirs, Vec::new())),
};
for entry_result in entries {
let entry = match entry_result {
Ok(e) => e,
Err(_) => continue,
};
let name = match entry.file_name().into_string() {
Ok(n) => n,
Err(_) => continue,
};
if INTERNAL_FOLDERS.contains(&name.as_str()) {
continue;
}
let ft = match entry.file_type() {
Ok(ft) => ft,
Err(_) => continue,
};
if ft.is_dir() && !ft.is_symlink() {
let cp = format!("{}{}/", prefix_owned, name);
dirs.push(cp);
} else if ft.is_file() && !ft.is_symlink() {
let key = format!("{}{}", prefix_owned, name);
let md = match entry.metadata() {
Ok(m) => m,
Err(_) => continue,
};
let size = md.len();
let mtime = md
.modified()
.map(system_time_to_epoch)
.unwrap_or(0.0);
let etag = meta_cache.get(&key).cloned();
files.push((key, size, mtime, etag));
}
}
files.sort_by(|a, b| a.0.cmp(&b.0));
dirs.sort();
let mut merged: Vec<(String, bool)> = Vec::with_capacity(files.len() + dirs.len());
let mut fi = 0;
let mut di = 0;
while fi < files.len() && di < dirs.len() {
if files[fi].0 <= dirs[di] {
merged.push((files[fi].0.clone(), false));
fi += 1;
} else {
merged.push((dirs[di].clone(), true));
di += 1;
}
}
while fi < files.len() {
merged.push((files[fi].0.clone(), false));
fi += 1;
}
while di < dirs.len() {
merged.push((dirs[di].clone(), true));
di += 1;
}
Ok((files, dirs, merged))
})?;
let (files, dirs, merged) = result;
let dict = PyDict::new(py);
let files_list = PyList::empty(py);
for (key, size, mtime, etag) in &files {
let etag_py: Py<PyAny> = match etag {
Some(e) => PyString::new(py, e).into_any().unbind(),
None => py.None(),
};
let tuple = PyTuple::new(py, &[
PyString::new(py, key).into_any().unbind(),
size.into_pyobject(py)?.into_any().unbind(),
mtime.into_pyobject(py)?.into_any().unbind(),
etag_py,
])?;
files_list.append(tuple)?;
}
dict.set_item("files", files_list)?;
let dirs_list = PyList::empty(py);
for d in &dirs {
dirs_list.append(PyString::new(py, d))?;
}
dict.set_item("dirs", dirs_list)?;
let merged_list = PyList::empty(py);
for (key, is_dir) in &merged {
let bool_obj: Py<PyAny> = if *is_dir {
true.into_pyobject(py)?.to_owned().into_any().unbind()
} else {
false.into_pyobject(py)?.to_owned().into_any().unbind()
};
let tuple = PyTuple::new(py, &[
PyString::new(py, key).into_any().unbind(),
bool_obj,
])?;
merged_list.append(tuple)?;
}
dict.set_item("merged_keys", merged_list)?;
Ok(dict.into_any().unbind())
}
#[pyfunction]
pub fn bucket_stats_scan(
py: Python<'_>,
bucket_path: &str,
versions_root: &str,
) -> PyResult<(u64, u64, u64, u64)> {
let bucket_owned = bucket_path.to_owned();
let versions_owned = versions_root.to_owned();
py.detach(move || -> PyResult<(u64, u64, u64, u64)> {
let mut object_count: u64 = 0;
let mut total_bytes: u64 = 0;
let bucket_p = Path::new(&bucket_owned);
if bucket_p.is_dir() {
let mut stack = vec![bucket_p.to_path_buf()];
while let Some(current) = stack.pop() {
let is_root = current == bucket_p;
let entries = match fs::read_dir(&current) {
Ok(e) => e,
Err(_) => continue,
};
for entry_result in entries {
let entry = match entry_result {
Ok(e) => e,
Err(_) => continue,
};
if is_root {
if let Some(name) = entry.file_name().to_str() {
if INTERNAL_FOLDERS.contains(&name) {
continue;
}
}
}
let ft = match entry.file_type() {
Ok(ft) => ft,
Err(_) => continue,
};
if ft.is_dir() && !ft.is_symlink() {
stack.push(entry.path());
} else if ft.is_file() && !ft.is_symlink() {
object_count += 1;
if let Ok(md) = entry.metadata() {
total_bytes += md.len();
}
}
}
}
}
let mut version_count: u64 = 0;
let mut version_bytes: u64 = 0;
let versions_p = Path::new(&versions_owned);
if versions_p.is_dir() {
let mut stack = vec![versions_p.to_path_buf()];
while let Some(current) = stack.pop() {
let entries = match fs::read_dir(&current) {
Ok(e) => e,
Err(_) => continue,
};
for entry_result in entries {
let entry = match entry_result {
Ok(e) => e,
Err(_) => continue,
};
let ft = match entry.file_type() {
Ok(ft) => ft,
Err(_) => continue,
};
if ft.is_dir() && !ft.is_symlink() {
stack.push(entry.path());
} else if ft.is_file() && !ft.is_symlink() {
if let Some(name) = entry.file_name().to_str() {
if name.ends_with(".bin") {
version_count += 1;
if let Ok(md) = entry.metadata() {
version_bytes += md.len();
}
}
}
}
}
}
}
Ok((object_count, total_bytes, version_count, version_bytes))
})
}
#[pyfunction]
#[pyo3(signature = (bucket_path, search_root, query, limit))]
pub fn search_objects_scan(
py: Python<'_>,
bucket_path: &str,
search_root: &str,
query: &str,
limit: usize,
) -> PyResult<Py<PyAny>> {
let bucket_owned = bucket_path.to_owned();
let search_owned = search_root.to_owned();
let query_owned = query.to_owned();
let result: (Vec<(String, u64, f64)>, bool) = py.detach(
move || -> PyResult<(Vec<(String, u64, f64)>, bool)> {
let query_lower = query_owned.to_lowercase();
let bucket_len = bucket_owned.len() + 1;
let scan_limit = limit * 4;
let mut matched: usize = 0;
let mut results: Vec<(String, u64, f64)> = Vec::new();
let search_p = Path::new(&search_owned);
if !search_p.is_dir() {
return Ok((results, false));
}
let bucket_p = Path::new(&bucket_owned);
let mut stack = vec![search_p.to_path_buf()];
'scan: while let Some(current) = stack.pop() {
let is_bucket_root = current == bucket_p;
let entries = match fs::read_dir(&current) {
Ok(e) => e,
Err(_) => continue,
};
for entry_result in entries {
let entry = match entry_result {
Ok(e) => e,
Err(_) => continue,
};
if is_bucket_root {
if let Some(name) = entry.file_name().to_str() {
if INTERNAL_FOLDERS.contains(&name) {
continue;
}
}
}
let ft = match entry.file_type() {
Ok(ft) => ft,
Err(_) => continue,
};
if ft.is_dir() && !ft.is_symlink() {
stack.push(entry.path());
} else if ft.is_file() && !ft.is_symlink() {
let full_path = entry.path();
let full_str = full_path.to_string_lossy();
if full_str.len() <= bucket_len {
continue;
}
let key = full_str[bucket_len..].replace('\\', "/");
if key.to_lowercase().contains(&query_lower) {
if let Ok(md) = entry.metadata() {
let size = md.len();
let mtime = md
.modified()
.map(system_time_to_epoch)
.unwrap_or(0.0);
results.push((key, size, mtime));
matched += 1;
}
}
if matched >= scan_limit {
break 'scan;
}
}
}
}
results.sort_by(|a, b| a.0.cmp(&b.0));
let truncated = results.len() > limit;
results.truncate(limit);
Ok((results, truncated))
},
)?;
let (results, truncated) = result;
let dict = PyDict::new(py);
let results_list = PyList::empty(py);
for (key, size, mtime) in &results {
let tuple = PyTuple::new(py, &[
PyString::new(py, key).into_any().unbind(),
size.into_pyobject(py)?.into_any().unbind(),
mtime.into_pyobject(py)?.into_any().unbind(),
])?;
results_list.append(tuple)?;
}
dict.set_item("results", results_list)?;
dict.set_item("truncated", truncated)?;
Ok(dict.into_any().unbind())
}
#[pyfunction]
pub fn build_object_cache(
py: Python<'_>,
bucket_path: &str,
meta_root: &str,
etag_index_path: &str,
) -> PyResult<Py<PyAny>> {
let bucket_owned = bucket_path.to_owned();
let meta_owned = meta_root.to_owned();
let index_path_owned = etag_index_path.to_owned();
let result: (HashMap<String, String>, Vec<(String, u64, f64, Option<String>)>, bool) =
py.detach(move || -> PyResult<(
HashMap<String, String>,
Vec<(String, u64, f64, Option<String>)>,
bool,
)> {
let mut meta_cache: HashMap<String, String> = HashMap::new();
let mut index_mtime: f64 = 0.0;
let mut etag_cache_changed = false;
let index_p = Path::new(&index_path_owned);
if index_p.is_file() {
if let Ok(md) = fs::metadata(&index_path_owned) {
index_mtime = md
.modified()
.map(system_time_to_epoch)
.unwrap_or(0.0);
}
if let Ok(content) = fs::read_to_string(&index_path_owned) {
if let Ok(parsed) = serde_json::from_str::<HashMap<String, String>>(&content) {
meta_cache = parsed;
}
}
}
let meta_p = Path::new(&meta_owned);
let mut needs_rebuild = false;
if meta_p.is_dir() && index_mtime > 0.0 {
fn check_newer(dir: &Path, index_mtime: f64) -> bool {
let entries = match fs::read_dir(dir) {
Ok(e) => e,
Err(_) => return false,
};
for entry_result in entries {
let entry = match entry_result {
Ok(e) => e,
Err(_) => continue,
};
let ft = match entry.file_type() {
Ok(ft) => ft,
Err(_) => continue,
};
if ft.is_dir() && !ft.is_symlink() {
if check_newer(&entry.path(), index_mtime) {
return true;
}
} else if ft.is_file() {
if let Some(name) = entry.file_name().to_str() {
if name.ends_with(".meta.json") || name == "_index.json" {
if let Ok(md) = entry.metadata() {
let mt = md
.modified()
.map(system_time_to_epoch)
.unwrap_or(0.0);
if mt > index_mtime {
return true;
}
}
}
}
}
}
false
}
needs_rebuild = check_newer(meta_p, index_mtime);
} else if meta_cache.is_empty() {
needs_rebuild = true;
}
if needs_rebuild && meta_p.is_dir() {
let meta_str = meta_owned.clone();
let meta_len = meta_str.len() + 1;
let mut index_files: Vec<String> = Vec::new();
let mut legacy_meta_files: Vec<(String, String)> = Vec::new();
fn collect_meta(
dir: &Path,
meta_len: usize,
index_files: &mut Vec<String>,
legacy_meta_files: &mut Vec<(String, String)>,
) {
let entries = match fs::read_dir(dir) {
Ok(e) => e,
Err(_) => return,
};
for entry_result in entries {
let entry = match entry_result {
Ok(e) => e,
Err(_) => continue,
};
let ft = match entry.file_type() {
Ok(ft) => ft,
Err(_) => continue,
};
if ft.is_dir() && !ft.is_symlink() {
collect_meta(&entry.path(), meta_len, index_files, legacy_meta_files);
} else if ft.is_file() {
if let Some(name) = entry.file_name().to_str() {
let full = entry.path().to_string_lossy().to_string();
if name == "_index.json" {
index_files.push(full);
} else if name.ends_with(".meta.json") {
if full.len() > meta_len {
let rel = &full[meta_len..];
let key = if rel.len() > 10 {
rel[..rel.len() - 10].replace('\\', "/")
} else {
continue;
};
legacy_meta_files.push((key, full));
}
}
}
}
}
}
collect_meta(
meta_p,
meta_len,
&mut index_files,
&mut legacy_meta_files,
);
meta_cache.clear();
for idx_path in &index_files {
if let Ok(content) = fs::read_to_string(idx_path) {
if let Ok(idx_data) = serde_json::from_str::<HashMap<String, Value>>(&content) {
let rel_dir = if idx_path.len() > meta_len {
let r = &idx_path[meta_len..];
r.replace('\\', "/")
} else {
String::new()
};
let dir_prefix = if rel_dir.ends_with("/_index.json") {
&rel_dir[..rel_dir.len() - "/_index.json".len()]
} else {
""
};
for (entry_name, entry_data) in &idx_data {
let key = if dir_prefix.is_empty() {
entry_name.clone()
} else {
format!("{}/{}", dir_prefix, entry_name)
};
if let Some(meta_obj) = entry_data.get("metadata") {
if let Some(etag) = meta_obj.get("__etag__") {
if let Some(etag_str) = etag.as_str() {
meta_cache.insert(key, etag_str.to_owned());
}
}
}
}
}
}
}
for (key, path) in &legacy_meta_files {
if meta_cache.contains_key(key) {
continue;
}
if let Ok(content) = fs::read(path) {
if let Some(etag) = extract_etag_from_meta_bytes(&content) {
meta_cache.insert(key.clone(), etag);
}
}
}
etag_cache_changed = true;
}
let bucket_p = Path::new(&bucket_owned);
let bucket_len = bucket_owned.len() + 1;
let mut objects: Vec<(String, u64, f64, Option<String>)> = Vec::new();
if bucket_p.is_dir() {
let mut stack = vec![bucket_p.to_path_buf()];
while let Some(current) = stack.pop() {
let entries = match fs::read_dir(&current) {
Ok(e) => e,
Err(_) => continue,
};
for entry_result in entries {
let entry = match entry_result {
Ok(e) => e,
Err(_) => continue,
};
let ft = match entry.file_type() {
Ok(ft) => ft,
Err(_) => continue,
};
if ft.is_dir() && !ft.is_symlink() {
let full = entry.path();
let full_str = full.to_string_lossy();
if full_str.len() > bucket_len {
let first_part: &str = if let Some(sep_pos) =
full_str[bucket_len..].find(|c: char| c == '\\' || c == '/')
{
&full_str[bucket_len..bucket_len + sep_pos]
} else {
&full_str[bucket_len..]
};
if INTERNAL_FOLDERS.contains(&first_part) {
continue;
}
} else if let Some(name) = entry.file_name().to_str() {
if INTERNAL_FOLDERS.contains(&name) {
continue;
}
}
stack.push(full);
} else if ft.is_file() && !ft.is_symlink() {
let full = entry.path();
let full_str = full.to_string_lossy();
if full_str.len() <= bucket_len {
continue;
}
let rel = &full_str[bucket_len..];
let first_part: &str =
if let Some(sep_pos) = rel.find(|c: char| c == '\\' || c == '/') {
&rel[..sep_pos]
} else {
rel
};
if INTERNAL_FOLDERS.contains(&first_part) {
continue;
}
let key = rel.replace('\\', "/");
if let Ok(md) = entry.metadata() {
let size = md.len();
let mtime = md
.modified()
.map(system_time_to_epoch)
.unwrap_or(0.0);
let etag = meta_cache.get(&key).cloned();
objects.push((key, size, mtime, etag));
}
}
}
}
}
Ok((meta_cache, objects, etag_cache_changed))
})?;
let (meta_cache, objects, etag_cache_changed) = result;
let dict = PyDict::new(py);
let cache_dict = PyDict::new(py);
for (k, v) in &meta_cache {
cache_dict.set_item(k, v)?;
}
dict.set_item("etag_cache", cache_dict)?;
let objects_list = PyList::empty(py);
for (key, size, mtime, etag) in &objects {
let etag_py: Py<PyAny> = match etag {
Some(e) => PyString::new(py, e).into_any().unbind(),
None => py.None(),
};
let tuple = PyTuple::new(py, &[
PyString::new(py, key).into_any().unbind(),
size.into_pyobject(py)?.into_any().unbind(),
mtime.into_pyobject(py)?.into_any().unbind(),
etag_py,
])?;
objects_list.append(tuple)?;
}
dict.set_item("objects", objects_list)?;
dict.set_item("etag_cache_changed", etag_cache_changed)?;
Ok(dict.into_any().unbind())
}
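These scanners hand back plain Python structures, so the listing and stats paths can call them directly. A minimal sketch, assuming a compiled `myfsio_core` module (bucket and version paths are illustrative):

```python
import json
import myfsio_core

bucket = "data/demo-bucket"

# One directory level: files as (key, size, mtime, etag), sub-prefixes as
# "name/", plus a pre-merged sorted key list for pagination.
listing = myfsio_core.shallow_scan(bucket, "", json.dumps({}))
for key, size, mtime, etag in listing["files"]:
    print(key, size, etag)
print("prefixes:", listing["dirs"])

# Full-tree stats, skipping the .meta/.versions/.multipart internals.
objects, total_bytes, versions, version_bytes = myfsio_core.bucket_stats_scan(
    bucket, "data/.myfsio.sys/buckets/demo-bucket/versions")

# Case-insensitive substring search over keys, capped at `limit` results.
found = myfsio_core.search_objects_scan(bucket, bucket, "report", 100)
print(found["results"], found["truncated"])
```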

myfsio_core/src/streaming.rs (new file)

@@ -0,0 +1,112 @@
use md5::{Digest, Md5};
use pyo3::exceptions::{PyIOError, PyValueError};
use pyo3::prelude::*;
use std::fs::{self, File};
use std::io::{Read, Write};
use uuid::Uuid;
const DEFAULT_CHUNK_SIZE: usize = 262144;
#[pyfunction]
#[pyo3(signature = (stream, tmp_dir, chunk_size=DEFAULT_CHUNK_SIZE))]
pub fn stream_to_file_with_md5(
py: Python<'_>,
stream: &Bound<'_, PyAny>,
tmp_dir: &str,
chunk_size: usize,
) -> PyResult<(String, String, u64)> {
let chunk_size = if chunk_size == 0 {
DEFAULT_CHUNK_SIZE
} else {
chunk_size
};
fs::create_dir_all(tmp_dir)
.map_err(|e| PyIOError::new_err(format!("Failed to create tmp dir: {}", e)))?;
let tmp_name = format!("{}.tmp", Uuid::new_v4().as_hyphenated());
let tmp_path_buf = std::path::PathBuf::from(tmp_dir).join(&tmp_name);
let tmp_path = tmp_path_buf.to_string_lossy().into_owned();
let mut file = File::create(&tmp_path)
.map_err(|e| PyIOError::new_err(format!("Failed to create temp file: {}", e)))?;
let mut hasher = Md5::new();
let mut total_bytes: u64 = 0;
let result: PyResult<()> = (|| {
loop {
let chunk: Vec<u8> = stream.call_method1("read", (chunk_size,))?.extract()?;
if chunk.is_empty() {
break;
}
hasher.update(&chunk);
file.write_all(&chunk)
.map_err(|e| PyIOError::new_err(format!("Failed to write: {}", e)))?;
total_bytes += chunk.len() as u64;
py.check_signals()?;
}
file.sync_all()
.map_err(|e| PyIOError::new_err(format!("Failed to fsync: {}", e)))?;
Ok(())
})();
if let Err(e) = result {
drop(file);
let _ = fs::remove_file(&tmp_path);
return Err(e);
}
drop(file);
let md5_hex = format!("{:x}", hasher.finalize());
Ok((tmp_path, md5_hex, total_bytes))
}
#[pyfunction]
pub fn assemble_parts_with_md5(
py: Python<'_>,
part_paths: Vec<String>,
dest_path: &str,
) -> PyResult<String> {
if part_paths.is_empty() {
return Err(PyValueError::new_err("No parts to assemble"));
}
let dest = dest_path.to_owned();
let parts = part_paths;
py.detach(move || {
if let Some(parent) = std::path::Path::new(&dest).parent() {
fs::create_dir_all(parent)
.map_err(|e| PyIOError::new_err(format!("Failed to create dest dir: {}", e)))?;
}
let mut target = File::create(&dest)
.map_err(|e| PyIOError::new_err(format!("Failed to create dest file: {}", e)))?;
let mut hasher = Md5::new();
let mut buf = vec![0u8; 1024 * 1024];
for part_path in &parts {
let mut part = File::open(part_path)
.map_err(|e| PyIOError::new_err(format!("Failed to open part {}: {}", part_path, e)))?;
loop {
let n = part
.read(&mut buf)
.map_err(|e| PyIOError::new_err(format!("Failed to read part: {}", e)))?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
target
.write_all(&buf[..n])
.map_err(|e| PyIOError::new_err(format!("Failed to write: {}", e)))?;
}
}
target.sync_all()
.map_err(|e| PyIOError::new_err(format!("Failed to fsync: {}", e)))?;
Ok(format!("{:x}", hasher.finalize()))
})
}
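
For orientation, a minimal sketch of how these two hot paths might be driven from the Python side. It assumes the compiled extension imports as `myfsio_core` and that `stream` is any file-like object exposing `read(n)`; the wrapper function names are illustrative, not MyFSIO's actual call sites:

```python
# Illustrative driver for the Rust hot paths above; wrapper names are hypothetical.
import os

import myfsio_core  # compiled Rust extension (assumed module name)


def store_upload(stream, tmp_dir: str, dest_path: str) -> dict:
    # Spool the body to a unique temp file, hashing MD5 in the same pass.
    tmp_path, md5_hex, size = myfsio_core.stream_to_file_with_md5(stream, tmp_dir)
    os.replace(tmp_path, dest_path)  # atomic rename into place
    return {"etag": md5_hex, "size": size}


def complete_multipart(part_paths: list, dest_path: str) -> str:
    # Concatenate parts into the final object and return its MD5 hex digest.
    return myfsio_core.assemble_parts_with_md5(part_paths, dest_path)
```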

run.py

@@ -23,6 +23,7 @@ from typing import Optional
 from app import create_api_app, create_ui_app
 from app.config import AppConfig
+from app.iam import IamService, IamError, ALLOWED_ACTIONS, _derive_fernet_key

 def _server_host() -> str:

@@ -87,21 +88,121 @@ def serve_ui(port: int, prod: bool = False, config: Optional[AppConfig] = None)
     app.run(host=_server_host(), port=port, debug=debug)

+
+def reset_credentials() -> None:
+    import json
+    import secrets
+
+    from cryptography.fernet import Fernet
+
+    config = AppConfig.from_env()
+    iam_path = config.iam_config_path
+    encryption_key = config.secret_key
+    access_key = os.environ.get("ADMIN_ACCESS_KEY", "").strip() or secrets.token_hex(12)
+    secret_key = os.environ.get("ADMIN_SECRET_KEY", "").strip() or secrets.token_urlsafe(32)
+    custom_keys = bool(os.environ.get("ADMIN_ACCESS_KEY", "").strip())
+    fernet = Fernet(_derive_fernet_key(encryption_key)) if encryption_key else None
+    raw_config = None
+    if iam_path.exists():
+        try:
+            raw_bytes = iam_path.read_bytes()
+            from app.iam import _IAM_ENCRYPTED_PREFIX
+            if raw_bytes.startswith(_IAM_ENCRYPTED_PREFIX):
+                if fernet:
+                    try:
+                        content = fernet.decrypt(raw_bytes[len(_IAM_ENCRYPTED_PREFIX):]).decode("utf-8")
+                        raw_config = json.loads(content)
+                    except Exception:
+                        print("WARNING: Could not decrypt existing IAM config. Creating fresh config.")
+                else:
+                    print("WARNING: IAM config is encrypted but no SECRET_KEY available. Creating fresh config.")
+            else:
+                try:
+                    raw_config = json.loads(raw_bytes.decode("utf-8"))
+                except json.JSONDecodeError:
+                    print("WARNING: Existing IAM config is corrupted. Creating fresh config.")
+        except OSError:
+            pass
+    if raw_config and raw_config.get("users"):
+        admin_user = None
+        for user in raw_config["users"]:
+            policies = user.get("policies", [])
+            for p in policies:
+                actions = p.get("actions", [])
+                if "iam:*" in actions or "*" in actions:
+                    admin_user = user
+                    break
+            if admin_user:
+                break
+        if not admin_user:
+            admin_user = raw_config["users"][0]
+        admin_user["access_key"] = access_key
+        admin_user["secret_key"] = secret_key
+    else:
+        raw_config = {
+            "users": [
+                {
+                    "access_key": access_key,
+                    "secret_key": secret_key,
+                    "display_name": "Local Admin",
+                    "policies": [
+                        {"bucket": "*", "actions": list(ALLOWED_ACTIONS)}
+                    ],
+                }
+            ]
+        }
+    json_text = json.dumps(raw_config, indent=2)
+    iam_path.parent.mkdir(parents=True, exist_ok=True)
+    temp_path = iam_path.with_suffix(".json.tmp")
+    if fernet:
+        from app.iam import _IAM_ENCRYPTED_PREFIX
+        encrypted = fernet.encrypt(json_text.encode("utf-8"))
+        temp_path.write_bytes(_IAM_ENCRYPTED_PREFIX + encrypted)
+    else:
+        temp_path.write_text(json_text, encoding="utf-8")
+    temp_path.replace(iam_path)
+    print(f"\n{'='*60}")
+    print("MYFSIO - ADMIN CREDENTIALS RESET")
+    print(f"{'='*60}")
+    if custom_keys:
+        print(f"Access Key: {access_key} (from ADMIN_ACCESS_KEY)")
+        print(f"Secret Key: {'(from ADMIN_SECRET_KEY)' if os.environ.get('ADMIN_SECRET_KEY', '').strip() else secret_key}")
+    else:
+        print(f"Access Key: {access_key}")
+        print(f"Secret Key: {secret_key}")
+    print(f"{'='*60}")
+    if fernet:
+        print("IAM config saved (encrypted).")
+    else:
+        print(f"IAM config saved to: {iam_path}")
+    print(f"{'='*60}\n")

 if __name__ == "__main__":
     multiprocessing.freeze_support()
     if _is_frozen():
         multiprocessing.set_start_method("spawn", force=True)
     parser = argparse.ArgumentParser(description="Run the S3 clone services.")
-    parser.add_argument("--mode", choices=["api", "ui", "both"], default="both")
+    parser.add_argument("--mode", choices=["api", "ui", "both", "reset-cred"], default="both")
     parser.add_argument("--api-port", type=int, default=5000)
     parser.add_argument("--ui-port", type=int, default=5100)
     parser.add_argument("--prod", action="store_true", help="Run in production mode using Waitress")
     parser.add_argument("--dev", action="store_true", help="Force development mode (Flask dev server)")
     parser.add_argument("--check-config", action="store_true", help="Validate configuration and exit")
     parser.add_argument("--show-config", action="store_true", help="Show configuration summary and exit")
+    parser.add_argument("--reset-cred", action="store_true", help="Reset admin credentials and exit")
     args = parser.parse_args()
+    if args.reset_cred or args.mode == "reset-cred":
+        reset_credentials()
+        sys.exit(0)
     if args.check_config or args.show_config:
         config = AppConfig.from_env()
         config.print_startup_summary()
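
In practice the new flag composes with the `ADMIN_ACCESS_KEY` / `ADMIN_SECRET_KEY` variables documented in the docs.html changes below; a provisioning sketch (key values are placeholders):

```python
# Sketch: pin admin credentials during a scripted reset; values are placeholders.
import os
import subprocess

env = dict(os.environ, ADMIN_ACCESS_KEY="ci-admin", ADMIN_SECRET_KEY="example-secret")
# Equivalent shell form: ADMIN_ACCESS_KEY=... ADMIN_SECRET_KEY=... python run.py --reset-cred
subprocess.run(["python", "run.py", "--reset-cred"], env=env, check=True)
```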


@@ -1154,39 +1154,20 @@ html.sidebar-will-collapse .sidebar-user {
   position: relative;
   border: 1px solid var(--myfsio-card-border) !important;
   border-radius: 1rem !important;
-  overflow: hidden;
+  overflow: visible;
   transition: all 0.2s cubic-bezier(0.4, 0, 0.2, 1);
 }
-.iam-user-card::before {
-  content: '';
-  position: absolute;
-  top: 0;
-  left: 0;
-  right: 0;
-  height: 4px;
-  background: linear-gradient(90deg, #3b82f6, #8b5cf6);
-  opacity: 0;
-  transition: opacity 0.2s ease;
-}
 .iam-user-card:hover {
   transform: translateY(-2px);
   box-shadow: 0 8px 24px -4px rgba(0, 0, 0, 0.12), 0 4px 8px -4px rgba(0, 0, 0, 0.08);
   border-color: var(--myfsio-accent) !important;
 }
-.iam-user-card:hover::before {
-  opacity: 1;
-}
 [data-theme='dark'] .iam-user-card:hover {
   box-shadow: 0 8px 24px -4px rgba(0, 0, 0, 0.4), 0 4px 8px -4px rgba(0, 0, 0, 0.3);
 }
-.iam-admin-card::before {
-  background: linear-gradient(90deg, #f59e0b, #ef4444);
-}
 .iam-role-badge {
   display: inline-flex;


@@ -321,7 +321,7 @@
   `;
 };
-const bucketTotalObjects = objectsContainer ? parseInt(objectsContainer.dataset.bucketTotalObjects || '0', 10) : 0;
+let bucketTotalObjects = objectsContainer ? parseInt(objectsContainer.dataset.bucketTotalObjects || '0', 10) : 0;
 const updateObjectCountBadge = () => {
   if (!objectCountBadge) return;

@@ -702,6 +702,7 @@
     flushPendingStreamObjects();
     hasMoreObjects = false;
     totalObjectCount = loadedObjectCount;
+    if (!currentPrefix && !useDelimiterMode) bucketTotalObjects = totalObjectCount;
     updateObjectCountBadge();
     if (objectsLoadingRow && objectsLoadingRow.parentNode) {

@@ -766,6 +767,7 @@
   }
   totalObjectCount = data.total_count || 0;
+  if (!append && !currentPrefix && !useDelimiterMode) bucketTotalObjects = totalObjectCount;
   nextContinuationToken = data.next_continuation_token;
   if (!append && objectsLoadingRow) {


@@ -11,9 +11,11 @@ window.IAMManagement = (function() {
 var editUserModal = null;
 var deleteUserModal = null;
 var rotateSecretModal = null;
+var expiryModal = null;
 var currentRotateKey = null;
 var currentEditKey = null;
 var currentDeleteKey = null;
+var currentExpiryKey = null;
 var ALL_S3_ACTIONS = ['list', 'read', 'write', 'delete', 'share', 'policy', 'replication', 'lifecycle', 'cors'];

@@ -65,6 +67,7 @@ window.IAMManagement = (function() {
   setupEditUserModal();
   setupDeleteUserModal();
   setupRotateSecretModal();
+  setupExpiryModal();
   setupFormHandlers();
   setupSearch();
   setupCopyAccessKeyButtons();

@@ -75,11 +78,13 @@ window.IAMManagement = (function() {
   var editModalEl = document.getElementById('editUserModal');
   var deleteModalEl = document.getElementById('deleteUserModal');
   var rotateModalEl = document.getElementById('rotateSecretModal');
+  var expiryModalEl = document.getElementById('expiryModal');
   if (policyModalEl) policyModal = new bootstrap.Modal(policyModalEl);
   if (editModalEl) editUserModal = new bootstrap.Modal(editModalEl);
   if (deleteModalEl) deleteUserModal = new bootstrap.Modal(deleteModalEl);
   if (rotateModalEl) rotateSecretModal = new bootstrap.Modal(rotateModalEl);
+  if (expiryModalEl) expiryModal = new bootstrap.Modal(expiryModalEl);
 }

 function setupJsonAutoIndent() {

@@ -97,6 +102,15 @@ window.IAMManagement = (function() {
     });
   });
+  var accessKeyCopyButton = document.querySelector('[data-access-key-copy]');
+  if (accessKeyCopyButton) {
+    accessKeyCopyButton.addEventListener('click', async function() {
+      var accessKeyInput = document.getElementById('disclosedAccessKeyValue');
+      if (!accessKeyInput) return;
+      await window.UICore.copyToClipboard(accessKeyInput.value, accessKeyCopyButton, 'Copy');
+    });
+  }
   var secretCopyButton = document.querySelector('[data-secret-copy]');
   if (secretCopyButton) {
     secretCopyButton.addEventListener('click', async function() {

@@ -143,6 +157,22 @@ window.IAMManagement = (function() {
   });
 }

+function generateSecureHex(byteCount) {
+  var arr = new Uint8Array(byteCount);
+  crypto.getRandomValues(arr);
+  return Array.from(arr).map(function(b) { return b.toString(16).padStart(2, '0'); }).join('');
+}
+
+function generateSecureBase64(byteCount) {
+  var arr = new Uint8Array(byteCount);
+  crypto.getRandomValues(arr);
+  var binary = '';
+  for (var i = 0; i < arr.length; i++) {
+    binary += String.fromCharCode(arr[i]);
+  }
+  return btoa(binary).replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/, '');
+}
+
 function setupCreateUserModal() {
   var createUserPoliciesEl = document.getElementById('createUserPolicies');

@@ -151,6 +181,22 @@ window.IAMManagement = (function() {
       applyPolicyTemplate(button.dataset.createPolicyTemplate, createUserPoliciesEl);
     });
   });
+  var genAccessKeyBtn = document.getElementById('generateAccessKeyBtn');
+  if (genAccessKeyBtn) {
+    genAccessKeyBtn.addEventListener('click', function() {
+      var input = document.getElementById('createUserAccessKey');
+      if (input) input.value = generateSecureHex(8);
+    });
+  }
+  var genSecretKeyBtn = document.getElementById('generateSecretKeyBtn');
+  if (genSecretKeyBtn) {
+    genSecretKeyBtn.addEventListener('click', function() {
+      var input = document.getElementById('createUserSecretKey');
+      if (input) input.value = generateSecureBase64(24);
+    });
+  }
 }

 function setupEditUserModal() {

@@ -271,6 +317,77 @@ window.IAMManagement = (function() {
   }
 }

+function openExpiryModal(key, expiresAt) {
+  currentExpiryKey = key;
+  var label = document.getElementById('expiryUserLabel');
+  var input = document.getElementById('expiryDateInput');
+  var form = document.getElementById('expiryForm');
+  if (label) label.textContent = key;
+  if (expiresAt) {
+    try {
+      var dt = new Date(expiresAt);
+      var local = new Date(dt.getTime() - dt.getTimezoneOffset() * 60000);
+      if (input) input.value = local.toISOString().slice(0, 16);
+    } catch(e) {
+      if (input) input.value = '';
+    }
+  } else {
+    if (input) input.value = '';
+  }
+  if (form) form.action = endpoints.updateExpiry.replace('ACCESS_KEY', key);
+  var modalEl = document.getElementById('expiryModal');
+  if (modalEl) {
+    var modal = bootstrap.Modal.getOrCreateInstance(modalEl);
+    modal.show();
+  }
+}
+
+function setupExpiryModal() {
+  document.querySelectorAll('[data-expiry-user]').forEach(function(btn) {
+    btn.addEventListener('click', function(e) {
+      e.preventDefault();
+      openExpiryModal(btn.dataset.expiryUser, btn.dataset.expiresAt || '');
+    });
+  });
+  document.querySelectorAll('[data-expiry-preset]').forEach(function(btn) {
+    btn.addEventListener('click', function() {
+      var preset = btn.dataset.expiryPreset;
+      var input = document.getElementById('expiryDateInput');
+      if (!input) return;
+      if (preset === 'clear') {
+        input.value = '';
+        return;
+      }
+      var now = new Date();
+      var ms = 0;
+      if (preset === '1h') ms = 3600000;
+      else if (preset === '24h') ms = 86400000;
+      else if (preset === '7d') ms = 7 * 86400000;
+      else if (preset === '30d') ms = 30 * 86400000;
+      else if (preset === '90d') ms = 90 * 86400000;
+      var future = new Date(now.getTime() + ms);
+      var local = new Date(future.getTime() - future.getTimezoneOffset() * 60000);
+      input.value = local.toISOString().slice(0, 16);
+    });
+  });
+  var expiryForm = document.getElementById('expiryForm');
+  if (expiryForm) {
+    expiryForm.addEventListener('submit', function(e) {
+      e.preventDefault();
+      window.UICore.submitFormAjax(expiryForm, {
+        successMessage: 'Expiry updated',
+        onSuccess: function() {
+          var modalEl = document.getElementById('expiryModal');
+          if (modalEl) bootstrap.Modal.getOrCreateInstance(modalEl).hide();
+          window.location.reload();
+        }
+      });
+    });
+  }
+}
+
 function createUserCardHtml(accessKey, displayName, policies) {
   var admin = isAdminUser(policies);
   var cardClass = 'card h-100 iam-user-card' + (admin ? ' iam-admin-card' : '');

@@ -324,6 +441,8 @@ window.IAMManagement = (function() {
     '<ul class="dropdown-menu dropdown-menu-end">' +
     '<li><button class="dropdown-item" type="button" data-edit-user="' + esc(accessKey) + '" data-display-name="' + esc(displayName) + '">' +
     '<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16"><path d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5 13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5z"/></svg>Edit Name</button></li>' +
+    '<li><button class="dropdown-item" type="button" data-expiry-user="' + esc(accessKey) + '" data-expires-at="">' +
+    '<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16"><path d="M8 3.5a.5.5 0 0 0-1 0V9a.5.5 0 0 0 .252.434l3.5 2a.5.5 0 0 0 .496-.868L8 8.71V3.5z"/><path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm7-8A7 7 0 1 1 1 8a7 7 0 0 1 14 0z"/></svg>Set Expiry</button></li>' +
     '<li><button class="dropdown-item" type="button" data-rotate-user="' + esc(accessKey) + '">' +
     '<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16"><path d="M11.534 7h3.932a.25.25 0 0 1 .192.41l-1.966 2.36a.25.25 0 0 1-.384 0l-1.966-2.36a.25.25 0 0 1 .192-.41zm-11 2h3.932a.25.25 0 0 0 .192-.41L2.692 6.23a.25.25 0 0 0-.384 0L.342 8.59A.25.25 0 0 0 .534 9z"/><path fill-rule="evenodd" d="M8 3c-1.552 0-2.94.707-3.857 1.818a.5.5 0 1 1-.771-.636A6.002 6.002 0 0 1 13.917 7H12.9A5.002 5.002 0 0 0 8 3zM3.1 9a5.002 5.002 0 0 0 8.757 2.182.5.5 0 1 1 .771.636A6.002 6.002 0 0 1 2.083 9H3.1z"/></svg>Rotate Secret</button></li>' +
     '<li><hr class="dropdown-divider"></li>' +

@@ -379,6 +498,14 @@ window.IAMManagement = (function() {
     });
   }
+  var expiryBtn = cardElement.querySelector('[data-expiry-user]');
+  if (expiryBtn) {
+    expiryBtn.addEventListener('click', function(e) {
+      e.preventDefault();
+      openExpiryModal(accessKey, '');
+    });
+  }
   var policyBtn = cardElement.querySelector('[data-policy-editor]');
   if (policyBtn) {
     policyBtn.addEventListener('click', function() {

@@ -428,10 +555,15 @@ window.IAMManagement = (function() {
     '</svg>' +
     '<div class="flex-grow-1">' +
     '<div class="fw-semibold">New user created: <code>' + window.UICore.escapeHtml(data.access_key) + '</code></div>' +
-    '<p class="mb-2 small">This secret is only shown once. Copy it now and store it securely.</p>' +
+    '<p class="mb-2 small">These credentials are only shown once. Copy them now and store them securely.</p>' +
     '</div>' +
     '<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>' +
     '</div>' +
+    '<div class="input-group mb-2">' +
+    '<span class="input-group-text"><strong>Access key</strong></span>' +
+    '<input class="form-control font-monospace" type="text" value="' + window.UICore.escapeHtml(data.access_key) + '" readonly />' +
+    '<button class="btn btn-outline-primary" type="button" id="copyNewUserAccessKey">Copy</button>' +
+    '</div>' +
     '<div class="input-group">' +
     '<span class="input-group-text"><strong>Secret key</strong></span>' +
     '<input class="form-control font-monospace" type="text" value="' + window.UICore.escapeHtml(data.secret_key) + '" readonly id="newUserSecret" />' +

@@ -440,6 +572,9 @@ window.IAMManagement = (function() {
   var container = document.querySelector('.page-header');
   if (container) {
     container.insertAdjacentHTML('afterend', alertHtml);
+    document.getElementById('copyNewUserAccessKey').addEventListener('click', async function() {
+      await window.UICore.copyToClipboard(data.access_key, this, 'Copy');
+    });
     document.getElementById('copyNewUserSecret').addEventListener('click', async function() {
       await window.UICore.copyToClipboard(data.secret_key, this, 'Copy');
     });
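
The `generateSecureHex` / `generateSecureBase64` helpers above are browser-side analogues of what `run.py` already does with Python's `secrets` module; for comparison, server-side equivalents at the same byte counts:

```python
# Server-side counterparts of generateSecureHex(8) and generateSecureBase64(24).
import secrets

access_key = secrets.token_hex(8)       # 16 hex chars, like generateSecureHex(8)
secret_key = secrets.token_urlsafe(24)  # URL-safe base64: '+' -> '-', '/' -> '_', padding stripped
```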


@@ -51,7 +51,7 @@
 </div>
 <div>
 <h5 class="bucket-name text-break">{{ bucket.meta.name }}</h5>
-<small class="text-muted">Created {{ bucket.meta.created_at | format_datetime }}</small>
+<small class="text-muted">Created {{ bucket.meta.creation_date | format_datetime }}</small>
 </div>
 </div>
 <span class="badge {{ bucket.access_badge }} bucket-access-badge">{{ bucket.access_label }}</span>


@@ -40,6 +40,7 @@
<li><a href="#quotas">Bucket Quotas</a></li> <li><a href="#quotas">Bucket Quotas</a></li>
<li><a href="#encryption">Encryption</a></li> <li><a href="#encryption">Encryption</a></li>
<li><a href="#lifecycle">Lifecycle Rules</a></li> <li><a href="#lifecycle">Lifecycle Rules</a></li>
<li><a href="#garbage-collection">Garbage Collection</a></li>
<li><a href="#metrics">Metrics History</a></li> <li><a href="#metrics">Metrics History</a></li>
<li><a href="#operation-metrics">Operation Metrics</a></li> <li><a href="#operation-metrics">Operation Metrics</a></li>
<li><a href="#troubleshooting">Troubleshooting</a></li> <li><a href="#troubleshooting">Troubleshooting</a></li>
@@ -202,6 +203,16 @@ python run.py --mode ui
<td><code>60 per minute</code></td> <td><code>60 per minute</code></td>
<td>Rate limit for admin API endpoints (<code>/admin/*</code>).</td> <td>Rate limit for admin API endpoints (<code>/admin/*</code>).</td>
</tr> </tr>
<tr>
<td><code>ADMIN_ACCESS_KEY</code></td>
<td>(none)</td>
<td>Custom access key for the admin user on first run or credential reset. Random if unset.</td>
</tr>
<tr>
<td><code>ADMIN_SECRET_KEY</code></td>
<td>(none)</td>
<td>Custom secret key for the admin user on first run or credential reset. Random if unset.</td>
</tr>
<tr class="table-secondary"> <tr class="table-secondary">
<td colspan="3" class="fw-semibold">Server Settings</td> <td colspan="3" class="fw-semibold">Server Settings</td>
</tr> </tr>
@@ -428,7 +439,7 @@ python run.py --mode ui
</table> </table>
</div> </div>
<div class="alert alert-warning mt-3 mb-0 small"> <div class="alert alert-warning mt-3 mb-0 small">
<strong>Production Checklist:</strong> Set <code>SECRET_KEY</code>, restrict <code>CORS_ORIGINS</code>, configure <code>API_BASE_URL</code>, enable HTTPS via reverse proxy, and use <code>--prod</code> flag. <strong>Production Checklist:</strong> Set <code>SECRET_KEY</code> (also enables IAM config encryption at rest), restrict <code>CORS_ORIGINS</code>, configure <code>API_BASE_URL</code>, enable HTTPS via reverse proxy, use <code>--prod</code> flag, and set credential expiry on non-admin users.
</div> </div>
</div> </div>
</article> </article>
@@ -495,11 +506,12 @@ sudo journalctl -u myfsio -f # View logs</code></pre>
<span class="docs-section-kicker">03</span> <span class="docs-section-kicker">03</span>
<h2 class="h4 mb-0">Authenticate &amp; manage IAM</h2> <h2 class="h4 mb-0">Authenticate &amp; manage IAM</h2>
</div> </div>
<p class="text-muted">On first startup, MyFSIO generates random admin credentials and prints them to the console. Missed it? Check <code>data/.myfsio.sys/config/iam.json</code> directly—credentials are stored in plaintext.</p> <p class="text-muted">On first startup, MyFSIO generates random admin credentials and prints them to the console. Set <code>ADMIN_ACCESS_KEY</code> and <code>ADMIN_SECRET_KEY</code> env vars for custom credentials. When <code>SECRET_KEY</code> is configured, the IAM config is encrypted at rest. To reset credentials, run <code>python run.py --reset-cred</code>.</p>
<div class="docs-highlight mb-3"> <div class="docs-highlight mb-3">
<ol class="mb-0"> <ol class="mb-0">
<li>Check the console output (or <code>iam.json</code>) for the generated <code>Access Key</code> and <code>Secret Key</code>, then visit <code>/ui/login</code>.</li> <li>Check the console output for the generated <code>Access Key</code> and <code>Secret Key</code>, then visit <code>/ui/login</code>.</li>
<li>Create additional users with descriptive display names and AWS-style inline policies (for example <code>{"bucket": "*", "actions": ["list", "read"]}</code>).</li> <li>Create additional users with descriptive display names, AWS-style inline policies (for example <code>{"bucket": "*", "actions": ["list", "read"]}</code>), and optional credential expiry dates.</li>
<li>Set credential expiry on users to grant time-limited access. The UI shows expiry badges and provides preset durations (1h, 24h, 7d, 30d, 90d). Expired credentials are rejected at authentication.</li>
<li>Rotate secrets when sharing with CI jobs—new secrets display once and persist to <code>data/.myfsio.sys/config/iam.json</code>.</li> <li>Rotate secrets when sharing with CI jobs—new secrets display once and persist to <code>data/.myfsio.sys/config/iam.json</code>.</li>
<li>Bucket policies layer on top of IAM. Apply Private/Public presets or paste custom JSON; changes reload instantly.</li> <li>Bucket policies layer on top of IAM. Apply Private/Public presets or paste custom JSON; changes reload instantly.</li>
</ol> </ol>
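
The expiry check itself is not part of this diff; as a rough sketch of the behavior the docs describe, mirroring the string comparison the IAM template uses below (field name and ISO format are assumptions, not MyFSIO's actual code):

```python
# Illustrative expiry gate; the stored field name and ISO format are assumptions.
from datetime import datetime, timezone

def credentials_expired(user: dict) -> bool:
    expires_at = user.get("expires_at")
    if not expires_at:
        return False  # blank expiry: the credential never expires
    now_iso = datetime.now(timezone.utc).isoformat()
    return expires_at <= now_iso  # ISO-8601 timestamps sort chronologically
```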
@@ -1616,10 +1628,113 @@ curl "{{ api_base }}/&lt;bucket&gt;?lifecycle" \
 </div>
 </div>
 </article>
-<article id="metrics" class="card shadow-sm docs-section">
+<article id="garbage-collection" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
 <span class="docs-section-kicker">14</span>
+<h2 class="h4 mb-0">Garbage Collection</h2>
+</div>
+<p class="text-muted">Automatically clean up orphaned data that accumulates over time: stale temp files, abandoned multipart uploads, stale lock files, orphaned metadata, orphaned versions, and empty directories.</p>
+<h3 class="h6 text-uppercase text-muted mt-4">Enabling GC</h3>
+<p class="small text-muted">Disabled by default. Enable via environment variable:</p>
+<pre class="mb-3"><code class="language-bash">GC_ENABLED=true python run.py</code></pre>
+<h3 class="h6 text-uppercase text-muted mt-4">Configuration</h3>
+<div class="table-responsive mb-3">
+<table class="table table-sm table-bordered small">
+<thead class="table-light">
+<tr>
+<th>Variable</th>
+<th>Default</th>
+<th>Description</th>
+</tr>
+</thead>
+<tbody>
+<tr><td><code>GC_ENABLED</code></td><td><code>false</code></td><td>Enable garbage collection</td></tr>
+<tr><td><code>GC_INTERVAL_HOURS</code></td><td><code>6</code></td><td>Hours between GC cycles</td></tr>
+<tr><td><code>GC_TEMP_FILE_MAX_AGE_HOURS</code></td><td><code>24</code></td><td>Delete temp files older than this</td></tr>
+<tr><td><code>GC_MULTIPART_MAX_AGE_DAYS</code></td><td><code>7</code></td><td>Delete orphaned multipart uploads older than this</td></tr>
+<tr><td><code>GC_LOCK_FILE_MAX_AGE_HOURS</code></td><td><code>1</code></td><td>Delete stale lock files older than this</td></tr>
+<tr><td><code>GC_DRY_RUN</code></td><td><code>false</code></td><td>Log what would be deleted without removing</td></tr>
+</tbody>
+</table>
+</div>
+<h3 class="h6 text-uppercase text-muted mt-4">What Gets Cleaned</h3>
+<div class="table-responsive mb-3">
+<table class="table table-sm table-bordered small">
+<thead class="table-light">
+<tr>
+<th>Type</th>
+<th>Location</th>
+<th>Condition</th>
+</tr>
+</thead>
+<tbody>
+<tr><td><strong>Temp files</strong></td><td><code>.myfsio.sys/tmp/</code></td><td>Older than configured max age</td></tr>
+<tr><td><strong>Orphaned multipart</strong></td><td><code>.myfsio.sys/multipart/</code></td><td>Older than configured max age</td></tr>
+<tr><td><strong>Stale lock files</strong></td><td><code>.myfsio.sys/buckets/&lt;bucket&gt;/locks/</code></td><td>Older than configured max age</td></tr>
+<tr><td><strong>Orphaned metadata</strong></td><td><code>.myfsio.sys/buckets/&lt;bucket&gt;/meta/</code></td><td>Object file no longer exists</td></tr>
+<tr><td><strong>Orphaned versions</strong></td><td><code>.myfsio.sys/buckets/&lt;bucket&gt;/versions/</code></td><td>Main object no longer exists</td></tr>
+<tr><td><strong>Empty directories</strong></td><td>Various internal dirs</td><td>Directory is empty after cleanup</td></tr>
+</tbody>
+</table>
+</div>
+<h3 class="h6 text-uppercase text-muted mt-4">Admin API</h3>
+<div class="table-responsive mb-3">
+<table class="table table-sm table-bordered small">
+<thead class="table-light">
+<tr>
+<th>Method</th>
+<th>Route</th>
+<th>Description</th>
+</tr>
+</thead>
+<tbody>
+<tr><td><code>GET</code></td><td><code>/admin/gc/status</code></td><td>Get GC status and configuration</td></tr>
+<tr><td><code>POST</code></td><td><code>/admin/gc/run</code></td><td>Trigger manual GC run</td></tr>
+<tr><td><code>GET</code></td><td><code>/admin/gc/history</code></td><td>Get execution history</td></tr>
+</tbody>
+</table>
+</div>
+<pre class="mb-3"><code class="language-bash"># Trigger a dry run (preview what would be cleaned)
+curl -X POST "{{ api_base }}/admin/gc/run" \
+  -H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;" \
+  -H "Content-Type: application/json" \
+  -d '{"dry_run": true}'
+
+# Trigger actual GC
+curl -X POST "{{ api_base }}/admin/gc/run" \
+  -H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;"
+
+# Check status
+curl "{{ api_base }}/admin/gc/status" \
+  -H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;"
+
+# View history
+curl "{{ api_base }}/admin/gc/history?limit=10" \
+  -H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;"</code></pre>
+<div class="alert alert-light border mb-0">
+<div class="d-flex gap-2">
+<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-info-circle text-muted mt-1 flex-shrink-0" viewBox="0 0 16 16">
+<path d="M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z"/>
+<path d="m8.93 6.588-2.29.287-.082.38.45.083c.294.07.352.176.288.469l-.738 3.468c-.194.897.105 1.319.808 1.319.545 0 1.178-.252 1.465-.598l.088-.416c-.2.176-.492.246-.686.246-.275 0-.375-.193-.304-.533L8.93 6.588zM9 4.5a1 1 0 1 1-2 0 1 1 0 0 1 2 0z"/>
+</svg>
+<div>
+<strong>Dry Run:</strong> Use <code>GC_DRY_RUN=true</code> or pass <code>{"dry_run": true}</code> to the API to preview what would be deleted without actually removing anything. Check the logs or API response for details.
+</div>
+</div>
+</div>
+</div>
+</article>
+<article id="metrics" class="card shadow-sm docs-section">
+<div class="card-body">
+<div class="d-flex align-items-center gap-2 mb-3">
+<span class="docs-section-kicker">15</span>
 <h2 class="h4 mb-0">Metrics History</h2>
 </div>
 <p class="text-muted">Track CPU, memory, and disk usage over time with optional metrics history. Disabled by default to minimize overhead.</p>

@@ -1703,7 +1818,7 @@ curl -X PUT "{{ api_base | replace('/api', '/ui') }}/metrics/settings" \
 <article id="operation-metrics" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">15</span>
+<span class="docs-section-kicker">16</span>
 <h2 class="h4 mb-0">Operation Metrics</h2>
 </div>
 <p class="text-muted">Track API request statistics including request counts, latency, error rates, and bandwidth usage. Provides real-time visibility into API operations.</p>

@@ -1810,7 +1925,7 @@ curl "{{ api_base | replace('/api', '/ui') }}/metrics/operations/history?hours=6
 <article id="troubleshooting" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">16</span>
+<span class="docs-section-kicker">17</span>
 <h2 class="h4 mb-0">Troubleshooting &amp; tips</h2>
 </div>
 <div class="table-responsive">

@@ -1861,7 +1976,7 @@ curl "{{ api_base | replace('/api', '/ui') }}/metrics/operations/history?hours=6
 <article id="health-check" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">17</span>
+<span class="docs-section-kicker">18</span>
 <h2 class="h4 mb-0">Health Check Endpoint</h2>
 </div>
 <p class="text-muted">The API exposes a health check endpoint for monitoring and load balancer integration.</p>

@@ -1883,7 +1998,7 @@ curl {{ api_base }}/myfsio/health
 <article id="object-lock" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">18</span>
+<span class="docs-section-kicker">19</span>
 <h2 class="h4 mb-0">Object Lock &amp; Retention</h2>
 </div>
 <p class="text-muted">Object Lock prevents objects from being deleted or overwritten for a specified retention period.</p>

@@ -1943,7 +2058,7 @@ curl "{{ api_base }}/&lt;bucket&gt;/&lt;key&gt;?legal-hold" \
 <article id="access-logging" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">19</span>
+<span class="docs-section-kicker">20</span>
 <h2 class="h4 mb-0">Access Logging</h2>
 </div>
 <p class="text-muted">Enable S3-style access logging to track all requests to your buckets for audit and analysis.</p>

@@ -1970,7 +2085,7 @@ curl "{{ api_base }}/&lt;bucket&gt;?logging" \
 <article id="notifications" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">20</span>
+<span class="docs-section-kicker">21</span>
 <h2 class="h4 mb-0">Notifications &amp; Webhooks</h2>
 </div>
 <p class="text-muted">Configure event notifications to trigger webhooks when objects are created or deleted.</p>

@@ -2033,7 +2148,7 @@ curl -X PUT "{{ api_base }}/&lt;bucket&gt;?notification" \
 <article id="select-content" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">21</span>
+<span class="docs-section-kicker">22</span>
 <h2 class="h4 mb-0">SelectObjectContent (SQL)</h2>
 </div>
 <p class="text-muted">Query CSV, JSON, or Parquet files directly using SQL without downloading the entire object.</p>

@@ -2078,7 +2193,7 @@ curl -X POST "{{ api_base }}/&lt;bucket&gt;/data.csv?select" \
 <article id="advanced-ops" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">22</span>
+<span class="docs-section-kicker">23</span>
 <h2 class="h4 mb-0">Advanced S3 Operations</h2>
 </div>
 <p class="text-muted">Copy, move, and partially download objects using advanced S3 operations.</p>

@@ -2152,7 +2267,7 @@ curl "{{ api_base }}/&lt;bucket&gt;/&lt;key&gt;" \
 <article id="acls" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">23</span>
+<span class="docs-section-kicker">24</span>
 <h2 class="h4 mb-0">Access Control Lists (ACLs)</h2>
 </div>
 <p class="text-muted">ACLs provide legacy-style permission management for buckets and objects.</p>

@@ -2206,7 +2321,7 @@ curl -X PUT "{{ api_base }}/&lt;bucket&gt;/&lt;key&gt;" \
 <article id="tagging" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">24</span>
+<span class="docs-section-kicker">25</span>
 <h2 class="h4 mb-0">Object &amp; Bucket Tagging</h2>
 </div>
 <p class="text-muted">Add metadata tags to buckets and objects for organization, cost allocation, or lifecycle rule filtering.</p>

@@ -2265,7 +2380,7 @@ curl -X PUT "{{ api_base }}/&lt;bucket&gt;?tagging" \
 <article id="website-hosting" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">25</span>
+<span class="docs-section-kicker">26</span>
 <h2 class="h4 mb-0">Static Website Hosting</h2>
 </div>
 <p class="text-muted">Host static websites directly from S3 buckets with custom index and error pages, served via custom domain mapping.</p>

@@ -2358,7 +2473,7 @@ server {
 <article id="cors-config" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">26</span>
+<span class="docs-section-kicker">27</span>
 <h2 class="h4 mb-0">CORS Configuration</h2>
 </div>
 <p class="text-muted">Configure per-bucket Cross-Origin Resource Sharing rules to control which origins can access your bucket from a browser.</p>

@@ -2425,7 +2540,7 @@ curl -X DELETE "{{ api_base }}/&lt;bucket&gt;?cors" \
 <article id="post-object" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">27</span>
+<span class="docs-section-kicker">28</span>
 <h2 class="h4 mb-0">PostObject (HTML Form Upload)</h2>
 </div>
 <p class="text-muted">Upload objects directly from an HTML form using browser-based POST uploads with policy-based authorization.</p>

@@ -2467,7 +2582,7 @@ curl -X DELETE "{{ api_base }}/&lt;bucket&gt;?cors" \
 <article id="list-objects-v2" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">28</span>
+<span class="docs-section-kicker">29</span>
 <h2 class="h4 mb-0">List Objects API v2</h2>
 </div>
 <p class="text-muted">Use the v2 list API for improved pagination with continuation tokens instead of markers.</p>

@@ -2511,7 +2626,7 @@ curl "{{ api_base }}/&lt;bucket&gt;?list-type=2&amp;start-after=photos/2025/" \
 <article id="upgrading" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">29</span>
+<span class="docs-section-kicker">30</span>
 <h2 class="h4 mb-0">Upgrading &amp; Updates</h2>
 </div>
 <p class="text-muted">How to safely update MyFSIO to a new version.</p>

@@ -2544,7 +2659,7 @@ cp -r logs/ logs-backup/</code></pre>
 <article id="api-matrix" class="card shadow-sm docs-section">
 <div class="card-body">
 <div class="d-flex align-items-center gap-2 mb-3">
-<span class="docs-section-kicker">30</span>
+<span class="docs-section-kicker">31</span>
 <h2 class="h4 mb-0">Full API Reference</h2>
 </div>
 <p class="text-muted">Complete list of all S3-compatible, admin, and KMS endpoints.</p>

@@ -2642,6 +2757,7 @@ POST /kms/generate-random # Generate random bytes</code></pre>
 <li><a href="#quotas">Bucket Quotas</a></li>
 <li><a href="#encryption">Encryption</a></li>
 <li><a href="#lifecycle">Lifecycle Rules</a></li>
+<li><a href="#garbage-collection">Garbage Collection</a></li>
 <li><a href="#metrics">Metrics History</a></li>
 <li><a href="#operation-metrics">Operation Metrics</a></li>
 <li><a href="#troubleshooting">Troubleshooting</a></li>


@@ -50,9 +50,20 @@
 New user created: <code>{{ disclosed_secret.access_key }}</code>
 {% endif %}
 </div>
-<p class="mb-2 small">⚠️ This secret is only shown once. Copy it now and store it securely.</p>
+<p class="mb-2 small">These credentials are only shown once. Copy them now and store them securely.</p>
 </div>
 </div>
+<div class="input-group mb-2">
+<span class="input-group-text"><strong>Access key</strong></span>
+<input class="form-control font-monospace" type="text" value="{{ disclosed_secret.access_key }}" readonly id="disclosedAccessKeyValue" />
+<button class="btn btn-outline-primary" type="button" data-access-key-copy>
+<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="bi bi-clipboard" viewBox="0 0 16 16">
+<path d="M4 1.5H3a2 2 0 0 0-2 2V14a2 2 0 0 0 2 2h10a2 2 0 0 0 2-2V3.5a2 2 0 0 0-2-2h-1v1h1a1 1 0 0 1 1 1V14a1 1 0 0 1-1 1H3a1 1 0 0 1-1-1V3.5a1 1 0 0 1 1-1h1v-1z"/>
+<path d="M9.5 1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-3a.5.5 0 0 1-.5-.5v-1a.5.5 0 0 1 .5-.5h3zm-3-1A1.5 1.5 0 0 0 5 1.5v1A1.5 1.5 0 0 0 6.5 4h3A1.5 1.5 0 0 0 11 2.5v-1A1.5 1.5 0 0 0 9.5 0h-3z"/>
+</svg>
+Copy
+</button>
+</div>
 <div class="input-group">
 <span class="input-group-text"><strong>Secret key</strong></span>
 <input class="form-control font-monospace" type="text" value="{{ disclosed_secret.secret_key }}" readonly id="disclosedSecretValue" />

@@ -79,7 +90,7 @@
 <pre class="policy-preview mb-0" id="iamConfigPreview">{{ config_document }}</pre>
 <button class="btn btn-outline-light btn-sm config-copy" type="button" data-copy-target="iamConfigPreview">Copy JSON</button>
 </div>
-<p class="text-muted small mt-2 mb-0">Secrets are masked above. Access <code>{{ config_summary.path }}</code> directly to view full credentials.</p>
+<p class="text-muted small mt-2 mb-0">Secrets are masked above. IAM config is encrypted at rest.</p>
 </div>
 </div>
 </div>

@@ -122,12 +133,20 @@
 {% endif %}
 <div class="row g-3">
 {% for user in users %}
-{% set ns = namespace(is_admin=false) %}
+{% set ns = namespace(is_admin=false, is_expired=false, is_expiring_soon=false) %}
 {% for policy in user.policies %}
 {% if 'iam:*' in policy.actions or '*' in policy.actions %}
 {% set ns.is_admin = true %}
 {% endif %}
 {% endfor %}
+{% if user.expires_at %}
+{% set exp_str = user.expires_at %}
+{% if exp_str <= now_iso %}
+{% set ns.is_expired = true %}
+{% elif exp_str <= soon_iso %}
+{% set ns.is_expiring_soon = true %}
+{% endif %}
+{% endif %}
 <div class="col-md-6 col-xl-4 iam-user-item" data-display-name="{{ user.display_name|lower }}" data-access-key-filter="{{ user.access_key|lower }}">
 <div class="card h-100 iam-user-card{{ ' iam-admin-card' if ns.is_admin else '' }}">
 <div class="card-body">

@@ -146,6 +165,11 @@
 {% else %}
 <span class="iam-role-badge iam-role-user" data-role-badge>User</span>
 {% endif %}
+{% if ns.is_expired %}
+<span class="badge text-bg-danger" style="font-size: .65rem">Expired</span>
+{% elif ns.is_expiring_soon %}
+<span class="badge text-bg-warning" style="font-size: .65rem">Expiring soon</span>
+{% endif %}
 </div>
 <div class="d-flex align-items-center gap-1">
 <code class="small text-muted text-truncate" title="{{ user.access_key }}">{{ user.access_key }}</code>

@@ -173,6 +197,15 @@
 Edit Name
 </button>
 </li>
+<li>
+<button class="dropdown-item" type="button" data-expiry-user="{{ user.access_key }}" data-expires-at="{{ user.expires_at or '' }}">
+<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16">
+<path d="M8 3.5a.5.5 0 0 0-1 0V9a.5.5 0 0 0 .252.434l3.5 2a.5.5 0 0 0 .496-.868L8 8.71V3.5z"/>
+<path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm7-8A7 7 0 1 1 1 8a7 7 0 0 1 14 0z"/>
+</svg>
+Set Expiry
+</button>
+</li>
 <li>
 <button class="dropdown-item" type="button" data-rotate-user="{{ user.access_key }}">
 <svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16">

@@ -283,6 +316,32 @@
 <label class="form-label fw-medium">Display Name</label>
 <input class="form-control" type="text" name="display_name" placeholder="Analytics Team" required autofocus />
 </div>
+<div class="mb-3">
+<label class="form-label fw-medium d-flex justify-content-between align-items-center">
+Access Key <span class="text-muted fw-normal small">optional</span>
+</label>
+<div class="input-group">
+<input class="form-control font-monospace" type="text" name="access_key" id="createUserAccessKey" placeholder="Leave blank to auto-generate" />
+<button class="btn btn-outline-secondary" type="button" id="generateAccessKeyBtn" title="Generate secure access key">Generate</button>
+</div>
+</div>
+<div class="mb-3">
+<label class="form-label fw-medium d-flex justify-content-between align-items-center">
+Secret Key <span class="text-muted fw-normal small">optional</span>
+</label>
+<div class="input-group">
+<input class="form-control font-monospace" type="text" name="secret_key" id="createUserSecretKey" placeholder="Leave blank to auto-generate" />
+<button class="btn btn-outline-secondary" type="button" id="generateSecretKeyBtn" title="Generate secure secret key">Generate</button>
+</div>
+<div class="form-text">If you set a custom secret key, copy it now. It will be encrypted and cannot be recovered.</div>
+</div>
+<div class="mb-3">
+<label class="form-label fw-medium d-flex justify-content-between align-items-center">
+Expiry <span class="text-muted fw-normal small">optional</span>
+</label>
+<input class="form-control" type="datetime-local" name="expires_at" id="createUserExpiry" />
+<div class="form-text">Leave blank for no expiration. Expired users cannot authenticate.</div>
+</div>
 <div class="mb-3">
 <label class="form-label fw-medium">Initial Policies (JSON)</label>
 <textarea class="form-control font-monospace" name="policies" id="createUserPolicies" rows="6" spellcheck="false" placeholder='[

@@ -495,6 +554,52 @@
 </div>
 </div>
+<div class="modal fade" id="expiryModal" tabindex="-1" aria-hidden="true">
+<div class="modal-dialog modal-dialog-centered">
+<div class="modal-content">
+<div class="modal-header border-0 pb-0">
+<h1 class="modal-title fs-5 fw-semibold">
+<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
+<path d="M8 3.5a.5.5 0 0 0-1 0V9a.5.5 0 0 0 .252.434l3.5 2a.5.5 0 0 0 .496-.868L8 8.71V3.5z"/>
+<path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm7-8A7 7 0 1 1 1 8a7 7 0 0 1 14 0z"/>
+</svg>
+Set Expiry
+</h1>
+<button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
+</div>
+<form method="post" id="expiryForm">
+<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" />
+<div class="modal-body">
+<p class="text-muted small mb-3">Set expiration for <code id="expiryUserLabel"></code></p>
+<div class="mb-3">
+<label class="form-label fw-medium">Expires at</label>
+<input class="form-control" type="datetime-local" name="expires_at" id="expiryDateInput" />
+<div class="form-text">Leave blank to remove expiration (never expires).</div>
+</div>
+<div class="d-flex flex-wrap gap-2">
+<span class="text-muted small me-2 align-self-center">Quick presets:</span>
+<button class="btn btn-outline-secondary btn-sm" type="button" data-expiry-preset="1h">1 hour</button>
+<button class="btn btn-outline-secondary btn-sm" type="button" data-expiry-preset="24h">24 hours</button>
+<button class="btn btn-outline-secondary btn-sm" type="button" data-expiry-preset="7d">7 days</button>
+<button class="btn btn-outline-secondary btn-sm" type="button" data-expiry-preset="30d">30 days</button>
+<button class="btn btn-outline-secondary btn-sm" type="button" data-expiry-preset="90d">90 days</button>
+<button class="btn btn-outline-secondary btn-sm text-danger" type="button" data-expiry-preset="clear">Never</button>
+</div>
+</div>
+<div class="modal-footer">
+<button type="button" class="btn btn-outline-secondary" data-bs-dismiss="modal">Cancel</button>
+<button class="btn btn-primary" type="submit">
+<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1" viewBox="0 0 16 16">
+<path d="M10.97 4.97a.75.75 0 0 1 1.07 1.05l-3.99 4.99a.75.75 0 0 1-1.08.02L4.324 8.384a.75.75 0 1 1 1.06-1.06l2.094 2.093 3.473-4.425a.267.267 0 0 1 .02-.022z"/>
+</svg>
+Save Expiry
+</button>
+</div>
+</form>
+</div>
+</div>
+</div>
 <script id="iamUsersJson" type="application/json">{{ users | tojson }}</script>
 {% endblock %}

@@ -512,7 +617,8 @@
 updateUser: "{{ url_for('ui.update_iam_user', access_key='ACCESS_KEY') }}",
 deleteUser: "{{ url_for('ui.delete_iam_user', access_key='ACCESS_KEY') }}",
 updatePolicies: "{{ url_for('ui.update_iam_policies', access_key='ACCESS_KEY') }}",
-rotateSecret: "{{ url_for('ui.rotate_iam_secret', access_key='ACCESS_KEY') }}"
+rotateSecret: "{{ url_for('ui.rotate_iam_secret', access_key='ACCESS_KEY') }}",
+updateExpiry: "{{ url_for('ui.update_iam_expiry', access_key='ACCESS_KEY') }}"
 }
 });
 </script>


@@ -73,9 +73,6 @@
</svg>
</button>
</form>
<div class="text-center mt-4">
<small class="text-muted">Need help? Check the <a href="{{ url_for('ui.docs_page') }}" class="text-decoration-none">documentation</a></small>
</div>
</div>
</div>
</div>


@@ -43,6 +43,11 @@ def app(tmp_path: Path):
}
)
yield flask_app yield flask_app
storage = flask_app.extensions.get("object_storage")
if storage:
base = getattr(storage, "storage", storage)
if hasattr(base, "shutdown_stats"):
base.shutdown_stats()
@pytest.fixture()


@@ -0,0 +1,156 @@
import hashlib
import time
import pytest
@pytest.fixture()
def bucket(client, signer):
headers = signer("PUT", "/cond-test")
client.put("/cond-test", headers=headers)
return "cond-test"
@pytest.fixture()
def uploaded(client, signer, bucket):
body = b"hello conditional"
etag = hashlib.md5(body).hexdigest()
headers = signer("PUT", f"/{bucket}/obj.txt", body=body)
resp = client.put(f"/{bucket}/obj.txt", headers=headers, data=body)
last_modified = resp.headers.get("Last-Modified")
return {"etag": etag, "last_modified": last_modified}
class TestIfMatch:
def test_get_matching_etag(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": f'"{uploaded["etag"]}"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_get_non_matching_etag(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": '"wrongetag"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
def test_head_matching_etag(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Match": f'"{uploaded["etag"]}"'})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_head_non_matching_etag(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Match": '"wrongetag"'})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
def test_wildcard_match(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": "*"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_multiple_etags_one_matches(self, client, signer, bucket, uploaded):
etag_list = f'"bad1", "{uploaded["etag"]}", "bad2"'
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": etag_list})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_multiple_etags_none_match(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": '"bad1", "bad2"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
class TestIfNoneMatch:
def test_get_matching_etag_returns_304(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-None-Match": f'"{uploaded["etag"]}"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
assert uploaded["etag"] in resp.headers.get("ETag", "")
def test_get_non_matching_etag_returns_200(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-None-Match": '"wrongetag"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_head_matching_etag_returns_304(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-None-Match": f'"{uploaded["etag"]}"'})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
def test_head_non_matching_etag_returns_200(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-None-Match": '"wrongetag"'})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_wildcard_returns_304(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-None-Match": "*"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
class TestIfModifiedSince:
def test_not_modified_returns_304(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Modified-Since": "Sun, 01 Jan 2034 00:00:00 GMT"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
assert "ETag" in resp.headers
def test_modified_returns_200(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Modified-Since": "Sun, 01 Jan 2000 00:00:00 GMT"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_head_not_modified(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Modified-Since": "Sun, 01 Jan 2034 00:00:00 GMT"})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
def test_if_none_match_takes_precedence(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={
"If-None-Match": '"wrongetag"',
"If-Modified-Since": "Sun, 01 Jan 2034 00:00:00 GMT",
})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
class TestIfUnmodifiedSince:
def test_unmodified_returns_200(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Unmodified-Since": "Sun, 01 Jan 2034 00:00:00 GMT"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_modified_returns_412(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Unmodified-Since": "Sun, 01 Jan 2000 00:00:00 GMT"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
def test_head_modified_returns_412(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Unmodified-Since": "Sun, 01 Jan 2000 00:00:00 GMT"})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
def test_if_match_takes_precedence(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={
"If-Match": f'"{uploaded["etag"]}"',
"If-Unmodified-Since": "Sun, 01 Jan 2000 00:00:00 GMT",
})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
class TestConditionalWithRange:
def test_if_match_with_range(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={
"If-Match": f'"{uploaded["etag"]}"',
"Range": "bytes=0-4",
})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 206
def test_if_match_fails_with_range(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={
"If-Match": '"wrongetag"',
"Range": "bytes=0-4",
})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
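Read together, these cases pin down the RFC 7232 evaluation order: `If-Match` is checked ahead of `If-Unmodified-Since`, `If-None-Match` ahead of `If-Modified-Since`, and a passing `If-Match` still allows a 206 on ranged reads. A minimal sketch of that precedence, with hypothetical helper names (the server's actual handler is not part of this diff):

```python
# Sketch of the precedence these tests assert. etag/last_modified come from
# the stored object; cond holds parsed request headers (dates as datetimes).
def evaluate_conditionals(etag, last_modified, cond):
    if cond.get("if_match") is not None:
        if not etag_matches(cond["if_match"], etag):
            return 412
    elif cond.get("if_unmodified_since") is not None:
        if last_modified > cond["if_unmodified_since"]:
            return 412
    if cond.get("if_none_match") is not None:
        if etag_matches(cond["if_none_match"], etag):
            return 304  # for GET/HEAD; write methods would get 412
    elif cond.get("if_modified_since") is not None:
        if last_modified <= cond["if_modified_since"]:
            return 304
    return None  # no condition failed; serve normally (200 or 206)

def etag_matches(header_value, etag):
    # "*" matches any existing object; otherwise compare a quoted ETag list.
    if header_value.strip() == "*":
        return True
    candidates = [v.strip().strip('"') for v in header_value.split(",")]
    return etag in candidates
```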


@@ -53,15 +53,17 @@ def test_special_characters_in_metadata(tmp_path: Path):
assert meta["special"] == "!@#$%^&*()" assert meta["special"] == "!@#$%^&*()"
def test_disk_full_scenario(tmp_path: Path, monkeypatch): def test_disk_full_scenario(tmp_path: Path, monkeypatch):
# Simulate disk full by mocking write to fail import app.storage as _storage_mod
monkeypatch.setattr(_storage_mod, "_HAS_RUST", False)
storage = ObjectStorage(tmp_path) storage = ObjectStorage(tmp_path)
storage.create_bucket("full") storage.create_bucket("full")
def mock_copyfileobj(*args, **kwargs): def mock_copyfileobj(*args, **kwargs):
raise OSError(28, "No space left on device") raise OSError(28, "No space left on device")
import shutil import shutil
monkeypatch.setattr(shutil, "copyfileobj", mock_copyfileobj) monkeypatch.setattr(shutil, "copyfileobj", mock_copyfileobj)
with pytest.raises(OSError, match="No space left on device"): with pytest.raises(OSError, match="No space left on device"):
storage.put_object("full", "file", io.BytesIO(b"data")) storage.put_object("full", "file", io.BytesIO(b"data"))

tests/test_gc.py (new file, 350 lines)

@@ -0,0 +1,350 @@
import json
import os
import time
from pathlib import Path
import pytest
from app.gc import GarbageCollector, GCResult
@pytest.fixture
def storage_root(tmp_path):
root = tmp_path / "data"
root.mkdir()
sys_root = root / ".myfsio.sys"
sys_root.mkdir()
(sys_root / "config").mkdir(parents=True)
(sys_root / "tmp").mkdir()
(sys_root / "multipart").mkdir()
(sys_root / "buckets").mkdir()
return root
@pytest.fixture
def gc(storage_root):
return GarbageCollector(
storage_root=storage_root,
interval_hours=1.0,
temp_file_max_age_hours=1.0,
multipart_max_age_days=1,
lock_file_max_age_hours=0.5,
dry_run=False,
)
def _make_old(path, hours=48):
old_time = time.time() - hours * 3600
os.utime(path, (old_time, old_time))
class TestTempFileCleanup:
def test_old_temp_files_deleted(self, storage_root, gc):
tmp_dir = storage_root / ".myfsio.sys" / "tmp"
old_file = tmp_dir / "abc123.tmp"
old_file.write_bytes(b"x" * 1000)
_make_old(old_file, hours=48)
result = gc.run_now()
assert result.temp_files_deleted == 1
assert result.temp_bytes_freed == 1000
assert not old_file.exists()
def test_recent_temp_files_kept(self, storage_root, gc):
tmp_dir = storage_root / ".myfsio.sys" / "tmp"
new_file = tmp_dir / "recent.tmp"
new_file.write_bytes(b"data")
result = gc.run_now()
assert result.temp_files_deleted == 0
assert new_file.exists()
def test_dry_run_keeps_files(self, storage_root, gc):
gc.dry_run = True
tmp_dir = storage_root / ".myfsio.sys" / "tmp"
old_file = tmp_dir / "stale.tmp"
old_file.write_bytes(b"x" * 500)
_make_old(old_file, hours=48)
result = gc.run_now()
assert result.temp_files_deleted == 1
assert result.temp_bytes_freed == 500
assert old_file.exists()
class TestMultipartCleanup:
def test_old_orphaned_multipart_deleted(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
mp_root = storage_root / ".myfsio.sys" / "multipart" / "test-bucket"
mp_root.mkdir(parents=True)
upload_dir = mp_root / "upload-123"
upload_dir.mkdir()
manifest = upload_dir / "manifest.json"
manifest.write_text(json.dumps({"upload_id": "upload-123", "object_key": "foo.txt"}))
part = upload_dir / "part-00001.part"
part.write_bytes(b"x" * 2000)
_make_old(manifest, hours=200)
_make_old(part, hours=200)
_make_old(upload_dir, hours=200)
result = gc.run_now()
assert result.multipart_uploads_deleted == 1
assert result.multipart_bytes_freed > 0
assert not upload_dir.exists()
def test_recent_multipart_kept(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
mp_root = storage_root / ".myfsio.sys" / "multipart" / "test-bucket"
mp_root.mkdir(parents=True)
upload_dir = mp_root / "upload-new"
upload_dir.mkdir()
manifest = upload_dir / "manifest.json"
manifest.write_text(json.dumps({"upload_id": "upload-new", "object_key": "bar.txt"}))
result = gc.run_now()
assert result.multipart_uploads_deleted == 0
assert upload_dir.exists()
def test_legacy_multipart_cleaned(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
legacy_mp = bucket / ".multipart" / "upload-old"
legacy_mp.mkdir(parents=True)
part = legacy_mp / "part-00001.part"
part.write_bytes(b"y" * 500)
_make_old(part, hours=200)
_make_old(legacy_mp, hours=200)
result = gc.run_now()
assert result.multipart_uploads_deleted == 1
class TestLockFileCleanup:
def test_stale_lock_files_deleted(self, storage_root, gc):
locks_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "locks"
locks_dir.mkdir(parents=True)
lock = locks_dir / "some_key.lock"
lock.write_text("")
_make_old(lock, hours=2)
result = gc.run_now()
assert result.lock_files_deleted == 1
assert not lock.exists()
def test_recent_lock_kept(self, storage_root, gc):
locks_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "locks"
locks_dir.mkdir(parents=True)
lock = locks_dir / "active.lock"
lock.write_text("")
result = gc.run_now()
assert result.lock_files_deleted == 0
assert lock.exists()
class TestOrphanedMetadataCleanup:
def test_legacy_orphaned_metadata_deleted(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
meta_dir = bucket / ".meta"
meta_dir.mkdir()
orphan = meta_dir / "deleted_file.txt.meta.json"
orphan.write_text(json.dumps({"etag": "abc"}))
result = gc.run_now()
assert result.orphaned_metadata_deleted == 1
assert not orphan.exists()
def test_valid_metadata_kept(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
obj = bucket / "exists.txt"
obj.write_text("hello")
meta_dir = bucket / ".meta"
meta_dir.mkdir()
meta = meta_dir / "exists.txt.meta.json"
meta.write_text(json.dumps({"etag": "abc"}))
result = gc.run_now()
assert result.orphaned_metadata_deleted == 0
assert meta.exists()
def test_index_orphaned_entries_cleaned(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
obj = bucket / "keep.txt"
obj.write_text("hello")
meta_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "meta"
meta_dir.mkdir(parents=True)
index = meta_dir / "_index.json"
index.write_text(json.dumps({"keep.txt": {"etag": "a"}, "gone.txt": {"etag": "b"}}))
result = gc.run_now()
assert result.orphaned_metadata_deleted == 1
updated = json.loads(index.read_text())
assert "keep.txt" in updated
assert "gone.txt" not in updated
class TestOrphanedVersionsCleanup:
def test_orphaned_versions_deleted(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
versions_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "versions" / "deleted_obj.txt"
versions_dir.mkdir(parents=True)
v_bin = versions_dir / "v1.bin"
v_json = versions_dir / "v1.json"
v_bin.write_bytes(b"old data" * 100)
v_json.write_text(json.dumps({"version_id": "v1", "size": 800}))
result = gc.run_now()
assert result.orphaned_versions_deleted == 2
assert result.orphaned_version_bytes_freed == 800
def test_active_versions_kept(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
obj = bucket / "active.txt"
obj.write_text("current")
versions_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "versions" / "active.txt"
versions_dir.mkdir(parents=True)
v_bin = versions_dir / "v1.bin"
v_bin.write_bytes(b"old version")
result = gc.run_now()
assert result.orphaned_versions_deleted == 0
assert v_bin.exists()
class TestEmptyDirCleanup:
def test_empty_dirs_removed(self, storage_root, gc):
empty = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "locks" / "sub"
empty.mkdir(parents=True)
result = gc.run_now()
assert result.empty_dirs_removed > 0
assert not empty.exists()
class TestHistory:
def test_history_recorded(self, storage_root, gc):
gc.run_now()
history = gc.get_history()
assert len(history) == 1
assert "result" in history[0]
assert "timestamp" in history[0]
def test_multiple_runs(self, storage_root, gc):
gc.run_now()
gc.run_now()
gc.run_now()
history = gc.get_history()
assert len(history) == 3
assert history[0]["timestamp"] >= history[1]["timestamp"]
class TestStatus:
def test_get_status(self, storage_root, gc):
status = gc.get_status()
assert status["interval_hours"] == 1.0
assert status["dry_run"] is False
assert status["temp_file_max_age_hours"] == 1.0
assert status["multipart_max_age_days"] == 1
assert status["lock_file_max_age_hours"] == 0.5
class TestGCResult:
def test_total_bytes_freed(self):
r = GCResult(temp_bytes_freed=100, multipart_bytes_freed=200, orphaned_version_bytes_freed=300)
assert r.total_bytes_freed == 600
def test_has_work(self):
assert not GCResult().has_work
assert GCResult(temp_files_deleted=1).has_work
assert GCResult(lock_files_deleted=1).has_work
assert GCResult(empty_dirs_removed=1).has_work
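The two properties asserted here are plain aggregates over the run counters. A re-implementation consistent with these assertions, as a sketch; the field names are taken from the tests, and the real `app.gc.GCResult` may carry more:

```python
# Sketch of GCResult matching the assertions above: total_bytes_freed sums
# the three byte counters; has_work is true if any counter is nonzero.
from dataclasses import dataclass, fields

@dataclass
class GCResult:
    temp_files_deleted: int = 0
    temp_bytes_freed: int = 0
    multipart_uploads_deleted: int = 0
    multipart_bytes_freed: int = 0
    lock_files_deleted: int = 0
    orphaned_metadata_deleted: int = 0
    orphaned_versions_deleted: int = 0
    orphaned_version_bytes_freed: int = 0
    empty_dirs_removed: int = 0

    @property
    def total_bytes_freed(self) -> int:
        return (self.temp_bytes_freed + self.multipart_bytes_freed
                + self.orphaned_version_bytes_freed)

    @property
    def has_work(self) -> bool:
        return any(getattr(self, f.name) for f in fields(self))
```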
class TestAdminAPI:
@pytest.fixture
def gc_app(self, tmp_path):
from app import create_api_app
storage_root = tmp_path / "data"
iam_config = tmp_path / "iam.json"
bucket_policies = tmp_path / "bucket_policies.json"
iam_payload = {
"users": [
{
"access_key": "admin",
"secret_key": "adminsecret",
"display_name": "Admin",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy", "iam:*"]}],
}
]
}
iam_config.write_text(json.dumps(iam_payload))
flask_app = create_api_app({
"TESTING": True,
"SECRET_KEY": "testing",
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"GC_ENABLED": True,
"GC_INTERVAL_HOURS": 1.0,
})
yield flask_app
gc = flask_app.extensions.get("gc")
if gc:
gc.stop()
def test_gc_status(self, gc_app):
client = gc_app.test_client()
resp = client.get("/admin/gc/status", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"})
assert resp.status_code == 200
data = resp.get_json()
assert data["enabled"] is True
def test_gc_run(self, gc_app):
client = gc_app.test_client()
resp = client.post(
"/admin/gc/run",
headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"},
content_type="application/json",
)
assert resp.status_code == 200
data = resp.get_json()
assert "temp_files_deleted" in data
def test_gc_dry_run(self, gc_app):
client = gc_app.test_client()
resp = client.post(
"/admin/gc/run",
headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"},
data=json.dumps({"dry_run": True}),
content_type="application/json",
)
assert resp.status_code == 200
data = resp.get_json()
assert "temp_files_deleted" in data
def test_gc_history(self, gc_app):
client = gc_app.test_client()
client.post("/admin/gc/run", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"})
resp = client.get("/admin/gc/history", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"})
assert resp.status_code == 200
data = resp.get_json()
assert len(data["executions"]) >= 1
def test_gc_requires_admin(self, gc_app):
iam = gc_app.extensions["iam"]
user = iam.create_user(display_name="Regular")
client = gc_app.test_client()
resp = client.get(
"/admin/gc/status",
headers={"X-Access-Key": user["access_key"], "X-Secret-Key": user["secret_key"]},
)
assert resp.status_code == 403
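Outside the Flask test client, the same endpoints can be driven over HTTP. A usage sketch with `requests`, assuming the API is listening on its default port and the admin keys from the fixture above:

```python
# Hypothetical usage of the GC admin API exercised by the tests above.
import requests

BASE = "http://127.0.0.1:5000"  # default API port per run.py --mode api
AUTH = {"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"}

status = requests.get(f"{BASE}/admin/gc/status", headers=AUTH).json()
print(status["enabled"])

# Trigger a dry run: counters are reported but nothing is deleted.
report = requests.post(f"{BASE}/admin/gc/run", headers=AUTH,
                       json={"dry_run": True}).json()
print(report["temp_files_deleted"])
```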


@@ -0,0 +1,350 @@
import hashlib
import io
import os
import secrets
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
try:
import myfsio_core as _rc
HAS_RUST = True
except ImportError:
_rc = None
HAS_RUST = False
pytestmark = pytest.mark.skipif(not HAS_RUST, reason="myfsio_core not available")
class TestStreamToFileWithMd5:
def test_basic_write(self, tmp_path):
data = b"hello world" * 1000
stream = io.BytesIO(data)
tmp_dir = str(tmp_path / "tmp")
tmp_path_str, md5_hex, size = _rc.stream_to_file_with_md5(stream, tmp_dir)
assert size == len(data)
assert md5_hex == hashlib.md5(data).hexdigest()
assert Path(tmp_path_str).exists()
assert Path(tmp_path_str).read_bytes() == data
def test_empty_stream(self, tmp_path):
stream = io.BytesIO(b"")
tmp_dir = str(tmp_path / "tmp")
tmp_path_str, md5_hex, size = _rc.stream_to_file_with_md5(stream, tmp_dir)
assert size == 0
assert md5_hex == hashlib.md5(b"").hexdigest()
assert Path(tmp_path_str).read_bytes() == b""
def test_large_data(self, tmp_path):
data = os.urandom(1024 * 1024 * 2)
stream = io.BytesIO(data)
tmp_dir = str(tmp_path / "tmp")
tmp_path_str, md5_hex, size = _rc.stream_to_file_with_md5(stream, tmp_dir)
assert size == len(data)
assert md5_hex == hashlib.md5(data).hexdigest()
def test_custom_chunk_size(self, tmp_path):
data = b"x" * 10000
stream = io.BytesIO(data)
tmp_dir = str(tmp_path / "tmp")
tmp_path_str, md5_hex, size = _rc.stream_to_file_with_md5(
stream, tmp_dir, chunk_size=128
)
assert size == len(data)
assert md5_hex == hashlib.md5(data).hexdigest()
class TestAssemblePartsWithMd5:
def test_basic_assembly(self, tmp_path):
parts = []
combined = b""
for i in range(3):
data = f"part{i}data".encode() * 100
combined += data
p = tmp_path / f"part{i}"
p.write_bytes(data)
parts.append(str(p))
dest = str(tmp_path / "output")
md5_hex = _rc.assemble_parts_with_md5(parts, dest)
assert md5_hex == hashlib.md5(combined).hexdigest()
assert Path(dest).read_bytes() == combined
def test_single_part(self, tmp_path):
data = b"single part data"
p = tmp_path / "part0"
p.write_bytes(data)
dest = str(tmp_path / "output")
md5_hex = _rc.assemble_parts_with_md5([str(p)], dest)
assert md5_hex == hashlib.md5(data).hexdigest()
assert Path(dest).read_bytes() == data
def test_empty_parts_list(self):
with pytest.raises(ValueError, match="No parts"):
_rc.assemble_parts_with_md5([], "dummy")
def test_missing_part_file(self, tmp_path):
with pytest.raises(OSError):
_rc.assemble_parts_with_md5(
[str(tmp_path / "nonexistent")], str(tmp_path / "out")
)
def test_large_parts(self, tmp_path):
parts = []
combined = b""
for i in range(5):
data = os.urandom(512 * 1024)
combined += data
p = tmp_path / f"part{i}"
p.write_bytes(data)
parts.append(str(p))
dest = str(tmp_path / "output")
md5_hex = _rc.assemble_parts_with_md5(parts, dest)
assert md5_hex == hashlib.md5(combined).hexdigest()
assert Path(dest).read_bytes() == combined
class TestEncryptDecryptStreamChunked:
def _python_derive_chunk_nonce(self, base_nonce, chunk_index):
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
from cryptography.hazmat.primitives import hashes
hkdf = HKDF(
algorithm=hashes.SHA256(),
length=12,
salt=base_nonce,
info=chunk_index.to_bytes(4, "big"),
)
return hkdf.derive(b"chunk_nonce")
def test_encrypt_decrypt_roundtrip(self, tmp_path):
data = b"Hello, encryption!" * 500
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "plaintext")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
chunk_count = _rc.encrypt_stream_chunked(
input_path, encrypted_path, key, base_nonce
)
assert chunk_count > 0
chunk_count_dec = _rc.decrypt_stream_chunked(
encrypted_path, decrypted_path, key, base_nonce
)
assert chunk_count_dec == chunk_count
assert Path(decrypted_path).read_bytes() == data
def test_empty_file(self, tmp_path):
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "empty")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(b"")
chunk_count = _rc.encrypt_stream_chunked(
input_path, encrypted_path, key, base_nonce
)
assert chunk_count == 0
chunk_count_dec = _rc.decrypt_stream_chunked(
encrypted_path, decrypted_path, key, base_nonce
)
assert chunk_count_dec == 0
assert Path(decrypted_path).read_bytes() == b""
def test_custom_chunk_size(self, tmp_path):
data = os.urandom(10000)
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "plaintext")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
chunk_count = _rc.encrypt_stream_chunked(
input_path, encrypted_path, key, base_nonce, chunk_size=1024
)
assert chunk_count == 10
_rc.decrypt_stream_chunked(encrypted_path, decrypted_path, key, base_nonce)
assert Path(decrypted_path).read_bytes() == data
def test_invalid_key_length(self, tmp_path):
input_path = str(tmp_path / "in")
Path(input_path).write_bytes(b"data")
with pytest.raises(ValueError, match="32 bytes"):
_rc.encrypt_stream_chunked(
input_path, str(tmp_path / "out"), b"short", secrets.token_bytes(12)
)
def test_invalid_nonce_length(self, tmp_path):
input_path = str(tmp_path / "in")
Path(input_path).write_bytes(b"data")
with pytest.raises(ValueError, match="12 bytes"):
_rc.encrypt_stream_chunked(
input_path, str(tmp_path / "out"), secrets.token_bytes(32), b"short"
)
def test_wrong_key_fails_decrypt(self, tmp_path):
data = b"sensitive data"
key = secrets.token_bytes(32)
wrong_key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "plaintext")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
_rc.encrypt_stream_chunked(input_path, encrypted_path, key, base_nonce)
with pytest.raises((ValueError, OSError)):
_rc.decrypt_stream_chunked(
encrypted_path, decrypted_path, wrong_key, base_nonce
)
def test_cross_compat_python_encrypt_rust_decrypt(self, tmp_path):
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
data = b"cross compat test data" * 100
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
chunk_size = 1024
encrypted_path = str(tmp_path / "py_encrypted")
with open(encrypted_path, "wb") as f:
f.write(b"\x00\x00\x00\x00")
aesgcm = AESGCM(key)
chunk_index = 0
offset = 0
while offset < len(data):
chunk = data[offset:offset + chunk_size]
nonce = self._python_derive_chunk_nonce(base_nonce, chunk_index)
enc = aesgcm.encrypt(nonce, chunk, None)
f.write(len(enc).to_bytes(4, "big"))
f.write(enc)
chunk_index += 1
offset += chunk_size
f.seek(0)
f.write(chunk_index.to_bytes(4, "big"))
decrypted_path = str(tmp_path / "rust_decrypted")
_rc.decrypt_stream_chunked(encrypted_path, decrypted_path, key, base_nonce)
assert Path(decrypted_path).read_bytes() == data
def test_cross_compat_rust_encrypt_python_decrypt(self, tmp_path):
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
data = b"cross compat reverse test" * 100
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
chunk_size = 1024
input_path = str(tmp_path / "plaintext")
encrypted_path = str(tmp_path / "rust_encrypted")
Path(input_path).write_bytes(data)
chunk_count = _rc.encrypt_stream_chunked(
input_path, encrypted_path, key, base_nonce, chunk_size=chunk_size
)
aesgcm = AESGCM(key)
with open(encrypted_path, "rb") as f:
count_bytes = f.read(4)
assert int.from_bytes(count_bytes, "big") == chunk_count
decrypted = b""
for i in range(chunk_count):
size = int.from_bytes(f.read(4), "big")
enc_chunk = f.read(size)
nonce = self._python_derive_chunk_nonce(base_nonce, i)
decrypted += aesgcm.decrypt(nonce, enc_chunk, None)
assert decrypted == data
def test_large_file_roundtrip(self, tmp_path):
data = os.urandom(1024 * 1024)
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "large")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
_rc.encrypt_stream_chunked(input_path, encrypted_path, key, base_nonce)
_rc.decrypt_stream_chunked(encrypted_path, decrypted_path, key, base_nonce)
assert Path(decrypted_path).read_bytes() == data
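The two cross-compat tests fix the container format: a 4-byte big-endian chunk count header, then per chunk a 4-byte big-endian ciphertext length followed by the AES-256-GCM ciphertext, with each chunk's nonce derived via HKDF-SHA256 (salt = base nonce, info = chunk index as 4 big-endian bytes, IKM = `b"chunk_nonce"`). A pure-Python reader built only from what those tests encode:

```python
# Pure-Python decryptor for the chunked container exercised above; mirrors
# the cross-compat tests, so it should interoperate with myfsio_core.
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from cryptography.hazmat.primitives.kdf.hkdf import HKDF

def derive_chunk_nonce(base_nonce: bytes, chunk_index: int) -> bytes:
    hkdf = HKDF(algorithm=hashes.SHA256(), length=12, salt=base_nonce,
                info=chunk_index.to_bytes(4, "big"))
    return hkdf.derive(b"chunk_nonce")

def decrypt_chunked_file(path: str, key: bytes, base_nonce: bytes) -> bytes:
    aesgcm = AESGCM(key)
    out = b""
    with open(path, "rb") as f:
        chunk_count = int.from_bytes(f.read(4), "big")
        for i in range(chunk_count):
            size = int.from_bytes(f.read(4), "big")
            out += aesgcm.decrypt(derive_chunk_nonce(base_nonce, i),
                                  f.read(size), None)
    return out
```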
class TestStreamingEncryptorFileMethods:
def test_encrypt_file_decrypt_file_roundtrip(self, tmp_path):
from app.encryption import LocalKeyEncryption, StreamingEncryptor
master_key_path = tmp_path / "master.key"
provider = LocalKeyEncryption(master_key_path)
encryptor = StreamingEncryptor(provider, chunk_size=512)
data = b"file method test data" * 200
input_path = str(tmp_path / "input")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
metadata = encryptor.encrypt_file(input_path, encrypted_path)
assert metadata.algorithm == "AES256"
encryptor.decrypt_file(encrypted_path, decrypted_path, metadata)
assert Path(decrypted_path).read_bytes() == data
def test_encrypt_file_matches_encrypt_stream(self, tmp_path):
from app.encryption import LocalKeyEncryption, StreamingEncryptor
master_key_path = tmp_path / "master.key"
provider = LocalKeyEncryption(master_key_path)
encryptor = StreamingEncryptor(provider, chunk_size=512)
data = b"stream vs file comparison" * 100
input_path = str(tmp_path / "input")
Path(input_path).write_bytes(data)
file_encrypted_path = str(tmp_path / "file_enc")
metadata_file = encryptor.encrypt_file(input_path, file_encrypted_path)
file_decrypted_path = str(tmp_path / "file_dec")
encryptor.decrypt_file(file_encrypted_path, file_decrypted_path, metadata_file)
assert Path(file_decrypted_path).read_bytes() == data
stream_enc, metadata_stream = encryptor.encrypt_stream(io.BytesIO(data))
stream_dec = encryptor.decrypt_stream(stream_enc, metadata_stream)
assert stream_dec.read() == data