Compare commits
109 Commits
4c661477d5
...
v0.4.2
| Author | SHA1 | Date | |
|---|---|---|---|
| ae11c654f9 | |||
| 1eadc7b75c | |||
| 4a224a127b | |||
| c498fe7aee | |||
| 3838aed954 | |||
| 6a193dbb1c | |||
| e94b341a5b | |||
| 2ad3736852 | |||
| f05b2668c0 | |||
| f7c1c1f809 | |||
| f0c95ac0a9 | |||
| 0e392e18b4 | |||
| 8996f1ce06 | |||
| f60dbaf9c9 | |||
| 1a5a7aa9e1 | |||
| 326367ae4c | |||
| a7f9b0a22f | |||
| 0e525713b1 | |||
| f43fad02fb | |||
| eff3e378f3 | |||
| 5e32cef792 | |||
| 9898167f8d | |||
| 8ff4797041 | |||
| 4a553555d3 | |||
| 7a3202c996 | |||
| bd20ca86ab | |||
| 532cf95d59 | |||
| 366f8ce60d | |||
| 7612cb054a | |||
| 966d524dca | |||
| e84f1f1851 | |||
| a059f0502d | |||
| afd7173ba0 | |||
| c807bb2388 | |||
| aa4f9f5566 | |||
| 14786151e5 | |||
| a496862902 | |||
| df4f27ca2e | |||
| d72e0a347e | |||
| 6ed4b7d8ea | |||
| 31ebbea680 | |||
| d878134ebf | |||
| 50fb5aa387 | |||
| 55568d6892 | |||
| a4ae81c77c | |||
| 9da7104887 | |||
| cc161bf362 | |||
| de5377e5ac | |||
| 80b77b64eb | |||
| 6c912a3d71 | |||
| 2a0e77a754 | |||
| c6e368324a | |||
| 7b6c096bb7 | |||
| 03353a0aec | |||
| eb0e435a5a | |||
| 72f5d9d70c | |||
| be63e27c15 | |||
| 7633007a08 | |||
| 81ef0fe4c7 | |||
| 5f24bd920d | |||
| 8552f193de | |||
| de0d869c9f | |||
| 5536330aeb | |||
| d4657c389d | |||
| 3827235232 | |||
| fdd068feee | |||
| dfc0058d0d | |||
| 27aef84311 | |||
| 66b7677d2c | |||
| 5003514a3d | |||
| 4d90ead816 | |||
| 20a314e030 | |||
| b37a51ed1d | |||
| d8232340c3 | |||
| a356bb0c4e | |||
| 1c328ee3af | |||
| 5bf7962c04 | |||
| e06f653606 | |||
| 0462a7b62e | |||
| 9c2809c195 | |||
| fb32ca0a7d | |||
| 6ab702a818 | |||
| 550e7d435c | |||
| 776967e80d | |||
| 082a7fbcd1 | |||
| ff287cf67b | |||
| bddf36d52d | |||
| cf6cec9cab | |||
| d425839e57 | |||
| 52660570c1 | |||
| 35f61313e0 | |||
| c470cfb576 | |||
| d96955deee | |||
| 85181f0be6 | |||
| d5ca7a8be1 | |||
| 476dc79e42 | |||
| bb6590fc5e | |||
| 899db3421b | |||
| caf01d6ada | |||
| bb366cb4cd | |||
| a2745ff2ee | |||
| 28cb656d94 | |||
| 3c44152fc6 | |||
| 397515edce | |||
| 980fced7e4 | |||
| bae5009ec4 | |||
| 233780617f | |||
| fd8fb21517 | |||
| c6cbe822e1 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -26,6 +26,10 @@ dist/
|
||||
*.egg-info/
|
||||
.eggs/
|
||||
|
||||
# Rust / maturin build artifacts
|
||||
myfsio_core/target/
|
||||
myfsio_core/Cargo.lock
|
||||
|
||||
# Local runtime artifacts
|
||||
logs/
|
||||
*.log
|
||||
|
||||
@@ -72,6 +72,11 @@ source .venv/bin/activate
|
||||
# Install dependencies
|
||||
pip install -r requirements.txt
|
||||
|
||||
# (Optional) Build Rust native extension for better performance
|
||||
# Requires Rust toolchain: https://rustup.rs
|
||||
pip install maturin
|
||||
cd myfsio_core && maturin develop --release && cd ..
|
||||
|
||||
# Start both servers
|
||||
python run.py
|
||||
|
||||
@@ -80,7 +85,7 @@ python run.py --mode api # API only (port 5000)
|
||||
python run.py --mode ui # UI only (port 5100)
|
||||
```
|
||||
|
||||
**Default Credentials:** `localadmin` / `localadmin`
|
||||
**Credentials:** Generated automatically on first run and printed to the console. If missed, check the IAM config file at `<STORAGE_ROOT>/.myfsio.sys/config/iam.json`.
|
||||
|
||||
- **Web Console:** http://127.0.0.1:5100/ui
|
||||
- **API Endpoint:** http://127.0.0.1:5000
|
||||
|
||||
212
app/__init__.py
212
app/__init__.py
@@ -1,12 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import html as html_module
|
||||
import itertools
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from pathlib import Path
|
||||
from datetime import timedelta
|
||||
@@ -17,6 +18,8 @@ from flask_cors import CORS
|
||||
from flask_wtf.csrf import CSRFError
|
||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||
|
||||
import io
|
||||
|
||||
from .access_logging import AccessLoggingService
|
||||
from .operation_metrics import OperationMetricsCollector, classify_endpoint
|
||||
from .compression import GzipMiddleware
|
||||
@@ -28,6 +31,8 @@ from .encryption import EncryptionManager
|
||||
from .extensions import limiter, csrf
|
||||
from .iam import IamService
|
||||
from .kms import KMSManager
|
||||
from .gc import GarbageCollector
|
||||
from .integrity import IntegrityChecker
|
||||
from .lifecycle import LifecycleManager
|
||||
from .notifications import NotificationService
|
||||
from .object_lock import ObjectLockService
|
||||
@@ -38,6 +43,66 @@ from .storage import ObjectStorage, StorageError
|
||||
from .version import get_version
|
||||
from .website_domains import WebsiteDomainStore
|
||||
|
||||
_request_counter = itertools.count(1)
|
||||
|
||||
|
||||
class _ChunkedTransferMiddleware:
|
||||
|
||||
def __init__(self, app):
|
||||
self.app = app
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
if environ.get("REQUEST_METHOD") not in ("PUT", "POST"):
|
||||
return self.app(environ, start_response)
|
||||
|
||||
transfer_encoding = environ.get("HTTP_TRANSFER_ENCODING", "")
|
||||
content_length = environ.get("CONTENT_LENGTH")
|
||||
|
||||
if "chunked" in transfer_encoding.lower():
|
||||
if content_length:
|
||||
del environ["HTTP_TRANSFER_ENCODING"]
|
||||
else:
|
||||
raw = environ.get("wsgi.input")
|
||||
if raw:
|
||||
try:
|
||||
if hasattr(raw, "seek"):
|
||||
raw.seek(0)
|
||||
body = raw.read()
|
||||
except Exception:
|
||||
body = b""
|
||||
if body:
|
||||
environ["wsgi.input"] = io.BytesIO(body)
|
||||
environ["CONTENT_LENGTH"] = str(len(body))
|
||||
del environ["HTTP_TRANSFER_ENCODING"]
|
||||
|
||||
content_length = environ.get("CONTENT_LENGTH")
|
||||
if not content_length or content_length == "0":
|
||||
sha256 = environ.get("HTTP_X_AMZ_CONTENT_SHA256", "")
|
||||
decoded_len = environ.get("HTTP_X_AMZ_DECODED_CONTENT_LENGTH", "")
|
||||
content_encoding = environ.get("HTTP_CONTENT_ENCODING", "")
|
||||
if ("STREAMING" in sha256.upper() or decoded_len
|
||||
or "aws-chunked" in content_encoding.lower()):
|
||||
raw = environ.get("wsgi.input")
|
||||
if raw:
|
||||
try:
|
||||
if hasattr(raw, "seek"):
|
||||
raw.seek(0)
|
||||
body = raw.read()
|
||||
except Exception:
|
||||
body = b""
|
||||
if body:
|
||||
environ["wsgi.input"] = io.BytesIO(body)
|
||||
environ["CONTENT_LENGTH"] = str(len(body))
|
||||
|
||||
raw = environ.get("wsgi.input")
|
||||
if raw and hasattr(raw, "seek"):
|
||||
try:
|
||||
raw.seek(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return self.app(environ, start_response)
|
||||
|
||||
|
||||
def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
|
||||
"""Migrate config file from legacy locations to the active path.
|
||||
@@ -93,12 +158,20 @@ def create_app(
|
||||
app.config.setdefault("WTF_CSRF_ENABLED", False)
|
||||
|
||||
# Trust X-Forwarded-* headers from proxies
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1)
|
||||
num_proxies = app.config.get("NUM_TRUSTED_PROXIES", 1)
|
||||
if num_proxies:
|
||||
if "NUM_TRUSTED_PROXIES" not in os.environ:
|
||||
logging.getLogger(__name__).warning(
|
||||
"NUM_TRUSTED_PROXIES not set, defaulting to 1. "
|
||||
"Set NUM_TRUSTED_PROXIES=0 if not behind a reverse proxy."
|
||||
)
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=num_proxies, x_proto=num_proxies, x_host=num_proxies, x_prefix=num_proxies)
|
||||
|
||||
# Enable gzip compression for responses (10-20x smaller JSON payloads)
|
||||
if app.config.get("ENABLE_GZIP", True):
|
||||
app.wsgi_app = GzipMiddleware(app.wsgi_app, compression_level=6)
|
||||
|
||||
app.wsgi_app = _ChunkedTransferMiddleware(app.wsgi_app)
|
||||
|
||||
_configure_cors(app)
|
||||
_configure_logging(app)
|
||||
|
||||
@@ -107,10 +180,11 @@ def create_app(
|
||||
|
||||
storage = ObjectStorage(
|
||||
Path(app.config["STORAGE_ROOT"]),
|
||||
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 5),
|
||||
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 60),
|
||||
object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100),
|
||||
bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0),
|
||||
object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024),
|
||||
meta_read_cache_max=app.config.get("META_READ_CACHE_MAX", 2048),
|
||||
)
|
||||
|
||||
if app.config.get("WARM_CACHE_ON_STARTUP", True) and not app.config.get("TESTING"):
|
||||
@@ -120,6 +194,7 @@ def create_app(
|
||||
Path(app.config["IAM_CONFIG"]),
|
||||
auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5),
|
||||
auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15),
|
||||
encryption_key=app.config.get("SECRET_KEY"),
|
||||
)
|
||||
bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
|
||||
secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))
|
||||
@@ -210,6 +285,31 @@ def create_app(
|
||||
)
|
||||
lifecycle_manager.start()
|
||||
|
||||
gc_collector = None
|
||||
if app.config.get("GC_ENABLED", False):
|
||||
gc_collector = GarbageCollector(
|
||||
storage_root=storage_root,
|
||||
interval_hours=app.config.get("GC_INTERVAL_HOURS", 6.0),
|
||||
temp_file_max_age_hours=app.config.get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0),
|
||||
multipart_max_age_days=app.config.get("GC_MULTIPART_MAX_AGE_DAYS", 7),
|
||||
lock_file_max_age_hours=app.config.get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0),
|
||||
dry_run=app.config.get("GC_DRY_RUN", False),
|
||||
io_throttle_ms=app.config.get("GC_IO_THROTTLE_MS", 10),
|
||||
)
|
||||
gc_collector.start()
|
||||
|
||||
integrity_checker = None
|
||||
if app.config.get("INTEGRITY_ENABLED", False):
|
||||
integrity_checker = IntegrityChecker(
|
||||
storage_root=storage_root,
|
||||
interval_hours=app.config.get("INTEGRITY_INTERVAL_HOURS", 24.0),
|
||||
batch_size=app.config.get("INTEGRITY_BATCH_SIZE", 1000),
|
||||
auto_heal=app.config.get("INTEGRITY_AUTO_HEAL", False),
|
||||
dry_run=app.config.get("INTEGRITY_DRY_RUN", False),
|
||||
io_throttle_ms=app.config.get("INTEGRITY_IO_THROTTLE_MS", 10),
|
||||
)
|
||||
integrity_checker.start()
|
||||
|
||||
app.extensions["object_storage"] = storage
|
||||
app.extensions["iam"] = iam
|
||||
app.extensions["bucket_policies"] = bucket_policies
|
||||
@@ -221,6 +321,8 @@ def create_app(
|
||||
app.extensions["kms"] = kms_manager
|
||||
app.extensions["acl"] = acl_service
|
||||
app.extensions["lifecycle"] = lifecycle_manager
|
||||
app.extensions["gc"] = gc_collector
|
||||
app.extensions["integrity"] = integrity_checker
|
||||
app.extensions["object_lock"] = object_lock_service
|
||||
app.extensions["notifications"] = notification_service
|
||||
app.extensions["access_logging"] = access_logging_service
|
||||
@@ -473,13 +575,9 @@ def _configure_logging(app: Flask) -> None:
|
||||
|
||||
@app.before_request
|
||||
def _log_request_start() -> None:
|
||||
g.request_id = uuid.uuid4().hex
|
||||
g.request_id = f"{os.getpid():x}{next(_request_counter):012x}"
|
||||
g.request_started_at = time.perf_counter()
|
||||
g.request_bytes_in = request.content_length or 0
|
||||
app.logger.info(
|
||||
"Request started",
|
||||
extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
|
||||
)
|
||||
|
||||
@app.before_request
|
||||
def _maybe_serve_website():
|
||||
@@ -528,30 +626,57 @@ def _configure_logging(app: Flask) -> None:
|
||||
is_encrypted = "x-amz-server-side-encryption" in metadata
|
||||
except (StorageError, OSError):
|
||||
pass
|
||||
if request.method == "HEAD":
|
||||
response = Response(status=200)
|
||||
if is_encrypted and hasattr(storage, "get_object_data"):
|
||||
try:
|
||||
data, _ = storage.get_object_data(bucket, object_key)
|
||||
response.headers["Content-Length"] = len(data)
|
||||
except (StorageError, OSError):
|
||||
return _website_error_response(500, "Internal Server Error")
|
||||
else:
|
||||
try:
|
||||
stat = obj_path.stat()
|
||||
response.headers["Content-Length"] = stat.st_size
|
||||
except OSError:
|
||||
return _website_error_response(500, "Internal Server Error")
|
||||
response.headers["Content-Type"] = content_type
|
||||
return response
|
||||
if is_encrypted and hasattr(storage, "get_object_data"):
|
||||
try:
|
||||
data, _ = storage.get_object_data(bucket, object_key)
|
||||
response = Response(data, mimetype=content_type)
|
||||
response.headers["Content-Length"] = len(data)
|
||||
return response
|
||||
file_size = len(data)
|
||||
except (StorageError, OSError):
|
||||
return _website_error_response(500, "Internal Server Error")
|
||||
else:
|
||||
data = None
|
||||
try:
|
||||
stat = obj_path.stat()
|
||||
file_size = stat.st_size
|
||||
except OSError:
|
||||
return _website_error_response(500, "Internal Server Error")
|
||||
if request.method == "HEAD":
|
||||
response = Response(status=200)
|
||||
response.headers["Content-Length"] = file_size
|
||||
response.headers["Content-Type"] = content_type
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
return response
|
||||
from .s3_api import _parse_range_header
|
||||
range_header = request.headers.get("Range")
|
||||
if range_header:
|
||||
ranges = _parse_range_header(range_header, file_size)
|
||||
if ranges is None:
|
||||
return Response(status=416, headers={"Content-Range": f"bytes */{file_size}"})
|
||||
start, end = ranges[0]
|
||||
length = end - start + 1
|
||||
if data is not None:
|
||||
partial_data = data[start:end + 1]
|
||||
response = Response(partial_data, status=206, mimetype=content_type)
|
||||
else:
|
||||
def _stream_range(file_path, start_pos, length_to_read):
|
||||
with file_path.open("rb") as f:
|
||||
f.seek(start_pos)
|
||||
remaining = length_to_read
|
||||
while remaining > 0:
|
||||
chunk = f.read(min(262144, remaining))
|
||||
if not chunk:
|
||||
break
|
||||
remaining -= len(chunk)
|
||||
yield chunk
|
||||
response = Response(_stream_range(obj_path, start, length), status=206, mimetype=content_type, direct_passthrough=True)
|
||||
response.headers["Content-Range"] = f"bytes {start}-{end}/{file_size}"
|
||||
response.headers["Content-Length"] = length
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
return response
|
||||
if data is not None:
|
||||
response = Response(data, mimetype=content_type)
|
||||
response.headers["Content-Length"] = file_size
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
return response
|
||||
def _stream(file_path):
|
||||
with file_path.open("rb") as f:
|
||||
while True:
|
||||
@@ -559,13 +684,10 @@ def _configure_logging(app: Flask) -> None:
|
||||
if not chunk:
|
||||
break
|
||||
yield chunk
|
||||
try:
|
||||
stat = obj_path.stat()
|
||||
response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
|
||||
response.headers["Content-Length"] = stat.st_size
|
||||
return response
|
||||
except OSError:
|
||||
return _website_error_response(500, "Internal Server Error")
|
||||
response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
|
||||
response.headers["Content-Length"] = file_size
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
return response
|
||||
|
||||
def _serve_website_error(storage, bucket, error_doc_key, status_code):
|
||||
if not error_doc_key:
|
||||
@@ -608,17 +730,19 @@ def _configure_logging(app: Flask) -> None:
|
||||
duration_ms = 0.0
|
||||
if hasattr(g, "request_started_at"):
|
||||
duration_ms = (time.perf_counter() - g.request_started_at) * 1000
|
||||
request_id = getattr(g, "request_id", uuid.uuid4().hex)
|
||||
request_id = getattr(g, "request_id", f"{os.getpid():x}{next(_request_counter):012x}")
|
||||
response.headers.setdefault("X-Request-ID", request_id)
|
||||
app.logger.info(
|
||||
"Request completed",
|
||||
extra={
|
||||
"path": request.path,
|
||||
"method": request.method,
|
||||
"remote_addr": request.remote_addr,
|
||||
},
|
||||
)
|
||||
if app.logger.isEnabledFor(logging.INFO):
|
||||
app.logger.info(
|
||||
"Request completed",
|
||||
extra={
|
||||
"path": request.path,
|
||||
"method": request.method,
|
||||
"remote_addr": request.remote_addr,
|
||||
},
|
||||
)
|
||||
response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
|
||||
response.headers["Server"] = "MyFSIO"
|
||||
|
||||
operation_metrics = app.extensions.get("operation_metrics")
|
||||
if operation_metrics:
|
||||
|
||||
206
app/admin_api.py
206
app/admin_api.py
@@ -14,6 +14,8 @@ from flask import Blueprint, Response, current_app, jsonify, request
|
||||
|
||||
from .connections import ConnectionStore
|
||||
from .extensions import limiter
|
||||
from .gc import GarbageCollector
|
||||
from .integrity import IntegrityChecker
|
||||
from .iam import IamError, Principal
|
||||
from .replication import ReplicationManager
|
||||
from .site_registry import PeerSite, SiteInfo, SiteRegistry
|
||||
@@ -684,6 +686,107 @@ def _storage():
|
||||
return current_app.extensions["object_storage"]
|
||||
|
||||
|
||||
def _require_iam_action(action: str):
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return None, error
|
||||
try:
|
||||
_iam().authorize(principal, None, action)
|
||||
return principal, None
|
||||
except IamError:
|
||||
return None, _json_error("AccessDenied", f"Requires {action} permission", 403)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_list_users():
|
||||
principal, error = _require_iam_action("iam:list_users")
|
||||
if error:
|
||||
return error
|
||||
return jsonify({"users": _iam().list_users()})
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_get_user(identifier):
|
||||
principal, error = _require_iam_action("iam:get_user")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
user_id = _iam().resolve_user_id(identifier)
|
||||
return jsonify(_iam().get_user_by_id(user_id))
|
||||
except IamError as exc:
|
||||
return _json_error("NotFound", str(exc), 404)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/policies", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_get_user_policies(identifier):
|
||||
principal, error = _require_iam_action("iam:get_policy")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
return jsonify({"policies": _iam().get_user_policies(identifier)})
|
||||
except IamError as exc:
|
||||
return _json_error("NotFound", str(exc), 404)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/keys", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_create_access_key(identifier):
|
||||
principal, error = _require_iam_action("iam:create_key")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
result = _iam().create_access_key(identifier)
|
||||
logger.info("Access key created for %s by %s", identifier, principal.access_key)
|
||||
return jsonify(result), 201
|
||||
except IamError as exc:
|
||||
return _json_error("InvalidRequest", str(exc), 400)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/keys/<access_key>", methods=["DELETE"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_delete_access_key(identifier, access_key):
|
||||
principal, error = _require_iam_action("iam:delete_key")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
_iam().delete_access_key(access_key)
|
||||
logger.info("Access key %s deleted by %s", access_key, principal.access_key)
|
||||
return "", 204
|
||||
except IamError as exc:
|
||||
return _json_error("InvalidRequest", str(exc), 400)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/disable", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_disable_user(identifier):
|
||||
principal, error = _require_iam_action("iam:disable_user")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
_iam().disable_user(identifier)
|
||||
logger.info("User %s disabled by %s", identifier, principal.access_key)
|
||||
return jsonify({"status": "disabled"})
|
||||
except IamError as exc:
|
||||
return _json_error("InvalidRequest", str(exc), 400)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/enable", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_enable_user(identifier):
|
||||
principal, error = _require_iam_action("iam:disable_user")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
_iam().enable_user(identifier)
|
||||
logger.info("User %s enabled by %s", identifier, principal.access_key)
|
||||
return jsonify({"status": "enabled"})
|
||||
except IamError as exc:
|
||||
return _json_error("InvalidRequest", str(exc), 400)
|
||||
|
||||
|
||||
@admin_api_bp.route("/website-domains", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def list_website_domains():
|
||||
@@ -776,3 +879,106 @@ def delete_website_domain(domain: str):
|
||||
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
|
||||
logger.info("Website domain mapping deleted: %s", domain)
|
||||
return Response(status=204)
|
||||
|
||||
|
||||
def _gc() -> Optional[GarbageCollector]:
|
||||
return current_app.extensions.get("gc")
|
||||
|
||||
|
||||
@admin_api_bp.route("/gc/status", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def gc_status():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
gc = _gc()
|
||||
if not gc:
|
||||
return jsonify({"enabled": False, "message": "GC is not enabled. Set GC_ENABLED=true to enable."})
|
||||
return jsonify(gc.get_status())
|
||||
|
||||
|
||||
@admin_api_bp.route("/gc/run", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def gc_run_now():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
gc = _gc()
|
||||
if not gc:
|
||||
return _json_error("InvalidRequest", "GC is not enabled", 400)
|
||||
payload = request.get_json(silent=True) or {}
|
||||
started = gc.run_async(dry_run=payload.get("dry_run"))
|
||||
logger.info("GC manual run by %s", principal.access_key)
|
||||
if not started:
|
||||
return _json_error("Conflict", "GC is already in progress", 409)
|
||||
return jsonify({"status": "started"})
|
||||
|
||||
|
||||
@admin_api_bp.route("/gc/history", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def gc_history():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
gc = _gc()
|
||||
if not gc:
|
||||
return jsonify({"executions": []})
|
||||
limit = min(int(request.args.get("limit", 50)), 200)
|
||||
offset = int(request.args.get("offset", 0))
|
||||
records = gc.get_history(limit=limit, offset=offset)
|
||||
return jsonify({"executions": records})
|
||||
|
||||
|
||||
def _integrity() -> Optional[IntegrityChecker]:
|
||||
return current_app.extensions.get("integrity")
|
||||
|
||||
|
||||
@admin_api_bp.route("/integrity/status", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def integrity_status():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
checker = _integrity()
|
||||
if not checker:
|
||||
return jsonify({"enabled": False, "message": "Integrity checker is not enabled. Set INTEGRITY_ENABLED=true to enable."})
|
||||
return jsonify(checker.get_status())
|
||||
|
||||
|
||||
@admin_api_bp.route("/integrity/run", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def integrity_run_now():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
checker = _integrity()
|
||||
if not checker:
|
||||
return _json_error("InvalidRequest", "Integrity checker is not enabled", 400)
|
||||
payload = request.get_json(silent=True) or {}
|
||||
override_dry_run = payload.get("dry_run")
|
||||
override_auto_heal = payload.get("auto_heal")
|
||||
started = checker.run_async(
|
||||
auto_heal=override_auto_heal if override_auto_heal is not None else None,
|
||||
dry_run=override_dry_run if override_dry_run is not None else None,
|
||||
)
|
||||
logger.info("Integrity manual run by %s", principal.access_key)
|
||||
if not started:
|
||||
return _json_error("Conflict", "A scan is already in progress", 409)
|
||||
return jsonify({"status": "started"})
|
||||
|
||||
|
||||
@admin_api_bp.route("/integrity/history", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def integrity_history():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
checker = _integrity()
|
||||
if not checker:
|
||||
return jsonify({"executions": []})
|
||||
limit = min(int(request.args.get("limit", 50)), 200)
|
||||
offset = int(request.args.get("offset", 0))
|
||||
records = checker.get_history(limit=limit, offset=offset)
|
||||
return jsonify({"executions": records})
|
||||
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
@@ -75,7 +76,7 @@ def _evaluate_condition_operator(
|
||||
expected_null = condition_values[0].lower() in ("true", "1", "yes") if condition_values else True
|
||||
return is_null == expected_null
|
||||
|
||||
return True
|
||||
return False
|
||||
|
||||
ACTION_ALIASES = {
|
||||
"s3:listbucket": "list",
|
||||
@@ -268,7 +269,7 @@ class BucketPolicyStore:
|
||||
self._last_mtime = self._current_mtime()
|
||||
# Performance: Avoid stat() on every request
|
||||
self._last_stat_check = 0.0
|
||||
self._stat_check_interval = 1.0 # Only check mtime every 1 second
|
||||
self._stat_check_interval = float(os.environ.get("BUCKET_POLICY_STAT_CHECK_INTERVAL_SECONDS", "2.0"))
|
||||
|
||||
def maybe_reload(self) -> None:
|
||||
# Performance: Skip stat check if we checked recently
|
||||
|
||||
@@ -25,7 +25,7 @@ def _calculate_auto_connection_limit() -> int:
|
||||
|
||||
|
||||
def _calculate_auto_backlog(connection_limit: int) -> int:
|
||||
return max(64, min(connection_limit * 2, 4096))
|
||||
return max(128, min(connection_limit * 2, 4096))
|
||||
|
||||
|
||||
def _validate_rate_limit(value: str) -> str:
|
||||
@@ -115,6 +115,7 @@ class AppConfig:
|
||||
server_connection_limit: int
|
||||
server_backlog: int
|
||||
server_channel_timeout: int
|
||||
server_max_buffer_size: int
|
||||
server_threads_auto: bool
|
||||
server_connection_limit_auto: bool
|
||||
server_backlog_auto: bool
|
||||
@@ -135,6 +136,7 @@ class AppConfig:
|
||||
site_sync_clock_skew_tolerance_seconds: float
|
||||
object_key_max_length_bytes: int
|
||||
object_cache_max_size: int
|
||||
meta_read_cache_max: int
|
||||
bucket_config_cache_ttl_seconds: float
|
||||
object_tag_limit: int
|
||||
encryption_chunk_size_bytes: int
|
||||
@@ -150,6 +152,19 @@ class AppConfig:
|
||||
allowed_redirect_hosts: list[str]
|
||||
allow_internal_endpoints: bool
|
||||
website_hosting_enabled: bool
|
||||
gc_enabled: bool
|
||||
gc_interval_hours: float
|
||||
gc_temp_file_max_age_hours: float
|
||||
gc_multipart_max_age_days: int
|
||||
gc_lock_file_max_age_hours: float
|
||||
gc_dry_run: bool
|
||||
gc_io_throttle_ms: int
|
||||
integrity_enabled: bool
|
||||
integrity_interval_hours: float
|
||||
integrity_batch_size: int
|
||||
integrity_auto_heal: bool
|
||||
integrity_dry_run: bool
|
||||
integrity_io_throttle_ms: int
|
||||
|
||||
@classmethod
|
||||
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
|
||||
@@ -241,7 +256,7 @@ class AppConfig:
|
||||
cors_expose_headers = _csv(str(_get("CORS_EXPOSE_HEADERS", "*")), ["*"])
|
||||
session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30))
|
||||
bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60))
|
||||
object_cache_ttl = int(_get("OBJECT_CACHE_TTL", 5))
|
||||
object_cache_ttl = int(_get("OBJECT_CACHE_TTL", 60))
|
||||
|
||||
encryption_enabled = str(_get("ENCRYPTION_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
encryption_keys_dir = storage_root / ".myfsio.sys" / "keys"
|
||||
@@ -282,6 +297,7 @@ class AppConfig:
|
||||
server_backlog_auto = False
|
||||
|
||||
server_channel_timeout = int(_get("SERVER_CHANNEL_TIMEOUT", 120))
|
||||
server_max_buffer_size = int(_get("SERVER_MAX_BUFFER_SIZE", 1024 * 1024 * 128))
|
||||
site_sync_enabled = str(_get("SITE_SYNC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
site_sync_interval_seconds = int(_get("SITE_SYNC_INTERVAL_SECONDS", 60))
|
||||
site_sync_batch_size = int(_get("SITE_SYNC_BATCH_SIZE", 100))
|
||||
@@ -300,6 +316,7 @@ class AppConfig:
|
||||
site_sync_clock_skew_tolerance_seconds = float(_get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0))
|
||||
object_key_max_length_bytes = int(_get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024))
|
||||
object_cache_max_size = int(_get("OBJECT_CACHE_MAX_SIZE", 100))
|
||||
meta_read_cache_max = int(_get("META_READ_CACHE_MAX", 2048))
|
||||
bucket_config_cache_ttl_seconds = float(_get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0))
|
||||
object_tag_limit = int(_get("OBJECT_TAG_LIMIT", 50))
|
||||
encryption_chunk_size_bytes = int(_get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024))
|
||||
@@ -314,11 +331,24 @@ class AppConfig:
|
||||
site_region = str(_get("SITE_REGION", "us-east-1"))
|
||||
site_priority = int(_get("SITE_PRIORITY", 100))
|
||||
ratelimit_admin = _validate_rate_limit(str(_get("RATE_LIMIT_ADMIN", "60 per minute")))
|
||||
num_trusted_proxies = int(_get("NUM_TRUSTED_PROXIES", 0))
|
||||
num_trusted_proxies = int(_get("NUM_TRUSTED_PROXIES", 1))
|
||||
allowed_redirect_hosts_raw = _get("ALLOWED_REDIRECT_HOSTS", "")
|
||||
allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
|
||||
allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
gc_enabled = str(_get("GC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
gc_interval_hours = float(_get("GC_INTERVAL_HOURS", 6.0))
|
||||
gc_temp_file_max_age_hours = float(_get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0))
|
||||
gc_multipart_max_age_days = int(_get("GC_MULTIPART_MAX_AGE_DAYS", 7))
|
||||
gc_lock_file_max_age_hours = float(_get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0))
|
||||
gc_dry_run = str(_get("GC_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
gc_io_throttle_ms = int(_get("GC_IO_THROTTLE_MS", 10))
|
||||
integrity_enabled = str(_get("INTEGRITY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
integrity_interval_hours = float(_get("INTEGRITY_INTERVAL_HOURS", 24.0))
|
||||
integrity_batch_size = int(_get("INTEGRITY_BATCH_SIZE", 1000))
|
||||
integrity_auto_heal = str(_get("INTEGRITY_AUTO_HEAL", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
integrity_dry_run = str(_get("INTEGRITY_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
integrity_io_throttle_ms = int(_get("INTEGRITY_IO_THROTTLE_MS", 10))
|
||||
|
||||
return cls(storage_root=storage_root,
|
||||
max_upload_size=max_upload_size,
|
||||
@@ -372,6 +402,7 @@ class AppConfig:
|
||||
server_connection_limit=server_connection_limit,
|
||||
server_backlog=server_backlog,
|
||||
server_channel_timeout=server_channel_timeout,
|
||||
server_max_buffer_size=server_max_buffer_size,
|
||||
server_threads_auto=server_threads_auto,
|
||||
server_connection_limit_auto=server_connection_limit_auto,
|
||||
server_backlog_auto=server_backlog_auto,
|
||||
@@ -392,6 +423,7 @@ class AppConfig:
|
||||
site_sync_clock_skew_tolerance_seconds=site_sync_clock_skew_tolerance_seconds,
|
||||
object_key_max_length_bytes=object_key_max_length_bytes,
|
||||
object_cache_max_size=object_cache_max_size,
|
||||
meta_read_cache_max=meta_read_cache_max,
|
||||
bucket_config_cache_ttl_seconds=bucket_config_cache_ttl_seconds,
|
||||
object_tag_limit=object_tag_limit,
|
||||
encryption_chunk_size_bytes=encryption_chunk_size_bytes,
|
||||
@@ -406,7 +438,20 @@ class AppConfig:
|
||||
num_trusted_proxies=num_trusted_proxies,
|
||||
allowed_redirect_hosts=allowed_redirect_hosts,
|
||||
allow_internal_endpoints=allow_internal_endpoints,
|
||||
website_hosting_enabled=website_hosting_enabled)
|
||||
website_hosting_enabled=website_hosting_enabled,
|
||||
gc_enabled=gc_enabled,
|
||||
gc_interval_hours=gc_interval_hours,
|
||||
gc_temp_file_max_age_hours=gc_temp_file_max_age_hours,
|
||||
gc_multipart_max_age_days=gc_multipart_max_age_days,
|
||||
gc_lock_file_max_age_hours=gc_lock_file_max_age_hours,
|
||||
gc_dry_run=gc_dry_run,
|
||||
gc_io_throttle_ms=gc_io_throttle_ms,
|
||||
integrity_enabled=integrity_enabled,
|
||||
integrity_interval_hours=integrity_interval_hours,
|
||||
integrity_batch_size=integrity_batch_size,
|
||||
integrity_auto_heal=integrity_auto_heal,
|
||||
integrity_dry_run=integrity_dry_run,
|
||||
integrity_io_throttle_ms=integrity_io_throttle_ms)
|
||||
|
||||
def validate_and_report(self) -> list[str]:
|
||||
"""Validate configuration and return a list of warnings/issues.
|
||||
@@ -471,10 +516,12 @@ class AppConfig:
|
||||
issues.append(f"CRITICAL: SERVER_THREADS={self.server_threads} is outside valid range (1-64). Server cannot start.")
|
||||
if not (10 <= self.server_connection_limit <= 1000):
|
||||
issues.append(f"CRITICAL: SERVER_CONNECTION_LIMIT={self.server_connection_limit} is outside valid range (10-1000). Server cannot start.")
|
||||
if not (64 <= self.server_backlog <= 4096):
|
||||
issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (64-4096). Server cannot start.")
|
||||
if not (128 <= self.server_backlog <= 4096):
|
||||
issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (128-4096). Server cannot start.")
|
||||
if not (10 <= self.server_channel_timeout <= 300):
|
||||
issues.append(f"CRITICAL: SERVER_CHANNEL_TIMEOUT={self.server_channel_timeout} is outside valid range (10-300). Server cannot start.")
|
||||
if self.server_max_buffer_size < 1024 * 1024:
|
||||
issues.append(f"WARNING: SERVER_MAX_BUFFER_SIZE={self.server_max_buffer_size} is less than 1MB. Large uploads will fail.")
|
||||
|
||||
if sys.platform != "win32":
|
||||
try:
|
||||
@@ -520,6 +567,7 @@ class AppConfig:
|
||||
print(f" CONNECTION_LIMIT: {self.server_connection_limit}{_auto(self.server_connection_limit_auto)}")
|
||||
print(f" BACKLOG: {self.server_backlog}{_auto(self.server_backlog_auto)}")
|
||||
print(f" CHANNEL_TIMEOUT: {self.server_channel_timeout}s")
|
||||
print(f" MAX_BUFFER_SIZE: {self.server_max_buffer_size // (1024 * 1024)}MB")
|
||||
print("=" * 60)
|
||||
|
||||
issues = self.validate_and_report()
|
||||
@@ -585,6 +633,7 @@ class AppConfig:
|
||||
"SERVER_CONNECTION_LIMIT": self.server_connection_limit,
|
||||
"SERVER_BACKLOG": self.server_backlog,
|
||||
"SERVER_CHANNEL_TIMEOUT": self.server_channel_timeout,
|
||||
"SERVER_MAX_BUFFER_SIZE": self.server_max_buffer_size,
|
||||
"SITE_SYNC_ENABLED": self.site_sync_enabled,
|
||||
"SITE_SYNC_INTERVAL_SECONDS": self.site_sync_interval_seconds,
|
||||
"SITE_SYNC_BATCH_SIZE": self.site_sync_batch_size,
|
||||
@@ -602,6 +651,7 @@ class AppConfig:
|
||||
"SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS": self.site_sync_clock_skew_tolerance_seconds,
|
||||
"OBJECT_KEY_MAX_LENGTH_BYTES": self.object_key_max_length_bytes,
|
||||
"OBJECT_CACHE_MAX_SIZE": self.object_cache_max_size,
|
||||
"META_READ_CACHE_MAX": self.meta_read_cache_max,
|
||||
"BUCKET_CONFIG_CACHE_TTL_SECONDS": self.bucket_config_cache_ttl_seconds,
|
||||
"OBJECT_TAG_LIMIT": self.object_tag_limit,
|
||||
"ENCRYPTION_CHUNK_SIZE_BYTES": self.encryption_chunk_size_bytes,
|
||||
@@ -617,4 +667,17 @@ class AppConfig:
|
||||
"ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
|
||||
"ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
|
||||
"WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled,
|
||||
"GC_ENABLED": self.gc_enabled,
|
||||
"GC_INTERVAL_HOURS": self.gc_interval_hours,
|
||||
"GC_TEMP_FILE_MAX_AGE_HOURS": self.gc_temp_file_max_age_hours,
|
||||
"GC_MULTIPART_MAX_AGE_DAYS": self.gc_multipart_max_age_days,
|
||||
"GC_LOCK_FILE_MAX_AGE_HOURS": self.gc_lock_file_max_age_hours,
|
||||
"GC_DRY_RUN": self.gc_dry_run,
|
||||
"GC_IO_THROTTLE_MS": self.gc_io_throttle_ms,
|
||||
"INTEGRITY_ENABLED": self.integrity_enabled,
|
||||
"INTEGRITY_INTERVAL_HOURS": self.integrity_interval_hours,
|
||||
"INTEGRITY_BATCH_SIZE": self.integrity_batch_size,
|
||||
"INTEGRITY_AUTO_HEAL": self.integrity_auto_heal,
|
||||
"INTEGRITY_DRY_RUN": self.integrity_dry_run,
|
||||
"INTEGRITY_IO_THROTTLE_MS": self.integrity_io_throttle_ms,
|
||||
}
|
||||
|
||||
@@ -190,6 +190,15 @@ class EncryptedObjectStorage:
|
||||
def list_objects(self, bucket_name: str, **kwargs):
|
||||
return self.storage.list_objects(bucket_name, **kwargs)
|
||||
|
||||
def list_objects_shallow(self, bucket_name: str, **kwargs):
|
||||
return self.storage.list_objects_shallow(bucket_name, **kwargs)
|
||||
|
||||
def iter_objects_shallow(self, bucket_name: str, **kwargs):
|
||||
return self.storage.iter_objects_shallow(bucket_name, **kwargs)
|
||||
|
||||
def search_objects(self, bucket_name: str, query: str, **kwargs):
|
||||
return self.storage.search_objects(bucket_name, query, **kwargs)
|
||||
|
||||
def list_objects_all(self, bucket_name: str):
|
||||
return self.storage.list_objects_all(bucket_name)
|
||||
|
||||
|
||||
@@ -19,6 +19,17 @@ from cryptography.hazmat.primitives import hashes
|
||||
if sys.platform != "win32":
|
||||
import fcntl
|
||||
|
||||
try:
|
||||
import myfsio_core as _rc
|
||||
if not all(hasattr(_rc, f) for f in (
|
||||
"encrypt_stream_chunked", "decrypt_stream_chunked",
|
||||
)):
|
||||
raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
|
||||
_HAS_RUST = True
|
||||
except ImportError:
|
||||
_rc = None
|
||||
_HAS_RUST = False
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -338,6 +349,69 @@ class StreamingEncryptor:
|
||||
output.seek(0)
|
||||
return output
|
||||
|
||||
def encrypt_file(self, input_path: str, output_path: str) -> EncryptionMetadata:
|
||||
data_key, encrypted_data_key = self.provider.generate_data_key()
|
||||
base_nonce = secrets.token_bytes(12)
|
||||
|
||||
if _HAS_RUST:
|
||||
_rc.encrypt_stream_chunked(
|
||||
input_path, output_path, data_key, base_nonce, self.chunk_size
|
||||
)
|
||||
else:
|
||||
with open(input_path, "rb") as stream:
|
||||
aesgcm = AESGCM(data_key)
|
||||
with open(output_path, "wb") as out:
|
||||
out.write(b"\x00\x00\x00\x00")
|
||||
chunk_index = 0
|
||||
while True:
|
||||
chunk = stream.read(self.chunk_size)
|
||||
if not chunk:
|
||||
break
|
||||
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
|
||||
encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None)
|
||||
out.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big"))
|
||||
out.write(encrypted_chunk)
|
||||
chunk_index += 1
|
||||
out.seek(0)
|
||||
out.write(chunk_index.to_bytes(4, "big"))
|
||||
|
||||
return EncryptionMetadata(
|
||||
algorithm="AES256",
|
||||
key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local",
|
||||
nonce=base_nonce,
|
||||
encrypted_data_key=encrypted_data_key,
|
||||
)
|
||||
|
||||
def decrypt_file(self, input_path: str, output_path: str,
|
||||
metadata: EncryptionMetadata) -> None:
|
||||
data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id)
|
||||
base_nonce = metadata.nonce
|
||||
|
||||
if _HAS_RUST:
|
||||
_rc.decrypt_stream_chunked(input_path, output_path, data_key, base_nonce)
|
||||
else:
|
||||
with open(input_path, "rb") as stream:
|
||||
chunk_count_bytes = stream.read(4)
|
||||
if len(chunk_count_bytes) < 4:
|
||||
raise EncryptionError("Invalid encrypted stream: missing header")
|
||||
chunk_count = int.from_bytes(chunk_count_bytes, "big")
|
||||
aesgcm = AESGCM(data_key)
|
||||
with open(output_path, "wb") as out:
|
||||
for chunk_index in range(chunk_count):
|
||||
size_bytes = stream.read(self.HEADER_SIZE)
|
||||
if len(size_bytes) < self.HEADER_SIZE:
|
||||
raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}")
|
||||
chunk_size = int.from_bytes(size_bytes, "big")
|
||||
encrypted_chunk = stream.read(chunk_size)
|
||||
if len(encrypted_chunk) < chunk_size:
|
||||
raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}")
|
||||
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
|
||||
try:
|
||||
decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None)
|
||||
out.write(decrypted_chunk)
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc
|
||||
|
||||
|
||||
class EncryptionManager:
|
||||
"""Manages encryption providers and operations."""
|
||||
|
||||
@@ -175,13 +175,21 @@ def handle_app_error(error: AppError) -> Response:
|
||||
|
||||
def handle_rate_limit_exceeded(e: RateLimitExceeded) -> Response:
|
||||
g.s3_error_code = "SlowDown"
|
||||
if request.path.startswith("/ui") or request.path.startswith("/buckets"):
|
||||
wants_json = (
|
||||
request.is_json or
|
||||
request.headers.get("X-Requested-With") == "XMLHttpRequest" or
|
||||
"application/json" in request.accept_mimetypes.values()
|
||||
)
|
||||
if wants_json:
|
||||
return jsonify({"success": False, "error": {"code": "SlowDown", "message": "Please reduce your request rate."}}), 429
|
||||
error = Element("Error")
|
||||
SubElement(error, "Code").text = "SlowDown"
|
||||
SubElement(error, "Message").text = "Please reduce your request rate."
|
||||
SubElement(error, "Resource").text = request.path
|
||||
SubElement(error, "RequestId").text = getattr(g, "request_id", "")
|
||||
xml_bytes = tostring(error, encoding="utf-8")
|
||||
return Response(xml_bytes, status=429, mimetype="application/xml")
|
||||
return Response(xml_bytes, status="429 Too Many Requests", mimetype="application/xml")
|
||||
|
||||
|
||||
def register_error_handlers(app):
|
||||
|
||||
596
app/gc.py
Normal file
596
app/gc.py
Normal file
@@ -0,0 +1,596 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class GCResult:
|
||||
temp_files_deleted: int = 0
|
||||
temp_bytes_freed: int = 0
|
||||
multipart_uploads_deleted: int = 0
|
||||
multipart_bytes_freed: int = 0
|
||||
lock_files_deleted: int = 0
|
||||
orphaned_metadata_deleted: int = 0
|
||||
orphaned_versions_deleted: int = 0
|
||||
orphaned_version_bytes_freed: int = 0
|
||||
empty_dirs_removed: int = 0
|
||||
errors: List[str] = field(default_factory=list)
|
||||
execution_time_seconds: float = 0.0
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"temp_files_deleted": self.temp_files_deleted,
|
||||
"temp_bytes_freed": self.temp_bytes_freed,
|
||||
"multipart_uploads_deleted": self.multipart_uploads_deleted,
|
||||
"multipart_bytes_freed": self.multipart_bytes_freed,
|
||||
"lock_files_deleted": self.lock_files_deleted,
|
||||
"orphaned_metadata_deleted": self.orphaned_metadata_deleted,
|
||||
"orphaned_versions_deleted": self.orphaned_versions_deleted,
|
||||
"orphaned_version_bytes_freed": self.orphaned_version_bytes_freed,
|
||||
"empty_dirs_removed": self.empty_dirs_removed,
|
||||
"errors": self.errors,
|
||||
"execution_time_seconds": self.execution_time_seconds,
|
||||
}
|
||||
|
||||
@property
|
||||
def total_bytes_freed(self) -> int:
|
||||
return self.temp_bytes_freed + self.multipart_bytes_freed + self.orphaned_version_bytes_freed
|
||||
|
||||
@property
|
||||
def has_work(self) -> bool:
|
||||
return (
|
||||
self.temp_files_deleted > 0
|
||||
or self.multipart_uploads_deleted > 0
|
||||
or self.lock_files_deleted > 0
|
||||
or self.orphaned_metadata_deleted > 0
|
||||
or self.orphaned_versions_deleted > 0
|
||||
or self.empty_dirs_removed > 0
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class GCExecutionRecord:
|
||||
timestamp: float
|
||||
result: dict
|
||||
dry_run: bool
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"timestamp": self.timestamp,
|
||||
"result": self.result,
|
||||
"dry_run": self.dry_run,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> GCExecutionRecord:
|
||||
return cls(
|
||||
timestamp=data["timestamp"],
|
||||
result=data["result"],
|
||||
dry_run=data.get("dry_run", False),
|
||||
)
|
||||
|
||||
|
||||
class GCHistoryStore:
|
||||
def __init__(self, storage_root: Path, max_records: int = 50) -> None:
|
||||
self.storage_root = storage_root
|
||||
self.max_records = max_records
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def _get_path(self) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "config" / "gc_history.json"
|
||||
|
||||
def load(self) -> List[GCExecutionRecord]:
|
||||
path = self._get_path()
|
||||
if not path.exists():
|
||||
return []
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
return [GCExecutionRecord.from_dict(d) for d in data.get("executions", [])]
|
||||
except (OSError, ValueError, KeyError) as e:
|
||||
logger.error("Failed to load GC history: %s", e)
|
||||
return []
|
||||
|
||||
def save(self, records: List[GCExecutionRecord]) -> None:
|
||||
path = self._get_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
data = {"executions": [r.to_dict() for r in records[: self.max_records]]}
|
||||
try:
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except OSError as e:
|
||||
logger.error("Failed to save GC history: %s", e)
|
||||
|
||||
def add(self, record: GCExecutionRecord) -> None:
|
||||
with self._lock:
|
||||
records = self.load()
|
||||
records.insert(0, record)
|
||||
self.save(records)
|
||||
|
||||
def get_history(self, limit: int = 50, offset: int = 0) -> List[GCExecutionRecord]:
|
||||
return self.load()[offset : offset + limit]
|
||||
|
||||
|
||||
def _dir_size(path: Path) -> int:
|
||||
total = 0
|
||||
try:
|
||||
for f in path.rglob("*"):
|
||||
if f.is_file():
|
||||
try:
|
||||
total += f.stat().st_size
|
||||
except OSError:
|
||||
pass
|
||||
except OSError:
|
||||
pass
|
||||
return total
|
||||
|
||||
|
||||
def _file_age_hours(path: Path) -> float:
|
||||
try:
|
||||
mtime = path.stat().st_mtime
|
||||
return (time.time() - mtime) / 3600.0
|
||||
except OSError:
|
||||
return 0.0
|
||||
|
||||
|
||||
class GarbageCollector:
|
||||
SYSTEM_ROOT = ".myfsio.sys"
|
||||
SYSTEM_TMP_DIR = "tmp"
|
||||
SYSTEM_MULTIPART_DIR = "multipart"
|
||||
SYSTEM_BUCKETS_DIR = "buckets"
|
||||
BUCKET_META_DIR = "meta"
|
||||
BUCKET_VERSIONS_DIR = "versions"
|
||||
INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
storage_root: Path,
|
||||
interval_hours: float = 6.0,
|
||||
temp_file_max_age_hours: float = 24.0,
|
||||
multipart_max_age_days: int = 7,
|
||||
lock_file_max_age_hours: float = 1.0,
|
||||
dry_run: bool = False,
|
||||
max_history: int = 50,
|
||||
io_throttle_ms: int = 10,
|
||||
) -> None:
|
||||
self.storage_root = Path(storage_root)
|
||||
self.interval_seconds = interval_hours * 3600.0
|
||||
self.temp_file_max_age_hours = temp_file_max_age_hours
|
||||
self.multipart_max_age_days = multipart_max_age_days
|
||||
self.lock_file_max_age_hours = lock_file_max_age_hours
|
||||
self.dry_run = dry_run
|
||||
self._timer: Optional[threading.Timer] = None
|
||||
self._shutdown = False
|
||||
self._lock = threading.Lock()
|
||||
self._scanning = False
|
||||
self._scan_start_time: Optional[float] = None
|
||||
self._io_throttle = max(0, io_throttle_ms) / 1000.0
|
||||
self.history_store = GCHistoryStore(storage_root, max_records=max_history)
|
||||
|
||||
def start(self) -> None:
|
||||
if self._timer is not None:
|
||||
return
|
||||
self._shutdown = False
|
||||
self._schedule_next()
|
||||
logger.info(
|
||||
"GC started: interval=%.1fh, temp_max_age=%.1fh, multipart_max_age=%dd, lock_max_age=%.1fh, dry_run=%s",
|
||||
self.interval_seconds / 3600.0,
|
||||
self.temp_file_max_age_hours,
|
||||
self.multipart_max_age_days,
|
||||
self.lock_file_max_age_hours,
|
||||
self.dry_run,
|
||||
)
|
||||
|
||||
def stop(self) -> None:
|
||||
self._shutdown = True
|
||||
if self._timer:
|
||||
self._timer.cancel()
|
||||
self._timer = None
|
||||
logger.info("GC stopped")
|
||||
|
||||
def _schedule_next(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
|
||||
self._timer.daemon = True
|
||||
self._timer.start()
|
||||
|
||||
def _run_cycle(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
try:
|
||||
self.run_now()
|
||||
except Exception as e:
|
||||
logger.error("GC cycle failed: %s", e)
|
||||
finally:
|
||||
self._schedule_next()
|
||||
|
||||
def run_now(self, dry_run: Optional[bool] = None) -> GCResult:
|
||||
if not self._lock.acquire(blocking=False):
|
||||
raise RuntimeError("GC is already in progress")
|
||||
|
||||
effective_dry_run = dry_run if dry_run is not None else self.dry_run
|
||||
|
||||
try:
|
||||
self._scanning = True
|
||||
self._scan_start_time = time.time()
|
||||
|
||||
start = self._scan_start_time
|
||||
result = GCResult()
|
||||
|
||||
original_dry_run = self.dry_run
|
||||
self.dry_run = effective_dry_run
|
||||
try:
|
||||
self._clean_temp_files(result)
|
||||
self._clean_orphaned_multipart(result)
|
||||
self._clean_stale_locks(result)
|
||||
self._clean_orphaned_metadata(result)
|
||||
self._clean_orphaned_versions(result)
|
||||
self._clean_empty_dirs(result)
|
||||
finally:
|
||||
self.dry_run = original_dry_run
|
||||
|
||||
result.execution_time_seconds = time.time() - start
|
||||
|
||||
if result.has_work or result.errors:
|
||||
logger.info(
|
||||
"GC completed in %.2fs: temp=%d (%.1f MB), multipart=%d (%.1f MB), "
|
||||
"locks=%d, meta=%d, versions=%d (%.1f MB), dirs=%d, errors=%d%s",
|
||||
result.execution_time_seconds,
|
||||
result.temp_files_deleted,
|
||||
result.temp_bytes_freed / (1024 * 1024),
|
||||
result.multipart_uploads_deleted,
|
||||
result.multipart_bytes_freed / (1024 * 1024),
|
||||
result.lock_files_deleted,
|
||||
result.orphaned_metadata_deleted,
|
||||
result.orphaned_versions_deleted,
|
||||
result.orphaned_version_bytes_freed / (1024 * 1024),
|
||||
result.empty_dirs_removed,
|
||||
len(result.errors),
|
||||
" (dry run)" if effective_dry_run else "",
|
||||
)
|
||||
|
||||
record = GCExecutionRecord(
|
||||
timestamp=time.time(),
|
||||
result=result.to_dict(),
|
||||
dry_run=effective_dry_run,
|
||||
)
|
||||
self.history_store.add(record)
|
||||
|
||||
return result
|
||||
finally:
|
||||
self._scanning = False
|
||||
self._scan_start_time = None
|
||||
self._lock.release()
|
||||
|
||||
def run_async(self, dry_run: Optional[bool] = None) -> bool:
|
||||
if self._scanning:
|
||||
return False
|
||||
t = threading.Thread(target=self.run_now, args=(dry_run,), daemon=True)
|
||||
t.start()
|
||||
return True
|
||||
|
||||
def _system_path(self) -> Path:
|
||||
return self.storage_root / self.SYSTEM_ROOT
|
||||
|
||||
def _throttle(self) -> bool:
|
||||
if self._shutdown:
|
||||
return True
|
||||
if self._io_throttle > 0:
|
||||
time.sleep(self._io_throttle)
|
||||
return self._shutdown
|
||||
|
||||
def _list_bucket_names(self) -> List[str]:
|
||||
names = []
|
||||
try:
|
||||
for entry in self.storage_root.iterdir():
|
||||
if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
|
||||
names.append(entry.name)
|
||||
except OSError:
|
||||
pass
|
||||
return names
|
||||
|
||||
def _clean_temp_files(self, result: GCResult) -> None:
|
||||
tmp_dir = self._system_path() / self.SYSTEM_TMP_DIR
|
||||
if not tmp_dir.exists():
|
||||
return
|
||||
try:
|
||||
for entry in tmp_dir.iterdir():
|
||||
if self._throttle():
|
||||
return
|
||||
if not entry.is_file():
|
||||
continue
|
||||
age = _file_age_hours(entry)
|
||||
if age < self.temp_file_max_age_hours:
|
||||
continue
|
||||
try:
|
||||
size = entry.stat().st_size
|
||||
if not self.dry_run:
|
||||
entry.unlink()
|
||||
result.temp_files_deleted += 1
|
||||
result.temp_bytes_freed += size
|
||||
except OSError as e:
|
||||
result.errors.append(f"temp file {entry.name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan tmp dir: {e}")
|
||||
|
||||
def _clean_orphaned_multipart(self, result: GCResult) -> None:
|
||||
cutoff_hours = self.multipart_max_age_days * 24.0
|
||||
bucket_names = self._list_bucket_names()
|
||||
|
||||
for bucket_name in bucket_names:
|
||||
if self._shutdown:
|
||||
return
|
||||
for multipart_root in (
|
||||
self._system_path() / self.SYSTEM_MULTIPART_DIR / bucket_name,
|
||||
self.storage_root / bucket_name / ".multipart",
|
||||
):
|
||||
if not multipart_root.exists():
|
||||
continue
|
||||
try:
|
||||
for upload_dir in multipart_root.iterdir():
|
||||
if self._throttle():
|
||||
return
|
||||
if not upload_dir.is_dir():
|
||||
continue
|
||||
self._maybe_clean_upload(upload_dir, cutoff_hours, result)
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan multipart {bucket_name}: {e}")
|
||||
|
||||
def _maybe_clean_upload(self, upload_dir: Path, cutoff_hours: float, result: GCResult) -> None:
|
||||
manifest_path = upload_dir / "manifest.json"
|
||||
age = _file_age_hours(manifest_path) if manifest_path.exists() else _file_age_hours(upload_dir)
|
||||
|
||||
if age < cutoff_hours:
|
||||
return
|
||||
|
||||
dir_bytes = _dir_size(upload_dir)
|
||||
try:
|
||||
if not self.dry_run:
|
||||
shutil.rmtree(upload_dir, ignore_errors=True)
|
||||
result.multipart_uploads_deleted += 1
|
||||
result.multipart_bytes_freed += dir_bytes
|
||||
except OSError as e:
|
||||
result.errors.append(f"multipart {upload_dir.name}: {e}")
|
||||
|
||||
def _clean_stale_locks(self, result: GCResult) -> None:
|
||||
buckets_root = self._system_path() / self.SYSTEM_BUCKETS_DIR
|
||||
if not buckets_root.exists():
|
||||
return
|
||||
|
||||
try:
|
||||
for bucket_dir in buckets_root.iterdir():
|
||||
if self._shutdown:
|
||||
return
|
||||
if not bucket_dir.is_dir():
|
||||
continue
|
||||
locks_dir = bucket_dir / "locks"
|
||||
if not locks_dir.exists():
|
||||
continue
|
||||
try:
|
||||
for lock_file in locks_dir.iterdir():
|
||||
if self._throttle():
|
||||
return
|
||||
if not lock_file.is_file() or not lock_file.name.endswith(".lock"):
|
||||
continue
|
||||
age = _file_age_hours(lock_file)
|
||||
if age < self.lock_file_max_age_hours:
|
||||
continue
|
||||
try:
|
||||
if not self.dry_run:
|
||||
lock_file.unlink(missing_ok=True)
|
||||
result.lock_files_deleted += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"lock {lock_file.name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan locks {bucket_dir.name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan buckets for locks: {e}")
|
||||
|
||||
def _clean_orphaned_metadata(self, result: GCResult) -> None:
|
||||
bucket_names = self._list_bucket_names()
|
||||
|
||||
for bucket_name in bucket_names:
|
||||
if self._shutdown:
|
||||
return
|
||||
legacy_meta = self.storage_root / bucket_name / ".meta"
|
||||
if legacy_meta.exists():
|
||||
self._clean_legacy_metadata(bucket_name, legacy_meta, result)
|
||||
|
||||
new_meta = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
if new_meta.exists():
|
||||
self._clean_index_metadata(bucket_name, new_meta, result)
|
||||
|
||||
def _clean_legacy_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
try:
|
||||
for meta_file in meta_root.rglob("*.meta.json"):
|
||||
if self._throttle():
|
||||
return
|
||||
if not meta_file.is_file():
|
||||
continue
|
||||
try:
|
||||
rel = meta_file.relative_to(meta_root)
|
||||
object_key = rel.as_posix().removesuffix(".meta.json")
|
||||
object_path = bucket_path / object_key
|
||||
if not object_path.exists():
|
||||
if not self.dry_run:
|
||||
meta_file.unlink(missing_ok=True)
|
||||
result.orphaned_metadata_deleted += 1
|
||||
except (OSError, ValueError) as e:
|
||||
result.errors.append(f"legacy meta {bucket_name}/{meta_file.name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan legacy meta {bucket_name}: {e}")
|
||||
|
||||
def _clean_index_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
try:
|
||||
for index_file in meta_root.rglob("_index.json"):
|
||||
if self._throttle():
|
||||
return
|
||||
if not index_file.is_file():
|
||||
continue
|
||||
try:
|
||||
with open(index_file, "r", encoding="utf-8") as f:
|
||||
index_data = json.load(f)
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
|
||||
keys_to_remove = []
|
||||
for key in index_data:
|
||||
rel_dir = index_file.parent.relative_to(meta_root)
|
||||
if rel_dir == Path("."):
|
||||
full_key = key
|
||||
else:
|
||||
full_key = rel_dir.as_posix() + "/" + key
|
||||
object_path = bucket_path / full_key
|
||||
if not object_path.exists():
|
||||
keys_to_remove.append(key)
|
||||
|
||||
if keys_to_remove:
|
||||
if not self.dry_run:
|
||||
for k in keys_to_remove:
|
||||
index_data.pop(k, None)
|
||||
if index_data:
|
||||
try:
|
||||
with open(index_file, "w", encoding="utf-8") as f:
|
||||
json.dump(index_data, f)
|
||||
except OSError as e:
|
||||
result.errors.append(f"write index {bucket_name}: {e}")
|
||||
continue
|
||||
else:
|
||||
try:
|
||||
index_file.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
result.orphaned_metadata_deleted += len(keys_to_remove)
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan index meta {bucket_name}: {e}")
|
||||
|
||||
def _clean_orphaned_versions(self, result: GCResult) -> None:
|
||||
bucket_names = self._list_bucket_names()
|
||||
|
||||
for bucket_name in bucket_names:
|
||||
if self._shutdown:
|
||||
return
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
for versions_root in (
|
||||
self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR,
|
||||
self.storage_root / bucket_name / ".versions",
|
||||
):
|
||||
if not versions_root.exists():
|
||||
continue
|
||||
try:
|
||||
for key_dir in versions_root.iterdir():
|
||||
if self._throttle():
|
||||
return
|
||||
if not key_dir.is_dir():
|
||||
continue
|
||||
self._clean_versions_for_key(bucket_path, versions_root, key_dir, result)
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan versions {bucket_name}: {e}")
|
||||
|
||||
def _clean_versions_for_key(
|
||||
self, bucket_path: Path, versions_root: Path, key_dir: Path, result: GCResult
|
||||
) -> None:
|
||||
try:
|
||||
rel = key_dir.relative_to(versions_root)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
object_path = bucket_path / rel
|
||||
if object_path.exists():
|
||||
return
|
||||
|
||||
version_files = list(key_dir.glob("*.bin")) + list(key_dir.glob("*.json"))
|
||||
if not version_files:
|
||||
return
|
||||
|
||||
for vf in version_files:
|
||||
try:
|
||||
size = vf.stat().st_size if vf.suffix == ".bin" else 0
|
||||
if not self.dry_run:
|
||||
vf.unlink(missing_ok=True)
|
||||
if vf.suffix == ".bin":
|
||||
result.orphaned_version_bytes_freed += size
|
||||
result.orphaned_versions_deleted += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"version file {vf.name}: {e}")
|
||||
|
||||
def _clean_empty_dirs(self, result: GCResult) -> None:
|
||||
targets = [
|
||||
self._system_path() / self.SYSTEM_TMP_DIR,
|
||||
self._system_path() / self.SYSTEM_MULTIPART_DIR,
|
||||
self._system_path() / self.SYSTEM_BUCKETS_DIR,
|
||||
]
|
||||
for bucket_name in self._list_bucket_names():
|
||||
targets.append(self.storage_root / bucket_name / ".meta")
|
||||
targets.append(self.storage_root / bucket_name / ".versions")
|
||||
targets.append(self.storage_root / bucket_name / ".multipart")
|
||||
|
||||
for root in targets:
|
||||
if not root.exists():
|
||||
continue
|
||||
self._remove_empty_dirs_recursive(root, root, result)
|
||||
|
||||
def _remove_empty_dirs_recursive(self, path: Path, stop_at: Path, result: GCResult) -> bool:
|
||||
if self._shutdown:
|
||||
return False
|
||||
if not path.is_dir():
|
||||
return False
|
||||
|
||||
try:
|
||||
children = list(path.iterdir())
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
all_empty = True
|
||||
for child in children:
|
||||
if self._throttle():
|
||||
return False
|
||||
if child.is_dir():
|
||||
if not self._remove_empty_dirs_recursive(child, stop_at, result):
|
||||
all_empty = False
|
||||
else:
|
||||
all_empty = False
|
||||
|
||||
if all_empty and path != stop_at:
|
||||
try:
|
||||
if not self.dry_run:
|
||||
path.rmdir()
|
||||
result.empty_dirs_removed += 1
|
||||
return True
|
||||
except OSError:
|
||||
return False
|
||||
return all_empty
|
||||
|
||||
def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
|
||||
records = self.history_store.get_history(limit, offset)
|
||||
return [r.to_dict() for r in records]
|
||||
|
||||
def get_status(self) -> dict:
|
||||
status: Dict[str, Any] = {
|
||||
"enabled": not self._shutdown or self._timer is not None,
|
||||
"running": self._timer is not None and not self._shutdown,
|
||||
"scanning": self._scanning,
|
||||
"interval_hours": self.interval_seconds / 3600.0,
|
||||
"temp_file_max_age_hours": self.temp_file_max_age_hours,
|
||||
"multipart_max_age_days": self.multipart_max_age_days,
|
||||
"lock_file_max_age_hours": self.lock_file_max_age_hours,
|
||||
"dry_run": self.dry_run,
|
||||
"io_throttle_ms": round(self._io_throttle * 1000),
|
||||
}
|
||||
if self._scanning and self._scan_start_time:
|
||||
status["scan_elapsed_seconds"] = time.time() - self._scan_start_time
|
||||
return status
|
||||
701
app/iam.py
701
app/iam.py
File diff suppressed because it is too large
Load Diff
995
app/integrity.py
Normal file
995
app/integrity.py
Normal file
@@ -0,0 +1,995 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
try:
|
||||
import myfsio_core as _rc
|
||||
if not hasattr(_rc, "md5_file"):
|
||||
raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
|
||||
_HAS_RUST = True
|
||||
except ImportError:
|
||||
_HAS_RUST = False
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _compute_etag(path: Path) -> str:
|
||||
if _HAS_RUST:
|
||||
return _rc.md5_file(str(path))
|
||||
checksum = hashlib.md5()
|
||||
with path.open("rb") as handle:
|
||||
for chunk in iter(lambda: handle.read(8192), b""):
|
||||
checksum.update(chunk)
|
||||
return checksum.hexdigest()
|
||||
|
||||
|
||||
@dataclass
|
||||
class IntegrityIssue:
|
||||
issue_type: str
|
||||
bucket: str
|
||||
key: str
|
||||
detail: str
|
||||
healed: bool = False
|
||||
heal_action: str = ""
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"issue_type": self.issue_type,
|
||||
"bucket": self.bucket,
|
||||
"key": self.key,
|
||||
"detail": self.detail,
|
||||
"healed": self.healed,
|
||||
"heal_action": self.heal_action,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class IntegrityResult:
|
||||
corrupted_objects: int = 0
|
||||
orphaned_objects: int = 0
|
||||
phantom_metadata: int = 0
|
||||
stale_versions: int = 0
|
||||
etag_cache_inconsistencies: int = 0
|
||||
legacy_metadata_drifts: int = 0
|
||||
issues_healed: int = 0
|
||||
issues: List[IntegrityIssue] = field(default_factory=list)
|
||||
errors: List[str] = field(default_factory=list)
|
||||
objects_scanned: int = 0
|
||||
buckets_scanned: int = 0
|
||||
execution_time_seconds: float = 0.0
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"corrupted_objects": self.corrupted_objects,
|
||||
"orphaned_objects": self.orphaned_objects,
|
||||
"phantom_metadata": self.phantom_metadata,
|
||||
"stale_versions": self.stale_versions,
|
||||
"etag_cache_inconsistencies": self.etag_cache_inconsistencies,
|
||||
"legacy_metadata_drifts": self.legacy_metadata_drifts,
|
||||
"issues_healed": self.issues_healed,
|
||||
"issues": [i.to_dict() for i in self.issues],
|
||||
"errors": self.errors,
|
||||
"objects_scanned": self.objects_scanned,
|
||||
"buckets_scanned": self.buckets_scanned,
|
||||
"execution_time_seconds": self.execution_time_seconds,
|
||||
}
|
||||
|
||||
@property
|
||||
def total_issues(self) -> int:
|
||||
return (
|
||||
self.corrupted_objects
|
||||
+ self.orphaned_objects
|
||||
+ self.phantom_metadata
|
||||
+ self.stale_versions
|
||||
+ self.etag_cache_inconsistencies
|
||||
+ self.legacy_metadata_drifts
|
||||
)
|
||||
|
||||
@property
|
||||
def has_issues(self) -> bool:
|
||||
return self.total_issues > 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class IntegrityExecutionRecord:
|
||||
timestamp: float
|
||||
result: dict
|
||||
dry_run: bool
|
||||
auto_heal: bool
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"timestamp": self.timestamp,
|
||||
"result": self.result,
|
||||
"dry_run": self.dry_run,
|
||||
"auto_heal": self.auto_heal,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> IntegrityExecutionRecord:
|
||||
return cls(
|
||||
timestamp=data["timestamp"],
|
||||
result=data["result"],
|
||||
dry_run=data.get("dry_run", False),
|
||||
auto_heal=data.get("auto_heal", False),
|
||||
)
|
||||
|
||||
|
||||
class IntegrityHistoryStore:
|
||||
def __init__(self, storage_root: Path, max_records: int = 50) -> None:
|
||||
self.storage_root = storage_root
|
||||
self.max_records = max_records
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def _get_path(self) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "config" / "integrity_history.json"
|
||||
|
||||
def load(self) -> List[IntegrityExecutionRecord]:
|
||||
path = self._get_path()
|
||||
if not path.exists():
|
||||
return []
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
return [IntegrityExecutionRecord.from_dict(d) for d in data.get("executions", [])]
|
||||
except (OSError, ValueError, KeyError) as e:
|
||||
logger.error("Failed to load integrity history: %s", e)
|
||||
return []
|
||||
|
||||
def save(self, records: List[IntegrityExecutionRecord]) -> None:
|
||||
path = self._get_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
data = {"executions": [r.to_dict() for r in records[: self.max_records]]}
|
||||
try:
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except OSError as e:
|
||||
logger.error("Failed to save integrity history: %s", e)
|
||||
|
||||
def add(self, record: IntegrityExecutionRecord) -> None:
|
||||
with self._lock:
|
||||
records = self.load()
|
||||
records.insert(0, record)
|
||||
self.save(records)
|
||||
|
||||
def get_history(self, limit: int = 50, offset: int = 0) -> List[IntegrityExecutionRecord]:
|
||||
return self.load()[offset : offset + limit]
|
||||
|
||||
|
||||
class IntegrityCursorStore:
|
||||
def __init__(self, storage_root: Path) -> None:
|
||||
self.storage_root = storage_root
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def _get_path(self) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "config" / "integrity_cursor.json"
|
||||
|
||||
def load(self) -> Dict[str, Any]:
|
||||
path = self._get_path()
|
||||
if not path.exists():
|
||||
return {"buckets": {}}
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
if not isinstance(data.get("buckets"), dict):
|
||||
return {"buckets": {}}
|
||||
return data
|
||||
except (OSError, ValueError, KeyError):
|
||||
return {"buckets": {}}
|
||||
|
||||
def save(self, data: Dict[str, Any]) -> None:
|
||||
path = self._get_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except OSError as e:
|
||||
logger.error("Failed to save integrity cursor: %s", e)
|
||||
|
||||
def update_bucket(
|
||||
self,
|
||||
bucket_name: str,
|
||||
timestamp: float,
|
||||
last_key: Optional[str] = None,
|
||||
completed: bool = False,
|
||||
) -> None:
|
||||
with self._lock:
|
||||
data = self.load()
|
||||
entry = data["buckets"].get(bucket_name, {})
|
||||
if completed:
|
||||
entry["last_scanned"] = timestamp
|
||||
entry.pop("last_key", None)
|
||||
entry["completed"] = True
|
||||
else:
|
||||
entry["last_scanned"] = timestamp
|
||||
if last_key is not None:
|
||||
entry["last_key"] = last_key
|
||||
entry["completed"] = False
|
||||
data["buckets"][bucket_name] = entry
|
||||
self.save(data)
|
||||
|
||||
def clean_stale(self, existing_buckets: List[str]) -> None:
|
||||
with self._lock:
|
||||
data = self.load()
|
||||
existing_set = set(existing_buckets)
|
||||
stale_keys = [k for k in data["buckets"] if k not in existing_set]
|
||||
if stale_keys:
|
||||
for k in stale_keys:
|
||||
del data["buckets"][k]
|
||||
self.save(data)
|
||||
|
||||
def get_last_key(self, bucket_name: str) -> Optional[str]:
|
||||
data = self.load()
|
||||
entry = data.get("buckets", {}).get(bucket_name)
|
||||
if entry is None:
|
||||
return None
|
||||
return entry.get("last_key")
|
||||
|
||||
def get_bucket_order(self, bucket_names: List[str]) -> List[str]:
|
||||
data = self.load()
|
||||
buckets_info = data.get("buckets", {})
|
||||
|
||||
incomplete = []
|
||||
complete = []
|
||||
for name in bucket_names:
|
||||
entry = buckets_info.get(name)
|
||||
if entry is None:
|
||||
incomplete.append((name, 0.0))
|
||||
elif entry.get("last_key") is not None:
|
||||
incomplete.append((name, entry.get("last_scanned", 0.0)))
|
||||
else:
|
||||
complete.append((name, entry.get("last_scanned", 0.0)))
|
||||
|
||||
incomplete.sort(key=lambda x: x[1])
|
||||
complete.sort(key=lambda x: x[1])
|
||||
|
||||
return [n for n, _ in incomplete] + [n for n, _ in complete]
|
||||
|
||||
def get_info(self) -> Dict[str, Any]:
|
||||
data = self.load()
|
||||
buckets = data.get("buckets", {})
|
||||
return {
|
||||
"tracked_buckets": len(buckets),
|
||||
"buckets": {
|
||||
name: {
|
||||
"last_scanned": info.get("last_scanned"),
|
||||
"last_key": info.get("last_key"),
|
||||
"completed": info.get("completed", False),
|
||||
}
|
||||
for name, info in buckets.items()
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
MAX_ISSUES = 500
|
||||
|
||||
|
||||
class IntegrityChecker:
|
||||
SYSTEM_ROOT = ".myfsio.sys"
|
||||
SYSTEM_BUCKETS_DIR = "buckets"
|
||||
BUCKET_META_DIR = "meta"
|
||||
BUCKET_VERSIONS_DIR = "versions"
|
||||
INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
storage_root: Path,
|
||||
interval_hours: float = 24.0,
|
||||
batch_size: int = 1000,
|
||||
auto_heal: bool = False,
|
||||
dry_run: bool = False,
|
||||
max_history: int = 50,
|
||||
io_throttle_ms: int = 10,
|
||||
) -> None:
|
||||
self.storage_root = Path(storage_root)
|
||||
self.interval_seconds = interval_hours * 3600.0
|
||||
self.batch_size = batch_size
|
||||
self.auto_heal = auto_heal
|
||||
self.dry_run = dry_run
|
||||
self._timer: Optional[threading.Timer] = None
|
||||
self._shutdown = False
|
||||
self._lock = threading.Lock()
|
||||
self._scanning = False
|
||||
self._scan_start_time: Optional[float] = None
|
||||
self._io_throttle = max(0, io_throttle_ms) / 1000.0
|
||||
self.history_store = IntegrityHistoryStore(storage_root, max_records=max_history)
|
||||
self.cursor_store = IntegrityCursorStore(self.storage_root)
|
||||
|
||||
def start(self) -> None:
|
||||
if self._timer is not None:
|
||||
return
|
||||
self._shutdown = False
|
||||
self._schedule_next()
|
||||
logger.info(
|
||||
"Integrity checker started: interval=%.1fh, batch_size=%d, auto_heal=%s, dry_run=%s",
|
||||
self.interval_seconds / 3600.0,
|
||||
self.batch_size,
|
||||
self.auto_heal,
|
||||
self.dry_run,
|
||||
)
|
||||
|
||||
def stop(self) -> None:
|
||||
self._shutdown = True
|
||||
if self._timer:
|
||||
self._timer.cancel()
|
||||
self._timer = None
|
||||
logger.info("Integrity checker stopped")
|
||||
|
||||
def _schedule_next(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
|
||||
self._timer.daemon = True
|
||||
self._timer.start()
|
||||
|
||||
def _run_cycle(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
try:
|
||||
self.run_now()
|
||||
except Exception as e:
|
||||
logger.error("Integrity check cycle failed: %s", e)
|
||||
finally:
|
||||
self._schedule_next()
|
||||
|
||||
def run_now(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> IntegrityResult:
|
||||
if not self._lock.acquire(blocking=False):
|
||||
raise RuntimeError("Integrity scan is already in progress")
|
||||
|
||||
try:
|
||||
self._scanning = True
|
||||
self._scan_start_time = time.time()
|
||||
|
||||
effective_auto_heal = auto_heal if auto_heal is not None else self.auto_heal
|
||||
effective_dry_run = dry_run if dry_run is not None else self.dry_run
|
||||
|
||||
start = self._scan_start_time
|
||||
result = IntegrityResult()
|
||||
|
||||
bucket_names = self._list_bucket_names()
|
||||
self.cursor_store.clean_stale(bucket_names)
|
||||
ordered_buckets = self.cursor_store.get_bucket_order(bucket_names)
|
||||
|
||||
for bucket_name in ordered_buckets:
|
||||
if self._batch_exhausted(result):
|
||||
break
|
||||
result.buckets_scanned += 1
|
||||
cursor_key = self.cursor_store.get_last_key(bucket_name)
|
||||
key_corrupted = self._check_corrupted_objects(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
|
||||
key_orphaned = self._check_orphaned_objects(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
|
||||
key_phantom = self._check_phantom_metadata(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
|
||||
self._check_stale_versions(bucket_name, result, effective_auto_heal, effective_dry_run)
|
||||
self._check_etag_cache(bucket_name, result, effective_auto_heal, effective_dry_run)
|
||||
self._check_legacy_metadata(bucket_name, result, effective_auto_heal, effective_dry_run)
|
||||
returned_keys = [k for k in (key_corrupted, key_orphaned, key_phantom) if k is not None]
|
||||
bucket_exhausted = self._batch_exhausted(result)
|
||||
if bucket_exhausted and returned_keys:
|
||||
self.cursor_store.update_bucket(bucket_name, time.time(), last_key=min(returned_keys))
|
||||
else:
|
||||
self.cursor_store.update_bucket(bucket_name, time.time(), completed=True)
|
||||
|
||||
result.execution_time_seconds = time.time() - start
|
||||
|
||||
if result.has_issues or result.errors:
|
||||
logger.info(
|
||||
"Integrity check completed in %.2fs: corrupted=%d, orphaned=%d, phantom=%d, "
|
||||
"stale_versions=%d, etag_cache=%d, legacy_drift=%d, healed=%d, errors=%d%s",
|
||||
result.execution_time_seconds,
|
||||
result.corrupted_objects,
|
||||
result.orphaned_objects,
|
||||
result.phantom_metadata,
|
||||
result.stale_versions,
|
||||
result.etag_cache_inconsistencies,
|
||||
result.legacy_metadata_drifts,
|
||||
result.issues_healed,
|
||||
len(result.errors),
|
||||
" (dry run)" if effective_dry_run else "",
|
||||
)
|
||||
|
||||
record = IntegrityExecutionRecord(
|
||||
timestamp=time.time(),
|
||||
result=result.to_dict(),
|
||||
dry_run=effective_dry_run,
|
||||
auto_heal=effective_auto_heal,
|
||||
)
|
||||
self.history_store.add(record)
|
||||
|
||||
return result
|
||||
finally:
|
||||
self._scanning = False
|
||||
self._scan_start_time = None
|
||||
self._lock.release()
|
||||
|
||||
def run_async(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> bool:
|
||||
if self._scanning:
|
||||
return False
|
||||
t = threading.Thread(target=self.run_now, args=(auto_heal, dry_run), daemon=True)
|
||||
t.start()
|
||||
return True
|
||||
|
||||
def _system_path(self) -> Path:
|
||||
return self.storage_root / self.SYSTEM_ROOT
|
||||
|
||||
def _list_bucket_names(self) -> List[str]:
|
||||
names = []
|
||||
try:
|
||||
for entry in self.storage_root.iterdir():
|
||||
if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
|
||||
names.append(entry.name)
|
||||
except OSError:
|
||||
pass
|
||||
return names
|
||||
|
||||
def _throttle(self) -> bool:
|
||||
if self._shutdown:
|
||||
return True
|
||||
if self._io_throttle > 0:
|
||||
time.sleep(self._io_throttle)
|
||||
return self._shutdown
|
||||
|
||||
def _batch_exhausted(self, result: IntegrityResult) -> bool:
|
||||
return self._shutdown or result.objects_scanned >= self.batch_size
|
||||
|
||||
def _add_issue(self, result: IntegrityResult, issue: IntegrityIssue) -> None:
|
||||
if len(result.issues) < MAX_ISSUES:
|
||||
result.issues.append(issue)
|
||||
|
||||
def _collect_index_keys(
|
||||
self, meta_root: Path, cursor_key: Optional[str] = None,
|
||||
) -> Dict[str, Dict[str, Any]]:
|
||||
all_keys: Dict[str, Dict[str, Any]] = {}
|
||||
if not meta_root.exists():
|
||||
return all_keys
|
||||
try:
|
||||
for index_file in meta_root.rglob("_index.json"):
|
||||
if not index_file.is_file():
|
||||
continue
|
||||
rel_dir = index_file.parent.relative_to(meta_root)
|
||||
dir_prefix = "" if rel_dir == Path(".") else rel_dir.as_posix()
|
||||
if cursor_key is not None and dir_prefix:
|
||||
full_prefix = dir_prefix + "/"
|
||||
if not cursor_key.startswith(full_prefix) and cursor_key > full_prefix:
|
||||
continue
|
||||
try:
|
||||
index_data = json.loads(index_file.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
for key_name, entry in index_data.items():
|
||||
full_key = (dir_prefix + "/" + key_name) if dir_prefix else key_name
|
||||
if cursor_key is not None and full_key <= cursor_key:
|
||||
continue
|
||||
all_keys[full_key] = {
|
||||
"entry": entry,
|
||||
"index_file": index_file,
|
||||
"key_name": key_name,
|
||||
}
|
||||
except OSError:
|
||||
pass
|
||||
return all_keys
|
||||
|
||||
def _walk_bucket_files_sorted(
|
||||
self, bucket_path: Path, cursor_key: Optional[str] = None,
|
||||
):
|
||||
def _walk(dir_path: Path, prefix: str):
|
||||
try:
|
||||
entries = list(os.scandir(dir_path))
|
||||
except OSError:
|
||||
return
|
||||
|
||||
def _sort_key(e):
|
||||
if e.is_dir(follow_symlinks=False):
|
||||
return e.name + "/"
|
||||
return e.name
|
||||
|
||||
entries.sort(key=_sort_key)
|
||||
|
||||
for entry in entries:
|
||||
if entry.is_dir(follow_symlinks=False):
|
||||
if not prefix and entry.name in self.INTERNAL_FOLDERS:
|
||||
continue
|
||||
new_prefix = (prefix + "/" + entry.name) if prefix else entry.name
|
||||
if cursor_key is not None:
|
||||
full_prefix = new_prefix + "/"
|
||||
if not cursor_key.startswith(full_prefix) and cursor_key > full_prefix:
|
||||
continue
|
||||
yield from _walk(Path(entry.path), new_prefix)
|
||||
elif entry.is_file(follow_symlinks=False):
|
||||
full_key = (prefix + "/" + entry.name) if prefix else entry.name
|
||||
if cursor_key is not None and full_key <= cursor_key:
|
||||
continue
|
||||
yield full_key
|
||||
|
||||
yield from _walk(bucket_path, "")
|
||||
|
||||
def _check_corrupted_objects(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
|
||||
cursor_key: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
if self._batch_exhausted(result):
|
||||
return None
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
|
||||
if not meta_root.exists():
|
||||
return None
|
||||
|
||||
last_key = None
|
||||
try:
|
||||
all_keys = self._collect_index_keys(meta_root, cursor_key)
|
||||
sorted_keys = sorted(all_keys.keys())
|
||||
|
||||
for full_key in sorted_keys:
|
||||
if self._throttle():
|
||||
return last_key
|
||||
if self._batch_exhausted(result):
|
||||
return last_key
|
||||
|
||||
info = all_keys[full_key]
|
||||
entry = info["entry"]
|
||||
index_file = info["index_file"]
|
||||
key_name = info["key_name"]
|
||||
|
||||
object_path = bucket_path / full_key
|
||||
if not object_path.exists():
|
||||
continue
|
||||
|
||||
result.objects_scanned += 1
|
||||
last_key = full_key
|
||||
|
||||
meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
|
||||
stored_etag = meta.get("__etag__")
|
||||
if not stored_etag:
|
||||
continue
|
||||
|
||||
try:
|
||||
actual_etag = _compute_etag(object_path)
|
||||
except OSError:
|
||||
continue
|
||||
|
||||
if actual_etag != stored_etag:
|
||||
result.corrupted_objects += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="corrupted_object",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail=f"stored_etag={stored_etag} actual_etag={actual_etag}",
|
||||
)
|
||||
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
stat = object_path.stat()
|
||||
meta["__etag__"] = actual_etag
|
||||
meta["__size__"] = str(stat.st_size)
|
||||
meta["__last_modified__"] = str(stat.st_mtime)
|
||||
try:
|
||||
index_data = json.loads(index_file.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
index_data = {}
|
||||
index_data[key_name] = {"metadata": meta}
|
||||
self._atomic_write_index(index_file, index_data)
|
||||
issue.healed = True
|
||||
issue.heal_action = "updated etag in index"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal corrupted {bucket_name}/{full_key}: {e}")
|
||||
|
||||
self._add_issue(result, issue)
|
||||
except OSError as e:
|
||||
result.errors.append(f"check corrupted {bucket_name}: {e}")
|
||||
return last_key
|
||||
|
||||
def _check_orphaned_objects(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
|
||||
cursor_key: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
if self._batch_exhausted(result):
|
||||
return None
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
|
||||
last_key = None
|
||||
try:
|
||||
for full_key in self._walk_bucket_files_sorted(bucket_path, cursor_key):
|
||||
if self._throttle():
|
||||
return last_key
|
||||
if self._batch_exhausted(result):
|
||||
return last_key
|
||||
|
||||
result.objects_scanned += 1
|
||||
last_key = full_key
|
||||
key_path = Path(full_key)
|
||||
key_name = key_path.name
|
||||
parent = key_path.parent
|
||||
|
||||
if parent == Path("."):
|
||||
index_path = meta_root / "_index.json"
|
||||
else:
|
||||
index_path = meta_root / parent / "_index.json"
|
||||
|
||||
has_entry = False
|
||||
if index_path.exists():
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
has_entry = key_name in index_data
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
|
||||
if not has_entry:
|
||||
result.orphaned_objects += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="orphaned_object",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail="file exists without metadata entry",
|
||||
)
|
||||
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
object_path = bucket_path / full_key
|
||||
etag = _compute_etag(object_path)
|
||||
stat = object_path.stat()
|
||||
meta = {
|
||||
"__etag__": etag,
|
||||
"__size__": str(stat.st_size),
|
||||
"__last_modified__": str(stat.st_mtime),
|
||||
}
|
||||
index_data = {}
|
||||
if index_path.exists():
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
index_data[key_name] = {"metadata": meta}
|
||||
self._atomic_write_index(index_path, index_data)
|
||||
issue.healed = True
|
||||
issue.heal_action = "created metadata entry"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal orphaned {bucket_name}/{full_key}: {e}")
|
||||
|
||||
self._add_issue(result, issue)
|
||||
except OSError as e:
|
||||
result.errors.append(f"check orphaned {bucket_name}: {e}")
|
||||
return last_key
|
||||
|
||||
def _check_phantom_metadata(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
|
||||
cursor_key: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
if self._batch_exhausted(result):
|
||||
return None
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
|
||||
if not meta_root.exists():
|
||||
return None
|
||||
|
||||
last_key = None
|
||||
try:
|
||||
all_keys = self._collect_index_keys(meta_root, cursor_key)
|
||||
sorted_keys = sorted(all_keys.keys())
|
||||
|
||||
heal_by_index: Dict[Path, List[str]] = {}
|
||||
|
||||
for full_key in sorted_keys:
|
||||
if self._batch_exhausted(result):
|
||||
break
|
||||
|
||||
result.objects_scanned += 1
|
||||
last_key = full_key
|
||||
|
||||
object_path = bucket_path / full_key
|
||||
if not object_path.exists():
|
||||
result.phantom_metadata += 1
|
||||
info = all_keys[full_key]
|
||||
issue = IntegrityIssue(
|
||||
issue_type="phantom_metadata",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail="metadata entry without file on disk",
|
||||
)
|
||||
if auto_heal and not dry_run:
|
||||
index_file = info["index_file"]
|
||||
heal_by_index.setdefault(index_file, []).append(info["key_name"])
|
||||
issue.healed = True
|
||||
issue.heal_action = "removed stale index entry"
|
||||
result.issues_healed += 1
|
||||
self._add_issue(result, issue)
|
||||
|
||||
if heal_by_index and auto_heal and not dry_run:
|
||||
for index_file, keys_to_remove in heal_by_index.items():
|
||||
try:
|
||||
index_data = json.loads(index_file.read_text(encoding="utf-8"))
|
||||
for k in keys_to_remove:
|
||||
index_data.pop(k, None)
|
||||
if index_data:
|
||||
self._atomic_write_index(index_file, index_data)
|
||||
else:
|
||||
index_file.unlink(missing_ok=True)
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal phantom {bucket_name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"check phantom {bucket_name}: {e}")
|
||||
return last_key
|
||||
|
||||
def _check_stale_versions(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
|
||||
) -> None:
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
versions_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR
|
||||
|
||||
if not versions_root.exists():
|
||||
return
|
||||
|
||||
try:
|
||||
for key_dir in versions_root.rglob("*"):
|
||||
if self._throttle():
|
||||
return
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
if not key_dir.is_dir():
|
||||
continue
|
||||
|
||||
bin_files = {f.stem: f for f in key_dir.glob("*.bin")}
|
||||
json_files = {f.stem: f for f in key_dir.glob("*.json")}
|
||||
|
||||
for stem, bin_file in bin_files.items():
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
result.objects_scanned += 1
|
||||
if stem not in json_files:
|
||||
result.stale_versions += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="stale_version",
|
||||
bucket=bucket_name,
|
||||
key=f"{key_dir.relative_to(versions_root).as_posix()}/{bin_file.name}",
|
||||
detail="version data without manifest",
|
||||
)
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
bin_file.unlink(missing_ok=True)
|
||||
issue.healed = True
|
||||
issue.heal_action = "removed orphaned version data"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal stale version {bin_file}: {e}")
|
||||
self._add_issue(result, issue)
|
||||
|
||||
for stem, json_file in json_files.items():
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
result.objects_scanned += 1
|
||||
if stem not in bin_files:
|
||||
result.stale_versions += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="stale_version",
|
||||
bucket=bucket_name,
|
||||
key=f"{key_dir.relative_to(versions_root).as_posix()}/{json_file.name}",
|
||||
detail="version manifest without data",
|
||||
)
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
json_file.unlink(missing_ok=True)
|
||||
issue.healed = True
|
||||
issue.heal_action = "removed orphaned version manifest"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal stale version {json_file}: {e}")
|
||||
self._add_issue(result, issue)
|
||||
except OSError as e:
|
||||
result.errors.append(f"check stale versions {bucket_name}: {e}")
|
||||
|
||||
def _check_etag_cache(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
|
||||
) -> None:
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
etag_index_path = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / "etag_index.json"
|
||||
|
||||
if not etag_index_path.exists():
|
||||
return
|
||||
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
if not meta_root.exists():
|
||||
return
|
||||
|
||||
try:
|
||||
etag_cache = json.loads(etag_index_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return
|
||||
|
||||
found_mismatch = False
|
||||
|
||||
for full_key, cached_etag in etag_cache.items():
|
||||
if self._batch_exhausted(result):
|
||||
break
|
||||
result.objects_scanned += 1
|
||||
key_path = Path(full_key)
|
||||
key_name = key_path.name
|
||||
parent = key_path.parent
|
||||
|
||||
if parent == Path("."):
|
||||
index_path = meta_root / "_index.json"
|
||||
else:
|
||||
index_path = meta_root / parent / "_index.json"
|
||||
|
||||
if not index_path.exists():
|
||||
continue
|
||||
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
|
||||
entry = index_data.get(key_name)
|
||||
if not entry:
|
||||
continue
|
||||
|
||||
meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
|
||||
stored_etag = meta.get("__etag__")
|
||||
|
||||
if stored_etag and cached_etag != stored_etag:
|
||||
result.etag_cache_inconsistencies += 1
|
||||
found_mismatch = True
|
||||
issue = IntegrityIssue(
|
||||
issue_type="etag_cache_inconsistency",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail=f"cached_etag={cached_etag} index_etag={stored_etag}",
|
||||
)
|
||||
self._add_issue(result, issue)
|
||||
|
||||
if found_mismatch and auto_heal and not dry_run:
|
||||
try:
|
||||
etag_index_path.unlink(missing_ok=True)
|
||||
for issue in result.issues:
|
||||
if issue.issue_type == "etag_cache_inconsistency" and issue.bucket == bucket_name and not issue.healed:
|
||||
issue.healed = True
|
||||
issue.heal_action = "deleted etag_index.json"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal etag cache {bucket_name}: {e}")
|
||||
|
||||
def _check_legacy_metadata(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
|
||||
) -> None:
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
legacy_meta_root = self.storage_root / bucket_name / ".meta"
|
||||
if not legacy_meta_root.exists():
|
||||
return
|
||||
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
|
||||
try:
|
||||
for meta_file in legacy_meta_root.rglob("*.meta.json"):
|
||||
if self._throttle():
|
||||
return
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
if not meta_file.is_file():
|
||||
continue
|
||||
|
||||
result.objects_scanned += 1
|
||||
try:
|
||||
rel = meta_file.relative_to(legacy_meta_root)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
full_key = rel.as_posix().removesuffix(".meta.json")
|
||||
key_path = Path(full_key)
|
||||
key_name = key_path.name
|
||||
parent = key_path.parent
|
||||
|
||||
if parent == Path("."):
|
||||
index_path = meta_root / "_index.json"
|
||||
else:
|
||||
index_path = meta_root / parent / "_index.json"
|
||||
|
||||
try:
|
||||
legacy_data = json.loads(meta_file.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
|
||||
index_entry = None
|
||||
if index_path.exists():
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
index_entry = index_data.get(key_name)
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
|
||||
if index_entry is None:
|
||||
result.legacy_metadata_drifts += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="legacy_metadata_drift",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail="unmigrated legacy .meta.json",
|
||||
)
|
||||
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
index_data = {}
|
||||
if index_path.exists():
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
index_data[key_name] = {"metadata": legacy_data}
|
||||
self._atomic_write_index(index_path, index_data)
|
||||
meta_file.unlink(missing_ok=True)
|
||||
issue.healed = True
|
||||
issue.heal_action = "migrated to index and deleted legacy file"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal legacy {bucket_name}/{full_key}: {e}")
|
||||
|
||||
self._add_issue(result, issue)
|
||||
else:
|
||||
index_meta = index_entry.get("metadata", {}) if isinstance(index_entry, dict) else {}
|
||||
if legacy_data != index_meta:
|
||||
result.legacy_metadata_drifts += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="legacy_metadata_drift",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail="legacy .meta.json differs from index entry",
|
||||
)
|
||||
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
meta_file.unlink(missing_ok=True)
|
||||
issue.healed = True
|
||||
issue.heal_action = "deleted legacy file (index is authoritative)"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal legacy drift {bucket_name}/{full_key}: {e}")
|
||||
|
||||
self._add_issue(result, issue)
|
||||
except OSError as e:
|
||||
result.errors.append(f"check legacy meta {bucket_name}: {e}")
|
||||
|
||||
@staticmethod
|
||||
def _atomic_write_index(index_path: Path, data: Dict[str, Any]) -> None:
|
||||
index_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp_path = index_path.with_suffix(".tmp")
|
||||
try:
|
||||
with open(tmp_path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f)
|
||||
os.replace(str(tmp_path), str(index_path))
|
||||
except BaseException:
|
||||
try:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
|
||||
records = self.history_store.get_history(limit, offset)
|
||||
return [r.to_dict() for r in records]
|
||||
|
||||
def get_status(self) -> dict:
|
||||
status: Dict[str, Any] = {
|
||||
"enabled": not self._shutdown or self._timer is not None,
|
||||
"running": self._timer is not None and not self._shutdown,
|
||||
"scanning": self._scanning,
|
||||
"interval_hours": self.interval_seconds / 3600.0,
|
||||
"batch_size": self.batch_size,
|
||||
"auto_heal": self.auto_heal,
|
||||
"dry_run": self.dry_run,
|
||||
"io_throttle_ms": round(self._io_throttle * 1000),
|
||||
}
|
||||
if self._scanning and self._scan_start_time is not None:
|
||||
status["scan_elapsed_seconds"] = round(time.time() - self._scan_start_time, 1)
|
||||
status["cursor"] = self.cursor_store.get_info()
|
||||
return status
|
||||
@@ -15,29 +15,23 @@ from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from urllib3.util.connection import create_connection as _urllib3_create_connection
|
||||
|
||||
|
||||
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
||||
"""Check if a URL is safe to make requests to (not internal/private).
|
||||
|
||||
Args:
|
||||
url: The URL to check.
|
||||
allow_internal: If True, allows internal/private IP addresses.
|
||||
Use for self-hosted deployments on internal networks.
|
||||
"""
|
||||
def _resolve_and_check_url(url: str, allow_internal: bool = False) -> Optional[str]:
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
hostname = parsed.hostname
|
||||
if not hostname:
|
||||
return False
|
||||
return None
|
||||
cloud_metadata_hosts = {
|
||||
"metadata.google.internal",
|
||||
"169.254.169.254",
|
||||
}
|
||||
if hostname.lower() in cloud_metadata_hosts:
|
||||
return False
|
||||
return None
|
||||
if allow_internal:
|
||||
return True
|
||||
return hostname
|
||||
blocked_hosts = {
|
||||
"localhost",
|
||||
"127.0.0.1",
|
||||
@@ -46,17 +40,46 @@ def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
||||
"[::1]",
|
||||
}
|
||||
if hostname.lower() in blocked_hosts:
|
||||
return False
|
||||
return None
|
||||
try:
|
||||
resolved_ip = socket.gethostbyname(hostname)
|
||||
ip = ipaddress.ip_address(resolved_ip)
|
||||
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
|
||||
return False
|
||||
return None
|
||||
return resolved_ip
|
||||
except (socket.gaierror, ValueError):
|
||||
return False
|
||||
return True
|
||||
return None
|
||||
except Exception:
|
||||
return False
|
||||
return None
|
||||
|
||||
|
||||
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
||||
return _resolve_and_check_url(url, allow_internal) is not None
|
||||
|
||||
|
||||
_dns_pin_lock = threading.Lock()
|
||||
|
||||
|
||||
def _pinned_post(url: str, pinned_ip: str, **kwargs: Any) -> requests.Response:
|
||||
parsed = urlparse(url)
|
||||
hostname = parsed.hostname or ""
|
||||
session = requests.Session()
|
||||
original_create = _urllib3_create_connection
|
||||
|
||||
def _create_pinned(address: Any, *args: Any, **kw: Any) -> Any:
|
||||
host, req_port = address
|
||||
if host == hostname:
|
||||
return original_create((pinned_ip, req_port), *args, **kw)
|
||||
return original_create(address, *args, **kw)
|
||||
|
||||
import urllib3.util.connection as _conn_mod
|
||||
with _dns_pin_lock:
|
||||
_conn_mod.create_connection = _create_pinned
|
||||
try:
|
||||
return session.post(url, **kwargs)
|
||||
finally:
|
||||
_conn_mod.create_connection = original_create
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -344,16 +367,18 @@ class NotificationService:
|
||||
self._queue.task_done()
|
||||
|
||||
def _send_notification(self, event: NotificationEvent, destination: WebhookDestination) -> None:
|
||||
if not _is_safe_url(destination.url, allow_internal=self._allow_internal_endpoints):
|
||||
raise RuntimeError(f"Blocked request to cloud metadata service (SSRF protection): {destination.url}")
|
||||
resolved_ip = _resolve_and_check_url(destination.url, allow_internal=self._allow_internal_endpoints)
|
||||
if not resolved_ip:
|
||||
raise RuntimeError(f"Blocked request (SSRF protection): {destination.url}")
|
||||
payload = event.to_s3_event()
|
||||
headers = {"Content-Type": "application/json", **destination.headers}
|
||||
|
||||
last_error = None
|
||||
for attempt in range(destination.retry_count):
|
||||
try:
|
||||
response = requests.post(
|
||||
response = _pinned_post(
|
||||
destination.url,
|
||||
resolved_ip,
|
||||
json=payload,
|
||||
headers=headers,
|
||||
timeout=destination.timeout_seconds,
|
||||
|
||||
@@ -5,6 +5,7 @@ import logging
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
@@ -138,8 +139,8 @@ class OperationMetricsCollector:
|
||||
self.interval_seconds = interval_minutes * 60
|
||||
self.retention_hours = retention_hours
|
||||
self._lock = threading.Lock()
|
||||
self._by_method: Dict[str, OperationStats] = {}
|
||||
self._by_endpoint: Dict[str, OperationStats] = {}
|
||||
self._by_method: Dict[str, OperationStats] = defaultdict(OperationStats)
|
||||
self._by_endpoint: Dict[str, OperationStats] = defaultdict(OperationStats)
|
||||
self._by_status_class: Dict[str, int] = {}
|
||||
self._error_codes: Dict[str, int] = {}
|
||||
self._totals = OperationStats()
|
||||
@@ -211,8 +212,8 @@ class OperationMetricsCollector:
|
||||
self._prune_old_snapshots()
|
||||
self._save_history()
|
||||
|
||||
self._by_method.clear()
|
||||
self._by_endpoint.clear()
|
||||
self._by_method = defaultdict(OperationStats)
|
||||
self._by_endpoint = defaultdict(OperationStats)
|
||||
self._by_status_class.clear()
|
||||
self._error_codes.clear()
|
||||
self._totals = OperationStats()
|
||||
@@ -232,12 +233,7 @@ class OperationMetricsCollector:
|
||||
status_class = f"{status_code // 100}xx"
|
||||
|
||||
with self._lock:
|
||||
if method not in self._by_method:
|
||||
self._by_method[method] = OperationStats()
|
||||
self._by_method[method].record(latency_ms, success, bytes_in, bytes_out)
|
||||
|
||||
if endpoint_type not in self._by_endpoint:
|
||||
self._by_endpoint[endpoint_type] = OperationStats()
|
||||
self._by_endpoint[endpoint_type].record(latency_ms, success, bytes_in, bytes_out)
|
||||
|
||||
self._by_status_class[status_class] = self._by_status_class.get(status_class, 0) + 1
|
||||
|
||||
775
app/s3_api.py
775
app/s3_api.py
File diff suppressed because it is too large
Load Diff
@@ -245,6 +245,7 @@ def stream_objects_ndjson(
|
||||
url_templates: dict[str, str],
|
||||
display_tz: str = "UTC",
|
||||
versioning_enabled: bool = False,
|
||||
delimiter: Optional[str] = None,
|
||||
) -> Generator[str, None, None]:
|
||||
meta_line = json.dumps({
|
||||
"type": "meta",
|
||||
@@ -258,11 +259,20 @@ def stream_objects_ndjson(
|
||||
kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": 1000}
|
||||
if prefix:
|
||||
kwargs["Prefix"] = prefix
|
||||
if delimiter:
|
||||
kwargs["Delimiter"] = delimiter
|
||||
|
||||
running_count = 0
|
||||
try:
|
||||
paginator = client.get_paginator("list_objects_v2")
|
||||
for page in paginator.paginate(**kwargs):
|
||||
for obj in page.get("Contents", []):
|
||||
for cp in page.get("CommonPrefixes", []):
|
||||
yield json.dumps({
|
||||
"type": "folder",
|
||||
"prefix": cp["Prefix"],
|
||||
}) + "\n"
|
||||
page_contents = page.get("Contents", [])
|
||||
for obj in page_contents:
|
||||
last_mod = obj["LastModified"]
|
||||
yield json.dumps({
|
||||
"type": "object",
|
||||
@@ -273,6 +283,8 @@ def stream_objects_ndjson(
|
||||
"last_modified_iso": format_datetime_iso(last_mod, display_tz),
|
||||
"etag": obj.get("ETag", "").strip('"'),
|
||||
}) + "\n"
|
||||
running_count += len(page_contents)
|
||||
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
|
||||
except ClientError as exc:
|
||||
error_msg = exc.response.get("Error", {}).get("Message", "S3 operation failed")
|
||||
yield json.dumps({"type": "error", "error": error_msg}) + "\n"
|
||||
|
||||
1087
app/storage.py
1087
app/storage.py
File diff suppressed because it is too large
Load Diff
438
app/ui.py
438
app/ui.py
@@ -508,11 +508,15 @@ def bucket_detail(bucket_name: str):
|
||||
can_manage_quota = is_replication_admin
|
||||
|
||||
website_config = None
|
||||
website_domains = []
|
||||
if website_hosting_enabled:
|
||||
try:
|
||||
website_config = storage.get_bucket_website(bucket_name)
|
||||
except StorageError:
|
||||
website_config = None
|
||||
domain_store = current_app.extensions.get("website_domains")
|
||||
if domain_store:
|
||||
website_domains = domain_store.get_domains_for_bucket(bucket_name)
|
||||
|
||||
objects_api_url = url_for("ui.list_bucket_objects", bucket_name=bucket_name)
|
||||
objects_stream_url = url_for("ui.stream_bucket_objects", bucket_name=bucket_name)
|
||||
@@ -558,6 +562,7 @@ def bucket_detail(bucket_name: str):
|
||||
site_sync_enabled=site_sync_enabled,
|
||||
website_hosting_enabled=website_hosting_enabled,
|
||||
website_config=website_config,
|
||||
website_domains=website_domains,
|
||||
can_manage_website=can_edit_policy,
|
||||
)
|
||||
|
||||
@@ -611,20 +616,79 @@ def stream_bucket_objects(bucket_name: str):
|
||||
return jsonify({"error": str(exc)}), 403
|
||||
|
||||
prefix = request.args.get("prefix") or None
|
||||
delimiter = request.args.get("delimiter") or None
|
||||
|
||||
storage = _storage()
|
||||
try:
|
||||
client = get_session_s3_client()
|
||||
except (PermissionError, RuntimeError) as exc:
|
||||
return jsonify({"error": str(exc)}), 403
|
||||
|
||||
versioning_enabled = get_versioning_via_s3(client, bucket_name)
|
||||
versioning_enabled = storage.is_versioning_enabled(bucket_name)
|
||||
except StorageError:
|
||||
versioning_enabled = False
|
||||
url_templates = build_url_templates(bucket_name)
|
||||
display_tz = current_app.config.get("DISPLAY_TIMEZONE", "UTC")
|
||||
|
||||
def generate():
|
||||
yield json.dumps({
|
||||
"type": "meta",
|
||||
"versioning_enabled": versioning_enabled,
|
||||
"url_templates": url_templates,
|
||||
}) + "\n"
|
||||
yield json.dumps({"type": "count", "total_count": 0}) + "\n"
|
||||
|
||||
running_count = 0
|
||||
try:
|
||||
if delimiter:
|
||||
for item_type, item in storage.iter_objects_shallow(
|
||||
bucket_name, prefix=prefix or "", delimiter=delimiter,
|
||||
):
|
||||
if item_type == "folder":
|
||||
yield json.dumps({"type": "folder", "prefix": item}) + "\n"
|
||||
else:
|
||||
last_mod = item.last_modified
|
||||
yield json.dumps({
|
||||
"type": "object",
|
||||
"key": item.key,
|
||||
"size": item.size,
|
||||
"last_modified": last_mod.isoformat(),
|
||||
"last_modified_display": _format_datetime_display(last_mod, display_tz),
|
||||
"last_modified_iso": _format_datetime_iso(last_mod, display_tz),
|
||||
"etag": item.etag or "",
|
||||
}) + "\n"
|
||||
running_count += 1
|
||||
if running_count % 1000 == 0:
|
||||
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
|
||||
else:
|
||||
continuation_token = None
|
||||
while True:
|
||||
result = storage.list_objects(
|
||||
bucket_name,
|
||||
max_keys=1000,
|
||||
continuation_token=continuation_token,
|
||||
prefix=prefix,
|
||||
)
|
||||
for obj in result.objects:
|
||||
last_mod = obj.last_modified
|
||||
yield json.dumps({
|
||||
"type": "object",
|
||||
"key": obj.key,
|
||||
"size": obj.size,
|
||||
"last_modified": last_mod.isoformat(),
|
||||
"last_modified_display": _format_datetime_display(last_mod, display_tz),
|
||||
"last_modified_iso": _format_datetime_iso(last_mod, display_tz),
|
||||
"etag": obj.etag or "",
|
||||
}) + "\n"
|
||||
running_count += len(result.objects)
|
||||
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
|
||||
if not result.is_truncated:
|
||||
break
|
||||
continuation_token = result.next_continuation_token
|
||||
except StorageError as exc:
|
||||
yield json.dumps({"type": "error", "error": str(exc)}) + "\n"
|
||||
return
|
||||
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
|
||||
yield json.dumps({"type": "done"}) + "\n"
|
||||
|
||||
return Response(
|
||||
stream_objects_ndjson(
|
||||
client, bucket_name, prefix, url_templates, display_tz, versioning_enabled,
|
||||
),
|
||||
generate(),
|
||||
mimetype='application/x-ndjson',
|
||||
headers={
|
||||
'Cache-Control': 'no-cache',
|
||||
@@ -634,6 +698,33 @@ def stream_bucket_objects(bucket_name: str):
|
||||
)
|
||||
|
||||
|
||||
@ui_bp.get("/buckets/<bucket_name>/objects/search")
|
||||
@limiter.limit("30 per minute")
|
||||
def search_bucket_objects(bucket_name: str):
|
||||
principal = _current_principal()
|
||||
try:
|
||||
_authorize_ui(principal, bucket_name, "list")
|
||||
except IamError as exc:
|
||||
return jsonify({"error": str(exc)}), 403
|
||||
|
||||
query = request.args.get("q", "").strip()
|
||||
if not query:
|
||||
return jsonify({"results": [], "truncated": False})
|
||||
|
||||
try:
|
||||
limit = max(1, min(int(request.args.get("limit", 500)), 1000))
|
||||
except (ValueError, TypeError):
|
||||
limit = 500
|
||||
|
||||
prefix = request.args.get("prefix", "").strip()
|
||||
|
||||
storage = _storage()
|
||||
try:
|
||||
return jsonify(storage.search_objects(bucket_name, query, prefix=prefix, limit=limit))
|
||||
except StorageError as exc:
|
||||
return jsonify({"error": str(exc)}), 404
|
||||
|
||||
|
||||
@ui_bp.post("/buckets/<bucket_name>/upload")
|
||||
@limiter.limit("30 per minute")
|
||||
def upload_object(bucket_name: str):
|
||||
@@ -738,7 +829,6 @@ def initiate_multipart_upload(bucket_name: str):
|
||||
|
||||
|
||||
@ui_bp.put("/buckets/<bucket_name>/multipart/<upload_id>/parts")
|
||||
@limiter.exempt
|
||||
@csrf.exempt
|
||||
def upload_multipart_part(bucket_name: str, upload_id: str):
|
||||
principal = _current_principal()
|
||||
@@ -973,6 +1063,27 @@ def bulk_delete_objects(bucket_name: str):
|
||||
return _respond(False, f"A maximum of {MAX_KEYS} objects can be deleted per request", status_code=400)
|
||||
|
||||
unique_keys = list(dict.fromkeys(cleaned))
|
||||
|
||||
folder_prefixes = [k for k in unique_keys if k.endswith("/")]
|
||||
if folder_prefixes:
|
||||
try:
|
||||
client = get_session_s3_client()
|
||||
for prefix in folder_prefixes:
|
||||
unique_keys.remove(prefix)
|
||||
paginator = client.get_paginator("list_objects_v2")
|
||||
for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
|
||||
for obj in page.get("Contents", []):
|
||||
if obj["Key"] not in unique_keys:
|
||||
unique_keys.append(obj["Key"])
|
||||
except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
|
||||
if isinstance(exc, ClientError):
|
||||
err, status = handle_client_error(exc)
|
||||
return _respond(False, err["error"], status_code=status)
|
||||
return _respond(False, "S3 API server is unreachable", status_code=502)
|
||||
|
||||
if not unique_keys:
|
||||
return _respond(False, "No objects found under the selected folders", status_code=400)
|
||||
|
||||
try:
|
||||
_authorize_ui(principal, bucket_name, "delete")
|
||||
except IamError as exc:
|
||||
@@ -1003,13 +1114,17 @@ def bulk_delete_objects(bucket_name: str):
|
||||
else:
|
||||
try:
|
||||
client = get_session_s3_client()
|
||||
objects_to_delete = [{"Key": k} for k in unique_keys]
|
||||
resp = client.delete_objects(
|
||||
Bucket=bucket_name,
|
||||
Delete={"Objects": objects_to_delete, "Quiet": False},
|
||||
)
|
||||
deleted = [d["Key"] for d in resp.get("Deleted", [])]
|
||||
errors = [{"key": e["Key"], "error": e.get("Message", e.get("Code", "Unknown error"))} for e in resp.get("Errors", [])]
|
||||
deleted = []
|
||||
errors = []
|
||||
for i in range(0, len(unique_keys), 1000):
|
||||
batch = unique_keys[i:i + 1000]
|
||||
objects_to_delete = [{"Key": k} for k in batch]
|
||||
resp = client.delete_objects(
|
||||
Bucket=bucket_name,
|
||||
Delete={"Objects": objects_to_delete, "Quiet": False},
|
||||
)
|
||||
deleted.extend(d["Key"] for d in resp.get("Deleted", []))
|
||||
errors.extend({"key": e["Key"], "error": e.get("Message", e.get("Code", "Unknown error"))} for e in resp.get("Errors", []))
|
||||
for key in deleted:
|
||||
_replication_manager().trigger_replication(bucket_name, key, action="delete")
|
||||
except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
|
||||
@@ -1297,12 +1412,14 @@ def object_versions(bucket_name: str, object_key: str):
|
||||
for v in resp.get("Versions", []):
|
||||
if v.get("Key") != object_key:
|
||||
continue
|
||||
if v.get("IsLatest", False):
|
||||
continue
|
||||
versions.append({
|
||||
"version_id": v.get("VersionId", ""),
|
||||
"last_modified": v["LastModified"].isoformat() if v.get("LastModified") else None,
|
||||
"size": v.get("Size", 0),
|
||||
"etag": v.get("ETag", "").strip('"'),
|
||||
"is_latest": v.get("IsLatest", False),
|
||||
"is_latest": False,
|
||||
})
|
||||
return jsonify({"versions": versions})
|
||||
except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
|
||||
@@ -1719,6 +1836,10 @@ def iam_dashboard():
|
||||
users = iam_service.list_users() if not locked else []
|
||||
config_summary = iam_service.config_summary()
|
||||
config_document = json.dumps(iam_service.export_config(mask_secrets=True), indent=2)
|
||||
from datetime import datetime as _dt, timedelta as _td, timezone as _tz
|
||||
_now = _dt.now(_tz.utc)
|
||||
now_iso = _now.isoformat()
|
||||
soon_iso = (_now + _td(days=7)).isoformat()
|
||||
return render_template(
|
||||
"iam.html",
|
||||
users=users,
|
||||
@@ -1728,6 +1849,8 @@ def iam_dashboard():
|
||||
config_summary=config_summary,
|
||||
config_document=config_document,
|
||||
disclosed_secret=disclosed_secret,
|
||||
now_iso=now_iso,
|
||||
soon_iso=soon_iso,
|
||||
)
|
||||
|
||||
|
||||
@@ -1747,6 +1870,8 @@ def create_iam_user():
|
||||
return jsonify({"error": "Display name must be 64 characters or fewer"}), 400
|
||||
flash("Display name must be 64 characters or fewer", "danger")
|
||||
return redirect(url_for("ui.iam_dashboard"))
|
||||
custom_access_key = request.form.get("access_key", "").strip() or None
|
||||
custom_secret_key = request.form.get("secret_key", "").strip() or None
|
||||
policies_text = request.form.get("policies", "").strip()
|
||||
policies = None
|
||||
if policies_text:
|
||||
@@ -1757,8 +1882,21 @@ def create_iam_user():
|
||||
return jsonify({"error": f"Invalid JSON: {exc}"}), 400
|
||||
flash(f"Invalid JSON: {exc}", "danger")
|
||||
return redirect(url_for("ui.iam_dashboard"))
|
||||
expires_at = request.form.get("expires_at", "").strip() or None
|
||||
if expires_at:
|
||||
try:
|
||||
from datetime import datetime as _dt, timezone as _tz
|
||||
exp_dt = _dt.fromisoformat(expires_at)
|
||||
if exp_dt.tzinfo is None:
|
||||
exp_dt = exp_dt.replace(tzinfo=_tz.utc)
|
||||
expires_at = exp_dt.isoformat()
|
||||
except (ValueError, TypeError):
|
||||
if _wants_json():
|
||||
return jsonify({"error": "Invalid expiry date format"}), 400
|
||||
flash("Invalid expiry date format", "danger")
|
||||
return redirect(url_for("ui.iam_dashboard"))
|
||||
try:
|
||||
created = _iam().create_user(display_name=display_name, policies=policies)
|
||||
created = _iam().create_user(display_name=display_name, policies=policies, access_key=custom_access_key, secret_key=custom_secret_key, expires_at=expires_at)
|
||||
except IamError as exc:
|
||||
if _wants_json():
|
||||
return jsonify({"error": str(exc)}), 400
|
||||
@@ -1932,6 +2070,45 @@ def update_iam_policies(access_key: str):
|
||||
return redirect(url_for("ui.iam_dashboard"))
|
||||
|
||||
|
||||
@ui_bp.post("/iam/users/<access_key>/expiry")
|
||||
def update_iam_expiry(access_key: str):
|
||||
principal = _current_principal()
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:update_policy")
|
||||
except IamError as exc:
|
||||
if _wants_json():
|
||||
return jsonify({"error": str(exc)}), 403
|
||||
flash(str(exc), "danger")
|
||||
return redirect(url_for("ui.iam_dashboard"))
|
||||
|
||||
expires_at = request.form.get("expires_at", "").strip() or None
|
||||
if expires_at:
|
||||
try:
|
||||
from datetime import datetime as _dt, timezone as _tz
|
||||
exp_dt = _dt.fromisoformat(expires_at)
|
||||
if exp_dt.tzinfo is None:
|
||||
exp_dt = exp_dt.replace(tzinfo=_tz.utc)
|
||||
expires_at = exp_dt.isoformat()
|
||||
except (ValueError, TypeError):
|
||||
if _wants_json():
|
||||
return jsonify({"error": "Invalid expiry date format"}), 400
|
||||
flash("Invalid expiry date format", "danger")
|
||||
return redirect(url_for("ui.iam_dashboard"))
|
||||
|
||||
try:
|
||||
_iam().update_user_expiry(access_key, expires_at)
|
||||
if _wants_json():
|
||||
return jsonify({"success": True, "message": f"Updated expiry for {access_key}", "expires_at": expires_at})
|
||||
label = expires_at if expires_at else "never"
|
||||
flash(f"Expiry for {access_key} set to {label}", "success")
|
||||
except IamError as exc:
|
||||
if _wants_json():
|
||||
return jsonify({"error": str(exc)}), 400
|
||||
flash(str(exc), "danger")
|
||||
|
||||
return redirect(url_for("ui.iam_dashboard"))
|
||||
|
||||
|
||||
@ui_bp.post("/connections")
|
||||
def create_connection():
|
||||
principal = _current_principal()
|
||||
@@ -2374,7 +2551,10 @@ def website_domains_dashboard():
|
||||
store = current_app.extensions.get("website_domains")
|
||||
mappings = store.list_all() if store else []
|
||||
storage = _storage()
|
||||
buckets = [b.name for b in storage.list_buckets()]
|
||||
buckets = [
|
||||
b.name for b in storage.list_buckets()
|
||||
if storage.get_bucket_website(b.name)
|
||||
]
|
||||
return render_template(
|
||||
"website_domains.html",
|
||||
mappings=mappings,
|
||||
@@ -3293,9 +3473,12 @@ def sites_dashboard():
|
||||
@ui_bp.post("/sites/local")
|
||||
def update_local_site():
|
||||
principal = _current_principal()
|
||||
wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:*")
|
||||
except IamError:
|
||||
if wants_json:
|
||||
return jsonify({"error": "Access denied"}), 403
|
||||
flash("Access denied", "danger")
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
|
||||
@@ -3306,6 +3489,8 @@ def update_local_site():
|
||||
display_name = request.form.get("display_name", "").strip()
|
||||
|
||||
if not site_id:
|
||||
if wants_json:
|
||||
return jsonify({"error": "Site ID is required"}), 400
|
||||
flash("Site ID is required", "danger")
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
|
||||
@@ -3327,6 +3512,8 @@ def update_local_site():
|
||||
)
|
||||
registry.set_local_site(site)
|
||||
|
||||
if wants_json:
|
||||
return jsonify({"message": "Local site configuration updated"})
|
||||
flash("Local site configuration updated", "success")
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
|
||||
@@ -3334,9 +3521,12 @@ def update_local_site():
|
||||
@ui_bp.post("/sites/peers")
|
||||
def add_peer_site():
|
||||
principal = _current_principal()
|
||||
wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:*")
|
||||
except IamError:
|
||||
if wants_json:
|
||||
return jsonify({"error": "Access denied"}), 403
|
||||
flash("Access denied", "danger")
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
|
||||
@@ -3348,9 +3538,13 @@ def add_peer_site():
|
||||
connection_id = request.form.get("connection_id", "").strip() or None
|
||||
|
||||
if not site_id:
|
||||
if wants_json:
|
||||
return jsonify({"error": "Site ID is required"}), 400
|
||||
flash("Site ID is required", "danger")
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
if not endpoint:
|
||||
if wants_json:
|
||||
return jsonify({"error": "Endpoint is required"}), 400
|
||||
flash("Endpoint is required", "danger")
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
|
||||
@@ -3362,10 +3556,14 @@ def add_peer_site():
|
||||
registry = _site_registry()
|
||||
|
||||
if registry.get_peer(site_id):
|
||||
if wants_json:
|
||||
return jsonify({"error": f"Peer site '{site_id}' already exists"}), 409
|
||||
flash(f"Peer site '{site_id}' already exists", "danger")
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
|
||||
if connection_id and not _connections().get(connection_id):
|
||||
if wants_json:
|
||||
return jsonify({"error": f"Connection '{connection_id}' not found"}), 404
|
||||
flash(f"Connection '{connection_id}' not found", "danger")
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
|
||||
@@ -3379,6 +3577,11 @@ def add_peer_site():
|
||||
)
|
||||
registry.add_peer(peer)
|
||||
|
||||
if wants_json:
|
||||
redirect_url = None
|
||||
if connection_id:
|
||||
redirect_url = url_for("ui.replication_wizard", site_id=site_id)
|
||||
return jsonify({"message": f"Peer site '{site_id}' added", "redirect": redirect_url})
|
||||
flash(f"Peer site '{site_id}' added", "success")
|
||||
|
||||
if connection_id:
|
||||
@@ -3389,9 +3592,12 @@ def add_peer_site():
|
||||
@ui_bp.post("/sites/peers/<site_id>/update")
|
||||
def update_peer_site(site_id: str):
|
||||
principal = _current_principal()
|
||||
wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:*")
|
||||
except IamError:
|
||||
if wants_json:
|
||||
return jsonify({"error": "Access denied"}), 403
|
||||
flash("Access denied", "danger")
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
|
||||
@@ -3399,6 +3605,8 @@ def update_peer_site(site_id: str):
|
||||
existing = registry.get_peer(site_id)
|
||||
|
||||
if not existing:
|
||||
if wants_json:
|
||||
return jsonify({"error": f"Peer site '{site_id}' not found"}), 404
|
||||
flash(f"Peer site '{site_id}' not found", "danger")
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
|
||||
@@ -3406,7 +3614,10 @@ def update_peer_site(site_id: str):
|
||||
region = request.form.get("region", existing.region).strip()
|
||||
priority = request.form.get("priority", str(existing.priority))
|
||||
display_name = request.form.get("display_name", existing.display_name).strip()
|
||||
connection_id = request.form.get("connection_id", "").strip() or existing.connection_id
|
||||
if "connection_id" in request.form:
|
||||
connection_id = request.form["connection_id"].strip() or None
|
||||
else:
|
||||
connection_id = existing.connection_id
|
||||
|
||||
try:
|
||||
priority_int = int(priority)
|
||||
@@ -3414,6 +3625,8 @@ def update_peer_site(site_id: str):
|
||||
priority_int = existing.priority
|
||||
|
||||
if connection_id and not _connections().get(connection_id):
|
||||
if wants_json:
|
||||
return jsonify({"error": f"Connection '{connection_id}' not found"}), 404
|
||||
flash(f"Connection '{connection_id}' not found", "danger")
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
|
||||
@@ -3430,6 +3643,8 @@ def update_peer_site(site_id: str):
|
||||
)
|
||||
registry.update_peer(peer)
|
||||
|
||||
if wants_json:
|
||||
return jsonify({"message": f"Peer site '{site_id}' updated"})
|
||||
flash(f"Peer site '{site_id}' updated", "success")
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
|
||||
@@ -3437,16 +3652,23 @@ def update_peer_site(site_id: str):
|
||||
@ui_bp.post("/sites/peers/<site_id>/delete")
|
||||
def delete_peer_site(site_id: str):
|
||||
principal = _current_principal()
|
||||
wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:*")
|
||||
except IamError:
|
||||
if wants_json:
|
||||
return jsonify({"error": "Access denied"}), 403
|
||||
flash("Access denied", "danger")
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
|
||||
registry = _site_registry()
|
||||
if registry.delete_peer(site_id):
|
||||
if wants_json:
|
||||
return jsonify({"message": f"Peer site '{site_id}' deleted"})
|
||||
flash(f"Peer site '{site_id}' deleted", "success")
|
||||
else:
|
||||
if wants_json:
|
||||
return jsonify({"error": f"Peer site '{site_id}' not found"}), 404
|
||||
flash(f"Peer site '{site_id}' not found", "danger")
|
||||
|
||||
return redirect(url_for("ui.sites_dashboard"))
|
||||
@@ -3901,6 +4123,182 @@ def get_peer_sync_stats(site_id: str):
|
||||
return jsonify(stats)
|
||||
|
||||
|
||||
@ui_bp.get("/system")
|
||||
def system_dashboard():
|
||||
principal = _current_principal()
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:*")
|
||||
except IamError:
|
||||
flash("Access denied: System page requires admin permissions", "danger")
|
||||
return redirect(url_for("ui.buckets_overview"))
|
||||
|
||||
import platform as _platform
|
||||
import sys
|
||||
from app.version import APP_VERSION
|
||||
|
||||
try:
|
||||
import myfsio_core as _rc
|
||||
has_rust = True
|
||||
except ImportError:
|
||||
has_rust = False
|
||||
|
||||
gc = current_app.extensions.get("gc")
|
||||
gc_status = gc.get_status() if gc else {"enabled": False}
|
||||
gc_history_records = []
|
||||
if gc:
|
||||
raw = gc.get_history(limit=10, offset=0)
|
||||
for rec in raw:
|
||||
r = rec.get("result", {})
|
||||
total_freed = r.get("temp_bytes_freed", 0) + r.get("multipart_bytes_freed", 0) + r.get("orphaned_version_bytes_freed", 0)
|
||||
rec["bytes_freed_display"] = _format_bytes(total_freed)
|
||||
rec["timestamp_display"] = _format_datetime_display(datetime.fromtimestamp(rec["timestamp"], tz=dt_timezone.utc))
|
||||
gc_history_records.append(rec)
|
||||
|
||||
checker = current_app.extensions.get("integrity")
|
||||
integrity_status = checker.get_status() if checker else {"enabled": False}
|
||||
integrity_history_records = []
|
||||
if checker:
|
||||
raw = checker.get_history(limit=10, offset=0)
|
||||
for rec in raw:
|
||||
rec["timestamp_display"] = _format_datetime_display(datetime.fromtimestamp(rec["timestamp"], tz=dt_timezone.utc))
|
||||
integrity_history_records.append(rec)
|
||||
|
||||
features = [
|
||||
{"label": "Encryption (SSE-S3)", "enabled": current_app.config.get("ENCRYPTION_ENABLED", False)},
|
||||
{"label": "KMS", "enabled": current_app.config.get("KMS_ENABLED", False)},
|
||||
{"label": "Versioning Lifecycle", "enabled": current_app.config.get("LIFECYCLE_ENABLED", False)},
|
||||
{"label": "Metrics History", "enabled": current_app.config.get("METRICS_HISTORY_ENABLED", False)},
|
||||
{"label": "Operation Metrics", "enabled": current_app.config.get("OPERATION_METRICS_ENABLED", False)},
|
||||
{"label": "Site Sync", "enabled": current_app.config.get("SITE_SYNC_ENABLED", False)},
|
||||
{"label": "Website Hosting", "enabled": current_app.config.get("WEBSITE_HOSTING_ENABLED", False)},
|
||||
{"label": "Garbage Collection", "enabled": current_app.config.get("GC_ENABLED", False)},
|
||||
{"label": "Integrity Scanner", "enabled": current_app.config.get("INTEGRITY_ENABLED", False)},
|
||||
]
|
||||
|
||||
return render_template(
|
||||
"system.html",
|
||||
principal=principal,
|
||||
app_version=APP_VERSION,
|
||||
storage_root=current_app.config.get("STORAGE_ROOT", "./data"),
|
||||
platform=_platform.platform(),
|
||||
python_version=sys.version.split()[0],
|
||||
has_rust=has_rust,
|
||||
features=features,
|
||||
gc_status=gc_status,
|
||||
gc_history=gc_history_records,
|
||||
integrity_status=integrity_status,
|
||||
integrity_history=integrity_history_records,
|
||||
display_timezone=current_app.config.get("DISPLAY_TIMEZONE", "UTC"),
|
||||
)
|
||||
|
||||
|
||||
@ui_bp.post("/system/gc/run")
|
||||
def system_gc_run():
|
||||
principal = _current_principal()
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:*")
|
||||
except IamError:
|
||||
return jsonify({"error": "Access denied"}), 403
|
||||
|
||||
gc = current_app.extensions.get("gc")
|
||||
if not gc:
|
||||
return jsonify({"error": "GC is not enabled"}), 400
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
started = gc.run_async(dry_run=payload.get("dry_run"))
|
||||
if not started:
|
||||
return jsonify({"error": "GC is already in progress"}), 409
|
||||
return jsonify({"status": "started"})
|
||||
|
||||
|
||||
@ui_bp.get("/system/gc/status")
|
||||
def system_gc_status():
|
||||
principal = _current_principal()
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:*")
|
||||
except IamError:
|
||||
return jsonify({"error": "Access denied"}), 403
|
||||
|
||||
gc = current_app.extensions.get("gc")
|
||||
if not gc:
|
||||
return jsonify({"error": "GC is not enabled"}), 400
|
||||
|
||||
return jsonify(gc.get_status())
|
||||
|
||||
|
||||
@ui_bp.get("/system/gc/history")
|
||||
def system_gc_history():
|
||||
principal = _current_principal()
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:*")
|
||||
except IamError:
|
||||
return jsonify({"error": "Access denied"}), 403
|
||||
|
||||
gc = current_app.extensions.get("gc")
|
||||
if not gc:
|
||||
return jsonify({"executions": []})
|
||||
|
||||
limit = min(int(request.args.get("limit", 10)), 200)
|
||||
offset = int(request.args.get("offset", 0))
|
||||
records = gc.get_history(limit=limit, offset=offset)
|
||||
return jsonify({"executions": records})
|
||||
|
||||
|
||||
@ui_bp.post("/system/integrity/run")
|
||||
def system_integrity_run():
|
||||
principal = _current_principal()
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:*")
|
||||
except IamError:
|
||||
return jsonify({"error": "Access denied"}), 403
|
||||
|
||||
checker = current_app.extensions.get("integrity")
|
||||
if not checker:
|
||||
return jsonify({"error": "Integrity checker is not enabled"}), 400
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
started = checker.run_async(
|
||||
auto_heal=payload.get("auto_heal"),
|
||||
dry_run=payload.get("dry_run"),
|
||||
)
|
||||
if not started:
|
||||
return jsonify({"error": "A scan is already in progress"}), 409
|
||||
return jsonify({"status": "started"})
|
||||
|
||||
|
||||
@ui_bp.get("/system/integrity/status")
|
||||
def system_integrity_status():
|
||||
principal = _current_principal()
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:*")
|
||||
except IamError:
|
||||
return jsonify({"error": "Access denied"}), 403
|
||||
|
||||
checker = current_app.extensions.get("integrity")
|
||||
if not checker:
|
||||
return jsonify({"error": "Integrity checker is not enabled"}), 400
|
||||
|
||||
return jsonify(checker.get_status())
|
||||
|
||||
|
||||
@ui_bp.get("/system/integrity/history")
|
||||
def system_integrity_history():
|
||||
principal = _current_principal()
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:*")
|
||||
except IamError:
|
||||
return jsonify({"error": "Access denied"}), 403
|
||||
|
||||
checker = current_app.extensions.get("integrity")
|
||||
if not checker:
|
||||
return jsonify({"executions": []})
|
||||
|
||||
limit = min(int(request.args.get("limit", 10)), 200)
|
||||
offset = int(request.args.get("offset", 0))
|
||||
records = checker.get_history(limit=limit, offset=offset)
|
||||
return jsonify({"executions": records})
|
||||
|
||||
|
||||
@ui_bp.app_errorhandler(404)
|
||||
def ui_not_found(error): # type: ignore[override]
|
||||
prefix = ui_bp.url_prefix or ""
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
APP_VERSION = "0.3.0"
|
||||
APP_VERSION = "0.4.2"
|
||||
|
||||
|
||||
def get_version() -> str:
|
||||
|
||||
@@ -35,13 +35,16 @@ class WebsiteDomainStore:
|
||||
self.config_path = config_path
|
||||
self._lock = threading.Lock()
|
||||
self._domains: Dict[str, str] = {}
|
||||
self._last_mtime: float = 0.0
|
||||
self.reload()
|
||||
|
||||
def reload(self) -> None:
|
||||
if not self.config_path.exists():
|
||||
self._domains = {}
|
||||
self._last_mtime = 0.0
|
||||
return
|
||||
try:
|
||||
self._last_mtime = self.config_path.stat().st_mtime
|
||||
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
if isinstance(data, dict):
|
||||
@@ -51,19 +54,45 @@ class WebsiteDomainStore:
|
||||
except (OSError, json.JSONDecodeError):
|
||||
self._domains = {}
|
||||
|
||||
def _maybe_reload(self) -> None:
|
||||
try:
|
||||
if self.config_path.exists():
|
||||
mtime = self.config_path.stat().st_mtime
|
||||
if mtime != self._last_mtime:
|
||||
self._last_mtime = mtime
|
||||
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
if isinstance(data, dict):
|
||||
self._domains = {k.lower(): v for k, v in data.items()}
|
||||
else:
|
||||
self._domains = {}
|
||||
elif self._domains:
|
||||
self._domains = {}
|
||||
self._last_mtime = 0.0
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
|
||||
def _save(self) -> None:
|
||||
self.config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(self.config_path, "w", encoding="utf-8") as f:
|
||||
json.dump(self._domains, f, indent=2)
|
||||
self._last_mtime = self.config_path.stat().st_mtime
|
||||
|
||||
def list_all(self) -> List[Dict[str, str]]:
|
||||
with self._lock:
|
||||
self._maybe_reload()
|
||||
return [{"domain": d, "bucket": b} for d, b in self._domains.items()]
|
||||
|
||||
def get_bucket(self, domain: str) -> Optional[str]:
|
||||
with self._lock:
|
||||
self._maybe_reload()
|
||||
return self._domains.get(domain.lower())
|
||||
|
||||
def get_domains_for_bucket(self, bucket: str) -> List[str]:
|
||||
with self._lock:
|
||||
self._maybe_reload()
|
||||
return [d for d, b in self._domains.items() if b == bucket]
|
||||
|
||||
def set_mapping(self, domain: str, bucket: str) -> None:
|
||||
with self._lock:
|
||||
self._domains[domain.lower()] = bucket
|
||||
|
||||
300
docs.md
300
docs.md
@@ -139,18 +139,21 @@ All configuration is done via environment variables. The table below lists every
|
||||
| `API_BASE_URL` | `http://127.0.0.1:5000` | Internal S3 API URL used by the web UI proxy. Also used for presigned URL generation. Set to your public URL if running behind a reverse proxy. |
|
||||
| `AWS_REGION` | `us-east-1` | Region embedded in SigV4 credential scope. |
|
||||
| `AWS_SERVICE` | `s3` | Service string for SigV4. |
|
||||
| `DISPLAY_TIMEZONE` | `UTC` | Timezone for timestamps in the web UI (e.g., `US/Eastern`, `Asia/Tokyo`). |
|
||||
|
||||
### IAM & Security
|
||||
|
||||
| Variable | Default | Notes |
|
||||
| --- | --- | --- |
|
||||
| `IAM_CONFIG` | `data/.myfsio.sys/config/iam.json` | Stores users, secrets, and inline policies. |
|
||||
| `IAM_CONFIG` | `data/.myfsio.sys/config/iam.json` | Stores users, secrets, and inline policies. Encrypted at rest when `SECRET_KEY` is set. |
|
||||
| `BUCKET_POLICY_PATH` | `data/.myfsio.sys/config/bucket_policies.json` | Bucket policy store (auto hot-reload). |
|
||||
| `AUTH_MAX_ATTEMPTS` | `5` | Failed login attempts before lockout. |
|
||||
| `AUTH_LOCKOUT_MINUTES` | `15` | Lockout duration after max failed attempts. |
|
||||
| `SESSION_LIFETIME_DAYS` | `30` | How long UI sessions remain valid. |
|
||||
| `SECRET_TTL_SECONDS` | `300` | TTL for ephemeral secrets (presigned URLs). |
|
||||
| `UI_ENFORCE_BUCKET_POLICIES` | `false` | Whether the UI should enforce bucket policies. |
|
||||
| `ADMIN_ACCESS_KEY` | (none) | Custom access key for the admin user on first run or credential reset. If unset, a random key is generated. |
|
||||
| `ADMIN_SECRET_KEY` | (none) | Custom secret key for the admin user on first run or credential reset. If unset, a random key is generated. |
|
||||
|
||||
### CORS (Cross-Origin Resource Sharing)
|
||||
|
||||
@@ -170,15 +173,16 @@ All configuration is done via environment variables. The table below lists every
|
||||
| `RATE_LIMIT_BUCKET_OPS` | `120 per minute` | Rate limit for bucket operations (PUT/DELETE/GET/POST on `/<bucket>`). |
|
||||
| `RATE_LIMIT_OBJECT_OPS` | `240 per minute` | Rate limit for object operations (PUT/GET/DELETE/POST on `/<bucket>/<key>`). |
|
||||
| `RATE_LIMIT_HEAD_OPS` | `100 per minute` | Rate limit for HEAD requests (bucket and object). |
|
||||
| `RATE_LIMIT_ADMIN` | `60 per minute` | Rate limit for admin API endpoints (`/admin/*`). |
|
||||
| `RATE_LIMIT_STORAGE_URI` | `memory://` | Storage backend for rate limits. Use `redis://host:port` for distributed setups. |
|
||||
|
||||
### Server Configuration
|
||||
|
||||
| Variable | Default | Notes |
|
||||
| --- | --- | --- |
|
||||
| `SERVER_THREADS` | `0` (auto) | Waitress worker threads (1-64). Set to `0` for auto-calculation based on CPU cores (×2). |
|
||||
| `SERVER_CONNECTION_LIMIT` | `0` (auto) | Maximum concurrent connections (10-1000). Set to `0` for auto-calculation based on available RAM. |
|
||||
| `SERVER_BACKLOG` | `0` (auto) | TCP listen backlog (64-4096). Set to `0` for auto-calculation (connection_limit × 2). |
|
||||
| `SERVER_THREADS` | `0` (auto) | Granian blocking threads (1-64). Set to `0` for auto-calculation based on CPU cores (×2). |
|
||||
| `SERVER_CONNECTION_LIMIT` | `0` (auto) | Maximum concurrent requests per worker (10-1000). Set to `0` for auto-calculation based on available RAM. |
|
||||
| `SERVER_BACKLOG` | `0` (auto) | TCP listen backlog (128-4096). Set to `0` for auto-calculation (connection_limit × 2). |
|
||||
| `SERVER_CHANNEL_TIMEOUT` | `120` | Seconds before idle connections are closed (10-300). |
|
||||
|
||||
### Logging
|
||||
@@ -248,6 +252,60 @@ Once enabled, configure lifecycle rules via:
|
||||
</LifecycleConfiguration>
|
||||
```
|
||||
|
||||
## Garbage Collection
|
||||
|
||||
The garbage collector (GC) automatically cleans up orphaned data that accumulates over time: stale temporary files from failed uploads, abandoned multipart uploads, stale lock files, orphaned metadata entries, orphaned version files, and empty directories.
|
||||
|
||||
### Enabling GC
|
||||
|
||||
By default, GC is disabled. Enable it by setting:
|
||||
|
||||
```bash
|
||||
GC_ENABLED=true python run.py
|
||||
```
|
||||
|
||||
Or in your `myfsio.env` file:
|
||||
```
|
||||
GC_ENABLED=true
|
||||
GC_INTERVAL_HOURS=6 # Run every 6 hours (default)
|
||||
GC_TEMP_FILE_MAX_AGE_HOURS=24 # Delete temp files older than 24h
|
||||
GC_MULTIPART_MAX_AGE_DAYS=7 # Delete orphaned multipart uploads older than 7 days
|
||||
GC_LOCK_FILE_MAX_AGE_HOURS=1 # Delete stale lock files older than 1h
|
||||
GC_DRY_RUN=false # Set to true to log without deleting
|
||||
```
|
||||
|
||||
### What Gets Cleaned
|
||||
|
||||
| Type | Location | Condition |
|
||||
|------|----------|-----------|
|
||||
| **Temp files** | `.myfsio.sys/tmp/` | Older than `GC_TEMP_FILE_MAX_AGE_HOURS` |
|
||||
| **Orphaned multipart uploads** | `.myfsio.sys/multipart/` and `<bucket>/.multipart/` | Older than `GC_MULTIPART_MAX_AGE_DAYS` |
|
||||
| **Stale lock files** | `.myfsio.sys/buckets/<bucket>/locks/` | Older than `GC_LOCK_FILE_MAX_AGE_HOURS` |
|
||||
| **Orphaned metadata** | `.myfsio.sys/buckets/<bucket>/meta/` and `<bucket>/.meta/` | Object file no longer exists |
|
||||
| **Orphaned versions** | `.myfsio.sys/buckets/<bucket>/versions/` and `<bucket>/.versions/` | Main object no longer exists |
|
||||
| **Empty directories** | Various internal directories | Directory is empty after cleanup |
|
||||
|
||||
### Admin API
|
||||
|
||||
All GC endpoints require admin (`iam:*`) permissions.
|
||||
|
||||
| Method | Route | Description |
|
||||
|--------|-------|-------------|
|
||||
| `GET` | `/admin/gc/status` | Get GC status and configuration |
|
||||
| `POST` | `/admin/gc/run` | Trigger a manual GC run (body: `{"dry_run": true}` for preview) |
|
||||
| `GET` | `/admin/gc/history` | Get GC execution history (query: `?limit=50&offset=0`) |
|
||||
|
||||
### Dry Run Mode
|
||||
|
||||
Set `GC_DRY_RUN=true` to log what would be deleted without actually removing anything. You can also trigger a one-time dry run via the admin API:
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:5000/admin/gc/run" \
|
||||
-H "X-Access-Key: <key>" -H "X-Secret-Key: <secret>" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"dry_run": true}'
|
||||
```
|
||||
|
||||
### Performance Tuning
|
||||
|
||||
| Variable | Default | Notes |
|
||||
@@ -256,6 +314,12 @@ Once enabled, configure lifecycle rules via:
|
||||
| `MULTIPART_MIN_PART_SIZE` | `5242880` (5 MB) | Minimum part size for multipart uploads. |
|
||||
| `BUCKET_STATS_CACHE_TTL` | `60` | Seconds to cache bucket statistics. |
|
||||
| `BULK_DELETE_MAX_KEYS` | `500` | Maximum keys per bulk delete request. |
|
||||
| `BULK_DOWNLOAD_MAX_BYTES` | `1073741824` (1 GiB) | Maximum total size for bulk ZIP downloads. |
|
||||
| `OBJECT_CACHE_TTL` | `60` | Seconds to cache object metadata. |
|
||||
|
||||
#### Gzip Compression
|
||||
|
||||
API responses for JSON, XML, HTML, CSS, and JavaScript are automatically gzip-compressed when the client sends `Accept-Encoding: gzip`. Compression activates for responses larger than 500 bytes and is handled by a WSGI middleware (`app/compression.py`). Binary object downloads and streaming responses are never compressed. No configuration is needed.
|
||||
|
||||
### Server Settings
|
||||
|
||||
@@ -269,13 +333,14 @@ Once enabled, configure lifecycle rules via:
|
||||
|
||||
Before deploying to production, ensure you:
|
||||
|
||||
1. **Set `SECRET_KEY`** - Use a strong, unique value (e.g., `openssl rand -base64 32`)
|
||||
1. **Set `SECRET_KEY`** - Use a strong, unique value (e.g., `openssl rand -base64 32`). This also enables IAM config encryption at rest.
|
||||
2. **Restrict CORS** - Set `CORS_ORIGINS` to your specific domains instead of `*`
|
||||
3. **Configure `API_BASE_URL`** - Required for correct presigned URLs behind proxies
|
||||
4. **Enable HTTPS** - Use a reverse proxy (nginx, Cloudflare) with TLS termination
|
||||
5. **Review rate limits** - Adjust `RATE_LIMIT_DEFAULT` based on your needs
|
||||
6. **Secure master keys** - Back up `ENCRYPTION_MASTER_KEY_PATH` if using encryption
|
||||
7. **Use `--prod` flag** - Runs with Waitress instead of Flask dev server
|
||||
7. **Use `--prod` flag** - Runs with Granian instead of Flask dev server
|
||||
8. **Set credential expiry** - Assign `expires_at` to non-admin users for time-limited access
|
||||
|
||||
### Proxy Configuration
|
||||
|
||||
@@ -285,6 +350,75 @@ If running behind a reverse proxy (e.g., Nginx, Cloudflare, or a tunnel), ensure
|
||||
|
||||
The application automatically trusts these headers to generate correct presigned URLs (e.g., `https://s3.example.com/...` instead of `http://127.0.0.1:5000/...`). Alternatively, you can explicitly set `API_BASE_URL` to your public endpoint.
|
||||
|
||||
| Variable | Default | Notes |
|
||||
| --- | --- | --- |
|
||||
| `NUM_TRUSTED_PROXIES` | `1` | Number of trusted reverse proxies for `X-Forwarded-*` header processing. |
|
||||
| `ALLOWED_REDIRECT_HOSTS` | `""` | Comma-separated whitelist of safe redirect targets. Empty allows only same-host redirects. |
|
||||
| `ALLOW_INTERNAL_ENDPOINTS` | `false` | Allow connections to internal/private IPs for webhooks and replication targets. **Keep disabled in production unless needed.** |
|
||||
|
||||
## Integrity Scanner
|
||||
|
||||
The integrity scanner detects and optionally auto-repairs data inconsistencies: corrupted objects (ETag mismatch), orphaned files without metadata, phantom metadata without files, stale version archives, ETag cache drift, and unmigrated legacy `.meta.json` files.
|
||||
|
||||
### Enabling Integrity Scanner
|
||||
|
||||
By default, the integrity scanner is disabled. Enable it by setting:
|
||||
|
||||
```bash
|
||||
INTEGRITY_ENABLED=true python run.py
|
||||
```
|
||||
|
||||
Or in your `myfsio.env` file:
|
||||
```
|
||||
INTEGRITY_ENABLED=true
|
||||
INTEGRITY_INTERVAL_HOURS=24 # Run every 24 hours (default)
|
||||
INTEGRITY_BATCH_SIZE=1000 # Max objects to scan per cycle
|
||||
INTEGRITY_AUTO_HEAL=false # Automatically repair detected issues
|
||||
INTEGRITY_DRY_RUN=false # Set to true to log without healing
|
||||
```
|
||||
|
||||
### What Gets Checked
|
||||
|
||||
| Check | Detection | Heal Action |
|
||||
|-------|-----------|-------------|
|
||||
| **Corrupted objects** | File MD5 does not match stored `__etag__` | Update `__etag__` in index (disk data is authoritative) |
|
||||
| **Orphaned objects** | File exists on disk without metadata entry | Create index entry with computed MD5/size/mtime |
|
||||
| **Phantom metadata** | Index entry exists but file is missing from disk | Remove stale entry from `_index.json` |
|
||||
| **Stale versions** | `.json` manifest without `.bin` data or vice versa | Remove orphaned version file |
|
||||
| **ETag cache inconsistency** | `etag_index.json` entry differs from metadata `__etag__` | Delete `etag_index.json` (auto-rebuilt on next list) |
|
||||
| **Legacy metadata drift** | Legacy `.meta.json` differs from index or is unmigrated | Migrate to index and delete legacy file |
|
||||
|
||||
### Admin API
|
||||
|
||||
All integrity endpoints require admin (`iam:*`) permissions.
|
||||
|
||||
| Method | Route | Description |
|
||||
|--------|-------|-------------|
|
||||
| `GET` | `/admin/integrity/status` | Get scanner status and configuration |
|
||||
| `POST` | `/admin/integrity/run` | Trigger a manual scan (body: `{"dry_run": true, "auto_heal": true}`) |
|
||||
| `GET` | `/admin/integrity/history` | Get scan history (query: `?limit=50&offset=0`) |
|
||||
|
||||
### Dry Run Mode
|
||||
|
||||
Set `INTEGRITY_DRY_RUN=true` to log detected issues without making any changes. You can also trigger a one-time dry run via the admin API:
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:5000/admin/integrity/run" \
|
||||
-H "X-Access-Key: <key>" -H "X-Secret-Key: <secret>" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"dry_run": true, "auto_heal": true}'
|
||||
```
|
||||
|
||||
### Configuration Reference
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `INTEGRITY_ENABLED` | `false` | Enable background integrity scanning |
|
||||
| `INTEGRITY_INTERVAL_HOURS` | `24` | Hours between scan cycles |
|
||||
| `INTEGRITY_BATCH_SIZE` | `1000` | Max objects to scan per cycle |
|
||||
| `INTEGRITY_AUTO_HEAL` | `false` | Automatically repair detected issues |
|
||||
| `INTEGRITY_DRY_RUN` | `false` | Log issues without healing |
|
||||
|
||||
## 4. Upgrading and Updates
|
||||
|
||||
### Version Checking
|
||||
@@ -619,11 +753,12 @@ MyFSIO implements a comprehensive Identity and Access Management (IAM) system th
|
||||
|
||||
### Getting Started
|
||||
|
||||
1. On first boot, `data/.myfsio.sys/config/iam.json` is created with a randomly generated admin user. The access key and secret key are printed to the console during first startup. If you miss it, check the `iam.json` file directly—credentials are stored in plaintext.
|
||||
1. On first boot, `data/.myfsio.sys/config/iam.json` is created with a randomly generated admin user. The access key and secret key are printed to the console during first startup. You can set `ADMIN_ACCESS_KEY` and `ADMIN_SECRET_KEY` environment variables to use custom credentials instead of random ones. If `SECRET_KEY` is configured, the IAM config file is encrypted at rest using AES (Fernet). To reset admin credentials later, run `python run.py --reset-cred`.
|
||||
2. Sign into the UI using the generated credentials, then open **IAM**:
|
||||
- **Create user**: supply a display name and optional JSON inline policy array.
|
||||
- **Create user**: supply a display name, optional JSON inline policy array, and optional credential expiry date.
|
||||
- **Set expiry**: assign an expiration date to any user's credentials. Expired credentials are rejected at authentication time. The UI shows expiry badges and preset durations (1h, 24h, 7d, 30d, 90d).
|
||||
- **Rotate secret**: generates a new secret key; the UI surfaces it once.
|
||||
- **Policy editor**: select a user, paste an array of objects (`{"bucket": "*", "actions": ["list", "read"]}`), and submit. Alias support includes AWS-style verbs (e.g., `s3:GetObject`).
|
||||
- **Policy editor**: select a user, paste an array of objects (`{"bucket": "*", "actions": ["list", "read"]}`), and submit. An optional `"prefix"` field restricts object-level actions to a key prefix (e.g., `"uploads/"`). Alias support includes AWS-style verbs (e.g., `s3:GetObject`).
|
||||
3. Wildcard action `iam:*` is supported for admin user definitions.
|
||||
|
||||
> **Breaking Change (v0.2.0+):** Previous versions used fixed default credentials (`localadmin/localadmin`). If upgrading from an older version, your existing credentials remain unchanged, but new installations will generate random credentials.
|
||||
@@ -639,8 +774,11 @@ The API expects every request to include authentication headers. The UI persists
|
||||
|
||||
**Security Features:**
|
||||
- **Lockout Protection**: After `AUTH_MAX_ATTEMPTS` (default: 5) failed login attempts, the account is locked for `AUTH_LOCKOUT_MINUTES` (default: 15 minutes).
|
||||
- **Credential Expiry**: Each user can have an optional `expires_at` timestamp (ISO 8601). Once expired, all API requests using those credentials are rejected. Set or clear expiry via the UI or API.
|
||||
- **IAM Config Encryption**: When `SECRET_KEY` is set, the IAM config file (`iam.json`) is encrypted at rest using Fernet (AES-256-CBC with HMAC). Existing plaintext configs are automatically encrypted on next load.
|
||||
- **Session Management**: UI sessions remain valid for `SESSION_LIFETIME_DAYS` (default: 30 days).
|
||||
- **Hot Reload**: IAM configuration changes take effect immediately without restart.
|
||||
- **Credential Reset**: Run `python run.py --reset-cred` to reset admin credentials. Supports `ADMIN_ACCESS_KEY` and `ADMIN_SECRET_KEY` env vars for deterministic keys.
|
||||
|
||||
### Permission Model
|
||||
|
||||
@@ -659,13 +797,23 @@ Both layers are evaluated for each request. A user must have permission in their
|
||||
| --- | --- | --- |
|
||||
| `list` | List buckets and objects | `s3:ListBucket`, `s3:ListAllMyBuckets`, `s3:ListBucketVersions`, `s3:ListMultipartUploads`, `s3:ListParts` |
|
||||
| `read` | Download objects, get metadata | `s3:GetObject`, `s3:GetObjectVersion`, `s3:GetObjectTagging`, `s3:GetObjectVersionTagging`, `s3:GetObjectAcl`, `s3:GetBucketVersioning`, `s3:HeadObject`, `s3:HeadBucket` |
|
||||
| `write` | Upload objects, create buckets, manage tags | `s3:PutObject`, `s3:CreateBucket`, `s3:PutObjectTagging`, `s3:PutBucketVersioning`, `s3:CreateMultipartUpload`, `s3:UploadPart`, `s3:CompleteMultipartUpload`, `s3:AbortMultipartUpload`, `s3:CopyObject` |
|
||||
| `delete` | Remove objects, versions, and buckets | `s3:DeleteObject`, `s3:DeleteObjectVersion`, `s3:DeleteBucket`, `s3:DeleteObjectTagging` |
|
||||
| `write` | Upload objects, manage object tags | `s3:PutObject`, `s3:PutObjectTagging`, `s3:CreateMultipartUpload`, `s3:UploadPart`, `s3:CompleteMultipartUpload`, `s3:AbortMultipartUpload`, `s3:CopyObject` |
|
||||
| `delete` | Remove objects and versions | `s3:DeleteObject`, `s3:DeleteObjectVersion`, `s3:DeleteObjectTagging` |
|
||||
| `create_bucket` | Create new buckets | `s3:CreateBucket` |
|
||||
| `delete_bucket` | Delete buckets | `s3:DeleteBucket` |
|
||||
| `share` | Manage Access Control Lists (ACLs) | `s3:PutObjectAcl`, `s3:PutBucketAcl`, `s3:GetBucketAcl` |
|
||||
| `policy` | Manage bucket policies | `s3:PutBucketPolicy`, `s3:GetBucketPolicy`, `s3:DeleteBucketPolicy` |
|
||||
| `versioning` | Manage bucket versioning configuration | `s3:GetBucketVersioning`, `s3:PutBucketVersioning` |
|
||||
| `tagging` | Manage bucket-level tags | `s3:GetBucketTagging`, `s3:PutBucketTagging`, `s3:DeleteBucketTagging` |
|
||||
| `encryption` | Manage bucket encryption configuration | `s3:GetEncryptionConfiguration`, `s3:PutEncryptionConfiguration`, `s3:DeleteEncryptionConfiguration` |
|
||||
| `lifecycle` | Manage lifecycle rules | `s3:GetLifecycleConfiguration`, `s3:PutLifecycleConfiguration`, `s3:DeleteLifecycleConfiguration`, `s3:GetBucketLifecycle`, `s3:PutBucketLifecycle` |
|
||||
| `cors` | Manage CORS configuration | `s3:GetBucketCors`, `s3:PutBucketCors`, `s3:DeleteBucketCors` |
|
||||
| `replication` | Configure and manage replication | `s3:GetReplicationConfiguration`, `s3:PutReplicationConfiguration`, `s3:DeleteReplicationConfiguration`, `s3:ReplicateObject`, `s3:ReplicateTags`, `s3:ReplicateDelete` |
|
||||
| `quota` | Manage bucket storage quotas | `s3:GetBucketQuota`, `s3:PutBucketQuota`, `s3:DeleteBucketQuota` |
|
||||
| `object_lock` | Manage object lock, retention, and legal holds | `s3:GetObjectLockConfiguration`, `s3:PutObjectLockConfiguration`, `s3:PutObjectRetention`, `s3:GetObjectRetention`, `s3:PutObjectLegalHold`, `s3:GetObjectLegalHold` |
|
||||
| `notification` | Manage bucket event notifications | `s3:GetBucketNotificationConfiguration`, `s3:PutBucketNotificationConfiguration`, `s3:DeleteBucketNotificationConfiguration` |
|
||||
| `logging` | Manage bucket access logging | `s3:GetBucketLogging`, `s3:PutBucketLogging`, `s3:DeleteBucketLogging` |
|
||||
| `website` | Manage static website hosting configuration | `s3:GetBucketWebsite`, `s3:PutBucketWebsite`, `s3:DeleteBucketWebsite` |
|
||||
|
||||
#### IAM Actions (User Management)
|
||||
|
||||
@@ -676,25 +824,31 @@ Both layers are evaluated for each request. A user must have permission in their
|
||||
| `iam:delete_user` | Delete IAM users | `iam:DeleteUser` |
|
||||
| `iam:rotate_key` | Rotate user secret keys | `iam:RotateAccessKey` |
|
||||
| `iam:update_policy` | Modify user policies | `iam:PutUserPolicy` |
|
||||
| `iam:create_key` | Create additional access keys for a user | `iam:CreateAccessKey` |
|
||||
| `iam:delete_key` | Delete an access key from a user | `iam:DeleteAccessKey` |
|
||||
| `iam:get_user` | View user details and access keys | `iam:GetUser` |
|
||||
| `iam:get_policy` | View user policy configuration | `iam:GetPolicy` |
|
||||
| `iam:disable_user` | Temporarily disable/enable a user account | `iam:DisableUser` |
|
||||
| `iam:*` | **Admin wildcard** – grants all IAM actions | — |
|
||||
|
||||
#### Wildcards
|
||||
|
||||
| Wildcard | Scope | Description |
|
||||
| --- | --- | --- |
|
||||
| `*` (in actions) | All S3 actions | Grants `list`, `read`, `write`, `delete`, `share`, `policy`, `lifecycle`, `cors`, `replication` |
|
||||
| `*` (in actions) | All S3 actions | Grants all 19 S3 actions including `list`, `read`, `write`, `delete`, `create_bucket`, `delete_bucket`, `share`, `policy`, `versioning`, `tagging`, `encryption`, `lifecycle`, `cors`, `replication`, `quota`, `object_lock`, `notification`, `logging`, `website` |
|
||||
| `iam:*` | All IAM actions | Grants all `iam:*` actions for user management |
|
||||
| `*` (in bucket) | All buckets | Policy applies to every bucket |
|
||||
|
||||
### IAM Policy Structure
|
||||
|
||||
User policies are stored as a JSON array of policy objects. Each object specifies a bucket and the allowed actions:
|
||||
User policies are stored as a JSON array of policy objects. Each object specifies a bucket, the allowed actions, and an optional prefix for object-level scoping:
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"bucket": "<bucket-name-or-wildcard>",
|
||||
"actions": ["<action1>", "<action2>", ...]
|
||||
"actions": ["<action1>", "<action2>", ...],
|
||||
"prefix": "<optional-key-prefix>"
|
||||
}
|
||||
]
|
||||
```
|
||||
@@ -702,12 +856,13 @@ User policies are stored as a JSON array of policy objects. Each object specifie
|
||||
**Fields:**
|
||||
- `bucket`: The bucket name (case-insensitive) or `*` for all buckets
|
||||
- `actions`: Array of action strings (simple names or AWS aliases)
|
||||
- `prefix`: *(optional)* Restrict object-level actions to keys starting with this prefix. Defaults to `*` (all objects). Example: `"uploads/"` restricts to keys under `uploads/`
|
||||
|
||||
### Example User Policies
|
||||
|
||||
**Full Administrator (complete system access):**
|
||||
```json
|
||||
[{"bucket": "*", "actions": ["list", "read", "write", "delete", "share", "policy", "lifecycle", "cors", "replication", "iam:*"]}]
|
||||
[{"bucket": "*", "actions": ["list", "read", "write", "delete", "share", "policy", "create_bucket", "delete_bucket", "versioning", "tagging", "encryption", "lifecycle", "cors", "replication", "quota", "object_lock", "notification", "logging", "website", "iam:*"]}]
|
||||
```
|
||||
|
||||
**Read-Only User (browse and download only):**
|
||||
@@ -720,6 +875,11 @@ User policies are stored as a JSON array of policy objects. Each object specifie
|
||||
[{"bucket": "user-bucket", "actions": ["list", "read", "write", "delete"]}]
|
||||
```
|
||||
|
||||
**Operator (data operations + bucket management, no config):**
|
||||
```json
|
||||
[{"bucket": "*", "actions": ["list", "read", "write", "delete", "create_bucket", "delete_bucket"]}]
|
||||
```
|
||||
|
||||
**Multiple Bucket Access (different permissions per bucket):**
|
||||
```json
|
||||
[
|
||||
@@ -729,9 +889,14 @@ User policies are stored as a JSON array of policy objects. Each object specifie
|
||||
]
|
||||
```
|
||||
|
||||
**Prefix-Scoped Access (restrict to a folder inside a shared bucket):**
|
||||
```json
|
||||
[{"bucket": "shared-data", "actions": ["list", "read", "write", "delete"], "prefix": "team-a/"}]
|
||||
```
|
||||
|
||||
**IAM Manager (manage users but no data access):**
|
||||
```json
|
||||
[{"bucket": "*", "actions": ["iam:list_users", "iam:create_user", "iam:delete_user", "iam:rotate_key", "iam:update_policy"]}]
|
||||
[{"bucket": "*", "actions": ["iam:list_users", "iam:create_user", "iam:delete_user", "iam:rotate_key", "iam:update_policy", "iam:create_key", "iam:delete_key", "iam:get_user", "iam:get_policy", "iam:disable_user"]}]
|
||||
```
|
||||
|
||||
**Replication Operator (manage replication only):**
|
||||
@@ -751,10 +916,10 @@ User policies are stored as a JSON array of policy objects. Each object specifie
|
||||
|
||||
**Bucket Administrator (full bucket config, no IAM access):**
|
||||
```json
|
||||
[{"bucket": "my-bucket", "actions": ["list", "read", "write", "delete", "policy", "lifecycle", "cors"]}]
|
||||
[{"bucket": "my-bucket", "actions": ["list", "read", "write", "delete", "create_bucket", "delete_bucket", "share", "policy", "versioning", "tagging", "encryption", "lifecycle", "cors", "replication", "quota", "object_lock", "notification", "logging", "website"]}]
|
||||
```
|
||||
|
||||
**Upload-Only User (write but cannot read back):**
|
||||
**Upload-Only User (write but cannot create/delete buckets):**
|
||||
```json
|
||||
[{"bucket": "drop-box", "actions": ["write"]}]
|
||||
```
|
||||
@@ -800,7 +965,8 @@ curl -X POST http://localhost:5000/iam/users \
|
||||
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
||||
-d '{
|
||||
"display_name": "New User",
|
||||
"policies": [{"bucket": "*", "actions": ["list", "read"]}]
|
||||
"policies": [{"bucket": "*", "actions": ["list", "read"]}],
|
||||
"expires_at": "2026-12-31T23:59:59Z"
|
||||
}'
|
||||
|
||||
# Rotate user secret (requires iam:rotate_key)
|
||||
@@ -813,9 +979,45 @@ curl -X PUT http://localhost:5000/iam/users/<access-key>/policies \
|
||||
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
||||
-d '[{"bucket": "*", "actions": ["list", "read", "write"]}]'
|
||||
|
||||
# Update credential expiry (requires iam:update_policy)
|
||||
curl -X POST http://localhost:5000/iam/users/<access-key>/expiry \
|
||||
-H "Content-Type: application/x-www-form-urlencoded" \
|
||||
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
||||
-d 'expires_at=2026-12-31T23:59:59Z'
|
||||
|
||||
# Remove credential expiry (never expires)
|
||||
curl -X POST http://localhost:5000/iam/users/<access-key>/expiry \
|
||||
-H "Content-Type: application/x-www-form-urlencoded" \
|
||||
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
||||
-d 'expires_at='
|
||||
|
||||
# Delete a user (requires iam:delete_user)
|
||||
curl -X DELETE http://localhost:5000/iam/users/<access-key> \
|
||||
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
|
||||
|
||||
# Get user details (requires iam:get_user) — via Admin API
|
||||
curl http://localhost:5000/admin/iam/users/<user-id-or-access-key> \
|
||||
-H "Authorization: AWS4-HMAC-SHA256 ..."
|
||||
|
||||
# Get user policies (requires iam:get_policy) — via Admin API
|
||||
curl http://localhost:5000/admin/iam/users/<user-id-or-access-key>/policies \
|
||||
-H "Authorization: AWS4-HMAC-SHA256 ..."
|
||||
|
||||
# Create additional access key for a user (requires iam:create_key)
|
||||
curl -X POST http://localhost:5000/admin/iam/users/<user-id-or-access-key>/keys \
|
||||
-H "Authorization: AWS4-HMAC-SHA256 ..."
|
||||
|
||||
# Delete an access key (requires iam:delete_key)
|
||||
curl -X DELETE http://localhost:5000/admin/iam/users/<user-id>/keys/<access-key> \
|
||||
-H "Authorization: AWS4-HMAC-SHA256 ..."
|
||||
|
||||
# Disable a user account (requires iam:disable_user)
|
||||
curl -X POST http://localhost:5000/admin/iam/users/<user-id-or-access-key>/disable \
|
||||
-H "Authorization: AWS4-HMAC-SHA256 ..."
|
||||
|
||||
# Re-enable a user account (requires iam:disable_user)
|
||||
curl -X POST http://localhost:5000/admin/iam/users/<user-id-or-access-key>/enable \
|
||||
-H "Authorization: AWS4-HMAC-SHA256 ..."
|
||||
```
|
||||
|
||||
### Permission Precedence
|
||||
@@ -824,8 +1026,9 @@ When a request is made, permissions are evaluated in this order:
|
||||
|
||||
1. **Authentication** – Verify the access key and secret key are valid
|
||||
2. **Lockout Check** – Ensure the account is not locked due to failed attempts
|
||||
3. **IAM Policy Check** – Verify the user has the required action for the target bucket
|
||||
4. **Bucket Policy Check** – If a bucket policy exists, verify it allows the action
|
||||
3. **Expiry Check** – Reject requests if the user's credentials have expired (`expires_at`)
|
||||
4. **IAM Policy Check** – Verify the user has the required action for the target bucket
|
||||
5. **Bucket Policy Check** – If a bucket policy exists, verify it allows the action
|
||||
|
||||
A request is allowed only if:
|
||||
- The IAM policy grants the action, AND
|
||||
@@ -912,7 +1115,7 @@ Objects with forward slashes (`/`) in their keys are displayed as a folder hiera
|
||||
|
||||
- Select multiple objects using checkboxes
|
||||
- **Bulk Delete**: Delete multiple objects at once
|
||||
- **Bulk Download**: Download selected objects as individual files
|
||||
- **Bulk Download**: Download selected objects as a single ZIP archive (up to `BULK_DOWNLOAD_MAX_BYTES`, default 1 GiB)
|
||||
|
||||
#### Search & Filter
|
||||
|
||||
@@ -985,6 +1188,7 @@ MyFSIO supports **server-side encryption at rest** to protect your data. When en
|
||||
|------|-------------|
|
||||
| **AES-256 (SSE-S3)** | Server-managed encryption using a local master key |
|
||||
| **KMS (SSE-KMS)** | Encryption using customer-managed keys via the built-in KMS |
|
||||
| **SSE-C** | Server-side encryption with customer-provided keys (per-request) |
|
||||
|
||||
### Enabling Encryption
|
||||
|
||||
@@ -1083,6 +1287,44 @@ encrypted, metadata = ClientEncryptionHelper.encrypt_for_upload(plaintext, key)
|
||||
decrypted = ClientEncryptionHelper.decrypt_from_download(encrypted, metadata, key)
|
||||
```
|
||||
|
||||
### SSE-C (Customer-Provided Keys)
|
||||
|
||||
With SSE-C, you provide your own 256-bit AES encryption key with each request. The server encrypts/decrypts using your key but never stores it. You must supply the same key for both upload and download.
|
||||
|
||||
**Required headers:**
|
||||
|
||||
| Header | Value |
|
||||
|--------|-------|
|
||||
| `x-amz-server-side-encryption-customer-algorithm` | `AES256` |
|
||||
| `x-amz-server-side-encryption-customer-key` | Base64-encoded 256-bit key |
|
||||
| `x-amz-server-side-encryption-customer-key-MD5` | Base64-encoded MD5 of the key |
|
||||
|
||||
```bash
|
||||
# Generate a 256-bit key
|
||||
KEY=$(openssl rand -base64 32)
|
||||
KEY_MD5=$(echo -n "$KEY" | base64 -d | openssl dgst -md5 -binary | base64)
|
||||
|
||||
# Upload with SSE-C
|
||||
curl -X PUT "http://localhost:5000/my-bucket/secret.txt" \
|
||||
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
||||
-H "x-amz-server-side-encryption-customer-algorithm: AES256" \
|
||||
-H "x-amz-server-side-encryption-customer-key: $KEY" \
|
||||
-H "x-amz-server-side-encryption-customer-key-MD5: $KEY_MD5" \
|
||||
--data-binary @secret.txt
|
||||
|
||||
# Download with SSE-C (same key required)
|
||||
curl "http://localhost:5000/my-bucket/secret.txt" \
|
||||
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
||||
-H "x-amz-server-side-encryption-customer-algorithm: AES256" \
|
||||
-H "x-amz-server-side-encryption-customer-key: $KEY" \
|
||||
-H "x-amz-server-side-encryption-customer-key-MD5: $KEY_MD5"
|
||||
```
|
||||
|
||||
**Key points:**
|
||||
- SSE-C does not require `ENCRYPTION_ENABLED` or `KMS_ENABLED` — the key is provided per-request
|
||||
- If you lose your key, the data is irrecoverable
|
||||
- The MD5 header is optional but recommended for integrity verification
|
||||
|
||||
### Important Notes
|
||||
|
||||
- **Existing objects are NOT encrypted** - Only new uploads after enabling encryption are encrypted
|
||||
@@ -1959,6 +2201,20 @@ curl -X PUT "http://localhost:5000/my-bucket/file.txt" \
|
||||
-H "x-amz-meta-newkey: newvalue"
|
||||
```
|
||||
|
||||
### MoveObject (UI)
|
||||
|
||||
Move an object to a different key or bucket. This is a UI-only convenience operation that performs a copy followed by a delete of the source. Requires `read` and `delete` on the source, and `write` on the destination.
|
||||
|
||||
```bash
|
||||
# Move via UI API
|
||||
curl -X POST "http://localhost:5100/ui/buckets/my-bucket/objects/old-path/file.txt/move" \
|
||||
-H "Content-Type: application/json" \
|
||||
--cookie "session=..." \
|
||||
-d '{"dest_bucket": "other-bucket", "dest_key": "new-path/file.txt"}'
|
||||
```
|
||||
|
||||
The move is atomic from the caller's perspective: if the copy succeeds but the delete fails, the object exists in both locations (no data loss).
|
||||
|
||||
### UploadPartCopy
|
||||
|
||||
Copy data from an existing object into a multipart upload part:
|
||||
|
||||
421
myfsio_core/Cargo.lock
generated
421
myfsio_core/Cargo.lock
generated
@@ -1,421 +0,0 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "allocator-api2"
|
||||
version = "0.2.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.10.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"crypto-common",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
||||
|
||||
[[package]]
|
||||
name = "foldhash"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
|
||||
dependencies = [
|
||||
"typenum",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.15.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
|
||||
dependencies = [
|
||||
"allocator-api2",
|
||||
"equivalent",
|
||||
"foldhash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "hex"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
|
||||
|
||||
[[package]]
|
||||
name = "hmac"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
|
||||
dependencies = [
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.182"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
|
||||
dependencies = [
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lru"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198"
|
||||
dependencies = [
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "md-5"
|
||||
version = "0.10.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
||||
|
||||
[[package]]
|
||||
name = "myfsio_core"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"hex",
|
||||
"hmac",
|
||||
"lru",
|
||||
"md-5",
|
||||
"parking_lot",
|
||||
"pyo3",
|
||||
"regex",
|
||||
"sha2",
|
||||
"unicode-normalization",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.21.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.12.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
|
||||
dependencies = [
|
||||
"lock_api",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot_core"
|
||||
version = "0.9.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
"smallvec",
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "portable-atomic"
|
||||
version = "1.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3"
|
||||
version = "0.28.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14c738662e2181be11cb82487628404254902bb3225d8e9e99c31f3ef82a405c"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"once_cell",
|
||||
"portable-atomic",
|
||||
"pyo3-build-config",
|
||||
"pyo3-ffi",
|
||||
"pyo3-macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-build-config"
|
||||
version = "0.28.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9ca0864a7dd3c133a7f3f020cbff2e12e88420da854c35540fd20ce2d60e435"
|
||||
dependencies = [
|
||||
"target-lexicon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-ffi"
|
||||
version = "0.28.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9dfc1956b709823164763a34cc42bbfd26b8730afa77809a3df8b94a3ae3b059"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"pyo3-build-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-macros"
|
||||
version = "0.28.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29dc660ad948bae134d579661d08033fbb1918f4529c3bbe3257a68f2009ddf2"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"pyo3-macros-backend",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-macros-backend"
|
||||
version = "0.28.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e78cd6c6d718acfcedf26c3d21fe0f053624368b0d44298c55d7138fde9331f7"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"pyo3-build-config",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.5.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c"
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.10.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.15.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
|
||||
|
||||
[[package]]
|
||||
name = "subtle"
|
||||
version = "2.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.116"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "target-lexicon"
|
||||
version = "0.13.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
|
||||
|
||||
[[package]]
|
||||
name = "tinyvec"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
|
||||
dependencies = [
|
||||
"tinyvec_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinyvec_macros"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
version = "0.1.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
|
||||
dependencies = [
|
||||
"tinyvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
||||
|
||||
[[package]]
|
||||
name = "windows-link"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||
@@ -14,6 +14,11 @@ sha2 = "0.10"
|
||||
md-5 = "0.10"
|
||||
hex = "0.4"
|
||||
unicode-normalization = "0.1"
|
||||
serde_json = "1"
|
||||
regex = "1"
|
||||
lru = "0.14"
|
||||
parking_lot = "0.12"
|
||||
percent-encoding = "2"
|
||||
aes-gcm = "0.10"
|
||||
hkdf = "0.12"
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
|
||||
192
myfsio_core/src/crypto.rs
Normal file
192
myfsio_core/src/crypto.rs
Normal file
@@ -0,0 +1,192 @@
|
||||
use aes_gcm::aead::Aead;
|
||||
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
|
||||
use hkdf::Hkdf;
|
||||
use pyo3::exceptions::{PyIOError, PyValueError};
|
||||
use pyo3::prelude::*;
|
||||
use sha2::Sha256;
|
||||
use std::fs::File;
|
||||
use std::io::{Read, Seek, SeekFrom, Write};
|
||||
|
||||
const DEFAULT_CHUNK_SIZE: usize = 65536;
|
||||
const HEADER_SIZE: usize = 4;
|
||||
|
||||
fn read_exact_chunk(reader: &mut impl Read, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||
let mut filled = 0;
|
||||
while filled < buf.len() {
|
||||
match reader.read(&mut buf[filled..]) {
|
||||
Ok(0) => break,
|
||||
Ok(n) => filled += n,
|
||||
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
}
|
||||
Ok(filled)
|
||||
}
|
||||
|
||||
fn derive_chunk_nonce(base_nonce: &[u8], chunk_index: u32) -> Result<[u8; 12], String> {
|
||||
let hkdf = Hkdf::<Sha256>::new(Some(base_nonce), b"chunk_nonce");
|
||||
let mut okm = [0u8; 12];
|
||||
hkdf.expand(&chunk_index.to_be_bytes(), &mut okm)
|
||||
.map_err(|e| format!("HKDF expand failed: {}", e))?;
|
||||
Ok(okm)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (input_path, output_path, key, base_nonce, chunk_size=DEFAULT_CHUNK_SIZE))]
|
||||
pub fn encrypt_stream_chunked(
|
||||
py: Python<'_>,
|
||||
input_path: &str,
|
||||
output_path: &str,
|
||||
key: &[u8],
|
||||
base_nonce: &[u8],
|
||||
chunk_size: usize,
|
||||
) -> PyResult<u32> {
|
||||
if key.len() != 32 {
|
||||
return Err(PyValueError::new_err(format!(
|
||||
"Key must be 32 bytes, got {}",
|
||||
key.len()
|
||||
)));
|
||||
}
|
||||
if base_nonce.len() != 12 {
|
||||
return Err(PyValueError::new_err(format!(
|
||||
"Base nonce must be 12 bytes, got {}",
|
||||
base_nonce.len()
|
||||
)));
|
||||
}
|
||||
|
||||
let chunk_size = if chunk_size == 0 {
|
||||
DEFAULT_CHUNK_SIZE
|
||||
} else {
|
||||
chunk_size
|
||||
};
|
||||
|
||||
let inp = input_path.to_owned();
|
||||
let out = output_path.to_owned();
|
||||
let key_arr: [u8; 32] = key.try_into().unwrap();
|
||||
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
|
||||
|
||||
py.detach(move || {
|
||||
let cipher = Aes256Gcm::new(&key_arr.into());
|
||||
|
||||
let mut infile = File::open(&inp)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to open input: {}", e)))?;
|
||||
let mut outfile = File::create(&out)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to create output: {}", e)))?;
|
||||
|
||||
outfile
|
||||
.write_all(&[0u8; 4])
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write header: {}", e)))?;
|
||||
|
||||
let mut buf = vec![0u8; chunk_size];
|
||||
let mut chunk_index: u32 = 0;
|
||||
|
||||
loop {
|
||||
let n = read_exact_chunk(&mut infile, &mut buf)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to read: {}", e)))?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)
|
||||
.map_err(|e| PyValueError::new_err(e))?;
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
|
||||
let encrypted = cipher
|
||||
.encrypt(nonce, &buf[..n])
|
||||
.map_err(|e| PyValueError::new_err(format!("Encrypt failed: {}", e)))?;
|
||||
|
||||
let size = encrypted.len() as u32;
|
||||
outfile
|
||||
.write_all(&size.to_be_bytes())
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk size: {}", e)))?;
|
||||
outfile
|
||||
.write_all(&encrypted)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk: {}", e)))?;
|
||||
|
||||
chunk_index += 1;
|
||||
}
|
||||
|
||||
outfile
|
||||
.seek(SeekFrom::Start(0))
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to seek: {}", e)))?;
|
||||
outfile
|
||||
.write_all(&chunk_index.to_be_bytes())
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk count: {}", e)))?;
|
||||
|
||||
Ok(chunk_index)
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn decrypt_stream_chunked(
|
||||
py: Python<'_>,
|
||||
input_path: &str,
|
||||
output_path: &str,
|
||||
key: &[u8],
|
||||
base_nonce: &[u8],
|
||||
) -> PyResult<u32> {
|
||||
if key.len() != 32 {
|
||||
return Err(PyValueError::new_err(format!(
|
||||
"Key must be 32 bytes, got {}",
|
||||
key.len()
|
||||
)));
|
||||
}
|
||||
if base_nonce.len() != 12 {
|
||||
return Err(PyValueError::new_err(format!(
|
||||
"Base nonce must be 12 bytes, got {}",
|
||||
base_nonce.len()
|
||||
)));
|
||||
}
|
||||
|
||||
let inp = input_path.to_owned();
|
||||
let out = output_path.to_owned();
|
||||
let key_arr: [u8; 32] = key.try_into().unwrap();
|
||||
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
|
||||
|
||||
py.detach(move || {
|
||||
let cipher = Aes256Gcm::new(&key_arr.into());
|
||||
|
||||
let mut infile = File::open(&inp)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to open input: {}", e)))?;
|
||||
let mut outfile = File::create(&out)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to create output: {}", e)))?;
|
||||
|
||||
let mut header = [0u8; HEADER_SIZE];
|
||||
infile
|
||||
.read_exact(&mut header)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to read header: {}", e)))?;
|
||||
let chunk_count = u32::from_be_bytes(header);
|
||||
|
||||
let mut size_buf = [0u8; HEADER_SIZE];
|
||||
for chunk_index in 0..chunk_count {
|
||||
infile
|
||||
.read_exact(&mut size_buf)
|
||||
.map_err(|e| {
|
||||
PyIOError::new_err(format!(
|
||||
"Failed to read chunk {} size: {}",
|
||||
chunk_index, e
|
||||
))
|
||||
})?;
|
||||
let chunk_size = u32::from_be_bytes(size_buf) as usize;
|
||||
|
||||
let mut encrypted = vec![0u8; chunk_size];
|
||||
infile.read_exact(&mut encrypted).map_err(|e| {
|
||||
PyIOError::new_err(format!("Failed to read chunk {}: {}", chunk_index, e))
|
||||
})?;
|
||||
|
||||
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)
|
||||
.map_err(|e| PyValueError::new_err(e))?;
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
|
||||
let decrypted = cipher.decrypt(nonce, encrypted.as_ref()).map_err(|e| {
|
||||
PyValueError::new_err(format!("Decrypt chunk {} failed: {}", chunk_index, e))
|
||||
})?;
|
||||
|
||||
outfile.write_all(&decrypted).map_err(|e| {
|
||||
PyIOError::new_err(format!("Failed to write chunk {}: {}", chunk_index, e))
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(chunk_count)
|
||||
})
|
||||
}
|
||||
@@ -1,5 +1,9 @@
|
||||
mod crypto;
|
||||
mod hashing;
|
||||
mod metadata;
|
||||
mod sigv4;
|
||||
mod storage;
|
||||
mod streaming;
|
||||
mod validation;
|
||||
|
||||
use pyo3::prelude::*;
|
||||
@@ -10,6 +14,7 @@ mod myfsio_core {
|
||||
|
||||
#[pymodule_init]
|
||||
fn init(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
m.add_function(wrap_pyfunction!(sigv4::verify_sigv4_signature, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(sigv4::derive_signing_key, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(sigv4::compute_signature, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(sigv4::build_string_to_sign, m)?)?;
|
||||
@@ -25,6 +30,22 @@ mod myfsio_core {
|
||||
m.add_function(wrap_pyfunction!(validation::validate_object_key, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(validation::validate_bucket_name, m)?)?;
|
||||
|
||||
m.add_function(wrap_pyfunction!(metadata::read_index_entry, m)?)?;
|
||||
|
||||
m.add_function(wrap_pyfunction!(storage::write_index_entry, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(storage::delete_index_entry, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(storage::check_bucket_contents, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(storage::shallow_scan, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(storage::bucket_stats_scan, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(storage::search_objects_scan, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(storage::build_object_cache, m)?)?;
|
||||
|
||||
m.add_function(wrap_pyfunction!(streaming::stream_to_file_with_md5, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(streaming::assemble_parts_with_md5, m)?)?;
|
||||
|
||||
m.add_function(wrap_pyfunction!(crypto::encrypt_stream_chunked, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(crypto::decrypt_stream_chunked, m)?)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
71
myfsio_core/src/metadata.rs
Normal file
71
myfsio_core/src/metadata.rs
Normal file
@@ -0,0 +1,71 @@
|
||||
use pyo3::exceptions::PyValueError;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::{PyDict, PyList, PyString};
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
|
||||
const MAX_DEPTH: u32 = 64;
|
||||
|
||||
fn value_to_py(py: Python<'_>, v: &Value, depth: u32) -> PyResult<Py<PyAny>> {
|
||||
if depth > MAX_DEPTH {
|
||||
return Err(PyValueError::new_err("JSON nesting too deep"));
|
||||
}
|
||||
match v {
|
||||
Value::Null => Ok(py.None()),
|
||||
Value::Bool(b) => Ok((*b).into_pyobject(py)?.to_owned().into_any().unbind()),
|
||||
Value::Number(n) => {
|
||||
if let Some(i) = n.as_i64() {
|
||||
Ok(i.into_pyobject(py)?.into_any().unbind())
|
||||
} else if let Some(f) = n.as_f64() {
|
||||
Ok(f.into_pyobject(py)?.into_any().unbind())
|
||||
} else {
|
||||
Ok(py.None())
|
||||
}
|
||||
}
|
||||
Value::String(s) => Ok(PyString::new(py, s).into_any().unbind()),
|
||||
Value::Array(arr) => {
|
||||
let list = PyList::empty(py);
|
||||
for item in arr {
|
||||
list.append(value_to_py(py, item, depth + 1)?)?;
|
||||
}
|
||||
Ok(list.into_any().unbind())
|
||||
}
|
||||
Value::Object(map) => {
|
||||
let dict = PyDict::new(py);
|
||||
for (k, val) in map {
|
||||
dict.set_item(k, value_to_py(py, val, depth + 1)?)?;
|
||||
}
|
||||
Ok(dict.into_any().unbind())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn read_index_entry(
|
||||
py: Python<'_>,
|
||||
path: &str,
|
||||
entry_name: &str,
|
||||
) -> PyResult<Option<Py<PyAny>>> {
|
||||
let path_owned = path.to_owned();
|
||||
let entry_owned = entry_name.to_owned();
|
||||
|
||||
let entry: Option<Value> = py.detach(move || -> PyResult<Option<Value>> {
|
||||
let content = match fs::read_to_string(&path_owned) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return Ok(None),
|
||||
};
|
||||
let parsed: Value = match serde_json::from_str(&content) {
|
||||
Ok(v) => v,
|
||||
Err(_) => return Ok(None),
|
||||
};
|
||||
match parsed {
|
||||
Value::Object(mut map) => Ok(map.remove(&entry_owned)),
|
||||
_ => Ok(None),
|
||||
}
|
||||
})?;
|
||||
|
||||
match entry {
|
||||
Some(val) => Ok(Some(value_to_py(py, &val, 0)?)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
use hmac::{Hmac, Mac};
|
||||
use lru::LruCache;
|
||||
use parking_lot::Mutex;
|
||||
use percent_encoding::{percent_encode, AsciiSet, NON_ALPHANUMERIC};
|
||||
use pyo3::prelude::*;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::num::NonZeroUsize;
|
||||
@@ -19,14 +20,29 @@ static SIGNING_KEY_CACHE: LazyLock<Mutex<LruCache<(String, String, String, Strin
|
||||
|
||||
const CACHE_TTL_SECS: u64 = 60;
|
||||
|
||||
const AWS_ENCODE_SET: &AsciiSet = &NON_ALPHANUMERIC
|
||||
.remove(b'-')
|
||||
.remove(b'_')
|
||||
.remove(b'.')
|
||||
.remove(b'~');
|
||||
|
||||
fn hmac_sha256(key: &[u8], msg: &[u8]) -> Vec<u8> {
|
||||
let mut mac = HmacSha256::new_from_slice(key).expect("HMAC key length is always valid");
|
||||
mac.update(msg);
|
||||
mac.finalize().into_bytes().to_vec()
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn derive_signing_key(
|
||||
fn sha256_hex(data: &[u8]) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data);
|
||||
hex::encode(hasher.finalize())
|
||||
}
|
||||
|
||||
fn aws_uri_encode(input: &str) -> String {
|
||||
percent_encode(input.as_bytes(), AWS_ENCODE_SET).to_string()
|
||||
}
|
||||
|
||||
fn derive_signing_key_cached(
|
||||
secret_key: &str,
|
||||
date_stamp: &str,
|
||||
region: &str,
|
||||
@@ -68,18 +84,91 @@ pub fn derive_signing_key(
|
||||
k_signing
|
||||
}
|
||||
|
||||
fn constant_time_compare_inner(a: &[u8], b: &[u8]) -> bool {
|
||||
if a.len() != b.len() {
|
||||
return false;
|
||||
}
|
||||
let mut result: u8 = 0;
|
||||
for (x, y) in a.iter().zip(b.iter()) {
|
||||
result |= x ^ y;
|
||||
}
|
||||
result == 0
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn verify_sigv4_signature(
|
||||
method: &str,
|
||||
canonical_uri: &str,
|
||||
query_params: Vec<(String, String)>,
|
||||
signed_headers_str: &str,
|
||||
header_values: Vec<(String, String)>,
|
||||
payload_hash: &str,
|
||||
amz_date: &str,
|
||||
date_stamp: &str,
|
||||
region: &str,
|
||||
service: &str,
|
||||
secret_key: &str,
|
||||
provided_signature: &str,
|
||||
) -> bool {
|
||||
let mut sorted_params = query_params;
|
||||
sorted_params.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
|
||||
|
||||
let canonical_query_string = sorted_params
|
||||
.iter()
|
||||
.map(|(k, v)| format!("{}={}", aws_uri_encode(k), aws_uri_encode(v)))
|
||||
.collect::<Vec<_>>()
|
||||
.join("&");
|
||||
|
||||
let mut canonical_headers = String::new();
|
||||
for (name, value) in &header_values {
|
||||
let lower_name = name.to_lowercase();
|
||||
let normalized = value.split_whitespace().collect::<Vec<_>>().join(" ");
|
||||
let final_value = if lower_name == "expect" && normalized.is_empty() {
|
||||
"100-continue"
|
||||
} else {
|
||||
&normalized
|
||||
};
|
||||
canonical_headers.push_str(&lower_name);
|
||||
canonical_headers.push(':');
|
||||
canonical_headers.push_str(final_value);
|
||||
canonical_headers.push('\n');
|
||||
}
|
||||
|
||||
let canonical_request = format!(
|
||||
"{}\n{}\n{}\n{}\n{}\n{}",
|
||||
method, canonical_uri, canonical_query_string, canonical_headers, signed_headers_str, payload_hash
|
||||
);
|
||||
|
||||
let credential_scope = format!("{}/{}/{}/aws4_request", date_stamp, region, service);
|
||||
let cr_hash = sha256_hex(canonical_request.as_bytes());
|
||||
let string_to_sign = format!(
|
||||
"AWS4-HMAC-SHA256\n{}\n{}\n{}",
|
||||
amz_date, credential_scope, cr_hash
|
||||
);
|
||||
|
||||
let signing_key = derive_signing_key_cached(secret_key, date_stamp, region, service);
|
||||
let calculated = hmac_sha256(&signing_key, string_to_sign.as_bytes());
|
||||
let calculated_hex = hex::encode(&calculated);
|
||||
|
||||
constant_time_compare_inner(calculated_hex.as_bytes(), provided_signature.as_bytes())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn derive_signing_key(
|
||||
secret_key: &str,
|
||||
date_stamp: &str,
|
||||
region: &str,
|
||||
service: &str,
|
||||
) -> Vec<u8> {
|
||||
derive_signing_key_cached(secret_key, date_stamp, region, service)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn compute_signature(signing_key: &[u8], string_to_sign: &str) -> String {
|
||||
let sig = hmac_sha256(signing_key, string_to_sign.as_bytes());
|
||||
hex::encode(sig)
|
||||
}
|
||||
|
||||
fn sha256_hex(data: &[u8]) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data);
|
||||
hex::encode(hasher.finalize())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn build_string_to_sign(
|
||||
amz_date: &str,
|
||||
@@ -87,19 +176,15 @@ pub fn build_string_to_sign(
|
||||
canonical_request: &str,
|
||||
) -> String {
|
||||
let cr_hash = sha256_hex(canonical_request.as_bytes());
|
||||
format!("AWS4-HMAC-SHA256\n{}\n{}\n{}", amz_date, credential_scope, cr_hash)
|
||||
format!(
|
||||
"AWS4-HMAC-SHA256\n{}\n{}\n{}",
|
||||
amz_date, credential_scope, cr_hash
|
||||
)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn constant_time_compare(a: &str, b: &str) -> bool {
|
||||
if a.len() != b.len() {
|
||||
return false;
|
||||
}
|
||||
let mut result: u8 = 0;
|
||||
for (x, y) in a.bytes().zip(b.bytes()) {
|
||||
result |= x ^ y;
|
||||
}
|
||||
result == 0
|
||||
constant_time_compare_inner(a.as_bytes(), b.as_bytes())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
|
||||
817
myfsio_core/src/storage.rs
Normal file
817
myfsio_core/src/storage.rs
Normal file
@@ -0,0 +1,817 @@
|
||||
use pyo3::exceptions::PyIOError;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::{PyDict, PyList, PyString, PyTuple};
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::time::SystemTime;
|
||||
|
||||
const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
|
||||
|
||||
fn system_time_to_epoch(t: SystemTime) -> f64 {
|
||||
t.duration_since(std::time::UNIX_EPOCH)
|
||||
.map(|d| d.as_secs_f64())
|
||||
.unwrap_or(0.0)
|
||||
}
|
||||
|
||||
fn extract_etag_from_meta_bytes(content: &[u8]) -> Option<String> {
|
||||
let marker = b"\"__etag__\"";
|
||||
let idx = content.windows(marker.len()).position(|w| w == marker)?;
|
||||
let after = &content[idx + marker.len()..];
|
||||
let start = after.iter().position(|&b| b == b'"')? + 1;
|
||||
let rest = &after[start..];
|
||||
let end = rest.iter().position(|&b| b == b'"')?;
|
||||
std::str::from_utf8(&rest[..end]).ok().map(|s| s.to_owned())
|
||||
}
|
||||
|
||||
fn has_any_file(root: &str) -> bool {
|
||||
let root_path = Path::new(root);
|
||||
if !root_path.is_dir() {
|
||||
return false;
|
||||
}
|
||||
let mut stack = vec![root_path.to_path_buf()];
|
||||
while let Some(current) = stack.pop() {
|
||||
let entries = match fs::read_dir(¤t) {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_file() {
|
||||
return true;
|
||||
}
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
stack.push(entry.path());
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn write_index_entry(
|
||||
py: Python<'_>,
|
||||
path: &str,
|
||||
entry_name: &str,
|
||||
entry_data_json: &str,
|
||||
) -> PyResult<()> {
|
||||
let path_owned = path.to_owned();
|
||||
let entry_owned = entry_name.to_owned();
|
||||
let data_owned = entry_data_json.to_owned();
|
||||
|
||||
py.detach(move || -> PyResult<()> {
|
||||
let entry_value: Value = serde_json::from_str(&data_owned)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to parse entry data: {}", e)))?;
|
||||
|
||||
if let Some(parent) = Path::new(&path_owned).parent() {
|
||||
let _ = fs::create_dir_all(parent);
|
||||
}
|
||||
|
||||
let mut index_data: serde_json::Map<String, Value> = match fs::read_to_string(&path_owned)
|
||||
{
|
||||
Ok(content) => serde_json::from_str(&content).unwrap_or_default(),
|
||||
Err(_) => serde_json::Map::new(),
|
||||
};
|
||||
|
||||
index_data.insert(entry_owned, entry_value);
|
||||
|
||||
let serialized = serde_json::to_string(&Value::Object(index_data))
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to serialize index: {}", e)))?;
|
||||
|
||||
fs::write(&path_owned, serialized)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write index: {}", e)))?;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn delete_index_entry(py: Python<'_>, path: &str, entry_name: &str) -> PyResult<bool> {
|
||||
let path_owned = path.to_owned();
|
||||
let entry_owned = entry_name.to_owned();
|
||||
|
||||
py.detach(move || -> PyResult<bool> {
|
||||
let content = match fs::read_to_string(&path_owned) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return Ok(false),
|
||||
};
|
||||
|
||||
let mut index_data: serde_json::Map<String, Value> =
|
||||
match serde_json::from_str(&content) {
|
||||
Ok(v) => v,
|
||||
Err(_) => return Ok(false),
|
||||
};
|
||||
|
||||
if index_data.remove(&entry_owned).is_none() {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
if index_data.is_empty() {
|
||||
let _ = fs::remove_file(&path_owned);
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
let serialized = serde_json::to_string(&Value::Object(index_data))
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to serialize index: {}", e)))?;
|
||||
|
||||
fs::write(&path_owned, serialized)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write index: {}", e)))?;
|
||||
|
||||
Ok(false)
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn check_bucket_contents(
|
||||
py: Python<'_>,
|
||||
bucket_path: &str,
|
||||
version_roots: Vec<String>,
|
||||
multipart_roots: Vec<String>,
|
||||
) -> PyResult<(bool, bool, bool)> {
|
||||
let bucket_owned = bucket_path.to_owned();
|
||||
|
||||
py.detach(move || -> PyResult<(bool, bool, bool)> {
|
||||
let mut has_objects = false;
|
||||
let bucket_p = Path::new(&bucket_owned);
|
||||
if bucket_p.is_dir() {
|
||||
let mut stack = vec![bucket_p.to_path_buf()];
|
||||
'obj_scan: while let Some(current) = stack.pop() {
|
||||
let is_root = current == bucket_p;
|
||||
let entries = match fs::read_dir(¤t) {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if is_root {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
if INTERNAL_FOLDERS.contains(&name) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ft.is_file() && !ft.is_symlink() {
|
||||
has_objects = true;
|
||||
break 'obj_scan;
|
||||
}
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
stack.push(entry.path());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut has_versions = false;
|
||||
for root in &version_roots {
|
||||
if has_versions {
|
||||
break;
|
||||
}
|
||||
has_versions = has_any_file(root);
|
||||
}
|
||||
|
||||
let mut has_multipart = false;
|
||||
for root in &multipart_roots {
|
||||
if has_multipart {
|
||||
break;
|
||||
}
|
||||
has_multipart = has_any_file(root);
|
||||
}
|
||||
|
||||
Ok((has_objects, has_versions, has_multipart))
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn shallow_scan(
|
||||
py: Python<'_>,
|
||||
target_dir: &str,
|
||||
prefix: &str,
|
||||
meta_cache_json: &str,
|
||||
) -> PyResult<Py<PyAny>> {
|
||||
let target_owned = target_dir.to_owned();
|
||||
let prefix_owned = prefix.to_owned();
|
||||
let cache_owned = meta_cache_json.to_owned();
|
||||
|
||||
let result: (
|
||||
Vec<(String, u64, f64, Option<String>)>,
|
||||
Vec<String>,
|
||||
Vec<(String, bool)>,
|
||||
) = py.detach(move || -> PyResult<(
|
||||
Vec<(String, u64, f64, Option<String>)>,
|
||||
Vec<String>,
|
||||
Vec<(String, bool)>,
|
||||
)> {
|
||||
let meta_cache: HashMap<String, String> =
|
||||
serde_json::from_str(&cache_owned).unwrap_or_default();
|
||||
|
||||
let mut files: Vec<(String, u64, f64, Option<String>)> = Vec::new();
|
||||
let mut dirs: Vec<String> = Vec::new();
|
||||
|
||||
let entries = match fs::read_dir(&target_owned) {
|
||||
Ok(e) => e,
|
||||
Err(_) => return Ok((files, dirs, Vec::new())),
|
||||
};
|
||||
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let name = match entry.file_name().into_string() {
|
||||
Ok(n) => n,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if INTERNAL_FOLDERS.contains(&name.as_str()) {
|
||||
continue;
|
||||
}
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
let cp = format!("{}{}/", prefix_owned, name);
|
||||
dirs.push(cp);
|
||||
} else if ft.is_file() && !ft.is_symlink() {
|
||||
let key = format!("{}{}", prefix_owned, name);
|
||||
let md = match entry.metadata() {
|
||||
Ok(m) => m,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let size = md.len();
|
||||
let mtime = md
|
||||
.modified()
|
||||
.map(system_time_to_epoch)
|
||||
.unwrap_or(0.0);
|
||||
let etag = meta_cache.get(&key).cloned();
|
||||
files.push((key, size, mtime, etag));
|
||||
}
|
||||
}
|
||||
|
||||
files.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
dirs.sort();
|
||||
|
||||
let mut merged: Vec<(String, bool)> = Vec::with_capacity(files.len() + dirs.len());
|
||||
let mut fi = 0;
|
||||
let mut di = 0;
|
||||
while fi < files.len() && di < dirs.len() {
|
||||
if files[fi].0 <= dirs[di] {
|
||||
merged.push((files[fi].0.clone(), false));
|
||||
fi += 1;
|
||||
} else {
|
||||
merged.push((dirs[di].clone(), true));
|
||||
di += 1;
|
||||
}
|
||||
}
|
||||
while fi < files.len() {
|
||||
merged.push((files[fi].0.clone(), false));
|
||||
fi += 1;
|
||||
}
|
||||
while di < dirs.len() {
|
||||
merged.push((dirs[di].clone(), true));
|
||||
di += 1;
|
||||
}
|
||||
|
||||
Ok((files, dirs, merged))
|
||||
})?;
|
||||
|
||||
let (files, dirs, merged) = result;
|
||||
|
||||
let dict = PyDict::new(py);
|
||||
|
||||
let files_list = PyList::empty(py);
|
||||
for (key, size, mtime, etag) in &files {
|
||||
let etag_py: Py<PyAny> = match etag {
|
||||
Some(e) => PyString::new(py, e).into_any().unbind(),
|
||||
None => py.None(),
|
||||
};
|
||||
let tuple = PyTuple::new(py, &[
|
||||
PyString::new(py, key).into_any().unbind(),
|
||||
size.into_pyobject(py)?.into_any().unbind(),
|
||||
mtime.into_pyobject(py)?.into_any().unbind(),
|
||||
etag_py,
|
||||
])?;
|
||||
files_list.append(tuple)?;
|
||||
}
|
||||
dict.set_item("files", files_list)?;
|
||||
|
||||
let dirs_list = PyList::empty(py);
|
||||
for d in &dirs {
|
||||
dirs_list.append(PyString::new(py, d))?;
|
||||
}
|
||||
dict.set_item("dirs", dirs_list)?;
|
||||
|
||||
let merged_list = PyList::empty(py);
|
||||
for (key, is_dir) in &merged {
|
||||
let bool_obj: Py<PyAny> = if *is_dir {
|
||||
true.into_pyobject(py)?.to_owned().into_any().unbind()
|
||||
} else {
|
||||
false.into_pyobject(py)?.to_owned().into_any().unbind()
|
||||
};
|
||||
let tuple = PyTuple::new(py, &[
|
||||
PyString::new(py, key).into_any().unbind(),
|
||||
bool_obj,
|
||||
])?;
|
||||
merged_list.append(tuple)?;
|
||||
}
|
||||
dict.set_item("merged_keys", merged_list)?;
|
||||
|
||||
Ok(dict.into_any().unbind())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn bucket_stats_scan(
|
||||
py: Python<'_>,
|
||||
bucket_path: &str,
|
||||
versions_root: &str,
|
||||
) -> PyResult<(u64, u64, u64, u64)> {
|
||||
let bucket_owned = bucket_path.to_owned();
|
||||
let versions_owned = versions_root.to_owned();
|
||||
|
||||
py.detach(move || -> PyResult<(u64, u64, u64, u64)> {
|
||||
let mut object_count: u64 = 0;
|
||||
let mut total_bytes: u64 = 0;
|
||||
|
||||
let bucket_p = Path::new(&bucket_owned);
|
||||
if bucket_p.is_dir() {
|
||||
let mut stack = vec![bucket_p.to_path_buf()];
|
||||
while let Some(current) = stack.pop() {
|
||||
let is_root = current == bucket_p;
|
||||
let entries = match fs::read_dir(¤t) {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if is_root {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
if INTERNAL_FOLDERS.contains(&name) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
stack.push(entry.path());
|
||||
} else if ft.is_file() && !ft.is_symlink() {
|
||||
object_count += 1;
|
||||
if let Ok(md) = entry.metadata() {
|
||||
total_bytes += md.len();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut version_count: u64 = 0;
|
||||
let mut version_bytes: u64 = 0;
|
||||
|
||||
let versions_p = Path::new(&versions_owned);
|
||||
if versions_p.is_dir() {
|
||||
let mut stack = vec![versions_p.to_path_buf()];
|
||||
while let Some(current) = stack.pop() {
|
||||
let entries = match fs::read_dir(¤t) {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
stack.push(entry.path());
|
||||
} else if ft.is_file() && !ft.is_symlink() {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
if name.ends_with(".bin") {
|
||||
version_count += 1;
|
||||
if let Ok(md) = entry.metadata() {
|
||||
version_bytes += md.len();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((object_count, total_bytes, version_count, version_bytes))
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (bucket_path, search_root, query, limit))]
|
||||
pub fn search_objects_scan(
|
||||
py: Python<'_>,
|
||||
bucket_path: &str,
|
||||
search_root: &str,
|
||||
query: &str,
|
||||
limit: usize,
|
||||
) -> PyResult<Py<PyAny>> {
|
||||
let bucket_owned = bucket_path.to_owned();
|
||||
let search_owned = search_root.to_owned();
|
||||
let query_owned = query.to_owned();
|
||||
|
||||
let result: (Vec<(String, u64, f64)>, bool) = py.detach(
|
||||
move || -> PyResult<(Vec<(String, u64, f64)>, bool)> {
|
||||
let query_lower = query_owned.to_lowercase();
|
||||
let bucket_len = bucket_owned.len() + 1;
|
||||
let scan_limit = limit * 4;
|
||||
let mut matched: usize = 0;
|
||||
let mut results: Vec<(String, u64, f64)> = Vec::new();
|
||||
|
||||
let search_p = Path::new(&search_owned);
|
||||
if !search_p.is_dir() {
|
||||
return Ok((results, false));
|
||||
}
|
||||
|
||||
let bucket_p = Path::new(&bucket_owned);
|
||||
let mut stack = vec![search_p.to_path_buf()];
|
||||
|
||||
'scan: while let Some(current) = stack.pop() {
|
||||
let is_bucket_root = current == bucket_p;
|
||||
let entries = match fs::read_dir(¤t) {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if is_bucket_root {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
if INTERNAL_FOLDERS.contains(&name) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
stack.push(entry.path());
|
||||
} else if ft.is_file() && !ft.is_symlink() {
|
||||
let full_path = entry.path();
|
||||
let full_str = full_path.to_string_lossy();
|
||||
if full_str.len() <= bucket_len {
|
||||
continue;
|
||||
}
|
||||
let key = full_str[bucket_len..].replace('\\', "/");
|
||||
if key.to_lowercase().contains(&query_lower) {
|
||||
if let Ok(md) = entry.metadata() {
|
||||
let size = md.len();
|
||||
let mtime = md
|
||||
.modified()
|
||||
.map(system_time_to_epoch)
|
||||
.unwrap_or(0.0);
|
||||
results.push((key, size, mtime));
|
||||
matched += 1;
|
||||
}
|
||||
}
|
||||
if matched >= scan_limit {
|
||||
break 'scan;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
let truncated = results.len() > limit;
|
||||
results.truncate(limit);
|
||||
|
||||
Ok((results, truncated))
|
||||
},
|
||||
)?;
|
||||
|
||||
let (results, truncated) = result;
|
||||
|
||||
let dict = PyDict::new(py);
|
||||
|
||||
let results_list = PyList::empty(py);
|
||||
for (key, size, mtime) in &results {
|
||||
let tuple = PyTuple::new(py, &[
|
||||
PyString::new(py, key).into_any().unbind(),
|
||||
size.into_pyobject(py)?.into_any().unbind(),
|
||||
mtime.into_pyobject(py)?.into_any().unbind(),
|
||||
])?;
|
||||
results_list.append(tuple)?;
|
||||
}
|
||||
dict.set_item("results", results_list)?;
|
||||
dict.set_item("truncated", truncated)?;
|
||||
|
||||
Ok(dict.into_any().unbind())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn build_object_cache(
|
||||
py: Python<'_>,
|
||||
bucket_path: &str,
|
||||
meta_root: &str,
|
||||
etag_index_path: &str,
|
||||
) -> PyResult<Py<PyAny>> {
|
||||
let bucket_owned = bucket_path.to_owned();
|
||||
let meta_owned = meta_root.to_owned();
|
||||
let index_path_owned = etag_index_path.to_owned();
|
||||
|
||||
let result: (HashMap<String, String>, Vec<(String, u64, f64, Option<String>)>, bool) =
|
||||
py.detach(move || -> PyResult<(
|
||||
HashMap<String, String>,
|
||||
Vec<(String, u64, f64, Option<String>)>,
|
||||
bool,
|
||||
)> {
|
||||
let mut meta_cache: HashMap<String, String> = HashMap::new();
|
||||
let mut index_mtime: f64 = 0.0;
|
||||
let mut etag_cache_changed = false;
|
||||
|
||||
let index_p = Path::new(&index_path_owned);
|
||||
if index_p.is_file() {
|
||||
if let Ok(md) = fs::metadata(&index_path_owned) {
|
||||
index_mtime = md
|
||||
.modified()
|
||||
.map(system_time_to_epoch)
|
||||
.unwrap_or(0.0);
|
||||
}
|
||||
if let Ok(content) = fs::read_to_string(&index_path_owned) {
|
||||
if let Ok(parsed) = serde_json::from_str::<HashMap<String, String>>(&content) {
|
||||
meta_cache = parsed;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let meta_p = Path::new(&meta_owned);
|
||||
let mut needs_rebuild = false;
|
||||
|
||||
if meta_p.is_dir() && index_mtime > 0.0 {
|
||||
fn check_newer(dir: &Path, index_mtime: f64) -> bool {
|
||||
let entries = match fs::read_dir(dir) {
|
||||
Ok(e) => e,
|
||||
Err(_) => return false,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
if check_newer(&entry.path(), index_mtime) {
|
||||
return true;
|
||||
}
|
||||
} else if ft.is_file() {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
if name.ends_with(".meta.json") || name == "_index.json" {
|
||||
if let Ok(md) = entry.metadata() {
|
||||
let mt = md
|
||||
.modified()
|
||||
.map(system_time_to_epoch)
|
||||
.unwrap_or(0.0);
|
||||
if mt > index_mtime {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
needs_rebuild = check_newer(meta_p, index_mtime);
|
||||
} else if meta_cache.is_empty() {
|
||||
needs_rebuild = true;
|
||||
}
|
||||
|
||||
if needs_rebuild && meta_p.is_dir() {
|
||||
let meta_str = meta_owned.clone();
|
||||
let meta_len = meta_str.len() + 1;
|
||||
let mut index_files: Vec<String> = Vec::new();
|
||||
let mut legacy_meta_files: Vec<(String, String)> = Vec::new();
|
||||
|
||||
fn collect_meta(
|
||||
dir: &Path,
|
||||
meta_len: usize,
|
||||
index_files: &mut Vec<String>,
|
||||
legacy_meta_files: &mut Vec<(String, String)>,
|
||||
) {
|
||||
let entries = match fs::read_dir(dir) {
|
||||
Ok(e) => e,
|
||||
Err(_) => return,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
collect_meta(&entry.path(), meta_len, index_files, legacy_meta_files);
|
||||
} else if ft.is_file() {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
let full = entry.path().to_string_lossy().to_string();
|
||||
if name == "_index.json" {
|
||||
index_files.push(full);
|
||||
} else if name.ends_with(".meta.json") {
|
||||
if full.len() > meta_len {
|
||||
let rel = &full[meta_len..];
|
||||
let key = if rel.len() > 10 {
|
||||
rel[..rel.len() - 10].replace('\\', "/")
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
legacy_meta_files.push((key, full));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collect_meta(
|
||||
meta_p,
|
||||
meta_len,
|
||||
&mut index_files,
|
||||
&mut legacy_meta_files,
|
||||
);
|
||||
|
||||
meta_cache.clear();
|
||||
|
||||
for idx_path in &index_files {
|
||||
if let Ok(content) = fs::read_to_string(idx_path) {
|
||||
if let Ok(idx_data) = serde_json::from_str::<HashMap<String, Value>>(&content) {
|
||||
let rel_dir = if idx_path.len() > meta_len {
|
||||
let r = &idx_path[meta_len..];
|
||||
r.replace('\\', "/")
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let dir_prefix = if rel_dir.ends_with("/_index.json") {
|
||||
&rel_dir[..rel_dir.len() - "/_index.json".len()]
|
||||
} else {
|
||||
""
|
||||
};
|
||||
for (entry_name, entry_data) in &idx_data {
|
||||
let key = if dir_prefix.is_empty() {
|
||||
entry_name.clone()
|
||||
} else {
|
||||
format!("{}/{}", dir_prefix, entry_name)
|
||||
};
|
||||
if let Some(meta_obj) = entry_data.get("metadata") {
|
||||
if let Some(etag) = meta_obj.get("__etag__") {
|
||||
if let Some(etag_str) = etag.as_str() {
|
||||
meta_cache.insert(key, etag_str.to_owned());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (key, path) in &legacy_meta_files {
|
||||
if meta_cache.contains_key(key) {
|
||||
continue;
|
||||
}
|
||||
if let Ok(content) = fs::read(path) {
|
||||
if let Some(etag) = extract_etag_from_meta_bytes(&content) {
|
||||
meta_cache.insert(key.clone(), etag);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
etag_cache_changed = true;
|
||||
}
|
||||
|
||||
let bucket_p = Path::new(&bucket_owned);
|
||||
let bucket_len = bucket_owned.len() + 1;
|
||||
let mut objects: Vec<(String, u64, f64, Option<String>)> = Vec::new();
|
||||
|
||||
if bucket_p.is_dir() {
|
||||
let mut stack = vec![bucket_p.to_path_buf()];
|
||||
while let Some(current) = stack.pop() {
|
||||
let entries = match fs::read_dir(¤t) {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
let full = entry.path();
|
||||
let full_str = full.to_string_lossy();
|
||||
if full_str.len() > bucket_len {
|
||||
let first_part: &str = if let Some(sep_pos) =
|
||||
full_str[bucket_len..].find(|c: char| c == '\\' || c == '/')
|
||||
{
|
||||
&full_str[bucket_len..bucket_len + sep_pos]
|
||||
} else {
|
||||
&full_str[bucket_len..]
|
||||
};
|
||||
if INTERNAL_FOLDERS.contains(&first_part) {
|
||||
continue;
|
||||
}
|
||||
} else if let Some(name) = entry.file_name().to_str() {
|
||||
if INTERNAL_FOLDERS.contains(&name) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
stack.push(full);
|
||||
} else if ft.is_file() && !ft.is_symlink() {
|
||||
let full = entry.path();
|
||||
let full_str = full.to_string_lossy();
|
||||
if full_str.len() <= bucket_len {
|
||||
continue;
|
||||
}
|
||||
let rel = &full_str[bucket_len..];
|
||||
let first_part: &str =
|
||||
if let Some(sep_pos) = rel.find(|c: char| c == '\\' || c == '/') {
|
||||
&rel[..sep_pos]
|
||||
} else {
|
||||
rel
|
||||
};
|
||||
if INTERNAL_FOLDERS.contains(&first_part) {
|
||||
continue;
|
||||
}
|
||||
let key = rel.replace('\\', "/");
|
||||
if let Ok(md) = entry.metadata() {
|
||||
let size = md.len();
|
||||
let mtime = md
|
||||
.modified()
|
||||
.map(system_time_to_epoch)
|
||||
.unwrap_or(0.0);
|
||||
let etag = meta_cache.get(&key).cloned();
|
||||
objects.push((key, size, mtime, etag));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((meta_cache, objects, etag_cache_changed))
|
||||
})?;
|
||||
|
||||
let (meta_cache, objects, etag_cache_changed) = result;
|
||||
|
||||
let dict = PyDict::new(py);
|
||||
|
||||
let cache_dict = PyDict::new(py);
|
||||
for (k, v) in &meta_cache {
|
||||
cache_dict.set_item(k, v)?;
|
||||
}
|
||||
dict.set_item("etag_cache", cache_dict)?;
|
||||
|
||||
let objects_list = PyList::empty(py);
|
||||
for (key, size, mtime, etag) in &objects {
|
||||
let etag_py: Py<PyAny> = match etag {
|
||||
Some(e) => PyString::new(py, e).into_any().unbind(),
|
||||
None => py.None(),
|
||||
};
|
||||
let tuple = PyTuple::new(py, &[
|
||||
PyString::new(py, key).into_any().unbind(),
|
||||
size.into_pyobject(py)?.into_any().unbind(),
|
||||
mtime.into_pyobject(py)?.into_any().unbind(),
|
||||
etag_py,
|
||||
])?;
|
||||
objects_list.append(tuple)?;
|
||||
}
|
||||
dict.set_item("objects", objects_list)?;
|
||||
dict.set_item("etag_cache_changed", etag_cache_changed)?;
|
||||
|
||||
Ok(dict.into_any().unbind())
|
||||
}
|
||||
112
myfsio_core/src/streaming.rs
Normal file
112
myfsio_core/src/streaming.rs
Normal file
@@ -0,0 +1,112 @@
|
||||
use md5::{Digest, Md5};
|
||||
use pyo3::exceptions::{PyIOError, PyValueError};
|
||||
use pyo3::prelude::*;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{Read, Write};
|
||||
use uuid::Uuid;
|
||||
|
||||
const DEFAULT_CHUNK_SIZE: usize = 262144;
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (stream, tmp_dir, chunk_size=DEFAULT_CHUNK_SIZE))]
|
||||
pub fn stream_to_file_with_md5(
|
||||
py: Python<'_>,
|
||||
stream: &Bound<'_, PyAny>,
|
||||
tmp_dir: &str,
|
||||
chunk_size: usize,
|
||||
) -> PyResult<(String, String, u64)> {
|
||||
let chunk_size = if chunk_size == 0 {
|
||||
DEFAULT_CHUNK_SIZE
|
||||
} else {
|
||||
chunk_size
|
||||
};
|
||||
|
||||
fs::create_dir_all(tmp_dir)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to create tmp dir: {}", e)))?;
|
||||
|
||||
let tmp_name = format!("{}.tmp", Uuid::new_v4().as_hyphenated());
|
||||
let tmp_path_buf = std::path::PathBuf::from(tmp_dir).join(&tmp_name);
|
||||
let tmp_path = tmp_path_buf.to_string_lossy().into_owned();
|
||||
|
||||
let mut file = File::create(&tmp_path)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to create temp file: {}", e)))?;
|
||||
let mut hasher = Md5::new();
|
||||
let mut total_bytes: u64 = 0;
|
||||
|
||||
let result: PyResult<()> = (|| {
|
||||
loop {
|
||||
let chunk: Vec<u8> = stream.call_method1("read", (chunk_size,))?.extract()?;
|
||||
if chunk.is_empty() {
|
||||
break;
|
||||
}
|
||||
hasher.update(&chunk);
|
||||
file.write_all(&chunk)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write: {}", e)))?;
|
||||
total_bytes += chunk.len() as u64;
|
||||
|
||||
py.check_signals()?;
|
||||
}
|
||||
file.sync_all()
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to fsync: {}", e)))?;
|
||||
Ok(())
|
||||
})();
|
||||
|
||||
if let Err(e) = result {
|
||||
drop(file);
|
||||
let _ = fs::remove_file(&tmp_path);
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
drop(file);
|
||||
|
||||
let md5_hex = format!("{:x}", hasher.finalize());
|
||||
Ok((tmp_path, md5_hex, total_bytes))
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn assemble_parts_with_md5(
|
||||
py: Python<'_>,
|
||||
part_paths: Vec<String>,
|
||||
dest_path: &str,
|
||||
) -> PyResult<String> {
|
||||
if part_paths.is_empty() {
|
||||
return Err(PyValueError::new_err("No parts to assemble"));
|
||||
}
|
||||
|
||||
let dest = dest_path.to_owned();
|
||||
let parts = part_paths;
|
||||
|
||||
py.detach(move || {
|
||||
if let Some(parent) = std::path::Path::new(&dest).parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to create dest dir: {}", e)))?;
|
||||
}
|
||||
|
||||
let mut target = File::create(&dest)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to create dest file: {}", e)))?;
|
||||
let mut hasher = Md5::new();
|
||||
let mut buf = vec![0u8; 1024 * 1024];
|
||||
|
||||
for part_path in &parts {
|
||||
let mut part = File::open(part_path)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to open part {}: {}", part_path, e)))?;
|
||||
loop {
|
||||
let n = part
|
||||
.read(&mut buf)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to read part: {}", e)))?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
hasher.update(&buf[..n]);
|
||||
target
|
||||
.write_all(&buf[..n])
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write: {}", e)))?;
|
||||
}
|
||||
}
|
||||
|
||||
target.sync_all()
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to fsync: {}", e)))?;
|
||||
|
||||
Ok(format!("{:x}", hasher.finalize()))
|
||||
})
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc_fingerprint":13172970000770725120,"outputs":{"7971740275564407648":{"success":true,"status":"","code":0,"stdout":"___.exe\nlib___.rlib\n___.dll\n___.dll\n___.lib\n___.dll\nC:\\Users\\jun\\.rustup\\toolchains\\stable-x86_64-pc-windows-msvc\npacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"msvc\"\ntarget_family=\"windows\"\ntarget_feature=\"cmpxchg16b\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_feature=\"sse3\"\ntarget_has_atomic=\"128\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"windows\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"pc\"\nwindows\n","stderr":""},"17747080675513052775":{"success":true,"status":"","code":0,"stdout":"rustc 1.93.1 (01f6ddf75 2026-02-11)\nbinary: rustc\ncommit-hash: 01f6ddf7588f42ae2d7eb0a2f21d44e8e96674cf\ncommit-date: 2026-02-11\nhost: x86_64-pc-windows-msvc\nrelease: 1.93.1\nLLVM version: 21.1.8\n","stderr":""}},"successes":{}}
|
||||
@@ -1,3 +0,0 @@
|
||||
Signature: 8a477f597d28d172789f06886806bc55
|
||||
# This file is a cache directory tag created by cargo.
|
||||
# For information about cache directory tags see https://bford.info/cachedir/
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
801af22cf202da8e
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[\"perf-literal\", \"std\"]","declared_features":"[\"default\", \"logging\", \"perf-literal\", \"std\"]","target":7534583537114156500,"profile":2040997289075261528,"path":6364296192483896971,"deps":[[1363051979936526615,"memchr",false,11090220145123168660]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\aho-corasick-45694771b543be75\\dep-lib-aho_corasick","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
435555ec2fb592e3
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[\"alloc\"]","declared_features":"[\"alloc\", \"default\", \"fresh-rust\", \"nightly\", \"serde\", \"std\"]","target":5388200169723499962,"profile":4067574213046180398,"path":10654049299693593327,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\allocator-api2-db7934dbe96de5b4\\dep-lib-allocator_api2","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
d28af275d001c358
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":6962977057026645649,"profile":1369601567987815722,"path":9853093265219907461,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\autocfg-1c4fb7a37cc3df69\\dep-lib-autocfg","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
1fbf4ba9542edced
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":4098124618827574291,"profile":2040997289075261528,"path":3658007358608479489,"deps":[[10520923840501062997,"generic_array",false,11555283918993371487]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\block-buffer-95b0ac364bec72f9\\dep-lib-block_buffer","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
37923e6f5f9687ab
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[\"core\", \"rustc-dep-of-std\"]","target":13840298032947503755,"profile":2040997289075261528,"path":4093486168504982869,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\cfg-if-be2711f84a777e73\\dep-lib-cfg_if","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
603e28136cf5763c
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":2330704043955282025,"profile":2040997289075261528,"path":13200428550696548327,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\cpufeatures-980094f8735c42d1\\dep-lib-cpufeatures","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
896672d759b5299c
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[\"std\"]","declared_features":"[\"getrandom\", \"rand_core\", \"std\"]","target":12082577455412410174,"profile":2040997289075261528,"path":14902376638882023040,"deps":[[857979250431893282,"typenum",false,7416411392359930020],[10520923840501062997,"generic_array",false,11555283918993371487]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\crypto-common-289a508abdda3048\\dep-lib-crypto_common","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
914a617b9f05c9d8
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[\"alloc\", \"block-buffer\", \"core-api\", \"default\", \"mac\", \"std\", \"subtle\"]","declared_features":"[\"alloc\", \"blobby\", \"block-buffer\", \"const-oid\", \"core-api\", \"default\", \"dev\", \"mac\", \"oid\", \"rand_core\", \"std\", \"subtle\"]","target":7510122432137863311,"profile":2040997289075261528,"path":11503432597517024930,"deps":[[6039282458970808711,"crypto_common",false,11252724541433210505],[10626340395483396037,"block_buffer",false,17139625223017709343],[17003143334332120809,"subtle",false,8597342066671925934]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\digest-a91458bfa5613332\\dep-lib-digest","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
3b95cf48bbd7dc53
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":1524667692659508025,"profile":2040997289075261528,"path":17534356223679657546,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\equivalent-943ac856871c0988\\dep-lib-equivalent","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
b7ba5182ce570398
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[\"default\", \"std\"]","target":18077926938045032029,"profile":2040997289075261528,"path":9869209539952544870,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\foldhash-b8a92f8c10d550f7\\dep-lib-foldhash","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
@@ -1 +0,0 @@
|
||||
f0a5af4d8a8c7106
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[\"more_lengths\"]","declared_features":"[\"more_lengths\", \"serde\", \"zeroize\"]","target":12318548087768197662,"profile":1369601567987815722,"path":13853454403963664247,"deps":[[5398981501050481332,"version_check",false,16419025953046340415]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\generic-array-2462daa120fe5936\\dep-build-script-build-script-build","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
5f316276809d5ca0
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[\"more_lengths\"]","declared_features":"[\"more_lengths\", \"serde\", \"zeroize\"]","target":13084005262763373425,"profile":2040997289075261528,"path":12463275850883329568,"deps":[[857979250431893282,"typenum",false,7416411392359930020],[10520923840501062997,"build_script_build",false,16977603856295925732]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\generic-array-62216349963f3a3c\\dep-lib-generic_array","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
@@ -1 +0,0 @@
|
||||
e417d28fc1909ceb
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"","declared_features":"","target":0,"profile":0,"path":0,"deps":[[10520923840501062997,"build_script_build",false,464306762232604144]],"local":[{"Precalculated":"0.14.7"}],"rustflags":[],"config":0,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
aec88a641c5288e3
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[\"allocator-api2\", \"default\", \"default-hasher\", \"equivalent\", \"inline-more\", \"raw-entry\"]","declared_features":"[\"alloc\", \"allocator-api2\", \"core\", \"default\", \"default-hasher\", \"equivalent\", \"inline-more\", \"nightly\", \"raw-entry\", \"rayon\", \"rustc-dep-of-std\", \"rustc-internal-api\", \"serde\"]","target":13796197676120832388,"profile":2040997289075261528,"path":12448322139402656924,"deps":[[5230392855116717286,"equivalent",false,6042941999404782907],[9150530836556604396,"allocator_api2",false,16398368410642502979],[10842263908529601448,"foldhash",false,10953695263156452023]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\hashbrown-510d641b592c306b\\dep-lib-hashbrown","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
ddc0b590ff80762b
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":17886154901722686619,"profile":1369601567987815722,"path":8608102977929876445,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\heck-b47c94fd2a7e00cb\\dep-lib-heck","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
41890ebff4143fa5
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[\"alloc\", \"default\", \"std\"]","declared_features":"[\"alloc\", \"default\", \"serde\", \"std\"]","target":4242469766639956503,"profile":2040997289075261528,"path":6793865871540733919,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\hex-253414d2260adcdf\\dep-lib-hex","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
@@ -1 +0,0 @@
|
||||
3f45b8d062d94ba4
|
||||
@@ -1 +0,0 @@
|
||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[\"reset\", \"std\"]","target":12991177224612424488,"profile":2040997289075261528,"path":17893893568771568113,"deps":[[17475753849556516473,"digest",false,15621022965039188625]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\hmac-3297e61b9effb758\\dep-lib-hmac","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
This file has an mtime of when this was started.
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user