Compare commits
99 Commits
4c661477d5
...
v0.4.1
| Author | SHA1 | Date | |
|---|---|---|---|
| f0c95ac0a9 | |||
| 0e392e18b4 | |||
| 8996f1ce06 | |||
| f60dbaf9c9 | |||
| 1a5a7aa9e1 | |||
| 326367ae4c | |||
| a7f9b0a22f | |||
| 0e525713b1 | |||
| f43fad02fb | |||
| eff3e378f3 | |||
| 5e32cef792 | |||
| 9898167f8d | |||
| 8ff4797041 | |||
| 4a553555d3 | |||
| 7a3202c996 | |||
| bd20ca86ab | |||
| 532cf95d59 | |||
| 366f8ce60d | |||
| 7612cb054a | |||
| 966d524dca | |||
| e84f1f1851 | |||
| a059f0502d | |||
| afd7173ba0 | |||
| c807bb2388 | |||
| aa4f9f5566 | |||
| 14786151e5 | |||
| a496862902 | |||
| df4f27ca2e | |||
| d72e0a347e | |||
| 6ed4b7d8ea | |||
| 31ebbea680 | |||
| d878134ebf | |||
| 50fb5aa387 | |||
| 55568d6892 | |||
| a4ae81c77c | |||
| 9da7104887 | |||
| cc161bf362 | |||
| de5377e5ac | |||
| 80b77b64eb | |||
| 6c912a3d71 | |||
| 2a0e77a754 | |||
| c6e368324a | |||
| 7b6c096bb7 | |||
| 03353a0aec | |||
| eb0e435a5a | |||
| 72f5d9d70c | |||
| be63e27c15 | |||
| 7633007a08 | |||
| 81ef0fe4c7 | |||
| 5f24bd920d | |||
| 8552f193de | |||
| de0d869c9f | |||
| 5536330aeb | |||
| d4657c389d | |||
| 3827235232 | |||
| fdd068feee | |||
| dfc0058d0d | |||
| 27aef84311 | |||
| 66b7677d2c | |||
| 5003514a3d | |||
| 4d90ead816 | |||
| 20a314e030 | |||
| b37a51ed1d | |||
| d8232340c3 | |||
| a356bb0c4e | |||
| 1c328ee3af | |||
| 5bf7962c04 | |||
| e06f653606 | |||
| 0462a7b62e | |||
| 9c2809c195 | |||
| fb32ca0a7d | |||
| 6ab702a818 | |||
| 550e7d435c | |||
| 776967e80d | |||
| 082a7fbcd1 | |||
| ff287cf67b | |||
| bddf36d52d | |||
| cf6cec9cab | |||
| d425839e57 | |||
| 52660570c1 | |||
| 35f61313e0 | |||
| c470cfb576 | |||
| d96955deee | |||
| 85181f0be6 | |||
| d5ca7a8be1 | |||
| 476dc79e42 | |||
| bb6590fc5e | |||
| 899db3421b | |||
| caf01d6ada | |||
| bb366cb4cd | |||
| a2745ff2ee | |||
| 28cb656d94 | |||
| 3c44152fc6 | |||
| 397515edce | |||
| 980fced7e4 | |||
| bae5009ec4 | |||
| 233780617f | |||
| fd8fb21517 | |||
| c6cbe822e1 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -26,6 +26,10 @@ dist/
|
|||||||
*.egg-info/
|
*.egg-info/
|
||||||
.eggs/
|
.eggs/
|
||||||
|
|
||||||
|
# Rust / maturin build artifacts
|
||||||
|
myfsio_core/target/
|
||||||
|
myfsio_core/Cargo.lock
|
||||||
|
|
||||||
# Local runtime artifacts
|
# Local runtime artifacts
|
||||||
logs/
|
logs/
|
||||||
*.log
|
*.log
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ python run.py --mode api # API only (port 5000)
|
|||||||
python run.py --mode ui # UI only (port 5100)
|
python run.py --mode ui # UI only (port 5100)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Default Credentials:** `localadmin` / `localadmin`
|
**Credentials:** Generated automatically on first run and printed to the console. If missed, check the IAM config file at `<STORAGE_ROOT>/.myfsio.sys/config/iam.json`.
|
||||||
|
|
||||||
- **Web Console:** http://127.0.0.1:5100/ui
|
- **Web Console:** http://127.0.0.1:5100/ui
|
||||||
- **API Endpoint:** http://127.0.0.1:5000
|
- **API Endpoint:** http://127.0.0.1:5000
|
||||||
|
|||||||
211
app/__init__.py
211
app/__init__.py
@@ -1,12 +1,13 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import html as html_module
|
import html as html_module
|
||||||
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import uuid
|
|
||||||
from logging.handlers import RotatingFileHandler
|
from logging.handlers import RotatingFileHandler
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
@@ -17,6 +18,8 @@ from flask_cors import CORS
|
|||||||
from flask_wtf.csrf import CSRFError
|
from flask_wtf.csrf import CSRFError
|
||||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||||
|
|
||||||
|
import io
|
||||||
|
|
||||||
from .access_logging import AccessLoggingService
|
from .access_logging import AccessLoggingService
|
||||||
from .operation_metrics import OperationMetricsCollector, classify_endpoint
|
from .operation_metrics import OperationMetricsCollector, classify_endpoint
|
||||||
from .compression import GzipMiddleware
|
from .compression import GzipMiddleware
|
||||||
@@ -28,6 +31,8 @@ from .encryption import EncryptionManager
|
|||||||
from .extensions import limiter, csrf
|
from .extensions import limiter, csrf
|
||||||
from .iam import IamService
|
from .iam import IamService
|
||||||
from .kms import KMSManager
|
from .kms import KMSManager
|
||||||
|
from .gc import GarbageCollector
|
||||||
|
from .integrity import IntegrityChecker
|
||||||
from .lifecycle import LifecycleManager
|
from .lifecycle import LifecycleManager
|
||||||
from .notifications import NotificationService
|
from .notifications import NotificationService
|
||||||
from .object_lock import ObjectLockService
|
from .object_lock import ObjectLockService
|
||||||
@@ -38,6 +43,66 @@ from .storage import ObjectStorage, StorageError
|
|||||||
from .version import get_version
|
from .version import get_version
|
||||||
from .website_domains import WebsiteDomainStore
|
from .website_domains import WebsiteDomainStore
|
||||||
|
|
||||||
|
_request_counter = itertools.count(1)
|
||||||
|
|
||||||
|
|
||||||
|
class _ChunkedTransferMiddleware:
|
||||||
|
|
||||||
|
def __init__(self, app):
|
||||||
|
self.app = app
|
||||||
|
|
||||||
|
def __call__(self, environ, start_response):
|
||||||
|
if environ.get("REQUEST_METHOD") not in ("PUT", "POST"):
|
||||||
|
return self.app(environ, start_response)
|
||||||
|
|
||||||
|
transfer_encoding = environ.get("HTTP_TRANSFER_ENCODING", "")
|
||||||
|
content_length = environ.get("CONTENT_LENGTH")
|
||||||
|
|
||||||
|
if "chunked" in transfer_encoding.lower():
|
||||||
|
if content_length:
|
||||||
|
del environ["HTTP_TRANSFER_ENCODING"]
|
||||||
|
else:
|
||||||
|
raw = environ.get("wsgi.input")
|
||||||
|
if raw:
|
||||||
|
try:
|
||||||
|
if hasattr(raw, "seek"):
|
||||||
|
raw.seek(0)
|
||||||
|
body = raw.read()
|
||||||
|
except Exception:
|
||||||
|
body = b""
|
||||||
|
if body:
|
||||||
|
environ["wsgi.input"] = io.BytesIO(body)
|
||||||
|
environ["CONTENT_LENGTH"] = str(len(body))
|
||||||
|
del environ["HTTP_TRANSFER_ENCODING"]
|
||||||
|
|
||||||
|
content_length = environ.get("CONTENT_LENGTH")
|
||||||
|
if not content_length or content_length == "0":
|
||||||
|
sha256 = environ.get("HTTP_X_AMZ_CONTENT_SHA256", "")
|
||||||
|
decoded_len = environ.get("HTTP_X_AMZ_DECODED_CONTENT_LENGTH", "")
|
||||||
|
content_encoding = environ.get("HTTP_CONTENT_ENCODING", "")
|
||||||
|
if ("STREAMING" in sha256.upper() or decoded_len
|
||||||
|
or "aws-chunked" in content_encoding.lower()):
|
||||||
|
raw = environ.get("wsgi.input")
|
||||||
|
if raw:
|
||||||
|
try:
|
||||||
|
if hasattr(raw, "seek"):
|
||||||
|
raw.seek(0)
|
||||||
|
body = raw.read()
|
||||||
|
except Exception:
|
||||||
|
body = b""
|
||||||
|
if body:
|
||||||
|
environ["wsgi.input"] = io.BytesIO(body)
|
||||||
|
environ["CONTENT_LENGTH"] = str(len(body))
|
||||||
|
|
||||||
|
raw = environ.get("wsgi.input")
|
||||||
|
if raw and hasattr(raw, "seek"):
|
||||||
|
try:
|
||||||
|
raw.seek(0)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return self.app(environ, start_response)
|
||||||
|
|
||||||
|
|
||||||
def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
|
def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
|
||||||
"""Migrate config file from legacy locations to the active path.
|
"""Migrate config file from legacy locations to the active path.
|
||||||
@@ -93,12 +158,20 @@ def create_app(
|
|||||||
app.config.setdefault("WTF_CSRF_ENABLED", False)
|
app.config.setdefault("WTF_CSRF_ENABLED", False)
|
||||||
|
|
||||||
# Trust X-Forwarded-* headers from proxies
|
# Trust X-Forwarded-* headers from proxies
|
||||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1)
|
num_proxies = app.config.get("NUM_TRUSTED_PROXIES", 1)
|
||||||
|
if num_proxies:
|
||||||
|
if "NUM_TRUSTED_PROXIES" not in os.environ:
|
||||||
|
logging.getLogger(__name__).warning(
|
||||||
|
"NUM_TRUSTED_PROXIES not set, defaulting to 1. "
|
||||||
|
"Set NUM_TRUSTED_PROXIES=0 if not behind a reverse proxy."
|
||||||
|
)
|
||||||
|
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=num_proxies, x_proto=num_proxies, x_host=num_proxies, x_prefix=num_proxies)
|
||||||
|
|
||||||
# Enable gzip compression for responses (10-20x smaller JSON payloads)
|
|
||||||
if app.config.get("ENABLE_GZIP", True):
|
if app.config.get("ENABLE_GZIP", True):
|
||||||
app.wsgi_app = GzipMiddleware(app.wsgi_app, compression_level=6)
|
app.wsgi_app = GzipMiddleware(app.wsgi_app, compression_level=6)
|
||||||
|
|
||||||
|
app.wsgi_app = _ChunkedTransferMiddleware(app.wsgi_app)
|
||||||
|
|
||||||
_configure_cors(app)
|
_configure_cors(app)
|
||||||
_configure_logging(app)
|
_configure_logging(app)
|
||||||
|
|
||||||
@@ -107,7 +180,7 @@ def create_app(
|
|||||||
|
|
||||||
storage = ObjectStorage(
|
storage = ObjectStorage(
|
||||||
Path(app.config["STORAGE_ROOT"]),
|
Path(app.config["STORAGE_ROOT"]),
|
||||||
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 5),
|
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 60),
|
||||||
object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100),
|
object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100),
|
||||||
bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0),
|
bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0),
|
||||||
object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024),
|
object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024),
|
||||||
@@ -120,6 +193,7 @@ def create_app(
|
|||||||
Path(app.config["IAM_CONFIG"]),
|
Path(app.config["IAM_CONFIG"]),
|
||||||
auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5),
|
auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5),
|
||||||
auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15),
|
auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15),
|
||||||
|
encryption_key=app.config.get("SECRET_KEY"),
|
||||||
)
|
)
|
||||||
bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
|
bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
|
||||||
secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))
|
secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))
|
||||||
@@ -210,6 +284,31 @@ def create_app(
|
|||||||
)
|
)
|
||||||
lifecycle_manager.start()
|
lifecycle_manager.start()
|
||||||
|
|
||||||
|
gc_collector = None
|
||||||
|
if app.config.get("GC_ENABLED", False):
|
||||||
|
gc_collector = GarbageCollector(
|
||||||
|
storage_root=storage_root,
|
||||||
|
interval_hours=app.config.get("GC_INTERVAL_HOURS", 6.0),
|
||||||
|
temp_file_max_age_hours=app.config.get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0),
|
||||||
|
multipart_max_age_days=app.config.get("GC_MULTIPART_MAX_AGE_DAYS", 7),
|
||||||
|
lock_file_max_age_hours=app.config.get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0),
|
||||||
|
dry_run=app.config.get("GC_DRY_RUN", False),
|
||||||
|
io_throttle_ms=app.config.get("GC_IO_THROTTLE_MS", 10),
|
||||||
|
)
|
||||||
|
gc_collector.start()
|
||||||
|
|
||||||
|
integrity_checker = None
|
||||||
|
if app.config.get("INTEGRITY_ENABLED", False):
|
||||||
|
integrity_checker = IntegrityChecker(
|
||||||
|
storage_root=storage_root,
|
||||||
|
interval_hours=app.config.get("INTEGRITY_INTERVAL_HOURS", 24.0),
|
||||||
|
batch_size=app.config.get("INTEGRITY_BATCH_SIZE", 1000),
|
||||||
|
auto_heal=app.config.get("INTEGRITY_AUTO_HEAL", False),
|
||||||
|
dry_run=app.config.get("INTEGRITY_DRY_RUN", False),
|
||||||
|
io_throttle_ms=app.config.get("INTEGRITY_IO_THROTTLE_MS", 10),
|
||||||
|
)
|
||||||
|
integrity_checker.start()
|
||||||
|
|
||||||
app.extensions["object_storage"] = storage
|
app.extensions["object_storage"] = storage
|
||||||
app.extensions["iam"] = iam
|
app.extensions["iam"] = iam
|
||||||
app.extensions["bucket_policies"] = bucket_policies
|
app.extensions["bucket_policies"] = bucket_policies
|
||||||
@@ -221,6 +320,8 @@ def create_app(
|
|||||||
app.extensions["kms"] = kms_manager
|
app.extensions["kms"] = kms_manager
|
||||||
app.extensions["acl"] = acl_service
|
app.extensions["acl"] = acl_service
|
||||||
app.extensions["lifecycle"] = lifecycle_manager
|
app.extensions["lifecycle"] = lifecycle_manager
|
||||||
|
app.extensions["gc"] = gc_collector
|
||||||
|
app.extensions["integrity"] = integrity_checker
|
||||||
app.extensions["object_lock"] = object_lock_service
|
app.extensions["object_lock"] = object_lock_service
|
||||||
app.extensions["notifications"] = notification_service
|
app.extensions["notifications"] = notification_service
|
||||||
app.extensions["access_logging"] = access_logging_service
|
app.extensions["access_logging"] = access_logging_service
|
||||||
@@ -473,13 +574,9 @@ def _configure_logging(app: Flask) -> None:
|
|||||||
|
|
||||||
@app.before_request
|
@app.before_request
|
||||||
def _log_request_start() -> None:
|
def _log_request_start() -> None:
|
||||||
g.request_id = uuid.uuid4().hex
|
g.request_id = f"{os.getpid():x}{next(_request_counter):012x}"
|
||||||
g.request_started_at = time.perf_counter()
|
g.request_started_at = time.perf_counter()
|
||||||
g.request_bytes_in = request.content_length or 0
|
g.request_bytes_in = request.content_length or 0
|
||||||
app.logger.info(
|
|
||||||
"Request started",
|
|
||||||
extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
|
|
||||||
)
|
|
||||||
|
|
||||||
@app.before_request
|
@app.before_request
|
||||||
def _maybe_serve_website():
|
def _maybe_serve_website():
|
||||||
@@ -528,30 +625,57 @@ def _configure_logging(app: Flask) -> None:
|
|||||||
is_encrypted = "x-amz-server-side-encryption" in metadata
|
is_encrypted = "x-amz-server-side-encryption" in metadata
|
||||||
except (StorageError, OSError):
|
except (StorageError, OSError):
|
||||||
pass
|
pass
|
||||||
if request.method == "HEAD":
|
|
||||||
response = Response(status=200)
|
|
||||||
if is_encrypted and hasattr(storage, "get_object_data"):
|
|
||||||
try:
|
|
||||||
data, _ = storage.get_object_data(bucket, object_key)
|
|
||||||
response.headers["Content-Length"] = len(data)
|
|
||||||
except (StorageError, OSError):
|
|
||||||
return _website_error_response(500, "Internal Server Error")
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
stat = obj_path.stat()
|
|
||||||
response.headers["Content-Length"] = stat.st_size
|
|
||||||
except OSError:
|
|
||||||
return _website_error_response(500, "Internal Server Error")
|
|
||||||
response.headers["Content-Type"] = content_type
|
|
||||||
return response
|
|
||||||
if is_encrypted and hasattr(storage, "get_object_data"):
|
if is_encrypted and hasattr(storage, "get_object_data"):
|
||||||
try:
|
try:
|
||||||
data, _ = storage.get_object_data(bucket, object_key)
|
data, _ = storage.get_object_data(bucket, object_key)
|
||||||
response = Response(data, mimetype=content_type)
|
file_size = len(data)
|
||||||
response.headers["Content-Length"] = len(data)
|
|
||||||
return response
|
|
||||||
except (StorageError, OSError):
|
except (StorageError, OSError):
|
||||||
return _website_error_response(500, "Internal Server Error")
|
return _website_error_response(500, "Internal Server Error")
|
||||||
|
else:
|
||||||
|
data = None
|
||||||
|
try:
|
||||||
|
stat = obj_path.stat()
|
||||||
|
file_size = stat.st_size
|
||||||
|
except OSError:
|
||||||
|
return _website_error_response(500, "Internal Server Error")
|
||||||
|
if request.method == "HEAD":
|
||||||
|
response = Response(status=200)
|
||||||
|
response.headers["Content-Length"] = file_size
|
||||||
|
response.headers["Content-Type"] = content_type
|
||||||
|
response.headers["Accept-Ranges"] = "bytes"
|
||||||
|
return response
|
||||||
|
from .s3_api import _parse_range_header
|
||||||
|
range_header = request.headers.get("Range")
|
||||||
|
if range_header:
|
||||||
|
ranges = _parse_range_header(range_header, file_size)
|
||||||
|
if ranges is None:
|
||||||
|
return Response(status=416, headers={"Content-Range": f"bytes */{file_size}"})
|
||||||
|
start, end = ranges[0]
|
||||||
|
length = end - start + 1
|
||||||
|
if data is not None:
|
||||||
|
partial_data = data[start:end + 1]
|
||||||
|
response = Response(partial_data, status=206, mimetype=content_type)
|
||||||
|
else:
|
||||||
|
def _stream_range(file_path, start_pos, length_to_read):
|
||||||
|
with file_path.open("rb") as f:
|
||||||
|
f.seek(start_pos)
|
||||||
|
remaining = length_to_read
|
||||||
|
while remaining > 0:
|
||||||
|
chunk = f.read(min(262144, remaining))
|
||||||
|
if not chunk:
|
||||||
|
break
|
||||||
|
remaining -= len(chunk)
|
||||||
|
yield chunk
|
||||||
|
response = Response(_stream_range(obj_path, start, length), status=206, mimetype=content_type, direct_passthrough=True)
|
||||||
|
response.headers["Content-Range"] = f"bytes {start}-{end}/{file_size}"
|
||||||
|
response.headers["Content-Length"] = length
|
||||||
|
response.headers["Accept-Ranges"] = "bytes"
|
||||||
|
return response
|
||||||
|
if data is not None:
|
||||||
|
response = Response(data, mimetype=content_type)
|
||||||
|
response.headers["Content-Length"] = file_size
|
||||||
|
response.headers["Accept-Ranges"] = "bytes"
|
||||||
|
return response
|
||||||
def _stream(file_path):
|
def _stream(file_path):
|
||||||
with file_path.open("rb") as f:
|
with file_path.open("rb") as f:
|
||||||
while True:
|
while True:
|
||||||
@@ -559,13 +683,10 @@ def _configure_logging(app: Flask) -> None:
|
|||||||
if not chunk:
|
if not chunk:
|
||||||
break
|
break
|
||||||
yield chunk
|
yield chunk
|
||||||
try:
|
response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
|
||||||
stat = obj_path.stat()
|
response.headers["Content-Length"] = file_size
|
||||||
response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
|
response.headers["Accept-Ranges"] = "bytes"
|
||||||
response.headers["Content-Length"] = stat.st_size
|
return response
|
||||||
return response
|
|
||||||
except OSError:
|
|
||||||
return _website_error_response(500, "Internal Server Error")
|
|
||||||
|
|
||||||
def _serve_website_error(storage, bucket, error_doc_key, status_code):
|
def _serve_website_error(storage, bucket, error_doc_key, status_code):
|
||||||
if not error_doc_key:
|
if not error_doc_key:
|
||||||
@@ -608,17 +729,19 @@ def _configure_logging(app: Flask) -> None:
|
|||||||
duration_ms = 0.0
|
duration_ms = 0.0
|
||||||
if hasattr(g, "request_started_at"):
|
if hasattr(g, "request_started_at"):
|
||||||
duration_ms = (time.perf_counter() - g.request_started_at) * 1000
|
duration_ms = (time.perf_counter() - g.request_started_at) * 1000
|
||||||
request_id = getattr(g, "request_id", uuid.uuid4().hex)
|
request_id = getattr(g, "request_id", f"{os.getpid():x}{next(_request_counter):012x}")
|
||||||
response.headers.setdefault("X-Request-ID", request_id)
|
response.headers.setdefault("X-Request-ID", request_id)
|
||||||
app.logger.info(
|
if app.logger.isEnabledFor(logging.INFO):
|
||||||
"Request completed",
|
app.logger.info(
|
||||||
extra={
|
"Request completed",
|
||||||
"path": request.path,
|
extra={
|
||||||
"method": request.method,
|
"path": request.path,
|
||||||
"remote_addr": request.remote_addr,
|
"method": request.method,
|
||||||
},
|
"remote_addr": request.remote_addr,
|
||||||
)
|
},
|
||||||
|
)
|
||||||
response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
|
response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
|
||||||
|
response.headers["Server"] = "MyFSIO"
|
||||||
|
|
||||||
operation_metrics = app.extensions.get("operation_metrics")
|
operation_metrics = app.extensions.get("operation_metrics")
|
||||||
if operation_metrics:
|
if operation_metrics:
|
||||||
|
|||||||
206
app/admin_api.py
206
app/admin_api.py
@@ -14,6 +14,8 @@ from flask import Blueprint, Response, current_app, jsonify, request
|
|||||||
|
|
||||||
from .connections import ConnectionStore
|
from .connections import ConnectionStore
|
||||||
from .extensions import limiter
|
from .extensions import limiter
|
||||||
|
from .gc import GarbageCollector
|
||||||
|
from .integrity import IntegrityChecker
|
||||||
from .iam import IamError, Principal
|
from .iam import IamError, Principal
|
||||||
from .replication import ReplicationManager
|
from .replication import ReplicationManager
|
||||||
from .site_registry import PeerSite, SiteInfo, SiteRegistry
|
from .site_registry import PeerSite, SiteInfo, SiteRegistry
|
||||||
@@ -684,6 +686,107 @@ def _storage():
|
|||||||
return current_app.extensions["object_storage"]
|
return current_app.extensions["object_storage"]
|
||||||
|
|
||||||
|
|
||||||
|
def _require_iam_action(action: str):
|
||||||
|
principal, error = _require_principal()
|
||||||
|
if error:
|
||||||
|
return None, error
|
||||||
|
try:
|
||||||
|
_iam().authorize(principal, None, action)
|
||||||
|
return principal, None
|
||||||
|
except IamError:
|
||||||
|
return None, _json_error("AccessDenied", f"Requires {action} permission", 403)
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/iam/users", methods=["GET"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def iam_list_users():
|
||||||
|
principal, error = _require_iam_action("iam:list_users")
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
return jsonify({"users": _iam().list_users()})
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/iam/users/<identifier>", methods=["GET"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def iam_get_user(identifier):
|
||||||
|
principal, error = _require_iam_action("iam:get_user")
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
try:
|
||||||
|
user_id = _iam().resolve_user_id(identifier)
|
||||||
|
return jsonify(_iam().get_user_by_id(user_id))
|
||||||
|
except IamError as exc:
|
||||||
|
return _json_error("NotFound", str(exc), 404)
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/iam/users/<identifier>/policies", methods=["GET"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def iam_get_user_policies(identifier):
|
||||||
|
principal, error = _require_iam_action("iam:get_policy")
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
try:
|
||||||
|
return jsonify({"policies": _iam().get_user_policies(identifier)})
|
||||||
|
except IamError as exc:
|
||||||
|
return _json_error("NotFound", str(exc), 404)
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/iam/users/<identifier>/keys", methods=["POST"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def iam_create_access_key(identifier):
|
||||||
|
principal, error = _require_iam_action("iam:create_key")
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
try:
|
||||||
|
result = _iam().create_access_key(identifier)
|
||||||
|
logger.info("Access key created for %s by %s", identifier, principal.access_key)
|
||||||
|
return jsonify(result), 201
|
||||||
|
except IamError as exc:
|
||||||
|
return _json_error("InvalidRequest", str(exc), 400)
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/iam/users/<identifier>/keys/<access_key>", methods=["DELETE"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def iam_delete_access_key(identifier, access_key):
|
||||||
|
principal, error = _require_iam_action("iam:delete_key")
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
try:
|
||||||
|
_iam().delete_access_key(access_key)
|
||||||
|
logger.info("Access key %s deleted by %s", access_key, principal.access_key)
|
||||||
|
return "", 204
|
||||||
|
except IamError as exc:
|
||||||
|
return _json_error("InvalidRequest", str(exc), 400)
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/iam/users/<identifier>/disable", methods=["POST"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def iam_disable_user(identifier):
|
||||||
|
principal, error = _require_iam_action("iam:disable_user")
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
try:
|
||||||
|
_iam().disable_user(identifier)
|
||||||
|
logger.info("User %s disabled by %s", identifier, principal.access_key)
|
||||||
|
return jsonify({"status": "disabled"})
|
||||||
|
except IamError as exc:
|
||||||
|
return _json_error("InvalidRequest", str(exc), 400)
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/iam/users/<identifier>/enable", methods=["POST"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def iam_enable_user(identifier):
|
||||||
|
principal, error = _require_iam_action("iam:disable_user")
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
try:
|
||||||
|
_iam().enable_user(identifier)
|
||||||
|
logger.info("User %s enabled by %s", identifier, principal.access_key)
|
||||||
|
return jsonify({"status": "enabled"})
|
||||||
|
except IamError as exc:
|
||||||
|
return _json_error("InvalidRequest", str(exc), 400)
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/website-domains", methods=["GET"])
|
@admin_api_bp.route("/website-domains", methods=["GET"])
|
||||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
def list_website_domains():
|
def list_website_domains():
|
||||||
@@ -776,3 +879,106 @@ def delete_website_domain(domain: str):
|
|||||||
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
|
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
|
||||||
logger.info("Website domain mapping deleted: %s", domain)
|
logger.info("Website domain mapping deleted: %s", domain)
|
||||||
return Response(status=204)
|
return Response(status=204)
|
||||||
|
|
||||||
|
|
||||||
|
def _gc() -> Optional[GarbageCollector]:
|
||||||
|
return current_app.extensions.get("gc")
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/gc/status", methods=["GET"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def gc_status():
|
||||||
|
principal, error = _require_admin()
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
gc = _gc()
|
||||||
|
if not gc:
|
||||||
|
return jsonify({"enabled": False, "message": "GC is not enabled. Set GC_ENABLED=true to enable."})
|
||||||
|
return jsonify(gc.get_status())
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/gc/run", methods=["POST"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def gc_run_now():
|
||||||
|
principal, error = _require_admin()
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
gc = _gc()
|
||||||
|
if not gc:
|
||||||
|
return _json_error("InvalidRequest", "GC is not enabled", 400)
|
||||||
|
payload = request.get_json(silent=True) or {}
|
||||||
|
started = gc.run_async(dry_run=payload.get("dry_run"))
|
||||||
|
logger.info("GC manual run by %s", principal.access_key)
|
||||||
|
if not started:
|
||||||
|
return _json_error("Conflict", "GC is already in progress", 409)
|
||||||
|
return jsonify({"status": "started"})
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/gc/history", methods=["GET"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def gc_history():
|
||||||
|
principal, error = _require_admin()
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
gc = _gc()
|
||||||
|
if not gc:
|
||||||
|
return jsonify({"executions": []})
|
||||||
|
limit = min(int(request.args.get("limit", 50)), 200)
|
||||||
|
offset = int(request.args.get("offset", 0))
|
||||||
|
records = gc.get_history(limit=limit, offset=offset)
|
||||||
|
return jsonify({"executions": records})
|
||||||
|
|
||||||
|
|
||||||
|
def _integrity() -> Optional[IntegrityChecker]:
|
||||||
|
return current_app.extensions.get("integrity")
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/integrity/status", methods=["GET"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def integrity_status():
|
||||||
|
principal, error = _require_admin()
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
checker = _integrity()
|
||||||
|
if not checker:
|
||||||
|
return jsonify({"enabled": False, "message": "Integrity checker is not enabled. Set INTEGRITY_ENABLED=true to enable."})
|
||||||
|
return jsonify(checker.get_status())
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/integrity/run", methods=["POST"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def integrity_run_now():
|
||||||
|
principal, error = _require_admin()
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
checker = _integrity()
|
||||||
|
if not checker:
|
||||||
|
return _json_error("InvalidRequest", "Integrity checker is not enabled", 400)
|
||||||
|
payload = request.get_json(silent=True) or {}
|
||||||
|
override_dry_run = payload.get("dry_run")
|
||||||
|
override_auto_heal = payload.get("auto_heal")
|
||||||
|
started = checker.run_async(
|
||||||
|
auto_heal=override_auto_heal if override_auto_heal is not None else None,
|
||||||
|
dry_run=override_dry_run if override_dry_run is not None else None,
|
||||||
|
)
|
||||||
|
logger.info("Integrity manual run by %s", principal.access_key)
|
||||||
|
if not started:
|
||||||
|
return _json_error("Conflict", "A scan is already in progress", 409)
|
||||||
|
return jsonify({"status": "started"})
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/integrity/history", methods=["GET"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def integrity_history():
|
||||||
|
principal, error = _require_admin()
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
checker = _integrity()
|
||||||
|
if not checker:
|
||||||
|
return jsonify({"executions": []})
|
||||||
|
limit = min(int(request.args.get("limit", 50)), 200)
|
||||||
|
offset = int(request.args.get("offset", 0))
|
||||||
|
records = checker.get_history(limit=limit, offset=offset)
|
||||||
|
return jsonify({"executions": records})
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import ipaddress
|
import ipaddress
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
@@ -75,7 +76,7 @@ def _evaluate_condition_operator(
|
|||||||
expected_null = condition_values[0].lower() in ("true", "1", "yes") if condition_values else True
|
expected_null = condition_values[0].lower() in ("true", "1", "yes") if condition_values else True
|
||||||
return is_null == expected_null
|
return is_null == expected_null
|
||||||
|
|
||||||
return True
|
return False
|
||||||
|
|
||||||
ACTION_ALIASES = {
|
ACTION_ALIASES = {
|
||||||
"s3:listbucket": "list",
|
"s3:listbucket": "list",
|
||||||
@@ -268,7 +269,7 @@ class BucketPolicyStore:
|
|||||||
self._last_mtime = self._current_mtime()
|
self._last_mtime = self._current_mtime()
|
||||||
# Performance: Avoid stat() on every request
|
# Performance: Avoid stat() on every request
|
||||||
self._last_stat_check = 0.0
|
self._last_stat_check = 0.0
|
||||||
self._stat_check_interval = 1.0 # Only check mtime every 1 second
|
self._stat_check_interval = float(os.environ.get("BUCKET_POLICY_STAT_CHECK_INTERVAL_SECONDS", "2.0"))
|
||||||
|
|
||||||
def maybe_reload(self) -> None:
|
def maybe_reload(self) -> None:
|
||||||
# Performance: Skip stat check if we checked recently
|
# Performance: Skip stat check if we checked recently
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ def _calculate_auto_connection_limit() -> int:
|
|||||||
|
|
||||||
|
|
||||||
def _calculate_auto_backlog(connection_limit: int) -> int:
|
def _calculate_auto_backlog(connection_limit: int) -> int:
|
||||||
return max(64, min(connection_limit * 2, 4096))
|
return max(128, min(connection_limit * 2, 4096))
|
||||||
|
|
||||||
|
|
||||||
def _validate_rate_limit(value: str) -> str:
|
def _validate_rate_limit(value: str) -> str:
|
||||||
@@ -115,6 +115,7 @@ class AppConfig:
|
|||||||
server_connection_limit: int
|
server_connection_limit: int
|
||||||
server_backlog: int
|
server_backlog: int
|
||||||
server_channel_timeout: int
|
server_channel_timeout: int
|
||||||
|
server_max_buffer_size: int
|
||||||
server_threads_auto: bool
|
server_threads_auto: bool
|
||||||
server_connection_limit_auto: bool
|
server_connection_limit_auto: bool
|
||||||
server_backlog_auto: bool
|
server_backlog_auto: bool
|
||||||
@@ -150,6 +151,19 @@ class AppConfig:
|
|||||||
allowed_redirect_hosts: list[str]
|
allowed_redirect_hosts: list[str]
|
||||||
allow_internal_endpoints: bool
|
allow_internal_endpoints: bool
|
||||||
website_hosting_enabled: bool
|
website_hosting_enabled: bool
|
||||||
|
gc_enabled: bool
|
||||||
|
gc_interval_hours: float
|
||||||
|
gc_temp_file_max_age_hours: float
|
||||||
|
gc_multipart_max_age_days: int
|
||||||
|
gc_lock_file_max_age_hours: float
|
||||||
|
gc_dry_run: bool
|
||||||
|
gc_io_throttle_ms: int
|
||||||
|
integrity_enabled: bool
|
||||||
|
integrity_interval_hours: float
|
||||||
|
integrity_batch_size: int
|
||||||
|
integrity_auto_heal: bool
|
||||||
|
integrity_dry_run: bool
|
||||||
|
integrity_io_throttle_ms: int
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
|
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
|
||||||
@@ -241,7 +255,7 @@ class AppConfig:
|
|||||||
cors_expose_headers = _csv(str(_get("CORS_EXPOSE_HEADERS", "*")), ["*"])
|
cors_expose_headers = _csv(str(_get("CORS_EXPOSE_HEADERS", "*")), ["*"])
|
||||||
session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30))
|
session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30))
|
||||||
bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60))
|
bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60))
|
||||||
object_cache_ttl = int(_get("OBJECT_CACHE_TTL", 5))
|
object_cache_ttl = int(_get("OBJECT_CACHE_TTL", 60))
|
||||||
|
|
||||||
encryption_enabled = str(_get("ENCRYPTION_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
encryption_enabled = str(_get("ENCRYPTION_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||||
encryption_keys_dir = storage_root / ".myfsio.sys" / "keys"
|
encryption_keys_dir = storage_root / ".myfsio.sys" / "keys"
|
||||||
@@ -282,6 +296,7 @@ class AppConfig:
|
|||||||
server_backlog_auto = False
|
server_backlog_auto = False
|
||||||
|
|
||||||
server_channel_timeout = int(_get("SERVER_CHANNEL_TIMEOUT", 120))
|
server_channel_timeout = int(_get("SERVER_CHANNEL_TIMEOUT", 120))
|
||||||
|
server_max_buffer_size = int(_get("SERVER_MAX_BUFFER_SIZE", 1024 * 1024 * 128))
|
||||||
site_sync_enabled = str(_get("SITE_SYNC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
site_sync_enabled = str(_get("SITE_SYNC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||||
site_sync_interval_seconds = int(_get("SITE_SYNC_INTERVAL_SECONDS", 60))
|
site_sync_interval_seconds = int(_get("SITE_SYNC_INTERVAL_SECONDS", 60))
|
||||||
site_sync_batch_size = int(_get("SITE_SYNC_BATCH_SIZE", 100))
|
site_sync_batch_size = int(_get("SITE_SYNC_BATCH_SIZE", 100))
|
||||||
@@ -314,11 +329,24 @@ class AppConfig:
|
|||||||
site_region = str(_get("SITE_REGION", "us-east-1"))
|
site_region = str(_get("SITE_REGION", "us-east-1"))
|
||||||
site_priority = int(_get("SITE_PRIORITY", 100))
|
site_priority = int(_get("SITE_PRIORITY", 100))
|
||||||
ratelimit_admin = _validate_rate_limit(str(_get("RATE_LIMIT_ADMIN", "60 per minute")))
|
ratelimit_admin = _validate_rate_limit(str(_get("RATE_LIMIT_ADMIN", "60 per minute")))
|
||||||
num_trusted_proxies = int(_get("NUM_TRUSTED_PROXIES", 0))
|
num_trusted_proxies = int(_get("NUM_TRUSTED_PROXIES", 1))
|
||||||
allowed_redirect_hosts_raw = _get("ALLOWED_REDIRECT_HOSTS", "")
|
allowed_redirect_hosts_raw = _get("ALLOWED_REDIRECT_HOSTS", "")
|
||||||
allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
|
allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
|
||||||
allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
|
allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
|
||||||
website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||||
|
gc_enabled = str(_get("GC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||||
|
gc_interval_hours = float(_get("GC_INTERVAL_HOURS", 6.0))
|
||||||
|
gc_temp_file_max_age_hours = float(_get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0))
|
||||||
|
gc_multipart_max_age_days = int(_get("GC_MULTIPART_MAX_AGE_DAYS", 7))
|
||||||
|
gc_lock_file_max_age_hours = float(_get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0))
|
||||||
|
gc_dry_run = str(_get("GC_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
|
||||||
|
gc_io_throttle_ms = int(_get("GC_IO_THROTTLE_MS", 10))
|
||||||
|
integrity_enabled = str(_get("INTEGRITY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||||
|
integrity_interval_hours = float(_get("INTEGRITY_INTERVAL_HOURS", 24.0))
|
||||||
|
integrity_batch_size = int(_get("INTEGRITY_BATCH_SIZE", 1000))
|
||||||
|
integrity_auto_heal = str(_get("INTEGRITY_AUTO_HEAL", "0")).lower() in {"1", "true", "yes", "on"}
|
||||||
|
integrity_dry_run = str(_get("INTEGRITY_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
|
||||||
|
integrity_io_throttle_ms = int(_get("INTEGRITY_IO_THROTTLE_MS", 10))
|
||||||
|
|
||||||
return cls(storage_root=storage_root,
|
return cls(storage_root=storage_root,
|
||||||
max_upload_size=max_upload_size,
|
max_upload_size=max_upload_size,
|
||||||
@@ -372,6 +400,7 @@ class AppConfig:
|
|||||||
server_connection_limit=server_connection_limit,
|
server_connection_limit=server_connection_limit,
|
||||||
server_backlog=server_backlog,
|
server_backlog=server_backlog,
|
||||||
server_channel_timeout=server_channel_timeout,
|
server_channel_timeout=server_channel_timeout,
|
||||||
|
server_max_buffer_size=server_max_buffer_size,
|
||||||
server_threads_auto=server_threads_auto,
|
server_threads_auto=server_threads_auto,
|
||||||
server_connection_limit_auto=server_connection_limit_auto,
|
server_connection_limit_auto=server_connection_limit_auto,
|
||||||
server_backlog_auto=server_backlog_auto,
|
server_backlog_auto=server_backlog_auto,
|
||||||
@@ -406,7 +435,20 @@ class AppConfig:
|
|||||||
num_trusted_proxies=num_trusted_proxies,
|
num_trusted_proxies=num_trusted_proxies,
|
||||||
allowed_redirect_hosts=allowed_redirect_hosts,
|
allowed_redirect_hosts=allowed_redirect_hosts,
|
||||||
allow_internal_endpoints=allow_internal_endpoints,
|
allow_internal_endpoints=allow_internal_endpoints,
|
||||||
website_hosting_enabled=website_hosting_enabled)
|
website_hosting_enabled=website_hosting_enabled,
|
||||||
|
gc_enabled=gc_enabled,
|
||||||
|
gc_interval_hours=gc_interval_hours,
|
||||||
|
gc_temp_file_max_age_hours=gc_temp_file_max_age_hours,
|
||||||
|
gc_multipart_max_age_days=gc_multipart_max_age_days,
|
||||||
|
gc_lock_file_max_age_hours=gc_lock_file_max_age_hours,
|
||||||
|
gc_dry_run=gc_dry_run,
|
||||||
|
gc_io_throttle_ms=gc_io_throttle_ms,
|
||||||
|
integrity_enabled=integrity_enabled,
|
||||||
|
integrity_interval_hours=integrity_interval_hours,
|
||||||
|
integrity_batch_size=integrity_batch_size,
|
||||||
|
integrity_auto_heal=integrity_auto_heal,
|
||||||
|
integrity_dry_run=integrity_dry_run,
|
||||||
|
integrity_io_throttle_ms=integrity_io_throttle_ms)
|
||||||
|
|
||||||
def validate_and_report(self) -> list[str]:
|
def validate_and_report(self) -> list[str]:
|
||||||
"""Validate configuration and return a list of warnings/issues.
|
"""Validate configuration and return a list of warnings/issues.
|
||||||
@@ -471,10 +513,12 @@ class AppConfig:
|
|||||||
issues.append(f"CRITICAL: SERVER_THREADS={self.server_threads} is outside valid range (1-64). Server cannot start.")
|
issues.append(f"CRITICAL: SERVER_THREADS={self.server_threads} is outside valid range (1-64). Server cannot start.")
|
||||||
if not (10 <= self.server_connection_limit <= 1000):
|
if not (10 <= self.server_connection_limit <= 1000):
|
||||||
issues.append(f"CRITICAL: SERVER_CONNECTION_LIMIT={self.server_connection_limit} is outside valid range (10-1000). Server cannot start.")
|
issues.append(f"CRITICAL: SERVER_CONNECTION_LIMIT={self.server_connection_limit} is outside valid range (10-1000). Server cannot start.")
|
||||||
if not (64 <= self.server_backlog <= 4096):
|
if not (128 <= self.server_backlog <= 4096):
|
||||||
issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (64-4096). Server cannot start.")
|
issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (128-4096). Server cannot start.")
|
||||||
if not (10 <= self.server_channel_timeout <= 300):
|
if not (10 <= self.server_channel_timeout <= 300):
|
||||||
issues.append(f"CRITICAL: SERVER_CHANNEL_TIMEOUT={self.server_channel_timeout} is outside valid range (10-300). Server cannot start.")
|
issues.append(f"CRITICAL: SERVER_CHANNEL_TIMEOUT={self.server_channel_timeout} is outside valid range (10-300). Server cannot start.")
|
||||||
|
if self.server_max_buffer_size < 1024 * 1024:
|
||||||
|
issues.append(f"WARNING: SERVER_MAX_BUFFER_SIZE={self.server_max_buffer_size} is less than 1MB. Large uploads will fail.")
|
||||||
|
|
||||||
if sys.platform != "win32":
|
if sys.platform != "win32":
|
||||||
try:
|
try:
|
||||||
@@ -520,6 +564,7 @@ class AppConfig:
|
|||||||
print(f" CONNECTION_LIMIT: {self.server_connection_limit}{_auto(self.server_connection_limit_auto)}")
|
print(f" CONNECTION_LIMIT: {self.server_connection_limit}{_auto(self.server_connection_limit_auto)}")
|
||||||
print(f" BACKLOG: {self.server_backlog}{_auto(self.server_backlog_auto)}")
|
print(f" BACKLOG: {self.server_backlog}{_auto(self.server_backlog_auto)}")
|
||||||
print(f" CHANNEL_TIMEOUT: {self.server_channel_timeout}s")
|
print(f" CHANNEL_TIMEOUT: {self.server_channel_timeout}s")
|
||||||
|
print(f" MAX_BUFFER_SIZE: {self.server_max_buffer_size // (1024 * 1024)}MB")
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
|
|
||||||
issues = self.validate_and_report()
|
issues = self.validate_and_report()
|
||||||
@@ -585,6 +630,7 @@ class AppConfig:
|
|||||||
"SERVER_CONNECTION_LIMIT": self.server_connection_limit,
|
"SERVER_CONNECTION_LIMIT": self.server_connection_limit,
|
||||||
"SERVER_BACKLOG": self.server_backlog,
|
"SERVER_BACKLOG": self.server_backlog,
|
||||||
"SERVER_CHANNEL_TIMEOUT": self.server_channel_timeout,
|
"SERVER_CHANNEL_TIMEOUT": self.server_channel_timeout,
|
||||||
|
"SERVER_MAX_BUFFER_SIZE": self.server_max_buffer_size,
|
||||||
"SITE_SYNC_ENABLED": self.site_sync_enabled,
|
"SITE_SYNC_ENABLED": self.site_sync_enabled,
|
||||||
"SITE_SYNC_INTERVAL_SECONDS": self.site_sync_interval_seconds,
|
"SITE_SYNC_INTERVAL_SECONDS": self.site_sync_interval_seconds,
|
||||||
"SITE_SYNC_BATCH_SIZE": self.site_sync_batch_size,
|
"SITE_SYNC_BATCH_SIZE": self.site_sync_batch_size,
|
||||||
@@ -617,4 +663,17 @@ class AppConfig:
|
|||||||
"ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
|
"ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
|
||||||
"ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
|
"ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
|
||||||
"WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled,
|
"WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled,
|
||||||
|
"GC_ENABLED": self.gc_enabled,
|
||||||
|
"GC_INTERVAL_HOURS": self.gc_interval_hours,
|
||||||
|
"GC_TEMP_FILE_MAX_AGE_HOURS": self.gc_temp_file_max_age_hours,
|
||||||
|
"GC_MULTIPART_MAX_AGE_DAYS": self.gc_multipart_max_age_days,
|
||||||
|
"GC_LOCK_FILE_MAX_AGE_HOURS": self.gc_lock_file_max_age_hours,
|
||||||
|
"GC_DRY_RUN": self.gc_dry_run,
|
||||||
|
"GC_IO_THROTTLE_MS": self.gc_io_throttle_ms,
|
||||||
|
"INTEGRITY_ENABLED": self.integrity_enabled,
|
||||||
|
"INTEGRITY_INTERVAL_HOURS": self.integrity_interval_hours,
|
||||||
|
"INTEGRITY_BATCH_SIZE": self.integrity_batch_size,
|
||||||
|
"INTEGRITY_AUTO_HEAL": self.integrity_auto_heal,
|
||||||
|
"INTEGRITY_DRY_RUN": self.integrity_dry_run,
|
||||||
|
"INTEGRITY_IO_THROTTLE_MS": self.integrity_io_throttle_ms,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -190,6 +190,15 @@ class EncryptedObjectStorage:
|
|||||||
def list_objects(self, bucket_name: str, **kwargs):
|
def list_objects(self, bucket_name: str, **kwargs):
|
||||||
return self.storage.list_objects(bucket_name, **kwargs)
|
return self.storage.list_objects(bucket_name, **kwargs)
|
||||||
|
|
||||||
|
def list_objects_shallow(self, bucket_name: str, **kwargs):
|
||||||
|
return self.storage.list_objects_shallow(bucket_name, **kwargs)
|
||||||
|
|
||||||
|
def iter_objects_shallow(self, bucket_name: str, **kwargs):
|
||||||
|
return self.storage.iter_objects_shallow(bucket_name, **kwargs)
|
||||||
|
|
||||||
|
def search_objects(self, bucket_name: str, query: str, **kwargs):
|
||||||
|
return self.storage.search_objects(bucket_name, query, **kwargs)
|
||||||
|
|
||||||
def list_objects_all(self, bucket_name: str):
|
def list_objects_all(self, bucket_name: str):
|
||||||
return self.storage.list_objects_all(bucket_name)
|
return self.storage.list_objects_all(bucket_name)
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,13 @@ from cryptography.hazmat.primitives import hashes
|
|||||||
if sys.platform != "win32":
|
if sys.platform != "win32":
|
||||||
import fcntl
|
import fcntl
|
||||||
|
|
||||||
|
try:
|
||||||
|
import myfsio_core as _rc
|
||||||
|
_HAS_RUST = True
|
||||||
|
except ImportError:
|
||||||
|
_rc = None
|
||||||
|
_HAS_RUST = False
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -338,6 +345,69 @@ class StreamingEncryptor:
|
|||||||
output.seek(0)
|
output.seek(0)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
def encrypt_file(self, input_path: str, output_path: str) -> EncryptionMetadata:
|
||||||
|
data_key, encrypted_data_key = self.provider.generate_data_key()
|
||||||
|
base_nonce = secrets.token_bytes(12)
|
||||||
|
|
||||||
|
if _HAS_RUST:
|
||||||
|
_rc.encrypt_stream_chunked(
|
||||||
|
input_path, output_path, data_key, base_nonce, self.chunk_size
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
with open(input_path, "rb") as stream:
|
||||||
|
aesgcm = AESGCM(data_key)
|
||||||
|
with open(output_path, "wb") as out:
|
||||||
|
out.write(b"\x00\x00\x00\x00")
|
||||||
|
chunk_index = 0
|
||||||
|
while True:
|
||||||
|
chunk = stream.read(self.chunk_size)
|
||||||
|
if not chunk:
|
||||||
|
break
|
||||||
|
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
|
||||||
|
encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None)
|
||||||
|
out.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big"))
|
||||||
|
out.write(encrypted_chunk)
|
||||||
|
chunk_index += 1
|
||||||
|
out.seek(0)
|
||||||
|
out.write(chunk_index.to_bytes(4, "big"))
|
||||||
|
|
||||||
|
return EncryptionMetadata(
|
||||||
|
algorithm="AES256",
|
||||||
|
key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local",
|
||||||
|
nonce=base_nonce,
|
||||||
|
encrypted_data_key=encrypted_data_key,
|
||||||
|
)
|
||||||
|
|
||||||
|
def decrypt_file(self, input_path: str, output_path: str,
|
||||||
|
metadata: EncryptionMetadata) -> None:
|
||||||
|
data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id)
|
||||||
|
base_nonce = metadata.nonce
|
||||||
|
|
||||||
|
if _HAS_RUST:
|
||||||
|
_rc.decrypt_stream_chunked(input_path, output_path, data_key, base_nonce)
|
||||||
|
else:
|
||||||
|
with open(input_path, "rb") as stream:
|
||||||
|
chunk_count_bytes = stream.read(4)
|
||||||
|
if len(chunk_count_bytes) < 4:
|
||||||
|
raise EncryptionError("Invalid encrypted stream: missing header")
|
||||||
|
chunk_count = int.from_bytes(chunk_count_bytes, "big")
|
||||||
|
aesgcm = AESGCM(data_key)
|
||||||
|
with open(output_path, "wb") as out:
|
||||||
|
for chunk_index in range(chunk_count):
|
||||||
|
size_bytes = stream.read(self.HEADER_SIZE)
|
||||||
|
if len(size_bytes) < self.HEADER_SIZE:
|
||||||
|
raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}")
|
||||||
|
chunk_size = int.from_bytes(size_bytes, "big")
|
||||||
|
encrypted_chunk = stream.read(chunk_size)
|
||||||
|
if len(encrypted_chunk) < chunk_size:
|
||||||
|
raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}")
|
||||||
|
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
|
||||||
|
try:
|
||||||
|
decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None)
|
||||||
|
out.write(decrypted_chunk)
|
||||||
|
except Exception as exc:
|
||||||
|
raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc
|
||||||
|
|
||||||
|
|
||||||
class EncryptionManager:
|
class EncryptionManager:
|
||||||
"""Manages encryption providers and operations."""
|
"""Manages encryption providers and operations."""
|
||||||
|
|||||||
@@ -175,13 +175,21 @@ def handle_app_error(error: AppError) -> Response:
|
|||||||
|
|
||||||
def handle_rate_limit_exceeded(e: RateLimitExceeded) -> Response:
|
def handle_rate_limit_exceeded(e: RateLimitExceeded) -> Response:
|
||||||
g.s3_error_code = "SlowDown"
|
g.s3_error_code = "SlowDown"
|
||||||
|
if request.path.startswith("/ui") or request.path.startswith("/buckets"):
|
||||||
|
wants_json = (
|
||||||
|
request.is_json or
|
||||||
|
request.headers.get("X-Requested-With") == "XMLHttpRequest" or
|
||||||
|
"application/json" in request.accept_mimetypes.values()
|
||||||
|
)
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"success": False, "error": {"code": "SlowDown", "message": "Please reduce your request rate."}}), 429
|
||||||
error = Element("Error")
|
error = Element("Error")
|
||||||
SubElement(error, "Code").text = "SlowDown"
|
SubElement(error, "Code").text = "SlowDown"
|
||||||
SubElement(error, "Message").text = "Please reduce your request rate."
|
SubElement(error, "Message").text = "Please reduce your request rate."
|
||||||
SubElement(error, "Resource").text = request.path
|
SubElement(error, "Resource").text = request.path
|
||||||
SubElement(error, "RequestId").text = getattr(g, "request_id", "")
|
SubElement(error, "RequestId").text = getattr(g, "request_id", "")
|
||||||
xml_bytes = tostring(error, encoding="utf-8")
|
xml_bytes = tostring(error, encoding="utf-8")
|
||||||
return Response(xml_bytes, status=429, mimetype="application/xml")
|
return Response(xml_bytes, status="429 Too Many Requests", mimetype="application/xml")
|
||||||
|
|
||||||
|
|
||||||
def register_error_handlers(app):
|
def register_error_handlers(app):
|
||||||
|
|||||||
596
app/gc.py
Normal file
596
app/gc.py
Normal file
@@ -0,0 +1,596 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class GCResult:
|
||||||
|
temp_files_deleted: int = 0
|
||||||
|
temp_bytes_freed: int = 0
|
||||||
|
multipart_uploads_deleted: int = 0
|
||||||
|
multipart_bytes_freed: int = 0
|
||||||
|
lock_files_deleted: int = 0
|
||||||
|
orphaned_metadata_deleted: int = 0
|
||||||
|
orphaned_versions_deleted: int = 0
|
||||||
|
orphaned_version_bytes_freed: int = 0
|
||||||
|
empty_dirs_removed: int = 0
|
||||||
|
errors: List[str] = field(default_factory=list)
|
||||||
|
execution_time_seconds: float = 0.0
|
||||||
|
|
||||||
|
def to_dict(self) -> dict:
|
||||||
|
return {
|
||||||
|
"temp_files_deleted": self.temp_files_deleted,
|
||||||
|
"temp_bytes_freed": self.temp_bytes_freed,
|
||||||
|
"multipart_uploads_deleted": self.multipart_uploads_deleted,
|
||||||
|
"multipart_bytes_freed": self.multipart_bytes_freed,
|
||||||
|
"lock_files_deleted": self.lock_files_deleted,
|
||||||
|
"orphaned_metadata_deleted": self.orphaned_metadata_deleted,
|
||||||
|
"orphaned_versions_deleted": self.orphaned_versions_deleted,
|
||||||
|
"orphaned_version_bytes_freed": self.orphaned_version_bytes_freed,
|
||||||
|
"empty_dirs_removed": self.empty_dirs_removed,
|
||||||
|
"errors": self.errors,
|
||||||
|
"execution_time_seconds": self.execution_time_seconds,
|
||||||
|
}
|
||||||
|
|
||||||
|
@property
|
||||||
|
def total_bytes_freed(self) -> int:
|
||||||
|
return self.temp_bytes_freed + self.multipart_bytes_freed + self.orphaned_version_bytes_freed
|
||||||
|
|
||||||
|
@property
|
||||||
|
def has_work(self) -> bool:
|
||||||
|
return (
|
||||||
|
self.temp_files_deleted > 0
|
||||||
|
or self.multipart_uploads_deleted > 0
|
||||||
|
or self.lock_files_deleted > 0
|
||||||
|
or self.orphaned_metadata_deleted > 0
|
||||||
|
or self.orphaned_versions_deleted > 0
|
||||||
|
or self.empty_dirs_removed > 0
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class GCExecutionRecord:
|
||||||
|
timestamp: float
|
||||||
|
result: dict
|
||||||
|
dry_run: bool
|
||||||
|
|
||||||
|
def to_dict(self) -> dict:
|
||||||
|
return {
|
||||||
|
"timestamp": self.timestamp,
|
||||||
|
"result": self.result,
|
||||||
|
"dry_run": self.dry_run,
|
||||||
|
}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_dict(cls, data: dict) -> GCExecutionRecord:
|
||||||
|
return cls(
|
||||||
|
timestamp=data["timestamp"],
|
||||||
|
result=data["result"],
|
||||||
|
dry_run=data.get("dry_run", False),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GCHistoryStore:
|
||||||
|
def __init__(self, storage_root: Path, max_records: int = 50) -> None:
|
||||||
|
self.storage_root = storage_root
|
||||||
|
self.max_records = max_records
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
|
||||||
|
def _get_path(self) -> Path:
|
||||||
|
return self.storage_root / ".myfsio.sys" / "config" / "gc_history.json"
|
||||||
|
|
||||||
|
def load(self) -> List[GCExecutionRecord]:
|
||||||
|
path = self._get_path()
|
||||||
|
if not path.exists():
|
||||||
|
return []
|
||||||
|
try:
|
||||||
|
with open(path, "r", encoding="utf-8") as f:
|
||||||
|
data = json.load(f)
|
||||||
|
return [GCExecutionRecord.from_dict(d) for d in data.get("executions", [])]
|
||||||
|
except (OSError, ValueError, KeyError) as e:
|
||||||
|
logger.error("Failed to load GC history: %s", e)
|
||||||
|
return []
|
||||||
|
|
||||||
|
def save(self, records: List[GCExecutionRecord]) -> None:
|
||||||
|
path = self._get_path()
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
data = {"executions": [r.to_dict() for r in records[: self.max_records]]}
|
||||||
|
try:
|
||||||
|
with open(path, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(data, f, indent=2)
|
||||||
|
except OSError as e:
|
||||||
|
logger.error("Failed to save GC history: %s", e)
|
||||||
|
|
||||||
|
def add(self, record: GCExecutionRecord) -> None:
|
||||||
|
with self._lock:
|
||||||
|
records = self.load()
|
||||||
|
records.insert(0, record)
|
||||||
|
self.save(records)
|
||||||
|
|
||||||
|
def get_history(self, limit: int = 50, offset: int = 0) -> List[GCExecutionRecord]:
|
||||||
|
return self.load()[offset : offset + limit]
|
||||||
|
|
||||||
|
|
||||||
|
def _dir_size(path: Path) -> int:
|
||||||
|
total = 0
|
||||||
|
try:
|
||||||
|
for f in path.rglob("*"):
|
||||||
|
if f.is_file():
|
||||||
|
try:
|
||||||
|
total += f.stat().st_size
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
return total
|
||||||
|
|
||||||
|
|
||||||
|
def _file_age_hours(path: Path) -> float:
|
||||||
|
try:
|
||||||
|
mtime = path.stat().st_mtime
|
||||||
|
return (time.time() - mtime) / 3600.0
|
||||||
|
except OSError:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
class GarbageCollector:
|
||||||
|
SYSTEM_ROOT = ".myfsio.sys"
|
||||||
|
SYSTEM_TMP_DIR = "tmp"
|
||||||
|
SYSTEM_MULTIPART_DIR = "multipart"
|
||||||
|
SYSTEM_BUCKETS_DIR = "buckets"
|
||||||
|
BUCKET_META_DIR = "meta"
|
||||||
|
BUCKET_VERSIONS_DIR = "versions"
|
||||||
|
INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
storage_root: Path,
|
||||||
|
interval_hours: float = 6.0,
|
||||||
|
temp_file_max_age_hours: float = 24.0,
|
||||||
|
multipart_max_age_days: int = 7,
|
||||||
|
lock_file_max_age_hours: float = 1.0,
|
||||||
|
dry_run: bool = False,
|
||||||
|
max_history: int = 50,
|
||||||
|
io_throttle_ms: int = 10,
|
||||||
|
) -> None:
|
||||||
|
self.storage_root = Path(storage_root)
|
||||||
|
self.interval_seconds = interval_hours * 3600.0
|
||||||
|
self.temp_file_max_age_hours = temp_file_max_age_hours
|
||||||
|
self.multipart_max_age_days = multipart_max_age_days
|
||||||
|
self.lock_file_max_age_hours = lock_file_max_age_hours
|
||||||
|
self.dry_run = dry_run
|
||||||
|
self._timer: Optional[threading.Timer] = None
|
||||||
|
self._shutdown = False
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
self._scanning = False
|
||||||
|
self._scan_start_time: Optional[float] = None
|
||||||
|
self._io_throttle = max(0, io_throttle_ms) / 1000.0
|
||||||
|
self.history_store = GCHistoryStore(storage_root, max_records=max_history)
|
||||||
|
|
||||||
|
def start(self) -> None:
|
||||||
|
if self._timer is not None:
|
||||||
|
return
|
||||||
|
self._shutdown = False
|
||||||
|
self._schedule_next()
|
||||||
|
logger.info(
|
||||||
|
"GC started: interval=%.1fh, temp_max_age=%.1fh, multipart_max_age=%dd, lock_max_age=%.1fh, dry_run=%s",
|
||||||
|
self.interval_seconds / 3600.0,
|
||||||
|
self.temp_file_max_age_hours,
|
||||||
|
self.multipart_max_age_days,
|
||||||
|
self.lock_file_max_age_hours,
|
||||||
|
self.dry_run,
|
||||||
|
)
|
||||||
|
|
||||||
|
def stop(self) -> None:
|
||||||
|
self._shutdown = True
|
||||||
|
if self._timer:
|
||||||
|
self._timer.cancel()
|
||||||
|
self._timer = None
|
||||||
|
logger.info("GC stopped")
|
||||||
|
|
||||||
|
def _schedule_next(self) -> None:
|
||||||
|
if self._shutdown:
|
||||||
|
return
|
||||||
|
self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
|
||||||
|
self._timer.daemon = True
|
||||||
|
self._timer.start()
|
||||||
|
|
||||||
|
def _run_cycle(self) -> None:
|
||||||
|
if self._shutdown:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
self.run_now()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("GC cycle failed: %s", e)
|
||||||
|
finally:
|
||||||
|
self._schedule_next()
|
||||||
|
|
||||||
|
def run_now(self, dry_run: Optional[bool] = None) -> GCResult:
|
||||||
|
if not self._lock.acquire(blocking=False):
|
||||||
|
raise RuntimeError("GC is already in progress")
|
||||||
|
|
||||||
|
effective_dry_run = dry_run if dry_run is not None else self.dry_run
|
||||||
|
|
||||||
|
try:
|
||||||
|
self._scanning = True
|
||||||
|
self._scan_start_time = time.time()
|
||||||
|
|
||||||
|
start = self._scan_start_time
|
||||||
|
result = GCResult()
|
||||||
|
|
||||||
|
original_dry_run = self.dry_run
|
||||||
|
self.dry_run = effective_dry_run
|
||||||
|
try:
|
||||||
|
self._clean_temp_files(result)
|
||||||
|
self._clean_orphaned_multipart(result)
|
||||||
|
self._clean_stale_locks(result)
|
||||||
|
self._clean_orphaned_metadata(result)
|
||||||
|
self._clean_orphaned_versions(result)
|
||||||
|
self._clean_empty_dirs(result)
|
||||||
|
finally:
|
||||||
|
self.dry_run = original_dry_run
|
||||||
|
|
||||||
|
result.execution_time_seconds = time.time() - start
|
||||||
|
|
||||||
|
if result.has_work or result.errors:
|
||||||
|
logger.info(
|
||||||
|
"GC completed in %.2fs: temp=%d (%.1f MB), multipart=%d (%.1f MB), "
|
||||||
|
"locks=%d, meta=%d, versions=%d (%.1f MB), dirs=%d, errors=%d%s",
|
||||||
|
result.execution_time_seconds,
|
||||||
|
result.temp_files_deleted,
|
||||||
|
result.temp_bytes_freed / (1024 * 1024),
|
||||||
|
result.multipart_uploads_deleted,
|
||||||
|
result.multipart_bytes_freed / (1024 * 1024),
|
||||||
|
result.lock_files_deleted,
|
||||||
|
result.orphaned_metadata_deleted,
|
||||||
|
result.orphaned_versions_deleted,
|
||||||
|
result.orphaned_version_bytes_freed / (1024 * 1024),
|
||||||
|
result.empty_dirs_removed,
|
||||||
|
len(result.errors),
|
||||||
|
" (dry run)" if effective_dry_run else "",
|
||||||
|
)
|
||||||
|
|
||||||
|
record = GCExecutionRecord(
|
||||||
|
timestamp=time.time(),
|
||||||
|
result=result.to_dict(),
|
||||||
|
dry_run=effective_dry_run,
|
||||||
|
)
|
||||||
|
self.history_store.add(record)
|
||||||
|
|
||||||
|
return result
|
||||||
|
finally:
|
||||||
|
self._scanning = False
|
||||||
|
self._scan_start_time = None
|
||||||
|
self._lock.release()
|
||||||
|
|
||||||
|
def run_async(self, dry_run: Optional[bool] = None) -> bool:
|
||||||
|
if self._scanning:
|
||||||
|
return False
|
||||||
|
t = threading.Thread(target=self.run_now, args=(dry_run,), daemon=True)
|
||||||
|
t.start()
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _system_path(self) -> Path:
|
||||||
|
return self.storage_root / self.SYSTEM_ROOT
|
||||||
|
|
||||||
|
def _throttle(self) -> bool:
|
||||||
|
if self._shutdown:
|
||||||
|
return True
|
||||||
|
if self._io_throttle > 0:
|
||||||
|
time.sleep(self._io_throttle)
|
||||||
|
return self._shutdown
|
||||||
|
|
||||||
|
def _list_bucket_names(self) -> List[str]:
|
||||||
|
names = []
|
||||||
|
try:
|
||||||
|
for entry in self.storage_root.iterdir():
|
||||||
|
if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
|
||||||
|
names.append(entry.name)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
return names
|
||||||
|
|
||||||
|
def _clean_temp_files(self, result: GCResult) -> None:
|
||||||
|
tmp_dir = self._system_path() / self.SYSTEM_TMP_DIR
|
||||||
|
if not tmp_dir.exists():
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
for entry in tmp_dir.iterdir():
|
||||||
|
if self._throttle():
|
||||||
|
return
|
||||||
|
if not entry.is_file():
|
||||||
|
continue
|
||||||
|
age = _file_age_hours(entry)
|
||||||
|
if age < self.temp_file_max_age_hours:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
size = entry.stat().st_size
|
||||||
|
if not self.dry_run:
|
||||||
|
entry.unlink()
|
||||||
|
result.temp_files_deleted += 1
|
||||||
|
result.temp_bytes_freed += size
|
||||||
|
except OSError as e:
|
||||||
|
result.errors.append(f"temp file {entry.name}: {e}")
|
||||||
|
except OSError as e:
|
||||||
|
result.errors.append(f"scan tmp dir: {e}")
|
||||||
|
|
||||||
|
def _clean_orphaned_multipart(self, result: GCResult) -> None:
    """Remove multipart upload directories older than the configured cutoff.

    Both the system-area multipart tree and the legacy in-bucket
    ``.multipart`` folder are scanned for every bucket.  Per-upload
    age/deletion logic lives in ``_maybe_clean_upload``.
    """
    cutoff_hours = self.multipart_max_age_days * 24.0
    bucket_names = self._list_bucket_names()

    for bucket_name in bucket_names:
        if self._shutdown:
            return
        # New layout first, then the legacy location inside the bucket.
        for multipart_root in (
            self._system_path() / self.SYSTEM_MULTIPART_DIR / bucket_name,
            self.storage_root / bucket_name / ".multipart",
        ):
            if not multipart_root.exists():
                continue
            try:
                for upload_dir in multipart_root.iterdir():
                    if self._throttle():
                        return
                    if not upload_dir.is_dir():
                        continue
                    self._maybe_clean_upload(upload_dir, cutoff_hours, result)
            except OSError as e:
                result.errors.append(f"scan multipart {bucket_name}: {e}")
|
||||||
|
|
||||||
|
def _maybe_clean_upload(self, upload_dir: Path, cutoff_hours: float, result: GCResult) -> None:
    """Delete one multipart upload directory if it is older than the cutoff.

    Age is taken from ``manifest.json`` when present (the manifest is
    presumably touched as parts arrive — TODO confirm), falling back to
    the directory itself.
    """
    manifest_path = upload_dir / "manifest.json"
    age = _file_age_hours(manifest_path) if manifest_path.exists() else _file_age_hours(upload_dir)

    if age < cutoff_hours:
        return

    # _dir_size is a module-level helper defined elsewhere; size is
    # measured before removal so bytes_freed stays accurate.
    dir_bytes = _dir_size(upload_dir)
    try:
        if not self.dry_run:
            # NOTE(review): ignore_errors=True means rmtree itself will not
            # raise; the except below guards the surrounding stat/bookkeeping.
            shutil.rmtree(upload_dir, ignore_errors=True)
        # Dry-run still counts what would have been reclaimed.
        result.multipart_uploads_deleted += 1
        result.multipart_bytes_freed += dir_bytes
    except OSError as e:
        result.errors.append(f"multipart {upload_dir.name}: {e}")
|
||||||
|
|
||||||
|
def _clean_stale_locks(self, result: GCResult) -> None:
    """Delete ``*.lock`` files older than ``lock_file_max_age_hours``.

    Scans ``<system>/buckets/<bucket>/locks`` for every bucket directory.
    Errors at any level (buckets root, per-bucket locks dir, single lock
    file) are recorded on *result* without aborting the rest of the scan.
    """
    buckets_root = self._system_path() / self.SYSTEM_BUCKETS_DIR
    if not buckets_root.exists():
        return

    try:
        for bucket_dir in buckets_root.iterdir():
            if self._shutdown:
                return
            if not bucket_dir.is_dir():
                continue
            locks_dir = bucket_dir / "locks"
            if not locks_dir.exists():
                continue
            try:
                for lock_file in locks_dir.iterdir():
                    if self._throttle():
                        return
                    # Only regular files with the .lock suffix are candidates.
                    if not lock_file.is_file() or not lock_file.name.endswith(".lock"):
                        continue
                    age = _file_age_hours(lock_file)
                    if age < self.lock_file_max_age_hours:
                        continue
                    try:
                        if not self.dry_run:
                            # missing_ok: the lock owner may release it
                            # concurrently with this scan.
                            lock_file.unlink(missing_ok=True)
                        result.lock_files_deleted += 1
                    except OSError as e:
                        result.errors.append(f"lock {lock_file.name}: {e}")
            except OSError as e:
                result.errors.append(f"scan locks {bucket_dir.name}: {e}")
    except OSError as e:
        result.errors.append(f"scan buckets for locks: {e}")
|
||||||
|
|
||||||
|
def _clean_orphaned_metadata(self, result: GCResult) -> None:
    """Drop metadata whose backing object no longer exists.

    Handles both the legacy per-bucket ".meta" tree and the newer
    per-bucket index tree under the system area.
    """
    for name in self._list_bucket_names():
        if self._shutdown:
            return

        old_root = self.storage_root / name / ".meta"
        if old_root.exists():
            self._clean_legacy_metadata(name, old_root, result)

        idx_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / name / self.BUCKET_META_DIR
        if idx_root.exists():
            self._clean_index_metadata(name, idx_root, result)
|
||||||
|
|
||||||
|
def _clean_legacy_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
    """Delete legacy ``*.meta.json`` sidecar files whose object is gone.

    The object key is recovered from the sidecar's path relative to
    *meta_root* by stripping the ``.meta.json`` suffix; if no file with
    that key exists in the bucket, the sidecar is removed.
    """
    bucket_path = self.storage_root / bucket_name
    try:
        for meta_file in meta_root.rglob("*.meta.json"):
            if self._throttle():
                return
            if not meta_file.is_file():
                continue
            try:
                rel = meta_file.relative_to(meta_root)
                # e.g. "a/b.txt.meta.json" -> object key "a/b.txt"
                object_key = rel.as_posix().removesuffix(".meta.json")
                object_path = bucket_path / object_key
                if not object_path.exists():
                    if not self.dry_run:
                        meta_file.unlink(missing_ok=True)
                    # Dry-run still counts what would have been removed.
                    result.orphaned_metadata_deleted += 1
            except (OSError, ValueError) as e:
                result.errors.append(f"legacy meta {bucket_name}/{meta_file.name}: {e}")
    except OSError as e:
        result.errors.append(f"scan legacy meta {bucket_name}: {e}")
|
||||||
|
|
||||||
|
def _clean_index_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
|
||||||
|
bucket_path = self.storage_root / bucket_name
|
||||||
|
try:
|
||||||
|
for index_file in meta_root.rglob("_index.json"):
|
||||||
|
if self._throttle():
|
||||||
|
return
|
||||||
|
if not index_file.is_file():
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
with open(index_file, "r", encoding="utf-8") as f:
|
||||||
|
index_data = json.load(f)
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
keys_to_remove = []
|
||||||
|
for key in index_data:
|
||||||
|
rel_dir = index_file.parent.relative_to(meta_root)
|
||||||
|
if rel_dir == Path("."):
|
||||||
|
full_key = key
|
||||||
|
else:
|
||||||
|
full_key = rel_dir.as_posix() + "/" + key
|
||||||
|
object_path = bucket_path / full_key
|
||||||
|
if not object_path.exists():
|
||||||
|
keys_to_remove.append(key)
|
||||||
|
|
||||||
|
if keys_to_remove:
|
||||||
|
if not self.dry_run:
|
||||||
|
for k in keys_to_remove:
|
||||||
|
index_data.pop(k, None)
|
||||||
|
if index_data:
|
||||||
|
try:
|
||||||
|
with open(index_file, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(index_data, f)
|
||||||
|
except OSError as e:
|
||||||
|
result.errors.append(f"write index {bucket_name}: {e}")
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
index_file.unlink(missing_ok=True)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
result.orphaned_metadata_deleted += len(keys_to_remove)
|
||||||
|
except OSError as e:
|
||||||
|
result.errors.append(f"scan index meta {bucket_name}: {e}")
|
||||||
|
|
||||||
|
def _clean_orphaned_versions(self, result: GCResult) -> None:
    """Delete version data for objects that no longer exist.

    Scans both the system-area versions tree and the legacy in-bucket
    ``.versions`` folder; per-key work is done by
    ``_clean_versions_for_key``.
    """
    bucket_names = self._list_bucket_names()

    for bucket_name in bucket_names:
        if self._shutdown:
            return
        bucket_path = self.storage_root / bucket_name
        # New layout first, then the legacy location inside the bucket.
        for versions_root in (
            self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR,
            self.storage_root / bucket_name / ".versions",
        ):
            if not versions_root.exists():
                continue
            try:
                for key_dir in versions_root.iterdir():
                    if self._throttle():
                        return
                    if not key_dir.is_dir():
                        continue
                    self._clean_versions_for_key(bucket_path, versions_root, key_dir, result)
            except OSError as e:
                result.errors.append(f"scan versions {bucket_name}: {e}")
|
||||||
|
|
||||||
|
def _clean_versions_for_key(
    self, bucket_path: Path, versions_root: Path, key_dir: Path, result: GCResult
) -> None:
    """Delete stored versions for one key whose live object is gone.

    *key_dir*'s path relative to *versions_root* mirrors the object key;
    if that object still exists in the bucket, nothing is touched.
    """
    try:
        rel = key_dir.relative_to(versions_root)
    except ValueError:
        # key_dir is not under versions_root; nothing we can map to a key.
        return

    object_path = bucket_path / rel
    if object_path.exists():
        return

    # Version payloads (.bin) and their sidecar metadata (.json).
    version_files = list(key_dir.glob("*.bin")) + list(key_dir.glob("*.json"))
    if not version_files:
        return

    for vf in version_files:
        try:
            # Only payload files contribute to bytes freed.
            size = vf.stat().st_size if vf.suffix == ".bin" else 0
            if not self.dry_run:
                vf.unlink(missing_ok=True)
            if vf.suffix == ".bin":
                result.orphaned_version_bytes_freed += size
            # Counted per file (payload and sidecar alike), dry-run included.
            result.orphaned_versions_deleted += 1
        except OSError as e:
            result.errors.append(f"version file {vf.name}: {e}")
|
||||||
|
|
||||||
|
def _clean_empty_dirs(self, result: GCResult) -> None:
    """Prune empty directory trees left behind by earlier cleanup passes."""
    sys_root = self._system_path()
    roots = [
        sys_root / self.SYSTEM_TMP_DIR,
        sys_root / self.SYSTEM_MULTIPART_DIR,
        sys_root / self.SYSTEM_BUCKETS_DIR,
    ]
    # Legacy helper folders inside each bucket are pruned as well.
    for name in self._list_bucket_names():
        bucket = self.storage_root / name
        roots.extend(bucket / sub for sub in (".meta", ".versions", ".multipart"))

    for root in roots:
        if root.exists():
            self._remove_empty_dirs_recursive(root, root, result)
|
||||||
|
|
||||||
|
def _remove_empty_dirs_recursive(self, path: Path, stop_at: Path, result: GCResult) -> bool:
    """Depth-first removal of empty directories under *path*.

    Returns True when *path* ended up empty (and was removed, or would
    have been in dry-run mode).  *stop_at* itself is never removed, only
    its contents.
    """
    if self._shutdown:
        return False
    if not path.is_dir():
        return False

    try:
        children = list(path.iterdir())
    except OSError:
        return False

    # A directory is removable only if every child directory was removed
    # and it contains no files.
    all_empty = True
    for child in children:
        if self._throttle():
            return False
        if child.is_dir():
            if not self._remove_empty_dirs_recursive(child, stop_at, result):
                all_empty = False
        else:
            all_empty = False

    if all_empty and path != stop_at:
        try:
            if not self.dry_run:
                path.rmdir()
            # Dry-run counts (and reports True) as if the rmdir happened,
            # so parents can be evaluated as empty too.
            result.empty_dirs_removed += 1
            return True
        except OSError:
            return False
    return all_empty
|
||||||
|
|
||||||
|
def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
    """Return past GC runs as plain dicts, paged by *limit*/*offset*."""
    return [
        record.to_dict()
        for record in self.history_store.get_history(limit, offset)
    ]
|
||||||
|
|
||||||
|
def get_status(self) -> dict:
    """Snapshot of the collector's configuration and live state.

    ``scan_elapsed_seconds`` is only present while a scan is running.
    """
    status: Dict[str, Any] = {
        # NOTE(review): with _shutdown False this is always True regardless
        # of the timer — confirm that is the intended meaning of "enabled".
        "enabled": not self._shutdown or self._timer is not None,
        "running": self._timer is not None and not self._shutdown,
        "scanning": self._scanning,
        "interval_hours": self.interval_seconds / 3600.0,
        "temp_file_max_age_hours": self.temp_file_max_age_hours,
        "multipart_max_age_days": self.multipart_max_age_days,
        "lock_file_max_age_hours": self.lock_file_max_age_hours,
        "dry_run": self.dry_run,
        # _io_throttle is stored in seconds; expose it as ms.
        "io_throttle_ms": round(self._io_throttle * 1000),
    }
    if self._scanning and self._scan_start_time:
        status["scan_elapsed_seconds"] = time.time() - self._scan_start_time
    return status
|
||||||
684
app/iam.py
684
app/iam.py
File diff suppressed because it is too large
Load Diff
882
app/integrity.py
Normal file
882
app/integrity.py
Normal file
@@ -0,0 +1,882 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
try:
|
||||||
|
import myfsio_core as _rc
|
||||||
|
_HAS_RUST = True
|
||||||
|
except ImportError:
|
||||||
|
_HAS_RUST = False
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_etag(path: Path) -> str:
    """MD5 hex digest of the file at *path* (S3-style ETag).

    Delegates to the native extension when available; otherwise streams
    the file through hashlib in 8 KiB chunks.
    """
    if _HAS_RUST:
        return _rc.md5_file(str(path))
    digest = hashlib.md5()
    with path.open("rb") as fh:
        while True:
            block = fh.read(8192)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class IntegrityIssue:
    """One concrete problem found during an integrity scan."""

    issue_type: str  # e.g. "corrupted_object", "orphaned_object"
    bucket: str
    key: str
    detail: str
    healed: bool = False
    heal_action: str = ""  # human-readable description of the repair, if any

    def to_dict(self) -> dict:
        """Plain-dict form for JSON serialization."""
        return dict(
            issue_type=self.issue_type,
            bucket=self.bucket,
            key=self.key,
            detail=self.detail,
            healed=self.healed,
            heal_action=self.heal_action,
        )
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class IntegrityResult:
    """Aggregated counters, issues and errors from one integrity scan."""

    corrupted_objects: int = 0
    orphaned_objects: int = 0
    phantom_metadata: int = 0
    stale_versions: int = 0
    etag_cache_inconsistencies: int = 0
    legacy_metadata_drifts: int = 0
    issues_healed: int = 0
    issues: List[IntegrityIssue] = field(default_factory=list)
    errors: List[str] = field(default_factory=list)
    objects_scanned: int = 0
    buckets_scanned: int = 0
    execution_time_seconds: float = 0.0

    def to_dict(self) -> dict:
        """Serializable snapshot; issue objects are expanded to dicts."""
        counter_names = (
            "corrupted_objects",
            "orphaned_objects",
            "phantom_metadata",
            "stale_versions",
            "etag_cache_inconsistencies",
            "legacy_metadata_drifts",
            "issues_healed",
        )
        payload = {name: getattr(self, name) for name in counter_names}
        payload["issues"] = [issue.to_dict() for issue in self.issues]
        payload["errors"] = self.errors
        payload["objects_scanned"] = self.objects_scanned
        payload["buckets_scanned"] = self.buckets_scanned
        payload["execution_time_seconds"] = self.execution_time_seconds
        return payload

    @property
    def total_issues(self) -> int:
        """Sum of every per-category issue counter."""
        return (
            self.corrupted_objects
            + self.orphaned_objects
            + self.phantom_metadata
            + self.stale_versions
            + self.etag_cache_inconsistencies
            + self.legacy_metadata_drifts
        )

    @property
    def has_issues(self) -> bool:
        """Whether the scan found at least one problem."""
        return self.total_issues > 0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class IntegrityExecutionRecord:
    """One completed integrity run, as persisted in the history file."""

    timestamp: float
    result: dict
    dry_run: bool
    auto_heal: bool

    def to_dict(self) -> dict:
        """Plain-dict form for JSON serialization."""
        return dict(
            timestamp=self.timestamp,
            result=self.result,
            dry_run=self.dry_run,
            auto_heal=self.auto_heal,
        )

    @classmethod
    def from_dict(cls, data: dict) -> IntegrityExecutionRecord:
        """Rebuild a record; missing flags default to False for old files."""
        return cls(
            timestamp=data["timestamp"],
            result=data["result"],
            dry_run=data.get("dry_run", False),
            auto_heal=data.get("auto_heal", False),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class IntegrityHistoryStore:
    """JSON-file-backed list of recent integrity run records."""

    def __init__(self, storage_root: Path, max_records: int = 50) -> None:
        self.storage_root = storage_root
        self.max_records = max_records
        self._lock = threading.Lock()

    def _get_path(self) -> Path:
        """Location of the history file inside the hidden system area."""
        return self.storage_root / ".myfsio.sys" / "config" / "integrity_history.json"

    def load(self) -> List[IntegrityExecutionRecord]:
        """Read all persisted records; any read/parse failure yields []."""
        history_file = self._get_path()
        if not history_file.exists():
            return []
        try:
            raw = json.loads(history_file.read_text(encoding="utf-8"))
            return [
                IntegrityExecutionRecord.from_dict(item)
                for item in raw.get("executions", [])
            ]
        except (OSError, ValueError, KeyError) as e:
            logger.error("Failed to load integrity history: %s", e)
            return []

    def save(self, records: List[IntegrityExecutionRecord]) -> None:
        """Persist at most ``max_records`` records, creating parent dirs."""
        history_file = self._get_path()
        history_file.parent.mkdir(parents=True, exist_ok=True)
        payload = {"executions": [rec.to_dict() for rec in records[: self.max_records]]}
        try:
            history_file.write_text(json.dumps(payload, indent=2), encoding="utf-8")
        except OSError as e:
            logger.error("Failed to save integrity history: %s", e)

    def add(self, record: IntegrityExecutionRecord) -> None:
        """Prepend a record (newest first) under the store lock."""
        with self._lock:
            current = self.load()
            current.insert(0, record)
            self.save(current)

    def get_history(self, limit: int = 50, offset: int = 0) -> List[IntegrityExecutionRecord]:
        """Return one page of records, newest first."""
        return self.load()[offset : offset + limit]
|
||||||
|
|
||||||
|
|
||||||
|
class IntegrityCursorStore:
    """Persists per-bucket "last scanned" timestamps so successive
    integrity runs visit the least-recently-scanned buckets first."""

    def __init__(self, storage_root: Path) -> None:
        self.storage_root = storage_root
        # Serializes read-modify-write cycles in update_bucket/clean_stale.
        self._lock = threading.Lock()

    def _get_path(self) -> Path:
        """Location of the cursor file inside the hidden system area."""
        return self.storage_root / ".myfsio.sys" / "config" / "integrity_cursor.json"

    def load(self) -> Dict[str, Any]:
        """Read cursor state; any failure yields a fresh empty structure."""
        path = self._get_path()
        if not path.exists():
            return {"buckets": {}}
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
            # Guard against hand-edited/corrupt files with the wrong shape.
            if not isinstance(data.get("buckets"), dict):
                return {"buckets": {}}
            return data
        except (OSError, ValueError, KeyError):
            return {"buckets": {}}

    def save(self, data: Dict[str, Any]) -> None:
        """Write cursor state, creating parent directories as needed."""
        path = self._get_path()
        path.parent.mkdir(parents=True, exist_ok=True)
        try:
            with open(path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
        except OSError as e:
            logger.error("Failed to save integrity cursor: %s", e)

    def update_bucket(self, bucket_name: str, timestamp: float) -> None:
        """Record that *bucket_name* was scanned at *timestamp*."""
        with self._lock:
            data = self.load()
            data["buckets"][bucket_name] = {"last_scanned": timestamp}
            self.save(data)

    def clean_stale(self, existing_buckets: List[str]) -> None:
        """Drop cursor entries for buckets that no longer exist."""
        with self._lock:
            data = self.load()
            existing_set = set(existing_buckets)
            stale_keys = [k for k in data["buckets"] if k not in existing_set]
            if stale_keys:
                for k in stale_keys:
                    del data["buckets"][k]
                self.save(data)

    def get_bucket_order(self, bucket_names: List[str]) -> List[str]:
        """Return *bucket_names* ordered oldest-scan-first.

        Buckets never scanned sort with key 0.0, i.e. ahead of any
        bucket that has a recorded timestamp.
        """
        data = self.load()
        buckets_info = data.get("buckets", {})

        def sort_key(name: str) -> float:
            entry = buckets_info.get(name)
            if entry is None:
                return 0.0
            return entry.get("last_scanned", 0.0)

        return sorted(bucket_names, key=sort_key)

    def get_info(self) -> Dict[str, Any]:
        """Summary of tracked buckets and their last-scanned timestamps."""
        data = self.load()
        buckets = data.get("buckets", {})
        return {
            "tracked_buckets": len(buckets),
            "buckets": {
                name: info.get("last_scanned")
                for name, info in buckets.items()
            },
        }
|
||||||
|
|
||||||
|
|
||||||
|
# Cap on per-scan issue details kept in memory (and persisted with the
# history record); counters keep incrementing past this limit.
MAX_ISSUES = 500
|
||||||
|
|
||||||
|
|
||||||
|
class IntegrityChecker:
|
||||||
|
SYSTEM_ROOT = ".myfsio.sys"
|
||||||
|
SYSTEM_BUCKETS_DIR = "buckets"
|
||||||
|
BUCKET_META_DIR = "meta"
|
||||||
|
BUCKET_VERSIONS_DIR = "versions"
|
||||||
|
INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
|
||||||
|
|
||||||
|
def __init__(
    self,
    storage_root: Path,
    interval_hours: float = 24.0,
    batch_size: int = 1000,
    auto_heal: bool = False,
    dry_run: bool = False,
    max_history: int = 50,
    io_throttle_ms: int = 10,
) -> None:
    """Create an integrity checker over *storage_root*.

    Args:
        storage_root: Root directory holding buckets and the system area.
        interval_hours: Delay between scheduled background scans.
        batch_size: Per-cycle object budget (see ``_batch_exhausted``).
        auto_heal: Default for repairing issues found during a scan.
        dry_run: Default for report-only mode.
        max_history: Number of run records kept in the history store.
        io_throttle_ms: Sleep inserted between filesystem operations;
            negative values are clamped to 0.
    """
    self.storage_root = Path(storage_root)
    self.interval_seconds = interval_hours * 3600.0
    self.batch_size = batch_size
    self.auto_heal = auto_heal
    self.dry_run = dry_run
    self._timer: Optional[threading.Timer] = None
    self._shutdown = False
    # Serializes scans; run_now acquires it non-blocking.
    self._lock = threading.Lock()
    self._scanning = False
    self._scan_start_time: Optional[float] = None
    # Stored in seconds.
    self._io_throttle = max(0, io_throttle_ms) / 1000.0
    self.history_store = IntegrityHistoryStore(storage_root, max_records=max_history)
    self.cursor_store = IntegrityCursorStore(self.storage_root)
|
||||||
|
|
||||||
|
def start(self) -> None:
    """Begin periodic background scans; no-op if already started."""
    if self._timer is not None:
        return
    self._shutdown = False
    self._schedule_next()
    logger.info(
        "Integrity checker started: interval=%.1fh, batch_size=%d, auto_heal=%s, dry_run=%s",
        self.interval_seconds / 3600.0,
        self.batch_size,
        self.auto_heal,
        self.dry_run,
    )
|
||||||
|
|
||||||
|
def stop(self) -> None:
    """Request shutdown and cancel any pending timer.

    A scan already in progress observes ``_shutdown`` via the throttle
    checks and winds down on its own.
    """
    self._shutdown = True
    if self._timer:
        self._timer.cancel()
        self._timer = None
    logger.info("Integrity checker stopped")
|
||||||
|
|
||||||
|
def _schedule_next(self) -> None:
    """Arm a one-shot daemon timer for the next scan cycle."""
    if self._shutdown:
        return
    self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
    # Daemon timer so a pending scan never blocks interpreter exit.
    self._timer.daemon = True
    self._timer.start()
|
||||||
|
|
||||||
|
def _run_cycle(self) -> None:
    """Timer callback: run one scan, then re-arm the timer.

    The broad except is a thread boundary — a failed cycle is logged
    and must not kill the periodic schedule.
    """
    if self._shutdown:
        return
    try:
        self.run_now()
    except Exception as e:
        logger.error("Integrity check cycle failed: %s", e)
    finally:
        self._schedule_next()
|
||||||
|
|
||||||
|
def run_now(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> IntegrityResult:
    """Run one full integrity scan synchronously.

    Args:
        auto_heal: Per-run override of the configured auto-heal flag.
        dry_run: Per-run override of the configured dry-run flag.

    Returns:
        The populated IntegrityResult (also appended to the history store).

    Raises:
        RuntimeError: If another scan currently holds the scan lock.
    """
    # Non-blocking acquire: concurrent callers fail fast instead of queuing.
    if not self._lock.acquire(blocking=False):
        raise RuntimeError("Integrity scan is already in progress")

    try:
        self._scanning = True
        self._scan_start_time = time.time()

        # None means "use the instance-level default".
        effective_auto_heal = auto_heal if auto_heal is not None else self.auto_heal
        effective_dry_run = dry_run if dry_run is not None else self.dry_run

        start = self._scan_start_time
        result = IntegrityResult()

        bucket_names = self._list_bucket_names()
        # Keep the cursor in sync with reality, then visit the
        # least-recently-scanned buckets first.
        self.cursor_store.clean_stale(bucket_names)
        ordered_buckets = self.cursor_store.get_bucket_order(bucket_names)

        for bucket_name in ordered_buckets:
            # Stop once the per-cycle object budget is spent (or on shutdown);
            # remaining buckets are picked up by the next cycle via the cursor.
            if self._batch_exhausted(result):
                break
            result.buckets_scanned += 1
            self._check_corrupted_objects(bucket_name, result, effective_auto_heal, effective_dry_run)
            self._check_orphaned_objects(bucket_name, result, effective_auto_heal, effective_dry_run)
            self._check_phantom_metadata(bucket_name, result, effective_auto_heal, effective_dry_run)
            self._check_stale_versions(bucket_name, result, effective_auto_heal, effective_dry_run)
            self._check_etag_cache(bucket_name, result, effective_auto_heal, effective_dry_run)
            self._check_legacy_metadata(bucket_name, result, effective_auto_heal, effective_dry_run)
            self.cursor_store.update_bucket(bucket_name, time.time())

        result.execution_time_seconds = time.time() - start

        # Only log when there is something to report.
        if result.has_issues or result.errors:
            logger.info(
                "Integrity check completed in %.2fs: corrupted=%d, orphaned=%d, phantom=%d, "
                "stale_versions=%d, etag_cache=%d, legacy_drift=%d, healed=%d, errors=%d%s",
                result.execution_time_seconds,
                result.corrupted_objects,
                result.orphaned_objects,
                result.phantom_metadata,
                result.stale_versions,
                result.etag_cache_inconsistencies,
                result.legacy_metadata_drifts,
                result.issues_healed,
                len(result.errors),
                " (dry run)" if effective_dry_run else "",
            )

        # Every run — including dry runs — is recorded in the history.
        record = IntegrityExecutionRecord(
            timestamp=time.time(),
            result=result.to_dict(),
            dry_run=effective_dry_run,
            auto_heal=effective_auto_heal,
        )
        self.history_store.add(record)

        return result
    finally:
        self._scanning = False
        self._scan_start_time = None
        self._lock.release()
|
||||||
|
|
||||||
|
def run_async(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> bool:
|
||||||
|
if self._scanning:
|
||||||
|
return False
|
||||||
|
t = threading.Thread(target=self.run_now, args=(auto_heal, dry_run), daemon=True)
|
||||||
|
t.start()
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _system_path(self) -> Path:
|
||||||
|
return self.storage_root / self.SYSTEM_ROOT
|
||||||
|
|
||||||
|
def _list_bucket_names(self) -> List[str]:
|
||||||
|
names = []
|
||||||
|
try:
|
||||||
|
for entry in self.storage_root.iterdir():
|
||||||
|
if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
|
||||||
|
names.append(entry.name)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
return names
|
||||||
|
|
||||||
|
def _throttle(self) -> bool:
|
||||||
|
if self._shutdown:
|
||||||
|
return True
|
||||||
|
if self._io_throttle > 0:
|
||||||
|
time.sleep(self._io_throttle)
|
||||||
|
return self._shutdown
|
||||||
|
|
||||||
|
def _batch_exhausted(self, result: IntegrityResult) -> bool:
|
||||||
|
return self._shutdown or result.objects_scanned >= self.batch_size
|
||||||
|
|
||||||
|
def _add_issue(self, result: IntegrityResult, issue: IntegrityIssue) -> None:
    """Record *issue* on the result, silently dropping it once the
    MAX_ISSUES cap is reached (counters keep advancing elsewhere)."""
    if len(result.issues) >= MAX_ISSUES:
        return
    result.issues.append(issue)
|
||||||
|
|
||||||
|
def _check_corrupted_objects(
    self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
) -> None:
    """Detect objects whose on-disk MD5 no longer matches the stored ETag.

    Walks every ``_index.json`` under the bucket's metadata tree, hashes
    each referenced file, and flags mismatches.  With *auto_heal* (and
    not *dry_run*) the index entry is rewritten to match the file.
    """
    bucket_path = self.storage_root / bucket_name
    meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR

    if not meta_root.exists():
        return

    try:
        for index_file in meta_root.rglob("_index.json"):
            if self._throttle():
                return
            if self._batch_exhausted(result):
                return
            if not index_file.is_file():
                continue
            try:
                index_data = json.loads(index_file.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError):
                # Unreadable/corrupt index files are handled by other checks.
                continue

            # list() so auto-heal can mutate index_data while iterating.
            for key_name, entry in list(index_data.items()):
                if self._throttle():
                    return
                if self._batch_exhausted(result):
                    return

                # Reconstruct the full object key from the index location.
                rel_dir = index_file.parent.relative_to(meta_root)
                if rel_dir == Path("."):
                    full_key = key_name
                else:
                    full_key = rel_dir.as_posix() + "/" + key_name

                object_path = bucket_path / full_key
                if not object_path.exists():
                    # Missing files are the phantom-metadata check's job.
                    continue

                result.objects_scanned += 1

                meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
                stored_etag = meta.get("__etag__")
                if not stored_etag:
                    # No recorded ETag: nothing to compare against.
                    continue

                try:
                    actual_etag = _compute_etag(object_path)
                except OSError:
                    continue

                if actual_etag != stored_etag:
                    result.corrupted_objects += 1
                    issue = IntegrityIssue(
                        issue_type="corrupted_object",
                        bucket=bucket_name,
                        key=full_key,
                        detail=f"stored_etag={stored_etag} actual_etag={actual_etag}",
                    )

                    if auto_heal and not dry_run:
                        try:
                            # Heal by trusting the file: refresh etag/size/mtime
                            # in the index entry.
                            stat = object_path.stat()
                            meta["__etag__"] = actual_etag
                            meta["__size__"] = str(stat.st_size)
                            meta["__last_modified__"] = str(stat.st_mtime)
                            index_data[key_name] = {"metadata": meta}
                            self._atomic_write_index(index_file, index_data)
                            issue.healed = True
                            issue.heal_action = "updated etag in index"
                            result.issues_healed += 1
                        except OSError as e:
                            result.errors.append(f"heal corrupted {bucket_name}/{full_key}: {e}")

                    self._add_issue(result, issue)
    except OSError as e:
        result.errors.append(f"check corrupted {bucket_name}: {e}")
|
||||||
|
|
||||||
|
def _check_orphaned_objects(
    self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
) -> None:
    """Detect files on disk that have no entry in the bucket's index.

    Walks every regular file in the bucket (skipping internal folders)
    and looks it up in the sibling ``_index.json``.  With *auto_heal*
    (and not *dry_run*) a fresh metadata entry is created from the file.
    """
    bucket_path = self.storage_root / bucket_name
    meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR

    try:
        for entry in bucket_path.rglob("*"):
            if self._throttle():
                return
            if self._batch_exhausted(result):
                return
            if not entry.is_file():
                continue
            try:
                rel = entry.relative_to(bucket_path)
            except ValueError:
                continue
            # Skip the bucket's internal bookkeeping folders.
            if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
                continue

            result.objects_scanned += 1
            full_key = rel.as_posix()
            key_name = rel.name
            parent = rel.parent

            # Each directory level keeps its own _index.json.
            if parent == Path("."):
                index_path = meta_root / "_index.json"
            else:
                index_path = meta_root / parent / "_index.json"

            has_entry = False
            if index_path.exists():
                try:
                    index_data = json.loads(index_path.read_text(encoding="utf-8"))
                    has_entry = key_name in index_data
                except (OSError, json.JSONDecodeError):
                    # Unreadable index: treat the file as unindexed.
                    pass

            if not has_entry:
                result.orphaned_objects += 1
                issue = IntegrityIssue(
                    issue_type="orphaned_object",
                    bucket=bucket_name,
                    key=full_key,
                    detail="file exists without metadata entry",
                )

                if auto_heal and not dry_run:
                    try:
                        # Build a minimal metadata entry from the file itself.
                        etag = _compute_etag(entry)
                        stat = entry.stat()
                        meta = {
                            "__etag__": etag,
                            "__size__": str(stat.st_size),
                            "__last_modified__": str(stat.st_mtime),
                        }
                        # Re-read the index to avoid clobbering entries added
                        # since the lookup above.
                        index_data = {}
                        if index_path.exists():
                            try:
                                index_data = json.loads(index_path.read_text(encoding="utf-8"))
                            except (OSError, json.JSONDecodeError):
                                pass
                        index_data[key_name] = {"metadata": meta}
                        self._atomic_write_index(index_path, index_data)
                        issue.healed = True
                        issue.heal_action = "created metadata entry"
                        result.issues_healed += 1
                    except OSError as e:
                        result.errors.append(f"heal orphaned {bucket_name}/{full_key}: {e}")

                self._add_issue(result, issue)
    except OSError as e:
        result.errors.append(f"check orphaned {bucket_name}: {e}")
|
||||||
|
|
||||||
|
def _check_phantom_metadata(
|
||||||
|
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
|
||||||
|
) -> None:
|
||||||
|
bucket_path = self.storage_root / bucket_name
|
||||||
|
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||||
|
|
||||||
|
if not meta_root.exists():
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
for index_file in meta_root.rglob("_index.json"):
|
||||||
|
if self._throttle():
|
||||||
|
return
|
||||||
|
if self._batch_exhausted(result):
|
||||||
|
return
|
||||||
|
if not index_file.is_file():
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
index_data = json.loads(index_file.read_text(encoding="utf-8"))
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
keys_to_remove = []
|
||||||
|
for key_name in list(index_data.keys()):
|
||||||
|
if self._batch_exhausted(result):
|
||||||
|
break
|
||||||
|
result.objects_scanned += 1
|
||||||
|
rel_dir = index_file.parent.relative_to(meta_root)
|
||||||
|
if rel_dir == Path("."):
|
||||||
|
full_key = key_name
|
||||||
|
else:
|
||||||
|
full_key = rel_dir.as_posix() + "/" + key_name
|
||||||
|
|
||||||
|
object_path = bucket_path / full_key
|
||||||
|
if not object_path.exists():
|
||||||
|
result.phantom_metadata += 1
|
||||||
|
issue = IntegrityIssue(
|
||||||
|
issue_type="phantom_metadata",
|
||||||
|
bucket=bucket_name,
|
||||||
|
key=full_key,
|
||||||
|
detail="metadata entry without file on disk",
|
||||||
|
)
|
||||||
|
if auto_heal and not dry_run:
|
||||||
|
keys_to_remove.append(key_name)
|
||||||
|
issue.healed = True
|
||||||
|
issue.heal_action = "removed stale index entry"
|
||||||
|
result.issues_healed += 1
|
||||||
|
self._add_issue(result, issue)
|
||||||
|
|
||||||
|
if keys_to_remove and auto_heal and not dry_run:
|
||||||
|
try:
|
||||||
|
for k in keys_to_remove:
|
||||||
|
index_data.pop(k, None)
|
||||||
|
if index_data:
|
||||||
|
self._atomic_write_index(index_file, index_data)
|
||||||
|
else:
|
||||||
|
index_file.unlink(missing_ok=True)
|
||||||
|
except OSError as e:
|
||||||
|
result.errors.append(f"heal phantom {bucket_name}: {e}")
|
||||||
|
except OSError as e:
|
||||||
|
result.errors.append(f"check phantom {bucket_name}: {e}")
|
||||||
|
|
||||||
|
def _check_stale_versions(
|
||||||
|
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
|
||||||
|
) -> None:
|
||||||
|
versions_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR
|
||||||
|
|
||||||
|
if not versions_root.exists():
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
for key_dir in versions_root.rglob("*"):
|
||||||
|
if self._throttle():
|
||||||
|
return
|
||||||
|
if self._batch_exhausted(result):
|
||||||
|
return
|
||||||
|
if not key_dir.is_dir():
|
||||||
|
continue
|
||||||
|
|
||||||
|
bin_files = {f.stem: f for f in key_dir.glob("*.bin")}
|
||||||
|
json_files = {f.stem: f for f in key_dir.glob("*.json")}
|
||||||
|
|
||||||
|
for stem, bin_file in bin_files.items():
|
||||||
|
if self._batch_exhausted(result):
|
||||||
|
return
|
||||||
|
result.objects_scanned += 1
|
||||||
|
if stem not in json_files:
|
||||||
|
result.stale_versions += 1
|
||||||
|
issue = IntegrityIssue(
|
||||||
|
issue_type="stale_version",
|
||||||
|
bucket=bucket_name,
|
||||||
|
key=f"{key_dir.relative_to(versions_root).as_posix()}/{bin_file.name}",
|
||||||
|
detail="version data without manifest",
|
||||||
|
)
|
||||||
|
if auto_heal and not dry_run:
|
||||||
|
try:
|
||||||
|
bin_file.unlink(missing_ok=True)
|
||||||
|
issue.healed = True
|
||||||
|
issue.heal_action = "removed orphaned version data"
|
||||||
|
result.issues_healed += 1
|
||||||
|
except OSError as e:
|
||||||
|
result.errors.append(f"heal stale version {bin_file}: {e}")
|
||||||
|
self._add_issue(result, issue)
|
||||||
|
|
||||||
|
for stem, json_file in json_files.items():
|
||||||
|
if self._batch_exhausted(result):
|
||||||
|
return
|
||||||
|
result.objects_scanned += 1
|
||||||
|
if stem not in bin_files:
|
||||||
|
result.stale_versions += 1
|
||||||
|
issue = IntegrityIssue(
|
||||||
|
issue_type="stale_version",
|
||||||
|
bucket=bucket_name,
|
||||||
|
key=f"{key_dir.relative_to(versions_root).as_posix()}/{json_file.name}",
|
||||||
|
detail="version manifest without data",
|
||||||
|
)
|
||||||
|
if auto_heal and not dry_run:
|
||||||
|
try:
|
||||||
|
json_file.unlink(missing_ok=True)
|
||||||
|
issue.healed = True
|
||||||
|
issue.heal_action = "removed orphaned version manifest"
|
||||||
|
result.issues_healed += 1
|
||||||
|
except OSError as e:
|
||||||
|
result.errors.append(f"heal stale version {json_file}: {e}")
|
||||||
|
self._add_issue(result, issue)
|
||||||
|
except OSError as e:
|
||||||
|
result.errors.append(f"check stale versions {bucket_name}: {e}")
|
||||||
|
|
||||||
|
def _check_etag_cache(
|
||||||
|
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
|
||||||
|
) -> None:
|
||||||
|
etag_index_path = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / "etag_index.json"
|
||||||
|
|
||||||
|
if not etag_index_path.exists():
|
||||||
|
return
|
||||||
|
|
||||||
|
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||||
|
if not meta_root.exists():
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
etag_cache = json.loads(etag_index_path.read_text(encoding="utf-8"))
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
return
|
||||||
|
|
||||||
|
found_mismatch = False
|
||||||
|
|
||||||
|
for full_key, cached_etag in etag_cache.items():
|
||||||
|
if self._batch_exhausted(result):
|
||||||
|
break
|
||||||
|
result.objects_scanned += 1
|
||||||
|
key_path = Path(full_key)
|
||||||
|
key_name = key_path.name
|
||||||
|
parent = key_path.parent
|
||||||
|
|
||||||
|
if parent == Path("."):
|
||||||
|
index_path = meta_root / "_index.json"
|
||||||
|
else:
|
||||||
|
index_path = meta_root / parent / "_index.json"
|
||||||
|
|
||||||
|
if not index_path.exists():
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
entry = index_data.get(key_name)
|
||||||
|
if not entry:
|
||||||
|
continue
|
||||||
|
|
||||||
|
meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
|
||||||
|
stored_etag = meta.get("__etag__")
|
||||||
|
|
||||||
|
if stored_etag and cached_etag != stored_etag:
|
||||||
|
result.etag_cache_inconsistencies += 1
|
||||||
|
found_mismatch = True
|
||||||
|
issue = IntegrityIssue(
|
||||||
|
issue_type="etag_cache_inconsistency",
|
||||||
|
bucket=bucket_name,
|
||||||
|
key=full_key,
|
||||||
|
detail=f"cached_etag={cached_etag} index_etag={stored_etag}",
|
||||||
|
)
|
||||||
|
self._add_issue(result, issue)
|
||||||
|
|
||||||
|
if found_mismatch and auto_heal and not dry_run:
|
||||||
|
try:
|
||||||
|
etag_index_path.unlink(missing_ok=True)
|
||||||
|
for issue in result.issues:
|
||||||
|
if issue.issue_type == "etag_cache_inconsistency" and issue.bucket == bucket_name and not issue.healed:
|
||||||
|
issue.healed = True
|
||||||
|
issue.heal_action = "deleted etag_index.json"
|
||||||
|
result.issues_healed += 1
|
||||||
|
except OSError as e:
|
||||||
|
result.errors.append(f"heal etag cache {bucket_name}: {e}")
|
||||||
|
|
||||||
|
def _check_legacy_metadata(
|
||||||
|
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
|
||||||
|
) -> None:
|
||||||
|
legacy_meta_root = self.storage_root / bucket_name / ".meta"
|
||||||
|
if not legacy_meta_root.exists():
|
||||||
|
return
|
||||||
|
|
||||||
|
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||||
|
|
||||||
|
try:
|
||||||
|
for meta_file in legacy_meta_root.rglob("*.meta.json"):
|
||||||
|
if self._throttle():
|
||||||
|
return
|
||||||
|
if self._batch_exhausted(result):
|
||||||
|
return
|
||||||
|
if not meta_file.is_file():
|
||||||
|
continue
|
||||||
|
|
||||||
|
result.objects_scanned += 1
|
||||||
|
try:
|
||||||
|
rel = meta_file.relative_to(legacy_meta_root)
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
full_key = rel.as_posix().removesuffix(".meta.json")
|
||||||
|
key_path = Path(full_key)
|
||||||
|
key_name = key_path.name
|
||||||
|
parent = key_path.parent
|
||||||
|
|
||||||
|
if parent == Path("."):
|
||||||
|
index_path = meta_root / "_index.json"
|
||||||
|
else:
|
||||||
|
index_path = meta_root / parent / "_index.json"
|
||||||
|
|
||||||
|
try:
|
||||||
|
legacy_data = json.loads(meta_file.read_text(encoding="utf-8"))
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
index_entry = None
|
||||||
|
if index_path.exists():
|
||||||
|
try:
|
||||||
|
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||||
|
index_entry = index_data.get(key_name)
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
if index_entry is None:
|
||||||
|
result.legacy_metadata_drifts += 1
|
||||||
|
issue = IntegrityIssue(
|
||||||
|
issue_type="legacy_metadata_drift",
|
||||||
|
bucket=bucket_name,
|
||||||
|
key=full_key,
|
||||||
|
detail="unmigrated legacy .meta.json",
|
||||||
|
)
|
||||||
|
|
||||||
|
if auto_heal and not dry_run:
|
||||||
|
try:
|
||||||
|
index_data = {}
|
||||||
|
if index_path.exists():
|
||||||
|
try:
|
||||||
|
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
pass
|
||||||
|
index_data[key_name] = {"metadata": legacy_data}
|
||||||
|
self._atomic_write_index(index_path, index_data)
|
||||||
|
meta_file.unlink(missing_ok=True)
|
||||||
|
issue.healed = True
|
||||||
|
issue.heal_action = "migrated to index and deleted legacy file"
|
||||||
|
result.issues_healed += 1
|
||||||
|
except OSError as e:
|
||||||
|
result.errors.append(f"heal legacy {bucket_name}/{full_key}: {e}")
|
||||||
|
|
||||||
|
self._add_issue(result, issue)
|
||||||
|
else:
|
||||||
|
index_meta = index_entry.get("metadata", {}) if isinstance(index_entry, dict) else {}
|
||||||
|
if legacy_data != index_meta:
|
||||||
|
result.legacy_metadata_drifts += 1
|
||||||
|
issue = IntegrityIssue(
|
||||||
|
issue_type="legacy_metadata_drift",
|
||||||
|
bucket=bucket_name,
|
||||||
|
key=full_key,
|
||||||
|
detail="legacy .meta.json differs from index entry",
|
||||||
|
)
|
||||||
|
|
||||||
|
if auto_heal and not dry_run:
|
||||||
|
try:
|
||||||
|
meta_file.unlink(missing_ok=True)
|
||||||
|
issue.healed = True
|
||||||
|
issue.heal_action = "deleted legacy file (index is authoritative)"
|
||||||
|
result.issues_healed += 1
|
||||||
|
except OSError as e:
|
||||||
|
result.errors.append(f"heal legacy drift {bucket_name}/{full_key}: {e}")
|
||||||
|
|
||||||
|
self._add_issue(result, issue)
|
||||||
|
except OSError as e:
|
||||||
|
result.errors.append(f"check legacy meta {bucket_name}: {e}")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _atomic_write_index(index_path: Path, data: Dict[str, Any]) -> None:
|
||||||
|
index_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
tmp_path = index_path.with_suffix(".tmp")
|
||||||
|
try:
|
||||||
|
with open(tmp_path, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(data, f)
|
||||||
|
os.replace(str(tmp_path), str(index_path))
|
||||||
|
except BaseException:
|
||||||
|
try:
|
||||||
|
tmp_path.unlink(missing_ok=True)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
raise
|
||||||
|
|
||||||
|
def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
|
||||||
|
records = self.history_store.get_history(limit, offset)
|
||||||
|
return [r.to_dict() for r in records]
|
||||||
|
|
||||||
|
def get_status(self) -> dict:
|
||||||
|
status: Dict[str, Any] = {
|
||||||
|
"enabled": not self._shutdown or self._timer is not None,
|
||||||
|
"running": self._timer is not None and not self._shutdown,
|
||||||
|
"scanning": self._scanning,
|
||||||
|
"interval_hours": self.interval_seconds / 3600.0,
|
||||||
|
"batch_size": self.batch_size,
|
||||||
|
"auto_heal": self.auto_heal,
|
||||||
|
"dry_run": self.dry_run,
|
||||||
|
"io_throttle_ms": round(self._io_throttle * 1000),
|
||||||
|
}
|
||||||
|
if self._scanning and self._scan_start_time is not None:
|
||||||
|
status["scan_elapsed_seconds"] = round(time.time() - self._scan_start_time, 1)
|
||||||
|
status["cursor"] = self.cursor_store.get_info()
|
||||||
|
return status
|
||||||
@@ -15,29 +15,23 @@ from typing import Any, Dict, List, Optional
|
|||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
from urllib3.util.connection import create_connection as _urllib3_create_connection
|
||||||
|
|
||||||
|
|
||||||
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
def _resolve_and_check_url(url: str, allow_internal: bool = False) -> Optional[str]:
|
||||||
"""Check if a URL is safe to make requests to (not internal/private).
|
|
||||||
|
|
||||||
Args:
|
|
||||||
url: The URL to check.
|
|
||||||
allow_internal: If True, allows internal/private IP addresses.
|
|
||||||
Use for self-hosted deployments on internal networks.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
parsed = urlparse(url)
|
parsed = urlparse(url)
|
||||||
hostname = parsed.hostname
|
hostname = parsed.hostname
|
||||||
if not hostname:
|
if not hostname:
|
||||||
return False
|
return None
|
||||||
cloud_metadata_hosts = {
|
cloud_metadata_hosts = {
|
||||||
"metadata.google.internal",
|
"metadata.google.internal",
|
||||||
"169.254.169.254",
|
"169.254.169.254",
|
||||||
}
|
}
|
||||||
if hostname.lower() in cloud_metadata_hosts:
|
if hostname.lower() in cloud_metadata_hosts:
|
||||||
return False
|
return None
|
||||||
if allow_internal:
|
if allow_internal:
|
||||||
return True
|
return hostname
|
||||||
blocked_hosts = {
|
blocked_hosts = {
|
||||||
"localhost",
|
"localhost",
|
||||||
"127.0.0.1",
|
"127.0.0.1",
|
||||||
@@ -46,17 +40,46 @@ def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
|||||||
"[::1]",
|
"[::1]",
|
||||||
}
|
}
|
||||||
if hostname.lower() in blocked_hosts:
|
if hostname.lower() in blocked_hosts:
|
||||||
return False
|
return None
|
||||||
try:
|
try:
|
||||||
resolved_ip = socket.gethostbyname(hostname)
|
resolved_ip = socket.gethostbyname(hostname)
|
||||||
ip = ipaddress.ip_address(resolved_ip)
|
ip = ipaddress.ip_address(resolved_ip)
|
||||||
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
|
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
|
||||||
return False
|
return None
|
||||||
|
return resolved_ip
|
||||||
except (socket.gaierror, ValueError):
|
except (socket.gaierror, ValueError):
|
||||||
return False
|
return None
|
||||||
return True
|
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
||||||
|
return _resolve_and_check_url(url, allow_internal) is not None
|
||||||
|
|
||||||
|
|
||||||
|
_dns_pin_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def _pinned_post(url: str, pinned_ip: str, **kwargs: Any) -> requests.Response:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
hostname = parsed.hostname or ""
|
||||||
|
session = requests.Session()
|
||||||
|
original_create = _urllib3_create_connection
|
||||||
|
|
||||||
|
def _create_pinned(address: Any, *args: Any, **kw: Any) -> Any:
|
||||||
|
host, req_port = address
|
||||||
|
if host == hostname:
|
||||||
|
return original_create((pinned_ip, req_port), *args, **kw)
|
||||||
|
return original_create(address, *args, **kw)
|
||||||
|
|
||||||
|
import urllib3.util.connection as _conn_mod
|
||||||
|
with _dns_pin_lock:
|
||||||
|
_conn_mod.create_connection = _create_pinned
|
||||||
|
try:
|
||||||
|
return session.post(url, **kwargs)
|
||||||
|
finally:
|
||||||
|
_conn_mod.create_connection = original_create
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -344,16 +367,18 @@ class NotificationService:
|
|||||||
self._queue.task_done()
|
self._queue.task_done()
|
||||||
|
|
||||||
def _send_notification(self, event: NotificationEvent, destination: WebhookDestination) -> None:
|
def _send_notification(self, event: NotificationEvent, destination: WebhookDestination) -> None:
|
||||||
if not _is_safe_url(destination.url, allow_internal=self._allow_internal_endpoints):
|
resolved_ip = _resolve_and_check_url(destination.url, allow_internal=self._allow_internal_endpoints)
|
||||||
raise RuntimeError(f"Blocked request to cloud metadata service (SSRF protection): {destination.url}")
|
if not resolved_ip:
|
||||||
|
raise RuntimeError(f"Blocked request (SSRF protection): {destination.url}")
|
||||||
payload = event.to_s3_event()
|
payload = event.to_s3_event()
|
||||||
headers = {"Content-Type": "application/json", **destination.headers}
|
headers = {"Content-Type": "application/json", **destination.headers}
|
||||||
|
|
||||||
last_error = None
|
last_error = None
|
||||||
for attempt in range(destination.retry_count):
|
for attempt in range(destination.retry_count):
|
||||||
try:
|
try:
|
||||||
response = requests.post(
|
response = _pinned_post(
|
||||||
destination.url,
|
destination.url,
|
||||||
|
resolved_ip,
|
||||||
json=payload,
|
json=payload,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
timeout=destination.timeout_seconds,
|
timeout=destination.timeout_seconds,
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import logging
|
|||||||
import random
|
import random
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
from collections import defaultdict
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -138,8 +139,8 @@ class OperationMetricsCollector:
|
|||||||
self.interval_seconds = interval_minutes * 60
|
self.interval_seconds = interval_minutes * 60
|
||||||
self.retention_hours = retention_hours
|
self.retention_hours = retention_hours
|
||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
self._by_method: Dict[str, OperationStats] = {}
|
self._by_method: Dict[str, OperationStats] = defaultdict(OperationStats)
|
||||||
self._by_endpoint: Dict[str, OperationStats] = {}
|
self._by_endpoint: Dict[str, OperationStats] = defaultdict(OperationStats)
|
||||||
self._by_status_class: Dict[str, int] = {}
|
self._by_status_class: Dict[str, int] = {}
|
||||||
self._error_codes: Dict[str, int] = {}
|
self._error_codes: Dict[str, int] = {}
|
||||||
self._totals = OperationStats()
|
self._totals = OperationStats()
|
||||||
@@ -211,8 +212,8 @@ class OperationMetricsCollector:
|
|||||||
self._prune_old_snapshots()
|
self._prune_old_snapshots()
|
||||||
self._save_history()
|
self._save_history()
|
||||||
|
|
||||||
self._by_method.clear()
|
self._by_method = defaultdict(OperationStats)
|
||||||
self._by_endpoint.clear()
|
self._by_endpoint = defaultdict(OperationStats)
|
||||||
self._by_status_class.clear()
|
self._by_status_class.clear()
|
||||||
self._error_codes.clear()
|
self._error_codes.clear()
|
||||||
self._totals = OperationStats()
|
self._totals = OperationStats()
|
||||||
@@ -232,12 +233,7 @@ class OperationMetricsCollector:
|
|||||||
status_class = f"{status_code // 100}xx"
|
status_class = f"{status_code // 100}xx"
|
||||||
|
|
||||||
with self._lock:
|
with self._lock:
|
||||||
if method not in self._by_method:
|
|
||||||
self._by_method[method] = OperationStats()
|
|
||||||
self._by_method[method].record(latency_ms, success, bytes_in, bytes_out)
|
self._by_method[method].record(latency_ms, success, bytes_in, bytes_out)
|
||||||
|
|
||||||
if endpoint_type not in self._by_endpoint:
|
|
||||||
self._by_endpoint[endpoint_type] = OperationStats()
|
|
||||||
self._by_endpoint[endpoint_type].record(latency_ms, success, bytes_in, bytes_out)
|
self._by_endpoint[endpoint_type].record(latency_ms, success, bytes_in, bytes_out)
|
||||||
|
|
||||||
self._by_status_class[status_class] = self._by_status_class.get(status_class, 0) + 1
|
self._by_status_class[status_class] = self._by_status_class.get(status_class, 0) + 1
|
||||||
|
|||||||
614
app/s3_api.py
614
app/s3_api.py
@@ -85,6 +85,9 @@ def _bucket_policies() -> BucketPolicyStore:
|
|||||||
|
|
||||||
|
|
||||||
def _build_policy_context() -> Dict[str, Any]:
|
def _build_policy_context() -> Dict[str, Any]:
|
||||||
|
cached = getattr(g, "_policy_context", None)
|
||||||
|
if cached is not None:
|
||||||
|
return cached
|
||||||
ctx: Dict[str, Any] = {}
|
ctx: Dict[str, Any] = {}
|
||||||
if request.headers.get("Referer"):
|
if request.headers.get("Referer"):
|
||||||
ctx["aws:Referer"] = request.headers.get("Referer")
|
ctx["aws:Referer"] = request.headers.get("Referer")
|
||||||
@@ -98,6 +101,7 @@ def _build_policy_context() -> Dict[str, Any]:
|
|||||||
ctx["aws:SecureTransport"] = str(request.is_secure).lower()
|
ctx["aws:SecureTransport"] = str(request.is_secure).lower()
|
||||||
if request.headers.get("User-Agent"):
|
if request.headers.get("User-Agent"):
|
||||||
ctx["aws:UserAgent"] = request.headers.get("User-Agent")
|
ctx["aws:UserAgent"] = request.headers.get("User-Agent")
|
||||||
|
g._policy_context = ctx
|
||||||
return ctx
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
@@ -267,39 +271,6 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
|
|||||||
if not secret_key:
|
if not secret_key:
|
||||||
raise IamError("SignatureDoesNotMatch")
|
raise IamError("SignatureDoesNotMatch")
|
||||||
|
|
||||||
method = req.method
|
|
||||||
canonical_uri = _get_canonical_uri(req)
|
|
||||||
|
|
||||||
query_args = []
|
|
||||||
for key, value in req.args.items(multi=True):
|
|
||||||
query_args.append((key, value))
|
|
||||||
query_args.sort(key=lambda x: (x[0], x[1]))
|
|
||||||
|
|
||||||
canonical_query_parts = []
|
|
||||||
for k, v in query_args:
|
|
||||||
canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}")
|
|
||||||
canonical_query_string = "&".join(canonical_query_parts)
|
|
||||||
|
|
||||||
signed_headers_list = signed_headers_str.split(";")
|
|
||||||
canonical_headers_parts = []
|
|
||||||
for header in signed_headers_list:
|
|
||||||
header_val = req.headers.get(header)
|
|
||||||
if header_val is None:
|
|
||||||
header_val = ""
|
|
||||||
|
|
||||||
if header.lower() == 'expect' and header_val == "":
|
|
||||||
header_val = "100-continue"
|
|
||||||
|
|
||||||
header_val = " ".join(header_val.split())
|
|
||||||
canonical_headers_parts.append(f"{header.lower()}:{header_val}\n")
|
|
||||||
canonical_headers = "".join(canonical_headers_parts)
|
|
||||||
|
|
||||||
payload_hash = req.headers.get("X-Amz-Content-Sha256")
|
|
||||||
if not payload_hash:
|
|
||||||
payload_hash = hashlib.sha256(req.get_data()).hexdigest()
|
|
||||||
|
|
||||||
canonical_request = f"{method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers_str}\n{payload_hash}"
|
|
||||||
|
|
||||||
amz_date = req.headers.get("X-Amz-Date") or req.headers.get("Date")
|
amz_date = req.headers.get("X-Amz-Date") or req.headers.get("Date")
|
||||||
if not amz_date:
|
if not amz_date:
|
||||||
raise IamError("Missing Date header")
|
raise IamError("Missing Date header")
|
||||||
@@ -325,19 +296,51 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
|
|||||||
if not required_headers.issubset(signed_headers_set):
|
if not required_headers.issubset(signed_headers_set):
|
||||||
raise IamError("Required headers not signed")
|
raise IamError("Required headers not signed")
|
||||||
|
|
||||||
credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
|
canonical_uri = _get_canonical_uri(req)
|
||||||
signing_key = _get_signature_key(secret_key, date_stamp, region, service)
|
payload_hash = req.headers.get("X-Amz-Content-Sha256") or "UNSIGNED-PAYLOAD"
|
||||||
|
|
||||||
if _HAS_RUST:
|
if _HAS_RUST:
|
||||||
string_to_sign = _rc.build_string_to_sign(amz_date, credential_scope, canonical_request)
|
query_params = list(req.args.items(multi=True))
|
||||||
calculated_signature = _rc.compute_signature(signing_key, string_to_sign)
|
header_values = []
|
||||||
|
for h in signed_headers_str.split(";"):
|
||||||
|
val = req.headers.get(h) or ""
|
||||||
|
if h.lower() == "expect" and val == "":
|
||||||
|
val = "100-continue"
|
||||||
|
header_values.append((h, val))
|
||||||
|
if not _rc.verify_sigv4_signature(
|
||||||
|
req.method, canonical_uri, query_params, signed_headers_str,
|
||||||
|
header_values, payload_hash, amz_date, date_stamp, region,
|
||||||
|
service, secret_key, signature,
|
||||||
|
):
|
||||||
|
raise IamError("SignatureDoesNotMatch")
|
||||||
else:
|
else:
|
||||||
|
method = req.method
|
||||||
|
query_args = sorted(req.args.items(multi=True), key=lambda x: (x[0], x[1]))
|
||||||
|
canonical_query_parts = []
|
||||||
|
for k, v in query_args:
|
||||||
|
canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}")
|
||||||
|
canonical_query_string = "&".join(canonical_query_parts)
|
||||||
|
|
||||||
|
signed_headers_list = signed_headers_str.split(";")
|
||||||
|
canonical_headers_parts = []
|
||||||
|
for header in signed_headers_list:
|
||||||
|
header_val = req.headers.get(header)
|
||||||
|
if header_val is None:
|
||||||
|
header_val = ""
|
||||||
|
if header.lower() == 'expect' and header_val == "":
|
||||||
|
header_val = "100-continue"
|
||||||
|
header_val = " ".join(header_val.split())
|
||||||
|
canonical_headers_parts.append(f"{header.lower()}:{header_val}\n")
|
||||||
|
canonical_headers = "".join(canonical_headers_parts)
|
||||||
|
|
||||||
|
canonical_request = f"{method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers_str}\n{payload_hash}"
|
||||||
|
|
||||||
|
credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
|
||||||
|
signing_key = _get_signature_key(secret_key, date_stamp, region, service)
|
||||||
string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}"
|
string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}"
|
||||||
calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
|
calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||||
|
if not hmac.compare_digest(calculated_signature, signature):
|
||||||
if not hmac.compare_digest(calculated_signature, signature):
|
raise IamError("SignatureDoesNotMatch")
|
||||||
if current_app.config.get("DEBUG_SIGV4"):
|
|
||||||
logger.warning("SigV4 signature mismatch for %s %s", method, req.path)
|
|
||||||
raise IamError("SignatureDoesNotMatch")
|
|
||||||
|
|
||||||
session_token = req.headers.get("X-Amz-Security-Token")
|
session_token = req.headers.get("X-Amz-Security-Token")
|
||||||
if session_token:
|
if session_token:
|
||||||
@@ -368,12 +371,19 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
|
|||||||
raise IamError("Invalid Date format")
|
raise IamError("Invalid Date format")
|
||||||
|
|
||||||
now = datetime.now(timezone.utc)
|
now = datetime.now(timezone.utc)
|
||||||
|
tolerance = timedelta(seconds=current_app.config.get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900))
|
||||||
|
if req_time > now + tolerance:
|
||||||
|
raise IamError("Request date is too far in the future")
|
||||||
try:
|
try:
|
||||||
expires_seconds = int(expires)
|
expires_seconds = int(expires)
|
||||||
if expires_seconds <= 0:
|
if expires_seconds <= 0:
|
||||||
raise IamError("Invalid Expires value: must be positive")
|
raise IamError("Invalid Expires value: must be positive")
|
||||||
except ValueError:
|
except ValueError:
|
||||||
raise IamError("Invalid Expires value: must be an integer")
|
raise IamError("Invalid Expires value: must be an integer")
|
||||||
|
min_expiry = current_app.config.get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1)
|
||||||
|
max_expiry = current_app.config.get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800)
|
||||||
|
if expires_seconds < min_expiry or expires_seconds > max_expiry:
|
||||||
|
raise IamError(f"Expiration must be between {min_expiry} second(s) and {max_expiry} seconds")
|
||||||
if now > req_time + timedelta(seconds=expires_seconds):
|
if now > req_time + timedelta(seconds=expires_seconds):
|
||||||
raise IamError("Request expired")
|
raise IamError("Request expired")
|
||||||
|
|
||||||
@@ -381,53 +391,63 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
|
|||||||
if not secret_key:
|
if not secret_key:
|
||||||
raise IamError("Invalid access key")
|
raise IamError("Invalid access key")
|
||||||
|
|
||||||
method = req.method
|
|
||||||
canonical_uri = _get_canonical_uri(req)
|
canonical_uri = _get_canonical_uri(req)
|
||||||
|
|
||||||
query_args = []
|
|
||||||
for key, value in req.args.items(multi=True):
|
|
||||||
if key != "X-Amz-Signature":
|
|
||||||
query_args.append((key, value))
|
|
||||||
query_args.sort(key=lambda x: (x[0], x[1]))
|
|
||||||
|
|
||||||
canonical_query_parts = []
|
|
||||||
for k, v in query_args:
|
|
||||||
canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}")
|
|
||||||
canonical_query_string = "&".join(canonical_query_parts)
|
|
||||||
|
|
||||||
signed_headers_list = signed_headers_str.split(";")
|
|
||||||
canonical_headers_parts = []
|
|
||||||
for header in signed_headers_list:
|
|
||||||
val = req.headers.get(header, "").strip()
|
|
||||||
if header.lower() == 'expect' and val == "":
|
|
||||||
val = "100-continue"
|
|
||||||
val = " ".join(val.split())
|
|
||||||
canonical_headers_parts.append(f"{header.lower()}:{val}\n")
|
|
||||||
canonical_headers = "".join(canonical_headers_parts)
|
|
||||||
|
|
||||||
payload_hash = "UNSIGNED-PAYLOAD"
|
|
||||||
|
|
||||||
canonical_request = "\n".join([
|
|
||||||
method,
|
|
||||||
canonical_uri,
|
|
||||||
canonical_query_string,
|
|
||||||
canonical_headers,
|
|
||||||
signed_headers_str,
|
|
||||||
payload_hash
|
|
||||||
])
|
|
||||||
|
|
||||||
credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
|
|
||||||
signing_key = _get_signature_key(secret_key, date_stamp, region, service)
|
|
||||||
if _HAS_RUST:
|
if _HAS_RUST:
|
||||||
string_to_sign = _rc.build_string_to_sign(amz_date, credential_scope, canonical_request)
|
query_params = [(k, v) for k, v in req.args.items(multi=True) if k != "X-Amz-Signature"]
|
||||||
calculated_signature = _rc.compute_signature(signing_key, string_to_sign)
|
header_values = []
|
||||||
|
for h in signed_headers_str.split(";"):
|
||||||
|
val = req.headers.get(h) or ""
|
||||||
|
if h.lower() == "expect" and val == "":
|
||||||
|
val = "100-continue"
|
||||||
|
header_values.append((h, val))
|
||||||
|
if not _rc.verify_sigv4_signature(
|
||||||
|
req.method, canonical_uri, query_params, signed_headers_str,
|
||||||
|
header_values, "UNSIGNED-PAYLOAD", amz_date, date_stamp, region,
|
||||||
|
service, secret_key, signature,
|
||||||
|
):
|
||||||
|
raise IamError("SignatureDoesNotMatch")
|
||||||
else:
|
else:
|
||||||
|
method = req.method
|
||||||
|
query_args = []
|
||||||
|
for key, value in req.args.items(multi=True):
|
||||||
|
if key != "X-Amz-Signature":
|
||||||
|
query_args.append((key, value))
|
||||||
|
query_args.sort(key=lambda x: (x[0], x[1]))
|
||||||
|
|
||||||
|
canonical_query_parts = []
|
||||||
|
for k, v in query_args:
|
||||||
|
canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}")
|
||||||
|
canonical_query_string = "&".join(canonical_query_parts)
|
||||||
|
|
||||||
|
signed_headers_list = signed_headers_str.split(";")
|
||||||
|
canonical_headers_parts = []
|
||||||
|
for header in signed_headers_list:
|
||||||
|
val = req.headers.get(header, "").strip()
|
||||||
|
if header.lower() == 'expect' and val == "":
|
||||||
|
val = "100-continue"
|
||||||
|
val = " ".join(val.split())
|
||||||
|
canonical_headers_parts.append(f"{header.lower()}:{val}\n")
|
||||||
|
canonical_headers = "".join(canonical_headers_parts)
|
||||||
|
|
||||||
|
payload_hash = "UNSIGNED-PAYLOAD"
|
||||||
|
|
||||||
|
canonical_request = "\n".join([
|
||||||
|
method,
|
||||||
|
canonical_uri,
|
||||||
|
canonical_query_string,
|
||||||
|
canonical_headers,
|
||||||
|
signed_headers_str,
|
||||||
|
payload_hash
|
||||||
|
])
|
||||||
|
|
||||||
|
credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
|
||||||
|
signing_key = _get_signature_key(secret_key, date_stamp, region, service)
|
||||||
hashed_request = hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()
|
hashed_request = hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()
|
||||||
string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashed_request}"
|
string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashed_request}"
|
||||||
calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
|
calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||||
|
if not hmac.compare_digest(calculated_signature, signature):
|
||||||
if not hmac.compare_digest(calculated_signature, signature):
|
raise IamError("SignatureDoesNotMatch")
|
||||||
raise IamError("SignatureDoesNotMatch")
|
|
||||||
|
|
||||||
session_token = req.args.get("X-Amz-Security-Token")
|
session_token = req.args.get("X-Amz-Security-Token")
|
||||||
if session_token:
|
if session_token:
|
||||||
@@ -478,7 +498,7 @@ def _authorize_action(principal: Principal | None, bucket_name: str | None, acti
|
|||||||
iam_error: IamError | None = None
|
iam_error: IamError | None = None
|
||||||
if principal is not None:
|
if principal is not None:
|
||||||
try:
|
try:
|
||||||
_iam().authorize(principal, bucket_name, action)
|
_iam().authorize(principal, bucket_name, action, object_key=object_key)
|
||||||
iam_allowed = True
|
iam_allowed = True
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
iam_error = exc
|
iam_error = exc
|
||||||
@@ -586,7 +606,11 @@ def _validate_presigned_request(action: str, bucket_name: str, object_key: str)
|
|||||||
request_time = datetime.strptime(amz_date, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc)
|
request_time = datetime.strptime(amz_date, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
raise IamError("Invalid X-Amz-Date") from exc
|
raise IamError("Invalid X-Amz-Date") from exc
|
||||||
if datetime.now(timezone.utc) > request_time + timedelta(seconds=expiry):
|
now = datetime.now(timezone.utc)
|
||||||
|
tolerance = timedelta(seconds=current_app.config.get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900))
|
||||||
|
if request_time > now + tolerance:
|
||||||
|
raise IamError("Request date is too far in the future")
|
||||||
|
if now > request_time + timedelta(seconds=expiry):
|
||||||
raise IamError("Presigned URL expired")
|
raise IamError("Presigned URL expired")
|
||||||
|
|
||||||
signed_headers_list = [header.strip().lower() for header in signed_headers.split(";") if header]
|
signed_headers_list = [header.strip().lower() for header in signed_headers.split(";") if header]
|
||||||
@@ -662,7 +686,7 @@ def _extract_request_metadata() -> Dict[str, str]:
|
|||||||
for header, value in request.headers.items():
|
for header, value in request.headers.items():
|
||||||
if header.lower().startswith("x-amz-meta-"):
|
if header.lower().startswith("x-amz-meta-"):
|
||||||
key = header[11:]
|
key = header[11:]
|
||||||
if key:
|
if key and not (key.startswith("__") and key.endswith("__")):
|
||||||
metadata[key] = value
|
metadata[key] = value
|
||||||
return metadata
|
return metadata
|
||||||
|
|
||||||
@@ -986,7 +1010,7 @@ def _render_encryption_document(config: dict[str, Any]) -> Element:
|
|||||||
return root
|
return root
|
||||||
|
|
||||||
|
|
||||||
def _stream_file(path, chunk_size: int = 64 * 1024):
|
def _stream_file(path, chunk_size: int = 256 * 1024):
|
||||||
with path.open("rb") as handle:
|
with path.open("rb") as handle:
|
||||||
while True:
|
while True:
|
||||||
chunk = handle.read(chunk_size)
|
chunk = handle.read(chunk_size)
|
||||||
@@ -1005,20 +1029,78 @@ def _method_not_allowed(allowed: list[str]) -> Response:
|
|||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
def _check_conditional_headers(etag: str, last_modified: float | None) -> Response | None:
|
||||||
|
from email.utils import parsedate_to_datetime
|
||||||
|
|
||||||
|
if_match = request.headers.get("If-Match")
|
||||||
|
if if_match:
|
||||||
|
if if_match.strip() != "*":
|
||||||
|
match_etags = [e.strip().strip('"') for e in if_match.split(",")]
|
||||||
|
if etag not in match_etags:
|
||||||
|
return Response(status=412)
|
||||||
|
|
||||||
|
if_unmodified = request.headers.get("If-Unmodified-Since")
|
||||||
|
if not if_match and if_unmodified and last_modified is not None:
|
||||||
|
try:
|
||||||
|
dt = parsedate_to_datetime(if_unmodified)
|
||||||
|
obj_dt = datetime.fromtimestamp(last_modified, timezone.utc)
|
||||||
|
if obj_dt > dt:
|
||||||
|
return Response(status=412)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
if_none_match = request.headers.get("If-None-Match")
|
||||||
|
if if_none_match:
|
||||||
|
if if_none_match.strip() == "*":
|
||||||
|
resp = Response(status=304)
|
||||||
|
resp.headers["ETag"] = f'"{etag}"'
|
||||||
|
if last_modified is not None:
|
||||||
|
resp.headers["Last-Modified"] = http_date(last_modified)
|
||||||
|
return resp
|
||||||
|
none_match_etags = [e.strip().strip('"') for e in if_none_match.split(",")]
|
||||||
|
if etag in none_match_etags:
|
||||||
|
resp = Response(status=304)
|
||||||
|
resp.headers["ETag"] = f'"{etag}"'
|
||||||
|
if last_modified is not None:
|
||||||
|
resp.headers["Last-Modified"] = http_date(last_modified)
|
||||||
|
return resp
|
||||||
|
|
||||||
|
if_modified = request.headers.get("If-Modified-Since")
|
||||||
|
if not if_none_match and if_modified and last_modified is not None:
|
||||||
|
try:
|
||||||
|
dt = parsedate_to_datetime(if_modified)
|
||||||
|
obj_dt = datetime.fromtimestamp(last_modified, timezone.utc)
|
||||||
|
if obj_dt <= dt:
|
||||||
|
resp = Response(status=304)
|
||||||
|
resp.headers["ETag"] = f'"{etag}"'
|
||||||
|
resp.headers["Last-Modified"] = http_date(last_modified)
|
||||||
|
return resp
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _apply_object_headers(
|
def _apply_object_headers(
|
||||||
response: Response,
|
response: Response,
|
||||||
*,
|
*,
|
||||||
file_stat,
|
file_stat,
|
||||||
metadata: Dict[str, str] | None,
|
metadata: Dict[str, str] | None,
|
||||||
etag: str,
|
etag: str,
|
||||||
|
size_override: int | None = None,
|
||||||
|
mtime_override: float | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
if file_stat is not None:
|
effective_size = size_override if size_override is not None else (file_stat.st_size if file_stat is not None else None)
|
||||||
if response.status_code != 206:
|
effective_mtime = mtime_override if mtime_override is not None else (file_stat.st_mtime if file_stat is not None else None)
|
||||||
response.headers["Content-Length"] = str(file_stat.st_size)
|
if effective_size is not None and response.status_code != 206:
|
||||||
response.headers["Last-Modified"] = http_date(file_stat.st_mtime)
|
response.headers["Content-Length"] = str(effective_size)
|
||||||
|
if effective_mtime is not None:
|
||||||
|
response.headers["Last-Modified"] = http_date(effective_mtime)
|
||||||
response.headers["ETag"] = f'"{etag}"'
|
response.headers["ETag"] = f'"{etag}"'
|
||||||
response.headers["Accept-Ranges"] = "bytes"
|
response.headers["Accept-Ranges"] = "bytes"
|
||||||
for key, value in (metadata or {}).items():
|
for key, value in (metadata or {}).items():
|
||||||
|
if key.startswith("__") and key.endswith("__"):
|
||||||
|
continue
|
||||||
safe_value = _sanitize_header_value(str(value))
|
safe_value = _sanitize_header_value(str(value))
|
||||||
response.headers[f"X-Amz-Meta-{key}"] = safe_value
|
response.headers[f"X-Amz-Meta-{key}"] = safe_value
|
||||||
|
|
||||||
@@ -1039,6 +1121,7 @@ def _maybe_handle_bucket_subresource(bucket_name: str) -> Response | None:
|
|||||||
"logging": _bucket_logging_handler,
|
"logging": _bucket_logging_handler,
|
||||||
"uploads": _bucket_uploads_handler,
|
"uploads": _bucket_uploads_handler,
|
||||||
"policy": _bucket_policy_handler,
|
"policy": _bucket_policy_handler,
|
||||||
|
"policyStatus": _bucket_policy_status_handler,
|
||||||
"replication": _bucket_replication_handler,
|
"replication": _bucket_replication_handler,
|
||||||
"website": _bucket_website_handler,
|
"website": _bucket_website_handler,
|
||||||
}
|
}
|
||||||
@@ -1062,7 +1145,7 @@ def _bucket_versioning_handler(bucket_name: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "policy")
|
_authorize_action(principal, bucket_name, "versioning")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
storage = _storage()
|
storage = _storage()
|
||||||
@@ -1109,7 +1192,7 @@ def _bucket_tagging_handler(bucket_name: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "policy")
|
_authorize_action(principal, bucket_name, "tagging")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
storage = _storage()
|
storage = _storage()
|
||||||
@@ -1274,7 +1357,7 @@ def _bucket_cors_handler(bucket_name: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "policy")
|
_authorize_action(principal, bucket_name, "cors")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
storage = _storage()
|
storage = _storage()
|
||||||
@@ -1321,13 +1404,13 @@ def _bucket_cors_handler(bucket_name: str) -> Response:
|
|||||||
|
|
||||||
|
|
||||||
def _bucket_encryption_handler(bucket_name: str) -> Response:
|
def _bucket_encryption_handler(bucket_name: str) -> Response:
|
||||||
if request.method not in {"GET", "PUT"}:
|
if request.method not in {"GET", "PUT", "DELETE"}:
|
||||||
return _method_not_allowed(["GET", "PUT"])
|
return _method_not_allowed(["GET", "PUT", "DELETE"])
|
||||||
principal, error = _require_principal()
|
principal, error = _require_principal()
|
||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "policy")
|
_authorize_action(principal, bucket_name, "encryption")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
storage = _storage()
|
storage = _storage()
|
||||||
@@ -1343,6 +1426,13 @@ def _bucket_encryption_handler(bucket_name: str) -> Response:
|
|||||||
404,
|
404,
|
||||||
)
|
)
|
||||||
return _xml_response(_render_encryption_document(config))
|
return _xml_response(_render_encryption_document(config))
|
||||||
|
if request.method == "DELETE":
|
||||||
|
try:
|
||||||
|
storage.set_bucket_encryption(bucket_name, None)
|
||||||
|
except StorageError as exc:
|
||||||
|
return _error_response("NoSuchBucket", str(exc), 404)
|
||||||
|
current_app.logger.info("Bucket encryption deleted", extra={"bucket": bucket_name})
|
||||||
|
return Response(status=204)
|
||||||
ct_error = _require_xml_content_type()
|
ct_error = _require_xml_content_type()
|
||||||
if ct_error:
|
if ct_error:
|
||||||
return ct_error
|
return ct_error
|
||||||
@@ -1395,7 +1485,7 @@ def _bucket_acl_handler(bucket_name: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "policy")
|
_authorize_action(principal, bucket_name, "share")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
storage = _storage()
|
storage = _storage()
|
||||||
@@ -1439,6 +1529,99 @@ def _bucket_acl_handler(bucket_name: str) -> Response:
|
|||||||
return _xml_response(root)
|
return _xml_response(root)
|
||||||
|
|
||||||
|
|
||||||
|
def _object_acl_handler(bucket_name: str, object_key: str) -> Response:
|
||||||
|
from .acl import create_canned_acl, GRANTEE_ALL_USERS, GRANTEE_AUTHENTICATED_USERS
|
||||||
|
|
||||||
|
if request.method not in {"GET", "PUT"}:
|
||||||
|
return _method_not_allowed(["GET", "PUT"])
|
||||||
|
storage = _storage()
|
||||||
|
try:
|
||||||
|
path = storage.get_object_path(bucket_name, object_key)
|
||||||
|
except (StorageError, FileNotFoundError):
|
||||||
|
return _error_response("NoSuchKey", "Object not found", 404)
|
||||||
|
|
||||||
|
if request.method == "PUT":
|
||||||
|
principal, error = _object_principal("write", bucket_name, object_key)
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
owner_id = principal.access_key if principal else "anonymous"
|
||||||
|
canned_acl = request.headers.get("x-amz-acl", "private")
|
||||||
|
acl = create_canned_acl(canned_acl, owner_id)
|
||||||
|
acl_service = _acl()
|
||||||
|
metadata = storage.get_object_metadata(bucket_name, object_key)
|
||||||
|
metadata.update(acl_service.create_object_acl_metadata(acl))
|
||||||
|
safe_key = storage._sanitize_object_key(object_key, storage._object_key_max_length_bytes)
|
||||||
|
storage._write_metadata(bucket_name, safe_key, metadata)
|
||||||
|
current_app.logger.info("Object ACL set", extra={"bucket": bucket_name, "key": object_key, "acl": canned_acl})
|
||||||
|
return Response(status=200)
|
||||||
|
|
||||||
|
principal, error = _object_principal("read", bucket_name, object_key)
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
owner_id = principal.access_key if principal else "anonymous"
|
||||||
|
acl_service = _acl()
|
||||||
|
metadata = storage.get_object_metadata(bucket_name, object_key)
|
||||||
|
acl = acl_service.get_object_acl(bucket_name, object_key, metadata)
|
||||||
|
if not acl:
|
||||||
|
acl = create_canned_acl("private", owner_id)
|
||||||
|
|
||||||
|
root = Element("AccessControlPolicy")
|
||||||
|
owner_el = SubElement(root, "Owner")
|
||||||
|
SubElement(owner_el, "ID").text = acl.owner
|
||||||
|
SubElement(owner_el, "DisplayName").text = acl.owner
|
||||||
|
acl_el = SubElement(root, "AccessControlList")
|
||||||
|
for grant in acl.grants:
|
||||||
|
grant_el = SubElement(acl_el, "Grant")
|
||||||
|
grantee = SubElement(grant_el, "Grantee")
|
||||||
|
if grant.grantee == GRANTEE_ALL_USERS:
|
||||||
|
grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "Group")
|
||||||
|
SubElement(grantee, "URI").text = "http://acs.amazonaws.com/groups/global/AllUsers"
|
||||||
|
elif grant.grantee == GRANTEE_AUTHENTICATED_USERS:
|
||||||
|
grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "Group")
|
||||||
|
SubElement(grantee, "URI").text = "http://acs.amazonaws.com/groups/global/AuthenticatedUsers"
|
||||||
|
else:
|
||||||
|
grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "CanonicalUser")
|
||||||
|
SubElement(grantee, "ID").text = grant.grantee
|
||||||
|
SubElement(grantee, "DisplayName").text = grant.grantee
|
||||||
|
SubElement(grant_el, "Permission").text = grant.permission
|
||||||
|
return _xml_response(root)
|
||||||
|
|
||||||
|
|
||||||
|
def _object_attributes_handler(bucket_name: str, object_key: str) -> Response:
|
||||||
|
if request.method != "GET":
|
||||||
|
return _method_not_allowed(["GET"])
|
||||||
|
principal, error = _object_principal("read", bucket_name, object_key)
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
storage = _storage()
|
||||||
|
try:
|
||||||
|
path = storage.get_object_path(bucket_name, object_key)
|
||||||
|
file_stat = path.stat()
|
||||||
|
metadata = storage.get_object_metadata(bucket_name, object_key)
|
||||||
|
except (StorageError, FileNotFoundError):
|
||||||
|
return _error_response("NoSuchKey", "Object not found", 404)
|
||||||
|
|
||||||
|
requested = request.headers.get("x-amz-object-attributes", "")
|
||||||
|
attrs = {a.strip() for a in requested.split(",") if a.strip()}
|
||||||
|
|
||||||
|
root = Element("GetObjectAttributesResponse")
|
||||||
|
if "ETag" in attrs:
|
||||||
|
etag = metadata.get("__etag__") or storage._compute_etag(path)
|
||||||
|
SubElement(root, "ETag").text = etag
|
||||||
|
if "StorageClass" in attrs:
|
||||||
|
SubElement(root, "StorageClass").text = "STANDARD"
|
||||||
|
if "ObjectSize" in attrs:
|
||||||
|
SubElement(root, "ObjectSize").text = str(file_stat.st_size)
|
||||||
|
if "Checksum" in attrs:
|
||||||
|
SubElement(root, "Checksum")
|
||||||
|
if "ObjectParts" in attrs:
|
||||||
|
SubElement(root, "ObjectParts")
|
||||||
|
|
||||||
|
response = _xml_response(root)
|
||||||
|
response.headers["Last-Modified"] = http_date(file_stat.st_mtime)
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
def _bucket_list_versions_handler(bucket_name: str) -> Response:
|
def _bucket_list_versions_handler(bucket_name: str) -> Response:
|
||||||
"""Handle ListObjectVersions (GET /<bucket>?versions)."""
|
"""Handle ListObjectVersions (GET /<bucket>?versions)."""
|
||||||
if request.method != "GET":
|
if request.method != "GET":
|
||||||
@@ -1550,7 +1733,7 @@ def _bucket_lifecycle_handler(bucket_name: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "policy")
|
_authorize_action(principal, bucket_name, "lifecycle")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
|
|
||||||
@@ -1714,7 +1897,7 @@ def _bucket_quota_handler(bucket_name: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "policy")
|
_authorize_action(principal, bucket_name, "quota")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
|
|
||||||
@@ -1791,7 +1974,7 @@ def _bucket_object_lock_handler(bucket_name: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "policy")
|
_authorize_action(principal, bucket_name, "object_lock")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
|
|
||||||
@@ -1837,7 +2020,7 @@ def _bucket_notification_handler(bucket_name: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "policy")
|
_authorize_action(principal, bucket_name, "notification")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
|
|
||||||
@@ -1933,7 +2116,7 @@ def _bucket_logging_handler(bucket_name: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "policy")
|
_authorize_action(principal, bucket_name, "logging")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
|
|
||||||
@@ -2075,7 +2258,7 @@ def _object_retention_handler(bucket_name: str, object_key: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "write" if request.method == "PUT" else "read", object_key=object_key)
|
_authorize_action(principal, bucket_name, "object_lock", object_key=object_key)
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
|
|
||||||
@@ -2151,7 +2334,7 @@ def _object_legal_hold_handler(bucket_name: str, object_key: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "write" if request.method == "PUT" else "read", object_key=object_key)
|
_authorize_action(principal, bucket_name, "object_lock", object_key=object_key)
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
|
|
||||||
@@ -2346,7 +2529,7 @@ def _post_object(bucket_name: str) -> Response:
|
|||||||
for field_name, value in request.form.items():
|
for field_name, value in request.form.items():
|
||||||
if field_name.lower().startswith("x-amz-meta-"):
|
if field_name.lower().startswith("x-amz-meta-"):
|
||||||
key = field_name[11:]
|
key = field_name[11:]
|
||||||
if key:
|
if key and not (key.startswith("__") and key.endswith("__")):
|
||||||
metadata[key] = value
|
metadata[key] = value
|
||||||
try:
|
try:
|
||||||
meta = storage.put_object(bucket_name, object_key, file.stream, metadata=metadata or None)
|
meta = storage.put_object(bucket_name, object_key, file.stream, metadata=metadata or None)
|
||||||
@@ -2360,6 +2543,10 @@ def _post_object(bucket_name: str) -> Response:
|
|||||||
if success_action_redirect:
|
if success_action_redirect:
|
||||||
allowed_hosts = current_app.config.get("ALLOWED_REDIRECT_HOSTS", [])
|
allowed_hosts = current_app.config.get("ALLOWED_REDIRECT_HOSTS", [])
|
||||||
if not allowed_hosts:
|
if not allowed_hosts:
|
||||||
|
current_app.logger.warning(
|
||||||
|
"ALLOWED_REDIRECT_HOSTS not configured, falling back to request Host header. "
|
||||||
|
"Set ALLOWED_REDIRECT_HOSTS for production deployments."
|
||||||
|
)
|
||||||
allowed_hosts = [request.host]
|
allowed_hosts = [request.host]
|
||||||
parsed = urlparse(success_action_redirect)
|
parsed = urlparse(success_action_redirect)
|
||||||
if parsed.scheme not in ("http", "https"):
|
if parsed.scheme not in ("http", "https"):
|
||||||
@@ -2480,7 +2667,7 @@ def bucket_handler(bucket_name: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "write")
|
_authorize_action(principal, bucket_name, "create_bucket")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
try:
|
try:
|
||||||
@@ -2497,7 +2684,7 @@ def bucket_handler(bucket_name: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "delete")
|
_authorize_action(principal, bucket_name, "delete_bucket")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
try:
|
try:
|
||||||
@@ -2546,55 +2733,44 @@ def bucket_handler(bucket_name: str) -> Response:
|
|||||||
else:
|
else:
|
||||||
effective_start = marker
|
effective_start = marker
|
||||||
|
|
||||||
fetch_keys = max_keys * 10 if delimiter else max_keys
|
|
||||||
try:
|
try:
|
||||||
list_result = storage.list_objects(
|
if delimiter:
|
||||||
bucket_name,
|
shallow_result = storage.list_objects_shallow(
|
||||||
max_keys=fetch_keys,
|
bucket_name,
|
||||||
continuation_token=effective_start or None,
|
prefix=prefix,
|
||||||
prefix=prefix or None,
|
delimiter=delimiter,
|
||||||
)
|
max_keys=max_keys,
|
||||||
objects = list_result.objects
|
continuation_token=effective_start or None,
|
||||||
|
)
|
||||||
|
objects = shallow_result.objects
|
||||||
|
common_prefixes = shallow_result.common_prefixes
|
||||||
|
is_truncated = shallow_result.is_truncated
|
||||||
|
|
||||||
|
next_marker = shallow_result.next_continuation_token or ""
|
||||||
|
next_continuation_token = ""
|
||||||
|
if is_truncated and next_marker and list_type == "2":
|
||||||
|
next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")
|
||||||
|
else:
|
||||||
|
list_result = storage.list_objects(
|
||||||
|
bucket_name,
|
||||||
|
max_keys=max_keys,
|
||||||
|
continuation_token=effective_start or None,
|
||||||
|
prefix=prefix or None,
|
||||||
|
)
|
||||||
|
objects = list_result.objects
|
||||||
|
common_prefixes = []
|
||||||
|
is_truncated = list_result.is_truncated
|
||||||
|
|
||||||
|
next_marker = ""
|
||||||
|
next_continuation_token = ""
|
||||||
|
if is_truncated:
|
||||||
|
if objects:
|
||||||
|
next_marker = objects[-1].key
|
||||||
|
if list_type == "2" and next_marker:
|
||||||
|
next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")
|
||||||
except StorageError as exc:
|
except StorageError as exc:
|
||||||
return _error_response("NoSuchBucket", str(exc), 404)
|
return _error_response("NoSuchBucket", str(exc), 404)
|
||||||
|
|
||||||
common_prefixes: list[str] = []
|
|
||||||
filtered_objects: list = []
|
|
||||||
if delimiter:
|
|
||||||
seen_prefixes: set[str] = set()
|
|
||||||
for obj in objects:
|
|
||||||
key_after_prefix = obj.key[len(prefix):] if prefix else obj.key
|
|
||||||
if delimiter in key_after_prefix:
|
|
||||||
common_prefix = prefix + key_after_prefix.split(delimiter)[0] + delimiter
|
|
||||||
if common_prefix not in seen_prefixes:
|
|
||||||
seen_prefixes.add(common_prefix)
|
|
||||||
common_prefixes.append(common_prefix)
|
|
||||||
else:
|
|
||||||
filtered_objects.append(obj)
|
|
||||||
objects = filtered_objects
|
|
||||||
common_prefixes = sorted(common_prefixes)
|
|
||||||
|
|
||||||
total_items = len(objects) + len(common_prefixes)
|
|
||||||
is_truncated = total_items > max_keys or list_result.is_truncated
|
|
||||||
|
|
||||||
if len(objects) >= max_keys:
|
|
||||||
objects = objects[:max_keys]
|
|
||||||
common_prefixes = []
|
|
||||||
else:
|
|
||||||
remaining = max_keys - len(objects)
|
|
||||||
common_prefixes = common_prefixes[:remaining]
|
|
||||||
|
|
||||||
next_marker = ""
|
|
||||||
next_continuation_token = ""
|
|
||||||
if is_truncated:
|
|
||||||
if objects:
|
|
||||||
next_marker = objects[-1].key
|
|
||||||
elif common_prefixes:
|
|
||||||
next_marker = common_prefixes[-1].rstrip(delimiter) if delimiter else common_prefixes[-1]
|
|
||||||
|
|
||||||
if list_type == "2" and next_marker:
|
|
||||||
next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")
|
|
||||||
|
|
||||||
if list_type == "2":
|
if list_type == "2":
|
||||||
root = Element("ListBucketResult")
|
root = Element("ListBucketResult")
|
||||||
SubElement(root, "Name").text = bucket_name
|
SubElement(root, "Name").text = bucket_name
|
||||||
@@ -2669,6 +2845,12 @@ def object_handler(bucket_name: str, object_key: str):
|
|||||||
if "legal-hold" in request.args:
|
if "legal-hold" in request.args:
|
||||||
return _object_legal_hold_handler(bucket_name, object_key)
|
return _object_legal_hold_handler(bucket_name, object_key)
|
||||||
|
|
||||||
|
if "acl" in request.args:
|
||||||
|
return _object_acl_handler(bucket_name, object_key)
|
||||||
|
|
||||||
|
if "attributes" in request.args:
|
||||||
|
return _object_attributes_handler(bucket_name, object_key)
|
||||||
|
|
||||||
if request.method == "POST":
|
if request.method == "POST":
|
||||||
if "uploads" in request.args:
|
if "uploads" in request.args:
|
||||||
return _initiate_multipart_upload(bucket_name, object_key)
|
return _initiate_multipart_upload(bucket_name, object_key)
|
||||||
@@ -2708,6 +2890,8 @@ def object_handler(bucket_name: str, object_key: str):
|
|||||||
if validation_error:
|
if validation_error:
|
||||||
return _error_response("InvalidArgument", validation_error, 400)
|
return _error_response("InvalidArgument", validation_error, 400)
|
||||||
|
|
||||||
|
metadata["__content_type__"] = content_type or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
meta = storage.put_object(
|
meta = storage.put_object(
|
||||||
bucket_name,
|
bucket_name,
|
||||||
@@ -2722,10 +2906,23 @@ def object_handler(bucket_name: str, object_key: str):
|
|||||||
if "Bucket" in message:
|
if "Bucket" in message:
|
||||||
return _error_response("NoSuchBucket", message, 404)
|
return _error_response("NoSuchBucket", message, 404)
|
||||||
return _error_response("InvalidArgument", message, 400)
|
return _error_response("InvalidArgument", message, 400)
|
||||||
current_app.logger.info(
|
|
||||||
"Object uploaded",
|
content_md5 = request.headers.get("Content-MD5")
|
||||||
extra={"bucket": bucket_name, "key": object_key, "size": meta.size},
|
if content_md5 and meta.etag:
|
||||||
)
|
try:
|
||||||
|
expected_md5 = base64.b64decode(content_md5).hex()
|
||||||
|
except Exception:
|
||||||
|
storage.delete_object(bucket_name, object_key)
|
||||||
|
return _error_response("InvalidDigest", "Content-MD5 header is not valid base64", 400)
|
||||||
|
if expected_md5 != meta.etag:
|
||||||
|
storage.delete_object(bucket_name, object_key)
|
||||||
|
return _error_response("BadDigest", "The Content-MD5 you specified did not match what we received", 400)
|
||||||
|
|
||||||
|
if current_app.logger.isEnabledFor(logging.INFO):
|
||||||
|
current_app.logger.info(
|
||||||
|
"Object uploaded",
|
||||||
|
extra={"bucket": bucket_name, "key": object_key, "size": meta.size},
|
||||||
|
)
|
||||||
response = Response(status=200)
|
response = Response(status=200)
|
||||||
if meta.etag:
|
if meta.etag:
|
||||||
response.headers["ETag"] = f'"{meta.etag}"'
|
response.headers["ETag"] = f'"{meta.etag}"'
|
||||||
@@ -2759,10 +2956,27 @@ def object_handler(bucket_name: str, object_key: str):
|
|||||||
except StorageError as exc:
|
except StorageError as exc:
|
||||||
return _error_response("NoSuchKey", str(exc), 404)
|
return _error_response("NoSuchKey", str(exc), 404)
|
||||||
metadata = storage.get_object_metadata(bucket_name, object_key)
|
metadata = storage.get_object_metadata(bucket_name, object_key)
|
||||||
mimetype = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
|
mimetype = metadata.get("__content_type__") or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
|
||||||
|
|
||||||
is_encrypted = "x-amz-server-side-encryption" in metadata
|
is_encrypted = "x-amz-server-side-encryption" in metadata
|
||||||
|
|
||||||
|
cond_etag = metadata.get("__etag__")
|
||||||
|
if not cond_etag and not is_encrypted:
|
||||||
|
try:
|
||||||
|
cond_etag = storage._compute_etag(path)
|
||||||
|
except OSError:
|
||||||
|
cond_etag = None
|
||||||
|
if cond_etag:
|
||||||
|
cond_mtime = float(metadata["__last_modified__"]) if "__last_modified__" in metadata else None
|
||||||
|
if cond_mtime is None:
|
||||||
|
try:
|
||||||
|
cond_mtime = path.stat().st_mtime
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
cond_resp = _check_conditional_headers(cond_etag, cond_mtime)
|
||||||
|
if cond_resp:
|
||||||
|
return cond_resp
|
||||||
|
|
||||||
if request.method == "GET":
|
if request.method == "GET":
|
||||||
range_header = request.headers.get("Range")
|
range_header = request.headers.get("Range")
|
||||||
|
|
||||||
@@ -2816,7 +3030,7 @@ def object_handler(bucket_name: str, object_key: str):
|
|||||||
f.seek(start_pos)
|
f.seek(start_pos)
|
||||||
remaining = length_to_read
|
remaining = length_to_read
|
||||||
while remaining > 0:
|
while remaining > 0:
|
||||||
chunk_size = min(65536, remaining)
|
chunk_size = min(262144, remaining)
|
||||||
chunk = f.read(chunk_size)
|
chunk = f.read(chunk_size)
|
||||||
if not chunk:
|
if not chunk:
|
||||||
break
|
break
|
||||||
@@ -2851,10 +3065,7 @@ def object_handler(bucket_name: str, object_key: str):
|
|||||||
response.headers["Content-Type"] = mimetype
|
response.headers["Content-Type"] = mimetype
|
||||||
logged_bytes = 0
|
logged_bytes = 0
|
||||||
|
|
||||||
try:
|
file_stat = stat if not is_encrypted else None
|
||||||
file_stat = path.stat() if not is_encrypted else None
|
|
||||||
except (PermissionError, OSError):
|
|
||||||
file_stat = None
|
|
||||||
_apply_object_headers(response, file_stat=file_stat, metadata=metadata, etag=etag)
|
_apply_object_headers(response, file_stat=file_stat, metadata=metadata, etag=etag)
|
||||||
|
|
||||||
if request.method == "GET":
|
if request.method == "GET":
|
||||||
@@ -2871,8 +3082,9 @@ def object_handler(bucket_name: str, object_key: str):
|
|||||||
if value:
|
if value:
|
||||||
response.headers[header] = _sanitize_header_value(value)
|
response.headers[header] = _sanitize_header_value(value)
|
||||||
|
|
||||||
action = "Object read" if request.method == "GET" else "Object head"
|
if current_app.logger.isEnabledFor(logging.INFO):
|
||||||
current_app.logger.info(action, extra={"bucket": bucket_name, "key": object_key, "bytes": logged_bytes})
|
action = "Object read" if request.method == "GET" else "Object head"
|
||||||
|
current_app.logger.info(action, extra={"bucket": bucket_name, "key": object_key, "bytes": logged_bytes})
|
||||||
return response
|
return response
|
||||||
|
|
||||||
if "uploadId" in request.args:
|
if "uploadId" in request.args:
|
||||||
@@ -2890,7 +3102,8 @@ def object_handler(bucket_name: str, object_key: str):
|
|||||||
|
|
||||||
storage.delete_object(bucket_name, object_key)
|
storage.delete_object(bucket_name, object_key)
|
||||||
lock_service.delete_object_lock_metadata(bucket_name, object_key)
|
lock_service.delete_object_lock_metadata(bucket_name, object_key)
|
||||||
current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key})
|
if current_app.logger.isEnabledFor(logging.INFO):
|
||||||
|
current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key})
|
||||||
|
|
||||||
principal, _ = _require_principal()
|
principal, _ = _require_principal()
|
||||||
_notifications().emit_object_removed(
|
_notifications().emit_object_removed(
|
||||||
@@ -2993,6 +3206,32 @@ def _bucket_policy_handler(bucket_name: str) -> Response:
|
|||||||
return Response(status=204)
|
return Response(status=204)
|
||||||
|
|
||||||
|
|
||||||
|
def _bucket_policy_status_handler(bucket_name: str) -> Response:
|
||||||
|
if request.method != "GET":
|
||||||
|
return _method_not_allowed(["GET"])
|
||||||
|
principal, error = _require_principal()
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
try:
|
||||||
|
_authorize_action(principal, bucket_name, "policy")
|
||||||
|
except IamError as exc:
|
||||||
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
|
storage = _storage()
|
||||||
|
if not storage.bucket_exists(bucket_name):
|
||||||
|
return _error_response("NoSuchBucket", "Bucket does not exist", 404)
|
||||||
|
store = _bucket_policies()
|
||||||
|
policy = store.get_policy(bucket_name)
|
||||||
|
is_public = False
|
||||||
|
if policy:
|
||||||
|
for statement in policy.get("Statement", []):
|
||||||
|
if statement.get("Effect") == "Allow" and statement.get("Principal") == "*":
|
||||||
|
is_public = True
|
||||||
|
break
|
||||||
|
root = Element("PolicyStatus")
|
||||||
|
SubElement(root, "IsPublic").text = "TRUE" if is_public else "FALSE"
|
||||||
|
return _xml_response(root)
|
||||||
|
|
||||||
|
|
||||||
def _bucket_replication_handler(bucket_name: str) -> Response:
|
def _bucket_replication_handler(bucket_name: str) -> Response:
|
||||||
if request.method not in {"GET", "PUT", "DELETE"}:
|
if request.method not in {"GET", "PUT", "DELETE"}:
|
||||||
return _method_not_allowed(["GET", "PUT", "DELETE"])
|
return _method_not_allowed(["GET", "PUT", "DELETE"])
|
||||||
@@ -3000,7 +3239,7 @@ def _bucket_replication_handler(bucket_name: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "policy")
|
_authorize_action(principal, bucket_name, "replication")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
storage = _storage()
|
storage = _storage()
|
||||||
@@ -3083,7 +3322,7 @@ def _bucket_website_handler(bucket_name: str) -> Response:
|
|||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
try:
|
try:
|
||||||
_authorize_action(principal, bucket_name, "policy")
|
_authorize_action(principal, bucket_name, "website")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
return _error_response("AccessDenied", str(exc), 403)
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
storage = _storage()
|
storage = _storage()
|
||||||
@@ -3205,12 +3444,30 @@ def head_object(bucket_name: str, object_key: str) -> Response:
|
|||||||
_authorize_action(principal, bucket_name, "read", object_key=object_key)
|
_authorize_action(principal, bucket_name, "read", object_key=object_key)
|
||||||
path = _storage().get_object_path(bucket_name, object_key)
|
path = _storage().get_object_path(bucket_name, object_key)
|
||||||
metadata = _storage().get_object_metadata(bucket_name, object_key)
|
metadata = _storage().get_object_metadata(bucket_name, object_key)
|
||||||
stat = path.stat()
|
etag = metadata.get("__etag__") or _storage()._compute_etag(path)
|
||||||
etag = _storage()._compute_etag(path)
|
|
||||||
|
|
||||||
response = Response(status=200)
|
head_mtime = float(metadata["__last_modified__"]) if "__last_modified__" in metadata else None
|
||||||
_apply_object_headers(response, file_stat=stat, metadata=metadata, etag=etag)
|
if head_mtime is None:
|
||||||
response.headers["Content-Type"] = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
|
try:
|
||||||
|
head_mtime = path.stat().st_mtime
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
cond_resp = _check_conditional_headers(etag, head_mtime)
|
||||||
|
if cond_resp:
|
||||||
|
return cond_resp
|
||||||
|
|
||||||
|
cached_size = metadata.get("__size__")
|
||||||
|
cached_mtime = metadata.get("__last_modified__")
|
||||||
|
if cached_size is not None and cached_mtime is not None:
|
||||||
|
size_val = int(cached_size)
|
||||||
|
mtime_val = float(cached_mtime)
|
||||||
|
response = Response(status=200)
|
||||||
|
_apply_object_headers(response, file_stat=None, metadata=metadata, etag=etag, size_override=size_val, mtime_override=mtime_val)
|
||||||
|
else:
|
||||||
|
stat = path.stat()
|
||||||
|
response = Response(status=200)
|
||||||
|
_apply_object_headers(response, file_stat=stat, metadata=metadata, etag=etag)
|
||||||
|
response.headers["Content-Type"] = metadata.get("__content_type__") or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
|
||||||
return response
|
return response
|
||||||
except (StorageError, FileNotFoundError):
|
except (StorageError, FileNotFoundError):
|
||||||
return _error_response("NoSuchKey", "Object not found", 404)
|
return _error_response("NoSuchKey", "Object not found", 404)
|
||||||
@@ -3299,7 +3556,7 @@ def _copy_object(dest_bucket: str, dest_key: str, copy_source: str) -> Response:
|
|||||||
if validation_error:
|
if validation_error:
|
||||||
return _error_response("InvalidArgument", validation_error, 400)
|
return _error_response("InvalidArgument", validation_error, 400)
|
||||||
else:
|
else:
|
||||||
metadata = source_metadata
|
metadata = {k: v for k, v in source_metadata.items() if not (k.startswith("__") and k.endswith("__"))}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with source_path.open("rb") as stream:
|
with source_path.open("rb") as stream:
|
||||||
@@ -3440,6 +3697,8 @@ def _initiate_multipart_upload(bucket_name: str, object_key: str) -> Response:
|
|||||||
return error
|
return error
|
||||||
|
|
||||||
metadata = _extract_request_metadata()
|
metadata = _extract_request_metadata()
|
||||||
|
content_type = request.headers.get("Content-Type")
|
||||||
|
metadata["__content_type__"] = content_type or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
|
||||||
try:
|
try:
|
||||||
upload_id = _storage().initiate_multipart_upload(
|
upload_id = _storage().initiate_multipart_upload(
|
||||||
bucket_name,
|
bucket_name,
|
||||||
@@ -3492,6 +3751,15 @@ def _upload_part(bucket_name: str, object_key: str) -> Response:
|
|||||||
return _error_response("NoSuchUpload", str(exc), 404)
|
return _error_response("NoSuchUpload", str(exc), 404)
|
||||||
return _error_response("InvalidArgument", str(exc), 400)
|
return _error_response("InvalidArgument", str(exc), 400)
|
||||||
|
|
||||||
|
content_md5 = request.headers.get("Content-MD5")
|
||||||
|
if content_md5 and etag:
|
||||||
|
try:
|
||||||
|
expected_md5 = base64.b64decode(content_md5).hex()
|
||||||
|
except Exception:
|
||||||
|
return _error_response("InvalidDigest", "Content-MD5 header is not valid base64", 400)
|
||||||
|
if expected_md5 != etag:
|
||||||
|
return _error_response("BadDigest", "The Content-MD5 you specified did not match what we received", 400)
|
||||||
|
|
||||||
response = Response(status=200)
|
response = Response(status=200)
|
||||||
response.headers["ETag"] = f'"{etag}"'
|
response.headers["ETag"] = f'"{etag}"'
|
||||||
return response
|
return response
|
||||||
|
|||||||
@@ -245,6 +245,7 @@ def stream_objects_ndjson(
|
|||||||
url_templates: dict[str, str],
|
url_templates: dict[str, str],
|
||||||
display_tz: str = "UTC",
|
display_tz: str = "UTC",
|
||||||
versioning_enabled: bool = False,
|
versioning_enabled: bool = False,
|
||||||
|
delimiter: Optional[str] = None,
|
||||||
) -> Generator[str, None, None]:
|
) -> Generator[str, None, None]:
|
||||||
meta_line = json.dumps({
|
meta_line = json.dumps({
|
||||||
"type": "meta",
|
"type": "meta",
|
||||||
@@ -258,11 +259,20 @@ def stream_objects_ndjson(
|
|||||||
kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": 1000}
|
kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": 1000}
|
||||||
if prefix:
|
if prefix:
|
||||||
kwargs["Prefix"] = prefix
|
kwargs["Prefix"] = prefix
|
||||||
|
if delimiter:
|
||||||
|
kwargs["Delimiter"] = delimiter
|
||||||
|
|
||||||
|
running_count = 0
|
||||||
try:
|
try:
|
||||||
paginator = client.get_paginator("list_objects_v2")
|
paginator = client.get_paginator("list_objects_v2")
|
||||||
for page in paginator.paginate(**kwargs):
|
for page in paginator.paginate(**kwargs):
|
||||||
for obj in page.get("Contents", []):
|
for cp in page.get("CommonPrefixes", []):
|
||||||
|
yield json.dumps({
|
||||||
|
"type": "folder",
|
||||||
|
"prefix": cp["Prefix"],
|
||||||
|
}) + "\n"
|
||||||
|
page_contents = page.get("Contents", [])
|
||||||
|
for obj in page_contents:
|
||||||
last_mod = obj["LastModified"]
|
last_mod = obj["LastModified"]
|
||||||
yield json.dumps({
|
yield json.dumps({
|
||||||
"type": "object",
|
"type": "object",
|
||||||
@@ -273,6 +283,8 @@ def stream_objects_ndjson(
|
|||||||
"last_modified_iso": format_datetime_iso(last_mod, display_tz),
|
"last_modified_iso": format_datetime_iso(last_mod, display_tz),
|
||||||
"etag": obj.get("ETag", "").strip('"'),
|
"etag": obj.get("ETag", "").strip('"'),
|
||||||
}) + "\n"
|
}) + "\n"
|
||||||
|
running_count += len(page_contents)
|
||||||
|
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
|
||||||
except ClientError as exc:
|
except ClientError as exc:
|
||||||
error_msg = exc.response.get("Error", {}).get("Message", "S3 operation failed")
|
error_msg = exc.response.get("Error", {}).get("Message", "S3 operation failed")
|
||||||
yield json.dumps({"type": "error", "error": error_msg}) + "\n"
|
yield json.dumps({"type": "error", "error": error_msg}) + "\n"
|
||||||
|
|||||||
1035
app/storage.py
1035
app/storage.py
File diff suppressed because it is too large
Load Diff
438
app/ui.py
438
app/ui.py
@@ -508,11 +508,15 @@ def bucket_detail(bucket_name: str):
|
|||||||
can_manage_quota = is_replication_admin
|
can_manage_quota = is_replication_admin
|
||||||
|
|
||||||
website_config = None
|
website_config = None
|
||||||
|
website_domains = []
|
||||||
if website_hosting_enabled:
|
if website_hosting_enabled:
|
||||||
try:
|
try:
|
||||||
website_config = storage.get_bucket_website(bucket_name)
|
website_config = storage.get_bucket_website(bucket_name)
|
||||||
except StorageError:
|
except StorageError:
|
||||||
website_config = None
|
website_config = None
|
||||||
|
domain_store = current_app.extensions.get("website_domains")
|
||||||
|
if domain_store:
|
||||||
|
website_domains = domain_store.get_domains_for_bucket(bucket_name)
|
||||||
|
|
||||||
objects_api_url = url_for("ui.list_bucket_objects", bucket_name=bucket_name)
|
objects_api_url = url_for("ui.list_bucket_objects", bucket_name=bucket_name)
|
||||||
objects_stream_url = url_for("ui.stream_bucket_objects", bucket_name=bucket_name)
|
objects_stream_url = url_for("ui.stream_bucket_objects", bucket_name=bucket_name)
|
||||||
@@ -558,6 +562,7 @@ def bucket_detail(bucket_name: str):
|
|||||||
site_sync_enabled=site_sync_enabled,
|
site_sync_enabled=site_sync_enabled,
|
||||||
website_hosting_enabled=website_hosting_enabled,
|
website_hosting_enabled=website_hosting_enabled,
|
||||||
website_config=website_config,
|
website_config=website_config,
|
||||||
|
website_domains=website_domains,
|
||||||
can_manage_website=can_edit_policy,
|
can_manage_website=can_edit_policy,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -611,20 +616,79 @@ def stream_bucket_objects(bucket_name: str):
|
|||||||
return jsonify({"error": str(exc)}), 403
|
return jsonify({"error": str(exc)}), 403
|
||||||
|
|
||||||
prefix = request.args.get("prefix") or None
|
prefix = request.args.get("prefix") or None
|
||||||
|
delimiter = request.args.get("delimiter") or None
|
||||||
|
|
||||||
|
storage = _storage()
|
||||||
try:
|
try:
|
||||||
client = get_session_s3_client()
|
versioning_enabled = storage.is_versioning_enabled(bucket_name)
|
||||||
except (PermissionError, RuntimeError) as exc:
|
except StorageError:
|
||||||
return jsonify({"error": str(exc)}), 403
|
versioning_enabled = False
|
||||||
|
|
||||||
versioning_enabled = get_versioning_via_s3(client, bucket_name)
|
|
||||||
url_templates = build_url_templates(bucket_name)
|
url_templates = build_url_templates(bucket_name)
|
||||||
display_tz = current_app.config.get("DISPLAY_TIMEZONE", "UTC")
|
display_tz = current_app.config.get("DISPLAY_TIMEZONE", "UTC")
|
||||||
|
|
||||||
|
def generate():
|
||||||
|
yield json.dumps({
|
||||||
|
"type": "meta",
|
||||||
|
"versioning_enabled": versioning_enabled,
|
||||||
|
"url_templates": url_templates,
|
||||||
|
}) + "\n"
|
||||||
|
yield json.dumps({"type": "count", "total_count": 0}) + "\n"
|
||||||
|
|
||||||
|
running_count = 0
|
||||||
|
try:
|
||||||
|
if delimiter:
|
||||||
|
for item_type, item in storage.iter_objects_shallow(
|
||||||
|
bucket_name, prefix=prefix or "", delimiter=delimiter,
|
||||||
|
):
|
||||||
|
if item_type == "folder":
|
||||||
|
yield json.dumps({"type": "folder", "prefix": item}) + "\n"
|
||||||
|
else:
|
||||||
|
last_mod = item.last_modified
|
||||||
|
yield json.dumps({
|
||||||
|
"type": "object",
|
||||||
|
"key": item.key,
|
||||||
|
"size": item.size,
|
||||||
|
"last_modified": last_mod.isoformat(),
|
||||||
|
"last_modified_display": _format_datetime_display(last_mod, display_tz),
|
||||||
|
"last_modified_iso": _format_datetime_iso(last_mod, display_tz),
|
||||||
|
"etag": item.etag or "",
|
||||||
|
}) + "\n"
|
||||||
|
running_count += 1
|
||||||
|
if running_count % 1000 == 0:
|
||||||
|
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
|
||||||
|
else:
|
||||||
|
continuation_token = None
|
||||||
|
while True:
|
||||||
|
result = storage.list_objects(
|
||||||
|
bucket_name,
|
||||||
|
max_keys=1000,
|
||||||
|
continuation_token=continuation_token,
|
||||||
|
prefix=prefix,
|
||||||
|
)
|
||||||
|
for obj in result.objects:
|
||||||
|
last_mod = obj.last_modified
|
||||||
|
yield json.dumps({
|
||||||
|
"type": "object",
|
||||||
|
"key": obj.key,
|
||||||
|
"size": obj.size,
|
||||||
|
"last_modified": last_mod.isoformat(),
|
||||||
|
"last_modified_display": _format_datetime_display(last_mod, display_tz),
|
||||||
|
"last_modified_iso": _format_datetime_iso(last_mod, display_tz),
|
||||||
|
"etag": obj.etag or "",
|
||||||
|
}) + "\n"
|
||||||
|
running_count += len(result.objects)
|
||||||
|
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
|
||||||
|
if not result.is_truncated:
|
||||||
|
break
|
||||||
|
continuation_token = result.next_continuation_token
|
||||||
|
except StorageError as exc:
|
||||||
|
yield json.dumps({"type": "error", "error": str(exc)}) + "\n"
|
||||||
|
return
|
||||||
|
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
|
||||||
|
yield json.dumps({"type": "done"}) + "\n"
|
||||||
|
|
||||||
return Response(
|
return Response(
|
||||||
stream_objects_ndjson(
|
generate(),
|
||||||
client, bucket_name, prefix, url_templates, display_tz, versioning_enabled,
|
|
||||||
),
|
|
||||||
mimetype='application/x-ndjson',
|
mimetype='application/x-ndjson',
|
||||||
headers={
|
headers={
|
||||||
'Cache-Control': 'no-cache',
|
'Cache-Control': 'no-cache',
|
||||||
@@ -634,6 +698,33 @@ def stream_bucket_objects(bucket_name: str):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ui_bp.get("/buckets/<bucket_name>/objects/search")
|
||||||
|
@limiter.limit("30 per minute")
|
||||||
|
def search_bucket_objects(bucket_name: str):
|
||||||
|
principal = _current_principal()
|
||||||
|
try:
|
||||||
|
_authorize_ui(principal, bucket_name, "list")
|
||||||
|
except IamError as exc:
|
||||||
|
return jsonify({"error": str(exc)}), 403
|
||||||
|
|
||||||
|
query = request.args.get("q", "").strip()
|
||||||
|
if not query:
|
||||||
|
return jsonify({"results": [], "truncated": False})
|
||||||
|
|
||||||
|
try:
|
||||||
|
limit = max(1, min(int(request.args.get("limit", 500)), 1000))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
limit = 500
|
||||||
|
|
||||||
|
prefix = request.args.get("prefix", "").strip()
|
||||||
|
|
||||||
|
storage = _storage()
|
||||||
|
try:
|
||||||
|
return jsonify(storage.search_objects(bucket_name, query, prefix=prefix, limit=limit))
|
||||||
|
except StorageError as exc:
|
||||||
|
return jsonify({"error": str(exc)}), 404
|
||||||
|
|
||||||
|
|
||||||
@ui_bp.post("/buckets/<bucket_name>/upload")
|
@ui_bp.post("/buckets/<bucket_name>/upload")
|
||||||
@limiter.limit("30 per minute")
|
@limiter.limit("30 per minute")
|
||||||
def upload_object(bucket_name: str):
|
def upload_object(bucket_name: str):
|
||||||
@@ -738,7 +829,6 @@ def initiate_multipart_upload(bucket_name: str):
|
|||||||
|
|
||||||
|
|
||||||
@ui_bp.put("/buckets/<bucket_name>/multipart/<upload_id>/parts")
|
@ui_bp.put("/buckets/<bucket_name>/multipart/<upload_id>/parts")
|
||||||
@limiter.exempt
|
|
||||||
@csrf.exempt
|
@csrf.exempt
|
||||||
def upload_multipart_part(bucket_name: str, upload_id: str):
|
def upload_multipart_part(bucket_name: str, upload_id: str):
|
||||||
principal = _current_principal()
|
principal = _current_principal()
|
||||||
@@ -973,6 +1063,27 @@ def bulk_delete_objects(bucket_name: str):
|
|||||||
return _respond(False, f"A maximum of {MAX_KEYS} objects can be deleted per request", status_code=400)
|
return _respond(False, f"A maximum of {MAX_KEYS} objects can be deleted per request", status_code=400)
|
||||||
|
|
||||||
unique_keys = list(dict.fromkeys(cleaned))
|
unique_keys = list(dict.fromkeys(cleaned))
|
||||||
|
|
||||||
|
folder_prefixes = [k for k in unique_keys if k.endswith("/")]
|
||||||
|
if folder_prefixes:
|
||||||
|
try:
|
||||||
|
client = get_session_s3_client()
|
||||||
|
for prefix in folder_prefixes:
|
||||||
|
unique_keys.remove(prefix)
|
||||||
|
paginator = client.get_paginator("list_objects_v2")
|
||||||
|
for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
|
||||||
|
for obj in page.get("Contents", []):
|
||||||
|
if obj["Key"] not in unique_keys:
|
||||||
|
unique_keys.append(obj["Key"])
|
||||||
|
except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
|
||||||
|
if isinstance(exc, ClientError):
|
||||||
|
err, status = handle_client_error(exc)
|
||||||
|
return _respond(False, err["error"], status_code=status)
|
||||||
|
return _respond(False, "S3 API server is unreachable", status_code=502)
|
||||||
|
|
||||||
|
if not unique_keys:
|
||||||
|
return _respond(False, "No objects found under the selected folders", status_code=400)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_authorize_ui(principal, bucket_name, "delete")
|
_authorize_ui(principal, bucket_name, "delete")
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
@@ -1003,13 +1114,17 @@ def bulk_delete_objects(bucket_name: str):
|
|||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
client = get_session_s3_client()
|
client = get_session_s3_client()
|
||||||
objects_to_delete = [{"Key": k} for k in unique_keys]
|
deleted = []
|
||||||
resp = client.delete_objects(
|
errors = []
|
||||||
Bucket=bucket_name,
|
for i in range(0, len(unique_keys), 1000):
|
||||||
Delete={"Objects": objects_to_delete, "Quiet": False},
|
batch = unique_keys[i:i + 1000]
|
||||||
)
|
objects_to_delete = [{"Key": k} for k in batch]
|
||||||
deleted = [d["Key"] for d in resp.get("Deleted", [])]
|
resp = client.delete_objects(
|
||||||
errors = [{"key": e["Key"], "error": e.get("Message", e.get("Code", "Unknown error"))} for e in resp.get("Errors", [])]
|
Bucket=bucket_name,
|
||||||
|
Delete={"Objects": objects_to_delete, "Quiet": False},
|
||||||
|
)
|
||||||
|
deleted.extend(d["Key"] for d in resp.get("Deleted", []))
|
||||||
|
errors.extend({"key": e["Key"], "error": e.get("Message", e.get("Code", "Unknown error"))} for e in resp.get("Errors", []))
|
||||||
for key in deleted:
|
for key in deleted:
|
||||||
_replication_manager().trigger_replication(bucket_name, key, action="delete")
|
_replication_manager().trigger_replication(bucket_name, key, action="delete")
|
||||||
except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
|
except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
|
||||||
@@ -1297,12 +1412,14 @@ def object_versions(bucket_name: str, object_key: str):
|
|||||||
for v in resp.get("Versions", []):
|
for v in resp.get("Versions", []):
|
||||||
if v.get("Key") != object_key:
|
if v.get("Key") != object_key:
|
||||||
continue
|
continue
|
||||||
|
if v.get("IsLatest", False):
|
||||||
|
continue
|
||||||
versions.append({
|
versions.append({
|
||||||
"version_id": v.get("VersionId", ""),
|
"version_id": v.get("VersionId", ""),
|
||||||
"last_modified": v["LastModified"].isoformat() if v.get("LastModified") else None,
|
"last_modified": v["LastModified"].isoformat() if v.get("LastModified") else None,
|
||||||
"size": v.get("Size", 0),
|
"size": v.get("Size", 0),
|
||||||
"etag": v.get("ETag", "").strip('"'),
|
"etag": v.get("ETag", "").strip('"'),
|
||||||
"is_latest": v.get("IsLatest", False),
|
"is_latest": False,
|
||||||
})
|
})
|
||||||
return jsonify({"versions": versions})
|
return jsonify({"versions": versions})
|
||||||
except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
|
except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
|
||||||
@@ -1719,6 +1836,10 @@ def iam_dashboard():
|
|||||||
users = iam_service.list_users() if not locked else []
|
users = iam_service.list_users() if not locked else []
|
||||||
config_summary = iam_service.config_summary()
|
config_summary = iam_service.config_summary()
|
||||||
config_document = json.dumps(iam_service.export_config(mask_secrets=True), indent=2)
|
config_document = json.dumps(iam_service.export_config(mask_secrets=True), indent=2)
|
||||||
|
from datetime import datetime as _dt, timedelta as _td, timezone as _tz
|
||||||
|
_now = _dt.now(_tz.utc)
|
||||||
|
now_iso = _now.isoformat()
|
||||||
|
soon_iso = (_now + _td(days=7)).isoformat()
|
||||||
return render_template(
|
return render_template(
|
||||||
"iam.html",
|
"iam.html",
|
||||||
users=users,
|
users=users,
|
||||||
@@ -1728,6 +1849,8 @@ def iam_dashboard():
|
|||||||
config_summary=config_summary,
|
config_summary=config_summary,
|
||||||
config_document=config_document,
|
config_document=config_document,
|
||||||
disclosed_secret=disclosed_secret,
|
disclosed_secret=disclosed_secret,
|
||||||
|
now_iso=now_iso,
|
||||||
|
soon_iso=soon_iso,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -1747,6 +1870,8 @@ def create_iam_user():
|
|||||||
return jsonify({"error": "Display name must be 64 characters or fewer"}), 400
|
return jsonify({"error": "Display name must be 64 characters or fewer"}), 400
|
||||||
flash("Display name must be 64 characters or fewer", "danger")
|
flash("Display name must be 64 characters or fewer", "danger")
|
||||||
return redirect(url_for("ui.iam_dashboard"))
|
return redirect(url_for("ui.iam_dashboard"))
|
||||||
|
custom_access_key = request.form.get("access_key", "").strip() or None
|
||||||
|
custom_secret_key = request.form.get("secret_key", "").strip() or None
|
||||||
policies_text = request.form.get("policies", "").strip()
|
policies_text = request.form.get("policies", "").strip()
|
||||||
policies = None
|
policies = None
|
||||||
if policies_text:
|
if policies_text:
|
||||||
@@ -1757,8 +1882,21 @@ def create_iam_user():
|
|||||||
return jsonify({"error": f"Invalid JSON: {exc}"}), 400
|
return jsonify({"error": f"Invalid JSON: {exc}"}), 400
|
||||||
flash(f"Invalid JSON: {exc}", "danger")
|
flash(f"Invalid JSON: {exc}", "danger")
|
||||||
return redirect(url_for("ui.iam_dashboard"))
|
return redirect(url_for("ui.iam_dashboard"))
|
||||||
|
expires_at = request.form.get("expires_at", "").strip() or None
|
||||||
|
if expires_at:
|
||||||
|
try:
|
||||||
|
from datetime import datetime as _dt, timezone as _tz
|
||||||
|
exp_dt = _dt.fromisoformat(expires_at)
|
||||||
|
if exp_dt.tzinfo is None:
|
||||||
|
exp_dt = exp_dt.replace(tzinfo=_tz.utc)
|
||||||
|
expires_at = exp_dt.isoformat()
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
if _wants_json():
|
||||||
|
return jsonify({"error": "Invalid expiry date format"}), 400
|
||||||
|
flash("Invalid expiry date format", "danger")
|
||||||
|
return redirect(url_for("ui.iam_dashboard"))
|
||||||
try:
|
try:
|
||||||
created = _iam().create_user(display_name=display_name, policies=policies)
|
created = _iam().create_user(display_name=display_name, policies=policies, access_key=custom_access_key, secret_key=custom_secret_key, expires_at=expires_at)
|
||||||
except IamError as exc:
|
except IamError as exc:
|
||||||
if _wants_json():
|
if _wants_json():
|
||||||
return jsonify({"error": str(exc)}), 400
|
return jsonify({"error": str(exc)}), 400
|
||||||
@@ -1932,6 +2070,45 @@ def update_iam_policies(access_key: str):
|
|||||||
return redirect(url_for("ui.iam_dashboard"))
|
return redirect(url_for("ui.iam_dashboard"))
|
||||||
|
|
||||||
|
|
||||||
|
@ui_bp.post("/iam/users/<access_key>/expiry")
|
||||||
|
def update_iam_expiry(access_key: str):
|
||||||
|
principal = _current_principal()
|
||||||
|
try:
|
||||||
|
_iam().authorize(principal, None, "iam:update_policy")
|
||||||
|
except IamError as exc:
|
||||||
|
if _wants_json():
|
||||||
|
return jsonify({"error": str(exc)}), 403
|
||||||
|
flash(str(exc), "danger")
|
||||||
|
return redirect(url_for("ui.iam_dashboard"))
|
||||||
|
|
||||||
|
expires_at = request.form.get("expires_at", "").strip() or None
|
||||||
|
if expires_at:
|
||||||
|
try:
|
||||||
|
from datetime import datetime as _dt, timezone as _tz
|
||||||
|
exp_dt = _dt.fromisoformat(expires_at)
|
||||||
|
if exp_dt.tzinfo is None:
|
||||||
|
exp_dt = exp_dt.replace(tzinfo=_tz.utc)
|
||||||
|
expires_at = exp_dt.isoformat()
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
if _wants_json():
|
||||||
|
return jsonify({"error": "Invalid expiry date format"}), 400
|
||||||
|
flash("Invalid expiry date format", "danger")
|
||||||
|
return redirect(url_for("ui.iam_dashboard"))
|
||||||
|
|
||||||
|
try:
|
||||||
|
_iam().update_user_expiry(access_key, expires_at)
|
||||||
|
if _wants_json():
|
||||||
|
return jsonify({"success": True, "message": f"Updated expiry for {access_key}", "expires_at": expires_at})
|
||||||
|
label = expires_at if expires_at else "never"
|
||||||
|
flash(f"Expiry for {access_key} set to {label}", "success")
|
||||||
|
except IamError as exc:
|
||||||
|
if _wants_json():
|
||||||
|
return jsonify({"error": str(exc)}), 400
|
||||||
|
flash(str(exc), "danger")
|
||||||
|
|
||||||
|
return redirect(url_for("ui.iam_dashboard"))
|
||||||
|
|
||||||
|
|
||||||
@ui_bp.post("/connections")
|
@ui_bp.post("/connections")
|
||||||
def create_connection():
|
def create_connection():
|
||||||
principal = _current_principal()
|
principal = _current_principal()
|
||||||
@@ -2374,7 +2551,10 @@ def website_domains_dashboard():
|
|||||||
store = current_app.extensions.get("website_domains")
|
store = current_app.extensions.get("website_domains")
|
||||||
mappings = store.list_all() if store else []
|
mappings = store.list_all() if store else []
|
||||||
storage = _storage()
|
storage = _storage()
|
||||||
buckets = [b.name for b in storage.list_buckets()]
|
buckets = [
|
||||||
|
b.name for b in storage.list_buckets()
|
||||||
|
if storage.get_bucket_website(b.name)
|
||||||
|
]
|
||||||
return render_template(
|
return render_template(
|
||||||
"website_domains.html",
|
"website_domains.html",
|
||||||
mappings=mappings,
|
mappings=mappings,
|
||||||
@@ -3293,9 +3473,12 @@ def sites_dashboard():
|
|||||||
@ui_bp.post("/sites/local")
|
@ui_bp.post("/sites/local")
|
||||||
def update_local_site():
|
def update_local_site():
|
||||||
principal = _current_principal()
|
principal = _current_principal()
|
||||||
|
wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
|
||||||
try:
|
try:
|
||||||
_iam().authorize(principal, None, "iam:*")
|
_iam().authorize(principal, None, "iam:*")
|
||||||
except IamError:
|
except IamError:
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"error": "Access denied"}), 403
|
||||||
flash("Access denied", "danger")
|
flash("Access denied", "danger")
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
|
|
||||||
@@ -3306,6 +3489,8 @@ def update_local_site():
|
|||||||
display_name = request.form.get("display_name", "").strip()
|
display_name = request.form.get("display_name", "").strip()
|
||||||
|
|
||||||
if not site_id:
|
if not site_id:
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"error": "Site ID is required"}), 400
|
||||||
flash("Site ID is required", "danger")
|
flash("Site ID is required", "danger")
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
|
|
||||||
@@ -3327,6 +3512,8 @@ def update_local_site():
|
|||||||
)
|
)
|
||||||
registry.set_local_site(site)
|
registry.set_local_site(site)
|
||||||
|
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"message": "Local site configuration updated"})
|
||||||
flash("Local site configuration updated", "success")
|
flash("Local site configuration updated", "success")
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
|
|
||||||
@@ -3334,9 +3521,12 @@ def update_local_site():
|
|||||||
@ui_bp.post("/sites/peers")
|
@ui_bp.post("/sites/peers")
|
||||||
def add_peer_site():
|
def add_peer_site():
|
||||||
principal = _current_principal()
|
principal = _current_principal()
|
||||||
|
wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
|
||||||
try:
|
try:
|
||||||
_iam().authorize(principal, None, "iam:*")
|
_iam().authorize(principal, None, "iam:*")
|
||||||
except IamError:
|
except IamError:
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"error": "Access denied"}), 403
|
||||||
flash("Access denied", "danger")
|
flash("Access denied", "danger")
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
|
|
||||||
@@ -3348,9 +3538,13 @@ def add_peer_site():
|
|||||||
connection_id = request.form.get("connection_id", "").strip() or None
|
connection_id = request.form.get("connection_id", "").strip() or None
|
||||||
|
|
||||||
if not site_id:
|
if not site_id:
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"error": "Site ID is required"}), 400
|
||||||
flash("Site ID is required", "danger")
|
flash("Site ID is required", "danger")
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
if not endpoint:
|
if not endpoint:
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"error": "Endpoint is required"}), 400
|
||||||
flash("Endpoint is required", "danger")
|
flash("Endpoint is required", "danger")
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
|
|
||||||
@@ -3362,10 +3556,14 @@ def add_peer_site():
|
|||||||
registry = _site_registry()
|
registry = _site_registry()
|
||||||
|
|
||||||
if registry.get_peer(site_id):
|
if registry.get_peer(site_id):
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"error": f"Peer site '{site_id}' already exists"}), 409
|
||||||
flash(f"Peer site '{site_id}' already exists", "danger")
|
flash(f"Peer site '{site_id}' already exists", "danger")
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
|
|
||||||
if connection_id and not _connections().get(connection_id):
|
if connection_id and not _connections().get(connection_id):
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"error": f"Connection '{connection_id}' not found"}), 404
|
||||||
flash(f"Connection '{connection_id}' not found", "danger")
|
flash(f"Connection '{connection_id}' not found", "danger")
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
|
|
||||||
@@ -3379,6 +3577,11 @@ def add_peer_site():
|
|||||||
)
|
)
|
||||||
registry.add_peer(peer)
|
registry.add_peer(peer)
|
||||||
|
|
||||||
|
if wants_json:
|
||||||
|
redirect_url = None
|
||||||
|
if connection_id:
|
||||||
|
redirect_url = url_for("ui.replication_wizard", site_id=site_id)
|
||||||
|
return jsonify({"message": f"Peer site '{site_id}' added", "redirect": redirect_url})
|
||||||
flash(f"Peer site '{site_id}' added", "success")
|
flash(f"Peer site '{site_id}' added", "success")
|
||||||
|
|
||||||
if connection_id:
|
if connection_id:
|
||||||
@@ -3389,9 +3592,12 @@ def add_peer_site():
|
|||||||
@ui_bp.post("/sites/peers/<site_id>/update")
|
@ui_bp.post("/sites/peers/<site_id>/update")
|
||||||
def update_peer_site(site_id: str):
|
def update_peer_site(site_id: str):
|
||||||
principal = _current_principal()
|
principal = _current_principal()
|
||||||
|
wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
|
||||||
try:
|
try:
|
||||||
_iam().authorize(principal, None, "iam:*")
|
_iam().authorize(principal, None, "iam:*")
|
||||||
except IamError:
|
except IamError:
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"error": "Access denied"}), 403
|
||||||
flash("Access denied", "danger")
|
flash("Access denied", "danger")
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
|
|
||||||
@@ -3399,6 +3605,8 @@ def update_peer_site(site_id: str):
|
|||||||
existing = registry.get_peer(site_id)
|
existing = registry.get_peer(site_id)
|
||||||
|
|
||||||
if not existing:
|
if not existing:
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"error": f"Peer site '{site_id}' not found"}), 404
|
||||||
flash(f"Peer site '{site_id}' not found", "danger")
|
flash(f"Peer site '{site_id}' not found", "danger")
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
|
|
||||||
@@ -3406,7 +3614,10 @@ def update_peer_site(site_id: str):
|
|||||||
region = request.form.get("region", existing.region).strip()
|
region = request.form.get("region", existing.region).strip()
|
||||||
priority = request.form.get("priority", str(existing.priority))
|
priority = request.form.get("priority", str(existing.priority))
|
||||||
display_name = request.form.get("display_name", existing.display_name).strip()
|
display_name = request.form.get("display_name", existing.display_name).strip()
|
||||||
connection_id = request.form.get("connection_id", "").strip() or existing.connection_id
|
if "connection_id" in request.form:
|
||||||
|
connection_id = request.form["connection_id"].strip() or None
|
||||||
|
else:
|
||||||
|
connection_id = existing.connection_id
|
||||||
|
|
||||||
try:
|
try:
|
||||||
priority_int = int(priority)
|
priority_int = int(priority)
|
||||||
@@ -3414,6 +3625,8 @@ def update_peer_site(site_id: str):
|
|||||||
priority_int = existing.priority
|
priority_int = existing.priority
|
||||||
|
|
||||||
if connection_id and not _connections().get(connection_id):
|
if connection_id and not _connections().get(connection_id):
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"error": f"Connection '{connection_id}' not found"}), 404
|
||||||
flash(f"Connection '{connection_id}' not found", "danger")
|
flash(f"Connection '{connection_id}' not found", "danger")
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
|
|
||||||
@@ -3430,6 +3643,8 @@ def update_peer_site(site_id: str):
|
|||||||
)
|
)
|
||||||
registry.update_peer(peer)
|
registry.update_peer(peer)
|
||||||
|
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"message": f"Peer site '{site_id}' updated"})
|
||||||
flash(f"Peer site '{site_id}' updated", "success")
|
flash(f"Peer site '{site_id}' updated", "success")
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
|
|
||||||
@@ -3437,16 +3652,23 @@ def update_peer_site(site_id: str):
|
|||||||
@ui_bp.post("/sites/peers/<site_id>/delete")
|
@ui_bp.post("/sites/peers/<site_id>/delete")
|
||||||
def delete_peer_site(site_id: str):
|
def delete_peer_site(site_id: str):
|
||||||
principal = _current_principal()
|
principal = _current_principal()
|
||||||
|
wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
|
||||||
try:
|
try:
|
||||||
_iam().authorize(principal, None, "iam:*")
|
_iam().authorize(principal, None, "iam:*")
|
||||||
except IamError:
|
except IamError:
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"error": "Access denied"}), 403
|
||||||
flash("Access denied", "danger")
|
flash("Access denied", "danger")
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
|
|
||||||
registry = _site_registry()
|
registry = _site_registry()
|
||||||
if registry.delete_peer(site_id):
|
if registry.delete_peer(site_id):
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"message": f"Peer site '{site_id}' deleted"})
|
||||||
flash(f"Peer site '{site_id}' deleted", "success")
|
flash(f"Peer site '{site_id}' deleted", "success")
|
||||||
else:
|
else:
|
||||||
|
if wants_json:
|
||||||
|
return jsonify({"error": f"Peer site '{site_id}' not found"}), 404
|
||||||
flash(f"Peer site '{site_id}' not found", "danger")
|
flash(f"Peer site '{site_id}' not found", "danger")
|
||||||
|
|
||||||
return redirect(url_for("ui.sites_dashboard"))
|
return redirect(url_for("ui.sites_dashboard"))
|
||||||
@@ -3901,6 +4123,182 @@ def get_peer_sync_stats(site_id: str):
|
|||||||
return jsonify(stats)
|
return jsonify(stats)
|
||||||
|
|
||||||
|
|
||||||
|
@ui_bp.get("/system")
|
||||||
|
def system_dashboard():
|
||||||
|
principal = _current_principal()
|
||||||
|
try:
|
||||||
|
_iam().authorize(principal, None, "iam:*")
|
||||||
|
except IamError:
|
||||||
|
flash("Access denied: System page requires admin permissions", "danger")
|
||||||
|
return redirect(url_for("ui.buckets_overview"))
|
||||||
|
|
||||||
|
import platform as _platform
|
||||||
|
import sys
|
||||||
|
from app.version import APP_VERSION
|
||||||
|
|
||||||
|
try:
|
||||||
|
import myfsio_core as _rc
|
||||||
|
has_rust = True
|
||||||
|
except ImportError:
|
||||||
|
has_rust = False
|
||||||
|
|
||||||
|
gc = current_app.extensions.get("gc")
|
||||||
|
gc_status = gc.get_status() if gc else {"enabled": False}
|
||||||
|
gc_history_records = []
|
||||||
|
if gc:
|
||||||
|
raw = gc.get_history(limit=10, offset=0)
|
||||||
|
for rec in raw:
|
||||||
|
r = rec.get("result", {})
|
||||||
|
total_freed = r.get("temp_bytes_freed", 0) + r.get("multipart_bytes_freed", 0) + r.get("orphaned_version_bytes_freed", 0)
|
||||||
|
rec["bytes_freed_display"] = _format_bytes(total_freed)
|
||||||
|
rec["timestamp_display"] = _format_datetime_display(datetime.fromtimestamp(rec["timestamp"], tz=dt_timezone.utc))
|
||||||
|
gc_history_records.append(rec)
|
||||||
|
|
||||||
|
checker = current_app.extensions.get("integrity")
|
||||||
|
integrity_status = checker.get_status() if checker else {"enabled": False}
|
||||||
|
integrity_history_records = []
|
||||||
|
if checker:
|
||||||
|
raw = checker.get_history(limit=10, offset=0)
|
||||||
|
for rec in raw:
|
||||||
|
rec["timestamp_display"] = _format_datetime_display(datetime.fromtimestamp(rec["timestamp"], tz=dt_timezone.utc))
|
||||||
|
integrity_history_records.append(rec)
|
||||||
|
|
||||||
|
features = [
|
||||||
|
{"label": "Encryption (SSE-S3)", "enabled": current_app.config.get("ENCRYPTION_ENABLED", False)},
|
||||||
|
{"label": "KMS", "enabled": current_app.config.get("KMS_ENABLED", False)},
|
||||||
|
{"label": "Versioning Lifecycle", "enabled": current_app.config.get("LIFECYCLE_ENABLED", False)},
|
||||||
|
{"label": "Metrics History", "enabled": current_app.config.get("METRICS_HISTORY_ENABLED", False)},
|
||||||
|
{"label": "Operation Metrics", "enabled": current_app.config.get("OPERATION_METRICS_ENABLED", False)},
|
||||||
|
{"label": "Site Sync", "enabled": current_app.config.get("SITE_SYNC_ENABLED", False)},
|
||||||
|
{"label": "Website Hosting", "enabled": current_app.config.get("WEBSITE_HOSTING_ENABLED", False)},
|
||||||
|
{"label": "Garbage Collection", "enabled": current_app.config.get("GC_ENABLED", False)},
|
||||||
|
{"label": "Integrity Scanner", "enabled": current_app.config.get("INTEGRITY_ENABLED", False)},
|
||||||
|
]
|
||||||
|
|
||||||
|
return render_template(
|
||||||
|
"system.html",
|
||||||
|
principal=principal,
|
||||||
|
app_version=APP_VERSION,
|
||||||
|
storage_root=current_app.config.get("STORAGE_ROOT", "./data"),
|
||||||
|
platform=_platform.platform(),
|
||||||
|
python_version=sys.version.split()[0],
|
||||||
|
has_rust=has_rust,
|
||||||
|
features=features,
|
||||||
|
gc_status=gc_status,
|
||||||
|
gc_history=gc_history_records,
|
||||||
|
integrity_status=integrity_status,
|
||||||
|
integrity_history=integrity_history_records,
|
||||||
|
display_timezone=current_app.config.get("DISPLAY_TIMEZONE", "UTC"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ui_bp.post("/system/gc/run")
|
||||||
|
def system_gc_run():
|
||||||
|
principal = _current_principal()
|
||||||
|
try:
|
||||||
|
_iam().authorize(principal, None, "iam:*")
|
||||||
|
except IamError:
|
||||||
|
return jsonify({"error": "Access denied"}), 403
|
||||||
|
|
||||||
|
gc = current_app.extensions.get("gc")
|
||||||
|
if not gc:
|
||||||
|
return jsonify({"error": "GC is not enabled"}), 400
|
||||||
|
|
||||||
|
payload = request.get_json(silent=True) or {}
|
||||||
|
started = gc.run_async(dry_run=payload.get("dry_run"))
|
||||||
|
if not started:
|
||||||
|
return jsonify({"error": "GC is already in progress"}), 409
|
||||||
|
return jsonify({"status": "started"})
|
||||||
|
|
||||||
|
|
||||||
|
@ui_bp.get("/system/gc/status")
|
||||||
|
def system_gc_status():
|
||||||
|
principal = _current_principal()
|
||||||
|
try:
|
||||||
|
_iam().authorize(principal, None, "iam:*")
|
||||||
|
except IamError:
|
||||||
|
return jsonify({"error": "Access denied"}), 403
|
||||||
|
|
||||||
|
gc = current_app.extensions.get("gc")
|
||||||
|
if not gc:
|
||||||
|
return jsonify({"error": "GC is not enabled"}), 400
|
||||||
|
|
||||||
|
return jsonify(gc.get_status())
|
||||||
|
|
||||||
|
|
||||||
|
@ui_bp.get("/system/gc/history")
|
||||||
|
def system_gc_history():
|
||||||
|
principal = _current_principal()
|
||||||
|
try:
|
||||||
|
_iam().authorize(principal, None, "iam:*")
|
||||||
|
except IamError:
|
||||||
|
return jsonify({"error": "Access denied"}), 403
|
||||||
|
|
||||||
|
gc = current_app.extensions.get("gc")
|
||||||
|
if not gc:
|
||||||
|
return jsonify({"executions": []})
|
||||||
|
|
||||||
|
limit = min(int(request.args.get("limit", 10)), 200)
|
||||||
|
offset = int(request.args.get("offset", 0))
|
||||||
|
records = gc.get_history(limit=limit, offset=offset)
|
||||||
|
return jsonify({"executions": records})
|
||||||
|
|
||||||
|
|
||||||
|
@ui_bp.post("/system/integrity/run")
|
||||||
|
def system_integrity_run():
|
||||||
|
principal = _current_principal()
|
||||||
|
try:
|
||||||
|
_iam().authorize(principal, None, "iam:*")
|
||||||
|
except IamError:
|
||||||
|
return jsonify({"error": "Access denied"}), 403
|
||||||
|
|
||||||
|
checker = current_app.extensions.get("integrity")
|
||||||
|
if not checker:
|
||||||
|
return jsonify({"error": "Integrity checker is not enabled"}), 400
|
||||||
|
|
||||||
|
payload = request.get_json(silent=True) or {}
|
||||||
|
started = checker.run_async(
|
||||||
|
auto_heal=payload.get("auto_heal"),
|
||||||
|
dry_run=payload.get("dry_run"),
|
||||||
|
)
|
||||||
|
if not started:
|
||||||
|
return jsonify({"error": "A scan is already in progress"}), 409
|
||||||
|
return jsonify({"status": "started"})
|
||||||
|
|
||||||
|
|
||||||
|
@ui_bp.get("/system/integrity/status")
|
||||||
|
def system_integrity_status():
|
||||||
|
principal = _current_principal()
|
||||||
|
try:
|
||||||
|
_iam().authorize(principal, None, "iam:*")
|
||||||
|
except IamError:
|
||||||
|
return jsonify({"error": "Access denied"}), 403
|
||||||
|
|
||||||
|
checker = current_app.extensions.get("integrity")
|
||||||
|
if not checker:
|
||||||
|
return jsonify({"error": "Integrity checker is not enabled"}), 400
|
||||||
|
|
||||||
|
return jsonify(checker.get_status())
|
||||||
|
|
||||||
|
|
||||||
|
@ui_bp.get("/system/integrity/history")
|
||||||
|
def system_integrity_history():
|
||||||
|
principal = _current_principal()
|
||||||
|
try:
|
||||||
|
_iam().authorize(principal, None, "iam:*")
|
||||||
|
except IamError:
|
||||||
|
return jsonify({"error": "Access denied"}), 403
|
||||||
|
|
||||||
|
checker = current_app.extensions.get("integrity")
|
||||||
|
if not checker:
|
||||||
|
return jsonify({"executions": []})
|
||||||
|
|
||||||
|
limit = min(int(request.args.get("limit", 10)), 200)
|
||||||
|
offset = int(request.args.get("offset", 0))
|
||||||
|
records = checker.get_history(limit=limit, offset=offset)
|
||||||
|
return jsonify({"executions": records})
|
||||||
|
|
||||||
|
|
||||||
@ui_bp.app_errorhandler(404)
|
@ui_bp.app_errorhandler(404)
|
||||||
def ui_not_found(error): # type: ignore[override]
|
def ui_not_found(error): # type: ignore[override]
|
||||||
prefix = ui_bp.url_prefix or ""
|
prefix = ui_bp.url_prefix or ""
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
APP_VERSION = "0.3.0"
|
APP_VERSION = "0.4.1"
|
||||||
|
|
||||||
|
|
||||||
def get_version() -> str:
|
def get_version() -> str:
|
||||||
|
|||||||
@@ -35,13 +35,16 @@ class WebsiteDomainStore:
|
|||||||
self.config_path = config_path
|
self.config_path = config_path
|
||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
self._domains: Dict[str, str] = {}
|
self._domains: Dict[str, str] = {}
|
||||||
|
self._last_mtime: float = 0.0
|
||||||
self.reload()
|
self.reload()
|
||||||
|
|
||||||
def reload(self) -> None:
|
def reload(self) -> None:
|
||||||
if not self.config_path.exists():
|
if not self.config_path.exists():
|
||||||
self._domains = {}
|
self._domains = {}
|
||||||
|
self._last_mtime = 0.0
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
|
self._last_mtime = self.config_path.stat().st_mtime
|
||||||
with open(self.config_path, "r", encoding="utf-8") as f:
|
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
@@ -51,19 +54,45 @@ class WebsiteDomainStore:
|
|||||||
except (OSError, json.JSONDecodeError):
|
except (OSError, json.JSONDecodeError):
|
||||||
self._domains = {}
|
self._domains = {}
|
||||||
|
|
||||||
|
def _maybe_reload(self) -> None:
|
||||||
|
try:
|
||||||
|
if self.config_path.exists():
|
||||||
|
mtime = self.config_path.stat().st_mtime
|
||||||
|
if mtime != self._last_mtime:
|
||||||
|
self._last_mtime = mtime
|
||||||
|
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||||
|
data = json.load(f)
|
||||||
|
if isinstance(data, dict):
|
||||||
|
self._domains = {k.lower(): v for k, v in data.items()}
|
||||||
|
else:
|
||||||
|
self._domains = {}
|
||||||
|
elif self._domains:
|
||||||
|
self._domains = {}
|
||||||
|
self._last_mtime = 0.0
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
pass
|
||||||
|
|
||||||
def _save(self) -> None:
|
def _save(self) -> None:
|
||||||
self.config_path.parent.mkdir(parents=True, exist_ok=True)
|
self.config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
with open(self.config_path, "w", encoding="utf-8") as f:
|
with open(self.config_path, "w", encoding="utf-8") as f:
|
||||||
json.dump(self._domains, f, indent=2)
|
json.dump(self._domains, f, indent=2)
|
||||||
|
self._last_mtime = self.config_path.stat().st_mtime
|
||||||
|
|
||||||
def list_all(self) -> List[Dict[str, str]]:
|
def list_all(self) -> List[Dict[str, str]]:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
|
self._maybe_reload()
|
||||||
return [{"domain": d, "bucket": b} for d, b in self._domains.items()]
|
return [{"domain": d, "bucket": b} for d, b in self._domains.items()]
|
||||||
|
|
||||||
def get_bucket(self, domain: str) -> Optional[str]:
|
def get_bucket(self, domain: str) -> Optional[str]:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
|
self._maybe_reload()
|
||||||
return self._domains.get(domain.lower())
|
return self._domains.get(domain.lower())
|
||||||
|
|
||||||
|
def get_domains_for_bucket(self, bucket: str) -> List[str]:
|
||||||
|
with self._lock:
|
||||||
|
self._maybe_reload()
|
||||||
|
return [d for d, b in self._domains.items() if b == bucket]
|
||||||
|
|
||||||
def set_mapping(self, domain: str, bucket: str) -> None:
|
def set_mapping(self, domain: str, bucket: str) -> None:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
self._domains[domain.lower()] = bucket
|
self._domains[domain.lower()] = bucket
|
||||||
|
|||||||
300
docs.md
300
docs.md
@@ -139,18 +139,21 @@ All configuration is done via environment variables. The table below lists every
|
|||||||
| `API_BASE_URL` | `http://127.0.0.1:5000` | Internal S3 API URL used by the web UI proxy. Also used for presigned URL generation. Set to your public URL if running behind a reverse proxy. |
|
| `API_BASE_URL` | `http://127.0.0.1:5000` | Internal S3 API URL used by the web UI proxy. Also used for presigned URL generation. Set to your public URL if running behind a reverse proxy. |
|
||||||
| `AWS_REGION` | `us-east-1` | Region embedded in SigV4 credential scope. |
|
| `AWS_REGION` | `us-east-1` | Region embedded in SigV4 credential scope. |
|
||||||
| `AWS_SERVICE` | `s3` | Service string for SigV4. |
|
| `AWS_SERVICE` | `s3` | Service string for SigV4. |
|
||||||
|
| `DISPLAY_TIMEZONE` | `UTC` | Timezone for timestamps in the web UI (e.g., `US/Eastern`, `Asia/Tokyo`). |
|
||||||
|
|
||||||
### IAM & Security
|
### IAM & Security
|
||||||
|
|
||||||
| Variable | Default | Notes |
|
| Variable | Default | Notes |
|
||||||
| --- | --- | --- |
|
| --- | --- | --- |
|
||||||
| `IAM_CONFIG` | `data/.myfsio.sys/config/iam.json` | Stores users, secrets, and inline policies. |
|
| `IAM_CONFIG` | `data/.myfsio.sys/config/iam.json` | Stores users, secrets, and inline policies. Encrypted at rest when `SECRET_KEY` is set. |
|
||||||
| `BUCKET_POLICY_PATH` | `data/.myfsio.sys/config/bucket_policies.json` | Bucket policy store (auto hot-reload). |
|
| `BUCKET_POLICY_PATH` | `data/.myfsio.sys/config/bucket_policies.json` | Bucket policy store (auto hot-reload). |
|
||||||
| `AUTH_MAX_ATTEMPTS` | `5` | Failed login attempts before lockout. |
|
| `AUTH_MAX_ATTEMPTS` | `5` | Failed login attempts before lockout. |
|
||||||
| `AUTH_LOCKOUT_MINUTES` | `15` | Lockout duration after max failed attempts. |
|
| `AUTH_LOCKOUT_MINUTES` | `15` | Lockout duration after max failed attempts. |
|
||||||
| `SESSION_LIFETIME_DAYS` | `30` | How long UI sessions remain valid. |
|
| `SESSION_LIFETIME_DAYS` | `30` | How long UI sessions remain valid. |
|
||||||
| `SECRET_TTL_SECONDS` | `300` | TTL for ephemeral secrets (presigned URLs). |
|
| `SECRET_TTL_SECONDS` | `300` | TTL for ephemeral secrets (presigned URLs). |
|
||||||
| `UI_ENFORCE_BUCKET_POLICIES` | `false` | Whether the UI should enforce bucket policies. |
|
| `UI_ENFORCE_BUCKET_POLICIES` | `false` | Whether the UI should enforce bucket policies. |
|
||||||
|
| `ADMIN_ACCESS_KEY` | (none) | Custom access key for the admin user on first run or credential reset. If unset, a random key is generated. |
|
||||||
|
| `ADMIN_SECRET_KEY` | (none) | Custom secret key for the admin user on first run or credential reset. If unset, a random key is generated. |
|
||||||
|
|
||||||
### CORS (Cross-Origin Resource Sharing)
|
### CORS (Cross-Origin Resource Sharing)
|
||||||
|
|
||||||
@@ -170,15 +173,16 @@ All configuration is done via environment variables. The table below lists every
|
|||||||
| `RATE_LIMIT_BUCKET_OPS` | `120 per minute` | Rate limit for bucket operations (PUT/DELETE/GET/POST on `/<bucket>`). |
|
| `RATE_LIMIT_BUCKET_OPS` | `120 per minute` | Rate limit for bucket operations (PUT/DELETE/GET/POST on `/<bucket>`). |
|
||||||
| `RATE_LIMIT_OBJECT_OPS` | `240 per minute` | Rate limit for object operations (PUT/GET/DELETE/POST on `/<bucket>/<key>`). |
|
| `RATE_LIMIT_OBJECT_OPS` | `240 per minute` | Rate limit for object operations (PUT/GET/DELETE/POST on `/<bucket>/<key>`). |
|
||||||
| `RATE_LIMIT_HEAD_OPS` | `100 per minute` | Rate limit for HEAD requests (bucket and object). |
|
| `RATE_LIMIT_HEAD_OPS` | `100 per minute` | Rate limit for HEAD requests (bucket and object). |
|
||||||
|
| `RATE_LIMIT_ADMIN` | `60 per minute` | Rate limit for admin API endpoints (`/admin/*`). |
|
||||||
| `RATE_LIMIT_STORAGE_URI` | `memory://` | Storage backend for rate limits. Use `redis://host:port` for distributed setups. |
|
| `RATE_LIMIT_STORAGE_URI` | `memory://` | Storage backend for rate limits. Use `redis://host:port` for distributed setups. |
|
||||||
|
|
||||||
### Server Configuration
|
### Server Configuration
|
||||||
|
|
||||||
| Variable | Default | Notes |
|
| Variable | Default | Notes |
|
||||||
| --- | --- | --- |
|
| --- | --- | --- |
|
||||||
| `SERVER_THREADS` | `0` (auto) | Waitress worker threads (1-64). Set to `0` for auto-calculation based on CPU cores (×2). |
|
| `SERVER_THREADS` | `0` (auto) | Granian blocking threads (1-64). Set to `0` for auto-calculation based on CPU cores (×2). |
|
||||||
| `SERVER_CONNECTION_LIMIT` | `0` (auto) | Maximum concurrent connections (10-1000). Set to `0` for auto-calculation based on available RAM. |
|
| `SERVER_CONNECTION_LIMIT` | `0` (auto) | Maximum concurrent requests per worker (10-1000). Set to `0` for auto-calculation based on available RAM. |
|
||||||
| `SERVER_BACKLOG` | `0` (auto) | TCP listen backlog (64-4096). Set to `0` for auto-calculation (connection_limit × 2). |
|
| `SERVER_BACKLOG` | `0` (auto) | TCP listen backlog (128-4096). Set to `0` for auto-calculation (connection_limit × 2). |
|
||||||
| `SERVER_CHANNEL_TIMEOUT` | `120` | Seconds before idle connections are closed (10-300). |
|
| `SERVER_CHANNEL_TIMEOUT` | `120` | Seconds before idle connections are closed (10-300). |
|
||||||
|
|
||||||
### Logging
|
### Logging
|
||||||
@@ -248,6 +252,60 @@ Once enabled, configure lifecycle rules via:
|
|||||||
</LifecycleConfiguration>
|
</LifecycleConfiguration>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Garbage Collection
|
||||||
|
|
||||||
|
The garbage collector (GC) automatically cleans up orphaned data that accumulates over time: stale temporary files from failed uploads, abandoned multipart uploads, stale lock files, orphaned metadata entries, orphaned version files, and empty directories.
|
||||||
|
|
||||||
|
### Enabling GC
|
||||||
|
|
||||||
|
By default, GC is disabled. Enable it by setting:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
GC_ENABLED=true python run.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Or in your `myfsio.env` file:
|
||||||
|
```
|
||||||
|
GC_ENABLED=true
|
||||||
|
GC_INTERVAL_HOURS=6 # Run every 6 hours (default)
|
||||||
|
GC_TEMP_FILE_MAX_AGE_HOURS=24 # Delete temp files older than 24h
|
||||||
|
GC_MULTIPART_MAX_AGE_DAYS=7 # Delete orphaned multipart uploads older than 7 days
|
||||||
|
GC_LOCK_FILE_MAX_AGE_HOURS=1 # Delete stale lock files older than 1h
|
||||||
|
GC_DRY_RUN=false # Set to true to log without deleting
|
||||||
|
```
|
||||||
|
|
||||||
|
### What Gets Cleaned
|
||||||
|
|
||||||
|
| Type | Location | Condition |
|
||||||
|
|------|----------|-----------|
|
||||||
|
| **Temp files** | `.myfsio.sys/tmp/` | Older than `GC_TEMP_FILE_MAX_AGE_HOURS` |
|
||||||
|
| **Orphaned multipart uploads** | `.myfsio.sys/multipart/` and `<bucket>/.multipart/` | Older than `GC_MULTIPART_MAX_AGE_DAYS` |
|
||||||
|
| **Stale lock files** | `.myfsio.sys/buckets/<bucket>/locks/` | Older than `GC_LOCK_FILE_MAX_AGE_HOURS` |
|
||||||
|
| **Orphaned metadata** | `.myfsio.sys/buckets/<bucket>/meta/` and `<bucket>/.meta/` | Object file no longer exists |
|
||||||
|
| **Orphaned versions** | `.myfsio.sys/buckets/<bucket>/versions/` and `<bucket>/.versions/` | Main object no longer exists |
|
||||||
|
| **Empty directories** | Various internal directories | Directory is empty after cleanup |
|
||||||
|
|
||||||
|
### Admin API
|
||||||
|
|
||||||
|
All GC endpoints require admin (`iam:*`) permissions.
|
||||||
|
|
||||||
|
| Method | Route | Description |
|
||||||
|
|--------|-------|-------------|
|
||||||
|
| `GET` | `/admin/gc/status` | Get GC status and configuration |
|
||||||
|
| `POST` | `/admin/gc/run` | Trigger a manual GC run (body: `{"dry_run": true}` for preview) |
|
||||||
|
| `GET` | `/admin/gc/history` | Get GC execution history (query: `?limit=50&offset=0`) |
|
||||||
|
|
||||||
|
### Dry Run Mode
|
||||||
|
|
||||||
|
Set `GC_DRY_RUN=true` to log what would be deleted without actually removing anything. You can also trigger a one-time dry run via the admin API:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST "http://localhost:5000/admin/gc/run" \
|
||||||
|
-H "X-Access-Key: <key>" -H "X-Secret-Key: <secret>" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"dry_run": true}'
|
||||||
|
```
|
||||||
|
|
||||||
### Performance Tuning
|
### Performance Tuning
|
||||||
|
|
||||||
| Variable | Default | Notes |
|
| Variable | Default | Notes |
|
||||||
@@ -256,6 +314,12 @@ Once enabled, configure lifecycle rules via:
|
|||||||
| `MULTIPART_MIN_PART_SIZE` | `5242880` (5 MB) | Minimum part size for multipart uploads. |
|
| `MULTIPART_MIN_PART_SIZE` | `5242880` (5 MB) | Minimum part size for multipart uploads. |
|
||||||
| `BUCKET_STATS_CACHE_TTL` | `60` | Seconds to cache bucket statistics. |
|
| `BUCKET_STATS_CACHE_TTL` | `60` | Seconds to cache bucket statistics. |
|
||||||
| `BULK_DELETE_MAX_KEYS` | `500` | Maximum keys per bulk delete request. |
|
| `BULK_DELETE_MAX_KEYS` | `500` | Maximum keys per bulk delete request. |
|
||||||
|
| `BULK_DOWNLOAD_MAX_BYTES` | `1073741824` (1 GiB) | Maximum total size for bulk ZIP downloads. |
|
||||||
|
| `OBJECT_CACHE_TTL` | `60` | Seconds to cache object metadata. |
|
||||||
|
|
||||||
|
#### Gzip Compression
|
||||||
|
|
||||||
|
API responses for JSON, XML, HTML, CSS, and JavaScript are automatically gzip-compressed when the client sends `Accept-Encoding: gzip`. Compression activates for responses larger than 500 bytes and is handled by a WSGI middleware (`app/compression.py`). Binary object downloads and streaming responses are never compressed. No configuration is needed.
|
||||||
|
|
||||||
### Server Settings
|
### Server Settings
|
||||||
|
|
||||||
@@ -269,13 +333,14 @@ Once enabled, configure lifecycle rules via:
|
|||||||
|
|
||||||
Before deploying to production, ensure you:
|
Before deploying to production, ensure you:
|
||||||
|
|
||||||
1. **Set `SECRET_KEY`** - Use a strong, unique value (e.g., `openssl rand -base64 32`)
|
1. **Set `SECRET_KEY`** - Use a strong, unique value (e.g., `openssl rand -base64 32`). This also enables IAM config encryption at rest.
|
||||||
2. **Restrict CORS** - Set `CORS_ORIGINS` to your specific domains instead of `*`
|
2. **Restrict CORS** - Set `CORS_ORIGINS` to your specific domains instead of `*`
|
||||||
3. **Configure `API_BASE_URL`** - Required for correct presigned URLs behind proxies
|
3. **Configure `API_BASE_URL`** - Required for correct presigned URLs behind proxies
|
||||||
4. **Enable HTTPS** - Use a reverse proxy (nginx, Cloudflare) with TLS termination
|
4. **Enable HTTPS** - Use a reverse proxy (nginx, Cloudflare) with TLS termination
|
||||||
5. **Review rate limits** - Adjust `RATE_LIMIT_DEFAULT` based on your needs
|
5. **Review rate limits** - Adjust `RATE_LIMIT_DEFAULT` based on your needs
|
||||||
6. **Secure master keys** - Back up `ENCRYPTION_MASTER_KEY_PATH` if using encryption
|
6. **Secure master keys** - Back up `ENCRYPTION_MASTER_KEY_PATH` if using encryption
|
||||||
7. **Use `--prod` flag** - Runs with Waitress instead of Flask dev server
|
7. **Use `--prod` flag** - Runs with Granian instead of Flask dev server
|
||||||
|
8. **Set credential expiry** - Assign `expires_at` to non-admin users for time-limited access
|
||||||
|
|
||||||
### Proxy Configuration
|
### Proxy Configuration
|
||||||
|
|
||||||
@@ -285,6 +350,75 @@ If running behind a reverse proxy (e.g., Nginx, Cloudflare, or a tunnel), ensure
|
|||||||
|
|
||||||
The application automatically trusts these headers to generate correct presigned URLs (e.g., `https://s3.example.com/...` instead of `http://127.0.0.1:5000/...`). Alternatively, you can explicitly set `API_BASE_URL` to your public endpoint.
|
The application automatically trusts these headers to generate correct presigned URLs (e.g., `https://s3.example.com/...` instead of `http://127.0.0.1:5000/...`). Alternatively, you can explicitly set `API_BASE_URL` to your public endpoint.
|
||||||
|
|
||||||
|
| Variable | Default | Notes |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `NUM_TRUSTED_PROXIES` | `1` | Number of trusted reverse proxies for `X-Forwarded-*` header processing. |
|
||||||
|
| `ALLOWED_REDIRECT_HOSTS` | `""` | Comma-separated whitelist of safe redirect targets. Empty allows only same-host redirects. |
|
||||||
|
| `ALLOW_INTERNAL_ENDPOINTS` | `false` | Allow connections to internal/private IPs for webhooks and replication targets. **Keep disabled in production unless needed.** |
|
||||||
|
|
||||||
|
## Integrity Scanner
|
||||||
|
|
||||||
|
The integrity scanner detects and optionally auto-repairs data inconsistencies: corrupted objects (ETag mismatch), orphaned files without metadata, phantom metadata without files, stale version archives, ETag cache drift, and unmigrated legacy `.meta.json` files.
|
||||||
|
|
||||||
|
### Enabling Integrity Scanner
|
||||||
|
|
||||||
|
By default, the integrity scanner is disabled. Enable it by setting:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
INTEGRITY_ENABLED=true python run.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Or in your `myfsio.env` file:
|
||||||
|
```
|
||||||
|
INTEGRITY_ENABLED=true
|
||||||
|
INTEGRITY_INTERVAL_HOURS=24 # Run every 24 hours (default)
|
||||||
|
INTEGRITY_BATCH_SIZE=1000 # Max objects to scan per cycle
|
||||||
|
INTEGRITY_AUTO_HEAL=false # Automatically repair detected issues
|
||||||
|
INTEGRITY_DRY_RUN=false # Set to true to log without healing
|
||||||
|
```
|
||||||
|
|
||||||
|
### What Gets Checked
|
||||||
|
|
||||||
|
| Check | Detection | Heal Action |
|
||||||
|
|-------|-----------|-------------|
|
||||||
|
| **Corrupted objects** | File MD5 does not match stored `__etag__` | Update `__etag__` in index (disk data is authoritative) |
|
||||||
|
| **Orphaned objects** | File exists on disk without metadata entry | Create index entry with computed MD5/size/mtime |
|
||||||
|
| **Phantom metadata** | Index entry exists but file is missing from disk | Remove stale entry from `_index.json` |
|
||||||
|
| **Stale versions** | `.json` manifest without `.bin` data or vice versa | Remove orphaned version file |
|
||||||
|
| **ETag cache inconsistency** | `etag_index.json` entry differs from metadata `__etag__` | Delete `etag_index.json` (auto-rebuilt on next list) |
|
||||||
|
| **Legacy metadata drift** | Legacy `.meta.json` differs from index or is unmigrated | Migrate to index and delete legacy file |
|
||||||
|
|
||||||
|
### Admin API
|
||||||
|
|
||||||
|
All integrity endpoints require admin (`iam:*`) permissions.
|
||||||
|
|
||||||
|
| Method | Route | Description |
|
||||||
|
|--------|-------|-------------|
|
||||||
|
| `GET` | `/admin/integrity/status` | Get scanner status and configuration |
|
||||||
|
| `POST` | `/admin/integrity/run` | Trigger a manual scan (body: `{"dry_run": true, "auto_heal": true}`) |
|
||||||
|
| `GET` | `/admin/integrity/history` | Get scan history (query: `?limit=50&offset=0`) |
|
||||||
|
|
||||||
|
### Dry Run Mode
|
||||||
|
|
||||||
|
Set `INTEGRITY_DRY_RUN=true` to log detected issues without making any changes. You can also trigger a one-time dry run via the admin API:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST "http://localhost:5000/admin/integrity/run" \
|
||||||
|
-H "X-Access-Key: <key>" -H "X-Secret-Key: <secret>" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"dry_run": true, "auto_heal": true}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configuration Reference
|
||||||
|
|
||||||
|
| Variable | Default | Description |
|
||||||
|
|----------|---------|-------------|
|
||||||
|
| `INTEGRITY_ENABLED` | `false` | Enable background integrity scanning |
|
||||||
|
| `INTEGRITY_INTERVAL_HOURS` | `24` | Hours between scan cycles |
|
||||||
|
| `INTEGRITY_BATCH_SIZE` | `1000` | Max objects to scan per cycle |
|
||||||
|
| `INTEGRITY_AUTO_HEAL` | `false` | Automatically repair detected issues |
|
||||||
|
| `INTEGRITY_DRY_RUN` | `false` | Log issues without healing |
|
||||||
|
|
||||||
## 4. Upgrading and Updates
|
## 4. Upgrading and Updates
|
||||||
|
|
||||||
### Version Checking
|
### Version Checking
|
||||||
@@ -619,11 +753,12 @@ MyFSIO implements a comprehensive Identity and Access Management (IAM) system th
|
|||||||
|
|
||||||
### Getting Started
|
### Getting Started
|
||||||
|
|
||||||
1. On first boot, `data/.myfsio.sys/config/iam.json` is created with a randomly generated admin user. The access key and secret key are printed to the console during first startup. If you miss it, check the `iam.json` file directly—credentials are stored in plaintext.
|
1. On first boot, `data/.myfsio.sys/config/iam.json` is created with a randomly generated admin user. The access key and secret key are printed to the console during first startup. You can set `ADMIN_ACCESS_KEY` and `ADMIN_SECRET_KEY` environment variables to use custom credentials instead of random ones. If `SECRET_KEY` is configured, the IAM config file is encrypted at rest using Fernet (AES-128-CBC with HMAC-SHA256). To reset admin credentials later, run `python run.py --reset-cred`.
|
||||||
2. Sign into the UI using the generated credentials, then open **IAM**:
|
2. Sign into the UI using the generated credentials, then open **IAM**:
|
||||||
- **Create user**: supply a display name and optional JSON inline policy array.
|
- **Create user**: supply a display name, optional JSON inline policy array, and optional credential expiry date.
|
||||||
|
- **Set expiry**: assign an expiration date to any user's credentials. Expired credentials are rejected at authentication time. The UI shows expiry badges and preset durations (1h, 24h, 7d, 30d, 90d).
|
||||||
- **Rotate secret**: generates a new secret key; the UI surfaces it once.
|
- **Rotate secret**: generates a new secret key; the UI surfaces it once.
|
||||||
- **Policy editor**: select a user, paste an array of objects (`{"bucket": "*", "actions": ["list", "read"]}`), and submit. Alias support includes AWS-style verbs (e.g., `s3:GetObject`).
|
- **Policy editor**: select a user, paste an array of objects (`{"bucket": "*", "actions": ["list", "read"]}`), and submit. An optional `"prefix"` field restricts object-level actions to a key prefix (e.g., `"uploads/"`). Alias support includes AWS-style verbs (e.g., `s3:GetObject`).
|
||||||
3. Wildcard action `iam:*` is supported for admin user definitions.
|
3. Wildcard action `iam:*` is supported for admin user definitions.
|
||||||
|
|
||||||
> **Breaking Change (v0.2.0+):** Previous versions used fixed default credentials (`localadmin/localadmin`). If upgrading from an older version, your existing credentials remain unchanged, but new installations will generate random credentials.
|
> **Breaking Change (v0.2.0+):** Previous versions used fixed default credentials (`localadmin/localadmin`). If upgrading from an older version, your existing credentials remain unchanged, but new installations will generate random credentials.
|
||||||
@@ -639,8 +774,11 @@ The API expects every request to include authentication headers. The UI persists
|
|||||||
|
|
||||||
**Security Features:**
|
**Security Features:**
|
||||||
- **Lockout Protection**: After `AUTH_MAX_ATTEMPTS` (default: 5) failed login attempts, the account is locked for `AUTH_LOCKOUT_MINUTES` (default: 15 minutes).
|
- **Lockout Protection**: After `AUTH_MAX_ATTEMPTS` (default: 5) failed login attempts, the account is locked for `AUTH_LOCKOUT_MINUTES` (default: 15 minutes).
|
||||||
|
- **Credential Expiry**: Each user can have an optional `expires_at` timestamp (ISO 8601). Once expired, all API requests using those credentials are rejected. Set or clear expiry via the UI or API.
|
||||||
|
- **IAM Config Encryption**: When `SECRET_KEY` is set, the IAM config file (`iam.json`) is encrypted at rest using Fernet (AES-128-CBC with HMAC-SHA256). Existing plaintext configs are automatically encrypted on next load.
|
||||||
- **Session Management**: UI sessions remain valid for `SESSION_LIFETIME_DAYS` (default: 30 days).
|
- **Session Management**: UI sessions remain valid for `SESSION_LIFETIME_DAYS` (default: 30 days).
|
||||||
- **Hot Reload**: IAM configuration changes take effect immediately without restart.
|
- **Hot Reload**: IAM configuration changes take effect immediately without restart.
|
||||||
|
- **Credential Reset**: Run `python run.py --reset-cred` to reset admin credentials. Supports `ADMIN_ACCESS_KEY` and `ADMIN_SECRET_KEY` env vars for deterministic keys.
|
||||||
|
|
||||||
### Permission Model
|
### Permission Model
|
||||||
|
|
||||||
@@ -659,13 +797,23 @@ Both layers are evaluated for each request. A user must have permission in their
|
|||||||
| --- | --- | --- |
|
| --- | --- | --- |
|
||||||
| `list` | List buckets and objects | `s3:ListBucket`, `s3:ListAllMyBuckets`, `s3:ListBucketVersions`, `s3:ListMultipartUploads`, `s3:ListParts` |
|
| `list` | List buckets and objects | `s3:ListBucket`, `s3:ListAllMyBuckets`, `s3:ListBucketVersions`, `s3:ListMultipartUploads`, `s3:ListParts` |
|
||||||
| `read` | Download objects, get metadata | `s3:GetObject`, `s3:GetObjectVersion`, `s3:GetObjectTagging`, `s3:GetObjectVersionTagging`, `s3:GetObjectAcl`, `s3:GetBucketVersioning`, `s3:HeadObject`, `s3:HeadBucket` |
|
| `read` | Download objects, get metadata | `s3:GetObject`, `s3:GetObjectVersion`, `s3:GetObjectTagging`, `s3:GetObjectVersionTagging`, `s3:GetObjectAcl`, `s3:GetBucketVersioning`, `s3:HeadObject`, `s3:HeadBucket` |
|
||||||
| `write` | Upload objects, create buckets, manage tags | `s3:PutObject`, `s3:CreateBucket`, `s3:PutObjectTagging`, `s3:PutBucketVersioning`, `s3:CreateMultipartUpload`, `s3:UploadPart`, `s3:CompleteMultipartUpload`, `s3:AbortMultipartUpload`, `s3:CopyObject` |
|
| `write` | Upload objects, manage object tags | `s3:PutObject`, `s3:PutObjectTagging`, `s3:CreateMultipartUpload`, `s3:UploadPart`, `s3:CompleteMultipartUpload`, `s3:AbortMultipartUpload`, `s3:CopyObject` |
|
||||||
| `delete` | Remove objects, versions, and buckets | `s3:DeleteObject`, `s3:DeleteObjectVersion`, `s3:DeleteBucket`, `s3:DeleteObjectTagging` |
|
| `delete` | Remove objects and versions | `s3:DeleteObject`, `s3:DeleteObjectVersion`, `s3:DeleteObjectTagging` |
|
||||||
|
| `create_bucket` | Create new buckets | `s3:CreateBucket` |
|
||||||
|
| `delete_bucket` | Delete buckets | `s3:DeleteBucket` |
|
||||||
| `share` | Manage Access Control Lists (ACLs) | `s3:PutObjectAcl`, `s3:PutBucketAcl`, `s3:GetBucketAcl` |
|
| `share` | Manage Access Control Lists (ACLs) | `s3:PutObjectAcl`, `s3:PutBucketAcl`, `s3:GetBucketAcl` |
|
||||||
| `policy` | Manage bucket policies | `s3:PutBucketPolicy`, `s3:GetBucketPolicy`, `s3:DeleteBucketPolicy` |
|
| `policy` | Manage bucket policies | `s3:PutBucketPolicy`, `s3:GetBucketPolicy`, `s3:DeleteBucketPolicy` |
|
||||||
|
| `versioning` | Manage bucket versioning configuration | `s3:GetBucketVersioning`, `s3:PutBucketVersioning` |
|
||||||
|
| `tagging` | Manage bucket-level tags | `s3:GetBucketTagging`, `s3:PutBucketTagging`, `s3:DeleteBucketTagging` |
|
||||||
|
| `encryption` | Manage bucket encryption configuration | `s3:GetEncryptionConfiguration`, `s3:PutEncryptionConfiguration`, `s3:DeleteEncryptionConfiguration` |
|
||||||
| `lifecycle` | Manage lifecycle rules | `s3:GetLifecycleConfiguration`, `s3:PutLifecycleConfiguration`, `s3:DeleteLifecycleConfiguration`, `s3:GetBucketLifecycle`, `s3:PutBucketLifecycle` |
|
| `lifecycle` | Manage lifecycle rules | `s3:GetLifecycleConfiguration`, `s3:PutLifecycleConfiguration`, `s3:DeleteLifecycleConfiguration`, `s3:GetBucketLifecycle`, `s3:PutBucketLifecycle` |
|
||||||
| `cors` | Manage CORS configuration | `s3:GetBucketCors`, `s3:PutBucketCors`, `s3:DeleteBucketCors` |
|
| `cors` | Manage CORS configuration | `s3:GetBucketCors`, `s3:PutBucketCors`, `s3:DeleteBucketCors` |
|
||||||
| `replication` | Configure and manage replication | `s3:GetReplicationConfiguration`, `s3:PutReplicationConfiguration`, `s3:DeleteReplicationConfiguration`, `s3:ReplicateObject`, `s3:ReplicateTags`, `s3:ReplicateDelete` |
|
| `replication` | Configure and manage replication | `s3:GetReplicationConfiguration`, `s3:PutReplicationConfiguration`, `s3:DeleteReplicationConfiguration`, `s3:ReplicateObject`, `s3:ReplicateTags`, `s3:ReplicateDelete` |
|
||||||
|
| `quota` | Manage bucket storage quotas | `s3:GetBucketQuota`, `s3:PutBucketQuota`, `s3:DeleteBucketQuota` |
|
||||||
|
| `object_lock` | Manage object lock, retention, and legal holds | `s3:GetObjectLockConfiguration`, `s3:PutObjectLockConfiguration`, `s3:PutObjectRetention`, `s3:GetObjectRetention`, `s3:PutObjectLegalHold`, `s3:GetObjectLegalHold` |
|
||||||
|
| `notification` | Manage bucket event notifications | `s3:GetBucketNotificationConfiguration`, `s3:PutBucketNotificationConfiguration`, `s3:DeleteBucketNotificationConfiguration` |
|
||||||
|
| `logging` | Manage bucket access logging | `s3:GetBucketLogging`, `s3:PutBucketLogging`, `s3:DeleteBucketLogging` |
|
||||||
|
| `website` | Manage static website hosting configuration | `s3:GetBucketWebsite`, `s3:PutBucketWebsite`, `s3:DeleteBucketWebsite` |
|
||||||
|
|
||||||
#### IAM Actions (User Management)
|
#### IAM Actions (User Management)
|
||||||
|
|
||||||
@@ -676,25 +824,31 @@ Both layers are evaluated for each request. A user must have permission in their
|
|||||||
| `iam:delete_user` | Delete IAM users | `iam:DeleteUser` |
|
| `iam:delete_user` | Delete IAM users | `iam:DeleteUser` |
|
||||||
| `iam:rotate_key` | Rotate user secret keys | `iam:RotateAccessKey` |
|
| `iam:rotate_key` | Rotate user secret keys | `iam:RotateAccessKey` |
|
||||||
| `iam:update_policy` | Modify user policies | `iam:PutUserPolicy` |
|
| `iam:update_policy` | Modify user policies | `iam:PutUserPolicy` |
|
||||||
|
| `iam:create_key` | Create additional access keys for a user | `iam:CreateAccessKey` |
|
||||||
|
| `iam:delete_key` | Delete an access key from a user | `iam:DeleteAccessKey` |
|
||||||
|
| `iam:get_user` | View user details and access keys | `iam:GetUser` |
|
||||||
|
| `iam:get_policy` | View user policy configuration | `iam:GetPolicy` |
|
||||||
|
| `iam:disable_user` | Temporarily disable/enable a user account | `iam:DisableUser` |
|
||||||
| `iam:*` | **Admin wildcard** – grants all IAM actions | — |
|
| `iam:*` | **Admin wildcard** – grants all IAM actions | — |
|
||||||
|
|
||||||
#### Wildcards
|
#### Wildcards
|
||||||
|
|
||||||
| Wildcard | Scope | Description |
|
| Wildcard | Scope | Description |
|
||||||
| --- | --- | --- |
|
| --- | --- | --- |
|
||||||
| `*` (in actions) | All S3 actions | Grants `list`, `read`, `write`, `delete`, `share`, `policy`, `lifecycle`, `cors`, `replication` |
|
| `*` (in actions) | All S3 actions | Grants all 19 S3 actions including `list`, `read`, `write`, `delete`, `create_bucket`, `delete_bucket`, `share`, `policy`, `versioning`, `tagging`, `encryption`, `lifecycle`, `cors`, `replication`, `quota`, `object_lock`, `notification`, `logging`, `website` |
|
||||||
| `iam:*` | All IAM actions | Grants all `iam:*` actions for user management |
|
| `iam:*` | All IAM actions | Grants all `iam:*` actions for user management |
|
||||||
| `*` (in bucket) | All buckets | Policy applies to every bucket |
|
| `*` (in bucket) | All buckets | Policy applies to every bucket |
|
||||||
|
|
||||||
### IAM Policy Structure
|
### IAM Policy Structure
|
||||||
|
|
||||||
User policies are stored as a JSON array of policy objects. Each object specifies a bucket and the allowed actions:
|
User policies are stored as a JSON array of policy objects. Each object specifies a bucket, the allowed actions, and an optional prefix for object-level scoping:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"bucket": "<bucket-name-or-wildcard>",
|
"bucket": "<bucket-name-or-wildcard>",
|
||||||
"actions": ["<action1>", "<action2>", ...]
|
"actions": ["<action1>", "<action2>", ...],
|
||||||
|
"prefix": "<optional-key-prefix>"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
```
|
```
|
||||||
@@ -702,12 +856,13 @@ User policies are stored as a JSON array of policy objects. Each object specifie
|
|||||||
**Fields:**
|
**Fields:**
|
||||||
- `bucket`: The bucket name (case-insensitive) or `*` for all buckets
|
- `bucket`: The bucket name (case-insensitive) or `*` for all buckets
|
||||||
- `actions`: Array of action strings (simple names or AWS aliases)
|
- `actions`: Array of action strings (simple names or AWS aliases)
|
||||||
|
- `prefix`: *(optional)* Restrict object-level actions to keys starting with this prefix. Defaults to `*` (all objects). Example: `"uploads/"` restricts to keys under `uploads/`
|
||||||
|
|
||||||
### Example User Policies
|
### Example User Policies
|
||||||
|
|
||||||
**Full Administrator (complete system access):**
|
**Full Administrator (complete system access):**
|
||||||
```json
|
```json
|
||||||
[{"bucket": "*", "actions": ["list", "read", "write", "delete", "share", "policy", "lifecycle", "cors", "replication", "iam:*"]}]
|
[{"bucket": "*", "actions": ["list", "read", "write", "delete", "share", "policy", "create_bucket", "delete_bucket", "versioning", "tagging", "encryption", "lifecycle", "cors", "replication", "quota", "object_lock", "notification", "logging", "website", "iam:*"]}]
|
||||||
```
|
```
|
||||||
|
|
||||||
**Read-Only User (browse and download only):**
|
**Read-Only User (browse and download only):**
|
||||||
@@ -720,6 +875,11 @@ User policies are stored as a JSON array of policy objects. Each object specifie
|
|||||||
[{"bucket": "user-bucket", "actions": ["list", "read", "write", "delete"]}]
|
[{"bucket": "user-bucket", "actions": ["list", "read", "write", "delete"]}]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Operator (data operations + bucket management, no config):**
|
||||||
|
```json
|
||||||
|
[{"bucket": "*", "actions": ["list", "read", "write", "delete", "create_bucket", "delete_bucket"]}]
|
||||||
|
```
|
||||||
|
|
||||||
**Multiple Bucket Access (different permissions per bucket):**
|
**Multiple Bucket Access (different permissions per bucket):**
|
||||||
```json
|
```json
|
||||||
[
|
[
|
||||||
@@ -729,9 +889,14 @@ User policies are stored as a JSON array of policy objects. Each object specifie
|
|||||||
]
|
]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Prefix-Scoped Access (restrict to a folder inside a shared bucket):**
|
||||||
|
```json
|
||||||
|
[{"bucket": "shared-data", "actions": ["list", "read", "write", "delete"], "prefix": "team-a/"}]
|
||||||
|
```
|
||||||
|
|
||||||
**IAM Manager (manage users but no data access):**
|
**IAM Manager (manage users but no data access):**
|
||||||
```json
|
```json
|
||||||
[{"bucket": "*", "actions": ["iam:list_users", "iam:create_user", "iam:delete_user", "iam:rotate_key", "iam:update_policy"]}]
|
[{"bucket": "*", "actions": ["iam:list_users", "iam:create_user", "iam:delete_user", "iam:rotate_key", "iam:update_policy", "iam:create_key", "iam:delete_key", "iam:get_user", "iam:get_policy", "iam:disable_user"]}]
|
||||||
```
|
```
|
||||||
|
|
||||||
**Replication Operator (manage replication only):**
|
**Replication Operator (manage replication only):**
|
||||||
@@ -751,10 +916,10 @@ User policies are stored as a JSON array of policy objects. Each object specifie
|
|||||||
|
|
||||||
**Bucket Administrator (full bucket config, no IAM access):**
|
**Bucket Administrator (full bucket config, no IAM access):**
|
||||||
```json
|
```json
|
||||||
[{"bucket": "my-bucket", "actions": ["list", "read", "write", "delete", "policy", "lifecycle", "cors"]}]
|
[{"bucket": "my-bucket", "actions": ["list", "read", "write", "delete", "create_bucket", "delete_bucket", "share", "policy", "versioning", "tagging", "encryption", "lifecycle", "cors", "replication", "quota", "object_lock", "notification", "logging", "website"]}]
|
||||||
```
|
```
|
||||||
|
|
||||||
**Upload-Only User (write but cannot read back):**
|
**Upload-Only User (write but cannot create/delete buckets):**
|
||||||
```json
|
```json
|
||||||
[{"bucket": "drop-box", "actions": ["write"]}]
|
[{"bucket": "drop-box", "actions": ["write"]}]
|
||||||
```
|
```
|
||||||
@@ -800,7 +965,8 @@ curl -X POST http://localhost:5000/iam/users \
|
|||||||
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
||||||
-d '{
|
-d '{
|
||||||
"display_name": "New User",
|
"display_name": "New User",
|
||||||
"policies": [{"bucket": "*", "actions": ["list", "read"]}]
|
"policies": [{"bucket": "*", "actions": ["list", "read"]}],
|
||||||
|
"expires_at": "2026-12-31T23:59:59Z"
|
||||||
}'
|
}'
|
||||||
|
|
||||||
# Rotate user secret (requires iam:rotate_key)
|
# Rotate user secret (requires iam:rotate_key)
|
||||||
@@ -813,9 +979,45 @@ curl -X PUT http://localhost:5000/iam/users/<access-key>/policies \
|
|||||||
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
||||||
-d '[{"bucket": "*", "actions": ["list", "read", "write"]}]'
|
-d '[{"bucket": "*", "actions": ["list", "read", "write"]}]'
|
||||||
|
|
||||||
|
# Update credential expiry (requires iam:update_policy)
|
||||||
|
curl -X POST http://localhost:5000/iam/users/<access-key>/expiry \
|
||||||
|
-H "Content-Type: application/x-www-form-urlencoded" \
|
||||||
|
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
||||||
|
-d 'expires_at=2026-12-31T23:59:59Z'
|
||||||
|
|
||||||
|
# Remove credential expiry (never expires)
|
||||||
|
curl -X POST http://localhost:5000/iam/users/<access-key>/expiry \
|
||||||
|
-H "Content-Type: application/x-www-form-urlencoded" \
|
||||||
|
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
||||||
|
-d 'expires_at='
|
||||||
|
|
||||||
# Delete a user (requires iam:delete_user)
|
# Delete a user (requires iam:delete_user)
|
||||||
curl -X DELETE http://localhost:5000/iam/users/<access-key> \
|
curl -X DELETE http://localhost:5000/iam/users/<access-key> \
|
||||||
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
|
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
|
||||||
|
|
||||||
|
# Get user details (requires iam:get_user) — via Admin API
|
||||||
|
curl http://localhost:5000/admin/iam/users/<user-id-or-access-key> \
|
||||||
|
-H "Authorization: AWS4-HMAC-SHA256 ..."
|
||||||
|
|
||||||
|
# Get user policies (requires iam:get_policy) — via Admin API
|
||||||
|
curl http://localhost:5000/admin/iam/users/<user-id-or-access-key>/policies \
|
||||||
|
-H "Authorization: AWS4-HMAC-SHA256 ..."
|
||||||
|
|
||||||
|
# Create additional access key for a user (requires iam:create_key)
|
||||||
|
curl -X POST http://localhost:5000/admin/iam/users/<user-id-or-access-key>/keys \
|
||||||
|
-H "Authorization: AWS4-HMAC-SHA256 ..."
|
||||||
|
|
||||||
|
# Delete an access key (requires iam:delete_key)
|
||||||
|
curl -X DELETE http://localhost:5000/admin/iam/users/<user-id>/keys/<access-key> \
|
||||||
|
-H "Authorization: AWS4-HMAC-SHA256 ..."
|
||||||
|
|
||||||
|
# Disable a user account (requires iam:disable_user)
|
||||||
|
curl -X POST http://localhost:5000/admin/iam/users/<user-id-or-access-key>/disable \
|
||||||
|
-H "Authorization: AWS4-HMAC-SHA256 ..."
|
||||||
|
|
||||||
|
# Re-enable a user account (requires iam:disable_user)
|
||||||
|
curl -X POST http://localhost:5000/admin/iam/users/<user-id-or-access-key>/enable \
|
||||||
|
-H "Authorization: AWS4-HMAC-SHA256 ..."
|
||||||
```
|
```
|
||||||
|
|
||||||
### Permission Precedence
|
### Permission Precedence
|
||||||
@@ -824,8 +1026,9 @@ When a request is made, permissions are evaluated in this order:
|
|||||||
|
|
||||||
1. **Authentication** – Verify the access key and secret key are valid
|
1. **Authentication** – Verify the access key and secret key are valid
|
||||||
2. **Lockout Check** – Ensure the account is not locked due to failed attempts
|
2. **Lockout Check** – Ensure the account is not locked due to failed attempts
|
||||||
3. **IAM Policy Check** – Verify the user has the required action for the target bucket
|
3. **Expiry Check** – Reject requests if the user's credentials have expired (`expires_at`)
|
||||||
4. **Bucket Policy Check** – If a bucket policy exists, verify it allows the action
|
4. **IAM Policy Check** – Verify the user has the required action for the target bucket
|
||||||
|
5. **Bucket Policy Check** – If a bucket policy exists, verify it allows the action
|
||||||
|
|
||||||
A request is allowed only if:
|
A request is allowed only if:
|
||||||
- The IAM policy grants the action, AND
|
- The IAM policy grants the action, AND
|
||||||
@@ -912,7 +1115,7 @@ Objects with forward slashes (`/`) in their keys are displayed as a folder hiera
|
|||||||
|
|
||||||
- Select multiple objects using checkboxes
|
- Select multiple objects using checkboxes
|
||||||
- **Bulk Delete**: Delete multiple objects at once
|
- **Bulk Delete**: Delete multiple objects at once
|
||||||
- **Bulk Download**: Download selected objects as individual files
|
- **Bulk Download**: Download selected objects as a single ZIP archive (up to `BULK_DOWNLOAD_MAX_BYTES`, default 1 GiB)
|
||||||
|
|
||||||
#### Search & Filter
|
#### Search & Filter
|
||||||
|
|
||||||
@@ -985,6 +1188,7 @@ MyFSIO supports **server-side encryption at rest** to protect your data. When en
|
|||||||
|------|-------------|
|
|------|-------------|
|
||||||
| **AES-256 (SSE-S3)** | Server-managed encryption using a local master key |
|
| **AES-256 (SSE-S3)** | Server-managed encryption using a local master key |
|
||||||
| **KMS (SSE-KMS)** | Encryption using customer-managed keys via the built-in KMS |
|
| **KMS (SSE-KMS)** | Encryption using customer-managed keys via the built-in KMS |
|
||||||
|
| **SSE-C** | Server-side encryption with customer-provided keys (per-request) |
|
||||||
|
|
||||||
### Enabling Encryption
|
### Enabling Encryption
|
||||||
|
|
||||||
@@ -1083,6 +1287,44 @@ encrypted, metadata = ClientEncryptionHelper.encrypt_for_upload(plaintext, key)
|
|||||||
decrypted = ClientEncryptionHelper.decrypt_from_download(encrypted, metadata, key)
|
decrypted = ClientEncryptionHelper.decrypt_from_download(encrypted, metadata, key)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### SSE-C (Customer-Provided Keys)
|
||||||
|
|
||||||
|
With SSE-C, you provide your own 256-bit AES encryption key with each request. The server encrypts/decrypts using your key but never stores it. You must supply the same key for both upload and download.
|
||||||
|
|
||||||
|
**SSE-C request headers:**
|
||||||
|
|
||||||
|
| Header | Value |
|
||||||
|
|--------|-------|
|
||||||
|
| `x-amz-server-side-encryption-customer-algorithm` | `AES256` |
|
||||||
|
| `x-amz-server-side-encryption-customer-key` | Base64-encoded 256-bit key |
|
||||||
|
| `x-amz-server-side-encryption-customer-key-MD5` | Base64-encoded MD5 of the key |
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Generate a 256-bit key
|
||||||
|
KEY=$(openssl rand -base64 32)
|
||||||
|
KEY_MD5=$(echo -n "$KEY" | base64 -d | openssl dgst -md5 -binary | base64)
|
||||||
|
|
||||||
|
# Upload with SSE-C
|
||||||
|
curl -X PUT "http://localhost:5000/my-bucket/secret.txt" \
|
||||||
|
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
||||||
|
-H "x-amz-server-side-encryption-customer-algorithm: AES256" \
|
||||||
|
-H "x-amz-server-side-encryption-customer-key: $KEY" \
|
||||||
|
-H "x-amz-server-side-encryption-customer-key-MD5: $KEY_MD5" \
|
||||||
|
--data-binary @secret.txt
|
||||||
|
|
||||||
|
# Download with SSE-C (same key required)
|
||||||
|
curl "http://localhost:5000/my-bucket/secret.txt" \
|
||||||
|
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
|
||||||
|
-H "x-amz-server-side-encryption-customer-algorithm: AES256" \
|
||||||
|
-H "x-amz-server-side-encryption-customer-key: $KEY" \
|
||||||
|
-H "x-amz-server-side-encryption-customer-key-MD5: $KEY_MD5"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Key points:**
|
||||||
|
- SSE-C does not require `ENCRYPTION_ENABLED` or `KMS_ENABLED` — the key is provided per-request
|
||||||
|
- If you lose your key, the data is irrecoverable
|
||||||
|
- The MD5 header is optional but recommended for integrity verification
|
||||||
|
|
||||||
### Important Notes
|
### Important Notes
|
||||||
|
|
||||||
- **Existing objects are NOT encrypted** - Only new uploads after enabling encryption are encrypted
|
- **Existing objects are NOT encrypted** - Only new uploads after enabling encryption are encrypted
|
||||||
@@ -1959,6 +2201,20 @@ curl -X PUT "http://localhost:5000/my-bucket/file.txt" \
|
|||||||
-H "x-amz-meta-newkey: newvalue"
|
-H "x-amz-meta-newkey: newvalue"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### MoveObject (UI)
|
||||||
|
|
||||||
|
Move an object to a different key or bucket. This is a UI-only convenience operation that performs a copy followed by a delete of the source. Requires `read` and `delete` on the source, and `write` on the destination.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Move via UI API
|
||||||
|
curl -X POST "http://localhost:5100/ui/buckets/my-bucket/objects/old-path/file.txt/move" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
--cookie "session=..." \
|
||||||
|
-d '{"dest_bucket": "other-bucket", "dest_key": "new-path/file.txt"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
The move is safe against data loss, though not atomic: if the copy succeeds but the delete fails, the object temporarily exists in both locations and the source can be removed manually.
|
||||||
|
|
||||||
### UploadPartCopy
|
### UploadPartCopy
|
||||||
|
|
||||||
Copy data from an existing object into a multipart upload part:
|
Copy data from an existing object into a multipart upload part:
|
||||||
|
|||||||
421
myfsio_core/Cargo.lock
generated
421
myfsio_core/Cargo.lock
generated
@@ -1,421 +0,0 @@
|
|||||||
# This file is automatically @generated by Cargo.
|
|
||||||
# It is not intended for manual editing.
|
|
||||||
version = 4
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "aho-corasick"
|
|
||||||
version = "1.1.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
|
||||||
dependencies = [
|
|
||||||
"memchr",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "allocator-api2"
|
|
||||||
version = "0.2.21"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "bitflags"
|
|
||||||
version = "2.11.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "block-buffer"
|
|
||||||
version = "0.10.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
|
|
||||||
dependencies = [
|
|
||||||
"generic-array",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cfg-if"
|
|
||||||
version = "1.0.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cpufeatures"
|
|
||||||
version = "0.2.17"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crypto-common"
|
|
||||||
version = "0.1.7"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
|
|
||||||
dependencies = [
|
|
||||||
"generic-array",
|
|
||||||
"typenum",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "digest"
|
|
||||||
version = "0.10.7"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
|
||||||
dependencies = [
|
|
||||||
"block-buffer",
|
|
||||||
"crypto-common",
|
|
||||||
"subtle",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "equivalent"
|
|
||||||
version = "1.0.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "foldhash"
|
|
||||||
version = "0.1.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "generic-array"
|
|
||||||
version = "0.14.7"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
|
|
||||||
dependencies = [
|
|
||||||
"typenum",
|
|
||||||
"version_check",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "hashbrown"
|
|
||||||
version = "0.15.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
|
|
||||||
dependencies = [
|
|
||||||
"allocator-api2",
|
|
||||||
"equivalent",
|
|
||||||
"foldhash",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "heck"
|
|
||||||
version = "0.5.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "hex"
|
|
||||||
version = "0.4.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "hmac"
|
|
||||||
version = "0.12.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
|
|
||||||
dependencies = [
|
|
||||||
"digest",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "libc"
|
|
||||||
version = "0.2.182"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "lock_api"
|
|
||||||
version = "0.4.14"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
|
|
||||||
dependencies = [
|
|
||||||
"scopeguard",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "lru"
|
|
||||||
version = "0.14.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198"
|
|
||||||
dependencies = [
|
|
||||||
"hashbrown",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "md-5"
|
|
||||||
version = "0.10.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"digest",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "memchr"
|
|
||||||
version = "2.8.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "myfsio_core"
|
|
||||||
version = "0.1.0"
|
|
||||||
dependencies = [
|
|
||||||
"hex",
|
|
||||||
"hmac",
|
|
||||||
"lru",
|
|
||||||
"md-5",
|
|
||||||
"parking_lot",
|
|
||||||
"pyo3",
|
|
||||||
"regex",
|
|
||||||
"sha2",
|
|
||||||
"unicode-normalization",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "once_cell"
|
|
||||||
version = "1.21.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "parking_lot"
|
|
||||||
version = "0.12.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
|
|
||||||
dependencies = [
|
|
||||||
"lock_api",
|
|
||||||
"parking_lot_core",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "parking_lot_core"
|
|
||||||
version = "0.9.12"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"libc",
|
|
||||||
"redox_syscall",
|
|
||||||
"smallvec",
|
|
||||||
"windows-link",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "portable-atomic"
|
|
||||||
version = "1.13.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "proc-macro2"
|
|
||||||
version = "1.0.106"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
|
||||||
dependencies = [
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pyo3"
|
|
||||||
version = "0.28.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "14c738662e2181be11cb82487628404254902bb3225d8e9e99c31f3ef82a405c"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
"once_cell",
|
|
||||||
"portable-atomic",
|
|
||||||
"pyo3-build-config",
|
|
||||||
"pyo3-ffi",
|
|
||||||
"pyo3-macros",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pyo3-build-config"
|
|
||||||
version = "0.28.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f9ca0864a7dd3c133a7f3f020cbff2e12e88420da854c35540fd20ce2d60e435"
|
|
||||||
dependencies = [
|
|
||||||
"target-lexicon",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pyo3-ffi"
|
|
||||||
version = "0.28.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9dfc1956b709823164763a34cc42bbfd26b8730afa77809a3df8b94a3ae3b059"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
"pyo3-build-config",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pyo3-macros"
|
|
||||||
version = "0.28.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "29dc660ad948bae134d579661d08033fbb1918f4529c3bbe3257a68f2009ddf2"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"pyo3-macros-backend",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pyo3-macros-backend"
|
|
||||||
version = "0.28.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e78cd6c6d718acfcedf26c3d21fe0f053624368b0d44298c55d7138fde9331f7"
|
|
||||||
dependencies = [
|
|
||||||
"heck",
|
|
||||||
"proc-macro2",
|
|
||||||
"pyo3-build-config",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "quote"
|
|
||||||
version = "1.0.44"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "redox_syscall"
|
|
||||||
version = "0.5.18"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
|
|
||||||
dependencies = [
|
|
||||||
"bitflags",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "regex"
|
|
||||||
version = "1.12.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
|
|
||||||
dependencies = [
|
|
||||||
"aho-corasick",
|
|
||||||
"memchr",
|
|
||||||
"regex-automata",
|
|
||||||
"regex-syntax",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "regex-automata"
|
|
||||||
version = "0.4.14"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
|
|
||||||
dependencies = [
|
|
||||||
"aho-corasick",
|
|
||||||
"memchr",
|
|
||||||
"regex-syntax",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "regex-syntax"
|
|
||||||
version = "0.8.9"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "scopeguard"
|
|
||||||
version = "1.2.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "sha2"
|
|
||||||
version = "0.10.9"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"cpufeatures",
|
|
||||||
"digest",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "smallvec"
|
|
||||||
version = "1.15.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "subtle"
|
|
||||||
version = "2.6.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "syn"
|
|
||||||
version = "2.0.116"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "target-lexicon"
|
|
||||||
version = "0.13.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "tinyvec"
|
|
||||||
version = "1.10.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
|
|
||||||
dependencies = [
|
|
||||||
"tinyvec_macros",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "tinyvec_macros"
|
|
||||||
version = "0.1.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "typenum"
|
|
||||||
version = "1.19.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "unicode-ident"
|
|
||||||
version = "1.0.24"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "unicode-normalization"
|
|
||||||
version = "0.1.25"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
|
|
||||||
dependencies = [
|
|
||||||
"tinyvec",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "version_check"
|
|
||||||
version = "0.9.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows-link"
|
|
||||||
version = "0.2.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
|
||||||
@@ -14,6 +14,11 @@ sha2 = "0.10"
|
|||||||
md-5 = "0.10"
|
md-5 = "0.10"
|
||||||
hex = "0.4"
|
hex = "0.4"
|
||||||
unicode-normalization = "0.1"
|
unicode-normalization = "0.1"
|
||||||
|
serde_json = "1"
|
||||||
regex = "1"
|
regex = "1"
|
||||||
lru = "0.14"
|
lru = "0.14"
|
||||||
parking_lot = "0.12"
|
parking_lot = "0.12"
|
||||||
|
percent-encoding = "2"
|
||||||
|
aes-gcm = "0.10"
|
||||||
|
hkdf = "0.12"
|
||||||
|
uuid = { version = "1", features = ["v4"] }
|
||||||
|
|||||||
192
myfsio_core/src/crypto.rs
Normal file
192
myfsio_core/src/crypto.rs
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
use aes_gcm::aead::Aead;
|
||||||
|
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
|
||||||
|
use hkdf::Hkdf;
|
||||||
|
use pyo3::exceptions::{PyIOError, PyValueError};
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use sha2::Sha256;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::{Read, Seek, SeekFrom, Write};
|
||||||
|
|
||||||
|
const DEFAULT_CHUNK_SIZE: usize = 65536;
|
||||||
|
const HEADER_SIZE: usize = 4;
|
||||||
|
|
||||||
|
fn read_exact_chunk(reader: &mut impl Read, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||||
|
let mut filled = 0;
|
||||||
|
while filled < buf.len() {
|
||||||
|
match reader.read(&mut buf[filled..]) {
|
||||||
|
Ok(0) => break,
|
||||||
|
Ok(n) => filled += n,
|
||||||
|
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
|
||||||
|
Err(e) => return Err(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(filled)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn derive_chunk_nonce(base_nonce: &[u8], chunk_index: u32) -> Result<[u8; 12], String> {
|
||||||
|
let hkdf = Hkdf::<Sha256>::new(Some(base_nonce), b"chunk_nonce");
|
||||||
|
let mut okm = [0u8; 12];
|
||||||
|
hkdf.expand(&chunk_index.to_be_bytes(), &mut okm)
|
||||||
|
.map_err(|e| format!("HKDF expand failed: {}", e))?;
|
||||||
|
Ok(okm)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
#[pyo3(signature = (input_path, output_path, key, base_nonce, chunk_size=DEFAULT_CHUNK_SIZE))]
|
||||||
|
pub fn encrypt_stream_chunked(
|
||||||
|
py: Python<'_>,
|
||||||
|
input_path: &str,
|
||||||
|
output_path: &str,
|
||||||
|
key: &[u8],
|
||||||
|
base_nonce: &[u8],
|
||||||
|
chunk_size: usize,
|
||||||
|
) -> PyResult<u32> {
|
||||||
|
if key.len() != 32 {
|
||||||
|
return Err(PyValueError::new_err(format!(
|
||||||
|
"Key must be 32 bytes, got {}",
|
||||||
|
key.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
if base_nonce.len() != 12 {
|
||||||
|
return Err(PyValueError::new_err(format!(
|
||||||
|
"Base nonce must be 12 bytes, got {}",
|
||||||
|
base_nonce.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let chunk_size = if chunk_size == 0 {
|
||||||
|
DEFAULT_CHUNK_SIZE
|
||||||
|
} else {
|
||||||
|
chunk_size
|
||||||
|
};
|
||||||
|
|
||||||
|
let inp = input_path.to_owned();
|
||||||
|
let out = output_path.to_owned();
|
||||||
|
let key_arr: [u8; 32] = key.try_into().unwrap();
|
||||||
|
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
|
||||||
|
|
||||||
|
py.detach(move || {
|
||||||
|
let cipher = Aes256Gcm::new(&key_arr.into());
|
||||||
|
|
||||||
|
let mut infile = File::open(&inp)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to open input: {}", e)))?;
|
||||||
|
let mut outfile = File::create(&out)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to create output: {}", e)))?;
|
||||||
|
|
||||||
|
outfile
|
||||||
|
.write_all(&[0u8; 4])
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to write header: {}", e)))?;
|
||||||
|
|
||||||
|
let mut buf = vec![0u8; chunk_size];
|
||||||
|
let mut chunk_index: u32 = 0;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let n = read_exact_chunk(&mut infile, &mut buf)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to read: {}", e)))?;
|
||||||
|
if n == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)
|
||||||
|
.map_err(|e| PyValueError::new_err(e))?;
|
||||||
|
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||||
|
|
||||||
|
let encrypted = cipher
|
||||||
|
.encrypt(nonce, &buf[..n])
|
||||||
|
.map_err(|e| PyValueError::new_err(format!("Encrypt failed: {}", e)))?;
|
||||||
|
|
||||||
|
let size = encrypted.len() as u32;
|
||||||
|
outfile
|
||||||
|
.write_all(&size.to_be_bytes())
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk size: {}", e)))?;
|
||||||
|
outfile
|
||||||
|
.write_all(&encrypted)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk: {}", e)))?;
|
||||||
|
|
||||||
|
chunk_index += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
outfile
|
||||||
|
.seek(SeekFrom::Start(0))
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to seek: {}", e)))?;
|
||||||
|
outfile
|
||||||
|
.write_all(&chunk_index.to_be_bytes())
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk count: {}", e)))?;
|
||||||
|
|
||||||
|
Ok(chunk_index)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn decrypt_stream_chunked(
|
||||||
|
py: Python<'_>,
|
||||||
|
input_path: &str,
|
||||||
|
output_path: &str,
|
||||||
|
key: &[u8],
|
||||||
|
base_nonce: &[u8],
|
||||||
|
) -> PyResult<u32> {
|
||||||
|
if key.len() != 32 {
|
||||||
|
return Err(PyValueError::new_err(format!(
|
||||||
|
"Key must be 32 bytes, got {}",
|
||||||
|
key.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
if base_nonce.len() != 12 {
|
||||||
|
return Err(PyValueError::new_err(format!(
|
||||||
|
"Base nonce must be 12 bytes, got {}",
|
||||||
|
base_nonce.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let inp = input_path.to_owned();
|
||||||
|
let out = output_path.to_owned();
|
||||||
|
let key_arr: [u8; 32] = key.try_into().unwrap();
|
||||||
|
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
|
||||||
|
|
||||||
|
py.detach(move || {
|
||||||
|
let cipher = Aes256Gcm::new(&key_arr.into());
|
||||||
|
|
||||||
|
let mut infile = File::open(&inp)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to open input: {}", e)))?;
|
||||||
|
let mut outfile = File::create(&out)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to create output: {}", e)))?;
|
||||||
|
|
||||||
|
let mut header = [0u8; HEADER_SIZE];
|
||||||
|
infile
|
||||||
|
.read_exact(&mut header)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to read header: {}", e)))?;
|
||||||
|
let chunk_count = u32::from_be_bytes(header);
|
||||||
|
|
||||||
|
let mut size_buf = [0u8; HEADER_SIZE];
|
||||||
|
for chunk_index in 0..chunk_count {
|
||||||
|
infile
|
||||||
|
.read_exact(&mut size_buf)
|
||||||
|
.map_err(|e| {
|
||||||
|
PyIOError::new_err(format!(
|
||||||
|
"Failed to read chunk {} size: {}",
|
||||||
|
chunk_index, e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
let chunk_size = u32::from_be_bytes(size_buf) as usize;
|
||||||
|
|
||||||
|
let mut encrypted = vec![0u8; chunk_size];
|
||||||
|
infile.read_exact(&mut encrypted).map_err(|e| {
|
||||||
|
PyIOError::new_err(format!("Failed to read chunk {}: {}", chunk_index, e))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)
|
||||||
|
.map_err(|e| PyValueError::new_err(e))?;
|
||||||
|
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||||
|
|
||||||
|
let decrypted = cipher.decrypt(nonce, encrypted.as_ref()).map_err(|e| {
|
||||||
|
PyValueError::new_err(format!("Decrypt chunk {} failed: {}", chunk_index, e))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
outfile.write_all(&decrypted).map_err(|e| {
|
||||||
|
PyIOError::new_err(format!("Failed to write chunk {}: {}", chunk_index, e))
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(chunk_count)
|
||||||
|
})
|
||||||
|
}
|
||||||
@@ -1,5 +1,9 @@
|
|||||||
|
mod crypto;
|
||||||
mod hashing;
|
mod hashing;
|
||||||
|
mod metadata;
|
||||||
mod sigv4;
|
mod sigv4;
|
||||||
|
mod storage;
|
||||||
|
mod streaming;
|
||||||
mod validation;
|
mod validation;
|
||||||
|
|
||||||
use pyo3::prelude::*;
|
use pyo3::prelude::*;
|
||||||
@@ -10,6 +14,7 @@ mod myfsio_core {
|
|||||||
|
|
||||||
#[pymodule_init]
|
#[pymodule_init]
|
||||||
fn init(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
fn init(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||||
|
m.add_function(wrap_pyfunction!(sigv4::verify_sigv4_signature, m)?)?;
|
||||||
m.add_function(wrap_pyfunction!(sigv4::derive_signing_key, m)?)?;
|
m.add_function(wrap_pyfunction!(sigv4::derive_signing_key, m)?)?;
|
||||||
m.add_function(wrap_pyfunction!(sigv4::compute_signature, m)?)?;
|
m.add_function(wrap_pyfunction!(sigv4::compute_signature, m)?)?;
|
||||||
m.add_function(wrap_pyfunction!(sigv4::build_string_to_sign, m)?)?;
|
m.add_function(wrap_pyfunction!(sigv4::build_string_to_sign, m)?)?;
|
||||||
@@ -25,6 +30,22 @@ mod myfsio_core {
|
|||||||
m.add_function(wrap_pyfunction!(validation::validate_object_key, m)?)?;
|
m.add_function(wrap_pyfunction!(validation::validate_object_key, m)?)?;
|
||||||
m.add_function(wrap_pyfunction!(validation::validate_bucket_name, m)?)?;
|
m.add_function(wrap_pyfunction!(validation::validate_bucket_name, m)?)?;
|
||||||
|
|
||||||
|
m.add_function(wrap_pyfunction!(metadata::read_index_entry, m)?)?;
|
||||||
|
|
||||||
|
m.add_function(wrap_pyfunction!(storage::write_index_entry, m)?)?;
|
||||||
|
m.add_function(wrap_pyfunction!(storage::delete_index_entry, m)?)?;
|
||||||
|
m.add_function(wrap_pyfunction!(storage::check_bucket_contents, m)?)?;
|
||||||
|
m.add_function(wrap_pyfunction!(storage::shallow_scan, m)?)?;
|
||||||
|
m.add_function(wrap_pyfunction!(storage::bucket_stats_scan, m)?)?;
|
||||||
|
m.add_function(wrap_pyfunction!(storage::search_objects_scan, m)?)?;
|
||||||
|
m.add_function(wrap_pyfunction!(storage::build_object_cache, m)?)?;
|
||||||
|
|
||||||
|
m.add_function(wrap_pyfunction!(streaming::stream_to_file_with_md5, m)?)?;
|
||||||
|
m.add_function(wrap_pyfunction!(streaming::assemble_parts_with_md5, m)?)?;
|
||||||
|
|
||||||
|
m.add_function(wrap_pyfunction!(crypto::encrypt_stream_chunked, m)?)?;
|
||||||
|
m.add_function(wrap_pyfunction!(crypto::decrypt_stream_chunked, m)?)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
71
myfsio_core/src/metadata.rs
Normal file
71
myfsio_core/src/metadata.rs
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
use pyo3::exceptions::PyValueError;
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use pyo3::types::{PyDict, PyList, PyString};
|
||||||
|
use serde_json::Value;
|
||||||
|
use std::fs;
|
||||||
|
|
||||||
|
const MAX_DEPTH: u32 = 64;
|
||||||
|
|
||||||
|
fn value_to_py(py: Python<'_>, v: &Value, depth: u32) -> PyResult<Py<PyAny>> {
|
||||||
|
if depth > MAX_DEPTH {
|
||||||
|
return Err(PyValueError::new_err("JSON nesting too deep"));
|
||||||
|
}
|
||||||
|
match v {
|
||||||
|
Value::Null => Ok(py.None()),
|
||||||
|
Value::Bool(b) => Ok((*b).into_pyobject(py)?.to_owned().into_any().unbind()),
|
||||||
|
Value::Number(n) => {
|
||||||
|
if let Some(i) = n.as_i64() {
|
||||||
|
Ok(i.into_pyobject(py)?.into_any().unbind())
|
||||||
|
} else if let Some(f) = n.as_f64() {
|
||||||
|
Ok(f.into_pyobject(py)?.into_any().unbind())
|
||||||
|
} else {
|
||||||
|
Ok(py.None())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Value::String(s) => Ok(PyString::new(py, s).into_any().unbind()),
|
||||||
|
Value::Array(arr) => {
|
||||||
|
let list = PyList::empty(py);
|
||||||
|
for item in arr {
|
||||||
|
list.append(value_to_py(py, item, depth + 1)?)?;
|
||||||
|
}
|
||||||
|
Ok(list.into_any().unbind())
|
||||||
|
}
|
||||||
|
Value::Object(map) => {
|
||||||
|
let dict = PyDict::new(py);
|
||||||
|
for (k, val) in map {
|
||||||
|
dict.set_item(k, value_to_py(py, val, depth + 1)?)?;
|
||||||
|
}
|
||||||
|
Ok(dict.into_any().unbind())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn read_index_entry(
|
||||||
|
py: Python<'_>,
|
||||||
|
path: &str,
|
||||||
|
entry_name: &str,
|
||||||
|
) -> PyResult<Option<Py<PyAny>>> {
|
||||||
|
let path_owned = path.to_owned();
|
||||||
|
let entry_owned = entry_name.to_owned();
|
||||||
|
|
||||||
|
let entry: Option<Value> = py.detach(move || -> PyResult<Option<Value>> {
|
||||||
|
let content = match fs::read_to_string(&path_owned) {
|
||||||
|
Ok(c) => c,
|
||||||
|
Err(_) => return Ok(None),
|
||||||
|
};
|
||||||
|
let parsed: Value = match serde_json::from_str(&content) {
|
||||||
|
Ok(v) => v,
|
||||||
|
Err(_) => return Ok(None),
|
||||||
|
};
|
||||||
|
match parsed {
|
||||||
|
Value::Object(mut map) => Ok(map.remove(&entry_owned)),
|
||||||
|
_ => Ok(None),
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
|
||||||
|
match entry {
|
||||||
|
Some(val) => Ok(Some(value_to_py(py, &val, 0)?)),
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
use hmac::{Hmac, Mac};
|
use hmac::{Hmac, Mac};
|
||||||
use lru::LruCache;
|
use lru::LruCache;
|
||||||
use parking_lot::Mutex;
|
use parking_lot::Mutex;
|
||||||
|
use percent_encoding::{percent_encode, AsciiSet, NON_ALPHANUMERIC};
|
||||||
use pyo3::prelude::*;
|
use pyo3::prelude::*;
|
||||||
use sha2::{Digest, Sha256};
|
use sha2::{Digest, Sha256};
|
||||||
use std::num::NonZeroUsize;
|
use std::num::NonZeroUsize;
|
||||||
@@ -19,14 +20,29 @@ static SIGNING_KEY_CACHE: LazyLock<Mutex<LruCache<(String, String, String, Strin
|
|||||||
|
|
||||||
const CACHE_TTL_SECS: u64 = 60;
|
const CACHE_TTL_SECS: u64 = 60;
|
||||||
|
|
||||||
|
const AWS_ENCODE_SET: &AsciiSet = &NON_ALPHANUMERIC
|
||||||
|
.remove(b'-')
|
||||||
|
.remove(b'_')
|
||||||
|
.remove(b'.')
|
||||||
|
.remove(b'~');
|
||||||
|
|
||||||
fn hmac_sha256(key: &[u8], msg: &[u8]) -> Vec<u8> {
|
fn hmac_sha256(key: &[u8], msg: &[u8]) -> Vec<u8> {
|
||||||
let mut mac = HmacSha256::new_from_slice(key).expect("HMAC key length is always valid");
|
let mut mac = HmacSha256::new_from_slice(key).expect("HMAC key length is always valid");
|
||||||
mac.update(msg);
|
mac.update(msg);
|
||||||
mac.finalize().into_bytes().to_vec()
|
mac.finalize().into_bytes().to_vec()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyfunction]
|
fn sha256_hex(data: &[u8]) -> String {
|
||||||
pub fn derive_signing_key(
|
let mut hasher = Sha256::new();
|
||||||
|
hasher.update(data);
|
||||||
|
hex::encode(hasher.finalize())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn aws_uri_encode(input: &str) -> String {
|
||||||
|
percent_encode(input.as_bytes(), AWS_ENCODE_SET).to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn derive_signing_key_cached(
|
||||||
secret_key: &str,
|
secret_key: &str,
|
||||||
date_stamp: &str,
|
date_stamp: &str,
|
||||||
region: &str,
|
region: &str,
|
||||||
@@ -68,18 +84,91 @@ pub fn derive_signing_key(
|
|||||||
k_signing
|
k_signing
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn constant_time_compare_inner(a: &[u8], b: &[u8]) -> bool {
|
||||||
|
if a.len() != b.len() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let mut result: u8 = 0;
|
||||||
|
for (x, y) in a.iter().zip(b.iter()) {
|
||||||
|
result |= x ^ y;
|
||||||
|
}
|
||||||
|
result == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn verify_sigv4_signature(
|
||||||
|
method: &str,
|
||||||
|
canonical_uri: &str,
|
||||||
|
query_params: Vec<(String, String)>,
|
||||||
|
signed_headers_str: &str,
|
||||||
|
header_values: Vec<(String, String)>,
|
||||||
|
payload_hash: &str,
|
||||||
|
amz_date: &str,
|
||||||
|
date_stamp: &str,
|
||||||
|
region: &str,
|
||||||
|
service: &str,
|
||||||
|
secret_key: &str,
|
||||||
|
provided_signature: &str,
|
||||||
|
) -> bool {
|
||||||
|
let mut sorted_params = query_params;
|
||||||
|
sorted_params.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
|
||||||
|
|
||||||
|
let canonical_query_string = sorted_params
|
||||||
|
.iter()
|
||||||
|
.map(|(k, v)| format!("{}={}", aws_uri_encode(k), aws_uri_encode(v)))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("&");
|
||||||
|
|
||||||
|
let mut canonical_headers = String::new();
|
||||||
|
for (name, value) in &header_values {
|
||||||
|
let lower_name = name.to_lowercase();
|
||||||
|
let normalized = value.split_whitespace().collect::<Vec<_>>().join(" ");
|
||||||
|
let final_value = if lower_name == "expect" && normalized.is_empty() {
|
||||||
|
"100-continue"
|
||||||
|
} else {
|
||||||
|
&normalized
|
||||||
|
};
|
||||||
|
canonical_headers.push_str(&lower_name);
|
||||||
|
canonical_headers.push(':');
|
||||||
|
canonical_headers.push_str(final_value);
|
||||||
|
canonical_headers.push('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
let canonical_request = format!(
|
||||||
|
"{}\n{}\n{}\n{}\n{}\n{}",
|
||||||
|
method, canonical_uri, canonical_query_string, canonical_headers, signed_headers_str, payload_hash
|
||||||
|
);
|
||||||
|
|
||||||
|
let credential_scope = format!("{}/{}/{}/aws4_request", date_stamp, region, service);
|
||||||
|
let cr_hash = sha256_hex(canonical_request.as_bytes());
|
||||||
|
let string_to_sign = format!(
|
||||||
|
"AWS4-HMAC-SHA256\n{}\n{}\n{}",
|
||||||
|
amz_date, credential_scope, cr_hash
|
||||||
|
);
|
||||||
|
|
||||||
|
let signing_key = derive_signing_key_cached(secret_key, date_stamp, region, service);
|
||||||
|
let calculated = hmac_sha256(&signing_key, string_to_sign.as_bytes());
|
||||||
|
let calculated_hex = hex::encode(&calculated);
|
||||||
|
|
||||||
|
constant_time_compare_inner(calculated_hex.as_bytes(), provided_signature.as_bytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn derive_signing_key(
|
||||||
|
secret_key: &str,
|
||||||
|
date_stamp: &str,
|
||||||
|
region: &str,
|
||||||
|
service: &str,
|
||||||
|
) -> Vec<u8> {
|
||||||
|
derive_signing_key_cached(secret_key, date_stamp, region, service)
|
||||||
|
}
|
||||||
|
|
||||||
#[pyfunction]
|
#[pyfunction]
|
||||||
pub fn compute_signature(signing_key: &[u8], string_to_sign: &str) -> String {
|
pub fn compute_signature(signing_key: &[u8], string_to_sign: &str) -> String {
|
||||||
let sig = hmac_sha256(signing_key, string_to_sign.as_bytes());
|
let sig = hmac_sha256(signing_key, string_to_sign.as_bytes());
|
||||||
hex::encode(sig)
|
hex::encode(sig)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sha256_hex(data: &[u8]) -> String {
|
|
||||||
let mut hasher = Sha256::new();
|
|
||||||
hasher.update(data);
|
|
||||||
hex::encode(hasher.finalize())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[pyfunction]
|
#[pyfunction]
|
||||||
pub fn build_string_to_sign(
|
pub fn build_string_to_sign(
|
||||||
amz_date: &str,
|
amz_date: &str,
|
||||||
@@ -87,19 +176,15 @@ pub fn build_string_to_sign(
|
|||||||
canonical_request: &str,
|
canonical_request: &str,
|
||||||
) -> String {
|
) -> String {
|
||||||
let cr_hash = sha256_hex(canonical_request.as_bytes());
|
let cr_hash = sha256_hex(canonical_request.as_bytes());
|
||||||
format!("AWS4-HMAC-SHA256\n{}\n{}\n{}", amz_date, credential_scope, cr_hash)
|
format!(
|
||||||
|
"AWS4-HMAC-SHA256\n{}\n{}\n{}",
|
||||||
|
amz_date, credential_scope, cr_hash
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyfunction]
|
#[pyfunction]
|
||||||
pub fn constant_time_compare(a: &str, b: &str) -> bool {
|
pub fn constant_time_compare(a: &str, b: &str) -> bool {
|
||||||
if a.len() != b.len() {
|
constant_time_compare_inner(a.as_bytes(), b.as_bytes())
|
||||||
return false;
|
|
||||||
}
|
|
||||||
let mut result: u8 = 0;
|
|
||||||
for (x, y) in a.bytes().zip(b.bytes()) {
|
|
||||||
result |= x ^ y;
|
|
||||||
}
|
|
||||||
result == 0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyfunction]
|
#[pyfunction]
|
||||||
|
|||||||
817
myfsio_core/src/storage.rs
Normal file
817
myfsio_core/src/storage.rs
Normal file
@@ -0,0 +1,817 @@
|
|||||||
|
use pyo3::exceptions::PyIOError;
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use pyo3::types::{PyDict, PyList, PyString, PyTuple};
|
||||||
|
use serde_json::Value;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::fs;
|
||||||
|
use std::path::Path;
|
||||||
|
use std::time::SystemTime;
|
||||||
|
|
||||||
|
const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
|
||||||
|
|
||||||
|
fn system_time_to_epoch(t: SystemTime) -> f64 {
|
||||||
|
t.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.map(|d| d.as_secs_f64())
|
||||||
|
.unwrap_or(0.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_etag_from_meta_bytes(content: &[u8]) -> Option<String> {
|
||||||
|
let marker = b"\"__etag__\"";
|
||||||
|
let idx = content.windows(marker.len()).position(|w| w == marker)?;
|
||||||
|
let after = &content[idx + marker.len()..];
|
||||||
|
let start = after.iter().position(|&b| b == b'"')? + 1;
|
||||||
|
let rest = &after[start..];
|
||||||
|
let end = rest.iter().position(|&b| b == b'"')?;
|
||||||
|
std::str::from_utf8(&rest[..end]).ok().map(|s| s.to_owned())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn has_any_file(root: &str) -> bool {
|
||||||
|
let root_path = Path::new(root);
|
||||||
|
if !root_path.is_dir() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let mut stack = vec![root_path.to_path_buf()];
|
||||||
|
while let Some(current) = stack.pop() {
|
||||||
|
let entries = match fs::read_dir(¤t) {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
for entry_result in entries {
|
||||||
|
let entry = match entry_result {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
let ft = match entry.file_type() {
|
||||||
|
Ok(ft) => ft,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if ft.is_file() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if ft.is_dir() && !ft.is_symlink() {
|
||||||
|
stack.push(entry.path());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn write_index_entry(
|
||||||
|
py: Python<'_>,
|
||||||
|
path: &str,
|
||||||
|
entry_name: &str,
|
||||||
|
entry_data_json: &str,
|
||||||
|
) -> PyResult<()> {
|
||||||
|
let path_owned = path.to_owned();
|
||||||
|
let entry_owned = entry_name.to_owned();
|
||||||
|
let data_owned = entry_data_json.to_owned();
|
||||||
|
|
||||||
|
py.detach(move || -> PyResult<()> {
|
||||||
|
let entry_value: Value = serde_json::from_str(&data_owned)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to parse entry data: {}", e)))?;
|
||||||
|
|
||||||
|
if let Some(parent) = Path::new(&path_owned).parent() {
|
||||||
|
let _ = fs::create_dir_all(parent);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut index_data: serde_json::Map<String, Value> = match fs::read_to_string(&path_owned)
|
||||||
|
{
|
||||||
|
Ok(content) => serde_json::from_str(&content).unwrap_or_default(),
|
||||||
|
Err(_) => serde_json::Map::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
index_data.insert(entry_owned, entry_value);
|
||||||
|
|
||||||
|
let serialized = serde_json::to_string(&Value::Object(index_data))
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to serialize index: {}", e)))?;
|
||||||
|
|
||||||
|
fs::write(&path_owned, serialized)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to write index: {}", e)))?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn delete_index_entry(py: Python<'_>, path: &str, entry_name: &str) -> PyResult<bool> {
|
||||||
|
let path_owned = path.to_owned();
|
||||||
|
let entry_owned = entry_name.to_owned();
|
||||||
|
|
||||||
|
py.detach(move || -> PyResult<bool> {
|
||||||
|
let content = match fs::read_to_string(&path_owned) {
|
||||||
|
Ok(c) => c,
|
||||||
|
Err(_) => return Ok(false),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut index_data: serde_json::Map<String, Value> =
|
||||||
|
match serde_json::from_str(&content) {
|
||||||
|
Ok(v) => v,
|
||||||
|
Err(_) => return Ok(false),
|
||||||
|
};
|
||||||
|
|
||||||
|
if index_data.remove(&entry_owned).is_none() {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if index_data.is_empty() {
|
||||||
|
let _ = fs::remove_file(&path_owned);
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
let serialized = serde_json::to_string(&Value::Object(index_data))
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to serialize index: {}", e)))?;
|
||||||
|
|
||||||
|
fs::write(&path_owned, serialized)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to write index: {}", e)))?;
|
||||||
|
|
||||||
|
Ok(false)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn check_bucket_contents(
|
||||||
|
py: Python<'_>,
|
||||||
|
bucket_path: &str,
|
||||||
|
version_roots: Vec<String>,
|
||||||
|
multipart_roots: Vec<String>,
|
||||||
|
) -> PyResult<(bool, bool, bool)> {
|
||||||
|
let bucket_owned = bucket_path.to_owned();
|
||||||
|
|
||||||
|
py.detach(move || -> PyResult<(bool, bool, bool)> {
|
||||||
|
let mut has_objects = false;
|
||||||
|
let bucket_p = Path::new(&bucket_owned);
|
||||||
|
if bucket_p.is_dir() {
|
||||||
|
let mut stack = vec![bucket_p.to_path_buf()];
|
||||||
|
'obj_scan: while let Some(current) = stack.pop() {
|
||||||
|
let is_root = current == bucket_p;
|
||||||
|
let entries = match fs::read_dir(¤t) {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
for entry_result in entries {
|
||||||
|
let entry = match entry_result {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
let ft = match entry.file_type() {
|
||||||
|
Ok(ft) => ft,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if is_root {
|
||||||
|
if let Some(name) = entry.file_name().to_str() {
|
||||||
|
if INTERNAL_FOLDERS.contains(&name) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ft.is_file() && !ft.is_symlink() {
|
||||||
|
has_objects = true;
|
||||||
|
break 'obj_scan;
|
||||||
|
}
|
||||||
|
if ft.is_dir() && !ft.is_symlink() {
|
||||||
|
stack.push(entry.path());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut has_versions = false;
|
||||||
|
for root in &version_roots {
|
||||||
|
if has_versions {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
has_versions = has_any_file(root);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut has_multipart = false;
|
||||||
|
for root in &multipart_roots {
|
||||||
|
if has_multipart {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
has_multipart = has_any_file(root);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((has_objects, has_versions, has_multipart))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn shallow_scan(
|
||||||
|
py: Python<'_>,
|
||||||
|
target_dir: &str,
|
||||||
|
prefix: &str,
|
||||||
|
meta_cache_json: &str,
|
||||||
|
) -> PyResult<Py<PyAny>> {
|
||||||
|
let target_owned = target_dir.to_owned();
|
||||||
|
let prefix_owned = prefix.to_owned();
|
||||||
|
let cache_owned = meta_cache_json.to_owned();
|
||||||
|
|
||||||
|
let result: (
|
||||||
|
Vec<(String, u64, f64, Option<String>)>,
|
||||||
|
Vec<String>,
|
||||||
|
Vec<(String, bool)>,
|
||||||
|
) = py.detach(move || -> PyResult<(
|
||||||
|
Vec<(String, u64, f64, Option<String>)>,
|
||||||
|
Vec<String>,
|
||||||
|
Vec<(String, bool)>,
|
||||||
|
)> {
|
||||||
|
let meta_cache: HashMap<String, String> =
|
||||||
|
serde_json::from_str(&cache_owned).unwrap_or_default();
|
||||||
|
|
||||||
|
let mut files: Vec<(String, u64, f64, Option<String>)> = Vec::new();
|
||||||
|
let mut dirs: Vec<String> = Vec::new();
|
||||||
|
|
||||||
|
let entries = match fs::read_dir(&target_owned) {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => return Ok((files, dirs, Vec::new())),
|
||||||
|
};
|
||||||
|
|
||||||
|
for entry_result in entries {
|
||||||
|
let entry = match entry_result {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
let name = match entry.file_name().into_string() {
|
||||||
|
Ok(n) => n,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if INTERNAL_FOLDERS.contains(&name.as_str()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let ft = match entry.file_type() {
|
||||||
|
Ok(ft) => ft,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if ft.is_dir() && !ft.is_symlink() {
|
||||||
|
let cp = format!("{}{}/", prefix_owned, name);
|
||||||
|
dirs.push(cp);
|
||||||
|
} else if ft.is_file() && !ft.is_symlink() {
|
||||||
|
let key = format!("{}{}", prefix_owned, name);
|
||||||
|
let md = match entry.metadata() {
|
||||||
|
Ok(m) => m,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
let size = md.len();
|
||||||
|
let mtime = md
|
||||||
|
.modified()
|
||||||
|
.map(system_time_to_epoch)
|
||||||
|
.unwrap_or(0.0);
|
||||||
|
let etag = meta_cache.get(&key).cloned();
|
||||||
|
files.push((key, size, mtime, etag));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
files.sort_by(|a, b| a.0.cmp(&b.0));
|
||||||
|
dirs.sort();
|
||||||
|
|
||||||
|
let mut merged: Vec<(String, bool)> = Vec::with_capacity(files.len() + dirs.len());
|
||||||
|
let mut fi = 0;
|
||||||
|
let mut di = 0;
|
||||||
|
while fi < files.len() && di < dirs.len() {
|
||||||
|
if files[fi].0 <= dirs[di] {
|
||||||
|
merged.push((files[fi].0.clone(), false));
|
||||||
|
fi += 1;
|
||||||
|
} else {
|
||||||
|
merged.push((dirs[di].clone(), true));
|
||||||
|
di += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while fi < files.len() {
|
||||||
|
merged.push((files[fi].0.clone(), false));
|
||||||
|
fi += 1;
|
||||||
|
}
|
||||||
|
while di < dirs.len() {
|
||||||
|
merged.push((dirs[di].clone(), true));
|
||||||
|
di += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((files, dirs, merged))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let (files, dirs, merged) = result;
|
||||||
|
|
||||||
|
let dict = PyDict::new(py);
|
||||||
|
|
||||||
|
let files_list = PyList::empty(py);
|
||||||
|
for (key, size, mtime, etag) in &files {
|
||||||
|
let etag_py: Py<PyAny> = match etag {
|
||||||
|
Some(e) => PyString::new(py, e).into_any().unbind(),
|
||||||
|
None => py.None(),
|
||||||
|
};
|
||||||
|
let tuple = PyTuple::new(py, &[
|
||||||
|
PyString::new(py, key).into_any().unbind(),
|
||||||
|
size.into_pyobject(py)?.into_any().unbind(),
|
||||||
|
mtime.into_pyobject(py)?.into_any().unbind(),
|
||||||
|
etag_py,
|
||||||
|
])?;
|
||||||
|
files_list.append(tuple)?;
|
||||||
|
}
|
||||||
|
dict.set_item("files", files_list)?;
|
||||||
|
|
||||||
|
let dirs_list = PyList::empty(py);
|
||||||
|
for d in &dirs {
|
||||||
|
dirs_list.append(PyString::new(py, d))?;
|
||||||
|
}
|
||||||
|
dict.set_item("dirs", dirs_list)?;
|
||||||
|
|
||||||
|
let merged_list = PyList::empty(py);
|
||||||
|
for (key, is_dir) in &merged {
|
||||||
|
let bool_obj: Py<PyAny> = if *is_dir {
|
||||||
|
true.into_pyobject(py)?.to_owned().into_any().unbind()
|
||||||
|
} else {
|
||||||
|
false.into_pyobject(py)?.to_owned().into_any().unbind()
|
||||||
|
};
|
||||||
|
let tuple = PyTuple::new(py, &[
|
||||||
|
PyString::new(py, key).into_any().unbind(),
|
||||||
|
bool_obj,
|
||||||
|
])?;
|
||||||
|
merged_list.append(tuple)?;
|
||||||
|
}
|
||||||
|
dict.set_item("merged_keys", merged_list)?;
|
||||||
|
|
||||||
|
Ok(dict.into_any().unbind())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn bucket_stats_scan(
|
||||||
|
py: Python<'_>,
|
||||||
|
bucket_path: &str,
|
||||||
|
versions_root: &str,
|
||||||
|
) -> PyResult<(u64, u64, u64, u64)> {
|
||||||
|
let bucket_owned = bucket_path.to_owned();
|
||||||
|
let versions_owned = versions_root.to_owned();
|
||||||
|
|
||||||
|
py.detach(move || -> PyResult<(u64, u64, u64, u64)> {
|
||||||
|
let mut object_count: u64 = 0;
|
||||||
|
let mut total_bytes: u64 = 0;
|
||||||
|
|
||||||
|
let bucket_p = Path::new(&bucket_owned);
|
||||||
|
if bucket_p.is_dir() {
|
||||||
|
let mut stack = vec![bucket_p.to_path_buf()];
|
||||||
|
while let Some(current) = stack.pop() {
|
||||||
|
let is_root = current == bucket_p;
|
||||||
|
let entries = match fs::read_dir(¤t) {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
for entry_result in entries {
|
||||||
|
let entry = match entry_result {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if is_root {
|
||||||
|
if let Some(name) = entry.file_name().to_str() {
|
||||||
|
if INTERNAL_FOLDERS.contains(&name) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let ft = match entry.file_type() {
|
||||||
|
Ok(ft) => ft,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if ft.is_dir() && !ft.is_symlink() {
|
||||||
|
stack.push(entry.path());
|
||||||
|
} else if ft.is_file() && !ft.is_symlink() {
|
||||||
|
object_count += 1;
|
||||||
|
if let Ok(md) = entry.metadata() {
|
||||||
|
total_bytes += md.len();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut version_count: u64 = 0;
|
||||||
|
let mut version_bytes: u64 = 0;
|
||||||
|
|
||||||
|
let versions_p = Path::new(&versions_owned);
|
||||||
|
if versions_p.is_dir() {
|
||||||
|
let mut stack = vec![versions_p.to_path_buf()];
|
||||||
|
while let Some(current) = stack.pop() {
|
||||||
|
let entries = match fs::read_dir(¤t) {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
for entry_result in entries {
|
||||||
|
let entry = match entry_result {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
let ft = match entry.file_type() {
|
||||||
|
Ok(ft) => ft,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if ft.is_dir() && !ft.is_symlink() {
|
||||||
|
stack.push(entry.path());
|
||||||
|
} else if ft.is_file() && !ft.is_symlink() {
|
||||||
|
if let Some(name) = entry.file_name().to_str() {
|
||||||
|
if name.ends_with(".bin") {
|
||||||
|
version_count += 1;
|
||||||
|
if let Ok(md) = entry.metadata() {
|
||||||
|
version_bytes += md.len();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((object_count, total_bytes, version_count, version_bytes))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
#[pyo3(signature = (bucket_path, search_root, query, limit))]
|
||||||
|
pub fn search_objects_scan(
|
||||||
|
py: Python<'_>,
|
||||||
|
bucket_path: &str,
|
||||||
|
search_root: &str,
|
||||||
|
query: &str,
|
||||||
|
limit: usize,
|
||||||
|
) -> PyResult<Py<PyAny>> {
|
||||||
|
let bucket_owned = bucket_path.to_owned();
|
||||||
|
let search_owned = search_root.to_owned();
|
||||||
|
let query_owned = query.to_owned();
|
||||||
|
|
||||||
|
let result: (Vec<(String, u64, f64)>, bool) = py.detach(
|
||||||
|
move || -> PyResult<(Vec<(String, u64, f64)>, bool)> {
|
||||||
|
let query_lower = query_owned.to_lowercase();
|
||||||
|
let bucket_len = bucket_owned.len() + 1;
|
||||||
|
let scan_limit = limit * 4;
|
||||||
|
let mut matched: usize = 0;
|
||||||
|
let mut results: Vec<(String, u64, f64)> = Vec::new();
|
||||||
|
|
||||||
|
let search_p = Path::new(&search_owned);
|
||||||
|
if !search_p.is_dir() {
|
||||||
|
return Ok((results, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
let bucket_p = Path::new(&bucket_owned);
|
||||||
|
let mut stack = vec![search_p.to_path_buf()];
|
||||||
|
|
||||||
|
'scan: while let Some(current) = stack.pop() {
|
||||||
|
let is_bucket_root = current == bucket_p;
|
||||||
|
let entries = match fs::read_dir(¤t) {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
for entry_result in entries {
|
||||||
|
let entry = match entry_result {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if is_bucket_root {
|
||||||
|
if let Some(name) = entry.file_name().to_str() {
|
||||||
|
if INTERNAL_FOLDERS.contains(&name) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let ft = match entry.file_type() {
|
||||||
|
Ok(ft) => ft,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if ft.is_dir() && !ft.is_symlink() {
|
||||||
|
stack.push(entry.path());
|
||||||
|
} else if ft.is_file() && !ft.is_symlink() {
|
||||||
|
let full_path = entry.path();
|
||||||
|
let full_str = full_path.to_string_lossy();
|
||||||
|
if full_str.len() <= bucket_len {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let key = full_str[bucket_len..].replace('\\', "/");
|
||||||
|
if key.to_lowercase().contains(&query_lower) {
|
||||||
|
if let Ok(md) = entry.metadata() {
|
||||||
|
let size = md.len();
|
||||||
|
let mtime = md
|
||||||
|
.modified()
|
||||||
|
.map(system_time_to_epoch)
|
||||||
|
.unwrap_or(0.0);
|
||||||
|
results.push((key, size, mtime));
|
||||||
|
matched += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if matched >= scan_limit {
|
||||||
|
break 'scan;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
results.sort_by(|a, b| a.0.cmp(&b.0));
|
||||||
|
let truncated = results.len() > limit;
|
||||||
|
results.truncate(limit);
|
||||||
|
|
||||||
|
Ok((results, truncated))
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let (results, truncated) = result;
|
||||||
|
|
||||||
|
let dict = PyDict::new(py);
|
||||||
|
|
||||||
|
let results_list = PyList::empty(py);
|
||||||
|
for (key, size, mtime) in &results {
|
||||||
|
let tuple = PyTuple::new(py, &[
|
||||||
|
PyString::new(py, key).into_any().unbind(),
|
||||||
|
size.into_pyobject(py)?.into_any().unbind(),
|
||||||
|
mtime.into_pyobject(py)?.into_any().unbind(),
|
||||||
|
])?;
|
||||||
|
results_list.append(tuple)?;
|
||||||
|
}
|
||||||
|
dict.set_item("results", results_list)?;
|
||||||
|
dict.set_item("truncated", truncated)?;
|
||||||
|
|
||||||
|
Ok(dict.into_any().unbind())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn build_object_cache(
|
||||||
|
py: Python<'_>,
|
||||||
|
bucket_path: &str,
|
||||||
|
meta_root: &str,
|
||||||
|
etag_index_path: &str,
|
||||||
|
) -> PyResult<Py<PyAny>> {
|
||||||
|
let bucket_owned = bucket_path.to_owned();
|
||||||
|
let meta_owned = meta_root.to_owned();
|
||||||
|
let index_path_owned = etag_index_path.to_owned();
|
||||||
|
|
||||||
|
let result: (HashMap<String, String>, Vec<(String, u64, f64, Option<String>)>, bool) =
|
||||||
|
py.detach(move || -> PyResult<(
|
||||||
|
HashMap<String, String>,
|
||||||
|
Vec<(String, u64, f64, Option<String>)>,
|
||||||
|
bool,
|
||||||
|
)> {
|
||||||
|
let mut meta_cache: HashMap<String, String> = HashMap::new();
|
||||||
|
let mut index_mtime: f64 = 0.0;
|
||||||
|
let mut etag_cache_changed = false;
|
||||||
|
|
||||||
|
let index_p = Path::new(&index_path_owned);
|
||||||
|
if index_p.is_file() {
|
||||||
|
if let Ok(md) = fs::metadata(&index_path_owned) {
|
||||||
|
index_mtime = md
|
||||||
|
.modified()
|
||||||
|
.map(system_time_to_epoch)
|
||||||
|
.unwrap_or(0.0);
|
||||||
|
}
|
||||||
|
if let Ok(content) = fs::read_to_string(&index_path_owned) {
|
||||||
|
if let Ok(parsed) = serde_json::from_str::<HashMap<String, String>>(&content) {
|
||||||
|
meta_cache = parsed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let meta_p = Path::new(&meta_owned);
|
||||||
|
let mut needs_rebuild = false;
|
||||||
|
|
||||||
|
if meta_p.is_dir() && index_mtime > 0.0 {
|
||||||
|
fn check_newer(dir: &Path, index_mtime: f64) -> bool {
|
||||||
|
let entries = match fs::read_dir(dir) {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => return false,
|
||||||
|
};
|
||||||
|
for entry_result in entries {
|
||||||
|
let entry = match entry_result {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
let ft = match entry.file_type() {
|
||||||
|
Ok(ft) => ft,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if ft.is_dir() && !ft.is_symlink() {
|
||||||
|
if check_newer(&entry.path(), index_mtime) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else if ft.is_file() {
|
||||||
|
if let Some(name) = entry.file_name().to_str() {
|
||||||
|
if name.ends_with(".meta.json") || name == "_index.json" {
|
||||||
|
if let Ok(md) = entry.metadata() {
|
||||||
|
let mt = md
|
||||||
|
.modified()
|
||||||
|
.map(system_time_to_epoch)
|
||||||
|
.unwrap_or(0.0);
|
||||||
|
if mt > index_mtime {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
needs_rebuild = check_newer(meta_p, index_mtime);
|
||||||
|
} else if meta_cache.is_empty() {
|
||||||
|
needs_rebuild = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if needs_rebuild && meta_p.is_dir() {
|
||||||
|
let meta_str = meta_owned.clone();
|
||||||
|
let meta_len = meta_str.len() + 1;
|
||||||
|
let mut index_files: Vec<String> = Vec::new();
|
||||||
|
let mut legacy_meta_files: Vec<(String, String)> = Vec::new();
|
||||||
|
|
||||||
|
fn collect_meta(
|
||||||
|
dir: &Path,
|
||||||
|
meta_len: usize,
|
||||||
|
index_files: &mut Vec<String>,
|
||||||
|
legacy_meta_files: &mut Vec<(String, String)>,
|
||||||
|
) {
|
||||||
|
let entries = match fs::read_dir(dir) {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => return,
|
||||||
|
};
|
||||||
|
for entry_result in entries {
|
||||||
|
let entry = match entry_result {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
let ft = match entry.file_type() {
|
||||||
|
Ok(ft) => ft,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if ft.is_dir() && !ft.is_symlink() {
|
||||||
|
collect_meta(&entry.path(), meta_len, index_files, legacy_meta_files);
|
||||||
|
} else if ft.is_file() {
|
||||||
|
if let Some(name) = entry.file_name().to_str() {
|
||||||
|
let full = entry.path().to_string_lossy().to_string();
|
||||||
|
if name == "_index.json" {
|
||||||
|
index_files.push(full);
|
||||||
|
} else if name.ends_with(".meta.json") {
|
||||||
|
if full.len() > meta_len {
|
||||||
|
let rel = &full[meta_len..];
|
||||||
|
let key = if rel.len() > 10 {
|
||||||
|
rel[..rel.len() - 10].replace('\\', "/")
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
legacy_meta_files.push((key, full));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
collect_meta(
|
||||||
|
meta_p,
|
||||||
|
meta_len,
|
||||||
|
&mut index_files,
|
||||||
|
&mut legacy_meta_files,
|
||||||
|
);
|
||||||
|
|
||||||
|
meta_cache.clear();
|
||||||
|
|
||||||
|
for idx_path in &index_files {
|
||||||
|
if let Ok(content) = fs::read_to_string(idx_path) {
|
||||||
|
if let Ok(idx_data) = serde_json::from_str::<HashMap<String, Value>>(&content) {
|
||||||
|
let rel_dir = if idx_path.len() > meta_len {
|
||||||
|
let r = &idx_path[meta_len..];
|
||||||
|
r.replace('\\', "/")
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
let dir_prefix = if rel_dir.ends_with("/_index.json") {
|
||||||
|
&rel_dir[..rel_dir.len() - "/_index.json".len()]
|
||||||
|
} else {
|
||||||
|
""
|
||||||
|
};
|
||||||
|
for (entry_name, entry_data) in &idx_data {
|
||||||
|
let key = if dir_prefix.is_empty() {
|
||||||
|
entry_name.clone()
|
||||||
|
} else {
|
||||||
|
format!("{}/{}", dir_prefix, entry_name)
|
||||||
|
};
|
||||||
|
if let Some(meta_obj) = entry_data.get("metadata") {
|
||||||
|
if let Some(etag) = meta_obj.get("__etag__") {
|
||||||
|
if let Some(etag_str) = etag.as_str() {
|
||||||
|
meta_cache.insert(key, etag_str.to_owned());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (key, path) in &legacy_meta_files {
|
||||||
|
if meta_cache.contains_key(key) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if let Ok(content) = fs::read(path) {
|
||||||
|
if let Some(etag) = extract_etag_from_meta_bytes(&content) {
|
||||||
|
meta_cache.insert(key.clone(), etag);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
etag_cache_changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let bucket_p = Path::new(&bucket_owned);
|
||||||
|
let bucket_len = bucket_owned.len() + 1;
|
||||||
|
let mut objects: Vec<(String, u64, f64, Option<String>)> = Vec::new();
|
||||||
|
|
||||||
|
if bucket_p.is_dir() {
|
||||||
|
let mut stack = vec![bucket_p.to_path_buf()];
|
||||||
|
while let Some(current) = stack.pop() {
|
||||||
|
let entries = match fs::read_dir(¤t) {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
for entry_result in entries {
|
||||||
|
let entry = match entry_result {
|
||||||
|
Ok(e) => e,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
let ft = match entry.file_type() {
|
||||||
|
Ok(ft) => ft,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if ft.is_dir() && !ft.is_symlink() {
|
||||||
|
let full = entry.path();
|
||||||
|
let full_str = full.to_string_lossy();
|
||||||
|
if full_str.len() > bucket_len {
|
||||||
|
let first_part: &str = if let Some(sep_pos) =
|
||||||
|
full_str[bucket_len..].find(|c: char| c == '\\' || c == '/')
|
||||||
|
{
|
||||||
|
&full_str[bucket_len..bucket_len + sep_pos]
|
||||||
|
} else {
|
||||||
|
&full_str[bucket_len..]
|
||||||
|
};
|
||||||
|
if INTERNAL_FOLDERS.contains(&first_part) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else if let Some(name) = entry.file_name().to_str() {
|
||||||
|
if INTERNAL_FOLDERS.contains(&name) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stack.push(full);
|
||||||
|
} else if ft.is_file() && !ft.is_symlink() {
|
||||||
|
let full = entry.path();
|
||||||
|
let full_str = full.to_string_lossy();
|
||||||
|
if full_str.len() <= bucket_len {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let rel = &full_str[bucket_len..];
|
||||||
|
let first_part: &str =
|
||||||
|
if let Some(sep_pos) = rel.find(|c: char| c == '\\' || c == '/') {
|
||||||
|
&rel[..sep_pos]
|
||||||
|
} else {
|
||||||
|
rel
|
||||||
|
};
|
||||||
|
if INTERNAL_FOLDERS.contains(&first_part) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let key = rel.replace('\\', "/");
|
||||||
|
if let Ok(md) = entry.metadata() {
|
||||||
|
let size = md.len();
|
||||||
|
let mtime = md
|
||||||
|
.modified()
|
||||||
|
.map(system_time_to_epoch)
|
||||||
|
.unwrap_or(0.0);
|
||||||
|
let etag = meta_cache.get(&key).cloned();
|
||||||
|
objects.push((key, size, mtime, etag));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((meta_cache, objects, etag_cache_changed))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let (meta_cache, objects, etag_cache_changed) = result;
|
||||||
|
|
||||||
|
let dict = PyDict::new(py);
|
||||||
|
|
||||||
|
let cache_dict = PyDict::new(py);
|
||||||
|
for (k, v) in &meta_cache {
|
||||||
|
cache_dict.set_item(k, v)?;
|
||||||
|
}
|
||||||
|
dict.set_item("etag_cache", cache_dict)?;
|
||||||
|
|
||||||
|
let objects_list = PyList::empty(py);
|
||||||
|
for (key, size, mtime, etag) in &objects {
|
||||||
|
let etag_py: Py<PyAny> = match etag {
|
||||||
|
Some(e) => PyString::new(py, e).into_any().unbind(),
|
||||||
|
None => py.None(),
|
||||||
|
};
|
||||||
|
let tuple = PyTuple::new(py, &[
|
||||||
|
PyString::new(py, key).into_any().unbind(),
|
||||||
|
size.into_pyobject(py)?.into_any().unbind(),
|
||||||
|
mtime.into_pyobject(py)?.into_any().unbind(),
|
||||||
|
etag_py,
|
||||||
|
])?;
|
||||||
|
objects_list.append(tuple)?;
|
||||||
|
}
|
||||||
|
dict.set_item("objects", objects_list)?;
|
||||||
|
dict.set_item("etag_cache_changed", etag_cache_changed)?;
|
||||||
|
|
||||||
|
Ok(dict.into_any().unbind())
|
||||||
|
}
|
||||||
112
myfsio_core/src/streaming.rs
Normal file
112
myfsio_core/src/streaming.rs
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
use md5::{Digest, Md5};
|
||||||
|
use pyo3::exceptions::{PyIOError, PyValueError};
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use std::fs::{self, File};
|
||||||
|
use std::io::{Read, Write};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
const DEFAULT_CHUNK_SIZE: usize = 262144;
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
#[pyo3(signature = (stream, tmp_dir, chunk_size=DEFAULT_CHUNK_SIZE))]
|
||||||
|
pub fn stream_to_file_with_md5(
|
||||||
|
py: Python<'_>,
|
||||||
|
stream: &Bound<'_, PyAny>,
|
||||||
|
tmp_dir: &str,
|
||||||
|
chunk_size: usize,
|
||||||
|
) -> PyResult<(String, String, u64)> {
|
||||||
|
let chunk_size = if chunk_size == 0 {
|
||||||
|
DEFAULT_CHUNK_SIZE
|
||||||
|
} else {
|
||||||
|
chunk_size
|
||||||
|
};
|
||||||
|
|
||||||
|
fs::create_dir_all(tmp_dir)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to create tmp dir: {}", e)))?;
|
||||||
|
|
||||||
|
let tmp_name = format!("{}.tmp", Uuid::new_v4().as_hyphenated());
|
||||||
|
let tmp_path_buf = std::path::PathBuf::from(tmp_dir).join(&tmp_name);
|
||||||
|
let tmp_path = tmp_path_buf.to_string_lossy().into_owned();
|
||||||
|
|
||||||
|
let mut file = File::create(&tmp_path)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to create temp file: {}", e)))?;
|
||||||
|
let mut hasher = Md5::new();
|
||||||
|
let mut total_bytes: u64 = 0;
|
||||||
|
|
||||||
|
let result: PyResult<()> = (|| {
|
||||||
|
loop {
|
||||||
|
let chunk: Vec<u8> = stream.call_method1("read", (chunk_size,))?.extract()?;
|
||||||
|
if chunk.is_empty() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
hasher.update(&chunk);
|
||||||
|
file.write_all(&chunk)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to write: {}", e)))?;
|
||||||
|
total_bytes += chunk.len() as u64;
|
||||||
|
|
||||||
|
py.check_signals()?;
|
||||||
|
}
|
||||||
|
file.sync_all()
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to fsync: {}", e)))?;
|
||||||
|
Ok(())
|
||||||
|
})();
|
||||||
|
|
||||||
|
if let Err(e) = result {
|
||||||
|
drop(file);
|
||||||
|
let _ = fs::remove_file(&tmp_path);
|
||||||
|
return Err(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
drop(file);
|
||||||
|
|
||||||
|
let md5_hex = format!("{:x}", hasher.finalize());
|
||||||
|
Ok((tmp_path, md5_hex, total_bytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn assemble_parts_with_md5(
|
||||||
|
py: Python<'_>,
|
||||||
|
part_paths: Vec<String>,
|
||||||
|
dest_path: &str,
|
||||||
|
) -> PyResult<String> {
|
||||||
|
if part_paths.is_empty() {
|
||||||
|
return Err(PyValueError::new_err("No parts to assemble"));
|
||||||
|
}
|
||||||
|
|
||||||
|
let dest = dest_path.to_owned();
|
||||||
|
let parts = part_paths;
|
||||||
|
|
||||||
|
py.detach(move || {
|
||||||
|
if let Some(parent) = std::path::Path::new(&dest).parent() {
|
||||||
|
fs::create_dir_all(parent)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to create dest dir: {}", e)))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut target = File::create(&dest)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to create dest file: {}", e)))?;
|
||||||
|
let mut hasher = Md5::new();
|
||||||
|
let mut buf = vec![0u8; 1024 * 1024];
|
||||||
|
|
||||||
|
for part_path in &parts {
|
||||||
|
let mut part = File::open(part_path)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to open part {}: {}", part_path, e)))?;
|
||||||
|
loop {
|
||||||
|
let n = part
|
||||||
|
.read(&mut buf)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to read part: {}", e)))?;
|
||||||
|
if n == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
hasher.update(&buf[..n]);
|
||||||
|
target
|
||||||
|
.write_all(&buf[..n])
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to write: {}", e)))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
target.sync_all()
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to fsync: {}", e)))?;
|
||||||
|
|
||||||
|
Ok(format!("{:x}", hasher.finalize()))
|
||||||
|
})
|
||||||
|
}
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc_fingerprint":13172970000770725120,"outputs":{"7971740275564407648":{"success":true,"status":"","code":0,"stdout":"___.exe\nlib___.rlib\n___.dll\n___.dll\n___.lib\n___.dll\nC:\\Users\\jun\\.rustup\\toolchains\\stable-x86_64-pc-windows-msvc\npacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"msvc\"\ntarget_family=\"windows\"\ntarget_feature=\"cmpxchg16b\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_feature=\"sse3\"\ntarget_has_atomic=\"128\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"windows\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"pc\"\nwindows\n","stderr":""},"17747080675513052775":{"success":true,"status":"","code":0,"stdout":"rustc 1.93.1 (01f6ddf75 2026-02-11)\nbinary: rustc\ncommit-hash: 01f6ddf7588f42ae2d7eb0a2f21d44e8e96674cf\ncommit-date: 2026-02-11\nhost: x86_64-pc-windows-msvc\nrelease: 1.93.1\nLLVM version: 21.1.8\n","stderr":""}},"successes":{}}
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
Signature: 8a477f597d28d172789f06886806bc55
|
|
||||||
# This file is a cache directory tag created by cargo.
|
|
||||||
# For information about cache directory tags see https://bford.info/cachedir/
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
801af22cf202da8e
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"perf-literal\", \"std\"]","declared_features":"[\"default\", \"logging\", \"perf-literal\", \"std\"]","target":7534583537114156500,"profile":2040997289075261528,"path":6364296192483896971,"deps":[[1363051979936526615,"memchr",false,11090220145123168660]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\aho-corasick-45694771b543be75\\dep-lib-aho_corasick","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
435555ec2fb592e3
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"alloc\"]","declared_features":"[\"alloc\", \"default\", \"fresh-rust\", \"nightly\", \"serde\", \"std\"]","target":5388200169723499962,"profile":4067574213046180398,"path":10654049299693593327,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\allocator-api2-db7934dbe96de5b4\\dep-lib-allocator_api2","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
d28af275d001c358
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":6962977057026645649,"profile":1369601567987815722,"path":9853093265219907461,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\autocfg-1c4fb7a37cc3df69\\dep-lib-autocfg","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
1fbf4ba9542edced
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":4098124618827574291,"profile":2040997289075261528,"path":3658007358608479489,"deps":[[10520923840501062997,"generic_array",false,11555283918993371487]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\block-buffer-95b0ac364bec72f9\\dep-lib-block_buffer","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
37923e6f5f9687ab
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[\"core\", \"rustc-dep-of-std\"]","target":13840298032947503755,"profile":2040997289075261528,"path":4093486168504982869,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\cfg-if-be2711f84a777e73\\dep-lib-cfg_if","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
603e28136cf5763c
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":2330704043955282025,"profile":2040997289075261528,"path":13200428550696548327,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\cpufeatures-980094f8735c42d1\\dep-lib-cpufeatures","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
896672d759b5299c
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"std\"]","declared_features":"[\"getrandom\", \"rand_core\", \"std\"]","target":12082577455412410174,"profile":2040997289075261528,"path":14902376638882023040,"deps":[[857979250431893282,"typenum",false,7416411392359930020],[10520923840501062997,"generic_array",false,11555283918993371487]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\crypto-common-289a508abdda3048\\dep-lib-crypto_common","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
914a617b9f05c9d8
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"alloc\", \"block-buffer\", \"core-api\", \"default\", \"mac\", \"std\", \"subtle\"]","declared_features":"[\"alloc\", \"blobby\", \"block-buffer\", \"const-oid\", \"core-api\", \"default\", \"dev\", \"mac\", \"oid\", \"rand_core\", \"std\", \"subtle\"]","target":7510122432137863311,"profile":2040997289075261528,"path":11503432597517024930,"deps":[[6039282458970808711,"crypto_common",false,11252724541433210505],[10626340395483396037,"block_buffer",false,17139625223017709343],[17003143334332120809,"subtle",false,8597342066671925934]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\digest-a91458bfa5613332\\dep-lib-digest","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
3b95cf48bbd7dc53
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":1524667692659508025,"profile":2040997289075261528,"path":17534356223679657546,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\equivalent-943ac856871c0988\\dep-lib-equivalent","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
b7ba5182ce570398
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[\"default\", \"std\"]","target":18077926938045032029,"profile":2040997289075261528,"path":9869209539952544870,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\foldhash-b8a92f8c10d550f7\\dep-lib-foldhash","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
f0a5af4d8a8c7106
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"more_lengths\"]","declared_features":"[\"more_lengths\", \"serde\", \"zeroize\"]","target":12318548087768197662,"profile":1369601567987815722,"path":13853454403963664247,"deps":[[5398981501050481332,"version_check",false,16419025953046340415]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\generic-array-2462daa120fe5936\\dep-build-script-build-script-build","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
5f316276809d5ca0
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"more_lengths\"]","declared_features":"[\"more_lengths\", \"serde\", \"zeroize\"]","target":13084005262763373425,"profile":2040997289075261528,"path":12463275850883329568,"deps":[[857979250431893282,"typenum",false,7416411392359930020],[10520923840501062997,"build_script_build",false,16977603856295925732]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\generic-array-62216349963f3a3c\\dep-lib-generic_array","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
e417d28fc1909ceb
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"","declared_features":"","target":0,"profile":0,"path":0,"deps":[[10520923840501062997,"build_script_build",false,464306762232604144]],"local":[{"Precalculated":"0.14.7"}],"rustflags":[],"config":0,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
aec88a641c5288e3
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"allocator-api2\", \"default\", \"default-hasher\", \"equivalent\", \"inline-more\", \"raw-entry\"]","declared_features":"[\"alloc\", \"allocator-api2\", \"core\", \"default\", \"default-hasher\", \"equivalent\", \"inline-more\", \"nightly\", \"raw-entry\", \"rayon\", \"rustc-dep-of-std\", \"rustc-internal-api\", \"serde\"]","target":13796197676120832388,"profile":2040997289075261528,"path":12448322139402656924,"deps":[[5230392855116717286,"equivalent",false,6042941999404782907],[9150530836556604396,"allocator_api2",false,16398368410642502979],[10842263908529601448,"foldhash",false,10953695263156452023]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\hashbrown-510d641b592c306b\\dep-lib-hashbrown","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
ddc0b590ff80762b
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":17886154901722686619,"profile":1369601567987815722,"path":8608102977929876445,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\heck-b47c94fd2a7e00cb\\dep-lib-heck","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
41890ebff4143fa5
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"alloc\", \"default\", \"std\"]","declared_features":"[\"alloc\", \"default\", \"serde\", \"std\"]","target":4242469766639956503,"profile":2040997289075261528,"path":6793865871540733919,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\hex-253414d2260adcdf\\dep-lib-hex","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
3f45b8d062d94ba4
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[\"reset\", \"std\"]","target":12991177224612424488,"profile":2040997289075261528,"path":17893893568771568113,"deps":[[17475753849556516473,"digest",false,15621022965039188625]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\hmac-3297e61b9effb758\\dep-lib-hmac","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user