Separate Python and Rust into python/ and rust/ with per-stack Dockerfiles
This commit is contained in:
763
python/app/__init__.py
Normal file
763
python/app/__init__.py
Normal file
@@ -0,0 +1,763 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import html as html_module
|
||||
import itertools
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from pathlib import Path
|
||||
from datetime import timedelta
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from flask import Flask, Response, g, has_request_context, redirect, render_template, request, url_for
|
||||
from flask_cors import CORS
|
||||
from flask_wtf.csrf import CSRFError
|
||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||
|
||||
import io
|
||||
|
||||
from .access_logging import AccessLoggingService
|
||||
from .operation_metrics import OperationMetricsCollector, classify_endpoint
|
||||
from .compression import GzipMiddleware
|
||||
from .acl import AclService
|
||||
from .bucket_policies import BucketPolicyStore
|
||||
from .config import AppConfig
|
||||
from .connections import ConnectionStore
|
||||
from .encryption import EncryptionManager
|
||||
from .extensions import limiter, csrf
|
||||
from .iam import IamService
|
||||
from .kms import KMSManager
|
||||
from .gc import GarbageCollector
|
||||
from .integrity import IntegrityChecker
|
||||
from .lifecycle import LifecycleManager
|
||||
from .notifications import NotificationService
|
||||
from .object_lock import ObjectLockService
|
||||
from .replication import ReplicationManager
|
||||
from .secret_store import EphemeralSecretStore
|
||||
from .site_registry import SiteRegistry, SiteInfo
|
||||
from .storage import ObjectStorage, StorageError
|
||||
from .version import get_version
|
||||
from .website_domains import WebsiteDomainStore
|
||||
|
||||
_request_counter = itertools.count(1)
|
||||
|
||||
|
||||
class _ChunkedTransferMiddleware:
|
||||
|
||||
def __init__(self, app):
|
||||
self.app = app
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
if environ.get("REQUEST_METHOD") not in ("PUT", "POST"):
|
||||
return self.app(environ, start_response)
|
||||
|
||||
transfer_encoding = environ.get("HTTP_TRANSFER_ENCODING", "")
|
||||
content_length = environ.get("CONTENT_LENGTH")
|
||||
|
||||
if "chunked" in transfer_encoding.lower():
|
||||
if content_length:
|
||||
del environ["HTTP_TRANSFER_ENCODING"]
|
||||
else:
|
||||
raw = environ.get("wsgi.input")
|
||||
if raw:
|
||||
try:
|
||||
if hasattr(raw, "seek"):
|
||||
raw.seek(0)
|
||||
body = raw.read()
|
||||
except Exception:
|
||||
body = b""
|
||||
if body:
|
||||
environ["wsgi.input"] = io.BytesIO(body)
|
||||
environ["CONTENT_LENGTH"] = str(len(body))
|
||||
del environ["HTTP_TRANSFER_ENCODING"]
|
||||
|
||||
content_length = environ.get("CONTENT_LENGTH")
|
||||
if not content_length or content_length == "0":
|
||||
sha256 = environ.get("HTTP_X_AMZ_CONTENT_SHA256", "")
|
||||
decoded_len = environ.get("HTTP_X_AMZ_DECODED_CONTENT_LENGTH", "")
|
||||
content_encoding = environ.get("HTTP_CONTENT_ENCODING", "")
|
||||
if ("STREAMING" in sha256.upper() or decoded_len
|
||||
or "aws-chunked" in content_encoding.lower()):
|
||||
raw = environ.get("wsgi.input")
|
||||
if raw:
|
||||
try:
|
||||
if hasattr(raw, "seek"):
|
||||
raw.seek(0)
|
||||
body = raw.read()
|
||||
except Exception:
|
||||
body = b""
|
||||
if body:
|
||||
environ["wsgi.input"] = io.BytesIO(body)
|
||||
environ["CONTENT_LENGTH"] = str(len(body))
|
||||
|
||||
raw = environ.get("wsgi.input")
|
||||
if raw and hasattr(raw, "seek"):
|
||||
try:
|
||||
raw.seek(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return self.app(environ, start_response)
|
||||
|
||||
|
||||
def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
|
||||
"""Migrate config file from legacy locations to the active path.
|
||||
|
||||
Checks each legacy path in order and moves the first one found to the active path.
|
||||
This ensures backward compatibility for users upgrading from older versions.
|
||||
"""
|
||||
active_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if active_path.exists():
|
||||
return active_path
|
||||
|
||||
for legacy_path in legacy_paths:
|
||||
if legacy_path.exists():
|
||||
try:
|
||||
shutil.move(str(legacy_path), str(active_path))
|
||||
except OSError:
|
||||
shutil.copy2(legacy_path, active_path)
|
||||
try:
|
||||
legacy_path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
break
|
||||
|
||||
return active_path
|
||||
|
||||
|
||||
def create_app(
|
||||
test_config: Optional[Dict[str, Any]] = None,
|
||||
*,
|
||||
include_api: bool = True,
|
||||
include_ui: bool = True,
|
||||
) -> Flask:
|
||||
"""Create and configure the Flask application."""
|
||||
config = AppConfig.from_env(test_config)
|
||||
|
||||
if getattr(sys, "frozen", False):
|
||||
project_root = Path(sys._MEIPASS)
|
||||
else:
|
||||
project_root = Path(__file__).resolve().parent.parent
|
||||
|
||||
app = Flask(
|
||||
__name__,
|
||||
static_folder=str(project_root / "static"),
|
||||
template_folder=str(project_root / "templates"),
|
||||
)
|
||||
app.config.update(config.to_flask_config())
|
||||
if test_config:
|
||||
app.config.update(test_config)
|
||||
app.config.setdefault("APP_VERSION", get_version())
|
||||
app.permanent_session_lifetime = timedelta(days=int(app.config.get("SESSION_LIFETIME_DAYS", 30)))
|
||||
if app.config.get("TESTING"):
|
||||
app.config.setdefault("WTF_CSRF_ENABLED", False)
|
||||
|
||||
# Trust X-Forwarded-* headers from proxies
|
||||
num_proxies = app.config.get("NUM_TRUSTED_PROXIES", 1)
|
||||
if num_proxies:
|
||||
if "NUM_TRUSTED_PROXIES" not in os.environ:
|
||||
logging.getLogger(__name__).warning(
|
||||
"NUM_TRUSTED_PROXIES not set, defaulting to 1. "
|
||||
"Set NUM_TRUSTED_PROXIES=0 if not behind a reverse proxy."
|
||||
)
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=num_proxies, x_proto=num_proxies, x_host=num_proxies, x_prefix=num_proxies)
|
||||
|
||||
if app.config.get("ENABLE_GZIP", True):
|
||||
app.wsgi_app = GzipMiddleware(app.wsgi_app, compression_level=6)
|
||||
|
||||
app.wsgi_app = _ChunkedTransferMiddleware(app.wsgi_app)
|
||||
|
||||
_configure_cors(app)
|
||||
_configure_logging(app)
|
||||
|
||||
limiter.init_app(app)
|
||||
csrf.init_app(app)
|
||||
|
||||
storage = ObjectStorage(
|
||||
Path(app.config["STORAGE_ROOT"]),
|
||||
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 60),
|
||||
object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100),
|
||||
bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0),
|
||||
object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024),
|
||||
meta_read_cache_max=app.config.get("META_READ_CACHE_MAX", 2048),
|
||||
)
|
||||
|
||||
if app.config.get("WARM_CACHE_ON_STARTUP", True) and not app.config.get("TESTING"):
|
||||
storage.warm_cache_async()
|
||||
|
||||
iam = IamService(
|
||||
Path(app.config["IAM_CONFIG"]),
|
||||
auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5),
|
||||
auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15),
|
||||
encryption_key=app.config.get("SECRET_KEY"),
|
||||
)
|
||||
bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
|
||||
secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))
|
||||
|
||||
storage_root = Path(app.config["STORAGE_ROOT"])
|
||||
config_dir = storage_root / ".myfsio.sys" / "config"
|
||||
config_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
connections_path = _migrate_config_file(
|
||||
active_path=config_dir / "connections.json",
|
||||
legacy_paths=[
|
||||
storage_root / ".myfsio.sys" / "connections.json",
|
||||
storage_root / ".connections.json",
|
||||
],
|
||||
)
|
||||
replication_rules_path = _migrate_config_file(
|
||||
active_path=config_dir / "replication_rules.json",
|
||||
legacy_paths=[
|
||||
storage_root / ".myfsio.sys" / "replication_rules.json",
|
||||
storage_root / ".replication_rules.json",
|
||||
],
|
||||
)
|
||||
|
||||
connections = ConnectionStore(connections_path)
|
||||
replication = ReplicationManager(
|
||||
storage,
|
||||
connections,
|
||||
replication_rules_path,
|
||||
storage_root,
|
||||
connect_timeout=app.config.get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5),
|
||||
read_timeout=app.config.get("REPLICATION_READ_TIMEOUT_SECONDS", 30),
|
||||
max_retries=app.config.get("REPLICATION_MAX_RETRIES", 2),
|
||||
streaming_threshold_bytes=app.config.get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024),
|
||||
max_failures_per_bucket=app.config.get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50),
|
||||
)
|
||||
|
||||
site_registry_path = config_dir / "site_registry.json"
|
||||
site_registry = SiteRegistry(site_registry_path)
|
||||
if app.config.get("SITE_ID") and not site_registry.get_local_site():
|
||||
site_registry.set_local_site(SiteInfo(
|
||||
site_id=app.config["SITE_ID"],
|
||||
endpoint=app.config.get("SITE_ENDPOINT") or "",
|
||||
region=app.config.get("SITE_REGION", "us-east-1"),
|
||||
priority=app.config.get("SITE_PRIORITY", 100),
|
||||
))
|
||||
|
||||
encryption_config = {
|
||||
"encryption_enabled": app.config.get("ENCRYPTION_ENABLED", False),
|
||||
"encryption_master_key_path": app.config.get("ENCRYPTION_MASTER_KEY_PATH"),
|
||||
"default_encryption_algorithm": app.config.get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"),
|
||||
"encryption_chunk_size_bytes": app.config.get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024),
|
||||
}
|
||||
encryption_manager = EncryptionManager(encryption_config)
|
||||
|
||||
kms_manager = None
|
||||
if app.config.get("KMS_ENABLED", False):
|
||||
kms_keys_path = Path(app.config.get("KMS_KEYS_PATH", ""))
|
||||
kms_master_key_path = Path(app.config.get("ENCRYPTION_MASTER_KEY_PATH", ""))
|
||||
kms_manager = KMSManager(
|
||||
kms_keys_path,
|
||||
kms_master_key_path,
|
||||
generate_data_key_min_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1),
|
||||
generate_data_key_max_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024),
|
||||
)
|
||||
encryption_manager.set_kms_provider(kms_manager)
|
||||
|
||||
if app.config.get("ENCRYPTION_ENABLED", False):
|
||||
from .encrypted_storage import EncryptedObjectStorage
|
||||
storage = EncryptedObjectStorage(storage, encryption_manager)
|
||||
|
||||
acl_service = AclService(storage_root)
|
||||
object_lock_service = ObjectLockService(storage_root)
|
||||
notification_service = NotificationService(
|
||||
storage_root,
|
||||
allow_internal_endpoints=app.config.get("ALLOW_INTERNAL_ENDPOINTS", False),
|
||||
)
|
||||
access_logging_service = AccessLoggingService(storage_root)
|
||||
access_logging_service.set_storage(storage)
|
||||
|
||||
lifecycle_manager = None
|
||||
if app.config.get("LIFECYCLE_ENABLED", False):
|
||||
base_storage = storage.storage if hasattr(storage, 'storage') else storage
|
||||
lifecycle_manager = LifecycleManager(
|
||||
base_storage,
|
||||
interval_seconds=app.config.get("LIFECYCLE_INTERVAL_SECONDS", 3600),
|
||||
storage_root=storage_root,
|
||||
max_history_per_bucket=app.config.get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50),
|
||||
)
|
||||
lifecycle_manager.start()
|
||||
|
||||
gc_collector = None
|
||||
if app.config.get("GC_ENABLED", False):
|
||||
gc_collector = GarbageCollector(
|
||||
storage_root=storage_root,
|
||||
interval_hours=app.config.get("GC_INTERVAL_HOURS", 6.0),
|
||||
temp_file_max_age_hours=app.config.get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0),
|
||||
multipart_max_age_days=app.config.get("GC_MULTIPART_MAX_AGE_DAYS", 7),
|
||||
lock_file_max_age_hours=app.config.get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0),
|
||||
dry_run=app.config.get("GC_DRY_RUN", False),
|
||||
io_throttle_ms=app.config.get("GC_IO_THROTTLE_MS", 10),
|
||||
)
|
||||
gc_collector.start()
|
||||
|
||||
integrity_checker = None
|
||||
if app.config.get("INTEGRITY_ENABLED", False):
|
||||
integrity_checker = IntegrityChecker(
|
||||
storage_root=storage_root,
|
||||
interval_hours=app.config.get("INTEGRITY_INTERVAL_HOURS", 24.0),
|
||||
batch_size=app.config.get("INTEGRITY_BATCH_SIZE", 1000),
|
||||
auto_heal=app.config.get("INTEGRITY_AUTO_HEAL", False),
|
||||
dry_run=app.config.get("INTEGRITY_DRY_RUN", False),
|
||||
io_throttle_ms=app.config.get("INTEGRITY_IO_THROTTLE_MS", 10),
|
||||
)
|
||||
integrity_checker.start()
|
||||
|
||||
app.extensions["object_storage"] = storage
|
||||
app.extensions["iam"] = iam
|
||||
app.extensions["bucket_policies"] = bucket_policies
|
||||
app.extensions["secret_store"] = secret_store
|
||||
app.extensions["limiter"] = limiter
|
||||
app.extensions["connections"] = connections
|
||||
app.extensions["replication"] = replication
|
||||
app.extensions["encryption"] = encryption_manager
|
||||
app.extensions["kms"] = kms_manager
|
||||
app.extensions["acl"] = acl_service
|
||||
app.extensions["lifecycle"] = lifecycle_manager
|
||||
app.extensions["gc"] = gc_collector
|
||||
app.extensions["integrity"] = integrity_checker
|
||||
app.extensions["object_lock"] = object_lock_service
|
||||
app.extensions["notifications"] = notification_service
|
||||
app.extensions["access_logging"] = access_logging_service
|
||||
app.extensions["site_registry"] = site_registry
|
||||
|
||||
website_domains_store = None
|
||||
if app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||
website_domains_path = config_dir / "website_domains.json"
|
||||
website_domains_store = WebsiteDomainStore(website_domains_path)
|
||||
app.extensions["website_domains"] = website_domains_store
|
||||
|
||||
from .s3_client import S3ProxyClient
|
||||
api_base = app.config.get("API_BASE_URL") or "http://127.0.0.1:5000"
|
||||
app.extensions["s3_proxy"] = S3ProxyClient(
|
||||
api_base_url=api_base,
|
||||
region=app.config.get("AWS_REGION", "us-east-1"),
|
||||
)
|
||||
|
||||
operation_metrics_collector = None
|
||||
if app.config.get("OPERATION_METRICS_ENABLED", False):
|
||||
operation_metrics_collector = OperationMetricsCollector(
|
||||
storage_root,
|
||||
interval_minutes=app.config.get("OPERATION_METRICS_INTERVAL_MINUTES", 5),
|
||||
retention_hours=app.config.get("OPERATION_METRICS_RETENTION_HOURS", 24),
|
||||
)
|
||||
app.extensions["operation_metrics"] = operation_metrics_collector
|
||||
|
||||
system_metrics_collector = None
|
||||
if app.config.get("METRICS_HISTORY_ENABLED", False):
|
||||
from .system_metrics import SystemMetricsCollector
|
||||
system_metrics_collector = SystemMetricsCollector(
|
||||
storage_root,
|
||||
interval_minutes=app.config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5),
|
||||
retention_hours=app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24),
|
||||
)
|
||||
system_metrics_collector.set_storage(storage)
|
||||
app.extensions["system_metrics"] = system_metrics_collector
|
||||
|
||||
site_sync_worker = None
|
||||
if app.config.get("SITE_SYNC_ENABLED", False):
|
||||
from .site_sync import SiteSyncWorker
|
||||
site_sync_worker = SiteSyncWorker(
|
||||
storage=storage,
|
||||
connections=connections,
|
||||
replication_manager=replication,
|
||||
storage_root=storage_root,
|
||||
interval_seconds=app.config.get("SITE_SYNC_INTERVAL_SECONDS", 60),
|
||||
batch_size=app.config.get("SITE_SYNC_BATCH_SIZE", 100),
|
||||
connect_timeout=app.config.get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10),
|
||||
read_timeout=app.config.get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120),
|
||||
max_retries=app.config.get("SITE_SYNC_MAX_RETRIES", 2),
|
||||
clock_skew_tolerance_seconds=app.config.get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0),
|
||||
)
|
||||
site_sync_worker.start()
|
||||
app.extensions["site_sync"] = site_sync_worker
|
||||
|
||||
@app.errorhandler(500)
|
||||
def internal_error(error):
|
||||
wants_html = request.accept_mimetypes.accept_html
|
||||
path = request.path or ""
|
||||
if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
|
||||
return render_template('500.html'), 500
|
||||
error_xml = (
|
||||
'<?xml version="1.0" encoding="UTF-8"?>'
|
||||
'<Error>'
|
||||
'<Code>InternalError</Code>'
|
||||
'<Message>An internal server error occurred</Message>'
|
||||
f'<Resource>{path}</Resource>'
|
||||
f'<RequestId>{getattr(g, "request_id", "-")}</RequestId>'
|
||||
'</Error>'
|
||||
)
|
||||
return error_xml, 500, {'Content-Type': 'application/xml'}
|
||||
|
||||
@app.errorhandler(CSRFError)
|
||||
def handle_csrf_error(e):
|
||||
wants_html = request.accept_mimetypes.accept_html
|
||||
path = request.path or ""
|
||||
if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
|
||||
return render_template('csrf_error.html', reason=e.description), 400
|
||||
error_xml = (
|
||||
'<?xml version="1.0" encoding="UTF-8"?>'
|
||||
'<Error>'
|
||||
'<Code>CSRFError</Code>'
|
||||
f'<Message>{e.description}</Message>'
|
||||
f'<Resource>{path}</Resource>'
|
||||
f'<RequestId>{getattr(g, "request_id", "-")}</RequestId>'
|
||||
'</Error>'
|
||||
)
|
||||
return error_xml, 400, {'Content-Type': 'application/xml'}
|
||||
|
||||
@app.template_filter("filesizeformat")
|
||||
def filesizeformat(value: int) -> str:
|
||||
"""Format bytes as human-readable file size."""
|
||||
for unit in ["B", "KB", "MB", "GB", "TB", "PB"]:
|
||||
if abs(value) < 1024.0 or unit == "PB":
|
||||
if unit == "B":
|
||||
return f"{int(value)} {unit}"
|
||||
return f"{value:.1f} {unit}"
|
||||
value /= 1024.0
|
||||
return f"{value:.1f} PB"
|
||||
|
||||
@app.template_filter("timestamp_to_datetime")
|
||||
def timestamp_to_datetime(value: float) -> str:
|
||||
"""Format Unix timestamp as human-readable datetime in configured timezone."""
|
||||
from datetime import datetime, timezone as dt_timezone
|
||||
from zoneinfo import ZoneInfo
|
||||
if not value:
|
||||
return "Never"
|
||||
try:
|
||||
dt_utc = datetime.fromtimestamp(value, dt_timezone.utc)
|
||||
display_tz = app.config.get("DISPLAY_TIMEZONE", "UTC")
|
||||
if display_tz and display_tz != "UTC":
|
||||
try:
|
||||
tz = ZoneInfo(display_tz)
|
||||
dt_local = dt_utc.astimezone(tz)
|
||||
return dt_local.strftime("%Y-%m-%d %H:%M:%S")
|
||||
except (KeyError, ValueError):
|
||||
pass
|
||||
return dt_utc.strftime("%Y-%m-%d %H:%M:%S UTC")
|
||||
except (ValueError, OSError):
|
||||
return "Unknown"
|
||||
|
||||
@app.template_filter("format_datetime")
|
||||
def format_datetime_filter(dt, include_tz: bool = True) -> str:
|
||||
"""Format datetime object as human-readable string in configured timezone."""
|
||||
from datetime import datetime, timezone as dt_timezone
|
||||
from zoneinfo import ZoneInfo
|
||||
if not dt:
|
||||
return ""
|
||||
try:
|
||||
display_tz = app.config.get("DISPLAY_TIMEZONE", "UTC")
|
||||
if display_tz and display_tz != "UTC":
|
||||
try:
|
||||
tz = ZoneInfo(display_tz)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=dt_timezone.utc)
|
||||
dt = dt.astimezone(tz)
|
||||
except (KeyError, ValueError):
|
||||
pass
|
||||
tz_abbr = dt.strftime("%Z") or "UTC"
|
||||
if include_tz:
|
||||
return f"{dt.strftime('%b %d, %Y %H:%M')} ({tz_abbr})"
|
||||
return dt.strftime("%b %d, %Y %H:%M")
|
||||
except (ValueError, AttributeError):
|
||||
return str(dt)
|
||||
|
||||
if include_api:
|
||||
from .s3_api import s3_api_bp
|
||||
from .kms_api import kms_api_bp
|
||||
from .admin_api import admin_api_bp
|
||||
|
||||
app.register_blueprint(s3_api_bp)
|
||||
app.register_blueprint(kms_api_bp)
|
||||
app.register_blueprint(admin_api_bp)
|
||||
csrf.exempt(s3_api_bp)
|
||||
csrf.exempt(kms_api_bp)
|
||||
csrf.exempt(admin_api_bp)
|
||||
|
||||
if include_ui:
|
||||
from .ui import ui_bp
|
||||
|
||||
app.register_blueprint(ui_bp)
|
||||
if not include_api:
|
||||
@app.get("/")
|
||||
def ui_root_redirect():
|
||||
return redirect(url_for("ui.buckets_overview"))
|
||||
|
||||
@app.errorhandler(404)
|
||||
def handle_not_found(error):
|
||||
wants_html = request.accept_mimetypes.accept_html
|
||||
path = request.path or ""
|
||||
if include_ui and wants_html:
|
||||
if not include_api or path.startswith("/ui") or path == "/":
|
||||
return render_template("404.html"), 404
|
||||
return error
|
||||
|
||||
@app.get("/myfsio/health")
|
||||
def healthcheck() -> Dict[str, str]:
|
||||
return {"status": "ok"}
|
||||
|
||||
return app
|
||||
|
||||
|
||||
def create_api_app(test_config: Optional[Dict[str, Any]] = None) -> Flask:
|
||||
return create_app(test_config, include_api=True, include_ui=False)
|
||||
|
||||
|
||||
def create_ui_app(test_config: Optional[Dict[str, Any]] = None) -> Flask:
|
||||
return create_app(test_config, include_api=False, include_ui=True)
|
||||
|
||||
|
||||
def _configure_cors(app: Flask) -> None:
|
||||
origins = app.config.get("CORS_ORIGINS", ["*"])
|
||||
methods = app.config.get("CORS_METHODS", ["GET", "PUT", "POST", "DELETE", "OPTIONS", "HEAD"])
|
||||
allow_headers = app.config.get("CORS_ALLOW_HEADERS", ["*"])
|
||||
expose_headers = app.config.get("CORS_EXPOSE_HEADERS", ["*"])
|
||||
CORS(
|
||||
app,
|
||||
resources={r"/*": {"origins": origins, "methods": methods, "allow_headers": allow_headers, "expose_headers": expose_headers}},
|
||||
supports_credentials=True,
|
||||
)
|
||||
|
||||
|
||||
class _RequestContextFilter(logging.Filter):
|
||||
"""Inject request-specific attributes into log records."""
|
||||
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
if has_request_context():
|
||||
record.request_id = getattr(g, "request_id", "-")
|
||||
record.path = request.path
|
||||
record.method = request.method
|
||||
record.remote_addr = request.remote_addr or "-"
|
||||
else:
|
||||
record.request_id = getattr(record, "request_id", "-")
|
||||
record.path = getattr(record, "path", "-")
|
||||
record.method = getattr(record, "method", "-")
|
||||
record.remote_addr = getattr(record, "remote_addr", "-")
|
||||
return True
|
||||
|
||||
|
||||
def _configure_logging(app: Flask) -> None:
|
||||
formatter = logging.Formatter(
|
||||
"%(asctime)s | %(levelname)s | %(request_id)s | %(method)s %(path)s | %(message)s"
|
||||
)
|
||||
|
||||
stream_handler = logging.StreamHandler(sys.stdout)
|
||||
stream_handler.setFormatter(formatter)
|
||||
stream_handler.addFilter(_RequestContextFilter())
|
||||
|
||||
logger = app.logger
|
||||
for handler in logger.handlers[:]:
|
||||
handler.close()
|
||||
logger.handlers.clear()
|
||||
logger.addHandler(stream_handler)
|
||||
|
||||
if app.config.get("LOG_TO_FILE"):
|
||||
log_file = Path(app.config["LOG_FILE"])
|
||||
log_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
file_handler = RotatingFileHandler(
|
||||
log_file,
|
||||
maxBytes=int(app.config.get("LOG_MAX_BYTES", 5 * 1024 * 1024)),
|
||||
backupCount=int(app.config.get("LOG_BACKUP_COUNT", 3)),
|
||||
encoding="utf-8",
|
||||
)
|
||||
file_handler.setFormatter(formatter)
|
||||
file_handler.addFilter(_RequestContextFilter())
|
||||
logger.addHandler(file_handler)
|
||||
|
||||
logger.setLevel(getattr(logging, app.config.get("LOG_LEVEL", "INFO"), logging.INFO))
|
||||
|
||||
@app.before_request
|
||||
def _log_request_start() -> None:
|
||||
g.request_id = f"{os.getpid():x}{next(_request_counter):012x}"
|
||||
g.request_started_at = time.perf_counter()
|
||||
g.request_bytes_in = request.content_length or 0
|
||||
|
||||
@app.before_request
|
||||
def _maybe_serve_website():
|
||||
if not app.config.get("WEBSITE_HOSTING_ENABLED"):
|
||||
return None
|
||||
if request.method not in {"GET", "HEAD"}:
|
||||
return None
|
||||
host = request.host
|
||||
if ":" in host:
|
||||
host = host.rsplit(":", 1)[0]
|
||||
host = host.lower()
|
||||
store = app.extensions.get("website_domains")
|
||||
if not store:
|
||||
return None
|
||||
bucket = store.get_bucket(host)
|
||||
if not bucket:
|
||||
return None
|
||||
storage = app.extensions["object_storage"]
|
||||
if not storage.bucket_exists(bucket):
|
||||
return _website_error_response(404, "Not Found")
|
||||
website_config = storage.get_bucket_website(bucket)
|
||||
if not website_config:
|
||||
return _website_error_response(404, "Not Found")
|
||||
index_doc = website_config.get("index_document", "index.html")
|
||||
error_doc = website_config.get("error_document")
|
||||
req_path = request.path.lstrip("/")
|
||||
if not req_path or req_path.endswith("/"):
|
||||
object_key = req_path + index_doc
|
||||
else:
|
||||
object_key = req_path
|
||||
try:
|
||||
obj_path = storage.get_object_path(bucket, object_key)
|
||||
except (StorageError, OSError):
|
||||
if object_key == req_path:
|
||||
try:
|
||||
obj_path = storage.get_object_path(bucket, req_path + "/" + index_doc)
|
||||
object_key = req_path + "/" + index_doc
|
||||
except (StorageError, OSError):
|
||||
return _serve_website_error(storage, bucket, error_doc, 404)
|
||||
else:
|
||||
return _serve_website_error(storage, bucket, error_doc, 404)
|
||||
content_type = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
|
||||
is_encrypted = False
|
||||
try:
|
||||
metadata = storage.get_object_metadata(bucket, object_key)
|
||||
is_encrypted = "x-amz-server-side-encryption" in metadata
|
||||
except (StorageError, OSError):
|
||||
pass
|
||||
if is_encrypted and hasattr(storage, "get_object_data"):
|
||||
try:
|
||||
data, _ = storage.get_object_data(bucket, object_key)
|
||||
file_size = len(data)
|
||||
except (StorageError, OSError):
|
||||
return _website_error_response(500, "Internal Server Error")
|
||||
else:
|
||||
data = None
|
||||
try:
|
||||
stat = obj_path.stat()
|
||||
file_size = stat.st_size
|
||||
except OSError:
|
||||
return _website_error_response(500, "Internal Server Error")
|
||||
if request.method == "HEAD":
|
||||
response = Response(status=200)
|
||||
response.headers["Content-Length"] = file_size
|
||||
response.headers["Content-Type"] = content_type
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
return response
|
||||
from .s3_api import _parse_range_header
|
||||
range_header = request.headers.get("Range")
|
||||
if range_header:
|
||||
ranges = _parse_range_header(range_header, file_size)
|
||||
if ranges is None:
|
||||
return Response(status=416, headers={"Content-Range": f"bytes */{file_size}"})
|
||||
start, end = ranges[0]
|
||||
length = end - start + 1
|
||||
if data is not None:
|
||||
partial_data = data[start:end + 1]
|
||||
response = Response(partial_data, status=206, mimetype=content_type)
|
||||
else:
|
||||
def _stream_range(file_path, start_pos, length_to_read):
|
||||
with file_path.open("rb") as f:
|
||||
f.seek(start_pos)
|
||||
remaining = length_to_read
|
||||
while remaining > 0:
|
||||
chunk = f.read(min(262144, remaining))
|
||||
if not chunk:
|
||||
break
|
||||
remaining -= len(chunk)
|
||||
yield chunk
|
||||
response = Response(_stream_range(obj_path, start, length), status=206, mimetype=content_type, direct_passthrough=True)
|
||||
response.headers["Content-Range"] = f"bytes {start}-{end}/{file_size}"
|
||||
response.headers["Content-Length"] = length
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
return response
|
||||
if data is not None:
|
||||
response = Response(data, mimetype=content_type)
|
||||
response.headers["Content-Length"] = file_size
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
return response
|
||||
def _stream(file_path):
|
||||
with file_path.open("rb") as f:
|
||||
while True:
|
||||
chunk = f.read(65536)
|
||||
if not chunk:
|
||||
break
|
||||
yield chunk
|
||||
response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
|
||||
response.headers["Content-Length"] = file_size
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
return response
|
||||
|
||||
def _serve_website_error(storage, bucket, error_doc_key, status_code):
|
||||
if not error_doc_key:
|
||||
return _website_error_response(status_code, "Not Found" if status_code == 404 else "Error")
|
||||
try:
|
||||
obj_path = storage.get_object_path(bucket, error_doc_key)
|
||||
except (StorageError, OSError):
|
||||
return _website_error_response(status_code, "Not Found")
|
||||
content_type = mimetypes.guess_type(error_doc_key)[0] or "text/html"
|
||||
is_encrypted = False
|
||||
try:
|
||||
metadata = storage.get_object_metadata(bucket, error_doc_key)
|
||||
is_encrypted = "x-amz-server-side-encryption" in metadata
|
||||
except (StorageError, OSError):
|
||||
pass
|
||||
if is_encrypted and hasattr(storage, "get_object_data"):
|
||||
try:
|
||||
data, _ = storage.get_object_data(bucket, error_doc_key)
|
||||
response = Response(data, status=status_code, mimetype=content_type)
|
||||
response.headers["Content-Length"] = len(data)
|
||||
return response
|
||||
except (StorageError, OSError):
|
||||
return _website_error_response(status_code, "Not Found")
|
||||
try:
|
||||
data = obj_path.read_bytes()
|
||||
response = Response(data, status=status_code, mimetype=content_type)
|
||||
response.headers["Content-Length"] = len(data)
|
||||
return response
|
||||
except OSError:
|
||||
return _website_error_response(status_code, "Not Found")
|
||||
|
||||
def _website_error_response(status_code, message):
|
||||
safe_msg = html_module.escape(str(message))
|
||||
safe_code = html_module.escape(str(status_code))
|
||||
body = f"<html><head><title>{safe_code} {safe_msg}</title></head><body><h1>{safe_code} {safe_msg}</h1></body></html>"
|
||||
return Response(body, status=status_code, mimetype="text/html")
|
||||
|
||||
@app.after_request
|
||||
def _log_request_end(response):
|
||||
duration_ms = 0.0
|
||||
if hasattr(g, "request_started_at"):
|
||||
duration_ms = (time.perf_counter() - g.request_started_at) * 1000
|
||||
request_id = getattr(g, "request_id", f"{os.getpid():x}{next(_request_counter):012x}")
|
||||
response.headers.setdefault("X-Request-ID", request_id)
|
||||
if app.logger.isEnabledFor(logging.INFO):
|
||||
app.logger.info(
|
||||
"Request completed",
|
||||
extra={
|
||||
"path": request.path,
|
||||
"method": request.method,
|
||||
"remote_addr": request.remote_addr,
|
||||
},
|
||||
)
|
||||
response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
|
||||
response.headers["Server"] = "MyFSIO"
|
||||
|
||||
operation_metrics = app.extensions.get("operation_metrics")
|
||||
if operation_metrics:
|
||||
bytes_in = getattr(g, "request_bytes_in", 0)
|
||||
bytes_out = response.content_length or 0
|
||||
error_code = getattr(g, "s3_error_code", None)
|
||||
endpoint_type = classify_endpoint(request.path)
|
||||
operation_metrics.record_request(
|
||||
method=request.method,
|
||||
endpoint_type=endpoint_type,
|
||||
status_code=response.status_code,
|
||||
latency_ms=duration_ms,
|
||||
bytes_in=bytes_in,
|
||||
bytes_out=bytes_out,
|
||||
error_code=error_code,
|
||||
)
|
||||
|
||||
return response
|
||||
265
python/app/access_logging.py
Normal file
265
python/app/access_logging.py
Normal file
@@ -0,0 +1,265 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class AccessLogEntry:
|
||||
bucket_owner: str = "-"
|
||||
bucket: str = "-"
|
||||
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
remote_ip: str = "-"
|
||||
requester: str = "-"
|
||||
request_id: str = field(default_factory=lambda: uuid.uuid4().hex[:16].upper())
|
||||
operation: str = "-"
|
||||
key: str = "-"
|
||||
request_uri: str = "-"
|
||||
http_status: int = 200
|
||||
error_code: str = "-"
|
||||
bytes_sent: int = 0
|
||||
object_size: int = 0
|
||||
total_time_ms: int = 0
|
||||
turn_around_time_ms: int = 0
|
||||
referrer: str = "-"
|
||||
user_agent: str = "-"
|
||||
version_id: str = "-"
|
||||
host_id: str = "-"
|
||||
signature_version: str = "SigV4"
|
||||
cipher_suite: str = "-"
|
||||
authentication_type: str = "AuthHeader"
|
||||
host_header: str = "-"
|
||||
tls_version: str = "-"
|
||||
|
||||
def to_log_line(self) -> str:
|
||||
time_str = self.timestamp.strftime("[%d/%b/%Y:%H:%M:%S %z]")
|
||||
return (
|
||||
f'{self.bucket_owner} {self.bucket} {time_str} {self.remote_ip} '
|
||||
f'{self.requester} {self.request_id} {self.operation} {self.key} '
|
||||
f'"{self.request_uri}" {self.http_status} {self.error_code or "-"} '
|
||||
f'{self.bytes_sent or "-"} {self.object_size or "-"} {self.total_time_ms or "-"} '
|
||||
f'{self.turn_around_time_ms or "-"} "{self.referrer}" "{self.user_agent}" {self.version_id}'
|
||||
)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"bucket_owner": self.bucket_owner,
|
||||
"bucket": self.bucket,
|
||||
"timestamp": self.timestamp.isoformat(),
|
||||
"remote_ip": self.remote_ip,
|
||||
"requester": self.requester,
|
||||
"request_id": self.request_id,
|
||||
"operation": self.operation,
|
||||
"key": self.key,
|
||||
"request_uri": self.request_uri,
|
||||
"http_status": self.http_status,
|
||||
"error_code": self.error_code,
|
||||
"bytes_sent": self.bytes_sent,
|
||||
"object_size": self.object_size,
|
||||
"total_time_ms": self.total_time_ms,
|
||||
"referrer": self.referrer,
|
||||
"user_agent": self.user_agent,
|
||||
"version_id": self.version_id,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class LoggingConfiguration:
|
||||
target_bucket: str
|
||||
target_prefix: str = ""
|
||||
enabled: bool = True
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"LoggingEnabled": {
|
||||
"TargetBucket": self.target_bucket,
|
||||
"TargetPrefix": self.target_prefix,
|
||||
}
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> Optional["LoggingConfiguration"]:
|
||||
logging_enabled = data.get("LoggingEnabled")
|
||||
if not logging_enabled:
|
||||
return None
|
||||
return cls(
|
||||
target_bucket=logging_enabled.get("TargetBucket", ""),
|
||||
target_prefix=logging_enabled.get("TargetPrefix", ""),
|
||||
enabled=True,
|
||||
)
|
||||
|
||||
|
||||
class AccessLoggingService:
|
||||
def __init__(self, storage_root: Path, flush_interval: int = 60, max_buffer_size: int = 1000):
|
||||
self.storage_root = storage_root
|
||||
self.flush_interval = flush_interval
|
||||
self.max_buffer_size = max_buffer_size
|
||||
self._configs: Dict[str, LoggingConfiguration] = {}
|
||||
self._buffer: Dict[str, List[AccessLogEntry]] = {}
|
||||
self._buffer_lock = threading.Lock()
|
||||
self._shutdown = threading.Event()
|
||||
self._storage = None
|
||||
|
||||
self._flush_thread = threading.Thread(target=self._flush_loop, name="access-log-flush", daemon=True)
|
||||
self._flush_thread.start()
|
||||
|
||||
def set_storage(self, storage: Any) -> None:
|
||||
self._storage = storage
|
||||
|
||||
def _config_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "logging.json"
|
||||
|
||||
def get_bucket_logging(self, bucket_name: str) -> Optional[LoggingConfiguration]:
|
||||
if bucket_name in self._configs:
|
||||
return self._configs[bucket_name]
|
||||
|
||||
config_path = self._config_path(bucket_name)
|
||||
if not config_path.exists():
|
||||
return None
|
||||
|
||||
try:
|
||||
data = json.loads(config_path.read_text(encoding="utf-8"))
|
||||
config = LoggingConfiguration.from_dict(data)
|
||||
if config:
|
||||
self._configs[bucket_name] = config
|
||||
return config
|
||||
except (json.JSONDecodeError, OSError) as e:
|
||||
logger.warning(f"Failed to load logging config for {bucket_name}: {e}")
|
||||
return None
|
||||
|
||||
def set_bucket_logging(self, bucket_name: str, config: LoggingConfiguration) -> None:
|
||||
config_path = self._config_path(bucket_name)
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
config_path.write_text(json.dumps(config.to_dict(), indent=2), encoding="utf-8")
|
||||
self._configs[bucket_name] = config
|
||||
|
||||
def delete_bucket_logging(self, bucket_name: str) -> None:
|
||||
config_path = self._config_path(bucket_name)
|
||||
try:
|
||||
if config_path.exists():
|
||||
config_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
self._configs.pop(bucket_name, None)
|
||||
|
||||
def log_request(
|
||||
self,
|
||||
bucket_name: str,
|
||||
*,
|
||||
operation: str,
|
||||
key: str = "-",
|
||||
remote_ip: str = "-",
|
||||
requester: str = "-",
|
||||
request_uri: str = "-",
|
||||
http_status: int = 200,
|
||||
error_code: str = "",
|
||||
bytes_sent: int = 0,
|
||||
object_size: int = 0,
|
||||
total_time_ms: int = 0,
|
||||
referrer: str = "-",
|
||||
user_agent: str = "-",
|
||||
version_id: str = "-",
|
||||
request_id: str = "",
|
||||
) -> None:
|
||||
config = self.get_bucket_logging(bucket_name)
|
||||
if not config or not config.enabled:
|
||||
return
|
||||
|
||||
entry = AccessLogEntry(
|
||||
bucket_owner="local-owner",
|
||||
bucket=bucket_name,
|
||||
remote_ip=remote_ip,
|
||||
requester=requester,
|
||||
request_id=request_id or uuid.uuid4().hex[:16].upper(),
|
||||
operation=operation,
|
||||
key=key,
|
||||
request_uri=request_uri,
|
||||
http_status=http_status,
|
||||
error_code=error_code,
|
||||
bytes_sent=bytes_sent,
|
||||
object_size=object_size,
|
||||
total_time_ms=total_time_ms,
|
||||
referrer=referrer,
|
||||
user_agent=user_agent,
|
||||
version_id=version_id,
|
||||
)
|
||||
|
||||
target_key = f"{config.target_bucket}:{config.target_prefix}"
|
||||
should_flush = False
|
||||
with self._buffer_lock:
|
||||
if target_key not in self._buffer:
|
||||
self._buffer[target_key] = []
|
||||
self._buffer[target_key].append(entry)
|
||||
should_flush = len(self._buffer[target_key]) >= self.max_buffer_size
|
||||
|
||||
if should_flush:
|
||||
self._flush_buffer(target_key)
|
||||
|
||||
def _flush_loop(self) -> None:
|
||||
while not self._shutdown.is_set():
|
||||
self._shutdown.wait(timeout=self.flush_interval)
|
||||
if not self._shutdown.is_set():
|
||||
self._flush_all()
|
||||
|
||||
def _flush_all(self) -> None:
|
||||
with self._buffer_lock:
|
||||
targets = list(self._buffer.keys())
|
||||
|
||||
for target_key in targets:
|
||||
self._flush_buffer(target_key)
|
||||
|
||||
def _flush_buffer(self, target_key: str) -> None:
|
||||
with self._buffer_lock:
|
||||
entries = self._buffer.pop(target_key, [])
|
||||
|
||||
if not entries or not self._storage:
|
||||
return
|
||||
|
||||
try:
|
||||
bucket_name, prefix = target_key.split(":", 1)
|
||||
except ValueError:
|
||||
logger.error(f"Invalid target key: {target_key}")
|
||||
return
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
log_key = f"{prefix}{now.strftime('%Y-%m-%d-%H-%M-%S')}-{uuid.uuid4().hex[:8]}"
|
||||
|
||||
log_content = "\n".join(entry.to_log_line() for entry in entries) + "\n"
|
||||
|
||||
try:
|
||||
stream = io.BytesIO(log_content.encode("utf-8"))
|
||||
self._storage.put_object(bucket_name, log_key, stream, enforce_quota=False)
|
||||
logger.info(f"Flushed {len(entries)} access log entries to {bucket_name}/{log_key}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to write access log to {bucket_name}/{log_key}: {e}")
|
||||
with self._buffer_lock:
|
||||
if target_key not in self._buffer:
|
||||
self._buffer[target_key] = []
|
||||
self._buffer[target_key] = entries + self._buffer[target_key]
|
||||
|
||||
def flush(self) -> None:
|
||||
self._flush_all()
|
||||
|
||||
def shutdown(self) -> None:
|
||||
self._shutdown.set()
|
||||
self._flush_all()
|
||||
self._flush_thread.join(timeout=5.0)
|
||||
|
||||
def get_stats(self) -> Dict[str, Any]:
|
||||
with self._buffer_lock:
|
||||
buffered = sum(len(entries) for entries in self._buffer.values())
|
||||
return {
|
||||
"buffered_entries": buffered,
|
||||
"target_buckets": len(self._buffer),
|
||||
}
|
||||
204
python/app/acl.py
Normal file
204
python/app/acl.py
Normal file
@@ -0,0 +1,204 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
|
||||
|
||||
ACL_PERMISSION_FULL_CONTROL = "FULL_CONTROL"
|
||||
ACL_PERMISSION_WRITE = "WRITE"
|
||||
ACL_PERMISSION_WRITE_ACP = "WRITE_ACP"
|
||||
ACL_PERMISSION_READ = "READ"
|
||||
ACL_PERMISSION_READ_ACP = "READ_ACP"
|
||||
|
||||
ALL_PERMISSIONS = {
|
||||
ACL_PERMISSION_FULL_CONTROL,
|
||||
ACL_PERMISSION_WRITE,
|
||||
ACL_PERMISSION_WRITE_ACP,
|
||||
ACL_PERMISSION_READ,
|
||||
ACL_PERMISSION_READ_ACP,
|
||||
}
|
||||
|
||||
PERMISSION_TO_ACTIONS = {
|
||||
ACL_PERMISSION_FULL_CONTROL: {"read", "write", "delete", "list", "share"},
|
||||
ACL_PERMISSION_WRITE: {"write", "delete"},
|
||||
ACL_PERMISSION_WRITE_ACP: {"share"},
|
||||
ACL_PERMISSION_READ: {"read", "list"},
|
||||
ACL_PERMISSION_READ_ACP: {"share"},
|
||||
}
|
||||
|
||||
GRANTEE_ALL_USERS = "*"
|
||||
GRANTEE_AUTHENTICATED_USERS = "authenticated"
|
||||
|
||||
|
||||
@dataclass
|
||||
class AclGrant:
|
||||
grantee: str
|
||||
permission: str
|
||||
|
||||
def to_dict(self) -> Dict[str, str]:
|
||||
return {"grantee": self.grantee, "permission": self.permission}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, str]) -> "AclGrant":
|
||||
return cls(grantee=data["grantee"], permission=data["permission"])
|
||||
|
||||
|
||||
@dataclass
|
||||
class Acl:
|
||||
owner: str
|
||||
grants: List[AclGrant] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"owner": self.owner,
|
||||
"grants": [g.to_dict() for g in self.grants],
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "Acl":
|
||||
return cls(
|
||||
owner=data.get("owner", ""),
|
||||
grants=[AclGrant.from_dict(g) for g in data.get("grants", [])],
|
||||
)
|
||||
|
||||
def get_allowed_actions(self, principal_id: Optional[str], is_authenticated: bool = True) -> Set[str]:
|
||||
actions: Set[str] = set()
|
||||
if principal_id and principal_id == self.owner:
|
||||
actions.update(PERMISSION_TO_ACTIONS[ACL_PERMISSION_FULL_CONTROL])
|
||||
for grant in self.grants:
|
||||
if grant.grantee == GRANTEE_ALL_USERS:
|
||||
actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set()))
|
||||
elif grant.grantee == GRANTEE_AUTHENTICATED_USERS and is_authenticated:
|
||||
actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set()))
|
||||
elif principal_id and grant.grantee == principal_id:
|
||||
actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set()))
|
||||
return actions
|
||||
|
||||
|
||||
CANNED_ACLS = {
|
||||
"private": lambda owner: Acl(
|
||||
owner=owner,
|
||||
grants=[AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL)],
|
||||
),
|
||||
"public-read": lambda owner: Acl(
|
||||
owner=owner,
|
||||
grants=[
|
||||
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
|
||||
AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ),
|
||||
],
|
||||
),
|
||||
"public-read-write": lambda owner: Acl(
|
||||
owner=owner,
|
||||
grants=[
|
||||
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
|
||||
AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ),
|
||||
AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_WRITE),
|
||||
],
|
||||
),
|
||||
"authenticated-read": lambda owner: Acl(
|
||||
owner=owner,
|
||||
grants=[
|
||||
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
|
||||
AclGrant(grantee=GRANTEE_AUTHENTICATED_USERS, permission=ACL_PERMISSION_READ),
|
||||
],
|
||||
),
|
||||
"bucket-owner-read": lambda owner: Acl(
|
||||
owner=owner,
|
||||
grants=[
|
||||
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
|
||||
],
|
||||
),
|
||||
"bucket-owner-full-control": lambda owner: Acl(
|
||||
owner=owner,
|
||||
grants=[
|
||||
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
|
||||
],
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def create_canned_acl(canned_acl: str, owner: str) -> Acl:
|
||||
factory = CANNED_ACLS.get(canned_acl)
|
||||
if not factory:
|
||||
return CANNED_ACLS["private"](owner)
|
||||
return factory(owner)
|
||||
|
||||
|
||||
class AclService:
|
||||
def __init__(self, storage_root: Path):
|
||||
self.storage_root = storage_root
|
||||
self._bucket_acl_cache: Dict[str, Acl] = {}
|
||||
|
||||
def _bucket_acl_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / ".acl.json"
|
||||
|
||||
def get_bucket_acl(self, bucket_name: str) -> Optional[Acl]:
|
||||
if bucket_name in self._bucket_acl_cache:
|
||||
return self._bucket_acl_cache[bucket_name]
|
||||
acl_path = self._bucket_acl_path(bucket_name)
|
||||
if not acl_path.exists():
|
||||
return None
|
||||
try:
|
||||
data = json.loads(acl_path.read_text(encoding="utf-8"))
|
||||
acl = Acl.from_dict(data)
|
||||
self._bucket_acl_cache[bucket_name] = acl
|
||||
return acl
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return None
|
||||
|
||||
def set_bucket_acl(self, bucket_name: str, acl: Acl) -> None:
|
||||
acl_path = self._bucket_acl_path(bucket_name)
|
||||
acl_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
acl_path.write_text(json.dumps(acl.to_dict(), indent=2), encoding="utf-8")
|
||||
self._bucket_acl_cache[bucket_name] = acl
|
||||
|
||||
def set_bucket_canned_acl(self, bucket_name: str, canned_acl: str, owner: str) -> Acl:
|
||||
acl = create_canned_acl(canned_acl, owner)
|
||||
self.set_bucket_acl(bucket_name, acl)
|
||||
return acl
|
||||
|
||||
def delete_bucket_acl(self, bucket_name: str) -> None:
|
||||
acl_path = self._bucket_acl_path(bucket_name)
|
||||
if acl_path.exists():
|
||||
acl_path.unlink()
|
||||
self._bucket_acl_cache.pop(bucket_name, None)
|
||||
|
||||
def evaluate_bucket_acl(
|
||||
self,
|
||||
bucket_name: str,
|
||||
principal_id: Optional[str],
|
||||
action: str,
|
||||
is_authenticated: bool = True,
|
||||
) -> bool:
|
||||
acl = self.get_bucket_acl(bucket_name)
|
||||
if not acl:
|
||||
return False
|
||||
allowed_actions = acl.get_allowed_actions(principal_id, is_authenticated)
|
||||
return action in allowed_actions
|
||||
|
||||
def get_object_acl(self, bucket_name: str, object_key: str, object_metadata: Dict[str, Any]) -> Optional[Acl]:
|
||||
acl_data = object_metadata.get("__acl__")
|
||||
if not acl_data:
|
||||
return None
|
||||
try:
|
||||
return Acl.from_dict(acl_data)
|
||||
except (TypeError, KeyError):
|
||||
return None
|
||||
|
||||
def create_object_acl_metadata(self, acl: Acl) -> Dict[str, Any]:
|
||||
return {"__acl__": acl.to_dict()}
|
||||
|
||||
def evaluate_object_acl(
|
||||
self,
|
||||
object_metadata: Dict[str, Any],
|
||||
principal_id: Optional[str],
|
||||
action: str,
|
||||
is_authenticated: bool = True,
|
||||
) -> bool:
|
||||
acl = self.get_object_acl("", "", object_metadata)
|
||||
if not acl:
|
||||
return False
|
||||
allowed_actions = acl.get_allowed_actions(principal_id, is_authenticated)
|
||||
return action in allowed_actions
|
||||
984
python/app/admin_api.py
Normal file
984
python/app/admin_api.py
Normal file
@@ -0,0 +1,984 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import socket
|
||||
import time
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from flask import Blueprint, Response, current_app, jsonify, request
|
||||
|
||||
from .connections import ConnectionStore
|
||||
from .extensions import limiter
|
||||
from .gc import GarbageCollector
|
||||
from .integrity import IntegrityChecker
|
||||
from .iam import IamError, Principal
|
||||
from .replication import ReplicationManager
|
||||
from .site_registry import PeerSite, SiteInfo, SiteRegistry
|
||||
from .website_domains import WebsiteDomainStore, normalize_domain, is_valid_domain
|
||||
|
||||
|
||||
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
||||
"""Check if a URL is safe to make requests to (not internal/private).
|
||||
|
||||
Args:
|
||||
url: The URL to check.
|
||||
allow_internal: If True, allows internal/private IP addresses.
|
||||
Use for self-hosted deployments on internal networks.
|
||||
"""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
hostname = parsed.hostname
|
||||
if not hostname:
|
||||
return False
|
||||
cloud_metadata_hosts = {
|
||||
"metadata.google.internal",
|
||||
"169.254.169.254",
|
||||
}
|
||||
if hostname.lower() in cloud_metadata_hosts:
|
||||
return False
|
||||
if allow_internal:
|
||||
return True
|
||||
blocked_hosts = {
|
||||
"localhost",
|
||||
"127.0.0.1",
|
||||
"0.0.0.0",
|
||||
"::1",
|
||||
"[::1]",
|
||||
}
|
||||
if hostname.lower() in blocked_hosts:
|
||||
return False
|
||||
try:
|
||||
resolved_ip = socket.gethostbyname(hostname)
|
||||
ip = ipaddress.ip_address(resolved_ip)
|
||||
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
|
||||
return False
|
||||
except (socket.gaierror, ValueError):
|
||||
return False
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _validate_endpoint(endpoint: str) -> Optional[str]:
|
||||
"""Validate endpoint URL format. Returns error message or None."""
|
||||
try:
|
||||
parsed = urlparse(endpoint)
|
||||
if not parsed.scheme or parsed.scheme not in ("http", "https"):
|
||||
return "Endpoint must be http or https URL"
|
||||
if not parsed.netloc:
|
||||
return "Endpoint must have a host"
|
||||
return None
|
||||
except Exception:
|
||||
return "Invalid endpoint URL"
|
||||
|
||||
|
||||
def _validate_priority(priority: Any) -> Optional[str]:
|
||||
"""Validate priority value. Returns error message or None."""
|
||||
try:
|
||||
p = int(priority)
|
||||
if p < 0 or p > 1000:
|
||||
return "Priority must be between 0 and 1000"
|
||||
return None
|
||||
except (TypeError, ValueError):
|
||||
return "Priority must be an integer"
|
||||
|
||||
|
||||
def _validate_region(region: str) -> Optional[str]:
|
||||
"""Validate region format. Returns error message or None."""
|
||||
if not re.match(r"^[a-z]{2,}-[a-z]+-\d+$", region):
|
||||
return "Region must match format like us-east-1"
|
||||
return None
|
||||
|
||||
|
||||
def _validate_site_id(site_id: str) -> Optional[str]:
|
||||
"""Validate site_id format. Returns error message or None."""
|
||||
if not site_id or len(site_id) > 63:
|
||||
return "site_id must be 1-63 characters"
|
||||
if not re.match(r'^[a-zA-Z0-9][a-zA-Z0-9_-]*$', site_id):
|
||||
return "site_id must start with alphanumeric and contain only alphanumeric, hyphens, underscores"
|
||||
return None
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
admin_api_bp = Blueprint("admin_api", __name__, url_prefix="/admin")
|
||||
|
||||
|
||||
def _require_principal() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
|
||||
from .s3_api import _require_principal as s3_require_principal
|
||||
return s3_require_principal()
|
||||
|
||||
|
||||
def _require_admin() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return None, error
|
||||
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:*")
|
||||
return principal, None
|
||||
except IamError:
|
||||
return None, _json_error("AccessDenied", "Admin access required", 403)
|
||||
|
||||
|
||||
def _site_registry() -> SiteRegistry:
|
||||
return current_app.extensions["site_registry"]
|
||||
|
||||
|
||||
def _connections() -> ConnectionStore:
|
||||
return current_app.extensions["connections"]
|
||||
|
||||
|
||||
def _replication() -> ReplicationManager:
|
||||
return current_app.extensions["replication"]
|
||||
|
||||
|
||||
def _iam():
|
||||
return current_app.extensions["iam"]
|
||||
|
||||
|
||||
def _json_error(code: str, message: str, status: int) -> Tuple[Dict[str, Any], int]:
|
||||
return {"error": {"code": code, "message": message}}, status
|
||||
|
||||
|
||||
def _get_admin_rate_limit() -> str:
|
||||
return current_app.config.get("RATE_LIMIT_ADMIN", "60 per minute")
|
||||
|
||||
|
||||
@admin_api_bp.route("/site", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def get_local_site():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
local_site = registry.get_local_site()
|
||||
|
||||
if local_site:
|
||||
return jsonify(local_site.to_dict())
|
||||
|
||||
config_site_id = current_app.config.get("SITE_ID")
|
||||
config_endpoint = current_app.config.get("SITE_ENDPOINT")
|
||||
|
||||
if config_site_id:
|
||||
return jsonify({
|
||||
"site_id": config_site_id,
|
||||
"endpoint": config_endpoint or "",
|
||||
"region": current_app.config.get("SITE_REGION", "us-east-1"),
|
||||
"priority": current_app.config.get("SITE_PRIORITY", 100),
|
||||
"display_name": config_site_id,
|
||||
"source": "environment",
|
||||
})
|
||||
|
||||
return _json_error("NotFound", "Local site not configured", 404)
|
||||
|
||||
|
||||
@admin_api_bp.route("/site", methods=["PUT"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def update_local_site():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
|
||||
site_id = payload.get("site_id")
|
||||
endpoint = payload.get("endpoint")
|
||||
|
||||
if not site_id:
|
||||
return _json_error("ValidationError", "site_id is required", 400)
|
||||
|
||||
site_id_error = _validate_site_id(site_id)
|
||||
if site_id_error:
|
||||
return _json_error("ValidationError", site_id_error, 400)
|
||||
|
||||
if endpoint:
|
||||
endpoint_error = _validate_endpoint(endpoint)
|
||||
if endpoint_error:
|
||||
return _json_error("ValidationError", endpoint_error, 400)
|
||||
|
||||
if "priority" in payload:
|
||||
priority_error = _validate_priority(payload["priority"])
|
||||
if priority_error:
|
||||
return _json_error("ValidationError", priority_error, 400)
|
||||
|
||||
if "region" in payload:
|
||||
region_error = _validate_region(payload["region"])
|
||||
if region_error:
|
||||
return _json_error("ValidationError", region_error, 400)
|
||||
|
||||
registry = _site_registry()
|
||||
existing = registry.get_local_site()
|
||||
|
||||
site = SiteInfo(
|
||||
site_id=site_id,
|
||||
endpoint=endpoint or "",
|
||||
region=payload.get("region", "us-east-1"),
|
||||
priority=payload.get("priority", 100),
|
||||
display_name=payload.get("display_name", site_id),
|
||||
created_at=existing.created_at if existing else None,
|
||||
)
|
||||
|
||||
registry.set_local_site(site)
|
||||
|
||||
logger.info("Local site updated", extra={"site_id": site_id, "principal": principal.access_key})
|
||||
return jsonify(site.to_dict())
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def list_all_sites():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
local = registry.get_local_site()
|
||||
peers = registry.list_peers()
|
||||
|
||||
result = {
|
||||
"local": local.to_dict() if local else None,
|
||||
"peers": [peer.to_dict() for peer in peers],
|
||||
"total_peers": len(peers),
|
||||
}
|
||||
|
||||
return jsonify(result)
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def register_peer_site():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
|
||||
site_id = payload.get("site_id")
|
||||
endpoint = payload.get("endpoint")
|
||||
|
||||
if not site_id:
|
||||
return _json_error("ValidationError", "site_id is required", 400)
|
||||
|
||||
site_id_error = _validate_site_id(site_id)
|
||||
if site_id_error:
|
||||
return _json_error("ValidationError", site_id_error, 400)
|
||||
|
||||
if not endpoint:
|
||||
return _json_error("ValidationError", "endpoint is required", 400)
|
||||
|
||||
endpoint_error = _validate_endpoint(endpoint)
|
||||
if endpoint_error:
|
||||
return _json_error("ValidationError", endpoint_error, 400)
|
||||
|
||||
region = payload.get("region", "us-east-1")
|
||||
region_error = _validate_region(region)
|
||||
if region_error:
|
||||
return _json_error("ValidationError", region_error, 400)
|
||||
|
||||
priority = payload.get("priority", 100)
|
||||
priority_error = _validate_priority(priority)
|
||||
if priority_error:
|
||||
return _json_error("ValidationError", priority_error, 400)
|
||||
|
||||
registry = _site_registry()
|
||||
|
||||
if registry.get_peer(site_id):
|
||||
return _json_error("AlreadyExists", f"Peer site '{site_id}' already exists", 409)
|
||||
|
||||
connection_id = payload.get("connection_id")
|
||||
if connection_id:
|
||||
if not _connections().get(connection_id):
|
||||
return _json_error("ValidationError", f"Connection '{connection_id}' not found", 400)
|
||||
|
||||
peer = PeerSite(
|
||||
site_id=site_id,
|
||||
endpoint=endpoint,
|
||||
region=region,
|
||||
priority=int(priority),
|
||||
display_name=payload.get("display_name", site_id),
|
||||
connection_id=connection_id,
|
||||
)
|
||||
|
||||
registry.add_peer(peer)
|
||||
|
||||
logger.info("Peer site registered", extra={"site_id": site_id, "principal": principal.access_key})
|
||||
return jsonify(peer.to_dict()), 201
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites/<site_id>", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def get_peer_site(site_id: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
peer = registry.get_peer(site_id)
|
||||
|
||||
if not peer:
|
||||
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
|
||||
|
||||
return jsonify(peer.to_dict())
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites/<site_id>", methods=["PUT"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def update_peer_site(site_id: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
existing = registry.get_peer(site_id)
|
||||
|
||||
if not existing:
|
||||
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
|
||||
if "endpoint" in payload:
|
||||
endpoint_error = _validate_endpoint(payload["endpoint"])
|
||||
if endpoint_error:
|
||||
return _json_error("ValidationError", endpoint_error, 400)
|
||||
|
||||
if "priority" in payload:
|
||||
priority_error = _validate_priority(payload["priority"])
|
||||
if priority_error:
|
||||
return _json_error("ValidationError", priority_error, 400)
|
||||
|
||||
if "region" in payload:
|
||||
region_error = _validate_region(payload["region"])
|
||||
if region_error:
|
||||
return _json_error("ValidationError", region_error, 400)
|
||||
|
||||
if "connection_id" in payload:
|
||||
if payload["connection_id"] and not _connections().get(payload["connection_id"]):
|
||||
return _json_error("ValidationError", f"Connection '{payload['connection_id']}' not found", 400)
|
||||
|
||||
peer = PeerSite(
|
||||
site_id=site_id,
|
||||
endpoint=payload.get("endpoint", existing.endpoint),
|
||||
region=payload.get("region", existing.region),
|
||||
priority=payload.get("priority", existing.priority),
|
||||
display_name=payload.get("display_name", existing.display_name),
|
||||
connection_id=payload.get("connection_id", existing.connection_id),
|
||||
created_at=existing.created_at,
|
||||
is_healthy=existing.is_healthy,
|
||||
last_health_check=existing.last_health_check,
|
||||
)
|
||||
|
||||
registry.update_peer(peer)
|
||||
|
||||
logger.info("Peer site updated", extra={"site_id": site_id, "principal": principal.access_key})
|
||||
return jsonify(peer.to_dict())
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites/<site_id>", methods=["DELETE"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def delete_peer_site(site_id: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
|
||||
if not registry.delete_peer(site_id):
|
||||
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
|
||||
|
||||
logger.info("Peer site deleted", extra={"site_id": site_id, "principal": principal.access_key})
|
||||
return Response(status=204)
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites/<site_id>/health", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def check_peer_health(site_id: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
peer = registry.get_peer(site_id)
|
||||
|
||||
if not peer:
|
||||
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
|
||||
|
||||
is_healthy = False
|
||||
error_message = None
|
||||
|
||||
if peer.connection_id:
|
||||
connection = _connections().get(peer.connection_id)
|
||||
if connection:
|
||||
is_healthy = _replication().check_endpoint_health(connection)
|
||||
else:
|
||||
error_message = f"Connection '{peer.connection_id}' not found"
|
||||
else:
|
||||
error_message = "No connection configured for this peer"
|
||||
|
||||
registry.update_health(site_id, is_healthy)
|
||||
|
||||
result = {
|
||||
"site_id": site_id,
|
||||
"is_healthy": is_healthy,
|
||||
"checked_at": time.time(),
|
||||
}
|
||||
if error_message:
|
||||
result["error"] = error_message
|
||||
|
||||
return jsonify(result)
|
||||
|
||||
|
||||
@admin_api_bp.route("/topology", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def get_topology():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
local = registry.get_local_site()
|
||||
peers = registry.list_peers()
|
||||
|
||||
sites = []
|
||||
|
||||
if local:
|
||||
sites.append({
|
||||
**local.to_dict(),
|
||||
"is_local": True,
|
||||
"is_healthy": True,
|
||||
})
|
||||
|
||||
for peer in peers:
|
||||
sites.append({
|
||||
**peer.to_dict(),
|
||||
"is_local": False,
|
||||
})
|
||||
|
||||
sites.sort(key=lambda s: s.get("priority", 100))
|
||||
|
||||
return jsonify({
|
||||
"sites": sites,
|
||||
"total": len(sites),
|
||||
"healthy_count": sum(1 for s in sites if s.get("is_healthy")),
|
||||
})
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites/<site_id>/bidirectional-status", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def check_bidirectional_status(site_id: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
peer = registry.get_peer(site_id)
|
||||
|
||||
if not peer:
|
||||
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
|
||||
|
||||
local_site = registry.get_local_site()
|
||||
replication = _replication()
|
||||
local_rules = replication.list_rules()
|
||||
|
||||
local_bidir_rules = []
|
||||
for rule in local_rules:
|
||||
if rule.target_connection_id == peer.connection_id and rule.mode == "bidirectional":
|
||||
local_bidir_rules.append({
|
||||
"bucket_name": rule.bucket_name,
|
||||
"target_bucket": rule.target_bucket,
|
||||
"enabled": rule.enabled,
|
||||
})
|
||||
|
||||
result = {
|
||||
"site_id": site_id,
|
||||
"local_site_id": local_site.site_id if local_site else None,
|
||||
"local_endpoint": local_site.endpoint if local_site else None,
|
||||
"local_bidirectional_rules": local_bidir_rules,
|
||||
"local_site_sync_enabled": current_app.config.get("SITE_SYNC_ENABLED", False),
|
||||
"remote_status": None,
|
||||
"issues": [],
|
||||
"is_fully_configured": False,
|
||||
}
|
||||
|
||||
if not local_site or not local_site.site_id:
|
||||
result["issues"].append({
|
||||
"code": "NO_LOCAL_SITE_ID",
|
||||
"message": "Local site identity not configured",
|
||||
"severity": "error",
|
||||
})
|
||||
|
||||
if not local_site or not local_site.endpoint:
|
||||
result["issues"].append({
|
||||
"code": "NO_LOCAL_ENDPOINT",
|
||||
"message": "Local site endpoint not configured (remote site cannot reach back)",
|
||||
"severity": "error",
|
||||
})
|
||||
|
||||
if not peer.connection_id:
|
||||
result["issues"].append({
|
||||
"code": "NO_CONNECTION",
|
||||
"message": "No connection configured for this peer",
|
||||
"severity": "error",
|
||||
})
|
||||
return jsonify(result)
|
||||
|
||||
connection = _connections().get(peer.connection_id)
|
||||
if not connection:
|
||||
result["issues"].append({
|
||||
"code": "CONNECTION_NOT_FOUND",
|
||||
"message": f"Connection '{peer.connection_id}' not found",
|
||||
"severity": "error",
|
||||
})
|
||||
return jsonify(result)
|
||||
|
||||
if not local_bidir_rules:
|
||||
result["issues"].append({
|
||||
"code": "NO_LOCAL_BIDIRECTIONAL_RULES",
|
||||
"message": "No bidirectional replication rules configured on this site",
|
||||
"severity": "warning",
|
||||
})
|
||||
|
||||
if not result["local_site_sync_enabled"]:
|
||||
result["issues"].append({
|
||||
"code": "SITE_SYNC_DISABLED",
|
||||
"message": "Site sync worker is disabled (SITE_SYNC_ENABLED=false). Pull operations will not work.",
|
||||
"severity": "warning",
|
||||
})
|
||||
|
||||
if not replication.check_endpoint_health(connection):
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_UNREACHABLE",
|
||||
"message": "Remote endpoint is not reachable",
|
||||
"severity": "error",
|
||||
})
|
||||
return jsonify(result)
|
||||
|
||||
allow_internal = current_app.config.get("ALLOW_INTERNAL_ENDPOINTS", False)
|
||||
if not _is_safe_url(peer.endpoint, allow_internal=allow_internal):
|
||||
result["issues"].append({
|
||||
"code": "ENDPOINT_NOT_ALLOWED",
|
||||
"message": "Peer endpoint points to cloud metadata service (SSRF protection)",
|
||||
"severity": "error",
|
||||
})
|
||||
return jsonify(result)
|
||||
|
||||
try:
|
||||
admin_url = peer.endpoint.rstrip("/") + "/admin/sites"
|
||||
resp = requests.get(
|
||||
admin_url,
|
||||
timeout=10,
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"X-Access-Key": connection.access_key,
|
||||
"X-Secret-Key": connection.secret_key,
|
||||
},
|
||||
)
|
||||
|
||||
if resp.status_code == 200:
|
||||
try:
|
||||
remote_data = resp.json()
|
||||
if not isinstance(remote_data, dict):
|
||||
raise ValueError("Expected JSON object")
|
||||
remote_local = remote_data.get("local")
|
||||
if remote_local is not None and not isinstance(remote_local, dict):
|
||||
raise ValueError("Expected 'local' to be an object")
|
||||
remote_peers = remote_data.get("peers", [])
|
||||
if not isinstance(remote_peers, list):
|
||||
raise ValueError("Expected 'peers' to be a list")
|
||||
except (ValueError, json.JSONDecodeError) as e:
|
||||
logger.warning("Invalid JSON from remote admin API: %s", e)
|
||||
result["remote_status"] = {"reachable": True, "invalid_response": True}
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_INVALID_RESPONSE",
|
||||
"message": "Remote admin API returned invalid JSON",
|
||||
"severity": "warning",
|
||||
})
|
||||
return jsonify(result)
|
||||
|
||||
result["remote_status"] = {
|
||||
"reachable": True,
|
||||
"local_site": remote_local,
|
||||
"site_sync_enabled": None,
|
||||
"has_peer_for_us": False,
|
||||
"peer_connection_configured": False,
|
||||
"has_bidirectional_rules_for_us": False,
|
||||
}
|
||||
|
||||
for rp in remote_peers:
|
||||
if not isinstance(rp, dict):
|
||||
continue
|
||||
if local_site and (
|
||||
rp.get("site_id") == local_site.site_id or
|
||||
rp.get("endpoint") == local_site.endpoint
|
||||
):
|
||||
result["remote_status"]["has_peer_for_us"] = True
|
||||
result["remote_status"]["peer_connection_configured"] = bool(rp.get("connection_id"))
|
||||
break
|
||||
|
||||
if not result["remote_status"]["has_peer_for_us"]:
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_NO_PEER_FOR_US",
|
||||
"message": "Remote site does not have this site registered as a peer",
|
||||
"severity": "error",
|
||||
})
|
||||
elif not result["remote_status"]["peer_connection_configured"]:
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_NO_CONNECTION_FOR_US",
|
||||
"message": "Remote site has us as peer but no connection configured (cannot push back)",
|
||||
"severity": "error",
|
||||
})
|
||||
elif resp.status_code == 401 or resp.status_code == 403:
|
||||
result["remote_status"] = {
|
||||
"reachable": True,
|
||||
"admin_access_denied": True,
|
||||
}
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_ADMIN_ACCESS_DENIED",
|
||||
"message": "Cannot verify remote configuration (admin access denied)",
|
||||
"severity": "warning",
|
||||
})
|
||||
else:
|
||||
result["remote_status"] = {
|
||||
"reachable": True,
|
||||
"admin_api_error": resp.status_code,
|
||||
}
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_ADMIN_API_ERROR",
|
||||
"message": f"Remote admin API returned status {resp.status_code}",
|
||||
"severity": "warning",
|
||||
})
|
||||
except requests.RequestException as e:
|
||||
logger.warning("Remote admin API unreachable: %s", e)
|
||||
result["remote_status"] = {
|
||||
"reachable": False,
|
||||
"error": "Connection failed",
|
||||
}
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_ADMIN_UNREACHABLE",
|
||||
"message": "Could not reach remote admin API",
|
||||
"severity": "warning",
|
||||
})
|
||||
except Exception as e:
|
||||
logger.warning("Error checking remote bidirectional status: %s", e, exc_info=True)
|
||||
result["issues"].append({
|
||||
"code": "VERIFICATION_ERROR",
|
||||
"message": "Internal error during verification",
|
||||
"severity": "warning",
|
||||
})
|
||||
|
||||
error_issues = [i for i in result["issues"] if i["severity"] == "error"]
|
||||
result["is_fully_configured"] = len(error_issues) == 0 and len(local_bidir_rules) > 0
|
||||
|
||||
return jsonify(result)
|
||||
|
||||
|
||||
def _website_domains() -> WebsiteDomainStore:
|
||||
return current_app.extensions["website_domains"]
|
||||
|
||||
|
||||
def _storage():
|
||||
return current_app.extensions["object_storage"]
|
||||
|
||||
|
||||
def _require_iam_action(action: str):
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return None, error
|
||||
try:
|
||||
_iam().authorize(principal, None, action)
|
||||
return principal, None
|
||||
except IamError:
|
||||
return None, _json_error("AccessDenied", f"Requires {action} permission", 403)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_list_users():
|
||||
principal, error = _require_iam_action("iam:list_users")
|
||||
if error:
|
||||
return error
|
||||
return jsonify({"users": _iam().list_users()})
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_get_user(identifier):
|
||||
principal, error = _require_iam_action("iam:get_user")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
user_id = _iam().resolve_user_id(identifier)
|
||||
return jsonify(_iam().get_user_by_id(user_id))
|
||||
except IamError as exc:
|
||||
return _json_error("NotFound", str(exc), 404)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/policies", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_get_user_policies(identifier):
|
||||
principal, error = _require_iam_action("iam:get_policy")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
return jsonify({"policies": _iam().get_user_policies(identifier)})
|
||||
except IamError as exc:
|
||||
return _json_error("NotFound", str(exc), 404)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/keys", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_create_access_key(identifier):
|
||||
principal, error = _require_iam_action("iam:create_key")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
result = _iam().create_access_key(identifier)
|
||||
logger.info("Access key created for %s by %s", identifier, principal.access_key)
|
||||
return jsonify(result), 201
|
||||
except IamError as exc:
|
||||
return _json_error("InvalidRequest", str(exc), 400)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/keys/<access_key>", methods=["DELETE"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_delete_access_key(identifier, access_key):
|
||||
principal, error = _require_iam_action("iam:delete_key")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
_iam().delete_access_key(access_key)
|
||||
logger.info("Access key %s deleted by %s", access_key, principal.access_key)
|
||||
return "", 204
|
||||
except IamError as exc:
|
||||
return _json_error("InvalidRequest", str(exc), 400)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/disable", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_disable_user(identifier):
|
||||
principal, error = _require_iam_action("iam:disable_user")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
_iam().disable_user(identifier)
|
||||
logger.info("User %s disabled by %s", identifier, principal.access_key)
|
||||
return jsonify({"status": "disabled"})
|
||||
except IamError as exc:
|
||||
return _json_error("InvalidRequest", str(exc), 400)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/enable", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_enable_user(identifier):
|
||||
principal, error = _require_iam_action("iam:disable_user")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
_iam().enable_user(identifier)
|
||||
logger.info("User %s enabled by %s", identifier, principal.access_key)
|
||||
return jsonify({"status": "enabled"})
|
||||
except IamError as exc:
|
||||
return _json_error("InvalidRequest", str(exc), 400)
|
||||
|
||||
|
||||
@admin_api_bp.route("/website-domains", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def list_website_domains():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||
return jsonify(_website_domains().list_all())
|
||||
|
||||
|
||||
@admin_api_bp.route("/website-domains", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def create_website_domain():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||
payload = request.get_json(silent=True) or {}
|
||||
domain = normalize_domain(payload.get("domain") or "")
|
||||
bucket = (payload.get("bucket") or "").strip()
|
||||
if not domain:
|
||||
return _json_error("ValidationError", "domain is required", 400)
|
||||
if not is_valid_domain(domain):
|
||||
return _json_error("ValidationError", f"Invalid domain: '{domain}'", 400)
|
||||
if not bucket:
|
||||
return _json_error("ValidationError", "bucket is required", 400)
|
||||
storage = _storage()
|
||||
if not storage.bucket_exists(bucket):
|
||||
return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)
|
||||
store = _website_domains()
|
||||
existing = store.get_bucket(domain)
|
||||
if existing:
|
||||
return _json_error("Conflict", f"Domain '{domain}' is already mapped to bucket '{existing}'", 409)
|
||||
store.set_mapping(domain, bucket)
|
||||
logger.info("Website domain mapping created: %s -> %s", domain, bucket)
|
||||
return jsonify({"domain": domain, "bucket": bucket}), 201
|
||||
|
||||
|
||||
@admin_api_bp.route("/website-domains/<domain>", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def get_website_domain(domain: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||
domain = normalize_domain(domain)
|
||||
bucket = _website_domains().get_bucket(domain)
|
||||
if not bucket:
|
||||
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
|
||||
return jsonify({"domain": domain, "bucket": bucket})
|
||||
|
||||
|
||||
@admin_api_bp.route("/website-domains/<domain>", methods=["PUT"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def update_website_domain(domain: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||
domain = normalize_domain(domain)
|
||||
payload = request.get_json(silent=True) or {}
|
||||
bucket = (payload.get("bucket") or "").strip()
|
||||
if not bucket:
|
||||
return _json_error("ValidationError", "bucket is required", 400)
|
||||
storage = _storage()
|
||||
if not storage.bucket_exists(bucket):
|
||||
return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)
|
||||
store = _website_domains()
|
||||
if not store.get_bucket(domain):
|
||||
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
|
||||
store.set_mapping(domain, bucket)
|
||||
logger.info("Website domain mapping updated: %s -> %s", domain, bucket)
|
||||
return jsonify({"domain": domain, "bucket": bucket})
|
||||
|
||||
|
||||
@admin_api_bp.route("/website-domains/<domain>", methods=["DELETE"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def delete_website_domain(domain: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||
domain = normalize_domain(domain)
|
||||
if not _website_domains().delete_mapping(domain):
|
||||
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
|
||||
logger.info("Website domain mapping deleted: %s", domain)
|
||||
return Response(status=204)
|
||||
|
||||
|
||||
def _gc() -> Optional[GarbageCollector]:
|
||||
return current_app.extensions.get("gc")
|
||||
|
||||
|
||||
@admin_api_bp.route("/gc/status", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def gc_status():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
gc = _gc()
|
||||
if not gc:
|
||||
return jsonify({"enabled": False, "message": "GC is not enabled. Set GC_ENABLED=true to enable."})
|
||||
return jsonify(gc.get_status())
|
||||
|
||||
|
||||
@admin_api_bp.route("/gc/run", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def gc_run_now():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
gc = _gc()
|
||||
if not gc:
|
||||
return _json_error("InvalidRequest", "GC is not enabled", 400)
|
||||
payload = request.get_json(silent=True) or {}
|
||||
started = gc.run_async(dry_run=payload.get("dry_run"))
|
||||
logger.info("GC manual run by %s", principal.access_key)
|
||||
if not started:
|
||||
return _json_error("Conflict", "GC is already in progress", 409)
|
||||
return jsonify({"status": "started"})
|
||||
|
||||
|
||||
@admin_api_bp.route("/gc/history", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def gc_history():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
gc = _gc()
|
||||
if not gc:
|
||||
return jsonify({"executions": []})
|
||||
limit = min(int(request.args.get("limit", 50)), 200)
|
||||
offset = int(request.args.get("offset", 0))
|
||||
records = gc.get_history(limit=limit, offset=offset)
|
||||
return jsonify({"executions": records})
|
||||
|
||||
|
||||
def _integrity() -> Optional[IntegrityChecker]:
|
||||
return current_app.extensions.get("integrity")
|
||||
|
||||
|
||||
@admin_api_bp.route("/integrity/status", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def integrity_status():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
checker = _integrity()
|
||||
if not checker:
|
||||
return jsonify({"enabled": False, "message": "Integrity checker is not enabled. Set INTEGRITY_ENABLED=true to enable."})
|
||||
return jsonify(checker.get_status())
|
||||
|
||||
|
||||
@admin_api_bp.route("/integrity/run", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def integrity_run_now():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
checker = _integrity()
|
||||
if not checker:
|
||||
return _json_error("InvalidRequest", "Integrity checker is not enabled", 400)
|
||||
payload = request.get_json(silent=True) or {}
|
||||
override_dry_run = payload.get("dry_run")
|
||||
override_auto_heal = payload.get("auto_heal")
|
||||
started = checker.run_async(
|
||||
auto_heal=override_auto_heal if override_auto_heal is not None else None,
|
||||
dry_run=override_dry_run if override_dry_run is not None else None,
|
||||
)
|
||||
logger.info("Integrity manual run by %s", principal.access_key)
|
||||
if not started:
|
||||
return _json_error("Conflict", "A scan is already in progress", 409)
|
||||
return jsonify({"status": "started"})
|
||||
|
||||
|
||||
@admin_api_bp.route("/integrity/history", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def integrity_history():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
checker = _integrity()
|
||||
if not checker:
|
||||
return jsonify({"executions": []})
|
||||
limit = min(int(request.args.get("limit", 50)), 200)
|
||||
offset = int(request.args.get("offset", 0))
|
||||
records = checker.get_history(limit=limit, offset=offset)
|
||||
return jsonify({"executions": records})
|
||||
|
||||
|
||||
404
python/app/bucket_policies.py
Normal file
404
python/app/bucket_policies.py
Normal file
@@ -0,0 +1,404 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from fnmatch import fnmatch, translate
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple
|
||||
|
||||
|
||||
RESOURCE_PREFIX = "arn:aws:s3:::"
|
||||
|
||||
|
||||
@lru_cache(maxsize=256)
|
||||
def _compile_pattern(pattern: str) -> Pattern[str]:
|
||||
return re.compile(translate(pattern), re.IGNORECASE)
|
||||
|
||||
|
||||
def _match_string_like(value: str, pattern: str) -> bool:
|
||||
compiled = _compile_pattern(pattern)
|
||||
return bool(compiled.match(value))
|
||||
|
||||
|
||||
def _ip_in_cidr(ip_str: str, cidr: str) -> bool:
|
||||
try:
|
||||
ip = ipaddress.ip_address(ip_str)
|
||||
network = ipaddress.ip_network(cidr, strict=False)
|
||||
return ip in network
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def _evaluate_condition_operator(
|
||||
operator: str,
|
||||
condition_key: str,
|
||||
condition_values: List[str],
|
||||
context: Dict[str, Any],
|
||||
) -> bool:
|
||||
context_value = context.get(condition_key)
|
||||
op_lower = operator.lower()
|
||||
if_exists = op_lower.endswith("ifexists")
|
||||
if if_exists:
|
||||
op_lower = op_lower[:-8]
|
||||
|
||||
if context_value is None:
|
||||
return if_exists
|
||||
|
||||
context_value_str = str(context_value)
|
||||
context_value_lower = context_value_str.lower()
|
||||
|
||||
if op_lower == "stringequals":
|
||||
return context_value_str in condition_values
|
||||
elif op_lower == "stringnotequals":
|
||||
return context_value_str not in condition_values
|
||||
elif op_lower == "stringequalsignorecase":
|
||||
return context_value_lower in [v.lower() for v in condition_values]
|
||||
elif op_lower == "stringnotequalsignorecase":
|
||||
return context_value_lower not in [v.lower() for v in condition_values]
|
||||
elif op_lower == "stringlike":
|
||||
return any(_match_string_like(context_value_str, p) for p in condition_values)
|
||||
elif op_lower == "stringnotlike":
|
||||
return not any(_match_string_like(context_value_str, p) for p in condition_values)
|
||||
elif op_lower == "ipaddress":
|
||||
return any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values)
|
||||
elif op_lower == "notipaddress":
|
||||
return not any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values)
|
||||
elif op_lower == "bool":
|
||||
bool_val = context_value_lower in ("true", "1", "yes")
|
||||
return str(bool_val).lower() in [v.lower() for v in condition_values]
|
||||
elif op_lower == "null":
|
||||
is_null = context_value is None or context_value == ""
|
||||
expected_null = condition_values[0].lower() in ("true", "1", "yes") if condition_values else True
|
||||
return is_null == expected_null
|
||||
|
||||
return False
|
||||
|
||||
ACTION_ALIASES = {
|
||||
"s3:listbucket": "list",
|
||||
"s3:listallmybuckets": "list",
|
||||
"s3:listbucketversions": "list",
|
||||
"s3:listmultipartuploads": "list",
|
||||
"s3:listparts": "list",
|
||||
"s3:getobject": "read",
|
||||
"s3:getobjectversion": "read",
|
||||
"s3:getobjecttagging": "read",
|
||||
"s3:getobjectversiontagging": "read",
|
||||
"s3:getobjectacl": "read",
|
||||
"s3:getbucketversioning": "read",
|
||||
"s3:headobject": "read",
|
||||
"s3:headbucket": "read",
|
||||
"s3:putobject": "write",
|
||||
"s3:createbucket": "write",
|
||||
"s3:putobjecttagging": "write",
|
||||
"s3:putbucketversioning": "write",
|
||||
"s3:createmultipartupload": "write",
|
||||
"s3:uploadpart": "write",
|
||||
"s3:completemultipartupload": "write",
|
||||
"s3:abortmultipartupload": "write",
|
||||
"s3:copyobject": "write",
|
||||
"s3:deleteobject": "delete",
|
||||
"s3:deleteobjectversion": "delete",
|
||||
"s3:deletebucket": "delete",
|
||||
"s3:deleteobjecttagging": "delete",
|
||||
"s3:putobjectacl": "share",
|
||||
"s3:putbucketacl": "share",
|
||||
"s3:getbucketacl": "share",
|
||||
"s3:putbucketpolicy": "policy",
|
||||
"s3:getbucketpolicy": "policy",
|
||||
"s3:deletebucketpolicy": "policy",
|
||||
"s3:getreplicationconfiguration": "replication",
|
||||
"s3:putreplicationconfiguration": "replication",
|
||||
"s3:deletereplicationconfiguration": "replication",
|
||||
"s3:replicateobject": "replication",
|
||||
"s3:replicatetags": "replication",
|
||||
"s3:replicatedelete": "replication",
|
||||
"s3:getlifecycleconfiguration": "lifecycle",
|
||||
"s3:putlifecycleconfiguration": "lifecycle",
|
||||
"s3:deletelifecycleconfiguration": "lifecycle",
|
||||
"s3:getbucketlifecycle": "lifecycle",
|
||||
"s3:putbucketlifecycle": "lifecycle",
|
||||
"s3:getbucketcors": "cors",
|
||||
"s3:putbucketcors": "cors",
|
||||
"s3:deletebucketcors": "cors",
|
||||
}
|
||||
|
||||
|
||||
def _normalize_action(action: str) -> str:
|
||||
action = action.strip().lower()
|
||||
if action == "*":
|
||||
return "*"
|
||||
return ACTION_ALIASES.get(action, action)
|
||||
|
||||
|
||||
def _normalize_actions(actions: Iterable[str]) -> List[str]:
|
||||
values: List[str] = []
|
||||
for action in actions:
|
||||
canonical = _normalize_action(action)
|
||||
if canonical == "*" and "*" not in values:
|
||||
return ["*"]
|
||||
if canonical and canonical not in values:
|
||||
values.append(canonical)
|
||||
return values
|
||||
|
||||
|
||||
def _normalize_principals(principal_field: Any) -> List[str] | str:
|
||||
if principal_field == "*":
|
||||
return "*"
|
||||
|
||||
def _collect(values: Any) -> List[str]:
|
||||
if values is None:
|
||||
return []
|
||||
if values == "*":
|
||||
return ["*"]
|
||||
if isinstance(values, str):
|
||||
return [values]
|
||||
if isinstance(values, dict):
|
||||
aggregated: List[str] = []
|
||||
for nested in values.values():
|
||||
chunk = _collect(nested)
|
||||
if "*" in chunk:
|
||||
return ["*"]
|
||||
aggregated.extend(chunk)
|
||||
return aggregated
|
||||
if isinstance(values, Iterable):
|
||||
aggregated = []
|
||||
for nested in values:
|
||||
chunk = _collect(nested)
|
||||
if "*" in chunk:
|
||||
return ["*"]
|
||||
aggregated.extend(chunk)
|
||||
return aggregated
|
||||
return [str(values)]
|
||||
|
||||
normalized: List[str] = []
|
||||
for entry in _collect(principal_field):
|
||||
token = str(entry).strip()
|
||||
if token == "*":
|
||||
return "*"
|
||||
if token and token not in normalized:
|
||||
normalized.append(token)
|
||||
return normalized or "*"
|
||||
|
||||
|
||||
def _parse_resource(resource: str) -> tuple[str | None, str | None]:
|
||||
if not resource.startswith(RESOURCE_PREFIX):
|
||||
return None, None
|
||||
remainder = resource[len(RESOURCE_PREFIX) :]
|
||||
if "/" not in remainder:
|
||||
bucket = remainder or "*"
|
||||
return bucket, None
|
||||
bucket, _, key_pattern = remainder.partition("/")
|
||||
return bucket or "*", key_pattern or "*"
|
||||
|
||||
|
||||
@dataclass
|
||||
class BucketPolicyStatement:
|
||||
sid: Optional[str]
|
||||
effect: str
|
||||
principals: List[str] | str
|
||||
actions: List[str]
|
||||
resources: List[Tuple[str | None, str | None]]
|
||||
conditions: Dict[str, Dict[str, List[str]]] = field(default_factory=dict)
|
||||
_compiled_patterns: List[Tuple[str | None, Optional[Pattern[str]]]] | None = None
|
||||
|
||||
def _get_compiled_patterns(self) -> List[Tuple[str | None, Optional[Pattern[str]]]]:
|
||||
if self._compiled_patterns is None:
|
||||
self._compiled_patterns = []
|
||||
for resource_bucket, key_pattern in self.resources:
|
||||
if key_pattern is None:
|
||||
self._compiled_patterns.append((resource_bucket, None))
|
||||
else:
|
||||
regex_pattern = translate(key_pattern)
|
||||
self._compiled_patterns.append((resource_bucket, re.compile(regex_pattern)))
|
||||
return self._compiled_patterns
|
||||
|
||||
def matches_principal(self, access_key: Optional[str]) -> bool:
|
||||
if self.principals == "*":
|
||||
return True
|
||||
if access_key is None:
|
||||
return False
|
||||
return access_key in self.principals
|
||||
|
||||
def matches_action(self, action: str) -> bool:
|
||||
action = _normalize_action(action)
|
||||
return "*" in self.actions or action in self.actions
|
||||
|
||||
def matches_resource(self, bucket: Optional[str], object_key: Optional[str]) -> bool:
|
||||
bucket = (bucket or "*").lower()
|
||||
key = object_key or ""
|
||||
for resource_bucket, compiled_pattern in self._get_compiled_patterns():
|
||||
resource_bucket = (resource_bucket or "*").lower()
|
||||
if resource_bucket not in {"*", bucket}:
|
||||
continue
|
||||
if compiled_pattern is None:
|
||||
if not key:
|
||||
return True
|
||||
continue
|
||||
if compiled_pattern.match(key):
|
||||
return True
|
||||
return False
|
||||
|
||||
def matches_condition(self, context: Optional[Dict[str, Any]]) -> bool:
|
||||
if not self.conditions:
|
||||
return True
|
||||
if context is None:
|
||||
context = {}
|
||||
for operator, key_values in self.conditions.items():
|
||||
for condition_key, condition_values in key_values.items():
|
||||
if not _evaluate_condition_operator(operator, condition_key, condition_values, context):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
class BucketPolicyStore:
|
||||
"""Loads bucket policies from disk and evaluates statements."""
|
||||
|
||||
def __init__(self, policy_path: Path) -> None:
|
||||
self.policy_path = Path(policy_path)
|
||||
self.policy_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
if not self.policy_path.exists():
|
||||
self.policy_path.write_text(json.dumps({"policies": {}}, indent=2))
|
||||
self._raw: Dict[str, Any] = {}
|
||||
self._policies: Dict[str, List[BucketPolicyStatement]] = {}
|
||||
self._load()
|
||||
self._last_mtime = self._current_mtime()
|
||||
# Performance: Avoid stat() on every request
|
||||
self._last_stat_check = 0.0
|
||||
self._stat_check_interval = float(os.environ.get("BUCKET_POLICY_STAT_CHECK_INTERVAL_SECONDS", "2.0"))
|
||||
|
||||
def maybe_reload(self) -> None:
|
||||
# Performance: Skip stat check if we checked recently
|
||||
now = time.time()
|
||||
if now - self._last_stat_check < self._stat_check_interval:
|
||||
return
|
||||
self._last_stat_check = now
|
||||
current = self._current_mtime()
|
||||
if current is None or current == self._last_mtime:
|
||||
return
|
||||
self._load()
|
||||
self._last_mtime = current
|
||||
|
||||
def _current_mtime(self) -> float | None:
|
||||
try:
|
||||
return self.policy_path.stat().st_mtime
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
|
||||
def evaluate(
|
||||
self,
|
||||
access_key: Optional[str],
|
||||
bucket: Optional[str],
|
||||
object_key: Optional[str],
|
||||
action: str,
|
||||
context: Optional[Dict[str, Any]] = None,
|
||||
) -> str | None:
|
||||
bucket = (bucket or "").lower()
|
||||
statements = self._policies.get(bucket) or []
|
||||
decision: Optional[str] = None
|
||||
for statement in statements:
|
||||
if not statement.matches_principal(access_key):
|
||||
continue
|
||||
if not statement.matches_action(action):
|
||||
continue
|
||||
if not statement.matches_resource(bucket, object_key):
|
||||
continue
|
||||
if not statement.matches_condition(context):
|
||||
continue
|
||||
if statement.effect == "deny":
|
||||
return "deny"
|
||||
decision = "allow"
|
||||
return decision
|
||||
|
||||
def get_policy(self, bucket: str) -> Dict[str, Any] | None:
|
||||
return self._raw.get(bucket.lower())
|
||||
|
||||
def set_policy(self, bucket: str, policy_payload: Dict[str, Any]) -> None:
|
||||
bucket = bucket.lower()
|
||||
statements = self._normalize_policy(policy_payload)
|
||||
if not statements:
|
||||
raise ValueError("Policy must include at least one valid statement")
|
||||
self._raw[bucket] = policy_payload
|
||||
self._policies[bucket] = statements
|
||||
self._persist()
|
||||
|
||||
def delete_policy(self, bucket: str) -> None:
|
||||
bucket = bucket.lower()
|
||||
self._raw.pop(bucket, None)
|
||||
self._policies.pop(bucket, None)
|
||||
self._persist()
|
||||
|
||||
def _load(self) -> None:
|
||||
try:
|
||||
content = self.policy_path.read_text(encoding='utf-8')
|
||||
raw_payload = json.loads(content)
|
||||
except FileNotFoundError:
|
||||
raw_payload = {"policies": {}}
|
||||
except json.JSONDecodeError as e:
|
||||
raise ValueError(f"Corrupted bucket policy file (invalid JSON): {e}")
|
||||
except PermissionError as e:
|
||||
raise ValueError(f"Cannot read bucket policy file (permission denied): {e}")
|
||||
except (OSError, ValueError) as e:
|
||||
raise ValueError(f"Failed to load bucket policies: {e}")
|
||||
|
||||
policies: Dict[str, Any] = raw_payload.get("policies", {})
|
||||
parsed: Dict[str, List[BucketPolicyStatement]] = {}
|
||||
for bucket, policy in policies.items():
|
||||
parsed[bucket.lower()] = self._normalize_policy(policy)
|
||||
self._raw = {bucket.lower(): policy for bucket, policy in policies.items()}
|
||||
self._policies = parsed
|
||||
|
||||
def _persist(self) -> None:
|
||||
payload = {"policies": self._raw}
|
||||
self.policy_path.write_text(json.dumps(payload, indent=2))
|
||||
|
||||
def _normalize_policy(self, policy: Dict[str, Any]) -> List[BucketPolicyStatement]:
|
||||
statements_raw: Sequence[Dict[str, Any]] = policy.get("Statement", [])
|
||||
statements: List[BucketPolicyStatement] = []
|
||||
for statement in statements_raw:
|
||||
actions = _normalize_actions(statement.get("Action", []))
|
||||
principals = _normalize_principals(statement.get("Principal", "*"))
|
||||
resources_field = statement.get("Resource", [])
|
||||
if isinstance(resources_field, str):
|
||||
resources_field = [resources_field]
|
||||
resources: List[tuple[str | None, str | None]] = []
|
||||
for resource in resources_field:
|
||||
bucket, pattern = _parse_resource(str(resource))
|
||||
if bucket:
|
||||
resources.append((bucket, pattern))
|
||||
if not resources:
|
||||
continue
|
||||
effect = statement.get("Effect", "Allow").lower()
|
||||
conditions = self._normalize_conditions(statement.get("Condition", {}))
|
||||
statements.append(
|
||||
BucketPolicyStatement(
|
||||
sid=statement.get("Sid"),
|
||||
effect=effect,
|
||||
principals=principals,
|
||||
actions=actions or ["*"],
|
||||
resources=resources,
|
||||
conditions=conditions,
|
||||
)
|
||||
)
|
||||
return statements
|
||||
|
||||
def _normalize_conditions(self, condition_block: Dict[str, Any]) -> Dict[str, Dict[str, List[str]]]:
|
||||
if not condition_block or not isinstance(condition_block, dict):
|
||||
return {}
|
||||
normalized: Dict[str, Dict[str, List[str]]] = {}
|
||||
for operator, key_values in condition_block.items():
|
||||
if not isinstance(key_values, dict):
|
||||
continue
|
||||
normalized[operator] = {}
|
||||
for cond_key, cond_values in key_values.items():
|
||||
if isinstance(cond_values, str):
|
||||
normalized[operator][cond_key] = [cond_values]
|
||||
elif isinstance(cond_values, list):
|
||||
normalized[operator][cond_key] = [str(v) for v in cond_values]
|
||||
else:
|
||||
normalized[operator][cond_key] = [str(cond_values)]
|
||||
return normalized
|
||||
109
python/app/compression.py
Normal file
109
python/app/compression.py
Normal file
@@ -0,0 +1,109 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import gzip
|
||||
import io
|
||||
from typing import Callable, Iterable, List, Tuple
|
||||
|
||||
COMPRESSIBLE_MIMES = frozenset([
|
||||
'application/json',
|
||||
'application/javascript',
|
||||
'application/xml',
|
||||
'text/html',
|
||||
'text/css',
|
||||
'text/plain',
|
||||
'text/xml',
|
||||
'text/javascript',
|
||||
'application/x-ndjson',
|
||||
])
|
||||
|
||||
MIN_SIZE_FOR_COMPRESSION = 500
|
||||
|
||||
|
||||
class GzipMiddleware:
|
||||
def __init__(self, app: Callable, compression_level: int = 6, min_size: int = MIN_SIZE_FOR_COMPRESSION):
|
||||
self.app = app
|
||||
self.compression_level = compression_level
|
||||
self.min_size = min_size
|
||||
|
||||
def __call__(self, environ: dict, start_response: Callable) -> Iterable[bytes]:
|
||||
accept_encoding = environ.get('HTTP_ACCEPT_ENCODING', '')
|
||||
if 'gzip' not in accept_encoding.lower():
|
||||
return self.app(environ, start_response)
|
||||
|
||||
response_started = False
|
||||
status_code = None
|
||||
response_headers: List[Tuple[str, str]] = []
|
||||
content_type = None
|
||||
content_length = None
|
||||
should_compress = False
|
||||
passthrough = False
|
||||
exc_info_holder = [None]
|
||||
|
||||
def custom_start_response(status: str, headers: List[Tuple[str, str]], exc_info=None):
|
||||
nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress, passthrough
|
||||
response_started = True
|
||||
status_code = int(status.split(' ', 1)[0])
|
||||
response_headers = list(headers)
|
||||
exc_info_holder[0] = exc_info
|
||||
|
||||
for name, value in headers:
|
||||
name_lower = name.lower()
|
||||
if name_lower == 'content-type':
|
||||
content_type = value.split(';')[0].strip().lower()
|
||||
elif name_lower == 'content-length':
|
||||
try:
|
||||
content_length = int(value)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
elif name_lower == 'content-encoding':
|
||||
passthrough = True
|
||||
return start_response(status, headers, exc_info)
|
||||
elif name_lower == 'x-stream-response':
|
||||
passthrough = True
|
||||
return start_response(status, headers, exc_info)
|
||||
|
||||
if content_type and content_type in COMPRESSIBLE_MIMES:
|
||||
if content_length is None or content_length >= self.min_size:
|
||||
should_compress = True
|
||||
else:
|
||||
passthrough = True
|
||||
return start_response(status, headers, exc_info)
|
||||
|
||||
return None
|
||||
|
||||
app_iter = self.app(environ, custom_start_response)
|
||||
|
||||
if passthrough:
|
||||
return app_iter
|
||||
|
||||
response_body = b''.join(app_iter)
|
||||
|
||||
if not response_started:
|
||||
return [response_body]
|
||||
|
||||
if should_compress and len(response_body) >= self.min_size:
|
||||
buf = io.BytesIO()
|
||||
with gzip.GzipFile(fileobj=buf, mode='wb', compresslevel=self.compression_level) as gz:
|
||||
gz.write(response_body)
|
||||
compressed = buf.getvalue()
|
||||
|
||||
if len(compressed) < len(response_body):
|
||||
response_body = compressed
|
||||
new_headers = []
|
||||
for name, value in response_headers:
|
||||
if name.lower() not in ('content-length', 'content-encoding'):
|
||||
new_headers.append((name, value))
|
||||
new_headers.append(('Content-Encoding', 'gzip'))
|
||||
new_headers.append(('Content-Length', str(len(response_body))))
|
||||
new_headers.append(('Vary', 'Accept-Encoding'))
|
||||
response_headers = new_headers
|
||||
|
||||
status_str = f"{status_code} " + {
|
||||
200: "OK", 201: "Created", 204: "No Content", 206: "Partial Content",
|
||||
301: "Moved Permanently", 302: "Found", 304: "Not Modified",
|
||||
400: "Bad Request", 401: "Unauthorized", 403: "Forbidden", 404: "Not Found",
|
||||
405: "Method Not Allowed", 409: "Conflict", 500: "Internal Server Error",
|
||||
}.get(status_code, "Unknown")
|
||||
|
||||
start_response(status_str, response_headers, exc_info_holder[0])
|
||||
return [response_body]
|
||||
683
python/app/config.py
Normal file
683
python/app/config.py
Normal file
@@ -0,0 +1,683 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import secrets
|
||||
import shutil
|
||||
import sys
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import psutil
|
||||
|
||||
|
||||
def _calculate_auto_threads() -> int:
|
||||
cpu_count = psutil.cpu_count(logical=True) or 4
|
||||
return max(1, min(cpu_count * 2, 64))
|
||||
|
||||
|
||||
def _calculate_auto_connection_limit() -> int:
|
||||
available_mb = psutil.virtual_memory().available / (1024 * 1024)
|
||||
calculated = int(available_mb / 5)
|
||||
return max(20, min(calculated, 1000))
|
||||
|
||||
|
||||
def _calculate_auto_backlog(connection_limit: int) -> int:
|
||||
return max(128, min(connection_limit * 2, 4096))
|
||||
|
||||
|
||||
def _validate_rate_limit(value: str) -> str:
|
||||
pattern = r"^\d+\s+per\s+(second|minute|hour|day)$"
|
||||
if not re.match(pattern, value):
|
||||
raise ValueError(f"Invalid rate limit format: {value}. Expected format: '200 per minute'")
|
||||
return value
|
||||
|
||||
if getattr(sys, "frozen", False):
|
||||
# Running in a PyInstaller bundle
|
||||
PROJECT_ROOT = Path(sys._MEIPASS)
|
||||
else:
|
||||
# Running in a normal Python environment
|
||||
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
def _prepare_config_file(active_path: Path, legacy_path: Optional[Path] = None) -> Path:
|
||||
"""Ensure config directories exist and migrate legacy files when possible."""
|
||||
active_path = Path(active_path)
|
||||
active_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
if legacy_path:
|
||||
legacy_path = Path(legacy_path)
|
||||
if not active_path.exists() and legacy_path.exists():
|
||||
legacy_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
shutil.move(str(legacy_path), str(active_path))
|
||||
except OSError:
|
||||
shutil.copy2(legacy_path, active_path)
|
||||
try:
|
||||
legacy_path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
return active_path
|
||||
|
||||
|
||||
@dataclass
|
||||
class AppConfig:
|
||||
storage_root: Path
|
||||
max_upload_size: int
|
||||
ui_page_size: int
|
||||
secret_key: str
|
||||
iam_config_path: Path
|
||||
bucket_policy_path: Path
|
||||
api_base_url: Optional[str]
|
||||
aws_region: str
|
||||
aws_service: str
|
||||
ui_enforce_bucket_policies: bool
|
||||
log_level: str
|
||||
log_to_file: bool
|
||||
log_path: Path
|
||||
log_max_bytes: int
|
||||
log_backup_count: int
|
||||
ratelimit_default: str
|
||||
ratelimit_storage_uri: str
|
||||
ratelimit_list_buckets: str
|
||||
ratelimit_bucket_ops: str
|
||||
ratelimit_object_ops: str
|
||||
ratelimit_head_ops: str
|
||||
cors_origins: list[str]
|
||||
cors_methods: list[str]
|
||||
cors_allow_headers: list[str]
|
||||
cors_expose_headers: list[str]
|
||||
session_lifetime_days: int
|
||||
auth_max_attempts: int
|
||||
auth_lockout_minutes: int
|
||||
bulk_delete_max_keys: int
|
||||
secret_ttl_seconds: int
|
||||
stream_chunk_size: int
|
||||
multipart_min_part_size: int
|
||||
bucket_stats_cache_ttl: int
|
||||
object_cache_ttl: int
|
||||
encryption_enabled: bool
|
||||
encryption_master_key_path: Path
|
||||
kms_enabled: bool
|
||||
kms_keys_path: Path
|
||||
default_encryption_algorithm: str
|
||||
display_timezone: str
|
||||
lifecycle_enabled: bool
|
||||
lifecycle_interval_seconds: int
|
||||
metrics_history_enabled: bool
|
||||
metrics_history_retention_hours: int
|
||||
metrics_history_interval_minutes: int
|
||||
operation_metrics_enabled: bool
|
||||
operation_metrics_interval_minutes: int
|
||||
operation_metrics_retention_hours: int
|
||||
server_threads: int
|
||||
server_connection_limit: int
|
||||
server_backlog: int
|
||||
server_channel_timeout: int
|
||||
server_max_buffer_size: int
|
||||
server_threads_auto: bool
|
||||
server_connection_limit_auto: bool
|
||||
server_backlog_auto: bool
|
||||
site_sync_enabled: bool
|
||||
site_sync_interval_seconds: int
|
||||
site_sync_batch_size: int
|
||||
sigv4_timestamp_tolerance_seconds: int
|
||||
presigned_url_min_expiry_seconds: int
|
||||
presigned_url_max_expiry_seconds: int
|
||||
replication_connect_timeout_seconds: int
|
||||
replication_read_timeout_seconds: int
|
||||
replication_max_retries: int
|
||||
replication_streaming_threshold_bytes: int
|
||||
replication_max_failures_per_bucket: int
|
||||
site_sync_connect_timeout_seconds: int
|
||||
site_sync_read_timeout_seconds: int
|
||||
site_sync_max_retries: int
|
||||
site_sync_clock_skew_tolerance_seconds: float
|
||||
object_key_max_length_bytes: int
|
||||
object_cache_max_size: int
|
||||
meta_read_cache_max: int
|
||||
bucket_config_cache_ttl_seconds: float
|
||||
object_tag_limit: int
|
||||
encryption_chunk_size_bytes: int
|
||||
kms_generate_data_key_min_bytes: int
|
||||
kms_generate_data_key_max_bytes: int
|
||||
lifecycle_max_history_per_bucket: int
|
||||
site_id: Optional[str]
|
||||
site_endpoint: Optional[str]
|
||||
site_region: str
|
||||
site_priority: int
|
||||
ratelimit_admin: str
|
||||
num_trusted_proxies: int
|
||||
allowed_redirect_hosts: list[str]
|
||||
allow_internal_endpoints: bool
|
||||
website_hosting_enabled: bool
|
||||
gc_enabled: bool
|
||||
gc_interval_hours: float
|
||||
gc_temp_file_max_age_hours: float
|
||||
gc_multipart_max_age_days: int
|
||||
gc_lock_file_max_age_hours: float
|
||||
gc_dry_run: bool
|
||||
gc_io_throttle_ms: int
|
||||
integrity_enabled: bool
|
||||
integrity_interval_hours: float
|
||||
integrity_batch_size: int
|
||||
integrity_auto_heal: bool
|
||||
integrity_dry_run: bool
|
||||
integrity_io_throttle_ms: int
|
||||
|
||||
@classmethod
|
||||
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
|
||||
overrides = overrides or {}
|
||||
|
||||
def _get(name: str, default: Any) -> Any:
|
||||
return overrides.get(name, os.getenv(name, default))
|
||||
|
||||
storage_root = Path(_get("STORAGE_ROOT", PROJECT_ROOT / "data")).resolve()
|
||||
max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024))
|
||||
ui_page_size = int(_get("UI_PAGE_SIZE", 100))
|
||||
auth_max_attempts = int(_get("AUTH_MAX_ATTEMPTS", 5))
|
||||
auth_lockout_minutes = int(_get("AUTH_LOCKOUT_MINUTES", 15))
|
||||
bulk_delete_max_keys = int(_get("BULK_DELETE_MAX_KEYS", 500))
|
||||
secret_ttl_seconds = int(_get("SECRET_TTL_SECONDS", 300))
|
||||
stream_chunk_size = int(_get("STREAM_CHUNK_SIZE", 64 * 1024))
|
||||
multipart_min_part_size = int(_get("MULTIPART_MIN_PART_SIZE", 5 * 1024 * 1024))
|
||||
lifecycle_enabled = _get("LIFECYCLE_ENABLED", "false").lower() in ("true", "1", "yes")
|
||||
lifecycle_interval_seconds = int(_get("LIFECYCLE_INTERVAL_SECONDS", 3600))
|
||||
default_secret = "dev-secret-key"
|
||||
secret_key = str(_get("SECRET_KEY", default_secret))
|
||||
|
||||
if not secret_key or secret_key == default_secret:
|
||||
secret_file = storage_root / ".myfsio.sys" / "config" / ".secret"
|
||||
if secret_file.exists():
|
||||
secret_key = secret_file.read_text().strip()
|
||||
else:
|
||||
generated = secrets.token_urlsafe(32)
|
||||
if secret_key == default_secret:
|
||||
warnings.warn("Using insecure default SECRET_KEY. A random value has been generated and persisted; set SECRET_KEY for production", RuntimeWarning)
|
||||
try:
|
||||
secret_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
secret_file.write_text(generated)
|
||||
try:
|
||||
os.chmod(secret_file, 0o600)
|
||||
except OSError:
|
||||
pass
|
||||
secret_key = generated
|
||||
except OSError:
|
||||
secret_key = generated
|
||||
|
||||
iam_env_override = "IAM_CONFIG" in overrides or "IAM_CONFIG" in os.environ
|
||||
bucket_policy_override = "BUCKET_POLICY_PATH" in overrides or "BUCKET_POLICY_PATH" in os.environ
|
||||
|
||||
default_iam_path = storage_root / ".myfsio.sys" / "config" / "iam.json"
|
||||
default_bucket_policy_path = storage_root / ".myfsio.sys" / "config" / "bucket_policies.json"
|
||||
|
||||
iam_config_path = Path(_get("IAM_CONFIG", default_iam_path)).resolve()
|
||||
bucket_policy_path = Path(_get("BUCKET_POLICY_PATH", default_bucket_policy_path)).resolve()
|
||||
|
||||
iam_config_path = _prepare_config_file(
|
||||
iam_config_path,
|
||||
legacy_path=None if iam_env_override else storage_root / "iam.json",
|
||||
)
|
||||
bucket_policy_path = _prepare_config_file(
|
||||
bucket_policy_path,
|
||||
legacy_path=None if bucket_policy_override else storage_root / "bucket_policies.json",
|
||||
)
|
||||
api_base_url = _get("API_BASE_URL", None)
|
||||
if api_base_url:
|
||||
api_base_url = str(api_base_url)
|
||||
|
||||
aws_region = str(_get("AWS_REGION", "us-east-1"))
|
||||
aws_service = str(_get("AWS_SERVICE", "s3"))
|
||||
enforce_ui_policies = str(_get("UI_ENFORCE_BUCKET_POLICIES", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
log_level = str(_get("LOG_LEVEL", "INFO")).upper()
|
||||
log_to_file = str(_get("LOG_TO_FILE", "1")).lower() in {"1", "true", "yes", "on"}
|
||||
log_dir = Path(_get("LOG_DIR", storage_root.parent / "logs")).resolve()
|
||||
log_dir.mkdir(parents=True, exist_ok=True)
|
||||
log_path = log_dir / str(_get("LOG_FILE", "app.log"))
|
||||
log_max_bytes = int(_get("LOG_MAX_BYTES", 5 * 1024 * 1024))
|
||||
log_backup_count = int(_get("LOG_BACKUP_COUNT", 3))
|
||||
ratelimit_default = _validate_rate_limit(str(_get("RATE_LIMIT_DEFAULT", "200 per minute")))
|
||||
ratelimit_storage_uri = str(_get("RATE_LIMIT_STORAGE_URI", "memory://"))
|
||||
ratelimit_list_buckets = _validate_rate_limit(str(_get("RATE_LIMIT_LIST_BUCKETS", "60 per minute")))
|
||||
ratelimit_bucket_ops = _validate_rate_limit(str(_get("RATE_LIMIT_BUCKET_OPS", "120 per minute")))
|
||||
ratelimit_object_ops = _validate_rate_limit(str(_get("RATE_LIMIT_OBJECT_OPS", "240 per minute")))
|
||||
ratelimit_head_ops = _validate_rate_limit(str(_get("RATE_LIMIT_HEAD_OPS", "100 per minute")))
|
||||
|
||||
def _csv(value: str, default: list[str]) -> list[str]:
|
||||
if not value:
|
||||
return default
|
||||
parts = [segment.strip() for segment in value.split(",") if segment.strip()]
|
||||
return parts or default
|
||||
|
||||
cors_origins = _csv(str(_get("CORS_ORIGINS", "*")), ["*"])
|
||||
cors_methods = _csv(str(_get("CORS_METHODS", "GET,PUT,POST,DELETE,OPTIONS,HEAD")), ["GET", "PUT", "POST", "DELETE", "OPTIONS", "HEAD"])
|
||||
cors_allow_headers = _csv(str(_get("CORS_ALLOW_HEADERS", "*")), ["*"])
|
||||
cors_expose_headers = _csv(str(_get("CORS_EXPOSE_HEADERS", "*")), ["*"])
|
||||
session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30))
|
||||
bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60))
|
||||
object_cache_ttl = int(_get("OBJECT_CACHE_TTL", 60))
|
||||
|
||||
encryption_enabled = str(_get("ENCRYPTION_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
encryption_keys_dir = storage_root / ".myfsio.sys" / "keys"
|
||||
encryption_master_key_path = Path(_get("ENCRYPTION_MASTER_KEY_PATH", encryption_keys_dir / "master.key")).resolve()
|
||||
kms_enabled = str(_get("KMS_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
kms_keys_path = Path(_get("KMS_KEYS_PATH", encryption_keys_dir / "kms_keys.json")).resolve()
|
||||
default_encryption_algorithm = str(_get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"))
|
||||
display_timezone = str(_get("DISPLAY_TIMEZONE", "UTC"))
|
||||
metrics_history_enabled = str(_get("METRICS_HISTORY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
metrics_history_retention_hours = int(_get("METRICS_HISTORY_RETENTION_HOURS", 24))
|
||||
metrics_history_interval_minutes = int(_get("METRICS_HISTORY_INTERVAL_MINUTES", 5))
|
||||
operation_metrics_enabled = str(_get("OPERATION_METRICS_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
operation_metrics_interval_minutes = int(_get("OPERATION_METRICS_INTERVAL_MINUTES", 5))
|
||||
operation_metrics_retention_hours = int(_get("OPERATION_METRICS_RETENTION_HOURS", 24))
|
||||
|
||||
_raw_threads = int(_get("SERVER_THREADS", 0))
|
||||
if _raw_threads == 0:
|
||||
server_threads = _calculate_auto_threads()
|
||||
server_threads_auto = True
|
||||
else:
|
||||
server_threads = _raw_threads
|
||||
server_threads_auto = False
|
||||
|
||||
_raw_conn_limit = int(_get("SERVER_CONNECTION_LIMIT", 0))
|
||||
if _raw_conn_limit == 0:
|
||||
server_connection_limit = _calculate_auto_connection_limit()
|
||||
server_connection_limit_auto = True
|
||||
else:
|
||||
server_connection_limit = _raw_conn_limit
|
||||
server_connection_limit_auto = False
|
||||
|
||||
_raw_backlog = int(_get("SERVER_BACKLOG", 0))
|
||||
if _raw_backlog == 0:
|
||||
server_backlog = _calculate_auto_backlog(server_connection_limit)
|
||||
server_backlog_auto = True
|
||||
else:
|
||||
server_backlog = _raw_backlog
|
||||
server_backlog_auto = False
|
||||
|
||||
server_channel_timeout = int(_get("SERVER_CHANNEL_TIMEOUT", 120))
|
||||
server_max_buffer_size = int(_get("SERVER_MAX_BUFFER_SIZE", 1024 * 1024 * 128))
|
||||
site_sync_enabled = str(_get("SITE_SYNC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
site_sync_interval_seconds = int(_get("SITE_SYNC_INTERVAL_SECONDS", 60))
|
||||
site_sync_batch_size = int(_get("SITE_SYNC_BATCH_SIZE", 100))
|
||||
|
||||
sigv4_timestamp_tolerance_seconds = int(_get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900))
|
||||
presigned_url_min_expiry_seconds = int(_get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1))
|
||||
presigned_url_max_expiry_seconds = int(_get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800))
|
||||
replication_connect_timeout_seconds = int(_get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5))
|
||||
replication_read_timeout_seconds = int(_get("REPLICATION_READ_TIMEOUT_SECONDS", 30))
|
||||
replication_max_retries = int(_get("REPLICATION_MAX_RETRIES", 2))
|
||||
replication_streaming_threshold_bytes = int(_get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024))
|
||||
replication_max_failures_per_bucket = int(_get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50))
|
||||
site_sync_connect_timeout_seconds = int(_get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10))
|
||||
site_sync_read_timeout_seconds = int(_get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120))
|
||||
site_sync_max_retries = int(_get("SITE_SYNC_MAX_RETRIES", 2))
|
||||
site_sync_clock_skew_tolerance_seconds = float(_get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0))
|
||||
object_key_max_length_bytes = int(_get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024))
|
||||
object_cache_max_size = int(_get("OBJECT_CACHE_MAX_SIZE", 100))
|
||||
meta_read_cache_max = int(_get("META_READ_CACHE_MAX", 2048))
|
||||
bucket_config_cache_ttl_seconds = float(_get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0))
|
||||
object_tag_limit = int(_get("OBJECT_TAG_LIMIT", 50))
|
||||
encryption_chunk_size_bytes = int(_get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024))
|
||||
kms_generate_data_key_min_bytes = int(_get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1))
|
||||
kms_generate_data_key_max_bytes = int(_get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024))
|
||||
lifecycle_max_history_per_bucket = int(_get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50))
|
||||
|
||||
site_id_raw = _get("SITE_ID", None)
|
||||
site_id = str(site_id_raw).strip() if site_id_raw else None
|
||||
site_endpoint_raw = _get("SITE_ENDPOINT", None)
|
||||
site_endpoint = str(site_endpoint_raw).strip() if site_endpoint_raw else None
|
||||
site_region = str(_get("SITE_REGION", "us-east-1"))
|
||||
site_priority = int(_get("SITE_PRIORITY", 100))
|
||||
ratelimit_admin = _validate_rate_limit(str(_get("RATE_LIMIT_ADMIN", "60 per minute")))
|
||||
num_trusted_proxies = int(_get("NUM_TRUSTED_PROXIES", 1))
|
||||
allowed_redirect_hosts_raw = _get("ALLOWED_REDIRECT_HOSTS", "")
|
||||
allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
|
||||
allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
gc_enabled = str(_get("GC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
gc_interval_hours = float(_get("GC_INTERVAL_HOURS", 6.0))
|
||||
gc_temp_file_max_age_hours = float(_get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0))
|
||||
gc_multipart_max_age_days = int(_get("GC_MULTIPART_MAX_AGE_DAYS", 7))
|
||||
gc_lock_file_max_age_hours = float(_get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0))
|
||||
gc_dry_run = str(_get("GC_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
gc_io_throttle_ms = int(_get("GC_IO_THROTTLE_MS", 10))
|
||||
integrity_enabled = str(_get("INTEGRITY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
integrity_interval_hours = float(_get("INTEGRITY_INTERVAL_HOURS", 24.0))
|
||||
integrity_batch_size = int(_get("INTEGRITY_BATCH_SIZE", 1000))
|
||||
integrity_auto_heal = str(_get("INTEGRITY_AUTO_HEAL", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
integrity_dry_run = str(_get("INTEGRITY_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
integrity_io_throttle_ms = int(_get("INTEGRITY_IO_THROTTLE_MS", 10))
|
||||
|
||||
return cls(storage_root=storage_root,
|
||||
max_upload_size=max_upload_size,
|
||||
ui_page_size=ui_page_size,
|
||||
secret_key=secret_key,
|
||||
iam_config_path=iam_config_path,
|
||||
bucket_policy_path=bucket_policy_path,
|
||||
api_base_url=api_base_url,
|
||||
aws_region=aws_region,
|
||||
aws_service=aws_service,
|
||||
ui_enforce_bucket_policies=enforce_ui_policies,
|
||||
log_level=log_level,
|
||||
log_to_file=log_to_file,
|
||||
log_path=log_path,
|
||||
log_max_bytes=log_max_bytes,
|
||||
log_backup_count=log_backup_count,
|
||||
ratelimit_default=ratelimit_default,
|
||||
ratelimit_storage_uri=ratelimit_storage_uri,
|
||||
ratelimit_list_buckets=ratelimit_list_buckets,
|
||||
ratelimit_bucket_ops=ratelimit_bucket_ops,
|
||||
ratelimit_object_ops=ratelimit_object_ops,
|
||||
ratelimit_head_ops=ratelimit_head_ops,
|
||||
cors_origins=cors_origins,
|
||||
cors_methods=cors_methods,
|
||||
cors_allow_headers=cors_allow_headers,
|
||||
cors_expose_headers=cors_expose_headers,
|
||||
session_lifetime_days=session_lifetime_days,
|
||||
auth_max_attempts=auth_max_attempts,
|
||||
auth_lockout_minutes=auth_lockout_minutes,
|
||||
bulk_delete_max_keys=bulk_delete_max_keys,
|
||||
secret_ttl_seconds=secret_ttl_seconds,
|
||||
stream_chunk_size=stream_chunk_size,
|
||||
multipart_min_part_size=multipart_min_part_size,
|
||||
bucket_stats_cache_ttl=bucket_stats_cache_ttl,
|
||||
object_cache_ttl=object_cache_ttl,
|
||||
encryption_enabled=encryption_enabled,
|
||||
encryption_master_key_path=encryption_master_key_path,
|
||||
kms_enabled=kms_enabled,
|
||||
kms_keys_path=kms_keys_path,
|
||||
default_encryption_algorithm=default_encryption_algorithm,
|
||||
display_timezone=display_timezone,
|
||||
lifecycle_enabled=lifecycle_enabled,
|
||||
lifecycle_interval_seconds=lifecycle_interval_seconds,
|
||||
metrics_history_enabled=metrics_history_enabled,
|
||||
metrics_history_retention_hours=metrics_history_retention_hours,
|
||||
metrics_history_interval_minutes=metrics_history_interval_minutes,
|
||||
operation_metrics_enabled=operation_metrics_enabled,
|
||||
operation_metrics_interval_minutes=operation_metrics_interval_minutes,
|
||||
operation_metrics_retention_hours=operation_metrics_retention_hours,
|
||||
server_threads=server_threads,
|
||||
server_connection_limit=server_connection_limit,
|
||||
server_backlog=server_backlog,
|
||||
server_channel_timeout=server_channel_timeout,
|
||||
server_max_buffer_size=server_max_buffer_size,
|
||||
server_threads_auto=server_threads_auto,
|
||||
server_connection_limit_auto=server_connection_limit_auto,
|
||||
server_backlog_auto=server_backlog_auto,
|
||||
site_sync_enabled=site_sync_enabled,
|
||||
site_sync_interval_seconds=site_sync_interval_seconds,
|
||||
site_sync_batch_size=site_sync_batch_size,
|
||||
sigv4_timestamp_tolerance_seconds=sigv4_timestamp_tolerance_seconds,
|
||||
presigned_url_min_expiry_seconds=presigned_url_min_expiry_seconds,
|
||||
presigned_url_max_expiry_seconds=presigned_url_max_expiry_seconds,
|
||||
replication_connect_timeout_seconds=replication_connect_timeout_seconds,
|
||||
replication_read_timeout_seconds=replication_read_timeout_seconds,
|
||||
replication_max_retries=replication_max_retries,
|
||||
replication_streaming_threshold_bytes=replication_streaming_threshold_bytes,
|
||||
replication_max_failures_per_bucket=replication_max_failures_per_bucket,
|
||||
site_sync_connect_timeout_seconds=site_sync_connect_timeout_seconds,
|
||||
site_sync_read_timeout_seconds=site_sync_read_timeout_seconds,
|
||||
site_sync_max_retries=site_sync_max_retries,
|
||||
site_sync_clock_skew_tolerance_seconds=site_sync_clock_skew_tolerance_seconds,
|
||||
object_key_max_length_bytes=object_key_max_length_bytes,
|
||||
object_cache_max_size=object_cache_max_size,
|
||||
meta_read_cache_max=meta_read_cache_max,
|
||||
bucket_config_cache_ttl_seconds=bucket_config_cache_ttl_seconds,
|
||||
object_tag_limit=object_tag_limit,
|
||||
encryption_chunk_size_bytes=encryption_chunk_size_bytes,
|
||||
kms_generate_data_key_min_bytes=kms_generate_data_key_min_bytes,
|
||||
kms_generate_data_key_max_bytes=kms_generate_data_key_max_bytes,
|
||||
lifecycle_max_history_per_bucket=lifecycle_max_history_per_bucket,
|
||||
site_id=site_id,
|
||||
site_endpoint=site_endpoint,
|
||||
site_region=site_region,
|
||||
site_priority=site_priority,
|
||||
ratelimit_admin=ratelimit_admin,
|
||||
num_trusted_proxies=num_trusted_proxies,
|
||||
allowed_redirect_hosts=allowed_redirect_hosts,
|
||||
allow_internal_endpoints=allow_internal_endpoints,
|
||||
website_hosting_enabled=website_hosting_enabled,
|
||||
gc_enabled=gc_enabled,
|
||||
gc_interval_hours=gc_interval_hours,
|
||||
gc_temp_file_max_age_hours=gc_temp_file_max_age_hours,
|
||||
gc_multipart_max_age_days=gc_multipart_max_age_days,
|
||||
gc_lock_file_max_age_hours=gc_lock_file_max_age_hours,
|
||||
gc_dry_run=gc_dry_run,
|
||||
gc_io_throttle_ms=gc_io_throttle_ms,
|
||||
integrity_enabled=integrity_enabled,
|
||||
integrity_interval_hours=integrity_interval_hours,
|
||||
integrity_batch_size=integrity_batch_size,
|
||||
integrity_auto_heal=integrity_auto_heal,
|
||||
integrity_dry_run=integrity_dry_run,
|
||||
integrity_io_throttle_ms=integrity_io_throttle_ms)
|
||||
|
||||
def validate_and_report(self) -> list[str]:
|
||||
"""Validate configuration and return a list of warnings/issues.
|
||||
|
||||
Call this at startup to detect potential misconfigurations before
|
||||
the application fully commits to running.
|
||||
"""
|
||||
issues = []
|
||||
|
||||
try:
|
||||
test_file = self.storage_root / ".write_test"
|
||||
test_file.touch()
|
||||
test_file.unlink()
|
||||
except (OSError, PermissionError) as e:
|
||||
issues.append(f"CRITICAL: STORAGE_ROOT '{self.storage_root}' is not writable: {e}")
|
||||
|
||||
storage_str = str(self.storage_root).lower()
|
||||
if "/tmp" in storage_str or "\\temp" in storage_str or "appdata\\local\\temp" in storage_str:
|
||||
issues.append(f"WARNING: STORAGE_ROOT '{self.storage_root}' appears to be a temporary directory. Data may be lost on reboot!")
|
||||
|
||||
try:
|
||||
self.iam_config_path.relative_to(self.storage_root)
|
||||
except ValueError:
|
||||
issues.append(f"WARNING: IAM_CONFIG '{self.iam_config_path}' is outside STORAGE_ROOT '{self.storage_root}'. Consider setting IAM_CONFIG explicitly or ensuring paths are aligned.")
|
||||
|
||||
try:
|
||||
self.bucket_policy_path.relative_to(self.storage_root)
|
||||
except ValueError:
|
||||
issues.append(f"WARNING: BUCKET_POLICY_PATH '{self.bucket_policy_path}' is outside STORAGE_ROOT '{self.storage_root}'. Consider setting BUCKET_POLICY_PATH explicitly.")
|
||||
|
||||
try:
|
||||
self.log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
test_log = self.log_path.parent / ".write_test"
|
||||
test_log.touch()
|
||||
test_log.unlink()
|
||||
except (OSError, PermissionError) as e:
|
||||
issues.append(f"WARNING: Log directory '{self.log_path.parent}' is not writable: {e}")
|
||||
|
||||
log_str = str(self.log_path).lower()
|
||||
if "/tmp" in log_str or "\\temp" in log_str or "appdata\\local\\temp" in log_str:
|
||||
issues.append(f"WARNING: LOG_DIR '{self.log_path.parent}' appears to be a temporary directory. Logs may be lost on reboot!")
|
||||
|
||||
if self.encryption_enabled:
|
||||
try:
|
||||
self.encryption_master_key_path.relative_to(self.storage_root)
|
||||
except ValueError:
|
||||
issues.append(f"WARNING: ENCRYPTION_MASTER_KEY_PATH '{self.encryption_master_key_path}' is outside STORAGE_ROOT. Ensure proper backup procedures.")
|
||||
|
||||
if self.kms_enabled:
|
||||
try:
|
||||
self.kms_keys_path.relative_to(self.storage_root)
|
||||
except ValueError:
|
||||
issues.append(f"WARNING: KMS_KEYS_PATH '{self.kms_keys_path}' is outside STORAGE_ROOT. Ensure proper backup procedures.")
|
||||
|
||||
if self.secret_key == "dev-secret-key":
|
||||
issues.append("WARNING: Using default SECRET_KEY. Set SECRET_KEY environment variable for production.")
|
||||
|
||||
if "*" in self.cors_origins:
|
||||
issues.append("INFO: CORS_ORIGINS is set to '*'. Consider restricting to specific domains in production.")
|
||||
|
||||
if not (1 <= self.server_threads <= 64):
|
||||
issues.append(f"CRITICAL: SERVER_THREADS={self.server_threads} is outside valid range (1-64). Server cannot start.")
|
||||
if not (10 <= self.server_connection_limit <= 1000):
|
||||
issues.append(f"CRITICAL: SERVER_CONNECTION_LIMIT={self.server_connection_limit} is outside valid range (10-1000). Server cannot start.")
|
||||
if not (128 <= self.server_backlog <= 4096):
|
||||
issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (128-4096). Server cannot start.")
|
||||
if not (10 <= self.server_channel_timeout <= 300):
|
||||
issues.append(f"CRITICAL: SERVER_CHANNEL_TIMEOUT={self.server_channel_timeout} is outside valid range (10-300). Server cannot start.")
|
||||
if self.server_max_buffer_size < 1024 * 1024:
|
||||
issues.append(f"WARNING: SERVER_MAX_BUFFER_SIZE={self.server_max_buffer_size} is less than 1MB. Large uploads will fail.")
|
||||
|
||||
if sys.platform != "win32":
|
||||
try:
|
||||
import resource
|
||||
soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
|
||||
threshold = int(soft_limit * 0.8)
|
||||
if self.server_connection_limit > threshold:
|
||||
issues.append(f"WARNING: SERVER_CONNECTION_LIMIT={self.server_connection_limit} exceeds 80% of system file descriptor limit (soft={soft_limit}). Consider running 'ulimit -n {self.server_connection_limit + 100}'.")
|
||||
except (ImportError, OSError):
|
||||
pass
|
||||
|
||||
try:
|
||||
import psutil
|
||||
available_mb = psutil.virtual_memory().available / (1024 * 1024)
|
||||
estimated_mb = self.server_threads * 50
|
||||
if estimated_mb > available_mb * 0.5:
|
||||
issues.append(f"WARNING: SERVER_THREADS={self.server_threads} may require ~{estimated_mb}MB memory, exceeding 50% of available RAM ({int(available_mb)}MB).")
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
return issues
|
||||
|
||||
def print_startup_summary(self) -> None:
|
||||
"""Print a summary of the configuration at startup."""
|
||||
print("\n" + "=" * 60)
|
||||
print("MyFSIO Configuration Summary")
|
||||
print("=" * 60)
|
||||
print(f" STORAGE_ROOT: {self.storage_root}")
|
||||
print(f" IAM_CONFIG: {self.iam_config_path}")
|
||||
print(f" BUCKET_POLICY: {self.bucket_policy_path}")
|
||||
print(f" LOG_PATH: {self.log_path}")
|
||||
if self.api_base_url:
|
||||
print(f" API_BASE_URL: {self.api_base_url}")
|
||||
if self.encryption_enabled:
|
||||
print(f" ENCRYPTION: Enabled (Master key: {self.encryption_master_key_path})")
|
||||
if self.kms_enabled:
|
||||
print(f" KMS: Enabled (Keys: {self.kms_keys_path})")
|
||||
if self.website_hosting_enabled:
|
||||
print(f" WEBSITE_HOSTING: Enabled")
|
||||
def _auto(flag: bool) -> str:
|
||||
return " (auto)" if flag else ""
|
||||
print(f" SERVER_THREADS: {self.server_threads}{_auto(self.server_threads_auto)}")
|
||||
print(f" CONNECTION_LIMIT: {self.server_connection_limit}{_auto(self.server_connection_limit_auto)}")
|
||||
print(f" BACKLOG: {self.server_backlog}{_auto(self.server_backlog_auto)}")
|
||||
print(f" CHANNEL_TIMEOUT: {self.server_channel_timeout}s")
|
||||
print(f" MAX_BUFFER_SIZE: {self.server_max_buffer_size // (1024 * 1024)}MB")
|
||||
print("=" * 60)
|
||||
|
||||
issues = self.validate_and_report()
|
||||
if issues:
|
||||
print("\nConfiguration Issues Detected:")
|
||||
for issue in issues:
|
||||
print(f" • {issue}")
|
||||
print()
|
||||
else:
|
||||
print(" ✓ Configuration validated successfully\n")
|
||||
|
||||
def to_flask_config(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"STORAGE_ROOT": str(self.storage_root),
|
||||
"MAX_CONTENT_LENGTH": self.max_upload_size,
|
||||
"UI_PAGE_SIZE": self.ui_page_size,
|
||||
"SECRET_KEY": self.secret_key,
|
||||
"IAM_CONFIG": str(self.iam_config_path),
|
||||
"BUCKET_POLICY_PATH": str(self.bucket_policy_path),
|
||||
"API_BASE_URL": self.api_base_url,
|
||||
"AWS_REGION": self.aws_region,
|
||||
"AWS_SERVICE": self.aws_service,
|
||||
"UI_ENFORCE_BUCKET_POLICIES": self.ui_enforce_bucket_policies,
|
||||
"AUTH_MAX_ATTEMPTS": self.auth_max_attempts,
|
||||
"AUTH_LOCKOUT_MINUTES": self.auth_lockout_minutes,
|
||||
"BULK_DELETE_MAX_KEYS": self.bulk_delete_max_keys,
|
||||
"SECRET_TTL_SECONDS": self.secret_ttl_seconds,
|
||||
"STREAM_CHUNK_SIZE": self.stream_chunk_size,
|
||||
"MULTIPART_MIN_PART_SIZE": self.multipart_min_part_size,
|
||||
"BUCKET_STATS_CACHE_TTL": self.bucket_stats_cache_ttl,
|
||||
"OBJECT_CACHE_TTL": self.object_cache_ttl,
|
||||
"LOG_LEVEL": self.log_level,
|
||||
"LOG_TO_FILE": self.log_to_file,
|
||||
"LOG_FILE": str(self.log_path),
|
||||
"LOG_MAX_BYTES": self.log_max_bytes,
|
||||
"LOG_BACKUP_COUNT": self.log_backup_count,
|
||||
"RATELIMIT_DEFAULT": self.ratelimit_default,
|
||||
"RATELIMIT_STORAGE_URI": self.ratelimit_storage_uri,
|
||||
"RATELIMIT_LIST_BUCKETS": self.ratelimit_list_buckets,
|
||||
"RATELIMIT_BUCKET_OPS": self.ratelimit_bucket_ops,
|
||||
"RATELIMIT_OBJECT_OPS": self.ratelimit_object_ops,
|
||||
"RATELIMIT_HEAD_OPS": self.ratelimit_head_ops,
|
||||
"CORS_ORIGINS": self.cors_origins,
|
||||
"CORS_METHODS": self.cors_methods,
|
||||
"CORS_ALLOW_HEADERS": self.cors_allow_headers,
|
||||
"CORS_EXPOSE_HEADERS": self.cors_expose_headers,
|
||||
"SESSION_LIFETIME_DAYS": self.session_lifetime_days,
|
||||
"ENCRYPTION_ENABLED": self.encryption_enabled,
|
||||
"ENCRYPTION_MASTER_KEY_PATH": str(self.encryption_master_key_path),
|
||||
"KMS_ENABLED": self.kms_enabled,
|
||||
"KMS_KEYS_PATH": str(self.kms_keys_path),
|
||||
"DEFAULT_ENCRYPTION_ALGORITHM": self.default_encryption_algorithm,
|
||||
"DISPLAY_TIMEZONE": self.display_timezone,
|
||||
"LIFECYCLE_ENABLED": self.lifecycle_enabled,
|
||||
"LIFECYCLE_INTERVAL_SECONDS": self.lifecycle_interval_seconds,
|
||||
"METRICS_HISTORY_ENABLED": self.metrics_history_enabled,
|
||||
"METRICS_HISTORY_RETENTION_HOURS": self.metrics_history_retention_hours,
|
||||
"METRICS_HISTORY_INTERVAL_MINUTES": self.metrics_history_interval_minutes,
|
||||
"OPERATION_METRICS_ENABLED": self.operation_metrics_enabled,
|
||||
"OPERATION_METRICS_INTERVAL_MINUTES": self.operation_metrics_interval_minutes,
|
||||
"OPERATION_METRICS_RETENTION_HOURS": self.operation_metrics_retention_hours,
|
||||
"SERVER_THREADS": self.server_threads,
|
||||
"SERVER_CONNECTION_LIMIT": self.server_connection_limit,
|
||||
"SERVER_BACKLOG": self.server_backlog,
|
||||
"SERVER_CHANNEL_TIMEOUT": self.server_channel_timeout,
|
||||
"SERVER_MAX_BUFFER_SIZE": self.server_max_buffer_size,
|
||||
"SITE_SYNC_ENABLED": self.site_sync_enabled,
|
||||
"SITE_SYNC_INTERVAL_SECONDS": self.site_sync_interval_seconds,
|
||||
"SITE_SYNC_BATCH_SIZE": self.site_sync_batch_size,
|
||||
"SIGV4_TIMESTAMP_TOLERANCE_SECONDS": self.sigv4_timestamp_tolerance_seconds,
|
||||
"PRESIGNED_URL_MIN_EXPIRY_SECONDS": self.presigned_url_min_expiry_seconds,
|
||||
"PRESIGNED_URL_MAX_EXPIRY_SECONDS": self.presigned_url_max_expiry_seconds,
|
||||
"REPLICATION_CONNECT_TIMEOUT_SECONDS": self.replication_connect_timeout_seconds,
|
||||
"REPLICATION_READ_TIMEOUT_SECONDS": self.replication_read_timeout_seconds,
|
||||
"REPLICATION_MAX_RETRIES": self.replication_max_retries,
|
||||
"REPLICATION_STREAMING_THRESHOLD_BYTES": self.replication_streaming_threshold_bytes,
|
||||
"REPLICATION_MAX_FAILURES_PER_BUCKET": self.replication_max_failures_per_bucket,
|
||||
"SITE_SYNC_CONNECT_TIMEOUT_SECONDS": self.site_sync_connect_timeout_seconds,
|
||||
"SITE_SYNC_READ_TIMEOUT_SECONDS": self.site_sync_read_timeout_seconds,
|
||||
"SITE_SYNC_MAX_RETRIES": self.site_sync_max_retries,
|
||||
"SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS": self.site_sync_clock_skew_tolerance_seconds,
|
||||
"OBJECT_KEY_MAX_LENGTH_BYTES": self.object_key_max_length_bytes,
|
||||
"OBJECT_CACHE_MAX_SIZE": self.object_cache_max_size,
|
||||
"META_READ_CACHE_MAX": self.meta_read_cache_max,
|
||||
"BUCKET_CONFIG_CACHE_TTL_SECONDS": self.bucket_config_cache_ttl_seconds,
|
||||
"OBJECT_TAG_LIMIT": self.object_tag_limit,
|
||||
"ENCRYPTION_CHUNK_SIZE_BYTES": self.encryption_chunk_size_bytes,
|
||||
"KMS_GENERATE_DATA_KEY_MIN_BYTES": self.kms_generate_data_key_min_bytes,
|
||||
"KMS_GENERATE_DATA_KEY_MAX_BYTES": self.kms_generate_data_key_max_bytes,
|
||||
"LIFECYCLE_MAX_HISTORY_PER_BUCKET": self.lifecycle_max_history_per_bucket,
|
||||
"SITE_ID": self.site_id,
|
||||
"SITE_ENDPOINT": self.site_endpoint,
|
||||
"SITE_REGION": self.site_region,
|
||||
"SITE_PRIORITY": self.site_priority,
|
||||
"RATE_LIMIT_ADMIN": self.ratelimit_admin,
|
||||
"NUM_TRUSTED_PROXIES": self.num_trusted_proxies,
|
||||
"ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
|
||||
"ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
|
||||
"WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled,
|
||||
"GC_ENABLED": self.gc_enabled,
|
||||
"GC_INTERVAL_HOURS": self.gc_interval_hours,
|
||||
"GC_TEMP_FILE_MAX_AGE_HOURS": self.gc_temp_file_max_age_hours,
|
||||
"GC_MULTIPART_MAX_AGE_DAYS": self.gc_multipart_max_age_days,
|
||||
"GC_LOCK_FILE_MAX_AGE_HOURS": self.gc_lock_file_max_age_hours,
|
||||
"GC_DRY_RUN": self.gc_dry_run,
|
||||
"GC_IO_THROTTLE_MS": self.gc_io_throttle_ms,
|
||||
"INTEGRITY_ENABLED": self.integrity_enabled,
|
||||
"INTEGRITY_INTERVAL_HOURS": self.integrity_interval_hours,
|
||||
"INTEGRITY_BATCH_SIZE": self.integrity_batch_size,
|
||||
"INTEGRITY_AUTO_HEAL": self.integrity_auto_heal,
|
||||
"INTEGRITY_DRY_RUN": self.integrity_dry_run,
|
||||
"INTEGRITY_IO_THROTTLE_MS": self.integrity_io_throttle_ms,
|
||||
}
|
||||
60
python/app/connections.py
Normal file
60
python/app/connections.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import asdict, dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from .config import AppConfig
|
||||
|
||||
|
||||
@dataclass
|
||||
class RemoteConnection:
|
||||
id: str
|
||||
name: str
|
||||
endpoint_url: str
|
||||
access_key: str
|
||||
secret_key: str
|
||||
region: str = "us-east-1"
|
||||
|
||||
|
||||
class ConnectionStore:
|
||||
def __init__(self, config_path: Path) -> None:
|
||||
self.config_path = config_path
|
||||
self._connections: Dict[str, RemoteConnection] = {}
|
||||
self.reload()
|
||||
|
||||
def reload(self) -> None:
|
||||
if not self.config_path.exists():
|
||||
self._connections = {}
|
||||
return
|
||||
|
||||
try:
|
||||
with open(self.config_path, "r") as f:
|
||||
data = json.load(f)
|
||||
for item in data:
|
||||
conn = RemoteConnection(**item)
|
||||
self._connections[conn.id] = conn
|
||||
except (OSError, json.JSONDecodeError):
|
||||
self._connections = {}
|
||||
|
||||
def save(self) -> None:
|
||||
self.config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
data = [asdict(conn) for conn in self._connections.values()]
|
||||
with open(self.config_path, "w") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
|
||||
def list(self) -> List[RemoteConnection]:
|
||||
return list(self._connections.values())
|
||||
|
||||
def get(self, connection_id: str) -> Optional[RemoteConnection]:
|
||||
return self._connections.get(connection_id)
|
||||
|
||||
def add(self, connection: RemoteConnection) -> None:
|
||||
self._connections[connection.id] = connection
|
||||
self.save()
|
||||
|
||||
def delete(self, connection_id: str) -> None:
|
||||
if connection_id in self._connections:
|
||||
del self._connections[connection_id]
|
||||
self.save()
|
||||
293
python/app/encrypted_storage.py
Normal file
293
python/app/encrypted_storage.py
Normal file
@@ -0,0 +1,293 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
from pathlib import Path
|
||||
from typing import Any, BinaryIO, Dict, Optional
|
||||
|
||||
from .encryption import EncryptionManager, EncryptionMetadata, EncryptionError
|
||||
from .storage import ObjectStorage, ObjectMeta, StorageError
|
||||
|
||||
|
||||
class EncryptedObjectStorage:
|
||||
"""Object storage with transparent server-side encryption.
|
||||
|
||||
This class wraps ObjectStorage and provides transparent encryption/decryption
|
||||
of objects based on bucket encryption configuration.
|
||||
|
||||
Encryption is applied when:
|
||||
1. Bucket has default encryption configured (SSE-S3 or SSE-KMS)
|
||||
2. Client explicitly requests encryption via headers
|
||||
|
||||
The encryption metadata is stored alongside object metadata.
|
||||
"""
|
||||
|
||||
STREAMING_THRESHOLD = 64 * 1024
|
||||
|
||||
def __init__(self, storage: ObjectStorage, encryption_manager: EncryptionManager):
|
||||
self.storage = storage
|
||||
self.encryption = encryption_manager
|
||||
|
||||
@property
|
||||
def root(self) -> Path:
|
||||
return self.storage.root
|
||||
|
||||
def _should_encrypt(self, bucket_name: str,
|
||||
server_side_encryption: str | None = None) -> tuple[bool, str, str | None]:
|
||||
"""Determine if object should be encrypted.
|
||||
|
||||
Returns:
|
||||
Tuple of (should_encrypt, algorithm, kms_key_id)
|
||||
"""
|
||||
if not self.encryption.enabled:
|
||||
return False, "", None
|
||||
|
||||
if server_side_encryption:
|
||||
if server_side_encryption == "AES256":
|
||||
return True, "AES256", None
|
||||
elif server_side_encryption.startswith("aws:kms"):
|
||||
parts = server_side_encryption.split(":")
|
||||
kms_key_id = parts[2] if len(parts) > 2 else None
|
||||
return True, "aws:kms", kms_key_id
|
||||
|
||||
try:
|
||||
encryption_config = self.storage.get_bucket_encryption(bucket_name)
|
||||
if encryption_config and encryption_config.get("Rules"):
|
||||
rule = encryption_config["Rules"][0]
|
||||
# AWS format: Rules[].ApplyServerSideEncryptionByDefault.SSEAlgorithm
|
||||
sse_default = rule.get("ApplyServerSideEncryptionByDefault", {})
|
||||
algorithm = sse_default.get("SSEAlgorithm", "AES256")
|
||||
kms_key_id = sse_default.get("KMSMasterKeyID")
|
||||
return True, algorithm, kms_key_id
|
||||
except StorageError:
|
||||
pass
|
||||
|
||||
return False, "", None
|
||||
|
||||
def _is_encrypted(self, metadata: Dict[str, str]) -> bool:
|
||||
"""Check if object is encrypted based on its metadata."""
|
||||
return "x-amz-server-side-encryption" in metadata
|
||||
|
||||
def put_object(
|
||||
self,
|
||||
bucket_name: str,
|
||||
object_key: str,
|
||||
stream: BinaryIO,
|
||||
*,
|
||||
metadata: Optional[Dict[str, str]] = None,
|
||||
server_side_encryption: Optional[str] = None,
|
||||
kms_key_id: Optional[str] = None,
|
||||
) -> ObjectMeta:
|
||||
"""Store an object, optionally with encryption.
|
||||
|
||||
Args:
|
||||
bucket_name: Name of the bucket
|
||||
object_key: Key for the object
|
||||
stream: Binary stream of object data
|
||||
metadata: Optional user metadata
|
||||
server_side_encryption: Encryption algorithm ("AES256" or "aws:kms")
|
||||
kms_key_id: KMS key ID (for aws:kms encryption)
|
||||
|
||||
Returns:
|
||||
ObjectMeta with object information
|
||||
|
||||
Performance: Uses streaming encryption for large files to reduce memory usage.
|
||||
"""
|
||||
should_encrypt, algorithm, detected_kms_key = self._should_encrypt(
|
||||
bucket_name, server_side_encryption
|
||||
)
|
||||
|
||||
if kms_key_id is None:
|
||||
kms_key_id = detected_kms_key
|
||||
|
||||
if should_encrypt:
|
||||
try:
|
||||
# Performance: Use streaming encryption to avoid loading entire file into memory
|
||||
encrypted_stream, enc_metadata = self.encryption.encrypt_stream(
|
||||
stream,
|
||||
algorithm=algorithm,
|
||||
context={"bucket": bucket_name, "key": object_key},
|
||||
)
|
||||
|
||||
combined_metadata = metadata.copy() if metadata else {}
|
||||
combined_metadata.update(enc_metadata.to_dict())
|
||||
|
||||
result = self.storage.put_object(
|
||||
bucket_name,
|
||||
object_key,
|
||||
encrypted_stream,
|
||||
metadata=combined_metadata,
|
||||
)
|
||||
|
||||
result.metadata = combined_metadata
|
||||
return result
|
||||
|
||||
except EncryptionError as exc:
|
||||
raise StorageError(f"Encryption failed: {exc}") from exc
|
||||
else:
|
||||
return self.storage.put_object(
|
||||
bucket_name,
|
||||
object_key,
|
||||
stream,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
def get_object_data(self, bucket_name: str, object_key: str) -> tuple[bytes, Dict[str, str]]:
|
||||
"""Get object data, decrypting if necessary.
|
||||
|
||||
Returns:
|
||||
Tuple of (data, metadata)
|
||||
|
||||
Performance: Uses streaming decryption to reduce memory usage.
|
||||
"""
|
||||
path = self.storage.get_object_path(bucket_name, object_key)
|
||||
metadata = self.storage.get_object_metadata(bucket_name, object_key)
|
||||
|
||||
enc_metadata = EncryptionMetadata.from_dict(metadata)
|
||||
if enc_metadata:
|
||||
try:
|
||||
# Performance: Use streaming decryption to avoid loading entire file into memory
|
||||
with path.open("rb") as f:
|
||||
decrypted_stream = self.encryption.decrypt_stream(f, enc_metadata)
|
||||
data = decrypted_stream.read()
|
||||
except EncryptionError as exc:
|
||||
raise StorageError(f"Decryption failed: {exc}") from exc
|
||||
else:
|
||||
with path.open("rb") as f:
|
||||
data = f.read()
|
||||
|
||||
clean_metadata = {
|
||||
k: v for k, v in metadata.items()
|
||||
if not k.startswith("x-amz-encryption")
|
||||
and k != "x-amz-encrypted-data-key"
|
||||
}
|
||||
|
||||
return data, clean_metadata
|
||||
|
||||
def get_object_stream(self, bucket_name: str, object_key: str) -> tuple[BinaryIO, Dict[str, str], int]:
|
||||
"""Get object as a stream, decrypting if necessary.
|
||||
|
||||
Returns:
|
||||
Tuple of (stream, metadata, original_size)
|
||||
"""
|
||||
data, metadata = self.get_object_data(bucket_name, object_key)
|
||||
return io.BytesIO(data), metadata, len(data)
|
||||
|
||||
def list_buckets(self):
|
||||
return self.storage.list_buckets()
|
||||
|
||||
def bucket_exists(self, bucket_name: str) -> bool:
|
||||
return self.storage.bucket_exists(bucket_name)
|
||||
|
||||
def create_bucket(self, bucket_name: str) -> None:
|
||||
return self.storage.create_bucket(bucket_name)
|
||||
|
||||
def delete_bucket(self, bucket_name: str) -> None:
|
||||
return self.storage.delete_bucket(bucket_name)
|
||||
|
||||
def bucket_stats(self, bucket_name: str, cache_ttl: int = 60):
|
||||
return self.storage.bucket_stats(bucket_name, cache_ttl)
|
||||
|
||||
def list_objects(self, bucket_name: str, **kwargs):
|
||||
return self.storage.list_objects(bucket_name, **kwargs)
|
||||
|
||||
def list_objects_shallow(self, bucket_name: str, **kwargs):
|
||||
return self.storage.list_objects_shallow(bucket_name, **kwargs)
|
||||
|
||||
def iter_objects_shallow(self, bucket_name: str, **kwargs):
|
||||
return self.storage.iter_objects_shallow(bucket_name, **kwargs)
|
||||
|
||||
def search_objects(self, bucket_name: str, query: str, **kwargs):
|
||||
return self.storage.search_objects(bucket_name, query, **kwargs)
|
||||
|
||||
def list_objects_all(self, bucket_name: str):
|
||||
return self.storage.list_objects_all(bucket_name)
|
||||
|
||||
def get_object_path(self, bucket_name: str, object_key: str):
|
||||
return self.storage.get_object_path(bucket_name, object_key)
|
||||
|
||||
def get_object_metadata(self, bucket_name: str, object_key: str):
|
||||
return self.storage.get_object_metadata(bucket_name, object_key)
|
||||
|
||||
def delete_object(self, bucket_name: str, object_key: str) -> None:
|
||||
return self.storage.delete_object(bucket_name, object_key)
|
||||
|
||||
def purge_object(self, bucket_name: str, object_key: str) -> None:
|
||||
return self.storage.purge_object(bucket_name, object_key)
|
||||
|
||||
def is_versioning_enabled(self, bucket_name: str) -> bool:
|
||||
return self.storage.is_versioning_enabled(bucket_name)
|
||||
|
||||
def set_bucket_versioning(self, bucket_name: str, enabled: bool) -> None:
|
||||
return self.storage.set_bucket_versioning(bucket_name, enabled)
|
||||
|
||||
def get_bucket_tags(self, bucket_name: str):
|
||||
return self.storage.get_bucket_tags(bucket_name)
|
||||
|
||||
def set_bucket_tags(self, bucket_name: str, tags):
|
||||
return self.storage.set_bucket_tags(bucket_name, tags)
|
||||
|
||||
def get_bucket_cors(self, bucket_name: str):
|
||||
return self.storage.get_bucket_cors(bucket_name)
|
||||
|
||||
def set_bucket_cors(self, bucket_name: str, rules):
|
||||
return self.storage.set_bucket_cors(bucket_name, rules)
|
||||
|
||||
def get_bucket_encryption(self, bucket_name: str):
|
||||
return self.storage.get_bucket_encryption(bucket_name)
|
||||
|
||||
def set_bucket_encryption(self, bucket_name: str, config_payload):
|
||||
return self.storage.set_bucket_encryption(bucket_name, config_payload)
|
||||
|
||||
def get_bucket_lifecycle(self, bucket_name: str):
|
||||
return self.storage.get_bucket_lifecycle(bucket_name)
|
||||
|
||||
def set_bucket_lifecycle(self, bucket_name: str, rules):
|
||||
return self.storage.set_bucket_lifecycle(bucket_name, rules)
|
||||
|
||||
def get_object_tags(self, bucket_name: str, object_key: str):
|
||||
return self.storage.get_object_tags(bucket_name, object_key)
|
||||
|
||||
def set_object_tags(self, bucket_name: str, object_key: str, tags):
|
||||
return self.storage.set_object_tags(bucket_name, object_key, tags)
|
||||
|
||||
def delete_object_tags(self, bucket_name: str, object_key: str):
|
||||
return self.storage.delete_object_tags(bucket_name, object_key)
|
||||
|
||||
def list_object_versions(self, bucket_name: str, object_key: str):
|
||||
return self.storage.list_object_versions(bucket_name, object_key)
|
||||
|
||||
def restore_object_version(self, bucket_name: str, object_key: str, version_id: str):
|
||||
return self.storage.restore_object_version(bucket_name, object_key, version_id)
|
||||
|
||||
def list_orphaned_objects(self, bucket_name: str):
|
||||
return self.storage.list_orphaned_objects(bucket_name)
|
||||
|
||||
def initiate_multipart_upload(self, bucket_name: str, object_key: str, *, metadata=None) -> str:
|
||||
return self.storage.initiate_multipart_upload(bucket_name, object_key, metadata=metadata)
|
||||
|
||||
def upload_multipart_part(self, bucket_name: str, upload_id: str, part_number: int, stream: BinaryIO) -> str:
|
||||
return self.storage.upload_multipart_part(bucket_name, upload_id, part_number, stream)
|
||||
|
||||
def complete_multipart_upload(self, bucket_name: str, upload_id: str, ordered_parts):
|
||||
return self.storage.complete_multipart_upload(bucket_name, upload_id, ordered_parts)
|
||||
|
||||
def abort_multipart_upload(self, bucket_name: str, upload_id: str) -> None:
|
||||
return self.storage.abort_multipart_upload(bucket_name, upload_id)
|
||||
|
||||
def list_multipart_parts(self, bucket_name: str, upload_id: str):
|
||||
return self.storage.list_multipart_parts(bucket_name, upload_id)
|
||||
|
||||
def get_bucket_quota(self, bucket_name: str):
|
||||
return self.storage.get_bucket_quota(bucket_name)
|
||||
|
||||
def set_bucket_quota(self, bucket_name: str, *, max_bytes=None, max_objects=None):
|
||||
return self.storage.set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects)
|
||||
|
||||
def get_bucket_website(self, bucket_name: str):
|
||||
return self.storage.get_bucket_website(bucket_name)
|
||||
|
||||
def set_bucket_website(self, bucket_name: str, website_config):
|
||||
return self.storage.set_bucket_website(bucket_name, website_config)
|
||||
|
||||
def _compute_etag(self, path: Path) -> str:
|
||||
return self.storage._compute_etag(path)
|
||||
653
python/app/encryption.py
Normal file
653
python/app/encryption.py
Normal file
@@ -0,0 +1,653 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, BinaryIO, Dict, Generator, Optional
|
||||
|
||||
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
||||
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
|
||||
from cryptography.hazmat.primitives import hashes
|
||||
|
||||
if sys.platform != "win32":
|
||||
import fcntl
|
||||
|
||||
try:
|
||||
import myfsio_core as _rc
|
||||
if not all(hasattr(_rc, f) for f in (
|
||||
"encrypt_stream_chunked", "decrypt_stream_chunked",
|
||||
)):
|
||||
raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
|
||||
_HAS_RUST = True
|
||||
except ImportError:
|
||||
_rc = None
|
||||
_HAS_RUST = False
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _set_secure_file_permissions(file_path: Path) -> None:
|
||||
"""Set restrictive file permissions (owner read/write only)."""
|
||||
if sys.platform == "win32":
|
||||
try:
|
||||
username = os.environ.get("USERNAME", "")
|
||||
if username:
|
||||
subprocess.run(
|
||||
["icacls", str(file_path), "/inheritance:r",
|
||||
"/grant:r", f"{username}:F"],
|
||||
check=True, capture_output=True
|
||||
)
|
||||
else:
|
||||
logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
|
||||
except (subprocess.SubprocessError, OSError) as exc:
|
||||
logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
|
||||
else:
|
||||
os.chmod(file_path, 0o600)
|
||||
|
||||
|
||||
class EncryptionError(Exception):
|
||||
"""Raised when encryption/decryption fails."""
|
||||
|
||||
|
||||
@dataclass
|
||||
class EncryptionResult:
|
||||
"""Result of encrypting data."""
|
||||
ciphertext: bytes
|
||||
nonce: bytes
|
||||
key_id: str
|
||||
encrypted_data_key: bytes
|
||||
|
||||
|
||||
@dataclass
|
||||
class EncryptionMetadata:
|
||||
"""Metadata stored with encrypted objects."""
|
||||
algorithm: str
|
||||
key_id: str
|
||||
nonce: bytes
|
||||
encrypted_data_key: bytes
|
||||
|
||||
def to_dict(self) -> Dict[str, str]:
|
||||
return {
|
||||
"x-amz-server-side-encryption": self.algorithm,
|
||||
"x-amz-encryption-key-id": self.key_id,
|
||||
"x-amz-encryption-nonce": base64.b64encode(self.nonce).decode(),
|
||||
"x-amz-encrypted-data-key": base64.b64encode(self.encrypted_data_key).decode(),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, str]) -> Optional["EncryptionMetadata"]:
|
||||
algorithm = data.get("x-amz-server-side-encryption")
|
||||
if not algorithm:
|
||||
return None
|
||||
try:
|
||||
return cls(
|
||||
algorithm=algorithm,
|
||||
key_id=data.get("x-amz-encryption-key-id", "local"),
|
||||
nonce=base64.b64decode(data.get("x-amz-encryption-nonce", "")),
|
||||
encrypted_data_key=base64.b64decode(data.get("x-amz-encrypted-data-key", "")),
|
||||
)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
class EncryptionProvider:
|
||||
"""Base class for encryption providers."""
|
||||
|
||||
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
|
||||
raise NotImplementedError
|
||||
|
||||
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
|
||||
key_id: str, context: Dict[str, str] | None = None) -> bytes:
|
||||
raise NotImplementedError
|
||||
|
||||
def generate_data_key(self) -> tuple[bytes, bytes]:
|
||||
"""Generate a data key and its encrypted form.
|
||||
|
||||
Returns:
|
||||
Tuple of (plaintext_key, encrypted_key)
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
|
||||
"""Decrypt an encrypted data key.
|
||||
|
||||
Args:
|
||||
encrypted_data_key: The encrypted data key bytes
|
||||
key_id: Optional key identifier (used by KMS providers)
|
||||
|
||||
Returns:
|
||||
The decrypted data key
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class LocalKeyEncryption(EncryptionProvider):
|
||||
"""SSE-S3 style encryption using a local master key.
|
||||
|
||||
Uses envelope encryption:
|
||||
1. Generate a unique data key for each object
|
||||
2. Encrypt the data with the data key (AES-256-GCM)
|
||||
3. Encrypt the data key with the master key
|
||||
4. Store the encrypted data key alongside the ciphertext
|
||||
"""
|
||||
|
||||
KEY_ID = "local"
|
||||
|
||||
def __init__(self, master_key_path: Path):
|
||||
self.master_key_path = master_key_path
|
||||
self._master_key: bytes | None = None
|
||||
|
||||
@property
|
||||
def master_key(self) -> bytes:
|
||||
if self._master_key is None:
|
||||
self._master_key = self._load_or_create_master_key()
|
||||
return self._master_key
|
||||
|
||||
def _load_or_create_master_key(self) -> bytes:
|
||||
"""Load master key from file or generate a new one (with file locking)."""
|
||||
lock_path = self.master_key_path.with_suffix(".lock")
|
||||
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
try:
|
||||
with open(lock_path, "w") as lock_file:
|
||||
if sys.platform == "win32":
|
||||
import msvcrt
|
||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
|
||||
else:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
|
||||
try:
|
||||
if self.master_key_path.exists():
|
||||
try:
|
||||
return base64.b64decode(self.master_key_path.read_text().strip())
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Failed to load master key: {exc}") from exc
|
||||
key = secrets.token_bytes(32)
|
||||
try:
|
||||
self.master_key_path.write_text(base64.b64encode(key).decode())
|
||||
_set_secure_file_permissions(self.master_key_path)
|
||||
except OSError as exc:
|
||||
raise EncryptionError(f"Failed to save master key: {exc}") from exc
|
||||
return key
|
||||
finally:
|
||||
if sys.platform == "win32":
|
||||
import msvcrt
|
||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
|
||||
else:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
|
||||
except OSError as exc:
|
||||
raise EncryptionError(f"Failed to acquire lock for master key: {exc}") from exc
|
||||
|
||||
DATA_KEY_AAD = b'{"purpose":"data_key","version":1}'
|
||||
|
||||
def _encrypt_data_key(self, data_key: bytes) -> bytes:
|
||||
"""Encrypt the data key with the master key."""
|
||||
aesgcm = AESGCM(self.master_key)
|
||||
nonce = secrets.token_bytes(12)
|
||||
encrypted = aesgcm.encrypt(nonce, data_key, self.DATA_KEY_AAD)
|
||||
return nonce + encrypted
|
||||
|
||||
def _decrypt_data_key(self, encrypted_data_key: bytes) -> bytes:
|
||||
"""Decrypt the data key using the master key."""
|
||||
if len(encrypted_data_key) < 12 + 32 + 16: # nonce + key + tag
|
||||
raise EncryptionError("Invalid encrypted data key")
|
||||
aesgcm = AESGCM(self.master_key)
|
||||
nonce = encrypted_data_key[:12]
|
||||
ciphertext = encrypted_data_key[12:]
|
||||
try:
|
||||
return aesgcm.decrypt(nonce, ciphertext, self.DATA_KEY_AAD)
|
||||
except Exception:
|
||||
try:
|
||||
return aesgcm.decrypt(nonce, ciphertext, None)
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Failed to decrypt data key: {exc}") from exc
|
||||
|
||||
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
|
||||
"""Decrypt an encrypted data key (key_id ignored for local encryption)."""
|
||||
return self._decrypt_data_key(encrypted_data_key)
|
||||
|
||||
def generate_data_key(self) -> tuple[bytes, bytes]:
|
||||
"""Generate a data key and its encrypted form."""
|
||||
plaintext_key = secrets.token_bytes(32)
|
||||
encrypted_key = self._encrypt_data_key(plaintext_key)
|
||||
return plaintext_key, encrypted_key
|
||||
|
||||
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
|
||||
"""Encrypt data using envelope encryption."""
|
||||
data_key, encrypted_data_key = self.generate_data_key()
|
||||
|
||||
aesgcm = AESGCM(data_key)
|
||||
nonce = secrets.token_bytes(12)
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
|
||||
|
||||
return EncryptionResult(
|
||||
ciphertext=ciphertext,
|
||||
nonce=nonce,
|
||||
key_id=self.KEY_ID,
|
||||
encrypted_data_key=encrypted_data_key,
|
||||
)
|
||||
|
||||
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
|
||||
key_id: str, context: Dict[str, str] | None = None) -> bytes:
|
||||
"""Decrypt data using envelope encryption."""
|
||||
data_key = self._decrypt_data_key(encrypted_data_key)
|
||||
aesgcm = AESGCM(data_key)
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
try:
|
||||
return aesgcm.decrypt(nonce, ciphertext, aad)
|
||||
except Exception as exc:
|
||||
raise EncryptionError("Failed to decrypt data") from exc
|
||||
|
||||
|
||||
class StreamingEncryptor:
|
||||
"""Encrypts/decrypts data in streaming fashion for large files.
|
||||
|
||||
For large files, we encrypt in chunks. Each chunk is encrypted with the
|
||||
same data key but a unique nonce derived from the base nonce + chunk index.
|
||||
"""
|
||||
|
||||
CHUNK_SIZE = 64 * 1024
|
||||
HEADER_SIZE = 4
|
||||
|
||||
def __init__(self, provider: EncryptionProvider, chunk_size: int = CHUNK_SIZE):
|
||||
self.provider = provider
|
||||
self.chunk_size = chunk_size
|
||||
|
||||
def _derive_chunk_nonce(self, base_nonce: bytes, chunk_index: int) -> bytes:
|
||||
"""Derive a unique nonce for each chunk using HKDF."""
|
||||
hkdf = HKDF(
|
||||
algorithm=hashes.SHA256(),
|
||||
length=12,
|
||||
salt=base_nonce,
|
||||
info=chunk_index.to_bytes(4, "big"),
|
||||
)
|
||||
return hkdf.derive(b"chunk_nonce")
|
||||
|
||||
def encrypt_stream(self, stream: BinaryIO,
|
||||
context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]:
|
||||
"""Encrypt a stream and return encrypted stream + metadata.
|
||||
|
||||
Performance: Writes chunks directly to output buffer instead of accumulating in list.
|
||||
"""
|
||||
data_key, encrypted_data_key = self.provider.generate_data_key()
|
||||
base_nonce = secrets.token_bytes(12)
|
||||
|
||||
aesgcm = AESGCM(data_key)
|
||||
# Performance: Write directly to BytesIO instead of accumulating chunks
|
||||
output = io.BytesIO()
|
||||
output.write(b"\x00\x00\x00\x00") # Placeholder for chunk count
|
||||
chunk_index = 0
|
||||
|
||||
while True:
|
||||
chunk = stream.read(self.chunk_size)
|
||||
if not chunk:
|
||||
break
|
||||
|
||||
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
|
||||
encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None)
|
||||
|
||||
# Write size prefix + encrypted chunk directly
|
||||
output.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big"))
|
||||
output.write(encrypted_chunk)
|
||||
chunk_index += 1
|
||||
|
||||
# Write actual chunk count to header
|
||||
output.seek(0)
|
||||
output.write(chunk_index.to_bytes(4, "big"))
|
||||
output.seek(0)
|
||||
|
||||
metadata = EncryptionMetadata(
|
||||
algorithm="AES256",
|
||||
key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local",
|
||||
nonce=base_nonce,
|
||||
encrypted_data_key=encrypted_data_key,
|
||||
)
|
||||
|
||||
return output, metadata
|
||||
|
||||
def decrypt_stream(self, stream: BinaryIO, metadata: EncryptionMetadata) -> BinaryIO:
|
||||
"""Decrypt a stream using the provided metadata.
|
||||
|
||||
Performance: Writes chunks directly to output buffer instead of accumulating in list.
|
||||
"""
|
||||
data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id)
|
||||
|
||||
aesgcm = AESGCM(data_key)
|
||||
base_nonce = metadata.nonce
|
||||
|
||||
chunk_count_bytes = stream.read(4)
|
||||
if len(chunk_count_bytes) < 4:
|
||||
raise EncryptionError("Invalid encrypted stream: missing header")
|
||||
chunk_count = int.from_bytes(chunk_count_bytes, "big")
|
||||
|
||||
# Performance: Write directly to BytesIO instead of accumulating chunks
|
||||
output = io.BytesIO()
|
||||
for chunk_index in range(chunk_count):
|
||||
size_bytes = stream.read(self.HEADER_SIZE)
|
||||
if len(size_bytes) < self.HEADER_SIZE:
|
||||
raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}")
|
||||
chunk_size = int.from_bytes(size_bytes, "big")
|
||||
|
||||
encrypted_chunk = stream.read(chunk_size)
|
||||
if len(encrypted_chunk) < chunk_size:
|
||||
raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}")
|
||||
|
||||
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
|
||||
try:
|
||||
decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None)
|
||||
output.write(decrypted_chunk) # Write directly instead of appending to list
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc
|
||||
|
||||
output.seek(0)
|
||||
return output
|
||||
|
||||
def encrypt_file(self, input_path: str, output_path: str) -> EncryptionMetadata:
|
||||
data_key, encrypted_data_key = self.provider.generate_data_key()
|
||||
base_nonce = secrets.token_bytes(12)
|
||||
|
||||
if _HAS_RUST:
|
||||
_rc.encrypt_stream_chunked(
|
||||
input_path, output_path, data_key, base_nonce, self.chunk_size
|
||||
)
|
||||
else:
|
||||
with open(input_path, "rb") as stream:
|
||||
aesgcm = AESGCM(data_key)
|
||||
with open(output_path, "wb") as out:
|
||||
out.write(b"\x00\x00\x00\x00")
|
||||
chunk_index = 0
|
||||
while True:
|
||||
chunk = stream.read(self.chunk_size)
|
||||
if not chunk:
|
||||
break
|
||||
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
|
||||
encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None)
|
||||
out.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big"))
|
||||
out.write(encrypted_chunk)
|
||||
chunk_index += 1
|
||||
out.seek(0)
|
||||
out.write(chunk_index.to_bytes(4, "big"))
|
||||
|
||||
return EncryptionMetadata(
|
||||
algorithm="AES256",
|
||||
key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local",
|
||||
nonce=base_nonce,
|
||||
encrypted_data_key=encrypted_data_key,
|
||||
)
|
||||
|
||||
def decrypt_file(self, input_path: str, output_path: str,
|
||||
metadata: EncryptionMetadata) -> None:
|
||||
data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id)
|
||||
base_nonce = metadata.nonce
|
||||
|
||||
if _HAS_RUST:
|
||||
_rc.decrypt_stream_chunked(input_path, output_path, data_key, base_nonce)
|
||||
else:
|
||||
with open(input_path, "rb") as stream:
|
||||
chunk_count_bytes = stream.read(4)
|
||||
if len(chunk_count_bytes) < 4:
|
||||
raise EncryptionError("Invalid encrypted stream: missing header")
|
||||
chunk_count = int.from_bytes(chunk_count_bytes, "big")
|
||||
aesgcm = AESGCM(data_key)
|
||||
with open(output_path, "wb") as out:
|
||||
for chunk_index in range(chunk_count):
|
||||
size_bytes = stream.read(self.HEADER_SIZE)
|
||||
if len(size_bytes) < self.HEADER_SIZE:
|
||||
raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}")
|
||||
chunk_size = int.from_bytes(size_bytes, "big")
|
||||
encrypted_chunk = stream.read(chunk_size)
|
||||
if len(encrypted_chunk) < chunk_size:
|
||||
raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}")
|
||||
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
|
||||
try:
|
||||
decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None)
|
||||
out.write(decrypted_chunk)
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc
|
||||
|
||||
|
||||
class EncryptionManager:
|
||||
"""Manages encryption providers and operations."""
|
||||
|
||||
def __init__(self, config: Dict[str, Any]):
|
||||
self.config = config
|
||||
self._local_provider: LocalKeyEncryption | None = None
|
||||
self._kms_provider: Any = None # Set by KMS module
|
||||
self._streaming_encryptor: StreamingEncryptor | None = None
|
||||
|
||||
@property
|
||||
def enabled(self) -> bool:
|
||||
return self.config.get("encryption_enabled", False)
|
||||
|
||||
@property
|
||||
def default_algorithm(self) -> str:
|
||||
return self.config.get("default_encryption_algorithm", "AES256")
|
||||
|
||||
def get_local_provider(self) -> LocalKeyEncryption:
|
||||
if self._local_provider is None:
|
||||
key_path = Path(self.config.get("encryption_master_key_path", "data/.myfsio.sys/keys/master.key"))
|
||||
self._local_provider = LocalKeyEncryption(key_path)
|
||||
return self._local_provider
|
||||
|
||||
def set_kms_provider(self, kms_provider: Any) -> None:
|
||||
"""Set the KMS provider (injected from kms module)."""
|
||||
self._kms_provider = kms_provider
|
||||
|
||||
def get_provider(self, algorithm: str, kms_key_id: str | None = None) -> EncryptionProvider:
|
||||
"""Get the appropriate encryption provider for the algorithm."""
|
||||
if algorithm == "AES256":
|
||||
return self.get_local_provider()
|
||||
elif algorithm == "aws:kms":
|
||||
if self._kms_provider is None:
|
||||
raise EncryptionError("KMS is not configured")
|
||||
return self._kms_provider.get_provider(kms_key_id)
|
||||
else:
|
||||
raise EncryptionError(f"Unsupported encryption algorithm: {algorithm}")
|
||||
|
||||
def get_streaming_encryptor(self) -> StreamingEncryptor:
|
||||
if self._streaming_encryptor is None:
|
||||
chunk_size = self.config.get("encryption_chunk_size_bytes", 64 * 1024)
|
||||
self._streaming_encryptor = StreamingEncryptor(self.get_local_provider(), chunk_size=chunk_size)
|
||||
return self._streaming_encryptor
|
||||
|
||||
def encrypt_object(self, data: bytes, algorithm: str = "AES256",
|
||||
kms_key_id: str | None = None,
|
||||
context: Dict[str, str] | None = None) -> tuple[bytes, EncryptionMetadata]:
|
||||
"""Encrypt object data."""
|
||||
provider = self.get_provider(algorithm, kms_key_id)
|
||||
result = provider.encrypt(data, context)
|
||||
|
||||
metadata = EncryptionMetadata(
|
||||
algorithm=algorithm,
|
||||
key_id=result.key_id,
|
||||
nonce=result.nonce,
|
||||
encrypted_data_key=result.encrypted_data_key,
|
||||
)
|
||||
|
||||
return result.ciphertext, metadata
|
||||
|
||||
def decrypt_object(self, ciphertext: bytes, metadata: EncryptionMetadata,
|
||||
context: Dict[str, str] | None = None) -> bytes:
|
||||
"""Decrypt object data."""
|
||||
provider = self.get_provider(metadata.algorithm, metadata.key_id)
|
||||
return provider.decrypt(
|
||||
ciphertext,
|
||||
metadata.nonce,
|
||||
metadata.encrypted_data_key,
|
||||
metadata.key_id,
|
||||
context,
|
||||
)
|
||||
|
||||
def encrypt_stream(self, stream: BinaryIO, algorithm: str = "AES256",
|
||||
context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]:
|
||||
"""Encrypt a stream for large files."""
|
||||
encryptor = self.get_streaming_encryptor()
|
||||
return encryptor.encrypt_stream(stream, context)
|
||||
|
||||
def decrypt_stream(self, stream: BinaryIO, metadata: EncryptionMetadata) -> BinaryIO:
|
||||
"""Decrypt a stream."""
|
||||
encryptor = self.get_streaming_encryptor()
|
||||
return encryptor.decrypt_stream(stream, metadata)
|
||||
|
||||
|
||||
class SSECEncryption(EncryptionProvider):
|
||||
"""SSE-C: Server-Side Encryption with Customer-Provided Keys.
|
||||
|
||||
The client provides the encryption key with each request.
|
||||
Server encrypts/decrypts but never stores the key.
|
||||
|
||||
Required headers for PUT:
|
||||
- x-amz-server-side-encryption-customer-algorithm: AES256
|
||||
- x-amz-server-side-encryption-customer-key: Base64-encoded 256-bit key
|
||||
- x-amz-server-side-encryption-customer-key-MD5: Base64-encoded MD5 of key
|
||||
"""
|
||||
|
||||
KEY_ID = "customer-provided"
|
||||
|
||||
def __init__(self, customer_key: bytes):
|
||||
if len(customer_key) != 32:
|
||||
raise EncryptionError("Customer key must be exactly 256 bits (32 bytes)")
|
||||
self.customer_key = customer_key
|
||||
|
||||
@classmethod
|
||||
def from_headers(cls, headers: Dict[str, str]) -> "SSECEncryption":
|
||||
algorithm = headers.get("x-amz-server-side-encryption-customer-algorithm", "")
|
||||
if algorithm.upper() != "AES256":
|
||||
raise EncryptionError(f"Unsupported SSE-C algorithm: {algorithm}. Only AES256 is supported.")
|
||||
|
||||
key_b64 = headers.get("x-amz-server-side-encryption-customer-key", "")
|
||||
if not key_b64:
|
||||
raise EncryptionError("Missing x-amz-server-side-encryption-customer-key header")
|
||||
|
||||
key_md5_b64 = headers.get("x-amz-server-side-encryption-customer-key-md5", "")
|
||||
|
||||
try:
|
||||
customer_key = base64.b64decode(key_b64)
|
||||
except Exception as e:
|
||||
raise EncryptionError(f"Invalid base64 in customer key: {e}") from e
|
||||
|
||||
if len(customer_key) != 32:
|
||||
raise EncryptionError(f"Customer key must be 256 bits, got {len(customer_key) * 8} bits")
|
||||
|
||||
if key_md5_b64:
|
||||
import hashlib
|
||||
expected_md5 = base64.b64encode(hashlib.md5(customer_key).digest()).decode()
|
||||
if key_md5_b64 != expected_md5:
|
||||
raise EncryptionError("Customer key MD5 mismatch")
|
||||
|
||||
return cls(customer_key)
|
||||
|
||||
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
|
||||
aesgcm = AESGCM(self.customer_key)
|
||||
nonce = secrets.token_bytes(12)
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
|
||||
|
||||
return EncryptionResult(
|
||||
ciphertext=ciphertext,
|
||||
nonce=nonce,
|
||||
key_id=self.KEY_ID,
|
||||
encrypted_data_key=b"",
|
||||
)
|
||||
|
||||
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
|
||||
key_id: str, context: Dict[str, str] | None = None) -> bytes:
|
||||
aesgcm = AESGCM(self.customer_key)
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
try:
|
||||
return aesgcm.decrypt(nonce, ciphertext, aad)
|
||||
except Exception as exc:
|
||||
raise EncryptionError("SSE-C decryption failed") from exc
|
||||
|
||||
def generate_data_key(self) -> tuple[bytes, bytes]:
|
||||
return self.customer_key, b""
|
||||
|
||||
|
||||
@dataclass
|
||||
class SSECMetadata:
|
||||
algorithm: str = "AES256"
|
||||
nonce: bytes = b""
|
||||
key_md5: str = ""
|
||||
|
||||
def to_dict(self) -> Dict[str, str]:
|
||||
return {
|
||||
"x-amz-server-side-encryption-customer-algorithm": self.algorithm,
|
||||
"x-amz-encryption-nonce": base64.b64encode(self.nonce).decode(),
|
||||
"x-amz-server-side-encryption-customer-key-MD5": self.key_md5,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, str]) -> Optional["SSECMetadata"]:
|
||||
algorithm = data.get("x-amz-server-side-encryption-customer-algorithm")
|
||||
if not algorithm:
|
||||
return None
|
||||
try:
|
||||
nonce = base64.b64decode(data.get("x-amz-encryption-nonce", ""))
|
||||
return cls(
|
||||
algorithm=algorithm,
|
||||
nonce=nonce,
|
||||
key_md5=data.get("x-amz-server-side-encryption-customer-key-MD5", ""),
|
||||
)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
class ClientEncryptionHelper:
|
||||
"""Helpers for client-side encryption.
|
||||
|
||||
Client-side encryption is performed by the client, but this helper
|
||||
provides key generation and materials for clients that need them.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def generate_client_key() -> Dict[str, str]:
|
||||
"""Generate a new client encryption key."""
|
||||
from datetime import datetime, timezone
|
||||
key = secrets.token_bytes(32)
|
||||
return {
|
||||
"key": base64.b64encode(key).decode(),
|
||||
"algorithm": "AES-256-GCM",
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def encrypt_with_key(plaintext: bytes, key_b64: str, context: Dict[str, str] | None = None) -> Dict[str, str]:
|
||||
"""Encrypt data with a client-provided key."""
|
||||
key = base64.b64decode(key_b64)
|
||||
if len(key) != 32:
|
||||
raise EncryptionError("Key must be 256 bits (32 bytes)")
|
||||
|
||||
aesgcm = AESGCM(key)
|
||||
nonce = secrets.token_bytes(12)
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
|
||||
|
||||
return {
|
||||
"ciphertext": base64.b64encode(ciphertext).decode(),
|
||||
"nonce": base64.b64encode(nonce).decode(),
|
||||
"algorithm": "AES-256-GCM",
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def decrypt_with_key(ciphertext_b64: str, nonce_b64: str, key_b64: str, context: Dict[str, str] | None = None) -> bytes:
|
||||
"""Decrypt data with a client-provided key."""
|
||||
key = base64.b64decode(key_b64)
|
||||
nonce = base64.b64decode(nonce_b64)
|
||||
ciphertext = base64.b64decode(ciphertext_b64)
|
||||
|
||||
if len(key) != 32:
|
||||
raise EncryptionError("Key must be 256 bits (32 bytes)")
|
||||
|
||||
aesgcm = AESGCM(key)
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
try:
|
||||
return aesgcm.decrypt(nonce, ciphertext, aad)
|
||||
except Exception as exc:
|
||||
raise EncryptionError("Decryption failed") from exc
|
||||
207
python/app/errors.py
Normal file
207
python/app/errors.py
Normal file
@@ -0,0 +1,207 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional, Dict, Any
|
||||
from xml.etree.ElementTree import Element, SubElement, tostring
|
||||
|
||||
from flask import Response, jsonify, request, flash, redirect, url_for, g
|
||||
from flask_limiter import RateLimitExceeded
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class AppError(Exception):
|
||||
"""Base application error with multi-format response support."""
|
||||
code: str
|
||||
message: str
|
||||
status_code: int = 500
|
||||
details: Optional[Dict[str, Any]] = field(default=None)
|
||||
|
||||
def __post_init__(self):
|
||||
super().__init__(self.message)
|
||||
|
||||
def to_xml_response(self) -> Response:
|
||||
"""Convert to S3 API XML error response."""
|
||||
error = Element("Error")
|
||||
SubElement(error, "Code").text = self.code
|
||||
SubElement(error, "Message").text = self.message
|
||||
request_id = getattr(g, 'request_id', None) if g else None
|
||||
SubElement(error, "RequestId").text = request_id or "unknown"
|
||||
xml_bytes = tostring(error, encoding="utf-8")
|
||||
return Response(xml_bytes, status=self.status_code, mimetype="application/xml")
|
||||
|
||||
def to_json_response(self) -> tuple[Response, int]:
|
||||
"""Convert to JSON error response for UI AJAX calls."""
|
||||
payload: Dict[str, Any] = {
|
||||
"success": False,
|
||||
"error": {
|
||||
"code": self.code,
|
||||
"message": self.message
|
||||
}
|
||||
}
|
||||
if self.details:
|
||||
payload["error"]["details"] = self.details
|
||||
return jsonify(payload), self.status_code
|
||||
|
||||
def to_flash_message(self) -> str:
|
||||
"""Convert to user-friendly flash message."""
|
||||
return self.message
|
||||
|
||||
|
||||
@dataclass
|
||||
class BucketNotFoundError(AppError):
|
||||
"""Bucket does not exist."""
|
||||
code: str = "NoSuchBucket"
|
||||
message: str = "The specified bucket does not exist"
|
||||
status_code: int = 404
|
||||
|
||||
|
||||
@dataclass
|
||||
class BucketAlreadyExistsError(AppError):
|
||||
"""Bucket already exists."""
|
||||
code: str = "BucketAlreadyExists"
|
||||
message: str = "The requested bucket name is not available"
|
||||
status_code: int = 409
|
||||
|
||||
|
||||
@dataclass
|
||||
class BucketNotEmptyError(AppError):
|
||||
"""Bucket is not empty."""
|
||||
code: str = "BucketNotEmpty"
|
||||
message: str = "The bucket you tried to delete is not empty"
|
||||
status_code: int = 409
|
||||
|
||||
|
||||
@dataclass
|
||||
class ObjectNotFoundError(AppError):
|
||||
"""Object does not exist."""
|
||||
code: str = "NoSuchKey"
|
||||
message: str = "The specified key does not exist"
|
||||
status_code: int = 404
|
||||
|
||||
|
||||
@dataclass
|
||||
class InvalidObjectKeyError(AppError):
|
||||
"""Invalid object key."""
|
||||
code: str = "InvalidKey"
|
||||
message: str = "The specified key is not valid"
|
||||
status_code: int = 400
|
||||
|
||||
|
||||
@dataclass
|
||||
class AccessDeniedError(AppError):
|
||||
"""Access denied."""
|
||||
code: str = "AccessDenied"
|
||||
message: str = "Access Denied"
|
||||
status_code: int = 403
|
||||
|
||||
|
||||
@dataclass
|
||||
class InvalidCredentialsError(AppError):
|
||||
"""Invalid credentials."""
|
||||
code: str = "InvalidAccessKeyId"
|
||||
message: str = "The access key ID you provided does not exist"
|
||||
status_code: int = 403
|
||||
|
||||
@dataclass
|
||||
class MalformedRequestError(AppError):
|
||||
"""Malformed request."""
|
||||
code: str = "MalformedXML"
|
||||
message: str = "The XML you provided was not well-formed"
|
||||
status_code: int = 400
|
||||
|
||||
|
||||
@dataclass
|
||||
class InvalidArgumentError(AppError):
|
||||
"""Invalid argument."""
|
||||
code: str = "InvalidArgument"
|
||||
message: str = "Invalid argument"
|
||||
status_code: int = 400
|
||||
|
||||
|
||||
@dataclass
|
||||
class EntityTooLargeError(AppError):
|
||||
"""Entity too large."""
|
||||
code: str = "EntityTooLarge"
|
||||
message: str = "Your proposed upload exceeds the maximum allowed size"
|
||||
status_code: int = 413
|
||||
|
||||
|
||||
@dataclass
|
||||
class QuotaExceededAppError(AppError):
|
||||
"""Bucket quota exceeded."""
|
||||
code: str = "QuotaExceeded"
|
||||
message: str = "The bucket quota has been exceeded"
|
||||
status_code: int = 403
|
||||
quota: Optional[Dict[str, Any]] = None
|
||||
usage: Optional[Dict[str, int]] = None
|
||||
|
||||
def __post_init__(self):
|
||||
if self.quota or self.usage:
|
||||
self.details = {}
|
||||
if self.quota:
|
||||
self.details["quota"] = self.quota
|
||||
if self.usage:
|
||||
self.details["usage"] = self.usage
|
||||
super().__post_init__()
|
||||
|
||||
|
||||
def handle_app_error(error: AppError) -> Response:
|
||||
"""Handle application errors with appropriate response format."""
|
||||
log_extra = {"error_code": error.code}
|
||||
if error.details:
|
||||
log_extra["details"] = error.details
|
||||
|
||||
logger.error(f"{error.code}: {error.message}", extra=log_extra)
|
||||
|
||||
if request.path.startswith('/ui'):
|
||||
wants_json = (
|
||||
request.is_json or
|
||||
request.headers.get('X-Requested-With') == 'XMLHttpRequest' or
|
||||
'application/json' in request.accept_mimetypes.values()
|
||||
)
|
||||
if wants_json:
|
||||
return error.to_json_response()
|
||||
flash(error.to_flash_message(), 'danger')
|
||||
referrer = request.referrer
|
||||
if referrer and request.host in referrer:
|
||||
return redirect(referrer)
|
||||
return redirect(url_for('ui.buckets_overview'))
|
||||
else:
|
||||
return error.to_xml_response()
|
||||
|
||||
|
||||
def handle_rate_limit_exceeded(e: RateLimitExceeded) -> Response:
|
||||
g.s3_error_code = "SlowDown"
|
||||
if request.path.startswith("/ui") or request.path.startswith("/buckets"):
|
||||
wants_json = (
|
||||
request.is_json or
|
||||
request.headers.get("X-Requested-With") == "XMLHttpRequest" or
|
||||
"application/json" in request.accept_mimetypes.values()
|
||||
)
|
||||
if wants_json:
|
||||
return jsonify({"success": False, "error": {"code": "SlowDown", "message": "Please reduce your request rate."}}), 429
|
||||
error = Element("Error")
|
||||
SubElement(error, "Code").text = "SlowDown"
|
||||
SubElement(error, "Message").text = "Please reduce your request rate."
|
||||
SubElement(error, "Resource").text = request.path
|
||||
SubElement(error, "RequestId").text = getattr(g, "request_id", "")
|
||||
xml_bytes = tostring(error, encoding="utf-8")
|
||||
return Response(xml_bytes, status="429 Too Many Requests", mimetype="application/xml")
|
||||
|
||||
|
||||
def register_error_handlers(app):
|
||||
"""Register error handlers with a Flask app."""
|
||||
app.register_error_handler(AppError, handle_app_error)
|
||||
app.register_error_handler(RateLimitExceeded, handle_rate_limit_exceeded)
|
||||
|
||||
for error_class in [
|
||||
BucketNotFoundError, BucketAlreadyExistsError, BucketNotEmptyError,
|
||||
ObjectNotFoundError, InvalidObjectKeyError,
|
||||
AccessDeniedError, InvalidCredentialsError,
|
||||
MalformedRequestError, InvalidArgumentError, EntityTooLargeError,
|
||||
QuotaExceededAppError,
|
||||
]:
|
||||
app.register_error_handler(error_class, handle_app_error)
|
||||
16
python/app/extensions.py
Normal file
16
python/app/extensions.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from flask import g
|
||||
from flask_limiter import Limiter
|
||||
from flask_limiter.util import get_remote_address
|
||||
from flask_wtf import CSRFProtect
|
||||
|
||||
def get_rate_limit_key():
|
||||
"""Generate rate limit key based on authenticated user."""
|
||||
if hasattr(g, 'principal') and g.principal:
|
||||
return g.principal.access_key
|
||||
return get_remote_address()
|
||||
|
||||
# Shared rate limiter instance; configured in app factory.
|
||||
limiter = Limiter(key_func=get_rate_limit_key)
|
||||
|
||||
# Global CSRF protection for UI routes.
|
||||
csrf = CSRFProtect()
|
||||
596
python/app/gc.py
Normal file
596
python/app/gc.py
Normal file
@@ -0,0 +1,596 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class GCResult:
|
||||
temp_files_deleted: int = 0
|
||||
temp_bytes_freed: int = 0
|
||||
multipart_uploads_deleted: int = 0
|
||||
multipart_bytes_freed: int = 0
|
||||
lock_files_deleted: int = 0
|
||||
orphaned_metadata_deleted: int = 0
|
||||
orphaned_versions_deleted: int = 0
|
||||
orphaned_version_bytes_freed: int = 0
|
||||
empty_dirs_removed: int = 0
|
||||
errors: List[str] = field(default_factory=list)
|
||||
execution_time_seconds: float = 0.0
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"temp_files_deleted": self.temp_files_deleted,
|
||||
"temp_bytes_freed": self.temp_bytes_freed,
|
||||
"multipart_uploads_deleted": self.multipart_uploads_deleted,
|
||||
"multipart_bytes_freed": self.multipart_bytes_freed,
|
||||
"lock_files_deleted": self.lock_files_deleted,
|
||||
"orphaned_metadata_deleted": self.orphaned_metadata_deleted,
|
||||
"orphaned_versions_deleted": self.orphaned_versions_deleted,
|
||||
"orphaned_version_bytes_freed": self.orphaned_version_bytes_freed,
|
||||
"empty_dirs_removed": self.empty_dirs_removed,
|
||||
"errors": self.errors,
|
||||
"execution_time_seconds": self.execution_time_seconds,
|
||||
}
|
||||
|
||||
@property
|
||||
def total_bytes_freed(self) -> int:
|
||||
return self.temp_bytes_freed + self.multipart_bytes_freed + self.orphaned_version_bytes_freed
|
||||
|
||||
@property
|
||||
def has_work(self) -> bool:
|
||||
return (
|
||||
self.temp_files_deleted > 0
|
||||
or self.multipart_uploads_deleted > 0
|
||||
or self.lock_files_deleted > 0
|
||||
or self.orphaned_metadata_deleted > 0
|
||||
or self.orphaned_versions_deleted > 0
|
||||
or self.empty_dirs_removed > 0
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class GCExecutionRecord:
|
||||
timestamp: float
|
||||
result: dict
|
||||
dry_run: bool
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"timestamp": self.timestamp,
|
||||
"result": self.result,
|
||||
"dry_run": self.dry_run,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> GCExecutionRecord:
|
||||
return cls(
|
||||
timestamp=data["timestamp"],
|
||||
result=data["result"],
|
||||
dry_run=data.get("dry_run", False),
|
||||
)
|
||||
|
||||
|
||||
class GCHistoryStore:
|
||||
def __init__(self, storage_root: Path, max_records: int = 50) -> None:
|
||||
self.storage_root = storage_root
|
||||
self.max_records = max_records
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def _get_path(self) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "config" / "gc_history.json"
|
||||
|
||||
def load(self) -> List[GCExecutionRecord]:
|
||||
path = self._get_path()
|
||||
if not path.exists():
|
||||
return []
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
return [GCExecutionRecord.from_dict(d) for d in data.get("executions", [])]
|
||||
except (OSError, ValueError, KeyError) as e:
|
||||
logger.error("Failed to load GC history: %s", e)
|
||||
return []
|
||||
|
||||
def save(self, records: List[GCExecutionRecord]) -> None:
|
||||
path = self._get_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
data = {"executions": [r.to_dict() for r in records[: self.max_records]]}
|
||||
try:
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except OSError as e:
|
||||
logger.error("Failed to save GC history: %s", e)
|
||||
|
||||
def add(self, record: GCExecutionRecord) -> None:
|
||||
with self._lock:
|
||||
records = self.load()
|
||||
records.insert(0, record)
|
||||
self.save(records)
|
||||
|
||||
def get_history(self, limit: int = 50, offset: int = 0) -> List[GCExecutionRecord]:
|
||||
return self.load()[offset : offset + limit]
|
||||
|
||||
|
||||
def _dir_size(path: Path) -> int:
|
||||
total = 0
|
||||
try:
|
||||
for f in path.rglob("*"):
|
||||
if f.is_file():
|
||||
try:
|
||||
total += f.stat().st_size
|
||||
except OSError:
|
||||
pass
|
||||
except OSError:
|
||||
pass
|
||||
return total
|
||||
|
||||
|
||||
def _file_age_hours(path: Path) -> float:
|
||||
try:
|
||||
mtime = path.stat().st_mtime
|
||||
return (time.time() - mtime) / 3600.0
|
||||
except OSError:
|
||||
return 0.0
|
||||
|
||||
|
||||
class GarbageCollector:
|
||||
SYSTEM_ROOT = ".myfsio.sys"
|
||||
SYSTEM_TMP_DIR = "tmp"
|
||||
SYSTEM_MULTIPART_DIR = "multipart"
|
||||
SYSTEM_BUCKETS_DIR = "buckets"
|
||||
BUCKET_META_DIR = "meta"
|
||||
BUCKET_VERSIONS_DIR = "versions"
|
||||
INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
storage_root: Path,
|
||||
interval_hours: float = 6.0,
|
||||
temp_file_max_age_hours: float = 24.0,
|
||||
multipart_max_age_days: int = 7,
|
||||
lock_file_max_age_hours: float = 1.0,
|
||||
dry_run: bool = False,
|
||||
max_history: int = 50,
|
||||
io_throttle_ms: int = 10,
|
||||
) -> None:
|
||||
self.storage_root = Path(storage_root)
|
||||
self.interval_seconds = interval_hours * 3600.0
|
||||
self.temp_file_max_age_hours = temp_file_max_age_hours
|
||||
self.multipart_max_age_days = multipart_max_age_days
|
||||
self.lock_file_max_age_hours = lock_file_max_age_hours
|
||||
self.dry_run = dry_run
|
||||
self._timer: Optional[threading.Timer] = None
|
||||
self._shutdown = False
|
||||
self._lock = threading.Lock()
|
||||
self._scanning = False
|
||||
self._scan_start_time: Optional[float] = None
|
||||
self._io_throttle = max(0, io_throttle_ms) / 1000.0
|
||||
self.history_store = GCHistoryStore(storage_root, max_records=max_history)
|
||||
|
||||
def start(self) -> None:
|
||||
if self._timer is not None:
|
||||
return
|
||||
self._shutdown = False
|
||||
self._schedule_next()
|
||||
logger.info(
|
||||
"GC started: interval=%.1fh, temp_max_age=%.1fh, multipart_max_age=%dd, lock_max_age=%.1fh, dry_run=%s",
|
||||
self.interval_seconds / 3600.0,
|
||||
self.temp_file_max_age_hours,
|
||||
self.multipart_max_age_days,
|
||||
self.lock_file_max_age_hours,
|
||||
self.dry_run,
|
||||
)
|
||||
|
||||
def stop(self) -> None:
|
||||
self._shutdown = True
|
||||
if self._timer:
|
||||
self._timer.cancel()
|
||||
self._timer = None
|
||||
logger.info("GC stopped")
|
||||
|
||||
def _schedule_next(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
|
||||
self._timer.daemon = True
|
||||
self._timer.start()
|
||||
|
||||
def _run_cycle(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
try:
|
||||
self.run_now()
|
||||
except Exception as e:
|
||||
logger.error("GC cycle failed: %s", e)
|
||||
finally:
|
||||
self._schedule_next()
|
||||
|
||||
def run_now(self, dry_run: Optional[bool] = None) -> GCResult:
|
||||
if not self._lock.acquire(blocking=False):
|
||||
raise RuntimeError("GC is already in progress")
|
||||
|
||||
effective_dry_run = dry_run if dry_run is not None else self.dry_run
|
||||
|
||||
try:
|
||||
self._scanning = True
|
||||
self._scan_start_time = time.time()
|
||||
|
||||
start = self._scan_start_time
|
||||
result = GCResult()
|
||||
|
||||
original_dry_run = self.dry_run
|
||||
self.dry_run = effective_dry_run
|
||||
try:
|
||||
self._clean_temp_files(result)
|
||||
self._clean_orphaned_multipart(result)
|
||||
self._clean_stale_locks(result)
|
||||
self._clean_orphaned_metadata(result)
|
||||
self._clean_orphaned_versions(result)
|
||||
self._clean_empty_dirs(result)
|
||||
finally:
|
||||
self.dry_run = original_dry_run
|
||||
|
||||
result.execution_time_seconds = time.time() - start
|
||||
|
||||
if result.has_work or result.errors:
|
||||
logger.info(
|
||||
"GC completed in %.2fs: temp=%d (%.1f MB), multipart=%d (%.1f MB), "
|
||||
"locks=%d, meta=%d, versions=%d (%.1f MB), dirs=%d, errors=%d%s",
|
||||
result.execution_time_seconds,
|
||||
result.temp_files_deleted,
|
||||
result.temp_bytes_freed / (1024 * 1024),
|
||||
result.multipart_uploads_deleted,
|
||||
result.multipart_bytes_freed / (1024 * 1024),
|
||||
result.lock_files_deleted,
|
||||
result.orphaned_metadata_deleted,
|
||||
result.orphaned_versions_deleted,
|
||||
result.orphaned_version_bytes_freed / (1024 * 1024),
|
||||
result.empty_dirs_removed,
|
||||
len(result.errors),
|
||||
" (dry run)" if effective_dry_run else "",
|
||||
)
|
||||
|
||||
record = GCExecutionRecord(
|
||||
timestamp=time.time(),
|
||||
result=result.to_dict(),
|
||||
dry_run=effective_dry_run,
|
||||
)
|
||||
self.history_store.add(record)
|
||||
|
||||
return result
|
||||
finally:
|
||||
self._scanning = False
|
||||
self._scan_start_time = None
|
||||
self._lock.release()
|
||||
|
||||
def run_async(self, dry_run: Optional[bool] = None) -> bool:
|
||||
if self._scanning:
|
||||
return False
|
||||
t = threading.Thread(target=self.run_now, args=(dry_run,), daemon=True)
|
||||
t.start()
|
||||
return True
|
||||
|
||||
def _system_path(self) -> Path:
|
||||
return self.storage_root / self.SYSTEM_ROOT
|
||||
|
||||
def _throttle(self) -> bool:
|
||||
if self._shutdown:
|
||||
return True
|
||||
if self._io_throttle > 0:
|
||||
time.sleep(self._io_throttle)
|
||||
return self._shutdown
|
||||
|
||||
def _list_bucket_names(self) -> List[str]:
|
||||
names = []
|
||||
try:
|
||||
for entry in self.storage_root.iterdir():
|
||||
if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
|
||||
names.append(entry.name)
|
||||
except OSError:
|
||||
pass
|
||||
return names
|
||||
|
||||
def _clean_temp_files(self, result: GCResult) -> None:
|
||||
tmp_dir = self._system_path() / self.SYSTEM_TMP_DIR
|
||||
if not tmp_dir.exists():
|
||||
return
|
||||
try:
|
||||
for entry in tmp_dir.iterdir():
|
||||
if self._throttle():
|
||||
return
|
||||
if not entry.is_file():
|
||||
continue
|
||||
age = _file_age_hours(entry)
|
||||
if age < self.temp_file_max_age_hours:
|
||||
continue
|
||||
try:
|
||||
size = entry.stat().st_size
|
||||
if not self.dry_run:
|
||||
entry.unlink()
|
||||
result.temp_files_deleted += 1
|
||||
result.temp_bytes_freed += size
|
||||
except OSError as e:
|
||||
result.errors.append(f"temp file {entry.name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan tmp dir: {e}")
|
||||
|
||||
def _clean_orphaned_multipart(self, result: GCResult) -> None:
|
||||
cutoff_hours = self.multipart_max_age_days * 24.0
|
||||
bucket_names = self._list_bucket_names()
|
||||
|
||||
for bucket_name in bucket_names:
|
||||
if self._shutdown:
|
||||
return
|
||||
for multipart_root in (
|
||||
self._system_path() / self.SYSTEM_MULTIPART_DIR / bucket_name,
|
||||
self.storage_root / bucket_name / ".multipart",
|
||||
):
|
||||
if not multipart_root.exists():
|
||||
continue
|
||||
try:
|
||||
for upload_dir in multipart_root.iterdir():
|
||||
if self._throttle():
|
||||
return
|
||||
if not upload_dir.is_dir():
|
||||
continue
|
||||
self._maybe_clean_upload(upload_dir, cutoff_hours, result)
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan multipart {bucket_name}: {e}")
|
||||
|
||||
def _maybe_clean_upload(self, upload_dir: Path, cutoff_hours: float, result: GCResult) -> None:
|
||||
manifest_path = upload_dir / "manifest.json"
|
||||
age = _file_age_hours(manifest_path) if manifest_path.exists() else _file_age_hours(upload_dir)
|
||||
|
||||
if age < cutoff_hours:
|
||||
return
|
||||
|
||||
dir_bytes = _dir_size(upload_dir)
|
||||
try:
|
||||
if not self.dry_run:
|
||||
shutil.rmtree(upload_dir, ignore_errors=True)
|
||||
result.multipart_uploads_deleted += 1
|
||||
result.multipart_bytes_freed += dir_bytes
|
||||
except OSError as e:
|
||||
result.errors.append(f"multipart {upload_dir.name}: {e}")
|
||||
|
||||
def _clean_stale_locks(self, result: GCResult) -> None:
|
||||
buckets_root = self._system_path() / self.SYSTEM_BUCKETS_DIR
|
||||
if not buckets_root.exists():
|
||||
return
|
||||
|
||||
try:
|
||||
for bucket_dir in buckets_root.iterdir():
|
||||
if self._shutdown:
|
||||
return
|
||||
if not bucket_dir.is_dir():
|
||||
continue
|
||||
locks_dir = bucket_dir / "locks"
|
||||
if not locks_dir.exists():
|
||||
continue
|
||||
try:
|
||||
for lock_file in locks_dir.iterdir():
|
||||
if self._throttle():
|
||||
return
|
||||
if not lock_file.is_file() or not lock_file.name.endswith(".lock"):
|
||||
continue
|
||||
age = _file_age_hours(lock_file)
|
||||
if age < self.lock_file_max_age_hours:
|
||||
continue
|
||||
try:
|
||||
if not self.dry_run:
|
||||
lock_file.unlink(missing_ok=True)
|
||||
result.lock_files_deleted += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"lock {lock_file.name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan locks {bucket_dir.name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan buckets for locks: {e}")
|
||||
|
||||
def _clean_orphaned_metadata(self, result: GCResult) -> None:
|
||||
bucket_names = self._list_bucket_names()
|
||||
|
||||
for bucket_name in bucket_names:
|
||||
if self._shutdown:
|
||||
return
|
||||
legacy_meta = self.storage_root / bucket_name / ".meta"
|
||||
if legacy_meta.exists():
|
||||
self._clean_legacy_metadata(bucket_name, legacy_meta, result)
|
||||
|
||||
new_meta = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
if new_meta.exists():
|
||||
self._clean_index_metadata(bucket_name, new_meta, result)
|
||||
|
||||
def _clean_legacy_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
try:
|
||||
for meta_file in meta_root.rglob("*.meta.json"):
|
||||
if self._throttle():
|
||||
return
|
||||
if not meta_file.is_file():
|
||||
continue
|
||||
try:
|
||||
rel = meta_file.relative_to(meta_root)
|
||||
object_key = rel.as_posix().removesuffix(".meta.json")
|
||||
object_path = bucket_path / object_key
|
||||
if not object_path.exists():
|
||||
if not self.dry_run:
|
||||
meta_file.unlink(missing_ok=True)
|
||||
result.orphaned_metadata_deleted += 1
|
||||
except (OSError, ValueError) as e:
|
||||
result.errors.append(f"legacy meta {bucket_name}/{meta_file.name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan legacy meta {bucket_name}: {e}")
|
||||
|
||||
def _clean_index_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
try:
|
||||
for index_file in meta_root.rglob("_index.json"):
|
||||
if self._throttle():
|
||||
return
|
||||
if not index_file.is_file():
|
||||
continue
|
||||
try:
|
||||
with open(index_file, "r", encoding="utf-8") as f:
|
||||
index_data = json.load(f)
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
|
||||
keys_to_remove = []
|
||||
for key in index_data:
|
||||
rel_dir = index_file.parent.relative_to(meta_root)
|
||||
if rel_dir == Path("."):
|
||||
full_key = key
|
||||
else:
|
||||
full_key = rel_dir.as_posix() + "/" + key
|
||||
object_path = bucket_path / full_key
|
||||
if not object_path.exists():
|
||||
keys_to_remove.append(key)
|
||||
|
||||
if keys_to_remove:
|
||||
if not self.dry_run:
|
||||
for k in keys_to_remove:
|
||||
index_data.pop(k, None)
|
||||
if index_data:
|
||||
try:
|
||||
with open(index_file, "w", encoding="utf-8") as f:
|
||||
json.dump(index_data, f)
|
||||
except OSError as e:
|
||||
result.errors.append(f"write index {bucket_name}: {e}")
|
||||
continue
|
||||
else:
|
||||
try:
|
||||
index_file.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
result.orphaned_metadata_deleted += len(keys_to_remove)
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan index meta {bucket_name}: {e}")
|
||||
|
||||
def _clean_orphaned_versions(self, result: GCResult) -> None:
|
||||
bucket_names = self._list_bucket_names()
|
||||
|
||||
for bucket_name in bucket_names:
|
||||
if self._shutdown:
|
||||
return
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
for versions_root in (
|
||||
self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR,
|
||||
self.storage_root / bucket_name / ".versions",
|
||||
):
|
||||
if not versions_root.exists():
|
||||
continue
|
||||
try:
|
||||
for key_dir in versions_root.iterdir():
|
||||
if self._throttle():
|
||||
return
|
||||
if not key_dir.is_dir():
|
||||
continue
|
||||
self._clean_versions_for_key(bucket_path, versions_root, key_dir, result)
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan versions {bucket_name}: {e}")
|
||||
|
||||
def _clean_versions_for_key(
|
||||
self, bucket_path: Path, versions_root: Path, key_dir: Path, result: GCResult
|
||||
) -> None:
|
||||
try:
|
||||
rel = key_dir.relative_to(versions_root)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
object_path = bucket_path / rel
|
||||
if object_path.exists():
|
||||
return
|
||||
|
||||
version_files = list(key_dir.glob("*.bin")) + list(key_dir.glob("*.json"))
|
||||
if not version_files:
|
||||
return
|
||||
|
||||
for vf in version_files:
|
||||
try:
|
||||
size = vf.stat().st_size if vf.suffix == ".bin" else 0
|
||||
if not self.dry_run:
|
||||
vf.unlink(missing_ok=True)
|
||||
if vf.suffix == ".bin":
|
||||
result.orphaned_version_bytes_freed += size
|
||||
result.orphaned_versions_deleted += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"version file {vf.name}: {e}")
|
||||
|
||||
def _clean_empty_dirs(self, result: GCResult) -> None:
|
||||
targets = [
|
||||
self._system_path() / self.SYSTEM_TMP_DIR,
|
||||
self._system_path() / self.SYSTEM_MULTIPART_DIR,
|
||||
self._system_path() / self.SYSTEM_BUCKETS_DIR,
|
||||
]
|
||||
for bucket_name in self._list_bucket_names():
|
||||
targets.append(self.storage_root / bucket_name / ".meta")
|
||||
targets.append(self.storage_root / bucket_name / ".versions")
|
||||
targets.append(self.storage_root / bucket_name / ".multipart")
|
||||
|
||||
for root in targets:
|
||||
if not root.exists():
|
||||
continue
|
||||
self._remove_empty_dirs_recursive(root, root, result)
|
||||
|
||||
def _remove_empty_dirs_recursive(self, path: Path, stop_at: Path, result: GCResult) -> bool:
|
||||
if self._shutdown:
|
||||
return False
|
||||
if not path.is_dir():
|
||||
return False
|
||||
|
||||
try:
|
||||
children = list(path.iterdir())
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
all_empty = True
|
||||
for child in children:
|
||||
if self._throttle():
|
||||
return False
|
||||
if child.is_dir():
|
||||
if not self._remove_empty_dirs_recursive(child, stop_at, result):
|
||||
all_empty = False
|
||||
else:
|
||||
all_empty = False
|
||||
|
||||
if all_empty and path != stop_at:
|
||||
try:
|
||||
if not self.dry_run:
|
||||
path.rmdir()
|
||||
result.empty_dirs_removed += 1
|
||||
return True
|
||||
except OSError:
|
||||
return False
|
||||
return all_empty
|
||||
|
||||
def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
|
||||
records = self.history_store.get_history(limit, offset)
|
||||
return [r.to_dict() for r in records]
|
||||
|
||||
def get_status(self) -> dict:
|
||||
status: Dict[str, Any] = {
|
||||
"enabled": not self._shutdown or self._timer is not None,
|
||||
"running": self._timer is not None and not self._shutdown,
|
||||
"scanning": self._scanning,
|
||||
"interval_hours": self.interval_seconds / 3600.0,
|
||||
"temp_file_max_age_hours": self.temp_file_max_age_hours,
|
||||
"multipart_max_age_days": self.multipart_max_age_days,
|
||||
"lock_file_max_age_hours": self.lock_file_max_age_hours,
|
||||
"dry_run": self.dry_run,
|
||||
"io_throttle_ms": round(self._io_throttle * 1000),
|
||||
}
|
||||
if self._scanning and self._scan_start_time:
|
||||
status["scan_elapsed_seconds"] = time.time() - self._scan_start_time
|
||||
return status
|
||||
1095
python/app/iam.py
Normal file
1095
python/app/iam.py
Normal file
File diff suppressed because it is too large
Load Diff
995
python/app/integrity.py
Normal file
995
python/app/integrity.py
Normal file
@@ -0,0 +1,995 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
try:
|
||||
import myfsio_core as _rc
|
||||
if not hasattr(_rc, "md5_file"):
|
||||
raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
|
||||
_HAS_RUST = True
|
||||
except ImportError:
|
||||
_HAS_RUST = False
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _compute_etag(path: Path) -> str:
|
||||
if _HAS_RUST:
|
||||
return _rc.md5_file(str(path))
|
||||
checksum = hashlib.md5()
|
||||
with path.open("rb") as handle:
|
||||
for chunk in iter(lambda: handle.read(8192), b""):
|
||||
checksum.update(chunk)
|
||||
return checksum.hexdigest()
|
||||
|
||||
|
||||
@dataclass
|
||||
class IntegrityIssue:
|
||||
issue_type: str
|
||||
bucket: str
|
||||
key: str
|
||||
detail: str
|
||||
healed: bool = False
|
||||
heal_action: str = ""
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"issue_type": self.issue_type,
|
||||
"bucket": self.bucket,
|
||||
"key": self.key,
|
||||
"detail": self.detail,
|
||||
"healed": self.healed,
|
||||
"heal_action": self.heal_action,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class IntegrityResult:
|
||||
corrupted_objects: int = 0
|
||||
orphaned_objects: int = 0
|
||||
phantom_metadata: int = 0
|
||||
stale_versions: int = 0
|
||||
etag_cache_inconsistencies: int = 0
|
||||
legacy_metadata_drifts: int = 0
|
||||
issues_healed: int = 0
|
||||
issues: List[IntegrityIssue] = field(default_factory=list)
|
||||
errors: List[str] = field(default_factory=list)
|
||||
objects_scanned: int = 0
|
||||
buckets_scanned: int = 0
|
||||
execution_time_seconds: float = 0.0
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"corrupted_objects": self.corrupted_objects,
|
||||
"orphaned_objects": self.orphaned_objects,
|
||||
"phantom_metadata": self.phantom_metadata,
|
||||
"stale_versions": self.stale_versions,
|
||||
"etag_cache_inconsistencies": self.etag_cache_inconsistencies,
|
||||
"legacy_metadata_drifts": self.legacy_metadata_drifts,
|
||||
"issues_healed": self.issues_healed,
|
||||
"issues": [i.to_dict() for i in self.issues],
|
||||
"errors": self.errors,
|
||||
"objects_scanned": self.objects_scanned,
|
||||
"buckets_scanned": self.buckets_scanned,
|
||||
"execution_time_seconds": self.execution_time_seconds,
|
||||
}
|
||||
|
||||
@property
|
||||
def total_issues(self) -> int:
|
||||
return (
|
||||
self.corrupted_objects
|
||||
+ self.orphaned_objects
|
||||
+ self.phantom_metadata
|
||||
+ self.stale_versions
|
||||
+ self.etag_cache_inconsistencies
|
||||
+ self.legacy_metadata_drifts
|
||||
)
|
||||
|
||||
@property
|
||||
def has_issues(self) -> bool:
|
||||
return self.total_issues > 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class IntegrityExecutionRecord:
|
||||
timestamp: float
|
||||
result: dict
|
||||
dry_run: bool
|
||||
auto_heal: bool
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"timestamp": self.timestamp,
|
||||
"result": self.result,
|
||||
"dry_run": self.dry_run,
|
||||
"auto_heal": self.auto_heal,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> IntegrityExecutionRecord:
|
||||
return cls(
|
||||
timestamp=data["timestamp"],
|
||||
result=data["result"],
|
||||
dry_run=data.get("dry_run", False),
|
||||
auto_heal=data.get("auto_heal", False),
|
||||
)
|
||||
|
||||
|
||||
class IntegrityHistoryStore:
|
||||
def __init__(self, storage_root: Path, max_records: int = 50) -> None:
|
||||
self.storage_root = storage_root
|
||||
self.max_records = max_records
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def _get_path(self) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "config" / "integrity_history.json"
|
||||
|
||||
def load(self) -> List[IntegrityExecutionRecord]:
|
||||
path = self._get_path()
|
||||
if not path.exists():
|
||||
return []
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
return [IntegrityExecutionRecord.from_dict(d) for d in data.get("executions", [])]
|
||||
except (OSError, ValueError, KeyError) as e:
|
||||
logger.error("Failed to load integrity history: %s", e)
|
||||
return []
|
||||
|
||||
def save(self, records: List[IntegrityExecutionRecord]) -> None:
|
||||
path = self._get_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
data = {"executions": [r.to_dict() for r in records[: self.max_records]]}
|
||||
try:
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except OSError as e:
|
||||
logger.error("Failed to save integrity history: %s", e)
|
||||
|
||||
def add(self, record: IntegrityExecutionRecord) -> None:
|
||||
with self._lock:
|
||||
records = self.load()
|
||||
records.insert(0, record)
|
||||
self.save(records)
|
||||
|
||||
def get_history(self, limit: int = 50, offset: int = 0) -> List[IntegrityExecutionRecord]:
|
||||
return self.load()[offset : offset + limit]
|
||||
|
||||
|
||||
class IntegrityCursorStore:
|
||||
def __init__(self, storage_root: Path) -> None:
|
||||
self.storage_root = storage_root
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def _get_path(self) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "config" / "integrity_cursor.json"
|
||||
|
||||
def load(self) -> Dict[str, Any]:
|
||||
path = self._get_path()
|
||||
if not path.exists():
|
||||
return {"buckets": {}}
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
if not isinstance(data.get("buckets"), dict):
|
||||
return {"buckets": {}}
|
||||
return data
|
||||
except (OSError, ValueError, KeyError):
|
||||
return {"buckets": {}}
|
||||
|
||||
def save(self, data: Dict[str, Any]) -> None:
|
||||
path = self._get_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except OSError as e:
|
||||
logger.error("Failed to save integrity cursor: %s", e)
|
||||
|
||||
def update_bucket(
|
||||
self,
|
||||
bucket_name: str,
|
||||
timestamp: float,
|
||||
last_key: Optional[str] = None,
|
||||
completed: bool = False,
|
||||
) -> None:
|
||||
with self._lock:
|
||||
data = self.load()
|
||||
entry = data["buckets"].get(bucket_name, {})
|
||||
if completed:
|
||||
entry["last_scanned"] = timestamp
|
||||
entry.pop("last_key", None)
|
||||
entry["completed"] = True
|
||||
else:
|
||||
entry["last_scanned"] = timestamp
|
||||
if last_key is not None:
|
||||
entry["last_key"] = last_key
|
||||
entry["completed"] = False
|
||||
data["buckets"][bucket_name] = entry
|
||||
self.save(data)
|
||||
|
||||
def clean_stale(self, existing_buckets: List[str]) -> None:
|
||||
with self._lock:
|
||||
data = self.load()
|
||||
existing_set = set(existing_buckets)
|
||||
stale_keys = [k for k in data["buckets"] if k not in existing_set]
|
||||
if stale_keys:
|
||||
for k in stale_keys:
|
||||
del data["buckets"][k]
|
||||
self.save(data)
|
||||
|
||||
def get_last_key(self, bucket_name: str) -> Optional[str]:
|
||||
data = self.load()
|
||||
entry = data.get("buckets", {}).get(bucket_name)
|
||||
if entry is None:
|
||||
return None
|
||||
return entry.get("last_key")
|
||||
|
||||
def get_bucket_order(self, bucket_names: List[str]) -> List[str]:
|
||||
data = self.load()
|
||||
buckets_info = data.get("buckets", {})
|
||||
|
||||
incomplete = []
|
||||
complete = []
|
||||
for name in bucket_names:
|
||||
entry = buckets_info.get(name)
|
||||
if entry is None:
|
||||
incomplete.append((name, 0.0))
|
||||
elif entry.get("last_key") is not None:
|
||||
incomplete.append((name, entry.get("last_scanned", 0.0)))
|
||||
else:
|
||||
complete.append((name, entry.get("last_scanned", 0.0)))
|
||||
|
||||
incomplete.sort(key=lambda x: x[1])
|
||||
complete.sort(key=lambda x: x[1])
|
||||
|
||||
return [n for n, _ in incomplete] + [n for n, _ in complete]
|
||||
|
||||
def get_info(self) -> Dict[str, Any]:
|
||||
data = self.load()
|
||||
buckets = data.get("buckets", {})
|
||||
return {
|
||||
"tracked_buckets": len(buckets),
|
||||
"buckets": {
|
||||
name: {
|
||||
"last_scanned": info.get("last_scanned"),
|
||||
"last_key": info.get("last_key"),
|
||||
"completed": info.get("completed", False),
|
||||
}
|
||||
for name, info in buckets.items()
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
MAX_ISSUES = 500
|
||||
|
||||
|
||||
class IntegrityChecker:
|
||||
SYSTEM_ROOT = ".myfsio.sys"
|
||||
SYSTEM_BUCKETS_DIR = "buckets"
|
||||
BUCKET_META_DIR = "meta"
|
||||
BUCKET_VERSIONS_DIR = "versions"
|
||||
INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
storage_root: Path,
|
||||
interval_hours: float = 24.0,
|
||||
batch_size: int = 1000,
|
||||
auto_heal: bool = False,
|
||||
dry_run: bool = False,
|
||||
max_history: int = 50,
|
||||
io_throttle_ms: int = 10,
|
||||
) -> None:
|
||||
self.storage_root = Path(storage_root)
|
||||
self.interval_seconds = interval_hours * 3600.0
|
||||
self.batch_size = batch_size
|
||||
self.auto_heal = auto_heal
|
||||
self.dry_run = dry_run
|
||||
self._timer: Optional[threading.Timer] = None
|
||||
self._shutdown = False
|
||||
self._lock = threading.Lock()
|
||||
self._scanning = False
|
||||
self._scan_start_time: Optional[float] = None
|
||||
self._io_throttle = max(0, io_throttle_ms) / 1000.0
|
||||
self.history_store = IntegrityHistoryStore(storage_root, max_records=max_history)
|
||||
self.cursor_store = IntegrityCursorStore(self.storage_root)
|
||||
|
||||
def start(self) -> None:
|
||||
if self._timer is not None:
|
||||
return
|
||||
self._shutdown = False
|
||||
self._schedule_next()
|
||||
logger.info(
|
||||
"Integrity checker started: interval=%.1fh, batch_size=%d, auto_heal=%s, dry_run=%s",
|
||||
self.interval_seconds / 3600.0,
|
||||
self.batch_size,
|
||||
self.auto_heal,
|
||||
self.dry_run,
|
||||
)
|
||||
|
||||
def stop(self) -> None:
|
||||
self._shutdown = True
|
||||
if self._timer:
|
||||
self._timer.cancel()
|
||||
self._timer = None
|
||||
logger.info("Integrity checker stopped")
|
||||
|
||||
def _schedule_next(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
|
||||
self._timer.daemon = True
|
||||
self._timer.start()
|
||||
|
||||
def _run_cycle(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
try:
|
||||
self.run_now()
|
||||
except Exception as e:
|
||||
logger.error("Integrity check cycle failed: %s", e)
|
||||
finally:
|
||||
self._schedule_next()
|
||||
|
||||
def run_now(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> IntegrityResult:
|
||||
if not self._lock.acquire(blocking=False):
|
||||
raise RuntimeError("Integrity scan is already in progress")
|
||||
|
||||
try:
|
||||
self._scanning = True
|
||||
self._scan_start_time = time.time()
|
||||
|
||||
effective_auto_heal = auto_heal if auto_heal is not None else self.auto_heal
|
||||
effective_dry_run = dry_run if dry_run is not None else self.dry_run
|
||||
|
||||
start = self._scan_start_time
|
||||
result = IntegrityResult()
|
||||
|
||||
bucket_names = self._list_bucket_names()
|
||||
self.cursor_store.clean_stale(bucket_names)
|
||||
ordered_buckets = self.cursor_store.get_bucket_order(bucket_names)
|
||||
|
||||
for bucket_name in ordered_buckets:
|
||||
if self._batch_exhausted(result):
|
||||
break
|
||||
result.buckets_scanned += 1
|
||||
cursor_key = self.cursor_store.get_last_key(bucket_name)
|
||||
key_corrupted = self._check_corrupted_objects(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
|
||||
key_orphaned = self._check_orphaned_objects(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
|
||||
key_phantom = self._check_phantom_metadata(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
|
||||
self._check_stale_versions(bucket_name, result, effective_auto_heal, effective_dry_run)
|
||||
self._check_etag_cache(bucket_name, result, effective_auto_heal, effective_dry_run)
|
||||
self._check_legacy_metadata(bucket_name, result, effective_auto_heal, effective_dry_run)
|
||||
returned_keys = [k for k in (key_corrupted, key_orphaned, key_phantom) if k is not None]
|
||||
bucket_exhausted = self._batch_exhausted(result)
|
||||
if bucket_exhausted and returned_keys:
|
||||
self.cursor_store.update_bucket(bucket_name, time.time(), last_key=min(returned_keys))
|
||||
else:
|
||||
self.cursor_store.update_bucket(bucket_name, time.time(), completed=True)
|
||||
|
||||
result.execution_time_seconds = time.time() - start
|
||||
|
||||
if result.has_issues or result.errors:
|
||||
logger.info(
|
||||
"Integrity check completed in %.2fs: corrupted=%d, orphaned=%d, phantom=%d, "
|
||||
"stale_versions=%d, etag_cache=%d, legacy_drift=%d, healed=%d, errors=%d%s",
|
||||
result.execution_time_seconds,
|
||||
result.corrupted_objects,
|
||||
result.orphaned_objects,
|
||||
result.phantom_metadata,
|
||||
result.stale_versions,
|
||||
result.etag_cache_inconsistencies,
|
||||
result.legacy_metadata_drifts,
|
||||
result.issues_healed,
|
||||
len(result.errors),
|
||||
" (dry run)" if effective_dry_run else "",
|
||||
)
|
||||
|
||||
record = IntegrityExecutionRecord(
|
||||
timestamp=time.time(),
|
||||
result=result.to_dict(),
|
||||
dry_run=effective_dry_run,
|
||||
auto_heal=effective_auto_heal,
|
||||
)
|
||||
self.history_store.add(record)
|
||||
|
||||
return result
|
||||
finally:
|
||||
self._scanning = False
|
||||
self._scan_start_time = None
|
||||
self._lock.release()
|
||||
|
||||
def run_async(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> bool:
|
||||
if self._scanning:
|
||||
return False
|
||||
t = threading.Thread(target=self.run_now, args=(auto_heal, dry_run), daemon=True)
|
||||
t.start()
|
||||
return True
|
||||
|
||||
def _system_path(self) -> Path:
|
||||
return self.storage_root / self.SYSTEM_ROOT
|
||||
|
||||
def _list_bucket_names(self) -> List[str]:
|
||||
names = []
|
||||
try:
|
||||
for entry in self.storage_root.iterdir():
|
||||
if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
|
||||
names.append(entry.name)
|
||||
except OSError:
|
||||
pass
|
||||
return names
|
||||
|
||||
def _throttle(self) -> bool:
|
||||
if self._shutdown:
|
||||
return True
|
||||
if self._io_throttle > 0:
|
||||
time.sleep(self._io_throttle)
|
||||
return self._shutdown
|
||||
|
||||
def _batch_exhausted(self, result: IntegrityResult) -> bool:
|
||||
return self._shutdown or result.objects_scanned >= self.batch_size
|
||||
|
||||
def _add_issue(self, result: IntegrityResult, issue: IntegrityIssue) -> None:
|
||||
if len(result.issues) < MAX_ISSUES:
|
||||
result.issues.append(issue)
|
||||
|
||||
def _collect_index_keys(
|
||||
self, meta_root: Path, cursor_key: Optional[str] = None,
|
||||
) -> Dict[str, Dict[str, Any]]:
|
||||
all_keys: Dict[str, Dict[str, Any]] = {}
|
||||
if not meta_root.exists():
|
||||
return all_keys
|
||||
try:
|
||||
for index_file in meta_root.rglob("_index.json"):
|
||||
if not index_file.is_file():
|
||||
continue
|
||||
rel_dir = index_file.parent.relative_to(meta_root)
|
||||
dir_prefix = "" if rel_dir == Path(".") else rel_dir.as_posix()
|
||||
if cursor_key is not None and dir_prefix:
|
||||
full_prefix = dir_prefix + "/"
|
||||
if not cursor_key.startswith(full_prefix) and cursor_key > full_prefix:
|
||||
continue
|
||||
try:
|
||||
index_data = json.loads(index_file.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
for key_name, entry in index_data.items():
|
||||
full_key = (dir_prefix + "/" + key_name) if dir_prefix else key_name
|
||||
if cursor_key is not None and full_key <= cursor_key:
|
||||
continue
|
||||
all_keys[full_key] = {
|
||||
"entry": entry,
|
||||
"index_file": index_file,
|
||||
"key_name": key_name,
|
||||
}
|
||||
except OSError:
|
||||
pass
|
||||
return all_keys
|
||||
|
||||
def _walk_bucket_files_sorted(
|
||||
self, bucket_path: Path, cursor_key: Optional[str] = None,
|
||||
):
|
||||
def _walk(dir_path: Path, prefix: str):
|
||||
try:
|
||||
entries = list(os.scandir(dir_path))
|
||||
except OSError:
|
||||
return
|
||||
|
||||
def _sort_key(e):
|
||||
if e.is_dir(follow_symlinks=False):
|
||||
return e.name + "/"
|
||||
return e.name
|
||||
|
||||
entries.sort(key=_sort_key)
|
||||
|
||||
for entry in entries:
|
||||
if entry.is_dir(follow_symlinks=False):
|
||||
if not prefix and entry.name in self.INTERNAL_FOLDERS:
|
||||
continue
|
||||
new_prefix = (prefix + "/" + entry.name) if prefix else entry.name
|
||||
if cursor_key is not None:
|
||||
full_prefix = new_prefix + "/"
|
||||
if not cursor_key.startswith(full_prefix) and cursor_key > full_prefix:
|
||||
continue
|
||||
yield from _walk(Path(entry.path), new_prefix)
|
||||
elif entry.is_file(follow_symlinks=False):
|
||||
full_key = (prefix + "/" + entry.name) if prefix else entry.name
|
||||
if cursor_key is not None and full_key <= cursor_key:
|
||||
continue
|
||||
yield full_key
|
||||
|
||||
yield from _walk(bucket_path, "")
|
||||
|
||||
def _check_corrupted_objects(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
|
||||
cursor_key: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
if self._batch_exhausted(result):
|
||||
return None
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
|
||||
if not meta_root.exists():
|
||||
return None
|
||||
|
||||
last_key = None
|
||||
try:
|
||||
all_keys = self._collect_index_keys(meta_root, cursor_key)
|
||||
sorted_keys = sorted(all_keys.keys())
|
||||
|
||||
for full_key in sorted_keys:
|
||||
if self._throttle():
|
||||
return last_key
|
||||
if self._batch_exhausted(result):
|
||||
return last_key
|
||||
|
||||
info = all_keys[full_key]
|
||||
entry = info["entry"]
|
||||
index_file = info["index_file"]
|
||||
key_name = info["key_name"]
|
||||
|
||||
object_path = bucket_path / full_key
|
||||
if not object_path.exists():
|
||||
continue
|
||||
|
||||
result.objects_scanned += 1
|
||||
last_key = full_key
|
||||
|
||||
meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
|
||||
stored_etag = meta.get("__etag__")
|
||||
if not stored_etag:
|
||||
continue
|
||||
|
||||
try:
|
||||
actual_etag = _compute_etag(object_path)
|
||||
except OSError:
|
||||
continue
|
||||
|
||||
if actual_etag != stored_etag:
|
||||
result.corrupted_objects += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="corrupted_object",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail=f"stored_etag={stored_etag} actual_etag={actual_etag}",
|
||||
)
|
||||
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
stat = object_path.stat()
|
||||
meta["__etag__"] = actual_etag
|
||||
meta["__size__"] = str(stat.st_size)
|
||||
meta["__last_modified__"] = str(stat.st_mtime)
|
||||
try:
|
||||
index_data = json.loads(index_file.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
index_data = {}
|
||||
index_data[key_name] = {"metadata": meta}
|
||||
self._atomic_write_index(index_file, index_data)
|
||||
issue.healed = True
|
||||
issue.heal_action = "updated etag in index"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal corrupted {bucket_name}/{full_key}: {e}")
|
||||
|
||||
self._add_issue(result, issue)
|
||||
except OSError as e:
|
||||
result.errors.append(f"check corrupted {bucket_name}: {e}")
|
||||
return last_key
|
||||
|
||||
def _check_orphaned_objects(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
|
||||
cursor_key: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
if self._batch_exhausted(result):
|
||||
return None
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
|
||||
last_key = None
|
||||
try:
|
||||
for full_key in self._walk_bucket_files_sorted(bucket_path, cursor_key):
|
||||
if self._throttle():
|
||||
return last_key
|
||||
if self._batch_exhausted(result):
|
||||
return last_key
|
||||
|
||||
result.objects_scanned += 1
|
||||
last_key = full_key
|
||||
key_path = Path(full_key)
|
||||
key_name = key_path.name
|
||||
parent = key_path.parent
|
||||
|
||||
if parent == Path("."):
|
||||
index_path = meta_root / "_index.json"
|
||||
else:
|
||||
index_path = meta_root / parent / "_index.json"
|
||||
|
||||
has_entry = False
|
||||
if index_path.exists():
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
has_entry = key_name in index_data
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
|
||||
if not has_entry:
|
||||
result.orphaned_objects += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="orphaned_object",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail="file exists without metadata entry",
|
||||
)
|
||||
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
object_path = bucket_path / full_key
|
||||
etag = _compute_etag(object_path)
|
||||
stat = object_path.stat()
|
||||
meta = {
|
||||
"__etag__": etag,
|
||||
"__size__": str(stat.st_size),
|
||||
"__last_modified__": str(stat.st_mtime),
|
||||
}
|
||||
index_data = {}
|
||||
if index_path.exists():
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
index_data[key_name] = {"metadata": meta}
|
||||
self._atomic_write_index(index_path, index_data)
|
||||
issue.healed = True
|
||||
issue.heal_action = "created metadata entry"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal orphaned {bucket_name}/{full_key}: {e}")
|
||||
|
||||
self._add_issue(result, issue)
|
||||
except OSError as e:
|
||||
result.errors.append(f"check orphaned {bucket_name}: {e}")
|
||||
return last_key
|
||||
|
||||
def _check_phantom_metadata(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
|
||||
cursor_key: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
if self._batch_exhausted(result):
|
||||
return None
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
|
||||
if not meta_root.exists():
|
||||
return None
|
||||
|
||||
last_key = None
|
||||
try:
|
||||
all_keys = self._collect_index_keys(meta_root, cursor_key)
|
||||
sorted_keys = sorted(all_keys.keys())
|
||||
|
||||
heal_by_index: Dict[Path, List[str]] = {}
|
||||
|
||||
for full_key in sorted_keys:
|
||||
if self._batch_exhausted(result):
|
||||
break
|
||||
|
||||
result.objects_scanned += 1
|
||||
last_key = full_key
|
||||
|
||||
object_path = bucket_path / full_key
|
||||
if not object_path.exists():
|
||||
result.phantom_metadata += 1
|
||||
info = all_keys[full_key]
|
||||
issue = IntegrityIssue(
|
||||
issue_type="phantom_metadata",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail="metadata entry without file on disk",
|
||||
)
|
||||
if auto_heal and not dry_run:
|
||||
index_file = info["index_file"]
|
||||
heal_by_index.setdefault(index_file, []).append(info["key_name"])
|
||||
issue.healed = True
|
||||
issue.heal_action = "removed stale index entry"
|
||||
result.issues_healed += 1
|
||||
self._add_issue(result, issue)
|
||||
|
||||
if heal_by_index and auto_heal and not dry_run:
|
||||
for index_file, keys_to_remove in heal_by_index.items():
|
||||
try:
|
||||
index_data = json.loads(index_file.read_text(encoding="utf-8"))
|
||||
for k in keys_to_remove:
|
||||
index_data.pop(k, None)
|
||||
if index_data:
|
||||
self._atomic_write_index(index_file, index_data)
|
||||
else:
|
||||
index_file.unlink(missing_ok=True)
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal phantom {bucket_name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"check phantom {bucket_name}: {e}")
|
||||
return last_key
|
||||
|
||||
def _check_stale_versions(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
|
||||
) -> None:
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
versions_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR
|
||||
|
||||
if not versions_root.exists():
|
||||
return
|
||||
|
||||
try:
|
||||
for key_dir in versions_root.rglob("*"):
|
||||
if self._throttle():
|
||||
return
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
if not key_dir.is_dir():
|
||||
continue
|
||||
|
||||
bin_files = {f.stem: f for f in key_dir.glob("*.bin")}
|
||||
json_files = {f.stem: f for f in key_dir.glob("*.json")}
|
||||
|
||||
for stem, bin_file in bin_files.items():
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
result.objects_scanned += 1
|
||||
if stem not in json_files:
|
||||
result.stale_versions += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="stale_version",
|
||||
bucket=bucket_name,
|
||||
key=f"{key_dir.relative_to(versions_root).as_posix()}/{bin_file.name}",
|
||||
detail="version data without manifest",
|
||||
)
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
bin_file.unlink(missing_ok=True)
|
||||
issue.healed = True
|
||||
issue.heal_action = "removed orphaned version data"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal stale version {bin_file}: {e}")
|
||||
self._add_issue(result, issue)
|
||||
|
||||
for stem, json_file in json_files.items():
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
result.objects_scanned += 1
|
||||
if stem not in bin_files:
|
||||
result.stale_versions += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="stale_version",
|
||||
bucket=bucket_name,
|
||||
key=f"{key_dir.relative_to(versions_root).as_posix()}/{json_file.name}",
|
||||
detail="version manifest without data",
|
||||
)
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
json_file.unlink(missing_ok=True)
|
||||
issue.healed = True
|
||||
issue.heal_action = "removed orphaned version manifest"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal stale version {json_file}: {e}")
|
||||
self._add_issue(result, issue)
|
||||
except OSError as e:
|
||||
result.errors.append(f"check stale versions {bucket_name}: {e}")
|
||||
|
||||
def _check_etag_cache(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
|
||||
) -> None:
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
etag_index_path = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / "etag_index.json"
|
||||
|
||||
if not etag_index_path.exists():
|
||||
return
|
||||
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
if not meta_root.exists():
|
||||
return
|
||||
|
||||
try:
|
||||
etag_cache = json.loads(etag_index_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return
|
||||
|
||||
found_mismatch = False
|
||||
|
||||
for full_key, cached_etag in etag_cache.items():
|
||||
if self._batch_exhausted(result):
|
||||
break
|
||||
result.objects_scanned += 1
|
||||
key_path = Path(full_key)
|
||||
key_name = key_path.name
|
||||
parent = key_path.parent
|
||||
|
||||
if parent == Path("."):
|
||||
index_path = meta_root / "_index.json"
|
||||
else:
|
||||
index_path = meta_root / parent / "_index.json"
|
||||
|
||||
if not index_path.exists():
|
||||
continue
|
||||
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
|
||||
entry = index_data.get(key_name)
|
||||
if not entry:
|
||||
continue
|
||||
|
||||
meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
|
||||
stored_etag = meta.get("__etag__")
|
||||
|
||||
if stored_etag and cached_etag != stored_etag:
|
||||
result.etag_cache_inconsistencies += 1
|
||||
found_mismatch = True
|
||||
issue = IntegrityIssue(
|
||||
issue_type="etag_cache_inconsistency",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail=f"cached_etag={cached_etag} index_etag={stored_etag}",
|
||||
)
|
||||
self._add_issue(result, issue)
|
||||
|
||||
if found_mismatch and auto_heal and not dry_run:
|
||||
try:
|
||||
etag_index_path.unlink(missing_ok=True)
|
||||
for issue in result.issues:
|
||||
if issue.issue_type == "etag_cache_inconsistency" and issue.bucket == bucket_name and not issue.healed:
|
||||
issue.healed = True
|
||||
issue.heal_action = "deleted etag_index.json"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal etag cache {bucket_name}: {e}")
|
||||
|
||||
def _check_legacy_metadata(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
|
||||
) -> None:
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
legacy_meta_root = self.storage_root / bucket_name / ".meta"
|
||||
if not legacy_meta_root.exists():
|
||||
return
|
||||
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
|
||||
try:
|
||||
for meta_file in legacy_meta_root.rglob("*.meta.json"):
|
||||
if self._throttle():
|
||||
return
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
if not meta_file.is_file():
|
||||
continue
|
||||
|
||||
result.objects_scanned += 1
|
||||
try:
|
||||
rel = meta_file.relative_to(legacy_meta_root)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
full_key = rel.as_posix().removesuffix(".meta.json")
|
||||
key_path = Path(full_key)
|
||||
key_name = key_path.name
|
||||
parent = key_path.parent
|
||||
|
||||
if parent == Path("."):
|
||||
index_path = meta_root / "_index.json"
|
||||
else:
|
||||
index_path = meta_root / parent / "_index.json"
|
||||
|
||||
try:
|
||||
legacy_data = json.loads(meta_file.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
|
||||
index_entry = None
|
||||
if index_path.exists():
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
index_entry = index_data.get(key_name)
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
|
||||
if index_entry is None:
|
||||
result.legacy_metadata_drifts += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="legacy_metadata_drift",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail="unmigrated legacy .meta.json",
|
||||
)
|
||||
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
index_data = {}
|
||||
if index_path.exists():
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
index_data[key_name] = {"metadata": legacy_data}
|
||||
self._atomic_write_index(index_path, index_data)
|
||||
meta_file.unlink(missing_ok=True)
|
||||
issue.healed = True
|
||||
issue.heal_action = "migrated to index and deleted legacy file"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal legacy {bucket_name}/{full_key}: {e}")
|
||||
|
||||
self._add_issue(result, issue)
|
||||
else:
|
||||
index_meta = index_entry.get("metadata", {}) if isinstance(index_entry, dict) else {}
|
||||
if legacy_data != index_meta:
|
||||
result.legacy_metadata_drifts += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="legacy_metadata_drift",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail="legacy .meta.json differs from index entry",
|
||||
)
|
||||
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
meta_file.unlink(missing_ok=True)
|
||||
issue.healed = True
|
||||
issue.heal_action = "deleted legacy file (index is authoritative)"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal legacy drift {bucket_name}/{full_key}: {e}")
|
||||
|
||||
self._add_issue(result, issue)
|
||||
except OSError as e:
|
||||
result.errors.append(f"check legacy meta {bucket_name}: {e}")
|
||||
|
||||
@staticmethod
|
||||
def _atomic_write_index(index_path: Path, data: Dict[str, Any]) -> None:
|
||||
index_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp_path = index_path.with_suffix(".tmp")
|
||||
try:
|
||||
with open(tmp_path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f)
|
||||
os.replace(str(tmp_path), str(index_path))
|
||||
except BaseException:
|
||||
try:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
|
||||
records = self.history_store.get_history(limit, offset)
|
||||
return [r.to_dict() for r in records]
|
||||
|
||||
def get_status(self) -> dict:
|
||||
status: Dict[str, Any] = {
|
||||
"enabled": not self._shutdown or self._timer is not None,
|
||||
"running": self._timer is not None and not self._shutdown,
|
||||
"scanning": self._scanning,
|
||||
"interval_hours": self.interval_seconds / 3600.0,
|
||||
"batch_size": self.batch_size,
|
||||
"auto_heal": self.auto_heal,
|
||||
"dry_run": self.dry_run,
|
||||
"io_throttle_ms": round(self._io_throttle * 1000),
|
||||
}
|
||||
if self._scanning and self._scan_start_time is not None:
|
||||
status["scan_elapsed_seconds"] = round(time.time() - self._scan_start_time, 1)
|
||||
status["cursor"] = self.cursor_store.get_info()
|
||||
return status
|
||||
422
python/app/kms.py
Normal file
422
python/app/kms.py
Normal file
@@ -0,0 +1,422 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import subprocess
|
||||
import sys
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
||||
|
||||
from .encryption import EncryptionError, EncryptionProvider, EncryptionResult
|
||||
|
||||
if sys.platform != "win32":
|
||||
import fcntl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _set_secure_file_permissions(file_path: Path) -> None:
|
||||
"""Set restrictive file permissions (owner read/write only)."""
|
||||
if sys.platform == "win32":
|
||||
try:
|
||||
username = os.environ.get("USERNAME", "")
|
||||
if username:
|
||||
subprocess.run(
|
||||
["icacls", str(file_path), "/inheritance:r",
|
||||
"/grant:r", f"{username}:F"],
|
||||
check=True, capture_output=True
|
||||
)
|
||||
else:
|
||||
logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
|
||||
except (subprocess.SubprocessError, OSError) as exc:
|
||||
logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
|
||||
else:
|
||||
os.chmod(file_path, 0o600)
|
||||
|
||||
|
||||
@dataclass
|
||||
class KMSKey:
|
||||
"""Represents a KMS encryption key."""
|
||||
key_id: str
|
||||
description: str
|
||||
created_at: str
|
||||
enabled: bool = True
|
||||
key_material: bytes = field(default_factory=lambda: b"", repr=False)
|
||||
|
||||
@property
|
||||
def arn(self) -> str:
|
||||
return f"arn:aws:kms:local:000000000000:key/{self.key_id}"
|
||||
|
||||
def to_dict(self, include_key: bool = False) -> Dict[str, Any]:
|
||||
data = {
|
||||
"KeyId": self.key_id,
|
||||
"Arn": self.arn,
|
||||
"Description": self.description,
|
||||
"CreationDate": self.created_at,
|
||||
"Enabled": self.enabled,
|
||||
"KeyState": "Enabled" if self.enabled else "Disabled",
|
||||
"KeyUsage": "ENCRYPT_DECRYPT",
|
||||
"KeySpec": "SYMMETRIC_DEFAULT",
|
||||
}
|
||||
if include_key:
|
||||
data["KeyMaterial"] = base64.b64encode(self.key_material).decode()
|
||||
return data
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "KMSKey":
|
||||
key_material = b""
|
||||
if "KeyMaterial" in data:
|
||||
key_material = base64.b64decode(data["KeyMaterial"])
|
||||
return cls(
|
||||
key_id=data["KeyId"],
|
||||
description=data.get("Description", ""),
|
||||
created_at=data.get("CreationDate", datetime.now(timezone.utc).isoformat()),
|
||||
enabled=data.get("Enabled", True),
|
||||
key_material=key_material,
|
||||
)
|
||||
|
||||
|
||||
class KMSEncryptionProvider(EncryptionProvider):
|
||||
"""Encryption provider using a specific KMS key."""
|
||||
|
||||
def __init__(self, kms: "KMSManager", key_id: str):
|
||||
self.kms = kms
|
||||
self.key_id = key_id
|
||||
|
||||
@property
|
||||
def KEY_ID(self) -> str:
|
||||
return self.key_id
|
||||
|
||||
def generate_data_key(self) -> tuple[bytes, bytes]:
|
||||
"""Generate a data key encrypted with the KMS key."""
|
||||
return self.kms.generate_data_key(self.key_id)
|
||||
|
||||
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
|
||||
"""Encrypt data using envelope encryption with KMS."""
|
||||
data_key, encrypted_data_key = self.generate_data_key()
|
||||
|
||||
aesgcm = AESGCM(data_key)
|
||||
nonce = secrets.token_bytes(12)
|
||||
ciphertext = aesgcm.encrypt(nonce, plaintext,
|
||||
json.dumps(context, sort_keys=True).encode() if context else None)
|
||||
|
||||
return EncryptionResult(
|
||||
ciphertext=ciphertext,
|
||||
nonce=nonce,
|
||||
key_id=self.key_id,
|
||||
encrypted_data_key=encrypted_data_key,
|
||||
)
|
||||
|
||||
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
|
||||
key_id: str, context: Dict[str, str] | None = None) -> bytes:
|
||||
"""Decrypt data using envelope encryption with KMS."""
|
||||
data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
|
||||
if len(data_key) != 32:
|
||||
raise EncryptionError("Invalid data key size")
|
||||
|
||||
aesgcm = AESGCM(data_key)
|
||||
try:
|
||||
return aesgcm.decrypt(nonce, ciphertext,
|
||||
json.dumps(context, sort_keys=True).encode() if context else None)
|
||||
except Exception as exc:
|
||||
logger.debug("KMS decryption failed: %s", exc)
|
||||
raise EncryptionError("Failed to decrypt data") from exc
|
||||
|
||||
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
|
||||
"""Decrypt an encrypted data key using KMS."""
|
||||
if key_id is None:
|
||||
key_id = self.key_id
|
||||
data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
|
||||
if len(data_key) != 32:
|
||||
raise EncryptionError("Invalid data key size")
|
||||
return data_key
|
||||
|
||||
|
||||
class KMSManager:
|
||||
"""Manages KMS keys and operations.
|
||||
|
||||
This is a local implementation that mimics AWS KMS functionality.
|
||||
Keys are stored encrypted on disk.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
keys_path: Path,
|
||||
master_key_path: Path,
|
||||
generate_data_key_min_bytes: int = 1,
|
||||
generate_data_key_max_bytes: int = 1024,
|
||||
):
|
||||
self.keys_path = keys_path
|
||||
self.master_key_path = master_key_path
|
||||
self.generate_data_key_min_bytes = generate_data_key_min_bytes
|
||||
self.generate_data_key_max_bytes = generate_data_key_max_bytes
|
||||
self._keys: Dict[str, KMSKey] = {}
|
||||
self._master_key: bytes | None = None
|
||||
self._master_aesgcm: AESGCM | None = None
|
||||
self._loaded = False
|
||||
|
||||
@property
|
||||
def master_key(self) -> bytes:
|
||||
"""Load or create the master key for encrypting KMS keys (with file locking)."""
|
||||
if self._master_key is None:
|
||||
lock_path = self.master_key_path.with_suffix(".lock")
|
||||
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(lock_path, "w") as lock_file:
|
||||
if sys.platform == "win32":
|
||||
import msvcrt
|
||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
|
||||
else:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
|
||||
try:
|
||||
if self.master_key_path.exists():
|
||||
self._master_key = base64.b64decode(
|
||||
self.master_key_path.read_text().strip()
|
||||
)
|
||||
else:
|
||||
self._master_key = secrets.token_bytes(32)
|
||||
self.master_key_path.write_text(
|
||||
base64.b64encode(self._master_key).decode()
|
||||
)
|
||||
_set_secure_file_permissions(self.master_key_path)
|
||||
finally:
|
||||
if sys.platform == "win32":
|
||||
import msvcrt
|
||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
|
||||
else:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
|
||||
self._master_aesgcm = AESGCM(self._master_key)
|
||||
return self._master_key
|
||||
|
||||
def _load_keys(self) -> None:
|
||||
"""Load keys from disk."""
|
||||
if self._loaded:
|
||||
return
|
||||
|
||||
if self.keys_path.exists():
|
||||
try:
|
||||
data = json.loads(self.keys_path.read_text(encoding="utf-8"))
|
||||
for key_data in data.get("keys", []):
|
||||
key = KMSKey.from_dict(key_data)
|
||||
if key_data.get("EncryptedKeyMaterial"):
|
||||
encrypted = base64.b64decode(key_data["EncryptedKeyMaterial"])
|
||||
key.key_material = self._decrypt_key_material(encrypted)
|
||||
self._keys[key.key_id] = key
|
||||
except json.JSONDecodeError as exc:
|
||||
logger.error("Failed to parse KMS keys file: %s", exc)
|
||||
except (ValueError, KeyError) as exc:
|
||||
logger.error("Invalid KMS key data: %s", exc)
|
||||
|
||||
self._loaded = True
|
||||
|
||||
def _save_keys(self) -> None:
|
||||
"""Save keys to disk (with encrypted key material)."""
|
||||
keys_data = []
|
||||
for key in self._keys.values():
|
||||
data = key.to_dict(include_key=False)
|
||||
encrypted = self._encrypt_key_material(key.key_material)
|
||||
data["EncryptedKeyMaterial"] = base64.b64encode(encrypted).decode()
|
||||
keys_data.append(data)
|
||||
|
||||
self.keys_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
self.keys_path.write_text(
|
||||
json.dumps({"keys": keys_data}, indent=2),
|
||||
encoding="utf-8"
|
||||
)
|
||||
_set_secure_file_permissions(self.keys_path)
|
||||
|
||||
def _encrypt_key_material(self, key_material: bytes) -> bytes:
|
||||
_ = self.master_key
|
||||
nonce = secrets.token_bytes(12)
|
||||
ciphertext = self._master_aesgcm.encrypt(nonce, key_material, None)
|
||||
return nonce + ciphertext
|
||||
|
||||
def _decrypt_key_material(self, encrypted: bytes) -> bytes:
|
||||
_ = self.master_key
|
||||
nonce = encrypted[:12]
|
||||
ciphertext = encrypted[12:]
|
||||
return self._master_aesgcm.decrypt(nonce, ciphertext, None)
|
||||
|
||||
def create_key(self, description: str = "", key_id: str | None = None) -> KMSKey:
|
||||
"""Create a new KMS key."""
|
||||
self._load_keys()
|
||||
|
||||
if key_id is None:
|
||||
key_id = str(uuid.uuid4())
|
||||
|
||||
if key_id in self._keys:
|
||||
raise EncryptionError(f"Key already exists: {key_id}")
|
||||
|
||||
key = KMSKey(
|
||||
key_id=key_id,
|
||||
description=description,
|
||||
created_at=datetime.now(timezone.utc).isoformat(),
|
||||
enabled=True,
|
||||
key_material=secrets.token_bytes(32),
|
||||
)
|
||||
|
||||
self._keys[key_id] = key
|
||||
self._save_keys()
|
||||
return key
|
||||
|
||||
def get_key(self, key_id: str) -> KMSKey | None:
|
||||
"""Get a key by ID."""
|
||||
self._load_keys()
|
||||
return self._keys.get(key_id)
|
||||
|
||||
def list_keys(self) -> List[KMSKey]:
|
||||
"""List all keys."""
|
||||
self._load_keys()
|
||||
return list(self._keys.values())
|
||||
|
||||
def get_default_key_id(self) -> str:
|
||||
"""Get the default KMS key ID, creating one if none exist."""
|
||||
self._load_keys()
|
||||
for key in self._keys.values():
|
||||
if key.enabled:
|
||||
return key.key_id
|
||||
default_key = self.create_key(description="Default KMS Key")
|
||||
return default_key.key_id
|
||||
|
||||
def get_provider(self, key_id: str | None = None) -> "KMSEncryptionProvider":
|
||||
"""Get a KMS encryption provider for the specified key."""
|
||||
if key_id is None:
|
||||
key_id = self.get_default_key_id()
|
||||
key = self.get_key(key_id)
|
||||
if not key:
|
||||
raise EncryptionError(f"Key not found: {key_id}")
|
||||
if not key.enabled:
|
||||
raise EncryptionError(f"Key is disabled: {key_id}")
|
||||
return KMSEncryptionProvider(self, key_id)
|
||||
|
||||
def enable_key(self, key_id: str) -> None:
|
||||
"""Enable a key."""
|
||||
self._load_keys()
|
||||
key = self._keys.get(key_id)
|
||||
if not key:
|
||||
raise EncryptionError(f"Key not found: {key_id}")
|
||||
key.enabled = True
|
||||
self._save_keys()
|
||||
|
||||
def disable_key(self, key_id: str) -> None:
|
||||
"""Disable a key."""
|
||||
self._load_keys()
|
||||
key = self._keys.get(key_id)
|
||||
if not key:
|
||||
raise EncryptionError(f"Key not found: {key_id}")
|
||||
key.enabled = False
|
||||
self._save_keys()
|
||||
|
||||
def delete_key(self, key_id: str) -> None:
|
||||
"""Delete a key (schedule for deletion in real KMS)."""
|
||||
self._load_keys()
|
||||
if key_id not in self._keys:
|
||||
raise EncryptionError(f"Key not found: {key_id}")
|
||||
del self._keys[key_id]
|
||||
self._save_keys()
|
||||
|
||||
def encrypt(self, key_id: str, plaintext: bytes,
|
||||
context: Dict[str, str] | None = None) -> bytes:
|
||||
"""Encrypt data directly with a KMS key."""
|
||||
self._load_keys()
|
||||
key = self._keys.get(key_id)
|
||||
if not key:
|
||||
raise EncryptionError(f"Key not found: {key_id}")
|
||||
if not key.enabled:
|
||||
raise EncryptionError(f"Key is disabled: {key_id}")
|
||||
|
||||
aesgcm = AESGCM(key.key_material)
|
||||
nonce = secrets.token_bytes(12)
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
|
||||
|
||||
key_id_bytes = key_id.encode("utf-8")
|
||||
return len(key_id_bytes).to_bytes(2, "big") + key_id_bytes + nonce + ciphertext
|
||||
|
||||
def decrypt(self, ciphertext: bytes,
|
||||
context: Dict[str, str] | None = None) -> tuple[bytes, str]:
|
||||
"""Decrypt data directly with a KMS key.
|
||||
|
||||
Returns:
|
||||
Tuple of (plaintext, key_id)
|
||||
"""
|
||||
self._load_keys()
|
||||
|
||||
key_id_len = int.from_bytes(ciphertext[:2], "big")
|
||||
key_id = ciphertext[2:2 + key_id_len].decode("utf-8")
|
||||
rest = ciphertext[2 + key_id_len:]
|
||||
|
||||
key = self._keys.get(key_id)
|
||||
if not key:
|
||||
raise EncryptionError(f"Key not found: {key_id}")
|
||||
if not key.enabled:
|
||||
raise EncryptionError(f"Key is disabled: {key_id}")
|
||||
|
||||
nonce = rest[:12]
|
||||
encrypted = rest[12:]
|
||||
|
||||
aesgcm = AESGCM(key.key_material)
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
try:
|
||||
plaintext = aesgcm.decrypt(nonce, encrypted, aad)
|
||||
return plaintext, key_id
|
||||
except Exception as exc:
|
||||
logger.debug("KMS decrypt operation failed: %s", exc)
|
||||
raise EncryptionError("Decryption failed") from exc
|
||||
|
||||
def generate_data_key(self, key_id: str,
|
||||
context: Dict[str, str] | None = None,
|
||||
key_spec: str = "AES_256") -> tuple[bytes, bytes]:
|
||||
"""Generate a data key and return both plaintext and encrypted versions.
|
||||
|
||||
Args:
|
||||
key_id: The KMS key ID to use for encryption
|
||||
context: Optional encryption context
|
||||
key_spec: Key specification - AES_128 or AES_256 (default)
|
||||
|
||||
Returns:
|
||||
Tuple of (plaintext_key, encrypted_key)
|
||||
"""
|
||||
self._load_keys()
|
||||
key = self._keys.get(key_id)
|
||||
if not key:
|
||||
raise EncryptionError(f"Key not found: {key_id}")
|
||||
if not key.enabled:
|
||||
raise EncryptionError(f"Key is disabled: {key_id}")
|
||||
|
||||
key_bytes = 32 if key_spec == "AES_256" else 16
|
||||
plaintext_key = secrets.token_bytes(key_bytes)
|
||||
|
||||
encrypted_key = self.encrypt(key_id, plaintext_key, context)
|
||||
|
||||
return plaintext_key, encrypted_key
|
||||
|
||||
def decrypt_data_key(self, key_id: str, encrypted_key: bytes,
|
||||
context: Dict[str, str] | None = None) -> bytes:
|
||||
"""Decrypt a data key."""
|
||||
plaintext, _ = self.decrypt(encrypted_key, context)
|
||||
return plaintext
|
||||
|
||||
def re_encrypt(self, ciphertext: bytes, destination_key_id: str,
|
||||
source_context: Dict[str, str] | None = None,
|
||||
destination_context: Dict[str, str] | None = None) -> bytes:
|
||||
"""Re-encrypt data with a different key."""
|
||||
|
||||
plaintext, source_key_id = self.decrypt(ciphertext, source_context)
|
||||
|
||||
return self.encrypt(destination_key_id, plaintext, destination_context)
|
||||
|
||||
def generate_random(self, num_bytes: int = 32) -> bytes:
|
||||
"""Generate cryptographically secure random bytes."""
|
||||
if num_bytes < self.generate_data_key_min_bytes or num_bytes > self.generate_data_key_max_bytes:
|
||||
raise EncryptionError(
|
||||
f"Number of bytes must be between {self.generate_data_key_min_bytes} and {self.generate_data_key_max_bytes}"
|
||||
)
|
||||
return secrets.token_bytes(num_bytes)
|
||||
444
python/app/kms_api.py
Normal file
444
python/app/kms_api.py
Normal file
@@ -0,0 +1,444 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import uuid
|
||||
from typing import Any, Dict
|
||||
|
||||
from flask import Blueprint, Response, current_app, jsonify, request
|
||||
|
||||
from .encryption import ClientEncryptionHelper, EncryptionError
|
||||
from .extensions import limiter
|
||||
from .iam import IamError
|
||||
|
||||
kms_api_bp = Blueprint("kms_api", __name__, url_prefix="/kms")
|
||||
|
||||
|
||||
def _require_principal():
|
||||
"""Require authentication for KMS operations."""
|
||||
from .s3_api import _require_principal as s3_require_principal
|
||||
return s3_require_principal()
|
||||
|
||||
|
||||
def _kms():
|
||||
"""Get KMS manager from app extensions."""
|
||||
return current_app.extensions.get("kms")
|
||||
|
||||
|
||||
def _encryption():
|
||||
"""Get encryption manager from app extensions."""
|
||||
return current_app.extensions.get("encryption")
|
||||
|
||||
|
||||
def _error_response(code: str, message: str, status: int) -> tuple[Dict[str, Any], int]:
|
||||
return {"__type": code, "message": message}, status
|
||||
|
||||
@kms_api_bp.route("/keys", methods=["GET", "POST"])
|
||||
@limiter.limit("30 per minute")
|
||||
def list_or_create_keys():
|
||||
"""List all KMS keys or create a new key."""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
kms = _kms()
|
||||
if not kms:
|
||||
return _error_response("KMSNotEnabled", "KMS is not configured", 400)
|
||||
|
||||
if request.method == "POST":
|
||||
payload = request.get_json(silent=True) or {}
|
||||
key_id = payload.get("KeyId") or payload.get("key_id")
|
||||
description = payload.get("Description") or payload.get("description", "")
|
||||
|
||||
try:
|
||||
key = kms.create_key(description=description, key_id=key_id)
|
||||
current_app.logger.info(
|
||||
"KMS key created",
|
||||
extra={"key_id": key.key_id, "principal": principal.access_key},
|
||||
)
|
||||
return jsonify({
|
||||
"KeyMetadata": key.to_dict(),
|
||||
})
|
||||
except EncryptionError as exc:
|
||||
return _error_response("KMSInternalException", str(exc), 400)
|
||||
|
||||
keys = kms.list_keys()
|
||||
return jsonify({
|
||||
"Keys": [{"KeyId": k.key_id, "KeyArn": k.arn} for k in keys],
|
||||
"Truncated": False,
|
||||
})
|
||||
|
||||
|
||||
@kms_api_bp.route("/keys/<key_id>", methods=["GET", "DELETE"])
|
||||
@limiter.limit("30 per minute")
|
||||
def get_or_delete_key(key_id: str):
|
||||
"""Get or delete a specific KMS key."""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
kms = _kms()
|
||||
if not kms:
|
||||
return _error_response("KMSNotEnabled", "KMS is not configured", 400)
|
||||
|
||||
if request.method == "DELETE":
|
||||
try:
|
||||
kms.delete_key(key_id)
|
||||
current_app.logger.info(
|
||||
"KMS key deleted",
|
||||
extra={"key_id": key_id, "principal": principal.access_key},
|
||||
)
|
||||
return Response(status=204)
|
||||
except EncryptionError as exc:
|
||||
return _error_response("NotFoundException", str(exc), 404)
|
||||
|
||||
key = kms.get_key(key_id)
|
||||
if not key:
|
||||
return _error_response("NotFoundException", f"Key not found: {key_id}", 404)
|
||||
|
||||
return jsonify({"KeyMetadata": key.to_dict()})
|
||||
|
||||
|
||||
@kms_api_bp.route("/keys/<key_id>/enable", methods=["POST"])
|
||||
@limiter.limit("30 per minute")
|
||||
def enable_key(key_id: str):
|
||||
"""Enable a KMS key."""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
kms = _kms()
|
||||
if not kms:
|
||||
return _error_response("KMSNotEnabled", "KMS is not configured", 400)
|
||||
|
||||
try:
|
||||
kms.enable_key(key_id)
|
||||
current_app.logger.info(
|
||||
"KMS key enabled",
|
||||
extra={"key_id": key_id, "principal": principal.access_key},
|
||||
)
|
||||
return Response(status=200)
|
||||
except EncryptionError as exc:
|
||||
return _error_response("NotFoundException", str(exc), 404)
|
||||
|
||||
|
||||
@kms_api_bp.route("/keys/<key_id>/disable", methods=["POST"])
|
||||
@limiter.limit("30 per minute")
|
||||
def disable_key(key_id: str):
|
||||
"""Disable a KMS key."""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
kms = _kms()
|
||||
if not kms:
|
||||
return _error_response("KMSNotEnabled", "KMS is not configured", 400)
|
||||
|
||||
try:
|
||||
kms.disable_key(key_id)
|
||||
current_app.logger.info(
|
||||
"KMS key disabled",
|
||||
extra={"key_id": key_id, "principal": principal.access_key},
|
||||
)
|
||||
return Response(status=200)
|
||||
except EncryptionError as exc:
|
||||
return _error_response("NotFoundException", str(exc), 404)
|
||||
|
||||
@kms_api_bp.route("/encrypt", methods=["POST"])
|
||||
@limiter.limit("60 per minute")
|
||||
def encrypt_data():
|
||||
"""Encrypt data using a KMS key."""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
kms = _kms()
|
||||
if not kms:
|
||||
return _error_response("KMSNotEnabled", "KMS is not configured", 400)
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
key_id = payload.get("KeyId")
|
||||
plaintext_b64 = payload.get("Plaintext")
|
||||
context = payload.get("EncryptionContext")
|
||||
|
||||
if not key_id:
|
||||
return _error_response("ValidationException", "KeyId is required", 400)
|
||||
if not plaintext_b64:
|
||||
return _error_response("ValidationException", "Plaintext is required", 400)
|
||||
|
||||
try:
|
||||
plaintext = base64.b64decode(plaintext_b64)
|
||||
except Exception:
|
||||
return _error_response("ValidationException", "Plaintext must be base64 encoded", 400)
|
||||
|
||||
try:
|
||||
ciphertext = kms.encrypt(key_id, plaintext, context)
|
||||
return jsonify({
|
||||
"CiphertextBlob": base64.b64encode(ciphertext).decode(),
|
||||
"KeyId": key_id,
|
||||
"EncryptionAlgorithm": "SYMMETRIC_DEFAULT",
|
||||
})
|
||||
except EncryptionError as exc:
|
||||
return _error_response("KMSInternalException", str(exc), 400)
|
||||
|
||||
|
||||
@kms_api_bp.route("/decrypt", methods=["POST"])
|
||||
@limiter.limit("60 per minute")
|
||||
def decrypt_data():
|
||||
"""Decrypt data using a KMS key."""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
kms = _kms()
|
||||
if not kms:
|
||||
return _error_response("KMSNotEnabled", "KMS is not configured", 400)
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
ciphertext_b64 = payload.get("CiphertextBlob")
|
||||
context = payload.get("EncryptionContext")
|
||||
|
||||
if not ciphertext_b64:
|
||||
return _error_response("ValidationException", "CiphertextBlob is required", 400)
|
||||
|
||||
try:
|
||||
ciphertext = base64.b64decode(ciphertext_b64)
|
||||
except Exception:
|
||||
return _error_response("ValidationException", "CiphertextBlob must be base64 encoded", 400)
|
||||
|
||||
try:
|
||||
plaintext, key_id = kms.decrypt(ciphertext, context)
|
||||
return jsonify({
|
||||
"Plaintext": base64.b64encode(plaintext).decode(),
|
||||
"KeyId": key_id,
|
||||
"EncryptionAlgorithm": "SYMMETRIC_DEFAULT",
|
||||
})
|
||||
except EncryptionError as exc:
|
||||
return _error_response("InvalidCiphertextException", str(exc), 400)
|
||||
|
||||
|
||||
@kms_api_bp.route("/generate-data-key", methods=["POST"])
|
||||
@limiter.limit("60 per minute")
|
||||
def generate_data_key():
|
||||
"""Generate a data encryption key."""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
kms = _kms()
|
||||
if not kms:
|
||||
return _error_response("KMSNotEnabled", "KMS is not configured", 400)
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
key_id = payload.get("KeyId")
|
||||
context = payload.get("EncryptionContext")
|
||||
key_spec = payload.get("KeySpec", "AES_256")
|
||||
|
||||
if not key_id:
|
||||
return _error_response("ValidationException", "KeyId is required", 400)
|
||||
|
||||
if key_spec not in {"AES_256", "AES_128"}:
|
||||
return _error_response("ValidationException", "KeySpec must be AES_256 or AES_128", 400)
|
||||
|
||||
try:
|
||||
plaintext_key, encrypted_key = kms.generate_data_key(key_id, context)
|
||||
|
||||
if key_spec == "AES_128":
|
||||
plaintext_key = plaintext_key[:16]
|
||||
|
||||
return jsonify({
|
||||
"Plaintext": base64.b64encode(plaintext_key).decode(),
|
||||
"CiphertextBlob": base64.b64encode(encrypted_key).decode(),
|
||||
"KeyId": key_id,
|
||||
})
|
||||
except EncryptionError as exc:
|
||||
return _error_response("KMSInternalException", str(exc), 400)
|
||||
|
||||
|
||||
@kms_api_bp.route("/generate-data-key-without-plaintext", methods=["POST"])
|
||||
@limiter.limit("60 per minute")
|
||||
def generate_data_key_without_plaintext():
|
||||
"""Generate a data encryption key without returning the plaintext."""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
kms = _kms()
|
||||
if not kms:
|
||||
return _error_response("KMSNotEnabled", "KMS is not configured", 400)
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
key_id = payload.get("KeyId")
|
||||
context = payload.get("EncryptionContext")
|
||||
|
||||
if not key_id:
|
||||
return _error_response("ValidationException", "KeyId is required", 400)
|
||||
|
||||
try:
|
||||
_, encrypted_key = kms.generate_data_key(key_id, context)
|
||||
return jsonify({
|
||||
"CiphertextBlob": base64.b64encode(encrypted_key).decode(),
|
||||
"KeyId": key_id,
|
||||
})
|
||||
except EncryptionError as exc:
|
||||
return _error_response("KMSInternalException", str(exc), 400)
|
||||
|
||||
|
||||
@kms_api_bp.route("/re-encrypt", methods=["POST"])
|
||||
@limiter.limit("30 per minute")
|
||||
def re_encrypt():
|
||||
"""Re-encrypt data with a different key."""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
kms = _kms()
|
||||
if not kms:
|
||||
return _error_response("KMSNotEnabled", "KMS is not configured", 400)
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
ciphertext_b64 = payload.get("CiphertextBlob")
|
||||
destination_key_id = payload.get("DestinationKeyId")
|
||||
source_context = payload.get("SourceEncryptionContext")
|
||||
destination_context = payload.get("DestinationEncryptionContext")
|
||||
|
||||
if not ciphertext_b64:
|
||||
return _error_response("ValidationException", "CiphertextBlob is required", 400)
|
||||
if not destination_key_id:
|
||||
return _error_response("ValidationException", "DestinationKeyId is required", 400)
|
||||
|
||||
try:
|
||||
ciphertext = base64.b64decode(ciphertext_b64)
|
||||
except Exception:
|
||||
return _error_response("ValidationException", "CiphertextBlob must be base64 encoded", 400)
|
||||
|
||||
try:
|
||||
plaintext, source_key_id = kms.decrypt(ciphertext, source_context)
|
||||
new_ciphertext = kms.encrypt(destination_key_id, plaintext, destination_context)
|
||||
|
||||
return jsonify({
|
||||
"CiphertextBlob": base64.b64encode(new_ciphertext).decode(),
|
||||
"SourceKeyId": source_key_id,
|
||||
"KeyId": destination_key_id,
|
||||
})
|
||||
except EncryptionError as exc:
|
||||
return _error_response("KMSInternalException", str(exc), 400)
|
||||
|
||||
|
||||
@kms_api_bp.route("/generate-random", methods=["POST"])
|
||||
@limiter.limit("60 per minute")
|
||||
def generate_random():
|
||||
"""Generate random bytes."""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
kms = _kms()
|
||||
if not kms:
|
||||
return _error_response("KMSNotEnabled", "KMS is not configured", 400)
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
num_bytes = payload.get("NumberOfBytes", 32)
|
||||
|
||||
try:
|
||||
num_bytes = int(num_bytes)
|
||||
except (TypeError, ValueError):
|
||||
return _error_response("ValidationException", "NumberOfBytes must be an integer", 400)
|
||||
|
||||
try:
|
||||
random_bytes = kms.generate_random(num_bytes)
|
||||
return jsonify({
|
||||
"Plaintext": base64.b64encode(random_bytes).decode(),
|
||||
})
|
||||
except EncryptionError as exc:
|
||||
return _error_response("ValidationException", str(exc), 400)
|
||||
|
||||
@kms_api_bp.route("/client/generate-key", methods=["POST"])
|
||||
@limiter.limit("30 per minute")
|
||||
def generate_client_key():
|
||||
"""Generate a client-side encryption key."""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
key_info = ClientEncryptionHelper.generate_client_key()
|
||||
return jsonify(key_info)
|
||||
|
||||
|
||||
@kms_api_bp.route("/client/encrypt", methods=["POST"])
|
||||
@limiter.limit("60 per minute")
|
||||
def client_encrypt():
|
||||
"""Encrypt data using client-side encryption."""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
plaintext_b64 = payload.get("Plaintext")
|
||||
key_b64 = payload.get("Key")
|
||||
|
||||
if not plaintext_b64 or not key_b64:
|
||||
return _error_response("ValidationException", "Plaintext and Key are required", 400)
|
||||
|
||||
try:
|
||||
plaintext = base64.b64decode(plaintext_b64)
|
||||
result = ClientEncryptionHelper.encrypt_with_key(plaintext, key_b64)
|
||||
return jsonify(result)
|
||||
except Exception as exc:
|
||||
return _error_response("EncryptionError", str(exc), 400)
|
||||
|
||||
|
||||
@kms_api_bp.route("/client/decrypt", methods=["POST"])
|
||||
@limiter.limit("60 per minute")
|
||||
def client_decrypt():
|
||||
"""Decrypt data using client-side encryption."""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
ciphertext_b64 = payload.get("Ciphertext") or payload.get("ciphertext")
|
||||
nonce_b64 = payload.get("Nonce") or payload.get("nonce")
|
||||
key_b64 = payload.get("Key") or payload.get("key")
|
||||
|
||||
if not ciphertext_b64 or not nonce_b64 or not key_b64:
|
||||
return _error_response("ValidationException", "Ciphertext, Nonce, and Key are required", 400)
|
||||
|
||||
try:
|
||||
plaintext = ClientEncryptionHelper.decrypt_with_key(ciphertext_b64, nonce_b64, key_b64)
|
||||
return jsonify({
|
||||
"Plaintext": base64.b64encode(plaintext).decode(),
|
||||
})
|
||||
except Exception as exc:
|
||||
return _error_response("DecryptionError", str(exc), 400)
|
||||
|
||||
@kms_api_bp.route("/materials/<key_id>", methods=["POST"])
|
||||
@limiter.limit("60 per minute")
|
||||
def get_encryption_materials(key_id: str):
|
||||
"""Get encryption materials for client-side S3 encryption.
|
||||
|
||||
This is used by S3 encryption clients that want to use KMS for
|
||||
key management but perform encryption client-side.
|
||||
"""
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return error
|
||||
|
||||
kms = _kms()
|
||||
if not kms:
|
||||
return _error_response("KMSNotEnabled", "KMS is not configured", 400)
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
context = payload.get("EncryptionContext")
|
||||
|
||||
try:
|
||||
plaintext_key, encrypted_key = kms.generate_data_key(key_id, context)
|
||||
|
||||
return jsonify({
|
||||
"PlaintextKey": base64.b64encode(plaintext_key).decode(),
|
||||
"EncryptedKey": base64.b64encode(encrypted_key).decode(),
|
||||
"KeyId": key_id,
|
||||
"Algorithm": "AES-256-GCM",
|
||||
"KeyWrapAlgorithm": "kms",
|
||||
})
|
||||
except EncryptionError as exc:
|
||||
return _error_response("KMSInternalException", str(exc), 400)
|
||||
340
python/app/lifecycle.py
Normal file
340
python/app/lifecycle.py
Normal file
@@ -0,0 +1,340 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .storage import ObjectStorage, StorageError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class LifecycleResult:
|
||||
bucket_name: str
|
||||
objects_deleted: int = 0
|
||||
versions_deleted: int = 0
|
||||
uploads_aborted: int = 0
|
||||
errors: List[str] = field(default_factory=list)
|
||||
execution_time_seconds: float = 0.0
|
||||
|
||||
|
||||
@dataclass
|
||||
class LifecycleExecutionRecord:
|
||||
timestamp: float
|
||||
bucket_name: str
|
||||
objects_deleted: int
|
||||
versions_deleted: int
|
||||
uploads_aborted: int
|
||||
errors: List[str]
|
||||
execution_time_seconds: float
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"timestamp": self.timestamp,
|
||||
"bucket_name": self.bucket_name,
|
||||
"objects_deleted": self.objects_deleted,
|
||||
"versions_deleted": self.versions_deleted,
|
||||
"uploads_aborted": self.uploads_aborted,
|
||||
"errors": self.errors,
|
||||
"execution_time_seconds": self.execution_time_seconds,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "LifecycleExecutionRecord":
|
||||
return cls(
|
||||
timestamp=data["timestamp"],
|
||||
bucket_name=data["bucket_name"],
|
||||
objects_deleted=data["objects_deleted"],
|
||||
versions_deleted=data["versions_deleted"],
|
||||
uploads_aborted=data["uploads_aborted"],
|
||||
errors=data.get("errors", []),
|
||||
execution_time_seconds=data["execution_time_seconds"],
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_result(cls, result: LifecycleResult) -> "LifecycleExecutionRecord":
|
||||
return cls(
|
||||
timestamp=time.time(),
|
||||
bucket_name=result.bucket_name,
|
||||
objects_deleted=result.objects_deleted,
|
||||
versions_deleted=result.versions_deleted,
|
||||
uploads_aborted=result.uploads_aborted,
|
||||
errors=result.errors.copy(),
|
||||
execution_time_seconds=result.execution_time_seconds,
|
||||
)
|
||||
|
||||
|
||||
class LifecycleHistoryStore:
|
||||
def __init__(self, storage_root: Path, max_history_per_bucket: int = 50) -> None:
|
||||
self.storage_root = storage_root
|
||||
self.max_history_per_bucket = max_history_per_bucket
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def _get_history_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "lifecycle_history.json"
|
||||
|
||||
def load_history(self, bucket_name: str) -> List[LifecycleExecutionRecord]:
|
||||
path = self._get_history_path(bucket_name)
|
||||
if not path.exists():
|
||||
return []
|
||||
try:
|
||||
with open(path, "r") as f:
|
||||
data = json.load(f)
|
||||
return [LifecycleExecutionRecord.from_dict(d) for d in data.get("executions", [])]
|
||||
except (OSError, ValueError, KeyError) as e:
|
||||
logger.error(f"Failed to load lifecycle history for {bucket_name}: {e}")
|
||||
return []
|
||||
|
||||
def save_history(self, bucket_name: str, records: List[LifecycleExecutionRecord]) -> None:
|
||||
path = self._get_history_path(bucket_name)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
data = {"executions": [r.to_dict() for r in records[:self.max_history_per_bucket]]}
|
||||
try:
|
||||
with open(path, "w") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to save lifecycle history for {bucket_name}: {e}")
|
||||
|
||||
def add_record(self, bucket_name: str, record: LifecycleExecutionRecord) -> None:
|
||||
with self._lock:
|
||||
records = self.load_history(bucket_name)
|
||||
records.insert(0, record)
|
||||
self.save_history(bucket_name, records)
|
||||
|
||||
def get_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[LifecycleExecutionRecord]:
|
||||
records = self.load_history(bucket_name)
|
||||
return records[offset:offset + limit]
|
||||
|
||||
|
||||
class LifecycleManager:
|
||||
def __init__(
|
||||
self,
|
||||
storage: ObjectStorage,
|
||||
interval_seconds: int = 3600,
|
||||
storage_root: Optional[Path] = None,
|
||||
max_history_per_bucket: int = 50,
|
||||
):
|
||||
self.storage = storage
|
||||
self.interval_seconds = interval_seconds
|
||||
self.storage_root = storage_root
|
||||
self._timer: Optional[threading.Timer] = None
|
||||
self._shutdown = False
|
||||
self._lock = threading.Lock()
|
||||
self.history_store = LifecycleHistoryStore(storage_root, max_history_per_bucket) if storage_root else None
|
||||
|
||||
def start(self) -> None:
|
||||
if self._timer is not None:
|
||||
return
|
||||
self._shutdown = False
|
||||
self._schedule_next()
|
||||
logger.info(f"Lifecycle manager started with interval {self.interval_seconds}s")
|
||||
|
||||
def stop(self) -> None:
|
||||
self._shutdown = True
|
||||
if self._timer:
|
||||
self._timer.cancel()
|
||||
self._timer = None
|
||||
logger.info("Lifecycle manager stopped")
|
||||
|
||||
def _schedule_next(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
self._timer = threading.Timer(self.interval_seconds, self._run_enforcement)
|
||||
self._timer.daemon = True
|
||||
self._timer.start()
|
||||
|
||||
def _run_enforcement(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
try:
|
||||
self.enforce_all_buckets()
|
||||
except Exception as e:
|
||||
logger.error(f"Lifecycle enforcement failed: {e}")
|
||||
finally:
|
||||
self._schedule_next()
|
||||
|
||||
def enforce_all_buckets(self) -> Dict[str, LifecycleResult]:
|
||||
results = {}
|
||||
try:
|
||||
buckets = self.storage.list_buckets()
|
||||
for bucket in buckets:
|
||||
result = self.enforce_rules(bucket.name)
|
||||
if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0:
|
||||
results[bucket.name] = result
|
||||
except StorageError as e:
|
||||
logger.error(f"Failed to list buckets for lifecycle: {e}")
|
||||
return results
|
||||
|
||||
def enforce_rules(self, bucket_name: str) -> LifecycleResult:
|
||||
start_time = time.time()
|
||||
result = LifecycleResult(bucket_name=bucket_name)
|
||||
|
||||
try:
|
||||
lifecycle = self.storage.get_bucket_lifecycle(bucket_name)
|
||||
if not lifecycle:
|
||||
return result
|
||||
|
||||
for rule in lifecycle:
|
||||
if rule.get("Status") != "Enabled":
|
||||
continue
|
||||
rule_id = rule.get("ID", "unknown")
|
||||
prefix = rule.get("Prefix", rule.get("Filter", {}).get("Prefix", ""))
|
||||
|
||||
self._enforce_expiration(bucket_name, rule, prefix, result)
|
||||
self._enforce_noncurrent_expiration(bucket_name, rule, prefix, result)
|
||||
self._enforce_abort_multipart(bucket_name, rule, result)
|
||||
|
||||
except StorageError as e:
|
||||
result.errors.append(str(e))
|
||||
logger.error(f"Lifecycle enforcement error for {bucket_name}: {e}")
|
||||
|
||||
result.execution_time_seconds = time.time() - start_time
|
||||
if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0 or result.errors:
|
||||
logger.info(
|
||||
f"Lifecycle enforcement for {bucket_name}: "
|
||||
f"deleted={result.objects_deleted}, versions={result.versions_deleted}, "
|
||||
f"aborted={result.uploads_aborted}, time={result.execution_time_seconds:.2f}s"
|
||||
)
|
||||
if self.history_store:
|
||||
record = LifecycleExecutionRecord.from_result(result)
|
||||
self.history_store.add_record(bucket_name, record)
|
||||
return result
|
||||
|
||||
def _enforce_expiration(
|
||||
self, bucket_name: str, rule: Dict[str, Any], prefix: str, result: LifecycleResult
|
||||
) -> None:
|
||||
expiration = rule.get("Expiration", {})
|
||||
if not expiration:
|
||||
return
|
||||
|
||||
days = expiration.get("Days")
|
||||
date_str = expiration.get("Date")
|
||||
|
||||
if days:
|
||||
cutoff = datetime.now(timezone.utc) - timedelta(days=days)
|
||||
elif date_str:
|
||||
try:
|
||||
cutoff = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
|
||||
except ValueError:
|
||||
return
|
||||
else:
|
||||
return
|
||||
|
||||
try:
|
||||
objects = self.storage.list_objects_all(bucket_name)
|
||||
for obj in objects:
|
||||
if prefix and not obj.key.startswith(prefix):
|
||||
continue
|
||||
if obj.last_modified < cutoff:
|
||||
try:
|
||||
self.storage.delete_object(bucket_name, obj.key)
|
||||
result.objects_deleted += 1
|
||||
except StorageError as e:
|
||||
result.errors.append(f"Failed to delete {obj.key}: {e}")
|
||||
except StorageError as e:
|
||||
result.errors.append(f"Failed to list objects: {e}")
|
||||
|
||||
def _enforce_noncurrent_expiration(
|
||||
self, bucket_name: str, rule: Dict[str, Any], prefix: str, result: LifecycleResult
|
||||
) -> None:
|
||||
noncurrent = rule.get("NoncurrentVersionExpiration", {})
|
||||
noncurrent_days = noncurrent.get("NoncurrentDays")
|
||||
if not noncurrent_days:
|
||||
return
|
||||
|
||||
cutoff = datetime.now(timezone.utc) - timedelta(days=noncurrent_days)
|
||||
|
||||
try:
|
||||
objects = self.storage.list_objects_all(bucket_name)
|
||||
for obj in objects:
|
||||
if prefix and not obj.key.startswith(prefix):
|
||||
continue
|
||||
try:
|
||||
versions = self.storage.list_object_versions(bucket_name, obj.key)
|
||||
for version in versions:
|
||||
archived_at_str = version.get("archived_at", "")
|
||||
if not archived_at_str:
|
||||
continue
|
||||
try:
|
||||
archived_at = datetime.fromisoformat(archived_at_str.replace("Z", "+00:00"))
|
||||
if archived_at < cutoff:
|
||||
version_id = version.get("version_id")
|
||||
if version_id:
|
||||
self.storage.delete_object_version(bucket_name, obj.key, version_id)
|
||||
result.versions_deleted += 1
|
||||
except (ValueError, StorageError) as e:
|
||||
result.errors.append(f"Failed to process version: {e}")
|
||||
except StorageError:
|
||||
pass
|
||||
except StorageError as e:
|
||||
result.errors.append(f"Failed to list objects: {e}")
|
||||
|
||||
try:
|
||||
orphaned = self.storage.list_orphaned_objects(bucket_name)
|
||||
for item in orphaned:
|
||||
obj_key = item.get("key", "")
|
||||
if prefix and not obj_key.startswith(prefix):
|
||||
continue
|
||||
try:
|
||||
versions = self.storage.list_object_versions(bucket_name, obj_key)
|
||||
for version in versions:
|
||||
archived_at_str = version.get("archived_at", "")
|
||||
if not archived_at_str:
|
||||
continue
|
||||
try:
|
||||
archived_at = datetime.fromisoformat(archived_at_str.replace("Z", "+00:00"))
|
||||
if archived_at < cutoff:
|
||||
version_id = version.get("version_id")
|
||||
if version_id:
|
||||
self.storage.delete_object_version(bucket_name, obj_key, version_id)
|
||||
result.versions_deleted += 1
|
||||
except (ValueError, StorageError) as e:
|
||||
result.errors.append(f"Failed to process orphaned version: {e}")
|
||||
except StorageError:
|
||||
pass
|
||||
except StorageError as e:
|
||||
result.errors.append(f"Failed to list orphaned objects: {e}")
|
||||
|
||||
def _enforce_abort_multipart(
|
||||
self, bucket_name: str, rule: Dict[str, Any], result: LifecycleResult
|
||||
) -> None:
|
||||
abort_config = rule.get("AbortIncompleteMultipartUpload", {})
|
||||
days_after = abort_config.get("DaysAfterInitiation")
|
||||
if not days_after:
|
||||
return
|
||||
|
||||
cutoff = datetime.now(timezone.utc) - timedelta(days=days_after)
|
||||
|
||||
try:
|
||||
uploads = self.storage.list_multipart_uploads(bucket_name)
|
||||
for upload in uploads:
|
||||
created_at_str = upload.get("created_at", "")
|
||||
if not created_at_str:
|
||||
continue
|
||||
try:
|
||||
created_at = datetime.fromisoformat(created_at_str.replace("Z", "+00:00"))
|
||||
if created_at < cutoff:
|
||||
upload_id = upload.get("upload_id")
|
||||
if upload_id:
|
||||
self.storage.abort_multipart_upload(bucket_name, upload_id)
|
||||
result.uploads_aborted += 1
|
||||
except (ValueError, StorageError) as e:
|
||||
result.errors.append(f"Failed to abort upload: {e}")
|
||||
except StorageError as e:
|
||||
result.errors.append(f"Failed to list multipart uploads: {e}")
|
||||
|
||||
def run_now(self, bucket_name: Optional[str] = None) -> Dict[str, LifecycleResult]:
|
||||
if bucket_name:
|
||||
return {bucket_name: self.enforce_rules(bucket_name)}
|
||||
return self.enforce_all_buckets()
|
||||
|
||||
def get_execution_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[LifecycleExecutionRecord]:
|
||||
if not self.history_store:
|
||||
return []
|
||||
return self.history_store.get_history(bucket_name, limit, offset)
|
||||
406
python/app/notifications.py
Normal file
406
python/app/notifications.py
Normal file
@@ -0,0 +1,406 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import json
|
||||
import logging
|
||||
import queue
|
||||
import socket
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from urllib3.util.connection import create_connection as _urllib3_create_connection
|
||||
|
||||
|
||||
def _resolve_and_check_url(url: str, allow_internal: bool = False) -> Optional[str]:
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
hostname = parsed.hostname
|
||||
if not hostname:
|
||||
return None
|
||||
cloud_metadata_hosts = {
|
||||
"metadata.google.internal",
|
||||
"169.254.169.254",
|
||||
}
|
||||
if hostname.lower() in cloud_metadata_hosts:
|
||||
return None
|
||||
if allow_internal:
|
||||
return hostname
|
||||
blocked_hosts = {
|
||||
"localhost",
|
||||
"127.0.0.1",
|
||||
"0.0.0.0",
|
||||
"::1",
|
||||
"[::1]",
|
||||
}
|
||||
if hostname.lower() in blocked_hosts:
|
||||
return None
|
||||
try:
|
||||
resolved_ip = socket.gethostbyname(hostname)
|
||||
ip = ipaddress.ip_address(resolved_ip)
|
||||
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
|
||||
return None
|
||||
return resolved_ip
|
||||
except (socket.gaierror, ValueError):
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
||||
return _resolve_and_check_url(url, allow_internal) is not None
|
||||
|
||||
|
||||
_dns_pin_lock = threading.Lock()
|
||||
|
||||
|
||||
def _pinned_post(url: str, pinned_ip: str, **kwargs: Any) -> requests.Response:
|
||||
parsed = urlparse(url)
|
||||
hostname = parsed.hostname or ""
|
||||
session = requests.Session()
|
||||
original_create = _urllib3_create_connection
|
||||
|
||||
def _create_pinned(address: Any, *args: Any, **kw: Any) -> Any:
|
||||
host, req_port = address
|
||||
if host == hostname:
|
||||
return original_create((pinned_ip, req_port), *args, **kw)
|
||||
return original_create(address, *args, **kw)
|
||||
|
||||
import urllib3.util.connection as _conn_mod
|
||||
with _dns_pin_lock:
|
||||
_conn_mod.create_connection = _create_pinned
|
||||
try:
|
||||
return session.post(url, **kwargs)
|
||||
finally:
|
||||
_conn_mod.create_connection = original_create
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class NotificationEvent:
|
||||
event_name: str
|
||||
bucket_name: str
|
||||
object_key: str
|
||||
object_size: int = 0
|
||||
etag: str = ""
|
||||
version_id: Optional[str] = None
|
||||
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
request_id: str = field(default_factory=lambda: uuid.uuid4().hex)
|
||||
source_ip: str = ""
|
||||
user_identity: str = ""
|
||||
|
||||
def to_s3_event(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"Records": [
|
||||
{
|
||||
"eventVersion": "2.1",
|
||||
"eventSource": "myfsio:s3",
|
||||
"awsRegion": "local",
|
||||
"eventTime": self.timestamp.strftime("%Y-%m-%dT%H:%M:%S.000Z"),
|
||||
"eventName": self.event_name,
|
||||
"userIdentity": {
|
||||
"principalId": self.user_identity or "ANONYMOUS",
|
||||
},
|
||||
"requestParameters": {
|
||||
"sourceIPAddress": self.source_ip or "127.0.0.1",
|
||||
},
|
||||
"responseElements": {
|
||||
"x-amz-request-id": self.request_id,
|
||||
"x-amz-id-2": self.request_id,
|
||||
},
|
||||
"s3": {
|
||||
"s3SchemaVersion": "1.0",
|
||||
"configurationId": "notification",
|
||||
"bucket": {
|
||||
"name": self.bucket_name,
|
||||
"ownerIdentity": {"principalId": "local"},
|
||||
"arn": f"arn:aws:s3:::{self.bucket_name}",
|
||||
},
|
||||
"object": {
|
||||
"key": self.object_key,
|
||||
"size": self.object_size,
|
||||
"eTag": self.etag,
|
||||
"versionId": self.version_id or "null",
|
||||
"sequencer": f"{int(time.time() * 1000):016X}",
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class WebhookDestination:
|
||||
url: str
|
||||
headers: Dict[str, str] = field(default_factory=dict)
|
||||
timeout_seconds: int = 30
|
||||
retry_count: int = 3
|
||||
retry_delay_seconds: int = 1
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"url": self.url,
|
||||
"headers": self.headers,
|
||||
"timeout_seconds": self.timeout_seconds,
|
||||
"retry_count": self.retry_count,
|
||||
"retry_delay_seconds": self.retry_delay_seconds,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "WebhookDestination":
|
||||
return cls(
|
||||
url=data.get("url", ""),
|
||||
headers=data.get("headers", {}),
|
||||
timeout_seconds=data.get("timeout_seconds", 30),
|
||||
retry_count=data.get("retry_count", 3),
|
||||
retry_delay_seconds=data.get("retry_delay_seconds", 1),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class NotificationConfiguration:
|
||||
id: str
|
||||
events: List[str]
|
||||
destination: WebhookDestination
|
||||
prefix_filter: str = ""
|
||||
suffix_filter: str = ""
|
||||
|
||||
def matches_event(self, event_name: str, object_key: str) -> bool:
|
||||
event_match = False
|
||||
for pattern in self.events:
|
||||
if pattern.endswith("*"):
|
||||
base = pattern[:-1]
|
||||
if event_name.startswith(base):
|
||||
event_match = True
|
||||
break
|
||||
elif pattern == event_name:
|
||||
event_match = True
|
||||
break
|
||||
|
||||
if not event_match:
|
||||
return False
|
||||
|
||||
if self.prefix_filter and not object_key.startswith(self.prefix_filter):
|
||||
return False
|
||||
if self.suffix_filter and not object_key.endswith(self.suffix_filter):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"Id": self.id,
|
||||
"Events": self.events,
|
||||
"Destination": self.destination.to_dict(),
|
||||
"Filter": {
|
||||
"Key": {
|
||||
"FilterRules": [
|
||||
{"Name": "prefix", "Value": self.prefix_filter},
|
||||
{"Name": "suffix", "Value": self.suffix_filter},
|
||||
]
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "NotificationConfiguration":
|
||||
prefix = ""
|
||||
suffix = ""
|
||||
filter_data = data.get("Filter", {})
|
||||
key_filter = filter_data.get("Key", {})
|
||||
for rule in key_filter.get("FilterRules", []):
|
||||
if rule.get("Name") == "prefix":
|
||||
prefix = rule.get("Value", "")
|
||||
elif rule.get("Name") == "suffix":
|
||||
suffix = rule.get("Value", "")
|
||||
|
||||
return cls(
|
||||
id=data.get("Id", uuid.uuid4().hex),
|
||||
events=data.get("Events", []),
|
||||
destination=WebhookDestination.from_dict(data.get("Destination", {})),
|
||||
prefix_filter=prefix,
|
||||
suffix_filter=suffix,
|
||||
)
|
||||
|
||||
|
||||
class NotificationService:
|
||||
def __init__(self, storage_root: Path, worker_count: int = 2, allow_internal_endpoints: bool = False):
|
||||
self.storage_root = storage_root
|
||||
self._allow_internal_endpoints = allow_internal_endpoints
|
||||
self._configs: Dict[str, List[NotificationConfiguration]] = {}
|
||||
self._queue: queue.Queue[tuple[NotificationEvent, WebhookDestination]] = queue.Queue()
|
||||
self._workers: List[threading.Thread] = []
|
||||
self._shutdown = threading.Event()
|
||||
self._stats = {
|
||||
"events_queued": 0,
|
||||
"events_sent": 0,
|
||||
"events_failed": 0,
|
||||
}
|
||||
|
||||
for i in range(worker_count):
|
||||
worker = threading.Thread(target=self._worker_loop, name=f"notification-worker-{i}", daemon=True)
|
||||
worker.start()
|
||||
self._workers.append(worker)
|
||||
|
||||
def _config_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "notifications.json"
|
||||
|
||||
def get_bucket_notifications(self, bucket_name: str) -> List[NotificationConfiguration]:
|
||||
if bucket_name in self._configs:
|
||||
return self._configs[bucket_name]
|
||||
|
||||
config_path = self._config_path(bucket_name)
|
||||
if not config_path.exists():
|
||||
return []
|
||||
|
||||
try:
|
||||
data = json.loads(config_path.read_text(encoding="utf-8"))
|
||||
configs = [NotificationConfiguration.from_dict(c) for c in data.get("configurations", [])]
|
||||
self._configs[bucket_name] = configs
|
||||
return configs
|
||||
except (json.JSONDecodeError, OSError) as e:
|
||||
logger.warning(f"Failed to load notification config for {bucket_name}: {e}")
|
||||
return []
|
||||
|
||||
def set_bucket_notifications(
|
||||
self, bucket_name: str, configurations: List[NotificationConfiguration]
|
||||
) -> None:
|
||||
config_path = self._config_path(bucket_name)
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
data = {"configurations": [c.to_dict() for c in configurations]}
|
||||
config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
|
||||
self._configs[bucket_name] = configurations
|
||||
|
||||
def delete_bucket_notifications(self, bucket_name: str) -> None:
|
||||
config_path = self._config_path(bucket_name)
|
||||
try:
|
||||
if config_path.exists():
|
||||
config_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
self._configs.pop(bucket_name, None)
|
||||
|
||||
def emit_event(self, event: NotificationEvent) -> None:
|
||||
configurations = self.get_bucket_notifications(event.bucket_name)
|
||||
if not configurations:
|
||||
return
|
||||
|
||||
for config in configurations:
|
||||
if config.matches_event(event.event_name, event.object_key):
|
||||
self._queue.put((event, config.destination))
|
||||
self._stats["events_queued"] += 1
|
||||
logger.debug(
|
||||
f"Queued notification for {event.event_name} on {event.bucket_name}/{event.object_key}"
|
||||
)
|
||||
|
||||
def emit_object_created(
|
||||
self,
|
||||
bucket_name: str,
|
||||
object_key: str,
|
||||
*,
|
||||
size: int = 0,
|
||||
etag: str = "",
|
||||
version_id: Optional[str] = None,
|
||||
request_id: str = "",
|
||||
source_ip: str = "",
|
||||
user_identity: str = "",
|
||||
operation: str = "Put",
|
||||
) -> None:
|
||||
event = NotificationEvent(
|
||||
event_name=f"s3:ObjectCreated:{operation}",
|
||||
bucket_name=bucket_name,
|
||||
object_key=object_key,
|
||||
object_size=size,
|
||||
etag=etag,
|
||||
version_id=version_id,
|
||||
request_id=request_id or uuid.uuid4().hex,
|
||||
source_ip=source_ip,
|
||||
user_identity=user_identity,
|
||||
)
|
||||
self.emit_event(event)
|
||||
|
||||
def emit_object_removed(
|
||||
self,
|
||||
bucket_name: str,
|
||||
object_key: str,
|
||||
*,
|
||||
version_id: Optional[str] = None,
|
||||
request_id: str = "",
|
||||
source_ip: str = "",
|
||||
user_identity: str = "",
|
||||
operation: str = "Delete",
|
||||
) -> None:
|
||||
event = NotificationEvent(
|
||||
event_name=f"s3:ObjectRemoved:{operation}",
|
||||
bucket_name=bucket_name,
|
||||
object_key=object_key,
|
||||
version_id=version_id,
|
||||
request_id=request_id or uuid.uuid4().hex,
|
||||
source_ip=source_ip,
|
||||
user_identity=user_identity,
|
||||
)
|
||||
self.emit_event(event)
|
||||
|
||||
def _worker_loop(self) -> None:
|
||||
while not self._shutdown.is_set():
|
||||
try:
|
||||
event, destination = self._queue.get(timeout=1.0)
|
||||
except queue.Empty:
|
||||
continue
|
||||
|
||||
try:
|
||||
self._send_notification(event, destination)
|
||||
self._stats["events_sent"] += 1
|
||||
except Exception as e:
|
||||
self._stats["events_failed"] += 1
|
||||
logger.error(f"Failed to send notification: {e}")
|
||||
finally:
|
||||
self._queue.task_done()
|
||||
|
||||
def _send_notification(self, event: NotificationEvent, destination: WebhookDestination) -> None:
|
||||
resolved_ip = _resolve_and_check_url(destination.url, allow_internal=self._allow_internal_endpoints)
|
||||
if not resolved_ip:
|
||||
raise RuntimeError(f"Blocked request (SSRF protection): {destination.url}")
|
||||
payload = event.to_s3_event()
|
||||
headers = {"Content-Type": "application/json", **destination.headers}
|
||||
|
||||
last_error = None
|
||||
for attempt in range(destination.retry_count):
|
||||
try:
|
||||
response = _pinned_post(
|
||||
destination.url,
|
||||
resolved_ip,
|
||||
json=payload,
|
||||
headers=headers,
|
||||
timeout=destination.timeout_seconds,
|
||||
)
|
||||
if response.status_code < 400:
|
||||
logger.info(
|
||||
f"Notification sent: {event.event_name} -> {destination.url} (status={response.status_code})"
|
||||
)
|
||||
return
|
||||
last_error = f"HTTP {response.status_code}: {response.text[:200]}"
|
||||
except requests.RequestException as e:
|
||||
last_error = str(e)
|
||||
|
||||
if attempt < destination.retry_count - 1:
|
||||
time.sleep(destination.retry_delay_seconds * (attempt + 1))
|
||||
|
||||
raise RuntimeError(f"Failed after {destination.retry_count} attempts: {last_error}")
|
||||
|
||||
def get_stats(self) -> Dict[str, int]:
|
||||
return dict(self._stats)
|
||||
|
||||
def shutdown(self) -> None:
|
||||
self._shutdown.set()
|
||||
for worker in self._workers:
|
||||
worker.join(timeout=5.0)
|
||||
234
python/app/object_lock.py
Normal file
234
python/app/object_lock.py
Normal file
@@ -0,0 +1,234 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
class RetentionMode(Enum):
|
||||
GOVERNANCE = "GOVERNANCE"
|
||||
COMPLIANCE = "COMPLIANCE"
|
||||
|
||||
|
||||
class ObjectLockError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class ObjectLockRetention:
|
||||
mode: RetentionMode
|
||||
retain_until_date: datetime
|
||||
|
||||
def to_dict(self) -> Dict[str, str]:
|
||||
return {
|
||||
"Mode": self.mode.value,
|
||||
"RetainUntilDate": self.retain_until_date.isoformat(),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> Optional["ObjectLockRetention"]:
|
||||
if not data:
|
||||
return None
|
||||
mode_str = data.get("Mode")
|
||||
date_str = data.get("RetainUntilDate")
|
||||
if not mode_str or not date_str:
|
||||
return None
|
||||
try:
|
||||
mode = RetentionMode(mode_str)
|
||||
retain_until = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
|
||||
return cls(mode=mode, retain_until_date=retain_until)
|
||||
except (ValueError, KeyError):
|
||||
return None
|
||||
|
||||
def is_expired(self) -> bool:
|
||||
return datetime.now(timezone.utc) > self.retain_until_date
|
||||
|
||||
|
||||
@dataclass
|
||||
class ObjectLockConfig:
|
||||
enabled: bool = False
|
||||
default_retention: Optional[ObjectLockRetention] = None
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
result: Dict[str, Any] = {"ObjectLockEnabled": "Enabled" if self.enabled else "Disabled"}
|
||||
if self.default_retention:
|
||||
result["Rule"] = {
|
||||
"DefaultRetention": {
|
||||
"Mode": self.default_retention.mode.value,
|
||||
"Days": None,
|
||||
"Years": None,
|
||||
}
|
||||
}
|
||||
return result
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "ObjectLockConfig":
|
||||
enabled = data.get("ObjectLockEnabled") == "Enabled"
|
||||
default_retention = None
|
||||
rule = data.get("Rule")
|
||||
if rule and "DefaultRetention" in rule:
|
||||
dr = rule["DefaultRetention"]
|
||||
mode_str = dr.get("Mode", "GOVERNANCE")
|
||||
days = dr.get("Days")
|
||||
years = dr.get("Years")
|
||||
if days or years:
|
||||
from datetime import timedelta
|
||||
now = datetime.now(timezone.utc)
|
||||
if years:
|
||||
delta = timedelta(days=int(years) * 365)
|
||||
else:
|
||||
delta = timedelta(days=int(days))
|
||||
default_retention = ObjectLockRetention(
|
||||
mode=RetentionMode(mode_str),
|
||||
retain_until_date=now + delta,
|
||||
)
|
||||
return cls(enabled=enabled, default_retention=default_retention)
|
||||
|
||||
|
||||
class ObjectLockService:
|
||||
def __init__(self, storage_root: Path):
|
||||
self.storage_root = storage_root
|
||||
self._config_cache: Dict[str, ObjectLockConfig] = {}
|
||||
|
||||
def _bucket_lock_config_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "object_lock.json"
|
||||
|
||||
def _object_lock_meta_path(self, bucket_name: str, object_key: str) -> Path:
|
||||
safe_key = object_key.replace("/", "_").replace("\\", "_")
|
||||
return (
|
||||
self.storage_root / ".myfsio.sys" / "buckets" / bucket_name /
|
||||
"locks" / f"{safe_key}.lock.json"
|
||||
)
|
||||
|
||||
def get_bucket_lock_config(self, bucket_name: str) -> ObjectLockConfig:
|
||||
if bucket_name in self._config_cache:
|
||||
return self._config_cache[bucket_name]
|
||||
|
||||
config_path = self._bucket_lock_config_path(bucket_name)
|
||||
if not config_path.exists():
|
||||
return ObjectLockConfig(enabled=False)
|
||||
|
||||
try:
|
||||
data = json.loads(config_path.read_text(encoding="utf-8"))
|
||||
config = ObjectLockConfig.from_dict(data)
|
||||
self._config_cache[bucket_name] = config
|
||||
return config
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return ObjectLockConfig(enabled=False)
|
||||
|
||||
def set_bucket_lock_config(self, bucket_name: str, config: ObjectLockConfig) -> None:
|
||||
config_path = self._bucket_lock_config_path(bucket_name)
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
config_path.write_text(json.dumps(config.to_dict()), encoding="utf-8")
|
||||
self._config_cache[bucket_name] = config
|
||||
|
||||
def enable_bucket_lock(self, bucket_name: str) -> None:
|
||||
config = self.get_bucket_lock_config(bucket_name)
|
||||
config.enabled = True
|
||||
self.set_bucket_lock_config(bucket_name, config)
|
||||
|
||||
def is_bucket_lock_enabled(self, bucket_name: str) -> bool:
|
||||
return self.get_bucket_lock_config(bucket_name).enabled
|
||||
|
||||
def get_object_retention(self, bucket_name: str, object_key: str) -> Optional[ObjectLockRetention]:
|
||||
meta_path = self._object_lock_meta_path(bucket_name, object_key)
|
||||
if not meta_path.exists():
|
||||
return None
|
||||
try:
|
||||
data = json.loads(meta_path.read_text(encoding="utf-8"))
|
||||
return ObjectLockRetention.from_dict(data.get("retention", {}))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return None
|
||||
|
||||
def set_object_retention(
|
||||
self,
|
||||
bucket_name: str,
|
||||
object_key: str,
|
||||
retention: ObjectLockRetention,
|
||||
bypass_governance: bool = False,
|
||||
) -> None:
|
||||
existing = self.get_object_retention(bucket_name, object_key)
|
||||
if existing and not existing.is_expired():
|
||||
if existing.mode == RetentionMode.COMPLIANCE:
|
||||
raise ObjectLockError(
|
||||
"Cannot modify retention on object with COMPLIANCE mode until retention expires"
|
||||
)
|
||||
if existing.mode == RetentionMode.GOVERNANCE and not bypass_governance:
|
||||
raise ObjectLockError(
|
||||
"Cannot modify GOVERNANCE retention without bypass-governance permission"
|
||||
)
|
||||
|
||||
meta_path = self._object_lock_meta_path(bucket_name, object_key)
|
||||
meta_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
existing_data: Dict[str, Any] = {}
|
||||
if meta_path.exists():
|
||||
try:
|
||||
existing_data = json.loads(meta_path.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
pass
|
||||
|
||||
existing_data["retention"] = retention.to_dict()
|
||||
meta_path.write_text(json.dumps(existing_data), encoding="utf-8")
|
||||
|
||||
def get_legal_hold(self, bucket_name: str, object_key: str) -> bool:
|
||||
meta_path = self._object_lock_meta_path(bucket_name, object_key)
|
||||
if not meta_path.exists():
|
||||
return False
|
||||
try:
|
||||
data = json.loads(meta_path.read_text(encoding="utf-8"))
|
||||
return data.get("legal_hold", False)
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return False
|
||||
|
||||
def set_legal_hold(self, bucket_name: str, object_key: str, enabled: bool) -> None:
|
||||
meta_path = self._object_lock_meta_path(bucket_name, object_key)
|
||||
meta_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
existing_data: Dict[str, Any] = {}
|
||||
if meta_path.exists():
|
||||
try:
|
||||
existing_data = json.loads(meta_path.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
pass
|
||||
|
||||
existing_data["legal_hold"] = enabled
|
||||
meta_path.write_text(json.dumps(existing_data), encoding="utf-8")
|
||||
|
||||
def can_delete_object(
|
||||
self,
|
||||
bucket_name: str,
|
||||
object_key: str,
|
||||
bypass_governance: bool = False,
|
||||
) -> tuple[bool, str]:
|
||||
if self.get_legal_hold(bucket_name, object_key):
|
||||
return False, "Object is under legal hold"
|
||||
|
||||
retention = self.get_object_retention(bucket_name, object_key)
|
||||
if retention and not retention.is_expired():
|
||||
if retention.mode == RetentionMode.COMPLIANCE:
|
||||
return False, f"Object is locked in COMPLIANCE mode until {retention.retain_until_date.isoformat()}"
|
||||
if retention.mode == RetentionMode.GOVERNANCE:
|
||||
if not bypass_governance:
|
||||
return False, f"Object is locked in GOVERNANCE mode until {retention.retain_until_date.isoformat()}"
|
||||
|
||||
return True, ""
|
||||
|
||||
def can_overwrite_object(
|
||||
self,
|
||||
bucket_name: str,
|
||||
object_key: str,
|
||||
bypass_governance: bool = False,
|
||||
) -> tuple[bool, str]:
|
||||
return self.can_delete_object(bucket_name, object_key, bypass_governance)
|
||||
|
||||
def delete_object_lock_metadata(self, bucket_name: str, object_key: str) -> None:
|
||||
meta_path = self._object_lock_meta_path(bucket_name, object_key)
|
||||
try:
|
||||
if meta_path.exists():
|
||||
meta_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
296
python/app/operation_metrics.py
Normal file
296
python/app/operation_metrics.py
Normal file
@@ -0,0 +1,296 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
MAX_LATENCY_SAMPLES = 5000
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class OperationStats:
|
||||
count: int = 0
|
||||
success_count: int = 0
|
||||
error_count: int = 0
|
||||
latency_sum_ms: float = 0.0
|
||||
latency_min_ms: float = float("inf")
|
||||
latency_max_ms: float = 0.0
|
||||
bytes_in: int = 0
|
||||
bytes_out: int = 0
|
||||
latency_samples: List[float] = field(default_factory=list)
|
||||
|
||||
@staticmethod
|
||||
def _compute_percentile(sorted_data: List[float], p: float) -> float:
|
||||
if not sorted_data:
|
||||
return 0.0
|
||||
k = (len(sorted_data) - 1) * (p / 100.0)
|
||||
f = int(k)
|
||||
c = min(f + 1, len(sorted_data) - 1)
|
||||
d = k - f
|
||||
return sorted_data[f] + d * (sorted_data[c] - sorted_data[f])
|
||||
|
||||
def record(self, latency_ms: float, success: bool, bytes_in: int = 0, bytes_out: int = 0) -> None:
|
||||
self.count += 1
|
||||
if success:
|
||||
self.success_count += 1
|
||||
else:
|
||||
self.error_count += 1
|
||||
self.latency_sum_ms += latency_ms
|
||||
if latency_ms < self.latency_min_ms:
|
||||
self.latency_min_ms = latency_ms
|
||||
if latency_ms > self.latency_max_ms:
|
||||
self.latency_max_ms = latency_ms
|
||||
self.bytes_in += bytes_in
|
||||
self.bytes_out += bytes_out
|
||||
if len(self.latency_samples) < MAX_LATENCY_SAMPLES:
|
||||
self.latency_samples.append(latency_ms)
|
||||
else:
|
||||
j = random.randint(0, self.count - 1)
|
||||
if j < MAX_LATENCY_SAMPLES:
|
||||
self.latency_samples[j] = latency_ms
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
avg_latency = self.latency_sum_ms / self.count if self.count > 0 else 0.0
|
||||
min_latency = self.latency_min_ms if self.latency_min_ms != float("inf") else 0.0
|
||||
sorted_latencies = sorted(self.latency_samples)
|
||||
return {
|
||||
"count": self.count,
|
||||
"success_count": self.success_count,
|
||||
"error_count": self.error_count,
|
||||
"latency_avg_ms": round(avg_latency, 2),
|
||||
"latency_min_ms": round(min_latency, 2),
|
||||
"latency_max_ms": round(self.latency_max_ms, 2),
|
||||
"latency_p50_ms": round(self._compute_percentile(sorted_latencies, 50), 2),
|
||||
"latency_p95_ms": round(self._compute_percentile(sorted_latencies, 95), 2),
|
||||
"latency_p99_ms": round(self._compute_percentile(sorted_latencies, 99), 2),
|
||||
"bytes_in": self.bytes_in,
|
||||
"bytes_out": self.bytes_out,
|
||||
}
|
||||
|
||||
def merge(self, other: "OperationStats") -> None:
|
||||
self.count += other.count
|
||||
self.success_count += other.success_count
|
||||
self.error_count += other.error_count
|
||||
self.latency_sum_ms += other.latency_sum_ms
|
||||
if other.latency_min_ms < self.latency_min_ms:
|
||||
self.latency_min_ms = other.latency_min_ms
|
||||
if other.latency_max_ms > self.latency_max_ms:
|
||||
self.latency_max_ms = other.latency_max_ms
|
||||
self.bytes_in += other.bytes_in
|
||||
self.bytes_out += other.bytes_out
|
||||
combined = self.latency_samples + other.latency_samples
|
||||
if len(combined) > MAX_LATENCY_SAMPLES:
|
||||
random.shuffle(combined)
|
||||
combined = combined[:MAX_LATENCY_SAMPLES]
|
||||
self.latency_samples = combined
|
||||
|
||||
|
||||
@dataclass
|
||||
class MetricsSnapshot:
|
||||
timestamp: datetime
|
||||
window_seconds: int
|
||||
by_method: Dict[str, Dict[str, Any]]
|
||||
by_endpoint: Dict[str, Dict[str, Any]]
|
||||
by_status_class: Dict[str, int]
|
||||
error_codes: Dict[str, int]
|
||||
totals: Dict[str, Any]
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"timestamp": self.timestamp.isoformat(),
|
||||
"window_seconds": self.window_seconds,
|
||||
"by_method": self.by_method,
|
||||
"by_endpoint": self.by_endpoint,
|
||||
"by_status_class": self.by_status_class,
|
||||
"error_codes": self.error_codes,
|
||||
"totals": self.totals,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "MetricsSnapshot":
|
||||
return cls(
|
||||
timestamp=datetime.fromisoformat(data["timestamp"]),
|
||||
window_seconds=data.get("window_seconds", 300),
|
||||
by_method=data.get("by_method", {}),
|
||||
by_endpoint=data.get("by_endpoint", {}),
|
||||
by_status_class=data.get("by_status_class", {}),
|
||||
error_codes=data.get("error_codes", {}),
|
||||
totals=data.get("totals", {}),
|
||||
)
|
||||
|
||||
|
||||
class OperationMetricsCollector:
|
||||
def __init__(
|
||||
self,
|
||||
storage_root: Path,
|
||||
interval_minutes: int = 5,
|
||||
retention_hours: int = 24,
|
||||
):
|
||||
self.storage_root = storage_root
|
||||
self.interval_seconds = interval_minutes * 60
|
||||
self.retention_hours = retention_hours
|
||||
self._lock = threading.Lock()
|
||||
self._by_method: Dict[str, OperationStats] = defaultdict(OperationStats)
|
||||
self._by_endpoint: Dict[str, OperationStats] = defaultdict(OperationStats)
|
||||
self._by_status_class: Dict[str, int] = {}
|
||||
self._error_codes: Dict[str, int] = {}
|
||||
self._totals = OperationStats()
|
||||
self._window_start = time.time()
|
||||
self._shutdown = threading.Event()
|
||||
self._snapshots: List[MetricsSnapshot] = []
|
||||
|
||||
self._load_history()
|
||||
|
||||
self._snapshot_thread = threading.Thread(
|
||||
target=self._snapshot_loop, name="operation-metrics-snapshot", daemon=True
|
||||
)
|
||||
self._snapshot_thread.start()
|
||||
|
||||
def _config_path(self) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "config" / "operation_metrics.json"
|
||||
|
||||
def _load_history(self) -> None:
|
||||
config_path = self._config_path()
|
||||
if not config_path.exists():
|
||||
return
|
||||
try:
|
||||
data = json.loads(config_path.read_text(encoding="utf-8"))
|
||||
snapshots_data = data.get("snapshots", [])
|
||||
self._snapshots = [MetricsSnapshot.from_dict(s) for s in snapshots_data]
|
||||
self._prune_old_snapshots()
|
||||
except (json.JSONDecodeError, OSError, KeyError) as e:
|
||||
logger.warning(f"Failed to load operation metrics history: {e}")
|
||||
|
||||
def _save_history(self) -> None:
|
||||
config_path = self._config_path()
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
data = {"snapshots": [s.to_dict() for s in self._snapshots]}
|
||||
config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.warning(f"Failed to save operation metrics history: {e}")
|
||||
|
||||
def _prune_old_snapshots(self) -> None:
|
||||
if not self._snapshots:
|
||||
return
|
||||
cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600)
|
||||
self._snapshots = [
|
||||
s for s in self._snapshots if s.timestamp.timestamp() > cutoff
|
||||
]
|
||||
|
||||
def _snapshot_loop(self) -> None:
|
||||
while not self._shutdown.is_set():
|
||||
self._shutdown.wait(timeout=self.interval_seconds)
|
||||
if not self._shutdown.is_set():
|
||||
self._take_snapshot()
|
||||
|
||||
def _take_snapshot(self) -> None:
|
||||
with self._lock:
|
||||
now = datetime.now(timezone.utc)
|
||||
window_seconds = int(time.time() - self._window_start)
|
||||
|
||||
snapshot = MetricsSnapshot(
|
||||
timestamp=now,
|
||||
window_seconds=window_seconds,
|
||||
by_method={k: v.to_dict() for k, v in self._by_method.items()},
|
||||
by_endpoint={k: v.to_dict() for k, v in self._by_endpoint.items()},
|
||||
by_status_class=dict(self._by_status_class),
|
||||
error_codes=dict(self._error_codes),
|
||||
totals=self._totals.to_dict(),
|
||||
)
|
||||
|
||||
self._snapshots.append(snapshot)
|
||||
self._prune_old_snapshots()
|
||||
self._save_history()
|
||||
|
||||
self._by_method = defaultdict(OperationStats)
|
||||
self._by_endpoint = defaultdict(OperationStats)
|
||||
self._by_status_class.clear()
|
||||
self._error_codes.clear()
|
||||
self._totals = OperationStats()
|
||||
self._window_start = time.time()
|
||||
|
||||
def record_request(
|
||||
self,
|
||||
method: str,
|
||||
endpoint_type: str,
|
||||
status_code: int,
|
||||
latency_ms: float,
|
||||
bytes_in: int = 0,
|
||||
bytes_out: int = 0,
|
||||
error_code: Optional[str] = None,
|
||||
) -> None:
|
||||
success = 200 <= status_code < 400
|
||||
status_class = f"{status_code // 100}xx"
|
||||
|
||||
with self._lock:
|
||||
self._by_method[method].record(latency_ms, success, bytes_in, bytes_out)
|
||||
self._by_endpoint[endpoint_type].record(latency_ms, success, bytes_in, bytes_out)
|
||||
|
||||
self._by_status_class[status_class] = self._by_status_class.get(status_class, 0) + 1
|
||||
|
||||
if error_code:
|
||||
self._error_codes[error_code] = self._error_codes.get(error_code, 0) + 1
|
||||
|
||||
self._totals.record(latency_ms, success, bytes_in, bytes_out)
|
||||
|
||||
def get_current_stats(self) -> Dict[str, Any]:
|
||||
with self._lock:
|
||||
window_seconds = int(time.time() - self._window_start)
|
||||
return {
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
"window_seconds": window_seconds,
|
||||
"by_method": {k: v.to_dict() for k, v in self._by_method.items()},
|
||||
"by_endpoint": {k: v.to_dict() for k, v in self._by_endpoint.items()},
|
||||
"by_status_class": dict(self._by_status_class),
|
||||
"error_codes": dict(self._error_codes),
|
||||
"totals": self._totals.to_dict(),
|
||||
}
|
||||
|
||||
def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]:
|
||||
with self._lock:
|
||||
snapshots = list(self._snapshots)
|
||||
|
||||
if hours:
|
||||
cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)
|
||||
snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff]
|
||||
|
||||
return [s.to_dict() for s in snapshots]
|
||||
|
||||
def shutdown(self) -> None:
|
||||
self._shutdown.set()
|
||||
self._take_snapshot()
|
||||
self._snapshot_thread.join(timeout=5.0)
|
||||
|
||||
|
||||
def classify_endpoint(path: str) -> str:
|
||||
if not path or path == "/":
|
||||
return "service"
|
||||
|
||||
path = path.rstrip("/")
|
||||
|
||||
if path.startswith("/ui"):
|
||||
return "ui"
|
||||
|
||||
if path.startswith("/kms"):
|
||||
return "kms"
|
||||
|
||||
if path.startswith("/myfsio"):
|
||||
return "service"
|
||||
|
||||
parts = path.lstrip("/").split("/")
|
||||
if len(parts) == 0:
|
||||
return "service"
|
||||
elif len(parts) == 1:
|
||||
return "bucket"
|
||||
else:
|
||||
return "object"
|
||||
667
python/app/replication.py
Normal file
667
python/app/replication.py
Normal file
@@ -0,0 +1,667 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import mimetypes
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import boto3
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import ClientError
|
||||
from boto3.exceptions import S3UploadFailedError
|
||||
|
||||
from .connections import ConnectionStore, RemoteConnection
|
||||
from .storage import ObjectStorage, StorageError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
|
||||
|
||||
REPLICATION_MODE_NEW_ONLY = "new_only"
|
||||
REPLICATION_MODE_ALL = "all"
|
||||
REPLICATION_MODE_BIDIRECTIONAL = "bidirectional"
|
||||
|
||||
|
||||
def _create_s3_client(
|
||||
connection: RemoteConnection,
|
||||
*,
|
||||
health_check: bool = False,
|
||||
connect_timeout: int = 5,
|
||||
read_timeout: int = 30,
|
||||
max_retries: int = 2,
|
||||
) -> Any:
|
||||
"""Create a boto3 S3 client for the given connection.
|
||||
Args:
|
||||
connection: Remote S3 connection configuration
|
||||
health_check: If True, use minimal retries for quick health checks
|
||||
"""
|
||||
config = Config(
|
||||
user_agent_extra=REPLICATION_USER_AGENT,
|
||||
connect_timeout=connect_timeout,
|
||||
read_timeout=read_timeout,
|
||||
retries={'max_attempts': 1 if health_check else max_retries},
|
||||
signature_version='s3v4',
|
||||
s3={'addressing_style': 'path'},
|
||||
request_checksum_calculation='when_required',
|
||||
response_checksum_validation='when_required',
|
||||
)
|
||||
return boto3.client(
|
||||
"s3",
|
||||
endpoint_url=connection.endpoint_url,
|
||||
aws_access_key_id=connection.access_key,
|
||||
aws_secret_access_key=connection.secret_key,
|
||||
region_name=connection.region or 'us-east-1',
|
||||
config=config,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ReplicationStats:
|
||||
"""Statistics for replication operations - computed dynamically."""
|
||||
objects_synced: int = 0
|
||||
objects_pending: int = 0
|
||||
objects_orphaned: int = 0
|
||||
bytes_synced: int = 0
|
||||
last_sync_at: Optional[float] = None
|
||||
last_sync_key: Optional[str] = None
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"objects_synced": self.objects_synced,
|
||||
"objects_pending": self.objects_pending,
|
||||
"objects_orphaned": self.objects_orphaned,
|
||||
"bytes_synced": self.bytes_synced,
|
||||
"last_sync_at": self.last_sync_at,
|
||||
"last_sync_key": self.last_sync_key,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "ReplicationStats":
|
||||
return cls(
|
||||
objects_synced=data.get("objects_synced", 0),
|
||||
objects_pending=data.get("objects_pending", 0),
|
||||
objects_orphaned=data.get("objects_orphaned", 0),
|
||||
bytes_synced=data.get("bytes_synced", 0),
|
||||
last_sync_at=data.get("last_sync_at"),
|
||||
last_sync_key=data.get("last_sync_key"),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ReplicationFailure:
|
||||
object_key: str
|
||||
error_message: str
|
||||
timestamp: float
|
||||
failure_count: int
|
||||
bucket_name: str
|
||||
action: str
|
||||
last_error_code: Optional[str] = None
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"object_key": self.object_key,
|
||||
"error_message": self.error_message,
|
||||
"timestamp": self.timestamp,
|
||||
"failure_count": self.failure_count,
|
||||
"bucket_name": self.bucket_name,
|
||||
"action": self.action,
|
||||
"last_error_code": self.last_error_code,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "ReplicationFailure":
|
||||
return cls(
|
||||
object_key=data["object_key"],
|
||||
error_message=data["error_message"],
|
||||
timestamp=data["timestamp"],
|
||||
failure_count=data["failure_count"],
|
||||
bucket_name=data["bucket_name"],
|
||||
action=data["action"],
|
||||
last_error_code=data.get("last_error_code"),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ReplicationRule:
|
||||
bucket_name: str
|
||||
target_connection_id: str
|
||||
target_bucket: str
|
||||
enabled: bool = True
|
||||
mode: str = REPLICATION_MODE_NEW_ONLY
|
||||
created_at: Optional[float] = None
|
||||
stats: ReplicationStats = field(default_factory=ReplicationStats)
|
||||
sync_deletions: bool = True
|
||||
last_pull_at: Optional[float] = None
|
||||
filter_prefix: Optional[str] = None
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"bucket_name": self.bucket_name,
|
||||
"target_connection_id": self.target_connection_id,
|
||||
"target_bucket": self.target_bucket,
|
||||
"enabled": self.enabled,
|
||||
"mode": self.mode,
|
||||
"created_at": self.created_at,
|
||||
"stats": self.stats.to_dict(),
|
||||
"sync_deletions": self.sync_deletions,
|
||||
"last_pull_at": self.last_pull_at,
|
||||
"filter_prefix": self.filter_prefix,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "ReplicationRule":
|
||||
stats_data = data.pop("stats", {})
|
||||
if "mode" not in data:
|
||||
data["mode"] = REPLICATION_MODE_NEW_ONLY
|
||||
if "created_at" not in data:
|
||||
data["created_at"] = None
|
||||
if "sync_deletions" not in data:
|
||||
data["sync_deletions"] = True
|
||||
if "last_pull_at" not in data:
|
||||
data["last_pull_at"] = None
|
||||
if "filter_prefix" not in data:
|
||||
data["filter_prefix"] = None
|
||||
rule = cls(**data)
|
||||
rule.stats = ReplicationStats.from_dict(stats_data) if stats_data else ReplicationStats()
|
||||
return rule
|
||||
|
||||
|
||||
class ReplicationFailureStore:
|
||||
def __init__(self, storage_root: Path, max_failures_per_bucket: int = 50) -> None:
|
||||
self.storage_root = storage_root
|
||||
self.max_failures_per_bucket = max_failures_per_bucket
|
||||
self._lock = threading.Lock()
|
||||
self._cache: Dict[str, List[ReplicationFailure]] = {}
|
||||
|
||||
def _get_failures_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "replication_failures.json"
|
||||
|
||||
def _load_from_disk(self, bucket_name: str) -> List[ReplicationFailure]:
|
||||
path = self._get_failures_path(bucket_name)
|
||||
if not path.exists():
|
||||
return []
|
||||
try:
|
||||
with open(path, "r") as f:
|
||||
data = json.load(f)
|
||||
return [ReplicationFailure.from_dict(d) for d in data.get("failures", [])]
|
||||
except (OSError, ValueError, KeyError) as e:
|
||||
logger.error(f"Failed to load replication failures for {bucket_name}: {e}")
|
||||
return []
|
||||
|
||||
def _save_to_disk(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
|
||||
path = self._get_failures_path(bucket_name)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
data = {"failures": [f.to_dict() for f in failures[:self.max_failures_per_bucket]]}
|
||||
try:
|
||||
with open(path, "w") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to save replication failures for {bucket_name}: {e}")
|
||||
|
||||
def load_failures(self, bucket_name: str) -> List[ReplicationFailure]:
|
||||
if bucket_name in self._cache:
|
||||
return list(self._cache[bucket_name])
|
||||
failures = self._load_from_disk(bucket_name)
|
||||
self._cache[bucket_name] = failures
|
||||
return list(failures)
|
||||
|
||||
def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
|
||||
trimmed = failures[:self.max_failures_per_bucket]
|
||||
self._cache[bucket_name] = trimmed
|
||||
self._save_to_disk(bucket_name, trimmed)
|
||||
|
||||
def add_failure(self, bucket_name: str, failure: ReplicationFailure) -> None:
|
||||
with self._lock:
|
||||
failures = self.load_failures(bucket_name)
|
||||
existing = next((f for f in failures if f.object_key == failure.object_key), None)
|
||||
if existing:
|
||||
existing.failure_count += 1
|
||||
existing.timestamp = failure.timestamp
|
||||
existing.error_message = failure.error_message
|
||||
existing.last_error_code = failure.last_error_code
|
||||
else:
|
||||
failures.insert(0, failure)
|
||||
self.save_failures(bucket_name, failures)
|
||||
|
||||
def remove_failure(self, bucket_name: str, object_key: str) -> bool:
|
||||
with self._lock:
|
||||
failures = self.load_failures(bucket_name)
|
||||
original_len = len(failures)
|
||||
failures = [f for f in failures if f.object_key != object_key]
|
||||
if len(failures) < original_len:
|
||||
self.save_failures(bucket_name, failures)
|
||||
return True
|
||||
return False
|
||||
|
||||
def clear_failures(self, bucket_name: str) -> None:
|
||||
with self._lock:
|
||||
self._cache.pop(bucket_name, None)
|
||||
path = self._get_failures_path(bucket_name)
|
||||
if path.exists():
|
||||
path.unlink()
|
||||
|
||||
def get_failure(self, bucket_name: str, object_key: str) -> Optional[ReplicationFailure]:
|
||||
failures = self.load_failures(bucket_name)
|
||||
return next((f for f in failures if f.object_key == object_key), None)
|
||||
|
||||
def get_failure_count(self, bucket_name: str) -> int:
|
||||
return len(self.load_failures(bucket_name))
|
||||
|
||||
|
||||
class ReplicationManager:
|
||||
def __init__(
|
||||
self,
|
||||
storage: ObjectStorage,
|
||||
connections: ConnectionStore,
|
||||
rules_path: Path,
|
||||
storage_root: Path,
|
||||
connect_timeout: int = 5,
|
||||
read_timeout: int = 30,
|
||||
max_retries: int = 2,
|
||||
streaming_threshold_bytes: int = 10 * 1024 * 1024,
|
||||
max_failures_per_bucket: int = 50,
|
||||
) -> None:
|
||||
self.storage = storage
|
||||
self.connections = connections
|
||||
self.rules_path = rules_path
|
||||
self.storage_root = storage_root
|
||||
self.connect_timeout = connect_timeout
|
||||
self.read_timeout = read_timeout
|
||||
self.max_retries = max_retries
|
||||
self.streaming_threshold_bytes = streaming_threshold_bytes
|
||||
self._rules: Dict[str, ReplicationRule] = {}
|
||||
self._stats_lock = threading.Lock()
|
||||
self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
|
||||
self._shutdown = False
|
||||
self.failure_store = ReplicationFailureStore(storage_root, max_failures_per_bucket)
|
||||
self.reload_rules()
|
||||
|
||||
def _create_client(self, connection: RemoteConnection, *, health_check: bool = False) -> Any:
|
||||
"""Create an S3 client with the manager's configured timeouts."""
|
||||
return _create_s3_client(
|
||||
connection,
|
||||
health_check=health_check,
|
||||
connect_timeout=self.connect_timeout,
|
||||
read_timeout=self.read_timeout,
|
||||
max_retries=self.max_retries,
|
||||
)
|
||||
|
||||
def shutdown(self, wait: bool = True) -> None:
|
||||
"""Shutdown the replication executor gracefully.
|
||||
|
||||
Args:
|
||||
wait: If True, wait for pending tasks to complete
|
||||
"""
|
||||
self._shutdown = True
|
||||
self._executor.shutdown(wait=wait)
|
||||
logger.info("Replication manager shut down")
|
||||
|
||||
def reload_rules(self) -> None:
|
||||
if not self.rules_path.exists():
|
||||
self._rules = {}
|
||||
return
|
||||
try:
|
||||
with open(self.rules_path, "r") as f:
|
||||
data = json.load(f)
|
||||
for bucket, rule_data in data.items():
|
||||
self._rules[bucket] = ReplicationRule.from_dict(rule_data)
|
||||
except (OSError, ValueError) as e:
|
||||
logger.error(f"Failed to load replication rules: {e}")
|
||||
|
||||
def save_rules(self) -> None:
|
||||
data = {b: rule.to_dict() for b, rule in self._rules.items()}
|
||||
self.rules_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(self.rules_path, "w") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
|
||||
def check_endpoint_health(self, connection: RemoteConnection) -> bool:
|
||||
"""Check if a remote endpoint is reachable and responsive.
|
||||
|
||||
Returns True if endpoint is healthy, False otherwise.
|
||||
Uses short timeouts to prevent blocking.
|
||||
"""
|
||||
try:
|
||||
s3 = self._create_client(connection, health_check=True)
|
||||
s3.list_buckets()
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.warning(f"Endpoint health check failed for {connection.name} ({connection.endpoint_url}): {e}")
|
||||
return False
|
||||
|
||||
def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]:
|
||||
return self._rules.get(bucket_name)
|
||||
|
||||
def list_rules(self) -> List[ReplicationRule]:
|
||||
return list(self._rules.values())
|
||||
|
||||
def set_rule(self, rule: ReplicationRule) -> None:
|
||||
old_rule = self._rules.get(rule.bucket_name)
|
||||
was_all_mode = old_rule and old_rule.mode == REPLICATION_MODE_ALL if old_rule else False
|
||||
self._rules[rule.bucket_name] = rule
|
||||
self.save_rules()
|
||||
|
||||
if rule.mode == REPLICATION_MODE_ALL and rule.enabled and not was_all_mode:
|
||||
logger.info(f"Replication mode ALL enabled for {rule.bucket_name}, triggering sync of existing objects")
|
||||
self._executor.submit(self.replicate_existing_objects, rule.bucket_name)
|
||||
|
||||
def delete_rule(self, bucket_name: str) -> None:
|
||||
if bucket_name in self._rules:
|
||||
del self._rules[bucket_name]
|
||||
self.save_rules()
|
||||
|
||||
def _update_last_sync(self, bucket_name: str, object_key: str = "") -> None:
|
||||
"""Update last sync timestamp after a successful operation."""
|
||||
with self._stats_lock:
|
||||
rule = self._rules.get(bucket_name)
|
||||
if not rule:
|
||||
return
|
||||
rule.stats.last_sync_at = time.time()
|
||||
rule.stats.last_sync_key = object_key
|
||||
self.save_rules()
|
||||
|
||||
def get_sync_status(self, bucket_name: str) -> Optional[ReplicationStats]:
|
||||
"""Dynamically compute replication status by comparing source and destination buckets."""
|
||||
rule = self.get_rule(bucket_name)
|
||||
if not rule:
|
||||
return None
|
||||
|
||||
connection = self.connections.get(rule.target_connection_id)
|
||||
if not connection:
|
||||
return rule.stats
|
||||
|
||||
try:
|
||||
source_objects = self.storage.list_objects_all(bucket_name)
|
||||
source_keys = {obj.key: obj.size for obj in source_objects}
|
||||
|
||||
s3 = self._create_client(connection)
|
||||
|
||||
dest_keys = set()
|
||||
bytes_synced = 0
|
||||
paginator = s3.get_paginator('list_objects_v2')
|
||||
try:
|
||||
for page in paginator.paginate(Bucket=rule.target_bucket):
|
||||
for obj in page.get('Contents', []):
|
||||
dest_keys.add(obj['Key'])
|
||||
if obj['Key'] in source_keys:
|
||||
bytes_synced += obj.get('Size', 0)
|
||||
except ClientError as e:
|
||||
if e.response['Error']['Code'] == 'NoSuchBucket':
|
||||
dest_keys = set()
|
||||
else:
|
||||
raise
|
||||
|
||||
synced = source_keys.keys() & dest_keys
|
||||
orphaned = dest_keys - source_keys.keys()
|
||||
|
||||
if rule.mode == REPLICATION_MODE_ALL:
|
||||
pending = source_keys.keys() - dest_keys
|
||||
else:
|
||||
pending = set()
|
||||
|
||||
rule.stats.objects_synced = len(synced)
|
||||
rule.stats.objects_pending = len(pending)
|
||||
rule.stats.objects_orphaned = len(orphaned)
|
||||
rule.stats.bytes_synced = bytes_synced
|
||||
|
||||
return rule.stats
|
||||
|
||||
except (ClientError, StorageError) as e:
|
||||
logger.error(f"Failed to compute sync status for {bucket_name}: {e}")
|
||||
return rule.stats
|
||||
|
||||
def replicate_existing_objects(self, bucket_name: str) -> None:
|
||||
"""Trigger replication for all existing objects in a bucket."""
|
||||
rule = self.get_rule(bucket_name)
|
||||
if not rule or not rule.enabled:
|
||||
return
|
||||
|
||||
connection = self.connections.get(rule.target_connection_id)
|
||||
if not connection:
|
||||
logger.warning(f"Cannot replicate existing objects: Connection {rule.target_connection_id} not found")
|
||||
return
|
||||
|
||||
if not self.check_endpoint_health(connection):
|
||||
logger.warning(f"Cannot replicate existing objects: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable")
|
||||
return
|
||||
|
||||
try:
|
||||
objects = self.storage.list_objects_all(bucket_name)
|
||||
logger.info(f"Starting replication of {len(objects)} existing objects from {bucket_name}")
|
||||
for obj in objects:
|
||||
self._executor.submit(self._replicate_task, bucket_name, obj.key, rule, connection, "write")
|
||||
except StorageError as e:
|
||||
logger.error(f"Failed to list objects for replication: {e}")
|
||||
|
||||
def create_remote_bucket(self, connection_id: str, bucket_name: str) -> None:
|
||||
"""Create a bucket on the remote connection."""
|
||||
connection = self.connections.get(connection_id)
|
||||
if not connection:
|
||||
raise ValueError(f"Connection {connection_id} not found")
|
||||
|
||||
try:
|
||||
s3 = self._create_client(connection)
|
||||
s3.create_bucket(Bucket=bucket_name)
|
||||
except ClientError as e:
|
||||
logger.error(f"Failed to create remote bucket {bucket_name}: {e}")
|
||||
raise
|
||||
|
||||
def trigger_replication(self, bucket_name: str, object_key: str, action: str = "write") -> None:
|
||||
rule = self.get_rule(bucket_name)
|
||||
if not rule or not rule.enabled:
|
||||
return
|
||||
|
||||
connection = self.connections.get(rule.target_connection_id)
|
||||
if not connection:
|
||||
logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Connection {rule.target_connection_id} not found")
|
||||
return
|
||||
|
||||
if not self.check_endpoint_health(connection):
|
||||
logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable")
|
||||
return
|
||||
|
||||
self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, action)
|
||||
|
||||
def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection, action: str) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
|
||||
current_rule = self.get_rule(bucket_name)
|
||||
if not current_rule or not current_rule.enabled:
|
||||
logger.debug(f"Replication skipped for {bucket_name}/{object_key}: rule disabled or removed")
|
||||
return
|
||||
|
||||
if ".." in object_key or object_key.startswith("/") or object_key.startswith("\\"):
|
||||
logger.error(f"Invalid object key in replication (path traversal attempt): {object_key}")
|
||||
return
|
||||
|
||||
try:
|
||||
from .storage import ObjectStorage
|
||||
ObjectStorage._sanitize_object_key(object_key)
|
||||
except StorageError as e:
|
||||
logger.error(f"Object key validation failed in replication: {e}")
|
||||
return
|
||||
|
||||
try:
|
||||
s3 = self._create_client(conn)
|
||||
|
||||
if action == "delete":
|
||||
try:
|
||||
s3.delete_object(Bucket=rule.target_bucket, Key=object_key)
|
||||
logger.info(f"Replicated DELETE {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
|
||||
self._update_last_sync(bucket_name, object_key)
|
||||
self.failure_store.remove_failure(bucket_name, object_key)
|
||||
except ClientError as e:
|
||||
error_code = e.response.get('Error', {}).get('Code')
|
||||
logger.error(f"Replication DELETE failed for {bucket_name}/{object_key}: {e}")
|
||||
self.failure_store.add_failure(bucket_name, ReplicationFailure(
|
||||
object_key=object_key,
|
||||
error_message=str(e),
|
||||
timestamp=time.time(),
|
||||
failure_count=1,
|
||||
bucket_name=bucket_name,
|
||||
action="delete",
|
||||
last_error_code=error_code,
|
||||
))
|
||||
return
|
||||
|
||||
try:
|
||||
path = self.storage.get_object_path(bucket_name, object_key)
|
||||
except StorageError:
|
||||
logger.error(f"Source object not found: {bucket_name}/{object_key}")
|
||||
return
|
||||
|
||||
content_type, _ = mimetypes.guess_type(path)
|
||||
file_size = path.stat().st_size
|
||||
|
||||
logger.info(f"Replicating {bucket_name}/{object_key}: Size={file_size}, ContentType={content_type}")
|
||||
|
||||
def do_upload() -> None:
|
||||
"""Upload object using appropriate method based on file size.
|
||||
|
||||
For small files (< 10 MiB): Read into memory for simpler handling
|
||||
For large files: Use streaming upload to avoid memory issues
|
||||
"""
|
||||
extra_args = {}
|
||||
if content_type:
|
||||
extra_args["ContentType"] = content_type
|
||||
|
||||
if file_size >= self.streaming_threshold_bytes:
|
||||
s3.upload_file(
|
||||
str(path),
|
||||
rule.target_bucket,
|
||||
object_key,
|
||||
ExtraArgs=extra_args if extra_args else None,
|
||||
)
|
||||
else:
|
||||
file_content = path.read_bytes()
|
||||
put_kwargs = {
|
||||
"Bucket": rule.target_bucket,
|
||||
"Key": object_key,
|
||||
"Body": file_content,
|
||||
**extra_args,
|
||||
}
|
||||
s3.put_object(**put_kwargs)
|
||||
|
||||
try:
|
||||
do_upload()
|
||||
except (ClientError, S3UploadFailedError) as e:
|
||||
error_code = None
|
||||
if isinstance(e, ClientError):
|
||||
error_code = e.response['Error']['Code']
|
||||
elif isinstance(e, S3UploadFailedError):
|
||||
if "NoSuchBucket" in str(e):
|
||||
error_code = 'NoSuchBucket'
|
||||
|
||||
if error_code == 'NoSuchBucket':
|
||||
logger.info(f"Target bucket {rule.target_bucket} not found. Attempting to create it.")
|
||||
bucket_ready = False
|
||||
try:
|
||||
s3.create_bucket(Bucket=rule.target_bucket)
|
||||
bucket_ready = True
|
||||
logger.info(f"Created target bucket {rule.target_bucket}")
|
||||
except ClientError as bucket_err:
|
||||
if bucket_err.response['Error']['Code'] in ('BucketAlreadyExists', 'BucketAlreadyOwnedByYou'):
|
||||
logger.debug(f"Bucket {rule.target_bucket} already exists (created by another thread)")
|
||||
bucket_ready = True
|
||||
else:
|
||||
logger.error(f"Failed to create target bucket {rule.target_bucket}: {bucket_err}")
|
||||
raise e
|
||||
|
||||
if bucket_ready:
|
||||
do_upload()
|
||||
else:
|
||||
raise e
|
||||
|
||||
logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
|
||||
self._update_last_sync(bucket_name, object_key)
|
||||
self.failure_store.remove_failure(bucket_name, object_key)
|
||||
|
||||
except (ClientError, OSError, ValueError) as e:
|
||||
error_code = None
|
||||
if isinstance(e, ClientError):
|
||||
error_code = e.response.get('Error', {}).get('Code')
|
||||
logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}")
|
||||
self.failure_store.add_failure(bucket_name, ReplicationFailure(
|
||||
object_key=object_key,
|
||||
error_message=str(e),
|
||||
timestamp=time.time(),
|
||||
failure_count=1,
|
||||
bucket_name=bucket_name,
|
||||
action=action,
|
||||
last_error_code=error_code,
|
||||
))
|
||||
except Exception as e:
|
||||
logger.exception(f"Unexpected error during replication for {bucket_name}/{object_key}")
|
||||
self.failure_store.add_failure(bucket_name, ReplicationFailure(
|
||||
object_key=object_key,
|
||||
error_message=str(e),
|
||||
timestamp=time.time(),
|
||||
failure_count=1,
|
||||
bucket_name=bucket_name,
|
||||
action=action,
|
||||
last_error_code=None,
|
||||
))
|
||||
|
||||
def get_failed_items(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[ReplicationFailure]:
|
||||
failures = self.failure_store.load_failures(bucket_name)
|
||||
return failures[offset:offset + limit]
|
||||
|
||||
def get_failure_count(self, bucket_name: str) -> int:
|
||||
return self.failure_store.get_failure_count(bucket_name)
|
||||
|
||||
def retry_failed_item(self, bucket_name: str, object_key: str) -> bool:
|
||||
failure = self.failure_store.get_failure(bucket_name, object_key)
|
||||
if not failure:
|
||||
return False
|
||||
|
||||
rule = self.get_rule(bucket_name)
|
||||
if not rule or not rule.enabled:
|
||||
return False
|
||||
|
||||
connection = self.connections.get(rule.target_connection_id)
|
||||
if not connection:
|
||||
logger.warning(f"Cannot retry: Connection {rule.target_connection_id} not found")
|
||||
return False
|
||||
|
||||
if not self.check_endpoint_health(connection):
|
||||
logger.warning(f"Cannot retry: Endpoint {connection.name} is not reachable")
|
||||
return False
|
||||
|
||||
self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, failure.action)
|
||||
return True
|
||||
|
||||
def retry_all_failed(self, bucket_name: str) -> Dict[str, int]:
|
||||
failures = self.failure_store.load_failures(bucket_name)
|
||||
if not failures:
|
||||
return {"submitted": 0, "skipped": 0}
|
||||
|
||||
rule = self.get_rule(bucket_name)
|
||||
if not rule or not rule.enabled:
|
||||
return {"submitted": 0, "skipped": len(failures)}
|
||||
|
||||
connection = self.connections.get(rule.target_connection_id)
|
||||
if not connection:
|
||||
logger.warning(f"Cannot retry: Connection {rule.target_connection_id} not found")
|
||||
return {"submitted": 0, "skipped": len(failures)}
|
||||
|
||||
if not self.check_endpoint_health(connection):
|
||||
logger.warning(f"Cannot retry: Endpoint {connection.name} is not reachable")
|
||||
return {"submitted": 0, "skipped": len(failures)}
|
||||
|
||||
submitted = 0
|
||||
for failure in failures:
|
||||
self._executor.submit(self._replicate_task, bucket_name, failure.object_key, rule, connection, failure.action)
|
||||
submitted += 1
|
||||
|
||||
return {"submitted": submitted, "skipped": 0}
|
||||
|
||||
def dismiss_failure(self, bucket_name: str, object_key: str) -> bool:
|
||||
return self.failure_store.remove_failure(bucket_name, object_key)
|
||||
|
||||
def clear_failures(self, bucket_name: str) -> None:
|
||||
self.failure_store.clear_failures(bucket_name)
|
||||
3975
python/app/s3_api.py
Normal file
3975
python/app/s3_api.py
Normal file
File diff suppressed because it is too large
Load Diff
296
python/app/s3_client.py
Normal file
296
python/app/s3_client.py
Normal file
@@ -0,0 +1,296 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Generator, Optional
|
||||
|
||||
import boto3
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import ClientError, EndpointConnectionError, ConnectionClosedError
|
||||
from flask import current_app, session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
UI_PROXY_USER_AGENT = "MyFSIO-UIProxy/1.0"
|
||||
|
||||
_BOTO_ERROR_MAP = {
|
||||
"NoSuchBucket": 404,
|
||||
"NoSuchKey": 404,
|
||||
"NoSuchUpload": 404,
|
||||
"BucketAlreadyExists": 409,
|
||||
"BucketAlreadyOwnedByYou": 409,
|
||||
"BucketNotEmpty": 409,
|
||||
"AccessDenied": 403,
|
||||
"InvalidAccessKeyId": 403,
|
||||
"SignatureDoesNotMatch": 403,
|
||||
"InvalidBucketName": 400,
|
||||
"InvalidArgument": 400,
|
||||
"MalformedXML": 400,
|
||||
"EntityTooLarge": 400,
|
||||
"QuotaExceeded": 403,
|
||||
}
|
||||
|
||||
_UPLOAD_REGISTRY_MAX_AGE = 86400
|
||||
_UPLOAD_REGISTRY_CLEANUP_INTERVAL = 3600
|
||||
|
||||
|
||||
class UploadRegistry:
|
||||
def __init__(self) -> None:
|
||||
self._entries: dict[str, tuple[str, str, float]] = {}
|
||||
self._lock = threading.Lock()
|
||||
self._last_cleanup = time.monotonic()
|
||||
|
||||
def register(self, upload_id: str, bucket_name: str, object_key: str) -> None:
|
||||
with self._lock:
|
||||
self._entries[upload_id] = (bucket_name, object_key, time.monotonic())
|
||||
self._maybe_cleanup()
|
||||
|
||||
def get_key(self, upload_id: str, bucket_name: str) -> Optional[str]:
|
||||
with self._lock:
|
||||
entry = self._entries.get(upload_id)
|
||||
if entry is None:
|
||||
return None
|
||||
stored_bucket, key, created_at = entry
|
||||
if stored_bucket != bucket_name:
|
||||
return None
|
||||
if time.monotonic() - created_at > _UPLOAD_REGISTRY_MAX_AGE:
|
||||
del self._entries[upload_id]
|
||||
return None
|
||||
return key
|
||||
|
||||
def remove(self, upload_id: str) -> None:
|
||||
with self._lock:
|
||||
self._entries.pop(upload_id, None)
|
||||
|
||||
def _maybe_cleanup(self) -> None:
|
||||
now = time.monotonic()
|
||||
if now - self._last_cleanup < _UPLOAD_REGISTRY_CLEANUP_INTERVAL:
|
||||
return
|
||||
self._last_cleanup = now
|
||||
cutoff = now - _UPLOAD_REGISTRY_MAX_AGE
|
||||
stale = [uid for uid, (_, _, ts) in self._entries.items() if ts < cutoff]
|
||||
for uid in stale:
|
||||
del self._entries[uid]
|
||||
|
||||
|
||||
class S3ProxyClient:
|
||||
def __init__(self, api_base_url: str, region: str = "us-east-1") -> None:
|
||||
if not api_base_url:
|
||||
raise ValueError("api_base_url is required for S3ProxyClient")
|
||||
self._api_base_url = api_base_url.rstrip("/")
|
||||
self._region = region
|
||||
self.upload_registry = UploadRegistry()
|
||||
|
||||
@property
|
||||
def api_base_url(self) -> str:
|
||||
return self._api_base_url
|
||||
|
||||
def get_client(self, access_key: str, secret_key: str) -> Any:
|
||||
if not access_key or not secret_key:
|
||||
raise ValueError("Both access_key and secret_key are required")
|
||||
config = Config(
|
||||
user_agent_extra=UI_PROXY_USER_AGENT,
|
||||
connect_timeout=5,
|
||||
read_timeout=30,
|
||||
retries={"max_attempts": 0},
|
||||
signature_version="s3v4",
|
||||
s3={"addressing_style": "path"},
|
||||
request_checksum_calculation="when_required",
|
||||
response_checksum_validation="when_required",
|
||||
)
|
||||
return boto3.client(
|
||||
"s3",
|
||||
endpoint_url=self._api_base_url,
|
||||
aws_access_key_id=access_key,
|
||||
aws_secret_access_key=secret_key,
|
||||
region_name=self._region,
|
||||
config=config,
|
||||
)
|
||||
|
||||
|
||||
def _get_proxy() -> S3ProxyClient:
|
||||
proxy = current_app.extensions.get("s3_proxy")
|
||||
if proxy is None:
|
||||
raise RuntimeError(
|
||||
"S3 proxy not configured. Set API_BASE_URL or run both API and UI servers."
|
||||
)
|
||||
return proxy
|
||||
|
||||
|
||||
def _get_session_creds() -> tuple[str, str]:
|
||||
secret_store = current_app.extensions["secret_store"]
|
||||
secret_store.purge_expired()
|
||||
token = session.get("cred_token")
|
||||
if not token:
|
||||
raise PermissionError("Not authenticated")
|
||||
creds = secret_store.peek(token)
|
||||
if not creds:
|
||||
raise PermissionError("Session expired")
|
||||
access_key = creds.get("access_key", "")
|
||||
secret_key = creds.get("secret_key", "")
|
||||
if not access_key or not secret_key:
|
||||
raise PermissionError("Invalid session credentials")
|
||||
return access_key, secret_key
|
||||
|
||||
|
||||
def get_session_s3_client() -> Any:
|
||||
proxy = _get_proxy()
|
||||
access_key, secret_key = _get_session_creds()
|
||||
return proxy.get_client(access_key, secret_key)
|
||||
|
||||
|
||||
def get_upload_registry() -> UploadRegistry:
|
||||
return _get_proxy().upload_registry
|
||||
|
||||
|
||||
def handle_client_error(exc: ClientError) -> tuple[dict[str, str], int]:
|
||||
error_info = exc.response.get("Error", {})
|
||||
code = error_info.get("Code", "InternalError")
|
||||
message = error_info.get("Message") or "S3 operation failed"
|
||||
http_status = _BOTO_ERROR_MAP.get(code)
|
||||
if http_status is None:
|
||||
http_status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode", 500)
|
||||
return {"error": message}, http_status
|
||||
|
||||
|
||||
def handle_connection_error(exc: Exception) -> tuple[dict[str, str], int]:
|
||||
logger.error("S3 API connection failed: %s", exc)
|
||||
return {"error": "S3 API server is unreachable. Ensure the API server is running."}, 502
|
||||
|
||||
|
||||
def format_datetime_display(dt: Any, display_tz: str = "UTC") -> str:
|
||||
from .ui import _format_datetime_display
|
||||
return _format_datetime_display(dt, display_tz)
|
||||
|
||||
|
||||
def format_datetime_iso(dt: Any, display_tz: str = "UTC") -> str:
|
||||
from .ui import _format_datetime_iso
|
||||
return _format_datetime_iso(dt, display_tz)
|
||||
|
||||
|
||||
def build_url_templates(bucket_name: str) -> dict[str, str]:
|
||||
from flask import url_for
|
||||
preview_t = url_for("ui.object_preview", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
delete_t = url_for("ui.delete_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
presign_t = url_for("ui.object_presign", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
versions_t = url_for("ui.object_versions", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
restore_t = url_for(
|
||||
"ui.restore_object_version",
|
||||
bucket_name=bucket_name,
|
||||
object_key="KEY_PLACEHOLDER",
|
||||
version_id="VERSION_ID_PLACEHOLDER",
|
||||
)
|
||||
tags_t = url_for("ui.object_tags", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
copy_t = url_for("ui.copy_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
move_t = url_for("ui.move_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
metadata_t = url_for("ui.object_metadata", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
return {
|
||||
"preview": preview_t,
|
||||
"download": preview_t + "?download=1",
|
||||
"presign": presign_t,
|
||||
"delete": delete_t,
|
||||
"versions": versions_t,
|
||||
"restore": restore_t,
|
||||
"tags": tags_t,
|
||||
"copy": copy_t,
|
||||
"move": move_t,
|
||||
"metadata": metadata_t,
|
||||
}
|
||||
|
||||
|
||||
def translate_list_objects(
|
||||
boto3_response: dict[str, Any],
|
||||
url_templates: dict[str, str],
|
||||
display_tz: str = "UTC",
|
||||
versioning_enabled: bool = False,
|
||||
) -> dict[str, Any]:
|
||||
objects_data = []
|
||||
for obj in boto3_response.get("Contents", []):
|
||||
last_mod = obj["LastModified"]
|
||||
objects_data.append({
|
||||
"key": obj["Key"],
|
||||
"size": obj["Size"],
|
||||
"last_modified": last_mod.isoformat(),
|
||||
"last_modified_display": format_datetime_display(last_mod, display_tz),
|
||||
"last_modified_iso": format_datetime_iso(last_mod, display_tz),
|
||||
"etag": obj.get("ETag", "").strip('"'),
|
||||
})
|
||||
return {
|
||||
"objects": objects_data,
|
||||
"is_truncated": boto3_response.get("IsTruncated", False),
|
||||
"next_continuation_token": boto3_response.get("NextContinuationToken"),
|
||||
"total_count": boto3_response.get("KeyCount", len(objects_data)),
|
||||
"versioning_enabled": versioning_enabled,
|
||||
"url_templates": url_templates,
|
||||
}
|
||||
|
||||
|
||||
def get_versioning_via_s3(client: Any, bucket_name: str) -> bool:
|
||||
try:
|
||||
resp = client.get_bucket_versioning(Bucket=bucket_name)
|
||||
return resp.get("Status") == "Enabled"
|
||||
except ClientError as exc:
|
||||
code = exc.response.get("Error", {}).get("Code", "")
|
||||
if code != "NoSuchBucket":
|
||||
logger.warning("Failed to check versioning for %s: %s", bucket_name, code)
|
||||
return False
|
||||
|
||||
|
||||
def stream_objects_ndjson(
|
||||
client: Any,
|
||||
bucket_name: str,
|
||||
prefix: Optional[str],
|
||||
url_templates: dict[str, str],
|
||||
display_tz: str = "UTC",
|
||||
versioning_enabled: bool = False,
|
||||
delimiter: Optional[str] = None,
|
||||
) -> Generator[str, None, None]:
|
||||
meta_line = json.dumps({
|
||||
"type": "meta",
|
||||
"versioning_enabled": versioning_enabled,
|
||||
"url_templates": url_templates,
|
||||
}) + "\n"
|
||||
yield meta_line
|
||||
|
||||
yield json.dumps({"type": "count", "total_count": 0}) + "\n"
|
||||
|
||||
kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": 1000}
|
||||
if prefix:
|
||||
kwargs["Prefix"] = prefix
|
||||
if delimiter:
|
||||
kwargs["Delimiter"] = delimiter
|
||||
|
||||
running_count = 0
|
||||
try:
|
||||
paginator = client.get_paginator("list_objects_v2")
|
||||
for page in paginator.paginate(**kwargs):
|
||||
for cp in page.get("CommonPrefixes", []):
|
||||
yield json.dumps({
|
||||
"type": "folder",
|
||||
"prefix": cp["Prefix"],
|
||||
}) + "\n"
|
||||
page_contents = page.get("Contents", [])
|
||||
for obj in page_contents:
|
||||
last_mod = obj["LastModified"]
|
||||
yield json.dumps({
|
||||
"type": "object",
|
||||
"key": obj["Key"],
|
||||
"size": obj["Size"],
|
||||
"last_modified": last_mod.isoformat(),
|
||||
"last_modified_display": format_datetime_display(last_mod, display_tz),
|
||||
"last_modified_iso": format_datetime_iso(last_mod, display_tz),
|
||||
"etag": obj.get("ETag", "").strip('"'),
|
||||
}) + "\n"
|
||||
running_count += len(page_contents)
|
||||
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
|
||||
except ClientError as exc:
|
||||
error_msg = exc.response.get("Error", {}).get("Message", "S3 operation failed")
|
||||
yield json.dumps({"type": "error", "error": error_msg}) + "\n"
|
||||
return
|
||||
except (EndpointConnectionError, ConnectionClosedError):
|
||||
yield json.dumps({"type": "error", "error": "S3 API server is unreachable"}) + "\n"
|
||||
return
|
||||
|
||||
yield json.dumps({"type": "done"}) + "\n"
|
||||
48
python/app/secret_store.py
Normal file
48
python/app/secret_store.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import secrets
|
||||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
class EphemeralSecretStore:
|
||||
"""Keeps values in-memory for a short period and returns them once."""
|
||||
|
||||
def __init__(self, default_ttl: int = 300) -> None:
|
||||
self._default_ttl = max(default_ttl, 1)
|
||||
self._store: Dict[str, tuple[Any, float]] = {}
|
||||
|
||||
def remember(self, payload: Any, *, ttl: Optional[int] = None) -> str:
|
||||
token = secrets.token_urlsafe(16)
|
||||
expires_at = time.time() + (ttl or self._default_ttl)
|
||||
self._store[token] = (payload, expires_at)
|
||||
return token
|
||||
|
||||
def peek(self, token: str | None) -> Any | None:
|
||||
if not token:
|
||||
return None
|
||||
entry = self._store.get(token)
|
||||
if not entry:
|
||||
return None
|
||||
payload, expires_at = entry
|
||||
if expires_at < time.time():
|
||||
self._store.pop(token, None)
|
||||
return None
|
||||
return payload
|
||||
|
||||
def pop(self, token: str | None) -> Any | None:
|
||||
if not token:
|
||||
return None
|
||||
entry = self._store.pop(token, None)
|
||||
if not entry:
|
||||
return None
|
||||
payload, expires_at = entry
|
||||
if expires_at < time.time():
|
||||
return None
|
||||
return payload
|
||||
|
||||
def purge_expired(self) -> None:
|
||||
now = time.time()
|
||||
stale = [token for token, (_, expires_at) in self._store.items() if expires_at < now]
|
||||
for token in stale:
|
||||
self._store.pop(token, None)
|
||||
171
python/app/select_content.py
Normal file
171
python/app/select_content.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""S3 SelectObjectContent SQL query execution using DuckDB."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Generator, Optional
|
||||
|
||||
try:
|
||||
import duckdb
|
||||
DUCKDB_AVAILABLE = True
|
||||
except ImportError:
|
||||
DUCKDB_AVAILABLE = False
|
||||
|
||||
|
||||
class SelectError(Exception):
|
||||
"""Error during SELECT query execution."""
|
||||
pass
|
||||
|
||||
|
||||
def execute_select_query(
|
||||
file_path: Path,
|
||||
expression: str,
|
||||
input_format: str,
|
||||
input_config: Dict[str, Any],
|
||||
output_format: str,
|
||||
output_config: Dict[str, Any],
|
||||
chunk_size: int = 65536,
|
||||
) -> Generator[bytes, None, None]:
|
||||
"""Execute SQL query on object content."""
|
||||
if not DUCKDB_AVAILABLE:
|
||||
raise SelectError("DuckDB is not installed. Install with: pip install duckdb")
|
||||
|
||||
conn = duckdb.connect(":memory:")
|
||||
|
||||
try:
|
||||
if input_format == "CSV":
|
||||
_load_csv(conn, file_path, input_config)
|
||||
elif input_format == "JSON":
|
||||
_load_json(conn, file_path, input_config)
|
||||
elif input_format == "Parquet":
|
||||
_load_parquet(conn, file_path)
|
||||
else:
|
||||
raise SelectError(f"Unsupported input format: {input_format}")
|
||||
|
||||
normalized_expression = expression.replace("s3object", "data").replace("S3Object", "data")
|
||||
|
||||
try:
|
||||
result = conn.execute(normalized_expression)
|
||||
except duckdb.Error as exc:
|
||||
raise SelectError(f"SQL execution error: {exc}")
|
||||
|
||||
if output_format == "CSV":
|
||||
yield from _output_csv(result, output_config, chunk_size)
|
||||
elif output_format == "JSON":
|
||||
yield from _output_json(result, output_config, chunk_size)
|
||||
else:
|
||||
raise SelectError(f"Unsupported output format: {output_format}")
|
||||
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def _load_csv(conn, file_path: Path, config: Dict[str, Any]) -> None:
|
||||
"""Load CSV file into DuckDB."""
|
||||
file_header_info = config.get("file_header_info", "NONE")
|
||||
delimiter = config.get("field_delimiter", ",")
|
||||
quote = config.get("quote_character", '"')
|
||||
|
||||
header = file_header_info in ("USE", "IGNORE")
|
||||
path_str = str(file_path).replace("\\", "/")
|
||||
|
||||
conn.execute(f"""
|
||||
CREATE TABLE data AS
|
||||
SELECT * FROM read_csv('{path_str}',
|
||||
header={header},
|
||||
delim='{delimiter}',
|
||||
quote='{quote}'
|
||||
)
|
||||
""")
|
||||
|
||||
|
||||
def _load_json(conn, file_path: Path, config: Dict[str, Any]) -> None:
|
||||
"""Load JSON file into DuckDB."""
|
||||
json_type = config.get("type", "DOCUMENT")
|
||||
path_str = str(file_path).replace("\\", "/")
|
||||
|
||||
if json_type == "LINES":
|
||||
conn.execute(f"""
|
||||
CREATE TABLE data AS
|
||||
SELECT * FROM read_json_auto('{path_str}', format='newline_delimited')
|
||||
""")
|
||||
else:
|
||||
conn.execute(f"""
|
||||
CREATE TABLE data AS
|
||||
SELECT * FROM read_json_auto('{path_str}', format='array')
|
||||
""")
|
||||
|
||||
|
||||
def _load_parquet(conn, file_path: Path) -> None:
|
||||
"""Load Parquet file into DuckDB."""
|
||||
path_str = str(file_path).replace("\\", "/")
|
||||
conn.execute(f"CREATE TABLE data AS SELECT * FROM read_parquet('{path_str}')")
|
||||
|
||||
|
||||
def _output_csv(
|
||||
result,
|
||||
config: Dict[str, Any],
|
||||
chunk_size: int,
|
||||
) -> Generator[bytes, None, None]:
|
||||
"""Output query results as CSV."""
|
||||
delimiter = config.get("field_delimiter", ",")
|
||||
record_delimiter = config.get("record_delimiter", "\n")
|
||||
quote = config.get("quote_character", '"')
|
||||
|
||||
buffer = ""
|
||||
|
||||
while True:
|
||||
rows = result.fetchmany(1000)
|
||||
if not rows:
|
||||
break
|
||||
|
||||
for row in rows:
|
||||
fields = []
|
||||
for value in row:
|
||||
if value is None:
|
||||
fields.append("")
|
||||
elif isinstance(value, str):
|
||||
if delimiter in value or quote in value or record_delimiter in value:
|
||||
escaped = value.replace(quote, quote + quote)
|
||||
fields.append(f'{quote}{escaped}{quote}')
|
||||
else:
|
||||
fields.append(value)
|
||||
else:
|
||||
fields.append(str(value))
|
||||
|
||||
buffer += delimiter.join(fields) + record_delimiter
|
||||
|
||||
while len(buffer) >= chunk_size:
|
||||
yield buffer[:chunk_size].encode("utf-8")
|
||||
buffer = buffer[chunk_size:]
|
||||
|
||||
if buffer:
|
||||
yield buffer.encode("utf-8")
|
||||
|
||||
|
||||
def _output_json(
|
||||
result,
|
||||
config: Dict[str, Any],
|
||||
chunk_size: int,
|
||||
) -> Generator[bytes, None, None]:
|
||||
"""Output query results as JSON Lines."""
|
||||
record_delimiter = config.get("record_delimiter", "\n")
|
||||
columns = [desc[0] for desc in result.description]
|
||||
|
||||
buffer = ""
|
||||
|
||||
while True:
|
||||
rows = result.fetchmany(1000)
|
||||
if not rows:
|
||||
break
|
||||
|
||||
for row in rows:
|
||||
record = dict(zip(columns, row))
|
||||
buffer += json.dumps(record, default=str) + record_delimiter
|
||||
|
||||
while len(buffer) >= chunk_size:
|
||||
yield buffer[:chunk_size].encode("utf-8")
|
||||
buffer = buffer[chunk_size:]
|
||||
|
||||
if buffer:
|
||||
yield buffer.encode("utf-8")
|
||||
177
python/app/site_registry.py
Normal file
177
python/app/site_registry.py
Normal file
@@ -0,0 +1,177 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class SiteInfo:
|
||||
site_id: str
|
||||
endpoint: str
|
||||
region: str = "us-east-1"
|
||||
priority: int = 100
|
||||
display_name: str = ""
|
||||
created_at: Optional[float] = None
|
||||
updated_at: Optional[float] = None
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if not self.display_name:
|
||||
self.display_name = self.site_id
|
||||
if self.created_at is None:
|
||||
self.created_at = time.time()
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"site_id": self.site_id,
|
||||
"endpoint": self.endpoint,
|
||||
"region": self.region,
|
||||
"priority": self.priority,
|
||||
"display_name": self.display_name,
|
||||
"created_at": self.created_at,
|
||||
"updated_at": self.updated_at,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> SiteInfo:
|
||||
return cls(
|
||||
site_id=data["site_id"],
|
||||
endpoint=data.get("endpoint", ""),
|
||||
region=data.get("region", "us-east-1"),
|
||||
priority=data.get("priority", 100),
|
||||
display_name=data.get("display_name", ""),
|
||||
created_at=data.get("created_at"),
|
||||
updated_at=data.get("updated_at"),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PeerSite:
|
||||
site_id: str
|
||||
endpoint: str
|
||||
region: str = "us-east-1"
|
||||
priority: int = 100
|
||||
display_name: str = ""
|
||||
created_at: Optional[float] = None
|
||||
updated_at: Optional[float] = None
|
||||
connection_id: Optional[str] = None
|
||||
is_healthy: Optional[bool] = None
|
||||
last_health_check: Optional[float] = None
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if not self.display_name:
|
||||
self.display_name = self.site_id
|
||||
if self.created_at is None:
|
||||
self.created_at = time.time()
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"site_id": self.site_id,
|
||||
"endpoint": self.endpoint,
|
||||
"region": self.region,
|
||||
"priority": self.priority,
|
||||
"display_name": self.display_name,
|
||||
"created_at": self.created_at,
|
||||
"updated_at": self.updated_at,
|
||||
"connection_id": self.connection_id,
|
||||
"is_healthy": self.is_healthy,
|
||||
"last_health_check": self.last_health_check,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> PeerSite:
|
||||
return cls(
|
||||
site_id=data["site_id"],
|
||||
endpoint=data.get("endpoint", ""),
|
||||
region=data.get("region", "us-east-1"),
|
||||
priority=data.get("priority", 100),
|
||||
display_name=data.get("display_name", ""),
|
||||
created_at=data.get("created_at"),
|
||||
updated_at=data.get("updated_at"),
|
||||
connection_id=data.get("connection_id"),
|
||||
is_healthy=data.get("is_healthy"),
|
||||
last_health_check=data.get("last_health_check"),
|
||||
)
|
||||
|
||||
|
||||
class SiteRegistry:
|
||||
def __init__(self, config_path: Path) -> None:
|
||||
self.config_path = config_path
|
||||
self._local_site: Optional[SiteInfo] = None
|
||||
self._peers: Dict[str, PeerSite] = {}
|
||||
self.reload()
|
||||
|
||||
def reload(self) -> None:
|
||||
if not self.config_path.exists():
|
||||
self._local_site = None
|
||||
self._peers = {}
|
||||
return
|
||||
|
||||
try:
|
||||
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
|
||||
if data.get("local"):
|
||||
self._local_site = SiteInfo.from_dict(data["local"])
|
||||
else:
|
||||
self._local_site = None
|
||||
|
||||
self._peers = {}
|
||||
for peer_data in data.get("peers", []):
|
||||
peer = PeerSite.from_dict(peer_data)
|
||||
self._peers[peer.site_id] = peer
|
||||
|
||||
except (OSError, json.JSONDecodeError, KeyError):
|
||||
self._local_site = None
|
||||
self._peers = {}
|
||||
|
||||
def save(self) -> None:
|
||||
self.config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
data = {
|
||||
"local": self._local_site.to_dict() if self._local_site else None,
|
||||
"peers": [peer.to_dict() for peer in self._peers.values()],
|
||||
}
|
||||
with open(self.config_path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
|
||||
def get_local_site(self) -> Optional[SiteInfo]:
|
||||
return self._local_site
|
||||
|
||||
def set_local_site(self, site: SiteInfo) -> None:
|
||||
site.updated_at = time.time()
|
||||
self._local_site = site
|
||||
self.save()
|
||||
|
||||
def list_peers(self) -> List[PeerSite]:
|
||||
return list(self._peers.values())
|
||||
|
||||
def get_peer(self, site_id: str) -> Optional[PeerSite]:
|
||||
return self._peers.get(site_id)
|
||||
|
||||
def add_peer(self, peer: PeerSite) -> None:
|
||||
peer.created_at = peer.created_at or time.time()
|
||||
self._peers[peer.site_id] = peer
|
||||
self.save()
|
||||
|
||||
def update_peer(self, peer: PeerSite) -> None:
|
||||
if peer.site_id not in self._peers:
|
||||
raise ValueError(f"Peer {peer.site_id} not found")
|
||||
peer.updated_at = time.time()
|
||||
self._peers[peer.site_id] = peer
|
||||
self.save()
|
||||
|
||||
def delete_peer(self, site_id: str) -> bool:
|
||||
if site_id in self._peers:
|
||||
del self._peers[site_id]
|
||||
self.save()
|
||||
return True
|
||||
return False
|
||||
|
||||
def update_health(self, site_id: str, is_healthy: bool) -> None:
|
||||
peer = self._peers.get(site_id)
|
||||
if peer:
|
||||
peer.is_healthy = is_healthy
|
||||
peer.last_health_check = time.time()
|
||||
self.save()
|
||||
416
python/app/site_sync.py
Normal file
416
python/app/site_sync.py
Normal file
@@ -0,0 +1,416 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
||||
|
||||
import boto3
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .connections import ConnectionStore, RemoteConnection
|
||||
from .replication import ReplicationManager, ReplicationRule
|
||||
from .storage import ObjectStorage
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SITE_SYNC_USER_AGENT = "SiteSyncAgent/1.0"
|
||||
|
||||
|
||||
@dataclass
|
||||
class SyncedObjectInfo:
|
||||
last_synced_at: float
|
||||
remote_etag: str
|
||||
source: str
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"last_synced_at": self.last_synced_at,
|
||||
"remote_etag": self.remote_etag,
|
||||
"source": self.source,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SyncedObjectInfo":
|
||||
return cls(
|
||||
last_synced_at=data["last_synced_at"],
|
||||
remote_etag=data["remote_etag"],
|
||||
source=data["source"],
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SyncState:
|
||||
synced_objects: Dict[str, SyncedObjectInfo] = field(default_factory=dict)
|
||||
last_full_sync: Optional[float] = None
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"synced_objects": {k: v.to_dict() for k, v in self.synced_objects.items()},
|
||||
"last_full_sync": self.last_full_sync,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SyncState":
|
||||
synced_objects = {}
|
||||
for k, v in data.get("synced_objects", {}).items():
|
||||
synced_objects[k] = SyncedObjectInfo.from_dict(v)
|
||||
return cls(
|
||||
synced_objects=synced_objects,
|
||||
last_full_sync=data.get("last_full_sync"),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SiteSyncStats:
|
||||
last_sync_at: Optional[float] = None
|
||||
objects_pulled: int = 0
|
||||
objects_skipped: int = 0
|
||||
conflicts_resolved: int = 0
|
||||
deletions_applied: int = 0
|
||||
errors: int = 0
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"last_sync_at": self.last_sync_at,
|
||||
"objects_pulled": self.objects_pulled,
|
||||
"objects_skipped": self.objects_skipped,
|
||||
"conflicts_resolved": self.conflicts_resolved,
|
||||
"deletions_applied": self.deletions_applied,
|
||||
"errors": self.errors,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class RemoteObjectMeta:
|
||||
key: str
|
||||
size: int
|
||||
last_modified: datetime
|
||||
etag: str
|
||||
|
||||
@classmethod
|
||||
def from_s3_object(cls, obj: Dict[str, Any]) -> "RemoteObjectMeta":
|
||||
return cls(
|
||||
key=obj["Key"],
|
||||
size=obj.get("Size", 0),
|
||||
last_modified=obj["LastModified"],
|
||||
etag=obj.get("ETag", "").strip('"'),
|
||||
)
|
||||
|
||||
|
||||
def _create_sync_client(
|
||||
connection: "RemoteConnection",
|
||||
*,
|
||||
connect_timeout: int = 10,
|
||||
read_timeout: int = 120,
|
||||
max_retries: int = 2,
|
||||
) -> Any:
|
||||
config = Config(
|
||||
user_agent_extra=SITE_SYNC_USER_AGENT,
|
||||
connect_timeout=connect_timeout,
|
||||
read_timeout=read_timeout,
|
||||
retries={"max_attempts": max_retries},
|
||||
signature_version="s3v4",
|
||||
s3={"addressing_style": "path"},
|
||||
request_checksum_calculation="when_required",
|
||||
response_checksum_validation="when_required",
|
||||
)
|
||||
return boto3.client(
|
||||
"s3",
|
||||
endpoint_url=connection.endpoint_url,
|
||||
aws_access_key_id=connection.access_key,
|
||||
aws_secret_access_key=connection.secret_key,
|
||||
region_name=connection.region or "us-east-1",
|
||||
config=config,
|
||||
)
|
||||
|
||||
|
||||
class SiteSyncWorker:
|
||||
def __init__(
|
||||
self,
|
||||
storage: "ObjectStorage",
|
||||
connections: "ConnectionStore",
|
||||
replication_manager: "ReplicationManager",
|
||||
storage_root: Path,
|
||||
interval_seconds: int = 60,
|
||||
batch_size: int = 100,
|
||||
connect_timeout: int = 10,
|
||||
read_timeout: int = 120,
|
||||
max_retries: int = 2,
|
||||
clock_skew_tolerance_seconds: float = 1.0,
|
||||
):
|
||||
self.storage = storage
|
||||
self.connections = connections
|
||||
self.replication_manager = replication_manager
|
||||
self.storage_root = storage_root
|
||||
self.interval_seconds = interval_seconds
|
||||
self.batch_size = batch_size
|
||||
self.connect_timeout = connect_timeout
|
||||
self.read_timeout = read_timeout
|
||||
self.max_retries = max_retries
|
||||
self.clock_skew_tolerance_seconds = clock_skew_tolerance_seconds
|
||||
self._lock = threading.Lock()
|
||||
self._shutdown = threading.Event()
|
||||
self._sync_thread: Optional[threading.Thread] = None
|
||||
self._bucket_stats: Dict[str, SiteSyncStats] = {}
|
||||
|
||||
def _create_client(self, connection: "RemoteConnection") -> Any:
|
||||
"""Create an S3 client with the worker's configured timeouts."""
|
||||
return _create_sync_client(
|
||||
connection,
|
||||
connect_timeout=self.connect_timeout,
|
||||
read_timeout=self.read_timeout,
|
||||
max_retries=self.max_retries,
|
||||
)
|
||||
|
||||
def start(self) -> None:
|
||||
if self._sync_thread is not None and self._sync_thread.is_alive():
|
||||
return
|
||||
self._shutdown.clear()
|
||||
self._sync_thread = threading.Thread(
|
||||
target=self._sync_loop, name="site-sync-worker", daemon=True
|
||||
)
|
||||
self._sync_thread.start()
|
||||
logger.info("Site sync worker started (interval=%ds)", self.interval_seconds)
|
||||
|
||||
def shutdown(self) -> None:
|
||||
self._shutdown.set()
|
||||
if self._sync_thread is not None:
|
||||
self._sync_thread.join(timeout=10.0)
|
||||
logger.info("Site sync worker shut down")
|
||||
|
||||
def trigger_sync(self, bucket_name: str) -> Optional[SiteSyncStats]:
|
||||
from .replication import REPLICATION_MODE_BIDIRECTIONAL
|
||||
rule = self.replication_manager.get_rule(bucket_name)
|
||||
if not rule or rule.mode != REPLICATION_MODE_BIDIRECTIONAL or not rule.enabled:
|
||||
return None
|
||||
return self._sync_bucket(rule)
|
||||
|
||||
def get_stats(self, bucket_name: str) -> Optional[SiteSyncStats]:
|
||||
with self._lock:
|
||||
return self._bucket_stats.get(bucket_name)
|
||||
|
||||
def _sync_loop(self) -> None:
|
||||
while not self._shutdown.is_set():
|
||||
self._shutdown.wait(timeout=self.interval_seconds)
|
||||
if self._shutdown.is_set():
|
||||
break
|
||||
self._run_sync_cycle()
|
||||
|
||||
def _run_sync_cycle(self) -> None:
|
||||
from .replication import REPLICATION_MODE_BIDIRECTIONAL
|
||||
for bucket_name, rule in list(self.replication_manager._rules.items()):
|
||||
if self._shutdown.is_set():
|
||||
break
|
||||
if rule.mode != REPLICATION_MODE_BIDIRECTIONAL or not rule.enabled:
|
||||
continue
|
||||
try:
|
||||
stats = self._sync_bucket(rule)
|
||||
with self._lock:
|
||||
self._bucket_stats[bucket_name] = stats
|
||||
except Exception as e:
|
||||
logger.exception("Site sync failed for bucket %s: %s", bucket_name, e)
|
||||
|
||||
def _sync_bucket(self, rule: "ReplicationRule") -> SiteSyncStats:
|
||||
stats = SiteSyncStats()
|
||||
connection = self.connections.get(rule.target_connection_id)
|
||||
if not connection:
|
||||
logger.warning("Connection %s not found for bucket %s", rule.target_connection_id, rule.bucket_name)
|
||||
stats.errors += 1
|
||||
return stats
|
||||
|
||||
try:
|
||||
local_objects = self._list_local_objects(rule.bucket_name)
|
||||
except Exception as e:
|
||||
logger.error("Failed to list local objects for %s: %s", rule.bucket_name, e)
|
||||
stats.errors += 1
|
||||
return stats
|
||||
|
||||
try:
|
||||
remote_objects = self._list_remote_objects(rule, connection)
|
||||
except Exception as e:
|
||||
logger.error("Failed to list remote objects for %s: %s", rule.bucket_name, e)
|
||||
stats.errors += 1
|
||||
return stats
|
||||
|
||||
sync_state = self._load_sync_state(rule.bucket_name)
|
||||
local_keys = set(local_objects.keys())
|
||||
remote_keys = set(remote_objects.keys())
|
||||
|
||||
to_pull = []
|
||||
for key in remote_keys:
|
||||
remote_meta = remote_objects[key]
|
||||
local_meta = local_objects.get(key)
|
||||
if local_meta is None:
|
||||
to_pull.append(key)
|
||||
else:
|
||||
resolution = self._resolve_conflict(local_meta, remote_meta)
|
||||
if resolution == "pull":
|
||||
to_pull.append(key)
|
||||
stats.conflicts_resolved += 1
|
||||
else:
|
||||
stats.objects_skipped += 1
|
||||
|
||||
pulled_count = 0
|
||||
for key in to_pull:
|
||||
if self._shutdown.is_set():
|
||||
break
|
||||
if pulled_count >= self.batch_size:
|
||||
break
|
||||
remote_meta = remote_objects[key]
|
||||
success = self._pull_object(rule, key, connection, remote_meta)
|
||||
if success:
|
||||
stats.objects_pulled += 1
|
||||
pulled_count += 1
|
||||
sync_state.synced_objects[key] = SyncedObjectInfo(
|
||||
last_synced_at=time.time(),
|
||||
remote_etag=remote_meta.etag,
|
||||
source="remote",
|
||||
)
|
||||
else:
|
||||
stats.errors += 1
|
||||
|
||||
if rule.sync_deletions:
|
||||
for key in list(sync_state.synced_objects.keys()):
|
||||
if key not in remote_keys and key in local_keys:
|
||||
tracked = sync_state.synced_objects[key]
|
||||
if tracked.source == "remote":
|
||||
local_meta = local_objects.get(key)
|
||||
if local_meta and local_meta.last_modified.timestamp() <= tracked.last_synced_at:
|
||||
success = self._apply_remote_deletion(rule.bucket_name, key)
|
||||
if success:
|
||||
stats.deletions_applied += 1
|
||||
del sync_state.synced_objects[key]
|
||||
|
||||
sync_state.last_full_sync = time.time()
|
||||
self._save_sync_state(rule.bucket_name, sync_state)
|
||||
|
||||
with self.replication_manager._stats_lock:
|
||||
rule.last_pull_at = time.time()
|
||||
self.replication_manager.save_rules()
|
||||
|
||||
stats.last_sync_at = time.time()
|
||||
logger.info(
|
||||
"Site sync completed for %s: pulled=%d, skipped=%d, conflicts=%d, deletions=%d, errors=%d",
|
||||
rule.bucket_name,
|
||||
stats.objects_pulled,
|
||||
stats.objects_skipped,
|
||||
stats.conflicts_resolved,
|
||||
stats.deletions_applied,
|
||||
stats.errors,
|
||||
)
|
||||
return stats
|
||||
|
||||
def _list_local_objects(self, bucket_name: str) -> Dict[str, Any]:
|
||||
from .storage import ObjectMeta
|
||||
objects = self.storage.list_objects_all(bucket_name)
|
||||
return {obj.key: obj for obj in objects}
|
||||
|
||||
def _list_remote_objects(self, rule: "ReplicationRule", connection: "RemoteConnection") -> Dict[str, RemoteObjectMeta]:
|
||||
s3 = self._create_client(connection)
|
||||
result: Dict[str, RemoteObjectMeta] = {}
|
||||
paginator = s3.get_paginator("list_objects_v2")
|
||||
try:
|
||||
for page in paginator.paginate(Bucket=rule.target_bucket):
|
||||
for obj in page.get("Contents", []):
|
||||
meta = RemoteObjectMeta.from_s3_object(obj)
|
||||
result[meta.key] = meta
|
||||
except ClientError as e:
|
||||
if e.response["Error"]["Code"] == "NoSuchBucket":
|
||||
return {}
|
||||
raise
|
||||
return result
|
||||
|
||||
def _resolve_conflict(self, local_meta: Any, remote_meta: RemoteObjectMeta) -> str:
|
||||
local_ts = local_meta.last_modified.timestamp()
|
||||
remote_ts = remote_meta.last_modified.timestamp()
|
||||
|
||||
if abs(remote_ts - local_ts) < self.clock_skew_tolerance_seconds:
|
||||
local_etag = local_meta.etag or ""
|
||||
if remote_meta.etag == local_etag:
|
||||
return "skip"
|
||||
return "pull" if remote_meta.etag > local_etag else "keep"
|
||||
|
||||
return "pull" if remote_ts > local_ts else "keep"
|
||||
|
||||
def _pull_object(
|
||||
self,
|
||||
rule: "ReplicationRule",
|
||||
object_key: str,
|
||||
connection: "RemoteConnection",
|
||||
remote_meta: RemoteObjectMeta,
|
||||
) -> bool:
|
||||
s3 = self._create_client(connection)
|
||||
tmp_path = None
|
||||
try:
|
||||
tmp_dir = self.storage_root / ".myfsio.sys" / "tmp"
|
||||
tmp_dir.mkdir(parents=True, exist_ok=True)
|
||||
with tempfile.NamedTemporaryFile(dir=tmp_dir, delete=False) as tmp_file:
|
||||
tmp_path = Path(tmp_file.name)
|
||||
|
||||
s3.download_file(rule.target_bucket, object_key, str(tmp_path))
|
||||
|
||||
head_response = s3.head_object(Bucket=rule.target_bucket, Key=object_key)
|
||||
user_metadata = head_response.get("Metadata", {})
|
||||
|
||||
with open(tmp_path, "rb") as f:
|
||||
self.storage.put_object(
|
||||
rule.bucket_name,
|
||||
object_key,
|
||||
f,
|
||||
metadata=user_metadata if user_metadata else None,
|
||||
)
|
||||
|
||||
logger.debug("Pulled object %s/%s from remote", rule.bucket_name, object_key)
|
||||
return True
|
||||
|
||||
except ClientError as e:
|
||||
logger.error("Failed to pull %s/%s: %s", rule.bucket_name, object_key, e)
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error("Failed to store pulled object %s/%s: %s", rule.bucket_name, object_key, e)
|
||||
return False
|
||||
finally:
|
||||
if tmp_path and tmp_path.exists():
|
||||
try:
|
||||
tmp_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def _apply_remote_deletion(self, bucket_name: str, object_key: str) -> bool:
|
||||
try:
|
||||
self.storage.delete_object(bucket_name, object_key)
|
||||
logger.debug("Applied remote deletion for %s/%s", bucket_name, object_key)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error("Failed to apply remote deletion for %s/%s: %s", bucket_name, object_key, e)
|
||||
return False
|
||||
|
||||
def _sync_state_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "site_sync_state.json"
|
||||
|
||||
def _load_sync_state(self, bucket_name: str) -> SyncState:
|
||||
path = self._sync_state_path(bucket_name)
|
||||
if not path.exists():
|
||||
return SyncState()
|
||||
try:
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
return SyncState.from_dict(data)
|
||||
except (json.JSONDecodeError, OSError, KeyError) as e:
|
||||
logger.warning("Failed to load sync state for %s: %s", bucket_name, e)
|
||||
return SyncState()
|
||||
|
||||
def _save_sync_state(self, bucket_name: str, state: SyncState) -> None:
|
||||
path = self._sync_state_path(bucket_name)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
path.write_text(json.dumps(state.to_dict(), indent=2), encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.warning("Failed to save sync state for %s: %s", bucket_name, e)
|
||||
2904
python/app/storage.py
Normal file
2904
python/app/storage.py
Normal file
File diff suppressed because it is too large
Load Diff
215
python/app/system_metrics.py
Normal file
215
python/app/system_metrics.py
Normal file
@@ -0,0 +1,215 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
||||
|
||||
import psutil
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .storage import ObjectStorage
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SystemMetricsSnapshot:
|
||||
timestamp: datetime
|
||||
cpu_percent: float
|
||||
memory_percent: float
|
||||
disk_percent: float
|
||||
storage_bytes: int
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"timestamp": self.timestamp.strftime("%Y-%m-%dT%H:%M:%SZ"),
|
||||
"cpu_percent": round(self.cpu_percent, 2),
|
||||
"memory_percent": round(self.memory_percent, 2),
|
||||
"disk_percent": round(self.disk_percent, 2),
|
||||
"storage_bytes": self.storage_bytes,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SystemMetricsSnapshot":
|
||||
timestamp_str = data["timestamp"]
|
||||
if timestamp_str.endswith("Z"):
|
||||
timestamp_str = timestamp_str[:-1] + "+00:00"
|
||||
return cls(
|
||||
timestamp=datetime.fromisoformat(timestamp_str),
|
||||
cpu_percent=data.get("cpu_percent", 0.0),
|
||||
memory_percent=data.get("memory_percent", 0.0),
|
||||
disk_percent=data.get("disk_percent", 0.0),
|
||||
storage_bytes=data.get("storage_bytes", 0),
|
||||
)
|
||||
|
||||
|
||||
class SystemMetricsCollector:
|
||||
def __init__(
|
||||
self,
|
||||
storage_root: Path,
|
||||
interval_minutes: int = 5,
|
||||
retention_hours: int = 24,
|
||||
):
|
||||
self.storage_root = storage_root
|
||||
self.interval_seconds = interval_minutes * 60
|
||||
self.retention_hours = retention_hours
|
||||
self._lock = threading.Lock()
|
||||
self._shutdown = threading.Event()
|
||||
self._snapshots: List[SystemMetricsSnapshot] = []
|
||||
self._storage_ref: Optional["ObjectStorage"] = None
|
||||
|
||||
self._load_history()
|
||||
|
||||
self._snapshot_thread = threading.Thread(
|
||||
target=self._snapshot_loop,
|
||||
name="system-metrics-snapshot",
|
||||
daemon=True,
|
||||
)
|
||||
self._snapshot_thread.start()
|
||||
|
||||
def set_storage(self, storage: "ObjectStorage") -> None:
|
||||
with self._lock:
|
||||
self._storage_ref = storage
|
||||
|
||||
def _config_path(self) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "config" / "metrics_history.json"
|
||||
|
||||
def _load_history(self) -> None:
|
||||
config_path = self._config_path()
|
||||
if not config_path.exists():
|
||||
return
|
||||
try:
|
||||
data = json.loads(config_path.read_text(encoding="utf-8"))
|
||||
history_data = data.get("history", [])
|
||||
self._snapshots = [SystemMetricsSnapshot.from_dict(s) for s in history_data]
|
||||
self._prune_old_snapshots()
|
||||
except (json.JSONDecodeError, OSError, KeyError) as e:
|
||||
logger.warning(f"Failed to load system metrics history: {e}")
|
||||
|
||||
def _save_history(self) -> None:
|
||||
config_path = self._config_path()
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
data = {"history": [s.to_dict() for s in self._snapshots]}
|
||||
config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.warning(f"Failed to save system metrics history: {e}")
|
||||
|
||||
def _prune_old_snapshots(self) -> None:
|
||||
if not self._snapshots:
|
||||
return
|
||||
cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600)
|
||||
self._snapshots = [
|
||||
s for s in self._snapshots if s.timestamp.timestamp() > cutoff
|
||||
]
|
||||
|
||||
def _snapshot_loop(self) -> None:
|
||||
while not self._shutdown.is_set():
|
||||
self._shutdown.wait(timeout=self.interval_seconds)
|
||||
if not self._shutdown.is_set():
|
||||
self._take_snapshot()
|
||||
|
||||
def _take_snapshot(self) -> None:
|
||||
try:
|
||||
cpu_percent = psutil.cpu_percent(interval=0.1)
|
||||
memory = psutil.virtual_memory()
|
||||
disk = psutil.disk_usage(str(self.storage_root))
|
||||
|
||||
storage_bytes = 0
|
||||
with self._lock:
|
||||
storage = self._storage_ref
|
||||
if storage:
|
||||
try:
|
||||
buckets = storage.list_buckets()
|
||||
for bucket in buckets:
|
||||
stats = storage.bucket_stats(bucket.name, cache_ttl=60)
|
||||
storage_bytes += stats.get("total_bytes", stats.get("bytes", 0))
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to collect bucket stats: {e}")
|
||||
|
||||
snapshot = SystemMetricsSnapshot(
|
||||
timestamp=datetime.now(timezone.utc),
|
||||
cpu_percent=cpu_percent,
|
||||
memory_percent=memory.percent,
|
||||
disk_percent=disk.percent,
|
||||
storage_bytes=storage_bytes,
|
||||
)
|
||||
|
||||
with self._lock:
|
||||
self._snapshots.append(snapshot)
|
||||
self._prune_old_snapshots()
|
||||
self._save_history()
|
||||
|
||||
logger.debug(f"System metrics snapshot taken: CPU={cpu_percent:.1f}%, Memory={memory.percent:.1f}%")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to take system metrics snapshot: {e}")
|
||||
|
||||
def get_current(self) -> Dict[str, Any]:
|
||||
cpu_percent = psutil.cpu_percent(interval=0.1)
|
||||
memory = psutil.virtual_memory()
|
||||
disk = psutil.disk_usage(str(self.storage_root))
|
||||
boot_time = psutil.boot_time()
|
||||
uptime_seconds = time.time() - boot_time
|
||||
uptime_days = int(uptime_seconds / 86400)
|
||||
|
||||
total_buckets = 0
|
||||
total_objects = 0
|
||||
total_bytes_used = 0
|
||||
total_versions = 0
|
||||
|
||||
with self._lock:
|
||||
storage = self._storage_ref
|
||||
if storage:
|
||||
try:
|
||||
buckets = storage.list_buckets()
|
||||
total_buckets = len(buckets)
|
||||
for bucket in buckets:
|
||||
stats = storage.bucket_stats(bucket.name, cache_ttl=60)
|
||||
total_objects += stats.get("total_objects", stats.get("objects", 0))
|
||||
total_bytes_used += stats.get("total_bytes", stats.get("bytes", 0))
|
||||
total_versions += stats.get("version_count", 0)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to collect current bucket stats: {e}")
|
||||
|
||||
return {
|
||||
"cpu_percent": round(cpu_percent, 2),
|
||||
"memory": {
|
||||
"total": memory.total,
|
||||
"available": memory.available,
|
||||
"used": memory.used,
|
||||
"percent": round(memory.percent, 2),
|
||||
},
|
||||
"disk": {
|
||||
"total": disk.total,
|
||||
"free": disk.free,
|
||||
"used": disk.used,
|
||||
"percent": round(disk.percent, 2),
|
||||
},
|
||||
"app": {
|
||||
"buckets": total_buckets,
|
||||
"objects": total_objects,
|
||||
"versions": total_versions,
|
||||
"storage_bytes": total_bytes_used,
|
||||
"uptime_days": uptime_days,
|
||||
},
|
||||
}
|
||||
|
||||
def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]:
|
||||
with self._lock:
|
||||
snapshots = list(self._snapshots)
|
||||
|
||||
if hours:
|
||||
cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)
|
||||
snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff]
|
||||
|
||||
return [s.to_dict() for s in snapshots]
|
||||
|
||||
def shutdown(self) -> None:
|
||||
self._shutdown.set()
|
||||
self._take_snapshot()
|
||||
self._snapshot_thread.join(timeout=5.0)
|
||||
4309
python/app/ui.py
Normal file
4309
python/app/ui.py
Normal file
File diff suppressed because it is too large
Load Diff
8
python/app/version.py
Normal file
8
python/app/version.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
APP_VERSION = "0.4.3"
|
||||
|
||||
|
||||
def get_version() -> str:
|
||||
"""Return the current application version."""
|
||||
return APP_VERSION
|
||||
108
python/app/website_domains.py
Normal file
108
python/app/website_domains.py
Normal file
@@ -0,0 +1,108 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
_DOMAIN_RE = re.compile(
|
||||
r"^(?!-)[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$"
|
||||
)
|
||||
|
||||
|
||||
def normalize_domain(raw: str) -> str:
|
||||
raw = raw.strip().lower()
|
||||
for prefix in ("https://", "http://"):
|
||||
if raw.startswith(prefix):
|
||||
raw = raw[len(prefix):]
|
||||
raw = raw.split("/", 1)[0]
|
||||
raw = raw.split("?", 1)[0]
|
||||
raw = raw.split("#", 1)[0]
|
||||
if ":" in raw:
|
||||
raw = raw.rsplit(":", 1)[0]
|
||||
return raw
|
||||
|
||||
|
||||
def is_valid_domain(domain: str) -> bool:
|
||||
if not domain or len(domain) > 253:
|
||||
return False
|
||||
return bool(_DOMAIN_RE.match(domain))
|
||||
|
||||
|
||||
class WebsiteDomainStore:
|
||||
def __init__(self, config_path: Path) -> None:
|
||||
self.config_path = config_path
|
||||
self._lock = threading.Lock()
|
||||
self._domains: Dict[str, str] = {}
|
||||
self._last_mtime: float = 0.0
|
||||
self.reload()
|
||||
|
||||
def reload(self) -> None:
|
||||
if not self.config_path.exists():
|
||||
self._domains = {}
|
||||
self._last_mtime = 0.0
|
||||
return
|
||||
try:
|
||||
self._last_mtime = self.config_path.stat().st_mtime
|
||||
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
if isinstance(data, dict):
|
||||
self._domains = {k.lower(): v for k, v in data.items()}
|
||||
else:
|
||||
self._domains = {}
|
||||
except (OSError, json.JSONDecodeError):
|
||||
self._domains = {}
|
||||
|
||||
def _maybe_reload(self) -> None:
|
||||
try:
|
||||
if self.config_path.exists():
|
||||
mtime = self.config_path.stat().st_mtime
|
||||
if mtime != self._last_mtime:
|
||||
self._last_mtime = mtime
|
||||
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
if isinstance(data, dict):
|
||||
self._domains = {k.lower(): v for k, v in data.items()}
|
||||
else:
|
||||
self._domains = {}
|
||||
elif self._domains:
|
||||
self._domains = {}
|
||||
self._last_mtime = 0.0
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
|
||||
def _save(self) -> None:
|
||||
self.config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(self.config_path, "w", encoding="utf-8") as f:
|
||||
json.dump(self._domains, f, indent=2)
|
||||
self._last_mtime = self.config_path.stat().st_mtime
|
||||
|
||||
def list_all(self) -> List[Dict[str, str]]:
|
||||
with self._lock:
|
||||
self._maybe_reload()
|
||||
return [{"domain": d, "bucket": b} for d, b in self._domains.items()]
|
||||
|
||||
def get_bucket(self, domain: str) -> Optional[str]:
|
||||
with self._lock:
|
||||
self._maybe_reload()
|
||||
return self._domains.get(domain.lower())
|
||||
|
||||
def get_domains_for_bucket(self, bucket: str) -> List[str]:
|
||||
with self._lock:
|
||||
self._maybe_reload()
|
||||
return [d for d, b in self._domains.items() if b == bucket]
|
||||
|
||||
def set_mapping(self, domain: str, bucket: str) -> None:
|
||||
with self._lock:
|
||||
self._domains[domain.lower()] = bucket
|
||||
self._save()
|
||||
|
||||
def delete_mapping(self, domain: str) -> bool:
|
||||
with self._lock:
|
||||
key = domain.lower()
|
||||
if key not in self._domains:
|
||||
return False
|
||||
del self._domains[key]
|
||||
self._save()
|
||||
return True
|
||||
Reference in New Issue
Block a user