42 Commits

SHA1  Message  Date
4c661477d5 Add Rust extension module (myfsio_core) for SigV4, hashing, and validation hot paths 2026-02-16 16:04:15 +08:00
f3f52f14a5 Fix domain mapping bugs and improve UI/UX: normalize domains, fix delete, add validation and search 2026-02-16 00:51:19 +08:00
d19ba3e305 UI/UX enhancements to IAM page: role badges, search, copy keys, improved policy display 2026-02-16 00:40:04 +08:00
c627f41f53 UI/UX enhancements to Metrics page 2026-02-15 23:56:18 +08:00
bcad0cd3da Improve web UI: sort/search/context menu, fix security and UX bugs 2026-02-15 23:30:26 +08:00
67f057ca1c Add static website hosting 2026-02-15 20:57:02 +08:00
01e79e6993 Fix object browser UI issues 2026-02-10 11:41:02 +08:00
1e3c4b545f Migrate UI backend from direct storage calls to S3 API proxy via boto3 2026-02-09 22:33:47 +08:00
4ecd32a554 Fix empty UI on large bucket first load: keep loading row during streaming, add progress indicator, throttle renders 2026-02-09 19:29:50 +08:00
aa6d7c4d28 Optimize replication failure caching, batch UI auth checks, add bulk download size limit, background parent cleanup 2026-02-09 18:23:45 +08:00
6e6d6d32bf Optimize KMS: cache AESGCM instance, remove duplicate get_provider 2026-02-09 17:01:19 +08:00
54705ab9c4 Fix Content-Length mismatch on range requests (206 Partial Content) 2026-02-06 16:14:35 +08:00
77a46d0725 Binary run fix 2026-02-05 23:49:36 +08:00
0f750b9d89 Optimize object browser for large listings on slow networks 2026-02-05 22:56:00 +08:00
e0dee9db36 Fix UI object browser not showing objects uploaded via S3 API 2026-02-05 22:22:59 +08:00
126657c99f Further debugging of object browser object count delay 2026-02-05 21:45:02 +08:00
07fb1ac773 Fix cross-process cache invalidation on Windows using version counter instead of mtime 2026-02-05 21:32:40 +08:00
147962e1dd Further debugging of object browser object count delay 2026-02-05 21:18:35 +08:00
2643a79121 Debug object browser object count delay 2026-02-05 21:08:18 +08:00
e9a035827b Add _touch_cache_marker for UI object delay count issue 2026-02-05 20:56:42 +08:00
033b8a82be Fix error handlers for API mode; distinguish files from directories in object lookup; Fix UI not showing newly uploaded objects by adding Cache-Control headers 2026-02-05 20:44:11 +08:00
e76c311231 Update install/uninstall scripts with new config options and credential capture 2026-02-05 19:21:18 +08:00
cbdf1a27c8 Pin dockerfile python version to 3.14.3 2026-02-05 19:11:42 +08:00
4a60cb269a Update python version in Dockerfile 2026-02-05 19:11:00 +08:00
ebe7f6222d Fix hardcoded secret key ttl session 2026-02-05 19:08:18 +08:00
70b61fd8e6 Further optimize CPU usage; Improve security and performance; 4 bug fixes. 2026-02-05 17:45:34 +08:00
a779b002d7 Optimize CPU usage via caching and reducing ThreadPoolExecutor workers to prevent CPU saturation 2026-02-02 13:30:06 +08:00
45d21cce21 Add ALLOW_INTERNAL_ENDPOINTS config for self-hosted internal network deployments 2026-02-01 18:26:14 +08:00
9629507acd Fix auth bypass, user enumeration, xml DoS, multipart race, path traversal unicode, silent permissions failures, data key without AAD, KMS streaming 2026-02-01 18:12:03 +08:00
5d6cb4efa1 Update documentation 2026-02-01 15:18:20 +08:00
56ad83bbaf Fix bidirectional sync UI issues 2026-02-01 14:56:20 +08:00
847933b7c0 Add UI endpoint for bidirectional-status to fix 403 auth error 2026-02-01 14:30:55 +08:00
be55d08c0a Fix bidirectional-status 404 when UI runs separately from API 2026-02-01 14:23:35 +08:00
8c4bf67974 Fix 15 security vulnerabilities across auth, storage, and API modules 2026-01-31 00:55:27 +08:00
9385d1fe1c Add 4 new S3 APIs: UploadPartCopy, Bucket Replication, PostObject, SelectObjectContent 2026-01-29 12:51:00 +08:00
0ea54457e8 Fix 17 security vulnerabilities across encryption, auth, and API modules 2026-01-29 12:05:35 +08:00
ae26d22388 Add bidirectional replication setup verification and improved UX warnings 2026-01-26 23:29:20 +08:00
6b715851b9 Add replication setup wizard and site-level sync dashboard for site registry 2026-01-26 21:39:47 +08:00
62c36f7a6c Add site registry UI and update documentation for geo-distribution 2026-01-26 19:49:23 +08:00
b32f1f94f7 Add configurable env variables for hardcoded timeouts and limits 2026-01-25 23:32:36 +08:00
6e3d280a75 Add SlowDown error code tracking for 429 rate limit responses 2026-01-25 21:29:58 +08:00
704f79dc44 Add configurable rate limits for S3 API endpoints 2026-01-25 20:15:38 +08:00
545 changed files with 13528 additions and 2093 deletions

View File

@@ -1,25 +1,33 @@
-FROM python:3.12.12-slim
+# syntax=docker/dockerfile:1.7
+FROM python:3.14.3-slim

 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1

 WORKDIR /app

-# Install build deps for any wheels that need compilation, then clean up
 RUN apt-get update \
-    && apt-get install -y --no-install-recommends build-essential \
+    && apt-get install -y --no-install-recommends build-essential curl \
+    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal \
     && rm -rf /var/lib/apt/lists/*

+ENV PATH="/root/.cargo/bin:${PATH}"
+
 COPY requirements.txt ./
 RUN pip install --no-cache-dir -r requirements.txt

 COPY . .

-# Make entrypoint executable
+RUN pip install --no-cache-dir maturin \
+    && cd myfsio_core \
+    && maturin build --release \
+    && pip install target/wheels/*.whl \
+    && cd .. \
+    && rm -rf myfsio_core/target \
+    && pip uninstall -y maturin \
+    && rustup self uninstall -y
+
 RUN chmod +x docker-entrypoint.sh

-# Create data directory and set permissions
 RUN mkdir -p /app/data \
     && useradd -m -u 1000 myfsio \
     && chown -R myfsio:myfsio /app

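The image now builds the `myfsio_core` Rust extension with maturin at image-build time and then strips the toolchain. A hedged sketch of how the Python side could guard against the wheel being absent (for example in a local checkout without Rust) — the fallback helper and `sha256_hex` attribute are illustrative assumptions, not taken from the repository:

```python
# Sketch only: prefer the compiled myfsio_core wheel, fall back to the
# stdlib. The extension's actual API surface is assumed here.
import hashlib

try:
    import myfsio_core  # Rust extension built via maturin in the Dockerfile
    HAVE_NATIVE = True
except ImportError:
    myfsio_core = None
    HAVE_NATIVE = False


def sha256_hex(data: bytes) -> str:
    # Use the native hot path when available, else pure Python.
    if HAVE_NATIVE and hasattr(myfsio_core, "sha256_hex"):
        return myfsio_core.sha256_hex(data)
    return hashlib.sha256(data).hexdigest()
```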
View File

@@ -102,6 +102,11 @@ python run.py --mode ui   # UI only (port 5100)
 | `ENCRYPTION_ENABLED` | `false` | Enable server-side encryption |
 | `KMS_ENABLED` | `false` | Enable Key Management Service |
 | `LOG_LEVEL` | `INFO` | Logging verbosity |
+| `SIGV4_TIMESTAMP_TOLERANCE_SECONDS` | `900` | Max time skew for SigV4 requests |
+| `PRESIGNED_URL_MAX_EXPIRY_SECONDS` | `604800` | Max presigned URL expiry (7 days) |
+| `REPLICATION_CONNECT_TIMEOUT_SECONDS` | `5` | Replication connection timeout |
+| `SITE_SYNC_ENABLED` | `false` | Enable bi-directional site sync |
+| `OBJECT_TAG_LIMIT` | `50` | Maximum tags per object |

 ## Data Layout

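These knobs are plain environment variables read by `AppConfig.from_env` (see the config diff further down). A short illustrative sketch of setting them before startup; the values are examples, not recommendations:

```python
import os

# Illustrative values only; AppConfig.from_env reads these at startup.
os.environ["SIGV4_TIMESTAMP_TOLERANCE_SECONDS"] = "300"          # tighten skew to 5 min
os.environ["PRESIGNED_URL_MAX_EXPIRY_SECONDS"] = str(24 * 3600)  # cap presigned URLs at 1 day
os.environ["SITE_SYNC_ENABLED"] = "true"

# Sanity check on the documented default: 604800 seconds is exactly 7 days.
assert 7 * 24 * 60 * 60 == 604800
```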
View File

@@ -1,6 +1,8 @@
 from __future__ import annotations

+import html as html_module
 import logging
+import mimetypes
 import shutil
 import sys
 import time
@@ -10,7 +12,7 @@ from pathlib import Path
 from datetime import timedelta
 from typing import Any, Dict, List, Optional

-from flask import Flask, g, has_request_context, redirect, render_template, request, url_for
+from flask import Flask, Response, g, has_request_context, redirect, render_template, request, url_for
 from flask_cors import CORS
 from flask_wtf.csrf import CSRFError
 from werkzeug.middleware.proxy_fix import ProxyFix
@@ -31,8 +33,10 @@ from .notifications import NotificationService
 from .object_lock import ObjectLockService
 from .replication import ReplicationManager
 from .secret_store import EphemeralSecretStore
-from .storage import ObjectStorage
+from .site_registry import SiteRegistry, SiteInfo
+from .storage import ObjectStorage, StorageError
 from .version import get_version
+from .website_domains import WebsiteDomainStore


 def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
@@ -104,6 +108,9 @@ def create_app(
     storage = ObjectStorage(
         Path(app.config["STORAGE_ROOT"]),
         cache_ttl=app.config.get("OBJECT_CACHE_TTL", 5),
+        object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100),
+        bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0),
+        object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024),
     )

     if app.config.get("WARM_CACHE_ON_STARTUP", True) and not app.config.get("TESTING"):
@@ -137,12 +144,33 @@ def create_app(
     )
     connections = ConnectionStore(connections_path)
-    replication = ReplicationManager(storage, connections, replication_rules_path, storage_root)
+    replication = ReplicationManager(
+        storage,
+        connections,
+        replication_rules_path,
+        storage_root,
+        connect_timeout=app.config.get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5),
+        read_timeout=app.config.get("REPLICATION_READ_TIMEOUT_SECONDS", 30),
+        max_retries=app.config.get("REPLICATION_MAX_RETRIES", 2),
+        streaming_threshold_bytes=app.config.get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024),
+        max_failures_per_bucket=app.config.get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50),
+    )
+
+    site_registry_path = config_dir / "site_registry.json"
+    site_registry = SiteRegistry(site_registry_path)
+    if app.config.get("SITE_ID") and not site_registry.get_local_site():
+        site_registry.set_local_site(SiteInfo(
+            site_id=app.config["SITE_ID"],
+            endpoint=app.config.get("SITE_ENDPOINT") or "",
+            region=app.config.get("SITE_REGION", "us-east-1"),
+            priority=app.config.get("SITE_PRIORITY", 100),
+        ))

     encryption_config = {
         "encryption_enabled": app.config.get("ENCRYPTION_ENABLED", False),
         "encryption_master_key_path": app.config.get("ENCRYPTION_MASTER_KEY_PATH"),
         "default_encryption_algorithm": app.config.get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"),
+        "encryption_chunk_size_bytes": app.config.get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024),
     }
     encryption_manager = EncryptionManager(encryption_config)
@@ -150,7 +178,12 @@ def create_app(
     if app.config.get("KMS_ENABLED", False):
         kms_keys_path = Path(app.config.get("KMS_KEYS_PATH", ""))
         kms_master_key_path = Path(app.config.get("ENCRYPTION_MASTER_KEY_PATH", ""))
-        kms_manager = KMSManager(kms_keys_path, kms_master_key_path)
+        kms_manager = KMSManager(
+            kms_keys_path,
+            kms_master_key_path,
+            generate_data_key_min_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1),
+            generate_data_key_max_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024),
+        )
         encryption_manager.set_kms_provider(kms_manager)

     if app.config.get("ENCRYPTION_ENABLED", False):
@@ -159,7 +192,10 @@ def create_app(
     acl_service = AclService(storage_root)
     object_lock_service = ObjectLockService(storage_root)
-    notification_service = NotificationService(storage_root)
+    notification_service = NotificationService(
+        storage_root,
+        allow_internal_endpoints=app.config.get("ALLOW_INTERNAL_ENDPOINTS", False),
+    )
     access_logging_service = AccessLoggingService(storage_root)
     access_logging_service.set_storage(storage)
@@ -170,6 +206,7 @@ def create_app(
         base_storage,
         interval_seconds=app.config.get("LIFECYCLE_INTERVAL_SECONDS", 3600),
         storage_root=storage_root,
+        max_history_per_bucket=app.config.get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50),
     )
     lifecycle_manager.start()
@@ -187,6 +224,20 @@ def create_app(
     app.extensions["object_lock"] = object_lock_service
     app.extensions["notifications"] = notification_service
     app.extensions["access_logging"] = access_logging_service
+    app.extensions["site_registry"] = site_registry
+
+    website_domains_store = None
+    if app.config.get("WEBSITE_HOSTING_ENABLED", False):
+        website_domains_path = config_dir / "website_domains.json"
+        website_domains_store = WebsiteDomainStore(website_domains_path)
+        app.extensions["website_domains"] = website_domains_store
+
+    from .s3_client import S3ProxyClient
+    api_base = app.config.get("API_BASE_URL") or "http://127.0.0.1:5000"
+    app.extensions["s3_proxy"] = S3ProxyClient(
+        api_base_url=api_base,
+        region=app.config.get("AWS_REGION", "us-east-1"),
+    )

     operation_metrics_collector = None
     if app.config.get("OPERATION_METRICS_ENABLED", False):
@@ -218,17 +269,47 @@ def create_app(
             storage_root=storage_root,
             interval_seconds=app.config.get("SITE_SYNC_INTERVAL_SECONDS", 60),
             batch_size=app.config.get("SITE_SYNC_BATCH_SIZE", 100),
+            connect_timeout=app.config.get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10),
+            read_timeout=app.config.get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120),
+            max_retries=app.config.get("SITE_SYNC_MAX_RETRIES", 2),
+            clock_skew_tolerance_seconds=app.config.get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0),
         )
         site_sync_worker.start()
         app.extensions["site_sync"] = site_sync_worker

     @app.errorhandler(500)
     def internal_error(error):
-        return render_template('500.html'), 500
+        wants_html = request.accept_mimetypes.accept_html
+        path = request.path or ""
+        if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
+            return render_template('500.html'), 500
+        error_xml = (
+            '<?xml version="1.0" encoding="UTF-8"?>'
+            '<Error>'
+            '<Code>InternalError</Code>'
+            '<Message>An internal server error occurred</Message>'
+            f'<Resource>{path}</Resource>'
+            f'<RequestId>{getattr(g, "request_id", "-")}</RequestId>'
+            '</Error>'
+        )
+        return error_xml, 500, {'Content-Type': 'application/xml'}

     @app.errorhandler(CSRFError)
     def handle_csrf_error(e):
-        return render_template('csrf_error.html', reason=e.description), 400
+        wants_html = request.accept_mimetypes.accept_html
+        path = request.path or ""
+        if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
+            return render_template('csrf_error.html', reason=e.description), 400
+        error_xml = (
+            '<?xml version="1.0" encoding="UTF-8"?>'
+            '<Error>'
+            '<Code>CSRFError</Code>'
+            f'<Message>{e.description}</Message>'
+            f'<Resource>{path}</Resource>'
+            f'<RequestId>{getattr(g, "request_id", "-")}</RequestId>'
+            '</Error>'
+        )
+        return error_xml, 400, {'Content-Type': 'application/xml'}

     @app.template_filter("filesizeformat")
     def filesizeformat(value: int) -> str:
@@ -289,11 +370,14 @@ def create_app(
     if include_api:
         from .s3_api import s3_api_bp
         from .kms_api import kms_api_bp
+        from .admin_api import admin_api_bp
         app.register_blueprint(s3_api_bp)
         app.register_blueprint(kms_api_bp)
+        app.register_blueprint(admin_api_bp)
         csrf.exempt(s3_api_bp)
         csrf.exempt(kms_api_bp)
+        csrf.exempt(admin_api_bp)

     if include_ui:
         from .ui import ui_bp
@@ -397,6 +481,128 @@ def _configure_logging(app: Flask) -> None:
             extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
         )

+    @app.before_request
+    def _maybe_serve_website():
+        if not app.config.get("WEBSITE_HOSTING_ENABLED"):
+            return None
+        if request.method not in {"GET", "HEAD"}:
+            return None
+        host = request.host
+        if ":" in host:
+            host = host.rsplit(":", 1)[0]
+        host = host.lower()
+        store = app.extensions.get("website_domains")
+        if not store:
+            return None
+        bucket = store.get_bucket(host)
+        if not bucket:
+            return None
+        storage = app.extensions["object_storage"]
+        if not storage.bucket_exists(bucket):
+            return _website_error_response(404, "Not Found")
+        website_config = storage.get_bucket_website(bucket)
+        if not website_config:
+            return _website_error_response(404, "Not Found")
+        index_doc = website_config.get("index_document", "index.html")
+        error_doc = website_config.get("error_document")
+        req_path = request.path.lstrip("/")
+        if not req_path or req_path.endswith("/"):
+            object_key = req_path + index_doc
+        else:
+            object_key = req_path
+        try:
+            obj_path = storage.get_object_path(bucket, object_key)
+        except (StorageError, OSError):
+            if object_key == req_path:
+                try:
+                    obj_path = storage.get_object_path(bucket, req_path + "/" + index_doc)
+                    object_key = req_path + "/" + index_doc
+                except (StorageError, OSError):
+                    return _serve_website_error(storage, bucket, error_doc, 404)
+            else:
+                return _serve_website_error(storage, bucket, error_doc, 404)
+        content_type = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
+        is_encrypted = False
+        try:
+            metadata = storage.get_object_metadata(bucket, object_key)
+            is_encrypted = "x-amz-server-side-encryption" in metadata
+        except (StorageError, OSError):
+            pass
+        if request.method == "HEAD":
+            response = Response(status=200)
+            if is_encrypted and hasattr(storage, "get_object_data"):
+                try:
+                    data, _ = storage.get_object_data(bucket, object_key)
+                    response.headers["Content-Length"] = len(data)
+                except (StorageError, OSError):
+                    return _website_error_response(500, "Internal Server Error")
+            else:
+                try:
+                    stat = obj_path.stat()
+                    response.headers["Content-Length"] = stat.st_size
+                except OSError:
+                    return _website_error_response(500, "Internal Server Error")
+            response.headers["Content-Type"] = content_type
+            return response
+        if is_encrypted and hasattr(storage, "get_object_data"):
+            try:
+                data, _ = storage.get_object_data(bucket, object_key)
+                response = Response(data, mimetype=content_type)
+                response.headers["Content-Length"] = len(data)
+                return response
+            except (StorageError, OSError):
+                return _website_error_response(500, "Internal Server Error")
+
+        def _stream(file_path):
+            with file_path.open("rb") as f:
+                while True:
+                    chunk = f.read(65536)
+                    if not chunk:
+                        break
+                    yield chunk
+
+        try:
+            stat = obj_path.stat()
+            response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
+            response.headers["Content-Length"] = stat.st_size
+            return response
+        except OSError:
+            return _website_error_response(500, "Internal Server Error")
+
+    def _serve_website_error(storage, bucket, error_doc_key, status_code):
+        if not error_doc_key:
+            return _website_error_response(status_code, "Not Found" if status_code == 404 else "Error")
+        try:
+            obj_path = storage.get_object_path(bucket, error_doc_key)
+        except (StorageError, OSError):
+            return _website_error_response(status_code, "Not Found")
+        content_type = mimetypes.guess_type(error_doc_key)[0] or "text/html"
+        is_encrypted = False
+        try:
+            metadata = storage.get_object_metadata(bucket, error_doc_key)
+            is_encrypted = "x-amz-server-side-encryption" in metadata
+        except (StorageError, OSError):
+            pass
+        if is_encrypted and hasattr(storage, "get_object_data"):
+            try:
+                data, _ = storage.get_object_data(bucket, error_doc_key)
+                response = Response(data, status=status_code, mimetype=content_type)
+                response.headers["Content-Length"] = len(data)
+                return response
+            except (StorageError, OSError):
+                return _website_error_response(status_code, "Not Found")
+        try:
+            data = obj_path.read_bytes()
+            response = Response(data, status=status_code, mimetype=content_type)
+            response.headers["Content-Length"] = len(data)
+            return response
+        except OSError:
+            return _website_error_response(status_code, "Not Found")
+
+    def _website_error_response(status_code, message):
+        safe_msg = html_module.escape(str(message))
+        safe_code = html_module.escape(str(status_code))
+        body = f"<html><head><title>{safe_code} {safe_msg}</title></head><body><h1>{safe_code} {safe_msg}</h1></body></html>"
+        return Response(body, status=status_code, mimetype="text/html")
+
     @app.after_request
     def _log_request_end(response):
         duration_ms = 0.0

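Taken together, the hook above turns any mapped Host header into a bucket-backed website. A hedged end-to-end sketch of the flow (base URL, credentials, and the assumption that `WEBSITE_HOSTING_ENABLED=true` are all placeholders for illustration):

```python
import requests

BASE = "http://127.0.0.1:5000"  # assumes the combined API+UI process
ADMIN = {"X-Access-Key": "admin-access-key", "X-Secret-Key": "admin-secret-key"}

# Map a domain to a bucket via the admin API (shown in the next file).
requests.post(f"{BASE}/admin/website-domains", headers=ADMIN,
              json={"domain": "docs.example.com", "bucket": "docs"})

# A request whose Host matches the mapping is served from the bucket;
# "/" resolves to the bucket's index_document (index.html by default).
r = requests.get(BASE + "/", headers={"Host": "docs.example.com"})
print(r.status_code, r.headers.get("Content-Type"))
```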
app/admin_api.py (new file, 778 lines)
View File

@@ -0,0 +1,778 @@
from __future__ import annotations
import ipaddress
import json
import logging
import re
import socket
import time
from typing import Any, Dict, Optional, Tuple
from urllib.parse import urlparse
import requests
from flask import Blueprint, Response, current_app, jsonify, request
from .connections import ConnectionStore
from .extensions import limiter
from .iam import IamError, Principal
from .replication import ReplicationManager
from .site_registry import PeerSite, SiteInfo, SiteRegistry
from .website_domains import WebsiteDomainStore, normalize_domain, is_valid_domain
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
"""Check if a URL is safe to make requests to (not internal/private).
Args:
url: The URL to check.
allow_internal: If True, allows internal/private IP addresses.
Use for self-hosted deployments on internal networks.
"""
try:
parsed = urlparse(url)
hostname = parsed.hostname
if not hostname:
return False
cloud_metadata_hosts = {
"metadata.google.internal",
"169.254.169.254",
}
if hostname.lower() in cloud_metadata_hosts:
return False
if allow_internal:
return True
blocked_hosts = {
"localhost",
"127.0.0.1",
"0.0.0.0",
"::1",
"[::1]",
}
if hostname.lower() in blocked_hosts:
return False
try:
resolved_ip = socket.gethostbyname(hostname)
ip = ipaddress.ip_address(resolved_ip)
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
return False
except (socket.gaierror, ValueError):
return False
return True
except Exception:
return False
def _validate_endpoint(endpoint: str) -> Optional[str]:
"""Validate endpoint URL format. Returns error message or None."""
try:
parsed = urlparse(endpoint)
if not parsed.scheme or parsed.scheme not in ("http", "https"):
return "Endpoint must be http or https URL"
if not parsed.netloc:
return "Endpoint must have a host"
return None
except Exception:
return "Invalid endpoint URL"
def _validate_priority(priority: Any) -> Optional[str]:
"""Validate priority value. Returns error message or None."""
try:
p = int(priority)
if p < 0 or p > 1000:
return "Priority must be between 0 and 1000"
return None
except (TypeError, ValueError):
return "Priority must be an integer"
def _validate_region(region: str) -> Optional[str]:
"""Validate region format. Returns error message or None."""
if not re.match(r"^[a-z]{2,}-[a-z]+-\d+$", region):
return "Region must match format like us-east-1"
return None
def _validate_site_id(site_id: str) -> Optional[str]:
"""Validate site_id format. Returns error message or None."""
if not site_id or len(site_id) > 63:
return "site_id must be 1-63 characters"
if not re.match(r'^[a-zA-Z0-9][a-zA-Z0-9_-]*$', site_id):
return "site_id must start with alphanumeric and contain only alphanumeric, hyphens, underscores"
return None
logger = logging.getLogger(__name__)
admin_api_bp = Blueprint("admin_api", __name__, url_prefix="/admin")
def _require_principal() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
from .s3_api import _require_principal as s3_require_principal
return s3_require_principal()
def _require_admin() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
principal, error = _require_principal()
if error:
return None, error
try:
_iam().authorize(principal, None, "iam:*")
return principal, None
except IamError:
return None, _json_error("AccessDenied", "Admin access required", 403)
def _site_registry() -> SiteRegistry:
return current_app.extensions["site_registry"]
def _connections() -> ConnectionStore:
return current_app.extensions["connections"]
def _replication() -> ReplicationManager:
return current_app.extensions["replication"]
def _iam():
return current_app.extensions["iam"]
def _json_error(code: str, message: str, status: int) -> Tuple[Dict[str, Any], int]:
return {"error": {"code": code, "message": message}}, status
def _get_admin_rate_limit() -> str:
return current_app.config.get("RATE_LIMIT_ADMIN", "60 per minute")
@admin_api_bp.route("/site", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_local_site():
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
local_site = registry.get_local_site()
if local_site:
return jsonify(local_site.to_dict())
config_site_id = current_app.config.get("SITE_ID")
config_endpoint = current_app.config.get("SITE_ENDPOINT")
if config_site_id:
return jsonify({
"site_id": config_site_id,
"endpoint": config_endpoint or "",
"region": current_app.config.get("SITE_REGION", "us-east-1"),
"priority": current_app.config.get("SITE_PRIORITY", 100),
"display_name": config_site_id,
"source": "environment",
})
return _json_error("NotFound", "Local site not configured", 404)
@admin_api_bp.route("/site", methods=["PUT"])
@limiter.limit(lambda: _get_admin_rate_limit())
def update_local_site():
principal, error = _require_admin()
if error:
return error
payload = request.get_json(silent=True) or {}
site_id = payload.get("site_id")
endpoint = payload.get("endpoint")
if not site_id:
return _json_error("ValidationError", "site_id is required", 400)
site_id_error = _validate_site_id(site_id)
if site_id_error:
return _json_error("ValidationError", site_id_error, 400)
if endpoint:
endpoint_error = _validate_endpoint(endpoint)
if endpoint_error:
return _json_error("ValidationError", endpoint_error, 400)
if "priority" in payload:
priority_error = _validate_priority(payload["priority"])
if priority_error:
return _json_error("ValidationError", priority_error, 400)
if "region" in payload:
region_error = _validate_region(payload["region"])
if region_error:
return _json_error("ValidationError", region_error, 400)
registry = _site_registry()
existing = registry.get_local_site()
site = SiteInfo(
site_id=site_id,
endpoint=endpoint or "",
region=payload.get("region", "us-east-1"),
priority=payload.get("priority", 100),
display_name=payload.get("display_name", site_id),
created_at=existing.created_at if existing else None,
)
registry.set_local_site(site)
logger.info("Local site updated", extra={"site_id": site_id, "principal": principal.access_key})
return jsonify(site.to_dict())
@admin_api_bp.route("/sites", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def list_all_sites():
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
local = registry.get_local_site()
peers = registry.list_peers()
result = {
"local": local.to_dict() if local else None,
"peers": [peer.to_dict() for peer in peers],
"total_peers": len(peers),
}
return jsonify(result)
@admin_api_bp.route("/sites", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def register_peer_site():
principal, error = _require_admin()
if error:
return error
payload = request.get_json(silent=True) or {}
site_id = payload.get("site_id")
endpoint = payload.get("endpoint")
if not site_id:
return _json_error("ValidationError", "site_id is required", 400)
site_id_error = _validate_site_id(site_id)
if site_id_error:
return _json_error("ValidationError", site_id_error, 400)
if not endpoint:
return _json_error("ValidationError", "endpoint is required", 400)
endpoint_error = _validate_endpoint(endpoint)
if endpoint_error:
return _json_error("ValidationError", endpoint_error, 400)
region = payload.get("region", "us-east-1")
region_error = _validate_region(region)
if region_error:
return _json_error("ValidationError", region_error, 400)
priority = payload.get("priority", 100)
priority_error = _validate_priority(priority)
if priority_error:
return _json_error("ValidationError", priority_error, 400)
registry = _site_registry()
if registry.get_peer(site_id):
return _json_error("AlreadyExists", f"Peer site '{site_id}' already exists", 409)
connection_id = payload.get("connection_id")
if connection_id:
if not _connections().get(connection_id):
return _json_error("ValidationError", f"Connection '{connection_id}' not found", 400)
peer = PeerSite(
site_id=site_id,
endpoint=endpoint,
region=region,
priority=int(priority),
display_name=payload.get("display_name", site_id),
connection_id=connection_id,
)
registry.add_peer(peer)
logger.info("Peer site registered", extra={"site_id": site_id, "principal": principal.access_key})
return jsonify(peer.to_dict()), 201
@admin_api_bp.route("/sites/<site_id>", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_peer_site(site_id: str):
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
peer = registry.get_peer(site_id)
if not peer:
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
return jsonify(peer.to_dict())
@admin_api_bp.route("/sites/<site_id>", methods=["PUT"])
@limiter.limit(lambda: _get_admin_rate_limit())
def update_peer_site(site_id: str):
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
existing = registry.get_peer(site_id)
if not existing:
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
payload = request.get_json(silent=True) or {}
if "endpoint" in payload:
endpoint_error = _validate_endpoint(payload["endpoint"])
if endpoint_error:
return _json_error("ValidationError", endpoint_error, 400)
if "priority" in payload:
priority_error = _validate_priority(payload["priority"])
if priority_error:
return _json_error("ValidationError", priority_error, 400)
if "region" in payload:
region_error = _validate_region(payload["region"])
if region_error:
return _json_error("ValidationError", region_error, 400)
if "connection_id" in payload:
if payload["connection_id"] and not _connections().get(payload["connection_id"]):
return _json_error("ValidationError", f"Connection '{payload['connection_id']}' not found", 400)
peer = PeerSite(
site_id=site_id,
endpoint=payload.get("endpoint", existing.endpoint),
region=payload.get("region", existing.region),
priority=payload.get("priority", existing.priority),
display_name=payload.get("display_name", existing.display_name),
connection_id=payload.get("connection_id", existing.connection_id),
created_at=existing.created_at,
is_healthy=existing.is_healthy,
last_health_check=existing.last_health_check,
)
registry.update_peer(peer)
logger.info("Peer site updated", extra={"site_id": site_id, "principal": principal.access_key})
return jsonify(peer.to_dict())
@admin_api_bp.route("/sites/<site_id>", methods=["DELETE"])
@limiter.limit(lambda: _get_admin_rate_limit())
def delete_peer_site(site_id: str):
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
if not registry.delete_peer(site_id):
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
logger.info("Peer site deleted", extra={"site_id": site_id, "principal": principal.access_key})
return Response(status=204)
@admin_api_bp.route("/sites/<site_id>/health", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def check_peer_health(site_id: str):
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
peer = registry.get_peer(site_id)
if not peer:
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
is_healthy = False
error_message = None
if peer.connection_id:
connection = _connections().get(peer.connection_id)
if connection:
is_healthy = _replication().check_endpoint_health(connection)
else:
error_message = f"Connection '{peer.connection_id}' not found"
else:
error_message = "No connection configured for this peer"
registry.update_health(site_id, is_healthy)
result = {
"site_id": site_id,
"is_healthy": is_healthy,
"checked_at": time.time(),
}
if error_message:
result["error"] = error_message
return jsonify(result)
@admin_api_bp.route("/topology", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_topology():
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
local = registry.get_local_site()
peers = registry.list_peers()
sites = []
if local:
sites.append({
**local.to_dict(),
"is_local": True,
"is_healthy": True,
})
for peer in peers:
sites.append({
**peer.to_dict(),
"is_local": False,
})
sites.sort(key=lambda s: s.get("priority", 100))
return jsonify({
"sites": sites,
"total": len(sites),
"healthy_count": sum(1 for s in sites if s.get("is_healthy")),
})
@admin_api_bp.route("/sites/<site_id>/bidirectional-status", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def check_bidirectional_status(site_id: str):
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
peer = registry.get_peer(site_id)
if not peer:
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
local_site = registry.get_local_site()
replication = _replication()
local_rules = replication.list_rules()
local_bidir_rules = []
for rule in local_rules:
if rule.target_connection_id == peer.connection_id and rule.mode == "bidirectional":
local_bidir_rules.append({
"bucket_name": rule.bucket_name,
"target_bucket": rule.target_bucket,
"enabled": rule.enabled,
})
result = {
"site_id": site_id,
"local_site_id": local_site.site_id if local_site else None,
"local_endpoint": local_site.endpoint if local_site else None,
"local_bidirectional_rules": local_bidir_rules,
"local_site_sync_enabled": current_app.config.get("SITE_SYNC_ENABLED", False),
"remote_status": None,
"issues": [],
"is_fully_configured": False,
}
if not local_site or not local_site.site_id:
result["issues"].append({
"code": "NO_LOCAL_SITE_ID",
"message": "Local site identity not configured",
"severity": "error",
})
if not local_site or not local_site.endpoint:
result["issues"].append({
"code": "NO_LOCAL_ENDPOINT",
"message": "Local site endpoint not configured (remote site cannot reach back)",
"severity": "error",
})
if not peer.connection_id:
result["issues"].append({
"code": "NO_CONNECTION",
"message": "No connection configured for this peer",
"severity": "error",
})
return jsonify(result)
connection = _connections().get(peer.connection_id)
if not connection:
result["issues"].append({
"code": "CONNECTION_NOT_FOUND",
"message": f"Connection '{peer.connection_id}' not found",
"severity": "error",
})
return jsonify(result)
if not local_bidir_rules:
result["issues"].append({
"code": "NO_LOCAL_BIDIRECTIONAL_RULES",
"message": "No bidirectional replication rules configured on this site",
"severity": "warning",
})
if not result["local_site_sync_enabled"]:
result["issues"].append({
"code": "SITE_SYNC_DISABLED",
"message": "Site sync worker is disabled (SITE_SYNC_ENABLED=false). Pull operations will not work.",
"severity": "warning",
})
if not replication.check_endpoint_health(connection):
result["issues"].append({
"code": "REMOTE_UNREACHABLE",
"message": "Remote endpoint is not reachable",
"severity": "error",
})
return jsonify(result)
allow_internal = current_app.config.get("ALLOW_INTERNAL_ENDPOINTS", False)
if not _is_safe_url(peer.endpoint, allow_internal=allow_internal):
result["issues"].append({
"code": "ENDPOINT_NOT_ALLOWED",
"message": "Peer endpoint points to cloud metadata service (SSRF protection)",
"severity": "error",
})
return jsonify(result)
try:
admin_url = peer.endpoint.rstrip("/") + "/admin/sites"
resp = requests.get(
admin_url,
timeout=10,
headers={
"Accept": "application/json",
"X-Access-Key": connection.access_key,
"X-Secret-Key": connection.secret_key,
},
)
if resp.status_code == 200:
try:
remote_data = resp.json()
if not isinstance(remote_data, dict):
raise ValueError("Expected JSON object")
remote_local = remote_data.get("local")
if remote_local is not None and not isinstance(remote_local, dict):
raise ValueError("Expected 'local' to be an object")
remote_peers = remote_data.get("peers", [])
if not isinstance(remote_peers, list):
raise ValueError("Expected 'peers' to be a list")
except (ValueError, json.JSONDecodeError) as e:
logger.warning("Invalid JSON from remote admin API: %s", e)
result["remote_status"] = {"reachable": True, "invalid_response": True}
result["issues"].append({
"code": "REMOTE_INVALID_RESPONSE",
"message": "Remote admin API returned invalid JSON",
"severity": "warning",
})
return jsonify(result)
result["remote_status"] = {
"reachable": True,
"local_site": remote_local,
"site_sync_enabled": None,
"has_peer_for_us": False,
"peer_connection_configured": False,
"has_bidirectional_rules_for_us": False,
}
for rp in remote_peers:
if not isinstance(rp, dict):
continue
if local_site and (
rp.get("site_id") == local_site.site_id or
rp.get("endpoint") == local_site.endpoint
):
result["remote_status"]["has_peer_for_us"] = True
result["remote_status"]["peer_connection_configured"] = bool(rp.get("connection_id"))
break
if not result["remote_status"]["has_peer_for_us"]:
result["issues"].append({
"code": "REMOTE_NO_PEER_FOR_US",
"message": "Remote site does not have this site registered as a peer",
"severity": "error",
})
elif not result["remote_status"]["peer_connection_configured"]:
result["issues"].append({
"code": "REMOTE_NO_CONNECTION_FOR_US",
"message": "Remote site has us as peer but no connection configured (cannot push back)",
"severity": "error",
})
elif resp.status_code == 401 or resp.status_code == 403:
result["remote_status"] = {
"reachable": True,
"admin_access_denied": True,
}
result["issues"].append({
"code": "REMOTE_ADMIN_ACCESS_DENIED",
"message": "Cannot verify remote configuration (admin access denied)",
"severity": "warning",
})
else:
result["remote_status"] = {
"reachable": True,
"admin_api_error": resp.status_code,
}
result["issues"].append({
"code": "REMOTE_ADMIN_API_ERROR",
"message": f"Remote admin API returned status {resp.status_code}",
"severity": "warning",
})
except requests.RequestException as e:
logger.warning("Remote admin API unreachable: %s", e)
result["remote_status"] = {
"reachable": False,
"error": "Connection failed",
}
result["issues"].append({
"code": "REMOTE_ADMIN_UNREACHABLE",
"message": "Could not reach remote admin API",
"severity": "warning",
})
except Exception as e:
logger.warning("Error checking remote bidirectional status: %s", e, exc_info=True)
result["issues"].append({
"code": "VERIFICATION_ERROR",
"message": "Internal error during verification",
"severity": "warning",
})
error_issues = [i for i in result["issues"] if i["severity"] == "error"]
result["is_fully_configured"] = len(error_issues) == 0 and len(local_bidir_rules) > 0
return jsonify(result)
def _website_domains() -> WebsiteDomainStore:
return current_app.extensions["website_domains"]
def _storage():
return current_app.extensions["object_storage"]
@admin_api_bp.route("/website-domains", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def list_website_domains():
principal, error = _require_admin()
if error:
return error
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
return jsonify(_website_domains().list_all())
@admin_api_bp.route("/website-domains", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def create_website_domain():
principal, error = _require_admin()
if error:
return error
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
payload = request.get_json(silent=True) or {}
domain = normalize_domain(payload.get("domain") or "")
bucket = (payload.get("bucket") or "").strip()
if not domain:
return _json_error("ValidationError", "domain is required", 400)
if not is_valid_domain(domain):
return _json_error("ValidationError", f"Invalid domain: '{domain}'", 400)
if not bucket:
return _json_error("ValidationError", "bucket is required", 400)
storage = _storage()
if not storage.bucket_exists(bucket):
return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)
store = _website_domains()
existing = store.get_bucket(domain)
if existing:
return _json_error("Conflict", f"Domain '{domain}' is already mapped to bucket '{existing}'", 409)
store.set_mapping(domain, bucket)
logger.info("Website domain mapping created: %s -> %s", domain, bucket)
return jsonify({"domain": domain, "bucket": bucket}), 201
@admin_api_bp.route("/website-domains/<domain>", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_website_domain(domain: str):
principal, error = _require_admin()
if error:
return error
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
domain = normalize_domain(domain)
bucket = _website_domains().get_bucket(domain)
if not bucket:
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
return jsonify({"domain": domain, "bucket": bucket})
@admin_api_bp.route("/website-domains/<domain>", methods=["PUT"])
@limiter.limit(lambda: _get_admin_rate_limit())
def update_website_domain(domain: str):
principal, error = _require_admin()
if error:
return error
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
domain = normalize_domain(domain)
payload = request.get_json(silent=True) or {}
bucket = (payload.get("bucket") or "").strip()
if not bucket:
return _json_error("ValidationError", "bucket is required", 400)
storage = _storage()
if not storage.bucket_exists(bucket):
return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)
store = _website_domains()
if not store.get_bucket(domain):
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
store.set_mapping(domain, bucket)
logger.info("Website domain mapping updated: %s -> %s", domain, bucket)
return jsonify({"domain": domain, "bucket": bucket})
@admin_api_bp.route("/website-domains/<domain>", methods=["DELETE"])
@limiter.limit(lambda: _get_admin_rate_limit())
def delete_website_domain(domain: str):
principal, error = _require_admin()
if error:
return error
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
domain = normalize_domain(domain)
if not _website_domains().delete_mapping(domain):
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
logger.info("Website domain mapping deleted: %s", domain)
return Response(status=204)

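A hedged client sketch for the site-registry endpoints above. The base URL and credentials are placeholders; the `X-Access-Key`/`X-Secret-Key` header pair mirrors what `check_bidirectional_status` itself sends to a peer's admin API:

```python
import requests

BASE = "http://127.0.0.1:5000"
HEADERS = {
    "Accept": "application/json",
    "X-Access-Key": "admin-access-key",
    "X-Secret-Key": "admin-secret-key",
}

# Register a peer, then ask whether bidirectional replication is fully wired.
requests.post(f"{BASE}/admin/sites", headers=HEADERS, json={
    "site_id": "site-b",
    "endpoint": "https://site-b.example.com",
    "region": "eu-west-1",
    "priority": 50,
})
status = requests.get(f"{BASE}/admin/sites/site-b/bidirectional-status",
                      headers=HEADERS).json()
for issue in status.get("issues", []):
    print(issue["severity"], issue["code"], "-", issue["message"])
```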
View File

@@ -6,6 +6,7 @@ import re
 import time
 from dataclasses import dataclass, field
 from fnmatch import fnmatch, translate
+from functools import lru_cache
 from pathlib import Path
 from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple
@@ -13,9 +14,14 @@ from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple
 RESOURCE_PREFIX = "arn:aws:s3:::"

+@lru_cache(maxsize=256)
+def _compile_pattern(pattern: str) -> Pattern[str]:
+    return re.compile(translate(pattern), re.IGNORECASE)
+

 def _match_string_like(value: str, pattern: str) -> bool:
-    regex = translate(pattern)
-    return bool(re.match(regex, value, re.IGNORECASE))
+    compiled = _compile_pattern(pattern)
+    return bool(compiled.match(value))


 def _ip_in_cidr(ip_str: str, cidr: str) -> bool:

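The win here is that `fnmatch.translate` plus `re.compile` run once per distinct pattern instead of once per authorization check. A self-contained sketch of the before/after cost (timings vary by machine):

```python
import re
import timeit
from fnmatch import translate
from functools import lru_cache


@lru_cache(maxsize=256)
def _compile_pattern(pattern: str) -> re.Pattern:
    return re.compile(translate(pattern), re.IGNORECASE)


PATTERN, VALUE = "arn:aws:s3:::my-bucket/*", "arn:aws:s3:::my-bucket/a/b/c"

# Old path: retranslate and recompile the glob on every match.
uncached = timeit.timeit(
    lambda: re.match(translate(PATTERN), VALUE, re.IGNORECASE), number=20_000)
# New path: compile once, then reuse the cached pattern object.
cached = timeit.timeit(
    lambda: _compile_pattern(PATTERN).match(VALUE), number=20_000)
print(f"retranslate each call: {uncached:.3f}s, lru_cache: {cached:.3f}s")
```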
View File

@@ -36,10 +36,11 @@ class GzipMiddleware:
         content_type = None
         content_length = None
         should_compress = False
+        passthrough = False
         exc_info_holder = [None]

         def custom_start_response(status: str, headers: List[Tuple[str, str]], exc_info=None):
-            nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress
+            nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress, passthrough
             response_started = True
             status_code = int(status.split(' ', 1)[0])
             response_headers = list(headers)
@@ -50,18 +51,32 @@ class GzipMiddleware:
                 if name_lower == 'content-type':
                     content_type = value.split(';')[0].strip().lower()
                 elif name_lower == 'content-length':
-                    content_length = int(value)
+                    try:
+                        content_length = int(value)
+                    except (ValueError, TypeError):
+                        pass
                 elif name_lower == 'content-encoding':
-                    should_compress = False
+                    passthrough = True
+                    return start_response(status, headers, exc_info)
+                elif name_lower == 'x-stream-response':
+                    passthrough = True
                     return start_response(status, headers, exc_info)

             if content_type and content_type in COMPRESSIBLE_MIMES:
                 if content_length is None or content_length >= self.min_size:
                     should_compress = True
+            else:
+                passthrough = True
+                return start_response(status, headers, exc_info)

             return None

-        response_body = b''.join(self.app(environ, custom_start_response))
+        app_iter = self.app(environ, custom_start_response)
+
+        if passthrough:
+            return app_iter
+
+        response_body = b''.join(app_iter)

         if not response_started:
             return [response_body]

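The practical effect: responses that are already encoded, flagged `X-Stream-Response`, or simply not compressible are now returned as the app's original iterator instead of being buffered with `b''.join()`. A minimal sketch, assuming the middleware is constructed as `GzipMiddleware(app, min_size=...)` (the constructor signature is inferred from `self.min_size` in the diff):

```python
def streaming_app(environ, start_response):
    # Already gzip-encoded: the middleware passes this through untouched,
    # preserving streaming instead of buffering the whole body in memory.
    start_response("200 OK", [("Content-Type", "application/json"),
                              ("Content-Encoding", "gzip")])
    yield b"\x1f\x8b"  # pretend pre-compressed chunk
    yield b"..."

# Assumed usage:
#   wrapped = GzipMiddleware(streaming_app, min_size=500)
# wrapped(environ, start_response) returns streaming_app's generator
# directly (the passthrough branch), so chunks reach the client unbuffered.
```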
View File

@@ -10,6 +10,23 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, Optional

+import psutil
+
+
+def _calculate_auto_threads() -> int:
+    cpu_count = psutil.cpu_count(logical=True) or 4
+    return max(1, min(cpu_count * 2, 64))
+
+
+def _calculate_auto_connection_limit() -> int:
+    available_mb = psutil.virtual_memory().available / (1024 * 1024)
+    calculated = int(available_mb / 5)
+    return max(20, min(calculated, 1000))
+
+
+def _calculate_auto_backlog(connection_limit: int) -> int:
+    return max(64, min(connection_limit * 2, 4096))
+

 def _validate_rate_limit(value: str) -> str:
     pattern = r"^\d+\s+per\s+(second|minute|hour|day)$"
@@ -63,6 +80,10 @@ class AppConfig:
     log_backup_count: int
     ratelimit_default: str
     ratelimit_storage_uri: str
+    ratelimit_list_buckets: str
+    ratelimit_bucket_ops: str
+    ratelimit_object_ops: str
+    ratelimit_head_ops: str
     cors_origins: list[str]
     cors_methods: list[str]
     cors_allow_headers: list[str]
@@ -94,9 +115,41 @@ class AppConfig:
     server_connection_limit: int
     server_backlog: int
     server_channel_timeout: int
+    server_threads_auto: bool
+    server_connection_limit_auto: bool
+    server_backlog_auto: bool
     site_sync_enabled: bool
     site_sync_interval_seconds: int
     site_sync_batch_size: int
+    sigv4_timestamp_tolerance_seconds: int
+    presigned_url_min_expiry_seconds: int
+    presigned_url_max_expiry_seconds: int
+    replication_connect_timeout_seconds: int
+    replication_read_timeout_seconds: int
+    replication_max_retries: int
+    replication_streaming_threshold_bytes: int
+    replication_max_failures_per_bucket: int
+    site_sync_connect_timeout_seconds: int
+    site_sync_read_timeout_seconds: int
+    site_sync_max_retries: int
+    site_sync_clock_skew_tolerance_seconds: float
+    object_key_max_length_bytes: int
+    object_cache_max_size: int
+    bucket_config_cache_ttl_seconds: float
+    object_tag_limit: int
+    encryption_chunk_size_bytes: int
+    kms_generate_data_key_min_bytes: int
+    kms_generate_data_key_max_bytes: int
+    lifecycle_max_history_per_bucket: int
+    site_id: Optional[str]
+    site_endpoint: Optional[str]
+    site_region: str
+    site_priority: int
+    ratelimit_admin: str
+    num_trusted_proxies: int
+    allowed_redirect_hosts: list[str]
+    allow_internal_endpoints: bool
+    website_hosting_enabled: bool

     @classmethod
     def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
@@ -171,6 +224,10 @@ class AppConfig:
         log_backup_count = int(_get("LOG_BACKUP_COUNT", 3))
         ratelimit_default = _validate_rate_limit(str(_get("RATE_LIMIT_DEFAULT", "200 per minute")))
         ratelimit_storage_uri = str(_get("RATE_LIMIT_STORAGE_URI", "memory://"))
+        ratelimit_list_buckets = _validate_rate_limit(str(_get("RATE_LIMIT_LIST_BUCKETS", "60 per minute")))
+        ratelimit_bucket_ops = _validate_rate_limit(str(_get("RATE_LIMIT_BUCKET_OPS", "120 per minute")))
+        ratelimit_object_ops = _validate_rate_limit(str(_get("RATE_LIMIT_OBJECT_OPS", "240 per minute")))
+        ratelimit_head_ops = _validate_rate_limit(str(_get("RATE_LIMIT_HEAD_OPS", "100 per minute")))

         def _csv(value: str, default: list[str]) -> list[str]:
             if not value:
@@ -200,14 +257,69 @@ class AppConfig:
         operation_metrics_interval_minutes = int(_get("OPERATION_METRICS_INTERVAL_MINUTES", 5))
         operation_metrics_retention_hours = int(_get("OPERATION_METRICS_RETENTION_HOURS", 24))
-        server_threads = int(_get("SERVER_THREADS", 4))
-        server_connection_limit = int(_get("SERVER_CONNECTION_LIMIT", 100))
-        server_backlog = int(_get("SERVER_BACKLOG", 1024))
+        _raw_threads = int(_get("SERVER_THREADS", 0))
+        if _raw_threads == 0:
+            server_threads = _calculate_auto_threads()
+            server_threads_auto = True
+        else:
+            server_threads = _raw_threads
+            server_threads_auto = False
+        _raw_conn_limit = int(_get("SERVER_CONNECTION_LIMIT", 0))
+        if _raw_conn_limit == 0:
+            server_connection_limit = _calculate_auto_connection_limit()
+            server_connection_limit_auto = True
+        else:
+            server_connection_limit = _raw_conn_limit
+            server_connection_limit_auto = False
+        _raw_backlog = int(_get("SERVER_BACKLOG", 0))
+        if _raw_backlog == 0:
+            server_backlog = _calculate_auto_backlog(server_connection_limit)
+            server_backlog_auto = True
+        else:
+            server_backlog = _raw_backlog
+            server_backlog_auto = False
         server_channel_timeout = int(_get("SERVER_CHANNEL_TIMEOUT", 120))
         site_sync_enabled = str(_get("SITE_SYNC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
         site_sync_interval_seconds = int(_get("SITE_SYNC_INTERVAL_SECONDS", 60))
         site_sync_batch_size = int(_get("SITE_SYNC_BATCH_SIZE", 100))
+        sigv4_timestamp_tolerance_seconds = int(_get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900))
+        presigned_url_min_expiry_seconds = int(_get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1))
+        presigned_url_max_expiry_seconds = int(_get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800))
+        replication_connect_timeout_seconds = int(_get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5))
+        replication_read_timeout_seconds = int(_get("REPLICATION_READ_TIMEOUT_SECONDS", 30))
+        replication_max_retries = int(_get("REPLICATION_MAX_RETRIES", 2))
+        replication_streaming_threshold_bytes = int(_get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024))
+        replication_max_failures_per_bucket = int(_get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50))
+        site_sync_connect_timeout_seconds = int(_get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10))
+        site_sync_read_timeout_seconds = int(_get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120))
+        site_sync_max_retries = int(_get("SITE_SYNC_MAX_RETRIES", 2))
+        site_sync_clock_skew_tolerance_seconds = float(_get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0))
+        object_key_max_length_bytes = int(_get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024))
+        object_cache_max_size = int(_get("OBJECT_CACHE_MAX_SIZE", 100))
+        bucket_config_cache_ttl_seconds = float(_get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0))
+        object_tag_limit = int(_get("OBJECT_TAG_LIMIT", 50))
+        encryption_chunk_size_bytes = int(_get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024))
+        kms_generate_data_key_min_bytes = int(_get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1))
+        kms_generate_data_key_max_bytes = int(_get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024))
+        lifecycle_max_history_per_bucket = int(_get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50))
+        site_id_raw = _get("SITE_ID", None)
+        site_id = str(site_id_raw).strip() if site_id_raw else None
+        site_endpoint_raw = _get("SITE_ENDPOINT", None)
+        site_endpoint = str(site_endpoint_raw).strip() if site_endpoint_raw else None
+        site_region = str(_get("SITE_REGION", "us-east-1"))
+        site_priority = int(_get("SITE_PRIORITY", 100))
+        ratelimit_admin = _validate_rate_limit(str(_get("RATE_LIMIT_ADMIN", "60 per minute")))
+        num_trusted_proxies = int(_get("NUM_TRUSTED_PROXIES", 0))
+        allowed_redirect_hosts_raw = _get("ALLOWED_REDIRECT_HOSTS", "")
+        allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
+        allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
+        website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}

         return cls(storage_root=storage_root,
                    max_upload_size=max_upload_size,
                    ui_page_size=ui_page_size,
@@ -225,6 +337,10 @@ class AppConfig:
                    log_backup_count=log_backup_count,
                    ratelimit_default=ratelimit_default,
                    ratelimit_storage_uri=ratelimit_storage_uri,
+                   ratelimit_list_buckets=ratelimit_list_buckets,
+                   ratelimit_bucket_ops=ratelimit_bucket_ops,
+                   ratelimit_object_ops=ratelimit_object_ops,
+                   ratelimit_head_ops=ratelimit_head_ops,
                    cors_origins=cors_origins,
                    cors_methods=cors_methods,
                    cors_allow_headers=cors_allow_headers,
@@ -256,9 +372,41 @@ class AppConfig:
                    server_connection_limit=server_connection_limit,
                    server_backlog=server_backlog,
                    server_channel_timeout=server_channel_timeout,
+                   server_threads_auto=server_threads_auto,
+                   server_connection_limit_auto=server_connection_limit_auto,
+                   server_backlog_auto=server_backlog_auto,
                    site_sync_enabled=site_sync_enabled,
                    site_sync_interval_seconds=site_sync_interval_seconds,
-                   site_sync_batch_size=site_sync_batch_size)
+                   site_sync_batch_size=site_sync_batch_size,
+                   sigv4_timestamp_tolerance_seconds=sigv4_timestamp_tolerance_seconds,
+                   presigned_url_min_expiry_seconds=presigned_url_min_expiry_seconds,
+                   presigned_url_max_expiry_seconds=presigned_url_max_expiry_seconds,
+                   replication_connect_timeout_seconds=replication_connect_timeout_seconds,
+                   replication_read_timeout_seconds=replication_read_timeout_seconds,
+                   replication_max_retries=replication_max_retries,
+                   replication_streaming_threshold_bytes=replication_streaming_threshold_bytes,
+                   replication_max_failures_per_bucket=replication_max_failures_per_bucket,
+                   site_sync_connect_timeout_seconds=site_sync_connect_timeout_seconds,
+                   site_sync_read_timeout_seconds=site_sync_read_timeout_seconds,
+                   site_sync_max_retries=site_sync_max_retries,
+                   site_sync_clock_skew_tolerance_seconds=site_sync_clock_skew_tolerance_seconds,
+                   object_key_max_length_bytes=object_key_max_length_bytes,
+                   object_cache_max_size=object_cache_max_size,
+                   bucket_config_cache_ttl_seconds=bucket_config_cache_ttl_seconds,
+                   object_tag_limit=object_tag_limit,
+                   encryption_chunk_size_bytes=encryption_chunk_size_bytes,
+                   kms_generate_data_key_min_bytes=kms_generate_data_key_min_bytes,
+                   kms_generate_data_key_max_bytes=kms_generate_data_key_max_bytes,
+                   lifecycle_max_history_per_bucket=lifecycle_max_history_per_bucket,
+                   site_id=site_id,
+                   site_endpoint=site_endpoint,
+                   site_region=site_region,
+                   site_priority=site_priority,
+                   ratelimit_admin=ratelimit_admin,
+                   num_trusted_proxies=num_trusted_proxies,
+                   allowed_redirect_hosts=allowed_redirect_hosts,
+                   allow_internal_endpoints=allow_internal_endpoints,
+                   website_hosting_enabled=website_hosting_enabled)

     def validate_and_report(self) -> list[str]:
         """Validate configuration and return a list of warnings/issues.
@@ -364,9 +512,13 @@ class AppConfig:
             print(f"  ENCRYPTION: Enabled (Master key: {self.encryption_master_key_path})")
         if self.kms_enabled:
             print(f"  KMS: Enabled (Keys: {self.kms_keys_path})")
-        print(f"  SERVER_THREADS: {self.server_threads}")
-        print(f"  CONNECTION_LIMIT: {self.server_connection_limit}")
-        print(f"  BACKLOG: {self.server_backlog}")
+        if self.website_hosting_enabled:
+            print(f"  WEBSITE_HOSTING: Enabled")
+
+        def _auto(flag: bool) -> str:
+            return " (auto)" if flag else ""
+
+        print(f"  SERVER_THREADS: {self.server_threads}{_auto(self.server_threads_auto)}")
+        print(f"  CONNECTION_LIMIT: {self.server_connection_limit}{_auto(self.server_connection_limit_auto)}")
+        print(f"  BACKLOG: {self.server_backlog}{_auto(self.server_backlog_auto)}")
         print(f"  CHANNEL_TIMEOUT: {self.server_channel_timeout}s")
         print("=" * 60)
@@ -406,6 +558,10 @@ class AppConfig:
             "LOG_BACKUP_COUNT": self.log_backup_count,
             "RATELIMIT_DEFAULT": self.ratelimit_default,
             "RATELIMIT_STORAGE_URI": self.ratelimit_storage_uri,
+            "RATELIMIT_LIST_BUCKETS": self.ratelimit_list_buckets,
+            "RATELIMIT_BUCKET_OPS": self.ratelimit_bucket_ops,
+            "RATELIMIT_OBJECT_OPS": self.ratelimit_object_ops,
+            "RATELIMIT_HEAD_OPS": self.ratelimit_head_ops,
             "CORS_ORIGINS": self.cors_origins,
             "CORS_METHODS": self.cors_methods,
             "CORS_ALLOW_HEADERS": self.cors_allow_headers,
@@ -432,4 +588,33 @@ class AppConfig:
             "SITE_SYNC_ENABLED": self.site_sync_enabled,
             "SITE_SYNC_INTERVAL_SECONDS": self.site_sync_interval_seconds,
"SITE_SYNC_BATCH_SIZE": self.site_sync_batch_size, "SITE_SYNC_BATCH_SIZE": self.site_sync_batch_size,
"SIGV4_TIMESTAMP_TOLERANCE_SECONDS": self.sigv4_timestamp_tolerance_seconds,
"PRESIGNED_URL_MIN_EXPIRY_SECONDS": self.presigned_url_min_expiry_seconds,
"PRESIGNED_URL_MAX_EXPIRY_SECONDS": self.presigned_url_max_expiry_seconds,
"REPLICATION_CONNECT_TIMEOUT_SECONDS": self.replication_connect_timeout_seconds,
"REPLICATION_READ_TIMEOUT_SECONDS": self.replication_read_timeout_seconds,
"REPLICATION_MAX_RETRIES": self.replication_max_retries,
"REPLICATION_STREAMING_THRESHOLD_BYTES": self.replication_streaming_threshold_bytes,
"REPLICATION_MAX_FAILURES_PER_BUCKET": self.replication_max_failures_per_bucket,
"SITE_SYNC_CONNECT_TIMEOUT_SECONDS": self.site_sync_connect_timeout_seconds,
"SITE_SYNC_READ_TIMEOUT_SECONDS": self.site_sync_read_timeout_seconds,
"SITE_SYNC_MAX_RETRIES": self.site_sync_max_retries,
"SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS": self.site_sync_clock_skew_tolerance_seconds,
"OBJECT_KEY_MAX_LENGTH_BYTES": self.object_key_max_length_bytes,
"OBJECT_CACHE_MAX_SIZE": self.object_cache_max_size,
"BUCKET_CONFIG_CACHE_TTL_SECONDS": self.bucket_config_cache_ttl_seconds,
"OBJECT_TAG_LIMIT": self.object_tag_limit,
"ENCRYPTION_CHUNK_SIZE_BYTES": self.encryption_chunk_size_bytes,
"KMS_GENERATE_DATA_KEY_MIN_BYTES": self.kms_generate_data_key_min_bytes,
"KMS_GENERATE_DATA_KEY_MAX_BYTES": self.kms_generate_data_key_max_bytes,
"LIFECYCLE_MAX_HISTORY_PER_BUCKET": self.lifecycle_max_history_per_bucket,
"SITE_ID": self.site_id,
"SITE_ENDPOINT": self.site_endpoint,
"SITE_REGION": self.site_region,
"SITE_PRIORITY": self.site_priority,
"RATE_LIMIT_ADMIN": self.ratelimit_admin,
"NUM_TRUSTED_PROXIES": self.num_trusted_proxies,
"ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
"ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
"WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled,
} }
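Note on the boolean options above (ALLOW_INTERNAL_ENDPOINTS, WEBSITE_HOSTING_ENABLED): any of 1/true/yes/on enables them, case-insensitively. A minimal standalone sketch of that parsing, assuming plain environment variables rather than the module's _get helper:

```python
# Sketch of the truthy parsing used by the config loader above.
import os

_TRUTHY = {"1", "true", "yes", "on"}

def env_flag(name: str, default: str = "0") -> bool:
    # Mirrors: str(_get(name, "0")).lower() in {"1", "true", "yes", "on"}
    return str(os.environ.get(name, default)).lower() in _TRUTHY

os.environ["WEBSITE_HOSTING_ENABLED"] = "Yes"
assert env_flag("WEBSITE_HOSTING_ENABLED") is True
assert env_flag("ALLOW_INTERNAL_ENDPOINTS") is False  # unset -> default "0"
```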

View File

@@ -270,9 +270,15 @@ class EncryptedObjectStorage:
     def get_bucket_quota(self, bucket_name: str):
         return self.storage.get_bucket_quota(bucket_name)
     def set_bucket_quota(self, bucket_name: str, *, max_bytes=None, max_objects=None):
         return self.storage.set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects)
+    def get_bucket_website(self, bucket_name: str):
+        return self.storage.get_bucket_website(bucket_name)
+    def set_bucket_website(self, bucket_name: str, website_config):
+        return self.storage.set_bucket_website(bucket_name, website_config)
     def _compute_etag(self, path: Path) -> str:
         return self.storage._compute_etag(path)

View File

@@ -1,15 +1,44 @@
+"""Encryption providers for server-side and client-side encryption."""
 from __future__ import annotations
 import base64
 import io
 import json
+import logging
+import os
 import secrets
+import subprocess
+import sys
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, BinaryIO, Dict, Generator, Optional
 from cryptography.hazmat.primitives.ciphers.aead import AESGCM
+from cryptography.hazmat.primitives.kdf.hkdf import HKDF
+from cryptography.hazmat.primitives import hashes
+if sys.platform != "win32":
+    import fcntl
+logger = logging.getLogger(__name__)
+def _set_secure_file_permissions(file_path: Path) -> None:
+    """Set restrictive file permissions (owner read/write only)."""
+    if sys.platform == "win32":
+        try:
+            username = os.environ.get("USERNAME", "")
+            if username:
+                subprocess.run(
+                    ["icacls", str(file_path), "/inheritance:r",
+                     "/grant:r", f"{username}:F"],
+                    check=True, capture_output=True
+                )
+            else:
+                logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
+        except (subprocess.SubprocessError, OSError) as exc:
+            logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
+    else:
+        os.chmod(file_path, 0o600)
 class EncryptionError(Exception):
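The _set_secure_file_permissions helper chmods key files to 0o600 on POSIX and shells out to icacls on Windows. A quick POSIX-only sanity check of the intended mode (the temporary path is illustrative):

```python
# Minimal POSIX check of the 0o600 behaviour; assumes a non-Windows host.
import os
import stat
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    p = Path(tmp) / "master.key"
    p.write_text("secret")
    os.chmod(p, 0o600)  # what _set_secure_file_permissions does off-Windows
    mode = stat.S_IMODE(p.stat().st_mode)
    assert mode == 0o600, oct(mode)
```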
@@ -59,22 +88,34 @@ class EncryptionMetadata:
 class EncryptionProvider:
     """Base class for encryption providers."""
     def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
         raise NotImplementedError
     def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
                 key_id: str, context: Dict[str, str] | None = None) -> bytes:
         raise NotImplementedError
     def generate_data_key(self) -> tuple[bytes, bytes]:
         """Generate a data key and its encrypted form.
         Returns:
             Tuple of (plaintext_key, encrypted_key)
         """
         raise NotImplementedError
+    def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
+        """Decrypt an encrypted data key.
+        Args:
+            encrypted_data_key: The encrypted data key bytes
+            key_id: Optional key identifier (used by KMS providers)
+        Returns:
+            The decrypted data key
+        """
+        raise NotImplementedError
 class LocalKeyEncryption(EncryptionProvider):
     """SSE-S3 style encryption using a local master key.
@@ -99,28 +140,48 @@ class LocalKeyEncryption(EncryptionProvider):
         return self._master_key
     def _load_or_create_master_key(self) -> bytes:
-        """Load master key from file or generate a new one."""
-        if self.master_key_path.exists():
-            try:
-                return base64.b64decode(self.master_key_path.read_text().strip())
-            except Exception as exc:
-                raise EncryptionError(f"Failed to load master key: {exc}") from exc
-        key = secrets.token_bytes(32)
-        try:
-            self.master_key_path.parent.mkdir(parents=True, exist_ok=True)
-            self.master_key_path.write_text(base64.b64encode(key).decode())
-        except OSError as exc:
-            raise EncryptionError(f"Failed to save master key: {exc}") from exc
-        return key
+        """Load master key from file or generate a new one (with file locking)."""
+        lock_path = self.master_key_path.with_suffix(".lock")
+        lock_path.parent.mkdir(parents=True, exist_ok=True)
+        try:
+            with open(lock_path, "w") as lock_file:
+                if sys.platform == "win32":
+                    import msvcrt
+                    msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
+                else:
+                    fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
+                try:
+                    if self.master_key_path.exists():
+                        try:
+                            return base64.b64decode(self.master_key_path.read_text().strip())
+                        except Exception as exc:
+                            raise EncryptionError(f"Failed to load master key: {exc}") from exc
+                    key = secrets.token_bytes(32)
+                    try:
+                        self.master_key_path.write_text(base64.b64encode(key).decode())
+                        _set_secure_file_permissions(self.master_key_path)
+                    except OSError as exc:
+                        raise EncryptionError(f"Failed to save master key: {exc}") from exc
+                    return key
+                finally:
+                    if sys.platform == "win32":
+                        import msvcrt
+                        msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
+                    else:
+                        fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
+        except OSError as exc:
+            raise EncryptionError(f"Failed to acquire lock for master key: {exc}") from exc
+    DATA_KEY_AAD = b'{"purpose":"data_key","version":1}'
     def _encrypt_data_key(self, data_key: bytes) -> bytes:
         """Encrypt the data key with the master key."""
         aesgcm = AESGCM(self.master_key)
         nonce = secrets.token_bytes(12)
-        encrypted = aesgcm.encrypt(nonce, data_key, None)
+        encrypted = aesgcm.encrypt(nonce, data_key, self.DATA_KEY_AAD)
         return nonce + encrypted
     def _decrypt_data_key(self, encrypted_data_key: bytes) -> bytes:
         """Decrypt the data key using the master key."""
         if len(encrypted_data_key) < 12 + 32 + 16:  # nonce + key + tag
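New data keys are now wrapped with a fixed AAD (DATA_KEY_AAD) so the wrapped blob is bound to its purpose; decryption, in the next hunk, tries the AAD first and falls back to None so keys wrapped before this change still open. A self-contained sketch of that pattern, independent of the class here:

```python
# Sketch of AAD-with-legacy-fallback envelope wrapping.
import secrets
from cryptography.hazmat.primitives.ciphers.aead import AESGCM

AAD = b'{"purpose":"data_key","version":1}'
master = AESGCM(secrets.token_bytes(32))

def wrap(key: bytes) -> bytes:
    nonce = secrets.token_bytes(12)
    return nonce + master.encrypt(nonce, key, AAD)

def unwrap(blob: bytes) -> bytes:
    nonce, ct = blob[:12], blob[12:]
    try:
        return master.decrypt(nonce, ct, AAD)   # new-format keys
    except Exception:
        return master.decrypt(nonce, ct, None)  # legacy keys wrapped without AAD

dk = secrets.token_bytes(32)
assert unwrap(wrap(dk)) == dk
```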
@@ -129,10 +190,17 @@ class LocalKeyEncryption(EncryptionProvider):
         nonce = encrypted_data_key[:12]
         ciphertext = encrypted_data_key[12:]
         try:
-            return aesgcm.decrypt(nonce, ciphertext, None)
-        except Exception as exc:
-            raise EncryptionError(f"Failed to decrypt data key: {exc}") from exc
+            return aesgcm.decrypt(nonce, ciphertext, self.DATA_KEY_AAD)
+        except Exception:
+            try:
+                return aesgcm.decrypt(nonce, ciphertext, None)
+            except Exception as exc:
+                raise EncryptionError(f"Failed to decrypt data key: {exc}") from exc
+    def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
+        """Decrypt an encrypted data key (key_id ignored for local encryption)."""
+        return self._decrypt_data_key(encrypted_data_key)
     def generate_data_key(self) -> tuple[bytes, bytes]:
         """Generate a data key and its encrypted form."""
         plaintext_key = secrets.token_bytes(32)
@@ -142,11 +210,12 @@ class LocalKeyEncryption(EncryptionProvider):
     def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
         """Encrypt data using envelope encryption."""
         data_key, encrypted_data_key = self.generate_data_key()
         aesgcm = AESGCM(data_key)
         nonce = secrets.token_bytes(12)
-        ciphertext = aesgcm.encrypt(nonce, plaintext, None)
+        aad = json.dumps(context, sort_keys=True).encode() if context else None
+        ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
         return EncryptionResult(
             ciphertext=ciphertext,
             nonce=nonce,
@@ -159,10 +228,11 @@ class LocalKeyEncryption(EncryptionProvider):
         """Decrypt data using envelope encryption."""
         data_key = self._decrypt_data_key(encrypted_data_key)
         aesgcm = AESGCM(data_key)
+        aad = json.dumps(context, sort_keys=True).encode() if context else None
         try:
-            return aesgcm.decrypt(nonce, ciphertext, None)
+            return aesgcm.decrypt(nonce, ciphertext, aad)
         except Exception as exc:
-            raise EncryptionError(f"Failed to decrypt data: {exc}") from exc
+            raise EncryptionError("Failed to decrypt data") from exc
 class StreamingEncryptor:
@@ -180,12 +250,14 @@ class StreamingEncryptor:
         self.chunk_size = chunk_size
     def _derive_chunk_nonce(self, base_nonce: bytes, chunk_index: int) -> bytes:
-        """Derive a unique nonce for each chunk.
-        Performance: Use direct byte manipulation instead of full int conversion.
-        """
-        # Performance: Only modify last 4 bytes instead of full 12-byte conversion
-        return base_nonce[:8] + (chunk_index ^ int.from_bytes(base_nonce[8:], "big")).to_bytes(4, "big")
+        """Derive a unique nonce for each chunk using HKDF."""
+        hkdf = HKDF(
+            algorithm=hashes.SHA256(),
+            length=12,
+            salt=base_nonce,
+            info=chunk_index.to_bytes(4, "big"),
+        )
+        return hkdf.derive(b"chunk_nonce")
     def encrypt_stream(self, stream: BinaryIO,
                        context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]:
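The chunk-nonce derivation moves from XOR-ing the counter into the base nonce to HKDF-SHA256 with the base nonce as salt and the chunk index as info. A standalone sketch checking that distinct chunk indices yield distinct 12-byte nonces:

```python
# Per-chunk nonce derivation as in _derive_chunk_nonce above, in isolation.
import secrets
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.hkdf import HKDF

def chunk_nonce(base_nonce: bytes, chunk_index: int) -> bytes:
    hkdf = HKDF(                                # HKDF instances are single-use
        algorithm=hashes.SHA256(),
        length=12,                              # AES-GCM nonce size
        salt=base_nonce,
        info=chunk_index.to_bytes(4, "big"),    # domain-separates each chunk
    )
    return hkdf.derive(b"chunk_nonce")

base = secrets.token_bytes(12)
nonces = {chunk_nonce(base, i) for i in range(1000)}
assert len(nonces) == 1000  # a distinct nonce per chunk index
```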
@@ -234,10 +306,7 @@ class StreamingEncryptor:
         Performance: Writes chunks directly to output buffer instead of accumulating in list.
         """
-        if isinstance(self.provider, LocalKeyEncryption):
-            data_key = self.provider._decrypt_data_key(metadata.encrypted_data_key)
-        else:
-            raise EncryptionError("Unsupported provider for streaming decryption")
+        data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id)
         aesgcm = AESGCM(data_key)
         base_nonce = metadata.nonce
@@ -310,7 +379,8 @@ class EncryptionManager:
     def get_streaming_encryptor(self) -> StreamingEncryptor:
         if self._streaming_encryptor is None:
-            self._streaming_encryptor = StreamingEncryptor(self.get_local_provider())
+            chunk_size = self.config.get("encryption_chunk_size_bytes", 64 * 1024)
+            self._streaming_encryptor = StreamingEncryptor(self.get_local_provider(), chunk_size=chunk_size)
         return self._streaming_encryptor
     def encrypt_object(self, data: bytes, algorithm: str = "AES256",
@@ -403,7 +473,8 @@ class SSECEncryption(EncryptionProvider):
     def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
         aesgcm = AESGCM(self.customer_key)
         nonce = secrets.token_bytes(12)
-        ciphertext = aesgcm.encrypt(nonce, plaintext, None)
+        aad = json.dumps(context, sort_keys=True).encode() if context else None
+        ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
         return EncryptionResult(
             ciphertext=ciphertext,
@@ -415,10 +486,11 @@ class SSECEncryption(EncryptionProvider):
     def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
                 key_id: str, context: Dict[str, str] | None = None) -> bytes:
         aesgcm = AESGCM(self.customer_key)
+        aad = json.dumps(context, sort_keys=True).encode() if context else None
         try:
-            return aesgcm.decrypt(nonce, ciphertext, None)
+            return aesgcm.decrypt(nonce, ciphertext, aad)
         except Exception as exc:
-            raise EncryptionError(f"SSE-C decryption failed: {exc}") from exc
+            raise EncryptionError("SSE-C decryption failed") from exc
     def generate_data_key(self) -> tuple[bytes, bytes]:
         return self.customer_key, b""
@@ -472,34 +544,36 @@ class ClientEncryptionHelper:
         }
     @staticmethod
-    def encrypt_with_key(plaintext: bytes, key_b64: str) -> Dict[str, str]:
+    def encrypt_with_key(plaintext: bytes, key_b64: str, context: Dict[str, str] | None = None) -> Dict[str, str]:
         """Encrypt data with a client-provided key."""
         key = base64.b64decode(key_b64)
         if len(key) != 32:
             raise EncryptionError("Key must be 256 bits (32 bytes)")
         aesgcm = AESGCM(key)
         nonce = secrets.token_bytes(12)
-        ciphertext = aesgcm.encrypt(nonce, plaintext, None)
+        aad = json.dumps(context, sort_keys=True).encode() if context else None
+        ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
         return {
             "ciphertext": base64.b64encode(ciphertext).decode(),
             "nonce": base64.b64encode(nonce).decode(),
             "algorithm": "AES-256-GCM",
         }
     @staticmethod
-    def decrypt_with_key(ciphertext_b64: str, nonce_b64: str, key_b64: str) -> bytes:
+    def decrypt_with_key(ciphertext_b64: str, nonce_b64: str, key_b64: str, context: Dict[str, str] | None = None) -> bytes:
         """Decrypt data with a client-provided key."""
         key = base64.b64decode(key_b64)
         nonce = base64.b64decode(nonce_b64)
         ciphertext = base64.b64decode(ciphertext_b64)
         if len(key) != 32:
             raise EncryptionError("Key must be 256 bits (32 bytes)")
         aesgcm = AESGCM(key)
+        aad = json.dumps(context, sort_keys=True).encode() if context else None
         try:
-            return aesgcm.decrypt(nonce, ciphertext, None)
+            return aesgcm.decrypt(nonce, ciphertext, aad)
         except Exception as exc:
-            raise EncryptionError(f"Decryption failed: {exc}") from exc
+            raise EncryptionError("Decryption failed") from exc

View File

@@ -6,6 +6,7 @@ from typing import Optional, Dict, Any
 from xml.etree.ElementTree import Element, SubElement, tostring
 from flask import Response, jsonify, request, flash, redirect, url_for, g
+from flask_limiter import RateLimitExceeded
 logger = logging.getLogger(__name__)
@@ -172,10 +173,22 @@ def handle_app_error(error: AppError) -> Response:
     return error.to_xml_response()
+def handle_rate_limit_exceeded(e: RateLimitExceeded) -> Response:
+    g.s3_error_code = "SlowDown"
+    error = Element("Error")
+    SubElement(error, "Code").text = "SlowDown"
+    SubElement(error, "Message").text = "Please reduce your request rate."
+    SubElement(error, "Resource").text = request.path
+    SubElement(error, "RequestId").text = getattr(g, "request_id", "")
+    xml_bytes = tostring(error, encoding="utf-8")
+    return Response(xml_bytes, status=429, mimetype="application/xml")
 def register_error_handlers(app):
     """Register error handlers with a Flask app."""
     app.register_error_handler(AppError, handle_app_error)
+    app.register_error_handler(RateLimitExceeded, handle_rate_limit_exceeded)
     for error_class in [
         BucketNotFoundError, BucketAlreadyExistsError, BucketNotEmptyError,
         ObjectNotFoundError, InvalidObjectKeyError,
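For reference, a sketch of the S3-style SlowDown body this handler emits with HTTP 429; the resource path and request id below are placeholders:

```python
# Builds the same XML shape as handle_rate_limit_exceeded, outside Flask.
from xml.etree.ElementTree import Element, SubElement, tostring

error = Element("Error")
SubElement(error, "Code").text = "SlowDown"
SubElement(error, "Message").text = "Please reduce your request rate."
SubElement(error, "Resource").text = "/my-bucket/my-key"      # placeholder
SubElement(error, "RequestId").text = "example-request-id"    # placeholder
print(tostring(error, encoding="unicode"))
# -> <Error><Code>SlowDown</Code><Message>...</Message>...</Error>
```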

View File

@@ -1,9 +1,12 @@
 from __future__ import annotations
+import hashlib
 import hmac
 import json
 import math
+import os
 import secrets
+import threading
 import time
 from collections import deque
 from dataclasses import dataclass
@@ -118,12 +121,15 @@ class IamService:
         self._raw_config: Dict[str, Any] = {}
         self._failed_attempts: Dict[str, Deque[datetime]] = {}
         self._last_load_time = 0.0
-        self._credential_cache: Dict[str, Tuple[str, Principal, float]] = {}
-        self._cache_ttl = 60.0
+        self._principal_cache: Dict[str, Tuple[Principal, float]] = {}
+        self._secret_key_cache: Dict[str, Tuple[str, float]] = {}
+        self._cache_ttl = float(os.environ.get("IAM_CACHE_TTL_SECONDS", "5.0"))
         self._last_stat_check = 0.0
         self._stat_check_interval = 1.0
         self._sessions: Dict[str, Dict[str, Any]] = {}
+        self._session_lock = threading.Lock()
         self._load()
+        self._load_lockout_state()
     def _maybe_reload(self) -> None:
         """Reload configuration if the file has changed on disk."""
@@ -134,7 +140,8 @@ class IamService:
         try:
             if self.config_path.stat().st_mtime > self._last_load_time:
                 self._load()
-                self._credential_cache.clear()
+                self._principal_cache.clear()
+                self._secret_key_cache.clear()
         except OSError:
             pass
@@ -150,7 +157,8 @@
                 f"Access temporarily locked. Try again in {seconds} seconds."
             )
         record = self._users.get(access_key)
-        if not record or not hmac.compare_digest(record["secret_key"], secret_key):
+        stored_secret = record["secret_key"] if record else secrets.token_urlsafe(24)
+        if not record or not hmac.compare_digest(stored_secret, secret_key):
             self._record_failed_attempt(access_key)
             raise IamError("Invalid credentials")
         self._clear_failed_attempts(access_key)
@@ -162,11 +170,46 @@
         attempts = self._failed_attempts.setdefault(access_key, deque())
         self._prune_attempts(attempts)
         attempts.append(datetime.now(timezone.utc))
+        self._save_lockout_state()
     def _clear_failed_attempts(self, access_key: str) -> None:
         if not access_key:
             return
-        self._failed_attempts.pop(access_key, None)
+        if self._failed_attempts.pop(access_key, None) is not None:
+            self._save_lockout_state()
+    def _lockout_file(self) -> Path:
+        return self.config_path.parent / "lockout_state.json"
+    def _load_lockout_state(self) -> None:
+        """Load lockout state from disk."""
+        try:
+            if self._lockout_file().exists():
+                data = json.loads(self._lockout_file().read_text(encoding="utf-8"))
+                cutoff = datetime.now(timezone.utc) - self.auth_lockout_window
+                for key, timestamps in data.get("failed_attempts", {}).items():
+                    valid = []
+                    for ts in timestamps:
+                        try:
+                            dt = datetime.fromisoformat(ts)
+                            if dt > cutoff:
+                                valid.append(dt)
+                        except (ValueError, TypeError):
+                            continue
+                    if valid:
+                        self._failed_attempts[key] = deque(valid)
+        except (OSError, json.JSONDecodeError):
+            pass
+    def _save_lockout_state(self) -> None:
+        """Persist lockout state to disk."""
+        data: Dict[str, Any] = {"failed_attempts": {}}
+        for key, attempts in self._failed_attempts.items():
+            data["failed_attempts"][key] = [ts.isoformat() for ts in attempts]
+        try:
+            self._lockout_file().write_text(json.dumps(data), encoding="utf-8")
+        except OSError:
+            pass
     def _prune_attempts(self, attempts: Deque[datetime]) -> None:
         cutoff = datetime.now(timezone.utc) - self.auth_lockout_window
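Lockout state now survives restarts as JSON of ISO-8601 timestamps keyed by access key, with stale entries dropped on load. A round-trip sketch of that shape; the 15-minute window is an assumption, the real value comes from auth_lockout_window:

```python
# Round-trip of the lockout-state format:
# {"failed_attempts": {access_key: [iso_timestamps...]}}
import json
from collections import deque
from datetime import datetime, timedelta, timezone

window = timedelta(minutes=15)  # assumed window for the sketch
attempts = {"AKIAEXAMPLE": deque([datetime.now(timezone.utc)])}

saved = json.dumps({"failed_attempts": {
    k: [ts.isoformat() for ts in v] for k, v in attempts.items()
}})

cutoff = datetime.now(timezone.utc) - window
loaded = {
    k: deque(dt for dt in (datetime.fromisoformat(ts) for ts in v) if dt > cutoff)
    for k, v in json.loads(saved)["failed_attempts"].items()
}
assert len(loaded["AKIAEXAMPLE"]) == 1  # fresh attempt survives the cutoff
```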
@@ -209,16 +252,23 @@
         return token
     def validate_session_token(self, access_key: str, session_token: str) -> bool:
-        """Validate a session token for an access key."""
-        session = self._sessions.get(session_token)
-        if not session:
-            return False
-        if session["access_key"] != access_key:
-            return False
-        if time.time() > session["expires_at"]:
-            del self._sessions[session_token]
-            return False
-        return True
+        """Validate a session token for an access key (thread-safe, constant-time)."""
+        dummy_key = secrets.token_urlsafe(16)
+        dummy_token = secrets.token_urlsafe(32)
+        with self._session_lock:
+            session = self._sessions.get(session_token)
+            if not session:
+                hmac.compare_digest(access_key, dummy_key)
+                hmac.compare_digest(session_token, dummy_token)
+                return False
+            key_match = hmac.compare_digest(session["access_key"], access_key)
+            if not key_match:
+                hmac.compare_digest(session_token, dummy_token)
+                return False
+            if time.time() > session["expires_at"]:
+                self._sessions.pop(session_token, None)
+                return False
+            return True
     def _cleanup_expired_sessions(self) -> None:
         """Remove expired session tokens."""
@@ -229,9 +279,9 @@
     def principal_for_key(self, access_key: str) -> Principal:
         now = time.time()
-        cached = self._credential_cache.get(access_key)
+        cached = self._principal_cache.get(access_key)
         if cached:
-            secret, principal, cached_time = cached
+            principal, cached_time = cached
             if now - cached_time < self._cache_ttl:
                 return principal
@@ -240,23 +290,14 @@
         if not record:
             raise IamError("Unknown access key")
         principal = self._build_principal(access_key, record)
-        self._credential_cache[access_key] = (record["secret_key"], principal, now)
+        self._principal_cache[access_key] = (principal, now)
         return principal
     def secret_for_key(self, access_key: str) -> str:
-        now = time.time()
-        cached = self._credential_cache.get(access_key)
-        if cached:
-            secret, principal, cached_time = cached
-            if now - cached_time < self._cache_ttl:
-                return secret
         self._maybe_reload()
         record = self._users.get(access_key)
         if not record:
             raise IamError("Unknown access key")
-        principal = self._build_principal(access_key, record)
-        self._credential_cache[access_key] = (record["secret_key"], principal, now)
         return record["secret_key"]
     def authorize(self, principal: Principal, bucket_name: str | None, action: str) -> None:
@@ -268,6 +309,18 @@
         if not self._is_allowed(principal, normalized, action):
             raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'")
+    def check_permissions(self, principal: Principal, bucket_name: str | None, actions: Iterable[str]) -> Dict[str, bool]:
+        self._maybe_reload()
+        bucket_name = (bucket_name or "*").lower() if bucket_name != "*" else (bucket_name or "*")
+        normalized_actions = {a: self._normalize_action(a) for a in actions}
+        results: Dict[str, bool] = {}
+        for original, canonical in normalized_actions.items():
+            if canonical not in ALLOWED_ACTIONS:
+                results[original] = False
+            else:
+                results[original] = self._is_allowed(principal, bucket_name, canonical)
+        return results
     def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]:
         return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")]
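check_permissions normalizes every requested action once and evaluates them all against the same loaded policy set, so a caller can ask about several actions with a single reload. A toy version of the pattern; the action names and policy shape are illustrative:

```python
# Batch permission evaluation over a shared policy list (illustrative).
ALLOWED_ACTIONS = {"list", "read", "write", "delete"}  # assumed subset

policies = [{"bucket": "*", "actions": ["list", "read"]}]

def is_allowed(bucket: str, action: str) -> bool:
    return any(p["bucket"] in ("*", bucket) and action in p["actions"]
               for p in policies)

def check_permissions(bucket: str, actions):
    # Unknown actions map to False instead of raising, one entry per input.
    return {a: a in ALLOWED_ACTIONS and is_allowed(bucket, a) for a in actions}

assert check_permissions("docs", ["read", "write", "bogus"]) == {
    "read": True, "write": False, "bogus": False}
```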
@@ -328,6 +381,10 @@
         new_secret = self._generate_secret_key()
         user["secret_key"] = new_secret
         self._save()
+        self._principal_cache.pop(access_key, None)
+        self._secret_key_cache.pop(access_key, None)
+        from .s3_api import clear_signing_key_cache
+        clear_signing_key_cache()
         self._load()
         return new_secret
@@ -346,6 +403,10 @@
             raise IamError("User not found")
         self._raw_config["users"] = remaining
         self._save()
+        self._principal_cache.pop(access_key, None)
+        self._secret_key_cache.pop(access_key, None)
+        from .s3_api import clear_signing_key_cache
+        clear_signing_key_cache()
         self._load()
     def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None:
@@ -480,11 +541,13 @@
         return candidate if candidate in ALLOWED_ACTIONS else ""
     def _write_default(self) -> None:
+        access_key = secrets.token_hex(12)
+        secret_key = secrets.token_urlsafe(32)
         default = {
             "users": [
                 {
-                    "access_key": "localadmin",
-                    "secret_key": "localadmin",
+                    "access_key": access_key,
+                    "secret_key": secret_key,
                     "display_name": "Local Admin",
                     "policies": [
                         {"bucket": "*", "actions": list(ALLOWED_ACTIONS)}
@@ -493,6 +556,14 @@
             ]
         }
         self.config_path.write_text(json.dumps(default, indent=2))
+        print(f"\n{'='*60}")
+        print("MYFSIO FIRST RUN - ADMIN CREDENTIALS GENERATED")
+        print(f"{'='*60}")
+        print(f"Access Key: {access_key}")
+        print(f"Secret Key: {secret_key}")
+        print(f"{'='*60}")
+        print(f"Missed this? Check: {self.config_path}")
+        print(f"{'='*60}\n")
     def _generate_access_key(self) -> str:
         return secrets.token_hex(8)
@@ -508,25 +579,25 @@
     def get_secret_key(self, access_key: str) -> str | None:
         now = time.time()
-        cached = self._credential_cache.get(access_key)
+        cached = self._secret_key_cache.get(access_key)
         if cached:
-            secret, principal, cached_time = cached
+            secret_key, cached_time = cached
             if now - cached_time < self._cache_ttl:
-                return secret
+                return secret_key
         self._maybe_reload()
         record = self._users.get(access_key)
         if record:
-            principal = self._build_principal(access_key, record)
-            self._credential_cache[access_key] = (record["secret_key"], principal, now)
-            return record["secret_key"]
+            secret_key = record["secret_key"]
+            self._secret_key_cache[access_key] = (secret_key, now)
+            return secret_key
         return None
     def get_principal(self, access_key: str) -> Principal | None:
         now = time.time()
-        cached = self._credential_cache.get(access_key)
+        cached = self._principal_cache.get(access_key)
         if cached:
-            secret, principal, cached_time = cached
+            principal, cached_time = cached
             if now - cached_time < self._cache_ttl:
                 return principal
@@ -534,6 +605,6 @@
         record = self._users.get(access_key)
         if record:
             principal = self._build_principal(access_key, record)
-            self._credential_cache[access_key] = (record["secret_key"], principal, now)
+            self._principal_cache[access_key] = (principal, now)
             return principal
         return None

View File

@@ -2,7 +2,11 @@ from __future__ import annotations
 import base64
 import json
+import logging
+import os
 import secrets
+import subprocess
+import sys
 import uuid
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
@@ -13,6 +17,30 @@ from cryptography.hazmat.primitives.ciphers.aead import AESGCM
 from .encryption import EncryptionError, EncryptionProvider, EncryptionResult
+if sys.platform != "win32":
+    import fcntl
+logger = logging.getLogger(__name__)
+def _set_secure_file_permissions(file_path: Path) -> None:
+    """Set restrictive file permissions (owner read/write only)."""
+    if sys.platform == "win32":
+        try:
+            username = os.environ.get("USERNAME", "")
+            if username:
+                subprocess.run(
+                    ["icacls", str(file_path), "/inheritance:r",
+                     "/grant:r", f"{username}:F"],
+                    check=True, capture_output=True
+                )
+            else:
+                logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
+        except (subprocess.SubprocessError, OSError) as exc:
+            logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
+    else:
+        os.chmod(file_path, 0o600)
 @dataclass
 class KMSKey:
@@ -74,11 +102,11 @@ class KMSEncryptionProvider(EncryptionProvider):
     def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
         """Encrypt data using envelope encryption with KMS."""
         data_key, encrypted_data_key = self.generate_data_key()
         aesgcm = AESGCM(data_key)
         nonce = secrets.token_bytes(12)
         ciphertext = aesgcm.encrypt(nonce, plaintext,
-                                    json.dumps(context).encode() if context else None)
+                                    json.dumps(context, sort_keys=True).encode() if context else None)
         return EncryptionResult(
             ciphertext=ciphertext,
@@ -90,15 +118,26 @@
     def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
                 key_id: str, context: Dict[str, str] | None = None) -> bytes:
         """Decrypt data using envelope encryption with KMS."""
+        # Note: Data key is encrypted without context (AAD), so we decrypt without context
         data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
+        if len(data_key) != 32:
+            raise EncryptionError("Invalid data key size")
         aesgcm = AESGCM(data_key)
         try:
             return aesgcm.decrypt(nonce, ciphertext,
-                                  json.dumps(context).encode() if context else None)
+                                  json.dumps(context, sort_keys=True).encode() if context else None)
         except Exception as exc:
-            raise EncryptionError(f"Failed to decrypt data: {exc}") from exc
+            logger.debug("KMS decryption failed: %s", exc)
+            raise EncryptionError("Failed to decrypt data") from exc
+    def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
+        """Decrypt an encrypted data key using KMS."""
+        if key_id is None:
+            key_id = self.key_id
+        data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
+        if len(data_key) != 32:
+            raise EncryptionError("Invalid data key size")
+        return data_key
 class KMSManager:
@@ -108,27 +147,52 @@ class KMSManager:
     Keys are stored encrypted on disk.
     """
-    def __init__(self, keys_path: Path, master_key_path: Path):
+    def __init__(
+        self,
+        keys_path: Path,
+        master_key_path: Path,
+        generate_data_key_min_bytes: int = 1,
+        generate_data_key_max_bytes: int = 1024,
+    ):
         self.keys_path = keys_path
         self.master_key_path = master_key_path
+        self.generate_data_key_min_bytes = generate_data_key_min_bytes
+        self.generate_data_key_max_bytes = generate_data_key_max_bytes
         self._keys: Dict[str, KMSKey] = {}
         self._master_key: bytes | None = None
+        self._master_aesgcm: AESGCM | None = None
         self._loaded = False
     @property
     def master_key(self) -> bytes:
-        """Load or create the master key for encrypting KMS keys."""
+        """Load or create the master key for encrypting KMS keys (with file locking)."""
         if self._master_key is None:
-            if self.master_key_path.exists():
-                self._master_key = base64.b64decode(
-                    self.master_key_path.read_text().strip()
-                )
-            else:
-                self._master_key = secrets.token_bytes(32)
-                self.master_key_path.parent.mkdir(parents=True, exist_ok=True)
-                self.master_key_path.write_text(
-                    base64.b64encode(self._master_key).decode()
-                )
+            lock_path = self.master_key_path.with_suffix(".lock")
+            lock_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(lock_path, "w") as lock_file:
+                if sys.platform == "win32":
+                    import msvcrt
+                    msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
+                else:
+                    fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
+                try:
+                    if self.master_key_path.exists():
+                        self._master_key = base64.b64decode(
+                            self.master_key_path.read_text().strip()
+                        )
+                    else:
+                        self._master_key = secrets.token_bytes(32)
+                        self.master_key_path.write_text(
+                            base64.b64encode(self._master_key).decode()
+                        )
+                        _set_secure_file_permissions(self.master_key_path)
+                finally:
+                    if sys.platform == "win32":
+                        import msvcrt
+                        msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
+                    else:
+                        fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
+            self._master_aesgcm = AESGCM(self._master_key)
         return self._master_key
     def _load_keys(self) -> None:
@@ -145,8 +209,10 @@
                     encrypted = base64.b64decode(key_data["EncryptedKeyMaterial"])
                     key.key_material = self._decrypt_key_material(encrypted)
                 self._keys[key.key_id] = key
-        except Exception:
-            pass
+        except json.JSONDecodeError as exc:
+            logger.error("Failed to parse KMS keys file: %s", exc)
+        except (ValueError, KeyError) as exc:
+            logger.error("Invalid KMS key data: %s", exc)
         self._loaded = True
@@ -158,26 +224,25 @@
             encrypted = self._encrypt_key_material(key.key_material)
             data["EncryptedKeyMaterial"] = base64.b64encode(encrypted).decode()
             keys_data.append(data)
         self.keys_path.parent.mkdir(parents=True, exist_ok=True)
         self.keys_path.write_text(
             json.dumps({"keys": keys_data}, indent=2),
             encoding="utf-8"
         )
+        _set_secure_file_permissions(self.keys_path)
     def _encrypt_key_material(self, key_material: bytes) -> bytes:
-        """Encrypt key material with the master key."""
-        aesgcm = AESGCM(self.master_key)
+        _ = self.master_key
         nonce = secrets.token_bytes(12)
-        ciphertext = aesgcm.encrypt(nonce, key_material, None)
+        ciphertext = self._master_aesgcm.encrypt(nonce, key_material, None)
         return nonce + ciphertext
     def _decrypt_key_material(self, encrypted: bytes) -> bytes:
-        """Decrypt key material with the master key."""
-        aesgcm = AESGCM(self.master_key)
+        _ = self.master_key
         nonce = encrypted[:12]
         ciphertext = encrypted[12:]
-        return aesgcm.decrypt(nonce, ciphertext, None)
+        return self._master_aesgcm.decrypt(nonce, ciphertext, None)
     def create_key(self, description: str = "", key_id: str | None = None) -> KMSKey:
         """Create a new KMS key."""
@@ -269,7 +334,7 @@
         aesgcm = AESGCM(key.key_material)
         nonce = secrets.token_bytes(12)
-        aad = json.dumps(context).encode() if context else None
+        aad = json.dumps(context, sort_keys=True).encode() if context else None
         ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
         key_id_bytes = key_id.encode("utf-8")
@@ -298,17 +363,24 @@
         encrypted = rest[12:]
         aesgcm = AESGCM(key.key_material)
-        aad = json.dumps(context).encode() if context else None
+        aad = json.dumps(context, sort_keys=True).encode() if context else None
         try:
             plaintext = aesgcm.decrypt(nonce, encrypted, aad)
             return plaintext, key_id
         except Exception as exc:
-            raise EncryptionError(f"Decryption failed: {exc}") from exc
+            logger.debug("KMS decrypt operation failed: %s", exc)
+            raise EncryptionError("Decryption failed") from exc
     def generate_data_key(self, key_id: str,
-                          context: Dict[str, str] | None = None) -> tuple[bytes, bytes]:
+                          context: Dict[str, str] | None = None,
+                          key_spec: str = "AES_256") -> tuple[bytes, bytes]:
         """Generate a data key and return both plaintext and encrypted versions.
+        Args:
+            key_id: The KMS key ID to use for encryption
+            context: Optional encryption context
+            key_spec: Key specification - AES_128 or AES_256 (default)
         Returns:
             Tuple of (plaintext_key, encrypted_key)
         """
@@ -318,11 +390,12 @@
             raise EncryptionError(f"Key not found: {key_id}")
         if not key.enabled:
             raise EncryptionError(f"Key is disabled: {key_id}")
-        plaintext_key = secrets.token_bytes(32)
+        key_bytes = 32 if key_spec == "AES_256" else 16
+        plaintext_key = secrets.token_bytes(key_bytes)
         encrypted_key = self.encrypt(key_id, plaintext_key, context)
         return plaintext_key, encrypted_key
     def decrypt_data_key(self, key_id: str, encrypted_key: bytes,
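generate_data_key now accepts an AWS-style key_spec; anything other than AES_256 is treated as AES_128. The mapping in isolation:

```python
# Spec-to-length mapping used by generate_data_key above.
import secrets

def data_key_bytes(key_spec: str = "AES_256") -> bytes:
    key_bytes = 32 if key_spec == "AES_256" else 16
    return secrets.token_bytes(key_bytes)

assert len(data_key_bytes()) == 32
assert len(data_key_bytes("AES_128")) == 16
```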
@@ -331,22 +404,6 @@
         plaintext, _ = self.decrypt(encrypted_key, context)
         return plaintext
-    def get_provider(self, key_id: str | None = None) -> KMSEncryptionProvider:
-        """Get an encryption provider for a specific key."""
-        self._load_keys()
-        if key_id is None:
-            if not self._keys:
-                key = self.create_key("Default KMS Key")
-                key_id = key.key_id
-            else:
-                key_id = next(iter(self._keys.keys()))
-        if key_id not in self._keys:
-            raise EncryptionError(f"Key not found: {key_id}")
-        return KMSEncryptionProvider(self, key_id)
     def re_encrypt(self, ciphertext: bytes, destination_key_id: str,
                    source_context: Dict[str, str] | None = None,
                    destination_context: Dict[str, str] | None = None) -> bytes:
@@ -358,6 +415,8 @@
     def generate_random(self, num_bytes: int = 32) -> bytes:
         """Generate cryptographically secure random bytes."""
-        if num_bytes < 1 or num_bytes > 1024:
-            raise EncryptionError("Number of bytes must be between 1 and 1024")
+        if num_bytes < self.generate_data_key_min_bytes or num_bytes > self.generate_data_key_max_bytes:
+            raise EncryptionError(
+                f"Number of bytes must be between {self.generate_data_key_min_bytes} and {self.generate_data_key_max_bytes}"
+            )
         return secrets.token_bytes(num_bytes)

View File

@@ -71,10 +71,9 @@ class LifecycleExecutionRecord:
 class LifecycleHistoryStore:
-    MAX_HISTORY_PER_BUCKET = 50
-    def __init__(self, storage_root: Path) -> None:
+    def __init__(self, storage_root: Path, max_history_per_bucket: int = 50) -> None:
         self.storage_root = storage_root
+        self.max_history_per_bucket = max_history_per_bucket
         self._lock = threading.Lock()
     def _get_history_path(self, bucket_name: str) -> Path:
@@ -95,7 +94,7 @@ class LifecycleHistoryStore:
     def save_history(self, bucket_name: str, records: List[LifecycleExecutionRecord]) -> None:
         path = self._get_history_path(bucket_name)
         path.parent.mkdir(parents=True, exist_ok=True)
-        data = {"executions": [r.to_dict() for r in records[:self.MAX_HISTORY_PER_BUCKET]]}
+        data = {"executions": [r.to_dict() for r in records[:self.max_history_per_bucket]]}
         try:
             with open(path, "w") as f:
                 json.dump(data, f, indent=2)
@@ -114,14 +113,20 @@ class LifecycleHistoryStore:
 class LifecycleManager:
-    def __init__(self, storage: ObjectStorage, interval_seconds: int = 3600, storage_root: Optional[Path] = None):
+    def __init__(
+        self,
+        storage: ObjectStorage,
+        interval_seconds: int = 3600,
+        storage_root: Optional[Path] = None,
+        max_history_per_bucket: int = 50,
+    ):
         self.storage = storage
         self.interval_seconds = interval_seconds
         self.storage_root = storage_root
         self._timer: Optional[threading.Timer] = None
         self._shutdown = False
         self._lock = threading.Lock()
-        self.history_store = LifecycleHistoryStore(storage_root) if storage_root else None
+        self.history_store = LifecycleHistoryStore(storage_root, max_history_per_bucket) if storage_root else None
     def start(self) -> None:
         if self._timer is not None:

View File

@@ -1,8 +1,10 @@
 from __future__ import annotations
+import ipaddress
 import json
 import logging
 import queue
+import socket
 import threading
 import time
 import uuid
@@ -14,6 +16,48 @@ from urllib.parse import urlparse
 import requests
+def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
+    """Check if a URL is safe to make requests to (not internal/private).
+    Args:
+        url: The URL to check.
+        allow_internal: If True, allows internal/private IP addresses.
+            Use for self-hosted deployments on internal networks.
+    """
+    try:
+        parsed = urlparse(url)
+        hostname = parsed.hostname
+        if not hostname:
+            return False
+        cloud_metadata_hosts = {
+            "metadata.google.internal",
+            "169.254.169.254",
+        }
+        if hostname.lower() in cloud_metadata_hosts:
+            return False
+        if allow_internal:
+            return True
+        blocked_hosts = {
+            "localhost",
+            "127.0.0.1",
+            "0.0.0.0",
+            "::1",
+            "[::1]",
+        }
+        if hostname.lower() in blocked_hosts:
+            return False
+        try:
+            resolved_ip = socket.gethostbyname(hostname)
+            ip = ipaddress.ip_address(resolved_ip)
+            if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
+                return False
+        except (socket.gaierror, ValueError):
+            return False
+        return True
+    except Exception:
+        return False
 logger = logging.getLogger(__name__)
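Assuming _is_safe_url is in scope, the rules above work out as follows: cloud metadata endpoints are refused even with allow_internal=True, while loopback hosts are only allowed when the flag is set. None of these cases needs DNS resolution:

```python
# Illustrative checks against the rules encoded in _is_safe_url.
assert not _is_safe_url("http://169.254.169.254/latest/meta-data/")
assert not _is_safe_url("http://localhost:8080/hook")
assert _is_safe_url("http://localhost:8080/hook", allow_internal=True)
# Metadata hosts stay blocked even for internal deployments:
assert not _is_safe_url("http://metadata.google.internal/", allow_internal=True)
```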
@@ -165,8 +209,9 @@ class NotificationService:
 class NotificationService:
-    def __init__(self, storage_root: Path, worker_count: int = 2):
+    def __init__(self, storage_root: Path, worker_count: int = 2, allow_internal_endpoints: bool = False):
         self.storage_root = storage_root
+        self._allow_internal_endpoints = allow_internal_endpoints
         self._configs: Dict[str, List[NotificationConfiguration]] = {}
         self._queue: queue.Queue[tuple[NotificationEvent, WebhookDestination]] = queue.Queue()
         self._workers: List[threading.Thread] = []
@@ -299,6 +344,8 @@ class NotificationService:
             self._queue.task_done()
     def _send_notification(self, event: NotificationEvent, destination: WebhookDestination) -> None:
+        if not _is_safe_url(destination.url, allow_internal=self._allow_internal_endpoints):
+            raise RuntimeError(f"Blocked request to cloud metadata service (SSRF protection): {destination.url}")
         payload = event.to_s3_event()
         headers = {"Content-Type": "application/json", **destination.headers}

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
 import json
 import logging
+import random
 import threading
 import time
 from dataclasses import dataclass, field
@@ -9,6 +10,8 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Dict, List, Optional
+MAX_LATENCY_SAMPLES = 5000
 logger = logging.getLogger(__name__)
@@ -22,6 +25,17 @@ class OperationStats:
     latency_max_ms: float = 0.0
     bytes_in: int = 0
     bytes_out: int = 0
+    latency_samples: List[float] = field(default_factory=list)
+    @staticmethod
+    def _compute_percentile(sorted_data: List[float], p: float) -> float:
+        if not sorted_data:
+            return 0.0
+        k = (len(sorted_data) - 1) * (p / 100.0)
+        f = int(k)
+        c = min(f + 1, len(sorted_data) - 1)
+        d = k - f
+        return sorted_data[f] + d * (sorted_data[c] - sorted_data[f])
     def record(self, latency_ms: float, success: bool, bytes_in: int = 0, bytes_out: int = 0) -> None:
         self.count += 1
@@ -36,10 +50,17 @@
             self.latency_max_ms = latency_ms
         self.bytes_in += bytes_in
         self.bytes_out += bytes_out
+        if len(self.latency_samples) < MAX_LATENCY_SAMPLES:
+            self.latency_samples.append(latency_ms)
+        else:
+            j = random.randint(0, self.count - 1)
+            if j < MAX_LATENCY_SAMPLES:
+                self.latency_samples[j] = latency_ms
     def to_dict(self) -> Dict[str, Any]:
         avg_latency = self.latency_sum_ms / self.count if self.count > 0 else 0.0
         min_latency = self.latency_min_ms if self.latency_min_ms != float("inf") else 0.0
+        sorted_latencies = sorted(self.latency_samples)
         return {
             "count": self.count,
             "success_count": self.success_count,
@@ -47,6 +68,9 @@ class OperationStats:
             "latency_avg_ms": round(avg_latency, 2),
             "latency_min_ms": round(min_latency, 2),
             "latency_max_ms": round(self.latency_max_ms, 2),
+            "latency_p50_ms": round(self._compute_percentile(sorted_latencies, 50), 2),
+            "latency_p95_ms": round(self._compute_percentile(sorted_latencies, 95), 2),
+            "latency_p99_ms": round(self._compute_percentile(sorted_latencies, 99), 2),
             "bytes_in": self.bytes_in,
             "bytes_out": self.bytes_out,
         }
@@ -62,6 +86,11 @@ class OperationStats:
             self.latency_max_ms = other.latency_max_ms
         self.bytes_in += other.bytes_in
         self.bytes_out += other.bytes_out
+        combined = self.latency_samples + other.latency_samples
+        if len(combined) > MAX_LATENCY_SAMPLES:
+            random.shuffle(combined)
+            combined = combined[:MAX_LATENCY_SAMPLES]
+        self.latency_samples = combined
 @dataclass
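record() keeps at most MAX_LATENCY_SAMPLES latencies via reservoir sampling (Algorithm R), so p50/p95/p99 stay computable at bounded memory, and _compute_percentile linearly interpolates between neighbouring samples. A standalone sketch of both pieces:

```python
# Bounded reservoir plus interpolated percentile, mirroring the code above.
import random

MAX_SAMPLES = 5000

def reservoir_add(samples, count, value, k=MAX_SAMPLES):
    # count is the total number of observations so far (1-based)
    if len(samples) < k:
        samples.append(value)
    else:
        j = random.randint(0, count - 1)
        if j < k:
            samples[j] = value  # replace with probability k/count

def percentile(sorted_data, p):
    if not sorted_data:
        return 0.0
    k = (len(sorted_data) - 1) * (p / 100.0)
    f = int(k)
    c = min(f + 1, len(sorted_data) - 1)
    return sorted_data[f] + (k - f) * (sorted_data[c] - sorted_data[f])

samples, n = [], 0
for latency in [12.0, 15.5, 9.1, 30.2, 11.7]:
    n += 1
    reservoir_add(samples, n, latency)
assert percentile(sorted(samples), 50) == 12.0  # median of the five values
```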

View File

@@ -21,16 +21,20 @@ from .storage import ObjectStorage, StorageError
 logger = logging.getLogger(__name__)
 REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
-REPLICATION_CONNECT_TIMEOUT = 5
-REPLICATION_READ_TIMEOUT = 30
-STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024
 REPLICATION_MODE_NEW_ONLY = "new_only"
 REPLICATION_MODE_ALL = "all"
 REPLICATION_MODE_BIDIRECTIONAL = "bidirectional"
-def _create_s3_client(connection: RemoteConnection, *, health_check: bool = False) -> Any:
+def _create_s3_client(
+    connection: RemoteConnection,
+    *,
+    health_check: bool = False,
+    connect_timeout: int = 5,
+    read_timeout: int = 30,
+    max_retries: int = 2,
+) -> Any:
     """Create a boto3 S3 client for the given connection.
     Args:
         connection: Remote S3 connection configuration
@@ -38,9 +42,9 @@ def _create_s3_client(connection: RemoteConnection, *, health_check: bool = Fals
     """
     config = Config(
         user_agent_extra=REPLICATION_USER_AGENT,
-        connect_timeout=REPLICATION_CONNECT_TIMEOUT,
-        read_timeout=REPLICATION_READ_TIMEOUT,
-        retries={'max_attempts': 1 if health_check else 2},
+        connect_timeout=connect_timeout,
+        read_timeout=read_timeout,
+        retries={'max_attempts': 1 if health_check else max_retries},
         signature_version='s3v4',
         s3={'addressing_style': 'path'},
         request_checksum_calculation='when_required',
@@ -133,6 +137,7 @@ class ReplicationRule:
stats: ReplicationStats = field(default_factory=ReplicationStats) stats: ReplicationStats = field(default_factory=ReplicationStats)
sync_deletions: bool = True sync_deletions: bool = True
last_pull_at: Optional[float] = None last_pull_at: Optional[float] = None
filter_prefix: Optional[str] = None
def to_dict(self) -> dict: def to_dict(self) -> dict:
return { return {
@@ -145,6 +150,7 @@ class ReplicationRule:
"stats": self.stats.to_dict(), "stats": self.stats.to_dict(),
"sync_deletions": self.sync_deletions, "sync_deletions": self.sync_deletions,
"last_pull_at": self.last_pull_at, "last_pull_at": self.last_pull_at,
"filter_prefix": self.filter_prefix,
} }
@classmethod @classmethod
@@ -158,22 +164,24 @@ class ReplicationRule:
data["sync_deletions"] = True data["sync_deletions"] = True
if "last_pull_at" not in data: if "last_pull_at" not in data:
data["last_pull_at"] = None data["last_pull_at"] = None
if "filter_prefix" not in data:
data["filter_prefix"] = None
rule = cls(**data) rule = cls(**data)
rule.stats = ReplicationStats.from_dict(stats_data) if stats_data else ReplicationStats() rule.stats = ReplicationStats.from_dict(stats_data) if stats_data else ReplicationStats()
return rule return rule
class ReplicationFailureStore: class ReplicationFailureStore:
MAX_FAILURES_PER_BUCKET = 50 def __init__(self, storage_root: Path, max_failures_per_bucket: int = 50) -> None:
def __init__(self, storage_root: Path) -> None:
self.storage_root = storage_root self.storage_root = storage_root
self.max_failures_per_bucket = max_failures_per_bucket
self._lock = threading.Lock() self._lock = threading.Lock()
self._cache: Dict[str, List[ReplicationFailure]] = {}
def _get_failures_path(self, bucket_name: str) -> Path: def _get_failures_path(self, bucket_name: str) -> Path:
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "replication_failures.json" return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "replication_failures.json"
def load_failures(self, bucket_name: str) -> List[ReplicationFailure]: def _load_from_disk(self, bucket_name: str) -> List[ReplicationFailure]:
path = self._get_failures_path(bucket_name) path = self._get_failures_path(bucket_name)
if not path.exists(): if not path.exists():
return [] return []
@@ -185,16 +193,28 @@ class ReplicationFailureStore:
logger.error(f"Failed to load replication failures for {bucket_name}: {e}") logger.error(f"Failed to load replication failures for {bucket_name}: {e}")
return [] return []
def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None: def _save_to_disk(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
path = self._get_failures_path(bucket_name) path = self._get_failures_path(bucket_name)
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
data = {"failures": [f.to_dict() for f in failures[:self.MAX_FAILURES_PER_BUCKET]]} data = {"failures": [f.to_dict() for f in failures[:self.max_failures_per_bucket]]}
try: try:
with open(path, "w") as f: with open(path, "w") as f:
json.dump(data, f, indent=2) json.dump(data, f, indent=2)
except OSError as e: except OSError as e:
logger.error(f"Failed to save replication failures for {bucket_name}: {e}") logger.error(f"Failed to save replication failures for {bucket_name}: {e}")
def load_failures(self, bucket_name: str) -> List[ReplicationFailure]:
if bucket_name in self._cache:
return list(self._cache[bucket_name])
failures = self._load_from_disk(bucket_name)
self._cache[bucket_name] = failures
return list(failures)
def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
trimmed = failures[:self.max_failures_per_bucket]
self._cache[bucket_name] = trimmed
self._save_to_disk(bucket_name, trimmed)
def add_failure(self, bucket_name: str, failure: ReplicationFailure) -> None: def add_failure(self, bucket_name: str, failure: ReplicationFailure) -> None:
with self._lock: with self._lock:
failures = self.load_failures(bucket_name) failures = self.load_failures(bucket_name)
@@ -220,6 +240,7 @@ class ReplicationFailureStore:
def clear_failures(self, bucket_name: str) -> None: def clear_failures(self, bucket_name: str) -> None:
with self._lock: with self._lock:
self._cache.pop(bucket_name, None)
path = self._get_failures_path(bucket_name) path = self._get_failures_path(bucket_name)
if path.exists(): if path.exists():
path.unlink() path.unlink()
@@ -233,18 +254,43 @@ class ReplicationFailureStore:
class ReplicationManager: class ReplicationManager:
def __init__(self, storage: ObjectStorage, connections: ConnectionStore, rules_path: Path, storage_root: Path) -> None: def __init__(
self,
storage: ObjectStorage,
connections: ConnectionStore,
rules_path: Path,
storage_root: Path,
connect_timeout: int = 5,
read_timeout: int = 30,
max_retries: int = 2,
streaming_threshold_bytes: int = 10 * 1024 * 1024,
max_failures_per_bucket: int = 50,
) -> None:
self.storage = storage self.storage = storage
self.connections = connections self.connections = connections
self.rules_path = rules_path self.rules_path = rules_path
self.storage_root = storage_root self.storage_root = storage_root
self.connect_timeout = connect_timeout
self.read_timeout = read_timeout
self.max_retries = max_retries
self.streaming_threshold_bytes = streaming_threshold_bytes
self._rules: Dict[str, ReplicationRule] = {} self._rules: Dict[str, ReplicationRule] = {}
self._stats_lock = threading.Lock() self._stats_lock = threading.Lock()
self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker") self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
self._shutdown = False self._shutdown = False
self.failure_store = ReplicationFailureStore(storage_root) self.failure_store = ReplicationFailureStore(storage_root, max_failures_per_bucket)
self.reload_rules() self.reload_rules()
def _create_client(self, connection: RemoteConnection, *, health_check: bool = False) -> Any:
"""Create an S3 client with the manager's configured timeouts."""
return _create_s3_client(
connection,
health_check=health_check,
connect_timeout=self.connect_timeout,
read_timeout=self.read_timeout,
max_retries=self.max_retries,
)
def shutdown(self, wait: bool = True) -> None: def shutdown(self, wait: bool = True) -> None:
"""Shutdown the replication executor gracefully. """Shutdown the replication executor gracefully.
@@ -280,7 +326,7 @@ class ReplicationManager:
Uses short timeouts to prevent blocking. Uses short timeouts to prevent blocking.
""" """
try: try:
s3 = _create_s3_client(connection, health_check=True) s3 = self._create_client(connection, health_check=True)
s3.list_buckets() s3.list_buckets()
return True return True
except Exception as e: except Exception as e:
@@ -290,6 +336,9 @@ class ReplicationManager:
def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]: def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]:
return self._rules.get(bucket_name) return self._rules.get(bucket_name)
def list_rules(self) -> List[ReplicationRule]:
return list(self._rules.values())
def set_rule(self, rule: ReplicationRule) -> None: def set_rule(self, rule: ReplicationRule) -> None:
old_rule = self._rules.get(rule.bucket_name) old_rule = self._rules.get(rule.bucket_name)
was_all_mode = old_rule and old_rule.mode == REPLICATION_MODE_ALL if old_rule else False was_all_mode = old_rule and old_rule.mode == REPLICATION_MODE_ALL if old_rule else False
@@ -329,7 +378,7 @@ class ReplicationManager:
source_objects = self.storage.list_objects_all(bucket_name) source_objects = self.storage.list_objects_all(bucket_name)
source_keys = {obj.key: obj.size for obj in source_objects} source_keys = {obj.key: obj.size for obj in source_objects}
s3 = _create_s3_client(connection) s3 = self._create_client(connection)
dest_keys = set() dest_keys = set()
bytes_synced = 0 bytes_synced = 0
@@ -395,7 +444,7 @@ class ReplicationManager:
raise ValueError(f"Connection {connection_id} not found") raise ValueError(f"Connection {connection_id} not found")
try: try:
s3 = _create_s3_client(connection) s3 = self._create_client(connection)
s3.create_bucket(Bucket=bucket_name) s3.create_bucket(Bucket=bucket_name)
except ClientError as e: except ClientError as e:
logger.error(f"Failed to create remote bucket {bucket_name}: {e}") logger.error(f"Failed to create remote bucket {bucket_name}: {e}")
@@ -438,7 +487,7 @@ class ReplicationManager:
return return
try: try:
s3 = _create_s3_client(conn) s3 = self._create_client(conn)
if action == "delete": if action == "delete":
try: try:
@@ -481,7 +530,7 @@ class ReplicationManager:
if content_type: if content_type:
extra_args["ContentType"] = content_type extra_args["ContentType"] = content_type
if file_size >= STREAMING_THRESHOLD_BYTES: if file_size >= self.streaming_threshold_bytes:
s3.upload_file( s3.upload_file(
str(path), str(path),
rule.target_bucket, rule.target_bucket,
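The point of this refactor is that timeouts, retry counts, and the streaming threshold become per-deployment knobs instead of module constants. A standalone sketch of the `Config` the refactored `_create_s3_client` builds (botocore only; the values passed are examples, not recommended settings):

```python
from botocore.config import Config

# Sketch of the tunable client config: the previously hard-coded
# timeouts are now supplied by the caller.
def make_config(connect_timeout=5, read_timeout=30, max_retries=2, health_check=False):
    return Config(
        connect_timeout=connect_timeout,
        read_timeout=read_timeout,
        # Health checks keep a single attempt so probes fail fast.
        retries={"max_attempts": 1 if health_check else max_retries},
        signature_version="s3v4",
        s3={"addressing_style": "path"},
    )

print(make_config(connect_timeout=3).connect_timeout)  # 3
```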

(file diff suppressed because it is too large)

app/s3_client.py (new file, 284 lines)

@@ -0,0 +1,284 @@
from __future__ import annotations

import json
import logging
import threading
import time
from typing import Any, Generator, Optional

import boto3
from botocore.config import Config
from botocore.exceptions import ClientError, EndpointConnectionError, ConnectionClosedError
from flask import current_app, session

logger = logging.getLogger(__name__)

UI_PROXY_USER_AGENT = "MyFSIO-UIProxy/1.0"

_BOTO_ERROR_MAP = {
    "NoSuchBucket": 404,
    "NoSuchKey": 404,
    "NoSuchUpload": 404,
    "BucketAlreadyExists": 409,
    "BucketAlreadyOwnedByYou": 409,
    "BucketNotEmpty": 409,
    "AccessDenied": 403,
    "InvalidAccessKeyId": 403,
    "SignatureDoesNotMatch": 403,
    "InvalidBucketName": 400,
    "InvalidArgument": 400,
    "MalformedXML": 400,
    "EntityTooLarge": 400,
    "QuotaExceeded": 403,
}

_UPLOAD_REGISTRY_MAX_AGE = 86400
_UPLOAD_REGISTRY_CLEANUP_INTERVAL = 3600


class UploadRegistry:
    def __init__(self) -> None:
        self._entries: dict[str, tuple[str, str, float]] = {}
        self._lock = threading.Lock()
        self._last_cleanup = time.monotonic()

    def register(self, upload_id: str, bucket_name: str, object_key: str) -> None:
        with self._lock:
            self._entries[upload_id] = (bucket_name, object_key, time.monotonic())
            self._maybe_cleanup()

    def get_key(self, upload_id: str, bucket_name: str) -> Optional[str]:
        with self._lock:
            entry = self._entries.get(upload_id)
            if entry is None:
                return None
            stored_bucket, key, created_at = entry
            if stored_bucket != bucket_name:
                return None
            if time.monotonic() - created_at > _UPLOAD_REGISTRY_MAX_AGE:
                del self._entries[upload_id]
                return None
            return key

    def remove(self, upload_id: str) -> None:
        with self._lock:
            self._entries.pop(upload_id, None)

    def _maybe_cleanup(self) -> None:
        now = time.monotonic()
        if now - self._last_cleanup < _UPLOAD_REGISTRY_CLEANUP_INTERVAL:
            return
        self._last_cleanup = now
        cutoff = now - _UPLOAD_REGISTRY_MAX_AGE
        stale = [uid for uid, (_, _, ts) in self._entries.items() if ts < cutoff]
        for uid in stale:
            del self._entries[uid]


class S3ProxyClient:
    def __init__(self, api_base_url: str, region: str = "us-east-1") -> None:
        if not api_base_url:
            raise ValueError("api_base_url is required for S3ProxyClient")
        self._api_base_url = api_base_url.rstrip("/")
        self._region = region
        self.upload_registry = UploadRegistry()

    @property
    def api_base_url(self) -> str:
        return self._api_base_url

    def get_client(self, access_key: str, secret_key: str) -> Any:
        if not access_key or not secret_key:
            raise ValueError("Both access_key and secret_key are required")
        config = Config(
            user_agent_extra=UI_PROXY_USER_AGENT,
            connect_timeout=5,
            read_timeout=30,
            retries={"max_attempts": 0},
            signature_version="s3v4",
            s3={"addressing_style": "path"},
            request_checksum_calculation="when_required",
            response_checksum_validation="when_required",
        )
        return boto3.client(
            "s3",
            endpoint_url=self._api_base_url,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            region_name=self._region,
            config=config,
        )


def _get_proxy() -> S3ProxyClient:
    proxy = current_app.extensions.get("s3_proxy")
    if proxy is None:
        raise RuntimeError(
            "S3 proxy not configured. Set API_BASE_URL or run both API and UI servers."
        )
    return proxy


def _get_session_creds() -> tuple[str, str]:
    secret_store = current_app.extensions["secret_store"]
    secret_store.purge_expired()
    token = session.get("cred_token")
    if not token:
        raise PermissionError("Not authenticated")
    creds = secret_store.peek(token)
    if not creds:
        raise PermissionError("Session expired")
    access_key = creds.get("access_key", "")
    secret_key = creds.get("secret_key", "")
    if not access_key or not secret_key:
        raise PermissionError("Invalid session credentials")
    return access_key, secret_key


def get_session_s3_client() -> Any:
    proxy = _get_proxy()
    access_key, secret_key = _get_session_creds()
    return proxy.get_client(access_key, secret_key)


def get_upload_registry() -> UploadRegistry:
    return _get_proxy().upload_registry


def handle_client_error(exc: ClientError) -> tuple[dict[str, str], int]:
    error_info = exc.response.get("Error", {})
    code = error_info.get("Code", "InternalError")
    message = error_info.get("Message") or "S3 operation failed"
    http_status = _BOTO_ERROR_MAP.get(code)
    if http_status is None:
        http_status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode", 500)
    return {"error": message}, http_status


def handle_connection_error(exc: Exception) -> tuple[dict[str, str], int]:
    logger.error("S3 API connection failed: %s", exc)
    return {"error": "S3 API server is unreachable. Ensure the API server is running."}, 502


def format_datetime_display(dt: Any, display_tz: str = "UTC") -> str:
    from .ui import _format_datetime_display
    return _format_datetime_display(dt, display_tz)


def format_datetime_iso(dt: Any, display_tz: str = "UTC") -> str:
    from .ui import _format_datetime_iso
    return _format_datetime_iso(dt, display_tz)


def build_url_templates(bucket_name: str) -> dict[str, str]:
    from flask import url_for
    preview_t = url_for("ui.object_preview", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
    delete_t = url_for("ui.delete_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
    presign_t = url_for("ui.object_presign", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
    versions_t = url_for("ui.object_versions", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
    restore_t = url_for(
        "ui.restore_object_version",
        bucket_name=bucket_name,
        object_key="KEY_PLACEHOLDER",
        version_id="VERSION_ID_PLACEHOLDER",
    )
    tags_t = url_for("ui.object_tags", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
    copy_t = url_for("ui.copy_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
    move_t = url_for("ui.move_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
    metadata_t = url_for("ui.object_metadata", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
    return {
        "preview": preview_t,
        "download": preview_t + "?download=1",
        "presign": presign_t,
        "delete": delete_t,
        "versions": versions_t,
        "restore": restore_t,
        "tags": tags_t,
        "copy": copy_t,
        "move": move_t,
        "metadata": metadata_t,
    }


def translate_list_objects(
    boto3_response: dict[str, Any],
    url_templates: dict[str, str],
    display_tz: str = "UTC",
    versioning_enabled: bool = False,
) -> dict[str, Any]:
    objects_data = []
    for obj in boto3_response.get("Contents", []):
        last_mod = obj["LastModified"]
        objects_data.append({
            "key": obj["Key"],
            "size": obj["Size"],
            "last_modified": last_mod.isoformat(),
            "last_modified_display": format_datetime_display(last_mod, display_tz),
            "last_modified_iso": format_datetime_iso(last_mod, display_tz),
            "etag": obj.get("ETag", "").strip('"'),
        })
    return {
        "objects": objects_data,
        "is_truncated": boto3_response.get("IsTruncated", False),
        "next_continuation_token": boto3_response.get("NextContinuationToken"),
        "total_count": boto3_response.get("KeyCount", len(objects_data)),
        "versioning_enabled": versioning_enabled,
        "url_templates": url_templates,
    }


def get_versioning_via_s3(client: Any, bucket_name: str) -> bool:
    try:
        resp = client.get_bucket_versioning(Bucket=bucket_name)
        return resp.get("Status") == "Enabled"
    except ClientError as exc:
        code = exc.response.get("Error", {}).get("Code", "")
        if code != "NoSuchBucket":
            logger.warning("Failed to check versioning for %s: %s", bucket_name, code)
        return False


def stream_objects_ndjson(
    client: Any,
    bucket_name: str,
    prefix: Optional[str],
    url_templates: dict[str, str],
    display_tz: str = "UTC",
    versioning_enabled: bool = False,
) -> Generator[str, None, None]:
    meta_line = json.dumps({
        "type": "meta",
        "versioning_enabled": versioning_enabled,
        "url_templates": url_templates,
    }) + "\n"
    yield meta_line
    yield json.dumps({"type": "count", "total_count": 0}) + "\n"
    kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": 1000}
    if prefix:
        kwargs["Prefix"] = prefix
    try:
        paginator = client.get_paginator("list_objects_v2")
        for page in paginator.paginate(**kwargs):
            for obj in page.get("Contents", []):
                last_mod = obj["LastModified"]
                yield json.dumps({
                    "type": "object",
                    "key": obj["Key"],
                    "size": obj["Size"],
                    "last_modified": last_mod.isoformat(),
                    "last_modified_display": format_datetime_display(last_mod, display_tz),
                    "last_modified_iso": format_datetime_iso(last_mod, display_tz),
                    "etag": obj.get("ETag", "").strip('"'),
                }) + "\n"
    except ClientError as exc:
        error_msg = exc.response.get("Error", {}).get("Message", "S3 operation failed")
        yield json.dumps({"type": "error", "error": error_msg}) + "\n"
        return
    except (EndpointConnectionError, ConnectionClosedError):
        yield json.dumps({"type": "error", "error": "S3 API server is unreachable"}) + "\n"
        return
    yield json.dumps({"type": "done"}) + "\n"


@@ -18,6 +18,18 @@ class EphemeralSecretStore:
         self._store[token] = (payload, expires_at)
         return token
+    def peek(self, token: str | None) -> Any | None:
+        if not token:
+            return None
+        entry = self._store.get(token)
+        if not entry:
+            return None
+        payload, expires_at = entry
+        if expires_at < time.time():
+            self._store.pop(token, None)
+            return None
+        return payload
     def pop(self, token: str | None) -> Any | None:
         if not token:
             return None
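A hedged sketch of the token lifecycle `peek` enables; the `put(payload, ttl)` signature and bare constructor are assumed from the surrounding context, not confirmed by this diff:

```python
# Assumed API: put() stores a payload under a fresh token with a TTL.
store = EphemeralSecretStore()
token = store.put({"access_key": "AK...", "secret_key": "SK..."}, ttl=3600)

creds = store.peek(token)   # non-destructive read; returns None once expired
creds = store.peek(token)   # still available after repeated peeks
creds = store.pop(token)    # destructive read consumes the token
assert store.peek(token) is None
```

The UI proxy needs `peek` because it signs an S3 request on every page load; the older `pop`-only API would invalidate the session after the first request.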

app/select_content.py (new file, 171 lines)

@@ -0,0 +1,171 @@
"""S3 SelectObjectContent SQL query execution using DuckDB."""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any, Dict, Generator, Optional
try:
import duckdb
DUCKDB_AVAILABLE = True
except ImportError:
DUCKDB_AVAILABLE = False
class SelectError(Exception):
"""Error during SELECT query execution."""
pass
def execute_select_query(
file_path: Path,
expression: str,
input_format: str,
input_config: Dict[str, Any],
output_format: str,
output_config: Dict[str, Any],
chunk_size: int = 65536,
) -> Generator[bytes, None, None]:
"""Execute SQL query on object content."""
if not DUCKDB_AVAILABLE:
raise SelectError("DuckDB is not installed. Install with: pip install duckdb")
conn = duckdb.connect(":memory:")
try:
if input_format == "CSV":
_load_csv(conn, file_path, input_config)
elif input_format == "JSON":
_load_json(conn, file_path, input_config)
elif input_format == "Parquet":
_load_parquet(conn, file_path)
else:
raise SelectError(f"Unsupported input format: {input_format}")
normalized_expression = expression.replace("s3object", "data").replace("S3Object", "data")
try:
result = conn.execute(normalized_expression)
except duckdb.Error as exc:
raise SelectError(f"SQL execution error: {exc}")
if output_format == "CSV":
yield from _output_csv(result, output_config, chunk_size)
elif output_format == "JSON":
yield from _output_json(result, output_config, chunk_size)
else:
raise SelectError(f"Unsupported output format: {output_format}")
finally:
conn.close()
def _load_csv(conn, file_path: Path, config: Dict[str, Any]) -> None:
"""Load CSV file into DuckDB."""
file_header_info = config.get("file_header_info", "NONE")
delimiter = config.get("field_delimiter", ",")
quote = config.get("quote_character", '"')
header = file_header_info in ("USE", "IGNORE")
path_str = str(file_path).replace("\\", "/")
conn.execute(f"""
CREATE TABLE data AS
SELECT * FROM read_csv('{path_str}',
header={header},
delim='{delimiter}',
quote='{quote}'
)
""")
def _load_json(conn, file_path: Path, config: Dict[str, Any]) -> None:
"""Load JSON file into DuckDB."""
json_type = config.get("type", "DOCUMENT")
path_str = str(file_path).replace("\\", "/")
if json_type == "LINES":
conn.execute(f"""
CREATE TABLE data AS
SELECT * FROM read_json_auto('{path_str}', format='newline_delimited')
""")
else:
conn.execute(f"""
CREATE TABLE data AS
SELECT * FROM read_json_auto('{path_str}', format='array')
""")
def _load_parquet(conn, file_path: Path) -> None:
"""Load Parquet file into DuckDB."""
path_str = str(file_path).replace("\\", "/")
conn.execute(f"CREATE TABLE data AS SELECT * FROM read_parquet('{path_str}')")
def _output_csv(
result,
config: Dict[str, Any],
chunk_size: int,
) -> Generator[bytes, None, None]:
"""Output query results as CSV."""
delimiter = config.get("field_delimiter", ",")
record_delimiter = config.get("record_delimiter", "\n")
quote = config.get("quote_character", '"')
buffer = ""
while True:
rows = result.fetchmany(1000)
if not rows:
break
for row in rows:
fields = []
for value in row:
if value is None:
fields.append("")
elif isinstance(value, str):
if delimiter in value or quote in value or record_delimiter in value:
escaped = value.replace(quote, quote + quote)
fields.append(f'{quote}{escaped}{quote}')
else:
fields.append(value)
else:
fields.append(str(value))
buffer += delimiter.join(fields) + record_delimiter
while len(buffer) >= chunk_size:
yield buffer[:chunk_size].encode("utf-8")
buffer = buffer[chunk_size:]
if buffer:
yield buffer.encode("utf-8")
def _output_json(
result,
config: Dict[str, Any],
chunk_size: int,
) -> Generator[bytes, None, None]:
"""Output query results as JSON Lines."""
record_delimiter = config.get("record_delimiter", "\n")
columns = [desc[0] for desc in result.description]
buffer = ""
while True:
rows = result.fetchmany(1000)
if not rows:
break
for row in rows:
record = dict(zip(columns, row))
buffer += json.dumps(record, default=str) + record_delimiter
while len(buffer) >= chunk_size:
yield buffer[:chunk_size].encode("utf-8")
buffer = buffer[chunk_size:]
if buffer:
yield buffer.encode("utf-8")

app/site_registry.py (new file, 177 lines)

@@ -0,0 +1,177 @@
from __future__ import annotations

import json
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional


@dataclass
class SiteInfo:
    site_id: str
    endpoint: str
    region: str = "us-east-1"
    priority: int = 100
    display_name: str = ""
    created_at: Optional[float] = None
    updated_at: Optional[float] = None

    def __post_init__(self) -> None:
        if not self.display_name:
            self.display_name = self.site_id
        if self.created_at is None:
            self.created_at = time.time()

    def to_dict(self) -> Dict[str, Any]:
        return {
            "site_id": self.site_id,
            "endpoint": self.endpoint,
            "region": self.region,
            "priority": self.priority,
            "display_name": self.display_name,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> SiteInfo:
        return cls(
            site_id=data["site_id"],
            endpoint=data.get("endpoint", ""),
            region=data.get("region", "us-east-1"),
            priority=data.get("priority", 100),
            display_name=data.get("display_name", ""),
            created_at=data.get("created_at"),
            updated_at=data.get("updated_at"),
        )


@dataclass
class PeerSite:
    site_id: str
    endpoint: str
    region: str = "us-east-1"
    priority: int = 100
    display_name: str = ""
    created_at: Optional[float] = None
    updated_at: Optional[float] = None
    connection_id: Optional[str] = None
    is_healthy: Optional[bool] = None
    last_health_check: Optional[float] = None

    def __post_init__(self) -> None:
        if not self.display_name:
            self.display_name = self.site_id
        if self.created_at is None:
            self.created_at = time.time()

    def to_dict(self) -> Dict[str, Any]:
        return {
            "site_id": self.site_id,
            "endpoint": self.endpoint,
            "region": self.region,
            "priority": self.priority,
            "display_name": self.display_name,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "connection_id": self.connection_id,
            "is_healthy": self.is_healthy,
            "last_health_check": self.last_health_check,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> PeerSite:
        return cls(
            site_id=data["site_id"],
            endpoint=data.get("endpoint", ""),
            region=data.get("region", "us-east-1"),
            priority=data.get("priority", 100),
            display_name=data.get("display_name", ""),
            created_at=data.get("created_at"),
            updated_at=data.get("updated_at"),
            connection_id=data.get("connection_id"),
            is_healthy=data.get("is_healthy"),
            last_health_check=data.get("last_health_check"),
        )


class SiteRegistry:
    def __init__(self, config_path: Path) -> None:
        self.config_path = config_path
        self._local_site: Optional[SiteInfo] = None
        self._peers: Dict[str, PeerSite] = {}
        self.reload()

    def reload(self) -> None:
        if not self.config_path.exists():
            self._local_site = None
            self._peers = {}
            return
        try:
            with open(self.config_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            if data.get("local"):
                self._local_site = SiteInfo.from_dict(data["local"])
            else:
                self._local_site = None
            self._peers = {}
            for peer_data in data.get("peers", []):
                peer = PeerSite.from_dict(peer_data)
                self._peers[peer.site_id] = peer
        except (OSError, json.JSONDecodeError, KeyError):
            self._local_site = None
            self._peers = {}

    def save(self) -> None:
        self.config_path.parent.mkdir(parents=True, exist_ok=True)
        data = {
            "local": self._local_site.to_dict() if self._local_site else None,
            "peers": [peer.to_dict() for peer in self._peers.values()],
        }
        with open(self.config_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    def get_local_site(self) -> Optional[SiteInfo]:
        return self._local_site

    def set_local_site(self, site: SiteInfo) -> None:
        site.updated_at = time.time()
        self._local_site = site
        self.save()

    def list_peers(self) -> List[PeerSite]:
        return list(self._peers.values())

    def get_peer(self, site_id: str) -> Optional[PeerSite]:
        return self._peers.get(site_id)

    def add_peer(self, peer: PeerSite) -> None:
        peer.created_at = peer.created_at or time.time()
        self._peers[peer.site_id] = peer
        self.save()

    def update_peer(self, peer: PeerSite) -> None:
        if peer.site_id not in self._peers:
            raise ValueError(f"Peer {peer.site_id} not found")
        peer.updated_at = time.time()
        self._peers[peer.site_id] = peer
        self.save()

    def delete_peer(self, site_id: str) -> bool:
        if site_id in self._peers:
            del self._peers[site_id]
            self.save()
            return True
        return False

    def update_health(self, site_id: str, is_healthy: bool) -> None:
        peer = self._peers.get(site_id)
        if peer:
            peer.is_healthy = is_healthy
            peer.last_health_check = time.time()
            self.save()
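A short usage sketch for the registry (the config path and endpoints are illustrative):

```python
from pathlib import Path

from app.site_registry import PeerSite, SiteInfo, SiteRegistry

# Declare the local site, register a peer, then record a health probe.
registry = SiteRegistry(Path("data/.myfsio.sys/config/sites.json"))
registry.set_local_site(SiteInfo(site_id="site-a", endpoint="http://10.0.0.1:5000"))
registry.add_peer(PeerSite(site_id="site-b", endpoint="http://10.0.0.2:5000", priority=50))

registry.update_health("site-b", is_healthy=True)
for peer in registry.list_peers():
    print(peer.site_id, peer.priority, peer.is_healthy)
```

Every mutation calls `save()`, so the JSON file on disk is the single source of truth and the topology survives restarts.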


@@ -22,9 +22,6 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 SITE_SYNC_USER_AGENT = "SiteSyncAgent/1.0"
-SITE_SYNC_CONNECT_TIMEOUT = 10
-SITE_SYNC_READ_TIMEOUT = 120
-CLOCK_SKEW_TOLERANCE_SECONDS = 1.0
 @dataclass
@@ -108,12 +105,18 @@ class RemoteObjectMeta:
     )
-def _create_sync_client(connection: "RemoteConnection") -> Any:
+def _create_sync_client(
+    connection: "RemoteConnection",
+    *,
+    connect_timeout: int = 10,
+    read_timeout: int = 120,
+    max_retries: int = 2,
+) -> Any:
     config = Config(
         user_agent_extra=SITE_SYNC_USER_AGENT,
-        connect_timeout=SITE_SYNC_CONNECT_TIMEOUT,
+        connect_timeout=connect_timeout,
-        read_timeout=SITE_SYNC_READ_TIMEOUT,
+        read_timeout=read_timeout,
-        retries={"max_attempts": 2},
+        retries={"max_attempts": max_retries},
         signature_version="s3v4",
         s3={"addressing_style": "path"},
         request_checksum_calculation="when_required",
@@ -138,6 +141,10 @@ class SiteSyncWorker:
         storage_root: Path,
         interval_seconds: int = 60,
         batch_size: int = 100,
+        connect_timeout: int = 10,
+        read_timeout: int = 120,
+        max_retries: int = 2,
+        clock_skew_tolerance_seconds: float = 1.0,
     ):
         self.storage = storage
         self.connections = connections
@@ -145,11 +152,24 @@ class SiteSyncWorker:
         self.storage_root = storage_root
         self.interval_seconds = interval_seconds
         self.batch_size = batch_size
+        self.connect_timeout = connect_timeout
+        self.read_timeout = read_timeout
+        self.max_retries = max_retries
+        self.clock_skew_tolerance_seconds = clock_skew_tolerance_seconds
         self._lock = threading.Lock()
         self._shutdown = threading.Event()
         self._sync_thread: Optional[threading.Thread] = None
         self._bucket_stats: Dict[str, SiteSyncStats] = {}
+    def _create_client(self, connection: "RemoteConnection") -> Any:
+        """Create an S3 client with the worker's configured timeouts."""
+        return _create_sync_client(
+            connection,
+            connect_timeout=self.connect_timeout,
+            read_timeout=self.read_timeout,
+            max_retries=self.max_retries,
+        )
     def start(self) -> None:
         if self._sync_thread is not None and self._sync_thread.is_alive():
             return
@@ -294,7 +314,7 @@ class SiteSyncWorker:
         return {obj.key: obj for obj in objects}
     def _list_remote_objects(self, rule: "ReplicationRule", connection: "RemoteConnection") -> Dict[str, RemoteObjectMeta]:
-        s3 = _create_sync_client(connection)
+        s3 = self._create_client(connection)
         result: Dict[str, RemoteObjectMeta] = {}
         paginator = s3.get_paginator("list_objects_v2")
         try:
@@ -312,7 +332,7 @@ class SiteSyncWorker:
         local_ts = local_meta.last_modified.timestamp()
         remote_ts = remote_meta.last_modified.timestamp()
-        if abs(remote_ts - local_ts) < CLOCK_SKEW_TOLERANCE_SECONDS:
+        if abs(remote_ts - local_ts) < self.clock_skew_tolerance_seconds:
             local_etag = local_meta.etag or ""
             if remote_meta.etag == local_etag:
                 return "skip"
@@ -327,7 +347,7 @@ class SiteSyncWorker:
         connection: "RemoteConnection",
         remote_meta: RemoteObjectMeta,
     ) -> bool:
-        s3 = _create_sync_client(connection)
+        s3 = self._create_client(connection)
         tmp_path = None
         try:
             tmp_dir = self.storage_root / ".myfsio.sys" / "tmp"

(file diff suppressed because it is too large)

app/ui.py (2058 lines changed; diff suppressed because it is too large)

app/version.py

@@ -1,6 +1,6 @@
 from __future__ import annotations
-APP_VERSION = "0.2.3"
+APP_VERSION = "0.3.0"
 def get_version() -> str:

app/website_domains.py (new file, 79 lines)

@@ -0,0 +1,79 @@
from __future__ import annotations

import json
import re
import threading
from pathlib import Path
from typing import Dict, List, Optional

_DOMAIN_RE = re.compile(
    r"^(?!-)[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$"
)


def normalize_domain(raw: str) -> str:
    raw = raw.strip().lower()
    for prefix in ("https://", "http://"):
        if raw.startswith(prefix):
            raw = raw[len(prefix):]
    raw = raw.split("/", 1)[0]
    raw = raw.split("?", 1)[0]
    raw = raw.split("#", 1)[0]
    if ":" in raw:
        raw = raw.rsplit(":", 1)[0]
    return raw


def is_valid_domain(domain: str) -> bool:
    if not domain or len(domain) > 253:
        return False
    return bool(_DOMAIN_RE.match(domain))


class WebsiteDomainStore:
    def __init__(self, config_path: Path) -> None:
        self.config_path = config_path
        self._lock = threading.Lock()
        self._domains: Dict[str, str] = {}
        self.reload()

    def reload(self) -> None:
        if not self.config_path.exists():
            self._domains = {}
            return
        try:
            with open(self.config_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            if isinstance(data, dict):
                self._domains = {k.lower(): v for k, v in data.items()}
            else:
                self._domains = {}
        except (OSError, json.JSONDecodeError):
            self._domains = {}

    def _save(self) -> None:
        self.config_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.config_path, "w", encoding="utf-8") as f:
            json.dump(self._domains, f, indent=2)

    def list_all(self) -> List[Dict[str, str]]:
        with self._lock:
            return [{"domain": d, "bucket": b} for d, b in self._domains.items()]

    def get_bucket(self, domain: str) -> Optional[str]:
        with self._lock:
            return self._domains.get(domain.lower())

    def set_mapping(self, domain: str, bucket: str) -> None:
        with self._lock:
            self._domains[domain.lower()] = bucket
            self._save()

    def delete_mapping(self, domain: str) -> bool:
        with self._lock:
            key = domain.lower()
            if key not in self._domains:
                return False
            del self._domains[key]
            self._save()
            return True
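A short usage sketch of the store above (the config path is illustrative):

```python
from pathlib import Path

from app.website_domains import WebsiteDomainStore, is_valid_domain, normalize_domain

store = WebsiteDomainStore(Path("data/.myfsio.sys/config/website_domains.json"))

# Normalize user input, validate it, then persist the mapping.
raw = "HTTPS://Docs.Example.COM:8443/index.html"
domain = normalize_domain(raw)          # -> "docs.example.com"
if is_valid_domain(domain):
    store.set_mapping(domain, "docs-bucket")

print(store.get_bucket("docs.example.com"))  # -> "docs-bucket"
```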

docs.md (859 lines changed)

@@ -7,7 +7,7 @@ This document expands on the README to describe the full workflow for running, c
 MyFSIO ships two Flask entrypoints that share the same storage, IAM, and bucket-policy state:
 - **API server** Implements the S3-compatible REST API, policy evaluation, and Signature Version 4 presign service.
-- **UI server** Provides the browser console for buckets, IAM, and policies. It proxies to the API for presign operations.
+- **UI server** Provides the browser console for buckets, IAM, and policies. It proxies all storage operations through the S3 API via boto3 (SigV4-signed), mirroring the architecture used by MinIO and Garage.
 Both servers read `AppConfig`, so editing JSON stores on disk instantly affects both surfaces.
@@ -136,7 +136,7 @@ All configuration is done via environment variables. The table below lists every
 | `MAX_UPLOAD_SIZE` | `1073741824` (1 GiB) | Bytes. Caps incoming uploads in both API + UI. |
 | `UI_PAGE_SIZE` | `100` | `MaxKeys` hint shown in listings. |
 | `SECRET_KEY` | Auto-generated | Flask session key. Auto-generates and persists if not set. **Set explicitly in production.** |
-| `API_BASE_URL` | `None` | Public URL for presigned URLs. Required behind proxies. |
+| `API_BASE_URL` | `http://127.0.0.1:5000` | Internal S3 API URL used by the web UI proxy. Also used for presigned URL generation. Set to your public URL if running behind a reverse proxy. |
 | `AWS_REGION` | `us-east-1` | Region embedded in SigV4 credential scope. |
 | `AWS_SERVICE` | `s3` | Service string for SigV4. |
@@ -166,15 +166,19 @@ All configuration is done via environment variables. The table below lists every
 | Variable | Default | Notes |
 | --- | --- | --- |
 | `RATE_LIMIT_DEFAULT` | `200 per minute` | Default rate limit for API endpoints. |
+| `RATE_LIMIT_LIST_BUCKETS` | `60 per minute` | Rate limit for listing buckets (`GET /`). |
+| `RATE_LIMIT_BUCKET_OPS` | `120 per minute` | Rate limit for bucket operations (PUT/DELETE/GET/POST on `/<bucket>`). |
+| `RATE_LIMIT_OBJECT_OPS` | `240 per minute` | Rate limit for object operations (PUT/GET/DELETE/POST on `/<bucket>/<key>`). |
+| `RATE_LIMIT_HEAD_OPS` | `100 per minute` | Rate limit for HEAD requests (bucket and object). |
 | `RATE_LIMIT_STORAGE_URI` | `memory://` | Storage backend for rate limits. Use `redis://host:port` for distributed setups. |
 ### Server Configuration
 | Variable | Default | Notes |
 | --- | --- | --- |
-| `SERVER_THREADS` | `4` | Waitress worker threads (1-64). More threads handle more concurrent requests but use more memory. |
+| `SERVER_THREADS` | `0` (auto) | Waitress worker threads (1-64). Set to `0` for auto-calculation based on CPU cores (×2). |
-| `SERVER_CONNECTION_LIMIT` | `100` | Maximum concurrent connections (10-1000). Ensure OS file descriptor limits support this value. |
+| `SERVER_CONNECTION_LIMIT` | `0` (auto) | Maximum concurrent connections (10-1000). Set to `0` for auto-calculation based on available RAM. |
-| `SERVER_BACKLOG` | `1024` | TCP listen backlog (64-4096). Connections queue here when all threads are busy. |
+| `SERVER_BACKLOG` | `0` (auto) | TCP listen backlog (64-4096). Set to `0` for auto-calculation (connection_limit × 2). |
 | `SERVER_CHANNEL_TIMEOUT` | `120` | Seconds before idle connections are closed (10-300). |
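A rough standalone sketch of the auto-calculation rules stated in the Notes column above; the RAM-based connection heuristic here is an assumption for illustration, not the shipped formula:

```python
import os

def auto_server_settings(ram_mb: int) -> dict:
    # Documented rules: threads = CPU cores x 2, backlog = connection_limit x 2,
    # each clamped to the documented ranges.
    threads = min(64, max(1, (os.cpu_count() or 1) * 2))
    # Stand-in heuristic: ~1 connection per 10 MB of available RAM.
    connection_limit = min(1000, max(10, ram_mb // 10))
    backlog = min(4096, max(64, connection_limit * 2))
    return {"threads": threads, "connection_limit": connection_limit, "backlog": backlog}

print(auto_server_settings(ram_mb=2048))
```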
 ### Logging
@@ -615,13 +619,15 @@ MyFSIO implements a comprehensive Identity and Access Management (IAM) system th
 ### Getting Started
-1. On first boot, `data/.myfsio.sys/config/iam.json` is seeded with `localadmin / localadmin` that has wildcard access.
+1. On first boot, `data/.myfsio.sys/config/iam.json` is created with a randomly generated admin user. The access key and secret key are printed to the console during first startup. If you miss it, check the `iam.json` file directly; credentials are stored in plaintext.
-2. Sign into the UI using those credentials, then open **IAM**:
+2. Sign into the UI using the generated credentials, then open **IAM**:
    - **Create user**: supply a display name and optional JSON inline policy array.
    - **Rotate secret**: generates a new secret key; the UI surfaces it once.
    - **Policy editor**: select a user, paste an array of objects (`{"bucket": "*", "actions": ["list", "read"]}`), and submit. Alias support includes AWS-style verbs (e.g., `s3:GetObject`).
 3. Wildcard action `iam:*` is supported for admin user definitions.
+
+> **Breaking Change (v0.2.0+):** Previous versions used fixed default credentials (`localadmin/localadmin`). If upgrading from an older version, your existing credentials remain unchanged, but new installations will generate random credentials.
 ### Authentication
 The API expects every request to include authentication headers. The UI persists them in the Flask session after login.
@@ -1503,16 +1509,841 @@ The suite covers bucket CRUD, presigned downloads, bucket policy enforcement, an
 ## 14. API Matrix
 ```
+# Service Endpoints
+GET /myfsio/health # Health check
+
+# Bucket Operations
 GET / # List buckets
 PUT /<bucket> # Create bucket
 DELETE /<bucket> # Remove bucket
-GET /<bucket> # List objects
+GET /<bucket> # List objects (supports ?list-type=2)
-PUT /<bucket>/<key> # Upload object
-GET /<bucket>/<key> # Download object
-DELETE /<bucket>/<key> # Delete object
-GET /<bucket>?policy # Fetch policy
-PUT /<bucket>?policy # Upsert policy
-DELETE /<bucket>?policy # Delete policy
+HEAD /<bucket> # Check bucket exists
+POST /<bucket> # POST object upload (HTML form)
+POST /<bucket>?delete # Bulk delete objects
+
+# Bucket Configuration
+GET /<bucket>?policy # Fetch bucket policy
+PUT /<bucket>?policy # Upsert bucket policy
+DELETE /<bucket>?policy # Delete bucket policy
 GET /<bucket>?quota # Get bucket quota
 PUT /<bucket>?quota # Set bucket quota (admin only)
+GET /<bucket>?versioning # Get versioning status
+PUT /<bucket>?versioning # Enable/disable versioning
+GET /<bucket>?lifecycle # Get lifecycle rules
+PUT /<bucket>?lifecycle # Set lifecycle rules
+DELETE /<bucket>?lifecycle # Delete lifecycle rules
+GET /<bucket>?cors # Get CORS configuration
+PUT /<bucket>?cors # Set CORS configuration
+DELETE /<bucket>?cors # Delete CORS configuration
+GET /<bucket>?encryption # Get encryption configuration
+PUT /<bucket>?encryption # Set default encryption
+DELETE /<bucket>?encryption # Delete encryption configuration
+GET /<bucket>?acl # Get bucket ACL
+PUT /<bucket>?acl # Set bucket ACL
+GET /<bucket>?tagging # Get bucket tags
+PUT /<bucket>?tagging # Set bucket tags
+DELETE /<bucket>?tagging # Delete bucket tags
+GET /<bucket>?replication # Get replication configuration
+PUT /<bucket>?replication # Set replication rules
+DELETE /<bucket>?replication # Delete replication configuration
+GET /<bucket>?logging # Get access logging configuration
+PUT /<bucket>?logging # Set access logging
+GET /<bucket>?notification # Get event notifications
+PUT /<bucket>?notification # Set event notifications (webhooks)
+GET /<bucket>?object-lock # Get object lock configuration
+PUT /<bucket>?object-lock # Set object lock configuration
+GET /<bucket>?website # Get website configuration
+PUT /<bucket>?website # Set website configuration
+DELETE /<bucket>?website # Delete website configuration
+GET /<bucket>?uploads # List active multipart uploads
+GET /<bucket>?versions # List object versions
+GET /<bucket>?location # Get bucket location/region
+
+# Object Operations
+PUT /<bucket>/<key> # Upload object
+GET /<bucket>/<key> # Download object (supports Range header)
+DELETE /<bucket>/<key> # Delete object
+HEAD /<bucket>/<key> # Get object metadata
+POST /<bucket>/<key> # POST upload with policy
+POST /<bucket>/<key>?select # SelectObjectContent (SQL query)
+
+# Object Configuration
+GET /<bucket>/<key>?tagging # Get object tags
+PUT /<bucket>/<key>?tagging # Set object tags
+DELETE /<bucket>/<key>?tagging # Delete object tags
+GET /<bucket>/<key>?acl # Get object ACL
+PUT /<bucket>/<key>?acl # Set object ACL
+PUT /<bucket>/<key>?retention # Set object retention
+GET /<bucket>/<key>?retention # Get object retention
+PUT /<bucket>/<key>?legal-hold # Set legal hold
+GET /<bucket>/<key>?legal-hold # Get legal hold status
+
+# Multipart Upload
+POST /<bucket>/<key>?uploads # Initiate multipart upload
+PUT /<bucket>/<key>?uploadId=X&partNumber=N # Upload part
+PUT /<bucket>/<key>?uploadId=X&partNumber=N (with x-amz-copy-source) # UploadPartCopy
+POST /<bucket>/<key>?uploadId=X # Complete multipart upload
+DELETE /<bucket>/<key>?uploadId=X # Abort multipart upload
+GET /<bucket>/<key>?uploadId=X # List parts
+
+# Copy Operations
+PUT /<bucket>/<key> (with x-amz-copy-source header) # CopyObject
+
+# Admin API
+GET /admin/site # Get local site info
+PUT /admin/site # Update local site
+GET /admin/sites # List peer sites
+POST /admin/sites # Register peer site
+GET /admin/sites/<site_id> # Get peer site
+PUT /admin/sites/<site_id> # Update peer site
+DELETE /admin/sites/<site_id> # Unregister peer site
+GET /admin/sites/<site_id>/health # Check peer health
+GET /admin/topology # Get cluster topology
+GET /admin/website-domains # List domain mappings
+POST /admin/website-domains # Create domain mapping
+GET /admin/website-domains/<domain> # Get domain mapping
+PUT /admin/website-domains/<domain> # Update domain mapping
+DELETE /admin/website-domains/<domain> # Delete domain mapping
+
+# KMS API
+GET /kms/keys # List KMS keys
+POST /kms/keys # Create KMS key
+GET /kms/keys/<key_id> # Get key details
+DELETE /kms/keys/<key_id> # Schedule key deletion
+POST /kms/keys/<key_id>/enable # Enable key
+POST /kms/keys/<key_id>/disable # Disable key
+POST /kms/keys/<key_id>/rotate # Rotate key material
+POST /kms/encrypt # Encrypt data
+POST /kms/decrypt # Decrypt data
+POST /kms/generate-data-key # Generate data key
+POST /kms/generate-random # Generate random bytes
 ```
## 15. Health Check Endpoint
The API exposes a simple health check endpoint for monitoring and load balancer integration:
```bash
# Check API health
curl http://localhost:5000/myfsio/health
# Response
{"status": "ok", "version": "0.1.7"}
```
The response includes:
- `status`: Always `"ok"` when the server is running
- `version`: Current application version from `app/version.py`
Use this endpoint for:
- Load balancer health checks
- Kubernetes liveness/readiness probes
- Monitoring system integration (Prometheus, Datadog, etc.)
## 16. Object Lock & Retention
Object Lock prevents objects from being deleted or overwritten for a specified retention period. MyFSIO supports both GOVERNANCE and COMPLIANCE modes.
### Retention Modes
| Mode | Description |
|------|-------------|
| **GOVERNANCE** | Objects can't be deleted by normal users, but users with `s3:BypassGovernanceRetention` permission can override |
| **COMPLIANCE** | Objects can't be deleted or overwritten by anyone, including root, until the retention period expires |
### Enabling Object Lock
Object Lock must be enabled when creating a bucket:
```bash
# Create bucket with Object Lock enabled
curl -X PUT "http://localhost:5000/my-bucket" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-bucket-object-lock-enabled: true"
# Set default retention configuration
curl -X PUT "http://localhost:5000/my-bucket?object-lock" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"ObjectLockEnabled": "Enabled",
"Rule": {
"DefaultRetention": {
"Mode": "GOVERNANCE",
"Days": 30
}
}
}'
```
### Per-Object Retention
Set retention on individual objects:
```bash
# Set object retention
curl -X PUT "http://localhost:5000/my-bucket/important.pdf?retention" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"Mode": "COMPLIANCE",
"RetainUntilDate": "2025-12-31T23:59:59Z"
}'
# Get object retention
curl "http://localhost:5000/my-bucket/important.pdf?retention" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
### Legal Hold
Legal hold provides indefinite protection independent of retention settings:
```bash
# Enable legal hold
curl -X PUT "http://localhost:5000/my-bucket/document.pdf?legal-hold" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{"Status": "ON"}'
# Disable legal hold
curl -X PUT "http://localhost:5000/my-bucket/document.pdf?legal-hold" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{"Status": "OFF"}'
# Check legal hold status
curl "http://localhost:5000/my-bucket/document.pdf?legal-hold" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
## 17. Access Logging
Enable S3-style access logging to track all requests to your buckets.
### Configuration
```bash
# Enable access logging
curl -X PUT "http://localhost:5000/my-bucket?logging" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"LoggingEnabled": {
"TargetBucket": "log-bucket",
"TargetPrefix": "logs/my-bucket/"
}
}'
# Get logging configuration
curl "http://localhost:5000/my-bucket?logging" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Disable logging (empty configuration)
curl -X PUT "http://localhost:5000/my-bucket?logging" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{}'
```
### Log Format
Access logs are written in S3-compatible format with fields including:
- Timestamp, bucket, key
- Operation (REST.GET.OBJECT, REST.PUT.OBJECT, etc.)
- Request ID, requester, source IP
- HTTP status, error code, bytes sent
- Total time, turn-around time
- Referrer, User-Agent
## 18. Bucket Notifications & Webhooks
Configure event notifications to trigger webhooks when objects are created or deleted.
### Supported Events
| Event Type | Description |
|-----------|-------------|
| `s3:ObjectCreated:*` | Any object creation (PUT, POST, COPY, multipart) |
| `s3:ObjectCreated:Put` | Object created via PUT |
| `s3:ObjectCreated:Post` | Object created via POST |
| `s3:ObjectCreated:Copy` | Object created via COPY |
| `s3:ObjectCreated:CompleteMultipartUpload` | Multipart upload completed |
| `s3:ObjectRemoved:*` | Any object deletion |
| `s3:ObjectRemoved:Delete` | Object deleted |
| `s3:ObjectRemoved:DeleteMarkerCreated` | Delete marker created (versioned bucket) |
### Configuration
```bash
# Set notification configuration
curl -X PUT "http://localhost:5000/my-bucket?notification" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"TopicConfigurations": [
{
"Id": "upload-notify",
"TopicArn": "https://webhook.example.com/s3-events",
"Events": ["s3:ObjectCreated:*"],
"Filter": {
"Key": {
"FilterRules": [
{"Name": "prefix", "Value": "uploads/"},
{"Name": "suffix", "Value": ".jpg"}
]
}
}
}
]
}'
# Get notification configuration
curl "http://localhost:5000/my-bucket?notification" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
### Webhook Payload
The webhook receives a JSON payload similar to AWS S3 event notifications:
```json
{
"Records": [
{
"eventVersion": "2.1",
"eventSource": "myfsio:s3",
"eventTime": "2024-01-15T10:30:00.000Z",
"eventName": "ObjectCreated:Put",
"s3": {
"bucket": {"name": "my-bucket"},
"object": {
"key": "uploads/photo.jpg",
"size": 102400,
"eTag": "abc123..."
}
}
}
]
}
```
### Security Notes
- Webhook URLs are validated to prevent SSRF attacks
- Internal/private IP ranges are blocked by default
- Use HTTPS endpoints in production
## 19. SelectObjectContent (SQL Queries)
Query CSV, JSON, or Parquet files directly using SQL without downloading the entire object. Requires DuckDB to be installed.
### Prerequisites
```bash
pip install duckdb
```
### Usage
```bash
# Query a CSV file
curl -X POST "http://localhost:5000/my-bucket/data.csv?select" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"Expression": "SELECT name, age FROM s3object WHERE age > 25",
"ExpressionType": "SQL",
"InputSerialization": {
"CSV": {
"FileHeaderInfo": "USE",
"FieldDelimiter": ","
}
},
"OutputSerialization": {
"JSON": {}
}
}'
# Query a JSON file
curl -X POST "http://localhost:5000/my-bucket/data.json?select" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"Expression": "SELECT * FROM s3object s WHERE s.status = '\"active'\"",
"ExpressionType": "SQL",
"InputSerialization": {"JSON": {"Type": "LINES"}},
"OutputSerialization": {"JSON": {}}
}'
```
### Supported Input Formats
| Format | Options |
|--------|---------|
| **CSV** | `FileHeaderInfo` (USE, IGNORE, NONE), `FieldDelimiter`, `QuoteCharacter`, `RecordDelimiter` |
| **JSON** | `Type` (DOCUMENT, LINES) |
| **Parquet** | Automatic schema detection |
### Output Formats
- **JSON**: Returns results as JSON records
- **CSV**: Returns results as CSV
## 20. PostObject (HTML Form Upload)
Upload objects using HTML forms with policy-based authorization. Useful for browser-based direct uploads.
### Form Fields
| Field | Required | Description |
|-------|----------|-------------|
| `key` | Yes | Object key (can include `${filename}` placeholder) |
| `file` | Yes | The file to upload |
| `policy` | No | Base64-encoded policy document |
| `x-amz-signature` | No | Policy signature |
| `x-amz-credential` | No | Credential scope |
| `x-amz-algorithm` | No | Signing algorithm (AWS4-HMAC-SHA256) |
| `x-amz-date` | No | Request timestamp |
| `Content-Type` | No | MIME type of the file |
| `x-amz-meta-*` | No | Custom metadata |
### Example HTML Form
```html
<form action="http://localhost:5000/my-bucket" method="post" enctype="multipart/form-data">
<input type="hidden" name="key" value="uploads/${filename}">
<input type="hidden" name="Content-Type" value="image/jpeg">
<input type="hidden" name="x-amz-meta-user" value="john">
<input type="file" name="file">
<button type="submit">Upload</button>
</form>
```
### With Policy (Signed Upload)
For authenticated uploads, include a policy document:
```bash
# Generate policy and signature using boto3 or similar
# Then include in form:
# - policy: base64(policy_document)
# - x-amz-signature: HMAC-SHA256(policy, signing_key)
# - x-amz-credential: access_key/date/region/s3/aws4_request
# - x-amz-algorithm: AWS4-HMAC-SHA256
# - x-amz-date: YYYYMMDDTHHMMSSZ
```
## 21. Advanced S3 Operations
### CopyObject
Copy objects within or between buckets:
```bash
# Copy within same bucket
curl -X PUT "http://localhost:5000/my-bucket/copy-of-file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-copy-source: /my-bucket/original-file.txt"
# Copy to different bucket
curl -X PUT "http://localhost:5000/other-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-copy-source: /my-bucket/original-file.txt"
# Copy with metadata replacement
curl -X PUT "http://localhost:5000/my-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-copy-source: /my-bucket/file.txt" \
-H "x-amz-metadata-directive: REPLACE" \
-H "x-amz-meta-newkey: newvalue"
```
### UploadPartCopy
Copy data from an existing object into a multipart upload part:
```bash
# Initiate multipart upload
UPLOAD_ID=$(curl -X POST "http://localhost:5000/my-bucket/large-file.bin?uploads" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." | jq -r '.UploadId')
# Copy bytes 0-10485759 from source as part 1
curl -X PUT "http://localhost:5000/my-bucket/large-file.bin?uploadId=$UPLOAD_ID&partNumber=1" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-copy-source: /source-bucket/source-file.bin" \
-H "x-amz-copy-source-range: bytes=0-10485759"
# Copy bytes 10485760-20971519 as part 2
curl -X PUT "http://localhost:5000/my-bucket/large-file.bin?uploadId=$UPLOAD_ID&partNumber=2" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-copy-source: /source-bucket/source-file.bin" \
-H "x-amz-copy-source-range: bytes=10485760-20971519"
```
### Range Requests
Download partial content using the Range header:
```bash
# Get first 1000 bytes
curl "http://localhost:5000/my-bucket/large-file.bin" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "Range: bytes=0-999"
# Get bytes 1000-1999
curl "http://localhost:5000/my-bucket/large-file.bin" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "Range: bytes=1000-1999"
# Get last 500 bytes
curl "http://localhost:5000/my-bucket/large-file.bin" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "Range: bytes=-500"
# Get from byte 5000 to end
curl "http://localhost:5000/my-bucket/large-file.bin" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "Range: bytes=5000-"
```
Range responses include:
- HTTP 206 Partial Content status
- `Content-Range` header showing the byte range
- `Accept-Ranges: bytes` header
### Conditional Requests
Use conditional headers for cache validation:
```bash
# Only download if modified since
curl "http://localhost:5000/my-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "If-Modified-Since: Wed, 15 Jan 2025 10:00:00 GMT"
# Only download if ETag doesn't match (changed)
curl "http://localhost:5000/my-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "If-None-Match: \"abc123...\""
# Only download if ETag matches
curl "http://localhost:5000/my-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "If-Match: \"abc123...\""
```
## 22. Access Control Lists (ACLs)
ACLs provide legacy-style permission management for buckets and objects.
### Canned ACLs
| ACL | Description |
|-----|-------------|
| `private` | Owner gets FULL_CONTROL (default) |
| `public-read` | Owner FULL_CONTROL, public READ |
| `public-read-write` | Owner FULL_CONTROL, public READ and WRITE |
| `authenticated-read` | Owner FULL_CONTROL, authenticated users READ |
### Setting ACLs
```bash
# Set bucket ACL using canned ACL
curl -X PUT "http://localhost:5000/my-bucket?acl" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-acl: public-read"
# Set object ACL
curl -X PUT "http://localhost:5000/my-bucket/file.txt?acl" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-acl: private"
# Set ACL during upload
curl -X PUT "http://localhost:5000/my-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-acl: public-read" \
--data-binary @file.txt
# Get bucket ACL
curl "http://localhost:5000/my-bucket?acl" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Get object ACL
curl "http://localhost:5000/my-bucket/file.txt?acl" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
### ACL vs Bucket Policies
- **ACLs**: Simple, limited options, legacy approach
- **Bucket Policies**: Powerful, flexible, recommended for new deployments
For most use cases, prefer bucket policies over ACLs.
## 23. Object & Bucket Tagging
Add metadata tags to buckets and objects for organization, cost allocation, or lifecycle rule filtering.
### Bucket Tagging
```bash
# Set bucket tags
curl -X PUT "http://localhost:5000/my-bucket?tagging" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"TagSet": [
{"Key": "Environment", "Value": "Production"},
{"Key": "Team", "Value": "Engineering"}
]
}'
# Get bucket tags
curl "http://localhost:5000/my-bucket?tagging" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Delete bucket tags
curl -X DELETE "http://localhost:5000/my-bucket?tagging" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
### Object Tagging
```bash
# Set object tags
curl -X PUT "http://localhost:5000/my-bucket/file.txt?tagging" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"TagSet": [
{"Key": "Classification", "Value": "Confidential"},
{"Key": "Owner", "Value": "john@example.com"}
]
}'
# Get object tags
curl "http://localhost:5000/my-bucket/file.txt?tagging" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Delete object tags
curl -X DELETE "http://localhost:5000/my-bucket/file.txt?tagging" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Set tags during upload
curl -X PUT "http://localhost:5000/my-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-tagging: Environment=Staging&Team=QA" \
--data-binary @file.txt
```
### Tagging Limits
- Maximum 50 tags per object (configurable via `OBJECT_TAG_LIMIT`)
- Tag key: 1-128 Unicode characters
- Tag value: 0-256 Unicode characters
### Use Cases
- **Lifecycle Rules**: Filter objects for expiration by tag
- **Access Control**: Use tag conditions in bucket policies
- **Cost Tracking**: Group objects by project or department
- **Automation**: Trigger actions based on object tags
## 24. CORS Configuration
Configure Cross-Origin Resource Sharing for browser-based applications.
### Setting CORS Rules
```bash
# Set CORS configuration
curl -X PUT "http://localhost:5000/my-bucket?cors" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"CORSRules": [
{
"AllowedOrigins": ["https://example.com", "https://app.example.com"],
"AllowedMethods": ["GET", "PUT", "POST", "DELETE"],
"AllowedHeaders": ["*"],
"ExposeHeaders": ["ETag", "x-amz-meta-*"],
"MaxAgeSeconds": 3600
}
]
}'
# Get CORS configuration
curl "http://localhost:5000/my-bucket?cors" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Delete CORS configuration
curl -X DELETE "http://localhost:5000/my-bucket?cors" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
### CORS Rule Fields
| Field | Description |
|-------|-------------|
| `AllowedOrigins` | Origins allowed to access the bucket (required) |
| `AllowedMethods` | HTTP methods allowed (GET, PUT, POST, DELETE, HEAD) |
| `AllowedHeaders` | Request headers allowed in preflight |
| `ExposeHeaders` | Response headers visible to browser |
| `MaxAgeSeconds` | How long browser can cache preflight response |
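You can exercise a rule from the command line by simulating the browser's preflight; if a rule matches, the response should carry the corresponding `Access-Control-Allow-*` headers:
```bash
# Simulate a preflight request for a cross-origin PUT
curl -i -X OPTIONS "http://localhost:5000/my-bucket/file.txt" \
  -H "Origin: https://example.com" \
  -H "Access-Control-Request-Method: PUT" \
  -H "Access-Control-Request-Headers: content-type"
```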
## 25. List Objects API v2
MyFSIO supports both the ListObjects (v1) and ListObjectsV2 listing APIs.
### Using v2 API
```bash
# List with v2 (supports continuation tokens)
curl "http://localhost:5000/my-bucket?list-type=2" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# With prefix and delimiter (folder-like listing)
curl "http://localhost:5000/my-bucket?list-type=2&prefix=photos/&delimiter=/" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Pagination with continuation token
curl "http://localhost:5000/my-bucket?list-type=2&max-keys=100&continuation-token=TOKEN" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Start after specific key
curl "http://localhost:5000/my-bucket?list-type=2&start-after=photos/2024/" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
### v1 vs v2 Differences
| Feature | v1 | v2 |
|---------|----|----|
| Pagination | `marker` | `continuation-token` |
| Start position | `marker` | `start-after` |
| Fetch owner info | Always included | Use `fetch-owner=true` |
| Max keys | 1000 | 1000 |
### Query Parameters
| Parameter | Description |
|-----------|-------------|
| `list-type` | Set to `2` for v2 API |
| `prefix` | Filter objects by key prefix |
| `delimiter` | Group objects (typically `/`) |
| `max-keys` | Maximum results (1-1000, default 1000) |
| `continuation-token` | Token from previous response |
| `start-after` | Start listing after this key |
| `fetch-owner` | Include owner info in response |
| `encoding-type` | Set to `url` for URL-encoded keys |
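A pagination sketch that follows continuation tokens until the listing is exhausted. The JSON field names (`Contents`, `NextContinuationToken`) are assumed from the multipart example above; adjust the extraction if your deployment returns XML:
```bash
# List every key in the bucket, 100 at a time.
TOKEN=""
while :; do
  RESP=$(curl -s "http://localhost:5000/my-bucket?list-type=2&max-keys=100${TOKEN:+&continuation-token=$TOKEN}" \
    -H "X-Access-Key: ..." -H "X-Secret-Key: ...")
  echo "$RESP" | jq -r '.Contents[]?.Key'
  TOKEN=$(echo "$RESP" | jq -r '.NextContinuationToken // empty')
  [ -z "$TOKEN" ] && break
done
```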
## 26. Static Website Hosting
MyFSIO can serve S3 buckets as static websites via custom domain mappings. When a request arrives with a `Host` header matching a mapped domain, MyFSIO resolves the bucket and serves objects directly.
### Enabling
Set the environment variable:
```bash
WEBSITE_HOSTING_ENABLED=true
```
When disabled, all website hosting endpoints return 400 and domain-based serving is skipped.
### Configuration
| Variable | Default | Description |
|----------|---------|-------------|
| `WEBSITE_HOSTING_ENABLED` | `false` | Master switch for website hosting |
### Setting Up a Website
**Step 1: Configure the bucket website settings**
```bash
curl -X PUT "http://localhost:5000/my-site?website" \
-H "Authorization: ..." \
-d '<?xml version="1.0" encoding="UTF-8"?>
<WebsiteConfiguration>
<IndexDocument><Suffix>index.html</Suffix></IndexDocument>
<ErrorDocument><Key>404.html</Key></ErrorDocument>
</WebsiteConfiguration>'
```
- `IndexDocument` with `Suffix` is required (must not contain `/`)
- `ErrorDocument` is optional
**Step 2: Map a domain to the bucket**
```bash
curl -X POST "http://localhost:5000/admin/website-domains" \
-H "Authorization: ..." \
-H "Content-Type: application/json" \
-d '{"domain": "example.com", "bucket": "my-site"}'
```
**Step 3: Point your domain to MyFSIO**
For HTTP-only (direct access), point DNS to the MyFSIO host on port 5000.
For HTTPS (recommended), use a reverse proxy. The critical requirement is passing the original `Host` header so MyFSIO can match the domain to a bucket.
**nginx example:**
```nginx
server {
server_name example.com;
listen 443 ssl;
ssl_certificate /etc/ssl/certs/example.com.pem;
ssl_certificate_key /etc/ssl/private/example.com.key;
location / {
proxy_pass http://127.0.0.1:5000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}
```
`proxy_set_header Host $host;` is required — without it, MyFSIO cannot match the incoming domain to a bucket. You do not need any path-based routing rules; MyFSIO handles all object resolution internally.
### How Domain Routing Works
1. A request arrives with `Host: example.com`
2. MyFSIO's `before_request` hook strips the port and looks up the domain in the `WebsiteDomainStore`
3. If a match is found, it loads the bucket's website config (index/error documents)
4. Object key resolution:
- `/` or trailing `/` → append `index_document` (e.g., `index.html`)
- `/path` → try exact match, then try `path/index_document`
- Not found → serve `error_document` with 404 status
5. If no domain match is found, the request falls through to normal S3 API / UI routing
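You can verify a mapping end-to-end without touching DNS by sending the `Host` header directly (a sketch; assumes `example.com` was mapped in Step 2):
```bash
# Exercise domain routing locally by spoofing the Host header
curl -i -H "Host: example.com" "http://127.0.0.1:5000/"          # serves index.html
curl -i -H "Host: example.com" "http://127.0.0.1:5000/docs/"     # tries docs/index.html
curl -i -H "Host: example.com" "http://127.0.0.1:5000/missing"   # error document with 404
```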
### Domain Mapping Admin API
All endpoints require admin (`iam:*`) permissions.
| Method | Route | Body | Description |
|--------|-------|------|-------------|
| `GET` | `/admin/website-domains` | — | List all mappings |
| `POST` | `/admin/website-domains` | `{"domain": "...", "bucket": "..."}` | Create mapping |
| `GET` | `/admin/website-domains/<domain>` | — | Get single mapping |
| `PUT` | `/admin/website-domains/<domain>` | `{"bucket": "..."}` | Update mapping |
| `DELETE` | `/admin/website-domains/<domain>` | — | Delete mapping |
### Bucket Website API
| Method | Route | Description |
|--------|-------|-------------|
| `PUT` | `/<bucket>?website` | Set website config (XML body) |
| `GET` | `/<bucket>?website` | Get website config (XML response) |
| `DELETE` | `/<bucket>?website` | Remove website config |
### Web UI
- **Per-bucket config:** Bucket Details → Properties tab → "Static Website Hosting" card
- **Domain management:** Sidebar → "Domains" (visible when hosting is enabled and user is admin)

myfsio_core/Cargo.lock generated Normal file
@@ -0,0 +1,421 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "bitflags"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "cpufeatures"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
dependencies = [
"libc",
]
[[package]]
name = "crypto-common"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
"subtle",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash",
]
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "hmac"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
dependencies = [
"digest",
]
[[package]]
name = "libc"
version = "0.2.182"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
[[package]]
name = "lock_api"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
dependencies = [
"scopeguard",
]
[[package]]
name = "lru"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198"
dependencies = [
"hashbrown",
]
[[package]]
name = "md-5"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
dependencies = [
"cfg-if",
"digest",
]
[[package]]
name = "memchr"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "myfsio_core"
version = "0.1.0"
dependencies = [
"hex",
"hmac",
"lru",
"md-5",
"parking_lot",
"pyo3",
"regex",
"sha2",
"unicode-normalization",
]
[[package]]
name = "once_cell"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "parking_lot"
version = "0.12.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-link",
]
[[package]]
name = "portable-atomic"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
[[package]]
name = "pyo3"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c738662e2181be11cb82487628404254902bb3225d8e9e99c31f3ef82a405c"
dependencies = [
"libc",
"once_cell",
"portable-atomic",
"pyo3-build-config",
"pyo3-ffi",
"pyo3-macros",
]
[[package]]
name = "pyo3-build-config"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9ca0864a7dd3c133a7f3f020cbff2e12e88420da854c35540fd20ce2d60e435"
dependencies = [
"target-lexicon",
]
[[package]]
name = "pyo3-ffi"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dfc1956b709823164763a34cc42bbfd26b8730afa77809a3df8b94a3ae3b059"
dependencies = [
"libc",
"pyo3-build-config",
]
[[package]]
name = "pyo3-macros"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29dc660ad948bae134d579661d08033fbb1918f4529c3bbe3257a68f2009ddf2"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
"quote",
"syn",
]
[[package]]
name = "pyo3-macros-backend"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78cd6c6d718acfcedf26c3d21fe0f053624368b0d44298c55d7138fde9331f7"
dependencies = [
"heck",
"proc-macro2",
"pyo3-build-config",
"quote",
"syn",
]
[[package]]
name = "quote"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
"bitflags",
]
[[package]]
name = "regex"
version = "1.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c"
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "sha2"
version = "0.10.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "subtle"
version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
[[package]]
name = "syn"
version = "2.0.116"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "target-lexicon"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
[[package]]
name = "tinyvec"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "typenum"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "unicode-normalization"
version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
dependencies = [
"tinyvec",
]
[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"

myfsio_core/Cargo.toml Normal file
@@ -0,0 +1,19 @@
[package]
name = "myfsio_core"
version = "0.1.0"
edition = "2021"
[lib]
name = "myfsio_core"
crate-type = ["cdylib"]
[dependencies]
pyo3 = { version = "0.28", features = ["extension-module"] }
hmac = "0.12"
sha2 = "0.10"
md-5 = "0.10"
hex = "0.4"
unicode-normalization = "0.1"
regex = "1"
lru = "0.14"
parking_lot = "0.12"

myfsio_core/pyproject.toml Normal file
@@ -0,0 +1,11 @@
[build-system]
requires = ["maturin>=1.0,<2.0"]
build-backend = "maturin"
[project]
name = "myfsio_core"
version = "0.1.0"
requires-python = ">=3.10"
[tool.maturin]
features = ["pyo3/extension-module"]

myfsio_core/src/hashing.rs Normal file
@@ -0,0 +1,90 @@
use md5::{Digest, Md5};
use pyo3::exceptions::PyIOError;
use pyo3::prelude::*;
use sha2::Sha256;
use std::fs::File;
use std::io::Read;
const CHUNK_SIZE: usize = 65536;
#[pyfunction]
pub fn md5_file(py: Python<'_>, path: &str) -> PyResult<String> {
let path = path.to_owned();
py.detach(move || {
let mut file = File::open(&path)
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
let mut hasher = Md5::new();
let mut buf = vec![0u8; CHUNK_SIZE];
loop {
let n = file
.read(&mut buf)
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(format!("{:x}", hasher.finalize()))
})
}
#[pyfunction]
pub fn md5_bytes(data: &[u8]) -> String {
let mut hasher = Md5::new();
hasher.update(data);
format!("{:x}", hasher.finalize())
}
#[pyfunction]
pub fn sha256_file(py: Python<'_>, path: &str) -> PyResult<String> {
let path = path.to_owned();
py.detach(move || {
let mut file = File::open(&path)
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
let mut hasher = Sha256::new();
let mut buf = vec![0u8; CHUNK_SIZE];
loop {
let n = file
.read(&mut buf)
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(format!("{:x}", hasher.finalize()))
})
}
#[pyfunction]
pub fn sha256_bytes(data: &[u8]) -> String {
let mut hasher = Sha256::new();
hasher.update(data);
format!("{:x}", hasher.finalize())
}
#[pyfunction]
pub fn md5_sha256_file(py: Python<'_>, path: &str) -> PyResult<(String, String)> {
let path = path.to_owned();
py.detach(move || {
let mut file = File::open(&path)
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
let mut md5_hasher = Md5::new();
let mut sha_hasher = Sha256::new();
let mut buf = vec![0u8; CHUNK_SIZE];
loop {
let n = file
.read(&mut buf)
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
if n == 0 {
break;
}
md5_hasher.update(&buf[..n]);
sha_hasher.update(&buf[..n]);
}
Ok((
format!("{:x}", md5_hasher.finalize()),
format!("{:x}", sha_hasher.finalize()),
))
})
}

myfsio_core/src/lib.rs Normal file
@@ -0,0 +1,30 @@
mod hashing;
mod sigv4;
mod validation;
use pyo3::prelude::*;
#[pymodule]
mod myfsio_core {
use super::*;
#[pymodule_init]
fn init(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(sigv4::derive_signing_key, m)?)?;
m.add_function(wrap_pyfunction!(sigv4::compute_signature, m)?)?;
m.add_function(wrap_pyfunction!(sigv4::build_string_to_sign, m)?)?;
m.add_function(wrap_pyfunction!(sigv4::constant_time_compare, m)?)?;
m.add_function(wrap_pyfunction!(sigv4::clear_signing_key_cache, m)?)?;
m.add_function(wrap_pyfunction!(hashing::md5_file, m)?)?;
m.add_function(wrap_pyfunction!(hashing::md5_bytes, m)?)?;
m.add_function(wrap_pyfunction!(hashing::sha256_file, m)?)?;
m.add_function(wrap_pyfunction!(hashing::sha256_bytes, m)?)?;
m.add_function(wrap_pyfunction!(hashing::md5_sha256_file, m)?)?;
m.add_function(wrap_pyfunction!(validation::validate_object_key, m)?)?;
m.add_function(wrap_pyfunction!(validation::validate_bucket_name, m)?)?;
Ok(())
}
}

myfsio_core/src/sigv4.rs Normal file
@@ -0,0 +1,108 @@
use hmac::{Hmac, Mac};
use lru::LruCache;
use parking_lot::Mutex;
use pyo3::prelude::*;
use sha2::{Digest, Sha256};
use std::num::NonZeroUsize;
use std::sync::LazyLock;
use std::time::Instant;
type HmacSha256 = Hmac<Sha256>;
struct CacheEntry {
key: Vec<u8>,
created: Instant,
}
static SIGNING_KEY_CACHE: LazyLock<Mutex<LruCache<(String, String, String, String), CacheEntry>>> =
LazyLock::new(|| Mutex::new(LruCache::new(NonZeroUsize::new(256).unwrap())));
const CACHE_TTL_SECS: u64 = 60;
fn hmac_sha256(key: &[u8], msg: &[u8]) -> Vec<u8> {
let mut mac = HmacSha256::new_from_slice(key).expect("HMAC key length is always valid");
mac.update(msg);
mac.finalize().into_bytes().to_vec()
}
#[pyfunction]
pub fn derive_signing_key(
secret_key: &str,
date_stamp: &str,
region: &str,
service: &str,
) -> Vec<u8> {
let cache_key = (
secret_key.to_owned(),
date_stamp.to_owned(),
region.to_owned(),
service.to_owned(),
);
{
let mut cache = SIGNING_KEY_CACHE.lock();
if let Some(entry) = cache.get(&cache_key) {
if entry.created.elapsed().as_secs() < CACHE_TTL_SECS {
return entry.key.clone();
}
cache.pop(&cache_key);
}
}
let k_date = hmac_sha256(format!("AWS4{}", secret_key).as_bytes(), date_stamp.as_bytes());
let k_region = hmac_sha256(&k_date, region.as_bytes());
let k_service = hmac_sha256(&k_region, service.as_bytes());
let k_signing = hmac_sha256(&k_service, b"aws4_request");
{
let mut cache = SIGNING_KEY_CACHE.lock();
cache.put(
cache_key,
CacheEntry {
key: k_signing.clone(),
created: Instant::now(),
},
);
}
k_signing
}
#[pyfunction]
pub fn compute_signature(signing_key: &[u8], string_to_sign: &str) -> String {
let sig = hmac_sha256(signing_key, string_to_sign.as_bytes());
hex::encode(sig)
}
fn sha256_hex(data: &[u8]) -> String {
let mut hasher = Sha256::new();
hasher.update(data);
hex::encode(hasher.finalize())
}
#[pyfunction]
pub fn build_string_to_sign(
amz_date: &str,
credential_scope: &str,
canonical_request: &str,
) -> String {
let cr_hash = sha256_hex(canonical_request.as_bytes());
format!("AWS4-HMAC-SHA256\n{}\n{}\n{}", amz_date, credential_scope, cr_hash)
}
#[pyfunction]
pub fn constant_time_compare(a: &str, b: &str) -> bool {
if a.len() != b.len() {
return false;
}
let mut result: u8 = 0;
for (x, y) in a.bytes().zip(b.bytes()) {
result |= x ^ y;
}
result == 0
}
#[pyfunction]
pub fn clear_signing_key_cache() {
SIGNING_KEY_CACHE.lock().clear();
}

myfsio_core/src/validation.rs Normal file
@@ -0,0 +1,149 @@
use pyo3::prelude::*;
use std::sync::LazyLock;
use unicode_normalization::UnicodeNormalization;
const WINDOWS_RESERVED: &[&str] = &[
"CON", "PRN", "AUX", "NUL", "COM0", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
"COM8", "COM9", "LPT0", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
"LPT9",
];
const WINDOWS_ILLEGAL_CHARS: &[char] = &['<', '>', ':', '"', '/', '\\', '|', '?', '*'];
const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
const SYSTEM_ROOT: &str = ".myfsio.sys";
static IP_REGEX: LazyLock<regex::Regex> =
LazyLock::new(|| regex::Regex::new(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$").unwrap());
#[pyfunction]
#[pyo3(signature = (object_key, max_length_bytes=1024, is_windows=false, reserved_prefixes=None))]
pub fn validate_object_key(
object_key: &str,
max_length_bytes: usize,
is_windows: bool,
reserved_prefixes: Option<Vec<String>>,
) -> PyResult<Option<String>> {
if object_key.is_empty() {
return Ok(Some("Object key required".to_string()));
}
if object_key.contains('\0') {
return Ok(Some("Object key contains null bytes".to_string()));
}
let normalized: String = object_key.nfc().collect();
if normalized.len() > max_length_bytes {
return Ok(Some(format!(
"Object key exceeds maximum length of {} bytes",
max_length_bytes
)));
}
if normalized.starts_with('/') || normalized.starts_with('\\') {
return Ok(Some("Object key cannot start with a slash".to_string()));
}
let parts: Vec<&str> = if cfg!(windows) || is_windows {
normalized.split(['/', '\\']).collect()
} else {
normalized.split('/').collect()
};
for part in &parts {
if part.is_empty() {
continue;
}
if *part == ".." {
return Ok(Some(
"Object key contains parent directory references".to_string(),
));
}
if *part == "." {
return Ok(Some("Object key contains invalid segments".to_string()));
}
if part.chars().any(|c| (c as u32) < 32) {
return Ok(Some(
"Object key contains control characters".to_string(),
));
}
if is_windows {
if part.chars().any(|c| WINDOWS_ILLEGAL_CHARS.contains(&c)) {
return Ok(Some(
"Object key contains characters not supported on Windows filesystems"
.to_string(),
));
}
if part.ends_with(' ') || part.ends_with('.') {
return Ok(Some(
"Object key segments cannot end with spaces or periods on Windows".to_string(),
));
}
let trimmed = part.trim_end_matches(['.', ' ']).to_uppercase();
if WINDOWS_RESERVED.contains(&trimmed.as_str()) {
return Ok(Some(format!("Invalid filename segment: {}", part)));
}
}
}
let non_empty_parts: Vec<&str> = parts.iter().filter(|p| !p.is_empty()).copied().collect();
if let Some(top) = non_empty_parts.first() {
if INTERNAL_FOLDERS.contains(top) || *top == SYSTEM_ROOT {
return Ok(Some("Object key uses a reserved prefix".to_string()));
}
if let Some(ref prefixes) = reserved_prefixes {
for prefix in prefixes {
if *top == prefix.as_str() {
return Ok(Some("Object key uses a reserved prefix".to_string()));
}
}
}
}
Ok(None)
}
#[pyfunction]
pub fn validate_bucket_name(bucket_name: &str) -> Option<String> {
let len = bucket_name.len();
if len < 3 || len > 63 {
return Some("Bucket name must be between 3 and 63 characters".to_string());
}
let bytes = bucket_name.as_bytes();
if !bytes[0].is_ascii_lowercase() && !bytes[0].is_ascii_digit() {
return Some(
"Bucket name must start and end with a lowercase letter or digit".to_string(),
);
}
if !bytes[len - 1].is_ascii_lowercase() && !bytes[len - 1].is_ascii_digit() {
return Some(
"Bucket name must start and end with a lowercase letter or digit".to_string(),
);
}
for &b in bytes {
if !b.is_ascii_lowercase() && !b.is_ascii_digit() && b != b'.' && b != b'-' {
return Some(
"Bucket name can only contain lowercase letters, digits, dots, and hyphens"
.to_string(),
);
}
}
if bucket_name.contains("..") {
return Some("Bucket name must not contain consecutive periods".to_string());
}
if IP_REGEX.is_match(bucket_name) {
return Some("Bucket name must not be formatted as an IP address".to_string());
}
None
}

(The remaining files in this diff are generated cargo build artifacts: `.rustc_info.json`, `CACHEDIR.TAG`, and `release/.fingerprint/*` hash, timestamp, and dep-info files. Some files were not shown because too many files have changed in this diff.)