Compare commits
12 Commits
4c661477d5
...
v0.2.1
| Author | SHA1 | Date | |
|---|---|---|---|
| 899db3421b | |||
| caf01d6ada | |||
| bb366cb4cd | |||
| a2745ff2ee | |||
| 28cb656d94 | |||
| 3c44152fc6 | |||
| 397515edce | |||
| 980fced7e4 | |||
| bae5009ec4 | |||
| 233780617f | |||
| fd8fb21517 | |||
| c6cbe822e1 |
22
Dockerfile
22
Dockerfile
@@ -1,33 +1,25 @@
|
|||||||
FROM python:3.14.3-slim
|
# syntax=docker/dockerfile:1.7
|
||||||
|
FROM python:3.12.12-slim
|
||||||
|
|
||||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||||
PYTHONUNBUFFERED=1
|
PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install build deps for any wheels that need compilation, then clean up
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
&& apt-get install -y --no-install-recommends build-essential curl \
|
&& apt-get install -y --no-install-recommends build-essential \
|
||||||
&& curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
|
||||||
|
|
||||||
COPY requirements.txt ./
|
COPY requirements.txt ./
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
RUN pip install --no-cache-dir maturin \
|
# Make entrypoint executable
|
||||||
&& cd myfsio_core \
|
|
||||||
&& maturin build --release \
|
|
||||||
&& pip install target/wheels/*.whl \
|
|
||||||
&& cd .. \
|
|
||||||
&& rm -rf myfsio_core/target \
|
|
||||||
&& pip uninstall -y maturin \
|
|
||||||
&& rustup self uninstall -y
|
|
||||||
|
|
||||||
RUN chmod +x docker-entrypoint.sh
|
RUN chmod +x docker-entrypoint.sh
|
||||||
|
|
||||||
|
# Create data directory and set permissions
|
||||||
RUN mkdir -p /app/data \
|
RUN mkdir -p /app/data \
|
||||||
&& useradd -m -u 1000 myfsio \
|
&& useradd -m -u 1000 myfsio \
|
||||||
&& chown -R myfsio:myfsio /app
|
&& chown -R myfsio:myfsio /app
|
||||||
@@ -40,6 +32,6 @@ ENV APP_HOST=0.0.0.0 \
|
|||||||
FLASK_DEBUG=0
|
FLASK_DEBUG=0
|
||||||
|
|
||||||
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
||||||
CMD python -c "import requests; requests.get('http://localhost:5000/myfsio/health', timeout=2)"
|
CMD python -c "import requests; requests.get('http://localhost:5000/healthz', timeout=2)"
|
||||||
|
|
||||||
CMD ["./docker-entrypoint.sh"]
|
CMD ["./docker-entrypoint.sh"]
|
||||||
|
|||||||
21
README.md
21
README.md
@@ -102,11 +102,6 @@ python run.py --mode ui # UI only (port 5100)
|
|||||||
| `ENCRYPTION_ENABLED` | `false` | Enable server-side encryption |
|
| `ENCRYPTION_ENABLED` | `false` | Enable server-side encryption |
|
||||||
| `KMS_ENABLED` | `false` | Enable Key Management Service |
|
| `KMS_ENABLED` | `false` | Enable Key Management Service |
|
||||||
| `LOG_LEVEL` | `INFO` | Logging verbosity |
|
| `LOG_LEVEL` | `INFO` | Logging verbosity |
|
||||||
| `SIGV4_TIMESTAMP_TOLERANCE_SECONDS` | `900` | Max time skew for SigV4 requests |
|
|
||||||
| `PRESIGNED_URL_MAX_EXPIRY_SECONDS` | `604800` | Max presigned URL expiry (7 days) |
|
|
||||||
| `REPLICATION_CONNECT_TIMEOUT_SECONDS` | `5` | Replication connection timeout |
|
|
||||||
| `SITE_SYNC_ENABLED` | `false` | Enable bi-directional site sync |
|
|
||||||
| `OBJECT_TAG_LIMIT` | `50` | Maximum tags per object |
|
|
||||||
|
|
||||||
## Data Layout
|
## Data Layout
|
||||||
|
|
||||||
@@ -154,13 +149,19 @@ All endpoints require AWS Signature Version 4 authentication unless using presig
|
|||||||
| `POST` | `/<bucket>/<key>?uploadId=X` | Complete multipart upload |
|
| `POST` | `/<bucket>/<key>?uploadId=X` | Complete multipart upload |
|
||||||
| `DELETE` | `/<bucket>/<key>?uploadId=X` | Abort multipart upload |
|
| `DELETE` | `/<bucket>/<key>?uploadId=X` | Abort multipart upload |
|
||||||
|
|
||||||
### Bucket Policies (S3-compatible)
|
### Presigned URLs
|
||||||
|
|
||||||
| Method | Endpoint | Description |
|
| Method | Endpoint | Description |
|
||||||
|--------|----------|-------------|
|
|--------|----------|-------------|
|
||||||
| `GET` | `/<bucket>?policy` | Get bucket policy |
|
| `POST` | `/presign/<bucket>/<key>` | Generate presigned URL |
|
||||||
| `PUT` | `/<bucket>?policy` | Set bucket policy |
|
|
||||||
| `DELETE` | `/<bucket>?policy` | Delete bucket policy |
|
### Bucket Policies
|
||||||
|
|
||||||
|
| Method | Endpoint | Description |
|
||||||
|
|--------|----------|-------------|
|
||||||
|
| `GET` | `/bucket-policy/<bucket>` | Get bucket policy |
|
||||||
|
| `PUT` | `/bucket-policy/<bucket>` | Set bucket policy |
|
||||||
|
| `DELETE` | `/bucket-policy/<bucket>` | Delete bucket policy |
|
||||||
|
|
||||||
### Versioning
|
### Versioning
|
||||||
|
|
||||||
@@ -174,7 +175,7 @@ All endpoints require AWS Signature Version 4 authentication unless using presig
|
|||||||
|
|
||||||
| Method | Endpoint | Description |
|
| Method | Endpoint | Description |
|
||||||
|--------|----------|-------------|
|
|--------|----------|-------------|
|
||||||
| `GET` | `/myfsio/health` | Health check endpoint |
|
| `GET` | `/healthz` | Health check endpoint |
|
||||||
|
|
||||||
## IAM & Access Control
|
## IAM & Access Control
|
||||||
|
|
||||||
|
|||||||
297
app/__init__.py
297
app/__init__.py
@@ -1,8 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import html as html_module
|
|
||||||
import logging
|
import logging
|
||||||
import mimetypes
|
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
@@ -12,13 +10,12 @@ from pathlib import Path
|
|||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from flask import Flask, Response, g, has_request_context, redirect, render_template, request, url_for
|
from flask import Flask, g, has_request_context, redirect, render_template, request, url_for
|
||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
from flask_wtf.csrf import CSRFError
|
from flask_wtf.csrf import CSRFError
|
||||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||||
|
|
||||||
from .access_logging import AccessLoggingService
|
from .access_logging import AccessLoggingService
|
||||||
from .operation_metrics import OperationMetricsCollector, classify_endpoint
|
|
||||||
from .compression import GzipMiddleware
|
from .compression import GzipMiddleware
|
||||||
from .acl import AclService
|
from .acl import AclService
|
||||||
from .bucket_policies import BucketPolicyStore
|
from .bucket_policies import BucketPolicyStore
|
||||||
@@ -33,10 +30,8 @@ from .notifications import NotificationService
|
|||||||
from .object_lock import ObjectLockService
|
from .object_lock import ObjectLockService
|
||||||
from .replication import ReplicationManager
|
from .replication import ReplicationManager
|
||||||
from .secret_store import EphemeralSecretStore
|
from .secret_store import EphemeralSecretStore
|
||||||
from .site_registry import SiteRegistry, SiteInfo
|
from .storage import ObjectStorage
|
||||||
from .storage import ObjectStorage, StorageError
|
|
||||||
from .version import get_version
|
from .version import get_version
|
||||||
from .website_domains import WebsiteDomainStore
|
|
||||||
|
|
||||||
|
|
||||||
def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
|
def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
|
||||||
@@ -108,9 +103,6 @@ def create_app(
|
|||||||
storage = ObjectStorage(
|
storage = ObjectStorage(
|
||||||
Path(app.config["STORAGE_ROOT"]),
|
Path(app.config["STORAGE_ROOT"]),
|
||||||
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 5),
|
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 5),
|
||||||
object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100),
|
|
||||||
bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0),
|
|
||||||
object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if app.config.get("WARM_CACHE_ON_STARTUP", True) and not app.config.get("TESTING"):
|
if app.config.get("WARM_CACHE_ON_STARTUP", True) and not app.config.get("TESTING"):
|
||||||
@@ -144,33 +136,12 @@ def create_app(
|
|||||||
)
|
)
|
||||||
|
|
||||||
connections = ConnectionStore(connections_path)
|
connections = ConnectionStore(connections_path)
|
||||||
replication = ReplicationManager(
|
replication = ReplicationManager(storage, connections, replication_rules_path, storage_root)
|
||||||
storage,
|
|
||||||
connections,
|
|
||||||
replication_rules_path,
|
|
||||||
storage_root,
|
|
||||||
connect_timeout=app.config.get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5),
|
|
||||||
read_timeout=app.config.get("REPLICATION_READ_TIMEOUT_SECONDS", 30),
|
|
||||||
max_retries=app.config.get("REPLICATION_MAX_RETRIES", 2),
|
|
||||||
streaming_threshold_bytes=app.config.get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024),
|
|
||||||
max_failures_per_bucket=app.config.get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50),
|
|
||||||
)
|
|
||||||
|
|
||||||
site_registry_path = config_dir / "site_registry.json"
|
|
||||||
site_registry = SiteRegistry(site_registry_path)
|
|
||||||
if app.config.get("SITE_ID") and not site_registry.get_local_site():
|
|
||||||
site_registry.set_local_site(SiteInfo(
|
|
||||||
site_id=app.config["SITE_ID"],
|
|
||||||
endpoint=app.config.get("SITE_ENDPOINT") or "",
|
|
||||||
region=app.config.get("SITE_REGION", "us-east-1"),
|
|
||||||
priority=app.config.get("SITE_PRIORITY", 100),
|
|
||||||
))
|
|
||||||
|
|
||||||
encryption_config = {
|
encryption_config = {
|
||||||
"encryption_enabled": app.config.get("ENCRYPTION_ENABLED", False),
|
"encryption_enabled": app.config.get("ENCRYPTION_ENABLED", False),
|
||||||
"encryption_master_key_path": app.config.get("ENCRYPTION_MASTER_KEY_PATH"),
|
"encryption_master_key_path": app.config.get("ENCRYPTION_MASTER_KEY_PATH"),
|
||||||
"default_encryption_algorithm": app.config.get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"),
|
"default_encryption_algorithm": app.config.get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"),
|
||||||
"encryption_chunk_size_bytes": app.config.get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024),
|
|
||||||
}
|
}
|
||||||
encryption_manager = EncryptionManager(encryption_config)
|
encryption_manager = EncryptionManager(encryption_config)
|
||||||
|
|
||||||
@@ -178,12 +149,7 @@ def create_app(
|
|||||||
if app.config.get("KMS_ENABLED", False):
|
if app.config.get("KMS_ENABLED", False):
|
||||||
kms_keys_path = Path(app.config.get("KMS_KEYS_PATH", ""))
|
kms_keys_path = Path(app.config.get("KMS_KEYS_PATH", ""))
|
||||||
kms_master_key_path = Path(app.config.get("ENCRYPTION_MASTER_KEY_PATH", ""))
|
kms_master_key_path = Path(app.config.get("ENCRYPTION_MASTER_KEY_PATH", ""))
|
||||||
kms_manager = KMSManager(
|
kms_manager = KMSManager(kms_keys_path, kms_master_key_path)
|
||||||
kms_keys_path,
|
|
||||||
kms_master_key_path,
|
|
||||||
generate_data_key_min_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1),
|
|
||||||
generate_data_key_max_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024),
|
|
||||||
)
|
|
||||||
encryption_manager.set_kms_provider(kms_manager)
|
encryption_manager.set_kms_provider(kms_manager)
|
||||||
|
|
||||||
if app.config.get("ENCRYPTION_ENABLED", False):
|
if app.config.get("ENCRYPTION_ENABLED", False):
|
||||||
@@ -192,10 +158,7 @@ def create_app(
|
|||||||
|
|
||||||
acl_service = AclService(storage_root)
|
acl_service = AclService(storage_root)
|
||||||
object_lock_service = ObjectLockService(storage_root)
|
object_lock_service = ObjectLockService(storage_root)
|
||||||
notification_service = NotificationService(
|
notification_service = NotificationService(storage_root)
|
||||||
storage_root,
|
|
||||||
allow_internal_endpoints=app.config.get("ALLOW_INTERNAL_ENDPOINTS", False),
|
|
||||||
)
|
|
||||||
access_logging_service = AccessLoggingService(storage_root)
|
access_logging_service = AccessLoggingService(storage_root)
|
||||||
access_logging_service.set_storage(storage)
|
access_logging_service.set_storage(storage)
|
||||||
|
|
||||||
@@ -206,7 +169,6 @@ def create_app(
|
|||||||
base_storage,
|
base_storage,
|
||||||
interval_seconds=app.config.get("LIFECYCLE_INTERVAL_SECONDS", 3600),
|
interval_seconds=app.config.get("LIFECYCLE_INTERVAL_SECONDS", 3600),
|
||||||
storage_root=storage_root,
|
storage_root=storage_root,
|
||||||
max_history_per_bucket=app.config.get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50),
|
|
||||||
)
|
)
|
||||||
lifecycle_manager.start()
|
lifecycle_manager.start()
|
||||||
|
|
||||||
@@ -224,92 +186,14 @@ def create_app(
|
|||||||
app.extensions["object_lock"] = object_lock_service
|
app.extensions["object_lock"] = object_lock_service
|
||||||
app.extensions["notifications"] = notification_service
|
app.extensions["notifications"] = notification_service
|
||||||
app.extensions["access_logging"] = access_logging_service
|
app.extensions["access_logging"] = access_logging_service
|
||||||
app.extensions["site_registry"] = site_registry
|
|
||||||
|
|
||||||
website_domains_store = None
|
|
||||||
if app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
|
||||||
website_domains_path = config_dir / "website_domains.json"
|
|
||||||
website_domains_store = WebsiteDomainStore(website_domains_path)
|
|
||||||
app.extensions["website_domains"] = website_domains_store
|
|
||||||
|
|
||||||
from .s3_client import S3ProxyClient
|
|
||||||
api_base = app.config.get("API_BASE_URL") or "http://127.0.0.1:5000"
|
|
||||||
app.extensions["s3_proxy"] = S3ProxyClient(
|
|
||||||
api_base_url=api_base,
|
|
||||||
region=app.config.get("AWS_REGION", "us-east-1"),
|
|
||||||
)
|
|
||||||
|
|
||||||
operation_metrics_collector = None
|
|
||||||
if app.config.get("OPERATION_METRICS_ENABLED", False):
|
|
||||||
operation_metrics_collector = OperationMetricsCollector(
|
|
||||||
storage_root,
|
|
||||||
interval_minutes=app.config.get("OPERATION_METRICS_INTERVAL_MINUTES", 5),
|
|
||||||
retention_hours=app.config.get("OPERATION_METRICS_RETENTION_HOURS", 24),
|
|
||||||
)
|
|
||||||
app.extensions["operation_metrics"] = operation_metrics_collector
|
|
||||||
|
|
||||||
system_metrics_collector = None
|
|
||||||
if app.config.get("METRICS_HISTORY_ENABLED", False):
|
|
||||||
from .system_metrics import SystemMetricsCollector
|
|
||||||
system_metrics_collector = SystemMetricsCollector(
|
|
||||||
storage_root,
|
|
||||||
interval_minutes=app.config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5),
|
|
||||||
retention_hours=app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24),
|
|
||||||
)
|
|
||||||
system_metrics_collector.set_storage(storage)
|
|
||||||
app.extensions["system_metrics"] = system_metrics_collector
|
|
||||||
|
|
||||||
site_sync_worker = None
|
|
||||||
if app.config.get("SITE_SYNC_ENABLED", False):
|
|
||||||
from .site_sync import SiteSyncWorker
|
|
||||||
site_sync_worker = SiteSyncWorker(
|
|
||||||
storage=storage,
|
|
||||||
connections=connections,
|
|
||||||
replication_manager=replication,
|
|
||||||
storage_root=storage_root,
|
|
||||||
interval_seconds=app.config.get("SITE_SYNC_INTERVAL_SECONDS", 60),
|
|
||||||
batch_size=app.config.get("SITE_SYNC_BATCH_SIZE", 100),
|
|
||||||
connect_timeout=app.config.get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10),
|
|
||||||
read_timeout=app.config.get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120),
|
|
||||||
max_retries=app.config.get("SITE_SYNC_MAX_RETRIES", 2),
|
|
||||||
clock_skew_tolerance_seconds=app.config.get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0),
|
|
||||||
)
|
|
||||||
site_sync_worker.start()
|
|
||||||
app.extensions["site_sync"] = site_sync_worker
|
|
||||||
|
|
||||||
@app.errorhandler(500)
|
@app.errorhandler(500)
|
||||||
def internal_error(error):
|
def internal_error(error):
|
||||||
wants_html = request.accept_mimetypes.accept_html
|
|
||||||
path = request.path or ""
|
|
||||||
if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
|
|
||||||
return render_template('500.html'), 500
|
return render_template('500.html'), 500
|
||||||
error_xml = (
|
|
||||||
'<?xml version="1.0" encoding="UTF-8"?>'
|
|
||||||
'<Error>'
|
|
||||||
'<Code>InternalError</Code>'
|
|
||||||
'<Message>An internal server error occurred</Message>'
|
|
||||||
f'<Resource>{path}</Resource>'
|
|
||||||
f'<RequestId>{getattr(g, "request_id", "-")}</RequestId>'
|
|
||||||
'</Error>'
|
|
||||||
)
|
|
||||||
return error_xml, 500, {'Content-Type': 'application/xml'}
|
|
||||||
|
|
||||||
@app.errorhandler(CSRFError)
|
@app.errorhandler(CSRFError)
|
||||||
def handle_csrf_error(e):
|
def handle_csrf_error(e):
|
||||||
wants_html = request.accept_mimetypes.accept_html
|
|
||||||
path = request.path or ""
|
|
||||||
if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
|
|
||||||
return render_template('csrf_error.html', reason=e.description), 400
|
return render_template('csrf_error.html', reason=e.description), 400
|
||||||
error_xml = (
|
|
||||||
'<?xml version="1.0" encoding="UTF-8"?>'
|
|
||||||
'<Error>'
|
|
||||||
'<Code>CSRFError</Code>'
|
|
||||||
f'<Message>{e.description}</Message>'
|
|
||||||
f'<Resource>{path}</Resource>'
|
|
||||||
f'<RequestId>{getattr(g, "request_id", "-")}</RequestId>'
|
|
||||||
'</Error>'
|
|
||||||
)
|
|
||||||
return error_xml, 400, {'Content-Type': 'application/xml'}
|
|
||||||
|
|
||||||
@app.template_filter("filesizeformat")
|
@app.template_filter("filesizeformat")
|
||||||
def filesizeformat(value: int) -> str:
|
def filesizeformat(value: int) -> str:
|
||||||
@@ -343,41 +227,14 @@ def create_app(
|
|||||||
except (ValueError, OSError):
|
except (ValueError, OSError):
|
||||||
return "Unknown"
|
return "Unknown"
|
||||||
|
|
||||||
@app.template_filter("format_datetime")
|
|
||||||
def format_datetime_filter(dt, include_tz: bool = True) -> str:
|
|
||||||
"""Format datetime object as human-readable string in configured timezone."""
|
|
||||||
from datetime import datetime, timezone as dt_timezone
|
|
||||||
from zoneinfo import ZoneInfo
|
|
||||||
if not dt:
|
|
||||||
return ""
|
|
||||||
try:
|
|
||||||
display_tz = app.config.get("DISPLAY_TIMEZONE", "UTC")
|
|
||||||
if display_tz and display_tz != "UTC":
|
|
||||||
try:
|
|
||||||
tz = ZoneInfo(display_tz)
|
|
||||||
if dt.tzinfo is None:
|
|
||||||
dt = dt.replace(tzinfo=dt_timezone.utc)
|
|
||||||
dt = dt.astimezone(tz)
|
|
||||||
except (KeyError, ValueError):
|
|
||||||
pass
|
|
||||||
tz_abbr = dt.strftime("%Z") or "UTC"
|
|
||||||
if include_tz:
|
|
||||||
return f"{dt.strftime('%b %d, %Y %H:%M')} ({tz_abbr})"
|
|
||||||
return dt.strftime("%b %d, %Y %H:%M")
|
|
||||||
except (ValueError, AttributeError):
|
|
||||||
return str(dt)
|
|
||||||
|
|
||||||
if include_api:
|
if include_api:
|
||||||
from .s3_api import s3_api_bp
|
from .s3_api import s3_api_bp
|
||||||
from .kms_api import kms_api_bp
|
from .kms_api import kms_api_bp
|
||||||
from .admin_api import admin_api_bp
|
|
||||||
|
|
||||||
app.register_blueprint(s3_api_bp)
|
app.register_blueprint(s3_api_bp)
|
||||||
app.register_blueprint(kms_api_bp)
|
app.register_blueprint(kms_api_bp)
|
||||||
app.register_blueprint(admin_api_bp)
|
|
||||||
csrf.exempt(s3_api_bp)
|
csrf.exempt(s3_api_bp)
|
||||||
csrf.exempt(kms_api_bp)
|
csrf.exempt(kms_api_bp)
|
||||||
csrf.exempt(admin_api_bp)
|
|
||||||
|
|
||||||
if include_ui:
|
if include_ui:
|
||||||
from .ui import ui_bp
|
from .ui import ui_bp
|
||||||
@@ -397,9 +254,9 @@ def create_app(
|
|||||||
return render_template("404.html"), 404
|
return render_template("404.html"), 404
|
||||||
return error
|
return error
|
||||||
|
|
||||||
@app.get("/myfsio/health")
|
@app.get("/healthz")
|
||||||
def healthcheck() -> Dict[str, str]:
|
def healthcheck() -> Dict[str, str]:
|
||||||
return {"status": "ok"}
|
return {"status": "ok", "version": app.config.get("APP_VERSION", "unknown")}
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
|
||||||
@@ -475,134 +332,11 @@ def _configure_logging(app: Flask) -> None:
|
|||||||
def _log_request_start() -> None:
|
def _log_request_start() -> None:
|
||||||
g.request_id = uuid.uuid4().hex
|
g.request_id = uuid.uuid4().hex
|
||||||
g.request_started_at = time.perf_counter()
|
g.request_started_at = time.perf_counter()
|
||||||
g.request_bytes_in = request.content_length or 0
|
|
||||||
app.logger.info(
|
app.logger.info(
|
||||||
"Request started",
|
"Request started",
|
||||||
extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
|
extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
|
||||||
)
|
)
|
||||||
|
|
||||||
@app.before_request
|
|
||||||
def _maybe_serve_website():
|
|
||||||
if not app.config.get("WEBSITE_HOSTING_ENABLED"):
|
|
||||||
return None
|
|
||||||
if request.method not in {"GET", "HEAD"}:
|
|
||||||
return None
|
|
||||||
host = request.host
|
|
||||||
if ":" in host:
|
|
||||||
host = host.rsplit(":", 1)[0]
|
|
||||||
host = host.lower()
|
|
||||||
store = app.extensions.get("website_domains")
|
|
||||||
if not store:
|
|
||||||
return None
|
|
||||||
bucket = store.get_bucket(host)
|
|
||||||
if not bucket:
|
|
||||||
return None
|
|
||||||
storage = app.extensions["object_storage"]
|
|
||||||
if not storage.bucket_exists(bucket):
|
|
||||||
return _website_error_response(404, "Not Found")
|
|
||||||
website_config = storage.get_bucket_website(bucket)
|
|
||||||
if not website_config:
|
|
||||||
return _website_error_response(404, "Not Found")
|
|
||||||
index_doc = website_config.get("index_document", "index.html")
|
|
||||||
error_doc = website_config.get("error_document")
|
|
||||||
req_path = request.path.lstrip("/")
|
|
||||||
if not req_path or req_path.endswith("/"):
|
|
||||||
object_key = req_path + index_doc
|
|
||||||
else:
|
|
||||||
object_key = req_path
|
|
||||||
try:
|
|
||||||
obj_path = storage.get_object_path(bucket, object_key)
|
|
||||||
except (StorageError, OSError):
|
|
||||||
if object_key == req_path:
|
|
||||||
try:
|
|
||||||
obj_path = storage.get_object_path(bucket, req_path + "/" + index_doc)
|
|
||||||
object_key = req_path + "/" + index_doc
|
|
||||||
except (StorageError, OSError):
|
|
||||||
return _serve_website_error(storage, bucket, error_doc, 404)
|
|
||||||
else:
|
|
||||||
return _serve_website_error(storage, bucket, error_doc, 404)
|
|
||||||
content_type = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
|
|
||||||
is_encrypted = False
|
|
||||||
try:
|
|
||||||
metadata = storage.get_object_metadata(bucket, object_key)
|
|
||||||
is_encrypted = "x-amz-server-side-encryption" in metadata
|
|
||||||
except (StorageError, OSError):
|
|
||||||
pass
|
|
||||||
if request.method == "HEAD":
|
|
||||||
response = Response(status=200)
|
|
||||||
if is_encrypted and hasattr(storage, "get_object_data"):
|
|
||||||
try:
|
|
||||||
data, _ = storage.get_object_data(bucket, object_key)
|
|
||||||
response.headers["Content-Length"] = len(data)
|
|
||||||
except (StorageError, OSError):
|
|
||||||
return _website_error_response(500, "Internal Server Error")
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
stat = obj_path.stat()
|
|
||||||
response.headers["Content-Length"] = stat.st_size
|
|
||||||
except OSError:
|
|
||||||
return _website_error_response(500, "Internal Server Error")
|
|
||||||
response.headers["Content-Type"] = content_type
|
|
||||||
return response
|
|
||||||
if is_encrypted and hasattr(storage, "get_object_data"):
|
|
||||||
try:
|
|
||||||
data, _ = storage.get_object_data(bucket, object_key)
|
|
||||||
response = Response(data, mimetype=content_type)
|
|
||||||
response.headers["Content-Length"] = len(data)
|
|
||||||
return response
|
|
||||||
except (StorageError, OSError):
|
|
||||||
return _website_error_response(500, "Internal Server Error")
|
|
||||||
def _stream(file_path):
|
|
||||||
with file_path.open("rb") as f:
|
|
||||||
while True:
|
|
||||||
chunk = f.read(65536)
|
|
||||||
if not chunk:
|
|
||||||
break
|
|
||||||
yield chunk
|
|
||||||
try:
|
|
||||||
stat = obj_path.stat()
|
|
||||||
response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
|
|
||||||
response.headers["Content-Length"] = stat.st_size
|
|
||||||
return response
|
|
||||||
except OSError:
|
|
||||||
return _website_error_response(500, "Internal Server Error")
|
|
||||||
|
|
||||||
def _serve_website_error(storage, bucket, error_doc_key, status_code):
|
|
||||||
if not error_doc_key:
|
|
||||||
return _website_error_response(status_code, "Not Found" if status_code == 404 else "Error")
|
|
||||||
try:
|
|
||||||
obj_path = storage.get_object_path(bucket, error_doc_key)
|
|
||||||
except (StorageError, OSError):
|
|
||||||
return _website_error_response(status_code, "Not Found")
|
|
||||||
content_type = mimetypes.guess_type(error_doc_key)[0] or "text/html"
|
|
||||||
is_encrypted = False
|
|
||||||
try:
|
|
||||||
metadata = storage.get_object_metadata(bucket, error_doc_key)
|
|
||||||
is_encrypted = "x-amz-server-side-encryption" in metadata
|
|
||||||
except (StorageError, OSError):
|
|
||||||
pass
|
|
||||||
if is_encrypted and hasattr(storage, "get_object_data"):
|
|
||||||
try:
|
|
||||||
data, _ = storage.get_object_data(bucket, error_doc_key)
|
|
||||||
response = Response(data, status=status_code, mimetype=content_type)
|
|
||||||
response.headers["Content-Length"] = len(data)
|
|
||||||
return response
|
|
||||||
except (StorageError, OSError):
|
|
||||||
return _website_error_response(status_code, "Not Found")
|
|
||||||
try:
|
|
||||||
data = obj_path.read_bytes()
|
|
||||||
response = Response(data, status=status_code, mimetype=content_type)
|
|
||||||
response.headers["Content-Length"] = len(data)
|
|
||||||
return response
|
|
||||||
except OSError:
|
|
||||||
return _website_error_response(status_code, "Not Found")
|
|
||||||
|
|
||||||
def _website_error_response(status_code, message):
|
|
||||||
safe_msg = html_module.escape(str(message))
|
|
||||||
safe_code = html_module.escape(str(status_code))
|
|
||||||
body = f"<html><head><title>{safe_code} {safe_msg}</title></head><body><h1>{safe_code} {safe_msg}</h1></body></html>"
|
|
||||||
return Response(body, status=status_code, mimetype="text/html")
|
|
||||||
|
|
||||||
@app.after_request
|
@app.after_request
|
||||||
def _log_request_end(response):
|
def _log_request_end(response):
|
||||||
duration_ms = 0.0
|
duration_ms = 0.0
|
||||||
@@ -619,21 +353,4 @@ def _configure_logging(app: Flask) -> None:
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
|
response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
|
||||||
|
|
||||||
operation_metrics = app.extensions.get("operation_metrics")
|
|
||||||
if operation_metrics:
|
|
||||||
bytes_in = getattr(g, "request_bytes_in", 0)
|
|
||||||
bytes_out = response.content_length or 0
|
|
||||||
error_code = getattr(g, "s3_error_code", None)
|
|
||||||
endpoint_type = classify_endpoint(request.path)
|
|
||||||
operation_metrics.record_request(
|
|
||||||
method=request.method,
|
|
||||||
endpoint_type=endpoint_type,
|
|
||||||
status_code=response.status_code,
|
|
||||||
latency_ms=duration_ms,
|
|
||||||
bytes_in=bytes_in,
|
|
||||||
bytes_out=bytes_out,
|
|
||||||
error_code=error_code,
|
|
||||||
)
|
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|||||||
778
app/admin_api.py
778
app/admin_api.py
@@ -1,778 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import ipaddress
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
import socket
|
|
||||||
import time
|
|
||||||
from typing import Any, Dict, Optional, Tuple
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
import requests
|
|
||||||
from flask import Blueprint, Response, current_app, jsonify, request
|
|
||||||
|
|
||||||
from .connections import ConnectionStore
|
|
||||||
from .extensions import limiter
|
|
||||||
from .iam import IamError, Principal
|
|
||||||
from .replication import ReplicationManager
|
|
||||||
from .site_registry import PeerSite, SiteInfo, SiteRegistry
|
|
||||||
from .website_domains import WebsiteDomainStore, normalize_domain, is_valid_domain
|
|
||||||
|
|
||||||
|
|
||||||
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
|
||||||
"""Check if a URL is safe to make requests to (not internal/private).
|
|
||||||
|
|
||||||
Args:
|
|
||||||
url: The URL to check.
|
|
||||||
allow_internal: If True, allows internal/private IP addresses.
|
|
||||||
Use for self-hosted deployments on internal networks.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
parsed = urlparse(url)
|
|
||||||
hostname = parsed.hostname
|
|
||||||
if not hostname:
|
|
||||||
return False
|
|
||||||
cloud_metadata_hosts = {
|
|
||||||
"metadata.google.internal",
|
|
||||||
"169.254.169.254",
|
|
||||||
}
|
|
||||||
if hostname.lower() in cloud_metadata_hosts:
|
|
||||||
return False
|
|
||||||
if allow_internal:
|
|
||||||
return True
|
|
||||||
blocked_hosts = {
|
|
||||||
"localhost",
|
|
||||||
"127.0.0.1",
|
|
||||||
"0.0.0.0",
|
|
||||||
"::1",
|
|
||||||
"[::1]",
|
|
||||||
}
|
|
||||||
if hostname.lower() in blocked_hosts:
|
|
||||||
return False
|
|
||||||
try:
|
|
||||||
resolved_ip = socket.gethostbyname(hostname)
|
|
||||||
ip = ipaddress.ip_address(resolved_ip)
|
|
||||||
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
|
|
||||||
return False
|
|
||||||
except (socket.gaierror, ValueError):
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
except Exception:
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def _validate_endpoint(endpoint: str) -> Optional[str]:
|
|
||||||
"""Validate endpoint URL format. Returns error message or None."""
|
|
||||||
try:
|
|
||||||
parsed = urlparse(endpoint)
|
|
||||||
if not parsed.scheme or parsed.scheme not in ("http", "https"):
|
|
||||||
return "Endpoint must be http or https URL"
|
|
||||||
if not parsed.netloc:
|
|
||||||
return "Endpoint must have a host"
|
|
||||||
return None
|
|
||||||
except Exception:
|
|
||||||
return "Invalid endpoint URL"
|
|
||||||
|
|
||||||
|
|
||||||
def _validate_priority(priority: Any) -> Optional[str]:
|
|
||||||
"""Validate priority value. Returns error message or None."""
|
|
||||||
try:
|
|
||||||
p = int(priority)
|
|
||||||
if p < 0 or p > 1000:
|
|
||||||
return "Priority must be between 0 and 1000"
|
|
||||||
return None
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
return "Priority must be an integer"
|
|
||||||
|
|
||||||
|
|
||||||
def _validate_region(region: str) -> Optional[str]:
|
|
||||||
"""Validate region format. Returns error message or None."""
|
|
||||||
if not re.match(r"^[a-z]{2,}-[a-z]+-\d+$", region):
|
|
||||||
return "Region must match format like us-east-1"
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _validate_site_id(site_id: str) -> Optional[str]:
|
|
||||||
"""Validate site_id format. Returns error message or None."""
|
|
||||||
if not site_id or len(site_id) > 63:
|
|
||||||
return "site_id must be 1-63 characters"
|
|
||||||
if not re.match(r'^[a-zA-Z0-9][a-zA-Z0-9_-]*$', site_id):
|
|
||||||
return "site_id must start with alphanumeric and contain only alphanumeric, hyphens, underscores"
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Blueprint for the admin routes; its routes are served under the /admin prefix.
admin_api_bp = Blueprint(
    name="admin_api",
    import_name=__name__,
    url_prefix="/admin",
)
|
|
||||||
|
|
||||||
|
|
||||||
def _require_principal() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
    """Resolve the principal for the current request.

    Delegates to ``_require_principal`` in the sibling ``s3_api`` module and
    returns its result unchanged.
    """
    # Imported at call time rather than at module load
    # (presumably to avoid a circular import - TODO confirm).
    from .s3_api import _require_principal as resolve_s3_principal

    outcome = resolve_s3_principal()
    return outcome
|
|
||||||
|
|
||||||
|
|
||||||
def _require_admin() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
    """Resolve the request's principal and require the ``iam:*`` permission.

    Returns:
        ``(principal, None)`` when the principal is authorized, otherwise
        ``(None, error)`` where ``error`` is either the error produced while
        resolving the principal or a 403 ``AccessDenied`` error.
    """
    principal, auth_failure = _require_principal()
    if auth_failure:
        return None, auth_failure

    try:
        _iam().authorize(principal, None, "iam:*")
    except IamError:
        denied = _json_error("AccessDenied", "Admin access required", 403)
        return None, denied

    return principal, None
|
|
||||||
|
|
||||||
|
|
||||||
def _site_registry() -> SiteRegistry:
    """Return the object stored under ``"site_registry"`` in the current app's extensions."""
    extensions = current_app.extensions
    return extensions["site_registry"]
|
|
||||||
|
|
||||||
|
|
||||||
def _connections() -> ConnectionStore:
    """Return the object stored under ``"connections"`` in the current app's extensions."""
    extensions = current_app.extensions
    return extensions["connections"]
|
|
||||||
|
|
||||||
|
|
||||||
def _replication() -> ReplicationManager:
    """Return the object stored under ``"replication"`` in the current app's extensions."""
    extensions = current_app.extensions
    return extensions["replication"]
|
|
||||||
|
|
||||||
|
|
||||||
def _iam():
    """Return the object stored under ``"iam"`` in the current app's extensions."""
    extensions = current_app.extensions
    return extensions["iam"]
|
|
||||||
|
|
||||||
|
|
||||||
def _json_error(code: str, message: str, status: int) -> Tuple[Dict[str, Any], int]:
|
|
||||||
return {"error": {"code": code, "message": message}}, status
|
|
||||||
|
|
||||||
|
|
||||||
def _get_admin_rate_limit() -> str:
    """Return the ``RATE_LIMIT_ADMIN`` app setting, defaulting to ``"60 per minute"``."""
    settings = current_app.config
    return settings.get("RATE_LIMIT_ADMIN", "60 per minute")
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/site", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_local_site():
    """Return the local site's identity as JSON.

    Requires admin access. The site stored in the registry takes precedence;
    otherwise a description is assembled from the ``SITE_*`` app settings
    (marked with ``"source": "environment"``). Responds 404 when neither is
    available.
    """
    _principal, auth_error = _require_admin()
    if auth_error:
        return auth_error

    stored_site = _site_registry().get_local_site()
    if stored_site:
        return jsonify(stored_site.to_dict())

    settings = current_app.config
    env_site_id = settings.get("SITE_ID")
    env_endpoint = settings.get("SITE_ENDPOINT")

    if not env_site_id:
        return _json_error("NotFound", "Local site not configured", 404)

    description = {
        "site_id": env_site_id,
        "endpoint": env_endpoint or "",
        "region": settings.get("SITE_REGION", "us-east-1"),
        "priority": settings.get("SITE_PRIORITY", 100),
        "display_name": env_site_id,
        "source": "environment",
    }
    return jsonify(description)
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/site", methods=["PUT"])
@limiter.limit(lambda: _get_admin_rate_limit())
def update_local_site():
    """Set the local site's identity from a JSON request body.

    Requires admin access. ``site_id`` is mandatory; ``endpoint``,
    ``region``, ``priority`` and ``display_name`` are optional. When a local
    site already exists in the registry its ``created_at`` is carried over.

    Returns:
        The stored site as JSON, or a 400 ``ValidationError`` response when
        a field fails validation.
    """
    principal, error = _require_admin()
    if error:
        return error

    # A JSON body that is not an object (e.g. a list) has no ``.get``;
    # treat it as empty so the caller gets a 400 instead of a 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    site_id = payload.get("site_id")
    endpoint = payload.get("endpoint")

    if not site_id:
        return _json_error("ValidationError", "site_id is required", 400)

    site_id_error = _validate_site_id(site_id)
    if site_id_error:
        return _json_error("ValidationError", site_id_error, 400)

    if endpoint:
        endpoint_error = _validate_endpoint(endpoint)
        if endpoint_error:
            return _json_error("ValidationError", endpoint_error, 400)

    priority = payload.get("priority", 100)
    priority_error = _validate_priority(priority)
    if priority_error:
        return _json_error("ValidationError", priority_error, 400)

    region = payload.get("region", "us-east-1")
    region_error = _validate_region(region)
    if region_error:
        return _json_error("ValidationError", region_error, 400)

    registry = _site_registry()
    existing = registry.get_local_site()

    site = SiteInfo(
        site_id=site_id,
        endpoint=endpoint or "",
        region=region,
        # Coerce like register_peer_site does, so a validated "5" or 5.0 is
        # not persisted as a string/float.
        priority=int(priority),
        display_name=payload.get("display_name", site_id),
        created_at=existing.created_at if existing else None,
    )

    registry.set_local_site(site)

    logger.info("Local site updated", extra={"site_id": site_id, "principal": principal.access_key})
    return jsonify(site.to_dict())
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/sites", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def list_all_sites():
    """Return the local site and all registered peer sites as JSON.

    Requires admin access. The response has the keys ``local`` (``None``
    when no local site is registered), ``peers`` and ``total_peers``.
    """
    _principal, auth_error = _require_admin()
    if auth_error:
        return auth_error

    registry = _site_registry()
    local_site = registry.get_local_site()
    peer_sites = registry.list_peers()

    local_payload = local_site.to_dict() if local_site else None
    peer_payloads = [entry.to_dict() for entry in peer_sites]

    return jsonify({
        "local": local_payload,
        "peers": peer_payloads,
        "total_peers": len(peer_sites),
    })
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/sites", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def register_peer_site():
    """Register a new peer site from a JSON request body.

    Requires admin access. ``site_id`` and ``endpoint`` are mandatory;
    ``region`` (default ``us-east-1``), ``priority`` (default 100),
    ``display_name`` and ``connection_id`` are optional.

    Returns:
        The new peer as JSON with status 201; a 400 ``ValidationError`` for
        invalid input or an unknown ``connection_id``; a 409
        ``AlreadyExists`` when a peer with that id is already registered.
    """
    principal, error = _require_admin()
    if error:
        return error

    # A JSON body that is not an object (e.g. a list) has no ``.get``;
    # treat it as empty so the caller gets a 400 instead of a 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    site_id = payload.get("site_id")
    endpoint = payload.get("endpoint")

    if not site_id:
        return _json_error("ValidationError", "site_id is required", 400)

    site_id_error = _validate_site_id(site_id)
    if site_id_error:
        return _json_error("ValidationError", site_id_error, 400)

    if not endpoint:
        return _json_error("ValidationError", "endpoint is required", 400)

    endpoint_error = _validate_endpoint(endpoint)
    if endpoint_error:
        return _json_error("ValidationError", endpoint_error, 400)

    region = payload.get("region", "us-east-1")
    region_error = _validate_region(region)
    if region_error:
        return _json_error("ValidationError", region_error, 400)

    priority = payload.get("priority", 100)
    priority_error = _validate_priority(priority)
    if priority_error:
        return _json_error("ValidationError", priority_error, 400)

    registry = _site_registry()

    if registry.get_peer(site_id):
        return _json_error("AlreadyExists", f"Peer site '{site_id}' already exists", 409)

    connection_id = payload.get("connection_id")
    if connection_id and not _connections().get(connection_id):
        return _json_error("ValidationError", f"Connection '{connection_id}' not found", 400)

    peer = PeerSite(
        site_id=site_id,
        endpoint=endpoint,
        region=region,
        priority=int(priority),
        display_name=payload.get("display_name", site_id),
        connection_id=connection_id,
    )

    registry.add_peer(peer)

    logger.info("Peer site registered", extra={"site_id": site_id, "principal": principal.access_key})
    return jsonify(peer.to_dict()), 201
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/sites/<site_id>", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_peer_site(site_id: str):
    """Return one registered peer site as JSON, or 404 if it is unknown.

    Requires admin access.
    """
    _principal, auth_error = _require_admin()
    if auth_error:
        return auth_error

    found = _site_registry().get_peer(site_id)
    if found:
        return jsonify(found.to_dict())

    return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/sites/<site_id>", methods=["PUT"])
@limiter.limit(lambda: _get_admin_rate_limit())
def update_peer_site(site_id: str):
    """Update an existing peer site from a JSON request body.

    Requires admin access. Only the fields present in the body
    (``endpoint``, ``region``, ``priority``, ``display_name``,
    ``connection_id``) are changed; everything else, including
    ``created_at`` and the recorded health state, is carried over from the
    existing peer.

    Returns:
        The updated peer as JSON; 404 when the peer does not exist; a 400
        ``ValidationError`` for invalid input or an unknown
        ``connection_id``.
    """
    principal, error = _require_admin()
    if error:
        return error

    registry = _site_registry()
    existing = registry.get_peer(site_id)

    if not existing:
        return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)

    # A JSON body that is not an object (e.g. a list) has no ``.get``;
    # treat it as empty so the request is handled instead of crashing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    if "endpoint" in payload:
        endpoint_error = _validate_endpoint(payload["endpoint"])
        if endpoint_error:
            return _json_error("ValidationError", endpoint_error, 400)

    if "priority" in payload:
        priority_error = _validate_priority(payload["priority"])
        if priority_error:
            return _json_error("ValidationError", priority_error, 400)
        # Coerce like register_peer_site does: a validated "5" must not be
        # stored as a string, or mixed int/str priorities can no longer be
        # sorted together.
        priority = int(payload["priority"])
    else:
        priority = existing.priority

    if "region" in payload:
        region_error = _validate_region(payload["region"])
        if region_error:
            return _json_error("ValidationError", region_error, 400)

    if "connection_id" in payload:
        if payload["connection_id"] and not _connections().get(payload["connection_id"]):
            return _json_error("ValidationError", f"Connection '{payload['connection_id']}' not found", 400)

    peer = PeerSite(
        site_id=site_id,
        endpoint=payload.get("endpoint", existing.endpoint),
        region=payload.get("region", existing.region),
        priority=priority,
        display_name=payload.get("display_name", existing.display_name),
        connection_id=payload.get("connection_id", existing.connection_id),
        created_at=existing.created_at,
        is_healthy=existing.is_healthy,
        last_health_check=existing.last_health_check,
    )

    registry.update_peer(peer)

    logger.info("Peer site updated", extra={"site_id": site_id, "principal": principal.access_key})
    return jsonify(peer.to_dict())
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/sites/<site_id>", methods=["DELETE"])
@limiter.limit(lambda: _get_admin_rate_limit())
def delete_peer_site(site_id: str):
    """Delete a registered peer site.

    Requires admin access. Responds 204 on success and 404 when the
    registry reports that no such peer was deleted.
    """
    principal, auth_error = _require_admin()
    if auth_error:
        return auth_error

    was_deleted = _site_registry().delete_peer(site_id)
    if not was_deleted:
        return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)

    log_fields = {"site_id": site_id, "principal": principal.access_key}
    logger.info("Peer site deleted", extra=log_fields)
    return Response(status=204)
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/sites/<site_id>/health", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def check_peer_health(site_id: str):
    """Check a peer site's health and record the result in the registry.

    Requires admin access. The peer counts as healthy only when it has a
    connection that exists and the replication manager's endpoint health
    check succeeds for it. The response contains ``site_id``,
    ``is_healthy``, ``checked_at`` and, when no check could be made, an
    ``error`` explaining why.
    """
    _principal, auth_error = _require_admin()
    if auth_error:
        return auth_error

    registry = _site_registry()
    peer = registry.get_peer(site_id)
    if not peer:
        return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)

    healthy = False
    problem = None

    if not peer.connection_id:
        problem = "No connection configured for this peer"
    else:
        connection = _connections().get(peer.connection_id)
        if not connection:
            problem = f"Connection '{peer.connection_id}' not found"
        else:
            healthy = _replication().check_endpoint_health(connection)

    registry.update_health(site_id, healthy)

    report = {
        "site_id": site_id,
        "is_healthy": healthy,
        "checked_at": time.time(),
    }
    if problem:
        report["error"] = problem

    return jsonify(report)
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/topology", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_topology():
    """Return the local site and all peers as one list ordered by priority.

    Requires admin access. Every entry carries an ``is_local`` flag; the
    local site is always reported with ``is_healthy`` set to ``True``. The
    response also includes ``total`` and ``healthy_count``.
    """
    _principal, auth_error = _require_admin()
    if auth_error:
        return auth_error

    registry = _site_registry()
    local_site = registry.get_local_site()
    peer_sites = registry.list_peers()

    entries = []
    if local_site:
        local_entry = {**local_site.to_dict(), "is_local": True, "is_healthy": True}
        entries.append(local_entry)
    entries.extend({**peer.to_dict(), "is_local": False} for peer in peer_sites)

    # Entries without a priority are ordered as if they had priority 100.
    entries.sort(key=lambda entry: entry.get("priority", 100))

    healthy_entries = [entry for entry in entries if entry.get("is_healthy")]

    return jsonify({
        "sites": entries,
        "total": len(entries),
        "healthy_count": len(healthy_entries),
    })
|
|
||||||
|
|
||||||
|
|
||||||
def _status_issue(code: str, message: str, severity: str) -> Dict[str, str]:
    """Build one entry for the ``issues`` list of a bidirectional status report."""
    return {"code": code, "message": message, "severity": severity}


@admin_api_bp.route("/sites/<site_id>/bidirectional-status", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def check_bidirectional_status(site_id: str):
    """Report whether bidirectional replication with a peer looks fully configured.

    Requires admin access. Local checks (site identity, endpoint, peer
    connection, bidirectional rules, site-sync setting, endpoint health and
    URL safety) are followed by a query to the peer's ``/admin/sites``
    endpoint to see whether the peer has this site registered with a
    connection. Each problem found is appended to ``issues`` with a
    severity of ``error`` or ``warning``.

    Returns:
        404 when the peer is unknown; otherwise a JSON report whose
        ``is_fully_configured`` flag is true only when the whole check ran
        without an ``error``-severity issue and at least one local
        bidirectional rule targets the peer's connection.
    """
    principal, error = _require_admin()
    if error:
        return error

    registry = _site_registry()
    peer = registry.get_peer(site_id)
    if not peer:
        return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)

    local_site = registry.get_local_site()
    replication = _replication()

    local_bidir_rules = [
        {
            "bucket_name": rule.bucket_name,
            "target_bucket": rule.target_bucket,
            "enabled": rule.enabled,
        }
        for rule in replication.list_rules()
        if rule.target_connection_id == peer.connection_id and rule.mode == "bidirectional"
    ]

    issues = []
    result = {
        "site_id": site_id,
        "local_site_id": local_site.site_id if local_site else None,
        "local_endpoint": local_site.endpoint if local_site else None,
        "local_bidirectional_rules": local_bidir_rules,
        "local_site_sync_enabled": current_app.config.get("SITE_SYNC_ENABLED", False),
        "remote_status": None,
        "issues": issues,
        "is_fully_configured": False,
    }

    if not local_site or not local_site.site_id:
        issues.append(_status_issue(
            "NO_LOCAL_SITE_ID", "Local site identity not configured", "error"))

    if not local_site or not local_site.endpoint:
        issues.append(_status_issue(
            "NO_LOCAL_ENDPOINT",
            "Local site endpoint not configured (remote site cannot reach back)",
            "error"))

    if not peer.connection_id:
        issues.append(_status_issue(
            "NO_CONNECTION", "No connection configured for this peer", "error"))
        return jsonify(result)

    connection = _connections().get(peer.connection_id)
    if not connection:
        issues.append(_status_issue(
            "CONNECTION_NOT_FOUND", f"Connection '{peer.connection_id}' not found", "error"))
        return jsonify(result)

    if not local_bidir_rules:
        issues.append(_status_issue(
            "NO_LOCAL_BIDIRECTIONAL_RULES",
            "No bidirectional replication rules configured on this site",
            "warning"))

    if not result["local_site_sync_enabled"]:
        issues.append(_status_issue(
            "SITE_SYNC_DISABLED",
            "Site sync worker is disabled (SITE_SYNC_ENABLED=false). Pull operations will not work.",
            "warning"))

    if not replication.check_endpoint_health(connection):
        issues.append(_status_issue(
            "REMOTE_UNREACHABLE", "Remote endpoint is not reachable", "error"))
        return jsonify(result)

    allow_internal = current_app.config.get("ALLOW_INTERNAL_ENDPOINTS", False)
    if not _is_safe_url(peer.endpoint, allow_internal=allow_internal):
        issues.append(_status_issue(
            "ENDPOINT_NOT_ALLOWED",
            "Peer endpoint points to cloud metadata service (SSRF protection)",
            "error"))
        return jsonify(result)

    try:
        admin_url = peer.endpoint.rstrip("/") + "/admin/sites"
        resp = requests.get(
            admin_url,
            timeout=10,
            # Redirects are not followed: the safety check above only covers
            # the configured endpoint, and following a redirect would also
            # forward the credential headers below to the redirect target.
            # A 3xx answer is reported as REMOTE_ADMIN_API_ERROR instead.
            allow_redirects=False,
            headers={
                "Accept": "application/json",
                "X-Access-Key": connection.access_key,
                "X-Secret-Key": connection.secret_key,
            },
        )

        if resp.status_code == 200:
            try:
                remote_data = resp.json()
                if not isinstance(remote_data, dict):
                    raise ValueError("Expected JSON object")
                remote_local = remote_data.get("local")
                if remote_local is not None and not isinstance(remote_local, dict):
                    raise ValueError("Expected 'local' to be an object")
                remote_peers = remote_data.get("peers", [])
                if not isinstance(remote_peers, list):
                    raise ValueError("Expected 'peers' to be a list")
            except (ValueError, json.JSONDecodeError) as e:
                logger.warning("Invalid JSON from remote admin API: %s", e)
                result["remote_status"] = {"reachable": True, "invalid_response": True}
                issues.append(_status_issue(
                    "REMOTE_INVALID_RESPONSE",
                    "Remote admin API returned invalid JSON",
                    "warning"))
                return jsonify(result)

            remote_status = {
                "reachable": True,
                "local_site": remote_local,
                "site_sync_enabled": None,
                "has_peer_for_us": False,
                "peer_connection_configured": False,
                "has_bidirectional_rules_for_us": False,
            }
            result["remote_status"] = remote_status

            # Look for this site among the remote's peers, matching on
            # either site id or endpoint; the first match decides.
            for rp in remote_peers:
                if not isinstance(rp, dict):
                    continue
                if local_site and (
                    rp.get("site_id") == local_site.site_id
                    or rp.get("endpoint") == local_site.endpoint
                ):
                    remote_status["has_peer_for_us"] = True
                    remote_status["peer_connection_configured"] = bool(rp.get("connection_id"))
                    break

            if not remote_status["has_peer_for_us"]:
                issues.append(_status_issue(
                    "REMOTE_NO_PEER_FOR_US",
                    "Remote site does not have this site registered as a peer",
                    "error"))
            elif not remote_status["peer_connection_configured"]:
                issues.append(_status_issue(
                    "REMOTE_NO_CONNECTION_FOR_US",
                    "Remote site has us as peer but no connection configured (cannot push back)",
                    "error"))
        elif resp.status_code in (401, 403):
            result["remote_status"] = {
                "reachable": True,
                "admin_access_denied": True,
            }
            issues.append(_status_issue(
                "REMOTE_ADMIN_ACCESS_DENIED",
                "Cannot verify remote configuration (admin access denied)",
                "warning"))
        else:
            result["remote_status"] = {
                "reachable": True,
                "admin_api_error": resp.status_code,
            }
            issues.append(_status_issue(
                "REMOTE_ADMIN_API_ERROR",
                f"Remote admin API returned status {resp.status_code}",
                "warning"))
    except requests.RequestException as e:
        logger.warning("Remote admin API unreachable: %s", e)
        result["remote_status"] = {
            "reachable": False,
            "error": "Connection failed",
        }
        issues.append(_status_issue(
            "REMOTE_ADMIN_UNREACHABLE", "Could not reach remote admin API", "warning"))
    except Exception as e:
        # Best-effort verification: an unexpected failure is reported as a
        # warning rather than failing the whole status request.
        logger.warning("Error checking remote bidirectional status: %s", e, exc_info=True)
        issues.append(_status_issue(
            "VERIFICATION_ERROR", "Internal error during verification", "warning"))

    has_errors = any(issue["severity"] == "error" for issue in issues)
    result["is_fully_configured"] = not has_errors and len(local_bidir_rules) > 0

    return jsonify(result)
|
|
||||||
|
|
||||||
|
|
||||||
def _website_domains() -> WebsiteDomainStore:
    """Return the object stored under ``"website_domains"`` in the current app's extensions."""
    extensions = current_app.extensions
    return extensions["website_domains"]
|
|
||||||
|
|
||||||
|
|
||||||
def _storage():
    """Return the object stored under ``"object_storage"`` in the current app's extensions."""
    extensions = current_app.extensions
    return extensions["object_storage"]
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/website-domains", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def list_website_domains():
    """Return all website domain mappings as JSON.

    Requires admin access; responds 400 when ``WEBSITE_HOSTING_ENABLED`` is
    not set.
    """
    _principal, auth_error = _require_admin()
    if auth_error:
        return auth_error

    hosting_enabled = current_app.config.get("WEBSITE_HOSTING_ENABLED", False)
    if not hosting_enabled:
        return _json_error("InvalidRequest", "Website hosting is not enabled", 400)

    mappings = _website_domains().list_all()
    return jsonify(mappings)
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/website-domains", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def create_website_domain():
    """Map a website domain to a bucket.

    Requires admin access and ``WEBSITE_HOSTING_ENABLED``. The JSON body
    must contain ``domain`` and ``bucket``.

    Returns:
        The new mapping as JSON with status 201; 400 for missing or invalid
        input; 404 when the bucket does not exist; 409 when the domain is
        already mapped.
    """
    principal, error = _require_admin()
    if error:
        return error
    if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
        return _json_error("InvalidRequest", "Website hosting is not enabled", 400)

    # A JSON body that is not an object, or fields that are not strings,
    # are treated as missing so the caller gets a 400 instead of an
    # AttributeError from ``.get`` / ``.strip``.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    raw_domain = payload.get("domain")
    raw_bucket = payload.get("bucket")
    domain = normalize_domain(raw_domain if isinstance(raw_domain, str) else "")
    bucket = raw_bucket.strip() if isinstance(raw_bucket, str) else ""

    if not domain:
        return _json_error("ValidationError", "domain is required", 400)
    if not is_valid_domain(domain):
        return _json_error("ValidationError", f"Invalid domain: '{domain}'", 400)
    if not bucket:
        return _json_error("ValidationError", "bucket is required", 400)

    storage = _storage()
    if not storage.bucket_exists(bucket):
        return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)

    store = _website_domains()
    existing = store.get_bucket(domain)
    if existing:
        return _json_error("Conflict", f"Domain '{domain}' is already mapped to bucket '{existing}'", 409)

    store.set_mapping(domain, bucket)
    logger.info("Website domain mapping created: %s -> %s", domain, bucket)
    return jsonify({"domain": domain, "bucket": bucket}), 201
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/website-domains/<domain>", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_website_domain(domain: str):
    """Return the bucket mapped to a website domain.

    Requires admin access and ``WEBSITE_HOSTING_ENABLED``. The domain is
    normalized before lookup; responds 404 when no mapping exists.
    """
    _principal, auth_error = _require_admin()
    if auth_error:
        return auth_error

    hosting_enabled = current_app.config.get("WEBSITE_HOSTING_ENABLED", False)
    if not hosting_enabled:
        return _json_error("InvalidRequest", "Website hosting is not enabled", 400)

    domain = normalize_domain(domain)
    mapped_bucket = _website_domains().get_bucket(domain)
    if mapped_bucket:
        return jsonify({"domain": domain, "bucket": mapped_bucket})

    return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/website-domains/<domain>", methods=["PUT"])
@limiter.limit(lambda: _get_admin_rate_limit())
def update_website_domain(domain: str):
    """Point an existing website domain mapping at a different bucket.

    Requires admin access and ``WEBSITE_HOSTING_ENABLED``. The JSON body
    must contain ``bucket``.

    Returns:
        The updated mapping as JSON; 400 when ``bucket`` is missing; 404
        when the bucket does not exist or the domain has no mapping.
    """
    principal, error = _require_admin()
    if error:
        return error
    if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
        return _json_error("InvalidRequest", "Website hosting is not enabled", 400)

    domain = normalize_domain(domain)

    # A JSON body that is not an object, or a non-string ``bucket``, is
    # treated as missing so the caller gets a 400 instead of an
    # AttributeError from ``.get`` / ``.strip``.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    raw_bucket = payload.get("bucket")
    bucket = raw_bucket.strip() if isinstance(raw_bucket, str) else ""
    if not bucket:
        return _json_error("ValidationError", "bucket is required", 400)

    storage = _storage()
    if not storage.bucket_exists(bucket):
        return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)

    store = _website_domains()
    if not store.get_bucket(domain):
        return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)

    store.set_mapping(domain, bucket)
    logger.info("Website domain mapping updated: %s -> %s", domain, bucket)
    return jsonify({"domain": domain, "bucket": bucket})
|
|
||||||
|
|
||||||
|
|
||||||
@admin_api_bp.route("/website-domains/<domain>", methods=["DELETE"])
@limiter.limit(lambda: _get_admin_rate_limit())
def delete_website_domain(domain: str):
    """Remove a website domain mapping.

    Requires admin access and ``WEBSITE_HOSTING_ENABLED``. The domain is
    normalized first; responds 204 on success and 404 when the store
    reports that no mapping was deleted.
    """
    _principal, auth_error = _require_admin()
    if auth_error:
        return auth_error

    hosting_enabled = current_app.config.get("WEBSITE_HOSTING_ENABLED", False)
    if not hosting_enabled:
        return _json_error("InvalidRequest", "Website hosting is not enabled", 400)

    domain = normalize_domain(domain)
    was_deleted = _website_domains().delete_mapping(domain)
    if not was_deleted:
        return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)

    logger.info("Website domain mapping deleted: %s", domain)
    return Response(status=204)
|
|
||||||
@@ -1,88 +1,24 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import ipaddress
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass
|
||||||
from fnmatch import fnmatch, translate
|
from fnmatch import fnmatch, translate
|
||||||
from functools import lru_cache
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple
|
from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple
|
||||||
|
|
||||||
|
|
||||||
RESOURCE_PREFIX = "arn:aws:s3:::"
|
RESOURCE_PREFIX = "arn:aws:s3:::"
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=256)
|
|
||||||
def _compile_pattern(pattern: str) -> Pattern[str]:
|
|
||||||
return re.compile(translate(pattern), re.IGNORECASE)
|
|
||||||
|
|
||||||
|
|
||||||
def _match_string_like(value: str, pattern: str) -> bool:
    """Return whether ``value`` matches the wildcard ``pattern`` as compiled by ``_compile_pattern``."""
    return _compile_pattern(pattern).match(value) is not None
|
|
||||||
|
|
||||||
|
|
||||||
def _ip_in_cidr(ip_str: str, cidr: str) -> bool:
|
|
||||||
try:
|
|
||||||
ip = ipaddress.ip_address(ip_str)
|
|
||||||
network = ipaddress.ip_network(cidr, strict=False)
|
|
||||||
return ip in network
|
|
||||||
except ValueError:
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def _evaluate_condition_operator(
|
|
||||||
operator: str,
|
|
||||||
condition_key: str,
|
|
||||||
condition_values: List[str],
|
|
||||||
context: Dict[str, Any],
|
|
||||||
) -> bool:
|
|
||||||
context_value = context.get(condition_key)
|
|
||||||
op_lower = operator.lower()
|
|
||||||
if_exists = op_lower.endswith("ifexists")
|
|
||||||
if if_exists:
|
|
||||||
op_lower = op_lower[:-8]
|
|
||||||
|
|
||||||
if context_value is None:
|
|
||||||
return if_exists
|
|
||||||
|
|
||||||
context_value_str = str(context_value)
|
|
||||||
context_value_lower = context_value_str.lower()
|
|
||||||
|
|
||||||
if op_lower == "stringequals":
|
|
||||||
return context_value_str in condition_values
|
|
||||||
elif op_lower == "stringnotequals":
|
|
||||||
return context_value_str not in condition_values
|
|
||||||
elif op_lower == "stringequalsignorecase":
|
|
||||||
return context_value_lower in [v.lower() for v in condition_values]
|
|
||||||
elif op_lower == "stringnotequalsignorecase":
|
|
||||||
return context_value_lower not in [v.lower() for v in condition_values]
|
|
||||||
elif op_lower == "stringlike":
|
|
||||||
return any(_match_string_like(context_value_str, p) for p in condition_values)
|
|
||||||
elif op_lower == "stringnotlike":
|
|
||||||
return not any(_match_string_like(context_value_str, p) for p in condition_values)
|
|
||||||
elif op_lower == "ipaddress":
|
|
||||||
return any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values)
|
|
||||||
elif op_lower == "notipaddress":
|
|
||||||
return not any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values)
|
|
||||||
elif op_lower == "bool":
|
|
||||||
bool_val = context_value_lower in ("true", "1", "yes")
|
|
||||||
return str(bool_val).lower() in [v.lower() for v in condition_values]
|
|
||||||
elif op_lower == "null":
|
|
||||||
is_null = context_value is None or context_value == ""
|
|
||||||
expected_null = condition_values[0].lower() in ("true", "1", "yes") if condition_values else True
|
|
||||||
return is_null == expected_null
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
ACTION_ALIASES = {
|
ACTION_ALIASES = {
|
||||||
|
# List actions
|
||||||
"s3:listbucket": "list",
|
"s3:listbucket": "list",
|
||||||
"s3:listallmybuckets": "list",
|
"s3:listallmybuckets": "list",
|
||||||
"s3:listbucketversions": "list",
|
"s3:listbucketversions": "list",
|
||||||
"s3:listmultipartuploads": "list",
|
"s3:listmultipartuploads": "list",
|
||||||
"s3:listparts": "list",
|
"s3:listparts": "list",
|
||||||
|
# Read actions
|
||||||
"s3:getobject": "read",
|
"s3:getobject": "read",
|
||||||
"s3:getobjectversion": "read",
|
"s3:getobjectversion": "read",
|
||||||
"s3:getobjecttagging": "read",
|
"s3:getobjecttagging": "read",
|
||||||
@@ -91,6 +27,7 @@ ACTION_ALIASES = {
|
|||||||
"s3:getbucketversioning": "read",
|
"s3:getbucketversioning": "read",
|
||||||
"s3:headobject": "read",
|
"s3:headobject": "read",
|
||||||
"s3:headbucket": "read",
|
"s3:headbucket": "read",
|
||||||
|
# Write actions
|
||||||
"s3:putobject": "write",
|
"s3:putobject": "write",
|
||||||
"s3:createbucket": "write",
|
"s3:createbucket": "write",
|
||||||
"s3:putobjecttagging": "write",
|
"s3:putobjecttagging": "write",
|
||||||
@@ -100,30 +37,26 @@ ACTION_ALIASES = {
|
|||||||
"s3:completemultipartupload": "write",
|
"s3:completemultipartupload": "write",
|
||||||
"s3:abortmultipartupload": "write",
|
"s3:abortmultipartupload": "write",
|
||||||
"s3:copyobject": "write",
|
"s3:copyobject": "write",
|
||||||
|
# Delete actions
|
||||||
"s3:deleteobject": "delete",
|
"s3:deleteobject": "delete",
|
||||||
"s3:deleteobjectversion": "delete",
|
"s3:deleteobjectversion": "delete",
|
||||||
"s3:deletebucket": "delete",
|
"s3:deletebucket": "delete",
|
||||||
"s3:deleteobjecttagging": "delete",
|
"s3:deleteobjecttagging": "delete",
|
||||||
|
# Share actions (ACL)
|
||||||
"s3:putobjectacl": "share",
|
"s3:putobjectacl": "share",
|
||||||
"s3:putbucketacl": "share",
|
"s3:putbucketacl": "share",
|
||||||
"s3:getbucketacl": "share",
|
"s3:getbucketacl": "share",
|
||||||
|
# Policy actions
|
||||||
"s3:putbucketpolicy": "policy",
|
"s3:putbucketpolicy": "policy",
|
||||||
"s3:getbucketpolicy": "policy",
|
"s3:getbucketpolicy": "policy",
|
||||||
"s3:deletebucketpolicy": "policy",
|
"s3:deletebucketpolicy": "policy",
|
||||||
|
# Replication actions
|
||||||
"s3:getreplicationconfiguration": "replication",
|
"s3:getreplicationconfiguration": "replication",
|
||||||
"s3:putreplicationconfiguration": "replication",
|
"s3:putreplicationconfiguration": "replication",
|
||||||
"s3:deletereplicationconfiguration": "replication",
|
"s3:deletereplicationconfiguration": "replication",
|
||||||
"s3:replicateobject": "replication",
|
"s3:replicateobject": "replication",
|
||||||
"s3:replicatetags": "replication",
|
"s3:replicatetags": "replication",
|
||||||
"s3:replicatedelete": "replication",
|
"s3:replicatedelete": "replication",
|
||||||
"s3:getlifecycleconfiguration": "lifecycle",
|
|
||||||
"s3:putlifecycleconfiguration": "lifecycle",
|
|
||||||
"s3:deletelifecycleconfiguration": "lifecycle",
|
|
||||||
"s3:getbucketlifecycle": "lifecycle",
|
|
||||||
"s3:putbucketlifecycle": "lifecycle",
|
|
||||||
"s3:getbucketcors": "cors",
|
|
||||||
"s3:putbucketcors": "cors",
|
|
||||||
"s3:deletebucketcors": "cors",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -202,16 +135,18 @@ class BucketPolicyStatement:
|
|||||||
principals: List[str] | str
|
principals: List[str] | str
|
||||||
actions: List[str]
|
actions: List[str]
|
||||||
resources: List[Tuple[str | None, str | None]]
|
resources: List[Tuple[str | None, str | None]]
|
||||||
conditions: Dict[str, Dict[str, List[str]]] = field(default_factory=dict)
|
# Performance: Pre-compiled regex patterns for resource matching
|
||||||
_compiled_patterns: List[Tuple[str | None, Optional[Pattern[str]]]] | None = None
|
_compiled_patterns: List[Tuple[str | None, Optional[Pattern[str]]]] | None = None
|
||||||
|
|
||||||
def _get_compiled_patterns(self) -> List[Tuple[str | None, Optional[Pattern[str]]]]:
|
def _get_compiled_patterns(self) -> List[Tuple[str | None, Optional[Pattern[str]]]]:
|
||||||
|
"""Lazily compile fnmatch patterns to regex for faster matching."""
|
||||||
if self._compiled_patterns is None:
|
if self._compiled_patterns is None:
|
||||||
self._compiled_patterns = []
|
self._compiled_patterns = []
|
||||||
for resource_bucket, key_pattern in self.resources:
|
for resource_bucket, key_pattern in self.resources:
|
||||||
if key_pattern is None:
|
if key_pattern is None:
|
||||||
self._compiled_patterns.append((resource_bucket, None))
|
self._compiled_patterns.append((resource_bucket, None))
|
||||||
else:
|
else:
|
||||||
|
# Convert fnmatch pattern to regex
|
||||||
regex_pattern = translate(key_pattern)
|
regex_pattern = translate(key_pattern)
|
||||||
self._compiled_patterns.append((resource_bucket, re.compile(regex_pattern)))
|
self._compiled_patterns.append((resource_bucket, re.compile(regex_pattern)))
|
||||||
return self._compiled_patterns
|
return self._compiled_patterns
|
||||||
@@ -238,21 +173,11 @@ class BucketPolicyStatement:
|
|||||||
if not key:
|
if not key:
|
||||||
return True
|
return True
|
||||||
continue
|
continue
|
||||||
|
# Performance: Use pre-compiled regex instead of fnmatch
|
||||||
if compiled_pattern.match(key):
|
if compiled_pattern.match(key):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def matches_condition(self, context: Optional[Dict[str, Any]]) -> bool:
    """Report whether every condition on this statement holds for ``context``.

    A statement without conditions always matches. A ``None`` context is
    treated as an empty mapping. Each ``(operator, key, values)`` triple in
    ``self.conditions`` is passed to ``_evaluate_condition_operator``;
    evaluation stops at the first one that fails.
    """
    if not self.conditions:
        return True
    request_context = {} if context is None else context
    return all(
        _evaluate_condition_operator(operator, condition_key, condition_values, request_context)
        for operator, key_values in self.conditions.items()
        for condition_key, condition_values in key_values.items()
    )
|
|
||||||
|
|
||||||
|
|
||||||
class BucketPolicyStore:
|
class BucketPolicyStore:
|
||||||
"""Loads bucket policies from disk and evaluates statements."""
|
"""Loads bucket policies from disk and evaluates statements."""
|
||||||
@@ -294,7 +219,6 @@ class BucketPolicyStore:
|
|||||||
bucket: Optional[str],
|
bucket: Optional[str],
|
||||||
object_key: Optional[str],
|
object_key: Optional[str],
|
||||||
action: str,
|
action: str,
|
||||||
context: Optional[Dict[str, Any]] = None,
|
|
||||||
) -> str | None:
|
) -> str | None:
|
||||||
bucket = (bucket or "").lower()
|
bucket = (bucket or "").lower()
|
||||||
statements = self._policies.get(bucket) or []
|
statements = self._policies.get(bucket) or []
|
||||||
@@ -306,8 +230,6 @@ class BucketPolicyStore:
|
|||||||
continue
|
continue
|
||||||
if not statement.matches_resource(bucket, object_key):
|
if not statement.matches_resource(bucket, object_key):
|
||||||
continue
|
continue
|
||||||
if not statement.matches_condition(context):
|
|
||||||
continue
|
|
||||||
if statement.effect == "deny":
|
if statement.effect == "deny":
|
||||||
return "deny"
|
return "deny"
|
||||||
decision = "allow"
|
decision = "allow"
|
||||||
@@ -372,7 +294,6 @@ class BucketPolicyStore:
|
|||||||
if not resources:
|
if not resources:
|
||||||
continue
|
continue
|
||||||
effect = statement.get("Effect", "Allow").lower()
|
effect = statement.get("Effect", "Allow").lower()
|
||||||
conditions = self._normalize_conditions(statement.get("Condition", {}))
|
|
||||||
statements.append(
|
statements.append(
|
||||||
BucketPolicyStatement(
|
BucketPolicyStatement(
|
||||||
sid=statement.get("Sid"),
|
sid=statement.get("Sid"),
|
||||||
@@ -380,24 +301,6 @@ class BucketPolicyStore:
|
|||||||
principals=principals,
|
principals=principals,
|
||||||
actions=actions or ["*"],
|
actions=actions or ["*"],
|
||||||
resources=resources,
|
resources=resources,
|
||||||
conditions=conditions,
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return statements
|
return statements
|
||||||
|
|
||||||
def _normalize_conditions(self, condition_block: Dict[str, Any]) -> Dict[str, Dict[str, List[str]]]:
|
|
||||||
if not condition_block or not isinstance(condition_block, dict):
|
|
||||||
return {}
|
|
||||||
normalized: Dict[str, Dict[str, List[str]]] = {}
|
|
||||||
for operator, key_values in condition_block.items():
|
|
||||||
if not isinstance(key_values, dict):
|
|
||||||
continue
|
|
||||||
normalized[operator] = {}
|
|
||||||
for cond_key, cond_values in key_values.items():
|
|
||||||
if isinstance(cond_values, str):
|
|
||||||
normalized[operator][cond_key] = [cond_values]
|
|
||||||
elif isinstance(cond_values, list):
|
|
||||||
normalized[operator][cond_key] = [str(v) for v in cond_values]
|
|
||||||
else:
|
|
||||||
normalized[operator][cond_key] = [str(cond_values)]
|
|
||||||
return normalized
|
|
||||||
@@ -36,11 +36,10 @@ class GzipMiddleware:
|
|||||||
content_type = None
|
content_type = None
|
||||||
content_length = None
|
content_length = None
|
||||||
should_compress = False
|
should_compress = False
|
||||||
passthrough = False
|
|
||||||
exc_info_holder = [None]
|
exc_info_holder = [None]
|
||||||
|
|
||||||
def custom_start_response(status: str, headers: List[Tuple[str, str]], exc_info=None):
|
def custom_start_response(status: str, headers: List[Tuple[str, str]], exc_info=None):
|
||||||
nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress, passthrough
|
nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress
|
||||||
response_started = True
|
response_started = True
|
||||||
status_code = int(status.split(' ', 1)[0])
|
status_code = int(status.split(' ', 1)[0])
|
||||||
response_headers = list(headers)
|
response_headers = list(headers)
|
||||||
@@ -51,32 +50,18 @@ class GzipMiddleware:
|
|||||||
if name_lower == 'content-type':
|
if name_lower == 'content-type':
|
||||||
content_type = value.split(';')[0].strip().lower()
|
content_type = value.split(';')[0].strip().lower()
|
||||||
elif name_lower == 'content-length':
|
elif name_lower == 'content-length':
|
||||||
try:
|
|
||||||
content_length = int(value)
|
content_length = int(value)
|
||||||
except (ValueError, TypeError):
|
|
||||||
pass
|
|
||||||
elif name_lower == 'content-encoding':
|
elif name_lower == 'content-encoding':
|
||||||
passthrough = True
|
should_compress = False
|
||||||
return start_response(status, headers, exc_info)
|
|
||||||
elif name_lower == 'x-stream-response':
|
|
||||||
passthrough = True
|
|
||||||
return start_response(status, headers, exc_info)
|
return start_response(status, headers, exc_info)
|
||||||
|
|
||||||
if content_type and content_type in COMPRESSIBLE_MIMES:
|
if content_type and content_type in COMPRESSIBLE_MIMES:
|
||||||
if content_length is None or content_length >= self.min_size:
|
if content_length is None or content_length >= self.min_size:
|
||||||
should_compress = True
|
should_compress = True
|
||||||
else:
|
|
||||||
passthrough = True
|
|
||||||
return start_response(status, headers, exc_info)
|
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
app_iter = self.app(environ, custom_start_response)
|
response_body = b''.join(self.app(environ, custom_start_response))
|
||||||
|
|
||||||
if passthrough:
|
|
||||||
return app_iter
|
|
||||||
|
|
||||||
response_body = b''.join(app_iter)
|
|
||||||
|
|
||||||
if not response_started:
|
if not response_started:
|
||||||
return [response_body]
|
return [response_body]
|
||||||
|
|||||||
284
app/config.py
284
app/config.py
@@ -1,7 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import secrets
|
import secrets
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
@@ -10,30 +9,6 @@ from dataclasses import dataclass
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
import psutil
|
|
||||||
|
|
||||||
|
|
||||||
def _calculate_auto_threads() -> int:
    """Pick a thread count: twice the logical CPU count, kept within 1..64.

    Uses 4 as the CPU count when ``psutil.cpu_count`` returns nothing usable.
    """
    logical_cpus = psutil.cpu_count(logical=True)
    if not logical_cpus:
        logical_cpus = 4
    doubled = logical_cpus * 2
    return max(1, min(doubled, 64))
|
|
||||||
|
|
||||||
|
|
||||||
def _calculate_auto_connection_limit() -> int:
    """Derive a connection limit from available memory, kept within 20..1000.

    The budget is one connection per 5 MiB of the ``available`` figure
    reported by ``psutil.virtual_memory()``.
    """
    bytes_per_mib = 1024 * 1024
    available_mb = psutil.virtual_memory().available / bytes_per_mib
    budget = int(available_mb / 5)
    return min(max(budget, 20), 1000)
|
|
||||||
|
|
||||||
|
|
||||||
def _calculate_auto_backlog(connection_limit: int) -> int:
|
|
||||||
return max(64, min(connection_limit * 2, 4096))
|
|
||||||
|
|
||||||
|
|
||||||
def _validate_rate_limit(value: str) -> str:
|
|
||||||
pattern = r"^\d+\s+per\s+(second|minute|hour|day)$"
|
|
||||||
if not re.match(pattern, value):
|
|
||||||
raise ValueError(f"Invalid rate limit format: {value}. Expected format: '200 per minute'")
|
|
||||||
return value
|
|
||||||
|
|
||||||
if getattr(sys, "frozen", False):
|
if getattr(sys, "frozen", False):
|
||||||
# Running in a PyInstaller bundle
|
# Running in a PyInstaller bundle
|
||||||
PROJECT_ROOT = Path(sys._MEIPASS)
|
PROJECT_ROOT = Path(sys._MEIPASS)
|
||||||
@@ -80,10 +55,6 @@ class AppConfig:
|
|||||||
log_backup_count: int
|
log_backup_count: int
|
||||||
ratelimit_default: str
|
ratelimit_default: str
|
||||||
ratelimit_storage_uri: str
|
ratelimit_storage_uri: str
|
||||||
ratelimit_list_buckets: str
|
|
||||||
ratelimit_bucket_ops: str
|
|
||||||
ratelimit_object_ops: str
|
|
||||||
ratelimit_head_ops: str
|
|
||||||
cors_origins: list[str]
|
cors_origins: list[str]
|
||||||
cors_methods: list[str]
|
cors_methods: list[str]
|
||||||
cors_allow_headers: list[str]
|
cors_allow_headers: list[str]
|
||||||
@@ -105,51 +76,6 @@ class AppConfig:
|
|||||||
display_timezone: str
|
display_timezone: str
|
||||||
lifecycle_enabled: bool
|
lifecycle_enabled: bool
|
||||||
lifecycle_interval_seconds: int
|
lifecycle_interval_seconds: int
|
||||||
metrics_history_enabled: bool
|
|
||||||
metrics_history_retention_hours: int
|
|
||||||
metrics_history_interval_minutes: int
|
|
||||||
operation_metrics_enabled: bool
|
|
||||||
operation_metrics_interval_minutes: int
|
|
||||||
operation_metrics_retention_hours: int
|
|
||||||
server_threads: int
|
|
||||||
server_connection_limit: int
|
|
||||||
server_backlog: int
|
|
||||||
server_channel_timeout: int
|
|
||||||
server_threads_auto: bool
|
|
||||||
server_connection_limit_auto: bool
|
|
||||||
server_backlog_auto: bool
|
|
||||||
site_sync_enabled: bool
|
|
||||||
site_sync_interval_seconds: int
|
|
||||||
site_sync_batch_size: int
|
|
||||||
sigv4_timestamp_tolerance_seconds: int
|
|
||||||
presigned_url_min_expiry_seconds: int
|
|
||||||
presigned_url_max_expiry_seconds: int
|
|
||||||
replication_connect_timeout_seconds: int
|
|
||||||
replication_read_timeout_seconds: int
|
|
||||||
replication_max_retries: int
|
|
||||||
replication_streaming_threshold_bytes: int
|
|
||||||
replication_max_failures_per_bucket: int
|
|
||||||
site_sync_connect_timeout_seconds: int
|
|
||||||
site_sync_read_timeout_seconds: int
|
|
||||||
site_sync_max_retries: int
|
|
||||||
site_sync_clock_skew_tolerance_seconds: float
|
|
||||||
object_key_max_length_bytes: int
|
|
||||||
object_cache_max_size: int
|
|
||||||
bucket_config_cache_ttl_seconds: float
|
|
||||||
object_tag_limit: int
|
|
||||||
encryption_chunk_size_bytes: int
|
|
||||||
kms_generate_data_key_min_bytes: int
|
|
||||||
kms_generate_data_key_max_bytes: int
|
|
||||||
lifecycle_max_history_per_bucket: int
|
|
||||||
site_id: Optional[str]
|
|
||||||
site_endpoint: Optional[str]
|
|
||||||
site_region: str
|
|
||||||
site_priority: int
|
|
||||||
ratelimit_admin: str
|
|
||||||
num_trusted_proxies: int
|
|
||||||
allowed_redirect_hosts: list[str]
|
|
||||||
allow_internal_endpoints: bool
|
|
||||||
website_hosting_enabled: bool
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
|
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
|
||||||
@@ -222,12 +148,8 @@ class AppConfig:
|
|||||||
log_path = log_dir / str(_get("LOG_FILE", "app.log"))
|
log_path = log_dir / str(_get("LOG_FILE", "app.log"))
|
||||||
log_max_bytes = int(_get("LOG_MAX_BYTES", 5 * 1024 * 1024))
|
log_max_bytes = int(_get("LOG_MAX_BYTES", 5 * 1024 * 1024))
|
||||||
log_backup_count = int(_get("LOG_BACKUP_COUNT", 3))
|
log_backup_count = int(_get("LOG_BACKUP_COUNT", 3))
|
||||||
ratelimit_default = _validate_rate_limit(str(_get("RATE_LIMIT_DEFAULT", "200 per minute")))
|
ratelimit_default = str(_get("RATE_LIMIT_DEFAULT", "200 per minute"))
|
||||||
ratelimit_storage_uri = str(_get("RATE_LIMIT_STORAGE_URI", "memory://"))
|
ratelimit_storage_uri = str(_get("RATE_LIMIT_STORAGE_URI", "memory://"))
|
||||||
ratelimit_list_buckets = _validate_rate_limit(str(_get("RATE_LIMIT_LIST_BUCKETS", "60 per minute")))
|
|
||||||
ratelimit_bucket_ops = _validate_rate_limit(str(_get("RATE_LIMIT_BUCKET_OPS", "120 per minute")))
|
|
||||||
ratelimit_object_ops = _validate_rate_limit(str(_get("RATE_LIMIT_OBJECT_OPS", "240 per minute")))
|
|
||||||
ratelimit_head_ops = _validate_rate_limit(str(_get("RATE_LIMIT_HEAD_OPS", "100 per minute")))
|
|
||||||
|
|
||||||
def _csv(value: str, default: list[str]) -> list[str]:
|
def _csv(value: str, default: list[str]) -> list[str]:
|
||||||
if not value:
|
if not value:
|
||||||
@@ -250,75 +172,6 @@ class AppConfig:
|
|||||||
kms_keys_path = Path(_get("KMS_KEYS_PATH", encryption_keys_dir / "kms_keys.json")).resolve()
|
kms_keys_path = Path(_get("KMS_KEYS_PATH", encryption_keys_dir / "kms_keys.json")).resolve()
|
||||||
default_encryption_algorithm = str(_get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"))
|
default_encryption_algorithm = str(_get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"))
|
||||||
display_timezone = str(_get("DISPLAY_TIMEZONE", "UTC"))
|
display_timezone = str(_get("DISPLAY_TIMEZONE", "UTC"))
|
||||||
metrics_history_enabled = str(_get("METRICS_HISTORY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
|
||||||
metrics_history_retention_hours = int(_get("METRICS_HISTORY_RETENTION_HOURS", 24))
|
|
||||||
metrics_history_interval_minutes = int(_get("METRICS_HISTORY_INTERVAL_MINUTES", 5))
|
|
||||||
operation_metrics_enabled = str(_get("OPERATION_METRICS_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
|
||||||
operation_metrics_interval_minutes = int(_get("OPERATION_METRICS_INTERVAL_MINUTES", 5))
|
|
||||||
operation_metrics_retention_hours = int(_get("OPERATION_METRICS_RETENTION_HOURS", 24))
|
|
||||||
|
|
||||||
_raw_threads = int(_get("SERVER_THREADS", 0))
|
|
||||||
if _raw_threads == 0:
|
|
||||||
server_threads = _calculate_auto_threads()
|
|
||||||
server_threads_auto = True
|
|
||||||
else:
|
|
||||||
server_threads = _raw_threads
|
|
||||||
server_threads_auto = False
|
|
||||||
|
|
||||||
_raw_conn_limit = int(_get("SERVER_CONNECTION_LIMIT", 0))
|
|
||||||
if _raw_conn_limit == 0:
|
|
||||||
server_connection_limit = _calculate_auto_connection_limit()
|
|
||||||
server_connection_limit_auto = True
|
|
||||||
else:
|
|
||||||
server_connection_limit = _raw_conn_limit
|
|
||||||
server_connection_limit_auto = False
|
|
||||||
|
|
||||||
_raw_backlog = int(_get("SERVER_BACKLOG", 0))
|
|
||||||
if _raw_backlog == 0:
|
|
||||||
server_backlog = _calculate_auto_backlog(server_connection_limit)
|
|
||||||
server_backlog_auto = True
|
|
||||||
else:
|
|
||||||
server_backlog = _raw_backlog
|
|
||||||
server_backlog_auto = False
|
|
||||||
|
|
||||||
server_channel_timeout = int(_get("SERVER_CHANNEL_TIMEOUT", 120))
|
|
||||||
site_sync_enabled = str(_get("SITE_SYNC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
|
||||||
site_sync_interval_seconds = int(_get("SITE_SYNC_INTERVAL_SECONDS", 60))
|
|
||||||
site_sync_batch_size = int(_get("SITE_SYNC_BATCH_SIZE", 100))
|
|
||||||
|
|
||||||
sigv4_timestamp_tolerance_seconds = int(_get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900))
|
|
||||||
presigned_url_min_expiry_seconds = int(_get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1))
|
|
||||||
presigned_url_max_expiry_seconds = int(_get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800))
|
|
||||||
replication_connect_timeout_seconds = int(_get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5))
|
|
||||||
replication_read_timeout_seconds = int(_get("REPLICATION_READ_TIMEOUT_SECONDS", 30))
|
|
||||||
replication_max_retries = int(_get("REPLICATION_MAX_RETRIES", 2))
|
|
||||||
replication_streaming_threshold_bytes = int(_get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024))
|
|
||||||
replication_max_failures_per_bucket = int(_get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50))
|
|
||||||
site_sync_connect_timeout_seconds = int(_get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10))
|
|
||||||
site_sync_read_timeout_seconds = int(_get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120))
|
|
||||||
site_sync_max_retries = int(_get("SITE_SYNC_MAX_RETRIES", 2))
|
|
||||||
site_sync_clock_skew_tolerance_seconds = float(_get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0))
|
|
||||||
object_key_max_length_bytes = int(_get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024))
|
|
||||||
object_cache_max_size = int(_get("OBJECT_CACHE_MAX_SIZE", 100))
|
|
||||||
bucket_config_cache_ttl_seconds = float(_get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0))
|
|
||||||
object_tag_limit = int(_get("OBJECT_TAG_LIMIT", 50))
|
|
||||||
encryption_chunk_size_bytes = int(_get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024))
|
|
||||||
kms_generate_data_key_min_bytes = int(_get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1))
|
|
||||||
kms_generate_data_key_max_bytes = int(_get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024))
|
|
||||||
lifecycle_max_history_per_bucket = int(_get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50))
|
|
||||||
|
|
||||||
site_id_raw = _get("SITE_ID", None)
|
|
||||||
site_id = str(site_id_raw).strip() if site_id_raw else None
|
|
||||||
site_endpoint_raw = _get("SITE_ENDPOINT", None)
|
|
||||||
site_endpoint = str(site_endpoint_raw).strip() if site_endpoint_raw else None
|
|
||||||
site_region = str(_get("SITE_REGION", "us-east-1"))
|
|
||||||
site_priority = int(_get("SITE_PRIORITY", 100))
|
|
||||||
ratelimit_admin = _validate_rate_limit(str(_get("RATE_LIMIT_ADMIN", "60 per minute")))
|
|
||||||
num_trusted_proxies = int(_get("NUM_TRUSTED_PROXIES", 0))
|
|
||||||
allowed_redirect_hosts_raw = _get("ALLOWED_REDIRECT_HOSTS", "")
|
|
||||||
allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
|
|
||||||
allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
|
|
||||||
website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
|
||||||
|
|
||||||
return cls(storage_root=storage_root,
|
return cls(storage_root=storage_root,
|
||||||
max_upload_size=max_upload_size,
|
max_upload_size=max_upload_size,
|
||||||
@@ -337,10 +190,6 @@ class AppConfig:
|
|||||||
log_backup_count=log_backup_count,
|
log_backup_count=log_backup_count,
|
||||||
ratelimit_default=ratelimit_default,
|
ratelimit_default=ratelimit_default,
|
||||||
ratelimit_storage_uri=ratelimit_storage_uri,
|
ratelimit_storage_uri=ratelimit_storage_uri,
|
||||||
ratelimit_list_buckets=ratelimit_list_buckets,
|
|
||||||
ratelimit_bucket_ops=ratelimit_bucket_ops,
|
|
||||||
ratelimit_object_ops=ratelimit_object_ops,
|
|
||||||
ratelimit_head_ops=ratelimit_head_ops,
|
|
||||||
cors_origins=cors_origins,
|
cors_origins=cors_origins,
|
||||||
cors_methods=cors_methods,
|
cors_methods=cors_methods,
|
||||||
cors_allow_headers=cors_allow_headers,
|
cors_allow_headers=cors_allow_headers,
|
||||||
@@ -361,52 +210,7 @@ class AppConfig:
|
|||||||
default_encryption_algorithm=default_encryption_algorithm,
|
default_encryption_algorithm=default_encryption_algorithm,
|
||||||
display_timezone=display_timezone,
|
display_timezone=display_timezone,
|
||||||
lifecycle_enabled=lifecycle_enabled,
|
lifecycle_enabled=lifecycle_enabled,
|
||||||
lifecycle_interval_seconds=lifecycle_interval_seconds,
|
lifecycle_interval_seconds=lifecycle_interval_seconds)
|
||||||
metrics_history_enabled=metrics_history_enabled,
|
|
||||||
metrics_history_retention_hours=metrics_history_retention_hours,
|
|
||||||
metrics_history_interval_minutes=metrics_history_interval_minutes,
|
|
||||||
operation_metrics_enabled=operation_metrics_enabled,
|
|
||||||
operation_metrics_interval_minutes=operation_metrics_interval_minutes,
|
|
||||||
operation_metrics_retention_hours=operation_metrics_retention_hours,
|
|
||||||
server_threads=server_threads,
|
|
||||||
server_connection_limit=server_connection_limit,
|
|
||||||
server_backlog=server_backlog,
|
|
||||||
server_channel_timeout=server_channel_timeout,
|
|
||||||
server_threads_auto=server_threads_auto,
|
|
||||||
server_connection_limit_auto=server_connection_limit_auto,
|
|
||||||
server_backlog_auto=server_backlog_auto,
|
|
||||||
site_sync_enabled=site_sync_enabled,
|
|
||||||
site_sync_interval_seconds=site_sync_interval_seconds,
|
|
||||||
site_sync_batch_size=site_sync_batch_size,
|
|
||||||
sigv4_timestamp_tolerance_seconds=sigv4_timestamp_tolerance_seconds,
|
|
||||||
presigned_url_min_expiry_seconds=presigned_url_min_expiry_seconds,
|
|
||||||
presigned_url_max_expiry_seconds=presigned_url_max_expiry_seconds,
|
|
||||||
replication_connect_timeout_seconds=replication_connect_timeout_seconds,
|
|
||||||
replication_read_timeout_seconds=replication_read_timeout_seconds,
|
|
||||||
replication_max_retries=replication_max_retries,
|
|
||||||
replication_streaming_threshold_bytes=replication_streaming_threshold_bytes,
|
|
||||||
replication_max_failures_per_bucket=replication_max_failures_per_bucket,
|
|
||||||
site_sync_connect_timeout_seconds=site_sync_connect_timeout_seconds,
|
|
||||||
site_sync_read_timeout_seconds=site_sync_read_timeout_seconds,
|
|
||||||
site_sync_max_retries=site_sync_max_retries,
|
|
||||||
site_sync_clock_skew_tolerance_seconds=site_sync_clock_skew_tolerance_seconds,
|
|
||||||
object_key_max_length_bytes=object_key_max_length_bytes,
|
|
||||||
object_cache_max_size=object_cache_max_size,
|
|
||||||
bucket_config_cache_ttl_seconds=bucket_config_cache_ttl_seconds,
|
|
||||||
object_tag_limit=object_tag_limit,
|
|
||||||
encryption_chunk_size_bytes=encryption_chunk_size_bytes,
|
|
||||||
kms_generate_data_key_min_bytes=kms_generate_data_key_min_bytes,
|
|
||||||
kms_generate_data_key_max_bytes=kms_generate_data_key_max_bytes,
|
|
||||||
lifecycle_max_history_per_bucket=lifecycle_max_history_per_bucket,
|
|
||||||
site_id=site_id,
|
|
||||||
site_endpoint=site_endpoint,
|
|
||||||
site_region=site_region,
|
|
||||||
site_priority=site_priority,
|
|
||||||
ratelimit_admin=ratelimit_admin,
|
|
||||||
num_trusted_proxies=num_trusted_proxies,
|
|
||||||
allowed_redirect_hosts=allowed_redirect_hosts,
|
|
||||||
allow_internal_endpoints=allow_internal_endpoints,
|
|
||||||
website_hosting_enabled=website_hosting_enabled)
|
|
||||||
|
|
||||||
def validate_and_report(self) -> list[str]:
|
def validate_and_report(self) -> list[str]:
|
||||||
"""Validate configuration and return a list of warnings/issues.
|
"""Validate configuration and return a list of warnings/issues.
|
||||||
@@ -467,34 +271,6 @@ class AppConfig:
|
|||||||
if "*" in self.cors_origins:
|
if "*" in self.cors_origins:
|
||||||
issues.append("INFO: CORS_ORIGINS is set to '*'. Consider restricting to specific domains in production.")
|
issues.append("INFO: CORS_ORIGINS is set to '*'. Consider restricting to specific domains in production.")
|
||||||
|
|
||||||
if not (1 <= self.server_threads <= 64):
|
|
||||||
issues.append(f"CRITICAL: SERVER_THREADS={self.server_threads} is outside valid range (1-64). Server cannot start.")
|
|
||||||
if not (10 <= self.server_connection_limit <= 1000):
|
|
||||||
issues.append(f"CRITICAL: SERVER_CONNECTION_LIMIT={self.server_connection_limit} is outside valid range (10-1000). Server cannot start.")
|
|
||||||
if not (64 <= self.server_backlog <= 4096):
|
|
||||||
issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (64-4096). Server cannot start.")
|
|
||||||
if not (10 <= self.server_channel_timeout <= 300):
|
|
||||||
issues.append(f"CRITICAL: SERVER_CHANNEL_TIMEOUT={self.server_channel_timeout} is outside valid range (10-300). Server cannot start.")
|
|
||||||
|
|
||||||
if sys.platform != "win32":
|
|
||||||
try:
|
|
||||||
import resource
|
|
||||||
soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
|
|
||||||
threshold = int(soft_limit * 0.8)
|
|
||||||
if self.server_connection_limit > threshold:
|
|
||||||
issues.append(f"WARNING: SERVER_CONNECTION_LIMIT={self.server_connection_limit} exceeds 80% of system file descriptor limit (soft={soft_limit}). Consider running 'ulimit -n {self.server_connection_limit + 100}'.")
|
|
||||||
except (ImportError, OSError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
|
||||||
import psutil
|
|
||||||
available_mb = psutil.virtual_memory().available / (1024 * 1024)
|
|
||||||
estimated_mb = self.server_threads * 50
|
|
||||||
if estimated_mb > available_mb * 0.5:
|
|
||||||
issues.append(f"WARNING: SERVER_THREADS={self.server_threads} may require ~{estimated_mb}MB memory, exceeding 50% of available RAM ({int(available_mb)}MB).")
|
|
||||||
except ImportError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return issues
|
return issues
|
||||||
|
|
||||||
def print_startup_summary(self) -> None:
|
def print_startup_summary(self) -> None:
|
||||||
@@ -512,14 +288,6 @@ class AppConfig:
|
|||||||
print(f" ENCRYPTION: Enabled (Master key: {self.encryption_master_key_path})")
|
print(f" ENCRYPTION: Enabled (Master key: {self.encryption_master_key_path})")
|
||||||
if self.kms_enabled:
|
if self.kms_enabled:
|
||||||
print(f" KMS: Enabled (Keys: {self.kms_keys_path})")
|
print(f" KMS: Enabled (Keys: {self.kms_keys_path})")
|
||||||
if self.website_hosting_enabled:
|
|
||||||
print(f" WEBSITE_HOSTING: Enabled")
|
|
||||||
def _auto(flag: bool) -> str:
|
|
||||||
return " (auto)" if flag else ""
|
|
||||||
print(f" SERVER_THREADS: {self.server_threads}{_auto(self.server_threads_auto)}")
|
|
||||||
print(f" CONNECTION_LIMIT: {self.server_connection_limit}{_auto(self.server_connection_limit_auto)}")
|
|
||||||
print(f" BACKLOG: {self.server_backlog}{_auto(self.server_backlog_auto)}")
|
|
||||||
print(f" CHANNEL_TIMEOUT: {self.server_channel_timeout}s")
|
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
|
|
||||||
issues = self.validate_and_report()
|
issues = self.validate_and_report()
|
||||||
@@ -558,10 +326,6 @@ class AppConfig:
|
|||||||
"LOG_BACKUP_COUNT": self.log_backup_count,
|
"LOG_BACKUP_COUNT": self.log_backup_count,
|
||||||
"RATELIMIT_DEFAULT": self.ratelimit_default,
|
"RATELIMIT_DEFAULT": self.ratelimit_default,
|
||||||
"RATELIMIT_STORAGE_URI": self.ratelimit_storage_uri,
|
"RATELIMIT_STORAGE_URI": self.ratelimit_storage_uri,
|
||||||
"RATELIMIT_LIST_BUCKETS": self.ratelimit_list_buckets,
|
|
||||||
"RATELIMIT_BUCKET_OPS": self.ratelimit_bucket_ops,
|
|
||||||
"RATELIMIT_OBJECT_OPS": self.ratelimit_object_ops,
|
|
||||||
"RATELIMIT_HEAD_OPS": self.ratelimit_head_ops,
|
|
||||||
"CORS_ORIGINS": self.cors_origins,
|
"CORS_ORIGINS": self.cors_origins,
|
||||||
"CORS_METHODS": self.cors_methods,
|
"CORS_METHODS": self.cors_methods,
|
||||||
"CORS_ALLOW_HEADERS": self.cors_allow_headers,
|
"CORS_ALLOW_HEADERS": self.cors_allow_headers,
|
||||||
@@ -573,48 +337,4 @@ class AppConfig:
|
|||||||
"KMS_KEYS_PATH": str(self.kms_keys_path),
|
"KMS_KEYS_PATH": str(self.kms_keys_path),
|
||||||
"DEFAULT_ENCRYPTION_ALGORITHM": self.default_encryption_algorithm,
|
"DEFAULT_ENCRYPTION_ALGORITHM": self.default_encryption_algorithm,
|
||||||
"DISPLAY_TIMEZONE": self.display_timezone,
|
"DISPLAY_TIMEZONE": self.display_timezone,
|
||||||
"LIFECYCLE_ENABLED": self.lifecycle_enabled,
|
|
||||||
"LIFECYCLE_INTERVAL_SECONDS": self.lifecycle_interval_seconds,
|
|
||||||
"METRICS_HISTORY_ENABLED": self.metrics_history_enabled,
|
|
||||||
"METRICS_HISTORY_RETENTION_HOURS": self.metrics_history_retention_hours,
|
|
||||||
"METRICS_HISTORY_INTERVAL_MINUTES": self.metrics_history_interval_minutes,
|
|
||||||
"OPERATION_METRICS_ENABLED": self.operation_metrics_enabled,
|
|
||||||
"OPERATION_METRICS_INTERVAL_MINUTES": self.operation_metrics_interval_minutes,
|
|
||||||
"OPERATION_METRICS_RETENTION_HOURS": self.operation_metrics_retention_hours,
|
|
||||||
"SERVER_THREADS": self.server_threads,
|
|
||||||
"SERVER_CONNECTION_LIMIT": self.server_connection_limit,
|
|
||||||
"SERVER_BACKLOG": self.server_backlog,
|
|
||||||
"SERVER_CHANNEL_TIMEOUT": self.server_channel_timeout,
|
|
||||||
"SITE_SYNC_ENABLED": self.site_sync_enabled,
|
|
||||||
"SITE_SYNC_INTERVAL_SECONDS": self.site_sync_interval_seconds,
|
|
||||||
"SITE_SYNC_BATCH_SIZE": self.site_sync_batch_size,
|
|
||||||
"SIGV4_TIMESTAMP_TOLERANCE_SECONDS": self.sigv4_timestamp_tolerance_seconds,
|
|
||||||
"PRESIGNED_URL_MIN_EXPIRY_SECONDS": self.presigned_url_min_expiry_seconds,
|
|
||||||
"PRESIGNED_URL_MAX_EXPIRY_SECONDS": self.presigned_url_max_expiry_seconds,
|
|
||||||
"REPLICATION_CONNECT_TIMEOUT_SECONDS": self.replication_connect_timeout_seconds,
|
|
||||||
"REPLICATION_READ_TIMEOUT_SECONDS": self.replication_read_timeout_seconds,
|
|
||||||
"REPLICATION_MAX_RETRIES": self.replication_max_retries,
|
|
||||||
"REPLICATION_STREAMING_THRESHOLD_BYTES": self.replication_streaming_threshold_bytes,
|
|
||||||
"REPLICATION_MAX_FAILURES_PER_BUCKET": self.replication_max_failures_per_bucket,
|
|
||||||
"SITE_SYNC_CONNECT_TIMEOUT_SECONDS": self.site_sync_connect_timeout_seconds,
|
|
||||||
"SITE_SYNC_READ_TIMEOUT_SECONDS": self.site_sync_read_timeout_seconds,
|
|
||||||
"SITE_SYNC_MAX_RETRIES": self.site_sync_max_retries,
|
|
||||||
"SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS": self.site_sync_clock_skew_tolerance_seconds,
|
|
||||||
"OBJECT_KEY_MAX_LENGTH_BYTES": self.object_key_max_length_bytes,
|
|
||||||
"OBJECT_CACHE_MAX_SIZE": self.object_cache_max_size,
|
|
||||||
"BUCKET_CONFIG_CACHE_TTL_SECONDS": self.bucket_config_cache_ttl_seconds,
|
|
||||||
"OBJECT_TAG_LIMIT": self.object_tag_limit,
|
|
||||||
"ENCRYPTION_CHUNK_SIZE_BYTES": self.encryption_chunk_size_bytes,
|
|
||||||
"KMS_GENERATE_DATA_KEY_MIN_BYTES": self.kms_generate_data_key_min_bytes,
|
|
||||||
"KMS_GENERATE_DATA_KEY_MAX_BYTES": self.kms_generate_data_key_max_bytes,
|
|
||||||
"LIFECYCLE_MAX_HISTORY_PER_BUCKET": self.lifecycle_max_history_per_bucket,
|
|
||||||
"SITE_ID": self.site_id,
|
|
||||||
"SITE_ENDPOINT": self.site_endpoint,
|
|
||||||
"SITE_REGION": self.site_region,
|
|
||||||
"SITE_PRIORITY": self.site_priority,
|
|
||||||
"RATE_LIMIT_ADMIN": self.ratelimit_admin,
|
|
||||||
"NUM_TRUSTED_PROXIES": self.num_trusted_proxies,
|
|
||||||
"ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
|
|
||||||
"ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
|
|
||||||
"WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled,
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -274,11 +274,5 @@ class EncryptedObjectStorage:
|
|||||||
def set_bucket_quota(self, bucket_name: str, *, max_bytes=None, max_objects=None):
|
def set_bucket_quota(self, bucket_name: str, *, max_bytes=None, max_objects=None):
|
||||||
return self.storage.set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects)
|
return self.storage.set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects)
|
||||||
|
|
||||||
def get_bucket_website(self, bucket_name: str):
|
|
||||||
return self.storage.get_bucket_website(bucket_name)
|
|
||||||
|
|
||||||
def set_bucket_website(self, bucket_name: str, website_config):
|
|
||||||
return self.storage.set_bucket_website(bucket_name, website_config)
|
|
||||||
|
|
||||||
def _compute_etag(self, path: Path) -> str:
|
def _compute_etag(self, path: Path) -> str:
|
||||||
return self.storage._compute_etag(path)
|
return self.storage._compute_etag(path)
|
||||||
|
|||||||
@@ -1,44 +1,15 @@
|
|||||||
|
"""Encryption providers for server-side and client-side encryption."""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import secrets
|
import secrets
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, BinaryIO, Dict, Generator, Optional
|
from typing import Any, BinaryIO, Dict, Generator, Optional
|
||||||
|
|
||||||
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
||||||
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
|
|
||||||
from cryptography.hazmat.primitives import hashes
|
|
||||||
|
|
||||||
if sys.platform != "win32":
|
|
||||||
import fcntl
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def _set_secure_file_permissions(file_path: Path) -> None:
|
|
||||||
"""Set restrictive file permissions (owner read/write only)."""
|
|
||||||
if sys.platform == "win32":
|
|
||||||
try:
|
|
||||||
username = os.environ.get("USERNAME", "")
|
|
||||||
if username:
|
|
||||||
subprocess.run(
|
|
||||||
["icacls", str(file_path), "/inheritance:r",
|
|
||||||
"/grant:r", f"{username}:F"],
|
|
||||||
check=True, capture_output=True
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
|
|
||||||
except (subprocess.SubprocessError, OSError) as exc:
|
|
||||||
logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
|
|
||||||
else:
|
|
||||||
os.chmod(file_path, 0o600)
|
|
||||||
|
|
||||||
|
|
||||||
class EncryptionError(Exception):
|
class EncryptionError(Exception):
|
||||||
@@ -104,18 +75,6 @@ class EncryptionProvider:
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
|
|
||||||
"""Decrypt an encrypted data key.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
encrypted_data_key: The encrypted data key bytes
|
|
||||||
key_id: Optional key identifier (used by KMS providers)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The decrypted data key
|
|
||||||
"""
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
class LocalKeyEncryption(EncryptionProvider):
|
class LocalKeyEncryption(EncryptionProvider):
|
||||||
"""SSE-S3 style encryption using a local master key.
|
"""SSE-S3 style encryption using a local master key.
|
||||||
@@ -140,46 +99,26 @@ class LocalKeyEncryption(EncryptionProvider):
|
|||||||
return self._master_key
|
return self._master_key
|
||||||
|
|
||||||
def _load_or_create_master_key(self) -> bytes:
|
def _load_or_create_master_key(self) -> bytes:
|
||||||
"""Load master key from file or generate a new one (with file locking)."""
|
"""Load master key from file or generate a new one."""
|
||||||
lock_path = self.master_key_path.with_suffix(".lock")
|
|
||||||
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(lock_path, "w") as lock_file:
|
|
||||||
if sys.platform == "win32":
|
|
||||||
import msvcrt
|
|
||||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
|
|
||||||
else:
|
|
||||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
|
|
||||||
try:
|
|
||||||
if self.master_key_path.exists():
|
if self.master_key_path.exists():
|
||||||
try:
|
try:
|
||||||
return base64.b64decode(self.master_key_path.read_text().strip())
|
return base64.b64decode(self.master_key_path.read_text().strip())
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise EncryptionError(f"Failed to load master key: {exc}") from exc
|
raise EncryptionError(f"Failed to load master key: {exc}") from exc
|
||||||
|
|
||||||
key = secrets.token_bytes(32)
|
key = secrets.token_bytes(32)
|
||||||
try:
|
try:
|
||||||
|
self.master_key_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
self.master_key_path.write_text(base64.b64encode(key).decode())
|
self.master_key_path.write_text(base64.b64encode(key).decode())
|
||||||
_set_secure_file_permissions(self.master_key_path)
|
|
||||||
except OSError as exc:
|
except OSError as exc:
|
||||||
raise EncryptionError(f"Failed to save master key: {exc}") from exc
|
raise EncryptionError(f"Failed to save master key: {exc}") from exc
|
||||||
return key
|
return key
|
||||||
finally:
|
|
||||||
if sys.platform == "win32":
|
|
||||||
import msvcrt
|
|
||||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
|
|
||||||
else:
|
|
||||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
|
|
||||||
except OSError as exc:
|
|
||||||
raise EncryptionError(f"Failed to acquire lock for master key: {exc}") from exc
|
|
||||||
|
|
||||||
DATA_KEY_AAD = b'{"purpose":"data_key","version":1}'
|
|
||||||
|
|
||||||
def _encrypt_data_key(self, data_key: bytes) -> bytes:
|
def _encrypt_data_key(self, data_key: bytes) -> bytes:
|
||||||
"""Encrypt the data key with the master key."""
|
"""Encrypt the data key with the master key."""
|
||||||
aesgcm = AESGCM(self.master_key)
|
aesgcm = AESGCM(self.master_key)
|
||||||
nonce = secrets.token_bytes(12)
|
nonce = secrets.token_bytes(12)
|
||||||
encrypted = aesgcm.encrypt(nonce, data_key, self.DATA_KEY_AAD)
|
encrypted = aesgcm.encrypt(nonce, data_key, None)
|
||||||
return nonce + encrypted
|
return nonce + encrypted
|
||||||
|
|
||||||
def _decrypt_data_key(self, encrypted_data_key: bytes) -> bytes:
|
def _decrypt_data_key(self, encrypted_data_key: bytes) -> bytes:
|
||||||
@@ -189,18 +128,11 @@ class LocalKeyEncryption(EncryptionProvider):
|
|||||||
aesgcm = AESGCM(self.master_key)
|
aesgcm = AESGCM(self.master_key)
|
||||||
nonce = encrypted_data_key[:12]
|
nonce = encrypted_data_key[:12]
|
||||||
ciphertext = encrypted_data_key[12:]
|
ciphertext = encrypted_data_key[12:]
|
||||||
try:
|
|
||||||
return aesgcm.decrypt(nonce, ciphertext, self.DATA_KEY_AAD)
|
|
||||||
except Exception:
|
|
||||||
try:
|
try:
|
||||||
return aesgcm.decrypt(nonce, ciphertext, None)
|
return aesgcm.decrypt(nonce, ciphertext, None)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise EncryptionError(f"Failed to decrypt data key: {exc}") from exc
|
raise EncryptionError(f"Failed to decrypt data key: {exc}") from exc
|
||||||
|
|
||||||
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
|
|
||||||
"""Decrypt an encrypted data key (key_id ignored for local encryption)."""
|
|
||||||
return self._decrypt_data_key(encrypted_data_key)
|
|
||||||
|
|
||||||
def generate_data_key(self) -> tuple[bytes, bytes]:
|
def generate_data_key(self) -> tuple[bytes, bytes]:
|
||||||
"""Generate a data key and its encrypted form."""
|
"""Generate a data key and its encrypted form."""
|
||||||
plaintext_key = secrets.token_bytes(32)
|
plaintext_key = secrets.token_bytes(32)
|
||||||
@@ -213,8 +145,7 @@ class LocalKeyEncryption(EncryptionProvider):
|
|||||||
|
|
||||||
aesgcm = AESGCM(data_key)
|
aesgcm = AESGCM(data_key)
|
||||||
nonce = secrets.token_bytes(12)
|
nonce = secrets.token_bytes(12)
|
||||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
ciphertext = aesgcm.encrypt(nonce, plaintext, None)
|
||||||
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
|
|
||||||
|
|
||||||
return EncryptionResult(
|
return EncryptionResult(
|
||||||
ciphertext=ciphertext,
|
ciphertext=ciphertext,
|
||||||
@@ -228,11 +159,10 @@ class LocalKeyEncryption(EncryptionProvider):
|
|||||||
"""Decrypt data using envelope encryption."""
|
"""Decrypt data using envelope encryption."""
|
||||||
data_key = self._decrypt_data_key(encrypted_data_key)
|
data_key = self._decrypt_data_key(encrypted_data_key)
|
||||||
aesgcm = AESGCM(data_key)
|
aesgcm = AESGCM(data_key)
|
||||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
|
||||||
try:
|
try:
|
||||||
return aesgcm.decrypt(nonce, ciphertext, aad)
|
return aesgcm.decrypt(nonce, ciphertext, None)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise EncryptionError("Failed to decrypt data") from exc
|
raise EncryptionError(f"Failed to decrypt data: {exc}") from exc
|
||||||
|
|
||||||
|
|
||||||
class StreamingEncryptor:
|
class StreamingEncryptor:
|
||||||
@@ -250,14 +180,12 @@ class StreamingEncryptor:
|
|||||||
self.chunk_size = chunk_size
|
self.chunk_size = chunk_size
|
||||||
|
|
||||||
def _derive_chunk_nonce(self, base_nonce: bytes, chunk_index: int) -> bytes:
|
def _derive_chunk_nonce(self, base_nonce: bytes, chunk_index: int) -> bytes:
|
||||||
"""Derive a unique nonce for each chunk using HKDF."""
|
"""Derive a unique nonce for each chunk.
|
||||||
hkdf = HKDF(
|
|
||||||
algorithm=hashes.SHA256(),
|
Performance: Use direct byte manipulation instead of full int conversion.
|
||||||
length=12,
|
"""
|
||||||
salt=base_nonce,
|
# Performance: Only modify last 4 bytes instead of full 12-byte conversion
|
||||||
info=chunk_index.to_bytes(4, "big"),
|
return base_nonce[:8] + (chunk_index ^ int.from_bytes(base_nonce[8:], "big")).to_bytes(4, "big")
|
||||||
)
|
|
||||||
return hkdf.derive(b"chunk_nonce")
|
|
||||||
|
|
||||||
def encrypt_stream(self, stream: BinaryIO,
|
def encrypt_stream(self, stream: BinaryIO,
|
||||||
context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]:
|
context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]:
|
||||||
@@ -306,7 +234,10 @@ class StreamingEncryptor:
|
|||||||
|
|
||||||
Performance: Writes chunks directly to output buffer instead of accumulating in list.
|
Performance: Writes chunks directly to output buffer instead of accumulating in list.
|
||||||
"""
|
"""
|
||||||
data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id)
|
if isinstance(self.provider, LocalKeyEncryption):
|
||||||
|
data_key = self.provider._decrypt_data_key(metadata.encrypted_data_key)
|
||||||
|
else:
|
||||||
|
raise EncryptionError("Unsupported provider for streaming decryption")
|
||||||
|
|
||||||
aesgcm = AESGCM(data_key)
|
aesgcm = AESGCM(data_key)
|
||||||
base_nonce = metadata.nonce
|
base_nonce = metadata.nonce
|
||||||
@@ -379,8 +310,7 @@ class EncryptionManager:
|
|||||||
|
|
||||||
def get_streaming_encryptor(self) -> StreamingEncryptor:
|
def get_streaming_encryptor(self) -> StreamingEncryptor:
|
||||||
if self._streaming_encryptor is None:
|
if self._streaming_encryptor is None:
|
||||||
chunk_size = self.config.get("encryption_chunk_size_bytes", 64 * 1024)
|
self._streaming_encryptor = StreamingEncryptor(self.get_local_provider())
|
||||||
self._streaming_encryptor = StreamingEncryptor(self.get_local_provider(), chunk_size=chunk_size)
|
|
||||||
return self._streaming_encryptor
|
return self._streaming_encryptor
|
||||||
|
|
||||||
def encrypt_object(self, data: bytes, algorithm: str = "AES256",
|
def encrypt_object(self, data: bytes, algorithm: str = "AES256",
|
||||||
@@ -473,8 +403,7 @@ class SSECEncryption(EncryptionProvider):
|
|||||||
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
|
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
|
||||||
aesgcm = AESGCM(self.customer_key)
|
aesgcm = AESGCM(self.customer_key)
|
||||||
nonce = secrets.token_bytes(12)
|
nonce = secrets.token_bytes(12)
|
||||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
ciphertext = aesgcm.encrypt(nonce, plaintext, None)
|
||||||
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
|
|
||||||
|
|
||||||
return EncryptionResult(
|
return EncryptionResult(
|
||||||
ciphertext=ciphertext,
|
ciphertext=ciphertext,
|
||||||
@@ -486,11 +415,10 @@ class SSECEncryption(EncryptionProvider):
|
|||||||
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
|
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
|
||||||
key_id: str, context: Dict[str, str] | None = None) -> bytes:
|
key_id: str, context: Dict[str, str] | None = None) -> bytes:
|
||||||
aesgcm = AESGCM(self.customer_key)
|
aesgcm = AESGCM(self.customer_key)
|
||||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
|
||||||
try:
|
try:
|
||||||
return aesgcm.decrypt(nonce, ciphertext, aad)
|
return aesgcm.decrypt(nonce, ciphertext, None)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise EncryptionError("SSE-C decryption failed") from exc
|
raise EncryptionError(f"SSE-C decryption failed: {exc}") from exc
|
||||||
|
|
||||||
def generate_data_key(self) -> tuple[bytes, bytes]:
|
def generate_data_key(self) -> tuple[bytes, bytes]:
|
||||||
return self.customer_key, b""
|
return self.customer_key, b""
|
||||||
@@ -544,7 +472,7 @@ class ClientEncryptionHelper:
|
|||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def encrypt_with_key(plaintext: bytes, key_b64: str, context: Dict[str, str] | None = None) -> Dict[str, str]:
|
def encrypt_with_key(plaintext: bytes, key_b64: str) -> Dict[str, str]:
|
||||||
"""Encrypt data with a client-provided key."""
|
"""Encrypt data with a client-provided key."""
|
||||||
key = base64.b64decode(key_b64)
|
key = base64.b64decode(key_b64)
|
||||||
if len(key) != 32:
|
if len(key) != 32:
|
||||||
@@ -552,8 +480,7 @@ class ClientEncryptionHelper:
|
|||||||
|
|
||||||
aesgcm = AESGCM(key)
|
aesgcm = AESGCM(key)
|
||||||
nonce = secrets.token_bytes(12)
|
nonce = secrets.token_bytes(12)
|
||||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
ciphertext = aesgcm.encrypt(nonce, plaintext, None)
|
||||||
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"ciphertext": base64.b64encode(ciphertext).decode(),
|
"ciphertext": base64.b64encode(ciphertext).decode(),
|
||||||
@@ -562,7 +489,7 @@ class ClientEncryptionHelper:
|
|||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def decrypt_with_key(ciphertext_b64: str, nonce_b64: str, key_b64: str, context: Dict[str, str] | None = None) -> bytes:
|
def decrypt_with_key(ciphertext_b64: str, nonce_b64: str, key_b64: str) -> bytes:
|
||||||
"""Decrypt data with a client-provided key."""
|
"""Decrypt data with a client-provided key."""
|
||||||
key = base64.b64decode(key_b64)
|
key = base64.b64decode(key_b64)
|
||||||
nonce = base64.b64decode(nonce_b64)
|
nonce = base64.b64decode(nonce_b64)
|
||||||
@@ -572,8 +499,7 @@ class ClientEncryptionHelper:
|
|||||||
raise EncryptionError("Key must be 256 bits (32 bytes)")
|
raise EncryptionError("Key must be 256 bits (32 bytes)")
|
||||||
|
|
||||||
aesgcm = AESGCM(key)
|
aesgcm = AESGCM(key)
|
||||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
|
||||||
try:
|
try:
|
||||||
return aesgcm.decrypt(nonce, ciphertext, aad)
|
return aesgcm.decrypt(nonce, ciphertext, None)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise EncryptionError("Decryption failed") from exc
|
raise EncryptionError(f"Decryption failed: {exc}") from exc
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ from typing import Optional, Dict, Any
|
|||||||
from xml.etree.ElementTree import Element, SubElement, tostring
|
from xml.etree.ElementTree import Element, SubElement, tostring
|
||||||
|
|
||||||
from flask import Response, jsonify, request, flash, redirect, url_for, g
|
from flask import Response, jsonify, request, flash, redirect, url_for, g
|
||||||
from flask_limiter import RateLimitExceeded
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -173,21 +172,9 @@ def handle_app_error(error: AppError) -> Response:
|
|||||||
return error.to_xml_response()
|
return error.to_xml_response()
|
||||||
|
|
||||||
|
|
||||||
def handle_rate_limit_exceeded(e: RateLimitExceeded) -> Response:
|
|
||||||
g.s3_error_code = "SlowDown"
|
|
||||||
error = Element("Error")
|
|
||||||
SubElement(error, "Code").text = "SlowDown"
|
|
||||||
SubElement(error, "Message").text = "Please reduce your request rate."
|
|
||||||
SubElement(error, "Resource").text = request.path
|
|
||||||
SubElement(error, "RequestId").text = getattr(g, "request_id", "")
|
|
||||||
xml_bytes = tostring(error, encoding="utf-8")
|
|
||||||
return Response(xml_bytes, status=429, mimetype="application/xml")
|
|
||||||
|
|
||||||
|
|
||||||
def register_error_handlers(app):
|
def register_error_handlers(app):
|
||||||
"""Register error handlers with a Flask app."""
|
"""Register error handlers with a Flask app."""
|
||||||
app.register_error_handler(AppError, handle_app_error)
|
app.register_error_handler(AppError, handle_app_error)
|
||||||
app.register_error_handler(RateLimitExceeded, handle_rate_limit_exceeded)
|
|
||||||
|
|
||||||
for error_class in [
|
for error_class in [
|
||||||
BucketNotFoundError, BucketAlreadyExistsError, BucketNotEmptyError,
|
BucketNotFoundError, BucketAlreadyExistsError, BucketNotEmptyError,
|
||||||
|
|||||||
146
app/iam.py
146
app/iam.py
@@ -1,12 +1,8 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import hashlib
|
|
||||||
import hmac
|
|
||||||
import json
|
import json
|
||||||
import math
|
import math
|
||||||
import os
|
|
||||||
import secrets
|
import secrets
|
||||||
import threading
|
|
||||||
import time
|
import time
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@@ -19,7 +15,7 @@ class IamError(RuntimeError):
|
|||||||
"""Raised when authentication or authorization fails."""
|
"""Raised when authentication or authorization fails."""
|
||||||
|
|
||||||
|
|
||||||
S3_ACTIONS = {"list", "read", "write", "delete", "share", "policy", "replication", "lifecycle", "cors"}
|
S3_ACTIONS = {"list", "read", "write", "delete", "share", "policy", "replication"}
|
||||||
IAM_ACTIONS = {
|
IAM_ACTIONS = {
|
||||||
"iam:list_users",
|
"iam:list_users",
|
||||||
"iam:create_user",
|
"iam:create_user",
|
||||||
@@ -75,16 +71,6 @@ ACTION_ALIASES = {
|
|||||||
"s3:replicateobject": "replication",
|
"s3:replicateobject": "replication",
|
||||||
"s3:replicatetags": "replication",
|
"s3:replicatetags": "replication",
|
||||||
"s3:replicatedelete": "replication",
|
"s3:replicatedelete": "replication",
|
||||||
"lifecycle": "lifecycle",
|
|
||||||
"s3:getlifecycleconfiguration": "lifecycle",
|
|
||||||
"s3:putlifecycleconfiguration": "lifecycle",
|
|
||||||
"s3:deletelifecycleconfiguration": "lifecycle",
|
|
||||||
"s3:getbucketlifecycle": "lifecycle",
|
|
||||||
"s3:putbucketlifecycle": "lifecycle",
|
|
||||||
"cors": "cors",
|
|
||||||
"s3:getbucketcors": "cors",
|
|
||||||
"s3:putbucketcors": "cors",
|
|
||||||
"s3:deletebucketcors": "cors",
|
|
||||||
"iam:listusers": "iam:list_users",
|
"iam:listusers": "iam:list_users",
|
||||||
"iam:createuser": "iam:create_user",
|
"iam:createuser": "iam:create_user",
|
||||||
"iam:deleteuser": "iam:delete_user",
|
"iam:deleteuser": "iam:delete_user",
|
||||||
@@ -121,15 +107,12 @@ class IamService:
|
|||||||
self._raw_config: Dict[str, Any] = {}
|
self._raw_config: Dict[str, Any] = {}
|
||||||
self._failed_attempts: Dict[str, Deque[datetime]] = {}
|
self._failed_attempts: Dict[str, Deque[datetime]] = {}
|
||||||
self._last_load_time = 0.0
|
self._last_load_time = 0.0
|
||||||
self._principal_cache: Dict[str, Tuple[Principal, float]] = {}
|
self._credential_cache: Dict[str, Tuple[str, Principal, float]] = {}
|
||||||
self._secret_key_cache: Dict[str, Tuple[str, float]] = {}
|
self._cache_ttl = 60.0
|
||||||
self._cache_ttl = float(os.environ.get("IAM_CACHE_TTL_SECONDS", "5.0"))
|
|
||||||
self._last_stat_check = 0.0
|
self._last_stat_check = 0.0
|
||||||
self._stat_check_interval = 1.0
|
self._stat_check_interval = 1.0
|
||||||
self._sessions: Dict[str, Dict[str, Any]] = {}
|
self._sessions: Dict[str, Dict[str, Any]] = {}
|
||||||
self._session_lock = threading.Lock()
|
|
||||||
self._load()
|
self._load()
|
||||||
self._load_lockout_state()
|
|
||||||
|
|
||||||
def _maybe_reload(self) -> None:
|
def _maybe_reload(self) -> None:
|
||||||
"""Reload configuration if the file has changed on disk."""
|
"""Reload configuration if the file has changed on disk."""
|
||||||
@@ -140,8 +123,7 @@ class IamService:
|
|||||||
try:
|
try:
|
||||||
if self.config_path.stat().st_mtime > self._last_load_time:
|
if self.config_path.stat().st_mtime > self._last_load_time:
|
||||||
self._load()
|
self._load()
|
||||||
self._principal_cache.clear()
|
self._credential_cache.clear()
|
||||||
self._secret_key_cache.clear()
|
|
||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -157,8 +139,7 @@ class IamService:
|
|||||||
f"Access temporarily locked. Try again in {seconds} seconds."
|
f"Access temporarily locked. Try again in {seconds} seconds."
|
||||||
)
|
)
|
||||||
record = self._users.get(access_key)
|
record = self._users.get(access_key)
|
||||||
stored_secret = record["secret_key"] if record else secrets.token_urlsafe(24)
|
if not record or record["secret_key"] != secret_key:
|
||||||
if not record or not hmac.compare_digest(stored_secret, secret_key):
|
|
||||||
self._record_failed_attempt(access_key)
|
self._record_failed_attempt(access_key)
|
||||||
raise IamError("Invalid credentials")
|
raise IamError("Invalid credentials")
|
||||||
self._clear_failed_attempts(access_key)
|
self._clear_failed_attempts(access_key)
|
||||||
@@ -170,46 +151,11 @@ class IamService:
|
|||||||
attempts = self._failed_attempts.setdefault(access_key, deque())
|
attempts = self._failed_attempts.setdefault(access_key, deque())
|
||||||
self._prune_attempts(attempts)
|
self._prune_attempts(attempts)
|
||||||
attempts.append(datetime.now(timezone.utc))
|
attempts.append(datetime.now(timezone.utc))
|
||||||
self._save_lockout_state()
|
|
||||||
|
|
||||||
def _clear_failed_attempts(self, access_key: str) -> None:
|
def _clear_failed_attempts(self, access_key: str) -> None:
|
||||||
if not access_key:
|
if not access_key:
|
||||||
return
|
return
|
||||||
if self._failed_attempts.pop(access_key, None) is not None:
|
self._failed_attempts.pop(access_key, None)
|
||||||
self._save_lockout_state()
|
|
||||||
|
|
||||||
def _lockout_file(self) -> Path:
|
|
||||||
return self.config_path.parent / "lockout_state.json"
|
|
||||||
|
|
||||||
def _load_lockout_state(self) -> None:
|
|
||||||
"""Load lockout state from disk."""
|
|
||||||
try:
|
|
||||||
if self._lockout_file().exists():
|
|
||||||
data = json.loads(self._lockout_file().read_text(encoding="utf-8"))
|
|
||||||
cutoff = datetime.now(timezone.utc) - self.auth_lockout_window
|
|
||||||
for key, timestamps in data.get("failed_attempts", {}).items():
|
|
||||||
valid = []
|
|
||||||
for ts in timestamps:
|
|
||||||
try:
|
|
||||||
dt = datetime.fromisoformat(ts)
|
|
||||||
if dt > cutoff:
|
|
||||||
valid.append(dt)
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
continue
|
|
||||||
if valid:
|
|
||||||
self._failed_attempts[key] = deque(valid)
|
|
||||||
except (OSError, json.JSONDecodeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def _save_lockout_state(self) -> None:
|
|
||||||
"""Persist lockout state to disk."""
|
|
||||||
data: Dict[str, Any] = {"failed_attempts": {}}
|
|
||||||
for key, attempts in self._failed_attempts.items():
|
|
||||||
data["failed_attempts"][key] = [ts.isoformat() for ts in attempts]
|
|
||||||
try:
|
|
||||||
self._lockout_file().write_text(json.dumps(data), encoding="utf-8")
|
|
||||||
except OSError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def _prune_attempts(self, attempts: Deque[datetime]) -> None:
|
def _prune_attempts(self, attempts: Deque[datetime]) -> None:
|
||||||
cutoff = datetime.now(timezone.utc) - self.auth_lockout_window
|
cutoff = datetime.now(timezone.utc) - self.auth_lockout_window
|
||||||
@@ -252,21 +198,14 @@ class IamService:
|
|||||||
return token
|
return token
|
||||||
|
|
||||||
def validate_session_token(self, access_key: str, session_token: str) -> bool:
|
def validate_session_token(self, access_key: str, session_token: str) -> bool:
|
||||||
"""Validate a session token for an access key (thread-safe, constant-time)."""
|
"""Validate a session token for an access key."""
|
||||||
dummy_key = secrets.token_urlsafe(16)
|
|
||||||
dummy_token = secrets.token_urlsafe(32)
|
|
||||||
with self._session_lock:
|
|
||||||
session = self._sessions.get(session_token)
|
session = self._sessions.get(session_token)
|
||||||
if not session:
|
if not session:
|
||||||
hmac.compare_digest(access_key, dummy_key)
|
|
||||||
hmac.compare_digest(session_token, dummy_token)
|
|
||||||
return False
|
return False
|
||||||
key_match = hmac.compare_digest(session["access_key"], access_key)
|
if session["access_key"] != access_key:
|
||||||
if not key_match:
|
|
||||||
hmac.compare_digest(session_token, dummy_token)
|
|
||||||
return False
|
return False
|
||||||
if time.time() > session["expires_at"]:
|
if time.time() > session["expires_at"]:
|
||||||
self._sessions.pop(session_token, None)
|
del self._sessions[session_token]
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -279,9 +218,9 @@ class IamService:
|
|||||||
|
|
||||||
def principal_for_key(self, access_key: str) -> Principal:
|
def principal_for_key(self, access_key: str) -> Principal:
|
||||||
now = time.time()
|
now = time.time()
|
||||||
cached = self._principal_cache.get(access_key)
|
cached = self._credential_cache.get(access_key)
|
||||||
if cached:
|
if cached:
|
||||||
principal, cached_time = cached
|
secret, principal, cached_time = cached
|
||||||
if now - cached_time < self._cache_ttl:
|
if now - cached_time < self._cache_ttl:
|
||||||
return principal
|
return principal
|
||||||
|
|
||||||
@@ -290,14 +229,23 @@ class IamService:
|
|||||||
if not record:
|
if not record:
|
||||||
raise IamError("Unknown access key")
|
raise IamError("Unknown access key")
|
||||||
principal = self._build_principal(access_key, record)
|
principal = self._build_principal(access_key, record)
|
||||||
self._principal_cache[access_key] = (principal, now)
|
self._credential_cache[access_key] = (record["secret_key"], principal, now)
|
||||||
return principal
|
return principal
|
||||||
|
|
||||||
def secret_for_key(self, access_key: str) -> str:
|
def secret_for_key(self, access_key: str) -> str:
|
||||||
|
now = time.time()
|
||||||
|
cached = self._credential_cache.get(access_key)
|
||||||
|
if cached:
|
||||||
|
secret, principal, cached_time = cached
|
||||||
|
if now - cached_time < self._cache_ttl:
|
||||||
|
return secret
|
||||||
|
|
||||||
self._maybe_reload()
|
self._maybe_reload()
|
||||||
record = self._users.get(access_key)
|
record = self._users.get(access_key)
|
||||||
if not record:
|
if not record:
|
||||||
raise IamError("Unknown access key")
|
raise IamError("Unknown access key")
|
||||||
|
principal = self._build_principal(access_key, record)
|
||||||
|
self._credential_cache[access_key] = (record["secret_key"], principal, now)
|
||||||
return record["secret_key"]
|
return record["secret_key"]
|
||||||
|
|
||||||
def authorize(self, principal: Principal, bucket_name: str | None, action: str) -> None:
|
def authorize(self, principal: Principal, bucket_name: str | None, action: str) -> None:
|
||||||
@@ -309,18 +257,6 @@ class IamService:
|
|||||||
if not self._is_allowed(principal, normalized, action):
|
if not self._is_allowed(principal, normalized, action):
|
||||||
raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'")
|
raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'")
|
||||||
|
|
||||||
def check_permissions(self, principal: Principal, bucket_name: str | None, actions: Iterable[str]) -> Dict[str, bool]:
|
|
||||||
self._maybe_reload()
|
|
||||||
bucket_name = (bucket_name or "*").lower() if bucket_name != "*" else (bucket_name or "*")
|
|
||||||
normalized_actions = {a: self._normalize_action(a) for a in actions}
|
|
||||||
results: Dict[str, bool] = {}
|
|
||||||
for original, canonical in normalized_actions.items():
|
|
||||||
if canonical not in ALLOWED_ACTIONS:
|
|
||||||
results[original] = False
|
|
||||||
else:
|
|
||||||
results[original] = self._is_allowed(principal, bucket_name, canonical)
|
|
||||||
return results
|
|
||||||
|
|
||||||
def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]:
|
def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]:
|
||||||
return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")]
|
return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")]
|
||||||
|
|
||||||
@@ -381,10 +317,6 @@ class IamService:
|
|||||||
new_secret = self._generate_secret_key()
|
new_secret = self._generate_secret_key()
|
||||||
user["secret_key"] = new_secret
|
user["secret_key"] = new_secret
|
||||||
self._save()
|
self._save()
|
||||||
self._principal_cache.pop(access_key, None)
|
|
||||||
self._secret_key_cache.pop(access_key, None)
|
|
||||||
from .s3_api import clear_signing_key_cache
|
|
||||||
clear_signing_key_cache()
|
|
||||||
self._load()
|
self._load()
|
||||||
return new_secret
|
return new_secret
|
||||||
|
|
||||||
@@ -403,10 +335,6 @@ class IamService:
|
|||||||
raise IamError("User not found")
|
raise IamError("User not found")
|
||||||
self._raw_config["users"] = remaining
|
self._raw_config["users"] = remaining
|
||||||
self._save()
|
self._save()
|
||||||
self._principal_cache.pop(access_key, None)
|
|
||||||
self._secret_key_cache.pop(access_key, None)
|
|
||||||
from .s3_api import clear_signing_key_cache
|
|
||||||
clear_signing_key_cache()
|
|
||||||
self._load()
|
self._load()
|
||||||
|
|
||||||
def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None:
|
def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None:
|
||||||
@@ -541,13 +469,11 @@ class IamService:
|
|||||||
return candidate if candidate in ALLOWED_ACTIONS else ""
|
return candidate if candidate in ALLOWED_ACTIONS else ""
|
||||||
|
|
||||||
def _write_default(self) -> None:
|
def _write_default(self) -> None:
|
||||||
access_key = secrets.token_hex(12)
|
|
||||||
secret_key = secrets.token_urlsafe(32)
|
|
||||||
default = {
|
default = {
|
||||||
"users": [
|
"users": [
|
||||||
{
|
{
|
||||||
"access_key": access_key,
|
"access_key": "localadmin",
|
||||||
"secret_key": secret_key,
|
"secret_key": "localadmin",
|
||||||
"display_name": "Local Admin",
|
"display_name": "Local Admin",
|
||||||
"policies": [
|
"policies": [
|
||||||
{"bucket": "*", "actions": list(ALLOWED_ACTIONS)}
|
{"bucket": "*", "actions": list(ALLOWED_ACTIONS)}
|
||||||
@@ -556,14 +482,6 @@ class IamService:
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
self.config_path.write_text(json.dumps(default, indent=2))
|
self.config_path.write_text(json.dumps(default, indent=2))
|
||||||
print(f"\n{'='*60}")
|
|
||||||
print("MYFSIO FIRST RUN - ADMIN CREDENTIALS GENERATED")
|
|
||||||
print(f"{'='*60}")
|
|
||||||
print(f"Access Key: {access_key}")
|
|
||||||
print(f"Secret Key: {secret_key}")
|
|
||||||
print(f"{'='*60}")
|
|
||||||
print(f"Missed this? Check: {self.config_path}")
|
|
||||||
print(f"{'='*60}\n")
|
|
||||||
|
|
||||||
def _generate_access_key(self) -> str:
|
def _generate_access_key(self) -> str:
|
||||||
return secrets.token_hex(8)
|
return secrets.token_hex(8)
|
||||||
@@ -579,25 +497,25 @@ class IamService:
|
|||||||
|
|
||||||
def get_secret_key(self, access_key: str) -> str | None:
|
def get_secret_key(self, access_key: str) -> str | None:
|
||||||
now = time.time()
|
now = time.time()
|
||||||
cached = self._secret_key_cache.get(access_key)
|
cached = self._credential_cache.get(access_key)
|
||||||
if cached:
|
if cached:
|
||||||
secret_key, cached_time = cached
|
secret, principal, cached_time = cached
|
||||||
if now - cached_time < self._cache_ttl:
|
if now - cached_time < self._cache_ttl:
|
||||||
return secret_key
|
return secret
|
||||||
|
|
||||||
self._maybe_reload()
|
self._maybe_reload()
|
||||||
record = self._users.get(access_key)
|
record = self._users.get(access_key)
|
||||||
if record:
|
if record:
|
||||||
secret_key = record["secret_key"]
|
principal = self._build_principal(access_key, record)
|
||||||
self._secret_key_cache[access_key] = (secret_key, now)
|
self._credential_cache[access_key] = (record["secret_key"], principal, now)
|
||||||
return secret_key
|
return record["secret_key"]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_principal(self, access_key: str) -> Principal | None:
|
def get_principal(self, access_key: str) -> Principal | None:
|
||||||
now = time.time()
|
now = time.time()
|
||||||
cached = self._principal_cache.get(access_key)
|
cached = self._credential_cache.get(access_key)
|
||||||
if cached:
|
if cached:
|
||||||
principal, cached_time = cached
|
secret, principal, cached_time = cached
|
||||||
if now - cached_time < self._cache_ttl:
|
if now - cached_time < self._cache_ttl:
|
||||||
return principal
|
return principal
|
||||||
|
|
||||||
@@ -605,6 +523,6 @@ class IamService:
|
|||||||
record = self._users.get(access_key)
|
record = self._users.get(access_key)
|
||||||
if record:
|
if record:
|
||||||
principal = self._build_principal(access_key, record)
|
principal = self._build_principal(access_key, record)
|
||||||
self._principal_cache[access_key] = (principal, now)
|
self._credential_cache[access_key] = (record["secret_key"], principal, now)
|
||||||
return principal
|
return principal
|
||||||
return None
|
return None
|
||||||
|
|||||||
135
app/kms.py
135
app/kms.py
@@ -2,11 +2,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import base64
|
import base64
|
||||||
import json
|
import json
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import secrets
|
import secrets
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
import uuid
|
import uuid
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
@@ -17,30 +13,6 @@ from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
|||||||
|
|
||||||
from .encryption import EncryptionError, EncryptionProvider, EncryptionResult
|
from .encryption import EncryptionError, EncryptionProvider, EncryptionResult
|
||||||
|
|
||||||
if sys.platform != "win32":
|
|
||||||
import fcntl
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def _set_secure_file_permissions(file_path: Path) -> None:
|
|
||||||
"""Set restrictive file permissions (owner read/write only)."""
|
|
||||||
if sys.platform == "win32":
|
|
||||||
try:
|
|
||||||
username = os.environ.get("USERNAME", "")
|
|
||||||
if username:
|
|
||||||
subprocess.run(
|
|
||||||
["icacls", str(file_path), "/inheritance:r",
|
|
||||||
"/grant:r", f"{username}:F"],
|
|
||||||
check=True, capture_output=True
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
|
|
||||||
except (subprocess.SubprocessError, OSError) as exc:
|
|
||||||
logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
|
|
||||||
else:
|
|
||||||
os.chmod(file_path, 0o600)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class KMSKey:
|
class KMSKey:
|
||||||
@@ -106,7 +78,7 @@ class KMSEncryptionProvider(EncryptionProvider):
|
|||||||
aesgcm = AESGCM(data_key)
|
aesgcm = AESGCM(data_key)
|
||||||
nonce = secrets.token_bytes(12)
|
nonce = secrets.token_bytes(12)
|
||||||
ciphertext = aesgcm.encrypt(nonce, plaintext,
|
ciphertext = aesgcm.encrypt(nonce, plaintext,
|
||||||
json.dumps(context, sort_keys=True).encode() if context else None)
|
json.dumps(context).encode() if context else None)
|
||||||
|
|
||||||
return EncryptionResult(
|
return EncryptionResult(
|
||||||
ciphertext=ciphertext,
|
ciphertext=ciphertext,
|
||||||
@@ -118,26 +90,15 @@ class KMSEncryptionProvider(EncryptionProvider):
|
|||||||
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
|
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
|
||||||
key_id: str, context: Dict[str, str] | None = None) -> bytes:
|
key_id: str, context: Dict[str, str] | None = None) -> bytes:
|
||||||
"""Decrypt data using envelope encryption with KMS."""
|
"""Decrypt data using envelope encryption with KMS."""
|
||||||
|
# Note: Data key is encrypted without context (AAD), so we decrypt without context
|
||||||
data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
|
data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
|
||||||
if len(data_key) != 32:
|
|
||||||
raise EncryptionError("Invalid data key size")
|
|
||||||
|
|
||||||
aesgcm = AESGCM(data_key)
|
aesgcm = AESGCM(data_key)
|
||||||
try:
|
try:
|
||||||
return aesgcm.decrypt(nonce, ciphertext,
|
return aesgcm.decrypt(nonce, ciphertext,
|
||||||
json.dumps(context, sort_keys=True).encode() if context else None)
|
json.dumps(context).encode() if context else None)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("KMS decryption failed: %s", exc)
|
raise EncryptionError(f"Failed to decrypt data: {exc}") from exc
|
||||||
raise EncryptionError("Failed to decrypt data") from exc
|
|
||||||
|
|
||||||
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
|
|
||||||
"""Decrypt an encrypted data key using KMS."""
|
|
||||||
if key_id is None:
|
|
||||||
key_id = self.key_id
|
|
||||||
data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
|
|
||||||
if len(data_key) != 32:
|
|
||||||
raise EncryptionError("Invalid data key size")
|
|
||||||
return data_key
|
|
||||||
|
|
||||||
|
|
||||||
class KMSManager:
|
class KMSManager:
|
||||||
@@ -147,52 +108,27 @@ class KMSManager:
|
|||||||
Keys are stored encrypted on disk.
|
Keys are stored encrypted on disk.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(self, keys_path: Path, master_key_path: Path):
|
||||||
self,
|
|
||||||
keys_path: Path,
|
|
||||||
master_key_path: Path,
|
|
||||||
generate_data_key_min_bytes: int = 1,
|
|
||||||
generate_data_key_max_bytes: int = 1024,
|
|
||||||
):
|
|
||||||
self.keys_path = keys_path
|
self.keys_path = keys_path
|
||||||
self.master_key_path = master_key_path
|
self.master_key_path = master_key_path
|
||||||
self.generate_data_key_min_bytes = generate_data_key_min_bytes
|
|
||||||
self.generate_data_key_max_bytes = generate_data_key_max_bytes
|
|
||||||
self._keys: Dict[str, KMSKey] = {}
|
self._keys: Dict[str, KMSKey] = {}
|
||||||
self._master_key: bytes | None = None
|
self._master_key: bytes | None = None
|
||||||
self._master_aesgcm: AESGCM | None = None
|
|
||||||
self._loaded = False
|
self._loaded = False
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def master_key(self) -> bytes:
|
def master_key(self) -> bytes:
|
||||||
"""Load or create the master key for encrypting KMS keys (with file locking)."""
|
"""Load or create the master key for encrypting KMS keys."""
|
||||||
if self._master_key is None:
|
if self._master_key is None:
|
||||||
lock_path = self.master_key_path.with_suffix(".lock")
|
|
||||||
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
|
||||||
with open(lock_path, "w") as lock_file:
|
|
||||||
if sys.platform == "win32":
|
|
||||||
import msvcrt
|
|
||||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
|
|
||||||
else:
|
|
||||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
|
|
||||||
try:
|
|
||||||
if self.master_key_path.exists():
|
if self.master_key_path.exists():
|
||||||
self._master_key = base64.b64decode(
|
self._master_key = base64.b64decode(
|
||||||
self.master_key_path.read_text().strip()
|
self.master_key_path.read_text().strip()
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self._master_key = secrets.token_bytes(32)
|
self._master_key = secrets.token_bytes(32)
|
||||||
|
self.master_key_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
self.master_key_path.write_text(
|
self.master_key_path.write_text(
|
||||||
base64.b64encode(self._master_key).decode()
|
base64.b64encode(self._master_key).decode()
|
||||||
)
|
)
|
||||||
_set_secure_file_permissions(self.master_key_path)
|
|
||||||
finally:
|
|
||||||
if sys.platform == "win32":
|
|
||||||
import msvcrt
|
|
||||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
|
|
||||||
else:
|
|
||||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
|
|
||||||
self._master_aesgcm = AESGCM(self._master_key)
|
|
||||||
return self._master_key
|
return self._master_key
|
||||||
|
|
||||||
def _load_keys(self) -> None:
|
def _load_keys(self) -> None:
|
||||||
@@ -209,10 +145,8 @@ class KMSManager:
|
|||||||
encrypted = base64.b64decode(key_data["EncryptedKeyMaterial"])
|
encrypted = base64.b64decode(key_data["EncryptedKeyMaterial"])
|
||||||
key.key_material = self._decrypt_key_material(encrypted)
|
key.key_material = self._decrypt_key_material(encrypted)
|
||||||
self._keys[key.key_id] = key
|
self._keys[key.key_id] = key
|
||||||
except json.JSONDecodeError as exc:
|
except Exception:
|
||||||
logger.error("Failed to parse KMS keys file: %s", exc)
|
pass
|
||||||
except (ValueError, KeyError) as exc:
|
|
||||||
logger.error("Invalid KMS key data: %s", exc)
|
|
||||||
|
|
||||||
self._loaded = True
|
self._loaded = True
|
||||||
|
|
||||||
@@ -230,19 +164,20 @@ class KMSManager:
|
|||||||
json.dumps({"keys": keys_data}, indent=2),
|
json.dumps({"keys": keys_data}, indent=2),
|
||||||
encoding="utf-8"
|
encoding="utf-8"
|
||||||
)
|
)
|
||||||
_set_secure_file_permissions(self.keys_path)
|
|
||||||
|
|
||||||
def _encrypt_key_material(self, key_material: bytes) -> bytes:
|
def _encrypt_key_material(self, key_material: bytes) -> bytes:
|
||||||
_ = self.master_key
|
"""Encrypt key material with the master key."""
|
||||||
|
aesgcm = AESGCM(self.master_key)
|
||||||
nonce = secrets.token_bytes(12)
|
nonce = secrets.token_bytes(12)
|
||||||
ciphertext = self._master_aesgcm.encrypt(nonce, key_material, None)
|
ciphertext = aesgcm.encrypt(nonce, key_material, None)
|
||||||
return nonce + ciphertext
|
return nonce + ciphertext
|
||||||
|
|
||||||
def _decrypt_key_material(self, encrypted: bytes) -> bytes:
|
def _decrypt_key_material(self, encrypted: bytes) -> bytes:
|
||||||
_ = self.master_key
|
"""Decrypt key material with the master key."""
|
||||||
|
aesgcm = AESGCM(self.master_key)
|
||||||
nonce = encrypted[:12]
|
nonce = encrypted[:12]
|
||||||
ciphertext = encrypted[12:]
|
ciphertext = encrypted[12:]
|
||||||
return self._master_aesgcm.decrypt(nonce, ciphertext, None)
|
return aesgcm.decrypt(nonce, ciphertext, None)
|
||||||
|
|
||||||
def create_key(self, description: str = "", key_id: str | None = None) -> KMSKey:
|
def create_key(self, description: str = "", key_id: str | None = None) -> KMSKey:
|
||||||
"""Create a new KMS key."""
|
"""Create a new KMS key."""
|
||||||
@@ -334,7 +269,7 @@ class KMSManager:
|
|||||||
|
|
||||||
aesgcm = AESGCM(key.key_material)
|
aesgcm = AESGCM(key.key_material)
|
||||||
nonce = secrets.token_bytes(12)
|
nonce = secrets.token_bytes(12)
|
||||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
aad = json.dumps(context).encode() if context else None
|
||||||
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
|
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
|
||||||
|
|
||||||
key_id_bytes = key_id.encode("utf-8")
|
key_id_bytes = key_id.encode("utf-8")
|
||||||
@@ -363,24 +298,17 @@ class KMSManager:
|
|||||||
encrypted = rest[12:]
|
encrypted = rest[12:]
|
||||||
|
|
||||||
aesgcm = AESGCM(key.key_material)
|
aesgcm = AESGCM(key.key_material)
|
||||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
aad = json.dumps(context).encode() if context else None
|
||||||
try:
|
try:
|
||||||
plaintext = aesgcm.decrypt(nonce, encrypted, aad)
|
plaintext = aesgcm.decrypt(nonce, encrypted, aad)
|
||||||
return plaintext, key_id
|
return plaintext, key_id
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("KMS decrypt operation failed: %s", exc)
|
raise EncryptionError(f"Decryption failed: {exc}") from exc
|
||||||
raise EncryptionError("Decryption failed") from exc
|
|
||||||
|
|
||||||
def generate_data_key(self, key_id: str,
|
def generate_data_key(self, key_id: str,
|
||||||
context: Dict[str, str] | None = None,
|
context: Dict[str, str] | None = None) -> tuple[bytes, bytes]:
|
||||||
key_spec: str = "AES_256") -> tuple[bytes, bytes]:
|
|
||||||
"""Generate a data key and return both plaintext and encrypted versions.
|
"""Generate a data key and return both plaintext and encrypted versions.
|
||||||
|
|
||||||
Args:
|
|
||||||
key_id: The KMS key ID to use for encryption
|
|
||||||
context: Optional encryption context
|
|
||||||
key_spec: Key specification - AES_128 or AES_256 (default)
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (plaintext_key, encrypted_key)
|
Tuple of (plaintext_key, encrypted_key)
|
||||||
"""
|
"""
|
||||||
@@ -391,8 +319,7 @@ class KMSManager:
|
|||||||
if not key.enabled:
|
if not key.enabled:
|
||||||
raise EncryptionError(f"Key is disabled: {key_id}")
|
raise EncryptionError(f"Key is disabled: {key_id}")
|
||||||
|
|
||||||
key_bytes = 32 if key_spec == "AES_256" else 16
|
plaintext_key = secrets.token_bytes(32)
|
||||||
plaintext_key = secrets.token_bytes(key_bytes)
|
|
||||||
|
|
||||||
encrypted_key = self.encrypt(key_id, plaintext_key, context)
|
encrypted_key = self.encrypt(key_id, plaintext_key, context)
|
||||||
|
|
||||||
@@ -404,6 +331,22 @@ class KMSManager:
|
|||||||
plaintext, _ = self.decrypt(encrypted_key, context)
|
plaintext, _ = self.decrypt(encrypted_key, context)
|
||||||
return plaintext
|
return plaintext
|
||||||
|
|
||||||
|
def get_provider(self, key_id: str | None = None) -> KMSEncryptionProvider:
|
||||||
|
"""Get an encryption provider for a specific key."""
|
||||||
|
self._load_keys()
|
||||||
|
|
||||||
|
if key_id is None:
|
||||||
|
if not self._keys:
|
||||||
|
key = self.create_key("Default KMS Key")
|
||||||
|
key_id = key.key_id
|
||||||
|
else:
|
||||||
|
key_id = next(iter(self._keys.keys()))
|
||||||
|
|
||||||
|
if key_id not in self._keys:
|
||||||
|
raise EncryptionError(f"Key not found: {key_id}")
|
||||||
|
|
||||||
|
return KMSEncryptionProvider(self, key_id)
|
||||||
|
|
||||||
def re_encrypt(self, ciphertext: bytes, destination_key_id: str,
|
def re_encrypt(self, ciphertext: bytes, destination_key_id: str,
|
||||||
source_context: Dict[str, str] | None = None,
|
source_context: Dict[str, str] | None = None,
|
||||||
destination_context: Dict[str, str] | None = None) -> bytes:
|
destination_context: Dict[str, str] | None = None) -> bytes:
|
||||||
@@ -415,8 +358,6 @@ class KMSManager:
|
|||||||
|
|
||||||
def generate_random(self, num_bytes: int = 32) -> bytes:
|
def generate_random(self, num_bytes: int = 32) -> bytes:
|
||||||
"""Generate cryptographically secure random bytes."""
|
"""Generate cryptographically secure random bytes."""
|
||||||
if num_bytes < self.generate_data_key_min_bytes or num_bytes > self.generate_data_key_max_bytes:
|
if num_bytes < 1 or num_bytes > 1024:
|
||||||
raise EncryptionError(
|
raise EncryptionError("Number of bytes must be between 1 and 1024")
|
||||||
f"Number of bytes must be between {self.generate_data_key_min_bytes} and {self.generate_data_key_max_bytes}"
|
|
||||||
)
|
|
||||||
return secrets.token_bytes(num_bytes)
|
return secrets.token_bytes(num_bytes)
|
||||||
|
|||||||
@@ -71,9 +71,10 @@ class LifecycleExecutionRecord:
|
|||||||
|
|
||||||
|
|
||||||
class LifecycleHistoryStore:
|
class LifecycleHistoryStore:
|
||||||
def __init__(self, storage_root: Path, max_history_per_bucket: int = 50) -> None:
|
MAX_HISTORY_PER_BUCKET = 50
|
||||||
|
|
||||||
|
def __init__(self, storage_root: Path) -> None:
|
||||||
self.storage_root = storage_root
|
self.storage_root = storage_root
|
||||||
self.max_history_per_bucket = max_history_per_bucket
|
|
||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
|
|
||||||
def _get_history_path(self, bucket_name: str) -> Path:
|
def _get_history_path(self, bucket_name: str) -> Path:
|
||||||
@@ -94,7 +95,7 @@ class LifecycleHistoryStore:
|
|||||||
def save_history(self, bucket_name: str, records: List[LifecycleExecutionRecord]) -> None:
|
def save_history(self, bucket_name: str, records: List[LifecycleExecutionRecord]) -> None:
|
||||||
path = self._get_history_path(bucket_name)
|
path = self._get_history_path(bucket_name)
|
||||||
path.parent.mkdir(parents=True, exist_ok=True)
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
data = {"executions": [r.to_dict() for r in records[:self.max_history_per_bucket]]}
|
data = {"executions": [r.to_dict() for r in records[:self.MAX_HISTORY_PER_BUCKET]]}
|
||||||
try:
|
try:
|
||||||
with open(path, "w") as f:
|
with open(path, "w") as f:
|
||||||
json.dump(data, f, indent=2)
|
json.dump(data, f, indent=2)
|
||||||
@@ -113,20 +114,14 @@ class LifecycleHistoryStore:
|
|||||||
|
|
||||||
|
|
||||||
class LifecycleManager:
|
class LifecycleManager:
|
||||||
def __init__(
|
def __init__(self, storage: ObjectStorage, interval_seconds: int = 3600, storage_root: Optional[Path] = None):
|
||||||
self,
|
|
||||||
storage: ObjectStorage,
|
|
||||||
interval_seconds: int = 3600,
|
|
||||||
storage_root: Optional[Path] = None,
|
|
||||||
max_history_per_bucket: int = 50,
|
|
||||||
):
|
|
||||||
self.storage = storage
|
self.storage = storage
|
||||||
self.interval_seconds = interval_seconds
|
self.interval_seconds = interval_seconds
|
||||||
self.storage_root = storage_root
|
self.storage_root = storage_root
|
||||||
self._timer: Optional[threading.Timer] = None
|
self._timer: Optional[threading.Timer] = None
|
||||||
self._shutdown = False
|
self._shutdown = False
|
||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
self.history_store = LifecycleHistoryStore(storage_root, max_history_per_bucket) if storage_root else None
|
self.history_store = LifecycleHistoryStore(storage_root) if storage_root else None
|
||||||
|
|
||||||
def start(self) -> None:
|
def start(self) -> None:
|
||||||
if self._timer is not None:
|
if self._timer is not None:
|
||||||
|
|||||||
@@ -1,10 +1,8 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import ipaddress
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import queue
|
import queue
|
||||||
import socket
|
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
@@ -16,48 +14,6 @@ from urllib.parse import urlparse
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|
||||||
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
|
||||||
"""Check if a URL is safe to make requests to (not internal/private).
|
|
||||||
|
|
||||||
Args:
|
|
||||||
url: The URL to check.
|
|
||||||
allow_internal: If True, allows internal/private IP addresses.
|
|
||||||
Use for self-hosted deployments on internal networks.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
parsed = urlparse(url)
|
|
||||||
hostname = parsed.hostname
|
|
||||||
if not hostname:
|
|
||||||
return False
|
|
||||||
cloud_metadata_hosts = {
|
|
||||||
"metadata.google.internal",
|
|
||||||
"169.254.169.254",
|
|
||||||
}
|
|
||||||
if hostname.lower() in cloud_metadata_hosts:
|
|
||||||
return False
|
|
||||||
if allow_internal:
|
|
||||||
return True
|
|
||||||
blocked_hosts = {
|
|
||||||
"localhost",
|
|
||||||
"127.0.0.1",
|
|
||||||
"0.0.0.0",
|
|
||||||
"::1",
|
|
||||||
"[::1]",
|
|
||||||
}
|
|
||||||
if hostname.lower() in blocked_hosts:
|
|
||||||
return False
|
|
||||||
try:
|
|
||||||
resolved_ip = socket.gethostbyname(hostname)
|
|
||||||
ip = ipaddress.ip_address(resolved_ip)
|
|
||||||
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
|
|
||||||
return False
|
|
||||||
except (socket.gaierror, ValueError):
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
except Exception:
|
|
||||||
return False
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -209,9 +165,8 @@ class NotificationConfiguration:
|
|||||||
|
|
||||||
|
|
||||||
class NotificationService:
|
class NotificationService:
|
||||||
def __init__(self, storage_root: Path, worker_count: int = 2, allow_internal_endpoints: bool = False):
|
def __init__(self, storage_root: Path, worker_count: int = 2):
|
||||||
self.storage_root = storage_root
|
self.storage_root = storage_root
|
||||||
self._allow_internal_endpoints = allow_internal_endpoints
|
|
||||||
self._configs: Dict[str, List[NotificationConfiguration]] = {}
|
self._configs: Dict[str, List[NotificationConfiguration]] = {}
|
||||||
self._queue: queue.Queue[tuple[NotificationEvent, WebhookDestination]] = queue.Queue()
|
self._queue: queue.Queue[tuple[NotificationEvent, WebhookDestination]] = queue.Queue()
|
||||||
self._workers: List[threading.Thread] = []
|
self._workers: List[threading.Thread] = []
|
||||||
@@ -344,8 +299,6 @@ class NotificationService:
|
|||||||
self._queue.task_done()
|
self._queue.task_done()
|
||||||
|
|
||||||
def _send_notification(self, event: NotificationEvent, destination: WebhookDestination) -> None:
|
def _send_notification(self, event: NotificationEvent, destination: WebhookDestination) -> None:
|
||||||
if not _is_safe_url(destination.url, allow_internal=self._allow_internal_endpoints):
|
|
||||||
raise RuntimeError(f"Blocked request to cloud metadata service (SSRF protection): {destination.url}")
|
|
||||||
payload = event.to_s3_event()
|
payload = event.to_s3_event()
|
||||||
headers = {"Content-Type": "application/json", **destination.headers}
|
headers = {"Content-Type": "application/json", **destination.headers}
|
||||||
|
|
||||||
|
|||||||
@@ -1,300 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import random
|
|
||||||
import threading
|
|
||||||
import time
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict, List, Optional
|
|
||||||
|
|
||||||
MAX_LATENCY_SAMPLES = 5000
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class OperationStats:
|
|
||||||
count: int = 0
|
|
||||||
success_count: int = 0
|
|
||||||
error_count: int = 0
|
|
||||||
latency_sum_ms: float = 0.0
|
|
||||||
latency_min_ms: float = float("inf")
|
|
||||||
latency_max_ms: float = 0.0
|
|
||||||
bytes_in: int = 0
|
|
||||||
bytes_out: int = 0
|
|
||||||
latency_samples: List[float] = field(default_factory=list)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _compute_percentile(sorted_data: List[float], p: float) -> float:
|
|
||||||
if not sorted_data:
|
|
||||||
return 0.0
|
|
||||||
k = (len(sorted_data) - 1) * (p / 100.0)
|
|
||||||
f = int(k)
|
|
||||||
c = min(f + 1, len(sorted_data) - 1)
|
|
||||||
d = k - f
|
|
||||||
return sorted_data[f] + d * (sorted_data[c] - sorted_data[f])
|
|
||||||
|
|
||||||
def record(self, latency_ms: float, success: bool, bytes_in: int = 0, bytes_out: int = 0) -> None:
|
|
||||||
self.count += 1
|
|
||||||
if success:
|
|
||||||
self.success_count += 1
|
|
||||||
else:
|
|
||||||
self.error_count += 1
|
|
||||||
self.latency_sum_ms += latency_ms
|
|
||||||
if latency_ms < self.latency_min_ms:
|
|
||||||
self.latency_min_ms = latency_ms
|
|
||||||
if latency_ms > self.latency_max_ms:
|
|
||||||
self.latency_max_ms = latency_ms
|
|
||||||
self.bytes_in += bytes_in
|
|
||||||
self.bytes_out += bytes_out
|
|
||||||
if len(self.latency_samples) < MAX_LATENCY_SAMPLES:
|
|
||||||
self.latency_samples.append(latency_ms)
|
|
||||||
else:
|
|
||||||
j = random.randint(0, self.count - 1)
|
|
||||||
if j < MAX_LATENCY_SAMPLES:
|
|
||||||
self.latency_samples[j] = latency_ms
|
|
||||||
|
|
||||||
def to_dict(self) -> Dict[str, Any]:
|
|
||||||
avg_latency = self.latency_sum_ms / self.count if self.count > 0 else 0.0
|
|
||||||
min_latency = self.latency_min_ms if self.latency_min_ms != float("inf") else 0.0
|
|
||||||
sorted_latencies = sorted(self.latency_samples)
|
|
||||||
return {
|
|
||||||
"count": self.count,
|
|
||||||
"success_count": self.success_count,
|
|
||||||
"error_count": self.error_count,
|
|
||||||
"latency_avg_ms": round(avg_latency, 2),
|
|
||||||
"latency_min_ms": round(min_latency, 2),
|
|
||||||
"latency_max_ms": round(self.latency_max_ms, 2),
|
|
||||||
"latency_p50_ms": round(self._compute_percentile(sorted_latencies, 50), 2),
|
|
||||||
"latency_p95_ms": round(self._compute_percentile(sorted_latencies, 95), 2),
|
|
||||||
"latency_p99_ms": round(self._compute_percentile(sorted_latencies, 99), 2),
|
|
||||||
"bytes_in": self.bytes_in,
|
|
||||||
"bytes_out": self.bytes_out,
|
|
||||||
}
|
|
||||||
|
|
||||||
def merge(self, other: "OperationStats") -> None:
|
|
||||||
self.count += other.count
|
|
||||||
self.success_count += other.success_count
|
|
||||||
self.error_count += other.error_count
|
|
||||||
self.latency_sum_ms += other.latency_sum_ms
|
|
||||||
if other.latency_min_ms < self.latency_min_ms:
|
|
||||||
self.latency_min_ms = other.latency_min_ms
|
|
||||||
if other.latency_max_ms > self.latency_max_ms:
|
|
||||||
self.latency_max_ms = other.latency_max_ms
|
|
||||||
self.bytes_in += other.bytes_in
|
|
||||||
self.bytes_out += other.bytes_out
|
|
||||||
combined = self.latency_samples + other.latency_samples
|
|
||||||
if len(combined) > MAX_LATENCY_SAMPLES:
|
|
||||||
random.shuffle(combined)
|
|
||||||
combined = combined[:MAX_LATENCY_SAMPLES]
|
|
||||||
self.latency_samples = combined
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class MetricsSnapshot:
    """Serializable record of the metrics gathered during one collection window."""

    # Moment at which the snapshot was taken.
    timestamp: datetime
    # Length of the window covered by the snapshot, in seconds.
    window_seconds: int
    # Serialized stats keyed by HTTP method.
    by_method: Dict[str, Dict[str, Any]]
    # Serialized stats keyed by endpoint type.
    by_endpoint: Dict[str, Dict[str, Any]]
    # Request counts keyed by status class.
    by_status_class: Dict[str, int]
    # Occurrence counts keyed by error code.
    error_codes: Dict[str, int]
    # Serialized stats aggregated over every request in the window.
    totals: Dict[str, Any]

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-ready mapping; the timestamp becomes an ISO-8601 string."""
        payload: Dict[str, Any] = {"timestamp": self.timestamp.isoformat()}
        for attr in (
            "window_seconds",
            "by_method",
            "by_endpoint",
            "by_status_class",
            "error_codes",
            "totals",
        ):
            payload[attr] = getattr(self, attr)
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "MetricsSnapshot":
        """Rebuild a snapshot from the mapping produced by :meth:`to_dict`.

        ``timestamp`` is mandatory (a missing key raises ``KeyError``); a
        missing ``window_seconds`` falls back to 300 and every missing
        mapping falls back to an empty dict.
        """
        kwargs: Dict[str, Any] = {
            "timestamp": datetime.fromisoformat(data["timestamp"]),
            "window_seconds": data.get("window_seconds", 300),
        }
        for attr in ("by_method", "by_endpoint", "by_status_class", "error_codes", "totals"):
            kwargs[attr] = data.get(attr, {})
        return cls(**kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
class OperationMetricsCollector:
    """Aggregate request metrics in memory and persist periodic snapshots.

    Requests are recorded into per-method, per-endpoint, per-status-class and
    per-error-code buckets.  A daemon thread closes the current window every
    ``interval_minutes``, appends a ``MetricsSnapshot`` to the in-memory
    history, drops snapshots older than ``retention_hours`` and rewrites the
    JSON history file under
    ``<storage_root>/.myfsio.sys/config/operation_metrics.json``.
    """

    def __init__(
        self,
        storage_root: Path,
        interval_minutes: int = 5,
        retention_hours: int = 24,
    ):
        """Load any persisted history and start the background snapshot thread.

        Args:
            storage_root: Root directory under which the history file lives.
            interval_minutes: Length of one collection window.
            retention_hours: Snapshots older than this are pruned.
        """
        self.storage_root = storage_root
        self.interval_seconds = interval_minutes * 60
        self.retention_hours = retention_hours
        self._lock = threading.Lock()
        self._by_method: Dict[str, OperationStats] = {}
        self._by_endpoint: Dict[str, OperationStats] = {}
        self._by_status_class: Dict[str, int] = {}
        self._error_codes: Dict[str, int] = {}
        self._totals = OperationStats()
        self._window_start = time.time()
        self._shutdown = threading.Event()
        self._snapshots: List[MetricsSnapshot] = []

        self._load_history()

        self._snapshot_thread = threading.Thread(
            target=self._snapshot_loop, name="operation-metrics-snapshot", daemon=True
        )
        self._snapshot_thread.start()

    def _config_path(self) -> Path:
        """Return the location of the persisted snapshot history file."""
        return self.storage_root / ".myfsio.sys" / "config" / "operation_metrics.json"

    def _load_history(self) -> None:
        """Populate ``self._snapshots`` from disk, ignoring an unusable file.

        A missing file is not an error.  Any read, decode or shape problem is
        logged and leaves the history empty rather than failing construction.
        """
        config_path = self._config_path()
        if not config_path.exists():
            return
        try:
            data = json.loads(config_path.read_text(encoding="utf-8"))
            snapshots_data = data.get("snapshots", [])
            self._snapshots = [MetricsSnapshot.from_dict(s) for s in snapshots_data]
            self._prune_old_snapshots()
        except (
            json.JSONDecodeError,
            OSError,
            KeyError,
            # Malformed ISO timestamps raise ValueError; a payload that is not
            # the expected dict-of-dicts raises TypeError/AttributeError.
            ValueError,
            TypeError,
            AttributeError,
        ) as e:
            logger.warning(f"Failed to load operation metrics history: {e}")

    def _save_history(self) -> None:
        """Write the snapshot history to disk; failures are logged, not raised."""
        config_path = self._config_path()
        tmp_path = config_path.with_name(config_path.name + ".tmp")
        try:
            # mkdir is inside the try so a read-only or missing volume cannot
            # propagate out of the snapshot thread.
            config_path.parent.mkdir(parents=True, exist_ok=True)
            data = {"snapshots": [s.to_dict() for s in self._snapshots]}
            # Write to a sibling file and rename so an interrupted write never
            # leaves a truncated history file behind.
            tmp_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
            tmp_path.replace(config_path)
        except OSError as e:
            logger.warning(f"Failed to save operation metrics history: {e}")

    def _prune_old_snapshots(self) -> None:
        """Drop snapshots whose timestamp is older than the retention period."""
        if not self._snapshots:
            return
        cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600)
        self._snapshots = [
            s for s in self._snapshots if s.timestamp.timestamp() > cutoff
        ]

    def _snapshot_loop(self) -> None:
        """Take a snapshot every interval until shutdown is requested."""
        # Event.wait returns True once the shutdown flag is set, False on timeout.
        while not self._shutdown.wait(timeout=self.interval_seconds):
            try:
                self._take_snapshot()
            except Exception:
                # Keep the collector alive; one failed window must not stop
                # all future snapshots.
                logger.exception("Failed to take operation metrics snapshot")

    def _take_snapshot(self) -> None:
        """Close the current window: record it, persist history, reset counters."""
        with self._lock:
            now = datetime.now(timezone.utc)
            window_seconds = int(time.time() - self._window_start)

            snapshot = MetricsSnapshot(
                timestamp=now,
                window_seconds=window_seconds,
                by_method={k: v.to_dict() for k, v in self._by_method.items()},
                by_endpoint={k: v.to_dict() for k, v in self._by_endpoint.items()},
                by_status_class=dict(self._by_status_class),
                error_codes=dict(self._error_codes),
                totals=self._totals.to_dict(),
            )

            self._snapshots.append(snapshot)
            self._prune_old_snapshots()
            self._save_history()

            self._by_method.clear()
            self._by_endpoint.clear()
            self._by_status_class.clear()
            self._error_codes.clear()
            self._totals = OperationStats()
            self._window_start = time.time()

    def record_request(
        self,
        method: str,
        endpoint_type: str,
        status_code: int,
        latency_ms: float,
        bytes_in: int = 0,
        bytes_out: int = 0,
        error_code: Optional[str] = None,
    ) -> None:
        """Record one completed request in the current window.

        Status codes in ``[200, 400)`` count as successes.  The request is
        added to the per-method, per-endpoint and overall stats, to the
        ``Nxx`` status-class counter and, when ``error_code`` is truthy, to
        the error-code counter.
        """
        success = 200 <= status_code < 400
        status_class = f"{status_code // 100}xx"

        with self._lock:
            method_stats = self._by_method.get(method)
            if method_stats is None:
                method_stats = self._by_method[method] = OperationStats()
            method_stats.record(latency_ms, success, bytes_in, bytes_out)

            endpoint_stats = self._by_endpoint.get(endpoint_type)
            if endpoint_stats is None:
                endpoint_stats = self._by_endpoint[endpoint_type] = OperationStats()
            endpoint_stats.record(latency_ms, success, bytes_in, bytes_out)

            self._by_status_class[status_class] = self._by_status_class.get(status_class, 0) + 1

            if error_code:
                self._error_codes[error_code] = self._error_codes.get(error_code, 0) + 1

            self._totals.record(latency_ms, success, bytes_in, bytes_out)

    def get_current_stats(self) -> Dict[str, Any]:
        """Return the metrics of the still-open window in snapshot layout."""
        with self._lock:
            window_seconds = int(time.time() - self._window_start)
            return {
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "window_seconds": window_seconds,
                "by_method": {k: v.to_dict() for k, v in self._by_method.items()},
                "by_endpoint": {k: v.to_dict() for k, v in self._by_endpoint.items()},
                "by_status_class": dict(self._by_status_class),
                "error_codes": dict(self._error_codes),
                "totals": self._totals.to_dict(),
            }

    def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]:
        """Return stored snapshots as dicts, optionally limited to recent hours.

        A falsy ``hours`` (``None`` or ``0``) returns the whole history.
        """
        with self._lock:
            snapshots = list(self._snapshots)

        if hours:
            cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)
            snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff]

        return [s.to_dict() for s in snapshots]

    def shutdown(self) -> None:
        """Stop the snapshot thread after flushing the current window to disk."""
        self._shutdown.set()
        self._take_snapshot()
        self._snapshot_thread.join(timeout=5.0)
|
|
||||||
|
|
||||||
|
|
||||||
def classify_endpoint(path: str) -> str:
    """Classify a request path into a coarse endpoint type for metrics.

    The first path segment decides the result: ``ui`` and ``kms`` map to
    themselves and ``myfsio`` maps to ``"service"``.  Any other single-segment
    path is a ``"bucket"`` and anything deeper is an ``"object"``.  An empty
    path, or one made only of slashes, is the ``"service"`` root.

    Reserved prefixes are compared against the whole first segment, so a
    bucket whose name merely starts with a reserved word (``/uikit``,
    ``/kmsdata/file``) is still reported as a bucket/object.

    Args:
        path: URL path of the request, with or without surrounding slashes.

    Returns:
        One of ``"service"``, ``"ui"``, ``"kms"``, ``"bucket"`` or ``"object"``.
    """
    trimmed = path.strip("/") if path else ""
    if not trimmed:
        return "service"

    reserved = {"ui": "ui", "kms": "kms", "myfsio": "service"}
    first_segment, separator, _ = trimmed.partition("/")
    if first_segment in reserved:
        return reserved[first_segment]

    # A separator after the first segment means a key follows the bucket name.
    return "object" if separator else "bucket"
|
|
||||||
@@ -21,20 +21,15 @@ from .storage import ObjectStorage, StorageError
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
|
REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
|
||||||
|
REPLICATION_CONNECT_TIMEOUT = 5
|
||||||
|
REPLICATION_READ_TIMEOUT = 30
|
||||||
|
STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024
|
||||||
|
|
||||||
REPLICATION_MODE_NEW_ONLY = "new_only"
|
REPLICATION_MODE_NEW_ONLY = "new_only"
|
||||||
REPLICATION_MODE_ALL = "all"
|
REPLICATION_MODE_ALL = "all"
|
||||||
REPLICATION_MODE_BIDIRECTIONAL = "bidirectional"
|
|
||||||
|
|
||||||
|
|
||||||
def _create_s3_client(
|
def _create_s3_client(connection: RemoteConnection, *, health_check: bool = False) -> Any:
|
||||||
connection: RemoteConnection,
|
|
||||||
*,
|
|
||||||
health_check: bool = False,
|
|
||||||
connect_timeout: int = 5,
|
|
||||||
read_timeout: int = 30,
|
|
||||||
max_retries: int = 2,
|
|
||||||
) -> Any:
|
|
||||||
"""Create a boto3 S3 client for the given connection.
|
"""Create a boto3 S3 client for the given connection.
|
||||||
Args:
|
Args:
|
||||||
connection: Remote S3 connection configuration
|
connection: Remote S3 connection configuration
|
||||||
@@ -42,9 +37,9 @@ def _create_s3_client(
|
|||||||
"""
|
"""
|
||||||
config = Config(
|
config = Config(
|
||||||
user_agent_extra=REPLICATION_USER_AGENT,
|
user_agent_extra=REPLICATION_USER_AGENT,
|
||||||
connect_timeout=connect_timeout,
|
connect_timeout=REPLICATION_CONNECT_TIMEOUT,
|
||||||
read_timeout=read_timeout,
|
read_timeout=REPLICATION_READ_TIMEOUT,
|
||||||
retries={'max_attempts': 1 if health_check else max_retries},
|
retries={'max_attempts': 1 if health_check else 2},
|
||||||
signature_version='s3v4',
|
signature_version='s3v4',
|
||||||
s3={'addressing_style': 'path'},
|
s3={'addressing_style': 'path'},
|
||||||
request_checksum_calculation='when_required',
|
request_checksum_calculation='when_required',
|
||||||
@@ -135,9 +130,6 @@ class ReplicationRule:
|
|||||||
mode: str = REPLICATION_MODE_NEW_ONLY
|
mode: str = REPLICATION_MODE_NEW_ONLY
|
||||||
created_at: Optional[float] = None
|
created_at: Optional[float] = None
|
||||||
stats: ReplicationStats = field(default_factory=ReplicationStats)
|
stats: ReplicationStats = field(default_factory=ReplicationStats)
|
||||||
sync_deletions: bool = True
|
|
||||||
last_pull_at: Optional[float] = None
|
|
||||||
filter_prefix: Optional[str] = None
|
|
||||||
|
|
||||||
def to_dict(self) -> dict:
|
def to_dict(self) -> dict:
|
||||||
return {
|
return {
|
||||||
@@ -148,9 +140,6 @@ class ReplicationRule:
|
|||||||
"mode": self.mode,
|
"mode": self.mode,
|
||||||
"created_at": self.created_at,
|
"created_at": self.created_at,
|
||||||
"stats": self.stats.to_dict(),
|
"stats": self.stats.to_dict(),
|
||||||
"sync_deletions": self.sync_deletions,
|
|
||||||
"last_pull_at": self.last_pull_at,
|
|
||||||
"filter_prefix": self.filter_prefix,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -160,28 +149,22 @@ class ReplicationRule:
|
|||||||
data["mode"] = REPLICATION_MODE_NEW_ONLY
|
data["mode"] = REPLICATION_MODE_NEW_ONLY
|
||||||
if "created_at" not in data:
|
if "created_at" not in data:
|
||||||
data["created_at"] = None
|
data["created_at"] = None
|
||||||
if "sync_deletions" not in data:
|
|
||||||
data["sync_deletions"] = True
|
|
||||||
if "last_pull_at" not in data:
|
|
||||||
data["last_pull_at"] = None
|
|
||||||
if "filter_prefix" not in data:
|
|
||||||
data["filter_prefix"] = None
|
|
||||||
rule = cls(**data)
|
rule = cls(**data)
|
||||||
rule.stats = ReplicationStats.from_dict(stats_data) if stats_data else ReplicationStats()
|
rule.stats = ReplicationStats.from_dict(stats_data) if stats_data else ReplicationStats()
|
||||||
return rule
|
return rule
|
||||||
|
|
||||||
|
|
||||||
class ReplicationFailureStore:
|
class ReplicationFailureStore:
|
||||||
def __init__(self, storage_root: Path, max_failures_per_bucket: int = 50) -> None:
|
MAX_FAILURES_PER_BUCKET = 50
|
||||||
|
|
||||||
|
def __init__(self, storage_root: Path) -> None:
|
||||||
self.storage_root = storage_root
|
self.storage_root = storage_root
|
||||||
self.max_failures_per_bucket = max_failures_per_bucket
|
|
||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
self._cache: Dict[str, List[ReplicationFailure]] = {}
|
|
||||||
|
|
||||||
def _get_failures_path(self, bucket_name: str) -> Path:
|
def _get_failures_path(self, bucket_name: str) -> Path:
|
||||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "replication_failures.json"
|
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "replication_failures.json"
|
||||||
|
|
||||||
def _load_from_disk(self, bucket_name: str) -> List[ReplicationFailure]:
|
def load_failures(self, bucket_name: str) -> List[ReplicationFailure]:
|
||||||
path = self._get_failures_path(bucket_name)
|
path = self._get_failures_path(bucket_name)
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
return []
|
return []
|
||||||
@@ -193,28 +176,16 @@ class ReplicationFailureStore:
|
|||||||
logger.error(f"Failed to load replication failures for {bucket_name}: {e}")
|
logger.error(f"Failed to load replication failures for {bucket_name}: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _save_to_disk(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
|
def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
|
||||||
path = self._get_failures_path(bucket_name)
|
path = self._get_failures_path(bucket_name)
|
||||||
path.parent.mkdir(parents=True, exist_ok=True)
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
data = {"failures": [f.to_dict() for f in failures[:self.max_failures_per_bucket]]}
|
data = {"failures": [f.to_dict() for f in failures[:self.MAX_FAILURES_PER_BUCKET]]}
|
||||||
try:
|
try:
|
||||||
with open(path, "w") as f:
|
with open(path, "w") as f:
|
||||||
json.dump(data, f, indent=2)
|
json.dump(data, f, indent=2)
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
logger.error(f"Failed to save replication failures for {bucket_name}: {e}")
|
logger.error(f"Failed to save replication failures for {bucket_name}: {e}")
|
||||||
|
|
||||||
def load_failures(self, bucket_name: str) -> List[ReplicationFailure]:
|
|
||||||
if bucket_name in self._cache:
|
|
||||||
return list(self._cache[bucket_name])
|
|
||||||
failures = self._load_from_disk(bucket_name)
|
|
||||||
self._cache[bucket_name] = failures
|
|
||||||
return list(failures)
|
|
||||||
|
|
||||||
def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
|
|
||||||
trimmed = failures[:self.max_failures_per_bucket]
|
|
||||||
self._cache[bucket_name] = trimmed
|
|
||||||
self._save_to_disk(bucket_name, trimmed)
|
|
||||||
|
|
||||||
def add_failure(self, bucket_name: str, failure: ReplicationFailure) -> None:
|
def add_failure(self, bucket_name: str, failure: ReplicationFailure) -> None:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
failures = self.load_failures(bucket_name)
|
failures = self.load_failures(bucket_name)
|
||||||
@@ -240,7 +211,6 @@ class ReplicationFailureStore:
|
|||||||
|
|
||||||
def clear_failures(self, bucket_name: str) -> None:
|
def clear_failures(self, bucket_name: str) -> None:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
self._cache.pop(bucket_name, None)
|
|
||||||
path = self._get_failures_path(bucket_name)
|
path = self._get_failures_path(bucket_name)
|
||||||
if path.exists():
|
if path.exists():
|
||||||
path.unlink()
|
path.unlink()
|
||||||
@@ -254,43 +224,18 @@ class ReplicationFailureStore:
|
|||||||
|
|
||||||
|
|
||||||
class ReplicationManager:
|
class ReplicationManager:
|
||||||
def __init__(
|
def __init__(self, storage: ObjectStorage, connections: ConnectionStore, rules_path: Path, storage_root: Path) -> None:
|
||||||
self,
|
|
||||||
storage: ObjectStorage,
|
|
||||||
connections: ConnectionStore,
|
|
||||||
rules_path: Path,
|
|
||||||
storage_root: Path,
|
|
||||||
connect_timeout: int = 5,
|
|
||||||
read_timeout: int = 30,
|
|
||||||
max_retries: int = 2,
|
|
||||||
streaming_threshold_bytes: int = 10 * 1024 * 1024,
|
|
||||||
max_failures_per_bucket: int = 50,
|
|
||||||
) -> None:
|
|
||||||
self.storage = storage
|
self.storage = storage
|
||||||
self.connections = connections
|
self.connections = connections
|
||||||
self.rules_path = rules_path
|
self.rules_path = rules_path
|
||||||
self.storage_root = storage_root
|
self.storage_root = storage_root
|
||||||
self.connect_timeout = connect_timeout
|
|
||||||
self.read_timeout = read_timeout
|
|
||||||
self.max_retries = max_retries
|
|
||||||
self.streaming_threshold_bytes = streaming_threshold_bytes
|
|
||||||
self._rules: Dict[str, ReplicationRule] = {}
|
self._rules: Dict[str, ReplicationRule] = {}
|
||||||
self._stats_lock = threading.Lock()
|
self._stats_lock = threading.Lock()
|
||||||
self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
|
self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
|
||||||
self._shutdown = False
|
self._shutdown = False
|
||||||
self.failure_store = ReplicationFailureStore(storage_root, max_failures_per_bucket)
|
self.failure_store = ReplicationFailureStore(storage_root)
|
||||||
self.reload_rules()
|
self.reload_rules()
|
||||||
|
|
||||||
def _create_client(self, connection: RemoteConnection, *, health_check: bool = False) -> Any:
|
|
||||||
"""Create an S3 client with the manager's configured timeouts."""
|
|
||||||
return _create_s3_client(
|
|
||||||
connection,
|
|
||||||
health_check=health_check,
|
|
||||||
connect_timeout=self.connect_timeout,
|
|
||||||
read_timeout=self.read_timeout,
|
|
||||||
max_retries=self.max_retries,
|
|
||||||
)
|
|
||||||
|
|
||||||
def shutdown(self, wait: bool = True) -> None:
|
def shutdown(self, wait: bool = True) -> None:
|
||||||
"""Shutdown the replication executor gracefully.
|
"""Shutdown the replication executor gracefully.
|
||||||
|
|
||||||
@@ -326,7 +271,7 @@ class ReplicationManager:
|
|||||||
Uses short timeouts to prevent blocking.
|
Uses short timeouts to prevent blocking.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
s3 = self._create_client(connection, health_check=True)
|
s3 = _create_s3_client(connection, health_check=True)
|
||||||
s3.list_buckets()
|
s3.list_buckets()
|
||||||
return True
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -336,9 +281,6 @@ class ReplicationManager:
|
|||||||
def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]:
|
def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]:
|
||||||
return self._rules.get(bucket_name)
|
return self._rules.get(bucket_name)
|
||||||
|
|
||||||
def list_rules(self) -> List[ReplicationRule]:
|
|
||||||
return list(self._rules.values())
|
|
||||||
|
|
||||||
def set_rule(self, rule: ReplicationRule) -> None:
|
def set_rule(self, rule: ReplicationRule) -> None:
|
||||||
old_rule = self._rules.get(rule.bucket_name)
|
old_rule = self._rules.get(rule.bucket_name)
|
||||||
was_all_mode = old_rule and old_rule.mode == REPLICATION_MODE_ALL if old_rule else False
|
was_all_mode = old_rule and old_rule.mode == REPLICATION_MODE_ALL if old_rule else False
|
||||||
@@ -378,7 +320,7 @@ class ReplicationManager:
|
|||||||
source_objects = self.storage.list_objects_all(bucket_name)
|
source_objects = self.storage.list_objects_all(bucket_name)
|
||||||
source_keys = {obj.key: obj.size for obj in source_objects}
|
source_keys = {obj.key: obj.size for obj in source_objects}
|
||||||
|
|
||||||
s3 = self._create_client(connection)
|
s3 = _create_s3_client(connection)
|
||||||
|
|
||||||
dest_keys = set()
|
dest_keys = set()
|
||||||
bytes_synced = 0
|
bytes_synced = 0
|
||||||
@@ -444,7 +386,7 @@ class ReplicationManager:
|
|||||||
raise ValueError(f"Connection {connection_id} not found")
|
raise ValueError(f"Connection {connection_id} not found")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
s3 = self._create_client(connection)
|
s3 = _create_s3_client(connection)
|
||||||
s3.create_bucket(Bucket=bucket_name)
|
s3.create_bucket(Bucket=bucket_name)
|
||||||
except ClientError as e:
|
except ClientError as e:
|
||||||
logger.error(f"Failed to create remote bucket {bucket_name}: {e}")
|
logger.error(f"Failed to create remote bucket {bucket_name}: {e}")
|
||||||
@@ -487,7 +429,7 @@ class ReplicationManager:
|
|||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
s3 = self._create_client(conn)
|
s3 = _create_s3_client(conn)
|
||||||
|
|
||||||
if action == "delete":
|
if action == "delete":
|
||||||
try:
|
try:
|
||||||
@@ -530,7 +472,7 @@ class ReplicationManager:
|
|||||||
if content_type:
|
if content_type:
|
||||||
extra_args["ContentType"] = content_type
|
extra_args["ContentType"] = content_type
|
||||||
|
|
||||||
if file_size >= self.streaming_threshold_bytes:
|
if file_size >= STREAMING_THRESHOLD_BYTES:
|
||||||
s3.upload_file(
|
s3.upload_file(
|
||||||
str(path),
|
str(path),
|
||||||
rule.target_bucket,
|
rule.target_bucket,
|
||||||
|
|||||||
1014
app/s3_api.py
1014
app/s3_api.py
File diff suppressed because it is too large
Load Diff
284
app/s3_client.py
284
app/s3_client.py
@@ -1,284 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import threading
|
|
||||||
import time
|
|
||||||
from typing import Any, Generator, Optional
|
|
||||||
|
|
||||||
import boto3
|
|
||||||
from botocore.config import Config
|
|
||||||
from botocore.exceptions import ClientError, EndpointConnectionError, ConnectionClosedError
|
|
||||||
from flask import current_app, session
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)

# Extra user-agent token attached to every client built by S3ProxyClient.
UI_PROXY_USER_AGENT = "MyFSIO-UIProxy/1.0"

# S3 error code -> HTTP status returned to the UI caller.
_BOTO_ERROR_MAP = {
    # Not found
    "NoSuchBucket": 404,
    "NoSuchKey": 404,
    "NoSuchUpload": 404,
    # Conflict
    "BucketAlreadyExists": 409,
    "BucketAlreadyOwnedByYou": 409,
    "BucketNotEmpty": 409,
    # Forbidden
    "AccessDenied": 403,
    "InvalidAccessKeyId": 403,
    "SignatureDoesNotMatch": 403,
    # Bad request
    "InvalidBucketName": 400,
    "InvalidArgument": 400,
    "MalformedXML": 400,
    "EntityTooLarge": 400,
    "QuotaExceeded": 403,
}

# Upload registry entries expire after one day (seconds).
_UPLOAD_REGISTRY_MAX_AGE = 24 * 60 * 60
# Minimum spacing between registry clean-up passes: one hour (seconds).
_UPLOAD_REGISTRY_CLEANUP_INTERVAL = 60 * 60
|
|
||||||
|
|
||||||
|
|
||||||
class UploadRegistry:
    """In-memory map from upload id to ``(bucket, key, registration time)``.

    Every public method takes the internal lock.  Entries older than
    ``_UPLOAD_REGISTRY_MAX_AGE`` seconds are treated as absent and are
    swept out at most once per ``_UPLOAD_REGISTRY_CLEANUP_INTERVAL``.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._entries: dict[str, tuple[str, str, float]] = {}
        self._last_cleanup = time.monotonic()

    def register(self, upload_id: str, bucket_name: str, object_key: str) -> None:
        """Remember which bucket/key *upload_id* belongs to."""
        with self._lock:
            registered_at = time.monotonic()
            self._entries[upload_id] = (bucket_name, object_key, registered_at)
            self._maybe_cleanup()

    def get_key(self, upload_id: str, bucket_name: str) -> Optional[str]:
        """Return the object key for *upload_id*, or ``None``.

        ``None`` is returned when the id is unknown, was registered for a
        different bucket, or has expired (in which case it is also removed).
        """
        with self._lock:
            try:
                stored_bucket, object_key, registered_at = self._entries[upload_id]
            except KeyError:
                return None
            if stored_bucket != bucket_name:
                return None
            age = time.monotonic() - registered_at
            if age > _UPLOAD_REGISTRY_MAX_AGE:
                del self._entries[upload_id]
                return None
            return object_key

    def remove(self, upload_id: str) -> None:
        """Forget *upload_id*; unknown ids are ignored."""
        with self._lock:
            self._entries.pop(upload_id, None)

    def _maybe_cleanup(self) -> None:
        """Purge expired entries if the clean-up interval has elapsed.

        Called from ``register`` while the lock is already held.
        """
        now = time.monotonic()
        if now - self._last_cleanup < _UPLOAD_REGISTRY_CLEANUP_INTERVAL:
            return
        self._last_cleanup = now
        oldest_allowed = now - _UPLOAD_REGISTRY_MAX_AGE
        expired_ids = [
            uid for uid, entry in self._entries.items() if entry[2] < oldest_allowed
        ]
        for uid in expired_ids:
            del self._entries[uid]
|
|
||||||
|
|
||||||
|
|
||||||
class S3ProxyClient:
    """Factory for boto3 S3 clients that target the configured API server.

    Also owns the ``UploadRegistry`` instance exposed as ``upload_registry``.
    """

    def __init__(self, api_base_url: str, region: str = "us-east-1") -> None:
        """Store the endpoint (without trailing slashes) and region.

        Raises:
            ValueError: If ``api_base_url`` is empty.
        """
        if not api_base_url:
            raise ValueError("api_base_url is required for S3ProxyClient")
        self._region = region
        self._api_base_url = api_base_url.rstrip("/")
        self.upload_registry = UploadRegistry()

    @property
    def api_base_url(self) -> str:
        """Endpoint URL every client produced by this proxy talks to."""
        return self._api_base_url

    def get_client(self, access_key: str, secret_key: str) -> Any:
        """Build a boto3 S3 client signed with the given credentials.

        Raises:
            ValueError: If either credential is empty.
        """
        if not (access_key and secret_key):
            raise ValueError("Both access_key and secret_key are required")
        client_config = Config(
            user_agent_extra=UI_PROXY_USER_AGENT,
            connect_timeout=5,
            read_timeout=30,
            retries={"max_attempts": 0},
            signature_version="s3v4",
            s3={"addressing_style": "path"},
            request_checksum_calculation="when_required",
            response_checksum_validation="when_required",
        )
        return boto3.client(
            "s3",
            endpoint_url=self._api_base_url,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            region_name=self._region,
            config=client_config,
        )
|
|
||||||
|
|
||||||
|
|
||||||
def _get_proxy() -> S3ProxyClient:
    """Return the ``s3_proxy`` extension of the current Flask app.

    Raises:
        RuntimeError: If no proxy has been registered on the app.
    """
    configured = current_app.extensions.get("s3_proxy")
    if configured is not None:
        return configured
    raise RuntimeError(
        "S3 proxy not configured. Set API_BASE_URL or run both API and UI servers."
    )
|
|
||||||
|
|
||||||
|
|
||||||
def _get_session_creds() -> tuple[str, str]:
    """Resolve the access/secret key pair bound to the current session.

    Expired entries are purged from the app's ``secret_store`` first, then
    the session's ``cred_token`` is looked up without consuming it.

    Raises:
        PermissionError: If the session has no token, the token no longer
            resolves to credentials, or either key is empty.
    """
    store = current_app.extensions["secret_store"]
    store.purge_expired()

    cred_token = session.get("cred_token")
    if not cred_token:
        raise PermissionError("Not authenticated")

    payload = store.peek(cred_token)
    if not payload:
        raise PermissionError("Session expired")

    access_key, secret_key = (
        payload.get(field, "") for field in ("access_key", "secret_key")
    )
    if not (access_key and secret_key):
        raise PermissionError("Invalid session credentials")
    return access_key, secret_key
|
|
||||||
|
|
||||||
|
|
||||||
def get_session_s3_client() -> Any:
    """Return an S3 client signed with the current session's credentials.

    The proxy is resolved before the credentials, so a missing proxy is
    reported ahead of any authentication problem.
    """
    s3_proxy = _get_proxy()
    credentials = _get_session_creds()
    return s3_proxy.get_client(*credentials)
|
|
||||||
|
|
||||||
|
|
||||||
def get_upload_registry() -> UploadRegistry:
    """Return the upload registry owned by the app's S3 proxy."""
    s3_proxy = _get_proxy()
    return s3_proxy.upload_registry
|
|
||||||
|
|
||||||
|
|
||||||
def handle_client_error(exc: ClientError) -> tuple[dict[str, str], int]:
    """Translate a botocore ``ClientError`` into ``(json_body, http_status)``.

    The status comes from ``_BOTO_ERROR_MAP`` when the S3 error code is
    listed there, otherwise from the response metadata (default 500).  The
    body carries the S3 message, or a generic one when it is missing/empty.
    """
    details = exc.response.get("Error", {})
    s3_code = details.get("Code", "InternalError")

    status = _BOTO_ERROR_MAP.get(s3_code)
    if status is None:
        metadata = exc.response.get("ResponseMetadata", {})
        status = metadata.get("HTTPStatusCode", 500)

    body = {"error": details.get("Message") or "S3 operation failed"}
    return body, status
|
|
||||||
|
|
||||||
|
|
||||||
def handle_connection_error(exc: Exception) -> tuple[dict[str, str], int]:
    """Log a failed connection to the S3 API and return a 502 JSON payload."""
    logger.error("S3 API connection failed: %s", exc)
    body = {"error": "S3 API server is unreachable. Ensure the API server is running."}
    return body, 502
|
|
||||||
|
|
||||||
|
|
||||||
def format_datetime_display(dt: Any, display_tz: str = "UTC") -> str:
    """Delegate to ``ui._format_datetime_display``.

    The helper is imported at call time rather than at module import.
    NOTE(review): presumably to avoid a circular import with ``.ui`` - confirm.
    """
    from .ui import _format_datetime_display as render_display

    return render_display(dt, display_tz)
|
|
||||||
|
|
||||||
|
|
||||||
def format_datetime_iso(dt: Any, display_tz: str = "UTC") -> str:
    """Delegate to ``ui._format_datetime_iso``.

    The helper is imported at call time rather than at module import.
    NOTE(review): presumably to avoid a circular import with ``.ui`` - confirm.
    """
    from .ui import _format_datetime_iso as render_iso

    return render_iso(dt, display_tz)
|
|
||||||
|
|
||||||
|
|
||||||
def build_url_templates(bucket_name: str) -> dict[str, str]:
    """Build the per-object UI URL templates for *bucket_name*.

    Every URL is generated with the literal ``KEY_PLACEHOLDER`` as object key
    (and ``VERSION_ID_PLACEHOLDER`` as version id for ``restore``).
    NOTE(review): the placeholders are presumably substituted by the
    consumer of these templates - confirm against the front-end.
    """
    from flask import url_for

    def object_url(endpoint: str, **extra: str) -> str:
        # All templates share the bucket and the placeholder key.
        return url_for(
            endpoint, bucket_name=bucket_name, object_key="KEY_PLACEHOLDER", **extra
        )

    preview_url = object_url("ui.object_preview")
    delete_url = object_url("ui.delete_object")
    presign_url = object_url("ui.object_presign")
    versions_url = object_url("ui.object_versions")
    restore_url = object_url(
        "ui.restore_object_version", version_id="VERSION_ID_PLACEHOLDER"
    )
    tags_url = object_url("ui.object_tags")
    copy_url = object_url("ui.copy_object")
    move_url = object_url("ui.move_object")
    metadata_url = object_url("ui.object_metadata")

    templates = {"preview": preview_url}
    # The download link is the preview URL with a download flag appended.
    templates["download"] = preview_url + "?download=1"
    templates["presign"] = presign_url
    templates["delete"] = delete_url
    templates["versions"] = versions_url
    templates["restore"] = restore_url
    templates["tags"] = tags_url
    templates["copy"] = copy_url
    templates["move"] = move_url
    templates["metadata"] = metadata_url
    return templates
|
|
||||||
|
|
||||||
|
|
||||||
def translate_list_objects(
    boto3_response: dict[str, Any],
    url_templates: dict[str, str],
    display_tz: str = "UTC",
    versioning_enabled: bool = False,
) -> dict[str, Any]:
    """Convert a boto3 list-objects response into the UI's JSON payload.

    Each entry of ``Contents`` becomes a dict with key, size, three renderings
    of the modification time and the ETag without surrounding quotes.
    ``total_count`` is the response's ``KeyCount`` when present, otherwise
    the number of translated objects.
    """

    def describe(entry: dict[str, Any]) -> dict[str, Any]:
        modified = entry["LastModified"]
        return {
            "key": entry["Key"],
            "size": entry["Size"],
            "last_modified": modified.isoformat(),
            "last_modified_display": format_datetime_display(modified, display_tz),
            "last_modified_iso": format_datetime_iso(modified, display_tz),
            "etag": entry.get("ETag", "").strip('"'),
        }

    listed = [describe(entry) for entry in boto3_response.get("Contents", [])]
    result: dict[str, Any] = {"objects": listed}
    result["is_truncated"] = boto3_response.get("IsTruncated", False)
    result["next_continuation_token"] = boto3_response.get("NextContinuationToken")
    result["total_count"] = boto3_response.get("KeyCount", len(listed))
    result["versioning_enabled"] = versioning_enabled
    result["url_templates"] = url_templates
    return result
|
|
||||||
|
|
||||||
|
|
||||||
def get_versioning_via_s3(client: Any, bucket_name: str) -> bool:
    """Report whether versioning is ``Enabled`` on *bucket_name*.

    A ``ClientError`` yields ``False``; it is logged as a warning unless the
    error code is ``NoSuchBucket``.  Other exceptions propagate.
    """
    try:
        status = client.get_bucket_versioning(Bucket=bucket_name).get("Status")
    except ClientError as exc:
        error_code = exc.response.get("Error", {}).get("Code", "")
        if error_code != "NoSuchBucket":
            logger.warning("Failed to check versioning for %s: %s", bucket_name, error_code)
        return False
    return status == "Enabled"
|
|
||||||
|
|
||||||
|
|
||||||
def stream_objects_ndjson(
    client: Any,
    bucket_name: str,
    prefix: Optional[str],
    url_templates: dict[str, str],
    display_tz: str = "UTC",
    versioning_enabled: bool = False,
) -> Generator[str, None, None]:
    """Yield a bucket listing as newline-delimited JSON records.

    The stream starts with a ``meta`` record and a ``count`` record (whose
    ``total_count`` is always 0), followed by one ``object`` record per
    listed key and a final ``done`` record.  If listing fails, a single
    ``error`` record is emitted instead of ``done`` and the stream ends.
    """

    def ndjson(record: dict[str, Any]) -> str:
        return json.dumps(record) + "\n"

    yield ndjson({
        "type": "meta",
        "versioning_enabled": versioning_enabled,
        "url_templates": url_templates,
    })
    yield ndjson({"type": "count", "total_count": 0})

    list_args: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": 1000}
    if prefix:
        list_args["Prefix"] = prefix

    try:
        pages = client.get_paginator("list_objects_v2").paginate(**list_args)
        for page in pages:
            for entry in page.get("Contents", []):
                modified = entry["LastModified"]
                yield ndjson({
                    "type": "object",
                    "key": entry["Key"],
                    "size": entry["Size"],
                    "last_modified": modified.isoformat(),
                    "last_modified_display": format_datetime_display(modified, display_tz),
                    "last_modified_iso": format_datetime_iso(modified, display_tz),
                    "etag": entry.get("ETag", "").strip('"'),
                })
    except ClientError as exc:
        reason = exc.response.get("Error", {}).get("Message", "S3 operation failed")
        yield ndjson({"type": "error", "error": reason})
        return
    except (EndpointConnectionError, ConnectionClosedError):
        yield ndjson({"type": "error", "error": "S3 API server is unreachable"})
        return

    yield ndjson({"type": "done"})
|
|
||||||
@@ -18,18 +18,6 @@ class EphemeralSecretStore:
|
|||||||
self._store[token] = (payload, expires_at)
|
self._store[token] = (payload, expires_at)
|
||||||
return token
|
return token
|
||||||
|
|
||||||
def peek(self, token: str | None) -> Any | None:
|
|
||||||
if not token:
|
|
||||||
return None
|
|
||||||
entry = self._store.get(token)
|
|
||||||
if not entry:
|
|
||||||
return None
|
|
||||||
payload, expires_at = entry
|
|
||||||
if expires_at < time.time():
|
|
||||||
self._store.pop(token, None)
|
|
||||||
return None
|
|
||||||
return payload
|
|
||||||
|
|
||||||
def pop(self, token: str | None) -> Any | None:
|
def pop(self, token: str | None) -> Any | None:
|
||||||
if not token:
|
if not token:
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -1,171 +0,0 @@
|
|||||||
"""S3 SelectObjectContent SQL query execution using DuckDB."""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict, Generator, Optional
|
|
||||||
|
|
||||||
try:
|
|
||||||
import duckdb
|
|
||||||
DUCKDB_AVAILABLE = True
|
|
||||||
except ImportError:
|
|
||||||
DUCKDB_AVAILABLE = False
|
|
||||||
|
|
||||||
|
|
||||||
class SelectError(Exception):
    """Raised when a SELECT query against object content cannot be executed."""
|
|
||||||
|
|
||||||
|
|
||||||
def execute_select_query(
    file_path: Path,
    expression: str,
    input_format: str,
    input_config: Dict[str, Any],
    output_format: str,
    output_config: Dict[str, Any],
    chunk_size: int = 65536,
) -> Generator[bytes, None, None]:
    """Execute a SQL query on object content and stream the encoded result.

    The object file is loaded into an in-memory DuckDB table named ``data``;
    references to the ``S3Object`` table alias in *expression* are rewritten
    to that table name before execution.

    This is a generator: nothing runs (and no error is raised) until the
    caller starts iterating.

    Args:
        file_path: Location of the object content on disk.
        expression: SQL text using ``S3Object`` as the table name.
        input_format: ``"CSV"``, ``"JSON"`` or ``"Parquet"``.
        input_config: Format-specific input options passed to the loader.
        output_format: ``"CSV"`` or ``"JSON"``.
        output_config: Format-specific output options passed to the writer.
        chunk_size: Size (in characters) of the chunks produced by the writer.

    Yields:
        UTF-8 encoded chunks of the serialized result.

    Raises:
        SelectError: DuckDB is not installed, a format is unsupported, or the
            query fails to execute.
    """
    # Local import: the module's top-level imports do not include ``re``.
    import re

    if not DUCKDB_AVAILABLE:
        raise SelectError("DuckDB is not installed. Install with: pip install duckdb")

    conn = duckdb.connect(":memory:")

    try:
        if input_format == "CSV":
            _load_csv(conn, file_path, input_config)
        elif input_format == "JSON":
            _load_json(conn, file_path, input_config)
        elif input_format == "Parquet":
            _load_parquet(conn, file_path)
        else:
            raise SelectError(f"Unsupported input format: {input_format}")

        # Match the alias as a whole word, in any casing, so that spellings
        # such as ``S3OBJECT`` are handled and identifiers that merely contain
        # the text (e.g. ``s3object_id``) are left untouched.
        normalized_expression = re.sub(
            r"\bs3object\b", "data", expression, flags=re.IGNORECASE
        )

        try:
            result = conn.execute(normalized_expression)
        except duckdb.Error as exc:
            # Chain the original error so the DuckDB traceback is preserved.
            raise SelectError(f"SQL execution error: {exc}") from exc

        if output_format == "CSV":
            yield from _output_csv(result, output_config, chunk_size)
        elif output_format == "JSON":
            yield from _output_json(result, output_config, chunk_size)
        else:
            raise SelectError(f"Unsupported output format: {output_format}")

    finally:
        conn.close()
|
|
||||||
|
|
||||||
|
|
||||||
def _load_csv(conn, file_path: Path, config: Dict[str, Any]) -> None:
    """Load a CSV file into the DuckDB table ``data``.

    Args:
        conn: Connection object exposing ``execute(sql)``.
        file_path: CSV file to read; backslashes are converted to ``/``.
        config: Optional keys ``file_header_info`` (``"USE"`` or ``"IGNORE"``
            means the first row is a header), ``field_delimiter`` (default
            ``","``) and ``quote_character`` (default ``'"'``).
    """

    def _literal(value: Any) -> str:
        # SQL string literal: embedded single quotes are escaped by doubling,
        # so a quote in the path or in a configured character cannot
        # terminate the literal early.
        return "'" + str(value).replace("'", "''") + "'"

    file_header_info = config.get("file_header_info", "NONE")
    delimiter = config.get("field_delimiter", ",")
    quote = config.get("quote_character", '"')

    header = file_header_info in ("USE", "IGNORE")
    path_str = str(file_path).replace("\\", "/")

    conn.execute(f"""
        CREATE TABLE data AS
        SELECT * FROM read_csv({_literal(path_str)},
            header={header},
            delim={_literal(delimiter)},
            quote={_literal(quote)}
        )
    """)
|
|
||||||
|
|
||||||
|
|
||||||
def _load_json(conn, file_path: Path, config: Dict[str, Any]) -> None:
    """Load a JSON file into the DuckDB table ``data``.

    Args:
        conn: Connection object exposing ``execute(sql)``.
        file_path: JSON file to read; backslashes are converted to ``/``.
        config: Optional key ``type``. ``"LINES"`` reads newline-delimited
            JSON; any other value (default ``"DOCUMENT"``) reads the file with
            ``format='array'``.
    """
    json_type = config.get("type", "DOCUMENT")
    path_str = str(file_path).replace("\\", "/")
    # Escape single quotes by doubling so the path cannot break out of the
    # SQL string literal.
    path_literal = "'" + path_str.replace("'", "''") + "'"

    json_format = "newline_delimited" if json_type == "LINES" else "array"

    conn.execute(f"""
        CREATE TABLE data AS
        SELECT * FROM read_json_auto({path_literal}, format='{json_format}')
    """)
|
|
||||||
|
|
||||||
|
|
||||||
def _load_parquet(conn, file_path: Path) -> None:
    """Load a Parquet file into the DuckDB table ``data``.

    Args:
        conn: Connection object exposing ``execute(sql)``.
        file_path: Parquet file to read; backslashes are converted to ``/``.
    """
    path_str = str(file_path).replace("\\", "/")
    # Escape single quotes by doubling so the path cannot break out of the
    # SQL string literal.
    path_literal = "'" + path_str.replace("'", "''") + "'"
    conn.execute(f"CREATE TABLE data AS SELECT * FROM read_parquet({path_literal})")
|
|
||||||
|
|
||||||
|
|
||||||
def _output_csv(
    result,
    config: Dict[str, Any],
    chunk_size: int,
) -> Generator[bytes, None, None]:
    """Output query results as CSV.

    Args:
        result: Cursor-like object exposing ``fetchmany(n)``.
        config: Optional keys ``field_delimiter`` (default ``","``),
            ``record_delimiter`` (default ``"\\n"``) and ``quote_character``
            (default ``'"'``).
        chunk_size: Number of characters per emitted chunk; only the final
            chunk may be shorter.

    Yields:
        UTF-8 encoded pieces of the CSV text.
    """
    delimiter = config.get("field_delimiter", ",")
    record_delimiter = config.get("record_delimiter", "\n")
    quote = config.get("quote_character", '"')

    def _format_field(value: Any) -> str:
        # NULL becomes an empty field.
        if value is None:
            return ""
        # Render first, then decide on quoting: non-string values (lists,
        # structs, ...) can also contain the delimiter once stringified and
        # would otherwise corrupt the row.
        text = value if isinstance(value, str) else str(value)
        if delimiter in text or quote in text or record_delimiter in text:
            escaped = text.replace(quote, quote + quote)
            return f"{quote}{escaped}{quote}"
        return text

    buffer = ""

    while True:
        rows = result.fetchmany(1000)
        if not rows:
            break

        buffer += "".join(
            delimiter.join(_format_field(value) for value in row) + record_delimiter
            for row in rows
        )

        while len(buffer) >= chunk_size:
            yield buffer[:chunk_size].encode("utf-8")
            buffer = buffer[chunk_size:]

    if buffer:
        yield buffer.encode("utf-8")
|
|
||||||
|
|
||||||
|
|
||||||
def _output_json(
    result,
    config: Dict[str, Any],
    chunk_size: int,
) -> Generator[bytes, None, None]:
    """Serialize query results as JSON Lines.

    Each row becomes one JSON object keyed by the column names taken from
    ``result.description``; values that ``json`` cannot encode are converted
    with ``str``. Records are separated by ``config["record_delimiter"]``
    (default ``"\\n"``).

    Yields:
        UTF-8 encoded chunks of ``chunk_size`` characters; only the final
        chunk may be shorter.
    """
    terminator = config.get("record_delimiter", "\n")
    names = [column[0] for column in result.description]

    pending = ""
    while batch := result.fetchmany(1000):
        pending += "".join(
            json.dumps(dict(zip(names, values)), default=str) + terminator
            for values in batch
        )
        # Flush every full chunk; the remainder waits for more rows.
        while len(pending) >= chunk_size:
            head, pending = pending[:chunk_size], pending[chunk_size:]
            yield head.encode("utf-8")

    if pending:
        yield pending.encode("utf-8")
|
|
||||||
@@ -1,177 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import time
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict, List, Optional
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class SiteInfo:
    """Identity and endpoint settings describing one site."""

    site_id: str
    endpoint: str
    region: str = "us-east-1"
    priority: int = 100
    # Falls back to ``site_id`` when left empty.
    display_name: str = ""
    # Filled with the current time when not supplied.
    created_at: Optional[float] = None
    updated_at: Optional[float] = None

    def __post_init__(self) -> None:
        """Fill in the defaults that depend on other fields or on the clock."""
        self.display_name = self.display_name or self.site_id
        if self.created_at is None:
            self.created_at = time.time()

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dictionary."""
        names = (
            "site_id",
            "endpoint",
            "region",
            "priority",
            "display_name",
            "created_at",
            "updated_at",
        )
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SiteInfo":
        """Build an instance from a dictionary; only ``site_id`` is required."""
        values = {
            "site_id": data["site_id"],
            "endpoint": data.get("endpoint", ""),
            "region": data.get("region", "us-east-1"),
            "priority": data.get("priority", 100),
            "display_name": data.get("display_name", ""),
            "created_at": data.get("created_at"),
            "updated_at": data.get("updated_at"),
        }
        return cls(**values)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class PeerSite:
    """Settings and last known health state describing one peer site."""

    site_id: str
    endpoint: str
    region: str = "us-east-1"
    priority: int = 100
    # Falls back to ``site_id`` when left empty.
    display_name: str = ""
    # Filled with the current time when not supplied.
    created_at: Optional[float] = None
    updated_at: Optional[float] = None
    connection_id: Optional[str] = None
    is_healthy: Optional[bool] = None
    last_health_check: Optional[float] = None

    def __post_init__(self) -> None:
        """Fill in the defaults that depend on other fields or on the clock."""
        self.display_name = self.display_name or self.site_id
        if self.created_at is None:
            self.created_at = time.time()

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dictionary."""
        names = (
            "site_id",
            "endpoint",
            "region",
            "priority",
            "display_name",
            "created_at",
            "updated_at",
            "connection_id",
            "is_healthy",
            "last_health_check",
        )
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PeerSite":
        """Build an instance from a dictionary; only ``site_id`` is required."""
        values = {
            "site_id": data["site_id"],
            "endpoint": data.get("endpoint", ""),
            "region": data.get("region", "us-east-1"),
            "priority": data.get("priority", 100),
            "display_name": data.get("display_name", ""),
            "created_at": data.get("created_at"),
            "updated_at": data.get("updated_at"),
            "connection_id": data.get("connection_id"),
            "is_healthy": data.get("is_healthy"),
            "last_health_check": data.get("last_health_check"),
        }
        return cls(**values)
|
|
||||||
|
|
||||||
|
|
||||||
class SiteRegistry:
    """Registry of the local site and its peers, persisted as a JSON file.

    The file at ``config_path`` holds an object with a ``"local"`` entry
    (a site dictionary or ``null``) and a ``"peers"`` list. Every mutating
    method rewrites the whole file through :meth:`save`.
    """

    def __init__(self, config_path: Path) -> None:
        """Remember *config_path* and load whatever it currently contains."""
        self.config_path = config_path
        self._local_site: Optional[SiteInfo] = None
        self._peers: Dict[str, PeerSite] = {}
        self.reload()

    def reload(self) -> None:
        """Replace the in-memory state with the contents of the config file.

        A missing, unreadable or malformed file results in an empty registry
        (no local site, no peers).
        """
        if not self.config_path.exists():
            self._local_site = None
            self._peers = {}
            return

        try:
            with open(self.config_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            local_data = data.get("local")
            local_site = SiteInfo.from_dict(local_data) if local_data else None

            peers: Dict[str, PeerSite] = {}
            for peer_data in data.get("peers", []):
                peer = PeerSite.from_dict(peer_data)
                peers[peer.site_id] = peer
        except (OSError, ValueError, KeyError, TypeError, AttributeError):
            # ValueError covers both invalid JSON and undecodable bytes.
            # TypeError/AttributeError cover structurally wrong documents,
            # e.g. a JSON list at the root or a non-object peer entry.
            self._local_site = None
            self._peers = {}
            return

        self._local_site = local_site
        self._peers = peers

    def save(self) -> None:
        """Write the current state to the config file, creating its directory."""
        self.config_path.parent.mkdir(parents=True, exist_ok=True)
        data = {
            "local": self._local_site.to_dict() if self._local_site else None,
            "peers": [peer.to_dict() for peer in self._peers.values()],
        }
        with open(self.config_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    def get_local_site(self) -> Optional["SiteInfo"]:
        """Return the local site, or ``None`` when none is configured."""
        return self._local_site

    def set_local_site(self, site: "SiteInfo") -> None:
        """Store *site* as the local site, stamping ``updated_at``, and save."""
        site.updated_at = time.time()
        self._local_site = site
        self.save()

    def list_peers(self) -> List["PeerSite"]:
        """Return all registered peers as a new list."""
        return list(self._peers.values())

    def get_peer(self, site_id: str) -> Optional["PeerSite"]:
        """Return the peer registered under *site_id*, or ``None``."""
        return self._peers.get(site_id)

    def add_peer(self, peer: "PeerSite") -> None:
        """Register *peer* (replacing any peer with the same id) and save."""
        peer.created_at = peer.created_at or time.time()
        self._peers[peer.site_id] = peer
        self.save()

    def update_peer(self, peer: "PeerSite") -> None:
        """Replace an existing peer, stamping ``updated_at``, and save.

        Raises:
            ValueError: No peer with ``peer.site_id`` is registered.
        """
        if peer.site_id not in self._peers:
            raise ValueError(f"Peer {peer.site_id} not found")
        peer.updated_at = time.time()
        self._peers[peer.site_id] = peer
        self.save()

    def delete_peer(self, site_id: str) -> bool:
        """Remove the peer *site_id*; return whether it existed."""
        if site_id in self._peers:
            del self._peers[site_id]
            self.save()
            return True
        return False

    def update_health(self, site_id: str, is_healthy: bool) -> None:
        """Record a health-check result for a known peer and save.

        Unknown site ids are ignored.
        """
        peer = self._peers.get(site_id)
        if peer:
            peer.is_healthy = is_healthy
            peer.last_health_check = time.time()
            self.save()
|
|
||||||
416
app/site_sync.py
416
app/site_sync.py
@@ -1,416 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import tempfile
|
|
||||||
import threading
|
|
||||||
import time
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
|
||||||
|
|
||||||
import boto3
|
|
||||||
from botocore.config import Config
|
|
||||||
from botocore.exceptions import ClientError
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from .connections import ConnectionStore, RemoteConnection
|
|
||||||
from .replication import ReplicationManager, ReplicationRule
|
|
||||||
from .storage import ObjectStorage
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
SITE_SYNC_USER_AGENT = "SiteSyncAgent/1.0"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class SyncedObjectInfo:
    """Bookkeeping record for one object tracked by site sync."""

    last_synced_at: float
    remote_etag: str
    source: str

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dictionary."""
        names = ("last_synced_at", "remote_etag", "source")
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SyncedObjectInfo":
        """Build an instance from a dictionary; all three keys are required."""
        synced_at = data["last_synced_at"]
        etag = data["remote_etag"]
        origin = data["source"]
        return cls(last_synced_at=synced_at, remote_etag=etag, source=origin)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class SyncState:
    """Persisted sync bookkeeping: tracked objects plus the last sync time."""

    # Maps object key -> tracking record.
    synced_objects: Dict[str, "SyncedObjectInfo"] = field(default_factory=dict)
    last_full_sync: Optional[float] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dictionary of the state."""
        tracked = {key: info.to_dict() for key, info in self.synced_objects.items()}
        return {"synced_objects": tracked, "last_full_sync": self.last_full_sync}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SyncState":
        """Rebuild the state from a dictionary; both keys are optional."""
        tracked = {
            key: SyncedObjectInfo.from_dict(raw)
            for key, raw in data.get("synced_objects", {}).items()
        }
        return cls(synced_objects=tracked, last_full_sync=data.get("last_full_sync"))
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class SiteSyncStats:
    """Counters describing the outcome of one bucket sync run."""

    last_sync_at: Optional[float] = None
    objects_pulled: int = 0
    objects_skipped: int = 0
    conflicts_resolved: int = 0
    deletions_applied: int = 0
    errors: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """Return the counters as a plain dictionary."""
        names = (
            "last_sync_at",
            "objects_pulled",
            "objects_skipped",
            "conflicts_resolved",
            "deletions_applied",
            "errors",
        )
        return {name: getattr(self, name) for name in names}
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class RemoteObjectMeta:
    """Listing metadata for one object in a remote bucket."""

    key: str
    size: int
    last_modified: datetime
    # Stored without the surrounding double quotes.
    etag: str

    @classmethod
    def from_s3_object(cls, obj: Dict[str, Any]) -> "RemoteObjectMeta":
        """Build an instance from one ``Contents`` entry of an S3 listing.

        ``Key`` and ``LastModified`` are required; ``Size`` defaults to 0 and
        ``ETag`` to an empty string.
        """
        unquoted_etag = obj.get("ETag", "").strip('"')
        return cls(obj["Key"], obj.get("Size", 0), obj["LastModified"], unquoted_etag)
|
|
||||||
|
|
||||||
|
|
||||||
def _create_sync_client(
    connection: "RemoteConnection",
    *,
    connect_timeout: int = 10,
    read_timeout: int = 120,
    max_retries: int = 2,
) -> Any:
    """Create a boto3 S3 client for *connection*.

    The client uses SigV4 signing, path-style addressing, the site-sync user
    agent suffix and the given timeout/retry settings. The region falls back
    to ``"us-east-1"`` when the connection does not specify one.
    """
    config_options = dict(
        user_agent_extra=SITE_SYNC_USER_AGENT,
        connect_timeout=connect_timeout,
        read_timeout=read_timeout,
        retries={"max_attempts": max_retries},
        signature_version="s3v4",
        s3={"addressing_style": "path"},
        request_checksum_calculation="when_required",
        response_checksum_validation="when_required",
    )
    client_options = dict(
        endpoint_url=connection.endpoint_url,
        aws_access_key_id=connection.access_key,
        aws_secret_access_key=connection.secret_key,
        region_name=connection.region or "us-east-1",
        config=Config(**config_options),
    )
    return boto3.client("s3", **client_options)
|
|
||||||
|
|
||||||
|
|
||||||
class SiteSyncWorker:
    """Background worker that pulls remote changes for bidirectional rules.

    A daemon thread wakes every ``interval_seconds`` and, for each enabled
    replication rule whose mode is ``REPLICATION_MODE_BIDIRECTIONAL``,
    compares the local bucket listing with the remote one, pulls objects that
    are missing locally or that the remote side wins on, and (when the rule
    asks for it) applies remote deletions locally.
    """

    def __init__(
        self,
        storage: "ObjectStorage",
        connections: "ConnectionStore",
        replication_manager: "ReplicationManager",
        storage_root: Path,
        interval_seconds: int = 60,
        batch_size: int = 100,
        connect_timeout: int = 10,
        read_timeout: int = 120,
        max_retries: int = 2,
        clock_skew_tolerance_seconds: float = 1.0,
    ):
        """Store collaborators and tuning knobs; the thread is not started.

        Args:
            storage: Local object storage used for listing, writes, deletes.
            connections: Lookup of remote connections by id.
            replication_manager: Source of replication rules.
            storage_root: Root directory holding ``.myfsio.sys`` state.
            interval_seconds: Pause between background sync cycles.
            batch_size: Maximum number of successful pulls per bucket run.
            connect_timeout: Connect timeout for remote S3 clients.
            read_timeout: Read timeout for remote S3 clients.
            max_retries: Retry attempts for remote S3 clients.
            clock_skew_tolerance_seconds: Timestamps closer than this are
                treated as simultaneous when resolving conflicts.
        """
        self.storage = storage
        self.connections = connections
        self.replication_manager = replication_manager
        self.storage_root = storage_root
        self.interval_seconds = interval_seconds
        self.batch_size = batch_size
        self.connect_timeout = connect_timeout
        self.read_timeout = read_timeout
        self.max_retries = max_retries
        self.clock_skew_tolerance_seconds = clock_skew_tolerance_seconds
        self._lock = threading.Lock()
        self._shutdown = threading.Event()
        self._sync_thread: Optional[threading.Thread] = None
        self._bucket_stats: Dict[str, SiteSyncStats] = {}

    def _create_client(self, connection: "RemoteConnection") -> Any:
        """Create an S3 client with the worker's configured timeouts."""
        return _create_sync_client(
            connection,
            connect_timeout=self.connect_timeout,
            read_timeout=self.read_timeout,
            max_retries=self.max_retries,
        )

    def start(self) -> None:
        """Start the background thread unless one is already running."""
        if self._sync_thread is not None and self._sync_thread.is_alive():
            return
        self._shutdown.clear()
        self._sync_thread = threading.Thread(
            target=self._sync_loop, name="site-sync-worker", daemon=True
        )
        self._sync_thread.start()
        logger.info("Site sync worker started (interval=%ds)", self.interval_seconds)

    def shutdown(self) -> None:
        """Signal the background thread to stop and wait up to 10 seconds."""
        self._shutdown.set()
        if self._sync_thread is not None:
            self._sync_thread.join(timeout=10.0)
        logger.info("Site sync worker shut down")

    def trigger_sync(self, bucket_name: str) -> Optional["SiteSyncStats"]:
        """Run one sync for *bucket_name* immediately.

        Returns:
            The run's stats, or ``None`` when the bucket has no enabled
            bidirectional rule.
        """
        from .replication import REPLICATION_MODE_BIDIRECTIONAL
        rule = self.replication_manager.get_rule(bucket_name)
        if not rule or rule.mode != REPLICATION_MODE_BIDIRECTIONAL or not rule.enabled:
            return None
        return self._sync_bucket(rule)

    def get_stats(self, bucket_name: str) -> Optional["SiteSyncStats"]:
        """Return the stats recorded by the latest background cycle, if any."""
        with self._lock:
            return self._bucket_stats.get(bucket_name)

    def _sync_loop(self) -> None:
        """Thread body: wait one interval, run a cycle, repeat until shutdown."""
        while not self._shutdown.is_set():
            # ``wait`` doubles as an interruptible sleep.
            self._shutdown.wait(timeout=self.interval_seconds)
            if self._shutdown.is_set():
                break
            self._run_sync_cycle()

    def _run_sync_cycle(self) -> None:
        """Sync every enabled bidirectional rule once, recording its stats."""
        from .replication import REPLICATION_MODE_BIDIRECTIONAL
        # Snapshot the rules so concurrent changes do not break iteration.
        for bucket_name, rule in list(self.replication_manager._rules.items()):
            if self._shutdown.is_set():
                break
            if rule.mode != REPLICATION_MODE_BIDIRECTIONAL or not rule.enabled:
                continue
            try:
                stats = self._sync_bucket(rule)
                with self._lock:
                    self._bucket_stats[bucket_name] = stats
            except Exception as e:
                # One failing bucket must not stop the remaining buckets.
                logger.exception("Site sync failed for bucket %s: %s", bucket_name, e)

    def _sync_bucket(self, rule: "ReplicationRule") -> "SiteSyncStats":
        """Pull remote changes (and optionally deletions) for one rule.

        Returns:
            Stats for this run. When the connection is missing or a listing
            fails, the stats carry one error and nothing else is done.
        """
        stats = SiteSyncStats()
        connection = self.connections.get(rule.target_connection_id)
        if not connection:
            logger.warning("Connection %s not found for bucket %s", rule.target_connection_id, rule.bucket_name)
            stats.errors += 1
            return stats

        try:
            local_objects = self._list_local_objects(rule.bucket_name)
        except Exception as e:
            logger.error("Failed to list local objects for %s: %s", rule.bucket_name, e)
            stats.errors += 1
            return stats

        try:
            remote_objects = self._list_remote_objects(rule, connection)
        except Exception as e:
            logger.error("Failed to list remote objects for %s: %s", rule.bucket_name, e)
            stats.errors += 1
            return stats

        sync_state = self._load_sync_state(rule.bucket_name)
        local_keys = set(local_objects.keys())
        remote_keys = set(remote_objects.keys())

        # Decide which remote keys need to be pulled.
        to_pull = []
        for key in remote_keys:
            remote_meta = remote_objects[key]
            local_meta = local_objects.get(key)
            if local_meta is None:
                to_pull.append(key)
            else:
                resolution = self._resolve_conflict(local_meta, remote_meta)
                if resolution == "pull":
                    to_pull.append(key)
                    stats.conflicts_resolved += 1
                else:
                    # Both "skip" and "keep" leave the local object untouched.
                    stats.objects_skipped += 1

        # Only successful pulls count towards the per-run batch limit.
        pulled_count = 0
        for key in to_pull:
            if self._shutdown.is_set():
                break
            if pulled_count >= self.batch_size:
                break
            remote_meta = remote_objects[key]
            success = self._pull_object(rule, key, connection, remote_meta)
            if success:
                stats.objects_pulled += 1
                pulled_count += 1
                sync_state.synced_objects[key] = SyncedObjectInfo(
                    last_synced_at=time.time(),
                    remote_etag=remote_meta.etag,
                    source="remote",
                )
            else:
                stats.errors += 1

        if rule.sync_deletions:
            # A tracked object that came from the remote, has vanished there
            # and was not modified locally since it was synced is deleted.
            for key in list(sync_state.synced_objects.keys()):
                if key not in remote_keys and key in local_keys:
                    tracked = sync_state.synced_objects[key]
                    if tracked.source == "remote":
                        local_meta = local_objects.get(key)
                        if local_meta and local_meta.last_modified.timestamp() <= tracked.last_synced_at:
                            success = self._apply_remote_deletion(rule.bucket_name, key)
                            if success:
                                stats.deletions_applied += 1
                                del sync_state.synced_objects[key]

        sync_state.last_full_sync = time.time()
        self._save_sync_state(rule.bucket_name, sync_state)

        with self.replication_manager._stats_lock:
            rule.last_pull_at = time.time()
        self.replication_manager.save_rules()

        stats.last_sync_at = time.time()
        logger.info(
            "Site sync completed for %s: pulled=%d, skipped=%d, conflicts=%d, deletions=%d, errors=%d",
            rule.bucket_name,
            stats.objects_pulled,
            stats.objects_skipped,
            stats.conflicts_resolved,
            stats.deletions_applied,
            stats.errors,
        )
        return stats

    def _list_local_objects(self, bucket_name: str) -> Dict[str, Any]:
        """Return the local objects of *bucket_name* keyed by object key."""
        objects = self.storage.list_objects_all(bucket_name)
        return {obj.key: obj for obj in objects}

    def _list_remote_objects(self, rule: "ReplicationRule", connection: "RemoteConnection") -> Dict[str, "RemoteObjectMeta"]:
        """Return the remote bucket's objects keyed by object key.

        A missing remote bucket is treated as empty; any other client error
        is re-raised.
        """
        s3 = self._create_client(connection)
        result: Dict[str, RemoteObjectMeta] = {}
        paginator = s3.get_paginator("list_objects_v2")
        try:
            for page in paginator.paginate(Bucket=rule.target_bucket):
                for obj in page.get("Contents", []):
                    meta = RemoteObjectMeta.from_s3_object(obj)
                    result[meta.key] = meta
        except ClientError as e:
            # Tolerate responses without an "Error" section so the original
            # ClientError is re-raised instead of a KeyError.
            if e.response.get("Error", {}).get("Code") == "NoSuchBucket":
                return {}
            raise
        return result

    def _resolve_conflict(self, local_meta: Any, remote_meta: "RemoteObjectMeta") -> str:
        """Decide what to do with a key that exists on both sides.

        Returns:
            ``"skip"`` when the timestamps are within the clock-skew tolerance
            and the ETags match; otherwise ``"pull"`` when the remote wins and
            ``"keep"`` when the local copy wins. Within the tolerance the
            greater ETag wins; outside it the newer timestamp wins.
        """
        local_ts = local_meta.last_modified.timestamp()
        remote_ts = remote_meta.last_modified.timestamp()

        if abs(remote_ts - local_ts) < self.clock_skew_tolerance_seconds:
            local_etag = local_meta.etag or ""
            if remote_meta.etag == local_etag:
                return "skip"
            # Deterministic tie-break when the clocks cannot be trusted.
            return "pull" if remote_meta.etag > local_etag else "keep"

        return "pull" if remote_ts > local_ts else "keep"

    def _pull_object(
        self,
        rule: "ReplicationRule",
        object_key: str,
        connection: "RemoteConnection",
        remote_meta: "RemoteObjectMeta",
    ) -> bool:
        """Download one remote object and store it locally.

        The object is downloaded into a temporary file under
        ``.myfsio.sys/tmp`` and then written through the storage layer along
        with the remote user metadata. The temporary file is always removed.

        Returns:
            ``True`` on success, ``False`` when the download or the local
            write fails (the failure is logged).
        """
        s3 = self._create_client(connection)
        tmp_path = None
        try:
            tmp_dir = self.storage_root / ".myfsio.sys" / "tmp"
            tmp_dir.mkdir(parents=True, exist_ok=True)
            # Only reserve a unique name here; the download writes the data.
            with tempfile.NamedTemporaryFile(dir=tmp_dir, delete=False) as tmp_file:
                tmp_path = Path(tmp_file.name)

            s3.download_file(rule.target_bucket, object_key, str(tmp_path))

            head_response = s3.head_object(Bucket=rule.target_bucket, Key=object_key)
            user_metadata = head_response.get("Metadata", {})

            with open(tmp_path, "rb") as f:
                self.storage.put_object(
                    rule.bucket_name,
                    object_key,
                    f,
                    metadata=user_metadata if user_metadata else None,
                )

            logger.debug("Pulled object %s/%s from remote", rule.bucket_name, object_key)
            return True

        except ClientError as e:
            logger.error("Failed to pull %s/%s: %s", rule.bucket_name, object_key, e)
            return False
        except Exception as e:
            logger.error("Failed to store pulled object %s/%s: %s", rule.bucket_name, object_key, e)
            return False
        finally:
            if tmp_path and tmp_path.exists():
                try:
                    tmp_path.unlink()
                except OSError:
                    # Best effort: a leftover temp file is not fatal.
                    pass

    def _apply_remote_deletion(self, bucket_name: str, object_key: str) -> bool:
        """Delete the local object; return whether the deletion succeeded."""
        try:
            self.storage.delete_object(bucket_name, object_key)
            logger.debug("Applied remote deletion for %s/%s", bucket_name, object_key)
            return True
        except Exception as e:
            logger.error("Failed to apply remote deletion for %s/%s: %s", bucket_name, object_key, e)
            return False

    def _sync_state_path(self, bucket_name: str) -> Path:
        """Return the path of the bucket's persisted sync-state file."""
        return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "site_sync_state.json"

    def _load_sync_state(self, bucket_name: str) -> "SyncState":
        """Load the bucket's sync state, or a fresh one if absent or invalid."""
        path = self._sync_state_path(bucket_name)
        if not path.exists():
            return SyncState()
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
            return SyncState.from_dict(data)
        except (ValueError, OSError, KeyError, TypeError, AttributeError) as e:
            # ValueError covers invalid JSON and undecodable bytes;
            # TypeError/AttributeError cover structurally wrong documents.
            logger.warning("Failed to load sync state for %s: %s", bucket_name, e)
            return SyncState()

    def _save_sync_state(self, bucket_name: str, state: "SyncState") -> None:
        """Persist the bucket's sync state; failures are logged, not raised."""
        path = self._sync_state_path(bucket_name)
        tmp_path = path.with_name(path.name + ".tmp")
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            # Write to a sibling file and rename it over the target so a crash
            # mid-write cannot leave a truncated state file behind.
            tmp_path.write_text(json.dumps(state.to_dict(), indent=2), encoding="utf-8")
            tmp_path.replace(path)
        except OSError as e:
            logger.warning("Failed to save sync state for %s: %s", bucket_name, e)
            try:
                tmp_path.unlink()
            except OSError:
                pass
|
|
||||||
568
app/storage.py
568
app/storage.py
File diff suppressed because it is too large
Load Diff
@@ -1,215 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import threading
|
|
||||||
import time
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
|
||||||
|
|
||||||
import psutil
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from .storage import ObjectStorage
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class SystemMetricsSnapshot:
    """One sample of system metrics taken at a point in time."""

    timestamp: datetime
    cpu_percent: float
    memory_percent: float
    disk_percent: float
    storage_bytes: int

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dictionary.

        The timestamp is rendered as ``YYYY-MM-DDTHH:MM:SSZ`` and the three
        percentages are rounded to two decimals.
        """
        rendered: Dict[str, Any] = {
            "timestamp": self.timestamp.strftime("%Y-%m-%dT%H:%M:%SZ"),
        }
        for name in ("cpu_percent", "memory_percent", "disk_percent"):
            rendered[name] = round(getattr(self, name), 2)
        rendered["storage_bytes"] = self.storage_bytes
        return rendered

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SystemMetricsSnapshot":
        """Build a snapshot from a dictionary; only ``timestamp`` is required.

        A trailing ``Z`` on the timestamp is rewritten to ``+00:00`` before
        it is handed to ``datetime.fromisoformat``.
        """
        raw = data["timestamp"]
        iso_text = raw[:-1] + "+00:00" if raw.endswith("Z") else raw
        return cls(
            datetime.fromisoformat(iso_text),
            data.get("cpu_percent", 0.0),
            data.get("memory_percent", 0.0),
            data.get("disk_percent", 0.0),
            data.get("storage_bytes", 0),
        )
|
|
||||||
|
|
||||||
|
|
||||||
class SystemMetricsCollector:
    """Samples system metrics on a background thread and persists history.

    A daemon thread takes one snapshot every ``interval_minutes``; snapshots
    older than ``retention_hours`` are discarded. History is stored as JSON
    under ``<storage_root>/.myfsio.sys/config/metrics_history.json``.
    """

    def __init__(
        self,
        storage_root: Path,
        interval_minutes: int = 5,
        retention_hours: int = 24,
    ):
        """Load persisted history and start the snapshot thread.

        Args:
            storage_root: Directory whose disk usage is sampled and under
                which the history file lives.
            interval_minutes: Minutes between background snapshots.
            retention_hours: Maximum age of a retained snapshot.
        """
        self.storage_root = storage_root
        self.interval_seconds = interval_minutes * 60
        self.retention_hours = retention_hours
        self._lock = threading.Lock()
        self._shutdown = threading.Event()
        self._snapshots: List[SystemMetricsSnapshot] = []
        self._storage_ref: Optional["ObjectStorage"] = None

        # Runs before the worker thread exists, so no locking is needed yet.
        self._load_history()

        self._snapshot_thread = threading.Thread(
            target=self._snapshot_loop,
            name="system-metrics-snapshot",
            daemon=True,
        )
        self._snapshot_thread.start()

    def set_storage(self, storage: "ObjectStorage") -> None:
        """Attach the storage backend used for bucket statistics."""
        with self._lock:
            self._storage_ref = storage

    def _config_path(self) -> Path:
        """Return the path of the persisted history file."""
        return self.storage_root / ".myfsio.sys" / "config" / "metrics_history.json"

    def _load_history(self) -> None:
        """Populate ``_snapshots`` from disk, tolerating a damaged file.

        Unreadable or malformed files are logged and ignored. Individual
        malformed entries are skipped so one bad record does not discard the
        rest of the history (and cannot crash construction).
        """
        config_path = self._config_path()
        if not config_path.exists():
            return
        try:
            # ValueError covers both json.JSONDecodeError and decoding errors.
            data = json.loads(config_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as e:
            logger.warning("Failed to load system metrics history: %s", e)
            return

        history_data = data.get("history", []) if isinstance(data, dict) else None
        if not isinstance(history_data, list):
            logger.warning(
                "Failed to load system metrics history: %s",
                "unexpected file structure",
            )
            return

        loaded = []
        skipped = 0
        for entry in history_data:
            try:
                loaded.append(SystemMetricsSnapshot.from_dict(entry))
            except (KeyError, ValueError, TypeError, AttributeError):
                skipped += 1
        if skipped:
            logger.warning(
                "Failed to load system metrics history: %s",
                f"skipped {skipped} malformed entries",
            )
        self._snapshots = loaded
        self._prune_old_snapshots()

    def _save_history(self) -> None:
        """Write the current history to disk; failures are logged only.

        The payload is written to a sibling temporary file and then renamed
        over the target, so an interrupted write cannot leave a truncated
        history file behind. Callers are expected to hold ``_lock``.
        """
        config_path = self._config_path()
        tmp_path = config_path.with_name(config_path.name + ".tmp")
        try:
            config_path.parent.mkdir(parents=True, exist_ok=True)
            data = {"history": [s.to_dict() for s in self._snapshots]}
            tmp_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
            tmp_path.replace(config_path)
        except OSError as e:
            logger.warning("Failed to save system metrics history: %s", e)

    def _prune_old_snapshots(self) -> None:
        """Drop snapshots older than ``retention_hours``."""
        if not self._snapshots:
            return
        cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600)
        self._snapshots = [
            s for s in self._snapshots if s.timestamp.timestamp() > cutoff
        ]

    def _snapshot_loop(self) -> None:
        """Worker loop: wait one interval, snapshot, repeat until shutdown."""
        while not self._shutdown.is_set():
            # ``wait`` doubles as an interruptible sleep.
            self._shutdown.wait(timeout=self.interval_seconds)
            if not self._shutdown.is_set():
                self._take_snapshot()

    def _collect_storage_totals(self, failure_message: str) -> Dict[str, int]:
        """Sum per-bucket statistics from the attached storage backend.

        Args:
            failure_message: Prefix for the warning logged if the backend
                raises while being queried.

        Returns:
            Dict with ``buckets``, ``objects``, ``bytes`` and ``versions``
            totals; all zero when no backend is attached. If the backend
            fails part-way, the totals gathered so far are returned.
        """
        totals = {"buckets": 0, "objects": 0, "bytes": 0, "versions": 0}
        with self._lock:
            storage = self._storage_ref
        if not storage:
            return totals
        try:
            buckets = storage.list_buckets()
            totals["buckets"] = len(buckets)
            for bucket in buckets:
                stats = storage.bucket_stats(bucket.name, cache_ttl=60)
                totals["bytes"] += stats.get("total_bytes", stats.get("bytes", 0))
                totals["objects"] += stats.get("total_objects", stats.get("objects", 0))
                totals["versions"] += stats.get("version_count", 0)
        except Exception as e:
            # Best effort: metrics must never take the service down.
            logger.warning("%s: %s", failure_message, e)
        return totals

    def _take_snapshot(self) -> None:
        """Sample metrics, append to history, prune and persist."""
        try:
            cpu_percent = psutil.cpu_percent(interval=0.1)
            memory = psutil.virtual_memory()
            disk = psutil.disk_usage(str(self.storage_root))

            totals = self._collect_storage_totals("Failed to collect bucket stats")

            snapshot = SystemMetricsSnapshot(
                timestamp=datetime.now(timezone.utc),
                cpu_percent=cpu_percent,
                memory_percent=memory.percent,
                disk_percent=disk.percent,
                storage_bytes=totals["bytes"],
            )

            with self._lock:
                self._snapshots.append(snapshot)
                self._prune_old_snapshots()
                self._save_history()

            logger.debug(
                "System metrics snapshot taken: CPU=%.1f%%, Memory=%.1f%%",
                cpu_percent,
                memory.percent,
            )
        except Exception as e:
            logger.warning("Failed to take system metrics snapshot: %s", e)

    def get_current(self) -> Dict[str, Any]:
        """Return live CPU, memory, disk and application statistics.

        Returns:
            Dict with ``cpu_percent``, ``memory``, ``disk`` and ``app``
            sections; ``app`` totals are zero without a storage backend.
        """
        cpu_percent = psutil.cpu_percent(interval=0.1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage(str(self.storage_root))
        boot_time = psutil.boot_time()
        uptime_seconds = time.time() - boot_time
        uptime_days = int(uptime_seconds / 86400)

        totals = self._collect_storage_totals("Failed to collect current bucket stats")

        return {
            "cpu_percent": round(cpu_percent, 2),
            "memory": {
                "total": memory.total,
                "available": memory.available,
                "used": memory.used,
                "percent": round(memory.percent, 2),
            },
            "disk": {
                "total": disk.total,
                "free": disk.free,
                "used": disk.used,
                "percent": round(disk.percent, 2),
            },
            "app": {
                "buckets": totals["buckets"],
                "objects": totals["objects"],
                "versions": totals["versions"],
                "storage_bytes": totals["bytes"],
                "uptime_days": uptime_days,
            },
        }

    def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]:
        """Return stored snapshots as dicts, optionally limited by age.

        Args:
            hours: When truthy, only snapshots newer than this many hours
                are returned; ``None`` or ``0`` returns everything retained.
        """
        with self._lock:
            snapshots = list(self._snapshots)

        if hours:
            cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)
            snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff]

        return [s.to_dict() for s in snapshots]

    def shutdown(self) -> None:
        """Stop the worker, record one final snapshot, and wait briefly."""
        self._shutdown.set()
        self._take_snapshot()
        self._snapshot_thread.join(timeout=5.0)
|
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
APP_VERSION = "0.3.0"
|
APP_VERSION = "0.2.1"
|
||||||
|
|
||||||
|
|
||||||
def get_version() -> str:
|
def get_version() -> str:
|
||||||
|
|||||||
@@ -1,79 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
import threading
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Dict, List, Optional
|
|
||||||
|
|
||||||
# Lower-case hostname: dot-separated labels of letters, digits and hyphens,
# where no label starts or ends with a hyphen.
_DOMAIN_RE = re.compile(
    r"^(?!-)[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$"
)

# DNS limits: 253 characters for the whole name, 63 per label.
_MAX_DOMAIN_LENGTH = 253
_MAX_LABEL_LENGTH = 63


def normalize_domain(raw: str) -> str:
    """Reduce user input such as a URL to a bare lower-case host name.

    Surrounding whitespace, an ``http://``/``https://`` prefix, any path,
    query or fragment, and a trailing ``:port`` are removed. The result is
    not validated; pass it to ``is_valid_domain``.
    """
    raw = raw.strip().lower()
    for prefix in ("https://", "http://"):
        if raw.startswith(prefix):
            raw = raw[len(prefix):]
    # Cut at the first path, query or fragment delimiter, in that order.
    raw = raw.split("/", 1)[0]
    raw = raw.split("?", 1)[0]
    raw = raw.split("#", 1)[0]
    if ":" in raw:
        # Everything after the last colon is treated as a port.
        raw = raw.rsplit(":", 1)[0]
    return raw


def is_valid_domain(domain: str) -> bool:
    """Return True if ``domain`` is a syntactically valid lower-case host.

    The whole string must match the hostname pattern (``fullmatch`` is used
    because ``$`` would otherwise tolerate a trailing newline), be at most
    253 characters long, and contain no label longer than 63 characters.
    """
    if not domain or len(domain) > _MAX_DOMAIN_LENGTH:
        return False
    if _DOMAIN_RE.fullmatch(domain) is None:
        return False
    return all(len(label) <= _MAX_LABEL_LENGTH for label in domain.split("."))
|
|
||||||
|
|
||||||
|
|
||||||
class WebsiteDomainStore:
    """Thread-safe, JSON-backed mapping of website domains to bucket names.

    Domains are stored and looked up in lower case. Every mutation is
    written straight back to ``config_path``.
    """

    def __init__(self, config_path: Path) -> None:
        """Create the store and load any existing mappings from disk."""
        self.config_path = config_path
        self._lock = threading.Lock()
        self._domains: Dict[str, str] = {}
        self.reload()

    def _read_config(self) -> Dict[str, str]:
        """Read the mapping file; return ``{}`` if missing or unusable."""
        try:
            with open(self.config_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # OSError: missing/unreadable file. ValueError: invalid JSON
            # (json.JSONDecodeError) or bytes that are not valid UTF-8.
            return {}
        if not isinstance(data, dict):
            return {}
        return {k.lower(): v for k, v in data.items()}

    def reload(self) -> None:
        """Replace the in-memory mappings with the contents of the file."""
        loaded = self._read_config()
        # Swap under the lock so readers never race with a reload.
        with self._lock:
            self._domains = loaded

    def _save(self) -> None:
        """Persist the mappings atomically; callers must hold ``_lock``.

        Raises:
            OSError: If the file cannot be written.
        """
        self.config_path.parent.mkdir(parents=True, exist_ok=True)
        # Write beside the target and rename, so a failed write can never
        # leave a truncated config that ``reload`` would read as empty.
        tmp_path = self.config_path.with_name(self.config_path.name + ".tmp")
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(self._domains, f, indent=2)
            tmp_path.replace(self.config_path)
        except BaseException:
            try:
                tmp_path.unlink()
            except OSError:
                pass
            raise

    def list_all(self) -> List[Dict[str, str]]:
        """Return every mapping as ``{"domain": ..., "bucket": ...}``."""
        with self._lock:
            return [{"domain": d, "bucket": b} for d, b in self._domains.items()]

    def get_bucket(self, domain: str) -> Optional[str]:
        """Return the bucket mapped to ``domain`` or ``None`` if unmapped."""
        with self._lock:
            return self._domains.get(domain.lower())

    def set_mapping(self, domain: str, bucket: str) -> None:
        """Map ``domain`` (case-insensitively) to ``bucket`` and persist."""
        with self._lock:
            self._domains[domain.lower()] = bucket
            self._save()

    def delete_mapping(self, domain: str) -> bool:
        """Remove the mapping for ``domain``.

        Returns:
            True if a mapping existed and was removed, False otherwise.
        """
        with self._lock:
            key = domain.lower()
            if key not in self._domains:
                return False
            del self._domains[key]
            self._save()
            return True
|
|
||||||
421
myfsio_core/Cargo.lock
generated
421
myfsio_core/Cargo.lock
generated
@@ -1,421 +0,0 @@
|
|||||||
# This file is automatically @generated by Cargo.
|
|
||||||
# It is not intended for manual editing.
|
|
||||||
version = 4
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "aho-corasick"
|
|
||||||
version = "1.1.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
|
||||||
dependencies = [
|
|
||||||
"memchr",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "allocator-api2"
|
|
||||||
version = "0.2.21"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "bitflags"
|
|
||||||
version = "2.11.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "block-buffer"
|
|
||||||
version = "0.10.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
|
|
||||||
dependencies = [
|
|
||||||
"generic-array",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cfg-if"
|
|
||||||
version = "1.0.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cpufeatures"
|
|
||||||
version = "0.2.17"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crypto-common"
|
|
||||||
version = "0.1.7"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
|
|
||||||
dependencies = [
|
|
||||||
"generic-array",
|
|
||||||
"typenum",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "digest"
|
|
||||||
version = "0.10.7"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
|
||||||
dependencies = [
|
|
||||||
"block-buffer",
|
|
||||||
"crypto-common",
|
|
||||||
"subtle",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "equivalent"
|
|
||||||
version = "1.0.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "foldhash"
|
|
||||||
version = "0.1.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "generic-array"
|
|
||||||
version = "0.14.7"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
|
|
||||||
dependencies = [
|
|
||||||
"typenum",
|
|
||||||
"version_check",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "hashbrown"
|
|
||||||
version = "0.15.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
|
|
||||||
dependencies = [
|
|
||||||
"allocator-api2",
|
|
||||||
"equivalent",
|
|
||||||
"foldhash",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "heck"
|
|
||||||
version = "0.5.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "hex"
|
|
||||||
version = "0.4.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "hmac"
|
|
||||||
version = "0.12.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
|
|
||||||
dependencies = [
|
|
||||||
"digest",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "libc"
|
|
||||||
version = "0.2.182"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "lock_api"
|
|
||||||
version = "0.4.14"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
|
|
||||||
dependencies = [
|
|
||||||
"scopeguard",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "lru"
|
|
||||||
version = "0.14.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198"
|
|
||||||
dependencies = [
|
|
||||||
"hashbrown",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "md-5"
|
|
||||||
version = "0.10.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"digest",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "memchr"
|
|
||||||
version = "2.8.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "myfsio_core"
|
|
||||||
version = "0.1.0"
|
|
||||||
dependencies = [
|
|
||||||
"hex",
|
|
||||||
"hmac",
|
|
||||||
"lru",
|
|
||||||
"md-5",
|
|
||||||
"parking_lot",
|
|
||||||
"pyo3",
|
|
||||||
"regex",
|
|
||||||
"sha2",
|
|
||||||
"unicode-normalization",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "once_cell"
|
|
||||||
version = "1.21.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "parking_lot"
|
|
||||||
version = "0.12.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
|
|
||||||
dependencies = [
|
|
||||||
"lock_api",
|
|
||||||
"parking_lot_core",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "parking_lot_core"
|
|
||||||
version = "0.9.12"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"libc",
|
|
||||||
"redox_syscall",
|
|
||||||
"smallvec",
|
|
||||||
"windows-link",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "portable-atomic"
|
|
||||||
version = "1.13.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "proc-macro2"
|
|
||||||
version = "1.0.106"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
|
||||||
dependencies = [
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pyo3"
|
|
||||||
version = "0.28.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "14c738662e2181be11cb82487628404254902bb3225d8e9e99c31f3ef82a405c"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
"once_cell",
|
|
||||||
"portable-atomic",
|
|
||||||
"pyo3-build-config",
|
|
||||||
"pyo3-ffi",
|
|
||||||
"pyo3-macros",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pyo3-build-config"
|
|
||||||
version = "0.28.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f9ca0864a7dd3c133a7f3f020cbff2e12e88420da854c35540fd20ce2d60e435"
|
|
||||||
dependencies = [
|
|
||||||
"target-lexicon",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pyo3-ffi"
|
|
||||||
version = "0.28.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9dfc1956b709823164763a34cc42bbfd26b8730afa77809a3df8b94a3ae3b059"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
"pyo3-build-config",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pyo3-macros"
|
|
||||||
version = "0.28.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "29dc660ad948bae134d579661d08033fbb1918f4529c3bbe3257a68f2009ddf2"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"pyo3-macros-backend",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pyo3-macros-backend"
|
|
||||||
version = "0.28.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e78cd6c6d718acfcedf26c3d21fe0f053624368b0d44298c55d7138fde9331f7"
|
|
||||||
dependencies = [
|
|
||||||
"heck",
|
|
||||||
"proc-macro2",
|
|
||||||
"pyo3-build-config",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "quote"
|
|
||||||
version = "1.0.44"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "redox_syscall"
|
|
||||||
version = "0.5.18"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
|
|
||||||
dependencies = [
|
|
||||||
"bitflags",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "regex"
|
|
||||||
version = "1.12.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
|
|
||||||
dependencies = [
|
|
||||||
"aho-corasick",
|
|
||||||
"memchr",
|
|
||||||
"regex-automata",
|
|
||||||
"regex-syntax",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "regex-automata"
|
|
||||||
version = "0.4.14"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
|
|
||||||
dependencies = [
|
|
||||||
"aho-corasick",
|
|
||||||
"memchr",
|
|
||||||
"regex-syntax",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "regex-syntax"
|
|
||||||
version = "0.8.9"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "scopeguard"
|
|
||||||
version = "1.2.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "sha2"
|
|
||||||
version = "0.10.9"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"cpufeatures",
|
|
||||||
"digest",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "smallvec"
|
|
||||||
version = "1.15.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "subtle"
|
|
||||||
version = "2.6.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "syn"
|
|
||||||
version = "2.0.116"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "target-lexicon"
|
|
||||||
version = "0.13.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "tinyvec"
|
|
||||||
version = "1.10.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
|
|
||||||
dependencies = [
|
|
||||||
"tinyvec_macros",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "tinyvec_macros"
|
|
||||||
version = "0.1.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "typenum"
|
|
||||||
version = "1.19.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "unicode-ident"
|
|
||||||
version = "1.0.24"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "unicode-normalization"
|
|
||||||
version = "0.1.25"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
|
|
||||||
dependencies = [
|
|
||||||
"tinyvec",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "version_check"
|
|
||||||
version = "0.9.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows-link"
|
|
||||||
version = "0.2.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
[package]
|
|
||||||
name = "myfsio_core"
|
|
||||||
version = "0.1.0"
|
|
||||||
edition = "2021"
|
|
||||||
|
|
||||||
[lib]
|
|
||||||
name = "myfsio_core"
|
|
||||||
crate-type = ["cdylib"]
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
pyo3 = { version = "0.28", features = ["extension-module"] }
|
|
||||||
hmac = "0.12"
|
|
||||||
sha2 = "0.10"
|
|
||||||
md-5 = "0.10"
|
|
||||||
hex = "0.4"
|
|
||||||
unicode-normalization = "0.1"
|
|
||||||
regex = "1"
|
|
||||||
lru = "0.14"
|
|
||||||
parking_lot = "0.12"
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
[build-system]
|
|
||||||
requires = ["maturin>=1.0,<2.0"]
|
|
||||||
build-backend = "maturin"
|
|
||||||
|
|
||||||
[project]
|
|
||||||
name = "myfsio_core"
|
|
||||||
version = "0.1.0"
|
|
||||||
requires-python = ">=3.10"
|
|
||||||
|
|
||||||
[tool.maturin]
|
|
||||||
features = ["pyo3/extension-module"]
|
|
||||||
@@ -1,90 +0,0 @@
|
|||||||
use md5::{Digest, Md5};
|
|
||||||
use pyo3::exceptions::PyIOError;
|
|
||||||
use pyo3::prelude::*;
|
|
||||||
use sha2::Sha256;
|
|
||||||
use std::fs::File;
|
|
||||||
use std::io::Read;
|
|
||||||
|
|
||||||
const CHUNK_SIZE: usize = 65536;
|
|
||||||
|
|
||||||
#[pyfunction]
|
|
||||||
pub fn md5_file(py: Python<'_>, path: &str) -> PyResult<String> {
|
|
||||||
let path = path.to_owned();
|
|
||||||
py.detach(move || {
|
|
||||||
let mut file = File::open(&path)
|
|
||||||
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
|
|
||||||
let mut hasher = Md5::new();
|
|
||||||
let mut buf = vec![0u8; CHUNK_SIZE];
|
|
||||||
loop {
|
|
||||||
let n = file
|
|
||||||
.read(&mut buf)
|
|
||||||
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
|
|
||||||
if n == 0 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
hasher.update(&buf[..n]);
|
|
||||||
}
|
|
||||||
Ok(format!("{:x}", hasher.finalize()))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
#[pyfunction]
|
|
||||||
pub fn md5_bytes(data: &[u8]) -> String {
|
|
||||||
let mut hasher = Md5::new();
|
|
||||||
hasher.update(data);
|
|
||||||
format!("{:x}", hasher.finalize())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[pyfunction]
|
|
||||||
pub fn sha256_file(py: Python<'_>, path: &str) -> PyResult<String> {
|
|
||||||
let path = path.to_owned();
|
|
||||||
py.detach(move || {
|
|
||||||
let mut file = File::open(&path)
|
|
||||||
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
|
|
||||||
let mut hasher = Sha256::new();
|
|
||||||
let mut buf = vec![0u8; CHUNK_SIZE];
|
|
||||||
loop {
|
|
||||||
let n = file
|
|
||||||
.read(&mut buf)
|
|
||||||
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
|
|
||||||
if n == 0 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
hasher.update(&buf[..n]);
|
|
||||||
}
|
|
||||||
Ok(format!("{:x}", hasher.finalize()))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
#[pyfunction]
|
|
||||||
pub fn sha256_bytes(data: &[u8]) -> String {
|
|
||||||
let mut hasher = Sha256::new();
|
|
||||||
hasher.update(data);
|
|
||||||
format!("{:x}", hasher.finalize())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[pyfunction]
|
|
||||||
pub fn md5_sha256_file(py: Python<'_>, path: &str) -> PyResult<(String, String)> {
|
|
||||||
let path = path.to_owned();
|
|
||||||
py.detach(move || {
|
|
||||||
let mut file = File::open(&path)
|
|
||||||
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
|
|
||||||
let mut md5_hasher = Md5::new();
|
|
||||||
let mut sha_hasher = Sha256::new();
|
|
||||||
let mut buf = vec![0u8; CHUNK_SIZE];
|
|
||||||
loop {
|
|
||||||
let n = file
|
|
||||||
.read(&mut buf)
|
|
||||||
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
|
|
||||||
if n == 0 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
md5_hasher.update(&buf[..n]);
|
|
||||||
sha_hasher.update(&buf[..n]);
|
|
||||||
}
|
|
||||||
Ok((
|
|
||||||
format!("{:x}", md5_hasher.finalize()),
|
|
||||||
format!("{:x}", sha_hasher.finalize()),
|
|
||||||
))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
mod hashing;
|
|
||||||
mod sigv4;
|
|
||||||
mod validation;
|
|
||||||
|
|
||||||
use pyo3::prelude::*;
|
|
||||||
|
|
||||||
/// Python extension module `myfsio_core`.
///
/// Registers the SigV4 helpers, the hashing helpers and the name
/// validators as module-level Python functions.
#[pymodule]
mod myfsio_core {
    use super::*;

    /// Module initialiser: adds every exported function to `m`.
    #[pymodule_init]
    fn init(m: &Bound<'_, PyModule>) -> PyResult<()> {
        // AWS Signature V4 helpers.
        m.add_function(wrap_pyfunction!(sigv4::derive_signing_key, m)?)?;
        m.add_function(wrap_pyfunction!(sigv4::compute_signature, m)?)?;
        m.add_function(wrap_pyfunction!(sigv4::build_string_to_sign, m)?)?;
        m.add_function(wrap_pyfunction!(sigv4::constant_time_compare, m)?)?;
        m.add_function(wrap_pyfunction!(sigv4::clear_signing_key_cache, m)?)?;

        // MD5 / SHA-256 hashing of files and byte strings.
        m.add_function(wrap_pyfunction!(hashing::md5_file, m)?)?;
        m.add_function(wrap_pyfunction!(hashing::md5_bytes, m)?)?;
        m.add_function(wrap_pyfunction!(hashing::sha256_file, m)?)?;
        m.add_function(wrap_pyfunction!(hashing::sha256_bytes, m)?)?;
        m.add_function(wrap_pyfunction!(hashing::md5_sha256_file, m)?)?;

        // Object-key and bucket-name validation.
        m.add_function(wrap_pyfunction!(validation::validate_object_key, m)?)?;
        m.add_function(wrap_pyfunction!(validation::validate_bucket_name, m)?)?;

        Ok(())
    }
}
|
|
||||||
@@ -1,108 +0,0 @@
|
|||||||
use hmac::{Hmac, Mac};
|
|
||||||
use lru::LruCache;
|
|
||||||
use parking_lot::Mutex;
|
|
||||||
use pyo3::prelude::*;
|
|
||||||
use sha2::{Digest, Sha256};
|
|
||||||
use std::num::NonZeroUsize;
|
|
||||||
use std::sync::LazyLock;
|
|
||||||
use std::time::Instant;
|
|
||||||
|
|
||||||
type HmacSha256 = Hmac<Sha256>;
|
|
||||||
|
|
||||||
/// A derived signing key together with the instant it was cached.
struct CacheEntry {
    /// The derived signing key bytes.
    key: Vec<u8>,
    /// When the entry was inserted; compared against `CACHE_TTL_SECS`.
    created: Instant,
}
|
|
||||||
|
|
||||||
static SIGNING_KEY_CACHE: LazyLock<Mutex<LruCache<(String, String, String, String), CacheEntry>>> =
|
|
||||||
LazyLock::new(|| Mutex::new(LruCache::new(NonZeroUsize::new(256).unwrap())));
|
|
||||||
|
|
||||||
const CACHE_TTL_SECS: u64 = 60;
|
|
||||||
|
|
||||||
fn hmac_sha256(key: &[u8], msg: &[u8]) -> Vec<u8> {
|
|
||||||
let mut mac = HmacSha256::new_from_slice(key).expect("HMAC key length is always valid");
|
|
||||||
mac.update(msg);
|
|
||||||
mac.finalize().into_bytes().to_vec()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Derives the AWS Signature V4 signing key for the given credentials scope.
///
/// The result is the HMAC-SHA256 chain
/// `"AWS4" + secret_key -> date_stamp -> region -> service -> "aws4_request"`.
/// Results are memoised in `SIGNING_KEY_CACHE`, keyed by all four arguments,
/// and reused while the entry is younger than `CACHE_TTL_SECS`.
///
/// NOTE(review): the cache key contains `secret_key` in plain text for as
/// long as the entry lives -- confirm this is acceptable.
#[pyfunction]
pub fn derive_signing_key(
    secret_key: &str,
    date_stamp: &str,
    region: &str,
    service: &str,
) -> Vec<u8> {
    let cache_key = (
        secret_key.to_owned(),
        date_stamp.to_owned(),
        region.to_owned(),
        service.to_owned(),
    );

    // Lookup happens in its own scope so the lock is released before the
    // HMAC chain below is computed.
    {
        let mut cache = SIGNING_KEY_CACHE.lock();
        if let Some(entry) = cache.get(&cache_key) {
            if entry.created.elapsed().as_secs() < CACHE_TTL_SECS {
                return entry.key.clone();
            }
            // Entry is stale: evict it and fall through to recompute.
            cache.pop(&cache_key);
        }
    }

    let k_date = hmac_sha256(format!("AWS4{}", secret_key).as_bytes(), date_stamp.as_bytes());
    let k_region = hmac_sha256(&k_date, region.as_bytes());
    let k_service = hmac_sha256(&k_region, service.as_bytes());
    let k_signing = hmac_sha256(&k_service, b"aws4_request");

    // The lock was dropped in between, so two callers may both compute the
    // same key; the later `put` simply overwrites the earlier one.
    {
        let mut cache = SIGNING_KEY_CACHE.lock();
        cache.put(
            cache_key,
            CacheEntry {
                key: k_signing.clone(),
                created: Instant::now(),
            },
        );
    }

    k_signing
}
|
|
||||||
|
|
||||||
#[pyfunction]
|
|
||||||
pub fn compute_signature(signing_key: &[u8], string_to_sign: &str) -> String {
|
|
||||||
let sig = hmac_sha256(signing_key, string_to_sign.as_bytes());
|
|
||||||
hex::encode(sig)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn sha256_hex(data: &[u8]) -> String {
|
|
||||||
let mut hasher = Sha256::new();
|
|
||||||
hasher.update(data);
|
|
||||||
hex::encode(hasher.finalize())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[pyfunction]
|
|
||||||
pub fn build_string_to_sign(
|
|
||||||
amz_date: &str,
|
|
||||||
credential_scope: &str,
|
|
||||||
canonical_request: &str,
|
|
||||||
) -> String {
|
|
||||||
let cr_hash = sha256_hex(canonical_request.as_bytes());
|
|
||||||
format!("AWS4-HMAC-SHA256\n{}\n{}\n{}", amz_date, credential_scope, cr_hash)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[pyfunction]
|
|
||||||
/// Compares two strings byte-wise without stopping at the first mismatch.
///
/// Strings of different byte length are rejected immediately. Otherwise every
/// byte pair is XOR-ed and OR-accumulated, and the strings are equal only when
/// the accumulator is still zero after the whole pass.
pub fn constant_time_compare(a: &str, b: &str) -> bool {
    let (lhs, rhs) = (a.as_bytes(), b.as_bytes());
    if lhs.len() != rhs.len() {
        return false;
    }
    let diff = lhs.iter().zip(rhs).fold(0u8, |acc, (l, r)| acc | (l ^ r));
    diff == 0
}
|
|
||||||
|
|
||||||
#[pyfunction]
|
|
||||||
pub fn clear_signing_key_cache() {
|
|
||||||
SIGNING_KEY_CACHE.lock().clear();
|
|
||||||
}
|
|
||||||
@@ -1,149 +0,0 @@
|
|||||||
use pyo3::prelude::*;
|
|
||||||
use std::sync::LazyLock;
|
|
||||||
use unicode_normalization::UnicodeNormalization;
|
|
||||||
|
|
||||||
// Segment names rejected when Windows rules are enabled. `validate_object_key`
// upper-cases each segment before comparing, so the match is case-insensitive.
const WINDOWS_RESERVED: &[&str] = &[
|
|
||||||
"CON", "PRN", "AUX", "NUL", "COM0", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
|
|
||||||
"COM8", "COM9", "LPT0", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
|
|
||||||
"LPT9",
|
|
||||||
];
|
|
||||||
|
|
||||||
// Characters that cause a key segment to be rejected when Windows rules are enabled.
const WINDOWS_ILLEGAL_CHARS: &[char] = &['<', '>', ':', '"', '/', '\\', '|', '?', '*'];
|
|
||||||
|
|
||||||
// Folder names that may not appear as the first non-empty segment of an object key.
const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
|
|
||||||
// Additional name rejected as the first non-empty segment of an object key.
const SYSTEM_ROOT: &str = ".myfsio.sys";
|
|
||||||
|
|
||||||
// Lazily compiled pattern for strings shaped like a dotted quad (four groups of
// one to three digits). It checks shape only; octet values are not range-checked.
// Used by `validate_bucket_name` to reject IP-address-like bucket names.
static IP_REGEX: LazyLock<regex::Regex> =
|
|
||||||
LazyLock::new(|| regex::Regex::new(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$").unwrap());
|
|
||||||
|
|
||||||
#[pyfunction]
|
|
||||||
#[pyo3(signature = (object_key, max_length_bytes=1024, is_windows=false, reserved_prefixes=None))]
|
|
||||||
pub fn validate_object_key(
|
|
||||||
object_key: &str,
|
|
||||||
max_length_bytes: usize,
|
|
||||||
is_windows: bool,
|
|
||||||
reserved_prefixes: Option<Vec<String>>,
|
|
||||||
) -> PyResult<Option<String>> {
|
|
||||||
if object_key.is_empty() {
|
|
||||||
return Ok(Some("Object key required".to_string()));
|
|
||||||
}
|
|
||||||
|
|
||||||
if object_key.contains('\0') {
|
|
||||||
return Ok(Some("Object key contains null bytes".to_string()));
|
|
||||||
}
|
|
||||||
|
|
||||||
let normalized: String = object_key.nfc().collect();
|
|
||||||
|
|
||||||
if normalized.len() > max_length_bytes {
|
|
||||||
return Ok(Some(format!(
|
|
||||||
"Object key exceeds maximum length of {} bytes",
|
|
||||||
max_length_bytes
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
if normalized.starts_with('/') || normalized.starts_with('\\') {
|
|
||||||
return Ok(Some("Object key cannot start with a slash".to_string()));
|
|
||||||
}
|
|
||||||
|
|
||||||
let parts: Vec<&str> = if cfg!(windows) || is_windows {
|
|
||||||
normalized.split(['/', '\\']).collect()
|
|
||||||
} else {
|
|
||||||
normalized.split('/').collect()
|
|
||||||
};
|
|
||||||
|
|
||||||
for part in &parts {
|
|
||||||
if part.is_empty() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if *part == ".." {
|
|
||||||
return Ok(Some(
|
|
||||||
"Object key contains parent directory references".to_string(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
if *part == "." {
|
|
||||||
return Ok(Some("Object key contains invalid segments".to_string()));
|
|
||||||
}
|
|
||||||
|
|
||||||
if part.chars().any(|c| (c as u32) < 32) {
|
|
||||||
return Ok(Some(
|
|
||||||
"Object key contains control characters".to_string(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
if is_windows {
|
|
||||||
if part.chars().any(|c| WINDOWS_ILLEGAL_CHARS.contains(&c)) {
|
|
||||||
return Ok(Some(
|
|
||||||
"Object key contains characters not supported on Windows filesystems"
|
|
||||||
.to_string(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
if part.ends_with(' ') || part.ends_with('.') {
|
|
||||||
return Ok(Some(
|
|
||||||
"Object key segments cannot end with spaces or periods on Windows".to_string(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
let trimmed = part.trim_end_matches(['.', ' ']).to_uppercase();
|
|
||||||
if WINDOWS_RESERVED.contains(&trimmed.as_str()) {
|
|
||||||
return Ok(Some(format!("Invalid filename segment: {}", part)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let non_empty_parts: Vec<&str> = parts.iter().filter(|p| !p.is_empty()).copied().collect();
|
|
||||||
if let Some(top) = non_empty_parts.first() {
|
|
||||||
if INTERNAL_FOLDERS.contains(top) || *top == SYSTEM_ROOT {
|
|
||||||
return Ok(Some("Object key uses a reserved prefix".to_string()));
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(ref prefixes) = reserved_prefixes {
|
|
||||||
for prefix in prefixes {
|
|
||||||
if *top == prefix.as_str() {
|
|
||||||
return Ok(Some("Object key uses a reserved prefix".to_string()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(None)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[pyfunction]
|
|
||||||
pub fn validate_bucket_name(bucket_name: &str) -> Option<String> {
|
|
||||||
let len = bucket_name.len();
|
|
||||||
if len < 3 || len > 63 {
|
|
||||||
return Some("Bucket name must be between 3 and 63 characters".to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
let bytes = bucket_name.as_bytes();
|
|
||||||
if !bytes[0].is_ascii_lowercase() && !bytes[0].is_ascii_digit() {
|
|
||||||
return Some(
|
|
||||||
"Bucket name must start and end with a lowercase letter or digit".to_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if !bytes[len - 1].is_ascii_lowercase() && !bytes[len - 1].is_ascii_digit() {
|
|
||||||
return Some(
|
|
||||||
"Bucket name must start and end with a lowercase letter or digit".to_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
for &b in bytes {
|
|
||||||
if !b.is_ascii_lowercase() && !b.is_ascii_digit() && b != b'.' && b != b'-' {
|
|
||||||
return Some(
|
|
||||||
"Bucket name can only contain lowercase letters, digits, dots, and hyphens"
|
|
||||||
.to_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if bucket_name.contains("..") {
|
|
||||||
return Some("Bucket name must not contain consecutive periods".to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
if IP_REGEX.is_match(bucket_name) {
|
|
||||||
return Some("Bucket name must not be formatted as an IP address".to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
None
|
|
||||||
}
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc_fingerprint":13172970000770725120,"outputs":{"7971740275564407648":{"success":true,"status":"","code":0,"stdout":"___.exe\nlib___.rlib\n___.dll\n___.dll\n___.lib\n___.dll\nC:\\Users\\jun\\.rustup\\toolchains\\stable-x86_64-pc-windows-msvc\npacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"msvc\"\ntarget_family=\"windows\"\ntarget_feature=\"cmpxchg16b\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_feature=\"sse3\"\ntarget_has_atomic=\"128\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"windows\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"pc\"\nwindows\n","stderr":""},"17747080675513052775":{"success":true,"status":"","code":0,"stdout":"rustc 1.93.1 (01f6ddf75 2026-02-11)\nbinary: rustc\ncommit-hash: 01f6ddf7588f42ae2d7eb0a2f21d44e8e96674cf\ncommit-date: 2026-02-11\nhost: x86_64-pc-windows-msvc\nrelease: 1.93.1\nLLVM version: 21.1.8\n","stderr":""}},"successes":{}}
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
Signature: 8a477f597d28d172789f06886806bc55
|
|
||||||
# This file is a cache directory tag created by cargo.
|
|
||||||
# For information about cache directory tags see https://bford.info/cachedir/
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
801af22cf202da8e
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"perf-literal\", \"std\"]","declared_features":"[\"default\", \"logging\", \"perf-literal\", \"std\"]","target":7534583537114156500,"profile":2040997289075261528,"path":6364296192483896971,"deps":[[1363051979936526615,"memchr",false,11090220145123168660]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\aho-corasick-45694771b543be75\\dep-lib-aho_corasick","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
435555ec2fb592e3
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"alloc\"]","declared_features":"[\"alloc\", \"default\", \"fresh-rust\", \"nightly\", \"serde\", \"std\"]","target":5388200169723499962,"profile":4067574213046180398,"path":10654049299693593327,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\allocator-api2-db7934dbe96de5b4\\dep-lib-allocator_api2","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
d28af275d001c358
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":6962977057026645649,"profile":1369601567987815722,"path":9853093265219907461,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\autocfg-1c4fb7a37cc3df69\\dep-lib-autocfg","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
1fbf4ba9542edced
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":4098124618827574291,"profile":2040997289075261528,"path":3658007358608479489,"deps":[[10520923840501062997,"generic_array",false,11555283918993371487]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\block-buffer-95b0ac364bec72f9\\dep-lib-block_buffer","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
37923e6f5f9687ab
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[\"core\", \"rustc-dep-of-std\"]","target":13840298032947503755,"profile":2040997289075261528,"path":4093486168504982869,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\cfg-if-be2711f84a777e73\\dep-lib-cfg_if","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
603e28136cf5763c
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":2330704043955282025,"profile":2040997289075261528,"path":13200428550696548327,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\cpufeatures-980094f8735c42d1\\dep-lib-cpufeatures","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
896672d759b5299c
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"std\"]","declared_features":"[\"getrandom\", \"rand_core\", \"std\"]","target":12082577455412410174,"profile":2040997289075261528,"path":14902376638882023040,"deps":[[857979250431893282,"typenum",false,7416411392359930020],[10520923840501062997,"generic_array",false,11555283918993371487]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\crypto-common-289a508abdda3048\\dep-lib-crypto_common","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
914a617b9f05c9d8
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"alloc\", \"block-buffer\", \"core-api\", \"default\", \"mac\", \"std\", \"subtle\"]","declared_features":"[\"alloc\", \"blobby\", \"block-buffer\", \"const-oid\", \"core-api\", \"default\", \"dev\", \"mac\", \"oid\", \"rand_core\", \"std\", \"subtle\"]","target":7510122432137863311,"profile":2040997289075261528,"path":11503432597517024930,"deps":[[6039282458970808711,"crypto_common",false,11252724541433210505],[10626340395483396037,"block_buffer",false,17139625223017709343],[17003143334332120809,"subtle",false,8597342066671925934]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\digest-a91458bfa5613332\\dep-lib-digest","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
3b95cf48bbd7dc53
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":1524667692659508025,"profile":2040997289075261528,"path":17534356223679657546,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\equivalent-943ac856871c0988\\dep-lib-equivalent","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
b7ba5182ce570398
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[\"default\", \"std\"]","target":18077926938045032029,"profile":2040997289075261528,"path":9869209539952544870,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\foldhash-b8a92f8c10d550f7\\dep-lib-foldhash","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
f0a5af4d8a8c7106
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"more_lengths\"]","declared_features":"[\"more_lengths\", \"serde\", \"zeroize\"]","target":12318548087768197662,"profile":1369601567987815722,"path":13853454403963664247,"deps":[[5398981501050481332,"version_check",false,16419025953046340415]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\generic-array-2462daa120fe5936\\dep-build-script-build-script-build","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
5f316276809d5ca0
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"more_lengths\"]","declared_features":"[\"more_lengths\", \"serde\", \"zeroize\"]","target":13084005262763373425,"profile":2040997289075261528,"path":12463275850883329568,"deps":[[857979250431893282,"typenum",false,7416411392359930020],[10520923840501062997,"build_script_build",false,16977603856295925732]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\generic-array-62216349963f3a3c\\dep-lib-generic_array","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
e417d28fc1909ceb
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"","declared_features":"","target":0,"profile":0,"path":0,"deps":[[10520923840501062997,"build_script_build",false,464306762232604144]],"local":[{"Precalculated":"0.14.7"}],"rustflags":[],"config":0,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
aec88a641c5288e3
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"allocator-api2\", \"default\", \"default-hasher\", \"equivalent\", \"inline-more\", \"raw-entry\"]","declared_features":"[\"alloc\", \"allocator-api2\", \"core\", \"default\", \"default-hasher\", \"equivalent\", \"inline-more\", \"nightly\", \"raw-entry\", \"rayon\", \"rustc-dep-of-std\", \"rustc-internal-api\", \"serde\"]","target":13796197676120832388,"profile":2040997289075261528,"path":12448322139402656924,"deps":[[5230392855116717286,"equivalent",false,6042941999404782907],[9150530836556604396,"allocator_api2",false,16398368410642502979],[10842263908529601448,"foldhash",false,10953695263156452023]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\hashbrown-510d641b592c306b\\dep-lib-hashbrown","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
ddc0b590ff80762b
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":17886154901722686619,"profile":1369601567987815722,"path":8608102977929876445,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\heck-b47c94fd2a7e00cb\\dep-lib-heck","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
This file has an mtime of when this was started.
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
41890ebff4143fa5
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"rustc":8323788817864214825,"features":"[\"alloc\", \"default\", \"std\"]","declared_features":"[\"alloc\", \"default\", \"serde\", \"std\"]","target":4242469766639956503,"profile":2040997289075261528,"path":6793865871540733919,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\hex-253414d2260adcdf\\dep-lib-hex","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user