25 Commits

Author SHA1 Message Date
85181f0be6 Merge pull request 'MyFSIO v0.2.5 Release' (#17) from next into main
Reviewed-on: #17
2026-02-02 05:32:02 +00:00
a779b002d7 Optimize CPU usage via caching and reducing ThreadPoolExecutor workers to prevent CPU saturation 2026-02-02 13:30:06 +08:00
d5ca7a8be1 Merge pull request 'MyFSIO v0.2.4 Release' (#16) from next into main
Reviewed-on: #16
2026-02-01 10:27:11 +00:00
45d21cce21 Add ALLOW_INTERNAL_ENDPOINTS config for self-hosted internal network deployments 2026-02-01 18:26:14 +08:00
9629507acd Fix auth bypass, user enumeration, xml DoS, multipart race, path traversal unicode, silent permissions failures, data key without AAD, KMS streaming 2026-02-01 18:12:03 +08:00
5d6cb4efa1 Update documentation 2026-02-01 15:18:20 +08:00
56ad83bbaf Fix bidirectional sync UI issues 2026-02-01 14:56:20 +08:00
847933b7c0 Add UI endpoint for bidirectional-status to fix 403 auth error 2026-02-01 14:30:55 +08:00
be55d08c0a Fix bidirectional-status 404 when UI runs separately from API 2026-02-01 14:23:35 +08:00
8c4bf67974 Fix 15 security vulnerabilities across auth, storage, and API modules 2026-01-31 00:55:27 +08:00
9385d1fe1c Add 4 new S3 APIs: UploadPartCopy, Bucket Replication, PostObject, SelectObjectContent 2026-01-29 12:51:00 +08:00
0ea54457e8 Fix 17 security vulnerabilities across encryption, auth, and API modules 2026-01-29 12:05:35 +08:00
ae26d22388 Add bidirectional replication setup verification and improved UX warnings 2026-01-26 23:29:20 +08:00
6b715851b9 Add replication setup wizard and site-level sync dashboard for site registry 2026-01-26 21:39:47 +08:00
62c36f7a6c Add site registry UI and update documentation for geo-distribution 2026-01-26 19:49:23 +08:00
b32f1f94f7 Add configurable env variables for hardcoded timeouts and limits 2026-01-25 23:32:36 +08:00
6e3d280a75 Add SlowDown error code tracking for 429 rate limit responses 2026-01-25 21:29:58 +08:00
704f79dc44 Add configurable rate limits for S3 API endpoints 2026-01-25 20:15:38 +08:00
476dc79e42 MyFSIO v0.2.3 Release
Reviewed-on: #15
2026-01-25 06:05:53 +00:00
87c7f1bc7d Add bidirectional mode option to replication panel UI 2026-01-25 12:35:14 +08:00
23ea164215 Add bi-directional site replication with LWW conflict resolution 2026-01-24 19:38:17 +08:00
7a8acfb933 Add missing lifecycle and cors actions to Full control template 2026-01-22 11:12:23 +08:00
71327bcbf1 Add dynamic updates to System Health section on metrics page 2026-01-22 11:06:53 +08:00
c0603c592b Add configurable server threads and connections 2026-01-22 10:58:44 +08:00
912a7dc74f Add background collection for system metrics 2026-01-20 00:00:31 +08:00
33 changed files with 7380 additions and 623 deletions

View File

@@ -102,6 +102,11 @@ python run.py --mode ui # UI only (port 5100)
| `ENCRYPTION_ENABLED` | `false` | Enable server-side encryption |
| `KMS_ENABLED` | `false` | Enable Key Management Service |
| `LOG_LEVEL` | `INFO` | Logging verbosity |
| `SIGV4_TIMESTAMP_TOLERANCE_SECONDS` | `900` | Max time skew for SigV4 requests |
| `PRESIGNED_URL_MAX_EXPIRY_SECONDS` | `604800` | Max presigned URL expiry (7 days) |
| `REPLICATION_CONNECT_TIMEOUT_SECONDS` | `5` | Replication connection timeout |
| `SITE_SYNC_ENABLED` | `false` | Enable bi-directional site sync |
| `OBJECT_TAG_LIMIT` | `50` | Maximum tags per object |
## Data Layout

View File

@@ -31,6 +31,7 @@ from .notifications import NotificationService
from .object_lock import ObjectLockService
from .replication import ReplicationManager
from .secret_store import EphemeralSecretStore
from .site_registry import SiteRegistry, SiteInfo
from .storage import ObjectStorage
from .version import get_version
@@ -104,6 +105,9 @@ def create_app(
storage = ObjectStorage(
Path(app.config["STORAGE_ROOT"]),
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 5),
object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100),
bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0),
object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024),
)
if app.config.get("WARM_CACHE_ON_STARTUP", True) and not app.config.get("TESTING"):
@@ -137,12 +141,33 @@ def create_app(
)
connections = ConnectionStore(connections_path)
replication = ReplicationManager(storage, connections, replication_rules_path, storage_root)
replication = ReplicationManager(
storage,
connections,
replication_rules_path,
storage_root,
connect_timeout=app.config.get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5),
read_timeout=app.config.get("REPLICATION_READ_TIMEOUT_SECONDS", 30),
max_retries=app.config.get("REPLICATION_MAX_RETRIES", 2),
streaming_threshold_bytes=app.config.get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024),
max_failures_per_bucket=app.config.get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50),
)
site_registry_path = config_dir / "site_registry.json"
site_registry = SiteRegistry(site_registry_path)
if app.config.get("SITE_ID") and not site_registry.get_local_site():
site_registry.set_local_site(SiteInfo(
site_id=app.config["SITE_ID"],
endpoint=app.config.get("SITE_ENDPOINT") or "",
region=app.config.get("SITE_REGION", "us-east-1"),
priority=app.config.get("SITE_PRIORITY", 100),
))
encryption_config = {
"encryption_enabled": app.config.get("ENCRYPTION_ENABLED", False),
"encryption_master_key_path": app.config.get("ENCRYPTION_MASTER_KEY_PATH"),
"default_encryption_algorithm": app.config.get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"),
"encryption_chunk_size_bytes": app.config.get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024),
}
encryption_manager = EncryptionManager(encryption_config)
@@ -150,7 +175,12 @@ def create_app(
if app.config.get("KMS_ENABLED", False):
kms_keys_path = Path(app.config.get("KMS_KEYS_PATH", ""))
kms_master_key_path = Path(app.config.get("ENCRYPTION_MASTER_KEY_PATH", ""))
kms_manager = KMSManager(kms_keys_path, kms_master_key_path)
kms_manager = KMSManager(
kms_keys_path,
kms_master_key_path,
generate_data_key_min_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1),
generate_data_key_max_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024),
)
encryption_manager.set_kms_provider(kms_manager)
if app.config.get("ENCRYPTION_ENABLED", False):
@@ -159,7 +189,10 @@ def create_app(
acl_service = AclService(storage_root)
object_lock_service = ObjectLockService(storage_root)
notification_service = NotificationService(storage_root)
notification_service = NotificationService(
storage_root,
allow_internal_endpoints=app.config.get("ALLOW_INTERNAL_ENDPOINTS", False),
)
access_logging_service = AccessLoggingService(storage_root)
access_logging_service.set_storage(storage)
@@ -170,6 +203,7 @@ def create_app(
base_storage,
interval_seconds=app.config.get("LIFECYCLE_INTERVAL_SECONDS", 3600),
storage_root=storage_root,
max_history_per_bucket=app.config.get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50),
)
lifecycle_manager.start()
@@ -187,6 +221,7 @@ def create_app(
app.extensions["object_lock"] = object_lock_service
app.extensions["notifications"] = notification_service
app.extensions["access_logging"] = access_logging_service
app.extensions["site_registry"] = site_registry
operation_metrics_collector = None
if app.config.get("OPERATION_METRICS_ENABLED", False):
@@ -197,6 +232,35 @@ def create_app(
)
app.extensions["operation_metrics"] = operation_metrics_collector
system_metrics_collector = None
if app.config.get("METRICS_HISTORY_ENABLED", False):
from .system_metrics import SystemMetricsCollector
system_metrics_collector = SystemMetricsCollector(
storage_root,
interval_minutes=app.config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5),
retention_hours=app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24),
)
system_metrics_collector.set_storage(storage)
app.extensions["system_metrics"] = system_metrics_collector
site_sync_worker = None
if app.config.get("SITE_SYNC_ENABLED", False):
from .site_sync import SiteSyncWorker
site_sync_worker = SiteSyncWorker(
storage=storage,
connections=connections,
replication_manager=replication,
storage_root=storage_root,
interval_seconds=app.config.get("SITE_SYNC_INTERVAL_SECONDS", 60),
batch_size=app.config.get("SITE_SYNC_BATCH_SIZE", 100),
connect_timeout=app.config.get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10),
read_timeout=app.config.get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120),
max_retries=app.config.get("SITE_SYNC_MAX_RETRIES", 2),
clock_skew_tolerance_seconds=app.config.get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0),
)
site_sync_worker.start()
app.extensions["site_sync"] = site_sync_worker
@app.errorhandler(500)
def internal_error(error):
return render_template('500.html'), 500
@@ -264,11 +328,14 @@ def create_app(
if include_api:
from .s3_api import s3_api_bp
from .kms_api import kms_api_bp
from .admin_api import admin_api_bp
app.register_blueprint(s3_api_bp)
app.register_blueprint(kms_api_bp)
app.register_blueprint(admin_api_bp)
csrf.exempt(s3_api_bp)
csrf.exempt(kms_api_bp)
csrf.exempt(admin_api_bp)
if include_ui:
from .ui import ui_bp

670
app/admin_api.py Normal file
View File

@@ -0,0 +1,670 @@
from __future__ import annotations
import ipaddress
import logging
import re
import socket
import time
from typing import Any, Dict, Optional, Tuple
from urllib.parse import urlparse
import requests
from flask import Blueprint, Response, current_app, jsonify, request
from .connections import ConnectionStore
from .extensions import limiter
from .iam import IamError, Principal
from .replication import ReplicationManager
from .site_registry import PeerSite, SiteInfo, SiteRegistry
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
"""Check if a URL is safe to make requests to (not internal/private).
Args:
url: The URL to check.
allow_internal: If True, allows internal/private IP addresses.
Use for self-hosted deployments on internal networks.
"""
try:
parsed = urlparse(url)
hostname = parsed.hostname
if not hostname:
return False
cloud_metadata_hosts = {
"metadata.google.internal",
"169.254.169.254",
}
if hostname.lower() in cloud_metadata_hosts:
return False
if allow_internal:
return True
blocked_hosts = {
"localhost",
"127.0.0.1",
"0.0.0.0",
"::1",
"[::1]",
}
if hostname.lower() in blocked_hosts:
return False
try:
resolved_ip = socket.gethostbyname(hostname)
ip = ipaddress.ip_address(resolved_ip)
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
return False
except (socket.gaierror, ValueError):
return False
return True
except Exception:
return False
def _validate_endpoint(endpoint: str) -> Optional[str]:
"""Validate endpoint URL format. Returns error message or None."""
try:
parsed = urlparse(endpoint)
if not parsed.scheme or parsed.scheme not in ("http", "https"):
return "Endpoint must be http or https URL"
if not parsed.netloc:
return "Endpoint must have a host"
return None
except Exception:
return "Invalid endpoint URL"
def _validate_priority(priority: Any) -> Optional[str]:
"""Validate priority value. Returns error message or None."""
try:
p = int(priority)
if p < 0 or p > 1000:
return "Priority must be between 0 and 1000"
return None
except (TypeError, ValueError):
return "Priority must be an integer"
def _validate_region(region: str) -> Optional[str]:
"""Validate region format. Returns error message or None."""
if not re.match(r"^[a-z]{2,}-[a-z]+-\d+$", region):
return "Region must match format like us-east-1"
return None
def _validate_site_id(site_id: str) -> Optional[str]:
"""Validate site_id format. Returns error message or None."""
if not site_id or len(site_id) > 63:
return "site_id must be 1-63 characters"
if not re.match(r'^[a-zA-Z0-9][a-zA-Z0-9_-]*$', site_id):
return "site_id must start with alphanumeric and contain only alphanumeric, hyphens, underscores"
return None
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Flask blueprint for the admin endpoints; every route below is mounted under "/admin".
admin_api_bp = Blueprint("admin_api", __name__, url_prefix="/admin")
def _require_principal() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
    """Authenticate the current request by delegating to the S3 API helper.

    Returns whatever ``s3_api._require_principal`` returns; the annotation
    describes it as a ``(principal, error)`` pair.
    """
    # Imported at call time rather than module load
    # (presumably to avoid an import cycle with s3_api — confirm).
    from .s3_api import _require_principal as authenticate_request

    outcome = authenticate_request()
    return outcome
def _require_admin() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
    """Authenticate the request and require the ``iam:*`` permission.

    Returns:
        ``(principal, None)`` on success, or ``(None, error)`` where
        ``error`` is either the authentication error or a 403
        ``AccessDenied`` JSON error.
    """
    caller, auth_failure = _require_principal()
    if auth_failure:
        return None, auth_failure
    try:
        _iam().authorize(caller, None, "iam:*")
    except IamError:
        denied = _json_error("AccessDenied", "Admin access required", 403)
        return None, denied
    return caller, None
def _site_registry() -> SiteRegistry:
    """Return the object stored under ``"site_registry"`` in the app extensions."""
    extensions = current_app.extensions
    return extensions["site_registry"]
def _connections() -> ConnectionStore:
    """Return the object stored under ``"connections"`` in the app extensions."""
    extensions = current_app.extensions
    return extensions["connections"]
def _replication() -> ReplicationManager:
    """Return the object stored under ``"replication"`` in the app extensions."""
    extensions = current_app.extensions
    return extensions["replication"]
def _iam():
    """Return the object stored under ``"iam"`` in the app extensions."""
    extensions = current_app.extensions
    return extensions["iam"]
def _json_error(code: str, message: str, status: int) -> Tuple[Dict[str, Any], int]:
return {"error": {"code": code, "message": message}}, status
def _get_admin_rate_limit() -> str:
    """Return the ``RATE_LIMIT_ADMIN`` config value, defaulting to "60 per minute"."""
    config = current_app.config
    return config.get("RATE_LIMIT_ADMIN", "60 per minute")
@admin_api_bp.route("/site", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_local_site():
    """Return the local site identity (admin only).

    Prefers the site stored in the registry; otherwise falls back to the
    ``SITE_*`` config values (marked with ``"source": "environment"``), and
    answers 404 when neither is available.
    """
    _principal, denied = _require_admin()
    if denied:
        return denied

    stored_site = _site_registry().get_local_site()
    if stored_site:
        return jsonify(stored_site.to_dict())

    config = current_app.config
    env_site_id = config.get("SITE_ID")
    env_endpoint = config.get("SITE_ENDPOINT")
    if not env_site_id:
        return _json_error("NotFound", "Local site not configured", 404)

    fallback = {
        "site_id": env_site_id,
        "endpoint": env_endpoint or "",
        "region": config.get("SITE_REGION", "us-east-1"),
        "priority": config.get("SITE_PRIORITY", 100),
        "display_name": env_site_id,
        "source": "environment",
    }
    return jsonify(fallback)
@admin_api_bp.route("/site", methods=["PUT"])
@limiter.limit(lambda: _get_admin_rate_limit())
def update_local_site():
    """Create or replace the local site identity (admin only).

    Expects a JSON object with a required ``site_id`` and optional
    ``endpoint``, ``region``, ``priority`` and ``display_name``. Returns the
    stored site as JSON, or a 400 ``ValidationError`` for bad input.
    """
    principal, error = _require_admin()
    if error:
        return error
    payload = request.get_json(silent=True) or {}
    if not isinstance(payload, dict):
        # A JSON list/scalar has no .get(); reject instead of crashing.
        return _json_error("ValidationError", "Request body must be a JSON object", 400)
    site_id = payload.get("site_id")
    endpoint = payload.get("endpoint")
    if not site_id:
        return _json_error("ValidationError", "site_id is required", 400)
    site_id_error = _validate_site_id(site_id)
    if site_id_error:
        return _json_error("ValidationError", site_id_error, 400)
    if endpoint:
        endpoint_error = _validate_endpoint(endpoint)
        if endpoint_error:
            return _json_error("ValidationError", endpoint_error, 400)
    if "priority" in payload:
        priority_error = _validate_priority(payload["priority"])
        if priority_error:
            return _json_error("ValidationError", priority_error, 400)
    if "region" in payload:
        region_error = _validate_region(payload["region"])
        if region_error:
            return _json_error("ValidationError", region_error, 400)
    registry = _site_registry()
    existing = registry.get_local_site()
    site = SiteInfo(
        site_id=site_id,
        endpoint=endpoint or "",
        region=payload.get("region", "us-east-1"),
        # Validation accepts numeric strings; store a real int (as peer
        # registration does) so priorities stay comparable when sorted.
        priority=int(payload.get("priority", 100)),
        display_name=payload.get("display_name", site_id),
        created_at=existing.created_at if existing else None,
    )
    registry.set_local_site(site)
    logger.info("Local site updated", extra={"site_id": site_id, "principal": principal.access_key})
    return jsonify(site.to_dict())
@admin_api_bp.route("/sites", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def list_all_sites():
    """Return the local site and all registered peers as JSON (admin only)."""
    _principal, denied = _require_admin()
    if denied:
        return denied

    registry = _site_registry()
    local_site = registry.get_local_site()
    peer_sites = registry.list_peers()

    local_payload = local_site.to_dict() if local_site else None
    peer_payloads = [entry.to_dict() for entry in peer_sites]
    return jsonify({
        "local": local_payload,
        "peers": peer_payloads,
        "total_peers": len(peer_sites),
    })
@admin_api_bp.route("/sites", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def register_peer_site():
    """Register a new peer site (admin only).

    Expects a JSON object with required ``site_id`` and ``endpoint`` and
    optional ``region``, ``priority``, ``display_name`` and
    ``connection_id``. Returns 201 with the stored peer, 400 on validation
    failure, or 409 when the peer already exists.
    """
    principal, error = _require_admin()
    if error:
        return error
    payload = request.get_json(silent=True) or {}
    if not isinstance(payload, dict):
        # A JSON list/scalar has no .get(); reject instead of crashing.
        return _json_error("ValidationError", "Request body must be a JSON object", 400)
    site_id = payload.get("site_id")
    endpoint = payload.get("endpoint")
    if not site_id:
        return _json_error("ValidationError", "site_id is required", 400)
    site_id_error = _validate_site_id(site_id)
    if site_id_error:
        return _json_error("ValidationError", site_id_error, 400)
    if not endpoint:
        return _json_error("ValidationError", "endpoint is required", 400)
    endpoint_error = _validate_endpoint(endpoint)
    if endpoint_error:
        return _json_error("ValidationError", endpoint_error, 400)
    region = payload.get("region", "us-east-1")
    region_error = _validate_region(region)
    if region_error:
        return _json_error("ValidationError", region_error, 400)
    priority = payload.get("priority", 100)
    priority_error = _validate_priority(priority)
    if priority_error:
        return _json_error("ValidationError", priority_error, 400)
    registry = _site_registry()
    if registry.get_peer(site_id):
        return _json_error("AlreadyExists", f"Peer site '{site_id}' already exists", 409)
    connection_id = payload.get("connection_id")
    # A connection is optional, but if one is named it must exist.
    if connection_id and not _connections().get(connection_id):
        return _json_error("ValidationError", f"Connection '{connection_id}' not found", 400)
    peer = PeerSite(
        site_id=site_id,
        endpoint=endpoint,
        region=region,
        priority=int(priority),
        display_name=payload.get("display_name", site_id),
        connection_id=connection_id,
    )
    registry.add_peer(peer)
    logger.info("Peer site registered", extra={"site_id": site_id, "principal": principal.access_key})
    return jsonify(peer.to_dict()), 201
@admin_api_bp.route("/sites/<site_id>", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_peer_site(site_id: str):
    """Return one registered peer site as JSON, or 404 if unknown (admin only)."""
    _principal, denied = _require_admin()
    if denied:
        return denied

    found = _site_registry().get_peer(site_id)
    if found:
        return jsonify(found.to_dict())
    return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
@admin_api_bp.route("/sites/<site_id>", methods=["PUT"])
@limiter.limit(lambda: _get_admin_rate_limit())
def update_peer_site(site_id: str):
    """Update an existing peer site (admin only).

    Fields present in the JSON body (``endpoint``, ``region``, ``priority``,
    ``display_name``, ``connection_id``) replace the stored values; omitted
    fields, plus creation time and health state, are carried over. Returns
    the updated peer, 404 if the peer is unknown, or 400 on invalid input.
    """
    principal, error = _require_admin()
    if error:
        return error
    registry = _site_registry()
    existing = registry.get_peer(site_id)
    if not existing:
        return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
    payload = request.get_json(silent=True) or {}
    if not isinstance(payload, dict):
        # A JSON list/scalar has no .get(); reject instead of crashing.
        return _json_error("ValidationError", "Request body must be a JSON object", 400)
    if "endpoint" in payload:
        endpoint_error = _validate_endpoint(payload["endpoint"])
        if endpoint_error:
            return _json_error("ValidationError", endpoint_error, 400)
    if "priority" in payload:
        priority_error = _validate_priority(payload["priority"])
        if priority_error:
            return _json_error("ValidationError", priority_error, 400)
    if "region" in payload:
        region_error = _validate_region(payload["region"])
        if region_error:
            return _json_error("ValidationError", region_error, 400)
    connection_id = payload.get("connection_id", existing.connection_id)
    # Mirror registration: a newly supplied connection must exist. A falsy
    # value is still allowed so a peer's connection can be cleared.
    if "connection_id" in payload and connection_id:
        if not _connections().get(connection_id):
            return _json_error("ValidationError", f"Connection '{connection_id}' not found", 400)
    # Validation accepts numeric strings; store a real int (as registration
    # does) so priorities stay comparable when sorted.
    priority = int(payload["priority"]) if "priority" in payload else existing.priority
    peer = PeerSite(
        site_id=site_id,
        endpoint=payload.get("endpoint", existing.endpoint),
        region=payload.get("region", existing.region),
        priority=priority,
        display_name=payload.get("display_name", existing.display_name),
        connection_id=connection_id,
        created_at=existing.created_at,
        is_healthy=existing.is_healthy,
        last_health_check=existing.last_health_check,
    )
    registry.update_peer(peer)
    logger.info("Peer site updated", extra={"site_id": site_id, "principal": principal.access_key})
    return jsonify(peer.to_dict())
@admin_api_bp.route("/sites/<site_id>", methods=["DELETE"])
@limiter.limit(lambda: _get_admin_rate_limit())
def delete_peer_site(site_id: str):
    """Delete a peer site; 204 on success, 404 if it does not exist (admin only)."""
    caller, denied = _require_admin()
    if denied:
        return denied

    removed = _site_registry().delete_peer(site_id)
    if not removed:
        return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)

    audit_fields = {"site_id": site_id, "principal": caller.access_key}
    logger.info("Peer site deleted", extra=audit_fields)
    return Response(status=204)
@admin_api_bp.route("/sites/<site_id>/health", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def check_peer_health(site_id: str):
    """Probe a peer through its configured connection and record the result.

    The health flag is written back to the registry on every call. The JSON
    response carries ``site_id``, ``is_healthy``, ``checked_at`` and, when
    the probe could not be attempted, an ``error`` message.
    """
    _principal, denied = _require_admin()
    if denied:
        return denied

    registry = _site_registry()
    peer = registry.get_peer(site_id)
    if not peer:
        return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)

    healthy = False
    problem = None
    if not peer.connection_id:
        problem = "No connection configured for this peer"
    else:
        conn = _connections().get(peer.connection_id)
        if conn:
            healthy = _replication().check_endpoint_health(conn)
        else:
            problem = f"Connection '{peer.connection_id}' not found"

    registry.update_health(site_id, healthy)

    report = {
        "site_id": site_id,
        "is_healthy": healthy,
        "checked_at": time.time(),
    }
    if problem:
        report["error"] = problem
    return jsonify(report)
@admin_api_bp.route("/topology", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_topology():
    """Return all known sites, ordered by priority (admin only).

    The local site (if any) is tagged ``is_local`` and always reported as
    healthy; peers are tagged ``is_local: False``. The response also carries
    the total site count and the number of sites flagged healthy.
    """
    _principal, denied = _require_admin()
    if denied:
        return denied

    registry = _site_registry()
    local_site = registry.get_local_site()
    peer_sites = registry.list_peers()

    entries = []
    if local_site:
        entries.append(dict(local_site.to_dict(), is_local=True, is_healthy=True))
    entries.extend(dict(peer.to_dict(), is_local=False) for peer in peer_sites)

    def _priority_of(entry):
        return entry.get("priority", 100)

    entries.sort(key=_priority_of)
    healthy_entries = [entry for entry in entries if entry.get("is_healthy")]
    return jsonify({
        "sites": entries,
        "total": len(entries),
        "healthy_count": len(healthy_entries),
    })
@admin_api_bp.route("/sites/<site_id>/bidirectional-status", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def check_bidirectional_status(site_id: str):
    """Verify that bidirectional replication with a peer is fully set up.

    Checks the local side (site identity, endpoint, connection,
    bidirectional rules, site-sync worker), then queries the peer's
    ``/admin/sites`` endpoint to see whether it has this site registered as
    a peer with a connection. Findings are collected in ``issues`` with a
    ``severity`` of ``error`` or ``warning``; ``is_fully_configured`` is
    true only when there are no errors and at least one local bidirectional
    rule exists.

    Returns:
        A JSON report, or a 404 JSON error if the peer is unknown.
    """
    principal, error = _require_admin()
    if error:
        return error
    registry = _site_registry()
    peer = registry.get_peer(site_id)
    if not peer:
        return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
    local_site = registry.get_local_site()
    replication = _replication()
    local_bidir_rules = [
        {
            "bucket_name": rule.bucket_name,
            "target_bucket": rule.target_bucket,
            "enabled": rule.enabled,
        }
        for rule in replication.list_rules()
        if rule.target_connection_id == peer.connection_id and rule.mode == "bidirectional"
    ]
    result = {
        "site_id": site_id,
        "local_site_id": local_site.site_id if local_site else None,
        "local_endpoint": local_site.endpoint if local_site else None,
        "local_bidirectional_rules": local_bidir_rules,
        "local_site_sync_enabled": current_app.config.get("SITE_SYNC_ENABLED", False),
        "remote_status": None,
        "issues": [],
        "is_fully_configured": False,
    }
    if not local_site or not local_site.site_id:
        result["issues"].append({
            "code": "NO_LOCAL_SITE_ID",
            "message": "Local site identity not configured",
            "severity": "error",
        })
    if not local_site or not local_site.endpoint:
        result["issues"].append({
            "code": "NO_LOCAL_ENDPOINT",
            "message": "Local site endpoint not configured (remote site cannot reach back)",
            "severity": "error",
        })
    if not peer.connection_id:
        result["issues"].append({
            "code": "NO_CONNECTION",
            "message": "No connection configured for this peer",
            "severity": "error",
        })
        return jsonify(result)
    connection = _connections().get(peer.connection_id)
    if not connection:
        result["issues"].append({
            "code": "CONNECTION_NOT_FOUND",
            "message": f"Connection '{peer.connection_id}' not found",
            "severity": "error",
        })
        return jsonify(result)
    if not local_bidir_rules:
        result["issues"].append({
            "code": "NO_LOCAL_BIDIRECTIONAL_RULES",
            "message": "No bidirectional replication rules configured on this site",
            "severity": "warning",
        })
    if not result["local_site_sync_enabled"]:
        result["issues"].append({
            "code": "SITE_SYNC_DISABLED",
            "message": "Site sync worker is disabled (SITE_SYNC_ENABLED=false). Pull operations will not work.",
            "severity": "warning",
        })
    if not replication.check_endpoint_health(connection):
        result["issues"].append({
            "code": "REMOTE_UNREACHABLE",
            "message": "Remote endpoint is not reachable",
            "severity": "error",
        })
        return jsonify(result)
    allow_internal = current_app.config.get("ALLOW_INTERNAL_ENDPOINTS", False)
    if not _is_safe_url(peer.endpoint, allow_internal=allow_internal):
        # _is_safe_url rejects internal/private targets as well as cloud
        # metadata hosts, so the message must not claim only the latter.
        result["issues"].append({
            "code": "ENDPOINT_NOT_ALLOWED",
            "message": "Peer endpoint is not allowed: internal, private, or cloud metadata address (SSRF protection)",
            "severity": "error",
        })
        return jsonify(result)
    try:
        admin_url = peer.endpoint.rstrip("/") + "/admin/sites"
        resp = requests.get(
            admin_url,
            timeout=10,
            headers={
                "Accept": "application/json",
                "X-Access-Key": connection.access_key,
                "X-Secret-Key": connection.secret_key,
            },
            # Do not follow redirects: the target was vetted by _is_safe_url,
            # a redirect destination would not be, and it would receive the
            # credential headers above.
            allow_redirects=False,
        )
        if resp.status_code == 200:
            try:
                remote_data = resp.json()
                if not isinstance(remote_data, dict):
                    raise ValueError("Expected JSON object")
                remote_local = remote_data.get("local")
                if remote_local is not None and not isinstance(remote_local, dict):
                    raise ValueError("Expected 'local' to be an object")
                remote_peers = remote_data.get("peers", [])
                if not isinstance(remote_peers, list):
                    raise ValueError("Expected 'peers' to be a list")
            except ValueError as e:
                # JSON decode errors subclass ValueError, so this covers both
                # malformed JSON and the shape checks above. (The module does
                # not import ``json``, so naming json.JSONDecodeError here
                # would itself raise NameError.)
                logger.warning("Invalid JSON from remote admin API: %s", e)
                result["remote_status"] = {"reachable": True, "invalid_response": True}
                result["issues"].append({
                    "code": "REMOTE_INVALID_RESPONSE",
                    "message": "Remote admin API returned invalid JSON",
                    "severity": "warning",
                })
                return jsonify(result)
            result["remote_status"] = {
                "reachable": True,
                "local_site": remote_local,
                "site_sync_enabled": None,
                "has_peer_for_us": False,
                "peer_connection_configured": False,
                "has_bidirectional_rules_for_us": False,
            }
            for rp in remote_peers:
                if not isinstance(rp, dict):
                    continue
                # The remote knows us if it lists our site id or our endpoint.
                if local_site and (
                    rp.get("site_id") == local_site.site_id or
                    rp.get("endpoint") == local_site.endpoint
                ):
                    result["remote_status"]["has_peer_for_us"] = True
                    result["remote_status"]["peer_connection_configured"] = bool(rp.get("connection_id"))
                    break
            if not result["remote_status"]["has_peer_for_us"]:
                result["issues"].append({
                    "code": "REMOTE_NO_PEER_FOR_US",
                    "message": "Remote site does not have this site registered as a peer",
                    "severity": "error",
                })
            elif not result["remote_status"]["peer_connection_configured"]:
                result["issues"].append({
                    "code": "REMOTE_NO_CONNECTION_FOR_US",
                    "message": "Remote site has us as peer but no connection configured (cannot push back)",
                    "severity": "error",
                })
        elif resp.status_code in (401, 403):
            result["remote_status"] = {
                "reachable": True,
                "admin_access_denied": True,
            }
            result["issues"].append({
                "code": "REMOTE_ADMIN_ACCESS_DENIED",
                "message": "Cannot verify remote configuration (admin access denied)",
                "severity": "warning",
            })
        else:
            result["remote_status"] = {
                "reachable": True,
                "admin_api_error": resp.status_code,
            }
            result["issues"].append({
                "code": "REMOTE_ADMIN_API_ERROR",
                "message": f"Remote admin API returned status {resp.status_code}",
                "severity": "warning",
            })
    except requests.RequestException as e:
        logger.warning("Remote admin API unreachable: %s", e)
        result["remote_status"] = {
            "reachable": False,
            "error": "Connection failed",
        }
        result["issues"].append({
            "code": "REMOTE_ADMIN_UNREACHABLE",
            "message": "Could not reach remote admin API",
            "severity": "warning",
        })
    except Exception as e:
        # Best-effort verification: report, never fail the request.
        logger.warning("Error checking remote bidirectional status: %s", e, exc_info=True)
        result["issues"].append({
            "code": "VERIFICATION_ERROR",
            "message": "Internal error during verification",
            "severity": "warning",
        })
    error_issues = [i for i in result["issues"] if i["severity"] == "error"]
    result["is_fully_configured"] = len(error_issues) == 0 and len(local_bidir_rules) > 0
    return jsonify(result)

View File

@@ -6,6 +6,7 @@ import re
import time
from dataclasses import dataclass, field
from fnmatch import fnmatch, translate
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple
@@ -13,9 +14,14 @@ from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple
RESOURCE_PREFIX = "arn:aws:s3:::"
@lru_cache(maxsize=256)
def _compile_pattern(pattern: str) -> Pattern[str]:
return re.compile(translate(pattern), re.IGNORECASE)
def _match_string_like(value: str, pattern: str) -> bool:
regex = translate(pattern)
return bool(re.match(regex, value, re.IGNORECASE))
compiled = _compile_pattern(pattern)
return bool(compiled.match(value))
def _ip_in_cidr(ip_str: str, cidr: str) -> bool:

View File

@@ -10,6 +10,23 @@ from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Optional
import psutil
def _calculate_auto_threads() -> int:
    """Derive a thread count from the logical CPU count.

    Uses twice the logical CPU count (assuming 4 CPUs when psutil reports
    none), clamped to the range 1-64.
    """
    logical_cpus = psutil.cpu_count(logical=True) or 4
    doubled = logical_cpus * 2
    capped = min(doubled, 64)
    return max(1, capped)
def _calculate_auto_connection_limit() -> int:
    """Derive a connection limit from currently available memory.

    Allows one connection per 5 MiB of available memory as reported by
    psutil, clamped to the range 20-1000.
    """
    bytes_per_mb = 1024 * 1024
    free_mb = psutil.virtual_memory().available / bytes_per_mb
    estimate = int(free_mb / 5)
    capped = min(estimate, 1000)
    return max(20, capped)
def _calculate_auto_backlog(connection_limit: int) -> int:
return max(64, min(connection_limit * 2, 4096))
def _validate_rate_limit(value: str) -> str:
pattern = r"^\d+\s+per\s+(second|minute|hour|day)$"
@@ -63,6 +80,10 @@ class AppConfig:
log_backup_count: int
ratelimit_default: str
ratelimit_storage_uri: str
ratelimit_list_buckets: str
ratelimit_bucket_ops: str
ratelimit_object_ops: str
ratelimit_head_ops: str
cors_origins: list[str]
cors_methods: list[str]
cors_allow_headers: list[str]
@@ -90,6 +111,44 @@ class AppConfig:
operation_metrics_enabled: bool
operation_metrics_interval_minutes: int
operation_metrics_retention_hours: int
server_threads: int
server_connection_limit: int
server_backlog: int
server_channel_timeout: int
server_threads_auto: bool
server_connection_limit_auto: bool
server_backlog_auto: bool
site_sync_enabled: bool
site_sync_interval_seconds: int
site_sync_batch_size: int
sigv4_timestamp_tolerance_seconds: int
presigned_url_min_expiry_seconds: int
presigned_url_max_expiry_seconds: int
replication_connect_timeout_seconds: int
replication_read_timeout_seconds: int
replication_max_retries: int
replication_streaming_threshold_bytes: int
replication_max_failures_per_bucket: int
site_sync_connect_timeout_seconds: int
site_sync_read_timeout_seconds: int
site_sync_max_retries: int
site_sync_clock_skew_tolerance_seconds: float
object_key_max_length_bytes: int
object_cache_max_size: int
bucket_config_cache_ttl_seconds: float
object_tag_limit: int
encryption_chunk_size_bytes: int
kms_generate_data_key_min_bytes: int
kms_generate_data_key_max_bytes: int
lifecycle_max_history_per_bucket: int
site_id: Optional[str]
site_endpoint: Optional[str]
site_region: str
site_priority: int
ratelimit_admin: str
num_trusted_proxies: int
allowed_redirect_hosts: list[str]
allow_internal_endpoints: bool
@classmethod
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
@@ -164,6 +223,10 @@ class AppConfig:
log_backup_count = int(_get("LOG_BACKUP_COUNT", 3))
ratelimit_default = _validate_rate_limit(str(_get("RATE_LIMIT_DEFAULT", "200 per minute")))
ratelimit_storage_uri = str(_get("RATE_LIMIT_STORAGE_URI", "memory://"))
ratelimit_list_buckets = _validate_rate_limit(str(_get("RATE_LIMIT_LIST_BUCKETS", "60 per minute")))
ratelimit_bucket_ops = _validate_rate_limit(str(_get("RATE_LIMIT_BUCKET_OPS", "120 per minute")))
ratelimit_object_ops = _validate_rate_limit(str(_get("RATE_LIMIT_OBJECT_OPS", "240 per minute")))
ratelimit_head_ops = _validate_rate_limit(str(_get("RATE_LIMIT_HEAD_OPS", "100 per minute")))
def _csv(value: str, default: list[str]) -> list[str]:
if not value:
@@ -193,6 +256,68 @@ class AppConfig:
operation_metrics_interval_minutes = int(_get("OPERATION_METRICS_INTERVAL_MINUTES", 5))
operation_metrics_retention_hours = int(_get("OPERATION_METRICS_RETENTION_HOURS", 24))
_raw_threads = int(_get("SERVER_THREADS", 0))
if _raw_threads == 0:
server_threads = _calculate_auto_threads()
server_threads_auto = True
else:
server_threads = _raw_threads
server_threads_auto = False
_raw_conn_limit = int(_get("SERVER_CONNECTION_LIMIT", 0))
if _raw_conn_limit == 0:
server_connection_limit = _calculate_auto_connection_limit()
server_connection_limit_auto = True
else:
server_connection_limit = _raw_conn_limit
server_connection_limit_auto = False
_raw_backlog = int(_get("SERVER_BACKLOG", 0))
if _raw_backlog == 0:
server_backlog = _calculate_auto_backlog(server_connection_limit)
server_backlog_auto = True
else:
server_backlog = _raw_backlog
server_backlog_auto = False
server_channel_timeout = int(_get("SERVER_CHANNEL_TIMEOUT", 120))
site_sync_enabled = str(_get("SITE_SYNC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
site_sync_interval_seconds = int(_get("SITE_SYNC_INTERVAL_SECONDS", 60))
site_sync_batch_size = int(_get("SITE_SYNC_BATCH_SIZE", 100))
sigv4_timestamp_tolerance_seconds = int(_get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900))
presigned_url_min_expiry_seconds = int(_get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1))
presigned_url_max_expiry_seconds = int(_get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800))
replication_connect_timeout_seconds = int(_get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5))
replication_read_timeout_seconds = int(_get("REPLICATION_READ_TIMEOUT_SECONDS", 30))
replication_max_retries = int(_get("REPLICATION_MAX_RETRIES", 2))
replication_streaming_threshold_bytes = int(_get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024))
replication_max_failures_per_bucket = int(_get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50))
site_sync_connect_timeout_seconds = int(_get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10))
site_sync_read_timeout_seconds = int(_get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120))
site_sync_max_retries = int(_get("SITE_SYNC_MAX_RETRIES", 2))
site_sync_clock_skew_tolerance_seconds = float(_get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0))
object_key_max_length_bytes = int(_get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024))
object_cache_max_size = int(_get("OBJECT_CACHE_MAX_SIZE", 100))
bucket_config_cache_ttl_seconds = float(_get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0))
object_tag_limit = int(_get("OBJECT_TAG_LIMIT", 50))
encryption_chunk_size_bytes = int(_get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024))
kms_generate_data_key_min_bytes = int(_get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1))
kms_generate_data_key_max_bytes = int(_get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024))
lifecycle_max_history_per_bucket = int(_get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50))
site_id_raw = _get("SITE_ID", None)
site_id = str(site_id_raw).strip() if site_id_raw else None
site_endpoint_raw = _get("SITE_ENDPOINT", None)
site_endpoint = str(site_endpoint_raw).strip() if site_endpoint_raw else None
site_region = str(_get("SITE_REGION", "us-east-1"))
site_priority = int(_get("SITE_PRIORITY", 100))
ratelimit_admin = _validate_rate_limit(str(_get("RATE_LIMIT_ADMIN", "60 per minute")))
num_trusted_proxies = int(_get("NUM_TRUSTED_PROXIES", 0))
allowed_redirect_hosts_raw = _get("ALLOWED_REDIRECT_HOSTS", "")
allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
return cls(storage_root=storage_root,
max_upload_size=max_upload_size,
ui_page_size=ui_page_size,
@@ -210,6 +335,10 @@ class AppConfig:
log_backup_count=log_backup_count,
ratelimit_default=ratelimit_default,
ratelimit_storage_uri=ratelimit_storage_uri,
ratelimit_list_buckets=ratelimit_list_buckets,
ratelimit_bucket_ops=ratelimit_bucket_ops,
ratelimit_object_ops=ratelimit_object_ops,
ratelimit_head_ops=ratelimit_head_ops,
cors_origins=cors_origins,
cors_methods=cors_methods,
cors_allow_headers=cors_allow_headers,
@@ -236,7 +365,45 @@ class AppConfig:
metrics_history_interval_minutes=metrics_history_interval_minutes,
operation_metrics_enabled=operation_metrics_enabled,
operation_metrics_interval_minutes=operation_metrics_interval_minutes,
operation_metrics_retention_hours=operation_metrics_retention_hours)
operation_metrics_retention_hours=operation_metrics_retention_hours,
server_threads=server_threads,
server_connection_limit=server_connection_limit,
server_backlog=server_backlog,
server_channel_timeout=server_channel_timeout,
server_threads_auto=server_threads_auto,
server_connection_limit_auto=server_connection_limit_auto,
server_backlog_auto=server_backlog_auto,
site_sync_enabled=site_sync_enabled,
site_sync_interval_seconds=site_sync_interval_seconds,
site_sync_batch_size=site_sync_batch_size,
sigv4_timestamp_tolerance_seconds=sigv4_timestamp_tolerance_seconds,
presigned_url_min_expiry_seconds=presigned_url_min_expiry_seconds,
presigned_url_max_expiry_seconds=presigned_url_max_expiry_seconds,
replication_connect_timeout_seconds=replication_connect_timeout_seconds,
replication_read_timeout_seconds=replication_read_timeout_seconds,
replication_max_retries=replication_max_retries,
replication_streaming_threshold_bytes=replication_streaming_threshold_bytes,
replication_max_failures_per_bucket=replication_max_failures_per_bucket,
site_sync_connect_timeout_seconds=site_sync_connect_timeout_seconds,
site_sync_read_timeout_seconds=site_sync_read_timeout_seconds,
site_sync_max_retries=site_sync_max_retries,
site_sync_clock_skew_tolerance_seconds=site_sync_clock_skew_tolerance_seconds,
object_key_max_length_bytes=object_key_max_length_bytes,
object_cache_max_size=object_cache_max_size,
bucket_config_cache_ttl_seconds=bucket_config_cache_ttl_seconds,
object_tag_limit=object_tag_limit,
encryption_chunk_size_bytes=encryption_chunk_size_bytes,
kms_generate_data_key_min_bytes=kms_generate_data_key_min_bytes,
kms_generate_data_key_max_bytes=kms_generate_data_key_max_bytes,
lifecycle_max_history_per_bucket=lifecycle_max_history_per_bucket,
site_id=site_id,
site_endpoint=site_endpoint,
site_region=site_region,
site_priority=site_priority,
ratelimit_admin=ratelimit_admin,
num_trusted_proxies=num_trusted_proxies,
allowed_redirect_hosts=allowed_redirect_hosts,
allow_internal_endpoints=allow_internal_endpoints)
def validate_and_report(self) -> list[str]:
"""Validate configuration and return a list of warnings/issues.
@@ -296,7 +463,35 @@ class AppConfig:
if "*" in self.cors_origins:
issues.append("INFO: CORS_ORIGINS is set to '*'. Consider restricting to specific domains in production.")
if not (1 <= self.server_threads <= 64):
issues.append(f"CRITICAL: SERVER_THREADS={self.server_threads} is outside valid range (1-64). Server cannot start.")
if not (10 <= self.server_connection_limit <= 1000):
issues.append(f"CRITICAL: SERVER_CONNECTION_LIMIT={self.server_connection_limit} is outside valid range (10-1000). Server cannot start.")
if not (64 <= self.server_backlog <= 4096):
issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (64-4096). Server cannot start.")
if not (10 <= self.server_channel_timeout <= 300):
issues.append(f"CRITICAL: SERVER_CHANNEL_TIMEOUT={self.server_channel_timeout} is outside valid range (10-300). Server cannot start.")
if sys.platform != "win32":
try:
import resource
soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
threshold = int(soft_limit * 0.8)
if self.server_connection_limit > threshold:
issues.append(f"WARNING: SERVER_CONNECTION_LIMIT={self.server_connection_limit} exceeds 80% of system file descriptor limit (soft={soft_limit}). Consider running 'ulimit -n {self.server_connection_limit + 100}'.")
except (ImportError, OSError):
pass
try:
import psutil
available_mb = psutil.virtual_memory().available / (1024 * 1024)
estimated_mb = self.server_threads * 50
if estimated_mb > available_mb * 0.5:
issues.append(f"WARNING: SERVER_THREADS={self.server_threads} may require ~{estimated_mb}MB memory, exceeding 50% of available RAM ({int(available_mb)}MB).")
except ImportError:
pass
return issues
def print_startup_summary(self) -> None:
@@ -314,6 +509,12 @@ class AppConfig:
print(f" ENCRYPTION: Enabled (Master key: {self.encryption_master_key_path})")
if self.kms_enabled:
print(f" KMS: Enabled (Keys: {self.kms_keys_path})")
def _auto(flag: bool) -> str:
return " (auto)" if flag else ""
print(f" SERVER_THREADS: {self.server_threads}{_auto(self.server_threads_auto)}")
print(f" CONNECTION_LIMIT: {self.server_connection_limit}{_auto(self.server_connection_limit_auto)}")
print(f" BACKLOG: {self.server_backlog}{_auto(self.server_backlog_auto)}")
print(f" CHANNEL_TIMEOUT: {self.server_channel_timeout}s")
print("=" * 60)
issues = self.validate_and_report()
@@ -352,6 +553,10 @@ class AppConfig:
"LOG_BACKUP_COUNT": self.log_backup_count,
"RATELIMIT_DEFAULT": self.ratelimit_default,
"RATELIMIT_STORAGE_URI": self.ratelimit_storage_uri,
"RATELIMIT_LIST_BUCKETS": self.ratelimit_list_buckets,
"RATELIMIT_BUCKET_OPS": self.ratelimit_bucket_ops,
"RATELIMIT_OBJECT_OPS": self.ratelimit_object_ops,
"RATELIMIT_HEAD_OPS": self.ratelimit_head_ops,
"CORS_ORIGINS": self.cors_origins,
"CORS_METHODS": self.cors_methods,
"CORS_ALLOW_HEADERS": self.cors_allow_headers,
@@ -371,4 +576,39 @@ class AppConfig:
"OPERATION_METRICS_ENABLED": self.operation_metrics_enabled,
"OPERATION_METRICS_INTERVAL_MINUTES": self.operation_metrics_interval_minutes,
"OPERATION_METRICS_RETENTION_HOURS": self.operation_metrics_retention_hours,
"SERVER_THREADS": self.server_threads,
"SERVER_CONNECTION_LIMIT": self.server_connection_limit,
"SERVER_BACKLOG": self.server_backlog,
"SERVER_CHANNEL_TIMEOUT": self.server_channel_timeout,
"SITE_SYNC_ENABLED": self.site_sync_enabled,
"SITE_SYNC_INTERVAL_SECONDS": self.site_sync_interval_seconds,
"SITE_SYNC_BATCH_SIZE": self.site_sync_batch_size,
"SIGV4_TIMESTAMP_TOLERANCE_SECONDS": self.sigv4_timestamp_tolerance_seconds,
"PRESIGNED_URL_MIN_EXPIRY_SECONDS": self.presigned_url_min_expiry_seconds,
"PRESIGNED_URL_MAX_EXPIRY_SECONDS": self.presigned_url_max_expiry_seconds,
"REPLICATION_CONNECT_TIMEOUT_SECONDS": self.replication_connect_timeout_seconds,
"REPLICATION_READ_TIMEOUT_SECONDS": self.replication_read_timeout_seconds,
"REPLICATION_MAX_RETRIES": self.replication_max_retries,
"REPLICATION_STREAMING_THRESHOLD_BYTES": self.replication_streaming_threshold_bytes,
"REPLICATION_MAX_FAILURES_PER_BUCKET": self.replication_max_failures_per_bucket,
"SITE_SYNC_CONNECT_TIMEOUT_SECONDS": self.site_sync_connect_timeout_seconds,
"SITE_SYNC_READ_TIMEOUT_SECONDS": self.site_sync_read_timeout_seconds,
"SITE_SYNC_MAX_RETRIES": self.site_sync_max_retries,
"SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS": self.site_sync_clock_skew_tolerance_seconds,
"OBJECT_KEY_MAX_LENGTH_BYTES": self.object_key_max_length_bytes,
"OBJECT_CACHE_MAX_SIZE": self.object_cache_max_size,
"BUCKET_CONFIG_CACHE_TTL_SECONDS": self.bucket_config_cache_ttl_seconds,
"OBJECT_TAG_LIMIT": self.object_tag_limit,
"ENCRYPTION_CHUNK_SIZE_BYTES": self.encryption_chunk_size_bytes,
"KMS_GENERATE_DATA_KEY_MIN_BYTES": self.kms_generate_data_key_min_bytes,
"KMS_GENERATE_DATA_KEY_MAX_BYTES": self.kms_generate_data_key_max_bytes,
"LIFECYCLE_MAX_HISTORY_PER_BUCKET": self.lifecycle_max_history_per_bucket,
"SITE_ID": self.site_id,
"SITE_ENDPOINT": self.site_endpoint,
"SITE_REGION": self.site_region,
"SITE_PRIORITY": self.site_priority,
"RATE_LIMIT_ADMIN": self.ratelimit_admin,
"NUM_TRUSTED_PROXIES": self.num_trusted_proxies,
"ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
"ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
}

View File

@@ -1,15 +1,44 @@
"""Encryption providers for server-side and client-side encryption."""
from __future__ import annotations
import base64
import io
import json
import logging
import os
import secrets
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, BinaryIO, Dict, Generator, Optional
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
from cryptography.hazmat.primitives import hashes
if sys.platform != "win32":
import fcntl
logger = logging.getLogger(__name__)
def _set_secure_file_permissions(file_path: Path) -> None:
"""Set restrictive file permissions (owner read/write only)."""
if sys.platform == "win32":
try:
username = os.environ.get("USERNAME", "")
if username:
subprocess.run(
["icacls", str(file_path), "/inheritance:r",
"/grant:r", f"{username}:F"],
check=True, capture_output=True
)
else:
logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
except (subprocess.SubprocessError, OSError) as exc:
logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
else:
os.chmod(file_path, 0o600)
class EncryptionError(Exception):
@@ -59,22 +88,34 @@ class EncryptionMetadata:
class EncryptionProvider:
    """Interface shared by all encryption providers.

    Every method here raises ``NotImplementedError``; concrete providers
    override the operations they support.
    """

    def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
        """Encrypt *plaintext*, optionally bound to an encryption *context*."""
        raise NotImplementedError

    def decrypt(
        self,
        ciphertext: bytes,
        nonce: bytes,
        encrypted_data_key: bytes,
        key_id: str,
        context: Dict[str, str] | None = None,
    ) -> bytes:
        """Decrypt *ciphertext* and return the plaintext bytes."""
        raise NotImplementedError

    def generate_data_key(self) -> tuple[bytes, bytes]:
        """Create a fresh data key.

        Returns:
            A ``(plaintext_key, encrypted_key)`` tuple.
        """
        raise NotImplementedError

    def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
        """Recover the plaintext data key from its encrypted form.

        Args:
            encrypted_data_key: The encrypted data key bytes.
            key_id: Optional key identifier (used by KMS providers).

        Returns:
            The decrypted data key.
        """
        raise NotImplementedError
class LocalKeyEncryption(EncryptionProvider):
"""SSE-S3 style encryption using a local master key.
@@ -99,28 +140,48 @@ class LocalKeyEncryption(EncryptionProvider):
return self._master_key
def _load_or_create_master_key(self) -> bytes:
    """Load the master key from disk, creating it first if it does not exist.

    The whole load-or-create sequence runs while holding an exclusive lock on
    a sibling ``.lock`` file, so concurrent callers do not each generate and
    write a different key.

    Returns:
        The raw key bytes: the base64-decoded content of ``master_key_path``
        when the file exists, otherwise 32 newly generated random bytes that
        have just been written to that path (base64-encoded).

    Raises:
        EncryptionError: If the existing key cannot be read or decoded, the
            new key cannot be written, or the lock file cannot be
            opened/locked.
    """
    lock_path = self.master_key_path.with_suffix(".lock")
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        with open(lock_path, "w") as lock_file:
            if sys.platform == "win32":
                import msvcrt
                msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
            else:
                fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
            try:
                if self.master_key_path.exists():
                    try:
                        return base64.b64decode(self.master_key_path.read_text().strip())
                    except Exception as exc:
                        raise EncryptionError(f"Failed to load master key: {exc}") from exc

                key = secrets.token_bytes(32)
                try:
                    # Create the file with owner-only permissions from the
                    # start, so the key is never on disk under the default
                    # umask. O_EXCL refuses to write through a pre-existing
                    # entry (for example a dangling symlink).
                    fd = os.open(
                        self.master_key_path,
                        os.O_WRONLY | os.O_CREAT | os.O_EXCL,
                        0o600,
                    )
                    with os.fdopen(fd, "w") as key_file:
                        key_file.write(base64.b64encode(key).decode())
                    # Still needed on Windows, where the mode argument above
                    # does not restrict the ACL.
                    _set_secure_file_permissions(self.master_key_path)
                except OSError as exc:
                    raise EncryptionError(f"Failed to save master key: {exc}") from exc
                return key
            finally:
                if sys.platform == "win32":
                    import msvcrt
                    msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
                else:
                    fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
    except OSError as exc:
        raise EncryptionError(f"Failed to acquire lock for master key: {exc}") from exc
DATA_KEY_AAD = b'{"purpose":"data_key","version":1}'
def _encrypt_data_key(self, data_key: bytes) -> bytes:
    """Wrap *data_key* with the master key using AES-GCM.

    The ciphertext is authenticated against ``DATA_KEY_AAD``. A fresh random
    12-byte nonce is generated per call.

    Returns:
        ``nonce + ciphertext`` (the 12-byte nonce is prepended so that
        decryption can recover it).
    """
    aesgcm = AESGCM(self.master_key)
    nonce = secrets.token_bytes(12)
    # Encrypt exactly once, bound to the AAD; an extra no-AAD encryption
    # under the same nonce would be wasted work.
    encrypted = aesgcm.encrypt(nonce, data_key, self.DATA_KEY_AAD)
    return nonce + encrypted
def _decrypt_data_key(self, encrypted_data_key: bytes) -> bytes:
"""Decrypt the data key using the master key."""
if len(encrypted_data_key) < 12 + 32 + 16: # nonce + key + tag
@@ -129,10 +190,17 @@ class LocalKeyEncryption(EncryptionProvider):
nonce = encrypted_data_key[:12]
ciphertext = encrypted_data_key[12:]
try:
return aesgcm.decrypt(nonce, ciphertext, None)
except Exception as exc:
raise EncryptionError(f"Failed to decrypt data key: {exc}") from exc
return aesgcm.decrypt(nonce, ciphertext, self.DATA_KEY_AAD)
except Exception:
try:
return aesgcm.decrypt(nonce, ciphertext, None)
except Exception as exc:
raise EncryptionError(f"Failed to decrypt data key: {exc}") from exc
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
    """Unwrap *encrypted_data_key* with the local master key.

    ``key_id`` is accepted for interface compatibility and is not used.
    """
    plaintext_key = self._decrypt_data_key(encrypted_data_key)
    return plaintext_key
def generate_data_key(self) -> tuple[bytes, bytes]:
"""Generate a data key and its encrypted form."""
plaintext_key = secrets.token_bytes(32)
@@ -142,11 +210,12 @@ class LocalKeyEncryption(EncryptionProvider):
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
"""Encrypt data using envelope encryption."""
data_key, encrypted_data_key = self.generate_data_key()
aesgcm = AESGCM(data_key)
nonce = secrets.token_bytes(12)
ciphertext = aesgcm.encrypt(nonce, plaintext, None)
aad = json.dumps(context, sort_keys=True).encode() if context else None
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
return EncryptionResult(
ciphertext=ciphertext,
nonce=nonce,
@@ -159,10 +228,11 @@ class LocalKeyEncryption(EncryptionProvider):
"""Decrypt data using envelope encryption."""
data_key = self._decrypt_data_key(encrypted_data_key)
aesgcm = AESGCM(data_key)
aad = json.dumps(context, sort_keys=True).encode() if context else None
try:
return aesgcm.decrypt(nonce, ciphertext, None)
return aesgcm.decrypt(nonce, ciphertext, aad)
except Exception as exc:
raise EncryptionError(f"Failed to decrypt data: {exc}") from exc
raise EncryptionError("Failed to decrypt data") from exc
class StreamingEncryptor:
@@ -180,12 +250,14 @@ class StreamingEncryptor:
self.chunk_size = chunk_size
def _derive_chunk_nonce(self, base_nonce: bytes, chunk_index: int) -> bytes:
    """Derive the 12-byte nonce for one chunk using HKDF-SHA256.

    ``base_nonce`` is used as the HKDF salt and the chunk index (4 bytes,
    big-endian) as the ``info`` parameter, so the result is deterministic for
    a given ``(base_nonce, chunk_index)`` pair.

    Raises:
        OverflowError: If ``chunk_index`` is negative or does not fit in
            4 bytes (raised by ``int.to_bytes``).
    """
    # NOTE(review): the derivation must match what was used at encryption
    # time; changing any HKDF parameter presumably makes existing streamed
    # objects undecryptable - confirm before altering.
    hkdf = HKDF(
        algorithm=hashes.SHA256(),
        length=12,
        salt=base_nonce,
        info=chunk_index.to_bytes(4, "big"),
    )
    return hkdf.derive(b"chunk_nonce")
def encrypt_stream(self, stream: BinaryIO,
context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]:
@@ -234,10 +306,7 @@ class StreamingEncryptor:
Performance: Writes chunks directly to output buffer instead of accumulating in list.
"""
if isinstance(self.provider, LocalKeyEncryption):
data_key = self.provider._decrypt_data_key(metadata.encrypted_data_key)
else:
raise EncryptionError("Unsupported provider for streaming decryption")
data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id)
aesgcm = AESGCM(data_key)
base_nonce = metadata.nonce
@@ -310,7 +379,8 @@ class EncryptionManager:
def get_streaming_encryptor(self) -> StreamingEncryptor:
    """Return the shared ``StreamingEncryptor``, creating it on first use.

    The encryptor wraps the local provider and takes its chunk size from the
    ``encryption_chunk_size_bytes`` config entry (default 64 KiB).
    """
    if self._streaming_encryptor is None:
        chunk_size = self.config.get("encryption_chunk_size_bytes", 64 * 1024)
        # Build the encryptor once, already carrying the configured chunk size.
        self._streaming_encryptor = StreamingEncryptor(self.get_local_provider(), chunk_size=chunk_size)
    return self._streaming_encryptor
def encrypt_object(self, data: bytes, algorithm: str = "AES256",
@@ -403,7 +473,8 @@ class SSECEncryption(EncryptionProvider):
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
aesgcm = AESGCM(self.customer_key)
nonce = secrets.token_bytes(12)
ciphertext = aesgcm.encrypt(nonce, plaintext, None)
aad = json.dumps(context, sort_keys=True).encode() if context else None
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
return EncryptionResult(
ciphertext=ciphertext,
@@ -415,10 +486,11 @@ class SSECEncryption(EncryptionProvider):
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
            key_id: str, context: Dict[str, str] | None = None) -> bytes:
    """Decrypt *ciphertext* with the customer-supplied key (AES-GCM).

    ``context``, when non-empty, is serialised as JSON with sorted keys and
    used as the additional authenticated data. ``encrypted_data_key`` and
    ``key_id`` are not used by this method.

    Raises:
        EncryptionError: If decryption or authentication fails. The message
            is deliberately generic; the underlying exception is attached as
            ``__cause__``.
    """
    aesgcm = AESGCM(self.customer_key)
    aad = json.dumps(context, sort_keys=True).encode() if context else None
    try:
        return aesgcm.decrypt(nonce, ciphertext, aad)
    except Exception as exc:
        raise EncryptionError("SSE-C decryption failed") from exc
def generate_data_key(self) -> tuple[bytes, bytes]:
    """Return the customer key paired with an empty encrypted form.

    No new key is generated: the first element is ``self.customer_key`` and
    the second is always ``b""``.
    """
    no_wrapped_key = b""
    return (self.customer_key, no_wrapped_key)
@@ -472,34 +544,36 @@ class ClientEncryptionHelper:
}
@staticmethod
def encrypt_with_key(plaintext: bytes, key_b64: str, context: Dict[str, str] | None = None) -> Dict[str, str]:
    """Encrypt *plaintext* with a client-provided AES-256 key.

    Args:
        plaintext: Data to encrypt.
        key_b64: Base64-encoded key; must decode to exactly 32 bytes.
        context: Optional mapping; when non-empty it is serialised as JSON
            with sorted keys and used as additional authenticated data, so
            the same context must be supplied to decrypt.

    Returns:
        A dict with base64-encoded ``ciphertext`` and ``nonce`` plus the
        ``algorithm`` name.

    Raises:
        EncryptionError: If the decoded key is not 32 bytes long.
    """
    key = base64.b64decode(key_b64)
    if len(key) != 32:
        raise EncryptionError("Key must be 256 bits (32 bytes)")

    aesgcm = AESGCM(key)
    nonce = secrets.token_bytes(12)
    aad = json.dumps(context, sort_keys=True).encode() if context else None
    ciphertext = aesgcm.encrypt(nonce, plaintext, aad)

    return {
        "ciphertext": base64.b64encode(ciphertext).decode(),
        "nonce": base64.b64encode(nonce).decode(),
        "algorithm": "AES-256-GCM",
    }
@staticmethod
def decrypt_with_key(ciphertext_b64: str, nonce_b64: str, key_b64: str, context: Dict[str, str] | None = None) -> bytes:
    """Decrypt data with a client-provided AES-256 key.

    Args:
        ciphertext_b64: Base64-encoded ciphertext.
        nonce_b64: Base64-encoded nonce.
        key_b64: Base64-encoded key; must decode to exactly 32 bytes.
        context: Optional mapping; when non-empty it is serialised as JSON
            with sorted keys and used as additional authenticated data.

    Returns:
        The decrypted plaintext bytes.

    Raises:
        EncryptionError: If the decoded key is not 32 bytes long, or if
            decryption/authentication fails (generic message; the original
            exception is attached as ``__cause__``).
    """
    key = base64.b64decode(key_b64)
    nonce = base64.b64decode(nonce_b64)
    ciphertext = base64.b64decode(ciphertext_b64)

    if len(key) != 32:
        raise EncryptionError("Key must be 256 bits (32 bytes)")

    aesgcm = AESGCM(key)
    aad = json.dumps(context, sort_keys=True).encode() if context else None
    try:
        return aesgcm.decrypt(nonce, ciphertext, aad)
    except Exception as exc:
        raise EncryptionError("Decryption failed") from exc

View File

@@ -6,6 +6,7 @@ from typing import Optional, Dict, Any
from xml.etree.ElementTree import Element, SubElement, tostring
from flask import Response, jsonify, request, flash, redirect, url_for, g
from flask_limiter import RateLimitExceeded
logger = logging.getLogger(__name__)
@@ -172,10 +173,22 @@ def handle_app_error(error: AppError) -> Response:
return error.to_xml_response()
def handle_rate_limit_exceeded(e: RateLimitExceeded) -> Response:
    """Render a rate-limit violation as an XML ``SlowDown`` error (HTTP 429).

    Also stores ``"SlowDown"`` on ``g.s3_error_code`` (presumably read later
    by request metrics - verify against the consumer of that attribute).
    """
    g.s3_error_code = "SlowDown"

    fields = (
        ("Code", "SlowDown"),
        ("Message", "Please reduce your request rate."),
        ("Resource", request.path),
        ("RequestId", getattr(g, "request_id", "")),
    )
    root = Element("Error")
    for tag, text in fields:
        SubElement(root, tag).text = text

    return Response(tostring(root, encoding="utf-8"), status=429, mimetype="application/xml")
def register_error_handlers(app):
"""Register error handlers with a Flask app."""
app.register_error_handler(AppError, handle_app_error)
app.register_error_handler(RateLimitExceeded, handle_rate_limit_exceeded)
for error_class in [
BucketNotFoundError, BucketAlreadyExistsError, BucketNotEmptyError,
ObjectNotFoundError, InvalidObjectKeyError,

View File

@@ -1,9 +1,12 @@
from __future__ import annotations
import hashlib
import hmac
import json
import math
import os
import secrets
import threading
import time
from collections import deque
from dataclasses import dataclass
@@ -118,12 +121,15 @@ class IamService:
self._raw_config: Dict[str, Any] = {}
self._failed_attempts: Dict[str, Deque[datetime]] = {}
self._last_load_time = 0.0
self._credential_cache: Dict[str, Tuple[str, Principal, float]] = {}
self._cache_ttl = 60.0
self._principal_cache: Dict[str, Tuple[Principal, float]] = {}
self._secret_key_cache: Dict[str, Tuple[str, float]] = {}
self._cache_ttl = float(os.environ.get("IAM_CACHE_TTL_SECONDS", "5.0"))
self._last_stat_check = 0.0
self._stat_check_interval = 1.0
self._sessions: Dict[str, Dict[str, Any]] = {}
self._session_lock = threading.Lock()
self._load()
self._load_lockout_state()
def _maybe_reload(self) -> None:
"""Reload configuration if the file has changed on disk."""
@@ -134,7 +140,8 @@ class IamService:
try:
if self.config_path.stat().st_mtime > self._last_load_time:
self._load()
self._credential_cache.clear()
self._principal_cache.clear()
self._secret_key_cache.clear()
except OSError:
pass
@@ -150,7 +157,8 @@ class IamService:
f"Access temporarily locked. Try again in {seconds} seconds."
)
record = self._users.get(access_key)
if not record or not hmac.compare_digest(record["secret_key"], secret_key):
stored_secret = record["secret_key"] if record else secrets.token_urlsafe(24)
if not record or not hmac.compare_digest(stored_secret, secret_key):
self._record_failed_attempt(access_key)
raise IamError("Invalid credentials")
self._clear_failed_attempts(access_key)
@@ -162,11 +170,46 @@ class IamService:
attempts = self._failed_attempts.setdefault(access_key, deque())
self._prune_attempts(attempts)
attempts.append(datetime.now(timezone.utc))
self._save_lockout_state()
def _clear_failed_attempts(self, access_key: str) -> None:
if not access_key:
return
self._failed_attempts.pop(access_key, None)
if self._failed_attempts.pop(access_key, None) is not None:
self._save_lockout_state()
def _lockout_file(self) -> Path:
return self.config_path.parent / "lockout_state.json"
def _load_lockout_state(self) -> None:
"""Load lockout state from disk."""
try:
if self._lockout_file().exists():
data = json.loads(self._lockout_file().read_text(encoding="utf-8"))
cutoff = datetime.now(timezone.utc) - self.auth_lockout_window
for key, timestamps in data.get("failed_attempts", {}).items():
valid = []
for ts in timestamps:
try:
dt = datetime.fromisoformat(ts)
if dt > cutoff:
valid.append(dt)
except (ValueError, TypeError):
continue
if valid:
self._failed_attempts[key] = deque(valid)
except (OSError, json.JSONDecodeError):
pass
def _save_lockout_state(self) -> None:
"""Persist lockout state to disk."""
data: Dict[str, Any] = {"failed_attempts": {}}
for key, attempts in self._failed_attempts.items():
data["failed_attempts"][key] = [ts.isoformat() for ts in attempts]
try:
self._lockout_file().write_text(json.dumps(data), encoding="utf-8")
except OSError:
pass
def _prune_attempts(self, attempts: Deque[datetime]) -> None:
cutoff = datetime.now(timezone.utc) - self.auth_lockout_window
@@ -209,16 +252,23 @@ class IamService:
return token
def validate_session_token(self, access_key: str, session_token: str) -> bool:
    """Check that *session_token* is a live session belonging to *access_key*.

    The session table is read and pruned while holding ``_session_lock``.
    Access keys are compared with ``hmac.compare_digest`` on their encoded
    bytes, because the ``str`` form of that function raises ``TypeError``
    for non-ASCII input and both arguments here are caller-supplied.

    Returns:
        ``True`` when the token exists, belongs to *access_key* and has not
        expired; ``False`` otherwise. An expired token is removed from the
        session table as a side effect.
    """
    # "surrogatepass" lets any str encode without raising while keeping
    # distinct strings distinct.
    supplied_key = access_key.encode("utf-8", "surrogatepass")
    supplied_token = session_token.encode("utf-8", "surrogatepass")
    dummy_key = secrets.token_urlsafe(16).encode("ascii")
    dummy_token = secrets.token_urlsafe(32).encode("ascii")

    with self._session_lock:
        session = self._sessions.get(session_token)
        if not session:
            # Dummy comparisons keep the miss path doing similar work to a hit.
            hmac.compare_digest(supplied_key, dummy_key)
            hmac.compare_digest(supplied_token, dummy_token)
            return False
        stored_key = session["access_key"].encode("utf-8", "surrogatepass")
        if not hmac.compare_digest(stored_key, supplied_key):
            hmac.compare_digest(supplied_token, dummy_token)
            return False
        if time.time() > session["expires_at"]:
            self._sessions.pop(session_token, None)
            return False
        return True
def _cleanup_expired_sessions(self) -> None:
"""Remove expired session tokens."""
@@ -229,9 +279,9 @@ class IamService:
def principal_for_key(self, access_key: str) -> Principal:
now = time.time()
cached = self._credential_cache.get(access_key)
cached = self._principal_cache.get(access_key)
if cached:
secret, principal, cached_time = cached
principal, cached_time = cached
if now - cached_time < self._cache_ttl:
return principal
@@ -240,23 +290,14 @@ class IamService:
if not record:
raise IamError("Unknown access key")
principal = self._build_principal(access_key, record)
self._credential_cache[access_key] = (record["secret_key"], principal, now)
self._principal_cache[access_key] = (principal, now)
return principal
def secret_for_key(self, access_key: str) -> str:
    """Return the secret key stored for *access_key*.

    The on-disk configuration is re-checked first via ``_maybe_reload``.
    The secret is always read from the current user table and is not
    cached by this method.

    Raises:
        IamError: If *access_key* is not a known user.
    """
    self._maybe_reload()
    record = self._users.get(access_key)
    if not record:
        raise IamError("Unknown access key")
    return record["secret_key"]
def authorize(self, principal: Principal, bucket_name: str | None, action: str) -> None:
@@ -328,6 +369,10 @@ class IamService:
new_secret = self._generate_secret_key()
user["secret_key"] = new_secret
self._save()
self._principal_cache.pop(access_key, None)
self._secret_key_cache.pop(access_key, None)
from .s3_api import clear_signing_key_cache
clear_signing_key_cache()
self._load()
return new_secret
@@ -346,6 +391,10 @@ class IamService:
raise IamError("User not found")
self._raw_config["users"] = remaining
self._save()
self._principal_cache.pop(access_key, None)
self._secret_key_cache.pop(access_key, None)
from .s3_api import clear_signing_key_cache
clear_signing_key_cache()
self._load()
def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None:
@@ -508,25 +557,25 @@ class IamService:
def get_secret_key(self, access_key: str) -> str | None:
    """Return the secret key for *access_key*, or ``None`` if it is unknown.

    Results are served from ``_secret_key_cache`` while the entry is younger
    than ``_cache_ttl`` seconds. On a miss or an expired entry the
    configuration is re-checked via ``_maybe_reload`` and the cache entry is
    refreshed. Unknown keys are not cached.
    """
    now = time.time()
    cached = self._secret_key_cache.get(access_key)
    if cached:
        secret_key, cached_time = cached
        if now - cached_time < self._cache_ttl:
            return secret_key

    self._maybe_reload()
    record = self._users.get(access_key)
    if record:
        secret_key = record["secret_key"]
        self._secret_key_cache[access_key] = (secret_key, now)
        return secret_key
    return None
def get_principal(self, access_key: str) -> Principal | None:
now = time.time()
cached = self._credential_cache.get(access_key)
cached = self._principal_cache.get(access_key)
if cached:
secret, principal, cached_time = cached
principal, cached_time = cached
if now - cached_time < self._cache_ttl:
return principal
@@ -534,6 +583,6 @@ class IamService:
record = self._users.get(access_key)
if record:
principal = self._build_principal(access_key, record)
self._credential_cache[access_key] = (record["secret_key"], principal, now)
self._principal_cache[access_key] = (principal, now)
return principal
return None

View File

@@ -2,7 +2,11 @@ from __future__ import annotations
import base64
import json
import logging
import os
import secrets
import subprocess
import sys
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
@@ -13,6 +17,30 @@ from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from .encryption import EncryptionError, EncryptionProvider, EncryptionResult
if sys.platform != "win32":
import fcntl
logger = logging.getLogger(__name__)
def _set_secure_file_permissions(file_path: Path) -> None:
"""Set restrictive file permissions (owner read/write only)."""
if sys.platform == "win32":
try:
username = os.environ.get("USERNAME", "")
if username:
subprocess.run(
["icacls", str(file_path), "/inheritance:r",
"/grant:r", f"{username}:F"],
check=True, capture_output=True
)
else:
logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
except (subprocess.SubprocessError, OSError) as exc:
logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
else:
os.chmod(file_path, 0o600)
@dataclass
class KMSKey:
@@ -74,11 +102,11 @@ class KMSEncryptionProvider(EncryptionProvider):
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
"""Encrypt data using envelope encryption with KMS."""
data_key, encrypted_data_key = self.generate_data_key()
aesgcm = AESGCM(data_key)
nonce = secrets.token_bytes(12)
ciphertext = aesgcm.encrypt(nonce, plaintext,
json.dumps(context).encode() if context else None)
ciphertext = aesgcm.encrypt(nonce, plaintext,
json.dumps(context, sort_keys=True).encode() if context else None)
return EncryptionResult(
ciphertext=ciphertext,
@@ -90,15 +118,26 @@ class KMSEncryptionProvider(EncryptionProvider):
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
key_id: str, context: Dict[str, str] | None = None) -> bytes:
"""Decrypt data using envelope encryption with KMS."""
# Note: Data key is encrypted without context (AAD), so we decrypt without context
data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
if len(data_key) != 32:
raise EncryptionError("Invalid data key size")
aesgcm = AESGCM(data_key)
try:
return aesgcm.decrypt(nonce, ciphertext,
json.dumps(context).encode() if context else None)
json.dumps(context, sort_keys=True).encode() if context else None)
except Exception as exc:
raise EncryptionError(f"Failed to decrypt data: {exc}") from exc
logger.debug("KMS decryption failed: %s", exc)
raise EncryptionError("Failed to decrypt data") from exc
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
"""Decrypt an encrypted data key using KMS."""
if key_id is None:
key_id = self.key_id
data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
if len(data_key) != 32:
raise EncryptionError("Invalid data key size")
return data_key
class KMSManager:
@@ -108,27 +147,50 @@ class KMSManager:
Keys are stored encrypted on disk.
"""
def __init__(self, keys_path: Path, master_key_path: Path):
def __init__(
self,
keys_path: Path,
master_key_path: Path,
generate_data_key_min_bytes: int = 1,
generate_data_key_max_bytes: int = 1024,
):
self.keys_path = keys_path
self.master_key_path = master_key_path
self.generate_data_key_min_bytes = generate_data_key_min_bytes
self.generate_data_key_max_bytes = generate_data_key_max_bytes
self._keys: Dict[str, KMSKey] = {}
self._master_key: bytes | None = None
self._loaded = False
@property
def master_key(self) -> bytes:
"""Load or create the master key for encrypting KMS keys."""
"""Load or create the master key for encrypting KMS keys (with file locking)."""
if self._master_key is None:
if self.master_key_path.exists():
self._master_key = base64.b64decode(
self.master_key_path.read_text().strip()
)
else:
self._master_key = secrets.token_bytes(32)
self.master_key_path.parent.mkdir(parents=True, exist_ok=True)
self.master_key_path.write_text(
base64.b64encode(self._master_key).decode()
)
lock_path = self.master_key_path.with_suffix(".lock")
lock_path.parent.mkdir(parents=True, exist_ok=True)
with open(lock_path, "w") as lock_file:
if sys.platform == "win32":
import msvcrt
msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
else:
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
try:
if self.master_key_path.exists():
self._master_key = base64.b64decode(
self.master_key_path.read_text().strip()
)
else:
self._master_key = secrets.token_bytes(32)
self.master_key_path.write_text(
base64.b64encode(self._master_key).decode()
)
_set_secure_file_permissions(self.master_key_path)
finally:
if sys.platform == "win32":
import msvcrt
msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
else:
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
return self._master_key
def _load_keys(self) -> None:
@@ -145,8 +207,10 @@ class KMSManager:
encrypted = base64.b64decode(key_data["EncryptedKeyMaterial"])
key.key_material = self._decrypt_key_material(encrypted)
self._keys[key.key_id] = key
except Exception:
pass
except json.JSONDecodeError as exc:
logger.error("Failed to parse KMS keys file: %s", exc)
except (ValueError, KeyError) as exc:
logger.error("Invalid KMS key data: %s", exc)
self._loaded = True
@@ -158,12 +222,13 @@ class KMSManager:
encrypted = self._encrypt_key_material(key.key_material)
data["EncryptedKeyMaterial"] = base64.b64encode(encrypted).decode()
keys_data.append(data)
self.keys_path.parent.mkdir(parents=True, exist_ok=True)
self.keys_path.write_text(
json.dumps({"keys": keys_data}, indent=2),
encoding="utf-8"
)
_set_secure_file_permissions(self.keys_path)
def _encrypt_key_material(self, key_material: bytes) -> bytes:
"""Encrypt key material with the master key."""
@@ -269,7 +334,7 @@ class KMSManager:
aesgcm = AESGCM(key.key_material)
nonce = secrets.token_bytes(12)
aad = json.dumps(context).encode() if context else None
aad = json.dumps(context, sort_keys=True).encode() if context else None
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
key_id_bytes = key_id.encode("utf-8")
@@ -298,17 +363,24 @@ class KMSManager:
encrypted = rest[12:]
aesgcm = AESGCM(key.key_material)
aad = json.dumps(context).encode() if context else None
aad = json.dumps(context, sort_keys=True).encode() if context else None
try:
plaintext = aesgcm.decrypt(nonce, encrypted, aad)
return plaintext, key_id
except Exception as exc:
raise EncryptionError(f"Decryption failed: {exc}") from exc
logger.debug("KMS decrypt operation failed: %s", exc)
raise EncryptionError("Decryption failed") from exc
def generate_data_key(self, key_id: str,
context: Dict[str, str] | None = None) -> tuple[bytes, bytes]:
context: Dict[str, str] | None = None,
key_spec: str = "AES_256") -> tuple[bytes, bytes]:
"""Generate a data key and return both plaintext and encrypted versions.
Args:
key_id: The KMS key ID to use for encryption
context: Optional encryption context
key_spec: Key specification - AES_128 or AES_256 (default)
Returns:
Tuple of (plaintext_key, encrypted_key)
"""
@@ -318,11 +390,12 @@ class KMSManager:
raise EncryptionError(f"Key not found: {key_id}")
if not key.enabled:
raise EncryptionError(f"Key is disabled: {key_id}")
plaintext_key = secrets.token_bytes(32)
key_bytes = 32 if key_spec == "AES_256" else 16
plaintext_key = secrets.token_bytes(key_bytes)
encrypted_key = self.encrypt(key_id, plaintext_key, context)
return plaintext_key, encrypted_key
def decrypt_data_key(self, key_id: str, encrypted_key: bytes,
@@ -358,6 +431,8 @@ class KMSManager:
def generate_random(self, num_bytes: int = 32) -> bytes:
"""Generate cryptographically secure random bytes."""
if num_bytes < 1 or num_bytes > 1024:
raise EncryptionError("Number of bytes must be between 1 and 1024")
if num_bytes < self.generate_data_key_min_bytes or num_bytes > self.generate_data_key_max_bytes:
raise EncryptionError(
f"Number of bytes must be between {self.generate_data_key_min_bytes} and {self.generate_data_key_max_bytes}"
)
return secrets.token_bytes(num_bytes)

View File

@@ -71,10 +71,9 @@ class LifecycleExecutionRecord:
class LifecycleHistoryStore:
MAX_HISTORY_PER_BUCKET = 50
def __init__(self, storage_root: Path) -> None:
def __init__(self, storage_root: Path, max_history_per_bucket: int = 50) -> None:
self.storage_root = storage_root
self.max_history_per_bucket = max_history_per_bucket
self._lock = threading.Lock()
def _get_history_path(self, bucket_name: str) -> Path:
@@ -95,7 +94,7 @@ class LifecycleHistoryStore:
def save_history(self, bucket_name: str, records: List[LifecycleExecutionRecord]) -> None:
path = self._get_history_path(bucket_name)
path.parent.mkdir(parents=True, exist_ok=True)
data = {"executions": [r.to_dict() for r in records[:self.MAX_HISTORY_PER_BUCKET]]}
data = {"executions": [r.to_dict() for r in records[:self.max_history_per_bucket]]}
try:
with open(path, "w") as f:
json.dump(data, f, indent=2)
@@ -114,14 +113,20 @@ class LifecycleHistoryStore:
class LifecycleManager:
def __init__(self, storage: ObjectStorage, interval_seconds: int = 3600, storage_root: Optional[Path] = None):
def __init__(
self,
storage: ObjectStorage,
interval_seconds: int = 3600,
storage_root: Optional[Path] = None,
max_history_per_bucket: int = 50,
):
self.storage = storage
self.interval_seconds = interval_seconds
self.storage_root = storage_root
self._timer: Optional[threading.Timer] = None
self._shutdown = False
self._lock = threading.Lock()
self.history_store = LifecycleHistoryStore(storage_root) if storage_root else None
self.history_store = LifecycleHistoryStore(storage_root, max_history_per_bucket) if storage_root else None
def start(self) -> None:
if self._timer is not None:

View File

@@ -1,8 +1,10 @@
from __future__ import annotations
import ipaddress
import json
import logging
import queue
import socket
import threading
import time
import uuid
@@ -14,6 +16,48 @@ from urllib.parse import urlparse
import requests
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
"""Check if a URL is safe to make requests to (not internal/private).
Args:
url: The URL to check.
allow_internal: If True, allows internal/private IP addresses.
Use for self-hosted deployments on internal networks.
"""
try:
parsed = urlparse(url)
hostname = parsed.hostname
if not hostname:
return False
cloud_metadata_hosts = {
"metadata.google.internal",
"169.254.169.254",
}
if hostname.lower() in cloud_metadata_hosts:
return False
if allow_internal:
return True
blocked_hosts = {
"localhost",
"127.0.0.1",
"0.0.0.0",
"::1",
"[::1]",
}
if hostname.lower() in blocked_hosts:
return False
try:
resolved_ip = socket.gethostbyname(hostname)
ip = ipaddress.ip_address(resolved_ip)
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
return False
except (socket.gaierror, ValueError):
return False
return True
except Exception:
return False
logger = logging.getLogger(__name__)
@@ -165,8 +209,9 @@ class NotificationConfiguration:
class NotificationService:
def __init__(self, storage_root: Path, worker_count: int = 2):
def __init__(self, storage_root: Path, worker_count: int = 2, allow_internal_endpoints: bool = False):
self.storage_root = storage_root
self._allow_internal_endpoints = allow_internal_endpoints
self._configs: Dict[str, List[NotificationConfiguration]] = {}
self._queue: queue.Queue[tuple[NotificationEvent, WebhookDestination]] = queue.Queue()
self._workers: List[threading.Thread] = []
@@ -299,6 +344,8 @@ class NotificationService:
self._queue.task_done()
def _send_notification(self, event: NotificationEvent, destination: WebhookDestination) -> None:
if not _is_safe_url(destination.url, allow_internal=self._allow_internal_endpoints):
raise RuntimeError(f"Blocked request to cloud metadata service (SSRF protection): {destination.url}")
payload = event.to_s3_event()
headers = {"Content-Type": "application/json", **destination.headers}

View File

@@ -21,15 +21,20 @@ from .storage import ObjectStorage, StorageError
logger = logging.getLogger(__name__)
REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
REPLICATION_CONNECT_TIMEOUT = 5
REPLICATION_READ_TIMEOUT = 30
STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024
REPLICATION_MODE_NEW_ONLY = "new_only"
REPLICATION_MODE_ALL = "all"
REPLICATION_MODE_BIDIRECTIONAL = "bidirectional"
def _create_s3_client(connection: RemoteConnection, *, health_check: bool = False) -> Any:
def _create_s3_client(
connection: RemoteConnection,
*,
health_check: bool = False,
connect_timeout: int = 5,
read_timeout: int = 30,
max_retries: int = 2,
) -> Any:
"""Create a boto3 S3 client for the given connection.
Args:
connection: Remote S3 connection configuration
@@ -37,9 +42,9 @@ def _create_s3_client(connection: RemoteConnection, *, health_check: bool = Fals
"""
config = Config(
user_agent_extra=REPLICATION_USER_AGENT,
connect_timeout=REPLICATION_CONNECT_TIMEOUT,
read_timeout=REPLICATION_READ_TIMEOUT,
retries={'max_attempts': 1 if health_check else 2},
connect_timeout=connect_timeout,
read_timeout=read_timeout,
retries={'max_attempts': 1 if health_check else max_retries},
signature_version='s3v4',
s3={'addressing_style': 'path'},
request_checksum_calculation='when_required',
@@ -127,10 +132,13 @@ class ReplicationRule:
target_connection_id: str
target_bucket: str
enabled: bool = True
mode: str = REPLICATION_MODE_NEW_ONLY
mode: str = REPLICATION_MODE_NEW_ONLY
created_at: Optional[float] = None
stats: ReplicationStats = field(default_factory=ReplicationStats)
sync_deletions: bool = True
last_pull_at: Optional[float] = None
filter_prefix: Optional[str] = None
def to_dict(self) -> dict:
return {
"bucket_name": self.bucket_name,
@@ -140,8 +148,11 @@ class ReplicationRule:
"mode": self.mode,
"created_at": self.created_at,
"stats": self.stats.to_dict(),
"sync_deletions": self.sync_deletions,
"last_pull_at": self.last_pull_at,
"filter_prefix": self.filter_prefix,
}
@classmethod
def from_dict(cls, data: dict) -> "ReplicationRule":
stats_data = data.pop("stats", {})
@@ -149,16 +160,21 @@ class ReplicationRule:
data["mode"] = REPLICATION_MODE_NEW_ONLY
if "created_at" not in data:
data["created_at"] = None
if "sync_deletions" not in data:
data["sync_deletions"] = True
if "last_pull_at" not in data:
data["last_pull_at"] = None
if "filter_prefix" not in data:
data["filter_prefix"] = None
rule = cls(**data)
rule.stats = ReplicationStats.from_dict(stats_data) if stats_data else ReplicationStats()
return rule
class ReplicationFailureStore:
MAX_FAILURES_PER_BUCKET = 50
def __init__(self, storage_root: Path) -> None:
def __init__(self, storage_root: Path, max_failures_per_bucket: int = 50) -> None:
self.storage_root = storage_root
self.max_failures_per_bucket = max_failures_per_bucket
self._lock = threading.Lock()
def _get_failures_path(self, bucket_name: str) -> Path:
@@ -179,7 +195,7 @@ class ReplicationFailureStore:
def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
path = self._get_failures_path(bucket_name)
path.parent.mkdir(parents=True, exist_ok=True)
data = {"failures": [f.to_dict() for f in failures[:self.MAX_FAILURES_PER_BUCKET]]}
data = {"failures": [f.to_dict() for f in failures[:self.max_failures_per_bucket]]}
try:
with open(path, "w") as f:
json.dump(data, f, indent=2)
@@ -224,18 +240,43 @@ class ReplicationFailureStore:
class ReplicationManager:
def __init__(self, storage: ObjectStorage, connections: ConnectionStore, rules_path: Path, storage_root: Path) -> None:
def __init__(
self,
storage: ObjectStorage,
connections: ConnectionStore,
rules_path: Path,
storage_root: Path,
connect_timeout: int = 5,
read_timeout: int = 30,
max_retries: int = 2,
streaming_threshold_bytes: int = 10 * 1024 * 1024,
max_failures_per_bucket: int = 50,
) -> None:
self.storage = storage
self.connections = connections
self.rules_path = rules_path
self.storage_root = storage_root
self.connect_timeout = connect_timeout
self.read_timeout = read_timeout
self.max_retries = max_retries
self.streaming_threshold_bytes = streaming_threshold_bytes
self._rules: Dict[str, ReplicationRule] = {}
self._stats_lock = threading.Lock()
self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
self._shutdown = False
self.failure_store = ReplicationFailureStore(storage_root)
self.failure_store = ReplicationFailureStore(storage_root, max_failures_per_bucket)
self.reload_rules()
def _create_client(self, connection: RemoteConnection, *, health_check: bool = False) -> Any:
"""Create an S3 client with the manager's configured timeouts."""
return _create_s3_client(
connection,
health_check=health_check,
connect_timeout=self.connect_timeout,
read_timeout=self.read_timeout,
max_retries=self.max_retries,
)
def shutdown(self, wait: bool = True) -> None:
"""Shutdown the replication executor gracefully.
@@ -271,7 +312,7 @@ class ReplicationManager:
Uses short timeouts to prevent blocking.
"""
try:
s3 = _create_s3_client(connection, health_check=True)
s3 = self._create_client(connection, health_check=True)
s3.list_buckets()
return True
except Exception as e:
@@ -281,6 +322,9 @@ class ReplicationManager:
def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]:
return self._rules.get(bucket_name)
def list_rules(self) -> List[ReplicationRule]:
return list(self._rules.values())
def set_rule(self, rule: ReplicationRule) -> None:
old_rule = self._rules.get(rule.bucket_name)
was_all_mode = old_rule and old_rule.mode == REPLICATION_MODE_ALL if old_rule else False
@@ -320,7 +364,7 @@ class ReplicationManager:
source_objects = self.storage.list_objects_all(bucket_name)
source_keys = {obj.key: obj.size for obj in source_objects}
s3 = _create_s3_client(connection)
s3 = self._create_client(connection)
dest_keys = set()
bytes_synced = 0
@@ -386,7 +430,7 @@ class ReplicationManager:
raise ValueError(f"Connection {connection_id} not found")
try:
s3 = _create_s3_client(connection)
s3 = self._create_client(connection)
s3.create_bucket(Bucket=bucket_name)
except ClientError as e:
logger.error(f"Failed to create remote bucket {bucket_name}: {e}")
@@ -429,7 +473,7 @@ class ReplicationManager:
return
try:
s3 = _create_s3_client(conn)
s3 = self._create_client(conn)
if action == "delete":
try:
@@ -472,7 +516,7 @@ class ReplicationManager:
if content_type:
extra_args["ContentType"] = content_type
if file_size >= STREAMING_THRESHOLD_BYTES:
if file_size >= self.streaming_threshold_bytes:
s3.upload_file(
str(path),
rule.target_bucket,

File diff suppressed because it is too large Load Diff

171
app/select_content.py Normal file
View File

@@ -0,0 +1,171 @@
"""S3 SelectObjectContent SQL query execution using DuckDB."""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any, Dict, Generator, Optional
try:
import duckdb
DUCKDB_AVAILABLE = True
except ImportError:
DUCKDB_AVAILABLE = False
class SelectError(Exception):
    """Raised when a SELECT query cannot be set up or executed."""
def execute_select_query(
    file_path: Path,
    expression: str,
    input_format: str,
    input_config: Dict[str, Any],
    output_format: str,
    output_config: Dict[str, Any],
    chunk_size: int = 65536,
) -> Generator[bytes, None, None]:
    """Execute SQL query on object content and stream the encoded result.

    The object is loaded into an in-memory DuckDB table named ``data``; every
    reference to the ``S3Object`` table in ``expression`` (any letter case) is
    rewritten to that table before execution.

    Args:
        file_path: Local path of the object to query.
        expression: SQL expression supplied by the caller.
        input_format: ``"CSV"``, ``"JSON"`` or ``"Parquet"``.
        input_config: Format-specific input options passed to the loader.
        output_format: ``"CSV"`` or ``"JSON"``.
        output_config: Format-specific output options passed to the writer.
        chunk_size: Size (in characters) of each emitted chunk before encoding.

    Yields:
        UTF-8 encoded chunks of the serialised result.

    Raises:
        SelectError: DuckDB is missing, a format is unsupported, or the SQL
            fails to execute.
    """
    import re  # local import: only this function needs it

    if not DUCKDB_AVAILABLE:
        raise SelectError("DuckDB is not installed. Install with: pip install duckdb")

    conn = duckdb.connect(":memory:")
    try:
        if input_format == "CSV":
            _load_csv(conn, file_path, input_config)
        elif input_format == "JSON":
            _load_json(conn, file_path, input_config)
        elif input_format == "Parquet":
            _load_parquet(conn, file_path)
        else:
            raise SelectError(f"Unsupported input format: {input_format}")

        # S3 Select accepts the table name in any case (s3object, S3Object,
        # S3OBJECT, ...); match it as a whole word, case-insensitively.
        normalized_expression = re.sub(
            r"\bs3object\b", "data", expression, flags=re.IGNORECASE
        )

        try:
            # The data is already materialised in memory. Before running
            # caller-supplied SQL, cut off file-system/extension access so the
            # expression cannot read or write arbitrary local files.
            conn.execute("SET enable_external_access = false")
            result = conn.execute(normalized_expression)
        except duckdb.Error as exc:
            raise SelectError(f"SQL execution error: {exc}") from exc

        if output_format == "CSV":
            yield from _output_csv(result, output_config, chunk_size)
        elif output_format == "JSON":
            yield from _output_json(result, output_config, chunk_size)
        else:
            raise SelectError(f"Unsupported output format: {output_format}")
    finally:
        conn.close()
def _load_csv(conn, file_path: Path, config: Dict[str, Any]) -> None:
"""Load CSV file into DuckDB."""
file_header_info = config.get("file_header_info", "NONE")
delimiter = config.get("field_delimiter", ",")
quote = config.get("quote_character", '"')
header = file_header_info in ("USE", "IGNORE")
path_str = str(file_path).replace("\\", "/")
conn.execute(f"""
CREATE TABLE data AS
SELECT * FROM read_csv('{path_str}',
header={header},
delim='{delimiter}',
quote='{quote}'
)
""")
def _load_json(conn, file_path: Path, config: Dict[str, Any]) -> None:
"""Load JSON file into DuckDB."""
json_type = config.get("type", "DOCUMENT")
path_str = str(file_path).replace("\\", "/")
if json_type == "LINES":
conn.execute(f"""
CREATE TABLE data AS
SELECT * FROM read_json_auto('{path_str}', format='newline_delimited')
""")
else:
conn.execute(f"""
CREATE TABLE data AS
SELECT * FROM read_json_auto('{path_str}', format='array')
""")
def _load_parquet(conn, file_path: Path) -> None:
"""Load Parquet file into DuckDB."""
path_str = str(file_path).replace("\\", "/")
conn.execute(f"CREATE TABLE data AS SELECT * FROM read_parquet('{path_str}')")
def _output_csv(
result,
config: Dict[str, Any],
chunk_size: int,
) -> Generator[bytes, None, None]:
"""Output query results as CSV."""
delimiter = config.get("field_delimiter", ",")
record_delimiter = config.get("record_delimiter", "\n")
quote = config.get("quote_character", '"')
buffer = ""
while True:
rows = result.fetchmany(1000)
if not rows:
break
for row in rows:
fields = []
for value in row:
if value is None:
fields.append("")
elif isinstance(value, str):
if delimiter in value or quote in value or record_delimiter in value:
escaped = value.replace(quote, quote + quote)
fields.append(f'{quote}{escaped}{quote}')
else:
fields.append(value)
else:
fields.append(str(value))
buffer += delimiter.join(fields) + record_delimiter
while len(buffer) >= chunk_size:
yield buffer[:chunk_size].encode("utf-8")
buffer = buffer[chunk_size:]
if buffer:
yield buffer.encode("utf-8")
def _output_json(
result,
config: Dict[str, Any],
chunk_size: int,
) -> Generator[bytes, None, None]:
"""Output query results as JSON Lines."""
record_delimiter = config.get("record_delimiter", "\n")
columns = [desc[0] for desc in result.description]
buffer = ""
while True:
rows = result.fetchmany(1000)
if not rows:
break
for row in rows:
record = dict(zip(columns, row))
buffer += json.dumps(record, default=str) + record_delimiter
while len(buffer) >= chunk_size:
yield buffer[:chunk_size].encode("utf-8")
buffer = buffer[chunk_size:]
if buffer:
yield buffer.encode("utf-8")

177
app/site_registry.py Normal file
View File

@@ -0,0 +1,177 @@
from __future__ import annotations
import json
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
@dataclass
class SiteInfo:
    """Descriptive record for a site: identity, endpoint and timestamps."""

    site_id: str
    endpoint: str
    region: str = "us-east-1"
    priority: int = 100
    display_name: str = ""
    created_at: Optional[float] = None
    updated_at: Optional[float] = None

    def __post_init__(self) -> None:
        # An empty display name falls back to the site id.
        self.display_name = self.display_name or self.site_id
        # Stamp the creation time when the caller did not supply one.
        if self.created_at is None:
            self.created_at = time.time()

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a plain dict keyed by field name."""
        exported = (
            "site_id",
            "endpoint",
            "region",
            "priority",
            "display_name",
            "created_at",
            "updated_at",
        )
        return {name: getattr(self, name) for name in exported}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> SiteInfo:
        """Build a record from a dict; only ``site_id`` is mandatory."""
        site_id = data["site_id"]
        fallbacks = {
            "endpoint": "",
            "region": "us-east-1",
            "priority": 100,
            "display_name": "",
            "created_at": None,
            "updated_at": None,
        }
        optional = {name: data.get(name, default) for name, default in fallbacks.items()}
        return cls(site_id=site_id, **optional)
@dataclass
class PeerSite:
    """Record for a peer site, including its connection and health status."""

    site_id: str
    endpoint: str
    region: str = "us-east-1"
    priority: int = 100
    display_name: str = ""
    created_at: Optional[float] = None
    updated_at: Optional[float] = None
    connection_id: Optional[str] = None
    is_healthy: Optional[bool] = None
    last_health_check: Optional[float] = None

    def __post_init__(self) -> None:
        # An empty display name falls back to the site id.
        self.display_name = self.display_name or self.site_id
        # Stamp the creation time when the caller did not supply one.
        if self.created_at is None:
            self.created_at = time.time()

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a plain dict keyed by field name."""
        exported = (
            "site_id",
            "endpoint",
            "region",
            "priority",
            "display_name",
            "created_at",
            "updated_at",
            "connection_id",
            "is_healthy",
            "last_health_check",
        )
        return {name: getattr(self, name) for name in exported}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> PeerSite:
        """Build a record from a dict; only ``site_id`` is mandatory."""
        site_id = data["site_id"]
        fallbacks = {
            "endpoint": "",
            "region": "us-east-1",
            "priority": 100,
            "display_name": "",
            "created_at": None,
            "updated_at": None,
            "connection_id": None,
            "is_healthy": None,
            "last_health_check": None,
        }
        optional = {name: data.get(name, default) for name, default in fallbacks.items()}
        return cls(site_id=site_id, **optional)
class SiteRegistry:
    """JSON-file-backed registry of the local site and its peer sites.

    The whole registry is held in memory and rewritten to ``config_path`` on
    every mutation.
    """

    def __init__(self, config_path: Path) -> None:
        self.config_path = config_path
        self._local_site: Optional[SiteInfo] = None
        self._peers: Dict[str, PeerSite] = {}
        self.reload()

    def reload(self) -> None:
        """Replace the in-memory state with the contents of the config file.

        A missing file yields an empty registry. An unreadable or malformed
        file also yields an empty registry, and a warning is logged so the
        problem is not silently hidden.
        """
        local_site: Optional[SiteInfo] = None
        peers: Dict[str, PeerSite] = {}

        if self.config_path.exists():
            try:
                with open(self.config_path, "r", encoding="utf-8") as f:
                    data = json.load(f)

                if not isinstance(data, dict):
                    raise ValueError("top-level JSON value must be an object")

                if data.get("local"):
                    local_site = SiteInfo.from_dict(data["local"])

                # "peers": null is treated like an absent list.
                for peer_data in data.get("peers") or []:
                    peer = PeerSite.from_dict(peer_data)
                    peers[peer.site_id] = peer
            except (OSError, ValueError, KeyError, TypeError, AttributeError) as exc:
                # ValueError covers json.JSONDecodeError and decoding errors;
                # TypeError/AttributeError cover entries of the wrong shape.
                import logging

                logging.getLogger(__name__).warning(
                    "Ignoring unreadable site registry %s: %s", self.config_path, exc
                )
                local_site = None
                peers = {}

        # Assign only once parsing has finished so state is never half-loaded.
        self._local_site = local_site
        self._peers = peers

    def save(self) -> None:
        """Write the registry to ``config_path``.

        The data is written to a sibling temporary file and then moved into
        place, so an interrupted write cannot leave a truncated registry.
        """
        import os

        self.config_path.parent.mkdir(parents=True, exist_ok=True)
        data = {
            "local": self._local_site.to_dict() if self._local_site else None,
            "peers": [peer.to_dict() for peer in self._peers.values()],
        }
        tmp_path = self.config_path.with_name(self.config_path.name + ".tmp")
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
            os.replace(tmp_path, self.config_path)
        except BaseException:
            # Do not leave a stray temp file behind on failure.
            try:
                tmp_path.unlink()
            except OSError:
                pass
            raise

    def get_local_site(self) -> Optional[SiteInfo]:
        """Return the local site record, or None when none is configured."""
        return self._local_site

    def set_local_site(self, site: SiteInfo) -> None:
        """Store ``site`` as the local site, stamp ``updated_at`` and persist."""
        site.updated_at = time.time()
        self._local_site = site
        self.save()

    def list_peers(self) -> List[PeerSite]:
        """Return all registered peers as a new list."""
        return list(self._peers.values())

    def get_peer(self, site_id: str) -> Optional[PeerSite]:
        """Return the peer with ``site_id``, or None if unknown."""
        return self._peers.get(site_id)

    def add_peer(self, peer: PeerSite) -> None:
        """Insert (or overwrite) ``peer`` under its site id and persist."""
        peer.created_at = peer.created_at or time.time()
        self._peers[peer.site_id] = peer
        self.save()

    def update_peer(self, peer: PeerSite) -> None:
        """Replace an existing peer and persist.

        Raises:
            ValueError: No peer with ``peer.site_id`` is registered.
        """
        if peer.site_id not in self._peers:
            raise ValueError(f"Peer {peer.site_id} not found")
        peer.updated_at = time.time()
        self._peers[peer.site_id] = peer
        self.save()

    def delete_peer(self, site_id: str) -> bool:
        """Remove a peer and persist; return whether it existed."""
        if site_id in self._peers:
            del self._peers[site_id]
            self.save()
            return True
        return False

    def update_health(self, site_id: str, is_healthy: bool) -> None:
        """Record a health-check result for a known peer and persist.

        Unknown site ids are ignored.
        """
        peer = self._peers.get(site_id)
        if peer:
            peer.is_healthy = is_healthy
            peer.last_health_check = time.time()
            self.save()

416
app/site_sync.py Normal file
View File

@@ -0,0 +1,416 @@
from __future__ import annotations
import json
import logging
import tempfile
import threading
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, TYPE_CHECKING
import boto3
from botocore.config import Config
from botocore.exceptions import ClientError
if TYPE_CHECKING:
from .connections import ConnectionStore, RemoteConnection
from .replication import ReplicationManager, ReplicationRule
from .storage import ObjectStorage
logger = logging.getLogger(__name__)
SITE_SYNC_USER_AGENT = "SiteSyncAgent/1.0"
@dataclass
class SyncedObjectInfo:
    """Bookkeeping entry for one synced object."""

    last_synced_at: float
    remote_etag: str
    source: str

    def to_dict(self) -> Dict[str, Any]:
        """Return the entry as a plain dict keyed by field name."""
        return {
            name: getattr(self, name)
            for name in ("last_synced_at", "remote_etag", "source")
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SyncedObjectInfo":
        """Rebuild an entry from ``to_dict`` output; every key is required."""
        required = ("last_synced_at", "remote_etag", "source")
        return cls(**{name: data[name] for name in required})
@dataclass
class SyncState:
    """Per-object sync entries plus the timestamp of the last full sync."""

    synced_objects: Dict[str, SyncedObjectInfo] = field(default_factory=dict)
    last_full_sync: Optional[float] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the state as nested plain dicts."""
        serialized: Dict[str, Any] = {}
        for object_key, info in self.synced_objects.items():
            serialized[object_key] = info.to_dict()
        return {
            "synced_objects": serialized,
            "last_full_sync": self.last_full_sync,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SyncState":
        """Rebuild the state from ``to_dict`` output; missing keys use defaults."""
        restored = {
            object_key: SyncedObjectInfo.from_dict(raw)
            for object_key, raw in data.get("synced_objects", {}).items()
        }
        return cls(synced_objects=restored, last_full_sync=data.get("last_full_sync"))
@dataclass
class SiteSyncStats:
    """Counters describing the outcome of one bucket sync run."""

    last_sync_at: Optional[float] = None
    objects_pulled: int = 0
    objects_skipped: int = 0
    conflicts_resolved: int = 0
    deletions_applied: int = 0
    errors: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """Return the counters as a plain dict keyed by field name."""
        exported = (
            "last_sync_at",
            "objects_pulled",
            "objects_skipped",
            "conflicts_resolved",
            "deletions_applied",
            "errors",
        )
        return {name: getattr(self, name) for name in exported}
@dataclass
class RemoteObjectMeta:
    """Key, size, modification time and ETag of one remote object."""

    key: str
    size: int
    last_modified: datetime
    etag: str

    @classmethod
    def from_s3_object(cls, obj: Dict[str, Any]) -> "RemoteObjectMeta":
        """Build metadata from an S3 listing entry.

        ``Key`` and ``LastModified`` are required; ``Size`` defaults to 0 and
        ``ETag`` to an empty string. Surrounding double quotes are stripped
        from the ETag.
        """
        raw_etag = obj.get("ETag", "")
        return cls(
            key=obj["Key"],
            size=obj.get("Size", 0),
            last_modified=obj["LastModified"],
            etag=raw_etag.strip('"'),
        )
def _create_sync_client(
    connection: "RemoteConnection",
    *,
    connect_timeout: int = 10,
    read_timeout: int = 120,
    max_retries: int = 2,
) -> Any:
    """Create a boto3 S3 client for ``connection``.

    The client uses SigV4 signing, path-style addressing, the site-sync user
    agent suffix, and checksum calculation/validation only when required.

    Args:
        connection: Remote endpoint and credentials.
        connect_timeout: Value passed as botocore ``connect_timeout``.
        read_timeout: Value passed as botocore ``read_timeout``.
        max_retries: Value passed as botocore ``retries["max_attempts"]``.
    """
    retry_policy = {"max_attempts": max_retries}
    s3_options = {"addressing_style": "path"}
    client_config = Config(
        user_agent_extra=SITE_SYNC_USER_AGENT,
        connect_timeout=connect_timeout,
        read_timeout=read_timeout,
        retries=retry_policy,
        signature_version="s3v4",
        s3=s3_options,
        request_checksum_calculation="when_required",
        response_checksum_validation="when_required",
    )
    # Fall back to us-east-1 when the connection carries no region.
    region = connection.region or "us-east-1"
    return boto3.client(
        "s3",
        endpoint_url=connection.endpoint_url,
        aws_access_key_id=connection.access_key,
        aws_secret_access_key=connection.secret_key,
        region_name=region,
        config=client_config,
    )
class SiteSyncWorker:
def __init__(
self,
storage: "ObjectStorage",
connections: "ConnectionStore",
replication_manager: "ReplicationManager",
storage_root: Path,
interval_seconds: int = 60,
batch_size: int = 100,
connect_timeout: int = 10,
read_timeout: int = 120,
max_retries: int = 2,
clock_skew_tolerance_seconds: float = 1.0,
):
self.storage = storage
self.connections = connections
self.replication_manager = replication_manager
self.storage_root = storage_root
self.interval_seconds = interval_seconds
self.batch_size = batch_size
self.connect_timeout = connect_timeout
self.read_timeout = read_timeout
self.max_retries = max_retries
self.clock_skew_tolerance_seconds = clock_skew_tolerance_seconds
self._lock = threading.Lock()
self._shutdown = threading.Event()
self._sync_thread: Optional[threading.Thread] = None
self._bucket_stats: Dict[str, SiteSyncStats] = {}
def _create_client(self, connection: "RemoteConnection") -> Any:
    """Build an S3 client for ``connection`` using this worker's timeouts and retry limit."""
    client_options = {
        "connect_timeout": self.connect_timeout,
        "read_timeout": self.read_timeout,
        "max_retries": self.max_retries,
    }
    return _create_sync_client(connection, **client_options)
def start(self) -> None:
    """Start the background sync thread unless one is already running."""
    current = self._sync_thread
    already_running = current is not None and current.is_alive()
    if already_running:
        return

    # Reset the stop flag so a worker that was shut down can be restarted.
    self._shutdown.clear()
    worker = threading.Thread(
        target=self._sync_loop, name="site-sync-worker", daemon=True
    )
    self._sync_thread = worker
    worker.start()
    logger.info("Site sync worker started (interval=%ds)", self.interval_seconds)
def shutdown(self) -> None:
    """Signal the sync loop to stop and wait up to 10 seconds for the thread."""
    self._shutdown.set()
    worker = self._sync_thread
    if worker is not None:
        worker.join(timeout=10.0)
    logger.info("Site sync worker shut down")
def trigger_sync(self, bucket_name: str) -> Optional[SiteSyncStats]:
    """Run one sync pass for ``bucket_name`` immediately.

    Returns:
        The stats of the pass, or None when the bucket has no enabled
        bidirectional replication rule.
    """
    from .replication import REPLICATION_MODE_BIDIRECTIONAL

    rule = self.replication_manager.get_rule(bucket_name)
    if not rule or rule.mode != REPLICATION_MODE_BIDIRECTIONAL or not rule.enabled:
        return None

    stats = self._sync_bucket(rule)
    # Record the result like the periodic cycle does, so get_stats() reflects
    # a manually triggered sync instead of returning stale data.
    with self._lock:
        self._bucket_stats[bucket_name] = stats
    return stats
def get_stats(self, bucket_name: str) -> Optional[SiteSyncStats]:
with self._lock:
return self._bucket_stats.get(bucket_name)
def _sync_loop(self) -> None:
while not self._shutdown.is_set():
self._shutdown.wait(timeout=self.interval_seconds)
if self._shutdown.is_set():
break
self._run_sync_cycle()
def _run_sync_cycle(self) -> None:
    """Sync every enabled bidirectional rule once, storing stats per bucket.

    A failure in one bucket is logged and does not stop the remaining
    buckets; the cycle ends early when shutdown is signalled.
    """
    from .replication import REPLICATION_MODE_BIDIRECTIONAL

    # Snapshot the rules so changes during the cycle do not affect iteration.
    rules_snapshot = list(self.replication_manager._rules.items())
    for bucket_name, rule in rules_snapshot:
        if self._shutdown.is_set():
            break

        eligible = rule.mode == REPLICATION_MODE_BIDIRECTIONAL and rule.enabled
        if not eligible:
            continue

        try:
            stats = self._sync_bucket(rule)
            with self._lock:
                self._bucket_stats[bucket_name] = stats
        except Exception as e:
            logger.exception("Site sync failed for bucket %s: %s", bucket_name, e)
def _sync_bucket(self, rule: "ReplicationRule") -> SiteSyncStats:
    """Pull newer remote objects into the local bucket and apply tracked remote deletions.

    Steps: list both sides, decide what to pull, pull up to ``batch_size``
    objects, optionally delete local objects whose remote copy disappeared,
    then persist the sync state and the rule's ``last_pull_at``.

    Returns the stats for this pass. A missing connection or a listing failure
    is counted as one error and ends the pass early.
    """
    stats = SiteSyncStats()

    connection = self.connections.get(rule.target_connection_id)
    if not connection:
        logger.warning("Connection %s not found for bucket %s", rule.target_connection_id, rule.bucket_name)
        stats.errors += 1
        return stats

    try:
        local_objects = self._list_local_objects(rule.bucket_name)
    except Exception as e:
        logger.error("Failed to list local objects for %s: %s", rule.bucket_name, e)
        stats.errors += 1
        return stats

    try:
        remote_objects = self._list_remote_objects(rule, connection)
    except Exception as e:
        logger.error("Failed to list remote objects for %s: %s", rule.bucket_name, e)
        stats.errors += 1
        return stats

    sync_state = self._load_sync_state(rule.bucket_name)
    local_keys = set(local_objects.keys())
    remote_keys = set(remote_objects.keys())

    # Phase 1: decide which remote keys must be pulled.
    to_pull = []
    for key in remote_keys:
        local_meta = local_objects.get(key)
        if local_meta is None:
            to_pull.append(key)
            continue
        if self._resolve_conflict(local_meta, remote_objects[key]) == "pull":
            to_pull.append(key)
            stats.conflicts_resolved += 1
        else:
            stats.objects_skipped += 1

    # Phase 2: pull, stopping on shutdown or once batch_size pulls have succeeded.
    pulled_count = 0
    for key in to_pull:
        if self._shutdown.is_set() or pulled_count >= self.batch_size:
            break
        remote_meta = remote_objects[key]
        if not self._pull_object(rule, key, connection, remote_meta):
            stats.errors += 1
            continue
        stats.objects_pulled += 1
        pulled_count += 1
        sync_state.synced_objects[key] = SyncedObjectInfo(
            last_synced_at=time.time(),
            remote_etag=remote_meta.etag,
            source="remote",
        )

    # Phase 3: propagate remote deletions, only for objects that originally came
    # from the remote and have not been modified locally since they were synced.
    if rule.sync_deletions:
        for key in list(sync_state.synced_objects.keys()):
            if key in remote_keys or key not in local_keys:
                continue
            tracked = sync_state.synced_objects[key]
            if tracked.source != "remote":
                continue
            local_meta = local_objects.get(key)
            unchanged_since_sync = (
                bool(local_meta)
                and local_meta.last_modified.timestamp() <= tracked.last_synced_at
            )
            if unchanged_since_sync and self._apply_remote_deletion(rule.bucket_name, key):
                stats.deletions_applied += 1
                del sync_state.synced_objects[key]

    sync_state.last_full_sync = time.time()
    self._save_sync_state(rule.bucket_name, sync_state)

    with self.replication_manager._stats_lock:
        rule.last_pull_at = time.time()
    self.replication_manager.save_rules()

    stats.last_sync_at = time.time()
    logger.info(
        "Site sync completed for %s: pulled=%d, skipped=%d, conflicts=%d, deletions=%d, errors=%d",
        rule.bucket_name,
        stats.objects_pulled,
        stats.objects_skipped,
        stats.conflicts_resolved,
        stats.deletions_applied,
        stats.errors,
    )
    return stats
def _list_local_objects(self, bucket_name: str) -> Dict[str, Any]:
from .storage import ObjectMeta
objects = self.storage.list_objects_all(bucket_name)
return {obj.key: obj for obj in objects}
def _list_remote_objects(self, rule: "ReplicationRule", connection: "RemoteConnection") -> Dict[str, RemoteObjectMeta]:
s3 = self._create_client(connection)
result: Dict[str, RemoteObjectMeta] = {}
paginator = s3.get_paginator("list_objects_v2")
try:
for page in paginator.paginate(Bucket=rule.target_bucket):
for obj in page.get("Contents", []):
meta = RemoteObjectMeta.from_s3_object(obj)
result[meta.key] = meta
except ClientError as e:
if e.response["Error"]["Code"] == "NoSuchBucket":
return {}
raise
return result
def _resolve_conflict(self, local_meta: Any, remote_meta: RemoteObjectMeta) -> str:
local_ts = local_meta.last_modified.timestamp()
remote_ts = remote_meta.last_modified.timestamp()
if abs(remote_ts - local_ts) < self.clock_skew_tolerance_seconds:
local_etag = local_meta.etag or ""
if remote_meta.etag == local_etag:
return "skip"
return "pull" if remote_meta.etag > local_etag else "keep"
return "pull" if remote_ts > local_ts else "keep"
def _pull_object(
self,
rule: "ReplicationRule",
object_key: str,
connection: "RemoteConnection",
remote_meta: RemoteObjectMeta,
) -> bool:
s3 = self._create_client(connection)
tmp_path = None
try:
tmp_dir = self.storage_root / ".myfsio.sys" / "tmp"
tmp_dir.mkdir(parents=True, exist_ok=True)
with tempfile.NamedTemporaryFile(dir=tmp_dir, delete=False) as tmp_file:
tmp_path = Path(tmp_file.name)
s3.download_file(rule.target_bucket, object_key, str(tmp_path))
head_response = s3.head_object(Bucket=rule.target_bucket, Key=object_key)
user_metadata = head_response.get("Metadata", {})
with open(tmp_path, "rb") as f:
self.storage.put_object(
rule.bucket_name,
object_key,
f,
metadata=user_metadata if user_metadata else None,
)
logger.debug("Pulled object %s/%s from remote", rule.bucket_name, object_key)
return True
except ClientError as e:
logger.error("Failed to pull %s/%s: %s", rule.bucket_name, object_key, e)
return False
except Exception as e:
logger.error("Failed to store pulled object %s/%s: %s", rule.bucket_name, object_key, e)
return False
finally:
if tmp_path and tmp_path.exists():
try:
tmp_path.unlink()
except OSError:
pass
def _apply_remote_deletion(self, bucket_name: str, object_key: str) -> bool:
try:
self.storage.delete_object(bucket_name, object_key)
logger.debug("Applied remote deletion for %s/%s", bucket_name, object_key)
return True
except Exception as e:
logger.error("Failed to apply remote deletion for %s/%s: %s", bucket_name, object_key, e)
return False
def _sync_state_path(self, bucket_name: str) -> Path:
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "site_sync_state.json"
def _load_sync_state(self, bucket_name: str) -> SyncState:
    """Load the persisted sync state for a bucket.

    Returns a fresh ``SyncState`` when the file does not exist or cannot be
    read or parsed (the failure is logged as a warning).
    """
    state_file = self._sync_state_path(bucket_name)
    if state_file.exists():
        try:
            raw = state_file.read_text(encoding="utf-8")
            return SyncState.from_dict(json.loads(raw))
        except (json.JSONDecodeError, OSError, KeyError) as e:
            logger.warning("Failed to load sync state for %s: %s", bucket_name, e)
    return SyncState()
def _save_sync_state(self, bucket_name: str, state: SyncState) -> None:
path = self._sync_state_path(bucket_name)
path.parent.mkdir(parents=True, exist_ok=True)
try:
path.write_text(json.dumps(state.to_dict(), indent=2), encoding="utf-8")
except OSError as e:
logger.warning("Failed to save sync state for %s: %s", bucket_name, e)

View File

@@ -46,6 +46,34 @@ else:
fcntl.flock(file_handle.fileno(), fcntl.LOCK_UN)
@contextmanager
def _atomic_lock_file(lock_path: Path, max_retries: int = 10, base_delay: float = 0.1) -> Generator[None, None, None]:
"""Atomically acquire a lock file with exponential backoff.
Uses O_EXCL to ensure atomic creation of the lock file.
"""
lock_path.parent.mkdir(parents=True, exist_ok=True)
fd = None
for attempt in range(max_retries):
try:
fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_WRONLY)
break
except FileExistsError:
if attempt == max_retries - 1:
raise BlockingIOError("Another upload to this key is in progress")
delay = base_delay * (2 ** attempt)
time.sleep(min(delay, 2.0))
try:
yield
finally:
if fd is not None:
os.close(fd)
try:
lock_path.unlink(missing_ok=True)
except OSError:
pass
WINDOWS_RESERVED_NAMES = {
"CON",
"PRN",
@@ -137,10 +165,15 @@ class ObjectStorage:
BUCKET_VERSIONS_DIR = "versions"
MULTIPART_MANIFEST = "manifest.json"
BUCKET_CONFIG_FILE = ".bucket.json"
DEFAULT_CACHE_TTL = 5
OBJECT_CACHE_MAX_SIZE = 100
def __init__(self, root: Path, cache_ttl: int = DEFAULT_CACHE_TTL) -> None:
def __init__(
self,
root: Path,
cache_ttl: int = 5,
object_cache_max_size: int = 100,
bucket_config_cache_ttl: float = 30.0,
object_key_max_length_bytes: int = 1024,
) -> None:
self.root = Path(root)
self.root.mkdir(parents=True, exist_ok=True)
self._ensure_system_roots()
@@ -149,8 +182,10 @@ class ObjectStorage:
self._bucket_locks: Dict[str, threading.Lock] = {}
self._cache_version: Dict[str, int] = {}
self._bucket_config_cache: Dict[str, tuple[dict[str, Any], float]] = {}
self._bucket_config_cache_ttl = 30.0
self._bucket_config_cache_ttl = bucket_config_cache_ttl
self._cache_ttl = cache_ttl
self._object_cache_max_size = object_cache_max_size
self._object_key_max_length_bytes = object_key_max_length_bytes
def _get_bucket_lock(self, bucket_id: str) -> threading.Lock:
"""Get or create a lock for a specific bucket. Reduces global lock contention."""
@@ -313,18 +348,15 @@ class ObjectStorage:
total_count = len(all_keys)
start_index = 0
if continuation_token:
try:
import bisect
start_index = bisect.bisect_right(all_keys, continuation_token)
if start_index >= total_count:
return ListObjectsResult(
objects=[],
is_truncated=False,
next_continuation_token=None,
total_count=total_count,
)
except Exception:
pass
import bisect
start_index = bisect.bisect_right(all_keys, continuation_token)
if start_index >= total_count:
return ListObjectsResult(
objects=[],
is_truncated=False,
next_continuation_token=None,
total_count=total_count,
)
end_index = start_index + max_keys
keys_slice = all_keys[start_index:end_index]
@@ -364,7 +396,7 @@ class ObjectStorage:
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
destination = bucket_path / safe_key
destination.parent.mkdir(parents=True, exist_ok=True)
@@ -439,7 +471,7 @@ class ObjectStorage:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
return {}
safe_key = self._sanitize_object_key(object_key)
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
return self._read_metadata(bucket_path.name, safe_key) or {}
def _cleanup_empty_parents(self, path: Path, stop_at: Path) -> None:
@@ -487,7 +519,7 @@ class ObjectStorage:
self._safe_unlink(target)
self._delete_metadata(bucket_id, rel)
else:
rel = self._sanitize_object_key(object_key)
rel = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
self._delete_metadata(bucket_id, rel)
version_dir = self._version_dir(bucket_id, rel)
if version_dir.exists():
@@ -696,7 +728,7 @@ class ObjectStorage:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise BucketNotFoundError("Bucket does not exist")
safe_key = self._sanitize_object_key(object_key)
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
object_path = bucket_path / safe_key
if not object_path.exists():
raise ObjectNotFoundError("Object does not exist")
@@ -719,7 +751,7 @@ class ObjectStorage:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise BucketNotFoundError("Bucket does not exist")
safe_key = self._sanitize_object_key(object_key)
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
object_path = bucket_path / safe_key
if not object_path.exists():
raise ObjectNotFoundError("Object does not exist")
@@ -758,7 +790,7 @@ class ObjectStorage:
if not bucket_path.exists():
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
version_dir = self._version_dir(bucket_id, safe_key)
if not version_dir.exists():
version_dir = self._legacy_version_dir(bucket_id, safe_key)
@@ -782,7 +814,7 @@ class ObjectStorage:
if not bucket_path.exists():
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
version_dir = self._version_dir(bucket_id, safe_key)
data_path = version_dir / f"{version_id}.bin"
meta_path = version_dir / f"{version_id}.json"
@@ -819,7 +851,7 @@ class ObjectStorage:
if not bucket_path.exists():
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
version_dir = self._version_dir(bucket_id, safe_key)
data_path = version_dir / f"{version_id}.bin"
meta_path = version_dir / f"{version_id}.json"
@@ -910,7 +942,7 @@ class ObjectStorage:
if not bucket_path.exists():
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
upload_id = uuid.uuid4().hex
upload_root = self._multipart_dir(bucket_id, upload_id)
upload_root.mkdir(parents=True, exist_ok=False)
@@ -995,6 +1027,102 @@ class ObjectStorage:
return record["etag"]
def upload_part_copy(
    self,
    bucket_name: str,
    upload_id: str,
    part_number: int,
    source_bucket: str,
    source_key: str,
    start_byte: Optional[int] = None,
    end_byte: Optional[int] = None,
) -> Dict[str, Any]:
    """Copy a byte range of an existing object into a multipart upload as one part.

    ``start_byte`` and ``end_byte`` are inclusive and default to the whole
    source object. The part is written to a temporary file, renamed into
    place, and then recorded in the upload's manifest under a file lock.

    Returns:
        A dict with the part's MD5 ``etag`` and its ``last_modified`` time (UTC).

    Raises:
        StorageError: For an out-of-range part number, an invalid byte range,
            an unknown upload, or a manifest that cannot be read or updated.
    """
    if part_number < 1 or part_number > 10000:
        raise StorageError("part_number must be between 1 and 10000")

    source_path = self.get_object_path(source_bucket, source_key)
    source_size = source_path.stat().st_size

    first = 0 if start_byte is None else start_byte
    last = source_size - 1 if end_byte is None else end_byte
    if first < 0 or last >= source_size or first > last:
        raise StorageError("Invalid byte range")

    bucket_id = self._bucket_path(bucket_name).name
    upload_root = self._multipart_dir(bucket_id, upload_id)
    if not upload_root.exists():
        # Fall back to the legacy multipart location before giving up.
        upload_root = self._legacy_multipart_dir(bucket_id, upload_id)
        if not upload_root.exists():
            raise StorageError("Multipart upload not found")

    digest = hashlib.md5()
    part_filename = f"part-{part_number:05d}.part"
    part_path = upload_root / part_filename
    temp_path = upload_root / f".{part_filename}.tmp"

    try:
        with source_path.open("rb") as reader:
            reader.seek(first)
            remaining = last - first + 1
            with temp_path.open("wb") as writer:
                while remaining > 0:
                    chunk = reader.read(min(65536, remaining))
                    if not chunk:
                        break
                    digest.update(chunk)
                    writer.write(chunk)
                    remaining -= len(chunk)
        temp_path.replace(part_path)
    except OSError:
        try:
            temp_path.unlink(missing_ok=True)
        except OSError:
            pass
        raise

    record = {
        "etag": digest.hexdigest(),
        "size": part_path.stat().st_size,
        "filename": part_filename,
    }

    manifest_path = upload_root / self.MULTIPART_MANIFEST
    lock_path = upload_root / ".manifest.lock"

    max_retries = 3
    for attempt in range(max_retries):
        may_retry = attempt < max_retries - 1
        backoff = 0.1 * (attempt + 1)
        try:
            with lock_path.open("w") as lock_file, _file_lock(lock_file):
                try:
                    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
                except (OSError, json.JSONDecodeError) as exc:
                    if may_retry:
                        time.sleep(backoff)
                        continue
                    raise StorageError("Multipart manifest unreadable") from exc

                manifest.setdefault("parts", {})[str(part_number)] = record
                manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
            break
        except OSError as exc:
            if may_retry:
                time.sleep(backoff)
                continue
            raise StorageError(f"Failed to update multipart manifest: {exc}") from exc

    modified_at = datetime.fromtimestamp(part_path.stat().st_mtime, timezone.utc)
    return {
        "etag": record["etag"],
        "last_modified": modified_at,
    }
def complete_multipart_upload(
self,
bucket_name: str,
@@ -1034,7 +1162,7 @@ class ObjectStorage:
total_size += record.get("size", 0)
validated.sort(key=lambda entry: entry[0])
safe_key = self._sanitize_object_key(manifest["object_key"])
safe_key = self._sanitize_object_key(manifest["object_key"], self._object_key_max_length_bytes)
destination = bucket_path / safe_key
is_overwrite = destination.exists()
@@ -1057,36 +1185,28 @@ class ObjectStorage:
)
destination.parent.mkdir(parents=True, exist_ok=True)
lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock"
lock_file_path.parent.mkdir(parents=True, exist_ok=True)
try:
with lock_file_path.open("w") as lock_file:
with _file_lock(lock_file):
if self._is_versioning_enabled(bucket_path) and destination.exists():
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
checksum = hashlib.md5()
with destination.open("wb") as target:
for _, record in validated:
part_path = upload_root / record["filename"]
if not part_path.exists():
raise StorageError(f"Missing part file {record['filename']}")
with part_path.open("rb") as chunk:
while True:
data = chunk.read(1024 * 1024)
if not data:
break
checksum.update(data)
target.write(data)
lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock"
try:
with _atomic_lock_file(lock_file_path):
if self._is_versioning_enabled(bucket_path) and destination.exists():
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
checksum = hashlib.md5()
with destination.open("wb") as target:
for _, record in validated:
part_path = upload_root / record["filename"]
if not part_path.exists():
raise StorageError(f"Missing part file {record['filename']}")
with part_path.open("rb") as chunk:
while True:
data = chunk.read(1024 * 1024)
if not data:
break
checksum.update(data)
target.write(data)
except BlockingIOError:
raise StorageError("Another upload to this key is in progress")
finally:
try:
lock_file_path.unlink(missing_ok=True)
except OSError:
pass
shutil.rmtree(upload_root, ignore_errors=True)
@@ -1213,7 +1333,7 @@ class ObjectStorage:
def _object_path(self, bucket_name: str, object_key: str) -> Path:
bucket_path = self._bucket_path(bucket_name)
safe_key = self._sanitize_object_key(object_key)
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
return bucket_path / safe_key
def _system_root_path(self) -> Path:
@@ -1349,7 +1469,8 @@ class ObjectStorage:
if meta_files:
meta_cache = {}
with ThreadPoolExecutor(max_workers=min(64, len(meta_files))) as executor:
max_workers = min((os.cpu_count() or 4) * 2, len(meta_files), 16)
with ThreadPoolExecutor(max_workers=max_workers) as executor:
for key, etag in executor.map(read_meta_file, meta_files):
if etag:
meta_cache[key] = etag
@@ -1429,7 +1550,7 @@ class ObjectStorage:
current_version = self._cache_version.get(bucket_id, 0)
if current_version != cache_version:
objects = self._build_object_cache(bucket_path)
while len(self._object_cache) >= self.OBJECT_CACHE_MAX_SIZE:
while len(self._object_cache) >= self._object_cache_max_size:
self._object_cache.popitem(last=False)
self._object_cache[bucket_id] = (objects, time.time())
@@ -1764,16 +1885,16 @@ class ObjectStorage:
return name
@staticmethod
def _sanitize_object_key(object_key: str) -> Path:
def _sanitize_object_key(object_key: str, max_length_bytes: int = 1024) -> Path:
if not object_key:
raise StorageError("Object key required")
if len(object_key.encode("utf-8")) > 1024:
raise StorageError("Object key exceeds maximum length of 1024 bytes")
if "\x00" in object_key:
raise StorageError("Object key contains null bytes")
object_key = unicodedata.normalize("NFC", object_key)
if len(object_key.encode("utf-8")) > max_length_bytes:
raise StorageError(f"Object key exceeds maximum length of {max_length_bytes} bytes")
if object_key.startswith(("/", "\\")):
raise StorageError("Object key cannot start with a slash")
object_key = unicodedata.normalize("NFC", object_key)
candidate = Path(object_key)
if ".." in candidate.parts:

215
app/system_metrics.py Normal file
View File

@@ -0,0 +1,215 @@
from __future__ import annotations
import json
import logging
import threading
import time
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, TYPE_CHECKING
import psutil
if TYPE_CHECKING:
from .storage import ObjectStorage
logger = logging.getLogger(__name__)
@dataclass
class SystemMetricsSnapshot:
    """One point-in-time sample of host and storage usage."""

    # Time the sample was taken; serialised as UTC.
    timestamp: datetime
    cpu_percent: float
    memory_percent: float
    disk_percent: float
    storage_bytes: int

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the snapshot to a JSON-friendly dict.

        The timestamp is emitted as ``YYYY-MM-DDTHH:MM:SSZ``. Aware timestamps
        are converted to UTC first so the ``Z`` suffix is always truthful;
        naive timestamps are taken to be UTC already.
        """
        moment = self.timestamp
        if moment.utcoffset() is None:
            moment = moment.replace(tzinfo=timezone.utc)
        else:
            moment = moment.astimezone(timezone.utc)
        return {
            "timestamp": moment.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "cpu_percent": round(self.cpu_percent, 2),
            "memory_percent": round(self.memory_percent, 2),
            "disk_percent": round(self.disk_percent, 2),
            "storage_bytes": self.storage_bytes,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SystemMetricsSnapshot":
        """Rebuild a snapshot from the dict form produced by ``to_dict``.

        Missing numeric fields default to zero. A timestamp without an offset
        is interpreted as UTC, so later ``.timestamp()`` comparisons do not
        depend on the host's local time zone.

        Raises:
            KeyError: If ``"timestamp"`` is missing.
            ValueError: If the timestamp is not a valid ISO-8601 string.
        """
        timestamp_str = data["timestamp"]
        # A trailing "Z" is rewritten to an explicit offset before parsing.
        if timestamp_str.endswith("Z"):
            timestamp_str = timestamp_str[:-1] + "+00:00"
        parsed = datetime.fromisoformat(timestamp_str)
        if parsed.utcoffset() is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return cls(
            timestamp=parsed,
            cpu_percent=data.get("cpu_percent", 0.0),
            memory_percent=data.get("memory_percent", 0.0),
            disk_percent=data.get("disk_percent", 0.0),
            storage_bytes=data.get("storage_bytes", 0),
        )
class SystemMetricsCollector:
    """Collect periodic system metric snapshots on a background thread.

    Snapshots are kept in memory, pruned to ``retention_hours``, and persisted
    to ``.myfsio.sys/config/metrics_history.json`` under the storage root.
    Constructing the collector loads existing history and starts the thread.
    """

    def __init__(
        self,
        storage_root: Path,
        interval_minutes: int = 5,
        retention_hours: int = 24,
    ):
        """Load persisted history and start the snapshot thread.

        Args:
            storage_root: Root directory used for the disk-usage probe and the history file.
            interval_minutes: Minutes between snapshots.
            retention_hours: Snapshots older than this are discarded.
        """
        self.storage_root = storage_root
        self.interval_seconds = interval_minutes * 60
        self.retention_hours = retention_hours
        self._lock = threading.Lock()
        self._shutdown = threading.Event()
        self._snapshots: List[SystemMetricsSnapshot] = []
        self._storage_ref: Optional["ObjectStorage"] = None

        self._load_history()

        self._snapshot_thread = threading.Thread(
            target=self._snapshot_loop,
            name="system-metrics-snapshot",
            daemon=True,
        )
        self._snapshot_thread.start()

    def set_storage(self, storage: "ObjectStorage") -> None:
        """Attach the storage backend used to compute per-bucket totals."""
        with self._lock:
            self._storage_ref = storage

    def _config_path(self) -> Path:
        """Return the path of the persisted history file."""
        return self.storage_root / ".myfsio.sys" / "config" / "metrics_history.json"

    def _load_history(self) -> None:
        """Load persisted snapshots; a missing or malformed file leaves history empty."""
        config_path = self._config_path()
        if not config_path.exists():
            return
        try:
            data = json.loads(config_path.read_text(encoding="utf-8"))
            history_data = data.get("history", [])
            self._snapshots = [SystemMetricsSnapshot.from_dict(s) for s in history_data]
            self._prune_old_snapshots()
        except (OSError, ValueError, KeyError, TypeError, AttributeError) as e:
            # ValueError covers JSON decode errors and unparsable timestamps.
            # TypeError and AttributeError cover unexpected JSON shapes, e.g. a
            # top-level list. Letting any of these escape would abort __init__.
            logger.warning("Failed to load system metrics history: %s", e)

    def _save_history(self) -> None:
        """Write the in-memory snapshots to disk; failures are logged, not raised."""
        config_path = self._config_path()
        try:
            config_path.parent.mkdir(parents=True, exist_ok=True)
            data = {"history": [s.to_dict() for s in self._snapshots]}
            config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
        except OSError as e:
            logger.warning("Failed to save system metrics history: %s", e)

    def _prune_old_snapshots(self) -> None:
        """Drop snapshots older than the retention window."""
        if not self._snapshots:
            return
        cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600)
        self._snapshots = [
            s for s in self._snapshots if s.timestamp.timestamp() > cutoff
        ]

    def _snapshot_loop(self) -> None:
        """Thread body: take one snapshot per interval until shutdown is requested."""
        while not self._shutdown.is_set():
            self._shutdown.wait(timeout=self.interval_seconds)
            if not self._shutdown.is_set():
                self._take_snapshot()

    def _take_snapshot(self) -> None:
        """Sample CPU, memory, disk and storage usage, then append and persist the result.

        Any failure is logged as a warning; this method does not raise.
        """
        try:
            cpu_percent = psutil.cpu_percent(interval=0.1)
            memory = psutil.virtual_memory()
            disk = psutil.disk_usage(str(self.storage_root))

            storage_bytes = 0
            # Hold the lock only to read the reference, not while walking buckets.
            with self._lock:
                storage = self._storage_ref
            if storage:
                try:
                    buckets = storage.list_buckets()
                    for bucket in buckets:
                        stats = storage.bucket_stats(bucket.name, cache_ttl=60)
                        storage_bytes += stats.get("total_bytes", stats.get("bytes", 0))
                except Exception as e:
                    logger.warning("Failed to collect bucket stats: %s", e)

            snapshot = SystemMetricsSnapshot(
                timestamp=datetime.now(timezone.utc),
                cpu_percent=cpu_percent,
                memory_percent=memory.percent,
                disk_percent=disk.percent,
                storage_bytes=storage_bytes,
            )

            with self._lock:
                self._snapshots.append(snapshot)
                self._prune_old_snapshots()
                self._save_history()

            logger.debug(
                "System metrics snapshot taken: CPU=%.1f%%, Memory=%.1f%%",
                cpu_percent,
                memory.percent,
            )
        except Exception as e:
            logger.warning("Failed to take system metrics snapshot: %s", e)

    def get_current(self) -> Dict[str, Any]:
        """Return a live reading of CPU, memory, disk and application totals.

        Bucket totals are best-effort: if collecting them fails, the failure is
        logged and whatever was accumulated so far is returned.
        """
        cpu_percent = psutil.cpu_percent(interval=0.1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage(str(self.storage_root))
        boot_time = psutil.boot_time()
        uptime_seconds = time.time() - boot_time
        uptime_days = int(uptime_seconds / 86400)

        total_buckets = 0
        total_objects = 0
        total_bytes_used = 0
        total_versions = 0

        with self._lock:
            storage = self._storage_ref
        if storage:
            try:
                buckets = storage.list_buckets()
                total_buckets = len(buckets)
                for bucket in buckets:
                    stats = storage.bucket_stats(bucket.name, cache_ttl=60)
                    total_objects += stats.get("total_objects", stats.get("objects", 0))
                    total_bytes_used += stats.get("total_bytes", stats.get("bytes", 0))
                    total_versions += stats.get("version_count", 0)
            except Exception as e:
                logger.warning("Failed to collect current bucket stats: %s", e)

        return {
            "cpu_percent": round(cpu_percent, 2),
            "memory": {
                "total": memory.total,
                "available": memory.available,
                "used": memory.used,
                "percent": round(memory.percent, 2),
            },
            "disk": {
                "total": disk.total,
                "free": disk.free,
                "used": disk.used,
                "percent": round(disk.percent, 2),
            },
            "app": {
                "buckets": total_buckets,
                "objects": total_objects,
                "versions": total_versions,
                "storage_bytes": total_bytes_used,
                "uptime_days": uptime_days,
            },
        }

    def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]:
        """Return stored snapshots as dicts, optionally limited to the last ``hours`` hours.

        A falsy ``hours`` (None or 0) returns the full retained history.
        """
        with self._lock:
            snapshots = list(self._snapshots)

        if hours:
            cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)
            snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff]

        return [s.to_dict() for s in snapshots]

    def shutdown(self) -> None:
        """Stop the snapshot thread after taking one final snapshot."""
        self._shutdown.set()
        self._take_snapshot()
        self._snapshot_thread.join(timeout=5.0)

748
app/ui.py
View File

@@ -38,6 +38,7 @@ from .kms import KMSManager
from .replication import ReplicationManager, ReplicationRule
from .s3_api import _generate_presigned_url
from .secret_store import EphemeralSecretStore
from .site_registry import SiteRegistry, SiteInfo, PeerSite
from .storage import ObjectStorage, StorageError
ui_bp = Blueprint("ui", __name__, template_folder="../templates", url_prefix="/ui")
@@ -145,6 +146,10 @@ def _operation_metrics():
return current_app.extensions.get("operation_metrics")
def _site_registry() -> SiteRegistry:
    """Return the ``site_registry`` extension registered on the current Flask app."""
    extensions = current_app.extensions
    return extensions["site_registry"]
def _format_bytes(num: int) -> str:
step = 1024
units = ["B", "KB", "MB", "GB", "TB", "PB"]
@@ -158,69 +163,6 @@ def _format_bytes(num: int) -> str:
return f"{value:.1f} PB"
_metrics_last_save_time: float = 0.0
def _get_metrics_history_path() -> Path:
    """Return the metrics history file path under the configured ``STORAGE_ROOT``."""
    config_dir = Path(current_app.config["STORAGE_ROOT"]) / ".myfsio.sys" / "config"
    return config_dir / "metrics_history.json"
def _load_metrics_history() -> dict:
    """Read the metrics history file; return an empty history if it is missing or unreadable."""
    history_file = _get_metrics_history_path()
    if history_file.exists():
        try:
            return json.loads(history_file.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError):
            pass
    return {"history": []}
def _save_metrics_snapshot(cpu_percent: float, memory_percent: float, disk_percent: float, storage_bytes: int) -> None:
    """Append a metrics snapshot to the history file, at most once per configured interval.

    Does nothing when ``METRICS_HISTORY_ENABLED`` is off or the interval has
    not elapsed since the last successful save. Entries older than the
    retention window are dropped; a failed write is silently ignored.
    """
    global _metrics_last_save_time

    config = current_app.config
    if not config.get("METRICS_HISTORY_ENABLED", False):
        return

    import time
    from datetime import datetime, timezone

    interval_minutes = config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5)
    now_ts = time.time()
    elapsed = now_ts - _metrics_last_save_time
    if elapsed < interval_minutes * 60:
        return

    history_file = _get_metrics_history_path()
    history_file.parent.mkdir(parents=True, exist_ok=True)

    data = _load_metrics_history()
    entries = data.get("history", [])
    retention_hours = config.get("METRICS_HISTORY_RETENTION_HOURS", 24)

    now = datetime.now(timezone.utc)
    entries.append({
        "timestamp": now.strftime("%Y-%m-%dT%H:%M:%SZ"),
        "cpu_percent": round(cpu_percent, 2),
        "memory_percent": round(memory_percent, 2),
        "disk_percent": round(disk_percent, 2),
        "storage_bytes": storage_bytes,
    })

    cutoff = now.timestamp() - (retention_hours * 3600)
    retained = []
    for entry in entries:
        taken_at = datetime.fromisoformat(entry["timestamp"].replace("Z", "+00:00"))
        if taken_at.timestamp() > cutoff:
            retained.append(entry)
    data["history"] = retained

    try:
        history_file.write_text(json.dumps(data, indent=2), encoding="utf-8")
        _metrics_last_save_time = now_ts
    except OSError:
        pass
def _friendly_error_message(exc: Exception) -> str:
message = str(exc) or "An unexpected error occurred"
if isinstance(exc, IamError):
@@ -533,6 +475,7 @@ def bucket_detail(bucket_name: str):
kms_enabled = current_app.config.get("KMS_ENABLED", False)
encryption_enabled = current_app.config.get("ENCRYPTION_ENABLED", False)
lifecycle_enabled = current_app.config.get("LIFECYCLE_ENABLED", False)
site_sync_enabled = current_app.config.get("SITE_SYNC_ENABLED", False)
can_manage_encryption = can_manage_versioning
bucket_quota = storage.get_bucket_quota(bucket_name)
@@ -585,6 +528,7 @@ def bucket_detail(bucket_name: str):
bucket_quota=bucket_quota,
bucket_stats=bucket_stats,
can_manage_quota=can_manage_quota,
site_sync_enabled=site_sync_enabled,
)
@@ -1152,7 +1096,9 @@ def object_presign(bucket_name: str, object_key: str):
expires = int(payload.get("expires_in", 900))
except (TypeError, ValueError):
return jsonify({"error": "expires_in must be an integer"}), 400
expires = max(1, min(expires, 7 * 24 * 3600))
min_expiry = current_app.config.get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1)
max_expiry = current_app.config.get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800)
expires = max(min_expiry, min(expires, max_expiry))
storage = _storage()
if not storage.bucket_exists(bucket_name):
return jsonify({"error": "Bucket does not exist"}), 404
@@ -2240,8 +2186,6 @@ def metrics_api():
uptime_seconds = time.time() - boot_time
uptime_days = int(uptime_seconds / 86400)
_save_metrics_snapshot(cpu_percent, memory.percent, disk.percent, total_bytes_used)
return jsonify({
"cpu_percent": round(cpu_percent, 2),
"memory": {
@@ -2276,23 +2220,15 @@ def metrics_history():
except IamError:
return jsonify({"error": "Access denied"}), 403
if not current_app.config.get("METRICS_HISTORY_ENABLED", False):
system_metrics = current_app.extensions.get("system_metrics")
if not system_metrics:
return jsonify({"enabled": False, "history": []})
hours = request.args.get("hours", type=int)
if hours is None:
hours = current_app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24)
data = _load_metrics_history()
history = data.get("history", [])
if hours:
from datetime import datetime, timezone
cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)
history = [
h for h in history
if datetime.fromisoformat(h["timestamp"].replace("Z", "+00:00")).timestamp() > cutoff
]
history = system_metrics.get_history(hours=hours)
return jsonify({
"enabled": True,
@@ -2732,6 +2668,664 @@ def list_buckets_for_copy(bucket_name: str):
return jsonify({"buckets": allowed})
@ui_bp.get("/sites")
def sites_dashboard():
    """Render the sites dashboard with the local site, its peers and per-peer rule counts.

    Requires ``iam:*`` authorization; otherwise flashes an error and redirects
    to the buckets overview.
    """
    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:*")
    except IamError:
        flash("Access denied: Site management requires admin permissions", "danger")
        return redirect(url_for("ui.buckets_overview"))

    registry = _site_registry()
    local_site = registry.get_local_site()
    peers = registry.list_peers()
    connections = _connections().list()

    replication = _replication()
    all_rules = replication.list_rules()

    peers_with_stats = []
    for peer in peers:
        # Only peers linked to a connection can have replication rules pointing at them.
        if peer.connection_id:
            peer_rules = [r for r in all_rules if r.target_connection_id == peer.connection_id]
        else:
            peer_rules = []
        peers_with_stats.append({
            "peer": peer,
            "buckets_syncing": len(peer_rules),
            "has_connection": bool(peer.connection_id),
            "has_bidirectional": any(r.mode == "bidirectional" for r in peer_rules),
        })

    return render_template(
        "sites.html",
        principal=principal,
        local_site=local_site,
        peers=peers,
        peers_with_stats=peers_with_stats,
        connections=connections,
        config_site_id=current_app.config.get("SITE_ID"),
        config_site_endpoint=current_app.config.get("SITE_ENDPOINT"),
        config_site_region=current_app.config.get("SITE_REGION", "us-east-1"),
    )
@ui_bp.post("/sites/local")
def update_local_site():
    """Update the local site's registry entry from the submitted form.

    Requires ``iam:*`` authorization and a non-empty ``site_id``. A
    non-integer ``priority`` falls back to 100. The existing ``created_at``
    is carried over when a local site is already registered.
    """
    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:*")
    except IamError:
        flash("Access denied", "danger")
        return redirect(url_for("ui.sites_dashboard"))

    form = request.form
    site_id = form.get("site_id", "").strip()
    endpoint = form.get("endpoint", "").strip()
    region = form.get("region", "us-east-1").strip()
    raw_priority = form.get("priority", "100")
    display_name = form.get("display_name", "").strip()

    if not site_id:
        flash("Site ID is required", "danger")
        return redirect(url_for("ui.sites_dashboard"))

    try:
        priority_int = int(raw_priority)
    except ValueError:
        priority_int = 100

    registry = _site_registry()
    existing = registry.get_local_site()
    created_at = existing.created_at if existing else None

    registry.set_local_site(
        SiteInfo(
            site_id=site_id,
            endpoint=endpoint,
            region=region,
            priority=priority_int,
            display_name=display_name or site_id,
            created_at=created_at,
        )
    )

    flash("Local site configuration updated", "success")
    return redirect(url_for("ui.sites_dashboard"))
@ui_bp.post("/sites/peers")
def add_peer_site():
    """Register a new peer site from the submitted form.

    Requires the ``iam:*`` permission. ``site_id`` and ``endpoint`` are
    mandatory, the site id must not already be registered, and a supplied
    ``connection_id`` must refer to an existing connection. A priority that
    is not an integer falls back to 100. When the new peer has a connection
    the user is sent on to the replication wizard; otherwise back to the
    sites dashboard.
    """

    def reject(message):
        # Every validation failure reports the problem and returns to the dashboard.
        flash(message, "danger")
        return redirect(url_for("ui.sites_dashboard"))

    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:*")
    except IamError:
        return reject("Access denied")

    form = request.form
    site_id = form.get("site_id", "").strip()
    endpoint = form.get("endpoint", "").strip()
    connection_id = form.get("connection_id", "").strip() or None

    if not site_id:
        return reject("Site ID is required")
    if not endpoint:
        return reject("Endpoint is required")

    try:
        priority_int = int(form.get("priority", "100"))
    except ValueError:
        priority_int = 100

    registry = _site_registry()
    if registry.get_peer(site_id):
        return reject(f"Peer site '{site_id}' already exists")
    if connection_id and not _connections().get(connection_id):
        return reject(f"Connection '{connection_id}' not found")

    registry.add_peer(
        PeerSite(
            site_id=site_id,
            endpoint=endpoint,
            region=form.get("region", "us-east-1").strip(),
            priority=priority_int,
            display_name=form.get("display_name", "").strip() or site_id,
            connection_id=connection_id,
        )
    )
    flash(f"Peer site '{site_id}' added", "success")

    if connection_id:
        return redirect(url_for("ui.replication_wizard", site_id=site_id))
    return redirect(url_for("ui.sites_dashboard"))
@ui_bp.post("/sites/peers/<site_id>/update")
def update_peer_site(site_id: str):
    """Update an existing peer site from the submitted form.

    Requires the ``iam:*`` permission. Fields missing from the form keep the
    peer's stored values, and an empty ``connection_id`` also keeps the
    stored connection. A priority that is not an integer keeps the stored
    priority. Creation time and health information are carried over
    unchanged. Always redirects back to the sites dashboard.
    """
    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:*")
    except IamError:
        flash("Access denied", "danger")
        return redirect(url_for("ui.sites_dashboard"))

    registry = _site_registry()
    current = registry.get_peer(site_id)
    if not current:
        flash(f"Peer site '{site_id}' not found", "danger")
        return redirect(url_for("ui.sites_dashboard"))

    form = request.form
    new_endpoint = form.get("endpoint", current.endpoint).strip()
    new_region = form.get("region", current.region).strip()
    raw_priority = form.get("priority", str(current.priority))
    new_display_name = form.get("display_name", current.display_name).strip()

    submitted_connection = form.get("connection_id", "").strip()
    if submitted_connection:
        connection_id = submitted_connection
    else:
        connection_id = current.connection_id

    try:
        priority_int = int(raw_priority)
    except ValueError:
        priority_int = current.priority

    if connection_id and not _connections().get(connection_id):
        flash(f"Connection '{connection_id}' not found", "danger")
        return redirect(url_for("ui.sites_dashboard"))

    registry.update_peer(
        PeerSite(
            site_id=site_id,
            endpoint=new_endpoint,
            region=new_region,
            priority=priority_int,
            display_name=new_display_name or site_id,
            connection_id=connection_id,
            created_at=current.created_at,
            is_healthy=current.is_healthy,
            last_health_check=current.last_health_check,
        )
    )

    flash(f"Peer site '{site_id}' updated", "success")
    return redirect(url_for("ui.sites_dashboard"))
@ui_bp.post("/sites/peers/<site_id>/delete")
def delete_peer_site(site_id: str):
    """Remove a peer site from the registry.

    Requires the ``iam:*`` permission. Flashes whether the peer was deleted
    or was not found, then redirects back to the sites dashboard.
    """
    dashboard = url_for("ui.sites_dashboard")

    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:*")
    except IamError:
        flash("Access denied", "danger")
        return redirect(dashboard)

    if _site_registry().delete_peer(site_id):
        message, category = f"Peer site '{site_id}' deleted", "success"
    else:
        message, category = f"Peer site '{site_id}' not found", "danger"
    flash(message, category)
    return redirect(dashboard)
@ui_bp.get("/sites/peers/<site_id>/health")
def check_peer_site_health(site_id: str):
    """Probe a peer's endpoint and return the outcome as JSON.

    Requires the ``iam:*`` permission (403 otherwise); an unknown peer
    yields 404. The health probe goes through the peer's connection; a
    missing or dangling connection is reported in the ``error`` field and
    counts as unhealthy. The result is also stored in the site registry.
    """
    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:*")
    except IamError:
        return jsonify({"error": "Access denied"}), 403

    registry = _site_registry()
    peer = registry.get_peer(site_id)
    if not peer:
        return jsonify({"error": f"Peer site '{site_id}' not found"}), 404

    is_healthy = False
    error_message = None
    if not peer.connection_id:
        error_message = "No connection configured for this peer"
    else:
        connection = _connections().get(peer.connection_id)
        if connection:
            is_healthy = _replication().check_endpoint_health(connection)
        else:
            error_message = f"Connection '{peer.connection_id}' not found"

    registry.update_health(site_id, is_healthy)

    payload = {"site_id": site_id, "is_healthy": is_healthy}
    if error_message:
        payload["error"] = error_message
    return jsonify(payload)
def _peer_endpoint_block_reason(endpoint, allow_internal):
    """Return why ``endpoint`` must not be contacted, or ``None`` if it may be.

    Cloud metadata hosts are always rejected. Loopback, private, reserved and
    link-local targets are rejected unless ``allow_internal`` is true. An
    endpoint that cannot be parsed is rejected too, so a validation error can
    never silently let a request through.
    """
    import ipaddress

    metadata_message = "Peer endpoint points to cloud metadata service (SSRF protection)"
    internal_message = (
        "Peer endpoint points to internal or private address "
        "(set ALLOW_INTERNAL_ENDPOINTS=true for self-hosted deployments)"
    )
    cloud_metadata_hosts = {"metadata.google.internal", "169.254.169.254"}

    try:
        # A trailing dot names the same host, so strip it before comparing.
        hostname = (urlparse(endpoint).hostname or "").rstrip(".").lower()
    except (ValueError, TypeError, AttributeError):
        return "Peer endpoint could not be parsed (SSRF protection)"

    try:
        ip = ipaddress.ip_address(hostname)
    except ValueError:
        ip = None
    else:
        # Judge an IPv4-mapped IPv6 address (::ffff:a.b.c.d) by its IPv4 target.
        mapped = getattr(ip, "ipv4_mapped", None)
        if mapped is not None:
            ip = mapped

    if hostname in cloud_metadata_hosts or (ip is not None and str(ip) in cloud_metadata_hosts):
        return metadata_message
    if allow_internal:
        return None

    if ip is not None:
        if ip.is_private or ip.is_loopback or ip.is_reserved or ip.is_link_local:
            return internal_message
        return None

    # Not a canonical IP literal: reject "localhost" and dotted prefixes of the
    # loopback/RFC 1918 ranges (172.16.0.0/12 spans 172.16. through 172.31.).
    blocked_prefixes = ("localhost", "127.", "10.", "192.168.") + tuple(
        f"172.{octet}." for octet in range(16, 32)
    )
    if hostname.startswith(blocked_prefixes) or hostname in {p.rstrip(".") for p in blocked_prefixes}:
        return internal_message
    return None


@ui_bp.get("/sites/peers/<site_id>/bidirectional-status")
def check_peer_bidirectional_status(site_id: str):
    """Report whether bidirectional replication with a peer is fully set up.

    Requires the ``iam:*`` permission (403 otherwise); an unknown peer yields
    404. The JSON result lists the local bidirectional rules targeting the
    peer, the status obtained from the peer's ``/admin/sites`` endpoint, and
    a list of ``issues`` (each with ``code``, ``message`` and ``severity``).
    ``is_fully_configured`` is true only when no issue has severity
    ``error`` and at least one local bidirectional rule exists. Checks that
    make further verification pointless return early, leaving
    ``is_fully_configured`` false.
    """
    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:*")
    except IamError:
        return jsonify({"error": "Access denied"}), 403

    registry = _site_registry()
    peer = registry.get_peer(site_id)
    if not peer:
        return jsonify({"error": f"Peer site '{site_id}' not found"}), 404

    local_site = registry.get_local_site()
    replication = _replication()

    # Require a real connection id so that a peer without a connection never
    # matches rules whose target connection is also unset.
    local_bidir_rules = [
        {
            "bucket_name": rule.bucket_name,
            "target_bucket": rule.target_bucket,
            "enabled": rule.enabled,
        }
        for rule in replication.list_rules()
        if peer.connection_id
        and rule.target_connection_id == peer.connection_id
        and rule.mode == "bidirectional"
    ]

    result = {
        "site_id": site_id,
        "local_site_id": local_site.site_id if local_site else None,
        "local_endpoint": local_site.endpoint if local_site else None,
        "local_bidirectional_rules": local_bidir_rules,
        "local_site_sync_enabled": current_app.config.get("SITE_SYNC_ENABLED", False),
        "remote_status": None,
        "issues": [],
        "is_fully_configured": False,
    }

    def add_issue(code, message, severity):
        result["issues"].append({"code": code, "message": message, "severity": severity})

    if not local_site or not local_site.site_id:
        add_issue("NO_LOCAL_SITE_ID", "Local site identity not configured", "error")
    if not local_site or not local_site.endpoint:
        add_issue(
            "NO_LOCAL_ENDPOINT",
            "Local site endpoint not configured (remote site cannot reach back)",
            "error",
        )

    if not peer.connection_id:
        add_issue("NO_CONNECTION", "No connection configured for this peer", "error")
        return jsonify(result)

    connection = _connections().get(peer.connection_id)
    if not connection:
        add_issue("CONNECTION_NOT_FOUND", f"Connection '{peer.connection_id}' not found", "error")
        return jsonify(result)

    if not local_bidir_rules:
        add_issue(
            "NO_LOCAL_BIDIRECTIONAL_RULES",
            "No bidirectional replication rules configured on this site",
            "warning",
        )
    if not result["local_site_sync_enabled"]:
        add_issue(
            "SITE_SYNC_DISABLED",
            "Site sync worker is disabled (SITE_SYNC_ENABLED=false). Pull operations will not work.",
            "warning",
        )

    if not replication.check_endpoint_health(connection):
        add_issue("REMOTE_UNREACHABLE", "Remote endpoint is not reachable", "error")
        return jsonify(result)

    # SSRF guard: validate the target before sending credentials to it.
    block_reason = _peer_endpoint_block_reason(
        peer.endpoint,
        current_app.config.get("ALLOW_INTERNAL_ENDPOINTS", False),
    )
    if block_reason:
        add_issue("ENDPOINT_NOT_ALLOWED", block_reason, "error")
        return jsonify(result)

    try:
        admin_url = peer.endpoint.rstrip("/") + "/admin/sites"
        resp = requests.get(
            admin_url,
            timeout=10,
            # Do not follow redirects: the redirect target has not been
            # validated and would receive the credential headers below.
            allow_redirects=False,
            headers={
                "Accept": "application/json",
                "X-Access-Key": connection.access_key,
                "X-Secret-Key": connection.secret_key,
            },
        )

        if resp.status_code == 200:
            try:
                remote_data = resp.json()
                if not isinstance(remote_data, dict):
                    raise ValueError("Expected JSON object")
                remote_local = remote_data.get("local")
                if remote_local is not None and not isinstance(remote_local, dict):
                    raise ValueError("Expected 'local' to be an object")
                remote_peers = remote_data.get("peers", [])
                if not isinstance(remote_peers, list):
                    raise ValueError("Expected 'peers' to be a list")
            except ValueError:
                # JSON decode errors are ValueError subclasses, so this also
                # covers a body that is not JSON at all.
                result["remote_status"] = {"reachable": True, "invalid_response": True}
                add_issue(
                    "REMOTE_INVALID_RESPONSE",
                    "Remote admin API returned invalid JSON",
                    "warning",
                )
                return jsonify(result)

            remote_status = {
                "reachable": True,
                "local_site": remote_local,
                "site_sync_enabled": None,
                "has_peer_for_us": False,
                "peer_connection_configured": False,
                "has_bidirectional_rules_for_us": False,
            }
            result["remote_status"] = remote_status

            # Only match on identity fields that are actually set locally;
            # otherwise an empty local value would match an empty remote one.
            our_site_id = local_site.site_id if local_site else None
            our_endpoint = local_site.endpoint if local_site else None
            for rp in remote_peers:
                if not isinstance(rp, dict):
                    continue
                matches_id = bool(our_site_id) and rp.get("site_id") == our_site_id
                matches_endpoint = bool(our_endpoint) and rp.get("endpoint") == our_endpoint
                if matches_id or matches_endpoint:
                    remote_status["has_peer_for_us"] = True
                    remote_status["peer_connection_configured"] = bool(rp.get("connection_id"))
                    break

            if not remote_status["has_peer_for_us"]:
                add_issue(
                    "REMOTE_NO_PEER_FOR_US",
                    "Remote site does not have this site registered as a peer",
                    "error",
                )
            elif not remote_status["peer_connection_configured"]:
                add_issue(
                    "REMOTE_NO_CONNECTION_FOR_US",
                    "Remote site has us as peer but no connection configured (cannot push back)",
                    "error",
                )
        elif resp.status_code in (401, 403):
            result["remote_status"] = {
                "reachable": True,
                "admin_access_denied": True,
            }
            add_issue(
                "REMOTE_ADMIN_ACCESS_DENIED",
                "Cannot verify remote configuration (admin access denied)",
                "warning",
            )
        else:
            result["remote_status"] = {
                "reachable": True,
                "admin_api_error": resp.status_code,
            }
            add_issue(
                "REMOTE_ADMIN_API_ERROR",
                f"Remote admin API returned status {resp.status_code}",
                "warning",
            )
    except requests.RequestException:
        result["remote_status"] = {
            "reachable": False,
            "error": "Connection failed",
        }
        add_issue("REMOTE_ADMIN_UNREACHABLE", "Could not reach remote admin API", "warning")
    except Exception:
        # Keep the endpoint best-effort, but record the traceback.
        current_app.logger.exception(
            "Bidirectional status verification failed for peer %s", site_id
        )
        add_issue("VERIFICATION_ERROR", "Internal error during verification", "warning")

    has_errors = any(issue["severity"] == "error" for issue in result["issues"])
    result["is_fully_configured"] = not has_errors and len(local_bidir_rules) > 0
    return jsonify(result)
@ui_bp.get("/sites/peers/<site_id>/replication-wizard")
def replication_wizard(site_id: str):
    """Render the replication setup wizard for one peer site.

    Requires the ``iam:*`` permission. The peer must exist and have a
    connection that can be resolved; otherwise the user is redirected to the
    sites dashboard with a flash message. Each local bucket is listed along
    with the mode and target of its existing rule when that rule already
    points at this peer's connection.
    """

    def back_to_dashboard(message, category):
        flash(message, category)
        return redirect(url_for("ui.sites_dashboard"))

    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:*")
    except IamError:
        return back_to_dashboard("Access denied", "danger")

    registry = _site_registry()
    peer = registry.get_peer(site_id)
    if not peer:
        return back_to_dashboard(f"Peer site '{site_id}' not found", "danger")
    if not peer.connection_id:
        return back_to_dashboard(
            "This peer has no connection configured. Add a connection first to set up replication.",
            "warning",
        )

    connection = _connections().get(peer.connection_id)
    if not connection:
        return back_to_dashboard(f"Connection '{peer.connection_id}' not found", "danger")

    buckets = _storage().list_buckets()
    replication = _replication()

    bucket_info = []
    for bucket in buckets:
        rule = replication.get_rule(bucket.name)
        targets_peer = rule and rule.target_connection_id == peer.connection_id
        if targets_peer:
            mode, target = rule.mode, rule.target_bucket
        else:
            mode = target = None
        bucket_info.append({
            "name": bucket.name,
            "has_rule": targets_peer,
            "existing_mode": mode,
            "existing_target": target,
        })

    return render_template(
        "replication_wizard.html",
        principal=principal,
        peer=peer,
        connection=connection,
        buckets=bucket_info,
        local_site=registry.get_local_site(),
        csrf_token=generate_csrf,
    )
@ui_bp.post("/sites/peers/<site_id>/replication-rules")
def create_peer_replication_rules(site_id: str):
    """Create replication rules towards a peer for the selected buckets.

    Requires the ``iam:*`` permission. The peer must exist and have a
    connection. For every bucket name in the ``buckets`` form list a rule
    is stored that targets the peer's connection; the target bucket comes
    from the ``target_<bucket>`` form field and defaults to the source
    bucket name. In ``all`` mode existing objects are replicated right after
    the rule is stored. Per-bucket failures do not abort the batch: they are
    logged with their traceback, counted, and summarised in a flash message.
    Always redirects back to the sites dashboard.
    """
    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:*")
    except IamError:
        flash("Access denied", "danger")
        return redirect(url_for("ui.sites_dashboard"))

    registry = _site_registry()
    peer = registry.get_peer(site_id)
    if not peer or not peer.connection_id:
        flash("Invalid peer site or no connection configured", "danger")
        return redirect(url_for("ui.sites_dashboard"))

    from .replication import REPLICATION_MODE_NEW_ONLY, REPLICATION_MODE_ALL
    import time as time_module

    selected_buckets = request.form.getlist("buckets")
    # NOTE(review): the mode is taken from the form as-is; confirm that
    # ReplicationRule or set_rule rejects unknown modes.
    mode = request.form.get("mode", REPLICATION_MODE_NEW_ONLY)

    if not selected_buckets:
        flash("No buckets selected", "warning")
        return redirect(url_for("ui.sites_dashboard"))

    created = 0
    failed = 0
    replication = _replication()

    for bucket_name in selected_buckets:
        target_bucket = request.form.get(f"target_{bucket_name}", bucket_name).strip()
        if not target_bucket:
            target_bucket = bucket_name

        try:
            rule = ReplicationRule(
                bucket_name=bucket_name,
                target_connection_id=peer.connection_id,
                target_bucket=target_bucket,
                enabled=True,
                mode=mode,
                created_at=time_module.time(),
            )
            replication.set_rule(rule)
            if mode == REPLICATION_MODE_ALL:
                replication.replicate_existing_objects(bucket_name)
            created += 1
        except Exception:
            # Best-effort per bucket, but keep the cause so an operator can
            # see why a rule was reported as failed.
            current_app.logger.exception(
                "Failed to create replication rule for bucket %r towards peer %r",
                bucket_name,
                site_id,
            )
            failed += 1

    if created > 0:
        flash(f"Created {created} replication rule(s) for {peer.display_name or peer.site_id}", "success")
    if failed > 0:
        flash(f"Failed to create {failed} rule(s)", "danger")

    return redirect(url_for("ui.sites_dashboard"))
@ui_bp.get("/sites/peers/<site_id>/sync-stats")
def get_peer_sync_stats(site_id: str):
    """Return aggregated replication statistics for one peer as JSON.

    Requires the ``iam:*`` permission (403 otherwise); an unknown peer
    yields 404 and a peer without a connection yields 400. Only rules that
    target the peer's connection are counted. The response holds totals
    across those rules plus a per-bucket breakdown under ``buckets``.
    """
    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:*")
    except IamError:
        return jsonify({"error": "Access denied"}), 403

    peer = _site_registry().get_peer(site_id)
    if not peer:
        return jsonify({"error": "Peer not found"}), 404
    if not peer.connection_id:
        return jsonify({"error": "No connection configured"}), 400

    replication = _replication()

    total_synced = 0
    total_pending = 0
    total_failed = 0
    total_bytes = 0
    latest_sync = None
    per_bucket = []

    for rule in replication.list_rules():
        if rule.target_connection_id != peer.connection_id:
            continue

        entry = {
            "bucket_name": rule.bucket_name,
            "target_bucket": rule.target_bucket,
            "mode": rule.mode,
            "enabled": rule.enabled,
        }

        if rule.stats:
            total_synced += rule.stats.objects_synced
            total_pending += rule.stats.objects_pending
            total_bytes += rule.stats.bytes_synced
            # Track the most recent sync time across all matching rules.
            if rule.stats.last_sync_at:
                if not latest_sync or rule.stats.last_sync_at > latest_sync:
                    latest_sync = rule.stats.last_sync_at
            entry["last_sync_at"] = rule.stats.last_sync_at
            entry["objects_synced"] = rule.stats.objects_synced
            entry["objects_pending"] = rule.stats.objects_pending

        failure_count = replication.get_failure_count(rule.bucket_name)
        total_failed += failure_count
        entry["failures"] = failure_count

        per_bucket.append(entry)

    return jsonify({
        "buckets_syncing": len(per_bucket),
        "objects_synced": total_synced,
        "objects_pending": total_pending,
        "objects_failed": total_failed,
        "bytes_synced": total_bytes,
        "last_sync_at": latest_sync,
        "buckets": per_bucket,
    })
@ui_bp.app_errorhandler(404)
def ui_not_found(error): # type: ignore[override]
prefix = ui_bp.url_prefix or ""

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
APP_VERSION = "0.2.2"
APP_VERSION = "0.2.5"
def get_version() -> str:

853
docs.md
View File

@@ -166,8 +166,21 @@ All configuration is done via environment variables. The table below lists every
| Variable | Default | Notes |
| --- | --- | --- |
| `RATE_LIMIT_DEFAULT` | `200 per minute` | Default rate limit for API endpoints. |
| `RATE_LIMIT_LIST_BUCKETS` | `60 per minute` | Rate limit for listing buckets (`GET /`). |
| `RATE_LIMIT_BUCKET_OPS` | `120 per minute` | Rate limit for bucket operations (PUT/DELETE/GET/POST on `/<bucket>`). |
| `RATE_LIMIT_OBJECT_OPS` | `240 per minute` | Rate limit for object operations (PUT/GET/DELETE/POST on `/<bucket>/<key>`). |
| `RATE_LIMIT_HEAD_OPS` | `100 per minute` | Rate limit for HEAD requests (bucket and object). |
| `RATE_LIMIT_STORAGE_URI` | `memory://` | Storage backend for rate limits. Use `redis://host:port` for distributed setups. |
### Server Configuration
| Variable | Default | Notes |
| --- | --- | --- |
| `SERVER_THREADS` | `0` (auto) | Waitress worker threads (1-64). Set to `0` for auto-calculation based on CPU cores (×2). |
| `SERVER_CONNECTION_LIMIT` | `0` (auto) | Maximum concurrent connections (10-1000). Set to `0` for auto-calculation based on available RAM. |
| `SERVER_BACKLOG` | `0` (auto) | TCP listen backlog (64-4096). Set to `0` for auto-calculation (connection_limit × 2). |
| `SERVER_CHANNEL_TIMEOUT` | `120` | Seconds before idle connections are closed (10-300). |
### Logging
| Variable | Default | Notes |
@@ -1239,12 +1252,22 @@ Replication uses a two-tier permission system:
This separation allows administrators to pre-configure where data should replicate, while allowing authorized users to toggle replication on/off without accessing connection credentials.
### Replication Modes
| Mode | Behavior |
|------|----------|
| `new_only` | Only replicate new/modified objects (default) |
| `all` | Sync all existing objects when rule is enabled |
| `bidirectional` | Two-way sync with Last-Write-Wins conflict resolution |
### Architecture
- **Source Instance**: The MyFSIO instance where you upload files. It runs the replication worker.
- **Target Instance**: Another MyFSIO instance (or any S3-compatible service like AWS S3, MinIO) that receives the copies.
Replication is **asynchronous** (happens in the background) and **one-way** (Source -> Target).
For `new_only` and `all` modes, replication is **asynchronous** (happens in the background) and **one-way** (Source -> Target).
For `bidirectional` mode, replication is **two-way** with automatic conflict resolution.
### Setup Guide
@@ -1346,16 +1369,117 @@ When paused, new objects uploaded to the source will not replicate until replica
> **Note:** Only admins can create new replication rules, change the target connection/bucket, or delete rules entirely.
### Bidirectional Replication (Active-Active)
### Bidirectional Site Replication
To set up two-way replication (Server A ↔ Server B):
For true two-way synchronization with automatic conflict resolution, use the `bidirectional` replication mode. This enables a background sync worker that periodically pulls changes from the remote site.
> **Important:** Both sites must be configured to sync with each other. Each site pushes its changes and pulls from the other. You must set up connections and replication rules on both ends.
#### Step 1: Enable Site Sync on Both Sites
Set these environment variables on **both** Site A and Site B:
```bash
SITE_SYNC_ENABLED=true
SITE_SYNC_INTERVAL_SECONDS=60 # How often to pull changes (default: 60)
SITE_SYNC_BATCH_SIZE=100 # Max objects per sync cycle (default: 100)
```
#### Step 2: Create IAM Users for Cross-Site Access
On each site, create an IAM user that the other site will use to connect:
| Site | Create User For | Required Permissions |
|------|-----------------|---------------------|
| Site A | Site B to connect | `read`, `write`, `list`, `delete` on target bucket |
| Site B | Site A to connect | `read`, `write`, `list`, `delete` on target bucket |
Example policy for the replication user:
```json
[{"bucket": "my-bucket", "actions": ["read", "write", "list", "delete"]}]
```
#### Step 3: Create Connections
On each site, add a connection pointing to the other:
**On Site A:**
- Go to **Connections** and add a connection to Site B
- Endpoint: `https://site-b.example.com`
- Credentials: Site B's IAM user (created in Step 2)
**On Site B:**
- Go to **Connections** and add a connection to Site A
- Endpoint: `https://site-a.example.com`
- Credentials: Site A's IAM user (created in Step 2)
#### Step 4: Enable Bidirectional Replication
On each site, go to the bucket's **Replication** tab and enable replication with mode `bidirectional`:
**On Site A:**
- Source bucket: `my-bucket`
- Target connection: Site B connection
- Target bucket: `my-bucket`
- Mode: **Bidirectional sync**
**On Site B:**
- Source bucket: `my-bucket`
- Target connection: Site A connection
- Target bucket: `my-bucket`
- Mode: **Bidirectional sync**
#### How It Works
- **PUSH**: Local changes replicate to remote immediately on write/delete
- **PULL**: Background worker fetches remote changes every `SITE_SYNC_INTERVAL_SECONDS`
- **Loop Prevention**: Replication traffic is identified by its User-Agent (`S3ReplicationAgent` or `SiteSyncAgent`) and is not replicated again, which prevents infinite sync loops
#### Conflict Resolution (Last-Write-Wins)
When the same object exists on both sites, the system uses Last-Write-Wins (LWW) based on `last_modified` timestamps:
- **Remote newer**: Pull the remote version
- **Local newer**: Keep the local version
- **Same timestamp**: Use ETag as tiebreaker (higher ETag wins)
A 1-second clock skew tolerance prevents false conflicts from minor time differences.
#### Deletion Synchronization
When `sync_deletions=true` (default), remote deletions propagate locally only if:
1. The object was previously synced FROM remote (tracked in sync state)
2. The local version hasn't been modified since last sync
This prevents accidental deletion of local-only objects.
#### Sync State Storage
Sync state is stored at: `data/.myfsio.sys/buckets/<bucket>/site_sync_state.json`
```json
{
"synced_objects": {
"path/to/file.txt": {
"last_synced_at": 1706100000.0,
"remote_etag": "abc123",
"source": "remote"
}
},
"last_full_sync": 1706100000.0
}
```
### Legacy Bidirectional Setup (Manual)
For simpler use cases without the site sync worker, you can manually configure two one-way rules:
1. Follow the steps above to replicate **A → B**.
2. Repeat the process on Server B to replicate **B → A**:
- Create a connection on Server B pointing to Server A.
- Enable replication on the target bucket on Server B.
**Loop Prevention**: The system automatically detects replication traffic using a custom User-Agent (`S3ReplicationAgent`). This prevents infinite loops where an object replicated from A to B is immediately replicated back to A.
**Loop Prevention**: The system automatically detects replication traffic using custom User-Agents (`S3ReplicationAgent` and `SiteSyncAgent`). This prevents infinite loops where an object replicated from A to B is immediately replicated back to A.
**Deletes**: Deleting an object on one server will propagate the deletion to the other server.
@@ -1383,16 +1507,723 @@ The suite covers bucket CRUD, presigned downloads, bucket policy enforcement, an
## 14. API Matrix
```
# Service Endpoints
GET /myfsio/health # Health check
# Bucket Operations
GET / # List buckets
PUT /<bucket> # Create bucket
DELETE /<bucket> # Remove bucket
GET /<bucket> # List objects
PUT /<bucket>/<key> # Upload object
GET /<bucket>/<key> # Download object
DELETE /<bucket>/<key> # Delete object
GET /<bucket>?policy # Fetch policy
PUT /<bucket>?policy # Upsert policy
DELETE /<bucket>?policy # Delete policy
GET /<bucket> # List objects (supports ?list-type=2)
HEAD /<bucket> # Check bucket exists
POST /<bucket> # POST object upload (HTML form)
POST /<bucket>?delete # Bulk delete objects
# Bucket Configuration
GET /<bucket>?policy # Fetch bucket policy
PUT /<bucket>?policy # Upsert bucket policy
DELETE /<bucket>?policy # Delete bucket policy
GET /<bucket>?quota # Get bucket quota
PUT /<bucket>?quota # Set bucket quota (admin only)
GET /<bucket>?versioning # Get versioning status
PUT /<bucket>?versioning # Enable/disable versioning
GET /<bucket>?lifecycle # Get lifecycle rules
PUT /<bucket>?lifecycle # Set lifecycle rules
DELETE /<bucket>?lifecycle # Delete lifecycle rules
GET /<bucket>?cors # Get CORS configuration
PUT /<bucket>?cors # Set CORS configuration
DELETE /<bucket>?cors # Delete CORS configuration
GET /<bucket>?encryption # Get encryption configuration
PUT /<bucket>?encryption # Set default encryption
DELETE /<bucket>?encryption # Delete encryption configuration
GET /<bucket>?acl # Get bucket ACL
PUT /<bucket>?acl # Set bucket ACL
GET /<bucket>?tagging # Get bucket tags
PUT /<bucket>?tagging # Set bucket tags
DELETE /<bucket>?tagging # Delete bucket tags
GET /<bucket>?replication # Get replication configuration
PUT /<bucket>?replication # Set replication rules
DELETE /<bucket>?replication # Delete replication configuration
GET /<bucket>?logging # Get access logging configuration
PUT /<bucket>?logging # Set access logging
GET /<bucket>?notification # Get event notifications
PUT /<bucket>?notification # Set event notifications (webhooks)
GET /<bucket>?object-lock # Get object lock configuration
PUT /<bucket>?object-lock # Set object lock configuration
GET /<bucket>?uploads # List active multipart uploads
GET /<bucket>?versions # List object versions
GET /<bucket>?location # Get bucket location/region
# Object Operations
PUT /<bucket>/<key> # Upload object
GET /<bucket>/<key> # Download object (supports Range header)
DELETE /<bucket>/<key> # Delete object
HEAD /<bucket>/<key> # Get object metadata
POST /<bucket>/<key> # POST upload with policy
POST /<bucket>/<key>?select # SelectObjectContent (SQL query)
# Object Configuration
GET /<bucket>/<key>?tagging # Get object tags
PUT /<bucket>/<key>?tagging # Set object tags
DELETE /<bucket>/<key>?tagging # Delete object tags
GET /<bucket>/<key>?acl # Get object ACL
PUT /<bucket>/<key>?acl # Set object ACL
PUT /<bucket>/<key>?retention # Set object retention
GET /<bucket>/<key>?retention # Get object retention
PUT /<bucket>/<key>?legal-hold # Set legal hold
GET /<bucket>/<key>?legal-hold # Get legal hold status
# Multipart Upload
POST /<bucket>/<key>?uploads # Initiate multipart upload
PUT /<bucket>/<key>?uploadId=X&partNumber=N # Upload part
PUT /<bucket>/<key>?uploadId=X&partNumber=N (with x-amz-copy-source) # UploadPartCopy
POST /<bucket>/<key>?uploadId=X # Complete multipart upload
DELETE /<bucket>/<key>?uploadId=X # Abort multipart upload
GET /<bucket>/<key>?uploadId=X # List parts
# Copy Operations
PUT /<bucket>/<key> (with x-amz-copy-source header) # CopyObject
# Admin API
GET /admin/site # Get local site info
PUT /admin/site # Update local site
GET /admin/sites # List peer sites
POST /admin/sites # Register peer site
GET /admin/sites/<site_id> # Get peer site
PUT /admin/sites/<site_id> # Update peer site
DELETE /admin/sites/<site_id> # Unregister peer site
GET /admin/sites/<site_id>/health # Check peer health
GET /admin/topology # Get cluster topology
# KMS API
GET /kms/keys # List KMS keys
POST /kms/keys # Create KMS key
GET /kms/keys/<key_id> # Get key details
DELETE /kms/keys/<key_id> # Schedule key deletion
POST /kms/keys/<key_id>/enable # Enable key
POST /kms/keys/<key_id>/disable # Disable key
POST /kms/keys/<key_id>/rotate # Rotate key material
POST /kms/encrypt # Encrypt data
POST /kms/decrypt # Decrypt data
POST /kms/generate-data-key # Generate data key
POST /kms/generate-random # Generate random bytes
```
## 15. Health Check Endpoint
The API exposes a simple health check endpoint for monitoring and load balancer integration:
```bash
# Check API health
curl http://localhost:5000/myfsio/health
# Response
{"status": "ok", "version": "0.1.7"}
```
The response includes:
- `status`: Always `"ok"` when the server is running
- `version`: Current application version from `app/version.py`
Use this endpoint for:
- Load balancer health checks
- Kubernetes liveness/readiness probes
- Monitoring system integration (Prometheus, Datadog, etc.)
## 16. Object Lock & Retention
Object Lock prevents objects from being deleted or overwritten for a specified retention period. MyFSIO supports both GOVERNANCE and COMPLIANCE modes.
### Retention Modes
| Mode | Description |
|------|-------------|
| **GOVERNANCE** | Objects can't be deleted by normal users, but users with `s3:BypassGovernanceRetention` permission can override |
| **COMPLIANCE** | Objects can't be deleted or overwritten by anyone, including root, until the retention period expires |
### Enabling Object Lock
Object Lock must be enabled when creating a bucket:
```bash
# Create bucket with Object Lock enabled
curl -X PUT "http://localhost:5000/my-bucket" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-bucket-object-lock-enabled: true"
# Set default retention configuration
curl -X PUT "http://localhost:5000/my-bucket?object-lock" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"ObjectLockEnabled": "Enabled",
"Rule": {
"DefaultRetention": {
"Mode": "GOVERNANCE",
"Days": 30
}
}
}'
```
### Per-Object Retention
Set retention on individual objects:
```bash
# Set object retention
curl -X PUT "http://localhost:5000/my-bucket/important.pdf?retention" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"Mode": "COMPLIANCE",
"RetainUntilDate": "2025-12-31T23:59:59Z"
}'
# Get object retention
curl "http://localhost:5000/my-bucket/important.pdf?retention" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
### Legal Hold
Legal hold provides indefinite protection independent of retention settings:
```bash
# Enable legal hold
curl -X PUT "http://localhost:5000/my-bucket/document.pdf?legal-hold" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{"Status": "ON"}'
# Disable legal hold
curl -X PUT "http://localhost:5000/my-bucket/document.pdf?legal-hold" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{"Status": "OFF"}'
# Check legal hold status
curl "http://localhost:5000/my-bucket/document.pdf?legal-hold" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
## 17. Access Logging
Enable S3-style access logging to track all requests to your buckets.
### Configuration
```bash
# Enable access logging
curl -X PUT "http://localhost:5000/my-bucket?logging" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"LoggingEnabled": {
"TargetBucket": "log-bucket",
"TargetPrefix": "logs/my-bucket/"
}
}'
# Get logging configuration
curl "http://localhost:5000/my-bucket?logging" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Disable logging (empty configuration)
curl -X PUT "http://localhost:5000/my-bucket?logging" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{}'
```
### Log Format
Access logs are written in S3-compatible format with fields including:
- Timestamp, bucket, key
- Operation (REST.GET.OBJECT, REST.PUT.OBJECT, etc.)
- Request ID, requester, source IP
- HTTP status, error code, bytes sent
- Total time, turn-around time
- Referrer, User-Agent
## 18. Bucket Notifications & Webhooks
Configure event notifications to trigger webhooks when objects are created or deleted.
### Supported Events
| Event Type | Description |
|-----------|-------------|
| `s3:ObjectCreated:*` | Any object creation (PUT, POST, COPY, multipart) |
| `s3:ObjectCreated:Put` | Object created via PUT |
| `s3:ObjectCreated:Post` | Object created via POST |
| `s3:ObjectCreated:Copy` | Object created via COPY |
| `s3:ObjectCreated:CompleteMultipartUpload` | Multipart upload completed |
| `s3:ObjectRemoved:*` | Any object deletion |
| `s3:ObjectRemoved:Delete` | Object deleted |
| `s3:ObjectRemoved:DeleteMarkerCreated` | Delete marker created (versioned bucket) |
### Configuration
```bash
# Set notification configuration
curl -X PUT "http://localhost:5000/my-bucket?notification" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"TopicConfigurations": [
{
"Id": "upload-notify",
"TopicArn": "https://webhook.example.com/s3-events",
"Events": ["s3:ObjectCreated:*"],
"Filter": {
"Key": {
"FilterRules": [
{"Name": "prefix", "Value": "uploads/"},
{"Name": "suffix", "Value": ".jpg"}
]
}
}
}
]
}'
# Get notification configuration
curl "http://localhost:5000/my-bucket?notification" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
### Webhook Payload
The webhook receives a JSON payload similar to AWS S3 event notifications:
```json
{
"Records": [
{
"eventVersion": "2.1",
"eventSource": "myfsio:s3",
"eventTime": "2024-01-15T10:30:00.000Z",
"eventName": "ObjectCreated:Put",
"s3": {
"bucket": {"name": "my-bucket"},
"object": {
"key": "uploads/photo.jpg",
"size": 102400,
"eTag": "abc123..."
}
}
}
]
}
```
### Security Notes
- Webhook URLs are validated to prevent SSRF attacks
- Internal/private IP ranges are blocked by default (see the `ALLOW_INTERNAL_ENDPOINTS` setting for self-hosted internal network deployments)
- Use HTTPS endpoints in production
## 19. SelectObjectContent (SQL Queries)
Query CSV, JSON, or Parquet files directly using SQL without downloading the entire object. Requires DuckDB to be installed.
### Prerequisites
```bash
pip install duckdb
```
### Usage
```bash
# Query a CSV file
curl -X POST "http://localhost:5000/my-bucket/data.csv?select" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"Expression": "SELECT name, age FROM s3object WHERE age > 25",
"ExpressionType": "SQL",
"InputSerialization": {
"CSV": {
"FileHeaderInfo": "USE",
"FieldDelimiter": ","
}
},
"OutputSerialization": {
"JSON": {}
}
}'
# Query a JSON file
curl -X POST "http://localhost:5000/my-bucket/data.json?select" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"Expression": "SELECT * FROM s3object s WHERE s.status = '\''active'\''",
"ExpressionType": "SQL",
"InputSerialization": {"JSON": {"Type": "LINES"}},
"OutputSerialization": {"JSON": {}}
}'
```
### Supported Input Formats
| Format | Options |
|--------|---------|
| **CSV** | `FileHeaderInfo` (USE, IGNORE, NONE), `FieldDelimiter`, `QuoteCharacter`, `RecordDelimiter` |
| **JSON** | `Type` (DOCUMENT, LINES) |
| **Parquet** | Automatic schema detection |
### Output Formats
- **JSON**: Returns results as JSON records
- **CSV**: Returns results as CSV
## 20. PostObject (HTML Form Upload)
Upload objects using HTML forms with policy-based authorization. Useful for browser-based direct uploads.
### Form Fields
| Field | Required | Description |
|-------|----------|-------------|
| `key` | Yes | Object key (can include `${filename}` placeholder) |
| `file` | Yes | The file to upload |
| `policy` | No | Base64-encoded policy document |
| `x-amz-signature` | No | Policy signature |
| `x-amz-credential` | No | Credential scope |
| `x-amz-algorithm` | No | Signing algorithm (AWS4-HMAC-SHA256) |
| `x-amz-date` | No | Request timestamp |
| `Content-Type` | No | MIME type of the file |
| `x-amz-meta-*` | No | Custom metadata |
### Example HTML Form
```html
<form action="http://localhost:5000/my-bucket" method="post" enctype="multipart/form-data">
<input type="hidden" name="key" value="uploads/${filename}">
<input type="hidden" name="Content-Type" value="image/jpeg">
<input type="hidden" name="x-amz-meta-user" value="john">
<input type="file" name="file">
<button type="submit">Upload</button>
</form>
```
### With Policy (Signed Upload)
For authenticated uploads, include a policy document:
```bash
# Generate policy and signature using boto3 or similar
# Then include in form:
# - policy: base64(policy_document)
# - x-amz-signature: hex(HMAC-SHA256(key=signing_key, msg=base64(policy_document)))
# - x-amz-credential: access_key/date/region/s3/aws4_request
# - x-amz-algorithm: AWS4-HMAC-SHA256
# - x-amz-date: YYYYMMDDTHHMMSSZ
```
## 21. Advanced S3 Operations
### CopyObject
Copy objects within or between buckets:
```bash
# Copy within same bucket
curl -X PUT "http://localhost:5000/my-bucket/copy-of-file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-copy-source: /my-bucket/original-file.txt"
# Copy to different bucket
curl -X PUT "http://localhost:5000/other-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-copy-source: /my-bucket/original-file.txt"
# Copy with metadata replacement
curl -X PUT "http://localhost:5000/my-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-copy-source: /my-bucket/file.txt" \
-H "x-amz-metadata-directive: REPLACE" \
-H "x-amz-meta-newkey: newvalue"
```
### UploadPartCopy
Copy data from an existing object into a multipart upload part:
```bash
# Initiate multipart upload
UPLOAD_ID=$(curl -X POST "http://localhost:5000/my-bucket/large-file.bin?uploads" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." | jq -r '.UploadId')
# Copy bytes 0-10485759 from source as part 1
curl -X PUT "http://localhost:5000/my-bucket/large-file.bin?uploadId=$UPLOAD_ID&partNumber=1" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-copy-source: /source-bucket/source-file.bin" \
-H "x-amz-copy-source-range: bytes=0-10485759"
# Copy bytes 10485760-20971519 as part 2
curl -X PUT "http://localhost:5000/my-bucket/large-file.bin?uploadId=$UPLOAD_ID&partNumber=2" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-copy-source: /source-bucket/source-file.bin" \
-H "x-amz-copy-source-range: bytes=10485760-20971519"
```
### Range Requests
Download partial content using the Range header:
```bash
# Get first 1000 bytes
curl "http://localhost:5000/my-bucket/large-file.bin" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "Range: bytes=0-999"
# Get bytes 1000-1999
curl "http://localhost:5000/my-bucket/large-file.bin" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "Range: bytes=1000-1999"
# Get last 500 bytes
curl "http://localhost:5000/my-bucket/large-file.bin" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "Range: bytes=-500"
# Get from byte 5000 to end
curl "http://localhost:5000/my-bucket/large-file.bin" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "Range: bytes=5000-"
```
Range responses include:
- HTTP 206 Partial Content status
- `Content-Range` header showing the byte range
- `Accept-Ranges: bytes` header
### Conditional Requests
Use conditional headers for cache validation:
```bash
# Only download if modified since
curl "http://localhost:5000/my-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "If-Modified-Since: Wed, 15 Jan 2025 10:00:00 GMT"
# Only download if ETag doesn't match (changed)
curl "http://localhost:5000/my-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "If-None-Match: \"abc123...\""
# Only download if ETag matches
curl "http://localhost:5000/my-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "If-Match: \"abc123...\""
```
## 22. Access Control Lists (ACLs)
ACLs provide legacy-style permission management for buckets and objects.
### Canned ACLs
| ACL | Description |
|-----|-------------|
| `private` | Owner gets FULL_CONTROL (default) |
| `public-read` | Owner FULL_CONTROL, public READ |
| `public-read-write` | Owner FULL_CONTROL, public READ and WRITE |
| `authenticated-read` | Owner FULL_CONTROL, authenticated users READ |
### Setting ACLs
```bash
# Set bucket ACL using canned ACL
curl -X PUT "http://localhost:5000/my-bucket?acl" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-acl: public-read"
# Set object ACL
curl -X PUT "http://localhost:5000/my-bucket/file.txt?acl" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-acl: private"
# Set ACL during upload
curl -X PUT "http://localhost:5000/my-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-acl: public-read" \
--data-binary @file.txt
# Get bucket ACL
curl "http://localhost:5000/my-bucket?acl" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Get object ACL
curl "http://localhost:5000/my-bucket/file.txt?acl" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
### ACL vs Bucket Policies
- **ACLs**: Simple, limited options, legacy approach
- **Bucket Policies**: Powerful, flexible, recommended for new deployments
For most use cases, prefer bucket policies over ACLs.
## 23. Object & Bucket Tagging
Add metadata tags to buckets and objects for organization, cost allocation, or lifecycle rule filtering.
### Bucket Tagging
```bash
# Set bucket tags
curl -X PUT "http://localhost:5000/my-bucket?tagging" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"TagSet": [
{"Key": "Environment", "Value": "Production"},
{"Key": "Team", "Value": "Engineering"}
]
}'
# Get bucket tags
curl "http://localhost:5000/my-bucket?tagging" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Delete bucket tags
curl -X DELETE "http://localhost:5000/my-bucket?tagging" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
### Object Tagging
```bash
# Set object tags
curl -X PUT "http://localhost:5000/my-bucket/file.txt?tagging" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"TagSet": [
{"Key": "Classification", "Value": "Confidential"},
{"Key": "Owner", "Value": "john@example.com"}
]
}'
# Get object tags
curl "http://localhost:5000/my-bucket/file.txt?tagging" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Delete object tags
curl -X DELETE "http://localhost:5000/my-bucket/file.txt?tagging" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Set tags during upload
curl -X PUT "http://localhost:5000/my-bucket/file.txt" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-H "x-amz-tagging: Environment=Staging&Team=QA" \
--data-binary @file.txt
```
### Tagging Limits
- Maximum 50 tags per object (configurable via `OBJECT_TAG_LIMIT`)
- Tag key: 1-128 Unicode characters
- Tag value: 0-256 Unicode characters
### Use Cases
- **Lifecycle Rules**: Filter objects for expiration by tag
- **Access Control**: Use tag conditions in bucket policies
- **Cost Tracking**: Group objects by project or department
- **Automation**: Trigger actions based on object tags
## 24. CORS Configuration
Configure Cross-Origin Resource Sharing for browser-based applications.
### Setting CORS Rules
```bash
# Set CORS configuration
curl -X PUT "http://localhost:5000/my-bucket?cors" \
-H "Content-Type: application/json" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..." \
-d '{
"CORSRules": [
{
"AllowedOrigins": ["https://example.com", "https://app.example.com"],
"AllowedMethods": ["GET", "PUT", "POST", "DELETE"],
"AllowedHeaders": ["*"],
"ExposeHeaders": ["ETag", "x-amz-meta-*"],
"MaxAgeSeconds": 3600
}
]
}'
# Get CORS configuration
curl "http://localhost:5000/my-bucket?cors" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Delete CORS configuration
curl -X DELETE "http://localhost:5000/my-bucket?cors" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
### CORS Rule Fields
| Field | Description |
|-------|-------------|
| `AllowedOrigins` | Origins allowed to access the bucket (required) |
| `AllowedMethods` | HTTP methods allowed (GET, PUT, POST, DELETE, HEAD) |
| `AllowedHeaders` | Request headers allowed in preflight |
| `ExposeHeaders` | Response headers visible to browser |
| `MaxAgeSeconds` | How long browser can cache preflight response |
## 25. List Objects API v2
MyFSIO supports both the ListObjects (v1) and ListObjectsV2 (v2) APIs.
### Using v2 API
```bash
# List with v2 (supports continuation tokens)
curl "http://localhost:5000/my-bucket?list-type=2" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# With prefix and delimiter (folder-like listing)
curl "http://localhost:5000/my-bucket?list-type=2&prefix=photos/&delimiter=/" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Pagination with continuation token
curl "http://localhost:5000/my-bucket?list-type=2&max-keys=100&continuation-token=TOKEN" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
# Start after specific key
curl "http://localhost:5000/my-bucket?list-type=2&start-after=photos/2024/" \
-H "X-Access-Key: ..." -H "X-Secret-Key: ..."
```
### v1 vs v2 Differences
| Feature | v1 | v2 |
|---------|----|----|
| Pagination | `marker` | `continuation-token` |
| Start position | `marker` | `start-after` |
| Fetch owner info | Always included | Use `fetch-owner=true` |
| Max keys | 1000 | 1000 |
### Query Parameters
| Parameter | Description |
|-----------|-------------|
| `list-type` | Set to `2` for v2 API |
| `prefix` | Filter objects by key prefix |
| `delimiter` | Group objects (typically `/`) |
| `max-keys` | Maximum results (1-1000, default 1000) |
| `continuation-token` | Token from previous response |
| `start-after` | Start listing after this key |
| `fetch-owner` | Include owner info in response |
| `encoding-type` | Set to `url` for URL-encoded keys |

View File

@@ -9,4 +9,5 @@ boto3>=1.42.14
waitress>=3.0.2
psutil>=7.1.3
cryptography>=46.0.3
defusedxml>=0.7.1
defusedxml>=0.7.1
duckdb>=1.4.4

56
run.py
View File

@@ -18,6 +18,8 @@ for _env_file in [
if _env_file.exists():
load_dotenv(_env_file, override=True)
from typing import Optional
from app import create_api_app, create_ui_app
from app.config import AppConfig
@@ -36,11 +38,23 @@ def _is_frozen() -> bool:
return getattr(sys, 'frozen', False) or '__compiled__' in globals()
def serve_api(port: int, prod: bool = False) -> None:
def serve_api(port: int, prod: bool = False, config: Optional[AppConfig] = None) -> None:
app = create_api_app()
if prod:
from waitress import serve
serve(app, host=_server_host(), port=port, ident="MyFSIO")
if config:
serve(
app,
host=_server_host(),
port=port,
ident="MyFSIO",
threads=config.server_threads,
connection_limit=config.server_connection_limit,
backlog=config.server_backlog,
channel_timeout=config.server_channel_timeout,
)
else:
serve(app, host=_server_host(), port=port, ident="MyFSIO")
else:
debug = _is_debug_enabled()
if debug:
@@ -48,11 +62,23 @@ def serve_api(port: int, prod: bool = False) -> None:
app.run(host=_server_host(), port=port, debug=debug)
def serve_ui(port: int, prod: bool = False) -> None:
def serve_ui(port: int, prod: bool = False, config: Optional[AppConfig] = None) -> None:
app = create_ui_app()
if prod:
from waitress import serve
serve(app, host=_server_host(), port=port, ident="MyFSIO")
if config:
serve(
app,
host=_server_host(),
port=port,
ident="MyFSIO",
threads=config.server_threads,
connection_limit=config.server_connection_limit,
backlog=config.server_backlog,
channel_timeout=config.server_channel_timeout,
)
else:
serve(app, host=_server_host(), port=port, ident="MyFSIO")
else:
debug = _is_debug_enabled()
if debug:
@@ -71,7 +97,6 @@ if __name__ == "__main__":
parser.add_argument("--show-config", action="store_true", help="Show configuration summary and exit")
args = parser.parse_args()
# Handle config check/show modes
if args.check_config or args.show_config:
config = AppConfig.from_env()
config.print_startup_summary()
@@ -81,49 +106,50 @@ if __name__ == "__main__":
sys.exit(1 if critical else 0)
sys.exit(0)
# Default to production mode when running as compiled binary
# unless --dev is explicitly passed
prod_mode = args.prod or (_is_frozen() and not args.dev)
# Validate configuration before starting
config = AppConfig.from_env()
# Show startup summary only on first run (when marker file doesn't exist)
first_run_marker = config.storage_root / ".myfsio.sys" / ".initialized"
is_first_run = not first_run_marker.exists()
if is_first_run:
config.print_startup_summary()
# Check for critical issues that should prevent startup
issues = config.validate_and_report()
critical_issues = [i for i in issues if i.startswith("CRITICAL:")]
if critical_issues:
print("ABORTING: Critical configuration issues detected. Fix them before starting.")
print("ABORTING: Critical configuration issues detected. Please fix them before starting.")
sys.exit(1)
# Create the marker file to indicate successful first run
try:
first_run_marker.parent.mkdir(parents=True, exist_ok=True)
first_run_marker.write_text(f"Initialized on {__import__('datetime').datetime.now().isoformat()}\n")
except OSError:
pass # Non-critical, just skip marker creation
pass
if prod_mode:
print("Running in production mode (Waitress)")
issues = config.validate_and_report()
critical_issues = [i for i in issues if i.startswith("CRITICAL:")]
if critical_issues:
for issue in critical_issues:
print(f" {issue}")
print("ABORTING: Critical configuration issues detected. Please fix them before starting.")
sys.exit(1)
else:
print("Running in development mode (Flask dev server)")
if args.mode in {"api", "both"}:
print(f"Starting API server on port {args.api_port}...")
api_proc = Process(target=serve_api, args=(args.api_port, prod_mode), daemon=True)
api_proc = Process(target=serve_api, args=(args.api_port, prod_mode, config), daemon=True)
api_proc.start()
else:
api_proc = None
if args.mode in {"ui", "both"}:
print(f"Starting UI server on port {args.ui_port}...")
serve_ui(args.ui_port, prod_mode)
serve_ui(args.ui_port, prod_mode, config)
elif api_proc:
try:
api_proc.join()

View File

@@ -1081,11 +1081,17 @@ html.sidebar-will-collapse .sidebar-user {
letter-spacing: 0.08em;
}
.main-content:has(.docs-sidebar) {
overflow-x: visible;
}
.docs-sidebar {
position: sticky;
top: 1.5rem;
border-radius: 1rem;
border: 1px solid var(--myfsio-card-border);
max-height: calc(100vh - 3rem);
overflow-y: auto;
}
.docs-sidebar-callouts {

View File

@@ -16,7 +16,7 @@ window.IAMManagement = (function() {
var currentDeleteKey = null;
var policyTemplates = {
full: [{ bucket: '*', actions: ['list', 'read', 'write', 'delete', 'share', 'policy', 'replication', 'iam:list_users', 'iam:*'] }],
full: [{ bucket: '*', actions: ['list', 'read', 'write', 'delete', 'share', 'policy', 'replication', 'lifecycle', 'cors', 'iam:*'] }],
readonly: [{ bucket: '*', actions: ['list', 'read'] }],
writer: [{ bucket: '*', actions: ['list', 'read', 'write'] }]
};

View File

@@ -94,6 +94,12 @@
</svg>
<span>Metrics</span>
</a>
<a href="{{ url_for('ui.sites_dashboard') }}" class="sidebar-link {% if request.endpoint == 'ui.sites_dashboard' %}active{% endif %}">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" viewBox="0 0 16 16">
<path d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8zm7.5-6.923c-.67.204-1.335.82-1.887 1.855A7.97 7.97 0 0 0 5.145 4H7.5V1.077zM4.09 4a9.267 9.267 0 0 1 .64-1.539 6.7 6.7 0 0 1 .597-.933A7.025 7.025 0 0 0 2.255 4H4.09zm-.582 3.5c.03-.877.138-1.718.312-2.5H1.674a6.958 6.958 0 0 0-.656 2.5h2.49zM4.847 5a12.5 12.5 0 0 0-.338 2.5H7.5V5H4.847zM8.5 5v2.5h2.99a12.495 12.495 0 0 0-.337-2.5H8.5zM4.51 8.5a12.5 12.5 0 0 0 .337 2.5H7.5V8.5H4.51zm3.99 0V11h2.653c.187-.765.306-1.608.338-2.5H8.5zM5.145 12c.138.386.295.744.468 1.068.552 1.035 1.218 1.65 1.887 1.855V12H5.145zm.182 2.472a6.696 6.696 0 0 1-.597-.933A9.268 9.268 0 0 1 4.09 12H2.255a7.024 7.024 0 0 0 3.072 2.472zM3.82 11a13.652 13.652 0 0 1-.312-2.5h-2.49c.062.89.291 1.733.656 2.5H3.82zm6.853 3.472A7.024 7.024 0 0 0 13.745 12H11.91a9.27 9.27 0 0 1-.64 1.539 6.688 6.688 0 0 1-.597.933zM8.5 12v2.923c.67-.204 1.335-.82 1.887-1.855.173-.324.33-.682.468-1.068H8.5zm3.68-1h2.146c.365-.767.594-1.61.656-2.5h-2.49a13.65 13.65 0 0 1-.312 2.5zm2.802-3.5a6.959 6.959 0 0 0-.656-2.5H12.18c.174.782.282 1.623.312 2.5h2.49zM11.27 2.461c.247.464.462.98.64 1.539h1.835a7.024 7.024 0 0 0-3.072-2.472c.218.284.418.598.597.933zM10.855 4a7.966 7.966 0 0 0-.468-1.068C9.835 1.897 9.17 1.282 8.5 1.077V4h2.355z"/>
</svg>
<span>Sites</span>
</a>
{% endif %}
</div>
<div class="nav-section">
@@ -179,6 +185,12 @@
</svg>
<span class="sidebar-link-text">Metrics</span>
</a>
<a href="{{ url_for('ui.sites_dashboard') }}" class="sidebar-link {% if request.endpoint == 'ui.sites_dashboard' %}active{% endif %}" data-tooltip="Sites">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" viewBox="0 0 16 16">
<path d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8zm7.5-6.923c-.67.204-1.335.82-1.887 1.855A7.97 7.97 0 0 0 5.145 4H7.5V1.077zM4.09 4a9.267 9.267 0 0 1 .64-1.539 6.7 6.7 0 0 1 .597-.933A7.025 7.025 0 0 0 2.255 4H4.09zm-.582 3.5c.03-.877.138-1.718.312-2.5H1.674a6.958 6.958 0 0 0-.656 2.5h2.49zM4.847 5a12.5 12.5 0 0 0-.338 2.5H7.5V5H4.847zM8.5 5v2.5h2.99a12.495 12.495 0 0 0-.337-2.5H8.5zM4.51 8.5a12.5 12.5 0 0 0 .337 2.5H7.5V8.5H4.51zm3.99 0V11h2.653c.187-.765.306-1.608.338-2.5H8.5zM5.145 12c.138.386.295.744.468 1.068.552 1.035 1.218 1.65 1.887 1.855V12H5.145zm.182 2.472a6.696 6.696 0 0 1-.597-.933A9.268 9.268 0 0 1 4.09 12H2.255a7.024 7.024 0 0 0 3.072 2.472zM3.82 11a13.652 13.652 0 0 1-.312-2.5h-2.49c.062.89.291 1.733.656 2.5H3.82zm6.853 3.472A7.024 7.024 0 0 0 13.745 12H11.91a9.27 9.27 0 0 1-.64 1.539 6.688 6.688 0 0 1-.597.933zM8.5 12v2.923c.67-.204 1.335-.82 1.887-1.855.173-.324.33-.682.468-1.068H8.5zm3.68-1h2.146c.365-.767.594-1.61.656-2.5h-2.49a13.65 13.65 0 0 1-.312 2.5zm2.802-3.5a6.959 6.959 0 0 0-.656-2.5H12.18c.174.782.282 1.623.312 2.5h2.49zM11.27 2.461c.247.464.462.98.64 1.539h1.835a7.024 7.024 0 0 0-3.072-2.472c.218.284.418.598.597.933zM10.855 4a7.966 7.966 0 0 0-.468-1.068C9.835 1.897 9.17 1.282 8.5 1.077V4h2.355z"/>
</svg>
<span class="sidebar-link-text">Sites</span>
</a>
{% endif %}
</div>
<div class="nav-section">

View File

@@ -1065,8 +1065,10 @@
<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zm-3.97-3.03a.75.75 0 0 0-1.08.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-.01-1.05z"/>
</svg>
<div>
<strong>Replication Active</strong>
{% if replication_rule.mode == 'all' %}
<strong>Replication Active</strong>
{% if replication_rule.mode == 'bidirectional' %}
Bi-directional sync enabled with LWW conflict resolution.
{% elif replication_rule.mode == 'all' %}
All objects (existing + new) are being replicated.
{% else %}
New uploads to this bucket are automatically replicated.
@@ -1159,7 +1161,7 @@
</div>
<div class="text-muted small text-uppercase">Mode</div>
<div class="fw-semibold small">
{% if replication_rule.mode == 'all' %}All Objects{% else %}New Only{% endif %}
{% if replication_rule.mode == 'bidirectional' %}Bidirectional{% elif replication_rule.mode == 'all' %}All Objects{% else %}New Only{% endif %}
</div>
</div>
</div>
@@ -1310,7 +1312,9 @@
<div>
<strong>Replication Paused</strong>
<p class="mb-1">Replication is configured but currently paused. New uploads will not be replicated until resumed.</p>
{% if replication_rule.mode == 'all' %}
{% if replication_rule.mode == 'bidirectional' %}
<p class="mb-0 small text-dark"><strong>Tip:</strong> When you resume, bi-directional sync will continue and any missed changes will be reconciled using LWW conflict resolution.</p>
{% elif replication_rule.mode == 'all' %}
<p class="mb-0 small text-dark"><strong>Tip:</strong> When you resume, any objects uploaded while paused will be automatically synced to the target.</p>
{% else %}
<p class="mb-0 small text-dark"><strong>Note:</strong> Objects uploaded while paused will not be synced (mode: new_only). Consider switching to "All Objects" mode if you need to sync missed uploads.</p>
@@ -1435,17 +1439,50 @@
<div class="text-muted small">Only replicate objects uploaded after enabling replication. Existing objects will not be copied.</div>
</label>
</div>
<div class="form-check p-3 m-0">
<div class="form-check p-3 border-bottom m-0">
<input class="form-check-input" type="radio" name="replication_mode" id="mode_all" value="all">
<label class="form-check-label w-100" for="mode_all">
<span class="fw-medium">All objects (existing + new)</span>
<div class="text-muted small">Replicate all existing objects immediately, plus all future uploads. <span class="text-warning">This may take time for large buckets.</span></div>
</label>
</div>
{% if site_sync_enabled %}
<div class="form-check p-3 m-0">
<input class="form-check-input" type="radio" name="replication_mode" id="mode_bidirectional" value="bidirectional">
<label class="form-check-label w-100" for="mode_bidirectional">
<span class="fw-medium">Bidirectional sync</span>
<div class="text-muted small">Two-way sync with Last-Write-Wins conflict resolution.</div>
</label>
</div>
{% endif %}
</div>
</div>
</div>
<div id="bidirWarningBucket" class="alert alert-warning d-none mb-4" role="alert">
<h6 class="alert-heading fw-bold d-flex align-items-center gap-2 mb-2">
<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" fill="currentColor" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M1 11.5a.5.5 0 0 0 .5.5h11.793l-3.147 3.146a.5.5 0 0 0 .708.708l4-4a.5.5 0 0 0 0-.708l-4-4a.5.5 0 0 0-.708.708L13.293 11H1.5a.5.5 0 0 0-.5.5zm14-7a.5.5 0 0 1-.5.5H2.707l3.147 3.146a.5.5 0 1 1-.708.708l-4-4a.5.5 0 0 1 0-.708l4-4a.5.5 0 1 1 .708.708L2.707 4H14.5a.5.5 0 0 1 .5.5z"/>
</svg>
Requires Configuration on Both Sites
</h6>
<p class="mb-2 small">For bidirectional sync to work, <strong>both sites</strong> must be configured:</p>
<ol class="mb-2 ps-3 small">
<li>This site: Enable bidirectional replication here</li>
<li>Remote site: Register this site as a peer with a connection</li>
<li>Remote site: Create matching bidirectional rule pointing back</li>
<li>Both sites: Ensure <code>SITE_SYNC_ENABLED=true</code></li>
</ol>
<div class="small">
<a href="{{ url_for('ui.sites_dashboard') }}" class="alert-link">
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" class="me-1" viewBox="0 0 16 16">
<path d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8zm7.5-6.923c-.67.204-1.335.82-1.887 1.855A7.97 7.97 0 0 0 5.145 4H7.5V1.077z"/>
</svg>
Check bidirectional status in Sites Dashboard
</a>
</div>
</div>
<button type="submit" class="btn btn-primary">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M8 3a5 5 0 1 0 4.546 2.914.5.5 0 0 1 .908-.417A6 6 0 1 1 8 2v1z"/>
@@ -2556,5 +2593,26 @@ window.BucketDetailConfig = {
bucketsOverview: "{{ url_for('ui.buckets_overview') }}"
}
};
(function() {
  // Show the "both sites must be configured" warning only while the
  // bidirectional replication mode radio button is selected.
  const RADIO_SELECTOR = 'input[name="replication_mode"]';
  const warningBox = document.getElementById('bidirWarningBucket');
  const radios = document.querySelectorAll(RADIO_SELECTOR);

  // Re-evaluate the current selection and hide or reveal the warning box.
  const syncWarningVisibility = function() {
    // The warning element is optional on the page; nothing to update without it.
    if (!warningBox) {
      return;
    }
    const checked = document.querySelector(RADIO_SELECTOR + ':checked');
    const isBidirectional = Boolean(checked) && checked.value === 'bidirectional';
    // 'd-none' is present exactly when bidirectional mode is NOT selected.
    warningBox.classList.toggle('d-none', !isBidirectional);
  };

  for (let i = 0; i < radios.length; i += 1) {
    radios[i].addEventListener('change', syncWarningVisibility);
  }

  // Apply the correct initial state for whichever radio is pre-selected.
  syncWarningVisibility();
})();
</script>
{% endblock %}

File diff suppressed because it is too large Load Diff

View File

@@ -218,10 +218,10 @@
<div class="col-lg-4">
{% set has_issues = (cpu_percent > 80) or (memory.percent > 85) or (disk.percent > 90) %}
<div class="card shadow-sm border-0 h-100 overflow-hidden" style="background: linear-gradient(135deg, {% if has_issues %}#ef4444 0%, #f97316{% else %}#3b82f6 0%, #8b5cf6{% endif %} 100%);">
<div id="systemHealthCard" class="card shadow-sm border-0 h-100 overflow-hidden" style="background: linear-gradient(135deg, {% if has_issues %}#ef4444 0%, #f97316{% else %}#3b82f6 0%, #8b5cf6{% endif %} 100%);">
<div class="card-body p-4 d-flex flex-column justify-content-center text-white position-relative">
<div class="position-absolute top-0 end-0 opacity-25" style="transform: translate(20%, -20%);">
<svg xmlns="http://www.w3.org/2000/svg" width="160" height="160" fill="currentColor" class="bi bi-{% if has_issues %}exclamation-triangle{% else %}cloud-check{% endif %}" viewBox="0 0 16 16">
<svg id="healthIcon" xmlns="http://www.w3.org/2000/svg" width="160" height="160" fill="currentColor" viewBox="0 0 16 16">
{% if has_issues %}
<path d="M7.938 2.016A.13.13 0 0 1 8.002 2a.13.13 0 0 1 .063.016.146.146 0 0 1 .054.057l6.857 11.667c.036.06.035.124.002.183a.163.163 0 0 1-.054.06.116.116 0 0 1-.066.017H1.146a.115.115 0 0 1-.066-.017.163.163 0 0 1-.054-.06.176.176 0 0 1 .002-.183L7.884 2.073a.147.147 0 0 1 .054-.057zm1.044-.45a1.13 1.13 0 0 0-1.96 0L.165 13.233c-.457.778.091 1.767.98 1.767h13.713c.889 0 1.438-.99.98-1.767L8.982 1.566z"/>
<path d="M7.002 12a1 1 0 1 1 2 0 1 1 0 0 1-2 0zM7.1 5.995a.905.905 0 1 1 1.8 0l-.35 3.507a.552.552 0 0 1-1.1 0L7.1 5.995z"/>
@@ -232,8 +232,8 @@
</svg>
</div>
<div class="mb-3">
<span class="badge bg-white {% if has_issues %}text-danger{% else %}text-primary{% endif %} fw-semibold px-3 py-2">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="bi bi-{% if has_issues %}exclamation-circle-fill{% else %}check-circle-fill{% endif %} me-1" viewBox="0 0 16 16">
<span id="healthBadge" class="badge bg-white {% if has_issues %}text-danger{% else %}text-primary{% endif %} fw-semibold px-3 py-2">
<svg id="healthBadgeIcon" xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1" viewBox="0 0 16 16">
{% if has_issues %}
<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM8 4a.905.905 0 0 0-.9.995l.35 3.507a.552.552 0 0 0 1.1 0l.35-3.507A.905.905 0 0 0 8 4zm.002 6a1 1 0 1 0 0 2 1 1 0 0 0 0-2z"/>
{% else %}
@@ -244,22 +244,24 @@
</span>
</div>
<h4 class="card-title fw-bold mb-3">System Health</h4>
{% if has_issues %}
<ul class="list-unstyled small mb-4 opacity-90">
{% if cpu_percent > 80 %}<li class="mb-1">CPU usage is high ({{ cpu_percent }}%)</li>{% endif %}
{% if memory.percent > 85 %}<li class="mb-1">Memory usage is high ({{ memory.percent }}%)</li>{% endif %}
{% if disk.percent > 90 %}<li class="mb-1">Disk space is critically low ({{ disk.percent }}% used)</li>{% endif %}
</ul>
{% else %}
<p class="card-text opacity-90 mb-4 small">All resources are within normal operating parameters.</p>
{% endif %}
<div id="healthContent">
{% if has_issues %}
<ul class="list-unstyled small mb-4 opacity-90">
{% if cpu_percent > 80 %}<li class="mb-1">CPU usage is high ({{ cpu_percent }}%)</li>{% endif %}
{% if memory.percent > 85 %}<li class="mb-1">Memory usage is high ({{ memory.percent }}%)</li>{% endif %}
{% if disk.percent > 90 %}<li class="mb-1">Disk space is critically low ({{ disk.percent }}% used)</li>{% endif %}
</ul>
{% else %}
<p class="card-text opacity-90 mb-4 small">All resources are within normal operating parameters.</p>
{% endif %}
</div>
<div class="d-flex gap-4">
<div>
<div class="h3 fw-bold mb-0">{{ app.uptime_days }}d</div>
<div class="h3 fw-bold mb-0" data-metric="health_uptime">{{ app.uptime_days }}d</div>
<small class="opacity-75">Uptime</small>
</div>
<div>
<div class="h3 fw-bold mb-0">{{ app.buckets }}</div>
<div class="h3 fw-bold mb-0" data-metric="health_buckets">{{ app.buckets }}</div>
<small class="opacity-75">Active Buckets</small>
</div>
</div>
@@ -480,6 +482,55 @@
el = document.querySelector('[data-metric="objects_count"]');
if (el) el.textContent = data.app.objects;
var cpuHigh = data.cpu_percent > 80;
var memHigh = data.memory.percent > 85;
var diskHigh = data.disk.percent > 90;
var hasIssues = cpuHigh || memHigh || diskHigh;
var healthCard = document.getElementById('systemHealthCard');
if (healthCard) {
healthCard.style.background = hasIssues
? 'linear-gradient(135deg, #ef4444 0%, #f97316 100%)'
: 'linear-gradient(135deg, #3b82f6 0%, #8b5cf6 100%)';
}
var healthIcon = document.getElementById('healthIcon');
if (healthIcon) {
healthIcon.innerHTML = hasIssues
? '<path d="M7.938 2.016A.13.13 0 0 1 8.002 2a.13.13 0 0 1 .063.016.146.146 0 0 1 .054.057l6.857 11.667c.036.06.035.124.002.183a.163.163 0 0 1-.054.06.116.116 0 0 1-.066.017H1.146a.115.115 0 0 1-.066-.017.163.163 0 0 1-.054-.06.176.176 0 0 1 .002-.183L7.884 2.073a.147.147 0 0 1 .054-.057zm1.044-.45a1.13 1.13 0 0 0-1.96 0L.165 13.233c-.457.778.091 1.767.98 1.767h13.713c.889 0 1.438-.99.98-1.767L8.982 1.566z"/><path d="M7.002 12a1 1 0 1 1 2 0 1 1 0 0 1-2 0zM7.1 5.995a.905.905 0 1 1 1.8 0l-.35 3.507a.552.552 0 0 1-1.1 0L7.1 5.995z"/>'
: '<path fill-rule="evenodd" d="M10.354 6.146a.5.5 0 0 1 0 .708l-3 3a.5.5 0 0 1-.708 0l-1.5-1.5a.5.5 0 1 1 .708-.708L7 8.793l2.646-2.647a.5.5 0 0 1 .708 0z"/><path d="M4.406 3.342A5.53 5.53 0 0 1 8 2c2.69 0 4.923 2 5.166 4.579C14.758 6.804 16 8.137 16 9.773 16 11.569 14.502 13 12.687 13H3.781C1.708 13 0 11.366 0 9.318c0-1.763 1.266-3.223 2.942-3.593.143-.863.698-1.723 1.464-2.383z"/>';
}
var healthBadge = document.getElementById('healthBadge');
if (healthBadge) {
healthBadge.className = 'badge bg-white fw-semibold px-3 py-2 ' + (hasIssues ? 'text-danger' : 'text-primary');
}
var healthBadgeIcon = document.getElementById('healthBadgeIcon');
if (healthBadgeIcon) {
healthBadgeIcon.innerHTML = hasIssues
? '<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM8 4a.905.905 0 0 0-.9.995l.35 3.507a.552.552 0 0 0 1.1 0l.35-3.507A.905.905 0 0 0 8 4zm.002 6a1 1 0 1 0 0 2 1 1 0 0 0 0-2z"/>'
: '<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zm-3.97-3.03a.75.75 0 0 0-1.08.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-.01-1.05z"/>';
}
var healthContent = document.getElementById('healthContent');
if (healthContent) {
if (hasIssues) {
var issues = [];
if (cpuHigh) issues.push('<li class="mb-1">CPU usage is high (' + data.cpu_percent.toFixed(1) + '%)</li>');
if (memHigh) issues.push('<li class="mb-1">Memory usage is high (' + data.memory.percent.toFixed(1) + '%)</li>');
if (diskHigh) issues.push('<li class="mb-1">Disk space is critically low (' + data.disk.percent.toFixed(1) + '% used)</li>');
healthContent.innerHTML = '<ul class="list-unstyled small mb-4 opacity-90">' + issues.join('') + '</ul>';
} else {
healthContent.innerHTML = '<p class="card-text opacity-90 mb-4 small">All resources are within normal operating parameters.</p>';
}
}
el = document.querySelector('[data-metric="health_uptime"]');
if (el) el.textContent = data.app.uptime_days + 'd';
el = document.querySelector('[data-metric="health_buckets"]');
if (el) el.textContent = data.app.buckets;
countdown = 5;
})
.catch(function(err) {

View File

@@ -0,0 +1,270 @@
{% extends "base.html" %}
{% block title %}Set Up Replication - S3 Compatible Storage{% endblock %}
{% block content %}
<div class="page-header d-flex justify-content-between align-items-center mb-4">
<div>
<nav aria-label="breadcrumb">
<ol class="breadcrumb mb-1">
<li class="breadcrumb-item"><a href="{{ url_for('ui.sites_dashboard') }}">Sites</a></li>
<li class="breadcrumb-item active" aria-current="page">Replication Wizard</li>
</ol>
</nav>
<h1 class="h3 mb-1 d-flex align-items-center gap-2">
<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
<path d="M8 4a.5.5 0 0 1 .5.5V6a.5.5 0 0 1-1 0V4.5A.5.5 0 0 1 8 4zM3.732 5.732a.5.5 0 0 1 .707 0l.915.914a.5.5 0 1 1-.708.708l-.914-.915a.5.5 0 0 1 0-.707zM2 10a.5.5 0 0 1 .5-.5h1.586a.5.5 0 0 1 0 1H2.5A.5.5 0 0 1 2 10zm9.5 0a.5.5 0 0 1 .5-.5h1.5a.5.5 0 0 1 0 1H12a.5.5 0 0 1-.5-.5zm.754-4.246a.389.389 0 0 0-.527-.02L7.547 9.31a.91.91 0 1 0 1.302 1.258l3.434-4.297a.389.389 0 0 0-.029-.518z"/>
<path fill-rule="evenodd" d="M0 10a8 8 0 1 1 15.547 2.661c-.442 1.253-1.845 1.602-2.932 1.25C11.309 13.488 9.475 13 8 13c-1.474 0-3.31.488-4.615.911-1.087.352-2.49.003-2.932-1.25A7.988 7.988 0 0 1 0 10zm8-7a7 7 0 0 0-6.603 9.329c.203.575.923.876 1.68.63C4.397 12.533 6.358 12 8 12s3.604.532 4.923.96c.757.245 1.477-.056 1.68-.631A7 7 0 0 0 8 3z"/>
</svg>
Set Up Replication
</h1>
<p class="text-muted mb-0 mt-1">Configure bucket replication to <strong>{{ peer.display_name or peer.site_id }}</strong></p>
</div>
</div>
<div class="row g-4">
<div class="col-lg-4 col-md-5">
<div class="card shadow-sm border-0 mb-4" style="border-radius: 1rem;">
<div class="card-header bg-transparent border-0 pt-4 pb-0 px-4">
<h5 class="fw-semibold d-flex align-items-center gap-2 mb-1">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
<path d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8m7.5-6.923c-.67.204-1.335.82-1.887 1.855A8 8 0 0 0 5.145 4H7.5zM4.09 4a9.3 9.3 0 0 1 .64-1.539 7 7 0 0 1 .597-.933A7.03 7.03 0 0 0 2.255 4zm-.582 3.5c.03-.877.138-1.718.312-2.5H1.674a7 7 0 0 0-.656 2.5zM4.847 5a12.5 12.5 0 0 0-.338 2.5H7.5V5zM8.5 5v2.5h2.99a12.5 12.5 0 0 0-.337-2.5zM4.51 8.5a12.5 12.5 0 0 0 .337 2.5H7.5V8.5zm3.99 0V11h2.653c.187-.765.306-1.608.338-2.5zM5.145 12q.208.58.468 1.068c.552 1.035 1.218 1.65 1.887 1.855V12zm.182 2.472a7 7 0 0 1-.597-.933A9.3 9.3 0 0 1 4.09 12H2.255a7 7 0 0 0 3.072 2.472M3.82 11a13.7 13.7 0 0 1-.312-2.5h-2.49a7 7 0 0 0 .656 2.5zM8.5 12v2.923c.67-.204 1.335-.82 1.887-1.855q.26-.487.468-1.068zm3.68-1h2.146c.365-.767.594-1.61.656-2.5h-2.49a13.7 13.7 0 0 1-.312 2.5m2.802-3.5a7 7 0 0 0-.656-2.5H12.18c.174.782.282 1.623.312 2.5zM11.27 2.461c.247.464.462.98.64 1.539h1.835a7 7 0 0 0-3.072-2.472c.218.284.418.598.597.933M10.855 4a8 8 0 0 0-.468-1.068C9.835 1.897 9.17 1.282 8.5 1.077V4z"/>
</svg>
Peer Site
</h5>
</div>
<div class="card-body px-4 pb-4">
<dl class="mb-0">
<dt class="text-muted small">Site ID</dt>
<dd class="mb-2">{{ peer.site_id }}</dd>
<dt class="text-muted small">Endpoint</dt>
<dd class="mb-2 text-truncate" title="{{ peer.endpoint }}">{{ peer.endpoint }}</dd>
<dt class="text-muted small">Region</dt>
<dd class="mb-2"><span class="badge bg-primary bg-opacity-10 text-primary">{{ peer.region }}</span></dd>
<dt class="text-muted small">Connection</dt>
<dd class="mb-0"><span class="badge bg-secondary bg-opacity-10 text-secondary">{{ connection.name }}</span></dd>
</dl>
</div>
</div>
<div class="card shadow-sm border-0" style="border-radius: 1rem;">
<div class="card-header bg-transparent border-0 pt-4 pb-0 px-4">
<h5 class="fw-semibold d-flex align-items-center gap-2 mb-1">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-muted" viewBox="0 0 16 16">
<path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm.93-9.412-1 4.705c-.07.34.029.533.304.533.194 0 .487-.07.686-.246l-.088.416c-.287.346-.92.598-1.465.598-.703 0-1.002-.422-.808-1.319l.738-3.468c.064-.293.006-.399-.287-.47l-.451-.081.082-.381 2.29-.287zM8 5.5a1 1 0 1 1 0-2 1 1 0 0 1 0 2z"/>
</svg>
Replication Modes
</h5>
</div>
<div class="card-body px-4 pb-4 small">
<p class="mb-2"><strong>New Only:</strong> Only replicate new objects uploaded after the rule is created.</p>
<p class="mb-2"><strong>All Objects:</strong> Replicate all existing objects plus new uploads.</p>
<p class="mb-0"><strong>Bidirectional:</strong> Two-way sync between sites. Changes on either side are synchronized.</p>
</div>
</div>
</div>
<div class="col-lg-8 col-md-7">
<div class="card shadow-sm border-0" style="border-radius: 1rem;">
<div class="card-header bg-transparent border-0 pt-4 pb-0 px-4">
<h5 class="fw-semibold d-flex align-items-center gap-2 mb-1">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-muted" viewBox="0 0 16 16">
<path d="M2.522 5H2a.5.5 0 0 0-.494.574l1.372 9.149A1.5 1.5 0 0 0 4.36 16h7.278a1.5 1.5 0 0 0 1.483-1.277l1.373-9.149A.5.5 0 0 0 14 5h-.522A5.5 5.5 0 0 0 2.522 5zm1.005 0a4.5 4.5 0 0 1 8.945 0H3.527z"/>
</svg>
Select Buckets to Replicate
</h5>
<p class="text-muted small mb-0">Choose which buckets should be replicated to this peer site</p>
</div>
<div class="card-body px-4 pb-4">
{% if buckets %}
<form method="POST" action="{{ url_for('ui.create_peer_replication_rules', site_id=peer.site_id) }}">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<div class="mb-4">
<label for="mode" class="form-label fw-medium">Replication Mode</label>
<select class="form-select" id="mode" name="mode">
<option value="new_only">New Objects Only</option>
<option value="all">All Objects (includes existing)</option>
<option value="bidirectional">Bidirectional Sync</option>
</select>
</div>
<div id="bidirWarning" class="alert alert-warning d-none mb-4" role="alert">
<h6 class="alert-heading fw-bold d-flex align-items-center gap-2">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M1 11.5a.5.5 0 0 0 .5.5h11.793l-3.147 3.146a.5.5 0 0 0 .708.708l4-4a.5.5 0 0 0 0-.708l-4-4a.5.5 0 0 0-.708.708L13.293 11H1.5a.5.5 0 0 0-.5.5zm14-7a.5.5 0 0 1-.5.5H2.707l3.147 3.146a.5.5 0 1 1-.708.708l-4-4a.5.5 0 0 1 0-.708l4-4a.5.5 0 1 1 .708.708L2.707 4H14.5a.5.5 0 0 1 .5.5z"/>
</svg>
Bidirectional Sync Requires Configuration on Both Sites
</h6>
<p class="mb-2">For bidirectional sync to work properly, you must configure <strong>both</strong> sites. This wizard only configures one direction.</p>
<hr class="my-2">
<p class="mb-2 fw-semibold">After completing this wizard, you must also:</p>
<ol class="mb-2 ps-3">
<li>Go to <strong>{{ peer.display_name or peer.site_id }}</strong>'s admin UI</li>
<li>Register <strong>this site</strong> as a peer (with a connection)</li>
<li>Create matching bidirectional replication rules pointing back to this site</li>
<li>Ensure <code>SITE_SYNC_ENABLED=true</code> is set on both sites</li>
</ol>
{# Local identity summary. Both badges fall back to 'Not configured' when the
   value is missing or empty, mirroring the "not fully configured" check that
   follows this block. #}
<div class="d-flex align-items-center gap-2 mt-3">
<span class="badge bg-light text-dark border">Local Site ID: <strong>{{ local_site.site_id if local_site and local_site.site_id else 'Not configured' }}</strong></span>
<span class="badge bg-light text-dark border">Local Endpoint: <strong>{{ local_site.endpoint if local_site and local_site.endpoint else 'Not configured' }}</strong></span>
</div>
{% if not local_site or not local_site.site_id or not local_site.endpoint %}
<div class="alert alert-danger mt-3 mb-0 py-2">
<small><strong>Warning:</strong> Your local site identity is not fully configured. The remote site won't be able to connect back. <a href="{{ url_for('ui.sites_dashboard') }}">Configure it now</a>.</small>
</div>
{% endif %}
</div>
<div class="table-responsive">
{# Bucket selection table: one row per local bucket. Rows whose bucket already
   has a rule are rendered with disabled inputs. Checkboxes and target-name
   inputs carry aria-labels because they have no visible <label>. #}
<table class="table table-hover align-middle mb-0">
<thead class="table-light">
<tr>
<th scope="col" style="width: 40px;">
<input type="checkbox" class="form-check-input" id="selectAll" aria-label="Select all buckets">
</th>
<th scope="col">Local Bucket</th>
<th scope="col">Target Bucket Name</th>
<th scope="col">Status</th>
</tr>
</thead>
<tbody>
{% for bucket in buckets %}
<tr>
<td>
<input type="checkbox" class="form-check-input bucket-checkbox"
name="buckets" value="{{ bucket.name }}"
aria-label="Select bucket {{ bucket.name }}"
{% if bucket.has_rule %}disabled{% endif %}>
</td>
<td>
<div class="d-flex align-items-center gap-2">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="text-muted" viewBox="0 0 16 16">
<path d="M2.522 5H2a.5.5 0 0 0-.494.574l1.372 9.149A1.5 1.5 0 0 0 4.36 16h7.278a1.5 1.5 0 0 0 1.483-1.277l1.373-9.149A.5.5 0 0 0 14 5h-.522A5.5 5.5 0 0 0 2.522 5zm1.005 0a4.5 4.5 0 0 1 8.945 0H3.527z"/>
</svg>
<span class="fw-medium">{{ bucket.name }}</span>
</div>
</td>
<td>
<input type="text" class="form-control form-control-sm"
name="target_{{ bucket.name }}"
value="{{ bucket.existing_target or bucket.name }}"
placeholder="{{ bucket.name }}"
aria-label="Target bucket name for {{ bucket.name }}"
{% if bucket.has_rule %}disabled{% endif %}>
</td>
<td>
{% if bucket.has_rule %}
<span class="badge bg-info bg-opacity-10 text-info">
Already configured ({{ bucket.existing_mode }})
</span>
{% else %}
<span class="badge bg-secondary bg-opacity-10 text-secondary">
Not configured
</span>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="d-flex gap-2 mt-4 pt-3 border-top">
<button type="submit" class="btn btn-primary" id="submitBtn" disabled>
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1" viewBox="0 0 16 16">
<path d="M10.97 4.97a.75.75 0 0 1 1.07 1.05l-3.99 4.99a.75.75 0 0 1-1.08.02L4.324 8.384a.75.75 0 1 1 1.06-1.06l2.094 2.093 3.473-4.425a.267.267 0 0 1 .02-.022z"/>
</svg>
Create Replication Rules
</button>
<a href="{{ url_for('ui.sites_dashboard') }}" class="btn btn-outline-secondary">
Skip for Now
</a>
</div>
</form>
{% else %}
<div class="empty-state text-center py-5">
<div class="empty-state-icon mx-auto mb-3">
<svg xmlns="http://www.w3.org/2000/svg" width="48" height="48" fill="currentColor" viewBox="0 0 16 16">
<path d="M2.522 5H2a.5.5 0 0 0-.494.574l1.372 9.149A1.5 1.5 0 0 0 4.36 16h7.278a1.5 1.5 0 0 0 1.483-1.277l1.373-9.149A.5.5 0 0 0 14 5h-.522A5.5 5.5 0 0 0 2.522 5zm1.005 0a4.5 4.5 0 0 1 8.945 0H3.527z"/>
</svg>
</div>
<h5 class="fw-semibold mb-2">No buckets yet</h5>
<p class="text-muted mb-3">Create some buckets first, then come back to set up replication.</p>
<a href="{{ url_for('ui.buckets_overview') }}" class="btn btn-primary">
Go to Buckets
</a>
</div>
{% endif %}
</div>
</div>
</div>
</div>
<script>
// Replication wizard page behaviour:
//  - shows the bidirectional-sync warning only while that mode is selected,
//  - keeps the "select all" checkbox in step with the per-bucket checkboxes,
//  - enables the submit button, with a live rule count, only while at least
//    one bucket is ticked.
// Every element lookup is null-guarded because the form is not rendered when
// there are no buckets.
(function() {
    // Check-mark icon re-inserted whenever the submit button label is rebuilt.
    const SUBMIT_ICON =
        '<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1" viewBox="0 0 16 16">\n' +
        '<path d="M10.97 4.97a.75.75 0 0 1 1.07 1.05l-3.99 4.99a.75.75 0 0 1-1.08.02L4.324 8.384a.75.75 0 1 1 1.06-1.06l2.094 2.093 3.473-4.425a.267.267 0 0 1 .02-.022z"/>\n' +
        '</svg>';

    const selectAllCheckbox = document.getElementById('selectAll');
    // Only enabled checkboxes take part in selection; disabled ones are skipped.
    const selectableCheckboxes = Array.from(
        document.querySelectorAll('.bucket-checkbox:not(:disabled)')
    );
    const submitBtn = document.getElementById('submitBtn');
    const modeSelect = document.getElementById('mode');
    const bidirWarning = document.getElementById('bidirWarning');

    // Show the warning panel only for the "bidirectional" mode.
    function updateBidirWarning() {
        if (!modeSelect || !bidirWarning) {
            return;
        }
        bidirWarning.classList.toggle('d-none', modeSelect.value !== 'bidirectional');
    }

    // Enable/disable the submit button and reflect the number of ticked buckets.
    function updateSubmitButton() {
        if (!submitBtn) {
            return;
        }
        const checkedCount = document.querySelectorAll('.bucket-checkbox:checked').length;
        submitBtn.disabled = checkedCount === 0;
        const label = checkedCount > 0
            ? `Create ${checkedCount} Replication Rule${checkedCount > 1 ? 's' : ''}`
            : 'Create Replication Rules';
        submitBtn.innerHTML = `\n${SUBMIT_ICON}\n${label}\n`;
    }

    // Derive the select-all state (checked / indeterminate) from the rows.
    function updateSelectAll() {
        if (!selectAllCheckbox || selectableCheckboxes.length === 0) {
            return;
        }
        const allChecked = selectableCheckboxes.every(cb => cb.checked);
        const someChecked = selectableCheckboxes.some(cb => cb.checked);
        selectAllCheckbox.checked = allChecked;
        selectAllCheckbox.indeterminate = someChecked && !allChecked;
    }

    if (modeSelect) {
        modeSelect.addEventListener('change', updateBidirWarning);
        updateBidirWarning();
    }

    if (selectAllCheckbox) {
        if (selectableCheckboxes.length === 0) {
            // Nothing can be selected, so the control would be a silent no-op;
            // disable it instead of leaving it clickable.
            selectAllCheckbox.disabled = true;
        }
        selectAllCheckbox.addEventListener('change', function() {
            const checked = selectAllCheckbox.checked;
            selectableCheckboxes.forEach(cb => {
                cb.checked = checked;
            });
            updateSubmitButton();
        });
    }

    selectableCheckboxes.forEach(cb => {
        cb.addEventListener('change', function() {
            updateSelectAll();
            updateSubmitButton();
        });
    });

    // Initial sync with whatever state the page was rendered with.
    updateSelectAll();
    updateSubmitButton();
})();
</script>
{% endblock %}

742
templates/sites.html Normal file
View File

@@ -0,0 +1,742 @@
{% extends "base.html" %}
{% block title %}Sites - S3 Compatible Storage{% endblock %}
{% block content %}
<div class="page-header d-flex justify-content-between align-items-center mb-4">
<div>
<p class="text-uppercase text-muted small mb-1">Geo-Distribution</p>
<h1 class="h3 mb-1 d-flex align-items-center gap-2">
<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
<path d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8zm7.5-6.923c-.67.204-1.335.82-1.887 1.855A7.97 7.97 0 0 0 5.145 4H7.5V1.077zM4.09 4a9.267 9.267 0 0 1 .64-1.539 6.7 6.7 0 0 1 .597-.933A7.025 7.025 0 0 0 2.255 4H4.09zm-.582 3.5c.03-.877.138-1.718.312-2.5H1.674a6.958 6.958 0 0 0-.656 2.5h2.49zM4.847 5a12.5 12.5 0 0 0-.338 2.5H7.5V5H4.847zM8.5 5v2.5h2.99a12.495 12.495 0 0 0-.337-2.5H8.5zM4.51 8.5a12.5 12.5 0 0 0 .337 2.5H7.5V8.5H4.51zm3.99 0V11h2.653c.187-.765.306-1.608.338-2.5H8.5zM5.145 12c.138.386.295.744.468 1.068.552 1.035 1.218 1.65 1.887 1.855V12H5.145zm.182 2.472a6.696 6.696 0 0 1-.597-.933A9.268 9.268 0 0 1 4.09 12H2.255a7.024 7.024 0 0 0 3.072 2.472zM3.82 11a13.652 13.652 0 0 1-.312-2.5h-2.49c.062.89.291 1.733.656 2.5H3.82zm6.853 3.472A7.024 7.024 0 0 0 13.745 12H11.91a9.27 9.27 0 0 1-.64 1.539 6.688 6.688 0 0 1-.597.933zM8.5 12v2.923c.67-.204 1.335-.82 1.887-1.855.173-.324.33-.682.468-1.068H8.5zm3.68-1h2.146c.365-.767.594-1.61.656-2.5h-2.49a13.65 13.65 0 0 1-.312 2.5zm2.802-3.5a6.959 6.959 0 0 0-.656-2.5H12.18c.174.782.282 1.623.312 2.5h2.49zM11.27 2.461c.247.464.462.98.64 1.539h1.835a7.024 7.024 0 0 0-3.072-2.472c.218.284.418.598.597.933zM10.855 4a7.966 7.966 0 0 0-.468-1.068C9.835 1.897 9.17 1.282 8.5 1.077V4h2.355z"/>
</svg>
Site Registry
</h1>
<p class="text-muted mb-0 mt-1">Configure this site's identity and manage peer sites for geo-distribution.</p>
</div>
<div class="d-none d-md-block">
<span class="badge bg-primary bg-opacity-10 text-primary fs-6 px-3 py-2">
{{ peers|length }} peer{{ 's' if peers|length != 1 else '' }}
</span>
</div>
</div>
<div class="row g-4">
<div class="col-lg-4 col-md-5">
<div class="card shadow-sm border-0 mb-4" style="border-radius: 1rem;">
<div class="card-header bg-transparent border-0 pt-4 pb-0 px-4">
<h5 class="fw-semibold d-flex align-items-center gap-2 mb-1">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
<path d="M8 16s6-5.686 6-10A6 6 0 0 0 2 6c0 4.314 6 10 6 10zm0-7a3 3 0 1 1 0-6 3 3 0 0 1 0 6z"/>
</svg>
Local Site Identity
</h5>
<p class="text-muted small mb-0">This site's configuration</p>
</div>
<div class="card-body px-4 pb-4">
<form method="POST" action="{{ url_for('ui.update_local_site') }}">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
{# Local site fields. Text values are wrapped as "(stored if local_site else
   config default) or ''" so a missing value renders as an empty input rather
   than the literal string "None". Priority is left untouched because 0 is a
   valid value and must not be replaced by a fallback. #}
<div class="mb-3">
<label for="site_id" class="form-label fw-medium">Site ID</label>
<input type="text" class="form-control" id="site_id" name="site_id" required
value="{{ (local_site.site_id if local_site else config_site_id) or '' }}"
placeholder="us-west-1">
<div class="form-text">Unique identifier for this site</div>
</div>
<div class="mb-3">
<label for="endpoint" class="form-label fw-medium">Endpoint URL</label>
<input type="url" class="form-control" id="endpoint" name="endpoint"
value="{{ (local_site.endpoint if local_site else config_site_endpoint) or '' }}"
placeholder="https://s3.us-west-1.example.com">
<div class="form-text">Public URL for this site</div>
</div>
<div class="mb-3">
<label for="region" class="form-label fw-medium">Region</label>
<input type="text" class="form-control" id="region" name="region"
value="{{ (local_site.region if local_site else config_site_region) or '' }}">
</div>
<div class="row mb-3">
<div class="col-6">
<label for="priority" class="form-label fw-medium">Priority</label>
<input type="number" class="form-control" id="priority" name="priority"
value="{{ local_site.priority if local_site else 100 }}" min="0">
<div class="form-text">Lower = preferred</div>
</div>
<div class="col-6">
<label for="display_name" class="form-label fw-medium">Display Name</label>
<input type="text" class="form-control" id="display_name" name="display_name"
value="{{ (local_site.display_name if local_site else '') or '' }}"
placeholder="US West Primary">
</div>
</div>
<div class="d-grid">
<button type="submit" class="btn btn-primary">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1" viewBox="0 0 16 16">
<path d="M10.97 4.97a.75.75 0 0 1 1.07 1.05l-3.99 4.99a.75.75 0 0 1-1.08.02L4.324 8.384a.75.75 0 1 1 1.06-1.06l2.094 2.093 3.473-4.425a.267.267 0 0 1 .02-.022z"/>
</svg>
Save Local Site
</button>
</div>
</form>
</div>
</div>
<div class="card shadow-sm border-0" style="border-radius: 1rem;">
<div class="card-header bg-transparent border-0 pt-4 pb-0 px-4">
<h5 class="fw-semibold d-flex align-items-center gap-2 mb-1">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M8 2a.5.5 0 0 1 .5.5v5h5a.5.5 0 0 1 0 1h-5v5a.5.5 0 0 1-1 0v-5h-5a.5.5 0 0 1 0-1h5v-5A.5.5 0 0 1 8 2Z"/>
</svg>
Add Peer Site
</h5>
<p class="text-muted small mb-0">Register a remote site</p>
</div>
<div class="card-body px-4 pb-4">
<form method="POST" action="{{ url_for('ui.add_peer_site') }}">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<div class="mb-3">
<label for="peer_site_id" class="form-label fw-medium">Site ID</label>
<input type="text" class="form-control" id="peer_site_id" name="site_id" required placeholder="us-east-1">
</div>
<div class="mb-3">
<label for="peer_endpoint" class="form-label fw-medium">Endpoint URL</label>
<input type="url" class="form-control" id="peer_endpoint" name="endpoint" required placeholder="https://s3.us-east-1.example.com">
</div>
<div class="mb-3">
<label for="peer_region" class="form-label fw-medium">Region</label>
<input type="text" class="form-control" id="peer_region" name="region" value="us-east-1">
</div>
<div class="row mb-3">
<div class="col-6">
<label for="peer_priority" class="form-label fw-medium">Priority</label>
<input type="number" class="form-control" id="peer_priority" name="priority" value="100" min="0">
</div>
<div class="col-6">
<label for="peer_display_name" class="form-label fw-medium">Display Name</label>
<input type="text" class="form-control" id="peer_display_name" name="display_name" placeholder="US East DR">
</div>
</div>
<div class="mb-3">
<label for="peer_connection_id" class="form-label fw-medium">Connection</label>
<select class="form-select" id="peer_connection_id" name="connection_id">
<option value="">No connection</option>
{% for conn in connections %}
<option value="{{ conn.id }}">{{ conn.name }} ({{ conn.endpoint_url }})</option>
{% endfor %}
</select>
<div class="form-text">Link to a remote connection for health checks</div>
</div>
<div class="d-grid">
<button type="submit" class="btn btn-primary">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M8 2a.5.5 0 0 1 .5.5v5h5a.5.5 0 0 1 0 1h-5v5a.5.5 0 0 1-1 0v-5h-5a.5.5 0 0 1 0-1h5v-5A.5.5 0 0 1 8 2Z"/>
</svg>
Add Peer Site
</button>
</div>
</form>
</div>
</div>
</div>
<div class="col-lg-8 col-md-7">
<div class="card shadow-sm border-0" style="border-radius: 1rem;">
<div class="card-header bg-transparent border-0 pt-4 pb-0 px-4 d-flex justify-content-between align-items-center">
<div>
<h5 class="fw-semibold d-flex align-items-center gap-2 mb-1">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-muted" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M6 3.5A1.5 1.5 0 0 1 7.5 2h1A1.5 1.5 0 0 1 10 3.5v1A1.5 1.5 0 0 1 8.5 6v1H14a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-1 0V8h-5v.5a.5.5 0 0 1-1 0V8h-5v.5a.5.5 0 0 1-1 0v-1A.5.5 0 0 1 2 7h5.5V6A1.5 1.5 0 0 1 6 4.5v-1zM8.5 5a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1zM0 11.5A1.5 1.5 0 0 1 1.5 10h1A1.5 1.5 0 0 1 4 11.5v1A1.5 1.5 0 0 1 2.5 14h-1A1.5 1.5 0 0 1 0 12.5v-1zm1.5-.5a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1zm4.5.5A1.5 1.5 0 0 1 7.5 10h1a1.5 1.5 0 0 1 1.5 1.5v1A1.5 1.5 0 0 1 8.5 14h-1A1.5 1.5 0 0 1 6 12.5v-1zm1.5-.5a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1zm4.5.5a1.5 1.5 0 0 1 1.5-1.5h1a1.5 1.5 0 0 1 1.5 1.5v1a1.5 1.5 0 0 1-1.5 1.5h-1a1.5 1.5 0 0 1-1.5-1.5v-1zm1.5-.5a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1z"/>
</svg>
Peer Sites
</h5>
<p class="text-muted small mb-0">Known remote sites in the cluster</p>
</div>
</div>
<div class="card-body px-4 pb-4">
{% if peers %}
<div class="table-responsive">
<table class="table table-hover align-middle mb-0">
<thead class="table-light">
<tr>
<th scope="col" style="width: 50px;">Health</th>
<th scope="col">Site ID</th>
<th scope="col">Endpoint</th>
<th scope="col">Region</th>
<th scope="col">Priority</th>
<th scope="col">Sync Status</th>
<th scope="col" class="text-end">Actions</th>
</tr>
</thead>
<tbody>
{% for item in peers_with_stats %}
{% set peer = item.peer %}
<tr data-site-id="{{ peer.site_id }}">
<td class="text-center">
<span class="peer-health-status" data-site-id="{{ peer.site_id }}" title="{% if peer.is_healthy == true %}Healthy{% elif peer.is_healthy == false %}Unhealthy{% else %}Unknown{% endif %}">
{% if peer.is_healthy == true %}
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="text-success" viewBox="0 0 16 16">
<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zm-3.97-3.03a.75.75 0 0 0-1.08.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-.01-1.05z"/>
</svg>
{% elif peer.is_healthy == false %}
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="text-danger" viewBox="0 0 16 16">
<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM5.354 4.646a.5.5 0 1 0-.708.708L7.293 8l-2.647 2.646a.5.5 0 0 0 .708.708L8 8.707l2.646 2.647a.5.5 0 0 0 .708-.708L8.707 8l2.647-2.646a.5.5 0 0 0-.708-.708L8 7.293 5.354 4.646z"/>
</svg>
{% else %}
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="text-muted" viewBox="0 0 16 16">
<path d="M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z"/>
<path d="M5.255 5.786a.237.237 0 0 0 .241.247h.825c.138 0 .248-.113.266-.25.09-.656.54-1.134 1.342-1.134.686 0 1.314.343 1.314 1.168 0 .635-.374.927-.965 1.371-.673.489-1.206 1.06-1.168 1.987l.003.217a.25.25 0 0 0 .25.246h.811a.25.25 0 0 0 .25-.25v-.105c0-.718.273-.927 1.01-1.486.609-.463 1.244-.977 1.244-2.056 0-1.511-1.276-2.241-2.673-2.241-1.267 0-2.655.59-2.75 2.286zm1.557 5.763c0 .533.425.927 1.01.927.609 0 1.028-.394 1.028-.927 0-.552-.42-.94-1.029-.94-.584 0-1.009.388-1.009.94z"/>
</svg>
{% endif %}
</span>
</td>
<td>
<div class="d-flex align-items-center gap-2">
<div class="peer-icon">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" viewBox="0 0 16 16">
<path d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8zm7.5-6.923c-.67.204-1.335.82-1.887 1.855A7.97 7.97 0 0 0 5.145 4H7.5V1.077zM4.09 4a9.267 9.267 0 0 1 .64-1.539 6.7 6.7 0 0 1 .597-.933A7.025 7.025 0 0 0 2.255 4H4.09zm-.582 3.5c.03-.877.138-1.718.312-2.5H1.674a6.958 6.958 0 0 0-.656 2.5h2.49zM4.847 5a12.5 12.5 0 0 0-.338 2.5H7.5V5H4.847zM8.5 5v2.5h2.99a12.495 12.495 0 0 0-.337-2.5H8.5zM4.51 8.5a12.5 12.5 0 0 0 .337 2.5H7.5V8.5H4.51zm3.99 0V11h2.653c.187-.765.306-1.608.338-2.5H8.5z"/>
</svg>
</div>
<div>
<span class="fw-medium">{{ peer.display_name or peer.site_id }}</span>
{% if peer.display_name and peer.display_name != peer.site_id %}
<br><small class="text-muted">{{ peer.site_id }}</small>
{% endif %}
</div>
</div>
</td>
<td>
<span class="text-muted small text-truncate d-inline-block" style="max-width: 180px;" title="{{ peer.endpoint }}">{{ peer.endpoint }}</span>
</td>
<td><span class="badge bg-primary bg-opacity-10 text-primary">{{ peer.region }}</span></td>
<td><span class="badge bg-secondary bg-opacity-10 text-secondary">{{ peer.priority }}</span></td>
<td class="sync-stats-cell" data-site-id="{{ peer.site_id }}">
{% if item.has_connection %}
<div class="d-flex align-items-center gap-2">
<span class="badge bg-primary bg-opacity-10 text-primary">{{ item.buckets_syncing }} bucket{{ 's' if item.buckets_syncing != 1 else '' }}</span>
{% if item.has_bidirectional %}
<span class="bidir-status-icon" data-site-id="{{ peer.site_id }}" title="Bidirectional sync configured - click to verify">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="text-info" viewBox="0 0 16 16" style="cursor: pointer;">
<path fill-rule="evenodd" d="M1 11.5a.5.5 0 0 0 .5.5h11.793l-3.147 3.146a.5.5 0 0 0 .708.708l4-4a.5.5 0 0 0 0-.708l-4-4a.5.5 0 0 0-.708.708L13.293 11H1.5a.5.5 0 0 0-.5.5zm14-7a.5.5 0 0 1-.5.5H2.707l3.147 3.146a.5.5 0 1 1-.708.708l-4-4a.5.5 0 0 1 0-.708l4-4a.5.5 0 1 1 .708.708L2.707 4H14.5a.5.5 0 0 1 .5.5z"/>
</svg>
</span>
{% endif %}
{% if item.buckets_syncing > 0 %}
<button type="button" class="btn btn-sm btn-outline-secondary btn-load-stats py-0 px-1"
data-site-id="{{ peer.site_id }}" title="Load sync details">
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M8 3a5 5 0 1 0 4.546 2.914.5.5 0 0 1 .908-.417A6 6 0 1 1 8 2v1z"/>
<path d="M8 4.466V.534a.25.25 0 0 1 .41-.192l2.36 1.966c.12.1.12.284 0 .384L8.41 4.658A.25.25 0 0 1 8 4.466z"/>
</svg>
</button>
{% endif %}
</div>
<div class="sync-stats-detail d-none mt-2 small" id="stats-{{ peer.site_id }}">
<span class="spinner-border spinner-border-sm text-muted" style="width: 12px; height: 12px;"></span>
</div>
{% else %}
<span class="text-muted small">No connection</span>
{% endif %}
</td>
<td class="text-end">
<div class="btn-group btn-group-sm" role="group">
{# Link to the replication wizard. Without a connection it is styled as
   disabled; aria-disabled and tabindex="-1" keep it out of the keyboard tab
   order as well, since the CSS class alone only blocks pointer events. #}
<a href="{{ url_for('ui.replication_wizard', site_id=peer.site_id) }}"
class="btn btn-outline-primary {% if not item.has_connection %}disabled{% endif %}"
{% if not item.has_connection %}aria-disabled="true" tabindex="-1"{% endif %}
title="Set up replication">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
<path d="M11.534 7h3.932a.25.25 0 0 1 .192.41l-1.966 2.36a.25.25 0 0 1-.384 0l-1.966-2.36a.25.25 0 0 1 .192-.41zm-11 2h3.932a.25.25 0 0 0 .192-.41L2.692 6.23a.25.25 0 0 0-.384 0L.342 8.59A.25.25 0 0 0 .534 9z"/>
<path fill-rule="evenodd" d="M8 3c-1.552 0-2.94.707-3.857 1.818a.5.5 0 1 1-.771-.636A6.002 6.002 0 0 1 13.917 7H12.9A5.002 5.002 0 0 0 8 3zM3.1 9a5.002 5.002 0 0 0 8.757 2.182.5.5 0 1 1 .771.636A6.002 6.002 0 0 1 2.083 9H3.1z"/>
</svg>
</a>
{# Bidirectional-status check. Without a connection the button gets the real
   "disabled" attribute in addition to the styling class, so it cannot be
   activated from the keyboard either. #}
<button type="button" class="btn btn-outline-info btn-check-bidir {% if not item.has_connection %}disabled{% endif %}"
{% if not item.has_connection %}disabled{% endif %}
data-site-id="{{ peer.site_id }}"
data-display-name="{{ peer.display_name or peer.site_id }}"
title="Check bidirectional sync status">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M1 11.5a.5.5 0 0 0 .5.5h11.793l-3.147 3.146a.5.5 0 0 0 .708.708l4-4a.5.5 0 0 0 0-.708l-4-4a.5.5 0 0 0-.708.708L13.293 11H1.5a.5.5 0 0 0-.5.5zm14-7a.5.5 0 0 1-.5.5H2.707l3.147 3.146a.5.5 0 1 1-.708.708l-4-4a.5.5 0 0 1 0-.708l4-4a.5.5 0 1 1 .708.708L2.707 4H14.5a.5.5 0 0 1 .5.5z"/>
</svg>
</button>
<button type="button" class="btn btn-outline-secondary btn-check-health"
data-site-id="{{ peer.site_id }}"
title="Check health">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
<path d="M11.251.068a.5.5 0 0 1 .227.58L9.677 6.5H13a.5.5 0 0 1 .364.843l-8 8.5a.5.5 0 0 1-.842-.49L6.323 9.5H3a.5.5 0 0 1-.364-.843l8-8.5a.5.5 0 0 1 .615-.09z"/>
</svg>
</button>
{# Opens the edit-peer modal; the data-* attributes carry this peer's current
   values. display_name uses "or ''" (like connection_id) so a missing name is
   passed as an empty string rather than the literal "None". #}
<button type="button" class="btn btn-outline-secondary"
data-bs-toggle="modal"
data-bs-target="#editPeerModal"
data-site-id="{{ peer.site_id }}"
data-endpoint="{{ peer.endpoint }}"
data-region="{{ peer.region }}"
data-priority="{{ peer.priority }}"
data-display-name="{{ peer.display_name or '' }}"
data-connection-id="{{ peer.connection_id or '' }}"
title="Edit peer">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
<path d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5 13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5z"/>
</svg>
</button>
<button type="button" class="btn btn-outline-danger"
data-bs-toggle="modal"
data-bs-target="#deletePeerModal"
data-site-id="{{ peer.site_id }}"
data-display-name="{{ peer.display_name or peer.site_id }}"
title="Delete peer">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
<path d="M5.5 5.5A.5.5 0 0 1 6 6v6a.5.5 0 0 1-1 0V6a.5.5 0 0 1 .5-.5zm2.5 0a.5.5 0 0 1 .5.5v6a.5.5 0 0 1-1 0V6a.5.5 0 0 1 .5-.5zm3 .5a.5.5 0 0 0-1 0v6a.5.5 0 0 0 1 0V6z"/>
<path fill-rule="evenodd" d="M14.5 3a1 1 0 0 1-1 1H13v9a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V4h-.5a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1H6a1 1 0 0 1 1-1h2a1 1 0 0 1 1 1h3.5a1 1 0 0 1 1 1v1zM4.118 4 4 4.059V13a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1V4.059L11.882 4H4.118zM2.5 3V2h11v1h-11z"/>
</svg>
</button>
</div>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% else %}
<div class="empty-state text-center py-5">
<div class="empty-state-icon mx-auto mb-3">
<svg xmlns="http://www.w3.org/2000/svg" width="48" height="48" fill="currentColor" viewBox="0 0 16 16">
<path d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8zm7.5-6.923c-.67.204-1.335.82-1.887 1.855A7.97 7.97 0 0 0 5.145 4H7.5V1.077zM4.09 4a9.267 9.267 0 0 1 .64-1.539 6.7 6.7 0 0 1 .597-.933A7.025 7.025 0 0 0 2.255 4H4.09zm-.582 3.5c.03-.877.138-1.718.312-2.5H1.674a6.958 6.958 0 0 0-.656 2.5h2.49zM4.847 5a12.5 12.5 0 0 0-.338 2.5H7.5V5H4.847zM8.5 5v2.5h2.99a12.495 12.495 0 0 0-.337-2.5H8.5zM4.51 8.5a12.5 12.5 0 0 0 .337 2.5H7.5V8.5H4.51zm3.99 0V11h2.653c.187-.765.306-1.608.338-2.5H8.5zM5.145 12c.138.386.295.744.468 1.068.552 1.035 1.218 1.65 1.887 1.855V12H5.145zm.182 2.472a6.696 6.696 0 0 1-.597-.933A9.268 9.268 0 0 1 4.09 12H2.255a7.024 7.024 0 0 0 3.072 2.472z"/>
</svg>
</div>
<h5 class="fw-semibold mb-2">No peer sites yet</h5>
<p class="text-muted mb-0">Add peer sites to enable geo-distribution and site-to-site replication.</p>
</div>
{% endif %}
</div>
</div>
</div>
</div>
{# Edit-peer dialog. The inputs and the form action are filled in by the page script from the data-* attributes of the button that opened the modal. #}
<div class="modal fade" id="editPeerModal" tabindex="-1" aria-labelledby="editPeerModalLabel" aria-hidden="true">
  <div class="modal-dialog modal-dialog-centered">
    <div class="modal-content">
      <div class="modal-header border-0 pb-0">
        {# The id lets aria-labelledby on the dialog announce this title. #}
        <h5 class="modal-title fw-semibold" id="editPeerModalLabel">
          <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
            <path d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5 13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5zm-9.761 5.175-.106.106-1.528 3.821 3.821-1.528.106-.106A.5.5 0 0 1 5 12.5V12h-.5a.5.5 0 0 1-.5-.5V11h-.5a.5.5 0 0 1-.468-.325z"/>
          </svg>
          Edit Peer Site
        </h5>
        <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
      </div>
      <form method="POST" id="editPeerForm">
        <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
        <div class="modal-body">
          <div class="mb-3">
            {# Read-only and unnamed: the site ID is shown for reference and is not submitted as a form field. #}
            <label for="edit_site_id" class="form-label fw-medium">Site ID</label>
            <input type="text" class="form-control" id="edit_site_id" readonly>
          </div>
          <div class="mb-3">
            <label for="edit_endpoint" class="form-label fw-medium">Endpoint URL</label>
            <input type="url" class="form-control" id="edit_endpoint" name="endpoint" required>
          </div>
          <div class="mb-3">
            <label for="edit_region" class="form-label fw-medium">Region</label>
            <input type="text" class="form-control" id="edit_region" name="region" required>
          </div>
          <div class="row mb-3">
            <div class="col-6">
              <label for="edit_priority" class="form-label fw-medium">Priority</label>
              <input type="number" class="form-control" id="edit_priority" name="priority" min="0">
            </div>
            <div class="col-6">
              <label for="edit_display_name" class="form-label fw-medium">Display Name</label>
              <input type="text" class="form-control" id="edit_display_name" name="display_name">
            </div>
          </div>
          <div class="mb-3">
            <label for="edit_connection_id" class="form-label fw-medium">Connection</label>
            <select class="form-select" id="edit_connection_id" name="connection_id">
              <option value="">No connection</option>
              {% for conn in connections %}
              <option value="{{ conn.id }}">{{ conn.name }} ({{ conn.endpoint_url }})</option>
              {% endfor %}
            </select>
          </div>
        </div>
        <div class="modal-footer">
          <button type="button" class="btn btn-outline-secondary" data-bs-dismiss="modal">Cancel</button>
          <button type="submit" class="btn btn-primary">
            <svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1" viewBox="0 0 16 16">
              <path d="M10.97 4.97a.75.75 0 0 1 1.07 1.05l-3.99 4.99a.75.75 0 0 1-1.08.02L4.324 8.384a.75.75 0 1 1 1.06-1.06l2.094 2.093 3.473-4.425a.267.267 0 0 1 .02-.022z"/>
            </svg>
            Save
          </button>
        </div>
      </form>
    </div>
  </div>
</div>
{# Delete-peer confirmation dialog. The peer name and the form action are filled in by the page script when the modal opens. #}
<div class="modal fade" id="deletePeerModal" tabindex="-1" aria-labelledby="deletePeerModalLabel" aria-hidden="true">
  <div class="modal-dialog modal-dialog-centered">
    <div class="modal-content">
      <div class="modal-header border-0 pb-0">
        {# The id lets aria-labelledby on the dialog announce this title. #}
        <h5 class="modal-title fw-semibold" id="deletePeerModalLabel">
          <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-danger" viewBox="0 0 16 16">
            <path d="M5.5 5.5A.5.5 0 0 1 6 6v6a.5.5 0 0 1-1 0V6a.5.5 0 0 1 .5-.5zm2.5 0a.5.5 0 0 1 .5.5v6a.5.5 0 0 1-1 0V6a.5.5 0 0 1 .5-.5zm3 .5a.5.5 0 0 0-1 0v6a.5.5 0 0 0 1 0V6z"/>
            <path fill-rule="evenodd" d="M14.5 3a1 1 0 0 1-1 1H13v9a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V4h-.5a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1H6a1 1 0 0 1 1-1h2a1 1 0 0 1 1 1h3.5a1 1 0 0 1 1 1v1zM4.118 4 4 4.059V13a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1V4.059L11.882 4H4.118zM2.5 3V2h11v1h-11z"/>
          </svg>
          Delete Peer Site
        </h5>
        <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
      </div>
      <div class="modal-body">
        <p>Are you sure you want to delete <strong id="deletePeerName"></strong>?</p>
        <div class="alert alert-warning d-flex align-items-start small" role="alert">
          <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="flex-shrink-0 me-2 mt-0" viewBox="0 0 16 16">
            <path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm.93-9.412-1 4.705c-.07.34.029.533.304.533.194 0 .487-.07.686-.246l-.088.416c-.287.346-.92.598-1.465.598-.703 0-1.002-.422-.808-1.319l.738-3.468c.064-.293.006-.399-.287-.47l-.451-.081.082-.381 2.29-.287zM8 5.5a1 1 0 1 1 0-2 1 1 0 0 1 0 2z"/>
          </svg>
          <div>This will remove the peer from the site registry. Any site sync configurations may be affected.</div>
        </div>
      </div>
      <div class="modal-footer">
        <button type="button" class="btn btn-outline-secondary" data-bs-dismiss="modal">Cancel</button>
        <form method="POST" id="deletePeerForm">
          <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
          <button type="submit" class="btn btn-danger">
            <svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1" viewBox="0 0 16 16">
              <path d="M5.5 5.5A.5.5 0 0 1 6 6v6a.5.5 0 0 1-1 0V6a.5.5 0 0 1 .5-.5zm2.5 0a.5.5 0 0 1 .5.5v6a.5.5 0 0 1-1 0V6a.5.5 0 0 1 .5-.5zm3 .5a.5.5 0 0 0-1 0v6a.5.5 0 0 0 1 0V6z"/>
              <path fill-rule="evenodd" d="M14.5 3a1 1 0 0 1-1 1H13v9a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V4h-.5a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1H6a1 1 0 0 1 1-1h2a1 1 0 0 1 1 1h3.5a1 1 0 0 1 1 1v1zM4.118 4 4 4.059V13a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1V4.059L11.882 4H4.118zM2.5 3V2h11v1h-11z"/>
            </svg>
            Delete
          </button>
        </form>
      </div>
    </div>
  </div>
</div>
{# Bidirectional-sync status dialog. #bidirStatusContent is replaced by the page script with the result of the status request; #bidirWizardLink stays hidden (d-none) until the script reveals it. #}
<div class="modal fade" id="bidirStatusModal" tabindex="-1" aria-labelledby="bidirStatusModalLabel" aria-hidden="true">
  <div class="modal-dialog modal-dialog-centered modal-lg">
    <div class="modal-content">
      <div class="modal-header border-0 pb-0">
        {# The id lets aria-labelledby on the dialog announce this title. #}
        <h5 class="modal-title fw-semibold" id="bidirStatusModalLabel">
          <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-info me-2" viewBox="0 0 16 16">
            <path fill-rule="evenodd" d="M1 11.5a.5.5 0 0 0 .5.5h11.793l-3.147 3.146a.5.5 0 0 0 .708.708l4-4a.5.5 0 0 0 0-.708l-4-4a.5.5 0 0 0-.708.708L13.293 11H1.5a.5.5 0 0 0-.5.5zm14-7a.5.5 0 0 1-.5.5H2.707l3.147 3.146a.5.5 0 1 1-.708.708l-4-4a.5.5 0 0 1 0-.708l4-4a.5.5 0 1 1 .708.708L2.707 4H14.5a.5.5 0 0 1 .5.5z"/>
          </svg>
          Bidirectional Sync Status
        </h5>
        <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
      </div>
      <div class="modal-body">
        <div id="bidirStatusContent">
          <div class="text-center py-4">
            {# role="status" needs a text alternative; the hidden label gives assistive technology something to announce. #}
            <span class="spinner-border text-primary" role="status"><span class="visually-hidden">Loading...</span></span>
            <p class="text-muted mt-2 mb-0">Checking configuration...</p>
          </div>
        </div>
      </div>
      <div class="modal-footer">
        <button type="button" class="btn btn-outline-secondary" data-bs-dismiss="modal">Close</button>
        <a href="#" id="bidirWizardLink" class="btn btn-primary d-none">
          <svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1" viewBox="0 0 16 16">
            <path d="M9.828.722a.5.5 0 0 1 .354.146l4.95 4.95a.5.5 0 0 1 0 .707c-.48.48-1.072.588-1.503.588-.177 0-.335-.018-.46-.039l-3.134 3.134a5.927 5.927 0 0 1 .16 1.013c.046.702-.032 1.687-.72 2.375a.5.5 0 0 1-.707 0l-2.829-2.828-3.182 3.182c-.195.195-1.219.902-1.414.707-.195-.195.512-1.22.707-1.414l3.182-3.182-2.828-2.829a.5.5 0 0 1 0-.707c.688-.688 1.673-.767 2.375-.72a5.922 5.922 0 0 1 1.013.16l3.134-3.133a2.772 2.772 0 0 1-.04-.461c0-.43.108-1.022.589-1.503a.5.5 0 0 1 .353-.146z"/>
          </svg>
          Run Setup Wizard
        </a>
      </div>
    </div>
  </div>
</div>
<script>
(function() {
// When the edit modal opens, copy the peer's values from the data-* attributes
// of the button that triggered it into the form, and point the form at that
// peer's update URL.
const editPeerModal = document.getElementById('editPeerModal');
if (editPeerModal) {
  editPeerModal.addEventListener('show.bs.modal', (event) => {
    const trigger = event.relatedTarget;
    const peerSiteId = trigger.getAttribute('data-site-id');
    // [input element id, data attribute on the trigger button]
    const fieldSources = [
      ['edit_site_id', 'data-site-id'],
      ['edit_endpoint', 'data-endpoint'],
      ['edit_region', 'data-region'],
      ['edit_priority', 'data-priority'],
      ['edit_display_name', 'data-display-name'],
      ['edit_connection_id', 'data-connection-id'],
    ];
    fieldSources.forEach(([inputId, attributeName]) => {
      document.getElementById(inputId).value = trigger.getAttribute(attributeName);
    });
    const updateUrl = '/ui/sites/peers/' + encodeURIComponent(peerSiteId) + '/update';
    document.getElementById('editPeerForm').action = updateUrl;
  });
}
// When the delete modal opens, show the peer's name in the confirmation text
// and point the form at that peer's delete URL.
const deletePeerModal = document.getElementById('deletePeerModal');
if (deletePeerModal) {
  deletePeerModal.addEventListener('show.bs.modal', (event) => {
    const trigger = event.relatedTarget;
    const peerSiteId = trigger.getAttribute('data-site-id');
    // textContent (not innerHTML) so the name is always rendered as plain text.
    document.getElementById('deletePeerName').textContent = trigger.getAttribute('data-display-name');
    const deleteUrl = '/ui/sites/peers/' + encodeURIComponent(peerSiteId) + '/delete';
    document.getElementById('deletePeerForm').action = deleteUrl;
  });
}
// Health-check buttons: request the peer's health endpoint and reflect the
// result in the matching .peer-health-status indicator and in a toast.
document.querySelectorAll('.btn-check-health').forEach(function(btn) {
  btn.addEventListener('click', function() {
    const siteId = this.getAttribute('data-site-id');
    // CSS.escape keeps the attribute selector valid when the site ID contains
    // quotes, backslashes or other characters that are special in CSS strings.
    const statusSpan = document.querySelector('.peer-health-status[data-site-id="' + CSS.escape(siteId) + '"]');
    // The indicator element may be missing; in that case only the toasts are
    // shown instead of throwing on a null element.
    const setStatus = function(iconHtml, title) {
      if (!statusSpan) return;
      statusSpan.innerHTML = iconHtml;
      if (title !== undefined) statusSpan.title = title;
    };
    const notify = function(message, kind) {
      if (window.showToast) window.showToast(message, 'Health Check', kind);
    };
    setStatus('<span class="spinner-border spinner-border-sm text-muted" role="status" style="width: 14px; height: 14px;"></span>');
    fetch('/ui/sites/peers/' + encodeURIComponent(siteId) + '/health')
      .then(response => response.json())
      .then(data => {
        if (data.is_healthy) {
          setStatus('<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="text-success" viewBox="0 0 16 16"><path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zm-3.97-3.03a.75.75 0 0 0-1.08.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-.01-1.05z"/></svg>', 'Healthy');
          notify('Peer site is healthy', 'success');
        } else {
          setStatus('<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="text-danger" viewBox="0 0 16 16"><path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM5.354 4.646a.5.5 0 1 0-.708.708L7.293 8l-2.647 2.646a.5.5 0 0 0 .708.708L8 8.707l2.646 2.647a.5.5 0 0 0 .708-.708L8.707 8l2.647-2.646a.5.5 0 0 0-.708-.708L8 7.293 5.354 4.646z"/></svg>', 'Unhealthy' + (data.error ? ': ' + data.error : ''));
          notify(data.error || 'Peer site is unhealthy', 'error');
        }
      })
      .catch(err => {
        // Reached on network failure or when the response body is not JSON.
        setStatus('<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="text-muted" viewBox="0 0 16 16"><path d="M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z"/><path d="M5.255 5.786a.237.237 0 0 0 .241.247h.825c.138 0 .248-.113.266-.25.09-.656.54-1.134 1.342-1.134.686 0 1.314.343 1.314 1.168 0 .635-.374.927-.965 1.371-.673.489-1.206 1.06-1.168 1.987l.003.217a.25.25 0 0 0 .25.246h.811a.25.25 0 0 0 .25-.25v-.105c0-.718.273-.927 1.01-1.486.609-.463 1.244-.977 1.244-2.056 0-1.511-1.276-2.241-2.673-2.241-1.267 0-2.655.59-2.75 2.286zm1.557 5.763c0 .533.425.927 1.01.927.609 0 1.028-.394 1.028-.927 0-.552-.42-.94-1.029-.94-.584 0-1.009.388-1.009.94z"/></svg>', 'Check failed');
        notify('Failed to check health', 'error');
      });
  });
});
// Sync-stats buttons: load the peer's sync statistics and render them into
// the element with id "stats-<siteId>" (no-op when that element is absent).
document.querySelectorAll('.btn-load-stats').forEach(function(btn) {
  btn.addEventListener('click', function() {
    // Values from the JSON response are escaped before being written through
    // innerHTML so they are rendered as text, never parsed as markup.
    const escapeHtml = function(value) {
      if (value === null || value === undefined) return '';
      return String(value).replace(/[&<>"']/g, function(ch) {
        return {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'}[ch];
      });
    };
    const siteId = this.getAttribute('data-site-id');
    const detailDiv = document.getElementById('stats-' + siteId);
    if (!detailDiv) return;
    detailDiv.classList.remove('d-none');
    detailDiv.innerHTML = '<span class="spinner-border spinner-border-sm text-muted" style="width: 12px; height: 12px;"></span> Loading...';
    fetch('/ui/sites/peers/' + encodeURIComponent(siteId) + '/sync-stats')
      .then(response => response.json())
      .then(data => {
        if (data.error) {
          detailDiv.innerHTML = '<span class="text-danger">' + escapeHtml(data.error) + '</span>';
          return;
        }
        // last_sync_at is multiplied by 1000 before being handed to Date,
        // i.e. it is treated as a timestamp in seconds.
        const lastSync = data.last_sync_at
          ? new Date(data.last_sync_at * 1000).toLocaleString()
          : 'Never';
        detailDiv.innerHTML = `
          <div class="d-flex flex-wrap gap-2 mb-1">
            <span class="text-success"><strong>${escapeHtml(data.objects_synced)}</strong> synced</span>
            <span class="text-warning"><strong>${escapeHtml(data.objects_pending)}</strong> pending</span>
            <span class="text-danger"><strong>${escapeHtml(data.objects_failed)}</strong> failed</span>
          </div>
          <div class="text-muted" style="font-size: 0.75rem;">
            Last sync: ${escapeHtml(lastSync)}
          </div>
        `;
      })
      .catch(err => {
        detailDiv.innerHTML = '<span class="text-danger">Failed to load stats</span>';
      });
  });
});
// Clicking a bidirectional-status icon forwards the click to the
// .btn-check-bidir button that carries the same data-site-id.
document.querySelectorAll('.bidir-status-icon').forEach(function(icon) {
  icon.addEventListener('click', function() {
    const siteId = this.getAttribute('data-site-id');
    // CSS.escape keeps the attribute selector valid when the site ID contains
    // quotes, backslashes or other characters that are special in CSS strings.
    const btn = document.querySelector('.btn-check-bidir[data-site-id="' + CSS.escape(siteId) + '"]');
    if (btn) btn.click();
  });
});
// Bidirectional-check buttons: open the status modal, request the peer's
// bidirectional-status endpoint and render the result (issues, local/remote
// summary cards, local rules, and a "how to fix" hint) into the modal body.
document.querySelectorAll('.btn-check-bidir').forEach(function(btn) {
  btn.addEventListener('click', function() {
    // Everything interpolated into innerHTML below that comes from a data-*
    // attribute or from the JSON response goes through escapeHtml, so it is
    // rendered as text and never parsed as markup.
    const escapeHtml = function(value) {
      if (value === null || value === undefined) return '';
      return String(value).replace(/[&<>"']/g, function(ch) {
        return {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'}[ch];
      });
    };
    const NOT_CONFIGURED = '<span class="text-danger">Not configured</span>';
    const yesNo = function(flag) {
      return flag ? '<span class="text-success">Yes</span>' : '<span class="text-danger">No</span>';
    };

    // Render one alert box listing all issues of a single severity.
    const renderIssueAlert = function(alertClass, iconPath, heading, issues) {
      if (issues.length === 0) return '';
      const items = issues
        .map(issue => `<li><strong>${escapeHtml(issue.code)}:</strong> ${escapeHtml(issue.message)}</li>`)
        .join('');
      return `
        <div class="alert ${alertClass} mb-3" role="alert">
          <h6 class="alert-heading fw-bold mb-2">
            <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="me-1" viewBox="0 0 16 16">
              <path d="${iconPath}"/>
            </svg>
            ${heading}
          </h6>
          <ul class="mb-0 ps-3">${items}</ul>
        </div>
      `;
    };

    // Render the summary card for this (local) site.
    const renderLocalCard = function(data) {
      return `
        <div class="col-md-6">
          <div class="card h-100">
            <div class="card-header bg-light py-2">
              <strong>This Site (Local)</strong>
            </div>
            <div class="card-body small">
              <p class="mb-1"><strong>Site ID:</strong> ${data.local_site_id ? escapeHtml(data.local_site_id) : NOT_CONFIGURED}</p>
              <p class="mb-1"><strong>Endpoint:</strong> ${data.local_endpoint ? escapeHtml(data.local_endpoint) : NOT_CONFIGURED}</p>
              <p class="mb-1"><strong>Site Sync Worker:</strong> ${data.local_site_sync_enabled ? '<span class="text-success">Enabled</span>' : '<span class="text-warning">Disabled</span>'}</p>
              <p class="mb-0"><strong>Bidirectional Rules:</strong> ${data.local_bidirectional_rules ? data.local_bidirectional_rules.length : 0}</p>
            </div>
          </div>
        </div>
      `;
    };

    // Render the summary card for the remote site; remoteStatus may be absent.
    const renderRemoteCard = function(remoteStatus, safeDisplayName) {
      let body;
      if (!remoteStatus) {
        body = '<p class="text-muted mb-0">Could not check remote status</p>';
      } else if (remoteStatus.admin_access_denied) {
        body = '<p class="text-warning mb-0">Admin access denied - cannot verify remote configuration</p>';
      } else if (remoteStatus.reachable === false) {
        body = '<p class="text-danger mb-0">Could not reach remote admin API</p>';
      } else {
        body = `
          <p class="mb-1"><strong>Has Peer Entry for Us:</strong> ${yesNo(remoteStatus.has_peer_for_us)}</p>
          <p class="mb-1"><strong>Connection Configured:</strong> ${yesNo(remoteStatus.peer_connection_configured)}</p>
        `;
      }
      return `
        <div class="col-md-6">
          <div class="card h-100">
            <div class="card-header bg-light py-2">
              <strong>Remote Site (${safeDisplayName})</strong>
            </div>
            <div class="card-body small">
              ${body}
            </div>
          </div>
        </div>
      `;
    };

    // Render the table of local bidirectional rules ('' when there are none).
    const renderRulesTable = function(rules) {
      if (!rules || rules.length === 0) return '';
      const rows = rules.map(rule => `
        <tr>
          <td>${escapeHtml(rule.bucket_name)}</td>
          <td>${escapeHtml(rule.target_bucket)}</td>
          <td>${rule.enabled ? '<span class="badge bg-success">Enabled</span>' : '<span class="badge bg-secondary">Disabled</span>'}</td>
        </tr>
      `).join('');
      return `
        <div class="mt-3">
          <h6 class="fw-semibold">Local Bidirectional Rules</h6>
          <table class="table table-sm table-bordered mb-0">
            <thead class="table-light">
              <tr>
                <th>Source Bucket</th>
                <th>Target Bucket</th>
                <th>Status</th>
              </tr>
            </thead>
            <tbody>${rows}</tbody>
          </table>
        </div>
      `;
    };

    const siteId = this.getAttribute('data-site-id');
    // Escaped once here; only ever used inside HTML below.
    const displayName = escapeHtml(this.getAttribute('data-display-name'));
    const modal = new bootstrap.Modal(document.getElementById('bidirStatusModal'));
    const contentDiv = document.getElementById('bidirStatusContent');
    const wizardLink = document.getElementById('bidirWizardLink');

    contentDiv.innerHTML = `
      <div class="text-center py-4">
        <span class="spinner-border text-primary" role="status"><span class="visually-hidden">Loading...</span></span>
        <p class="text-muted mt-2 mb-0">Checking bidirectional configuration with ${displayName}...</p>
      </div>
    `;
    // Hide the wizard link until this check decides whether to offer it.
    wizardLink.classList.add('d-none');
    modal.show();

    fetch('/ui/sites/peers/' + encodeURIComponent(siteId) + '/bidirectional-status')
      .then(response => response.json())
      .then(data => {
        let html = '';
        if (data.is_fully_configured) {
          html += `
            <div class="alert alert-success d-flex align-items-center mb-4" role="alert">
              <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="currentColor" class="flex-shrink-0 me-2" viewBox="0 0 16 16">
                <path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zm-3.97-3.03a.75.75 0 0 0-1.08.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-.01-1.05z"/>
              </svg>
              <div>
                <strong>Bidirectional sync is fully configured!</strong><br>
                <small>Both sites are set up to sync data in both directions.</small>
              </div>
            </div>
          `;
        } else if (data.issues && data.issues.length > 0) {
          html += renderIssueAlert(
            'alert-danger',
            'M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM5.354 4.646a.5.5 0 1 0-.708.708L7.293 8l-2.647 2.646a.5.5 0 0 0 .708.708L8 8.707l2.646 2.647a.5.5 0 0 0 .708-.708L8.707 8l2.647-2.646a.5.5 0 0 0-.708-.708L8 7.293 5.354 4.646z',
            'Configuration Errors',
            data.issues.filter(i => i.severity === 'error')
          );
          html += renderIssueAlert(
            'alert-warning',
            'M8.982 1.566a1.13 1.13 0 0 0-1.96 0L.165 13.233c-.457.778.091 1.767.98 1.767h13.713c.889 0 1.438-.99.98-1.767L8.982 1.566zM8 5c.535 0 .954.462.9.995l-.35 3.507a.552.552 0 0 1-1.1 0L7.1 5.995A.905.905 0 0 1 8 5zm.002 6a1 1 0 1 1 0 2 1 1 0 0 1 0-2z',
            'Warnings',
            data.issues.filter(i => i.severity === 'warning')
          );
        }

        html += '<div class="row g-3">';
        html += renderLocalCard(data);
        html += renderRemoteCard(data.remote_status, displayName);
        html += '</div>';
        html += renderRulesTable(data.local_bidirectional_rules);

        if (!data.is_fully_configured) {
          html += `
            <div class="alert alert-info mt-3 mb-0" role="alert">
              <h6 class="alert-heading fw-bold">How to Fix</h6>
              <ol class="mb-0 ps-3">
                <li>Ensure this site has a Site ID and Endpoint URL configured</li>
                <li>On the remote site, register this site as a peer with a connection</li>
                <li>Create bidirectional replication rules on both sites</li>
                <li>Enable SITE_SYNC_ENABLED=true on both sites</li>
              </ol>
            </div>
          `;
          // The wizard link is only offered when none of these issue codes is present.
          const blockingErrors = ['NO_CONNECTION', 'CONNECTION_NOT_FOUND', 'REMOTE_UNREACHABLE', 'ENDPOINT_NOT_ALLOWED'];
          const hasBlockingError = data.issues && data.issues.some(i => blockingErrors.includes(i.code));
          if (!hasBlockingError) {
            wizardLink.href = '/ui/sites/peers/' + encodeURIComponent(siteId) + '/replication-wizard';
            wizardLink.classList.remove('d-none');
          }
        }
        contentDiv.innerHTML = html;
      })
      .catch(err => {
        // Reached on network failure or when the response body is not JSON.
        contentDiv.innerHTML = `
          <div class="alert alert-danger" role="alert">
            <strong>Error:</strong> Failed to check bidirectional status. ${escapeHtml(err && err.message)}
          </div>
        `;
      });
  });
});
})();
</script>
{% endblock %}

View File

@@ -1,191 +0,0 @@
import hashlib
import hmac
import pytest
from datetime import datetime, timedelta, timezone
from urllib.parse import quote
def _sign(key, msg):
    """Return the raw HMAC-SHA256 digest of the UTF-8 encoded *msg* under *key*."""
    return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()


def _get_signature_key(key, date_stamp, region_name, service_name):
    """Derive the SigV4 signing key.

    The secret is prefixed with ``AWS4`` and then chained through HMAC-SHA256
    over the date stamp, region, service and the literal ``aws4_request``.
    """
    k_date = _sign(("AWS4" + key).encode("utf-8"), date_stamp)
    k_region = _sign(k_date, region_name)
    k_service = _sign(k_region, service_name)
    k_signing = _sign(k_service, "aws4_request")
    return k_signing


def create_signed_headers(
    method,
    path,
    headers=None,
    body=None,
    access_key="test",
    secret_key="secret",
    region="us-east-1",
    service="s3",
    timestamp=None
):
    """Build AWS Signature Version 4 request headers for a test request.

    Args:
        method: HTTP method placed on the first line of the canonical request.
        path: Request path; percent-encoded to form the canonical URI. The
            canonical query string is always empty, so *path* should not
            carry a query string.
        headers: Optional extra headers to send and sign. The mapping is
            copied; the caller's dict is left untouched.
        body: Optional request body as bytes; ``None`` is hashed as ``b""``.
        access_key: Access key placed in the ``Credential`` field.
        secret_key: Secret used to derive the signing key.
        region: Region component of the credential scope.
        service: Service component of the credential scope.
        timestamp: ``datetime`` used for ``X-Amz-Date``; defaults to the
            current UTC time.

    Returns:
        A new dict holding the supplied headers plus ``X-Amz-Date``, ``Host``,
        ``X-Amz-Content-Sha256`` and ``Authorization``.
    """
    # Work on a copy so repeated calls never leak signature headers into a
    # dict the caller still owns.
    headers = dict(headers) if headers else {}
    now = datetime.now(timezone.utc) if timestamp is None else timestamp
    amz_date = now.strftime("%Y%m%dT%H%M%SZ")
    date_stamp = now.strftime("%Y%m%d")
    payload_hash = hashlib.sha256(body or b"").hexdigest()

    headers["X-Amz-Date"] = amz_date
    headers["Host"] = "testserver"
    # Set before canonicalisation: every x-amz-* header that is sent must
    # also be part of the signed headers.
    headers["X-Amz-Content-Sha256"] = payload_hash

    canonical_uri = quote(path, safe="/-_.~")
    canonical_query_string = ""
    # Canonical headers are lower-cased, sorted by name and value-trimmed.
    ordered = sorted(headers.items(), key=lambda item: item[0].lower())
    canonical_headers = "".join(f"{name.lower()}:{value.strip()}\n" for name, value in ordered)
    signed_headers = ";".join(name.lower() for name, _ in ordered)

    canonical_request = (
        f"{method}\n{canonical_uri}\n{canonical_query_string}\n"
        f"{canonical_headers}\n{signed_headers}\n{payload_hash}"
    )
    credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
    request_digest = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
    string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{request_digest}"
    signing_key = _get_signature_key(secret_key, date_stamp, region, service)
    signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
    headers["Authorization"] = (
        f"AWS4-HMAC-SHA256 Credential={access_key}/{credential_scope}, "
        f"SignedHeaders={signed_headers}, Signature={signature}"
    )
    return headers
def test_sigv4_old_date(client):
    """A request signed 20 minutes in the past is answered with 403."""
    stale_timestamp = datetime.now(timezone.utc) - timedelta(minutes=20)
    signed = create_signed_headers("GET", "/", timestamp=stale_timestamp)
    reply = client.get("/", headers=signed)
    assert reply.status_code == 403
    assert b"Request timestamp too old" in reply.data
def test_sigv4_future_date(client):
    """A request signed 20 minutes in the future is answered with 403.

    The expected error text is the same one asserted for a stale timestamp.
    """
    skewed_timestamp = datetime.now(timezone.utc) + timedelta(minutes=20)
    signed = create_signed_headers("GET", "/", timestamp=skewed_timestamp)
    reply = client.get("/", headers=signed)
    assert reply.status_code == 403
    assert b"Request timestamp too old" in reply.data
def test_path_traversal_in_key(client, signer):
    """Send PUT requests whose object keys contain ``..``; asserts nothing.

    The route is ``/<bucket_name>/<path:object_key>``. Whether the ``..``
    segments reach the application unchanged depends on Flask's URL handling
    (it might normalize the path first), so the responses are deliberately
    not checked here. The storage-layer rejection is verified by calling the
    storage method directly in ``test_storage_path_traversal``.
    """
    client.put("/test-bucket", headers=signer("PUT", "/test-bucket"))
    traversal_urls = (
        # ``..`` directly below the bucket.
        "/test-bucket/../secret.txt",
        # ``..`` nested inside the key, which routing may pass through as-is.
        "/test-bucket/folder/../file.txt",
    )
    for url in traversal_urls:
        signed = signer("PUT", url, body=b"attack")
        client.put(url, headers=signed, data=b"attack")
def test_storage_path_traversal(app):
    """``_sanitize_object_key`` raises StorageError for keys containing ``..``."""
    from app.storage import StorageError, ObjectStorage
    from app.encrypted_storage import EncryptedObjectStorage

    backend = app.extensions["object_storage"]
    # Unwrap the encryption layer to reach the underlying ObjectStorage.
    if isinstance(backend, EncryptedObjectStorage):
        backend = backend.storage

    for unsafe_key in ("folder/../file.txt", ".."):
        with pytest.raises(StorageError, match="Object key contains parent directory references"):
            backend._sanitize_object_key(unsafe_key)
def test_head_bucket(client, signer):
    """HEAD on a created bucket gives 200; on an unknown bucket, 404."""
    client.put("/head-test", headers=signer("PUT", "/head-test"))

    existing = client.head("/head-test", headers=signer("HEAD", "/head-test"))
    assert existing.status_code == 200

    missing = client.head("/non-existent", headers=signer("HEAD", "/non-existent"))
    assert missing.status_code == 404
def test_head_object(client, signer):
    """HEAD on an uploaded object gives 200 with ETag and length; unknown key gives 404."""
    bucket_url = "/head-obj-test"
    object_url = "/head-obj-test/obj"
    payload = b"content"

    client.put(bucket_url, headers=signer("PUT", bucket_url))
    client.put(object_url, headers=signer("PUT", object_url, body=payload), data=payload)

    found = client.head(object_url, headers=signer("HEAD", object_url))
    assert found.status_code == 200
    assert found.headers["ETag"]
    # b"content" is seven bytes long.
    assert found.headers["Content-Length"] == "7"

    missing_url = "/head-obj-test/missing"
    not_found = client.head(missing_url, headers=signer("HEAD", missing_url))
    assert not_found.status_code == 404
def test_list_parts(client, signer):
    """Listing a multipart upload returns the two uploaded parts in order."""
    from xml.etree.ElementTree import fromstring

    bucket_url = "/multipart-test"
    client.put(bucket_url, headers=signer("PUT", bucket_url))

    # Start the multipart upload and read its id from the XML reply.
    initiate_url = "/multipart-test/obj?uploads"
    initiated = client.post(initiate_url, headers=signer("POST", initiate_url))
    assert initiated.status_code == 200
    upload_id = fromstring(initiated.data).find("UploadId").text

    for part_number, payload in ((1, b"part1"), (2, b"part2")):
        part_url = f"/multipart-test/obj?partNumber={part_number}&uploadId={upload_id}"
        client.put(part_url, headers=signer("PUT", part_url, body=payload), data=payload)

    listing_url = f"/multipart-test/obj?uploadId={upload_id}"
    listing = client.get(listing_url, headers=signer("GET", listing_url))
    assert listing.status_code == 200

    root = fromstring(listing.data)
    assert root.tag == "ListPartsResult"
    part_numbers = [part.find("PartNumber").text for part in root.findall("Part")]
    assert part_numbers == ["1", "2"]

460
tests/test_site_sync.py Normal file
View File

@@ -0,0 +1,460 @@
import io
import json
import time
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from app.connections import ConnectionStore, RemoteConnection
from app.replication import (
ReplicationManager,
ReplicationRule,
REPLICATION_MODE_BIDIRECTIONAL,
REPLICATION_MODE_NEW_ONLY,
)
from app.site_sync import (
SiteSyncWorker,
SyncState,
SyncedObjectInfo,
SiteSyncStats,
RemoteObjectMeta,
)
from app.storage import ObjectStorage
@pytest.fixture
def storage(tmp_path: Path):
    """Provide an ObjectStorage rooted at a newly created ``data`` directory under tmp_path."""
    data_dir = tmp_path / "data"
    data_dir.mkdir(parents=True)
    return ObjectStorage(data_dir)
@pytest.fixture
def connections(tmp_path: Path):
    """Provide a ConnectionStore backed by ``connections.json`` holding one connection, ``test-conn``."""
    store = ConnectionStore(tmp_path / "connections.json")
    store.add(
        RemoteConnection(
            id="test-conn",
            name="Test Remote",
            endpoint_url="http://localhost:9000",
            access_key="remote-access",
            secret_key="remote-secret",
            region="us-east-1",
        )
    )
    return store
@pytest.fixture
def replication_manager(storage, connections, tmp_path):
    """Yield a ReplicationManager and shut it down (without waiting) after the test."""
    data_dir = tmp_path / "data"
    # The storage fixture normally creates this directory already.
    data_dir.mkdir(exist_ok=True)
    manager = ReplicationManager(
        storage,
        connections,
        tmp_path / "replication_rules.json",
        data_dir,
    )
    yield manager
    manager.shutdown(wait=False)
@pytest.fixture
def site_sync_worker(storage, connections, replication_manager, tmp_path):
    """Yield a SiteSyncWorker built from the other fixtures and shut it down after the test."""
    worker_options = {
        "storage": storage,
        "connections": connections,
        "replication_manager": replication_manager,
        "storage_root": tmp_path / "data",
        "interval_seconds": 60,
        "batch_size": 100,
    }
    worker = SiteSyncWorker(**worker_options)
    yield worker
    worker.shutdown()
class TestSyncedObjectInfo:
    """Dict conversion checks for SyncedObjectInfo."""

    def test_to_dict(self):
        """to_dict() exposes each constructor value under its field name."""
        expected = {
            "last_synced_at": 1234567890.0,
            "remote_etag": "abc123",
            "source": "remote",
        }
        serialized = SyncedObjectInfo(**expected).to_dict()
        for field_name, value in expected.items():
            assert serialized[field_name] == value

    def test_from_dict(self):
        """from_dict() copies each key onto the matching attribute."""
        payload = {
            "last_synced_at": 9876543210.0,
            "remote_etag": "def456",
            "source": "local",
        }
        restored = SyncedObjectInfo.from_dict(payload)
        assert restored.last_synced_at == 9876543210.0
        assert restored.remote_etag == "def456"
        assert restored.source == "local"
class TestSyncState:
    """Dict conversion checks for SyncState."""

    def test_to_dict(self):
        """to_dict() nests the per-object entries and keeps last_full_sync."""
        tracked = SyncedObjectInfo(
            last_synced_at=1000.0,
            remote_etag="etag1",
            source="remote",
        )
        state = SyncState(synced_objects={"test.txt": tracked}, last_full_sync=2000.0)
        serialized = state.to_dict()
        assert "test.txt" in serialized["synced_objects"]
        assert serialized["synced_objects"]["test.txt"]["remote_etag"] == "etag1"
        assert serialized["last_full_sync"] == 2000.0

    def test_from_dict(self):
        """from_dict() rebuilds the per-object entries and last_full_sync."""
        entry = {
            "last_synced_at": 3000.0,
            "remote_etag": "etag2",
            "source": "remote",
        }
        payload = {"synced_objects": {"file.txt": entry}, "last_full_sync": 4000.0}
        restored = SyncState.from_dict(payload)
        assert "file.txt" in restored.synced_objects
        assert restored.synced_objects["file.txt"].remote_etag == "etag2"
        assert restored.last_full_sync == 4000.0

    def test_from_dict_empty(self):
        """An empty dict yields no tracked objects and no last_full_sync."""
        restored = SyncState.from_dict({})
        assert restored.synced_objects == {}
        assert restored.last_full_sync is None
class TestSiteSyncStats:
    """Dict conversion check for SiteSyncStats."""

    def test_to_dict(self):
        """to_dict() reports every counter passed to the constructor."""
        counters = {
            "objects_pulled": 10,
            "objects_skipped": 5,
            "conflicts_resolved": 2,
            "deletions_applied": 1,
            "errors": 0,
        }
        serialized = SiteSyncStats(last_sync_at=1234567890.0, **counters).to_dict()
        # last_sync_at is supplied but, as before, only the counters are checked.
        for counter_name, value in counters.items():
            assert serialized[counter_name] == value
class TestRemoteObjectMeta:
    """Construction check for RemoteObjectMeta."""

    def test_from_s3_object(self):
        """from_s3_object() maps the listing entry's fields and drops the quotes around the ETag."""
        modified_at = datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        listing_entry = {
            "Key": "test/file.txt",
            "Size": 1024,
            "LastModified": modified_at,
            "ETag": '"abc123def456"',
        }
        meta = RemoteObjectMeta.from_s3_object(listing_entry)
        assert meta.key == "test/file.txt"
        assert meta.size == 1024
        assert meta.last_modified == datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        assert meta.etag == "abc123def456"
class TestReplicationRuleBidirectional:
    """Checks for the ``mode``, ``sync_deletions`` and ``last_pull_at`` rule fields."""

    def test_rule_with_bidirectional_mode(self):
        """A bidirectional rule keeps its mode and flag; ``last_pull_at`` starts as None."""
        options = dict(
            bucket_name="sync-bucket",
            target_connection_id="test-conn",
            target_bucket="remote-bucket",
            enabled=True,
            mode=REPLICATION_MODE_BIDIRECTIONAL,
            sync_deletions=True,
        )
        created = ReplicationRule(**options)
        assert created.mode == REPLICATION_MODE_BIDIRECTIONAL
        assert created.sync_deletions is True
        assert created.last_pull_at is None

    def test_rule_to_dict_includes_new_fields(self):
        """``to_dict()`` carries the mode, deletion flag and last pull timestamp."""
        dumped = ReplicationRule(
            bucket_name="sync-bucket",
            target_connection_id="test-conn",
            target_bucket="remote-bucket",
            mode=REPLICATION_MODE_BIDIRECTIONAL,
            sync_deletions=False,
            last_pull_at=1234567890.0,
        ).to_dict()
        assert dumped["mode"] == REPLICATION_MODE_BIDIRECTIONAL
        assert dumped["sync_deletions"] is False
        assert dumped["last_pull_at"] == 1234567890.0

    def test_rule_from_dict_with_new_fields(self):
        """``from_dict`` restores the mode, deletion flag and last pull timestamp."""
        payload = {
            "bucket_name": "sync-bucket",
            "target_connection_id": "test-conn",
            "target_bucket": "remote-bucket",
            "mode": REPLICATION_MODE_BIDIRECTIONAL,
            "sync_deletions": False,
            "last_pull_at": 1234567890.0,
        }
        restored = ReplicationRule.from_dict(payload)
        assert restored.mode == REPLICATION_MODE_BIDIRECTIONAL
        assert restored.sync_deletions is False
        assert restored.last_pull_at == 1234567890.0

    def test_rule_from_dict_defaults_new_fields(self):
        """With the optional keys absent, deletions sync is on and no pull is recorded."""
        minimal_payload = {
            "bucket_name": "sync-bucket",
            "target_connection_id": "test-conn",
            "target_bucket": "remote-bucket",
        }
        restored = ReplicationRule.from_dict(minimal_payload)
        assert restored.sync_deletions is True
        assert restored.last_pull_at is None
class TestSiteSyncWorker:
    """Tests for the site sync worker: lifecycle, manual triggers, conflict
    resolution, sync-state persistence and object listing."""

    @staticmethod
    def _local_stub(last_modified, etag):
        """Build a mock local-object record exposing only the two given attributes."""
        return MagicMock(last_modified=last_modified, etag=etag)

    @staticmethod
    def _remote_meta(last_modified, etag):
        """Build the remote metadata record shared by the conflict tests."""
        return RemoteObjectMeta(
            key="test.txt",
            size=100,
            last_modified=last_modified,
            etag=etag,
        )

    def test_start_and_shutdown(self, site_sync_worker):
        """``start()`` leaves a live sync thread; ``shutdown()`` leaves it stopped."""
        site_sync_worker.start()
        assert site_sync_worker._sync_thread is not None
        assert site_sync_worker._sync_thread.is_alive()
        site_sync_worker.shutdown()
        # The attribute is re-read on purpose instead of caching the thread object.
        assert not site_sync_worker._sync_thread.is_alive()

    def test_trigger_sync_no_rule(self, site_sync_worker):
        """Triggering a bucket that has no rule returns None."""
        outcome = site_sync_worker.trigger_sync("nonexistent-bucket")
        assert outcome is None

    def test_trigger_sync_wrong_mode(self, site_sync_worker, replication_manager):
        """A rule in new-only mode is not synced by a manual trigger."""
        replication_manager.set_rule(
            ReplicationRule(
                bucket_name="new-only-bucket",
                target_connection_id="test-conn",
                target_bucket="remote-bucket",
                mode=REPLICATION_MODE_NEW_ONLY,
                enabled=True,
            )
        )
        outcome = site_sync_worker.trigger_sync("new-only-bucket")
        assert outcome is None

    def test_trigger_sync_disabled_rule(self, site_sync_worker, replication_manager):
        """A disabled bidirectional rule is not synced by a manual trigger."""
        replication_manager.set_rule(
            ReplicationRule(
                bucket_name="disabled-bucket",
                target_connection_id="test-conn",
                target_bucket="remote-bucket",
                mode=REPLICATION_MODE_BIDIRECTIONAL,
                enabled=False,
            )
        )
        outcome = site_sync_worker.trigger_sync("disabled-bucket")
        assert outcome is None

    def test_get_stats_no_sync(self, site_sync_worker):
        """No stats are reported for a bucket that was never synced."""
        reported = site_sync_worker.get_stats("nonexistent")
        assert reported is None

    def test_resolve_conflict_remote_newer(self, site_sync_worker):
        """A remote object modified after the local one resolves to ``"pull"``."""
        local_time = datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        remote_time = datetime(2025, 1, 2, 12, 0, 0, tzinfo=timezone.utc)
        decision = site_sync_worker._resolve_conflict(
            self._local_stub(local_time, "local123"),
            self._remote_meta(remote_time, "remote456"),
        )
        assert decision == "pull"

    def test_resolve_conflict_local_newer(self, site_sync_worker):
        """A local object modified after the remote one resolves to ``"keep"``."""
        local_time = datetime(2025, 1, 2, 12, 0, 0, tzinfo=timezone.utc)
        remote_time = datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        decision = site_sync_worker._resolve_conflict(
            self._local_stub(local_time, "local123"),
            self._remote_meta(remote_time, "remote456"),
        )
        assert decision == "keep"

    def test_resolve_conflict_same_time_same_etag(self, site_sync_worker):
        """Identical timestamp and ETag on both sides resolves to ``"skip"``."""
        shared_time = datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        decision = site_sync_worker._resolve_conflict(
            self._local_stub(shared_time, "same123"),
            self._remote_meta(shared_time, "same123"),
        )
        assert decision == "skip"

    def test_resolve_conflict_same_time_different_etag(self, site_sync_worker):
        """Equal timestamps with local ETag ``aaa`` and remote ``zzz`` resolve to ``"pull"``."""
        shared_time = datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        decision = site_sync_worker._resolve_conflict(
            self._local_stub(shared_time, "aaa"),
            self._remote_meta(shared_time, "zzz"),
        )
        assert decision == "pull"

    def test_sync_state_persistence(self, site_sync_worker, tmp_path):
        """State written with ``_save_sync_state`` is read back by ``_load_sync_state``."""
        bucket_name = "test-bucket"
        tracked = SyncedObjectInfo(
            last_synced_at=time.time(),
            remote_etag="etag1",
            source="remote",
        )
        original_state = SyncState(
            synced_objects={"file1.txt": tracked},
            last_full_sync=time.time(),
        )
        site_sync_worker._save_sync_state(bucket_name, original_state)
        reloaded = site_sync_worker._load_sync_state(bucket_name)
        assert "file1.txt" in reloaded.synced_objects
        assert reloaded.synced_objects["file1.txt"].remote_etag == "etag1"

    def test_load_sync_state_nonexistent(self, site_sync_worker):
        """Loading state for an unknown bucket yields an empty state."""
        fresh = site_sync_worker._load_sync_state("nonexistent-bucket")
        assert fresh.synced_objects == {}
        assert fresh.last_full_sync is None

    @patch("app.site_sync._create_sync_client")
    def test_list_remote_objects(self, mock_create_client, site_sync_worker, connections, replication_manager):
        """Entries from the paginated remote listing are returned keyed by object key."""
        listing_page = {
            "Contents": [
                {
                    "Key": "file1.txt",
                    "Size": 100,
                    "LastModified": datetime(2025, 1, 1, tzinfo=timezone.utc),
                    "ETag": '"etag1"',
                },
                {
                    "Key": "file2.txt",
                    "Size": 200,
                    "LastModified": datetime(2025, 1, 2, tzinfo=timezone.utc),
                    "ETag": '"etag2"',
                },
            ]
        }
        # Wire the patched client factory to a client whose paginator yields one page.
        fake_paginator = MagicMock()
        fake_paginator.paginate.return_value = [listing_page]
        fake_client = MagicMock()
        fake_client.get_paginator.return_value = fake_paginator
        mock_create_client.return_value = fake_client

        rule = ReplicationRule(
            bucket_name="local-bucket",
            target_connection_id="test-conn",
            target_bucket="remote-bucket",
            mode=REPLICATION_MODE_BIDIRECTIONAL,
        )
        connection = connections.get("test-conn")
        listed = site_sync_worker._list_remote_objects(rule, connection)
        assert "file1.txt" in listed
        assert "file2.txt" in listed
        assert listed["file1.txt"].size == 100
        assert listed["file2.txt"].size == 200

    def test_list_local_objects(self, site_sync_worker, storage):
        """Objects stored in a local bucket show up in the local listing."""
        storage.create_bucket("test-bucket")
        for object_key, body in (("file1.txt", b"content1"), ("file2.txt", b"content2")):
            storage.put_object("test-bucket", object_key, io.BytesIO(body))
        listed = site_sync_worker._list_local_objects("test-bucket")
        assert "file1.txt" in listed
        assert "file2.txt" in listed

    @patch("app.site_sync._create_sync_client")
    def test_sync_bucket_connection_not_found(self, mock_create_client, site_sync_worker, replication_manager):
        """A rule naming the connection id ``missing-conn`` records exactly one error."""
        rule = ReplicationRule(
            bucket_name="test-bucket",
            target_connection_id="missing-conn",
            target_bucket="remote-bucket",
            mode=REPLICATION_MODE_BIDIRECTIONAL,
            enabled=True,
        )
        replication_manager.set_rule(rule)
        sync_stats = site_sync_worker._sync_bucket(rule)
        assert sync_stats.errors == 1
class TestSiteSyncIntegration:
    """Sync cycle test that runs the worker against a mocked remote client."""

    @patch("app.site_sync._create_sync_client")
    def test_full_sync_cycle(self, mock_create_client, site_sync_worker, storage, connections, replication_manager):
        """One sync pass pulls the remote-only object and keeps the local-only one."""
        # Local side: a bucket holding one object the remote does not list.
        storage.create_bucket("sync-bucket")
        storage.put_object("sync-bucket", "local-only.txt", io.BytesIO(b"local content"))

        # Remote side: a single listing page with one object missing locally.
        remote_page = {
            "Contents": [
                {
                    "Key": "remote-only.txt",
                    "Size": 100,
                    "LastModified": datetime(2025, 1, 15, tzinfo=timezone.utc),
                    "ETag": '"remoteetag"',
                },
            ]
        }
        fake_paginator = MagicMock()
        fake_paginator.paginate.return_value = [remote_page]

        def write_remote_payload(bucket, key, path):
            # Stand-in for the client's download: write fixed bytes to the given path.
            Path(path).write_bytes(b"remote content")

        fake_client = MagicMock()
        fake_client.get_paginator.return_value = fake_paginator
        fake_client.head_object.return_value = {"Metadata": {}}
        fake_client.download_file.side_effect = write_remote_payload
        mock_create_client.return_value = fake_client

        rule = ReplicationRule(
            bucket_name="sync-bucket",
            target_connection_id="test-conn",
            target_bucket="remote-bucket",
            mode=REPLICATION_MODE_BIDIRECTIONAL,
            enabled=True,
        )
        replication_manager.set_rule(rule)

        sync_stats = site_sync_worker._sync_bucket(rule)
        assert sync_stats.objects_pulled == 1
        assert sync_stats.errors == 0

        local_listing = site_sync_worker._list_local_objects("sync-bucket")
        assert "local-only.txt" in local_listing
        assert "remote-only.txt" in local_listing