UI overhaul; Replication and S3 API improvements
@@ -105,6 +105,18 @@ def create_app(
            value /= 1024.0
        return f"{value:.1f} PB"

    @app.template_filter("timestamp_to_datetime")
    def timestamp_to_datetime(value: float) -> str:
        """Format Unix timestamp as human-readable datetime."""
        from datetime import datetime
        if not value:
            return "Never"
        try:
            dt = datetime.fromtimestamp(value)
            return dt.strftime("%Y-%m-%d %H:%M:%S")
        except (ValueError, OSError):
            return "Unknown"

    if include_api:
        from .s3_api import s3_api_bp
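Aside (illustrative, not part of the diff): a minimal sketch of how the new timestamp_to_datetime filter behaves once the app factory has registered it; the create_app import path and default arguments are assumptions.

    from app import create_app  # assumed factory location

    app = create_app()
    with app.app_context():
        render = app.jinja_env.from_string("{{ ts | timestamp_to_datetime }}").render
        print(render(ts=1700000000.0))  # e.g. "2023-11-14 22:13:20" (local time)
        print(render(ts=0))             # "Never"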
@@ -65,6 +65,7 @@ class AppConfig:
    secret_ttl_seconds: int
    stream_chunk_size: int
    multipart_min_part_size: int
    bucket_stats_cache_ttl: int

    @classmethod
    def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
@@ -85,8 +86,6 @@ class AppConfig:
        default_secret = "dev-secret-key"
        secret_key = str(_get("SECRET_KEY", default_secret))

        # If using default/missing secret, try to load/persist a generated one from disk
        # This ensures consistency across Gunicorn workers
        if not secret_key or secret_key == default_secret:
            secret_file = storage_root / ".myfsio.sys" / "config" / ".secret"
            if secret_file.exists():
@@ -100,7 +99,6 @@ class AppConfig:
                    secret_file.write_text(generated)
                    secret_key = generated
                except OSError:
                    # Fallback if we can't write to disk (e.g. read-only fs)
                    secret_key = generated

        iam_env_override = "IAM_CONFIG" in overrides or "IAM_CONFIG" in os.environ
@@ -156,6 +154,7 @@ class AppConfig:
            "X-Amz-Signature",
        ])
        session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30))
        bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60))  # Default 60 seconds

        return cls(storage_root=storage_root,
                   max_upload_size=max_upload_size,
@@ -182,7 +181,8 @@ class AppConfig:
                   bulk_delete_max_keys=bulk_delete_max_keys,
                   secret_ttl_seconds=secret_ttl_seconds,
                   stream_chunk_size=stream_chunk_size,
                   multipart_min_part_size=multipart_min_part_size)
                   multipart_min_part_size=multipart_min_part_size,
                   bucket_stats_cache_ttl=bucket_stats_cache_ttl)

    def to_flask_config(self) -> Dict[str, Any]:
        return {
@@ -202,6 +202,7 @@ class AppConfig:
            "SECRET_TTL_SECONDS": self.secret_ttl_seconds,
            "STREAM_CHUNK_SIZE": self.stream_chunk_size,
            "MULTIPART_MIN_PART_SIZE": self.multipart_min_part_size,
            "BUCKET_STATS_CACHE_TTL": self.bucket_stats_cache_ttl,
            "LOG_LEVEL": self.log_level,
            "LOG_FILE": str(self.log_path),
            "LOG_MAX_BYTES": self.log_max_bytes,
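Aside (illustrative, not part of the diff): overriding the new cache TTL via the environment, assuming the config module lives at app.config and the remaining settings resolve from their defaults.

    import os
    from app.config import AppConfig  # assumed module path

    os.environ["BUCKET_STATS_CACHE_TTL"] = "300"  # cache bucket stats for 5 minutes
    cfg = AppConfig.from_env()
    assert cfg.bucket_stats_cache_ttl == 300
    print(cfg.to_flask_config()["BUCKET_STATS_CACHE_TTL"])  # 300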
@@ -1,11 +1,13 @@
"""Background replication worker."""
from __future__ import annotations

import json
import logging
import mimetypes
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, Optional

@@ -21,6 +23,41 @@ logger = logging.getLogger(__name__)

REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"

REPLICATION_MODE_NEW_ONLY = "new_only"
REPLICATION_MODE_ALL = "all"


@dataclass
class ReplicationStats:
    """Statistics for replication operations - computed dynamically."""
    objects_synced: int = 0  # Objects that exist in both source and destination
    objects_pending: int = 0  # Objects in source but not in destination
    objects_orphaned: int = 0  # Objects in destination but not in source (will be deleted)
    bytes_synced: int = 0  # Total bytes synced to destination
    last_sync_at: Optional[float] = None
    last_sync_key: Optional[str] = None

    def to_dict(self) -> dict:
        return {
            "objects_synced": self.objects_synced,
            "objects_pending": self.objects_pending,
            "objects_orphaned": self.objects_orphaned,
            "bytes_synced": self.bytes_synced,
            "last_sync_at": self.last_sync_at,
            "last_sync_key": self.last_sync_key,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ReplicationStats":
        return cls(
            objects_synced=data.get("objects_synced", 0),
            objects_pending=data.get("objects_pending", 0),
            objects_orphaned=data.get("objects_orphaned", 0),
            bytes_synced=data.get("bytes_synced", 0),
            last_sync_at=data.get("last_sync_at"),
            last_sync_key=data.get("last_sync_key"),
        )


@dataclass
class ReplicationRule:
@@ -28,6 +65,32 @@ class ReplicationRule:
    target_connection_id: str
    target_bucket: str
    enabled: bool = True
    mode: str = REPLICATION_MODE_NEW_ONLY
    created_at: Optional[float] = None
    stats: ReplicationStats = field(default_factory=ReplicationStats)

    def to_dict(self) -> dict:
        return {
            "bucket_name": self.bucket_name,
            "target_connection_id": self.target_connection_id,
            "target_bucket": self.target_bucket,
            "enabled": self.enabled,
            "mode": self.mode,
            "created_at": self.created_at,
            "stats": self.stats.to_dict(),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ReplicationRule":
        stats_data = data.pop("stats", {})
        # Handle old rules without mode/created_at
        if "mode" not in data:
            data["mode"] = REPLICATION_MODE_NEW_ONLY
        if "created_at" not in data:
            data["created_at"] = None
        rule = cls(**data)
        rule.stats = ReplicationStats.from_dict(stats_data) if stats_data else ReplicationStats()
        return rule


class ReplicationManager:
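Aside (illustrative, not part of the diff): a short round-trip through the new serialization helpers, showing the shape that save_rules persists; the import path and rules file name are assumptions.

    import json, time
    from app.replication import ReplicationRule, REPLICATION_MODE_ALL  # assumed module path

    rule = ReplicationRule(
        bucket_name="photos",
        target_connection_id="backup-site",
        target_bucket="photos-replica",
        mode=REPLICATION_MODE_ALL,
        created_at=time.time(),
    )
    payload = {rule.bucket_name: rule.to_dict()}
    print(json.dumps(payload, indent=2))        # what save_rules() writes to the rules file
    restored = ReplicationRule.from_dict(payload["photos"])
    assert restored.stats.objects_synced == 0   # stats default to an empty ReplicationStats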
@@ -36,6 +99,7 @@ class ReplicationManager:
        self.connections = connections
        self.rules_path = rules_path
        self._rules: Dict[str, ReplicationRule] = {}
        self._stats_lock = threading.Lock()
        self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
        self.reload_rules()

@@ -44,17 +108,15 @@ class ReplicationManager:
            self._rules = {}
            return
        try:
            import json
            with open(self.rules_path, "r") as f:
                data = json.load(f)
            for bucket, rule_data in data.items():
                self._rules[bucket] = ReplicationRule(**rule_data)
                self._rules[bucket] = ReplicationRule.from_dict(rule_data)
        except (OSError, ValueError) as e:
            logger.error(f"Failed to load replication rules: {e}")

    def save_rules(self) -> None:
        import json
        data = {b: rule.__dict__ for b, rule in self._rules.items()}
        data = {b: rule.to_dict() for b, rule in self._rules.items()}
        self.rules_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.rules_path, "w") as f:
            json.dump(data, f, indent=2)
@@ -70,6 +132,99 @@ class ReplicationManager:
        if bucket_name in self._rules:
            del self._rules[bucket_name]
            self.save_rules()

    def _update_last_sync(self, bucket_name: str, object_key: str = "") -> None:
        """Update last sync timestamp after a successful operation."""
        with self._stats_lock:
            rule = self._rules.get(bucket_name)
            if not rule:
                return
            rule.stats.last_sync_at = time.time()
            rule.stats.last_sync_key = object_key
            self.save_rules()

    def get_sync_status(self, bucket_name: str) -> Optional[ReplicationStats]:
        """Dynamically compute replication status by comparing source and destination buckets."""
        rule = self.get_rule(bucket_name)
        if not rule:
            return None

        connection = self.connections.get(rule.target_connection_id)
        if not connection:
            return rule.stats  # Return cached stats if connection unavailable

        try:
            # Get source objects
            source_objects = self.storage.list_objects(bucket_name)
            source_keys = {obj.key: obj.size for obj in source_objects}

            # Get destination objects
            s3 = boto3.client(
                "s3",
                endpoint_url=connection.endpoint_url,
                aws_access_key_id=connection.access_key,
                aws_secret_access_key=connection.secret_key,
                region_name=connection.region,
            )

            dest_keys = set()
            bytes_synced = 0
            paginator = s3.get_paginator('list_objects_v2')
            try:
                for page in paginator.paginate(Bucket=rule.target_bucket):
                    for obj in page.get('Contents', []):
                        dest_keys.add(obj['Key'])
                        if obj['Key'] in source_keys:
                            bytes_synced += obj.get('Size', 0)
            except ClientError as e:
                if e.response['Error']['Code'] == 'NoSuchBucket':
                    # Destination bucket doesn't exist yet
                    dest_keys = set()
                else:
                    raise

            # Compute stats
            synced = source_keys.keys() & dest_keys  # Objects in both
            orphaned = dest_keys - source_keys.keys()  # In dest but not source

            # For "new_only" mode, we can't determine pending since we don't know
            # which objects existed before replication was enabled. Only "all" mode
            # should show pending (objects that should be replicated but aren't yet).
            if rule.mode == REPLICATION_MODE_ALL:
                pending = source_keys.keys() - dest_keys  # In source but not dest
            else:
                pending = set()  # New-only mode: don't show pre-existing as pending

            # Update cached stats with computed values
            rule.stats.objects_synced = len(synced)
            rule.stats.objects_pending = len(pending)
            rule.stats.objects_orphaned = len(orphaned)
            rule.stats.bytes_synced = bytes_synced

            return rule.stats

        except (ClientError, StorageError) as e:
            logger.error(f"Failed to compute sync status for {bucket_name}: {e}")
            return rule.stats  # Return cached stats on error

    def replicate_existing_objects(self, bucket_name: str) -> None:
        """Trigger replication for all existing objects in a bucket."""
        rule = self.get_rule(bucket_name)
        if not rule or not rule.enabled:
            return

        connection = self.connections.get(rule.target_connection_id)
        if not connection:
            logger.warning(f"Cannot replicate existing objects: Connection {rule.target_connection_id} not found")
            return

        try:
            objects = self.storage.list_objects(bucket_name)
            logger.info(f"Starting replication of {len(objects)} existing objects from {bucket_name}")
            for obj in objects:
                self._executor.submit(self._replicate_task, bucket_name, obj.key, rule, connection, "write")
        except StorageError as e:
            logger.error(f"Failed to list objects for replication: {e}")

    def create_remote_bucket(self, connection_id: str, bucket_name: str) -> None:
        """Create a bucket on the remote connection."""
@@ -103,6 +258,7 @@ class ReplicationManager:
        self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, action)

    def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection, action: str) -> None:
        file_size = 0
        try:
            # Using boto3 to upload
            config = Config(user_agent_extra=REPLICATION_USER_AGENT)
@@ -119,21 +275,15 @@ class ReplicationManager:
                try:
                    s3.delete_object(Bucket=rule.target_bucket, Key=object_key)
                    logger.info(f"Replicated DELETE {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
                    self._update_last_sync(bucket_name, object_key)
                except ClientError as e:
                    logger.error(f"Replication DELETE failed for {bucket_name}/{object_key}: {e}")
                return

            # Get the local file path. This reaches into the storage layout directly;
            # since ObjectStorage is filesystem-backed we read the file itself rather
            # than streaming it through storage.get_object.
            try:
                path = self.storage.get_object_path(bucket_name, object_key)
            except StorageError:
                logger.error(f"Source object not found: {bucket_name}/{object_key}")
                return

            metadata = self.storage.get_object_metadata(bucket_name, object_key)
@@ -159,7 +309,6 @@ class ReplicationManager:
                    Metadata=metadata or {}
                )
            except (ClientError, S3UploadFailedError) as e:
                # Check if it's a NoSuchBucket error (either direct or wrapped)
                is_no_bucket = False
                if isinstance(e, ClientError):
                    if e.response['Error']['Code'] == 'NoSuchBucket':
@@ -189,6 +338,7 @@ class ReplicationManager:
                    raise e

            logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
            self._update_last_sync(bucket_name, object_key)

        except (ClientError, OSError, ValueError) as e:
            logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}")
app/s3_api.py (479 lines changed)
@@ -584,6 +584,73 @@ def _render_tagging_document(tags: list[dict[str, str]]) -> Element:
        SubElement(tag_el, "Value").text = tag.get("Value", "")
    return root

DANGEROUS_CONTENT_TYPES = frozenset([
    "text/html",
    "application/xhtml+xml",
    "application/javascript",
    "text/javascript",
    "application/x-javascript",
    "text/ecmascript",
    "application/ecmascript",
    "image/svg+xml",
])

SAFE_EXTENSION_MAP = {
    ".txt": ["text/plain"],
    ".json": ["application/json"],
    ".xml": ["application/xml", "text/xml"],
    ".csv": ["text/csv"],
    ".pdf": ["application/pdf"],
    ".png": ["image/png"],
    ".jpg": ["image/jpeg"],
    ".jpeg": ["image/jpeg"],
    ".gif": ["image/gif"],
    ".webp": ["image/webp"],
    ".mp4": ["video/mp4"],
    ".mp3": ["audio/mpeg"],
    ".zip": ["application/zip"],
    ".gz": ["application/gzip"],
    ".tar": ["application/x-tar"],
}


def _validate_content_type(object_key: str, content_type: str | None) -> str | None:
    """Validate Content-Type header for security.

    Returns an error message if validation fails, None otherwise.

    Rules:
    1. Block dangerous MIME types that can execute scripts (unless explicitly allowed)
    2. Warn if Content-Type doesn't match file extension (but don't block)
    """
    if not content_type:
        return None

    base_type = content_type.split(";")[0].strip().lower()

    if base_type in DANGEROUS_CONTENT_TYPES:
        ext = "." + object_key.rsplit(".", 1)[-1].lower() if "." in object_key else ""

        allowed_dangerous = {
            ".svg": "image/svg+xml",
            ".html": "text/html",
            ".htm": "text/html",
            ".xhtml": "application/xhtml+xml",
            ".js": "application/javascript",
            ".mjs": "application/javascript",
        }

        if ext in allowed_dangerous and base_type == allowed_dangerous[ext]:
            return None

        return (
            f"Content-Type '{content_type}' is potentially dangerous and not allowed "
            f"for object key '{object_key}'. Use a safe Content-Type or rename the file "
            f"with an appropriate extension."
        )

    return None


def _parse_cors_document(payload: bytes) -> list[dict[str, Any]]:
    try:
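Aside (illustrative, not part of the diff): a quick spot-check of how _validate_content_type treats a few representative uploads, mirroring what the PUT-object path does before storing; the import path is an assumption.

    from app.s3_api import _validate_content_type  # private helper, assumed import path

    assert _validate_content_type("report.pdf", "application/pdf") is None
    assert _validate_content_type("logo.svg", "image/svg+xml") is None   # extension matches, allowed
    assert _validate_content_type("notes.txt", None) is None             # missing header is not rejected
    error = _validate_content_type("avatar.png", "text/html; charset=utf-8")
    assert error is not None and "potentially dangerous" in error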
@@ -731,6 +798,8 @@ def _maybe_handle_bucket_subresource(bucket_name: str) -> Response | None:
        "tagging": _bucket_tagging_handler,
        "cors": _bucket_cors_handler,
        "encryption": _bucket_encryption_handler,
        "location": _bucket_location_handler,
        "acl": _bucket_acl_handler,
    }
    requested = [key for key in handlers if key in request.args]
    if not requested:
@@ -746,8 +815,8 @@ def _maybe_handle_bucket_subresource(bucket_name: str) -> Response | None:


def _bucket_versioning_handler(bucket_name: str) -> Response:
    if request.method != "GET":
        return _method_not_allowed(["GET"])
    if request.method not in {"GET", "PUT"}:
        return _method_not_allowed(["GET", "PUT"])
    principal, error = _require_principal()
    if error:
        return error
@@ -756,6 +825,31 @@ def _bucket_versioning_handler(bucket_name: str) -> Response:
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
    storage = _storage()

    if request.method == "PUT":
        payload = request.get_data(cache=False) or b""
        if not payload.strip():
            return _error_response("MalformedXML", "Request body is required", 400)
        try:
            root = fromstring(payload)
        except ParseError:
            return _error_response("MalformedXML", "Unable to parse XML document", 400)
        if _strip_ns(root.tag) != "VersioningConfiguration":
            return _error_response("MalformedXML", "Root element must be VersioningConfiguration", 400)
        status_el = root.find("{*}Status")
        if status_el is None:
            status_el = root.find("Status")
        status = (status_el.text or "").strip() if status_el is not None else ""
        if status not in {"Enabled", "Suspended", ""}:
            return _error_response("MalformedXML", "Status must be Enabled or Suspended", 400)
        try:
            storage.set_bucket_versioning(bucket_name, status == "Enabled")
        except StorageError as exc:
            return _error_response("NoSuchBucket", str(exc), 404)
        current_app.logger.info("Bucket versioning updated", extra={"bucket": bucket_name, "status": status})
        return Response(status=200)

    # GET
    try:
        enabled = storage.is_versioning_enabled(bucket_name)
    except StorageError as exc:
@@ -766,8 +860,8 @@ def _bucket_versioning_handler(bucket_name: str) -> Response:


def _bucket_tagging_handler(bucket_name: str) -> Response:
    if request.method not in {"GET", "PUT"}:
        return _method_not_allowed(["GET", "PUT"])
    if request.method not in {"GET", "PUT", "DELETE"}:
        return _method_not_allowed(["GET", "PUT", "DELETE"])
    principal, error = _require_principal()
    if error:
        return error
@@ -784,6 +878,14 @@ def _bucket_tagging_handler(bucket_name: str) -> Response:
        if not tags:
            return _error_response("NoSuchTagSet", "No tags are configured for this bucket", 404)
        return _xml_response(_render_tagging_document(tags))
    if request.method == "DELETE":
        try:
            storage.set_bucket_tags(bucket_name, None)
        except StorageError as exc:
            return _error_response("NoSuchBucket", str(exc), 404)
        current_app.logger.info("Bucket tags deleted", extra={"bucket": bucket_name})
        return Response(status=204)
    # PUT
    payload = request.get_data(cache=False) or b""
    try:
        tags = _parse_tagging_document(payload)
@@ -799,6 +901,64 @@ def _bucket_tagging_handler(bucket_name: str) -> Response:
    return Response(status=204)


def _object_tagging_handler(bucket_name: str, object_key: str) -> Response:
    """Handle object tagging operations (GET/PUT/DELETE /<bucket>/<key>?tagging)."""
    if request.method not in {"GET", "PUT", "DELETE"}:
        return _method_not_allowed(["GET", "PUT", "DELETE"])

    principal, error = _require_principal()
    if error:
        return error

    # For tagging, we use read permission for GET, write for PUT/DELETE
    action = "read" if request.method == "GET" else "write"
    try:
        _authorize_action(principal, bucket_name, action, object_key=object_key)
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)

    storage = _storage()

    if request.method == "GET":
        try:
            tags = storage.get_object_tags(bucket_name, object_key)
        except StorageError as exc:
            message = str(exc)
            if "Bucket" in message:
                return _error_response("NoSuchBucket", message, 404)
            return _error_response("NoSuchKey", message, 404)
        return _xml_response(_render_tagging_document(tags))

    if request.method == "DELETE":
        try:
            storage.delete_object_tags(bucket_name, object_key)
        except StorageError as exc:
            message = str(exc)
            if "Bucket" in message:
                return _error_response("NoSuchBucket", message, 404)
            return _error_response("NoSuchKey", message, 404)
        current_app.logger.info("Object tags deleted", extra={"bucket": bucket_name, "key": object_key})
        return Response(status=204)

    # PUT
    payload = request.get_data(cache=False) or b""
    try:
        tags = _parse_tagging_document(payload)
    except ValueError as exc:
        return _error_response("MalformedXML", str(exc), 400)
    if len(tags) > 10:
        return _error_response("InvalidTag", "A maximum of 10 tags is supported for objects", 400)
    try:
        storage.set_object_tags(bucket_name, object_key, tags)
    except StorageError as exc:
        message = str(exc)
        if "Bucket" in message:
            return _error_response("NoSuchBucket", message, 404)
        return _error_response("NoSuchKey", message, 404)
    current_app.logger.info("Object tags updated", extra={"bucket": bucket_name, "key": object_key, "tags": len(tags)})
    return Response(status=204)


def _sanitize_cors_rules(rules: list[dict[str, Any]]) -> list[dict[str, Any]]:
    sanitized: list[dict[str, Any]] = []
    for rule in rules:
@@ -823,8 +983,8 @@ def _sanitize_cors_rules(rules: list[dict[str, Any]]) -> list[dict[str, Any]]:


def _bucket_cors_handler(bucket_name: str) -> Response:
    if request.method not in {"GET", "PUT"}:
        return _method_not_allowed(["GET", "PUT"])
    if request.method not in {"GET", "PUT", "DELETE"}:
        return _method_not_allowed(["GET", "PUT", "DELETE"])
    principal, error = _require_principal()
    if error:
        return error
@@ -841,6 +1001,14 @@ def _bucket_cors_handler(bucket_name: str) -> Response:
        if not rules:
            return _error_response("NoSuchCORSConfiguration", "No CORS configuration found", 404)
        return _xml_response(_render_cors_document(rules))
    if request.method == "DELETE":
        try:
            storage.set_bucket_cors(bucket_name, None)
        except StorageError as exc:
            return _error_response("NoSuchBucket", str(exc), 404)
        current_app.logger.info("Bucket CORS deleted", extra={"bucket": bucket_name})
        return Response(status=204)
    # PUT
    payload = request.get_data(cache=False) or b""
    if not payload.strip():
        try:
@@ -907,6 +1075,66 @@ def _bucket_encryption_handler(bucket_name: str) -> Response:
    return Response(status=204)


def _bucket_location_handler(bucket_name: str) -> Response:
    if request.method != "GET":
        return _method_not_allowed(["GET"])
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "list")
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
    storage = _storage()
    if not storage.bucket_exists(bucket_name):
        return _error_response("NoSuchBucket", "Bucket does not exist", 404)

    # Return the configured AWS_REGION
    region = current_app.config.get("AWS_REGION", "us-east-1")
    root = Element("LocationConstraint")
    # AWS returns an empty LocationConstraint for us-east-1; mirror that behaviour
    root.text = region if region != "us-east-1" else None
    return _xml_response(root)


def _bucket_acl_handler(bucket_name: str) -> Response:
    if request.method not in {"GET", "PUT"}:
        return _method_not_allowed(["GET", "PUT"])
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "policy")
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
    storage = _storage()
    if not storage.bucket_exists(bucket_name):
        return _error_response("NoSuchBucket", "Bucket does not exist", 404)

    if request.method == "PUT":
        # We don't fully implement ACLs, but we accept the request for compatibility
        # Check for canned ACL header
        canned_acl = request.headers.get("x-amz-acl", "private")
        current_app.logger.info("Bucket ACL set (canned)", extra={"bucket": bucket_name, "acl": canned_acl})
        return Response(status=200)

    # GET - Return a basic ACL document showing full control for owner
    root = Element("AccessControlPolicy")
    owner = SubElement(root, "Owner")
    SubElement(owner, "ID").text = principal.access_key if principal else "anonymous"
    SubElement(owner, "DisplayName").text = principal.display_name if principal else "Anonymous"

    acl = SubElement(root, "AccessControlList")
    grant = SubElement(acl, "Grant")
    grantee = SubElement(grant, "Grantee")
    grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "CanonicalUser")
    SubElement(grantee, "ID").text = principal.access_key if principal else "anonymous"
    SubElement(grantee, "DisplayName").text = principal.display_name if principal else "Anonymous"
    SubElement(grant, "Permission").text = "FULL_CONTROL"

    return _xml_response(root)


def _bulk_delete_handler(bucket_name: str) -> Response:
    principal, error = _require_principal()
    if error:
@@ -1067,7 +1295,7 @@ def bucket_handler(bucket_name: str) -> Response:
        current_app.logger.info("Bucket deleted", extra={"bucket": bucket_name})
        return Response(status=204)

    # GET - list objects
    # GET - list objects (supports both ListObjects and ListObjectsV2)
    principal, error = _require_principal()
    try:
        _authorize_action(principal, bucket_name, "list")
@@ -1080,16 +1308,131 @@ def bucket_handler(bucket_name: str) -> Response:
    except StorageError as exc:
        return _error_response("NoSuchBucket", str(exc), 404)

    root = Element("ListBucketResult")
    SubElement(root, "Name").text = bucket_name
    SubElement(root, "MaxKeys").text = str(current_app.config["UI_PAGE_SIZE"])
    SubElement(root, "IsTruncated").text = "false"
    for meta in objects:
        obj_el = SubElement(root, "Contents")
        SubElement(obj_el, "Key").text = meta.key
        SubElement(obj_el, "LastModified").text = meta.last_modified.isoformat()
        SubElement(obj_el, "ETag").text = f'"{meta.etag}"'
        SubElement(obj_el, "Size").text = str(meta.size)
    # Check if this is ListObjectsV2 (list-type=2)
    list_type = request.args.get("list-type")
    prefix = request.args.get("prefix", "")
    delimiter = request.args.get("delimiter", "")
    max_keys = min(int(request.args.get("max-keys", current_app.config["UI_PAGE_SIZE"])), 1000)

    # Pagination markers
    marker = request.args.get("marker", "")  # ListObjects v1
    continuation_token = request.args.get("continuation-token", "")  # ListObjectsV2
    start_after = request.args.get("start-after", "")  # ListObjectsV2

    # For ListObjectsV2, continuation-token takes precedence, then start-after
    # For ListObjects v1, use marker
    effective_start = ""
    if list_type == "2":
        if continuation_token:
            import base64
            try:
                effective_start = base64.urlsafe_b64decode(continuation_token.encode()).decode("utf-8")
            except Exception:
                effective_start = continuation_token
        elif start_after:
            effective_start = start_after
    else:
        effective_start = marker

    if prefix:
        objects = [obj for obj in objects if obj.key.startswith(prefix)]

    if effective_start:
        objects = [obj for obj in objects if obj.key > effective_start]

    common_prefixes: list[str] = []
    filtered_objects: list = []
    if delimiter:
        seen_prefixes: set[str] = set()
        for obj in objects:
            key_after_prefix = obj.key[len(prefix):] if prefix else obj.key
            if delimiter in key_after_prefix:
                # This is a "folder" - extract the common prefix
                common_prefix = prefix + key_after_prefix.split(delimiter)[0] + delimiter
                if common_prefix not in seen_prefixes:
                    seen_prefixes.add(common_prefix)
                    common_prefixes.append(common_prefix)
            else:
                filtered_objects.append(obj)
        objects = filtered_objects
        common_prefixes = sorted(common_prefixes)

    total_items = len(objects) + len(common_prefixes)
    is_truncated = total_items > max_keys

    if len(objects) >= max_keys:
        objects = objects[:max_keys]
        common_prefixes = []
    else:
        remaining = max_keys - len(objects)
        common_prefixes = common_prefixes[:remaining]

    next_marker = ""
    next_continuation_token = ""
    if is_truncated:
        if objects:
            next_marker = objects[-1].key
        elif common_prefixes:
            next_marker = common_prefixes[-1].rstrip(delimiter) if delimiter else common_prefixes[-1]

    if list_type == "2" and next_marker:
        import base64
        next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")

    if list_type == "2":
        root = Element("ListBucketResult")
        SubElement(root, "Name").text = bucket_name
        SubElement(root, "Prefix").text = prefix
        SubElement(root, "MaxKeys").text = str(max_keys)
        SubElement(root, "KeyCount").text = str(len(objects) + len(common_prefixes))
        SubElement(root, "IsTruncated").text = "true" if is_truncated else "false"
        if delimiter:
            SubElement(root, "Delimiter").text = delimiter

        continuation_token = request.args.get("continuation-token", "")
        start_after = request.args.get("start-after", "")
        if continuation_token:
            SubElement(root, "ContinuationToken").text = continuation_token
        if start_after:
            SubElement(root, "StartAfter").text = start_after

        if is_truncated and next_continuation_token:
            SubElement(root, "NextContinuationToken").text = next_continuation_token

        for meta in objects:
            obj_el = SubElement(root, "Contents")
            SubElement(obj_el, "Key").text = meta.key
            SubElement(obj_el, "LastModified").text = meta.last_modified.isoformat()
            SubElement(obj_el, "ETag").text = f'"{meta.etag}"'
            SubElement(obj_el, "Size").text = str(meta.size)
            SubElement(obj_el, "StorageClass").text = "STANDARD"

        for cp in common_prefixes:
            cp_el = SubElement(root, "CommonPrefixes")
            SubElement(cp_el, "Prefix").text = cp
    else:
        root = Element("ListBucketResult")
        SubElement(root, "Name").text = bucket_name
        SubElement(root, "Prefix").text = prefix
        SubElement(root, "Marker").text = marker
        SubElement(root, "MaxKeys").text = str(max_keys)
        SubElement(root, "IsTruncated").text = "true" if is_truncated else "false"
        if delimiter:
            SubElement(root, "Delimiter").text = delimiter

        if is_truncated and delimiter and next_marker:
            SubElement(root, "NextMarker").text = next_marker

        for meta in objects:
            obj_el = SubElement(root, "Contents")
            SubElement(obj_el, "Key").text = meta.key
            SubElement(obj_el, "LastModified").text = meta.last_modified.isoformat()
            SubElement(obj_el, "ETag").text = f'"{meta.etag}"'
            SubElement(obj_el, "Size").text = str(meta.size)

        for cp in common_prefixes:
            cp_el = SubElement(root, "CommonPrefixes")
            SubElement(cp_el, "Prefix").text = cp

    return _xml_response(root)
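Aside (illustrative, not part of the diff): a minimal boto3 sketch that exercises the new ListObjectsV2 support, including delimiter grouping and continuation tokens; the endpoint URL and credentials are placeholders.

    import boto3

    s3 = boto3.client(
        "s3",
        endpoint_url="http://127.0.0.1:5000",  # assumed local endpoint
        aws_access_key_id="test-access-key",    # placeholder credentials
        aws_secret_access_key="test-secret-key",
    )

    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(
        Bucket="my-bucket",
        Prefix="photos/",
        Delimiter="/",
        PaginationConfig={"PageSize": 100},  # sent as max-keys on the wire
    ):
        for cp in page.get("CommonPrefixes", []):
            print("folder:", cp["Prefix"])
        for obj in page.get("Contents", []):
            print("object:", obj["Key"], obj["Size"])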
@@ -1099,6 +1442,9 @@ def bucket_handler(bucket_name: str) -> Response:
def object_handler(bucket_name: str, object_key: str):
    storage = _storage()

    if "tagging" in request.args:
        return _object_tagging_handler(bucket_name, object_key)

    # Multipart Uploads
    if request.method == "POST":
        if "uploads" in request.args:
@@ -1111,6 +1457,10 @@ def object_handler(bucket_name: str, object_key: str):
        if "partNumber" in request.args and "uploadId" in request.args:
            return _upload_part(bucket_name, object_key)

        copy_source = request.headers.get("x-amz-copy-source")
        if copy_source:
            return _copy_object(bucket_name, object_key, copy_source)

        _, error = _object_principal("write", bucket_name, object_key)
        if error:
            return error
@@ -1121,6 +1471,12 @@ def object_handler(bucket_name: str, object_key: str):
            stream = AwsChunkedDecoder(stream)

        metadata = _extract_request_metadata()

        content_type = request.headers.get("Content-Type")
        validation_error = _validate_content_type(object_key, content_type)
        if validation_error:
            return _error_response("InvalidArgument", validation_error, 400)

        try:
            meta = storage.put_object(
                bucket_name,
@@ -1357,6 +1713,88 @@ def head_object(bucket_name: str, object_key: str) -> Response:
        return _error_response("AccessDenied", str(exc), 403)


def _copy_object(dest_bucket: str, dest_key: str, copy_source: str) -> Response:
    """Handle S3 CopyObject operation."""
    from urllib.parse import unquote
    copy_source = unquote(copy_source)
    if copy_source.startswith("/"):
        copy_source = copy_source[1:]

    parts = copy_source.split("/", 1)
    if len(parts) != 2:
        return _error_response("InvalidArgument", "Invalid x-amz-copy-source format", 400)

    source_bucket, source_key = parts
    if not source_bucket or not source_key:
        return _error_response("InvalidArgument", "Invalid x-amz-copy-source format", 400)

    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, source_bucket, "read", object_key=source_key)
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)

    try:
        _authorize_action(principal, dest_bucket, "write", object_key=dest_key)
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)

    storage = _storage()

    try:
        source_path = storage.get_object_path(source_bucket, source_key)
    except StorageError:
        return _error_response("NoSuchKey", "Source object not found", 404)

    source_metadata = storage.get_object_metadata(source_bucket, source_key)

    metadata_directive = request.headers.get("x-amz-metadata-directive", "COPY").upper()
    if metadata_directive == "REPLACE":
        metadata = _extract_request_metadata()
        content_type = request.headers.get("Content-Type")
        validation_error = _validate_content_type(dest_key, content_type)
        if validation_error:
            return _error_response("InvalidArgument", validation_error, 400)
    else:
        metadata = source_metadata

    try:
        with source_path.open("rb") as stream:
            meta = storage.put_object(
                dest_bucket,
                dest_key,
                stream,
                metadata=metadata or None,
            )
    except StorageError as exc:
        message = str(exc)
        if "Bucket" in message:
            return _error_response("NoSuchBucket", message, 404)
        return _error_response("InvalidArgument", message, 400)

    current_app.logger.info(
        "Object copied",
        extra={
            "source_bucket": source_bucket,
            "source_key": source_key,
            "dest_bucket": dest_bucket,
            "dest_key": dest_key,
            "size": meta.size,
        },
    )

    user_agent = request.headers.get("User-Agent", "")
    if "S3ReplicationAgent" not in user_agent:
        _replication_manager().trigger_replication(dest_bucket, dest_key, action="write")

    root = Element("CopyObjectResult")
    SubElement(root, "LastModified").text = meta.last_modified.isoformat()
    SubElement(root, "ETag").text = f'"{meta.etag}"'
    return _xml_response(root)


class AwsChunkedDecoder:
    """Decodes aws-chunked encoded streams."""
    def __init__(self, stream):
@@ -1389,12 +1827,11 @@ class AwsChunkedDecoder:
                if crlf != b"\r\n":
                    raise IOError("Malformed chunk: missing CRLF")
            else:
                # Read chunk size line
                line = b""
                while True:
                    char = self.stream.read(1)
                    if not char:
                        if not line:  # EOF at start of chunk size
                        if not line:
                            self.finished = True
                            return result
                        raise IOError("Unexpected EOF in chunk size")
@@ -1402,7 +1839,6 @@ class AwsChunkedDecoder:
                    if line.endswith(b"\r\n"):
                        break

                # Parse chunk size (hex)
                try:
                    line_str = line.decode("ascii").strip()
                    # Handle chunk-signature extension if present (e.g. "1000;chunk-signature=...")
@@ -1414,7 +1850,6 @@ class AwsChunkedDecoder:

                if chunk_size == 0:
                    self.finished = True
                    # Read trailers if any (until empty line)
                    while True:
                        line = b""
                        while True:
@@ -1534,13 +1969,11 @@ def _complete_multipart_upload(bucket_name: str, object_key: str) -> Response:
        return _error_response("NoSuchUpload", str(exc), 404)
        return _error_response("InvalidPart", str(exc), 400)

    # Trigger replication
    user_agent = request.headers.get("User-Agent", "")
    if "S3ReplicationAgent" not in user_agent:
        _replication_manager().trigger_replication(bucket_name, object_key, action="write")

    root = Element("CompleteMultipartUploadResult")
    # Use request.host_url to construct full location
    location = f"{request.host_url}{bucket_name}/{object_key}"
    SubElement(root, "Location").text = location
    SubElement(root, "Bucket").text = bucket_name
app/storage.py (193 lines changed)
@@ -10,10 +10,40 @@ import stat
import time
import unicodedata
import uuid
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, BinaryIO, Dict, List, Optional
from typing import Any, BinaryIO, Dict, Generator, List, Optional

# Platform-specific file locking
if os.name == "nt":
    import msvcrt

    @contextmanager
    def _file_lock(file_handle) -> Generator[None, None, None]:
        """Acquire an exclusive lock on a file (Windows)."""
        try:
            msvcrt.locking(file_handle.fileno(), msvcrt.LK_NBLCK, 1)
            yield
        finally:
            try:
                file_handle.seek(0)
                msvcrt.locking(file_handle.fileno(), msvcrt.LK_UNLCK, 1)
            except OSError:
                pass
else:
    import fcntl  # type: ignore

    @contextmanager
    def _file_lock(file_handle) -> Generator[None, None, None]:
        """Acquire an exclusive lock on a file (Unix)."""
        try:
            fcntl.flock(file_handle.fileno(), fcntl.LOCK_EX)
            yield
        finally:
            fcntl.flock(file_handle.fileno(), fcntl.LOCK_UN)


WINDOWS_RESERVED_NAMES = {
    "CON",
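Aside (illustrative, not part of the diff): a minimal sketch of how the platform-specific _file_lock helper is meant to wrap a critical section around a scratch lock file; the import path and lock-file path are placeholders.

    from pathlib import Path
    from app.storage import _file_lock  # private helper, assumed import path

    lock_path = Path("/tmp/example-upload.lock")  # placeholder lock file
    with lock_path.open("w") as lock_file:
        with _file_lock(lock_file):
            # Critical section: only one process holds the exclusive lock here,
            # e.g. while assembling multipart parts into the final object.
            pass
    lock_path.unlink(missing_ok=True)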
@@ -119,8 +149,13 @@ class ObjectStorage:
        bucket_path.mkdir(parents=True, exist_ok=False)
        self._system_bucket_root(bucket_path.name).mkdir(parents=True, exist_ok=True)

    def bucket_stats(self, bucket_name: str) -> dict[str, int]:
        """Return object count and total size for the bucket (cached)."""
    def bucket_stats(self, bucket_name: str, cache_ttl: int = 60) -> dict[str, int]:
        """Return object count and total size for the bucket (cached).

        Args:
            bucket_name: Name of the bucket
            cache_ttl: Cache time-to-live in seconds (default 60)
        """
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise StorageError("Bucket does not exist")
@@ -129,8 +164,8 @@ class ObjectStorage:
        cache_path = self._system_bucket_root(bucket_name) / "stats.json"
        if cache_path.exists():
            try:
                # Check if cache is fresh (e.g., < 60 seconds old)
                if time.time() - cache_path.stat().st_mtime < 60:
                # Check if cache is fresh
                if time.time() - cache_path.stat().st_mtime < cache_ttl:
                    return json.loads(cache_path.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError):
                pass
@@ -158,6 +193,14 @@ class ObjectStorage:

        return stats

    def _invalidate_bucket_stats_cache(self, bucket_id: str) -> None:
        """Invalidate the cached bucket statistics."""
        cache_path = self._system_bucket_root(bucket_id) / "stats.json"
        try:
            cache_path.unlink(missing_ok=True)
        except OSError:
            pass

    def delete_bucket(self, bucket_name: str) -> None:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
@@ -228,6 +271,10 @@ class ObjectStorage:
            self._write_metadata(bucket_id, safe_key, metadata)
        else:
            self._delete_metadata(bucket_id, safe_key)

        # Invalidate bucket stats cache
        self._invalidate_bucket_stats_cache(bucket_id)

        return ObjectMeta(
            key=safe_key.as_posix(),
            size=stat.st_size,
@@ -261,6 +308,10 @@ class ObjectStorage:
        rel = path.relative_to(bucket_path)
        self._safe_unlink(path)
        self._delete_metadata(bucket_id, rel)

        # Invalidate bucket stats cache
        self._invalidate_bucket_stats_cache(bucket_id)

        for parent in path.parents:
            if parent == bucket_path:
                break
@@ -284,6 +335,10 @@ class ObjectStorage:
        legacy_version_dir = self._legacy_version_dir(bucket_id, rel)
        if legacy_version_dir.exists():
            shutil.rmtree(legacy_version_dir, ignore_errors=True)

        # Invalidate bucket stats cache
        self._invalidate_bucket_stats_cache(bucket_id)

        for parent in target.parents:
            if parent == bucket_path:
                break
@@ -356,6 +411,74 @@ class ObjectStorage:
        bucket_path = self._require_bucket_path(bucket_name)
        self._set_bucket_config_entry(bucket_path.name, "encryption", config_payload or None)

    # ---------------------- Object tagging helpers ----------------------
    def get_object_tags(self, bucket_name: str, object_key: str) -> List[Dict[str, str]]:
        """Get tags for an object."""
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise StorageError("Bucket does not exist")
        safe_key = self._sanitize_object_key(object_key)
        object_path = bucket_path / safe_key
        if not object_path.exists():
            raise StorageError("Object does not exist")

        # Tags are stored in the metadata file alongside user metadata
        for meta_file in (self._metadata_file(bucket_path.name, safe_key), self._legacy_metadata_file(bucket_path.name, safe_key)):
            if not meta_file.exists():
                continue
            try:
                payload = json.loads(meta_file.read_text(encoding="utf-8"))
                tags = payload.get("tags")
                if isinstance(tags, list):
                    return tags
                return []
            except (OSError, json.JSONDecodeError):
                return []
        return []

    def set_object_tags(self, bucket_name: str, object_key: str, tags: Optional[List[Dict[str, str]]]) -> None:
        """Set tags for an object."""
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise StorageError("Bucket does not exist")
        safe_key = self._sanitize_object_key(object_key)
        object_path = bucket_path / safe_key
        if not object_path.exists():
            raise StorageError("Object does not exist")

        meta_file = self._metadata_file(bucket_path.name, safe_key)

        # Read existing metadata
        existing_payload: Dict[str, Any] = {}
        if meta_file.exists():
            try:
                existing_payload = json.loads(meta_file.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError):
                pass

        # Update tags
        if tags:
            existing_payload["tags"] = tags
        else:
            existing_payload.pop("tags", None)

        # Write back if there's anything to store, otherwise delete
        if existing_payload.get("metadata") or existing_payload.get("tags"):
            meta_file.parent.mkdir(parents=True, exist_ok=True)
            meta_file.write_text(json.dumps(existing_payload), encoding="utf-8")
        elif meta_file.exists():
            meta_file.unlink()
            # Clean up empty parent directories
            parent = meta_file.parent
            meta_root = self._bucket_meta_root(bucket_path.name)
            while parent != meta_root and parent.exists() and not any(parent.iterdir()):
                parent.rmdir()
                parent = parent.parent

    def delete_object_tags(self, bucket_name: str, object_key: str) -> None:
        """Delete all tags from an object."""
        self.set_object_tags(bucket_name, object_key, None)

    def list_object_versions(self, bucket_name: str, object_key: str) -> List[Dict[str, Any]]:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
@@ -571,29 +694,49 @@ class ObjectStorage:
        safe_key = self._sanitize_object_key(manifest["object_key"])
        destination = bucket_path / safe_key
        destination.parent.mkdir(parents=True, exist_ok=True)
        if self._is_versioning_enabled(bucket_path) and destination.exists():
            self._archive_current_version(bucket_id, safe_key, reason="overwrite")
        checksum = hashlib.md5()
        with destination.open("wb") as target:
            for _, record in validated:
                part_path = upload_root / record["filename"]
                if not part_path.exists():
                    raise StorageError(f"Missing part file {record['filename']}")
                with part_path.open("rb") as chunk:
                    while True:
                        data = chunk.read(1024 * 1024)
                        if not data:
                            break
                        checksum.update(data)
                        target.write(data)

        # Use a lock file to prevent concurrent writes to the same destination
        lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock"
        lock_file_path.parent.mkdir(parents=True, exist_ok=True)

        try:
            with lock_file_path.open("w") as lock_file:
                with _file_lock(lock_file):
                    if self._is_versioning_enabled(bucket_path) and destination.exists():
                        self._archive_current_version(bucket_id, safe_key, reason="overwrite")
                    checksum = hashlib.md5()
                    with destination.open("wb") as target:
                        for _, record in validated:
                            part_path = upload_root / record["filename"]
                            if not part_path.exists():
                                raise StorageError(f"Missing part file {record['filename']}")
                            with part_path.open("rb") as chunk:
                                while True:
                                    data = chunk.read(1024 * 1024)
                                    if not data:
                                        break
                                    checksum.update(data)
                                    target.write(data)

                    metadata = manifest.get("metadata")
                    if metadata:
                        self._write_metadata(bucket_id, safe_key, metadata)
                    else:
                        self._delete_metadata(bucket_id, safe_key)
        metadata = manifest.get("metadata")
        if metadata:
            self._write_metadata(bucket_id, safe_key, metadata)
        else:
            self._delete_metadata(bucket_id, safe_key)
        except BlockingIOError:
            raise StorageError("Another upload to this key is in progress")
        finally:
            # Clean up lock file
            try:
                lock_file_path.unlink(missing_ok=True)
            except OSError:
                pass

        shutil.rmtree(upload_root, ignore_errors=True)

        # Invalidate bucket stats cache
        self._invalidate_bucket_stats_cache(bucket_id)

        stat = destination.stat()
        return ObjectMeta(
            key=safe_key.as_posix(),
app/ui.py (52 lines changed)
@@ -249,7 +249,8 @@ def buckets_overview():
        if bucket.name not in allowed_names:
            continue
        policy = policy_store.get_policy(bucket.name)
        stats = _storage().bucket_stats(bucket.name)
        cache_ttl = current_app.config.get("BUCKET_STATS_CACHE_TTL", 60)
        stats = _storage().bucket_stats(bucket.name, cache_ttl=cache_ttl)
        access_label, access_badge = _bucket_access_descriptor(policy)
        visible_buckets.append({
            "meta": bucket,
@@ -335,7 +336,7 @@ def bucket_detail(bucket_name: str):
    except IamError:
        can_manage_versioning = False

    # Replication info
    # Replication info - don't compute sync status here (it's slow), let JS fetch it async
    replication_rule = _replication().get_rule(bucket_name)
    connections = _connections().list()

@@ -1178,8 +1179,12 @@ def update_bucket_replication(bucket_name: str):
        _replication().delete_rule(bucket_name)
        flash("Replication disabled", "info")
    else:
        from .replication import REPLICATION_MODE_NEW_ONLY, REPLICATION_MODE_ALL
        import time

        target_conn_id = request.form.get("target_connection_id")
        target_bucket = request.form.get("target_bucket", "").strip()
        replication_mode = request.form.get("replication_mode", REPLICATION_MODE_NEW_ONLY)

        if not target_conn_id or not target_bucket:
            flash("Target connection and bucket are required", "danger")
@@ -1188,14 +1193,50 @@ def update_bucket_replication(bucket_name: str):
            bucket_name=bucket_name,
            target_connection_id=target_conn_id,
            target_bucket=target_bucket,
            enabled=True
            enabled=True,
            mode=replication_mode,
            created_at=time.time(),
        )
        _replication().set_rule(rule)
        flash("Replication configured", "success")

        # If mode is "all", trigger replication of existing objects
        if replication_mode == REPLICATION_MODE_ALL:
            _replication().replicate_existing_objects(bucket_name)
            flash("Replication configured. Existing objects are being replicated in the background.", "success")
        else:
            flash("Replication configured. Only new uploads will be replicated.", "success")

    return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication"))


@ui_bp.get("/buckets/<bucket_name>/replication/status")
def get_replication_status(bucket_name: str):
    """Async endpoint to fetch replication sync status without blocking page load."""
    principal = _current_principal()
    try:
        _authorize_ui(principal, bucket_name, "read")
    except IamError:
        return jsonify({"error": "Access denied"}), 403

    rule = _replication().get_rule(bucket_name)
    if not rule:
        return jsonify({"error": "No replication rule"}), 404

    # This is the slow operation - compute sync status by comparing buckets
    stats = _replication().get_sync_status(bucket_name)
    if not stats:
        return jsonify({"error": "Failed to compute status"}), 500

    return jsonify({
        "objects_synced": stats.objects_synced,
        "objects_pending": stats.objects_pending,
        "objects_orphaned": stats.objects_orphaned,
        "bytes_synced": stats.bytes_synced,
        "last_sync_at": stats.last_sync_at,
        "last_sync_key": stats.last_sync_key,
    })


@ui_bp.get("/connections")
def connections_dashboard():
    principal = _current_principal()
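Aside (illustrative, not part of the diff): a small client-side sketch of polling the new replication status endpoint with requests; the host, port, blueprint mount point, and session handling are assumptions.

    import requests

    session = requests.Session()
    # ... authenticate against the UI first; the session cookie is reused below ...
    resp = session.get("http://127.0.0.1:5000/buckets/my-bucket/replication/status")
    if resp.status_code == 200:
        stats = resp.json()
        print(f"synced={stats['objects_synced']} pending={stats['objects_pending']} "
              f"orphaned={stats['objects_orphaned']} bytes={stats['bytes_synced']}")
    else:
        print("status unavailable:", resp.status_code, resp.json().get("error"))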
@@ -1227,8 +1268,9 @@ def metrics_dashboard():
    total_bytes_used = 0

    # Note: Uses cached stats from storage layer to improve performance
    cache_ttl = current_app.config.get("BUCKET_STATS_CACHE_TTL", 60)
    for bucket in buckets:
        stats = storage.bucket_stats(bucket.name)
        stats = storage.bucket_stats(bucket.name, cache_ttl=cache_ttl)
        total_objects += stats["objects"]
        total_bytes_used += stats["bytes"]