Add new tests; Fix typo and validations
This commit is contained in:
@@ -289,17 +289,17 @@ def _configure_logging(app: Flask) -> None:
|
||||
formatter = logging.Formatter(
|
||||
"%(asctime)s | %(levelname)s | %(request_id)s | %(method)s %(path)s | %(message)s"
|
||||
)
|
||||
|
||||
# Stream Handler (stdout) - Primary for Docker
|
||||
|
||||
stream_handler = logging.StreamHandler(sys.stdout)
|
||||
stream_handler.setFormatter(formatter)
|
||||
stream_handler.addFilter(_RequestContextFilter())
|
||||
|
||||
logger = app.logger
|
||||
for handler in logger.handlers[:]:
|
||||
handler.close()
|
||||
logger.handlers.clear()
|
||||
logger.addHandler(stream_handler)
|
||||
|
||||
# File Handler (optional, if configured)
|
||||
if app.config.get("LOG_TO_FILE"):
|
||||
log_file = Path(app.config["LOG_FILE"])
|
||||
log_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -196,18 +196,21 @@ class AccessLoggingService:
|
||||
)
|
||||
|
||||
target_key = f"{config.target_bucket}:{config.target_prefix}"
|
||||
should_flush = False
|
||||
with self._buffer_lock:
|
||||
if target_key not in self._buffer:
|
||||
self._buffer[target_key] = []
|
||||
self._buffer[target_key].append(entry)
|
||||
should_flush = len(self._buffer[target_key]) >= self.max_buffer_size
|
||||
|
||||
if len(self._buffer[target_key]) >= self.max_buffer_size:
|
||||
self._flush_buffer(target_key)
|
||||
if should_flush:
|
||||
self._flush_buffer(target_key)
|
||||
|
||||
def _flush_loop(self) -> None:
|
||||
while not self._shutdown.is_set():
|
||||
time.sleep(self.flush_interval)
|
||||
self._flush_all()
|
||||
self._shutdown.wait(timeout=self.flush_interval)
|
||||
if not self._shutdown.is_set():
|
||||
self._flush_all()
|
||||
|
||||
def _flush_all(self) -> None:
|
||||
with self._buffer_lock:
|
||||
|
||||
@@ -84,7 +84,7 @@ class AppConfig:
|
||||
return overrides.get(name, os.getenv(name, default))
|
||||
|
||||
storage_root = Path(_get("STORAGE_ROOT", PROJECT_ROOT / "data")).resolve()
|
||||
max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024)) # 1 GiB default
|
||||
max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024))
|
||||
ui_page_size = int(_get("UI_PAGE_SIZE", 100))
|
||||
auth_max_attempts = int(_get("AUTH_MAX_ATTEMPTS", 5))
|
||||
auth_lockout_minutes = int(_get("AUTH_LOCKOUT_MINUTES", 15))
|
||||
@@ -108,6 +108,10 @@ class AppConfig:
|
||||
try:
|
||||
secret_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
secret_file.write_text(generated)
|
||||
try:
|
||||
os.chmod(secret_file, 0o600)
|
||||
except OSError:
|
||||
pass
|
||||
secret_key = generated
|
||||
except OSError:
|
||||
secret_key = generated
|
||||
|
||||
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
|
||||
REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
|
||||
REPLICATION_CONNECT_TIMEOUT = 5
|
||||
REPLICATION_READ_TIMEOUT = 30
|
||||
STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024 # 10 MiB - use streaming for larger files
|
||||
STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024
|
||||
|
||||
REPLICATION_MODE_NEW_ONLY = "new_only"
|
||||
REPLICATION_MODE_ALL = "all"
|
||||
@@ -307,7 +307,6 @@ class ReplicationManager:
|
||||
if self._shutdown:
|
||||
return
|
||||
|
||||
# Re-check if rule is still enabled (may have been paused after task was submitted)
|
||||
current_rule = self.get_rule(bucket_name)
|
||||
if not current_rule or not current_rule.enabled:
|
||||
logger.debug(f"Replication skipped for {bucket_name}/{object_key}: rule disabled or removed")
|
||||
@@ -358,7 +357,6 @@ class ReplicationManager:
|
||||
extra_args["ContentType"] = content_type
|
||||
|
||||
if file_size >= STREAMING_THRESHOLD_BYTES:
|
||||
# Use multipart upload for large files
|
||||
s3.upload_file(
|
||||
str(path),
|
||||
rule.target_bucket,
|
||||
@@ -366,7 +364,6 @@ class ReplicationManager:
|
||||
ExtraArgs=extra_args if extra_args else None,
|
||||
)
|
||||
else:
|
||||
# Read small files into memory
|
||||
file_content = path.read_bytes()
|
||||
put_kwargs = {
|
||||
"Bucket": rule.target_bucket,
|
||||
|
||||
@@ -25,7 +25,7 @@ from .iam import IamError, Principal
|
||||
from .notifications import NotificationService, NotificationConfiguration, WebhookDestination
|
||||
from .object_lock import ObjectLockService, ObjectLockRetention, ObjectLockConfig, ObjectLockError, RetentionMode
|
||||
from .replication import ReplicationManager
|
||||
from .storage import ObjectStorage, StorageError, QuotaExceededError
|
||||
from .storage import ObjectStorage, StorageError, QuotaExceededError, BucketNotFoundError, ObjectNotFoundError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -217,7 +217,6 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
|
||||
calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||
|
||||
if not hmac.compare_digest(calculated_signature, signature):
|
||||
# Only log detailed signature debug info if DEBUG_SIGV4 is enabled
|
||||
if current_app.config.get("DEBUG_SIGV4"):
|
||||
logger.warning(
|
||||
"SigV4 signature mismatch",
|
||||
@@ -260,7 +259,13 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
|
||||
raise IamError("Invalid Date format")
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
if now > req_time + timedelta(seconds=int(expires)):
|
||||
try:
|
||||
expires_seconds = int(expires)
|
||||
if expires_seconds <= 0:
|
||||
raise IamError("Invalid Expires value: must be positive")
|
||||
except ValueError:
|
||||
raise IamError("Invalid Expires value: must be an integer")
|
||||
if now > req_time + timedelta(seconds=expires_seconds):
|
||||
raise IamError("Request expired")
|
||||
|
||||
secret_key = _iam().get_secret_key(access_key)
|
||||
@@ -1036,21 +1041,23 @@ def _object_tagging_handler(bucket_name: str, object_key: str) -> Response:
|
||||
if request.method == "GET":
|
||||
try:
|
||||
tags = storage.get_object_tags(bucket_name, object_key)
|
||||
except BucketNotFoundError as exc:
|
||||
return _error_response("NoSuchBucket", str(exc), 404)
|
||||
except ObjectNotFoundError as exc:
|
||||
return _error_response("NoSuchKey", str(exc), 404)
|
||||
except StorageError as exc:
|
||||
message = str(exc)
|
||||
if "Bucket" in message:
|
||||
return _error_response("NoSuchBucket", message, 404)
|
||||
return _error_response("NoSuchKey", message, 404)
|
||||
return _error_response("InternalError", str(exc), 500)
|
||||
return _xml_response(_render_tagging_document(tags))
|
||||
|
||||
if request.method == "DELETE":
|
||||
try:
|
||||
storage.delete_object_tags(bucket_name, object_key)
|
||||
except BucketNotFoundError as exc:
|
||||
return _error_response("NoSuchBucket", str(exc), 404)
|
||||
except ObjectNotFoundError as exc:
|
||||
return _error_response("NoSuchKey", str(exc), 404)
|
||||
except StorageError as exc:
|
||||
message = str(exc)
|
||||
if "Bucket" in message:
|
||||
return _error_response("NoSuchBucket", message, 404)
|
||||
return _error_response("NoSuchKey", message, 404)
|
||||
return _error_response("InternalError", str(exc), 500)
|
||||
current_app.logger.info("Object tags deleted", extra={"bucket": bucket_name, "key": object_key})
|
||||
return Response(status=204)
|
||||
|
||||
@@ -1063,11 +1070,12 @@ def _object_tagging_handler(bucket_name: str, object_key: str) -> Response:
|
||||
return _error_response("InvalidTag", "A maximum of 10 tags is supported for objects", 400)
|
||||
try:
|
||||
storage.set_object_tags(bucket_name, object_key, tags)
|
||||
except BucketNotFoundError as exc:
|
||||
return _error_response("NoSuchBucket", str(exc), 404)
|
||||
except ObjectNotFoundError as exc:
|
||||
return _error_response("NoSuchKey", str(exc), 404)
|
||||
except StorageError as exc:
|
||||
message = str(exc)
|
||||
if "Bucket" in message:
|
||||
return _error_response("NoSuchBucket", message, 404)
|
||||
return _error_response("NoSuchKey", message, 404)
|
||||
return _error_response("InternalError", str(exc), 500)
|
||||
current_app.logger.info("Object tags updated", extra={"bucket": bucket_name, "key": object_key, "tags": len(tags)})
|
||||
return Response(status=204)
|
||||
|
||||
@@ -1283,7 +1291,10 @@ def _bucket_list_versions_handler(bucket_name: str) -> Response:
|
||||
|
||||
prefix = request.args.get("prefix", "")
|
||||
delimiter = request.args.get("delimiter", "")
|
||||
max_keys = min(int(request.args.get("max-keys", 1000)), 1000)
|
||||
try:
|
||||
max_keys = max(1, min(int(request.args.get("max-keys", 1000)), 1000))
|
||||
except ValueError:
|
||||
return _error_response("InvalidArgument", "max-keys must be an integer", 400)
|
||||
key_marker = request.args.get("key-marker", "")
|
||||
|
||||
if prefix:
|
||||
@@ -1476,7 +1487,10 @@ def _parse_lifecycle_config(payload: bytes) -> list:
|
||||
expiration: dict = {}
|
||||
days_el = exp_el.find("{*}Days") or exp_el.find("Days")
|
||||
if days_el is not None and days_el.text:
|
||||
expiration["Days"] = int(days_el.text.strip())
|
||||
days_val = int(days_el.text.strip())
|
||||
if days_val <= 0:
|
||||
raise ValueError("Expiration Days must be a positive integer")
|
||||
expiration["Days"] = days_val
|
||||
date_el = exp_el.find("{*}Date") or exp_el.find("Date")
|
||||
if date_el is not None and date_el.text:
|
||||
expiration["Date"] = date_el.text.strip()
|
||||
@@ -1491,7 +1505,10 @@ def _parse_lifecycle_config(payload: bytes) -> list:
|
||||
nve: dict = {}
|
||||
days_el = nve_el.find("{*}NoncurrentDays") or nve_el.find("NoncurrentDays")
|
||||
if days_el is not None and days_el.text:
|
||||
nve["NoncurrentDays"] = int(days_el.text.strip())
|
||||
noncurrent_days = int(days_el.text.strip())
|
||||
if noncurrent_days <= 0:
|
||||
raise ValueError("NoncurrentDays must be a positive integer")
|
||||
nve["NoncurrentDays"] = noncurrent_days
|
||||
if nve:
|
||||
rule["NoncurrentVersionExpiration"] = nve
|
||||
|
||||
@@ -1500,7 +1517,10 @@ def _parse_lifecycle_config(payload: bytes) -> list:
|
||||
aimu: dict = {}
|
||||
days_el = aimu_el.find("{*}DaysAfterInitiation") or aimu_el.find("DaysAfterInitiation")
|
||||
if days_el is not None and days_el.text:
|
||||
aimu["DaysAfterInitiation"] = int(days_el.text.strip())
|
||||
days_after = int(days_el.text.strip())
|
||||
if days_after <= 0:
|
||||
raise ValueError("DaysAfterInitiation must be a positive integer")
|
||||
aimu["DaysAfterInitiation"] = days_after
|
||||
if aimu:
|
||||
rule["AbortIncompleteMultipartUpload"] = aimu
|
||||
|
||||
@@ -2086,7 +2106,10 @@ def bucket_handler(bucket_name: str) -> Response:
|
||||
list_type = request.args.get("list-type")
|
||||
prefix = request.args.get("prefix", "")
|
||||
delimiter = request.args.get("delimiter", "")
|
||||
max_keys = min(int(request.args.get("max-keys", current_app.config["UI_PAGE_SIZE"])), 1000)
|
||||
try:
|
||||
max_keys = max(1, min(int(request.args.get("max-keys", current_app.config["UI_PAGE_SIZE"])), 1000))
|
||||
except ValueError:
|
||||
return _error_response("InvalidArgument", "max-keys must be an integer", 400)
|
||||
|
||||
marker = request.args.get("marker", "") # ListObjects v1
|
||||
continuation_token = request.args.get("continuation-token", "") # ListObjectsV2
|
||||
@@ -2099,7 +2122,7 @@ def bucket_handler(bucket_name: str) -> Response:
|
||||
if continuation_token:
|
||||
try:
|
||||
effective_start = base64.urlsafe_b64decode(continuation_token.encode()).decode("utf-8")
|
||||
except Exception:
|
||||
except (ValueError, UnicodeDecodeError):
|
||||
effective_start = continuation_token
|
||||
elif start_after:
|
||||
effective_start = start_after
|
||||
@@ -2742,7 +2765,7 @@ class AwsChunkedDecoder:
|
||||
|
||||
def __init__(self, stream):
|
||||
self.stream = stream
|
||||
self._read_buffer = bytearray() # Performance: Pre-allocated buffer
|
||||
self._read_buffer = bytearray()
|
||||
self.chunk_remaining = 0
|
||||
self.finished = False
|
||||
|
||||
@@ -2753,20 +2776,15 @@ class AwsChunkedDecoder:
|
||||
"""
|
||||
line = bytearray()
|
||||
while True:
|
||||
# Check if we have data in buffer
|
||||
if self._read_buffer:
|
||||
# Look for CRLF in buffer
|
||||
idx = self._read_buffer.find(b"\r\n")
|
||||
if idx != -1:
|
||||
# Found CRLF - extract line and update buffer
|
||||
line.extend(self._read_buffer[: idx + 2])
|
||||
del self._read_buffer[: idx + 2]
|
||||
return bytes(line)
|
||||
# No CRLF yet - consume entire buffer
|
||||
line.extend(self._read_buffer)
|
||||
self._read_buffer.clear()
|
||||
|
||||
# Read more data in larger chunks (64 bytes is enough for chunk headers)
|
||||
chunk = self.stream.read(64)
|
||||
if not chunk:
|
||||
return bytes(line) if line else b""
|
||||
@@ -2775,14 +2793,11 @@ class AwsChunkedDecoder:
|
||||
def _read_exact(self, n: int) -> bytes:
|
||||
"""Read exactly n bytes, using buffer first."""
|
||||
result = bytearray()
|
||||
# Use buffered data first
|
||||
if self._read_buffer:
|
||||
take = min(len(self._read_buffer), n)
|
||||
result.extend(self._read_buffer[:take])
|
||||
del self._read_buffer[:take]
|
||||
n -= take
|
||||
|
||||
# Read remaining directly from stream
|
||||
if n > 0:
|
||||
data = self.stream.read(n)
|
||||
if data:
|
||||
@@ -2794,7 +2809,7 @@ class AwsChunkedDecoder:
|
||||
if self.finished:
|
||||
return b""
|
||||
|
||||
result = bytearray() # Performance: Use bytearray for building result
|
||||
result = bytearray()
|
||||
while size == -1 or len(result) < size:
|
||||
if self.chunk_remaining > 0:
|
||||
to_read = self.chunk_remaining
|
||||
@@ -2828,7 +2843,6 @@ class AwsChunkedDecoder:
|
||||
|
||||
if chunk_size == 0:
|
||||
self.finished = True
|
||||
# Skip trailing headers
|
||||
while True:
|
||||
trailer = self._read_line()
|
||||
if trailer == b"\r\n" or not trailer:
|
||||
@@ -2969,10 +2983,11 @@ def _abort_multipart_upload(bucket_name: str, object_key: str) -> Response:
|
||||
|
||||
try:
|
||||
_storage().abort_multipart_upload(bucket_name, upload_id)
|
||||
except BucketNotFoundError as exc:
|
||||
return _error_response("NoSuchBucket", str(exc), 404)
|
||||
except StorageError as exc:
|
||||
if "Bucket does not exist" in str(exc):
|
||||
return _error_response("NoSuchBucket", str(exc), 404)
|
||||
|
||||
current_app.logger.warning(f"Error aborting multipart upload: {exc}")
|
||||
|
||||
return Response(status=204)
|
||||
|
||||
|
||||
@@ -2984,13 +2999,15 @@ def resolve_principal():
|
||||
(request.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256"):
|
||||
g.principal = _verify_sigv4(request)
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except IamError as exc:
|
||||
logger.debug(f"SigV4 authentication failed: {exc}")
|
||||
except (ValueError, KeyError) as exc:
|
||||
logger.debug(f"SigV4 parsing error: {exc}")
|
||||
|
||||
access_key = request.headers.get("X-Access-Key")
|
||||
secret_key = request.headers.get("X-Secret-Key")
|
||||
if access_key and secret_key:
|
||||
try:
|
||||
g.principal = _iam().authenticate(access_key, secret_key)
|
||||
except Exception:
|
||||
pass
|
||||
except IamError as exc:
|
||||
logger.debug(f"Header authentication failed: {exc}")
|
||||
|
||||
@@ -76,6 +76,14 @@ class StorageError(RuntimeError):
|
||||
"""Raised when the storage layer encounters an unrecoverable problem."""
|
||||
|
||||
|
||||
class BucketNotFoundError(StorageError):
|
||||
"""Raised when the bucket does not exist."""
|
||||
|
||||
|
||||
class ObjectNotFoundError(StorageError):
|
||||
"""Raised when the object does not exist."""
|
||||
|
||||
|
||||
class QuotaExceededError(StorageError):
|
||||
"""Raised when an operation would exceed bucket quota limits."""
|
||||
|
||||
@@ -106,7 +114,7 @@ class ListObjectsResult:
|
||||
objects: List[ObjectMeta]
|
||||
is_truncated: bool
|
||||
next_continuation_token: Optional[str]
|
||||
total_count: Optional[int] = None # Total objects in bucket (from stats cache)
|
||||
total_count: Optional[int] = None
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
|
||||
@@ -130,22 +138,18 @@ class ObjectStorage:
|
||||
MULTIPART_MANIFEST = "manifest.json"
|
||||
BUCKET_CONFIG_FILE = ".bucket.json"
|
||||
KEY_INDEX_CACHE_TTL = 30
|
||||
OBJECT_CACHE_MAX_SIZE = 100 # Maximum number of buckets to cache
|
||||
OBJECT_CACHE_MAX_SIZE = 100
|
||||
|
||||
def __init__(self, root: Path) -> None:
|
||||
self.root = Path(root)
|
||||
self.root.mkdir(parents=True, exist_ok=True)
|
||||
self._ensure_system_roots()
|
||||
# LRU cache for object metadata with thread-safe access
|
||||
self._object_cache: OrderedDict[str, tuple[Dict[str, ObjectMeta], float]] = OrderedDict()
|
||||
self._cache_lock = threading.Lock() # Global lock for cache structure
|
||||
# Performance: Per-bucket locks to reduce contention
|
||||
self._cache_lock = threading.Lock()
|
||||
self._bucket_locks: Dict[str, threading.Lock] = {}
|
||||
# Cache version counter for detecting stale reads
|
||||
self._cache_version: Dict[str, int] = {}
|
||||
# Performance: Bucket config cache with TTL
|
||||
self._bucket_config_cache: Dict[str, tuple[dict[str, Any], float]] = {}
|
||||
self._bucket_config_cache_ttl = 30.0 # 30 second TTL
|
||||
self._bucket_config_cache_ttl = 30.0
|
||||
|
||||
def _get_bucket_lock(self, bucket_id: str) -> threading.Lock:
|
||||
"""Get or create a lock for a specific bucket. Reduces global lock contention."""
|
||||
@@ -170,6 +174,11 @@ class ObjectStorage:
|
||||
def bucket_exists(self, bucket_name: str) -> bool:
|
||||
return self._bucket_path(bucket_name).exists()
|
||||
|
||||
def _require_bucket_exists(self, bucket_path: Path) -> None:
|
||||
"""Raise BucketNotFoundError if bucket does not exist."""
|
||||
if not bucket_path.exists():
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
|
||||
def _validate_bucket_name(self, bucket_name: str) -> None:
|
||||
if len(bucket_name) < 3 or len(bucket_name) > 63:
|
||||
raise StorageError("Bucket name must be between 3 and 63 characters")
|
||||
@@ -188,14 +197,14 @@ class ObjectStorage:
|
||||
|
||||
def bucket_stats(self, bucket_name: str, cache_ttl: int = 60) -> dict[str, int]:
|
||||
"""Return object count and total size for the bucket (cached).
|
||||
|
||||
|
||||
Args:
|
||||
bucket_name: Name of the bucket
|
||||
cache_ttl: Cache time-to-live in seconds (default 60)
|
||||
"""
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
|
||||
cache_path = self._system_bucket_root(bucket_name) / "stats.json"
|
||||
if cache_path.exists():
|
||||
@@ -257,8 +266,7 @@ class ObjectStorage:
|
||||
def delete_bucket(self, bucket_name: str) -> None:
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
# Performance: Single check instead of three separate traversals
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
has_objects, has_versions, has_multipart = self._check_bucket_contents(bucket_path)
|
||||
if has_objects:
|
||||
raise StorageError("Bucket not empty")
|
||||
@@ -291,7 +299,7 @@ class ObjectStorage:
|
||||
"""
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
bucket_id = bucket_path.name
|
||||
|
||||
object_cache = self._get_object_cache(bucket_id, bucket_path)
|
||||
@@ -352,7 +360,7 @@ class ObjectStorage:
|
||||
) -> ObjectMeta:
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
bucket_id = bucket_path.name
|
||||
|
||||
safe_key = self._sanitize_object_key(object_key)
|
||||
@@ -409,7 +417,6 @@ class ObjectStorage:
|
||||
|
||||
self._invalidate_bucket_stats_cache(bucket_id)
|
||||
|
||||
# Performance: Lazy update - only update the affected key instead of invalidating whole cache
|
||||
obj_meta = ObjectMeta(
|
||||
key=safe_key.as_posix(),
|
||||
size=stat.st_size,
|
||||
@@ -424,7 +431,7 @@ class ObjectStorage:
|
||||
def get_object_path(self, bucket_name: str, object_key: str) -> Path:
|
||||
path = self._object_path(bucket_name, object_key)
|
||||
if not path.exists():
|
||||
raise StorageError("Object not found")
|
||||
raise ObjectNotFoundError("Object not found")
|
||||
return path
|
||||
|
||||
def get_object_metadata(self, bucket_name: str, object_key: str) -> Dict[str, str]:
|
||||
@@ -467,7 +474,6 @@ class ObjectStorage:
|
||||
self._delete_metadata(bucket_id, rel)
|
||||
|
||||
self._invalidate_bucket_stats_cache(bucket_id)
|
||||
# Performance: Lazy update - only remove the affected key instead of invalidating whole cache
|
||||
self._update_object_cache_entry(bucket_id, safe_key.as_posix(), None)
|
||||
self._cleanup_empty_parents(path, bucket_path)
|
||||
|
||||
@@ -490,14 +496,13 @@ class ObjectStorage:
|
||||
shutil.rmtree(legacy_version_dir, ignore_errors=True)
|
||||
|
||||
self._invalidate_bucket_stats_cache(bucket_id)
|
||||
# Performance: Lazy update - only remove the affected key instead of invalidating whole cache
|
||||
self._update_object_cache_entry(bucket_id, rel.as_posix(), None)
|
||||
self._cleanup_empty_parents(target, bucket_path)
|
||||
|
||||
def is_versioning_enabled(self, bucket_name: str) -> bool:
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
return self._is_versioning_enabled(bucket_path)
|
||||
|
||||
def set_bucket_versioning(self, bucket_name: str, enabled: bool) -> None:
|
||||
@@ -689,11 +694,11 @@ class ObjectStorage:
|
||||
"""Get tags for an object."""
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
safe_key = self._sanitize_object_key(object_key)
|
||||
object_path = bucket_path / safe_key
|
||||
if not object_path.exists():
|
||||
raise StorageError("Object does not exist")
|
||||
raise ObjectNotFoundError("Object does not exist")
|
||||
|
||||
for meta_file in (self._metadata_file(bucket_path.name, safe_key), self._legacy_metadata_file(bucket_path.name, safe_key)):
|
||||
if not meta_file.exists():
|
||||
@@ -712,11 +717,11 @@ class ObjectStorage:
|
||||
"""Set tags for an object."""
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
safe_key = self._sanitize_object_key(object_key)
|
||||
object_path = bucket_path / safe_key
|
||||
if not object_path.exists():
|
||||
raise StorageError("Object does not exist")
|
||||
raise ObjectNotFoundError("Object does not exist")
|
||||
|
||||
meta_file = self._metadata_file(bucket_path.name, safe_key)
|
||||
|
||||
@@ -750,7 +755,7 @@ class ObjectStorage:
|
||||
def list_object_versions(self, bucket_name: str, object_key: str) -> List[Dict[str, Any]]:
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
bucket_id = bucket_path.name
|
||||
safe_key = self._sanitize_object_key(object_key)
|
||||
version_dir = self._version_dir(bucket_id, safe_key)
|
||||
@@ -774,7 +779,7 @@ class ObjectStorage:
|
||||
def restore_object_version(self, bucket_name: str, object_key: str, version_id: str) -> ObjectMeta:
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
bucket_id = bucket_path.name
|
||||
safe_key = self._sanitize_object_key(object_key)
|
||||
version_dir = self._version_dir(bucket_id, safe_key)
|
||||
@@ -811,7 +816,7 @@ class ObjectStorage:
|
||||
def delete_object_version(self, bucket_name: str, object_key: str, version_id: str) -> None:
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
bucket_id = bucket_path.name
|
||||
safe_key = self._sanitize_object_key(object_key)
|
||||
version_dir = self._version_dir(bucket_id, safe_key)
|
||||
@@ -834,7 +839,7 @@ class ObjectStorage:
|
||||
def list_orphaned_objects(self, bucket_name: str) -> List[Dict[str, Any]]:
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
bucket_id = bucket_path.name
|
||||
version_roots = [self._bucket_versions_root(bucket_id), self._legacy_versions_root(bucket_id)]
|
||||
if not any(root.exists() for root in version_roots):
|
||||
@@ -902,7 +907,7 @@ class ObjectStorage:
|
||||
) -> str:
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
bucket_id = bucket_path.name
|
||||
safe_key = self._sanitize_object_key(object_key)
|
||||
upload_id = uuid.uuid4().hex
|
||||
@@ -929,8 +934,8 @@ class ObjectStorage:
|
||||
|
||||
Uses file locking to safely update the manifest and handle concurrent uploads.
|
||||
"""
|
||||
if part_number < 1:
|
||||
raise StorageError("part_number must be >= 1")
|
||||
if part_number < 1 or part_number > 10000:
|
||||
raise StorageError("part_number must be between 1 and 10000")
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
|
||||
upload_root = self._multipart_dir(bucket_path.name, upload_id)
|
||||
@@ -939,7 +944,6 @@ class ObjectStorage:
|
||||
if not upload_root.exists():
|
||||
raise StorageError("Multipart upload not found")
|
||||
|
||||
# Write part to temporary file first, then rename atomically
|
||||
checksum = hashlib.md5()
|
||||
part_filename = f"part-{part_number:05d}.part"
|
||||
part_path = upload_root / part_filename
|
||||
@@ -948,11 +952,8 @@ class ObjectStorage:
|
||||
try:
|
||||
with temp_path.open("wb") as target:
|
||||
shutil.copyfileobj(_HashingReader(stream, checksum), target)
|
||||
|
||||
# Atomic rename (or replace on Windows)
|
||||
temp_path.replace(part_path)
|
||||
except OSError:
|
||||
# Clean up temp file on failure
|
||||
try:
|
||||
temp_path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
@@ -968,7 +969,6 @@ class ObjectStorage:
|
||||
manifest_path = upload_root / self.MULTIPART_MANIFEST
|
||||
lock_path = upload_root / ".manifest.lock"
|
||||
|
||||
# Retry loop for handling transient lock/read failures
|
||||
max_retries = 3
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
@@ -1151,10 +1151,10 @@ class ObjectStorage:
|
||||
"""List all active multipart uploads for a bucket."""
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
raise BucketNotFoundError("Bucket does not exist")
|
||||
bucket_id = bucket_path.name
|
||||
uploads = []
|
||||
multipart_root = self._bucket_multipart_root(bucket_id)
|
||||
multipart_root = self._multipart_bucket_root(bucket_id)
|
||||
if multipart_root.exists():
|
||||
for upload_dir in multipart_root.iterdir():
|
||||
if not upload_dir.is_dir():
|
||||
@@ -1171,7 +1171,7 @@ class ObjectStorage:
|
||||
})
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
legacy_root = self._legacy_multipart_root(bucket_id)
|
||||
legacy_root = self._legacy_multipart_bucket_root(bucket_id)
|
||||
if legacy_root.exists():
|
||||
for upload_dir in legacy_root.iterdir():
|
||||
if not upload_dir.is_dir():
|
||||
@@ -1394,7 +1394,6 @@ class ObjectStorage:
|
||||
"""
|
||||
now = time.time()
|
||||
|
||||
# Quick check with global lock (brief)
|
||||
with self._cache_lock:
|
||||
cached = self._object_cache.get(bucket_id)
|
||||
if cached:
|
||||
@@ -1404,10 +1403,8 @@ class ObjectStorage:
|
||||
return objects
|
||||
cache_version = self._cache_version.get(bucket_id, 0)
|
||||
|
||||
# Use per-bucket lock for cache building (allows parallel builds for different buckets)
|
||||
bucket_lock = self._get_bucket_lock(bucket_id)
|
||||
with bucket_lock:
|
||||
# Double-check cache after acquiring per-bucket lock
|
||||
with self._cache_lock:
|
||||
cached = self._object_cache.get(bucket_id)
|
||||
if cached:
|
||||
@@ -1415,17 +1412,12 @@ class ObjectStorage:
|
||||
if now - timestamp < self.KEY_INDEX_CACHE_TTL:
|
||||
self._object_cache.move_to_end(bucket_id)
|
||||
return objects
|
||||
|
||||
# Build cache with per-bucket lock held (prevents duplicate work)
|
||||
objects = self._build_object_cache(bucket_path)
|
||||
|
||||
with self._cache_lock:
|
||||
# Check if cache was invalidated while we were building
|
||||
current_version = self._cache_version.get(bucket_id, 0)
|
||||
if current_version != cache_version:
|
||||
objects = self._build_object_cache(bucket_path)
|
||||
|
||||
# Evict oldest entries if cache is full
|
||||
while len(self._object_cache) >= self.OBJECT_CACHE_MAX_SIZE:
|
||||
self._object_cache.popitem(last=False)
|
||||
|
||||
@@ -1459,12 +1451,9 @@ class ObjectStorage:
|
||||
if cached:
|
||||
objects, timestamp = cached
|
||||
if meta is None:
|
||||
# Delete operation - remove key from cache
|
||||
objects.pop(key, None)
|
||||
else:
|
||||
# Put operation - update/add key in cache
|
||||
objects[key] = meta
|
||||
# Keep same timestamp - don't reset TTL for single key updates
|
||||
|
||||
def _ensure_system_roots(self) -> None:
|
||||
for path in (
|
||||
@@ -1485,13 +1474,12 @@ class ObjectStorage:
|
||||
return self._system_bucket_root(bucket_name) / self.BUCKET_CONFIG_FILE
|
||||
|
||||
def _read_bucket_config(self, bucket_name: str) -> dict[str, Any]:
|
||||
# Performance: Check cache first
|
||||
now = time.time()
|
||||
cached = self._bucket_config_cache.get(bucket_name)
|
||||
if cached:
|
||||
config, cached_time = cached
|
||||
if now - cached_time < self._bucket_config_cache_ttl:
|
||||
return config.copy() # Return copy to prevent mutation
|
||||
return config.copy()
|
||||
|
||||
config_path = self._bucket_config_path(bucket_name)
|
||||
if not config_path.exists():
|
||||
@@ -1510,7 +1498,6 @@ class ObjectStorage:
|
||||
config_path = self._bucket_config_path(bucket_name)
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
config_path.write_text(json.dumps(payload), encoding="utf-8")
|
||||
# Performance: Update cache immediately after write
|
||||
self._bucket_config_cache[bucket_name] = (payload.copy(), time.time())
|
||||
|
||||
def _set_bucket_config_entry(self, bucket_name: str, key: str, value: Any | None) -> None:
|
||||
@@ -1636,7 +1623,6 @@ class ObjectStorage:
|
||||
def _check_bucket_contents(self, bucket_path: Path) -> tuple[bool, bool, bool]:
|
||||
"""Check bucket for objects, versions, and multipart uploads in a single pass.
|
||||
|
||||
Performance optimization: Combines three separate rglob traversals into one.
|
||||
Returns (has_visible_objects, has_archived_versions, has_active_multipart_uploads).
|
||||
Uses early exit when all three are found.
|
||||
"""
|
||||
@@ -1645,7 +1631,6 @@ class ObjectStorage:
|
||||
has_multipart = False
|
||||
bucket_name = bucket_path.name
|
||||
|
||||
# Check visible objects in bucket
|
||||
for path in bucket_path.rglob("*"):
|
||||
if has_objects:
|
||||
break
|
||||
@@ -1656,7 +1641,6 @@ class ObjectStorage:
|
||||
continue
|
||||
has_objects = True
|
||||
|
||||
# Check archived versions (only if needed)
|
||||
for version_root in (
|
||||
self._bucket_versions_root(bucket_name),
|
||||
self._legacy_versions_root(bucket_name),
|
||||
@@ -1669,7 +1653,6 @@ class ObjectStorage:
|
||||
has_versions = True
|
||||
break
|
||||
|
||||
# Check multipart uploads (only if needed)
|
||||
for uploads_root in (
|
||||
self._multipart_bucket_root(bucket_name),
|
||||
self._legacy_multipart_bucket_root(bucket_name),
|
||||
@@ -1703,7 +1686,7 @@ class ObjectStorage:
|
||||
try:
|
||||
os.chmod(target_path, stat.S_IRWXU)
|
||||
func(target_path)
|
||||
except Exception as exc: # pragma: no cover - fallback failure
|
||||
except Exception as exc:
|
||||
raise StorageError(f"Unable to delete bucket contents: {exc}") from exc
|
||||
|
||||
try:
|
||||
|
||||
19
app/ui.py
19
app/ui.py
@@ -371,7 +371,7 @@ def bucket_detail(bucket_name: str):
|
||||
kms_keys = kms_manager.list_keys() if kms_manager else []
|
||||
kms_enabled = current_app.config.get("KMS_ENABLED", False)
|
||||
encryption_enabled = current_app.config.get("ENCRYPTION_ENABLED", False)
|
||||
can_manage_encryption = can_manage_versioning # Same as other bucket properties
|
||||
can_manage_encryption = can_manage_versioning
|
||||
|
||||
bucket_quota = storage.get_bucket_quota(bucket_name)
|
||||
bucket_stats = storage.bucket_stats(bucket_name)
|
||||
@@ -450,8 +450,6 @@ def list_bucket_objects(bucket_name: str):
|
||||
except StorageError:
|
||||
versioning_enabled = False
|
||||
|
||||
# Pre-compute URL templates once (not per-object) for performance
|
||||
# Frontend will construct actual URLs by replacing KEY_PLACEHOLDER
|
||||
preview_template = url_for("ui.object_preview", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
delete_template = url_for("ui.delete_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
presign_template = url_for("ui.object_presign", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
@@ -527,8 +525,6 @@ def upload_object(bucket_name: str):
|
||||
try:
|
||||
_authorize_ui(principal, bucket_name, "write")
|
||||
_storage().put_object(bucket_name, object_key, file.stream, metadata=metadata)
|
||||
|
||||
# Trigger replication
|
||||
_replication().trigger_replication(bucket_name, object_key)
|
||||
|
||||
message = f"Uploaded '{object_key}'"
|
||||
@@ -765,20 +761,18 @@ def bulk_download_objects(bucket_name: str):
|
||||
if not cleaned:
|
||||
return jsonify({"error": "Select at least one object to download"}), 400
|
||||
|
||||
MAX_KEYS = current_app.config.get("BULK_DELETE_MAX_KEYS", 500) # Reuse same limit for now
|
||||
MAX_KEYS = current_app.config.get("BULK_DELETE_MAX_KEYS", 500)
|
||||
if len(cleaned) > MAX_KEYS:
|
||||
return jsonify({"error": f"A maximum of {MAX_KEYS} objects can be downloaded per request"}), 400
|
||||
|
||||
unique_keys = list(dict.fromkeys(cleaned))
|
||||
storage = _storage()
|
||||
|
||||
# Verify permission to read bucket contents
|
||||
try:
|
||||
_authorize_ui(principal, bucket_name, "read")
|
||||
except IamError as exc:
|
||||
return jsonify({"error": str(exc)}), 403
|
||||
|
||||
# Create ZIP archive of selected objects
|
||||
buffer = io.BytesIO()
|
||||
with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
|
||||
for key in unique_keys:
|
||||
@@ -795,7 +789,6 @@ def bulk_download_objects(bucket_name: str):
|
||||
path = storage.get_object_path(bucket_name, key)
|
||||
zf.write(path, arcname=key)
|
||||
except (StorageError, IamError):
|
||||
# Skip objects that can't be accessed
|
||||
continue
|
||||
|
||||
buffer.seek(0)
|
||||
@@ -846,7 +839,6 @@ def object_preview(bucket_name: str, object_key: str) -> Response:
|
||||
|
||||
download = request.args.get("download") == "1"
|
||||
|
||||
# Check if object is encrypted and needs decryption
|
||||
is_encrypted = "x-amz-server-side-encryption" in metadata
|
||||
if is_encrypted and hasattr(storage, 'get_object_data'):
|
||||
try:
|
||||
@@ -882,7 +874,6 @@ def object_presign(bucket_name: str, object_key: str):
|
||||
encoded_key = quote(object_key, safe="/")
|
||||
url = f"{api_base}/presign/{bucket_name}/{encoded_key}"
|
||||
|
||||
# Use API base URL for forwarded headers so presigned URLs point to API, not UI
|
||||
parsed_api = urlparse(api_base)
|
||||
headers = _api_headers()
|
||||
headers["X-Forwarded-Host"] = parsed_api.netloc or "127.0.0.1:5000"
|
||||
@@ -1027,7 +1018,6 @@ def update_bucket_quota(bucket_name: str):
|
||||
"""Update bucket quota configuration (admin only)."""
|
||||
principal = _current_principal()
|
||||
|
||||
# Quota management is admin-only
|
||||
is_admin = False
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:list_users")
|
||||
@@ -1049,7 +1039,6 @@ def update_bucket_quota(bucket_name: str):
|
||||
flash(_friendly_error_message(exc), "danger")
|
||||
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
|
||||
|
||||
# Parse quota values
|
||||
max_mb_str = request.form.get("max_mb", "").strip()
|
||||
max_objects_str = request.form.get("max_objects", "").strip()
|
||||
|
||||
@@ -1061,7 +1050,7 @@ def update_bucket_quota(bucket_name: str):
|
||||
max_mb = int(max_mb_str)
|
||||
if max_mb < 1:
|
||||
raise ValueError("Size must be at least 1 MB")
|
||||
max_bytes = max_mb * 1024 * 1024 # Convert MB to bytes
|
||||
max_bytes = max_mb * 1024 * 1024
|
||||
except ValueError as exc:
|
||||
flash(f"Invalid size value: {exc}", "danger")
|
||||
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
|
||||
@@ -1114,7 +1103,6 @@ def update_bucket_encryption(bucket_name: str):
|
||||
flash("Invalid encryption algorithm", "danger")
|
||||
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
|
||||
|
||||
# Build encryption configuration in AWS S3 format
|
||||
encryption_config: dict[str, Any] = {
|
||||
"Rules": [
|
||||
{
|
||||
@@ -1505,7 +1493,6 @@ def update_bucket_replication(bucket_name: str):
|
||||
if rule:
|
||||
rule.enabled = True
|
||||
_replication().set_rule(rule)
|
||||
# When resuming, sync any pending objects that accumulated while paused
|
||||
if rule.mode == REPLICATION_MODE_ALL:
|
||||
_replication().replicate_existing_objects(bucket_name)
|
||||
flash("Replication resumed. Syncing pending objects in background.", "success")
|
||||
|
||||
Reference in New Issue
Block a user