MyFSIO v0.1.9 Release #10
@@ -9,7 +9,7 @@ import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import boto3
|
||||
from botocore.config import Config
|
||||
@@ -24,11 +24,42 @@ logger = logging.getLogger(__name__)
|
||||
REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
|
||||
REPLICATION_CONNECT_TIMEOUT = 5
|
||||
REPLICATION_READ_TIMEOUT = 30
|
||||
STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024 # 10 MiB - use streaming for larger files
|
||||
|
||||
REPLICATION_MODE_NEW_ONLY = "new_only"
|
||||
REPLICATION_MODE_ALL = "all"
|
||||
|
||||
|
||||
def _create_s3_client(connection: RemoteConnection, *, health_check: bool = False) -> Any:
|
||||
"""Create a boto3 S3 client for the given connection.
|
||||
|
||||
Args:
|
||||
connection: Remote S3 connection configuration
|
||||
health_check: If True, use minimal retries for quick health checks
|
||||
|
||||
Returns:
|
||||
Configured boto3 S3 client
|
||||
"""
|
||||
config = Config(
|
||||
user_agent_extra=REPLICATION_USER_AGENT,
|
||||
connect_timeout=REPLICATION_CONNECT_TIMEOUT,
|
||||
read_timeout=REPLICATION_READ_TIMEOUT,
|
||||
retries={'max_attempts': 1 if health_check else 2},
|
||||
signature_version='s3v4',
|
||||
s3={'addressing_style': 'path'},
|
||||
request_checksum_calculation='when_required',
|
||||
response_checksum_validation='when_required',
|
||||
)
|
||||
return boto3.client(
|
||||
"s3",
|
||||
endpoint_url=connection.endpoint_url,
|
||||
aws_access_key_id=connection.access_key,
|
||||
aws_secret_access_key=connection.secret_key,
|
||||
region_name=connection.region or 'us-east-1',
|
||||
config=config,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ReplicationStats:
|
||||
"""Statistics for replication operations - computed dynamically."""
|
||||
@@ -102,8 +133,19 @@ class ReplicationManager:
|
||||
self._rules: Dict[str, ReplicationRule] = {}
|
||||
self._stats_lock = threading.Lock()
|
||||
self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
|
||||
self._shutdown = False
|
||||
self.reload_rules()
|
||||
|
||||
def shutdown(self, wait: bool = True) -> None:
|
||||
"""Shutdown the replication executor gracefully.
|
||||
|
||||
Args:
|
||||
wait: If True, wait for pending tasks to complete
|
||||
"""
|
||||
self._shutdown = True
|
||||
self._executor.shutdown(wait=wait)
|
||||
logger.info("Replication manager shut down")
|
||||
|
||||
def reload_rules(self) -> None:
|
||||
if not self.rules_path.exists():
|
||||
self._rules = {}
|
||||
@@ -129,20 +171,7 @@ class ReplicationManager:
|
||||
Uses short timeouts to prevent blocking.
|
||||
"""
|
||||
try:
|
||||
config = Config(
|
||||
user_agent_extra=REPLICATION_USER_AGENT,
|
||||
connect_timeout=REPLICATION_CONNECT_TIMEOUT,
|
||||
read_timeout=REPLICATION_READ_TIMEOUT,
|
||||
retries={'max_attempts': 1}
|
||||
)
|
||||
s3 = boto3.client(
|
||||
"s3",
|
||||
endpoint_url=connection.endpoint_url,
|
||||
aws_access_key_id=connection.access_key,
|
||||
aws_secret_access_key=connection.secret_key,
|
||||
region_name=connection.region,
|
||||
config=config,
|
||||
)
|
||||
s3 = _create_s3_client(connection, health_check=True)
|
||||
s3.list_buckets()
|
||||
return True
|
||||
except Exception as e:
|
||||
@@ -185,13 +214,7 @@ class ReplicationManager:
|
||||
source_objects = self.storage.list_objects_all(bucket_name)
|
||||
source_keys = {obj.key: obj.size for obj in source_objects}
|
||||
|
||||
s3 = boto3.client(
|
||||
"s3",
|
||||
endpoint_url=connection.endpoint_url,
|
||||
aws_access_key_id=connection.access_key,
|
||||
aws_secret_access_key=connection.secret_key,
|
||||
region_name=connection.region,
|
||||
)
|
||||
s3 = _create_s3_client(connection)
|
||||
|
||||
dest_keys = set()
|
||||
bytes_synced = 0
|
||||
@@ -257,13 +280,7 @@ class ReplicationManager:
|
||||
raise ValueError(f"Connection {connection_id} not found")
|
||||
|
||||
try:
|
||||
s3 = boto3.client(
|
||||
"s3",
|
||||
endpoint_url=connection.endpoint_url,
|
||||
aws_access_key_id=connection.access_key,
|
||||
aws_secret_access_key=connection.secret_key,
|
||||
region_name=connection.region,
|
||||
)
|
||||
s3 = _create_s3_client(connection)
|
||||
s3.create_bucket(Bucket=bucket_name)
|
||||
except ClientError as e:
|
||||
logger.error(f"Failed to create remote bucket {bucket_name}: {e}")
|
||||
@@ -286,6 +303,9 @@ class ReplicationManager:
|
||||
self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, action)
|
||||
|
||||
def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection, action: str) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
|
||||
if ".." in object_key or object_key.startswith("/") or object_key.startswith("\\"):
|
||||
logger.error(f"Invalid object key in replication (path traversal attempt): {object_key}")
|
||||
return
|
||||
@@ -297,30 +317,8 @@ class ReplicationManager:
|
||||
logger.error(f"Object key validation failed in replication: {e}")
|
||||
return
|
||||
|
||||
file_size = 0
|
||||
try:
|
||||
config = Config(
|
||||
user_agent_extra=REPLICATION_USER_AGENT,
|
||||
connect_timeout=REPLICATION_CONNECT_TIMEOUT,
|
||||
read_timeout=REPLICATION_READ_TIMEOUT,
|
||||
retries={'max_attempts': 2},
|
||||
signature_version='s3v4',
|
||||
s3={
|
||||
'addressing_style': 'path',
|
||||
},
|
||||
# Disable SDK automatic checksums - they cause SignatureDoesNotMatch errors
|
||||
# with S3-compatible servers that don't support CRC32 checksum headers
|
||||
request_checksum_calculation='when_required',
|
||||
response_checksum_validation='when_required',
|
||||
)
|
||||
s3 = boto3.client(
|
||||
"s3",
|
||||
endpoint_url=conn.endpoint_url,
|
||||
aws_access_key_id=conn.access_key,
|
||||
aws_secret_access_key=conn.secret_key,
|
||||
region_name=conn.region or 'us-east-1',
|
||||
config=config,
|
||||
)
|
||||
s3 = _create_s3_client(conn)
|
||||
|
||||
if action == "delete":
|
||||
try:
|
||||
@@ -337,34 +335,42 @@ class ReplicationManager:
|
||||
logger.error(f"Source object not found: {bucket_name}/{object_key}")
|
||||
return
|
||||
|
||||
# Don't replicate metadata - destination server will generate its own
|
||||
# __etag__ and __size__. Replicating them causes signature mismatches when they have None/empty values.
|
||||
|
||||
content_type, _ = mimetypes.guess_type(path)
|
||||
file_size = path.stat().st_size
|
||||
|
||||
logger.info(f"Replicating {bucket_name}/{object_key}: Size={file_size}, ContentType={content_type}")
|
||||
|
||||
def do_put_object() -> None:
|
||||
"""Helper to upload object.
|
||||
def do_upload() -> None:
|
||||
"""Upload object using appropriate method based on file size.
|
||||
|
||||
Reads the file content into memory first to avoid signature calculation
|
||||
issues with certain binary file types (like GIFs) when streaming.
|
||||
Do NOT set ContentLength explicitly - boto3 calculates it from the bytes
|
||||
and setting it manually can cause SignatureDoesNotMatch errors.
|
||||
For small files (< 10 MiB): Read into memory for simpler handling
|
||||
For large files: Use streaming upload to avoid memory issues
|
||||
"""
|
||||
file_content = path.read_bytes()
|
||||
put_kwargs = {
|
||||
"Bucket": rule.target_bucket,
|
||||
"Key": object_key,
|
||||
"Body": file_content,
|
||||
}
|
||||
extra_args = {}
|
||||
if content_type:
|
||||
put_kwargs["ContentType"] = content_type
|
||||
s3.put_object(**put_kwargs)
|
||||
extra_args["ContentType"] = content_type
|
||||
|
||||
if file_size >= STREAMING_THRESHOLD_BYTES:
|
||||
# Use multipart upload for large files
|
||||
s3.upload_file(
|
||||
str(path),
|
||||
rule.target_bucket,
|
||||
object_key,
|
||||
ExtraArgs=extra_args if extra_args else None,
|
||||
)
|
||||
else:
|
||||
# Read small files into memory
|
||||
file_content = path.read_bytes()
|
||||
put_kwargs = {
|
||||
"Bucket": rule.target_bucket,
|
||||
"Key": object_key,
|
||||
"Body": file_content,
|
||||
**extra_args,
|
||||
}
|
||||
s3.put_object(**put_kwargs)
|
||||
|
||||
try:
|
||||
do_put_object()
|
||||
do_upload()
|
||||
except (ClientError, S3UploadFailedError) as e:
|
||||
error_code = None
|
||||
if isinstance(e, ClientError):
|
||||
@@ -389,7 +395,7 @@ class ReplicationManager:
|
||||
raise e
|
||||
|
||||
if bucket_ready:
|
||||
do_put_object()
|
||||
do_upload()
|
||||
else:
|
||||
raise e
|
||||
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
"""Flask blueprint exposing a subset of the S3 REST API."""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import hmac
|
||||
import logging
|
||||
import mimetypes
|
||||
import re
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib.parse import quote, urlencode, urlparse, unquote
|
||||
from xml.etree.ElementTree import Element, SubElement, tostring, fromstring, ParseError
|
||||
|
||||
@@ -20,6 +22,8 @@ from .iam import IamError, Principal
|
||||
from .replication import ReplicationManager
|
||||
from .storage import ObjectStorage, StorageError, QuotaExceededError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
s3_api_bp = Blueprint("s3_api", __name__)
|
||||
|
||||
def _storage() -> ObjectStorage:
|
||||
@@ -118,6 +122,9 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
|
||||
if header_val is None:
|
||||
header_val = ""
|
||||
|
||||
if header.lower() == 'expect' and header_val == "":
|
||||
header_val = "100-continue"
|
||||
|
||||
header_val = " ".join(header_val.split())
|
||||
canonical_headers_parts.append(f"{header.lower()}:{header_val}\n")
|
||||
canonical_headers = "".join(canonical_headers_parts)
|
||||
@@ -128,15 +135,6 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
|
||||
|
||||
canonical_request = f"{method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers_str}\n{payload_hash}"
|
||||
|
||||
# Debug logging for signature issues
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.debug(f"SigV4 Debug - Method: {method}, URI: {canonical_uri}")
|
||||
logger.debug(f"SigV4 Debug - Payload hash from header: {req.headers.get('X-Amz-Content-Sha256')}")
|
||||
logger.debug(f"SigV4 Debug - Signed headers: {signed_headers_str}")
|
||||
logger.debug(f"SigV4 Debug - Content-Type: {req.headers.get('Content-Type')}")
|
||||
logger.debug(f"SigV4 Debug - Content-Length: {req.headers.get('Content-Length')}")
|
||||
|
||||
amz_date = req.headers.get("X-Amz-Date") or req.headers.get("Date")
|
||||
if not amz_date:
|
||||
raise IamError("Missing Date header")
|
||||
@@ -167,24 +165,18 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
|
||||
calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||
|
||||
if not hmac.compare_digest(calculated_signature, signature):
|
||||
# Debug logging for signature mismatch
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.error(f"Signature mismatch for {req.path}")
|
||||
logger.error(f" Content-Type: {req.headers.get('Content-Type')}")
|
||||
logger.error(f" Content-Length: {req.headers.get('Content-Length')}")
|
||||
logger.error(f" X-Amz-Content-Sha256: {req.headers.get('X-Amz-Content-Sha256')}")
|
||||
logger.error(f" Canonical URI: {canonical_uri}")
|
||||
logger.error(f" Signed headers: {signed_headers_str}")
|
||||
# Log each signed header's value
|
||||
for h in signed_headers_list:
|
||||
logger.error(f" Header '{h}': {repr(req.headers.get(h))}")
|
||||
logger.error(f" Expected sig: {signature[:16]}...")
|
||||
logger.error(f" Calculated sig: {calculated_signature[:16]}...")
|
||||
# Log first part of canonical request to compare
|
||||
logger.error(f" Canonical request hash: {hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()[:16]}...")
|
||||
# Log the full canonical request for debugging
|
||||
logger.error(f" Canonical request:\n{canonical_request[:500]}...")
|
||||
# Only log detailed signature debug info if DEBUG_SIGV4 is enabled
|
||||
if current_app.config.get("DEBUG_SIGV4"):
|
||||
logger.warning(
|
||||
"SigV4 signature mismatch",
|
||||
extra={
|
||||
"path": req.path,
|
||||
"method": method,
|
||||
"signed_headers": signed_headers_str,
|
||||
"content_type": req.headers.get("Content-Type"),
|
||||
"content_length": req.headers.get("Content-Length"),
|
||||
}
|
||||
)
|
||||
raise IamError("SignatureDoesNotMatch")
|
||||
|
||||
return _iam().get_principal(access_key)
|
||||
@@ -236,6 +228,8 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
|
||||
canonical_headers_parts = []
|
||||
for header in signed_headers_list:
|
||||
val = req.headers.get(header, "").strip()
|
||||
if header.lower() == 'expect' and val == "":
|
||||
val = "100-continue"
|
||||
val = " ".join(val.split())
|
||||
canonical_headers_parts.append(f"{header}:{val}\n")
|
||||
canonical_headers = "".join(canonical_headers_parts)
|
||||
@@ -569,6 +563,28 @@ def _strip_ns(tag: str | None) -> str:
|
||||
return tag.split("}")[-1]
|
||||
|
||||
|
||||
def _find_element(parent: Element, name: str) -> Optional[Element]:
|
||||
"""Find a child element by name, trying both namespaced and non-namespaced variants.
|
||||
|
||||
This handles XML documents that may or may not include namespace prefixes.
|
||||
"""
|
||||
el = parent.find(f"{{*}}{name}")
|
||||
if el is None:
|
||||
el = parent.find(name)
|
||||
return el
|
||||
|
||||
|
||||
def _find_element_text(parent: Element, name: str, default: str = "") -> str:
|
||||
"""Find a child element and return its text content.
|
||||
|
||||
Returns the default value if element not found or has no text.
|
||||
"""
|
||||
el = _find_element(parent, name)
|
||||
if el is None or el.text is None:
|
||||
return default
|
||||
return el.text.strip()
|
||||
|
||||
|
||||
def _parse_tagging_document(payload: bytes) -> list[dict[str, str]]:
|
||||
try:
|
||||
root = fromstring(payload)
|
||||
@@ -585,17 +601,11 @@ def _parse_tagging_document(payload: bytes) -> list[dict[str, str]]:
|
||||
for tag_el in list(tagset):
|
||||
if _strip_ns(tag_el.tag) != "Tag":
|
||||
continue
|
||||
key_el = tag_el.find("{*}Key")
|
||||
if key_el is None:
|
||||
key_el = tag_el.find("Key")
|
||||
value_el = tag_el.find("{*}Value")
|
||||
if value_el is None:
|
||||
value_el = tag_el.find("Value")
|
||||
key = (key_el.text or "").strip() if key_el is not None else ""
|
||||
key = _find_element_text(tag_el, "Key")
|
||||
if not key:
|
||||
continue
|
||||
value = value_el.text if value_el is not None else ""
|
||||
tags.append({"Key": key, "Value": value or ""})
|
||||
value = _find_element_text(tag_el, "Value")
|
||||
tags.append({"Key": key, "Value": value})
|
||||
return tags
|
||||
|
||||
|
||||
@@ -1439,7 +1449,7 @@ def _bucket_quota_handler(bucket_name: str) -> Response:
|
||||
|
||||
if request.method == "DELETE":
|
||||
try:
|
||||
storage.set_bucket_quota(bucket_name, max_size_bytes=None, max_objects=None)
|
||||
storage.set_bucket_quota(bucket_name, max_bytes=None, max_objects=None)
|
||||
except StorageError as exc:
|
||||
return _error_response("NoSuchBucket", str(exc), 404)
|
||||
current_app.logger.info("Bucket quota deleted", extra={"bucket": bucket_name})
|
||||
@@ -1473,7 +1483,7 @@ def _bucket_quota_handler(bucket_name: str) -> Response:
|
||||
return _error_response("InvalidArgument", f"max_objects {exc}", 400)
|
||||
|
||||
try:
|
||||
storage.set_bucket_quota(bucket_name, max_size_bytes=max_size_bytes, max_objects=max_objects)
|
||||
storage.set_bucket_quota(bucket_name, max_bytes=max_size_bytes, max_objects=max_objects)
|
||||
except StorageError as exc:
|
||||
return _error_response("NoSuchBucket", str(exc), 404)
|
||||
|
||||
@@ -1665,7 +1675,6 @@ def bucket_handler(bucket_name: str) -> Response:
|
||||
effective_start = ""
|
||||
if list_type == "2":
|
||||
if continuation_token:
|
||||
import base64
|
||||
try:
|
||||
effective_start = base64.urlsafe_b64decode(continuation_token.encode()).decode("utf-8")
|
||||
except Exception:
|
||||
@@ -1722,7 +1731,6 @@ def bucket_handler(bucket_name: str) -> Response:
|
||||
next_marker = common_prefixes[-1].rstrip(delimiter) if delimiter else common_prefixes[-1]
|
||||
|
||||
if list_type == "2" and next_marker:
|
||||
import base64
|
||||
next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")
|
||||
|
||||
if list_type == "2":
|
||||
|
||||
113
app/storage.py
113
app/storage.py
@@ -7,9 +7,11 @@ import os
|
||||
import re
|
||||
import shutil
|
||||
import stat
|
||||
import threading
|
||||
import time
|
||||
import unicodedata
|
||||
import uuid
|
||||
from collections import OrderedDict
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
@@ -129,12 +131,17 @@ class ObjectStorage:
|
||||
MULTIPART_MANIFEST = "manifest.json"
|
||||
BUCKET_CONFIG_FILE = ".bucket.json"
|
||||
KEY_INDEX_CACHE_TTL = 30
|
||||
OBJECT_CACHE_MAX_SIZE = 100 # Maximum number of buckets to cache
|
||||
|
||||
def __init__(self, root: Path) -> None:
|
||||
self.root = Path(root)
|
||||
self.root.mkdir(parents=True, exist_ok=True)
|
||||
self._ensure_system_roots()
|
||||
self._object_cache: Dict[str, tuple[Dict[str, ObjectMeta], float]] = {}
|
||||
# LRU cache for object metadata with thread-safe access
|
||||
self._object_cache: OrderedDict[str, tuple[Dict[str, ObjectMeta], float]] = OrderedDict()
|
||||
self._cache_lock = threading.Lock()
|
||||
# Cache version counter for detecting stale reads
|
||||
self._cache_version: Dict[str, int] = {}
|
||||
|
||||
def list_buckets(self) -> List[BucketMeta]:
|
||||
buckets: List[BucketMeta] = []
|
||||
@@ -729,8 +736,6 @@ class ObjectStorage:
|
||||
bucket_id = bucket_path.name
|
||||
safe_key = self._sanitize_object_key(object_key)
|
||||
version_dir = self._version_dir(bucket_id, safe_key)
|
||||
if not version_dir.exists():
|
||||
version_dir = self._legacy_version_dir(bucket_id, safe_key)
|
||||
if not version_dir.exists():
|
||||
version_dir = self._legacy_version_dir(bucket_id, safe_key)
|
||||
if not version_dir.exists():
|
||||
@@ -879,6 +884,10 @@ class ObjectStorage:
|
||||
part_number: int,
|
||||
stream: BinaryIO,
|
||||
) -> str:
|
||||
"""Upload a part for a multipart upload.
|
||||
|
||||
Uses file locking to safely update the manifest and handle concurrent uploads.
|
||||
"""
|
||||
if part_number < 1:
|
||||
raise StorageError("part_number must be >= 1")
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
@@ -889,11 +898,26 @@ class ObjectStorage:
|
||||
if not upload_root.exists():
|
||||
raise StorageError("Multipart upload not found")
|
||||
|
||||
# Write part to temporary file first, then rename atomically
|
||||
checksum = hashlib.md5()
|
||||
part_filename = f"part-{part_number:05d}.part"
|
||||
part_path = upload_root / part_filename
|
||||
with part_path.open("wb") as target:
|
||||
shutil.copyfileobj(_HashingReader(stream, checksum), target)
|
||||
temp_path = upload_root / f".{part_filename}.tmp"
|
||||
|
||||
try:
|
||||
with temp_path.open("wb") as target:
|
||||
shutil.copyfileobj(_HashingReader(stream, checksum), target)
|
||||
|
||||
# Atomic rename (or replace on Windows)
|
||||
temp_path.replace(part_path)
|
||||
except OSError:
|
||||
# Clean up temp file on failure
|
||||
try:
|
||||
temp_path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
record = {
|
||||
"etag": checksum.hexdigest(),
|
||||
"size": part_path.stat().st_size,
|
||||
@@ -903,16 +927,29 @@ class ObjectStorage:
|
||||
manifest_path = upload_root / self.MULTIPART_MANIFEST
|
||||
lock_path = upload_root / ".manifest.lock"
|
||||
|
||||
with lock_path.open("w") as lock_file:
|
||||
with _file_lock(lock_file):
|
||||
try:
|
||||
manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError) as exc:
|
||||
raise StorageError("Multipart manifest unreadable") from exc
|
||||
# Retry loop for handling transient lock/read failures
|
||||
max_retries = 3
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
with lock_path.open("w") as lock_file:
|
||||
with _file_lock(lock_file):
|
||||
try:
|
||||
manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError) as exc:
|
||||
if attempt < max_retries - 1:
|
||||
time.sleep(0.1 * (attempt + 1))
|
||||
continue
|
||||
raise StorageError("Multipart manifest unreadable") from exc
|
||||
|
||||
parts = manifest.setdefault("parts", {})
|
||||
parts[str(part_number)] = record
|
||||
manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
|
||||
parts = manifest.setdefault("parts", {})
|
||||
parts[str(part_number)] = record
|
||||
manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
|
||||
break
|
||||
except OSError as exc:
|
||||
if attempt < max_retries - 1:
|
||||
time.sleep(0.1 * (attempt + 1))
|
||||
continue
|
||||
raise StorageError(f"Failed to update multipart manifest: {exc}") from exc
|
||||
|
||||
return record["etag"]
|
||||
|
||||
@@ -1264,22 +1301,52 @@ class ObjectStorage:
|
||||
return objects
|
||||
|
||||
def _get_object_cache(self, bucket_id: str, bucket_path: Path) -> Dict[str, ObjectMeta]:
|
||||
"""Get cached object metadata for a bucket, refreshing if stale."""
|
||||
"""Get cached object metadata for a bucket, refreshing if stale.
|
||||
|
||||
Uses LRU eviction to prevent unbounded cache growth.
|
||||
Thread-safe with version tracking to detect concurrent invalidations.
|
||||
"""
|
||||
now = time.time()
|
||||
cached = self._object_cache.get(bucket_id)
|
||||
|
||||
if cached:
|
||||
objects, timestamp = cached
|
||||
if now - timestamp < self.KEY_INDEX_CACHE_TTL:
|
||||
return objects
|
||||
with self._cache_lock:
|
||||
cached = self._object_cache.get(bucket_id)
|
||||
cache_version = self._cache_version.get(bucket_id, 0)
|
||||
|
||||
if cached:
|
||||
objects, timestamp = cached
|
||||
if now - timestamp < self.KEY_INDEX_CACHE_TTL:
|
||||
# Move to end (most recently used)
|
||||
self._object_cache.move_to_end(bucket_id)
|
||||
return objects
|
||||
|
||||
# Build cache outside lock to avoid holding lock during I/O
|
||||
objects = self._build_object_cache(bucket_path)
|
||||
self._object_cache[bucket_id] = (objects, now)
|
||||
|
||||
with self._cache_lock:
|
||||
# Check if cache was invalidated while we were building
|
||||
current_version = self._cache_version.get(bucket_id, 0)
|
||||
if current_version != cache_version:
|
||||
# Cache was invalidated, rebuild
|
||||
objects = self._build_object_cache(bucket_path)
|
||||
|
||||
# Evict oldest entries if cache is full
|
||||
while len(self._object_cache) >= self.OBJECT_CACHE_MAX_SIZE:
|
||||
self._object_cache.popitem(last=False)
|
||||
|
||||
self._object_cache[bucket_id] = (objects, time.time())
|
||||
self._object_cache.move_to_end(bucket_id)
|
||||
|
||||
return objects
|
||||
|
||||
def _invalidate_object_cache(self, bucket_id: str) -> None:
|
||||
"""Invalidate the object cache and etag index for a bucket."""
|
||||
self._object_cache.pop(bucket_id, None)
|
||||
"""Invalidate the object cache and etag index for a bucket.
|
||||
|
||||
Increments version counter to signal stale reads.
|
||||
"""
|
||||
with self._cache_lock:
|
||||
self._object_cache.pop(bucket_id, None)
|
||||
self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1
|
||||
|
||||
etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
|
||||
try:
|
||||
etag_index_path.unlink(missing_ok=True)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Central location for the application version string."""
|
||||
from __future__ import annotations
|
||||
|
||||
APP_VERSION = "0.1.8"
|
||||
APP_VERSION = "0.1.9"
|
||||
|
||||
|
||||
def get_version() -> str:
|
||||
|
||||
Reference in New Issue
Block a user