18 Commits

Author SHA1 Message Date
4ab58e59c2 Optimize S3 performance: add caching, per-bucket locks, streaming encryption 2025-12-29 18:12:28 +08:00
32232211a1 Revamp UI/UX: bucket icons, dynamic metrics, mobile docs navigation, rework IAM UI, add JSON auto-indent to policy editors 2025-12-29 17:37:56 +08:00
1cacb80dd6 Fix replication pause, multipart cache, and select all with virtual scroll 2025-12-29 14:46:06 +08:00
e89bbb62dc Fix replication not resuming the remaining pending objects after pause/resume; Improve documentation 2025-12-29 14:05:17 +08:00
c8eb3de629 Fix issues -- Bug fixes:
      - Fix duplicate _legacy_version_dir check in storage.py
      - Fix max_size_bytes -> max_bytes param in quota handler
      - Move base64 import to module level in s3_api.py
      - Add retry logic and atomic file ops to multipart upload
      - Add shutdown() method to ReplicationManager

      Performance:
      - Add LRU eviction with OrderedDict to object cache
      - Add cache version tracking for stale read detection
      - Add streaming uploads for large files (>10 MiB) in replication
      - Create _find_element() XML parsing helpers

      Security:
      - Gate SigV4 debug logging behind DEBUG_SIGV4 config
2025-12-29 12:46:23 +08:00
9165e365e6 Comment cleanup 2025-12-23 13:57:13 +08:00
01e26754e8 Add option to display custom timezone; Fix timezone inconsistencies 2025-12-23 13:48:02 +08:00
b592fa9fdb Fixed replication issue - clean up debug 2025-12-23 13:37:51 +08:00
cd9734b398 Debug replication corruption issue - Fix attempt 2025-12-23 13:24:05 +08:00
90893cac27 Debug replication corruption issue - check if it's boto3 issue 2025-12-23 12:02:26 +08:00
6e659902bd Add header debugging for replication issue 2025-12-23 11:55:47 +08:00
39a707ecbc Add additional debugging for replication issue 2025-12-23 11:49:51 +08:00
4199f8e6c7 Add debugging for replication issue 2025-12-23 11:43:29 +08:00
adc6770273 Improve object browser search filter; Test: Fix replication GIF issue 2025-12-23 11:31:32 +08:00
f5451c162b Improve object storage performance via caching 2025-12-22 17:03:33 +08:00
aab9ef696a Fix race condition in replication 2025-12-22 14:14:04 +08:00
be48f59452 Improve UI bucket replication and policy 2025-12-22 13:34:24 +08:00
86c04f85f6 Fix bucket object browser nested object action button; Improve UX 2025-12-22 13:17:27 +08:00
20 changed files with 2344 additions and 795 deletions

View File

@@ -45,7 +45,6 @@ def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
         try:
             shutil.move(str(legacy_path), str(active_path))
         except OSError:
-            # Fall back to copy + delete if move fails (e.g., cross-device)
             shutil.copy2(legacy_path, active_path)
             try:
                 legacy_path.unlink(missing_ok=True)
@@ -101,32 +100,28 @@ def create_app(
     bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
     secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))

-    # Initialize Replication components
-    # Store config files in the system config directory for consistency
     storage_root = Path(app.config["STORAGE_ROOT"])
     config_dir = storage_root / ".myfsio.sys" / "config"
     config_dir.mkdir(parents=True, exist_ok=True)

-    # Define paths with migration from legacy locations
     connections_path = _migrate_config_file(
         active_path=config_dir / "connections.json",
         legacy_paths=[
-            storage_root / ".myfsio.sys" / "connections.json",  # Previous location
-            storage_root / ".connections.json",  # Original legacy location
+            storage_root / ".myfsio.sys" / "connections.json",
+            storage_root / ".connections.json",
         ],
     )
     replication_rules_path = _migrate_config_file(
         active_path=config_dir / "replication_rules.json",
         legacy_paths=[
-            storage_root / ".myfsio.sys" / "replication_rules.json",  # Previous location
-            storage_root / ".replication_rules.json",  # Original legacy location
+            storage_root / ".myfsio.sys" / "replication_rules.json",
+            storage_root / ".replication_rules.json",
         ],
     )
     connections = ConnectionStore(connections_path)
     replication = ReplicationManager(storage, connections, replication_rules_path)

-    # Initialize encryption and KMS
     encryption_config = {
         "encryption_enabled": app.config.get("ENCRYPTION_ENABLED", False),
         "encryption_master_key_path": app.config.get("ENCRYPTION_MASTER_KEY_PATH"),
@@ -141,7 +136,6 @@ def create_app(
     kms_manager = KMSManager(kms_keys_path, kms_master_key_path)
     encryption_manager.set_kms_provider(kms_manager)

-    # Wrap storage with encryption layer if encryption is enabled
     if app.config.get("ENCRYPTION_ENABLED", False):
         from .encrypted_storage import EncryptedObjectStorage
         storage = EncryptedObjectStorage(storage, encryption_manager)
@@ -177,13 +171,22 @@ def create_app(
     @app.template_filter("timestamp_to_datetime")
     def timestamp_to_datetime(value: float) -> str:
-        """Format Unix timestamp as human-readable datetime."""
-        from datetime import datetime
+        """Format Unix timestamp as human-readable datetime in configured timezone."""
+        from datetime import datetime, timezone as dt_timezone
+        from zoneinfo import ZoneInfo
         if not value:
             return "Never"
         try:
-            dt = datetime.fromtimestamp(value)
-            return dt.strftime("%Y-%m-%d %H:%M:%S")
+            dt_utc = datetime.fromtimestamp(value, dt_timezone.utc)
+            display_tz = app.config.get("DISPLAY_TIMEZONE", "UTC")
+            if display_tz and display_tz != "UTC":
+                try:
+                    tz = ZoneInfo(display_tz)
+                    dt_local = dt_utc.astimezone(tz)
+                    return dt_local.strftime("%Y-%m-%d %H:%M:%S")
+                except (KeyError, ValueError):
+                    pass
+            return dt_utc.strftime("%Y-%m-%d %H:%M:%S UTC")
         except (ValueError, OSError):
             return "Unknown"
@@ -244,7 +247,7 @@ def _configure_cors(app: Flask) -> None:
 class _RequestContextFilter(logging.Filter):
     """Inject request-specific attributes into log records."""

-    def filter(self, record: logging.LogRecord) -> bool:  # pragma: no cover - simple boilerplate
+    def filter(self, record: logging.LogRecord) -> bool:
         if has_request_context():
             record.request_id = getattr(g, "request_id", "-")
             record.path = request.path

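Aside: the reworked filter pins timestamps to UTC and converts only for display. A standalone sketch of the same pattern (function name illustrative, not part of the app):

    from datetime import datetime, timezone
    from zoneinfo import ZoneInfo

    def format_timestamp(value: float, display_tz: str = "UTC") -> str:
        # Interpret the raw Unix timestamp as UTC first, then convert for display.
        dt_utc = datetime.fromtimestamp(value, timezone.utc)
        if display_tz != "UTC":
            try:
                return dt_utc.astimezone(ZoneInfo(display_tz)).strftime("%Y-%m-%d %H:%M:%S")
            except (KeyError, ValueError):
                pass  # Unknown zone name: fall back to UTC below.
        return dt_utc.strftime("%Y-%m-%d %H:%M:%S UTC")

    print(format_timestamp(1735440000, "Asia/Singapore"))  # local wall-clock time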
View File

@@ -2,10 +2,12 @@
 from __future__ import annotations

 import json
+import re
+import time
 from dataclasses import dataclass
-from fnmatch import fnmatch
+from fnmatch import fnmatch, translate
 from pathlib import Path
-from typing import Any, Dict, Iterable, List, Optional, Sequence
+from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple

 RESOURCE_PREFIX = "arn:aws:s3:::"
@@ -133,7 +135,22 @@ class BucketPolicyStatement:
     effect: str
     principals: List[str] | str
     actions: List[str]
-    resources: List[tuple[str | None, str | None]]
+    resources: List[Tuple[str | None, str | None]]
+    # Performance: Pre-compiled regex patterns for resource matching
+    _compiled_patterns: List[Tuple[str | None, Optional[Pattern[str]]]] | None = None
+
+    def _get_compiled_patterns(self) -> List[Tuple[str | None, Optional[Pattern[str]]]]:
+        """Lazily compile fnmatch patterns to regex for faster matching."""
+        if self._compiled_patterns is None:
+            self._compiled_patterns = []
+            for resource_bucket, key_pattern in self.resources:
+                if key_pattern is None:
+                    self._compiled_patterns.append((resource_bucket, None))
+                else:
+                    # Convert fnmatch pattern to regex
+                    regex_pattern = translate(key_pattern)
+                    self._compiled_patterns.append((resource_bucket, re.compile(regex_pattern)))
+        return self._compiled_patterns

     def matches_principal(self, access_key: Optional[str]) -> bool:
         if self.principals == "*":
@@ -149,15 +166,16 @@ class BucketPolicyStatement:
     def matches_resource(self, bucket: Optional[str], object_key: Optional[str]) -> bool:
         bucket = (bucket or "*").lower()
         key = object_key or ""
-        for resource_bucket, key_pattern in self.resources:
+        for resource_bucket, compiled_pattern in self._get_compiled_patterns():
             resource_bucket = (resource_bucket or "*").lower()
             if resource_bucket not in {"*", bucket}:
                 continue
-            if key_pattern is None:
+            if compiled_pattern is None:
                 if not key:
                     return True
                 continue
-            if fnmatch(key, key_pattern):
+            # Performance: Use pre-compiled regex instead of fnmatch
+            if compiled_pattern.match(key):
                 return True
         return False
@@ -174,8 +192,16 @@ class BucketPolicyStore:
         self._policies: Dict[str, List[BucketPolicyStatement]] = {}
         self._load()
         self._last_mtime = self._current_mtime()
+        # Performance: Avoid stat() on every request
+        self._last_stat_check = 0.0
+        self._stat_check_interval = 1.0  # Only check mtime every 1 second

     def maybe_reload(self) -> None:
+        # Performance: Skip stat check if we checked recently
+        now = time.time()
+        if now - self._last_stat_check < self._stat_check_interval:
+            return
+        self._last_stat_check = now
         current = self._current_mtime()
         if current is None or current == self._last_mtime:
             return
@@ -188,7 +214,6 @@ class BucketPolicyStore:
         except FileNotFoundError:
             return None

-    # ------------------------------------------------------------------
     def evaluate(
         self,
         access_key: Optional[str],
@@ -229,7 +254,6 @@ class BucketPolicyStore:
         self._policies.pop(bucket, None)
         self._persist()

-    # ------------------------------------------------------------------
     def _load(self) -> None:
         try:
             content = self.policy_path.read_text(encoding='utf-8')

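Aside on the pre-compile change: fnmatch.translate() turns a glob into a regex source string, so each key pattern compiles once instead of being re-parsed on every matches_resource() call. A quick sanity check of the equivalence (values illustrative):

    import re
    from fnmatch import fnmatchcase, translate

    pattern = "logs/2025/*.gz"
    compiled = re.compile(translate(pattern))  # compile once, reuse per request

    key = "logs/2025/app-01.gz"
    # The pre-compiled regex gives the same verdict as the per-call glob match.
    assert fnmatchcase(key, pattern) == bool(compiled.match(key))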
View File

@@ -73,6 +73,7 @@ class AppConfig:
     kms_enabled: bool
     kms_keys_path: Path
     default_encryption_algorithm: str
+    display_timezone: str

     @classmethod
     def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
@@ -153,15 +154,15 @@ class AppConfig:
         cors_allow_headers = _csv(str(_get("CORS_ALLOW_HEADERS", "*")), ["*"])
         cors_expose_headers = _csv(str(_get("CORS_EXPOSE_HEADERS", "*")), ["*"])
         session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30))
-        bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60))  # Default 60 seconds
-        # Encryption settings
+        bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60))
         encryption_enabled = str(_get("ENCRYPTION_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
         encryption_keys_dir = storage_root / ".myfsio.sys" / "keys"
         encryption_master_key_path = Path(_get("ENCRYPTION_MASTER_KEY_PATH", encryption_keys_dir / "master.key")).resolve()
         kms_enabled = str(_get("KMS_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
         kms_keys_path = Path(_get("KMS_KEYS_PATH", encryption_keys_dir / "kms_keys.json")).resolve()
         default_encryption_algorithm = str(_get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"))
+        display_timezone = str(_get("DISPLAY_TIMEZONE", "UTC"))

         return cls(storage_root=storage_root,
                    max_upload_size=max_upload_size,
@@ -196,7 +197,8 @@ class AppConfig:
                    encryption_master_key_path=encryption_master_key_path,
                    kms_enabled=kms_enabled,
                    kms_keys_path=kms_keys_path,
-                   default_encryption_algorithm=default_encryption_algorithm)
+                   default_encryption_algorithm=default_encryption_algorithm,
+                   display_timezone=display_timezone)

     def validate_and_report(self) -> list[str]:
         """Validate configuration and return a list of warnings/issues.
@@ -206,7 +208,6 @@ class AppConfig:
         """
         issues = []

-        # Check if storage_root is writable
         try:
             test_file = self.storage_root / ".write_test"
             test_file.touch()
@@ -214,24 +215,20 @@ class AppConfig:
         except (OSError, PermissionError) as e:
             issues.append(f"CRITICAL: STORAGE_ROOT '{self.storage_root}' is not writable: {e}")

-        # Check if storage_root looks like a temp directory
         storage_str = str(self.storage_root).lower()
         if "/tmp" in storage_str or "\\temp" in storage_str or "appdata\\local\\temp" in storage_str:
             issues.append(f"WARNING: STORAGE_ROOT '{self.storage_root}' appears to be a temporary directory. Data may be lost on reboot!")

-        # Check if IAM config path is under storage_root
         try:
             self.iam_config_path.relative_to(self.storage_root)
         except ValueError:
             issues.append(f"WARNING: IAM_CONFIG '{self.iam_config_path}' is outside STORAGE_ROOT '{self.storage_root}'. Consider setting IAM_CONFIG explicitly or ensuring paths are aligned.")

-        # Check if bucket policy path is under storage_root
         try:
             self.bucket_policy_path.relative_to(self.storage_root)
         except ValueError:
             issues.append(f"WARNING: BUCKET_POLICY_PATH '{self.bucket_policy_path}' is outside STORAGE_ROOT '{self.storage_root}'. Consider setting BUCKET_POLICY_PATH explicitly.")

-        # Check if log path is writable
         try:
             self.log_path.parent.mkdir(parents=True, exist_ok=True)
             test_log = self.log_path.parent / ".write_test"
@@ -240,26 +237,22 @@ class AppConfig:
         except (OSError, PermissionError) as e:
             issues.append(f"WARNING: Log directory '{self.log_path.parent}' is not writable: {e}")

-        # Check log path location
         log_str = str(self.log_path).lower()
         if "/tmp" in log_str or "\\temp" in log_str or "appdata\\local\\temp" in log_str:
             issues.append(f"WARNING: LOG_DIR '{self.log_path.parent}' appears to be a temporary directory. Logs may be lost on reboot!")

-        # Check if encryption keys path is under storage_root (when encryption is enabled)
         if self.encryption_enabled:
             try:
                 self.encryption_master_key_path.relative_to(self.storage_root)
             except ValueError:
                 issues.append(f"WARNING: ENCRYPTION_MASTER_KEY_PATH '{self.encryption_master_key_path}' is outside STORAGE_ROOT. Ensure proper backup procedures.")

-        # Check if KMS keys path is under storage_root (when KMS is enabled)
         if self.kms_enabled:
             try:
                 self.kms_keys_path.relative_to(self.storage_root)
             except ValueError:
                 issues.append(f"WARNING: KMS_KEYS_PATH '{self.kms_keys_path}' is outside STORAGE_ROOT. Ensure proper backup procedures.")

-        # Warn about production settings
         if self.secret_key == "dev-secret-key":
             issues.append("WARNING: Using default SECRET_KEY. Set SECRET_KEY environment variable for production.")
@@ -330,4 +323,5 @@ class AppConfig:
             "KMS_ENABLED": self.kms_enabled,
             "KMS_KEYS_PATH": str(self.kms_keys_path),
             "DEFAULT_ENCRYPTION_ALGORITHM": self.default_encryption_algorithm,
+            "DISPLAY_TIMEZONE": self.display_timezone,
         }

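Aside: ENCRYPTION_ENABLED and KMS_ENABLED share the truthy-string idiom above. A hedged helper sketch of the same parsing (env_flag is illustrative, not part of the codebase):

    import os

    _TRUTHY = {"1", "true", "yes", "on"}

    def env_flag(name: str, default: str = "0") -> bool:
        # Mirrors the ENCRYPTION_ENABLED / KMS_ENABLED parsing above.
        return str(os.environ.get(name, default)).lower() in _TRUTHY

    print(env_flag("KMS_ENABLED"))  # False unless the variable is set to a truthy value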
View File

@@ -79,7 +79,7 @@ class EncryptedObjectStorage:
         kms_key_id: Optional[str] = None,
     ) -> ObjectMeta:
         """Store an object, optionally with encryption.

         Args:
             bucket_name: Name of the bucket
             object_key: Key for the object
@@ -87,42 +87,41 @@ class EncryptedObjectStorage:
             metadata: Optional user metadata
             server_side_encryption: Encryption algorithm ("AES256" or "aws:kms")
             kms_key_id: KMS key ID (for aws:kms encryption)

         Returns:
             ObjectMeta with object information
+
+        Performance: Uses streaming encryption for large files to reduce memory usage.
         """
         should_encrypt, algorithm, detected_kms_key = self._should_encrypt(
             bucket_name, server_side_encryption
         )
         if kms_key_id is None:
             kms_key_id = detected_kms_key

         if should_encrypt:
-            data = stream.read()
             try:
-                ciphertext, enc_metadata = self.encryption.encrypt_object(
-                    data,
+                # Performance: Use streaming encryption to avoid loading entire file into memory
+                encrypted_stream, enc_metadata = self.encryption.encrypt_stream(
+                    stream,
                     algorithm=algorithm,
-                    kms_key_id=kms_key_id,
                     context={"bucket": bucket_name, "key": object_key},
                 )
                 combined_metadata = metadata.copy() if metadata else {}
                 combined_metadata.update(enc_metadata.to_dict())
-                encrypted_stream = io.BytesIO(ciphertext)
                 result = self.storage.put_object(
                     bucket_name,
                     object_key,
                     encrypted_stream,
                     metadata=combined_metadata,
                 )
                 result.metadata = combined_metadata
                 return result
             except EncryptionError as exc:
                 raise StorageError(f"Encryption failed: {exc}") from exc
         else:
@@ -135,33 +134,34 @@ class EncryptedObjectStorage:
     def get_object_data(self, bucket_name: str, object_key: str) -> tuple[bytes, Dict[str, str]]:
         """Get object data, decrypting if necessary.

         Returns:
             Tuple of (data, metadata)
+
+        Performance: Uses streaming decryption to reduce memory usage.
         """
         path = self.storage.get_object_path(bucket_name, object_key)
         metadata = self.storage.get_object_metadata(bucket_name, object_key)
-        with path.open("rb") as f:
-            data = f.read()
         enc_metadata = EncryptionMetadata.from_dict(metadata)
         if enc_metadata:
             try:
-                data = self.encryption.decrypt_object(
-                    data,
-                    enc_metadata,
-                    context={"bucket": bucket_name, "key": object_key},
-                )
+                # Performance: Use streaming decryption to avoid loading entire file into memory
+                with path.open("rb") as f:
+                    decrypted_stream = self.encryption.decrypt_stream(f, enc_metadata)
+                    data = decrypted_stream.read()
             except EncryptionError as exc:
                 raise StorageError(f"Decryption failed: {exc}") from exc
+        else:
+            with path.open("rb") as f:
+                data = f.read()
         clean_metadata = {
             k: v for k, v in metadata.items()
             if not k.startswith("x-amz-encryption")
             and k != "x-amz-encrypted-data-key"
         }
         return data, clean_metadata

     def get_object_stream(self, bucket_name: str, object_key: str) -> tuple[BinaryIO, Dict[str, str], int]:

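Note on the get_object_data change: streaming decryption keeps peak memory proportional to the chunk size rather than the object size. The underlying pattern, as a generic hedged sketch (names and chunk size illustrative):

    import io

    CHUNK = 64 * 1024  # process 64 KiB at a time

    def copy_stream(src, dst) -> int:
        # Peak memory stays at CHUNK bytes regardless of object size.
        total = 0
        while True:
            block = src.read(CHUNK)
            if not block:
                return total
            dst.write(block)
            total += len(block)

    copied = copy_stream(io.BytesIO(b"x" * 1_000_000), io.BytesIO())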
View File

@@ -183,81 +183,94 @@ class StreamingEncryptor:
         self.chunk_size = chunk_size

     def _derive_chunk_nonce(self, base_nonce: bytes, chunk_index: int) -> bytes:
-        """Derive a unique nonce for each chunk."""
-        # XOR the base nonce with the chunk index
-        nonce_int = int.from_bytes(base_nonce, "big")
-        derived = nonce_int ^ chunk_index
-        return derived.to_bytes(12, "big")
+        """Derive a unique nonce for each chunk.
+
+        Performance: Use direct byte manipulation instead of full int conversion.
+        """
+        # Performance: Only modify last 4 bytes instead of full 12-byte conversion
+        return base_nonce[:8] + (chunk_index ^ int.from_bytes(base_nonce[8:], "big")).to_bytes(4, "big")

     def encrypt_stream(self, stream: BinaryIO,
                        context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]:
-        """Encrypt a stream and return encrypted stream + metadata."""
+        """Encrypt a stream and return encrypted stream + metadata.
+
+        Performance: Writes chunks directly to output buffer instead of accumulating in list.
+        """
         data_key, encrypted_data_key = self.provider.generate_data_key()
         base_nonce = secrets.token_bytes(12)
         aesgcm = AESGCM(data_key)

-        encrypted_chunks = []
+        # Performance: Write directly to BytesIO instead of accumulating chunks
+        output = io.BytesIO()
+        output.write(b"\x00\x00\x00\x00")  # Placeholder for chunk count
         chunk_index = 0
         while True:
             chunk = stream.read(self.chunk_size)
             if not chunk:
                 break
             chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
             encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None)
-            size_prefix = len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big")
-            encrypted_chunks.append(size_prefix + encrypted_chunk)
+            # Write size prefix + encrypted chunk directly
+            output.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big"))
+            output.write(encrypted_chunk)
             chunk_index += 1

-        header = chunk_index.to_bytes(4, "big")
-        encrypted_data = header + b"".join(encrypted_chunks)
+        # Write actual chunk count to header
+        output.seek(0)
+        output.write(chunk_index.to_bytes(4, "big"))
+        output.seek(0)

         metadata = EncryptionMetadata(
             algorithm="AES256",
             key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local",
             nonce=base_nonce,
             encrypted_data_key=encrypted_data_key,
         )
-        return io.BytesIO(encrypted_data), metadata
+        return output, metadata

     def decrypt_stream(self, stream: BinaryIO, metadata: EncryptionMetadata) -> BinaryIO:
-        """Decrypt a stream using the provided metadata."""
+        """Decrypt a stream using the provided metadata.
+
+        Performance: Writes chunks directly to output buffer instead of accumulating in list.
+        """
         if isinstance(self.provider, LocalKeyEncryption):
             data_key = self.provider._decrypt_data_key(metadata.encrypted_data_key)
         else:
             raise EncryptionError("Unsupported provider for streaming decryption")

         aesgcm = AESGCM(data_key)
         base_nonce = metadata.nonce

         chunk_count_bytes = stream.read(4)
         if len(chunk_count_bytes) < 4:
             raise EncryptionError("Invalid encrypted stream: missing header")
         chunk_count = int.from_bytes(chunk_count_bytes, "big")

-        decrypted_chunks = []
+        # Performance: Write directly to BytesIO instead of accumulating chunks
+        output = io.BytesIO()
         for chunk_index in range(chunk_count):
             size_bytes = stream.read(self.HEADER_SIZE)
             if len(size_bytes) < self.HEADER_SIZE:
                 raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}")
             chunk_size = int.from_bytes(size_bytes, "big")
             encrypted_chunk = stream.read(chunk_size)
             if len(encrypted_chunk) < chunk_size:
                 raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}")
             chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
             try:
                 decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None)
-                decrypted_chunks.append(decrypted_chunk)
+                output.write(decrypted_chunk)  # Write directly instead of appending to list
             except Exception as exc:
                 raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc

-        return io.BytesIO(b"".join(decrypted_chunks))
+        output.seek(0)
+        return output

 class EncryptionManager:

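Why the nonce shortcut is safe: with chunk_index below 2**32, the XOR can only touch the last four bytes of the 12-byte nonce, so both derivations agree. A quick equivalence check:

    import secrets

    def derive_full(base_nonce: bytes, chunk_index: int) -> bytes:
        # Original: XOR the whole 96-bit nonce as one integer.
        return (int.from_bytes(base_nonce, "big") ^ chunk_index).to_bytes(12, "big")

    def derive_fast(base_nonce: bytes, chunk_index: int) -> bytes:
        # Optimized: only the last 4 bytes can change while chunk_index < 2**32.
        return base_nonce[:8] + (chunk_index ^ int.from_bytes(base_nonce[8:], "big")).to_bytes(4, "big")

    base = secrets.token_bytes(12)
    assert all(derive_full(base, i) == derive_fast(base, i) for i in (0, 1, 7, 2**32 - 1))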
View File

@@ -4,11 +4,12 @@ from __future__ import annotations

 import json
 import math
 import secrets
+import time
 from collections import deque
 from dataclasses import dataclass
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from pathlib import Path
-from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set
+from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple

 class IamError(RuntimeError):
@@ -115,17 +116,27 @@ class IamService:
         self._raw_config: Dict[str, Any] = {}
         self._failed_attempts: Dict[str, Deque[datetime]] = {}
         self._last_load_time = 0.0
+        # Performance: credential cache with TTL
+        self._credential_cache: Dict[str, Tuple[str, Principal, float]] = {}
+        self._cache_ttl = 60.0  # Cache credentials for 60 seconds
+        self._last_stat_check = 0.0
+        self._stat_check_interval = 1.0  # Only stat() file every 1 second
         self._load()

     def _maybe_reload(self) -> None:
         """Reload configuration if the file has changed on disk."""
+        # Performance: Skip stat check if we checked recently
+        now = time.time()
+        if now - self._last_stat_check < self._stat_check_interval:
+            return
+        self._last_stat_check = now
         try:
             if self.config_path.stat().st_mtime > self._last_load_time:
                 self._load()
+                self._credential_cache.clear()  # Invalidate cache on reload
         except OSError:
             pass

-    # ---------------------- authz helpers ----------------------
     def authenticate(self, access_key: str, secret_key: str) -> Principal:
         self._maybe_reload()
         access_key = (access_key or "").strip()
@@ -149,7 +160,7 @@ class IamService:
             return
         attempts = self._failed_attempts.setdefault(access_key, deque())
         self._prune_attempts(attempts)
-        attempts.append(datetime.now())
+        attempts.append(datetime.now(timezone.utc))

     def _clear_failed_attempts(self, access_key: str) -> None:
         if not access_key:
@@ -157,7 +168,7 @@ class IamService:
         self._failed_attempts.pop(access_key, None)

     def _prune_attempts(self, attempts: Deque[datetime]) -> None:
-        cutoff = datetime.now() - self.auth_lockout_window
+        cutoff = datetime.now(timezone.utc) - self.auth_lockout_window
         while attempts and attempts[0] < cutoff:
             attempts.popleft()
@@ -178,21 +189,41 @@ class IamService:
         if len(attempts) < self.auth_max_attempts:
             return 0
         oldest = attempts[0]
-        elapsed = (datetime.now() - oldest).total_seconds()
+        elapsed = (datetime.now(timezone.utc) - oldest).total_seconds()
         return int(max(0, self.auth_lockout_window.total_seconds() - elapsed))

     def principal_for_key(self, access_key: str) -> Principal:
+        # Performance: Check cache first
+        now = time.time()
+        cached = self._credential_cache.get(access_key)
+        if cached:
+            secret, principal, cached_time = cached
+            if now - cached_time < self._cache_ttl:
+                return principal
+
         self._maybe_reload()
         record = self._users.get(access_key)
         if not record:
             raise IamError("Unknown access key")
-        return self._build_principal(access_key, record)
+        principal = self._build_principal(access_key, record)
+        self._credential_cache[access_key] = (record["secret_key"], principal, now)
+        return principal

     def secret_for_key(self, access_key: str) -> str:
+        # Performance: Check cache first
+        now = time.time()
+        cached = self._credential_cache.get(access_key)
+        if cached:
+            secret, principal, cached_time = cached
+            if now - cached_time < self._cache_ttl:
+                return secret
+
         self._maybe_reload()
         record = self._users.get(access_key)
         if not record:
             raise IamError("Unknown access key")
+        principal = self._build_principal(access_key, record)
+        self._credential_cache[access_key] = (record["secret_key"], principal, now)
         return record["secret_key"]

     def authorize(self, principal: Principal, bucket_name: str | None, action: str) -> None:
@@ -218,7 +249,6 @@ class IamService:
                 return True
         return False

-    # ---------------------- management helpers ----------------------
     def list_users(self) -> List[Dict[str, Any]]:
         listing: List[Dict[str, Any]] = []
         for access_key, record in self._users.items():
@@ -291,7 +321,6 @@ class IamService:
         self._save()
         self._load()

-    # ---------------------- config helpers ----------------------
     def _load(self) -> None:
         try:
             self._last_load_time = self.config_path.stat().st_mtime
@@ -337,7 +366,6 @@ class IamService:
         except (OSError, PermissionError) as e:
             raise IamError(f"Cannot save IAM config: {e}")

-    # ---------------------- insight helpers ----------------------
     def config_summary(self) -> Dict[str, Any]:
         return {
             "path": str(self.config_path),
@@ -446,11 +474,36 @@ class IamService:
         raise IamError("User not found")

     def get_secret_key(self, access_key: str) -> str | None:
+        # Performance: Check cache first
+        now = time.time()
+        cached = self._credential_cache.get(access_key)
+        if cached:
+            secret, principal, cached_time = cached
+            if now - cached_time < self._cache_ttl:
+                return secret
+
         self._maybe_reload()
         record = self._users.get(access_key)
-        return record["secret_key"] if record else None
+        if record:
+            # Cache the result
+            principal = self._build_principal(access_key, record)
+            self._credential_cache[access_key] = (record["secret_key"], principal, now)
+            return record["secret_key"]
+        return None

     def get_principal(self, access_key: str) -> Principal | None:
+        # Performance: Check cache first
+        now = time.time()
+        cached = self._credential_cache.get(access_key)
+        if cached:
+            secret, principal, cached_time = cached
+            if now - cached_time < self._cache_ttl:
+                return principal
+
         self._maybe_reload()
         record = self._users.get(access_key)
-        return self._build_principal(access_key, record) if record else None
+        if record:
+            principal = self._build_principal(access_key, record)
+            self._credential_cache[access_key] = (record["secret_key"], principal, now)
+            return principal
+        return None

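The credential cache added here is a plain (value, timestamp) TTL map. The same pattern reduced to its essentials (class name and fields illustrative):

    import time
    from typing import Dict, Optional, Tuple

    class TTLCache:
        """Minimal sketch of the TTL pattern used by the credential cache above."""

        def __init__(self, ttl: float = 60.0) -> None:
            self._ttl = ttl
            self._entries: Dict[str, Tuple[str, float]] = {}

        def get(self, key: str) -> Optional[str]:
            entry = self._entries.get(key)
            if entry and time.time() - entry[1] < self._ttl:
                return entry[0]
            return None  # Missing or expired: caller falls back to the slow path.

        def put(self, key: str, value: str) -> None:
            self._entries[key] = (value, time.time())

        def clear(self) -> None:
            self._entries.clear()  # Invalidate everything when the config reloads.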
View File

@@ -33,9 +33,6 @@ def _encryption():
 def _error_response(code: str, message: str, status: int) -> tuple[Dict[str, Any], int]:
     return {"__type": code, "message": message}, status

-# ---------------------- Key Management ----------------------

 @kms_api_bp.route("/keys", methods=["GET", "POST"])
 @limiter.limit("30 per minute")
 def list_or_create_keys():
@@ -65,7 +62,6 @@ def list_or_create_keys():
         except EncryptionError as exc:
             return _error_response("KMSInternalException", str(exc), 400)

-    # GET - List keys
     keys = kms.list_keys()
     return jsonify({
         "Keys": [{"KeyId": k.key_id, "KeyArn": k.arn} for k in keys],
@@ -96,7 +92,6 @@ def get_or_delete_key(key_id: str):
         except EncryptionError as exc:
             return _error_response("NotFoundException", str(exc), 404)

-    # GET
     key = kms.get_key(key_id)
     if not key:
         return _error_response("NotFoundException", f"Key not found: {key_id}", 404)
@@ -149,9 +144,6 @@ def disable_key(key_id: str):
     except EncryptionError as exc:
         return _error_response("NotFoundException", str(exc), 404)

-# ---------------------- Encryption Operations ----------------------

 @kms_api_bp.route("/encrypt", methods=["POST"])
 @limiter.limit("60 per minute")
 def encrypt_data():
@@ -251,7 +243,6 @@ def generate_data_key():
     try:
         plaintext_key, encrypted_key = kms.generate_data_key(key_id, context)

-        # Trim key if AES_128 requested
         if key_spec == "AES_128":
             plaintext_key = plaintext_key[:16]
@@ -322,10 +313,7 @@ def re_encrypt():
         return _error_response("ValidationException", "CiphertextBlob must be base64 encoded", 400)

     try:
-        # First decrypt, get source key id
         plaintext, source_key_id = kms.decrypt(ciphertext, source_context)
-        # Re-encrypt with destination key
         new_ciphertext = kms.encrypt(destination_key_id, plaintext, destination_context)

         return jsonify({
@@ -365,9 +353,6 @@ def generate_random():
     except EncryptionError as exc:
         return _error_response("ValidationException", str(exc), 400)

-# ---------------------- Client-Side Encryption Helpers ----------------------

 @kms_api_bp.route("/client/generate-key", methods=["POST"])
 @limiter.limit("30 per minute")
 def generate_client_key():
@@ -427,9 +412,6 @@ def client_decrypt():
     except Exception as exc:
         return _error_response("DecryptionError", str(exc), 400)

-# ---------------------- Encryption Materials for S3 Client-Side Encryption ----------------------

 @kms_api_bp.route("/materials/<key_id>", methods=["POST"])
 @limiter.limit("60 per minute")
 def get_encryption_materials(key_id: str):

View File

@@ -9,7 +9,7 @@ import time
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Dict, Optional from typing import Any, Dict, Optional
import boto3 import boto3
from botocore.config import Config from botocore.config import Config
@@ -22,18 +22,51 @@ from .storage import ObjectStorage, StorageError
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0" REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
REPLICATION_CONNECT_TIMEOUT = 5
REPLICATION_READ_TIMEOUT = 30
STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024 # 10 MiB - use streaming for larger files
REPLICATION_MODE_NEW_ONLY = "new_only" REPLICATION_MODE_NEW_ONLY = "new_only"
REPLICATION_MODE_ALL = "all" REPLICATION_MODE_ALL = "all"
def _create_s3_client(connection: RemoteConnection, *, health_check: bool = False) -> Any:
"""Create a boto3 S3 client for the given connection.
Args:
connection: Remote S3 connection configuration
health_check: If True, use minimal retries for quick health checks
Returns:
Configured boto3 S3 client
"""
config = Config(
user_agent_extra=REPLICATION_USER_AGENT,
connect_timeout=REPLICATION_CONNECT_TIMEOUT,
read_timeout=REPLICATION_READ_TIMEOUT,
retries={'max_attempts': 1 if health_check else 2},
signature_version='s3v4',
s3={'addressing_style': 'path'},
request_checksum_calculation='when_required',
response_checksum_validation='when_required',
)
return boto3.client(
"s3",
endpoint_url=connection.endpoint_url,
aws_access_key_id=connection.access_key,
aws_secret_access_key=connection.secret_key,
region_name=connection.region or 'us-east-1',
config=config,
)
@dataclass @dataclass
class ReplicationStats: class ReplicationStats:
"""Statistics for replication operations - computed dynamically.""" """Statistics for replication operations - computed dynamically."""
objects_synced: int = 0 # Objects that exist in both source and destination objects_synced: int = 0
objects_pending: int = 0 # Objects in source but not in destination objects_pending: int = 0
objects_orphaned: int = 0 # Objects in destination but not in source (will be deleted) objects_orphaned: int = 0
bytes_synced: int = 0 # Total bytes synced to destination bytes_synced: int = 0
last_sync_at: Optional[float] = None last_sync_at: Optional[float] = None
last_sync_key: Optional[str] = None last_sync_key: Optional[str] = None
@@ -83,7 +116,6 @@ class ReplicationRule:
@classmethod @classmethod
def from_dict(cls, data: dict) -> "ReplicationRule": def from_dict(cls, data: dict) -> "ReplicationRule":
stats_data = data.pop("stats", {}) stats_data = data.pop("stats", {})
# Handle old rules without mode/created_at
if "mode" not in data: if "mode" not in data:
data["mode"] = REPLICATION_MODE_NEW_ONLY data["mode"] = REPLICATION_MODE_NEW_ONLY
if "created_at" not in data: if "created_at" not in data:
@@ -101,8 +133,19 @@ class ReplicationManager:
self._rules: Dict[str, ReplicationRule] = {} self._rules: Dict[str, ReplicationRule] = {}
self._stats_lock = threading.Lock() self._stats_lock = threading.Lock()
self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker") self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
self._shutdown = False
self.reload_rules() self.reload_rules()
def shutdown(self, wait: bool = True) -> None:
"""Shutdown the replication executor gracefully.
Args:
wait: If True, wait for pending tasks to complete
"""
self._shutdown = True
self._executor.shutdown(wait=wait)
logger.info("Replication manager shut down")
def reload_rules(self) -> None: def reload_rules(self) -> None:
if not self.rules_path.exists(): if not self.rules_path.exists():
self._rules = {} self._rules = {}
@@ -121,6 +164,20 @@ class ReplicationManager:
with open(self.rules_path, "w") as f: with open(self.rules_path, "w") as f:
json.dump(data, f, indent=2) json.dump(data, f, indent=2)
def check_endpoint_health(self, connection: RemoteConnection) -> bool:
"""Check if a remote endpoint is reachable and responsive.
Returns True if endpoint is healthy, False otherwise.
Uses short timeouts to prevent blocking.
"""
try:
s3 = _create_s3_client(connection, health_check=True)
s3.list_buckets()
return True
except Exception as e:
logger.warning(f"Endpoint health check failed for {connection.name} ({connection.endpoint_url}): {e}")
return False
def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]: def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]:
return self._rules.get(bucket_name) return self._rules.get(bucket_name)
@@ -151,22 +208,14 @@ class ReplicationManager:
connection = self.connections.get(rule.target_connection_id) connection = self.connections.get(rule.target_connection_id)
if not connection: if not connection:
return rule.stats # Return cached stats if connection unavailable return rule.stats
try: try:
# Get source objects
source_objects = self.storage.list_objects_all(bucket_name) source_objects = self.storage.list_objects_all(bucket_name)
source_keys = {obj.key: obj.size for obj in source_objects} source_keys = {obj.key: obj.size for obj in source_objects}
# Get destination objects s3 = _create_s3_client(connection)
s3 = boto3.client(
"s3",
endpoint_url=connection.endpoint_url,
aws_access_key_id=connection.access_key,
aws_secret_access_key=connection.secret_key,
region_name=connection.region,
)
dest_keys = set() dest_keys = set()
bytes_synced = 0 bytes_synced = 0
paginator = s3.get_paginator('list_objects_v2') paginator = s3.get_paginator('list_objects_v2')
@@ -178,24 +227,18 @@ class ReplicationManager:
bytes_synced += obj.get('Size', 0) bytes_synced += obj.get('Size', 0)
except ClientError as e: except ClientError as e:
if e.response['Error']['Code'] == 'NoSuchBucket': if e.response['Error']['Code'] == 'NoSuchBucket':
# Destination bucket doesn't exist yet
dest_keys = set() dest_keys = set()
else: else:
raise raise
# Compute stats synced = source_keys.keys() & dest_keys
synced = source_keys.keys() & dest_keys # Objects in both orphaned = dest_keys - source_keys.keys()
orphaned = dest_keys - source_keys.keys() # In dest but not source
# For "new_only" mode, we can't determine pending since we don't know
# which objects existed before replication was enabled. Only "all" mode
# should show pending (objects that should be replicated but aren't yet).
if rule.mode == REPLICATION_MODE_ALL: if rule.mode == REPLICATION_MODE_ALL:
pending = source_keys.keys() - dest_keys # In source but not dest pending = source_keys.keys() - dest_keys
else: else:
pending = set() # New-only mode: don't show pre-existing as pending pending = set()
# Update cached stats with computed values
rule.stats.objects_synced = len(synced) rule.stats.objects_synced = len(synced)
rule.stats.objects_pending = len(pending) rule.stats.objects_pending = len(pending)
rule.stats.objects_orphaned = len(orphaned) rule.stats.objects_orphaned = len(orphaned)
@@ -205,7 +248,7 @@ class ReplicationManager:
except (ClientError, StorageError) as e: except (ClientError, StorageError) as e:
logger.error(f"Failed to compute sync status for {bucket_name}: {e}") logger.error(f"Failed to compute sync status for {bucket_name}: {e}")
return rule.stats # Return cached stats on error return rule.stats
def replicate_existing_objects(self, bucket_name: str) -> None: def replicate_existing_objects(self, bucket_name: str) -> None:
"""Trigger replication for all existing objects in a bucket.""" """Trigger replication for all existing objects in a bucket."""
@@ -218,6 +261,10 @@ class ReplicationManager:
logger.warning(f"Cannot replicate existing objects: Connection {rule.target_connection_id} not found") logger.warning(f"Cannot replicate existing objects: Connection {rule.target_connection_id} not found")
return return
if not self.check_endpoint_health(connection):
logger.warning(f"Cannot replicate existing objects: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable")
return
try: try:
objects = self.storage.list_objects_all(bucket_name) objects = self.storage.list_objects_all(bucket_name)
logger.info(f"Starting replication of {len(objects)} existing objects from {bucket_name}") logger.info(f"Starting replication of {len(objects)} existing objects from {bucket_name}")
@@ -233,13 +280,7 @@ class ReplicationManager:
raise ValueError(f"Connection {connection_id} not found") raise ValueError(f"Connection {connection_id} not found")
try: try:
s3 = boto3.client( s3 = _create_s3_client(connection)
"s3",
endpoint_url=connection.endpoint_url,
aws_access_key_id=connection.access_key,
aws_secret_access_key=connection.secret_key,
region_name=connection.region,
)
s3.create_bucket(Bucket=bucket_name) s3.create_bucket(Bucket=bucket_name)
except ClientError as e: except ClientError as e:
logger.error(f"Failed to create remote bucket {bucket_name}: {e}") logger.error(f"Failed to create remote bucket {bucket_name}: {e}")
@@ -255,31 +296,35 @@ class ReplicationManager:
logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Connection {rule.target_connection_id} not found") logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Connection {rule.target_connection_id} not found")
return return
if not self.check_endpoint_health(connection):
logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable")
return
self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, action) self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, action)
def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection, action: str) -> None: def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection, action: str) -> None:
if self._shutdown:
return
# Re-check if rule is still enabled (may have been paused after task was submitted)
current_rule = self.get_rule(bucket_name)
if not current_rule or not current_rule.enabled:
logger.debug(f"Replication skipped for {bucket_name}/{object_key}: rule disabled or removed")
return
if ".." in object_key or object_key.startswith("/") or object_key.startswith("\\"): if ".." in object_key or object_key.startswith("/") or object_key.startswith("\\"):
logger.error(f"Invalid object key in replication (path traversal attempt): {object_key}") logger.error(f"Invalid object key in replication (path traversal attempt): {object_key}")
return return
try: try:
from .storage import ObjectStorage from .storage import ObjectStorage
ObjectStorage._sanitize_object_key(object_key) ObjectStorage._sanitize_object_key(object_key)
except StorageError as e: except StorageError as e:
logger.error(f"Object key validation failed in replication: {e}") logger.error(f"Object key validation failed in replication: {e}")
return return
file_size = 0
try: try:
config = Config(user_agent_extra=REPLICATION_USER_AGENT) s3 = _create_s3_client(conn)
s3 = boto3.client(
"s3",
endpoint_url=conn.endpoint_url,
aws_access_key_id=conn.access_key,
aws_secret_access_key=conn.secret_key,
region_name=conn.region,
config=config,
)
 if action == "delete":
     try:
@@ -296,57 +341,70 @@ class ReplicationManager:
             logger.error(f"Source object not found: {bucket_name}/{object_key}")
             return
-        metadata = self.storage.get_object_metadata(bucket_name, object_key)
-        extra_args = {}
-        if metadata:
-            extra_args["Metadata"] = metadata
-        # Guess content type to prevent corruption/wrong handling
         content_type, _ = mimetypes.guess_type(path)
         file_size = path.stat().st_size
         logger.info(f"Replicating {bucket_name}/{object_key}: Size={file_size}, ContentType={content_type}")

-        try:
-            with path.open("rb") as f:
-                s3.put_object(
-                    Bucket=rule.target_bucket,
-                    Key=object_key,
-                    Body=f,
-                    ContentLength=file_size,
-                    ContentType=content_type or "application/octet-stream",
-                    Metadata=metadata or {}
-                )
+        def do_upload() -> None:
+            """Upload object using appropriate method based on file size.
+
+            For small files (< 10 MiB): Read into memory for simpler handling
+            For large files: Use streaming upload to avoid memory issues
+            """
+            extra_args = {}
+            if content_type:
+                extra_args["ContentType"] = content_type
+            if file_size >= STREAMING_THRESHOLD_BYTES:
+                # Use multipart upload for large files
+                s3.upload_file(
+                    str(path),
+                    rule.target_bucket,
+                    object_key,
+                    ExtraArgs=extra_args if extra_args else None,
+                )
+            else:
+                # Read small files into memory
+                file_content = path.read_bytes()
+                put_kwargs = {
+                    "Bucket": rule.target_bucket,
+                    "Key": object_key,
+                    "Body": file_content,
+                    **extra_args,
+                }
+                s3.put_object(**put_kwargs)
+
+        try:
+            do_upload()
         except (ClientError, S3UploadFailedError) as e:
-            is_no_bucket = False
+            error_code = None
             if isinstance(e, ClientError):
-                if e.response['Error']['Code'] == 'NoSuchBucket':
-                    is_no_bucket = True
+                error_code = e.response['Error']['Code']
             elif isinstance(e, S3UploadFailedError):
                 if "NoSuchBucket" in str(e):
-                    is_no_bucket = True
+                    error_code = 'NoSuchBucket'

-            if is_no_bucket:
+            if error_code == 'NoSuchBucket':
                 logger.info(f"Target bucket {rule.target_bucket} not found. Attempting to create it.")
+                bucket_ready = False
                 try:
                     s3.create_bucket(Bucket=rule.target_bucket)
-                    # Retry upload
-                    with path.open("rb") as f:
-                        s3.put_object(
-                            Bucket=rule.target_bucket,
-                            Key=object_key,
-                            Body=f,
-                            ContentLength=file_size,
-                            ContentType=content_type or "application/octet-stream",
-                            Metadata=metadata or {}
-                        )
-                except Exception as create_err:
-                    logger.error(f"Failed to create target bucket {rule.target_bucket}: {create_err}")
-                    raise e  # Raise original error
+                    bucket_ready = True
+                    logger.info(f"Created target bucket {rule.target_bucket}")
+                except ClientError as bucket_err:
+                    if bucket_err.response['Error']['Code'] in ('BucketAlreadyExists', 'BucketAlreadyOwnedByYou'):
+                        logger.debug(f"Bucket {rule.target_bucket} already exists (created by another thread)")
+                        bucket_ready = True
+                    else:
+                        logger.error(f"Failed to create target bucket {rule.target_bucket}: {bucket_err}")
+                        raise e
+
+                if bucket_ready:
+                    do_upload()
             else:
                 raise e
         logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
         self._update_last_sync(bucket_name, object_key)
@@ -354,3 +412,4 @@ class ReplicationManager:
             logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}")
         except Exception:
             logger.exception(f"Unexpected error during replication for {bucket_name}/{object_key}")
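For reference, the size-threshold pattern `do_upload` implements reduces to a few lines of boto3. A minimal standalone sketch, assuming `STREAMING_THRESHOLD_BYTES` is the 10 MiB limit the docstring describes (the constant itself is defined elsewhere in the module):

    from pathlib import Path

    import boto3

    STREAMING_THRESHOLD_BYTES = 10 * 1024 * 1024  # assumed 10 MiB, per the docstring above

    def replicate_file(s3, bucket: str, key: str, path: Path, content_type: str | None = None) -> None:
        """Small files in one put_object call; large files via boto3's managed transfer."""
        if path.stat().st_size >= STREAMING_THRESHOLD_BYTES:
            # upload_file splits the file into multipart chunks and retries failed
            # parts, so memory stays bounded regardless of object size
            s3.upload_file(str(path), bucket, key,
                           ExtraArgs={"ContentType": content_type} if content_type else None)
        else:
            kwargs = {"ContentType": content_type} if content_type else {}
            s3.put_object(Bucket=bucket, Key=key, Body=path.read_bytes(), **kwargs)

    replicate_file(boto3.client("s3"), "target-bucket", "docs/report.pdf",
                   Path("/tmp/report.pdf"), "application/pdf")

The bucket and key names here are placeholders; the point is that the small-file branch costs one round trip while the large-file branch trades round trips for bounded memory.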

View File

@@ -1,14 +1,16 @@
 """Flask blueprint exposing a subset of the S3 REST API."""
 from __future__ import annotations

+import base64
 import hashlib
 import hmac
+import logging
 import mimetypes
 import re
 import uuid
 from datetime import datetime, timedelta, timezone
-from typing import Any, Dict
+from typing import Any, Dict, Optional
-from urllib.parse import quote, urlencode, urlparse
+from urllib.parse import quote, urlencode, urlparse, unquote
 from xml.etree.ElementTree import Element, SubElement, tostring, fromstring, ParseError

 from flask import Blueprint, Response, current_app, jsonify, request, g
@@ -20,10 +22,10 @@ from .iam import IamError, Principal
 from .replication import ReplicationManager
 from .storage import ObjectStorage, StorageError, QuotaExceededError

+logger = logging.getLogger(__name__)
+
 s3_api_bp = Blueprint("s3_api", __name__)

-# ---------------------- helpers ----------------------
 def _storage() -> ObjectStorage:
     return current_app.extensions["object_storage"]
@@ -68,9 +70,26 @@ def _get_signature_key(key: str, date_stamp: str, region_name: str, service_name
     return k_signing

+def _get_canonical_uri(req: Any) -> str:
+    """Get the canonical URI for SigV4 signature verification.
+
+    AWS SigV4 requires the canonical URI to be URL-encoded exactly as the client
+    sent it. Flask/Werkzeug automatically URL-decodes request.path, so we need
+    to get the raw path from the environ.
+
+    The canonical URI should have each path segment URL-encoded (with '/' preserved),
+    and the encoding should match what the client used when signing.
+    """
+    raw_uri = req.environ.get('RAW_URI') or req.environ.get('REQUEST_URI')
+    if raw_uri:
+        path = raw_uri.split('?')[0]
+        return path
+    return quote(req.path, safe="/-_.~")
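A quick illustration (not from the codebase) of why the decoded `request.path` cannot simply be re-encoded: once Werkzeug decodes the path, an encoded slash in an object key becomes indistinguishable from a real path separator, so the reconstructed canonical URI no longer matches what the client signed.

    from urllib.parse import quote, unquote

    # A client may sign the key "dir/file" with the slash percent-encoded:
    signed_path = "/bucket/dir%2Ffile"

    # Werkzeug decodes the path before the view sees it:
    decoded = unquote(signed_path)        # '/bucket/dir/file'

    # Re-encoding keeps '/' bare (it is in the safe set), so the result
    # differs from the path the signature was computed over:
    print(quote(decoded, safe="/-_.~"))   # '/bucket/dir/file'  != signed_path

Reading `RAW_URI`/`REQUEST_URI` from the WSGI environ sidesteps this by recovering the bytes the client actually sent.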
 def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
-    # Parse Authorization header
-    # AWS4-HMAC-SHA256 Credential=AKIA.../20230101/us-east-1/s3/aws4_request, SignedHeaders=host;x-amz-date, Signature=...
     match = re.match(
         r"AWS4-HMAC-SHA256 Credential=([^/]+)/([^/]+)/([^/]+)/([^/]+)/aws4_request, SignedHeaders=([^,]+), Signature=(.+)",
         auth_header,
@@ -79,17 +98,13 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
         return None
     access_key, date_stamp, region, service, signed_headers_str, signature = match.groups()

-    # Get secret key
     secret_key = _iam().get_secret_key(access_key)
     if not secret_key:
         raise IamError("Invalid access key")

-    # Canonical Request
     method = req.method
-    canonical_uri = quote(req.path, safe="/-_.~")
+    canonical_uri = _get_canonical_uri(req)

-    # Canonical Query String
     query_args = []
     for key, value in req.args.items(multi=True):
         query_args.append((key, value))
@@ -100,7 +115,6 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
         canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}")
     canonical_query_string = "&".join(canonical_query_parts)

-    # Canonical Headers
     signed_headers_list = signed_headers_str.split(";")
     canonical_headers_parts = []
     for header in signed_headers_list:
@@ -108,22 +122,20 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
         if header_val is None:
             header_val = ""
-        if header.lower() == 'expect' and header_val == "":
-            header_val = "100-continue"
         header_val = " ".join(header_val.split())
         canonical_headers_parts.append(f"{header.lower()}:{header_val}\n")
     canonical_headers = "".join(canonical_headers_parts)

-    # Payload Hash
     payload_hash = req.headers.get("X-Amz-Content-Sha256")
     if not payload_hash:
         payload_hash = hashlib.sha256(req.get_data()).hexdigest()

     canonical_request = f"{method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers_str}\n{payload_hash}"

-    # String to Sign
-    amz_date = req.headers.get("X-Amz-Date")
-    if not amz_date:
-        amz_date = req.headers.get("Date")
+    amz_date = req.headers.get("X-Amz-Date") or req.headers.get("Date")
     if not amz_date:
         raise IamError("Missing Date header")
@@ -134,13 +146,12 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
     now = datetime.now(timezone.utc)
     time_diff = abs((now - request_time).total_seconds())
-    if time_diff > 900:  # 15 minutes
+    if time_diff > 900:
         raise IamError("Request timestamp too old or too far in the future")

     required_headers = {'host', 'x-amz-date'}
     signed_headers_set = set(signed_headers_str.split(';'))
     if not required_headers.issubset(signed_headers_set):
-        # Some clients might sign 'date' instead of 'x-amz-date'
         if 'date' in signed_headers_set:
             required_headers.remove('x-amz-date')
             required_headers.add('date')
@@ -154,6 +165,18 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
     calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
     if not hmac.compare_digest(calculated_signature, signature):
+        # Only log detailed signature debug info if DEBUG_SIGV4 is enabled
+        if current_app.config.get("DEBUG_SIGV4"):
+            logger.warning(
+                "SigV4 signature mismatch",
+                extra={
+                    "path": req.path,
+                    "method": method,
+                    "signed_headers": signed_headers_str,
+                    "content_type": req.headers.get("Content-Type"),
+                    "content_length": req.headers.get("Content-Length"),
+                }
+            )
         raise IamError("SignatureDoesNotMatch")
     return _iam().get_principal(access_key)
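For context, the `_get_signature_key` helper used above follows the standard SigV4 derivation chain (date, then region, then service, then the literal `aws4_request`). A self-contained equivalent, with illustrative names:

    import hashlib
    import hmac

    def _sign(key: bytes, msg: str) -> bytes:
        return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()

    def derive_signing_key(secret_key: str, date_stamp: str, region: str, service: str) -> bytes:
        """Standard SigV4 chain: kDate -> kRegion -> kService -> kSigning."""
        k_date = _sign(("AWS4" + secret_key).encode("utf-8"), date_stamp)
        k_region = _sign(k_date, region)
        k_service = _sign(k_region, service)
        return _sign(k_service, "aws4_request")

    # The request signature is then the hex HMAC of the string-to-sign:
    # hmac.new(derive_signing_key(sk, "20251229", "us-east-1", "s3"),
    #          string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()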
@@ -187,11 +210,9 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
     if not secret_key:
         raise IamError("Invalid access key")

-    # Canonical Request
     method = req.method
-    canonical_uri = quote(req.path, safe="/-_.~")
+    canonical_uri = _get_canonical_uri(req)

-    # Canonical Query String
     query_args = []
     for key, value in req.args.items(multi=True):
         if key != "X-Amz-Signature":
@@ -203,16 +224,16 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
         canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}")
     canonical_query_string = "&".join(canonical_query_parts)

-    # Canonical Headers
     signed_headers_list = signed_headers_str.split(";")
     canonical_headers_parts = []
     for header in signed_headers_list:
         val = req.headers.get(header, "").strip()
-        if header.lower() == 'expect' and val == "":
-            val = "100-continue"
         val = " ".join(val.split())
         canonical_headers_parts.append(f"{header}:{val}\n")
     canonical_headers = "".join(canonical_headers_parts)

-    # Payload Hash
     payload_hash = "UNSIGNED-PAYLOAD"
     canonical_request = "\n".join([
@@ -224,7 +245,6 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
         payload_hash
     ])

-    # String to Sign
     algorithm = "AWS4-HMAC-SHA256"
     credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
     hashed_request = hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()
@@ -235,7 +255,6 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
         hashed_request
     ])

-    # Signature
     signing_key = _get_signature_key(secret_key, date_stamp, region, service)
     calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
@@ -493,7 +512,6 @@ def _generate_presigned_url(
     }
     canonical_query = _encode_query_params(query_params)

-    # Determine host and scheme from config or request
     api_base = current_app.config.get("API_BASE_URL")
     if api_base:
         parsed = urlparse(api_base)
@@ -545,6 +563,28 @@ def _strip_ns(tag: str | None) -> str:
     return tag.split("}")[-1]
+def _find_element(parent: Element, name: str) -> Optional[Element]:
+    """Find a child element by name, trying both namespaced and non-namespaced variants.
+
+    This handles XML documents that may or may not include namespace prefixes.
+    """
+    el = parent.find(f"{{*}}{name}")
+    if el is None:
+        el = parent.find(name)
+    return el
+
+
+def _find_element_text(parent: Element, name: str, default: str = "") -> str:
+    """Find a child element and return its text content.
+
+    Returns the default value if element not found or has no text.
+    """
+    el = _find_element(parent, name)
+    if el is None or el.text is None:
+        return default
+    return el.text.strip()
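Usage sketch for the new helpers: the `{*}` wildcard (supported by ElementTree since Python 3.8) matches any namespace, and the bare name is the fallback for documents that carry none, so the same lookup works on both payload shapes S3 clients send.

    from xml.etree.ElementTree import fromstring

    for payload in (
        '<Tagging xmlns="http://s3.amazonaws.com/doc/2006-03-01/">'
        "<TagSet><Tag><Key>env</Key><Value>prod</Value></Tag></TagSet></Tagging>",
        "<Tagging><TagSet><Tag><Key>env</Key><Value>prod</Value></Tag></TagSet></Tagging>",
    ):
        tag = fromstring(payload).find("{*}TagSet/{*}Tag")
        if tag is None:  # the same fallback _find_element applies
            tag = fromstring(payload).find("TagSet/Tag")
        key = tag.find("{*}Key")
        if key is None:
            key = tag.find("Key")
        print(key.text)  # 'env' both times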
 def _parse_tagging_document(payload: bytes) -> list[dict[str, str]]:
     try:
         root = fromstring(payload)
@@ -561,17 +601,11 @@ def _parse_tagging_document(payload: bytes) -> list[dict[str, str]]:
     for tag_el in list(tagset):
         if _strip_ns(tag_el.tag) != "Tag":
             continue
-        key_el = tag_el.find("{*}Key")
-        if key_el is None:
-            key_el = tag_el.find("Key")
-        value_el = tag_el.find("{*}Value")
-        if value_el is None:
-            value_el = tag_el.find("Value")
-        key = (key_el.text or "").strip() if key_el is not None else ""
+        key = _find_element_text(tag_el, "Key")
         if not key:
             continue
-        value = value_el.text if value_el is not None else ""
-        tags.append({"Key": key, "Value": value or ""})
+        value = _find_element_text(tag_el, "Value")
+        tags.append({"Key": key, "Value": value})
     return tags
@@ -853,7 +887,6 @@ def _bucket_versioning_handler(bucket_name: str) -> Response:
         current_app.logger.info("Bucket versioning updated", extra={"bucket": bucket_name, "status": status})
         return Response(status=200)

-    # GET
     try:
         enabled = storage.is_versioning_enabled(bucket_name)
     except StorageError as exc:
@@ -889,7 +922,7 @@ def _bucket_tagging_handler(bucket_name: str) -> Response:
             return _error_response("NoSuchBucket", str(exc), 404)
         current_app.logger.info("Bucket tags deleted", extra={"bucket": bucket_name})
         return Response(status=204)

-    # PUT
     payload = request.get_data(cache=False) or b""
     try:
         tags = _parse_tagging_document(payload)
@@ -914,7 +947,6 @@ def _object_tagging_handler(bucket_name: str, object_key: str) -> Response:
     if error:
         return error

-    # For tagging, we use read permission for GET, write for PUT/DELETE
     action = "read" if request.method == "GET" else "write"
     try:
         _authorize_action(principal, bucket_name, action, object_key=object_key)
@@ -1093,10 +1125,8 @@ def _bucket_location_handler(bucket_name: str) -> Response:
     if not storage.bucket_exists(bucket_name):
         return _error_response("NoSuchBucket", "Bucket does not exist", 404)

-    # Return the configured AWS_REGION
     region = current_app.config.get("AWS_REGION", "us-east-1")
     root = Element("LocationConstraint")
-    # AWS returns empty for us-east-1, but we'll be explicit
     root.text = region if region != "us-east-1" else None
     return _xml_response(root)
@@ -1116,13 +1146,11 @@ def _bucket_acl_handler(bucket_name: str) -> Response:
         return _error_response("NoSuchBucket", "Bucket does not exist", 404)

     if request.method == "PUT":
-        # We don't fully implement ACLs, but we accept the request for compatibility
-        # Check for canned ACL header
+        # Accept canned ACL headers for S3 compatibility (not fully implemented)
         canned_acl = request.headers.get("x-amz-acl", "private")
         current_app.logger.info("Bucket ACL set (canned)", extra={"bucket": bucket_name, "acl": canned_acl})
         return Response(status=200)

-    # GET - Return a basic ACL document showing full control for owner
     root = Element("AccessControlPolicy")
     owner = SubElement(root, "Owner")
     SubElement(owner, "ID").text = principal.access_key if principal else "anonymous"
@@ -1170,7 +1198,6 @@ def _bucket_list_versions_handler(bucket_name: str) -> Response:
     if key_marker:
         objects = [obj for obj in objects if obj.key > key_marker]

-    # Build XML response
     root = Element("ListVersionsResult", xmlns="http://s3.amazonaws.com/doc/2006-03-01/")
     SubElement(root, "Name").text = bucket_name
     SubElement(root, "Prefix").text = prefix
@@ -1188,10 +1215,9 @@ def _bucket_list_versions_handler(bucket_name: str) -> Response:
             is_truncated = True
             break

-        # Current version
         version = SubElement(root, "Version")
         SubElement(version, "Key").text = obj.key
-        SubElement(version, "VersionId").text = "null"  # Current version ID
+        SubElement(version, "VersionId").text = "null"
         SubElement(version, "IsLatest").text = "true"
         SubElement(version, "LastModified").text = obj.last_modified.strftime("%Y-%m-%dT%H:%M:%S.000Z")
         SubElement(version, "ETag").text = f'"{obj.etag}"'
@@ -1205,7 +1231,6 @@ def _bucket_list_versions_handler(bucket_name: str) -> Response:
         version_count += 1
         next_key_marker = obj.key

-        # Get historical versions
         try:
             versions = storage.list_object_versions(bucket_name, obj.key)
             for v in versions:
@@ -1289,14 +1314,12 @@ def _render_lifecycle_config(config: list) -> Element:
         rule_el = SubElement(root, "Rule")
         SubElement(rule_el, "ID").text = rule.get("ID", "")

-        # Filter
         filter_el = SubElement(rule_el, "Filter")
         if rule.get("Prefix"):
             SubElement(filter_el, "Prefix").text = rule.get("Prefix", "")

         SubElement(rule_el, "Status").text = rule.get("Status", "Enabled")

-        # Expiration
         if "Expiration" in rule:
             exp = rule["Expiration"]
             exp_el = SubElement(rule_el, "Expiration")
@@ -1307,14 +1330,12 @@ def _render_lifecycle_config(config: list) -> Element:
             if exp.get("ExpiredObjectDeleteMarker"):
                 SubElement(exp_el, "ExpiredObjectDeleteMarker").text = "true"

-        # NoncurrentVersionExpiration
         if "NoncurrentVersionExpiration" in rule:
             nve = rule["NoncurrentVersionExpiration"]
             nve_el = SubElement(rule_el, "NoncurrentVersionExpiration")
             if "NoncurrentDays" in nve:
                 SubElement(nve_el, "NoncurrentDays").text = str(nve["NoncurrentDays"])

-        # AbortIncompleteMultipartUpload
         if "AbortIncompleteMultipartUpload" in rule:
             aimu = rule["AbortIncompleteMultipartUpload"]
             aimu_el = SubElement(rule_el, "AbortIncompleteMultipartUpload")
@@ -1338,29 +1359,24 @@ def _parse_lifecycle_config(payload: bytes) -> list:
     for rule_el in root.findall("{*}Rule") or root.findall("Rule"):
         rule: dict = {}

-        # ID
         id_el = rule_el.find("{*}ID") or rule_el.find("ID")
         if id_el is not None and id_el.text:
             rule["ID"] = id_el.text.strip()

-        # Filter/Prefix
         filter_el = rule_el.find("{*}Filter") or rule_el.find("Filter")
         if filter_el is not None:
             prefix_el = filter_el.find("{*}Prefix") or filter_el.find("Prefix")
             if prefix_el is not None and prefix_el.text:
                 rule["Prefix"] = prefix_el.text
-        # Legacy Prefix (outside Filter)
         if "Prefix" not in rule:
             prefix_el = rule_el.find("{*}Prefix") or rule_el.find("Prefix")
             if prefix_el is not None:
                 rule["Prefix"] = prefix_el.text or ""

-        # Status
         status_el = rule_el.find("{*}Status") or rule_el.find("Status")
         rule["Status"] = (status_el.text or "Enabled").strip() if status_el is not None else "Enabled"

-        # Expiration
         exp_el = rule_el.find("{*}Expiration") or rule_el.find("Expiration")
         if exp_el is not None:
             expiration: dict = {}
@@ -1376,7 +1392,6 @@ def _parse_lifecycle_config(payload: bytes) -> list:
             if expiration:
                 rule["Expiration"] = expiration

-        # NoncurrentVersionExpiration
         nve_el = rule_el.find("{*}NoncurrentVersionExpiration") or rule_el.find("NoncurrentVersionExpiration")
         if nve_el is not None:
             nve: dict = {}
@@ -1386,7 +1401,6 @@ def _parse_lifecycle_config(payload: bytes) -> list:
             if nve:
                 rule["NoncurrentVersionExpiration"] = nve

-        # AbortIncompleteMultipartUpload
         aimu_el = rule_el.find("{*}AbortIncompleteMultipartUpload") or rule_el.find("AbortIncompleteMultipartUpload")
         if aimu_el is not None:
             aimu: dict = {}
@@ -1424,7 +1438,6 @@ def _bucket_quota_handler(bucket_name: str) -> Response:
         if not quota:
             return _error_response("NoSuchQuotaConfiguration", "No quota configuration found", 404)

-        # Return as JSON for simplicity (not a standard S3 API)
         stats = storage.bucket_stats(bucket_name)
         return jsonify({
             "quota": quota,
@@ -1436,7 +1449,7 @@ def _bucket_quota_handler(bucket_name: str) -> Response:
     if request.method == "DELETE":
         try:
-            storage.set_bucket_quota(bucket_name, max_size_bytes=None, max_objects=None)
+            storage.set_bucket_quota(bucket_name, max_bytes=None, max_objects=None)
         except StorageError as exc:
             return _error_response("NoSuchBucket", str(exc), 404)
         current_app.logger.info("Bucket quota deleted", extra={"bucket": bucket_name})
@@ -1453,7 +1466,6 @@ def _bucket_quota_handler(bucket_name: str) -> Response:
     if max_size_bytes is None and max_objects is None:
         return _error_response("InvalidArgument", "At least one of max_size_bytes or max_objects is required", 400)

-    # Validate types
     if max_size_bytes is not None:
         try:
             max_size_bytes = int(max_size_bytes)
@@ -1471,7 +1483,7 @@ def _bucket_quota_handler(bucket_name: str) -> Response:
             return _error_response("InvalidArgument", f"max_objects {exc}", 400)

     try:
-        storage.set_bucket_quota(bucket_name, max_size_bytes=max_size_bytes, max_objects=max_objects)
+        storage.set_bucket_quota(bucket_name, max_bytes=max_size_bytes, max_objects=max_objects)
     except StorageError as exc:
         return _error_response("NoSuchBucket", str(exc), 404)
@@ -1564,7 +1576,6 @@ def _bulk_delete_handler(bucket_name: str) -> Response:
     return _xml_response(result, status=200)

-# ---------------------- routes ----------------------
 @s3_api_bp.get("/")
 @limiter.limit("60 per minute")
 def list_buckets() -> Response:
@@ -1642,7 +1653,6 @@ def bucket_handler(bucket_name: str) -> Response:
         current_app.logger.info("Bucket deleted", extra={"bucket": bucket_name})
         return Response(status=204)

-    # GET - list objects (supports both ListObjects and ListObjectsV2)
     principal, error = _require_principal()
     try:
         _authorize_action(principal, bucket_name, "list")
@@ -1650,18 +1660,12 @@ def bucket_handler(bucket_name: str) -> Response:
         if error:
             return error
         return _error_response("AccessDenied", str(exc), 403)

-    try:
-        objects = storage.list_objects_all(bucket_name)
-    except StorageError as exc:
-        return _error_response("NoSuchBucket", str(exc), 404)

-    # Check if this is ListObjectsV2 (list-type=2)
     list_type = request.args.get("list-type")
     prefix = request.args.get("prefix", "")
     delimiter = request.args.get("delimiter", "")
     max_keys = min(int(request.args.get("max-keys", current_app.config["UI_PAGE_SIZE"])), 1000)

-    # Pagination markers
     marker = request.args.get("marker", "")  # ListObjects v1
     continuation_token = request.args.get("continuation-token", "")  # ListObjectsV2
     start_after = request.args.get("start-after", "")  # ListObjectsV2
@@ -1671,7 +1675,6 @@ def bucket_handler(bucket_name: str) -> Response:
     effective_start = ""
     if list_type == "2":
         if continuation_token:
-            import base64
             try:
                 effective_start = base64.urlsafe_b64decode(continuation_token.encode()).decode("utf-8")
             except Exception:
@@ -1681,11 +1684,17 @@ def bucket_handler(bucket_name: str) -> Response:
     else:
         effective_start = marker

-    if prefix:
-        objects = [obj for obj in objects if obj.key.startswith(prefix)]
-
-    if effective_start:
-        objects = [obj for obj in objects if obj.key > effective_start]
+    fetch_keys = max_keys * 10 if delimiter else max_keys
+    try:
+        list_result = storage.list_objects(
+            bucket_name,
+            max_keys=fetch_keys,
+            continuation_token=effective_start or None,
+            prefix=prefix or None,
+        )
+        objects = list_result.objects
+    except StorageError as exc:
+        return _error_response("NoSuchBucket", str(exc), 404)

     common_prefixes: list[str] = []
     filtered_objects: list = []
@@ -1694,7 +1703,6 @@ def bucket_handler(bucket_name: str) -> Response:
     for obj in objects:
         key_after_prefix = obj.key[len(prefix):] if prefix else obj.key
         if delimiter in key_after_prefix:
-            # This is a "folder" - extract the common prefix
             common_prefix = prefix + key_after_prefix.split(delimiter)[0] + delimiter
             if common_prefix not in seen_prefixes:
                 seen_prefixes.add(common_prefix)
@@ -1705,7 +1713,7 @@ def bucket_handler(bucket_name: str) -> Response:
     common_prefixes = sorted(common_prefixes)

     total_items = len(objects) + len(common_prefixes)
-    is_truncated = total_items > max_keys
+    is_truncated = total_items > max_keys or list_result.is_truncated
     if len(objects) >= max_keys:
         objects = objects[:max_keys]
@@ -1723,7 +1731,6 @@ def bucket_handler(bucket_name: str) -> Response:
         next_marker = common_prefixes[-1].rstrip(delimiter) if delimiter else common_prefixes[-1]
     if list_type == "2" and next_marker:
-        import base64
         next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")

     if list_type == "2":
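The continuation token is just the last key of the previous page, wrapped in URL-safe base64 so it can travel in a query string without further escaping; the round trip is symmetric:

    import base64

    last_key = "photos/2025/holiday.jpg"
    token = base64.urlsafe_b64encode(last_key.encode()).decode("utf-8")  # opaque, URL-safe
    resumed = base64.urlsafe_b64decode(token.encode()).decode("utf-8")
    assert resumed == last_key  # listing resumes strictly after this key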
@@ -1792,7 +1799,6 @@ def object_handler(bucket_name: str, object_key: str):
     if "tagging" in request.args:
         return _object_tagging_handler(bucket_name, object_key)

-    # Multipart Uploads
     if request.method == "POST":
         if "uploads" in request.args:
             return _initiate_multipart_upload(bucket_name, object_key)
@@ -1845,9 +1851,7 @@ def object_handler(bucket_name: str, object_key: str):
         response = Response(status=200)
         response.headers["ETag"] = f'"{meta.etag}"'

-        # Trigger replication if not a replication request
-        user_agent = request.headers.get("User-Agent", "")
-        if "S3ReplicationAgent" not in user_agent:
+        if "S3ReplicationAgent" not in request.headers.get("User-Agent", ""):
             _replication_manager().trigger_replication(bucket_name, object_key, action="write")

         return response
@@ -1866,31 +1870,25 @@ def object_handler(bucket_name: str, object_key: str):
     metadata = storage.get_object_metadata(bucket_name, object_key)
     mimetype = mimetypes.guess_type(object_key)[0] or "application/octet-stream"

-    # Check if object is encrypted and needs decryption
     is_encrypted = "x-amz-server-side-encryption" in metadata

     if request.method == "GET":
         if is_encrypted and hasattr(storage, 'get_object_data'):
-            # Use encrypted storage to decrypt
             try:
                 data, clean_metadata = storage.get_object_data(bucket_name, object_key)
                 response = Response(data, mimetype=mimetype)
                 logged_bytes = len(data)
-                # Use decrypted size for Content-Length
                 response.headers["Content-Length"] = len(data)
                 etag = hashlib.md5(data).hexdigest()
             except StorageError as exc:
                 return _error_response("InternalError", str(exc), 500)
         else:
-            # Stream unencrypted file directly
             stat = path.stat()
             response = Response(_stream_file(path), mimetype=mimetype, direct_passthrough=True)
             logged_bytes = stat.st_size
             etag = storage._compute_etag(path)
     else:
-        # HEAD request
         if is_encrypted and hasattr(storage, 'get_object_data'):
-            # For encrypted objects, we need to report decrypted size
             try:
                 data, _ = storage.get_object_data(bucket_name, object_key)
                 response = Response(status=200)
@@ -1919,7 +1917,6 @@ def object_handler(bucket_name: str, object_key: str):
     storage.delete_object(bucket_name, object_key)
     current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key})

-    # Trigger replication if not a replication request
     user_agent = request.headers.get("User-Agent", "")
     if "S3ReplicationAgent" not in user_agent:
         _replication_manager().trigger_replication(bucket_name, object_key, action="delete")
@@ -2174,52 +2171,91 @@ def _copy_object(dest_bucket: str, dest_key: str, copy_source: str) -> Response:

 class AwsChunkedDecoder:
-    """Decodes aws-chunked encoded streams."""
+    """Decodes aws-chunked encoded streams.
+
+    Performance optimized with buffered line reading instead of byte-by-byte.
+    """

     def __init__(self, stream):
         self.stream = stream
-        self.buffer = b""
+        self._read_buffer = bytearray()  # Performance: Pre-allocated buffer
         self.chunk_remaining = 0
         self.finished = False

+    def _read_line(self) -> bytes:
+        """Read until CRLF using buffered reads instead of byte-by-byte.
+
+        Performance: Reads in batches of 64-256 bytes instead of 1 byte at a time.
+        """
+        line = bytearray()
+        while True:
+            # Check if we have data in buffer
+            if self._read_buffer:
+                # Look for CRLF in buffer
+                idx = self._read_buffer.find(b"\r\n")
+                if idx != -1:
+                    # Found CRLF - extract line and update buffer
+                    line.extend(self._read_buffer[: idx + 2])
+                    del self._read_buffer[: idx + 2]
+                    return bytes(line)
+                # No CRLF yet - consume entire buffer
+                line.extend(self._read_buffer)
+                self._read_buffer.clear()
+            # Read more data in larger chunks (64 bytes is enough for chunk headers)
+            chunk = self.stream.read(64)
+            if not chunk:
+                return bytes(line) if line else b""
+            self._read_buffer.extend(chunk)
+
+    def _read_exact(self, n: int) -> bytes:
+        """Read exactly n bytes, using buffer first."""
+        result = bytearray()
+        # Use buffered data first
+        if self._read_buffer:
+            take = min(len(self._read_buffer), n)
+            result.extend(self._read_buffer[:take])
+            del self._read_buffer[:take]
+            n -= take
+        # Read remaining directly from stream
+        if n > 0:
+            data = self.stream.read(n)
+            if data:
+                result.extend(data)
+        return bytes(result)
+
     def read(self, size=-1):
         if self.finished:
             return b""
-        result = b""
+        result = bytearray()  # Performance: Use bytearray for building result
         while size == -1 or len(result) < size:
             if self.chunk_remaining > 0:
                 to_read = self.chunk_remaining
                 if size != -1:
                     to_read = min(to_read, size - len(result))
-                chunk = self.stream.read(to_read)
+                chunk = self._read_exact(to_read)
                 if not chunk:
                     raise IOError("Unexpected EOF in chunk data")
-                result += chunk
+                result.extend(chunk)
                 self.chunk_remaining -= len(chunk)
                 if self.chunk_remaining == 0:
-                    # Read CRLF after chunk data
-                    crlf = self.stream.read(2)
+                    crlf = self._read_exact(2)
                     if crlf != b"\r\n":
                         raise IOError("Malformed chunk: missing CRLF")
             else:
-                line = b""
-                while True:
-                    char = self.stream.read(1)
-                    if not char:
-                        if not line:
-                            self.finished = True
-                            return result
-                        raise IOError("Unexpected EOF in chunk size")
-                    line += char
-                    if line.endswith(b"\r\n"):
-                        break
+                line = self._read_line()
+                if not line:
+                    self.finished = True
+                    return bytes(result)
                 try:
                     line_str = line.decode("ascii").strip()
-                    # Handle chunk-signature extension if present (e.g. "1000;chunk-signature=...")
                     if ";" in line_str:
                         line_str = line_str.split(";")[0]
                     chunk_size = int(line_str, 16)
@@ -2228,22 +2264,16 @@ class AwsChunkedDecoder:
                 if chunk_size == 0:
                     self.finished = True
-                    # Skip trailing headers
                     while True:
-                        line = b""
-                        while True:
-                            char = self.stream.read(1)
-                            if not char:
-                                break
-                            line += char
-                            if line.endswith(b"\r\n"):
-                                break
-                        if line == b"\r\n" or not line:
+                        trailer = self._read_line()
+                        if trailer == b"\r\n" or not trailer:
                             break
-                    return result
+                    return bytes(result)
                 self.chunk_remaining = chunk_size
-        return result
+        return bytes(result)
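For reference, the wire format the decoder parses is a hex size (optionally followed by a `;chunk-signature=` extension), CRLF, the payload, CRLF, repeated until a zero-size chunk and its trailers. A tiny standalone loop over a hand-built body shows the framing (the dummy signatures here are ignored, exactly as the class ignores them):

    import io

    body = io.BytesIO(
        b"4;chunk-signature=deadbeef\r\nWiki\r\n"
        b"5;chunk-signature=deadbeef\r\npedia\r\n"
        b"0;chunk-signature=deadbeef\r\n\r\n"
    )

    decoded = bytearray()
    while True:
        header = body.readline()                  # e.g. b'4;chunk-signature=...\r\n'
        size = int(header.split(b";")[0], 16)
        if size == 0:
            break
        decoded.extend(body.read(size))
        body.read(2)                              # consume the CRLF after the payload
    print(bytes(decoded))                         # b'Wikipedia'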
 def _initiate_multipart_upload(bucket_name: str, object_key: str) -> Response:
@@ -2375,7 +2405,6 @@ def _abort_multipart_upload(bucket_name: str, object_key: str) -> Response:
     try:
         _storage().abort_multipart_upload(bucket_name, upload_id)
     except StorageError as exc:
-        # Abort is idempotent, but if bucket missing...
         if "Bucket does not exist" in str(exc):
             return _error_response("NoSuchBucket", str(exc), 404)
@@ -2385,7 +2414,6 @@ def _abort_multipart_upload(bucket_name: str, object_key: str) -> Response:
 @s3_api_bp.before_request
 def resolve_principal():
     g.principal = None
-    # Try SigV4
     try:
         if ("Authorization" in request.headers and request.headers["Authorization"].startswith("AWS4-HMAC-SHA256")) or \
                 (request.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256"):
@@ -2394,7 +2422,6 @@ def resolve_principal():
     except Exception:
         pass

-    # Try simple auth headers (internal/testing)
     access_key = request.headers.get("X-Access-Key")
     secret_key = request.headers.get("X-Secret-Key")
     if access_key and secret_key:

View File

@@ -7,9 +7,11 @@ import os
 import re
 import shutil
 import stat
+import threading
 import time
 import unicodedata
 import uuid
+from collections import OrderedDict
 from contextlib import contextmanager
 from dataclasses import dataclass
 from datetime import datetime, timezone
@@ -128,11 +130,30 @@ class ObjectStorage:
     BUCKET_VERSIONS_DIR = "versions"
     MULTIPART_MANIFEST = "manifest.json"
     BUCKET_CONFIG_FILE = ".bucket.json"
+    KEY_INDEX_CACHE_TTL = 30
+    OBJECT_CACHE_MAX_SIZE = 100  # Maximum number of buckets to cache

     def __init__(self, root: Path) -> None:
         self.root = Path(root)
         self.root.mkdir(parents=True, exist_ok=True)
         self._ensure_system_roots()
+        # LRU cache for object metadata with thread-safe access
+        self._object_cache: OrderedDict[str, tuple[Dict[str, ObjectMeta], float]] = OrderedDict()
+        self._cache_lock = threading.Lock()  # Global lock for cache structure
+        # Performance: Per-bucket locks to reduce contention
+        self._bucket_locks: Dict[str, threading.Lock] = {}
+        # Cache version counter for detecting stale reads
+        self._cache_version: Dict[str, int] = {}
+        # Performance: Bucket config cache with TTL
+        self._bucket_config_cache: Dict[str, tuple[dict[str, Any], float]] = {}
+        self._bucket_config_cache_ttl = 30.0  # 30 second TTL
+
+    def _get_bucket_lock(self, bucket_id: str) -> threading.Lock:
+        """Get or create a lock for a specific bucket. Reduces global lock contention."""
+        with self._cache_lock:
+            if bucket_id not in self._bucket_locks:
+                self._bucket_locks[bucket_id] = threading.Lock()
+            return self._bucket_locks[bucket_id]
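A minimal sketch of the OrderedDict LRU-with-TTL pattern these fields support; the real eviction logic lives in cache accessors this hunk does not show, so the class below is illustrative only, bounded the same way (at most `OBJECT_CACHE_MAX_SIZE` entries, 30-second TTL):

    import threading
    import time
    from collections import OrderedDict

    class TTLCache:
        """Evict the least-recently-used entry past max_size; expire entries past ttl."""

        def __init__(self, max_size: int = 100, ttl: float = 30.0) -> None:
            self._data: OrderedDict[str, tuple[object, float]] = OrderedDict()
            self._lock = threading.Lock()
            self.max_size, self.ttl = max_size, ttl

        def get(self, key: str):
            with self._lock:
                entry = self._data.get(key)
                if entry is None or time.monotonic() - entry[1] > self.ttl:
                    self._data.pop(key, None)
                    return None
                self._data.move_to_end(key)          # mark as most recently used
                return entry[0]

        def put(self, key: str, value: object) -> None:
            with self._lock:
                self._data[key] = (value, time.monotonic())
                self._data.move_to_end(key)
                if len(self._data) > self.max_size:
                    self._data.popitem(last=False)   # drop least recently used

The per-bucket locks follow the same idea at a different granularity: the global `_cache_lock` is held only long enough to hand out a bucket's lock, after which writers to different buckets no longer contend with each other.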
     def list_buckets(self) -> List[BucketMeta]:
         buckets: List[BucketMeta] = []
@@ -142,7 +163,7 @@ class ObjectStorage:
             buckets.append(
                 BucketMeta(
                     name=bucket.name,
-                    created_at=datetime.fromtimestamp(stat.st_ctime),
+                    created_at=datetime.fromtimestamp(stat.st_ctime, timezone.utc),
                 )
             )
         return buckets
@@ -189,8 +210,7 @@ class ObjectStorage:
         total_bytes = 0
         version_count = 0
         version_bytes = 0

-        # Count current objects in the bucket folder
         for path in bucket_path.rglob("*"):
             if path.is_file():
                 rel = path.relative_to(bucket_path)
@@ -201,8 +221,7 @@ class ObjectStorage:
                 stat = path.stat()
                 object_count += 1
                 total_bytes += stat.st_size

-        # Count archived versions in the system folder
         versions_root = self._bucket_versions_root(bucket_name)
         if versions_root.exists():
             for path in versions_root.rglob("*.bin"):
@@ -216,8 +235,8 @@ class ObjectStorage:
             "bytes": total_bytes,
             "version_count": version_count,
             "version_bytes": version_bytes,
-            "total_objects": object_count + version_count,  # All objects including versions
-            "total_bytes": total_bytes + version_bytes,  # All storage including versions
+            "total_objects": object_count + version_count,
+            "total_bytes": total_bytes + version_bytes,
         }
         try:
@@ -240,11 +259,13 @@ class ObjectStorage:
         bucket_path = self._bucket_path(bucket_name)
         if not bucket_path.exists():
             raise StorageError("Bucket does not exist")
-        if self._has_visible_objects(bucket_path):
+        # Performance: Single check instead of three separate traversals
+        has_objects, has_versions, has_multipart = self._check_bucket_contents(bucket_path)
+        if has_objects:
             raise StorageError("Bucket not empty")
-        if self._has_archived_versions(bucket_path):
+        if has_versions:
             raise StorageError("Bucket contains archived object versions")
-        if self._has_active_multipart_uploads(bucket_path):
+        if has_multipart:
             raise StorageError("Bucket has active multipart uploads")
         self._remove_tree(bucket_path)
         self._remove_tree(self._system_bucket_root(bucket_path.name))
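`_check_bucket_contents` itself is outside this diff, so the following is a hypothetical reconstruction, assuming the layout the rest of storage.py implies (an `INTERNAL_FOLDERS` set for system paths, a versions root holding `.bin` archives, and a multipart upload directory). The gain over three separate helper calls is a single call whose scans short-circuit on the first hit:

    from pathlib import Path

    INTERNAL_FOLDERS = frozenset({".myfsio.sys"})  # assumed name, mirroring the config paths

    def check_bucket_contents(bucket_path: Path, versions_root: Path,
                              multipart_root: Path) -> tuple[bool, bool, bool]:
        """Return (has_objects, has_versions, has_multipart) with early-exit scans."""
        has_objects = any(
            p.is_file() and p.relative_to(bucket_path).parts[0] not in INTERNAL_FOLDERS
            for p in bucket_path.rglob("*")
        )
        has_versions = versions_root.exists() and any(versions_root.rglob("*.bin"))
        has_multipart = multipart_root.exists() and any(multipart_root.iterdir())
        return has_objects, has_versions, has_multipart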
@@ -274,32 +295,20 @@ class ObjectStorage:
             raise StorageError("Bucket does not exist")
         bucket_id = bucket_path.name

-        # Collect all matching object keys first (lightweight - just paths)
-        all_keys: List[str] = []
-        for path in bucket_path.rglob("*"):
-            if path.is_file():
-                rel = path.relative_to(bucket_path)
-                if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
-                    continue
-                key = str(rel.as_posix())
-                if prefix and not key.startswith(prefix):
-                    continue
-                all_keys.append(key)
-        all_keys.sort()
+        object_cache = self._get_object_cache(bucket_id, bucket_path)
+        all_keys = sorted(object_cache.keys())
+        if prefix:
+            all_keys = [k for k in all_keys if k.startswith(prefix)]

         total_count = len(all_keys)

-        # Handle continuation token (the key to start after)
         start_index = 0
         if continuation_token:
             try:
-                # continuation_token is the last key from previous page
-                for i, key in enumerate(all_keys):
-                    if key > continuation_token:
-                        start_index = i
-                        break
-                else:
-                    # Token is past all keys
+                import bisect
+                start_index = bisect.bisect_right(all_keys, continuation_token)
+                if start_index >= total_count:
                     return ListObjectsResult(
                         objects=[],
                         is_truncated=False,
@@ -307,34 +316,17 @@ class ObjectStorage:
                         total_count=total_count,
                     )
             except Exception:
-                pass  # Invalid token, start from beginning
+                pass

-        # Get the slice we need
         end_index = start_index + max_keys
         keys_slice = all_keys[start_index:end_index]
         is_truncated = end_index < total_count

-        # Now load full metadata only for the objects we're returning
         objects: List[ObjectMeta] = []
         for key in keys_slice:
-            safe_key = self._sanitize_object_key(key)
-            path = bucket_path / safe_key
-            if not path.exists():
-                continue  # Object may have been deleted
-            try:
-                stat = path.stat()
-                metadata = self._read_metadata(bucket_id, safe_key)
-                objects.append(
-                    ObjectMeta(
-                        key=key,
-                        size=stat.st_size,
-                        last_modified=datetime.fromtimestamp(stat.st_mtime),
-                        etag=self._compute_etag(path),
-                        metadata=metadata or None,
-                    )
-                )
-            except OSError:
-                continue  # File may have been deleted during iteration
+            obj = object_cache.get(key)
+            if obj:
+                objects.append(obj)

         next_token = keys_slice[-1] if is_truncated and keys_slice else None
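`bisect_right` returns the index of the first key strictly greater than the token in O(log n) on the sorted key list, replacing the previous linear scan over every key:

    import bisect

    all_keys = ["a/1.txt", "a/2.txt", "b/1.txt", "c/1.txt"]  # already sorted

    token = "a/2.txt"                         # last key of the previous page
    start = bisect.bisect_right(all_keys, token)
    print(all_keys[start:start + 2])          # ['b/1.txt', 'c/1.txt']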
@@ -368,14 +360,12 @@ class ObjectStorage:
         destination = bucket_path / safe_key
         destination.parent.mkdir(parents=True, exist_ok=True)

-        # Check if this is an overwrite (won't add to object count)
         is_overwrite = destination.exists()
         existing_size = destination.stat().st_size if is_overwrite else 0

         if self._is_versioning_enabled(bucket_path) and is_overwrite:
             self._archive_current_version(bucket_id, safe_key, reason="overwrite")

-        # Write to temp file first to get actual size
         tmp_dir = self._system_root_path() / self.SYSTEM_TMP_DIR
         tmp_dir.mkdir(parents=True, exist_ok=True)
         tmp_path = tmp_dir / f"{uuid.uuid4().hex}.tmp"
@@ -387,9 +377,7 @@ class ObjectStorage:
             new_size = tmp_path.stat().st_size

-            # Check quota before finalizing
             if enforce_quota:
-                # Calculate net change (new size minus size being replaced)
                 size_delta = new_size - existing_size
                 object_delta = 0 if is_overwrite else 1
@@ -405,31 +393,34 @@ class ObjectStorage:
                     quota_check["usage"],
                 )

-            # Move to final destination
             shutil.move(str(tmp_path), str(destination))
         finally:
-            # Clean up temp file if it still exists
             try:
                 tmp_path.unlink(missing_ok=True)
             except OSError:
                 pass

         stat = destination.stat()
-        if metadata:
-            self._write_metadata(bucket_id, safe_key, metadata)
-        else:
-            self._delete_metadata(bucket_id, safe_key)
+        etag = checksum.hexdigest()
+        internal_meta = {"__etag__": etag, "__size__": str(stat.st_size)}
+        combined_meta = {**internal_meta, **(metadata or {})}
+        self._write_metadata(bucket_id, safe_key, combined_meta)
         self._invalidate_bucket_stats_cache(bucket_id)

-        return ObjectMeta(
+        # Performance: Lazy update - only update the affected key instead of invalidating whole cache
+        obj_meta = ObjectMeta(
             key=safe_key.as_posix(),
             size=stat.st_size,
-            last_modified=datetime.fromtimestamp(stat.st_mtime),
+            last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
-            etag=checksum.hexdigest(),
+            etag=etag,
             metadata=metadata,
         )
+        self._update_object_cache_entry(bucket_id, safe_key.as_posix(), obj_meta)
+        return obj_meta
     def get_object_path(self, bucket_name: str, object_key: str) -> Path:
         path = self._object_path(bucket_name, object_key)
@@ -453,16 +444,14 @@ class ObjectStorage:
         for parent in path.parents:
             if parent == stop_at:
                 break
-            # Retry a few times with small delays for Windows/OneDrive
             for attempt in range(3):
                 try:
                     if parent.exists() and not any(parent.iterdir()):
                         parent.rmdir()
-                    break  # Success, move to next parent
+                    break
                 except OSError:
                     if attempt < 2:
-                        time.sleep(0.1)  # Brief delay before retry
+                        time.sleep(0.1)
-                    # Final attempt failed - continue to next parent
                     break

     def delete_object(self, bucket_name: str, object_key: str) -> None:
@@ -477,8 +466,10 @@ class ObjectStorage:
         rel = path.relative_to(bucket_path)
         self._safe_unlink(path)
         self._delete_metadata(bucket_id, rel)
         self._invalidate_bucket_stats_cache(bucket_id)
+        # Performance: Lazy update - only remove the affected key instead of invalidating whole cache
+        self._update_object_cache_entry(bucket_id, safe_key.as_posix(), None)
         self._cleanup_empty_parents(path, bucket_path)

     def purge_object(self, bucket_name: str, object_key: str) -> None:
@@ -498,9 +489,10 @@ class ObjectStorage:
         legacy_version_dir = self._legacy_version_dir(bucket_id, rel)
         if legacy_version_dir.exists():
             shutil.rmtree(legacy_version_dir, ignore_errors=True)
-        # Invalidate bucket stats cache
         self._invalidate_bucket_stats_cache(bucket_id)
+        # Performance: Lazy update - only remove the affected key instead of invalidating whole cache
+        self._update_object_cache_entry(bucket_id, rel.as_posix(), None)
         self._cleanup_empty_parents(target, bucket_path)
     def is_versioning_enabled(self, bucket_name: str) -> bool:
@@ -612,7 +604,6 @@ class ObjectStorage:
         bucket_path = self._require_bucket_path(bucket_name)
         if max_bytes is None and max_objects is None:
-            # Remove quota entirely
             self._set_bucket_config_entry(bucket_path.name, "quota", None)
             return
@@ -654,9 +645,7 @@ class ObjectStorage:
             "message": None,
         }

-        # Get current stats (uses cache when available)
         stats = self.bucket_stats(bucket_name)
-        # Use totals which include versions for quota enforcement
         current_bytes = stats.get("total_bytes", stats.get("bytes", 0))
         current_objects = stats.get("total_objects", stats.get("objects", 0))
@@ -766,8 +755,6 @@ class ObjectStorage:
         bucket_id = bucket_path.name
         safe_key = self._sanitize_object_key(object_key)
         version_dir = self._version_dir(bucket_id, safe_key)
-        if not version_dir.exists():
-            version_dir = self._legacy_version_dir(bucket_id, safe_key)
         if not version_dir.exists():
             version_dir = self._legacy_version_dir(bucket_id, safe_key)
         if not version_dir.exists():
@@ -817,7 +804,7 @@ class ObjectStorage:
         return ObjectMeta(
             key=safe_key.as_posix(),
             size=stat.st_size,
-            last_modified=datetime.fromtimestamp(stat.st_mtime),
+            last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
             etag=self._compute_etag(destination),
             metadata=metadata or None,
         )
@@ -916,45 +903,73 @@ class ObjectStorage:
        part_number: int,
        stream: BinaryIO,
    ) -> str:
-       """Upload a part for a multipart upload.
-
-       Uses file locking to safely update the manifest and handle concurrent uploads.
-       """
        if part_number < 1:
            raise StorageError("part_number must be >= 1")
        bucket_path = self._bucket_path(bucket_name)
-       # Get the upload root directory
        upload_root = self._multipart_dir(bucket_path.name, upload_id)
        if not upload_root.exists():
            upload_root = self._legacy_multipart_dir(bucket_path.name, upload_id)
        if not upload_root.exists():
            raise StorageError("Multipart upload not found")
-       # Write the part data first (can happen concurrently)
+       # Write part to temporary file first, then rename atomically
        checksum = hashlib.md5()
        part_filename = f"part-{part_number:05d}.part"
        part_path = upload_root / part_filename
-       with part_path.open("wb") as target:
-           shutil.copyfileobj(_HashingReader(stream, checksum), target)
+       temp_path = upload_root / f".{part_filename}.tmp"
+       try:
+           with temp_path.open("wb") as target:
+               shutil.copyfileobj(_HashingReader(stream, checksum), target)
+           # Atomic rename (or replace on Windows)
+           temp_path.replace(part_path)
+       except OSError:
+           # Clean up temp file on failure
+           try:
+               temp_path.unlink(missing_ok=True)
+           except OSError:
+               pass
+           raise
        record = {
            "etag": checksum.hexdigest(),
            "size": part_path.stat().st_size,
            "filename": part_filename,
        }
-       # Update manifest with file locking to prevent race conditions
        manifest_path = upload_root / self.MULTIPART_MANIFEST
        lock_path = upload_root / ".manifest.lock"
-       with lock_path.open("w") as lock_file:
-           with _file_lock(lock_file):
-               # Re-read manifest under lock to get latest state
-               try:
-                   manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
-               except (OSError, json.JSONDecodeError) as exc:
-                   raise StorageError("Multipart manifest unreadable") from exc
-               parts = manifest.setdefault("parts", {})
-               parts[str(part_number)] = record
-               manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
+       # Retry loop for handling transient lock/read failures
+       max_retries = 3
+       for attempt in range(max_retries):
+           try:
+               with lock_path.open("w") as lock_file:
+                   with _file_lock(lock_file):
+                       try:
+                           manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
+                       except (OSError, json.JSONDecodeError) as exc:
+                           if attempt < max_retries - 1:
+                               time.sleep(0.1 * (attempt + 1))
+                               continue
+                           raise StorageError("Multipart manifest unreadable") from exc
+                       parts = manifest.setdefault("parts", {})
+                       parts[str(part_number)] = record
+                       manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
+               break
+           except OSError as exc:
+               if attempt < max_retries - 1:
+                   time.sleep(0.1 * (attempt + 1))
+                   continue
+               raise StorageError(f"Failed to update multipart manifest: {exc}") from exc
        return record["etag"]
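The manifest update leans on a `_file_lock` helper that this diff does not show. A rough sketch of what such a cross-platform advisory lock can look like; the name, the Windows branch, and the exact semantics are assumptions, not the project's actual implementation:

```python
# Hypothetical sketch of a _file_lock-style context manager.
import os
from contextlib import contextmanager

@contextmanager
def file_lock(lock_file):
    if os.name == "nt":
        import msvcrt
        # Lock one byte at position 0; LK_LOCK retries briefly before raising.
        msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
        try:
            yield lock_file
        finally:
            lock_file.seek(0)
            msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
    else:
        import fcntl
        fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)  # exclusive advisory lock
        try:
            yield lock_file
        finally:
            fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
```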
    def complete_multipart_upload(

@@ -999,11 +1014,9 @@ class ObjectStorage:
        safe_key = self._sanitize_object_key(manifest["object_key"])
        destination = bucket_path / safe_key
-       # Check if this is an overwrite
        is_overwrite = destination.exists()
        existing_size = destination.stat().st_size if is_overwrite else 0
-       # Check quota before writing
        if enforce_quota:
            size_delta = total_size - existing_size
            object_delta = 0 if is_overwrite else 1

@@ -1058,17 +1071,21 @@ class ObjectStorage:
                pass
        shutil.rmtree(upload_root, ignore_errors=True)
        self._invalidate_bucket_stats_cache(bucket_id)
        stat = destination.stat()
-       return ObjectMeta(
+       # Performance: Lazy update - only update the affected key instead of invalidating whole cache
+       obj_meta = ObjectMeta(
            key=safe_key.as_posix(),
            size=stat.st_size,
-           last_modified=datetime.fromtimestamp(stat.st_mtime),
+           last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
            etag=checksum.hexdigest(),
            metadata=metadata,
        )
+       self._update_object_cache_entry(bucket_id, safe_key.as_posix(), obj_meta)
+       return obj_meta
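The lazy cache update trades a full cache rebuild for an O(1) dictionary write. A toy illustration of the difference, using plain dicts rather than the app's classes:

```python
# Toy stand-in for the per-bucket listing cache.
cache = {"a.txt": {"size": 10}, "b.txt": {"size": 20}}

def on_put(key: str, meta: dict) -> None:
    cache[key] = meta      # lazy update: the next listing stays warm

def on_delete(key: str) -> None:
    cache.pop(key, None)   # drop just the affected key, keep the rest

on_put("c.bin", {"size": 4096})
assert set(cache) == {"a.txt", "b.txt", "c.bin"}
```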
    def abort_multipart_upload(self, bucket_name: str, upload_id: str) -> None:
        bucket_path = self._bucket_path(bucket_name)
@@ -1163,6 +1180,229 @@ class ObjectStorage:
    def _legacy_multipart_dir(self, bucket_name: str, upload_id: str) -> Path:
        return self._legacy_multipart_bucket_root(bucket_name) / upload_id
+   def _fast_list_keys(self, bucket_path: Path) -> List[str]:
+       """Fast directory walk using os.scandir instead of pathlib.rglob.
+
+       This is significantly faster for large directories (10K+ files).
+       Returns just the keys (for backward compatibility).
+       """
+       return list(self._build_object_cache(bucket_path).keys())
+
+   def _build_object_cache(self, bucket_path: Path) -> Dict[str, ObjectMeta]:
+       """Build a complete object metadata cache for a bucket.
+
+       Uses os.scandir for fast directory walking and a persistent etag index.
+       """
+       from concurrent.futures import ThreadPoolExecutor
+
+       bucket_id = bucket_path.name
+       objects: Dict[str, ObjectMeta] = {}
+       bucket_str = str(bucket_path)
+       bucket_len = len(bucket_str) + 1
+       etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
+       meta_cache: Dict[str, str] = {}
+       index_mtime: float = 0
+       if etag_index_path.exists():
+           try:
+               index_mtime = etag_index_path.stat().st_mtime
+               with open(etag_index_path, 'r', encoding='utf-8') as f:
+                   meta_cache = json.load(f)
+           except (OSError, json.JSONDecodeError):
+               meta_cache = {}
+       meta_root = self._bucket_meta_root(bucket_id)
+       needs_rebuild = False
+       if meta_root.exists() and index_mtime > 0:
+           def check_newer(dir_path: str) -> bool:
+               try:
+                   with os.scandir(dir_path) as it:
+                       for entry in it:
+                           if entry.is_dir(follow_symlinks=False):
+                               if check_newer(entry.path):
+                                   return True
+                           elif entry.is_file(follow_symlinks=False) and entry.name.endswith('.meta.json'):
+                               if entry.stat().st_mtime > index_mtime:
+                                   return True
+               except OSError:
+                   pass
+               return False
+           needs_rebuild = check_newer(str(meta_root))
+       elif not meta_cache:
+           needs_rebuild = True
+       if needs_rebuild and meta_root.exists():
+           meta_str = str(meta_root)
+           meta_len = len(meta_str) + 1
+           meta_files: list[tuple[str, str]] = []
+           def collect_meta_files(dir_path: str) -> None:
+               try:
+                   with os.scandir(dir_path) as it:
+                       for entry in it:
+                           if entry.is_dir(follow_symlinks=False):
+                               collect_meta_files(entry.path)
+                           elif entry.is_file(follow_symlinks=False) and entry.name.endswith('.meta.json'):
+                               rel = entry.path[meta_len:]
+                               key = rel[:-10].replace(os.sep, '/')
+                               meta_files.append((key, entry.path))
+               except OSError:
+                   pass
+           collect_meta_files(meta_str)
+           def read_meta_file(item: tuple[str, str]) -> tuple[str, str | None]:
+               key, path = item
+               try:
+                   with open(path, 'rb') as f:
+                       content = f.read()
+                   etag_marker = b'"__etag__"'
+                   idx = content.find(etag_marker)
+                   if idx != -1:
+                       start = content.find(b'"', idx + len(etag_marker) + 1)
+                       if start != -1:
+                           end = content.find(b'"', start + 1)
+                           if end != -1:
+                               return key, content[start+1:end].decode('utf-8')
+                   return key, None
+               except (OSError, UnicodeDecodeError):
+                   return key, None
+           if meta_files:
+               meta_cache = {}
+               with ThreadPoolExecutor(max_workers=min(64, len(meta_files))) as executor:
+                   for key, etag in executor.map(read_meta_file, meta_files):
+                       if etag:
+                           meta_cache[key] = etag
+           try:
+               etag_index_path.parent.mkdir(parents=True, exist_ok=True)
+               with open(etag_index_path, 'w', encoding='utf-8') as f:
+                   json.dump(meta_cache, f)
+           except OSError:
+               pass
+       def scan_dir(dir_path: str) -> None:
+           try:
+               with os.scandir(dir_path) as it:
+                   for entry in it:
+                       if entry.is_dir(follow_symlinks=False):
+                           rel_start = entry.path[bucket_len:].split(os.sep)[0] if len(entry.path) > bucket_len else entry.name
+                           if rel_start in self.INTERNAL_FOLDERS:
+                               continue
+                           scan_dir(entry.path)
+                       elif entry.is_file(follow_symlinks=False):
+                           rel = entry.path[bucket_len:]
+                           first_part = rel.split(os.sep)[0] if os.sep in rel else rel
+                           if first_part in self.INTERNAL_FOLDERS:
+                               continue
+                           key = rel.replace(os.sep, '/')
+                           try:
+                               stat = entry.stat()
+                               etag = meta_cache.get(key)
+                               if not etag:
+                                   etag = f'"{stat.st_size}-{int(stat.st_mtime)}"'
+                               objects[key] = ObjectMeta(
+                                   key=key,
+                                   size=stat.st_size,
+                                   last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
+                                   etag=etag,
+                                   metadata=None,
+                               )
+                           except OSError:
+                               pass
+           except OSError:
+               pass
+       scan_dir(bucket_str)
+       return objects
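`_build_object_cache` gets most of its speed from `os.scandir`, whose `DirEntry` objects reuse the type and stat information the OS already returned, unlike `pathlib.rglob`, which stats each path again. A self-contained sketch of the same walk strategy, written iteratively so deep trees cannot hit the recursion limit:

```python
import os

def walk_keys(root: str) -> list[str]:
    """Collect relative file keys under root using a scandir-based walk."""
    keys: list[str] = []
    stack = [root]
    while stack:
        current = stack.pop()
        try:
            with os.scandir(current) as it:
                for entry in it:
                    if entry.is_dir(follow_symlinks=False):
                        stack.append(entry.path)
                    elif entry.is_file(follow_symlinks=False):
                        rel = os.path.relpath(entry.path, root)
                        keys.append(rel.replace(os.sep, "/"))
        except OSError:
            continue  # directory vanished or permission denied; skip it
    return keys
```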
+   def _get_object_cache(self, bucket_id: str, bucket_path: Path) -> Dict[str, ObjectMeta]:
+       """Get cached object metadata for a bucket, refreshing if stale.
+
+       Uses LRU eviction to prevent unbounded cache growth.
+       Thread-safe with per-bucket locks to reduce contention.
+       """
+       now = time.time()
+       # Quick check with global lock (brief)
+       with self._cache_lock:
+           cached = self._object_cache.get(bucket_id)
+           if cached:
+               objects, timestamp = cached
+               if now - timestamp < self.KEY_INDEX_CACHE_TTL:
+                   self._object_cache.move_to_end(bucket_id)
+                   return objects
+           cache_version = self._cache_version.get(bucket_id, 0)
+       # Use per-bucket lock for cache building (allows parallel builds for different buckets)
+       bucket_lock = self._get_bucket_lock(bucket_id)
+       with bucket_lock:
+           # Double-check cache after acquiring per-bucket lock
+           with self._cache_lock:
+               cached = self._object_cache.get(bucket_id)
+               if cached:
+                   objects, timestamp = cached
+                   if now - timestamp < self.KEY_INDEX_CACHE_TTL:
+                       self._object_cache.move_to_end(bucket_id)
+                       return objects
+           # Build cache with per-bucket lock held (prevents duplicate work)
+           objects = self._build_object_cache(bucket_path)
+           with self._cache_lock:
+               # Check if cache was invalidated while we were building
+               current_version = self._cache_version.get(bucket_id, 0)
+               if current_version != cache_version:
+                   objects = self._build_object_cache(bucket_path)
+               # Evict oldest entries if cache is full
+               while len(self._object_cache) >= self.OBJECT_CACHE_MAX_SIZE:
+                   self._object_cache.popitem(last=False)
+               self._object_cache[bucket_id] = (objects, time.time())
+               self._object_cache.move_to_end(bucket_id)
+       return objects
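`_get_object_cache` is a double-checked locking pattern: a brief global lock for the fast path, then a per-bucket lock for the slow rebuild so different buckets can rebuild concurrently. The `_get_bucket_lock` helper is not shown in this diff; a condensed sketch of the lock registry it implies (names assumed):

```python
import threading

_registry_lock = threading.Lock()
_bucket_locks: dict[str, threading.Lock] = {}

def get_bucket_lock(bucket_id: str) -> threading.Lock:
    # Short global critical section just to find or create the lock;
    # the expensive work later runs under the returned per-bucket lock,
    # so distinct buckets do not serialize against each other.
    with _registry_lock:
        return _bucket_locks.setdefault(bucket_id, threading.Lock())
```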
+   def _invalidate_object_cache(self, bucket_id: str) -> None:
+       """Invalidate the object cache and etag index for a bucket.
+
+       Increments version counter to signal stale reads.
+       """
+       with self._cache_lock:
+           self._object_cache.pop(bucket_id, None)
+           self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1
+       etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
+       try:
+           etag_index_path.unlink(missing_ok=True)
+       except OSError:
+           pass
+
+   def _update_object_cache_entry(self, bucket_id: str, key: str, meta: Optional[ObjectMeta]) -> None:
+       """Update a single entry in the object cache instead of invalidating the whole cache.
+
+       This is a performance optimization - lazy update instead of full invalidation.
+       """
+       with self._cache_lock:
+           cached = self._object_cache.get(bucket_id)
+           if cached:
+               objects, timestamp = cached
+               if meta is None:
+                   # Delete operation - remove key from cache
+                   objects.pop(key, None)
+               else:
+                   # Put operation - update/add key in cache
+                   objects[key] = meta
+               # Keep same timestamp - don't reset TTL for single key updates
    def _ensure_system_roots(self) -> None:
        for path in (
            self._system_root_path(),

@@ -1182,19 +1422,33 @@ class ObjectStorage:
        return self._system_bucket_root(bucket_name) / self.BUCKET_CONFIG_FILE

    def _read_bucket_config(self, bucket_name: str) -> dict[str, Any]:
+       # Performance: Check cache first
+       now = time.time()
+       cached = self._bucket_config_cache.get(bucket_name)
+       if cached:
+           config, cached_time = cached
+           if now - cached_time < self._bucket_config_cache_ttl:
+               return config.copy()  # Return copy to prevent mutation
        config_path = self._bucket_config_path(bucket_name)
        if not config_path.exists():
+           self._bucket_config_cache[bucket_name] = ({}, now)
            return {}
        try:
            data = json.loads(config_path.read_text(encoding="utf-8"))
-           return data if isinstance(data, dict) else {}
+           config = data if isinstance(data, dict) else {}
+           self._bucket_config_cache[bucket_name] = (config, now)
+           return config.copy()
        except (OSError, json.JSONDecodeError):
+           self._bucket_config_cache[bucket_name] = ({}, now)
            return {}

    def _write_bucket_config(self, bucket_name: str, payload: dict[str, Any]) -> None:
        config_path = self._bucket_config_path(bucket_name)
        config_path.parent.mkdir(parents=True, exist_ok=True)
        config_path.write_text(json.dumps(payload), encoding="utf-8")
+       # Performance: Update cache immediately after write
+       self._bucket_config_cache[bucket_name] = (payload.copy(), time.time())
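The config reads now go through a small TTL cache with write-through on save, and every hit hands back a copy so callers cannot mutate the cached dict. The same idea in a generic, standalone form (illustrative names, not the app's attributes):

```python
import time

class TTLCache:
    """Read-through/write-through cache with a fixed time-to-live."""

    def __init__(self, ttl: float = 30.0):
        self._ttl = ttl
        self._entries: dict[str, tuple[dict, float]] = {}

    def get(self, key: str) -> dict | None:
        hit = self._entries.get(key)
        if hit:
            value, stored_at = hit
            if time.time() - stored_at < self._ttl:
                return value.copy()  # copies keep callers from mutating the cache
        return None

    def put(self, key: str, value: dict) -> None:
        # Write-through: refresh the entry the moment the backing store changes.
        self._entries[key] = (value.copy(), time.time())
```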
    def _set_bucket_config_entry(self, bucket_name: str, key: str, value: Any | None) -> None:
        config = self._read_bucket_config(bucket_name)

@@ -1316,33 +1570,68 @@ class ObjectStorage:
            except OSError:
                continue

-   def _has_visible_objects(self, bucket_path: Path) -> bool:
+   def _check_bucket_contents(self, bucket_path: Path) -> tuple[bool, bool, bool]:
+       """Check bucket for objects, versions, and multipart uploads in a single pass.
+
+       Performance optimization: Combines three separate rglob traversals into one.
+       Returns (has_visible_objects, has_archived_versions, has_active_multipart_uploads).
+       Uses early exit when all three are found.
+       """
+       has_objects = False
+       has_versions = False
+       has_multipart = False
+       bucket_name = bucket_path.name
+       # Check visible objects in bucket
        for path in bucket_path.rglob("*"):
+           if has_objects:
+               break
            if not path.is_file():
                continue
            rel = path.relative_to(bucket_path)
            if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
                continue
-           return True
-       return False
+           has_objects = True
+       # Check archived versions (only if needed)
+       for version_root in (
+           self._bucket_versions_root(bucket_name),
+           self._legacy_versions_root(bucket_name),
+       ):
+           if has_versions:
+               break
+           if version_root.exists():
+               for path in version_root.rglob("*"):
+                   if path.is_file():
+                       has_versions = True
+                       break
+       # Check multipart uploads (only if needed)
+       for uploads_root in (
+           self._multipart_bucket_root(bucket_name),
+           self._legacy_multipart_bucket_root(bucket_name),
+       ):
+           if has_multipart:
+               break
+           if uploads_root.exists():
+               for path in uploads_root.rglob("*"):
+                   if path.is_file():
+                       has_multipart = True
+                       break
+       return has_objects, has_versions, has_multipart
+
+   def _has_visible_objects(self, bucket_path: Path) -> bool:
+       has_objects, _, _ = self._check_bucket_contents(bucket_path)
+       return has_objects

    def _has_archived_versions(self, bucket_path: Path) -> bool:
-       for version_root in (
-           self._bucket_versions_root(bucket_path.name),
-           self._legacy_versions_root(bucket_path.name),
-       ):
-           if version_root.exists() and any(path.is_file() for path in version_root.rglob("*")):
-               return True
-       return False
+       _, has_versions, _ = self._check_bucket_contents(bucket_path)
+       return has_versions

    def _has_active_multipart_uploads(self, bucket_path: Path) -> bool:
-       for uploads_root in (
-           self._multipart_bucket_root(bucket_path.name),
-           self._legacy_multipart_bucket_root(bucket_path.name),
-       ):
-           if uploads_root.exists() and any(path.is_file() for path in uploads_root.rglob("*")):
-               return True
-       return False
+       _, _, has_multipart = self._check_bucket_contents(bucket_path)
+       return has_multipart
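The three public helpers now delegate to one combined traversal, so a caller that needs all three answers pays for a single walk instead of three. Each per-root probe reduces to a lazy `any()` over a generator, which stops at the first file it sees; a minimal sketch:

```python
from pathlib import Path

def has_any_file(root: Path) -> bool:
    # rglob yields lazily, so an occupied directory is answered
    # after the first hit without materializing the full listing.
    return root.exists() and any(p.is_file() for p in root.rglob("*"))
```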
    def _remove_tree(self, path: Path) -> None:
        if not path.exists():

app/ui.py
@@ -189,7 +189,7 @@ def inject_nav_state() -> dict[str, Any]:
    return {
        "principal": principal,
        "can_manage_iam": can_manage,
-       "can_view_metrics": can_manage,  # Only admins can view metrics
+       "can_view_metrics": can_manage,
        "csrf_token": generate_csrf,
    }
@@ -294,7 +294,6 @@ def bucket_detail(bucket_name: str):
    storage = _storage()
    try:
        _authorize_ui(principal, bucket_name, "list")
-       # Don't load objects here - UI fetches them asynchronously via /buckets/<name>/objects
        if not storage.bucket_exists(bucket_name):
            raise StorageError("Bucket does not exist")
    except (StorageError, IamError) as exc:
@@ -343,7 +342,6 @@ def bucket_detail(bucket_name: str):
    except IamError:
        can_manage_versioning = False
-   # Check replication permission
    can_manage_replication = False
    if principal:
        try:
@@ -352,7 +350,6 @@ def bucket_detail(bucket_name: str):
    except IamError:
        can_manage_replication = False
-   # Check if user is admin (can configure replication settings, not just toggle)
    is_replication_admin = False
    if principal:
        try:
@@ -361,12 +358,9 @@ def bucket_detail(bucket_name: str):
    except IamError:
        is_replication_admin = False
-   # Replication info - don't compute sync status here (it's slow), let JS fetch it async
    replication_rule = _replication().get_rule(bucket_name)
-   # Load connections for admin, or for non-admin if there's an existing rule (to show target name)
    connections = _connections().list() if (is_replication_admin or replication_rule) else []
-   # Encryption settings
    encryption_config = storage.get_bucket_encryption(bucket_name)
    kms_manager = _kms()
    kms_keys = kms_manager.list_keys() if kms_manager else []
@@ -374,7 +368,6 @@ def bucket_detail(bucket_name: str):
    encryption_enabled = current_app.config.get("ENCRYPTION_ENABLED", False)
    can_manage_encryption = can_manage_versioning  # Same as other bucket properties
-   # Quota settings (admin only)
    bucket_quota = storage.get_bucket_quota(bucket_name)
    bucket_stats = storage.bucket_stats(bucket_name)
    can_manage_quota = False
@@ -384,7 +377,6 @@ def bucket_detail(bucket_name: str):
    except IamError:
        pass
-   # Pass the objects API endpoint URL for async loading
    objects_api_url = url_for("ui.list_bucket_objects", bucket_name=bucket_name)

    return render_template(
@@ -423,7 +415,7 @@ def list_bucket_objects(bucket_name: str):
    except IamError as exc:
        return jsonify({"error": str(exc)}), 403
-   max_keys = min(int(request.args.get("max_keys", 100)), 1000)
+   max_keys = min(int(request.args.get("max_keys", 1000)), 100000)
    continuation_token = request.args.get("continuation_token") or None
    prefix = request.args.get("prefix") or None
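With the cap raised, clients are expected to page through large buckets using the continuation token the endpoint returns. A hypothetical client loop; the URL shape, the authenticated session, and the `objects` response key are assumptions for illustration:

```python
import requests

def list_all_objects(session: requests.Session, base_url: str, bucket: str) -> list[dict]:
    """Follow next_continuation_token until the listing is exhausted."""
    collected: list[dict] = []
    token = None
    while True:
        params = {"max_keys": 1000}
        if token:
            params["continuation_token"] = token
        resp = session.get(f"{base_url}/buckets/{bucket}/objects", params=params, timeout=30)
        resp.raise_for_status()
        payload = resp.json()
        collected.extend(payload.get("objects", []))  # response key assumed
        token = payload.get("next_continuation_token")
        if not token:
            return collected
```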
@@ -442,6 +434,14 @@ def list_bucket_objects(bucket_name: str):
    except StorageError:
        versioning_enabled = False
+   # Pre-compute URL templates once (not per-object) for performance
+   # Frontend will construct actual URLs by replacing KEY_PLACEHOLDER
+   preview_template = url_for("ui.object_preview", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
+   delete_template = url_for("ui.delete_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
+   presign_template = url_for("ui.object_presign", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
+   versions_template = url_for("ui.object_versions", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
+   restore_template = url_for("ui.restore_object_version", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER", version_id="VERSION_ID_PLACEHOLDER")
+
    objects_data = []
    for obj in result.objects:
        objects_data.append({
@@ -450,13 +450,6 @@ def list_bucket_objects(bucket_name: str):
            "last_modified": obj.last_modified.isoformat(),
            "last_modified_display": obj.last_modified.strftime("%b %d, %Y %H:%M"),
            "etag": obj.etag,
-           "metadata": obj.metadata or {},
-           "preview_url": url_for("ui.object_preview", bucket_name=bucket_name, object_key=obj.key),
-           "download_url": url_for("ui.object_preview", bucket_name=bucket_name, object_key=obj.key) + "?download=1",
-           "presign_endpoint": url_for("ui.object_presign", bucket_name=bucket_name, object_key=obj.key),
-           "delete_endpoint": url_for("ui.delete_object", bucket_name=bucket_name, object_key=obj.key),
-           "versions_endpoint": url_for("ui.object_versions", bucket_name=bucket_name, object_key=obj.key),
-           "restore_template": url_for("ui.restore_object_version", bucket_name=bucket_name, object_key=obj.key, version_id="VERSION_ID_PLACEHOLDER"),
        })

    return jsonify({
@@ -465,6 +458,14 @@ def list_bucket_objects(bucket_name: str):
        "next_continuation_token": result.next_continuation_token,
        "total_count": result.total_count,
        "versioning_enabled": versioning_enabled,
+       "url_templates": {
+           "preview": preview_template,
+           "download": preview_template + "?download=1",
+           "presign": presign_template,
+           "delete": delete_template,
+           "versions": versions_template,
+           "restore": restore_template,
+       },
    })
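Instead of seven absolute URLs per object, the response now ships one set of templates and the frontend substitutes `KEY_PLACEHOLDER` per row, which shrinks the payload considerably for large listings. The substitution, sketched in Python for consistency (the real code does this in JavaScript, and the template shape below is illustrative):

```python
from urllib.parse import quote

def build_url(template: str, key: str, version_id: str | None = None) -> str:
    # Keys can contain slashes; quote with safe="" so the key survives
    # as a single encoded path segment.
    url = template.replace("KEY_PLACEHOLDER", quote(key, safe=""))
    if version_id is not None:
        url = url.replace("VERSION_ID_PLACEHOLDER", quote(version_id, safe=""))
    return url

# Illustrative template shape, not the app's real route:
print(build_url("/buckets/demo/objects/KEY_PLACEHOLDER/preview", "reports/2025/q1.pdf"))
```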
@@ -738,41 +739,30 @@ def bulk_download_objects(bucket_name: str):
    unique_keys = list(dict.fromkeys(cleaned))
    storage = _storage()
-   # Check permissions for all keys first (or at least bucket read)
-   # We'll check bucket read once, then object read for each if needed?
-   # _authorize_ui checks bucket level if object_key is None, but we need to check each object if fine-grained policies exist.
-   # For simplicity/performance, we check bucket list/read.
+   # Verify permission to read bucket contents
    try:
        _authorize_ui(principal, bucket_name, "read")
    except IamError as exc:
        return jsonify({"error": str(exc)}), 403
-   # Create ZIP
+   # Create ZIP archive of selected objects
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
        for key in unique_keys:
            try:
-               # Verify individual object permission if needed?
-               # _authorize_ui(principal, bucket_name, "read", object_key=key)
-               # This might be slow for many objects. Assuming bucket read is enough for now or we accept the overhead.
-               # Let's skip individual check for bulk speed, assuming bucket read implies object read unless denied.
-               # But strictly we should check. Let's check.
                _authorize_ui(principal, bucket_name, "read", object_key=key)
-               # Check if object is encrypted
                metadata = storage.get_object_metadata(bucket_name, key)
                is_encrypted = "x-amz-server-side-encryption" in metadata
                if is_encrypted and hasattr(storage, 'get_object_data'):
-                   # Decrypt and add to zip
                    data, _ = storage.get_object_data(bucket_name, key)
                    zf.writestr(key, data)
                else:
-                   # Add unencrypted file directly
                    path = storage.get_object_path(bucket_name, key)
                    zf.write(path, arcname=key)
            except (StorageError, IamError):
-               # Skip files we can't read or don't exist
+               # Skip objects that can't be accessed
                continue
    buffer.seek(0)
@@ -1077,7 +1067,6 @@ def update_bucket_encryption(bucket_name: str):
    action = request.form.get("action", "enable")
    if action == "disable":
-       # Disable encryption
        try:
            _storage().set_bucket_encryption(bucket_name, None)
            flash("Default encryption disabled", "info")
@@ -1085,16 +1074,14 @@ def update_bucket_encryption(bucket_name: str):
            flash(_friendly_error_message(exc), "danger")
        return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-   # Enable or update encryption
    algorithm = request.form.get("algorithm", "AES256")
    kms_key_id = request.form.get("kms_key_id", "").strip() or None
-   # Validate algorithm
    if algorithm not in ("AES256", "aws:kms"):
        flash("Invalid encryption algorithm", "danger")
        return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-   # Build encryption config following AWS format
+   # Build encryption configuration in AWS S3 format
    encryption_config: dict[str, Any] = {
        "Rules": [
            {
@@ -1270,7 +1257,6 @@ def delete_iam_user(access_key: str):
        return redirect(url_for("ui.iam_dashboard"))
    if access_key == principal.access_key:
-       # Self-deletion
        try:
            _iam().delete_user(access_key)
            session.pop("credentials", None)
@@ -1352,6 +1338,9 @@ def create_connection():
@ui_bp.post("/connections/test")
def test_connection():
+   from botocore.config import Config as BotoConfig
+   from botocore.exceptions import ConnectTimeoutError, EndpointConnectionError, ReadTimeoutError
+
    principal = _current_principal()
    try:
        _iam().authorize(principal, None, "iam:list_users")
@@ -1368,18 +1357,32 @@ def test_connection():
        return jsonify({"status": "error", "message": "Missing credentials"}), 400
    try:
+       config = BotoConfig(
+           connect_timeout=5,
+           read_timeout=10,
+           retries={'max_attempts': 1}
+       )
        s3 = boto3.client(
            "s3",
            endpoint_url=endpoint,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            region_name=region,
+           config=config,
        )
-       # Try to list buckets to verify credentials and endpoint
        s3.list_buckets()
        return jsonify({"status": "ok", "message": "Connection successful"})
+   except (ConnectTimeoutError, ReadTimeoutError):
+       return jsonify({"status": "error", "message": f"Connection timed out - endpoint may be down or unreachable: {endpoint}"}), 400
+   except EndpointConnectionError:
+       return jsonify({"status": "error", "message": f"Could not connect to endpoint: {endpoint}"}), 400
+   except ClientError as e:
+       error_code = e.response.get('Error', {}).get('Code', 'Unknown')
+       error_msg = e.response.get('Error', {}).get('Message', str(e))
+       return jsonify({"status": "error", "message": f"Connection failed ({error_code}): {error_msg}"}), 400
    except Exception as e:
-       return jsonify({"status": "error", "message": str(e)}), 400
+       return jsonify({"status": "error", "message": f"Connection failed: {str(e)}"}), 400
@ui_bp.post("/connections/<connection_id>/update") @ui_bp.post("/connections/<connection_id>/update")
@@ -1440,7 +1443,6 @@ def update_bucket_replication(bucket_name: str):
flash(str(exc), "danger") flash(str(exc), "danger")
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication")) return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication"))
# Check if user is admin (required for create/delete operations)
is_admin = False is_admin = False
try: try:
_iam().authorize(principal, None, "iam:list_users") _iam().authorize(principal, None, "iam:list_users")
@@ -1451,14 +1453,12 @@ def update_bucket_replication(bucket_name: str):
    action = request.form.get("action")
    if action == "delete":
-       # Admin only - remove configuration entirely
        if not is_admin:
            flash("Only administrators can remove replication configuration", "danger")
            return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication"))
        _replication().delete_rule(bucket_name)
        flash("Replication configuration removed", "info")
    elif action == "pause":
-       # Users can pause - just set enabled=False
        rule = _replication().get_rule(bucket_name)
        if rule:
            rule.enabled = False
@@ -1467,16 +1467,20 @@ def update_bucket_replication(bucket_name: str):
        else:
            flash("No replication configuration to pause", "warning")
    elif action == "resume":
-       # Users can resume - just set enabled=True
+       from .replication import REPLICATION_MODE_ALL
        rule = _replication().get_rule(bucket_name)
        if rule:
            rule.enabled = True
            _replication().set_rule(rule)
-           flash("Replication resumed", "success")
+           # When resuming, sync any pending objects that accumulated while paused
+           if rule.mode == REPLICATION_MODE_ALL:
+               _replication().replicate_existing_objects(bucket_name)
+               flash("Replication resumed. Syncing pending objects in background.", "success")
+           else:
+               flash("Replication resumed", "success")
        else:
            flash("No replication configuration to resume", "warning")
    elif action == "create":
-       # Admin only - create new configuration
        if not is_admin:
            flash("Only administrators can configure replication settings", "danger")
            return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication"))
@@ -1501,7 +1505,6 @@ def update_bucket_replication(bucket_name: str):
    )
    _replication().set_rule(rule)
-   # If mode is "all", trigger replication of existing objects
    if replication_mode == REPLICATION_MODE_ALL:
        _replication().replicate_existing_objects(bucket_name)
        flash("Replication configured. Existing objects are being replicated in the background.", "success")
@@ -1526,10 +1529,31 @@ def get_replication_status(bucket_name: str):
    if not rule:
        return jsonify({"error": "No replication rule"}), 404
-   # This is the slow operation - compute sync status by comparing buckets
-   stats = _replication().get_sync_status(bucket_name)
+   connection = _connections().get(rule.target_connection_id)
+   endpoint_healthy = False
+   endpoint_error = None
+   if connection:
+       endpoint_healthy = _replication().check_endpoint_health(connection)
+       if not endpoint_healthy:
+           endpoint_error = f"Cannot reach endpoint: {connection.endpoint_url}"
+   else:
+       endpoint_error = "Target connection not found"
+   stats = None
+   if endpoint_healthy:
+       stats = _replication().get_sync_status(bucket_name)
    if not stats:
-       return jsonify({"error": "Failed to compute status"}), 500
+       return jsonify({
+           "objects_synced": 0,
+           "objects_pending": 0,
+           "objects_orphaned": 0,
+           "bytes_synced": 0,
+           "last_sync_at": rule.stats.last_sync_at if rule.stats else None,
+           "last_sync_key": rule.stats.last_sync_key if rule.stats else None,
+           "endpoint_healthy": endpoint_healthy,
+           "endpoint_error": endpoint_error,
+       })

    return jsonify({
        "objects_synced": stats.objects_synced,
@@ -1538,6 +1562,28 @@ def get_replication_status(bucket_name: str):
        "bytes_synced": stats.bytes_synced,
        "last_sync_at": stats.last_sync_at,
        "last_sync_key": stats.last_sync_key,
+       "endpoint_healthy": endpoint_healthy,
+       "endpoint_error": endpoint_error,
+   })
+
+
+@ui_bp.get("/connections/<connection_id>/health")
+def check_connection_health(connection_id: str):
+   """Check if a connection endpoint is reachable."""
+   principal = _current_principal()
+   try:
+       _iam().authorize(principal, None, "iam:list_users")
+   except IamError:
+       return jsonify({"error": "Access denied"}), 403
+   conn = _connections().get(connection_id)
+   if not conn:
+       return jsonify({"healthy": False, "error": "Connection not found"}), 404
+   healthy = _replication().check_endpoint_health(conn)
+   return jsonify({
+       "healthy": healthy,
+       "error": None if healthy else f"Cannot reach endpoint: {conn.endpoint_url}"
    })
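The new health route returns a small JSON document, so polling it from a script is straightforward. A hypothetical probe; session handling and the base URL depend on the deployment:

```python
import requests

def connection_healthy(session: requests.Session, base_url: str, connection_id: str) -> bool:
    resp = session.get(f"{base_url}/connections/{connection_id}/health", timeout=20)
    if resp.status_code != 200:  # 403 without admin rights, 404 for unknown ids
        return False
    return bool(resp.json().get("healthy"))
```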
@@ -1558,7 +1604,6 @@ def connections_dashboard():

def metrics_dashboard():
    principal = _current_principal()
-   # Metrics are restricted to admin users
    try:
        _iam().authorize(principal, None, "iam:list_users")
    except IamError:
@@ -1582,16 +1627,13 @@ def metrics_dashboard():
    total_bytes_used = 0
    total_versions = 0
-   # Note: Uses cached stats from storage layer to improve performance
    cache_ttl = current_app.config.get("BUCKET_STATS_CACHE_TTL", 60)
    for bucket in buckets:
        stats = storage.bucket_stats(bucket.name, cache_ttl=cache_ttl)
-       # Use totals which include archived versions
        total_objects += stats.get("total_objects", stats.get("objects", 0))
        total_bytes_used += stats.get("total_bytes", stats.get("bytes", 0))
        total_versions += stats.get("version_count", 0)
-   # Calculate system uptime
    boot_time = psutil.boot_time()
    uptime_seconds = time.time() - boot_time
    uptime_days = int(uptime_seconds / 86400)


@@ -1,7 +1,7 @@
"""Central location for the application version string."""

from __future__ import annotations

-APP_VERSION = "0.1.7"
+APP_VERSION = "0.2.0"

def get_version() -> str:


@@ -66,8 +66,28 @@ html {
  color: var(--myfsio-muted) !important;
}

-.table-responsive { border-radius: 0.5rem; overflow: hidden; }
+.table-responsive {
+  border-radius: 0.5rem;
+  overflow-x: auto;
+  -webkit-overflow-scrolling: touch;
+}

.message-stack { position: sticky; top: 1rem; z-index: 100; }

+/* Mobile-friendly table improvements */
+.table-responsive table {
+  min-width: 600px;
+}
+
+.table-responsive table th,
+.table-responsive table td {
+  white-space: nowrap;
+}
+
+/* Allow text wrapping for description columns */
+.table-responsive table td.text-wrap {
+  white-space: normal;
+  min-width: 200px;
+}
+
code { font-size: 0.85rem; }

code {
@@ -342,6 +362,68 @@ code {
  color: #2563eb;
}

+.docs-sidebar-mobile {
+  border-radius: 0.75rem;
+  border: 1px solid var(--myfsio-card-border);
+}
+
+.docs-sidebar-mobile .docs-toc {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 0.5rem 1rem;
+  padding-top: 0.5rem;
+}
+
+.docs-sidebar-mobile .docs-toc li {
+  flex: 1 0 45%;
+}
+
+.min-width-0 {
+  min-width: 0;
+}
+
+/* Ensure pre blocks don't overflow on mobile */
+.alert pre {
+  max-width: 100%;
+  overflow-x: auto;
+  -webkit-overflow-scrolling: touch;
+}
+
+/* IAM User Cards */
+.iam-user-card {
+  border: 1px solid var(--myfsio-card-border);
+  border-radius: 0.75rem;
+  transition: box-shadow 0.2s ease, transform 0.2s ease;
+}
+
+.iam-user-card:hover {
+  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
+}
+
+[data-theme='dark'] .iam-user-card:hover {
+  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
+}
+
+.user-avatar-lg {
+  width: 48px;
+  height: 48px;
+  border-radius: 12px;
+}
+
+.btn-icon {
+  padding: 0.25rem;
+  line-height: 1;
+  border: none;
+  background: transparent;
+  color: var(--myfsio-muted);
+  border-radius: 0.375rem;
+}
+
+.btn-icon:hover {
+  background: var(--myfsio-hover-bg);
+  color: var(--myfsio-text);
+}
+
.badge {
  font-weight: 500;
  padding: 0.35em 0.65em;
@@ -389,8 +471,22 @@ code {
.bucket-table th:last-child { white-space: nowrap; }

.object-key {
-  word-break: break-word;
-  max-width: 32rem;
+  max-width: 0;
+  width: 100%;
+  overflow: hidden;
+  text-overflow: ellipsis;
+}
+
+.object-key .fw-medium {
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
+}
+
+.object-key .text-muted {
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
}

.preview-card { top: 1rem; }
@@ -517,6 +613,22 @@ code {
  overflow-y: auto;
}

+.objects-table-container thead {
+  position: sticky;
+  top: 0;
+  z-index: 10;
+}
+
+.objects-table-container thead th {
+  background-color: #f8f9fa;
+  border-bottom: 1px solid var(--myfsio-card-border);
+  box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
+}
+
+[data-theme='dark'] .objects-table-container thead th {
+  background-color: #1e293b;
+}
+
.btn-group form { display: inline; }

.font-monospace { font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace; }
@@ -1537,6 +1649,41 @@ pre code {
    position: relative !important;
    top: 0 !important;
  }

+ /* Ensure tables are scrollable on mobile */
+ .card-body .table-responsive {
+   margin: -1rem;
+   padding: 0;
+   width: calc(100% + 2rem);
+ }
+
+ .card-body .table-responsive table {
+   margin-bottom: 0;
+ }
+
+ /* IAM users table mobile adjustments */
+ .table th,
+ .table td {
+   padding: 0.5rem 0.75rem;
+ }
+
+ /* Better touch scrolling indicator */
+ .table-responsive::after {
+   content: '';
+   position: absolute;
+   top: 0;
+   right: 0;
+   bottom: 0;
+   width: 20px;
+   background: linear-gradient(to left, var(--myfsio-card-bg), transparent);
+   pointer-events: none;
+   opacity: 0;
+   transition: opacity 0.3s;
+ }
+
+ .table-responsive:not(:hover)::after {
+   opacity: 0.8;
+ }
}

*:focus-visible {

(File diff suppressed because it is too large)

@@ -46,8 +46,7 @@
<div class="d-flex align-items-center gap-3">
  <div class="bucket-icon">
    <svg xmlns="http://www.w3.org/2000/svg" width="22" height="22" fill="currentColor" viewBox="0 0 16 16">
-     <path d="M4.5 5a.5.5 0 1 0 0-1 .5.5 0 0 0 0 1zM3 4.5a.5.5 0 1 1-1 0 .5.5 0 0 1 1 0z"/>
-     <path d="M0 4a2 2 0 0 1 2-2h12a2 2 0 0 1 2 2v1a2 2 0 0 1-2 2H8.5v3a1.5 1.5 0 0 1 1.5 1.5H11a.5.5 0 0 1 0 1h-1v1h1a.5.5 0 0 1 0 1h-1v1a.5.5 0 0 1-1 0v-1H6v1a.5.5 0 0 1-1 0v-1H4a.5.5 0 0 1 0-1h1v-1H4a.5.5 0 0 1 0-1h1.5A1.5 1.5 0 0 1 7 10.5V7H2a2 2 0 0 1-2-2V4zm1 0v1a1 1 0 0 0 1 1h12a1 1 0 0 0 1-1V4a1 1 0 0 0-1-1H2a1 1 0 0 0-1 1zm5 7.5v1h3v-1a.5.5 0 0 0-.5-.5h-2a.5.5 0 0 0-.5.5z"/>
+     <path d="M2.522 5H2a.5.5 0 0 0-.494.574l1.372 9.149A1.5 1.5 0 0 0 4.36 16h7.278a1.5 1.5 0 0 0 1.483-1.277l1.373-9.149A.5.5 0 0 0 14 5h-.522A5.5 5.5 0 0 0 2.522 5zm1.005 0a4.5 4.5 0 0 1 8.945 0H3.527z"/>
    </svg>
  </div>
  <div>


@@ -8,8 +8,8 @@
<p class="text-uppercase text-muted small mb-1">Replication</p>
<h1 class="h3 mb-1 d-flex align-items-center gap-2">
  <svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
-   <path d="M4.5 5a.5.5 0 1 0 0-1 .5.5 0 0 0 0 1zM3 4.5a.5.5 0 1 1-1 0 .5.5 0 0 1 1 0z"/>
-   <path d="M0 4a2 2 0 0 1 2-2h12a2 2 0 0 1 2 2v1a2 2 0 0 1-2 2H8.5v3a1.5 1.5 0 0 1 1.5 1.5H12a.5.5 0 0 1 0 1H4a.5.5 0 0 1 0-1h2A1.5 1.5 0 0 1 7.5 10V7H2a2 2 0 0 1-2-2V4zm1 0v1a1 1 0 0 0 1 1h12a1 1 0 0 0 1-1V4a1 1 0 0 0-1-1H2a1 1 0 0 0-1 1z"/>
+   <path d="M4.406 3.342A5.53 5.53 0 0 1 8 2c2.69 0 4.923 2 5.166 4.579C14.758 6.804 16 8.137 16 9.773 16 11.569 14.502 13 12.687 13H3.781C1.708 13 0 11.366 0 9.318c0-1.763 1.266-3.223 2.942-3.593.143-.863.698-1.723 1.464-2.383z"/>
+   <path d="M10.232 8.768l.546-.353a.25.25 0 0 0 0-.418l-.546-.354a.25.25 0 0 1-.116-.21V6.25a.25.25 0 0 0-.25-.25h-.5a.25.25 0 0 0-.25.25v1.183a.25.25 0 0 1-.116.21l-.546.354a.25.25 0 0 0 0 .418l.546.353a.25.25 0 0 1 .116.21v1.183a.25.25 0 0 0 .25.25h.5a.25.25 0 0 0 .25-.25V8.978a.25.25 0 0 1 .116-.21z"/>
  </svg>
  Remote Connections
</h1>
@@ -104,6 +104,7 @@
<table class="table table-hover align-middle mb-0">
  <thead class="table-light">
    <tr>
+     <th scope="col" style="width: 50px;">Status</th>
      <th scope="col">Name</th>
      <th scope="col">Endpoint</th>
      <th scope="col">Region</th>
@@ -113,13 +114,17 @@
  </thead>
  <tbody>
    {% for conn in connections %}
-   <tr>
+   <tr data-connection-id="{{ conn.id }}">
+     <td class="text-center">
+       <span class="connection-status" data-status="checking" title="Checking...">
+         <span class="spinner-border spinner-border-sm text-muted" role="status" style="width: 12px; height: 12px;"></span>
+       </span>
+     </td>
      <td>
        <div class="d-flex align-items-center gap-2">
          <div class="connection-icon">
            <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" viewBox="0 0 16 16">
-             <path d="M4.5 5a.5.5 0 1 0 0-1 .5.5 0 0 0 0 1zM3 4.5a.5.5 0 1 1-1 0 .5.5 0 0 1 1 0z"/>
-             <path d="M0 4a2 2 0 0 1 2-2h12a2 2 0 0 1 2 2v1a2 2 0 0 1-2 2H8.5v3a1.5 1.5 0 0 1 1.5 1.5H12a.5.5 0 0 1 0 1H4a.5.5 0 0 1 0-1h2A1.5 1.5 0 0 1 7.5 10V7H2a2 2 0 0 1-2-2V4zm1 0v1a1 1 0 0 0 1 1h12a1 1 0 0 0 1-1V4a1 1 0 0 0-1-1H2a1 1 0 0 0-1 1z"/>
+             <path d="M4.406 3.342A5.53 5.53 0 0 1 8 2c2.69 0 4.923 2 5.166 4.579C14.758 6.804 16 8.137 16 9.773 16 11.569 14.502 13 12.687 13H3.781C1.708 13 0 11.366 0 9.318c0-1.763 1.266-3.223 2.942-3.593.143-.863.698-1.723 1.464-2.383z"/>
            </svg>
          </div>
          <span class="fw-medium">{{ conn.name }}</span>
@@ -168,8 +173,7 @@
<div class="empty-state text-center py-5">
  <div class="empty-state-icon mx-auto mb-3">
    <svg xmlns="http://www.w3.org/2000/svg" width="48" height="48" fill="currentColor" viewBox="0 0 16 16">
-     <path d="M4.5 5a.5.5 0 1 0 0-1 .5.5 0 0 0 0 1zM3 4.5a.5.5 0 1 1-1 0 .5.5 0 0 1 1 0z"/>
-     <path d="M0 4a2 2 0 0 1 2-2h12a2 2 0 0 1 2 2v1a2 2 0 0 1-2 2H8.5v3a1.5 1.5 0 0 1 1.5 1.5H12a.5.5 0 0 1 0 1H4a.5.5 0 0 1 0-1h2A1.5 1.5 0 0 1 7.5 10V7H2a2 2 0 0 1-2-2V4zm1 0v1a1 1 0 0 0 1 1h12a1 1 0 0 0 1-1V4a1 1 0 0 0-1-1H2a1 1 0 0 0-1 1z"/>
+     <path d="M4.406 3.342A5.53 5.53 0 0 1 8 2c2.69 0 4.923 2 5.166 4.579C14.758 6.804 16 8.137 16 9.773 16 11.569 14.502 13 12.687 13H3.781C1.708 13 0 11.366 0 9.318c0-1.763 1.266-3.223 2.942-3.593.143-.863.698-1.723 1.464-2.383z"/>
    </svg>
  </div>
  <h5 class="fw-semibold mb-2">No connections yet</h5>
@@ -301,7 +305,11 @@
  const formData = new FormData(form);
  const data = Object.fromEntries(formData.entries());

- resultDiv.innerHTML = '<div class="text-info"><span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span> Testing...</div>';
+ resultDiv.innerHTML = '<div class="text-info"><span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span> Testing connection...</div>';
+
+ // Use AbortController to timeout client-side after 20 seconds
+ const controller = new AbortController();
+ const timeoutId = setTimeout(() => controller.abort(), 20000);

  try {
    const response = await fetch("{{ url_for('ui.test_connection') }}", {
@@ -310,17 +318,44 @@
        "Content-Type": "application/json",
        "X-CSRFToken": "{{ csrf_token() }}"
      },
-     body: JSON.stringify(data)
+     body: JSON.stringify(data),
+     signal: controller.signal
    });
+   clearTimeout(timeoutId);
    const result = await response.json();
    if (response.ok) {
-     resultDiv.innerHTML = `<div class="text-success"><i class="bi bi-check-circle"></i> ${result.message}</div>`;
+     resultDiv.innerHTML = `<div class="text-success">
+       <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="me-1" viewBox="0 0 16 16">
+         <path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zm-3.97-3.03a.75.75 0 0 0-1.08.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-.01-1.05z"/>
+       </svg>
+       ${result.message}
+     </div>`;
    } else {
-     resultDiv.innerHTML = `<div class="text-danger"><i class="bi bi-exclamation-circle"></i> ${result.message}</div>`;
+     resultDiv.innerHTML = `<div class="text-danger">
+       <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="me-1" viewBox="0 0 16 16">
+         <path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM5.354 4.646a.5.5 0 1 0-.708.708L7.293 8l-2.647 2.646a.5.5 0 0 0 .708.708L8 8.707l2.646 2.647a.5.5 0 0 0 .708-.708L8.707 8l2.647-2.646a.5.5 0 0 0-.708-.708L8 7.293 5.354 4.646z"/>
+       </svg>
+       ${result.message}
+     </div>`;
    }
  } catch (error) {
-   resultDiv.innerHTML = `<div class="text-danger"><i class="bi bi-exclamation-circle"></i> Connection failed</div>`;
+   clearTimeout(timeoutId);
+   if (error.name === 'AbortError') {
+     resultDiv.innerHTML = `<div class="text-danger">
+       <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="me-1" viewBox="0 0 16 16">
+         <path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM5.354 4.646a.5.5 0 1 0-.708.708L7.293 8l-2.647 2.646a.5.5 0 0 0 .708.708L8 8.707l2.646 2.647a.5.5 0 0 0 .708-.708L8.707 8l2.647-2.646a.5.5 0 0 0-.708-.708L8 7.293 5.354 4.646z"/>
+       </svg>
+       Connection test timed out - endpoint may be unreachable
+     </div>`;
+   } else {
+     resultDiv.innerHTML = `<div class="text-danger">
+       <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="me-1" viewBox="0 0 16 16">
+         <path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM5.354 4.646a.5.5 0 1 0-.708.708L7.293 8l-2.647 2.646a.5.5 0 0 0 .708.708L8 8.707l2.646 2.647a.5.5 0 0 0 .708-.708L8.707 8l2.647-2.646a.5.5 0 0 0-.708-.708L8 7.293 5.354 4.646z"/>
+       </svg>
+       Connection failed: Network error
+     </div>`;
+   }
  }
}
@@ -358,5 +393,54 @@
  const form = document.getElementById('deleteConnectionForm');
  form.action = "{{ url_for('ui.delete_connection', connection_id='CONN_ID') }}".replace('CONN_ID', id);
});

+// Check connection health for each connection in the table.
+// Uses staggered requests to avoid overwhelming the server.
+async function checkConnectionHealth(connectionId, statusEl) {
+  try {
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), 15000);
+    const response = await fetch(`/ui/connections/${connectionId}/health`, {
+      signal: controller.signal
+    });
+    clearTimeout(timeoutId);
+    const data = await response.json();
+    if (data.healthy) {
+      statusEl.innerHTML = `
+        <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="text-success" viewBox="0 0 16 16">
+          <path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zm-3.97-3.03a.75.75 0 0 0-1.08.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-.01-1.05z"/>
+        </svg>`;
+      statusEl.setAttribute('data-status', 'healthy');
+      statusEl.setAttribute('title', 'Connected');
+    } else {
+      statusEl.innerHTML = `
+        <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="text-danger" viewBox="0 0 16 16">
+          <path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM5.354 4.646a.5.5 0 1 0-.708.708L7.293 8l-2.647 2.646a.5.5 0 0 0 .708.708L8 8.707l2.646 2.647a.5.5 0 0 0 .708-.708L8.707 8l2.647-2.646a.5.5 0 0 0-.708-.708L8 7.293 5.354 4.646z"/>
+        </svg>`;
+      statusEl.setAttribute('data-status', 'unhealthy');
+      statusEl.setAttribute('title', data.error || 'Unreachable');
+    }
+  } catch (error) {
+    statusEl.innerHTML = `
+      <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="text-warning" viewBox="0 0 16 16">
+        <path d="M8.982 1.566a1.13 1.13 0 0 0-1.96 0L.165 13.233c-.457.778.091 1.767.98 1.767h13.713c.889 0 1.438-.99.98-1.767L8.982 1.566zM8 5c.535 0 .954.462.9.995l-.35 3.507a.552.552 0 0 1-1.1 0L7.1 5.995A.905.905 0 0 1 8 5zm.002 6a1 1 0 1 1 0 2 1 1 0 0 1 0-2z"/>
+      </svg>`;
+    statusEl.setAttribute('data-status', 'unknown');
+    statusEl.setAttribute('title', 'Could not check status');
+  }
+}
+
+// Stagger health checks to avoid all requests at once
+const connectionRows = document.querySelectorAll('tr[data-connection-id]');
+connectionRows.forEach((row, index) => {
+  const connectionId = row.getAttribute('data-connection-id');
+  const statusEl = row.querySelector('.connection-status');
+  if (statusEl) {
+    // Stagger requests by 200ms each
+    setTimeout(() => checkConnectionHealth(connectionId, statusEl), index * 200);
+  }
+});
</script>
{% endblock %}


@@ -14,6 +14,36 @@
  </div>
</section>

<div class="row g-4">
+ <div class="col-12 d-xl-none">
+   <div class="card shadow-sm docs-sidebar-mobile mb-0">
+     <div class="card-body py-3">
+       <div class="d-flex align-items-center justify-content-between mb-2">
+         <h3 class="h6 text-uppercase text-muted mb-0">On this page</h3>
+         <button class="btn btn-sm btn-outline-secondary" type="button" data-bs-toggle="collapse" data-bs-target="#mobileDocsToc" aria-expanded="false" aria-controls="mobileDocsToc">
+           <svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
+             <path fill-rule="evenodd" d="M1.646 4.646a.5.5 0 0 1 .708 0L8 10.293l5.646-5.647a.5.5 0 0 1 .708.708l-6 6a.5.5 0 0 1-.708 0l-6-6a.5.5 0 0 1 0-.708z"/>
+           </svg>
+         </button>
+       </div>
+       <div class="collapse" id="mobileDocsToc">
+         <ul class="list-unstyled docs-toc mb-0 small">
+           <li><a href="#setup">Set up &amp; run</a></li>
+           <li><a href="#background">Running in background</a></li>
+           <li><a href="#auth">Authentication &amp; IAM</a></li>
+           <li><a href="#console">Console tour</a></li>
+           <li><a href="#automation">Automation / CLI</a></li>
+           <li><a href="#api">REST endpoints</a></li>
+           <li><a href="#examples">API Examples</a></li>
+           <li><a href="#replication">Site Replication</a></li>
+           <li><a href="#versioning">Object Versioning</a></li>
+           <li><a href="#quotas">Bucket Quotas</a></li>
+           <li><a href="#encryption">Encryption</a></li>
+           <li><a href="#troubleshooting">Troubleshooting</a></li>
+         </ul>
+       </div>
+     </div>
+   </div>
+ </div>
  <div class="col-xl-8">
    <article id="setup" class="card shadow-sm docs-section">
      <div class="card-body">
@@ -47,9 +77,9 @@ python run.py --mode ui
<table class="table table-sm table-bordered small mb-0">
  <thead class="table-light">
    <tr>
-     <th>Variable</th>
-     <th>Default</th>
-     <th>Description</th>
+     <th style="min-width: 180px;">Variable</th>
+     <th style="min-width: 120px;">Default</th>
+     <th class="text-wrap" style="min-width: 250px;">Description</th>
    </tr>
  </thead>
  <tbody>
@@ -407,10 +437,62 @@
  <span class="docs-section-kicker">07</span>
  <h2 class="h4 mb-0">API Examples</h2>
</div>
-<p class="text-muted">Common operations using boto3.</p>
+<p class="text-muted">Common operations using popular SDKs and tools.</p>

-<h5 class="mt-4">Multipart Upload</h5>
-<pre><code class="language-python">import boto3
+<h3 class="h6 text-uppercase text-muted mt-4">Python (boto3)</h3>
+<pre class="mb-4"><code class="language-python">import boto3
+
+s3 = boto3.client(
+    's3',
+    endpoint_url='{{ api_base }}',
+    aws_access_key_id='&lt;access_key&gt;',
+    aws_secret_access_key='&lt;secret_key&gt;'
+)
+
+# List buckets
+buckets = s3.list_buckets()['Buckets']
+
+# Create bucket
+s3.create_bucket(Bucket='mybucket')
+
+# Upload file
+s3.upload_file('local.txt', 'mybucket', 'remote.txt')
+
+# Download file
+s3.download_file('mybucket', 'remote.txt', 'downloaded.txt')
+
+# Generate presigned URL (valid 1 hour)
+url = s3.generate_presigned_url(
+    'get_object',
+    Params={'Bucket': 'mybucket', 'Key': 'remote.txt'},
+    ExpiresIn=3600
+)</code></pre>
+
+<h3 class="h6 text-uppercase text-muted mt-4">JavaScript (AWS SDK v3)</h3>
+<pre class="mb-4"><code class="language-javascript">import { S3Client, ListBucketsCommand, PutObjectCommand } from '@aws-sdk/client-s3';
+
+const s3 = new S3Client({
+  endpoint: '{{ api_base }}',
+  region: 'us-east-1',
+  credentials: {
+    accessKeyId: '&lt;access_key&gt;',
+    secretAccessKey: '&lt;secret_key&gt;'
+  },
+  forcePathStyle: true // Required for S3-compatible services
+});
+
+// List buckets
+const { Buckets } = await s3.send(new ListBucketsCommand({}));
+
+// Upload object
+await s3.send(new PutObjectCommand({
+  Bucket: 'mybucket',
+  Key: 'hello.txt',
+  Body: 'Hello, World!'
+}));</code></pre>
+
+<h3 class="h6 text-uppercase text-muted mt-4">Multipart Upload (Python)</h3>
+<pre class="mb-4"><code class="language-python">import boto3

s3 = boto3.client('s3', endpoint_url='{{ api_base }}')
@@ -418,9 +500,9 @@ s3 = boto3.client('s3', endpoint_url='{{ api_base }}')
response = s3.create_multipart_upload(Bucket='mybucket', Key='large.bin')
upload_id = response['UploadId']

-# Upload parts
+# Upload parts (minimum 5MB each, except last part)
parts = []
-chunks = [b'chunk1', b'chunk2']  # Example data chunks
+chunks = [b'chunk1...', b'chunk2...']
for part_number, chunk in enumerate(chunks, start=1):
    response = s3.upload_part(
        Bucket='mybucket',
@@ -438,6 +520,19 @@ s3.complete_multipart_upload(
UploadId=upload_id, UploadId=upload_id,
MultipartUpload={'Parts': parts} MultipartUpload={'Parts': parts}
)</code></pre> )</code></pre>
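<p class="small text-muted">If an upload is abandoned midway, its parts remain stored until the upload is aborted. A minimal cleanup sketch, assuming the server implements the standard AbortMultipartUpload operation:</p>
<pre class="mb-4"><code class="language-python"># Abort an unfinished upload to discard its stored parts
s3.abort_multipart_upload(
    Bucket='mybucket',
    Key='large.bin',
    UploadId=upload_id
)</code></pre>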
<h3 class="h6 text-uppercase text-muted mt-4">Presigned URLs for Sharing</h3>
<pre class="mb-0"><code class="language-bash"># Generate a download link valid for 15 minutes
curl -X POST "{{ api_base }}/presign/mybucket/photo.jpg" \
-H "Content-Type: application/json" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;" \
-d '{"method": "GET", "expires_in": 900}'
# Generate an upload link (PUT) valid for 1 hour
curl -X POST "{{ api_base }}/presign/mybucket/upload.bin" \
-H "Content-Type: application/json" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;" \
-d '{"method": "PUT", "expires_in": 3600}'</code></pre>
</div> </div>
</article> </article>
<article id="replication" class="card shadow-sm docs-section"> <article id="replication" class="card shadow-sm docs-section">
@@ -461,15 +556,46 @@ s3.complete_multipart_upload(
</li> </li>
</ol> </ol>
<div class="alert alert-light border mb-0"> <div class="alert alert-light border mb-3 overflow-hidden">
<div class="d-flex gap-2"> <div class="d-flex flex-column flex-sm-row gap-2 mb-2">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-terminal text-muted mt-1" viewBox="0 0 16 16"> <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-terminal text-muted mt-1 flex-shrink-0 d-none d-sm-block" viewBox="0 0 16 16">
<path d="M6 9a.5.5 0 0 1 .5-.5h3a.5.5 0 0 1 0 1h-3A.5.5 0 0 1 6 9zM3.854 4.146a.5.5 0 1 0-.708.708L4.793 6.5 3.146 8.146a.5.5 0 1 0 .708.708l2-2a.5.5 0 0 0 0-.708l-2-2z"/> <path d="M6 9a.5.5 0 0 1 .5-.5h3a.5.5 0 0 1 0 1h-3A.5.5 0 0 1 6 9zM3.854 4.146a.5.5 0 1 0-.708.708L4.793 6.5 3.146 8.146a.5.5 0 1 0 .708.708l2-2a.5.5 0 0 0 0-.708l-2-2z"/>
<path d="M2 1a2 2 0 0 0-2 2v10a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V3a2 2 0 0 0-2-2H2zm12 1a1 1 0 0 1 1 1v10a1 1 0 0 1-1 1H2a1 1 0 0 1-1-1V3a1 1 0 0 1 1-1h12z"/> <path d="M2 1a2 2 0 0 0-2 2v10a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V3a2 2 0 0 0-2-2H2zm12 1a1 1 0 0 1 1 1v10a1 1 0 0 1-1 1H2a1 1 0 0 1-1-1V3a1 1 0 0 1 1-1h12z"/>
</svg> </svg>
<div> <div class="flex-grow-1 min-width-0">
<strong>Headless Target Setup?</strong> <strong>Headless Target Setup</strong>
<p class="small text-muted mb-0">If your target server has no UI, use the Python API directly to bootstrap credentials. See <code>docs.md</code> in the project root for the <code>setup_target.py</code> script.</p> <p class="small text-muted mb-2">If your target server has no UI, create a <code>setup_target.py</code> script to bootstrap credentials:</p>
<pre class="mb-0 overflow-auto" style="max-width: 100%;"><code class="language-python"># setup_target.py
from pathlib import Path
from app.iam import IamService
from app.storage import ObjectStorage
# Initialize services (paths match default config)
data_dir = Path("data")
iam = IamService(data_dir / ".myfsio.sys" / "config" / "iam.json")
storage = ObjectStorage(data_dir)
# 1. Create the bucket
bucket_name = "backup-bucket"
try:
storage.create_bucket(bucket_name)
print(f"Bucket '{bucket_name}' created.")
except Exception as e:
print(f"Bucket creation skipped: {e}")
# 2. Create the user
try:
creds = iam.create_user(
display_name="Replication User",
policies=[{"bucket": bucket_name, "actions": ["write", "read", "list"]}]
)
print("\n--- CREDENTIALS GENERATED ---")
print(f"Access Key: {creds['access_key']}")
print(f"Secret Key: {creds['secret_key']}")
print("-----------------------------")
except Exception as e:
print(f"User creation failed: {e}")</code></pre>
<p class="small text-muted mt-2 mb-0">Save and run: <code>python setup_target.py</code></p>
</div> </div>
</div> </div>
</div> </div>
@@ -487,6 +613,86 @@ s3.complete_multipart_upload(
</p> </p>
</div> </div>
</article> </article>
<article id="versioning" class="card shadow-sm docs-section">
<div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3">
<span class="docs-section-kicker">09</span>
<h2 class="h4 mb-0">Object Versioning</h2>
</div>
<p class="text-muted">Keep multiple versions of objects to protect against accidental deletions and overwrites. Restore previous versions at any time.</p>
<h3 class="h6 text-uppercase text-muted mt-4">Enabling Versioning</h3>
<ol class="docs-steps mb-3">
<li>Navigate to your bucket's <strong>Properties</strong> tab.</li>
<li>Find the <strong>Versioning</strong> card and click <strong>Enable</strong>.</li>
<li>All subsequent uploads will create new versions instead of overwriting.</li>
</ol>
<h3 class="h6 text-uppercase text-muted mt-4">Version Operations</h3>
<div class="table-responsive mb-3">
<table class="table table-sm table-bordered small">
<thead class="table-light">
<tr>
<th>Operation</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td><strong>View Versions</strong></td>
<td>Click the version icon on any object to see all historical versions with timestamps and sizes.</td>
</tr>
<tr>
<td><strong>Restore Version</strong></td>
<td>Click <strong>Restore</strong> on any version to make it the current version (creates a copy).</td>
</tr>
<tr>
<td><strong>Delete Current</strong></td>
<td>Deleting an object archives it. Previous versions remain accessible.</td>
</tr>
<tr>
<td><strong>Purge All</strong></td>
<td>Permanently delete an object and all its versions. This cannot be undone.</td>
</tr>
</tbody>
</table>
</div>
<h3 class="h6 text-uppercase text-muted mt-4">Archived Objects</h3>
<p class="small text-muted mb-3">When you delete a versioned object, it becomes "archived" - the current version is removed but historical versions remain. The <strong>Archived</strong> tab shows these objects so you can restore them.</p>
<h3 class="h6 text-uppercase text-muted mt-4">API Usage</h3>
<pre class="mb-3"><code class="language-bash"># Enable versioning
curl -X PUT "{{ api_base }}/&lt;bucket&gt;?versioning" \
-H "Content-Type: application/json" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;" \
-d '{"Status": "Enabled"}'
# Get versioning status
curl "{{ api_base }}/&lt;bucket&gt;?versioning" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;"
# List object versions
curl "{{ api_base }}/&lt;bucket&gt;?versions" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;"
# Get specific version
curl "{{ api_base }}/&lt;bucket&gt;/&lt;key&gt;?versionId=&lt;version-id&gt;" \
-H "X-Access-Key: &lt;key&gt;" -H "X-Secret-Key: &lt;secret&gt;"</code></pre>
<div class="alert alert-light border mb-0">
<div class="d-flex gap-2">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-info-circle text-muted mt-1" viewBox="0 0 16 16">
<path d="M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z"/>
<path d="m8.93 6.588-2.29.287-.082.38.45.083c.294.07.352.176.288.469l-.738 3.468c-.194.897.105 1.319.808 1.319.545 0 1.178-.252 1.465-.598l.088-.416c-.2.176-.492.246-.686.246-.275 0-.375-.193-.304-.533L8.93 6.588zM9 4.5a1 1 0 1 1-2 0 1 1 0 0 1 2 0z"/>
</svg>
<div>
<strong>Storage Impact:</strong> Each version consumes storage. Enable quotas to limit total bucket size including all versions.
</div>
</div>
</div>
</div>
</article>
<article id="quotas" class="card shadow-sm docs-section"> <article id="quotas" class="card shadow-sm docs-section">
<div class="card-body"> <div class="card-body">
<div class="d-flex align-items-center gap-2 mb-3"> <div class="d-flex align-items-center gap-2 mb-3">
@@ -709,6 +915,7 @@ curl -X DELETE "{{ api_base }}/kms/keys/{key-id}?waiting_period_days=30" \
<li><a href="#api">REST endpoints</a></li> <li><a href="#api">REST endpoints</a></li>
<li><a href="#examples">API Examples</a></li> <li><a href="#examples">API Examples</a></li>
<li><a href="#replication">Site Replication</a></li> <li><a href="#replication">Site Replication</a></li>
<li><a href="#versioning">Object Versioning</a></li>
<li><a href="#quotas">Bucket Quotas</a></li> <li><a href="#quotas">Bucket Quotas</a></li>
<li><a href="#encryption">Encryption</a></li> <li><a href="#encryption">Encryption</a></li>
<li><a href="#troubleshooting">Troubleshooting</a></li> <li><a href="#troubleshooting">Troubleshooting</a></li>
View File
@@ -10,6 +10,7 @@
</svg> </svg>
IAM Configuration IAM Configuration
</h1> </h1>
<p class="text-muted mb-0 mt-1">Create and manage users with fine-grained bucket permissions.</p>
</div> </div>
<div class="d-flex gap-2"> <div class="d-flex gap-2">
{% if not iam_locked %} {% if not iam_locked %}
@@ -109,35 +110,68 @@
{% else %} {% else %}
<div class="card-body px-4 pb-4"> <div class="card-body px-4 pb-4">
{% if users %} {% if users %}
<div class="table-responsive"> <div class="row g-3">
<table class="table table-hover align-middle mb-0"> {% for user in users %}
<thead class="table-light"> <div class="col-md-6 col-xl-4">
<tr> <div class="card h-100 iam-user-card">
<th scope="col">User</th> <div class="card-body">
<th scope="col">Policies</th> <div class="d-flex align-items-start justify-content-between mb-3">
<th scope="col" class="text-end">Actions</th>
</tr>
</thead>
<tbody>
{% for user in users %}
<tr>
<td>
<div class="d-flex align-items-center gap-3"> <div class="d-flex align-items-center gap-3">
<div class="user-avatar"> <div class="user-avatar user-avatar-lg">
<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" fill="currentColor" viewBox="0 0 16 16"> <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="currentColor" viewBox="0 0 16 16">
<path d="M8 8a3 3 0 1 0 0-6 3 3 0 0 0 0 6zm2-3a2 2 0 1 1-4 0 2 2 0 0 1 4 0zm4 8c0 1-1 1-1 1H3s-1 0-1-1 1-4 6-4 6 3 6 4zm-1-.004c-.001-.246-.154-.986-.832-1.664C11.516 10.68 10.289 10 8 10c-2.29 0-3.516.68-4.168 1.332-.678.678-.83 1.418-.832 1.664h10z"/> <path d="M8 8a3 3 0 1 0 0-6 3 3 0 0 0 0 6zm2-3a2 2 0 1 1-4 0 2 2 0 0 1 4 0zm4 8c0 1-1 1-1 1H3s-1 0-1-1 1-4 6-4 6 3 6 4zm-1-.004c-.001-.246-.154-.986-.832-1.664C11.516 10.68 10.289 10 8 10c-2.29 0-3.516.68-4.168 1.332-.678.678-.83 1.418-.832 1.664h10z"/>
</svg> </svg>
</div> </div>
<div> <div class="min-width-0">
<div class="fw-medium">{{ user.display_name }}</div> <h6 class="fw-semibold mb-0 text-truncate" title="{{ user.display_name }}">{{ user.display_name }}</h6>
<code class="small text-muted">{{ user.access_key }}</code> <code class="small text-muted d-block text-truncate" title="{{ user.access_key }}">{{ user.access_key }}</code>
</div> </div>
</div> </div>
</td> <div class="dropdown">
<td> <button class="btn btn-sm btn-icon" type="button" data-bs-toggle="dropdown" aria-expanded="false">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" viewBox="0 0 16 16">
<path d="M9.5 13a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0zm0-5a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0zm0-5a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0z"/>
</svg>
</button>
<ul class="dropdown-menu dropdown-menu-end">
<li>
<button class="dropdown-item" type="button" data-edit-user="{{ user.access_key }}" data-display-name="{{ user.display_name }}">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16">
<path d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5 13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5z"/>
</svg>
Edit Name
</button>
</li>
<li>
<button class="dropdown-item" type="button" data-rotate-user="{{ user.access_key }}">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16">
<path d="M11.534 7h3.932a.25.25 0 0 1 .192.41l-1.966 2.36a.25.25 0 0 1-.384 0l-1.966-2.36a.25.25 0 0 1 .192-.41zm-11 2h3.932a.25.25 0 0 0 .192-.41L2.692 6.23a.25.25 0 0 0-.384 0L.342 8.59A.25.25 0 0 0 .534 9z"/>
<path fill-rule="evenodd" d="M8 3c-1.552 0-2.94.707-3.857 1.818a.5.5 0 1 1-.771-.636A6.002 6.002 0 0 1 13.917 7H12.9A5.002 5.002 0 0 0 8 3zM3.1 9a5.002 5.002 0 0 0 8.757 2.182.5.5 0 1 1 .771.636A6.002 6.002 0 0 1 2.083 9H3.1z"/>
</svg>
Rotate Secret
</button>
</li>
<li><hr class="dropdown-divider"></li>
<li>
<button class="dropdown-item text-danger" type="button" data-delete-user="{{ user.access_key }}">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-2" viewBox="0 0 16 16">
<path d="M5.5 5.5a.5.5 0 0 1 .5.5v6a.5.5 0 0 1-1 0v-6a.5.5 0 0 1 .5-.5zm2.5 0a.5.5 0 0 1 .5.5v6a.5.5 0 0 1-1 0v-6a.5.5 0 0 1 .5-.5zm3 .5v6a.5.5 0 0 1-1 0v-6a.5.5 0 0 1 1 0z"/>
<path fill-rule="evenodd" d="M14.5 3a1 1 0 0 1-1 1H13v9a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V4h-.5a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1H6a1 1 0 0 1 1-1h2a1 1 0 0 1 1 1h3.5a1 1 0 0 1 1 1v1zM4.118 4 4 4.059V13a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1V4.059L11.882 4H4.118zM2.5 3V2h11v1h-11z"/>
</svg>
Delete User
</button>
</li>
</ul>
</div>
</div>
<div class="mb-3">
<div class="small text-muted mb-2">Bucket Permissions</div>
<div class="d-flex flex-wrap gap-1"> <div class="d-flex flex-wrap gap-1">
{% for policy in user.policies %} {% for policy in user.policies %}
<span class="badge bg-primary bg-opacity-10 text-primary"> <span class="badge bg-primary bg-opacity-10 text-primary">
<svg xmlns="http://www.w3.org/2000/svg" width="10" height="10" fill="currentColor" class="me-1" viewBox="0 0 16 16">
<path d="M2.522 5H2a.5.5 0 0 0-.494.574l1.372 9.149A1.5 1.5 0 0 0 4.36 16h7.278a1.5 1.5 0 0 0 1.483-1.277l1.373-9.149A.5.5 0 0 0 14 5h-.522A5.5 5.5 0 0 0 2.522 5zm1.005 0a4.5 4.5 0 0 1 8.945 0H3.527z"/>
</svg>
{{ policy.bucket }} {{ policy.bucket }}
{% if '*' in policy.actions %} {% if '*' in policy.actions %}
<span class="opacity-75">(full)</span> <span class="opacity-75">(full)</span>
@@ -149,38 +183,18 @@
<span class="badge bg-secondary bg-opacity-10 text-secondary">No policies</span> <span class="badge bg-secondary bg-opacity-10 text-secondary">No policies</span>
{% endfor %} {% endfor %}
</div> </div>
</td> </div>
<td class="text-end"> <button class="btn btn-outline-primary btn-sm w-100" type="button" data-policy-editor data-access-key="{{ user.access_key }}">
<div class="btn-group btn-group-sm" role="group"> <svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1" viewBox="0 0 16 16">
<button class="btn btn-outline-primary" type="button" data-rotate-user="{{ user.access_key }}" title="Rotate Secret"> <path d="M8 4.754a3.246 3.246 0 1 0 0 6.492 3.246 3.246 0 0 0 0-6.492zM5.754 8a2.246 2.246 0 1 1 4.492 0 2.246 2.246 0 0 1-4.492 0z"/>
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16"> <path d="M9.796 1.343c-.527-1.79-3.065-1.79-3.592 0l-.094.319a.873.873 0 0 1-1.255.52l-.292-.16c-1.64-.892-3.433.902-2.54 2.541l.159.292a.873.873 0 0 1-.52 1.255l-.319.094c-1.79.527-1.79 3.065 0 3.592l.319.094a.873.873 0 0 1 .52 1.255l-.16.292c-.892 1.64.901 3.434 2.541 2.54l.292-.159a.873.873 0 0 1 1.255.52l.094.319c.527 1.79 3.065 1.79 3.592 0l.094-.319a.873.873 0 0 1 1.255-.52l.292.16c1.64.893 3.434-.902 2.54-2.541l-.159-.292a.873.873 0 0 1 .52-1.255l.319-.094c1.79-.527 1.79-3.065 0-3.592l-.319-.094a.873.873 0 0 1-.52-1.255l.16-.292c.893-1.64-.902-3.433-2.541-2.54l-.292.159a.873.873 0 0 1-1.255-.52l-.094-.319z"/>
<path d="M11.534 7h3.932a.25.25 0 0 1 .192.41l-1.966 2.36a.25.25 0 0 1-.384 0l-1.966-2.36a.25.25 0 0 1 .192-.41zm-11 2h3.932a.25.25 0 0 0 .192-.41L2.692 6.23a.25.25 0 0 0-.384 0L.342 8.59A.25.25 0 0 0 .534 9z"/> </svg>
<path fill-rule="evenodd" d="M8 3c-1.552 0-2.94.707-3.857 1.818a.5.5 0 1 1-.771-.636A6.002 6.002 0 0 1 13.917 7H12.9A5.002 5.002 0 0 0 8 3zM3.1 9a5.002 5.002 0 0 0 8.757 2.182.5.5 0 1 1 .771.636A6.002 6.002 0 0 1 2.083 9H3.1z"/> Manage Policies
</svg> </button>
</button> </div>
<button class="btn btn-outline-secondary" type="button" data-edit-user="{{ user.access_key }}" data-display-name="{{ user.display_name }}" title="Edit User"> </div>
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16"> </div>
<path d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5 13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5z"/> {% endfor %}
</svg>
</button>
<button class="btn btn-outline-secondary" type="button" data-policy-editor data-access-key="{{ user.access_key }}" title="Edit Policies">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
<path d="M8 4.754a3.246 3.246 0 1 0 0 6.492 3.246 3.246 0 0 0 0-6.492zM5.754 8a2.246 2.246 0 1 1 4.492 0 2.246 2.246 0 0 1-4.492 0z"/>
<path d="M9.796 1.343c-.527-1.79-3.065-1.79-3.592 0l-.094.319a.873.873 0 0 1-1.255.52l-.292-.16c-1.64-.892-3.433.902-2.54 2.541l.159.292a.873.873 0 0 1-.52 1.255l-.319.094c-1.79.527-1.79 3.065 0 3.592l.319.094a.873.873 0 0 1 .52 1.255l-.16.292c-.892 1.64.901 3.434 2.541 2.54l.292-.159a.873.873 0 0 1 1.255.52l.094.319c.527 1.79 3.065 1.79 3.592 0l.094-.319a.873.873 0 0 1 1.255-.52l.292.16c1.64.893 3.434-.902 2.54-2.541l-.159-.292a.873.873 0 0 1 .52-1.255l.319-.094c1.79-.527 1.79-3.065 0-3.592l-.319-.094a.873.873 0 0 1-.52-1.255l.16-.292c.893-1.64-.902-3.433-2.541-2.54l-.292.159a.873.873 0 0 1-1.255-.52l-.094-.319z"/>
</svg>
</button>
<button class="btn btn-outline-danger" type="button" data-delete-user="{{ user.access_key }}" title="Delete User">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
<path d="M5.5 5.5a.5.5 0 0 1 .5.5v6a.5.5 0 0 1-1 0v-6a.5.5 0 0 1 .5-.5zm2.5 0a.5.5 0 0 1 .5.5v6a.5.5 0 0 1-1 0v-6a.5.5 0 0 1 .5-.5zm3 .5v6a.5.5 0 0 1-1 0v-6a.5.5 0 0 1 1 0z"/>
<path fill-rule="evenodd" d="M14.5 3a1 1 0 0 1-1 1H13v9a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V4h-.5a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1H6a1 1 0 0 1 1-1h2a1 1 0 0 1 1 1h3.5a1 1 0 0 1 1 1v1zM4.118 4 4 4.059V13a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1V4.059L11.882 4H4.118zM2.5 3V2h11v1h-11z"/>
</svg>
</button>
</div>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div> </div>
{% else %} {% else %}
<div class="empty-state text-center py-5"> <div class="empty-state text-center py-5">
@@ -442,6 +456,95 @@
{{ super() }} {{ super() }}
<script> <script>
(function () { (function () {
// Auto-indent for JSON textareas
function setupJsonAutoIndent(textarea) {
if (!textarea) return;
textarea.addEventListener('keydown', function(e) {
if (e.key === 'Enter') {
e.preventDefault();
const start = this.selectionStart;
const end = this.selectionEnd;
const value = this.value;
// Get the current line
const lineStart = value.lastIndexOf('\n', start - 1) + 1;
const currentLine = value.substring(lineStart, start);
// Calculate base indentation (leading whitespace of current line)
const indentMatch = currentLine.match(/^(\s*)/);
let indent = indentMatch ? indentMatch[1] : '';
// Check if the line ends with { or [ (should increase indent)
const trimmedLine = currentLine.trim();
const lastChar = trimmedLine.slice(-1);
let newIndent = indent;
let insertAfter = '';
if (lastChar === '{' || lastChar === '[') {
// Add extra indentation
newIndent = indent + ' ';
// Check if we need to add closing bracket on new line
const charAfterCursor = value.substring(start, start + 1).trim();
if ((lastChar === '{' && charAfterCursor === '}') ||
(lastChar === '[' && charAfterCursor === ']')) {
insertAfter = '\n' + indent;
}
} else if (lastChar === ',' || lastChar === ':') {
// Keep same indentation for continuation
newIndent = indent;
}
// Insert newline with proper indentation
const insertion = '\n' + newIndent + insertAfter;
const newValue = value.substring(0, start) + insertion + value.substring(end);
this.value = newValue;
// Set cursor position after the indentation
const newCursorPos = start + 1 + newIndent.length;
this.selectionStart = this.selectionEnd = newCursorPos;
// Trigger input event for any listeners
this.dispatchEvent(new Event('input', { bubbles: true }));
}
// Handle Tab key for indentation
if (e.key === 'Tab') {
e.preventDefault();
const start = this.selectionStart;
const end = this.selectionEnd;
if (e.shiftKey) {
// Outdent: remove 2 spaces from start of line
const lineStart = this.value.lastIndexOf('\n', start - 1) + 1;
const lineContent = this.value.substring(lineStart, start);
if (lineContent.startsWith(' ')) {
this.value = this.value.substring(0, lineStart) +
this.value.substring(lineStart + 2);
this.selectionStart = this.selectionEnd = Math.max(lineStart, start - 2);
}
} else {
// Indent: insert 2 spaces
this.value = this.value.substring(0, start) + ' ' + this.value.substring(end);
this.selectionStart = this.selectionEnd = start + 2;
}
this.dispatchEvent(new Event('input', { bubbles: true }));
}
});
}
// Apply auto-indent to policy editor textareas
setupJsonAutoIndent(document.getElementById('policyEditorDocument'));
setupJsonAutoIndent(document.getElementById('createUserPolicies'));
const currentUserKey = {{ principal.access_key | tojson }}; const currentUserKey = {{ principal.access_key | tojson }};
const configCopyButtons = document.querySelectorAll('.config-copy'); const configCopyButtons = document.querySelectorAll('.config-copy');
configCopyButtons.forEach((button) => { configCopyButtons.forEach((button) => {
View File
@@ -219,24 +219,42 @@
</div> </div>
<div class="col-lg-4"> <div class="col-lg-4">
<div class="card shadow-sm border-0 h-100 overflow-hidden" style="background: linear-gradient(135deg, #3b82f6 0%, #8b5cf6 100%);"> {% set has_issues = (cpu_percent > 80) or (memory.percent > 85) or (disk.percent > 90) %}
<div class="card shadow-sm border-0 h-100 overflow-hidden" style="background: linear-gradient(135deg, {% if has_issues %}#ef4444 0%, #f97316{% else %}#3b82f6 0%, #8b5cf6{% endif %} 100%);">
<div class="card-body p-4 d-flex flex-column justify-content-center text-white position-relative"> <div class="card-body p-4 d-flex flex-column justify-content-center text-white position-relative">
<div class="position-absolute top-0 end-0 opacity-25" style="transform: translate(20%, -20%);"> <div class="position-absolute top-0 end-0 opacity-25" style="transform: translate(20%, -20%);">
<svg xmlns="http://www.w3.org/2000/svg" width="160" height="160" fill="currentColor" class="bi bi-cloud-check" viewBox="0 0 16 16"> <svg xmlns="http://www.w3.org/2000/svg" width="160" height="160" fill="currentColor" class="bi bi-{% if has_issues %}exclamation-triangle{% else %}cloud-check{% endif %}" viewBox="0 0 16 16">
{% if has_issues %}
<path d="M7.938 2.016A.13.13 0 0 1 8.002 2a.13.13 0 0 1 .063.016.146.146 0 0 1 .054.057l6.857 11.667c.036.06.035.124.002.183a.163.163 0 0 1-.054.06.116.116 0 0 1-.066.017H1.146a.115.115 0 0 1-.066-.017.163.163 0 0 1-.054-.06.176.176 0 0 1 .002-.183L7.884 2.073a.147.147 0 0 1 .054-.057zm1.044-.45a1.13 1.13 0 0 0-1.96 0L.165 13.233c-.457.778.091 1.767.98 1.767h13.713c.889 0 1.438-.99.98-1.767L8.982 1.566z"/>
<path d="M7.002 12a1 1 0 1 1 2 0 1 1 0 0 1-2 0zM7.1 5.995a.905.905 0 1 1 1.8 0l-.35 3.507a.552.552 0 0 1-1.1 0L7.1 5.995z"/>
{% else %}
<path fill-rule="evenodd" d="M10.354 6.146a.5.5 0 0 1 0 .708l-3 3a.5.5 0 0 1-.708 0l-1.5-1.5a.5.5 0 1 1 .708-.708L7 8.793l2.646-2.647a.5.5 0 0 1 .708 0z"/> <path fill-rule="evenodd" d="M10.354 6.146a.5.5 0 0 1 0 .708l-3 3a.5.5 0 0 1-.708 0l-1.5-1.5a.5.5 0 1 1 .708-.708L7 8.793l2.646-2.647a.5.5 0 0 1 .708 0z"/>
<path d="M4.406 3.342A5.53 5.53 0 0 1 8 2c2.69 0 4.923 2 5.166 4.579C14.758 6.804 16 8.137 16 9.773 16 11.569 14.502 13 12.687 13H3.781C1.708 13 0 11.366 0 9.318c0-1.763 1.266-3.223 2.942-3.593.143-.863.698-1.723 1.464-2.383z"/> <path d="M4.406 3.342A5.53 5.53 0 0 1 8 2c2.69 0 4.923 2 5.166 4.579C14.758 6.804 16 8.137 16 9.773 16 11.569 14.502 13 12.687 13H3.781C1.708 13 0 11.366 0 9.318c0-1.763 1.266-3.223 2.942-3.593.143-.863.698-1.723 1.464-2.383z"/>
{% endif %}
</svg> </svg>
</div> </div>
<div class="mb-3"> <div class="mb-3">
<span class="badge bg-white text-primary fw-semibold px-3 py-2"> <span class="badge bg-white {% if has_issues %}text-danger{% else %}text-primary{% endif %} fw-semibold px-3 py-2">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="bi bi-check-circle-fill me-1" viewBox="0 0 16 16"> <svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="bi bi-{% if has_issues %}exclamation-circle-fill{% else %}check-circle-fill{% endif %} me-1" viewBox="0 0 16 16">
{% if has_issues %}
<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM8 4a.905.905 0 0 0-.9.995l.35 3.507a.552.552 0 0 0 1.1 0l.35-3.507A.905.905 0 0 0 8 4zm.002 6a1 1 0 1 0 0 2 1 1 0 0 0 0-2z"/>
{% else %}
<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zm-3.97-3.03a.75.75 0 0 0-1.08.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-.01-1.05z"/> <path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zm-3.97-3.03a.75.75 0 0 0-1.08.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-.01-1.05z"/>
{% endif %}
</svg> </svg>
v{{ app.version }} v{{ app.version }}
</span> </span>
</div> </div>
<h4 class="card-title fw-bold mb-3">System Status</h4> <h4 class="card-title fw-bold mb-3">System Health</h4>
<p class="card-text opacity-90 mb-4">All systems operational. Your storage infrastructure is running smoothly with no detected issues.</p> {% if has_issues %}
<ul class="list-unstyled small mb-4 opacity-90">
{% if cpu_percent > 80 %}<li class="mb-1">CPU usage is high ({{ cpu_percent }}%)</li>{% endif %}
{% if memory.percent > 85 %}<li class="mb-1">Memory usage is high ({{ memory.percent }}%)</li>{% endif %}
{% if disk.percent > 90 %}<li class="mb-1">Disk space is critically low ({{ disk.percent }}% used)</li>{% endif %}
</ul>
{% else %}
<p class="card-text opacity-90 mb-4 small">All resources are within normal operating parameters.</p>
{% endif %}
<div class="d-flex gap-4"> <div class="d-flex gap-4">
<div> <div>
<div class="h3 fw-bold mb-0">{{ app.uptime_days }}d</div> <div class="h3 fw-bold mb-0">{{ app.uptime_days }}d</div>
View File
@@ -150,16 +150,21 @@ class TestPaginatedObjectListing:
assert len(data["objects"]) == 1 assert len(data["objects"]) == 1
obj = data["objects"][0] obj = data["objects"][0]
# Check all expected fields # Check all expected fields
assert obj["key"] == "test.txt" assert obj["key"] == "test.txt"
assert obj["size"] == 12 # len("test content") assert obj["size"] == 12 # len("test content")
assert "last_modified" in obj assert "last_modified" in obj
assert "last_modified_display" in obj assert "last_modified_display" in obj
assert "etag" in obj assert "etag" in obj
assert "preview_url" in obj
assert "download_url" in obj # URLs are now returned as templates (not per-object) for performance
assert "delete_endpoint" in obj assert "url_templates" in data
templates = data["url_templates"]
assert "preview" in templates
assert "download" in templates
assert "delete" in templates
assert "KEY_PLACEHOLDER" in templates["preview"]
def test_bucket_detail_page_loads_without_objects(self, tmp_path): def test_bucket_detail_page_loads_without_objects(self, tmp_path):
"""Bucket detail page should load even with many objects.""" """Bucket detail page should load even with many objects."""