MyFSIO v0.1.8 Release #9

Merged
kqjy merged 13 commits from next into main 2025-12-23 06:01:32 +00:00
5 changed files with 542 additions and 311 deletions
Showing only changes of commit f5451c162b

View File

@@ -45,7 +45,7 @@ def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
try:
shutil.move(str(legacy_path), str(active_path))
except OSError:
- # Fall back to copy + delete if move fails (e.g., cross-device)
+ # Fall back to copy + delete for cross-device moves
shutil.copy2(legacy_path, active_path)
try:
legacy_path.unlink(missing_ok=True)
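Note for reviewers: the hunk above only touches the fallback branch of the migration helper. A minimal standalone sketch of the copy-then-delete pattern it relies on (the helper name and structure here are illustrative, not copied from the MyFSIO source):

```python
import shutil
from pathlib import Path

def move_with_fallback(src: Path, dst: Path) -> None:
    """Move src to dst, falling back to copy + delete when shutil.move fails."""
    try:
        shutil.move(str(src), str(dst))
    except OSError:
        # copy2 preserves timestamps/permissions; remove the source afterwards.
        shutil.copy2(src, dst)
        try:
            src.unlink(missing_ok=True)
        except OSError:
            pass  # leave the legacy copy in place rather than failing startup
```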
@@ -101,25 +101,24 @@ def create_app(
bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))
- # Initialize Replication components
- # Store config files in the system config directory for consistency
+ # Initialize replication with system config directory for consistency
storage_root = Path(app.config["STORAGE_ROOT"])
config_dir = storage_root / ".myfsio.sys" / "config"
config_dir.mkdir(parents=True, exist_ok=True)
- # Define paths with migration from legacy locations
+ # Migrate connection configs from legacy locations
connections_path = _migrate_config_file(
active_path=config_dir / "connections.json",
legacy_paths=[
- storage_root / ".myfsio.sys" / "connections.json",  # Previous location
+ storage_root / ".myfsio.sys" / "connections.json",
- storage_root / ".connections.json",  # Original legacy location
+ storage_root / ".connections.json",
],
)
replication_rules_path = _migrate_config_file(
active_path=config_dir / "replication_rules.json",
legacy_paths=[
- storage_root / ".myfsio.sys" / "replication_rules.json",  # Previous location
+ storage_root / ".myfsio.sys" / "replication_rules.json",
- storage_root / ".replication_rules.json",  # Original legacy location
+ storage_root / ".replication_rules.json",
],
)

View File

@@ -23,7 +23,7 @@ from .storage import ObjectStorage, StorageError, QuotaExceededError
s3_api_bp = Blueprint("s3_api", __name__)
- # ---------------------- helpers ----------------------
+ # Helper functions for accessing app extensions and generating responses
def _storage() -> ObjectStorage:
return current_app.extensions["object_storage"]
@@ -69,8 +69,7 @@ def _get_signature_key(key: str, date_stamp: str, region_name: str, service_name
def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
- # Parse Authorization header
- # AWS4-HMAC-SHA256 Credential=AKIA.../20230101/us-east-1/s3/aws4_request, SignedHeaders=host;x-amz-date, Signature=...
+ # Parse Authorization header: AWS4-HMAC-SHA256 Credential=AKIA.../20230101/us-east-1/s3/aws4_request, SignedHeaders=host;x-amz-date, Signature=...
match = re.match(
r"AWS4-HMAC-SHA256 Credential=([^/]+)/([^/]+)/([^/]+)/([^/]+)/aws4_request, SignedHeaders=([^,]+), Signature=(.+)",
auth_header,
@@ -79,17 +78,14 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
return None
access_key, date_stamp, region, service, signed_headers_str, signature = match.groups()
- # Get secret key
secret_key = _iam().get_secret_key(access_key)
if not secret_key:
raise IamError("Invalid access key")
- # Canonical Request
+ # Build canonical request
method = req.method
canonical_uri = quote(req.path, safe="/-_.~")
- # Canonical Query String
query_args = []
for key, value in req.args.items(multi=True):
query_args.append((key, value))
@@ -100,7 +96,6 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}")
canonical_query_string = "&".join(canonical_query_parts)
- # Canonical Headers
signed_headers_list = signed_headers_str.split(";")
canonical_headers_parts = []
for header in signed_headers_list:
@@ -112,18 +107,13 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
canonical_headers_parts.append(f"{header.lower()}:{header_val}\n")
canonical_headers = "".join(canonical_headers_parts)
- # Payload Hash
payload_hash = req.headers.get("X-Amz-Content-Sha256")
if not payload_hash:
payload_hash = hashlib.sha256(req.get_data()).hexdigest()
canonical_request = f"{method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers_str}\n{payload_hash}"
- # String to Sign
- amz_date = req.headers.get("X-Amz-Date")
- if not amz_date:
- amz_date = req.headers.get("Date")
+ amz_date = req.headers.get("X-Amz-Date") or req.headers.get("Date")
if not amz_date:
raise IamError("Missing Date header")
@@ -134,13 +124,13 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
now = datetime.now(timezone.utc)
time_diff = abs((now - request_time).total_seconds())
- if time_diff > 900:  # 15 minutes
+ if time_diff > 900:  # AWS standard: 15-minute request validity window
raise IamError("Request timestamp too old or too far in the future")
required_headers = {'host', 'x-amz-date'}
signed_headers_set = set(signed_headers_str.split(';'))
if not required_headers.issubset(signed_headers_set):
- # Some clients might sign 'date' instead of 'x-amz-date'
+ # Some clients use 'date' instead of 'x-amz-date'
if 'date' in signed_headers_set:
required_headers.remove('x-amz-date')
required_headers.add('date')
@@ -187,11 +177,10 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
if not secret_key:
raise IamError("Invalid access key")
- # Canonical Request
+ # Build canonical request
method = req.method
canonical_uri = quote(req.path, safe="/-_.~")
- # Canonical Query String
query_args = []
for key, value in req.args.items(multi=True):
if key != "X-Amz-Signature":
@@ -203,7 +192,6 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}")
canonical_query_string = "&".join(canonical_query_parts)
- # Canonical Headers
signed_headers_list = signed_headers_str.split(";")
canonical_headers_parts = []
for header in signed_headers_list:
@@ -212,7 +200,6 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
canonical_headers_parts.append(f"{header}:{val}\n")
canonical_headers = "".join(canonical_headers_parts)
- # Payload Hash
payload_hash = "UNSIGNED-PAYLOAD"
canonical_request = "\n".join([
@@ -224,7 +211,7 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
payload_hash
])
- # String to Sign
+ # Build signature
algorithm = "AWS4-HMAC-SHA256"
credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
hashed_request = hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()
@@ -235,7 +222,6 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
hashed_request
])
- # Signature
signing_key = _get_signature_key(secret_key, date_stamp, region, service)
calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
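For reference while reviewing the signature path: `_get_signature_key` itself is outside this diff, but SigV4 signing keys are derived by the fixed HMAC-SHA256 chain defined by AWS. A minimal sketch of that derivation (function and variable names here are illustrative, not taken from the MyFSIO source):

```python
import hashlib
import hmac

def _sign(key: bytes, msg: str) -> bytes:
    return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()

def derive_sigv4_key(secret_key: str, date_stamp: str, region: str, service: str) -> bytes:
    # kSecret -> kDate -> kRegion -> kService -> kSigning, per the SigV4 spec
    k_date = _sign(("AWS4" + secret_key).encode("utf-8"), date_stamp)
    k_region = _sign(k_date, region)
    k_service = _sign(k_region, service)
    return _sign(k_service, "aws4_request")
```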
@@ -493,7 +479,7 @@ def _generate_presigned_url(
}
canonical_query = _encode_query_params(query_params)
- # Determine host and scheme from config or request
+ # Get presigned URL host and scheme from config or request headers
api_base = current_app.config.get("API_BASE_URL")
if api_base:
parsed = urlparse(api_base)
@@ -914,7 +900,7 @@ def _object_tagging_handler(bucket_name: str, object_key: str) -> Response:
if error:
return error
- # For tagging, we use read permission for GET, write for PUT/DELETE
+ # Use read permission for GET, write for PUT/DELETE
action = "read" if request.method == "GET" else "write"
try:
_authorize_action(principal, bucket_name, action, object_key=object_key)
@@ -1093,10 +1079,9 @@ def _bucket_location_handler(bucket_name: str) -> Response:
if not storage.bucket_exists(bucket_name):
return _error_response("NoSuchBucket", "Bucket does not exist", 404)
- # Return the configured AWS_REGION
+ # Return bucket location (empty for us-east-1 per AWS spec)
region = current_app.config.get("AWS_REGION", "us-east-1")
root = Element("LocationConstraint")
- # AWS returns empty for us-east-1, but we'll be explicit
root.text = region if region != "us-east-1" else None
return _xml_response(root)
@@ -1116,13 +1101,12 @@ def _bucket_acl_handler(bucket_name: str) -> Response:
return _error_response("NoSuchBucket", "Bucket does not exist", 404) return _error_response("NoSuchBucket", "Bucket does not exist", 404)
if request.method == "PUT": if request.method == "PUT":
# We don't fully implement ACLs, but we accept the request for compatibility # Accept canned ACL headers for S3 compatibility (not fully implemented)
# Check for canned ACL header
canned_acl = request.headers.get("x-amz-acl", "private") canned_acl = request.headers.get("x-amz-acl", "private")
current_app.logger.info("Bucket ACL set (canned)", extra={"bucket": bucket_name, "acl": canned_acl}) current_app.logger.info("Bucket ACL set (canned)", extra={"bucket": bucket_name, "acl": canned_acl})
return Response(status=200) return Response(status=200)
# GET - Return a basic ACL document showing full control for owner # Return basic ACL document showing owner's full control
root = Element("AccessControlPolicy") root = Element("AccessControlPolicy")
owner = SubElement(root, "Owner") owner = SubElement(root, "Owner")
SubElement(owner, "ID").text = principal.access_key if principal else "anonymous" SubElement(owner, "ID").text = principal.access_key if principal else "anonymous"
@@ -1188,10 +1172,10 @@ def _bucket_list_versions_handler(bucket_name: str) -> Response:
is_truncated = True
break
- # Current version
+ # Add current version to response
version = SubElement(root, "Version")
SubElement(version, "Key").text = obj.key
- SubElement(version, "VersionId").text = "null"  # Current version ID
+ SubElement(version, "VersionId").text = "null"
SubElement(version, "IsLatest").text = "true"
SubElement(version, "LastModified").text = obj.last_modified.strftime("%Y-%m-%dT%H:%M:%S.000Z")
SubElement(version, "ETag").text = f'"{obj.etag}"'
@@ -1296,7 +1280,7 @@ def _render_lifecycle_config(config: list) -> Element:
SubElement(rule_el, "Status").text = rule.get("Status", "Enabled") SubElement(rule_el, "Status").text = rule.get("Status", "Enabled")
# Expiration # Add expiration rule if present
if "Expiration" in rule: if "Expiration" in rule:
exp = rule["Expiration"] exp = rule["Expiration"]
exp_el = SubElement(rule_el, "Expiration") exp_el = SubElement(rule_el, "Expiration")
@@ -1307,14 +1291,14 @@ def _render_lifecycle_config(config: list) -> Element:
if exp.get("ExpiredObjectDeleteMarker"): if exp.get("ExpiredObjectDeleteMarker"):
SubElement(exp_el, "ExpiredObjectDeleteMarker").text = "true" SubElement(exp_el, "ExpiredObjectDeleteMarker").text = "true"
# NoncurrentVersionExpiration # Add noncurrent version expiration if present
if "NoncurrentVersionExpiration" in rule: if "NoncurrentVersionExpiration" in rule:
nve = rule["NoncurrentVersionExpiration"] nve = rule["NoncurrentVersionExpiration"]
nve_el = SubElement(rule_el, "NoncurrentVersionExpiration") nve_el = SubElement(rule_el, "NoncurrentVersionExpiration")
if "NoncurrentDays" in nve: if "NoncurrentDays" in nve:
SubElement(nve_el, "NoncurrentDays").text = str(nve["NoncurrentDays"]) SubElement(nve_el, "NoncurrentDays").text = str(nve["NoncurrentDays"])
# AbortIncompleteMultipartUpload # Add incomplete multipart upload cleanup if present
if "AbortIncompleteMultipartUpload" in rule: if "AbortIncompleteMultipartUpload" in rule:
aimu = rule["AbortIncompleteMultipartUpload"] aimu = rule["AbortIncompleteMultipartUpload"]
aimu_el = SubElement(rule_el, "AbortIncompleteMultipartUpload") aimu_el = SubElement(rule_el, "AbortIncompleteMultipartUpload")
@@ -1338,29 +1322,29 @@ def _parse_lifecycle_config(payload: bytes) -> list:
for rule_el in root.findall("{*}Rule") or root.findall("Rule"):
rule: dict = {}
- # ID
+ # Extract rule ID
id_el = rule_el.find("{*}ID") or rule_el.find("ID")
if id_el is not None and id_el.text:
rule["ID"] = id_el.text.strip()
- # Filter/Prefix
+ # Extract filter prefix
filter_el = rule_el.find("{*}Filter") or rule_el.find("Filter")
if filter_el is not None:
prefix_el = filter_el.find("{*}Prefix") or filter_el.find("Prefix")
if prefix_el is not None and prefix_el.text:
rule["Prefix"] = prefix_el.text
- # Legacy Prefix (outside Filter)
+ # Fall back to legacy Prefix element (outside Filter)
if "Prefix" not in rule:
prefix_el = rule_el.find("{*}Prefix") or rule_el.find("Prefix")
if prefix_el is not None:
rule["Prefix"] = prefix_el.text or ""
- # Status
+ # Extract status
status_el = rule_el.find("{*}Status") or rule_el.find("Status")
rule["Status"] = (status_el.text or "Enabled").strip() if status_el is not None else "Enabled"
- # Expiration
+ # Parse expiration rule
exp_el = rule_el.find("{*}Expiration") or rule_el.find("Expiration")
if exp_el is not None:
expiration: dict = {}
@@ -1564,7 +1548,7 @@ def _bulk_delete_handler(bucket_name: str) -> Response:
return _xml_response(result, status=200)
- # ---------------------- routes ----------------------
+ # Route handlers for S3 API endpoints
@s3_api_bp.get("/")
@limiter.limit("60 per minute")
def list_buckets() -> Response:
@@ -1642,7 +1626,7 @@ def bucket_handler(bucket_name: str) -> Response:
current_app.logger.info("Bucket deleted", extra={"bucket": bucket_name}) current_app.logger.info("Bucket deleted", extra={"bucket": bucket_name})
return Response(status=204) return Response(status=204)
# GET - list objects (supports both ListObjects and ListObjectsV2) # Handle GET - list objects (supports both ListObjects and ListObjectsV2)
principal, error = _require_principal() principal, error = _require_principal()
try: try:
_authorize_action(principal, bucket_name, "list") _authorize_action(principal, bucket_name, "list")
@@ -1650,18 +1634,13 @@ def bucket_handler(bucket_name: str) -> Response:
if error:
return error
return _error_response("AccessDenied", str(exc), 403)
- try:
- objects = storage.list_objects_all(bucket_name)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- # Check if this is ListObjectsV2 (list-type=2)
list_type = request.args.get("list-type")
prefix = request.args.get("prefix", "")
delimiter = request.args.get("delimiter", "")
max_keys = min(int(request.args.get("max-keys", current_app.config["UI_PAGE_SIZE"])), 1000)
- # Pagination markers
+ # Use appropriate markers for pagination depending on API version
marker = request.args.get("marker", "")  # ListObjects v1
continuation_token = request.args.get("continuation-token", "")  # ListObjectsV2
start_after = request.args.get("start-after", "")  # ListObjectsV2
@@ -1681,11 +1660,18 @@ def bucket_handler(bucket_name: str) -> Response:
else:
effective_start = marker
- if prefix:
- objects = [obj for obj in objects if obj.key.startswith(prefix)]
- if effective_start:
- objects = [obj for obj in objects if obj.key > effective_start]
+ # Fetch with buffer for delimiter processing; delimiter requires extra objects to compute prefixes
+ fetch_keys = max_keys * 10 if delimiter else max_keys
+ try:
+ list_result = storage.list_objects(
+ bucket_name,
+ max_keys=fetch_keys,
+ continuation_token=effective_start or None,
+ prefix=prefix or None,
+ )
+ objects = list_result.objects
+ except StorageError as exc:
+ return _error_response("NoSuchBucket", str(exc), 404)
common_prefixes: list[str] = []
filtered_objects: list = []
@@ -1694,7 +1680,7 @@ def bucket_handler(bucket_name: str) -> Response:
for obj in objects:
key_after_prefix = obj.key[len(prefix):] if prefix else obj.key
if delimiter in key_after_prefix:
- # This is a "folder" - extract the common prefix
+ # Extract common prefix (folder-like structure)
common_prefix = prefix + key_after_prefix.split(delimiter)[0] + delimiter
if common_prefix not in seen_prefixes:
seen_prefixes.add(common_prefix)
@@ -1705,7 +1691,7 @@ def bucket_handler(bucket_name: str) -> Response:
common_prefixes = sorted(common_prefixes)
total_items = len(objects) + len(common_prefixes)
- is_truncated = total_items > max_keys
+ is_truncated = total_items > max_keys or list_result.is_truncated
if len(objects) >= max_keys:
objects = objects[:max_keys]
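To make the delimiter handling in this hunk concrete, here is a small, self-contained sketch of the CommonPrefixes grouping the handler performs (simplified, names are illustrative, and it ignores pagination):

```python
def split_listing(keys: list[str], prefix: str, delimiter: str) -> tuple[list[str], list[str]]:
    """Return (object_keys, common_prefixes) the way S3 ListObjects groups them."""
    objects, prefixes, seen = [], [], set()
    for key in keys:
        if not key.startswith(prefix):
            continue
        remainder = key[len(prefix):]
        if delimiter and delimiter in remainder:
            common = prefix + remainder.split(delimiter)[0] + delimiter
            if common not in seen:
                seen.add(common)
                prefixes.append(common)
        else:
            objects.append(key)
    return objects, sorted(prefixes)

# Example: prefix="photos/", delimiter="/"
# split_listing(["photos/2024/a.jpg", "photos/b.jpg"], "photos/", "/")
# -> (["photos/b.jpg"], ["photos/2024/"])
```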
@@ -1845,9 +1831,8 @@ def object_handler(bucket_name: str, object_key: str):
response = Response(status=200)
response.headers["ETag"] = f'"{meta.etag}"'
- # Trigger replication if not a replication request
- user_agent = request.headers.get("User-Agent", "")
- if "S3ReplicationAgent" not in user_agent:
+ # Trigger replication for non-replication requests
+ if "S3ReplicationAgent" not in request.headers.get("User-Agent", ""):
_replication_manager().trigger_replication(bucket_name, object_key, action="write")
return response
@@ -1866,17 +1851,15 @@ def object_handler(bucket_name: str, object_key: str):
metadata = storage.get_object_metadata(bucket_name, object_key)
mimetype = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
- # Check if object is encrypted and needs decryption
+ # Decrypt encrypted objects
is_encrypted = "x-amz-server-side-encryption" in metadata
if request.method == "GET":
if is_encrypted and hasattr(storage, 'get_object_data'):
- # Use encrypted storage to decrypt
try:
data, clean_metadata = storage.get_object_data(bucket_name, object_key)
response = Response(data, mimetype=mimetype)
logged_bytes = len(data)
- # Use decrypted size for Content-Length
response.headers["Content-Length"] = len(data)
etag = hashlib.md5(data).hexdigest()
except StorageError as exc:

View File

@@ -128,11 +128,14 @@ class ObjectStorage:
BUCKET_VERSIONS_DIR = "versions" BUCKET_VERSIONS_DIR = "versions"
MULTIPART_MANIFEST = "manifest.json" MULTIPART_MANIFEST = "manifest.json"
BUCKET_CONFIG_FILE = ".bucket.json" BUCKET_CONFIG_FILE = ".bucket.json"
KEY_INDEX_CACHE_TTL = 30 # seconds - longer TTL for better browsing performance
def __init__(self, root: Path) -> None: def __init__(self, root: Path) -> None:
self.root = Path(root) self.root = Path(root)
self.root.mkdir(parents=True, exist_ok=True) self.root.mkdir(parents=True, exist_ok=True)
self._ensure_system_roots() self._ensure_system_roots()
# In-memory object metadata cache: bucket_id -> (dict[key -> ObjectMeta], timestamp)
self._object_cache: Dict[str, tuple[Dict[str, ObjectMeta], float]] = {}
def list_buckets(self) -> List[BucketMeta]: def list_buckets(self) -> List[BucketMeta]:
buckets: List[BucketMeta] = [] buckets: List[BucketMeta] = []
@@ -274,32 +277,26 @@ class ObjectStorage:
raise StorageError("Bucket does not exist") raise StorageError("Bucket does not exist")
bucket_id = bucket_path.name bucket_id = bucket_path.name
# Collect all matching object keys first (lightweight - just paths) # Use cached object metadata for fast listing
all_keys: List[str] = [] object_cache = self._get_object_cache(bucket_id, bucket_path)
for path in bucket_path.rglob("*"):
if path.is_file(): # Get sorted keys
rel = path.relative_to(bucket_path) all_keys = sorted(object_cache.keys())
if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
continue # Apply prefix filter if specified
key = str(rel.as_posix()) if prefix:
if prefix and not key.startswith(prefix): all_keys = [k for k in all_keys if k.startswith(prefix)]
continue
all_keys.append(key)
all_keys.sort()
total_count = len(all_keys) total_count = len(all_keys)
# Handle continuation token (the key to start after) # Handle continuation token (the key to start after)
start_index = 0 start_index = 0
if continuation_token: if continuation_token:
try: try:
# continuation_token is the last key from previous page # Binary search for efficiency on large lists
for i, key in enumerate(all_keys): import bisect
if key > continuation_token: start_index = bisect.bisect_right(all_keys, continuation_token)
start_index = i if start_index >= total_count:
break
else:
# Token is past all keys
return ListObjectsResult( return ListObjectsResult(
objects=[], objects=[],
is_truncated=False, is_truncated=False,
@@ -314,27 +311,12 @@ class ObjectStorage:
keys_slice = all_keys[start_index:end_index]
is_truncated = end_index < total_count
- # Now load full metadata only for the objects we're returning
+ # Build result from cached metadata (no file I/O!)
objects: List[ObjectMeta] = []
for key in keys_slice:
- safe_key = self._sanitize_object_key(key)
- path = bucket_path / safe_key
- if not path.exists():
- continue  # Object may have been deleted
- try:
- stat = path.stat()
- metadata = self._read_metadata(bucket_id, safe_key)
- objects.append(
- ObjectMeta(
- key=key,
- size=stat.st_size,
- last_modified=datetime.fromtimestamp(stat.st_mtime),
- etag=self._compute_etag(path),
- metadata=metadata or None,
- )
- )
- except OSError:
- continue  # File may have been deleted during iteration
+ obj = object_cache.get(key)
+ if obj:
+ objects.append(obj)
next_token = keys_slice[-1] if is_truncated and keys_slice else None
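A quick illustration of why `bisect_right` is the right primitive for the continuation token above (the token is the last key of the previous page, so listing must resume strictly after it). This is a standalone example, not MyFSIO code:

```python
import bisect

keys = ["a.txt", "b/1.txt", "b/2.txt", "c.txt"]  # already sorted
token = "b/1.txt"  # last key returned on the previous page

# bisect_right returns the index just past the token, so the next page
# starts at the first key strictly greater than it.
start = bisect.bisect_right(keys, token)
print(keys[start:start + 2])  # ['b/2.txt', 'c.txt']
```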
@@ -416,18 +398,21 @@ class ObjectStorage:
pass
stat = destination.stat()
- if metadata:
- self._write_metadata(bucket_id, safe_key, metadata)
- else:
- self._delete_metadata(bucket_id, safe_key)
+ etag = checksum.hexdigest()
+ # Always store internal metadata (etag, size) alongside user metadata
+ internal_meta = {"__etag__": etag, "__size__": str(stat.st_size)}
+ combined_meta = {**internal_meta, **(metadata or {})}
+ self._write_metadata(bucket_id, safe_key, combined_meta)
self._invalidate_bucket_stats_cache(bucket_id)
+ self._invalidate_object_cache(bucket_id)
return ObjectMeta(
key=safe_key.as_posix(),
size=stat.st_size,
last_modified=datetime.fromtimestamp(stat.st_mtime),
- etag=checksum.hexdigest(),
+ etag=etag,
metadata=metadata,
)
@@ -479,6 +464,7 @@ class ObjectStorage:
self._delete_metadata(bucket_id, rel)
self._invalidate_bucket_stats_cache(bucket_id)
+ self._invalidate_object_cache(bucket_id)
self._cleanup_empty_parents(path, bucket_path)
def purge_object(self, bucket_name: str, object_key: str) -> None:
@@ -501,6 +487,7 @@ class ObjectStorage:
# Invalidate bucket stats cache
self._invalidate_bucket_stats_cache(bucket_id)
+ self._invalidate_object_cache(bucket_id)
self._cleanup_empty_parents(target, bucket_path)
def is_versioning_enabled(self, bucket_name: str) -> bool:
@@ -1163,6 +1150,187 @@ class ObjectStorage:
def _legacy_multipart_dir(self, bucket_name: str, upload_id: str) -> Path:
return self._legacy_multipart_bucket_root(bucket_name) / upload_id
def _fast_list_keys(self, bucket_path: Path) -> List[str]:
"""Fast directory walk using os.scandir instead of pathlib.rglob.
This is significantly faster for large directories (10K+ files).
Returns just the keys (for backward compatibility).
"""
return list(self._build_object_cache(bucket_path).keys())
def _build_object_cache(self, bucket_path: Path) -> Dict[str, ObjectMeta]:
"""Build a complete object metadata cache for a bucket.
Uses os.scandir for fast directory walking and a persistent etag index.
"""
from concurrent.futures import ThreadPoolExecutor
bucket_id = bucket_path.name
objects: Dict[str, ObjectMeta] = {}
bucket_str = str(bucket_path)
bucket_len = len(bucket_str) + 1 # +1 for the separator
# Try to load persisted etag index first (single file read vs thousands)
etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
meta_cache: Dict[str, str] = {}
index_mtime: float = 0
if etag_index_path.exists():
try:
index_mtime = etag_index_path.stat().st_mtime
with open(etag_index_path, 'r', encoding='utf-8') as f:
meta_cache = json.load(f)
except (OSError, json.JSONDecodeError):
meta_cache = {}
# Check if we need to rebuild the index
meta_root = self._bucket_meta_root(bucket_id)
needs_rebuild = False
if meta_root.exists() and index_mtime > 0:
# Quick check: if any meta file is newer than index, rebuild
def check_newer(dir_path: str) -> bool:
try:
with os.scandir(dir_path) as it:
for entry in it:
if entry.is_dir(follow_symlinks=False):
if check_newer(entry.path):
return True
elif entry.is_file(follow_symlinks=False) and entry.name.endswith('.meta.json'):
if entry.stat().st_mtime > index_mtime:
return True
except OSError:
pass
return False
needs_rebuild = check_newer(str(meta_root))
elif not meta_cache:
needs_rebuild = True
if needs_rebuild and meta_root.exists():
meta_str = str(meta_root)
meta_len = len(meta_str) + 1
meta_files: list[tuple[str, str]] = []
# Collect all metadata file paths
def collect_meta_files(dir_path: str) -> None:
try:
with os.scandir(dir_path) as it:
for entry in it:
if entry.is_dir(follow_symlinks=False):
collect_meta_files(entry.path)
elif entry.is_file(follow_symlinks=False) and entry.name.endswith('.meta.json'):
rel = entry.path[meta_len:]
key = rel[:-10].replace(os.sep, '/')
meta_files.append((key, entry.path))
except OSError:
pass
collect_meta_files(meta_str)
# Parallel read of metadata files - only extract __etag__
def read_meta_file(item: tuple[str, str]) -> tuple[str, str | None]:
key, path = item
try:
with open(path, 'rb') as f:
content = f.read()
etag_marker = b'"__etag__"'
idx = content.find(etag_marker)
if idx != -1:
start = content.find(b'"', idx + len(etag_marker) + 1)
if start != -1:
end = content.find(b'"', start + 1)
if end != -1:
return key, content[start+1:end].decode('utf-8')
return key, None
except (OSError, UnicodeDecodeError):
return key, None
if meta_files:
meta_cache = {}
with ThreadPoolExecutor(max_workers=min(64, len(meta_files))) as executor:
for key, etag in executor.map(read_meta_file, meta_files):
if etag:
meta_cache[key] = etag
# Persist the index for next time
try:
etag_index_path.parent.mkdir(parents=True, exist_ok=True)
with open(etag_index_path, 'w', encoding='utf-8') as f:
json.dump(meta_cache, f)
except OSError:
pass
# Now scan objects and use cached etags
def scan_dir(dir_path: str) -> None:
try:
with os.scandir(dir_path) as it:
for entry in it:
if entry.is_dir(follow_symlinks=False):
# Skip internal folders
rel_start = entry.path[bucket_len:].split(os.sep)[0] if len(entry.path) > bucket_len else entry.name
if rel_start in self.INTERNAL_FOLDERS:
continue
scan_dir(entry.path)
elif entry.is_file(follow_symlinks=False):
# Get relative path and convert to POSIX
rel = entry.path[bucket_len:]
# Check if in internal folder
first_part = rel.split(os.sep)[0] if os.sep in rel else rel
if first_part in self.INTERNAL_FOLDERS:
continue
key = rel.replace(os.sep, '/')
try:
# Use entry.stat() which is cached from scandir
stat = entry.stat()
# Get etag from cache (now just a string, not dict)
etag = meta_cache.get(key)
# Use placeholder for legacy objects without stored etag
if not etag:
etag = f'"{stat.st_size}-{int(stat.st_mtime)}"'
objects[key] = ObjectMeta(
key=key,
size=stat.st_size,
last_modified=datetime.fromtimestamp(stat.st_mtime),
etag=etag,
metadata=None, # Don't include user metadata in listing
)
except OSError:
pass
except OSError:
pass
scan_dir(bucket_str)
return objects
def _get_object_cache(self, bucket_id: str, bucket_path: Path) -> Dict[str, ObjectMeta]:
"""Get cached object metadata for a bucket, refreshing if stale."""
now = time.time()
cached = self._object_cache.get(bucket_id)
if cached:
objects, timestamp = cached
if now - timestamp < self.KEY_INDEX_CACHE_TTL:
return objects
# Rebuild cache
objects = self._build_object_cache(bucket_path)
self._object_cache[bucket_id] = (objects, now)
return objects
def _invalidate_object_cache(self, bucket_id: str) -> None:
"""Invalidate the object cache and etag index for a bucket."""
self._object_cache.pop(bucket_id, None)
# Also invalidate persisted etag index
etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
try:
etag_index_path.unlink(missing_ok=True)
except OSError:
pass
def _ensure_system_roots(self) -> None:
for path in (
self._system_root_path(),

View File

@@ -423,7 +423,7 @@ def list_bucket_objects(bucket_name: str):
except IamError as exc:
return jsonify({"error": str(exc)}), 403
- max_keys = min(int(request.args.get("max_keys", 100)), 1000)
+ max_keys = min(int(request.args.get("max_keys", 1000)), 10000)
continuation_token = request.args.get("continuation_token") or None
prefix = request.args.get("prefix") or None
@@ -738,41 +738,30 @@ def bulk_download_objects(bucket_name: str):
unique_keys = list(dict.fromkeys(cleaned))
storage = _storage()
- # Check permissions for all keys first (or at least bucket read)
- # We'll check bucket read once, then object read for each if needed?
- # _authorize_ui checks bucket level if object_key is None, but we need to check each object if fine-grained policies exist.
- # For simplicity/performance, we check bucket list/read.
+ # Verify permission to read bucket contents
try:
_authorize_ui(principal, bucket_name, "read")
except IamError as exc:
return jsonify({"error": str(exc)}), 403
- # Create ZIP
+ # Create ZIP archive of selected objects
buffer = io.BytesIO()
with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
for key in unique_keys:
try:
- # Verify individual object permission if needed?
- # _authorize_ui(principal, bucket_name, "read", object_key=key)
- # This might be slow for many objects. Assuming bucket read is enough for now or we accept the overhead.
- # Let's skip individual check for bulk speed, assuming bucket read implies object read unless denied.
- # But strictly we should check. Let's check.
_authorize_ui(principal, bucket_name, "read", object_key=key)
- # Check if object is encrypted
metadata = storage.get_object_metadata(bucket_name, key)
is_encrypted = "x-amz-server-side-encryption" in metadata
if is_encrypted and hasattr(storage, 'get_object_data'):
- # Decrypt and add to zip
data, _ = storage.get_object_data(bucket_name, key)
zf.writestr(key, data)
else:
- # Add unencrypted file directly
path = storage.get_object_path(bucket_name, key)
zf.write(path, arcname=key)
except (StorageError, IamError):
- # Skip files we can't read or don't exist
+ # Skip objects that can't be accessed
continue
buffer.seek(0)
@@ -1077,7 +1066,6 @@ def update_bucket_encryption(bucket_name: str):
action = request.form.get("action", "enable")
if action == "disable":
- # Disable encryption
try:
_storage().set_bucket_encryption(bucket_name, None)
flash("Default encryption disabled", "info")
@@ -1085,16 +1073,14 @@ def update_bucket_encryption(bucket_name: str):
flash(_friendly_error_message(exc), "danger")
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
- # Enable or update encryption
algorithm = request.form.get("algorithm", "AES256")
kms_key_id = request.form.get("kms_key_id", "").strip() or None
- # Validate algorithm
if algorithm not in ("AES256", "aws:kms"):
flash("Invalid encryption algorithm", "danger")
return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
- # Build encryption config following AWS format
+ # Build encryption configuration in AWS S3 format
encryption_config: dict[str, Any] = {
"Rules": [
{
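The hunk is cut off here. For orientation, S3's PutBucketEncryption schema shapes this kind of document as a list of rules with an `ApplyServerSideEncryptionByDefault` block; a hedged sketch of what the handler plausibly builds (the exact fields MyFSIO stores may differ):

```python
# Sketch of an AWS-style default-encryption document
# (field names follow S3's PutBucketEncryption schema).
def build_encryption_config(algorithm: str, kms_key_id: str | None) -> dict:
    rule: dict = {"ApplyServerSideEncryptionByDefault": {"SSEAlgorithm": algorithm}}
    if algorithm == "aws:kms" and kms_key_id:
        rule["ApplyServerSideEncryptionByDefault"]["KMSMasterKeyID"] = kms_key_id
    return {"Rules": [rule]}

# build_encryption_config("AES256", None)
# -> {"Rules": [{"ApplyServerSideEncryptionByDefault": {"SSEAlgorithm": "AES256"}}]}
```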

View File

@@ -172,15 +172,15 @@
<button id="load-more-btn" class="btn btn-link btn-sm p-0 d-none" style="font-size: 0.75rem;">Load more</button> <button id="load-more-btn" class="btn btn-link btn-sm p-0 d-none" style="font-size: 0.75rem;">Load more</button>
</div> </div>
<div class="d-flex align-items-center gap-1"> <div class="d-flex align-items-center gap-1">
<span class="text-muted">Load</span> <span class="text-muted">Batch</span>
<select id="page-size-select" class="form-select form-select-sm py-0" style="width: auto; font-size: 0.75rem;"> <select id="page-size-select" class="form-select form-select-sm py-0" style="width: auto; font-size: 0.75rem;">
<option value="50">50</option> <option value="1000">1K</option>
<option value="100" selected>100</option> <option value="5000" selected>5K</option>
<option value="150">150</option> <option value="10000">10K</option>
<option value="200">200</option> <option value="25000">25K</option>
<option value="250">250</option> <option value="50000">50K</option>
</select> </select>
<span class="text-muted">per page</span> <span class="text-muted">objects</span>
</div> </div>
</div> </div>
</div> </div>
@@ -1866,30 +1866,42 @@
let isLoadingObjects = false;
let hasMoreObjects = false;
let currentFilterTerm = '';
- let pageSize = 100;
+ let pageSize = 5000; // Load large batches for virtual scrolling
+ let currentPrefix = ''; // Current folder prefix for navigation
+ let allObjects = []; // All loaded object metadata (lightweight)
+ // Virtual scrolling state
+ const ROW_HEIGHT = 53; // Height of each table row in pixels
+ const BUFFER_ROWS = 10; // Extra rows to render above/below viewport
+ let visibleItems = []; // Current items to display (filtered by folder/search)
+ let renderedRange = { start: 0, end: 0 }; // Currently rendered row indices
- const createObjectRow = (obj) => {
+ // Create a row element from object data (for virtual scrolling)
+ const createObjectRow = (obj, displayKey = null) => {
const tr = document.createElement('tr');
tr.dataset.objectRow = '';
tr.dataset.key = obj.key;
tr.dataset.size = obj.size;
- tr.dataset.lastModified = obj.last_modified;
+ tr.dataset.lastModified = obj.lastModified || obj.last_modified;
tr.dataset.etag = obj.etag;
- tr.dataset.previewUrl = obj.preview_url;
+ tr.dataset.previewUrl = obj.previewUrl || obj.preview_url;
- tr.dataset.downloadUrl = obj.download_url;
+ tr.dataset.downloadUrl = obj.downloadUrl || obj.download_url;
- tr.dataset.presignEndpoint = obj.presign_endpoint;
+ tr.dataset.presignEndpoint = obj.presignEndpoint || obj.presign_endpoint;
- tr.dataset.deleteEndpoint = obj.delete_endpoint;
+ tr.dataset.deleteEndpoint = obj.deleteEndpoint || obj.delete_endpoint;
- tr.dataset.metadata = JSON.stringify(obj.metadata || {});
+ tr.dataset.metadata = typeof obj.metadata === 'string' ? obj.metadata : JSON.stringify(obj.metadata || {});
- tr.dataset.versionsEndpoint = obj.versions_endpoint;
+ tr.dataset.versionsEndpoint = obj.versionsEndpoint || obj.versions_endpoint;
- tr.dataset.restoreTemplate = obj.restore_template;
+ tr.dataset.restoreTemplate = obj.restoreTemplate || obj.restore_template;
+ const keyToShow = displayKey || obj.key;
+ const lastModDisplay = obj.lastModifiedDisplay || obj.last_modified_display || new Date(obj.lastModified || obj.last_modified).toLocaleDateString();
tr.innerHTML = `
<td class="text-center align-middle">
<input class="form-check-input" type="checkbox" data-object-select aria-label="Select ${escapeHtml(obj.key)}" />
</td>
<td class="object-key text-break" title="${escapeHtml(obj.key)}">
- <div class="fw-medium">${escapeHtml(obj.key)}</div>
+ <div class="fw-medium">${escapeHtml(keyToShow)}</div>
- <div class="text-muted small">Modified ${escapeHtml(obj.last_modified_display)}</div>
+ <div class="text-muted small">Modified ${escapeHtml(lastModDisplay)}</div>
</td>
<td class="text-end text-nowrap">
<span class="text-muted small">${formatBytes(obj.size)}</span>
@@ -1898,7 +1910,7 @@
<div class="btn-group btn-group-sm" role="group"> <div class="btn-group btn-group-sm" role="group">
<a <a
class="btn btn-outline-primary btn-icon" class="btn btn-outline-primary btn-icon"
href="${escapeHtml(obj.download_url)}" href="${escapeHtml(obj.downloadUrl || obj.download_url)}"
target="_blank" target="_blank"
title="Download" title="Download"
aria-label="Download" aria-label="Download"
@@ -1987,12 +1999,178 @@
}
};
// ============== VIRTUAL SCROLLING SYSTEM ==============
// Spacer elements for virtual scroll height
let topSpacer = null;
let bottomSpacer = null;
const initVirtualScrollElements = () => {
if (!objectsTableBody) return;
// Create spacer rows if they don't exist
if (!topSpacer) {
topSpacer = document.createElement('tr');
topSpacer.id = 'virtual-top-spacer';
topSpacer.innerHTML = '<td colspan="4" style="padding: 0; border: none;"></td>';
}
if (!bottomSpacer) {
bottomSpacer = document.createElement('tr');
bottomSpacer.id = 'virtual-bottom-spacer';
bottomSpacer.innerHTML = '<td colspan="4" style="padding: 0; border: none;"></td>';
}
};
// Compute which items should be visible based on current view
const computeVisibleItems = () => {
const items = [];
const folders = new Set();
allObjects.forEach(obj => {
if (!obj.key.startsWith(currentPrefix)) return;
const remainder = obj.key.slice(currentPrefix.length);
const slashIndex = remainder.indexOf('/');
if (slashIndex === -1) {
// File in current folder
if (!currentFilterTerm || obj.key.toLowerCase().includes(currentFilterTerm)) {
items.push({ type: 'file', data: obj, displayKey: remainder });
}
} else {
// Folder
const folderPath = currentPrefix + remainder.slice(0, slashIndex + 1);
if (!folders.has(folderPath)) {
folders.add(folderPath);
if (!currentFilterTerm || folderPath.toLowerCase().includes(currentFilterTerm)) {
items.push({ type: 'folder', path: folderPath, displayKey: remainder.slice(0, slashIndex) });
}
}
}
});
// Sort: folders first, then files
items.sort((a, b) => {
if (a.type === 'folder' && b.type === 'file') return -1;
if (a.type === 'file' && b.type === 'folder') return 1;
const aKey = a.type === 'folder' ? a.path : a.data.key;
const bKey = b.type === 'folder' ? b.path : b.data.key;
return aKey.localeCompare(bKey);
});
return items;
};
// Render only the visible rows based on scroll position
const renderVirtualRows = () => {
if (!objectsTableBody || !scrollContainer) return;
const containerHeight = scrollContainer.clientHeight;
const scrollTop = scrollContainer.scrollTop;
// Calculate visible range
const startIndex = Math.max(0, Math.floor(scrollTop / ROW_HEIGHT) - BUFFER_ROWS);
const endIndex = Math.min(visibleItems.length, Math.ceil((scrollTop + containerHeight) / ROW_HEIGHT) + BUFFER_ROWS);
// Skip if range hasn't changed significantly
if (startIndex === renderedRange.start && endIndex === renderedRange.end) return;
renderedRange = { start: startIndex, end: endIndex };
// Clear and rebuild
objectsTableBody.innerHTML = '';
// Add top spacer
initVirtualScrollElements();
topSpacer.querySelector('td').style.height = `${startIndex * ROW_HEIGHT}px`;
objectsTableBody.appendChild(topSpacer);
// Render visible rows
for (let i = startIndex; i < endIndex; i++) {
const item = visibleItems[i];
if (!item) continue;
let row;
if (item.type === 'folder') {
row = createFolderRow(item.path, item.displayKey);
} else {
row = createObjectRow(item.data, item.displayKey);
}
row.dataset.virtualIndex = i;
objectsTableBody.appendChild(row);
}
// Add bottom spacer
const remainingRows = visibleItems.length - endIndex;
bottomSpacer.querySelector('td').style.height = `${remainingRows * ROW_HEIGHT}px`;
objectsTableBody.appendChild(bottomSpacer);
// Re-attach handlers to new rows
attachRowHandlers();
};
// Debounced scroll handler for virtual scrolling
let scrollTimeout = null;
const handleVirtualScroll = () => {
if (scrollTimeout) cancelAnimationFrame(scrollTimeout);
scrollTimeout = requestAnimationFrame(renderVirtualRows);
};
// Refresh the virtual list (after data changes or navigation)
const refreshVirtualList = () => {
visibleItems = computeVisibleItems();
renderedRange = { start: -1, end: -1 }; // Force re-render
if (visibleItems.length === 0) {
if (allObjects.length === 0 && !hasMoreObjects) {
showEmptyState();
} else {
// Empty folder
objectsTableBody.innerHTML = `
<tr>
<td colspan="4" class="py-5">
<div class="empty-state">
<div class="empty-state-icon mx-auto" style="width: 64px; height: 64px;">
<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" viewBox="0 0 16 16">
<path d="M9.828 3h3.982a2 2 0 0 1 1.992 2.181l-.637 7A2 2 0 0 1 13.174 14H2.825a2 2 0 0 1-1.991-1.819l-.637-7a1.99 1.99 0 0 1 .342-1.31L.5 3a2 2 0 0 1 2-2h3.672a2 2 0 0 1 1.414.586l.828.828A2 2 0 0 0 9.828 3zm-8.322.12C1.72 3.042 1.95 3 2.19 3h5.396l-.707-.707A1 1 0 0 0 6.172 2H2.5a1 1 0 0 0-1 .981l.006.139z"/>
</svg>
</div>
<h6 class="mb-2">Empty folder</h6>
<p class="text-muted small mb-0">This folder contains no objects${hasMoreObjects ? ' yet. Loading more...' : '.'}</p>
</div>
</td>
</tr>
`;
}
} else {
renderVirtualRows();
}
updateFolderViewStatus();
};
// Update status bar
const updateFolderViewStatus = () => {
const folderViewStatusEl = document.getElementById('folder-view-status');
if (!folderViewStatusEl) return;
if (currentPrefix) {
const folderCount = visibleItems.filter(i => i.type === 'folder').length;
const fileCount = visibleItems.filter(i => i.type === 'file').length;
folderViewStatusEl.innerHTML = `<span class="text-muted">${folderCount} folder${folderCount !== 1 ? 's' : ''}, ${fileCount} file${fileCount !== 1 ? 's' : ''} in this view</span>`;
folderViewStatusEl.classList.remove('d-none');
} else {
folderViewStatusEl.classList.add('d-none');
}
};
// ============== DATA LOADING ==============
const loadObjects = async (append = false) => {
if (isLoadingObjects) return;
isLoadingObjects = true;
if (!append) {
if (objectsLoadingRow) objectsLoadingRow.style.display = '';
nextContinuationToken = null;
loadedObjectCount = 0;
@@ -2026,35 +2204,18 @@
totalObjectCount = data.total_count || 0;
nextContinuationToken = data.next_continuation_token;
- if (!append) {
- if (objectsLoadingRow) objectsLoadingRow.remove();
- if (data.objects.length === 0) {
- showEmptyState();
- updateObjectCountBadge();
- isLoadingObjects = false;
- return;
- }
- objectsTableBody.innerHTML = '';
+ if (!append && objectsLoadingRow) {
+ objectsLoadingRow.remove();
}
+ // Store lightweight object metadata (no DOM elements!)
data.objects.forEach(obj => {
- const row = createObjectRow(obj);
- objectsTableBody.appendChild(row);
loadedObjectCount++;
- // Apply current filter to newly loaded objects
- if (currentFilterTerm) {
- const keyLower = obj.key.toLowerCase();
- row.style.display = keyLower.includes(currentFilterTerm) ? '' : 'none';
- }
allObjects.push({
key: obj.key,
size: obj.size,
lastModified: obj.last_modified,
+ lastModifiedDisplay: obj.last_modified_display,
etag: obj.etag,
previewUrl: obj.preview_url,
downloadUrl: obj.download_url,
@@ -2062,86 +2223,28 @@
deleteEndpoint: obj.delete_endpoint,
metadata: JSON.stringify(obj.metadata || {}),
versionsEndpoint: obj.versions_endpoint,
- restoreTemplate: obj.restore_template,
- element: row
+ restoreTemplate: obj.restore_template
});
});
updateObjectCountBadge();
- // Track if there are more objects to load
hasMoreObjects = data.is_truncated;
if (loadMoreStatus) {
if (data.is_truncated) {
- loadMoreStatus.textContent = `${loadedObjectCount.toLocaleString()} of ${totalObjectCount.toLocaleString()} objects loaded`;
+ loadMoreStatus.textContent = `${loadedObjectCount.toLocaleString()} of ${totalObjectCount.toLocaleString()} loaded`;
} else {
- loadMoreStatus.textContent = `All ${loadedObjectCount.toLocaleString()} objects loaded`;
+ loadMoreStatus.textContent = `${loadedObjectCount.toLocaleString()} objects`;
}
}
- // Update Load More button visibility
if (typeof updateLoadMoreButton === 'function') {
updateLoadMoreButton();
}
- // Track the count of items in current folder before re-rendering
- let prevFolderItemCount = 0;
- if (currentPrefix && append) {
- const prevState = getFoldersAtPrefix(currentPrefix);
- prevFolderItemCount = prevState.folders.length + prevState.files.length;
- }
- if (typeof initFolderNavigation === 'function') {
- initFolderNavigation();
- }
- attachRowHandlers();
- // If we're in a nested folder and loaded more objects, scroll to show newly loaded content
- if (currentPrefix && append) {
- const newState = getFoldersAtPrefix(currentPrefix);
- const newFolderItemCount = newState.folders.length + newState.files.length;
- const addedCount = newFolderItemCount - prevFolderItemCount;
- if (addedCount > 0) {
- // Show a brief notification about newly loaded items in the current folder
- const folderViewStatusEl = document.getElementById('folder-view-status');
- if (folderViewStatusEl) {
- folderViewStatusEl.innerHTML = `<span class="text-success fw-medium">+${addedCount} new item${addedCount !== 1 ? 's' : ''} loaded in this folder</span>`;
- folderViewStatusEl.classList.remove('d-none');
- // Reset to normal status after 3 seconds
- setTimeout(() => {
- if (typeof updateFolderViewStatus === 'function') {
- updateFolderViewStatus();
- }
- }, 3000);
- }
- // Scroll to show the first newly added item
- const allRows = objectsTableBody.querySelectorAll('tr:not([style*="display: none"])');
- if (allRows.length > prevFolderItemCount) {
- const firstNewRow = allRows[prevFolderItemCount];
- if (firstNewRow) {
- firstNewRow.scrollIntoView({ behavior: 'smooth', block: 'center' });
- // Briefly highlight the new rows
- for (let i = prevFolderItemCount; i < allRows.length; i++) {
- allRows[i].classList.add('table-info');
- setTimeout(() => {
- allRows[i].classList.remove('table-info');
- }, 2000);
- }
- }
- }
- } else if (hasMoreObjects) {
- // Objects were loaded but none were in the current folder - show a hint
- const folderViewStatusEl = document.getElementById('folder-view-status');
- if (folderViewStatusEl) {
- folderViewStatusEl.innerHTML = `<span class="text-muted">Loaded more objects (not in this folder). <button type="button" class="btn btn-link btn-sm p-0" onclick="navigateToFolder('')">Go to root</button> to see all.</span>`;
- folderViewStatusEl.classList.remove('d-none');
- }
- }
- }
+ // Refresh virtual scroll view
+ refreshVirtualList();
+ renderBreadcrumb(currentPrefix);
} catch (error) {
console.error('Failed to load objects:', error);
@@ -2152,7 +2255,6 @@
}
} finally {
isLoadingObjects = false;
- // Hide loading spinner
if (loadMoreSpinner) {
loadMoreSpinner.classList.add('d-none');
}
@@ -2160,16 +2262,15 @@
};
const attachRowHandlers = () => {
- // Attach handlers to object rows
const objectRows = document.querySelectorAll('[data-object-row]');
objectRows.forEach(row => {
if (row.dataset.handlersAttached) return;
row.dataset.handlersAttached = 'true';
const deleteBtn = row.querySelector('[data-delete-object]');
deleteBtn?.addEventListener('click', (e) => {
e.stopPropagation();
const deleteModalEl = document.getElementById('deleteObjectModal');
const deleteModal = deleteModalEl ? bootstrap.Modal.getOrCreateInstance(deleteModalEl) : null;
const deleteObjectForm = document.getElementById('deleteObjectForm');
@@ -2186,17 +2287,63 @@
selectCheckbox?.addEventListener('change', () => { selectCheckbox?.addEventListener('change', () => {
toggleRowSelection(row, selectCheckbox.checked); toggleRowSelection(row, selectCheckbox.checked);
}); });
// Restore selection state
if (selectCheckbox && selectedRows.has(row.dataset.key)) {
selectCheckbox.checked = true;
row.classList.add('table-active');
}
});
// Attach handlers to folder rows
const folderRows = document.querySelectorAll('.folder-row');
folderRows.forEach(row => {
if (row.dataset.handlersAttached) return;
row.dataset.handlersAttached = 'true';
const folderPath = row.dataset.folderPath;
const checkbox = row.querySelector('[data-folder-select]');
checkbox?.addEventListener('change', (e) => {
e.stopPropagation();
// Select all objects in this folder
const folderObjects = allObjects.filter(obj => obj.key.startsWith(folderPath));
folderObjects.forEach(obj => {
if (checkbox.checked) {
selectedRows.set(obj.key, obj);
} else {
selectedRows.delete(obj.key);
}
});
updateBulkDeleteState();
});
const folderBtn = row.querySelector('button');
folderBtn?.addEventListener('click', (e) => {
e.stopPropagation();
navigateToFolder(folderPath);
});
row.addEventListener('click', (e) => {
if (e.target.closest('[data-folder-select]') || e.target.closest('button')) return;
navigateToFolder(folderPath);
});
}); });
updateBulkDeleteState(); updateBulkDeleteState();
}; };
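// toggleRowSelection() (used by the object-row checkbox handlers above) is assumed
// to keep the selectedRows Map in sync with a row's checkbox; the Map stores the
// object record keyed by object key, as the folder handler above implies.
// A minimal sketch under that assumption (toggleRowSelectionSketch is a
// hypothetical name, not the function defined in this file):
const toggleRowSelectionSketch = (row, checked) => {
  const key = row.dataset.key;
  if (!key) return;
  if (checked) {
    const record = allObjects.find((obj) => obj.key === key) || { key };
    selectedRows.set(key, record);
    row.classList.add('table-active');
  } else {
    selectedRows.delete(key);
    row.classList.remove('table-active');
  }
  updateBulkDeleteState();
};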
// Infinite scroll: use IntersectionObserver to auto-load more objects // Scroll container reference (needed for virtual scrolling)
const scrollSentinel = document.getElementById('scroll-sentinel'); const scrollSentinel = document.getElementById('scroll-sentinel');
const scrollContainer = document.querySelector('.objects-table-container'); const scrollContainer = document.querySelector('.objects-table-container');
const loadMoreBtn = document.getElementById('load-more-btn'); const loadMoreBtn = document.getElementById('load-more-btn');
// Load More button click handler (fallback for mobile) // Virtual scroll: listen to scroll events
if (scrollContainer) {
scrollContainer.addEventListener('scroll', handleVirtualScroll, { passive: true });
}
// Load More button click handler (fallback)
loadMoreBtn?.addEventListener('click', () => { loadMoreBtn?.addEventListener('click', () => {
if (hasMoreObjects && !isLoadingObjects) { if (hasMoreObjects && !isLoadingObjects) {
loadObjects(true); loadObjects(true);
@@ -2210,8 +2357,8 @@
} }
} }
// Auto-load the next batch when the sentinel nears the bottom (until all objects are loaded)
if (scrollSentinel && scrollContainer) { if (scrollSentinel && scrollContainer) {
// Observer for scrolling within the container (desktop)
const containerObserver = new IntersectionObserver((entries) => { const containerObserver = new IntersectionObserver((entries) => {
entries.forEach(entry => { entries.forEach(entry => {
if (entry.isIntersecting && hasMoreObjects && !isLoadingObjects) { if (entry.isIntersecting && hasMoreObjects && !isLoadingObjects) {
@@ -2220,12 +2367,11 @@
}); });
}, { }, {
root: scrollContainer, root: scrollContainer,
rootMargin: '100px', rootMargin: '500px', // Start loading earlier for a smoother scrolling experience
threshold: 0 threshold: 0
}); });
containerObserver.observe(scrollSentinel); containerObserver.observe(scrollSentinel);
// Observer for page scrolling (mobile - when container is not scrollable)
const viewportObserver = new IntersectionObserver((entries) => { const viewportObserver = new IntersectionObserver((entries) => {
entries.forEach(entry => { entries.forEach(entry => {
if (entry.isIntersecting && hasMoreObjects && !isLoadingObjects) { if (entry.isIntersecting && hasMoreObjects && !isLoadingObjects) {
@@ -2233,14 +2379,14 @@
} }
}); });
}, { }, {
root: null, // viewport root: null,
rootMargin: '200px', rootMargin: '500px',
threshold: 0 threshold: 0
}); });
viewportObserver.observe(scrollSentinel); viewportObserver.observe(scrollSentinel);
} }
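// handleVirtualScroll (registered on the scroll container above) is assumed to do
// simple fixed-height windowing: render only the rows near the viewport and pad
// the rest with spacer height. ROW_HEIGHT, OVERSCAN, the spacer elements and
// renderRows are assumptions for illustration, not names from this file.
const ROW_HEIGHT = 44;   // assumed pixel height of one table row
const OVERSCAN = 10;     // extra rows rendered above and below the viewport
const handleVirtualScrollSketch = () => {
  if (!scrollContainer) return;
  const first = Math.max(0, Math.floor(scrollContainer.scrollTop / ROW_HEIGHT) - OVERSCAN);
  const visibleCount = Math.ceil(scrollContainer.clientHeight / ROW_HEIGHT) + OVERSCAN * 2;
  const last = Math.min(visibleItems.length, first + visibleCount);
  topSpacer.style.height = `${first * ROW_HEIGHT}px`;
  bottomSpacer.style.height = `${(visibleItems.length - last) * ROW_HEIGHT}px`;
  renderRows(visibleItems.slice(first, last));
};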
// Page size selector // Page size selector (now controls the per-request batch size)
const pageSizeSelect = document.getElementById('page-size-select'); const pageSizeSelect = document.getElementById('page-size-select');
pageSizeSelect?.addEventListener('change', (e) => { pageSizeSelect?.addEventListener('change', (e) => {
pageSize = parseInt(e.target.value, 10); pageSize = parseInt(e.target.value, 10);
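// pageSize above is assumed to be forwarded by loadObjects() as the per-request
// batch size. The endpoint and parameter names below are hypothetical, shown only
// to illustrate that role:
const fetchObjectBatchSketch = async (bucketName, continuationToken, batchSize) => {
  const params = new URLSearchParams({ 'max-keys': String(batchSize) });
  if (continuationToken) params.set('continuation-token', continuationToken);
  const resp = await fetch(`/api/buckets/${encodeURIComponent(bucketName)}/objects?${params}`);
  if (!resp.ok) throw new Error(`Listing failed with status ${resp.status}`);
  return resp.json(); // assumed shape: { objects: [...], nextToken, truncated }
};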
@@ -2252,7 +2398,6 @@
const folderBreadcrumb = document.getElementById('folder-breadcrumb'); const folderBreadcrumb = document.getElementById('folder-breadcrumb');
const objectsTableBody = document.querySelector('#objects-table tbody'); const objectsTableBody = document.querySelector('#objects-table tbody');
let currentPrefix = '';
if (objectsTableBody) { if (objectsTableBody) {
objectsTableBody.addEventListener('click', (e) => { objectsTableBody.addEventListener('click', (e) => {
@@ -2369,8 +2514,8 @@
return allObjects.filter(obj => obj.key.startsWith(folderPrefix)); return allObjects.filter(obj => obj.key.startsWith(folderPrefix));
}; };
const createFolderRow = (folderPath) => { const createFolderRow = (folderPath, displayName = null) => {
const folderName = folderPath.slice(currentPrefix.length).replace(/\/$/, ''); const folderName = displayName || folderPath.slice(currentPrefix.length).replace(/\/$/, '');
const { count: objectCount, mayHaveMore } = countObjectsInFolder(folderPath); const { count: objectCount, mayHaveMore } = countObjectsInFolder(folderPath);
const countDisplay = mayHaveMore ? `${objectCount}+` : objectCount; const countDisplay = mayHaveMore ? `${objectCount}+` : objectCount;
@@ -2403,38 +2548,20 @@
</button> </button>
</td> </td>
`; `;
const checkbox = tr.querySelector('[data-folder-select]');
checkbox?.addEventListener('change', (e) => {
e.stopPropagation();
const folderObjects = getObjectsInFolder(folderPath);
folderObjects.forEach(obj => {
const objCheckbox = obj.element.querySelector('[data-object-select]');
if (objCheckbox) {
objCheckbox.checked = checkbox.checked;
}
toggleRowSelection(obj.element, checkbox.checked);
});
});
const folderBtn = tr.querySelector('button');
folderBtn?.addEventListener('click', (e) => {
e.stopPropagation();
navigateToFolder(folderPath);
});
tr.addEventListener('click', (e) => {
if (e.target.closest('[data-folder-select]') || e.target.closest('button')) return;
navigateToFolder(folderPath);
});
return tr; return tr;
}; };
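// countObjectsInFolder() (used above for the "N+" badge) is assumed to count the
// objects loaded so far under a prefix and flag when the server may still hold
// more. A minimal sketch under that assumption:
const countObjectsInFolderSketch = (folderPath) => {
  const count = allObjects.filter((obj) => obj.key.startsWith(folderPath)).length;
  return { count, mayHaveMore: hasMoreObjects && count > 0 };
};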
// Instant client-side folder navigation (no server round-trip!)
const navigateToFolder = (prefix) => { const navigateToFolder = (prefix) => {
currentPrefix = prefix; currentPrefix = prefix;
// Scroll to top when navigating
if (scrollContainer) scrollContainer.scrollTop = 0;
// Instant re-render from already-loaded data
refreshVirtualList();
renderBreadcrumb(prefix); renderBreadcrumb(prefix);
renderObjectsView();
selectedRows.clear(); selectedRows.clear();
@@ -2442,14 +2569,6 @@
updateBulkDeleteState(); updateBulkDeleteState();
} }
if (typeof updateFolderViewStatus === 'function') {
updateFolderViewStatus();
}
if (typeof updateFilterWarning === 'function') {
updateFilterWarning();
}
if (previewPanel) previewPanel.classList.add('d-none'); if (previewPanel) previewPanel.classList.add('d-none');
if (previewEmpty) previewEmpty.classList.remove('d-none'); if (previewEmpty) previewEmpty.classList.remove('d-none');
activeRow = null; activeRow = null;
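// renderBreadcrumb() called above is assumed to rebuild the clickable path for the
// new prefix, with every crumb navigating client-side via navigateToFolder().
// A minimal sketch of that idea (folderBreadcrumb is the element queried elsewhere
// in this file; class names assume Bootstrap):
const renderBreadcrumbSketch = (prefix) => {
  if (!folderBreadcrumb) return;
  folderBreadcrumb.innerHTML = '';
  const crumbs = [{ label: 'root', path: '' }];
  let accumulated = '';
  prefix.split('/').filter(Boolean).forEach((part) => {
    accumulated += `${part}/`;
    crumbs.push({ label: part, path: accumulated });
  });
  crumbs.forEach(({ label, path }, idx) => {
    const li = document.createElement('li');
    const isLast = idx === crumbs.length - 1;
    li.className = `breadcrumb-item${isLast ? ' active' : ''}`;
    if (isLast) {
      li.textContent = label;
    } else {
      const btn = document.createElement('button');
      btn.type = 'button';
      btn.className = 'btn btn-link btn-sm p-0';
      btn.textContent = label;
      btn.addEventListener('click', () => navigateToFolder(path));
      li.appendChild(btn);
    }
    folderBreadcrumb.appendChild(li);
  });
};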
@@ -2651,12 +2770,10 @@
bulkDeleteConfirm.disabled = selectedCount === 0 || bulkDeleting; bulkDeleteConfirm.disabled = selectedCount === 0 || bulkDeleting;
} }
if (selectAllCheckbox) { if (selectAllCheckbox) {
// With virtual scrolling, count the files in the current folder from visibleItems
const visibleRowsRaw = hasFolders() const filesInView = visibleItems.filter(item => item.type === 'file');
? allObjects.filter(obj => obj.key.startsWith(currentPrefix) && !obj.key.slice(currentPrefix.length).includes('/')).map(obj => obj.element) const total = filesInView.length;
: Array.from(document.querySelectorAll('[data-object-row]')); const visibleSelectedCount = filesInView.filter(item => selectedRows.has(item.data.key)).length;
const total = visibleRowsRaw.filter(r => r.style.display !== 'none').length;
const visibleSelectedCount = visibleRowsRaw.filter(r => r.style.display !== 'none' && selectedRows.has(r.dataset.key)).length;
selectAllCheckbox.disabled = total === 0; selectAllCheckbox.disabled = total === 0;
selectAllCheckbox.checked = visibleSelectedCount > 0 && visibleSelectedCount === total && total > 0; selectAllCheckbox.checked = visibleSelectedCount > 0 && visibleSelectedCount === total && total > 0;
selectAllCheckbox.indeterminate = visibleSelectedCount > 0 && visibleSelectedCount < total; selectAllCheckbox.indeterminate = visibleSelectedCount > 0 && visibleSelectedCount < total;
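// getFoldersAtPrefix() (used earlier in this file and presumably feeding
// visibleItems) is assumed to derive the immediate children of a prefix from the
// flat allObjects list: direct files plus one synthetic folder per next path
// segment. A minimal sketch under that assumption:
const getFoldersAtPrefixSketch = (prefix) => {
  const folders = new Set();
  const files = [];
  allObjects.forEach((obj) => {
    if (!obj.key.startsWith(prefix)) return;
    const rest = obj.key.slice(prefix.length);
    const slashIndex = rest.indexOf('/');
    if (slashIndex === -1) {
      files.push(obj);                                     // direct child file
    } else {
      folders.add(prefix + rest.slice(0, slashIndex + 1)); // e.g. "photos/2024/"
    }
  });
  return { folders: Array.from(folders), files };
};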
@@ -3287,28 +3404,6 @@
} }
}; };
const updateFolderViewStatus = () => {
if (!folderViewStatus || !loadMoreStatus) return;
if (currentPrefix) {
const { folders, files } = getFoldersAtPrefix(currentPrefix);
const visibleCount = folders.length + files.length;
const folderObjectCount = allObjects.filter(obj => obj.key.startsWith(currentPrefix)).length;
const folderMayHaveMore = hasMoreObjects && folderObjectCount > 0;
if (folderMayHaveMore) {
folderViewStatus.textContent = `Showing ${visibleCount} items in folder • more may be available`;
folderViewStatus.classList.remove('d-none');
loadMoreStatus.classList.add('d-none');
} else {
folderViewStatus.textContent = `${visibleCount} items in folder`;
folderViewStatus.classList.remove('d-none');
loadMoreStatus.classList.add('d-none');
}
} else {
folderViewStatus.classList.add('d-none');
loadMoreStatus.classList.remove('d-none');
}
};
document.getElementById('object-search')?.addEventListener('input', (event) => { document.getElementById('object-search')?.addEventListener('input', (event) => {
currentFilterTerm = event.target.value.toLowerCase(); currentFilterTerm = event.target.value.toLowerCase();
updateFilterWarning(); updateFilterWarning();