Release v0.1.0 Beta

Commit f400cedf02 · 2025-11-21 22:01:34 +08:00
40 changed files with 10720 additions and 0 deletions

app/__init__.py (Normal file, 215 additions)

@@ -0,0 +1,215 @@
"""Application factory for the mini S3-compatible object store."""
from __future__ import annotations
import logging
import time
import uuid
from logging.handlers import RotatingFileHandler
from pathlib import Path
from datetime import timedelta
from typing import Any, Dict, Optional
from flask import Flask, g, has_request_context, redirect, render_template, request, url_for
from flask_cors import CORS
from flask_wtf.csrf import CSRFError
from .bucket_policies import BucketPolicyStore
from .config import AppConfig
from .connections import ConnectionStore
from .extensions import limiter, csrf
from .iam import IamService
from .replication import ReplicationManager
from .secret_store import EphemeralSecretStore
from .storage import ObjectStorage
from .version import get_version
def create_app(
test_config: Optional[Dict[str, Any]] = None,
*,
include_api: bool = True,
include_ui: bool = True,
) -> Flask:
"""Create and configure the Flask application."""
config = AppConfig.from_env(test_config)
project_root = Path(__file__).resolve().parent.parent
app = Flask(
__name__,
static_folder=str(project_root / "static"),
template_folder=str(project_root / "templates"),
)
app.config.update(config.to_flask_config())
if test_config:
app.config.update(test_config)
app.config.setdefault("APP_VERSION", get_version())
app.permanent_session_lifetime = timedelta(days=int(app.config.get("SESSION_LIFETIME_DAYS", 30)))
if app.config.get("TESTING"):
app.config.setdefault("WTF_CSRF_ENABLED", False)
_configure_cors(app)
_configure_logging(app)
limiter.init_app(app)
csrf.init_app(app)
storage = ObjectStorage(Path(app.config["STORAGE_ROOT"]))
iam = IamService(
Path(app.config["IAM_CONFIG"]),
auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5),
auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15),
)
bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))
# Initialize Replication components
connections_path = Path(app.config["STORAGE_ROOT"]) / ".connections.json"
replication_rules_path = Path(app.config["STORAGE_ROOT"]) / ".replication_rules.json"
connections = ConnectionStore(connections_path)
replication = ReplicationManager(storage, connections, replication_rules_path)
app.extensions["object_storage"] = storage
app.extensions["iam"] = iam
app.extensions["bucket_policies"] = bucket_policies
app.extensions["secret_store"] = secret_store
app.extensions["limiter"] = limiter
app.extensions["connections"] = connections
app.extensions["replication"] = replication
@app.errorhandler(500)
def internal_error(error):
return render_template('500.html'), 500
@app.errorhandler(CSRFError)
def handle_csrf_error(e):
return render_template('csrf_error.html', reason=e.description), 400
@app.template_filter("filesizeformat")
def filesizeformat(value: int) -> str:
"""Format bytes as human-readable file size."""
for unit in ["B", "KB", "MB", "GB", "TB", "PB"]:
if abs(value) < 1024.0 or unit == "PB":
if unit == "B":
return f"{int(value)} {unit}"
return f"{value:.1f} {unit}"
value /= 1024.0
return f"{value:.1f} PB"
if include_api:
from .s3_api import s3_api_bp
app.register_blueprint(s3_api_bp)
csrf.exempt(s3_api_bp)
if include_ui:
from .ui import ui_bp
app.register_blueprint(ui_bp)
if not include_api:
@app.get("/")
def ui_root_redirect():
return redirect(url_for("ui.buckets_overview"))
@app.errorhandler(404)
def handle_not_found(error):
wants_html = request.accept_mimetypes.accept_html
path = request.path or ""
if include_ui and wants_html:
if not include_api or path.startswith("/ui") or path == "/":
return render_template("404.html"), 404
return error
@app.get("/healthz")
def healthcheck() -> Dict[str, str]:
return {"status": "ok", "version": app.config.get("APP_VERSION", "unknown")}
return app
def create_api_app(test_config: Optional[Dict[str, Any]] = None) -> Flask:
return create_app(test_config, include_api=True, include_ui=False)
def create_ui_app(test_config: Optional[Dict[str, Any]] = None) -> Flask:
return create_app(test_config, include_api=False, include_ui=True)
def _configure_cors(app: Flask) -> None:
origins = app.config.get("CORS_ORIGINS", ["*"])
methods = app.config.get("CORS_METHODS", ["GET", "PUT", "POST", "DELETE", "OPTIONS"])
allow_headers = app.config.get(
"CORS_ALLOW_HEADERS",
["Content-Type", "X-Access-Key", "X-Secret-Key", "X-Amz-Date", "X-Amz-SignedHeaders"],
)
CORS(
app,
resources={r"/*": {"origins": origins, "methods": methods, "allow_headers": allow_headers}},
supports_credentials=True,
)
class _RequestContextFilter(logging.Filter):
"""Inject request-specific attributes into log records."""
def filter(self, record: logging.LogRecord) -> bool: # pragma: no cover - simple boilerplate
if has_request_context():
record.request_id = getattr(g, "request_id", "-")
record.path = request.path
record.method = request.method
record.remote_addr = request.remote_addr or "-"
else:
record.request_id = getattr(record, "request_id", "-")
record.path = getattr(record, "path", "-")
record.method = getattr(record, "method", "-")
record.remote_addr = getattr(record, "remote_addr", "-")
return True
def _configure_logging(app: Flask) -> None:
log_file = Path(app.config["LOG_FILE"])
log_file.parent.mkdir(parents=True, exist_ok=True)
handler = RotatingFileHandler(
log_file,
maxBytes=int(app.config.get("LOG_MAX_BYTES", 5 * 1024 * 1024)),
backupCount=int(app.config.get("LOG_BACKUP_COUNT", 3)),
encoding="utf-8",
)
formatter = logging.Formatter(
"%(asctime)s | %(levelname)s | %(request_id)s | %(method)s %(path)s | %(message)s"
)
handler.setFormatter(formatter)
handler.addFilter(_RequestContextFilter())
logger = app.logger
logger.handlers.clear()
logger.addHandler(handler)
logger.setLevel(getattr(logging, app.config.get("LOG_LEVEL", "INFO"), logging.INFO))
@app.before_request
def _log_request_start() -> None:
g.request_id = uuid.uuid4().hex
g.request_started_at = time.perf_counter()
app.logger.info(
"Request started",
extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
)
@app.after_request
def _log_request_end(response):
duration_ms = 0.0
if hasattr(g, "request_started_at"):
duration_ms = (time.perf_counter() - g.request_started_at) * 1000
request_id = getattr(g, "request_id", uuid.uuid4().hex)
response.headers.setdefault("X-Request-ID", request_id)
app.logger.info(
"Request completed",
extra={
"path": request.path,
"method": request.method,
"remote_addr": request.remote_addr,
},
)
response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
response.headers["Server"] = "MyFISO"
return response
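A minimal way to serve this factory, assuming the package is importable as app; the filename run.py and the port are illustrative, not part of this release:

# run.py (illustrative): serve the combined UI + API app with default configuration.
from app import create_app

application = create_app()

if __name__ == "__main__":
    application.run(host="127.0.0.1", port=5000)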

app/bucket_policies.py (Normal file, 249 additions)

@@ -0,0 +1,249 @@
"""Bucket policy loader/enforcer with a subset of AWS semantics."""
from __future__ import annotations
import json
from dataclasses import dataclass
from fnmatch import fnmatch
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence
RESOURCE_PREFIX = "arn:aws:s3:::"
ACTION_ALIASES = {
"s3:getobject": "read",
"s3:getobjectversion": "read",
"s3:listbucket": "list",
"s3:listallmybuckets": "list",
"s3:putobject": "write",
"s3:createbucket": "write",
"s3:deleteobject": "delete",
"s3:deleteobjectversion": "delete",
"s3:deletebucket": "delete",
"s3:putobjectacl": "share",
"s3:putbucketpolicy": "policy",
}
def _normalize_action(action: str) -> str:
action = action.strip().lower()
if action == "*":
return "*"
return ACTION_ALIASES.get(action, action)
def _normalize_actions(actions: Iterable[str]) -> List[str]:
values: List[str] = []
for action in actions:
canonical = _normalize_action(action)
if canonical == "*" and "*" not in values:
return ["*"]
if canonical and canonical not in values:
values.append(canonical)
return values
def _normalize_principals(principal_field: Any) -> List[str] | str:
if principal_field == "*":
return "*"
def _collect(values: Any) -> List[str]:
if values is None:
return []
if values == "*":
return ["*"]
if isinstance(values, str):
return [values]
if isinstance(values, dict):
aggregated: List[str] = []
for nested in values.values():
chunk = _collect(nested)
if "*" in chunk:
return ["*"]
aggregated.extend(chunk)
return aggregated
if isinstance(values, Iterable):
aggregated = []
for nested in values:
chunk = _collect(nested)
if "*" in chunk:
return ["*"]
aggregated.extend(chunk)
return aggregated
return [str(values)]
normalized: List[str] = []
for entry in _collect(principal_field):
token = str(entry).strip()
if token == "*":
return "*"
if token and token not in normalized:
normalized.append(token)
return normalized or "*"
def _parse_resource(resource: str) -> tuple[str | None, str | None]:
if not resource.startswith(RESOURCE_PREFIX):
return None, None
remainder = resource[len(RESOURCE_PREFIX) :]
if "/" not in remainder:
bucket = remainder or "*"
return bucket, None
bucket, _, key_pattern = remainder.partition("/")
return bucket or "*", key_pattern or "*"
@dataclass
class BucketPolicyStatement:
sid: Optional[str]
effect: str
principals: List[str] | str
actions: List[str]
resources: List[tuple[str | None, str | None]]
def matches_principal(self, access_key: Optional[str]) -> bool:
if self.principals == "*":
return True
if access_key is None:
return False
return access_key in self.principals
def matches_action(self, action: str) -> bool:
action = _normalize_action(action)
return "*" in self.actions or action in self.actions
def matches_resource(self, bucket: Optional[str], object_key: Optional[str]) -> bool:
bucket = (bucket or "*").lower()
key = object_key or ""
for resource_bucket, key_pattern in self.resources:
resource_bucket = (resource_bucket or "*").lower()
if resource_bucket not in {"*", bucket}:
continue
if key_pattern is None:
if not key:
return True
continue
if fnmatch(key, key_pattern):
return True
return False
class BucketPolicyStore:
"""Loads bucket policies from disk and evaluates statements."""
def __init__(self, policy_path: Path) -> None:
self.policy_path = Path(policy_path)
self.policy_path.parent.mkdir(parents=True, exist_ok=True)
if not self.policy_path.exists():
self.policy_path.write_text(json.dumps({"policies": {}}, indent=2))
self._raw: Dict[str, Any] = {}
self._policies: Dict[str, List[BucketPolicyStatement]] = {}
self._load()
self._last_mtime = self._current_mtime()
def maybe_reload(self) -> None:
current = self._current_mtime()
if current is None or current == self._last_mtime:
return
self._load()
self._last_mtime = current
def _current_mtime(self) -> float | None:
try:
return self.policy_path.stat().st_mtime
except FileNotFoundError:
return None
# ------------------------------------------------------------------
def evaluate(
self,
access_key: Optional[str],
bucket: Optional[str],
object_key: Optional[str],
action: str,
) -> str | None:
bucket = (bucket or "").lower()
statements = self._policies.get(bucket) or []
decision: Optional[str] = None
for statement in statements:
if not statement.matches_principal(access_key):
continue
if not statement.matches_action(action):
continue
if not statement.matches_resource(bucket, object_key):
continue
if statement.effect == "deny":
return "deny"
decision = "allow"
return decision
def get_policy(self, bucket: str) -> Dict[str, Any] | None:
return self._raw.get(bucket.lower())
def set_policy(self, bucket: str, policy_payload: Dict[str, Any]) -> None:
bucket = bucket.lower()
statements = self._normalize_policy(policy_payload)
if not statements:
raise ValueError("Policy must include at least one valid statement")
self._raw[bucket] = policy_payload
self._policies[bucket] = statements
self._persist()
def delete_policy(self, bucket: str) -> None:
bucket = bucket.lower()
self._raw.pop(bucket, None)
self._policies.pop(bucket, None)
self._persist()
# ------------------------------------------------------------------
def _load(self) -> None:
try:
content = self.policy_path.read_text(encoding='utf-8')
raw_payload = json.loads(content)
except FileNotFoundError:
raw_payload = {"policies": {}}
except json.JSONDecodeError as e:
raise ValueError(f"Corrupted bucket policy file (invalid JSON): {e}")
except PermissionError as e:
raise ValueError(f"Cannot read bucket policy file (permission denied): {e}")
except (OSError, ValueError) as e:
raise ValueError(f"Failed to load bucket policies: {e}")
policies: Dict[str, Any] = raw_payload.get("policies", {})
parsed: Dict[str, List[BucketPolicyStatement]] = {}
for bucket, policy in policies.items():
parsed[bucket.lower()] = self._normalize_policy(policy)
self._raw = {bucket.lower(): policy for bucket, policy in policies.items()}
self._policies = parsed
def _persist(self) -> None:
payload = {"policies": self._raw}
self.policy_path.write_text(json.dumps(payload, indent=2))
def _normalize_policy(self, policy: Dict[str, Any]) -> List[BucketPolicyStatement]:
statements_raw: Sequence[Dict[str, Any]] = policy.get("Statement", [])
statements: List[BucketPolicyStatement] = []
for statement in statements_raw:
actions = _normalize_actions(statement.get("Action", []))
principals = _normalize_principals(statement.get("Principal", "*"))
resources_field = statement.get("Resource", [])
if isinstance(resources_field, str):
resources_field = [resources_field]
resources: List[tuple[str | None, str | None]] = []
for resource in resources_field:
bucket, pattern = _parse_resource(str(resource))
if bucket:
resources.append((bucket, pattern))
if not resources:
continue
effect = statement.get("Effect", "Allow").lower()
statements.append(
BucketPolicyStatement(
sid=statement.get("Sid"),
effect=effect,
principals=principals,
actions=actions or ["*"],
resources=resources,
)
)
return statements
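As a sketch of the accepted policy grammar (the bucket name, key, and file path below are illustrative), a store can be loaded and queried like this; any matching Deny statement wins, a matching Allow yields "allow", and no match yields None:

from pathlib import Path
from app.bucket_policies import BucketPolicyStore

store = BucketPolicyStore(Path("data/.myfsio.sys/config/bucket_policies.json"))
store.set_policy("public-assets", {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "PublicRead",
            "Effect": "Allow",
            "Principal": "*",
            "Action": ["s3:GetObject"],
            "Resource": ["arn:aws:s3:::public-assets/*"],
        }
    ],
})
print(store.evaluate(None, "public-assets", "logo.png", "s3:GetObject"))  # "allow"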

app/config.py (Normal file, 192 additions)

@@ -0,0 +1,192 @@
"""Configuration helpers for the S3 clone application."""
from __future__ import annotations
import os
import secrets
import shutil
import warnings
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Optional
PROJECT_ROOT = Path(__file__).resolve().parent.parent
def _prepare_config_file(active_path: Path, legacy_path: Optional[Path] = None) -> Path:
"""Ensure config directories exist and migrate legacy files when possible."""
active_path = Path(active_path)
active_path.parent.mkdir(parents=True, exist_ok=True)
if legacy_path:
legacy_path = Path(legacy_path)
if not active_path.exists() and legacy_path.exists():
legacy_path.parent.mkdir(parents=True, exist_ok=True)
try:
shutil.move(str(legacy_path), str(active_path))
except OSError:
shutil.copy2(legacy_path, active_path)
try:
legacy_path.unlink(missing_ok=True)
except OSError:
pass
return active_path
@dataclass
class AppConfig:
storage_root: Path
max_upload_size: int
ui_page_size: int
secret_key: str
iam_config_path: Path
bucket_policy_path: Path
api_base_url: str
aws_region: str
aws_service: str
ui_enforce_bucket_policies: bool
log_level: str
log_path: Path
log_max_bytes: int
log_backup_count: int
ratelimit_default: str
ratelimit_storage_uri: str
cors_origins: list[str]
cors_methods: list[str]
cors_allow_headers: list[str]
session_lifetime_days: int
auth_max_attempts: int
auth_lockout_minutes: int
bulk_delete_max_keys: int
secret_ttl_seconds: int
stream_chunk_size: int
multipart_min_part_size: int
@classmethod
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
overrides = overrides or {}
def _get(name: str, default: Any) -> Any:
return overrides.get(name, os.getenv(name, default))
storage_root = Path(_get("STORAGE_ROOT", PROJECT_ROOT / "data")).resolve()
max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024)) # 1 GiB default
ui_page_size = int(_get("UI_PAGE_SIZE", 100))
auth_max_attempts = int(_get("AUTH_MAX_ATTEMPTS", 5))
auth_lockout_minutes = int(_get("AUTH_LOCKOUT_MINUTES", 15))
bulk_delete_max_keys = int(_get("BULK_DELETE_MAX_KEYS", 500))
secret_ttl_seconds = int(_get("SECRET_TTL_SECONDS", 300))
stream_chunk_size = int(_get("STREAM_CHUNK_SIZE", 64 * 1024))
multipart_min_part_size = int(_get("MULTIPART_MIN_PART_SIZE", 5 * 1024 * 1024))
default_secret = "dev-secret-key"
secret_key = str(_get("SECRET_KEY", default_secret))
if not secret_key or secret_key == default_secret:
generated = secrets.token_urlsafe(32)
if secret_key == default_secret:
warnings.warn("Using insecure default SECRET_KEY. A random value has been generated; set SECRET_KEY for production", RuntimeWarning)
secret_key = generated
iam_env_override = "IAM_CONFIG" in overrides or "IAM_CONFIG" in os.environ
bucket_policy_override = "BUCKET_POLICY_PATH" in overrides or "BUCKET_POLICY_PATH" in os.environ
default_iam_path = PROJECT_ROOT / "data" / ".myfsio.sys" / "config" / "iam.json"
default_bucket_policy_path = PROJECT_ROOT / "data" / ".myfsio.sys" / "config" / "bucket_policies.json"
iam_config_path = Path(_get("IAM_CONFIG", default_iam_path)).resolve()
bucket_policy_path = Path(_get("BUCKET_POLICY_PATH", default_bucket_policy_path)).resolve()
iam_config_path = _prepare_config_file(
iam_config_path,
legacy_path=None if iam_env_override else PROJECT_ROOT / "data" / "iam.json",
)
bucket_policy_path = _prepare_config_file(
bucket_policy_path,
legacy_path=None if bucket_policy_override else PROJECT_ROOT / "data" / "bucket_policies.json",
)
api_base_url = str(_get("API_BASE_URL", "http://127.0.0.1:5000"))
aws_region = str(_get("AWS_REGION", "us-east-1"))
aws_service = str(_get("AWS_SERVICE", "s3"))
enforce_ui_policies = str(_get("UI_ENFORCE_BUCKET_POLICIES", "0")).lower() in {"1", "true", "yes", "on"}
log_level = str(_get("LOG_LEVEL", "INFO")).upper()
log_dir = Path(_get("LOG_DIR", PROJECT_ROOT / "logs")).resolve()
log_dir.mkdir(parents=True, exist_ok=True)
log_path = log_dir / str(_get("LOG_FILE", "app.log"))
log_max_bytes = int(_get("LOG_MAX_BYTES", 5 * 1024 * 1024))
log_backup_count = int(_get("LOG_BACKUP_COUNT", 3))
ratelimit_default = str(_get("RATE_LIMIT_DEFAULT", "200 per minute"))
ratelimit_storage_uri = str(_get("RATE_LIMIT_STORAGE_URI", "memory://"))
def _csv(value: str, default: list[str]) -> list[str]:
if not value:
return default
parts = [segment.strip() for segment in value.split(",") if segment.strip()]
return parts or default
cors_origins = _csv(str(_get("CORS_ORIGINS", "*")), ["*"])
cors_methods = _csv(str(_get("CORS_METHODS", "GET,PUT,POST,DELETE,OPTIONS")), ["GET", "PUT", "POST", "DELETE", "OPTIONS"])
cors_allow_headers = _csv(str(_get("CORS_ALLOW_HEADERS", "Content-Type,X-Access-Key,X-Secret-Key,X-Amz-Algorithm,X-Amz-Credential,X-Amz-Date,X-Amz-Expires,X-Amz-SignedHeaders,X-Amz-Signature")), [
"Content-Type",
"X-Access-Key",
"X-Secret-Key",
"X-Amz-Algorithm",
"X-Amz-Credential",
"X-Amz-Date",
"X-Amz-Expires",
"X-Amz-SignedHeaders",
"X-Amz-Signature",
])
session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30))
return cls(storage_root=storage_root,
max_upload_size=max_upload_size,
ui_page_size=ui_page_size,
secret_key=secret_key,
iam_config_path=iam_config_path,
bucket_policy_path=bucket_policy_path,
api_base_url=api_base_url,
aws_region=aws_region,
aws_service=aws_service,
ui_enforce_bucket_policies=enforce_ui_policies,
log_level=log_level,
log_path=log_path,
log_max_bytes=log_max_bytes,
log_backup_count=log_backup_count,
ratelimit_default=ratelimit_default,
ratelimit_storage_uri=ratelimit_storage_uri,
cors_origins=cors_origins,
cors_methods=cors_methods,
cors_allow_headers=cors_allow_headers,
session_lifetime_days=session_lifetime_days,
auth_max_attempts=auth_max_attempts,
auth_lockout_minutes=auth_lockout_minutes,
bulk_delete_max_keys=bulk_delete_max_keys,
secret_ttl_seconds=secret_ttl_seconds,
stream_chunk_size=stream_chunk_size,
multipart_min_part_size=multipart_min_part_size)
def to_flask_config(self) -> Dict[str, Any]:
return {
"STORAGE_ROOT": str(self.storage_root),
"MAX_CONTENT_LENGTH": self.max_upload_size,
"UI_PAGE_SIZE": self.ui_page_size,
"SECRET_KEY": self.secret_key,
"IAM_CONFIG": str(self.iam_config_path),
"BUCKET_POLICY_PATH": str(self.bucket_policy_path),
"API_BASE_URL": self.api_base_url,
"AWS_REGION": self.aws_region,
"AWS_SERVICE": self.aws_service,
"UI_ENFORCE_BUCKET_POLICIES": self.ui_enforce_bucket_policies,
"AUTH_MAX_ATTEMPTS": self.auth_max_attempts,
"AUTH_LOCKOUT_MINUTES": self.auth_lockout_minutes,
"BULK_DELETE_MAX_KEYS": self.bulk_delete_max_keys,
"SECRET_TTL_SECONDS": self.secret_ttl_seconds,
"STREAM_CHUNK_SIZE": self.stream_chunk_size,
"MULTIPART_MIN_PART_SIZE": self.multipart_min_part_size,
"LOG_LEVEL": self.log_level,
"LOG_FILE": str(self.log_path),
"LOG_MAX_BYTES": self.log_max_bytes,
"LOG_BACKUP_COUNT": self.log_backup_count,
"RATELIMIT_DEFAULT": self.ratelimit_default,
"RATELIMIT_STORAGE_URI": self.ratelimit_storage_uri,
"CORS_ORIGINS": self.cors_origins,
"CORS_METHODS": self.cors_methods,
"CORS_ALLOW_HEADERS": self.cors_allow_headers,
"SESSION_LIFETIME_DAYS": self.session_lifetime_days,
}
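A hedged sketch of supplying overrides programmatically (the path and sizes are illustrative); inside from_env, overrides take precedence over environment variables:

from app.config import AppConfig

config = AppConfig.from_env({
    "STORAGE_ROOT": "/tmp/myfsio-data",
    "MAX_UPLOAD_SIZE": 256 * 1024 * 1024,  # 256 MiB instead of the 1 GiB default
    "UI_ENFORCE_BUCKET_POLICIES": "1",
})
print(config.to_flask_config()["MAX_CONTENT_LENGTH"])  # 268435456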

app/connections.py (Normal file, 61 additions)

@@ -0,0 +1,61 @@
"""Manage remote S3 connections."""
from __future__ import annotations
import json
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Dict, List, Optional
from .config import AppConfig
@dataclass
class RemoteConnection:
id: str
name: str
endpoint_url: str
access_key: str
secret_key: str
region: str = "us-east-1"
class ConnectionStore:
def __init__(self, config_path: Path) -> None:
self.config_path = config_path
self._connections: Dict[str, RemoteConnection] = {}
self.reload()
def reload(self) -> None:
if not self.config_path.exists():
self._connections = {}
return
try:
with open(self.config_path, "r") as f:
data = json.load(f)
for item in data:
conn = RemoteConnection(**item)
self._connections[conn.id] = conn
except (OSError, json.JSONDecodeError):
self._connections = {}
def save(self) -> None:
self.config_path.parent.mkdir(parents=True, exist_ok=True)
data = [asdict(conn) for conn in self._connections.values()]
with open(self.config_path, "w") as f:
json.dump(data, f, indent=2)
def list(self) -> List[RemoteConnection]:
return list(self._connections.values())
def get(self, connection_id: str) -> Optional[RemoteConnection]:
return self._connections.get(connection_id)
def add(self, connection: RemoteConnection) -> None:
self._connections[connection.id] = connection
self.save()
def delete(self, connection_id: str) -> None:
if connection_id in self._connections:
del self._connections[connection_id]
self.save()
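A short usage sketch with made-up endpoint and credentials; the store rewrites its JSON file on every add or delete:

from pathlib import Path
from app.connections import ConnectionStore, RemoteConnection

store = ConnectionStore(Path("data/.connections.json"))
store.add(RemoteConnection(
    id="backup-site",
    name="Backup MinIO",
    endpoint_url="http://backup.example.com:9000",
    access_key="AKIAEXAMPLE",
    secret_key="example-secret",
))
print([conn.name for conn in store.list()])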

app/extensions.py (Normal file, 10 additions)

@@ -0,0 +1,10 @@
"""Application-wide extension instances."""
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
from flask_wtf import CSRFProtect
# Shared rate limiter instance; configured in app factory.
limiter = Limiter(key_func=get_remote_address)
# Global CSRF protection for UI routes.
csrf = CSRFProtect()

app/iam.py (Normal file, 404 additions)

@@ -0,0 +1,404 @@
"""Lightweight IAM-style user and policy management."""
from __future__ import annotations
import json
import math
import secrets
from collections import deque
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set
class IamError(RuntimeError):
"""Raised when authentication or authorization fails."""
S3_ACTIONS = {"list", "read", "write", "delete", "share", "policy"}
IAM_ACTIONS = {
"iam:list_users",
"iam:create_user",
"iam:delete_user",
"iam:rotate_key",
"iam:update_policy",
}
ALLOWED_ACTIONS = (S3_ACTIONS | IAM_ACTIONS) | {"iam:*"}
ACTION_ALIASES = {
"list": "list",
"s3:listbucket": "list",
"s3:listallmybuckets": "list",
"read": "read",
"s3:getobject": "read",
"s3:getobjectversion": "read",
"write": "write",
"s3:putobject": "write",
"s3:createbucket": "write",
"delete": "delete",
"s3:deleteobject": "delete",
"s3:deletebucket": "delete",
"share": "share",
"s3:putobjectacl": "share",
"policy": "policy",
"s3:putbucketpolicy": "policy",
"iam:listusers": "iam:list_users",
"iam:createuser": "iam:create_user",
"iam:deleteuser": "iam:delete_user",
"iam:rotateaccesskey": "iam:rotate_key",
"iam:putuserpolicy": "iam:update_policy",
"iam:*": "iam:*",
}
@dataclass
class Policy:
bucket: str
actions: Set[str]
@dataclass
class Principal:
access_key: str
display_name: str
policies: List[Policy]
class IamService:
"""Loads IAM configuration, manages users, and evaluates policies."""
def __init__(self, config_path: Path, auth_max_attempts: int = 5, auth_lockout_minutes: int = 15) -> None:
self.config_path = Path(config_path)
self.auth_max_attempts = auth_max_attempts
self.auth_lockout_window = timedelta(minutes=auth_lockout_minutes)
self.config_path.parent.mkdir(parents=True, exist_ok=True)
if not self.config_path.exists():
self._write_default()
self._users: Dict[str, Dict[str, Any]] = {}
self._raw_config: Dict[str, Any] = {}
self._failed_attempts: Dict[str, Deque[datetime]] = {}
self._load()
# ---------------------- authz helpers ----------------------
def authenticate(self, access_key: str, secret_key: str) -> Principal:
access_key = (access_key or "").strip()
secret_key = (secret_key or "").strip()
if not access_key or not secret_key:
raise IamError("Missing access credentials")
if self._is_locked_out(access_key):
seconds = self._seconds_until_unlock(access_key)
raise IamError(
f"Access temporarily locked. Try again in {seconds} seconds."
)
record = self._users.get(access_key)
if not record or record["secret_key"] != secret_key:
self._record_failed_attempt(access_key)
raise IamError("Invalid credentials")
self._clear_failed_attempts(access_key)
return self._build_principal(access_key, record)
def _record_failed_attempt(self, access_key: str) -> None:
if not access_key:
return
attempts = self._failed_attempts.setdefault(access_key, deque())
self._prune_attempts(attempts)
attempts.append(datetime.now())
def _clear_failed_attempts(self, access_key: str) -> None:
if not access_key:
return
self._failed_attempts.pop(access_key, None)
def _prune_attempts(self, attempts: Deque[datetime]) -> None:
cutoff = datetime.now() - self.auth_lockout_window
while attempts and attempts[0] < cutoff:
attempts.popleft()
def _is_locked_out(self, access_key: str) -> bool:
if not access_key:
return False
attempts = self._failed_attempts.get(access_key)
if not attempts:
return False
self._prune_attempts(attempts)
return len(attempts) >= self.auth_max_attempts
def _seconds_until_unlock(self, access_key: str) -> int:
attempts = self._failed_attempts.get(access_key)
if not attempts:
return 0
self._prune_attempts(attempts)
if len(attempts) < self.auth_max_attempts:
return 0
oldest = attempts[0]
elapsed = (datetime.now() - oldest).total_seconds()
return int(max(0, self.auth_lockout_window.total_seconds() - elapsed))
def principal_for_key(self, access_key: str) -> Principal:
record = self._users.get(access_key)
if not record:
raise IamError("Unknown access key")
return self._build_principal(access_key, record)
def secret_for_key(self, access_key: str) -> str:
record = self._users.get(access_key)
if not record:
raise IamError("Unknown access key")
return record["secret_key"]
def authorize(self, principal: Principal, bucket_name: str | None, action: str) -> None:
action = self._normalize_action(action)
if action not in ALLOWED_ACTIONS:
raise IamError(f"Unknown action '{action}'")
bucket_name = bucket_name or "*"
normalized = bucket_name.lower() if bucket_name != "*" else bucket_name
if not self._is_allowed(principal, normalized, action):
raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'")
def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]:
return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")]
def _is_allowed(self, principal: Principal, bucket_name: str, action: str) -> bool:
bucket_name = bucket_name.lower()
for policy in principal.policies:
if policy.bucket not in {"*", bucket_name}:
continue
if "*" in policy.actions or action in policy.actions:
return True
if "iam:*" in policy.actions and action.startswith("iam:"):
return True
return False
# ---------------------- management helpers ----------------------
def list_users(self) -> List[Dict[str, Any]]:
listing: List[Dict[str, Any]] = []
for access_key, record in self._users.items():
listing.append(
{
"access_key": access_key,
"display_name": record["display_name"],
"policies": [
{"bucket": policy.bucket, "actions": sorted(policy.actions)}
for policy in record["policies"]
],
}
)
return listing
def create_user(
self,
*,
display_name: str,
policies: Optional[Sequence[Dict[str, Any]]] = None,
access_key: str | None = None,
secret_key: str | None = None,
) -> Dict[str, str]:
access_key = (access_key or self._generate_access_key()).strip()
if not access_key:
raise IamError("Access key cannot be empty")
if access_key in self._users:
raise IamError("Access key already exists")
secret_key = secret_key or self._generate_secret_key()
sanitized_policies = self._prepare_policy_payload(policies)
record = {
"access_key": access_key,
"secret_key": secret_key,
"display_name": display_name or access_key,
"policies": sanitized_policies,
}
self._raw_config.setdefault("users", []).append(record)
self._save()
self._load()
return {"access_key": access_key, "secret_key": secret_key}
def rotate_secret(self, access_key: str) -> str:
user = self._get_raw_user(access_key)
new_secret = self._generate_secret_key()
user["secret_key"] = new_secret
self._save()
self._load()
return new_secret
def update_user(self, access_key: str, display_name: str) -> None:
user = self._get_raw_user(access_key)
user["display_name"] = display_name
self._save()
self._load()
def delete_user(self, access_key: str) -> None:
users = self._raw_config.get("users", [])
if len(users) <= 1:
raise IamError("Cannot delete the only user")
remaining = [user for user in users if user["access_key"] != access_key]
if len(remaining) == len(users):
raise IamError("User not found")
self._raw_config["users"] = remaining
self._save()
self._load()
def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None:
user = self._get_raw_user(access_key)
user["policies"] = self._prepare_policy_payload(policies)
self._save()
self._load()
# ---------------------- config helpers ----------------------
def _load(self) -> None:
try:
content = self.config_path.read_text(encoding='utf-8')
raw = json.loads(content)
except FileNotFoundError:
raise IamError(f"IAM config not found: {self.config_path}")
except json.JSONDecodeError as e:
raise IamError(f"Corrupted IAM config (invalid JSON): {e}")
except PermissionError as e:
raise IamError(f"Cannot read IAM config (permission denied): {e}")
except (OSError, ValueError) as e:
raise IamError(f"Failed to load IAM config: {e}")
users: Dict[str, Dict[str, Any]] = {}
for user in raw.get("users", []):
policies = self._build_policy_objects(user.get("policies", []))
users[user["access_key"]] = {
"secret_key": user["secret_key"],
"display_name": user.get("display_name", user["access_key"]),
"policies": policies,
}
if not users:
raise IamError("IAM configuration contains no users")
self._users = users
self._raw_config = {
"users": [
{
"access_key": entry["access_key"],
"secret_key": entry["secret_key"],
"display_name": entry.get("display_name", entry["access_key"]),
"policies": entry.get("policies", []),
}
for entry in raw.get("users", [])
]
}
def _save(self) -> None:
try:
temp_path = self.config_path.with_suffix('.json.tmp')
temp_path.write_text(json.dumps(self._raw_config, indent=2), encoding='utf-8')
temp_path.replace(self.config_path)
except (OSError, PermissionError) as e:
raise IamError(f"Cannot save IAM config: {e}")
# ---------------------- insight helpers ----------------------
def config_summary(self) -> Dict[str, Any]:
return {
"path": str(self.config_path),
"user_count": len(self._users),
"allowed_actions": sorted(ALLOWED_ACTIONS),
}
def export_config(self, mask_secrets: bool = True) -> Dict[str, Any]:
payload: Dict[str, Any] = {"users": []}
for user in self._raw_config.get("users", []):
record = dict(user)
if mask_secrets and "secret_key" in record:
record["secret_key"] = "••••••••••"
payload["users"].append(record)
return payload
def _build_policy_objects(self, policies: Sequence[Dict[str, Any]]) -> List[Policy]:
entries: List[Policy] = []
for policy in policies:
bucket = str(policy.get("bucket", "*")).lower()
raw_actions = policy.get("actions", [])
if isinstance(raw_actions, str):
raw_actions = [raw_actions]
action_set: Set[str] = set()
for action in raw_actions:
canonical = self._normalize_action(action)
if canonical == "*":
action_set = set(ALLOWED_ACTIONS)
break
if canonical:
action_set.add(canonical)
if action_set:
entries.append(Policy(bucket=bucket, actions=action_set))
return entries
def _prepare_policy_payload(self, policies: Optional[Sequence[Dict[str, Any]]]) -> List[Dict[str, Any]]:
if not policies:
policies = (
{
"bucket": "*",
"actions": ["list", "read", "write", "delete", "share", "policy"],
},
)
sanitized: List[Dict[str, Any]] = []
for policy in policies:
bucket = str(policy.get("bucket", "*")).lower()
raw_actions = policy.get("actions", [])
if isinstance(raw_actions, str):
raw_actions = [raw_actions]
action_set: Set[str] = set()
for action in raw_actions:
canonical = self._normalize_action(action)
if canonical == "*":
action_set = set(ALLOWED_ACTIONS)
break
if canonical:
action_set.add(canonical)
if not action_set:
continue
sanitized.append({"bucket": bucket, "actions": sorted(action_set)})
if not sanitized:
raise IamError("At least one policy with valid actions is required")
return sanitized
def _build_principal(self, access_key: str, record: Dict[str, Any]) -> Principal:
return Principal(
access_key=access_key,
display_name=record["display_name"],
policies=record["policies"],
)
def _normalize_action(self, action: str) -> str:
if not action:
return ""
lowered = action.strip().lower()
if lowered == "*":
return "*"
candidate = ACTION_ALIASES.get(lowered, lowered)
return candidate if candidate in ALLOWED_ACTIONS else ""
def _write_default(self) -> None:
default = {
"users": [
{
"access_key": "localadmin",
"secret_key": "localadmin",
"display_name": "Local Admin",
"policies": [
{"bucket": "*", "actions": list(ALLOWED_ACTIONS)}
],
}
]
}
self.config_path.write_text(json.dumps(default, indent=2))
def _generate_access_key(self) -> str:
return secrets.token_hex(8)
def _generate_secret_key(self) -> str:
return secrets.token_urlsafe(24)
def _get_raw_user(self, access_key: str) -> Dict[str, Any]:
for user in self._raw_config.get("users", []):
if user["access_key"] == access_key:
return user
raise IamError("User not found")
def get_secret_key(self, access_key: str) -> str | None:
record = self._users.get(access_key)
return record["secret_key"] if record else None
def get_principal(self, access_key: str) -> Principal | None:
record = self._users.get(access_key)
return self._build_principal(access_key, record) if record else None
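A hedged authentication and authorization sketch against a fresh config (the seeded localadmin credentials come from _write_default; the bucket name is illustrative):

from pathlib import Path
from app.iam import IamError, IamService

iam = IamService(Path("data/.myfsio.sys/config/iam.json"))
try:
    principal = iam.authenticate("localadmin", "localadmin")
    iam.authorize(principal, "demo-bucket", "s3:PutObject")  # normalized to "write"
except IamError as exc:
    print(f"rejected: {exc}")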

app/replication.py (Normal file, 121 additions)

@@ -0,0 +1,121 @@
"""Background replication worker."""
from __future__ import annotations
import logging
import threading
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Optional
import boto3
from botocore.exceptions import ClientError
from .connections import ConnectionStore, RemoteConnection
from .storage import ObjectStorage
logger = logging.getLogger(__name__)
@dataclass
class ReplicationRule:
bucket_name: str
target_connection_id: str
target_bucket: str
enabled: bool = True
class ReplicationManager:
def __init__(self, storage: ObjectStorage, connections: ConnectionStore, rules_path: Path) -> None:
self.storage = storage
self.connections = connections
self.rules_path = rules_path
self._rules: Dict[str, ReplicationRule] = {}
self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
self.reload_rules()
def reload_rules(self) -> None:
if not self.rules_path.exists():
self._rules = {}
return
try:
import json
with open(self.rules_path, "r") as f:
data = json.load(f)
for bucket, rule_data in data.items():
self._rules[bucket] = ReplicationRule(**rule_data)
except (OSError, ValueError) as e:
logger.error(f"Failed to load replication rules: {e}")
def save_rules(self) -> None:
import json
data = {b: rule.__dict__ for b, rule in self._rules.items()}
self.rules_path.parent.mkdir(parents=True, exist_ok=True)
with open(self.rules_path, "w") as f:
json.dump(data, f, indent=2)
def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]:
return self._rules.get(bucket_name)
def set_rule(self, rule: ReplicationRule) -> None:
self._rules[rule.bucket_name] = rule
self.save_rules()
def delete_rule(self, bucket_name: str) -> None:
if bucket_name in self._rules:
del self._rules[bucket_name]
self.save_rules()
def trigger_replication(self, bucket_name: str, object_key: str) -> None:
rule = self.get_rule(bucket_name)
if not rule or not rule.enabled:
return
connection = self.connections.get(rule.target_connection_id)
if not connection:
logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Connection {rule.target_connection_id} not found")
return
self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection)
def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection) -> None:
try:
# Build a boto3 client for the remote connection, then stream the local object to it.
# Note: this reaches directly into ObjectStorage instead of going through the S3 API layer.
s3 = boto3.client(
"s3",
endpoint_url=conn.endpoint_url,
aws_access_key_id=conn.access_key,
aws_secret_access_key=conn.secret_key,
region_name=conn.region,
)
# Stream the object from local storage; the context manager closes the handle for us.
meta = self.storage.get_object_meta(bucket_name, object_key)
if not meta:
return
with self.storage.open_object(bucket_name, object_key) as f:
extra_args = {}
if meta.metadata:
extra_args["Metadata"] = meta.metadata
s3.upload_fileobj(
f,
rule.target_bucket,
object_key,
ExtraArgs=extra_args
)
logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
except (ClientError, OSError, ValueError) as e:
logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}")
except Exception:
logger.exception(f"Unexpected error during replication for {bucket_name}/{object_key}")

app/s3_api.py (Normal file, 1245 additions)

File diff suppressed because it is too large.

app/secret_store.py (Normal file, 37 additions)

@@ -0,0 +1,37 @@
"""Ephemeral store for one-time secrets communicated to the UI."""
from __future__ import annotations
import secrets
import time
from typing import Any, Dict, Optional
class EphemeralSecretStore:
"""Keeps values in-memory for a short period and returns them once."""
def __init__(self, default_ttl: int = 300) -> None:
self._default_ttl = max(default_ttl, 1)
self._store: Dict[str, tuple[Any, float]] = {}
def remember(self, payload: Any, *, ttl: Optional[int] = None) -> str:
token = secrets.token_urlsafe(16)
expires_at = time.time() + (ttl or self._default_ttl)
self._store[token] = (payload, expires_at)
return token
def pop(self, token: str | None) -> Any | None:
if not token:
return None
entry = self._store.pop(token, None)
if not entry:
return None
payload, expires_at = entry
if expires_at < time.time():
return None
return payload
def purge_expired(self) -> None:
now = time.time()
stale = [token for token, (_, expires_at) in self._store.items() if expires_at < now]
for token in stale:
self._store.pop(token, None)
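A quick sketch of the one-time-read behaviour; the payload values are illustrative:

from app.secret_store import EphemeralSecretStore

store = EphemeralSecretStore(default_ttl=300)
token = store.remember({"access_key": "AKIAEXAMPLE", "secret_key": "shown-once"})
print(store.pop(token))  # {'access_key': 'AKIAEXAMPLE', 'secret_key': 'shown-once'}
print(store.pop(token))  # None (single use)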

app/storage.py (Normal file, 935 additions)

@@ -0,0 +1,935 @@
"""Filesystem-backed object storage helpers."""
from __future__ import annotations
import hashlib
import json
import os
import re
import shutil
import stat
import time
import unicodedata
import uuid
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, BinaryIO, Dict, List, Optional
WINDOWS_RESERVED_NAMES = {
"CON",
"PRN",
"AUX",
"NUL",
"COM1",
"COM2",
"COM3",
"COM4",
"COM5",
"COM6",
"COM7",
"COM8",
"COM9",
"LPT1",
"LPT2",
"LPT3",
"LPT4",
"LPT5",
"LPT6",
"LPT7",
"LPT8",
"LPT9",
}
class StorageError(RuntimeError):
"""Raised when the storage layer encounters an unrecoverable problem."""
@dataclass
class ObjectMeta:
key: str
size: int
last_modified: datetime
etag: str
metadata: Optional[Dict[str, str]] = None
@dataclass
class BucketMeta:
name: str
created_at: datetime
def _utcnow() -> datetime:
return datetime.now(timezone.utc)
def _utc_isoformat() -> str:
return _utcnow().isoformat().replace("+00:00", "Z")
class ObjectStorage:
"""Very small filesystem wrapper implementing the bare S3 primitives."""
INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
SYSTEM_ROOT = ".myfsio.sys"
SYSTEM_BUCKETS_DIR = "buckets"
SYSTEM_MULTIPART_DIR = "multipart"
SYSTEM_TMP_DIR = "tmp"
BUCKET_META_DIR = "meta"
BUCKET_VERSIONS_DIR = "versions"
MULTIPART_MANIFEST = "manifest.json"
BUCKET_CONFIG_FILE = ".bucket.json"
def __init__(self, root: Path) -> None:
self.root = Path(root)
self.root.mkdir(parents=True, exist_ok=True)
self._ensure_system_roots()
# ---------------------- Bucket helpers ----------------------
def list_buckets(self) -> List[BucketMeta]:
buckets: List[BucketMeta] = []
for bucket in sorted(self.root.iterdir()):
if bucket.is_dir() and bucket.name != self.SYSTEM_ROOT:
stat = bucket.stat()
buckets.append(
BucketMeta(
name=bucket.name,
created_at=datetime.fromtimestamp(stat.st_ctime),
)
)
return buckets
def bucket_exists(self, bucket_name: str) -> bool:
return self._bucket_path(bucket_name).exists()
def _validate_bucket_name(self, bucket_name: str) -> None:
if len(bucket_name) < 3 or len(bucket_name) > 63:
raise StorageError("Bucket name must be between 3 and 63 characters")
if not re.match(r"^[a-z0-9][a-z0-9.-]*[a-z0-9]$", bucket_name):
raise StorageError("Bucket name must consist of lowercase letters, numbers, periods, and hyphens, and must start and end with a letter or number")
if ".." in bucket_name:
raise StorageError("Bucket name must not contain consecutive periods")
if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", bucket_name):
raise StorageError("Bucket name must not be formatted as an IP address")
def create_bucket(self, bucket_name: str) -> None:
self._validate_bucket_name(bucket_name)
bucket_path = self._bucket_path(bucket_name)
bucket_path.mkdir(parents=True, exist_ok=False)
self._system_bucket_root(bucket_path.name).mkdir(parents=True, exist_ok=True)
def bucket_stats(self, bucket_name: str) -> dict[str, int]:
"""Return object count and total size for the bucket without hashing files."""
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
object_count = 0
total_bytes = 0
for path in bucket_path.rglob("*"):
if path.is_file():
rel = path.relative_to(bucket_path)
if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
continue
stat = path.stat()
object_count += 1
total_bytes += stat.st_size
return {"objects": object_count, "bytes": total_bytes}
def delete_bucket(self, bucket_name: str) -> None:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
if self._has_visible_objects(bucket_path):
raise StorageError("Bucket not empty")
if self._has_archived_versions(bucket_path):
raise StorageError("Bucket contains archived object versions")
if self._has_active_multipart_uploads(bucket_path):
raise StorageError("Bucket has active multipart uploads")
self._remove_tree(bucket_path)
self._remove_tree(self._system_bucket_root(bucket_path.name))
self._remove_tree(self._multipart_bucket_root(bucket_path.name))
# ---------------------- Object helpers ----------------------
def list_objects(self, bucket_name: str) -> List[ObjectMeta]:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
bucket_id = bucket_path.name
objects: List[ObjectMeta] = []
for path in bucket_path.rglob("*"):
if path.is_file():
stat = path.stat()
rel = path.relative_to(bucket_path)
if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
continue
metadata = self._read_metadata(bucket_id, rel)
objects.append(
ObjectMeta(
key=str(rel.as_posix()),
size=stat.st_size,
last_modified=datetime.fromtimestamp(stat.st_mtime),
etag=self._compute_etag(path),
metadata=metadata or None,
)
)
objects.sort(key=lambda meta: meta.key)
return objects
def put_object(
self,
bucket_name: str,
object_key: str,
stream: BinaryIO,
*,
metadata: Optional[Dict[str, str]] = None,
) -> ObjectMeta:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
destination = bucket_path / safe_key
destination.parent.mkdir(parents=True, exist_ok=True)
if self._is_versioning_enabled(bucket_path) and destination.exists():
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
checksum = hashlib.md5()
with destination.open("wb") as target:
shutil.copyfileobj(_HashingReader(stream, checksum), target)
stat = destination.stat()
if metadata:
self._write_metadata(bucket_id, safe_key, metadata)
else:
self._delete_metadata(bucket_id, safe_key)
return ObjectMeta(
key=safe_key.as_posix(),
size=stat.st_size,
last_modified=datetime.fromtimestamp(stat.st_mtime),
etag=checksum.hexdigest(),
metadata=metadata,
)
def get_object_path(self, bucket_name: str, object_key: str) -> Path:
path = self._object_path(bucket_name, object_key)
if not path.exists():
raise StorageError("Object not found")
return path
def get_object_metadata(self, bucket_name: str, object_key: str) -> Dict[str, str]:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
return {}
safe_key = self._sanitize_object_key(object_key)
return self._read_metadata(bucket_path.name, safe_key) or {}
def delete_object(self, bucket_name: str, object_key: str) -> None:
bucket_path = self._bucket_path(bucket_name)
path = self._object_path(bucket_name, object_key)
if not path.exists():
return
safe_key = path.relative_to(bucket_path)
bucket_id = bucket_path.name
if self._is_versioning_enabled(bucket_path):
self._archive_current_version(bucket_id, safe_key, reason="delete")
rel = path.relative_to(bucket_path)
self._safe_unlink(path)
self._delete_metadata(bucket_id, rel)
# Clean up now-empty parent directories inside the bucket.
for parent in path.parents:
if parent == bucket_path:
break
if parent.exists() and not any(parent.iterdir()):
parent.rmdir()
def purge_object(self, bucket_name: str, object_key: str) -> None:
bucket_path = self._bucket_path(bucket_name)
target = self._object_path(bucket_name, object_key)
bucket_id = bucket_path.name
if target.exists():
rel = target.relative_to(bucket_path)
self._safe_unlink(target)
self._delete_metadata(bucket_id, rel)
else:
rel = self._sanitize_object_key(object_key)
self._delete_metadata(bucket_id, rel)
version_dir = self._version_dir(bucket_id, rel)
if version_dir.exists():
shutil.rmtree(version_dir, ignore_errors=True)
legacy_version_dir = self._legacy_version_dir(bucket_id, rel)
if legacy_version_dir.exists():
shutil.rmtree(legacy_version_dir, ignore_errors=True)
for parent in target.parents:
if parent == bucket_path:
break
if parent.exists() and not any(parent.iterdir()):
parent.rmdir()
# ---------------------- Versioning helpers ----------------------
def is_versioning_enabled(self, bucket_name: str) -> bool:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
return self._is_versioning_enabled(bucket_path)
def set_bucket_versioning(self, bucket_name: str, enabled: bool) -> None:
bucket_path = self._require_bucket_path(bucket_name)
config = self._read_bucket_config(bucket_path.name)
config["versioning_enabled"] = bool(enabled)
self._write_bucket_config(bucket_path.name, config)
# ---------------------- Bucket configuration helpers ----------------------
def get_bucket_tags(self, bucket_name: str) -> List[Dict[str, str]]:
bucket_path = self._require_bucket_path(bucket_name)
config = self._read_bucket_config(bucket_path.name)
raw_tags = config.get("tags")
if not isinstance(raw_tags, list):
return []
tags: List[Dict[str, str]] = []
for entry in raw_tags:
if not isinstance(entry, dict):
continue
key = str(entry.get("Key", "")).strip()
if not key:
continue
value = str(entry.get("Value", ""))
tags.append({"Key": key, "Value": value})
return tags
def set_bucket_tags(self, bucket_name: str, tags: Optional[List[Dict[str, str]]]) -> None:
bucket_path = self._require_bucket_path(bucket_name)
if not tags:
self._set_bucket_config_entry(bucket_path.name, "tags", None)
return
clean: List[Dict[str, str]] = []
for entry in tags:
if not isinstance(entry, dict):
continue
key = str(entry.get("Key", "")).strip()
if not key:
continue
clean.append({"Key": key, "Value": str(entry.get("Value", ""))})
self._set_bucket_config_entry(bucket_path.name, "tags", clean or None)
def get_bucket_cors(self, bucket_name: str) -> List[Dict[str, Any]]:
bucket_path = self._require_bucket_path(bucket_name)
config = self._read_bucket_config(bucket_path.name)
cors_rules = config.get("cors")
return cors_rules if isinstance(cors_rules, list) else []
def set_bucket_cors(self, bucket_name: str, rules: Optional[List[Dict[str, Any]]]) -> None:
bucket_path = self._require_bucket_path(bucket_name)
self._set_bucket_config_entry(bucket_path.name, "cors", rules or None)
def get_bucket_encryption(self, bucket_name: str) -> Dict[str, Any]:
bucket_path = self._require_bucket_path(bucket_name)
config = self._read_bucket_config(bucket_path.name)
payload = config.get("encryption")
return payload if isinstance(payload, dict) else {}
def set_bucket_encryption(self, bucket_name: str, config_payload: Optional[Dict[str, Any]]) -> None:
bucket_path = self._require_bucket_path(bucket_name)
self._set_bucket_config_entry(bucket_path.name, "encryption", config_payload or None)
def list_object_versions(self, bucket_name: str, object_key: str) -> List[Dict[str, Any]]:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
version_dir = self._version_dir(bucket_id, safe_key)
if not version_dir.exists():
version_dir = self._legacy_version_dir(bucket_id, safe_key)
if not version_dir.exists():
return []
versions: List[Dict[str, Any]] = []
for meta_file in version_dir.glob("*.json"):
try:
payload = json.loads(meta_file.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError):
continue
if not isinstance(payload, dict):
continue
payload.setdefault("version_id", meta_file.stem)
versions.append(payload)
versions.sort(key=lambda item: item.get("archived_at", ""), reverse=True)
return versions
def restore_object_version(self, bucket_name: str, object_key: str, version_id: str) -> ObjectMeta:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
version_dir = self._version_dir(bucket_id, safe_key)
data_path = version_dir / f"{version_id}.bin"
meta_path = version_dir / f"{version_id}.json"
if not data_path.exists() or not meta_path.exists():
raise StorageError("Version not found")
try:
payload = json.loads(meta_path.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError):
payload = {}
metadata = payload.get("metadata") if isinstance(payload, dict) else {}
if not isinstance(metadata, dict):
metadata = {}
destination = bucket_path / safe_key
if self._is_versioning_enabled(bucket_path) and destination.exists():
self._archive_current_version(bucket_id, safe_key, reason="restore-overwrite")
destination.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(data_path, destination)
if metadata:
self._write_metadata(bucket_id, safe_key, metadata)
else:
self._delete_metadata(bucket_id, safe_key)
stat = destination.stat()
return ObjectMeta(
key=safe_key.as_posix(),
size=stat.st_size,
last_modified=datetime.fromtimestamp(stat.st_mtime),
etag=self._compute_etag(destination),
metadata=metadata or None,
)
def list_orphaned_objects(self, bucket_name: str) -> List[Dict[str, Any]]:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
bucket_id = bucket_path.name
version_roots = [self._bucket_versions_root(bucket_id), self._legacy_versions_root(bucket_id)]
if not any(root.exists() for root in version_roots):
return []
aggregated: Dict[str, Dict[str, Any]] = {}
skipped: set[str] = set()
for version_root in version_roots:
if not version_root.exists():
continue
for meta_file in version_root.glob("**/*.json"):
if not meta_file.is_file():
continue
rel = meta_file.parent.relative_to(version_root)
rel_key = rel.as_posix()
if rel_key in skipped:
continue
object_path = bucket_path / rel
if object_path.exists():
skipped.add(rel_key)
continue
try:
payload = json.loads(meta_file.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError):
payload = {}
version_id = payload.get("version_id") or meta_file.stem
archived_at = payload.get("archived_at") or ""
size = int(payload.get("size") or 0)
reason = payload.get("reason") or "update"
record = aggregated.setdefault(
rel_key,
{
"key": rel_key,
"versions": 0,
"total_size": 0,
"latest": None,
"_latest_sort": None,
},
)
record["versions"] += 1
record["total_size"] += size
candidate = {
"version_id": version_id,
"archived_at": archived_at,
"size": size,
"reason": reason,
}
sort_key = (
archived_at,
meta_file.stat().st_mtime,
)
current_sort = record.get("_latest_sort")
if current_sort is None or sort_key > current_sort:
record["_latest_sort"] = sort_key
record["latest"] = candidate
for record in aggregated.values():
record.pop("_latest_sort", None)
return sorted(aggregated.values(), key=lambda item: item["key"])
# ---------------------- Multipart helpers ----------------------
def initiate_multipart_upload(
self,
bucket_name: str,
object_key: str,
*,
metadata: Optional[Dict[str, str]] = None,
) -> str:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key)
upload_id = uuid.uuid4().hex
upload_root = self._multipart_dir(bucket_id, upload_id)
upload_root.mkdir(parents=True, exist_ok=False)
manifest = {
"upload_id": upload_id,
"object_key": safe_key.as_posix(),
"metadata": self._normalize_metadata(metadata),
"parts": {},
"created_at": _utc_isoformat(),
}
self._write_multipart_manifest(upload_root, manifest)
return upload_id
def upload_multipart_part(
self,
bucket_name: str,
upload_id: str,
part_number: int,
stream: BinaryIO,
) -> str:
if part_number < 1:
raise StorageError("part_number must be >= 1")
bucket_path = self._bucket_path(bucket_name)
manifest, upload_root = self._load_multipart_manifest(bucket_path.name, upload_id)
checksum = hashlib.md5()
part_filename = f"part-{part_number:05d}.part"
part_path = upload_root / part_filename
with part_path.open("wb") as target:
shutil.copyfileobj(_HashingReader(stream, checksum), target)
record = {
"etag": checksum.hexdigest(),
"size": part_path.stat().st_size,
"filename": part_filename,
}
parts = manifest.setdefault("parts", {})
parts[str(part_number)] = record
self._write_multipart_manifest(upload_root, manifest)
return record["etag"]
def complete_multipart_upload(
self,
bucket_name: str,
upload_id: str,
ordered_parts: List[Dict[str, Any]],
) -> ObjectMeta:
if not ordered_parts:
raise StorageError("parts list required")
bucket_path = self._bucket_path(bucket_name)
bucket_id = bucket_path.name
manifest, upload_root = self._load_multipart_manifest(bucket_id, upload_id)
parts_map = manifest.get("parts") or {}
if not parts_map:
raise StorageError("No uploaded parts found")
validated: List[tuple[int, Dict[str, Any]]] = []
for part in ordered_parts:
raw_number = part.get("part_number")
if raw_number is None:
raw_number = part.get("PartNumber")
try:
number = int(raw_number)
except (TypeError, ValueError) as exc:
raise StorageError("Each part must include part_number") from exc
if number < 1:
raise StorageError("part numbers must be >= 1")
key = str(number)
record = parts_map.get(key)
if not record:
raise StorageError(f"Part {number} missing from upload")
raw_etag = part.get("etag", part.get("ETag", ""))
supplied_etag = str(raw_etag).strip() or record.get("etag")
if supplied_etag and record.get("etag") and supplied_etag.strip('"') != record["etag"]:
raise StorageError(f"ETag mismatch for part {number}")
validated.append((number, record))
validated.sort(key=lambda entry: entry[0])
safe_key = self._sanitize_object_key(manifest["object_key"])
destination = bucket_path / safe_key
destination.parent.mkdir(parents=True, exist_ok=True)
if self._is_versioning_enabled(bucket_path) and destination.exists():
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
checksum = hashlib.md5()
with destination.open("wb") as target:
for _, record in validated:
part_path = upload_root / record["filename"]
if not part_path.exists():
raise StorageError(f"Missing part file {record['filename']}")
with part_path.open("rb") as chunk:
while True:
data = chunk.read(1024 * 1024)
if not data:
break
checksum.update(data)
target.write(data)
metadata = manifest.get("metadata")
if metadata:
self._write_metadata(bucket_id, safe_key, metadata)
else:
self._delete_metadata(bucket_id, safe_key)
shutil.rmtree(upload_root, ignore_errors=True)
stat = destination.stat()
return ObjectMeta(
key=safe_key.as_posix(),
size=stat.st_size,
last_modified=datetime.fromtimestamp(stat.st_mtime),
etag=checksum.hexdigest(),
metadata=metadata,
)
def abort_multipart_upload(self, bucket_name: str, upload_id: str) -> None:
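        """Discard all uploaded parts and the manifest for an in-progress upload, if any exist."""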
bucket_path = self._bucket_path(bucket_name)
upload_root = self._multipart_dir(bucket_path.name, upload_id)
if upload_root.exists():
shutil.rmtree(upload_root, ignore_errors=True)
return
legacy_root = self._legacy_multipart_dir(bucket_path.name, upload_id)
if legacy_root.exists():
shutil.rmtree(legacy_root, ignore_errors=True)
# ---------------------- internal helpers ----------------------
def _bucket_path(self, bucket_name: str) -> Path:
safe_name = self._sanitize_bucket_name(bucket_name)
return self.root / safe_name
def _require_bucket_path(self, bucket_name: str) -> Path:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise StorageError("Bucket does not exist")
return bucket_path
def _object_path(self, bucket_name: str, object_key: str) -> Path:
bucket_path = self._bucket_path(bucket_name)
safe_key = self._sanitize_object_key(object_key)
return bucket_path / safe_key
def _system_root_path(self) -> Path:
return self.root / self.SYSTEM_ROOT
def _system_buckets_root(self) -> Path:
return self._system_root_path() / self.SYSTEM_BUCKETS_DIR
def _system_bucket_root(self, bucket_name: str) -> Path:
return self._system_buckets_root() / bucket_name
def _bucket_meta_root(self, bucket_name: str) -> Path:
return self._system_bucket_root(bucket_name) / self.BUCKET_META_DIR
def _bucket_versions_root(self, bucket_name: str) -> Path:
return self._system_bucket_root(bucket_name) / self.BUCKET_VERSIONS_DIR
def _multipart_root(self) -> Path:
return self._system_root_path() / self.SYSTEM_MULTIPART_DIR
def _multipart_bucket_root(self, bucket_name: str) -> Path:
return self._multipart_root() / bucket_name
def _legacy_metadata_file(self, bucket_name: str, key: Path) -> Path:
meta_root = self._legacy_meta_root(bucket_name)
meta_rel = Path(key.as_posix() + ".meta.json")
return meta_root / meta_rel
def _legacy_meta_root(self, bucket_name: str) -> Path:
return self._bucket_path(bucket_name) / ".meta"
def _legacy_versions_root(self, bucket_name: str) -> Path:
return self._bucket_path(bucket_name) / ".versions"
def _legacy_version_dir(self, bucket_name: str, key: Path) -> Path:
return self._legacy_versions_root(bucket_name) / key
def _legacy_multipart_bucket_root(self, bucket_name: str) -> Path:
return self._bucket_path(bucket_name) / ".multipart"
def _legacy_multipart_dir(self, bucket_name: str, upload_id: str) -> Path:
return self._legacy_multipart_bucket_root(bucket_name) / upload_id
def _ensure_system_roots(self) -> None:
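        """Create the hidden system directories (buckets, multipart, tmp) if they are missing."""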
for path in (
self._system_root_path(),
self._system_buckets_root(),
self._multipart_root(),
self._system_root_path() / self.SYSTEM_TMP_DIR,
):
path.mkdir(parents=True, exist_ok=True)
def _multipart_dir(self, bucket_name: str, upload_id: str) -> Path:
return self._multipart_bucket_root(bucket_name) / upload_id
def _version_dir(self, bucket_name: str, key: Path) -> Path:
return self._bucket_versions_root(bucket_name) / key
def _bucket_config_path(self, bucket_name: str) -> Path:
return self._system_bucket_root(bucket_name) / self.BUCKET_CONFIG_FILE
def _read_bucket_config(self, bucket_name: str) -> dict[str, Any]:
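        """Return the bucket's config dict, or an empty dict when the file is missing or unreadable."""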
config_path = self._bucket_config_path(bucket_name)
if not config_path.exists():
return {}
try:
data = json.loads(config_path.read_text(encoding="utf-8"))
return data if isinstance(data, dict) else {}
except (OSError, json.JSONDecodeError):
return {}
def _write_bucket_config(self, bucket_name: str, payload: dict[str, Any]) -> None:
config_path = self._bucket_config_path(bucket_name)
config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(json.dumps(payload), encoding="utf-8")
def _set_bucket_config_entry(self, bucket_name: str, key: str, value: Any | None) -> None:
config = self._read_bucket_config(bucket_name)
if value is None:
config.pop(key, None)
else:
config[key] = value
self._write_bucket_config(bucket_name, config)
def _is_versioning_enabled(self, bucket_path: Path) -> bool:
config = self._read_bucket_config(bucket_path.name)
return bool(config.get("versioning_enabled"))
def _load_multipart_manifest(self, bucket_name: str, upload_id: str) -> tuple[dict[str, Any], Path]:
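        """Locate an upload's manifest (new layout first, then legacy) and return it with its directory."""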
upload_root = self._multipart_dir(bucket_name, upload_id)
if not upload_root.exists():
upload_root = self._legacy_multipart_dir(bucket_name, upload_id)
manifest_path = upload_root / self.MULTIPART_MANIFEST
if not manifest_path.exists():
raise StorageError("Multipart upload not found")
try:
manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError) as exc:
raise StorageError("Multipart manifest unreadable") from exc
return manifest, upload_root
def _write_multipart_manifest(self, upload_root: Path, manifest: dict[str, Any]) -> None:
manifest_path = upload_root / self.MULTIPART_MANIFEST
manifest_path.parent.mkdir(parents=True, exist_ok=True)
manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
def _metadata_file(self, bucket_name: str, key: Path) -> Path:
meta_root = self._bucket_meta_root(bucket_name)
meta_rel = Path(key.as_posix() + ".meta.json")
return meta_root / meta_rel
def _normalize_metadata(self, metadata: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
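        """Coerce metadata keys and values to stripped strings; return None when nothing remains."""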
if not metadata:
return None
clean = {str(k).strip(): str(v) for k, v in metadata.items() if str(k).strip()}
return clean or None
def _write_metadata(self, bucket_name: str, key: Path, metadata: Dict[str, str]) -> None:
clean = self._normalize_metadata(metadata)
if not clean:
self._delete_metadata(bucket_name, key)
return
meta_file = self._metadata_file(bucket_name, key)
meta_file.parent.mkdir(parents=True, exist_ok=True)
meta_file.write_text(json.dumps({"metadata": clean}), encoding="utf-8")
def _archive_current_version(self, bucket_name: str, key: Path, *, reason: str) -> None:
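        """Snapshot the current object into the versions tree as <version_id>.bin plus a JSON record."""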
bucket_path = self._bucket_path(bucket_name)
source = bucket_path / key
if not source.exists():
return
version_dir = self._version_dir(bucket_name, key)
version_dir.mkdir(parents=True, exist_ok=True)
now = _utcnow()
version_id = f"{now.strftime('%Y%m%dT%H%M%S%fZ')}-{uuid.uuid4().hex[:8]}"
data_path = version_dir / f"{version_id}.bin"
shutil.copy2(source, data_path)
metadata = self._read_metadata(bucket_name, key)
record = {
"version_id": version_id,
"key": key.as_posix(),
"size": source.stat().st_size,
"archived_at": now.isoformat().replace("+00:00", "Z"),
"etag": self._compute_etag(source),
"metadata": metadata or {},
"reason": reason,
}
manifest_path = version_dir / f"{version_id}.json"
manifest_path.write_text(json.dumps(record), encoding="utf-8")
def _read_metadata(self, bucket_name: str, key: Path) -> Dict[str, str]:
for meta_file in (self._metadata_file(bucket_name, key), self._legacy_metadata_file(bucket_name, key)):
if not meta_file.exists():
continue
try:
payload = json.loads(meta_file.read_text(encoding="utf-8"))
data = payload.get("metadata")
return data if isinstance(data, dict) else {}
except (OSError, json.JSONDecodeError):
return {}
return {}
def _safe_unlink(self, path: Path) -> None:
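        """Unlink *path*, retrying briefly on Windows where open handles raise PermissionError."""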
attempts = 3
last_error: PermissionError | None = None
for attempt in range(attempts):
try:
path.unlink()
return
except FileNotFoundError:
return
except PermissionError as exc:
last_error = exc
if os.name == "nt":
time.sleep(0.15 * (attempt + 1))
except OSError as exc:
raise StorageError(f"Unable to delete object: {exc}") from exc
message = "Object file is currently in use. Close active previews or wait and try again."
raise StorageError(message) from last_error
def _delete_metadata(self, bucket_name: str, key: Path) -> None:
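        """Remove metadata files from both the system and legacy locations, pruning empty directories."""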
locations = (
(self._metadata_file(bucket_name, key), self._bucket_meta_root(bucket_name)),
(self._legacy_metadata_file(bucket_name, key), self._legacy_meta_root(bucket_name)),
)
for meta_file, meta_root in locations:
try:
if meta_file.exists():
meta_file.unlink()
parent = meta_file.parent
while parent != meta_root and parent.exists() and not any(parent.iterdir()):
parent.rmdir()
parent = parent.parent
except OSError:
continue
def _has_visible_objects(self, bucket_path: Path) -> bool:
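        """Return True if the bucket contains any object file outside the internal folders."""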
for path in bucket_path.rglob("*"):
if not path.is_file():
continue
rel = path.relative_to(bucket_path)
if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
continue
return True
return False
def _has_archived_versions(self, bucket_path: Path) -> bool:
for version_root in (
self._bucket_versions_root(bucket_path.name),
self._legacy_versions_root(bucket_path.name),
):
if version_root.exists() and any(path.is_file() for path in version_root.rglob("*")):
return True
return False
def _has_active_multipart_uploads(self, bucket_path: Path) -> bool:
for uploads_root in (
self._multipart_bucket_root(bucket_path.name),
self._legacy_multipart_bucket_root(bucket_path.name),
):
if uploads_root.exists() and any(path.is_file() for path in uploads_root.rglob("*")):
return True
return False
def _remove_tree(self, path: Path) -> None:
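        """Recursively delete *path*, clearing read-only flags when the first attempt fails."""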
if not path.exists():
return
def _handle_error(func, target_path, exc_info):
try:
os.chmod(target_path, stat.S_IRWXU)
func(target_path)
except Exception as exc: # pragma: no cover - fallback failure
raise StorageError(f"Unable to delete bucket contents: {exc}") from exc
try:
shutil.rmtree(path, onerror=_handle_error)
except FileNotFoundError:
return
except PermissionError as exc:
raise StorageError("Bucket in use. Close open files and try again") from exc
@staticmethod
def _sanitize_bucket_name(bucket_name: str) -> str:
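        """Validate *bucket_name* against S3-style naming rules and return it lowercased."""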
if not bucket_name:
raise StorageError("Bucket name required")
name = bucket_name.lower()
if len(name) < 3 or len(name) > 63:
raise StorageError("Bucket name must be between 3 and 63 characters")
if name.startswith("-") or name.endswith("-"):
raise StorageError("Bucket name cannot start or end with a hyphen")
if ".." in name:
raise StorageError("Bucket name cannot contain consecutive periods")
if name.startswith("xn--"):
raise StorageError("Bucket name cannot start with 'xn--'")
if re.fullmatch(r"\d+\.\d+\.\d+\.\d+", name):
raise StorageError("Bucket name cannot be formatted like an IP address")
if not re.fullmatch(r"[a-z0-9][a-z0-9.-]+[a-z0-9]", name):
raise StorageError("Bucket name can contain lowercase letters, numbers, dots, and hyphens")
return name
@staticmethod
def _sanitize_object_key(object_key: str) -> Path:
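        """Normalize *object_key* into a safe relative Path, rejecting traversal, reserved names, and control characters."""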
if not object_key:
raise StorageError("Object key required")
if len(object_key.encode("utf-8")) > 1024:
raise StorageError("Object key exceeds maximum length of 1024 bytes")
if "\x00" in object_key:
raise StorageError("Object key contains null bytes")
if object_key.startswith(("/", "\\")):
raise StorageError("Object key cannot start with a slash")
normalized = unicodedata.normalize("NFC", object_key)
if normalized != object_key:
raise StorageError("Object key must use normalized Unicode")
candidate = Path(normalized)
if candidate.is_absolute():
raise StorageError("Absolute object keys are not allowed")
if getattr(candidate, "drive", ""):
raise StorageError("Object key cannot include a drive letter")
parts = []
for part in candidate.parts:
if part in ("", ".", ".."):
raise StorageError("Object key contains invalid segments")
if any(ord(ch) < 32 for ch in part):
raise StorageError("Object key contains control characters")
if os.name == "nt":
if any(ch in part for ch in "<>:\"/\\|?*"):
raise StorageError("Object key contains characters not supported on Windows filesystems")
if part.endswith((" ", ".")):
raise StorageError("Object key segments cannot end with spaces or periods on Windows")
trimmed = part.upper().rstrip(". ")
if trimmed in WINDOWS_RESERVED_NAMES:
raise StorageError(f"Invalid filename segment: {part}")
parts.append(part)
if parts:
top_level = parts[0]
if top_level in ObjectStorage.INTERNAL_FOLDERS or top_level == ObjectStorage.SYSTEM_ROOT:
raise StorageError("Object key uses a reserved prefix")
return Path(*parts)
@staticmethod
def _compute_etag(path: Path) -> str:
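        """Return the MD5 hex digest of *path*, streamed in 8 KiB chunks."""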
checksum = hashlib.md5()
with path.open("rb") as handle:
for chunk in iter(lambda: handle.read(8192), b""):
checksum.update(chunk)
return checksum.hexdigest()
class _HashingReader:
"""Wraps a binary stream, updating the checksum as it is read."""
def __init__(self, stream: BinaryIO, checksum: Any) -> None:
self.stream = stream
self.checksum = checksum
def read(self, size: int = -1) -> bytes:
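        """Read from the wrapped stream and fold the returned bytes into the running checksum."""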
data = self.stream.read(size)
if data:
self.checksum.update(data)
return data

1134
app/ui.py Normal file

File diff suppressed because it is too large

9
app/version.py Normal file
View File

@@ -0,0 +1,9 @@
"""Central location for the application version string."""
from __future__ import annotations
APP_VERSION = "0.1.0"
def get_version() -> str:
"""Return the current application version."""
return APP_VERSION