Release v0.1.0 Beta
app/__init__.py (new file, 215 lines)
@@ -0,0 +1,215 @@
"""Application factory for the mini S3-compatible object store."""
from __future__ import annotations

import logging
import time
import uuid
from logging.handlers import RotatingFileHandler
from pathlib import Path
from datetime import timedelta
from typing import Any, Dict, Optional

from flask import Flask, g, has_request_context, redirect, render_template, request, url_for
from flask_cors import CORS
from flask_wtf.csrf import CSRFError

from .bucket_policies import BucketPolicyStore
from .config import AppConfig
from .connections import ConnectionStore
from .extensions import limiter, csrf
from .iam import IamService
from .replication import ReplicationManager
from .secret_store import EphemeralSecretStore
from .storage import ObjectStorage
from .version import get_version


def create_app(
    test_config: Optional[Dict[str, Any]] = None,
    *,
    include_api: bool = True,
    include_ui: bool = True,
) -> Flask:
    """Create and configure the Flask application."""
    config = AppConfig.from_env(test_config)

    project_root = Path(__file__).resolve().parent.parent
    app = Flask(
        __name__,
        static_folder=str(project_root / "static"),
        template_folder=str(project_root / "templates"),
    )
    app.config.update(config.to_flask_config())
    if test_config:
        app.config.update(test_config)
    app.config.setdefault("APP_VERSION", get_version())
    app.permanent_session_lifetime = timedelta(days=int(app.config.get("SESSION_LIFETIME_DAYS", 30)))
    if app.config.get("TESTING"):
        app.config.setdefault("WTF_CSRF_ENABLED", False)

    _configure_cors(app)
    _configure_logging(app)

    limiter.init_app(app)
    csrf.init_app(app)

    storage = ObjectStorage(Path(app.config["STORAGE_ROOT"]))
    iam = IamService(
        Path(app.config["IAM_CONFIG"]),
        auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5),
        auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15),
    )
    bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
    secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))

    # Initialize replication components.
    connections_path = Path(app.config["STORAGE_ROOT"]) / ".connections.json"
    replication_rules_path = Path(app.config["STORAGE_ROOT"]) / ".replication_rules.json"

    connections = ConnectionStore(connections_path)
    replication = ReplicationManager(storage, connections, replication_rules_path)

    app.extensions["object_storage"] = storage
    app.extensions["iam"] = iam
    app.extensions["bucket_policies"] = bucket_policies
    app.extensions["secret_store"] = secret_store
    app.extensions["limiter"] = limiter
    app.extensions["connections"] = connections
    app.extensions["replication"] = replication

    @app.errorhandler(500)
    def internal_error(error):
        return render_template('500.html'), 500

    @app.errorhandler(CSRFError)
    def handle_csrf_error(e):
        return render_template('csrf_error.html', reason=e.description), 400

    @app.template_filter("filesizeformat")
    def filesizeformat(value: int) -> str:
        """Format bytes as a human-readable file size."""
        for unit in ["B", "KB", "MB", "GB", "TB", "PB"]:
            if abs(value) < 1024.0 or unit == "PB":
                if unit == "B":
                    return f"{int(value)} {unit}"
                return f"{value:.1f} {unit}"
            value /= 1024.0
        return f"{value:.1f} PB"

    if include_api:
        from .s3_api import s3_api_bp

        app.register_blueprint(s3_api_bp)
        csrf.exempt(s3_api_bp)

    if include_ui:
        from .ui import ui_bp

        app.register_blueprint(ui_bp)
        if not include_api:
            @app.get("/")
            def ui_root_redirect():
                return redirect(url_for("ui.buckets_overview"))

    @app.errorhandler(404)
    def handle_not_found(error):
        wants_html = request.accept_mimetypes.accept_html
        path = request.path or ""
        if include_ui and wants_html:
            if not include_api or path.startswith("/ui") or path == "/":
                return render_template("404.html"), 404
        return error

    @app.get("/healthz")
    def healthcheck() -> Dict[str, str]:
        return {"status": "ok", "version": app.config.get("APP_VERSION", "unknown")}

    return app


def create_api_app(test_config: Optional[Dict[str, Any]] = None) -> Flask:
    return create_app(test_config, include_api=True, include_ui=False)


def create_ui_app(test_config: Optional[Dict[str, Any]] = None) -> Flask:
    return create_app(test_config, include_api=False, include_ui=True)


def _configure_cors(app: Flask) -> None:
    origins = app.config.get("CORS_ORIGINS", ["*"])
    methods = app.config.get("CORS_METHODS", ["GET", "PUT", "POST", "DELETE", "OPTIONS"])
    allow_headers = app.config.get(
        "CORS_ALLOW_HEADERS",
        ["Content-Type", "X-Access-Key", "X-Secret-Key", "X-Amz-Date", "X-Amz-SignedHeaders"],
    )
    CORS(
        app,
        resources={r"/*": {"origins": origins, "methods": methods, "allow_headers": allow_headers}},
        supports_credentials=True,
    )


class _RequestContextFilter(logging.Filter):
    """Inject request-specific attributes into log records."""

    def filter(self, record: logging.LogRecord) -> bool:  # pragma: no cover - simple boilerplate
        if has_request_context():
            record.request_id = getattr(g, "request_id", "-")
            record.path = request.path
            record.method = request.method
            record.remote_addr = request.remote_addr or "-"
        else:
            record.request_id = getattr(record, "request_id", "-")
            record.path = getattr(record, "path", "-")
            record.method = getattr(record, "method", "-")
            record.remote_addr = getattr(record, "remote_addr", "-")
        return True


def _configure_logging(app: Flask) -> None:
    log_file = Path(app.config["LOG_FILE"])
    log_file.parent.mkdir(parents=True, exist_ok=True)
    handler = RotatingFileHandler(
        log_file,
        maxBytes=int(app.config.get("LOG_MAX_BYTES", 5 * 1024 * 1024)),
        backupCount=int(app.config.get("LOG_BACKUP_COUNT", 3)),
        encoding="utf-8",
    )
    formatter = logging.Formatter(
        "%(asctime)s | %(levelname)s | %(request_id)s | %(method)s %(path)s | %(message)s"
    )
    handler.setFormatter(formatter)
    handler.addFilter(_RequestContextFilter())

    logger = app.logger
    logger.handlers.clear()
    logger.addHandler(handler)
    logger.setLevel(getattr(logging, app.config.get("LOG_LEVEL", "INFO"), logging.INFO))

    @app.before_request
    def _log_request_start() -> None:
        g.request_id = uuid.uuid4().hex
        g.request_started_at = time.perf_counter()
        app.logger.info(
            "Request started",
            extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
        )

    @app.after_request
    def _log_request_end(response):
        duration_ms = 0.0
        if hasattr(g, "request_started_at"):
            duration_ms = (time.perf_counter() - g.request_started_at) * 1000
        request_id = getattr(g, "request_id", uuid.uuid4().hex)
        response.headers.setdefault("X-Request-ID", request_id)
        app.logger.info(
            "Request completed",
            extra={
                "path": request.path,
                "method": request.method,
                "remote_addr": request.remote_addr,
            },
        )
        response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
        response.headers["Server"] = "MyFISO"
        return response
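Note: a minimal sketch of running the factory above, assuming the package is importable as "app"; the entry-point file name and port are illustrative, not part of this commit.

# run_dev.py - illustrative only
from app import create_app

app = create_app()

if __name__ == "__main__":
    # 5000 matches the API_BASE_URL default in app/config.py
    app.run(host="127.0.0.1", port=5000, debug=True)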
app/bucket_policies.py (new file, 249 lines)
@@ -0,0 +1,249 @@
"""Bucket policy loader/enforcer with a subset of AWS semantics."""
from __future__ import annotations

import json
from dataclasses import dataclass
from fnmatch import fnmatch
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence


RESOURCE_PREFIX = "arn:aws:s3:::"

ACTION_ALIASES = {
    "s3:getobject": "read",
    "s3:getobjectversion": "read",
    "s3:listbucket": "list",
    "s3:listallmybuckets": "list",
    "s3:putobject": "write",
    "s3:createbucket": "write",
    "s3:deleteobject": "delete",
    "s3:deleteobjectversion": "delete",
    "s3:deletebucket": "delete",
    "s3:putobjectacl": "share",
    "s3:putbucketpolicy": "policy",
}


def _normalize_action(action: str) -> str:
    action = action.strip().lower()
    if action == "*":
        return "*"
    return ACTION_ALIASES.get(action, action)


def _normalize_actions(actions: Iterable[str]) -> List[str]:
    values: List[str] = []
    for action in actions:
        canonical = _normalize_action(action)
        if canonical == "*" and "*" not in values:
            return ["*"]
        if canonical and canonical not in values:
            values.append(canonical)
    return values


def _normalize_principals(principal_field: Any) -> List[str] | str:
    if principal_field == "*":
        return "*"

    def _collect(values: Any) -> List[str]:
        if values is None:
            return []
        if values == "*":
            return ["*"]
        if isinstance(values, str):
            return [values]
        if isinstance(values, dict):
            aggregated: List[str] = []
            for nested in values.values():
                chunk = _collect(nested)
                if "*" in chunk:
                    return ["*"]
                aggregated.extend(chunk)
            return aggregated
        if isinstance(values, Iterable):
            aggregated = []
            for nested in values:
                chunk = _collect(nested)
                if "*" in chunk:
                    return ["*"]
                aggregated.extend(chunk)
            return aggregated
        return [str(values)]

    normalized: List[str] = []
    for entry in _collect(principal_field):
        token = str(entry).strip()
        if token == "*":
            return "*"
        if token and token not in normalized:
            normalized.append(token)
    return normalized or "*"


def _parse_resource(resource: str) -> tuple[str | None, str | None]:
    if not resource.startswith(RESOURCE_PREFIX):
        return None, None
    remainder = resource[len(RESOURCE_PREFIX) :]
    if "/" not in remainder:
        bucket = remainder or "*"
        return bucket, None
    bucket, _, key_pattern = remainder.partition("/")
    return bucket or "*", key_pattern or "*"


@dataclass
class BucketPolicyStatement:
    sid: Optional[str]
    effect: str
    principals: List[str] | str
    actions: List[str]
    resources: List[tuple[str | None, str | None]]

    def matches_principal(self, access_key: Optional[str]) -> bool:
        if self.principals == "*":
            return True
        if access_key is None:
            return False
        return access_key in self.principals

    def matches_action(self, action: str) -> bool:
        action = _normalize_action(action)
        return "*" in self.actions or action in self.actions

    def matches_resource(self, bucket: Optional[str], object_key: Optional[str]) -> bool:
        bucket = (bucket or "*").lower()
        key = object_key or ""
        for resource_bucket, key_pattern in self.resources:
            resource_bucket = (resource_bucket or "*").lower()
            if resource_bucket not in {"*", bucket}:
                continue
            if key_pattern is None:
                if not key:
                    return True
                continue
            if fnmatch(key, key_pattern):
                return True
        return False


class BucketPolicyStore:
    """Loads bucket policies from disk and evaluates statements."""

    def __init__(self, policy_path: Path) -> None:
        self.policy_path = Path(policy_path)
        self.policy_path.parent.mkdir(parents=True, exist_ok=True)
        if not self.policy_path.exists():
            self.policy_path.write_text(json.dumps({"policies": {}}, indent=2))
        self._raw: Dict[str, Any] = {}
        self._policies: Dict[str, List[BucketPolicyStatement]] = {}
        self._load()
        self._last_mtime = self._current_mtime()

    def maybe_reload(self) -> None:
        current = self._current_mtime()
        if current is None or current == self._last_mtime:
            return
        self._load()
        self._last_mtime = current

    def _current_mtime(self) -> float | None:
        try:
            return self.policy_path.stat().st_mtime
        except FileNotFoundError:
            return None

    # ------------------------------------------------------------------
    def evaluate(
        self,
        access_key: Optional[str],
        bucket: Optional[str],
        object_key: Optional[str],
        action: str,
    ) -> str | None:
        bucket = (bucket or "").lower()
        statements = self._policies.get(bucket) or []
        decision: Optional[str] = None
        for statement in statements:
            if not statement.matches_principal(access_key):
                continue
            if not statement.matches_action(action):
                continue
            if not statement.matches_resource(bucket, object_key):
                continue
            if statement.effect == "deny":
                return "deny"
            decision = "allow"
        return decision

    def get_policy(self, bucket: str) -> Dict[str, Any] | None:
        return self._raw.get(bucket.lower())

    def set_policy(self, bucket: str, policy_payload: Dict[str, Any]) -> None:
        bucket = bucket.lower()
        statements = self._normalize_policy(policy_payload)
        if not statements:
            raise ValueError("Policy must include at least one valid statement")
        self._raw[bucket] = policy_payload
        self._policies[bucket] = statements
        self._persist()

    def delete_policy(self, bucket: str) -> None:
        bucket = bucket.lower()
        self._raw.pop(bucket, None)
        self._policies.pop(bucket, None)
        self._persist()

    # ------------------------------------------------------------------
    def _load(self) -> None:
        try:
            content = self.policy_path.read_text(encoding='utf-8')
            raw_payload = json.loads(content)
        except FileNotFoundError:
            raw_payload = {"policies": {}}
        except json.JSONDecodeError as e:
            raise ValueError(f"Corrupted bucket policy file (invalid JSON): {e}")
        except PermissionError as e:
            raise ValueError(f"Cannot read bucket policy file (permission denied): {e}")
        except (OSError, ValueError) as e:
            raise ValueError(f"Failed to load bucket policies: {e}")

        policies: Dict[str, Any] = raw_payload.get("policies", {})
        parsed: Dict[str, List[BucketPolicyStatement]] = {}
        for bucket, policy in policies.items():
            parsed[bucket.lower()] = self._normalize_policy(policy)
        self._raw = {bucket.lower(): policy for bucket, policy in policies.items()}
        self._policies = parsed

    def _persist(self) -> None:
        payload = {"policies": self._raw}
        self.policy_path.write_text(json.dumps(payload, indent=2))

    def _normalize_policy(self, policy: Dict[str, Any]) -> List[BucketPolicyStatement]:
        statements_raw: Sequence[Dict[str, Any]] = policy.get("Statement", [])
        statements: List[BucketPolicyStatement] = []
        for statement in statements_raw:
            actions = _normalize_actions(statement.get("Action", []))
            principals = _normalize_principals(statement.get("Principal", "*"))
            resources_field = statement.get("Resource", [])
            if isinstance(resources_field, str):
                resources_field = [resources_field]
            resources: List[tuple[str | None, str | None]] = []
            for resource in resources_field:
                bucket, pattern = _parse_resource(str(resource))
                if bucket:
                    resources.append((bucket, pattern))
            if not resources:
                continue
            effect = statement.get("Effect", "Allow").lower()
            statements.append(
                BucketPolicyStatement(
                    sid=statement.get("Sid"),
                    effect=effect,
                    principals=principals,
                    actions=actions or ["*"],
                    resources=resources,
                )
            )
        return statements
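Note: an illustrative sketch of the policy subset this store accepts, applied through set_policy; the bucket name, key, and path are made up, and the store's own imports are assumed to be in scope.

# Hypothetical bucket "public-assets"; actions are normalized via ACTION_ALIASES.
store = BucketPolicyStore(Path("data/.myfsio.sys/config/bucket_policies.json"))
store.set_policy("public-assets", {
    "Statement": [
        {
            "Sid": "AllowAnonymousRead",
            "Effect": "Allow",
            "Principal": "*",
            "Action": ["s3:GetObject", "s3:ListBucket"],
            "Resource": ["arn:aws:s3:::public-assets/*"],
        }
    ]
})
assert store.evaluate(None, "public-assets", "logo.png", "read") == "allow"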
app/config.py (new file, 192 lines)
@@ -0,0 +1,192 @@
"""Configuration helpers for the S3 clone application."""
from __future__ import annotations

import os
import secrets
import shutil
import warnings
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Optional

PROJECT_ROOT = Path(__file__).resolve().parent.parent


def _prepare_config_file(active_path: Path, legacy_path: Optional[Path] = None) -> Path:
    """Ensure config directories exist and migrate legacy files when possible."""
    active_path = Path(active_path)
    active_path.parent.mkdir(parents=True, exist_ok=True)
    if legacy_path:
        legacy_path = Path(legacy_path)
        if not active_path.exists() and legacy_path.exists():
            legacy_path.parent.mkdir(parents=True, exist_ok=True)
            try:
                shutil.move(str(legacy_path), str(active_path))
            except OSError:
                shutil.copy2(legacy_path, active_path)
                try:
                    legacy_path.unlink(missing_ok=True)
                except OSError:
                    pass
    return active_path


@dataclass
class AppConfig:
    storage_root: Path
    max_upload_size: int
    ui_page_size: int
    secret_key: str
    iam_config_path: Path
    bucket_policy_path: Path
    api_base_url: str
    aws_region: str
    aws_service: str
    ui_enforce_bucket_policies: bool
    log_level: str
    log_path: Path
    log_max_bytes: int
    log_backup_count: int
    ratelimit_default: str
    ratelimit_storage_uri: str
    cors_origins: list[str]
    cors_methods: list[str]
    cors_allow_headers: list[str]
    session_lifetime_days: int
    auth_max_attempts: int
    auth_lockout_minutes: int
    bulk_delete_max_keys: int
    secret_ttl_seconds: int
    stream_chunk_size: int
    multipart_min_part_size: int

    @classmethod
    def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
        overrides = overrides or {}

        def _get(name: str, default: Any) -> Any:
            return overrides.get(name, os.getenv(name, default))

        storage_root = Path(_get("STORAGE_ROOT", PROJECT_ROOT / "data")).resolve()
        max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024))  # 1 GiB default
        ui_page_size = int(_get("UI_PAGE_SIZE", 100))
        auth_max_attempts = int(_get("AUTH_MAX_ATTEMPTS", 5))
        auth_lockout_minutes = int(_get("AUTH_LOCKOUT_MINUTES", 15))
        bulk_delete_max_keys = int(_get("BULK_DELETE_MAX_KEYS", 500))
        secret_ttl_seconds = int(_get("SECRET_TTL_SECONDS", 300))
        stream_chunk_size = int(_get("STREAM_CHUNK_SIZE", 64 * 1024))
        multipart_min_part_size = int(_get("MULTIPART_MIN_PART_SIZE", 5 * 1024 * 1024))
        default_secret = "dev-secret-key"
        secret_key = str(_get("SECRET_KEY", default_secret))
        if not secret_key or secret_key == default_secret:
            generated = secrets.token_urlsafe(32)
            if secret_key == default_secret:
                warnings.warn(
                    "Using insecure default SECRET_KEY. A random value has been generated; "
                    "set SECRET_KEY for production.",
                    RuntimeWarning,
                )
            secret_key = generated
        iam_env_override = "IAM_CONFIG" in overrides or "IAM_CONFIG" in os.environ
        bucket_policy_override = "BUCKET_POLICY_PATH" in overrides or "BUCKET_POLICY_PATH" in os.environ

        default_iam_path = PROJECT_ROOT / "data" / ".myfsio.sys" / "config" / "iam.json"
        default_bucket_policy_path = PROJECT_ROOT / "data" / ".myfsio.sys" / "config" / "bucket_policies.json"

        iam_config_path = Path(_get("IAM_CONFIG", default_iam_path)).resolve()
        bucket_policy_path = Path(_get("BUCKET_POLICY_PATH", default_bucket_policy_path)).resolve()

        iam_config_path = _prepare_config_file(
            iam_config_path,
            legacy_path=None if iam_env_override else PROJECT_ROOT / "data" / "iam.json",
        )
        bucket_policy_path = _prepare_config_file(
            bucket_policy_path,
            legacy_path=None if bucket_policy_override else PROJECT_ROOT / "data" / "bucket_policies.json",
        )
        api_base_url = str(_get("API_BASE_URL", "http://127.0.0.1:5000"))
        aws_region = str(_get("AWS_REGION", "us-east-1"))
        aws_service = str(_get("AWS_SERVICE", "s3"))
        enforce_ui_policies = str(_get("UI_ENFORCE_BUCKET_POLICIES", "0")).lower() in {"1", "true", "yes", "on"}
        log_level = str(_get("LOG_LEVEL", "INFO")).upper()
        log_dir = Path(_get("LOG_DIR", PROJECT_ROOT / "logs")).resolve()
        log_dir.mkdir(parents=True, exist_ok=True)
        log_path = log_dir / str(_get("LOG_FILE", "app.log"))
        log_max_bytes = int(_get("LOG_MAX_BYTES", 5 * 1024 * 1024))
        log_backup_count = int(_get("LOG_BACKUP_COUNT", 3))
        ratelimit_default = str(_get("RATE_LIMIT_DEFAULT", "200 per minute"))
        ratelimit_storage_uri = str(_get("RATE_LIMIT_STORAGE_URI", "memory://"))

        def _csv(value: str, default: list[str]) -> list[str]:
            if not value:
                return default
            parts = [segment.strip() for segment in value.split(",") if segment.strip()]
            return parts or default

        cors_origins = _csv(str(_get("CORS_ORIGINS", "*")), ["*"])
        cors_methods = _csv(str(_get("CORS_METHODS", "GET,PUT,POST,DELETE,OPTIONS")), ["GET", "PUT", "POST", "DELETE", "OPTIONS"])
        cors_allow_headers = _csv(
            str(
                _get(
                    "CORS_ALLOW_HEADERS",
                    "Content-Type,X-Access-Key,X-Secret-Key,X-Amz-Algorithm,X-Amz-Credential,X-Amz-Date,X-Amz-Expires,X-Amz-SignedHeaders,X-Amz-Signature",
                )
            ),
            [
                "Content-Type",
                "X-Access-Key",
                "X-Secret-Key",
                "X-Amz-Algorithm",
                "X-Amz-Credential",
                "X-Amz-Date",
                "X-Amz-Expires",
                "X-Amz-SignedHeaders",
                "X-Amz-Signature",
            ],
        )
        session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30))

        return cls(
            storage_root=storage_root,
            max_upload_size=max_upload_size,
            ui_page_size=ui_page_size,
            secret_key=secret_key,
            iam_config_path=iam_config_path,
            bucket_policy_path=bucket_policy_path,
            api_base_url=api_base_url,
            aws_region=aws_region,
            aws_service=aws_service,
            ui_enforce_bucket_policies=enforce_ui_policies,
            log_level=log_level,
            log_path=log_path,
            log_max_bytes=log_max_bytes,
            log_backup_count=log_backup_count,
            ratelimit_default=ratelimit_default,
            ratelimit_storage_uri=ratelimit_storage_uri,
            cors_origins=cors_origins,
            cors_methods=cors_methods,
            cors_allow_headers=cors_allow_headers,
            session_lifetime_days=session_lifetime_days,
            auth_max_attempts=auth_max_attempts,
            auth_lockout_minutes=auth_lockout_minutes,
            bulk_delete_max_keys=bulk_delete_max_keys,
            secret_ttl_seconds=secret_ttl_seconds,
            stream_chunk_size=stream_chunk_size,
            multipart_min_part_size=multipart_min_part_size,
        )

    def to_flask_config(self) -> Dict[str, Any]:
        return {
            "STORAGE_ROOT": str(self.storage_root),
            "MAX_CONTENT_LENGTH": self.max_upload_size,
            "UI_PAGE_SIZE": self.ui_page_size,
            "SECRET_KEY": self.secret_key,
            "IAM_CONFIG": str(self.iam_config_path),
            "BUCKET_POLICY_PATH": str(self.bucket_policy_path),
            "API_BASE_URL": self.api_base_url,
            "AWS_REGION": self.aws_region,
            "AWS_SERVICE": self.aws_service,
            "UI_ENFORCE_BUCKET_POLICIES": self.ui_enforce_bucket_policies,
            "AUTH_MAX_ATTEMPTS": self.auth_max_attempts,
            "AUTH_LOCKOUT_MINUTES": self.auth_lockout_minutes,
            "BULK_DELETE_MAX_KEYS": self.bulk_delete_max_keys,
            "SECRET_TTL_SECONDS": self.secret_ttl_seconds,
            "STREAM_CHUNK_SIZE": self.stream_chunk_size,
            "MULTIPART_MIN_PART_SIZE": self.multipart_min_part_size,
            "LOG_LEVEL": self.log_level,
            "LOG_FILE": str(self.log_path),
            "LOG_MAX_BYTES": self.log_max_bytes,
            "LOG_BACKUP_COUNT": self.log_backup_count,
            "RATELIMIT_DEFAULT": self.ratelimit_default,
            "RATELIMIT_STORAGE_URI": self.ratelimit_storage_uri,
            "CORS_ORIGINS": self.cors_origins,
            "CORS_METHODS": self.cors_methods,
            "CORS_ALLOW_HEADERS": self.cors_allow_headers,
            "SESSION_LIFETIME_DAYS": self.session_lifetime_days,
        }
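Note: a quick sketch of how overrides flow through from_env; each key mirrors an environment variable of the same name, and the paths and sizes below are placeholders.

# Overrides take precedence over environment variables, which take precedence
# over the built-in defaults above.
config = AppConfig.from_env({
    "STORAGE_ROOT": "/tmp/myfsio-data",   # hypothetical location
    "MAX_UPLOAD_SIZE": 256 * 1024 * 1024,
    "LOG_LEVEL": "DEBUG",
})
flask_settings = config.to_flask_config()
assert flask_settings["MAX_CONTENT_LENGTH"] == 256 * 1024 * 1024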
app/connections.py (new file, 61 lines)
@@ -0,0 +1,61 @@
"""Manage remote S3 connections."""
from __future__ import annotations

import json
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Dict, List, Optional

from .config import AppConfig


@dataclass
class RemoteConnection:
    id: str
    name: str
    endpoint_url: str
    access_key: str
    secret_key: str
    region: str = "us-east-1"


class ConnectionStore:
    def __init__(self, config_path: Path) -> None:
        self.config_path = config_path
        self._connections: Dict[str, RemoteConnection] = {}
        self.reload()

    def reload(self) -> None:
        if not self.config_path.exists():
            self._connections = {}
            return

        try:
            with open(self.config_path, "r") as f:
                data = json.load(f)
            for item in data:
                conn = RemoteConnection(**item)
                self._connections[conn.id] = conn
        except (OSError, json.JSONDecodeError):
            self._connections = {}

    def save(self) -> None:
        self.config_path.parent.mkdir(parents=True, exist_ok=True)
        data = [asdict(conn) for conn in self._connections.values()]
        with open(self.config_path, "w") as f:
            json.dump(data, f, indent=2)

    def list(self) -> List[RemoteConnection]:
        return list(self._connections.values())

    def get(self, connection_id: str) -> Optional[RemoteConnection]:
        return self._connections.get(connection_id)

    def add(self, connection: RemoteConnection) -> None:
        self._connections[connection.id] = connection
        self.save()

    def delete(self, connection_id: str) -> None:
        if connection_id in self._connections:
            del self._connections[connection_id]
            self.save()
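Note: sketch of the store in use; the resulting .connections.json is simply a JSON list of these records. The id, endpoint, and keys below are placeholders.

store = ConnectionStore(Path("data/.connections.json"))
store.add(RemoteConnection(
    id="backup-site",                      # any unique string works as the id
    name="Backup site",
    endpoint_url="http://backup.example.internal:9000",
    access_key="EXAMPLEKEY",
    secret_key="EXAMPLESECRET",
))
for conn in store.list():
    print(conn.id, conn.endpoint_url)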
app/extensions.py (new file, 10 lines)
@@ -0,0 +1,10 @@
"""Application-wide extension instances."""
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
from flask_wtf import CSRFProtect

# Shared rate limiter instance; configured in app factory.
limiter = Limiter(key_func=get_remote_address)

# Global CSRF protection for UI routes.
csrf = CSRFProtect()
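Note: the shared instances are meant to be imported by the blueprints. As a sketch, a per-route limit can be layered on top of the app-wide RATE_LIMIT_DEFAULT; the route and handler below are hypothetical, not part of this commit.

from .extensions import limiter

@ui_bp.post("/buckets")            # hypothetical route on the UI blueprint
@limiter.limit("10 per minute")    # tighter than the app-wide default
def create_bucket_form():
    ...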
app/iam.py (new file, 404 lines)
@@ -0,0 +1,404 @@
"""Lightweight IAM-style user and policy management."""
from __future__ import annotations

import json
import math
import secrets
from collections import deque
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set


class IamError(RuntimeError):
    """Raised when authentication or authorization fails."""


S3_ACTIONS = {"list", "read", "write", "delete", "share", "policy"}
IAM_ACTIONS = {
    "iam:list_users",
    "iam:create_user",
    "iam:delete_user",
    "iam:rotate_key",
    "iam:update_policy",
}
ALLOWED_ACTIONS = (S3_ACTIONS | IAM_ACTIONS) | {"iam:*"}

ACTION_ALIASES = {
    "list": "list",
    "s3:listbucket": "list",
    "s3:listallmybuckets": "list",
    "read": "read",
    "s3:getobject": "read",
    "s3:getobjectversion": "read",
    "write": "write",
    "s3:putobject": "write",
    "s3:createbucket": "write",
    "delete": "delete",
    "s3:deleteobject": "delete",
    "s3:deletebucket": "delete",
    "share": "share",
    "s3:putobjectacl": "share",
    "policy": "policy",
    "s3:putbucketpolicy": "policy",
    "iam:listusers": "iam:list_users",
    "iam:createuser": "iam:create_user",
    "iam:deleteuser": "iam:delete_user",
    "iam:rotateaccesskey": "iam:rotate_key",
    "iam:putuserpolicy": "iam:update_policy",
    "iam:*": "iam:*",
}


@dataclass
class Policy:
    bucket: str
    actions: Set[str]


@dataclass
class Principal:
    access_key: str
    display_name: str
    policies: List[Policy]


class IamService:
    """Loads IAM configuration, manages users, and evaluates policies."""

    def __init__(self, config_path: Path, auth_max_attempts: int = 5, auth_lockout_minutes: int = 15) -> None:
        self.config_path = Path(config_path)
        self.auth_max_attempts = auth_max_attempts
        self.auth_lockout_window = timedelta(minutes=auth_lockout_minutes)
        self.config_path.parent.mkdir(parents=True, exist_ok=True)
        if not self.config_path.exists():
            self._write_default()
        self._users: Dict[str, Dict[str, Any]] = {}
        self._raw_config: Dict[str, Any] = {}
        self._failed_attempts: Dict[str, Deque[datetime]] = {}
        self._load()

    # ---------------------- authz helpers ----------------------
    def authenticate(self, access_key: str, secret_key: str) -> Principal:
        access_key = (access_key or "").strip()
        secret_key = (secret_key or "").strip()
        if not access_key or not secret_key:
            raise IamError("Missing access credentials")
        if self._is_locked_out(access_key):
            seconds = self._seconds_until_unlock(access_key)
            raise IamError(
                f"Access temporarily locked. Try again in {seconds} seconds."
            )
        record = self._users.get(access_key)
        if not record or record["secret_key"] != secret_key:
            self._record_failed_attempt(access_key)
            raise IamError("Invalid credentials")
        self._clear_failed_attempts(access_key)
        return self._build_principal(access_key, record)

    def _record_failed_attempt(self, access_key: str) -> None:
        if not access_key:
            return
        attempts = self._failed_attempts.setdefault(access_key, deque())
        self._prune_attempts(attempts)
        attempts.append(datetime.now())

    def _clear_failed_attempts(self, access_key: str) -> None:
        if not access_key:
            return
        self._failed_attempts.pop(access_key, None)

    def _prune_attempts(self, attempts: Deque[datetime]) -> None:
        cutoff = datetime.now() - self.auth_lockout_window
        while attempts and attempts[0] < cutoff:
            attempts.popleft()

    def _is_locked_out(self, access_key: str) -> bool:
        if not access_key:
            return False
        attempts = self._failed_attempts.get(access_key)
        if not attempts:
            return False
        self._prune_attempts(attempts)
        return len(attempts) >= self.auth_max_attempts

    def _seconds_until_unlock(self, access_key: str) -> int:
        attempts = self._failed_attempts.get(access_key)
        if not attempts:
            return 0
        self._prune_attempts(attempts)
        if len(attempts) < self.auth_max_attempts:
            return 0
        oldest = attempts[0]
        elapsed = (datetime.now() - oldest).total_seconds()
        return int(max(0, self.auth_lockout_window.total_seconds() - elapsed))

    def principal_for_key(self, access_key: str) -> Principal:
        record = self._users.get(access_key)
        if not record:
            raise IamError("Unknown access key")
        return self._build_principal(access_key, record)

    def secret_for_key(self, access_key: str) -> str:
        record = self._users.get(access_key)
        if not record:
            raise IamError("Unknown access key")
        return record["secret_key"]

    def authorize(self, principal: Principal, bucket_name: str | None, action: str) -> None:
        action = self._normalize_action(action)
        if action not in ALLOWED_ACTIONS:
            raise IamError(f"Unknown action '{action}'")
        bucket_name = bucket_name or "*"
        normalized = bucket_name.lower() if bucket_name != "*" else bucket_name
        if not self._is_allowed(principal, normalized, action):
            raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'")

    def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]:
        return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")]

    def _is_allowed(self, principal: Principal, bucket_name: str, action: str) -> bool:
        bucket_name = bucket_name.lower()
        for policy in principal.policies:
            if policy.bucket not in {"*", bucket_name}:
                continue
            if "*" in policy.actions or action in policy.actions:
                return True
            if "iam:*" in policy.actions and action.startswith("iam:"):
                return True
        return False

    # ---------------------- management helpers ----------------------
    def list_users(self) -> List[Dict[str, Any]]:
        listing: List[Dict[str, Any]] = []
        for access_key, record in self._users.items():
            listing.append(
                {
                    "access_key": access_key,
                    "display_name": record["display_name"],
                    "policies": [
                        {"bucket": policy.bucket, "actions": sorted(policy.actions)}
                        for policy in record["policies"]
                    ],
                }
            )
        return listing

    def create_user(
        self,
        *,
        display_name: str,
        policies: Optional[Sequence[Dict[str, Any]]] = None,
        access_key: str | None = None,
        secret_key: str | None = None,
    ) -> Dict[str, str]:
        access_key = (access_key or self._generate_access_key()).strip()
        if not access_key:
            raise IamError("Access key cannot be empty")
        if access_key in self._users:
            raise IamError("Access key already exists")
        secret_key = secret_key or self._generate_secret_key()
        sanitized_policies = self._prepare_policy_payload(policies)
        record = {
            "access_key": access_key,
            "secret_key": secret_key,
            "display_name": display_name or access_key,
            "policies": sanitized_policies,
        }
        self._raw_config.setdefault("users", []).append(record)
        self._save()
        self._load()
        return {"access_key": access_key, "secret_key": secret_key}

    def rotate_secret(self, access_key: str) -> str:
        user = self._get_raw_user(access_key)
        new_secret = self._generate_secret_key()
        user["secret_key"] = new_secret
        self._save()
        self._load()
        return new_secret

    def update_user(self, access_key: str, display_name: str) -> None:
        user = self._get_raw_user(access_key)
        user["display_name"] = display_name
        self._save()
        self._load()

    def delete_user(self, access_key: str) -> None:
        users = self._raw_config.get("users", [])
        if len(users) <= 1:
            raise IamError("Cannot delete the only user")
        remaining = [user for user in users if user["access_key"] != access_key]
        if len(remaining) == len(users):
            raise IamError("User not found")
        self._raw_config["users"] = remaining
        self._save()
        self._load()

    def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None:
        user = self._get_raw_user(access_key)
        user["policies"] = self._prepare_policy_payload(policies)
        self._save()
        self._load()

    # ---------------------- config helpers ----------------------
    def _load(self) -> None:
        try:
            content = self.config_path.read_text(encoding='utf-8')
            raw = json.loads(content)
        except FileNotFoundError:
            raise IamError(f"IAM config not found: {self.config_path}")
        except json.JSONDecodeError as e:
            raise IamError(f"Corrupted IAM config (invalid JSON): {e}")
        except PermissionError as e:
            raise IamError(f"Cannot read IAM config (permission denied): {e}")
        except (OSError, ValueError) as e:
            raise IamError(f"Failed to load IAM config: {e}")

        users: Dict[str, Dict[str, Any]] = {}
        for user in raw.get("users", []):
            policies = self._build_policy_objects(user.get("policies", []))
            users[user["access_key"]] = {
                "secret_key": user["secret_key"],
                "display_name": user.get("display_name", user["access_key"]),
                "policies": policies,
            }
        if not users:
            raise IamError("IAM configuration contains no users")
        self._users = users
        self._raw_config = {
            "users": [
                {
                    "access_key": entry["access_key"],
                    "secret_key": entry["secret_key"],
                    "display_name": entry.get("display_name", entry["access_key"]),
                    "policies": entry.get("policies", []),
                }
                for entry in raw.get("users", [])
            ]
        }

    def _save(self) -> None:
        try:
            temp_path = self.config_path.with_suffix('.json.tmp')
            temp_path.write_text(json.dumps(self._raw_config, indent=2), encoding='utf-8')
            temp_path.replace(self.config_path)
        except (OSError, PermissionError) as e:
            raise IamError(f"Cannot save IAM config: {e}")

    # ---------------------- insight helpers ----------------------
    def config_summary(self) -> Dict[str, Any]:
        return {
            "path": str(self.config_path),
            "user_count": len(self._users),
            "allowed_actions": sorted(ALLOWED_ACTIONS),
        }

    def export_config(self, mask_secrets: bool = True) -> Dict[str, Any]:
        payload: Dict[str, Any] = {"users": []}
        for user in self._raw_config.get("users", []):
            record = dict(user)
            if mask_secrets and "secret_key" in record:
                record["secret_key"] = "••••••••••"
            payload["users"].append(record)
        return payload

    def _build_policy_objects(self, policies: Sequence[Dict[str, Any]]) -> List[Policy]:
        entries: List[Policy] = []
        for policy in policies:
            bucket = str(policy.get("bucket", "*")).lower()
            raw_actions = policy.get("actions", [])
            if isinstance(raw_actions, str):
                raw_actions = [raw_actions]
            action_set: Set[str] = set()
            for action in raw_actions:
                canonical = self._normalize_action(action)
                if canonical == "*":
                    action_set = set(ALLOWED_ACTIONS)
                    break
                if canonical:
                    action_set.add(canonical)
            if action_set:
                entries.append(Policy(bucket=bucket, actions=action_set))
        return entries

    def _prepare_policy_payload(self, policies: Optional[Sequence[Dict[str, Any]]]) -> List[Dict[str, Any]]:
        if not policies:
            policies = (
                {
                    "bucket": "*",
                    "actions": ["list", "read", "write", "delete", "share", "policy"],
                },
            )
        sanitized: List[Dict[str, Any]] = []
        for policy in policies:
            bucket = str(policy.get("bucket", "*")).lower()
            raw_actions = policy.get("actions", [])
            if isinstance(raw_actions, str):
                raw_actions = [raw_actions]
            action_set: Set[str] = set()
            for action in raw_actions:
                canonical = self._normalize_action(action)
                if canonical == "*":
                    action_set = set(ALLOWED_ACTIONS)
                    break
                if canonical:
                    action_set.add(canonical)
            if not action_set:
                continue
            sanitized.append({"bucket": bucket, "actions": sorted(action_set)})
        if not sanitized:
            raise IamError("At least one policy with valid actions is required")
        return sanitized

    def _build_principal(self, access_key: str, record: Dict[str, Any]) -> Principal:
        return Principal(
            access_key=access_key,
            display_name=record["display_name"],
            policies=record["policies"],
        )

    def _normalize_action(self, action: str) -> str:
        if not action:
            return ""
        lowered = action.strip().lower()
        if lowered == "*":
            return "*"
        candidate = ACTION_ALIASES.get(lowered, lowered)
        return candidate if candidate in ALLOWED_ACTIONS else ""

    def _write_default(self) -> None:
        default = {
            "users": [
                {
                    "access_key": "localadmin",
                    "secret_key": "localadmin",
                    "display_name": "Local Admin",
                    "policies": [
                        {"bucket": "*", "actions": list(ALLOWED_ACTIONS)}
                    ],
                }
            ]
        }
        self.config_path.write_text(json.dumps(default, indent=2))

    def _generate_access_key(self) -> str:
        return secrets.token_hex(8)

    def _generate_secret_key(self) -> str:
        return secrets.token_urlsafe(24)

    def _get_raw_user(self, access_key: str) -> Dict[str, Any]:
        for user in self._raw_config.get("users", []):
            if user["access_key"] == access_key:
                return user
        raise IamError("User not found")

    def get_secret_key(self, access_key: str) -> str | None:
        record = self._users.get(access_key)
        return record["secret_key"] if record else None

    def get_principal(self, access_key: str) -> Principal | None:
        record = self._users.get(access_key)
        return self._build_principal(access_key, record) if record else None
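Note: a typical round trip through the service above, as a sketch; the bucket name is made up and the keys are generated by create_user.

iam = IamService(Path("data/.myfsio.sys/config/iam.json"))
creds = iam.create_user(
    display_name="Reporting job",
    policies=[{"bucket": "reports", "actions": ["list", "read"]}],
)
principal = iam.authenticate(creds["access_key"], creds["secret_key"])
iam.authorize(principal, "reports", "read")        # passes
# iam.authorize(principal, "reports", "delete")    # would raise IamError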
app/replication.py (new file, 121 lines)
@@ -0,0 +1,121 @@
"""Background replication worker."""
from __future__ import annotations

import logging
import threading
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Optional

import boto3
from botocore.exceptions import ClientError

from .connections import ConnectionStore, RemoteConnection
from .storage import ObjectStorage

logger = logging.getLogger(__name__)


@dataclass
class ReplicationRule:
    bucket_name: str
    target_connection_id: str
    target_bucket: str
    enabled: bool = True


class ReplicationManager:
    def __init__(self, storage: ObjectStorage, connections: ConnectionStore, rules_path: Path) -> None:
        self.storage = storage
        self.connections = connections
        self.rules_path = rules_path
        self._rules: Dict[str, ReplicationRule] = {}
        self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
        self.reload_rules()

    def reload_rules(self) -> None:
        if not self.rules_path.exists():
            self._rules = {}
            return
        try:
            import json
            with open(self.rules_path, "r") as f:
                data = json.load(f)
            for bucket, rule_data in data.items():
                self._rules[bucket] = ReplicationRule(**rule_data)
        except (OSError, ValueError) as e:
            logger.error(f"Failed to load replication rules: {e}")

    def save_rules(self) -> None:
        import json
        data = {b: rule.__dict__ for b, rule in self._rules.items()}
        self.rules_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.rules_path, "w") as f:
            json.dump(data, f, indent=2)

    def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]:
        return self._rules.get(bucket_name)

    def set_rule(self, rule: ReplicationRule) -> None:
        self._rules[rule.bucket_name] = rule
        self.save_rules()

    def delete_rule(self, bucket_name: str) -> None:
        if bucket_name in self._rules:
            del self._rules[bucket_name]
            self.save_rules()

    def trigger_replication(self, bucket_name: str, object_key: str) -> None:
        rule = self.get_rule(bucket_name)
        if not rule or not rule.enabled:
            return

        connection = self.connections.get(rule.target_connection_id)
        if not connection:
            logger.warning(f"Replication skipped for {bucket_name}/{object_key}: connection {rule.target_connection_id} not found")
            return

        self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection)

    def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection) -> None:
        try:
            # Upload to the remote endpoint with boto3. Rather than reaching into
            # the storage layout for a raw file path, the object is streamed
            # through the storage API (open_object) so the layout stays private.
            s3 = boto3.client(
                "s3",
                endpoint_url=conn.endpoint_url,
                aws_access_key_id=conn.access_key,
                aws_secret_access_key=conn.secret_key,
                region_name=conn.region,
            )

            meta = self.storage.get_object_meta(bucket_name, object_key)
            if not meta:
                return

            # open_object yields a context-managed stream, so it is closed even
            # if the upload fails part-way.
            with self.storage.open_object(bucket_name, object_key) as f:
                extra_args = {}
                if meta.metadata:
                    extra_args["Metadata"] = meta.metadata

                s3.upload_fileobj(
                    f,
                    rule.target_bucket,
                    object_key,
                    ExtraArgs=extra_args,
                )

            logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")

        except (ClientError, OSError, ValueError) as e:
            logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}")
        except Exception:
            logger.exception(f"Unexpected error during replication for {bucket_name}/{object_key}")
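Note: usage sketch. A rule ties a local bucket to a stored connection, and trigger_replication is expected to be called after a successful put; the names below are placeholders and "replication" refers to the ReplicationManager instance created in create_app.

replication.set_rule(ReplicationRule(
    bucket_name="reports",
    target_connection_id="backup-site",    # must match a ConnectionStore entry
    target_bucket="reports-mirror",
))
replication.trigger_replication("reports", "2024/q1.csv")  # runs on the worker pool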
app/s3_api.py (new file, 1245 lines)
File diff suppressed because it is too large.
app/secret_store.py (new file, 37 lines)
@@ -0,0 +1,37 @@
"""Ephemeral store for one-time secrets communicated to the UI."""
from __future__ import annotations

import secrets
import time
from typing import Any, Dict, Optional


class EphemeralSecretStore:
    """Keeps values in-memory for a short period and returns them once."""

    def __init__(self, default_ttl: int = 300) -> None:
        self._default_ttl = max(default_ttl, 1)
        self._store: Dict[str, tuple[Any, float]] = {}

    def remember(self, payload: Any, *, ttl: Optional[int] = None) -> str:
        token = secrets.token_urlsafe(16)
        expires_at = time.time() + (ttl or self._default_ttl)
        self._store[token] = (payload, expires_at)
        return token

    def pop(self, token: str | None) -> Any | None:
        if not token:
            return None
        entry = self._store.pop(token, None)
        if not entry:
            return None
        payload, expires_at = entry
        if expires_at < time.time():
            return None
        return payload

    def purge_expired(self) -> None:
        now = time.time()
        stale = [token for token, (_, expires_at) in self._store.items() if expires_at < now]
        for token in stale:
            self._store.pop(token, None)
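Note: sketch of the intended one-shot flow; the payload values are placeholders. The second pop returns None because the entry is removed on first read.

store = EphemeralSecretStore(default_ttl=300)
token = store.remember({"access_key": "AKIA...", "secret_key": "..."})
payload = store.pop(token)    # returns the payload once
again = store.pop(token)      # None: consumed by the first pop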
app/storage.py (new file, 935 lines)
@@ -0,0 +1,935 @@
|
||||
"""Filesystem-backed object storage helpers."""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import stat
|
||||
import time
|
||||
import unicodedata
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, BinaryIO, Dict, List, Optional
|
||||
|
||||
WINDOWS_RESERVED_NAMES = {
|
||||
"CON",
|
||||
"PRN",
|
||||
"AUX",
|
||||
"NUL",
|
||||
"COM1",
|
||||
"COM2",
|
||||
"COM3",
|
||||
"COM4",
|
||||
"COM5",
|
||||
"COM6",
|
||||
"COM7",
|
||||
"COM8",
|
||||
"COM9",
|
||||
"LPT1",
|
||||
"LPT2",
|
||||
"LPT3",
|
||||
"LPT4",
|
||||
"LPT5",
|
||||
"LPT6",
|
||||
"LPT7",
|
||||
"LPT8",
|
||||
"LPT9",
|
||||
}
|
||||
|
||||
|
||||
class StorageError(RuntimeError):
|
||||
"""Raised when the storage layer encounters an unrecoverable problem."""
|
||||
|
||||
|
||||
@dataclass
|
||||
class ObjectMeta:
|
||||
key: str
|
||||
size: int
|
||||
last_modified: datetime
|
||||
etag: str
|
||||
metadata: Optional[Dict[str, str]] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class BucketMeta:
|
||||
name: str
|
||||
created_at: datetime
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
|
||||
def _utc_isoformat() -> str:
|
||||
return _utcnow().isoformat().replace("+00:00", "Z")
|
||||
|
||||
|
||||
class ObjectStorage:
|
||||
"""Very small filesystem wrapper implementing the bare S3 primitives."""
|
||||
|
||||
INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
|
||||
SYSTEM_ROOT = ".myfsio.sys"
|
||||
SYSTEM_BUCKETS_DIR = "buckets"
|
||||
SYSTEM_MULTIPART_DIR = "multipart"
|
||||
SYSTEM_TMP_DIR = "tmp"
|
||||
BUCKET_META_DIR = "meta"
|
||||
BUCKET_VERSIONS_DIR = "versions"
|
||||
MULTIPART_MANIFEST = "manifest.json"
|
||||
BUCKET_CONFIG_FILE = ".bucket.json"
|
||||
|
||||
def __init__(self, root: Path) -> None:
|
||||
self.root = Path(root)
|
||||
self.root.mkdir(parents=True, exist_ok=True)
|
||||
self._ensure_system_roots()
|
||||
|
||||
# ---------------------- Bucket helpers ----------------------
|
||||
def list_buckets(self) -> List[BucketMeta]:
|
||||
buckets: List[BucketMeta] = []
|
||||
for bucket in sorted(self.root.iterdir()):
|
||||
if bucket.is_dir() and bucket.name != self.SYSTEM_ROOT:
|
||||
stat = bucket.stat()
|
||||
buckets.append(
|
||||
BucketMeta(
|
||||
name=bucket.name,
|
||||
created_at=datetime.fromtimestamp(stat.st_ctime),
|
||||
)
|
||||
)
|
||||
return buckets
|
||||
|
||||
def bucket_exists(self, bucket_name: str) -> bool:
|
||||
return self._bucket_path(bucket_name).exists()
|
||||
|
||||
def _validate_bucket_name(self, bucket_name: str) -> None:
|
||||
if len(bucket_name) < 3 or len(bucket_name) > 63:
|
||||
raise StorageError("Bucket name must be between 3 and 63 characters")
|
||||
if not re.match(r"^[a-z0-9][a-z0-9.-]*[a-z0-9]$", bucket_name):
|
||||
raise StorageError("Bucket name must consist of lowercase letters, numbers, periods, and hyphens, and must start and end with a letter or number")
|
||||
if ".." in bucket_name:
|
||||
raise StorageError("Bucket name must not contain consecutive periods")
|
||||
if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", bucket_name):
|
||||
raise StorageError("Bucket name must not be formatted as an IP address")
|
||||
|
||||
def create_bucket(self, bucket_name: str) -> None:
|
||||
self._validate_bucket_name(bucket_name)
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
bucket_path.mkdir(parents=True, exist_ok=False)
|
||||
self._system_bucket_root(bucket_path.name).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def bucket_stats(self, bucket_name: str) -> dict[str, int]:
|
||||
"""Return object count and total size for the bucket without hashing files."""
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
object_count = 0
|
||||
total_bytes = 0
|
||||
for path in bucket_path.rglob("*"):
|
||||
if path.is_file():
|
||||
rel = path.relative_to(bucket_path)
|
||||
if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
|
||||
continue
|
||||
stat = path.stat()
|
||||
object_count += 1
|
||||
total_bytes += stat.st_size
|
||||
return {"objects": object_count, "bytes": total_bytes}
|
||||
|
||||
def delete_bucket(self, bucket_name: str) -> None:
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
if self._has_visible_objects(bucket_path):
|
||||
raise StorageError("Bucket not empty")
|
||||
if self._has_archived_versions(bucket_path):
|
||||
raise StorageError("Bucket contains archived object versions")
|
||||
if self._has_active_multipart_uploads(bucket_path):
|
||||
raise StorageError("Bucket has active multipart uploads")
|
||||
self._remove_tree(bucket_path)
|
||||
self._remove_tree(self._system_bucket_root(bucket_path.name))
|
||||
self._remove_tree(self._multipart_bucket_root(bucket_path.name))
|
||||
|
||||
# ---------------------- Object helpers ----------------------
|
||||
def list_objects(self, bucket_name: str) -> List[ObjectMeta]:
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
bucket_id = bucket_path.name
|
||||
|
||||
objects: List[ObjectMeta] = []
|
||||
for path in bucket_path.rglob("*"):
|
||||
if path.is_file():
|
||||
stat = path.stat()
|
||||
rel = path.relative_to(bucket_path)
|
||||
if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
|
||||
continue
|
||||
metadata = self._read_metadata(bucket_id, rel)
|
||||
objects.append(
|
||||
ObjectMeta(
|
||||
key=str(rel.as_posix()),
|
||||
size=stat.st_size,
|
||||
last_modified=datetime.fromtimestamp(stat.st_mtime),
|
||||
etag=self._compute_etag(path),
|
||||
metadata=metadata or None,
|
||||
)
|
||||
)
|
||||
objects.sort(key=lambda meta: meta.key)
|
||||
return objects
|
||||
|
||||
def put_object(
|
||||
self,
|
||||
bucket_name: str,
|
||||
object_key: str,
|
||||
stream: BinaryIO,
|
||||
*,
|
||||
metadata: Optional[Dict[str, str]] = None,
|
||||
) -> ObjectMeta:
|
||||
bucket_path = self._bucket_path(bucket_name)
|
||||
if not bucket_path.exists():
|
||||
raise StorageError("Bucket does not exist")
|
||||
bucket_id = bucket_path.name
|
||||
|
||||
safe_key = self._sanitize_object_key(object_key)
|
||||
destination = bucket_path / safe_key
|
||||
destination.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if self._is_versioning_enabled(bucket_path) and destination.exists():
|
||||
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
|
||||
|
||||
checksum = hashlib.md5()
|
||||
with destination.open("wb") as target:
|
||||
shutil.copyfileobj(_HashingReader(stream, checksum), target)
|
||||
|
||||
stat = destination.stat()
|
||||
if metadata:
|
||||
self._write_metadata(bucket_id, safe_key, metadata)
|
||||
else:
|
||||
self._delete_metadata(bucket_id, safe_key)
|
||||
return ObjectMeta(
|
||||
key=safe_key.as_posix(),
|
||||
size=stat.st_size,
|
||||
last_modified=datetime.fromtimestamp(stat.st_mtime),
|
||||
etag=checksum.hexdigest(),
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
    def get_object_path(self, bucket_name: str, object_key: str) -> Path:
        path = self._object_path(bucket_name, object_key)
        if not path.exists():
            raise StorageError("Object not found")
        return path

    def get_object_metadata(self, bucket_name: str, object_key: str) -> Dict[str, str]:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            return {}
        safe_key = self._sanitize_object_key(object_key)
        return self._read_metadata(bucket_path.name, safe_key) or {}

    def delete_object(self, bucket_name: str, object_key: str) -> None:
        """Delete an object, archiving it first when versioning is enabled, then prune empty parent folders."""
        bucket_path = self._bucket_path(bucket_name)
        path = self._object_path(bucket_name, object_key)
        if not path.exists():
            return
        safe_key = path.relative_to(bucket_path)
        bucket_id = bucket_path.name
        if self._is_versioning_enabled(bucket_path):
            self._archive_current_version(bucket_id, safe_key, reason="delete")
        rel = path.relative_to(bucket_path)
        self._safe_unlink(path)
        self._delete_metadata(bucket_id, rel)
        # Clean up now-empty parent directories inside the bucket.
        for parent in path.parents:
            if parent == bucket_path:
                break
            if parent.exists() and not any(parent.iterdir()):
                parent.rmdir()

    def purge_object(self, bucket_name: str, object_key: str) -> None:
        """Permanently remove an object together with its metadata and every archived version."""
        bucket_path = self._bucket_path(bucket_name)
        target = self._object_path(bucket_name, object_key)
        bucket_id = bucket_path.name
        if target.exists():
            rel = target.relative_to(bucket_path)
            self._safe_unlink(target)
            self._delete_metadata(bucket_id, rel)
        else:
            rel = self._sanitize_object_key(object_key)
            self._delete_metadata(bucket_id, rel)
        version_dir = self._version_dir(bucket_id, rel)
        if version_dir.exists():
            shutil.rmtree(version_dir, ignore_errors=True)
        legacy_version_dir = self._legacy_version_dir(bucket_id, rel)
        if legacy_version_dir.exists():
            shutil.rmtree(legacy_version_dir, ignore_errors=True)
        for parent in target.parents:
            if parent == bucket_path:
                break
            if parent.exists() and not any(parent.iterdir()):
                parent.rmdir()

    # ---------------------- Versioning helpers ----------------------
    def is_versioning_enabled(self, bucket_name: str) -> bool:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise StorageError("Bucket does not exist")
        return self._is_versioning_enabled(bucket_path)

    def set_bucket_versioning(self, bucket_name: str, enabled: bool) -> None:
        bucket_path = self._require_bucket_path(bucket_name)
        config = self._read_bucket_config(bucket_path.name)
        config["versioning_enabled"] = bool(enabled)
        self._write_bucket_config(bucket_path.name, config)

    # ---------------------- Bucket configuration helpers ----------------------
    def get_bucket_tags(self, bucket_name: str) -> List[Dict[str, str]]:
        bucket_path = self._require_bucket_path(bucket_name)
        config = self._read_bucket_config(bucket_path.name)
        raw_tags = config.get("tags")
        if not isinstance(raw_tags, list):
            return []
        tags: List[Dict[str, str]] = []
        for entry in raw_tags:
            if not isinstance(entry, dict):
                continue
            key = str(entry.get("Key", "")).strip()
            if not key:
                continue
            value = str(entry.get("Value", ""))
            tags.append({"Key": key, "Value": value})
        return tags

    def set_bucket_tags(self, bucket_name: str, tags: Optional[List[Dict[str, str]]]) -> None:
        bucket_path = self._require_bucket_path(bucket_name)
        if not tags:
            self._set_bucket_config_entry(bucket_path.name, "tags", None)
            return
        clean: List[Dict[str, str]] = []
        for entry in tags:
            if not isinstance(entry, dict):
                continue
            key = str(entry.get("Key", "")).strip()
            if not key:
                continue
            clean.append({"Key": key, "Value": str(entry.get("Value", ""))})
        self._set_bucket_config_entry(bucket_path.name, "tags", clean or None)

    def get_bucket_cors(self, bucket_name: str) -> List[Dict[str, Any]]:
        bucket_path = self._require_bucket_path(bucket_name)
        config = self._read_bucket_config(bucket_path.name)
        cors_rules = config.get("cors")
        return cors_rules if isinstance(cors_rules, list) else []

    def set_bucket_cors(self, bucket_name: str, rules: Optional[List[Dict[str, Any]]]) -> None:
        bucket_path = self._require_bucket_path(bucket_name)
        self._set_bucket_config_entry(bucket_path.name, "cors", rules or None)

    def get_bucket_encryption(self, bucket_name: str) -> Dict[str, Any]:
        bucket_path = self._require_bucket_path(bucket_name)
        config = self._read_bucket_config(bucket_path.name)
        payload = config.get("encryption")
        return payload if isinstance(payload, dict) else {}

    def set_bucket_encryption(self, bucket_name: str, config_payload: Optional[Dict[str, Any]]) -> None:
        bucket_path = self._require_bucket_path(bucket_name)
        self._set_bucket_config_entry(bucket_path.name, "encryption", config_payload or None)

    def list_object_versions(self, bucket_name: str, object_key: str) -> List[Dict[str, Any]]:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise StorageError("Bucket does not exist")
        bucket_id = bucket_path.name
        safe_key = self._sanitize_object_key(object_key)
        version_dir = self._version_dir(bucket_id, safe_key)
        if not version_dir.exists():
            version_dir = self._legacy_version_dir(bucket_id, safe_key)
        if not version_dir.exists():
            return []
        versions: List[Dict[str, Any]] = []
        for meta_file in version_dir.glob("*.json"):
            try:
                payload = json.loads(meta_file.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError):
                continue
            if not isinstance(payload, dict):
                continue
            payload.setdefault("version_id", meta_file.stem)
            versions.append(payload)
        versions.sort(key=lambda item: item.get("archived_at", ""), reverse=True)
        return versions

    def restore_object_version(self, bucket_name: str, object_key: str, version_id: str) -> ObjectMeta:
        """Restore an archived version as the current object, archiving the copy it replaces when versioning is enabled."""
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise StorageError("Bucket does not exist")
        bucket_id = bucket_path.name
        safe_key = self._sanitize_object_key(object_key)
        version_dir = self._version_dir(bucket_id, safe_key)
        data_path = version_dir / f"{version_id}.bin"
        meta_path = version_dir / f"{version_id}.json"
        if not data_path.exists() or not meta_path.exists():
            raise StorageError("Version not found")
        try:
            payload = json.loads(meta_path.read_text(encoding="utf-8"))
        except (OSError, json.JSONDecodeError):
            payload = {}
        metadata = payload.get("metadata") if isinstance(payload, dict) else {}
        if not isinstance(metadata, dict):
            metadata = {}
        destination = bucket_path / safe_key
        if self._is_versioning_enabled(bucket_path) and destination.exists():
            self._archive_current_version(bucket_id, safe_key, reason="restore-overwrite")
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(data_path, destination)
        if metadata:
            self._write_metadata(bucket_id, safe_key, metadata)
        else:
            self._delete_metadata(bucket_id, safe_key)
        stat = destination.stat()
        return ObjectMeta(
            key=safe_key.as_posix(),
            size=stat.st_size,
            last_modified=datetime.fromtimestamp(stat.st_mtime),
            etag=self._compute_etag(destination),
            metadata=metadata or None,
        )

    def list_orphaned_objects(self, bucket_name: str) -> List[Dict[str, Any]]:
        """Summarize archived versions whose live object no longer exists, grouped by key."""
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise StorageError("Bucket does not exist")
        bucket_id = bucket_path.name
        version_roots = [self._bucket_versions_root(bucket_id), self._legacy_versions_root(bucket_id)]
        if not any(root.exists() for root in version_roots):
            return []
        aggregated: Dict[str, Dict[str, Any]] = {}
        skipped: set[str] = set()
        for version_root in version_roots:
            if not version_root.exists():
                continue
            for meta_file in version_root.glob("**/*.json"):
                if not meta_file.is_file():
                    continue
                rel = meta_file.parent.relative_to(version_root)
                rel_key = rel.as_posix()
                if rel_key in skipped:
                    continue
                object_path = bucket_path / rel
                if object_path.exists():
                    skipped.add(rel_key)
                    continue
                try:
                    payload = json.loads(meta_file.read_text(encoding="utf-8"))
                except (OSError, json.JSONDecodeError):
                    payload = {}
                version_id = payload.get("version_id") or meta_file.stem
                archived_at = payload.get("archived_at") or ""
                size = int(payload.get("size") or 0)
                reason = payload.get("reason") or "update"
                record = aggregated.setdefault(
                    rel_key,
                    {
                        "key": rel_key,
                        "versions": 0,
                        "total_size": 0,
                        "latest": None,
                        "_latest_sort": None,
                    },
                )
                record["versions"] += 1
                record["total_size"] += size
                candidate = {
                    "version_id": version_id,
                    "archived_at": archived_at,
                    "size": size,
                    "reason": reason,
                }
                sort_key = (
                    archived_at,
                    meta_file.stat().st_mtime,
                )
                current_sort = record.get("_latest_sort")
                if current_sort is None or sort_key > current_sort:
                    record["_latest_sort"] = sort_key
                    record["latest"] = candidate
        for record in aggregated.values():
            record.pop("_latest_sort", None)
        return sorted(aggregated.values(), key=lambda item: item["key"])

    # ---------------------- Multipart helpers ----------------------
    def initiate_multipart_upload(
        self,
        bucket_name: str,
        object_key: str,
        *,
        metadata: Optional[Dict[str, str]] = None,
    ) -> str:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise StorageError("Bucket does not exist")
        bucket_id = bucket_path.name
        safe_key = self._sanitize_object_key(object_key)
        upload_id = uuid.uuid4().hex
        upload_root = self._multipart_dir(bucket_id, upload_id)
        upload_root.mkdir(parents=True, exist_ok=False)
        manifest = {
            "upload_id": upload_id,
            "object_key": safe_key.as_posix(),
            "metadata": self._normalize_metadata(metadata),
            "parts": {},
            "created_at": _utc_isoformat(),
        }
        self._write_multipart_manifest(upload_root, manifest)
        return upload_id

    def upload_multipart_part(
        self,
        bucket_name: str,
        upload_id: str,
        part_number: int,
        stream: BinaryIO,
    ) -> str:
        if part_number < 1:
            raise StorageError("part_number must be >= 1")
        bucket_path = self._bucket_path(bucket_name)
        manifest, upload_root = self._load_multipart_manifest(bucket_path.name, upload_id)
        checksum = hashlib.md5()
        part_filename = f"part-{part_number:05d}.part"
        part_path = upload_root / part_filename
        with part_path.open("wb") as target:
            shutil.copyfileobj(_HashingReader(stream, checksum), target)
        record = {
            "etag": checksum.hexdigest(),
            "size": part_path.stat().st_size,
            "filename": part_filename,
        }
        parts = manifest.setdefault("parts", {})
        parts[str(part_number)] = record
        self._write_multipart_manifest(upload_root, manifest)
        return record["etag"]

    def complete_multipart_upload(
        self,
        bucket_name: str,
        upload_id: str,
        ordered_parts: List[Dict[str, Any]],
    ) -> ObjectMeta:
        """Validate the supplied part list against the manifest, concatenate the parts, and publish the final object."""
        if not ordered_parts:
            raise StorageError("parts list required")
        bucket_path = self._bucket_path(bucket_name)
        bucket_id = bucket_path.name
        manifest, upload_root = self._load_multipart_manifest(bucket_id, upload_id)
        parts_map = manifest.get("parts") or {}
        if not parts_map:
            raise StorageError("No uploaded parts found")
        validated: List[tuple[int, Dict[str, Any]]] = []
        for part in ordered_parts:
            raw_number = part.get("part_number")
            if raw_number is None:
                raw_number = part.get("PartNumber")
            try:
                number = int(raw_number)
            except (TypeError, ValueError) as exc:
                raise StorageError("Each part must include part_number") from exc
            if number < 1:
                raise StorageError("part numbers must be >= 1")
            key = str(number)
            record = parts_map.get(key)
            if not record:
                raise StorageError(f"Part {number} missing from upload")
            raw_etag = part.get("etag", part.get("ETag", ""))
            supplied_etag = str(raw_etag).strip() or record.get("etag")
            if supplied_etag and record.get("etag") and supplied_etag.strip('"') != record["etag"]:
                raise StorageError(f"ETag mismatch for part {number}")
            validated.append((number, record))
        validated.sort(key=lambda entry: entry[0])

        safe_key = self._sanitize_object_key(manifest["object_key"])
        destination = bucket_path / safe_key
        destination.parent.mkdir(parents=True, exist_ok=True)
        if self._is_versioning_enabled(bucket_path) and destination.exists():
            self._archive_current_version(bucket_id, safe_key, reason="overwrite")
        checksum = hashlib.md5()
        with destination.open("wb") as target:
            for _, record in validated:
                part_path = upload_root / record["filename"]
                if not part_path.exists():
                    raise StorageError(f"Missing part file {record['filename']}")
                with part_path.open("rb") as chunk:
                    while True:
                        data = chunk.read(1024 * 1024)
                        if not data:
                            break
                        checksum.update(data)
                        target.write(data)

        metadata = manifest.get("metadata")
        if metadata:
            self._write_metadata(bucket_id, safe_key, metadata)
        else:
            self._delete_metadata(bucket_id, safe_key)

        shutil.rmtree(upload_root, ignore_errors=True)
        stat = destination.stat()
        return ObjectMeta(
            key=safe_key.as_posix(),
            size=stat.st_size,
            last_modified=datetime.fromtimestamp(stat.st_mtime),
            etag=checksum.hexdigest(),
            metadata=metadata,
        )

    def abort_multipart_upload(self, bucket_name: str, upload_id: str) -> None:
        bucket_path = self._bucket_path(bucket_name)
        upload_root = self._multipart_dir(bucket_path.name, upload_id)
        if upload_root.exists():
            shutil.rmtree(upload_root, ignore_errors=True)
            return
        legacy_root = self._legacy_multipart_dir(bucket_path.name, upload_id)
        if legacy_root.exists():
            shutil.rmtree(legacy_root, ignore_errors=True)

    # ---------------------- internal helpers ----------------------
    def _bucket_path(self, bucket_name: str) -> Path:
        safe_name = self._sanitize_bucket_name(bucket_name)
        return self.root / safe_name

    def _require_bucket_path(self, bucket_name: str) -> Path:
        bucket_path = self._bucket_path(bucket_name)
        if not bucket_path.exists():
            raise StorageError("Bucket does not exist")
        return bucket_path

    def _object_path(self, bucket_name: str, object_key: str) -> Path:
        bucket_path = self._bucket_path(bucket_name)
        safe_key = self._sanitize_object_key(object_key)
        return bucket_path / safe_key

    def _system_root_path(self) -> Path:
        return self.root / self.SYSTEM_ROOT

    def _system_buckets_root(self) -> Path:
        return self._system_root_path() / self.SYSTEM_BUCKETS_DIR

    def _system_bucket_root(self, bucket_name: str) -> Path:
        return self._system_buckets_root() / bucket_name

    def _bucket_meta_root(self, bucket_name: str) -> Path:
        return self._system_bucket_root(bucket_name) / self.BUCKET_META_DIR

    def _bucket_versions_root(self, bucket_name: str) -> Path:
        return self._system_bucket_root(bucket_name) / self.BUCKET_VERSIONS_DIR

    def _multipart_root(self) -> Path:
        return self._system_root_path() / self.SYSTEM_MULTIPART_DIR

    def _multipart_bucket_root(self, bucket_name: str) -> Path:
        return self._multipart_root() / bucket_name

    def _legacy_metadata_file(self, bucket_name: str, key: Path) -> Path:
        meta_root = self._legacy_meta_root(bucket_name)
        meta_rel = Path(key.as_posix() + ".meta.json")
        return meta_root / meta_rel

    def _legacy_meta_root(self, bucket_name: str) -> Path:
        return self._bucket_path(bucket_name) / ".meta"

    def _legacy_versions_root(self, bucket_name: str) -> Path:
        return self._bucket_path(bucket_name) / ".versions"

    def _legacy_version_dir(self, bucket_name: str, key: Path) -> Path:
        return self._legacy_versions_root(bucket_name) / key

    def _legacy_multipart_bucket_root(self, bucket_name: str) -> Path:
        return self._bucket_path(bucket_name) / ".multipart"

    def _legacy_multipart_dir(self, bucket_name: str, upload_id: str) -> Path:
        return self._legacy_multipart_bucket_root(bucket_name) / upload_id

    def _ensure_system_roots(self) -> None:
        for path in (
            self._system_root_path(),
            self._system_buckets_root(),
            self._multipart_root(),
            self._system_root_path() / self.SYSTEM_TMP_DIR,
        ):
            path.mkdir(parents=True, exist_ok=True)

    def _multipart_dir(self, bucket_name: str, upload_id: str) -> Path:
        return self._multipart_bucket_root(bucket_name) / upload_id

    def _version_dir(self, bucket_name: str, key: Path) -> Path:
        return self._bucket_versions_root(bucket_name) / key

    def _bucket_config_path(self, bucket_name: str) -> Path:
        return self._system_bucket_root(bucket_name) / self.BUCKET_CONFIG_FILE

    def _read_bucket_config(self, bucket_name: str) -> dict[str, Any]:
        config_path = self._bucket_config_path(bucket_name)
        if not config_path.exists():
            return {}
        try:
            data = json.loads(config_path.read_text(encoding="utf-8"))
            return data if isinstance(data, dict) else {}
        except (OSError, json.JSONDecodeError):
            return {}

    def _write_bucket_config(self, bucket_name: str, payload: dict[str, Any]) -> None:
        config_path = self._bucket_config_path(bucket_name)
        config_path.parent.mkdir(parents=True, exist_ok=True)
        config_path.write_text(json.dumps(payload), encoding="utf-8")

    def _set_bucket_config_entry(self, bucket_name: str, key: str, value: Any | None) -> None:
        config = self._read_bucket_config(bucket_name)
        if value is None:
            config.pop(key, None)
        else:
            config[key] = value
        self._write_bucket_config(bucket_name, config)

    def _is_versioning_enabled(self, bucket_path: Path) -> bool:
        config = self._read_bucket_config(bucket_path.name)
        return bool(config.get("versioning_enabled"))

    def _load_multipart_manifest(self, bucket_name: str, upload_id: str) -> tuple[dict[str, Any], Path]:
        upload_root = self._multipart_dir(bucket_name, upload_id)
        if not upload_root.exists():
            upload_root = self._legacy_multipart_dir(bucket_name, upload_id)
        manifest_path = upload_root / self.MULTIPART_MANIFEST
        if not manifest_path.exists():
            raise StorageError("Multipart upload not found")
        try:
            manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
        except (OSError, json.JSONDecodeError) as exc:
            raise StorageError("Multipart manifest unreadable") from exc
        return manifest, upload_root

    def _write_multipart_manifest(self, upload_root: Path, manifest: dict[str, Any]) -> None:
        manifest_path = upload_root / self.MULTIPART_MANIFEST
        manifest_path.parent.mkdir(parents=True, exist_ok=True)
        manifest_path.write_text(json.dumps(manifest), encoding="utf-8")

    def _metadata_file(self, bucket_name: str, key: Path) -> Path:
        meta_root = self._bucket_meta_root(bucket_name)
        meta_rel = Path(key.as_posix() + ".meta.json")
        return meta_root / meta_rel

    def _normalize_metadata(self, metadata: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
        if not metadata:
            return None
        clean = {str(k).strip(): str(v) for k, v in metadata.items() if str(k).strip()}
        return clean or None

    def _write_metadata(self, bucket_name: str, key: Path, metadata: Dict[str, str]) -> None:
        clean = self._normalize_metadata(metadata)
        if not clean:
            self._delete_metadata(bucket_name, key)
            return
        meta_file = self._metadata_file(bucket_name, key)
        meta_file.parent.mkdir(parents=True, exist_ok=True)
        meta_file.write_text(json.dumps({"metadata": clean}), encoding="utf-8")

    def _archive_current_version(self, bucket_name: str, key: Path, *, reason: str) -> None:
        bucket_path = self._bucket_path(bucket_name)
        source = bucket_path / key
        if not source.exists():
            return
        version_dir = self._version_dir(bucket_name, key)
        version_dir.mkdir(parents=True, exist_ok=True)
        now = _utcnow()
        version_id = f"{now.strftime('%Y%m%dT%H%M%S%fZ')}-{uuid.uuid4().hex[:8]}"
        data_path = version_dir / f"{version_id}.bin"
        shutil.copy2(source, data_path)
        metadata = self._read_metadata(bucket_name, key)
        record = {
            "version_id": version_id,
            "key": key.as_posix(),
            "size": source.stat().st_size,
            "archived_at": now.isoformat().replace("+00:00", "Z"),
            "etag": self._compute_etag(source),
            "metadata": metadata or {},
            "reason": reason,
        }
        manifest_path = version_dir / f"{version_id}.json"
        manifest_path.write_text(json.dumps(record), encoding="utf-8")

    def _read_metadata(self, bucket_name: str, key: Path) -> Dict[str, str]:
        for meta_file in (self._metadata_file(bucket_name, key), self._legacy_metadata_file(bucket_name, key)):
            if not meta_file.exists():
                continue
            try:
                payload = json.loads(meta_file.read_text(encoding="utf-8"))
                data = payload.get("metadata")
                return data if isinstance(data, dict) else {}
            except (OSError, json.JSONDecodeError):
                return {}
        return {}

    def _safe_unlink(self, path: Path) -> None:
        attempts = 3
        last_error: PermissionError | None = None
        # Retry a few times: on Windows an open handle (e.g. an active preview) keeps the file locked.
        for attempt in range(attempts):
            try:
                path.unlink()
                return
            except FileNotFoundError:
                return
            except PermissionError as exc:
                last_error = exc
                if os.name == "nt":
                    time.sleep(0.15 * (attempt + 1))
            except OSError as exc:
                raise StorageError(f"Unable to delete object: {exc}") from exc
        message = "Object file is currently in use. Close active previews or wait and try again."
        raise StorageError(message) from last_error

    def _delete_metadata(self, bucket_name: str, key: Path) -> None:
        locations = (
            (self._metadata_file(bucket_name, key), self._bucket_meta_root(bucket_name)),
            (self._legacy_metadata_file(bucket_name, key), self._legacy_meta_root(bucket_name)),
        )
        for meta_file, meta_root in locations:
            try:
                if meta_file.exists():
                    meta_file.unlink()
                parent = meta_file.parent
                while parent != meta_root and parent.exists() and not any(parent.iterdir()):
                    parent.rmdir()
                    parent = parent.parent
            except OSError:
                continue

    def _has_visible_objects(self, bucket_path: Path) -> bool:
        for path in bucket_path.rglob("*"):
            if not path.is_file():
                continue
            rel = path.relative_to(bucket_path)
            if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
                continue
            return True
        return False

    def _has_archived_versions(self, bucket_path: Path) -> bool:
        for version_root in (
            self._bucket_versions_root(bucket_path.name),
            self._legacy_versions_root(bucket_path.name),
        ):
            if version_root.exists() and any(path.is_file() for path in version_root.rglob("*")):
                return True
        return False

    def _has_active_multipart_uploads(self, bucket_path: Path) -> bool:
        for uploads_root in (
            self._multipart_bucket_root(bucket_path.name),
            self._legacy_multipart_bucket_root(bucket_path.name),
        ):
            if uploads_root.exists() and any(path.is_file() for path in uploads_root.rglob("*")):
                return True
        return False

    def _remove_tree(self, path: Path) -> None:
        if not path.exists():
            return
        def _handle_error(func, target_path, exc_info):
            try:
                os.chmod(target_path, stat.S_IRWXU)
                func(target_path)
            except Exception as exc:  # pragma: no cover - fallback failure
                raise StorageError(f"Unable to delete bucket contents: {exc}") from exc

        try:
            shutil.rmtree(path, onerror=_handle_error)
        except FileNotFoundError:
            return
        except PermissionError as exc:
            raise StorageError("Bucket in use. Close open files and try again") from exc

    @staticmethod
    def _sanitize_bucket_name(bucket_name: str) -> str:
        if not bucket_name:
            raise StorageError("Bucket name required")

        name = bucket_name.lower()
        if len(name) < 3 or len(name) > 63:
            raise StorageError("Bucket name must be between 3 and 63 characters")

        if name.startswith("-") or name.endswith("-"):
            raise StorageError("Bucket name cannot start or end with a hyphen")

        if ".." in name:
            raise StorageError("Bucket name cannot contain consecutive periods")

        if name.startswith("xn--"):
            raise StorageError("Bucket name cannot start with 'xn--'")

        if re.fullmatch(r"\d+\.\d+\.\d+\.\d+", name):
            raise StorageError("Bucket name cannot be formatted like an IP address")

        if not re.fullmatch(r"[a-z0-9][a-z0-9.-]+[a-z0-9]", name):
            raise StorageError("Bucket name may only contain lowercase letters, numbers, dots, and hyphens")

        return name

    @staticmethod
    def _sanitize_object_key(object_key: str) -> Path:
        if not object_key:
            raise StorageError("Object key required")
        if len(object_key.encode("utf-8")) > 1024:
            raise StorageError("Object key exceeds maximum length of 1024 bytes")
        if "\x00" in object_key:
            raise StorageError("Object key contains null bytes")
        if object_key.startswith(("/", "\\")):
            raise StorageError("Object key cannot start with a slash")
        normalized = unicodedata.normalize("NFC", object_key)
        if normalized != object_key:
            raise StorageError("Object key must use normalized Unicode")
        candidate = Path(normalized)
        if candidate.is_absolute():
            raise StorageError("Absolute object keys are not allowed")
        if getattr(candidate, "drive", ""):
            raise StorageError("Object key cannot include a drive letter")
        parts = []
        for part in candidate.parts:
            if part in ("", ".", ".."):
                raise StorageError("Object key contains invalid segments")
            if any(ord(ch) < 32 for ch in part):
                raise StorageError("Object key contains control characters")
            if os.name == "nt":
                if any(ch in part for ch in "<>:\"/\\|?*"):
                    raise StorageError("Object key contains characters not supported on Windows filesystems")
                if part.endswith((" ", ".")):
                    raise StorageError("Object key segments cannot end with spaces or periods on Windows")
                trimmed = part.upper().rstrip(". ")
                if trimmed in WINDOWS_RESERVED_NAMES:
                    raise StorageError(f"Invalid filename segment: {part}")
            parts.append(part)
        if parts:
            top_level = parts[0]
            if top_level in ObjectStorage.INTERNAL_FOLDERS or top_level == ObjectStorage.SYSTEM_ROOT:
                raise StorageError("Object key uses a reserved prefix")
        return Path(*parts)

    @staticmethod
    def _compute_etag(path: Path) -> str:
        checksum = hashlib.md5()
        with path.open("rb") as handle:
            for chunk in iter(lambda: handle.read(8192), b""):
                checksum.update(chunk)
        return checksum.hexdigest()


class _HashingReader:
    """Wraps a binary stream, updating the checksum as it is read."""

    def __init__(self, stream: BinaryIO, checksum: Any) -> None:
        self.stream = stream
        self.checksum = checksum

    def read(self, size: int = -1) -> bytes:
        data = self.stream.read(size)
        if data:
            self.checksum.update(data)
        return data
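
Taken together, the methods above compose into a small client-side flow. A minimal usage sketch follows; it assumes ObjectStorage is constructed with the root directory of the store and that the bucket already exists (the constructor and bucket-creation helper are defined earlier in this file), and the bucket/key names are purely illustrative:

from io import BytesIO
from pathlib import Path

store = ObjectStorage(Path("./data"))  # assumed constructor: root directory that holds the buckets

# Simple upload and listing ("demo-bucket" is assumed to exist already).
meta = store.put_object("demo-bucket", "docs/readme.txt", BytesIO(b"hello world"))
print(meta.key, meta.size, meta.etag)
for obj in store.list_objects("demo-bucket"):
    print(obj.key, obj.last_modified)

# Multipart flow: initiate, upload numbered parts, then complete with part numbers and ETags.
upload_id = store.initiate_multipart_upload("demo-bucket", "backups/large.bin")
etag_1 = store.upload_multipart_part("demo-bucket", upload_id, 1, BytesIO(b"a" * 1024))
etag_2 = store.upload_multipart_part("demo-bucket", upload_id, 2, BytesIO(b"b" * 1024))
store.complete_multipart_upload(
    "demo-bucket",
    upload_id,
    [{"part_number": 1, "etag": etag_1}, {"part_number": 2, "etag": etag_2}],
)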
9
app/version.py
Normal file
@@ -0,0 +1,9 @@
"""Central location for the application version string."""
|
||||
from __future__ import annotations
|
||||
|
||||
APP_VERSION = "0.1.0"
|
||||
|
||||
|
||||
def get_version() -> str:
|
||||
"""Return the current application version."""
|
||||
return APP_VERSION
|
||||
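
For reference, a tiny sketch of how this helper would be consumed elsewhere in the project (the import path follows the file location above):

from app.version import APP_VERSION, get_version

assert get_version() == APP_VERSION  # single source of truth for the release string
print(f"object store v{get_version()}")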