commit f400cedf023bb9e8dd4c55c669eea6d40d7f94bc Author: kqjy Date: Fri Nov 21 22:01:34 2025 +0800 Release v0.1.0 Beta diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a8f2958 --- /dev/null +++ b/.gitignore @@ -0,0 +1,44 @@ +# Bytecode and interpreter cruft +__pycache__/ +*.py[cod] +*$py.class + +# Virtual environments and local tooling +.venv/ +venv/ +.env +.env.* +.python-version + +# Test, coverage, and type-check caches +.pytest_cache/ +.coverage +.coverage.* +htmlcov/ +.mypy_cache/ +.dmypy.json +.pytype/ +.cache/ + +# Build / packaging outputs +build/ +dist/ +*.egg-info/ +.eggs/ + +# Local runtime artifacts +logs/ +*.log +tmp/ +tmp-storage/ +pytestdebug.log + +# Bucket/object data produced at runtime (buckets + system metadata) +/data/ +/tmp-storage/ + +# OS/editor noise +.DS_Store +Thumbs.db +.idea/ +.vscode/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6c0c97e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,32 @@ +# syntax=docker/dockerfile:1.7 +FROM python:3.11-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +# Install build deps for any wheels that need compilation, then clean up +RUN apt-get update \ + && apt-get install -y --no-install-recommends build-essential \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +# Drop privileges +RUN useradd -m -u 1000 myfsio \ + && chown -R myfsio:myfsio /app +USER myfsio + +EXPOSE 5000 5100 +ENV APP_HOST=0.0.0.0 \ + FLASK_ENV=production \ + FLASK_DEBUG=0 + +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD python -c "import requests; requests.get('http://localhost:5000/healthz', timeout=2)" + +CMD ["python", "run.py", "--mode", "both"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..93e077f --- /dev/null +++ b/README.md @@ -0,0 +1,117 @@ +# MyFSIO (Flask S3 + IAM) + +MyFSIO is a batteries-included, Flask-based recreation of Amazon S3 and IAM workflows built for local development. The design mirrors the [AWS S3 documentation](https://docs.aws.amazon.com/s3/) wherever practical: bucket naming, Signature Version 4 presigning, Version 2012-10-17 bucket policies, IAM-style users, and familiar REST endpoints. + +## Why MyFSIO? + +- **Dual servers:** Run both the API (port 5000) and UI (port 5100) with a single command: `python run.py`. +- **IAM + access keys:** Users, access keys, key rotation, and bucket-scoped actions (`list/read/write/delete/policy`) now live in `data/.myfsio.sys/config/iam.json` and are editable from the IAM dashboard. +- **Bucket policies + hot reload:** `data/.myfsio.sys/config/bucket_policies.json` uses AWS' policy grammar (Version `2012-10-17`) with a built-in watcher, so editing the JSON file applies immediately. The UI also ships Public/Private/Custom presets for faster edits. +- **Presigned URLs everywhere:** Signature Version 4 presigned URLs respect IAM + bucket policies and replace the now-removed "share link" feature for public access scenarios. +- **Modern UI:** Responsive tables, quick filters, preview sidebar, object-level delete buttons, a presign modal, and an inline JSON policy editor that respects dark mode keep bucket management friendly. +- **Tests & health:** `/healthz` for smoke checks and `pytest` coverage for IAM, CRUD, presign, and policy flows. 
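As a quick illustration of the surface described above, the short sketch below smoke-tests the API with plain `requests` calls. It assumes both servers are running locally with the first-run `localadmin / localadmin` key pair; the bucket name is purely illustrative.

```python
import requests

API = "http://127.0.0.1:5000"
AUTH = {"X-Access-Key": "localadmin", "X-Secret-Key": "localadmin"}

# Liveness probe (the same endpoint the Docker HEALTHCHECK hits)
print(requests.get(f"{API}/healthz", timeout=2).json())

# Create a bucket, upload an object, then list buckets (S3-style XML responses)
requests.put(f"{API}/demo-bucket", headers=AUTH, timeout=5).raise_for_status()
requests.put(f"{API}/demo-bucket/hello.txt", headers=AUTH,
             data=b"hello world", timeout=5).raise_for_status()
print(requests.get(f"{API}/", headers=AUTH, timeout=5).text)
```

The same headers work with `curl`; the API cheatsheet further down lists the full set of routes.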
+ +## Architecture at a Glance + +``` ++-----------------+ +----------------+ +| API Server |<----->| Object storage | +| (port 5000) | | (filesystem) | +| - S3 routes | +----------------+ +| - Presigned URLs | +| - Bucket policy | ++-----------------+ + ^ + | ++-----------------+ +| UI Server | +| (port 5100) | +| - Auth console | +| - IAM dashboard| +| - Bucket editor| ++-----------------+ +``` + +Both apps load the same configuration via `AppConfig` so IAM data and bucket policies stay consistent no matter which process you run. +Bucket policies are automatically reloaded whenever `bucket_policies.json` changes—no restarts required. + +## Getting Started + +```bash +python -m venv .venv +. .venv/Scripts/activate # PowerShell: .\.venv\Scripts\Activate.ps1 +pip install -r requirements.txt + +# Run both API and UI (default) +python run.py + +# Or run individually: +# python run.py --mode api +# python run.py --mode ui +``` + +Visit `http://127.0.0.1:5100/ui` for the console and `http://127.0.0.1:5000/` for the raw API. Override ports/hosts with the environment variables listed below. + +## IAM, Access Keys, and Bucket Policies + +- First run creates `data/.myfsio.sys/config/iam.json` with `localadmin / localadmin` (full control). Sign in via the UI, then use the **IAM** tab to create users, rotate secrets, or edit inline policies without touching JSON by hand. +- Bucket policies live in `data/.myfsio.sys/config/bucket_policies.json` and follow the AWS `arn:aws:s3:::bucket/key` resource syntax with Version `2012-10-17`. Attach/replace/remove policies from the bucket detail page or edit the JSON by hand—changes hot reload automatically. +- IAM actions include extended verbs (`iam:list_users`, `iam:create_user`, `iam:update_policy`, etc.) so you can control who is allowed to manage other users and policies. + +### Bucket Policy Presets & Hot Reload + +- **Presets:** Every bucket detail view includes Public (read-only), Private (detach policy), and Custom presets. Public auto-populates a policy that grants anonymous `s3:ListBucket` + `s3:GetObject` access to the entire bucket. +- **Custom drafts:** Switching back to Custom restores your last manual edit so you can toggle between presets without losing work. +- **Hot reload:** The server watches `bucket_policies.json` and reloads statements on-the-fly—ideal for editing policies in your favorite editor while testing Via curl or the UI. + +## Presigned URLs + +Presigned URLs follow the AWS CLI playbook: + +- Call `POST /presign//` (or use the "Presign" button in the UI) to request a Signature Version 4 URL valid for 1 second to 7 days. +- The generated URL honors IAM permissions and bucket-policy decisions at generation-time and again when somebody fetches it. +- Because presigned URLs cover both authenticated and public sharing scenarios, the legacy "share link" feature has been removed. 
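Because the API verifies standard SigV4 query signatures, any SigV4-capable client can mint compatible URLs. The sketch below uses `boto3` purely as an illustration: the endpoint, the `localadmin` keys, and the `demo-bucket/hello.txt` object are assumptions, and path-style addressing keeps the signed URI aligned with the API's `/<bucket>/<key>` routes.

```python
import boto3
import requests
from botocore.config import Config

# Assumption: API running locally with the default localadmin credentials and an
# existing object demo-bucket/hello.txt readable by that principal.
s3 = boto3.client(
    "s3",
    endpoint_url="http://127.0.0.1:5000",
    aws_access_key_id="localadmin",
    aws_secret_access_key="localadmin",
    region_name="us-east-1",  # must match the server's AWS_REGION
    config=Config(signature_version="s3v4", s3={"addressing_style": "path"}),
)

url = s3.generate_presigned_url(
    "get_object",
    Params={"Bucket": "demo-bucket", "Key": "hello.txt"},
    ExpiresIn=300,  # seconds; the API accepts 1 second up to 7 days
)
print(url)
print(requests.get(url, timeout=5).text)  # anyone holding the URL can fetch it until expiry
```

If the fetch returns 403, check that the signing region matches `AWS_REGION` and that neither IAM nor an attached bucket policy denies `s3:GetObject` for that principal.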
+ +## Configuration + +| Variable | Default | Description | +| --- | --- | --- | +| `STORAGE_ROOT` | `/data` | Filesystem root for bucket directories | +| `MAX_UPLOAD_SIZE` | `1073741824` | Maximum upload size (bytes) | +| `UI_PAGE_SIZE` | `100` | `MaxKeys` hint for listings | +| `SECRET_KEY` | `dev-secret-key` | Flask session secret for the UI | +| `IAM_CONFIG` | `/data/.myfsio.sys/config/iam.json` | IAM user + policy store | +| `BUCKET_POLICY_PATH` | `/data/.myfsio.sys/config/bucket_policies.json` | Bucket policy store | +| `API_BASE_URL` | `http://127.0.0.1:5000` | Used by the UI when calling API endpoints (presign, bucket policy) | +| `AWS_REGION` | `us-east-1` | Region used in Signature V4 scope | +| `AWS_SERVICE` | `s3` | Service used in Signature V4 scope | + +> Buckets now live directly under `data/` while system metadata (versions, IAM, bucket policies, multipart uploads, etc.) lives in `data/.myfsio.sys`. Existing installs can keep their environment variables, but the defaults now match MinIO's `data/.system` pattern for easier bind-mounting. + +## API Cheatsheet (IAM headers required) + +``` +GET / -> List buckets (XML) +PUT / -> Create bucket +DELETE / -> Delete bucket (must be empty) +GET / -> List objects (XML) +PUT // -> Upload object (binary stream) +GET // -> Download object +DELETE // -> Delete object +POST /presign// -> Generate AWS SigV4 presigned URL (JSON) +GET /bucket-policy/ -> Fetch bucket policy (JSON) +PUT /bucket-policy/ -> Attach/replace bucket policy (JSON) +DELETE /bucket-policy/ -> Remove bucket policy +``` + +## Testing + +```bash +pytest -q +``` + +## References + +- [Amazon Simple Storage Service Documentation](https://docs.aws.amazon.com/s3/) +- [Signature Version 4 Signing Process](https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html) +- [Amazon S3 Bucket Policy Examples](https://docs.aws.amazon.com/AmazonS3/latest/userguide/example-bucket-policies.html) diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..17da538 --- /dev/null +++ b/app/__init__.py @@ -0,0 +1,215 @@ +"""Application factory for the mini S3-compatible object store.""" +from __future__ import annotations + +import logging +import time +import uuid +from logging.handlers import RotatingFileHandler +from pathlib import Path +from datetime import timedelta +from typing import Any, Dict, Optional + +from flask import Flask, g, has_request_context, redirect, render_template, request, url_for +from flask_cors import CORS +from flask_wtf.csrf import CSRFError + +from .bucket_policies import BucketPolicyStore +from .config import AppConfig +from .connections import ConnectionStore +from .extensions import limiter, csrf +from .iam import IamService +from .replication import ReplicationManager +from .secret_store import EphemeralSecretStore +from .storage import ObjectStorage +from .version import get_version + + +def create_app( + test_config: Optional[Dict[str, Any]] = None, + *, + include_api: bool = True, + include_ui: bool = True, +) -> Flask: + """Create and configure the Flask application.""" + config = AppConfig.from_env(test_config) + + project_root = Path(__file__).resolve().parent.parent + app = Flask( + __name__, + static_folder=str(project_root / "static"), + template_folder=str(project_root / "templates"), + ) + app.config.update(config.to_flask_config()) + if test_config: + app.config.update(test_config) + app.config.setdefault("APP_VERSION", get_version()) + app.permanent_session_lifetime = 
timedelta(days=int(app.config.get("SESSION_LIFETIME_DAYS", 30))) + if app.config.get("TESTING"): + app.config.setdefault("WTF_CSRF_ENABLED", False) + + _configure_cors(app) + _configure_logging(app) + + limiter.init_app(app) + csrf.init_app(app) + + storage = ObjectStorage(Path(app.config["STORAGE_ROOT"])) + iam = IamService( + Path(app.config["IAM_CONFIG"]), + auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5), + auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15), + ) + bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"])) + secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300)) + + # Initialize Replication components + connections_path = Path(app.config["STORAGE_ROOT"]) / ".connections.json" + replication_rules_path = Path(app.config["STORAGE_ROOT"]) / ".replication_rules.json" + + connections = ConnectionStore(connections_path) + replication = ReplicationManager(storage, connections, replication_rules_path) + + app.extensions["object_storage"] = storage + app.extensions["iam"] = iam + app.extensions["bucket_policies"] = bucket_policies + app.extensions["secret_store"] = secret_store + app.extensions["limiter"] = limiter + app.extensions["connections"] = connections + app.extensions["replication"] = replication + + @app.errorhandler(500) + def internal_error(error): + return render_template('500.html'), 500 + + @app.errorhandler(CSRFError) + def handle_csrf_error(e): + return render_template('csrf_error.html', reason=e.description), 400 + + @app.template_filter("filesizeformat") + def filesizeformat(value: int) -> str: + """Format bytes as human-readable file size.""" + for unit in ["B", "KB", "MB", "GB", "TB", "PB"]: + if abs(value) < 1024.0 or unit == "PB": + if unit == "B": + return f"{int(value)} {unit}" + return f"{value:.1f} {unit}" + value /= 1024.0 + return f"{value:.1f} PB" + + if include_api: + from .s3_api import s3_api_bp + + app.register_blueprint(s3_api_bp) + csrf.exempt(s3_api_bp) + + if include_ui: + from .ui import ui_bp + + app.register_blueprint(ui_bp) + if not include_api: + @app.get("/") + def ui_root_redirect(): + return redirect(url_for("ui.buckets_overview")) + + @app.errorhandler(404) + def handle_not_found(error): + wants_html = request.accept_mimetypes.accept_html + path = request.path or "" + if include_ui and wants_html: + if not include_api or path.startswith("/ui") or path == "/": + return render_template("404.html"), 404 + return error + + @app.get("/healthz") + def healthcheck() -> Dict[str, str]: + return {"status": "ok", "version": app.config.get("APP_VERSION", "unknown")} + + return app + + +def create_api_app(test_config: Optional[Dict[str, Any]] = None) -> Flask: + return create_app(test_config, include_api=True, include_ui=False) + + +def create_ui_app(test_config: Optional[Dict[str, Any]] = None) -> Flask: + return create_app(test_config, include_api=False, include_ui=True) + + +def _configure_cors(app: Flask) -> None: + origins = app.config.get("CORS_ORIGINS", ["*"]) + methods = app.config.get("CORS_METHODS", ["GET", "PUT", "POST", "DELETE", "OPTIONS"]) + allow_headers = app.config.get( + "CORS_ALLOW_HEADERS", + ["Content-Type", "X-Access-Key", "X-Secret-Key", "X-Amz-Date", "X-Amz-SignedHeaders"], + ) + CORS( + app, + resources={r"/*": {"origins": origins, "methods": methods, "allow_headers": allow_headers}}, + supports_credentials=True, + ) + + +class _RequestContextFilter(logging.Filter): + """Inject request-specific attributes into log records.""" + + def filter(self, 
record: logging.LogRecord) -> bool: # pragma: no cover - simple boilerplate + if has_request_context(): + record.request_id = getattr(g, "request_id", "-") + record.path = request.path + record.method = request.method + record.remote_addr = request.remote_addr or "-" + else: + record.request_id = getattr(record, "request_id", "-") + record.path = getattr(record, "path", "-") + record.method = getattr(record, "method", "-") + record.remote_addr = getattr(record, "remote_addr", "-") + return True + + +def _configure_logging(app: Flask) -> None: + log_file = Path(app.config["LOG_FILE"]) + log_file.parent.mkdir(parents=True, exist_ok=True) + handler = RotatingFileHandler( + log_file, + maxBytes=int(app.config.get("LOG_MAX_BYTES", 5 * 1024 * 1024)), + backupCount=int(app.config.get("LOG_BACKUP_COUNT", 3)), + encoding="utf-8", + ) + formatter = logging.Formatter( + "%(asctime)s | %(levelname)s | %(request_id)s | %(method)s %(path)s | %(message)s" + ) + handler.setFormatter(formatter) + handler.addFilter(_RequestContextFilter()) + + logger = app.logger + logger.handlers.clear() + logger.addHandler(handler) + logger.setLevel(getattr(logging, app.config.get("LOG_LEVEL", "INFO"), logging.INFO)) + + @app.before_request + def _log_request_start() -> None: + g.request_id = uuid.uuid4().hex + g.request_started_at = time.perf_counter() + app.logger.info( + "Request started", + extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr}, + ) + + @app.after_request + def _log_request_end(response): + duration_ms = 0.0 + if hasattr(g, "request_started_at"): + duration_ms = (time.perf_counter() - g.request_started_at) * 1000 + request_id = getattr(g, "request_id", uuid.uuid4().hex) + response.headers.setdefault("X-Request-ID", request_id) + app.logger.info( + "Request completed", + extra={ + "path": request.path, + "method": request.method, + "remote_addr": request.remote_addr, + }, + ) + response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}" + response.headers["Server"] = "MyFISO" + return response diff --git a/app/bucket_policies.py b/app/bucket_policies.py new file mode 100644 index 0000000..ce2b894 --- /dev/null +++ b/app/bucket_policies.py @@ -0,0 +1,249 @@ +"""Bucket policy loader/enforcer with a subset of AWS semantics.""" +from __future__ import annotations + +import json +from dataclasses import dataclass +from fnmatch import fnmatch +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional, Sequence + + +RESOURCE_PREFIX = "arn:aws:s3:::" + +ACTION_ALIASES = { + "s3:getobject": "read", + "s3:getobjectversion": "read", + "s3:listbucket": "list", + "s3:listallmybuckets": "list", + "s3:putobject": "write", + "s3:createbucket": "write", + "s3:deleteobject": "delete", + "s3:deleteobjectversion": "delete", + "s3:deletebucket": "delete", + "s3:putobjectacl": "share", + "s3:putbucketpolicy": "policy", +} + + +def _normalize_action(action: str) -> str: + action = action.strip().lower() + if action == "*": + return "*" + return ACTION_ALIASES.get(action, action) + + +def _normalize_actions(actions: Iterable[str]) -> List[str]: + values: List[str] = [] + for action in actions: + canonical = _normalize_action(action) + if canonical == "*" and "*" not in values: + return ["*"] + if canonical and canonical not in values: + values.append(canonical) + return values + + +def _normalize_principals(principal_field: Any) -> List[str] | str: + if principal_field == "*": + return "*" + + def _collect(values: Any) -> List[str]: + if values is None: + return 
[] + if values == "*": + return ["*"] + if isinstance(values, str): + return [values] + if isinstance(values, dict): + aggregated: List[str] = [] + for nested in values.values(): + chunk = _collect(nested) + if "*" in chunk: + return ["*"] + aggregated.extend(chunk) + return aggregated + if isinstance(values, Iterable): + aggregated = [] + for nested in values: + chunk = _collect(nested) + if "*" in chunk: + return ["*"] + aggregated.extend(chunk) + return aggregated + return [str(values)] + + normalized: List[str] = [] + for entry in _collect(principal_field): + token = str(entry).strip() + if token == "*": + return "*" + if token and token not in normalized: + normalized.append(token) + return normalized or "*" + + +def _parse_resource(resource: str) -> tuple[str | None, str | None]: + if not resource.startswith(RESOURCE_PREFIX): + return None, None + remainder = resource[len(RESOURCE_PREFIX) :] + if "/" not in remainder: + bucket = remainder or "*" + return bucket, None + bucket, _, key_pattern = remainder.partition("/") + return bucket or "*", key_pattern or "*" + + +@dataclass +class BucketPolicyStatement: + sid: Optional[str] + effect: str + principals: List[str] | str + actions: List[str] + resources: List[tuple[str | None, str | None]] + + def matches_principal(self, access_key: Optional[str]) -> bool: + if self.principals == "*": + return True + if access_key is None: + return False + return access_key in self.principals + + def matches_action(self, action: str) -> bool: + action = _normalize_action(action) + return "*" in self.actions or action in self.actions + + def matches_resource(self, bucket: Optional[str], object_key: Optional[str]) -> bool: + bucket = (bucket or "*").lower() + key = object_key or "" + for resource_bucket, key_pattern in self.resources: + resource_bucket = (resource_bucket or "*").lower() + if resource_bucket not in {"*", bucket}: + continue + if key_pattern is None: + if not key: + return True + continue + if fnmatch(key, key_pattern): + return True + return False + + +class BucketPolicyStore: + """Loads bucket policies from disk and evaluates statements.""" + + def __init__(self, policy_path: Path) -> None: + self.policy_path = Path(policy_path) + self.policy_path.parent.mkdir(parents=True, exist_ok=True) + if not self.policy_path.exists(): + self.policy_path.write_text(json.dumps({"policies": {}}, indent=2)) + self._raw: Dict[str, Any] = {} + self._policies: Dict[str, List[BucketPolicyStatement]] = {} + self._load() + self._last_mtime = self._current_mtime() + + def maybe_reload(self) -> None: + current = self._current_mtime() + if current is None or current == self._last_mtime: + return + self._load() + self._last_mtime = current + + def _current_mtime(self) -> float | None: + try: + return self.policy_path.stat().st_mtime + except FileNotFoundError: + return None + + # ------------------------------------------------------------------ + def evaluate( + self, + access_key: Optional[str], + bucket: Optional[str], + object_key: Optional[str], + action: str, + ) -> str | None: + bucket = (bucket or "").lower() + statements = self._policies.get(bucket) or [] + decision: Optional[str] = None + for statement in statements: + if not statement.matches_principal(access_key): + continue + if not statement.matches_action(action): + continue + if not statement.matches_resource(bucket, object_key): + continue + if statement.effect == "deny": + return "deny" + decision = "allow" + return decision + + def get_policy(self, bucket: str) -> Dict[str, Any] | None: + 
return self._raw.get(bucket.lower()) + + def set_policy(self, bucket: str, policy_payload: Dict[str, Any]) -> None: + bucket = bucket.lower() + statements = self._normalize_policy(policy_payload) + if not statements: + raise ValueError("Policy must include at least one valid statement") + self._raw[bucket] = policy_payload + self._policies[bucket] = statements + self._persist() + + def delete_policy(self, bucket: str) -> None: + bucket = bucket.lower() + self._raw.pop(bucket, None) + self._policies.pop(bucket, None) + self._persist() + + # ------------------------------------------------------------------ + def _load(self) -> None: + try: + content = self.policy_path.read_text(encoding='utf-8') + raw_payload = json.loads(content) + except FileNotFoundError: + raw_payload = {"policies": {}} + except json.JSONDecodeError as e: + raise ValueError(f"Corrupted bucket policy file (invalid JSON): {e}") + except PermissionError as e: + raise ValueError(f"Cannot read bucket policy file (permission denied): {e}") + except (OSError, ValueError) as e: + raise ValueError(f"Failed to load bucket policies: {e}") + + policies: Dict[str, Any] = raw_payload.get("policies", {}) + parsed: Dict[str, List[BucketPolicyStatement]] = {} + for bucket, policy in policies.items(): + parsed[bucket.lower()] = self._normalize_policy(policy) + self._raw = {bucket.lower(): policy for bucket, policy in policies.items()} + self._policies = parsed + + def _persist(self) -> None: + payload = {"policies": self._raw} + self.policy_path.write_text(json.dumps(payload, indent=2)) + + def _normalize_policy(self, policy: Dict[str, Any]) -> List[BucketPolicyStatement]: + statements_raw: Sequence[Dict[str, Any]] = policy.get("Statement", []) + statements: List[BucketPolicyStatement] = [] + for statement in statements_raw: + actions = _normalize_actions(statement.get("Action", [])) + principals = _normalize_principals(statement.get("Principal", "*")) + resources_field = statement.get("Resource", []) + if isinstance(resources_field, str): + resources_field = [resources_field] + resources: List[tuple[str | None, str | None]] = [] + for resource in resources_field: + bucket, pattern = _parse_resource(str(resource)) + if bucket: + resources.append((bucket, pattern)) + if not resources: + continue + effect = statement.get("Effect", "Allow").lower() + statements.append( + BucketPolicyStatement( + sid=statement.get("Sid"), + effect=effect, + principals=principals, + actions=actions or ["*"], + resources=resources, + ) + ) + return statements \ No newline at end of file diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..3bfd14f --- /dev/null +++ b/app/config.py @@ -0,0 +1,192 @@ +"""Configuration helpers for the S3 clone application.""" +from __future__ import annotations + +import os +import secrets +import shutil +import warnings +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, Optional + +PROJECT_ROOT = Path(__file__).resolve().parent.parent + + +def _prepare_config_file(active_path: Path, legacy_path: Optional[Path] = None) -> Path: + """Ensure config directories exist and migrate legacy files when possible.""" + active_path = Path(active_path) + active_path.parent.mkdir(parents=True, exist_ok=True) + if legacy_path: + legacy_path = Path(legacy_path) + if not active_path.exists() and legacy_path.exists(): + legacy_path.parent.mkdir(parents=True, exist_ok=True) + try: + shutil.move(str(legacy_path), str(active_path)) + except OSError: + shutil.copy2(legacy_path, 
active_path) + try: + legacy_path.unlink(missing_ok=True) + except OSError: + pass + return active_path + + +@dataclass +class AppConfig: + storage_root: Path + max_upload_size: int + ui_page_size: int + secret_key: str + iam_config_path: Path + bucket_policy_path: Path + api_base_url: str + aws_region: str + aws_service: str + ui_enforce_bucket_policies: bool + log_level: str + log_path: Path + log_max_bytes: int + log_backup_count: int + ratelimit_default: str + ratelimit_storage_uri: str + cors_origins: list[str] + cors_methods: list[str] + cors_allow_headers: list[str] + session_lifetime_days: int + auth_max_attempts: int + auth_lockout_minutes: int + bulk_delete_max_keys: int + secret_ttl_seconds: int + stream_chunk_size: int + multipart_min_part_size: int + + @classmethod + def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig": + overrides = overrides or {} + + def _get(name: str, default: Any) -> Any: + return overrides.get(name, os.getenv(name, default)) + + storage_root = Path(_get("STORAGE_ROOT", PROJECT_ROOT / "data")).resolve() + max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024)) # 1 GiB default + ui_page_size = int(_get("UI_PAGE_SIZE", 100)) + auth_max_attempts = int(_get("AUTH_MAX_ATTEMPTS", 5)) + auth_lockout_minutes = int(_get("AUTH_LOCKOUT_MINUTES", 15)) + bulk_delete_max_keys = int(_get("BULK_DELETE_MAX_KEYS", 500)) + secret_ttl_seconds = int(_get("SECRET_TTL_SECONDS", 300)) + stream_chunk_size = int(_get("STREAM_CHUNK_SIZE", 64 * 1024)) + multipart_min_part_size = int(_get("MULTIPART_MIN_PART_SIZE", 5 * 1024 * 1024)) + default_secret = "dev-secret-key" + secret_key = str(_get("SECRET_KEY", default_secret)) + if not secret_key or secret_key == default_secret: + generated = secrets.token_urlsafe(32) + if secret_key == default_secret: + warnings.warn("Using insecure default SECRET_KEY. 
A random value has been generated; set SECRET_KEY for production", RuntimeWarning) + secret_key = generated + iam_env_override = "IAM_CONFIG" in overrides or "IAM_CONFIG" in os.environ + bucket_policy_override = "BUCKET_POLICY_PATH" in overrides or "BUCKET_POLICY_PATH" in os.environ + + default_iam_path = PROJECT_ROOT / "data" / ".myfsio.sys" / "config" / "iam.json" + default_bucket_policy_path = PROJECT_ROOT / "data" / ".myfsio.sys" / "config" / "bucket_policies.json" + + iam_config_path = Path(_get("IAM_CONFIG", default_iam_path)).resolve() + bucket_policy_path = Path(_get("BUCKET_POLICY_PATH", default_bucket_policy_path)).resolve() + + iam_config_path = _prepare_config_file( + iam_config_path, + legacy_path=None if iam_env_override else PROJECT_ROOT / "data" / "iam.json", + ) + bucket_policy_path = _prepare_config_file( + bucket_policy_path, + legacy_path=None if bucket_policy_override else PROJECT_ROOT / "data" / "bucket_policies.json", + ) + api_base_url = str(_get("API_BASE_URL", "http://127.0.0.1:5000")) + aws_region = str(_get("AWS_REGION", "us-east-1")) + aws_service = str(_get("AWS_SERVICE", "s3")) + enforce_ui_policies = str(_get("UI_ENFORCE_BUCKET_POLICIES", "0")).lower() in {"1", "true", "yes", "on"} + log_level = str(_get("LOG_LEVEL", "INFO")).upper() + log_dir = Path(_get("LOG_DIR", PROJECT_ROOT / "logs")).resolve() + log_dir.mkdir(parents=True, exist_ok=True) + log_path = log_dir / str(_get("LOG_FILE", "app.log")) + log_max_bytes = int(_get("LOG_MAX_BYTES", 5 * 1024 * 1024)) + log_backup_count = int(_get("LOG_BACKUP_COUNT", 3)) + ratelimit_default = str(_get("RATE_LIMIT_DEFAULT", "200 per minute")) + ratelimit_storage_uri = str(_get("RATE_LIMIT_STORAGE_URI", "memory://")) + + def _csv(value: str, default: list[str]) -> list[str]: + if not value: + return default + parts = [segment.strip() for segment in value.split(",") if segment.strip()] + return parts or default + + cors_origins = _csv(str(_get("CORS_ORIGINS", "*")), ["*"]) + cors_methods = _csv(str(_get("CORS_METHODS", "GET,PUT,POST,DELETE,OPTIONS")), ["GET", "PUT", "POST", "DELETE", "OPTIONS"]) + cors_allow_headers = _csv(str(_get("CORS_ALLOW_HEADERS", "Content-Type,X-Access-Key,X-Secret-Key,X-Amz-Algorithm,X-Amz-Credential,X-Amz-Date,X-Amz-Expires,X-Amz-SignedHeaders,X-Amz-Signature")), [ + "Content-Type", + "X-Access-Key", + "X-Secret-Key", + "X-Amz-Algorithm", + "X-Amz-Credential", + "X-Amz-Date", + "X-Amz-Expires", + "X-Amz-SignedHeaders", + "X-Amz-Signature", + ]) + session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30)) + + return cls(storage_root=storage_root, + max_upload_size=max_upload_size, + ui_page_size=ui_page_size, + secret_key=secret_key, + iam_config_path=iam_config_path, + bucket_policy_path=bucket_policy_path, + api_base_url=api_base_url, + aws_region=aws_region, + aws_service=aws_service, + ui_enforce_bucket_policies=enforce_ui_policies, + log_level=log_level, + log_path=log_path, + log_max_bytes=log_max_bytes, + log_backup_count=log_backup_count, + ratelimit_default=ratelimit_default, + ratelimit_storage_uri=ratelimit_storage_uri, + cors_origins=cors_origins, + cors_methods=cors_methods, + cors_allow_headers=cors_allow_headers, + session_lifetime_days=session_lifetime_days, + auth_max_attempts=auth_max_attempts, + auth_lockout_minutes=auth_lockout_minutes, + bulk_delete_max_keys=bulk_delete_max_keys, + secret_ttl_seconds=secret_ttl_seconds, + stream_chunk_size=stream_chunk_size, + multipart_min_part_size=multipart_min_part_size) + + def to_flask_config(self) -> Dict[str, Any]: + return { + 
"STORAGE_ROOT": str(self.storage_root), + "MAX_CONTENT_LENGTH": self.max_upload_size, + "UI_PAGE_SIZE": self.ui_page_size, + "SECRET_KEY": self.secret_key, + "IAM_CONFIG": str(self.iam_config_path), + "BUCKET_POLICY_PATH": str(self.bucket_policy_path), + "API_BASE_URL": self.api_base_url, + "AWS_REGION": self.aws_region, + "AWS_SERVICE": self.aws_service, + "UI_ENFORCE_BUCKET_POLICIES": self.ui_enforce_bucket_policies, + "AUTH_MAX_ATTEMPTS": self.auth_max_attempts, + "AUTH_LOCKOUT_MINUTES": self.auth_lockout_minutes, + "BULK_DELETE_MAX_KEYS": self.bulk_delete_max_keys, + "SECRET_TTL_SECONDS": self.secret_ttl_seconds, + "STREAM_CHUNK_SIZE": self.stream_chunk_size, + "MULTIPART_MIN_PART_SIZE": self.multipart_min_part_size, + "LOG_LEVEL": self.log_level, + "LOG_FILE": str(self.log_path), + "LOG_MAX_BYTES": self.log_max_bytes, + "LOG_BACKUP_COUNT": self.log_backup_count, + "RATELIMIT_DEFAULT": self.ratelimit_default, + "RATELIMIT_STORAGE_URI": self.ratelimit_storage_uri, + "CORS_ORIGINS": self.cors_origins, + "CORS_METHODS": self.cors_methods, + "CORS_ALLOW_HEADERS": self.cors_allow_headers, + "SESSION_LIFETIME_DAYS": self.session_lifetime_days, + } diff --git a/app/connections.py b/app/connections.py new file mode 100644 index 0000000..c5a7b33 --- /dev/null +++ b/app/connections.py @@ -0,0 +1,61 @@ +"""Manage remote S3 connections.""" +from __future__ import annotations + +import json +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Dict, List, Optional + +from .config import AppConfig + + +@dataclass +class RemoteConnection: + id: str + name: str + endpoint_url: str + access_key: str + secret_key: str + region: str = "us-east-1" + + +class ConnectionStore: + def __init__(self, config_path: Path) -> None: + self.config_path = config_path + self._connections: Dict[str, RemoteConnection] = {} + self.reload() + + def reload(self) -> None: + if not self.config_path.exists(): + self._connections = {} + return + + try: + with open(self.config_path, "r") as f: + data = json.load(f) + for item in data: + conn = RemoteConnection(**item) + self._connections[conn.id] = conn + except (OSError, json.JSONDecodeError): + self._connections = {} + + def save(self) -> None: + self.config_path.parent.mkdir(parents=True, exist_ok=True) + data = [asdict(conn) for conn in self._connections.values()] + with open(self.config_path, "w") as f: + json.dump(data, f, indent=2) + + def list(self) -> List[RemoteConnection]: + return list(self._connections.values()) + + def get(self, connection_id: str) -> Optional[RemoteConnection]: + return self._connections.get(connection_id) + + def add(self, connection: RemoteConnection) -> None: + self._connections[connection.id] = connection + self.save() + + def delete(self, connection_id: str) -> None: + if connection_id in self._connections: + del self._connections[connection_id] + self.save() diff --git a/app/extensions.py b/app/extensions.py new file mode 100644 index 0000000..1f8b71a --- /dev/null +++ b/app/extensions.py @@ -0,0 +1,10 @@ +"""Application-wide extension instances.""" +from flask_limiter import Limiter +from flask_limiter.util import get_remote_address +from flask_wtf import CSRFProtect + +# Shared rate limiter instance; configured in app factory. +limiter = Limiter(key_func=get_remote_address) + +# Global CSRF protection for UI routes. 
+csrf = CSRFProtect() diff --git a/app/iam.py b/app/iam.py new file mode 100644 index 0000000..f6dc33c --- /dev/null +++ b/app/iam.py @@ -0,0 +1,404 @@ +"""Lightweight IAM-style user and policy management.""" +from __future__ import annotations + +import json +import math +import secrets +from collections import deque +from dataclasses import dataclass +from datetime import datetime, timedelta +from pathlib import Path +from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set + + +class IamError(RuntimeError): + """Raised when authentication or authorization fails.""" + + +S3_ACTIONS = {"list", "read", "write", "delete", "share", "policy"} +IAM_ACTIONS = { + "iam:list_users", + "iam:create_user", + "iam:delete_user", + "iam:rotate_key", + "iam:update_policy", +} +ALLOWED_ACTIONS = (S3_ACTIONS | IAM_ACTIONS) | {"iam:*"} + +ACTION_ALIASES = { + "list": "list", + "s3:listbucket": "list", + "s3:listallmybuckets": "list", + "read": "read", + "s3:getobject": "read", + "s3:getobjectversion": "read", + "write": "write", + "s3:putobject": "write", + "s3:createbucket": "write", + "delete": "delete", + "s3:deleteobject": "delete", + "s3:deletebucket": "delete", + "share": "share", + "s3:putobjectacl": "share", + "policy": "policy", + "s3:putbucketpolicy": "policy", + "iam:listusers": "iam:list_users", + "iam:createuser": "iam:create_user", + "iam:deleteuser": "iam:delete_user", + "iam:rotateaccesskey": "iam:rotate_key", + "iam:putuserpolicy": "iam:update_policy", + "iam:*": "iam:*", +} + + +@dataclass +class Policy: + bucket: str + actions: Set[str] + + +@dataclass +class Principal: + access_key: str + display_name: str + policies: List[Policy] + + +class IamService: + """Loads IAM configuration, manages users, and evaluates policies.""" + + def __init__(self, config_path: Path, auth_max_attempts: int = 5, auth_lockout_minutes: int = 15) -> None: + self.config_path = Path(config_path) + self.auth_max_attempts = auth_max_attempts + self.auth_lockout_window = timedelta(minutes=auth_lockout_minutes) + self.config_path.parent.mkdir(parents=True, exist_ok=True) + if not self.config_path.exists(): + self._write_default() + self._users: Dict[str, Dict[str, Any]] = {} + self._raw_config: Dict[str, Any] = {} + self._failed_attempts: Dict[str, Deque[datetime]] = {} + self._load() + + # ---------------------- authz helpers ---------------------- + def authenticate(self, access_key: str, secret_key: str) -> Principal: + access_key = (access_key or "").strip() + secret_key = (secret_key or "").strip() + if not access_key or not secret_key: + raise IamError("Missing access credentials") + if self._is_locked_out(access_key): + seconds = self._seconds_until_unlock(access_key) + raise IamError( + f"Access temporarily locked. Try again in {seconds} seconds." 
+ ) + record = self._users.get(access_key) + if not record or record["secret_key"] != secret_key: + self._record_failed_attempt(access_key) + raise IamError("Invalid credentials") + self._clear_failed_attempts(access_key) + return self._build_principal(access_key, record) + + def _record_failed_attempt(self, access_key: str) -> None: + if not access_key: + return + attempts = self._failed_attempts.setdefault(access_key, deque()) + self._prune_attempts(attempts) + attempts.append(datetime.now()) + + def _clear_failed_attempts(self, access_key: str) -> None: + if not access_key: + return + self._failed_attempts.pop(access_key, None) + + def _prune_attempts(self, attempts: Deque[datetime]) -> None: + cutoff = datetime.now() - self.auth_lockout_window + while attempts and attempts[0] < cutoff: + attempts.popleft() + + def _is_locked_out(self, access_key: str) -> bool: + if not access_key: + return False + attempts = self._failed_attempts.get(access_key) + if not attempts: + return False + self._prune_attempts(attempts) + return len(attempts) >= self.auth_max_attempts + + def _seconds_until_unlock(self, access_key: str) -> int: + attempts = self._failed_attempts.get(access_key) + if not attempts: + return 0 + self._prune_attempts(attempts) + if len(attempts) < self.auth_max_attempts: + return 0 + oldest = attempts[0] + elapsed = (datetime.now() - oldest).total_seconds() + return int(max(0, self.auth_lockout_window.total_seconds() - elapsed)) + + def principal_for_key(self, access_key: str) -> Principal: + record = self._users.get(access_key) + if not record: + raise IamError("Unknown access key") + return self._build_principal(access_key, record) + + def secret_for_key(self, access_key: str) -> str: + record = self._users.get(access_key) + if not record: + raise IamError("Unknown access key") + return record["secret_key"] + + def authorize(self, principal: Principal, bucket_name: str | None, action: str) -> None: + action = self._normalize_action(action) + if action not in ALLOWED_ACTIONS: + raise IamError(f"Unknown action '{action}'") + bucket_name = bucket_name or "*" + normalized = bucket_name.lower() if bucket_name != "*" else bucket_name + if not self._is_allowed(principal, normalized, action): + raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'") + + def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]: + return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")] + + def _is_allowed(self, principal: Principal, bucket_name: str, action: str) -> bool: + bucket_name = bucket_name.lower() + for policy in principal.policies: + if policy.bucket not in {"*", bucket_name}: + continue + if "*" in policy.actions or action in policy.actions: + return True + if "iam:*" in policy.actions and action.startswith("iam:"): + return True + return False + + # ---------------------- management helpers ---------------------- + def list_users(self) -> List[Dict[str, Any]]: + listing: List[Dict[str, Any]] = [] + for access_key, record in self._users.items(): + listing.append( + { + "access_key": access_key, + "display_name": record["display_name"], + "policies": [ + {"bucket": policy.bucket, "actions": sorted(policy.actions)} + for policy in record["policies"] + ], + } + ) + return listing + + def create_user( + self, + *, + display_name: str, + policies: Optional[Sequence[Dict[str, Any]]] = None, + access_key: str | None = None, + secret_key: str | None = None, + ) -> Dict[str, str]: + access_key = (access_key or 
self._generate_access_key()).strip() + if not access_key: + raise IamError("Access key cannot be empty") + if access_key in self._users: + raise IamError("Access key already exists") + secret_key = secret_key or self._generate_secret_key() + sanitized_policies = self._prepare_policy_payload(policies) + record = { + "access_key": access_key, + "secret_key": secret_key, + "display_name": display_name or access_key, + "policies": sanitized_policies, + } + self._raw_config.setdefault("users", []).append(record) + self._save() + self._load() + return {"access_key": access_key, "secret_key": secret_key} + + def rotate_secret(self, access_key: str) -> str: + user = self._get_raw_user(access_key) + new_secret = self._generate_secret_key() + user["secret_key"] = new_secret + self._save() + self._load() + return new_secret + + def update_user(self, access_key: str, display_name: str) -> None: + user = self._get_raw_user(access_key) + user["display_name"] = display_name + self._save() + self._load() + + def delete_user(self, access_key: str) -> None: + users = self._raw_config.get("users", []) + if len(users) <= 1: + raise IamError("Cannot delete the only user") + remaining = [user for user in users if user["access_key"] != access_key] + if len(remaining) == len(users): + raise IamError("User not found") + self._raw_config["users"] = remaining + self._save() + self._load() + + def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None: + user = self._get_raw_user(access_key) + user["policies"] = self._prepare_policy_payload(policies) + self._save() + self._load() + + # ---------------------- config helpers ---------------------- + def _load(self) -> None: + try: + content = self.config_path.read_text(encoding='utf-8') + raw = json.loads(content) + except FileNotFoundError: + raise IamError(f"IAM config not found: {self.config_path}") + except json.JSONDecodeError as e: + raise IamError(f"Corrupted IAM config (invalid JSON): {e}") + except PermissionError as e: + raise IamError(f"Cannot read IAM config (permission denied): {e}") + except (OSError, ValueError) as e: + raise IamError(f"Failed to load IAM config: {e}") + + users: Dict[str, Dict[str, Any]] = {} + for user in raw.get("users", []): + policies = self._build_policy_objects(user.get("policies", [])) + users[user["access_key"]] = { + "secret_key": user["secret_key"], + "display_name": user.get("display_name", user["access_key"]), + "policies": policies, + } + if not users: + raise IamError("IAM configuration contains no users") + self._users = users + self._raw_config = { + "users": [ + { + "access_key": entry["access_key"], + "secret_key": entry["secret_key"], + "display_name": entry.get("display_name", entry["access_key"]), + "policies": entry.get("policies", []), + } + for entry in raw.get("users", []) + ] + } + + def _save(self) -> None: + try: + temp_path = self.config_path.with_suffix('.json.tmp') + temp_path.write_text(json.dumps(self._raw_config, indent=2), encoding='utf-8') + temp_path.replace(self.config_path) + except (OSError, PermissionError) as e: + raise IamError(f"Cannot save IAM config: {e}") + + # ---------------------- insight helpers ---------------------- + def config_summary(self) -> Dict[str, Any]: + return { + "path": str(self.config_path), + "user_count": len(self._users), + "allowed_actions": sorted(ALLOWED_ACTIONS), + } + + def export_config(self, mask_secrets: bool = True) -> Dict[str, Any]: + payload: Dict[str, Any] = {"users": []} + for user in self._raw_config.get("users", []): + 
record = dict(user) + if mask_secrets and "secret_key" in record: + record["secret_key"] = "••••••••••" + payload["users"].append(record) + return payload + + def _build_policy_objects(self, policies: Sequence[Dict[str, Any]]) -> List[Policy]: + entries: List[Policy] = [] + for policy in policies: + bucket = str(policy.get("bucket", "*")).lower() + raw_actions = policy.get("actions", []) + if isinstance(raw_actions, str): + raw_actions = [raw_actions] + action_set: Set[str] = set() + for action in raw_actions: + canonical = self._normalize_action(action) + if canonical == "*": + action_set = set(ALLOWED_ACTIONS) + break + if canonical: + action_set.add(canonical) + if action_set: + entries.append(Policy(bucket=bucket, actions=action_set)) + return entries + + def _prepare_policy_payload(self, policies: Optional[Sequence[Dict[str, Any]]]) -> List[Dict[str, Any]]: + if not policies: + policies = ( + { + "bucket": "*", + "actions": ["list", "read", "write", "delete", "share", "policy"], + }, + ) + sanitized: List[Dict[str, Any]] = [] + for policy in policies: + bucket = str(policy.get("bucket", "*")).lower() + raw_actions = policy.get("actions", []) + if isinstance(raw_actions, str): + raw_actions = [raw_actions] + action_set: Set[str] = set() + for action in raw_actions: + canonical = self._normalize_action(action) + if canonical == "*": + action_set = set(ALLOWED_ACTIONS) + break + if canonical: + action_set.add(canonical) + if not action_set: + continue + sanitized.append({"bucket": bucket, "actions": sorted(action_set)}) + if not sanitized: + raise IamError("At least one policy with valid actions is required") + return sanitized + + def _build_principal(self, access_key: str, record: Dict[str, Any]) -> Principal: + return Principal( + access_key=access_key, + display_name=record["display_name"], + policies=record["policies"], + ) + + def _normalize_action(self, action: str) -> str: + if not action: + return "" + lowered = action.strip().lower() + if lowered == "*": + return "*" + candidate = ACTION_ALIASES.get(lowered, lowered) + return candidate if candidate in ALLOWED_ACTIONS else "" + + def _write_default(self) -> None: + default = { + "users": [ + { + "access_key": "localadmin", + "secret_key": "localadmin", + "display_name": "Local Admin", + "policies": [ + {"bucket": "*", "actions": list(ALLOWED_ACTIONS)} + ], + } + ] + } + self.config_path.write_text(json.dumps(default, indent=2)) + + def _generate_access_key(self) -> str: + return secrets.token_hex(8) + + def _generate_secret_key(self) -> str: + return secrets.token_urlsafe(24) + + def _get_raw_user(self, access_key: str) -> Dict[str, Any]: + for user in self._raw_config.get("users", []): + if user["access_key"] == access_key: + return user + raise IamError("User not found") + + def get_secret_key(self, access_key: str) -> str | None: + record = self._users.get(access_key) + return record["secret_key"] if record else None + + def get_principal(self, access_key: str) -> Principal | None: + record = self._users.get(access_key) + return self._build_principal(access_key, record) if record else None diff --git a/app/replication.py b/app/replication.py new file mode 100644 index 0000000..b9d86ee --- /dev/null +++ b/app/replication.py @@ -0,0 +1,121 @@ +"""Background replication worker.""" +from __future__ import annotations + +import logging +import threading +from concurrent.futures import ThreadPoolExecutor +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Optional + +import boto3 +from 
botocore.exceptions import ClientError + +from .connections import ConnectionStore, RemoteConnection +from .storage import ObjectStorage + +logger = logging.getLogger(__name__) + + +@dataclass +class ReplicationRule: + bucket_name: str + target_connection_id: str + target_bucket: str + enabled: bool = True + + +class ReplicationManager: + def __init__(self, storage: ObjectStorage, connections: ConnectionStore, rules_path: Path) -> None: + self.storage = storage + self.connections = connections + self.rules_path = rules_path + self._rules: Dict[str, ReplicationRule] = {} + self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker") + self.reload_rules() + + def reload_rules(self) -> None: + if not self.rules_path.exists(): + self._rules = {} + return + try: + import json + with open(self.rules_path, "r") as f: + data = json.load(f) + for bucket, rule_data in data.items(): + self._rules[bucket] = ReplicationRule(**rule_data) + except (OSError, ValueError) as e: + logger.error(f"Failed to load replication rules: {e}") + + def save_rules(self) -> None: + import json + data = {b: rule.__dict__ for b, rule in self._rules.items()} + self.rules_path.parent.mkdir(parents=True, exist_ok=True) + with open(self.rules_path, "w") as f: + json.dump(data, f, indent=2) + + def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]: + return self._rules.get(bucket_name) + + def set_rule(self, rule: ReplicationRule) -> None: + self._rules[rule.bucket_name] = rule + self.save_rules() + + def delete_rule(self, bucket_name: str) -> None: + if bucket_name in self._rules: + del self._rules[bucket_name] + self.save_rules() + + def trigger_replication(self, bucket_name: str, object_key: str) -> None: + rule = self.get_rule(bucket_name) + if not rule or not rule.enabled: + return + + connection = self.connections.get(rule.target_connection_id) + if not connection: + logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Connection {rule.target_connection_id} not found") + return + + self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection) + + def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection) -> None: + try: + # 1. Get local file path + # Note: We are accessing internal storage structure here. + # Ideally storage.py should expose a 'get_file_path' or we read the stream. + # For efficiency, we'll try to read the file directly if we can, or use storage.get_object + + # Using boto3 to upload + s3 = boto3.client( + "s3", + endpoint_url=conn.endpoint_url, + aws_access_key_id=conn.access_key, + aws_secret_access_key=conn.secret_key, + region_name=conn.region, + ) + + # We need the file content. + # Since ObjectStorage is filesystem based, let's get the stream. + # We need to be careful about closing it. 
+ meta = self.storage.get_object_meta(bucket_name, object_key) + if not meta: + return + + with self.storage.open_object(bucket_name, object_key) as f: + extra_args = {} + if meta.metadata: + extra_args["Metadata"] = meta.metadata + + s3.upload_fileobj( + f, + rule.target_bucket, + object_key, + ExtraArgs=extra_args + ) + + logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})") + + except (ClientError, OSError, ValueError) as e: + logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}") + except Exception: + logger.exception(f"Unexpected error during replication for {bucket_name}/{object_key}") diff --git a/app/s3_api.py b/app/s3_api.py new file mode 100644 index 0000000..584074b --- /dev/null +++ b/app/s3_api.py @@ -0,0 +1,1245 @@ +"""Flask blueprint exposing a subset of the S3 REST API.""" +from __future__ import annotations + +import hashlib +import hmac +import mimetypes +import re +import uuid +from datetime import datetime, timedelta, timezone +from typing import Any, Dict +from urllib.parse import quote, urlencode +from xml.etree.ElementTree import Element, SubElement, tostring, fromstring, ParseError + +from flask import Blueprint, Response, current_app, jsonify, request +from werkzeug.http import http_date + +from .bucket_policies import BucketPolicyStore +from .extensions import limiter +from .iam import IamError, Principal +from .storage import ObjectStorage, StorageError + +s3_api_bp = Blueprint("s3_api", __name__) + + +# ---------------------- helpers ---------------------- +def _storage() -> ObjectStorage: + return current_app.extensions["object_storage"] + + +def _iam(): + return current_app.extensions["iam"] + + + +def _bucket_policies() -> BucketPolicyStore: + store: BucketPolicyStore = current_app.extensions["bucket_policies"] + store.maybe_reload() + return store + + +def _xml_response(element: Element, status: int = 200) -> Response: + xml_bytes = tostring(element, encoding="utf-8") + return Response(xml_bytes, status=status, mimetype="application/xml") + + +def _error_response(code: str, message: str, status: int) -> Response: + error = Element("Error") + SubElement(error, "Code").text = code + SubElement(error, "Message").text = message + SubElement(error, "Resource").text = request.path + SubElement(error, "RequestId").text = uuid.uuid4().hex + return _xml_response(error, status) + + +def _sign(key: bytes, msg: str) -> bytes: + return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest() + + +def _get_signature_key(key: str, date_stamp: str, region_name: str, service_name: str) -> bytes: + k_date = _sign(("AWS4" + key).encode("utf-8"), date_stamp) + k_region = _sign(k_date, region_name) + k_service = _sign(k_region, service_name) + k_signing = _sign(k_service, "aws4_request") + return k_signing + + +def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None: + # Parse Authorization header + # AWS4-HMAC-SHA256 Credential=AKIA.../20230101/us-east-1/s3/aws4_request, SignedHeaders=host;x-amz-date, Signature=... 
+ match = re.match( + r"AWS4-HMAC-SHA256 Credential=([^/]+)/([^/]+)/([^/]+)/([^/]+)/aws4_request, SignedHeaders=([^,]+), Signature=(.+)", + auth_header, + ) + if not match: + return None + + access_key, date_stamp, region, service, signed_headers_str, signature = match.groups() + + # Get secret key + secret_key = _iam().get_secret_key(access_key) + if not secret_key: + raise IamError("Invalid access key") + + # Canonical Request + method = req.method + canonical_uri = quote(req.path, safe="/-_.~") + + # Canonical Query String + query_args = [] + for key, value in req.args.items(multi=True): + query_args.append((key, value)) + query_args.sort(key=lambda x: (x[0], x[1])) + + canonical_query_parts = [] + for k, v in query_args: + canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}") + canonical_query_string = "&".join(canonical_query_parts) + + # Canonical Headers + signed_headers_list = signed_headers_str.split(";") + canonical_headers_parts = [] + for header in signed_headers_list: + header_val = req.headers.get(header) + if header_val is None: + header_val = "" + + header_val = " ".join(header_val.split()) + canonical_headers_parts.append(f"{header.lower()}:{header_val}\n") + canonical_headers = "".join(canonical_headers_parts) + + # Payload Hash + payload_hash = req.headers.get("X-Amz-Content-Sha256") + if not payload_hash: + payload_hash = hashlib.sha256(req.get_data()).hexdigest() + + canonical_request = f"{method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers_str}\n{payload_hash}" + + # String to Sign + amz_date = req.headers.get("X-Amz-Date") + if not amz_date: + amz_date = req.headers.get("Date") + + if not amz_date: + raise IamError("Missing Date header") + + credential_scope = f"{date_stamp}/{region}/{service}/aws4_request" + string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}" + + # Calculate Signature + signing_key = _get_signature_key(secret_key, date_stamp, region, service) + calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest() + + if calculated_signature != signature: + raise IamError("SignatureDoesNotMatch") + + return _iam().get_principal(access_key) + + +def _verify_sigv4_query(req: Any) -> Principal | None: + credential = req.args.get("X-Amz-Credential") + signed_headers_str = req.args.get("X-Amz-SignedHeaders") + signature = req.args.get("X-Amz-Signature") + amz_date = req.args.get("X-Amz-Date") + expires = req.args.get("X-Amz-Expires") + + if not (credential and signed_headers_str and signature and amz_date and expires): + return None + + try: + access_key, date_stamp, region, service, _ = credential.split("/") + except ValueError: + raise IamError("Invalid Credential format") + + # Check expiration + try: + req_time = datetime.strptime(amz_date, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc) + except ValueError: + raise IamError("Invalid Date format") + + now = datetime.now(timezone.utc) + if now > req_time + timedelta(seconds=int(expires)): + raise IamError("Request expired") + + secret_key = _iam().get_secret_key(access_key) + if not secret_key: + raise IamError("Invalid access key") + + # Canonical Request + method = req.method + canonical_uri = quote(req.path, safe="/-_.~") + + # Canonical Query String + query_args = [] + for key, value in req.args.items(multi=True): + if key != "X-Amz-Signature": + query_args.append((key, value)) + query_args.sort(key=lambda x: (x[0], 
x[1])) + + canonical_query_parts = [] + for k, v in query_args: + canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}") + canonical_query_string = "&".join(canonical_query_parts) + + # Canonical Headers + signed_headers_list = signed_headers_str.split(";") + canonical_headers_parts = [] + for header in signed_headers_list: + val = req.headers.get(header, "").strip() + # Collapse multiple spaces + val = " ".join(val.split()) + canonical_headers_parts.append(f"{header}:{val}\n") + canonical_headers = "".join(canonical_headers_parts) + + # Payload Hash + payload_hash = "UNSIGNED-PAYLOAD" + + canonical_request = "\n".join([ + method, + canonical_uri, + canonical_query_string, + canonical_headers, + signed_headers_str, + payload_hash + ]) + + # String to Sign + algorithm = "AWS4-HMAC-SHA256" + credential_scope = f"{date_stamp}/{region}/{service}/aws4_request" + hashed_request = hashlib.sha256(canonical_request.encode('utf-8')).hexdigest() + string_to_sign = "\n".join([ + algorithm, + amz_date, + credential_scope, + hashed_request + ]) + + # Signature + signing_key = _get_signature_key(secret_key, date_stamp, region, service) + calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest() + + if not hmac.compare_digest(calculated_signature, signature): + raise IamError("SignatureDoesNotMatch") + + return _iam().get_principal(access_key) + + +def _verify_sigv4(req: Any) -> Principal | None: + auth_header = req.headers.get("Authorization") + if auth_header and auth_header.startswith("AWS4-HMAC-SHA256"): + return _verify_sigv4_header(req, auth_header) + + if req.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256": + return _verify_sigv4_query(req) + + return None + + +def _require_principal(): + # Try SigV4 first + if ("Authorization" in request.headers and request.headers["Authorization"].startswith("AWS4-HMAC-SHA256")) or \ + (request.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256"): + try: + principal = _verify_sigv4(request) + if principal: + return principal, None + except IamError as exc: + return None, _error_response("AccessDenied", str(exc), 403) + except (ValueError, TypeError): + return None, _error_response("AccessDenied", "Signature verification failed", 403) + + access_key = request.headers.get("X-Access-Key") + secret_key = request.headers.get("X-Secret-Key") + if not access_key or not secret_key: + return None, _error_response("AccessDenied", "Missing credentials", 403) + try: + principal = _iam().authenticate(access_key, secret_key) + return principal, None + except IamError as exc: + return None, _error_response("AccessDenied", str(exc), 403) + + +def _authorize_action(principal: Principal | None, bucket_name: str | None, action: str, *, object_key: str | None = None) -> None: + iam_allowed = False + iam_error: IamError | None = None + if principal is not None: + try: + _iam().authorize(principal, bucket_name, action) + iam_allowed = True + except IamError as exc: + iam_error = exc + else: + iam_error = IamError("Missing credentials") + + policy_decision = None + access_key = principal.access_key if principal else None + if bucket_name: + policy_decision = _bucket_policies().evaluate(access_key, bucket_name, object_key, action) + if policy_decision == "deny": + raise IamError("Access denied by bucket policy") + + if iam_allowed: + return + if policy_decision == "allow": + return + raise iam_error or IamError("Access denied") + + +def _enforce_bucket_policy(principal: Principal | None, bucket_name: str | None, 
object_key: str | None, action: str) -> None: + if not bucket_name: + return + decision = _bucket_policies().evaluate( + principal.access_key if principal else None, + bucket_name, + object_key, + action, + ) + if decision == "deny": + raise IamError("Access denied by bucket policy") + + +def _object_principal(action: str, bucket_name: str, object_key: str): + principal, error = _require_principal() + try: + _authorize_action(principal, bucket_name, action, object_key=object_key) + return principal, None + except IamError as exc: + if not error: + return None, _error_response("AccessDenied", str(exc), 403) + if not _has_presign_params(): + return None, error + try: + principal = _validate_presigned_request(action, bucket_name, object_key) + _enforce_bucket_policy(principal, bucket_name, object_key, action) + return principal, None + except IamError as exc: + return None, _error_response("AccessDenied", str(exc), 403) + + +def _has_presign_params() -> bool: + return bool(request.args.get("X-Amz-Algorithm")) + + +def _validate_presigned_request(action: str, bucket_name: str, object_key: str) -> Principal: + algorithm = request.args.get("X-Amz-Algorithm") + credential = request.args.get("X-Amz-Credential") + amz_date = request.args.get("X-Amz-Date") + signed_headers = request.args.get("X-Amz-SignedHeaders") + expires = request.args.get("X-Amz-Expires") + signature = request.args.get("X-Amz-Signature") + if not all([algorithm, credential, amz_date, signed_headers, expires, signature]): + raise IamError("Malformed presigned URL") + if algorithm != "AWS4-HMAC-SHA256": + raise IamError("Unsupported signing algorithm") + + parts = credential.split("/") + if len(parts) != 5: + raise IamError("Invalid credential scope") + access_key, date_stamp, region, service, terminal = parts + if terminal != "aws4_request": + raise IamError("Invalid credential scope") + config_region = current_app.config["AWS_REGION"] + config_service = current_app.config["AWS_SERVICE"] + if region != config_region or service != config_service: + raise IamError("Credential scope mismatch") + + try: + expiry = int(expires) + except ValueError as exc: + raise IamError("Invalid expiration") from exc + if expiry < 1 or expiry > 7 * 24 * 3600: + raise IamError("Expiration must be between 1 second and 7 days") + + try: + request_time = datetime.strptime(amz_date, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc) + except ValueError as exc: + raise IamError("Invalid X-Amz-Date") from exc + if datetime.now(timezone.utc) > request_time + timedelta(seconds=expiry): + raise IamError("Presigned URL expired") + + signed_headers_list = [header.strip().lower() for header in signed_headers.split(";") if header] + signed_headers_list.sort() + canonical_headers = _canonical_headers_from_request(signed_headers_list) + canonical_query = _canonical_query_from_request() + payload_hash = request.args.get("X-Amz-Content-Sha256", "UNSIGNED-PAYLOAD") + canonical_request = "\n".join( + [ + request.method, + _canonical_uri(bucket_name, object_key), + canonical_query, + canonical_headers, + ";".join(signed_headers_list), + payload_hash, + ] + ) + hashed_request = hashlib.sha256(canonical_request.encode()).hexdigest() + scope = f"{date_stamp}/{region}/{service}/aws4_request" + string_to_sign = "\n".join([ + "AWS4-HMAC-SHA256", + amz_date, + scope, + hashed_request, + ]) + secret = _iam().secret_for_key(access_key) + signing_key = _derive_signing_key(secret, date_stamp, region, service) + expected = hmac.new(signing_key, string_to_sign.encode(), 
hashlib.sha256).hexdigest() + if not hmac.compare_digest(expected, signature): + raise IamError("Signature mismatch") + return _iam().principal_for_key(access_key) + + +def _canonical_query_from_request() -> str: + parts = [] + for key in sorted(request.args.keys()): + if key == "X-Amz-Signature": + continue + values = request.args.getlist(key) + encoded_key = quote(str(key), safe="-_.~") + for value in sorted(values): + encoded_value = quote(str(value), safe="-_.~") + parts.append(f"{encoded_key}={encoded_value}") + return "&".join(parts) + + +def _canonical_headers_from_request(headers: list[str]) -> str: + lines = [] + for header in headers: + if header == "host": + value = request.host + else: + value = request.headers.get(header, "") + canonical_value = " ".join(value.strip().split()) if value else "" + lines.append(f"{header}:{canonical_value}") + return "\n".join(lines) + "\n" + + +def _canonical_uri(bucket_name: str, object_key: str | None) -> str: + segments = [bucket_name] + if object_key: + segments.extend(object_key.split("/")) + encoded = [quote(segment, safe="-_.~") for segment in segments] + return "/" + "/".join(encoded) + + +def _extract_request_metadata() -> Dict[str, str]: + metadata: Dict[str, str] = {} + for header, value in request.headers.items(): + if header.lower().startswith("x-amz-meta-"): + key = header[11:] + if key: + metadata[key] = value + return metadata + + +def _derive_signing_key(secret: str, date_stamp: str, region: str, service: str) -> bytes: + def _sign(key: bytes, msg: str) -> bytes: + return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest() + + k_date = _sign(("AWS4" + secret).encode("utf-8"), date_stamp) + k_region = _sign(k_date, region) + k_service = _sign(k_region, service) + return _sign(k_service, "aws4_request") + + +def _generate_presigned_url( + *, + principal: Principal, + secret_key: str, + method: str, + bucket_name: str, + object_key: str, + expires_in: int, +) -> str: + region = current_app.config["AWS_REGION"] + service = current_app.config["AWS_SERVICE"] + algorithm = "AWS4-HMAC-SHA256" + now = datetime.now(timezone.utc) + amz_date = now.strftime("%Y%m%dT%H%M%SZ") + date_stamp = now.strftime("%Y%m%d") + credential_scope = f"{date_stamp}/{region}/{service}/aws4_request" + credential = f"{principal.access_key}/{credential_scope}" + + query_params = { + "X-Amz-Algorithm": algorithm, + "X-Amz-Credential": credential, + "X-Amz-Date": amz_date, + "X-Amz-Expires": str(expires_in), + "X-Amz-SignedHeaders": "host", + "X-Amz-Content-Sha256": "UNSIGNED-PAYLOAD", + } + canonical_query = _encode_query_params(query_params) + host = request.host + canonical_headers = f"host:{host}\n" + canonical_request = "\n".join( + [ + method, + _canonical_uri(bucket_name, object_key), + canonical_query, + canonical_headers, + "host", + "UNSIGNED-PAYLOAD", + ] + ) + hashed_request = hashlib.sha256(canonical_request.encode()).hexdigest() + string_to_sign = "\n".join( + [ + algorithm, + amz_date, + credential_scope, + hashed_request, + ] + ) + signing_key = _derive_signing_key(secret_key, date_stamp, region, service) + signature = hmac.new(signing_key, string_to_sign.encode(), hashlib.sha256).hexdigest() + query_with_sig = canonical_query + f"&X-Amz-Signature={signature}" + scheme = request.scheme or "http" + return f"{scheme}://{host}{_canonical_uri(bucket_name, object_key)}?{query_with_sig}" + + +def _encode_query_params(params: dict[str, str]) -> str: + parts = [] + for key in sorted(params.keys()): + value = params[key] + encoded_key = 
quote(str(key), safe="-_.~") + encoded_value = quote(str(value), safe="-_.~") + parts.append(f"{encoded_key}={encoded_value}") + return "&".join(parts) + + +def _strip_ns(tag: str | None) -> str: + if not tag: + return "" + return tag.split("}")[-1] + + +def _parse_tagging_document(payload: bytes) -> list[dict[str, str]]: + try: + root = fromstring(payload) + except ParseError as exc: + raise ValueError("Malformed XML") from exc + if _strip_ns(root.tag) != "Tagging": + raise ValueError("Root element must be Tagging") + tagset = root.find(".//{*}TagSet") + if tagset is None: + tagset = root.find("TagSet") + if tagset is None: + return [] + tags: list[dict[str, str]] = [] + for tag_el in list(tagset): + if _strip_ns(tag_el.tag) != "Tag": + continue + key_el = tag_el.find("{*}Key") + if key_el is None: + key_el = tag_el.find("Key") + value_el = tag_el.find("{*}Value") + if value_el is None: + value_el = tag_el.find("Value") + key = (key_el.text or "").strip() if key_el is not None else "" + if not key: + continue + value = value_el.text if value_el is not None else "" + tags.append({"Key": key, "Value": value or ""}) + return tags + + +def _render_tagging_document(tags: list[dict[str, str]]) -> Element: + root = Element("Tagging") + tagset_el = SubElement(root, "TagSet") + for tag in tags: + tag_el = SubElement(tagset_el, "Tag") + SubElement(tag_el, "Key").text = tag.get("Key", "") + SubElement(tag_el, "Value").text = tag.get("Value", "") + return root + + +def _parse_cors_document(payload: bytes) -> list[dict[str, Any]]: + try: + root = fromstring(payload) + except ParseError as exc: + raise ValueError("Malformed XML") from exc + if _strip_ns(root.tag) != "CORSConfiguration": + raise ValueError("Root element must be CORSConfiguration") + rules: list[dict[str, Any]] = [] + for rule_el in list(root): + if _strip_ns(rule_el.tag) != "CORSRule": + continue + rule: dict[str, Any] = { + "AllowedOrigins": [], + "AllowedMethods": [], + "AllowedHeaders": [], + "ExposeHeaders": [], + } + for child in list(rule_el): + name = _strip_ns(child.tag) + if name == "AllowedOrigin": + rule["AllowedOrigins"].append((child.text or "")) + elif name == "AllowedMethod": + rule["AllowedMethods"].append((child.text or "")) + elif name == "AllowedHeader": + rule["AllowedHeaders"].append((child.text or "")) + elif name == "ExposeHeader": + rule["ExposeHeaders"].append((child.text or "")) + elif name == "MaxAgeSeconds": + try: + rule["MaxAgeSeconds"] = int(child.text or 0) + except ValueError: + raise ValueError("MaxAgeSeconds must be an integer") from None + rules.append(rule) + return rules + + +def _render_cors_document(rules: list[dict[str, Any]]) -> Element: + root = Element("CORSConfiguration") + for rule in rules: + rule_el = SubElement(root, "CORSRule") + for origin in rule.get("AllowedOrigins", []): + SubElement(rule_el, "AllowedOrigin").text = origin + for method in rule.get("AllowedMethods", []): + SubElement(rule_el, "AllowedMethod").text = method + for header in rule.get("AllowedHeaders", []): + SubElement(rule_el, "AllowedHeader").text = header + for header in rule.get("ExposeHeaders", []): + SubElement(rule_el, "ExposeHeader").text = header + if "MaxAgeSeconds" in rule and rule["MaxAgeSeconds"] is not None: + SubElement(rule_el, "MaxAgeSeconds").text = str(rule["MaxAgeSeconds"]) + return root + + +def _parse_encryption_document(payload: bytes) -> dict[str, Any]: + try: + root = fromstring(payload) + except ParseError as exc: + raise ValueError("Malformed XML") from exc + if _strip_ns(root.tag) != 
"ServerSideEncryptionConfiguration": + raise ValueError("Root element must be ServerSideEncryptionConfiguration") + rules: list[dict[str, Any]] = [] + for rule_el in list(root): + if _strip_ns(rule_el.tag) != "Rule": + continue + default_el = None + bucket_key_el = None + for child in list(rule_el): + name = _strip_ns(child.tag) + if name == "ApplyServerSideEncryptionByDefault": + default_el = child + elif name == "BucketKeyEnabled": + bucket_key_el = child + if default_el is None: + continue + algo_el = default_el.find("{*}SSEAlgorithm") + if algo_el is None: + algo_el = default_el.find("SSEAlgorithm") + if algo_el is None or not (algo_el.text or "").strip(): + raise ValueError("SSEAlgorithm is required") + rule: dict[str, Any] = {"SSEAlgorithm": algo_el.text.strip()} + kms_el = default_el.find("{*}KMSMasterKeyID") + if kms_el is None: + kms_el = default_el.find("KMSMasterKeyID") + if kms_el is not None and kms_el.text: + rule["KMSMasterKeyID"] = kms_el.text.strip() + if bucket_key_el is not None and bucket_key_el.text: + rule["BucketKeyEnabled"] = bucket_key_el.text.strip().lower() in {"true", "1"} + rules.append(rule) + if not rules: + raise ValueError("At least one Rule is required") + return {"Rules": rules} + + +def _render_encryption_document(config: dict[str, Any]) -> Element: + root = Element("ServerSideEncryptionConfiguration") + for rule in config.get("Rules", []): + rule_el = SubElement(root, "Rule") + default_el = SubElement(rule_el, "ApplyServerSideEncryptionByDefault") + SubElement(default_el, "SSEAlgorithm").text = rule.get("SSEAlgorithm", "") + if rule.get("KMSMasterKeyID"): + SubElement(default_el, "KMSMasterKeyID").text = rule["KMSMasterKeyID"] + if "BucketKeyEnabled" in rule: + SubElement(rule_el, "BucketKeyEnabled").text = "true" if rule["BucketKeyEnabled"] else "false" + return root + + +def _stream_file(path, chunk_size: int = 64 * 1024): + with path.open("rb") as handle: + while True: + chunk = handle.read(chunk_size) + if not chunk: + break + yield chunk + + +def _method_not_allowed(allowed: list[str]) -> Response: + response = _error_response( + "MethodNotAllowed", + "The specified method is not allowed for this resource", + 405, + ) + response.headers["Allow"] = ", ".join(sorted({method.upper() for method in allowed})) + return response + + +def _apply_object_headers( + response: Response, + *, + file_stat, + metadata: Dict[str, str] | None, + etag: str, +) -> None: + response.headers["Content-Length"] = str(file_stat.st_size) + response.headers["Last-Modified"] = http_date(file_stat.st_mtime) + response.headers["ETag"] = f'"{etag}"' + response.headers["Accept-Ranges"] = "bytes" + for key, value in (metadata or {}).items(): + response.headers[f"X-Amz-Meta-{key}"] = value + + +def _maybe_handle_bucket_subresource(bucket_name: str) -> Response | None: + handlers = { + "versioning": _bucket_versioning_handler, + "tagging": _bucket_tagging_handler, + "cors": _bucket_cors_handler, + "encryption": _bucket_encryption_handler, + } + requested = [key for key in handlers if key in request.args] + if not requested: + return None + if len(requested) > 1: + return _error_response( + "InvalidRequest", + "Only a single bucket subresource can be requested at a time", + 400, + ) + handler = handlers[requested[0]] + return handler(bucket_name) + + +def _bucket_versioning_handler(bucket_name: str) -> Response: + if request.method != "GET": + return _method_not_allowed(["GET"]) + principal, error = _require_principal() + if error: + return error + try: + _authorize_action(principal, 
bucket_name, "policy") + except IamError as exc: + return _error_response("AccessDenied", str(exc), 403) + storage = _storage() + try: + enabled = storage.is_versioning_enabled(bucket_name) + except StorageError as exc: + return _error_response("NoSuchBucket", str(exc), 404) + root = Element("VersioningConfiguration") + SubElement(root, "Status").text = "Enabled" if enabled else "Suspended" + return _xml_response(root) + + +def _bucket_tagging_handler(bucket_name: str) -> Response: + if request.method not in {"GET", "PUT"}: + return _method_not_allowed(["GET", "PUT"]) + principal, error = _require_principal() + if error: + return error + try: + _authorize_action(principal, bucket_name, "policy") + except IamError as exc: + return _error_response("AccessDenied", str(exc), 403) + storage = _storage() + if request.method == "GET": + try: + tags = storage.get_bucket_tags(bucket_name) + except StorageError as exc: + return _error_response("NoSuchBucket", str(exc), 404) + if not tags: + return _error_response("NoSuchTagSet", "No tags are configured for this bucket", 404) + return _xml_response(_render_tagging_document(tags)) + payload = request.get_data(cache=False) or b"" + try: + tags = _parse_tagging_document(payload) + except ValueError as exc: + return _error_response("MalformedXML", str(exc), 400) + if len(tags) > 50: + return _error_response("InvalidTag", "A maximum of 50 tags is supported", 400) + try: + storage.set_bucket_tags(bucket_name, tags) + except StorageError as exc: + return _error_response("NoSuchBucket", str(exc), 404) + current_app.logger.info("Bucket tags updated", extra={"bucket": bucket_name, "tags": len(tags)}) + return Response(status=204) + + +def _sanitize_cors_rules(rules: list[dict[str, Any]]) -> list[dict[str, Any]]: + sanitized: list[dict[str, Any]] = [] + for rule in rules: + allowed_origins = [origin.strip() for origin in rule.get("AllowedOrigins", []) if origin and origin.strip()] + allowed_methods = [method.strip().upper() for method in rule.get("AllowedMethods", []) if method and method.strip()] + allowed_headers = [header.strip() for header in rule.get("AllowedHeaders", []) if header and header.strip()] + expose_headers = [header.strip() for header in rule.get("ExposeHeaders", []) if header and header.strip()] + if not allowed_origins or not allowed_methods: + raise ValueError("Each CORSRule must include AllowedOrigin and AllowedMethod entries") + sanitized_rule: dict[str, Any] = { + "AllowedOrigins": allowed_origins, + "AllowedMethods": allowed_methods, + } + if allowed_headers: + sanitized_rule["AllowedHeaders"] = allowed_headers + if expose_headers: + sanitized_rule["ExposeHeaders"] = expose_headers + if "MaxAgeSeconds" in rule and rule["MaxAgeSeconds"] is not None: + sanitized_rule["MaxAgeSeconds"] = int(rule["MaxAgeSeconds"]) + sanitized.append(sanitized_rule) + return sanitized + + +def _bucket_cors_handler(bucket_name: str) -> Response: + if request.method not in {"GET", "PUT"}: + return _method_not_allowed(["GET", "PUT"]) + principal, error = _require_principal() + if error: + return error + try: + _authorize_action(principal, bucket_name, "policy") + except IamError as exc: + return _error_response("AccessDenied", str(exc), 403) + storage = _storage() + if request.method == "GET": + try: + rules = storage.get_bucket_cors(bucket_name) + except StorageError as exc: + return _error_response("NoSuchBucket", str(exc), 404) + if not rules: + return _error_response("NoSuchCORSConfiguration", "No CORS configuration found", 404) + return 
_xml_response(_render_cors_document(rules)) + payload = request.get_data(cache=False) or b"" + if not payload.strip(): + try: + storage.set_bucket_cors(bucket_name, None) + except StorageError as exc: + return _error_response("NoSuchBucket", str(exc), 404) + current_app.logger.info("Bucket CORS cleared", extra={"bucket": bucket_name}) + return Response(status=204) + try: + rules = _parse_cors_document(payload) + sanitized = _sanitize_cors_rules(rules) + except ValueError as exc: + return _error_response("MalformedXML", str(exc), 400) + if not sanitized: + return _error_response("InvalidRequest", "At least one CORSRule must be supplied", 400) + try: + storage.set_bucket_cors(bucket_name, sanitized) + except StorageError as exc: + return _error_response("NoSuchBucket", str(exc), 404) + current_app.logger.info("Bucket CORS updated", extra={"bucket": bucket_name, "rules": len(sanitized)}) + return Response(status=204) + + +def _bucket_encryption_handler(bucket_name: str) -> Response: + if request.method not in {"GET", "PUT"}: + return _method_not_allowed(["GET", "PUT"]) + principal, error = _require_principal() + if error: + return error + try: + _authorize_action(principal, bucket_name, "policy") + except IamError as exc: + return _error_response("AccessDenied", str(exc), 403) + storage = _storage() + if request.method == "GET": + try: + config = storage.get_bucket_encryption(bucket_name) + except StorageError as exc: + return _error_response("NoSuchBucket", str(exc), 404) + if not config: + return _error_response( + "ServerSideEncryptionConfigurationNotFoundError", + "No server-side encryption configuration found", + 404, + ) + return _xml_response(_render_encryption_document(config)) + payload = request.get_data(cache=False) or b"" + if not payload.strip(): + try: + storage.set_bucket_encryption(bucket_name, None) + except StorageError as exc: + return _error_response("NoSuchBucket", str(exc), 404) + current_app.logger.info("Bucket encryption cleared", extra={"bucket": bucket_name}) + return Response(status=204) + try: + config = _parse_encryption_document(payload) + except ValueError as exc: + return _error_response("MalformedXML", str(exc), 400) + try: + storage.set_bucket_encryption(bucket_name, config) + except StorageError as exc: + return _error_response("NoSuchBucket", str(exc), 404) + current_app.logger.info("Bucket encryption updated", extra={"bucket": bucket_name}) + return Response(status=204) + + +def _bulk_delete_handler(bucket_name: str) -> Response: + principal, error = _require_principal() + if error: + return error + try: + _authorize_action(principal, bucket_name, "delete") + except IamError as exc: + return _error_response("AccessDenied", str(exc), 403) + + payload = request.get_data(cache=False) or b"" + if not payload.strip(): + return _error_response("MalformedXML", "Request body must include a Delete specification", 400) + try: + root = fromstring(payload) + except ParseError: + return _error_response("MalformedXML", "Unable to parse XML document", 400) + if _strip_ns(root.tag) != "Delete": + return _error_response("MalformedXML", "Root element must be Delete", 400) + + quiet = False + objects: list[dict[str, str | None]] = [] + for child in list(root): + name = _strip_ns(child.tag) + if name == "Quiet": + quiet = (child.text or "").strip().lower() in {"true", "1"} + continue + if name != "Object": + continue + key_text = "" + version_text: str | None = None + for entry in list(child): + entry_name = _strip_ns(entry.tag) + if entry_name == "Key": + key_text = 
(entry.text or "").strip() + elif entry_name == "VersionId": + version_text = (entry.text or "").strip() or None + if not key_text: + continue + objects.append({"Key": key_text, "VersionId": version_text}) + + if not objects: + return _error_response("MalformedXML", "At least one Object entry is required", 400) + if len(objects) > 1000: + return _error_response("MalformedXML", "A maximum of 1000 objects can be deleted per request", 400) + + storage = _storage() + deleted: list[str] = [] + errors: list[dict[str, str]] = [] + for entry in objects: + key = entry["Key"] or "" + version_id = entry.get("VersionId") + if version_id: + errors.append({ + "Key": key, + "Code": "InvalidRequest", + "Message": "VersionId is not supported for bulk deletes", + }) + continue + try: + storage.delete_object(bucket_name, key) + deleted.append(key) + except StorageError as exc: + errors.append({"Key": key, "Code": "InvalidRequest", "Message": str(exc)}) + + result = Element("DeleteResult") + if not quiet: + for key in deleted: + deleted_el = SubElement(result, "Deleted") + SubElement(deleted_el, "Key").text = key + for err in errors: + error_el = SubElement(result, "Error") + SubElement(error_el, "Key").text = err.get("Key", "") + SubElement(error_el, "Code").text = err.get("Code", "InvalidRequest") + SubElement(error_el, "Message").text = err.get("Message", "Request failed") + + current_app.logger.info( + "Bulk object delete", + extra={"bucket": bucket_name, "deleted": len(deleted), "errors": len(errors)}, + ) + return _xml_response(result, status=200) + + +# ---------------------- routes ---------------------- +@s3_api_bp.get("/") +@limiter.limit("60 per minute") +def list_buckets() -> Response: + principal, error = _require_principal() + if error: + return error + try: + _authorize_action(principal, None, "list") + except IamError as exc: + return _error_response("AccessDenied", str(exc), 403) + root = Element("ListAllMyBucketsResult") + owner = SubElement(root, "Owner") + SubElement(owner, "ID").text = principal.access_key + SubElement(owner, "DisplayName").text = principal.display_name + buckets_el = SubElement(root, "Buckets") + + storage_buckets = _storage().list_buckets() + allowed = set(_iam().buckets_for_principal(principal, [b.name for b in storage_buckets])) + for bucket in storage_buckets: + if bucket.name not in allowed: + continue + bucket_el = SubElement(buckets_el, "Bucket") + SubElement(bucket_el, "Name").text = bucket.name + SubElement(bucket_el, "CreationDate").text = bucket.created_at.isoformat() + + return _xml_response(root) + + +@s3_api_bp.route("/", methods=["PUT", "DELETE", "GET", "POST"], strict_slashes=False) +@limiter.limit("120 per minute") +def bucket_handler(bucket_name: str) -> Response: + storage = _storage() + subresource_response = _maybe_handle_bucket_subresource(bucket_name) + if subresource_response is not None: + return subresource_response + + if request.method == "POST": + if "delete" not in request.args: + return _method_not_allowed(["GET", "PUT", "DELETE"]) + return _bulk_delete_handler(bucket_name) + + if request.method == "PUT": + principal, error = _require_principal() + if error: + return error + try: + _authorize_action(principal, bucket_name, "write") + except IamError as exc: + return _error_response("AccessDenied", str(exc), 403) + try: + storage.create_bucket(bucket_name) + except FileExistsError: + return _error_response("BucketAlreadyExists", "Bucket exists", 409) + except StorageError as exc: + return _error_response("InvalidBucketName", str(exc), 400) + 
current_app.logger.info("Bucket created", extra={"bucket": bucket_name}) + return Response(status=200) + + if request.method == "DELETE": + principal, error = _require_principal() + if error: + return error + try: + _authorize_action(principal, bucket_name, "delete") + except IamError as exc: + return _error_response("AccessDenied", str(exc), 403) + try: + storage.delete_bucket(bucket_name) + _bucket_policies().delete_policy(bucket_name) + except StorageError as exc: + code = "BucketNotEmpty" if "not empty" in str(exc) else "NoSuchBucket" + status = 409 if code == "BucketNotEmpty" else 404 + return _error_response(code, str(exc), status) + current_app.logger.info("Bucket deleted", extra={"bucket": bucket_name}) + return Response(status=204) + + # GET - list objects + principal, error = _require_principal() + try: + _authorize_action(principal, bucket_name, "list") + except IamError as exc: + if error: + return error + return _error_response("AccessDenied", str(exc), 403) + try: + objects = storage.list_objects(bucket_name) + except StorageError as exc: + return _error_response("NoSuchBucket", str(exc), 404) + + root = Element("ListBucketResult") + SubElement(root, "Name").text = bucket_name + SubElement(root, "MaxKeys").text = str(current_app.config["UI_PAGE_SIZE"]) + SubElement(root, "IsTruncated").text = "false" + for meta in objects: + obj_el = SubElement(root, "Contents") + SubElement(obj_el, "Key").text = meta.key + SubElement(obj_el, "LastModified").text = meta.last_modified.isoformat() + SubElement(obj_el, "ETag").text = f'"{meta.etag}"' + SubElement(obj_el, "Size").text = str(meta.size) + + return _xml_response(root) + + +@s3_api_bp.route("//", methods=["PUT", "GET", "DELETE", "HEAD"], strict_slashes=False) +@limiter.limit("240 per minute") +def object_handler(bucket_name: str, object_key: str): + storage = _storage() + + if request.method == "PUT": + _, error = _object_principal("write", bucket_name, object_key) + if error: + return error + stream = request.stream + metadata = _extract_request_metadata() + try: + meta = storage.put_object( + bucket_name, + object_key, + stream, + metadata=metadata or None, + ) + except StorageError as exc: + message = str(exc) + if "Bucket" in message: + return _error_response("NoSuchBucket", message, 404) + return _error_response("InvalidArgument", message, 400) + current_app.logger.info( + "Object uploaded", + extra={"bucket": bucket_name, "key": object_key, "size": meta.size}, + ) + response = Response(status=200) + response.headers["ETag"] = f'"{meta.etag}"' + return response + + if request.method in {"GET", "HEAD"}: + _, error = _object_principal("read", bucket_name, object_key) + if error: + return error + try: + path = storage.get_object_path(bucket_name, object_key) + except StorageError as exc: + return _error_response("NoSuchKey", str(exc), 404) + metadata = storage.get_object_metadata(bucket_name, object_key) + stat = path.stat() + mimetype = mimetypes.guess_type(path.name)[0] or "application/octet-stream" + etag = storage._compute_etag(path) + + if request.method == "GET": + response = Response(_stream_file(path), mimetype=mimetype, direct_passthrough=True) + logged_bytes = stat.st_size + else: + response = Response(status=200) + response.headers["Content-Type"] = mimetype + logged_bytes = 0 + + _apply_object_headers(response, file_stat=stat, metadata=metadata, etag=etag) + action = "Object read" if request.method == "GET" else "Object head" + current_app.logger.info(action, extra={"bucket": bucket_name, "key": object_key, "bytes": 
logged_bytes}) + return response + + # DELETE + _, error = _object_principal("delete", bucket_name, object_key) + if error: + return error + storage.delete_object(bucket_name, object_key) + current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key}) + return Response(status=204) + + + + +@s3_api_bp.route("/bucket-policy/", methods=["GET", "PUT", "DELETE"]) +@limiter.limit("30 per minute") +def bucket_policy_handler(bucket_name: str) -> Response: + principal, error = _require_principal() + if error: + return error + try: + _authorize_action(principal, bucket_name, "policy") + except IamError as exc: + return _error_response("AccessDenied", str(exc), 403) + storage = _storage() + if not storage.bucket_exists(bucket_name): + return _error_response("NoSuchBucket", "Bucket does not exist", 404) + store = _bucket_policies() + if request.method == "GET": + policy = store.get_policy(bucket_name) + if not policy: + return _error_response("NoSuchBucketPolicy", "No bucket policy attached", 404) + return jsonify(policy) + if request.method == "DELETE": + store.delete_policy(bucket_name) + current_app.logger.info("Bucket policy removed", extra={"bucket": bucket_name}) + return Response(status=204) + payload = request.get_json(silent=True) + if not payload: + return _error_response("MalformedPolicy", "Policy document must be JSON", 400) + try: + store.set_policy(bucket_name, payload) + current_app.logger.info("Bucket policy updated", extra={"bucket": bucket_name}) + except ValueError as exc: + return _error_response("MalformedPolicy", str(exc), 400) + return Response(status=204) + + +@s3_api_bp.post("/presign//") +@limiter.limit("45 per minute") +def presign_object(bucket_name: str, object_key: str): + payload = request.get_json(silent=True) or {} + method = str(payload.get("method", "GET")).upper() + allowed_methods = {"GET", "PUT", "DELETE"} + if method not in allowed_methods: + return _error_response("InvalidRequest", "Method must be GET, PUT, or DELETE", 400) + try: + expires = int(payload.get("expires_in", 900)) + except (TypeError, ValueError): + return _error_response("InvalidRequest", "expires_in must be an integer", 400) + expires = max(1, min(expires, 7 * 24 * 3600)) + action = "read" if method == "GET" else ("delete" if method == "DELETE" else "write") + principal, error = _require_principal() + if error: + return error + try: + _authorize_action(principal, bucket_name, action, object_key=object_key) + except IamError as exc: + return _error_response("AccessDenied", str(exc), 403) + storage = _storage() + if not storage.bucket_exists(bucket_name): + return _error_response("NoSuchBucket", "Bucket does not exist", 404) + if action != "write": + try: + storage.get_object_path(bucket_name, object_key) + except StorageError: + return _error_response("NoSuchKey", "Object not found", 404) + secret = _iam().secret_for_key(principal.access_key) + url = _generate_presigned_url( + principal=principal, + secret_key=secret, + method=method, + bucket_name=bucket_name, + object_key=object_key, + expires_in=expires, + ) + current_app.logger.info( + "Presigned URL generated", + extra={"bucket": bucket_name, "key": object_key, "method": method}, + ) + return jsonify({"url": url, "method": method, "expires_in": expires}) + + +@s3_api_bp.route("/", methods=["HEAD"]) +@limiter.limit("100 per minute") +def head_bucket(bucket_name: str) -> Response: + principal, error = _require_principal() + if error: + return error + try: + _authorize_action(principal, bucket_name, "list") + if not 
_storage().bucket_exists(bucket_name): + return _error_response("NoSuchBucket", "Bucket not found", 404) + return Response(status=200) + except IamError as exc: + return _error_response("AccessDenied", str(exc), 403) + + +@s3_api_bp.route("//", methods=["HEAD"]) +@limiter.limit("100 per minute") +def head_object(bucket_name: str, object_key: str) -> Response: + principal, error = _require_principal() + if error: + return error + try: + _authorize_action(principal, bucket_name, "read", object_key=object_key) + path = _storage().get_object_path(bucket_name, object_key) + metadata = _storage().get_object_metadata(bucket_name, object_key) + stat = path.stat() + etag = _storage()._compute_etag(path) + + response = Response(status=200) + _apply_object_headers(response, file_stat=stat, metadata=metadata, etag=etag) + response.headers["Content-Type"] = mimetypes.guess_type(object_key)[0] or "application/octet-stream" + return response + except (StorageError, FileNotFoundError): + return _error_response("NoSuchKey", "Object not found", 404) + except IamError as exc: + return _error_response("AccessDenied", str(exc), 403) diff --git a/app/secret_store.py b/app/secret_store.py new file mode 100644 index 0000000..a7c3416 --- /dev/null +++ b/app/secret_store.py @@ -0,0 +1,37 @@ +"""Ephemeral store for one-time secrets communicated to the UI.""" +from __future__ import annotations + +import secrets +import time +from typing import Any, Dict, Optional + + +class EphemeralSecretStore: + """Keeps values in-memory for a short period and returns them once.""" + + def __init__(self, default_ttl: int = 300) -> None: + self._default_ttl = max(default_ttl, 1) + self._store: Dict[str, tuple[Any, float]] = {} + + def remember(self, payload: Any, *, ttl: Optional[int] = None) -> str: + token = secrets.token_urlsafe(16) + expires_at = time.time() + (ttl or self._default_ttl) + self._store[token] = (payload, expires_at) + return token + + def pop(self, token: str | None) -> Any | None: + if not token: + return None + entry = self._store.pop(token, None) + if not entry: + return None + payload, expires_at = entry + if expires_at < time.time(): + return None + return payload + + def purge_expired(self) -> None: + now = time.time() + stale = [token for token, (_, expires_at) in self._store.items() if expires_at < now] + for token in stale: + self._store.pop(token, None) diff --git a/app/storage.py b/app/storage.py new file mode 100644 index 0000000..37b31db --- /dev/null +++ b/app/storage.py @@ -0,0 +1,935 @@ +"""Filesystem-backed object storage helpers.""" +from __future__ import annotations + +import hashlib +import json +import os +import re +import shutil +import stat +import time +import unicodedata +import uuid +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, BinaryIO, Dict, List, Optional + +WINDOWS_RESERVED_NAMES = { + "CON", + "PRN", + "AUX", + "NUL", + "COM1", + "COM2", + "COM3", + "COM4", + "COM5", + "COM6", + "COM7", + "COM8", + "COM9", + "LPT1", + "LPT2", + "LPT3", + "LPT4", + "LPT5", + "LPT6", + "LPT7", + "LPT8", + "LPT9", +} + + +class StorageError(RuntimeError): + """Raised when the storage layer encounters an unrecoverable problem.""" + + +@dataclass +class ObjectMeta: + key: str + size: int + last_modified: datetime + etag: str + metadata: Optional[Dict[str, str]] = None + + +@dataclass +class BucketMeta: + name: str + created_at: datetime + + +def _utcnow() -> datetime: + return datetime.now(timezone.utc) + + +def 
_utc_isoformat() -> str: + return _utcnow().isoformat().replace("+00:00", "Z") + + +class ObjectStorage: + """Very small filesystem wrapper implementing the bare S3 primitives.""" + + INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"} + SYSTEM_ROOT = ".myfsio.sys" + SYSTEM_BUCKETS_DIR = "buckets" + SYSTEM_MULTIPART_DIR = "multipart" + SYSTEM_TMP_DIR = "tmp" + BUCKET_META_DIR = "meta" + BUCKET_VERSIONS_DIR = "versions" + MULTIPART_MANIFEST = "manifest.json" + BUCKET_CONFIG_FILE = ".bucket.json" + + def __init__(self, root: Path) -> None: + self.root = Path(root) + self.root.mkdir(parents=True, exist_ok=True) + self._ensure_system_roots() + + # ---------------------- Bucket helpers ---------------------- + def list_buckets(self) -> List[BucketMeta]: + buckets: List[BucketMeta] = [] + for bucket in sorted(self.root.iterdir()): + if bucket.is_dir() and bucket.name != self.SYSTEM_ROOT: + stat = bucket.stat() + buckets.append( + BucketMeta( + name=bucket.name, + created_at=datetime.fromtimestamp(stat.st_ctime), + ) + ) + return buckets + + def bucket_exists(self, bucket_name: str) -> bool: + return self._bucket_path(bucket_name).exists() + + def _validate_bucket_name(self, bucket_name: str) -> None: + if len(bucket_name) < 3 or len(bucket_name) > 63: + raise StorageError("Bucket name must be between 3 and 63 characters") + if not re.match(r"^[a-z0-9][a-z0-9.-]*[a-z0-9]$", bucket_name): + raise StorageError("Bucket name must consist of lowercase letters, numbers, periods, and hyphens, and must start and end with a letter or number") + if ".." in bucket_name: + raise StorageError("Bucket name must not contain consecutive periods") + if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", bucket_name): + raise StorageError("Bucket name must not be formatted as an IP address") + + def create_bucket(self, bucket_name: str) -> None: + self._validate_bucket_name(bucket_name) + bucket_path = self._bucket_path(bucket_name) + bucket_path.mkdir(parents=True, exist_ok=False) + self._system_bucket_root(bucket_path.name).mkdir(parents=True, exist_ok=True) + + def bucket_stats(self, bucket_name: str) -> dict[str, int]: + """Return object count and total size for the bucket without hashing files.""" + bucket_path = self._bucket_path(bucket_name) + if not bucket_path.exists(): + raise StorageError("Bucket does not exist") + object_count = 0 + total_bytes = 0 + for path in bucket_path.rglob("*"): + if path.is_file(): + rel = path.relative_to(bucket_path) + if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS: + continue + stat = path.stat() + object_count += 1 + total_bytes += stat.st_size + return {"objects": object_count, "bytes": total_bytes} + + def delete_bucket(self, bucket_name: str) -> None: + bucket_path = self._bucket_path(bucket_name) + if not bucket_path.exists(): + raise StorageError("Bucket does not exist") + if self._has_visible_objects(bucket_path): + raise StorageError("Bucket not empty") + if self._has_archived_versions(bucket_path): + raise StorageError("Bucket contains archived object versions") + if self._has_active_multipart_uploads(bucket_path): + raise StorageError("Bucket has active multipart uploads") + self._remove_tree(bucket_path) + self._remove_tree(self._system_bucket_root(bucket_path.name)) + self._remove_tree(self._multipart_bucket_root(bucket_path.name)) + + # ---------------------- Object helpers ---------------------- + def list_objects(self, bucket_name: str) -> List[ObjectMeta]: + bucket_path = self._bucket_path(bucket_name) + if not bucket_path.exists(): + raise 
StorageError("Bucket does not exist") + bucket_id = bucket_path.name + + objects: List[ObjectMeta] = [] + for path in bucket_path.rglob("*"): + if path.is_file(): + stat = path.stat() + rel = path.relative_to(bucket_path) + if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS: + continue + metadata = self._read_metadata(bucket_id, rel) + objects.append( + ObjectMeta( + key=str(rel.as_posix()), + size=stat.st_size, + last_modified=datetime.fromtimestamp(stat.st_mtime), + etag=self._compute_etag(path), + metadata=metadata or None, + ) + ) + objects.sort(key=lambda meta: meta.key) + return objects + + def put_object( + self, + bucket_name: str, + object_key: str, + stream: BinaryIO, + *, + metadata: Optional[Dict[str, str]] = None, + ) -> ObjectMeta: + bucket_path = self._bucket_path(bucket_name) + if not bucket_path.exists(): + raise StorageError("Bucket does not exist") + bucket_id = bucket_path.name + + safe_key = self._sanitize_object_key(object_key) + destination = bucket_path / safe_key + destination.parent.mkdir(parents=True, exist_ok=True) + + if self._is_versioning_enabled(bucket_path) and destination.exists(): + self._archive_current_version(bucket_id, safe_key, reason="overwrite") + + checksum = hashlib.md5() + with destination.open("wb") as target: + shutil.copyfileobj(_HashingReader(stream, checksum), target) + + stat = destination.stat() + if metadata: + self._write_metadata(bucket_id, safe_key, metadata) + else: + self._delete_metadata(bucket_id, safe_key) + return ObjectMeta( + key=safe_key.as_posix(), + size=stat.st_size, + last_modified=datetime.fromtimestamp(stat.st_mtime), + etag=checksum.hexdigest(), + metadata=metadata, + ) + + def get_object_path(self, bucket_name: str, object_key: str) -> Path: + path = self._object_path(bucket_name, object_key) + if not path.exists(): + raise StorageError("Object not found") + return path + + def get_object_metadata(self, bucket_name: str, object_key: str) -> Dict[str, str]: + bucket_path = self._bucket_path(bucket_name) + if not bucket_path.exists(): + return {} + safe_key = self._sanitize_object_key(object_key) + return self._read_metadata(bucket_path.name, safe_key) or {} + + def delete_object(self, bucket_name: str, object_key: str) -> None: + bucket_path = self._bucket_path(bucket_name) + path = self._object_path(bucket_name, object_key) + if not path.exists(): + return + safe_key = path.relative_to(bucket_path) + bucket_id = bucket_path.name + if self._is_versioning_enabled(bucket_path): + self._archive_current_version(bucket_id, safe_key, reason="delete") + rel = path.relative_to(bucket_path) + self._safe_unlink(path) + self._delete_metadata(bucket_id, rel) + # Clean up now empty parents inside the bucket. 
+ for parent in path.parents: + if parent == bucket_path: + break + if parent.exists() and not any(parent.iterdir()): + parent.rmdir() + + def purge_object(self, bucket_name: str, object_key: str) -> None: + bucket_path = self._bucket_path(bucket_name) + target = self._object_path(bucket_name, object_key) + bucket_id = bucket_path.name + if target.exists(): + rel = target.relative_to(bucket_path) + self._safe_unlink(target) + self._delete_metadata(bucket_id, rel) + else: + rel = self._sanitize_object_key(object_key) + self._delete_metadata(bucket_id, rel) + version_dir = self._version_dir(bucket_id, rel) + if version_dir.exists(): + shutil.rmtree(version_dir, ignore_errors=True) + legacy_version_dir = self._legacy_version_dir(bucket_id, rel) + if legacy_version_dir.exists(): + shutil.rmtree(legacy_version_dir, ignore_errors=True) + for parent in target.parents: + if parent == bucket_path: + break + if parent.exists() and not any(parent.iterdir()): + parent.rmdir() + + # ---------------------- Versioning helpers ---------------------- + def is_versioning_enabled(self, bucket_name: str) -> bool: + bucket_path = self._bucket_path(bucket_name) + if not bucket_path.exists(): + raise StorageError("Bucket does not exist") + return self._is_versioning_enabled(bucket_path) + + def set_bucket_versioning(self, bucket_name: str, enabled: bool) -> None: + bucket_path = self._require_bucket_path(bucket_name) + config = self._read_bucket_config(bucket_path.name) + config["versioning_enabled"] = bool(enabled) + self._write_bucket_config(bucket_path.name, config) + + # ---------------------- Bucket configuration helpers ---------------------- + def get_bucket_tags(self, bucket_name: str) -> List[Dict[str, str]]: + bucket_path = self._require_bucket_path(bucket_name) + config = self._read_bucket_config(bucket_path.name) + raw_tags = config.get("tags") + if not isinstance(raw_tags, list): + return [] + tags: List[Dict[str, str]] = [] + for entry in raw_tags: + if not isinstance(entry, dict): + continue + key = str(entry.get("Key", "")).strip() + if not key: + continue + value = str(entry.get("Value", "")) + tags.append({"Key": key, "Value": value}) + return tags + + def set_bucket_tags(self, bucket_name: str, tags: Optional[List[Dict[str, str]]]) -> None: + bucket_path = self._require_bucket_path(bucket_name) + if not tags: + self._set_bucket_config_entry(bucket_path.name, "tags", None) + return + clean: List[Dict[str, str]] = [] + for entry in tags: + if not isinstance(entry, dict): + continue + key = str(entry.get("Key", "")).strip() + if not key: + continue + clean.append({"Key": key, "Value": str(entry.get("Value", ""))}) + self._set_bucket_config_entry(bucket_path.name, "tags", clean or None) + + def get_bucket_cors(self, bucket_name: str) -> List[Dict[str, Any]]: + bucket_path = self._require_bucket_path(bucket_name) + config = self._read_bucket_config(bucket_path.name) + cors_rules = config.get("cors") + return cors_rules if isinstance(cors_rules, list) else [] + + def set_bucket_cors(self, bucket_name: str, rules: Optional[List[Dict[str, Any]]]) -> None: + bucket_path = self._require_bucket_path(bucket_name) + self._set_bucket_config_entry(bucket_path.name, "cors", rules or None) + + def get_bucket_encryption(self, bucket_name: str) -> Dict[str, Any]: + bucket_path = self._require_bucket_path(bucket_name) + config = self._read_bucket_config(bucket_path.name) + payload = config.get("encryption") + return payload if isinstance(payload, dict) else {} + + def set_bucket_encryption(self, bucket_name: str, 
config_payload: Optional[Dict[str, Any]]) -> None: + bucket_path = self._require_bucket_path(bucket_name) + self._set_bucket_config_entry(bucket_path.name, "encryption", config_payload or None) + + def list_object_versions(self, bucket_name: str, object_key: str) -> List[Dict[str, Any]]: + bucket_path = self._bucket_path(bucket_name) + if not bucket_path.exists(): + raise StorageError("Bucket does not exist") + bucket_id = bucket_path.name + safe_key = self._sanitize_object_key(object_key) + version_dir = self._version_dir(bucket_id, safe_key) + if not version_dir.exists(): + version_dir = self._legacy_version_dir(bucket_id, safe_key) + if not version_dir.exists(): + version_dir = self._legacy_version_dir(bucket_id, safe_key) + if not version_dir.exists(): + return [] + versions: List[Dict[str, Any]] = [] + for meta_file in version_dir.glob("*.json"): + try: + payload = json.loads(meta_file.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + continue + if not isinstance(payload, dict): + continue + payload.setdefault("version_id", meta_file.stem) + versions.append(payload) + versions.sort(key=lambda item: item.get("archived_at", ""), reverse=True) + return versions + + def restore_object_version(self, bucket_name: str, object_key: str, version_id: str) -> ObjectMeta: + bucket_path = self._bucket_path(bucket_name) + if not bucket_path.exists(): + raise StorageError("Bucket does not exist") + bucket_id = bucket_path.name + safe_key = self._sanitize_object_key(object_key) + version_dir = self._version_dir(bucket_id, safe_key) + data_path = version_dir / f"{version_id}.bin" + meta_path = version_dir / f"{version_id}.json" + if not data_path.exists() or not meta_path.exists(): + raise StorageError("Version not found") + try: + payload = json.loads(meta_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + payload = {} + metadata = payload.get("metadata") if isinstance(payload, dict) else {} + if not isinstance(metadata, dict): + metadata = {} + destination = bucket_path / safe_key + if self._is_versioning_enabled(bucket_path) and destination.exists(): + self._archive_current_version(bucket_id, safe_key, reason="restore-overwrite") + destination.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(data_path, destination) + if metadata: + self._write_metadata(bucket_id, safe_key, metadata) + else: + self._delete_metadata(bucket_id, safe_key) + stat = destination.stat() + return ObjectMeta( + key=safe_key.as_posix(), + size=stat.st_size, + last_modified=datetime.fromtimestamp(stat.st_mtime), + etag=self._compute_etag(destination), + metadata=metadata or None, + ) + + def list_orphaned_objects(self, bucket_name: str) -> List[Dict[str, Any]]: + bucket_path = self._bucket_path(bucket_name) + if not bucket_path.exists(): + raise StorageError("Bucket does not exist") + bucket_id = bucket_path.name + version_roots = [self._bucket_versions_root(bucket_id), self._legacy_versions_root(bucket_id)] + if not any(root.exists() for root in version_roots): + return [] + aggregated: Dict[str, Dict[str, Any]] = {} + skipped: set[str] = set() + for version_root in version_roots: + if not version_root.exists(): + continue + for meta_file in version_root.glob("**/*.json"): + if not meta_file.is_file(): + continue + rel = meta_file.parent.relative_to(version_root) + rel_key = rel.as_posix() + if rel_key in skipped: + continue + object_path = bucket_path / rel + if object_path.exists(): + skipped.add(rel_key) + continue + try: + payload = 
json.loads(meta_file.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + payload = {} + version_id = payload.get("version_id") or meta_file.stem + archived_at = payload.get("archived_at") or "" + size = int(payload.get("size") or 0) + reason = payload.get("reason") or "update" + record = aggregated.setdefault( + rel_key, + { + "key": rel_key, + "versions": 0, + "total_size": 0, + "latest": None, + "_latest_sort": None, + }, + ) + record["versions"] += 1 + record["total_size"] += size + candidate = { + "version_id": version_id, + "archived_at": archived_at, + "size": size, + "reason": reason, + } + sort_key = ( + archived_at, + meta_file.stat().st_mtime, + ) + current_sort = record.get("_latest_sort") + if current_sort is None or sort_key > current_sort: + record["_latest_sort"] = sort_key + record["latest"] = candidate + for record in aggregated.values(): + record.pop("_latest_sort", None) + return sorted(aggregated.values(), key=lambda item: item["key"]) + + # ---------------------- Multipart helpers ---------------------- + def initiate_multipart_upload( + self, + bucket_name: str, + object_key: str, + *, + metadata: Optional[Dict[str, str]] = None, + ) -> str: + bucket_path = self._bucket_path(bucket_name) + if not bucket_path.exists(): + raise StorageError("Bucket does not exist") + bucket_id = bucket_path.name + safe_key = self._sanitize_object_key(object_key) + upload_id = uuid.uuid4().hex + upload_root = self._multipart_dir(bucket_id, upload_id) + upload_root.mkdir(parents=True, exist_ok=False) + manifest = { + "upload_id": upload_id, + "object_key": safe_key.as_posix(), + "metadata": self._normalize_metadata(metadata), + "parts": {}, + "created_at": _utc_isoformat(), + } + self._write_multipart_manifest(upload_root, manifest) + return upload_id + + def upload_multipart_part( + self, + bucket_name: str, + upload_id: str, + part_number: int, + stream: BinaryIO, + ) -> str: + if part_number < 1: + raise StorageError("part_number must be >= 1") + bucket_path = self._bucket_path(bucket_name) + manifest, upload_root = self._load_multipart_manifest(bucket_path.name, upload_id) + checksum = hashlib.md5() + part_filename = f"part-{part_number:05d}.part" + part_path = upload_root / part_filename + with part_path.open("wb") as target: + shutil.copyfileobj(_HashingReader(stream, checksum), target) + record = { + "etag": checksum.hexdigest(), + "size": part_path.stat().st_size, + "filename": part_filename, + } + parts = manifest.setdefault("parts", {}) + parts[str(part_number)] = record + self._write_multipart_manifest(upload_root, manifest) + return record["etag"] + + def complete_multipart_upload( + self, + bucket_name: str, + upload_id: str, + ordered_parts: List[Dict[str, Any]], + ) -> ObjectMeta: + if not ordered_parts: + raise StorageError("parts list required") + bucket_path = self._bucket_path(bucket_name) + bucket_id = bucket_path.name + manifest, upload_root = self._load_multipart_manifest(bucket_id, upload_id) + parts_map = manifest.get("parts") or {} + if not parts_map: + raise StorageError("No uploaded parts found") + validated: List[tuple[int, Dict[str, Any]]] = [] + for part in ordered_parts: + raw_number = part.get("part_number") + if raw_number is None: + raw_number = part.get("PartNumber") + try: + number = int(raw_number) + except (TypeError, ValueError) as exc: + raise StorageError("Each part must include part_number") from exc + if number < 1: + raise StorageError("part numbers must be >= 1") + key = str(number) + record = parts_map.get(key) + if not 
record: + raise StorageError(f"Part {number} missing from upload") + raw_etag = part.get("etag", part.get("ETag", "")) + supplied_etag = str(raw_etag).strip() or record.get("etag") + if supplied_etag and record.get("etag") and supplied_etag.strip('"') != record["etag"]: + raise StorageError(f"ETag mismatch for part {number}") + validated.append((number, record)) + validated.sort(key=lambda entry: entry[0]) + + safe_key = self._sanitize_object_key(manifest["object_key"]) + destination = bucket_path / safe_key + destination.parent.mkdir(parents=True, exist_ok=True) + if self._is_versioning_enabled(bucket_path) and destination.exists(): + self._archive_current_version(bucket_id, safe_key, reason="overwrite") + checksum = hashlib.md5() + with destination.open("wb") as target: + for _, record in validated: + part_path = upload_root / record["filename"] + if not part_path.exists(): + raise StorageError(f"Missing part file {record['filename']}") + with part_path.open("rb") as chunk: + while True: + data = chunk.read(1024 * 1024) + if not data: + break + checksum.update(data) + target.write(data) + + metadata = manifest.get("metadata") + if metadata: + self._write_metadata(bucket_id, safe_key, metadata) + else: + self._delete_metadata(bucket_id, safe_key) + + shutil.rmtree(upload_root, ignore_errors=True) + stat = destination.stat() + return ObjectMeta( + key=safe_key.as_posix(), + size=stat.st_size, + last_modified=datetime.fromtimestamp(stat.st_mtime), + etag=checksum.hexdigest(), + metadata=metadata, + ) + + def abort_multipart_upload(self, bucket_name: str, upload_id: str) -> None: + bucket_path = self._bucket_path(bucket_name) + upload_root = self._multipart_dir(bucket_path.name, upload_id) + if upload_root.exists(): + shutil.rmtree(upload_root, ignore_errors=True) + return + legacy_root = self._legacy_multipart_dir(bucket_path.name, upload_id) + if legacy_root.exists(): + shutil.rmtree(legacy_root, ignore_errors=True) + + # ---------------------- internal helpers ---------------------- + def _bucket_path(self, bucket_name: str) -> Path: + safe_name = self._sanitize_bucket_name(bucket_name) + return self.root / safe_name + + def _require_bucket_path(self, bucket_name: str) -> Path: + bucket_path = self._bucket_path(bucket_name) + if not bucket_path.exists(): + raise StorageError("Bucket does not exist") + return bucket_path + + def _object_path(self, bucket_name: str, object_key: str) -> Path: + bucket_path = self._bucket_path(bucket_name) + safe_key = self._sanitize_object_key(object_key) + return bucket_path / safe_key + + def _system_root_path(self) -> Path: + return self.root / self.SYSTEM_ROOT + + def _system_buckets_root(self) -> Path: + return self._system_root_path() / self.SYSTEM_BUCKETS_DIR + + def _system_bucket_root(self, bucket_name: str) -> Path: + return self._system_buckets_root() / bucket_name + + def _bucket_meta_root(self, bucket_name: str) -> Path: + return self._system_bucket_root(bucket_name) / self.BUCKET_META_DIR + + def _bucket_versions_root(self, bucket_name: str) -> Path: + return self._system_bucket_root(bucket_name) / self.BUCKET_VERSIONS_DIR + + def _multipart_root(self) -> Path: + return self._system_root_path() / self.SYSTEM_MULTIPART_DIR + + def _multipart_bucket_root(self, bucket_name: str) -> Path: + return self._multipart_root() / bucket_name + + def _legacy_metadata_file(self, bucket_name: str, key: Path) -> Path: + meta_root = self._legacy_meta_root(bucket_name) + meta_rel = Path(key.as_posix() + ".meta.json") + return meta_root / meta_rel + + def 
_legacy_meta_root(self, bucket_name: str) -> Path: + return self._bucket_path(bucket_name) / ".meta" + + def _legacy_versions_root(self, bucket_name: str) -> Path: + return self._bucket_path(bucket_name) / ".versions" + + def _legacy_version_dir(self, bucket_name: str, key: Path) -> Path: + return self._legacy_versions_root(bucket_name) / key + + def _legacy_multipart_bucket_root(self, bucket_name: str) -> Path: + return self._bucket_path(bucket_name) / ".multipart" + + def _legacy_multipart_dir(self, bucket_name: str, upload_id: str) -> Path: + return self._legacy_multipart_bucket_root(bucket_name) / upload_id + + def _ensure_system_roots(self) -> None: + for path in ( + self._system_root_path(), + self._system_buckets_root(), + self._multipart_root(), + self._system_root_path() / self.SYSTEM_TMP_DIR, + ): + path.mkdir(parents=True, exist_ok=True) + + def _multipart_dir(self, bucket_name: str, upload_id: str) -> Path: + return self._multipart_bucket_root(bucket_name) / upload_id + + def _version_dir(self, bucket_name: str, key: Path) -> Path: + return self._bucket_versions_root(bucket_name) / key + + def _bucket_config_path(self, bucket_name: str) -> Path: + return self._system_bucket_root(bucket_name) / self.BUCKET_CONFIG_FILE + + def _read_bucket_config(self, bucket_name: str) -> dict[str, Any]: + config_path = self._bucket_config_path(bucket_name) + if not config_path.exists(): + return {} + try: + data = json.loads(config_path.read_text(encoding="utf-8")) + return data if isinstance(data, dict) else {} + except (OSError, json.JSONDecodeError): + return {} + + def _write_bucket_config(self, bucket_name: str, payload: dict[str, Any]) -> None: + config_path = self._bucket_config_path(bucket_name) + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path.write_text(json.dumps(payload), encoding="utf-8") + + def _set_bucket_config_entry(self, bucket_name: str, key: str, value: Any | None) -> None: + config = self._read_bucket_config(bucket_name) + if value is None: + config.pop(key, None) + else: + config[key] = value + self._write_bucket_config(bucket_name, config) + + def _is_versioning_enabled(self, bucket_path: Path) -> bool: + config = self._read_bucket_config(bucket_path.name) + return bool(config.get("versioning_enabled")) + + def _load_multipart_manifest(self, bucket_name: str, upload_id: str) -> tuple[dict[str, Any], Path]: + upload_root = self._multipart_dir(bucket_name, upload_id) + if not upload_root.exists(): + upload_root = self._legacy_multipart_dir(bucket_name, upload_id) + manifest_path = upload_root / self.MULTIPART_MANIFEST + if not manifest_path.exists(): + raise StorageError("Multipart upload not found") + try: + manifest = json.loads(manifest_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + raise StorageError("Multipart manifest unreadable") from exc + return manifest, upload_root + + def _write_multipart_manifest(self, upload_root: Path, manifest: dict[str, Any]) -> None: + manifest_path = upload_root / self.MULTIPART_MANIFEST + manifest_path.parent.mkdir(parents=True, exist_ok=True) + manifest_path.write_text(json.dumps(manifest), encoding="utf-8") + + def _metadata_file(self, bucket_name: str, key: Path) -> Path: + meta_root = self._bucket_meta_root(bucket_name) + meta_rel = Path(key.as_posix() + ".meta.json") + return meta_root / meta_rel + + def _normalize_metadata(self, metadata: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]: + if not metadata: + return None + clean = {str(k).strip(): str(v) for k, v in 
metadata.items() if str(k).strip()} + return clean or None + + def _write_metadata(self, bucket_name: str, key: Path, metadata: Dict[str, str]) -> None: + clean = self._normalize_metadata(metadata) + if not clean: + self._delete_metadata(bucket_name, key) + return + meta_file = self._metadata_file(bucket_name, key) + meta_file.parent.mkdir(parents=True, exist_ok=True) + meta_file.write_text(json.dumps({"metadata": clean}), encoding="utf-8") + + def _archive_current_version(self, bucket_name: str, key: Path, *, reason: str) -> None: + bucket_path = self._bucket_path(bucket_name) + source = bucket_path / key + if not source.exists(): + return + version_dir = self._version_dir(bucket_name, key) + version_dir.mkdir(parents=True, exist_ok=True) + now = _utcnow() + version_id = f"{now.strftime('%Y%m%dT%H%M%S%fZ')}-{uuid.uuid4().hex[:8]}" + data_path = version_dir / f"{version_id}.bin" + shutil.copy2(source, data_path) + metadata = self._read_metadata(bucket_name, key) + record = { + "version_id": version_id, + "key": key.as_posix(), + "size": source.stat().st_size, + "archived_at": now.isoformat().replace("+00:00", "Z"), + "etag": self._compute_etag(source), + "metadata": metadata or {}, + "reason": reason, + } + manifest_path = version_dir / f"{version_id}.json" + manifest_path.write_text(json.dumps(record), encoding="utf-8") + + def _read_metadata(self, bucket_name: str, key: Path) -> Dict[str, str]: + for meta_file in (self._metadata_file(bucket_name, key), self._legacy_metadata_file(bucket_name, key)): + if not meta_file.exists(): + continue + try: + payload = json.loads(meta_file.read_text(encoding="utf-8")) + data = payload.get("metadata") + return data if isinstance(data, dict) else {} + except (OSError, json.JSONDecodeError): + return {} + return {} + + def _safe_unlink(self, path: Path) -> None: + attempts = 3 + last_error: PermissionError | None = None + for attempt in range(attempts): + try: + path.unlink() + return + except FileNotFoundError: + return + except PermissionError as exc: + last_error = exc + if os.name == "nt": + time.sleep(0.15 * (attempt + 1)) + except OSError as exc: + raise StorageError(f"Unable to delete object: {exc}") from exc + message = "Object file is currently in use. Close active previews or wait and try again." 
+ raise StorageError(message) from last_error + + def _delete_metadata(self, bucket_name: str, key: Path) -> None: + locations = ( + (self._metadata_file(bucket_name, key), self._bucket_meta_root(bucket_name)), + (self._legacy_metadata_file(bucket_name, key), self._legacy_meta_root(bucket_name)), + ) + for meta_file, meta_root in locations: + try: + if meta_file.exists(): + meta_file.unlink() + parent = meta_file.parent + while parent != meta_root and parent.exists() and not any(parent.iterdir()): + parent.rmdir() + parent = parent.parent + except OSError: + continue + + def _has_visible_objects(self, bucket_path: Path) -> bool: + for path in bucket_path.rglob("*"): + if not path.is_file(): + continue + rel = path.relative_to(bucket_path) + if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS: + continue + return True + return False + + def _has_archived_versions(self, bucket_path: Path) -> bool: + for version_root in ( + self._bucket_versions_root(bucket_path.name), + self._legacy_versions_root(bucket_path.name), + ): + if version_root.exists() and any(path.is_file() for path in version_root.rglob("*")): + return True + return False + + def _has_active_multipart_uploads(self, bucket_path: Path) -> bool: + for uploads_root in ( + self._multipart_bucket_root(bucket_path.name), + self._legacy_multipart_bucket_root(bucket_path.name), + ): + if uploads_root.exists() and any(path.is_file() for path in uploads_root.rglob("*")): + return True + return False + + def _remove_tree(self, path: Path) -> None: + if not path.exists(): + return + def _handle_error(func, target_path, exc_info): + try: + os.chmod(target_path, stat.S_IRWXU) + func(target_path) + except Exception as exc: # pragma: no cover - fallback failure + raise StorageError(f"Unable to delete bucket contents: {exc}") from exc + + try: + shutil.rmtree(path, onerror=_handle_error) + except FileNotFoundError: + return + except PermissionError as exc: + raise StorageError("Bucket in use. Close open files and try again") from exc + + @staticmethod + def _sanitize_bucket_name(bucket_name: str) -> str: + if not bucket_name: + raise StorageError("Bucket name required") + + name = bucket_name.lower() + if len(name) < 3 or len(name) > 63: + raise StorageError("Bucket name must be between 3 and 63 characters") + + if name.startswith("-") or name.endswith("-"): + raise StorageError("Bucket name cannot start or end with a hyphen") + + if ".." 
in name: + raise StorageError("Bucket name cannot contain consecutive periods") + + if name.startswith("xn--"): + raise StorageError("Bucket name cannot start with 'xn--'") + + if re.fullmatch(r"\d+\.\d+\.\d+\.\d+", name): + raise StorageError("Bucket name cannot be formatted like an IP address") + + if not re.fullmatch(r"[a-z0-9][a-z0-9.-]+[a-z0-9]", name): + raise StorageError("Bucket name can contain lowercase letters, numbers, dots, and hyphens") + + return name + + @staticmethod + def _sanitize_object_key(object_key: str) -> Path: + if not object_key: + raise StorageError("Object key required") + if len(object_key.encode("utf-8")) > 1024: + raise StorageError("Object key exceeds maximum length of 1024 bytes") + if "\x00" in object_key: + raise StorageError("Object key contains null bytes") + if object_key.startswith(("/", "\\")): + raise StorageError("Object key cannot start with a slash") + normalized = unicodedata.normalize("NFC", object_key) + if normalized != object_key: + raise StorageError("Object key must use normalized Unicode") + candidate = Path(normalized) + if candidate.is_absolute(): + raise StorageError("Absolute object keys are not allowed") + if getattr(candidate, "drive", ""): + raise StorageError("Object key cannot include a drive letter") + parts = [] + for part in candidate.parts: + if part in ("", ".", ".."): + raise StorageError("Object key contains invalid segments") + if any(ord(ch) < 32 for ch in part): + raise StorageError("Object key contains control characters") + if os.name == "nt": + if any(ch in part for ch in "<>:\"/\\|?*"): + raise StorageError("Object key contains characters not supported on Windows filesystems") + if part.endswith((" ", ".")): + raise StorageError("Object key segments cannot end with spaces or periods on Windows") + trimmed = part.upper().rstrip(". 
") + if trimmed in WINDOWS_RESERVED_NAMES: + raise StorageError(f"Invalid filename segment: {part}") + parts.append(part) + if parts: + top_level = parts[0] + if top_level in ObjectStorage.INTERNAL_FOLDERS or top_level == ObjectStorage.SYSTEM_ROOT: + raise StorageError("Object key uses a reserved prefix") + return Path(*parts) + + @staticmethod + def _compute_etag(path: Path) -> str: + checksum = hashlib.md5() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(8192), b""): + checksum.update(chunk) + return checksum.hexdigest() + + +class _HashingReader: + """Wraps a binary stream, updating the checksum as it is read.""" + + def __init__(self, stream: BinaryIO, checksum: Any) -> None: + self.stream = stream + self.checksum = checksum + + def read(self, size: int = -1) -> bytes: + data = self.stream.read(size) + if data: + self.checksum.update(data) + return data diff --git a/app/ui.py b/app/ui.py new file mode 100644 index 0000000..cdc587c --- /dev/null +++ b/app/ui.py @@ -0,0 +1,1134 @@ +"""Authenticated HTML UI for browsing buckets and objects.""" +from __future__ import annotations + +import json +import uuid +from typing import Any +from urllib.parse import urlparse + +import requests +from flask import ( + Blueprint, + Response, + current_app, + flash, + jsonify, + redirect, + render_template, + request, + send_file, + session, + url_for, +) +from flask_wtf.csrf import generate_csrf + +from .bucket_policies import BucketPolicyStore +from .connections import ConnectionStore, RemoteConnection +from .extensions import limiter +from .iam import IamError +from .replication import ReplicationManager, ReplicationRule +from .secret_store import EphemeralSecretStore +from .storage import ObjectStorage, StorageError + +ui_bp = Blueprint("ui", __name__, template_folder="../templates", url_prefix="/ui") + + + +def _storage() -> ObjectStorage: + return current_app.extensions["object_storage"] + + +def _iam(): + return current_app.extensions["iam"] + + + +def _bucket_policies() -> BucketPolicyStore: + store: BucketPolicyStore = current_app.extensions["bucket_policies"] + store.maybe_reload() + return store + + +def _connections() -> ConnectionStore: + return current_app.extensions["connections"] + + +def _replication() -> ReplicationManager: + return current_app.extensions["replication"] + + +def _secret_store() -> EphemeralSecretStore: + store: EphemeralSecretStore = current_app.extensions["secret_store"] + store.purge_expired() + return store + + +def _format_bytes(num: int) -> str: + step = 1024 + units = ["B", "KB", "MB", "GB", "TB", "PB"] + value = float(num) + for unit in units: + if value < step or unit == units[-1]: + if unit == "B": + return f"{int(value)} B" + return f"{value:.1f} {unit}" + value /= step + return f"{value:.1f} PB" + + +def _friendly_error_message(exc: Exception) -> str: + message = str(exc) or "An unexpected error occurred" + if isinstance(exc, IamError): + return f"Access issue: {message}" + if isinstance(exc, StorageError): + return f"Storage issue: {message}" + return message + + +def _policy_allows_public_read(policy: dict[str, Any]) -> bool: + statements = policy.get("Statement", []) + if isinstance(statements, dict): + statements = [statements] + list_allowed = False + get_allowed = False + for statement in statements: + if not isinstance(statement, dict): + continue + if statement.get("Effect") != "Allow": + continue + if statement.get("Condition"): + continue + principal = statement.get("Principal") + principal_all = principal == "*" or ( + 
isinstance(principal, dict) + and any(value == "*" or value == ["*"] for value in principal.values()) + ) + if not principal_all: + continue + actions = statement.get("Action", []) + if isinstance(actions, str): + actions = [actions] + normalized = {action.lower() for action in actions} + if not list_allowed: + list_allowed = any(action in {"*", "s3:*", "s3:listbucket"} for action in normalized) + if not get_allowed: + get_allowed = any(action in {"*", "s3:*", "s3:getobject"} for action in normalized) + if list_allowed and get_allowed: + return True + return False + + +def _bucket_access_descriptor(policy: dict[str, Any] | None) -> tuple[str, str]: + if not policy: + return ("IAM only", "text-bg-secondary") + if _policy_allows_public_read(policy): + return ("Public read", "text-bg-warning") + return ("Custom policy", "text-bg-info") + + +def _current_principal(): + creds = session.get("credentials") + if not creds: + return None + try: + return _iam().authenticate(creds["access_key"], creds["secret_key"]) + except IamError: + session.pop("credentials", None) + return None + + +def _authorize_ui(principal, bucket_name: str | None, action: str, *, object_key: str | None = None) -> None: + iam_allowed = True + iam_error: IamError | None = None + try: + _iam().authorize(principal, bucket_name, action) + except IamError as exc: + iam_allowed = False + iam_error = exc + decision = None + enforce_bucket_policies = current_app.config.get("UI_ENFORCE_BUCKET_POLICIES", True) + if bucket_name and enforce_bucket_policies: + access_key = principal.access_key if principal else None + decision = _bucket_policies().evaluate(access_key, bucket_name, object_key, action) + if decision == "deny": + raise IamError("Access denied by bucket policy") + if not iam_allowed and decision != "allow": + raise iam_error or IamError("Access denied") + + +def _api_headers() -> dict[str, str]: + creds = session.get("credentials") or {} + return { + "X-Access-Key": creds.get("access_key", ""), + "X-Secret-Key": creds.get("secret_key", ""), + } + + +@ui_bp.app_context_processor +def inject_nav_state() -> dict[str, Any]: + principal = _current_principal() + can_manage = False + if principal: + try: + _iam().authorize(principal, None, "iam:list_users") + can_manage = True + except IamError: + can_manage = False + return { + "principal": principal, + "can_manage_iam": can_manage, + "csrf_token": generate_csrf, + } + + +@ui_bp.before_request +def ensure_authenticated(): + exempt = {"ui.login"} + if request.endpoint in exempt or request.endpoint is None: + return None + if _current_principal() is None: + return redirect(url_for("ui.login")) + return None + + +@ui_bp.route("/login", methods=["GET", "POST"]) +def login(): + if request.method == "POST": + access_key = request.form.get("access_key", "").strip() + secret_key = request.form.get("secret_key", "").strip() + try: + principal = _iam().authenticate(access_key, secret_key) + except IamError as exc: + flash(_friendly_error_message(exc), "danger") + return render_template("login.html") + session["credentials"] = {"access_key": access_key, "secret_key": secret_key} + session.permanent = True + flash(f"Welcome back, {principal.display_name}", "success") + return redirect(url_for("ui.buckets_overview")) + return render_template("login.html") + + +@ui_bp.post("/logout") +def logout(): + session.pop("credentials", None) + flash("Signed out", "info") + return redirect(url_for("ui.login")) + + +@ui_bp.get("/docs") +def docs_page(): + principal = _current_principal() + api_base = 
current_app.config.get("API_BASE_URL") or "http://127.0.0.1:5000" + api_base = api_base.rstrip("/") + parsed = urlparse(api_base) + api_host = parsed.netloc or parsed.path or api_base + return render_template( + "docs.html", + principal=principal, + api_base=api_base, + api_host=api_host, + ) + + +@ui_bp.get("/") +def buckets_overview(): + principal = _current_principal() + buckets = _storage().list_buckets() + allowed_names = set(_iam().buckets_for_principal(principal, [b.name for b in buckets])) + visible_buckets = [] + policy_store = _bucket_policies() + for bucket in buckets: + if bucket.name not in allowed_names: + continue + policy = policy_store.get_policy(bucket.name) + stats = _storage().bucket_stats(bucket.name) + access_label, access_badge = _bucket_access_descriptor(policy) + visible_buckets.append({ + "meta": bucket, + "summary": { + "objects": stats["objects"], + "total_bytes": stats["bytes"], + "human_size": _format_bytes(stats["bytes"]), + }, + "access_label": access_label, + "access_badge": access_badge, + "has_policy": bool(policy), + "detail_url": url_for("ui.bucket_detail", bucket_name=bucket.name), + }) + return render_template("buckets.html", buckets=visible_buckets, principal=principal) + + +@ui_bp.post("/buckets") +def create_bucket(): + principal = _current_principal() + bucket_name = request.form.get("bucket_name", "").strip() + if not bucket_name: + flash("Bucket name is required", "danger") + return redirect(url_for("ui.buckets_overview")) + try: + _authorize_ui(principal, bucket_name, "write") + _storage().create_bucket(bucket_name) + flash(f"Bucket '{bucket_name}' created", "success") + except (StorageError, FileExistsError, IamError) as exc: + flash(_friendly_error_message(exc), "danger") + return redirect(url_for("ui.buckets_overview")) + + +@ui_bp.get("/buckets/") +def bucket_detail(bucket_name: str): + principal = _current_principal() + storage = _storage() + try: + _authorize_ui(principal, bucket_name, "list") + objects = storage.list_objects(bucket_name) + except (StorageError, IamError) as exc: + flash(_friendly_error_message(exc), "danger") + return redirect(url_for("ui.buckets_overview")) + bucket_policy = _bucket_policies().get_policy(bucket_name) + policy_text = json.dumps(bucket_policy, indent=2) if bucket_policy else "" + default_policy = json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AllowList", + "Effect": "Allow", + "Principal": "*", + "Action": ["s3:ListBucket"], + "Resource": [f"arn:aws:s3:::{bucket_name}"], + }, + { + "Sid": "AllowRead", + "Effect": "Allow", + "Principal": "*", + "Action": ["s3:GetObject"], + "Resource": [f"arn:aws:s3:::{bucket_name}/*"], + }, + ], + }, + indent=2, + ) + can_edit_policy = False + if principal: + try: + _iam().authorize(principal, bucket_name, "policy") + can_edit_policy = True + except IamError: + can_edit_policy = False + try: + versioning_enabled = storage.is_versioning_enabled(bucket_name) + except StorageError: + versioning_enabled = False + can_manage_versioning = False + if principal: + try: + _iam().authorize(principal, bucket_name, "write") + can_manage_versioning = True + except IamError: + can_manage_versioning = False + + # Replication info + replication_rule = _replication().get_rule(bucket_name) + connections = _connections().list() + + return render_template( + "bucket_detail.html", + bucket_name=bucket_name, + objects=objects, + principal=principal, + bucket_policy_text=policy_text, + bucket_policy=bucket_policy, + can_edit_policy=can_edit_policy, + 
can_manage_versioning=can_manage_versioning, + default_policy=default_policy, + versioning_enabled=versioning_enabled, + replication_rule=replication_rule, + connections=connections, + ) + + +@ui_bp.post("/buckets//upload") +@limiter.limit("30 per minute") +def upload_object(bucket_name: str): + principal = _current_principal() + file = request.files.get("object") + object_key = request.form.get("object_key") + metadata_raw = (request.form.get("metadata") or "").strip() + wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest" + + def _response(success: bool, message: str, status: int = 200): + if wants_json: + payload = {"status": "ok" if success else "error", "message": message} + return jsonify(payload), status + flash(message, "success" if success else "danger") + return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="objects")) + + if file and not object_key: + object_key = file.filename + if not object_key: + return _response(False, "Object key is required", 400) + if not file: + return _response(False, "Choose a file to upload", 400) + + metadata = None + if metadata_raw: + try: + parsed = json.loads(metadata_raw) + if not isinstance(parsed, dict): + raise ValueError + metadata = {str(k): str(v) for k, v in parsed.items()} + except ValueError: + return _response(False, "Metadata must be a JSON object", 400) + + try: + _authorize_ui(principal, bucket_name, "write") + _storage().put_object(bucket_name, object_key, file.stream, metadata=metadata) + + # Trigger replication + _replication().trigger_replication(bucket_name, object_key) + + message = f"Uploaded '{object_key}'" + if metadata: + message += " with metadata" + return _response(True, message) + except (StorageError, IamError) as exc: + return _response(False, _friendly_error_message(exc), 400) + + +@ui_bp.post("/buckets//multipart/initiate") +def initiate_multipart_upload(bucket_name: str): + principal = _current_principal() + try: + _authorize_ui(principal, bucket_name, "write") + except IamError as exc: + return jsonify({"error": str(exc)}), 403 + payload = request.get_json(silent=True) or {} + object_key = str(payload.get("object_key", "")).strip() + if not object_key: + return jsonify({"error": "object_key is required"}), 400 + metadata_payload = payload.get("metadata") + metadata = None + if metadata_payload is not None: + if not isinstance(metadata_payload, dict): + return jsonify({"error": "metadata must be an object"}), 400 + metadata = {str(k): str(v) for k, v in metadata_payload.items()} + try: + upload_id = _storage().initiate_multipart_upload(bucket_name, object_key, metadata=metadata) + except StorageError as exc: + return jsonify({"error": str(exc)}), 400 + return jsonify({"upload_id": upload_id}) + + +@ui_bp.put("/buckets//multipart//parts") +def upload_multipart_part(bucket_name: str, upload_id: str): + principal = _current_principal() + try: + _authorize_ui(principal, bucket_name, "write") + except IamError as exc: + return jsonify({"error": str(exc)}), 403 + try: + part_number = int(request.args.get("partNumber", "0")) + except ValueError: + return jsonify({"error": "partNumber must be an integer"}), 400 + if part_number < 1: + return jsonify({"error": "partNumber must be >= 1"}), 400 + try: + etag = _storage().upload_multipart_part(bucket_name, upload_id, part_number, request.stream) + except StorageError as exc: + return jsonify({"error": str(exc)}), 400 + return jsonify({"etag": etag, "part_number": part_number}) + + +@ui_bp.post("/buckets//multipart//complete") +def 
complete_multipart_upload(bucket_name: str, upload_id: str): + principal = _current_principal() + try: + _authorize_ui(principal, bucket_name, "write") + except IamError as exc: + return jsonify({"error": str(exc)}), 403 + payload = request.get_json(silent=True) or {} + parts_payload = payload.get("parts") + if not isinstance(parts_payload, list) or not parts_payload: + return jsonify({"error": "parts array required"}), 400 + normalized = [] + for part in parts_payload: + if not isinstance(part, dict): + return jsonify({"error": "Each part must be an object"}), 400 + raw_number = part.get("part_number") or part.get("PartNumber") + try: + number = int(raw_number) + except (TypeError, ValueError): + return jsonify({"error": "Each part must include part_number"}), 400 + etag = str(part.get("etag") or part.get("ETag") or "").strip() + normalized.append({"part_number": number, "etag": etag}) + try: + result = _storage().complete_multipart_upload(bucket_name, upload_id, normalized) + + # Trigger replication + _replication().trigger_replication(bucket_name, result["key"]) + + return jsonify(result) + except StorageError as exc: + return jsonify({"error": str(exc)}), 400 + + +@ui_bp.delete("/buckets//multipart/") +def abort_multipart_upload(bucket_name: str, upload_id: str): + principal = _current_principal() + try: + _authorize_ui(principal, bucket_name, "write") + except IamError as exc: + return jsonify({"error": str(exc)}), 403 + try: + _storage().abort_multipart_upload(bucket_name, upload_id) + except StorageError as exc: + return jsonify({"error": str(exc)}), 400 + return jsonify({"status": "aborted"}) + + +@ui_bp.post("/buckets//delete") +@limiter.limit("20 per minute") +def delete_bucket(bucket_name: str): + principal = _current_principal() + try: + _authorize_ui(principal, bucket_name, "delete") + _storage().delete_bucket(bucket_name) + _bucket_policies().delete_policy(bucket_name) + flash(f"Bucket '{bucket_name}' removed", "success") + except (StorageError, IamError) as exc: + flash(_friendly_error_message(exc), "danger") + return redirect(url_for("ui.buckets_overview")) + + +@ui_bp.post("/buckets//objects//delete") +@limiter.limit("60 per minute") +def delete_object(bucket_name: str, object_key: str): + principal = _current_principal() + purge_versions = request.form.get("purge_versions") == "1" + try: + _authorize_ui(principal, bucket_name, "delete", object_key=object_key) + if purge_versions: + _storage().purge_object(bucket_name, object_key) + flash(f"Permanently deleted '{object_key}' and all versions", "success") + else: + _storage().delete_object(bucket_name, object_key) + flash(f"Deleted '{object_key}'", "success") + except (IamError, StorageError) as exc: + flash(_friendly_error_message(exc), "danger") + return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name)) + + +@ui_bp.post("/buckets//objects/bulk-delete") +@limiter.limit("40 per minute") +def bulk_delete_objects(bucket_name: str): + principal = _current_principal() + wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest" or request.is_json + payload = request.get_json(silent=True) or {} + keys_payload = payload.get("keys") + purge_versions = bool(payload.get("purge_versions")) + + def _respond(success: bool, message: str, *, deleted=None, errors=None, status_code: int = 200): + if wants_json: + body = { + "status": "ok" if success else "partial", + "message": message, + "deleted": deleted or [], + "errors": errors or [], + } + if not success and not errors: + body["status"] = "error" + return 
jsonify(body), status_code
+        flash(message, "success" if success and not errors else "warning")
+        return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name))
+
+    if not isinstance(keys_payload, list):
+        return _respond(False, "keys must be provided as a JSON array", status_code=400)
+
+    cleaned: list[str] = []
+    for entry in keys_payload:
+        if isinstance(entry, str):
+            candidate = entry.strip()
+            if candidate:
+                cleaned.append(candidate)
+    if not cleaned:
+        return _respond(False, "Select at least one object to delete", status_code=400)
+
+    MAX_KEYS = current_app.config.get("BULK_DELETE_MAX_KEYS", 500)
+    if len(cleaned) > MAX_KEYS:
+        return _respond(False, f"A maximum of {MAX_KEYS} objects can be deleted per request", status_code=400)
+
+    unique_keys = list(dict.fromkeys(cleaned))
+    storage = _storage()
+    try:
+        _authorize_ui(principal, bucket_name, "delete")
+    except IamError as exc:
+        return _respond(False, _friendly_error_message(exc), status_code=403)
+
+    deleted: list[str] = []
+    errors: list[dict[str, str]] = []
+    for key in unique_keys:
+        try:
+            if purge_versions:
+                storage.purge_object(bucket_name, key)
+            else:
+                storage.delete_object(bucket_name, key)
+            deleted.append(key)
+        except StorageError as exc:
+            errors.append({"key": key, "error": str(exc)})
+
+    if not deleted and errors:
+        return _respond(False, "Unable to delete the selected objects", deleted=deleted, errors=errors, status_code=400)
+
+    message = f"Deleted {len(deleted)} object{'s' if len(deleted) != 1 else ''}"
+    if purge_versions and deleted:
+        message += " (including archived versions)"
+    if errors:
+        message += f"; {len(errors)} failed"
+    return _respond(not errors, message, deleted=deleted, errors=errors)
+
+
+@ui_bp.post("/buckets//objects/bulk-download")
+@limiter.limit("10 per minute")
+def bulk_download_objects(bucket_name: str):
+    import io
+    import zipfile
+
+    principal = _current_principal()
+    payload = request.get_json(silent=True) or {}
+    keys_payload = payload.get("keys")
+
+    if not isinstance(keys_payload, list):
+        return jsonify({"error": "keys must be provided as a JSON array"}), 400
+
+    cleaned: list[str] = []
+    for entry in keys_payload:
+        if isinstance(entry, str):
+            candidate = entry.strip()
+            if candidate:
+                cleaned.append(candidate)
+    if not cleaned:
+        return jsonify({"error": "Select at least one object to download"}), 400
+
+    MAX_KEYS = current_app.config.get("BULK_DELETE_MAX_KEYS", 500)  # Reuse the bulk-delete key limit
+    if len(cleaned) > MAX_KEYS:
+        return jsonify({"error": f"A maximum of {MAX_KEYS} objects can be downloaded per request"}), 400
+
+    unique_keys = list(dict.fromkeys(cleaned))
+    storage = _storage()
+
+    # Authorize once at the bucket level; each key is re-checked below so
+    # object-level bucket-policy statements are still honored.
+    try:
+        _authorize_ui(principal, bucket_name, "read")
+    except IamError as exc:
+        return jsonify({"error": str(exc)}), 403
+
+    # Stream the selected objects into an in-memory ZIP archive
+    buffer = io.BytesIO()
+    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
+        for key in unique_keys:
+            try:
+                # Re-check read access per key so object-level denies are enforced.
+                _authorize_ui(principal, bucket_name, "read", object_key=key)
+
+                path = storage.get_object_path(bucket_name, key)
+                # Use the key as the filename in the zip
+                zf.write(path, arcname=key)
+            except (StorageError, IamError):
+                # Skip objects we cannot read or that no longer exist
+                continue
+
+    buffer.seek(0)
+    return send_file(
+        buffer,
+        as_attachment=True,
+        download_name=f"{bucket_name}-download.zip",
+        mimetype="application/zip"
+    )
+
+
+@ui_bp.post("/buckets//objects//purge")
+@limiter.limit("30 per minute")
+def purge_object_versions(bucket_name: str, object_key: str):
+    principal = _current_principal()
+    wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
+    try:
+        _authorize_ui(principal, bucket_name, "delete", object_key=object_key)
+        _storage().purge_object(bucket_name, object_key)
+    except IamError as exc:
+        if wants_json:
+            return jsonify({"error": str(exc)}), 403
+        flash(_friendly_error_message(exc), "danger")
+        return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name))
+    except StorageError as exc:
+        if wants_json:
+            return jsonify({"error": str(exc)}), 400
+        flash(_friendly_error_message(exc), "danger")
+        return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name))
+    message = f"Removed archived versions for '{object_key}'"
+    if wants_json:
+        return jsonify({"status": "ok", "message": message})
+    flash(message, "success")
+    return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name))
+
+
+@ui_bp.get("/buckets//objects//preview")
+def object_preview(bucket_name: str, object_key: str) -> Response:
+    principal = _current_principal()
+    try:
+        _authorize_ui(principal, bucket_name, "read", object_key=object_key)
+        path = _storage().get_object_path(bucket_name, object_key)
+    except (StorageError, IamError) as exc:
+        status = 403 if isinstance(exc, IamError) else 404
+        return Response(str(exc), status=status)
+    download = request.args.get("download") == "1"
+    return send_file(path, as_attachment=download, download_name=path.name)
+
+
+@ui_bp.post("/buckets//objects//presign")
+def object_presign(bucket_name: str, object_key: str):
+    principal = _current_principal()
+    payload = request.get_json(silent=True) or {}
+    method = str(payload.get("method", "GET")).upper()
+    action = "read" if method == "GET" else ("delete" if method == "DELETE" else "write")
+    try:
+        _authorize_ui(principal, bucket_name, action, object_key=object_key)
+    except IamError as exc:
+        return jsonify({"error": str(exc)}), 403
+    api_base = current_app.config["API_BASE_URL"].rstrip("/")
+    url = f"{api_base}/presign/{bucket_name}/{object_key}"
+    try:
+        response = requests.post(url, headers=_api_headers(), json=payload, timeout=5)
+    except requests.RequestException as exc:
+        return jsonify({"error": f"API unavailable: {exc}"}), 502
+    try:
+        body = response.json()
+    except ValueError:
+        # Handle XML error responses from S3 backend
+        text = response.text or ""
+        if text.strip().startswith("<"):
+            import xml.etree.ElementTree as ET
+            try:
+                root = ET.fromstring(text)
+                # Try to find Message or Code
+                message = root.findtext(".//Message") or root.findtext(".//Code") or "Unknown S3 error"
+                body = {"error": message}
+            except ET.ParseError:
+                body = {"error": text or "API returned an empty response"}
+        else:
+            body = {"error": text or "API returned an empty response"}
+    return jsonify(body), response.status_code
+
+
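+# The next three routes surface the versioning data kept by ObjectStorage:
+# listing the archived versions recorded for a key, listing archived
+# ("orphaned") objects for a bucket, and restoring a chosen version.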
+@ui_bp.get("/buckets//objects//versions") +def object_versions(bucket_name: str, object_key: str): + principal = _current_principal() + try: + _authorize_ui(principal, bucket_name, "read", object_key=object_key) + except IamError as exc: + return jsonify({"error": str(exc)}), 403 + try: + versions = _storage().list_object_versions(bucket_name, object_key) + except StorageError as exc: + return jsonify({"error": str(exc)}), 400 + return jsonify({"versions": versions}) + + +@ui_bp.get("/buckets//archived") +def archived_objects(bucket_name: str): + principal = _current_principal() + try: + _authorize_ui(principal, bucket_name, "list") + except IamError as exc: + return jsonify({"error": str(exc)}), 403 + try: + entries = _storage().list_orphaned_objects(bucket_name) + except StorageError as exc: + return jsonify({"error": str(exc)}), 400 + payload: list[dict[str, Any]] = [] + for entry in entries: + latest = entry.get("latest") or {} + restore_url = None + if latest.get("version_id"): + restore_url = url_for( + "ui.restore_object_version", + bucket_name=bucket_name, + object_key=entry["key"], + version_id=latest["version_id"], + ) + purge_url = url_for("ui.purge_object_versions", bucket_name=bucket_name, object_key=entry["key"]) + payload.append( + { + "key": entry["key"], + "versions": entry.get("versions", 0), + "total_size": entry.get("total_size", 0), + "latest": entry.get("latest"), + "restore_url": restore_url, + "purge_url": purge_url, + } + ) + return jsonify({"objects": payload}) + + +@ui_bp.post("/buckets//objects//versions//restore") +def restore_object_version(bucket_name: str, object_key: str, version_id: str): + principal = _current_principal() + try: + _authorize_ui(principal, bucket_name, "write", object_key=object_key) + except IamError as exc: + return jsonify({"error": str(exc)}), 403 + try: + meta = _storage().restore_object_version(bucket_name, object_key, version_id) + except StorageError as exc: + return jsonify({"error": str(exc)}), 400 + message = f"Restored '{meta.key}'" if meta else "Object restored" + return jsonify({"status": "ok", "message": message}) + + +@ui_bp.post("/buckets//policy") +@limiter.limit("10 per minute") +def update_bucket_policy(bucket_name: str): + principal = _current_principal() + action = request.form.get("mode", "upsert") + try: + _authorize_ui(principal, bucket_name, "policy") + except IamError as exc: + flash(str(exc), "danger") + return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name)) + store = _bucket_policies() + if action == "delete": + store.delete_policy(bucket_name) + flash("Bucket policy removed", "info") + return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="permissions")) + document = request.form.get("policy_document", "").strip() + if not document: + flash("Provide a JSON policy document", "danger") + return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="permissions")) + try: + payload = json.loads(document) + store.set_policy(bucket_name, payload) + flash("Bucket policy saved", "success") + except (json.JSONDecodeError, ValueError) as exc: + flash(f"Policy error: {exc}", "danger") + return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="permissions")) + + +@ui_bp.post("/buckets//versioning") +def update_bucket_versioning(bucket_name: str): + principal = _current_principal() + try: + _authorize_ui(principal, bucket_name, "write") + except IamError as exc: + flash(_friendly_error_message(exc), "danger") + return redirect(url_for("ui.bucket_detail", 
bucket_name=bucket_name, tab="properties")) + state = request.form.get("state", "enable") + enable = state == "enable" + try: + _storage().set_bucket_versioning(bucket_name, enable) + except StorageError as exc: + flash(_friendly_error_message(exc), "danger") + return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) + flash("Versioning enabled" if enable else "Versioning suspended", "success") + return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties")) + + +@ui_bp.get("/iam") +def iam_dashboard(): + principal = _current_principal() + iam_service = _iam() + secret_token = request.args.get("secret_token") + disclosed_secret: dict[str, str] | None = None + if secret_token: + payload = _secret_store().pop(secret_token) + if isinstance(payload, dict): + access_key = str(payload.get("access_key", "")) + secret_key = payload.get("secret_key") + if secret_key: + disclosed_secret = { + "access_key": access_key, + "secret_key": str(secret_key), + "operation": str(payload.get("operation", "create")), + } + locked = False + locked_reason = None + try: + iam_service.authorize(principal, None, "iam:list_users") + except IamError as exc: + locked = True + locked_reason = str(exc) + users = iam_service.list_users() if not locked else [] + config_summary = iam_service.config_summary() + config_document = json.dumps(iam_service.export_config(mask_secrets=True), indent=2) + return render_template( + "iam.html", + users=users, + principal=principal, + iam_locked=locked, + locked_reason=locked_reason, + config_summary=config_summary, + config_document=config_document, + disclosed_secret=disclosed_secret, + ) + + +@ui_bp.post("/iam/users") +def create_iam_user(): + principal = _current_principal() + try: + _iam().authorize(principal, None, "iam:create_user") + except IamError as exc: + flash(str(exc), "danger") + return redirect(url_for("ui.iam_dashboard")) + display_name = request.form.get("display_name", "").strip() or "Unnamed" + if len(display_name) > 64: + flash("Display name must be 64 characters or fewer", "danger") + return redirect(url_for("ui.iam_dashboard")) + policies_text = request.form.get("policies", "").strip() + policies = None + if policies_text: + try: + policies = json.loads(policies_text) + except json.JSONDecodeError as exc: + flash(f"Invalid JSON: {exc}", "danger") + return redirect(url_for("ui.iam_dashboard")) + try: + created = _iam().create_user(display_name=display_name, policies=policies) + except IamError as exc: + flash(str(exc), "danger") + return redirect(url_for("ui.iam_dashboard")) + + token = _secret_store().remember( + { + "access_key": created["access_key"], + "secret_key": created["secret_key"], + "operation": "create", + } + ) + flash(f"Created user {created['access_key']}. 
Copy the secret below.", "success") + return redirect(url_for("ui.iam_dashboard", secret_token=token)) + + +@ui_bp.post("/iam/users//rotate") +def rotate_iam_secret(access_key: str): + principal = _current_principal() + try: + _iam().authorize(principal, None, "iam:rotate_key") + except IamError as exc: + if request.accept_mimetypes.accept_json and not request.accept_mimetypes.accept_html: + return jsonify({"error": str(exc)}), 403 + flash(str(exc), "danger") + return redirect(url_for("ui.iam_dashboard")) + try: + new_secret = _iam().rotate_secret(access_key) + except IamError as exc: + if request.accept_mimetypes.accept_json and not request.accept_mimetypes.accept_html: + return jsonify({"error": str(exc)}), 400 + flash(str(exc), "danger") + return redirect(url_for("ui.iam_dashboard")) + + if request.accept_mimetypes.accept_json and not request.accept_mimetypes.accept_html: + return jsonify({ + "access_key": access_key, + "secret_key": new_secret, + "message": f"Secret rotated for {access_key}", + }) + + token = _secret_store().remember( + { + "access_key": access_key, + "secret_key": new_secret, + "operation": "rotate", + } + ) + flash(f"Rotated secret for {access_key}. Copy the secret below.", "info") + return redirect(url_for("ui.iam_dashboard", secret_token=token)) + + +@ui_bp.post("/iam/users//update") +def update_iam_user(access_key: str): + principal = _current_principal() + try: + _iam().authorize(principal, None, "iam:create_user") + except IamError as exc: + flash(str(exc), "danger") + return redirect(url_for("ui.iam_dashboard")) + + display_name = request.form.get("display_name", "").strip() + if display_name: + if len(display_name) > 64: + flash("Display name must be 64 characters or fewer", "danger") + else: + try: + _iam().update_user(access_key, display_name) + flash(f"Updated user {access_key}", "success") + except IamError as exc: + flash(str(exc), "danger") + + return redirect(url_for("ui.iam_dashboard")) + + +@ui_bp.post("/iam/users//delete") +def delete_iam_user(access_key: str): + principal = _current_principal() + try: + _iam().authorize(principal, None, "iam:delete_user") + except IamError as exc: + flash(str(exc), "danger") + return redirect(url_for("ui.iam_dashboard")) + + if access_key == principal.access_key: + # Self-deletion + try: + _iam().delete_user(access_key) + session.pop("credentials", None) + flash("Your account has been deleted.", "info") + return redirect(url_for("ui.login")) + except IamError as exc: + flash(str(exc), "danger") + return redirect(url_for("ui.iam_dashboard")) + + try: + _iam().delete_user(access_key) + flash(f"Deleted user {access_key}", "success") + except IamError as exc: + flash(str(exc), "danger") + return redirect(url_for("ui.iam_dashboard")) + + +@ui_bp.post("/iam/users//policies") +def update_iam_policies(access_key: str): + principal = _current_principal() + try: + _iam().authorize(principal, None, "iam:update_policy") + except IamError as exc: + flash(str(exc), "danger") + return redirect(url_for("ui.iam_dashboard")) + + policies_raw = request.form.get("policies", "").strip() + if not policies_raw: + # Empty policies list is valid (clears permissions) + policies = [] + else: + try: + policies = json.loads(policies_raw) + if not isinstance(policies, list): + raise ValueError("Policies must be a list") + except (ValueError, json.JSONDecodeError): + flash("Invalid JSON format for policies", "danger") + return redirect(url_for("ui.iam_dashboard")) + + try: + _iam().update_user_policies(access_key, policies) + flash(f"Updated 
policies for {access_key}", "success") + except IamError as exc: + flash(str(exc), "danger") + + return redirect(url_for("ui.iam_dashboard")) + + +@ui_bp.post("/connections") +def create_connection(): + principal = _current_principal() + try: + _iam().authorize(principal, None, "iam:list_users") + except IamError: + flash("Access denied", "danger") + return redirect(url_for("ui.buckets_overview")) + + name = request.form.get("name", "").strip() + endpoint = request.form.get("endpoint_url", "").strip() + access_key = request.form.get("access_key", "").strip() + secret_key = request.form.get("secret_key", "").strip() + region = request.form.get("region", "us-east-1").strip() + + if not all([name, endpoint, access_key, secret_key]): + flash("All fields are required", "danger") + return redirect(url_for("ui.connections_dashboard")) + + conn = RemoteConnection( + id=str(uuid.uuid4()), + name=name, + endpoint_url=endpoint, + access_key=access_key, + secret_key=secret_key, + region=region + ) + _connections().add(conn) + flash(f"Connection '{name}' created", "success") + return redirect(url_for("ui.connections_dashboard")) + + +@ui_bp.post("/connections//delete") +def delete_connection(connection_id: str): + principal = _current_principal() + try: + _iam().authorize(principal, None, "iam:list_users") + except IamError: + flash("Access denied", "danger") + return redirect(url_for("ui.buckets_overview")) + + _connections().delete(connection_id) + flash("Connection deleted", "success") + return redirect(url_for("ui.connections_dashboard")) + + +@ui_bp.post("/buckets//replication") +def update_bucket_replication(bucket_name: str): + principal = _current_principal() + try: + _authorize_ui(principal, bucket_name, "write") + except IamError as exc: + flash(str(exc), "danger") + return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication")) + + action = request.form.get("action") + + if action == "delete": + _replication().delete_rule(bucket_name) + flash("Replication disabled", "info") + else: + target_conn_id = request.form.get("target_connection_id") + target_bucket = request.form.get("target_bucket", "").strip() + + if not target_conn_id or not target_bucket: + flash("Target connection and bucket are required", "danger") + else: + rule = ReplicationRule( + bucket_name=bucket_name, + target_connection_id=target_conn_id, + target_bucket=target_bucket, + enabled=True + ) + _replication().set_rule(rule) + flash("Replication configured", "success") + + return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication")) + + +@ui_bp.get("/connections") +def connections_dashboard(): + principal = _current_principal() + try: + _iam().authorize(principal, None, "iam:list_users") + except IamError: + flash("Access denied", "danger") + return redirect(url_for("ui.buckets_overview")) + + connections = _connections().list() + return render_template("connections.html", connections=connections, principal=principal) + + +@ui_bp.app_errorhandler(404) +def ui_not_found(error): # type: ignore[override] + prefix = ui_bp.url_prefix or "" + path = request.path or "" + wants_html = request.accept_mimetypes.accept_html + if wants_html and (not prefix or path.startswith(prefix)): + return render_template("404.html"), 404 + return error diff --git a/app/version.py b/app/version.py new file mode 100644 index 0000000..d2eb696 --- /dev/null +++ b/app/version.py @@ -0,0 +1,9 @@ +"""Central location for the application version string.""" +from __future__ import annotations + 
+APP_VERSION = "0.1.0" + + +def get_version() -> str: + """Return the current application version.""" + return APP_VERSION diff --git a/docs.md b/docs.md new file mode 100644 index 0000000..e55a737 --- /dev/null +++ b/docs.md @@ -0,0 +1,304 @@ +# MyFSIO Documentation + +This document expands on the README to describe the full workflow for running, configuring, and extending MyFSIO. Use it as a playbook for local S3-style experimentation. + +## 1. System Overview + +MyFSIO ships two Flask entrypoints that share the same storage, IAM, and bucket-policy state: + +- **API server** – Implements the S3-compatible REST API, policy evaluation, and Signature Version 4 presign service. +- **UI server** – Provides the browser console for buckets, IAM, and policies. It proxies to the API for presign operations. + +Both servers read `AppConfig`, so editing JSON stores on disk instantly affects both surfaces. + +## 2. Quickstart + +```bash +python -m venv .venv +. .venv/Scripts/activate # PowerShell: .\.venv\Scripts\Activate.ps1 +pip install -r requirements.txt + +# Run both API and UI +python run.py +``` + +Visit `http://127.0.0.1:5100/ui` to use the console and `http://127.0.0.1:5000/` (with IAM headers) for raw API calls. + +### Run modes + +You can run services individually if needed: + +```bash +python run.py --mode api # API only (port 5000) +python run.py --mode ui # UI only (port 5100) +``` + +### Docker quickstart + +The repo now ships a `Dockerfile` so you can run both services in one container: + +```bash +docker build -t myfsio . +docker run --rm -p 5000:5000 -p 5100:5100 \ + -v "$PWD/data:/app/data" \ + -v "$PWD/logs:/app/logs" \ + -e SECRET_KEY="change-me" \ + --name myfsio myfsio +``` + +PowerShell (Windows) example: + +```powershell +docker run --rm -p 5000:5000 -p 5100:5100 ` + -v ${PWD}\data:/app/data ` + -v ${PWD}\logs:/app/logs ` + -e SECRET_KEY="change-me" ` + --name myfsio myfsio +``` + +Key mount points: +- `/app/data` → persists buckets directly under `/app/data/` while system metadata (IAM config, bucket policies, versions, multipart uploads, etc.) lives under `/app/data/.myfsio.sys` (for example, `/app/data/.myfsio.sys/config/iam.json`). +- `/app/logs` → captures the rotating app log. +- `/app/tmp-storage` (optional) if you rely on the demo upload staging folders. + +With these volumes attached you can rebuild/restart the container without losing stored objects or credentials. + +### Versioning + +The repo now tracks a human-friendly release string inside `app/version.py` (see the `APP_VERSION` constant). Edit that value whenever you cut a release. The constant flows into Flask as `APP_VERSION` and is exposed via `GET /healthz`, so you can monitor deployments or surface it in UIs. + +## 3. Configuration Reference + +| Variable | Default | Notes | +| --- | --- | --- | +| `STORAGE_ROOT` | `/data` | Filesystem home for all buckets/objects. | +| `MAX_UPLOAD_SIZE` | `1073741824` | Bytes. Caps incoming uploads in both API + UI. | +| `UI_PAGE_SIZE` | `100` | `MaxKeys` hint shown in listings. | +| `SECRET_KEY` | `dev-secret-key` | Flask session key for UI auth. | +| `IAM_CONFIG` | `/data/.myfsio.sys/config/iam.json` | Stores users, secrets, and inline policies. | +| `BUCKET_POLICY_PATH` | `/data/.myfsio.sys/config/bucket_policies.json` | Bucket policy store (auto hot-reload). | +| `API_BASE_URL` | `http://127.0.0.1:5000` | Used by the UI to hit API endpoints (presign/policy). | +| `AWS_REGION` | `us-east-1` | Region embedded in SigV4 credential scope. 
| +| `AWS_SERVICE` | `s3` | Service string for SigV4. | + +Set env vars (or pass overrides to `create_app`) to point the servers at custom paths. + +## 4. Authentication & IAM + +1. On first boot, `data/.myfsio.sys/config/iam.json` is seeded with `localadmin / localadmin` that has wildcard access. +2. Sign into the UI using those credentials, then open **IAM**: + - **Create user**: supply a display name and optional JSON inline policy array. + - **Rotate secret**: generates a new secret key; the UI surfaces it once. + - **Policy editor**: select a user, paste an array of objects (`{"bucket": "*", "actions": ["list", "read"]}`), and submit. Alias support includes AWS-style verbs (e.g., `s3:GetObject`). +3. Wildcard action `iam:*` is supported for admin user definitions. + +The API expects every request to include `X-Access-Key` and `X-Secret-Key` headers. The UI persists them in the Flask session after login. + +## 5. Bucket Policies & Presets + +- **Storage**: Policies are persisted in `data/.myfsio.sys/config/bucket_policies.json` under `{"policies": {"bucket": {...}}}`. +- **Hot reload**: Both API and UI call `maybe_reload()` before evaluating policies. Editing the JSON on disk is immediately reflected—no restarts required. +- **UI editor**: Each bucket detail page includes: + - A preset selector: **Private** detaches the policy (delete mode), **Public** injects an allow policy granting anonymous `s3:ListBucket` + `s3:GetObject`, and **Custom** restores your draft. + - A read-only preview of the attached policy. + - Autosave behavior for custom drafts while you type. + +### Editing via CLI + +```bash +curl -X PUT http://127.0.0.1:5000/bucket-policy/test \ + -H "Content-Type: application/json" \ + -H "X-Access-Key: ..." -H "X-Secret-Key: ..." \ + -d '{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": "*", + "Action": ["s3:ListBucket"], + "Resource": ["arn:aws:s3:::test"] + } + ] + }' +``` + +The UI will reflect this change as soon as the request completes thanks to the hot reload. + +## 6. Presigned URLs + +- Trigger from the UI using the **Presign** button after selecting an object. +- Or call `POST /presign//` with JSON `{ "method": "GET", "expires_in": 900 }`. +- Supported methods: `GET`, `PUT`, `DELETE`; expiration must be `1..604800` seconds. +- The service signs requests using the caller’s IAM credentials and enforces bucket policies both when issuing and when the presigned URL is used. +- Legacy share links have been removed; presigned URLs now handle both private and public workflows. + +### Multipart Upload Example + +```python +import boto3 + +s3 = boto3.client('s3', endpoint_url='http://localhost:5000') + +# Initiate +response = s3.create_multipart_upload(Bucket='mybucket', Key='large.bin') +upload_id = response['UploadId'] + +# Upload parts +parts = [] +chunks = [b'chunk1', b'chunk2'] # Example data chunks +for part_number, chunk in enumerate(chunks, start=1): + response = s3.upload_part( + Bucket='mybucket', + Key='large.bin', + PartNumber=part_number, + UploadId=upload_id, + Body=chunk + ) + parts.append({'PartNumber': part_number, 'ETag': response['ETag']}) + +# Complete +s3.complete_multipart_upload( + Bucket='mybucket', + Key='large.bin', + UploadId=upload_id, + MultipartUpload={'Parts': parts} +) +``` + +## 6. Site Replication + +MyFSIO supports **Site Replication**, allowing you to automatically copy new objects from one MyFSIO instance (Source) to another (Target). This is useful for disaster recovery, data locality, or backups. 
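+
+If you prefer to script the setup described in the guide below instead of clicking through the UI, the sketch that follows shows the programmatic equivalent of the Connections and Replication forms. It is illustrative only: the endpoint, bucket names, and credentials are placeholders, and it assumes you run it on the source instance with the default configuration paths.
+
+```python
+# Illustrative sketch: register a remote connection and a replication rule
+# the same way the UI forms do. The endpoint, bucket names, and keys are
+# placeholders -- substitute your own values.
+import uuid
+
+from app import create_ui_app
+from app.connections import RemoteConnection
+from app.replication import ReplicationRule
+
+app = create_ui_app()
+with app.app_context():
+    connections = app.extensions["connections"]
+    replication = app.extensions["replication"]
+
+    # Equivalent of the "Add Connection" form
+    conn = RemoteConnection(
+        id=str(uuid.uuid4()),
+        name="Secondary Site",
+        endpoint_url="http://target-server:5002",  # target instance API
+        access_key="TARGET_ACCESS_KEY",
+        secret_key="TARGET_SECRET_KEY",
+        region="us-east-1",
+    )
+    connections.add(conn)
+
+    # Equivalent of the "Enable Replication" form on the bucket detail page
+    replication.set_rule(
+        ReplicationRule(
+            bucket_name="source-bucket",
+            target_connection_id=conn.id,
+            target_bucket="backup-bucket",
+            enabled=True,
+        )
+    )
+```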
+ +### Architecture + +- **Source Instance**: The MyFSIO instance where you upload files. It runs the replication worker. +- **Target Instance**: Another MyFSIO instance (or any S3-compatible service like AWS S3, MinIO) that receives the copies. + +Replication is **asynchronous** (happens in the background) and **one-way** (Source -> Target). + +### Setup Guide + +#### 1. Prepare the Target Instance + +If your target is another MyFSIO server (e.g., running on a different machine or port), you need to create a destination bucket and a user with write permissions. + +**Option A: Using the UI (Easiest)** +If you have access to the UI of the target instance: +1. Log in to the Target UI. +2. Create a new bucket (e.g., `backup-bucket`). +3. Go to **IAM**, create a new user (e.g., `replication-user`), and copy the Access/Secret keys. + +**Option B: Headless Setup (API Only)** +If the target server is only running the API (`run_api.py`) and has no UI access, you can bootstrap the credentials and bucket by running a Python script on the server itself. + +Run this script on the **Target Server**: + +```python +# setup_target.py +from pathlib import Path +from app.iam import IamService +from app.storage import ObjectStorage + +# Initialize services (paths match default config) +data_dir = Path("data") +iam = IamService(data_dir / ".myfsio.sys" / "config" / "iam.json") +storage = ObjectStorage(data_dir) + +# 1. Create the bucket +bucket_name = "backup-bucket" +try: + storage.create_bucket(bucket_name) + print(f"Bucket '{bucket_name}' created.") +except Exception as e: + print(f"Bucket creation skipped: {e}") + +# 2. Create the user +try: + # Create user with full access (or restrict policy as needed) + creds = iam.create_user( + display_name="Replication User", + policies=[{"bucket": bucket_name, "actions": ["write", "read", "list"]}] + ) + print("\n--- CREDENTIALS GENERATED ---") + print(f"Access Key: {creds['access_key']}") + print(f"Secret Key: {creds['secret_key']}") + print("-----------------------------") +except Exception as e: + print(f"User creation failed: {e}") +``` + +Save and run: `python setup_target.py` + +#### 2. Configure the Source Instance + +Now, configure the primary instance to replicate to the target. + +1. **Access the Console**: + Log in to the UI of your Source Instance. + +2. **Add a Connection**: + - Navigate to **Connections** in the top menu. + - Click **Add Connection**. + - **Name**: `Secondary Site`. + - **Endpoint URL**: The URL of your Target Instance's API (e.g., `http://target-server:5002`). + - **Access Key**: The key you generated on the Target. + - **Secret Key**: The secret you generated on the Target. + - Click **Add Connection**. + +3. **Enable Replication**: + - Navigate to **Buckets** and select the source bucket. + - Switch to the **Replication** tab. + - Select the `Secondary Site` connection. + - Enter the target bucket name (`backup-bucket`). + - Click **Enable Replication**. + +### Verification + +1. Upload a file to the source bucket. +2. Check the target bucket (via UI, CLI, or API). The file should appear shortly. + +```bash +# Verify on target using AWS CLI +aws --endpoint-url http://target-server:5002 s3 ls s3://backup-bucket +``` + +## 7. Running Tests + +```bash +pytest -q +``` + +The suite now includes a boto3 integration test that spins up a live HTTP server and drives the API through the official AWS SDK. If you want to skip it (for faster unit-only loops), run `pytest -m "not integration"`. 
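+
+The snippet below is a rough sketch of the kind of round trip that integration test performs, in case you want to poke a running API server from a REPL. It assumes the default port and region and the seeded `localadmin` credentials; the bucket and key names are placeholders.
+
+```python
+# Rough sketch of a boto3 round trip against a locally running API server.
+# Assumes the default port (5000), region (us-east-1), and the seeded
+# localadmin/localadmin credentials; bucket/key names are placeholders.
+import boto3
+
+s3 = boto3.client(
+    "s3",
+    endpoint_url="http://127.0.0.1:5000",
+    aws_access_key_id="localadmin",
+    aws_secret_access_key="localadmin",
+    region_name="us-east-1",
+)
+
+s3.create_bucket(Bucket="smoke-test-bucket")
+s3.put_object(Bucket="smoke-test-bucket", Key="hello.txt", Body=b"hello world")
+body = s3.get_object(Bucket="smoke-test-bucket", Key="hello.txt")["Body"].read()
+assert body == b"hello world"
+print("round trip OK")
+```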
+ +The suite covers bucket CRUD, presigned downloads, bucket policy enforcement, and regression tests for anonymous reads when a Public policy is attached. + +## 8. Troubleshooting + +| Symptom | Likely Cause | Fix | +| --- | --- | --- | +| 403 from API despite Public preset | Policy didn’t save or bucket key path mismatch | Reapply Public preset, confirm bucket name in `Resource` matches `arn:aws:s3:::bucket/*`. | +| UI still shows old policy text | Browser cached view before hot reload | Refresh; JSON is already reloaded on server. | +| Presign modal errors with 403 | IAM user lacks `read/write/delete` for target bucket or bucket policy denies | Update IAM inline policies or remove conflicting deny statements. | +| Large upload rejected immediately | File exceeds `MAX_UPLOAD_SIZE` | Increase env var or shrink object. | + +## 9. API Matrix + +``` +GET / # List buckets +PUT / # Create bucket +DELETE / # Remove bucket +GET / # List objects +PUT // # Upload object +GET // # Download object +DELETE // # Delete object +POST /presign// # Generate SigV4 URL +GET /bucket-policy/ # Fetch policy +PUT /bucket-policy/ # Upsert policy +DELETE /bucket-policy/ # Delete policy +``` + +## 10. Next Steps + +- Tailor IAM + policy JSON files for team-ready presets. +- Wrap `run_api.py` with gunicorn or another WSGI server for long-running workloads. +- Extend `bucket_policies.json` to cover Deny statements that simulate production security controls. diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..356c1f9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +Flask>=3.0.2 +Flask-Limiter>=3.5.0 +Flask-Cors>=4.0.0 +Flask-WTF>=1.2.1 +pytest>=7.4 +requests>=2.31 +boto3>=1.34 diff --git a/run.py b/run.py new file mode 100644 index 0000000..efd12a2 --- /dev/null +++ b/run.py @@ -0,0 +1,58 @@ +"""Helper script to run the API server, UI server, or both.""" +from __future__ import annotations + +import argparse +import os +import warnings +from multiprocessing import Process + +from app import create_api_app, create_ui_app + + +def _server_host() -> str: + """Return the bind host for API and UI servers.""" + return os.getenv("APP_HOST", "0.0.0.0") + + +def _is_debug_enabled() -> bool: + return os.getenv("FLASK_DEBUG", "0").lower() in ("1", "true", "yes") + + +def serve_api(port: int) -> None: + app = create_api_app() + debug = _is_debug_enabled() + if debug: + warnings.warn("DEBUG MODE ENABLED - DO NOT USE IN PRODUCTION", RuntimeWarning) + app.run(host=_server_host(), port=port, debug=debug) + + +def serve_ui(port: int) -> None: + app = create_ui_app() + debug = _is_debug_enabled() + if debug: + warnings.warn("DEBUG MODE ENABLED - DO NOT USE IN PRODUCTION", RuntimeWarning) + app.run(host=_server_host(), port=port, debug=debug) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run the S3 clone services.") + parser.add_argument("--mode", choices=["api", "ui", "both"], default="both") + parser.add_argument("--api-port", type=int, default=5000) + parser.add_argument("--ui-port", type=int, default=5100) + args = parser.parse_args() + + if args.mode in {"api", "both"}: + print(f"Starting API server on port {args.api_port}...") + api_proc = Process(target=serve_api, args=(args.api_port,), daemon=True) + api_proc.start() + else: + api_proc = None + + if args.mode in {"ui", "both"}: + print(f"Starting UI server on port {args.ui_port}...") + serve_ui(args.ui_port) + elif api_proc: + try: + api_proc.join() + except KeyboardInterrupt: + pass diff --git 
a/static/css/main.css b/static/css/main.css new file mode 100644 index 0000000..190f509 --- /dev/null +++ b/static/css/main.css @@ -0,0 +1,937 @@ +:root { + --myfsio-body-bg: #f5f6fa; + --myfsio-text: #0f172a; + --myfsio-card-bg: #ffffff; + --myfsio-card-border: #e2e8f0; + --myfsio-muted: #475569; + --myfsio-input-bg: #ffffff; + --myfsio-input-border: #cbd5f5; + --myfsio-nav-gradient: linear-gradient(90deg, #0f172a, #1d4ed8); + --myfsio-nav-link: rgba(255, 255, 255, 0.85); + --myfsio-nav-link-hover: #ffffff; + --myfsio-preview-bg: #f8f9fb; + --myfsio-policy-bg: #0f172a; + --myfsio-policy-fg: #e2e8f0; + --myfsio-hover-bg: rgba(59, 130, 246, 0.12); +} + +[data-theme='dark'] { + --myfsio-body-bg: #0b1120; + --myfsio-text: #e2e8f0; + --myfsio-card-bg: #1a1f2e; + --myfsio-card-border: #2d3548; + --myfsio-muted: #94a3b8; + --myfsio-input-bg: #111827; + --myfsio-input-border: #374151; + --myfsio-nav-gradient: linear-gradient(90deg, #020617, #1e3a8a); + --myfsio-nav-link: rgba(248, 250, 252, 0.85); + --myfsio-nav-link-hover: #ffffff; + --myfsio-preview-bg: #1f2937; + --myfsio-policy-bg: #0f1419; + --myfsio-policy-fg: #f8fafc; + --myfsio-hover-bg: rgba(59, 130, 246, 0.2); +} + +[data-theme='dark'] body, +[data-theme='dark'] html { + color-scheme: dark; +} + +body { + background-color: var(--myfsio-body-bg); + color: var(--myfsio-text); + transition: background-color 0.3s ease, color 0.3s ease; +} + +html, body { + min-height: 100%; +} + +main { + color: var(--myfsio-text); + background-color: var(--myfsio-body-bg); +} + +html { + background-color: var(--myfsio-body-bg); + scroll-behavior: smooth; +} + +.text-muted, +.form-text { + color: var(--myfsio-muted) !important; +} + +.table-responsive { border-radius: 0.5rem; overflow: hidden; } +.message-stack { position: sticky; top: 1rem; z-index: 100; } +code { font-size: 0.85rem; } + +code { + background-color: rgba(15, 23, 42, 0.08); + color: var(--myfsio-text); + padding: 0.15rem 0.4rem; + border-radius: 0.25rem; +} +[data-theme='dark'] code { + background-color: rgba(148, 163, 184, 0.15); + color: #93c5fd; +} + +.card, +.card-header, +.modal-content, +.dropdown-menu, +.list-group-item { + background-color: var(--myfsio-card-bg); + color: var(--myfsio-text); + border-color: var(--myfsio-card-border); +} + +.bg-panel { + background-color: var(--myfsio-preview-bg); + color: var(--myfsio-text); + border-color: var(--myfsio-card-border) !important; +} + +.border-dashed { + border-style: dashed !important; +} + +.card { + box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); +} + +[data-theme='dark'] .card { + box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.3), 0 1px 2px 0 rgba(0, 0, 0, 0.2); +} + +.card-header { + font-weight: 500; +} + +/* Drag and Drop Zone */ +.drop-zone { + position: relative; + transition: all 0.2s ease; +} + +.drop-zone.drag-over { + background-color: var(--myfsio-hover-bg); + border: 2px dashed var(--myfsio-input-border); +} + +.drop-zone.drag-over::after { + content: 'Drop files here to upload'; + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + font-size: 1.5rem; + font-weight: 600; + color: var(--myfsio-muted); + pointer-events: none; + z-index: 10; +} + +.drop-zone.drag-over table { + opacity: 0.3; +} + +.modal-header, +.modal-footer { + border-color: var(--myfsio-card-border); +} + +.myfsio-nav { + background: var(--myfsio-nav-gradient); + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); +} +.myfsio-nav .navbar-brand { + color: #fff; + font-weight: 600; + letter-spacing: -0.02em; + 
display: inline-flex; + align-items: center; + gap: 0.5rem; +} +.myfsio-logo { + border-radius: 0.35rem; + box-shadow: 0 0 6px rgba(15, 23, 42, 0.35); + background-color: rgba(255, 255, 255, 0.1); +} +.myfsio-title { + display: inline-block; +} +.myfsio-nav .nav-link { + color: var(--myfsio-nav-link); + transition: color 0.2s ease; +} +.myfsio-nav .nav-link:hover { + color: var(--myfsio-nav-link-hover); +} +.myfsio-nav .nav-link.nav-link-muted { opacity: 0.75; } +.myfsio-nav .nav-link.nav-link-muted .badge { + color: #0f172a; + background-color: #fef08a; +} +[data-theme='dark'] .myfsio-nav .nav-link.nav-link-muted .badge { + color: #0f172a; + background-color: #fde047; +} +.myfsio-nav .navbar-toggler { + border-color: rgba(255, 255, 255, 0.6); +} +.myfsio-nav .navbar-toggler-icon { + filter: invert(1); +} + +.docs-hero { + background: var(--myfsio-nav-gradient); + color: #fff !important; + border: 1px solid rgba(255, 255, 255, 0.2); + box-shadow: 0 15px 35px rgba(15, 23, 42, 0.3); +} + +.docs-hero * { + color: inherit; +} + +.docs-callout { + background-color: rgba(15, 23, 42, 0.35); + border: 1px solid rgba(255, 255, 255, 0.35); + border-radius: 0.75rem; + padding: 1rem 1.25rem; +} + +.docs-callout code { + color: #fff; + background-color: rgba(0, 0, 0, 0.2); +} + +[data-theme='dark'] .docs-callout { + background-color: rgba(2, 6, 23, 0.55); + border-color: rgba(255, 255, 255, 0.25); +} + +.docs-feature-card + .docs-feature-card { + margin-top: 1.25rem; +} + +.docs-checklist { + padding-left: 1.25rem; + display: flex; + flex-direction: column; + gap: 0.35rem; +} + +.docs-checklist li { + margin: 0; +} + +.docs-section { + border: 1px solid var(--myfsio-card-border); + border-radius: 1rem; +} + +.docs-section-kicker { + display: inline-flex; + align-items: center; + justify-content: center; + width: 40px; + height: 40px; + border-radius: 999px; + background: rgba(59, 130, 246, 0.12); + color: #1d4ed8; + font-weight: 600; +} + +[data-theme='dark'] .docs-section-kicker { + background: rgba(59, 130, 246, 0.25); + color: #93c5fd; +} + +.docs-steps { + counter-reset: docs-step; + margin: 1rem 0 1.25rem; + padding-left: 1.25rem; +} + +.docs-steps li { + margin-bottom: 0.4rem; +} + +.docs-highlight { + background: rgba(59, 130, 246, 0.08); + border-radius: 0.75rem; + padding: 1rem 1.25rem; + border: 1px solid rgba(59, 130, 246, 0.2); +} + +[data-theme='dark'] .docs-highlight { + background: rgba(59, 130, 246, 0.18); + border-color: rgba(59, 130, 246, 0.35); +} + +.docs-pill-list { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); + gap: 1.5rem; +} + +.docs-pill-list ul { + padding-left: 1.1rem; + margin-bottom: 0; +} + +.docs-table thead { + text-transform: uppercase; + font-size: 0.75rem; + letter-spacing: 0.08em; +} + +.docs-sidebar { + position: sticky; + top: 1.5rem; + border-radius: 1rem; + border: 1px solid var(--myfsio-card-border); +} + +.docs-sidebar-callouts { + display: flex; + flex-direction: column; + gap: 0.85rem; + padding: 1rem; + border-radius: 0.75rem; + background-color: rgba(15, 23, 42, 0.04); +} + +[data-theme='dark'] .docs-sidebar-callouts { + background-color: rgba(248, 250, 252, 0.05); +} + +.docs-sidebar-callouts code { + font-size: 0.85rem; +} + +.docs-toc a { + color: var(--myfsio-text); + text-decoration: none; + display: inline-flex; + gap: 0.35rem; + align-items: center; + padding: 0.2rem 0; +} + +.docs-toc a:hover { + color: #2563eb; +} + +.badge { + font-weight: 500; + padding: 0.35em 0.65em; +} + +.theme-toggle { + min-width: auto; + 
width: 38px; + height: 32px; + padding: 0; + display: inline-flex; + align-items: center; + justify-content: center; + border-radius: 999px; + transition: all 0.2s ease; +} + +.theme-toggle:hover { + transform: translateY(-1px); +} + +.theme-toggle .theme-icon { + transition: opacity 0.2s ease, transform 0.2s ease; +} + +.config-copy { + position: absolute; + top: 0.5rem; + right: 0.5rem; + opacity: 0.8; + transition: opacity 0.2s; + background-color: rgba(0, 0, 0, 0.5); + border: none; + color: white; +} + +.config-copy:hover { + opacity: 1; + background-color: rgba(0, 0, 0, 0.7); + color: white; +} + +.bucket-table td:last-child, +.bucket-table th:last-child { white-space: nowrap; } + +.object-key { + word-break: break-word; + max-width: 32rem; +} + +.preview-card { top: 1rem; } + +.preview-stage { + min-height: 260px; + background-color: var(--myfsio-preview-bg); + overflow: hidden; + border-color: var(--myfsio-card-border) !important; +} + +.upload-progress-stack { + display: flex; + flex-direction: column; + gap: 0.75rem; +} + +.upload-progress-item { + border: 1px solid var(--myfsio-card-border); + border-radius: 0.5rem; + background-color: var(--myfsio-card-bg); + padding: 0.75rem 0.9rem; + transition: border-color 0.2s ease, background-color 0.2s ease; +} + +.upload-progress-item[data-state='success'] { + border-color: rgba(34, 197, 94, 0.6); +} + +.upload-progress-item[data-state='error'] { + border-color: rgba(239, 68, 68, 0.7); +} + +.progress-thin { + height: 0.35rem; + background-color: rgba(15, 23, 42, 0.1); +} + +[data-theme='dark'] .progress-thin { + background-color: rgba(248, 250, 252, 0.15); +} + +#deleteObjectKey { + word-break: break-all; + max-width: 100%; +} + +.preview-stage img, +.preview-stage video, +.preview-stage iframe { + border: 0; + max-height: 360px; +} + +.upload-dropzone { + border: 2px dashed var(--myfsio-card-border); + border-radius: 0.75rem; + padding: 1.5rem; + cursor: pointer; + transition: border-color 0.2s ease, background-color 0.2s ease; +} + +.upload-dropzone.is-dragover { + background-color: rgba(59, 130, 246, 0.08); + border-color: #3b82f6; +} + +.metadata-stack .metadata-entry + .metadata-entry { + margin-top: 0.75rem; +} + +.metadata-stack .metadata-key { + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--myfsio-muted); +} + +.metadata-stack .metadata-value { + font-weight: 600; +} + +.policy-preview { + background-color: var(--myfsio-policy-bg); + color: var(--myfsio-policy-fg); + border-radius: 0.5rem; + padding: 1rem; + font-size: 0.85rem; + max-height: 320px; + overflow: auto; + border: 1px solid var(--myfsio-card-border); +} + +.objects-table-container { + max-height: 600px; + overflow-y: auto; +} + +.btn-group form { display: inline; } + +.font-monospace { font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace; } + +.table { + color: var(--myfsio-text); + background-color: var(--myfsio-card-bg); +} + +.table th, +.table td { + border-color: var(--myfsio-card-border); +} + +.table-light th { + background-color: rgba(15, 23, 42, 0.04); +} + +[data-theme='dark'] .table-light th { + background-color: rgba(248, 250, 252, 0.05); + color: var(--myfsio-text); +} + +.table-hover tbody tr:hover { + background-color: var(--myfsio-hover-bg); + cursor: pointer; + transition: background-color 0.15s ease; +} + +.table thead { + background-color: rgba(15, 23, 42, 0.04); + color: var(--myfsio-text); +} + +[data-theme='dark'] .table thead { + background-color: rgba(248, 250, 252, 0.05); + color: 
var(--myfsio-text); +} + +.form-control, +.form-select { + background-color: var(--myfsio-input-bg); + color: var(--myfsio-text); + border-color: var(--myfsio-input-border); + transition: border-color 0.15s ease, box-shadow 0.15s ease; +} + +.form-control::placeholder { + color: var(--myfsio-muted); + opacity: 0.6; +} + +[data-theme='dark'] .form-control::placeholder { + opacity: 0.5; +} + +.form-control:focus, +.form-select:focus { + background-color: var(--myfsio-input-bg); + color: var(--myfsio-text); + border-color: #3b82f6; + box-shadow: 0 0 0 0.2rem rgba(59, 130, 246, 0.25); +} + +.alert { + color: var(--myfsio-text); + border-color: var(--myfsio-card-border); + border-width: 1px; + border-left-width: 4px; +} + +.alert-success { + background-color: rgba(34, 197, 94, 0.1); + border-left-color: #22c55e; +} + +[data-theme='dark'] .alert-success { + background-color: rgba(34, 197, 94, 0.15); + color: #86efac; +} + +.alert-danger { + background-color: rgba(239, 68, 68, 0.1); + border-left-color: #ef4444; +} + +[data-theme='dark'] .alert-danger { + background-color: rgba(239, 68, 68, 0.15); + color: #fca5a5; +} + +.alert-warning { + background-color: rgba(251, 191, 36, 0.1); + border-left-color: #fbbf24; +} + +[data-theme='dark'] .alert-warning { + background-color: rgba(251, 191, 36, 0.15); + color: #fde047; +} + +.alert-info { + background-color: rgba(59, 130, 246, 0.1); + border-left-color: #3b82f6; +} + +[data-theme='dark'] .alert-info { + background-color: rgba(59, 130, 246, 0.15); + color: #93c5fd; +} + +.btn { + color: inherit; + transition: all 0.2s ease; +} + +.btn:hover { + transform: translateY(-1px); +} + +.btn:active { + transform: translateY(0); +} + +.btn-icon { + width: 36px; + height: 36px; + padding: 0.4rem; + display: inline-flex; + align-items: center; + justify-content: center; +} + +.btn-group-sm .btn-icon { + width: 34px; + height: 34px; +} + +[data-theme='dark'] .btn-outline-secondary { + color: #e2e8f0; + border-color: #475569; +} + +[data-theme='dark'] .btn-outline-secondary:hover { + background-color: rgba(148, 163, 184, 0.2); + border-color: #64748b; + color: #f8fafc; +} + +[data-theme='dark'] .btn-outline-danger { + color: #fca5a5; + border-color: #f87171; +} + +[data-theme='dark'] .btn-outline-danger:hover { + background-color: rgba(248, 113, 113, 0.2); + border-color: #ef4444; + color: #fecaca; +} + +[data-theme='dark'] .btn-outline-primary { + color: #93c5fd; + border-color: #60a5fa; +} + +[data-theme='dark'] .btn-outline-primary:hover { + background-color: rgba(59, 130, 246, 0.2); + border-color: #3b82f6; + color: #bfdbfe; +} + +[data-theme='dark'] .btn-primary { + background-color: #2563eb; + border-color: #1d4ed8; + color: #ffffff; +} + +[data-theme='dark'] .btn-primary:hover { + background-color: #1d4ed8; + border-color: #1e40af; +} + +.btn-primary { + color: #ffffff; +} + +[data-theme='dark'] .btn-danger { + background-color: #dc2626; + border-color: #b91c1c; +} + +[data-theme='dark'] .btn-danger:hover { + background-color: #b91c1c; + border-color: #991b1b; +} + +.badge.text-bg-info { + background-color: #bae6fd; + color: #0f172a; +} + +[data-theme='dark'] .badge.text-bg-info { + background-color: #0ea5e9; + color: #e2e8f0; +} + +[data-theme='dark'] .badge.text-bg-warning { + background-color: #fde047; + color: #0f172a; +} + +[data-theme='dark'] .badge.text-bg-secondary { + background-color: #475569; + color: #e2e8f0; +} + +[data-theme='dark'] .badge.text-bg-success { + background-color: #22c55e; + color: #ffffff; +} + +[data-theme='dark'] 
.badge.text-bg-primary { + background-color: #3b82f6; + color: #ffffff; +} + +.dropdown-menu { + border-color: var(--myfsio-card-border); +} + +[data-theme='dark'] .form-label, +[data-theme='dark'] label, +[data-theme='dark'] .modal-title, +[data-theme='dark'] .fw-semibold { + color: var(--myfsio-text); +} + +.modal-backdrop.show { + opacity: 0.6; +} + +[data-theme='dark'] .btn-close { + filter: invert(1) grayscale(100%) brightness(200%); +} + +.config-copy { + color: #ffffff; + border-color: rgba(255, 255, 255, 0.7); +} + +.config-copy:hover { + color: #0f172a; + background-color: #ffffff; + border-color: #ffffff; +} + +[data-theme='dark'] .border { + border-color: var(--myfsio-card-border) !important; +} + +.btn-link { + color: #3b82f6; + text-decoration: none; +} + +.btn-link:hover { + color: #2563eb; + text-decoration: underline; + transform: none; +} + +[data-theme='dark'] .btn-link { + color: #60a5fa; +} + +[data-theme='dark'] .btn-link:hover { + color: #93c5fd; +} + +[data-theme='dark'] .input-group-text { + background-color: var(--myfsio-input-bg); + color: var(--myfsio-text); + border-color: var(--myfsio-input-border); +} + +.page-header { + margin-bottom: 2rem; +} + +.page-header h1 { + font-weight: 600; + letter-spacing: -0.02em; +} + +.config-copy { + position: absolute; + top: 0.5rem; + right: 0.5rem; + opacity: 0.8; + transition: opacity 0.2s; + background-color: rgba(0, 0, 0, 0.5); + border: none; + color: white; +} + +.config-copy:hover { + opacity: 1; + background-color: rgba(0, 0, 0, 0.7); + color: white; +} + +@keyframes pulse { + 0%, 100% { opacity: 1; } + 50% { opacity: 0.5; } +} + +.loading { + animation: pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite; +} + +[data-theme='dark'] .text-primary { + color: #60a5fa !important; +} + +[data-theme='dark'] .text-success { + color: #86efac !important; +} + +[data-theme='dark'] .text-danger { + color: #fca5a5 !important; +} + +[data-theme='dark'] .text-warning { + color: #fde047 !important; +} + +[data-theme='dark'] .lead { + color: var(--myfsio-muted); +} + +.btn-sm { + transition: all 0.15s ease; +} + +[data-theme='dark'] .btn-outline-light { + color: #f8fafc; + border-color: rgba(248, 250, 252, 0.3); +} + +[data-theme='dark'] .btn-outline-light:hover { + background-color: rgba(248, 250, 252, 0.1); + border-color: rgba(248, 250, 252, 0.5); +} + +pre { + background-color: rgba(15, 23, 42, 0.05); + border: 1px solid var(--myfsio-card-border); + border-radius: 0.5rem; + padding: 1rem; + overflow-x: auto; + font-size: 0.875rem; + line-height: 1.6; +} + +[data-theme='dark'] pre { + background-color: rgba(248, 250, 252, 0.05); +} + +pre code { + background: none; + padding: 0; + color: inherit; +} + +.docs-section + .docs-section { + margin-top: 1.25rem; +} + +/* Breadcrumb styling */ +.breadcrumb { + background-color: transparent; + padding: 0.5rem 0; + font-size: 0.9rem; +} + +.breadcrumb-item + .breadcrumb-item::before { + content: "›"; + color: var(--myfsio-muted); +} + +.breadcrumb-item a { + color: var(--myfsio-text); + text-decoration: none; + transition: color 0.2s ease; +} + +.breadcrumb-item a:hover { + color: #3b82f6; + text-decoration: underline; +} + +[data-theme='dark'] .breadcrumb-item a:hover { + color: #60a5fa; +} + +.breadcrumb-item.active { + color: var(--myfsio-muted); +} + +/* Icon alignment */ +.bi { + vertical-align: -0.125em; +} + +/* Sticky improvements */ +.sticky-top { + top: 1.5rem; +} + +/* Better card spacing */ +.card-body dl:last-child { + margin-bottom: 0; +} + +/* Empty state improvements */ 
+.text-center svg { + display: inline-block; +} + +/* Input group improvements */ +[data-theme='dark'] .input-group .btn-outline-primary { + background-color: transparent; +} + +/* File size nowrap */ +.text-nowrap { + white-space: nowrap; +} + +/* Alert improvements */ +.alert svg { + flex-shrink: 0; +} + +/* Better hover for table rows with data */ +[data-object-row]:hover { + background-color: var(--myfsio-hover-bg) !important; +} + +/* Improve spacing in button groups */ +.btn-group-sm .btn { + padding: 0.25rem 0.6rem; + font-size: 0.875rem; +} + +/* Better modal styling */ +.modal-header { + background-color: var(--myfsio-card-bg); +} + +/* Badge improvements */ +.badge { + font-size: 0.8125rem; +} diff --git a/static/images/MyFISO.ico b/static/images/MyFISO.ico new file mode 100644 index 0000000..50ca797 Binary files /dev/null and b/static/images/MyFISO.ico differ diff --git a/static/images/MyFISO.png b/static/images/MyFISO.png new file mode 100644 index 0000000..ef9a3a5 Binary files /dev/null and b/static/images/MyFISO.png differ diff --git a/templates/404.html b/templates/404.html new file mode 100644 index 0000000..edee52f --- /dev/null +++ b/templates/404.html @@ -0,0 +1,11 @@ +{% extends "base.html" %} +{% block content %} +
+

HTTP 404

+

We can't find that page

+

The requested console route isn't available in MyFSIO. Double-check the URL or head back to your buckets.

+ +
+{% endblock %} diff --git a/templates/500.html b/templates/500.html new file mode 100644 index 0000000..b8de624 --- /dev/null +++ b/templates/500.html @@ -0,0 +1,11 @@ +{% extends "base.html" %} +{% block content %} +
+

HTTP 500

+

Internal Server Error

+

Something went wrong on our end. Please try again later or contact support.

+ +
+{% endblock %} diff --git a/templates/base.html b/templates/base.html new file mode 100644 index 0000000..1bf96ce --- /dev/null +++ b/templates/base.html @@ -0,0 +1,242 @@ + + + + + + + MyFSIO Console + + + + + + + + +
+ {% block content %}{% endblock %} +
+
+ +
+ + + + + + {% block extra_scripts %}{% endblock %} + + diff --git a/templates/bucket_detail.html b/templates/bucket_detail.html new file mode 100644 index 0000000..af93fe8 --- /dev/null +++ b/templates/bucket_detail.html @@ -0,0 +1,1752 @@ +{% extends "base.html" %} +{% block content %} +{% set active_tab = request.args.get('tab', 'objects') %} +
+ +
+
+ +

+ + + + {{ bucket_name }} +

+
+
+ +
+
+ + + + + +
+ + +
+
+
+
+
+
+ Objects + + + + +
+
+
+ + + + + + + + + + + {% for obj in objects %} + + + + + + + {% else %} + + + + {% endfor %} + +
+ + KeySizeActions
+ + +
{{ obj.key }}
+
Modified {{ obj.last_modified.strftime('%b %d, %Y %H:%M') }}
+
+ {{ obj.size | filesizeformat }} + +
+ + + + +
+
+

No objects yet

+

Upload files using the console or any compatible client.

+
+
+
+
+
+
+
+ Object details +
+
+
+ + + +

Select an object to view details

+
+
+
+

+
+
+
+ + + + + + Download + + +
+
+
+
Last modified
+
+
ETag
+
+
+
+
Metadata
+
+ +
+
+
+
Versions
+ +
+
+
+
+
No preview available
+ Object preview + + +
+
+
+
+
+
+
+ + +
+
+
+
+
+
+ Bucket policy +
+
+ {% if bucket_policy %} + Policy attached + {% else %} + IAM only + {% endif %} +
+
+
+ {% if not bucket_policy %} + + {% endif %} + {% if can_edit_policy %} + {% set preset_choice = 'custom' %} + {% if not bucket_policy %} + {% set preset_choice = 'private' %} + {% elif bucket_policy_text and bucket_policy_text.strip() == default_policy.strip() %} + {% set preset_choice = 'public' %} + {% endif %} +
+ + +
+ + +
+ + +
Use presets for common scenarios or switch to custom to paste AWS-style statements.
+
+ +
+
+ {% else %} +

You do not have permission to edit this policy.

+ {% endif %} +
+
+
+
+
+ + +
+
+
+
+
+ Versioning + + {{ 'Enabled' if versioning_enabled else 'Suspended' }} + +
+
+

When enabled, previous versions of objects are preserved so you can roll back accidental changes or deletions.

+ {% if can_manage_versioning %} +
+ + {% if versioning_enabled %} + + + {% else %} + + + {% endif %} +
+ {% else %} +

You do not have permission to modify versioning for this bucket.

+ {% endif %} +
+
+ +{% if versioning_enabled %} +
+
+ Archived objects +
+ 0 items + +
+
+
+

When objects are deleted while versioning is enabled, their previous versions remain here until you restore or purge them.

+
+ + + + + + + + + + + + + + +
KeyLatest versionVersionsActions
No archived-only objects.
+
+
+
+ {% endif %} +
+
+
+ + +
+
+
+
+
+ Replication Configuration +
+
+ {% if replication_rule %} + + +
+
Current Rule
+
+
Target Connection
+
+ {% set target_conn = connections | selectattr("id", "equalto", replication_rule.target_connection_id) | first %} + {{ target_conn.name if target_conn else replication_rule.target_connection_id }} +
+ +
Target Bucket
+
{{ replication_rule.target_bucket }}
+ +
Status
+
+ {% if replication_rule.enabled %} + Enabled + {% else %} + Disabled + {% endif %} +
+
+
+ +
+ + +
+ + {% else %} +

Replication allows you to automatically copy new objects from this bucket to a bucket in another S3-compatible service.

+ + {% if connections %} +
+ + +
+ + +
Select the remote service where objects should be replicated.
+
+ +
+ + +
The bucket on the remote service must already exist.
+
+ + +
+ {% else %} + + {% endif %} + + {% endif %} +
+
+
+ +
+
+
+ About Replication +
+
+

Site Replication asynchronously copies objects to a remote destination.

+
    +
  • Only new uploads are replicated. Existing objects are not copied retroactively.
  • Deletes are not replicated to prevent accidental data loss.
  • Replication happens in the background.
  • Ensure the target bucket exists and the connection credentials have write permissions.
+
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + +{% endblock %} + +{% block extra_scripts %} + +{% endblock %} diff --git a/templates/buckets.html b/templates/buckets.html new file mode 100644 index 0000000..5e427b2 --- /dev/null +++ b/templates/buckets.html @@ -0,0 +1,198 @@ +{% extends "base.html" %} +{% block content %} +
+
+

Buckets

+

Manage your S3-compatible storage containers.

+
+ +
+ +
+
+ + + + +
+
+ + + + + +
+
+ +
+ {% for bucket in buckets %} +
+
+
+
+
+
+ + + + +
+
{{ bucket.meta.name }}
+
+ {{ bucket.access_label }} +
+ +
+
+
Storage Used
+
{{ bucket.summary.human_size }}
+
+
+
Objects
+
{{ bucket.summary.objects }}
+
+
+
+ +
+
+ {% else %} +
+
+
+ + + +
+
No buckets found
+

Get started by creating your first storage bucket.

+ +
+
+ {% endfor %} +
+ + +{% endblock %} + +{% block extra_scripts %} +{{ super() }} + +{% endblock %} diff --git a/templates/connections.html b/templates/connections.html new file mode 100644 index 0000000..fbe5175 --- /dev/null +++ b/templates/connections.html @@ -0,0 +1,89 @@ +{% extends "base.html" %} + +{% block title %}Connections - S3 Compatible Storage{% endblock %} + +{% block content %} +
+
+

Remote Connections

+

Manage connections to other S3-compatible services for replication.

+
+
+ +
+
+
+
+ Add New Connection +
+
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+ +
+
+
+
+ +
+
+
+ Existing Connections +
+
+ {% if connections %} +
+ + + + + + + + + + + + {% for conn in connections %} + + + + + + + + {% endfor %} + +
NameEndpointRegionAccess KeyActions
{{ conn.name }}{{ conn.endpoint_url }}{{ conn.region }}{{ conn.access_key }} +
+ +
+
+
+ {% else %} +

No remote connections configured.

+ {% endif %} +
+
+
+
+{% endblock %} diff --git a/templates/csrf_error.html b/templates/csrf_error.html new file mode 100644 index 0000000..03f29bb --- /dev/null +++ b/templates/csrf_error.html @@ -0,0 +1,14 @@ +{% extends "base.html" %} +{% block content %} +
+

Security Check Failed

+

Session Expired or Invalid

+

Your session may have expired or the form submission was invalid. Please refresh the page and try again.

+ {% if reason %} +

{{ reason }}

+ {% endif %} + +
+{% endblock %} diff --git a/templates/docs.html b/templates/docs.html new file mode 100644 index 0000000..e25c124 --- /dev/null +++ b/templates/docs.html @@ -0,0 +1,376 @@ +{% extends "base.html" %} +{% block content %} +
+
+
+

Documentation

+

Your guide to MyFSIO

+

Follow these steps to install, authenticate, master the console, and automate everything through the API.

+
+
+
API base URL
+ {{ api_base }} +
+
+
+
+
+
+
+
+ 01 +

Set up & run locally

+
+

Prepare a virtual environment, install dependencies, and launch both servers for a complete console + API experience.

+
    +
  1. Install Python 3.11+ plus system build tools.
  2. Create a virtual environment and install requirements.txt.
  3. Start the services with python run.py.
+
python -m venv .venv
+. .venv/Scripts/activate      # PowerShell: .\\.venv\\Scripts\\Activate.ps1
+pip install -r requirements.txt
+
+# Run both API and UI
+python run.py
+
+# Or run individually
+python run.py --mode api
+python run.py --mode ui
+
+

Configuration lives in app/config.py; override variables via the shell (e.g., STORAGE_ROOT, API_BASE_URL, SECRET_KEY, MAX_UPLOAD_SIZE).
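+For example, to relocate storage and raise the upload cap before launching (the values below are illustrative; check app/config.py for how each variable is parsed):
+export STORAGE_ROOT=/srv/myfsio-data
+export MAX_UPLOAD_SIZE=52428800   # example value
+export API_BASE_URL=http://127.0.0.1:5000
+python run.py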

+
+
+
+
+
+ 02 +

Authenticate & manage IAM

+
+

MyFSIO seeds data/.myfsio.sys/config/iam.json with localadmin/localadmin. Sign in once, rotate it, then grant least-privilege access to teammates and tools.

+
+
    +
  1. Visit /ui/login, enter the bootstrap credentials, and rotate them immediately from the IAM page.
  2. Create additional users with descriptive display names and AWS-style inline policies (for example {"bucket": "*", "actions": ["list", "read"]}); a sample iam.json entry follows below.
  3. Rotate secrets when sharing with CI jobs—new secrets display once and persist to data/.myfsio.sys/config/iam.json.
  4. Bucket policies layer on top of IAM. Apply Private/Public presets or paste custom JSON; changes reload instantly.
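+A minimal data/.myfsio.sys/config/iam.json entry looks roughly like this (the shape mirrors the project's test fixtures; keys and names below are placeholders):
+{
+  "users": [
+    {
+      "access_key": "AKIAEXAMPLE",
+      "secret_key": "wJalrEXAMPLEKEY",
+      "display_name": "CI Robot",
+      "policies": [
+        {"bucket": "*", "actions": ["list", "read"]}
+      ]
+    }
+  ]
+}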
+
+

All API calls require X-Access-Key and X-Secret-Key headers. The UI stores them in the Flask session after you log in.

+
+
+
+
+
+ 03 +

Use the console effectively

+
+

Each workspace models an S3 workflow so you can administer buckets end-to-end.

+
+
+

Buckets

+
    +
  • Create/delete buckets from the overview. Badges reveal IAM-only, public-read, or custom-policy states.
  • Summary stats show live object counts and total capacity; click through for inventories.
+
+
+

Uploads

+
    +
  • Drag and drop folders or files into the upload modal. Objects above 16 MB switch to multipart automatically.
  • Progress rows highlight retries, throughput, and completion even if you close the modal.
+
+
+

Object details

+
    +
  • Selecting an object opens the preview card with metadata, inline viewers, presign generator, and version history.
  • Trigger downloads, deletes, restores, or metadata refreshes without leaving the panel.
+
+
+

Policies & versioning

+
    +
  • Toggle versioning (requires write access). Archived-only keys are flagged so you can restore them quickly.
  • The policy editor saves drafts, ships with presets, and hot-reloads data/.myfsio.sys/config/bucket_policies.json; the Public preset expands to a policy like the sample below.
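+For reference, the Public preset produces roughly this document (the bucket name is a placeholder):
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {"Sid": "AllowList", "Effect": "Allow", "Principal": "*", "Action": ["s3:ListBucket"], "Resource": ["arn:aws:s3:::my-bucket"]},
+    {"Sid": "AllowRead", "Effect": "Allow", "Principal": "*", "Action": ["s3:GetObject"], "Resource": ["arn:aws:s3:::my-bucket/*"]}
+  ]
+}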
+
+
+
+
+
+
+
+ 04 +

Automate with CLI & tools

+
+

Point standard S3 clients at {{ api_base }} and reuse the same IAM credentials.

+
+

AWS CLI

+
aws configure set aws_access_key_id <access_key>
+aws configure set aws_secret_access_key <secret_key>
+aws configure set default.region us-east-1
+
+aws --endpoint-url {{ api_base }} s3 ls
+aws --endpoint-url {{ api_base }} s3api create-bucket --bucket demo
+aws --endpoint-url {{ api_base }} s3 cp ./sample.txt s3://demo/sample.txt
+
+
+
+

s3cmd

+
cat > ~/.s3cfg-myfsio <<'EOF'
+host_base = {{ api_host }}
+host_bucket = %(bucket)s.{{ api_host }}
+access_key = <access_key>
+secret_key = <secret_key>
+use_https = False
+signature_v2 = False
+EOF
+
+s3cmd --config ~/.s3cfg-myfsio ls
+s3cmd --config ~/.s3cfg-myfsio put notes.txt s3://demo/notes.txt
+
+
+
+

curl / HTTPie

+
curl {{ api_base }}/ \
+  -H "X-Access-Key: <access_key>" \
+  -H "X-Secret-Key: <secret_key>"
+
+curl -X PUT {{ api_base }}/demo/notes.txt \
+  -H "X-Access-Key: <access_key>" \
+  -H "X-Secret-Key: <secret_key>" \
+  --data-binary @notes.txt
+
+curl -X POST {{ api_base }}/presign/demo/notes.txt \
+  -H "Content-Type: application/json" \
+  -H "X-Access-Key: <access_key>" \
+  -H "X-Secret-Key: <secret_key>" \
+  -d '{"method":"GET", "expires_in": 900}'
+
+
+
+
+
+
+
+ 05 +

Key REST endpoints

+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodPathPurpose
GET/List buckets accessible to the caller.
PUT/<bucket>Create a bucket.
DELETE/<bucket>Delete a bucket (must be empty).
GET/<bucket>List objects (supports prefix / max-keys queries).
PUT/<bucket>/<key>Upload or overwrite an object; UI helper handles multipart flows.
GET/<bucket>/<key>Download an object (UI adds ?download=1 to force attachment).
DELETE/<bucket>/<key>Delete an object.
GET/PUT/DELETE/bucket-policy/<bucket>Fetch, upsert, or remove a bucket policy.
POST/presign/<bucket>/<key>Generate SigV4 URLs for GET/PUT/DELETE with custom expiry.
+
+

All responses include X-Request-Id for tracing. Logs land in logs/api.log and logs/ui.log.

+
+
+
+
+
+ 06 +

API Examples

+
+

Common operations using boto3.

+ +
Multipart Upload
+
import boto3
+
+s3 = boto3.client('s3', endpoint_url='{{ api_base }}')
+
+# Initiate
+response = s3.create_multipart_upload(Bucket='mybucket', Key='large.bin')
+upload_id = response['UploadId']
+
+# Upload parts
+parts = []
+chunks = [b'chunk1', b'chunk2'] # Example data chunks
+for part_number, chunk in enumerate(chunks, start=1):
+    response = s3.upload_part(
+        Bucket='mybucket',
+        Key='large.bin',
+        PartNumber=part_number,
+        UploadId=upload_id,
+        Body=chunk
+    )
+    parts.append({'PartNumber': part_number, 'ETag': response['ETag']})
+
+# Complete
+s3.complete_multipart_upload(
+    Bucket='mybucket',
+    Key='large.bin',
+    UploadId=upload_id,
+    MultipartUpload={'Parts': parts}
+)
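+
+Basic object operations use the same client; this sketch mirrors the configuration exercised by the bundled boto3 integration test (bucket and key names are illustrative):
+import boto3
+from botocore.client import Config
+
+s3 = boto3.client(
+    's3',
+    endpoint_url='{{ api_base }}',
+    aws_access_key_id='<access_key>',
+    aws_secret_access_key='<secret_key>',
+    region_name='us-east-1',
+    config=Config(signature_version='s3v4', s3={'addressing_style': 'path'}),
+)
+
+s3.create_bucket(Bucket='demo')
+s3.put_object(Bucket='demo', Key='notes.txt', Body=b'hello')
+print(s3.get_object(Bucket='demo', Key='notes.txt')['Body'].read())
+print([obj['Key'] for obj in s3.list_objects_v2(Bucket='demo').get('Contents', [])])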
+
+
+
+
+
+ 07 +

Site Replication

+
+

Automatically copy new objects to another MyFSIO instance or S3-compatible service for backup or disaster recovery.

+ +

Setup Guide

+
    +
  1. Prepare Target: On the destination server, create a bucket (e.g., backup-bucket) and an IAM user with write permissions.
  2. Connect Source: On this server, go to Connections and add the target's API URL and credentials.
  3. Enable Rule: Go to the source bucket's Replication tab, select the connection, and enter the target bucket name.
+ +
+
+ + + + +
+ Headless Target Setup? +

If your target server has no UI, use the Python API directly to bootstrap credentials. See docs.md in the project root for the setup_target.py script.
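+Once the rule is enabled, verify it end-to-end: upload an object to the source bucket, then list the target bucket, for example with the AWS CLI (endpoint and bucket names below are placeholders):
+aws --endpoint-url http://target-server:5002 s3 ls s3://backup-bucket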

+
+
+
+
+
+
+
+
+ 08 +

Troubleshooting & tips

+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SymptomLikely causeFix
403 from API despite Public presetPolicy not saved or ARN mismatchReapply the preset and confirm arn:aws:s3:::bucket/* matches the bucket name.
UI shows stale policy/object dataBrowser cached prior stateRefresh; the server hot-reloads data/.myfsio.sys/config/bucket_policies.json and storage metadata.
Presign dialog returns 403User lacks required read/write/delete action or bucket policy deniesUpdate IAM inline policies or remove conflicting deny statements.
Large uploads fail instantlyMAX_UPLOAD_SIZE exceededRaise the env var or split the object.
Requests hit the wrong hostAPI_BASE_URL not updated after tunneling/forwardingSet API_BASE_URL in your shell or .env to match the published host.
+
+
+
+
+
+ +
+
+{% endblock %} diff --git a/templates/iam.html b/templates/iam.html new file mode 100644 index 0000000..0b52091 --- /dev/null +++ b/templates/iam.html @@ -0,0 +1,557 @@ +{% extends "base.html" %} +{% block content %} +{% set iam_disabled = 'disabled' if iam_locked else '' %} + + +{% if iam_locked %} + +{% endif %} + +{% if disclosed_secret %} + +{% endif %} + +{% if not iam_locked %} +
+
+
+ Configuration Preview + {{ config_summary.user_count }} users +
+
+
+
{{ config_document }}
+ +
+

Secrets are masked above. Access {{ config_summary.path }} directly to view full credentials.

+
+
+
+{% endif %} + +
+
+ Users + {% if iam_locked %}View only{% endif %} +
+ {% if iam_locked %} +
+

Sign in as an administrator to list or edit IAM users.

+
+ {% else %} +
+ + + + + + + + + + + {% for user in users %} + + + + + + + {% else %} + + + + {% endfor %} + +
Access KeyDisplay NamePoliciesActions
{{ user.access_key }}{{ user.display_name }} + {% for policy in user.policies %} + + {{ policy.bucket }} + {% if '*' in policy.actions %} + (*) + {% else %} + ({{ policy.actions|length }}) + {% endif %} + + {% endfor %} + +
+ + + + +
+
No IAM users defined.
+
+ {% endif %} +
+ + + + + + + + + + + + + + + + + +{% endblock %} + +{% block extra_scripts %} +{{ super() }} + +{% endblock %} diff --git a/templates/login.html b/templates/login.html new file mode 100644 index 0000000..84c18ab --- /dev/null +++ b/templates/login.html @@ -0,0 +1,29 @@ +{% extends "base.html" %} +{% block content %} +
+
+

Welcome to MyFSIO

+

A developer-friendly object storage solution for prototyping and local development.

+

Need help getting started? Review the project README and docs for bootstrap credentials, IAM walkthroughs, and bucket policy samples.

+
+
+
+
+

Sign in

+
+ +
+ + +
+
+ + +
+ +
+
+
+
+
+{% endblock %} diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..5b216db --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,167 @@ +import json +import sys +import threading +import time +from datetime import datetime, timezone +from pathlib import Path +from urllib.parse import quote, urlparse +import hashlib +import hmac + +import pytest +from werkzeug.serving import make_server + +sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + +from app import create_api_app + + +@pytest.fixture() +def app(tmp_path: Path): + storage_root = tmp_path / "data" + iam_config = tmp_path / "iam.json" + bucket_policies = tmp_path / "bucket_policies.json" + iam_payload = { + "users": [ + { + "access_key": "test", + "secret_key": "secret", + "display_name": "Test User", + "policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy"]}], + } + ] + } + iam_config.write_text(json.dumps(iam_payload)) + flask_app = create_api_app( + { + "TESTING": True, + "STORAGE_ROOT": storage_root, + "IAM_CONFIG": iam_config, + "BUCKET_POLICY_PATH": bucket_policies, + "API_BASE_URL": "http://testserver", + } + ) + yield flask_app + + +@pytest.fixture() +def client(app): + return app.test_client() + + +@pytest.fixture() +def live_server(app): + server = make_server("127.0.0.1", 0, app) + host, port = server.server_address + + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + time.sleep(0.05) + + try: + yield f"http://{host}:{port}" + finally: + server.shutdown() + thread.join(timeout=1) + + +def _sign(key, msg): + return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest() + + +def _get_signature_key(key, date_stamp, region_name, service_name): + k_date = _sign(("AWS4" + key).encode("utf-8"), date_stamp) + k_region = _sign(k_date, region_name) + k_service = _sign(k_region, service_name) + k_signing = _sign(k_service, "aws4_request") + return k_signing + + +@pytest.fixture +def signer(): + def _signer( + method, + path, + headers=None, + body=None, + access_key="test", + secret_key="secret", + region="us-east-1", + service="s3", + ): + if headers is None: + headers = {} + + now = datetime.now(timezone.utc) + amz_date = now.strftime("%Y%m%dT%H%M%SZ") + date_stamp = now.strftime("%Y%m%d") + + headers["X-Amz-Date"] = amz_date + + # Host header is required for SigV4 + if "Host" not in headers: + headers["Host"] = "localhost" # Default for Flask test client + + # Payload hash + if body is None: + body = b"" + elif isinstance(body, str): + body = body.encode("utf-8") + + payload_hash = hashlib.sha256(body).hexdigest() + headers["X-Amz-Content-Sha256"] = payload_hash + + # Canonical Request + canonical_uri = quote(path.split("?")[0]) + + # Query string + parsed = urlparse(path) + query_args = [] + if parsed.query: + for pair in parsed.query.split("&"): + if "=" in pair: + k, v = pair.split("=", 1) + else: + k, v = pair, "" + query_args.append((k, v)) + query_args.sort(key=lambda x: (x[0], x[1])) + + canonical_query_parts = [] + for k, v in query_args: + canonical_query_parts.append(f"{quote(k, safe='')}={quote(v, safe='')}") + canonical_query_string = "&".join(canonical_query_parts) + + # Canonical Headers + canonical_headers_parts = [] + signed_headers_parts = [] + for k, v in sorted(headers.items(), key=lambda x: x[0].lower()): + k_lower = k.lower() + v_trim = " ".join(str(v).split()) + canonical_headers_parts.append(f"{k_lower}:{v_trim}\n") + signed_headers_parts.append(k_lower) + + canonical_headers = 
"".join(canonical_headers_parts) + signed_headers = ";".join(signed_headers_parts) + + canonical_request = ( + f"{method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers}\n{payload_hash}" + ) + + # String to Sign + credential_scope = f"{date_stamp}/{region}/{service}/aws4_request" + string_to_sign = ( + f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}" + ) + + # Signature + signing_key = _get_signature_key(secret_key, date_stamp, region, service) + signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest() + + authorization = ( + f"AWS4-HMAC-SHA256 Credential={access_key}/{credential_scope}, SignedHeaders={signed_headers}, Signature={signature}" + ) + headers["Authorization"] = authorization + + return headers + + return _signer diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..affe9ec --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,485 @@ +from urllib.parse import urlsplit + + +def test_bucket_and_object_lifecycle(client, signer): + headers = signer("PUT", "/photos") + response = client.put("/photos", headers=headers) + assert response.status_code == 200 + + headers = signer("GET", "/") + response = client.get("/", headers=headers) + assert response.status_code == 200 + assert b"photos" in response.data + + data = b"hello world" + headers = signer("PUT", "/photos/image.txt", body=data) + response = client.put("/photos/image.txt", headers=headers, data=data) + assert response.status_code == 200 + assert "ETag" in response.headers + + headers = signer("GET", "/photos") + response = client.get("/photos", headers=headers) + assert response.status_code == 200 + assert b"image.txt" in response.data + + headers = signer("GET", "/photos/image.txt") + response = client.get("/photos/image.txt", headers=headers) + assert response.status_code == 200 + assert response.data == b"hello world" + + headers = signer("DELETE", "/photos/image.txt") + response = client.delete("/photos/image.txt", headers=headers) + assert response.status_code == 204 + + headers = signer("DELETE", "/photos") + response = client.delete("/photos", headers=headers) + assert response.status_code == 204 + + +def test_bulk_delete_objects(client, signer): + headers = signer("PUT", "/bulk") + assert client.put("/bulk", headers=headers).status_code == 200 + + headers = signer("PUT", "/bulk/first.txt", body=b"first") + assert client.put("/bulk/first.txt", headers=headers, data=b"first").status_code == 200 + + headers = signer("PUT", "/bulk/second.txt", body=b"second") + assert client.put("/bulk/second.txt", headers=headers, data=b"second").status_code == 200 + + delete_xml = b""" + + first.txt + missing.txt + + """ + # Note: query_string is part of the path for signing + headers = signer("POST", "/bulk?delete", headers={"Content-Type": "application/xml"}, body=delete_xml) + response = client.post( + "/bulk", + headers=headers, + query_string={"delete": ""}, + data=delete_xml, + ) + assert response.status_code == 200 + assert b"" in response.data + + headers = signer("GET", "/bulk") + listing = client.get("/bulk", headers=headers) + assert b"first.txt" not in listing.data + assert b"missing.txt" not in listing.data + assert b"second.txt" in listing.data + + +def test_bulk_delete_rejects_version_ids(client, signer): + headers = signer("PUT", "/bulkv") + assert client.put("/bulkv", headers=headers).status_code == 200 + + headers = signer("PUT", "/bulkv/keep.txt", 
body=b"keep") + assert client.put("/bulkv/keep.txt", headers=headers, data=b"keep").status_code == 200 + + delete_xml = b""" + + keep.txt123 + + """ + headers = signer("POST", "/bulkv?delete", headers={"Content-Type": "application/xml"}, body=delete_xml) + response = client.post( + "/bulkv", + headers=headers, + query_string={"delete": ""}, + data=delete_xml, + ) + assert response.status_code == 200 + assert b"InvalidRequest" in response.data + + headers = signer("GET", "/bulkv") + listing = client.get("/bulkv", headers=headers) + assert b"keep.txt" in listing.data + + +def test_request_id_header_present(client, signer): + headers = signer("GET", "/") + response = client.get("/", headers=headers) + assert response.status_code == 200 + assert response.headers.get("X-Request-ID") + + +def test_healthcheck_returns_version(client): + response = client.get("/healthz") + data = response.get_json() + assert response.status_code == 200 + assert data["status"] == "ok" + assert "version" in data + + +def test_missing_credentials_denied(client): + response = client.get("/") + assert response.status_code == 403 + + +def test_presign_and_bucket_policies(client, signer): + # Create bucket and object + headers = signer("PUT", "/docs") + assert client.put("/docs", headers=headers).status_code == 200 + + headers = signer("PUT", "/docs/readme.txt", body=b"content") + assert client.put("/docs/readme.txt", headers=headers, data=b"content").status_code == 200 + + # Generate presigned GET URL and follow it + json_body = {"method": "GET", "expires_in": 120} + # Flask test client json parameter automatically sets Content-Type and serializes body + # But for signing we need the body bytes. + import json + body_bytes = json.dumps(json_body).encode("utf-8") + headers = signer("POST", "/presign/docs/readme.txt", headers={"Content-Type": "application/json"}, body=body_bytes) + + response = client.post( + "/presign/docs/readme.txt", + headers=headers, + json=json_body, + ) + assert response.status_code == 200 + presigned_url = response.get_json()["url"] + parts = urlsplit(presigned_url) + presigned_path = f"{parts.path}?{parts.query}" + download = client.get(presigned_path) + assert download.status_code == 200 + assert download.data == b"content" + + # Attach a deny policy for GETs + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "DenyReads", + "Effect": "Deny", + "Principal": "*", + "Action": ["s3:GetObject"], + "Resource": ["arn:aws:s3:::docs/*"], + } + ], + } + policy_bytes = json.dumps(policy).encode("utf-8") + headers = signer("PUT", "/bucket-policy/docs", headers={"Content-Type": "application/json"}, body=policy_bytes) + assert client.put("/bucket-policy/docs", headers=headers, json=policy).status_code == 204 + + headers = signer("GET", "/bucket-policy/docs") + fetched = client.get("/bucket-policy/docs", headers=headers) + assert fetched.status_code == 200 + assert fetched.get_json()["Version"] == "2012-10-17" + + # Reads are now denied by bucket policy + headers = signer("GET", "/docs/readme.txt") + denied = client.get("/docs/readme.txt", headers=headers) + assert denied.status_code == 403 + + # Presign attempts are also denied + json_body = {"method": "GET", "expires_in": 60} + body_bytes = json.dumps(json_body).encode("utf-8") + headers = signer("POST", "/presign/docs/readme.txt", headers={"Content-Type": "application/json"}, body=body_bytes) + response = client.post( + "/presign/docs/readme.txt", + headers=headers, + json=json_body, + ) + assert response.status_code == 403 + + +def 
test_trailing_slash_returns_xml(client): + response = client.get("/ghost/") + assert response.status_code == 403 + assert response.mimetype == "application/xml" + assert b"" in response.data + + +def test_public_policy_allows_anonymous_list_and_read(client, signer): + headers = signer("PUT", "/public") + assert client.put("/public", headers=headers).status_code == 200 + + headers = signer("PUT", "/public/hello.txt", body=b"hi") + assert client.put("/public/hello.txt", headers=headers, data=b"hi").status_code == 200 + + assert client.get("/public").status_code == 403 + assert client.get("/public/hello.txt").status_code == 403 + + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AllowList", + "Effect": "Allow", + "Principal": "*", + "Action": ["s3:ListBucket"], + "Resource": ["arn:aws:s3:::public"], + }, + { + "Sid": "AllowRead", + "Effect": "Allow", + "Principal": "*", + "Action": ["s3:GetObject"], + "Resource": ["arn:aws:s3:::public/*"], + }, + ], + } + import json + policy_bytes = json.dumps(policy).encode("utf-8") + headers = signer("PUT", "/bucket-policy/public", headers={"Content-Type": "application/json"}, body=policy_bytes) + assert client.put("/bucket-policy/public", headers=headers, json=policy).status_code == 204 + + list_response = client.get("/public") + assert list_response.status_code == 200 + assert b"hello.txt" in list_response.data + + obj_response = client.get("/public/hello.txt") + assert obj_response.status_code == 200 + assert obj_response.data == b"hi" + + headers = signer("DELETE", "/public/hello.txt") + assert client.delete("/public/hello.txt", headers=headers).status_code == 204 + + headers = signer("DELETE", "/bucket-policy/public") + assert client.delete("/bucket-policy/public", headers=headers).status_code == 204 + + headers = signer("DELETE", "/public") + assert client.delete("/public", headers=headers).status_code == 204 + + +def test_principal_dict_with_object_get_only(client, signer): + headers = signer("PUT", "/mixed") + assert client.put("/mixed", headers=headers).status_code == 200 + + headers = signer("PUT", "/mixed/only.txt", body=b"ok") + assert client.put("/mixed/only.txt", headers=headers, data=b"ok").status_code == 200 + + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AllowObjects", + "Effect": "Allow", + "Principal": {"AWS": ["*"]}, + "Action": ["s3:GetObject"], + "Resource": ["arn:aws:s3:::mixed/*"], + }, + { + "Sid": "DenyList", + "Effect": "Deny", + "Principal": "*", + "Action": ["s3:ListBucket"], + "Resource": ["arn:aws:s3:::mixed"], + }, + ], + } + import json + policy_bytes = json.dumps(policy).encode("utf-8") + headers = signer("PUT", "/bucket-policy/mixed", headers={"Content-Type": "application/json"}, body=policy_bytes) + assert client.put("/bucket-policy/mixed", headers=headers, json=policy).status_code == 204 + + assert client.get("/mixed").status_code == 403 + allowed = client.get("/mixed/only.txt") + assert allowed.status_code == 200 + assert allowed.data == b"ok" + + headers = signer("DELETE", "/mixed/only.txt") + assert client.delete("/mixed/only.txt", headers=headers).status_code == 204 + + headers = signer("DELETE", "/bucket-policy/mixed") + assert client.delete("/bucket-policy/mixed", headers=headers).status_code == 204 + + headers = signer("DELETE", "/mixed") + assert client.delete("/mixed", headers=headers).status_code == 204 + + +def test_bucket_policy_wildcard_resource_allows_object_get(client, signer): + headers = signer("PUT", "/test") + assert client.put("/test", 
headers=headers).status_code == 200 + + headers = signer("PUT", "/test/vid.mp4", body=b"video") + assert client.put("/test/vid.mp4", headers=headers, data=b"video").status_code == 200 + + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"AWS": ["*"]}, + "Action": ["s3:GetObject"], + "Resource": ["arn:aws:s3:::*/*"], + }, + { + "Effect": "Deny", + "Principal": {"AWS": ["*"]}, + "Action": ["s3:ListBucket"], + "Resource": ["arn:aws:s3:::*"], + }, + ], + } + import json + policy_bytes = json.dumps(policy).encode("utf-8") + headers = signer("PUT", "/bucket-policy/test", headers={"Content-Type": "application/json"}, body=policy_bytes) + assert client.put("/bucket-policy/test", headers=headers, json=policy).status_code == 204 + + listing = client.get("/test") + assert listing.status_code == 403 + payload = client.get("/test/vid.mp4") + assert payload.status_code == 200 + assert payload.data == b"video" + + headers = signer("DELETE", "/test/vid.mp4") + assert client.delete("/test/vid.mp4", headers=headers).status_code == 204 + + headers = signer("DELETE", "/bucket-policy/test") + assert client.delete("/bucket-policy/test", headers=headers).status_code == 204 + + headers = signer("DELETE", "/test") + assert client.delete("/test", headers=headers).status_code == 204 + + +def test_head_object_returns_metadata(client, signer): + headers = signer("PUT", "/media") + assert client.put("/media", headers=headers).status_code == 200 + + payload = b"metadata" + upload_headers = {"X-Amz-Meta-Test": "demo"} + # Signer needs to know about custom headers + headers = signer("PUT", "/media/info.txt", headers=upload_headers, body=payload) + assert client.put("/media/info.txt", headers=headers, data=payload).status_code == 200 + + headers = signer("HEAD", "/media/info.txt") + head = client.head("/media/info.txt", headers=headers) + assert head.status_code == 200 + assert head.data == b"" + assert head.headers["Content-Length"] == str(len(payload)) + assert head.headers["X-Amz-Meta-Test"] == "demo" + + +def test_bucket_versioning_endpoint(client, signer): + headers = signer("PUT", "/history") + assert client.put("/history", headers=headers).status_code == 200 + + headers = signer("GET", "/history?versioning") + response = client.get("/history", headers=headers, query_string={"versioning": ""}) + assert response.status_code == 200 + assert b"Suspended" in response.data + + storage = client.application.extensions["object_storage"] + storage.set_bucket_versioning("history", True) + + headers = signer("GET", "/history?versioning") + enabled = client.get("/history", headers=headers, query_string={"versioning": ""}) + assert enabled.status_code == 200 + assert b"Enabled" in enabled.data + + +def test_bucket_tagging_cors_and_encryption_round_trip(client, signer): + headers = signer("PUT", "/config") + assert client.put("/config", headers=headers).status_code == 200 + + headers = signer("GET", "/config?tagging") + missing_tags = client.get("/config", headers=headers, query_string={"tagging": ""}) + assert missing_tags.status_code == 404 + + tagging_xml = b""" + + + envdev + teamplatform + + + """ + headers = signer("PUT", "/config?tagging", headers={"Content-Type": "application/xml"}, body=tagging_xml) + assert ( + client.put( + "/config", + headers=headers, + query_string={"tagging": ""}, + data=tagging_xml, + content_type="application/xml", + ).status_code + == 204 + ) + + headers = signer("GET", "/config?tagging") + tags = client.get("/config", headers=headers, 
query_string={"tagging": ""}) + assert tags.status_code == 200 + assert b"env" in tags.data + assert b"platform" in tags.data + + headers = signer("GET", "/config?cors") + missing_cors = client.get("/config", headers=headers, query_string={"cors": ""}) + assert missing_cors.status_code == 404 + + cors_xml = b""" + + + * + GET + * + X-Test + 600 + + + """ + headers = signer("PUT", "/config?cors", headers={"Content-Type": "application/xml"}, body=cors_xml) + assert ( + client.put( + "/config", + headers=headers, + query_string={"cors": ""}, + data=cors_xml, + content_type="application/xml", + ).status_code + == 204 + ) + + headers = signer("GET", "/config?cors") + cors = client.get("/config", headers=headers, query_string={"cors": ""}) + assert cors.status_code == 200 + assert b"*" in cors.data + assert b"GET" in cors.data + + # Clearing CORS rules with an empty payload removes the configuration + headers = signer("PUT", "/config?cors", body=b"") + assert ( + client.put( + "/config", + headers=headers, + query_string={"cors": ""}, + data=b"", + ).status_code + == 204 + ) + + headers = signer("GET", "/config?cors") + cleared_cors = client.get("/config", headers=headers, query_string={"cors": ""}) + assert cleared_cors.status_code == 404 + + headers = signer("GET", "/config?encryption") + missing_enc = client.get("/config", headers=headers, query_string={"encryption": ""}) + assert missing_enc.status_code == 404 + + encryption_xml = b""" + + + + AES256 + + + + """ + headers = signer("PUT", "/config?encryption", headers={"Content-Type": "application/xml"}, body=encryption_xml) + assert ( + client.put( + "/config", + headers=headers, + query_string={"encryption": ""}, + data=encryption_xml, + content_type="application/xml", + ).status_code + == 204 + ) + + headers = signer("GET", "/config?encryption") + encryption = client.get("/config", headers=headers, query_string={"encryption": ""}) + assert encryption.status_code == 200 + assert b"AES256" in encryption.data \ No newline at end of file diff --git a/tests/test_aws_sdk_compat.py b/tests/test_aws_sdk_compat.py new file mode 100644 index 0000000..e610a0c --- /dev/null +++ b/tests/test_aws_sdk_compat.py @@ -0,0 +1,54 @@ +import uuid + +import boto3 +import pytest +from botocore.client import Config + + +@pytest.mark.integration +def test_boto3_basic_operations(live_server): + bucket_name = f"boto3-test-{uuid.uuid4().hex[:8]}" + object_key = "folder/hello.txt" + + s3 = boto3.client( + "s3", + endpoint_url=live_server, + aws_access_key_id="test", + aws_secret_access_key="secret", + region_name="us-east-1", + use_ssl=False, + config=Config( + signature_version="s3v4", + retries={"max_attempts": 1}, + s3={"addressing_style": "path"}, + ), + ) + + # No need to inject custom headers anymore, as we support SigV4 + # def _inject_headers(params, **_kwargs): + # headers = params.setdefault("headers", {}) + # headers["X-Access-Key"] = "test" + # headers["X-Secret-Key"] = "secret" + + # s3.meta.events.register("before-call.s3", _inject_headers) + + s3.create_bucket(Bucket=bucket_name) + + try: + put_response = s3.put_object(Bucket=bucket_name, Key=object_key, Body=b"hello from boto3") + assert "ETag" in put_response + + obj = s3.get_object(Bucket=bucket_name, Key=object_key) + assert obj["Body"].read() == b"hello from boto3" + + listing = s3.list_objects_v2(Bucket=bucket_name) + contents = listing.get("Contents", []) + assert contents, "list_objects_v2 should return at least the object we uploaded" + keys = {entry["Key"] for entry in contents} + assert 
object_key in keys + + s3.delete_object(Bucket=bucket_name, Key=object_key) + post_delete = s3.list_objects_v2(Bucket=bucket_name) + assert not post_delete.get("Contents"), "Object should be removed before deleting bucket" + finally: + s3.delete_bucket(Bucket=bucket_name) \ No newline at end of file diff --git a/tests/test_edge_cases.py b/tests/test_edge_cases.py new file mode 100644 index 0000000..dae31f6 --- /dev/null +++ b/tests/test_edge_cases.py @@ -0,0 +1,67 @@ +import io +import pytest +from pathlib import Path +from app.storage import ObjectStorage, StorageError + +def test_concurrent_bucket_deletion(tmp_path: Path): + # This is a simplified test since true concurrency is hard to simulate deterministically in this setup + # We verify that deleting a non-existent bucket raises StorageError + storage = ObjectStorage(tmp_path) + storage.create_bucket("race") + storage.delete_bucket("race") + + with pytest.raises(StorageError, match="Bucket does not exist"): + storage.delete_bucket("race") + +def test_maximum_object_key_length(tmp_path: Path): + storage = ObjectStorage(tmp_path) + storage.create_bucket("maxkey") + + # AWS S3 max key length is 1024 bytes (UTF-8) + # Our implementation relies on the filesystem, so we might hit OS limits before 1024 + # But let's test a reasonably long key that should work + long_key = "a" * 200 + storage.put_object("maxkey", long_key, io.BytesIO(b"data")) + assert storage.get_object_path("maxkey", long_key).exists() + +def test_unicode_bucket_and_object_names(tmp_path: Path): + storage = ObjectStorage(tmp_path) + # Bucket names must be lowercase, numbers, hyphens, periods + # So unicode in bucket names is NOT allowed by our validation + with pytest.raises(StorageError): + storage.create_bucket("café") + + storage.create_bucket("unicode-test") + # Unicode in object keys IS allowed + key = "café/image.jpg" + storage.put_object("unicode-test", key, io.BytesIO(b"data")) + assert storage.get_object_path("unicode-test", key).exists() + + # Verify listing + objects = storage.list_objects("unicode-test") + assert any(o.key == key for o in objects) + +def test_special_characters_in_metadata(tmp_path: Path): + storage = ObjectStorage(tmp_path) + storage.create_bucket("meta-test") + + metadata = {"key": "value with spaces", "special": "!@#$%^&*()"} + storage.put_object("meta-test", "obj", io.BytesIO(b"data"), metadata=metadata) + + meta = storage.get_object_metadata("meta-test", "obj") + assert meta["key"] == "value with spaces" + assert meta["special"] == "!@#$%^&*()" + +def test_disk_full_scenario(tmp_path: Path, monkeypatch): + # Simulate disk full by mocking write to fail + storage = ObjectStorage(tmp_path) + storage.create_bucket("full") + + def mock_copyfileobj(*args, **kwargs): + raise OSError(28, "No space left on device") + + import shutil + monkeypatch.setattr(shutil, "copyfileobj", mock_copyfileobj) + + with pytest.raises(OSError, match="No space left on device"): + storage.put_object("full", "file", io.BytesIO(b"data")) diff --git a/tests/test_iam_lockout.py b/tests/test_iam_lockout.py new file mode 100644 index 0000000..28f867e --- /dev/null +++ b/tests/test_iam_lockout.py @@ -0,0 +1,58 @@ +import json +import time +from datetime import timedelta + +import pytest + +from app.iam import IamError, IamService + + +def _make_service(tmp_path, *, max_attempts=3, lockout_seconds=2): + config = tmp_path / "iam.json" + payload = { + "users": [ + { + "access_key": "test", + "secret_key": "secret", + "display_name": "Test User", + "policies": [ + { + "bucket": 
"*", + "actions": ["list", "read", "write", "delete", "policy"], + } + ], + } + ] + } + config.write_text(json.dumps(payload)) + service = IamService(config, auth_max_attempts=max_attempts, auth_lockout_minutes=lockout_seconds/60) + return service + + +def test_lockout_triggers_after_failed_attempts(tmp_path): + service = _make_service(tmp_path, max_attempts=3, lockout_seconds=30) + + for _ in range(service.auth_max_attempts): + with pytest.raises(IamError) as exc: + service.authenticate("test", "bad-secret") + assert "Invalid credentials" in str(exc.value) + + with pytest.raises(IamError) as exc: + service.authenticate("test", "bad-secret") + assert "Access temporarily locked" in str(exc.value) + + +def test_lockout_expires_and_allows_auth(tmp_path): + service = _make_service(tmp_path, max_attempts=2, lockout_seconds=1) + + for _ in range(service.auth_max_attempts): + with pytest.raises(IamError): + service.authenticate("test", "bad-secret") + + with pytest.raises(IamError) as exc: + service.authenticate("test", "secret") + assert "Access temporarily locked" in str(exc.value) + + time.sleep(1.1) + principal = service.authenticate("test", "secret") + assert principal.access_key == "test" diff --git a/tests/test_storage_features.py b/tests/test_storage_features.py new file mode 100644 index 0000000..d5c5ffc --- /dev/null +++ b/tests/test_storage_features.py @@ -0,0 +1,234 @@ +import io +import os +from pathlib import Path + +import pytest + +from app.storage import ObjectStorage, StorageError + + +def test_multipart_upload_round_trip(tmp_path): + storage = ObjectStorage(tmp_path) + storage.create_bucket("media") + upload_id = storage.initiate_multipart_upload("media", "large.bin", metadata={"env": "test"}) + + first_etag = storage.upload_multipart_part("media", upload_id, 1, io.BytesIO(b"hello ")) + second_etag = storage.upload_multipart_part("media", upload_id, 2, io.BytesIO(b"world")) + + meta = storage.complete_multipart_upload( + "media", + upload_id, + [ + {"part_number": 1, "etag": first_etag}, + {"part_number": 2, "etag": second_etag}, + ], + ) + + assert meta.key == "large.bin" + assert meta.size == len(b"hello world") + assert meta.metadata == {"env": "test"} + assert (tmp_path / "media" / "large.bin").read_bytes() == b"hello world" + + +def test_abort_multipart_upload(tmp_path): + storage = ObjectStorage(tmp_path) + storage.create_bucket("docs") + upload_id = storage.initiate_multipart_upload("docs", "draft.txt") + + storage.abort_multipart_upload("docs", upload_id) + + with pytest.raises(StorageError): + storage.upload_multipart_part("docs", upload_id, 1, io.BytesIO(b"data")) + + +def test_bucket_versioning_toggle_and_restore(tmp_path): + storage = ObjectStorage(tmp_path) + storage.create_bucket("history") + assert storage.is_versioning_enabled("history") is False + storage.set_bucket_versioning("history", True) + assert storage.is_versioning_enabled("history") is True + + storage.put_object("history", "note.txt", io.BytesIO(b"v1")) + storage.put_object("history", "note.txt", io.BytesIO(b"v2")) + versions = storage.list_object_versions("history", "note.txt") + assert versions + assert versions[0]["size"] == len(b"v1") + + storage.delete_object("history", "note.txt") + versions = storage.list_object_versions("history", "note.txt") + assert len(versions) >= 2 + + target_version = versions[-1]["version_id"] + storage.restore_object_version("history", "note.txt", target_version) + restored = (tmp_path / "history" / "note.txt").read_bytes() + assert restored == b"v1" + + +def 
test_bucket_configuration_helpers(tmp_path): + storage = ObjectStorage(tmp_path) + storage.create_bucket("cfg") + + assert storage.get_bucket_tags("cfg") == [] + storage.set_bucket_tags("cfg", [{"Key": "env", "Value": "dev"}]) + tags = storage.get_bucket_tags("cfg") + assert tags == [{"Key": "env", "Value": "dev"}] + storage.set_bucket_tags("cfg", None) + assert storage.get_bucket_tags("cfg") == [] + + assert storage.get_bucket_cors("cfg") == [] + cors_rules = [{"AllowedOrigins": ["*"], "AllowedMethods": ["GET"], "AllowedHeaders": ["*"]}] + storage.set_bucket_cors("cfg", cors_rules) + assert storage.get_bucket_cors("cfg") == cors_rules + storage.set_bucket_cors("cfg", None) + assert storage.get_bucket_cors("cfg") == [] + + assert storage.get_bucket_encryption("cfg") == {} + encryption = {"Rules": [{"SSEAlgorithm": "AES256"}]} + storage.set_bucket_encryption("cfg", encryption) + assert storage.get_bucket_encryption("cfg") == encryption + storage.set_bucket_encryption("cfg", None) + assert storage.get_bucket_encryption("cfg") == {} + + +def test_delete_object_retries_when_locked(tmp_path, monkeypatch): + storage = ObjectStorage(tmp_path) + storage.create_bucket("demo") + storage.put_object("demo", "video.mp4", io.BytesIO(b"data")) + + target_path = tmp_path / "demo" / "video.mp4" + original_unlink = Path.unlink + attempts = {"count": 0} + + def flaky_unlink(self): + if self == target_path and attempts["count"] < 1: + attempts["count"] += 1 + raise PermissionError("locked") + return original_unlink(self) + + monkeypatch.setattr(Path, "unlink", flaky_unlink) + + storage.delete_object("demo", "video.mp4") + assert attempts["count"] == 1 + + +def test_delete_bucket_handles_metadata_residue(tmp_path): + storage = ObjectStorage(tmp_path) + storage.create_bucket("demo") + storage.put_object("demo", "file.txt", io.BytesIO(b"data"), metadata={"env": "test"}) + storage.delete_object("demo", "file.txt") + meta_dir = tmp_path / ".myfsio.sys" / "buckets" / "demo" / "meta" + assert meta_dir.exists() + + storage.delete_bucket("demo") + assert not (tmp_path / "demo").exists() + assert not (tmp_path / ".myfsio.sys" / "buckets" / "demo").exists() + + +def test_delete_bucket_requires_archives_removed(tmp_path): + storage = ObjectStorage(tmp_path) + storage.create_bucket("demo") + storage.set_bucket_versioning("demo", True) + storage.put_object("demo", "file.txt", io.BytesIO(b"data")) + storage.delete_object("demo", "file.txt") + versions_dir = tmp_path / ".myfsio.sys" / "buckets" / "demo" / "versions" + assert versions_dir.exists() + + with pytest.raises(StorageError): + storage.delete_bucket("demo") + + storage.purge_object("demo", "file.txt") + storage.delete_bucket("demo") + assert not (tmp_path / "demo").exists() + assert not (tmp_path / ".myfsio.sys" / "buckets" / "demo").exists() + + +def test_delete_bucket_handles_multipart_residue(tmp_path): + storage = ObjectStorage(tmp_path) + storage.create_bucket("demo") + upload_id = storage.initiate_multipart_upload("demo", "file.txt") + # Leave upload incomplete so the system multipart directory sticks around. 
+ multipart_dir = tmp_path / ".myfsio.sys" / "multipart" / "demo" + assert multipart_dir.exists() + assert (multipart_dir / upload_id).exists() + + with pytest.raises(StorageError): + storage.delete_bucket("demo") + + storage.abort_multipart_upload("demo", upload_id) + storage.delete_bucket("demo") + assert not (tmp_path / "demo").exists() + assert not multipart_dir.exists() + + +def test_purge_object_raises_when_file_in_use(tmp_path, monkeypatch): + storage = ObjectStorage(tmp_path) + storage.create_bucket("demo") + storage.put_object("demo", "clip.mp4", io.BytesIO(b"data")) + + target_path = tmp_path / "demo" / "clip.mp4" + original_unlink = Path.unlink + + def always_locked(self): + if self == target_path: + raise PermissionError("still locked") + return original_unlink(self) + + monkeypatch.setattr(Path, "unlink", always_locked) + + with pytest.raises(StorageError) as exc: + storage.purge_object("demo", "clip.mp4") + assert "in use" in str(exc.value) + + +@pytest.mark.parametrize( + "object_key", + [ + "../secret.txt", + "folder/../secret.txt", + "/absolute.txt", + "\\backslash.txt", + "bad\x00key", + ], +) +def test_object_key_sanitization_blocks_traversal(object_key): + with pytest.raises(StorageError): + ObjectStorage._sanitize_object_key(object_key) + + +def test_object_key_length_limit_enforced(): + key = "a" * 1025 + with pytest.raises(StorageError): + ObjectStorage._sanitize_object_key(key) + + +@pytest.mark.parametrize( + "object_key", + [ + ".meta/data.bin", + ".versions/foo.bin", + ".multipart/upload.part", + ".myfsio.sys/system.bin", + ], +) +def test_object_key_blocks_reserved_paths(object_key): + with pytest.raises(StorageError): + ObjectStorage._sanitize_object_key(object_key) + + +def test_bucket_config_filename_allowed(tmp_path): + storage = ObjectStorage(tmp_path) + storage.create_bucket("demo") + storage.put_object("demo", ".bucket.json", io.BytesIO(b"{}")) + + objects = storage.list_objects("demo") + assert any(meta.key == ".bucket.json" for meta in objects) + + +@pytest.mark.skipif(os.name != "nt", reason="Windows-specific filename rules") +def test_windows_filename_rules_enforced(): + with pytest.raises(StorageError): + ObjectStorage._sanitize_object_key("CON/file.txt") + with pytest.raises(StorageError): + ObjectStorage._sanitize_object_key("folder/spaces ") + with pytest.raises(StorageError): + ObjectStorage._sanitize_object_key("C:drivepath.txt") diff --git a/tests/test_ui_bulk_delete.py b/tests/test_ui_bulk_delete.py new file mode 100644 index 0000000..a26135e --- /dev/null +++ b/tests/test_ui_bulk_delete.py @@ -0,0 +1,96 @@ +import io +import json +from pathlib import Path + +from app import create_app + + +def _build_app(tmp_path: Path): + storage_root = tmp_path / "data" + iam_config = tmp_path / "iam.json" + bucket_policies = tmp_path / "bucket_policies.json" + iam_payload = { + "users": [ + { + "access_key": "test", + "secret_key": "secret", + "display_name": "Bulk Tester", + "policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy"]}], + } + ] + } + iam_config.write_text(json.dumps(iam_payload)) + app = create_app( + { + "TESTING": True, + "STORAGE_ROOT": storage_root, + "IAM_CONFIG": iam_config, + "BUCKET_POLICY_PATH": bucket_policies, + "API_BASE_URL": "http://localhost", + "SECRET_KEY": "testing", + } + ) + return app + + +def _login(client): + return client.post( + "/ui/login", + data={"access_key": "test", "secret_key": "secret"}, + follow_redirects=True, + ) + + +def test_bulk_delete_json_route(tmp_path: Path): + app = 
_build_app(tmp_path) + storage = app.extensions["object_storage"] + storage.create_bucket("demo") + storage.put_object("demo", "first.txt", io.BytesIO(b"first")) + storage.put_object("demo", "second.txt", io.BytesIO(b"second")) + + client = app.test_client() + assert _login(client).status_code == 200 + + response = client.post( + "/ui/buckets/demo/objects/bulk-delete", + json={"keys": ["first.txt", "missing.txt"]}, + headers={"X-Requested-With": "XMLHttpRequest"}, + ) + assert response.status_code == 200 + payload = response.get_json() + assert payload["status"] == "ok" + assert set(payload["deleted"]) == {"first.txt", "missing.txt"} + assert payload["errors"] == [] + + listing = storage.list_objects("demo") + assert {meta.key for meta in listing} == {"second.txt"} + + +def test_bulk_delete_validation(tmp_path: Path): + app = _build_app(tmp_path) + storage = app.extensions["object_storage"] + storage.create_bucket("demo") + storage.put_object("demo", "keep.txt", io.BytesIO(b"keep")) + + client = app.test_client() + assert _login(client).status_code == 200 + + bad_response = client.post( + "/ui/buckets/demo/objects/bulk-delete", + json={"keys": []}, + headers={"X-Requested-With": "XMLHttpRequest"}, + ) + assert bad_response.status_code == 400 + assert bad_response.get_json()["status"] == "error" + + too_many = [f"obj-{index}.txt" for index in range(501)] + limit_response = client.post( + "/ui/buckets/demo/objects/bulk-delete", + json={"keys": too_many}, + headers={"X-Requested-With": "XMLHttpRequest"}, + ) + assert limit_response.status_code == 400 + assert limit_response.get_json()["status"] == "error" + + still_there = storage.list_objects("demo") + assert {meta.key for meta in still_there} == {"keep.txt"} diff --git a/tests/test_ui_docs.py b/tests/test_ui_docs.py new file mode 100644 index 0000000..84afe35 --- /dev/null +++ b/tests/test_ui_docs.py @@ -0,0 +1,56 @@ +import json +from pathlib import Path + +from app import create_app + + +def _build_ui_app(tmp_path: Path): + storage_root = tmp_path / "data" + iam_config = tmp_path / "iam.json" + bucket_policies = tmp_path / "bucket_policies.json" + iam_payload = { + "users": [ + { + "access_key": "test", + "secret_key": "secret", + "display_name": "Test User", + "policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy"]}], + } + ] + } + iam_config.write_text(json.dumps(iam_payload)) + return create_app( + { + "TESTING": True, + "STORAGE_ROOT": storage_root, + "IAM_CONFIG": iam_config, + "BUCKET_POLICY_PATH": bucket_policies, + "API_BASE_URL": "http://example.test:9000", + "SECRET_KEY": "testing", + } + ) + + +def test_docs_requires_login(tmp_path: Path): + app = _build_ui_app(tmp_path) + client = app.test_client() + response = client.get("/ui/docs") + assert response.status_code == 302 + assert response.headers["Location"].endswith("/ui/login") + + +def test_docs_render_for_authenticated_user(tmp_path: Path): + app = _build_ui_app(tmp_path) + client = app.test_client() + # Prime session by signing in + login_response = client.post( + "/ui/login", + data={"access_key": "test", "secret_key": "secret"}, + follow_redirects=True, + ) + assert login_response.status_code == 200 + + response = client.get("/ui/docs") + assert response.status_code == 200 + assert b"Your guide to MyFSIO" in response.data + assert b"http://example.test:9000" in response.data diff --git a/tests/test_ui_policy.py b/tests/test_ui_policy.py new file mode 100644 index 0000000..966dc12 --- /dev/null +++ b/tests/test_ui_policy.py @@ -0,0 +1,113 @@ 
+import io +import json +from pathlib import Path + +import pytest + +from app import create_app + + +DENY_LIST_ALLOW_GET_POLICY = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"AWS": ["*"]}, + "Action": ["s3:GetObject"], + "Resource": ["arn:aws:s3:::testbucket/*"], + }, + { + "Effect": "Deny", + "Principal": {"AWS": ["*"]}, + "Action": ["s3:ListBucket"], + "Resource": ["arn:aws:s3:::testbucket"], + }, + ], +} + + +def _make_ui_app(tmp_path: Path, *, enforce_policies: bool): + storage_root = tmp_path / "data" + iam_config = tmp_path / "iam.json" + bucket_policies = tmp_path / "bucket_policies.json" + iam_payload = { + "users": [ + { + "access_key": "test", + "secret_key": "secret", + "display_name": "Test User", + "policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy"]}], + } + ] + } + iam_config.write_text(json.dumps(iam_payload)) + app = create_app( + { + "TESTING": True, + "STORAGE_ROOT": storage_root, + "IAM_CONFIG": iam_config, + "BUCKET_POLICY_PATH": bucket_policies, + "API_BASE_URL": "http://testserver", + "SECRET_KEY": "testing", + "UI_ENFORCE_BUCKET_POLICIES": enforce_policies, + } + ) + storage = app.extensions["object_storage"] + storage.create_bucket("testbucket") + storage.put_object("testbucket", "vid.mp4", io.BytesIO(b"video")) + policy_store = app.extensions["bucket_policies"] + policy_store.set_policy("testbucket", DENY_LIST_ALLOW_GET_POLICY) + return app + + +@pytest.mark.parametrize("enforce", [True, False]) +def test_ui_bucket_policy_enforcement_toggle(tmp_path: Path, enforce: bool): + app = _make_ui_app(tmp_path, enforce_policies=enforce) + client = app.test_client() + client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True) + response = client.get("/ui/buckets/testbucket", follow_redirects=True) + if enforce: + assert b"Access denied by bucket policy" in response.data + else: + assert response.status_code == 200 + assert b"vid.mp4" in response.data + assert b"Access denied by bucket policy" not in response.data + + +def test_ui_bucket_policy_disabled_by_default(tmp_path: Path): + storage_root = tmp_path / "data" + iam_config = tmp_path / "iam.json" + bucket_policies = tmp_path / "bucket_policies.json" + iam_payload = { + "users": [ + { + "access_key": "test", + "secret_key": "secret", + "display_name": "Test User", + "policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy"]}], + } + ] + } + iam_config.write_text(json.dumps(iam_payload)) + app = create_app( + { + "TESTING": True, + "STORAGE_ROOT": storage_root, + "IAM_CONFIG": iam_config, + "BUCKET_POLICY_PATH": bucket_policies, + "API_BASE_URL": "http://testserver", + "SECRET_KEY": "testing", + } + ) + storage = app.extensions["object_storage"] + storage.create_bucket("testbucket") + storage.put_object("testbucket", "vid.mp4", io.BytesIO(b"video")) + policy_store = app.extensions["bucket_policies"] + policy_store.set_policy("testbucket", DENY_LIST_ALLOW_GET_POLICY) + + client = app.test_client() + client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True) + response = client.get("/ui/buckets/testbucket", follow_redirects=True) + assert response.status_code == 200 + assert b"vid.mp4" in response.data + assert b"Access denied by bucket policy" not in response.data
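As a companion to the boto3 compatibility test in `tests/test_aws_sdk_compat.py`, the SigV4 support it relies on can also be exercised without boto3. The sketch below is illustrative only: it assumes the same path-style endpoint, `us-east-1` region, and `test`/`secret` key pair used by the test fixtures, and signs a raw GET with botocore's `S3SigV4Auth` before sending it with `requests`.

```python
# Illustrative sketch, not part of the test suite: sign a raw GET with SigV4
# via botocore and send it with requests. The endpoint, bucket, key, and the
# test/secret credentials are assumptions borrowed from the fixtures above.
import requests
from botocore.auth import S3SigV4Auth
from botocore.awsrequest import AWSRequest
from botocore.credentials import Credentials


def sigv4_get(endpoint: str, bucket: str, key: str) -> requests.Response:
    # Path-style addressing, matching the boto3 client config in the test.
    url = f"{endpoint}/{bucket}/{key}"
    request = AWSRequest(method="GET", url=url)
    # Service name "s3" and region "us-east-1" mirror test_boto3_basic_operations.
    S3SigV4Auth(Credentials("test", "secret"), "s3", "us-east-1").add_auth(request)
    return requests.get(url, headers=dict(request.headers))


# Example usage, assuming the API server is running locally on port 5000:
# response = sigv4_get("http://127.0.0.1:5000", "demo", "folder/hello.txt")
# assert response.status_code == 200
```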