diff --git a/rust/.dockerignore b/.dockerignore similarity index 57% rename from rust/.dockerignore rename to .dockerignore index ca8fb51..713410f 100644 --- a/rust/.dockerignore +++ b/.dockerignore @@ -3,7 +3,7 @@ logs data tmp -myfsio-engine/target -myfsio-engine/tests +target +crates/*/tests Dockerfile .dockerignore diff --git a/.gitignore b/.gitignore index 781c734..fa994a2 100644 --- a/.gitignore +++ b/.gitignore @@ -26,12 +26,8 @@ dist/ *.egg-info/ .eggs/ -# Rust / maturin build artifacts -python/myfsio_core/target/ -python/myfsio_core/Cargo.lock - # Rust engine build artifacts -rust/myfsio-engine/target/ +target/ # Local runtime artifacts logs/ diff --git a/rust/myfsio-engine/Cargo.lock b/Cargo.lock similarity index 100% rename from rust/myfsio-engine/Cargo.lock rename to Cargo.lock diff --git a/rust/myfsio-engine/Cargo.toml b/Cargo.toml similarity index 100% rename from rust/myfsio-engine/Cargo.toml rename to Cargo.toml diff --git a/rust/Dockerfile b/Dockerfile similarity index 73% rename from rust/Dockerfile rename to Dockerfile index ed52350..b1b96a0 100644 --- a/rust/Dockerfile +++ b/Dockerfile @@ -6,10 +6,10 @@ RUN apt-get update \ && apt-get install -y --no-install-recommends build-essential pkg-config libssl-dev \ && rm -rf /var/lib/apt/lists/* -COPY myfsio-engine ./myfsio-engine +COPY Cargo.toml Cargo.lock ./ +COPY crates ./crates -RUN cd myfsio-engine \ - && cargo build --release --bin myfsio-server \ +RUN cargo build --release --bin myfsio-server \ && strip target/release/myfsio-server @@ -24,9 +24,9 @@ RUN apt-get update \ && useradd -m -u 1000 myfsio \ && chown -R myfsio:myfsio /app -COPY --from=builder /build/myfsio-engine/target/release/myfsio-server /usr/local/bin/myfsio-server -COPY --from=builder /build/myfsio-engine/crates/myfsio-server/templates /app/templates -COPY --from=builder /build/myfsio-engine/crates/myfsio-server/static /app/static +COPY --from=builder /build/target/release/myfsio-server /usr/local/bin/myfsio-server +COPY 
--from=builder /build/crates/myfsio-server/templates /app/templates +COPY --from=builder /build/crates/myfsio-server/static /app/static COPY docker-entrypoint.sh /app/docker-entrypoint.sh RUN chmod +x /app/docker-entrypoint.sh \ diff --git a/README.md b/README.md index 8d59b81..0eba2df 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ # MyFSIO -MyFSIO is an S3-compatible object storage server with a Rust runtime and a filesystem-backed storage engine. The active server lives under `rust/myfsio-engine` and serves both the S3 API and the built-in web UI from a single process. - -The `python/` implementation is deprecated as of 2026-04-21. It remains in the repository for migration reference and legacy tests, but new development and supported runtime usage should target the Rust server. +MyFSIO is an S3-compatible object storage server with a Rust runtime and a filesystem-backed storage engine. The repository root is the Cargo workspace; the server serves both the S3 API and the built-in web UI from a single process. ## Features @@ -29,7 +27,6 @@ If you want API-only mode, set `UI_ENABLED=false`. There is no separate "UI-only From the repository root: ```bash -cd rust/myfsio-engine cargo run -p myfsio-server -- ``` @@ -60,14 +57,13 @@ UI_ENABLED=false cargo run -p myfsio-server -- ## Building a Binary ```bash -cd rust/myfsio-engine cargo build --release -p myfsio-server ``` Binary locations: -- Linux/macOS: `rust/myfsio-engine/target/release/myfsio-server` -- Windows: `rust/myfsio-engine/target/release/myfsio-server.exe` +- Linux/macOS: `target/release/myfsio-server` +- Windows: `target/release/myfsio-server.exe` Run the built binary directly: @@ -166,10 +162,10 @@ data/ ## Docker -Build the Rust image from the `rust/` directory: +Build the Rust image from the repository root: ```bash -docker build -t myfsio ./rust +docker build -t myfsio . 
docker run --rm -p 5000:5000 -p 5100:5100 -v "${PWD}/data:/app/data" myfsio ``` @@ -180,11 +176,9 @@ If the instance sits behind a reverse proxy, set `API_BASE_URL` to the public S3 The repository includes `scripts/install.sh` for systemd-style Linux installs. Build the Rust binary first, then pass it to the installer: ```bash -cd rust/myfsio-engine cargo build --release -p myfsio-server -cd ../.. -sudo ./scripts/install.sh --binary ./rust/myfsio-engine/target/release/myfsio-server +sudo ./scripts/install.sh --binary ./target/release/myfsio-server ``` The installer copies the binary into `/opt/myfsio/myfsio`, writes `/opt/myfsio/myfsio.env`, and can register a `myfsio.service` unit. @@ -194,7 +188,6 @@ The installer copies the binary into `/opt/myfsio/myfsio`, writes `/opt/myfsio/m Run the Rust test suite from the workspace: ```bash -cd rust/myfsio-engine cargo test ``` @@ -209,4 +202,4 @@ cargo test } ``` -The `version` field comes from the Rust crate version in `rust/myfsio-engine/crates/myfsio-server/Cargo.toml`. +The `version` field comes from the Rust crate version in `crates/myfsio-server/Cargo.toml`. 
diff --git a/rust/myfsio-engine/crates/myfsio-auth/Cargo.toml b/crates/myfsio-auth/Cargo.toml similarity index 100% rename from rust/myfsio-engine/crates/myfsio-auth/Cargo.toml rename to crates/myfsio-auth/Cargo.toml diff --git a/rust/myfsio-engine/crates/myfsio-auth/src/fernet.rs b/crates/myfsio-auth/src/fernet.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-auth/src/fernet.rs rename to crates/myfsio-auth/src/fernet.rs diff --git a/rust/myfsio-engine/crates/myfsio-auth/src/iam.rs b/crates/myfsio-auth/src/iam.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-auth/src/iam.rs rename to crates/myfsio-auth/src/iam.rs diff --git a/rust/myfsio-engine/crates/myfsio-auth/src/lib.rs b/crates/myfsio-auth/src/lib.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-auth/src/lib.rs rename to crates/myfsio-auth/src/lib.rs diff --git a/rust/myfsio-engine/crates/myfsio-auth/src/principal.rs b/crates/myfsio-auth/src/principal.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-auth/src/principal.rs rename to crates/myfsio-auth/src/principal.rs diff --git a/rust/myfsio-engine/crates/myfsio-auth/src/sigv4.rs b/crates/myfsio-auth/src/sigv4.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-auth/src/sigv4.rs rename to crates/myfsio-auth/src/sigv4.rs diff --git a/rust/myfsio-engine/crates/myfsio-common/Cargo.toml b/crates/myfsio-common/Cargo.toml similarity index 100% rename from rust/myfsio-engine/crates/myfsio-common/Cargo.toml rename to crates/myfsio-common/Cargo.toml diff --git a/rust/myfsio-engine/crates/myfsio-common/src/constants.rs b/crates/myfsio-common/src/constants.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-common/src/constants.rs rename to crates/myfsio-common/src/constants.rs diff --git a/rust/myfsio-engine/crates/myfsio-common/src/error.rs b/crates/myfsio-common/src/error.rs similarity index 100% rename from 
rust/myfsio-engine/crates/myfsio-common/src/error.rs rename to crates/myfsio-common/src/error.rs diff --git a/rust/myfsio-engine/crates/myfsio-common/src/lib.rs b/crates/myfsio-common/src/lib.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-common/src/lib.rs rename to crates/myfsio-common/src/lib.rs diff --git a/rust/myfsio-engine/crates/myfsio-common/src/types.rs b/crates/myfsio-common/src/types.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-common/src/types.rs rename to crates/myfsio-common/src/types.rs diff --git a/rust/myfsio-engine/crates/myfsio-crypto/Cargo.toml b/crates/myfsio-crypto/Cargo.toml similarity index 100% rename from rust/myfsio-engine/crates/myfsio-crypto/Cargo.toml rename to crates/myfsio-crypto/Cargo.toml diff --git a/rust/myfsio-engine/crates/myfsio-crypto/src/aes_gcm.rs b/crates/myfsio-crypto/src/aes_gcm.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-crypto/src/aes_gcm.rs rename to crates/myfsio-crypto/src/aes_gcm.rs diff --git a/rust/myfsio-engine/crates/myfsio-crypto/src/encryption.rs b/crates/myfsio-crypto/src/encryption.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-crypto/src/encryption.rs rename to crates/myfsio-crypto/src/encryption.rs diff --git a/rust/myfsio-engine/crates/myfsio-crypto/src/hashing.rs b/crates/myfsio-crypto/src/hashing.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-crypto/src/hashing.rs rename to crates/myfsio-crypto/src/hashing.rs diff --git a/rust/myfsio-engine/crates/myfsio-crypto/src/kms.rs b/crates/myfsio-crypto/src/kms.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-crypto/src/kms.rs rename to crates/myfsio-crypto/src/kms.rs diff --git a/rust/myfsio-engine/crates/myfsio-crypto/src/lib.rs b/crates/myfsio-crypto/src/lib.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-crypto/src/lib.rs rename to crates/myfsio-crypto/src/lib.rs diff --git 
a/rust/myfsio-engine/crates/myfsio-server/Cargo.toml b/crates/myfsio-server/Cargo.toml similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/Cargo.toml rename to crates/myfsio-server/Cargo.toml diff --git a/rust/myfsio-engine/crates/myfsio-server/src/config.rs b/crates/myfsio-server/src/config.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/config.rs rename to crates/myfsio-server/src/config.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/handlers/admin.rs b/crates/myfsio-server/src/handlers/admin.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/handlers/admin.rs rename to crates/myfsio-server/src/handlers/admin.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/handlers/chunked.rs b/crates/myfsio-server/src/handlers/chunked.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/handlers/chunked.rs rename to crates/myfsio-server/src/handlers/chunked.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/handlers/config.rs b/crates/myfsio-server/src/handlers/config.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/handlers/config.rs rename to crates/myfsio-server/src/handlers/config.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/handlers/kms.rs b/crates/myfsio-server/src/handlers/kms.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/handlers/kms.rs rename to crates/myfsio-server/src/handlers/kms.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/handlers/mod.rs b/crates/myfsio-server/src/handlers/mod.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/handlers/mod.rs rename to crates/myfsio-server/src/handlers/mod.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/handlers/select.rs b/crates/myfsio-server/src/handlers/select.rs similarity index 100% rename from 
rust/myfsio-engine/crates/myfsio-server/src/handlers/select.rs rename to crates/myfsio-server/src/handlers/select.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/handlers/ui.rs b/crates/myfsio-server/src/handlers/ui.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/handlers/ui.rs rename to crates/myfsio-server/src/handlers/ui.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/handlers/ui_api.rs b/crates/myfsio-server/src/handlers/ui_api.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/handlers/ui_api.rs rename to crates/myfsio-server/src/handlers/ui_api.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/handlers/ui_pages.rs b/crates/myfsio-server/src/handlers/ui_pages.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/handlers/ui_pages.rs rename to crates/myfsio-server/src/handlers/ui_pages.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/lib.rs b/crates/myfsio-server/src/lib.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/lib.rs rename to crates/myfsio-server/src/lib.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/main.rs b/crates/myfsio-server/src/main.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/main.rs rename to crates/myfsio-server/src/main.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/middleware/auth.rs b/crates/myfsio-server/src/middleware/auth.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/middleware/auth.rs rename to crates/myfsio-server/src/middleware/auth.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/middleware/mod.rs b/crates/myfsio-server/src/middleware/mod.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/middleware/mod.rs rename to crates/myfsio-server/src/middleware/mod.rs diff --git 
a/rust/myfsio-engine/crates/myfsio-server/src/middleware/ratelimit.rs b/crates/myfsio-server/src/middleware/ratelimit.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/middleware/ratelimit.rs rename to crates/myfsio-server/src/middleware/ratelimit.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/middleware/session.rs b/crates/myfsio-server/src/middleware/session.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/middleware/session.rs rename to crates/myfsio-server/src/middleware/session.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/access_logging.rs b/crates/myfsio-server/src/services/access_logging.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/access_logging.rs rename to crates/myfsio-server/src/services/access_logging.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/acl.rs b/crates/myfsio-server/src/services/acl.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/acl.rs rename to crates/myfsio-server/src/services/acl.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/gc.rs b/crates/myfsio-server/src/services/gc.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/gc.rs rename to crates/myfsio-server/src/services/gc.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/integrity.rs b/crates/myfsio-server/src/services/integrity.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/integrity.rs rename to crates/myfsio-server/src/services/integrity.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/lifecycle.rs b/crates/myfsio-server/src/services/lifecycle.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/lifecycle.rs rename to crates/myfsio-server/src/services/lifecycle.rs diff --git 
a/rust/myfsio-engine/crates/myfsio-server/src/services/metrics.rs b/crates/myfsio-server/src/services/metrics.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/metrics.rs rename to crates/myfsio-server/src/services/metrics.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/mod.rs b/crates/myfsio-server/src/services/mod.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/mod.rs rename to crates/myfsio-server/src/services/mod.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/notifications.rs b/crates/myfsio-server/src/services/notifications.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/notifications.rs rename to crates/myfsio-server/src/services/notifications.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/object_lock.rs b/crates/myfsio-server/src/services/object_lock.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/object_lock.rs rename to crates/myfsio-server/src/services/object_lock.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/replication.rs b/crates/myfsio-server/src/services/replication.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/replication.rs rename to crates/myfsio-server/src/services/replication.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/s3_client.rs b/crates/myfsio-server/src/services/s3_client.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/s3_client.rs rename to crates/myfsio-server/src/services/s3_client.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/site_registry.rs b/crates/myfsio-server/src/services/site_registry.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/site_registry.rs rename to crates/myfsio-server/src/services/site_registry.rs 
diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/site_sync.rs b/crates/myfsio-server/src/services/site_sync.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/site_sync.rs rename to crates/myfsio-server/src/services/site_sync.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/system_metrics.rs b/crates/myfsio-server/src/services/system_metrics.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/system_metrics.rs rename to crates/myfsio-server/src/services/system_metrics.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/services/website_domains.rs b/crates/myfsio-server/src/services/website_domains.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/services/website_domains.rs rename to crates/myfsio-server/src/services/website_domains.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/session.rs b/crates/myfsio-server/src/session.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/session.rs rename to crates/myfsio-server/src/session.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/state.rs b/crates/myfsio-server/src/state.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/state.rs rename to crates/myfsio-server/src/state.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/stores/connections.rs b/crates/myfsio-server/src/stores/connections.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/stores/connections.rs rename to crates/myfsio-server/src/stores/connections.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/stores/mod.rs b/crates/myfsio-server/src/stores/mod.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/stores/mod.rs rename to crates/myfsio-server/src/stores/mod.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/src/templates.rs 
b/crates/myfsio-server/src/templates.rs similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/src/templates.rs rename to crates/myfsio-server/src/templates.rs diff --git a/rust/myfsio-engine/crates/myfsio-server/static/css/main.css b/crates/myfsio-server/static/css/main.css similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/static/css/main.css rename to crates/myfsio-server/static/css/main.css diff --git a/python/static/images/MyFSIO.ico b/crates/myfsio-server/static/images/MyFSIO.ico similarity index 100% rename from python/static/images/MyFSIO.ico rename to crates/myfsio-server/static/images/MyFSIO.ico diff --git a/python/static/images/MyFSIO.png b/crates/myfsio-server/static/images/MyFSIO.png similarity index 100% rename from python/static/images/MyFSIO.png rename to crates/myfsio-server/static/images/MyFSIO.png diff --git a/rust/myfsio-engine/crates/myfsio-server/static/js/bucket-detail-main.js b/crates/myfsio-server/static/js/bucket-detail-main.js similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/static/js/bucket-detail-main.js rename to crates/myfsio-server/static/js/bucket-detail-main.js diff --git a/python/static/js/bucket-detail-operations.js b/crates/myfsio-server/static/js/bucket-detail-operations.js similarity index 100% rename from python/static/js/bucket-detail-operations.js rename to crates/myfsio-server/static/js/bucket-detail-operations.js diff --git a/rust/myfsio-engine/crates/myfsio-server/static/js/bucket-detail-upload.js b/crates/myfsio-server/static/js/bucket-detail-upload.js similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/static/js/bucket-detail-upload.js rename to crates/myfsio-server/static/js/bucket-detail-upload.js diff --git a/python/static/js/bucket-detail-utils.js b/crates/myfsio-server/static/js/bucket-detail-utils.js similarity index 100% rename from python/static/js/bucket-detail-utils.js rename to 
crates/myfsio-server/static/js/bucket-detail-utils.js diff --git a/python/static/js/connections-management.js b/crates/myfsio-server/static/js/connections-management.js similarity index 100% rename from python/static/js/connections-management.js rename to crates/myfsio-server/static/js/connections-management.js diff --git a/rust/myfsio-engine/crates/myfsio-server/static/js/iam-management.js b/crates/myfsio-server/static/js/iam-management.js similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/static/js/iam-management.js rename to crates/myfsio-server/static/js/iam-management.js diff --git a/rust/myfsio-engine/crates/myfsio-server/static/js/ui-core.js b/crates/myfsio-server/static/js/ui-core.js similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/static/js/ui-core.js rename to crates/myfsio-server/static/js/ui-core.js diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/404.html b/crates/myfsio-server/templates/404.html similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/templates/404.html rename to crates/myfsio-server/templates/404.html diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/500.html b/crates/myfsio-server/templates/500.html similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/templates/500.html rename to crates/myfsio-server/templates/500.html diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/_convert.py b/crates/myfsio-server/templates/_convert.py similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/templates/_convert.py rename to crates/myfsio-server/templates/_convert.py diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/base.html b/crates/myfsio-server/templates/base.html similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/templates/base.html rename to crates/myfsio-server/templates/base.html diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/bucket_detail.html 
b/crates/myfsio-server/templates/bucket_detail.html similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/templates/bucket_detail.html rename to crates/myfsio-server/templates/bucket_detail.html diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/buckets.html b/crates/myfsio-server/templates/buckets.html similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/templates/buckets.html rename to crates/myfsio-server/templates/buckets.html diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/connections.html b/crates/myfsio-server/templates/connections.html similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/templates/connections.html rename to crates/myfsio-server/templates/connections.html diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/csrf_error.html b/crates/myfsio-server/templates/csrf_error.html similarity index 100% rename from rust/myfsio-engine/crates/myfsio-server/templates/csrf_error.html rename to crates/myfsio-server/templates/csrf_error.html diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/docs.html b/crates/myfsio-server/templates/docs.html similarity index 99% rename from rust/myfsio-engine/crates/myfsio-server/templates/docs.html rename to crates/myfsio-server/templates/docs.html index ab6cc86..300d1cb 100644 --- a/rust/myfsio-engine/crates/myfsio-server/templates/docs.html +++ b/crates/myfsio-server/templates/docs.html @@ -73,16 +73,13 @@
Build or run the Rust server and launch the API plus web UI from a single process.
rust/myfsio-engine. For the verified runtime configuration list, use the repository docs.md.
+ Runtime note: the repository root is the Cargo workspace. For the verified runtime configuration list, use the repository docs.md.
rust/myfsio-engine.cargo run -p myfsio-server --.cargo run -p myfsio-server --.cd rust/myfsio-engine
-
-# Run API + UI
+# Run API + UI
cargo run -p myfsio-server --
# Show resolved configuration
diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/iam.html b/crates/myfsio-server/templates/iam.html
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-server/templates/iam.html
rename to crates/myfsio-server/templates/iam.html
diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/login.html b/crates/myfsio-server/templates/login.html
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-server/templates/login.html
rename to crates/myfsio-server/templates/login.html
diff --git a/python/templates/metrics.html b/crates/myfsio-server/templates/metrics.html
similarity index 100%
rename from python/templates/metrics.html
rename to crates/myfsio-server/templates/metrics.html
diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/replication_wizard.html b/crates/myfsio-server/templates/replication_wizard.html
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-server/templates/replication_wizard.html
rename to crates/myfsio-server/templates/replication_wizard.html
diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/sites.html b/crates/myfsio-server/templates/sites.html
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-server/templates/sites.html
rename to crates/myfsio-server/templates/sites.html
diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/system.html b/crates/myfsio-server/templates/system.html
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-server/templates/system.html
rename to crates/myfsio-server/templates/system.html
diff --git a/rust/myfsio-engine/crates/myfsio-server/templates/website_domains.html b/crates/myfsio-server/templates/website_domains.html
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-server/templates/website_domains.html
rename to crates/myfsio-server/templates/website_domains.html
diff --git a/rust/myfsio-engine/crates/myfsio-server/tests/integration.rs b/crates/myfsio-server/tests/integration.rs
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-server/tests/integration.rs
rename to crates/myfsio-server/tests/integration.rs
diff --git a/rust/myfsio-engine/crates/myfsio-server/tests/template_render.rs b/crates/myfsio-server/tests/template_render.rs
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-server/tests/template_render.rs
rename to crates/myfsio-server/tests/template_render.rs
diff --git a/rust/myfsio-engine/crates/myfsio-storage/Cargo.toml b/crates/myfsio-storage/Cargo.toml
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-storage/Cargo.toml
rename to crates/myfsio-storage/Cargo.toml
diff --git a/rust/myfsio-engine/crates/myfsio-storage/src/error.rs b/crates/myfsio-storage/src/error.rs
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-storage/src/error.rs
rename to crates/myfsio-storage/src/error.rs
diff --git a/rust/myfsio-engine/crates/myfsio-storage/src/fs_backend.rs b/crates/myfsio-storage/src/fs_backend.rs
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-storage/src/fs_backend.rs
rename to crates/myfsio-storage/src/fs_backend.rs
diff --git a/rust/myfsio-engine/crates/myfsio-storage/src/lib.rs b/crates/myfsio-storage/src/lib.rs
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-storage/src/lib.rs
rename to crates/myfsio-storage/src/lib.rs
diff --git a/rust/myfsio-engine/crates/myfsio-storage/src/traits.rs b/crates/myfsio-storage/src/traits.rs
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-storage/src/traits.rs
rename to crates/myfsio-storage/src/traits.rs
diff --git a/rust/myfsio-engine/crates/myfsio-storage/src/validation.rs b/crates/myfsio-storage/src/validation.rs
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-storage/src/validation.rs
rename to crates/myfsio-storage/src/validation.rs
diff --git a/rust/myfsio-engine/crates/myfsio-xml/Cargo.toml b/crates/myfsio-xml/Cargo.toml
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-xml/Cargo.toml
rename to crates/myfsio-xml/Cargo.toml
diff --git a/rust/myfsio-engine/crates/myfsio-xml/src/lib.rs b/crates/myfsio-xml/src/lib.rs
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-xml/src/lib.rs
rename to crates/myfsio-xml/src/lib.rs
diff --git a/rust/myfsio-engine/crates/myfsio-xml/src/request.rs b/crates/myfsio-xml/src/request.rs
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-xml/src/request.rs
rename to crates/myfsio-xml/src/request.rs
diff --git a/rust/myfsio-engine/crates/myfsio-xml/src/response.rs b/crates/myfsio-xml/src/response.rs
similarity index 100%
rename from rust/myfsio-engine/crates/myfsio-xml/src/response.rs
rename to crates/myfsio-xml/src/response.rs
diff --git a/rust/docker-entrypoint.sh b/docker-entrypoint.sh
similarity index 100%
rename from rust/docker-entrypoint.sh
rename to docker-entrypoint.sh
diff --git a/docs.md b/docs.md
index 7b236a1..5a7dd52 100644
--- a/docs.md
+++ b/docs.md
@@ -1,26 +1,19 @@
# MyFSIO Rust Operations Guide
-This document describes the current Rust server in `rust/myfsio-engine`. It replaces the older Python-oriented runbook.
+This document describes the MyFSIO Rust server. The repository root is the Cargo workspace.
-The `python/` implementation is deprecated as of 2026-04-21. It is retained for migration reference and legacy validation only; production usage and new development should use the Rust server.
-
-## 1. What Changed
-
-The active runtime is now Rust:
+## 1. Overview
- One process serves both the S3 API and the web UI.
- The server entrypoint is `myfsio-server`.
- The main development workflow is `cargo run -p myfsio-server --`.
- API-only mode is controlled with `UI_ENABLED=false`.
-The deprecated `python/` directory may still contain older implementation code, templates, and tests, but it is not required to run the current server.
-
## 2. Quick Start
From the repository root:
```bash
-cd rust/myfsio-engine
cargo run -p myfsio-server --
```
@@ -34,10 +27,9 @@ On first startup, MyFSIO bootstraps an admin user in `data/.myfsio.sys/config/ia
### Windows
-From PowerShell:
+From PowerShell at the repository root:
```powershell
-cd rust\myfsio-engine
cargo run -p myfsio-server --
```
@@ -52,7 +44,6 @@ There is no separate UI-only mode in the Rust server.
## 3. Build and Run a Binary
```bash
-cd rust/myfsio-engine
cargo build --release -p myfsio-server
```
@@ -104,7 +95,7 @@ That makes local development and systemd installs behave consistently.
## 6. Verified Configuration Reference
-These values are taken from `rust/myfsio-engine/crates/myfsio-server/src/config.rs`.
+These values are taken from `crates/myfsio-server/src/config.rs`.
### Network and runtime
@@ -311,10 +302,10 @@ Notes:
## 10. Docker
-Build the Rust image from the `rust/` directory:
+Build the Rust image from the repository root:
```bash
-docker build -t myfsio ./rust
+docker build -t myfsio .
docker run --rm \
-p 5000:5000 \
-p 5100:5100 \
@@ -337,11 +328,9 @@ If you want generated links and presigned URLs to use an external hostname, set
The repository includes `scripts/install.sh`. For the Rust server, build the binary first and pass the path explicitly:
```bash
-cd rust/myfsio-engine
cargo build --release -p myfsio-server
-cd ../..
-sudo ./scripts/install.sh --binary ./rust/myfsio-engine/target/release/myfsio-server
+sudo ./scripts/install.sh --binary ./target/release/myfsio-server
```
The installer copies that binary to `/opt/myfsio/myfsio`, creates `/opt/myfsio/myfsio.env`, and can register a `myfsio.service` systemd unit.
@@ -396,14 +385,13 @@ The command:
Run the Rust test suite:
```bash
-cd rust/myfsio-engine
cargo test
```
If you are validating documentation changes for the UI, the most relevant coverage lives under:
-- `rust/myfsio-engine/crates/myfsio-server/tests`
-- `rust/myfsio-engine/crates/myfsio-storage/src`
+- `crates/myfsio-server/tests`
+- `crates/myfsio-storage/src`
## 15. API Notes
@@ -417,5 +405,5 @@ The Rust server exposes:
For a route-level view, inspect:
-- `rust/myfsio-engine/crates/myfsio-server/src/lib.rs`
-- `rust/myfsio-engine/crates/myfsio-server/src/handlers/`
+- `crates/myfsio-server/src/lib.rs`
+- `crates/myfsio-server/src/handlers/`
diff --git a/python/.dockerignore b/python/.dockerignore
deleted file mode 100644
index 33aab4c..0000000
--- a/python/.dockerignore
+++ /dev/null
@@ -1,17 +0,0 @@
-.git
-.gitignore
-.venv
-__pycache__
-*.pyc
-*.pyo
-*.pyd
-.pytest_cache
-.coverage
-htmlcov
-logs
-data
-tmp
-tests
-myfsio_core/target
-Dockerfile
-.dockerignore
diff --git a/python/Dockerfile b/python/Dockerfile
deleted file mode 100644
index c9f3066..0000000
--- a/python/Dockerfile
+++ /dev/null
@@ -1,56 +0,0 @@
-FROM python:3.14.3-slim AS builder
-
-ENV PYTHONDONTWRITEBYTECODE=1 \
- PYTHONUNBUFFERED=1
-
-WORKDIR /build
-
-RUN apt-get update \
- && apt-get install -y --no-install-recommends build-essential curl \
- && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal \
- && rm -rf /var/lib/apt/lists/*
-
-ENV PATH="/root/.cargo/bin:${PATH}"
-
-RUN pip install --no-cache-dir maturin
-
-COPY myfsio_core ./myfsio_core
-RUN cd myfsio_core \
- && maturin build --release --out /wheels
-
-
-FROM python:3.14.3-slim
-
-ENV PYTHONDONTWRITEBYTECODE=1 \
- PYTHONUNBUFFERED=1
-
-WORKDIR /app
-
-COPY requirements.txt ./
-RUN pip install --no-cache-dir -r requirements.txt
-
-COPY --from=builder /wheels/*.whl /tmp/
-RUN pip install --no-cache-dir /tmp/*.whl && rm /tmp/*.whl
-
-COPY app ./app
-COPY templates ./templates
-COPY static ./static
-COPY run.py ./
-COPY docker-entrypoint.sh ./
-
-RUN chmod +x docker-entrypoint.sh \
- && mkdir -p /app/data \
- && useradd -m -u 1000 myfsio \
- && chown -R myfsio:myfsio /app
-
-USER myfsio
-
-EXPOSE 5000 5100
-ENV APP_HOST=0.0.0.0 \
- FLASK_ENV=production \
- FLASK_DEBUG=0
-
-HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
- CMD python -c "import requests; requests.get('http://localhost:5000/myfsio/health', timeout=2)"
-
-CMD ["./docker-entrypoint.sh"]
diff --git a/python/README.md b/python/README.md
deleted file mode 100644
index 853b88a..0000000
--- a/python/README.md
+++ /dev/null
@@ -1,14 +0,0 @@
-# Deprecated Python Implementation
-
-The Python implementation of MyFSIO is deprecated as of 2026-04-21.
-
-The supported server runtime now lives in `../rust/myfsio-engine` and serves the S3 API and web UI from the Rust `myfsio-server` binary. Keep this tree for migration reference, compatibility checks, and legacy tests only.
-
-For normal development and operations, run:
-
-```bash
-cd ../rust/myfsio-engine
-cargo run -p myfsio-server --
-```
-
-Do not add new product features to the Python implementation unless they are needed to unblock a migration or compare behavior with the Rust server.
diff --git a/python/app/__init__.py b/python/app/__init__.py
deleted file mode 100644
index fc465fb..0000000
--- a/python/app/__init__.py
+++ /dev/null
@@ -1,763 +0,0 @@
-from __future__ import annotations
-
-import itertools
-import logging
-import mimetypes
-import os
-import shutil
-import sys
-import time
-from logging.handlers import RotatingFileHandler
-from pathlib import Path
-from datetime import timedelta
-from typing import Any, Dict, List, Optional
-
-from flask import Flask, Response, g, has_request_context, redirect, render_template, request, url_for
-from flask_cors import CORS
-from flask_wtf.csrf import CSRFError
-from werkzeug.middleware.proxy_fix import ProxyFix
-
-import io
-
-from .access_logging import AccessLoggingService
-from .operation_metrics import OperationMetricsCollector, classify_endpoint
-from .compression import GzipMiddleware
-from .acl import AclService
-from .bucket_policies import BucketPolicyStore
-from .config import AppConfig
-from .connections import ConnectionStore
-from .encryption import EncryptionManager
-from .extensions import limiter, csrf
-from .iam import IamService
-from .kms import KMSManager
-from .gc import GarbageCollector
-from .integrity import IntegrityChecker
-from .lifecycle import LifecycleManager
-from .notifications import NotificationService
-from .object_lock import ObjectLockService
-from .replication import ReplicationManager
-from .secret_store import EphemeralSecretStore
-from .site_registry import SiteRegistry, SiteInfo
-from .storage import ObjectStorage, StorageError
-from .version import get_version
-from .website_domains import WebsiteDomainStore
-
-_request_counter = itertools.count(1)
-
-
-class _ChunkedTransferMiddleware:
-
- def __init__(self, app):
- self.app = app
-
- def __call__(self, environ, start_response):
- if environ.get("REQUEST_METHOD") not in ("PUT", "POST"):
- return self.app(environ, start_response)
-
- transfer_encoding = environ.get("HTTP_TRANSFER_ENCODING", "")
- content_length = environ.get("CONTENT_LENGTH")
-
- if "chunked" in transfer_encoding.lower():
- if content_length:
- del environ["HTTP_TRANSFER_ENCODING"]
- else:
- raw = environ.get("wsgi.input")
- if raw:
- try:
- if hasattr(raw, "seek"):
- raw.seek(0)
- body = raw.read()
- except Exception:
- body = b""
- if body:
- environ["wsgi.input"] = io.BytesIO(body)
- environ["CONTENT_LENGTH"] = str(len(body))
- del environ["HTTP_TRANSFER_ENCODING"]
-
- content_length = environ.get("CONTENT_LENGTH")
- if not content_length or content_length == "0":
- sha256 = environ.get("HTTP_X_AMZ_CONTENT_SHA256", "")
- decoded_len = environ.get("HTTP_X_AMZ_DECODED_CONTENT_LENGTH", "")
- content_encoding = environ.get("HTTP_CONTENT_ENCODING", "")
- if ("STREAMING" in sha256.upper() or decoded_len
- or "aws-chunked" in content_encoding.lower()):
- raw = environ.get("wsgi.input")
- if raw:
- try:
- if hasattr(raw, "seek"):
- raw.seek(0)
- body = raw.read()
- except Exception:
- body = b""
- if body:
- environ["wsgi.input"] = io.BytesIO(body)
- environ["CONTENT_LENGTH"] = str(len(body))
-
- raw = environ.get("wsgi.input")
- if raw and hasattr(raw, "seek"):
- try:
- raw.seek(0)
- except Exception:
- pass
-
- return self.app(environ, start_response)
-
-
-def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
- """Migrate config file from legacy locations to the active path.
-
- Checks each legacy path in order and moves the first one found to the active path.
- This ensures backward compatibility for users upgrading from older versions.
- """
- active_path.parent.mkdir(parents=True, exist_ok=True)
-
- if active_path.exists():
- return active_path
-
- for legacy_path in legacy_paths:
- if legacy_path.exists():
- try:
- shutil.move(str(legacy_path), str(active_path))
- except OSError:
- shutil.copy2(legacy_path, active_path)
- try:
- legacy_path.unlink(missing_ok=True)
- except OSError:
- pass
- break
-
- return active_path
-
-
-def create_app(
- test_config: Optional[Dict[str, Any]] = None,
- *,
- include_api: bool = True,
- include_ui: bool = True,
-) -> Flask:
- """Create and configure the Flask application."""
- config = AppConfig.from_env(test_config)
-
- if getattr(sys, "frozen", False):
- project_root = Path(sys._MEIPASS)
- else:
- project_root = Path(__file__).resolve().parent.parent
-
- app = Flask(
- __name__,
- static_folder=str(project_root / "static"),
- template_folder=str(project_root / "templates"),
- )
- app.config.update(config.to_flask_config())
- if test_config:
- app.config.update(test_config)
- app.config.setdefault("APP_VERSION", get_version())
- app.permanent_session_lifetime = timedelta(days=int(app.config.get("SESSION_LIFETIME_DAYS", 30)))
- if app.config.get("TESTING"):
- app.config.setdefault("WTF_CSRF_ENABLED", False)
-
- # Trust X-Forwarded-* headers from proxies
- num_proxies = app.config.get("NUM_TRUSTED_PROXIES", 1)
- if num_proxies:
- if "NUM_TRUSTED_PROXIES" not in os.environ:
- logging.getLogger(__name__).warning(
- "NUM_TRUSTED_PROXIES not set, defaulting to 1. "
- "Set NUM_TRUSTED_PROXIES=0 if not behind a reverse proxy."
- )
- app.wsgi_app = ProxyFix(app.wsgi_app, x_for=num_proxies, x_proto=num_proxies, x_host=num_proxies, x_prefix=num_proxies)
-
- if app.config.get("ENABLE_GZIP", True):
- app.wsgi_app = GzipMiddleware(app.wsgi_app, compression_level=6)
-
- app.wsgi_app = _ChunkedTransferMiddleware(app.wsgi_app)
-
- _configure_cors(app)
- _configure_logging(app)
-
- limiter.init_app(app)
- csrf.init_app(app)
-
- storage = ObjectStorage(
- Path(app.config["STORAGE_ROOT"]),
- cache_ttl=app.config.get("OBJECT_CACHE_TTL", 60),
- object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100),
- bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0),
- object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024),
- meta_read_cache_max=app.config.get("META_READ_CACHE_MAX", 2048),
- )
-
- if app.config.get("WARM_CACHE_ON_STARTUP", True) and not app.config.get("TESTING"):
- storage.warm_cache_async()
-
- iam = IamService(
- Path(app.config["IAM_CONFIG"]),
- auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5),
- auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15),
- encryption_key=app.config.get("SECRET_KEY"),
- )
- bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
- secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))
-
- storage_root = Path(app.config["STORAGE_ROOT"])
- config_dir = storage_root / ".myfsio.sys" / "config"
- config_dir.mkdir(parents=True, exist_ok=True)
-
- connections_path = _migrate_config_file(
- active_path=config_dir / "connections.json",
- legacy_paths=[
- storage_root / ".myfsio.sys" / "connections.json",
- storage_root / ".connections.json",
- ],
- )
- replication_rules_path = _migrate_config_file(
- active_path=config_dir / "replication_rules.json",
- legacy_paths=[
- storage_root / ".myfsio.sys" / "replication_rules.json",
- storage_root / ".replication_rules.json",
- ],
- )
-
- connections = ConnectionStore(connections_path)
- replication = ReplicationManager(
- storage,
- connections,
- replication_rules_path,
- storage_root,
- connect_timeout=app.config.get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5),
- read_timeout=app.config.get("REPLICATION_READ_TIMEOUT_SECONDS", 30),
- max_retries=app.config.get("REPLICATION_MAX_RETRIES", 2),
- streaming_threshold_bytes=app.config.get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024),
- max_failures_per_bucket=app.config.get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50),
- )
-
- site_registry_path = config_dir / "site_registry.json"
- site_registry = SiteRegistry(site_registry_path)
- if app.config.get("SITE_ID") and not site_registry.get_local_site():
- site_registry.set_local_site(SiteInfo(
- site_id=app.config["SITE_ID"],
- endpoint=app.config.get("SITE_ENDPOINT") or "",
- region=app.config.get("SITE_REGION", "us-east-1"),
- priority=app.config.get("SITE_PRIORITY", 100),
- ))
-
- encryption_config = {
- "encryption_enabled": app.config.get("ENCRYPTION_ENABLED", False),
- "encryption_master_key_path": app.config.get("ENCRYPTION_MASTER_KEY_PATH"),
- "default_encryption_algorithm": app.config.get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"),
- "encryption_chunk_size_bytes": app.config.get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024),
- }
- encryption_manager = EncryptionManager(encryption_config)
-
- kms_manager = None
- if app.config.get("KMS_ENABLED", False):
- kms_keys_path = Path(app.config.get("KMS_KEYS_PATH", ""))
- kms_master_key_path = Path(app.config.get("ENCRYPTION_MASTER_KEY_PATH", ""))
- kms_manager = KMSManager(
- kms_keys_path,
- kms_master_key_path,
- generate_data_key_min_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1),
- generate_data_key_max_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024),
- )
- encryption_manager.set_kms_provider(kms_manager)
-
- if app.config.get("ENCRYPTION_ENABLED", False):
- from .encrypted_storage import EncryptedObjectStorage
- storage = EncryptedObjectStorage(storage, encryption_manager)
-
- acl_service = AclService(storage_root)
- object_lock_service = ObjectLockService(storage_root)
- notification_service = NotificationService(
- storage_root,
- allow_internal_endpoints=app.config.get("ALLOW_INTERNAL_ENDPOINTS", False),
- )
- access_logging_service = AccessLoggingService(storage_root)
- access_logging_service.set_storage(storage)
-
- lifecycle_manager = None
- if app.config.get("LIFECYCLE_ENABLED", False):
- base_storage = storage.storage if hasattr(storage, 'storage') else storage
- lifecycle_manager = LifecycleManager(
- base_storage,
- interval_seconds=app.config.get("LIFECYCLE_INTERVAL_SECONDS", 3600),
- storage_root=storage_root,
- max_history_per_bucket=app.config.get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50),
- )
- lifecycle_manager.start()
-
- gc_collector = None
- if app.config.get("GC_ENABLED", False):
- gc_collector = GarbageCollector(
- storage_root=storage_root,
- interval_hours=app.config.get("GC_INTERVAL_HOURS", 6.0),
- temp_file_max_age_hours=app.config.get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0),
- multipart_max_age_days=app.config.get("GC_MULTIPART_MAX_AGE_DAYS", 7),
- lock_file_max_age_hours=app.config.get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0),
- dry_run=app.config.get("GC_DRY_RUN", False),
- io_throttle_ms=app.config.get("GC_IO_THROTTLE_MS", 10),
- )
- gc_collector.start()
-
- integrity_checker = None
- if app.config.get("INTEGRITY_ENABLED", False):
- integrity_checker = IntegrityChecker(
- storage_root=storage_root,
- interval_hours=app.config.get("INTEGRITY_INTERVAL_HOURS", 24.0),
- batch_size=app.config.get("INTEGRITY_BATCH_SIZE", 1000),
- auto_heal=app.config.get("INTEGRITY_AUTO_HEAL", False),
- dry_run=app.config.get("INTEGRITY_DRY_RUN", False),
- io_throttle_ms=app.config.get("INTEGRITY_IO_THROTTLE_MS", 10),
- )
- integrity_checker.start()
-
- app.extensions["object_storage"] = storage
- app.extensions["iam"] = iam
- app.extensions["bucket_policies"] = bucket_policies
- app.extensions["secret_store"] = secret_store
- app.extensions["limiter"] = limiter
- app.extensions["connections"] = connections
- app.extensions["replication"] = replication
- app.extensions["encryption"] = encryption_manager
- app.extensions["kms"] = kms_manager
- app.extensions["acl"] = acl_service
- app.extensions["lifecycle"] = lifecycle_manager
- app.extensions["gc"] = gc_collector
- app.extensions["integrity"] = integrity_checker
- app.extensions["object_lock"] = object_lock_service
- app.extensions["notifications"] = notification_service
- app.extensions["access_logging"] = access_logging_service
- app.extensions["site_registry"] = site_registry
-
- website_domains_store = None
- if app.config.get("WEBSITE_HOSTING_ENABLED", False):
- website_domains_path = config_dir / "website_domains.json"
- website_domains_store = WebsiteDomainStore(website_domains_path)
- app.extensions["website_domains"] = website_domains_store
-
- from .s3_client import S3ProxyClient
- api_base = app.config.get("API_BASE_URL") or "http://127.0.0.1:5000"
- app.extensions["s3_proxy"] = S3ProxyClient(
- api_base_url=api_base,
- region=app.config.get("AWS_REGION", "us-east-1"),
- )
-
- operation_metrics_collector = None
- if app.config.get("OPERATION_METRICS_ENABLED", False):
- operation_metrics_collector = OperationMetricsCollector(
- storage_root,
- interval_minutes=app.config.get("OPERATION_METRICS_INTERVAL_MINUTES", 5),
- retention_hours=app.config.get("OPERATION_METRICS_RETENTION_HOURS", 24),
- )
- app.extensions["operation_metrics"] = operation_metrics_collector
-
- system_metrics_collector = None
- if app.config.get("METRICS_HISTORY_ENABLED", False):
- from .system_metrics import SystemMetricsCollector
- system_metrics_collector = SystemMetricsCollector(
- storage_root,
- interval_minutes=app.config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5),
- retention_hours=app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24),
- )
- system_metrics_collector.set_storage(storage)
- app.extensions["system_metrics"] = system_metrics_collector
-
- site_sync_worker = None
- if app.config.get("SITE_SYNC_ENABLED", False):
- from .site_sync import SiteSyncWorker
- site_sync_worker = SiteSyncWorker(
- storage=storage,
- connections=connections,
- replication_manager=replication,
- storage_root=storage_root,
- interval_seconds=app.config.get("SITE_SYNC_INTERVAL_SECONDS", 60),
- batch_size=app.config.get("SITE_SYNC_BATCH_SIZE", 100),
- connect_timeout=app.config.get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10),
- read_timeout=app.config.get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120),
- max_retries=app.config.get("SITE_SYNC_MAX_RETRIES", 2),
- clock_skew_tolerance_seconds=app.config.get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0),
- )
- site_sync_worker.start()
- app.extensions["site_sync"] = site_sync_worker
-
- @app.errorhandler(500)
- def internal_error(error):
- wants_html = request.accept_mimetypes.accept_html
- path = request.path or ""
- if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
- return render_template('500.html'), 500
- error_xml = (
- ''
- ''
- 'InternalError'
- 'An internal server error occurred '
- f'{path} '
- f'{getattr(g, "request_id", "-")} '
- ' '
- )
- return error_xml, 500, {'Content-Type': 'application/xml'}
-
- @app.errorhandler(CSRFError)
- def handle_csrf_error(e):
- wants_html = request.accept_mimetypes.accept_html
- path = request.path or ""
- if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
- return render_template('csrf_error.html', reason=e.description), 400
- error_xml = (
- ''
- ''
- 'CSRFError'
- f'{e.description} '
- f'{path} '
- f'{getattr(g, "request_id", "-")} '
- ' '
- )
- return error_xml, 400, {'Content-Type': 'application/xml'}
-
- @app.template_filter("filesizeformat")
- def filesizeformat(value: int) -> str:
- """Format bytes as human-readable file size."""
- for unit in ["B", "KB", "MB", "GB", "TB", "PB"]:
- if abs(value) < 1024.0 or unit == "PB":
- if unit == "B":
- return f"{int(value)} {unit}"
- return f"{value:.1f} {unit}"
- value /= 1024.0
- return f"{value:.1f} PB"
-
- @app.template_filter("timestamp_to_datetime")
- def timestamp_to_datetime(value: float) -> str:
- """Format Unix timestamp as human-readable datetime in configured timezone."""
- from datetime import datetime, timezone as dt_timezone
- from zoneinfo import ZoneInfo
- if not value:
- return "Never"
- try:
- dt_utc = datetime.fromtimestamp(value, dt_timezone.utc)
- display_tz = app.config.get("DISPLAY_TIMEZONE", "UTC")
- if display_tz and display_tz != "UTC":
- try:
- tz = ZoneInfo(display_tz)
- dt_local = dt_utc.astimezone(tz)
- return dt_local.strftime("%Y-%m-%d %H:%M:%S")
- except (KeyError, ValueError):
- pass
- return dt_utc.strftime("%Y-%m-%d %H:%M:%S UTC")
- except (ValueError, OSError):
- return "Unknown"
-
- @app.template_filter("format_datetime")
- def format_datetime_filter(dt, include_tz: bool = True) -> str:
- """Format datetime object as human-readable string in configured timezone."""
- from datetime import datetime, timezone as dt_timezone
- from zoneinfo import ZoneInfo
- if not dt:
- return ""
- try:
- display_tz = app.config.get("DISPLAY_TIMEZONE", "UTC")
- if display_tz and display_tz != "UTC":
- try:
- tz = ZoneInfo(display_tz)
- if dt.tzinfo is None:
- dt = dt.replace(tzinfo=dt_timezone.utc)
- dt = dt.astimezone(tz)
- except (KeyError, ValueError):
- pass
- tz_abbr = dt.strftime("%Z") or "UTC"
- if include_tz:
- return f"{dt.strftime('%b %d, %Y %H:%M')} ({tz_abbr})"
- return dt.strftime("%b %d, %Y %H:%M")
- except (ValueError, AttributeError):
- return str(dt)
-
- if include_api:
- from .s3_api import s3_api_bp
- from .kms_api import kms_api_bp
- from .admin_api import admin_api_bp
-
- app.register_blueprint(s3_api_bp)
- app.register_blueprint(kms_api_bp)
- app.register_blueprint(admin_api_bp)
- csrf.exempt(s3_api_bp)
- csrf.exempt(kms_api_bp)
- csrf.exempt(admin_api_bp)
-
- if include_ui:
- from .ui import ui_bp
-
- app.register_blueprint(ui_bp)
- if not include_api:
- @app.get("/")
- def ui_root_redirect():
- return redirect(url_for("ui.buckets_overview"))
-
- @app.errorhandler(404)
- def handle_not_found(error):
- wants_html = request.accept_mimetypes.accept_html
- path = request.path or ""
- if include_ui and wants_html:
- if not include_api or path.startswith("/ui") or path == "/":
- return render_template("404.html"), 404
- return error
-
- @app.get("/myfsio/health")
- def healthcheck() -> Dict[str, str]:
- return {"status": "ok"}
-
- return app
-
-
-def create_api_app(test_config: Optional[Dict[str, Any]] = None) -> Flask:
- return create_app(test_config, include_api=True, include_ui=False)
-
-
-def create_ui_app(test_config: Optional[Dict[str, Any]] = None) -> Flask:
- return create_app(test_config, include_api=False, include_ui=True)
-
-
-def _configure_cors(app: Flask) -> None:
- origins = app.config.get("CORS_ORIGINS", ["*"])
- methods = app.config.get("CORS_METHODS", ["GET", "PUT", "POST", "DELETE", "OPTIONS", "HEAD"])
- allow_headers = app.config.get("CORS_ALLOW_HEADERS", ["*"])
- expose_headers = app.config.get("CORS_EXPOSE_HEADERS", ["*"])
- CORS(
- app,
- resources={r"/*": {"origins": origins, "methods": methods, "allow_headers": allow_headers, "expose_headers": expose_headers}},
- supports_credentials=True,
- )
-
-
-class _RequestContextFilter(logging.Filter):
- """Inject request-specific attributes into log records."""
-
- def filter(self, record: logging.LogRecord) -> bool:
- if has_request_context():
- record.request_id = getattr(g, "request_id", "-")
- record.path = request.path
- record.method = request.method
- record.remote_addr = request.remote_addr or "-"
- else:
- record.request_id = getattr(record, "request_id", "-")
- record.path = getattr(record, "path", "-")
- record.method = getattr(record, "method", "-")
- record.remote_addr = getattr(record, "remote_addr", "-")
- return True
-
-
-def _configure_logging(app: Flask) -> None:
- formatter = logging.Formatter(
- "%(asctime)s | %(levelname)s | %(request_id)s | %(method)s %(path)s | %(message)s"
- )
-
- stream_handler = logging.StreamHandler(sys.stdout)
- stream_handler.setFormatter(formatter)
- stream_handler.addFilter(_RequestContextFilter())
-
- logger = app.logger
- for handler in logger.handlers[:]:
- handler.close()
- logger.handlers.clear()
- logger.addHandler(stream_handler)
-
- if app.config.get("LOG_TO_FILE"):
- log_file = Path(app.config["LOG_FILE"])
- log_file.parent.mkdir(parents=True, exist_ok=True)
- file_handler = RotatingFileHandler(
- log_file,
- maxBytes=int(app.config.get("LOG_MAX_BYTES", 5 * 1024 * 1024)),
- backupCount=int(app.config.get("LOG_BACKUP_COUNT", 3)),
- encoding="utf-8",
- )
- file_handler.setFormatter(formatter)
- file_handler.addFilter(_RequestContextFilter())
- logger.addHandler(file_handler)
-
- logger.setLevel(getattr(logging, app.config.get("LOG_LEVEL", "INFO"), logging.INFO))
-
- @app.before_request
- def _log_request_start() -> None:
- g.request_id = f"{os.getpid():x}{next(_request_counter):012x}"
- g.request_started_at = time.perf_counter()
- g.request_bytes_in = request.content_length or 0
-
- @app.before_request
- def _maybe_serve_website():
- if not app.config.get("WEBSITE_HOSTING_ENABLED"):
- return None
- if request.method not in {"GET", "HEAD"}:
- return None
- host = request.host
- if ":" in host:
- host = host.rsplit(":", 1)[0]
- host = host.lower()
- store = app.extensions.get("website_domains")
- if not store:
- return None
- bucket = store.get_bucket(host)
- if not bucket:
- return None
- storage = app.extensions["object_storage"]
- if not storage.bucket_exists(bucket):
- return _website_error_response(404, "Not Found")
- website_config = storage.get_bucket_website(bucket)
- if not website_config:
- return _website_error_response(404, "Not Found")
- index_doc = website_config.get("index_document", "index.html")
- error_doc = website_config.get("error_document")
- req_path = request.path.lstrip("/")
- if not req_path or req_path.endswith("/"):
- object_key = req_path + index_doc
- else:
- object_key = req_path
- try:
- obj_path = storage.get_object_path(bucket, object_key)
- except (StorageError, OSError):
- if object_key == req_path:
- try:
- obj_path = storage.get_object_path(bucket, req_path + "/" + index_doc)
- object_key = req_path + "/" + index_doc
- except (StorageError, OSError):
- return _serve_website_error(storage, bucket, error_doc, 404)
- else:
- return _serve_website_error(storage, bucket, error_doc, 404)
- content_type = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
- is_encrypted = False
- try:
- metadata = storage.get_object_metadata(bucket, object_key)
- is_encrypted = "x-amz-server-side-encryption" in metadata
- except (StorageError, OSError):
- pass
- if is_encrypted and hasattr(storage, "get_object_data"):
- try:
- data, _ = storage.get_object_data(bucket, object_key)
- file_size = len(data)
- except (StorageError, OSError):
- return _website_error_response(500, "Internal Server Error")
- else:
- data = None
- try:
- stat = obj_path.stat()
- file_size = stat.st_size
- except OSError:
- return _website_error_response(500, "Internal Server Error")
- if request.method == "HEAD":
- response = Response(status=200)
- response.headers["Content-Length"] = file_size
- response.headers["Content-Type"] = content_type
- response.headers["Accept-Ranges"] = "bytes"
- return response
- from .s3_api import _parse_range_header
- range_header = request.headers.get("Range")
- if range_header:
- ranges = _parse_range_header(range_header, file_size)
- if ranges is None:
- return Response(status=416, headers={"Content-Range": f"bytes */{file_size}"})
- start, end = ranges[0]
- length = end - start + 1
- if data is not None:
- partial_data = data[start:end + 1]
- response = Response(partial_data, status=206, mimetype=content_type)
- else:
- def _stream_range(file_path, start_pos, length_to_read):
- with file_path.open("rb") as f:
- f.seek(start_pos)
- remaining = length_to_read
- while remaining > 0:
- chunk = f.read(min(262144, remaining))
- if not chunk:
- break
- remaining -= len(chunk)
- yield chunk
- response = Response(_stream_range(obj_path, start, length), status=206, mimetype=content_type, direct_passthrough=True)
- response.headers["Content-Range"] = f"bytes {start}-{end}/{file_size}"
- response.headers["Content-Length"] = length
- response.headers["Accept-Ranges"] = "bytes"
- return response
- if data is not None:
- response = Response(data, mimetype=content_type)
- response.headers["Content-Length"] = file_size
- response.headers["Accept-Ranges"] = "bytes"
- return response
- def _stream(file_path):
- with file_path.open("rb") as f:
- while True:
- chunk = f.read(65536)
- if not chunk:
- break
- yield chunk
- response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
- response.headers["Content-Length"] = file_size
- response.headers["Accept-Ranges"] = "bytes"
- return response
-
- def _serve_website_error(storage, bucket, error_doc_key, status_code):
- if not error_doc_key:
- return _website_error_response(status_code, "Not Found" if status_code == 404 else "Error")
- try:
- obj_path = storage.get_object_path(bucket, error_doc_key)
- except (StorageError, OSError):
- return _website_error_response(status_code, "Not Found")
- content_type = mimetypes.guess_type(error_doc_key)[0] or "text/html"
- is_encrypted = False
- try:
- metadata = storage.get_object_metadata(bucket, error_doc_key)
- is_encrypted = "x-amz-server-side-encryption" in metadata
- except (StorageError, OSError):
- pass
- if is_encrypted and hasattr(storage, "get_object_data"):
- try:
- data, _ = storage.get_object_data(bucket, error_doc_key)
- response = Response(data, status=status_code, mimetype=content_type)
- response.headers["Content-Length"] = len(data)
- return response
- except (StorageError, OSError):
- return _website_error_response(status_code, "Not Found")
- try:
- data = obj_path.read_bytes()
- response = Response(data, status=status_code, mimetype=content_type)
- response.headers["Content-Length"] = len(data)
- return response
- except OSError:
- return _website_error_response(status_code, "Not Found")
-
- def _website_error_response(status_code, message):
- if status_code == 404:
- body = "404 page not found
"
- else:
- body = f"{status_code} {message}"
- return Response(body, status=status_code, mimetype="text/html")
-
- @app.after_request
- def _log_request_end(response):
- duration_ms = 0.0
- if hasattr(g, "request_started_at"):
- duration_ms = (time.perf_counter() - g.request_started_at) * 1000
- request_id = getattr(g, "request_id", f"{os.getpid():x}{next(_request_counter):012x}")
- response.headers.setdefault("X-Request-ID", request_id)
- if app.logger.isEnabledFor(logging.INFO):
- app.logger.info(
- "Request completed",
- extra={
- "path": request.path,
- "method": request.method,
- "remote_addr": request.remote_addr,
- },
- )
- response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
- response.headers["Server"] = "MyFSIO"
-
- operation_metrics = app.extensions.get("operation_metrics")
- if operation_metrics:
- bytes_in = getattr(g, "request_bytes_in", 0)
- bytes_out = response.content_length or 0
- error_code = getattr(g, "s3_error_code", None)
- endpoint_type = classify_endpoint(request.path)
- operation_metrics.record_request(
- method=request.method,
- endpoint_type=endpoint_type,
- status_code=response.status_code,
- latency_ms=duration_ms,
- bytes_in=bytes_in,
- bytes_out=bytes_out,
- error_code=error_code,
- )
-
- return response
diff --git a/python/app/access_logging.py b/python/app/access_logging.py
deleted file mode 100644
index f07ac99..0000000
--- a/python/app/access_logging.py
+++ /dev/null
@@ -1,265 +0,0 @@
-from __future__ import annotations
-
-import io
-import json
-import logging
-import queue
-import threading
-import time
-import uuid
-from dataclasses import dataclass, field
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class AccessLogEntry:
- bucket_owner: str = "-"
- bucket: str = "-"
- timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
- remote_ip: str = "-"
- requester: str = "-"
- request_id: str = field(default_factory=lambda: uuid.uuid4().hex[:16].upper())
- operation: str = "-"
- key: str = "-"
- request_uri: str = "-"
- http_status: int = 200
- error_code: str = "-"
- bytes_sent: int = 0
- object_size: int = 0
- total_time_ms: int = 0
- turn_around_time_ms: int = 0
- referrer: str = "-"
- user_agent: str = "-"
- version_id: str = "-"
- host_id: str = "-"
- signature_version: str = "SigV4"
- cipher_suite: str = "-"
- authentication_type: str = "AuthHeader"
- host_header: str = "-"
- tls_version: str = "-"
-
- def to_log_line(self) -> str:
- time_str = self.timestamp.strftime("[%d/%b/%Y:%H:%M:%S %z]")
- return (
- f'{self.bucket_owner} {self.bucket} {time_str} {self.remote_ip} '
- f'{self.requester} {self.request_id} {self.operation} {self.key} '
- f'"{self.request_uri}" {self.http_status} {self.error_code or "-"} '
- f'{self.bytes_sent or "-"} {self.object_size or "-"} {self.total_time_ms or "-"} '
- f'{self.turn_around_time_ms or "-"} "{self.referrer}" "{self.user_agent}" {self.version_id}'
- )
-
- def to_dict(self) -> Dict[str, Any]:
- return {
- "bucket_owner": self.bucket_owner,
- "bucket": self.bucket,
- "timestamp": self.timestamp.isoformat(),
- "remote_ip": self.remote_ip,
- "requester": self.requester,
- "request_id": self.request_id,
- "operation": self.operation,
- "key": self.key,
- "request_uri": self.request_uri,
- "http_status": self.http_status,
- "error_code": self.error_code,
- "bytes_sent": self.bytes_sent,
- "object_size": self.object_size,
- "total_time_ms": self.total_time_ms,
- "referrer": self.referrer,
- "user_agent": self.user_agent,
- "version_id": self.version_id,
- }
-
-
-@dataclass
-class LoggingConfiguration:
- target_bucket: str
- target_prefix: str = ""
- enabled: bool = True
-
- def to_dict(self) -> Dict[str, Any]:
- return {
- "LoggingEnabled": {
- "TargetBucket": self.target_bucket,
- "TargetPrefix": self.target_prefix,
- }
- }
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> Optional["LoggingConfiguration"]:
- logging_enabled = data.get("LoggingEnabled")
- if not logging_enabled:
- return None
- return cls(
- target_bucket=logging_enabled.get("TargetBucket", ""),
- target_prefix=logging_enabled.get("TargetPrefix", ""),
- enabled=True,
- )
-
-
-class AccessLoggingService:
- def __init__(self, storage_root: Path, flush_interval: int = 60, max_buffer_size: int = 1000):
- self.storage_root = storage_root
- self.flush_interval = flush_interval
- self.max_buffer_size = max_buffer_size
- self._configs: Dict[str, LoggingConfiguration] = {}
- self._buffer: Dict[str, List[AccessLogEntry]] = {}
- self._buffer_lock = threading.Lock()
- self._shutdown = threading.Event()
- self._storage = None
-
- self._flush_thread = threading.Thread(target=self._flush_loop, name="access-log-flush", daemon=True)
- self._flush_thread.start()
-
- def set_storage(self, storage: Any) -> None:
- self._storage = storage
-
- def _config_path(self, bucket_name: str) -> Path:
- return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "logging.json"
-
- def get_bucket_logging(self, bucket_name: str) -> Optional[LoggingConfiguration]:
- if bucket_name in self._configs:
- return self._configs[bucket_name]
-
- config_path = self._config_path(bucket_name)
- if not config_path.exists():
- return None
-
- try:
- data = json.loads(config_path.read_text(encoding="utf-8"))
- config = LoggingConfiguration.from_dict(data)
- if config:
- self._configs[bucket_name] = config
- return config
- except (json.JSONDecodeError, OSError) as e:
- logger.warning(f"Failed to load logging config for {bucket_name}: {e}")
- return None
-
- def set_bucket_logging(self, bucket_name: str, config: LoggingConfiguration) -> None:
- config_path = self._config_path(bucket_name)
- config_path.parent.mkdir(parents=True, exist_ok=True)
- config_path.write_text(json.dumps(config.to_dict(), indent=2), encoding="utf-8")
- self._configs[bucket_name] = config
-
- def delete_bucket_logging(self, bucket_name: str) -> None:
- config_path = self._config_path(bucket_name)
- try:
- if config_path.exists():
- config_path.unlink()
- except OSError:
- pass
- self._configs.pop(bucket_name, None)
-
- def log_request(
- self,
- bucket_name: str,
- *,
- operation: str,
- key: str = "-",
- remote_ip: str = "-",
- requester: str = "-",
- request_uri: str = "-",
- http_status: int = 200,
- error_code: str = "",
- bytes_sent: int = 0,
- object_size: int = 0,
- total_time_ms: int = 0,
- referrer: str = "-",
- user_agent: str = "-",
- version_id: str = "-",
- request_id: str = "",
- ) -> None:
- config = self.get_bucket_logging(bucket_name)
- if not config or not config.enabled:
- return
-
- entry = AccessLogEntry(
- bucket_owner="local-owner",
- bucket=bucket_name,
- remote_ip=remote_ip,
- requester=requester,
- request_id=request_id or uuid.uuid4().hex[:16].upper(),
- operation=operation,
- key=key,
- request_uri=request_uri,
- http_status=http_status,
- error_code=error_code,
- bytes_sent=bytes_sent,
- object_size=object_size,
- total_time_ms=total_time_ms,
- referrer=referrer,
- user_agent=user_agent,
- version_id=version_id,
- )
-
- target_key = f"{config.target_bucket}:{config.target_prefix}"
- should_flush = False
- with self._buffer_lock:
- if target_key not in self._buffer:
- self._buffer[target_key] = []
- self._buffer[target_key].append(entry)
- should_flush = len(self._buffer[target_key]) >= self.max_buffer_size
-
- if should_flush:
- self._flush_buffer(target_key)
-
- def _flush_loop(self) -> None:
- while not self._shutdown.is_set():
- self._shutdown.wait(timeout=self.flush_interval)
- if not self._shutdown.is_set():
- self._flush_all()
-
- def _flush_all(self) -> None:
- with self._buffer_lock:
- targets = list(self._buffer.keys())
-
- for target_key in targets:
- self._flush_buffer(target_key)
-
- def _flush_buffer(self, target_key: str) -> None:
- with self._buffer_lock:
- entries = self._buffer.pop(target_key, [])
-
- if not entries or not self._storage:
- return
-
- try:
- bucket_name, prefix = target_key.split(":", 1)
- except ValueError:
- logger.error(f"Invalid target key: {target_key}")
- return
-
- now = datetime.now(timezone.utc)
- log_key = f"{prefix}{now.strftime('%Y-%m-%d-%H-%M-%S')}-{uuid.uuid4().hex[:8]}"
-
- log_content = "\n".join(entry.to_log_line() for entry in entries) + "\n"
-
- try:
- stream = io.BytesIO(log_content.encode("utf-8"))
- self._storage.put_object(bucket_name, log_key, stream, enforce_quota=False)
- logger.info(f"Flushed {len(entries)} access log entries to {bucket_name}/{log_key}")
- except Exception as e:
- logger.error(f"Failed to write access log to {bucket_name}/{log_key}: {e}")
- with self._buffer_lock:
- if target_key not in self._buffer:
- self._buffer[target_key] = []
- self._buffer[target_key] = entries + self._buffer[target_key]
-
- def flush(self) -> None:
- self._flush_all()
-
- def shutdown(self) -> None:
- self._shutdown.set()
- self._flush_all()
- self._flush_thread.join(timeout=5.0)
-
- def get_stats(self) -> Dict[str, Any]:
- with self._buffer_lock:
- buffered = sum(len(entries) for entries in self._buffer.values())
- return {
- "buffered_entries": buffered,
- "target_buckets": len(self._buffer),
- }
diff --git a/python/app/acl.py b/python/app/acl.py
deleted file mode 100644
index 6ee2be0..0000000
--- a/python/app/acl.py
+++ /dev/null
@@ -1,204 +0,0 @@
-from __future__ import annotations
-
-import json
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Set
-
-
-ACL_PERMISSION_FULL_CONTROL = "FULL_CONTROL"
-ACL_PERMISSION_WRITE = "WRITE"
-ACL_PERMISSION_WRITE_ACP = "WRITE_ACP"
-ACL_PERMISSION_READ = "READ"
-ACL_PERMISSION_READ_ACP = "READ_ACP"
-
-ALL_PERMISSIONS = {
- ACL_PERMISSION_FULL_CONTROL,
- ACL_PERMISSION_WRITE,
- ACL_PERMISSION_WRITE_ACP,
- ACL_PERMISSION_READ,
- ACL_PERMISSION_READ_ACP,
-}
-
-PERMISSION_TO_ACTIONS = {
- ACL_PERMISSION_FULL_CONTROL: {"read", "write", "delete", "list", "share"},
- ACL_PERMISSION_WRITE: {"write", "delete"},
- ACL_PERMISSION_WRITE_ACP: {"share"},
- ACL_PERMISSION_READ: {"read", "list"},
- ACL_PERMISSION_READ_ACP: {"share"},
-}
-
-GRANTEE_ALL_USERS = "*"
-GRANTEE_AUTHENTICATED_USERS = "authenticated"
-
-
-@dataclass
-class AclGrant:
- grantee: str
- permission: str
-
- def to_dict(self) -> Dict[str, str]:
- return {"grantee": self.grantee, "permission": self.permission}
-
- @classmethod
- def from_dict(cls, data: Dict[str, str]) -> "AclGrant":
- return cls(grantee=data["grantee"], permission=data["permission"])
-
-
-@dataclass
-class Acl:
- owner: str
- grants: List[AclGrant] = field(default_factory=list)
-
- def to_dict(self) -> Dict[str, Any]:
- return {
- "owner": self.owner,
- "grants": [g.to_dict() for g in self.grants],
- }
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> "Acl":
- return cls(
- owner=data.get("owner", ""),
- grants=[AclGrant.from_dict(g) for g in data.get("grants", [])],
- )
-
- def get_allowed_actions(self, principal_id: Optional[str], is_authenticated: bool = True) -> Set[str]:
- actions: Set[str] = set()
- if principal_id and principal_id == self.owner:
- actions.update(PERMISSION_TO_ACTIONS[ACL_PERMISSION_FULL_CONTROL])
- for grant in self.grants:
- if grant.grantee == GRANTEE_ALL_USERS:
- actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set()))
- elif grant.grantee == GRANTEE_AUTHENTICATED_USERS and is_authenticated:
- actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set()))
- elif principal_id and grant.grantee == principal_id:
- actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set()))
- return actions
-
-
-CANNED_ACLS = {
- "private": lambda owner: Acl(
- owner=owner,
- grants=[AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL)],
- ),
- "public-read": lambda owner: Acl(
- owner=owner,
- grants=[
- AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
- AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ),
- ],
- ),
- "public-read-write": lambda owner: Acl(
- owner=owner,
- grants=[
- AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
- AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ),
- AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_WRITE),
- ],
- ),
- "authenticated-read": lambda owner: Acl(
- owner=owner,
- grants=[
- AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
- AclGrant(grantee=GRANTEE_AUTHENTICATED_USERS, permission=ACL_PERMISSION_READ),
- ],
- ),
- "bucket-owner-read": lambda owner: Acl(
- owner=owner,
- grants=[
- AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
- ],
- ),
- "bucket-owner-full-control": lambda owner: Acl(
- owner=owner,
- grants=[
- AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
- ],
- ),
-}
-
-
-def create_canned_acl(canned_acl: str, owner: str) -> Acl:
- factory = CANNED_ACLS.get(canned_acl)
- if not factory:
- return CANNED_ACLS["private"](owner)
- return factory(owner)
-
-
-class AclService:
- def __init__(self, storage_root: Path):
- self.storage_root = storage_root
- self._bucket_acl_cache: Dict[str, Acl] = {}
-
- def _bucket_acl_path(self, bucket_name: str) -> Path:
- return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / ".acl.json"
-
- def get_bucket_acl(self, bucket_name: str) -> Optional[Acl]:
- if bucket_name in self._bucket_acl_cache:
- return self._bucket_acl_cache[bucket_name]
- acl_path = self._bucket_acl_path(bucket_name)
- if not acl_path.exists():
- return None
- try:
- data = json.loads(acl_path.read_text(encoding="utf-8"))
- acl = Acl.from_dict(data)
- self._bucket_acl_cache[bucket_name] = acl
- return acl
- except (OSError, json.JSONDecodeError):
- return None
-
- def set_bucket_acl(self, bucket_name: str, acl: Acl) -> None:
- acl_path = self._bucket_acl_path(bucket_name)
- acl_path.parent.mkdir(parents=True, exist_ok=True)
- acl_path.write_text(json.dumps(acl.to_dict(), indent=2), encoding="utf-8")
- self._bucket_acl_cache[bucket_name] = acl
-
- def set_bucket_canned_acl(self, bucket_name: str, canned_acl: str, owner: str) -> Acl:
- acl = create_canned_acl(canned_acl, owner)
- self.set_bucket_acl(bucket_name, acl)
- return acl
-
- def delete_bucket_acl(self, bucket_name: str) -> None:
- acl_path = self._bucket_acl_path(bucket_name)
- if acl_path.exists():
- acl_path.unlink()
- self._bucket_acl_cache.pop(bucket_name, None)
-
- def evaluate_bucket_acl(
- self,
- bucket_name: str,
- principal_id: Optional[str],
- action: str,
- is_authenticated: bool = True,
- ) -> bool:
- acl = self.get_bucket_acl(bucket_name)
- if not acl:
- return False
- allowed_actions = acl.get_allowed_actions(principal_id, is_authenticated)
- return action in allowed_actions
-
- def get_object_acl(self, bucket_name: str, object_key: str, object_metadata: Dict[str, Any]) -> Optional[Acl]:
- acl_data = object_metadata.get("__acl__")
- if not acl_data:
- return None
- try:
- return Acl.from_dict(acl_data)
- except (TypeError, KeyError):
- return None
-
- def create_object_acl_metadata(self, acl: Acl) -> Dict[str, Any]:
- return {"__acl__": acl.to_dict()}
-
- def evaluate_object_acl(
- self,
- object_metadata: Dict[str, Any],
- principal_id: Optional[str],
- action: str,
- is_authenticated: bool = True,
- ) -> bool:
- acl = self.get_object_acl("", "", object_metadata)
- if not acl:
- return False
- allowed_actions = acl.get_allowed_actions(principal_id, is_authenticated)
- return action in allowed_actions
diff --git a/python/app/admin_api.py b/python/app/admin_api.py
deleted file mode 100644
index 89c87f0..0000000
--- a/python/app/admin_api.py
+++ /dev/null
@@ -1,984 +0,0 @@
-from __future__ import annotations
-
-import ipaddress
-import json
-import logging
-import re
-import socket
-import time
-from typing import Any, Dict, Optional, Tuple
-from urllib.parse import urlparse
-
-import requests
-from flask import Blueprint, Response, current_app, jsonify, request
-
-from .connections import ConnectionStore
-from .extensions import limiter
-from .gc import GarbageCollector
-from .integrity import IntegrityChecker
-from .iam import IamError, Principal
-from .replication import ReplicationManager
-from .site_registry import PeerSite, SiteInfo, SiteRegistry
-from .website_domains import WebsiteDomainStore, normalize_domain, is_valid_domain
-
-
-def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
- """Check if a URL is safe to make requests to (not internal/private).
-
- Args:
- url: The URL to check.
- allow_internal: If True, allows internal/private IP addresses.
- Use for self-hosted deployments on internal networks.
- """
- try:
- parsed = urlparse(url)
- hostname = parsed.hostname
- if not hostname:
- return False
- cloud_metadata_hosts = {
- "metadata.google.internal",
- "169.254.169.254",
- }
- if hostname.lower() in cloud_metadata_hosts:
- return False
- if allow_internal:
- return True
- blocked_hosts = {
- "localhost",
- "127.0.0.1",
- "0.0.0.0",
- "::1",
- "[::1]",
- }
- if hostname.lower() in blocked_hosts:
- return False
- try:
- resolved_ip = socket.gethostbyname(hostname)
- ip = ipaddress.ip_address(resolved_ip)
- if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
- return False
- except (socket.gaierror, ValueError):
- return False
- return True
- except Exception:
- return False
-
-
-def _validate_endpoint(endpoint: str) -> Optional[str]:
- """Validate endpoint URL format. Returns error message or None."""
- try:
- parsed = urlparse(endpoint)
- if not parsed.scheme or parsed.scheme not in ("http", "https"):
- return "Endpoint must be http or https URL"
- if not parsed.netloc:
- return "Endpoint must have a host"
- return None
- except Exception:
- return "Invalid endpoint URL"
-
-
-def _validate_priority(priority: Any) -> Optional[str]:
- """Validate priority value. Returns error message or None."""
- try:
- p = int(priority)
- if p < 0 or p > 1000:
- return "Priority must be between 0 and 1000"
- return None
- except (TypeError, ValueError):
- return "Priority must be an integer"
-
-
-def _validate_region(region: str) -> Optional[str]:
- """Validate region format. Returns error message or None."""
- if not re.match(r"^[a-z]{2,}-[a-z]+-\d+$", region):
- return "Region must match format like us-east-1"
- return None
-
-
-def _validate_site_id(site_id: str) -> Optional[str]:
- """Validate site_id format. Returns error message or None."""
- if not site_id or len(site_id) > 63:
- return "site_id must be 1-63 characters"
- if not re.match(r'^[a-zA-Z0-9][a-zA-Z0-9_-]*$', site_id):
- return "site_id must start with alphanumeric and contain only alphanumeric, hyphens, underscores"
- return None
-
-
-logger = logging.getLogger(__name__)
-
-admin_api_bp = Blueprint("admin_api", __name__, url_prefix="/admin")
-
-
-def _require_principal() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
- from .s3_api import _require_principal as s3_require_principal
- return s3_require_principal()
-
-
-def _require_admin() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
- principal, error = _require_principal()
- if error:
- return None, error
-
- try:
- _iam().authorize(principal, None, "iam:*")
- return principal, None
- except IamError:
- return None, _json_error("AccessDenied", "Admin access required", 403)
-
-
-def _site_registry() -> SiteRegistry:
- return current_app.extensions["site_registry"]
-
-
-def _connections() -> ConnectionStore:
- return current_app.extensions["connections"]
-
-
-def _replication() -> ReplicationManager:
- return current_app.extensions["replication"]
-
-
-def _iam():
- return current_app.extensions["iam"]
-
-
-def _json_error(code: str, message: str, status: int) -> Tuple[Dict[str, Any], int]:
- return {"error": {"code": code, "message": message}}, status
-
-
-def _get_admin_rate_limit() -> str:
- return current_app.config.get("RATE_LIMIT_ADMIN", "60 per minute")
-
-
-@admin_api_bp.route("/site", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def get_local_site():
- principal, error = _require_admin()
- if error:
- return error
-
- registry = _site_registry()
- local_site = registry.get_local_site()
-
- if local_site:
- return jsonify(local_site.to_dict())
-
- config_site_id = current_app.config.get("SITE_ID")
- config_endpoint = current_app.config.get("SITE_ENDPOINT")
-
- if config_site_id:
- return jsonify({
- "site_id": config_site_id,
- "endpoint": config_endpoint or "",
- "region": current_app.config.get("SITE_REGION", "us-east-1"),
- "priority": current_app.config.get("SITE_PRIORITY", 100),
- "display_name": config_site_id,
- "source": "environment",
- })
-
- return _json_error("NotFound", "Local site not configured", 404)
-
-
-@admin_api_bp.route("/site", methods=["PUT"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def update_local_site():
- principal, error = _require_admin()
- if error:
- return error
-
- payload = request.get_json(silent=True) or {}
-
- site_id = payload.get("site_id")
- endpoint = payload.get("endpoint")
-
- if not site_id:
- return _json_error("ValidationError", "site_id is required", 400)
-
- site_id_error = _validate_site_id(site_id)
- if site_id_error:
- return _json_error("ValidationError", site_id_error, 400)
-
- if endpoint:
- endpoint_error = _validate_endpoint(endpoint)
- if endpoint_error:
- return _json_error("ValidationError", endpoint_error, 400)
-
- if "priority" in payload:
- priority_error = _validate_priority(payload["priority"])
- if priority_error:
- return _json_error("ValidationError", priority_error, 400)
-
- if "region" in payload:
- region_error = _validate_region(payload["region"])
- if region_error:
- return _json_error("ValidationError", region_error, 400)
-
- registry = _site_registry()
- existing = registry.get_local_site()
-
- site = SiteInfo(
- site_id=site_id,
- endpoint=endpoint or "",
- region=payload.get("region", "us-east-1"),
- priority=payload.get("priority", 100),
- display_name=payload.get("display_name", site_id),
- created_at=existing.created_at if existing else None,
- )
-
- registry.set_local_site(site)
-
- logger.info("Local site updated", extra={"site_id": site_id, "principal": principal.access_key})
- return jsonify(site.to_dict())
-
-
-@admin_api_bp.route("/sites", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def list_all_sites():
- principal, error = _require_admin()
- if error:
- return error
-
- registry = _site_registry()
- local = registry.get_local_site()
- peers = registry.list_peers()
-
- result = {
- "local": local.to_dict() if local else None,
- "peers": [peer.to_dict() for peer in peers],
- "total_peers": len(peers),
- }
-
- return jsonify(result)
-
-
-@admin_api_bp.route("/sites", methods=["POST"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def register_peer_site():
- principal, error = _require_admin()
- if error:
- return error
-
- payload = request.get_json(silent=True) or {}
-
- site_id = payload.get("site_id")
- endpoint = payload.get("endpoint")
-
- if not site_id:
- return _json_error("ValidationError", "site_id is required", 400)
-
- site_id_error = _validate_site_id(site_id)
- if site_id_error:
- return _json_error("ValidationError", site_id_error, 400)
-
- if not endpoint:
- return _json_error("ValidationError", "endpoint is required", 400)
-
- endpoint_error = _validate_endpoint(endpoint)
- if endpoint_error:
- return _json_error("ValidationError", endpoint_error, 400)
-
- region = payload.get("region", "us-east-1")
- region_error = _validate_region(region)
- if region_error:
- return _json_error("ValidationError", region_error, 400)
-
- priority = payload.get("priority", 100)
- priority_error = _validate_priority(priority)
- if priority_error:
- return _json_error("ValidationError", priority_error, 400)
-
- registry = _site_registry()
-
- if registry.get_peer(site_id):
- return _json_error("AlreadyExists", f"Peer site '{site_id}' already exists", 409)
-
- connection_id = payload.get("connection_id")
- if connection_id:
- if not _connections().get(connection_id):
- return _json_error("ValidationError", f"Connection '{connection_id}' not found", 400)
-
- peer = PeerSite(
- site_id=site_id,
- endpoint=endpoint,
- region=region,
- priority=int(priority),
- display_name=payload.get("display_name", site_id),
- connection_id=connection_id,
- )
-
- registry.add_peer(peer)
-
- logger.info("Peer site registered", extra={"site_id": site_id, "principal": principal.access_key})
- return jsonify(peer.to_dict()), 201
-
-
-@admin_api_bp.route("/sites/", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def get_peer_site(site_id: str):
- principal, error = _require_admin()
- if error:
- return error
-
- registry = _site_registry()
- peer = registry.get_peer(site_id)
-
- if not peer:
- return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
-
- return jsonify(peer.to_dict())
-
-
-@admin_api_bp.route("/sites/", methods=["PUT"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def update_peer_site(site_id: str):
- principal, error = _require_admin()
- if error:
- return error
-
- registry = _site_registry()
- existing = registry.get_peer(site_id)
-
- if not existing:
- return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
-
- payload = request.get_json(silent=True) or {}
-
- if "endpoint" in payload:
- endpoint_error = _validate_endpoint(payload["endpoint"])
- if endpoint_error:
- return _json_error("ValidationError", endpoint_error, 400)
-
- if "priority" in payload:
- priority_error = _validate_priority(payload["priority"])
- if priority_error:
- return _json_error("ValidationError", priority_error, 400)
-
- if "region" in payload:
- region_error = _validate_region(payload["region"])
- if region_error:
- return _json_error("ValidationError", region_error, 400)
-
- if "connection_id" in payload:
- if payload["connection_id"] and not _connections().get(payload["connection_id"]):
- return _json_error("ValidationError", f"Connection '{payload['connection_id']}' not found", 400)
-
- peer = PeerSite(
- site_id=site_id,
- endpoint=payload.get("endpoint", existing.endpoint),
- region=payload.get("region", existing.region),
- priority=payload.get("priority", existing.priority),
- display_name=payload.get("display_name", existing.display_name),
- connection_id=payload.get("connection_id", existing.connection_id),
- created_at=existing.created_at,
- is_healthy=existing.is_healthy,
- last_health_check=existing.last_health_check,
- )
-
- registry.update_peer(peer)
-
- logger.info("Peer site updated", extra={"site_id": site_id, "principal": principal.access_key})
- return jsonify(peer.to_dict())
-
-
-@admin_api_bp.route("/sites/", methods=["DELETE"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def delete_peer_site(site_id: str):
- principal, error = _require_admin()
- if error:
- return error
-
- registry = _site_registry()
-
- if not registry.delete_peer(site_id):
- return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
-
- logger.info("Peer site deleted", extra={"site_id": site_id, "principal": principal.access_key})
- return Response(status=204)
-
-
-@admin_api_bp.route("/sites//health", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def check_peer_health(site_id: str):
- principal, error = _require_admin()
- if error:
- return error
-
- registry = _site_registry()
- peer = registry.get_peer(site_id)
-
- if not peer:
- return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
-
- is_healthy = False
- error_message = None
-
- if peer.connection_id:
- connection = _connections().get(peer.connection_id)
- if connection:
- is_healthy = _replication().check_endpoint_health(connection)
- else:
- error_message = f"Connection '{peer.connection_id}' not found"
- else:
- error_message = "No connection configured for this peer"
-
- registry.update_health(site_id, is_healthy)
-
- result = {
- "site_id": site_id,
- "is_healthy": is_healthy,
- "checked_at": time.time(),
- }
- if error_message:
- result["error"] = error_message
-
- return jsonify(result)
-
-
-@admin_api_bp.route("/topology", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def get_topology():
- principal, error = _require_admin()
- if error:
- return error
-
- registry = _site_registry()
- local = registry.get_local_site()
- peers = registry.list_peers()
-
- sites = []
-
- if local:
- sites.append({
- **local.to_dict(),
- "is_local": True,
- "is_healthy": True,
- })
-
- for peer in peers:
- sites.append({
- **peer.to_dict(),
- "is_local": False,
- })
-
- sites.sort(key=lambda s: s.get("priority", 100))
-
- return jsonify({
- "sites": sites,
- "total": len(sites),
- "healthy_count": sum(1 for s in sites if s.get("is_healthy")),
- })
-
-
-@admin_api_bp.route("/sites//bidirectional-status", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def check_bidirectional_status(site_id: str):
- principal, error = _require_admin()
- if error:
- return error
-
- registry = _site_registry()
- peer = registry.get_peer(site_id)
-
- if not peer:
- return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
-
- local_site = registry.get_local_site()
- replication = _replication()
- local_rules = replication.list_rules()
-
- local_bidir_rules = []
- for rule in local_rules:
- if rule.target_connection_id == peer.connection_id and rule.mode == "bidirectional":
- local_bidir_rules.append({
- "bucket_name": rule.bucket_name,
- "target_bucket": rule.target_bucket,
- "enabled": rule.enabled,
- })
-
- result = {
- "site_id": site_id,
- "local_site_id": local_site.site_id if local_site else None,
- "local_endpoint": local_site.endpoint if local_site else None,
- "local_bidirectional_rules": local_bidir_rules,
- "local_site_sync_enabled": current_app.config.get("SITE_SYNC_ENABLED", False),
- "remote_status": None,
- "issues": [],
- "is_fully_configured": False,
- }
-
- if not local_site or not local_site.site_id:
- result["issues"].append({
- "code": "NO_LOCAL_SITE_ID",
- "message": "Local site identity not configured",
- "severity": "error",
- })
-
- if not local_site or not local_site.endpoint:
- result["issues"].append({
- "code": "NO_LOCAL_ENDPOINT",
- "message": "Local site endpoint not configured (remote site cannot reach back)",
- "severity": "error",
- })
-
- if not peer.connection_id:
- result["issues"].append({
- "code": "NO_CONNECTION",
- "message": "No connection configured for this peer",
- "severity": "error",
- })
- return jsonify(result)
-
- connection = _connections().get(peer.connection_id)
- if not connection:
- result["issues"].append({
- "code": "CONNECTION_NOT_FOUND",
- "message": f"Connection '{peer.connection_id}' not found",
- "severity": "error",
- })
- return jsonify(result)
-
- if not local_bidir_rules:
- result["issues"].append({
- "code": "NO_LOCAL_BIDIRECTIONAL_RULES",
- "message": "No bidirectional replication rules configured on this site",
- "severity": "warning",
- })
-
- if not result["local_site_sync_enabled"]:
- result["issues"].append({
- "code": "SITE_SYNC_DISABLED",
- "message": "Site sync worker is disabled (SITE_SYNC_ENABLED=false). Pull operations will not work.",
- "severity": "warning",
- })
-
- if not replication.check_endpoint_health(connection):
- result["issues"].append({
- "code": "REMOTE_UNREACHABLE",
- "message": "Remote endpoint is not reachable",
- "severity": "error",
- })
- return jsonify(result)
-
- allow_internal = current_app.config.get("ALLOW_INTERNAL_ENDPOINTS", False)
- if not _is_safe_url(peer.endpoint, allow_internal=allow_internal):
- result["issues"].append({
- "code": "ENDPOINT_NOT_ALLOWED",
- "message": "Peer endpoint points to cloud metadata service (SSRF protection)",
- "severity": "error",
- })
- return jsonify(result)
-
- try:
- admin_url = peer.endpoint.rstrip("/") + "/admin/sites"
- resp = requests.get(
- admin_url,
- timeout=10,
- headers={
- "Accept": "application/json",
- "X-Access-Key": connection.access_key,
- "X-Secret-Key": connection.secret_key,
- },
- )
-
- if resp.status_code == 200:
- try:
- remote_data = resp.json()
- if not isinstance(remote_data, dict):
- raise ValueError("Expected JSON object")
- remote_local = remote_data.get("local")
- if remote_local is not None and not isinstance(remote_local, dict):
- raise ValueError("Expected 'local' to be an object")
- remote_peers = remote_data.get("peers", [])
- if not isinstance(remote_peers, list):
- raise ValueError("Expected 'peers' to be a list")
- except (ValueError, json.JSONDecodeError) as e:
- logger.warning("Invalid JSON from remote admin API: %s", e)
- result["remote_status"] = {"reachable": True, "invalid_response": True}
- result["issues"].append({
- "code": "REMOTE_INVALID_RESPONSE",
- "message": "Remote admin API returned invalid JSON",
- "severity": "warning",
- })
- return jsonify(result)
-
- result["remote_status"] = {
- "reachable": True,
- "local_site": remote_local,
- "site_sync_enabled": None,
- "has_peer_for_us": False,
- "peer_connection_configured": False,
- "has_bidirectional_rules_for_us": False,
- }
-
- for rp in remote_peers:
- if not isinstance(rp, dict):
- continue
- if local_site and (
- rp.get("site_id") == local_site.site_id or
- rp.get("endpoint") == local_site.endpoint
- ):
- result["remote_status"]["has_peer_for_us"] = True
- result["remote_status"]["peer_connection_configured"] = bool(rp.get("connection_id"))
- break
-
- if not result["remote_status"]["has_peer_for_us"]:
- result["issues"].append({
- "code": "REMOTE_NO_PEER_FOR_US",
- "message": "Remote site does not have this site registered as a peer",
- "severity": "error",
- })
- elif not result["remote_status"]["peer_connection_configured"]:
- result["issues"].append({
- "code": "REMOTE_NO_CONNECTION_FOR_US",
- "message": "Remote site has us as peer but no connection configured (cannot push back)",
- "severity": "error",
- })
- elif resp.status_code == 401 or resp.status_code == 403:
- result["remote_status"] = {
- "reachable": True,
- "admin_access_denied": True,
- }
- result["issues"].append({
- "code": "REMOTE_ADMIN_ACCESS_DENIED",
- "message": "Cannot verify remote configuration (admin access denied)",
- "severity": "warning",
- })
- else:
- result["remote_status"] = {
- "reachable": True,
- "admin_api_error": resp.status_code,
- }
- result["issues"].append({
- "code": "REMOTE_ADMIN_API_ERROR",
- "message": f"Remote admin API returned status {resp.status_code}",
- "severity": "warning",
- })
- except requests.RequestException as e:
- logger.warning("Remote admin API unreachable: %s", e)
- result["remote_status"] = {
- "reachable": False,
- "error": "Connection failed",
- }
- result["issues"].append({
- "code": "REMOTE_ADMIN_UNREACHABLE",
- "message": "Could not reach remote admin API",
- "severity": "warning",
- })
- except Exception as e:
- logger.warning("Error checking remote bidirectional status: %s", e, exc_info=True)
- result["issues"].append({
- "code": "VERIFICATION_ERROR",
- "message": "Internal error during verification",
- "severity": "warning",
- })
-
- error_issues = [i for i in result["issues"] if i["severity"] == "error"]
- result["is_fully_configured"] = len(error_issues) == 0 and len(local_bidir_rules) > 0
-
- return jsonify(result)
-
-
-def _website_domains() -> WebsiteDomainStore:
- return current_app.extensions["website_domains"]
-
-
-def _storage():
- return current_app.extensions["object_storage"]
-
-
-def _require_iam_action(action: str):
- principal, error = _require_principal()
- if error:
- return None, error
- try:
- _iam().authorize(principal, None, action)
- return principal, None
- except IamError:
- return None, _json_error("AccessDenied", f"Requires {action} permission", 403)
-
-
-@admin_api_bp.route("/iam/users", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def iam_list_users():
- principal, error = _require_iam_action("iam:list_users")
- if error:
- return error
- return jsonify({"users": _iam().list_users()})
-
-
-@admin_api_bp.route("/iam/users/", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def iam_get_user(identifier):
- principal, error = _require_iam_action("iam:get_user")
- if error:
- return error
- try:
- user_id = _iam().resolve_user_id(identifier)
- return jsonify(_iam().get_user_by_id(user_id))
- except IamError as exc:
- return _json_error("NotFound", str(exc), 404)
-
-
-@admin_api_bp.route("/iam/users//policies", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def iam_get_user_policies(identifier):
- principal, error = _require_iam_action("iam:get_policy")
- if error:
- return error
- try:
- return jsonify({"policies": _iam().get_user_policies(identifier)})
- except IamError as exc:
- return _json_error("NotFound", str(exc), 404)
-
-
-@admin_api_bp.route("/iam/users//keys", methods=["POST"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def iam_create_access_key(identifier):
- principal, error = _require_iam_action("iam:create_key")
- if error:
- return error
- try:
- result = _iam().create_access_key(identifier)
- logger.info("Access key created for %s by %s", identifier, principal.access_key)
- return jsonify(result), 201
- except IamError as exc:
- return _json_error("InvalidRequest", str(exc), 400)
-
-
-@admin_api_bp.route("/iam/users//keys/", methods=["DELETE"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def iam_delete_access_key(identifier, access_key):
- principal, error = _require_iam_action("iam:delete_key")
- if error:
- return error
- try:
- _iam().delete_access_key(access_key)
- logger.info("Access key %s deleted by %s", access_key, principal.access_key)
- return "", 204
- except IamError as exc:
- return _json_error("InvalidRequest", str(exc), 400)
-
-
-@admin_api_bp.route("/iam/users//disable", methods=["POST"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def iam_disable_user(identifier):
- principal, error = _require_iam_action("iam:disable_user")
- if error:
- return error
- try:
- _iam().disable_user(identifier)
- logger.info("User %s disabled by %s", identifier, principal.access_key)
- return jsonify({"status": "disabled"})
- except IamError as exc:
- return _json_error("InvalidRequest", str(exc), 400)
-
-
-@admin_api_bp.route("/iam/users//enable", methods=["POST"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def iam_enable_user(identifier):
- principal, error = _require_iam_action("iam:disable_user")
- if error:
- return error
- try:
- _iam().enable_user(identifier)
- logger.info("User %s enabled by %s", identifier, principal.access_key)
- return jsonify({"status": "enabled"})
- except IamError as exc:
- return _json_error("InvalidRequest", str(exc), 400)
-
-
-@admin_api_bp.route("/website-domains", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def list_website_domains():
- principal, error = _require_admin()
- if error:
- return error
- if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
- return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
- return jsonify(_website_domains().list_all())
-
-
-@admin_api_bp.route("/website-domains", methods=["POST"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def create_website_domain():
- principal, error = _require_admin()
- if error:
- return error
- if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
- return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
- payload = request.get_json(silent=True) or {}
- domain = normalize_domain(payload.get("domain") or "")
- bucket = (payload.get("bucket") or "").strip()
- if not domain:
- return _json_error("ValidationError", "domain is required", 400)
- if not is_valid_domain(domain):
- return _json_error("ValidationError", f"Invalid domain: '{domain}'", 400)
- if not bucket:
- return _json_error("ValidationError", "bucket is required", 400)
- storage = _storage()
- if not storage.bucket_exists(bucket):
- return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)
- store = _website_domains()
- existing = store.get_bucket(domain)
- if existing:
- return _json_error("Conflict", f"Domain '{domain}' is already mapped to bucket '{existing}'", 409)
- store.set_mapping(domain, bucket)
- logger.info("Website domain mapping created: %s -> %s", domain, bucket)
- return jsonify({"domain": domain, "bucket": bucket}), 201
-
-
-@admin_api_bp.route("/website-domains/", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def get_website_domain(domain: str):
- principal, error = _require_admin()
- if error:
- return error
- if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
- return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
- domain = normalize_domain(domain)
- bucket = _website_domains().get_bucket(domain)
- if not bucket:
- return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
- return jsonify({"domain": domain, "bucket": bucket})
-
-
-@admin_api_bp.route("/website-domains/", methods=["PUT"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def update_website_domain(domain: str):
- principal, error = _require_admin()
- if error:
- return error
- if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
- return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
- domain = normalize_domain(domain)
- payload = request.get_json(silent=True) or {}
- bucket = (payload.get("bucket") or "").strip()
- if not bucket:
- return _json_error("ValidationError", "bucket is required", 400)
- storage = _storage()
- if not storage.bucket_exists(bucket):
- return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)
- store = _website_domains()
- if not store.get_bucket(domain):
- return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
- store.set_mapping(domain, bucket)
- logger.info("Website domain mapping updated: %s -> %s", domain, bucket)
- return jsonify({"domain": domain, "bucket": bucket})
-
-
-@admin_api_bp.route("/website-domains/", methods=["DELETE"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def delete_website_domain(domain: str):
- principal, error = _require_admin()
- if error:
- return error
- if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
- return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
- domain = normalize_domain(domain)
- if not _website_domains().delete_mapping(domain):
- return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
- logger.info("Website domain mapping deleted: %s", domain)
- return Response(status=204)
-
-
-def _gc() -> Optional[GarbageCollector]:
- return current_app.extensions.get("gc")
-
-
-@admin_api_bp.route("/gc/status", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def gc_status():
- principal, error = _require_admin()
- if error:
- return error
- gc = _gc()
- if not gc:
- return jsonify({"enabled": False, "message": "GC is not enabled. Set GC_ENABLED=true to enable."})
- return jsonify(gc.get_status())
-
-
-@admin_api_bp.route("/gc/run", methods=["POST"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def gc_run_now():
- principal, error = _require_admin()
- if error:
- return error
- gc = _gc()
- if not gc:
- return _json_error("InvalidRequest", "GC is not enabled", 400)
- payload = request.get_json(silent=True) or {}
- started = gc.run_async(dry_run=payload.get("dry_run"))
- logger.info("GC manual run by %s", principal.access_key)
- if not started:
- return _json_error("Conflict", "GC is already in progress", 409)
- return jsonify({"status": "started"})
-
-
-@admin_api_bp.route("/gc/history", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def gc_history():
- principal, error = _require_admin()
- if error:
- return error
- gc = _gc()
- if not gc:
- return jsonify({"executions": []})
- limit = min(int(request.args.get("limit", 50)), 200)
- offset = int(request.args.get("offset", 0))
- records = gc.get_history(limit=limit, offset=offset)
- return jsonify({"executions": records})
-
-
-def _integrity() -> Optional[IntegrityChecker]:
- return current_app.extensions.get("integrity")
-
-
-@admin_api_bp.route("/integrity/status", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def integrity_status():
- principal, error = _require_admin()
- if error:
- return error
- checker = _integrity()
- if not checker:
- return jsonify({"enabled": False, "message": "Integrity checker is not enabled. Set INTEGRITY_ENABLED=true to enable."})
- return jsonify(checker.get_status())
-
-
-@admin_api_bp.route("/integrity/run", methods=["POST"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def integrity_run_now():
- principal, error = _require_admin()
- if error:
- return error
- checker = _integrity()
- if not checker:
- return _json_error("InvalidRequest", "Integrity checker is not enabled", 400)
- payload = request.get_json(silent=True) or {}
- override_dry_run = payload.get("dry_run")
- override_auto_heal = payload.get("auto_heal")
- started = checker.run_async(
- auto_heal=override_auto_heal if override_auto_heal is not None else None,
- dry_run=override_dry_run if override_dry_run is not None else None,
- )
- logger.info("Integrity manual run by %s", principal.access_key)
- if not started:
- return _json_error("Conflict", "A scan is already in progress", 409)
- return jsonify({"status": "started"})
-
-
-@admin_api_bp.route("/integrity/history", methods=["GET"])
-@limiter.limit(lambda: _get_admin_rate_limit())
-def integrity_history():
- principal, error = _require_admin()
- if error:
- return error
- checker = _integrity()
- if not checker:
- return jsonify({"executions": []})
- limit = min(int(request.args.get("limit", 50)), 200)
- offset = int(request.args.get("offset", 0))
- records = checker.get_history(limit=limit, offset=offset)
- return jsonify({"executions": records})
-
-
diff --git a/python/app/bucket_policies.py b/python/app/bucket_policies.py
deleted file mode 100644
index 61a9337..0000000
--- a/python/app/bucket_policies.py
+++ /dev/null
@@ -1,404 +0,0 @@
-from __future__ import annotations
-
-import ipaddress
-import json
-import os
-import re
-import time
-from dataclasses import dataclass, field
-from fnmatch import fnmatch, translate
-from functools import lru_cache
-from pathlib import Path
-from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple
-
-
-RESOURCE_PREFIX = "arn:aws:s3:::"
-
-
-@lru_cache(maxsize=256)
-def _compile_pattern(pattern: str) -> Pattern[str]:
- return re.compile(translate(pattern), re.IGNORECASE)
-
-
-def _match_string_like(value: str, pattern: str) -> bool:
- compiled = _compile_pattern(pattern)
- return bool(compiled.match(value))
-
-
-def _ip_in_cidr(ip_str: str, cidr: str) -> bool:
- try:
- ip = ipaddress.ip_address(ip_str)
- network = ipaddress.ip_network(cidr, strict=False)
- return ip in network
- except ValueError:
- return False
-
-
-def _evaluate_condition_operator(
- operator: str,
- condition_key: str,
- condition_values: List[str],
- context: Dict[str, Any],
-) -> bool:
- context_value = context.get(condition_key)
- op_lower = operator.lower()
- if_exists = op_lower.endswith("ifexists")
- if if_exists:
- op_lower = op_lower[:-8]
-
- if context_value is None:
- return if_exists
-
- context_value_str = str(context_value)
- context_value_lower = context_value_str.lower()
-
- if op_lower == "stringequals":
- return context_value_str in condition_values
- elif op_lower == "stringnotequals":
- return context_value_str not in condition_values
- elif op_lower == "stringequalsignorecase":
- return context_value_lower in [v.lower() for v in condition_values]
- elif op_lower == "stringnotequalsignorecase":
- return context_value_lower not in [v.lower() for v in condition_values]
- elif op_lower == "stringlike":
- return any(_match_string_like(context_value_str, p) for p in condition_values)
- elif op_lower == "stringnotlike":
- return not any(_match_string_like(context_value_str, p) for p in condition_values)
- elif op_lower == "ipaddress":
- return any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values)
- elif op_lower == "notipaddress":
- return not any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values)
- elif op_lower == "bool":
- bool_val = context_value_lower in ("true", "1", "yes")
- return str(bool_val).lower() in [v.lower() for v in condition_values]
- elif op_lower == "null":
- is_null = context_value is None or context_value == ""
- expected_null = condition_values[0].lower() in ("true", "1", "yes") if condition_values else True
- return is_null == expected_null
-
- return False
-
-ACTION_ALIASES = {
- "s3:listbucket": "list",
- "s3:listallmybuckets": "list",
- "s3:listbucketversions": "list",
- "s3:listmultipartuploads": "list",
- "s3:listparts": "list",
- "s3:getobject": "read",
- "s3:getobjectversion": "read",
- "s3:getobjecttagging": "read",
- "s3:getobjectversiontagging": "read",
- "s3:getobjectacl": "read",
- "s3:getbucketversioning": "read",
- "s3:headobject": "read",
- "s3:headbucket": "read",
- "s3:putobject": "write",
- "s3:createbucket": "write",
- "s3:putobjecttagging": "write",
- "s3:putbucketversioning": "write",
- "s3:createmultipartupload": "write",
- "s3:uploadpart": "write",
- "s3:completemultipartupload": "write",
- "s3:abortmultipartupload": "write",
- "s3:copyobject": "write",
- "s3:deleteobject": "delete",
- "s3:deleteobjectversion": "delete",
- "s3:deletebucket": "delete",
- "s3:deleteobjecttagging": "delete",
- "s3:putobjectacl": "share",
- "s3:putbucketacl": "share",
- "s3:getbucketacl": "share",
- "s3:putbucketpolicy": "policy",
- "s3:getbucketpolicy": "policy",
- "s3:deletebucketpolicy": "policy",
- "s3:getreplicationconfiguration": "replication",
- "s3:putreplicationconfiguration": "replication",
- "s3:deletereplicationconfiguration": "replication",
- "s3:replicateobject": "replication",
- "s3:replicatetags": "replication",
- "s3:replicatedelete": "replication",
- "s3:getlifecycleconfiguration": "lifecycle",
- "s3:putlifecycleconfiguration": "lifecycle",
- "s3:deletelifecycleconfiguration": "lifecycle",
- "s3:getbucketlifecycle": "lifecycle",
- "s3:putbucketlifecycle": "lifecycle",
- "s3:getbucketcors": "cors",
- "s3:putbucketcors": "cors",
- "s3:deletebucketcors": "cors",
-}
-
-
-def _normalize_action(action: str) -> str:
- action = action.strip().lower()
- if action == "*":
- return "*"
- return ACTION_ALIASES.get(action, action)
-
-
-def _normalize_actions(actions: Iterable[str]) -> List[str]:
- values: List[str] = []
- for action in actions:
- canonical = _normalize_action(action)
- if canonical == "*" and "*" not in values:
- return ["*"]
- if canonical and canonical not in values:
- values.append(canonical)
- return values
-
-
-def _normalize_principals(principal_field: Any) -> List[str] | str:
- if principal_field == "*":
- return "*"
-
- def _collect(values: Any) -> List[str]:
- if values is None:
- return []
- if values == "*":
- return ["*"]
- if isinstance(values, str):
- return [values]
- if isinstance(values, dict):
- aggregated: List[str] = []
- for nested in values.values():
- chunk = _collect(nested)
- if "*" in chunk:
- return ["*"]
- aggregated.extend(chunk)
- return aggregated
- if isinstance(values, Iterable):
- aggregated = []
- for nested in values:
- chunk = _collect(nested)
- if "*" in chunk:
- return ["*"]
- aggregated.extend(chunk)
- return aggregated
- return [str(values)]
-
- normalized: List[str] = []
- for entry in _collect(principal_field):
- token = str(entry).strip()
- if token == "*":
- return "*"
- if token and token not in normalized:
- normalized.append(token)
- return normalized or "*"
-
-
-def _parse_resource(resource: str) -> tuple[str | None, str | None]:
- if not resource.startswith(RESOURCE_PREFIX):
- return None, None
- remainder = resource[len(RESOURCE_PREFIX) :]
- if "/" not in remainder:
- bucket = remainder or "*"
- return bucket, None
- bucket, _, key_pattern = remainder.partition("/")
- return bucket or "*", key_pattern or "*"
-
-
-@dataclass
-class BucketPolicyStatement:
- sid: Optional[str]
- effect: str
- principals: List[str] | str
- actions: List[str]
- resources: List[Tuple[str | None, str | None]]
- conditions: Dict[str, Dict[str, List[str]]] = field(default_factory=dict)
- _compiled_patterns: List[Tuple[str | None, Optional[Pattern[str]]]] | None = None
-
- def _get_compiled_patterns(self) -> List[Tuple[str | None, Optional[Pattern[str]]]]:
- if self._compiled_patterns is None:
- self._compiled_patterns = []
- for resource_bucket, key_pattern in self.resources:
- if key_pattern is None:
- self._compiled_patterns.append((resource_bucket, None))
- else:
- regex_pattern = translate(key_pattern)
- self._compiled_patterns.append((resource_bucket, re.compile(regex_pattern)))
- return self._compiled_patterns
-
- def matches_principal(self, access_key: Optional[str]) -> bool:
- if self.principals == "*":
- return True
- if access_key is None:
- return False
- return access_key in self.principals
-
- def matches_action(self, action: str) -> bool:
- action = _normalize_action(action)
- return "*" in self.actions or action in self.actions
-
- def matches_resource(self, bucket: Optional[str], object_key: Optional[str]) -> bool:
- bucket = (bucket or "*").lower()
- key = object_key or ""
- for resource_bucket, compiled_pattern in self._get_compiled_patterns():
- resource_bucket = (resource_bucket or "*").lower()
- if resource_bucket not in {"*", bucket}:
- continue
- if compiled_pattern is None:
- if not key:
- return True
- continue
- if compiled_pattern.match(key):
- return True
- return False
-
- def matches_condition(self, context: Optional[Dict[str, Any]]) -> bool:
- if not self.conditions:
- return True
- if context is None:
- context = {}
- for operator, key_values in self.conditions.items():
- for condition_key, condition_values in key_values.items():
- if not _evaluate_condition_operator(operator, condition_key, condition_values, context):
- return False
- return True
-
-
-class BucketPolicyStore:
- """Loads bucket policies from disk and evaluates statements."""
-
- def __init__(self, policy_path: Path) -> None:
- self.policy_path = Path(policy_path)
- self.policy_path.parent.mkdir(parents=True, exist_ok=True)
- if not self.policy_path.exists():
- self.policy_path.write_text(json.dumps({"policies": {}}, indent=2))
- self._raw: Dict[str, Any] = {}
- self._policies: Dict[str, List[BucketPolicyStatement]] = {}
- self._load()
- self._last_mtime = self._current_mtime()
- # Performance: Avoid stat() on every request
- self._last_stat_check = 0.0
- self._stat_check_interval = float(os.environ.get("BUCKET_POLICY_STAT_CHECK_INTERVAL_SECONDS", "2.0"))
-
- def maybe_reload(self) -> None:
- # Performance: Skip stat check if we checked recently
- now = time.time()
- if now - self._last_stat_check < self._stat_check_interval:
- return
- self._last_stat_check = now
- current = self._current_mtime()
- if current is None or current == self._last_mtime:
- return
- self._load()
- self._last_mtime = current
-
- def _current_mtime(self) -> float | None:
- try:
- return self.policy_path.stat().st_mtime
- except FileNotFoundError:
- return None
-
- def evaluate(
- self,
- access_key: Optional[str],
- bucket: Optional[str],
- object_key: Optional[str],
- action: str,
- context: Optional[Dict[str, Any]] = None,
- ) -> str | None:
- bucket = (bucket or "").lower()
- statements = self._policies.get(bucket) or []
- decision: Optional[str] = None
- for statement in statements:
- if not statement.matches_principal(access_key):
- continue
- if not statement.matches_action(action):
- continue
- if not statement.matches_resource(bucket, object_key):
- continue
- if not statement.matches_condition(context):
- continue
- if statement.effect == "deny":
- return "deny"
- decision = "allow"
- return decision
-
- def get_policy(self, bucket: str) -> Dict[str, Any] | None:
- return self._raw.get(bucket.lower())
-
- def set_policy(self, bucket: str, policy_payload: Dict[str, Any]) -> None:
- bucket = bucket.lower()
- statements = self._normalize_policy(policy_payload)
- if not statements:
- raise ValueError("Policy must include at least one valid statement")
- self._raw[bucket] = policy_payload
- self._policies[bucket] = statements
- self._persist()
-
- def delete_policy(self, bucket: str) -> None:
- bucket = bucket.lower()
- self._raw.pop(bucket, None)
- self._policies.pop(bucket, None)
- self._persist()
-
- def _load(self) -> None:
- try:
- content = self.policy_path.read_text(encoding='utf-8')
- raw_payload = json.loads(content)
- except FileNotFoundError:
- raw_payload = {"policies": {}}
- except json.JSONDecodeError as e:
- raise ValueError(f"Corrupted bucket policy file (invalid JSON): {e}")
- except PermissionError as e:
- raise ValueError(f"Cannot read bucket policy file (permission denied): {e}")
- except (OSError, ValueError) as e:
- raise ValueError(f"Failed to load bucket policies: {e}")
-
- policies: Dict[str, Any] = raw_payload.get("policies", {})
- parsed: Dict[str, List[BucketPolicyStatement]] = {}
- for bucket, policy in policies.items():
- parsed[bucket.lower()] = self._normalize_policy(policy)
- self._raw = {bucket.lower(): policy for bucket, policy in policies.items()}
- self._policies = parsed
-
- def _persist(self) -> None:
- payload = {"policies": self._raw}
- self.policy_path.write_text(json.dumps(payload, indent=2))
-
- def _normalize_policy(self, policy: Dict[str, Any]) -> List[BucketPolicyStatement]:
- statements_raw: Sequence[Dict[str, Any]] = policy.get("Statement", [])
- statements: List[BucketPolicyStatement] = []
- for statement in statements_raw:
- actions = _normalize_actions(statement.get("Action", []))
- principals = _normalize_principals(statement.get("Principal", "*"))
- resources_field = statement.get("Resource", [])
- if isinstance(resources_field, str):
- resources_field = [resources_field]
- resources: List[tuple[str | None, str | None]] = []
- for resource in resources_field:
- bucket, pattern = _parse_resource(str(resource))
- if bucket:
- resources.append((bucket, pattern))
- if not resources:
- continue
- effect = statement.get("Effect", "Allow").lower()
- conditions = self._normalize_conditions(statement.get("Condition", {}))
- statements.append(
- BucketPolicyStatement(
- sid=statement.get("Sid"),
- effect=effect,
- principals=principals,
- actions=actions or ["*"],
- resources=resources,
- conditions=conditions,
- )
- )
- return statements
-
- def _normalize_conditions(self, condition_block: Dict[str, Any]) -> Dict[str, Dict[str, List[str]]]:
- if not condition_block or not isinstance(condition_block, dict):
- return {}
- normalized: Dict[str, Dict[str, List[str]]] = {}
- for operator, key_values in condition_block.items():
- if not isinstance(key_values, dict):
- continue
- normalized[operator] = {}
- for cond_key, cond_values in key_values.items():
- if isinstance(cond_values, str):
- normalized[operator][cond_key] = [cond_values]
- elif isinstance(cond_values, list):
- normalized[operator][cond_key] = [str(v) for v in cond_values]
- else:
- normalized[operator][cond_key] = [str(cond_values)]
- return normalized
\ No newline at end of file
diff --git a/python/app/compression.py b/python/app/compression.py
deleted file mode 100644
index bf32504..0000000
--- a/python/app/compression.py
+++ /dev/null
@@ -1,109 +0,0 @@
-from __future__ import annotations
-
-import gzip
-import io
-from typing import Callable, Iterable, List, Tuple
-
-COMPRESSIBLE_MIMES = frozenset([
- 'application/json',
- 'application/javascript',
- 'application/xml',
- 'text/html',
- 'text/css',
- 'text/plain',
- 'text/xml',
- 'text/javascript',
- 'application/x-ndjson',
-])
-
-MIN_SIZE_FOR_COMPRESSION = 500
-
-
-class GzipMiddleware:
- def __init__(self, app: Callable, compression_level: int = 6, min_size: int = MIN_SIZE_FOR_COMPRESSION):
- self.app = app
- self.compression_level = compression_level
- self.min_size = min_size
-
- def __call__(self, environ: dict, start_response: Callable) -> Iterable[bytes]:
- accept_encoding = environ.get('HTTP_ACCEPT_ENCODING', '')
- if 'gzip' not in accept_encoding.lower():
- return self.app(environ, start_response)
-
- response_started = False
- status_code = None
- response_headers: List[Tuple[str, str]] = []
- content_type = None
- content_length = None
- should_compress = False
- passthrough = False
- exc_info_holder = [None]
-
- def custom_start_response(status: str, headers: List[Tuple[str, str]], exc_info=None):
- nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress, passthrough
- response_started = True
- status_code = int(status.split(' ', 1)[0])
- response_headers = list(headers)
- exc_info_holder[0] = exc_info
-
- for name, value in headers:
- name_lower = name.lower()
- if name_lower == 'content-type':
- content_type = value.split(';')[0].strip().lower()
- elif name_lower == 'content-length':
- try:
- content_length = int(value)
- except (ValueError, TypeError):
- pass
- elif name_lower == 'content-encoding':
- passthrough = True
- return start_response(status, headers, exc_info)
- elif name_lower == 'x-stream-response':
- passthrough = True
- return start_response(status, headers, exc_info)
-
- if content_type and content_type in COMPRESSIBLE_MIMES:
- if content_length is None or content_length >= self.min_size:
- should_compress = True
- else:
- passthrough = True
- return start_response(status, headers, exc_info)
-
- return None
-
- app_iter = self.app(environ, custom_start_response)
-
- if passthrough:
- return app_iter
-
- response_body = b''.join(app_iter)
-
- if not response_started:
- return [response_body]
-
- if should_compress and len(response_body) >= self.min_size:
- buf = io.BytesIO()
- with gzip.GzipFile(fileobj=buf, mode='wb', compresslevel=self.compression_level) as gz:
- gz.write(response_body)
- compressed = buf.getvalue()
-
- if len(compressed) < len(response_body):
- response_body = compressed
- new_headers = []
- for name, value in response_headers:
- if name.lower() not in ('content-length', 'content-encoding'):
- new_headers.append((name, value))
- new_headers.append(('Content-Encoding', 'gzip'))
- new_headers.append(('Content-Length', str(len(response_body))))
- new_headers.append(('Vary', 'Accept-Encoding'))
- response_headers = new_headers
-
- status_str = f"{status_code} " + {
- 200: "OK", 201: "Created", 204: "No Content", 206: "Partial Content",
- 301: "Moved Permanently", 302: "Found", 304: "Not Modified",
- 400: "Bad Request", 401: "Unauthorized", 403: "Forbidden", 404: "Not Found",
- 405: "Method Not Allowed", 409: "Conflict", 500: "Internal Server Error",
- }.get(status_code, "Unknown")
-
- start_response(status_str, response_headers, exc_info_holder[0])
- return [response_body]
diff --git a/python/app/config.py b/python/app/config.py
deleted file mode 100644
index 38906bc..0000000
--- a/python/app/config.py
+++ /dev/null
@@ -1,683 +0,0 @@
-from __future__ import annotations
-
-import os
-import re
-import secrets
-import shutil
-import sys
-import warnings
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, Dict, Optional
-
-import psutil
-
-
-def _calculate_auto_threads() -> int:
- cpu_count = psutil.cpu_count(logical=True) or 4
- return max(1, min(cpu_count * 2, 64))
-
-
-def _calculate_auto_connection_limit() -> int:
- available_mb = psutil.virtual_memory().available / (1024 * 1024)
- calculated = int(available_mb / 5)
- return max(20, min(calculated, 1000))
-
-
-def _calculate_auto_backlog(connection_limit: int) -> int:
- return max(128, min(connection_limit * 2, 4096))
-
-
-def _validate_rate_limit(value: str) -> str:
- pattern = r"^\d+\s+per\s+(second|minute|hour|day)$"
- if not re.match(pattern, value):
- raise ValueError(f"Invalid rate limit format: {value}. Expected format: '200 per minute'")
- return value
-
-if getattr(sys, "frozen", False):
- # Running in a PyInstaller bundle
- PROJECT_ROOT = Path(sys._MEIPASS)
-else:
- # Running in a normal Python environment
- PROJECT_ROOT = Path(__file__).resolve().parent.parent
-
-
-def _prepare_config_file(active_path: Path, legacy_path: Optional[Path] = None) -> Path:
- """Ensure config directories exist and migrate legacy files when possible."""
- active_path = Path(active_path)
- active_path.parent.mkdir(parents=True, exist_ok=True)
- if legacy_path:
- legacy_path = Path(legacy_path)
- if not active_path.exists() and legacy_path.exists():
- legacy_path.parent.mkdir(parents=True, exist_ok=True)
- try:
- shutil.move(str(legacy_path), str(active_path))
- except OSError:
- shutil.copy2(legacy_path, active_path)
- try:
- legacy_path.unlink(missing_ok=True)
- except OSError:
- pass
- return active_path
-
-
-@dataclass
-class AppConfig:
- storage_root: Path
- max_upload_size: int
- ui_page_size: int
- secret_key: str
- iam_config_path: Path
- bucket_policy_path: Path
- api_base_url: Optional[str]
- aws_region: str
- aws_service: str
- ui_enforce_bucket_policies: bool
- log_level: str
- log_to_file: bool
- log_path: Path
- log_max_bytes: int
- log_backup_count: int
- ratelimit_default: str
- ratelimit_storage_uri: str
- ratelimit_list_buckets: str
- ratelimit_bucket_ops: str
- ratelimit_object_ops: str
- ratelimit_head_ops: str
- cors_origins: list[str]
- cors_methods: list[str]
- cors_allow_headers: list[str]
- cors_expose_headers: list[str]
- session_lifetime_days: int
- auth_max_attempts: int
- auth_lockout_minutes: int
- bulk_delete_max_keys: int
- secret_ttl_seconds: int
- stream_chunk_size: int
- multipart_min_part_size: int
- bucket_stats_cache_ttl: int
- object_cache_ttl: int
- encryption_enabled: bool
- encryption_master_key_path: Path
- kms_enabled: bool
- kms_keys_path: Path
- default_encryption_algorithm: str
- display_timezone: str
- lifecycle_enabled: bool
- lifecycle_interval_seconds: int
- metrics_history_enabled: bool
- metrics_history_retention_hours: int
- metrics_history_interval_minutes: int
- operation_metrics_enabled: bool
- operation_metrics_interval_minutes: int
- operation_metrics_retention_hours: int
- server_threads: int
- server_connection_limit: int
- server_backlog: int
- server_channel_timeout: int
- server_max_buffer_size: int
- server_threads_auto: bool
- server_connection_limit_auto: bool
- server_backlog_auto: bool
- site_sync_enabled: bool
- site_sync_interval_seconds: int
- site_sync_batch_size: int
- sigv4_timestamp_tolerance_seconds: int
- presigned_url_min_expiry_seconds: int
- presigned_url_max_expiry_seconds: int
- replication_connect_timeout_seconds: int
- replication_read_timeout_seconds: int
- replication_max_retries: int
- replication_streaming_threshold_bytes: int
- replication_max_failures_per_bucket: int
- site_sync_connect_timeout_seconds: int
- site_sync_read_timeout_seconds: int
- site_sync_max_retries: int
- site_sync_clock_skew_tolerance_seconds: float
- object_key_max_length_bytes: int
- object_cache_max_size: int
- meta_read_cache_max: int
- bucket_config_cache_ttl_seconds: float
- object_tag_limit: int
- encryption_chunk_size_bytes: int
- kms_generate_data_key_min_bytes: int
- kms_generate_data_key_max_bytes: int
- lifecycle_max_history_per_bucket: int
- site_id: Optional[str]
- site_endpoint: Optional[str]
- site_region: str
- site_priority: int
- ratelimit_admin: str
- num_trusted_proxies: int
- allowed_redirect_hosts: list[str]
- allow_internal_endpoints: bool
- website_hosting_enabled: bool
- gc_enabled: bool
- gc_interval_hours: float
- gc_temp_file_max_age_hours: float
- gc_multipart_max_age_days: int
- gc_lock_file_max_age_hours: float
- gc_dry_run: bool
- gc_io_throttle_ms: int
- integrity_enabled: bool
- integrity_interval_hours: float
- integrity_batch_size: int
- integrity_auto_heal: bool
- integrity_dry_run: bool
- integrity_io_throttle_ms: int
-
- @classmethod
- def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
- overrides = overrides or {}
-
- def _get(name: str, default: Any) -> Any:
- return overrides.get(name, os.getenv(name, default))
-
- storage_root = Path(_get("STORAGE_ROOT", PROJECT_ROOT / "data")).resolve()
- max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024))
- ui_page_size = int(_get("UI_PAGE_SIZE", 100))
- auth_max_attempts = int(_get("AUTH_MAX_ATTEMPTS", 5))
- auth_lockout_minutes = int(_get("AUTH_LOCKOUT_MINUTES", 15))
- bulk_delete_max_keys = int(_get("BULK_DELETE_MAX_KEYS", 500))
- secret_ttl_seconds = int(_get("SECRET_TTL_SECONDS", 300))
- stream_chunk_size = int(_get("STREAM_CHUNK_SIZE", 64 * 1024))
- multipart_min_part_size = int(_get("MULTIPART_MIN_PART_SIZE", 5 * 1024 * 1024))
- lifecycle_enabled = _get("LIFECYCLE_ENABLED", "false").lower() in ("true", "1", "yes")
- lifecycle_interval_seconds = int(_get("LIFECYCLE_INTERVAL_SECONDS", 3600))
- default_secret = "dev-secret-key"
- secret_key = str(_get("SECRET_KEY", default_secret))
-
- if not secret_key or secret_key == default_secret:
- secret_file = storage_root / ".myfsio.sys" / "config" / ".secret"
- if secret_file.exists():
- secret_key = secret_file.read_text().strip()
- else:
- generated = secrets.token_urlsafe(32)
- if secret_key == default_secret:
- warnings.warn("Using insecure default SECRET_KEY. A random value has been generated and persisted; set SECRET_KEY for production", RuntimeWarning)
- try:
- secret_file.parent.mkdir(parents=True, exist_ok=True)
- secret_file.write_text(generated)
- try:
- os.chmod(secret_file, 0o600)
- except OSError:
- pass
- secret_key = generated
- except OSError:
- secret_key = generated
-
- iam_env_override = "IAM_CONFIG" in overrides or "IAM_CONFIG" in os.environ
- bucket_policy_override = "BUCKET_POLICY_PATH" in overrides or "BUCKET_POLICY_PATH" in os.environ
-
- default_iam_path = storage_root / ".myfsio.sys" / "config" / "iam.json"
- default_bucket_policy_path = storage_root / ".myfsio.sys" / "config" / "bucket_policies.json"
-
- iam_config_path = Path(_get("IAM_CONFIG", default_iam_path)).resolve()
- bucket_policy_path = Path(_get("BUCKET_POLICY_PATH", default_bucket_policy_path)).resolve()
-
- iam_config_path = _prepare_config_file(
- iam_config_path,
- legacy_path=None if iam_env_override else storage_root / "iam.json",
- )
- bucket_policy_path = _prepare_config_file(
- bucket_policy_path,
- legacy_path=None if bucket_policy_override else storage_root / "bucket_policies.json",
- )
- api_base_url = _get("API_BASE_URL", None)
- if api_base_url:
- api_base_url = str(api_base_url)
-
- aws_region = str(_get("AWS_REGION", "us-east-1"))
- aws_service = str(_get("AWS_SERVICE", "s3"))
- enforce_ui_policies = str(_get("UI_ENFORCE_BUCKET_POLICIES", "0")).lower() in {"1", "true", "yes", "on"}
- log_level = str(_get("LOG_LEVEL", "INFO")).upper()
- log_to_file = str(_get("LOG_TO_FILE", "1")).lower() in {"1", "true", "yes", "on"}
- log_dir = Path(_get("LOG_DIR", storage_root.parent / "logs")).resolve()
- log_dir.mkdir(parents=True, exist_ok=True)
- log_path = log_dir / str(_get("LOG_FILE", "app.log"))
- log_max_bytes = int(_get("LOG_MAX_BYTES", 5 * 1024 * 1024))
- log_backup_count = int(_get("LOG_BACKUP_COUNT", 3))
- ratelimit_default = _validate_rate_limit(str(_get("RATE_LIMIT_DEFAULT", "200 per minute")))
- ratelimit_storage_uri = str(_get("RATE_LIMIT_STORAGE_URI", "memory://"))
- ratelimit_list_buckets = _validate_rate_limit(str(_get("RATE_LIMIT_LIST_BUCKETS", "60 per minute")))
- ratelimit_bucket_ops = _validate_rate_limit(str(_get("RATE_LIMIT_BUCKET_OPS", "120 per minute")))
- ratelimit_object_ops = _validate_rate_limit(str(_get("RATE_LIMIT_OBJECT_OPS", "240 per minute")))
- ratelimit_head_ops = _validate_rate_limit(str(_get("RATE_LIMIT_HEAD_OPS", "100 per minute")))
-
- def _csv(value: str, default: list[str]) -> list[str]:
- if not value:
- return default
- parts = [segment.strip() for segment in value.split(",") if segment.strip()]
- return parts or default
-
- cors_origins = _csv(str(_get("CORS_ORIGINS", "*")), ["*"])
- cors_methods = _csv(str(_get("CORS_METHODS", "GET,PUT,POST,DELETE,OPTIONS,HEAD")), ["GET", "PUT", "POST", "DELETE", "OPTIONS", "HEAD"])
- cors_allow_headers = _csv(str(_get("CORS_ALLOW_HEADERS", "*")), ["*"])
- cors_expose_headers = _csv(str(_get("CORS_EXPOSE_HEADERS", "*")), ["*"])
- session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30))
- bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60))
- object_cache_ttl = int(_get("OBJECT_CACHE_TTL", 60))
-
- encryption_enabled = str(_get("ENCRYPTION_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
- encryption_keys_dir = storage_root / ".myfsio.sys" / "keys"
- encryption_master_key_path = Path(_get("ENCRYPTION_MASTER_KEY_PATH", encryption_keys_dir / "master.key")).resolve()
- kms_enabled = str(_get("KMS_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
- kms_keys_path = Path(_get("KMS_KEYS_PATH", encryption_keys_dir / "kms_keys.json")).resolve()
- default_encryption_algorithm = str(_get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"))
- display_timezone = str(_get("DISPLAY_TIMEZONE", "UTC"))
- metrics_history_enabled = str(_get("METRICS_HISTORY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
- metrics_history_retention_hours = int(_get("METRICS_HISTORY_RETENTION_HOURS", 24))
- metrics_history_interval_minutes = int(_get("METRICS_HISTORY_INTERVAL_MINUTES", 5))
- operation_metrics_enabled = str(_get("OPERATION_METRICS_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
- operation_metrics_interval_minutes = int(_get("OPERATION_METRICS_INTERVAL_MINUTES", 5))
- operation_metrics_retention_hours = int(_get("OPERATION_METRICS_RETENTION_HOURS", 24))
-
- _raw_threads = int(_get("SERVER_THREADS", 0))
- if _raw_threads == 0:
- server_threads = _calculate_auto_threads()
- server_threads_auto = True
- else:
- server_threads = _raw_threads
- server_threads_auto = False
-
- _raw_conn_limit = int(_get("SERVER_CONNECTION_LIMIT", 0))
- if _raw_conn_limit == 0:
- server_connection_limit = _calculate_auto_connection_limit()
- server_connection_limit_auto = True
- else:
- server_connection_limit = _raw_conn_limit
- server_connection_limit_auto = False
-
- _raw_backlog = int(_get("SERVER_BACKLOG", 0))
- if _raw_backlog == 0:
- server_backlog = _calculate_auto_backlog(server_connection_limit)
- server_backlog_auto = True
- else:
- server_backlog = _raw_backlog
- server_backlog_auto = False
-
- server_channel_timeout = int(_get("SERVER_CHANNEL_TIMEOUT", 120))
- server_max_buffer_size = int(_get("SERVER_MAX_BUFFER_SIZE", 1024 * 1024 * 128))
- site_sync_enabled = str(_get("SITE_SYNC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
- site_sync_interval_seconds = int(_get("SITE_SYNC_INTERVAL_SECONDS", 60))
- site_sync_batch_size = int(_get("SITE_SYNC_BATCH_SIZE", 100))
-
- sigv4_timestamp_tolerance_seconds = int(_get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900))
- presigned_url_min_expiry_seconds = int(_get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1))
- presigned_url_max_expiry_seconds = int(_get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800))
- replication_connect_timeout_seconds = int(_get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5))
- replication_read_timeout_seconds = int(_get("REPLICATION_READ_TIMEOUT_SECONDS", 30))
- replication_max_retries = int(_get("REPLICATION_MAX_RETRIES", 2))
- replication_streaming_threshold_bytes = int(_get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024))
- replication_max_failures_per_bucket = int(_get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50))
- site_sync_connect_timeout_seconds = int(_get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10))
- site_sync_read_timeout_seconds = int(_get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120))
- site_sync_max_retries = int(_get("SITE_SYNC_MAX_RETRIES", 2))
- site_sync_clock_skew_tolerance_seconds = float(_get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0))
- object_key_max_length_bytes = int(_get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024))
- object_cache_max_size = int(_get("OBJECT_CACHE_MAX_SIZE", 100))
- meta_read_cache_max = int(_get("META_READ_CACHE_MAX", 2048))
- bucket_config_cache_ttl_seconds = float(_get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0))
- object_tag_limit = int(_get("OBJECT_TAG_LIMIT", 50))
- encryption_chunk_size_bytes = int(_get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024))
- kms_generate_data_key_min_bytes = int(_get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1))
- kms_generate_data_key_max_bytes = int(_get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024))
- lifecycle_max_history_per_bucket = int(_get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50))
-
- site_id_raw = _get("SITE_ID", None)
- site_id = str(site_id_raw).strip() if site_id_raw else None
- site_endpoint_raw = _get("SITE_ENDPOINT", None)
- site_endpoint = str(site_endpoint_raw).strip() if site_endpoint_raw else None
- site_region = str(_get("SITE_REGION", "us-east-1"))
- site_priority = int(_get("SITE_PRIORITY", 100))
- ratelimit_admin = _validate_rate_limit(str(_get("RATE_LIMIT_ADMIN", "60 per minute")))
- num_trusted_proxies = int(_get("NUM_TRUSTED_PROXIES", 1))
- allowed_redirect_hosts_raw = _get("ALLOWED_REDIRECT_HOSTS", "")
- allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
- allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
- website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
- gc_enabled = str(_get("GC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
- gc_interval_hours = float(_get("GC_INTERVAL_HOURS", 6.0))
- gc_temp_file_max_age_hours = float(_get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0))
- gc_multipart_max_age_days = int(_get("GC_MULTIPART_MAX_AGE_DAYS", 7))
- gc_lock_file_max_age_hours = float(_get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0))
- gc_dry_run = str(_get("GC_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
- gc_io_throttle_ms = int(_get("GC_IO_THROTTLE_MS", 10))
- integrity_enabled = str(_get("INTEGRITY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
- integrity_interval_hours = float(_get("INTEGRITY_INTERVAL_HOURS", 24.0))
- integrity_batch_size = int(_get("INTEGRITY_BATCH_SIZE", 1000))
- integrity_auto_heal = str(_get("INTEGRITY_AUTO_HEAL", "0")).lower() in {"1", "true", "yes", "on"}
- integrity_dry_run = str(_get("INTEGRITY_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
- integrity_io_throttle_ms = int(_get("INTEGRITY_IO_THROTTLE_MS", 10))
-
- return cls(storage_root=storage_root,
- max_upload_size=max_upload_size,
- ui_page_size=ui_page_size,
- secret_key=secret_key,
- iam_config_path=iam_config_path,
- bucket_policy_path=bucket_policy_path,
- api_base_url=api_base_url,
- aws_region=aws_region,
- aws_service=aws_service,
- ui_enforce_bucket_policies=enforce_ui_policies,
- log_level=log_level,
- log_to_file=log_to_file,
- log_path=log_path,
- log_max_bytes=log_max_bytes,
- log_backup_count=log_backup_count,
- ratelimit_default=ratelimit_default,
- ratelimit_storage_uri=ratelimit_storage_uri,
- ratelimit_list_buckets=ratelimit_list_buckets,
- ratelimit_bucket_ops=ratelimit_bucket_ops,
- ratelimit_object_ops=ratelimit_object_ops,
- ratelimit_head_ops=ratelimit_head_ops,
- cors_origins=cors_origins,
- cors_methods=cors_methods,
- cors_allow_headers=cors_allow_headers,
- cors_expose_headers=cors_expose_headers,
- session_lifetime_days=session_lifetime_days,
- auth_max_attempts=auth_max_attempts,
- auth_lockout_minutes=auth_lockout_minutes,
- bulk_delete_max_keys=bulk_delete_max_keys,
- secret_ttl_seconds=secret_ttl_seconds,
- stream_chunk_size=stream_chunk_size,
- multipart_min_part_size=multipart_min_part_size,
- bucket_stats_cache_ttl=bucket_stats_cache_ttl,
- object_cache_ttl=object_cache_ttl,
- encryption_enabled=encryption_enabled,
- encryption_master_key_path=encryption_master_key_path,
- kms_enabled=kms_enabled,
- kms_keys_path=kms_keys_path,
- default_encryption_algorithm=default_encryption_algorithm,
- display_timezone=display_timezone,
- lifecycle_enabled=lifecycle_enabled,
- lifecycle_interval_seconds=lifecycle_interval_seconds,
- metrics_history_enabled=metrics_history_enabled,
- metrics_history_retention_hours=metrics_history_retention_hours,
- metrics_history_interval_minutes=metrics_history_interval_minutes,
- operation_metrics_enabled=operation_metrics_enabled,
- operation_metrics_interval_minutes=operation_metrics_interval_minutes,
- operation_metrics_retention_hours=operation_metrics_retention_hours,
- server_threads=server_threads,
- server_connection_limit=server_connection_limit,
- server_backlog=server_backlog,
- server_channel_timeout=server_channel_timeout,
- server_max_buffer_size=server_max_buffer_size,
- server_threads_auto=server_threads_auto,
- server_connection_limit_auto=server_connection_limit_auto,
- server_backlog_auto=server_backlog_auto,
- site_sync_enabled=site_sync_enabled,
- site_sync_interval_seconds=site_sync_interval_seconds,
- site_sync_batch_size=site_sync_batch_size,
- sigv4_timestamp_tolerance_seconds=sigv4_timestamp_tolerance_seconds,
- presigned_url_min_expiry_seconds=presigned_url_min_expiry_seconds,
- presigned_url_max_expiry_seconds=presigned_url_max_expiry_seconds,
- replication_connect_timeout_seconds=replication_connect_timeout_seconds,
- replication_read_timeout_seconds=replication_read_timeout_seconds,
- replication_max_retries=replication_max_retries,
- replication_streaming_threshold_bytes=replication_streaming_threshold_bytes,
- replication_max_failures_per_bucket=replication_max_failures_per_bucket,
- site_sync_connect_timeout_seconds=site_sync_connect_timeout_seconds,
- site_sync_read_timeout_seconds=site_sync_read_timeout_seconds,
- site_sync_max_retries=site_sync_max_retries,
- site_sync_clock_skew_tolerance_seconds=site_sync_clock_skew_tolerance_seconds,
- object_key_max_length_bytes=object_key_max_length_bytes,
- object_cache_max_size=object_cache_max_size,
- meta_read_cache_max=meta_read_cache_max,
- bucket_config_cache_ttl_seconds=bucket_config_cache_ttl_seconds,
- object_tag_limit=object_tag_limit,
- encryption_chunk_size_bytes=encryption_chunk_size_bytes,
- kms_generate_data_key_min_bytes=kms_generate_data_key_min_bytes,
- kms_generate_data_key_max_bytes=kms_generate_data_key_max_bytes,
- lifecycle_max_history_per_bucket=lifecycle_max_history_per_bucket,
- site_id=site_id,
- site_endpoint=site_endpoint,
- site_region=site_region,
- site_priority=site_priority,
- ratelimit_admin=ratelimit_admin,
- num_trusted_proxies=num_trusted_proxies,
- allowed_redirect_hosts=allowed_redirect_hosts,
- allow_internal_endpoints=allow_internal_endpoints,
- website_hosting_enabled=website_hosting_enabled,
- gc_enabled=gc_enabled,
- gc_interval_hours=gc_interval_hours,
- gc_temp_file_max_age_hours=gc_temp_file_max_age_hours,
- gc_multipart_max_age_days=gc_multipart_max_age_days,
- gc_lock_file_max_age_hours=gc_lock_file_max_age_hours,
- gc_dry_run=gc_dry_run,
- gc_io_throttle_ms=gc_io_throttle_ms,
- integrity_enabled=integrity_enabled,
- integrity_interval_hours=integrity_interval_hours,
- integrity_batch_size=integrity_batch_size,
- integrity_auto_heal=integrity_auto_heal,
- integrity_dry_run=integrity_dry_run,
- integrity_io_throttle_ms=integrity_io_throttle_ms)
-
- def validate_and_report(self) -> list[str]:
- """Validate configuration and return a list of warnings/issues.
-
- Call this at startup to detect potential misconfigurations before
- the application fully commits to running.
- """
- issues = []
-
- try:
- test_file = self.storage_root / ".write_test"
- test_file.touch()
- test_file.unlink()
- except (OSError, PermissionError) as e:
- issues.append(f"CRITICAL: STORAGE_ROOT '{self.storage_root}' is not writable: {e}")
-
- storage_str = str(self.storage_root).lower()
- if "/tmp" in storage_str or "\\temp" in storage_str or "appdata\\local\\temp" in storage_str:
- issues.append(f"WARNING: STORAGE_ROOT '{self.storage_root}' appears to be a temporary directory. Data may be lost on reboot!")
-
- try:
- self.iam_config_path.relative_to(self.storage_root)
- except ValueError:
- issues.append(f"WARNING: IAM_CONFIG '{self.iam_config_path}' is outside STORAGE_ROOT '{self.storage_root}'. Consider setting IAM_CONFIG explicitly or ensuring paths are aligned.")
-
- try:
- self.bucket_policy_path.relative_to(self.storage_root)
- except ValueError:
- issues.append(f"WARNING: BUCKET_POLICY_PATH '{self.bucket_policy_path}' is outside STORAGE_ROOT '{self.storage_root}'. Consider setting BUCKET_POLICY_PATH explicitly.")
-
- try:
- self.log_path.parent.mkdir(parents=True, exist_ok=True)
- test_log = self.log_path.parent / ".write_test"
- test_log.touch()
- test_log.unlink()
- except (OSError, PermissionError) as e:
- issues.append(f"WARNING: Log directory '{self.log_path.parent}' is not writable: {e}")
-
- log_str = str(self.log_path).lower()
- if "/tmp" in log_str or "\\temp" in log_str or "appdata\\local\\temp" in log_str:
- issues.append(f"WARNING: LOG_DIR '{self.log_path.parent}' appears to be a temporary directory. Logs may be lost on reboot!")
-
- if self.encryption_enabled:
- try:
- self.encryption_master_key_path.relative_to(self.storage_root)
- except ValueError:
- issues.append(f"WARNING: ENCRYPTION_MASTER_KEY_PATH '{self.encryption_master_key_path}' is outside STORAGE_ROOT. Ensure proper backup procedures.")
-
- if self.kms_enabled:
- try:
- self.kms_keys_path.relative_to(self.storage_root)
- except ValueError:
- issues.append(f"WARNING: KMS_KEYS_PATH '{self.kms_keys_path}' is outside STORAGE_ROOT. Ensure proper backup procedures.")
-
- if self.secret_key == "dev-secret-key":
- issues.append("WARNING: Using default SECRET_KEY. Set SECRET_KEY environment variable for production.")
-
- if "*" in self.cors_origins:
- issues.append("INFO: CORS_ORIGINS is set to '*'. Consider restricting to specific domains in production.")
-
- if not (1 <= self.server_threads <= 64):
- issues.append(f"CRITICAL: SERVER_THREADS={self.server_threads} is outside valid range (1-64). Server cannot start.")
- if not (10 <= self.server_connection_limit <= 1000):
- issues.append(f"CRITICAL: SERVER_CONNECTION_LIMIT={self.server_connection_limit} is outside valid range (10-1000). Server cannot start.")
- if not (128 <= self.server_backlog <= 4096):
- issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (128-4096). Server cannot start.")
- if not (10 <= self.server_channel_timeout <= 300):
- issues.append(f"CRITICAL: SERVER_CHANNEL_TIMEOUT={self.server_channel_timeout} is outside valid range (10-300). Server cannot start.")
- if self.server_max_buffer_size < 1024 * 1024:
- issues.append(f"WARNING: SERVER_MAX_BUFFER_SIZE={self.server_max_buffer_size} is less than 1MB. Large uploads will fail.")
-
- if sys.platform != "win32":
- try:
- import resource
- soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
- threshold = int(soft_limit * 0.8)
- if self.server_connection_limit > threshold:
- issues.append(f"WARNING: SERVER_CONNECTION_LIMIT={self.server_connection_limit} exceeds 80% of system file descriptor limit (soft={soft_limit}). Consider running 'ulimit -n {self.server_connection_limit + 100}'.")
- except (ImportError, OSError):
- pass
-
- try:
- import psutil
- available_mb = psutil.virtual_memory().available / (1024 * 1024)
- estimated_mb = self.server_threads * 50
- if estimated_mb > available_mb * 0.5:
- issues.append(f"WARNING: SERVER_THREADS={self.server_threads} may require ~{estimated_mb}MB memory, exceeding 50% of available RAM ({int(available_mb)}MB).")
- except ImportError:
- pass
-
- return issues
-
- def print_startup_summary(self) -> None:
- """Print a summary of the configuration at startup."""
- print("\n" + "=" * 60)
- print("MyFSIO Configuration Summary")
- print("=" * 60)
- print(f" STORAGE_ROOT: {self.storage_root}")
- print(f" IAM_CONFIG: {self.iam_config_path}")
- print(f" BUCKET_POLICY: {self.bucket_policy_path}")
- print(f" LOG_PATH: {self.log_path}")
- if self.api_base_url:
- print(f" API_BASE_URL: {self.api_base_url}")
- if self.encryption_enabled:
- print(f" ENCRYPTION: Enabled (Master key: {self.encryption_master_key_path})")
- if self.kms_enabled:
- print(f" KMS: Enabled (Keys: {self.kms_keys_path})")
- if self.website_hosting_enabled:
- print(f" WEBSITE_HOSTING: Enabled")
- def _auto(flag: bool) -> str:
- return " (auto)" if flag else ""
- print(f" SERVER_THREADS: {self.server_threads}{_auto(self.server_threads_auto)}")
- print(f" CONNECTION_LIMIT: {self.server_connection_limit}{_auto(self.server_connection_limit_auto)}")
- print(f" BACKLOG: {self.server_backlog}{_auto(self.server_backlog_auto)}")
- print(f" CHANNEL_TIMEOUT: {self.server_channel_timeout}s")
- print(f" MAX_BUFFER_SIZE: {self.server_max_buffer_size // (1024 * 1024)}MB")
- print("=" * 60)
-
- issues = self.validate_and_report()
- if issues:
- print("\nConfiguration Issues Detected:")
- for issue in issues:
- print(f" • {issue}")
- print()
- else:
- print(" ✓ Configuration validated successfully\n")
-
- def to_flask_config(self) -> Dict[str, Any]:
- return {
- "STORAGE_ROOT": str(self.storage_root),
- "MAX_CONTENT_LENGTH": self.max_upload_size,
- "UI_PAGE_SIZE": self.ui_page_size,
- "SECRET_KEY": self.secret_key,
- "IAM_CONFIG": str(self.iam_config_path),
- "BUCKET_POLICY_PATH": str(self.bucket_policy_path),
- "API_BASE_URL": self.api_base_url,
- "AWS_REGION": self.aws_region,
- "AWS_SERVICE": self.aws_service,
- "UI_ENFORCE_BUCKET_POLICIES": self.ui_enforce_bucket_policies,
- "AUTH_MAX_ATTEMPTS": self.auth_max_attempts,
- "AUTH_LOCKOUT_MINUTES": self.auth_lockout_minutes,
- "BULK_DELETE_MAX_KEYS": self.bulk_delete_max_keys,
- "SECRET_TTL_SECONDS": self.secret_ttl_seconds,
- "STREAM_CHUNK_SIZE": self.stream_chunk_size,
- "MULTIPART_MIN_PART_SIZE": self.multipart_min_part_size,
- "BUCKET_STATS_CACHE_TTL": self.bucket_stats_cache_ttl,
- "OBJECT_CACHE_TTL": self.object_cache_ttl,
- "LOG_LEVEL": self.log_level,
- "LOG_TO_FILE": self.log_to_file,
- "LOG_FILE": str(self.log_path),
- "LOG_MAX_BYTES": self.log_max_bytes,
- "LOG_BACKUP_COUNT": self.log_backup_count,
- "RATELIMIT_DEFAULT": self.ratelimit_default,
- "RATELIMIT_STORAGE_URI": self.ratelimit_storage_uri,
- "RATELIMIT_LIST_BUCKETS": self.ratelimit_list_buckets,
- "RATELIMIT_BUCKET_OPS": self.ratelimit_bucket_ops,
- "RATELIMIT_OBJECT_OPS": self.ratelimit_object_ops,
- "RATELIMIT_HEAD_OPS": self.ratelimit_head_ops,
- "CORS_ORIGINS": self.cors_origins,
- "CORS_METHODS": self.cors_methods,
- "CORS_ALLOW_HEADERS": self.cors_allow_headers,
- "CORS_EXPOSE_HEADERS": self.cors_expose_headers,
- "SESSION_LIFETIME_DAYS": self.session_lifetime_days,
- "ENCRYPTION_ENABLED": self.encryption_enabled,
- "ENCRYPTION_MASTER_KEY_PATH": str(self.encryption_master_key_path),
- "KMS_ENABLED": self.kms_enabled,
- "KMS_KEYS_PATH": str(self.kms_keys_path),
- "DEFAULT_ENCRYPTION_ALGORITHM": self.default_encryption_algorithm,
- "DISPLAY_TIMEZONE": self.display_timezone,
- "LIFECYCLE_ENABLED": self.lifecycle_enabled,
- "LIFECYCLE_INTERVAL_SECONDS": self.lifecycle_interval_seconds,
- "METRICS_HISTORY_ENABLED": self.metrics_history_enabled,
- "METRICS_HISTORY_RETENTION_HOURS": self.metrics_history_retention_hours,
- "METRICS_HISTORY_INTERVAL_MINUTES": self.metrics_history_interval_minutes,
- "OPERATION_METRICS_ENABLED": self.operation_metrics_enabled,
- "OPERATION_METRICS_INTERVAL_MINUTES": self.operation_metrics_interval_minutes,
- "OPERATION_METRICS_RETENTION_HOURS": self.operation_metrics_retention_hours,
- "SERVER_THREADS": self.server_threads,
- "SERVER_CONNECTION_LIMIT": self.server_connection_limit,
- "SERVER_BACKLOG": self.server_backlog,
- "SERVER_CHANNEL_TIMEOUT": self.server_channel_timeout,
- "SERVER_MAX_BUFFER_SIZE": self.server_max_buffer_size,
- "SITE_SYNC_ENABLED": self.site_sync_enabled,
- "SITE_SYNC_INTERVAL_SECONDS": self.site_sync_interval_seconds,
- "SITE_SYNC_BATCH_SIZE": self.site_sync_batch_size,
- "SIGV4_TIMESTAMP_TOLERANCE_SECONDS": self.sigv4_timestamp_tolerance_seconds,
- "PRESIGNED_URL_MIN_EXPIRY_SECONDS": self.presigned_url_min_expiry_seconds,
- "PRESIGNED_URL_MAX_EXPIRY_SECONDS": self.presigned_url_max_expiry_seconds,
- "REPLICATION_CONNECT_TIMEOUT_SECONDS": self.replication_connect_timeout_seconds,
- "REPLICATION_READ_TIMEOUT_SECONDS": self.replication_read_timeout_seconds,
- "REPLICATION_MAX_RETRIES": self.replication_max_retries,
- "REPLICATION_STREAMING_THRESHOLD_BYTES": self.replication_streaming_threshold_bytes,
- "REPLICATION_MAX_FAILURES_PER_BUCKET": self.replication_max_failures_per_bucket,
- "SITE_SYNC_CONNECT_TIMEOUT_SECONDS": self.site_sync_connect_timeout_seconds,
- "SITE_SYNC_READ_TIMEOUT_SECONDS": self.site_sync_read_timeout_seconds,
- "SITE_SYNC_MAX_RETRIES": self.site_sync_max_retries,
- "SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS": self.site_sync_clock_skew_tolerance_seconds,
- "OBJECT_KEY_MAX_LENGTH_BYTES": self.object_key_max_length_bytes,
- "OBJECT_CACHE_MAX_SIZE": self.object_cache_max_size,
- "META_READ_CACHE_MAX": self.meta_read_cache_max,
- "BUCKET_CONFIG_CACHE_TTL_SECONDS": self.bucket_config_cache_ttl_seconds,
- "OBJECT_TAG_LIMIT": self.object_tag_limit,
- "ENCRYPTION_CHUNK_SIZE_BYTES": self.encryption_chunk_size_bytes,
- "KMS_GENERATE_DATA_KEY_MIN_BYTES": self.kms_generate_data_key_min_bytes,
- "KMS_GENERATE_DATA_KEY_MAX_BYTES": self.kms_generate_data_key_max_bytes,
- "LIFECYCLE_MAX_HISTORY_PER_BUCKET": self.lifecycle_max_history_per_bucket,
- "SITE_ID": self.site_id,
- "SITE_ENDPOINT": self.site_endpoint,
- "SITE_REGION": self.site_region,
- "SITE_PRIORITY": self.site_priority,
- "RATE_LIMIT_ADMIN": self.ratelimit_admin,
- "NUM_TRUSTED_PROXIES": self.num_trusted_proxies,
- "ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
- "ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
- "WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled,
- "GC_ENABLED": self.gc_enabled,
- "GC_INTERVAL_HOURS": self.gc_interval_hours,
- "GC_TEMP_FILE_MAX_AGE_HOURS": self.gc_temp_file_max_age_hours,
- "GC_MULTIPART_MAX_AGE_DAYS": self.gc_multipart_max_age_days,
- "GC_LOCK_FILE_MAX_AGE_HOURS": self.gc_lock_file_max_age_hours,
- "GC_DRY_RUN": self.gc_dry_run,
- "GC_IO_THROTTLE_MS": self.gc_io_throttle_ms,
- "INTEGRITY_ENABLED": self.integrity_enabled,
- "INTEGRITY_INTERVAL_HOURS": self.integrity_interval_hours,
- "INTEGRITY_BATCH_SIZE": self.integrity_batch_size,
- "INTEGRITY_AUTO_HEAL": self.integrity_auto_heal,
- "INTEGRITY_DRY_RUN": self.integrity_dry_run,
- "INTEGRITY_IO_THROTTLE_MS": self.integrity_io_throttle_ms,
- }
diff --git a/python/app/connections.py b/python/app/connections.py
deleted file mode 100644
index b694af9..0000000
--- a/python/app/connections.py
+++ /dev/null
@@ -1,60 +0,0 @@
-from __future__ import annotations
-
-import json
-from dataclasses import asdict, dataclass
-from pathlib import Path
-from typing import Dict, List, Optional
-
-from .config import AppConfig
-
-
-@dataclass
-class RemoteConnection:
- id: str
- name: str
- endpoint_url: str
- access_key: str
- secret_key: str
- region: str = "us-east-1"
-
-
-class ConnectionStore:
- def __init__(self, config_path: Path) -> None:
- self.config_path = config_path
- self._connections: Dict[str, RemoteConnection] = {}
- self.reload()
-
- def reload(self) -> None:
- if not self.config_path.exists():
- self._connections = {}
- return
-
- try:
- with open(self.config_path, "r") as f:
- data = json.load(f)
- for item in data:
- conn = RemoteConnection(**item)
- self._connections[conn.id] = conn
- except (OSError, json.JSONDecodeError):
- self._connections = {}
-
- def save(self) -> None:
- self.config_path.parent.mkdir(parents=True, exist_ok=True)
- data = [asdict(conn) for conn in self._connections.values()]
- with open(self.config_path, "w") as f:
- json.dump(data, f, indent=2)
-
- def list(self) -> List[RemoteConnection]:
- return list(self._connections.values())
-
- def get(self, connection_id: str) -> Optional[RemoteConnection]:
- return self._connections.get(connection_id)
-
- def add(self, connection: RemoteConnection) -> None:
- self._connections[connection.id] = connection
- self.save()
-
- def delete(self, connection_id: str) -> None:
- if connection_id in self._connections:
- del self._connections[connection_id]
- self.save()
diff --git a/python/app/encrypted_storage.py b/python/app/encrypted_storage.py
deleted file mode 100644
index b64e1d1..0000000
--- a/python/app/encrypted_storage.py
+++ /dev/null
@@ -1,293 +0,0 @@
-from __future__ import annotations
-
-import io
-from pathlib import Path
-from typing import Any, BinaryIO, Dict, Optional
-
-from .encryption import EncryptionManager, EncryptionMetadata, EncryptionError
-from .storage import ObjectStorage, ObjectMeta, StorageError
-
-
-class EncryptedObjectStorage:
- """Object storage with transparent server-side encryption.
-
- This class wraps ObjectStorage and provides transparent encryption/decryption
- of objects based on bucket encryption configuration.
-
- Encryption is applied when:
- 1. Bucket has default encryption configured (SSE-S3 or SSE-KMS)
- 2. Client explicitly requests encryption via headers
-
- The encryption metadata is stored alongside object metadata.
- """
-
- STREAMING_THRESHOLD = 64 * 1024
-
- def __init__(self, storage: ObjectStorage, encryption_manager: EncryptionManager):
- self.storage = storage
- self.encryption = encryption_manager
-
- @property
- def root(self) -> Path:
- return self.storage.root
-
- def _should_encrypt(self, bucket_name: str,
- server_side_encryption: str | None = None) -> tuple[bool, str, str | None]:
- """Determine if object should be encrypted.
-
- Returns:
- Tuple of (should_encrypt, algorithm, kms_key_id)
- """
- if not self.encryption.enabled:
- return False, "", None
-
- if server_side_encryption:
- if server_side_encryption == "AES256":
- return True, "AES256", None
- elif server_side_encryption.startswith("aws:kms"):
- parts = server_side_encryption.split(":")
- kms_key_id = parts[2] if len(parts) > 2 else None
- return True, "aws:kms", kms_key_id
-
- try:
- encryption_config = self.storage.get_bucket_encryption(bucket_name)
- if encryption_config and encryption_config.get("Rules"):
- rule = encryption_config["Rules"][0]
- # AWS format: Rules[].ApplyServerSideEncryptionByDefault.SSEAlgorithm
- sse_default = rule.get("ApplyServerSideEncryptionByDefault", {})
- algorithm = sse_default.get("SSEAlgorithm", "AES256")
- kms_key_id = sse_default.get("KMSMasterKeyID")
- return True, algorithm, kms_key_id
- except StorageError:
- pass
-
- return False, "", None
-
- def _is_encrypted(self, metadata: Dict[str, str]) -> bool:
- """Check if object is encrypted based on its metadata."""
- return "x-amz-server-side-encryption" in metadata
-
- def put_object(
- self,
- bucket_name: str,
- object_key: str,
- stream: BinaryIO,
- *,
- metadata: Optional[Dict[str, str]] = None,
- server_side_encryption: Optional[str] = None,
- kms_key_id: Optional[str] = None,
- ) -> ObjectMeta:
- """Store an object, optionally with encryption.
-
- Args:
- bucket_name: Name of the bucket
- object_key: Key for the object
- stream: Binary stream of object data
- metadata: Optional user metadata
- server_side_encryption: Encryption algorithm ("AES256" or "aws:kms")
- kms_key_id: KMS key ID (for aws:kms encryption)
-
- Returns:
- ObjectMeta with object information
-
- Performance: Uses streaming encryption for large files to reduce memory usage.
- """
- should_encrypt, algorithm, detected_kms_key = self._should_encrypt(
- bucket_name, server_side_encryption
- )
-
- if kms_key_id is None:
- kms_key_id = detected_kms_key
-
- if should_encrypt:
- try:
- # Performance: Use streaming encryption to avoid loading entire file into memory
- encrypted_stream, enc_metadata = self.encryption.encrypt_stream(
- stream,
- algorithm=algorithm,
- context={"bucket": bucket_name, "key": object_key},
- )
-
- combined_metadata = metadata.copy() if metadata else {}
- combined_metadata.update(enc_metadata.to_dict())
-
- result = self.storage.put_object(
- bucket_name,
- object_key,
- encrypted_stream,
- metadata=combined_metadata,
- )
-
- result.metadata = combined_metadata
- return result
-
- except EncryptionError as exc:
- raise StorageError(f"Encryption failed: {exc}") from exc
- else:
- return self.storage.put_object(
- bucket_name,
- object_key,
- stream,
- metadata=metadata,
- )
-
- def get_object_data(self, bucket_name: str, object_key: str) -> tuple[bytes, Dict[str, str]]:
- """Get object data, decrypting if necessary.
-
- Returns:
- Tuple of (data, metadata)
-
- Performance: Uses streaming decryption to reduce memory usage.
- """
- path = self.storage.get_object_path(bucket_name, object_key)
- metadata = self.storage.get_object_metadata(bucket_name, object_key)
-
- enc_metadata = EncryptionMetadata.from_dict(metadata)
- if enc_metadata:
- try:
- # Performance: Use streaming decryption to avoid loading entire file into memory
- with path.open("rb") as f:
- decrypted_stream = self.encryption.decrypt_stream(f, enc_metadata)
- data = decrypted_stream.read()
- except EncryptionError as exc:
- raise StorageError(f"Decryption failed: {exc}") from exc
- else:
- with path.open("rb") as f:
- data = f.read()
-
- clean_metadata = {
- k: v for k, v in metadata.items()
- if not k.startswith("x-amz-encryption")
- and k != "x-amz-encrypted-data-key"
- }
-
- return data, clean_metadata
-
- def get_object_stream(self, bucket_name: str, object_key: str) -> tuple[BinaryIO, Dict[str, str], int]:
- """Get object as a stream, decrypting if necessary.
-
- Returns:
- Tuple of (stream, metadata, original_size)
- """
- data, metadata = self.get_object_data(bucket_name, object_key)
- return io.BytesIO(data), metadata, len(data)
-
- def list_buckets(self):
- return self.storage.list_buckets()
-
- def bucket_exists(self, bucket_name: str) -> bool:
- return self.storage.bucket_exists(bucket_name)
-
- def create_bucket(self, bucket_name: str) -> None:
- return self.storage.create_bucket(bucket_name)
-
- def delete_bucket(self, bucket_name: str) -> None:
- return self.storage.delete_bucket(bucket_name)
-
- def bucket_stats(self, bucket_name: str, cache_ttl: int = 60):
- return self.storage.bucket_stats(bucket_name, cache_ttl)
-
- def list_objects(self, bucket_name: str, **kwargs):
- return self.storage.list_objects(bucket_name, **kwargs)
-
- def list_objects_shallow(self, bucket_name: str, **kwargs):
- return self.storage.list_objects_shallow(bucket_name, **kwargs)
-
- def iter_objects_shallow(self, bucket_name: str, **kwargs):
- return self.storage.iter_objects_shallow(bucket_name, **kwargs)
-
- def search_objects(self, bucket_name: str, query: str, **kwargs):
- return self.storage.search_objects(bucket_name, query, **kwargs)
-
- def list_objects_all(self, bucket_name: str):
- return self.storage.list_objects_all(bucket_name)
-
- def get_object_path(self, bucket_name: str, object_key: str):
- return self.storage.get_object_path(bucket_name, object_key)
-
- def get_object_metadata(self, bucket_name: str, object_key: str):
- return self.storage.get_object_metadata(bucket_name, object_key)
-
- def delete_object(self, bucket_name: str, object_key: str) -> None:
- return self.storage.delete_object(bucket_name, object_key)
-
- def purge_object(self, bucket_name: str, object_key: str) -> None:
- return self.storage.purge_object(bucket_name, object_key)
-
- def is_versioning_enabled(self, bucket_name: str) -> bool:
- return self.storage.is_versioning_enabled(bucket_name)
-
- def set_bucket_versioning(self, bucket_name: str, enabled: bool) -> None:
- return self.storage.set_bucket_versioning(bucket_name, enabled)
-
- def get_bucket_tags(self, bucket_name: str):
- return self.storage.get_bucket_tags(bucket_name)
-
- def set_bucket_tags(self, bucket_name: str, tags):
- return self.storage.set_bucket_tags(bucket_name, tags)
-
- def get_bucket_cors(self, bucket_name: str):
- return self.storage.get_bucket_cors(bucket_name)
-
- def set_bucket_cors(self, bucket_name: str, rules):
- return self.storage.set_bucket_cors(bucket_name, rules)
-
- def get_bucket_encryption(self, bucket_name: str):
- return self.storage.get_bucket_encryption(bucket_name)
-
- def set_bucket_encryption(self, bucket_name: str, config_payload):
- return self.storage.set_bucket_encryption(bucket_name, config_payload)
-
- def get_bucket_lifecycle(self, bucket_name: str):
- return self.storage.get_bucket_lifecycle(bucket_name)
-
- def set_bucket_lifecycle(self, bucket_name: str, rules):
- return self.storage.set_bucket_lifecycle(bucket_name, rules)
-
- def get_object_tags(self, bucket_name: str, object_key: str):
- return self.storage.get_object_tags(bucket_name, object_key)
-
- def set_object_tags(self, bucket_name: str, object_key: str, tags):
- return self.storage.set_object_tags(bucket_name, object_key, tags)
-
- def delete_object_tags(self, bucket_name: str, object_key: str):
- return self.storage.delete_object_tags(bucket_name, object_key)
-
- def list_object_versions(self, bucket_name: str, object_key: str):
- return self.storage.list_object_versions(bucket_name, object_key)
-
- def restore_object_version(self, bucket_name: str, object_key: str, version_id: str):
- return self.storage.restore_object_version(bucket_name, object_key, version_id)
-
- def list_orphaned_objects(self, bucket_name: str):
- return self.storage.list_orphaned_objects(bucket_name)
-
- def initiate_multipart_upload(self, bucket_name: str, object_key: str, *, metadata=None) -> str:
- return self.storage.initiate_multipart_upload(bucket_name, object_key, metadata=metadata)
-
- def upload_multipart_part(self, bucket_name: str, upload_id: str, part_number: int, stream: BinaryIO) -> str:
- return self.storage.upload_multipart_part(bucket_name, upload_id, part_number, stream)
-
- def complete_multipart_upload(self, bucket_name: str, upload_id: str, ordered_parts):
- return self.storage.complete_multipart_upload(bucket_name, upload_id, ordered_parts)
-
- def abort_multipart_upload(self, bucket_name: str, upload_id: str) -> None:
- return self.storage.abort_multipart_upload(bucket_name, upload_id)
-
- def list_multipart_parts(self, bucket_name: str, upload_id: str):
- return self.storage.list_multipart_parts(bucket_name, upload_id)
-
- def get_bucket_quota(self, bucket_name: str):
- return self.storage.get_bucket_quota(bucket_name)
-
- def set_bucket_quota(self, bucket_name: str, *, max_bytes=None, max_objects=None):
- return self.storage.set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects)
-
- def get_bucket_website(self, bucket_name: str):
- return self.storage.get_bucket_website(bucket_name)
-
- def set_bucket_website(self, bucket_name: str, website_config):
- return self.storage.set_bucket_website(bucket_name, website_config)
-
- def _compute_etag(self, path: Path) -> str:
- return self.storage._compute_etag(path)
diff --git a/python/app/encryption.py b/python/app/encryption.py
deleted file mode 100644
index f000176..0000000
--- a/python/app/encryption.py
+++ /dev/null
@@ -1,653 +0,0 @@
-from __future__ import annotations
-
-import base64
-import io
-import json
-import logging
-import os
-import secrets
-import subprocess
-import sys
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, BinaryIO, Dict, Generator, Optional
-
-from cryptography.hazmat.primitives.ciphers.aead import AESGCM
-from cryptography.hazmat.primitives.kdf.hkdf import HKDF
-from cryptography.hazmat.primitives import hashes
-
-if sys.platform != "win32":
- import fcntl
-
-try:
- import myfsio_core as _rc
- if not all(hasattr(_rc, f) for f in (
- "encrypt_stream_chunked", "decrypt_stream_chunked",
- )):
- raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
- _HAS_RUST = True
-except ImportError:
- _rc = None
- _HAS_RUST = False
-
-logger = logging.getLogger(__name__)
-
-
-def _set_secure_file_permissions(file_path: Path) -> None:
- """Set restrictive file permissions (owner read/write only)."""
- if sys.platform == "win32":
- try:
- username = os.environ.get("USERNAME", "")
- if username:
- subprocess.run(
- ["icacls", str(file_path), "/inheritance:r",
- "/grant:r", f"{username}:F"],
- check=True, capture_output=True
- )
- else:
- logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
- except (subprocess.SubprocessError, OSError) as exc:
- logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
- else:
- os.chmod(file_path, 0o600)
-
-
-class EncryptionError(Exception):
- """Raised when encryption/decryption fails."""
-
-
-@dataclass
-class EncryptionResult:
- """Result of encrypting data."""
- ciphertext: bytes
- nonce: bytes
- key_id: str
- encrypted_data_key: bytes
-
-
-@dataclass
-class EncryptionMetadata:
- """Metadata stored with encrypted objects."""
- algorithm: str
- key_id: str
- nonce: bytes
- encrypted_data_key: bytes
-
- def to_dict(self) -> Dict[str, str]:
- return {
- "x-amz-server-side-encryption": self.algorithm,
- "x-amz-encryption-key-id": self.key_id,
- "x-amz-encryption-nonce": base64.b64encode(self.nonce).decode(),
- "x-amz-encrypted-data-key": base64.b64encode(self.encrypted_data_key).decode(),
- }
-
- @classmethod
- def from_dict(cls, data: Dict[str, str]) -> Optional["EncryptionMetadata"]:
- algorithm = data.get("x-amz-server-side-encryption")
- if not algorithm:
- return None
- try:
- return cls(
- algorithm=algorithm,
- key_id=data.get("x-amz-encryption-key-id", "local"),
- nonce=base64.b64decode(data.get("x-amz-encryption-nonce", "")),
- encrypted_data_key=base64.b64decode(data.get("x-amz-encrypted-data-key", "")),
- )
- except Exception:
- return None
-
-
-class EncryptionProvider:
- """Base class for encryption providers."""
-
- def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
- raise NotImplementedError
-
- def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
- key_id: str, context: Dict[str, str] | None = None) -> bytes:
- raise NotImplementedError
-
- def generate_data_key(self) -> tuple[bytes, bytes]:
- """Generate a data key and its encrypted form.
-
- Returns:
- Tuple of (plaintext_key, encrypted_key)
- """
- raise NotImplementedError
-
- def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
- """Decrypt an encrypted data key.
-
- Args:
- encrypted_data_key: The encrypted data key bytes
- key_id: Optional key identifier (used by KMS providers)
-
- Returns:
- The decrypted data key
- """
- raise NotImplementedError
-
-
-class LocalKeyEncryption(EncryptionProvider):
- """SSE-S3 style encryption using a local master key.
-
- Uses envelope encryption:
- 1. Generate a unique data key for each object
- 2. Encrypt the data with the data key (AES-256-GCM)
- 3. Encrypt the data key with the master key
- 4. Store the encrypted data key alongside the ciphertext
- """
-
- KEY_ID = "local"
-
- def __init__(self, master_key_path: Path):
- self.master_key_path = master_key_path
- self._master_key: bytes | None = None
-
- @property
- def master_key(self) -> bytes:
- if self._master_key is None:
- self._master_key = self._load_or_create_master_key()
- return self._master_key
-
- def _load_or_create_master_key(self) -> bytes:
- """Load master key from file or generate a new one (with file locking)."""
- lock_path = self.master_key_path.with_suffix(".lock")
- lock_path.parent.mkdir(parents=True, exist_ok=True)
-
- try:
- with open(lock_path, "w") as lock_file:
- if sys.platform == "win32":
- import msvcrt
- msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
- else:
- fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
- try:
- if self.master_key_path.exists():
- try:
- return base64.b64decode(self.master_key_path.read_text().strip())
- except Exception as exc:
- raise EncryptionError(f"Failed to load master key: {exc}") from exc
- key = secrets.token_bytes(32)
- try:
- self.master_key_path.write_text(base64.b64encode(key).decode())
- _set_secure_file_permissions(self.master_key_path)
- except OSError as exc:
- raise EncryptionError(f"Failed to save master key: {exc}") from exc
- return key
- finally:
- if sys.platform == "win32":
- import msvcrt
- msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
- else:
- fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
- except OSError as exc:
- raise EncryptionError(f"Failed to acquire lock for master key: {exc}") from exc
-
- DATA_KEY_AAD = b'{"purpose":"data_key","version":1}'
-
- def _encrypt_data_key(self, data_key: bytes) -> bytes:
- """Encrypt the data key with the master key."""
- aesgcm = AESGCM(self.master_key)
- nonce = secrets.token_bytes(12)
- encrypted = aesgcm.encrypt(nonce, data_key, self.DATA_KEY_AAD)
- return nonce + encrypted
-
- def _decrypt_data_key(self, encrypted_data_key: bytes) -> bytes:
- """Decrypt the data key using the master key."""
- if len(encrypted_data_key) < 12 + 32 + 16: # nonce + key + tag
- raise EncryptionError("Invalid encrypted data key")
- aesgcm = AESGCM(self.master_key)
- nonce = encrypted_data_key[:12]
- ciphertext = encrypted_data_key[12:]
- try:
- return aesgcm.decrypt(nonce, ciphertext, self.DATA_KEY_AAD)
- except Exception:
- try:
- return aesgcm.decrypt(nonce, ciphertext, None)
- except Exception as exc:
- raise EncryptionError(f"Failed to decrypt data key: {exc}") from exc
-
- def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
- """Decrypt an encrypted data key (key_id ignored for local encryption)."""
- return self._decrypt_data_key(encrypted_data_key)
-
- def generate_data_key(self) -> tuple[bytes, bytes]:
- """Generate a data key and its encrypted form."""
- plaintext_key = secrets.token_bytes(32)
- encrypted_key = self._encrypt_data_key(plaintext_key)
- return plaintext_key, encrypted_key
-
- def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
- """Encrypt data using envelope encryption."""
- data_key, encrypted_data_key = self.generate_data_key()
-
- aesgcm = AESGCM(data_key)
- nonce = secrets.token_bytes(12)
- aad = json.dumps(context, sort_keys=True).encode() if context else None
- ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
-
- return EncryptionResult(
- ciphertext=ciphertext,
- nonce=nonce,
- key_id=self.KEY_ID,
- encrypted_data_key=encrypted_data_key,
- )
-
- def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
- key_id: str, context: Dict[str, str] | None = None) -> bytes:
- """Decrypt data using envelope encryption."""
- data_key = self._decrypt_data_key(encrypted_data_key)
- aesgcm = AESGCM(data_key)
- aad = json.dumps(context, sort_keys=True).encode() if context else None
- try:
- return aesgcm.decrypt(nonce, ciphertext, aad)
- except Exception as exc:
- raise EncryptionError("Failed to decrypt data") from exc
-
-
-class StreamingEncryptor:
- """Encrypts/decrypts data in streaming fashion for large files.
-
- For large files, we encrypt in chunks. Each chunk is encrypted with the
- same data key but a unique nonce derived from the base nonce + chunk index.
- """
-
- CHUNK_SIZE = 64 * 1024
- HEADER_SIZE = 4
-
- def __init__(self, provider: EncryptionProvider, chunk_size: int = CHUNK_SIZE):
- self.provider = provider
- self.chunk_size = chunk_size
-
- def _derive_chunk_nonce(self, base_nonce: bytes, chunk_index: int) -> bytes:
- """Derive a unique nonce for each chunk using HKDF."""
- hkdf = HKDF(
- algorithm=hashes.SHA256(),
- length=12,
- salt=base_nonce,
- info=chunk_index.to_bytes(4, "big"),
- )
- return hkdf.derive(b"chunk_nonce")
-
- def encrypt_stream(self, stream: BinaryIO,
- context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]:
- """Encrypt a stream and return encrypted stream + metadata.
-
- Performance: Writes chunks directly to output buffer instead of accumulating in list.
- """
- data_key, encrypted_data_key = self.provider.generate_data_key()
- base_nonce = secrets.token_bytes(12)
-
- aesgcm = AESGCM(data_key)
- # Performance: Write directly to BytesIO instead of accumulating chunks
- output = io.BytesIO()
- output.write(b"\x00\x00\x00\x00") # Placeholder for chunk count
- chunk_index = 0
-
- while True:
- chunk = stream.read(self.chunk_size)
- if not chunk:
- break
-
- chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
- encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None)
-
- # Write size prefix + encrypted chunk directly
- output.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big"))
- output.write(encrypted_chunk)
- chunk_index += 1
-
- # Write actual chunk count to header
- output.seek(0)
- output.write(chunk_index.to_bytes(4, "big"))
- output.seek(0)
-
- metadata = EncryptionMetadata(
- algorithm="AES256",
- key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local",
- nonce=base_nonce,
- encrypted_data_key=encrypted_data_key,
- )
-
- return output, metadata
-
- def decrypt_stream(self, stream: BinaryIO, metadata: EncryptionMetadata) -> BinaryIO:
- """Decrypt a stream using the provided metadata.
-
- Performance: Writes chunks directly to output buffer instead of accumulating in list.
- """
- data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id)
-
- aesgcm = AESGCM(data_key)
- base_nonce = metadata.nonce
-
- chunk_count_bytes = stream.read(4)
- if len(chunk_count_bytes) < 4:
- raise EncryptionError("Invalid encrypted stream: missing header")
- chunk_count = int.from_bytes(chunk_count_bytes, "big")
-
- # Performance: Write directly to BytesIO instead of accumulating chunks
- output = io.BytesIO()
- for chunk_index in range(chunk_count):
- size_bytes = stream.read(self.HEADER_SIZE)
- if len(size_bytes) < self.HEADER_SIZE:
- raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}")
- chunk_size = int.from_bytes(size_bytes, "big")
-
- encrypted_chunk = stream.read(chunk_size)
- if len(encrypted_chunk) < chunk_size:
- raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}")
-
- chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
- try:
- decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None)
- output.write(decrypted_chunk) # Write directly instead of appending to list
- except Exception as exc:
- raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc
-
- output.seek(0)
- return output
-
- def encrypt_file(self, input_path: str, output_path: str) -> EncryptionMetadata:
- data_key, encrypted_data_key = self.provider.generate_data_key()
- base_nonce = secrets.token_bytes(12)
-
- if _HAS_RUST:
- _rc.encrypt_stream_chunked(
- input_path, output_path, data_key, base_nonce, self.chunk_size
- )
- else:
- with open(input_path, "rb") as stream:
- aesgcm = AESGCM(data_key)
- with open(output_path, "wb") as out:
- out.write(b"\x00\x00\x00\x00")
- chunk_index = 0
- while True:
- chunk = stream.read(self.chunk_size)
- if not chunk:
- break
- chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
- encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None)
- out.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big"))
- out.write(encrypted_chunk)
- chunk_index += 1
- out.seek(0)
- out.write(chunk_index.to_bytes(4, "big"))
-
- return EncryptionMetadata(
- algorithm="AES256",
- key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local",
- nonce=base_nonce,
- encrypted_data_key=encrypted_data_key,
- )
-
- def decrypt_file(self, input_path: str, output_path: str,
- metadata: EncryptionMetadata) -> None:
- data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id)
- base_nonce = metadata.nonce
-
- if _HAS_RUST:
- _rc.decrypt_stream_chunked(input_path, output_path, data_key, base_nonce)
- else:
- with open(input_path, "rb") as stream:
- chunk_count_bytes = stream.read(4)
- if len(chunk_count_bytes) < 4:
- raise EncryptionError("Invalid encrypted stream: missing header")
- chunk_count = int.from_bytes(chunk_count_bytes, "big")
- aesgcm = AESGCM(data_key)
- with open(output_path, "wb") as out:
- for chunk_index in range(chunk_count):
- size_bytes = stream.read(self.HEADER_SIZE)
- if len(size_bytes) < self.HEADER_SIZE:
- raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}")
- chunk_size = int.from_bytes(size_bytes, "big")
- encrypted_chunk = stream.read(chunk_size)
- if len(encrypted_chunk) < chunk_size:
- raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}")
- chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
- try:
- decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None)
- out.write(decrypted_chunk)
- except Exception as exc:
- raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc
-
-
-class EncryptionManager:
- """Manages encryption providers and operations."""
-
- def __init__(self, config: Dict[str, Any]):
- self.config = config
- self._local_provider: LocalKeyEncryption | None = None
- self._kms_provider: Any = None # Set by KMS module
- self._streaming_encryptor: StreamingEncryptor | None = None
-
- @property
- def enabled(self) -> bool:
- return self.config.get("encryption_enabled", False)
-
- @property
- def default_algorithm(self) -> str:
- return self.config.get("default_encryption_algorithm", "AES256")
-
- def get_local_provider(self) -> LocalKeyEncryption:
- if self._local_provider is None:
- key_path = Path(self.config.get("encryption_master_key_path", "data/.myfsio.sys/keys/master.key"))
- self._local_provider = LocalKeyEncryption(key_path)
- return self._local_provider
-
- def set_kms_provider(self, kms_provider: Any) -> None:
- """Set the KMS provider (injected from kms module)."""
- self._kms_provider = kms_provider
-
- def get_provider(self, algorithm: str, kms_key_id: str | None = None) -> EncryptionProvider:
- """Get the appropriate encryption provider for the algorithm."""
- if algorithm == "AES256":
- return self.get_local_provider()
- elif algorithm == "aws:kms":
- if self._kms_provider is None:
- raise EncryptionError("KMS is not configured")
- return self._kms_provider.get_provider(kms_key_id)
- else:
- raise EncryptionError(f"Unsupported encryption algorithm: {algorithm}")
-
- def get_streaming_encryptor(self) -> StreamingEncryptor:
- if self._streaming_encryptor is None:
- chunk_size = self.config.get("encryption_chunk_size_bytes", 64 * 1024)
- self._streaming_encryptor = StreamingEncryptor(self.get_local_provider(), chunk_size=chunk_size)
- return self._streaming_encryptor
-
- def encrypt_object(self, data: bytes, algorithm: str = "AES256",
- kms_key_id: str | None = None,
- context: Dict[str, str] | None = None) -> tuple[bytes, EncryptionMetadata]:
- """Encrypt object data."""
- provider = self.get_provider(algorithm, kms_key_id)
- result = provider.encrypt(data, context)
-
- metadata = EncryptionMetadata(
- algorithm=algorithm,
- key_id=result.key_id,
- nonce=result.nonce,
- encrypted_data_key=result.encrypted_data_key,
- )
-
- return result.ciphertext, metadata
-
- def decrypt_object(self, ciphertext: bytes, metadata: EncryptionMetadata,
- context: Dict[str, str] | None = None) -> bytes:
- """Decrypt object data."""
- provider = self.get_provider(metadata.algorithm, metadata.key_id)
- return provider.decrypt(
- ciphertext,
- metadata.nonce,
- metadata.encrypted_data_key,
- metadata.key_id,
- context,
- )
-
- def encrypt_stream(self, stream: BinaryIO, algorithm: str = "AES256",
- context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]:
- """Encrypt a stream for large files."""
- encryptor = self.get_streaming_encryptor()
- return encryptor.encrypt_stream(stream, context)
-
- def decrypt_stream(self, stream: BinaryIO, metadata: EncryptionMetadata) -> BinaryIO:
- """Decrypt a stream."""
- encryptor = self.get_streaming_encryptor()
- return encryptor.decrypt_stream(stream, metadata)
-
-
-class SSECEncryption(EncryptionProvider):
- """SSE-C: Server-Side Encryption with Customer-Provided Keys.
-
- The client provides the encryption key with each request.
- Server encrypts/decrypts but never stores the key.
-
- Required headers for PUT:
- - x-amz-server-side-encryption-customer-algorithm: AES256
- - x-amz-server-side-encryption-customer-key: Base64-encoded 256-bit key
- - x-amz-server-side-encryption-customer-key-MD5: Base64-encoded MD5 of key
- """
-
- KEY_ID = "customer-provided"
-
- def __init__(self, customer_key: bytes):
- if len(customer_key) != 32:
- raise EncryptionError("Customer key must be exactly 256 bits (32 bytes)")
- self.customer_key = customer_key
-
- @classmethod
- def from_headers(cls, headers: Dict[str, str]) -> "SSECEncryption":
- algorithm = headers.get("x-amz-server-side-encryption-customer-algorithm", "")
- if algorithm.upper() != "AES256":
- raise EncryptionError(f"Unsupported SSE-C algorithm: {algorithm}. Only AES256 is supported.")
-
- key_b64 = headers.get("x-amz-server-side-encryption-customer-key", "")
- if not key_b64:
- raise EncryptionError("Missing x-amz-server-side-encryption-customer-key header")
-
- key_md5_b64 = headers.get("x-amz-server-side-encryption-customer-key-md5", "")
-
- try:
- customer_key = base64.b64decode(key_b64)
- except Exception as e:
- raise EncryptionError(f"Invalid base64 in customer key: {e}") from e
-
- if len(customer_key) != 32:
- raise EncryptionError(f"Customer key must be 256 bits, got {len(customer_key) * 8} bits")
-
- if key_md5_b64:
- import hashlib
- expected_md5 = base64.b64encode(hashlib.md5(customer_key).digest()).decode()
- if key_md5_b64 != expected_md5:
- raise EncryptionError("Customer key MD5 mismatch")
-
- return cls(customer_key)
-
- def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
- aesgcm = AESGCM(self.customer_key)
- nonce = secrets.token_bytes(12)
- aad = json.dumps(context, sort_keys=True).encode() if context else None
- ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
-
- return EncryptionResult(
- ciphertext=ciphertext,
- nonce=nonce,
- key_id=self.KEY_ID,
- encrypted_data_key=b"",
- )
-
- def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
- key_id: str, context: Dict[str, str] | None = None) -> bytes:
- aesgcm = AESGCM(self.customer_key)
- aad = json.dumps(context, sort_keys=True).encode() if context else None
- try:
- return aesgcm.decrypt(nonce, ciphertext, aad)
- except Exception as exc:
- raise EncryptionError("SSE-C decryption failed") from exc
-
- def generate_data_key(self) -> tuple[bytes, bytes]:
- return self.customer_key, b""
-
-
-@dataclass
-class SSECMetadata:
- algorithm: str = "AES256"
- nonce: bytes = b""
- key_md5: str = ""
-
- def to_dict(self) -> Dict[str, str]:
- return {
- "x-amz-server-side-encryption-customer-algorithm": self.algorithm,
- "x-amz-encryption-nonce": base64.b64encode(self.nonce).decode(),
- "x-amz-server-side-encryption-customer-key-MD5": self.key_md5,
- }
-
- @classmethod
- def from_dict(cls, data: Dict[str, str]) -> Optional["SSECMetadata"]:
- algorithm = data.get("x-amz-server-side-encryption-customer-algorithm")
- if not algorithm:
- return None
- try:
- nonce = base64.b64decode(data.get("x-amz-encryption-nonce", ""))
- return cls(
- algorithm=algorithm,
- nonce=nonce,
- key_md5=data.get("x-amz-server-side-encryption-customer-key-MD5", ""),
- )
- except Exception:
- return None
-
-
-class ClientEncryptionHelper:
- """Helpers for client-side encryption.
-
- Client-side encryption is performed by the client, but this helper
- provides key generation and materials for clients that need them.
- """
-
- @staticmethod
- def generate_client_key() -> Dict[str, str]:
- """Generate a new client encryption key."""
- from datetime import datetime, timezone
- key = secrets.token_bytes(32)
- return {
- "key": base64.b64encode(key).decode(),
- "algorithm": "AES-256-GCM",
- "created_at": datetime.now(timezone.utc).isoformat(),
- }
-
- @staticmethod
- def encrypt_with_key(plaintext: bytes, key_b64: str, context: Dict[str, str] | None = None) -> Dict[str, str]:
- """Encrypt data with a client-provided key."""
- key = base64.b64decode(key_b64)
- if len(key) != 32:
- raise EncryptionError("Key must be 256 bits (32 bytes)")
-
- aesgcm = AESGCM(key)
- nonce = secrets.token_bytes(12)
- aad = json.dumps(context, sort_keys=True).encode() if context else None
- ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
-
- return {
- "ciphertext": base64.b64encode(ciphertext).decode(),
- "nonce": base64.b64encode(nonce).decode(),
- "algorithm": "AES-256-GCM",
- }
-
- @staticmethod
- def decrypt_with_key(ciphertext_b64: str, nonce_b64: str, key_b64: str, context: Dict[str, str] | None = None) -> bytes:
- """Decrypt data with a client-provided key."""
- key = base64.b64decode(key_b64)
- nonce = base64.b64decode(nonce_b64)
- ciphertext = base64.b64decode(ciphertext_b64)
-
- if len(key) != 32:
- raise EncryptionError("Key must be 256 bits (32 bytes)")
-
- aesgcm = AESGCM(key)
- aad = json.dumps(context, sort_keys=True).encode() if context else None
- try:
- return aesgcm.decrypt(nonce, ciphertext, aad)
- except Exception as exc:
- raise EncryptionError("Decryption failed") from exc
diff --git a/python/app/errors.py b/python/app/errors.py
deleted file mode 100644
index 049187d..0000000
--- a/python/app/errors.py
+++ /dev/null
@@ -1,207 +0,0 @@
-from __future__ import annotations
-
-import logging
-from dataclasses import dataclass, field
-from typing import Optional, Dict, Any
-from xml.etree.ElementTree import Element, SubElement, tostring
-
-from flask import Response, jsonify, request, flash, redirect, url_for, g
-from flask_limiter import RateLimitExceeded
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class AppError(Exception):
- """Base application error with multi-format response support."""
- code: str
- message: str
- status_code: int = 500
- details: Optional[Dict[str, Any]] = field(default=None)
-
- def __post_init__(self):
- super().__init__(self.message)
-
- def to_xml_response(self) -> Response:
- """Convert to S3 API XML error response."""
- error = Element("Error")
- SubElement(error, "Code").text = self.code
- SubElement(error, "Message").text = self.message
- request_id = getattr(g, 'request_id', None) if g else None
- SubElement(error, "RequestId").text = request_id or "unknown"
- xml_bytes = tostring(error, encoding="utf-8")
- return Response(xml_bytes, status=self.status_code, mimetype="application/xml")
-
- def to_json_response(self) -> tuple[Response, int]:
- """Convert to JSON error response for UI AJAX calls."""
- payload: Dict[str, Any] = {
- "success": False,
- "error": {
- "code": self.code,
- "message": self.message
- }
- }
- if self.details:
- payload["error"]["details"] = self.details
- return jsonify(payload), self.status_code
-
- def to_flash_message(self) -> str:
- """Convert to user-friendly flash message."""
- return self.message
-
-
-@dataclass
-class BucketNotFoundError(AppError):
- """Bucket does not exist."""
- code: str = "NoSuchBucket"
- message: str = "The specified bucket does not exist"
- status_code: int = 404
-
-
-@dataclass
-class BucketAlreadyExistsError(AppError):
- """Bucket already exists."""
- code: str = "BucketAlreadyExists"
- message: str = "The requested bucket name is not available"
- status_code: int = 409
-
-
-@dataclass
-class BucketNotEmptyError(AppError):
- """Bucket is not empty."""
- code: str = "BucketNotEmpty"
- message: str = "The bucket you tried to delete is not empty"
- status_code: int = 409
-
-
-@dataclass
-class ObjectNotFoundError(AppError):
- """Object does not exist."""
- code: str = "NoSuchKey"
- message: str = "The specified key does not exist"
- status_code: int = 404
-
-
-@dataclass
-class InvalidObjectKeyError(AppError):
- """Invalid object key."""
- code: str = "InvalidKey"
- message: str = "The specified key is not valid"
- status_code: int = 400
-
-
-@dataclass
-class AccessDeniedError(AppError):
- """Access denied."""
- code: str = "AccessDenied"
- message: str = "Access Denied"
- status_code: int = 403
-
-
-@dataclass
-class InvalidCredentialsError(AppError):
- """Invalid credentials."""
- code: str = "InvalidAccessKeyId"
- message: str = "The access key ID you provided does not exist"
- status_code: int = 403
-
-@dataclass
-class MalformedRequestError(AppError):
- """Malformed request."""
- code: str = "MalformedXML"
- message: str = "The XML you provided was not well-formed"
- status_code: int = 400
-
-
-@dataclass
-class InvalidArgumentError(AppError):
- """Invalid argument."""
- code: str = "InvalidArgument"
- message: str = "Invalid argument"
- status_code: int = 400
-
-
-@dataclass
-class EntityTooLargeError(AppError):
- """Entity too large."""
- code: str = "EntityTooLarge"
- message: str = "Your proposed upload exceeds the maximum allowed size"
- status_code: int = 413
-
-
-@dataclass
-class QuotaExceededAppError(AppError):
- """Bucket quota exceeded."""
- code: str = "QuotaExceeded"
- message: str = "The bucket quota has been exceeded"
- status_code: int = 403
- quota: Optional[Dict[str, Any]] = None
- usage: Optional[Dict[str, int]] = None
-
- def __post_init__(self):
- if self.quota or self.usage:
- self.details = {}
- if self.quota:
- self.details["quota"] = self.quota
- if self.usage:
- self.details["usage"] = self.usage
- super().__post_init__()
-
-
-def handle_app_error(error: AppError) -> Response:
- """Handle application errors with appropriate response format."""
- log_extra = {"error_code": error.code}
- if error.details:
- log_extra["details"] = error.details
-
- logger.error(f"{error.code}: {error.message}", extra=log_extra)
-
- if request.path.startswith('/ui'):
- wants_json = (
- request.is_json or
- request.headers.get('X-Requested-With') == 'XMLHttpRequest' or
- 'application/json' in request.accept_mimetypes.values()
- )
- if wants_json:
- return error.to_json_response()
- flash(error.to_flash_message(), 'danger')
- referrer = request.referrer
- if referrer and request.host in referrer:
- return redirect(referrer)
- return redirect(url_for('ui.buckets_overview'))
- else:
- return error.to_xml_response()
-
-
-def handle_rate_limit_exceeded(e: RateLimitExceeded) -> Response:
- g.s3_error_code = "SlowDown"
- if request.path.startswith("/ui") or request.path.startswith("/buckets"):
- wants_json = (
- request.is_json or
- request.headers.get("X-Requested-With") == "XMLHttpRequest" or
- "application/json" in request.accept_mimetypes.values()
- )
- if wants_json:
- return jsonify({"success": False, "error": {"code": "SlowDown", "message": "Please reduce your request rate."}}), 429
- error = Element("Error")
- SubElement(error, "Code").text = "SlowDown"
- SubElement(error, "Message").text = "Please reduce your request rate."
- SubElement(error, "Resource").text = request.path
- SubElement(error, "RequestId").text = getattr(g, "request_id", "")
- xml_bytes = tostring(error, encoding="utf-8")
- return Response(xml_bytes, status="429 Too Many Requests", mimetype="application/xml")
-
-
-def register_error_handlers(app):
- """Register error handlers with a Flask app."""
- app.register_error_handler(AppError, handle_app_error)
- app.register_error_handler(RateLimitExceeded, handle_rate_limit_exceeded)
-
- for error_class in [
- BucketNotFoundError, BucketAlreadyExistsError, BucketNotEmptyError,
- ObjectNotFoundError, InvalidObjectKeyError,
- AccessDeniedError, InvalidCredentialsError,
- MalformedRequestError, InvalidArgumentError, EntityTooLargeError,
- QuotaExceededAppError,
- ]:
- app.register_error_handler(error_class, handle_app_error)
diff --git a/python/app/extensions.py b/python/app/extensions.py
deleted file mode 100644
index 7da36ae..0000000
--- a/python/app/extensions.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from flask import g
-from flask_limiter import Limiter
-from flask_limiter.util import get_remote_address
-from flask_wtf import CSRFProtect
-
-def get_rate_limit_key():
- """Generate rate limit key based on authenticated user."""
- if hasattr(g, 'principal') and g.principal:
- return g.principal.access_key
- return get_remote_address()
-
-# Shared rate limiter instance; configured in app factory.
-limiter = Limiter(key_func=get_rate_limit_key)
-
-# Global CSRF protection for UI routes.
-csrf = CSRFProtect()
diff --git a/python/app/gc.py b/python/app/gc.py
deleted file mode 100644
index 16fa3b7..0000000
--- a/python/app/gc.py
+++ /dev/null
@@ -1,596 +0,0 @@
-from __future__ import annotations
-
-import json
-import logging
-import os
-import shutil
-import threading
-import time
-from dataclasses import dataclass, field
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class GCResult:
- temp_files_deleted: int = 0
- temp_bytes_freed: int = 0
- multipart_uploads_deleted: int = 0
- multipart_bytes_freed: int = 0
- lock_files_deleted: int = 0
- orphaned_metadata_deleted: int = 0
- orphaned_versions_deleted: int = 0
- orphaned_version_bytes_freed: int = 0
- empty_dirs_removed: int = 0
- errors: List[str] = field(default_factory=list)
- execution_time_seconds: float = 0.0
-
- def to_dict(self) -> dict:
- return {
- "temp_files_deleted": self.temp_files_deleted,
- "temp_bytes_freed": self.temp_bytes_freed,
- "multipart_uploads_deleted": self.multipart_uploads_deleted,
- "multipart_bytes_freed": self.multipart_bytes_freed,
- "lock_files_deleted": self.lock_files_deleted,
- "orphaned_metadata_deleted": self.orphaned_metadata_deleted,
- "orphaned_versions_deleted": self.orphaned_versions_deleted,
- "orphaned_version_bytes_freed": self.orphaned_version_bytes_freed,
- "empty_dirs_removed": self.empty_dirs_removed,
- "errors": self.errors,
- "execution_time_seconds": self.execution_time_seconds,
- }
-
- @property
- def total_bytes_freed(self) -> int:
- return self.temp_bytes_freed + self.multipart_bytes_freed + self.orphaned_version_bytes_freed
-
- @property
- def has_work(self) -> bool:
- return (
- self.temp_files_deleted > 0
- or self.multipart_uploads_deleted > 0
- or self.lock_files_deleted > 0
- or self.orphaned_metadata_deleted > 0
- or self.orphaned_versions_deleted > 0
- or self.empty_dirs_removed > 0
- )
-
-
-@dataclass
-class GCExecutionRecord:
- timestamp: float
- result: dict
- dry_run: bool
-
- def to_dict(self) -> dict:
- return {
- "timestamp": self.timestamp,
- "result": self.result,
- "dry_run": self.dry_run,
- }
-
- @classmethod
- def from_dict(cls, data: dict) -> GCExecutionRecord:
- return cls(
- timestamp=data["timestamp"],
- result=data["result"],
- dry_run=data.get("dry_run", False),
- )
-
-
-class GCHistoryStore:
- def __init__(self, storage_root: Path, max_records: int = 50) -> None:
- self.storage_root = storage_root
- self.max_records = max_records
- self._lock = threading.Lock()
-
- def _get_path(self) -> Path:
- return self.storage_root / ".myfsio.sys" / "config" / "gc_history.json"
-
- def load(self) -> List[GCExecutionRecord]:
- path = self._get_path()
- if not path.exists():
- return []
- try:
- with open(path, "r", encoding="utf-8") as f:
- data = json.load(f)
- return [GCExecutionRecord.from_dict(d) for d in data.get("executions", [])]
- except (OSError, ValueError, KeyError) as e:
- logger.error("Failed to load GC history: %s", e)
- return []
-
- def save(self, records: List[GCExecutionRecord]) -> None:
- path = self._get_path()
- path.parent.mkdir(parents=True, exist_ok=True)
- data = {"executions": [r.to_dict() for r in records[: self.max_records]]}
- try:
- with open(path, "w", encoding="utf-8") as f:
- json.dump(data, f, indent=2)
- except OSError as e:
- logger.error("Failed to save GC history: %s", e)
-
- def add(self, record: GCExecutionRecord) -> None:
- with self._lock:
- records = self.load()
- records.insert(0, record)
- self.save(records)
-
- def get_history(self, limit: int = 50, offset: int = 0) -> List[GCExecutionRecord]:
- return self.load()[offset : offset + limit]
-
-
-def _dir_size(path: Path) -> int:
- total = 0
- try:
- for f in path.rglob("*"):
- if f.is_file():
- try:
- total += f.stat().st_size
- except OSError:
- pass
- except OSError:
- pass
- return total
-
-
-def _file_age_hours(path: Path) -> float:
- try:
- mtime = path.stat().st_mtime
- return (time.time() - mtime) / 3600.0
- except OSError:
- return 0.0
-
-
-class GarbageCollector:
- SYSTEM_ROOT = ".myfsio.sys"
- SYSTEM_TMP_DIR = "tmp"
- SYSTEM_MULTIPART_DIR = "multipart"
- SYSTEM_BUCKETS_DIR = "buckets"
- BUCKET_META_DIR = "meta"
- BUCKET_VERSIONS_DIR = "versions"
- INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
-
- def __init__(
- self,
- storage_root: Path,
- interval_hours: float = 6.0,
- temp_file_max_age_hours: float = 24.0,
- multipart_max_age_days: int = 7,
- lock_file_max_age_hours: float = 1.0,
- dry_run: bool = False,
- max_history: int = 50,
- io_throttle_ms: int = 10,
- ) -> None:
- self.storage_root = Path(storage_root)
- self.interval_seconds = interval_hours * 3600.0
- self.temp_file_max_age_hours = temp_file_max_age_hours
- self.multipart_max_age_days = multipart_max_age_days
- self.lock_file_max_age_hours = lock_file_max_age_hours
- self.dry_run = dry_run
- self._timer: Optional[threading.Timer] = None
- self._shutdown = False
- self._lock = threading.Lock()
- self._scanning = False
- self._scan_start_time: Optional[float] = None
- self._io_throttle = max(0, io_throttle_ms) / 1000.0
- self.history_store = GCHistoryStore(storage_root, max_records=max_history)
-
- def start(self) -> None:
- if self._timer is not None:
- return
- self._shutdown = False
- self._schedule_next()
- logger.info(
- "GC started: interval=%.1fh, temp_max_age=%.1fh, multipart_max_age=%dd, lock_max_age=%.1fh, dry_run=%s",
- self.interval_seconds / 3600.0,
- self.temp_file_max_age_hours,
- self.multipart_max_age_days,
- self.lock_file_max_age_hours,
- self.dry_run,
- )
-
- def stop(self) -> None:
- self._shutdown = True
- if self._timer:
- self._timer.cancel()
- self._timer = None
- logger.info("GC stopped")
-
- def _schedule_next(self) -> None:
- if self._shutdown:
- return
- self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
- self._timer.daemon = True
- self._timer.start()
-
- def _run_cycle(self) -> None:
- if self._shutdown:
- return
- try:
- self.run_now()
- except Exception as e:
- logger.error("GC cycle failed: %s", e)
- finally:
- self._schedule_next()
-
- def run_now(self, dry_run: Optional[bool] = None) -> GCResult:
- if not self._lock.acquire(blocking=False):
- raise RuntimeError("GC is already in progress")
-
- effective_dry_run = dry_run if dry_run is not None else self.dry_run
-
- try:
- self._scanning = True
- self._scan_start_time = time.time()
-
- start = self._scan_start_time
- result = GCResult()
-
- original_dry_run = self.dry_run
- self.dry_run = effective_dry_run
- try:
- self._clean_temp_files(result)
- self._clean_orphaned_multipart(result)
- self._clean_stale_locks(result)
- self._clean_orphaned_metadata(result)
- self._clean_orphaned_versions(result)
- self._clean_empty_dirs(result)
- finally:
- self.dry_run = original_dry_run
-
- result.execution_time_seconds = time.time() - start
-
- if result.has_work or result.errors:
- logger.info(
- "GC completed in %.2fs: temp=%d (%.1f MB), multipart=%d (%.1f MB), "
- "locks=%d, meta=%d, versions=%d (%.1f MB), dirs=%d, errors=%d%s",
- result.execution_time_seconds,
- result.temp_files_deleted,
- result.temp_bytes_freed / (1024 * 1024),
- result.multipart_uploads_deleted,
- result.multipart_bytes_freed / (1024 * 1024),
- result.lock_files_deleted,
- result.orphaned_metadata_deleted,
- result.orphaned_versions_deleted,
- result.orphaned_version_bytes_freed / (1024 * 1024),
- result.empty_dirs_removed,
- len(result.errors),
- " (dry run)" if effective_dry_run else "",
- )
-
- record = GCExecutionRecord(
- timestamp=time.time(),
- result=result.to_dict(),
- dry_run=effective_dry_run,
- )
- self.history_store.add(record)
-
- return result
- finally:
- self._scanning = False
- self._scan_start_time = None
- self._lock.release()
-
- def run_async(self, dry_run: Optional[bool] = None) -> bool:
- if self._scanning:
- return False
- t = threading.Thread(target=self.run_now, args=(dry_run,), daemon=True)
- t.start()
- return True
-
- def _system_path(self) -> Path:
- return self.storage_root / self.SYSTEM_ROOT
-
- def _throttle(self) -> bool:
- if self._shutdown:
- return True
- if self._io_throttle > 0:
- time.sleep(self._io_throttle)
- return self._shutdown
-
- def _list_bucket_names(self) -> List[str]:
- names = []
- try:
- for entry in self.storage_root.iterdir():
- if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
- names.append(entry.name)
- except OSError:
- pass
- return names
-
- def _clean_temp_files(self, result: GCResult) -> None:
- tmp_dir = self._system_path() / self.SYSTEM_TMP_DIR
- if not tmp_dir.exists():
- return
- try:
- for entry in tmp_dir.iterdir():
- if self._throttle():
- return
- if not entry.is_file():
- continue
- age = _file_age_hours(entry)
- if age < self.temp_file_max_age_hours:
- continue
- try:
- size = entry.stat().st_size
- if not self.dry_run:
- entry.unlink()
- result.temp_files_deleted += 1
- result.temp_bytes_freed += size
- except OSError as e:
- result.errors.append(f"temp file {entry.name}: {e}")
- except OSError as e:
- result.errors.append(f"scan tmp dir: {e}")
-
- def _clean_orphaned_multipart(self, result: GCResult) -> None:
- cutoff_hours = self.multipart_max_age_days * 24.0
- bucket_names = self._list_bucket_names()
-
- for bucket_name in bucket_names:
- if self._shutdown:
- return
- for multipart_root in (
- self._system_path() / self.SYSTEM_MULTIPART_DIR / bucket_name,
- self.storage_root / bucket_name / ".multipart",
- ):
- if not multipart_root.exists():
- continue
- try:
- for upload_dir in multipart_root.iterdir():
- if self._throttle():
- return
- if not upload_dir.is_dir():
- continue
- self._maybe_clean_upload(upload_dir, cutoff_hours, result)
- except OSError as e:
- result.errors.append(f"scan multipart {bucket_name}: {e}")
-
- def _maybe_clean_upload(self, upload_dir: Path, cutoff_hours: float, result: GCResult) -> None:
- manifest_path = upload_dir / "manifest.json"
- age = _file_age_hours(manifest_path) if manifest_path.exists() else _file_age_hours(upload_dir)
-
- if age < cutoff_hours:
- return
-
- dir_bytes = _dir_size(upload_dir)
- try:
- if not self.dry_run:
- shutil.rmtree(upload_dir, ignore_errors=True)
- result.multipart_uploads_deleted += 1
- result.multipart_bytes_freed += dir_bytes
- except OSError as e:
- result.errors.append(f"multipart {upload_dir.name}: {e}")
-
- def _clean_stale_locks(self, result: GCResult) -> None:
- buckets_root = self._system_path() / self.SYSTEM_BUCKETS_DIR
- if not buckets_root.exists():
- return
-
- try:
- for bucket_dir in buckets_root.iterdir():
- if self._shutdown:
- return
- if not bucket_dir.is_dir():
- continue
- locks_dir = bucket_dir / "locks"
- if not locks_dir.exists():
- continue
- try:
- for lock_file in locks_dir.iterdir():
- if self._throttle():
- return
- if not lock_file.is_file() or not lock_file.name.endswith(".lock"):
- continue
- age = _file_age_hours(lock_file)
- if age < self.lock_file_max_age_hours:
- continue
- try:
- if not self.dry_run:
- lock_file.unlink(missing_ok=True)
- result.lock_files_deleted += 1
- except OSError as e:
- result.errors.append(f"lock {lock_file.name}: {e}")
- except OSError as e:
- result.errors.append(f"scan locks {bucket_dir.name}: {e}")
- except OSError as e:
- result.errors.append(f"scan buckets for locks: {e}")
-
- def _clean_orphaned_metadata(self, result: GCResult) -> None:
- bucket_names = self._list_bucket_names()
-
- for bucket_name in bucket_names:
- if self._shutdown:
- return
- legacy_meta = self.storage_root / bucket_name / ".meta"
- if legacy_meta.exists():
- self._clean_legacy_metadata(bucket_name, legacy_meta, result)
-
- new_meta = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
- if new_meta.exists():
- self._clean_index_metadata(bucket_name, new_meta, result)
-
- def _clean_legacy_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
- bucket_path = self.storage_root / bucket_name
- try:
- for meta_file in meta_root.rglob("*.meta.json"):
- if self._throttle():
- return
- if not meta_file.is_file():
- continue
- try:
- rel = meta_file.relative_to(meta_root)
- object_key = rel.as_posix().removesuffix(".meta.json")
- object_path = bucket_path / object_key
- if not object_path.exists():
- if not self.dry_run:
- meta_file.unlink(missing_ok=True)
- result.orphaned_metadata_deleted += 1
- except (OSError, ValueError) as e:
- result.errors.append(f"legacy meta {bucket_name}/{meta_file.name}: {e}")
- except OSError as e:
- result.errors.append(f"scan legacy meta {bucket_name}: {e}")
-
- def _clean_index_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
- bucket_path = self.storage_root / bucket_name
- try:
- for index_file in meta_root.rglob("_index.json"):
- if self._throttle():
- return
- if not index_file.is_file():
- continue
- try:
- with open(index_file, "r", encoding="utf-8") as f:
- index_data = json.load(f)
- except (OSError, json.JSONDecodeError):
- continue
-
- keys_to_remove = []
- for key in index_data:
- rel_dir = index_file.parent.relative_to(meta_root)
- if rel_dir == Path("."):
- full_key = key
- else:
- full_key = rel_dir.as_posix() + "/" + key
- object_path = bucket_path / full_key
- if not object_path.exists():
- keys_to_remove.append(key)
-
- if keys_to_remove:
- if not self.dry_run:
- for k in keys_to_remove:
- index_data.pop(k, None)
- if index_data:
- try:
- with open(index_file, "w", encoding="utf-8") as f:
- json.dump(index_data, f)
- except OSError as e:
- result.errors.append(f"write index {bucket_name}: {e}")
- continue
- else:
- try:
- index_file.unlink(missing_ok=True)
- except OSError:
- pass
- result.orphaned_metadata_deleted += len(keys_to_remove)
- except OSError as e:
- result.errors.append(f"scan index meta {bucket_name}: {e}")
-
- def _clean_orphaned_versions(self, result: GCResult) -> None:
- bucket_names = self._list_bucket_names()
-
- for bucket_name in bucket_names:
- if self._shutdown:
- return
- bucket_path = self.storage_root / bucket_name
- for versions_root in (
- self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR,
- self.storage_root / bucket_name / ".versions",
- ):
- if not versions_root.exists():
- continue
- try:
- for key_dir in versions_root.iterdir():
- if self._throttle():
- return
- if not key_dir.is_dir():
- continue
- self._clean_versions_for_key(bucket_path, versions_root, key_dir, result)
- except OSError as e:
- result.errors.append(f"scan versions {bucket_name}: {e}")
-
- def _clean_versions_for_key(
- self, bucket_path: Path, versions_root: Path, key_dir: Path, result: GCResult
- ) -> None:
- try:
- rel = key_dir.relative_to(versions_root)
- except ValueError:
- return
-
- object_path = bucket_path / rel
- if object_path.exists():
- return
-
- version_files = list(key_dir.glob("*.bin")) + list(key_dir.glob("*.json"))
- if not version_files:
- return
-
- for vf in version_files:
- try:
- size = vf.stat().st_size if vf.suffix == ".bin" else 0
- if not self.dry_run:
- vf.unlink(missing_ok=True)
- if vf.suffix == ".bin":
- result.orphaned_version_bytes_freed += size
- result.orphaned_versions_deleted += 1
- except OSError as e:
- result.errors.append(f"version file {vf.name}: {e}")
-
- def _clean_empty_dirs(self, result: GCResult) -> None:
- targets = [
- self._system_path() / self.SYSTEM_TMP_DIR,
- self._system_path() / self.SYSTEM_MULTIPART_DIR,
- self._system_path() / self.SYSTEM_BUCKETS_DIR,
- ]
- for bucket_name in self._list_bucket_names():
- targets.append(self.storage_root / bucket_name / ".meta")
- targets.append(self.storage_root / bucket_name / ".versions")
- targets.append(self.storage_root / bucket_name / ".multipart")
-
- for root in targets:
- if not root.exists():
- continue
- self._remove_empty_dirs_recursive(root, root, result)
-
- def _remove_empty_dirs_recursive(self, path: Path, stop_at: Path, result: GCResult) -> bool:
- if self._shutdown:
- return False
- if not path.is_dir():
- return False
-
- try:
- children = list(path.iterdir())
- except OSError:
- return False
-
- all_empty = True
- for child in children:
- if self._throttle():
- return False
- if child.is_dir():
- if not self._remove_empty_dirs_recursive(child, stop_at, result):
- all_empty = False
- else:
- all_empty = False
-
- if all_empty and path != stop_at:
- try:
- if not self.dry_run:
- path.rmdir()
- result.empty_dirs_removed += 1
- return True
- except OSError:
- return False
- return all_empty
-
- def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
- records = self.history_store.get_history(limit, offset)
- return [r.to_dict() for r in records]
-
- def get_status(self) -> dict:
- status: Dict[str, Any] = {
- "enabled": not self._shutdown or self._timer is not None,
- "running": self._timer is not None and not self._shutdown,
- "scanning": self._scanning,
- "interval_hours": self.interval_seconds / 3600.0,
- "temp_file_max_age_hours": self.temp_file_max_age_hours,
- "multipart_max_age_days": self.multipart_max_age_days,
- "lock_file_max_age_hours": self.lock_file_max_age_hours,
- "dry_run": self.dry_run,
- "io_throttle_ms": round(self._io_throttle * 1000),
- }
- if self._scanning and self._scan_start_time:
- status["scan_elapsed_seconds"] = time.time() - self._scan_start_time
- return status
diff --git a/python/app/iam.py b/python/app/iam.py
deleted file mode 100644
index 7e8f3fa..0000000
--- a/python/app/iam.py
+++ /dev/null
@@ -1,1095 +0,0 @@
-from __future__ import annotations
-
-import base64
-import hashlib
-import hmac
-import json
-import math
-import os
-import secrets
-import threading
-import time
-from collections import deque
-from dataclasses import dataclass, field
-from datetime import datetime, timedelta, timezone
-from pathlib import Path
-from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple
-
-from cryptography.fernet import Fernet, InvalidToken
-
-
-class IamError(RuntimeError):
- """Raised when authentication or authorization fails."""
-
-
-S3_ACTIONS = {
- "list", "read", "write", "delete", "share", "policy",
- "replication", "lifecycle", "cors",
- "create_bucket", "delete_bucket",
- "versioning", "tagging", "encryption", "quota",
- "object_lock", "notification", "logging", "website",
-}
-IAM_ACTIONS = {
- "iam:list_users",
- "iam:create_user",
- "iam:delete_user",
- "iam:rotate_key",
- "iam:update_policy",
- "iam:create_key",
- "iam:delete_key",
- "iam:get_user",
- "iam:get_policy",
- "iam:disable_user",
-}
-ALLOWED_ACTIONS = (S3_ACTIONS | IAM_ACTIONS) | {"iam:*"}
-
-_V1_IMPLIED_ACTIONS = {
- "write": {"create_bucket"},
- "delete": {"delete_bucket"},
- "policy": {
- "versioning", "tagging", "encryption", "quota",
- "object_lock", "notification", "logging", "website",
- "cors", "lifecycle", "replication", "share",
- },
-}
-
-ACTION_ALIASES = {
- "list": "list",
- "s3:listbucket": "list",
- "s3:listallmybuckets": "list",
- "s3:listbucketversions": "list",
- "s3:listmultipartuploads": "list",
- "s3:listparts": "list",
- "read": "read",
- "s3:getobject": "read",
- "s3:getobjectversion": "read",
- "s3:getobjecttagging": "read",
- "s3:getobjectversiontagging": "read",
- "s3:getobjectacl": "read",
- "s3:headobject": "read",
- "s3:headbucket": "read",
- "write": "write",
- "s3:putobject": "write",
- "s3:putobjecttagging": "write",
- "s3:createmultipartupload": "write",
- "s3:uploadpart": "write",
- "s3:completemultipartupload": "write",
- "s3:abortmultipartupload": "write",
- "s3:copyobject": "write",
- "delete": "delete",
- "s3:deleteobject": "delete",
- "s3:deleteobjectversion": "delete",
- "s3:deleteobjecttagging": "delete",
- "create_bucket": "create_bucket",
- "s3:createbucket": "create_bucket",
- "delete_bucket": "delete_bucket",
- "s3:deletebucket": "delete_bucket",
- "share": "share",
- "s3:putobjectacl": "share",
- "s3:putbucketacl": "share",
- "s3:getbucketacl": "share",
- "policy": "policy",
- "s3:putbucketpolicy": "policy",
- "s3:getbucketpolicy": "policy",
- "s3:deletebucketpolicy": "policy",
- "replication": "replication",
- "s3:getreplicationconfiguration": "replication",
- "s3:putreplicationconfiguration": "replication",
- "s3:deletereplicationconfiguration": "replication",
- "s3:replicateobject": "replication",
- "s3:replicatetags": "replication",
- "s3:replicatedelete": "replication",
- "lifecycle": "lifecycle",
- "s3:getlifecycleconfiguration": "lifecycle",
- "s3:putlifecycleconfiguration": "lifecycle",
- "s3:deletelifecycleconfiguration": "lifecycle",
- "s3:getbucketlifecycle": "lifecycle",
- "s3:putbucketlifecycle": "lifecycle",
- "cors": "cors",
- "s3:getbucketcors": "cors",
- "s3:putbucketcors": "cors",
- "s3:deletebucketcors": "cors",
- "versioning": "versioning",
- "s3:getbucketversioning": "versioning",
- "s3:putbucketversioning": "versioning",
- "tagging": "tagging",
- "s3:getbuckettagging": "tagging",
- "s3:putbuckettagging": "tagging",
- "s3:deletebuckettagging": "tagging",
- "encryption": "encryption",
- "s3:getencryptionconfiguration": "encryption",
- "s3:putencryptionconfiguration": "encryption",
- "s3:deleteencryptionconfiguration": "encryption",
- "quota": "quota",
- "s3:getbucketquota": "quota",
- "s3:putbucketquota": "quota",
- "s3:deletebucketquota": "quota",
- "object_lock": "object_lock",
- "s3:getobjectlockconfiguration": "object_lock",
- "s3:putobjectlockconfiguration": "object_lock",
- "s3:putobjectretention": "object_lock",
- "s3:getobjectretention": "object_lock",
- "s3:putobjectlegalhold": "object_lock",
- "s3:getobjectlegalhold": "object_lock",
- "notification": "notification",
- "s3:getbucketnotificationconfiguration": "notification",
- "s3:putbucketnotificationconfiguration": "notification",
- "s3:deletebucketnotificationconfiguration": "notification",
- "logging": "logging",
- "s3:getbucketlogging": "logging",
- "s3:putbucketlogging": "logging",
- "s3:deletebucketlogging": "logging",
- "website": "website",
- "s3:getbucketwebsite": "website",
- "s3:putbucketwebsite": "website",
- "s3:deletebucketwebsite": "website",
- "iam:listusers": "iam:list_users",
- "iam:createuser": "iam:create_user",
- "iam:deleteuser": "iam:delete_user",
- "iam:rotateaccesskey": "iam:rotate_key",
- "iam:putuserpolicy": "iam:update_policy",
- "iam:createaccesskey": "iam:create_key",
- "iam:deleteaccesskey": "iam:delete_key",
- "iam:getuser": "iam:get_user",
- "iam:getpolicy": "iam:get_policy",
- "iam:disableuser": "iam:disable_user",
- "iam:*": "iam:*",
-}
-
-
-@dataclass
-class Policy:
- bucket: str
- actions: Set[str]
- prefix: str = "*"
-
-
-@dataclass
-class Principal:
- access_key: str
- display_name: str
- policies: List[Policy]
-
-
-def _derive_fernet_key(secret: str) -> bytes:
- raw = hashlib.pbkdf2_hmac("sha256", secret.encode(), b"myfsio-iam-encryption", 100_000)
- return base64.urlsafe_b64encode(raw)
-
-
-_IAM_ENCRYPTED_PREFIX = b"MYFSIO_IAM_ENC:"
-
-_CONFIG_VERSION = 2
-
-
-def _expand_v1_actions(actions: Set[str]) -> Set[str]:
- expanded = set(actions)
- for action, implied in _V1_IMPLIED_ACTIONS.items():
- if action in expanded:
- expanded.update(implied)
- return expanded
-
-
-class IamService:
- """Loads IAM configuration, manages users, and evaluates policies."""
-
- def __init__(self, config_path: Path, auth_max_attempts: int = 5, auth_lockout_minutes: int = 15, encryption_key: str | None = None) -> None:
- self.config_path = Path(config_path)
- self.auth_max_attempts = auth_max_attempts
- self.auth_lockout_window = timedelta(minutes=auth_lockout_minutes)
- self._fernet: Fernet | None = None
- if encryption_key:
- self._fernet = Fernet(_derive_fernet_key(encryption_key))
- self.config_path.parent.mkdir(parents=True, exist_ok=True)
- if not self.config_path.exists():
- self._write_default()
- self._user_records: Dict[str, Dict[str, Any]] = {}
- self._key_index: Dict[str, str] = {}
- self._key_secrets: Dict[str, str] = {}
- self._key_status: Dict[str, str] = {}
- self._raw_config: Dict[str, Any] = {}
- self._failed_attempts: Dict[str, Deque[datetime]] = {}
- self._last_load_time = 0.0
- self._principal_cache: Dict[str, Tuple[Principal, float]] = {}
- self._secret_key_cache: Dict[str, Tuple[str, float]] = {}
- self._cache_ttl = float(os.environ.get("IAM_CACHE_TTL_SECONDS", "5.0"))
- self._last_stat_check = 0.0
- self._stat_check_interval = float(os.environ.get("IAM_STAT_CHECK_INTERVAL_SECONDS", "2.0"))
- self._sessions: Dict[str, Dict[str, Any]] = {}
- self._session_lock = threading.Lock()
- self._load()
- self._load_lockout_state()
-
- def _maybe_reload(self) -> None:
- now = time.time()
- if now - self._last_stat_check < self._stat_check_interval:
- return
- self._last_stat_check = now
- try:
- if self.config_path.stat().st_mtime > self._last_load_time:
- self._load()
- self._principal_cache.clear()
- self._secret_key_cache.clear()
- except OSError:
- pass
-
- def _check_expiry(self, access_key: str, record: Dict[str, Any]) -> None:
- expires_at = record.get("expires_at")
- if not expires_at:
- return
- try:
- exp_dt = datetime.fromisoformat(expires_at)
- if exp_dt.tzinfo is None:
- exp_dt = exp_dt.replace(tzinfo=timezone.utc)
- if datetime.now(timezone.utc) >= exp_dt:
- raise IamError(f"Credentials for '{access_key}' have expired")
- except (ValueError, TypeError):
- pass
-
- def authenticate(self, access_key: str, secret_key: str) -> Principal:
- self._maybe_reload()
- access_key = (access_key or "").strip()
- secret_key = (secret_key or "").strip()
- if not access_key or not secret_key:
- raise IamError("Missing access credentials")
- if self._is_locked_out(access_key):
- seconds = self._seconds_until_unlock(access_key)
- raise IamError(
- f"Access temporarily locked. Try again in {seconds} seconds."
- )
- user_id = self._key_index.get(access_key)
- stored_secret = self._key_secrets.get(access_key, secrets.token_urlsafe(24))
- if not user_id or not hmac.compare_digest(stored_secret, secret_key):
- self._record_failed_attempt(access_key)
- raise IamError("Invalid credentials")
- key_status = self._key_status.get(access_key, "active")
- if key_status != "active":
- raise IamError("Access key is inactive")
- record = self._user_records.get(user_id)
- if not record:
- self._record_failed_attempt(access_key)
- raise IamError("Invalid credentials")
- if not record.get("enabled", True):
- raise IamError("User account is disabled")
- self._check_expiry(access_key, record)
- self._clear_failed_attempts(access_key)
- return self._build_principal(access_key, record)
-
- _MAX_LOCKOUT_KEYS = 10000
-
- def _record_failed_attempt(self, access_key: str) -> None:
- if not access_key:
- return
- if access_key not in self._failed_attempts and len(self._failed_attempts) >= self._MAX_LOCKOUT_KEYS:
- oldest_key = min(self._failed_attempts, key=lambda k: self._failed_attempts[k][0] if self._failed_attempts[k] else datetime.min.replace(tzinfo=timezone.utc))
- del self._failed_attempts[oldest_key]
- attempts = self._failed_attempts.setdefault(access_key, deque())
- self._prune_attempts(attempts)
- attempts.append(datetime.now(timezone.utc))
- self._save_lockout_state()
-
- def _clear_failed_attempts(self, access_key: str) -> None:
- if not access_key:
- return
- if self._failed_attempts.pop(access_key, None) is not None:
- self._save_lockout_state()
-
- def _lockout_file(self) -> Path:
- return self.config_path.parent / "lockout_state.json"
-
- def _load_lockout_state(self) -> None:
- try:
- if self._lockout_file().exists():
- data = json.loads(self._lockout_file().read_text(encoding="utf-8"))
- cutoff = datetime.now(timezone.utc) - self.auth_lockout_window
- for key, timestamps in data.get("failed_attempts", {}).items():
- valid = []
- for ts in timestamps:
- try:
- dt = datetime.fromisoformat(ts)
- if dt > cutoff:
- valid.append(dt)
- except (ValueError, TypeError):
- continue
- if valid:
- self._failed_attempts[key] = deque(valid)
- except (OSError, json.JSONDecodeError):
- pass
-
- def _save_lockout_state(self) -> None:
- data: Dict[str, Any] = {"failed_attempts": {}}
- for key, attempts in self._failed_attempts.items():
- data["failed_attempts"][key] = [ts.isoformat() for ts in attempts]
- try:
- self._lockout_file().write_text(json.dumps(data), encoding="utf-8")
- except OSError:
- pass
-
- def _prune_attempts(self, attempts: Deque[datetime]) -> None:
- cutoff = datetime.now(timezone.utc) - self.auth_lockout_window
- while attempts and attempts[0] < cutoff:
- attempts.popleft()
-
- def _is_locked_out(self, access_key: str) -> bool:
- if not access_key:
- return False
- attempts = self._failed_attempts.get(access_key)
- if not attempts:
- return False
- self._prune_attempts(attempts)
- return len(attempts) >= self.auth_max_attempts
-
- def _seconds_until_unlock(self, access_key: str) -> int:
- attempts = self._failed_attempts.get(access_key)
- if not attempts:
- return 0
- self._prune_attempts(attempts)
- if len(attempts) < self.auth_max_attempts:
- return 0
- oldest = attempts[0]
- elapsed = (datetime.now(timezone.utc) - oldest).total_seconds()
- return int(max(0, self.auth_lockout_window.total_seconds() - elapsed))
-
- def create_session_token(self, access_key: str, duration_seconds: int = 3600) -> str:
- self._maybe_reload()
- user_id = self._key_index.get(access_key)
- if not user_id or user_id not in self._user_records:
- raise IamError("Unknown access key")
- self._cleanup_expired_sessions()
- token = secrets.token_urlsafe(32)
- expires_at = time.time() + duration_seconds
- self._sessions[token] = {
- "access_key": access_key,
- "expires_at": expires_at,
- }
- return token
-
- def validate_session_token(self, access_key: str, session_token: str) -> bool:
- dummy_key = secrets.token_urlsafe(16)
- dummy_token = secrets.token_urlsafe(32)
- with self._session_lock:
- session = self._sessions.get(session_token)
- if not session:
- hmac.compare_digest(access_key, dummy_key)
- hmac.compare_digest(session_token, dummy_token)
- return False
- key_match = hmac.compare_digest(session["access_key"], access_key)
- if not key_match:
- hmac.compare_digest(session_token, dummy_token)
- return False
- if time.time() > session["expires_at"]:
- self._sessions.pop(session_token, None)
- return False
- return True
-
- def _cleanup_expired_sessions(self) -> None:
- now = time.time()
- expired = [token for token, data in self._sessions.items() if now > data["expires_at"]]
- for token in expired:
- del self._sessions[token]
-
- def principal_for_key(self, access_key: str) -> Principal:
- now = time.time()
- cached = self._principal_cache.get(access_key)
- if cached:
- principal, cached_time = cached
- if now - cached_time < self._cache_ttl:
- user_id = self._key_index.get(access_key)
- if user_id:
- record = self._user_records.get(user_id)
- if record:
- self._check_expiry(access_key, record)
- self._enforce_key_and_user_status(access_key)
- return principal
-
- self._maybe_reload()
- self._enforce_key_and_user_status(access_key)
- user_id = self._key_index.get(access_key)
- if not user_id:
- raise IamError("Unknown access key")
- record = self._user_records.get(user_id)
- if not record:
- raise IamError("Unknown access key")
- self._check_expiry(access_key, record)
- principal = self._build_principal(access_key, record)
- self._principal_cache[access_key] = (principal, now)
- return principal
-
- def secret_for_key(self, access_key: str) -> str:
- self._maybe_reload()
- self._enforce_key_and_user_status(access_key)
- secret = self._key_secrets.get(access_key)
- if not secret:
- raise IamError("Unknown access key")
- user_id = self._key_index.get(access_key)
- if user_id:
- record = self._user_records.get(user_id)
- if record:
- self._check_expiry(access_key, record)
- return secret
-
- def authorize(self, principal: Principal, bucket_name: str | None, action: str, *, object_key: str | None = None) -> None:
- action = self._normalize_action(action)
- if action not in ALLOWED_ACTIONS:
- raise IamError(f"Unknown action '{action}'")
- bucket_name = bucket_name or "*"
- normalized = bucket_name.lower() if bucket_name != "*" else bucket_name
- if not self._is_allowed(principal, normalized, action, object_key=object_key):
- raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'")
-
- def check_permissions(self, principal: Principal, bucket_name: str | None, actions: Iterable[str], *, object_key: str | None = None) -> Dict[str, bool]:
- self._maybe_reload()
- bucket_name = (bucket_name or "*").lower() if bucket_name != "*" else (bucket_name or "*")
- normalized_actions = {a: self._normalize_action(a) for a in actions}
- results: Dict[str, bool] = {}
- for original, canonical in normalized_actions.items():
- if canonical not in ALLOWED_ACTIONS:
- results[original] = False
- else:
- results[original] = self._is_allowed(principal, bucket_name, canonical, object_key=object_key)
- return results
-
- def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]:
- return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")]
-
- def _is_allowed(self, principal: Principal, bucket_name: str, action: str, *, object_key: str | None = None) -> bool:
- bucket_name = bucket_name.lower()
- for policy in principal.policies:
- if policy.bucket not in {"*", bucket_name}:
- continue
- action_match = "*" in policy.actions or action in policy.actions
- if not action_match and "iam:*" in policy.actions and action.startswith("iam:"):
- action_match = True
- if not action_match:
- continue
- if object_key is not None and policy.prefix != "*":
- prefix = policy.prefix.rstrip("*")
- if not object_key.startswith(prefix):
- continue
- return True
- return False
-
- def list_users(self) -> List[Dict[str, Any]]:
- listing: List[Dict[str, Any]] = []
- for user_id, record in self._user_records.items():
- access_keys = []
- for key_info in record.get("access_keys", []):
- access_keys.append({
- "access_key": key_info["access_key"],
- "status": key_info.get("status", "active"),
- "created_at": key_info.get("created_at"),
- })
- user_entry: Dict[str, Any] = {
- "user_id": user_id,
- "display_name": record["display_name"],
- "enabled": record.get("enabled", True),
- "expires_at": record.get("expires_at"),
- "access_keys": access_keys,
- "policies": [
- {**{"bucket": policy.bucket, "actions": sorted(policy.actions)}, **({"prefix": policy.prefix} if policy.prefix != "*" else {})}
- for policy in record["policies"]
- ],
- }
- if access_keys:
- user_entry["access_key"] = access_keys[0]["access_key"]
- listing.append(user_entry)
- return listing
-
- def create_user(
- self,
- *,
- display_name: str,
- policies: Optional[Sequence[Dict[str, Any]]] = None,
- access_key: str | None = None,
- secret_key: str | None = None,
- expires_at: str | None = None,
- user_id: str | None = None,
- ) -> Dict[str, str]:
- access_key = (access_key or self._generate_access_key()).strip()
- if not access_key:
- raise IamError("Access key cannot be empty")
- if access_key in self._key_index:
- raise IamError("Access key already exists")
- if expires_at:
- self._validate_expires_at(expires_at)
- secret_key = secret_key or self._generate_secret_key()
- sanitized_policies = self._prepare_policy_payload(policies)
- user_id = user_id or self._generate_user_id()
- if user_id in self._user_records:
- raise IamError("User ID already exists")
- now_iso = datetime.now(timezone.utc).isoformat()
- record: Dict[str, Any] = {
- "user_id": user_id,
- "display_name": display_name or access_key,
- "enabled": True,
- "access_keys": [
- {
- "access_key": access_key,
- "secret_key": secret_key,
- "status": "active",
- "created_at": now_iso,
- }
- ],
- "policies": sanitized_policies,
- }
- if expires_at:
- record["expires_at"] = expires_at
- self._raw_config.setdefault("users", []).append(record)
- self._save()
- self._load()
- return {"user_id": user_id, "access_key": access_key, "secret_key": secret_key}
-
- def create_access_key(self, identifier: str) -> Dict[str, str]:
- user_raw, _ = self._resolve_raw_user(identifier)
- new_access_key = self._generate_access_key()
- new_secret_key = self._generate_secret_key()
- now_iso = datetime.now(timezone.utc).isoformat()
- key_entry = {
- "access_key": new_access_key,
- "secret_key": new_secret_key,
- "status": "active",
- "created_at": now_iso,
- }
- user_raw.setdefault("access_keys", []).append(key_entry)
- self._save()
- self._load()
- return {"access_key": new_access_key, "secret_key": new_secret_key}
-
- def delete_access_key(self, access_key: str) -> None:
- user_raw, _ = self._resolve_raw_user(access_key)
- keys = user_raw.get("access_keys", [])
- if len(keys) <= 1:
- raise IamError("Cannot delete the only access key for a user")
- remaining = [k for k in keys if k["access_key"] != access_key]
- if len(remaining) == len(keys):
- raise IamError("Access key not found")
- user_raw["access_keys"] = remaining
- self._save()
- self._principal_cache.pop(access_key, None)
- self._secret_key_cache.pop(access_key, None)
- from .s3_api import clear_signing_key_cache
- clear_signing_key_cache()
- self._load()
-
- def disable_user(self, identifier: str) -> None:
- user_raw, _ = self._resolve_raw_user(identifier)
- user_raw["enabled"] = False
- self._save()
- for key_info in user_raw.get("access_keys", []):
- ak = key_info["access_key"]
- self._principal_cache.pop(ak, None)
- self._secret_key_cache.pop(ak, None)
- from .s3_api import clear_signing_key_cache
- clear_signing_key_cache()
- self._load()
-
- def enable_user(self, identifier: str) -> None:
- user_raw, _ = self._resolve_raw_user(identifier)
- user_raw["enabled"] = True
- self._save()
- self._load()
-
- def get_user_by_id(self, user_id: str) -> Dict[str, Any]:
- record = self._user_records.get(user_id)
- if not record:
- raise IamError("User not found")
- access_keys = []
- for key_info in record.get("access_keys", []):
- access_keys.append({
- "access_key": key_info["access_key"],
- "status": key_info.get("status", "active"),
- "created_at": key_info.get("created_at"),
- })
- return {
- "user_id": user_id,
- "display_name": record["display_name"],
- "enabled": record.get("enabled", True),
- "expires_at": record.get("expires_at"),
- "access_keys": access_keys,
- "policies": [
- {"bucket": p.bucket, "actions": sorted(p.actions), "prefix": p.prefix}
- for p in record["policies"]
- ],
- }
-
- def get_user_policies(self, identifier: str) -> List[Dict[str, Any]]:
- _, user_id = self._resolve_raw_user(identifier)
- record = self._user_records.get(user_id)
- if not record:
- raise IamError("User not found")
- return [
- {**{"bucket": p.bucket, "actions": sorted(p.actions)}, **({"prefix": p.prefix} if p.prefix != "*" else {})}
- for p in record["policies"]
- ]
-
- def resolve_user_id(self, identifier: str) -> str:
- if identifier in self._user_records:
- return identifier
- user_id = self._key_index.get(identifier)
- if user_id:
- return user_id
- raise IamError("User not found")
-
- def rotate_secret(self, access_key: str) -> str:
- user_raw, _ = self._resolve_raw_user(access_key)
- new_secret = self._generate_secret_key()
- for key_info in user_raw.get("access_keys", []):
- if key_info["access_key"] == access_key:
- key_info["secret_key"] = new_secret
- break
- else:
- raise IamError("Access key not found")
- self._save()
- self._principal_cache.pop(access_key, None)
- self._secret_key_cache.pop(access_key, None)
- from .s3_api import clear_signing_key_cache
- clear_signing_key_cache()
- self._load()
- return new_secret
-
- def update_user(self, access_key: str, display_name: str) -> None:
- user_raw, _ = self._resolve_raw_user(access_key)
- user_raw["display_name"] = display_name
- self._save()
- self._load()
-
- def delete_user(self, access_key: str) -> None:
- users = self._raw_config.get("users", [])
- if len(users) <= 1:
- raise IamError("Cannot delete the only user")
- _, target_user_id = self._resolve_raw_user(access_key)
- target_user_raw = None
- remaining = []
- for u in users:
- if u.get("user_id") == target_user_id:
- target_user_raw = u
- else:
- remaining.append(u)
- if target_user_raw is None:
- raise IamError("User not found")
- self._raw_config["users"] = remaining
- self._save()
- for key_info in target_user_raw.get("access_keys", []):
- ak = key_info["access_key"]
- self._principal_cache.pop(ak, None)
- self._secret_key_cache.pop(ak, None)
- from .s3_api import clear_signing_key_cache
- clear_signing_key_cache()
- self._load()
-
- def update_user_expiry(self, access_key: str, expires_at: str | None) -> None:
- user_raw, _ = self._resolve_raw_user(access_key)
- if expires_at:
- self._validate_expires_at(expires_at)
- user_raw["expires_at"] = expires_at
- else:
- user_raw.pop("expires_at", None)
- self._save()
- for key_info in user_raw.get("access_keys", []):
- ak = key_info["access_key"]
- self._principal_cache.pop(ak, None)
- self._secret_key_cache.pop(ak, None)
- self._load()
-
- def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None:
- user_raw, _ = self._resolve_raw_user(access_key)
- user_raw["policies"] = self._prepare_policy_payload(policies)
- self._save()
- self._load()
-
- def _decrypt_content(self, raw_bytes: bytes) -> str:
- if raw_bytes.startswith(_IAM_ENCRYPTED_PREFIX):
- if not self._fernet:
- raise IamError("IAM config is encrypted but no encryption key provided. Set SECRET_KEY or use 'python run.py reset-cred'.")
- try:
- encrypted_data = raw_bytes[len(_IAM_ENCRYPTED_PREFIX):]
- return self._fernet.decrypt(encrypted_data).decode("utf-8")
- except InvalidToken:
- raise IamError("Cannot decrypt IAM config. SECRET_KEY may have changed. Use 'python run.py reset-cred' to reset credentials.")
- return raw_bytes.decode("utf-8")
-
- def _is_v2_config(self, raw: Dict[str, Any]) -> bool:
- return raw.get("version", 1) >= _CONFIG_VERSION
-
- def _migrate_v1_to_v2(self, raw: Dict[str, Any]) -> Dict[str, Any]:
- migrated_users = []
- now_iso = datetime.now(timezone.utc).isoformat()
- for user in raw.get("users", []):
- old_policies = user.get("policies", [])
- expanded_policies = []
- for p in old_policies:
- raw_actions = p.get("actions", [])
- if isinstance(raw_actions, str):
- raw_actions = [raw_actions]
- action_set: Set[str] = set()
- for a in raw_actions:
- canonical = self._normalize_action(a)
- if canonical == "*":
- action_set = set(ALLOWED_ACTIONS)
- break
- if canonical:
- action_set.add(canonical)
- action_set = _expand_v1_actions(action_set)
- expanded_policies.append({
- "bucket": p.get("bucket", "*"),
- "actions": sorted(action_set),
- "prefix": p.get("prefix", "*"),
- })
- migrated_user: Dict[str, Any] = {
- "user_id": user["access_key"],
- "display_name": user.get("display_name", user["access_key"]),
- "enabled": True,
- "access_keys": [
- {
- "access_key": user["access_key"],
- "secret_key": user["secret_key"],
- "status": "active",
- "created_at": now_iso,
- }
- ],
- "policies": expanded_policies,
- }
- if user.get("expires_at"):
- migrated_user["expires_at"] = user["expires_at"]
- migrated_users.append(migrated_user)
- return {"version": _CONFIG_VERSION, "users": migrated_users}
-
- def _load(self) -> None:
- try:
- self._last_load_time = self.config_path.stat().st_mtime
- raw_bytes = self.config_path.read_bytes()
- content = self._decrypt_content(raw_bytes)
- raw = json.loads(content)
- except IamError:
- raise
- except FileNotFoundError:
- raise IamError(f"IAM config not found: {self.config_path}")
- except json.JSONDecodeError as e:
- raise IamError(f"Corrupted IAM config (invalid JSON): {e}")
- except PermissionError as e:
- raise IamError(f"Cannot read IAM config (permission denied): {e}")
- except (OSError, ValueError) as e:
- raise IamError(f"Failed to load IAM config: {e}")
-
- was_plaintext = not raw_bytes.startswith(_IAM_ENCRYPTED_PREFIX)
- was_v1 = not self._is_v2_config(raw)
-
- if was_v1:
- raw = self._migrate_v1_to_v2(raw)
-
- user_records: Dict[str, Dict[str, Any]] = {}
- key_index: Dict[str, str] = {}
- key_secrets: Dict[str, str] = {}
- key_status_map: Dict[str, str] = {}
-
- for user in raw.get("users", []):
- user_id = user["user_id"]
- policies = self._build_policy_objects(user.get("policies", []))
- access_keys_raw = user.get("access_keys", [])
- access_keys_info = []
- for key_entry in access_keys_raw:
- ak = key_entry["access_key"]
- sk = key_entry["secret_key"]
- status = key_entry.get("status", "active")
- key_index[ak] = user_id
- key_secrets[ak] = sk
- key_status_map[ak] = status
- access_keys_info.append({
- "access_key": ak,
- "secret_key": sk,
- "status": status,
- "created_at": key_entry.get("created_at"),
- })
- record: Dict[str, Any] = {
- "display_name": user.get("display_name", user_id),
- "enabled": user.get("enabled", True),
- "policies": policies,
- "access_keys": access_keys_info,
- }
- if user.get("expires_at"):
- record["expires_at"] = user["expires_at"]
- user_records[user_id] = record
-
- if not user_records:
- raise IamError("IAM configuration contains no users")
-
- self._user_records = user_records
- self._key_index = key_index
- self._key_secrets = key_secrets
- self._key_status = key_status_map
-
- raw_users: List[Dict[str, Any]] = []
- for user in raw.get("users", []):
- raw_entry: Dict[str, Any] = {
- "user_id": user["user_id"],
- "display_name": user.get("display_name", user["user_id"]),
- "enabled": user.get("enabled", True),
- "access_keys": user.get("access_keys", []),
- "policies": user.get("policies", []),
- }
- if user.get("expires_at"):
- raw_entry["expires_at"] = user["expires_at"]
- raw_users.append(raw_entry)
- self._raw_config = {"version": _CONFIG_VERSION, "users": raw_users}
-
- if was_v1 or (was_plaintext and self._fernet):
- self._save()
-
- def _save(self) -> None:
- try:
- json_text = json.dumps(self._raw_config, indent=2)
- temp_path = self.config_path.with_suffix('.json.tmp')
- if self._fernet:
- encrypted = self._fernet.encrypt(json_text.encode("utf-8"))
- temp_path.write_bytes(_IAM_ENCRYPTED_PREFIX + encrypted)
- else:
- temp_path.write_text(json_text, encoding='utf-8')
- temp_path.replace(self.config_path)
- except (OSError, PermissionError) as e:
- raise IamError(f"Cannot save IAM config: {e}")
-
- def config_summary(self) -> Dict[str, Any]:
- return {
- "path": str(self.config_path),
- "user_count": len(self._user_records),
- "allowed_actions": sorted(ALLOWED_ACTIONS),
- }
-
- def export_config(self, mask_secrets: bool = True) -> Dict[str, Any]:
- payload: Dict[str, Any] = {"version": _CONFIG_VERSION, "users": []}
- for user in self._raw_config.get("users", []):
- access_keys = []
- for key_info in user.get("access_keys", []):
- access_keys.append({
- "access_key": key_info["access_key"],
- "secret_key": "\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022" if mask_secrets else key_info["secret_key"],
- "status": key_info.get("status", "active"),
- "created_at": key_info.get("created_at"),
- })
- record: Dict[str, Any] = {
- "user_id": user["user_id"],
- "display_name": user["display_name"],
- "enabled": user.get("enabled", True),
- "access_keys": access_keys,
- "policies": user["policies"],
- }
- if access_keys:
- record["access_key"] = access_keys[0]["access_key"]
- if user.get("expires_at"):
- record["expires_at"] = user["expires_at"]
- payload["users"].append(record)
- return payload
-
- def _build_policy_objects(self, policies: Sequence[Dict[str, Any]]) -> List[Policy]:
- entries: List[Policy] = []
- for policy in policies:
- bucket = str(policy.get("bucket", "*")).lower()
- prefix = str(policy.get("prefix", "*"))
- raw_actions = policy.get("actions", [])
- if isinstance(raw_actions, str):
- raw_actions = [raw_actions]
- action_set: Set[str] = set()
- for action in raw_actions:
- canonical = self._normalize_action(action)
- if canonical == "*":
- action_set = set(ALLOWED_ACTIONS)
- break
- if canonical:
- action_set.add(canonical)
- if action_set:
- entries.append(Policy(bucket=bucket, actions=action_set, prefix=prefix))
- return entries
-
- def _prepare_policy_payload(self, policies: Optional[Sequence[Dict[str, Any]]]) -> List[Dict[str, Any]]:
- if not policies:
- policies = (
- {
- "bucket": "*",
- "actions": ["list", "read", "write", "delete", "share", "policy",
- "create_bucket", "delete_bucket"],
- },
- )
- sanitized: List[Dict[str, Any]] = []
- for policy in policies:
- bucket = str(policy.get("bucket", "*")).lower()
- prefix = str(policy.get("prefix", "*"))
- raw_actions = policy.get("actions", [])
- if isinstance(raw_actions, str):
- raw_actions = [raw_actions]
- action_set: Set[str] = set()
- for action in raw_actions:
- canonical = self._normalize_action(action)
- if canonical == "*":
- action_set = set(ALLOWED_ACTIONS)
- break
- if canonical:
- action_set.add(canonical)
- if not action_set:
- continue
- entry: Dict[str, Any] = {"bucket": bucket, "actions": sorted(action_set)}
- if prefix != "*":
- entry["prefix"] = prefix
- sanitized.append(entry)
- if not sanitized:
- raise IamError("At least one policy with valid actions is required")
- return sanitized
-
- def _build_principal(self, access_key: str, record: Dict[str, Any]) -> Principal:
- return Principal(
- access_key=access_key,
- display_name=record["display_name"],
- policies=record["policies"],
- )
-
- def _normalize_action(self, action: str) -> str:
- if not action:
- return ""
- lowered = action.strip().lower()
- if lowered == "*":
- return "*"
- candidate = ACTION_ALIASES.get(lowered, lowered)
- return candidate if candidate in ALLOWED_ACTIONS else ""
-
- def _write_default(self) -> None:
- access_key = os.environ.get("ADMIN_ACCESS_KEY", "").strip() or secrets.token_hex(12)
- secret_key = os.environ.get("ADMIN_SECRET_KEY", "").strip() or secrets.token_urlsafe(32)
- custom_keys = bool(os.environ.get("ADMIN_ACCESS_KEY", "").strip())
- user_id = self._generate_user_id()
- now_iso = datetime.now(timezone.utc).isoformat()
- default = {
- "version": _CONFIG_VERSION,
- "users": [
- {
- "user_id": user_id,
- "display_name": "Local Admin",
- "enabled": True,
- "access_keys": [
- {
- "access_key": access_key,
- "secret_key": secret_key,
- "status": "active",
- "created_at": now_iso,
- }
- ],
- "policies": [
- {"bucket": "*", "actions": list(ALLOWED_ACTIONS)}
- ],
- }
- ]
- }
- json_text = json.dumps(default, indent=2)
- if self._fernet:
- encrypted = self._fernet.encrypt(json_text.encode("utf-8"))
- self.config_path.write_bytes(_IAM_ENCRYPTED_PREFIX + encrypted)
- else:
- self.config_path.write_text(json_text)
- print(f"\n{'='*60}")
- print("MYFSIO FIRST RUN - ADMIN CREDENTIALS")
- print(f"{'='*60}")
- if custom_keys:
- print(f"Access Key: {access_key} (from ADMIN_ACCESS_KEY)")
- print(f"Secret Key: {'(from ADMIN_SECRET_KEY)' if os.environ.get('ADMIN_SECRET_KEY', '').strip() else secret_key}")
- else:
- print(f"Access Key: {access_key}")
- print(f"Secret Key: {secret_key}")
- print(f"User ID: {user_id}")
- print(f"{'='*60}")
- if self._fernet:
- print("IAM config is encrypted at rest.")
- print("Lost credentials? Run: python run.py reset-cred")
- else:
- print(f"Missed this? Check: {self.config_path}")
- print(f"{'='*60}\n")
-
- def _validate_expires_at(self, expires_at: str) -> None:
- try:
- dt = datetime.fromisoformat(expires_at)
- if dt.tzinfo is None:
- dt = dt.replace(tzinfo=timezone.utc)
- except (ValueError, TypeError):
- raise IamError(f"Invalid expires_at format: {expires_at}. Use ISO 8601 (e.g. 2026-12-31T23:59:59Z)")
-
- def _generate_access_key(self) -> str:
- return secrets.token_hex(8)
-
- def _generate_secret_key(self) -> str:
- return secrets.token_urlsafe(24)
-
- def _generate_user_id(self) -> str:
- return f"u-{secrets.token_hex(8)}"
-
- def _resolve_raw_user(self, identifier: str) -> Tuple[Dict[str, Any], str]:
- for user in self._raw_config.get("users", []):
- if user.get("user_id") == identifier:
- return user, identifier
- for user in self._raw_config.get("users", []):
- for key_info in user.get("access_keys", []):
- if key_info["access_key"] == identifier:
- return user, user["user_id"]
- raise IamError("User not found")
-
- def _get_raw_user(self, access_key: str) -> Dict[str, Any]:
- user, _ = self._resolve_raw_user(access_key)
- return user
-
- def _enforce_key_and_user_status(self, access_key: str) -> None:
- key_status = self._key_status.get(access_key, "active")
- if key_status != "active":
- raise IamError("Access key is inactive")
- user_id = self._key_index.get(access_key)
- if user_id:
- record = self._user_records.get(user_id)
- if record and not record.get("enabled", True):
- raise IamError("User account is disabled")
-
- def get_secret_key(self, access_key: str) -> str | None:
- now = time.time()
- cached = self._secret_key_cache.get(access_key)
- if cached:
- secret_key, cached_time = cached
- if now - cached_time < self._cache_ttl:
- user_id = self._key_index.get(access_key)
- if user_id:
- record = self._user_records.get(user_id)
- if record:
- self._check_expiry(access_key, record)
- self._enforce_key_and_user_status(access_key)
- return secret_key
-
- self._maybe_reload()
- secret = self._key_secrets.get(access_key)
- if secret:
- user_id = self._key_index.get(access_key)
- if user_id:
- record = self._user_records.get(user_id)
- if record:
- self._check_expiry(access_key, record)
- self._enforce_key_and_user_status(access_key)
- self._secret_key_cache[access_key] = (secret, now)
- return secret
- return None
-
- def get_principal(self, access_key: str) -> Principal | None:
- now = time.time()
- cached = self._principal_cache.get(access_key)
- if cached:
- principal, cached_time = cached
- if now - cached_time < self._cache_ttl:
- user_id = self._key_index.get(access_key)
- if user_id:
- record = self._user_records.get(user_id)
- if record:
- self._check_expiry(access_key, record)
- self._enforce_key_and_user_status(access_key)
- return principal
-
- self._maybe_reload()
- self._enforce_key_and_user_status(access_key)
- user_id = self._key_index.get(access_key)
- if user_id:
- record = self._user_records.get(user_id)
- if record:
- self._check_expiry(access_key, record)
- principal = self._build_principal(access_key, record)
- self._principal_cache[access_key] = (principal, now)
- return principal
- return None
diff --git a/python/app/integrity.py b/python/app/integrity.py
deleted file mode 100644
index 2ca3eb5..0000000
--- a/python/app/integrity.py
+++ /dev/null
@@ -1,995 +0,0 @@
-from __future__ import annotations
-
-import hashlib
-import json
-import logging
-import os
-import threading
-import time
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-try:
- import myfsio_core as _rc
- if not hasattr(_rc, "md5_file"):
- raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
- _HAS_RUST = True
-except ImportError:
- _HAS_RUST = False
-
-logger = logging.getLogger(__name__)
-
-
-def _compute_etag(path: Path) -> str:
- if _HAS_RUST:
- return _rc.md5_file(str(path))
- checksum = hashlib.md5()
- with path.open("rb") as handle:
- for chunk in iter(lambda: handle.read(8192), b""):
- checksum.update(chunk)
- return checksum.hexdigest()
-
-
-@dataclass
-class IntegrityIssue:
- issue_type: str
- bucket: str
- key: str
- detail: str
- healed: bool = False
- heal_action: str = ""
-
- def to_dict(self) -> dict:
- return {
- "issue_type": self.issue_type,
- "bucket": self.bucket,
- "key": self.key,
- "detail": self.detail,
- "healed": self.healed,
- "heal_action": self.heal_action,
- }
-
-
-@dataclass
-class IntegrityResult:
- corrupted_objects: int = 0
- orphaned_objects: int = 0
- phantom_metadata: int = 0
- stale_versions: int = 0
- etag_cache_inconsistencies: int = 0
- legacy_metadata_drifts: int = 0
- issues_healed: int = 0
- issues: List[IntegrityIssue] = field(default_factory=list)
- errors: List[str] = field(default_factory=list)
- objects_scanned: int = 0
- buckets_scanned: int = 0
- execution_time_seconds: float = 0.0
-
- def to_dict(self) -> dict:
- return {
- "corrupted_objects": self.corrupted_objects,
- "orphaned_objects": self.orphaned_objects,
- "phantom_metadata": self.phantom_metadata,
- "stale_versions": self.stale_versions,
- "etag_cache_inconsistencies": self.etag_cache_inconsistencies,
- "legacy_metadata_drifts": self.legacy_metadata_drifts,
- "issues_healed": self.issues_healed,
- "issues": [i.to_dict() for i in self.issues],
- "errors": self.errors,
- "objects_scanned": self.objects_scanned,
- "buckets_scanned": self.buckets_scanned,
- "execution_time_seconds": self.execution_time_seconds,
- }
-
- @property
- def total_issues(self) -> int:
- return (
- self.corrupted_objects
- + self.orphaned_objects
- + self.phantom_metadata
- + self.stale_versions
- + self.etag_cache_inconsistencies
- + self.legacy_metadata_drifts
- )
-
- @property
- def has_issues(self) -> bool:
- return self.total_issues > 0
-
-
-@dataclass
-class IntegrityExecutionRecord:
- timestamp: float
- result: dict
- dry_run: bool
- auto_heal: bool
-
- def to_dict(self) -> dict:
- return {
- "timestamp": self.timestamp,
- "result": self.result,
- "dry_run": self.dry_run,
- "auto_heal": self.auto_heal,
- }
-
- @classmethod
- def from_dict(cls, data: dict) -> IntegrityExecutionRecord:
- return cls(
- timestamp=data["timestamp"],
- result=data["result"],
- dry_run=data.get("dry_run", False),
- auto_heal=data.get("auto_heal", False),
- )
-
-
-class IntegrityHistoryStore:
- def __init__(self, storage_root: Path, max_records: int = 50) -> None:
- self.storage_root = storage_root
- self.max_records = max_records
- self._lock = threading.Lock()
-
- def _get_path(self) -> Path:
- return self.storage_root / ".myfsio.sys" / "config" / "integrity_history.json"
-
- def load(self) -> List[IntegrityExecutionRecord]:
- path = self._get_path()
- if not path.exists():
- return []
- try:
- with open(path, "r", encoding="utf-8") as f:
- data = json.load(f)
- return [IntegrityExecutionRecord.from_dict(d) for d in data.get("executions", [])]
- except (OSError, ValueError, KeyError) as e:
- logger.error("Failed to load integrity history: %s", e)
- return []
-
- def save(self, records: List[IntegrityExecutionRecord]) -> None:
- path = self._get_path()
- path.parent.mkdir(parents=True, exist_ok=True)
- data = {"executions": [r.to_dict() for r in records[: self.max_records]]}
- try:
- with open(path, "w", encoding="utf-8") as f:
- json.dump(data, f, indent=2)
- except OSError as e:
- logger.error("Failed to save integrity history: %s", e)
-
- def add(self, record: IntegrityExecutionRecord) -> None:
- with self._lock:
- records = self.load()
- records.insert(0, record)
- self.save(records)
-
- def get_history(self, limit: int = 50, offset: int = 0) -> List[IntegrityExecutionRecord]:
- return self.load()[offset : offset + limit]
-
-
-class IntegrityCursorStore:
- def __init__(self, storage_root: Path) -> None:
- self.storage_root = storage_root
- self._lock = threading.Lock()
-
- def _get_path(self) -> Path:
- return self.storage_root / ".myfsio.sys" / "config" / "integrity_cursor.json"
-
- def load(self) -> Dict[str, Any]:
- path = self._get_path()
- if not path.exists():
- return {"buckets": {}}
- try:
- with open(path, "r", encoding="utf-8") as f:
- data = json.load(f)
- if not isinstance(data.get("buckets"), dict):
- return {"buckets": {}}
- return data
- except (OSError, ValueError, KeyError):
- return {"buckets": {}}
-
- def save(self, data: Dict[str, Any]) -> None:
- path = self._get_path()
- path.parent.mkdir(parents=True, exist_ok=True)
- try:
- with open(path, "w", encoding="utf-8") as f:
- json.dump(data, f, indent=2)
- except OSError as e:
- logger.error("Failed to save integrity cursor: %s", e)
-
- def update_bucket(
- self,
- bucket_name: str,
- timestamp: float,
- last_key: Optional[str] = None,
- completed: bool = False,
- ) -> None:
- with self._lock:
- data = self.load()
- entry = data["buckets"].get(bucket_name, {})
- if completed:
- entry["last_scanned"] = timestamp
- entry.pop("last_key", None)
- entry["completed"] = True
- else:
- entry["last_scanned"] = timestamp
- if last_key is not None:
- entry["last_key"] = last_key
- entry["completed"] = False
- data["buckets"][bucket_name] = entry
- self.save(data)
-
- def clean_stale(self, existing_buckets: List[str]) -> None:
- with self._lock:
- data = self.load()
- existing_set = set(existing_buckets)
- stale_keys = [k for k in data["buckets"] if k not in existing_set]
- if stale_keys:
- for k in stale_keys:
- del data["buckets"][k]
- self.save(data)
-
- def get_last_key(self, bucket_name: str) -> Optional[str]:
- data = self.load()
- entry = data.get("buckets", {}).get(bucket_name)
- if entry is None:
- return None
- return entry.get("last_key")
-
- def get_bucket_order(self, bucket_names: List[str]) -> List[str]:
- data = self.load()
- buckets_info = data.get("buckets", {})
-
- incomplete = []
- complete = []
- for name in bucket_names:
- entry = buckets_info.get(name)
- if entry is None:
- incomplete.append((name, 0.0))
- elif entry.get("last_key") is not None:
- incomplete.append((name, entry.get("last_scanned", 0.0)))
- else:
- complete.append((name, entry.get("last_scanned", 0.0)))
-
- incomplete.sort(key=lambda x: x[1])
- complete.sort(key=lambda x: x[1])
-
- return [n for n, _ in incomplete] + [n for n, _ in complete]
-
- def get_info(self) -> Dict[str, Any]:
- data = self.load()
- buckets = data.get("buckets", {})
- return {
- "tracked_buckets": len(buckets),
- "buckets": {
- name: {
- "last_scanned": info.get("last_scanned"),
- "last_key": info.get("last_key"),
- "completed": info.get("completed", False),
- }
- for name, info in buckets.items()
- },
- }
-
-
-MAX_ISSUES = 500
-
-
-class IntegrityChecker:
- SYSTEM_ROOT = ".myfsio.sys"
- SYSTEM_BUCKETS_DIR = "buckets"
- BUCKET_META_DIR = "meta"
- BUCKET_VERSIONS_DIR = "versions"
- INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
-
- def __init__(
- self,
- storage_root: Path,
- interval_hours: float = 24.0,
- batch_size: int = 1000,
- auto_heal: bool = False,
- dry_run: bool = False,
- max_history: int = 50,
- io_throttle_ms: int = 10,
- ) -> None:
- self.storage_root = Path(storage_root)
- self.interval_seconds = interval_hours * 3600.0
- self.batch_size = batch_size
- self.auto_heal = auto_heal
- self.dry_run = dry_run
- self._timer: Optional[threading.Timer] = None
- self._shutdown = False
- self._lock = threading.Lock()
- self._scanning = False
- self._scan_start_time: Optional[float] = None
- self._io_throttle = max(0, io_throttle_ms) / 1000.0
- self.history_store = IntegrityHistoryStore(storage_root, max_records=max_history)
- self.cursor_store = IntegrityCursorStore(self.storage_root)
-
- def start(self) -> None:
- if self._timer is not None:
- return
- self._shutdown = False
- self._schedule_next()
- logger.info(
- "Integrity checker started: interval=%.1fh, batch_size=%d, auto_heal=%s, dry_run=%s",
- self.interval_seconds / 3600.0,
- self.batch_size,
- self.auto_heal,
- self.dry_run,
- )
-
- def stop(self) -> None:
- self._shutdown = True
- if self._timer:
- self._timer.cancel()
- self._timer = None
- logger.info("Integrity checker stopped")
-
- def _schedule_next(self) -> None:
- if self._shutdown:
- return
- self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
- self._timer.daemon = True
- self._timer.start()
-
- def _run_cycle(self) -> None:
- if self._shutdown:
- return
- try:
- self.run_now()
- except Exception as e:
- logger.error("Integrity check cycle failed: %s", e)
- finally:
- self._schedule_next()
-
- def run_now(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> IntegrityResult:
- if not self._lock.acquire(blocking=False):
- raise RuntimeError("Integrity scan is already in progress")
-
- try:
- self._scanning = True
- self._scan_start_time = time.time()
-
- effective_auto_heal = auto_heal if auto_heal is not None else self.auto_heal
- effective_dry_run = dry_run if dry_run is not None else self.dry_run
-
- start = self._scan_start_time
- result = IntegrityResult()
-
- bucket_names = self._list_bucket_names()
- self.cursor_store.clean_stale(bucket_names)
- ordered_buckets = self.cursor_store.get_bucket_order(bucket_names)
-
- for bucket_name in ordered_buckets:
- if self._batch_exhausted(result):
- break
- result.buckets_scanned += 1
- cursor_key = self.cursor_store.get_last_key(bucket_name)
- key_corrupted = self._check_corrupted_objects(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
- key_orphaned = self._check_orphaned_objects(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
- key_phantom = self._check_phantom_metadata(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
- self._check_stale_versions(bucket_name, result, effective_auto_heal, effective_dry_run)
- self._check_etag_cache(bucket_name, result, effective_auto_heal, effective_dry_run)
- self._check_legacy_metadata(bucket_name, result, effective_auto_heal, effective_dry_run)
- returned_keys = [k for k in (key_corrupted, key_orphaned, key_phantom) if k is not None]
- bucket_exhausted = self._batch_exhausted(result)
- if bucket_exhausted and returned_keys:
- self.cursor_store.update_bucket(bucket_name, time.time(), last_key=min(returned_keys))
- else:
- self.cursor_store.update_bucket(bucket_name, time.time(), completed=True)
-
- result.execution_time_seconds = time.time() - start
-
- if result.has_issues or result.errors:
- logger.info(
- "Integrity check completed in %.2fs: corrupted=%d, orphaned=%d, phantom=%d, "
- "stale_versions=%d, etag_cache=%d, legacy_drift=%d, healed=%d, errors=%d%s",
- result.execution_time_seconds,
- result.corrupted_objects,
- result.orphaned_objects,
- result.phantom_metadata,
- result.stale_versions,
- result.etag_cache_inconsistencies,
- result.legacy_metadata_drifts,
- result.issues_healed,
- len(result.errors),
- " (dry run)" if effective_dry_run else "",
- )
-
- record = IntegrityExecutionRecord(
- timestamp=time.time(),
- result=result.to_dict(),
- dry_run=effective_dry_run,
- auto_heal=effective_auto_heal,
- )
- self.history_store.add(record)
-
- return result
- finally:
- self._scanning = False
- self._scan_start_time = None
- self._lock.release()
-
- def run_async(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> bool:
- if self._scanning:
- return False
- t = threading.Thread(target=self.run_now, args=(auto_heal, dry_run), daemon=True)
- t.start()
- return True
-
- def _system_path(self) -> Path:
- return self.storage_root / self.SYSTEM_ROOT
-
- def _list_bucket_names(self) -> List[str]:
- names = []
- try:
- for entry in self.storage_root.iterdir():
- if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
- names.append(entry.name)
- except OSError:
- pass
- return names
-
- def _throttle(self) -> bool:
- if self._shutdown:
- return True
- if self._io_throttle > 0:
- time.sleep(self._io_throttle)
- return self._shutdown
-
- def _batch_exhausted(self, result: IntegrityResult) -> bool:
- return self._shutdown or result.objects_scanned >= self.batch_size
-
- def _add_issue(self, result: IntegrityResult, issue: IntegrityIssue) -> None:
- if len(result.issues) < MAX_ISSUES:
- result.issues.append(issue)
-
- def _collect_index_keys(
- self, meta_root: Path, cursor_key: Optional[str] = None,
- ) -> Dict[str, Dict[str, Any]]:
- all_keys: Dict[str, Dict[str, Any]] = {}
- if not meta_root.exists():
- return all_keys
- try:
- for index_file in meta_root.rglob("_index.json"):
- if not index_file.is_file():
- continue
- rel_dir = index_file.parent.relative_to(meta_root)
- dir_prefix = "" if rel_dir == Path(".") else rel_dir.as_posix()
- if cursor_key is not None and dir_prefix:
- full_prefix = dir_prefix + "/"
- if not cursor_key.startswith(full_prefix) and cursor_key > full_prefix:
- continue
- try:
- index_data = json.loads(index_file.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- continue
- for key_name, entry in index_data.items():
- full_key = (dir_prefix + "/" + key_name) if dir_prefix else key_name
- if cursor_key is not None and full_key <= cursor_key:
- continue
- all_keys[full_key] = {
- "entry": entry,
- "index_file": index_file,
- "key_name": key_name,
- }
- except OSError:
- pass
- return all_keys
-
- def _walk_bucket_files_sorted(
- self, bucket_path: Path, cursor_key: Optional[str] = None,
- ):
- def _walk(dir_path: Path, prefix: str):
- try:
- entries = list(os.scandir(dir_path))
- except OSError:
- return
-
- def _sort_key(e):
- if e.is_dir(follow_symlinks=False):
- return e.name + "/"
- return e.name
-
- entries.sort(key=_sort_key)
-
- for entry in entries:
- if entry.is_dir(follow_symlinks=False):
- if not prefix and entry.name in self.INTERNAL_FOLDERS:
- continue
- new_prefix = (prefix + "/" + entry.name) if prefix else entry.name
- if cursor_key is not None:
- full_prefix = new_prefix + "/"
- if not cursor_key.startswith(full_prefix) and cursor_key > full_prefix:
- continue
- yield from _walk(Path(entry.path), new_prefix)
- elif entry.is_file(follow_symlinks=False):
- full_key = (prefix + "/" + entry.name) if prefix else entry.name
- if cursor_key is not None and full_key <= cursor_key:
- continue
- yield full_key
-
- yield from _walk(bucket_path, "")
-
- def _check_corrupted_objects(
- self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
- cursor_key: Optional[str] = None,
- ) -> Optional[str]:
- if self._batch_exhausted(result):
- return None
- bucket_path = self.storage_root / bucket_name
- meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
-
- if not meta_root.exists():
- return None
-
- last_key = None
- try:
- all_keys = self._collect_index_keys(meta_root, cursor_key)
- sorted_keys = sorted(all_keys.keys())
-
- for full_key in sorted_keys:
- if self._throttle():
- return last_key
- if self._batch_exhausted(result):
- return last_key
-
- info = all_keys[full_key]
- entry = info["entry"]
- index_file = info["index_file"]
- key_name = info["key_name"]
-
- object_path = bucket_path / full_key
- if not object_path.exists():
- continue
-
- result.objects_scanned += 1
- last_key = full_key
-
- meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
- stored_etag = meta.get("__etag__")
- if not stored_etag:
- continue
-
- try:
- actual_etag = _compute_etag(object_path)
- except OSError:
- continue
-
- if actual_etag != stored_etag:
- result.corrupted_objects += 1
- issue = IntegrityIssue(
- issue_type="corrupted_object",
- bucket=bucket_name,
- key=full_key,
- detail=f"stored_etag={stored_etag} actual_etag={actual_etag}",
- )
-
- if auto_heal and not dry_run:
- try:
- stat = object_path.stat()
- meta["__etag__"] = actual_etag
- meta["__size__"] = str(stat.st_size)
- meta["__last_modified__"] = str(stat.st_mtime)
- try:
- index_data = json.loads(index_file.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- index_data = {}
- index_data[key_name] = {"metadata": meta}
- self._atomic_write_index(index_file, index_data)
- issue.healed = True
- issue.heal_action = "updated etag in index"
- result.issues_healed += 1
- except OSError as e:
- result.errors.append(f"heal corrupted {bucket_name}/{full_key}: {e}")
-
- self._add_issue(result, issue)
- except OSError as e:
- result.errors.append(f"check corrupted {bucket_name}: {e}")
- return last_key
-
- def _check_orphaned_objects(
- self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
- cursor_key: Optional[str] = None,
- ) -> Optional[str]:
- if self._batch_exhausted(result):
- return None
- bucket_path = self.storage_root / bucket_name
- meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
-
- last_key = None
- try:
- for full_key in self._walk_bucket_files_sorted(bucket_path, cursor_key):
- if self._throttle():
- return last_key
- if self._batch_exhausted(result):
- return last_key
-
- result.objects_scanned += 1
- last_key = full_key
- key_path = Path(full_key)
- key_name = key_path.name
- parent = key_path.parent
-
- if parent == Path("."):
- index_path = meta_root / "_index.json"
- else:
- index_path = meta_root / parent / "_index.json"
-
- has_entry = False
- if index_path.exists():
- try:
- index_data = json.loads(index_path.read_text(encoding="utf-8"))
- has_entry = key_name in index_data
- except (OSError, json.JSONDecodeError):
- pass
-
- if not has_entry:
- result.orphaned_objects += 1
- issue = IntegrityIssue(
- issue_type="orphaned_object",
- bucket=bucket_name,
- key=full_key,
- detail="file exists without metadata entry",
- )
-
- if auto_heal and not dry_run:
- try:
- object_path = bucket_path / full_key
- etag = _compute_etag(object_path)
- stat = object_path.stat()
- meta = {
- "__etag__": etag,
- "__size__": str(stat.st_size),
- "__last_modified__": str(stat.st_mtime),
- }
- index_data = {}
- if index_path.exists():
- try:
- index_data = json.loads(index_path.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- pass
- index_data[key_name] = {"metadata": meta}
- self._atomic_write_index(index_path, index_data)
- issue.healed = True
- issue.heal_action = "created metadata entry"
- result.issues_healed += 1
- except OSError as e:
- result.errors.append(f"heal orphaned {bucket_name}/{full_key}: {e}")
-
- self._add_issue(result, issue)
- except OSError as e:
- result.errors.append(f"check orphaned {bucket_name}: {e}")
- return last_key
-
- def _check_phantom_metadata(
- self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
- cursor_key: Optional[str] = None,
- ) -> Optional[str]:
- if self._batch_exhausted(result):
- return None
- bucket_path = self.storage_root / bucket_name
- meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
-
- if not meta_root.exists():
- return None
-
- last_key = None
- try:
- all_keys = self._collect_index_keys(meta_root, cursor_key)
- sorted_keys = sorted(all_keys.keys())
-
- heal_by_index: Dict[Path, List[str]] = {}
-
- for full_key in sorted_keys:
- if self._batch_exhausted(result):
- break
-
- result.objects_scanned += 1
- last_key = full_key
-
- object_path = bucket_path / full_key
- if not object_path.exists():
- result.phantom_metadata += 1
- info = all_keys[full_key]
- issue = IntegrityIssue(
- issue_type="phantom_metadata",
- bucket=bucket_name,
- key=full_key,
- detail="metadata entry without file on disk",
- )
- if auto_heal and not dry_run:
- index_file = info["index_file"]
- heal_by_index.setdefault(index_file, []).append(info["key_name"])
- issue.healed = True
- issue.heal_action = "removed stale index entry"
- result.issues_healed += 1
- self._add_issue(result, issue)
-
- if heal_by_index and auto_heal and not dry_run:
- for index_file, keys_to_remove in heal_by_index.items():
- try:
- index_data = json.loads(index_file.read_text(encoding="utf-8"))
- for k in keys_to_remove:
- index_data.pop(k, None)
- if index_data:
- self._atomic_write_index(index_file, index_data)
- else:
- index_file.unlink(missing_ok=True)
- except OSError as e:
- result.errors.append(f"heal phantom {bucket_name}: {e}")
- except OSError as e:
- result.errors.append(f"check phantom {bucket_name}: {e}")
- return last_key
-
- def _check_stale_versions(
- self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
- ) -> None:
- if self._batch_exhausted(result):
- return
- versions_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR
-
- if not versions_root.exists():
- return
-
- try:
- for key_dir in versions_root.rglob("*"):
- if self._throttle():
- return
- if self._batch_exhausted(result):
- return
- if not key_dir.is_dir():
- continue
-
- bin_files = {f.stem: f for f in key_dir.glob("*.bin")}
- json_files = {f.stem: f for f in key_dir.glob("*.json")}
-
- for stem, bin_file in bin_files.items():
- if self._batch_exhausted(result):
- return
- result.objects_scanned += 1
- if stem not in json_files:
- result.stale_versions += 1
- issue = IntegrityIssue(
- issue_type="stale_version",
- bucket=bucket_name,
- key=f"{key_dir.relative_to(versions_root).as_posix()}/{bin_file.name}",
- detail="version data without manifest",
- )
- if auto_heal and not dry_run:
- try:
- bin_file.unlink(missing_ok=True)
- issue.healed = True
- issue.heal_action = "removed orphaned version data"
- result.issues_healed += 1
- except OSError as e:
- result.errors.append(f"heal stale version {bin_file}: {e}")
- self._add_issue(result, issue)
-
- for stem, json_file in json_files.items():
- if self._batch_exhausted(result):
- return
- result.objects_scanned += 1
- if stem not in bin_files:
- result.stale_versions += 1
- issue = IntegrityIssue(
- issue_type="stale_version",
- bucket=bucket_name,
- key=f"{key_dir.relative_to(versions_root).as_posix()}/{json_file.name}",
- detail="version manifest without data",
- )
- if auto_heal and not dry_run:
- try:
- json_file.unlink(missing_ok=True)
- issue.healed = True
- issue.heal_action = "removed orphaned version manifest"
- result.issues_healed += 1
- except OSError as e:
- result.errors.append(f"heal stale version {json_file}: {e}")
- self._add_issue(result, issue)
- except OSError as e:
- result.errors.append(f"check stale versions {bucket_name}: {e}")
-
- def _check_etag_cache(
- self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
- ) -> None:
- if self._batch_exhausted(result):
- return
- etag_index_path = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / "etag_index.json"
-
- if not etag_index_path.exists():
- return
-
- meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
- if not meta_root.exists():
- return
-
- try:
- etag_cache = json.loads(etag_index_path.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- return
-
- found_mismatch = False
-
- for full_key, cached_etag in etag_cache.items():
- if self._batch_exhausted(result):
- break
- result.objects_scanned += 1
- key_path = Path(full_key)
- key_name = key_path.name
- parent = key_path.parent
-
- if parent == Path("."):
- index_path = meta_root / "_index.json"
- else:
- index_path = meta_root / parent / "_index.json"
-
- if not index_path.exists():
- continue
-
- try:
- index_data = json.loads(index_path.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- continue
-
- entry = index_data.get(key_name)
- if not entry:
- continue
-
- meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
- stored_etag = meta.get("__etag__")
-
- if stored_etag and cached_etag != stored_etag:
- result.etag_cache_inconsistencies += 1
- found_mismatch = True
- issue = IntegrityIssue(
- issue_type="etag_cache_inconsistency",
- bucket=bucket_name,
- key=full_key,
- detail=f"cached_etag={cached_etag} index_etag={stored_etag}",
- )
- self._add_issue(result, issue)
-
- if found_mismatch and auto_heal and not dry_run:
- try:
- etag_index_path.unlink(missing_ok=True)
- for issue in result.issues:
- if issue.issue_type == "etag_cache_inconsistency" and issue.bucket == bucket_name and not issue.healed:
- issue.healed = True
- issue.heal_action = "deleted etag_index.json"
- result.issues_healed += 1
- except OSError as e:
- result.errors.append(f"heal etag cache {bucket_name}: {e}")
-
- def _check_legacy_metadata(
- self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
- ) -> None:
- if self._batch_exhausted(result):
- return
- legacy_meta_root = self.storage_root / bucket_name / ".meta"
- if not legacy_meta_root.exists():
- return
-
- meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
-
- try:
- for meta_file in legacy_meta_root.rglob("*.meta.json"):
- if self._throttle():
- return
- if self._batch_exhausted(result):
- return
- if not meta_file.is_file():
- continue
-
- result.objects_scanned += 1
- try:
- rel = meta_file.relative_to(legacy_meta_root)
- except ValueError:
- continue
-
- full_key = rel.as_posix().removesuffix(".meta.json")
- key_path = Path(full_key)
- key_name = key_path.name
- parent = key_path.parent
-
- if parent == Path("."):
- index_path = meta_root / "_index.json"
- else:
- index_path = meta_root / parent / "_index.json"
-
- try:
- legacy_data = json.loads(meta_file.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- continue
-
- index_entry = None
- if index_path.exists():
- try:
- index_data = json.loads(index_path.read_text(encoding="utf-8"))
- index_entry = index_data.get(key_name)
- except (OSError, json.JSONDecodeError):
- pass
-
- if index_entry is None:
- result.legacy_metadata_drifts += 1
- issue = IntegrityIssue(
- issue_type="legacy_metadata_drift",
- bucket=bucket_name,
- key=full_key,
- detail="unmigrated legacy .meta.json",
- )
-
- if auto_heal and not dry_run:
- try:
- index_data = {}
- if index_path.exists():
- try:
- index_data = json.loads(index_path.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- pass
- index_data[key_name] = {"metadata": legacy_data}
- self._atomic_write_index(index_path, index_data)
- meta_file.unlink(missing_ok=True)
- issue.healed = True
- issue.heal_action = "migrated to index and deleted legacy file"
- result.issues_healed += 1
- except OSError as e:
- result.errors.append(f"heal legacy {bucket_name}/{full_key}: {e}")
-
- self._add_issue(result, issue)
- else:
- index_meta = index_entry.get("metadata", {}) if isinstance(index_entry, dict) else {}
- if legacy_data != index_meta:
- result.legacy_metadata_drifts += 1
- issue = IntegrityIssue(
- issue_type="legacy_metadata_drift",
- bucket=bucket_name,
- key=full_key,
- detail="legacy .meta.json differs from index entry",
- )
-
- if auto_heal and not dry_run:
- try:
- meta_file.unlink(missing_ok=True)
- issue.healed = True
- issue.heal_action = "deleted legacy file (index is authoritative)"
- result.issues_healed += 1
- except OSError as e:
- result.errors.append(f"heal legacy drift {bucket_name}/{full_key}: {e}")
-
- self._add_issue(result, issue)
- except OSError as e:
- result.errors.append(f"check legacy meta {bucket_name}: {e}")
-
- @staticmethod
- def _atomic_write_index(index_path: Path, data: Dict[str, Any]) -> None:
- index_path.parent.mkdir(parents=True, exist_ok=True)
- tmp_path = index_path.with_suffix(".tmp")
- try:
- with open(tmp_path, "w", encoding="utf-8") as f:
- json.dump(data, f)
- os.replace(str(tmp_path), str(index_path))
- except BaseException:
- try:
- tmp_path.unlink(missing_ok=True)
- except OSError:
- pass
- raise
-
- def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
- records = self.history_store.get_history(limit, offset)
- return [r.to_dict() for r in records]
-
- def get_status(self) -> dict:
- status: Dict[str, Any] = {
- "enabled": not self._shutdown or self._timer is not None,
- "running": self._timer is not None and not self._shutdown,
- "scanning": self._scanning,
- "interval_hours": self.interval_seconds / 3600.0,
- "batch_size": self.batch_size,
- "auto_heal": self.auto_heal,
- "dry_run": self.dry_run,
- "io_throttle_ms": round(self._io_throttle * 1000),
- }
- if self._scanning and self._scan_start_time is not None:
- status["scan_elapsed_seconds"] = round(time.time() - self._scan_start_time, 1)
- status["cursor"] = self.cursor_store.get_info()
- return status
diff --git a/python/app/kms.py b/python/app/kms.py
deleted file mode 100644
index 3ac02b8..0000000
--- a/python/app/kms.py
+++ /dev/null
@@ -1,422 +0,0 @@
-from __future__ import annotations
-
-import base64
-import json
-import logging
-import os
-import secrets
-import subprocess
-import sys
-import uuid
-from dataclasses import dataclass, field
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-from cryptography.hazmat.primitives.ciphers.aead import AESGCM
-
-from .encryption import EncryptionError, EncryptionProvider, EncryptionResult
-
-if sys.platform != "win32":
- import fcntl
-
-logger = logging.getLogger(__name__)
-
-
-def _set_secure_file_permissions(file_path: Path) -> None:
- """Set restrictive file permissions (owner read/write only)."""
- if sys.platform == "win32":
- try:
- username = os.environ.get("USERNAME", "")
- if username:
- subprocess.run(
- ["icacls", str(file_path), "/inheritance:r",
- "/grant:r", f"{username}:F"],
- check=True, capture_output=True
- )
- else:
- logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
- except (subprocess.SubprocessError, OSError) as exc:
- logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
- else:
- os.chmod(file_path, 0o600)
-
-
-@dataclass
-class KMSKey:
- """Represents a KMS encryption key."""
- key_id: str
- description: str
- created_at: str
- enabled: bool = True
- key_material: bytes = field(default_factory=lambda: b"", repr=False)
-
- @property
- def arn(self) -> str:
- return f"arn:aws:kms:local:000000000000:key/{self.key_id}"
-
- def to_dict(self, include_key: bool = False) -> Dict[str, Any]:
- data = {
- "KeyId": self.key_id,
- "Arn": self.arn,
- "Description": self.description,
- "CreationDate": self.created_at,
- "Enabled": self.enabled,
- "KeyState": "Enabled" if self.enabled else "Disabled",
- "KeyUsage": "ENCRYPT_DECRYPT",
- "KeySpec": "SYMMETRIC_DEFAULT",
- }
- if include_key:
- data["KeyMaterial"] = base64.b64encode(self.key_material).decode()
- return data
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> "KMSKey":
- key_material = b""
- if "KeyMaterial" in data:
- key_material = base64.b64decode(data["KeyMaterial"])
- return cls(
- key_id=data["KeyId"],
- description=data.get("Description", ""),
- created_at=data.get("CreationDate", datetime.now(timezone.utc).isoformat()),
- enabled=data.get("Enabled", True),
- key_material=key_material,
- )
-
-
-class KMSEncryptionProvider(EncryptionProvider):
- """Encryption provider using a specific KMS key."""
-
- def __init__(self, kms: "KMSManager", key_id: str):
- self.kms = kms
- self.key_id = key_id
-
- @property
- def KEY_ID(self) -> str:
- return self.key_id
-
- def generate_data_key(self) -> tuple[bytes, bytes]:
- """Generate a data key encrypted with the KMS key."""
- return self.kms.generate_data_key(self.key_id)
-
- def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
- """Encrypt data using envelope encryption with KMS."""
- data_key, encrypted_data_key = self.generate_data_key()
-
- aesgcm = AESGCM(data_key)
- nonce = secrets.token_bytes(12)
- ciphertext = aesgcm.encrypt(nonce, plaintext,
- json.dumps(context, sort_keys=True).encode() if context else None)
-
- return EncryptionResult(
- ciphertext=ciphertext,
- nonce=nonce,
- key_id=self.key_id,
- encrypted_data_key=encrypted_data_key,
- )
-
- def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
- key_id: str, context: Dict[str, str] | None = None) -> bytes:
- """Decrypt data using envelope encryption with KMS."""
- data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
- if len(data_key) != 32:
- raise EncryptionError("Invalid data key size")
-
- aesgcm = AESGCM(data_key)
- try:
- return aesgcm.decrypt(nonce, ciphertext,
- json.dumps(context, sort_keys=True).encode() if context else None)
- except Exception as exc:
- logger.debug("KMS decryption failed: %s", exc)
- raise EncryptionError("Failed to decrypt data") from exc
-
- def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
- """Decrypt an encrypted data key using KMS."""
- if key_id is None:
- key_id = self.key_id
- data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
- if len(data_key) != 32:
- raise EncryptionError("Invalid data key size")
- return data_key
-
-
-class KMSManager:
- """Manages KMS keys and operations.
-
- This is a local implementation that mimics AWS KMS functionality.
- Keys are stored encrypted on disk.
- """
-
- def __init__(
- self,
- keys_path: Path,
- master_key_path: Path,
- generate_data_key_min_bytes: int = 1,
- generate_data_key_max_bytes: int = 1024,
- ):
- self.keys_path = keys_path
- self.master_key_path = master_key_path
- self.generate_data_key_min_bytes = generate_data_key_min_bytes
- self.generate_data_key_max_bytes = generate_data_key_max_bytes
- self._keys: Dict[str, KMSKey] = {}
- self._master_key: bytes | None = None
- self._master_aesgcm: AESGCM | None = None
- self._loaded = False
-
- @property
- def master_key(self) -> bytes:
- """Load or create the master key for encrypting KMS keys (with file locking)."""
- if self._master_key is None:
- lock_path = self.master_key_path.with_suffix(".lock")
- lock_path.parent.mkdir(parents=True, exist_ok=True)
- with open(lock_path, "w") as lock_file:
- if sys.platform == "win32":
- import msvcrt
- msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
- else:
- fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
- try:
- if self.master_key_path.exists():
- self._master_key = base64.b64decode(
- self.master_key_path.read_text().strip()
- )
- else:
- self._master_key = secrets.token_bytes(32)
- self.master_key_path.write_text(
- base64.b64encode(self._master_key).decode()
- )
- _set_secure_file_permissions(self.master_key_path)
- finally:
- if sys.platform == "win32":
- import msvcrt
- msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
- else:
- fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
- self._master_aesgcm = AESGCM(self._master_key)
- return self._master_key
-
- def _load_keys(self) -> None:
- """Load keys from disk."""
- if self._loaded:
- return
-
- if self.keys_path.exists():
- try:
- data = json.loads(self.keys_path.read_text(encoding="utf-8"))
- for key_data in data.get("keys", []):
- key = KMSKey.from_dict(key_data)
- if key_data.get("EncryptedKeyMaterial"):
- encrypted = base64.b64decode(key_data["EncryptedKeyMaterial"])
- key.key_material = self._decrypt_key_material(encrypted)
- self._keys[key.key_id] = key
- except json.JSONDecodeError as exc:
- logger.error("Failed to parse KMS keys file: %s", exc)
- except (ValueError, KeyError) as exc:
- logger.error("Invalid KMS key data: %s", exc)
-
- self._loaded = True
-
- def _save_keys(self) -> None:
- """Save keys to disk (with encrypted key material)."""
- keys_data = []
- for key in self._keys.values():
- data = key.to_dict(include_key=False)
- encrypted = self._encrypt_key_material(key.key_material)
- data["EncryptedKeyMaterial"] = base64.b64encode(encrypted).decode()
- keys_data.append(data)
-
- self.keys_path.parent.mkdir(parents=True, exist_ok=True)
- self.keys_path.write_text(
- json.dumps({"keys": keys_data}, indent=2),
- encoding="utf-8"
- )
- _set_secure_file_permissions(self.keys_path)
-
- def _encrypt_key_material(self, key_material: bytes) -> bytes:
- _ = self.master_key
- nonce = secrets.token_bytes(12)
- ciphertext = self._master_aesgcm.encrypt(nonce, key_material, None)
- return nonce + ciphertext
-
- def _decrypt_key_material(self, encrypted: bytes) -> bytes:
- _ = self.master_key
- nonce = encrypted[:12]
- ciphertext = encrypted[12:]
- return self._master_aesgcm.decrypt(nonce, ciphertext, None)
-
- def create_key(self, description: str = "", key_id: str | None = None) -> KMSKey:
- """Create a new KMS key."""
- self._load_keys()
-
- if key_id is None:
- key_id = str(uuid.uuid4())
-
- if key_id in self._keys:
- raise EncryptionError(f"Key already exists: {key_id}")
-
- key = KMSKey(
- key_id=key_id,
- description=description,
- created_at=datetime.now(timezone.utc).isoformat(),
- enabled=True,
- key_material=secrets.token_bytes(32),
- )
-
- self._keys[key_id] = key
- self._save_keys()
- return key
-
- def get_key(self, key_id: str) -> KMSKey | None:
- """Get a key by ID."""
- self._load_keys()
- return self._keys.get(key_id)
-
- def list_keys(self) -> List[KMSKey]:
- """List all keys."""
- self._load_keys()
- return list(self._keys.values())
-
- def get_default_key_id(self) -> str:
- """Get the default KMS key ID, creating one if none exist."""
- self._load_keys()
- for key in self._keys.values():
- if key.enabled:
- return key.key_id
- default_key = self.create_key(description="Default KMS Key")
- return default_key.key_id
-
- def get_provider(self, key_id: str | None = None) -> "KMSEncryptionProvider":
- """Get a KMS encryption provider for the specified key."""
- if key_id is None:
- key_id = self.get_default_key_id()
- key = self.get_key(key_id)
- if not key:
- raise EncryptionError(f"Key not found: {key_id}")
- if not key.enabled:
- raise EncryptionError(f"Key is disabled: {key_id}")
- return KMSEncryptionProvider(self, key_id)
-
- def enable_key(self, key_id: str) -> None:
- """Enable a key."""
- self._load_keys()
- key = self._keys.get(key_id)
- if not key:
- raise EncryptionError(f"Key not found: {key_id}")
- key.enabled = True
- self._save_keys()
-
- def disable_key(self, key_id: str) -> None:
- """Disable a key."""
- self._load_keys()
- key = self._keys.get(key_id)
- if not key:
- raise EncryptionError(f"Key not found: {key_id}")
- key.enabled = False
- self._save_keys()
-
- def delete_key(self, key_id: str) -> None:
- """Delete a key (schedule for deletion in real KMS)."""
- self._load_keys()
- if key_id not in self._keys:
- raise EncryptionError(f"Key not found: {key_id}")
- del self._keys[key_id]
- self._save_keys()
-
- def encrypt(self, key_id: str, plaintext: bytes,
- context: Dict[str, str] | None = None) -> bytes:
- """Encrypt data directly with a KMS key."""
- self._load_keys()
- key = self._keys.get(key_id)
- if not key:
- raise EncryptionError(f"Key not found: {key_id}")
- if not key.enabled:
- raise EncryptionError(f"Key is disabled: {key_id}")
-
- aesgcm = AESGCM(key.key_material)
- nonce = secrets.token_bytes(12)
- aad = json.dumps(context, sort_keys=True).encode() if context else None
- ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
-
- key_id_bytes = key_id.encode("utf-8")
- return len(key_id_bytes).to_bytes(2, "big") + key_id_bytes + nonce + ciphertext
-
- def decrypt(self, ciphertext: bytes,
- context: Dict[str, str] | None = None) -> tuple[bytes, str]:
- """Decrypt data directly with a KMS key.
-
- Returns:
- Tuple of (plaintext, key_id)
- """
- self._load_keys()
-
- key_id_len = int.from_bytes(ciphertext[:2], "big")
- key_id = ciphertext[2:2 + key_id_len].decode("utf-8")
- rest = ciphertext[2 + key_id_len:]
-
- key = self._keys.get(key_id)
- if not key:
- raise EncryptionError(f"Key not found: {key_id}")
- if not key.enabled:
- raise EncryptionError(f"Key is disabled: {key_id}")
-
- nonce = rest[:12]
- encrypted = rest[12:]
-
- aesgcm = AESGCM(key.key_material)
- aad = json.dumps(context, sort_keys=True).encode() if context else None
- try:
- plaintext = aesgcm.decrypt(nonce, encrypted, aad)
- return plaintext, key_id
- except Exception as exc:
- logger.debug("KMS decrypt operation failed: %s", exc)
- raise EncryptionError("Decryption failed") from exc
-
- def generate_data_key(self, key_id: str,
- context: Dict[str, str] | None = None,
- key_spec: str = "AES_256") -> tuple[bytes, bytes]:
- """Generate a data key and return both plaintext and encrypted versions.
-
- Args:
- key_id: The KMS key ID to use for encryption
- context: Optional encryption context
- key_spec: Key specification - AES_128 or AES_256 (default)
-
- Returns:
- Tuple of (plaintext_key, encrypted_key)
- """
- self._load_keys()
- key = self._keys.get(key_id)
- if not key:
- raise EncryptionError(f"Key not found: {key_id}")
- if not key.enabled:
- raise EncryptionError(f"Key is disabled: {key_id}")
-
- key_bytes = 32 if key_spec == "AES_256" else 16
- plaintext_key = secrets.token_bytes(key_bytes)
-
- encrypted_key = self.encrypt(key_id, plaintext_key, context)
-
- return plaintext_key, encrypted_key
-
- def decrypt_data_key(self, key_id: str, encrypted_key: bytes,
- context: Dict[str, str] | None = None) -> bytes:
- """Decrypt a data key."""
- plaintext, _ = self.decrypt(encrypted_key, context)
- return plaintext
-
- def re_encrypt(self, ciphertext: bytes, destination_key_id: str,
- source_context: Dict[str, str] | None = None,
- destination_context: Dict[str, str] | None = None) -> bytes:
- """Re-encrypt data with a different key."""
-
- plaintext, source_key_id = self.decrypt(ciphertext, source_context)
-
- return self.encrypt(destination_key_id, plaintext, destination_context)
-
- def generate_random(self, num_bytes: int = 32) -> bytes:
- """Generate cryptographically secure random bytes."""
- if num_bytes < self.generate_data_key_min_bytes or num_bytes > self.generate_data_key_max_bytes:
- raise EncryptionError(
- f"Number of bytes must be between {self.generate_data_key_min_bytes} and {self.generate_data_key_max_bytes}"
- )
- return secrets.token_bytes(num_bytes)
diff --git a/python/app/kms_api.py b/python/app/kms_api.py
deleted file mode 100644
index 332c012..0000000
--- a/python/app/kms_api.py
+++ /dev/null
@@ -1,444 +0,0 @@
-from __future__ import annotations
-
-import base64
-import uuid
-from typing import Any, Dict
-
-from flask import Blueprint, Response, current_app, jsonify, request
-
-from .encryption import ClientEncryptionHelper, EncryptionError
-from .extensions import limiter
-from .iam import IamError
-
-kms_api_bp = Blueprint("kms_api", __name__, url_prefix="/kms")
-
-
-def _require_principal():
- """Require authentication for KMS operations."""
- from .s3_api import _require_principal as s3_require_principal
- return s3_require_principal()
-
-
-def _kms():
- """Get KMS manager from app extensions."""
- return current_app.extensions.get("kms")
-
-
-def _encryption():
- """Get encryption manager from app extensions."""
- return current_app.extensions.get("encryption")
-
-
-def _error_response(code: str, message: str, status: int) -> tuple[Dict[str, Any], int]:
- return {"__type": code, "message": message}, status
-
-@kms_api_bp.route("/keys", methods=["GET", "POST"])
-@limiter.limit("30 per minute")
-def list_or_create_keys():
- """List all KMS keys or create a new key."""
- principal, error = _require_principal()
- if error:
- return error
-
- kms = _kms()
- if not kms:
- return _error_response("KMSNotEnabled", "KMS is not configured", 400)
-
- if request.method == "POST":
- payload = request.get_json(silent=True) or {}
- key_id = payload.get("KeyId") or payload.get("key_id")
- description = payload.get("Description") or payload.get("description", "")
-
- try:
- key = kms.create_key(description=description, key_id=key_id)
- current_app.logger.info(
- "KMS key created",
- extra={"key_id": key.key_id, "principal": principal.access_key},
- )
- return jsonify({
- "KeyMetadata": key.to_dict(),
- })
- except EncryptionError as exc:
- return _error_response("KMSInternalException", str(exc), 400)
-
- keys = kms.list_keys()
- return jsonify({
- "Keys": [{"KeyId": k.key_id, "KeyArn": k.arn} for k in keys],
- "Truncated": False,
- })
-
-
-@kms_api_bp.route("/keys/", methods=["GET", "DELETE"])
-@limiter.limit("30 per minute")
-def get_or_delete_key(key_id: str):
- """Get or delete a specific KMS key."""
- principal, error = _require_principal()
- if error:
- return error
-
- kms = _kms()
- if not kms:
- return _error_response("KMSNotEnabled", "KMS is not configured", 400)
-
- if request.method == "DELETE":
- try:
- kms.delete_key(key_id)
- current_app.logger.info(
- "KMS key deleted",
- extra={"key_id": key_id, "principal": principal.access_key},
- )
- return Response(status=204)
- except EncryptionError as exc:
- return _error_response("NotFoundException", str(exc), 404)
-
- key = kms.get_key(key_id)
- if not key:
- return _error_response("NotFoundException", f"Key not found: {key_id}", 404)
-
- return jsonify({"KeyMetadata": key.to_dict()})
-
-
-@kms_api_bp.route("/keys//enable", methods=["POST"])
-@limiter.limit("30 per minute")
-def enable_key(key_id: str):
- """Enable a KMS key."""
- principal, error = _require_principal()
- if error:
- return error
-
- kms = _kms()
- if not kms:
- return _error_response("KMSNotEnabled", "KMS is not configured", 400)
-
- try:
- kms.enable_key(key_id)
- current_app.logger.info(
- "KMS key enabled",
- extra={"key_id": key_id, "principal": principal.access_key},
- )
- return Response(status=200)
- except EncryptionError as exc:
- return _error_response("NotFoundException", str(exc), 404)
-
-
-@kms_api_bp.route("/keys//disable", methods=["POST"])
-@limiter.limit("30 per minute")
-def disable_key(key_id: str):
- """Disable a KMS key."""
- principal, error = _require_principal()
- if error:
- return error
-
- kms = _kms()
- if not kms:
- return _error_response("KMSNotEnabled", "KMS is not configured", 400)
-
- try:
- kms.disable_key(key_id)
- current_app.logger.info(
- "KMS key disabled",
- extra={"key_id": key_id, "principal": principal.access_key},
- )
- return Response(status=200)
- except EncryptionError as exc:
- return _error_response("NotFoundException", str(exc), 404)
-
-@kms_api_bp.route("/encrypt", methods=["POST"])
-@limiter.limit("60 per minute")
-def encrypt_data():
- """Encrypt data using a KMS key."""
- principal, error = _require_principal()
- if error:
- return error
-
- kms = _kms()
- if not kms:
- return _error_response("KMSNotEnabled", "KMS is not configured", 400)
-
- payload = request.get_json(silent=True) or {}
- key_id = payload.get("KeyId")
- plaintext_b64 = payload.get("Plaintext")
- context = payload.get("EncryptionContext")
-
- if not key_id:
- return _error_response("ValidationException", "KeyId is required", 400)
- if not plaintext_b64:
- return _error_response("ValidationException", "Plaintext is required", 400)
-
- try:
- plaintext = base64.b64decode(plaintext_b64)
- except Exception:
- return _error_response("ValidationException", "Plaintext must be base64 encoded", 400)
-
- try:
- ciphertext = kms.encrypt(key_id, plaintext, context)
- return jsonify({
- "CiphertextBlob": base64.b64encode(ciphertext).decode(),
- "KeyId": key_id,
- "EncryptionAlgorithm": "SYMMETRIC_DEFAULT",
- })
- except EncryptionError as exc:
- return _error_response("KMSInternalException", str(exc), 400)
-
-
-@kms_api_bp.route("/decrypt", methods=["POST"])
-@limiter.limit("60 per minute")
-def decrypt_data():
- """Decrypt data using a KMS key."""
- principal, error = _require_principal()
- if error:
- return error
-
- kms = _kms()
- if not kms:
- return _error_response("KMSNotEnabled", "KMS is not configured", 400)
-
- payload = request.get_json(silent=True) or {}
- ciphertext_b64 = payload.get("CiphertextBlob")
- context = payload.get("EncryptionContext")
-
- if not ciphertext_b64:
- return _error_response("ValidationException", "CiphertextBlob is required", 400)
-
- try:
- ciphertext = base64.b64decode(ciphertext_b64)
- except Exception:
- return _error_response("ValidationException", "CiphertextBlob must be base64 encoded", 400)
-
- try:
- plaintext, key_id = kms.decrypt(ciphertext, context)
- return jsonify({
- "Plaintext": base64.b64encode(plaintext).decode(),
- "KeyId": key_id,
- "EncryptionAlgorithm": "SYMMETRIC_DEFAULT",
- })
- except EncryptionError as exc:
- return _error_response("InvalidCiphertextException", str(exc), 400)
-
-
-@kms_api_bp.route("/generate-data-key", methods=["POST"])
-@limiter.limit("60 per minute")
-def generate_data_key():
- """Generate a data encryption key."""
- principal, error = _require_principal()
- if error:
- return error
-
- kms = _kms()
- if not kms:
- return _error_response("KMSNotEnabled", "KMS is not configured", 400)
-
- payload = request.get_json(silent=True) or {}
- key_id = payload.get("KeyId")
- context = payload.get("EncryptionContext")
- key_spec = payload.get("KeySpec", "AES_256")
-
- if not key_id:
- return _error_response("ValidationException", "KeyId is required", 400)
-
- if key_spec not in {"AES_256", "AES_128"}:
- return _error_response("ValidationException", "KeySpec must be AES_256 or AES_128", 400)
-
- try:
- plaintext_key, encrypted_key = kms.generate_data_key(key_id, context)
-
- if key_spec == "AES_128":
- plaintext_key = plaintext_key[:16]
-
- return jsonify({
- "Plaintext": base64.b64encode(plaintext_key).decode(),
- "CiphertextBlob": base64.b64encode(encrypted_key).decode(),
- "KeyId": key_id,
- })
- except EncryptionError as exc:
- return _error_response("KMSInternalException", str(exc), 400)
-
-
-@kms_api_bp.route("/generate-data-key-without-plaintext", methods=["POST"])
-@limiter.limit("60 per minute")
-def generate_data_key_without_plaintext():
- """Generate a data encryption key without returning the plaintext."""
- principal, error = _require_principal()
- if error:
- return error
-
- kms = _kms()
- if not kms:
- return _error_response("KMSNotEnabled", "KMS is not configured", 400)
-
- payload = request.get_json(silent=True) or {}
- key_id = payload.get("KeyId")
- context = payload.get("EncryptionContext")
-
- if not key_id:
- return _error_response("ValidationException", "KeyId is required", 400)
-
- try:
- _, encrypted_key = kms.generate_data_key(key_id, context)
- return jsonify({
- "CiphertextBlob": base64.b64encode(encrypted_key).decode(),
- "KeyId": key_id,
- })
- except EncryptionError as exc:
- return _error_response("KMSInternalException", str(exc), 400)
-
-
-@kms_api_bp.route("/re-encrypt", methods=["POST"])
-@limiter.limit("30 per minute")
-def re_encrypt():
- """Re-encrypt data with a different key."""
- principal, error = _require_principal()
- if error:
- return error
-
- kms = _kms()
- if not kms:
- return _error_response("KMSNotEnabled", "KMS is not configured", 400)
-
- payload = request.get_json(silent=True) or {}
- ciphertext_b64 = payload.get("CiphertextBlob")
- destination_key_id = payload.get("DestinationKeyId")
- source_context = payload.get("SourceEncryptionContext")
- destination_context = payload.get("DestinationEncryptionContext")
-
- if not ciphertext_b64:
- return _error_response("ValidationException", "CiphertextBlob is required", 400)
- if not destination_key_id:
- return _error_response("ValidationException", "DestinationKeyId is required", 400)
-
- try:
- ciphertext = base64.b64decode(ciphertext_b64)
- except Exception:
- return _error_response("ValidationException", "CiphertextBlob must be base64 encoded", 400)
-
- try:
- plaintext, source_key_id = kms.decrypt(ciphertext, source_context)
- new_ciphertext = kms.encrypt(destination_key_id, plaintext, destination_context)
-
- return jsonify({
- "CiphertextBlob": base64.b64encode(new_ciphertext).decode(),
- "SourceKeyId": source_key_id,
- "KeyId": destination_key_id,
- })
- except EncryptionError as exc:
- return _error_response("KMSInternalException", str(exc), 400)
-
-
-@kms_api_bp.route("/generate-random", methods=["POST"])
-@limiter.limit("60 per minute")
-def generate_random():
- """Generate random bytes."""
- principal, error = _require_principal()
- if error:
- return error
-
- kms = _kms()
- if not kms:
- return _error_response("KMSNotEnabled", "KMS is not configured", 400)
-
- payload = request.get_json(silent=True) or {}
- num_bytes = payload.get("NumberOfBytes", 32)
-
- try:
- num_bytes = int(num_bytes)
- except (TypeError, ValueError):
- return _error_response("ValidationException", "NumberOfBytes must be an integer", 400)
-
- try:
- random_bytes = kms.generate_random(num_bytes)
- return jsonify({
- "Plaintext": base64.b64encode(random_bytes).decode(),
- })
- except EncryptionError as exc:
- return _error_response("ValidationException", str(exc), 400)
-
-@kms_api_bp.route("/client/generate-key", methods=["POST"])
-@limiter.limit("30 per minute")
-def generate_client_key():
- """Generate a client-side encryption key."""
- principal, error = _require_principal()
- if error:
- return error
-
- key_info = ClientEncryptionHelper.generate_client_key()
- return jsonify(key_info)
-
-
-@kms_api_bp.route("/client/encrypt", methods=["POST"])
-@limiter.limit("60 per minute")
-def client_encrypt():
- """Encrypt data using client-side encryption."""
- principal, error = _require_principal()
- if error:
- return error
-
- payload = request.get_json(silent=True) or {}
- plaintext_b64 = payload.get("Plaintext")
- key_b64 = payload.get("Key")
-
- if not plaintext_b64 or not key_b64:
- return _error_response("ValidationException", "Plaintext and Key are required", 400)
-
- try:
- plaintext = base64.b64decode(plaintext_b64)
- result = ClientEncryptionHelper.encrypt_with_key(plaintext, key_b64)
- return jsonify(result)
- except Exception as exc:
- return _error_response("EncryptionError", str(exc), 400)
-
-
-@kms_api_bp.route("/client/decrypt", methods=["POST"])
-@limiter.limit("60 per minute")
-def client_decrypt():
- """Decrypt data using client-side encryption."""
- principal, error = _require_principal()
- if error:
- return error
-
- payload = request.get_json(silent=True) or {}
- ciphertext_b64 = payload.get("Ciphertext") or payload.get("ciphertext")
- nonce_b64 = payload.get("Nonce") or payload.get("nonce")
- key_b64 = payload.get("Key") or payload.get("key")
-
- if not ciphertext_b64 or not nonce_b64 or not key_b64:
- return _error_response("ValidationException", "Ciphertext, Nonce, and Key are required", 400)
-
- try:
- plaintext = ClientEncryptionHelper.decrypt_with_key(ciphertext_b64, nonce_b64, key_b64)
- return jsonify({
- "Plaintext": base64.b64encode(plaintext).decode(),
- })
- except Exception as exc:
- return _error_response("DecryptionError", str(exc), 400)
-
-@kms_api_bp.route("/materials/", methods=["POST"])
-@limiter.limit("60 per minute")
-def get_encryption_materials(key_id: str):
- """Get encryption materials for client-side S3 encryption.
-
- This is used by S3 encryption clients that want to use KMS for
- key management but perform encryption client-side.
- """
- principal, error = _require_principal()
- if error:
- return error
-
- kms = _kms()
- if not kms:
- return _error_response("KMSNotEnabled", "KMS is not configured", 400)
-
- payload = request.get_json(silent=True) or {}
- context = payload.get("EncryptionContext")
-
- try:
- plaintext_key, encrypted_key = kms.generate_data_key(key_id, context)
-
- return jsonify({
- "PlaintextKey": base64.b64encode(plaintext_key).decode(),
- "EncryptedKey": base64.b64encode(encrypted_key).decode(),
- "KeyId": key_id,
- "Algorithm": "AES-256-GCM",
- "KeyWrapAlgorithm": "kms",
- })
- except EncryptionError as exc:
- return _error_response("KMSInternalException", str(exc), 400)
diff --git a/python/app/lifecycle.py b/python/app/lifecycle.py
deleted file mode 100644
index ea2c262..0000000
--- a/python/app/lifecycle.py
+++ /dev/null
@@ -1,340 +0,0 @@
-from __future__ import annotations
-
-import json
-import logging
-import threading
-import time
-from dataclasses import dataclass, field
-from datetime import datetime, timedelta, timezone
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-from .storage import ObjectStorage, StorageError
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class LifecycleResult:
- bucket_name: str
- objects_deleted: int = 0
- versions_deleted: int = 0
- uploads_aborted: int = 0
- errors: List[str] = field(default_factory=list)
- execution_time_seconds: float = 0.0
-
-
-@dataclass
-class LifecycleExecutionRecord:
- timestamp: float
- bucket_name: str
- objects_deleted: int
- versions_deleted: int
- uploads_aborted: int
- errors: List[str]
- execution_time_seconds: float
-
- def to_dict(self) -> dict:
- return {
- "timestamp": self.timestamp,
- "bucket_name": self.bucket_name,
- "objects_deleted": self.objects_deleted,
- "versions_deleted": self.versions_deleted,
- "uploads_aborted": self.uploads_aborted,
- "errors": self.errors,
- "execution_time_seconds": self.execution_time_seconds,
- }
-
- @classmethod
- def from_dict(cls, data: dict) -> "LifecycleExecutionRecord":
- return cls(
- timestamp=data["timestamp"],
- bucket_name=data["bucket_name"],
- objects_deleted=data["objects_deleted"],
- versions_deleted=data["versions_deleted"],
- uploads_aborted=data["uploads_aborted"],
- errors=data.get("errors", []),
- execution_time_seconds=data["execution_time_seconds"],
- )
-
- @classmethod
- def from_result(cls, result: LifecycleResult) -> "LifecycleExecutionRecord":
- return cls(
- timestamp=time.time(),
- bucket_name=result.bucket_name,
- objects_deleted=result.objects_deleted,
- versions_deleted=result.versions_deleted,
- uploads_aborted=result.uploads_aborted,
- errors=result.errors.copy(),
- execution_time_seconds=result.execution_time_seconds,
- )
-
-
-class LifecycleHistoryStore:
- def __init__(self, storage_root: Path, max_history_per_bucket: int = 50) -> None:
- self.storage_root = storage_root
- self.max_history_per_bucket = max_history_per_bucket
- self._lock = threading.Lock()
-
- def _get_history_path(self, bucket_name: str) -> Path:
- return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "lifecycle_history.json"
-
- def load_history(self, bucket_name: str) -> List[LifecycleExecutionRecord]:
- path = self._get_history_path(bucket_name)
- if not path.exists():
- return []
- try:
- with open(path, "r") as f:
- data = json.load(f)
- return [LifecycleExecutionRecord.from_dict(d) for d in data.get("executions", [])]
- except (OSError, ValueError, KeyError) as e:
- logger.error(f"Failed to load lifecycle history for {bucket_name}: {e}")
- return []
-
- def save_history(self, bucket_name: str, records: List[LifecycleExecutionRecord]) -> None:
- path = self._get_history_path(bucket_name)
- path.parent.mkdir(parents=True, exist_ok=True)
- data = {"executions": [r.to_dict() for r in records[:self.max_history_per_bucket]]}
- try:
- with open(path, "w") as f:
- json.dump(data, f, indent=2)
- except OSError as e:
- logger.error(f"Failed to save lifecycle history for {bucket_name}: {e}")
-
- def add_record(self, bucket_name: str, record: LifecycleExecutionRecord) -> None:
- with self._lock:
- records = self.load_history(bucket_name)
- records.insert(0, record)
- self.save_history(bucket_name, records)
-
- def get_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[LifecycleExecutionRecord]:
- records = self.load_history(bucket_name)
- return records[offset:offset + limit]
-
-
-class LifecycleManager:
- def __init__(
- self,
- storage: ObjectStorage,
- interval_seconds: int = 3600,
- storage_root: Optional[Path] = None,
- max_history_per_bucket: int = 50,
- ):
- self.storage = storage
- self.interval_seconds = interval_seconds
- self.storage_root = storage_root
- self._timer: Optional[threading.Timer] = None
- self._shutdown = False
- self._lock = threading.Lock()
- self.history_store = LifecycleHistoryStore(storage_root, max_history_per_bucket) if storage_root else None
-
- def start(self) -> None:
- if self._timer is not None:
- return
- self._shutdown = False
- self._schedule_next()
- logger.info(f"Lifecycle manager started with interval {self.interval_seconds}s")
-
- def stop(self) -> None:
- self._shutdown = True
- if self._timer:
- self._timer.cancel()
- self._timer = None
- logger.info("Lifecycle manager stopped")
-
- def _schedule_next(self) -> None:
- if self._shutdown:
- return
- self._timer = threading.Timer(self.interval_seconds, self._run_enforcement)
- self._timer.daemon = True
- self._timer.start()
-
- def _run_enforcement(self) -> None:
- if self._shutdown:
- return
- try:
- self.enforce_all_buckets()
- except Exception as e:
- logger.error(f"Lifecycle enforcement failed: {e}")
- finally:
- self._schedule_next()
-
- def enforce_all_buckets(self) -> Dict[str, LifecycleResult]:
- results = {}
- try:
- buckets = self.storage.list_buckets()
- for bucket in buckets:
- result = self.enforce_rules(bucket.name)
- if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0:
- results[bucket.name] = result
- except StorageError as e:
- logger.error(f"Failed to list buckets for lifecycle: {e}")
- return results
-
- def enforce_rules(self, bucket_name: str) -> LifecycleResult:
- start_time = time.time()
- result = LifecycleResult(bucket_name=bucket_name)
-
- try:
- lifecycle = self.storage.get_bucket_lifecycle(bucket_name)
- if not lifecycle:
- return result
-
- for rule in lifecycle:
- if rule.get("Status") != "Enabled":
- continue
- rule_id = rule.get("ID", "unknown")
- prefix = rule.get("Prefix", rule.get("Filter", {}).get("Prefix", ""))
-
- self._enforce_expiration(bucket_name, rule, prefix, result)
- self._enforce_noncurrent_expiration(bucket_name, rule, prefix, result)
- self._enforce_abort_multipart(bucket_name, rule, result)
-
- except StorageError as e:
- result.errors.append(str(e))
- logger.error(f"Lifecycle enforcement error for {bucket_name}: {e}")
-
- result.execution_time_seconds = time.time() - start_time
- if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0 or result.errors:
- logger.info(
- f"Lifecycle enforcement for {bucket_name}: "
- f"deleted={result.objects_deleted}, versions={result.versions_deleted}, "
- f"aborted={result.uploads_aborted}, time={result.execution_time_seconds:.2f}s"
- )
- if self.history_store:
- record = LifecycleExecutionRecord.from_result(result)
- self.history_store.add_record(bucket_name, record)
- return result
-
- def _enforce_expiration(
- self, bucket_name: str, rule: Dict[str, Any], prefix: str, result: LifecycleResult
- ) -> None:
- expiration = rule.get("Expiration", {})
- if not expiration:
- return
-
- days = expiration.get("Days")
- date_str = expiration.get("Date")
-
- if days:
- cutoff = datetime.now(timezone.utc) - timedelta(days=days)
- elif date_str:
- try:
- cutoff = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
- except ValueError:
- return
- else:
- return
-
- try:
- objects = self.storage.list_objects_all(bucket_name)
- for obj in objects:
- if prefix and not obj.key.startswith(prefix):
- continue
- if obj.last_modified < cutoff:
- try:
- self.storage.delete_object(bucket_name, obj.key)
- result.objects_deleted += 1
- except StorageError as e:
- result.errors.append(f"Failed to delete {obj.key}: {e}")
- except StorageError as e:
- result.errors.append(f"Failed to list objects: {e}")
-
- def _enforce_noncurrent_expiration(
- self, bucket_name: str, rule: Dict[str, Any], prefix: str, result: LifecycleResult
- ) -> None:
- noncurrent = rule.get("NoncurrentVersionExpiration", {})
- noncurrent_days = noncurrent.get("NoncurrentDays")
- if not noncurrent_days:
- return
-
- cutoff = datetime.now(timezone.utc) - timedelta(days=noncurrent_days)
-
- try:
- objects = self.storage.list_objects_all(bucket_name)
- for obj in objects:
- if prefix and not obj.key.startswith(prefix):
- continue
- try:
- versions = self.storage.list_object_versions(bucket_name, obj.key)
- for version in versions:
- archived_at_str = version.get("archived_at", "")
- if not archived_at_str:
- continue
- try:
- archived_at = datetime.fromisoformat(archived_at_str.replace("Z", "+00:00"))
- if archived_at < cutoff:
- version_id = version.get("version_id")
- if version_id:
- self.storage.delete_object_version(bucket_name, obj.key, version_id)
- result.versions_deleted += 1
- except (ValueError, StorageError) as e:
- result.errors.append(f"Failed to process version: {e}")
- except StorageError:
- pass
- except StorageError as e:
- result.errors.append(f"Failed to list objects: {e}")
-
- try:
- orphaned = self.storage.list_orphaned_objects(bucket_name)
- for item in orphaned:
- obj_key = item.get("key", "")
- if prefix and not obj_key.startswith(prefix):
- continue
- try:
- versions = self.storage.list_object_versions(bucket_name, obj_key)
- for version in versions:
- archived_at_str = version.get("archived_at", "")
- if not archived_at_str:
- continue
- try:
- archived_at = datetime.fromisoformat(archived_at_str.replace("Z", "+00:00"))
- if archived_at < cutoff:
- version_id = version.get("version_id")
- if version_id:
- self.storage.delete_object_version(bucket_name, obj_key, version_id)
- result.versions_deleted += 1
- except (ValueError, StorageError) as e:
- result.errors.append(f"Failed to process orphaned version: {e}")
- except StorageError:
- pass
- except StorageError as e:
- result.errors.append(f"Failed to list orphaned objects: {e}")
-
- def _enforce_abort_multipart(
- self, bucket_name: str, rule: Dict[str, Any], result: LifecycleResult
- ) -> None:
- abort_config = rule.get("AbortIncompleteMultipartUpload", {})
- days_after = abort_config.get("DaysAfterInitiation")
- if not days_after:
- return
-
- cutoff = datetime.now(timezone.utc) - timedelta(days=days_after)
-
- try:
- uploads = self.storage.list_multipart_uploads(bucket_name)
- for upload in uploads:
- created_at_str = upload.get("created_at", "")
- if not created_at_str:
- continue
- try:
- created_at = datetime.fromisoformat(created_at_str.replace("Z", "+00:00"))
- if created_at < cutoff:
- upload_id = upload.get("upload_id")
- if upload_id:
- self.storage.abort_multipart_upload(bucket_name, upload_id)
- result.uploads_aborted += 1
- except (ValueError, StorageError) as e:
- result.errors.append(f"Failed to abort upload: {e}")
- except StorageError as e:
- result.errors.append(f"Failed to list multipart uploads: {e}")
-
- def run_now(self, bucket_name: Optional[str] = None) -> Dict[str, LifecycleResult]:
- if bucket_name:
- return {bucket_name: self.enforce_rules(bucket_name)}
- return self.enforce_all_buckets()
-
- def get_execution_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[LifecycleExecutionRecord]:
- if not self.history_store:
- return []
- return self.history_store.get_history(bucket_name, limit, offset)
diff --git a/python/app/notifications.py b/python/app/notifications.py
deleted file mode 100644
index ee03ba8..0000000
--- a/python/app/notifications.py
+++ /dev/null
@@ -1,406 +0,0 @@
-from __future__ import annotations
-
-import ipaddress
-import json
-import logging
-import queue
-import socket
-import threading
-import time
-import uuid
-from dataclasses import dataclass, field
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-from urllib.parse import urlparse
-
-import requests
-from urllib3.util.connection import create_connection as _urllib3_create_connection
-
-
-def _resolve_and_check_url(url: str, allow_internal: bool = False) -> Optional[str]:
- try:
- parsed = urlparse(url)
- hostname = parsed.hostname
- if not hostname:
- return None
- cloud_metadata_hosts = {
- "metadata.google.internal",
- "169.254.169.254",
- }
- if hostname.lower() in cloud_metadata_hosts:
- return None
- if allow_internal:
- return hostname
- blocked_hosts = {
- "localhost",
- "127.0.0.1",
- "0.0.0.0",
- "::1",
- "[::1]",
- }
- if hostname.lower() in blocked_hosts:
- return None
- try:
- resolved_ip = socket.gethostbyname(hostname)
- ip = ipaddress.ip_address(resolved_ip)
- if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
- return None
- return resolved_ip
- except (socket.gaierror, ValueError):
- return None
- except Exception:
- return None
-
-
-def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
- return _resolve_and_check_url(url, allow_internal) is not None
-
-
-_dns_pin_lock = threading.Lock()
-
-
-def _pinned_post(url: str, pinned_ip: str, **kwargs: Any) -> requests.Response:
- parsed = urlparse(url)
- hostname = parsed.hostname or ""
- session = requests.Session()
- original_create = _urllib3_create_connection
-
- def _create_pinned(address: Any, *args: Any, **kw: Any) -> Any:
- host, req_port = address
- if host == hostname:
- return original_create((pinned_ip, req_port), *args, **kw)
- return original_create(address, *args, **kw)
-
- import urllib3.util.connection as _conn_mod
- with _dns_pin_lock:
- _conn_mod.create_connection = _create_pinned
- try:
- return session.post(url, **kwargs)
- finally:
- _conn_mod.create_connection = original_create
-
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class NotificationEvent:
- event_name: str
- bucket_name: str
- object_key: str
- object_size: int = 0
- etag: str = ""
- version_id: Optional[str] = None
- timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
- request_id: str = field(default_factory=lambda: uuid.uuid4().hex)
- source_ip: str = ""
- user_identity: str = ""
-
- def to_s3_event(self) -> Dict[str, Any]:
- return {
- "Records": [
- {
- "eventVersion": "2.1",
- "eventSource": "myfsio:s3",
- "awsRegion": "local",
- "eventTime": self.timestamp.strftime("%Y-%m-%dT%H:%M:%S.000Z"),
- "eventName": self.event_name,
- "userIdentity": {
- "principalId": self.user_identity or "ANONYMOUS",
- },
- "requestParameters": {
- "sourceIPAddress": self.source_ip or "127.0.0.1",
- },
- "responseElements": {
- "x-amz-request-id": self.request_id,
- "x-amz-id-2": self.request_id,
- },
- "s3": {
- "s3SchemaVersion": "1.0",
- "configurationId": "notification",
- "bucket": {
- "name": self.bucket_name,
- "ownerIdentity": {"principalId": "local"},
- "arn": f"arn:aws:s3:::{self.bucket_name}",
- },
- "object": {
- "key": self.object_key,
- "size": self.object_size,
- "eTag": self.etag,
- "versionId": self.version_id or "null",
- "sequencer": f"{int(time.time() * 1000):016X}",
- },
- },
- }
- ]
- }
-
-
-@dataclass
-class WebhookDestination:
- url: str
- headers: Dict[str, str] = field(default_factory=dict)
- timeout_seconds: int = 30
- retry_count: int = 3
- retry_delay_seconds: int = 1
-
- def to_dict(self) -> Dict[str, Any]:
- return {
- "url": self.url,
- "headers": self.headers,
- "timeout_seconds": self.timeout_seconds,
- "retry_count": self.retry_count,
- "retry_delay_seconds": self.retry_delay_seconds,
- }
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> "WebhookDestination":
- return cls(
- url=data.get("url", ""),
- headers=data.get("headers", {}),
- timeout_seconds=data.get("timeout_seconds", 30),
- retry_count=data.get("retry_count", 3),
- retry_delay_seconds=data.get("retry_delay_seconds", 1),
- )
-
-
-@dataclass
-class NotificationConfiguration:
- id: str
- events: List[str]
- destination: WebhookDestination
- prefix_filter: str = ""
- suffix_filter: str = ""
-
- def matches_event(self, event_name: str, object_key: str) -> bool:
- event_match = False
- for pattern in self.events:
- if pattern.endswith("*"):
- base = pattern[:-1]
- if event_name.startswith(base):
- event_match = True
- break
- elif pattern == event_name:
- event_match = True
- break
-
- if not event_match:
- return False
-
- if self.prefix_filter and not object_key.startswith(self.prefix_filter):
- return False
- if self.suffix_filter and not object_key.endswith(self.suffix_filter):
- return False
-
- return True
-
- def to_dict(self) -> Dict[str, Any]:
- return {
- "Id": self.id,
- "Events": self.events,
- "Destination": self.destination.to_dict(),
- "Filter": {
- "Key": {
- "FilterRules": [
- {"Name": "prefix", "Value": self.prefix_filter},
- {"Name": "suffix", "Value": self.suffix_filter},
- ]
- }
- },
- }
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> "NotificationConfiguration":
- prefix = ""
- suffix = ""
- filter_data = data.get("Filter", {})
- key_filter = filter_data.get("Key", {})
- for rule in key_filter.get("FilterRules", []):
- if rule.get("Name") == "prefix":
- prefix = rule.get("Value", "")
- elif rule.get("Name") == "suffix":
- suffix = rule.get("Value", "")
-
- return cls(
- id=data.get("Id", uuid.uuid4().hex),
- events=data.get("Events", []),
- destination=WebhookDestination.from_dict(data.get("Destination", {})),
- prefix_filter=prefix,
- suffix_filter=suffix,
- )
-
-
-class NotificationService:
- def __init__(self, storage_root: Path, worker_count: int = 2, allow_internal_endpoints: bool = False):
- self.storage_root = storage_root
- self._allow_internal_endpoints = allow_internal_endpoints
- self._configs: Dict[str, List[NotificationConfiguration]] = {}
- self._queue: queue.Queue[tuple[NotificationEvent, WebhookDestination]] = queue.Queue()
- self._workers: List[threading.Thread] = []
- self._shutdown = threading.Event()
- self._stats = {
- "events_queued": 0,
- "events_sent": 0,
- "events_failed": 0,
- }
-
- for i in range(worker_count):
- worker = threading.Thread(target=self._worker_loop, name=f"notification-worker-{i}", daemon=True)
- worker.start()
- self._workers.append(worker)
-
- def _config_path(self, bucket_name: str) -> Path:
- return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "notifications.json"
-
- def get_bucket_notifications(self, bucket_name: str) -> List[NotificationConfiguration]:
- if bucket_name in self._configs:
- return self._configs[bucket_name]
-
- config_path = self._config_path(bucket_name)
- if not config_path.exists():
- return []
-
- try:
- data = json.loads(config_path.read_text(encoding="utf-8"))
- configs = [NotificationConfiguration.from_dict(c) for c in data.get("configurations", [])]
- self._configs[bucket_name] = configs
- return configs
- except (json.JSONDecodeError, OSError) as e:
- logger.warning(f"Failed to load notification config for {bucket_name}: {e}")
- return []
-
- def set_bucket_notifications(
- self, bucket_name: str, configurations: List[NotificationConfiguration]
- ) -> None:
- config_path = self._config_path(bucket_name)
- config_path.parent.mkdir(parents=True, exist_ok=True)
-
- data = {"configurations": [c.to_dict() for c in configurations]}
- config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
- self._configs[bucket_name] = configurations
-
- def delete_bucket_notifications(self, bucket_name: str) -> None:
- config_path = self._config_path(bucket_name)
- try:
- if config_path.exists():
- config_path.unlink()
- except OSError:
- pass
- self._configs.pop(bucket_name, None)
-
- def emit_event(self, event: NotificationEvent) -> None:
- configurations = self.get_bucket_notifications(event.bucket_name)
- if not configurations:
- return
-
- for config in configurations:
- if config.matches_event(event.event_name, event.object_key):
- self._queue.put((event, config.destination))
- self._stats["events_queued"] += 1
- logger.debug(
- f"Queued notification for {event.event_name} on {event.bucket_name}/{event.object_key}"
- )
-
- def emit_object_created(
- self,
- bucket_name: str,
- object_key: str,
- *,
- size: int = 0,
- etag: str = "",
- version_id: Optional[str] = None,
- request_id: str = "",
- source_ip: str = "",
- user_identity: str = "",
- operation: str = "Put",
- ) -> None:
- event = NotificationEvent(
- event_name=f"s3:ObjectCreated:{operation}",
- bucket_name=bucket_name,
- object_key=object_key,
- object_size=size,
- etag=etag,
- version_id=version_id,
- request_id=request_id or uuid.uuid4().hex,
- source_ip=source_ip,
- user_identity=user_identity,
- )
- self.emit_event(event)
-
- def emit_object_removed(
- self,
- bucket_name: str,
- object_key: str,
- *,
- version_id: Optional[str] = None,
- request_id: str = "",
- source_ip: str = "",
- user_identity: str = "",
- operation: str = "Delete",
- ) -> None:
- event = NotificationEvent(
- event_name=f"s3:ObjectRemoved:{operation}",
- bucket_name=bucket_name,
- object_key=object_key,
- version_id=version_id,
- request_id=request_id or uuid.uuid4().hex,
- source_ip=source_ip,
- user_identity=user_identity,
- )
- self.emit_event(event)
-
- def _worker_loop(self) -> None:
- while not self._shutdown.is_set():
- try:
- event, destination = self._queue.get(timeout=1.0)
- except queue.Empty:
- continue
-
- try:
- self._send_notification(event, destination)
- self._stats["events_sent"] += 1
- except Exception as e:
- self._stats["events_failed"] += 1
- logger.error(f"Failed to send notification: {e}")
- finally:
- self._queue.task_done()
-
- def _send_notification(self, event: NotificationEvent, destination: WebhookDestination) -> None:
- resolved_ip = _resolve_and_check_url(destination.url, allow_internal=self._allow_internal_endpoints)
- if not resolved_ip:
- raise RuntimeError(f"Blocked request (SSRF protection): {destination.url}")
- payload = event.to_s3_event()
- headers = {"Content-Type": "application/json", **destination.headers}
-
- last_error = None
- for attempt in range(destination.retry_count):
- try:
- response = _pinned_post(
- destination.url,
- resolved_ip,
- json=payload,
- headers=headers,
- timeout=destination.timeout_seconds,
- )
- if response.status_code < 400:
- logger.info(
- f"Notification sent: {event.event_name} -> {destination.url} (status={response.status_code})"
- )
- return
- last_error = f"HTTP {response.status_code}: {response.text[:200]}"
- except requests.RequestException as e:
- last_error = str(e)
-
- if attempt < destination.retry_count - 1:
- time.sleep(destination.retry_delay_seconds * (attempt + 1))
-
- raise RuntimeError(f"Failed after {destination.retry_count} attempts: {last_error}")
-
- def get_stats(self) -> Dict[str, int]:
- return dict(self._stats)
-
- def shutdown(self) -> None:
- self._shutdown.set()
- for worker in self._workers:
- worker.join(timeout=5.0)
diff --git a/python/app/object_lock.py b/python/app/object_lock.py
deleted file mode 100644
index a5aab2c..0000000
--- a/python/app/object_lock.py
+++ /dev/null
@@ -1,234 +0,0 @@
-from __future__ import annotations
-
-import json
-from dataclasses import dataclass
-from datetime import datetime, timezone
-from enum import Enum
-from pathlib import Path
-from typing import Any, Dict, Optional
-
-
-class RetentionMode(Enum):
- GOVERNANCE = "GOVERNANCE"
- COMPLIANCE = "COMPLIANCE"
-
-
-class ObjectLockError(Exception):
- pass
-
-
-@dataclass
-class ObjectLockRetention:
- mode: RetentionMode
- retain_until_date: datetime
-
- def to_dict(self) -> Dict[str, str]:
- return {
- "Mode": self.mode.value,
- "RetainUntilDate": self.retain_until_date.isoformat(),
- }
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> Optional["ObjectLockRetention"]:
- if not data:
- return None
- mode_str = data.get("Mode")
- date_str = data.get("RetainUntilDate")
- if not mode_str or not date_str:
- return None
- try:
- mode = RetentionMode(mode_str)
- retain_until = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
- return cls(mode=mode, retain_until_date=retain_until)
- except (ValueError, KeyError):
- return None
-
- def is_expired(self) -> bool:
- return datetime.now(timezone.utc) > self.retain_until_date
-
-
-@dataclass
-class ObjectLockConfig:
- enabled: bool = False
- default_retention: Optional[ObjectLockRetention] = None
-
- def to_dict(self) -> Dict[str, Any]:
- result: Dict[str, Any] = {"ObjectLockEnabled": "Enabled" if self.enabled else "Disabled"}
- if self.default_retention:
- result["Rule"] = {
- "DefaultRetention": {
- "Mode": self.default_retention.mode.value,
- "Days": None,
- "Years": None,
- }
- }
- return result
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> "ObjectLockConfig":
- enabled = data.get("ObjectLockEnabled") == "Enabled"
- default_retention = None
- rule = data.get("Rule")
- if rule and "DefaultRetention" in rule:
- dr = rule["DefaultRetention"]
- mode_str = dr.get("Mode", "GOVERNANCE")
- days = dr.get("Days")
- years = dr.get("Years")
- if days or years:
- from datetime import timedelta
- now = datetime.now(timezone.utc)
- if years:
- delta = timedelta(days=int(years) * 365)
- else:
- delta = timedelta(days=int(days))
- default_retention = ObjectLockRetention(
- mode=RetentionMode(mode_str),
- retain_until_date=now + delta,
- )
- return cls(enabled=enabled, default_retention=default_retention)
-
-
-class ObjectLockService:
- def __init__(self, storage_root: Path):
- self.storage_root = storage_root
- self._config_cache: Dict[str, ObjectLockConfig] = {}
-
- def _bucket_lock_config_path(self, bucket_name: str) -> Path:
- return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "object_lock.json"
-
- def _object_lock_meta_path(self, bucket_name: str, object_key: str) -> Path:
- safe_key = object_key.replace("/", "_").replace("\\", "_")
- return (
- self.storage_root / ".myfsio.sys" / "buckets" / bucket_name /
- "locks" / f"{safe_key}.lock.json"
- )
-
- def get_bucket_lock_config(self, bucket_name: str) -> ObjectLockConfig:
- if bucket_name in self._config_cache:
- return self._config_cache[bucket_name]
-
- config_path = self._bucket_lock_config_path(bucket_name)
- if not config_path.exists():
- return ObjectLockConfig(enabled=False)
-
- try:
- data = json.loads(config_path.read_text(encoding="utf-8"))
- config = ObjectLockConfig.from_dict(data)
- self._config_cache[bucket_name] = config
- return config
- except (json.JSONDecodeError, OSError):
- return ObjectLockConfig(enabled=False)
-
- def set_bucket_lock_config(self, bucket_name: str, config: ObjectLockConfig) -> None:
- config_path = self._bucket_lock_config_path(bucket_name)
- config_path.parent.mkdir(parents=True, exist_ok=True)
- config_path.write_text(json.dumps(config.to_dict()), encoding="utf-8")
- self._config_cache[bucket_name] = config
-
- def enable_bucket_lock(self, bucket_name: str) -> None:
- config = self.get_bucket_lock_config(bucket_name)
- config.enabled = True
- self.set_bucket_lock_config(bucket_name, config)
-
- def is_bucket_lock_enabled(self, bucket_name: str) -> bool:
- return self.get_bucket_lock_config(bucket_name).enabled
-
- def get_object_retention(self, bucket_name: str, object_key: str) -> Optional[ObjectLockRetention]:
- meta_path = self._object_lock_meta_path(bucket_name, object_key)
- if not meta_path.exists():
- return None
- try:
- data = json.loads(meta_path.read_text(encoding="utf-8"))
- return ObjectLockRetention.from_dict(data.get("retention", {}))
- except (json.JSONDecodeError, OSError):
- return None
-
- def set_object_retention(
- self,
- bucket_name: str,
- object_key: str,
- retention: ObjectLockRetention,
- bypass_governance: bool = False,
- ) -> None:
- existing = self.get_object_retention(bucket_name, object_key)
- if existing and not existing.is_expired():
- if existing.mode == RetentionMode.COMPLIANCE:
- raise ObjectLockError(
- "Cannot modify retention on object with COMPLIANCE mode until retention expires"
- )
- if existing.mode == RetentionMode.GOVERNANCE and not bypass_governance:
- raise ObjectLockError(
- "Cannot modify GOVERNANCE retention without bypass-governance permission"
- )
-
- meta_path = self._object_lock_meta_path(bucket_name, object_key)
- meta_path.parent.mkdir(parents=True, exist_ok=True)
-
- existing_data: Dict[str, Any] = {}
- if meta_path.exists():
- try:
- existing_data = json.loads(meta_path.read_text(encoding="utf-8"))
- except (json.JSONDecodeError, OSError):
- pass
-
- existing_data["retention"] = retention.to_dict()
- meta_path.write_text(json.dumps(existing_data), encoding="utf-8")
-
- def get_legal_hold(self, bucket_name: str, object_key: str) -> bool:
- meta_path = self._object_lock_meta_path(bucket_name, object_key)
- if not meta_path.exists():
- return False
- try:
- data = json.loads(meta_path.read_text(encoding="utf-8"))
- return data.get("legal_hold", False)
- except (json.JSONDecodeError, OSError):
- return False
-
- def set_legal_hold(self, bucket_name: str, object_key: str, enabled: bool) -> None:
- meta_path = self._object_lock_meta_path(bucket_name, object_key)
- meta_path.parent.mkdir(parents=True, exist_ok=True)
-
- existing_data: Dict[str, Any] = {}
- if meta_path.exists():
- try:
- existing_data = json.loads(meta_path.read_text(encoding="utf-8"))
- except (json.JSONDecodeError, OSError):
- pass
-
- existing_data["legal_hold"] = enabled
- meta_path.write_text(json.dumps(existing_data), encoding="utf-8")
-
- def can_delete_object(
- self,
- bucket_name: str,
- object_key: str,
- bypass_governance: bool = False,
- ) -> tuple[bool, str]:
- if self.get_legal_hold(bucket_name, object_key):
- return False, "Object is under legal hold"
-
- retention = self.get_object_retention(bucket_name, object_key)
- if retention and not retention.is_expired():
- if retention.mode == RetentionMode.COMPLIANCE:
- return False, f"Object is locked in COMPLIANCE mode until {retention.retain_until_date.isoformat()}"
- if retention.mode == RetentionMode.GOVERNANCE:
- if not bypass_governance:
- return False, f"Object is locked in GOVERNANCE mode until {retention.retain_until_date.isoformat()}"
-
- return True, ""
-
- def can_overwrite_object(
- self,
- bucket_name: str,
- object_key: str,
- bypass_governance: bool = False,
- ) -> tuple[bool, str]:
- return self.can_delete_object(bucket_name, object_key, bypass_governance)
-
- def delete_object_lock_metadata(self, bucket_name: str, object_key: str) -> None:
- meta_path = self._object_lock_meta_path(bucket_name, object_key)
- try:
- if meta_path.exists():
- meta_path.unlink()
- except OSError:
- pass
diff --git a/python/app/operation_metrics.py b/python/app/operation_metrics.py
deleted file mode 100644
index 0917d8e..0000000
--- a/python/app/operation_metrics.py
+++ /dev/null
@@ -1,296 +0,0 @@
-from __future__ import annotations
-
-import json
-import logging
-import random
-import threading
-import time
-from collections import defaultdict
-from dataclasses import dataclass, field
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-MAX_LATENCY_SAMPLES = 5000
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class OperationStats:
- count: int = 0
- success_count: int = 0
- error_count: int = 0
- latency_sum_ms: float = 0.0
- latency_min_ms: float = float("inf")
- latency_max_ms: float = 0.0
- bytes_in: int = 0
- bytes_out: int = 0
- latency_samples: List[float] = field(default_factory=list)
-
- @staticmethod
- def _compute_percentile(sorted_data: List[float], p: float) -> float:
- if not sorted_data:
- return 0.0
- k = (len(sorted_data) - 1) * (p / 100.0)
- f = int(k)
- c = min(f + 1, len(sorted_data) - 1)
- d = k - f
- return sorted_data[f] + d * (sorted_data[c] - sorted_data[f])
-
- def record(self, latency_ms: float, success: bool, bytes_in: int = 0, bytes_out: int = 0) -> None:
- self.count += 1
- if success:
- self.success_count += 1
- else:
- self.error_count += 1
- self.latency_sum_ms += latency_ms
- if latency_ms < self.latency_min_ms:
- self.latency_min_ms = latency_ms
- if latency_ms > self.latency_max_ms:
- self.latency_max_ms = latency_ms
- self.bytes_in += bytes_in
- self.bytes_out += bytes_out
- if len(self.latency_samples) < MAX_LATENCY_SAMPLES:
- self.latency_samples.append(latency_ms)
- else:
- j = random.randint(0, self.count - 1)
- if j < MAX_LATENCY_SAMPLES:
- self.latency_samples[j] = latency_ms
-
- def to_dict(self) -> Dict[str, Any]:
- avg_latency = self.latency_sum_ms / self.count if self.count > 0 else 0.0
- min_latency = self.latency_min_ms if self.latency_min_ms != float("inf") else 0.0
- sorted_latencies = sorted(self.latency_samples)
- return {
- "count": self.count,
- "success_count": self.success_count,
- "error_count": self.error_count,
- "latency_avg_ms": round(avg_latency, 2),
- "latency_min_ms": round(min_latency, 2),
- "latency_max_ms": round(self.latency_max_ms, 2),
- "latency_p50_ms": round(self._compute_percentile(sorted_latencies, 50), 2),
- "latency_p95_ms": round(self._compute_percentile(sorted_latencies, 95), 2),
- "latency_p99_ms": round(self._compute_percentile(sorted_latencies, 99), 2),
- "bytes_in": self.bytes_in,
- "bytes_out": self.bytes_out,
- }
-
- def merge(self, other: "OperationStats") -> None:
- self.count += other.count
- self.success_count += other.success_count
- self.error_count += other.error_count
- self.latency_sum_ms += other.latency_sum_ms
- if other.latency_min_ms < self.latency_min_ms:
- self.latency_min_ms = other.latency_min_ms
- if other.latency_max_ms > self.latency_max_ms:
- self.latency_max_ms = other.latency_max_ms
- self.bytes_in += other.bytes_in
- self.bytes_out += other.bytes_out
- combined = self.latency_samples + other.latency_samples
- if len(combined) > MAX_LATENCY_SAMPLES:
- random.shuffle(combined)
- combined = combined[:MAX_LATENCY_SAMPLES]
- self.latency_samples = combined
-
-
-@dataclass
-class MetricsSnapshot:
- timestamp: datetime
- window_seconds: int
- by_method: Dict[str, Dict[str, Any]]
- by_endpoint: Dict[str, Dict[str, Any]]
- by_status_class: Dict[str, int]
- error_codes: Dict[str, int]
- totals: Dict[str, Any]
-
- def to_dict(self) -> Dict[str, Any]:
- return {
- "timestamp": self.timestamp.isoformat(),
- "window_seconds": self.window_seconds,
- "by_method": self.by_method,
- "by_endpoint": self.by_endpoint,
- "by_status_class": self.by_status_class,
- "error_codes": self.error_codes,
- "totals": self.totals,
- }
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> "MetricsSnapshot":
- return cls(
- timestamp=datetime.fromisoformat(data["timestamp"]),
- window_seconds=data.get("window_seconds", 300),
- by_method=data.get("by_method", {}),
- by_endpoint=data.get("by_endpoint", {}),
- by_status_class=data.get("by_status_class", {}),
- error_codes=data.get("error_codes", {}),
- totals=data.get("totals", {}),
- )
-
-
-class OperationMetricsCollector:
- def __init__(
- self,
- storage_root: Path,
- interval_minutes: int = 5,
- retention_hours: int = 24,
- ):
- self.storage_root = storage_root
- self.interval_seconds = interval_minutes * 60
- self.retention_hours = retention_hours
- self._lock = threading.Lock()
- self._by_method: Dict[str, OperationStats] = defaultdict(OperationStats)
- self._by_endpoint: Dict[str, OperationStats] = defaultdict(OperationStats)
- self._by_status_class: Dict[str, int] = {}
- self._error_codes: Dict[str, int] = {}
- self._totals = OperationStats()
- self._window_start = time.time()
- self._shutdown = threading.Event()
- self._snapshots: List[MetricsSnapshot] = []
-
- self._load_history()
-
- self._snapshot_thread = threading.Thread(
- target=self._snapshot_loop, name="operation-metrics-snapshot", daemon=True
- )
- self._snapshot_thread.start()
-
- def _config_path(self) -> Path:
- return self.storage_root / ".myfsio.sys" / "config" / "operation_metrics.json"
-
- def _load_history(self) -> None:
- config_path = self._config_path()
- if not config_path.exists():
- return
- try:
- data = json.loads(config_path.read_text(encoding="utf-8"))
- snapshots_data = data.get("snapshots", [])
- self._snapshots = [MetricsSnapshot.from_dict(s) for s in snapshots_data]
- self._prune_old_snapshots()
- except (json.JSONDecodeError, OSError, KeyError) as e:
- logger.warning(f"Failed to load operation metrics history: {e}")
-
- def _save_history(self) -> None:
- config_path = self._config_path()
- config_path.parent.mkdir(parents=True, exist_ok=True)
- try:
- data = {"snapshots": [s.to_dict() for s in self._snapshots]}
- config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
- except OSError as e:
- logger.warning(f"Failed to save operation metrics history: {e}")
-
- def _prune_old_snapshots(self) -> None:
- if not self._snapshots:
- return
- cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600)
- self._snapshots = [
- s for s in self._snapshots if s.timestamp.timestamp() > cutoff
- ]
-
- def _snapshot_loop(self) -> None:
- while not self._shutdown.is_set():
- self._shutdown.wait(timeout=self.interval_seconds)
- if not self._shutdown.is_set():
- self._take_snapshot()
-
- def _take_snapshot(self) -> None:
- with self._lock:
- now = datetime.now(timezone.utc)
- window_seconds = int(time.time() - self._window_start)
-
- snapshot = MetricsSnapshot(
- timestamp=now,
- window_seconds=window_seconds,
- by_method={k: v.to_dict() for k, v in self._by_method.items()},
- by_endpoint={k: v.to_dict() for k, v in self._by_endpoint.items()},
- by_status_class=dict(self._by_status_class),
- error_codes=dict(self._error_codes),
- totals=self._totals.to_dict(),
- )
-
- self._snapshots.append(snapshot)
- self._prune_old_snapshots()
- self._save_history()
-
- self._by_method = defaultdict(OperationStats)
- self._by_endpoint = defaultdict(OperationStats)
- self._by_status_class.clear()
- self._error_codes.clear()
- self._totals = OperationStats()
- self._window_start = time.time()
-
- def record_request(
- self,
- method: str,
- endpoint_type: str,
- status_code: int,
- latency_ms: float,
- bytes_in: int = 0,
- bytes_out: int = 0,
- error_code: Optional[str] = None,
- ) -> None:
- success = 200 <= status_code < 400
- status_class = f"{status_code // 100}xx"
-
- with self._lock:
- self._by_method[method].record(latency_ms, success, bytes_in, bytes_out)
- self._by_endpoint[endpoint_type].record(latency_ms, success, bytes_in, bytes_out)
-
- self._by_status_class[status_class] = self._by_status_class.get(status_class, 0) + 1
-
- if error_code:
- self._error_codes[error_code] = self._error_codes.get(error_code, 0) + 1
-
- self._totals.record(latency_ms, success, bytes_in, bytes_out)
-
- def get_current_stats(self) -> Dict[str, Any]:
- with self._lock:
- window_seconds = int(time.time() - self._window_start)
- return {
- "timestamp": datetime.now(timezone.utc).isoformat(),
- "window_seconds": window_seconds,
- "by_method": {k: v.to_dict() for k, v in self._by_method.items()},
- "by_endpoint": {k: v.to_dict() for k, v in self._by_endpoint.items()},
- "by_status_class": dict(self._by_status_class),
- "error_codes": dict(self._error_codes),
- "totals": self._totals.to_dict(),
- }
-
- def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]:
- with self._lock:
- snapshots = list(self._snapshots)
-
- if hours:
- cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)
- snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff]
-
- return [s.to_dict() for s in snapshots]
-
- def shutdown(self) -> None:
- self._shutdown.set()
- self._take_snapshot()
- self._snapshot_thread.join(timeout=5.0)
-
-
-def classify_endpoint(path: str) -> str:
- if not path or path == "/":
- return "service"
-
- path = path.rstrip("/")
-
- if path.startswith("/ui"):
- return "ui"
-
- if path.startswith("/kms"):
- return "kms"
-
- if path.startswith("/myfsio"):
- return "service"
-
- parts = path.lstrip("/").split("/")
- if len(parts) == 0:
- return "service"
- elif len(parts) == 1:
- return "bucket"
- else:
- return "object"
diff --git a/python/app/replication.py b/python/app/replication.py
deleted file mode 100644
index ec2d113..0000000
--- a/python/app/replication.py
+++ /dev/null
@@ -1,667 +0,0 @@
-from __future__ import annotations
-
-import json
-import logging
-import mimetypes
-import threading
-import time
-from concurrent.futures import ThreadPoolExecutor
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-import boto3
-from botocore.config import Config
-from botocore.exceptions import ClientError
-from boto3.exceptions import S3UploadFailedError
-
-from .connections import ConnectionStore, RemoteConnection
-from .storage import ObjectStorage, StorageError
-
-logger = logging.getLogger(__name__)
-
-REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
-
-REPLICATION_MODE_NEW_ONLY = "new_only"
-REPLICATION_MODE_ALL = "all"
-REPLICATION_MODE_BIDIRECTIONAL = "bidirectional"
-
-
-def _create_s3_client(
- connection: RemoteConnection,
- *,
- health_check: bool = False,
- connect_timeout: int = 5,
- read_timeout: int = 30,
- max_retries: int = 2,
-) -> Any:
- """Create a boto3 S3 client for the given connection.
- Args:
- connection: Remote S3 connection configuration
- health_check: If True, use minimal retries for quick health checks
- """
- config = Config(
- user_agent_extra=REPLICATION_USER_AGENT,
- connect_timeout=connect_timeout,
- read_timeout=read_timeout,
- retries={'max_attempts': 1 if health_check else max_retries},
- signature_version='s3v4',
- s3={'addressing_style': 'path'},
- request_checksum_calculation='when_required',
- response_checksum_validation='when_required',
- )
- return boto3.client(
- "s3",
- endpoint_url=connection.endpoint_url,
- aws_access_key_id=connection.access_key,
- aws_secret_access_key=connection.secret_key,
- region_name=connection.region or 'us-east-1',
- config=config,
- )
-
-
-@dataclass
-class ReplicationStats:
- """Statistics for replication operations - computed dynamically."""
- objects_synced: int = 0
- objects_pending: int = 0
- objects_orphaned: int = 0
- bytes_synced: int = 0
- last_sync_at: Optional[float] = None
- last_sync_key: Optional[str] = None
-
- def to_dict(self) -> dict:
- return {
- "objects_synced": self.objects_synced,
- "objects_pending": self.objects_pending,
- "objects_orphaned": self.objects_orphaned,
- "bytes_synced": self.bytes_synced,
- "last_sync_at": self.last_sync_at,
- "last_sync_key": self.last_sync_key,
- }
-
- @classmethod
- def from_dict(cls, data: dict) -> "ReplicationStats":
- return cls(
- objects_synced=data.get("objects_synced", 0),
- objects_pending=data.get("objects_pending", 0),
- objects_orphaned=data.get("objects_orphaned", 0),
- bytes_synced=data.get("bytes_synced", 0),
- last_sync_at=data.get("last_sync_at"),
- last_sync_key=data.get("last_sync_key"),
- )
-
-
-@dataclass
-class ReplicationFailure:
- object_key: str
- error_message: str
- timestamp: float
- failure_count: int
- bucket_name: str
- action: str
- last_error_code: Optional[str] = None
-
- def to_dict(self) -> dict:
- return {
- "object_key": self.object_key,
- "error_message": self.error_message,
- "timestamp": self.timestamp,
- "failure_count": self.failure_count,
- "bucket_name": self.bucket_name,
- "action": self.action,
- "last_error_code": self.last_error_code,
- }
-
- @classmethod
- def from_dict(cls, data: dict) -> "ReplicationFailure":
- return cls(
- object_key=data["object_key"],
- error_message=data["error_message"],
- timestamp=data["timestamp"],
- failure_count=data["failure_count"],
- bucket_name=data["bucket_name"],
- action=data["action"],
- last_error_code=data.get("last_error_code"),
- )
-
-
-@dataclass
-class ReplicationRule:
- bucket_name: str
- target_connection_id: str
- target_bucket: str
- enabled: bool = True
- mode: str = REPLICATION_MODE_NEW_ONLY
- created_at: Optional[float] = None
- stats: ReplicationStats = field(default_factory=ReplicationStats)
- sync_deletions: bool = True
- last_pull_at: Optional[float] = None
- filter_prefix: Optional[str] = None
-
- def to_dict(self) -> dict:
- return {
- "bucket_name": self.bucket_name,
- "target_connection_id": self.target_connection_id,
- "target_bucket": self.target_bucket,
- "enabled": self.enabled,
- "mode": self.mode,
- "created_at": self.created_at,
- "stats": self.stats.to_dict(),
- "sync_deletions": self.sync_deletions,
- "last_pull_at": self.last_pull_at,
- "filter_prefix": self.filter_prefix,
- }
-
- @classmethod
- def from_dict(cls, data: dict) -> "ReplicationRule":
- stats_data = data.pop("stats", {})
- if "mode" not in data:
- data["mode"] = REPLICATION_MODE_NEW_ONLY
- if "created_at" not in data:
- data["created_at"] = None
- if "sync_deletions" not in data:
- data["sync_deletions"] = True
- if "last_pull_at" not in data:
- data["last_pull_at"] = None
- if "filter_prefix" not in data:
- data["filter_prefix"] = None
- rule = cls(**data)
- rule.stats = ReplicationStats.from_dict(stats_data) if stats_data else ReplicationStats()
- return rule
-
-
-class ReplicationFailureStore:
- def __init__(self, storage_root: Path, max_failures_per_bucket: int = 50) -> None:
- self.storage_root = storage_root
- self.max_failures_per_bucket = max_failures_per_bucket
- self._lock = threading.Lock()
- self._cache: Dict[str, List[ReplicationFailure]] = {}
-
- def _get_failures_path(self, bucket_name: str) -> Path:
- return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "replication_failures.json"
-
- def _load_from_disk(self, bucket_name: str) -> List[ReplicationFailure]:
- path = self._get_failures_path(bucket_name)
- if not path.exists():
- return []
- try:
- with open(path, "r") as f:
- data = json.load(f)
- return [ReplicationFailure.from_dict(d) for d in data.get("failures", [])]
- except (OSError, ValueError, KeyError) as e:
- logger.error(f"Failed to load replication failures for {bucket_name}: {e}")
- return []
-
- def _save_to_disk(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
- path = self._get_failures_path(bucket_name)
- path.parent.mkdir(parents=True, exist_ok=True)
- data = {"failures": [f.to_dict() for f in failures[:self.max_failures_per_bucket]]}
- try:
- with open(path, "w") as f:
- json.dump(data, f, indent=2)
- except OSError as e:
- logger.error(f"Failed to save replication failures for {bucket_name}: {e}")
-
- def load_failures(self, bucket_name: str) -> List[ReplicationFailure]:
- if bucket_name in self._cache:
- return list(self._cache[bucket_name])
- failures = self._load_from_disk(bucket_name)
- self._cache[bucket_name] = failures
- return list(failures)
-
- def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
- trimmed = failures[:self.max_failures_per_bucket]
- self._cache[bucket_name] = trimmed
- self._save_to_disk(bucket_name, trimmed)
-
- def add_failure(self, bucket_name: str, failure: ReplicationFailure) -> None:
- with self._lock:
- failures = self.load_failures(bucket_name)
- existing = next((f for f in failures if f.object_key == failure.object_key), None)
- if existing:
- existing.failure_count += 1
- existing.timestamp = failure.timestamp
- existing.error_message = failure.error_message
- existing.last_error_code = failure.last_error_code
- else:
- failures.insert(0, failure)
- self.save_failures(bucket_name, failures)
-
- def remove_failure(self, bucket_name: str, object_key: str) -> bool:
- with self._lock:
- failures = self.load_failures(bucket_name)
- original_len = len(failures)
- failures = [f for f in failures if f.object_key != object_key]
- if len(failures) < original_len:
- self.save_failures(bucket_name, failures)
- return True
- return False
-
- def clear_failures(self, bucket_name: str) -> None:
- with self._lock:
- self._cache.pop(bucket_name, None)
- path = self._get_failures_path(bucket_name)
- if path.exists():
- path.unlink()
-
- def get_failure(self, bucket_name: str, object_key: str) -> Optional[ReplicationFailure]:
- failures = self.load_failures(bucket_name)
- return next((f for f in failures if f.object_key == object_key), None)
-
- def get_failure_count(self, bucket_name: str) -> int:
- return len(self.load_failures(bucket_name))
-
-
-class ReplicationManager:
- def __init__(
- self,
- storage: ObjectStorage,
- connections: ConnectionStore,
- rules_path: Path,
- storage_root: Path,
- connect_timeout: int = 5,
- read_timeout: int = 30,
- max_retries: int = 2,
- streaming_threshold_bytes: int = 10 * 1024 * 1024,
- max_failures_per_bucket: int = 50,
- ) -> None:
- self.storage = storage
- self.connections = connections
- self.rules_path = rules_path
- self.storage_root = storage_root
- self.connect_timeout = connect_timeout
- self.read_timeout = read_timeout
- self.max_retries = max_retries
- self.streaming_threshold_bytes = streaming_threshold_bytes
- self._rules: Dict[str, ReplicationRule] = {}
- self._stats_lock = threading.Lock()
- self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
- self._shutdown = False
- self.failure_store = ReplicationFailureStore(storage_root, max_failures_per_bucket)
- self.reload_rules()
-
- def _create_client(self, connection: RemoteConnection, *, health_check: bool = False) -> Any:
- """Create an S3 client with the manager's configured timeouts."""
- return _create_s3_client(
- connection,
- health_check=health_check,
- connect_timeout=self.connect_timeout,
- read_timeout=self.read_timeout,
- max_retries=self.max_retries,
- )
-
- def shutdown(self, wait: bool = True) -> None:
- """Shutdown the replication executor gracefully.
-
- Args:
- wait: If True, wait for pending tasks to complete
- """
- self._shutdown = True
- self._executor.shutdown(wait=wait)
- logger.info("Replication manager shut down")
-
- def reload_rules(self) -> None:
- if not self.rules_path.exists():
- self._rules = {}
- return
- try:
- with open(self.rules_path, "r") as f:
- data = json.load(f)
- for bucket, rule_data in data.items():
- self._rules[bucket] = ReplicationRule.from_dict(rule_data)
- except (OSError, ValueError) as e:
- logger.error(f"Failed to load replication rules: {e}")
-
- def save_rules(self) -> None:
- data = {b: rule.to_dict() for b, rule in self._rules.items()}
- self.rules_path.parent.mkdir(parents=True, exist_ok=True)
- with open(self.rules_path, "w") as f:
- json.dump(data, f, indent=2)
-
- def check_endpoint_health(self, connection: RemoteConnection) -> bool:
- """Check if a remote endpoint is reachable and responsive.
-
- Returns True if endpoint is healthy, False otherwise.
- Uses short timeouts to prevent blocking.
- """
- try:
- s3 = self._create_client(connection, health_check=True)
- s3.list_buckets()
- return True
- except Exception as e:
- logger.warning(f"Endpoint health check failed for {connection.name} ({connection.endpoint_url}): {e}")
- return False
-
- def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]:
- return self._rules.get(bucket_name)
-
- def list_rules(self) -> List[ReplicationRule]:
- return list(self._rules.values())
-
- def set_rule(self, rule: ReplicationRule) -> None:
- old_rule = self._rules.get(rule.bucket_name)
- was_all_mode = old_rule and old_rule.mode == REPLICATION_MODE_ALL if old_rule else False
- self._rules[rule.bucket_name] = rule
- self.save_rules()
-
- if rule.mode == REPLICATION_MODE_ALL and rule.enabled and not was_all_mode:
- logger.info(f"Replication mode ALL enabled for {rule.bucket_name}, triggering sync of existing objects")
- self._executor.submit(self.replicate_existing_objects, rule.bucket_name)
-
- def delete_rule(self, bucket_name: str) -> None:
- if bucket_name in self._rules:
- del self._rules[bucket_name]
- self.save_rules()
-
- def _update_last_sync(self, bucket_name: str, object_key: str = "") -> None:
- """Update last sync timestamp after a successful operation."""
- with self._stats_lock:
- rule = self._rules.get(bucket_name)
- if not rule:
- return
- rule.stats.last_sync_at = time.time()
- rule.stats.last_sync_key = object_key
- self.save_rules()
-
- def get_sync_status(self, bucket_name: str) -> Optional[ReplicationStats]:
- """Dynamically compute replication status by comparing source and destination buckets."""
- rule = self.get_rule(bucket_name)
- if not rule:
- return None
-
- connection = self.connections.get(rule.target_connection_id)
- if not connection:
- return rule.stats
-
- try:
- source_objects = self.storage.list_objects_all(bucket_name)
- source_keys = {obj.key: obj.size for obj in source_objects}
-
- s3 = self._create_client(connection)
-
- dest_keys = set()
- bytes_synced = 0
- paginator = s3.get_paginator('list_objects_v2')
- try:
- for page in paginator.paginate(Bucket=rule.target_bucket):
- for obj in page.get('Contents', []):
- dest_keys.add(obj['Key'])
- if obj['Key'] in source_keys:
- bytes_synced += obj.get('Size', 0)
- except ClientError as e:
- if e.response['Error']['Code'] == 'NoSuchBucket':
- dest_keys = set()
- else:
- raise
-
- synced = source_keys.keys() & dest_keys
- orphaned = dest_keys - source_keys.keys()
-
- if rule.mode == REPLICATION_MODE_ALL:
- pending = source_keys.keys() - dest_keys
- else:
- pending = set()
-
- rule.stats.objects_synced = len(synced)
- rule.stats.objects_pending = len(pending)
- rule.stats.objects_orphaned = len(orphaned)
- rule.stats.bytes_synced = bytes_synced
-
- return rule.stats
-
- except (ClientError, StorageError) as e:
- logger.error(f"Failed to compute sync status for {bucket_name}: {e}")
- return rule.stats
-
- def replicate_existing_objects(self, bucket_name: str) -> None:
- """Trigger replication for all existing objects in a bucket."""
- rule = self.get_rule(bucket_name)
- if not rule or not rule.enabled:
- return
-
- connection = self.connections.get(rule.target_connection_id)
- if not connection:
- logger.warning(f"Cannot replicate existing objects: Connection {rule.target_connection_id} not found")
- return
-
- if not self.check_endpoint_health(connection):
- logger.warning(f"Cannot replicate existing objects: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable")
- return
-
- try:
- objects = self.storage.list_objects_all(bucket_name)
- logger.info(f"Starting replication of {len(objects)} existing objects from {bucket_name}")
- for obj in objects:
- self._executor.submit(self._replicate_task, bucket_name, obj.key, rule, connection, "write")
- except StorageError as e:
- logger.error(f"Failed to list objects for replication: {e}")
-
- def create_remote_bucket(self, connection_id: str, bucket_name: str) -> None:
- """Create a bucket on the remote connection."""
- connection = self.connections.get(connection_id)
- if not connection:
- raise ValueError(f"Connection {connection_id} not found")
-
- try:
- s3 = self._create_client(connection)
- s3.create_bucket(Bucket=bucket_name)
- except ClientError as e:
- logger.error(f"Failed to create remote bucket {bucket_name}: {e}")
- raise
-
- def trigger_replication(self, bucket_name: str, object_key: str, action: str = "write") -> None:
- rule = self.get_rule(bucket_name)
- if not rule or not rule.enabled:
- return
-
- connection = self.connections.get(rule.target_connection_id)
- if not connection:
- logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Connection {rule.target_connection_id} not found")
- return
-
- if not self.check_endpoint_health(connection):
- logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable")
- return
-
- self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, action)
-
- def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection, action: str) -> None:
- if self._shutdown:
- return
-
- current_rule = self.get_rule(bucket_name)
- if not current_rule or not current_rule.enabled:
- logger.debug(f"Replication skipped for {bucket_name}/{object_key}: rule disabled or removed")
- return
-
- if ".." in object_key or object_key.startswith("/") or object_key.startswith("\\"):
- logger.error(f"Invalid object key in replication (path traversal attempt): {object_key}")
- return
-
- try:
- from .storage import ObjectStorage
- ObjectStorage._sanitize_object_key(object_key)
- except StorageError as e:
- logger.error(f"Object key validation failed in replication: {e}")
- return
-
- try:
- s3 = self._create_client(conn)
-
- if action == "delete":
- try:
- s3.delete_object(Bucket=rule.target_bucket, Key=object_key)
- logger.info(f"Replicated DELETE {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
- self._update_last_sync(bucket_name, object_key)
- self.failure_store.remove_failure(bucket_name, object_key)
- except ClientError as e:
- error_code = e.response.get('Error', {}).get('Code')
- logger.error(f"Replication DELETE failed for {bucket_name}/{object_key}: {e}")
- self.failure_store.add_failure(bucket_name, ReplicationFailure(
- object_key=object_key,
- error_message=str(e),
- timestamp=time.time(),
- failure_count=1,
- bucket_name=bucket_name,
- action="delete",
- last_error_code=error_code,
- ))
- return
-
- try:
- path = self.storage.get_object_path(bucket_name, object_key)
- except StorageError:
- logger.error(f"Source object not found: {bucket_name}/{object_key}")
- return
-
- content_type, _ = mimetypes.guess_type(path)
- file_size = path.stat().st_size
-
- logger.info(f"Replicating {bucket_name}/{object_key}: Size={file_size}, ContentType={content_type}")
-
- def do_upload() -> None:
- """Upload object using appropriate method based on file size.
-
- For small files (< 10 MiB): Read into memory for simpler handling
- For large files: Use streaming upload to avoid memory issues
- """
- extra_args = {}
- if content_type:
- extra_args["ContentType"] = content_type
-
- if file_size >= self.streaming_threshold_bytes:
- s3.upload_file(
- str(path),
- rule.target_bucket,
- object_key,
- ExtraArgs=extra_args if extra_args else None,
- )
- else:
- file_content = path.read_bytes()
- put_kwargs = {
- "Bucket": rule.target_bucket,
- "Key": object_key,
- "Body": file_content,
- **extra_args,
- }
- s3.put_object(**put_kwargs)
-
- try:
- do_upload()
- except (ClientError, S3UploadFailedError) as e:
- error_code = None
- if isinstance(e, ClientError):
- error_code = e.response['Error']['Code']
- elif isinstance(e, S3UploadFailedError):
- if "NoSuchBucket" in str(e):
- error_code = 'NoSuchBucket'
-
- if error_code == 'NoSuchBucket':
- logger.info(f"Target bucket {rule.target_bucket} not found. Attempting to create it.")
- bucket_ready = False
- try:
- s3.create_bucket(Bucket=rule.target_bucket)
- bucket_ready = True
- logger.info(f"Created target bucket {rule.target_bucket}")
- except ClientError as bucket_err:
- if bucket_err.response['Error']['Code'] in ('BucketAlreadyExists', 'BucketAlreadyOwnedByYou'):
- logger.debug(f"Bucket {rule.target_bucket} already exists (created by another thread)")
- bucket_ready = True
- else:
- logger.error(f"Failed to create target bucket {rule.target_bucket}: {bucket_err}")
- raise e
-
- if bucket_ready:
- do_upload()
- else:
- raise e
-
- logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
- self._update_last_sync(bucket_name, object_key)
- self.failure_store.remove_failure(bucket_name, object_key)
-
- except (ClientError, OSError, ValueError) as e:
- error_code = None
- if isinstance(e, ClientError):
- error_code = e.response.get('Error', {}).get('Code')
- logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}")
- self.failure_store.add_failure(bucket_name, ReplicationFailure(
- object_key=object_key,
- error_message=str(e),
- timestamp=time.time(),
- failure_count=1,
- bucket_name=bucket_name,
- action=action,
- last_error_code=error_code,
- ))
- except Exception as e:
- logger.exception(f"Unexpected error during replication for {bucket_name}/{object_key}")
- self.failure_store.add_failure(bucket_name, ReplicationFailure(
- object_key=object_key,
- error_message=str(e),
- timestamp=time.time(),
- failure_count=1,
- bucket_name=bucket_name,
- action=action,
- last_error_code=None,
- ))
-
- def get_failed_items(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[ReplicationFailure]:
- failures = self.failure_store.load_failures(bucket_name)
- return failures[offset:offset + limit]
-
- def get_failure_count(self, bucket_name: str) -> int:
- return self.failure_store.get_failure_count(bucket_name)
-
- def retry_failed_item(self, bucket_name: str, object_key: str) -> bool:
- failure = self.failure_store.get_failure(bucket_name, object_key)
- if not failure:
- return False
-
- rule = self.get_rule(bucket_name)
- if not rule or not rule.enabled:
- return False
-
- connection = self.connections.get(rule.target_connection_id)
- if not connection:
- logger.warning(f"Cannot retry: Connection {rule.target_connection_id} not found")
- return False
-
- if not self.check_endpoint_health(connection):
- logger.warning(f"Cannot retry: Endpoint {connection.name} is not reachable")
- return False
-
- self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, failure.action)
- return True
-
- def retry_all_failed(self, bucket_name: str) -> Dict[str, int]:
- failures = self.failure_store.load_failures(bucket_name)
- if not failures:
- return {"submitted": 0, "skipped": 0}
-
- rule = self.get_rule(bucket_name)
- if not rule or not rule.enabled:
- return {"submitted": 0, "skipped": len(failures)}
-
- connection = self.connections.get(rule.target_connection_id)
- if not connection:
- logger.warning(f"Cannot retry: Connection {rule.target_connection_id} not found")
- return {"submitted": 0, "skipped": len(failures)}
-
- if not self.check_endpoint_health(connection):
- logger.warning(f"Cannot retry: Endpoint {connection.name} is not reachable")
- return {"submitted": 0, "skipped": len(failures)}
-
- submitted = 0
- for failure in failures:
- self._executor.submit(self._replicate_task, bucket_name, failure.object_key, rule, connection, failure.action)
- submitted += 1
-
- return {"submitted": submitted, "skipped": 0}
-
- def dismiss_failure(self, bucket_name: str, object_key: str) -> bool:
- return self.failure_store.remove_failure(bucket_name, object_key)
-
- def clear_failures(self, bucket_name: str) -> None:
- self.failure_store.clear_failures(bucket_name)
diff --git a/python/app/s3_api.py b/python/app/s3_api.py
deleted file mode 100644
index 97cfd0a..0000000
--- a/python/app/s3_api.py
+++ /dev/null
@@ -1,3975 +0,0 @@
-from __future__ import annotations
-
-import base64
-import hashlib
-import hmac
-import json
-import logging
-import mimetypes
-import re
-import threading
-import time
-import uuid
-from collections import OrderedDict
-from datetime import datetime, timedelta, timezone
-from typing import Any, Dict, Optional, Tuple
-from urllib.parse import quote, urlencode, urlparse, unquote
-from xml.etree.ElementTree import Element, SubElement, tostring, ParseError
-from defusedxml.ElementTree import fromstring
-
-try:
- import myfsio_core as _rc
- if not all(hasattr(_rc, f) for f in (
- "verify_sigv4_signature", "derive_signing_key", "clear_signing_key_cache",
- )):
- raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
- _HAS_RUST = True
-except ImportError:
- _rc = None
- _HAS_RUST = False
-
-from flask import Blueprint, Response, current_app, jsonify, request, g
-from werkzeug.http import http_date
-
-from .access_logging import AccessLoggingService, LoggingConfiguration
-from .acl import AclService
-from .bucket_policies import BucketPolicyStore
-from .encryption import SSECEncryption, SSECMetadata, EncryptionError
-from .extensions import limiter
-from .iam import IamError, Principal
-from .notifications import NotificationService, NotificationConfiguration, WebhookDestination
-from .object_lock import ObjectLockService, ObjectLockRetention, ObjectLockConfig, ObjectLockError, RetentionMode
-from .replication import ReplicationManager
-from .storage import ObjectStorage, StorageError, QuotaExceededError, BucketNotFoundError, ObjectNotFoundError
-
-logger = logging.getLogger(__name__)
-
-S3_NS = "http://s3.amazonaws.com/doc/2006-03-01/"
-
-_HEADER_CONTROL_CHARS = re.compile(r'[\r\n\x00-\x1f\x7f]')
-
-
-def _sanitize_header_value(value: str) -> str:
- return _HEADER_CONTROL_CHARS.sub('', value)
-
-
-MAX_XML_PAYLOAD_SIZE = 1048576 # 1 MB
-
-
-def _parse_xml_with_limit(payload: bytes) -> Element:
- """Parse XML payload with size limit to prevent DoS attacks."""
- max_size = current_app.config.get("MAX_XML_PAYLOAD_SIZE", MAX_XML_PAYLOAD_SIZE)
- if len(payload) > max_size:
- raise ParseError(f"XML payload exceeds maximum size of {max_size} bytes")
- return fromstring(payload)
-
-
-s3_api_bp = Blueprint("s3_api", __name__)
-
-def _storage() -> ObjectStorage:
- return current_app.extensions["object_storage"]
-
-
-def _acl() -> AclService:
- return current_app.extensions["acl"]
-
-
-def _iam():
- return current_app.extensions["iam"]
-
-
-def _replication_manager() -> ReplicationManager:
- return current_app.extensions["replication"]
-
-
-def _bucket_policies() -> BucketPolicyStore:
- store: BucketPolicyStore = current_app.extensions["bucket_policies"]
- store.maybe_reload()
- return store
-
-
-def _build_policy_context() -> Dict[str, Any]:
- cached = getattr(g, "_policy_context", None)
- if cached is not None:
- return cached
- ctx: Dict[str, Any] = {}
- if request.headers.get("Referer"):
- ctx["aws:Referer"] = request.headers.get("Referer")
- num_proxies = current_app.config.get("NUM_TRUSTED_PROXIES", 0)
- if num_proxies > 0 and request.access_route and len(request.access_route) > num_proxies:
- ctx["aws:SourceIp"] = request.access_route[-num_proxies]
- elif request.remote_addr:
- ctx["aws:SourceIp"] = request.remote_addr
- elif request.access_route:
- ctx["aws:SourceIp"] = request.access_route[0]
- ctx["aws:SecureTransport"] = str(request.is_secure).lower()
- if request.headers.get("User-Agent"):
- ctx["aws:UserAgent"] = request.headers.get("User-Agent")
- g._policy_context = ctx
- return ctx
-
-
-def _object_lock() -> ObjectLockService:
- return current_app.extensions["object_lock"]
-
-
-def _notifications() -> NotificationService:
- return current_app.extensions["notifications"]
-
-
-def _access_logging() -> AccessLoggingService:
- return current_app.extensions["access_logging"]
-
-
-def _get_list_buckets_limit() -> str:
- return current_app.config.get("RATELIMIT_LIST_BUCKETS", "60 per minute")
-
-
-def _get_bucket_ops_limit() -> str:
- return current_app.config.get("RATELIMIT_BUCKET_OPS", "120 per minute")
-
-
-def _get_object_ops_limit() -> str:
- return current_app.config.get("RATELIMIT_OBJECT_OPS", "240 per minute")
-
-
-def _get_head_ops_limit() -> str:
- return current_app.config.get("RATELIMIT_HEAD_OPS", "100 per minute")
-
-
-def _xml_response(element: Element, status: int = 200) -> Response:
- xml_bytes = tostring(element, encoding="utf-8")
- return Response(xml_bytes, status=status, mimetype="application/xml")
-
-
-def _error_response(code: str, message: str, status: int) -> Response:
- g.s3_error_code = code
- error = Element("Error")
- SubElement(error, "Code").text = code
- SubElement(error, "Message").text = message
- SubElement(error, "Resource").text = request.path
- SubElement(error, "RequestId").text = uuid.uuid4().hex
- return _xml_response(error, status)
-
-
-def _require_xml_content_type() -> Response | None:
- ct = request.headers.get("Content-Type", "")
- if ct and not ct.startswith(("application/xml", "text/xml")):
- return _error_response("InvalidRequest", "Content-Type must be application/xml or text/xml", 400)
- return None
-
-
-def _parse_range_header(range_header: str, file_size: int) -> list[tuple[int, int]] | None:
- if not range_header.startswith("bytes="):
- return None
- max_range_value = 2**63 - 1
- ranges = []
- range_spec = range_header[6:]
- for part in range_spec.split(","):
- part = part.strip()
- if not part:
- continue
- try:
- if part.startswith("-"):
- suffix_length = int(part[1:])
- if suffix_length <= 0 or suffix_length > max_range_value:
- return None
- start = max(0, file_size - suffix_length)
- end = file_size - 1
- elif part.endswith("-"):
- start = int(part[:-1])
- if start < 0 or start > max_range_value or start >= file_size:
- return None
- end = file_size - 1
- else:
- start_str, end_str = part.split("-", 1)
- start = int(start_str)
- end = int(end_str)
- if start < 0 or end < 0 or start > max_range_value or end > max_range_value:
- return None
- if start > end or start >= file_size:
- return None
- end = min(end, file_size - 1)
- except (ValueError, OverflowError):
- return None
- ranges.append((start, end))
- return ranges if ranges else None
-
-
-def _sign(key: bytes, msg: str) -> bytes:
- return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
-
-
-_SIGNING_KEY_CACHE: OrderedDict[Tuple[str, str, str, str], Tuple[bytes, float]] = OrderedDict()
-_SIGNING_KEY_CACHE_LOCK = threading.Lock()
-_SIGNING_KEY_CACHE_TTL = 60.0
-_SIGNING_KEY_CACHE_MAX_SIZE = 256
-
-_SIGV4_HEADER_RE = re.compile(
- r"AWS4-HMAC-SHA256 Credential=([^/]+)/([^/]+)/([^/]+)/([^/]+)/aws4_request, SignedHeaders=([^,]+), Signature=(.+)"
-)
-_SIGV4_REQUIRED_HEADERS = frozenset({'host', 'x-amz-date'})
-
-
-def clear_signing_key_cache() -> None:
- if _HAS_RUST:
- _rc.clear_signing_key_cache()
- with _SIGNING_KEY_CACHE_LOCK:
- _SIGNING_KEY_CACHE.clear()
-
-
-def _get_signature_key(key: str, date_stamp: str, region_name: str, service_name: str) -> bytes:
- if _HAS_RUST:
- return bytes(_rc.derive_signing_key(key, date_stamp, region_name, service_name))
-
- cache_key = (key, date_stamp, region_name, service_name)
- now = time.time()
-
- with _SIGNING_KEY_CACHE_LOCK:
- cached = _SIGNING_KEY_CACHE.get(cache_key)
- if cached:
- signing_key, cached_time = cached
- if now - cached_time < _SIGNING_KEY_CACHE_TTL:
- _SIGNING_KEY_CACHE.move_to_end(cache_key)
- return signing_key
- else:
- del _SIGNING_KEY_CACHE[cache_key]
-
- k_date = _sign(("AWS4" + key).encode("utf-8"), date_stamp)
- k_region = _sign(k_date, region_name)
- k_service = _sign(k_region, service_name)
- k_signing = _sign(k_service, "aws4_request")
-
- with _SIGNING_KEY_CACHE_LOCK:
- if len(_SIGNING_KEY_CACHE) >= _SIGNING_KEY_CACHE_MAX_SIZE:
- _SIGNING_KEY_CACHE.popitem(last=False)
- _SIGNING_KEY_CACHE[cache_key] = (k_signing, now)
-
- return k_signing
-
-
-def _get_canonical_uri(req: Any) -> str:
- """Get the canonical URI for SigV4 signature verification.
-
- AWS SigV4 requires the canonical URI to be URL-encoded exactly as the client
- sent it. Flask/Werkzeug automatically URL-decodes request.path, so we need
- to get the raw path from the environ.
-
- The canonical URI should have each path segment URL-encoded (with '/' preserved),
- and the encoding should match what the client used when signing.
- """
- raw_uri = req.environ.get('RAW_URI') or req.environ.get('REQUEST_URI')
-
- if raw_uri:
- path = raw_uri.split('?')[0]
- return path
-
- return quote(req.path, safe="/-_.~")
-
-
-def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
- match = _SIGV4_HEADER_RE.match(auth_header)
- if not match:
- return None
-
- access_key, date_stamp, region, service, signed_headers_str, signature = match.groups()
- secret_key = _iam().get_secret_key(access_key)
- if not secret_key:
- raise IamError("SignatureDoesNotMatch")
-
- amz_date = req.headers.get("X-Amz-Date") or req.headers.get("Date")
- if not amz_date:
- raise IamError("Missing Date header")
-
- try:
- request_time = datetime.strptime(amz_date, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc)
- except ValueError:
- raise IamError("Invalid X-Amz-Date format")
-
- now = datetime.now(timezone.utc)
- time_diff = abs((now - request_time).total_seconds())
- tolerance = current_app.config.get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900)
- if time_diff > tolerance:
- raise IamError("Request timestamp too old or too far in the future")
-
- signed_headers_set = set(signed_headers_str.split(';'))
- if not _SIGV4_REQUIRED_HEADERS.issubset(signed_headers_set):
- if not ({'host', 'date'}.issubset(signed_headers_set)):
- raise IamError("Required headers not signed")
-
- canonical_uri = _get_canonical_uri(req)
- payload_hash = req.headers.get("X-Amz-Content-Sha256") or "UNSIGNED-PAYLOAD"
-
- if _HAS_RUST:
- query_params = list(req.args.items(multi=True))
- header_values = []
- for h in signed_headers_str.split(";"):
- val = req.headers.get(h) or ""
- if h.lower() == "expect" and val == "":
- val = "100-continue"
- header_values.append((h, val))
- if not _rc.verify_sigv4_signature(
- req.method, canonical_uri, query_params, signed_headers_str,
- header_values, payload_hash, amz_date, date_stamp, region,
- service, secret_key, signature,
- ):
- raise IamError("SignatureDoesNotMatch")
- else:
- method = req.method
- query_args = sorted(req.args.items(multi=True), key=lambda x: (x[0], x[1]))
- canonical_query_parts = []
- for k, v in query_args:
- canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}")
- canonical_query_string = "&".join(canonical_query_parts)
-
- signed_headers_list = signed_headers_str.split(";")
- canonical_headers_parts = []
- for header in signed_headers_list:
- header_val = req.headers.get(header)
- if header_val is None:
- header_val = ""
- if header.lower() == 'expect' and header_val == "":
- header_val = "100-continue"
- header_val = " ".join(header_val.split())
- canonical_headers_parts.append(f"{header.lower()}:{header_val}\n")
- canonical_headers = "".join(canonical_headers_parts)
-
- canonical_request = f"{method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers_str}\n{payload_hash}"
-
- credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
- signing_key = _get_signature_key(secret_key, date_stamp, region, service)
- string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}"
- calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
- if not hmac.compare_digest(calculated_signature, signature):
- raise IamError("SignatureDoesNotMatch")
-
- session_token = req.headers.get("X-Amz-Security-Token")
- if session_token:
- if not _iam().validate_session_token(access_key, session_token):
- raise IamError("InvalidToken")
-
- return _iam().get_principal(access_key)
-
-
-def _verify_sigv4_query(req: Any) -> Principal | None:
- credential = req.args.get("X-Amz-Credential")
- signed_headers_str = req.args.get("X-Amz-SignedHeaders")
- signature = req.args.get("X-Amz-Signature")
- amz_date = req.args.get("X-Amz-Date")
- expires = req.args.get("X-Amz-Expires")
-
- if not (credential and signed_headers_str and signature and amz_date and expires):
- return None
-
- try:
- access_key, date_stamp, region, service, _ = credential.split("/")
- except ValueError:
- raise IamError("Invalid Credential format")
-
- try:
- req_time = datetime.strptime(amz_date, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc)
- except ValueError:
- raise IamError("Invalid Date format")
-
- now = datetime.now(timezone.utc)
- tolerance = timedelta(seconds=current_app.config.get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900))
- if req_time > now + tolerance:
- raise IamError("Request date is too far in the future")
- try:
- expires_seconds = int(expires)
- if expires_seconds <= 0:
- raise IamError("Invalid Expires value: must be positive")
- except ValueError:
- raise IamError("Invalid Expires value: must be an integer")
- min_expiry = current_app.config.get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1)
- max_expiry = current_app.config.get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800)
- if expires_seconds < min_expiry or expires_seconds > max_expiry:
- raise IamError(f"Expiration must be between {min_expiry} second(s) and {max_expiry} seconds")
- if now > req_time + timedelta(seconds=expires_seconds):
- raise IamError("Request expired")
-
- secret_key = _iam().get_secret_key(access_key)
- if not secret_key:
- raise IamError("Invalid access key")
-
- canonical_uri = _get_canonical_uri(req)
-
- if _HAS_RUST:
- query_params = [(k, v) for k, v in req.args.items(multi=True) if k != "X-Amz-Signature"]
- header_values = []
- for h in signed_headers_str.split(";"):
- val = req.headers.get(h) or ""
- if h.lower() == "expect" and val == "":
- val = "100-continue"
- header_values.append((h, val))
- if not _rc.verify_sigv4_signature(
- req.method, canonical_uri, query_params, signed_headers_str,
- header_values, "UNSIGNED-PAYLOAD", amz_date, date_stamp, region,
- service, secret_key, signature,
- ):
- raise IamError("SignatureDoesNotMatch")
- else:
- method = req.method
- query_args = []
- for key, value in req.args.items(multi=True):
- if key != "X-Amz-Signature":
- query_args.append((key, value))
- query_args.sort(key=lambda x: (x[0], x[1]))
-
- canonical_query_parts = []
- for k, v in query_args:
- canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}")
- canonical_query_string = "&".join(canonical_query_parts)
-
- signed_headers_list = signed_headers_str.split(";")
- canonical_headers_parts = []
- for header in signed_headers_list:
- val = req.headers.get(header, "").strip()
- if header.lower() == 'expect' and val == "":
- val = "100-continue"
- val = " ".join(val.split())
- canonical_headers_parts.append(f"{header.lower()}:{val}\n")
- canonical_headers = "".join(canonical_headers_parts)
-
- payload_hash = "UNSIGNED-PAYLOAD"
-
- canonical_request = "\n".join([
- method,
- canonical_uri,
- canonical_query_string,
- canonical_headers,
- signed_headers_str,
- payload_hash
- ])
-
- credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
- signing_key = _get_signature_key(secret_key, date_stamp, region, service)
- hashed_request = hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()
- string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashed_request}"
- calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
- if not hmac.compare_digest(calculated_signature, signature):
- raise IamError("SignatureDoesNotMatch")
-
- session_token = req.args.get("X-Amz-Security-Token")
- if session_token:
- if not _iam().validate_session_token(access_key, session_token):
- raise IamError("InvalidToken")
-
- return _iam().get_principal(access_key)
-
-
-def _verify_sigv4(req: Any) -> Principal | None:
- auth_header = req.headers.get("Authorization")
- if auth_header and auth_header.startswith("AWS4-HMAC-SHA256"):
- return _verify_sigv4_header(req, auth_header)
-
- if req.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256":
- return _verify_sigv4_query(req)
-
- return None
-
-
-def _require_principal():
- sigv4_attempted = ("Authorization" in request.headers and request.headers["Authorization"].startswith("AWS4-HMAC-SHA256")) or \
- (request.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256")
- if sigv4_attempted:
- try:
- principal = _verify_sigv4(request)
- if principal:
- return principal, None
- return None, _error_response("AccessDenied", "Signature verification failed", 403)
- except IamError as exc:
- return None, _error_response("AccessDenied", str(exc), 403)
- except (ValueError, TypeError):
- return None, _error_response("AccessDenied", "Signature verification failed", 403)
-
- access_key = request.headers.get("X-Access-Key")
- secret_key = request.headers.get("X-Secret-Key")
- if not access_key or not secret_key:
- return None, _error_response("AccessDenied", "Missing credentials", 403)
- try:
- principal = _iam().authenticate(access_key, secret_key)
- return principal, None
- except IamError as exc:
- return None, _error_response("AccessDenied", str(exc), 403)
-
-
-def _authorize_action(principal: Principal | None, bucket_name: str | None, action: str, *, object_key: str | None = None) -> None:
- iam_allowed = False
- iam_error: IamError | None = None
- if principal is not None:
- try:
- _iam().authorize(principal, bucket_name, action, object_key=object_key)
- iam_allowed = True
- except IamError as exc:
- iam_error = exc
- else:
- iam_error = IamError("Missing credentials")
-
- policy_decision = None
- access_key = principal.access_key if principal else None
- if bucket_name:
- policy_context = _build_policy_context()
- policy_decision = _bucket_policies().evaluate(access_key, bucket_name, object_key, action, policy_context)
- if policy_decision == "deny":
- raise IamError("Access denied by bucket policy")
-
- if iam_allowed:
- return
- if policy_decision == "allow":
- return
-
- acl_allowed = False
- if bucket_name:
- acl_service = _acl()
- acl_allowed = acl_service.evaluate_bucket_acl(
- bucket_name,
- access_key,
- action,
- is_authenticated=principal is not None,
- )
- if acl_allowed:
- return
-
- raise iam_error or IamError("Access denied")
-
-
-def _object_principal(action: str, bucket_name: str, object_key: str):
- principal, error = _require_principal()
- try:
- _authorize_action(principal, bucket_name, action, object_key=object_key)
- return principal, None
- except IamError as exc:
- if not error:
- return None, _error_response("AccessDenied", str(exc), 403)
- return None, error
-
-
-def _canonical_uri(bucket_name: str, object_key: str | None) -> str:
- segments = [bucket_name]
- if object_key:
- segments.extend(object_key.split("/"))
- encoded = [quote(segment, safe="-_.~") for segment in segments]
- return "/" + "/".join(encoded)
-
-
-def _extract_request_metadata() -> Dict[str, str]:
- metadata: Dict[str, str] = {}
- for header, value in request.headers.items():
- if header.lower().startswith("x-amz-meta-"):
- key = header[11:]
- if key and not (key.startswith("__") and key.endswith("__")):
- metadata[key] = value
- return metadata
-
-
-def _derive_signing_key(secret: str, date_stamp: str, region: str, service: str) -> bytes:
- def _sign(key: bytes, msg: str) -> bytes:
- return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
-
- k_date = _sign(("AWS4" + secret).encode("utf-8"), date_stamp)
- k_region = _sign(k_date, region)
- k_service = _sign(k_region, service)
- return _sign(k_service, "aws4_request")
-
-
-def _generate_presigned_url(
- *,
- principal: Principal,
- secret_key: str,
- method: str,
- bucket_name: str,
- object_key: str,
- expires_in: int,
- api_base_url: str | None = None,
-) -> str:
- region = current_app.config["AWS_REGION"]
- service = current_app.config["AWS_SERVICE"]
- algorithm = "AWS4-HMAC-SHA256"
- now = datetime.now(timezone.utc)
- amz_date = now.strftime("%Y%m%dT%H%M%SZ")
- date_stamp = now.strftime("%Y%m%d")
- credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
- credential = f"{principal.access_key}/{credential_scope}"
-
- query_params = {
- "X-Amz-Algorithm": algorithm,
- "X-Amz-Credential": credential,
- "X-Amz-Date": amz_date,
- "X-Amz-Expires": str(expires_in),
- "X-Amz-SignedHeaders": "host",
- "X-Amz-Content-Sha256": "UNSIGNED-PAYLOAD",
- }
- canonical_query = _encode_query_params(query_params)
-
- api_base = api_base_url or current_app.config.get("API_BASE_URL")
- if api_base:
- parsed = urlparse(api_base)
- host = parsed.netloc
- scheme = parsed.scheme
- else:
- host = request.host
- scheme = request.scheme or "http"
-
- canonical_headers = f"host:{host}\n"
- canonical_request = "\n".join(
- [
- method,
- _canonical_uri(bucket_name, object_key),
- canonical_query,
- canonical_headers,
- "host",
- "UNSIGNED-PAYLOAD",
- ]
- )
- hashed_request = hashlib.sha256(canonical_request.encode()).hexdigest()
- string_to_sign = "\n".join(
- [
- algorithm,
- amz_date,
- credential_scope,
- hashed_request,
- ]
- )
- signing_key = _derive_signing_key(secret_key, date_stamp, region, service)
- signature = hmac.new(signing_key, string_to_sign.encode(), hashlib.sha256).hexdigest()
- query_with_sig = canonical_query + f"&X-Amz-Signature={signature}"
- return f"{scheme}://{host}{_canonical_uri(bucket_name, object_key)}?{query_with_sig}"
-
-
-def _encode_query_params(params: dict[str, str]) -> str:
- parts = []
- for key in sorted(params.keys()):
- value = params[key]
- encoded_key = quote(str(key), safe="-_.~")
- encoded_value = quote(str(value), safe="-_.~")
- parts.append(f"{encoded_key}={encoded_value}")
- return "&".join(parts)
-
-
-def _strip_ns(tag: str | None) -> str:
- if not tag:
- return ""
- return tag.split("}")[-1]
-
-
-def _find_element(parent: Element, name: str) -> Optional[Element]:
- """Find a child element by name, trying S3 namespace then no namespace.
-
- This handles XML documents that may or may not include namespace prefixes.
- """
- el = parent.find(f"{{{S3_NS}}}{name}")
- if el is None:
- el = parent.find(name)
- return el
-
-
-def _find_element_text(parent: Element, name: str, default: str = "") -> str:
- """Find a child element and return its text content.
-
- Returns the default value if element not found or has no text.
- """
- el = _find_element(parent, name)
- if el is None or el.text is None:
- return default
- return el.text.strip()
-
-
-def _parse_tagging_document(payload: bytes) -> list[dict[str, str]]:
- try:
- root = _parse_xml_with_limit(payload)
- except ParseError as exc:
- raise ValueError("Malformed XML") from exc
- if _strip_ns(root.tag) != "Tagging":
- raise ValueError("Root element must be Tagging")
- tagset = root.find(".//{http://s3.amazonaws.com/doc/2006-03-01/}TagSet")
- if tagset is None:
- tagset = root.find("TagSet")
- if tagset is None:
- return []
- tags: list[dict[str, str]] = []
- for tag_el in list(tagset):
- if _strip_ns(tag_el.tag) != "Tag":
- continue
- key = _find_element_text(tag_el, "Key")
- if not key:
- continue
- value = _find_element_text(tag_el, "Value")
- tags.append({"Key": key, "Value": value})
- return tags
-
-
-def _render_tagging_document(tags: list[dict[str, str]]) -> Element:
- root = Element("Tagging")
- tagset_el = SubElement(root, "TagSet")
- for tag in tags:
- tag_el = SubElement(tagset_el, "Tag")
- SubElement(tag_el, "Key").text = tag.get("Key", "")
- SubElement(tag_el, "Value").text = tag.get("Value", "")
- return root
-
-DANGEROUS_CONTENT_TYPES = frozenset([
- "text/html",
- "application/xhtml+xml",
- "application/javascript",
- "text/javascript",
- "application/x-javascript",
- "text/ecmascript",
- "application/ecmascript",
- "image/svg+xml",
-])
-
-SAFE_EXTENSION_MAP = {
- ".txt": ["text/plain"],
- ".json": ["application/json"],
- ".xml": ["application/xml", "text/xml"],
- ".csv": ["text/csv"],
- ".pdf": ["application/pdf"],
- ".png": ["image/png"],
- ".jpg": ["image/jpeg"],
- ".jpeg": ["image/jpeg"],
- ".gif": ["image/gif"],
- ".webp": ["image/webp"],
- ".mp4": ["video/mp4"],
- ".mp3": ["audio/mpeg"],
- ".zip": ["application/zip"],
- ".gz": ["application/gzip"],
- ".tar": ["application/x-tar"],
-}
-
-
-def _validate_content_type(object_key: str, content_type: str | None) -> str | None:
- """Validate Content-Type header for security.
-
- Returns an error message if validation fails, None otherwise.
-
- Rules:
- 1. Block dangerous MIME types that can execute scripts (unless explicitly allowed)
- 2. Warn if Content-Type doesn't match file extension (but don't block)
- """
- if not content_type:
- return None
-
- base_type = content_type.split(";")[0].strip().lower()
-
- if base_type in DANGEROUS_CONTENT_TYPES:
- ext = "." + object_key.rsplit(".", 1)[-1].lower() if "." in object_key else ""
-
- allowed_dangerous = {
- ".svg": "image/svg+xml",
- ".html": "text/html",
- ".htm": "text/html",
- ".xhtml": "application/xhtml+xml",
- ".js": "application/javascript",
- ".mjs": "application/javascript",
- }
-
- if ext in allowed_dangerous and base_type == allowed_dangerous[ext]:
- return None
-
- return (
- f"Content-Type '{content_type}' is potentially dangerous and not allowed "
- f"for object key '{object_key}'. Use a safe Content-Type or rename the file "
- f"with an appropriate extension."
- )
-
- return None
-
-
-def _parse_cors_document(payload: bytes) -> list[dict[str, Any]]:
- try:
- root = _parse_xml_with_limit(payload)
- except ParseError as exc:
- raise ValueError("Malformed XML") from exc
- if _strip_ns(root.tag) != "CORSConfiguration":
- raise ValueError("Root element must be CORSConfiguration")
- rules: list[dict[str, Any]] = []
- for rule_el in list(root):
- if _strip_ns(rule_el.tag) != "CORSRule":
- continue
- rule: dict[str, Any] = {
- "AllowedOrigins": [],
- "AllowedMethods": [],
- "AllowedHeaders": [],
- "ExposeHeaders": [],
- }
- for child in list(rule_el):
- name = _strip_ns(child.tag)
- if name == "AllowedOrigin":
- rule["AllowedOrigins"].append((child.text or ""))
- elif name == "AllowedMethod":
- rule["AllowedMethods"].append((child.text or ""))
- elif name == "AllowedHeader":
- rule["AllowedHeaders"].append((child.text or ""))
- elif name == "ExposeHeader":
- rule["ExposeHeaders"].append((child.text or ""))
- elif name == "MaxAgeSeconds":
- try:
- rule["MaxAgeSeconds"] = int(child.text or 0)
- except ValueError:
- raise ValueError("MaxAgeSeconds must be an integer") from None
- rules.append(rule)
- return rules
-
-
-def _render_cors_document(rules: list[dict[str, Any]]) -> Element:
- root = Element("CORSConfiguration")
- for rule in rules:
- rule_el = SubElement(root, "CORSRule")
- for origin in rule.get("AllowedOrigins", []):
- SubElement(rule_el, "AllowedOrigin").text = origin
- for method in rule.get("AllowedMethods", []):
- SubElement(rule_el, "AllowedMethod").text = method
- for header in rule.get("AllowedHeaders", []):
- SubElement(rule_el, "AllowedHeader").text = header
- for header in rule.get("ExposeHeaders", []):
- SubElement(rule_el, "ExposeHeader").text = header
- if "MaxAgeSeconds" in rule and rule["MaxAgeSeconds"] is not None:
- SubElement(rule_el, "MaxAgeSeconds").text = str(rule["MaxAgeSeconds"])
- return root
-
-
-def _parse_encryption_document(payload: bytes) -> dict[str, Any]:
- try:
- root = _parse_xml_with_limit(payload)
- except ParseError as exc:
- raise ValueError("Malformed XML") from exc
- if _strip_ns(root.tag) != "ServerSideEncryptionConfiguration":
- raise ValueError("Root element must be ServerSideEncryptionConfiguration")
- rules: list[dict[str, Any]] = []
- for rule_el in list(root):
- if _strip_ns(rule_el.tag) != "Rule":
- continue
- default_el = None
- bucket_key_el = None
- for child in list(rule_el):
- name = _strip_ns(child.tag)
- if name == "ApplyServerSideEncryptionByDefault":
- default_el = child
- elif name == "BucketKeyEnabled":
- bucket_key_el = child
- if default_el is None:
- continue
- algo_el = default_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}SSEAlgorithm")
- if algo_el is None:
- algo_el = default_el.find("SSEAlgorithm")
- if algo_el is None or not (algo_el.text or "").strip():
- raise ValueError("SSEAlgorithm is required")
- rule: dict[str, Any] = {"SSEAlgorithm": algo_el.text.strip()}
- kms_el = default_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}KMSMasterKeyID")
- if kms_el is None:
- kms_el = default_el.find("KMSMasterKeyID")
- if kms_el is not None and kms_el.text:
- rule["KMSMasterKeyID"] = kms_el.text.strip()
- if bucket_key_el is not None and bucket_key_el.text:
- rule["BucketKeyEnabled"] = bucket_key_el.text.strip().lower() in {"true", "1"}
- rules.append(rule)
- if not rules:
- raise ValueError("At least one Rule is required")
- return {"Rules": rules}
-
-
-def _render_encryption_document(config: dict[str, Any]) -> Element:
- root = Element("ServerSideEncryptionConfiguration")
- for rule in config.get("Rules", []):
- rule_el = SubElement(root, "Rule")
- default_el = SubElement(rule_el, "ApplyServerSideEncryptionByDefault")
- SubElement(default_el, "SSEAlgorithm").text = rule.get("SSEAlgorithm", "")
- if rule.get("KMSMasterKeyID"):
- SubElement(default_el, "KMSMasterKeyID").text = rule["KMSMasterKeyID"]
- if "BucketKeyEnabled" in rule:
- SubElement(rule_el, "BucketKeyEnabled").text = "true" if rule["BucketKeyEnabled"] else "false"
- return root
-
-
-def _stream_file(path, chunk_size: int = 1024 * 1024):
- with path.open("rb") as handle:
- while True:
- chunk = handle.read(chunk_size)
- if not chunk:
- break
- yield chunk
-
-
-def _method_not_allowed(allowed: list[str]) -> Response:
- response = _error_response(
- "MethodNotAllowed",
- "The specified method is not allowed for this resource",
- 405,
- )
- response.headers["Allow"] = ", ".join(sorted({method.upper() for method in allowed}))
- return response
-
-
-def _check_conditional_headers(etag: str, last_modified: float | None) -> Response | None:
- from email.utils import parsedate_to_datetime
-
- if_match = request.headers.get("If-Match")
- if if_match:
- if if_match.strip() != "*":
- match_etags = [e.strip().strip('"') for e in if_match.split(",")]
- if etag not in match_etags:
- return Response(status=412)
-
- if_unmodified = request.headers.get("If-Unmodified-Since")
- if not if_match and if_unmodified and last_modified is not None:
- try:
- dt = parsedate_to_datetime(if_unmodified)
- obj_dt = datetime.fromtimestamp(last_modified, timezone.utc)
- if obj_dt > dt:
- return Response(status=412)
- except (TypeError, ValueError):
- pass
-
- if_none_match = request.headers.get("If-None-Match")
- if if_none_match:
- if if_none_match.strip() == "*":
- resp = Response(status=304)
- resp.headers["ETag"] = f'"{etag}"'
- if last_modified is not None:
- resp.headers["Last-Modified"] = http_date(last_modified)
- return resp
- none_match_etags = [e.strip().strip('"') for e in if_none_match.split(",")]
- if etag in none_match_etags:
- resp = Response(status=304)
- resp.headers["ETag"] = f'"{etag}"'
- if last_modified is not None:
- resp.headers["Last-Modified"] = http_date(last_modified)
- return resp
-
- if_modified = request.headers.get("If-Modified-Since")
- if not if_none_match and if_modified and last_modified is not None:
- try:
- dt = parsedate_to_datetime(if_modified)
- obj_dt = datetime.fromtimestamp(last_modified, timezone.utc)
- if obj_dt <= dt:
- resp = Response(status=304)
- resp.headers["ETag"] = f'"{etag}"'
- resp.headers["Last-Modified"] = http_date(last_modified)
- return resp
- except (TypeError, ValueError):
- pass
-
- return None
-
-
-def _apply_object_headers(
- response: Response,
- *,
- file_stat,
- metadata: Dict[str, str] | None,
- etag: str,
- size_override: int | None = None,
- mtime_override: float | None = None,
-) -> None:
- effective_size = size_override if size_override is not None else (file_stat.st_size if file_stat is not None else None)
- effective_mtime = mtime_override if mtime_override is not None else (file_stat.st_mtime if file_stat is not None else None)
- if effective_size is not None and response.status_code != 206:
- response.headers["Content-Length"] = str(effective_size)
- if effective_mtime is not None:
- response.headers["Last-Modified"] = http_date(effective_mtime)
- response.headers["ETag"] = f'"{etag}"'
- response.headers["Accept-Ranges"] = "bytes"
- for key, value in (metadata or {}).items():
- if key.startswith("__") and key.endswith("__"):
- continue
- safe_value = _sanitize_header_value(str(value))
- response.headers[f"X-Amz-Meta-{key}"] = safe_value
-
-
-def _maybe_handle_bucket_subresource(bucket_name: str) -> Response | None:
- handlers = {
- "versioning": _bucket_versioning_handler,
- "tagging": _bucket_tagging_handler,
- "cors": _bucket_cors_handler,
- "encryption": _bucket_encryption_handler,
- "location": _bucket_location_handler,
- "acl": _bucket_acl_handler,
- "versions": _bucket_list_versions_handler,
- "lifecycle": _bucket_lifecycle_handler,
- "quota": _bucket_quota_handler,
- "object-lock": _bucket_object_lock_handler,
- "notification": _bucket_notification_handler,
- "logging": _bucket_logging_handler,
- "uploads": _bucket_uploads_handler,
- "policy": _bucket_policy_handler,
- "policyStatus": _bucket_policy_status_handler,
- "replication": _bucket_replication_handler,
- "website": _bucket_website_handler,
- }
- requested = [key for key in handlers if key in request.args]
- if not requested:
- return None
- if len(requested) > 1:
- return _error_response(
- "InvalidRequest",
- "Only a single bucket subresource can be requested at a time",
- 400,
- )
- handler = handlers[requested[0]]
- return handler(bucket_name)
-
-
-def _bucket_versioning_handler(bucket_name: str) -> Response:
- if request.method not in {"GET", "PUT"}:
- return _method_not_allowed(["GET", "PUT"])
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "versioning")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
- storage = _storage()
-
- if request.method == "PUT":
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- if not payload.strip():
- return _error_response("MalformedXML", "Request body is required", 400)
- try:
- root = _parse_xml_with_limit(payload)
- except ParseError:
- return _error_response("MalformedXML", "Unable to parse XML document", 400)
- if _strip_ns(root.tag) != "VersioningConfiguration":
- return _error_response("MalformedXML", "Root element must be VersioningConfiguration", 400)
- status_el = root.find("{http://s3.amazonaws.com/doc/2006-03-01/}Status")
- if status_el is None:
- status_el = root.find("Status")
- status = (status_el.text or "").strip() if status_el is not None else ""
- if status not in {"Enabled", "Suspended", ""}:
- return _error_response("MalformedXML", "Status must be Enabled or Suspended", 400)
- try:
- storage.set_bucket_versioning(bucket_name, status == "Enabled")
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- current_app.logger.info("Bucket versioning updated", extra={"bucket": bucket_name, "status": status})
- return Response(status=200)
-
- try:
- enabled = storage.is_versioning_enabled(bucket_name)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- root = Element("VersioningConfiguration")
- SubElement(root, "Status").text = "Enabled" if enabled else "Suspended"
- return _xml_response(root)
-
-
-def _bucket_tagging_handler(bucket_name: str) -> Response:
- if request.method not in {"GET", "PUT", "DELETE"}:
- return _method_not_allowed(["GET", "PUT", "DELETE"])
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "tagging")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
- storage = _storage()
- if request.method == "GET":
- try:
- tags = storage.get_bucket_tags(bucket_name)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- if not tags:
- return _error_response("NoSuchTagSet", "No tags are configured for this bucket", 404)
- return _xml_response(_render_tagging_document(tags))
- if request.method == "DELETE":
- try:
- storage.set_bucket_tags(bucket_name, None)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- current_app.logger.info("Bucket tags deleted", extra={"bucket": bucket_name})
- return Response(status=204)
-
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- try:
- tags = _parse_tagging_document(payload)
- except ValueError as exc:
- return _error_response("MalformedXML", str(exc), 400)
- tag_limit = current_app.config.get("OBJECT_TAG_LIMIT", 50)
- if len(tags) > tag_limit:
- return _error_response("InvalidTag", f"A maximum of {tag_limit} tags is supported", 400)
- try:
- storage.set_bucket_tags(bucket_name, tags)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- current_app.logger.info("Bucket tags updated", extra={"bucket": bucket_name, "tags": len(tags)})
- return Response(status=204)
-
-
-def _object_tagging_handler(bucket_name: str, object_key: str) -> Response:
- """Handle object tagging operations (GET/PUT/DELETE //?tagging)."""
- if request.method not in {"GET", "PUT", "DELETE"}:
- return _method_not_allowed(["GET", "PUT", "DELETE"])
-
- principal, error = _require_principal()
- if error:
- return error
-
- action = "read" if request.method == "GET" else "write"
- try:
- _authorize_action(principal, bucket_name, action, object_key=object_key)
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
-
- storage = _storage()
-
- if request.method == "GET":
- try:
- tags = storage.get_object_tags(bucket_name, object_key)
- except BucketNotFoundError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- except ObjectNotFoundError as exc:
- return _error_response("NoSuchKey", str(exc), 404)
- except StorageError as exc:
- return _error_response("InternalError", str(exc), 500)
- return _xml_response(_render_tagging_document(tags))
-
- if request.method == "DELETE":
- try:
- storage.delete_object_tags(bucket_name, object_key)
- except BucketNotFoundError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- except ObjectNotFoundError as exc:
- return _error_response("NoSuchKey", str(exc), 404)
- except StorageError as exc:
- return _error_response("InternalError", str(exc), 500)
- current_app.logger.info("Object tags deleted", extra={"bucket": bucket_name, "key": object_key})
- return Response(status=204)
-
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- try:
- tags = _parse_tagging_document(payload)
- except ValueError as exc:
- return _error_response("MalformedXML", str(exc), 400)
- if len(tags) > 10:
- return _error_response("InvalidTag", "A maximum of 10 tags is supported for objects", 400)
- try:
- storage.set_object_tags(bucket_name, object_key, tags)
- except BucketNotFoundError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- except ObjectNotFoundError as exc:
- return _error_response("NoSuchKey", str(exc), 404)
- except StorageError as exc:
- return _error_response("InternalError", str(exc), 500)
- current_app.logger.info("Object tags updated", extra={"bucket": bucket_name, "key": object_key, "tags": len(tags)})
- return Response(status=204)
-
-
-def _validate_cors_origin(origin: str) -> bool:
- """Validate a CORS origin pattern."""
- import re
- origin = origin.strip()
- if not origin:
- return False
- if origin == "*":
- return True
- if origin.startswith("*."):
- domain = origin[2:]
- if not domain or ".." in domain:
- return False
- return bool(re.match(r'^[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)*$', domain))
- if origin.startswith(("http://", "https://")):
- try:
- from urllib.parse import urlparse
- parsed = urlparse(origin)
- if not parsed.netloc:
- return False
- if parsed.path and parsed.path != "/":
- return False
- return True
- except Exception:
- return False
- return False
-
-
-def _sanitize_cors_rules(rules: list[dict[str, Any]]) -> list[dict[str, Any]]:
- sanitized: list[dict[str, Any]] = []
- for rule in rules:
- allowed_origins = [origin.strip() for origin in rule.get("AllowedOrigins", []) if origin and origin.strip()]
- allowed_methods = [method.strip().upper() for method in rule.get("AllowedMethods", []) if method and method.strip()]
- allowed_headers = [header.strip() for header in rule.get("AllowedHeaders", []) if header and header.strip()]
- expose_headers = [header.strip() for header in rule.get("ExposeHeaders", []) if header and header.strip()]
- if not allowed_origins or not allowed_methods:
- raise ValueError("Each CORSRule must include AllowedOrigin and AllowedMethod entries")
- for origin in allowed_origins:
- if not _validate_cors_origin(origin):
- raise ValueError(f"Invalid CORS origin: {origin}")
- valid_methods = {"GET", "PUT", "POST", "DELETE", "HEAD"}
- for method in allowed_methods:
- if method not in valid_methods:
- raise ValueError(f"Invalid CORS method: {method}")
- sanitized_rule: dict[str, Any] = {
- "AllowedOrigins": allowed_origins,
- "AllowedMethods": allowed_methods,
- }
- if allowed_headers:
- sanitized_rule["AllowedHeaders"] = allowed_headers
- if expose_headers:
- sanitized_rule["ExposeHeaders"] = expose_headers
- if "MaxAgeSeconds" in rule and rule["MaxAgeSeconds"] is not None:
- sanitized_rule["MaxAgeSeconds"] = int(rule["MaxAgeSeconds"])
- sanitized.append(sanitized_rule)
- return sanitized
-
-
-def _bucket_cors_handler(bucket_name: str) -> Response:
- if request.method not in {"GET", "PUT", "DELETE"}:
- return _method_not_allowed(["GET", "PUT", "DELETE"])
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "cors")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
- storage = _storage()
- if request.method == "GET":
- try:
- rules = storage.get_bucket_cors(bucket_name)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- if not rules:
- return _error_response("NoSuchCORSConfiguration", "No CORS configuration found", 404)
- return _xml_response(_render_cors_document(rules))
- if request.method == "DELETE":
- try:
- storage.set_bucket_cors(bucket_name, None)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- current_app.logger.info("Bucket CORS deleted", extra={"bucket": bucket_name})
- return Response(status=204)
-
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- if not payload.strip():
- try:
- storage.set_bucket_cors(bucket_name, None)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- current_app.logger.info("Bucket CORS cleared", extra={"bucket": bucket_name})
- return Response(status=204)
- try:
- rules = _parse_cors_document(payload)
- sanitized = _sanitize_cors_rules(rules)
- except ValueError as exc:
- return _error_response("MalformedXML", str(exc), 400)
- if not sanitized:
- return _error_response("InvalidRequest", "At least one CORSRule must be supplied", 400)
- try:
- storage.set_bucket_cors(bucket_name, sanitized)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- current_app.logger.info("Bucket CORS updated", extra={"bucket": bucket_name, "rules": len(sanitized)})
- return Response(status=204)
-
-
-def _bucket_encryption_handler(bucket_name: str) -> Response:
- if request.method not in {"GET", "PUT", "DELETE"}:
- return _method_not_allowed(["GET", "PUT", "DELETE"])
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "encryption")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
- storage = _storage()
- if request.method == "GET":
- try:
- config = storage.get_bucket_encryption(bucket_name)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- if not config:
- return _error_response(
- "ServerSideEncryptionConfigurationNotFoundError",
- "No server-side encryption configuration found",
- 404,
- )
- return _xml_response(_render_encryption_document(config))
- if request.method == "DELETE":
- try:
- storage.set_bucket_encryption(bucket_name, None)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- current_app.logger.info("Bucket encryption deleted", extra={"bucket": bucket_name})
- return Response(status=204)
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- if not payload.strip():
- try:
- storage.set_bucket_encryption(bucket_name, None)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- current_app.logger.info("Bucket encryption cleared", extra={"bucket": bucket_name})
- return Response(status=204)
- try:
- config = _parse_encryption_document(payload)
- except ValueError as exc:
- return _error_response("MalformedXML", str(exc), 400)
- try:
- storage.set_bucket_encryption(bucket_name, config)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- current_app.logger.info("Bucket encryption updated", extra={"bucket": bucket_name})
- return Response(status=204)
-
-
-def _bucket_location_handler(bucket_name: str) -> Response:
- if request.method != "GET":
- return _method_not_allowed(["GET"])
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "list")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
- storage = _storage()
- if not storage.bucket_exists(bucket_name):
- return _error_response("NoSuchBucket", "Bucket does not exist", 404)
-
- region = current_app.config.get("AWS_REGION", "us-east-1")
- root = Element("LocationConstraint")
- root.text = region if region != "us-east-1" else None
- return _xml_response(root)
-
-
-def _bucket_acl_handler(bucket_name: str) -> Response:
- from .acl import create_canned_acl, Acl, AclGrant, GRANTEE_ALL_USERS, GRANTEE_AUTHENTICATED_USERS
-
- if request.method not in {"GET", "PUT"}:
- return _method_not_allowed(["GET", "PUT"])
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "share")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
- storage = _storage()
- if not storage.bucket_exists(bucket_name):
- return _error_response("NoSuchBucket", "Bucket does not exist", 404)
-
- acl_service = _acl()
- owner_id = principal.access_key if principal else "anonymous"
-
- if request.method == "PUT":
- canned_acl = request.headers.get("x-amz-acl", "private")
- acl = acl_service.set_bucket_canned_acl(bucket_name, canned_acl, owner_id)
- current_app.logger.info("Bucket ACL set", extra={"bucket": bucket_name, "acl": canned_acl})
- return Response(status=200)
-
- acl = acl_service.get_bucket_acl(bucket_name)
- if not acl:
- acl = create_canned_acl("private", owner_id)
-
- root = Element("AccessControlPolicy")
- owner_el = SubElement(root, "Owner")
- SubElement(owner_el, "ID").text = acl.owner
- SubElement(owner_el, "DisplayName").text = acl.owner
-
- acl_el = SubElement(root, "AccessControlList")
- for grant in acl.grants:
- grant_el = SubElement(acl_el, "Grant")
- grantee = SubElement(grant_el, "Grantee")
- if grant.grantee == GRANTEE_ALL_USERS:
- grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "Group")
- SubElement(grantee, "URI").text = "http://acs.amazonaws.com/groups/global/AllUsers"
- elif grant.grantee == GRANTEE_AUTHENTICATED_USERS:
- grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "Group")
- SubElement(grantee, "URI").text = "http://acs.amazonaws.com/groups/global/AuthenticatedUsers"
- else:
- grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "CanonicalUser")
- SubElement(grantee, "ID").text = grant.grantee
- SubElement(grantee, "DisplayName").text = grant.grantee
- SubElement(grant_el, "Permission").text = grant.permission
-
- return _xml_response(root)
-
-
-def _object_acl_handler(bucket_name: str, object_key: str) -> Response:
- from .acl import create_canned_acl, GRANTEE_ALL_USERS, GRANTEE_AUTHENTICATED_USERS
-
- if request.method not in {"GET", "PUT"}:
- return _method_not_allowed(["GET", "PUT"])
- storage = _storage()
- try:
- path = storage.get_object_path(bucket_name, object_key)
- except (StorageError, FileNotFoundError):
- return _error_response("NoSuchKey", "Object not found", 404)
-
- if request.method == "PUT":
- principal, error = _object_principal("write", bucket_name, object_key)
- if error:
- return error
- owner_id = principal.access_key if principal else "anonymous"
- canned_acl = request.headers.get("x-amz-acl", "private")
- acl = create_canned_acl(canned_acl, owner_id)
- acl_service = _acl()
- metadata = storage.get_object_metadata(bucket_name, object_key)
- metadata.update(acl_service.create_object_acl_metadata(acl))
- safe_key = storage._sanitize_object_key(object_key, storage._object_key_max_length_bytes)
- storage._write_metadata(bucket_name, safe_key, metadata)
- current_app.logger.info("Object ACL set", extra={"bucket": bucket_name, "key": object_key, "acl": canned_acl})
- return Response(status=200)
-
- principal, error = _object_principal("read", bucket_name, object_key)
- if error:
- return error
- owner_id = principal.access_key if principal else "anonymous"
- acl_service = _acl()
- metadata = storage.get_object_metadata(bucket_name, object_key)
- acl = acl_service.get_object_acl(bucket_name, object_key, metadata)
- if not acl:
- acl = create_canned_acl("private", owner_id)
-
- root = Element("AccessControlPolicy")
- owner_el = SubElement(root, "Owner")
- SubElement(owner_el, "ID").text = acl.owner
- SubElement(owner_el, "DisplayName").text = acl.owner
- acl_el = SubElement(root, "AccessControlList")
- for grant in acl.grants:
- grant_el = SubElement(acl_el, "Grant")
- grantee = SubElement(grant_el, "Grantee")
- if grant.grantee == GRANTEE_ALL_USERS:
- grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "Group")
- SubElement(grantee, "URI").text = "http://acs.amazonaws.com/groups/global/AllUsers"
- elif grant.grantee == GRANTEE_AUTHENTICATED_USERS:
- grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "Group")
- SubElement(grantee, "URI").text = "http://acs.amazonaws.com/groups/global/AuthenticatedUsers"
- else:
- grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "CanonicalUser")
- SubElement(grantee, "ID").text = grant.grantee
- SubElement(grantee, "DisplayName").text = grant.grantee
- SubElement(grant_el, "Permission").text = grant.permission
- return _xml_response(root)
-
-
-def _object_attributes_handler(bucket_name: str, object_key: str) -> Response:
- if request.method != "GET":
- return _method_not_allowed(["GET"])
- principal, error = _object_principal("read", bucket_name, object_key)
- if error:
- return error
- storage = _storage()
- try:
- path = storage.get_object_path(bucket_name, object_key)
- file_stat = path.stat()
- metadata = storage.get_object_metadata(bucket_name, object_key)
- except (StorageError, FileNotFoundError):
- return _error_response("NoSuchKey", "Object not found", 404)
-
- requested = request.headers.get("x-amz-object-attributes", "")
- attrs = {a.strip() for a in requested.split(",") if a.strip()}
-
- root = Element("GetObjectAttributesResponse")
- if "ETag" in attrs:
- etag = metadata.get("__etag__") or storage._compute_etag(path)
- SubElement(root, "ETag").text = etag
- if "StorageClass" in attrs:
- SubElement(root, "StorageClass").text = "STANDARD"
- if "ObjectSize" in attrs:
- SubElement(root, "ObjectSize").text = str(file_stat.st_size)
- if "Checksum" in attrs:
- SubElement(root, "Checksum")
- if "ObjectParts" in attrs:
- SubElement(root, "ObjectParts")
-
- response = _xml_response(root)
- response.headers["Last-Modified"] = http_date(file_stat.st_mtime)
- return response
-
-
-def _bucket_list_versions_handler(bucket_name: str) -> Response:
- """Handle ListObjectVersions (GET /?versions)."""
- if request.method != "GET":
- return _method_not_allowed(["GET"])
-
- principal, error = _require_principal()
- try:
- _authorize_action(principal, bucket_name, "list")
- except IamError as exc:
- if error:
- return error
- return _error_response("AccessDenied", str(exc), 403)
-
- storage = _storage()
-
- try:
- objects = storage.list_objects_all(bucket_name)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
-
- prefix = request.args.get("prefix", "")
- delimiter = request.args.get("delimiter", "")
- try:
- max_keys = int(request.args.get("max-keys", 1000))
- if max_keys < 1:
- return _error_response("InvalidArgument", "max-keys must be a positive integer", 400)
- max_keys = min(max_keys, 1000)
- except ValueError:
- return _error_response("InvalidArgument", "max-keys must be an integer", 400)
- key_marker = request.args.get("key-marker", "")
-
- if prefix:
- objects = [obj for obj in objects if obj.key.startswith(prefix)]
-
- if key_marker:
- objects = [obj for obj in objects if obj.key > key_marker]
-
- root = Element("ListVersionsResult", xmlns="http://s3.amazonaws.com/doc/2006-03-01/")
- SubElement(root, "Name").text = bucket_name
- SubElement(root, "Prefix").text = prefix
- SubElement(root, "KeyMarker").text = key_marker
- SubElement(root, "MaxKeys").text = str(max_keys)
- if delimiter:
- SubElement(root, "Delimiter").text = delimiter
-
- version_count = 0
- is_truncated = False
- next_key_marker = ""
-
- for obj in objects:
- if version_count >= max_keys:
- is_truncated = True
- break
-
- version = SubElement(root, "Version")
- SubElement(version, "Key").text = obj.key
- SubElement(version, "VersionId").text = "null"
- SubElement(version, "IsLatest").text = "true"
- SubElement(version, "LastModified").text = obj.last_modified.strftime("%Y-%m-%dT%H:%M:%S.000Z")
- if obj.etag:
- SubElement(version, "ETag").text = f'"{obj.etag}"'
- SubElement(version, "Size").text = str(obj.size)
- SubElement(version, "StorageClass").text = "STANDARD"
-
- owner = SubElement(version, "Owner")
- SubElement(owner, "ID").text = "local-owner"
- SubElement(owner, "DisplayName").text = "Local Owner"
-
- version_count += 1
- next_key_marker = obj.key
-
- try:
- versions = storage.list_object_versions(bucket_name, obj.key)
- for v in versions:
- if version_count >= max_keys:
- is_truncated = True
- break
-
- ver_elem = SubElement(root, "Version")
- SubElement(ver_elem, "Key").text = obj.key
- SubElement(ver_elem, "VersionId").text = v.get("version_id", "unknown")
- SubElement(ver_elem, "IsLatest").text = "false"
- SubElement(ver_elem, "LastModified").text = v.get("archived_at") or "1970-01-01T00:00:00Z"
- SubElement(ver_elem, "ETag").text = f'"{v.get("etag", "")}"'
- SubElement(ver_elem, "Size").text = str(v.get("size", 0))
- SubElement(ver_elem, "StorageClass").text = "STANDARD"
-
- owner = SubElement(ver_elem, "Owner")
- SubElement(owner, "ID").text = "local-owner"
- SubElement(owner, "DisplayName").text = "Local Owner"
-
- version_count += 1
- except StorageError:
- pass
-
- SubElement(root, "IsTruncated").text = "true" if is_truncated else "false"
- if is_truncated and next_key_marker:
- SubElement(root, "NextKeyMarker").text = next_key_marker
-
- return _xml_response(root)
-
-
-def _bucket_lifecycle_handler(bucket_name: str) -> Response:
- """Handle bucket lifecycle configuration (GET/PUT/DELETE /?lifecycle)."""
- if request.method not in {"GET", "PUT", "DELETE"}:
- return _method_not_allowed(["GET", "PUT", "DELETE"])
-
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "lifecycle")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
-
- storage = _storage()
-
- if not storage.bucket_exists(bucket_name):
- return _error_response("NoSuchBucket", "Bucket does not exist", 404)
-
- if request.method == "GET":
- config = storage.get_bucket_lifecycle(bucket_name)
- if not config:
- return _error_response("NoSuchLifecycleConfiguration", "The lifecycle configuration does not exist", 404)
- return _xml_response(_render_lifecycle_config(config))
-
- if request.method == "DELETE":
- storage.set_bucket_lifecycle(bucket_name, None)
- current_app.logger.info("Bucket lifecycle deleted", extra={"bucket": bucket_name})
- return Response(status=204)
-
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- if not payload.strip():
- return _error_response("MalformedXML", "Request body is required", 400)
- try:
- config = _parse_lifecycle_config(payload)
- storage.set_bucket_lifecycle(bucket_name, config)
- except ValueError as exc:
- return _error_response("MalformedXML", str(exc), 400)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
-
- current_app.logger.info("Bucket lifecycle updated", extra={"bucket": bucket_name})
- return Response(status=200)
-
-
-def _render_lifecycle_config(config: list) -> Element:
- """Render lifecycle configuration to XML."""
- root = Element("LifecycleConfiguration", xmlns="http://s3.amazonaws.com/doc/2006-03-01/")
- for rule in config:
- rule_el = SubElement(root, "Rule")
- SubElement(rule_el, "ID").text = rule.get("ID", "")
-
- filter_el = SubElement(rule_el, "Filter")
- if rule.get("Prefix"):
- SubElement(filter_el, "Prefix").text = rule.get("Prefix", "")
-
- SubElement(rule_el, "Status").text = rule.get("Status", "Enabled")
-
- if "Expiration" in rule:
- exp = rule["Expiration"]
- exp_el = SubElement(rule_el, "Expiration")
- if "Days" in exp:
- SubElement(exp_el, "Days").text = str(exp["Days"])
- if "Date" in exp:
- SubElement(exp_el, "Date").text = exp["Date"]
- if exp.get("ExpiredObjectDeleteMarker"):
- SubElement(exp_el, "ExpiredObjectDeleteMarker").text = "true"
-
- if "NoncurrentVersionExpiration" in rule:
- nve = rule["NoncurrentVersionExpiration"]
- nve_el = SubElement(rule_el, "NoncurrentVersionExpiration")
- if "NoncurrentDays" in nve:
- SubElement(nve_el, "NoncurrentDays").text = str(nve["NoncurrentDays"])
-
- if "AbortIncompleteMultipartUpload" in rule:
- aimu = rule["AbortIncompleteMultipartUpload"]
- aimu_el = SubElement(rule_el, "AbortIncompleteMultipartUpload")
- if "DaysAfterInitiation" in aimu:
- SubElement(aimu_el, "DaysAfterInitiation").text = str(aimu["DaysAfterInitiation"])
-
- return root
-
-
-def _parse_lifecycle_config(payload: bytes) -> list:
- """Parse lifecycle configuration from XML."""
- try:
- root = _parse_xml_with_limit(payload)
- except ParseError as exc:
- raise ValueError(f"Unable to parse XML document: {exc}") from exc
-
- if _strip_ns(root.tag) != "LifecycleConfiguration":
- raise ValueError("Root element must be LifecycleConfiguration")
-
- rules = []
- for rule_el in root.findall("{http://s3.amazonaws.com/doc/2006-03-01/}Rule") or root.findall("Rule"):
- rule: dict = {}
-
- id_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}ID") or rule_el.find("ID")
- if id_el is not None and id_el.text:
- rule["ID"] = id_el.text.strip()
-
- filter_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Filter") or rule_el.find("Filter")
- if filter_el is not None:
- prefix_el = filter_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Prefix") or filter_el.find("Prefix")
- if prefix_el is not None and prefix_el.text:
- rule["Prefix"] = prefix_el.text
-
- if "Prefix" not in rule:
- prefix_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Prefix") or rule_el.find("Prefix")
- if prefix_el is not None:
- rule["Prefix"] = prefix_el.text or ""
-
- status_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Status") or rule_el.find("Status")
- rule["Status"] = (status_el.text or "Enabled").strip() if status_el is not None else "Enabled"
-
- exp_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Expiration") or rule_el.find("Expiration")
- if exp_el is not None:
- expiration: dict = {}
- days_el = exp_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Days") or exp_el.find("Days")
- if days_el is not None and days_el.text:
- days_val = int(days_el.text.strip())
- if days_val <= 0:
- raise ValueError("Expiration Days must be a positive integer")
- expiration["Days"] = days_val
- date_el = exp_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}Date") or exp_el.find("Date")
- if date_el is not None and date_el.text:
- expiration["Date"] = date_el.text.strip()
- eodm_el = exp_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}ExpiredObjectDeleteMarker") or exp_el.find("ExpiredObjectDeleteMarker")
- if eodm_el is not None and (eodm_el.text or "").strip().lower() in {"true", "1"}:
- expiration["ExpiredObjectDeleteMarker"] = True
- if expiration:
- rule["Expiration"] = expiration
-
- nve_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}NoncurrentVersionExpiration") or rule_el.find("NoncurrentVersionExpiration")
- if nve_el is not None:
- nve: dict = {}
- days_el = nve_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}NoncurrentDays") or nve_el.find("NoncurrentDays")
- if days_el is not None and days_el.text:
- noncurrent_days = int(days_el.text.strip())
- if noncurrent_days <= 0:
- raise ValueError("NoncurrentDays must be a positive integer")
- nve["NoncurrentDays"] = noncurrent_days
- if nve:
- rule["NoncurrentVersionExpiration"] = nve
-
- aimu_el = rule_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}AbortIncompleteMultipartUpload") or rule_el.find("AbortIncompleteMultipartUpload")
- if aimu_el is not None:
- aimu: dict = {}
- days_el = aimu_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}DaysAfterInitiation") or aimu_el.find("DaysAfterInitiation")
- if days_el is not None and days_el.text:
- days_after = int(days_el.text.strip())
- if days_after <= 0:
- raise ValueError("DaysAfterInitiation must be a positive integer")
- aimu["DaysAfterInitiation"] = days_after
- if aimu:
- rule["AbortIncompleteMultipartUpload"] = aimu
-
- rules.append(rule)
-
- return rules
-
-
-def _bucket_quota_handler(bucket_name: str) -> Response:
- """Handle bucket quota configuration (GET/PUT/DELETE /?quota)."""
- if request.method not in {"GET", "PUT", "DELETE"}:
- return _method_not_allowed(["GET", "PUT", "DELETE"])
-
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "quota")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
-
- storage = _storage()
-
- if not storage.bucket_exists(bucket_name):
- return _error_response("NoSuchBucket", "Bucket does not exist", 404)
-
- if request.method == "GET":
- quota = storage.get_bucket_quota(bucket_name)
- if not quota:
- return _error_response("NoSuchQuotaConfiguration", "No quota configuration found", 404)
-
- stats = storage.bucket_stats(bucket_name)
- return jsonify({
- "quota": quota,
- "usage": {
- "bytes": stats.get("bytes", 0),
- "objects": stats.get("objects", 0),
- }
- })
-
- if request.method == "DELETE":
- try:
- storage.set_bucket_quota(bucket_name, max_bytes=None, max_objects=None)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- current_app.logger.info("Bucket quota deleted", extra={"bucket": bucket_name})
- return Response(status=204)
-
- payload = request.get_json(silent=True)
- if not payload:
- return _error_response("MalformedRequest", "Request body must be JSON with quota limits", 400)
-
- max_size_bytes = payload.get("max_size_bytes")
- max_objects = payload.get("max_objects")
-
- if max_size_bytes is None and max_objects is None:
- return _error_response("InvalidArgument", "At least one of max_size_bytes or max_objects is required", 400)
-
- if max_size_bytes is not None:
- try:
- max_size_bytes = int(max_size_bytes)
- if max_size_bytes < 0:
- raise ValueError("must be non-negative")
- except (TypeError, ValueError) as exc:
- return _error_response("InvalidArgument", f"max_size_bytes {exc}", 400)
-
- if max_objects is not None:
- try:
- max_objects = int(max_objects)
- if max_objects < 0:
- raise ValueError("must be non-negative")
- except (TypeError, ValueError) as exc:
- return _error_response("InvalidArgument", f"max_objects {exc}", 400)
-
- try:
- storage.set_bucket_quota(bucket_name, max_bytes=max_size_bytes, max_objects=max_objects)
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
-
- current_app.logger.info(
- "Bucket quota updated",
- extra={"bucket": bucket_name, "max_size_bytes": max_size_bytes, "max_objects": max_objects}
- )
- return Response(status=204)
-
-
-def _bucket_object_lock_handler(bucket_name: str) -> Response:
- if request.method not in {"GET", "PUT"}:
- return _method_not_allowed(["GET", "PUT"])
-
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "object_lock")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
-
- storage = _storage()
- if not storage.bucket_exists(bucket_name):
- return _error_response("NoSuchBucket", "Bucket does not exist", 404)
-
- lock_service = _object_lock()
-
- if request.method == "GET":
- config = lock_service.get_bucket_lock_config(bucket_name)
- root = Element("ObjectLockConfiguration", xmlns="http://s3.amazonaws.com/doc/2006-03-01/")
- SubElement(root, "ObjectLockEnabled").text = "Enabled" if config.enabled else "Disabled"
- return _xml_response(root)
-
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- if not payload.strip():
- return _error_response("MalformedXML", "Request body is required", 400)
-
- try:
- root = _parse_xml_with_limit(payload)
- except ParseError:
- return _error_response("MalformedXML", "Unable to parse XML document", 400)
-
- enabled_el = root.find("{http://s3.amazonaws.com/doc/2006-03-01/}ObjectLockEnabled") or root.find("ObjectLockEnabled")
- enabled = (enabled_el.text or "").strip() == "Enabled" if enabled_el is not None else False
-
- config = ObjectLockConfig(enabled=enabled)
- lock_service.set_bucket_lock_config(bucket_name, config)
-
- current_app.logger.info("Bucket object lock updated", extra={"bucket": bucket_name, "enabled": enabled})
- return Response(status=200)
-
-
-def _bucket_notification_handler(bucket_name: str) -> Response:
- if request.method not in {"GET", "PUT", "DELETE"}:
- return _method_not_allowed(["GET", "PUT", "DELETE"])
-
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "notification")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
-
- storage = _storage()
- if not storage.bucket_exists(bucket_name):
- return _error_response("NoSuchBucket", "Bucket does not exist", 404)
-
- notification_service = _notifications()
-
- if request.method == "GET":
- configs = notification_service.get_bucket_notifications(bucket_name)
- root = Element("NotificationConfiguration", xmlns="http://s3.amazonaws.com/doc/2006-03-01/")
- for config in configs:
- webhook_el = SubElement(root, "WebhookConfiguration")
- SubElement(webhook_el, "Id").text = config.id
- for event in config.events:
- SubElement(webhook_el, "Event").text = event
- dest_el = SubElement(webhook_el, "Destination")
- SubElement(dest_el, "Url").text = config.destination.url
- if config.prefix_filter or config.suffix_filter:
- filter_el = SubElement(webhook_el, "Filter")
- key_el = SubElement(filter_el, "S3Key")
- if config.prefix_filter:
- rule_el = SubElement(key_el, "FilterRule")
- SubElement(rule_el, "Name").text = "prefix"
- SubElement(rule_el, "Value").text = config.prefix_filter
- if config.suffix_filter:
- rule_el = SubElement(key_el, "FilterRule")
- SubElement(rule_el, "Name").text = "suffix"
- SubElement(rule_el, "Value").text = config.suffix_filter
- return _xml_response(root)
-
- if request.method == "DELETE":
- notification_service.delete_bucket_notifications(bucket_name)
- current_app.logger.info("Bucket notifications deleted", extra={"bucket": bucket_name})
- return Response(status=204)
-
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- if not payload.strip():
- notification_service.delete_bucket_notifications(bucket_name)
- return Response(status=200)
-
- try:
- root = _parse_xml_with_limit(payload)
- except ParseError:
- return _error_response("MalformedXML", "Unable to parse XML document", 400)
-
- configs: list[NotificationConfiguration] = []
- for webhook_el in root.findall("{http://s3.amazonaws.com/doc/2006-03-01/}WebhookConfiguration") or root.findall("WebhookConfiguration"):
- config_id = _find_element_text(webhook_el, "Id") or uuid.uuid4().hex
- events = [el.text for el in webhook_el.findall("{http://s3.amazonaws.com/doc/2006-03-01/}Event") or webhook_el.findall("Event") if el.text]
-
- dest_el = _find_element(webhook_el, "Destination")
- url = _find_element_text(dest_el, "Url") if dest_el else ""
- if not url:
- return _error_response("InvalidArgument", "Destination URL is required", 400)
-
- prefix = ""
- suffix = ""
- filter_el = _find_element(webhook_el, "Filter")
- if filter_el:
- key_el = _find_element(filter_el, "S3Key")
- if key_el:
- for rule_el in key_el.findall("{http://s3.amazonaws.com/doc/2006-03-01/}FilterRule") or key_el.findall("FilterRule"):
- name = _find_element_text(rule_el, "Name")
- value = _find_element_text(rule_el, "Value")
- if name == "prefix":
- prefix = value
- elif name == "suffix":
- suffix = value
-
- configs.append(NotificationConfiguration(
- id=config_id,
- events=events,
- destination=WebhookDestination(url=url),
- prefix_filter=prefix,
- suffix_filter=suffix,
- ))
-
- notification_service.set_bucket_notifications(bucket_name, configs)
- current_app.logger.info("Bucket notifications updated", extra={"bucket": bucket_name, "configs": len(configs)})
- return Response(status=200)
-
-
-def _bucket_logging_handler(bucket_name: str) -> Response:
- if request.method not in {"GET", "PUT", "DELETE"}:
- return _method_not_allowed(["GET", "PUT", "DELETE"])
-
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "logging")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
-
- storage = _storage()
- if not storage.bucket_exists(bucket_name):
- return _error_response("NoSuchBucket", "Bucket does not exist", 404)
-
- logging_service = _access_logging()
-
- if request.method == "GET":
- config = logging_service.get_bucket_logging(bucket_name)
- root = Element("BucketLoggingStatus", xmlns="http://s3.amazonaws.com/doc/2006-03-01/")
- if config and config.enabled:
- logging_enabled = SubElement(root, "LoggingEnabled")
- SubElement(logging_enabled, "TargetBucket").text = config.target_bucket
- SubElement(logging_enabled, "TargetPrefix").text = config.target_prefix
- return _xml_response(root)
-
- if request.method == "DELETE":
- logging_service.delete_bucket_logging(bucket_name)
- current_app.logger.info("Bucket logging deleted", extra={"bucket": bucket_name})
- return Response(status=204)
-
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- if not payload.strip():
- logging_service.delete_bucket_logging(bucket_name)
- return Response(status=200)
-
- try:
- root = _parse_xml_with_limit(payload)
- except ParseError:
- return _error_response("MalformedXML", "Unable to parse XML document", 400)
-
- logging_enabled = _find_element(root, "LoggingEnabled")
- if logging_enabled is None:
- logging_service.delete_bucket_logging(bucket_name)
- return Response(status=200)
-
- target_bucket = _find_element_text(logging_enabled, "TargetBucket")
- if not target_bucket:
- return _error_response("InvalidArgument", "TargetBucket is required", 400)
-
- if not storage.bucket_exists(target_bucket):
- return _error_response("InvalidTargetBucketForLogging", "Target bucket does not exist", 400)
-
- target_prefix = _find_element_text(logging_enabled, "TargetPrefix")
-
- config = LoggingConfiguration(
- target_bucket=target_bucket,
- target_prefix=target_prefix,
- enabled=True,
- )
- logging_service.set_bucket_logging(bucket_name, config)
-
- current_app.logger.info(
- "Bucket logging updated",
- extra={"bucket": bucket_name, "target_bucket": target_bucket, "target_prefix": target_prefix}
- )
- return Response(status=200)
-
-
-def _bucket_uploads_handler(bucket_name: str) -> Response:
- if request.method != "GET":
- return _method_not_allowed(["GET"])
-
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "list")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
-
- storage = _storage()
- if not storage.bucket_exists(bucket_name):
- return _error_response("NoSuchBucket", "Bucket does not exist", 404)
-
- key_marker = request.args.get("key-marker", "")
- upload_id_marker = request.args.get("upload-id-marker", "")
- prefix = request.args.get("prefix", "")
- delimiter = request.args.get("delimiter", "")
- try:
- max_uploads = int(request.args.get("max-uploads", 1000))
- if max_uploads < 1:
- return _error_response("InvalidArgument", "max-uploads must be a positive integer", 400)
- max_uploads = min(max_uploads, 1000)
- except ValueError:
- return _error_response("InvalidArgument", "max-uploads must be an integer", 400)
-
- uploads = storage.list_multipart_uploads(bucket_name, include_orphaned=True)
-
- if prefix:
- uploads = [u for u in uploads if u["object_key"].startswith(prefix)]
- if key_marker:
- uploads = [u for u in uploads if u["object_key"] > key_marker or
- (u["object_key"] == key_marker and upload_id_marker and u["upload_id"] > upload_id_marker)]
-
- uploads.sort(key=lambda u: (u["object_key"], u["upload_id"]))
-
- is_truncated = len(uploads) > max_uploads
- if is_truncated:
- uploads = uploads[:max_uploads]
-
- root = Element("ListMultipartUploadsResult", xmlns="http://s3.amazonaws.com/doc/2006-03-01/")
- SubElement(root, "Bucket").text = bucket_name
- SubElement(root, "KeyMarker").text = key_marker
- SubElement(root, "UploadIdMarker").text = upload_id_marker
- if prefix:
- SubElement(root, "Prefix").text = prefix
- if delimiter:
- SubElement(root, "Delimiter").text = delimiter
- SubElement(root, "MaxUploads").text = str(max_uploads)
- SubElement(root, "IsTruncated").text = "true" if is_truncated else "false"
-
- if is_truncated and uploads:
- SubElement(root, "NextKeyMarker").text = uploads[-1]["object_key"]
- SubElement(root, "NextUploadIdMarker").text = uploads[-1]["upload_id"]
-
- for upload in uploads:
- upload_el = SubElement(root, "Upload")
- SubElement(upload_el, "Key").text = upload["object_key"]
- SubElement(upload_el, "UploadId").text = upload["upload_id"]
- if upload.get("created_at"):
- SubElement(upload_el, "Initiated").text = upload["created_at"]
- if upload.get("orphaned"):
- SubElement(upload_el, "StorageClass").text = "ORPHANED"
-
- return _xml_response(root)
-
-
-def _object_retention_handler(bucket_name: str, object_key: str) -> Response:
- if request.method not in {"GET", "PUT"}:
- return _method_not_allowed(["GET", "PUT"])
-
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "object_lock", object_key=object_key)
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
-
- storage = _storage()
- if not storage.bucket_exists(bucket_name):
- return _error_response("NoSuchBucket", "Bucket does not exist", 404)
-
- try:
- storage.get_object_path(bucket_name, object_key)
- except StorageError:
- return _error_response("NoSuchKey", "Object does not exist", 404)
-
- lock_service = _object_lock()
-
- if request.method == "GET":
- retention = lock_service.get_object_retention(bucket_name, object_key)
- if not retention:
- return _error_response("NoSuchObjectLockConfiguration", "No retention policy", 404)
-
- root = Element("Retention", xmlns="http://s3.amazonaws.com/doc/2006-03-01/")
- SubElement(root, "Mode").text = retention.mode.value
- SubElement(root, "RetainUntilDate").text = retention.retain_until_date.strftime("%Y-%m-%dT%H:%M:%S.000Z")
- return _xml_response(root)
-
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- if not payload.strip():
- return _error_response("MalformedXML", "Request body is required", 400)
-
- try:
- root = _parse_xml_with_limit(payload)
- except ParseError:
- return _error_response("MalformedXML", "Unable to parse XML document", 400)
-
- mode_str = _find_element_text(root, "Mode")
- retain_until_str = _find_element_text(root, "RetainUntilDate")
-
- if not mode_str or not retain_until_str:
- return _error_response("InvalidArgument", "Mode and RetainUntilDate are required", 400)
-
- try:
- mode = RetentionMode(mode_str)
- except ValueError:
- return _error_response("InvalidArgument", f"Invalid retention mode: {mode_str}", 400)
-
- try:
- retain_until = datetime.fromisoformat(retain_until_str.replace("Z", "+00:00"))
- except ValueError:
- return _error_response("InvalidArgument", f"Invalid date format: {retain_until_str}", 400)
-
- bypass = request.headers.get("x-amz-bypass-governance-retention", "").lower() == "true"
-
- retention = ObjectLockRetention(mode=mode, retain_until_date=retain_until)
- try:
- lock_service.set_object_retention(bucket_name, object_key, retention, bypass_governance=bypass)
- except ObjectLockError as exc:
- return _error_response("AccessDenied", str(exc), 403)
-
- current_app.logger.info(
- "Object retention set",
- extra={"bucket": bucket_name, "key": object_key, "mode": mode_str, "until": retain_until_str}
- )
- return Response(status=200)
-
-
-def _object_legal_hold_handler(bucket_name: str, object_key: str) -> Response:
- if request.method not in {"GET", "PUT"}:
- return _method_not_allowed(["GET", "PUT"])
-
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "object_lock", object_key=object_key)
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
-
- storage = _storage()
- if not storage.bucket_exists(bucket_name):
- return _error_response("NoSuchBucket", "Bucket does not exist", 404)
-
- try:
- storage.get_object_path(bucket_name, object_key)
- except StorageError:
- return _error_response("NoSuchKey", "Object does not exist", 404)
-
- lock_service = _object_lock()
-
- if request.method == "GET":
- enabled = lock_service.get_legal_hold(bucket_name, object_key)
- root = Element("LegalHold", xmlns="http://s3.amazonaws.com/doc/2006-03-01/")
- SubElement(root, "Status").text = "ON" if enabled else "OFF"
- return _xml_response(root)
-
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- if not payload.strip():
- return _error_response("MalformedXML", "Request body is required", 400)
-
- try:
- root = _parse_xml_with_limit(payload)
- except ParseError:
- return _error_response("MalformedXML", "Unable to parse XML document", 400)
-
- status = _find_element_text(root, "Status")
- if status not in {"ON", "OFF"}:
- return _error_response("InvalidArgument", "Status must be ON or OFF", 400)
-
- lock_service.set_legal_hold(bucket_name, object_key, status == "ON")
-
- current_app.logger.info(
- "Object legal hold set",
- extra={"bucket": bucket_name, "key": object_key, "status": status}
- )
- return Response(status=200)
-
-
-def _bulk_delete_handler(bucket_name: str) -> Response:
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "delete")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
-
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- if not payload.strip():
- return _error_response("MalformedXML", "Request body must include a Delete specification", 400)
- try:
- root = _parse_xml_with_limit(payload)
- except ParseError:
- return _error_response("MalformedXML", "Unable to parse XML document", 400)
- if _strip_ns(root.tag) != "Delete":
- return _error_response("MalformedXML", "Root element must be Delete", 400)
-
- quiet = False
- objects: list[dict[str, str | None]] = []
- for child in list(root):
- name = _strip_ns(child.tag)
- if name == "Quiet":
- quiet = (child.text or "").strip().lower() in {"true", "1"}
- continue
- if name != "Object":
- continue
- key_text = ""
- version_text: str | None = None
- for entry in list(child):
- entry_name = _strip_ns(entry.tag)
- if entry_name == "Key":
- key_text = (entry.text or "").strip()
- elif entry_name == "VersionId":
- version_text = (entry.text or "").strip() or None
- if not key_text:
- continue
- objects.append({"Key": key_text, "VersionId": version_text})
-
- if not objects:
- return _error_response("MalformedXML", "At least one Object entry is required", 400)
- if len(objects) > 1000:
- return _error_response("MalformedXML", "A maximum of 1000 objects can be deleted per request", 400)
-
- storage = _storage()
- deleted: list[dict[str, str | None]] = []
- errors: list[dict[str, str]] = []
- for entry in objects:
- key = entry["Key"] or ""
- version_id = entry.get("VersionId")
- try:
- if version_id:
- storage.delete_object_version(bucket_name, key, version_id)
- deleted.append({"Key": key, "VersionId": version_id})
- else:
- storage.delete_object(bucket_name, key)
- deleted.append({"Key": key, "VersionId": None})
- except StorageError as exc:
- errors.append({"Key": key, "Code": "InvalidRequest", "Message": str(exc)})
-
- result = Element("DeleteResult")
- if not quiet:
- for item in deleted:
- deleted_el = SubElement(result, "Deleted")
- SubElement(deleted_el, "Key").text = item["Key"]
- if item.get("VersionId"):
- SubElement(deleted_el, "VersionId").text = item["VersionId"]
- for err in errors:
- error_el = SubElement(result, "Error")
- SubElement(error_el, "Key").text = err.get("Key", "")
- SubElement(error_el, "Code").text = err.get("Code", "InvalidRequest")
- SubElement(error_el, "Message").text = err.get("Message", "Request failed")
-
- current_app.logger.info(
- "Bulk object delete",
- extra={"bucket": bucket_name, "deleted": len(deleted), "errors": len(errors)},
- )
- return _xml_response(result, status=200)
-
-
-def _post_object(bucket_name: str) -> Response:
- storage = _storage()
- if not storage.bucket_exists(bucket_name):
- return _error_response("NoSuchBucket", "Bucket does not exist", 404)
- object_key = request.form.get("key")
- policy_b64 = request.form.get("policy")
- signature = request.form.get("x-amz-signature")
- credential = request.form.get("x-amz-credential")
- algorithm = request.form.get("x-amz-algorithm")
- amz_date = request.form.get("x-amz-date")
- if not all([object_key, policy_b64, signature, credential, algorithm, amz_date]):
- return _error_response("InvalidArgument", "Missing required form fields", 400)
- if algorithm != "AWS4-HMAC-SHA256":
- return _error_response("InvalidArgument", "Unsupported signing algorithm", 400)
- try:
- policy_json = base64.b64decode(policy_b64).decode("utf-8")
- policy = __import__("json").loads(policy_json)
- except (ValueError, __import__("json").JSONDecodeError) as exc:
- return _error_response("InvalidPolicyDocument", f"Invalid policy: {exc}", 400)
- expiration = policy.get("expiration")
- if expiration:
- try:
- exp_time = datetime.fromisoformat(expiration.replace("Z", "+00:00"))
- if datetime.now(timezone.utc) > exp_time:
- return _error_response("AccessDenied", "Policy expired", 403)
- except ValueError:
- return _error_response("InvalidPolicyDocument", "Invalid expiration format", 400)
- conditions = policy.get("conditions", [])
- validation_error = _validate_post_policy_conditions(bucket_name, object_key, conditions, request.form, request.content_length or 0)
- if validation_error:
- return _error_response("AccessDenied", validation_error, 403)
- try:
- parts = credential.split("/")
- if len(parts) != 5:
- raise ValueError("Invalid credential format")
- access_key, date_stamp, region, service, _ = parts
- except ValueError:
- return _error_response("InvalidArgument", "Invalid credential format", 400)
- secret_key = _iam().get_secret_key(access_key)
- if not secret_key:
- return _error_response("AccessDenied", "Invalid access key", 403)
- signing_key = _derive_signing_key(secret_key, date_stamp, region, service)
- expected_signature = hmac.new(signing_key, policy_b64.encode("utf-8"), hashlib.sha256).hexdigest()
- if not hmac.compare_digest(expected_signature, signature):
- return _error_response("SignatureDoesNotMatch", "Signature verification failed", 403)
- principal = _iam().get_principal(access_key)
- if not principal:
- return _error_response("AccessDenied", "Invalid access key", 403)
- if "${filename}" in object_key:
- temp_key = object_key.replace("${filename}", request.files.get("file").filename if request.files.get("file") else "upload")
- else:
- temp_key = object_key
- try:
- _authorize_action(principal, bucket_name, "write", object_key=temp_key)
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
- file = request.files.get("file")
- if not file:
- return _error_response("InvalidArgument", "Missing file field", 400)
- if "${filename}" in object_key:
- object_key = object_key.replace("${filename}", file.filename or "upload")
- metadata = {}
- for field_name, value in request.form.items():
- if field_name.lower().startswith("x-amz-meta-"):
- key = field_name[11:]
- if key and not (key.startswith("__") and key.endswith("__")):
- metadata[key] = value
- try:
- meta = storage.put_object(bucket_name, object_key, file.stream, metadata=metadata or None)
- except QuotaExceededError as exc:
- return _error_response("QuotaExceeded", str(exc), 403)
- except StorageError as exc:
- return _error_response("InvalidArgument", str(exc), 400)
- current_app.logger.info("Object uploaded via POST", extra={"bucket": bucket_name, "key": object_key, "size": meta.size})
- success_action_status = request.form.get("success_action_status", "204")
- success_action_redirect = request.form.get("success_action_redirect")
- if success_action_redirect:
- allowed_hosts = current_app.config.get("ALLOWED_REDIRECT_HOSTS", [])
- if not allowed_hosts:
- current_app.logger.warning(
- "ALLOWED_REDIRECT_HOSTS not configured, falling back to request Host header. "
- "Set ALLOWED_REDIRECT_HOSTS for production deployments."
- )
- allowed_hosts = [request.host]
- parsed = urlparse(success_action_redirect)
- if parsed.scheme not in ("http", "https"):
- return _error_response("InvalidArgument", "Redirect URL must use http or https", 400)
- if parsed.netloc not in allowed_hosts:
- return _error_response("InvalidArgument", "Redirect URL host not allowed", 400)
- redirect_url = f"{success_action_redirect}?bucket={bucket_name}&key={quote(object_key)}&etag={meta.etag}"
- return Response(status=303, headers={"Location": redirect_url})
- if success_action_status == "200":
- root = Element("PostResponse")
- SubElement(root, "Location").text = f"/{bucket_name}/{object_key}"
- SubElement(root, "Bucket").text = bucket_name
- SubElement(root, "Key").text = object_key
- SubElement(root, "ETag").text = f'"{meta.etag}"'
- return _xml_response(root, status=200)
- if success_action_status == "201":
- root = Element("PostResponse")
- SubElement(root, "Location").text = f"/{bucket_name}/{object_key}"
- SubElement(root, "Bucket").text = bucket_name
- SubElement(root, "Key").text = object_key
- SubElement(root, "ETag").text = f'"{meta.etag}"'
- return _xml_response(root, status=201)
- return Response(status=204)
-
-
-def _validate_post_policy_conditions(bucket_name: str, object_key: str, conditions: list, form_data, content_length: int) -> Optional[str]:
- for condition in conditions:
- if isinstance(condition, dict):
- for key, expected_value in condition.items():
- if key == "bucket":
- if bucket_name != expected_value:
- return f"Bucket must be {expected_value}"
- elif key == "key":
- if object_key != expected_value:
- return f"Key must be {expected_value}"
- else:
- actual_value = form_data.get(key, "")
- if actual_value != expected_value:
- return f"Field {key} must be {expected_value}"
- elif isinstance(condition, list) and len(condition) >= 2:
- operator = condition[0].lower() if isinstance(condition[0], str) else ""
- if operator == "starts-with" and len(condition) == 3:
- field = condition[1].lstrip("$")
- prefix = condition[2]
- if field == "key":
- if not object_key.startswith(prefix):
- return f"Key must start with {prefix}"
- else:
- actual_value = form_data.get(field, "")
- if not actual_value.startswith(prefix):
- return f"Field {field} must start with {prefix}"
- elif operator == "eq" and len(condition) == 3:
- field = condition[1].lstrip("$")
- expected = condition[2]
- if field == "key":
- if object_key != expected:
- return f"Key must equal {expected}"
- else:
- actual_value = form_data.get(field, "")
- if actual_value != expected:
- return f"Field {field} must equal {expected}"
- elif operator == "content-length-range" and len(condition) == 3:
- try:
- min_size, max_size = int(condition[1]), int(condition[2])
- except (TypeError, ValueError):
- return "Invalid content-length-range values"
- if content_length < min_size or content_length > max_size:
- return f"Content length must be between {min_size} and {max_size}"
- return None
-
-
-@s3_api_bp.get("/")
-@limiter.limit(_get_list_buckets_limit)
-def list_buckets() -> Response:
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, None, "list")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
- root = Element("ListAllMyBucketsResult")
- owner = SubElement(root, "Owner")
- SubElement(owner, "ID").text = principal.access_key
- SubElement(owner, "DisplayName").text = principal.display_name
- buckets_el = SubElement(root, "Buckets")
-
- storage_buckets = _storage().list_buckets()
- allowed = set(_iam().buckets_for_principal(principal, [b.name for b in storage_buckets]))
- for bucket in storage_buckets:
- if bucket.name not in allowed:
- continue
- bucket_el = SubElement(buckets_el, "Bucket")
- SubElement(bucket_el, "Name").text = bucket.name
- SubElement(bucket_el, "CreationDate").text = bucket.created_at.isoformat()
-
- return _xml_response(root)
-
-
-@s3_api_bp.route("/", methods=["PUT", "DELETE", "GET", "POST"], strict_slashes=False)
-@limiter.limit(_get_bucket_ops_limit)
-def bucket_handler(bucket_name: str) -> Response:
- storage = _storage()
- subresource_response = _maybe_handle_bucket_subresource(bucket_name)
- if subresource_response is not None:
- return subresource_response
-
- if request.method == "POST":
- if "delete" in request.args:
- return _bulk_delete_handler(bucket_name)
- content_type = request.headers.get("Content-Type", "")
- if "multipart/form-data" in content_type:
- return _post_object(bucket_name)
- return _method_not_allowed(["GET", "PUT", "DELETE"])
-
- if request.method == "PUT":
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "create_bucket")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
- try:
- storage.create_bucket(bucket_name)
- except FileExistsError:
- return _error_response("BucketAlreadyExists", "Bucket exists", 409)
- except StorageError as exc:
- return _error_response("InvalidBucketName", str(exc), 400)
- current_app.logger.info("Bucket created", extra={"bucket": bucket_name})
- return Response(status=200)
-
- if request.method == "DELETE":
- principal, error = _require_principal()
- if error:
- return error
- try:
- _authorize_action(principal, bucket_name, "delete_bucket")
- except IamError as exc:
- return _error_response("AccessDenied", str(exc), 403)
- try:
- storage.delete_bucket(bucket_name)
- _bucket_policies().delete_policy(bucket_name)
- _replication_manager().delete_rule(bucket_name)
- except StorageError as exc:
- code = "BucketNotEmpty" if "not empty" in str(exc) else "NoSuchBucket"
- status = 409 if code == "BucketNotEmpty" else 404
- return _error_response(code, str(exc), status)
- current_app.logger.info("Bucket deleted", extra={"bucket": bucket_name})
- return Response(status=204)
-
- principal, error = _require_principal()
- try:
- _authorize_action(principal, bucket_name, "list")
- except IamError as exc:
- if error:
- return error
- return _error_response("AccessDenied", str(exc), 403)
-
- list_type = request.args.get("list-type")
- prefix = request.args.get("prefix", "")
- delimiter = request.args.get("delimiter", "")
- try:
- max_keys = int(request.args.get("max-keys", current_app.config["UI_PAGE_SIZE"]))
- if max_keys < 1:
- return _error_response("InvalidArgument", "max-keys must be a positive integer", 400)
- max_keys = min(max_keys, 1000)
- except ValueError:
- return _error_response("InvalidArgument", "max-keys must be an integer", 400)
-
- marker = request.args.get("marker", "") # ListObjects v1
- continuation_token = request.args.get("continuation-token", "") # ListObjectsV2
- start_after = request.args.get("start-after", "") # ListObjectsV2
-
- effective_start = ""
- if list_type == "2":
- if continuation_token:
- try:
- effective_start = base64.urlsafe_b64decode(continuation_token.encode()).decode("utf-8")
- except (ValueError, UnicodeDecodeError):
- return _error_response("InvalidArgument", "Invalid continuation token", 400)
- elif start_after:
- effective_start = start_after
- else:
- effective_start = marker
-
- try:
- if delimiter:
- shallow_result = storage.list_objects_shallow(
- bucket_name,
- prefix=prefix,
- delimiter=delimiter,
- max_keys=max_keys,
- continuation_token=effective_start or None,
- )
- objects = shallow_result.objects
- common_prefixes = shallow_result.common_prefixes
- is_truncated = shallow_result.is_truncated
-
- next_marker = shallow_result.next_continuation_token or ""
- next_continuation_token = ""
- if is_truncated and next_marker and list_type == "2":
- next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")
- else:
- list_result = storage.list_objects(
- bucket_name,
- max_keys=max_keys,
- continuation_token=effective_start or None,
- prefix=prefix or None,
- )
- objects = list_result.objects
- common_prefixes = []
- is_truncated = list_result.is_truncated
-
- next_marker = ""
- next_continuation_token = ""
- if is_truncated:
- if objects:
- next_marker = objects[-1].key
- if list_type == "2" and next_marker:
- next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
-
- if list_type == "2":
- root = Element("ListBucketResult")
- SubElement(root, "Name").text = bucket_name
- SubElement(root, "Prefix").text = prefix
- SubElement(root, "MaxKeys").text = str(max_keys)
- SubElement(root, "KeyCount").text = str(len(objects) + len(common_prefixes))
- SubElement(root, "IsTruncated").text = "true" if is_truncated else "false"
- if delimiter:
- SubElement(root, "Delimiter").text = delimiter
-
- continuation_token = request.args.get("continuation-token", "")
- start_after = request.args.get("start-after", "")
- if continuation_token:
- SubElement(root, "ContinuationToken").text = continuation_token
- if start_after:
- SubElement(root, "StartAfter").text = start_after
-
- if is_truncated and next_continuation_token:
- SubElement(root, "NextContinuationToken").text = next_continuation_token
-
- for meta in objects:
- obj_el = SubElement(root, "Contents")
- SubElement(obj_el, "Key").text = meta.key
- SubElement(obj_el, "LastModified").text = meta.last_modified.isoformat()
- if meta.etag:
- SubElement(obj_el, "ETag").text = f'"{meta.etag}"'
- SubElement(obj_el, "Size").text = str(meta.size)
- SubElement(obj_el, "StorageClass").text = "STANDARD"
-
- for cp in common_prefixes:
- cp_el = SubElement(root, "CommonPrefixes")
- SubElement(cp_el, "Prefix").text = cp
- else:
- root = Element("ListBucketResult")
- SubElement(root, "Name").text = bucket_name
- SubElement(root, "Prefix").text = prefix
- SubElement(root, "Marker").text = marker
- SubElement(root, "MaxKeys").text = str(max_keys)
- SubElement(root, "IsTruncated").text = "true" if is_truncated else "false"
- if delimiter:
- SubElement(root, "Delimiter").text = delimiter
-
- if is_truncated and delimiter and next_marker:
- SubElement(root, "NextMarker").text = next_marker
-
- for meta in objects:
- obj_el = SubElement(root, "Contents")
- SubElement(obj_el, "Key").text = meta.key
- SubElement(obj_el, "LastModified").text = meta.last_modified.isoformat()
- if meta.etag:
- SubElement(obj_el, "ETag").text = f'"{meta.etag}"'
- SubElement(obj_el, "Size").text = str(meta.size)
-
- for cp in common_prefixes:
- cp_el = SubElement(root, "CommonPrefixes")
- SubElement(cp_el, "Prefix").text = cp
-
- return _xml_response(root)
-
-
@s3_api_bp.route("/<bucket_name>/<path:object_key>", methods=["PUT", "GET", "DELETE", "HEAD", "POST"], strict_slashes=False)
@limiter.limit(_get_object_ops_limit)
def object_handler(bucket_name: str, object_key: str):
    """Dispatch every object-level S3 request for ``bucket_name``/``object_key``.

    Sub-resource query parameters (``tagging``, ``retention``, ``legal-hold``,
    ``acl``, ``attributes``) are delegated first. Otherwise the HTTP method
    selects the operation:

    * ``POST``   - multipart initiate / complete, or SelectObjectContent.
    * ``PUT``    - part upload, copy, or a plain object write.
    * ``GET`` / ``HEAD`` - list parts, or read the object / its headers.
    * ``DELETE`` - abort a multipart upload, or delete the object.

    Returns a Flask response; failures are rendered through ``_error_response``.
    """
    storage = _storage()

    # Sub-resource handlers take precedence over the method dispatch below.
    if "tagging" in request.args:
        return _object_tagging_handler(bucket_name, object_key)

    if "retention" in request.args:
        return _object_retention_handler(bucket_name, object_key)

    if "legal-hold" in request.args:
        return _object_legal_hold_handler(bucket_name, object_key)

    if "acl" in request.args:
        return _object_acl_handler(bucket_name, object_key)

    if "attributes" in request.args:
        return _object_attributes_handler(bucket_name, object_key)

    if request.method == "POST":
        if "uploads" in request.args:
            return _initiate_multipart_upload(bucket_name, object_key)
        if "uploadId" in request.args:
            return _complete_multipart_upload(bucket_name, object_key)
        if "select" in request.args:
            return _select_object_content(bucket_name, object_key)
        return _method_not_allowed(["GET", "PUT", "DELETE", "HEAD", "POST"])

    if request.method == "PUT":
        if "partNumber" in request.args and "uploadId" in request.args:
            return _upload_part(bucket_name, object_key)

        copy_source = request.headers.get("x-amz-copy-source")
        if copy_source:
            return _copy_object(bucket_name, object_key, copy_source)

        principal, error = _object_principal("write", bucket_name, object_key)
        if error:
            return error

        bypass_governance = request.headers.get("x-amz-bypass-governance-retention", "").lower() == "true"
        lock_service = _object_lock()
        can_overwrite, lock_reason = lock_service.can_overwrite_object(bucket_name, object_key, bypass_governance=bypass_governance)
        if not can_overwrite:
            return _error_response("AccessDenied", lock_reason, 403)

        stream = request.stream
        content_encoding = request.headers.get("Content-Encoding", "").lower()
        if "aws-chunked" in content_encoding:
            stream = AwsChunkedDecoder(stream)

        metadata = _extract_request_metadata()

        content_type = request.headers.get("Content-Type")
        validation_error = _validate_content_type(object_key, content_type)
        if validation_error:
            return _error_response("InvalidArgument", validation_error, 400)

        metadata["__content_type__"] = content_type or mimetypes.guess_type(object_key)[0] or "application/octet-stream"

        # Decode Content-MD5 before touching storage so a malformed header is
        # rejected without writing (and then deleting) the object.
        content_md5 = request.headers.get("Content-MD5")
        expected_md5 = None
        if content_md5:
            try:
                expected_md5 = base64.b64decode(content_md5).hex()
            except ValueError:
                return _error_response("InvalidDigest", "Content-MD5 header is not valid base64", 400)

        try:
            meta = storage.put_object(
                bucket_name,
                object_key,
                stream,
                metadata=metadata or None,
            )
        except QuotaExceededError as exc:
            return _error_response("QuotaExceeded", str(exc), 403)
        except StorageError as exc:
            message = str(exc)
            if "Bucket" in message:
                return _error_response("NoSuchBucket", message, 404)
            return _error_response("InvalidArgument", message, 400)

        if expected_md5 is not None and meta.etag and expected_md5 != meta.etag:
            # NOTE(review): the digest is only known after the write, so a
            # mismatch removes the key - including any object that was
            # overwritten by this request. Confirm this is acceptable.
            storage.delete_object(bucket_name, object_key)
            return _error_response("BadDigest", "The Content-MD5 you specified did not match what we received", 400)

        if current_app.logger.isEnabledFor(logging.INFO):
            current_app.logger.info(
                "Object uploaded",
                extra={"bucket": bucket_name, "key": object_key, "size": meta.size},
            )
        response = Response(status=200)
        if meta.etag:
            response.headers["ETag"] = f'"{meta.etag}"'

        _notifications().emit_object_created(
            bucket_name,
            object_key,
            size=meta.size,
            etag=meta.etag,
            request_id=getattr(g, "request_id", ""),
            source_ip=request.remote_addr or "",
            user_identity=principal.access_key if principal else "",
            operation="Put",
        )

        # Writes performed by the replication/sync agents are not re-replicated.
        user_agent = request.headers.get("User-Agent", "")
        if "S3ReplicationAgent" not in user_agent and "SiteSyncAgent" not in user_agent:
            _replication_manager().trigger_replication(bucket_name, object_key, action="write")

        return response

    if request.method in {"GET", "HEAD"}:
        if request.method == "GET" and "uploadId" in request.args:
            return _list_parts(bucket_name, object_key)

        _, error = _object_principal("read", bucket_name, object_key)
        if error:
            return error
        try:
            path = storage.get_object_path(bucket_name, object_key)
        except StorageError as exc:
            return _error_response("NoSuchKey", str(exc), 404)
        metadata = storage.get_object_metadata(bucket_name, object_key)
        mimetype = metadata.get("__content_type__") or mimetypes.guess_type(object_key)[0] or "application/octet-stream"

        is_encrypted = "x-amz-server-side-encryption" in metadata

        # Conditional-request ETag: use the stored one, or compute and persist
        # it for unencrypted objects that have none recorded.
        cond_etag = metadata.get("__etag__")
        if not cond_etag and not is_encrypted:
            try:
                cond_etag = storage._compute_etag(path)
                storage.heal_missing_etag(bucket_name, object_key, cond_etag)
            except OSError:
                cond_etag = None
        if cond_etag:
            cond_mtime = float(metadata["__last_modified__"]) if "__last_modified__" in metadata else None
            if cond_mtime is None:
                try:
                    cond_mtime = path.stat().st_mtime
                except OSError:
                    pass
            cond_resp = _check_conditional_headers(cond_etag, cond_mtime)
            if cond_resp:
                return cond_resp

        if request.method == "GET":
            range_header = request.headers.get("Range")

            if is_encrypted and hasattr(storage, 'get_object_data'):
                # Encrypted objects are fetched through get_object_data and
                # served from memory; the ETag is the MD5 of the returned bytes.
                try:
                    data, clean_metadata = storage.get_object_data(bucket_name, object_key)
                    file_size = len(data)
                    etag = hashlib.md5(data).hexdigest()

                    if range_header:
                        try:
                            ranges = _parse_range_header(range_header, file_size)
                        except (ValueError, TypeError):
                            ranges = None
                        if ranges is None:
                            return _error_response("InvalidRange", "Range Not Satisfiable", 416)
                        # Only the first requested range is served.
                        start, end = ranges[0]
                        partial_data = data[start:end + 1]
                        response = Response(partial_data, status=206, mimetype=mimetype)
                        response.headers["Content-Range"] = f"bytes {start}-{end}/{file_size}"
                        response.headers["Content-Length"] = len(partial_data)
                        logged_bytes = len(partial_data)
                    else:
                        response = Response(data, mimetype=mimetype)
                        response.headers["Content-Length"] = file_size
                        logged_bytes = file_size
                except StorageError as exc:
                    return _error_response("InternalError", str(exc), 500)
            else:
                try:
                    stat = path.stat()
                    file_size = stat.st_size
                    etag = cond_etag or storage._compute_etag(path)
                except PermissionError:
                    return _error_response("AccessDenied", "Permission denied accessing object", 403)
                except OSError as exc:
                    return _error_response("InternalError", f"Failed to access object: {exc}", 500)

                if range_header:
                    try:
                        ranges = _parse_range_header(range_header, file_size)
                    except (ValueError, TypeError):
                        ranges = None
                    if ranges is None:
                        return _error_response("InvalidRange", "Range Not Satisfiable", 416)
                    # Only the first requested range is served.
                    start, end = ranges[0]
                    length = end - start + 1

                    def stream_range(file_path, start_pos, length_to_read):
                        """Yield ``length_to_read`` bytes from ``start_pos`` in 256 KiB pieces."""
                        with open(file_path, "rb") as f:
                            f.seek(start_pos)
                            remaining = length_to_read
                            while remaining > 0:
                                chunk_size = min(262144, remaining)
                                chunk = f.read(chunk_size)
                                if not chunk:
                                    break
                                remaining -= len(chunk)
                                yield chunk

                    response = Response(stream_range(path, start, length), status=206, mimetype=mimetype, direct_passthrough=True)
                    response.headers["Content-Range"] = f"bytes {start}-{end}/{file_size}"
                    response.headers["Content-Length"] = length
                    logged_bytes = length
                else:
                    response = Response(_stream_file(path), mimetype=mimetype, direct_passthrough=True)
                    logged_bytes = file_size
        else:
            # HEAD: same metadata as GET but no body.
            if is_encrypted and hasattr(storage, 'get_object_data'):
                try:
                    data, _ = storage.get_object_data(bucket_name, object_key)
                    response = Response(status=200)
                    response.headers["Content-Length"] = len(data)
                    etag = hashlib.md5(data).hexdigest()
                except StorageError as exc:
                    return _error_response("InternalError", str(exc), 500)
            else:
                try:
                    stat = path.stat()
                    response = Response(status=200)
                    etag = cond_etag or storage._compute_etag(path)
                except PermissionError:
                    return _error_response("AccessDenied", "Permission denied accessing object", 403)
                except OSError as exc:
                    return _error_response("InternalError", f"Failed to access object: {exc}", 500)
            response.headers["Content-Type"] = mimetype
            logged_bytes = 0

        # `stat` is always bound when the object is not encrypted.
        file_stat = stat if not is_encrypted else None
        _apply_object_headers(response, file_stat=file_stat, metadata=metadata, etag=etag)

        if request.method == "GET":
            # response-* query parameters override the matching response headers.
            response_overrides = {
                "response-content-type": "Content-Type",
                "response-content-language": "Content-Language",
                "response-expires": "Expires",
                "response-cache-control": "Cache-Control",
                "response-content-disposition": "Content-Disposition",
                "response-content-encoding": "Content-Encoding",
            }
            for param, header in response_overrides.items():
                value = request.args.get(param)
                if value:
                    response.headers[header] = _sanitize_header_value(value)

        if current_app.logger.isEnabledFor(logging.INFO):
            action = "Object read" if request.method == "GET" else "Object head"
            current_app.logger.info(action, extra={"bucket": bucket_name, "key": object_key, "bytes": logged_bytes})
        return response

    # Remaining method is DELETE.
    if "uploadId" in request.args:
        return _abort_multipart_upload(bucket_name, object_key)

    _, error = _object_principal("delete", bucket_name, object_key)
    if error:
        return error

    bypass_governance = request.headers.get("x-amz-bypass-governance-retention", "").lower() == "true"
    lock_service = _object_lock()
    can_delete, lock_reason = lock_service.can_delete_object(bucket_name, object_key, bypass_governance=bypass_governance)
    if not can_delete:
        return _error_response("AccessDenied", lock_reason, 403)

    storage.delete_object(bucket_name, object_key)
    lock_service.delete_object_lock_metadata(bucket_name, object_key)
    if current_app.logger.isEnabledFor(logging.INFO):
        current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key})

    principal, _ = _require_principal()
    _notifications().emit_object_removed(
        bucket_name,
        object_key,
        request_id=getattr(g, "request_id", ""),
        source_ip=request.remote_addr or "",
        user_identity=principal.access_key if principal else "",
    )

    # Deletes performed by the replication/sync agents are not re-replicated.
    user_agent = request.headers.get("User-Agent", "")
    if "S3ReplicationAgent" not in user_agent and "SiteSyncAgent" not in user_agent:
        _replication_manager().trigger_replication(bucket_name, object_key, action="delete")

    return Response(status=204)
-
-
def _list_parts(bucket_name: str, object_key: str) -> Response:
    """Render a ``ListPartsResult`` document for one multipart upload.

    The caller must be authenticated and hold ``read`` on the object. The
    upload is identified by the ``uploadId`` query parameter. The authenticated
    principal is reported as both Initiator and Owner, and the listing is
    always returned as a single, non-truncated page.
    """
    principal, auth_error = _require_principal()
    if auth_error:
        return auth_error
    try:
        _authorize_action(principal, bucket_name, "read", object_key=object_key)
    except IamError as denied:
        return _error_response("AccessDenied", str(denied), 403)

    upload_id = request.args.get("uploadId")
    if not upload_id:
        return _error_response("InvalidArgument", "uploadId is required", 400)

    try:
        uploaded = _storage().list_multipart_parts(bucket_name, upload_id)
    except StorageError as missing:
        return _error_response("NoSuchUpload", str(missing), 404)

    doc = Element("ListPartsResult")
    for tag, text in (("Bucket", bucket_name), ("Key", object_key), ("UploadId", upload_id)):
        SubElement(doc, tag).text = text

    # Initiator and Owner carry the same identity fields.
    for role in ("Initiator", "Owner"):
        identity = SubElement(doc, role)
        SubElement(identity, "ID").text = principal.access_key
        SubElement(identity, "DisplayName").text = principal.display_name

    last_part_number = str(uploaded[-1]["PartNumber"]) if uploaded else "0"
    paging_fields = (
        ("StorageClass", "STANDARD"),
        ("PartNumberMarker", "0"),
        ("NextPartNumberMarker", last_part_number),
        ("MaxParts", "1000"),
        ("IsTruncated", "false"),
    )
    for tag, text in paging_fields:
        SubElement(doc, tag).text = text

    for entry in uploaded:
        part_el = SubElement(doc, "Part")
        SubElement(part_el, "PartNumber").text = str(entry["PartNumber"])
        SubElement(part_el, "LastModified").text = entry["LastModified"].isoformat()
        SubElement(part_el, "ETag").text = f'"{entry["ETag"]}"'
        SubElement(part_el, "Size").text = str(entry["Size"])

    return _xml_response(doc)
-
-
def _bucket_policy_handler(bucket_name: str) -> Response:
    """Serve GET / PUT / DELETE for a bucket's policy document.

    Requires an authenticated principal with the ``policy`` action and an
    existing bucket. GET returns the stored policy as JSON, DELETE removes it
    (204), and PUT stores the JSON request body (204).
    """
    supported = ["GET", "PUT", "DELETE"]
    if request.method not in supported:
        return _method_not_allowed(supported)

    principal, auth_error = _require_principal()
    if auth_error:
        return auth_error
    try:
        _authorize_action(principal, bucket_name, "policy")
    except IamError as denied:
        return _error_response("AccessDenied", str(denied), 403)

    if not _storage().bucket_exists(bucket_name):
        return _error_response("NoSuchBucket", "Bucket does not exist", 404)

    policies = _bucket_policies()

    if request.method == "GET":
        current = policies.get_policy(bucket_name)
        if current:
            return jsonify(current)
        return _error_response("NoSuchBucketPolicy", "No bucket policy attached", 404)

    if request.method == "DELETE":
        policies.delete_policy(bucket_name)
        current_app.logger.info("Bucket policy removed", extra={"bucket": bucket_name})
        return Response(status=204)

    # PUT: the body must be a non-empty JSON document.
    body = request.get_data(cache=False) or b""
    try:
        document = json.loads(body)
    except (json.JSONDecodeError, ValueError):
        document = None
    if not document:
        return _error_response("MalformedPolicy", "Policy document must be JSON", 400)

    try:
        policies.set_policy(bucket_name, document)
        current_app.logger.info("Bucket policy updated", extra={"bucket": bucket_name})
    except ValueError as invalid:
        return _error_response("MalformedPolicy", str(invalid), 400)
    return Response(status=204)
-
-
def _bucket_policy_status_handler(bucket_name: str) -> Response:
    """Report whether the bucket policy makes the bucket public (GET only).

    Requires an authenticated principal with the ``policy`` action and an
    existing bucket. The bucket is reported public when any ``Allow``
    statement names a wildcard principal. The wildcard may be written as
    ``"*"``, ``{"AWS": "*"}`` or ``{"AWS": [..., "*"]}``, and ``Statement``
    may be a single object instead of a list.
    """
    if request.method != "GET":
        return _method_not_allowed(["GET"])
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "policy")
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
    storage = _storage()
    if not storage.bucket_exists(bucket_name):
        return _error_response("NoSuchBucket", "Bucket does not exist", 404)

    def _is_wildcard(principal_spec) -> bool:
        """Return True when a policy ``Principal`` value matches everyone."""
        if principal_spec == "*":
            return True
        if isinstance(principal_spec, dict):
            aws_value = principal_spec.get("AWS")
            if aws_value == "*":
                return True
            if isinstance(aws_value, (list, tuple)):
                return "*" in aws_value
        return False

    policy = _bucket_policies().get_policy(bucket_name)
    statements = policy.get("Statement", []) if policy else []
    if isinstance(statements, dict):
        # The policy grammar permits a single statement without the list wrapper.
        statements = [statements]
    is_public = any(
        isinstance(statement, dict)
        and statement.get("Effect") == "Allow"
        and _is_wildcard(statement.get("Principal"))
        for statement in statements
    )

    root = Element("PolicyStatus")
    SubElement(root, "IsPublic").text = "TRUE" if is_public else "FALSE"
    return _xml_response(root)
-
-
def _bucket_replication_handler(bucket_name: str) -> Response:
    """Serve GET / PUT / DELETE for a bucket's replication configuration.

    Requires an authenticated principal with the ``replication`` action and an
    existing bucket. GET renders the stored rule as XML, DELETE removes it
    (204), and PUT parses an XML ``ReplicationConfiguration`` body and stores
    the resulting rule (200). An unparseable or invalid body yields
    ``MalformedXML`` (400).
    """
    if request.method not in {"GET", "PUT", "DELETE"}:
        return _method_not_allowed(["GET", "PUT", "DELETE"])
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "replication")
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
    storage = _storage()
    if not storage.bucket_exists(bucket_name):
        return _error_response("NoSuchBucket", "Bucket does not exist", 404)
    replication = _replication_manager()
    if request.method == "GET":
        rule = replication.get_rule(bucket_name)
        if not rule:
            return _error_response("ReplicationConfigurationNotFoundError", "Replication configuration not found", 404)
        return _xml_response(_render_replication_config(rule))
    if request.method == "DELETE":
        replication.delete_rule(bucket_name)
        current_app.logger.info("Bucket replication removed", extra={"bucket": bucket_name})
        return Response(status=204)
    ct_error = _require_xml_content_type()
    if ct_error:
        return ct_error
    payload = request.get_data(cache=False) or b""
    try:
        rule = _parse_replication_config(bucket_name, payload)
    except ParseError:
        # ParseError is not a ValueError, so it needs its own clause; the
        # message matches the website-configuration handler.
        return _error_response("MalformedXML", "Unable to parse XML document", 400)
    except ValueError as exc:
        return _error_response("MalformedXML", str(exc), 400)
    replication.set_rule(rule)
    current_app.logger.info("Bucket replication updated", extra={"bucket": bucket_name})
    return Response(status=200)
-
-
def _parse_replication_config(bucket_name: str, payload: bytes):
    """Build a ``ReplicationRule`` from an XML ``ReplicationConfiguration`` body.

    Only the first ``Rule`` element is read. ``Status`` defaults to enabled
    when absent, ``Filter/Prefix`` is optional, and deletions are synced unless
    ``DeleteMarkerReplication/Status`` says otherwise.

    Raises:
        ValueError: when the root element, the rule, or its destination is
            missing, or the destination ARN is rejected.
    """
    from .replication import ReplicationRule, REPLICATION_MODE_ALL

    def _text_of(element):
        """Return the element's text, or None if it is absent or empty."""
        if element is not None and element.text:
            return element.text
        return None

    document = _parse_xml_with_limit(payload)
    if _strip_ns(document.tag) != "ReplicationConfiguration":
        raise ValueError("Root element must be ReplicationConfiguration")

    rule_node = next((node for node in list(document) if _strip_ns(node.tag) == "Rule"), None)
    if rule_node is None:
        raise ValueError("At least one Rule is required")

    status_text = _text_of(_find_element(rule_node, "Status")) or "Enabled"
    is_enabled = status_text.lower() == "enabled"

    prefix = None
    filter_node = _find_element(rule_node, "Filter")
    if filter_node is not None:
        prefix = _text_of(_find_element(filter_node, "Prefix"))

    destination = _find_element(rule_node, "Destination")
    if destination is None:
        raise ValueError("Destination element is required")
    destination_arn = _text_of(_find_element(destination, "Bucket"))
    if destination_arn is None:
        raise ValueError("Destination Bucket is required")
    target_bucket, target_connection_id = _parse_destination_arn(destination_arn)

    replicate_deletes = True
    marker_node = _find_element(rule_node, "DeleteMarkerReplication")
    if marker_node is not None:
        marker_status = _text_of(_find_element(marker_node, "Status"))
        if marker_status is not None:
            replicate_deletes = marker_status.lower() == "enabled"

    return ReplicationRule(
        bucket_name=bucket_name,
        target_connection_id=target_connection_id,
        target_bucket=target_bucket,
        enabled=is_enabled,
        mode=REPLICATION_MODE_ALL,
        sync_deletions=replicate_deletes,
        filter_prefix=prefix,
    )
-
-
def _bucket_website_handler(bucket_name: str) -> Response:
    """Serve GET / PUT / DELETE for a bucket's static-website configuration.

    The feature must be switched on through the ``WEBSITE_HOSTING_ENABLED``
    config flag and the caller needs the ``website`` action. GET renders the
    stored configuration as XML, DELETE clears it (204), and PUT validates an
    XML ``WebsiteConfiguration`` body and stores it (200).
    """
    supported = ["GET", "PUT", "DELETE"]
    if request.method not in supported:
        return _method_not_allowed(supported)
    if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
        return _error_response("InvalidRequest", "Website hosting is not enabled", 400)

    principal, auth_error = _require_principal()
    if auth_error:
        return auth_error
    try:
        _authorize_action(principal, bucket_name, "website")
    except IamError as denied:
        return _error_response("AccessDenied", str(denied), 403)

    storage = _storage()

    if request.method == "GET":
        try:
            stored = storage.get_bucket_website(bucket_name)
        except StorageError as missing:
            return _error_response("NoSuchBucket", str(missing), 404)
        if not stored:
            return _error_response("NoSuchWebsiteConfiguration", "The specified bucket does not have a website configuration", 404)
        doc = Element("WebsiteConfiguration")
        doc.set("xmlns", S3_NS)
        # Each configured document becomes <Wrapper><Leaf>value</Leaf></Wrapper>.
        for config_key, wrapper_tag, leaf_tag in (
            ("index_document", "IndexDocument", "Suffix"),
            ("error_document", "ErrorDocument", "Key"),
        ):
            value = stored.get(config_key)
            if value:
                SubElement(SubElement(doc, wrapper_tag), leaf_tag).text = value
        return _xml_response(doc)

    if request.method == "DELETE":
        try:
            storage.set_bucket_website(bucket_name, None)
        except StorageError as missing:
            return _error_response("NoSuchBucket", str(missing), 404)
        current_app.logger.info("Bucket website config deleted", extra={"bucket": bucket_name})
        return Response(status=204)

    # PUT: validate the XML body, then persist it.
    content_type_error = _require_xml_content_type()
    if content_type_error:
        return content_type_error
    body = request.get_data(cache=False) or b""
    if not body.strip():
        return _error_response("MalformedXML", "Request body is required", 400)
    try:
        parsed = _parse_xml_with_limit(body)
    except ParseError:
        return _error_response("MalformedXML", "Unable to parse XML document", 400)
    if _strip_ns(parsed.tag) != "WebsiteConfiguration":
        return _error_response("MalformedXML", "Root element must be WebsiteConfiguration", 400)

    index_node = _find_element(parsed, "IndexDocument")
    if index_node is None:
        return _error_response("InvalidArgument", "IndexDocument is required", 400)
    suffix_node = _find_element(index_node, "Suffix")
    index_suffix = (suffix_node.text or "").strip() if suffix_node is not None else ""
    if not index_suffix:
        return _error_response("InvalidArgument", "IndexDocument Suffix is required", 400)
    if "/" in index_suffix:
        return _error_response("InvalidArgument", "IndexDocument Suffix must not contain '/'", 400)

    website_config: Dict[str, Any] = {"index_document": index_suffix}
    error_node = _find_element(parsed, "ErrorDocument")
    if error_node is not None:
        key_node = _find_element(error_node, "Key")
        error_key = (key_node.text or "").strip() if key_node is not None else ""
        if error_key:
            website_config["error_document"] = error_key

    try:
        storage.set_bucket_website(bucket_name, website_config)
    except StorageError as missing:
        return _error_response("NoSuchBucket", str(missing), 404)
    current_app.logger.info("Bucket website config updated", extra={"bucket": bucket_name, "index": index_suffix})
    return Response(status=200)
-
-
-def _parse_destination_arn(arn: str) -> tuple:
- if not arn.startswith("arn:aws:s3:::"):
- raise ValueError(f"Invalid ARN format: {arn}")
- bucket_part = arn[13:]
- if "/" in bucket_part:
- connection_id, bucket_name = bucket_part.split("/", 1)
- else:
- connection_id = "local"
- bucket_name = bucket_part
- return bucket_name, connection_id
-
-
-def _render_replication_config(rule) -> Element:
- root = Element("ReplicationConfiguration")
- SubElement(root, "Role").text = "arn:aws:iam::000000000000:role/replication"
- rule_el = SubElement(root, "Rule")
- SubElement(rule_el, "ID").text = f"{rule.bucket_name}-replication"
- SubElement(rule_el, "Status").text = "Enabled" if rule.enabled else "Disabled"
- SubElement(rule_el, "Priority").text = "1"
- filter_el = SubElement(rule_el, "Filter")
- if rule.filter_prefix:
- SubElement(filter_el, "Prefix").text = rule.filter_prefix
- dest_el = SubElement(rule_el, "Destination")
- if rule.target_connection_id == "local":
- arn = f"arn:aws:s3:::{rule.target_bucket}"
- else:
- arn = f"arn:aws:s3:::{rule.target_connection_id}/{rule.target_bucket}"
- SubElement(dest_el, "Bucket").text = arn
- dm_el = SubElement(rule_el, "DeleteMarkerReplication")
- SubElement(dm_el, "Status").text = "Enabled" if rule.sync_deletions else "Disabled"
- return root
-
-
@s3_api_bp.route("/<bucket_name>", methods=["HEAD"])
@limiter.limit(_get_head_ops_limit)
def head_bucket(bucket_name: str) -> Response:
    """Answer ``HEAD /<bucket>``: 200 if the bucket exists and may be listed.

    Returns the authentication error if the caller is not authenticated,
    ``AccessDenied`` (403) if the ``list`` action is refused, and
    ``NoSuchBucket`` (404) if the bucket is absent.
    """
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "list")
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
    if not _storage().bucket_exists(bucket_name):
        return _error_response("NoSuchBucket", "Bucket not found", 404)
    return Response(status=200)
-
-
@s3_api_bp.route("/<bucket_name>/<path:object_key>", methods=["HEAD"])
@limiter.limit(_get_head_ops_limit)
def head_object(bucket_name: str, object_key: str) -> Response:
    """Answer ``HEAD /<bucket>/<key>`` with the object's headers and no body.

    The caller needs ``read`` on the object. A missing stored ETag is computed
    from the file and written back through ``heal_missing_etag``. Conditional
    request headers are evaluated against the ETag and modification time. Size
    and mtime come from cached metadata when both are present, otherwise from
    ``path.stat()``.

    Returns ``NoSuchKey`` (404) on ``StorageError`` / ``FileNotFoundError`` and
    ``AccessDenied`` (403) on ``IamError``.
    """
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "read", object_key=object_key)
        storage = _storage()
        path = storage.get_object_path(bucket_name, object_key)
        metadata = storage.get_object_metadata(bucket_name, object_key)
        etag = metadata.get("__etag__")
        if not etag:
            etag = storage._compute_etag(path)
            storage.heal_missing_etag(bucket_name, object_key, etag)

        head_mtime = float(metadata["__last_modified__"]) if "__last_modified__" in metadata else None
        if head_mtime is None:
            try:
                head_mtime = path.stat().st_mtime
            except OSError:
                # Conditional headers are then evaluated without an mtime.
                pass
        cond_resp = _check_conditional_headers(etag, head_mtime)
        if cond_resp:
            return cond_resp

        cached_size = metadata.get("__size__")
        cached_mtime = metadata.get("__last_modified__")
        response = Response(status=200)
        if cached_size is not None and cached_mtime is not None:
            # Both values are cached, so no filesystem stat is needed.
            _apply_object_headers(
                response,
                file_stat=None,
                metadata=metadata,
                etag=etag,
                size_override=int(cached_size),
                mtime_override=float(cached_mtime),
            )
        else:
            _apply_object_headers(response, file_stat=path.stat(), metadata=metadata, etag=etag)
        response.headers["Content-Type"] = metadata.get("__content_type__") or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
        return response
    except (StorageError, FileNotFoundError):
        return _error_response("NoSuchKey", "Object not found", 404)
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
-
-
def _copy_object(dest_bucket: str, dest_key: str, copy_source: str) -> Response:
    """Handle the S3 CopyObject operation.

    ``copy_source`` is the URL-encoded ``x-amz-copy-source`` header in the form
    ``[/]<bucket>/<key>``. The caller needs ``read`` on the source and
    ``write`` on the destination, and the destination must not be protected by
    an object lock (same check as a plain PUT). The
    ``x-amz-copy-source-if-*`` preconditions are evaluated against the source
    file's computed ETag and mtime.

    With ``x-amz-metadata-directive: REPLACE`` the metadata and content type
    come from the request. Otherwise the source's user metadata and stored
    content type are carried over.

    Returns a ``CopyObjectResult`` XML response, or an S3 error response.
    """
    from email.utils import parsedate_to_datetime
    from urllib.parse import unquote

    copy_source = unquote(copy_source)
    if copy_source.startswith("/"):
        copy_source = copy_source[1:]

    parts = copy_source.split("/", 1)
    if len(parts) != 2:
        return _error_response("InvalidArgument", "Invalid x-amz-copy-source format", 400)

    source_bucket, source_key = parts
    if not source_bucket or not source_key:
        return _error_response("InvalidArgument", "Invalid x-amz-copy-source format", 400)

    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, source_bucket, "read", object_key=source_key)
        _authorize_action(principal, dest_bucket, "write", object_key=dest_key)
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)

    # A copy overwrites the destination key, so it must honour object locks
    # exactly like a direct PUT does.
    bypass_governance = request.headers.get("x-amz-bypass-governance-retention", "").lower() == "true"
    can_overwrite, lock_reason = _object_lock().can_overwrite_object(dest_bucket, dest_key, bypass_governance=bypass_governance)
    if not can_overwrite:
        return _error_response("AccessDenied", lock_reason, 403)

    storage = _storage()

    try:
        source_path = storage.get_object_path(source_bucket, source_key)
    except StorageError:
        return _error_response("NoSuchKey", "Source object not found", 404)

    source_stat = source_path.stat()
    source_etag = storage._compute_etag(source_path)
    source_mtime = datetime.fromtimestamp(source_stat.st_mtime, timezone.utc)

    copy_source_if_match = request.headers.get("x-amz-copy-source-if-match")
    if copy_source_if_match:
        expected_etag = copy_source_if_match.strip('"')
        if source_etag != expected_etag:
            return _error_response("PreconditionFailed", "Source ETag does not match", 412)

    copy_source_if_none_match = request.headers.get("x-amz-copy-source-if-none-match")
    if copy_source_if_none_match:
        not_expected_etag = copy_source_if_none_match.strip('"')
        if source_etag == not_expected_etag:
            return _error_response("PreconditionFailed", "Source ETag matches", 412)

    copy_source_if_modified_since = request.headers.get("x-amz-copy-source-if-modified-since")
    if copy_source_if_modified_since:
        try:
            if_modified = parsedate_to_datetime(copy_source_if_modified_since)
            if source_mtime <= if_modified:
                return _error_response("PreconditionFailed", "Source not modified since specified date", 412)
        except (TypeError, ValueError):
            # Deliberate best-effort: an unparseable or incomparable date is ignored.
            pass

    copy_source_if_unmodified_since = request.headers.get("x-amz-copy-source-if-unmodified-since")
    if copy_source_if_unmodified_since:
        try:
            if_unmodified = parsedate_to_datetime(copy_source_if_unmodified_since)
            if source_mtime > if_unmodified:
                return _error_response("PreconditionFailed", "Source modified since specified date", 412)
        except (TypeError, ValueError):
            # Deliberate best-effort: an unparseable or incomparable date is ignored.
            pass

    source_metadata = storage.get_object_metadata(source_bucket, source_key)

    metadata_directive = request.headers.get("x-amz-metadata-directive", "COPY").upper()
    if metadata_directive == "REPLACE":
        metadata = _extract_request_metadata()
        content_type = request.headers.get("Content-Type")
        validation_error = _validate_content_type(dest_key, content_type)
        if validation_error:
            return _error_response("InvalidArgument", validation_error, 400)
        # Store the validated content type the same way a plain PUT does.
        metadata["__content_type__"] = content_type or mimetypes.guess_type(dest_key)[0] or "application/octet-stream"
    else:
        # Drop internal "__name__" bookkeeping keys (etag, size, ...), which
        # are recomputed for the copy, but keep the content type so it is not
        # re-guessed from the key on later reads.
        metadata = {k: v for k, v in source_metadata.items() if not (k.startswith("__") and k.endswith("__"))}
        source_content_type = source_metadata.get("__content_type__")
        if source_content_type:
            metadata["__content_type__"] = source_content_type

    try:
        with source_path.open("rb") as stream:
            meta = storage.put_object(
                dest_bucket,
                dest_key,
                stream,
                metadata=metadata or None,
            )
    except QuotaExceededError as exc:
        return _error_response("QuotaExceeded", str(exc), 403)
    except StorageError as exc:
        message = str(exc)
        if "Bucket" in message:
            return _error_response("NoSuchBucket", message, 404)
        return _error_response("InvalidArgument", message, 400)

    current_app.logger.info(
        "Object copied",
        extra={
            "source_bucket": source_bucket,
            "source_key": source_key,
            "dest_bucket": dest_bucket,
            "dest_key": dest_key,
            "size": meta.size,
        },
    )

    # Copies performed by the replication/sync agents are not re-replicated.
    user_agent = request.headers.get("User-Agent", "")
    if "S3ReplicationAgent" not in user_agent and "SiteSyncAgent" not in user_agent:
        _replication_manager().trigger_replication(dest_bucket, dest_key, action="write")

    root = Element("CopyObjectResult")
    SubElement(root, "LastModified").text = meta.last_modified.isoformat()
    if meta.etag:
        SubElement(root, "ETag").text = f'"{meta.etag}"'
    return _xml_response(root)
-
-
class AwsChunkedDecoder:
    """File-like reader that decodes an ``aws-chunked`` encoded stream.

    The wrapped ``stream`` only needs a ``read(n)`` method. Each chunk is
    ``<hex-size>[;extensions]\\r\\n<data>\\r\\n``; a zero-size chunk followed by
    optional trailer lines ends the body. Chunk extensions and trailers are
    skipped without being verified.

    The decoder tolerates short reads: the underlying stream may return fewer
    bytes than requested without corrupting the decoded output.
    """

    def __init__(self, stream):
        self.stream = stream
        # Bytes already pulled from the stream but not yet consumed.
        self._read_buffer = bytearray()
        # Payload bytes still owed by the chunk currently being read.
        self.chunk_remaining = 0
        self.finished = False

    def _read_line(self) -> bytes:
        """Return the next line including its trailing CRLF.

        Data is pulled from the stream in 64-byte batches and kept in the
        buffer until a CRLF is found, so a CRLF split across two reads is
        still recognised. At end of stream the unterminated remainder
        (possibly ``b""``) is returned.
        """
        search_from = 0
        while True:
            idx = self._read_buffer.find(b"\r\n", search_from)
            if idx != -1:
                line = bytes(self._read_buffer[: idx + 2])
                del self._read_buffer[: idx + 2]
                return line
            # Re-scan from the last byte next time: it may be the "\r" of a
            # CRLF whose "\n" has not arrived yet.
            search_from = max(len(self._read_buffer) - 1, 0)

            chunk = self.stream.read(64)
            if not chunk:
                line = bytes(self._read_buffer)
                self._read_buffer.clear()
                return line
            self._read_buffer.extend(chunk)

    def _read_exact(self, n: int) -> bytes:
        """Read ``n`` bytes, draining the buffer first.

        Keeps reading from the stream until ``n`` bytes are collected; the
        result is shorter than ``n`` only when the stream reaches EOF.
        """
        result = bytearray()
        if self._read_buffer:
            take = min(len(self._read_buffer), n)
            result.extend(self._read_buffer[:take])
            del self._read_buffer[:take]
        while len(result) < n:
            data = self.stream.read(n - len(result))
            if not data:
                break
            result.extend(data)
        return bytes(result)

    def read(self, size=-1):
        """Return up to ``size`` decoded payload bytes (everything if ``-1``).

        Returns ``b""`` once the terminating chunk, or end of stream at a
        chunk boundary, has been reached.

        Raises:
            IOError: on a malformed or negative chunk size, a missing CRLF
                after chunk data, or EOF in the middle of a chunk.
        """
        if self.finished:
            return b""

        result = bytearray()
        while size == -1 or len(result) < size:
            if self.chunk_remaining > 0:
                to_read = self.chunk_remaining
                if size != -1:
                    to_read = min(to_read, size - len(result))

                chunk = self._read_exact(to_read)
                if not chunk:
                    raise IOError("Unexpected EOF in chunk data")

                result.extend(chunk)
                self.chunk_remaining -= len(chunk)

                if self.chunk_remaining == 0:
                    # Every chunk's payload is followed by a CRLF.
                    crlf = self._read_exact(2)
                    if crlf != b"\r\n":
                        raise IOError("Malformed chunk: missing CRLF")
            else:
                line = self._read_line()
                if not line:
                    # Stream ended at a chunk boundary.
                    self.finished = True
                    return bytes(result)

                try:
                    # Drop any ";name=value" chunk extensions.
                    size_field = line.decode("ascii").strip().split(";", 1)[0]
                    chunk_size = int(size_field, 16)
                except ValueError as exc:
                    raise IOError(f"Invalid chunk size: {line}") from exc
                if chunk_size < 0:
                    # int() accepts a leading "-", which would desynchronise
                    # the decoder.
                    raise IOError(f"Invalid chunk size: {line}")

                if chunk_size == 0:
                    self.finished = True
                    # Consume trailer lines up to the blank line or EOF.
                    while True:
                        trailer = self._read_line()
                        if trailer == b"\r\n" or not trailer:
                            break
                    return bytes(result)

                self.chunk_remaining = chunk_size

        return bytes(result)
-
-
-def _initiate_multipart_upload(bucket_name: str, object_key: str) -> Response:
- principal, error = _object_principal("write", bucket_name, object_key)
- if error:
- return error
-
- metadata = _extract_request_metadata()
- content_type = request.headers.get("Content-Type")
- metadata["__content_type__"] = content_type or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
- try:
- upload_id = _storage().initiate_multipart_upload(
- bucket_name,
- object_key,
- metadata=metadata or None
- )
- except StorageError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
-
- root = Element("InitiateMultipartUploadResult")
- SubElement(root, "Bucket").text = bucket_name
- SubElement(root, "Key").text = object_key
- SubElement(root, "UploadId").text = upload_id
- return _xml_response(root)
-
-
-def _upload_part(bucket_name: str, object_key: str) -> Response:
- copy_source = request.headers.get("x-amz-copy-source")
- if copy_source:
- return _upload_part_copy(bucket_name, object_key, copy_source)
-
- principal, error = _object_principal("write", bucket_name, object_key)
- if error:
- return error
-
- upload_id = request.args.get("uploadId")
- part_number_str = request.args.get("partNumber")
- if not upload_id or not part_number_str:
- return _error_response("InvalidArgument", "uploadId and partNumber are required", 400)
-
- try:
- part_number = int(part_number_str)
- except ValueError:
- return _error_response("InvalidArgument", "partNumber must be an integer", 400)
-
- if part_number < 1 or part_number > 10000:
- return _error_response("InvalidArgument", "partNumber must be between 1 and 10000", 400)
-
- stream = request.stream
- content_encoding = request.headers.get("Content-Encoding", "").lower()
- if "aws-chunked" in content_encoding:
- stream = AwsChunkedDecoder(stream)
-
- try:
- etag = _storage().upload_multipart_part(bucket_name, upload_id, part_number, stream)
- except StorageError as exc:
- if "NoSuchBucket" in str(exc):
- return _error_response("NoSuchBucket", str(exc), 404)
- if "Multipart upload not found" in str(exc):
- return _error_response("NoSuchUpload", str(exc), 404)
- return _error_response("InvalidArgument", str(exc), 400)
-
- content_md5 = request.headers.get("Content-MD5")
- if content_md5 and etag:
- try:
- expected_md5 = base64.b64decode(content_md5).hex()
- except Exception:
- return _error_response("InvalidDigest", "Content-MD5 header is not valid base64", 400)
- if expected_md5 != etag:
- return _error_response("BadDigest", "The Content-MD5 you specified did not match what we received", 400)
-
- response = Response(status=200)
- response.headers["ETag"] = f'"{etag}"'
- return response
-
-
-def _upload_part_copy(bucket_name: str, object_key: str, copy_source: str) -> Response:
- principal, error = _object_principal("write", bucket_name, object_key)
- if error:
- return error
-
- upload_id = request.args.get("uploadId")
- part_number_str = request.args.get("partNumber")
- if not upload_id or not part_number_str:
- return _error_response("InvalidArgument", "uploadId and partNumber are required", 400)
-
- try:
- part_number = int(part_number_str)
- except ValueError:
- return _error_response("InvalidArgument", "partNumber must be an integer", 400)
-
- if part_number < 1 or part_number > 10000:
- return _error_response("InvalidArgument", "partNumber must be between 1 and 10000", 400)
-
- copy_source = unquote(copy_source)
- if copy_source.startswith("/"):
- copy_source = copy_source[1:]
- parts = copy_source.split("/", 1)
- if len(parts) != 2:
- return _error_response("InvalidArgument", "Invalid x-amz-copy-source format", 400)
- source_bucket, source_key = parts
- if not source_bucket or not source_key:
- return _error_response("InvalidArgument", "Invalid x-amz-copy-source format", 400)
-
- _, read_error = _object_principal("read", source_bucket, source_key)
- if read_error:
- return read_error
-
- copy_source_range = request.headers.get("x-amz-copy-source-range")
- start_byte, end_byte = None, None
- if copy_source_range:
- match = re.match(r"bytes=(\d+)-(\d+)", copy_source_range)
- if not match:
- return _error_response("InvalidArgument", "Invalid x-amz-copy-source-range format", 400)
- start_byte, end_byte = int(match.group(1)), int(match.group(2))
-
- try:
- result = _storage().upload_part_copy(
- bucket_name, upload_id, part_number,
- source_bucket, source_key,
- start_byte, end_byte
- )
- except ObjectNotFoundError:
- return _error_response("NoSuchKey", "Source object not found", 404)
- except StorageError as exc:
- if "Multipart upload not found" in str(exc):
- return _error_response("NoSuchUpload", str(exc), 404)
- if "Invalid byte range" in str(exc):
- return _error_response("InvalidRange", str(exc), 416)
- return _error_response("InvalidArgument", str(exc), 400)
-
- root = Element("CopyPartResult")
- SubElement(root, "LastModified").text = result["last_modified"].strftime("%Y-%m-%dT%H:%M:%S.000Z")
- SubElement(root, "ETag").text = f'"{result["etag"]}"'
- return _xml_response(root)
-
-
-def _complete_multipart_upload(bucket_name: str, object_key: str) -> Response:
- principal, error = _object_principal("write", bucket_name, object_key)
- if error:
- return error
-
- upload_id = request.args.get("uploadId")
- if not upload_id:
- return _error_response("InvalidArgument", "uploadId is required", 400)
-
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- try:
- root = _parse_xml_with_limit(payload)
- except ParseError:
- return _error_response("MalformedXML", "Unable to parse XML document", 400)
-
- if _strip_ns(root.tag) != "CompleteMultipartUpload":
- return _error_response("MalformedXML", "Root element must be CompleteMultipartUpload", 400)
-
- parts = []
- for part_el in list(root):
- if _strip_ns(part_el.tag) != "Part":
- continue
- part_number_el = part_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}PartNumber")
- if part_number_el is None:
- part_number_el = part_el.find("PartNumber")
-
- etag_el = part_el.find("{http://s3.amazonaws.com/doc/2006-03-01/}ETag")
- if etag_el is None:
- etag_el = part_el.find("ETag")
-
- if part_number_el is not None and etag_el is not None:
- try:
- part_num = int(part_number_el.text or 0)
- except ValueError:
- return _error_response("InvalidArgument", "PartNumber must be an integer", 400)
- if part_num < 1 or part_num > 10000:
- return _error_response("InvalidArgument", f"PartNumber {part_num} must be between 1 and 10000", 400)
- parts.append({
- "PartNumber": part_num,
- "ETag": (etag_el.text or "").strip('"')
- })
-
- try:
- meta = _storage().complete_multipart_upload(bucket_name, upload_id, parts)
- except QuotaExceededError as exc:
- return _error_response("QuotaExceeded", str(exc), 403)
- except StorageError as exc:
- if "NoSuchBucket" in str(exc):
- return _error_response("NoSuchBucket", str(exc), 404)
- if "Multipart upload not found" in str(exc):
- return _error_response("NoSuchUpload", str(exc), 404)
- return _error_response("InvalidPart", str(exc), 400)
-
- user_agent = request.headers.get("User-Agent", "")
- if "S3ReplicationAgent" not in user_agent and "SiteSyncAgent" not in user_agent:
- _replication_manager().trigger_replication(bucket_name, object_key, action="write")
-
- root = Element("CompleteMultipartUploadResult")
- location = f"{request.host_url}{bucket_name}/{object_key}"
- SubElement(root, "Location").text = location
- SubElement(root, "Bucket").text = bucket_name
- SubElement(root, "Key").text = object_key
- if meta.etag:
- SubElement(root, "ETag").text = f'"{meta.etag}"'
-
- return _xml_response(root)
-
-
-def _abort_multipart_upload(bucket_name: str, object_key: str) -> Response:
- principal, error = _object_principal("delete", bucket_name, object_key)
- if error:
- return error
-
- upload_id = request.args.get("uploadId")
- if not upload_id:
- return _error_response("InvalidArgument", "uploadId is required", 400)
-
- try:
- _storage().abort_multipart_upload(bucket_name, upload_id)
- except BucketNotFoundError as exc:
- return _error_response("NoSuchBucket", str(exc), 404)
- except StorageError as exc:
- current_app.logger.warning(f"Error aborting multipart upload: {exc}")
-
- return Response(status=204)
-
-
-def _select_object_content(bucket_name: str, object_key: str) -> Response:
- _, error = _object_principal("read", bucket_name, object_key)
- if error:
- return error
- ct_error = _require_xml_content_type()
- if ct_error:
- return ct_error
- payload = request.get_data(cache=False) or b""
- try:
- root = _parse_xml_with_limit(payload)
- except ParseError:
- return _error_response("MalformedXML", "Unable to parse XML document", 400)
- if _strip_ns(root.tag) != "SelectObjectContentRequest":
- return _error_response("MalformedXML", "Root element must be SelectObjectContentRequest", 400)
- expression_el = _find_element(root, "Expression")
- if expression_el is None or not expression_el.text:
- return _error_response("InvalidRequest", "Expression is required", 400)
- expression = expression_el.text
- expression_type_el = _find_element(root, "ExpressionType")
- expression_type = expression_type_el.text if expression_type_el is not None and expression_type_el.text else "SQL"
- if expression_type.upper() != "SQL":
- return _error_response("InvalidRequest", "Only SQL expression type is supported", 400)
- input_el = _find_element(root, "InputSerialization")
- if input_el is None:
- return _error_response("InvalidRequest", "InputSerialization is required", 400)
- try:
- input_format, input_config = _parse_select_input_serialization(input_el)
- except ValueError as exc:
- return _error_response("InvalidRequest", str(exc), 400)
- output_el = _find_element(root, "OutputSerialization")
- if output_el is None:
- return _error_response("InvalidRequest", "OutputSerialization is required", 400)
- try:
- output_format, output_config = _parse_select_output_serialization(output_el)
- except ValueError as exc:
- return _error_response("InvalidRequest", str(exc), 400)
- storage = _storage()
- try:
- path = storage.get_object_path(bucket_name, object_key)
- except ObjectNotFoundError:
- return _error_response("NoSuchKey", "Object not found", 404)
- except StorageError:
- return _error_response("NoSuchKey", "Object not found", 404)
- from .select_content import execute_select_query, SelectError
- try:
- result_stream = execute_select_query(
- file_path=path,
- expression=expression,
- input_format=input_format,
- input_config=input_config,
- output_format=output_format,
- output_config=output_config,
- )
- except SelectError as exc:
- return _error_response("InvalidRequest", str(exc), 400)
-
- def generate_events():
- bytes_scanned = 0
- bytes_returned = 0
- for chunk in result_stream:
- bytes_returned += len(chunk)
- yield _encode_select_event("Records", chunk)
- stats_payload = _build_stats_xml(bytes_scanned, bytes_returned)
- yield _encode_select_event("Stats", stats_payload)
- yield _encode_select_event("End", b"")
-
- return Response(generate_events(), mimetype="application/octet-stream", headers={"x-amz-request-charged": "requester"})
-
-
-def _parse_select_input_serialization(el: Element) -> tuple:
- csv_el = _find_element(el, "CSV")
- if csv_el is not None:
- file_header_el = _find_element(csv_el, "FileHeaderInfo")
- config = {
- "file_header_info": file_header_el.text.upper() if file_header_el is not None and file_header_el.text else "NONE",
- "comments": _find_element_text(csv_el, "Comments", "#"),
- "field_delimiter": _find_element_text(csv_el, "FieldDelimiter", ","),
- "record_delimiter": _find_element_text(csv_el, "RecordDelimiter", "\n"),
- "quote_character": _find_element_text(csv_el, "QuoteCharacter", '"'),
- "quote_escape_character": _find_element_text(csv_el, "QuoteEscapeCharacter", '"'),
- }
- return "CSV", config
- json_el = _find_element(el, "JSON")
- if json_el is not None:
- type_el = _find_element(json_el, "Type")
- config = {
- "type": type_el.text.upper() if type_el is not None and type_el.text else "DOCUMENT",
- }
- return "JSON", config
- parquet_el = _find_element(el, "Parquet")
- if parquet_el is not None:
- return "Parquet", {}
- raise ValueError("InputSerialization must specify CSV, JSON, or Parquet")
-
-
-def _parse_select_output_serialization(el: Element) -> tuple:
- csv_el = _find_element(el, "CSV")
- if csv_el is not None:
- config = {
- "field_delimiter": _find_element_text(csv_el, "FieldDelimiter", ","),
- "record_delimiter": _find_element_text(csv_el, "RecordDelimiter", "\n"),
- "quote_character": _find_element_text(csv_el, "QuoteCharacter", '"'),
- "quote_fields": _find_element_text(csv_el, "QuoteFields", "ASNEEDED").upper(),
- }
- return "CSV", config
- json_el = _find_element(el, "JSON")
- if json_el is not None:
- config = {
- "record_delimiter": _find_element_text(json_el, "RecordDelimiter", "\n"),
- }
- return "JSON", config
- raise ValueError("OutputSerialization must specify CSV or JSON")
-
-
-def _encode_select_event(event_type: str, payload: bytes) -> bytes:
- import struct
- import binascii
- headers = _build_event_headers(event_type)
- headers_length = len(headers)
- total_length = 4 + 4 + 4 + headers_length + len(payload) + 4
- prelude = struct.pack(">I", total_length) + struct.pack(">I", headers_length)
- prelude_crc = binascii.crc32(prelude) & 0xffffffff
- prelude += struct.pack(">I", prelude_crc)
- message = prelude + headers + payload
- message_crc = binascii.crc32(message) & 0xffffffff
- message += struct.pack(">I", message_crc)
- return message
-
-
-def _build_event_headers(event_type: str) -> bytes:
- headers = b""
- headers += _encode_select_header(":event-type", event_type)
- if event_type == "Records":
- headers += _encode_select_header(":content-type", "application/octet-stream")
- elif event_type == "Stats":
- headers += _encode_select_header(":content-type", "text/xml")
- headers += _encode_select_header(":message-type", "event")
- return headers
-
-
-def _encode_select_header(name: str, value: str) -> bytes:
- import struct
- name_bytes = name.encode("utf-8")
- value_bytes = value.encode("utf-8")
- header = struct.pack("B", len(name_bytes)) + name_bytes
- header += struct.pack("B", 7)
- header += struct.pack(">H", len(value_bytes)) + value_bytes
- return header
-
-
-def _build_stats_xml(bytes_scanned: int, bytes_returned: int) -> bytes:
- stats = Element("Stats")
- SubElement(stats, "BytesScanned").text = str(bytes_scanned)
- SubElement(stats, "BytesProcessed").text = str(bytes_scanned)
- SubElement(stats, "BytesReturned").text = str(bytes_returned)
- return tostring(stats, encoding="utf-8")
-
-
-@s3_api_bp.before_request
-def resolve_principal():
- g.principal = None
- try:
- if ("Authorization" in request.headers and request.headers["Authorization"].startswith("AWS4-HMAC-SHA256")) or \
- (request.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256"):
- g.principal = _verify_sigv4(request)
- return
- except IamError as exc:
- logger.debug(f"SigV4 authentication failed: {exc}")
- except (ValueError, KeyError) as exc:
- logger.debug(f"SigV4 parsing error: {exc}")
-
- access_key = request.headers.get("X-Access-Key")
- secret_key = request.headers.get("X-Secret-Key")
- if access_key and secret_key:
- try:
- g.principal = _iam().authenticate(access_key, secret_key)
- except IamError as exc:
- logger.debug(f"Header authentication failed: {exc}")
diff --git a/python/app/s3_client.py b/python/app/s3_client.py
deleted file mode 100644
index 916cd2a..0000000
--- a/python/app/s3_client.py
+++ /dev/null
@@ -1,296 +0,0 @@
-from __future__ import annotations
-
-import json
-import logging
-import threading
-import time
-from typing import Any, Generator, Optional
-
-import boto3
-from botocore.config import Config
-from botocore.exceptions import ClientError, EndpointConnectionError, ConnectionClosedError
-from flask import current_app, session
-
-logger = logging.getLogger(__name__)
-
-UI_PROXY_USER_AGENT = "MyFSIO-UIProxy/1.0"
-
-_BOTO_ERROR_MAP = {
- "NoSuchBucket": 404,
- "NoSuchKey": 404,
- "NoSuchUpload": 404,
- "BucketAlreadyExists": 409,
- "BucketAlreadyOwnedByYou": 409,
- "BucketNotEmpty": 409,
- "AccessDenied": 403,
- "InvalidAccessKeyId": 403,
- "SignatureDoesNotMatch": 403,
- "InvalidBucketName": 400,
- "InvalidArgument": 400,
- "MalformedXML": 400,
- "EntityTooLarge": 400,
- "QuotaExceeded": 403,
-}
-
-_UPLOAD_REGISTRY_MAX_AGE = 86400
-_UPLOAD_REGISTRY_CLEANUP_INTERVAL = 3600
-
-
-class UploadRegistry:
- def __init__(self) -> None:
- self._entries: dict[str, tuple[str, str, float]] = {}
- self._lock = threading.Lock()
- self._last_cleanup = time.monotonic()
-
- def register(self, upload_id: str, bucket_name: str, object_key: str) -> None:
- with self._lock:
- self._entries[upload_id] = (bucket_name, object_key, time.monotonic())
- self._maybe_cleanup()
-
- def get_key(self, upload_id: str, bucket_name: str) -> Optional[str]:
- with self._lock:
- entry = self._entries.get(upload_id)
- if entry is None:
- return None
- stored_bucket, key, created_at = entry
- if stored_bucket != bucket_name:
- return None
- if time.monotonic() - created_at > _UPLOAD_REGISTRY_MAX_AGE:
- del self._entries[upload_id]
- return None
- return key
-
- def remove(self, upload_id: str) -> None:
- with self._lock:
- self._entries.pop(upload_id, None)
-
- def _maybe_cleanup(self) -> None:
- now = time.monotonic()
- if now - self._last_cleanup < _UPLOAD_REGISTRY_CLEANUP_INTERVAL:
- return
- self._last_cleanup = now
- cutoff = now - _UPLOAD_REGISTRY_MAX_AGE
- stale = [uid for uid, (_, _, ts) in self._entries.items() if ts < cutoff]
- for uid in stale:
- del self._entries[uid]
-
-
-class S3ProxyClient:
- def __init__(self, api_base_url: str, region: str = "us-east-1") -> None:
- if not api_base_url:
- raise ValueError("api_base_url is required for S3ProxyClient")
- self._api_base_url = api_base_url.rstrip("/")
- self._region = region
- self.upload_registry = UploadRegistry()
-
- @property
- def api_base_url(self) -> str:
- return self._api_base_url
-
- def get_client(self, access_key: str, secret_key: str) -> Any:
- if not access_key or not secret_key:
- raise ValueError("Both access_key and secret_key are required")
- config = Config(
- user_agent_extra=UI_PROXY_USER_AGENT,
- connect_timeout=5,
- read_timeout=30,
- retries={"max_attempts": 0},
- signature_version="s3v4",
- s3={"addressing_style": "path"},
- request_checksum_calculation="when_required",
- response_checksum_validation="when_required",
- )
- return boto3.client(
- "s3",
- endpoint_url=self._api_base_url,
- aws_access_key_id=access_key,
- aws_secret_access_key=secret_key,
- region_name=self._region,
- config=config,
- )
-
-
-def _get_proxy() -> S3ProxyClient:
- proxy = current_app.extensions.get("s3_proxy")
- if proxy is None:
- raise RuntimeError(
- "S3 proxy not configured. Set API_BASE_URL or run both API and UI servers."
- )
- return proxy
-
-
-def _get_session_creds() -> tuple[str, str]:
- secret_store = current_app.extensions["secret_store"]
- secret_store.purge_expired()
- token = session.get("cred_token")
- if not token:
- raise PermissionError("Not authenticated")
- creds = secret_store.peek(token)
- if not creds:
- raise PermissionError("Session expired")
- access_key = creds.get("access_key", "")
- secret_key = creds.get("secret_key", "")
- if not access_key or not secret_key:
- raise PermissionError("Invalid session credentials")
- return access_key, secret_key
-
-
-def get_session_s3_client() -> Any:
- proxy = _get_proxy()
- access_key, secret_key = _get_session_creds()
- return proxy.get_client(access_key, secret_key)
-
-
-def get_upload_registry() -> UploadRegistry:
- return _get_proxy().upload_registry
-
-
-def handle_client_error(exc: ClientError) -> tuple[dict[str, str], int]:
- error_info = exc.response.get("Error", {})
- code = error_info.get("Code", "InternalError")
- message = error_info.get("Message") or "S3 operation failed"
- http_status = _BOTO_ERROR_MAP.get(code)
- if http_status is None:
- http_status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode", 500)
- return {"error": message}, http_status
-
-
-def handle_connection_error(exc: Exception) -> tuple[dict[str, str], int]:
- logger.error("S3 API connection failed: %s", exc)
- return {"error": "S3 API server is unreachable. Ensure the API server is running."}, 502
-
-
-def format_datetime_display(dt: Any, display_tz: str = "UTC") -> str:
- from .ui import _format_datetime_display
- return _format_datetime_display(dt, display_tz)
-
-
-def format_datetime_iso(dt: Any, display_tz: str = "UTC") -> str:
- from .ui import _format_datetime_iso
- return _format_datetime_iso(dt, display_tz)
-
-
-def build_url_templates(bucket_name: str) -> dict[str, str]:
- from flask import url_for
- preview_t = url_for("ui.object_preview", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
- delete_t = url_for("ui.delete_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
- presign_t = url_for("ui.object_presign", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
- versions_t = url_for("ui.object_versions", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
- restore_t = url_for(
- "ui.restore_object_version",
- bucket_name=bucket_name,
- object_key="KEY_PLACEHOLDER",
- version_id="VERSION_ID_PLACEHOLDER",
- )
- tags_t = url_for("ui.object_tags", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
- copy_t = url_for("ui.copy_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
- move_t = url_for("ui.move_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
- metadata_t = url_for("ui.object_metadata", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
- return {
- "preview": preview_t,
- "download": preview_t + "?download=1",
- "presign": presign_t,
- "delete": delete_t,
- "versions": versions_t,
- "restore": restore_t,
- "tags": tags_t,
- "copy": copy_t,
- "move": move_t,
- "metadata": metadata_t,
- }
-
-
-def translate_list_objects(
- boto3_response: dict[str, Any],
- url_templates: dict[str, str],
- display_tz: str = "UTC",
- versioning_enabled: bool = False,
-) -> dict[str, Any]:
- objects_data = []
- for obj in boto3_response.get("Contents", []):
- last_mod = obj["LastModified"]
- objects_data.append({
- "key": obj["Key"],
- "size": obj["Size"],
- "last_modified": last_mod.isoformat(),
- "last_modified_display": format_datetime_display(last_mod, display_tz),
- "last_modified_iso": format_datetime_iso(last_mod, display_tz),
- "etag": obj.get("ETag", "").strip('"'),
- })
- return {
- "objects": objects_data,
- "is_truncated": boto3_response.get("IsTruncated", False),
- "next_continuation_token": boto3_response.get("NextContinuationToken"),
- "total_count": boto3_response.get("KeyCount", len(objects_data)),
- "versioning_enabled": versioning_enabled,
- "url_templates": url_templates,
- }
-
-
-def get_versioning_via_s3(client: Any, bucket_name: str) -> bool:
- try:
- resp = client.get_bucket_versioning(Bucket=bucket_name)
- return resp.get("Status") == "Enabled"
- except ClientError as exc:
- code = exc.response.get("Error", {}).get("Code", "")
- if code != "NoSuchBucket":
- logger.warning("Failed to check versioning for %s: %s", bucket_name, code)
- return False
-
-
-def stream_objects_ndjson(
- client: Any,
- bucket_name: str,
- prefix: Optional[str],
- url_templates: dict[str, str],
- display_tz: str = "UTC",
- versioning_enabled: bool = False,
- delimiter: Optional[str] = None,
-) -> Generator[str, None, None]:
- meta_line = json.dumps({
- "type": "meta",
- "versioning_enabled": versioning_enabled,
- "url_templates": url_templates,
- }) + "\n"
- yield meta_line
-
- yield json.dumps({"type": "count", "total_count": 0}) + "\n"
-
- kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": 1000}
- if prefix:
- kwargs["Prefix"] = prefix
- if delimiter:
- kwargs["Delimiter"] = delimiter
-
- running_count = 0
- try:
- paginator = client.get_paginator("list_objects_v2")
- for page in paginator.paginate(**kwargs):
- for cp in page.get("CommonPrefixes", []):
- yield json.dumps({
- "type": "folder",
- "prefix": cp["Prefix"],
- }) + "\n"
- page_contents = page.get("Contents", [])
- for obj in page_contents:
- last_mod = obj["LastModified"]
- yield json.dumps({
- "type": "object",
- "key": obj["Key"],
- "size": obj["Size"],
- "last_modified": last_mod.isoformat(),
- "last_modified_display": format_datetime_display(last_mod, display_tz),
- "last_modified_iso": format_datetime_iso(last_mod, display_tz),
- "etag": obj.get("ETag", "").strip('"'),
- }) + "\n"
- running_count += len(page_contents)
- yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
- except ClientError as exc:
- error_msg = exc.response.get("Error", {}).get("Message", "S3 operation failed")
- yield json.dumps({"type": "error", "error": error_msg}) + "\n"
- return
- except (EndpointConnectionError, ConnectionClosedError):
- yield json.dumps({"type": "error", "error": "S3 API server is unreachable"}) + "\n"
- return
-
- yield json.dumps({"type": "done"}) + "\n"
diff --git a/python/app/secret_store.py b/python/app/secret_store.py
deleted file mode 100644
index 903c2fa..0000000
--- a/python/app/secret_store.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from __future__ import annotations
-
-import secrets
-import time
-from typing import Any, Dict, Optional
-
-
-class EphemeralSecretStore:
- """Keeps values in-memory for a short period and returns them once."""
-
- def __init__(self, default_ttl: int = 300) -> None:
- self._default_ttl = max(default_ttl, 1)
- self._store: Dict[str, tuple[Any, float]] = {}
-
- def remember(self, payload: Any, *, ttl: Optional[int] = None) -> str:
- token = secrets.token_urlsafe(16)
- expires_at = time.time() + (ttl or self._default_ttl)
- self._store[token] = (payload, expires_at)
- return token
-
- def peek(self, token: str | None) -> Any | None:
- if not token:
- return None
- entry = self._store.get(token)
- if not entry:
- return None
- payload, expires_at = entry
- if expires_at < time.time():
- self._store.pop(token, None)
- return None
- return payload
-
- def pop(self, token: str | None) -> Any | None:
- if not token:
- return None
- entry = self._store.pop(token, None)
- if not entry:
- return None
- payload, expires_at = entry
- if expires_at < time.time():
- return None
- return payload
-
- def purge_expired(self) -> None:
- now = time.time()
- stale = [token for token, (_, expires_at) in self._store.items() if expires_at < now]
- for token in stale:
- self._store.pop(token, None)
diff --git a/python/app/select_content.py b/python/app/select_content.py
deleted file mode 100644
index 57a3362..0000000
--- a/python/app/select_content.py
+++ /dev/null
@@ -1,171 +0,0 @@
-"""S3 SelectObjectContent SQL query execution using DuckDB."""
-from __future__ import annotations
-
-import json
-from pathlib import Path
-from typing import Any, Dict, Generator, Optional
-
-try:
- import duckdb
- DUCKDB_AVAILABLE = True
-except ImportError:
- DUCKDB_AVAILABLE = False
-
-
-class SelectError(Exception):
- """Error during SELECT query execution."""
- pass
-
-
-def execute_select_query(
- file_path: Path,
- expression: str,
- input_format: str,
- input_config: Dict[str, Any],
- output_format: str,
- output_config: Dict[str, Any],
- chunk_size: int = 65536,
-) -> Generator[bytes, None, None]:
- """Execute SQL query on object content."""
- if not DUCKDB_AVAILABLE:
- raise SelectError("DuckDB is not installed. Install with: pip install duckdb")
-
- conn = duckdb.connect(":memory:")
-
- try:
- if input_format == "CSV":
- _load_csv(conn, file_path, input_config)
- elif input_format == "JSON":
- _load_json(conn, file_path, input_config)
- elif input_format == "Parquet":
- _load_parquet(conn, file_path)
- else:
- raise SelectError(f"Unsupported input format: {input_format}")
-
- normalized_expression = expression.replace("s3object", "data").replace("S3Object", "data")
-
- try:
- result = conn.execute(normalized_expression)
- except duckdb.Error as exc:
- raise SelectError(f"SQL execution error: {exc}")
-
- if output_format == "CSV":
- yield from _output_csv(result, output_config, chunk_size)
- elif output_format == "JSON":
- yield from _output_json(result, output_config, chunk_size)
- else:
- raise SelectError(f"Unsupported output format: {output_format}")
-
- finally:
- conn.close()
-
-
-def _load_csv(conn, file_path: Path, config: Dict[str, Any]) -> None:
- """Load CSV file into DuckDB."""
- file_header_info = config.get("file_header_info", "NONE")
- delimiter = config.get("field_delimiter", ",")
- quote = config.get("quote_character", '"')
-
- header = file_header_info in ("USE", "IGNORE")
- path_str = str(file_path).replace("\\", "/")
-
- conn.execute(f"""
- CREATE TABLE data AS
- SELECT * FROM read_csv('{path_str}',
- header={header},
- delim='{delimiter}',
- quote='{quote}'
- )
- """)
-
-
-def _load_json(conn, file_path: Path, config: Dict[str, Any]) -> None:
- """Load JSON file into DuckDB."""
- json_type = config.get("type", "DOCUMENT")
- path_str = str(file_path).replace("\\", "/")
-
- if json_type == "LINES":
- conn.execute(f"""
- CREATE TABLE data AS
- SELECT * FROM read_json_auto('{path_str}', format='newline_delimited')
- """)
- else:
- conn.execute(f"""
- CREATE TABLE data AS
- SELECT * FROM read_json_auto('{path_str}', format='array')
- """)
-
-
-def _load_parquet(conn, file_path: Path) -> None:
- """Load Parquet file into DuckDB."""
- path_str = str(file_path).replace("\\", "/")
- conn.execute(f"CREATE TABLE data AS SELECT * FROM read_parquet('{path_str}')")
-
-
-def _output_csv(
- result,
- config: Dict[str, Any],
- chunk_size: int,
-) -> Generator[bytes, None, None]:
- """Output query results as CSV."""
- delimiter = config.get("field_delimiter", ",")
- record_delimiter = config.get("record_delimiter", "\n")
- quote = config.get("quote_character", '"')
-
- buffer = ""
-
- while True:
- rows = result.fetchmany(1000)
- if not rows:
- break
-
- for row in rows:
- fields = []
- for value in row:
- if value is None:
- fields.append("")
- elif isinstance(value, str):
- if delimiter in value or quote in value or record_delimiter in value:
- escaped = value.replace(quote, quote + quote)
- fields.append(f'{quote}{escaped}{quote}')
- else:
- fields.append(value)
- else:
- fields.append(str(value))
-
- buffer += delimiter.join(fields) + record_delimiter
-
- while len(buffer) >= chunk_size:
- yield buffer[:chunk_size].encode("utf-8")
- buffer = buffer[chunk_size:]
-
- if buffer:
- yield buffer.encode("utf-8")
-
-
-def _output_json(
- result,
- config: Dict[str, Any],
- chunk_size: int,
-) -> Generator[bytes, None, None]:
- """Output query results as JSON Lines."""
- record_delimiter = config.get("record_delimiter", "\n")
- columns = [desc[0] for desc in result.description]
-
- buffer = ""
-
- while True:
- rows = result.fetchmany(1000)
- if not rows:
- break
-
- for row in rows:
- record = dict(zip(columns, row))
- buffer += json.dumps(record, default=str) + record_delimiter
-
- while len(buffer) >= chunk_size:
- yield buffer[:chunk_size].encode("utf-8")
- buffer = buffer[chunk_size:]
-
- if buffer:
- yield buffer.encode("utf-8")
diff --git a/python/app/site_registry.py b/python/app/site_registry.py
deleted file mode 100644
index b257326..0000000
--- a/python/app/site_registry.py
+++ /dev/null
@@ -1,177 +0,0 @@
-from __future__ import annotations
-
-import json
-import time
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-
-@dataclass
-class SiteInfo:
- site_id: str
- endpoint: str
- region: str = "us-east-1"
- priority: int = 100
- display_name: str = ""
- created_at: Optional[float] = None
- updated_at: Optional[float] = None
-
- def __post_init__(self) -> None:
- if not self.display_name:
- self.display_name = self.site_id
- if self.created_at is None:
- self.created_at = time.time()
-
- def to_dict(self) -> Dict[str, Any]:
- return {
- "site_id": self.site_id,
- "endpoint": self.endpoint,
- "region": self.region,
- "priority": self.priority,
- "display_name": self.display_name,
- "created_at": self.created_at,
- "updated_at": self.updated_at,
- }
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> SiteInfo:
- return cls(
- site_id=data["site_id"],
- endpoint=data.get("endpoint", ""),
- region=data.get("region", "us-east-1"),
- priority=data.get("priority", 100),
- display_name=data.get("display_name", ""),
- created_at=data.get("created_at"),
- updated_at=data.get("updated_at"),
- )
-
-
-@dataclass
-class PeerSite:
- site_id: str
- endpoint: str
- region: str = "us-east-1"
- priority: int = 100
- display_name: str = ""
- created_at: Optional[float] = None
- updated_at: Optional[float] = None
- connection_id: Optional[str] = None
- is_healthy: Optional[bool] = None
- last_health_check: Optional[float] = None
-
- def __post_init__(self) -> None:
- if not self.display_name:
- self.display_name = self.site_id
- if self.created_at is None:
- self.created_at = time.time()
-
- def to_dict(self) -> Dict[str, Any]:
- return {
- "site_id": self.site_id,
- "endpoint": self.endpoint,
- "region": self.region,
- "priority": self.priority,
- "display_name": self.display_name,
- "created_at": self.created_at,
- "updated_at": self.updated_at,
- "connection_id": self.connection_id,
- "is_healthy": self.is_healthy,
- "last_health_check": self.last_health_check,
- }
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> PeerSite:
- return cls(
- site_id=data["site_id"],
- endpoint=data.get("endpoint", ""),
- region=data.get("region", "us-east-1"),
- priority=data.get("priority", 100),
- display_name=data.get("display_name", ""),
- created_at=data.get("created_at"),
- updated_at=data.get("updated_at"),
- connection_id=data.get("connection_id"),
- is_healthy=data.get("is_healthy"),
- last_health_check=data.get("last_health_check"),
- )
-
-
-class SiteRegistry:
- def __init__(self, config_path: Path) -> None:
- self.config_path = config_path
- self._local_site: Optional[SiteInfo] = None
- self._peers: Dict[str, PeerSite] = {}
- self.reload()
-
- def reload(self) -> None:
- if not self.config_path.exists():
- self._local_site = None
- self._peers = {}
- return
-
- try:
- with open(self.config_path, "r", encoding="utf-8") as f:
- data = json.load(f)
-
- if data.get("local"):
- self._local_site = SiteInfo.from_dict(data["local"])
- else:
- self._local_site = None
-
- self._peers = {}
- for peer_data in data.get("peers", []):
- peer = PeerSite.from_dict(peer_data)
- self._peers[peer.site_id] = peer
-
- except (OSError, json.JSONDecodeError, KeyError):
- self._local_site = None
- self._peers = {}
-
- def save(self) -> None:
- self.config_path.parent.mkdir(parents=True, exist_ok=True)
- data = {
- "local": self._local_site.to_dict() if self._local_site else None,
- "peers": [peer.to_dict() for peer in self._peers.values()],
- }
- with open(self.config_path, "w", encoding="utf-8") as f:
- json.dump(data, f, indent=2)
-
- def get_local_site(self) -> Optional[SiteInfo]:
- return self._local_site
-
- def set_local_site(self, site: SiteInfo) -> None:
- site.updated_at = time.time()
- self._local_site = site
- self.save()
-
- def list_peers(self) -> List[PeerSite]:
- return list(self._peers.values())
-
- def get_peer(self, site_id: str) -> Optional[PeerSite]:
- return self._peers.get(site_id)
-
- def add_peer(self, peer: PeerSite) -> None:
- peer.created_at = peer.created_at or time.time()
- self._peers[peer.site_id] = peer
- self.save()
-
- def update_peer(self, peer: PeerSite) -> None:
- if peer.site_id not in self._peers:
- raise ValueError(f"Peer {peer.site_id} not found")
- peer.updated_at = time.time()
- self._peers[peer.site_id] = peer
- self.save()
-
- def delete_peer(self, site_id: str) -> bool:
- if site_id in self._peers:
- del self._peers[site_id]
- self.save()
- return True
- return False
-
- def update_health(self, site_id: str, is_healthy: bool) -> None:
- peer = self._peers.get(site_id)
- if peer:
- peer.is_healthy = is_healthy
- peer.last_health_check = time.time()
- self.save()
diff --git a/python/app/site_sync.py b/python/app/site_sync.py
deleted file mode 100644
index 57cf185..0000000
--- a/python/app/site_sync.py
+++ /dev/null
@@ -1,416 +0,0 @@
-from __future__ import annotations
-
-import json
-import logging
-import tempfile
-import threading
-import time
-from dataclasses import dataclass, field
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Dict, List, Optional, TYPE_CHECKING
-
-import boto3
-from botocore.config import Config
-from botocore.exceptions import ClientError
-
-if TYPE_CHECKING:
- from .connections import ConnectionStore, RemoteConnection
- from .replication import ReplicationManager, ReplicationRule
- from .storage import ObjectStorage
-
-logger = logging.getLogger(__name__)
-
-SITE_SYNC_USER_AGENT = "SiteSyncAgent/1.0"
-
-
-@dataclass
-class SyncedObjectInfo:
- last_synced_at: float
- remote_etag: str
- source: str
-
- def to_dict(self) -> Dict[str, Any]:
- return {
- "last_synced_at": self.last_synced_at,
- "remote_etag": self.remote_etag,
- "source": self.source,
- }
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> "SyncedObjectInfo":
- return cls(
- last_synced_at=data["last_synced_at"],
- remote_etag=data["remote_etag"],
- source=data["source"],
- )
-
-
-@dataclass
-class SyncState:
- synced_objects: Dict[str, SyncedObjectInfo] = field(default_factory=dict)
- last_full_sync: Optional[float] = None
-
- def to_dict(self) -> Dict[str, Any]:
- return {
- "synced_objects": {k: v.to_dict() for k, v in self.synced_objects.items()},
- "last_full_sync": self.last_full_sync,
- }
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> "SyncState":
- synced_objects = {}
- for k, v in data.get("synced_objects", {}).items():
- synced_objects[k] = SyncedObjectInfo.from_dict(v)
- return cls(
- synced_objects=synced_objects,
- last_full_sync=data.get("last_full_sync"),
- )
-
-
-@dataclass
-class SiteSyncStats:
- last_sync_at: Optional[float] = None
- objects_pulled: int = 0
- objects_skipped: int = 0
- conflicts_resolved: int = 0
- deletions_applied: int = 0
- errors: int = 0
-
- def to_dict(self) -> Dict[str, Any]:
- return {
- "last_sync_at": self.last_sync_at,
- "objects_pulled": self.objects_pulled,
- "objects_skipped": self.objects_skipped,
- "conflicts_resolved": self.conflicts_resolved,
- "deletions_applied": self.deletions_applied,
- "errors": self.errors,
- }
-
-
-@dataclass
-class RemoteObjectMeta:
- key: str
- size: int
- last_modified: datetime
- etag: str
-
- @classmethod
- def from_s3_object(cls, obj: Dict[str, Any]) -> "RemoteObjectMeta":
- return cls(
- key=obj["Key"],
- size=obj.get("Size", 0),
- last_modified=obj["LastModified"],
- etag=obj.get("ETag", "").strip('"'),
- )
-
-
-def _create_sync_client(
- connection: "RemoteConnection",
- *,
- connect_timeout: int = 10,
- read_timeout: int = 120,
- max_retries: int = 2,
-) -> Any:
- config = Config(
- user_agent_extra=SITE_SYNC_USER_AGENT,
- connect_timeout=connect_timeout,
- read_timeout=read_timeout,
- retries={"max_attempts": max_retries},
- signature_version="s3v4",
- s3={"addressing_style": "path"},
- request_checksum_calculation="when_required",
- response_checksum_validation="when_required",
- )
- return boto3.client(
- "s3",
- endpoint_url=connection.endpoint_url,
- aws_access_key_id=connection.access_key,
- aws_secret_access_key=connection.secret_key,
- region_name=connection.region or "us-east-1",
- config=config,
- )
-
-
-class SiteSyncWorker:
- def __init__(
- self,
- storage: "ObjectStorage",
- connections: "ConnectionStore",
- replication_manager: "ReplicationManager",
- storage_root: Path,
- interval_seconds: int = 60,
- batch_size: int = 100,
- connect_timeout: int = 10,
- read_timeout: int = 120,
- max_retries: int = 2,
- clock_skew_tolerance_seconds: float = 1.0,
- ):
- self.storage = storage
- self.connections = connections
- self.replication_manager = replication_manager
- self.storage_root = storage_root
- self.interval_seconds = interval_seconds
- self.batch_size = batch_size
- self.connect_timeout = connect_timeout
- self.read_timeout = read_timeout
- self.max_retries = max_retries
- self.clock_skew_tolerance_seconds = clock_skew_tolerance_seconds
- self._lock = threading.Lock()
- self._shutdown = threading.Event()
- self._sync_thread: Optional[threading.Thread] = None
- self._bucket_stats: Dict[str, SiteSyncStats] = {}
-
- def _create_client(self, connection: "RemoteConnection") -> Any:
- """Create an S3 client with the worker's configured timeouts."""
- return _create_sync_client(
- connection,
- connect_timeout=self.connect_timeout,
- read_timeout=self.read_timeout,
- max_retries=self.max_retries,
- )
-
- def start(self) -> None:
- if self._sync_thread is not None and self._sync_thread.is_alive():
- return
- self._shutdown.clear()
- self._sync_thread = threading.Thread(
- target=self._sync_loop, name="site-sync-worker", daemon=True
- )
- self._sync_thread.start()
- logger.info("Site sync worker started (interval=%ds)", self.interval_seconds)
-
- def shutdown(self) -> None:
- self._shutdown.set()
- if self._sync_thread is not None:
- self._sync_thread.join(timeout=10.0)
- logger.info("Site sync worker shut down")
-
- def trigger_sync(self, bucket_name: str) -> Optional[SiteSyncStats]:
- from .replication import REPLICATION_MODE_BIDIRECTIONAL
- rule = self.replication_manager.get_rule(bucket_name)
- if not rule or rule.mode != REPLICATION_MODE_BIDIRECTIONAL or not rule.enabled:
- return None
- return self._sync_bucket(rule)
-
- def get_stats(self, bucket_name: str) -> Optional[SiteSyncStats]:
- with self._lock:
- return self._bucket_stats.get(bucket_name)
-
- def _sync_loop(self) -> None:
- while not self._shutdown.is_set():
- self._shutdown.wait(timeout=self.interval_seconds)
- if self._shutdown.is_set():
- break
- self._run_sync_cycle()
-
- def _run_sync_cycle(self) -> None:
- from .replication import REPLICATION_MODE_BIDIRECTIONAL
- for bucket_name, rule in list(self.replication_manager._rules.items()):
- if self._shutdown.is_set():
- break
- if rule.mode != REPLICATION_MODE_BIDIRECTIONAL or not rule.enabled:
- continue
- try:
- stats = self._sync_bucket(rule)
- with self._lock:
- self._bucket_stats[bucket_name] = stats
- except Exception as e:
- logger.exception("Site sync failed for bucket %s: %s", bucket_name, e)
-
- def _sync_bucket(self, rule: "ReplicationRule") -> SiteSyncStats:
- stats = SiteSyncStats()
- connection = self.connections.get(rule.target_connection_id)
- if not connection:
- logger.warning("Connection %s not found for bucket %s", rule.target_connection_id, rule.bucket_name)
- stats.errors += 1
- return stats
-
- try:
- local_objects = self._list_local_objects(rule.bucket_name)
- except Exception as e:
- logger.error("Failed to list local objects for %s: %s", rule.bucket_name, e)
- stats.errors += 1
- return stats
-
- try:
- remote_objects = self._list_remote_objects(rule, connection)
- except Exception as e:
- logger.error("Failed to list remote objects for %s: %s", rule.bucket_name, e)
- stats.errors += 1
- return stats
-
- sync_state = self._load_sync_state(rule.bucket_name)
- local_keys = set(local_objects.keys())
- remote_keys = set(remote_objects.keys())
-
- to_pull = []
- for key in remote_keys:
- remote_meta = remote_objects[key]
- local_meta = local_objects.get(key)
- if local_meta is None:
- to_pull.append(key)
- else:
- resolution = self._resolve_conflict(local_meta, remote_meta)
- if resolution == "pull":
- to_pull.append(key)
- stats.conflicts_resolved += 1
- else:
- stats.objects_skipped += 1
-
- pulled_count = 0
- for key in to_pull:
- if self._shutdown.is_set():
- break
- if pulled_count >= self.batch_size:
- break
- remote_meta = remote_objects[key]
- success = self._pull_object(rule, key, connection, remote_meta)
- if success:
- stats.objects_pulled += 1
- pulled_count += 1
- sync_state.synced_objects[key] = SyncedObjectInfo(
- last_synced_at=time.time(),
- remote_etag=remote_meta.etag,
- source="remote",
- )
- else:
- stats.errors += 1
-
- if rule.sync_deletions:
- for key in list(sync_state.synced_objects.keys()):
- if key not in remote_keys and key in local_keys:
- tracked = sync_state.synced_objects[key]
- if tracked.source == "remote":
- local_meta = local_objects.get(key)
- if local_meta and local_meta.last_modified.timestamp() <= tracked.last_synced_at:
- success = self._apply_remote_deletion(rule.bucket_name, key)
- if success:
- stats.deletions_applied += 1
- del sync_state.synced_objects[key]
-
- sync_state.last_full_sync = time.time()
- self._save_sync_state(rule.bucket_name, sync_state)
-
- with self.replication_manager._stats_lock:
- rule.last_pull_at = time.time()
- self.replication_manager.save_rules()
-
- stats.last_sync_at = time.time()
- logger.info(
- "Site sync completed for %s: pulled=%d, skipped=%d, conflicts=%d, deletions=%d, errors=%d",
- rule.bucket_name,
- stats.objects_pulled,
- stats.objects_skipped,
- stats.conflicts_resolved,
- stats.deletions_applied,
- stats.errors,
- )
- return stats
-
- def _list_local_objects(self, bucket_name: str) -> Dict[str, Any]:
- from .storage import ObjectMeta
- objects = self.storage.list_objects_all(bucket_name)
- return {obj.key: obj for obj in objects}
-
- def _list_remote_objects(self, rule: "ReplicationRule", connection: "RemoteConnection") -> Dict[str, RemoteObjectMeta]:
- s3 = self._create_client(connection)
- result: Dict[str, RemoteObjectMeta] = {}
- paginator = s3.get_paginator("list_objects_v2")
- try:
- for page in paginator.paginate(Bucket=rule.target_bucket):
- for obj in page.get("Contents", []):
- meta = RemoteObjectMeta.from_s3_object(obj)
- result[meta.key] = meta
- except ClientError as e:
- if e.response["Error"]["Code"] == "NoSuchBucket":
- return {}
- raise
- return result
-
- def _resolve_conflict(self, local_meta: Any, remote_meta: RemoteObjectMeta) -> str:
- local_ts = local_meta.last_modified.timestamp()
- remote_ts = remote_meta.last_modified.timestamp()
-
- if abs(remote_ts - local_ts) < self.clock_skew_tolerance_seconds:
- local_etag = local_meta.etag or ""
- if remote_meta.etag == local_etag:
- return "skip"
- return "pull" if remote_meta.etag > local_etag else "keep"
-
- return "pull" if remote_ts > local_ts else "keep"
-
- def _pull_object(
- self,
- rule: "ReplicationRule",
- object_key: str,
- connection: "RemoteConnection",
- remote_meta: RemoteObjectMeta,
- ) -> bool:
- s3 = self._create_client(connection)
- tmp_path = None
- try:
- tmp_dir = self.storage_root / ".myfsio.sys" / "tmp"
- tmp_dir.mkdir(parents=True, exist_ok=True)
- with tempfile.NamedTemporaryFile(dir=tmp_dir, delete=False) as tmp_file:
- tmp_path = Path(tmp_file.name)
-
- s3.download_file(rule.target_bucket, object_key, str(tmp_path))
-
- head_response = s3.head_object(Bucket=rule.target_bucket, Key=object_key)
- user_metadata = head_response.get("Metadata", {})
-
- with open(tmp_path, "rb") as f:
- self.storage.put_object(
- rule.bucket_name,
- object_key,
- f,
- metadata=user_metadata if user_metadata else None,
- )
-
- logger.debug("Pulled object %s/%s from remote", rule.bucket_name, object_key)
- return True
-
- except ClientError as e:
- logger.error("Failed to pull %s/%s: %s", rule.bucket_name, object_key, e)
- return False
- except Exception as e:
- logger.error("Failed to store pulled object %s/%s: %s", rule.bucket_name, object_key, e)
- return False
- finally:
- if tmp_path and tmp_path.exists():
- try:
- tmp_path.unlink()
- except OSError:
- pass
-
- def _apply_remote_deletion(self, bucket_name: str, object_key: str) -> bool:
- try:
- self.storage.delete_object(bucket_name, object_key)
- logger.debug("Applied remote deletion for %s/%s", bucket_name, object_key)
- return True
- except Exception as e:
- logger.error("Failed to apply remote deletion for %s/%s: %s", bucket_name, object_key, e)
- return False
-
- def _sync_state_path(self, bucket_name: str) -> Path:
- return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "site_sync_state.json"
-
- def _load_sync_state(self, bucket_name: str) -> SyncState:
- path = self._sync_state_path(bucket_name)
- if not path.exists():
- return SyncState()
- try:
- data = json.loads(path.read_text(encoding="utf-8"))
- return SyncState.from_dict(data)
- except (json.JSONDecodeError, OSError, KeyError) as e:
- logger.warning("Failed to load sync state for %s: %s", bucket_name, e)
- return SyncState()
-
- def _save_sync_state(self, bucket_name: str, state: SyncState) -> None:
- path = self._sync_state_path(bucket_name)
- path.parent.mkdir(parents=True, exist_ok=True)
- try:
- path.write_text(json.dumps(state.to_dict(), indent=2), encoding="utf-8")
- except OSError as e:
- logger.warning("Failed to save sync state for %s: %s", bucket_name, e)
diff --git a/python/app/storage.py b/python/app/storage.py
deleted file mode 100644
index d2469e9..0000000
--- a/python/app/storage.py
+++ /dev/null
@@ -1,2904 +0,0 @@
-from __future__ import annotations
-
-import hashlib
-import json
-import logging
-import os
-import re
-import shutil
-import stat
-import threading
-import time
-import unicodedata
-import uuid
-from collections import OrderedDict
-from concurrent.futures import ThreadPoolExecutor
-from contextlib import contextmanager
-from dataclasses import dataclass
-from datetime import datetime, timezone
-from pathlib import Path, PurePosixPath
-from typing import Any, BinaryIO, Dict, Generator, List, Optional
-
-try:
- import myfsio_core as _rc
- if not all(hasattr(_rc, f) for f in (
- "validate_bucket_name", "validate_object_key", "md5_file",
- "shallow_scan", "bucket_stats_scan", "search_objects_scan",
- "stream_to_file_with_md5", "assemble_parts_with_md5",
- "build_object_cache", "read_index_entry", "write_index_entry",
- "delete_index_entry", "check_bucket_contents",
- )):
- raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
- _HAS_RUST = True
-except ImportError:
- _rc = None
- _HAS_RUST = False
-
-logger = logging.getLogger(__name__)
-
-if os.name == "nt":
- import msvcrt
-
- @contextmanager
- def _file_lock(file_handle) -> Generator[None, None, None]:
- """Acquire an exclusive lock on a file (Windows)."""
- try:
- msvcrt.locking(file_handle.fileno(), msvcrt.LK_NBLCK, 1)
- yield
- finally:
- try:
- file_handle.seek(0)
- msvcrt.locking(file_handle.fileno(), msvcrt.LK_UNLCK, 1)
- except OSError:
- pass
-else:
- import fcntl # type: ignore
-
- @contextmanager
- def _file_lock(file_handle) -> Generator[None, None, None]:
- """Acquire an exclusive lock on a file (Unix)."""
- try:
- fcntl.flock(file_handle.fileno(), fcntl.LOCK_EX)
- yield
- finally:
- fcntl.flock(file_handle.fileno(), fcntl.LOCK_UN)
-
-
-@contextmanager
-def _atomic_lock_file(lock_path: Path, max_retries: int = 10, base_delay: float = 0.1) -> Generator[None, None, None]:
- """Atomically acquire a lock file with exponential backoff.
-
- Uses O_EXCL to ensure atomic creation of the lock file.
- """
- lock_path.parent.mkdir(parents=True, exist_ok=True)
- fd = None
- for attempt in range(max_retries):
- try:
- fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_WRONLY)
- break
- except FileExistsError:
- if attempt == max_retries - 1:
- raise BlockingIOError("Another upload to this key is in progress")
- delay = base_delay * (2 ** attempt)
- time.sleep(min(delay, 2.0))
- try:
- yield
- finally:
- if fd is not None:
- os.close(fd)
- try:
- lock_path.unlink(missing_ok=True)
- except OSError:
- pass
-
-
-WINDOWS_RESERVED_NAMES = {
- "CON",
- "PRN",
- "AUX",
- "NUL",
- "COM1",
- "COM2",
- "COM3",
- "COM4",
- "COM5",
- "COM6",
- "COM7",
- "COM8",
- "COM9",
- "LPT1",
- "LPT2",
- "LPT3",
- "LPT4",
- "LPT5",
- "LPT6",
- "LPT7",
- "LPT8",
- "LPT9",
-}
-
-
-class StorageError(RuntimeError):
- """Raised when the storage layer encounters an unrecoverable problem."""
-
-
-class BucketNotFoundError(StorageError):
- """Raised when the bucket does not exist."""
-
-
-class ObjectNotFoundError(StorageError):
- """Raised when the object does not exist."""
-
-
-class QuotaExceededError(StorageError):
- """Raised when an operation would exceed bucket quota limits."""
-
- def __init__(self, message: str, quota: Dict[str, Any], usage: Dict[str, int]):
- super().__init__(message)
- self.quota = quota
- self.usage = usage
-
-
-@dataclass
-class ObjectMeta:
- key: str
- size: int
- last_modified: datetime
- etag: Optional[str] = None
- metadata: Optional[Dict[str, str]] = None
-
-
-@dataclass
-class BucketMeta:
- name: str
- created_at: datetime
-
-
-@dataclass
-class ListObjectsResult:
- """Paginated result for object listing."""
- objects: List[ObjectMeta]
- is_truncated: bool
- next_continuation_token: Optional[str]
- total_count: Optional[int] = None
-
-
-@dataclass
-class ShallowListResult:
- """Result for delimiter-aware directory-level listing."""
- objects: List[ObjectMeta]
- common_prefixes: List[str]
- is_truncated: bool
- next_continuation_token: Optional[str]
-
-
-def _utcnow() -> datetime:
- return datetime.now(timezone.utc)
-
-
-def _utc_isoformat() -> str:
- return _utcnow().isoformat().replace("+00:00", "Z")
-
-
-class ObjectStorage:
- """Very small filesystem wrapper implementing the bare S3 primitives."""
-
- INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
- SYSTEM_ROOT = ".myfsio.sys"
- SYSTEM_BUCKETS_DIR = "buckets"
- SYSTEM_MULTIPART_DIR = "multipart"
- SYSTEM_TMP_DIR = "tmp"
- BUCKET_META_DIR = "meta"
- BUCKET_VERSIONS_DIR = "versions"
- MULTIPART_MANIFEST = "manifest.json"
- BUCKET_CONFIG_FILE = ".bucket.json"
-
- def __init__(
- self,
- root: Path,
- cache_ttl: int = 5,
- object_cache_max_size: int = 100,
- bucket_config_cache_ttl: float = 30.0,
- object_key_max_length_bytes: int = 1024,
- meta_read_cache_max: int = 2048,
- ) -> None:
- self.root = Path(root)
- self.root.mkdir(parents=True, exist_ok=True)
- self._ensure_system_roots()
- self._object_cache: OrderedDict[str, tuple[Dict[str, ObjectMeta], float, float]] = OrderedDict()
- self._obj_cache_lock = threading.Lock()
- self._meta_cache_lock = threading.Lock()
- self._registry_lock = threading.Lock()
- self._bucket_locks: Dict[str, threading.Lock] = {}
- self._cache_version: Dict[str, int] = {}
- self._bucket_config_cache: Dict[str, tuple[dict[str, Any], float]] = {}
- self._bucket_config_cache_ttl = bucket_config_cache_ttl
- self._cache_ttl = cache_ttl
- self._object_cache_max_size = object_cache_max_size
- self._object_key_max_length_bytes = object_key_max_length_bytes
- self._sorted_key_cache: Dict[str, tuple[list[str], int]] = {}
- self._meta_index_locks: Dict[str, threading.Lock] = {}
- self._meta_read_cache: OrderedDict[tuple, Optional[Dict[str, Any]]] = OrderedDict()
- self._meta_read_cache_max = meta_read_cache_max
- self._cleanup_executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ParentCleanup")
- self._stats_mem: Dict[str, Dict[str, int]] = {}
- self._stats_serial: Dict[str, int] = {}
- self._stats_mem_time: Dict[str, float] = {}
- self._stats_lock = threading.Lock()
- self._stats_dirty: set[str] = set()
- self._stats_flush_timer: Optional[threading.Timer] = None
- self._etag_index_dirty: set[str] = set()
- self._etag_index_flush_timer: Optional[threading.Timer] = None
- self._etag_index_mem: Dict[str, tuple[Dict[str, str], float]] = {}
-
- def _get_bucket_lock(self, bucket_id: str) -> threading.Lock:
- with self._registry_lock:
- if bucket_id not in self._bucket_locks:
- self._bucket_locks[bucket_id] = threading.Lock()
- return self._bucket_locks[bucket_id]
-
- def list_buckets(self) -> List[BucketMeta]:
- buckets: List[BucketMeta] = []
- for bucket in sorted(self.root.iterdir()):
- if bucket.is_dir() and bucket.name != self.SYSTEM_ROOT:
- stat = bucket.stat()
- buckets.append(
- BucketMeta(
- name=bucket.name,
- created_at=datetime.fromtimestamp(stat.st_ctime, timezone.utc),
- )
- )
- return buckets
-
- def bucket_exists(self, bucket_name: str) -> bool:
- return self._bucket_path(bucket_name).exists()
-
- def _require_bucket_exists(self, bucket_path: Path) -> None:
- """Raise BucketNotFoundError if bucket does not exist."""
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
-
- def _validate_bucket_name(self, bucket_name: str) -> None:
- if _HAS_RUST:
- error = _rc.validate_bucket_name(bucket_name)
- if error:
- raise StorageError(error)
- return
- if len(bucket_name) < 3 or len(bucket_name) > 63:
- raise StorageError("Bucket name must be between 3 and 63 characters")
- if not re.match(r"^[a-z0-9][a-z0-9.-]*[a-z0-9]$", bucket_name):
- raise StorageError("Bucket name must consist of lowercase letters, numbers, periods, and hyphens, and must start and end with a letter or number")
- if ".." in bucket_name:
- raise StorageError("Bucket name must not contain consecutive periods")
- if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", bucket_name):
- raise StorageError("Bucket name must not be formatted as an IP address")
-
- def create_bucket(self, bucket_name: str) -> None:
- self._validate_bucket_name(bucket_name)
- bucket_path = self._bucket_path(bucket_name)
- bucket_path.mkdir(parents=True, exist_ok=False)
- self._system_bucket_root(bucket_path.name).mkdir(parents=True, exist_ok=True)
-
- def bucket_stats(self, bucket_name: str, cache_ttl: int = 60) -> dict[str, int]:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
-
- with self._stats_lock:
- if bucket_name in self._stats_mem:
- cached_at = self._stats_mem_time.get(bucket_name, 0.0)
- if (time.monotonic() - cached_at) < cache_ttl:
- return dict(self._stats_mem[bucket_name])
- self._stats_mem.pop(bucket_name, None)
- self._stats_mem_time.pop(bucket_name, None)
-
- cache_path = self._system_bucket_root(bucket_name) / "stats.json"
- cached_stats = None
-
- if cache_path.exists():
- try:
- cached_stats = json.loads(cache_path.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- pass
-
- object_count = 0
- total_bytes = 0
- version_count = 0
- version_bytes = 0
-
- internal = self.INTERNAL_FOLDERS
- bucket_str = str(bucket_path)
-
- try:
- if _HAS_RUST:
- versions_root = str(self._bucket_versions_root(bucket_name))
- object_count, total_bytes, version_count, version_bytes = _rc.bucket_stats_scan(
- bucket_str, versions_root
- )
- else:
- stack = [bucket_str]
- while stack:
- current = stack.pop()
- try:
- with os.scandir(current) as it:
- for entry in it:
- if current == bucket_str and entry.name in internal:
- continue
- if entry.is_dir(follow_symlinks=False):
- stack.append(entry.path)
- elif entry.is_file(follow_symlinks=False):
- object_count += 1
- total_bytes += entry.stat(follow_symlinks=False).st_size
- except PermissionError:
- continue
-
- versions_root = self._bucket_versions_root(bucket_name)
- if versions_root.exists():
- v_stack = [str(versions_root)]
- while v_stack:
- v_current = v_stack.pop()
- try:
- with os.scandir(v_current) as it:
- for entry in it:
- if entry.is_dir(follow_symlinks=False):
- v_stack.append(entry.path)
- elif entry.is_file(follow_symlinks=False) and entry.name.endswith(".bin"):
- version_count += 1
- version_bytes += entry.stat(follow_symlinks=False).st_size
- except PermissionError:
- continue
- except OSError:
- if cached_stats is not None:
- return cached_stats
- raise
-
- existing_serial = 0
- if cached_stats is not None:
- existing_serial = cached_stats.get("_cache_serial", 0)
-
- stats = {
- "objects": object_count,
- "bytes": total_bytes,
- "version_count": version_count,
- "version_bytes": version_bytes,
- "total_objects": object_count + version_count,
- "total_bytes": total_bytes + version_bytes,
- "_cache_serial": existing_serial,
- }
-
- with self._stats_lock:
- self._stats_mem[bucket_name] = stats
- self._stats_mem_time[bucket_name] = time.monotonic()
- self._stats_serial[bucket_name] = existing_serial
-
- try:
- cache_path.parent.mkdir(parents=True, exist_ok=True)
- self._atomic_write_json(cache_path, stats)
- except OSError:
- pass
-
- return stats
-
- def _invalidate_bucket_stats_cache(self, bucket_id: str) -> None:
- with self._stats_lock:
- self._stats_mem.pop(bucket_id, None)
- self._stats_mem_time.pop(bucket_id, None)
- self._stats_serial[bucket_id] = self._stats_serial.get(bucket_id, 0) + 1
- self._stats_dirty.discard(bucket_id)
- cache_path = self._system_bucket_root(bucket_id) / "stats.json"
- try:
- cache_path.unlink(missing_ok=True)
- except OSError:
- pass
-
- def _update_bucket_stats_cache(
- self,
- bucket_id: str,
- *,
- bytes_delta: int = 0,
- objects_delta: int = 0,
- version_bytes_delta: int = 0,
- version_count_delta: int = 0,
- ) -> None:
- with self._stats_lock:
- if bucket_id not in self._stats_mem:
- self._stats_mem[bucket_id] = {
- "objects": 0, "bytes": 0, "version_count": 0,
- "version_bytes": 0, "total_objects": 0, "total_bytes": 0,
- "_cache_serial": 0,
- }
- data = self._stats_mem[bucket_id]
- data["objects"] = max(0, data["objects"] + objects_delta)
- data["bytes"] = max(0, data["bytes"] + bytes_delta)
- data["version_count"] = max(0, data["version_count"] + version_count_delta)
- data["version_bytes"] = max(0, data["version_bytes"] + version_bytes_delta)
- data["total_objects"] = max(0, data["total_objects"] + objects_delta + version_count_delta)
- data["total_bytes"] = max(0, data["total_bytes"] + bytes_delta + version_bytes_delta)
- data["_cache_serial"] = data["_cache_serial"] + 1
- self._stats_serial[bucket_id] = self._stats_serial.get(bucket_id, 0) + 1
- self._stats_mem_time[bucket_id] = time.monotonic()
- self._stats_dirty.add(bucket_id)
- needs_immediate = data["objects"] == 0 and objects_delta < 0
- if needs_immediate:
- self._flush_stats()
- else:
- self._schedule_stats_flush()
-
- def _schedule_stats_flush(self) -> None:
- if self._stats_flush_timer is None or not self._stats_flush_timer.is_alive():
- self._stats_flush_timer = threading.Timer(3.0, self._flush_stats)
- self._stats_flush_timer.daemon = True
- self._stats_flush_timer.start()
-
- def _flush_stats(self) -> None:
- with self._stats_lock:
- dirty = list(self._stats_dirty)
- self._stats_dirty.clear()
- snapshots = {b: dict(self._stats_mem[b]) for b in dirty if b in self._stats_mem}
- for bucket_id, data in snapshots.items():
- cache_path = self._system_bucket_root(bucket_id) / "stats.json"
- try:
- cache_path.parent.mkdir(parents=True, exist_ok=True)
- self._atomic_write_json(cache_path, data, sync=False)
- except OSError:
- pass
-
- def shutdown_stats(self) -> None:
- if self._stats_flush_timer is not None:
- self._stats_flush_timer.cancel()
- self._flush_stats()
- if self._etag_index_flush_timer is not None:
- self._etag_index_flush_timer.cancel()
- self._flush_etag_indexes()
-
- def delete_bucket(self, bucket_name: str) -> None:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- has_objects, has_versions, has_multipart = self._check_bucket_contents(bucket_path)
- if has_objects:
- raise StorageError("Bucket not empty")
- if has_versions:
- raise StorageError("Bucket contains archived object versions")
- if has_multipart:
- raise StorageError("Bucket has active multipart uploads")
- bucket_id = bucket_path.name
- self._remove_tree(bucket_path)
- self._remove_tree(self._system_bucket_root(bucket_id))
- self._remove_tree(self._multipart_bucket_root(bucket_id))
- self._bucket_config_cache.pop(bucket_id, None)
- with self._obj_cache_lock:
- self._object_cache.pop(bucket_id, None)
- self._cache_version.pop(bucket_id, None)
- self._sorted_key_cache.pop(bucket_id, None)
- with self._meta_cache_lock:
- stale = [k for k in self._meta_read_cache if k[0] == bucket_id]
- for k in stale:
- del self._meta_read_cache[k]
- with self._stats_lock:
- self._stats_mem.pop(bucket_id, None)
- self._stats_mem_time.pop(bucket_id, None)
- self._stats_serial.pop(bucket_id, None)
- self._stats_dirty.discard(bucket_id)
- self._etag_index_dirty.discard(bucket_id)
-
- def list_objects(
- self,
- bucket_name: str,
- *,
- max_keys: int = 1000,
- continuation_token: Optional[str] = None,
- prefix: Optional[str] = None,
- ) -> ListObjectsResult:
- """List objects in a bucket with pagination support.
-
- Args:
- bucket_name: Name of the bucket
- max_keys: Maximum number of objects to return (default 1000)
- continuation_token: Token from previous request for pagination
- prefix: Filter objects by key prefix
-
- Returns:
- ListObjectsResult with objects, truncation status, and continuation token
- """
- import bisect
-
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- bucket_id = bucket_path.name
-
- object_cache = self._get_object_cache(bucket_id, bucket_path)
-
- cache_version = self._cache_version.get(bucket_id, 0)
- cached_entry = self._sorted_key_cache.get(bucket_id)
- if cached_entry and cached_entry[1] == cache_version:
- all_keys = cached_entry[0]
- else:
- all_keys = sorted(object_cache.keys())
- self._sorted_key_cache[bucket_id] = (all_keys, cache_version)
-
- if prefix:
- lo = bisect.bisect_left(all_keys, prefix)
- hi = len(all_keys)
- for i in range(lo, len(all_keys)):
- if not all_keys[i].startswith(prefix):
- hi = i
- break
- all_keys = all_keys[lo:hi]
-
- total_count = len(all_keys)
- start_index = 0
- if continuation_token:
- start_index = bisect.bisect_right(all_keys, continuation_token)
- if start_index >= total_count:
- return ListObjectsResult(
- objects=[],
- is_truncated=False,
- next_continuation_token=None,
- total_count=total_count,
- )
-
- end_index = start_index + max_keys
- keys_slice = all_keys[start_index:end_index]
- is_truncated = end_index < total_count
-
- objects: List[ObjectMeta] = []
- for key in keys_slice:
- obj = object_cache.get(key)
- if obj:
- objects.append(obj)
-
- next_token = keys_slice[-1] if is_truncated and keys_slice else None
-
- return ListObjectsResult(
- objects=objects,
- is_truncated=is_truncated,
- next_continuation_token=next_token,
- total_count=total_count,
- )
-
- def list_objects_all(self, bucket_name: str) -> List[ObjectMeta]:
- """List all objects in a bucket (no pagination). Use with caution for large buckets."""
- result = self.list_objects(bucket_name, max_keys=100000)
- return result.objects
-
- def list_objects_shallow(
- self,
- bucket_name: str,
- *,
- prefix: str = "",
- delimiter: str = "/",
- max_keys: int = 1000,
- continuation_token: Optional[str] = None,
- ) -> ShallowListResult:
- import bisect
-
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- bucket_id = bucket_path.name
-
- if delimiter != "/" or (prefix and not prefix.endswith(delimiter)):
- return self._shallow_via_full_scan(
- bucket_name, prefix=prefix, delimiter=delimiter,
- max_keys=max_keys, continuation_token=continuation_token,
- )
-
- target_dir = bucket_path
- if prefix:
- safe_prefix_path = Path(prefix.rstrip("/"))
- if ".." in safe_prefix_path.parts:
- return ShallowListResult(
- objects=[], common_prefixes=[],
- is_truncated=False, next_continuation_token=None,
- )
- target_dir = bucket_path / safe_prefix_path
- try:
- resolved = target_dir.resolve()
- bucket_resolved = bucket_path.resolve()
- if not str(resolved).startswith(str(bucket_resolved) + os.sep) and resolved != bucket_resolved:
- return ShallowListResult(
- objects=[], common_prefixes=[],
- is_truncated=False, next_continuation_token=None,
- )
- except (OSError, ValueError):
- return ShallowListResult(
- objects=[], common_prefixes=[],
- is_truncated=False, next_continuation_token=None,
- )
-
- if not target_dir.exists() or not target_dir.is_dir():
- return ShallowListResult(
- objects=[], common_prefixes=[],
- is_truncated=False, next_continuation_token=None,
- )
-
- meta_cache: Dict[str, str] = self._get_etag_index(bucket_id)
-
- entries_files: list[tuple[str, int, float, Optional[str]]] = []
- entries_dirs: list[str] = []
-
- if _HAS_RUST:
- try:
- raw = _rc.shallow_scan(str(target_dir), prefix, json.dumps(meta_cache))
- entries_files = []
- for key, size, mtime, etag in raw["files"]:
- if etag is None:
- safe_key = PurePosixPath(key)
- meta = self._read_metadata(bucket_id, Path(safe_key))
- etag = meta.get("__etag__") if meta else None
- entries_files.append((key, size, mtime, etag))
- entries_dirs = raw["dirs"]
- all_items = raw["merged_keys"]
- except OSError:
- return ShallowListResult(
- objects=[], common_prefixes=[],
- is_truncated=False, next_continuation_token=None,
- )
- else:
- try:
- with os.scandir(str(target_dir)) as it:
- for entry in it:
- name = entry.name
- if name in self.INTERNAL_FOLDERS:
- continue
- if entry.is_dir(follow_symlinks=False):
- cp = prefix + name + delimiter
- entries_dirs.append(cp)
- elif entry.is_file(follow_symlinks=False):
- key = prefix + name
- try:
- st = entry.stat()
- etag = meta_cache.get(key)
- if etag is None:
- safe_key = PurePosixPath(key)
- meta = self._read_metadata(bucket_id, Path(safe_key))
- etag = meta.get("__etag__") if meta else None
- entries_files.append((key, st.st_size, st.st_mtime, etag))
- except OSError:
- pass
- except OSError:
- return ShallowListResult(
- objects=[], common_prefixes=[],
- is_truncated=False, next_continuation_token=None,
- )
-
- entries_dirs.sort()
- entries_files.sort(key=lambda x: x[0])
-
- all_items: list[tuple[str, bool]] = []
- fi, di = 0, 0
- while fi < len(entries_files) and di < len(entries_dirs):
- if entries_files[fi][0] <= entries_dirs[di]:
- all_items.append((entries_files[fi][0], False))
- fi += 1
- else:
- all_items.append((entries_dirs[di], True))
- di += 1
- while fi < len(entries_files):
- all_items.append((entries_files[fi][0], False))
- fi += 1
- while di < len(entries_dirs):
- all_items.append((entries_dirs[di], True))
- di += 1
-
- files_map = {e[0]: e for e in entries_files}
-
- start_index = 0
- if continuation_token:
- all_keys = [item[0] for item in all_items]
- start_index = bisect.bisect_right(all_keys, continuation_token)
-
- selected = all_items[start_index:start_index + max_keys]
- is_truncated = (start_index + max_keys) < len(all_items)
-
- result_objects: list[ObjectMeta] = []
- result_prefixes: list[str] = []
- for item_key, is_dir in selected:
- if is_dir:
- result_prefixes.append(item_key)
- else:
- fdata = files_map[item_key]
- result_objects.append(ObjectMeta(
- key=fdata[0],
- size=fdata[1],
- last_modified=datetime.fromtimestamp(fdata[2], timezone.utc),
- etag=fdata[3],
- metadata=None,
- ))
-
- next_token = None
- if is_truncated and selected:
- next_token = selected[-1][0]
-
- return ShallowListResult(
- objects=result_objects,
- common_prefixes=result_prefixes,
- is_truncated=is_truncated,
- next_continuation_token=next_token,
- )
-
- def iter_objects_shallow(
- self,
- bucket_name: str,
- *,
- prefix: str = "",
- delimiter: str = "/",
- ) -> Generator[tuple[str, ObjectMeta | str], None, None]:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- bucket_id = bucket_path.name
-
- target_dir = bucket_path
- if prefix:
- safe_prefix_path = Path(prefix.rstrip("/"))
- if ".." in safe_prefix_path.parts:
- return
- target_dir = bucket_path / safe_prefix_path
- try:
- resolved = target_dir.resolve()
- bucket_resolved = bucket_path.resolve()
- if not str(resolved).startswith(str(bucket_resolved) + os.sep) and resolved != bucket_resolved:
- return
- except (OSError, ValueError):
- return
-
- if not target_dir.exists() or not target_dir.is_dir():
- return
-
- etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
- meta_cache: Dict[str, str] = {}
- if etag_index_path.exists():
- try:
- with open(etag_index_path, 'r', encoding='utf-8') as f:
- meta_cache = json.load(f)
- except (OSError, json.JSONDecodeError):
- pass
-
- try:
- with os.scandir(str(target_dir)) as it:
- for entry in it:
- name = entry.name
- if name in self.INTERNAL_FOLDERS:
- continue
- if entry.is_dir(follow_symlinks=False):
- yield ("folder", prefix + name + delimiter)
- elif entry.is_file(follow_symlinks=False):
- key = prefix + name
- try:
- st = entry.stat()
- etag = meta_cache.get(key)
- if etag is None:
- safe_key = PurePosixPath(key)
- meta = self._read_metadata(bucket_id, Path(safe_key))
- etag = meta.get("__etag__") if meta else None
- yield ("object", ObjectMeta(
- key=key,
- size=st.st_size,
- last_modified=datetime.fromtimestamp(st.st_mtime, timezone.utc),
- etag=etag,
- metadata=None,
- ))
- except OSError:
- pass
- except OSError:
- return
-
- def _shallow_via_full_scan(
- self,
- bucket_name: str,
- *,
- prefix: str = "",
- delimiter: str = "/",
- max_keys: int = 1000,
- continuation_token: Optional[str] = None,
- ) -> ShallowListResult:
- list_result = self.list_objects(
- bucket_name,
- max_keys=max_keys * 10,
- continuation_token=continuation_token,
- prefix=prefix or None,
- )
-
- common_prefixes: list[str] = []
- filtered_objects: list[ObjectMeta] = []
- seen_prefixes: set[str] = set()
-
- for obj in list_result.objects:
- key_after_prefix = obj.key[len(prefix):] if prefix else obj.key
- if delimiter in key_after_prefix:
- cp = prefix + key_after_prefix.split(delimiter)[0] + delimiter
- if cp not in seen_prefixes:
- seen_prefixes.add(cp)
- common_prefixes.append(cp)
- else:
- filtered_objects.append(obj)
-
- common_prefixes.sort()
- total_items = len(filtered_objects) + len(common_prefixes)
- is_truncated = total_items > max_keys or list_result.is_truncated
-
- if len(filtered_objects) >= max_keys:
- filtered_objects = filtered_objects[:max_keys]
- common_prefixes = []
- else:
- remaining = max_keys - len(filtered_objects)
- common_prefixes = common_prefixes[:remaining]
-
- next_token = None
- if is_truncated:
- if filtered_objects:
- next_token = filtered_objects[-1].key
- elif common_prefixes:
- next_token = common_prefixes[-1].rstrip(delimiter) if delimiter else common_prefixes[-1]
-
- return ShallowListResult(
- objects=filtered_objects,
- common_prefixes=common_prefixes,
- is_truncated=is_truncated,
- next_continuation_token=next_token,
- )
-
- def search_objects(
- self,
- bucket_name: str,
- query: str,
- *,
- prefix: str = "",
- limit: int = 500,
- ) -> Dict[str, Any]:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.is_dir():
- raise BucketNotFoundError("Bucket does not exist")
-
- if prefix:
- search_root = bucket_path / prefix.replace("/", os.sep)
- if not search_root.is_dir():
- return {"results": [], "truncated": False}
- resolved = search_root.resolve()
- if not str(resolved).startswith(str(bucket_path.resolve())):
- return {"results": [], "truncated": False}
- else:
- search_root = bucket_path
-
- if _HAS_RUST:
- raw = _rc.search_objects_scan(
- str(bucket_path), str(search_root), query, limit
- )
- results = [
- {
- "key": k,
- "size": s,
- "last_modified": datetime.fromtimestamp(
- m, tz=timezone.utc
- ).strftime("%Y-%m-%dT%H:%M:%S.000Z"),
- }
- for k, s, m in raw["results"]
- ]
- return {"results": results, "truncated": raw["truncated"]}
-
- query_lower = query.lower()
- results: list[Dict[str, Any]] = []
- internal = self.INTERNAL_FOLDERS
- bucket_str = str(bucket_path)
- bucket_len = len(bucket_str) + 1
- meta_root = self._bucket_meta_root(bucket_name)
- scan_limit = limit * 4
-
- matched = 0
- scanned = 0
- search_str = str(search_root)
- stack = [search_str]
- while stack:
- current = stack.pop()
- try:
- with os.scandir(current) as it:
- for entry in it:
- if current == bucket_str and entry.name in internal:
- continue
- if entry.is_dir(follow_symlinks=False):
- stack.append(entry.path)
- elif entry.is_file(follow_symlinks=False):
- scanned += 1
- key = entry.path[bucket_len:].replace(os.sep, "/")
- if query_lower in key.lower():
- st = entry.stat(follow_symlinks=False)
- meta_path = meta_root / (key + ".meta.json")
- last_modified = ""
- try:
- if meta_path.exists():
- md = json.loads(meta_path.read_text(encoding="utf-8"))
- last_modified = md.get("last_modified", "")
- except (OSError, json.JSONDecodeError):
- pass
- if not last_modified:
- last_modified = datetime.fromtimestamp(
- st.st_mtime, tz=timezone.utc
- ).strftime("%Y-%m-%dT%H:%M:%S.000Z")
- results.append({
- "key": key,
- "size": st.st_size,
- "last_modified": last_modified,
- })
- matched += 1
- if matched >= scan_limit:
- break
- except PermissionError:
- continue
- if matched >= scan_limit:
- break
-
- results.sort(key=lambda r: r["key"])
- truncated = len(results) > limit
- return {"results": results[:limit], "truncated": truncated}
-
- def put_object(
- self,
- bucket_name: str,
- object_key: str,
- stream: BinaryIO,
- *,
- metadata: Optional[Dict[str, str]] = None,
- enforce_quota: bool = True,
- ) -> ObjectMeta:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- bucket_id = bucket_path.name
-
- safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
- destination = bucket_path / safe_key
- destination.parent.mkdir(parents=True, exist_ok=True)
-
- is_overwrite = destination.exists()
- existing_size = destination.stat().st_size if is_overwrite else 0
-
- tmp_dir = self._system_root_path() / self.SYSTEM_TMP_DIR
- tmp_dir.mkdir(parents=True, exist_ok=True)
-
- if _HAS_RUST:
- tmp_path = None
- try:
- tmp_path_str, etag, new_size = _rc.stream_to_file_with_md5(
- stream, str(tmp_dir)
- )
- tmp_path = Path(tmp_path_str)
-
- size_delta = new_size - existing_size
- object_delta = 0 if is_overwrite else 1
-
- if enforce_quota:
- quota_check = self.check_quota(
- bucket_name,
- additional_bytes=max(0, size_delta),
- additional_objects=object_delta,
- )
- if not quota_check["allowed"]:
- raise QuotaExceededError(
- quota_check["message"] or "Quota exceeded",
- quota_check["quota"],
- quota_check["usage"],
- )
- except BaseException:
- if tmp_path:
- try:
- tmp_path.unlink(missing_ok=True)
- except OSError:
- pass
- raise
- else:
- tmp_path = tmp_dir / f"{uuid.uuid4().hex}.tmp"
- try:
- checksum = hashlib.md5()
- with tmp_path.open("wb") as target:
- shutil.copyfileobj(_HashingReader(stream, checksum), target)
- target.flush()
- os.fsync(target.fileno())
-
- new_size = tmp_path.stat().st_size
- size_delta = new_size - existing_size
- object_delta = 0 if is_overwrite else 1
-
- if enforce_quota:
- quota_check = self.check_quota(
- bucket_name,
- additional_bytes=max(0, size_delta),
- additional_objects=object_delta,
- )
- if not quota_check["allowed"]:
- raise QuotaExceededError(
- quota_check["message"] or "Quota exceeded",
- quota_check["quota"],
- quota_check["usage"],
- )
-
- etag = checksum.hexdigest()
- except BaseException:
- try:
- tmp_path.unlink(missing_ok=True)
- except OSError:
- pass
- raise
-
- lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock"
- try:
- with _atomic_lock_file(lock_file_path):
- archived_version_size = 0
- if self._is_versioning_enabled(bucket_path) and is_overwrite:
- archived_version_size = existing_size
- self._archive_current_version(bucket_id, safe_key, reason="overwrite")
-
- shutil.move(str(tmp_path), str(destination))
- tmp_path = None
-
- stat = destination.stat()
-
- internal_meta = {"__etag__": etag, "__size__": str(stat.st_size), "__last_modified__": str(stat.st_mtime)}
- combined_meta = {**internal_meta, **(metadata or {})}
- self._write_metadata(bucket_id, safe_key, combined_meta)
- except BlockingIOError:
- try:
- if tmp_path:
- tmp_path.unlink(missing_ok=True)
- except OSError:
- pass
- raise StorageError("Another upload to this key is in progress")
- finally:
- if tmp_path:
- try:
- tmp_path.unlink(missing_ok=True)
- except OSError:
- pass
-
- self._update_bucket_stats_cache(
- bucket_id,
- bytes_delta=size_delta,
- objects_delta=object_delta,
- version_bytes_delta=archived_version_size,
- version_count_delta=1 if archived_version_size > 0 else 0,
- )
-
- obj_meta = ObjectMeta(
- key=safe_key.as_posix(),
- size=stat.st_size,
- last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
- etag=etag,
- metadata=metadata,
- )
- self._update_object_cache_entry(bucket_id, safe_key.as_posix(), obj_meta)
-
- return obj_meta
-
- def get_object_path(self, bucket_name: str, object_key: str) -> Path:
- path = self._object_path(bucket_name, object_key)
- if not path.is_file():
- raise ObjectNotFoundError("Object not found")
- return path
-
- def get_object_metadata(self, bucket_name: str, object_key: str) -> Dict[str, str]:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- return {}
- safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
- return self._read_metadata(bucket_path.name, safe_key) or {}
-
- def heal_missing_etag(self, bucket_name: str, object_key: str, etag: str) -> None:
- """Persist a computed ETag back to metadata (self-heal on read)."""
- try:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- return
- bucket_id = bucket_path.name
- safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
- existing = self._read_metadata(bucket_id, safe_key) or {}
- if existing.get("__etag__"):
- return
- existing["__etag__"] = etag
- self._write_metadata(bucket_id, safe_key, existing)
- with self._obj_cache_lock:
- cached = self._object_cache.get(bucket_id)
- if cached:
- obj = cached[0].get(safe_key.as_posix())
- if obj and not obj.etag:
- obj.etag = etag
- self._etag_index_dirty.add(bucket_id)
- self._schedule_etag_index_flush()
- except Exception:
- logger.warning("Failed to heal missing ETag for %s/%s", bucket_name, object_key)
-
- def _cleanup_empty_parents(self, path: Path, stop_at: Path) -> None:
- """Remove empty parent directories in a background thread.
-
- On Windows/OneDrive, directories may be locked briefly after file deletion.
- Running this in the background avoids blocking the request thread with retries.
- """
- self._cleanup_executor.submit(self._do_cleanup_empty_parents, path, stop_at)
-
- def _do_cleanup_empty_parents(self, path: Path, stop_at: Path) -> None:
- for parent in path.parents:
- if parent == stop_at:
- break
- for attempt in range(3):
- try:
- if parent.exists() and not any(parent.iterdir()):
- parent.rmdir()
- break
- except OSError:
- if attempt < 2:
- time.sleep(0.1)
- break
-
- def delete_object(self, bucket_name: str, object_key: str) -> None:
- bucket_path = self._bucket_path(bucket_name)
- path = self._object_path(bucket_name, object_key)
- if not path.exists():
- return
- deleted_size = path.stat().st_size
- safe_key = path.relative_to(bucket_path)
- bucket_id = bucket_path.name
- archived_version_size = 0
- if self._is_versioning_enabled(bucket_path):
- archived_version_size = deleted_size
- self._archive_current_version(bucket_id, safe_key, reason="delete")
- rel = path.relative_to(bucket_path)
- self._safe_unlink(path)
- self._delete_metadata(bucket_id, rel)
-
- self._update_bucket_stats_cache(
- bucket_id,
- bytes_delta=-deleted_size,
- objects_delta=-1,
- version_bytes_delta=archived_version_size,
- version_count_delta=1 if archived_version_size > 0 else 0,
- )
- self._update_object_cache_entry(bucket_id, safe_key.as_posix(), None)
- self._cleanup_empty_parents(path, bucket_path)
-
- def purge_object(self, bucket_name: str, object_key: str) -> None:
- bucket_path = self._bucket_path(bucket_name)
- target = self._object_path(bucket_name, object_key)
- bucket_id = bucket_path.name
- if target.exists():
- rel = target.relative_to(bucket_path)
- self._safe_unlink(target)
- self._delete_metadata(bucket_id, rel)
- else:
- rel = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
- self._delete_metadata(bucket_id, rel)
- version_dir = self._version_dir(bucket_id, rel)
- if version_dir.exists():
- shutil.rmtree(version_dir, ignore_errors=True)
- legacy_version_dir = self._legacy_version_dir(bucket_id, rel)
- if legacy_version_dir.exists():
- shutil.rmtree(legacy_version_dir, ignore_errors=True)
-
- self._invalidate_bucket_stats_cache(bucket_id)
- self._update_object_cache_entry(bucket_id, rel.as_posix(), None)
- self._cleanup_empty_parents(target, bucket_path)
-
- def is_versioning_enabled(self, bucket_name: str) -> bool:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- return self._is_versioning_enabled(bucket_path)
-
- def set_bucket_versioning(self, bucket_name: str, enabled: bool) -> None:
- bucket_path = self._require_bucket_path(bucket_name)
- config = self._read_bucket_config(bucket_path.name)
- config["versioning_enabled"] = bool(enabled)
- self._write_bucket_config(bucket_path.name, config)
-
- def get_bucket_tags(self, bucket_name: str) -> List[Dict[str, str]]:
- bucket_path = self._require_bucket_path(bucket_name)
- config = self._read_bucket_config(bucket_path.name)
- raw_tags = config.get("tags")
- if not isinstance(raw_tags, list):
- return []
- tags: List[Dict[str, str]] = []
- for entry in raw_tags:
- if not isinstance(entry, dict):
- continue
- key = str(entry.get("Key", "")).strip()
- if not key:
- continue
- value = str(entry.get("Value", ""))
- tags.append({"Key": key, "Value": value})
- return tags
-
- def set_bucket_tags(self, bucket_name: str, tags: Optional[List[Dict[str, str]]]) -> None:
- bucket_path = self._require_bucket_path(bucket_name)
- if not tags:
- self._set_bucket_config_entry(bucket_path.name, "tags", None)
- return
- clean: List[Dict[str, str]] = []
- for entry in tags:
- if not isinstance(entry, dict):
- continue
- key = str(entry.get("Key", "")).strip()
- if not key:
- continue
- clean.append({"Key": key, "Value": str(entry.get("Value", ""))})
- self._set_bucket_config_entry(bucket_path.name, "tags", clean or None)
-
- def get_bucket_cors(self, bucket_name: str) -> List[Dict[str, Any]]:
- bucket_path = self._require_bucket_path(bucket_name)
- config = self._read_bucket_config(bucket_path.name)
- cors_rules = config.get("cors")
- return cors_rules if isinstance(cors_rules, list) else []
-
- def set_bucket_cors(self, bucket_name: str, rules: Optional[List[Dict[str, Any]]]) -> None:
- bucket_path = self._require_bucket_path(bucket_name)
- self._set_bucket_config_entry(bucket_path.name, "cors", rules or None)
-
- def get_bucket_encryption(self, bucket_name: str) -> Dict[str, Any]:
- bucket_path = self._require_bucket_path(bucket_name)
- config = self._read_bucket_config(bucket_path.name)
- payload = config.get("encryption")
- return payload if isinstance(payload, dict) else {}
-
- def set_bucket_encryption(self, bucket_name: str, config_payload: Optional[Dict[str, Any]]) -> None:
- bucket_path = self._require_bucket_path(bucket_name)
- self._set_bucket_config_entry(bucket_path.name, "encryption", config_payload or None)
-
- def get_bucket_lifecycle(self, bucket_name: str) -> Optional[List[Dict[str, Any]]]:
- """Get lifecycle configuration for bucket."""
- bucket_path = self._require_bucket_path(bucket_name)
- config = self._read_bucket_config(bucket_path.name)
- lifecycle = config.get("lifecycle")
- return lifecycle if isinstance(lifecycle, list) else None
-
- def set_bucket_lifecycle(self, bucket_name: str, rules: Optional[List[Dict[str, Any]]]) -> None:
- bucket_path = self._require_bucket_path(bucket_name)
- self._set_bucket_config_entry(bucket_path.name, "lifecycle", rules)
-
- def get_bucket_website(self, bucket_name: str) -> Optional[Dict[str, Any]]:
- bucket_path = self._require_bucket_path(bucket_name)
- config = self._read_bucket_config(bucket_path.name)
- website = config.get("website")
- return website if isinstance(website, dict) else None
-
- def set_bucket_website(self, bucket_name: str, website_config: Optional[Dict[str, Any]]) -> None:
- bucket_path = self._require_bucket_path(bucket_name)
- self._set_bucket_config_entry(bucket_path.name, "website", website_config)
-
- def get_bucket_quota(self, bucket_name: str) -> Dict[str, Any]:
- """Get quota configuration for bucket.
-
- Returns:
- Dict with 'max_bytes' and 'max_objects' (None if unlimited).
- """
- bucket_path = self._require_bucket_path(bucket_name)
- config = self._read_bucket_config(bucket_path.name)
- quota = config.get("quota")
- if isinstance(quota, dict):
- return {
- "max_bytes": quota.get("max_bytes"),
- "max_objects": quota.get("max_objects"),
- }
- return {"max_bytes": None, "max_objects": None}
-
- def set_bucket_quota(
- self,
- bucket_name: str,
- *,
- max_bytes: Optional[int] = None,
- max_objects: Optional[int] = None,
- ) -> None:
- """Set quota limits for a bucket.
-
- Args:
- bucket_name: Name of the bucket
- max_bytes: Maximum total size in bytes (None to remove limit)
- max_objects: Maximum number of objects (None to remove limit)
- """
- bucket_path = self._require_bucket_path(bucket_name)
-
- if max_bytes is None and max_objects is None:
- self._set_bucket_config_entry(bucket_path.name, "quota", None)
- return
-
- quota: Dict[str, Any] = {}
- if max_bytes is not None:
- if max_bytes < 0:
- raise StorageError("max_bytes must be non-negative")
- quota["max_bytes"] = max_bytes
- if max_objects is not None:
- if max_objects < 0:
- raise StorageError("max_objects must be non-negative")
- quota["max_objects"] = max_objects
-
- self._set_bucket_config_entry(bucket_path.name, "quota", quota)
-
- def check_quota(
- self,
- bucket_name: str,
- additional_bytes: int = 0,
- additional_objects: int = 0,
- ) -> Dict[str, Any]:
- """Check if an operation would exceed bucket quota.
-
- Args:
- bucket_name: Name of the bucket
- additional_bytes: Bytes that would be added
- additional_objects: Objects that would be added
-
- Returns:
- Dict with 'allowed' (bool), 'quota' (current limits),
- 'usage' (current usage), and 'message' (if not allowed).
- """
- quota = self.get_bucket_quota(bucket_name)
- if not quota:
- return {
- "allowed": True,
- "quota": None,
- "usage": None,
- "message": None,
- }
-
- stats = self.bucket_stats(bucket_name)
- current_bytes = stats.get("total_bytes", stats.get("bytes", 0))
- current_objects = stats.get("total_objects", stats.get("objects", 0))
-
- result = {
- "allowed": True,
- "quota": quota,
- "usage": {
- "bytes": current_bytes,
- "objects": current_objects,
- "version_count": stats.get("version_count", 0),
- "version_bytes": stats.get("version_bytes", 0),
- },
- "message": None,
- }
-
- max_bytes_limit = quota.get("max_bytes")
- max_objects = quota.get("max_objects")
-
- if max_bytes_limit is not None:
- projected_bytes = current_bytes + additional_bytes
- if projected_bytes > max_bytes_limit:
- result["allowed"] = False
- result["message"] = (
- f"Quota exceeded: adding {additional_bytes} bytes would result in "
- f"{projected_bytes} bytes, exceeding limit of {max_bytes_limit} bytes"
- )
- return result
-
- if max_objects is not None:
- projected_objects = current_objects + additional_objects
- if projected_objects > max_objects:
- result["allowed"] = False
- result["message"] = (
- f"Quota exceeded: adding {additional_objects} objects would result in "
- f"{projected_objects} objects, exceeding limit of {max_objects} objects"
- )
- return result
-
- return result
-
- def get_object_tags(self, bucket_name: str, object_key: str) -> List[Dict[str, str]]:
- """Get tags for an object."""
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
- object_path = bucket_path / safe_key
- if not object_path.exists():
- raise ObjectNotFoundError("Object does not exist")
-
- entry = self._read_index_entry(bucket_path.name, safe_key)
- if entry is not None:
- tags = entry.get("tags")
- return tags if isinstance(tags, list) else []
- for meta_file in (self._metadata_file(bucket_path.name, safe_key), self._legacy_metadata_file(bucket_path.name, safe_key)):
- if not meta_file.exists():
- continue
- try:
- payload = json.loads(meta_file.read_text(encoding="utf-8"))
- tags = payload.get("tags")
- if isinstance(tags, list):
- return tags
- return []
- except (OSError, json.JSONDecodeError):
- return []
- return []
-
- def set_object_tags(self, bucket_name: str, object_key: str, tags: Optional[List[Dict[str, str]]]) -> None:
- """Set tags for an object."""
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
- object_path = bucket_path / safe_key
- if not object_path.exists():
- raise ObjectNotFoundError("Object does not exist")
-
- bucket_id = bucket_path.name
- existing_entry = self._read_index_entry(bucket_id, safe_key) or {}
- if not existing_entry:
- meta_file = self._metadata_file(bucket_id, safe_key)
- if meta_file.exists():
- try:
- existing_entry = json.loads(meta_file.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- pass
-
- if tags:
- existing_entry["tags"] = tags
- else:
- existing_entry.pop("tags", None)
-
- if existing_entry.get("metadata") or existing_entry.get("tags"):
- self._write_index_entry(bucket_id, safe_key, existing_entry)
- else:
- self._delete_index_entry(bucket_id, safe_key)
- old_meta = self._metadata_file(bucket_id, safe_key)
- try:
- if old_meta.exists():
- old_meta.unlink()
- except OSError:
- pass
-
- def delete_object_tags(self, bucket_name: str, object_key: str) -> None:
- """Delete all tags from an object."""
- self.set_object_tags(bucket_name, object_key, None)
-
- def list_object_versions(self, bucket_name: str, object_key: str) -> List[Dict[str, Any]]:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- bucket_id = bucket_path.name
- safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
- version_dir = self._version_dir(bucket_id, safe_key)
- if not version_dir.exists():
- version_dir = self._legacy_version_dir(bucket_id, safe_key)
- if not version_dir.exists():
- return []
- versions: List[Dict[str, Any]] = []
- for meta_file in version_dir.glob("*.json"):
- try:
- payload = json.loads(meta_file.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- continue
- if not isinstance(payload, dict):
- continue
- payload.setdefault("version_id", meta_file.stem)
- versions.append(payload)
- versions.sort(key=lambda item: item.get("archived_at") or "1970-01-01T00:00:00Z", reverse=True)
- return versions
-
- def restore_object_version(self, bucket_name: str, object_key: str, version_id: str) -> ObjectMeta:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- bucket_id = bucket_path.name
- safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
- version_dir = self._version_dir(bucket_id, safe_key)
- data_path = version_dir / f"{version_id}.bin"
- meta_path = version_dir / f"{version_id}.json"
- if not data_path.exists() or not meta_path.exists():
- raise StorageError("Version not found")
- try:
- payload = json.loads(meta_path.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- payload = {}
- metadata = payload.get("metadata") if isinstance(payload, dict) else {}
- if not isinstance(metadata, dict):
- metadata = {}
- destination = bucket_path / safe_key
- restored_size = data_path.stat().st_size
- is_overwrite = destination.exists()
- existing_size = destination.stat().st_size if is_overwrite else 0
- archived_version_size = 0
- if self._is_versioning_enabled(bucket_path) and is_overwrite:
- archived_version_size = existing_size
- self._archive_current_version(bucket_id, safe_key, reason="restore-overwrite")
- destination.parent.mkdir(parents=True, exist_ok=True)
- shutil.copy2(data_path, destination)
- if metadata:
- self._write_metadata(bucket_id, safe_key, metadata)
- else:
- self._delete_metadata(bucket_id, safe_key)
- stat = destination.stat()
- self._update_bucket_stats_cache(
- bucket_id,
- bytes_delta=restored_size - existing_size,
- objects_delta=0 if is_overwrite else 1,
- version_bytes_delta=archived_version_size,
- version_count_delta=1 if archived_version_size > 0 else 0,
- )
- etag = self._compute_etag(destination)
- internal_meta = {"__etag__": etag, "__size__": str(stat.st_size)}
- combined_meta = {**internal_meta, **(metadata or {})}
- self._write_metadata(bucket_id, safe_key, combined_meta)
- obj_meta = ObjectMeta(
- key=safe_key.as_posix(),
- size=stat.st_size,
- last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
- etag=etag,
- metadata=metadata or None,
- )
- self._update_object_cache_entry(bucket_id, safe_key.as_posix(), obj_meta)
- return obj_meta
-
- def delete_object_version(self, bucket_name: str, object_key: str, version_id: str) -> None:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- bucket_id = bucket_path.name
- safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
- version_dir = self._version_dir(bucket_id, safe_key)
- data_path = version_dir / f"{version_id}.bin"
- meta_path = version_dir / f"{version_id}.json"
- if not data_path.exists() and not meta_path.exists():
- legacy_version_dir = self._legacy_version_dir(bucket_id, safe_key)
- data_path = legacy_version_dir / f"{version_id}.bin"
- meta_path = legacy_version_dir / f"{version_id}.json"
- if not data_path.exists() and not meta_path.exists():
- raise StorageError(f"Version {version_id} not found")
- deleted_version_size = data_path.stat().st_size if data_path.exists() else 0
- if data_path.exists():
- data_path.unlink()
- if meta_path.exists():
- meta_path.unlink()
- parent = data_path.parent
- if parent.exists() and not any(parent.iterdir()):
- parent.rmdir()
- if deleted_version_size > 0:
- self._update_bucket_stats_cache(
- bucket_id,
- version_bytes_delta=-deleted_version_size,
- version_count_delta=-1,
- )
-
- def list_orphaned_objects(self, bucket_name: str) -> List[Dict[str, Any]]:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- bucket_id = bucket_path.name
- version_roots = [self._bucket_versions_root(bucket_id), self._legacy_versions_root(bucket_id)]
- if not any(root.exists() for root in version_roots):
- return []
- aggregated: Dict[str, Dict[str, Any]] = {}
- skipped: set[str] = set()
- for version_root in version_roots:
- if not version_root.exists():
- continue
- for meta_file in version_root.glob("**/*.json"):
- if not meta_file.is_file():
- continue
- rel = meta_file.parent.relative_to(version_root)
- rel_key = rel.as_posix()
- if rel_key in skipped:
- continue
- object_path = bucket_path / rel
- if object_path.exists():
- skipped.add(rel_key)
- continue
- try:
- payload = json.loads(meta_file.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- payload = {}
- version_id = payload.get("version_id") or meta_file.stem
- archived_at = payload.get("archived_at") or "1970-01-01T00:00:00Z"
- size = int(payload.get("size") or 0)
- reason = payload.get("reason") or "update"
- record = aggregated.setdefault(
- rel_key,
- {
- "key": rel_key,
- "versions": 0,
- "total_size": 0,
- "latest": None,
- "_latest_sort": None,
- },
- )
- record["versions"] += 1
- record["total_size"] += size
- candidate = {
- "version_id": version_id,
- "archived_at": archived_at,
- "size": size,
- "reason": reason,
- }
- sort_key = (
- archived_at,
- meta_file.stat().st_mtime,
- )
- current_sort = record.get("_latest_sort")
- if current_sort is None or sort_key > current_sort:
- record["_latest_sort"] = sort_key
- record["latest"] = candidate
- for record in aggregated.values():
- record.pop("_latest_sort", None)
- return sorted(aggregated.values(), key=lambda item: item["key"])
-
- def initiate_multipart_upload(
- self,
- bucket_name: str,
- object_key: str,
- *,
- metadata: Optional[Dict[str, str]] = None,
- ) -> str:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- bucket_id = bucket_path.name
- safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
- upload_id = uuid.uuid4().hex
- upload_root = self._multipart_dir(bucket_id, upload_id)
- upload_root.mkdir(parents=True, exist_ok=False)
- manifest = {
- "upload_id": upload_id,
- "object_key": safe_key.as_posix(),
- "metadata": self._normalize_metadata(metadata),
- "parts": {},
- "created_at": _utc_isoformat(),
- }
- self._write_multipart_manifest(upload_root, manifest)
- return upload_id
-
- def upload_multipart_part(
- self,
- bucket_name: str,
- upload_id: str,
- part_number: int,
- stream: BinaryIO,
- ) -> str:
- """Upload a part for a multipart upload.
-
- Uses file locking to safely update the manifest and handle concurrent uploads.
- """
- if part_number < 1 or part_number > 10000:
- raise StorageError("part_number must be between 1 and 10000")
- bucket_path = self._bucket_path(bucket_name)
-
- upload_root = self._multipart_dir(bucket_path.name, upload_id)
- if not upload_root.exists():
- upload_root = self._legacy_multipart_dir(bucket_path.name, upload_id)
- if not upload_root.exists():
- raise StorageError("Multipart upload not found")
-
- part_filename = f"part-{part_number:05d}.part"
- part_path = upload_root / part_filename
- temp_path = upload_root / f".{part_filename}.tmp"
-
- try:
- if _HAS_RUST:
- with temp_path.open("wb") as target:
- shutil.copyfileobj(stream, target)
- part_etag = _rc.md5_file(str(temp_path))
- else:
- checksum = hashlib.md5()
- with temp_path.open("wb") as target:
- shutil.copyfileobj(_HashingReader(stream, checksum), target)
- target.flush()
- os.fsync(target.fileno())
- part_etag = checksum.hexdigest()
- temp_path.replace(part_path)
- except OSError:
- try:
- temp_path.unlink(missing_ok=True)
- except OSError:
- pass
- raise
-
- record = {
- "etag": part_etag,
- "size": part_path.stat().st_size,
- "filename": part_filename,
- }
-
- manifest_path = upload_root / self.MULTIPART_MANIFEST
- lock_path = upload_root / ".manifest.lock"
-
- max_retries = 3
- for attempt in range(max_retries):
- try:
- with lock_path.open("w") as lock_file:
- with _file_lock(lock_file):
- try:
- manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError) as exc:
- if attempt < max_retries - 1:
- time.sleep(0.1 * (attempt + 1))
- continue
- raise StorageError("Multipart manifest unreadable") from exc
-
- parts = manifest.setdefault("parts", {})
- parts[str(part_number)] = record
- self._atomic_write_json(manifest_path, manifest)
- break
- except OSError as exc:
- if attempt < max_retries - 1:
- time.sleep(0.1 * (attempt + 1))
- continue
- raise StorageError(f"Failed to update multipart manifest: {exc}") from exc
-
- return record["etag"]
-
- def upload_part_copy(
- self,
- bucket_name: str,
- upload_id: str,
- part_number: int,
- source_bucket: str,
- source_key: str,
- start_byte: Optional[int] = None,
- end_byte: Optional[int] = None,
- ) -> Dict[str, Any]:
- """Copy a range from an existing object as a multipart part."""
- if part_number < 1 or part_number > 10000:
- raise StorageError("part_number must be between 1 and 10000")
-
- source_path = self.get_object_path(source_bucket, source_key)
- source_size = source_path.stat().st_size
-
- if start_byte is None:
- start_byte = 0
- if end_byte is None:
- end_byte = source_size - 1
-
- if start_byte < 0 or end_byte >= source_size or start_byte > end_byte:
- raise StorageError("Invalid byte range")
-
- bucket_path = self._bucket_path(bucket_name)
- upload_root = self._multipart_dir(bucket_path.name, upload_id)
- if not upload_root.exists():
- upload_root = self._legacy_multipart_dir(bucket_path.name, upload_id)
- if not upload_root.exists():
- raise StorageError("Multipart upload not found")
-
- checksum = hashlib.md5()
- part_filename = f"part-{part_number:05d}.part"
- part_path = upload_root / part_filename
- temp_path = upload_root / f".{part_filename}.tmp"
-
- try:
- with source_path.open("rb") as src:
- src.seek(start_byte)
- bytes_to_copy = end_byte - start_byte + 1
- with temp_path.open("wb") as target:
- remaining = bytes_to_copy
- while remaining > 0:
- chunk_size = min(65536, remaining)
- chunk = src.read(chunk_size)
- if not chunk:
- break
- checksum.update(chunk)
- target.write(chunk)
- remaining -= len(chunk)
- temp_path.replace(part_path)
- except OSError:
- try:
- temp_path.unlink(missing_ok=True)
- except OSError:
- pass
- raise
-
- record = {
- "etag": checksum.hexdigest(),
- "size": part_path.stat().st_size,
- "filename": part_filename,
- }
-
- manifest_path = upload_root / self.MULTIPART_MANIFEST
- lock_path = upload_root / ".manifest.lock"
-
- max_retries = 3
- for attempt in range(max_retries):
- try:
- with lock_path.open("w") as lock_file:
- with _file_lock(lock_file):
- try:
- manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError) as exc:
- if attempt < max_retries - 1:
- time.sleep(0.1 * (attempt + 1))
- continue
- raise StorageError("Multipart manifest unreadable") from exc
-
- parts = manifest.setdefault("parts", {})
- parts[str(part_number)] = record
- self._atomic_write_json(manifest_path, manifest)
- break
- except OSError as exc:
- if attempt < max_retries - 1:
- time.sleep(0.1 * (attempt + 1))
- continue
- raise StorageError(f"Failed to update multipart manifest: {exc}") from exc
-
- return {
- "etag": record["etag"],
- "last_modified": datetime.fromtimestamp(part_path.stat().st_mtime, timezone.utc),
- }
-
- def complete_multipart_upload(
- self,
- bucket_name: str,
- upload_id: str,
- ordered_parts: List[Dict[str, Any]],
- enforce_quota: bool = True,
- ) -> ObjectMeta:
- if not ordered_parts:
- raise StorageError("parts list required")
- bucket_path = self._bucket_path(bucket_name)
- bucket_id = bucket_path.name
- manifest, upload_root = self._load_multipart_manifest(bucket_id, upload_id)
- parts_map = manifest.get("parts") or {}
- if not parts_map:
- raise StorageError("No uploaded parts found")
- validated: List[tuple[int, Dict[str, Any]]] = []
- total_size = 0
- for part in ordered_parts:
- raw_number = part.get("part_number")
- if raw_number is None:
- raw_number = part.get("PartNumber")
- try:
- number = int(raw_number)
- except (TypeError, ValueError) as exc:
- raise StorageError("Each part must include part_number") from exc
- if number < 1:
- raise StorageError("part numbers must be >= 1")
- key = str(number)
- record = parts_map.get(key)
- if not record:
- raise StorageError(f"Part {number} missing from upload")
- raw_etag = part.get("etag", part.get("ETag", ""))
- supplied_etag = str(raw_etag).strip() or record.get("etag")
- if supplied_etag and record.get("etag") and supplied_etag.strip('"') != record["etag"]:
- raise StorageError(f"ETag mismatch for part {number}")
- validated.append((number, record))
- total_size += record.get("size", 0)
- validated.sort(key=lambda entry: entry[0])
-
- safe_key = self._sanitize_object_key(manifest["object_key"], self._object_key_max_length_bytes)
- destination = bucket_path / safe_key
-
- is_overwrite = destination.exists()
- existing_size = destination.stat().st_size if is_overwrite else 0
- size_delta = total_size - existing_size
- object_delta = 0 if is_overwrite else 1
- versioning_enabled = self._is_versioning_enabled(bucket_path)
-
- if enforce_quota:
- quota_check = self.check_quota(
- bucket_name,
- additional_bytes=max(0, size_delta),
- additional_objects=object_delta,
- )
- if not quota_check["allowed"]:
- raise QuotaExceededError(
- quota_check["message"] or "Quota exceeded",
- quota_check["quota"],
- quota_check["usage"],
- )
-
- destination.parent.mkdir(parents=True, exist_ok=True)
-
- lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock"
-
- archived_version_size = 0
- try:
- with _atomic_lock_file(lock_file_path):
- if versioning_enabled and destination.exists():
- archived_version_size = destination.stat().st_size
- self._archive_current_version(bucket_id, safe_key, reason="overwrite")
- if _HAS_RUST:
- part_paths = []
- for _, record in validated:
- pp = upload_root / record["filename"]
- if not pp.exists():
- raise StorageError(f"Missing part file {record['filename']}")
- part_paths.append(str(pp))
- checksum_hex = _rc.assemble_parts_with_md5(part_paths, str(destination))
- else:
- checksum = hashlib.md5()
- with destination.open("wb") as target:
- for _, record in validated:
- part_path = upload_root / record["filename"]
- if not part_path.exists():
- raise StorageError(f"Missing part file {record['filename']}")
- with part_path.open("rb") as chunk:
- while True:
- data = chunk.read(1024 * 1024)
- if not data:
- break
- checksum.update(data)
- target.write(data)
- target.flush()
- os.fsync(target.fileno())
- checksum_hex = checksum.hexdigest()
- except BlockingIOError:
- raise StorageError("Another upload to this key is in progress")
-
- shutil.rmtree(upload_root, ignore_errors=True)
-
- self._update_bucket_stats_cache(
- bucket_id,
- bytes_delta=size_delta,
- objects_delta=object_delta,
- version_bytes_delta=archived_version_size,
- version_count_delta=1 if archived_version_size > 0 else 0,
- )
-
- stat = destination.stat()
- etag = checksum_hex
- metadata = manifest.get("metadata")
-
- internal_meta = {"__etag__": etag, "__size__": str(stat.st_size), "__last_modified__": str(stat.st_mtime)}
- combined_meta = {**internal_meta, **(metadata or {})}
- self._write_metadata(bucket_id, safe_key, combined_meta)
-
- obj_meta = ObjectMeta(
- key=safe_key.as_posix(),
- size=stat.st_size,
- last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
- etag=etag,
- metadata=metadata,
- )
- self._update_object_cache_entry(bucket_id, safe_key.as_posix(), obj_meta)
-
- return obj_meta
-
- def abort_multipart_upload(self, bucket_name: str, upload_id: str) -> None:
- bucket_path = self._bucket_path(bucket_name)
- upload_root = self._multipart_dir(bucket_path.name, upload_id)
- if upload_root.exists():
- shutil.rmtree(upload_root, ignore_errors=True)
- return
- legacy_root = self._legacy_multipart_dir(bucket_path.name, upload_id)
- if legacy_root.exists():
- shutil.rmtree(legacy_root, ignore_errors=True)
-
- def list_multipart_parts(self, bucket_name: str, upload_id: str) -> List[Dict[str, Any]]:
- """List uploaded parts for a multipart upload."""
- bucket_path = self._bucket_path(bucket_name)
- manifest, upload_root = self._load_multipart_manifest(bucket_path.name, upload_id)
-
- parts = []
- parts_map = manifest.get("parts", {})
- for part_num_str, record in parts_map.items():
- part_num = int(part_num_str)
- part_filename = record.get("filename")
- if not part_filename:
- continue
- part_path = upload_root / part_filename
- if not part_path.exists():
- continue
-
- stat = part_path.stat()
- parts.append({
- "PartNumber": part_num,
- "Size": stat.st_size,
- "ETag": record.get("etag"),
- "LastModified": datetime.fromtimestamp(stat.st_mtime, timezone.utc)
- })
-
- parts.sort(key=lambda x: x["PartNumber"])
- return parts
-
- def list_multipart_uploads(self, bucket_name: str, include_orphaned: bool = False) -> List[Dict[str, Any]]:
- """List all active multipart uploads for a bucket.
-
- Args:
- bucket_name: The bucket to list uploads for.
- include_orphaned: If True, also include upload directories that have
- files but no valid manifest.json (orphaned/interrupted uploads).
- """
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise BucketNotFoundError("Bucket does not exist")
- bucket_id = bucket_path.name
- uploads = []
-
- for multipart_root in (
- self._multipart_bucket_root(bucket_id),
- self._legacy_multipart_bucket_root(bucket_id),
- ):
- if not multipart_root.exists():
- continue
- for upload_dir in multipart_root.iterdir():
- if not upload_dir.is_dir():
- continue
- manifest_path = upload_dir / "manifest.json"
- if manifest_path.exists():
- try:
- manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
- uploads.append({
- "upload_id": manifest.get("upload_id", upload_dir.name),
- "object_key": manifest.get("object_key", ""),
- "created_at": manifest.get("created_at", ""),
- })
- except (OSError, json.JSONDecodeError):
- if include_orphaned:
- has_files = any(upload_dir.rglob("*"))
- if has_files:
- uploads.append({
- "upload_id": upload_dir.name,
- "object_key": "(unknown)",
- "created_at": "",
- "orphaned": True,
- })
- elif include_orphaned:
- has_files = any(f.is_file() for f in upload_dir.rglob("*"))
- if has_files:
- uploads.append({
- "upload_id": upload_dir.name,
- "object_key": "(unknown)",
- "created_at": "",
- "orphaned": True,
- })
- return uploads
-
- def _bucket_path(self, bucket_name: str) -> Path:
- safe_name = self._sanitize_bucket_name(bucket_name)
- return self.root / safe_name
-
- def _require_bucket_path(self, bucket_name: str) -> Path:
- bucket_path = self._bucket_path(bucket_name)
- if not bucket_path.exists():
- raise StorageError("Bucket does not exist")
- return bucket_path
-
- def _object_path(self, bucket_name: str, object_key: str) -> Path:
- bucket_path = self._bucket_path(bucket_name)
- safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
- return bucket_path / safe_key
-
- def _system_root_path(self) -> Path:
- return self.root / self.SYSTEM_ROOT
-
- def _system_buckets_root(self) -> Path:
- return self._system_root_path() / self.SYSTEM_BUCKETS_DIR
-
- def _system_bucket_root(self, bucket_name: str) -> Path:
- return self._system_buckets_root() / bucket_name
-
- def _bucket_meta_root(self, bucket_name: str) -> Path:
- return self._system_bucket_root(bucket_name) / self.BUCKET_META_DIR
-
- def _bucket_versions_root(self, bucket_name: str) -> Path:
- return self._system_bucket_root(bucket_name) / self.BUCKET_VERSIONS_DIR
-
- def _multipart_root(self) -> Path:
- return self._system_root_path() / self.SYSTEM_MULTIPART_DIR
-
- def _multipart_bucket_root(self, bucket_name: str) -> Path:
- return self._multipart_root() / bucket_name
-
- def _legacy_metadata_file(self, bucket_name: str, key: Path) -> Path:
- meta_root = self._legacy_meta_root(bucket_name)
- meta_rel = Path(key.as_posix() + ".meta.json")
- return meta_root / meta_rel
-
- def _legacy_meta_root(self, bucket_name: str) -> Path:
- return self._bucket_path(bucket_name) / ".meta"
-
- def _legacy_versions_root(self, bucket_name: str) -> Path:
- return self._bucket_path(bucket_name) / ".versions"
-
- def _legacy_version_dir(self, bucket_name: str, key: Path) -> Path:
- return self._legacy_versions_root(bucket_name) / key
-
- def _legacy_multipart_bucket_root(self, bucket_name: str) -> Path:
- return self._bucket_path(bucket_name) / ".multipart"
-
- def _legacy_multipart_dir(self, bucket_name: str, upload_id: str) -> Path:
- return self._legacy_multipart_bucket_root(bucket_name) / upload_id
-
- def _fast_list_keys(self, bucket_path: Path) -> List[str]:
- """Fast directory walk using os.scandir instead of pathlib.rglob.
-
- This is significantly faster for large directories (10K+ files).
- Returns just the keys (for backward compatibility).
- """
- return list(self._build_object_cache(bucket_path).keys())
-
- def _build_object_cache(self, bucket_path: Path) -> Dict[str, ObjectMeta]:
- from concurrent.futures import ThreadPoolExecutor
-
- bucket_id = bucket_path.name
- objects: Dict[str, ObjectMeta] = {}
- bucket_str = str(bucket_path)
- bucket_len = len(bucket_str) + 1
-
- if _HAS_RUST:
- etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
- raw = _rc.build_object_cache(
- bucket_str,
- str(self._bucket_meta_root(bucket_id)),
- str(etag_index_path),
- )
- if raw["etag_cache_changed"] and raw["etag_cache"]:
- try:
- etag_index_path.parent.mkdir(parents=True, exist_ok=True)
- with open(etag_index_path, 'w', encoding='utf-8') as f:
- json.dump(raw["etag_cache"], f)
- self._etag_index_mem[bucket_id] = (dict(raw["etag_cache"]), etag_index_path.stat().st_mtime)
- except OSError:
- pass
- for key, size, mtime, etag in raw["objects"]:
- objects[key] = ObjectMeta(
- key=key,
- size=size,
- last_modified=datetime.fromtimestamp(mtime, timezone.utc),
- etag=etag,
- metadata=None,
- )
- return objects
-
- etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
- meta_cache: Dict[str, str] = {}
- index_mtime: float = 0
-
- if etag_index_path.exists():
- try:
- index_mtime = etag_index_path.stat().st_mtime
- with open(etag_index_path, 'r', encoding='utf-8') as f:
- meta_cache = json.load(f)
- except (OSError, json.JSONDecodeError):
- meta_cache = {}
-
- meta_root = self._bucket_meta_root(bucket_id)
- needs_rebuild = False
-
- if meta_root.exists() and index_mtime > 0:
- def check_newer(dir_path: str) -> bool:
- try:
- with os.scandir(dir_path) as it:
- for entry in it:
- if entry.is_dir(follow_symlinks=False):
- if check_newer(entry.path):
- return True
- elif entry.is_file(follow_symlinks=False) and (entry.name.endswith('.meta.json') or entry.name == '_index.json'):
- if entry.stat().st_mtime > index_mtime:
- return True
- except OSError:
- pass
- return False
- needs_rebuild = check_newer(str(meta_root))
- elif not meta_cache:
- needs_rebuild = True
-
- if needs_rebuild and meta_root.exists():
- meta_str = str(meta_root)
- meta_len = len(meta_str) + 1
- meta_files: list[tuple[str, str]] = []
- index_files: list[str] = []
-
- def collect_meta_files(dir_path: str) -> None:
- try:
- with os.scandir(dir_path) as it:
- for entry in it:
- if entry.is_dir(follow_symlinks=False):
- collect_meta_files(entry.path)
- elif entry.is_file(follow_symlinks=False):
- if entry.name == '_index.json':
- index_files.append(entry.path)
- elif entry.name.endswith('.meta.json'):
- rel = entry.path[meta_len:]
- key = rel[:-10].replace(os.sep, '/')
- meta_files.append((key, entry.path))
- except OSError:
- pass
-
- collect_meta_files(meta_str)
-
- meta_cache = {}
-
- for idx_path in index_files:
- try:
- with open(idx_path, 'r', encoding='utf-8') as f:
- idx_data = json.load(f)
- rel_dir = idx_path[meta_len:]
- rel_dir = rel_dir.replace(os.sep, '/')
- if rel_dir.endswith('/_index.json'):
- dir_prefix = rel_dir[:-len('/_index.json')]
- else:
- dir_prefix = ''
- for entry_name, entry_data in idx_data.items():
- if dir_prefix:
- key = f"{dir_prefix}/{entry_name}"
- else:
- key = entry_name
- meta = entry_data.get("metadata", {})
- etag = meta.get("__etag__")
- if etag:
- meta_cache[key] = etag
- except (OSError, json.JSONDecodeError):
- pass
-
- def read_meta_file(item: tuple[str, str]) -> tuple[str, str | None]:
- key, path = item
- try:
- with open(path, 'rb') as f:
- content = f.read()
- etag_marker = b'"__etag__"'
- idx = content.find(etag_marker)
- if idx != -1:
- start = content.find(b'"', idx + len(etag_marker) + 1)
- if start != -1:
- end = content.find(b'"', start + 1)
- if end != -1:
- return key, content[start+1:end].decode('utf-8')
- return key, None
- except (OSError, UnicodeDecodeError):
- return key, None
-
- legacy_meta_files = [(k, p) for k, p in meta_files if k not in meta_cache]
- if legacy_meta_files:
- max_workers = min((os.cpu_count() or 4) * 2, len(legacy_meta_files), 16)
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
- for key, etag in executor.map(read_meta_file, legacy_meta_files):
- if etag:
- meta_cache[key] = etag
-
- if meta_cache:
- try:
- etag_index_path.parent.mkdir(parents=True, exist_ok=True)
- with open(etag_index_path, 'w', encoding='utf-8') as f:
- json.dump(meta_cache, f)
- self._etag_index_mem[bucket_id] = (dict(meta_cache), etag_index_path.stat().st_mtime)
- except OSError:
- pass
-
- def scan_dir(dir_path: str) -> None:
- try:
- with os.scandir(dir_path) as it:
- for entry in it:
- if entry.is_dir(follow_symlinks=False):
- rel_start = entry.path[bucket_len:].split(os.sep)[0] if len(entry.path) > bucket_len else entry.name
- if rel_start in self.INTERNAL_FOLDERS:
- continue
- scan_dir(entry.path)
- elif entry.is_file(follow_symlinks=False):
- rel = entry.path[bucket_len:]
- first_part = rel.split(os.sep)[0] if os.sep in rel else rel
- if first_part in self.INTERNAL_FOLDERS:
- continue
-
- key = rel.replace(os.sep, '/')
- try:
- stat = entry.stat()
-
- etag = meta_cache.get(key)
-
- objects[key] = ObjectMeta(
- key=key,
- size=stat.st_size,
- last_modified=datetime.fromtimestamp(stat.st_mtime, timezone.utc),
- etag=etag,
- metadata=None,
- )
- except OSError:
- pass
- except OSError:
- pass
-
- scan_dir(bucket_str)
- return objects
-
- def _get_object_cache(self, bucket_id: str, bucket_path: Path) -> Dict[str, ObjectMeta]:
- """Get cached object metadata for a bucket, refreshing if stale.
-
- Uses LRU eviction to prevent unbounded cache growth.
- Thread-safe with per-bucket locks to reduce contention.
- Checks stats.json for cross-process cache invalidation.
- """
- now = time.time()
- current_stats_mtime = self._get_cache_marker_mtime(bucket_id)
-
- with self._obj_cache_lock:
- cached = self._object_cache.get(bucket_id)
- if cached:
- objects, timestamp, cached_stats_mtime = cached
- if now - timestamp < self._cache_ttl and current_stats_mtime == cached_stats_mtime:
- self._object_cache.move_to_end(bucket_id)
- return objects
-
- bucket_lock = self._get_bucket_lock(bucket_id)
- with bucket_lock:
- now = time.time()
- current_stats_mtime = self._get_cache_marker_mtime(bucket_id)
- with self._obj_cache_lock:
- cached = self._object_cache.get(bucket_id)
- if cached:
- objects, timestamp, cached_stats_mtime = cached
- if now - timestamp < self._cache_ttl and current_stats_mtime == cached_stats_mtime:
- self._object_cache.move_to_end(bucket_id)
- return objects
-
- objects = self._build_object_cache(bucket_path)
- new_stats_mtime = self._get_cache_marker_mtime(bucket_id)
-
- with self._obj_cache_lock:
- while len(self._object_cache) >= self._object_cache_max_size:
- self._object_cache.popitem(last=False)
-
- self._object_cache[bucket_id] = (objects, time.time(), new_stats_mtime)
- self._object_cache.move_to_end(bucket_id)
- self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1
- self._sorted_key_cache.pop(bucket_id, None)
-
- return objects
-
- def _invalidate_object_cache(self, bucket_id: str) -> None:
- with self._obj_cache_lock:
- self._object_cache.pop(bucket_id, None)
- self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1
-
- self._etag_index_dirty.discard(bucket_id)
- etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
- try:
- etag_index_path.unlink(missing_ok=True)
- except OSError:
- pass
-
- def _get_cache_marker_mtime(self, bucket_id: str) -> float:
- return float(self._stats_serial.get(bucket_id, 0))
-
- def _update_object_cache_entry(self, bucket_id: str, key: str, meta: Optional[ObjectMeta]) -> None:
- with self._obj_cache_lock:
- cached = self._object_cache.get(bucket_id)
- if cached:
- objects, timestamp, stats_mtime = cached
- if meta is None:
- objects.pop(key, None)
- else:
- objects[key] = meta
- self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1
- self._sorted_key_cache.pop(bucket_id, None)
-
- self._etag_index_dirty.add(bucket_id)
- self._schedule_etag_index_flush()
-
- def _get_etag_index(self, bucket_id: str) -> Dict[str, str]:
- etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
- try:
- current_mtime = etag_index_path.stat().st_mtime
- except OSError:
- return {}
- cached = self._etag_index_mem.get(bucket_id)
- if cached:
- cache_dict, cached_mtime = cached
- if current_mtime == cached_mtime:
- return cache_dict
- try:
- with open(etag_index_path, 'r', encoding='utf-8') as f:
- data = json.load(f)
- self._etag_index_mem[bucket_id] = (data, current_mtime)
- return data
- except (OSError, json.JSONDecodeError):
- return {}
-
- def _schedule_etag_index_flush(self) -> None:
- if self._etag_index_flush_timer is None or not self._etag_index_flush_timer.is_alive():
- self._etag_index_flush_timer = threading.Timer(5.0, self._flush_etag_indexes)
- self._etag_index_flush_timer.daemon = True
- self._etag_index_flush_timer.start()
-
- def _flush_etag_indexes(self) -> None:
- dirty = set(self._etag_index_dirty)
- self._etag_index_dirty.clear()
- for bucket_id in dirty:
- with self._obj_cache_lock:
- cached = self._object_cache.get(bucket_id)
- if not cached:
- continue
- objects = cached[0]
- index = {k: v.etag for k, v in objects.items() if v.etag}
- etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
- try:
- self._atomic_write_json(etag_index_path, index, sync=False)
- self._etag_index_mem[bucket_id] = (index, etag_index_path.stat().st_mtime)
- except OSError:
- logger.warning("Failed to flush etag index for bucket %s", bucket_id)
-
- def warm_cache(self, bucket_names: Optional[List[str]] = None) -> None:
- """Pre-warm the object cache for specified buckets or all buckets.
-
- This is called on startup to ensure the first request is fast.
- """
- if bucket_names is None:
- bucket_names = [b.name for b in self.list_buckets()]
-
- for bucket_name in bucket_names:
- try:
- bucket_path = self._bucket_path(bucket_name)
- if bucket_path.exists():
- self._get_object_cache(bucket_path.name, bucket_path)
- except Exception:
- pass
-
- def warm_cache_async(self, bucket_names: Optional[List[str]] = None) -> threading.Thread:
- """Start cache warming in a background thread.
-
- Returns the thread object so caller can optionally wait for it.
- """
- thread = threading.Thread(
- target=self.warm_cache,
- args=(bucket_names,),
- daemon=True,
- name="cache-warmer",
- )
- thread.start()
- return thread
-
- def _ensure_system_roots(self) -> None:
- for path in (
- self._system_root_path(),
- self._system_buckets_root(),
- self._multipart_root(),
- self._system_root_path() / self.SYSTEM_TMP_DIR,
- ):
- path.mkdir(parents=True, exist_ok=True)
-
- @staticmethod
- def _atomic_write_json(path: Path, data: Any, *, sync: bool = True) -> None:
- path.parent.mkdir(parents=True, exist_ok=True)
- tmp_path = path.with_suffix(".tmp")
- try:
- with tmp_path.open("w", encoding="utf-8") as f:
- json.dump(data, f)
- if sync:
- f.flush()
- os.fsync(f.fileno())
- tmp_path.replace(path)
- except BaseException:
- try:
- tmp_path.unlink(missing_ok=True)
- except OSError:
- pass
- raise
-
- def _multipart_dir(self, bucket_name: str, upload_id: str) -> Path:
- return self._multipart_bucket_root(bucket_name) / upload_id
-
- def _version_dir(self, bucket_name: str, key: Path) -> Path:
- return self._bucket_versions_root(bucket_name) / key
-
- def _bucket_config_path(self, bucket_name: str) -> Path:
- return self._system_bucket_root(bucket_name) / self.BUCKET_CONFIG_FILE
-
- def _read_bucket_config(self, bucket_name: str) -> dict[str, Any]:
- now = time.time()
- config_path = self._bucket_config_path(bucket_name)
- cached = self._bucket_config_cache.get(bucket_name)
- if cached:
- config, cached_time, cached_mtime = cached
- if now - cached_time < self._bucket_config_cache_ttl:
- return config.copy()
-
- if not config_path.exists():
- self._bucket_config_cache[bucket_name] = ({}, now, 0.0)
- return {}
- try:
- data = json.loads(config_path.read_text(encoding="utf-8"))
- config = data if isinstance(data, dict) else {}
- mtime = config_path.stat().st_mtime
- self._bucket_config_cache[bucket_name] = (config, now, mtime)
- return config.copy()
- except (OSError, json.JSONDecodeError):
- self._bucket_config_cache[bucket_name] = ({}, now, 0.0)
- return {}
-
- def _write_bucket_config(self, bucket_name: str, payload: dict[str, Any]) -> None:
- config_path = self._bucket_config_path(bucket_name)
- config_path.parent.mkdir(parents=True, exist_ok=True)
- self._atomic_write_json(config_path, payload)
- try:
- mtime = config_path.stat().st_mtime
- except OSError:
- mtime = 0.0
- self._bucket_config_cache[bucket_name] = (payload.copy(), time.time(), mtime)
-
- def _set_bucket_config_entry(self, bucket_name: str, key: str, value: Any | None) -> None:
- config = self._read_bucket_config(bucket_name)
- if value is None:
- config.pop(key, None)
- else:
- config[key] = value
- self._write_bucket_config(bucket_name, config)
-
- def _is_versioning_enabled(self, bucket_path: Path) -> bool:
- config = self._read_bucket_config(bucket_path.name)
- return bool(config.get("versioning_enabled"))
-
- def _load_multipart_manifest(self, bucket_name: str, upload_id: str) -> tuple[dict[str, Any], Path]:
- upload_root = self._multipart_dir(bucket_name, upload_id)
- if not upload_root.exists():
- upload_root = self._legacy_multipart_dir(bucket_name, upload_id)
- manifest_path = upload_root / self.MULTIPART_MANIFEST
- if not manifest_path.exists():
- raise StorageError("Multipart upload not found")
- try:
- manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError) as exc:
- raise StorageError("Multipart manifest unreadable") from exc
- return manifest, upload_root
-
- def _write_multipart_manifest(self, upload_root: Path, manifest: dict[str, Any]) -> None:
- manifest_path = upload_root / self.MULTIPART_MANIFEST
- self._atomic_write_json(manifest_path, manifest)
-
- def _metadata_file(self, bucket_name: str, key: Path) -> Path:
- meta_root = self._bucket_meta_root(bucket_name)
- meta_rel = Path(key.as_posix() + ".meta.json")
- return meta_root / meta_rel
-
- def _index_file_for_key(self, bucket_name: str, key: Path) -> tuple[Path, str]:
- meta_root = self._bucket_meta_root(bucket_name)
- parent = key.parent
- entry_name = key.name
- if parent == Path("."):
- return meta_root / "_index.json", entry_name
- return meta_root / parent / "_index.json", entry_name
-
- def _get_meta_index_lock(self, index_path: str) -> threading.Lock:
- with self._registry_lock:
- if index_path not in self._meta_index_locks:
- self._meta_index_locks[index_path] = threading.Lock()
- return self._meta_index_locks[index_path]
-
- def _read_index_entry(self, bucket_name: str, key: Path) -> Optional[Dict[str, Any]]:
- cache_key = (bucket_name, str(key))
- with self._meta_cache_lock:
- hit = self._meta_read_cache.get(cache_key)
- if hit is not None:
- self._meta_read_cache.move_to_end(cache_key)
- cached = hit[0]
- return dict(cached) if cached is not None else None
-
- index_path, entry_name = self._index_file_for_key(bucket_name, key)
- if _HAS_RUST:
- result = _rc.read_index_entry(str(index_path), entry_name)
- else:
- if not index_path.exists():
- result = None
- else:
- try:
- index_data = json.loads(index_path.read_text(encoding="utf-8"))
- result = index_data.get(entry_name)
- except (OSError, json.JSONDecodeError):
- result = None
-
- with self._meta_cache_lock:
- while len(self._meta_read_cache) >= self._meta_read_cache_max:
- self._meta_read_cache.popitem(last=False)
- self._meta_read_cache[cache_key] = (dict(result) if result is not None else None,)
-
- return result
-
- def _invalidate_meta_read_cache(self, bucket_name: str, key: Path) -> None:
- cache_key = (bucket_name, str(key))
- with self._meta_cache_lock:
- self._meta_read_cache.pop(cache_key, None)
-
- def _write_index_entry(self, bucket_name: str, key: Path, entry: Dict[str, Any]) -> None:
- index_path, entry_name = self._index_file_for_key(bucket_name, key)
- lock = self._get_meta_index_lock(str(index_path))
- with lock:
- if _HAS_RUST:
- _rc.write_index_entry(str(index_path), entry_name, json.dumps(entry))
- else:
- index_path.parent.mkdir(parents=True, exist_ok=True)
- index_data: Dict[str, Any] = {}
- if index_path.exists():
- try:
- index_data = json.loads(index_path.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- pass
- index_data[entry_name] = entry
- self._atomic_write_json(index_path, index_data)
- self._invalidate_meta_read_cache(bucket_name, key)
-
- def _delete_index_entry(self, bucket_name: str, key: Path) -> None:
- index_path, entry_name = self._index_file_for_key(bucket_name, key)
- if not index_path.exists():
- self._invalidate_meta_read_cache(bucket_name, key)
- return
- lock = self._get_meta_index_lock(str(index_path))
- with lock:
- if _HAS_RUST:
- _rc.delete_index_entry(str(index_path), entry_name)
- else:
- try:
- index_data = json.loads(index_path.read_text(encoding="utf-8"))
- except (OSError, json.JSONDecodeError):
- self._invalidate_meta_read_cache(bucket_name, key)
- return
- if entry_name in index_data:
- del index_data[entry_name]
- if index_data:
- self._atomic_write_json(index_path, index_data)
- else:
- try:
- index_path.unlink()
- except OSError:
- pass
- self._invalidate_meta_read_cache(bucket_name, key)
-
- def _normalize_metadata(self, metadata: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
- if not metadata:
- return None
- clean = {str(k).strip(): str(v) for k, v in metadata.items() if str(k).strip()}
- return clean or None
-
- def _write_metadata(self, bucket_name: str, key: Path, metadata: Dict[str, str]) -> None:
- clean = self._normalize_metadata(metadata)
- if not clean:
- self._delete_metadata(bucket_name, key)
- return
- self._write_index_entry(bucket_name, key, {"metadata": clean})
- old_meta = self._metadata_file(bucket_name, key)
- try:
- if old_meta.exists():
- old_meta.unlink()
- except OSError:
- pass
-
- def _archive_current_version(self, bucket_name: str, key: Path, *, reason: str) -> None:
- bucket_path = self._bucket_path(bucket_name)
- source = bucket_path / key
- if not source.exists():
- return
- version_dir = self._version_dir(bucket_name, key)
- version_dir.mkdir(parents=True, exist_ok=True)
- now = _utcnow()
- version_id = f"{now.strftime('%Y%m%dT%H%M%S%fZ')}-{uuid.uuid4().hex[:8]}"
- data_path = version_dir / f"{version_id}.bin"
- shutil.copy2(source, data_path)
- metadata = self._read_metadata(bucket_name, key)
- record = {
- "version_id": version_id,
- "key": key.as_posix(),
- "size": source.stat().st_size,
- "archived_at": now.isoformat().replace("+00:00", "Z"),
- "etag": self._compute_etag(source),
- "metadata": metadata or {},
- "reason": reason,
- }
- manifest_path = version_dir / f"{version_id}.json"
- self._atomic_write_json(manifest_path, record)
-
- def _read_metadata(self, bucket_name: str, key: Path) -> Dict[str, str]:
- entry = self._read_index_entry(bucket_name, key)
- if entry is not None:
- data = entry.get("metadata")
- return data if isinstance(data, dict) else {}
- for meta_file in (self._metadata_file(bucket_name, key), self._legacy_metadata_file(bucket_name, key)):
- if not meta_file.exists():
- continue
- try:
- payload = json.loads(meta_file.read_text(encoding="utf-8"))
- data = payload.get("metadata")
- return data if isinstance(data, dict) else {}
- except (OSError, json.JSONDecodeError):
- return {}
- return {}
-
- def _safe_unlink(self, path: Path) -> None:
- attempts = 3
- last_error: PermissionError | None = None
- for attempt in range(attempts):
- try:
- path.unlink()
- return
- except FileNotFoundError:
- return
- except PermissionError as exc:
- last_error = exc
- if os.name == "nt":
- time.sleep(0.15 * (attempt + 1))
- except OSError as exc:
- raise StorageError(f"Unable to delete object: {exc}") from exc
- message = "Object file is currently in use. Close active previews or wait and try again."
- raise StorageError(message) from last_error
-
- def _delete_metadata(self, bucket_name: str, key: Path) -> None:
- self._delete_index_entry(bucket_name, key)
- locations = (
- (self._metadata_file(bucket_name, key), self._bucket_meta_root(bucket_name)),
- (self._legacy_metadata_file(bucket_name, key), self._legacy_meta_root(bucket_name)),
- )
- for meta_file, meta_root in locations:
- try:
- if meta_file.exists():
- meta_file.unlink()
- parent = meta_file.parent
- while parent != meta_root and parent.exists() and not any(parent.iterdir()):
- parent.rmdir()
- parent = parent.parent
- except OSError:
- continue
-
- def _check_bucket_contents(self, bucket_path: Path) -> tuple[bool, bool, bool]:
- bucket_name = bucket_path.name
-
- if _HAS_RUST:
- return _rc.check_bucket_contents(
- str(bucket_path),
- [
- str(self._bucket_versions_root(bucket_name)),
- str(self._legacy_versions_root(bucket_name)),
- ],
- [
- str(self._multipart_bucket_root(bucket_name)),
- str(self._legacy_multipart_bucket_root(bucket_name)),
- ],
- )
-
- has_objects = False
- has_versions = False
- has_multipart = False
-
- for path in bucket_path.rglob("*"):
- if has_objects:
- break
- if not path.is_file():
- continue
- rel = path.relative_to(bucket_path)
- if rel.parts and rel.parts[0] in self.INTERNAL_FOLDERS:
- continue
- has_objects = True
-
- for version_root in (
- self._bucket_versions_root(bucket_name),
- self._legacy_versions_root(bucket_name),
- ):
- if has_versions:
- break
- if version_root.exists():
- for path in version_root.rglob("*"):
- if path.is_file():
- has_versions = True
- break
-
- for uploads_root in (
- self._multipart_bucket_root(bucket_name),
- self._legacy_multipart_bucket_root(bucket_name),
- ):
- if has_multipart:
- break
- if uploads_root.exists():
- for path in uploads_root.rglob("*"):
- if path.is_file():
- has_multipart = True
- break
-
- return has_objects, has_versions, has_multipart
-
- def _has_visible_objects(self, bucket_path: Path) -> bool:
- has_objects, _, _ = self._check_bucket_contents(bucket_path)
- return has_objects
-
- def _has_archived_versions(self, bucket_path: Path) -> bool:
- _, has_versions, _ = self._check_bucket_contents(bucket_path)
- return has_versions
-
- def _has_active_multipart_uploads(self, bucket_path: Path) -> bool:
- _, _, has_multipart = self._check_bucket_contents(bucket_path)
- return has_multipart
-
- def _remove_tree(self, path: Path) -> None:
- if not path.exists():
- return
- def _handle_error(func, target_path, exc_info):
- try:
- os.chmod(target_path, stat.S_IRWXU)
- func(target_path)
- except Exception as exc:
- raise StorageError(f"Unable to delete bucket contents: {exc}") from exc
-
- try:
- shutil.rmtree(path, onerror=_handle_error)
- except FileNotFoundError:
- return
- except PermissionError as exc:
- raise StorageError("Bucket in use. Close open files and try again") from exc
-
- @staticmethod
- def _sanitize_bucket_name(bucket_name: str) -> str:
- if not bucket_name:
- raise StorageError("Bucket name required")
-
- name = bucket_name.lower()
- if len(name) < 3 or len(name) > 63:
- raise StorageError("Bucket name must be between 3 and 63 characters")
-
- if name.startswith("-") or name.endswith("-"):
- raise StorageError("Bucket name cannot start or end with a hyphen")
-
- if ".." in name:
- raise StorageError("Bucket name cannot contain consecutive periods")
-
- if name.startswith("xn--"):
- raise StorageError("Bucket name cannot start with 'xn--'")
-
- if re.fullmatch(r"\d+\.\d+\.\d+\.\d+", name):
- raise StorageError("Bucket name cannot be formatted like an IP address")
-
- if not re.fullmatch(r"[a-z0-9][a-z0-9.-]+[a-z0-9]", name):
- raise StorageError("Bucket name can contain lowercase letters, numbers, dots, and hyphens")
-
- return name
-
- @staticmethod
- def _sanitize_object_key(object_key: str, max_length_bytes: int = 1024) -> Path:
- if _HAS_RUST:
- error = _rc.validate_object_key(object_key, max_length_bytes, os.name == "nt")
- if error:
- raise StorageError(error)
- normalized = unicodedata.normalize("NFC", object_key)
- candidate = Path(normalized)
- if candidate.is_absolute():
- raise StorageError("Absolute object keys are not allowed")
- if getattr(candidate, "drive", ""):
- raise StorageError("Object key cannot include a drive letter")
- return Path(*candidate.parts) if candidate.parts else candidate
-
- if not object_key:
- raise StorageError("Object key required")
- if "\x00" in object_key:
- raise StorageError("Object key contains null bytes")
- object_key = unicodedata.normalize("NFC", object_key)
- if len(object_key.encode("utf-8")) > max_length_bytes:
- raise StorageError(f"Object key exceeds maximum length of {max_length_bytes} bytes")
- if object_key.startswith(("/", "\\")):
- raise StorageError("Object key cannot start with a slash")
-
- candidate = Path(object_key)
- if ".." in candidate.parts:
- raise StorageError("Object key contains parent directory references")
-
- if candidate.is_absolute():
- raise StorageError("Absolute object keys are not allowed")
- if getattr(candidate, "drive", ""):
- raise StorageError("Object key cannot include a drive letter")
- parts = []
- for part in candidate.parts:
- if part in ("", ".", ".."):
- raise StorageError("Object key contains invalid segments")
- if any(ord(ch) < 32 for ch in part):
- raise StorageError("Object key contains control characters")
- if os.name == "nt":
- if any(ch in part for ch in "<>:\"/\\|?*"):
- raise StorageError("Object key contains characters not supported on Windows filesystems")
- if part.endswith((" ", ".")):
- raise StorageError("Object key segments cannot end with spaces or periods on Windows")
- trimmed = part.upper().rstrip(". ")
- if trimmed in WINDOWS_RESERVED_NAMES:
- raise StorageError(f"Invalid filename segment: {part}")
- parts.append(part)
- if parts:
- top_level = parts[0]
- if top_level in ObjectStorage.INTERNAL_FOLDERS or top_level == ObjectStorage.SYSTEM_ROOT:
- raise StorageError("Object key uses a reserved prefix")
- return Path(*parts)
-
- @staticmethod
- def _compute_etag(path: Path) -> str:
- if _HAS_RUST:
- return _rc.md5_file(str(path))
- checksum = hashlib.md5()
- with path.open("rb") as handle:
- for chunk in iter(lambda: handle.read(8192), b""):
- checksum.update(chunk)
- return checksum.hexdigest()
-
-
-class _HashingReader:
- """Wraps a binary stream, updating the checksum as it is read."""
-
- def __init__(self, stream: BinaryIO, checksum: Any) -> None:
- self.stream = stream
- self.checksum = checksum
-
- def read(self, size: int = -1) -> bytes:
- data = self.stream.read(size)
- if data:
- self.checksum.update(data)
- return data
diff --git a/python/app/system_metrics.py b/python/app/system_metrics.py
deleted file mode 100644
index 235710b..0000000
--- a/python/app/system_metrics.py
+++ /dev/null
@@ -1,215 +0,0 @@
-from __future__ import annotations
-
-import json
-import logging
-import threading
-import time
-from dataclasses import dataclass
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Dict, List, Optional, TYPE_CHECKING
-
-import psutil
-
-if TYPE_CHECKING:
- from .storage import ObjectStorage
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class SystemMetricsSnapshot:
- timestamp: datetime
- cpu_percent: float
- memory_percent: float
- disk_percent: float
- storage_bytes: int
-
- def to_dict(self) -> Dict[str, Any]:
- return {
- "timestamp": self.timestamp.strftime("%Y-%m-%dT%H:%M:%SZ"),
- "cpu_percent": round(self.cpu_percent, 2),
- "memory_percent": round(self.memory_percent, 2),
- "disk_percent": round(self.disk_percent, 2),
- "storage_bytes": self.storage_bytes,
- }
-
- @classmethod
- def from_dict(cls, data: Dict[str, Any]) -> "SystemMetricsSnapshot":
- timestamp_str = data["timestamp"]
- if timestamp_str.endswith("Z"):
- timestamp_str = timestamp_str[:-1] + "+00:00"
- return cls(
- timestamp=datetime.fromisoformat(timestamp_str),
- cpu_percent=data.get("cpu_percent", 0.0),
- memory_percent=data.get("memory_percent", 0.0),
- disk_percent=data.get("disk_percent", 0.0),
- storage_bytes=data.get("storage_bytes", 0),
- )
-
-
-class SystemMetricsCollector:
- def __init__(
- self,
- storage_root: Path,
- interval_minutes: int = 5,
- retention_hours: int = 24,
- ):
- self.storage_root = storage_root
- self.interval_seconds = interval_minutes * 60
- self.retention_hours = retention_hours
- self._lock = threading.Lock()
- self._shutdown = threading.Event()
- self._snapshots: List[SystemMetricsSnapshot] = []
- self._storage_ref: Optional["ObjectStorage"] = None
-
- self._load_history()
-
- self._snapshot_thread = threading.Thread(
- target=self._snapshot_loop,
- name="system-metrics-snapshot",
- daemon=True,
- )
- self._snapshot_thread.start()
-
- def set_storage(self, storage: "ObjectStorage") -> None:
- with self._lock:
- self._storage_ref = storage
-
- def _config_path(self) -> Path:
- return self.storage_root / ".myfsio.sys" / "config" / "metrics_history.json"
-
- def _load_history(self) -> None:
- config_path = self._config_path()
- if not config_path.exists():
- return
- try:
- data = json.loads(config_path.read_text(encoding="utf-8"))
- history_data = data.get("history", [])
- self._snapshots = [SystemMetricsSnapshot.from_dict(s) for s in history_data]
- self._prune_old_snapshots()
- except (json.JSONDecodeError, OSError, KeyError) as e:
- logger.warning(f"Failed to load system metrics history: {e}")
-
- def _save_history(self) -> None:
- config_path = self._config_path()
- config_path.parent.mkdir(parents=True, exist_ok=True)
- try:
- data = {"history": [s.to_dict() for s in self._snapshots]}
- config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
- except OSError as e:
- logger.warning(f"Failed to save system metrics history: {e}")
-
- def _prune_old_snapshots(self) -> None:
- if not self._snapshots:
- return
- cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600)
- self._snapshots = [
- s for s in self._snapshots if s.timestamp.timestamp() > cutoff
- ]
-
- def _snapshot_loop(self) -> None:
- while not self._shutdown.is_set():
- self._shutdown.wait(timeout=self.interval_seconds)
- if not self._shutdown.is_set():
- self._take_snapshot()
-
- def _take_snapshot(self) -> None:
- try:
- cpu_percent = psutil.cpu_percent(interval=0.1)
- memory = psutil.virtual_memory()
- disk = psutil.disk_usage(str(self.storage_root))
-
- storage_bytes = 0
- with self._lock:
- storage = self._storage_ref
- if storage:
- try:
- buckets = storage.list_buckets()
- for bucket in buckets:
- stats = storage.bucket_stats(bucket.name, cache_ttl=60)
- storage_bytes += stats.get("total_bytes", stats.get("bytes", 0))
- except Exception as e:
- logger.warning(f"Failed to collect bucket stats: {e}")
-
- snapshot = SystemMetricsSnapshot(
- timestamp=datetime.now(timezone.utc),
- cpu_percent=cpu_percent,
- memory_percent=memory.percent,
- disk_percent=disk.percent,
- storage_bytes=storage_bytes,
- )
-
- with self._lock:
- self._snapshots.append(snapshot)
- self._prune_old_snapshots()
- self._save_history()
-
- logger.debug(f"System metrics snapshot taken: CPU={cpu_percent:.1f}%, Memory={memory.percent:.1f}%")
- except Exception as e:
- logger.warning(f"Failed to take system metrics snapshot: {e}")
-
- def get_current(self) -> Dict[str, Any]:
- cpu_percent = psutil.cpu_percent(interval=0.1)
- memory = psutil.virtual_memory()
- disk = psutil.disk_usage(str(self.storage_root))
- boot_time = psutil.boot_time()
- uptime_seconds = time.time() - boot_time
- uptime_days = int(uptime_seconds / 86400)
-
- total_buckets = 0
- total_objects = 0
- total_bytes_used = 0
- total_versions = 0
-
- with self._lock:
- storage = self._storage_ref
- if storage:
- try:
- buckets = storage.list_buckets()
- total_buckets = len(buckets)
- for bucket in buckets:
- stats = storage.bucket_stats(bucket.name, cache_ttl=60)
- total_objects += stats.get("total_objects", stats.get("objects", 0))
- total_bytes_used += stats.get("total_bytes", stats.get("bytes", 0))
- total_versions += stats.get("version_count", 0)
- except Exception as e:
- logger.warning(f"Failed to collect current bucket stats: {e}")
-
- return {
- "cpu_percent": round(cpu_percent, 2),
- "memory": {
- "total": memory.total,
- "available": memory.available,
- "used": memory.used,
- "percent": round(memory.percent, 2),
- },
- "disk": {
- "total": disk.total,
- "free": disk.free,
- "used": disk.used,
- "percent": round(disk.percent, 2),
- },
- "app": {
- "buckets": total_buckets,
- "objects": total_objects,
- "versions": total_versions,
- "storage_bytes": total_bytes_used,
- "uptime_days": uptime_days,
- },
- }
-
- def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]:
- with self._lock:
- snapshots = list(self._snapshots)
-
- if hours:
- cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)
- snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff]
-
- return [s.to_dict() for s in snapshots]
-
- def shutdown(self) -> None:
- self._shutdown.set()
- self._take_snapshot()
- self._snapshot_thread.join(timeout=5.0)
diff --git a/python/app/ui.py b/python/app/ui.py
deleted file mode 100644
index f43ddb2..0000000
--- a/python/app/ui.py
+++ /dev/null
@@ -1,4309 +0,0 @@
-from __future__ import annotations
-
-import io
-import json
-import uuid
-import psutil
-import shutil
-from datetime import datetime, timezone as dt_timezone
-from pathlib import Path
-from typing import Any
-from urllib.parse import quote, urlparse
-from zoneinfo import ZoneInfo
-
-import boto3
-import requests
-from botocore.exceptions import ClientError, EndpointConnectionError, ConnectionClosedError
-from flask import (
- Blueprint,
- Response,
- current_app,
- flash,
- jsonify,
- redirect,
- render_template,
- request,
- send_file,
- session,
- url_for,
-)
-from flask_wtf.csrf import generate_csrf
-
-from .acl import AclService, create_canned_acl, CANNED_ACLS
-from .bucket_policies import BucketPolicyStore
-from .connections import ConnectionStore, RemoteConnection
-from .extensions import limiter, csrf
-from .iam import IamError
-from .kms import KMSManager
-from .replication import ReplicationManager, ReplicationRule
-from .s3_client import (
- get_session_s3_client,
- get_upload_registry,
- handle_client_error,
- handle_connection_error,
- build_url_templates,
- translate_list_objects,
- get_versioning_via_s3,
- stream_objects_ndjson,
- format_datetime_display as _s3_format_display,
- format_datetime_iso as _s3_format_iso,
-)
-from .secret_store import EphemeralSecretStore
-from .site_registry import SiteRegistry, SiteInfo, PeerSite
-from .storage import ObjectStorage, StorageError
-from .website_domains import normalize_domain, is_valid_domain
-
-ui_bp = Blueprint("ui", __name__, template_folder="../templates", url_prefix="/ui")
-
-
-def _convert_to_display_tz(dt: datetime, display_tz: str | None = None) -> datetime:
- """Convert a datetime to the configured display timezone.
-
- Args:
- dt: The datetime to convert
- display_tz: Optional timezone string. If not provided, reads from current_app.config.
- """
- if display_tz is None:
- display_tz = current_app.config.get("DISPLAY_TIMEZONE", "UTC")
- if display_tz and display_tz != "UTC":
- try:
- tz = ZoneInfo(display_tz)
- if dt.tzinfo is None:
- dt = dt.replace(tzinfo=dt_timezone.utc)
- dt = dt.astimezone(tz)
- except (KeyError, ValueError):
- pass
- return dt
-
-
-def _format_datetime_display(dt: datetime, display_tz: str | None = None) -> str:
- """Format a datetime for display using the configured timezone.
-
- Args:
- dt: The datetime to format
- display_tz: Optional timezone string. If not provided, reads from current_app.config.
- """
- dt = _convert_to_display_tz(dt, display_tz)
- tz_abbr = dt.strftime("%Z") or "UTC"
- return f"{dt.strftime('%b %d, %Y %H:%M')} ({tz_abbr})"
-
-
-def _format_datetime_iso(dt: datetime, display_tz: str | None = None) -> str:
- """Format a datetime as ISO format using the configured timezone.
-
- Args:
- dt: The datetime to format
- display_tz: Optional timezone string. If not provided, reads from current_app.config.
- """
- dt = _convert_to_display_tz(dt, display_tz)
- return dt.isoformat()
-
-
-
-def _storage() -> ObjectStorage:
- return current_app.extensions["object_storage"]
-
-
-def _replication_manager() -> ReplicationManager:
- return current_app.extensions["replication"]
-
-
-def _iam():
- return current_app.extensions["iam"]
-
-
-def _kms() -> KMSManager | None:
- return current_app.extensions.get("kms")
-
-
-def _bucket_policies() -> BucketPolicyStore:
- store: BucketPolicyStore = current_app.extensions["bucket_policies"]
- store.maybe_reload()
- return store
-
-
-def _build_policy_context() -> dict[str, Any]:
- ctx: dict[str, Any] = {}
- if request.headers.get("Referer"):
- ctx["aws:Referer"] = request.headers.get("Referer")
- if request.access_route:
- ctx["aws:SourceIp"] = request.access_route[0]
- elif request.remote_addr:
- ctx["aws:SourceIp"] = request.remote_addr
- ctx["aws:SecureTransport"] = str(request.is_secure).lower()
- if request.headers.get("User-Agent"):
- ctx["aws:UserAgent"] = request.headers.get("User-Agent")
- return ctx
-
-
-def _connections() -> ConnectionStore:
- return current_app.extensions["connections"]
-
-
-def _replication() -> ReplicationManager:
- return current_app.extensions["replication"]
-
-
-def _secret_store() -> EphemeralSecretStore:
- store: EphemeralSecretStore = current_app.extensions["secret_store"]
- store.purge_expired()
- return store
-
-
-def _acl() -> AclService:
- return current_app.extensions["acl"]
-
-
-def _operation_metrics():
- return current_app.extensions.get("operation_metrics")
-
-
-def _site_registry() -> SiteRegistry:
- return current_app.extensions["site_registry"]
-
-
-def _format_bytes(num: int) -> str:
- step = 1024
- units = ["B", "KB", "MB", "GB", "TB", "PB"]
- value = float(num)
- for unit in units:
- if value < step or unit == units[-1]:
- if unit == "B":
- return f"{int(value)} B"
- return f"{value:.1f} {unit}"
- value /= step
- return f"{value:.1f} PB"
-
-
-def _friendly_error_message(exc: Exception) -> str:
- message = str(exc) or "An unexpected error occurred"
- if isinstance(exc, IamError):
- return f"Access issue: {message}"
- if isinstance(exc, StorageError):
- return f"Storage issue: {message}"
- return message
-
-
-def _wants_json() -> bool:
- return request.accept_mimetypes.best_match(
- ["application/json", "text/html"]
- ) == "application/json"
-
-
-def _policy_allows_public_read(policy: dict[str, Any]) -> bool:
- statements = policy.get("Statement", [])
- if isinstance(statements, dict):
- statements = [statements]
- list_allowed = False
- get_allowed = False
- for statement in statements:
- if not isinstance(statement, dict):
- continue
- if statement.get("Effect") != "Allow":
- continue
- if statement.get("Condition"):
- continue
- principal = statement.get("Principal")
- principal_all = principal == "*" or (
- isinstance(principal, dict)
- and any(value == "*" or value == ["*"] for value in principal.values())
- )
- if not principal_all:
- continue
- actions = statement.get("Action", [])
- if isinstance(actions, str):
- actions = [actions]
- normalized = {action.lower() for action in actions}
- if not list_allowed:
- list_allowed = any(action in {"*", "s3:*", "s3:listbucket"} for action in normalized)
- if not get_allowed:
- get_allowed = any(action in {"*", "s3:*", "s3:getobject"} for action in normalized)
- if list_allowed and get_allowed:
- return True
- return False
-
-
-def _bucket_access_descriptor(policy: dict[str, Any] | None) -> tuple[str, str]:
- if not policy:
- return ("IAM only", "bg-secondary-subtle text-secondary-emphasis")
- if _policy_allows_public_read(policy):
- return ("Public read", "bg-warning-subtle text-warning-emphasis")
- return ("Custom policy", "bg-info-subtle text-info-emphasis")
-
-
-def _current_principal():
- token = session.get("cred_token")
- creds = _secret_store().peek(token) if token else None
- if not creds:
- return None
- try:
- return _iam().authenticate(creds["access_key"], creds["secret_key"])
- except IamError:
- session.pop("cred_token", None)
- if token:
- _secret_store().pop(token)
- return None
-
-
-def _authorize_ui(principal, bucket_name: str | None, action: str, *, object_key: str | None = None) -> None:
- iam_allowed = True
- iam_error: IamError | None = None
- try:
- _iam().authorize(principal, bucket_name, action)
- except IamError as exc:
- iam_allowed = False
- iam_error = exc
- decision = None
- enforce_bucket_policies = current_app.config.get("UI_ENFORCE_BUCKET_POLICIES", True)
- if bucket_name and enforce_bucket_policies:
- access_key = principal.access_key if principal else None
- policy_context = _build_policy_context()
- decision = _bucket_policies().evaluate(access_key, bucket_name, object_key, action, policy_context)
- if decision == "deny":
- raise IamError("Access denied by bucket policy")
- if not iam_allowed and decision != "allow":
- raise iam_error or IamError("Access denied")
-
-
-def _api_headers() -> dict[str, str]:
- token = session.get("cred_token")
- creds = _secret_store().peek(token) or {}
- return {
- "X-Access-Key": creds.get("access_key", ""),
- "X-Secret-Key": creds.get("secret_key", ""),
- }
-
-
-@ui_bp.app_context_processor
-def inject_nav_state() -> dict[str, Any]:
- principal = _current_principal()
- can_manage = False
- if principal:
- try:
- _iam().authorize(principal, None, "iam:list_users")
- can_manage = True
- except IamError:
- can_manage = False
- return {
- "principal": principal,
- "can_manage_iam": can_manage,
- "can_view_metrics": can_manage,
- "website_hosting_nav": can_manage and current_app.config.get("WEBSITE_HOSTING_ENABLED", False),
- "csrf_token": generate_csrf,
- }
-
-
-@ui_bp.before_request
-def ensure_authenticated():
- exempt = {"ui.login"}
- if request.endpoint in exempt or request.endpoint is None:
- return None
- if _current_principal() is None:
- return redirect(url_for("ui.login"))
- return None
-
-
-@ui_bp.route("/login", methods=["GET", "POST"])
-def login():
- if request.method == "POST":
- access_key = request.form.get("access_key", "").strip()
- secret_key = request.form.get("secret_key", "").strip()
- try:
- principal = _iam().authenticate(access_key, secret_key)
- except IamError as exc:
- flash(_friendly_error_message(exc), "danger")
- return render_template("login.html")
- creds = {"access_key": access_key, "secret_key": secret_key}
- ttl = int(current_app.permanent_session_lifetime.total_seconds())
- token = _secret_store().remember(creds, ttl=ttl)
- session["cred_token"] = token
- session.permanent = True
- flash(f"Welcome back, {principal.display_name}", "success")
- return redirect(url_for("ui.buckets_overview"))
- return render_template("login.html")
-
-
-@ui_bp.post("/logout")
-def logout():
- token = session.pop("cred_token", None)
- if token:
- _secret_store().pop(token)
- flash("Signed out", "info")
- return redirect(url_for("ui.login"))
-
-
-@ui_bp.get("/docs")
-def docs_page():
- principal = _current_principal()
- api_base = current_app.config.get("API_BASE_URL") or "http://127.0.0.1:5000"
- api_base = api_base.rstrip("/")
- parsed = urlparse(api_base)
- api_host = parsed.netloc or parsed.path or api_base
- return render_template(
- "docs.html",
- principal=principal,
- api_base=api_base,
- api_host=api_host,
- )
-
-
-@ui_bp.get("/")
-def buckets_overview():
- principal = _current_principal()
- try:
- client = get_session_s3_client()
- resp = client.list_buckets()
- bucket_names = [b["Name"] for b in resp.get("Buckets", [])]
- bucket_creation = {b["Name"]: b.get("CreationDate") for b in resp.get("Buckets", [])}
- except PermissionError:
- return redirect(url_for("ui.login"))
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- flash(exc.response.get("Error", {}).get("Message", "S3 operation failed"), "danger")
- else:
- flash("S3 API server is unreachable. Ensure the API server is running.", "danger")
- return render_template("buckets.html", buckets=[], principal=principal)
-
- allowed_names = set(_iam().buckets_for_principal(principal, bucket_names))
- visible_buckets = []
- policy_store = _bucket_policies()
- for name in bucket_names:
- if name not in allowed_names:
- continue
- policy = policy_store.get_policy(name)
- cache_ttl = current_app.config.get("BUCKET_STATS_CACHE_TTL", 60)
- stats = _storage().bucket_stats(name, cache_ttl=cache_ttl)
- access_label, access_badge = _bucket_access_descriptor(policy)
-
- class _BucketMeta:
- def __init__(self, n, cd):
- self.name = n
- self.creation_date = cd
- meta = _BucketMeta(name, bucket_creation.get(name))
-
- visible_buckets.append({
- "meta": meta,
- "summary": {
- "objects": stats["total_objects"],
- "total_bytes": stats["total_bytes"],
- "human_size": _format_bytes(stats["total_bytes"]),
- },
- "access_label": access_label,
- "access_badge": access_badge,
- "has_policy": bool(policy),
- "detail_url": url_for("ui.bucket_detail", bucket_name=name),
- })
- return render_template("buckets.html", buckets=visible_buckets, principal=principal)
-
-@ui_bp.get("/buckets")
-def buckets_redirect():
- return redirect(url_for("ui.buckets_overview"))
-
-@ui_bp.post("/buckets")
-def create_bucket():
- principal = _current_principal()
- bucket_name = request.form.get("bucket_name", "").strip()
- if not bucket_name:
- if _wants_json():
- return jsonify({"error": "Bucket name is required"}), 400
- flash("Bucket name is required", "danger")
- return redirect(url_for("ui.buckets_overview"))
- try:
- _authorize_ui(principal, bucket_name, "write")
- client = get_session_s3_client()
- client.create_bucket(Bucket=bucket_name)
- if _wants_json():
- return jsonify({"success": True, "message": f"Bucket '{bucket_name}' created", "bucket_name": bucket_name})
- flash(f"Bucket '{bucket_name}' created", "success")
- except PermissionError:
- return redirect(url_for("ui.login"))
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": _friendly_error_message(exc)}), 400
- flash(_friendly_error_message(exc), "danger")
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- if _wants_json():
- return jsonify(err), status
- flash(err["error"], "danger")
- else:
- msg = "S3 API server is unreachable"
- if _wants_json():
- return jsonify({"error": msg}), 502
- flash(msg, "danger")
- return redirect(url_for("ui.buckets_overview"))
-
-
-@ui_bp.get("/buckets/")
-def bucket_detail(bucket_name: str):
- principal = _current_principal()
- storage = _storage()
- try:
- _authorize_ui(principal, bucket_name, "list")
- if not storage.bucket_exists(bucket_name):
- raise StorageError("Bucket does not exist")
- except (StorageError, IamError) as exc:
- flash(_friendly_error_message(exc), "danger")
- return redirect(url_for("ui.buckets_overview"))
- bucket_policy = _bucket_policies().get_policy(bucket_name)
- policy_text = json.dumps(bucket_policy, indent=2) if bucket_policy else ""
- default_policy = json.dumps(
- {
- "Version": "2012-10-17",
- "Statement": [
- {
- "Sid": "AllowList",
- "Effect": "Allow",
- "Principal": "*",
- "Action": ["s3:ListBucket"],
- "Resource": [f"arn:aws:s3:::{bucket_name}"],
- },
- {
- "Sid": "AllowRead",
- "Effect": "Allow",
- "Principal": "*",
- "Action": ["s3:GetObject"],
- "Resource": [f"arn:aws:s3:::{bucket_name}/*"],
- },
- ],
- },
- indent=2,
- )
- iam = _iam()
- bucket_perms = iam.check_permissions(
- principal, bucket_name, ["policy", "lifecycle", "cors", "write", "replication"],
- ) if principal else {}
- admin_perms = iam.check_permissions(
- principal, None, ["iam:list_users"],
- ) if principal else {}
-
- can_edit_policy = bucket_perms.get("policy", False)
- can_manage_lifecycle = bucket_perms.get("lifecycle", False)
- can_manage_cors = bucket_perms.get("cors", False)
- can_manage_versioning = bucket_perms.get("write", False)
- can_manage_replication = bucket_perms.get("replication", False)
- is_replication_admin = admin_perms.get("iam:list_users", False)
-
- try:
- versioning_enabled = storage.is_versioning_enabled(bucket_name)
- except StorageError:
- versioning_enabled = False
-
- replication_rule = _replication().get_rule(bucket_name)
- connections = _connections().list() if (is_replication_admin or replication_rule) else []
-
- encryption_config = storage.get_bucket_encryption(bucket_name)
- kms_manager = _kms()
- kms_keys = kms_manager.list_keys() if kms_manager else []
- kms_enabled = current_app.config.get("KMS_ENABLED", False)
- encryption_enabled = current_app.config.get("ENCRYPTION_ENABLED", False)
- lifecycle_enabled = current_app.config.get("LIFECYCLE_ENABLED", False)
- site_sync_enabled = current_app.config.get("SITE_SYNC_ENABLED", False)
- website_hosting_enabled = current_app.config.get("WEBSITE_HOSTING_ENABLED", False)
- can_manage_encryption = can_manage_versioning
-
- bucket_quota = storage.get_bucket_quota(bucket_name)
- bucket_stats = storage.bucket_stats(bucket_name)
- can_manage_quota = is_replication_admin
-
- website_config = None
- website_domains = []
- if website_hosting_enabled:
- try:
- website_config = storage.get_bucket_website(bucket_name)
- except StorageError:
- website_config = None
- domain_store = current_app.extensions.get("website_domains")
- if domain_store:
- website_domains = domain_store.get_domains_for_bucket(bucket_name)
-
- objects_api_url = url_for("ui.list_bucket_objects", bucket_name=bucket_name)
- objects_stream_url = url_for("ui.stream_bucket_objects", bucket_name=bucket_name)
-
- lifecycle_url = url_for("ui.bucket_lifecycle", bucket_name=bucket_name)
- cors_url = url_for("ui.bucket_cors", bucket_name=bucket_name)
- acl_url = url_for("ui.bucket_acl", bucket_name=bucket_name)
- folders_url = url_for("ui.create_folder", bucket_name=bucket_name)
- buckets_for_copy_url = url_for("ui.list_buckets_for_copy", bucket_name=bucket_name)
-
- return render_template(
- "bucket_detail.html",
- bucket_name=bucket_name,
- objects_api_url=objects_api_url,
- objects_stream_url=objects_stream_url,
- lifecycle_url=lifecycle_url,
- cors_url=cors_url,
- acl_url=acl_url,
- folders_url=folders_url,
- buckets_for_copy_url=buckets_for_copy_url,
- principal=principal,
- bucket_policy_text=policy_text,
- bucket_policy=bucket_policy,
- can_edit_policy=can_edit_policy,
- can_manage_lifecycle=can_manage_lifecycle,
- can_manage_cors=can_manage_cors,
- can_manage_versioning=can_manage_versioning,
- can_manage_replication=can_manage_replication,
- can_manage_encryption=can_manage_encryption,
- is_replication_admin=is_replication_admin,
- default_policy=default_policy,
- versioning_enabled=versioning_enabled,
- replication_rule=replication_rule,
- connections=connections,
- encryption_config=encryption_config,
- kms_keys=kms_keys,
- kms_enabled=kms_enabled,
- encryption_enabled=encryption_enabled,
- lifecycle_enabled=lifecycle_enabled,
- bucket_quota=bucket_quota,
- bucket_stats=bucket_stats,
- can_manage_quota=can_manage_quota,
- site_sync_enabled=site_sync_enabled,
- website_hosting_enabled=website_hosting_enabled,
- website_config=website_config,
- website_domains=website_domains,
- can_manage_website=can_edit_policy,
- )
-
-
-@ui_bp.get("/buckets//objects")
-def list_bucket_objects(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "list")
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- try:
- max_keys = max(1, min(int(request.args.get("max_keys", 1000)), 100000))
- except ValueError:
- return jsonify({"error": "max_keys must be an integer"}), 400
- continuation_token = request.args.get("continuation_token") or None
- prefix = request.args.get("prefix") or None
-
- try:
- client = get_session_s3_client()
- kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": max_keys}
- if continuation_token:
- kwargs["ContinuationToken"] = continuation_token
- if prefix:
- kwargs["Prefix"] = prefix
- boto_resp = client.list_objects_v2(**kwargs)
- except PermissionError as exc:
- return jsonify({"error": str(exc)}), 401
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
-
- versioning_enabled = get_versioning_via_s3(client, bucket_name)
- url_templates = build_url_templates(bucket_name)
- display_tz = current_app.config.get("DISPLAY_TIMEZONE", "UTC")
- data = translate_list_objects(boto_resp, url_templates, display_tz, versioning_enabled)
- response = jsonify(data)
- response.headers["Cache-Control"] = "no-store"
- return response
-
-
-@ui_bp.get("/buckets//objects/stream")
-def stream_bucket_objects(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "list")
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- prefix = request.args.get("prefix") or None
- delimiter = request.args.get("delimiter") or None
-
- storage = _storage()
- try:
- versioning_enabled = storage.is_versioning_enabled(bucket_name)
- except StorageError:
- versioning_enabled = False
- url_templates = build_url_templates(bucket_name)
- display_tz = current_app.config.get("DISPLAY_TIMEZONE", "UTC")
-
- def generate():
- yield json.dumps({
- "type": "meta",
- "versioning_enabled": versioning_enabled,
- "url_templates": url_templates,
- }) + "\n"
- yield json.dumps({"type": "count", "total_count": 0}) + "\n"
-
- running_count = 0
- try:
- if delimiter:
- for item_type, item in storage.iter_objects_shallow(
- bucket_name, prefix=prefix or "", delimiter=delimiter,
- ):
- if item_type == "folder":
- yield json.dumps({"type": "folder", "prefix": item}) + "\n"
- else:
- last_mod = item.last_modified
- yield json.dumps({
- "type": "object",
- "key": item.key,
- "size": item.size,
- "last_modified": last_mod.isoformat(),
- "last_modified_display": _format_datetime_display(last_mod, display_tz),
- "last_modified_iso": _format_datetime_iso(last_mod, display_tz),
- "etag": item.etag or "",
- }) + "\n"
- running_count += 1
- if running_count % 1000 == 0:
- yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
- else:
- continuation_token = None
- while True:
- result = storage.list_objects(
- bucket_name,
- max_keys=1000,
- continuation_token=continuation_token,
- prefix=prefix,
- )
- for obj in result.objects:
- last_mod = obj.last_modified
- yield json.dumps({
- "type": "object",
- "key": obj.key,
- "size": obj.size,
- "last_modified": last_mod.isoformat(),
- "last_modified_display": _format_datetime_display(last_mod, display_tz),
- "last_modified_iso": _format_datetime_iso(last_mod, display_tz),
- "etag": obj.etag or "",
- }) + "\n"
- running_count += len(result.objects)
- yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
- if not result.is_truncated:
- break
- continuation_token = result.next_continuation_token
- except StorageError as exc:
- yield json.dumps({"type": "error", "error": str(exc)}) + "\n"
- return
- yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
- yield json.dumps({"type": "done"}) + "\n"
-
- return Response(
- generate(),
- mimetype='application/x-ndjson',
- headers={
- 'Cache-Control': 'no-cache',
- 'X-Accel-Buffering': 'no',
- 'X-Stream-Response': 'true',
- }
- )
-
-
-@ui_bp.get("/buckets//objects/search")
-@limiter.limit("30 per minute")
-def search_bucket_objects(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "list")
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- query = request.args.get("q", "").strip()
- if not query:
- return jsonify({"results": [], "truncated": False})
-
- try:
- limit = max(1, min(int(request.args.get("limit", 500)), 1000))
- except (ValueError, TypeError):
- limit = 500
-
- prefix = request.args.get("prefix", "").strip()
-
- storage = _storage()
- try:
- return jsonify(storage.search_objects(bucket_name, query, prefix=prefix, limit=limit))
- except StorageError as exc:
- return jsonify({"error": str(exc)}), 404
-
-
-@ui_bp.post("/buckets//upload")
-@limiter.limit("30 per minute")
-def upload_object(bucket_name: str):
- principal = _current_principal()
- file = request.files.get("object")
- object_key = request.form.get("object_key")
- metadata_raw = (request.form.get("metadata") or "").strip()
- wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
-
- def _response(success: bool, message: str, status: int = 200):
- if wants_json:
- payload = {"status": "ok" if success else "error", "message": message}
- return jsonify(payload), status
- flash(message, "success" if success else "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="objects"))
-
- if file and not object_key:
- object_key = file.filename
- if not object_key:
- return _response(False, "Object key is required", 400)
- if not file:
- return _response(False, "Choose a file to upload", 400)
-
- metadata = None
- if metadata_raw:
- try:
- parsed = json.loads(metadata_raw)
- if not isinstance(parsed, dict):
- raise ValueError
- metadata = {str(k): str(v) for k, v in parsed.items()}
- except ValueError:
- return _response(False, "Metadata must be a JSON object", 400)
-
- try:
- _authorize_ui(principal, bucket_name, "write")
- client = get_session_s3_client()
- put_kwargs: dict[str, Any] = {
- "Bucket": bucket_name,
- "Key": object_key,
- "Body": file.stream,
- }
- if file.content_type:
- put_kwargs["ContentType"] = file.content_type
- if metadata:
- put_kwargs["Metadata"] = metadata
- client.put_object(**put_kwargs)
- _replication().trigger_replication(bucket_name, object_key)
-
- message = f"Uploaded '{object_key}'"
- if metadata:
- message += " with metadata"
- return _response(True, message)
- except PermissionError as exc:
- return _response(False, str(exc), 401)
- except IamError as exc:
- return _response(False, _friendly_error_message(exc), 400)
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return _response(False, err["error"], status)
- return _response(False, "S3 API server is unreachable", 502)
-
-
-@ui_bp.post("/buckets//multipart/initiate")
-def initiate_multipart_upload(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "write")
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
- payload = request.get_json(silent=True) or {}
- object_key = str(payload.get("object_key", "")).strip()
- if not object_key:
- return jsonify({"error": "object_key is required"}), 400
- if "\x00" in object_key:
- return jsonify({"error": "Object key cannot contain null bytes"}), 400
- max_key_len = current_app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024)
- if len(object_key.encode("utf-8")) > max_key_len:
- return jsonify({"error": f"Object key exceeds maximum length of {max_key_len} bytes"}), 400
- metadata_payload = payload.get("metadata")
- metadata = None
- if metadata_payload is not None:
- if not isinstance(metadata_payload, dict):
- return jsonify({"error": "metadata must be an object"}), 400
- metadata = {str(k): str(v) for k, v in metadata_payload.items()}
- try:
- client = get_session_s3_client()
- create_kwargs: dict[str, Any] = {"Bucket": bucket_name, "Key": object_key}
- if metadata:
- create_kwargs["Metadata"] = metadata
- resp = client.create_multipart_upload(**create_kwargs)
- upload_id = resp["UploadId"]
- get_upload_registry().register(upload_id, bucket_name, object_key)
- return jsonify({"upload_id": upload_id})
- except PermissionError as exc:
- return jsonify({"error": str(exc)}), 401
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
-
-
-@ui_bp.put("/buckets//multipart//parts")
-@csrf.exempt
-def upload_multipart_part(bucket_name: str, upload_id: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "write")
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
- try:
- part_number = int(request.args.get("partNumber", "0"))
- except ValueError:
- return jsonify({"error": "partNumber must be an integer"}), 400
- if part_number < 1 or part_number > 10000:
- return jsonify({"error": "partNumber must be between 1 and 10000"}), 400
- object_key = get_upload_registry().get_key(upload_id, bucket_name)
- if not object_key:
- return jsonify({"error": "Unknown upload ID or upload expired"}), 404
- try:
- data = request.get_data()
- if not data:
- return jsonify({"error": "Empty request body"}), 400
- client = get_session_s3_client()
- resp = client.upload_part(
- Bucket=bucket_name,
- Key=object_key,
- UploadId=upload_id,
- PartNumber=part_number,
- Body=data,
- )
- etag = resp.get("ETag", "").strip('"')
- return jsonify({"etag": etag, "part_number": part_number})
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
-
-
-@ui_bp.post("/buckets//multipart//complete")
-def complete_multipart_upload(bucket_name: str, upload_id: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "write")
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
- payload = request.get_json(silent=True) or {}
- parts_payload = payload.get("parts")
- if not isinstance(parts_payload, list) or not parts_payload:
- return jsonify({"error": "parts array required"}), 400
- normalized = []
- for part in parts_payload:
- if not isinstance(part, dict):
- return jsonify({"error": "Each part must be an object"}), 400
- raw_number = part.get("part_number") or part.get("PartNumber")
- try:
- number = int(raw_number)
- except (TypeError, ValueError):
- return jsonify({"error": "Each part must include part_number"}), 400
- etag = str(part.get("etag") or part.get("ETag") or "").strip()
- normalized.append({"PartNumber": number, "ETag": etag})
- object_key = get_upload_registry().get_key(upload_id, bucket_name)
- if not object_key:
- return jsonify({"error": "Unknown upload ID or upload expired"}), 404
- try:
- client = get_session_s3_client()
- resp = client.complete_multipart_upload(
- Bucket=bucket_name,
- Key=object_key,
- UploadId=upload_id,
- MultipartUpload={"Parts": normalized},
- )
- get_upload_registry().remove(upload_id)
- result_key = resp.get("Key", object_key)
- _replication().trigger_replication(bucket_name, result_key)
- return jsonify({
- "key": result_key,
- "size": 0,
- "etag": resp.get("ETag", "").strip('"'),
- "last_modified": None,
- })
- except PermissionError as exc:
- return jsonify({"error": str(exc)}), 401
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- code = exc.response.get("Error", {}).get("Code", "")
- if code in ("NoSuchUpload",):
- get_upload_registry().remove(upload_id)
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
-
-
-@ui_bp.delete("/buckets//multipart/")
-def abort_multipart_upload(bucket_name: str, upload_id: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "write")
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
- object_key = get_upload_registry().get_key(upload_id, bucket_name)
- if not object_key:
- return jsonify({"error": "Unknown upload ID or upload expired"}), 404
- try:
- client = get_session_s3_client()
- client.abort_multipart_upload(Bucket=bucket_name, Key=object_key, UploadId=upload_id)
- get_upload_registry().remove(upload_id)
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- code = exc.response.get("Error", {}).get("Code", "")
- if code in ("NoSuchUpload",):
- get_upload_registry().remove(upload_id)
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
- return jsonify({"status": "aborted"})
-
-
-@ui_bp.post("/buckets//delete")
-@limiter.limit("20 per minute")
-def delete_bucket(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "delete")
- client = get_session_s3_client()
- client.delete_bucket(Bucket=bucket_name)
- try:
- _bucket_policies().delete_policy(bucket_name)
- except Exception:
- pass
- try:
- _replication_manager().delete_rule(bucket_name)
- except Exception:
- pass
- if _wants_json():
- return jsonify({"success": True, "message": f"Bucket '{bucket_name}' removed"})
- flash(f"Bucket '{bucket_name}' removed", "success")
- except PermissionError:
- return redirect(url_for("ui.login"))
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": _friendly_error_message(exc)}), 400
- flash(_friendly_error_message(exc), "danger")
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- if _wants_json():
- return jsonify(err), status
- flash(err["error"], "danger")
- else:
- msg = "S3 API server is unreachable"
- if _wants_json():
- return jsonify({"error": msg}), 502
- flash(msg, "danger")
- return redirect(url_for("ui.buckets_overview"))
-
-
-@ui_bp.post("/buckets//objects//delete")
-@limiter.limit("60 per minute")
-def delete_object(bucket_name: str, object_key: str):
- principal = _current_principal()
- purge_versions = request.form.get("purge_versions") == "1"
- try:
- _authorize_ui(principal, bucket_name, "delete", object_key=object_key)
- if purge_versions:
- _storage().purge_object(bucket_name, object_key)
- message = f"Permanently deleted '{object_key}' and all versions"
- else:
- client = get_session_s3_client()
- client.delete_object(Bucket=bucket_name, Key=object_key)
- _replication_manager().trigger_replication(bucket_name, object_key, action="delete")
- message = f"Deleted '{object_key}'"
- if _wants_json():
- return jsonify({"success": True, "message": message})
- flash(message, "success")
- except PermissionError:
- return redirect(url_for("ui.login"))
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": _friendly_error_message(exc)}), 400
- flash(_friendly_error_message(exc), "danger")
- except StorageError as exc:
- if _wants_json():
- return jsonify({"error": _friendly_error_message(exc)}), 400
- flash(_friendly_error_message(exc), "danger")
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- else:
- err, status = handle_connection_error(exc)
- if _wants_json():
- return jsonify(err), status
- flash(err["error"], "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name))
-
-
-@ui_bp.post("/buckets//objects/bulk-delete")
-@limiter.limit("40 per minute")
-def bulk_delete_objects(bucket_name: str):
- principal = _current_principal()
- wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest" or request.is_json
- payload = request.get_json(silent=True) or {}
- keys_payload = payload.get("keys")
- purge_versions = bool(payload.get("purge_versions"))
-
- def _respond(success: bool, message: str, *, deleted=None, errors=None, status_code: int = 200):
- if wants_json:
- body = {
- "status": "ok" if success else "partial",
- "message": message,
- "deleted": deleted or [],
- "errors": errors or [],
- }
- if not success and not errors:
- body["status"] = "error"
- return jsonify(body), status_code
- flash(message, "success" if success and not errors else "warning")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name))
-
- if not isinstance(keys_payload, list):
- return _respond(False, "keys must be provided as a JSON array", status_code=400)
-
- cleaned: list[str] = []
- for entry in keys_payload:
- if isinstance(entry, str):
- candidate = entry.strip()
- if candidate:
- cleaned.append(candidate)
- if not cleaned:
- return _respond(False, "Select at least one object to delete", status_code=400)
-
- MAX_KEYS = current_app.config.get("BULK_DELETE_MAX_KEYS", 500)
- if len(cleaned) > MAX_KEYS:
- return _respond(False, f"A maximum of {MAX_KEYS} objects can be deleted per request", status_code=400)
-
- unique_keys = list(dict.fromkeys(cleaned))
-
- folder_prefixes = [k for k in unique_keys if k.endswith("/")]
- if folder_prefixes:
- try:
- client = get_session_s3_client()
- for prefix in folder_prefixes:
- unique_keys.remove(prefix)
- paginator = client.get_paginator("list_objects_v2")
- for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
- for obj in page.get("Contents", []):
- if obj["Key"] not in unique_keys:
- unique_keys.append(obj["Key"])
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return _respond(False, err["error"], status_code=status)
- return _respond(False, "S3 API server is unreachable", status_code=502)
-
- if not unique_keys:
- return _respond(False, "No objects found under the selected folders", status_code=400)
-
- try:
- _authorize_ui(principal, bucket_name, "delete")
- except IamError as exc:
- return _respond(False, _friendly_error_message(exc), status_code=403)
-
- authorized_keys = []
- denied_keys = []
- for key in unique_keys:
- try:
- _authorize_ui(principal, bucket_name, "delete", object_key=key)
- authorized_keys.append(key)
- except IamError:
- denied_keys.append(key)
- if not authorized_keys:
- return _respond(False, "Access denied for all selected objects", status_code=403)
- unique_keys = authorized_keys
-
- if purge_versions:
- storage = _storage()
- deleted: list[str] = []
- errors: list[dict[str, str]] = []
- for key in unique_keys:
- try:
- storage.purge_object(bucket_name, key)
- deleted.append(key)
- except StorageError as exc:
- errors.append({"key": key, "error": str(exc)})
- else:
- try:
- client = get_session_s3_client()
- deleted = []
- errors = []
- for i in range(0, len(unique_keys), 1000):
- batch = unique_keys[i:i + 1000]
- objects_to_delete = [{"Key": k} for k in batch]
- resp = client.delete_objects(
- Bucket=bucket_name,
- Delete={"Objects": objects_to_delete, "Quiet": False},
- )
- deleted.extend(d["Key"] for d in resp.get("Deleted", []))
- errors.extend({"key": e["Key"], "error": e.get("Message", e.get("Code", "Unknown error"))} for e in resp.get("Errors", []))
- for key in deleted:
- _replication_manager().trigger_replication(bucket_name, key, action="delete")
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return _respond(False, err["error"], status_code=status)
- return _respond(False, "S3 API server is unreachable", status_code=502)
-
- if not deleted and errors:
- return _respond(False, "Unable to delete the selected objects", deleted=deleted, errors=errors, status_code=400)
-
- message = f"Deleted {len(deleted)} object{'s' if len(deleted) != 1 else ''}"
- if purge_versions and deleted:
- message += " (including archived versions)"
- if errors:
- message += f"; {len(errors)} failed"
- return _respond(not errors, message, deleted=deleted, errors=errors)
-
-
-@ui_bp.post("/buckets//objects/bulk-download")
-@limiter.limit("10 per minute")
-def bulk_download_objects(bucket_name: str):
- import io
- import zipfile
-
- principal = _current_principal()
- payload = request.get_json(silent=True) or {}
- keys_payload = payload.get("keys")
-
- if not isinstance(keys_payload, list):
- return jsonify({"error": "keys must be provided as a JSON array"}), 400
-
- cleaned: list[str] = []
- for entry in keys_payload:
- if isinstance(entry, str):
- candidate = entry.strip()
- if candidate:
- cleaned.append(candidate)
- if not cleaned:
- return jsonify({"error": "Select at least one object to download"}), 400
-
- MAX_KEYS = current_app.config.get("BULK_DELETE_MAX_KEYS", 500)
- if len(cleaned) > MAX_KEYS:
- return jsonify({"error": f"A maximum of {MAX_KEYS} objects can be downloaded per request"}), 400
-
- unique_keys = list(dict.fromkeys(cleaned))
- storage = _storage()
-
- try:
- _authorize_ui(principal, bucket_name, "read")
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- max_total_bytes = current_app.config.get("BULK_DOWNLOAD_MAX_BYTES", 1024 * 1024 * 1024)
- total_size = 0
- for key in unique_keys:
- try:
- path = storage.get_object_path(bucket_name, key)
- total_size += path.stat().st_size
- except (StorageError, OSError):
- continue
- if total_size > max_total_bytes:
- limit_mb = max_total_bytes // (1024 * 1024)
- return jsonify({"error": f"Total download size exceeds {limit_mb} MB limit. Select fewer objects."}), 400
-
- buffer = io.BytesIO()
- with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
- for key in unique_keys:
- try:
- _authorize_ui(principal, bucket_name, "read", object_key=key)
-
- metadata = storage.get_object_metadata(bucket_name, key)
- is_encrypted = "x-amz-server-side-encryption" in metadata
-
- if is_encrypted and hasattr(storage, 'get_object_data'):
- data, _ = storage.get_object_data(bucket_name, key)
- zf.writestr(key, data)
- else:
- path = storage.get_object_path(bucket_name, key)
- zf.write(path, arcname=key)
- except (StorageError, IamError):
- continue
-
- buffer.seek(0)
- return send_file(
- buffer,
- as_attachment=True,
- download_name=f"{bucket_name}-download.zip",
- mimetype="application/zip"
- )
-
-
-@ui_bp.post("/buckets//objects//purge")
-@limiter.limit("30 per minute")
-def purge_object_versions(bucket_name: str, object_key: str):
- principal = _current_principal()
- wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
- try:
- _authorize_ui(principal, bucket_name, "delete", object_key=object_key)
- _storage().purge_object(bucket_name, object_key)
- except IamError as exc:
- if wants_json:
- return jsonify({"error": str(exc)}), 403
- flash(_friendly_error_message(exc), "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name))
- except StorageError as exc:
- if wants_json:
- return jsonify({"error": str(exc)}), 400
- flash(_friendly_error_message(exc), "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name))
- message = f"Removed archived versions for '{object_key}'"
- if wants_json:
- return jsonify({"status": "ok", "message": message})
- flash(message, "success")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name))
-
-
-@ui_bp.get("/buckets//objects//preview")
-def object_preview(bucket_name: str, object_key: str) -> Response:
- import mimetypes as _mimetypes
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "read", object_key=object_key)
- except IamError as exc:
- return Response(str(exc), status=403)
-
- download = request.args.get("download") == "1"
- raw_filename = object_key.rsplit("/", 1)[-1] or object_key
- safe_filename = raw_filename.replace('"', "'").replace("\\", "_")
- safe_filename = "".join(c for c in safe_filename if c.isprintable() and c not in "\r\n")
- if not safe_filename:
- safe_filename = "download"
- try:
- safe_filename.encode("latin-1")
- ascii_safe = True
- except UnicodeEncodeError:
- ascii_safe = False
-
- range_header = request.headers.get("Range")
-
- try:
- client = get_session_s3_client()
- get_kwargs: dict[str, Any] = {"Bucket": bucket_name, "Key": object_key}
- if range_header:
- get_kwargs["Range"] = range_header
- resp = client.get_object(**get_kwargs)
- except PermissionError as exc:
- return Response(str(exc), status=401)
- except ClientError as exc:
- code = exc.response.get("Error", {}).get("Code", "")
- status = 404 if code == "NoSuchKey" else 400
- return Response(exc.response.get("Error", {}).get("Message", "S3 operation failed"), status=status)
- except (EndpointConnectionError, ConnectionClosedError):
- return Response("S3 API server is unreachable", status=502)
-
- content_type = resp.get("ContentType") or _mimetypes.guess_type(object_key)[0] or "application/octet-stream"
- content_length = resp.get("ContentLength", 0)
- body_stream = resp["Body"]
- is_partial = resp.get("ResponseMetadata", {}).get("HTTPStatusCode") == 206
- content_range = resp.get("ContentRange")
-
- _DANGEROUS_TYPES = {
- "text/html", "text/xml", "application/xhtml+xml",
- "application/xml", "image/svg+xml",
- }
- base_ct = content_type.split(";")[0].strip().lower()
- if not download and base_ct in _DANGEROUS_TYPES:
- content_type = "text/plain; charset=utf-8"
-
- def generate():
- try:
- for chunk in body_stream.iter_chunks(chunk_size=65536):
- yield chunk
- finally:
- body_stream.close()
-
- status_code = 206 if is_partial else 200
- headers = {
- "Content-Type": content_type,
- "X-Content-Type-Options": "nosniff",
- "Accept-Ranges": "bytes",
- }
- if content_length:
- headers["Content-Length"] = str(content_length)
- if content_range:
- headers["Content-Range"] = content_range
- disposition = "attachment" if download else "inline"
- if ascii_safe:
- headers["Content-Disposition"] = f'{disposition}; filename="{safe_filename}"'
- else:
- from urllib.parse import quote
- encoded = quote(safe_filename, safe="")
- ascii_fallback = safe_filename.encode("ascii", "replace").decode("ascii").replace("?", "_")
- headers["Content-Disposition"] = f'{disposition}; filename="{ascii_fallback}"; filename*=UTF-8\'\'{encoded}'
-
- return Response(generate(), status=status_code, headers=headers)
-
-
-@ui_bp.post("/buckets//objects//presign")
-def object_presign(bucket_name: str, object_key: str):
- principal = _current_principal()
- payload = request.get_json(silent=True) or {}
- method = str(payload.get("method", "GET")).upper()
- allowed_methods = {"GET", "PUT", "DELETE"}
- if method not in allowed_methods:
- return jsonify({"error": "Method must be GET, PUT, or DELETE"}), 400
- action = "read" if method == "GET" else ("delete" if method == "DELETE" else "write")
- try:
- _authorize_ui(principal, bucket_name, action, object_key=object_key)
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
- try:
- expires = int(payload.get("expires_in", 900))
- except (TypeError, ValueError):
- return jsonify({"error": "expires_in must be an integer"}), 400
- min_expiry = current_app.config.get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1)
- max_expiry = current_app.config.get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800)
- expires = max(min_expiry, min(expires, max_expiry))
-
- method_to_client_method = {"GET": "get_object", "PUT": "put_object", "DELETE": "delete_object"}
- client_method = method_to_client_method[method]
-
- try:
- client = get_session_s3_client()
- url = client.generate_presigned_url(
- ClientMethod=client_method,
- Params={"Bucket": bucket_name, "Key": object_key},
- ExpiresIn=expires,
- )
- except PermissionError as exc:
- return jsonify({"error": str(exc)}), 401
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
- current_app.logger.info(
- "Presigned URL generated",
- extra={"bucket": bucket_name, "key": object_key, "method": method},
- )
- return jsonify({"url": url, "method": method, "expires_in": expires})
-
-
-@ui_bp.get("/buckets//objects//metadata")
-def object_metadata(bucket_name: str, object_key: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "read", object_key=object_key)
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
- try:
- client = get_session_s3_client()
- resp = client.head_object(Bucket=bucket_name, Key=object_key)
- metadata = resp.get("Metadata", {})
- if resp.get("ContentType"):
- metadata["Content-Type"] = resp["ContentType"]
- if resp.get("ContentLength") is not None:
- metadata["Content-Length"] = str(resp["ContentLength"])
- if resp.get("ServerSideEncryption"):
- metadata["x-amz-server-side-encryption"] = resp["ServerSideEncryption"]
- return jsonify({"metadata": metadata})
- except PermissionError as exc:
- return jsonify({"error": str(exc)}), 401
- except ClientError as exc:
- code = exc.response.get("Error", {}).get("Code", "")
- if code in ("NoSuchKey", "404", "NotFound"):
- return jsonify({"error": "Object not found"}), 404
- err, status = handle_client_error(exc)
- return jsonify(err), status
- except (EndpointConnectionError, ConnectionClosedError) as exc:
- return jsonify(*handle_connection_error(exc))
-
-
-@ui_bp.get("/buckets//objects//versions")
-def object_versions(bucket_name: str, object_key: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "read", object_key=object_key)
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
- try:
- client = get_session_s3_client()
- resp = client.list_object_versions(Bucket=bucket_name, Prefix=object_key, MaxKeys=1000)
- versions = []
- for v in resp.get("Versions", []):
- if v.get("Key") != object_key:
- continue
- if v.get("IsLatest", False):
- continue
- versions.append({
- "version_id": v.get("VersionId", ""),
- "last_modified": v["LastModified"].isoformat() if v.get("LastModified") else None,
- "size": v.get("Size", 0),
- "etag": v.get("ETag", "").strip('"'),
- "is_latest": False,
- })
- return jsonify({"versions": versions})
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
-
-
-@ui_bp.get("/buckets//archived")
-def archived_objects(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "list")
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
- try:
- entries = _storage().list_orphaned_objects(bucket_name)
- except StorageError as exc:
- return jsonify({"error": str(exc)}), 400
- payload: list[dict[str, Any]] = []
- for entry in entries:
- latest = entry.get("latest") or {}
- restore_url = None
- if latest.get("version_id"):
- restore_url = url_for(
- "ui.restore_object_version",
- bucket_name=bucket_name,
- object_key=entry["key"],
- version_id=latest["version_id"],
- )
- purge_url = url_for("ui.purge_object_versions", bucket_name=bucket_name, object_key=entry["key"])
- payload.append(
- {
- "key": entry["key"],
- "versions": entry.get("versions", 0),
- "total_size": entry.get("total_size", 0),
- "latest": entry.get("latest"),
- "restore_url": restore_url,
- "purge_url": purge_url,
- }
- )
- return jsonify({"objects": payload})
-
-
-@ui_bp.post("/buckets//objects//versions//restore")
-def restore_object_version(bucket_name: str, object_key: str, version_id: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "write", object_key=object_key)
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
- try:
- meta = _storage().restore_object_version(bucket_name, object_key, version_id)
- except StorageError as exc:
- return jsonify({"error": str(exc)}), 400
- message = f"Restored '{meta.key}'" if meta else "Object restored"
- return jsonify({"status": "ok", "message": message})
-
-
-@ui_bp.post("/buckets//policy")
-@limiter.limit("10 per minute")
-def update_bucket_policy(bucket_name: str):
- principal = _current_principal()
- action = request.form.get("mode", "upsert")
- try:
- _authorize_ui(principal, bucket_name, "policy")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 403
- flash(str(exc), "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name))
-
- try:
- client = get_session_s3_client()
- except (PermissionError, RuntimeError) as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 403
- flash(str(exc), "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name))
-
- if action == "delete":
- try:
- client.delete_bucket_policy(Bucket=bucket_name)
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- else:
- err, status = handle_connection_error(exc)
- if _wants_json():
- return jsonify(err), status
- flash(err["error"], "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="permissions"))
- if _wants_json():
- return jsonify({"success": True, "message": "Bucket policy removed"})
- flash("Bucket policy removed", "info")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="permissions"))
-
- document = request.form.get("policy_document", "").strip()
- if not document:
- if _wants_json():
- return jsonify({"error": "Provide a JSON policy document"}), 400
- flash("Provide a JSON policy document", "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="permissions"))
- try:
- json.loads(document)
- except json.JSONDecodeError as exc:
- if _wants_json():
- return jsonify({"error": f"Policy error: {exc}"}), 400
- flash(f"Policy error: {exc}", "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="permissions"))
- try:
- client.put_bucket_policy(Bucket=bucket_name, Policy=document)
- if _wants_json():
- return jsonify({"success": True, "message": "Bucket policy saved"})
- flash("Bucket policy saved", "success")
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- else:
- err, status = handle_connection_error(exc)
- if _wants_json():
- return jsonify(err), status
- flash(err["error"], "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="permissions"))
-
-
-@ui_bp.post("/buckets//versioning")
-def update_bucket_versioning(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "write")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": _friendly_error_message(exc)}), 403
- flash(_friendly_error_message(exc), "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
- state = request.form.get("state", "enable")
- if state not in ("enable", "suspend"):
- if _wants_json():
- return jsonify({"error": "state must be 'enable' or 'suspend'"}), 400
- flash("Invalid versioning state", "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
- enable = state == "enable"
- try:
- client = get_session_s3_client()
- client.put_bucket_versioning(
- Bucket=bucket_name,
- VersioningConfiguration={"Status": "Enabled" if enable else "Suspended"},
- )
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- else:
- err, status = handle_connection_error(exc)
- if _wants_json():
- return jsonify(err), status
- flash(err["error"], "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
- message = "Versioning enabled" if enable else "Versioning suspended"
- if _wants_json():
- return jsonify({"success": True, "message": message, "enabled": enable})
- flash(message, "success")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
-
-@ui_bp.post("/buckets//quota")
-def update_bucket_quota(bucket_name: str):
- """Update bucket quota configuration (admin only)."""
- principal = _current_principal()
-
- is_admin = False
- try:
- _iam().authorize(principal, None, "iam:list_users")
- is_admin = True
- except IamError:
- pass
-
- if not is_admin:
- if _wants_json():
- return jsonify({"error": "Only administrators can manage bucket quotas"}), 403
- flash("Only administrators can manage bucket quotas", "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
- action = request.form.get("action", "set")
-
- if action == "remove":
- try:
- _storage().set_bucket_quota(bucket_name, max_bytes=None, max_objects=None)
- if _wants_json():
- return jsonify({"success": True, "message": "Bucket quota removed"})
- flash("Bucket quota removed", "info")
- except StorageError as exc:
- if _wants_json():
- return jsonify({"error": _friendly_error_message(exc)}), 400
- flash(_friendly_error_message(exc), "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
- max_mb_str = request.form.get("max_mb", "").strip()
- max_objects_str = request.form.get("max_objects", "").strip()
-
- max_bytes = None
- max_objects = None
-
- if max_mb_str:
- try:
- max_mb = int(max_mb_str)
- if max_mb < 1:
- raise ValueError("Size must be at least 1 MB")
- max_bytes = max_mb * 1024 * 1024
- except ValueError as exc:
- if _wants_json():
- return jsonify({"error": f"Invalid size value: {exc}"}), 400
- flash(f"Invalid size value: {exc}", "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
- if max_objects_str:
- try:
- max_objects = int(max_objects_str)
- if max_objects < 0:
- raise ValueError("Object count must be non-negative")
- except ValueError as exc:
- if _wants_json():
- return jsonify({"error": f"Invalid object count: {exc}"}), 400
- flash(f"Invalid object count: {exc}", "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
- try:
- _storage().set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects)
- if max_bytes is None and max_objects is None:
- message = "Bucket quota removed"
- else:
- message = "Bucket quota updated"
- if _wants_json():
- return jsonify({
- "success": True,
- "message": message,
- "max_bytes": max_bytes,
- "max_objects": max_objects,
- "has_quota": max_bytes is not None or max_objects is not None
- })
- flash(message, "success" if max_bytes or max_objects else "info")
- except StorageError as exc:
- if _wants_json():
- return jsonify({"error": _friendly_error_message(exc)}), 400
- flash(_friendly_error_message(exc), "danger")
-
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
-
-@ui_bp.post("/buckets//encryption")
-def update_bucket_encryption(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "write")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": _friendly_error_message(exc)}), 403
- flash(_friendly_error_message(exc), "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
- action = request.form.get("action", "enable")
-
- if action == "disable":
- try:
- client = get_session_s3_client()
- client.delete_bucket_encryption(Bucket=bucket_name)
- if _wants_json():
- return jsonify({"success": True, "message": "Default encryption disabled", "enabled": False})
- flash("Default encryption disabled", "info")
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- else:
- err, status = handle_connection_error(exc)
- if _wants_json():
- return jsonify(err), status
- flash(err["error"], "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
- algorithm = request.form.get("algorithm", "AES256")
- kms_key_id = request.form.get("kms_key_id", "").strip() or None
-
- if algorithm not in ("AES256", "aws:kms"):
- if _wants_json():
- return jsonify({"error": "Invalid encryption algorithm"}), 400
- flash("Invalid encryption algorithm", "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
- sse_rule: dict[str, Any] = {"SSEAlgorithm": algorithm}
- if algorithm == "aws:kms" and kms_key_id:
- sse_rule["KMSMasterKeyID"] = kms_key_id
-
- try:
- client = get_session_s3_client()
- client.put_bucket_encryption(
- Bucket=bucket_name,
- ServerSideEncryptionConfiguration={
- "Rules": [{"ApplyServerSideEncryptionByDefault": sse_rule}]
- },
- )
- if algorithm == "aws:kms":
- message = "Default KMS encryption enabled"
- else:
- message = "Default AES-256 encryption enabled"
- if _wants_json():
- return jsonify({"success": True, "message": message, "enabled": True, "algorithm": algorithm})
- flash(message, "success")
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- else:
- err, status = handle_connection_error(exc)
- if _wants_json():
- return jsonify(err), status
- flash(err["error"], "danger")
-
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
-
-@ui_bp.post("/buckets//website")
-def update_bucket_website(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "policy")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": _friendly_error_message(exc)}), 403
- flash(_friendly_error_message(exc), "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
- if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
- if _wants_json():
- return jsonify({"error": "Website hosting is not enabled"}), 400
- flash("Website hosting is not enabled", "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
- action = request.form.get("action", "enable")
-
- if action == "disable":
- try:
- _storage().set_bucket_website(bucket_name, None)
- if _wants_json():
- return jsonify({"success": True, "message": "Static website hosting disabled", "enabled": False})
- flash("Static website hosting disabled", "info")
- except StorageError as exc:
- if _wants_json():
- return jsonify({"error": _friendly_error_message(exc)}), 400
- flash(_friendly_error_message(exc), "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
- index_document = request.form.get("index_document", "").strip()
- error_document = request.form.get("error_document", "").strip()
-
- if not index_document:
- if _wants_json():
- return jsonify({"error": "Index document is required"}), 400
- flash("Index document is required", "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
- if "/" in index_document:
- if _wants_json():
- return jsonify({"error": "Index document must not contain '/'"}), 400
- flash("Index document must not contain '/'", "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
- website_cfg: dict[str, Any] = {"index_document": index_document}
- if error_document:
- website_cfg["error_document"] = error_document
-
- try:
- _storage().set_bucket_website(bucket_name, website_cfg)
- if _wants_json():
- return jsonify({
- "success": True,
- "message": "Static website hosting enabled",
- "enabled": True,
- "index_document": index_document,
- "error_document": error_document,
- })
- flash("Static website hosting enabled", "success")
- except StorageError as exc:
- if _wants_json():
- return jsonify({"error": _friendly_error_message(exc)}), 400
- flash(_friendly_error_message(exc), "danger")
-
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="properties"))
-
-
-@ui_bp.get("/iam")
-def iam_dashboard():
- principal = _current_principal()
- iam_service = _iam()
- secret_token = request.args.get("secret_token")
- disclosed_secret: dict[str, str] | None = None
- if secret_token:
- payload = _secret_store().pop(secret_token)
- if isinstance(payload, dict):
- access_key = str(payload.get("access_key", ""))
- secret_key = payload.get("secret_key")
- if secret_key:
- disclosed_secret = {
- "access_key": access_key,
- "secret_key": str(secret_key),
- "operation": str(payload.get("operation", "create")),
- }
- locked = False
- locked_reason = None
- try:
- iam_service.authorize(principal, None, "iam:list_users")
- except IamError as exc:
- locked = True
- locked_reason = str(exc)
- users = iam_service.list_users() if not locked else []
- config_summary = iam_service.config_summary()
- config_document = json.dumps(iam_service.export_config(mask_secrets=True), indent=2)
- from datetime import datetime as _dt, timedelta as _td, timezone as _tz
- _now = _dt.now(_tz.utc)
- now_iso = _now.isoformat()
- soon_iso = (_now + _td(days=7)).isoformat()
- return render_template(
- "iam.html",
- users=users,
- principal=principal,
- iam_locked=locked,
- locked_reason=locked_reason,
- config_summary=config_summary,
- config_document=config_document,
- disclosed_secret=disclosed_secret,
- now_iso=now_iso,
- soon_iso=soon_iso,
- )
-
-
-@ui_bp.post("/iam/users")
-def create_iam_user():
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:create_user")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 403
- flash(str(exc), "danger")
- return redirect(url_for("ui.iam_dashboard"))
- display_name = request.form.get("display_name", "").strip() or "Unnamed"
- if len(display_name) > 64:
- if _wants_json():
- return jsonify({"error": "Display name must be 64 characters or fewer"}), 400
- flash("Display name must be 64 characters or fewer", "danger")
- return redirect(url_for("ui.iam_dashboard"))
- custom_access_key = request.form.get("access_key", "").strip() or None
- custom_secret_key = request.form.get("secret_key", "").strip() or None
- policies_text = request.form.get("policies", "").strip()
- policies = None
- if policies_text:
- try:
- policies = json.loads(policies_text)
- except json.JSONDecodeError as exc:
- if _wants_json():
- return jsonify({"error": f"Invalid JSON: {exc}"}), 400
- flash(f"Invalid JSON: {exc}", "danger")
- return redirect(url_for("ui.iam_dashboard"))
- expires_at = request.form.get("expires_at", "").strip() or None
- if expires_at:
- try:
- from datetime import datetime as _dt, timezone as _tz
- exp_dt = _dt.fromisoformat(expires_at)
- if exp_dt.tzinfo is None:
- exp_dt = exp_dt.replace(tzinfo=_tz.utc)
- expires_at = exp_dt.isoformat()
- except (ValueError, TypeError):
- if _wants_json():
- return jsonify({"error": "Invalid expiry date format"}), 400
- flash("Invalid expiry date format", "danger")
- return redirect(url_for("ui.iam_dashboard"))
- try:
- created = _iam().create_user(display_name=display_name, policies=policies, access_key=custom_access_key, secret_key=custom_secret_key, expires_at=expires_at)
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 400
- flash(str(exc), "danger")
- return redirect(url_for("ui.iam_dashboard"))
-
- token = _secret_store().remember(
- {
- "access_key": created["access_key"],
- "secret_key": created["secret_key"],
- "operation": "create",
- }
- )
- if _wants_json():
- return jsonify({
- "success": True,
- "message": f"Created user {created['access_key']}",
- "access_key": created["access_key"],
- "secret_key": created["secret_key"],
- "display_name": display_name,
- "policies": policies or []
- })
- flash(f"Created user {created['access_key']}. Copy the secret below.", "success")
- return redirect(url_for("ui.iam_dashboard", secret_token=token))
-
-
-@ui_bp.post("/iam/users//rotate")
-def rotate_iam_secret(access_key: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:rotate_key")
- except IamError as exc:
- if request.accept_mimetypes.accept_json and not request.accept_mimetypes.accept_html:
- return jsonify({"error": str(exc)}), 403
- flash(str(exc), "danger")
- return redirect(url_for("ui.iam_dashboard"))
- try:
- new_secret = _iam().rotate_secret(access_key)
- if principal and principal.access_key == access_key:
- creds = session.get("credentials", {})
- creds["secret_key"] = new_secret
- session["credentials"] = creds
- session.modified = True
- except IamError as exc:
- if request.accept_mimetypes.accept_json and not request.accept_mimetypes.accept_html:
- return jsonify({"error": str(exc)}), 400
- flash(str(exc), "danger")
- return redirect(url_for("ui.iam_dashboard"))
-
- if request.accept_mimetypes.accept_json and not request.accept_mimetypes.accept_html:
- return jsonify({
- "access_key": access_key,
- "secret_key": new_secret,
- "message": f"Secret rotated for {access_key}",
- })
-
- token = _secret_store().remember(
- {
- "access_key": access_key,
- "secret_key": new_secret,
- "operation": "rotate",
- }
- )
- flash(f"Rotated secret for {access_key}. Copy the secret below.", "info")
- return redirect(url_for("ui.iam_dashboard", secret_token=token))
-
-
-@ui_bp.post("/iam/users//update")
-def update_iam_user(access_key: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:create_user")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 403
- flash(str(exc), "danger")
- return redirect(url_for("ui.iam_dashboard"))
-
- display_name = request.form.get("display_name", "").strip()
- if display_name:
- if len(display_name) > 64:
- if _wants_json():
- return jsonify({"error": "Display name must be 64 characters or fewer"}), 400
- flash("Display name must be 64 characters or fewer", "danger")
- else:
- try:
- _iam().update_user(access_key, display_name)
- if _wants_json():
- return jsonify({"success": True, "message": f"Updated user {access_key}", "display_name": display_name})
- flash(f"Updated user {access_key}", "success")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 400
- flash(str(exc), "danger")
-
- return redirect(url_for("ui.iam_dashboard"))
-
-
-@ui_bp.post("/iam/users//delete")
-def delete_iam_user(access_key: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:delete_user")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 403
- flash(str(exc), "danger")
- return redirect(url_for("ui.iam_dashboard"))
-
- if access_key == principal.access_key:
- try:
- _iam().delete_user(access_key)
- session.pop("credentials", None)
- if _wants_json():
- return jsonify({"success": True, "message": "Your account has been deleted", "redirect": url_for("ui.login")})
- flash("Your account has been deleted.", "info")
- return redirect(url_for("ui.login"))
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 400
- flash(str(exc), "danger")
- return redirect(url_for("ui.iam_dashboard"))
-
- try:
- _iam().delete_user(access_key)
- if _wants_json():
- return jsonify({"success": True, "message": f"Deleted user {access_key}"})
- flash(f"Deleted user {access_key}", "success")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 400
- flash(str(exc), "danger")
- return redirect(url_for("ui.iam_dashboard"))
-
-
-@ui_bp.post("/iam/users//policies")
-def update_iam_policies(access_key: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:update_policy")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 403
- flash(str(exc), "danger")
- return redirect(url_for("ui.iam_dashboard"))
-
- policies_raw = request.form.get("policies", "").strip()
- if not policies_raw:
- policies = []
- else:
- try:
- policies = json.loads(policies_raw)
- if not isinstance(policies, list):
- raise ValueError("Policies must be a list")
- except (ValueError, json.JSONDecodeError):
- if _wants_json():
- return jsonify({"error": "Invalid JSON format for policies"}), 400
- flash("Invalid JSON format for policies", "danger")
- return redirect(url_for("ui.iam_dashboard"))
-
- try:
- _iam().update_user_policies(access_key, policies)
- if _wants_json():
- return jsonify({"success": True, "message": f"Updated policies for {access_key}", "policies": policies})
- flash(f"Updated policies for {access_key}", "success")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 400
- flash(str(exc), "danger")
-
- return redirect(url_for("ui.iam_dashboard"))
-
-
-@ui_bp.post("/iam/users//expiry")
-def update_iam_expiry(access_key: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:update_policy")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 403
- flash(str(exc), "danger")
- return redirect(url_for("ui.iam_dashboard"))
-
- expires_at = request.form.get("expires_at", "").strip() or None
- if expires_at:
- try:
- from datetime import datetime as _dt, timezone as _tz
- exp_dt = _dt.fromisoformat(expires_at)
- if exp_dt.tzinfo is None:
- exp_dt = exp_dt.replace(tzinfo=_tz.utc)
- expires_at = exp_dt.isoformat()
- except (ValueError, TypeError):
- if _wants_json():
- return jsonify({"error": "Invalid expiry date format"}), 400
- flash("Invalid expiry date format", "danger")
- return redirect(url_for("ui.iam_dashboard"))
-
- try:
- _iam().update_user_expiry(access_key, expires_at)
- if _wants_json():
- return jsonify({"success": True, "message": f"Updated expiry for {access_key}", "expires_at": expires_at})
- label = expires_at if expires_at else "never"
- flash(f"Expiry for {access_key} set to {label}", "success")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 400
- flash(str(exc), "danger")
-
- return redirect(url_for("ui.iam_dashboard"))
-
-
-@ui_bp.post("/connections")
-def create_connection():
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- if _wants_json():
- return jsonify({"error": "Access denied"}), 403
- flash("Access denied", "danger")
- return redirect(url_for("ui.buckets_overview"))
-
- name = request.form.get("name", "").strip()
- endpoint = request.form.get("endpoint_url", "").strip()
- access_key = request.form.get("access_key", "").strip()
- secret_key = request.form.get("secret_key", "").strip()
- region = request.form.get("region", "us-east-1").strip()
-
- if not all([name, endpoint, access_key, secret_key]):
- if _wants_json():
- return jsonify({"error": "All fields are required"}), 400
- flash("All fields are required", "danger")
- return redirect(url_for("ui.connections_dashboard"))
-
- conn = RemoteConnection(
- id=str(uuid.uuid4()),
- name=name,
- endpoint_url=endpoint,
- access_key=access_key,
- secret_key=secret_key,
- region=region
- )
- _connections().add(conn)
- if _wants_json():
- return jsonify({"success": True, "message": f"Connection '{name}' created", "connection_id": conn.id})
- flash(f"Connection '{name}' created", "success")
- return redirect(url_for("ui.connections_dashboard"))
-
-
-@ui_bp.post("/connections/test")
-def test_connection():
- from botocore.config import Config as BotoConfig
- from botocore.exceptions import ConnectTimeoutError, EndpointConnectionError, ReadTimeoutError
-
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- return jsonify({"status": "error", "message": "Access denied"}), 403
-
- data = request.get_json(silent=True) or request.form
- endpoint = data.get("endpoint_url", "").strip()
- access_key = data.get("access_key", "").strip()
- secret_key = data.get("secret_key", "").strip()
- region = data.get("region", "us-east-1").strip()
-
- if not all([endpoint, access_key, secret_key]):
- return jsonify({"status": "error", "message": "Missing credentials"}), 400
-
- try:
- config = BotoConfig(
- connect_timeout=5,
- read_timeout=10,
- retries={'max_attempts': 1}
- )
- s3 = boto3.client(
- "s3",
- endpoint_url=endpoint,
- aws_access_key_id=access_key,
- aws_secret_access_key=secret_key,
- region_name=region,
- config=config,
- )
-
- s3.list_buckets()
- return jsonify({"status": "ok", "message": "Connection successful"})
- except (ConnectTimeoutError, ReadTimeoutError):
- return jsonify({"status": "error", "message": f"Connection timed out - endpoint may be down or unreachable: {endpoint}"}), 400
- except EndpointConnectionError:
- return jsonify({"status": "error", "message": f"Could not connect to endpoint: {endpoint}"}), 400
- except ClientError as e:
- error_code = e.response.get('Error', {}).get('Code', 'Unknown')
- error_msg = e.response.get('Error', {}).get('Message', str(e))
- return jsonify({"status": "error", "message": f"Connection failed ({error_code}): {error_msg}"}), 400
- except Exception as e:
- return jsonify({"status": "error", "message": f"Connection failed: {str(e)}"}), 400
-
-
-@ui_bp.post("/connections//update")
-def update_connection(connection_id: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- if _wants_json():
- return jsonify({"error": "Access denied"}), 403
- flash("Access denied", "danger")
- return redirect(url_for("ui.buckets_overview"))
-
- conn = _connections().get(connection_id)
- if not conn:
- if _wants_json():
- return jsonify({"error": "Connection not found"}), 404
- flash("Connection not found", "danger")
- return redirect(url_for("ui.connections_dashboard"))
-
- name = request.form.get("name", "").strip()
- endpoint = request.form.get("endpoint_url", "").strip()
- access_key = request.form.get("access_key", "").strip()
- secret_key = request.form.get("secret_key", "").strip()
- region = request.form.get("region", "us-east-1").strip()
-
- if not all([name, endpoint, access_key]):
- if _wants_json():
- return jsonify({"error": "Name, endpoint, and access key are required"}), 400
- flash("Name, endpoint, and access key are required", "danger")
- return redirect(url_for("ui.connections_dashboard"))
-
- conn.name = name
- conn.endpoint_url = endpoint
- conn.access_key = access_key
- if secret_key:
- conn.secret_key = secret_key
- conn.region = region
-
- _connections().save()
- if _wants_json():
- return jsonify({
- "success": True,
- "message": f"Connection '{name}' updated",
- "connection": {
- "id": connection_id,
- "name": name,
- "endpoint_url": endpoint,
- "access_key": access_key,
- "region": region
- }
- })
- flash(f"Connection '{name}' updated", "success")
- return redirect(url_for("ui.connections_dashboard"))
-
-
-@ui_bp.post("/connections//delete")
-def delete_connection(connection_id: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- if _wants_json():
- return jsonify({"error": "Access denied"}), 403
- flash("Access denied", "danger")
- return redirect(url_for("ui.buckets_overview"))
-
- _connections().delete(connection_id)
- if _wants_json():
- return jsonify({"success": True, "message": "Connection deleted"})
- flash("Connection deleted", "success")
- return redirect(url_for("ui.connections_dashboard"))
-
-
-@ui_bp.post("/buckets//replication")
-def update_bucket_replication(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "replication")
- except IamError as exc:
- if _wants_json():
- return jsonify({"error": str(exc)}), 403
- flash(str(exc), "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication"))
-
- is_admin = False
- try:
- _iam().authorize(principal, None, "iam:list_users")
- is_admin = True
- except IamError:
- is_admin = False
-
- action = request.form.get("action")
-
- if action == "delete":
- if not is_admin:
- if _wants_json():
- return jsonify({"error": "Only administrators can remove replication configuration"}), 403
- flash("Only administrators can remove replication configuration", "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication"))
- _replication().delete_rule(bucket_name)
- if _wants_json():
- return jsonify({"success": True, "message": "Replication configuration removed", "action": "delete"})
- flash("Replication configuration removed", "info")
- elif action == "pause":
- rule = _replication().get_rule(bucket_name)
- if rule:
- rule.enabled = False
- _replication().set_rule(rule)
- if _wants_json():
- return jsonify({"success": True, "message": "Replication paused", "action": "pause", "enabled": False})
- flash("Replication paused", "info")
- else:
- if _wants_json():
- return jsonify({"error": "No replication configuration to pause"}), 404
- flash("No replication configuration to pause", "warning")
- elif action == "resume":
- from .replication import REPLICATION_MODE_ALL
- rule = _replication().get_rule(bucket_name)
- if rule:
- rule.enabled = True
- _replication().set_rule(rule)
- if rule.mode == REPLICATION_MODE_ALL:
- _replication().replicate_existing_objects(bucket_name)
- message = "Replication resumed. Syncing pending objects in background."
- else:
- message = "Replication resumed"
- if _wants_json():
- return jsonify({"success": True, "message": message, "action": "resume", "enabled": True})
- flash(message, "success")
- else:
- if _wants_json():
- return jsonify({"error": "No replication configuration to resume"}), 404
- flash("No replication configuration to resume", "warning")
- elif action == "create":
- if not is_admin:
- if _wants_json():
- return jsonify({"error": "Only administrators can configure replication settings"}), 403
- flash("Only administrators can configure replication settings", "danger")
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication"))
-
- from .replication import REPLICATION_MODE_NEW_ONLY, REPLICATION_MODE_ALL
- import time
-
- target_conn_id = request.form.get("target_connection_id")
- target_bucket = request.form.get("target_bucket", "").strip()
- replication_mode = request.form.get("replication_mode", REPLICATION_MODE_NEW_ONLY)
-
- if not target_conn_id or not target_bucket:
- if _wants_json():
- return jsonify({"error": "Target connection and bucket are required"}), 400
- flash("Target connection and bucket are required", "danger")
- else:
- rule = ReplicationRule(
- bucket_name=bucket_name,
- target_connection_id=target_conn_id,
- target_bucket=target_bucket,
- enabled=True,
- mode=replication_mode,
- created_at=time.time(),
- )
- _replication().set_rule(rule)
-
- if replication_mode == REPLICATION_MODE_ALL:
- _replication().replicate_existing_objects(bucket_name)
- message = "Replication configured. Existing objects are being replicated in the background."
- else:
- message = "Replication configured. Only new uploads will be replicated."
- if _wants_json():
- return jsonify({"success": True, "message": message, "action": "create", "enabled": True})
- flash(message, "success")
- else:
- if _wants_json():
- return jsonify({"error": "Invalid action"}), 400
- flash("Invalid action", "danger")
-
- return redirect(url_for("ui.bucket_detail", bucket_name=bucket_name, tab="replication"))
-
-
-@ui_bp.get("/buckets//replication/status")
-def get_replication_status(bucket_name: str):
- """Async endpoint to fetch replication sync status without blocking page load."""
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "replication")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- rule = _replication().get_rule(bucket_name)
- if not rule:
- return jsonify({"error": "No replication rule"}), 404
-
- connection = _connections().get(rule.target_connection_id)
- endpoint_healthy = False
- endpoint_error = None
- if connection:
- endpoint_healthy = _replication().check_endpoint_health(connection)
- if not endpoint_healthy:
- endpoint_error = f"Cannot reach endpoint: {connection.endpoint_url}"
- else:
- endpoint_error = "Target connection not found"
-
- stats = None
- if endpoint_healthy:
- stats = _replication().get_sync_status(bucket_name)
-
- if not stats:
- return jsonify({
- "objects_synced": 0,
- "objects_pending": 0,
- "objects_orphaned": 0,
- "bytes_synced": 0,
- "last_sync_at": rule.stats.last_sync_at if rule.stats else None,
- "last_sync_key": rule.stats.last_sync_key if rule.stats else None,
- "endpoint_healthy": endpoint_healthy,
- "endpoint_error": endpoint_error,
- })
-
- return jsonify({
- "objects_synced": stats.objects_synced,
- "objects_pending": stats.objects_pending,
- "objects_orphaned": stats.objects_orphaned,
- "bytes_synced": stats.bytes_synced,
- "last_sync_at": stats.last_sync_at,
- "last_sync_key": stats.last_sync_key,
- "endpoint_healthy": endpoint_healthy,
- "endpoint_error": endpoint_error,
- })
-
-
-@ui_bp.get("/buckets//replication/failures")
-def get_replication_failures(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "replication")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- limit = request.args.get("limit", 50, type=int)
- offset = request.args.get("offset", 0, type=int)
-
- failures = _replication().get_failed_items(bucket_name, limit, offset)
- total = _replication().get_failure_count(bucket_name)
-
- return jsonify({
- "failures": [f.to_dict() for f in failures],
- "total": total,
- "limit": limit,
- "offset": offset,
- })
-
-
-@ui_bp.post("/buckets//replication/failures//retry")
-def retry_replication_failure(bucket_name: str, object_key: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "replication")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- success = _replication().retry_failed_item(bucket_name, object_key)
- if success:
- return jsonify({"status": "submitted", "object_key": object_key})
- return jsonify({"error": "Failed to submit retry"}), 400
-
-
-@ui_bp.post("/buckets//replication/failures/retry-all")
-def retry_all_replication_failures(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "replication")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- result = _replication().retry_all_failed(bucket_name)
- return jsonify({
- "status": "submitted",
- "submitted": result["submitted"],
- "skipped": result["skipped"],
- })
-
-
-@ui_bp.delete("/buckets//replication/failures/")
-def dismiss_replication_failure(bucket_name: str, object_key: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "replication")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- success = _replication().dismiss_failure(bucket_name, object_key)
- if success:
- return jsonify({"status": "dismissed", "object_key": object_key})
- return jsonify({"error": "Failure not found"}), 404
-
-
-@ui_bp.delete("/buckets//replication/failures")
-def clear_replication_failures(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "replication")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- _replication().clear_failures(bucket_name)
- return jsonify({"status": "cleared"})
-
-
-@ui_bp.get("/connections//health")
-def check_connection_health(connection_id: str):
- """Check if a connection endpoint is reachable."""
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- conn = _connections().get(connection_id)
- if not conn:
- return jsonify({"healthy": False, "error": "Connection not found"}), 404
-
- healthy = _replication().check_endpoint_health(conn)
- return jsonify({
- "healthy": healthy,
- "error": None if healthy else f"Cannot reach endpoint: {conn.endpoint_url}"
- })
-
-
-@ui_bp.get("/connections")
-def connections_dashboard():
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- flash("Access denied", "danger")
- return redirect(url_for("ui.buckets_overview"))
-
- connections = _connections().list()
- return render_template("connections.html", connections=connections, principal=principal)
-
-
-@ui_bp.get("/website-domains")
-def website_domains_dashboard():
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- flash("Access denied", "danger")
- return redirect(url_for("ui.buckets_overview"))
-
- if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
- flash("Website hosting is not enabled", "warning")
- return redirect(url_for("ui.buckets_overview"))
-
- store = current_app.extensions.get("website_domains")
- mappings = store.list_all() if store else []
- storage = _storage()
- buckets = [
- b.name for b in storage.list_buckets()
- if storage.get_bucket_website(b.name)
- ]
- return render_template(
- "website_domains.html",
- mappings=mappings,
- buckets=buckets,
- principal=principal,
- can_manage_iam=True,
- )
-
-
-@ui_bp.post("/website-domains/create")
-def create_website_domain():
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- if _wants_json():
- return jsonify({"error": "Access denied"}), 403
- flash("Access denied", "danger")
- return redirect(url_for("ui.website_domains_dashboard"))
-
- if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
- if _wants_json():
- return jsonify({"error": "Website hosting is not enabled"}), 400
- flash("Website hosting is not enabled", "warning")
- return redirect(url_for("ui.buckets_overview"))
-
- domain = normalize_domain(request.form.get("domain") or "")
- bucket = (request.form.get("bucket") or "").strip()
-
- if not domain:
- if _wants_json():
- return jsonify({"error": "Domain is required"}), 400
- flash("Domain is required", "danger")
- return redirect(url_for("ui.website_domains_dashboard"))
-
- if not is_valid_domain(domain):
- if _wants_json():
- return jsonify({"error": f"Invalid domain format: '{domain}'"}), 400
- flash(f"Invalid domain format: '{domain}'. Use a hostname like www.example.com", "danger")
- return redirect(url_for("ui.website_domains_dashboard"))
-
- if not bucket:
- if _wants_json():
- return jsonify({"error": "Bucket is required"}), 400
- flash("Bucket is required", "danger")
- return redirect(url_for("ui.website_domains_dashboard"))
-
- storage = _storage()
- if not storage.bucket_exists(bucket):
- if _wants_json():
- return jsonify({"error": f"Bucket '{bucket}' does not exist"}), 404
- flash(f"Bucket '{bucket}' does not exist", "danger")
- return redirect(url_for("ui.website_domains_dashboard"))
-
- store = current_app.extensions.get("website_domains")
- if store.get_bucket(domain):
- if _wants_json():
- return jsonify({"error": f"Domain '{domain}' is already mapped"}), 409
- flash(f"Domain '{domain}' is already mapped", "danger")
- return redirect(url_for("ui.website_domains_dashboard"))
-
- store.set_mapping(domain, bucket)
- if _wants_json():
- return jsonify({"success": True, "domain": domain, "bucket": bucket}), 201
- flash(f"Domain '{domain}' mapped to bucket '{bucket}'", "success")
- return redirect(url_for("ui.website_domains_dashboard"))
-
-
-@ui_bp.post("/website-domains//update")
-def update_website_domain(domain: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- if _wants_json():
- return jsonify({"error": "Access denied"}), 403
- flash("Access denied", "danger")
- return redirect(url_for("ui.website_domains_dashboard"))
-
- domain = normalize_domain(domain)
- bucket = (request.form.get("bucket") or "").strip()
- if not bucket:
- if _wants_json():
- return jsonify({"error": "Bucket is required"}), 400
- flash("Bucket is required", "danger")
- return redirect(url_for("ui.website_domains_dashboard"))
-
- storage = _storage()
- if not storage.bucket_exists(bucket):
- if _wants_json():
- return jsonify({"error": f"Bucket '{bucket}' does not exist"}), 404
- flash(f"Bucket '{bucket}' does not exist", "danger")
- return redirect(url_for("ui.website_domains_dashboard"))
-
- store = current_app.extensions.get("website_domains")
- if not store.get_bucket(domain):
- if _wants_json():
- return jsonify({"error": f"No mapping for domain '{domain}'"}), 404
- flash(f"No mapping for domain '{domain}'", "danger")
- return redirect(url_for("ui.website_domains_dashboard"))
- store.set_mapping(domain, bucket)
- if _wants_json():
- return jsonify({"success": True, "domain": domain, "bucket": bucket})
- flash(f"Domain '{domain}' updated to bucket '{bucket}'", "success")
- return redirect(url_for("ui.website_domains_dashboard"))
-
-
-@ui_bp.post("/website-domains//delete")
-def delete_website_domain(domain: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- if _wants_json():
- return jsonify({"error": "Access denied"}), 403
- flash("Access denied", "danger")
- return redirect(url_for("ui.website_domains_dashboard"))
-
- domain = normalize_domain(domain)
- store = current_app.extensions.get("website_domains")
- if not store.delete_mapping(domain):
- if _wants_json():
- return jsonify({"error": f"No mapping for domain '{domain}'"}), 404
- flash(f"No mapping for domain '{domain}'", "danger")
- return redirect(url_for("ui.website_domains_dashboard"))
-
- if _wants_json():
- return jsonify({"success": True})
- flash(f"Domain '{domain}' mapping deleted", "success")
- return redirect(url_for("ui.website_domains_dashboard"))
-
-
-@ui_bp.get("/metrics")
-def metrics_dashboard():
- principal = _current_principal()
-
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- flash("Access denied: Metrics require admin permissions", "danger")
- return redirect(url_for("ui.buckets_overview"))
-
- from app.version import APP_VERSION
- import time
-
- cpu_percent = psutil.cpu_percent(interval=0.1)
- memory = psutil.virtual_memory()
-
- storage_root = current_app.config["STORAGE_ROOT"]
- disk = psutil.disk_usage(storage_root)
-
- storage = _storage()
- buckets = storage.list_buckets()
- total_buckets = len(buckets)
-
- total_objects = 0
- total_bytes_used = 0
- total_versions = 0
-
- cache_ttl = current_app.config.get("BUCKET_STATS_CACHE_TTL", 60)
- for bucket in buckets:
- stats = storage.bucket_stats(bucket.name, cache_ttl=cache_ttl)
- total_objects += stats.get("total_objects", stats.get("objects", 0))
- total_bytes_used += stats.get("total_bytes", stats.get("bytes", 0))
- total_versions += stats.get("version_count", 0)
-
- boot_time = psutil.boot_time()
- uptime_seconds = time.time() - boot_time
- uptime_days = int(uptime_seconds / 86400)
-
- return render_template(
- "metrics.html",
- principal=principal,
- cpu_percent=round(cpu_percent, 2),
- memory={
- "total": _format_bytes(memory.total),
- "available": _format_bytes(memory.available),
- "used": _format_bytes(memory.used),
- "percent": round(memory.percent, 2),
- },
- disk={
- "total": _format_bytes(disk.total),
- "free": _format_bytes(disk.free),
- "used": _format_bytes(disk.used),
- "percent": round(disk.percent, 2),
- },
- app={
- "buckets": total_buckets,
- "objects": total_objects,
- "versions": total_versions,
- "storage_used": _format_bytes(total_bytes_used),
- "storage_raw": total_bytes_used,
- "version": APP_VERSION,
- "uptime_days": uptime_days,
- },
- metrics_history_enabled=current_app.config.get("METRICS_HISTORY_ENABLED", False),
- operation_metrics_enabled=current_app.config.get("OPERATION_METRICS_ENABLED", False),
- )
-
-
-@ui_bp.route("/metrics/api")
-def metrics_api():
- principal = _current_principal()
-
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- import time
-
- cpu_percent = psutil.cpu_percent(interval=0.1)
- memory = psutil.virtual_memory()
-
- storage_root = current_app.config["STORAGE_ROOT"]
- disk = psutil.disk_usage(storage_root)
-
- storage = _storage()
- buckets = storage.list_buckets()
- total_buckets = len(buckets)
-
- total_objects = 0
- total_bytes_used = 0
- total_versions = 0
-
- cache_ttl = current_app.config.get("BUCKET_STATS_CACHE_TTL", 60)
- for bucket in buckets:
- stats = storage.bucket_stats(bucket.name, cache_ttl=cache_ttl)
- total_objects += stats.get("total_objects", stats.get("objects", 0))
- total_bytes_used += stats.get("total_bytes", stats.get("bytes", 0))
- total_versions += stats.get("version_count", 0)
-
- boot_time = psutil.boot_time()
- uptime_seconds = time.time() - boot_time
- uptime_days = int(uptime_seconds / 86400)
-
- return jsonify({
- "cpu_percent": round(cpu_percent, 2),
- "memory": {
- "total": _format_bytes(memory.total),
- "available": _format_bytes(memory.available),
- "used": _format_bytes(memory.used),
- "percent": round(memory.percent, 2),
- },
- "disk": {
- "total": _format_bytes(disk.total),
- "free": _format_bytes(disk.free),
- "used": _format_bytes(disk.used),
- "percent": round(disk.percent, 2),
- },
- "app": {
- "buckets": total_buckets,
- "objects": total_objects,
- "versions": total_versions,
- "storage_used": _format_bytes(total_bytes_used),
- "storage_raw": total_bytes_used,
- "uptime_days": uptime_days,
- }
- })
-
-
-@ui_bp.route("/metrics/history")
-def metrics_history():
- principal = _current_principal()
-
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- system_metrics = current_app.extensions.get("system_metrics")
- if not system_metrics:
- return jsonify({"enabled": False, "history": []})
-
- hours = request.args.get("hours", type=int)
- if hours is None:
- hours = current_app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24)
-
- history = system_metrics.get_history(hours=hours)
-
- return jsonify({
- "enabled": True,
- "retention_hours": current_app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24),
- "interval_minutes": current_app.config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5),
- "history": history,
- })
-
-
-@ui_bp.route("/metrics/settings", methods=["GET", "PUT"])
-def metrics_settings():
- principal = _current_principal()
-
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- if request.method == "GET":
- return jsonify({
- "enabled": current_app.config.get("METRICS_HISTORY_ENABLED", False),
- "retention_hours": current_app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24),
- "interval_minutes": current_app.config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5),
- })
-
- data = request.get_json() or {}
-
- if "enabled" in data:
- current_app.config["METRICS_HISTORY_ENABLED"] = bool(data["enabled"])
- if "retention_hours" in data:
- current_app.config["METRICS_HISTORY_RETENTION_HOURS"] = max(1, int(data["retention_hours"]))
- if "interval_minutes" in data:
- current_app.config["METRICS_HISTORY_INTERVAL_MINUTES"] = max(1, int(data["interval_minutes"]))
-
- return jsonify({
- "enabled": current_app.config.get("METRICS_HISTORY_ENABLED", False),
- "retention_hours": current_app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24),
- "interval_minutes": current_app.config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5),
- })
-
-
-@ui_bp.get("/metrics/operations")
-def metrics_operations():
- principal = _current_principal()
-
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- collector = _operation_metrics()
- if not collector:
- return jsonify({
- "enabled": False,
- "stats": None,
- })
-
- return jsonify({
- "enabled": True,
- "stats": collector.get_current_stats(),
- })
-
-
-@ui_bp.get("/metrics/operations/history")
-def metrics_operations_history():
- principal = _current_principal()
-
- try:
- _iam().authorize(principal, None, "iam:list_users")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- collector = _operation_metrics()
- if not collector:
- return jsonify({
- "enabled": False,
- "history": [],
- })
-
- hours = request.args.get("hours", type=int)
- return jsonify({
- "enabled": True,
- "history": collector.get_history(hours),
- "interval_minutes": current_app.config.get("OPERATION_METRICS_INTERVAL_MINUTES", 5),
- })
-
-
-@ui_bp.route("/buckets//lifecycle", methods=["GET", "POST", "DELETE"])
-def bucket_lifecycle(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "lifecycle")
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- try:
- client = get_session_s3_client()
- except (PermissionError, RuntimeError) as exc:
- return jsonify({"error": str(exc)}), 403
-
- if request.method == "GET":
- try:
- resp = client.get_bucket_lifecycle_configuration(Bucket=bucket_name)
- rules = resp.get("Rules", [])
- except ClientError as exc:
- code = exc.response.get("Error", {}).get("Code", "")
- if code == "NoSuchLifecycleConfiguration":
- rules = []
- else:
- err, status = handle_client_error(exc)
- return jsonify(err), status
- except (EndpointConnectionError, ConnectionClosedError) as exc:
- return jsonify(*handle_connection_error(exc))
- return jsonify({"rules": rules})
-
- if request.method == "DELETE":
- try:
- client.delete_bucket_lifecycle(Bucket=bucket_name)
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
- return jsonify({"status": "ok", "message": "Lifecycle configuration deleted"})
-
- payload = request.get_json(silent=True) or {}
- rules = payload.get("rules", [])
- if not isinstance(rules, list):
- return jsonify({"error": "rules must be a list"}), 400
-
- validated_rules = []
- for i, rule in enumerate(rules):
- if not isinstance(rule, dict):
- return jsonify({"error": f"Rule {i} must be an object"}), 400
- validated = {
- "ID": str(rule.get("ID", f"rule-{i+1}")),
- "Status": "Enabled" if rule.get("Status", "Enabled") == "Enabled" else "Disabled",
- }
- filt = {}
- if rule.get("Prefix"):
- filt["Prefix"] = str(rule["Prefix"])
- if filt:
- validated["Filter"] = filt
- if rule.get("Expiration"):
- exp = rule["Expiration"]
- if isinstance(exp, dict) and exp.get("Days"):
- validated["Expiration"] = {"Days": int(exp["Days"])}
- if rule.get("NoncurrentVersionExpiration"):
- nve = rule["NoncurrentVersionExpiration"]
- if isinstance(nve, dict) and nve.get("NoncurrentDays"):
- validated["NoncurrentVersionExpiration"] = {"NoncurrentDays": int(nve["NoncurrentDays"])}
- if rule.get("AbortIncompleteMultipartUpload"):
- aimu = rule["AbortIncompleteMultipartUpload"]
- if isinstance(aimu, dict) and aimu.get("DaysAfterInitiation"):
- validated["AbortIncompleteMultipartUpload"] = {"DaysAfterInitiation": int(aimu["DaysAfterInitiation"])}
- validated_rules.append(validated)
-
- try:
- if validated_rules:
- client.put_bucket_lifecycle_configuration(
- Bucket=bucket_name,
- LifecycleConfiguration={"Rules": validated_rules},
- )
- else:
- client.delete_bucket_lifecycle(Bucket=bucket_name)
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
- return jsonify({"status": "ok", "message": "Lifecycle configuration saved", "rules": validated_rules})
-
-
-@ui_bp.get("/buckets//lifecycle/history")
-def get_lifecycle_history(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "lifecycle")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- limit = request.args.get("limit", 50, type=int)
- offset = request.args.get("offset", 0, type=int)
-
- lifecycle_manager = current_app.extensions.get("lifecycle")
- if not lifecycle_manager:
- return jsonify({
- "executions": [],
- "total": 0,
- "limit": limit,
- "offset": offset,
- "enabled": False,
- })
-
- records = lifecycle_manager.get_execution_history(bucket_name, limit, offset)
- return jsonify({
- "executions": [r.to_dict() for r in records],
- "total": len(lifecycle_manager.get_execution_history(bucket_name, 1000, 0)),
- "limit": limit,
- "offset": offset,
- "enabled": True,
- })
-
-
-@ui_bp.route("/buckets//cors", methods=["GET", "POST", "DELETE"])
-def bucket_cors(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "cors")
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- try:
- client = get_session_s3_client()
- except (PermissionError, RuntimeError) as exc:
- return jsonify({"error": str(exc)}), 403
-
- if request.method == "GET":
- try:
- resp = client.get_bucket_cors(Bucket=bucket_name)
- rules = resp.get("CORSRules", [])
- except ClientError as exc:
- code = exc.response.get("Error", {}).get("Code", "")
- if code == "NoSuchCORSConfiguration":
- rules = []
- else:
- err, status = handle_client_error(exc)
- return jsonify(err), status
- except (EndpointConnectionError, ConnectionClosedError) as exc:
- return jsonify(*handle_connection_error(exc))
- return jsonify({"rules": rules})
-
- if request.method == "DELETE":
- try:
- client.delete_bucket_cors(Bucket=bucket_name)
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
- return jsonify({"status": "ok", "message": "CORS configuration deleted"})
-
- payload = request.get_json(silent=True) or {}
- rules = payload.get("rules", [])
- if not isinstance(rules, list):
- return jsonify({"error": "rules must be a list"}), 400
-
- validated_rules = []
- for i, rule in enumerate(rules):
- if not isinstance(rule, dict):
- return jsonify({"error": f"Rule {i} must be an object"}), 400
- origins = rule.get("AllowedOrigins", [])
- methods = rule.get("AllowedMethods", [])
- if not origins or not methods:
- return jsonify({"error": f"Rule {i} must have AllowedOrigins and AllowedMethods"}), 400
- validated = {
- "AllowedOrigins": [str(o) for o in origins if o],
- "AllowedMethods": [str(m).upper() for m in methods if m],
- }
- if rule.get("AllowedHeaders"):
- validated["AllowedHeaders"] = [str(h) for h in rule["AllowedHeaders"] if h]
- if rule.get("ExposeHeaders"):
- validated["ExposeHeaders"] = [str(h) for h in rule["ExposeHeaders"] if h]
- if rule.get("MaxAgeSeconds") is not None:
- try:
- validated["MaxAgeSeconds"] = int(rule["MaxAgeSeconds"])
- except (ValueError, TypeError):
- pass
- validated_rules.append(validated)
-
- try:
- if validated_rules:
- client.put_bucket_cors(
- Bucket=bucket_name,
- CORSConfiguration={"CORSRules": validated_rules},
- )
- else:
- client.delete_bucket_cors(Bucket=bucket_name)
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
- return jsonify({"status": "ok", "message": "CORS configuration saved", "rules": validated_rules})
-
-
-@ui_bp.route("/buckets//acl", methods=["GET", "POST"])
-def bucket_acl(bucket_name: str):
- principal = _current_principal()
- action = "read" if request.method == "GET" else "write"
- try:
- _authorize_ui(principal, bucket_name, action)
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- try:
- client = get_session_s3_client()
- except (PermissionError, RuntimeError) as exc:
- return jsonify({"error": str(exc)}), 403
-
- owner_id = principal.access_key if principal else "anonymous"
-
- if request.method == "GET":
- try:
- resp = client.get_bucket_acl(Bucket=bucket_name)
- owner = resp.get("Owner", {}).get("ID", owner_id)
- grants = []
- for grant in resp.get("Grants", []):
- grantee = grant.get("Grantee", {})
- grantee_display = grantee.get("DisplayName") or grantee.get("ID", "")
- if not grantee_display:
- uri = grantee.get("URI", "")
- if "AllUsers" in uri:
- grantee_display = "Everyone (public)"
- elif "AuthenticatedUsers" in uri:
- grantee_display = "Authenticated users"
- else:
- grantee_display = uri or "unknown"
- grants.append({
- "grantee": grantee_display,
- "permission": grant.get("Permission", ""),
- })
- return jsonify({
- "owner": owner,
- "grants": grants,
- "canned_acls": list(CANNED_ACLS.keys()),
- })
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
-
- payload = request.get_json(silent=True) or {}
- canned_acl = payload.get("canned_acl")
- if canned_acl:
- if canned_acl not in CANNED_ACLS:
- return jsonify({"error": f"Invalid canned ACL: {canned_acl}"}), 400
- try:
- client.put_bucket_acl(Bucket=bucket_name, ACL=canned_acl)
- return jsonify({"status": "ok", "message": f"ACL set to {canned_acl}"})
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
-
- return jsonify({"error": "canned_acl is required"}), 400
-
-
-@ui_bp.route("/buckets//objects//tags", methods=["GET", "POST"])
-def object_tags(bucket_name: str, object_key: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "read", object_key=object_key)
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- try:
- client = get_session_s3_client()
- except (PermissionError, RuntimeError) as exc:
- return jsonify({"error": str(exc)}), 403
-
- if request.method == "GET":
- try:
- resp = client.get_object_tagging(Bucket=bucket_name, Key=object_key)
- tags = resp.get("TagSet", [])
- return jsonify({"tags": tags})
- except ClientError as exc:
- code = exc.response.get("Error", {}).get("Code", "")
- if code == "NoSuchKey":
- return jsonify({"error": "Object not found"}), 404
- err, status = handle_client_error(exc)
- return jsonify(err), status
- except (EndpointConnectionError, ConnectionClosedError) as exc:
- return jsonify(*handle_connection_error(exc))
-
- try:
- _authorize_ui(principal, bucket_name, "write", object_key=object_key)
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- payload = request.get_json(silent=True) or {}
- tags = payload.get("tags", [])
- if not isinstance(tags, list):
- return jsonify({"error": "tags must be a list"}), 400
- tag_limit = current_app.config.get("OBJECT_TAG_LIMIT", 50)
- if len(tags) > tag_limit:
- return jsonify({"error": f"Maximum {tag_limit} tags allowed"}), 400
-
- validated_tags = []
- for i, tag in enumerate(tags):
- if not isinstance(tag, dict) or not tag.get("Key"):
- return jsonify({"error": f"Tag at index {i} must have a Key field"}), 400
- validated_tags.append({
- "Key": str(tag["Key"]),
- "Value": str(tag.get("Value", ""))
- })
-
- try:
- if validated_tags:
- client.put_object_tagging(
- Bucket=bucket_name,
- Key=object_key,
- Tagging={"TagSet": validated_tags},
- )
- else:
- client.delete_object_tagging(Bucket=bucket_name, Key=object_key)
- return jsonify({"status": "ok", "message": "Tags saved", "tags": validated_tags})
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
-
-
-@ui_bp.post("/buckets//folders")
-def create_folder(bucket_name: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "write")
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- payload = request.get_json(silent=True) or {}
- folder_name = str(payload.get("folder_name", "")).strip()
- prefix = str(payload.get("prefix", "")).strip()
-
- if not folder_name:
- return jsonify({"error": "folder_name is required"}), 400
-
- folder_name = folder_name.rstrip("/")
- if "/" in folder_name:
- return jsonify({"error": "Folder name cannot contain /"}), 400
- if "\x00" in folder_name or "\x00" in prefix:
- return jsonify({"error": "Null bytes not allowed"}), 400
- if ".." in prefix.split("/"):
- return jsonify({"error": "Invalid prefix"}), 400
-
- folder_key = f"{prefix}{folder_name}/" if prefix else f"{folder_name}/"
-
- max_key_len = current_app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024)
- if len(folder_key.encode("utf-8")) > max_key_len:
- return jsonify({"error": f"Key exceeds maximum length of {max_key_len} bytes"}), 400
-
- try:
- client = get_session_s3_client()
- client.put_object(Bucket=bucket_name, Key=folder_key, Body=b"")
- return jsonify({"status": "ok", "message": f"Folder '{folder_name}' created", "key": folder_key})
- except PermissionError as exc:
- return jsonify({"error": str(exc)}), 401
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
-
-
-@ui_bp.post("/buckets//objects//copy")
-def copy_object(bucket_name: str, object_key: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "read", object_key=object_key)
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- payload = request.get_json(silent=True) or {}
- dest_bucket = str(payload.get("dest_bucket", bucket_name)).strip()
- dest_key = str(payload.get("dest_key", "")).strip()
-
- if not dest_key:
- return jsonify({"error": "dest_key is required"}), 400
- if "\x00" in dest_key:
- return jsonify({"error": "Destination key cannot contain null bytes"}), 400
- max_key_len = current_app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024)
- if len(dest_key.encode("utf-8")) > max_key_len:
- return jsonify({"error": f"Destination key exceeds maximum length of {max_key_len} bytes"}), 400
-
- try:
- _authorize_ui(principal, dest_bucket, "write", object_key=dest_key)
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- try:
- client = get_session_s3_client()
- client.copy_object(
- Bucket=dest_bucket,
- Key=dest_key,
- CopySource={"Bucket": bucket_name, "Key": object_key},
- )
- return jsonify({
- "status": "ok",
- "message": f"Copied to {dest_bucket}/{dest_key}",
- "dest_bucket": dest_bucket,
- "dest_key": dest_key,
- })
- except PermissionError as exc:
- return jsonify({"error": str(exc)}), 401
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
-
-
-@ui_bp.post("/buckets//objects//move")
-def move_object(bucket_name: str, object_key: str):
- principal = _current_principal()
- try:
- _authorize_ui(principal, bucket_name, "read", object_key=object_key)
- _authorize_ui(principal, bucket_name, "delete", object_key=object_key)
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- payload = request.get_json(silent=True) or {}
- dest_bucket = str(payload.get("dest_bucket", bucket_name)).strip()
- dest_key = str(payload.get("dest_key", "")).strip()
-
- if not dest_key:
- return jsonify({"error": "dest_key is required"}), 400
- if "\x00" in dest_key:
- return jsonify({"error": "Destination key cannot contain null bytes"}), 400
- max_key_len = current_app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024)
- if len(dest_key.encode("utf-8")) > max_key_len:
- return jsonify({"error": f"Destination key exceeds maximum length of {max_key_len} bytes"}), 400
-
- if dest_bucket == bucket_name and dest_key == object_key:
- return jsonify({"error": "Cannot move object to the same location"}), 400
-
- try:
- _authorize_ui(principal, dest_bucket, "write", object_key=dest_key)
- except IamError as exc:
- return jsonify({"error": str(exc)}), 403
-
- try:
- client = get_session_s3_client()
- client.copy_object(
- Bucket=dest_bucket,
- Key=dest_key,
- CopySource={"Bucket": bucket_name, "Key": object_key},
- )
- except PermissionError as exc:
- return jsonify({"error": str(exc)}), 401
- except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
- if isinstance(exc, ClientError):
- err, status = handle_client_error(exc)
- return jsonify(err), status
- return jsonify(*handle_connection_error(exc))
-
- try:
- client.delete_object(Bucket=bucket_name, Key=object_key)
- except (ClientError, EndpointConnectionError, ConnectionClosedError):
- return jsonify({
- "status": "partial",
- "message": f"Copied to {dest_bucket}/{dest_key} but failed to delete source",
- "dest_bucket": dest_bucket,
- "dest_key": dest_key,
- }), 200
-
- return jsonify({
- "status": "ok",
- "message": f"Moved to {dest_bucket}/{dest_key}",
- "dest_bucket": dest_bucket,
- "dest_key": dest_key,
- })
-
-
-@ui_bp.get("/buckets//list-for-copy")
-def list_buckets_for_copy(bucket_name: str):
- principal = _current_principal()
- try:
- client = get_session_s3_client()
- resp = client.list_buckets()
- except PermissionError as exc:
- return jsonify({"error": str(exc)}), 401
- except ClientError as exc:
- return jsonify(*handle_client_error(exc))
- except (EndpointConnectionError, ConnectionClosedError) as exc:
- return jsonify(*handle_connection_error(exc))
- allowed = []
- for b in resp.get("Buckets", []):
- try:
- _authorize_ui(principal, b["Name"], "write")
- allowed.append(b["Name"])
- except IamError:
- pass
- return jsonify({"buckets": allowed})
-
-
-@ui_bp.get("/sites")
-def sites_dashboard():
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- flash("Access denied: Site management requires admin permissions", "danger")
- return redirect(url_for("ui.buckets_overview"))
-
- registry = _site_registry()
- local_site = registry.get_local_site()
- peers = registry.list_peers()
- connections = _connections().list()
-
- replication = _replication()
- all_rules = replication.list_rules()
-
- peers_with_stats = []
- for peer in peers:
- buckets_syncing = 0
- has_bidirectional = False
- if peer.connection_id:
- for rule in all_rules:
- if rule.target_connection_id == peer.connection_id:
- buckets_syncing += 1
- if rule.mode == "bidirectional":
- has_bidirectional = True
- peers_with_stats.append({
- "peer": peer,
- "buckets_syncing": buckets_syncing,
- "has_connection": bool(peer.connection_id),
- "has_bidirectional": has_bidirectional,
- })
-
- return render_template(
- "sites.html",
- principal=principal,
- local_site=local_site,
- peers=peers,
- peers_with_stats=peers_with_stats,
- connections=connections,
- config_site_id=current_app.config.get("SITE_ID"),
- config_site_endpoint=current_app.config.get("SITE_ENDPOINT"),
- config_site_region=current_app.config.get("SITE_REGION", "us-east-1"),
- )
-
-
-@ui_bp.post("/sites/local")
-def update_local_site():
- principal = _current_principal()
- wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- if wants_json:
- return jsonify({"error": "Access denied"}), 403
- flash("Access denied", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- site_id = request.form.get("site_id", "").strip()
- endpoint = request.form.get("endpoint", "").strip()
- region = request.form.get("region", "us-east-1").strip()
- priority = request.form.get("priority", "100")
- display_name = request.form.get("display_name", "").strip()
-
- if not site_id:
- if wants_json:
- return jsonify({"error": "Site ID is required"}), 400
- flash("Site ID is required", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- try:
- priority_int = int(priority)
- except ValueError:
- priority_int = 100
-
- registry = _site_registry()
- existing = registry.get_local_site()
-
- site = SiteInfo(
- site_id=site_id,
- endpoint=endpoint,
- region=region,
- priority=priority_int,
- display_name=display_name or site_id,
- created_at=existing.created_at if existing else None,
- )
- registry.set_local_site(site)
-
- if wants_json:
- return jsonify({"message": "Local site configuration updated"})
- flash("Local site configuration updated", "success")
- return redirect(url_for("ui.sites_dashboard"))
-
-
-@ui_bp.post("/sites/peers")
-def add_peer_site():
- principal = _current_principal()
- wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- if wants_json:
- return jsonify({"error": "Access denied"}), 403
- flash("Access denied", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- site_id = request.form.get("site_id", "").strip()
- endpoint = request.form.get("endpoint", "").strip()
- region = request.form.get("region", "us-east-1").strip()
- priority = request.form.get("priority", "100")
- display_name = request.form.get("display_name", "").strip()
- connection_id = request.form.get("connection_id", "").strip() or None
-
- if not site_id:
- if wants_json:
- return jsonify({"error": "Site ID is required"}), 400
- flash("Site ID is required", "danger")
- return redirect(url_for("ui.sites_dashboard"))
- if not endpoint:
- if wants_json:
- return jsonify({"error": "Endpoint is required"}), 400
- flash("Endpoint is required", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- try:
- priority_int = int(priority)
- except ValueError:
- priority_int = 100
-
- registry = _site_registry()
-
- if registry.get_peer(site_id):
- if wants_json:
- return jsonify({"error": f"Peer site '{site_id}' already exists"}), 409
- flash(f"Peer site '{site_id}' already exists", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- if connection_id and not _connections().get(connection_id):
- if wants_json:
- return jsonify({"error": f"Connection '{connection_id}' not found"}), 404
- flash(f"Connection '{connection_id}' not found", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- peer = PeerSite(
- site_id=site_id,
- endpoint=endpoint,
- region=region,
- priority=priority_int,
- display_name=display_name or site_id,
- connection_id=connection_id,
- )
- registry.add_peer(peer)
-
- if wants_json:
- redirect_url = None
- if connection_id:
- redirect_url = url_for("ui.replication_wizard", site_id=site_id)
- return jsonify({"message": f"Peer site '{site_id}' added", "redirect": redirect_url})
- flash(f"Peer site '{site_id}' added", "success")
-
- if connection_id:
- return redirect(url_for("ui.replication_wizard", site_id=site_id))
- return redirect(url_for("ui.sites_dashboard"))
-
-
-@ui_bp.post("/sites/peers//update")
-def update_peer_site(site_id: str):
- principal = _current_principal()
- wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- if wants_json:
- return jsonify({"error": "Access denied"}), 403
- flash("Access denied", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- registry = _site_registry()
- existing = registry.get_peer(site_id)
-
- if not existing:
- if wants_json:
- return jsonify({"error": f"Peer site '{site_id}' not found"}), 404
- flash(f"Peer site '{site_id}' not found", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- endpoint = request.form.get("endpoint", existing.endpoint).strip()
- region = request.form.get("region", existing.region).strip()
- priority = request.form.get("priority", str(existing.priority))
- display_name = request.form.get("display_name", existing.display_name).strip()
- if "connection_id" in request.form:
- connection_id = request.form["connection_id"].strip() or None
- else:
- connection_id = existing.connection_id
-
- try:
- priority_int = int(priority)
- except ValueError:
- priority_int = existing.priority
-
- if connection_id and not _connections().get(connection_id):
- if wants_json:
- return jsonify({"error": f"Connection '{connection_id}' not found"}), 404
- flash(f"Connection '{connection_id}' not found", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- peer = PeerSite(
- site_id=site_id,
- endpoint=endpoint,
- region=region,
- priority=priority_int,
- display_name=display_name or site_id,
- connection_id=connection_id,
- created_at=existing.created_at,
- is_healthy=existing.is_healthy,
- last_health_check=existing.last_health_check,
- )
- registry.update_peer(peer)
-
- if wants_json:
- return jsonify({"message": f"Peer site '{site_id}' updated"})
- flash(f"Peer site '{site_id}' updated", "success")
- return redirect(url_for("ui.sites_dashboard"))
-
-
-@ui_bp.post("/sites/peers//delete")
-def delete_peer_site(site_id: str):
- principal = _current_principal()
- wants_json = request.headers.get("X-Requested-With") == "XMLHttpRequest"
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- if wants_json:
- return jsonify({"error": "Access denied"}), 403
- flash("Access denied", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- registry = _site_registry()
- if registry.delete_peer(site_id):
- if wants_json:
- return jsonify({"message": f"Peer site '{site_id}' deleted"})
- flash(f"Peer site '{site_id}' deleted", "success")
- else:
- if wants_json:
- return jsonify({"error": f"Peer site '{site_id}' not found"}), 404
- flash(f"Peer site '{site_id}' not found", "danger")
-
- return redirect(url_for("ui.sites_dashboard"))
-
-
-@ui_bp.get("/sites/peers//health")
-def check_peer_site_health(site_id: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- registry = _site_registry()
- peer = registry.get_peer(site_id)
-
- if not peer:
- return jsonify({"error": f"Peer site '{site_id}' not found"}), 404
-
- is_healthy = False
- error_message = None
-
- if peer.connection_id:
- connection = _connections().get(peer.connection_id)
- if connection:
- is_healthy = _replication().check_endpoint_health(connection)
- else:
- error_message = f"Connection '{peer.connection_id}' not found"
- else:
- error_message = "No connection configured for this peer"
-
- registry.update_health(site_id, is_healthy)
-
- result = {
- "site_id": site_id,
- "is_healthy": is_healthy,
- }
- if error_message:
- result["error"] = error_message
-
- return jsonify(result)
-
-
-@ui_bp.get("/sites/peers//bidirectional-status")
-def check_peer_bidirectional_status(site_id: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- registry = _site_registry()
- peer = registry.get_peer(site_id)
-
- if not peer:
- return jsonify({"error": f"Peer site '{site_id}' not found"}), 404
-
- local_site = registry.get_local_site()
- replication = _replication()
- local_rules = replication.list_rules()
-
- local_bidir_rules = []
- for rule in local_rules:
- if rule.target_connection_id == peer.connection_id and rule.mode == "bidirectional":
- local_bidir_rules.append({
- "bucket_name": rule.bucket_name,
- "target_bucket": rule.target_bucket,
- "enabled": rule.enabled,
- })
-
- result = {
- "site_id": site_id,
- "local_site_id": local_site.site_id if local_site else None,
- "local_endpoint": local_site.endpoint if local_site else None,
- "local_bidirectional_rules": local_bidir_rules,
- "local_site_sync_enabled": current_app.config.get("SITE_SYNC_ENABLED", False),
- "remote_status": None,
- "issues": [],
- "is_fully_configured": False,
- }
-
- if not local_site or not local_site.site_id:
- result["issues"].append({
- "code": "NO_LOCAL_SITE_ID",
- "message": "Local site identity not configured",
- "severity": "error",
- })
-
- if not local_site or not local_site.endpoint:
- result["issues"].append({
- "code": "NO_LOCAL_ENDPOINT",
- "message": "Local site endpoint not configured (remote site cannot reach back)",
- "severity": "error",
- })
-
- if not peer.connection_id:
- result["issues"].append({
- "code": "NO_CONNECTION",
- "message": "No connection configured for this peer",
- "severity": "error",
- })
- return jsonify(result)
-
- connection = _connections().get(peer.connection_id)
- if not connection:
- result["issues"].append({
- "code": "CONNECTION_NOT_FOUND",
- "message": f"Connection '{peer.connection_id}' not found",
- "severity": "error",
- })
- return jsonify(result)
-
- if not local_bidir_rules:
- result["issues"].append({
- "code": "NO_LOCAL_BIDIRECTIONAL_RULES",
- "message": "No bidirectional replication rules configured on this site",
- "severity": "warning",
- })
-
- if not result["local_site_sync_enabled"]:
- result["issues"].append({
- "code": "SITE_SYNC_DISABLED",
- "message": "Site sync worker is disabled (SITE_SYNC_ENABLED=false). Pull operations will not work.",
- "severity": "warning",
- })
-
- if not replication.check_endpoint_health(connection):
- result["issues"].append({
- "code": "REMOTE_UNREACHABLE",
- "message": "Remote endpoint is not reachable",
- "severity": "error",
- })
- return jsonify(result)
-
- try:
- parsed = urlparse(peer.endpoint)
- hostname = parsed.hostname or ""
- import ipaddress
- cloud_metadata_hosts = {"metadata.google.internal", "169.254.169.254"}
- if hostname.lower() in cloud_metadata_hosts:
- result["issues"].append({
- "code": "ENDPOINT_NOT_ALLOWED",
- "message": "Peer endpoint points to cloud metadata service (SSRF protection)",
- "severity": "error",
- })
- return jsonify(result)
- allow_internal = current_app.config.get("ALLOW_INTERNAL_ENDPOINTS", False)
- if not allow_internal:
- try:
- ip = ipaddress.ip_address(hostname)
- if ip.is_private or ip.is_loopback or ip.is_reserved or ip.is_link_local:
- result["issues"].append({
- "code": "ENDPOINT_NOT_ALLOWED",
- "message": "Peer endpoint points to internal or private address (set ALLOW_INTERNAL_ENDPOINTS=true for self-hosted deployments)",
- "severity": "error",
- })
- return jsonify(result)
- except ValueError:
- blocked_patterns = ["localhost", "127.", "10.", "192.168.", "172.16."]
- if any(hostname.startswith(p) or hostname == p.rstrip(".") for p in blocked_patterns):
- result["issues"].append({
- "code": "ENDPOINT_NOT_ALLOWED",
- "message": "Peer endpoint points to internal or private address (set ALLOW_INTERNAL_ENDPOINTS=true for self-hosted deployments)",
- "severity": "error",
- })
- return jsonify(result)
- except Exception:
- pass
-
- try:
- admin_url = peer.endpoint.rstrip("/") + "/admin/sites"
- resp = requests.get(
- admin_url,
- timeout=10,
- headers={
- "Accept": "application/json",
- "X-Access-Key": connection.access_key,
- "X-Secret-Key": connection.secret_key,
- },
- )
-
- if resp.status_code == 200:
- try:
- remote_data = resp.json()
- if not isinstance(remote_data, dict):
- raise ValueError("Expected JSON object")
- remote_local = remote_data.get("local")
- if remote_local is not None and not isinstance(remote_local, dict):
- raise ValueError("Expected 'local' to be an object")
- remote_peers = remote_data.get("peers", [])
- if not isinstance(remote_peers, list):
- raise ValueError("Expected 'peers' to be a list")
- except (ValueError, json.JSONDecodeError) as e:
- result["remote_status"] = {"reachable": True, "invalid_response": True}
- result["issues"].append({
- "code": "REMOTE_INVALID_RESPONSE",
- "message": "Remote admin API returned invalid JSON",
- "severity": "warning",
- })
- return jsonify(result)
-
- result["remote_status"] = {
- "reachable": True,
- "local_site": remote_local,
- "site_sync_enabled": None,
- "has_peer_for_us": False,
- "peer_connection_configured": False,
- "has_bidirectional_rules_for_us": False,
- }
-
- for rp in remote_peers:
- if not isinstance(rp, dict):
- continue
- if local_site and (
- rp.get("site_id") == local_site.site_id or
- rp.get("endpoint") == local_site.endpoint
- ):
- result["remote_status"]["has_peer_for_us"] = True
- result["remote_status"]["peer_connection_configured"] = bool(rp.get("connection_id"))
- break
-
- if not result["remote_status"]["has_peer_for_us"]:
- result["issues"].append({
- "code": "REMOTE_NO_PEER_FOR_US",
- "message": "Remote site does not have this site registered as a peer",
- "severity": "error",
- })
- elif not result["remote_status"]["peer_connection_configured"]:
- result["issues"].append({
- "code": "REMOTE_NO_CONNECTION_FOR_US",
- "message": "Remote site has us as peer but no connection configured (cannot push back)",
- "severity": "error",
- })
- elif resp.status_code == 401 or resp.status_code == 403:
- result["remote_status"] = {
- "reachable": True,
- "admin_access_denied": True,
- }
- result["issues"].append({
- "code": "REMOTE_ADMIN_ACCESS_DENIED",
- "message": "Cannot verify remote configuration (admin access denied)",
- "severity": "warning",
- })
- else:
- result["remote_status"] = {
- "reachable": True,
- "admin_api_error": resp.status_code,
- }
- result["issues"].append({
- "code": "REMOTE_ADMIN_API_ERROR",
- "message": f"Remote admin API returned status {resp.status_code}",
- "severity": "warning",
- })
- except requests.RequestException:
- result["remote_status"] = {
- "reachable": False,
- "error": "Connection failed",
- }
- result["issues"].append({
- "code": "REMOTE_ADMIN_UNREACHABLE",
- "message": "Could not reach remote admin API",
- "severity": "warning",
- })
- except Exception:
- result["issues"].append({
- "code": "VERIFICATION_ERROR",
- "message": "Internal error during verification",
- "severity": "warning",
- })
-
- error_issues = [i for i in result["issues"] if i["severity"] == "error"]
- result["is_fully_configured"] = len(error_issues) == 0 and len(local_bidir_rules) > 0
-
- return jsonify(result)
-
-
-@ui_bp.get("/sites/peers//replication-wizard")
-def replication_wizard(site_id: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- flash("Access denied", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- registry = _site_registry()
- peer = registry.get_peer(site_id)
- if not peer:
- flash(f"Peer site '{site_id}' not found", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- if not peer.connection_id:
- flash("This peer has no connection configured. Add a connection first to set up replication.", "warning")
- return redirect(url_for("ui.sites_dashboard"))
-
- connection = _connections().get(peer.connection_id)
- if not connection:
- flash(f"Connection '{peer.connection_id}' not found", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- buckets = _storage().list_buckets()
- replication = _replication()
-
- bucket_info = []
- for bucket in buckets:
- existing_rule = replication.get_rule(bucket.name)
- has_rule_for_peer = (
- existing_rule and
- existing_rule.target_connection_id == peer.connection_id
- )
- bucket_info.append({
- "name": bucket.name,
- "has_rule": has_rule_for_peer,
- "existing_mode": existing_rule.mode if has_rule_for_peer else None,
- "existing_target": existing_rule.target_bucket if has_rule_for_peer else None,
- })
-
- local_site = registry.get_local_site()
-
- return render_template(
- "replication_wizard.html",
- principal=principal,
- peer=peer,
- connection=connection,
- buckets=bucket_info,
- local_site=local_site,
- csrf_token=generate_csrf,
- )
-
-
-@ui_bp.post("/sites/peers//replication-rules")
-def create_peer_replication_rules(site_id: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- flash("Access denied", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- registry = _site_registry()
- peer = registry.get_peer(site_id)
- if not peer or not peer.connection_id:
- flash("Invalid peer site or no connection configured", "danger")
- return redirect(url_for("ui.sites_dashboard"))
-
- from .replication import REPLICATION_MODE_NEW_ONLY, REPLICATION_MODE_ALL
- import time as time_module
-
- selected_buckets = request.form.getlist("buckets")
- mode = request.form.get("mode", REPLICATION_MODE_NEW_ONLY)
-
- if not selected_buckets:
- flash("No buckets selected", "warning")
- return redirect(url_for("ui.sites_dashboard"))
-
- created = 0
- failed = 0
- replication = _replication()
-
- for bucket_name in selected_buckets:
- target_bucket = request.form.get(f"target_{bucket_name}", bucket_name).strip()
- if not target_bucket:
- target_bucket = bucket_name
-
- try:
- rule = ReplicationRule(
- bucket_name=bucket_name,
- target_connection_id=peer.connection_id,
- target_bucket=target_bucket,
- enabled=True,
- mode=mode,
- created_at=time_module.time(),
- )
- replication.set_rule(rule)
-
- if mode == REPLICATION_MODE_ALL:
- replication.replicate_existing_objects(bucket_name)
-
- created += 1
- except Exception:
- failed += 1
-
- if created > 0:
- flash(f"Created {created} replication rule(s) for {peer.display_name or peer.site_id}", "success")
- if failed > 0:
- flash(f"Failed to create {failed} rule(s)", "danger")
-
- return redirect(url_for("ui.sites_dashboard"))
-
-
-@ui_bp.get("/sites/peers//sync-stats")
-def get_peer_sync_stats(site_id: str):
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- registry = _site_registry()
- peer = registry.get_peer(site_id)
- if not peer:
- return jsonify({"error": "Peer not found"}), 404
-
- if not peer.connection_id:
- return jsonify({"error": "No connection configured"}), 400
-
- replication = _replication()
- all_rules = replication.list_rules()
-
- stats = {
- "buckets_syncing": 0,
- "objects_synced": 0,
- "objects_pending": 0,
- "objects_failed": 0,
- "bytes_synced": 0,
- "last_sync_at": None,
- "buckets": [],
- }
-
- for rule in all_rules:
- if rule.target_connection_id != peer.connection_id:
- continue
-
- stats["buckets_syncing"] += 1
-
- bucket_stats = {
- "bucket_name": rule.bucket_name,
- "target_bucket": rule.target_bucket,
- "mode": rule.mode,
- "enabled": rule.enabled,
- }
-
- if rule.stats:
- stats["objects_synced"] += rule.stats.objects_synced
- stats["objects_pending"] += rule.stats.objects_pending
- stats["bytes_synced"] += rule.stats.bytes_synced
-
- if rule.stats.last_sync_at:
- if not stats["last_sync_at"] or rule.stats.last_sync_at > stats["last_sync_at"]:
- stats["last_sync_at"] = rule.stats.last_sync_at
-
- bucket_stats["last_sync_at"] = rule.stats.last_sync_at
- bucket_stats["objects_synced"] = rule.stats.objects_synced
- bucket_stats["objects_pending"] = rule.stats.objects_pending
-
- failure_count = replication.get_failure_count(rule.bucket_name)
- stats["objects_failed"] += failure_count
- bucket_stats["failures"] = failure_count
-
- stats["buckets"].append(bucket_stats)
-
- return jsonify(stats)
-
-
-@ui_bp.get("/system")
-def system_dashboard():
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- flash("Access denied: System page requires admin permissions", "danger")
- return redirect(url_for("ui.buckets_overview"))
-
- import platform as _platform
- import sys
- from app.version import APP_VERSION
-
- try:
- import myfsio_core as _rc
- has_rust = True
- except ImportError:
- has_rust = False
-
- gc = current_app.extensions.get("gc")
- gc_status = gc.get_status() if gc else {"enabled": False}
- gc_history_records = []
- if gc:
- raw = gc.get_history(limit=10, offset=0)
- for rec in raw:
- r = rec.get("result", {})
- total_freed = r.get("temp_bytes_freed", 0) + r.get("multipart_bytes_freed", 0) + r.get("orphaned_version_bytes_freed", 0)
- rec["bytes_freed_display"] = _format_bytes(total_freed)
- rec["timestamp_display"] = _format_datetime_display(datetime.fromtimestamp(rec["timestamp"], tz=dt_timezone.utc))
- gc_history_records.append(rec)
-
- checker = current_app.extensions.get("integrity")
- integrity_status = checker.get_status() if checker else {"enabled": False}
- integrity_history_records = []
- if checker:
- raw = checker.get_history(limit=10, offset=0)
- for rec in raw:
- rec["timestamp_display"] = _format_datetime_display(datetime.fromtimestamp(rec["timestamp"], tz=dt_timezone.utc))
- integrity_history_records.append(rec)
-
- features = [
- {"label": "Encryption (SSE-S3)", "enabled": current_app.config.get("ENCRYPTION_ENABLED", False)},
- {"label": "KMS", "enabled": current_app.config.get("KMS_ENABLED", False)},
- {"label": "Versioning Lifecycle", "enabled": current_app.config.get("LIFECYCLE_ENABLED", False)},
- {"label": "Metrics History", "enabled": current_app.config.get("METRICS_HISTORY_ENABLED", False)},
- {"label": "Operation Metrics", "enabled": current_app.config.get("OPERATION_METRICS_ENABLED", False)},
- {"label": "Site Sync", "enabled": current_app.config.get("SITE_SYNC_ENABLED", False)},
- {"label": "Website Hosting", "enabled": current_app.config.get("WEBSITE_HOSTING_ENABLED", False)},
- {"label": "Garbage Collection", "enabled": current_app.config.get("GC_ENABLED", False)},
- {"label": "Integrity Scanner", "enabled": current_app.config.get("INTEGRITY_ENABLED", False)},
- ]
-
- return render_template(
- "system.html",
- principal=principal,
- app_version=APP_VERSION,
- storage_root=current_app.config.get("STORAGE_ROOT", "./data"),
- platform=_platform.platform(),
- python_version=sys.version.split()[0],
- has_rust=has_rust,
- features=features,
- gc_status=gc_status,
- gc_history=gc_history_records,
- integrity_status=integrity_status,
- integrity_history=integrity_history_records,
- display_timezone=current_app.config.get("DISPLAY_TIMEZONE", "UTC"),
- )
-
-
-@ui_bp.post("/system/gc/run")
-def system_gc_run():
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- gc = current_app.extensions.get("gc")
- if not gc:
- return jsonify({"error": "GC is not enabled"}), 400
-
- payload = request.get_json(silent=True) or {}
- started = gc.run_async(dry_run=payload.get("dry_run"))
- if not started:
- return jsonify({"error": "GC is already in progress"}), 409
- return jsonify({"status": "started"})
-
-
-@ui_bp.get("/system/gc/status")
-def system_gc_status():
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- gc = current_app.extensions.get("gc")
- if not gc:
- return jsonify({"error": "GC is not enabled"}), 400
-
- return jsonify(gc.get_status())
-
-
-@ui_bp.get("/system/gc/history")
-def system_gc_history():
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- gc = current_app.extensions.get("gc")
- if not gc:
- return jsonify({"executions": []})
-
- limit = min(int(request.args.get("limit", 10)), 200)
- offset = int(request.args.get("offset", 0))
- records = gc.get_history(limit=limit, offset=offset)
- return jsonify({"executions": records})
-
-
-@ui_bp.post("/system/integrity/run")
-def system_integrity_run():
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- checker = current_app.extensions.get("integrity")
- if not checker:
- return jsonify({"error": "Integrity checker is not enabled"}), 400
-
- payload = request.get_json(silent=True) or {}
- started = checker.run_async(
- auto_heal=payload.get("auto_heal"),
- dry_run=payload.get("dry_run"),
- )
- if not started:
- return jsonify({"error": "A scan is already in progress"}), 409
- return jsonify({"status": "started"})
-
-
-@ui_bp.get("/system/integrity/status")
-def system_integrity_status():
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- checker = current_app.extensions.get("integrity")
- if not checker:
- return jsonify({"error": "Integrity checker is not enabled"}), 400
-
- return jsonify(checker.get_status())
-
-
-@ui_bp.get("/system/integrity/history")
-def system_integrity_history():
- principal = _current_principal()
- try:
- _iam().authorize(principal, None, "iam:*")
- except IamError:
- return jsonify({"error": "Access denied"}), 403
-
- checker = current_app.extensions.get("integrity")
- if not checker:
- return jsonify({"executions": []})
-
- limit = min(int(request.args.get("limit", 10)), 200)
- offset = int(request.args.get("offset", 0))
- records = checker.get_history(limit=limit, offset=offset)
- return jsonify({"executions": records})
-
-
-@ui_bp.app_errorhandler(404)
-def ui_not_found(error): # type: ignore[override]
- prefix = ui_bp.url_prefix or ""
- path = request.path or ""
- wants_html = request.accept_mimetypes.accept_html
- if wants_html and (not prefix or path.startswith(prefix)):
- return render_template("404.html"), 404
- return error
diff --git a/python/app/version.py b/python/app/version.py
deleted file mode 100644
index d2d65c4..0000000
--- a/python/app/version.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from __future__ import annotations
-
-APP_VERSION = "0.4.3"
-
-
-def get_version() -> str:
- """Return the current application version."""
- return APP_VERSION
diff --git a/python/app/website_domains.py b/python/app/website_domains.py
deleted file mode 100644
index 7ec33f7..0000000
--- a/python/app/website_domains.py
+++ /dev/null
@@ -1,108 +0,0 @@
-from __future__ import annotations
-
-import json
-import re
-import threading
-from pathlib import Path
-from typing import Dict, List, Optional
-
-_DOMAIN_RE = re.compile(
- r"^(?!-)[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$"
-)
-
-
-def normalize_domain(raw: str) -> str:
- raw = raw.strip().lower()
- for prefix in ("https://", "http://"):
- if raw.startswith(prefix):
- raw = raw[len(prefix):]
- raw = raw.split("/", 1)[0]
- raw = raw.split("?", 1)[0]
- raw = raw.split("#", 1)[0]
- if ":" in raw:
- raw = raw.rsplit(":", 1)[0]
- return raw
-
-
-def is_valid_domain(domain: str) -> bool:
- if not domain or len(domain) > 253:
- return False
- return bool(_DOMAIN_RE.match(domain))
-
-
-class WebsiteDomainStore:
- def __init__(self, config_path: Path) -> None:
- self.config_path = config_path
- self._lock = threading.Lock()
- self._domains: Dict[str, str] = {}
- self._last_mtime: float = 0.0
- self.reload()
-
- def reload(self) -> None:
- if not self.config_path.exists():
- self._domains = {}
- self._last_mtime = 0.0
- return
- try:
- self._last_mtime = self.config_path.stat().st_mtime
- with open(self.config_path, "r", encoding="utf-8") as f:
- data = json.load(f)
- if isinstance(data, dict):
- self._domains = {k.lower(): v for k, v in data.items()}
- else:
- self._domains = {}
- except (OSError, json.JSONDecodeError):
- self._domains = {}
-
- def _maybe_reload(self) -> None:
- try:
- if self.config_path.exists():
- mtime = self.config_path.stat().st_mtime
- if mtime != self._last_mtime:
- self._last_mtime = mtime
- with open(self.config_path, "r", encoding="utf-8") as f:
- data = json.load(f)
- if isinstance(data, dict):
- self._domains = {k.lower(): v for k, v in data.items()}
- else:
- self._domains = {}
- elif self._domains:
- self._domains = {}
- self._last_mtime = 0.0
- except (OSError, json.JSONDecodeError):
- pass
-
- def _save(self) -> None:
- self.config_path.parent.mkdir(parents=True, exist_ok=True)
- with open(self.config_path, "w", encoding="utf-8") as f:
- json.dump(self._domains, f, indent=2)
- self._last_mtime = self.config_path.stat().st_mtime
-
- def list_all(self) -> List[Dict[str, str]]:
- with self._lock:
- self._maybe_reload()
- return [{"domain": d, "bucket": b} for d, b in self._domains.items()]
-
- def get_bucket(self, domain: str) -> Optional[str]:
- with self._lock:
- self._maybe_reload()
- return self._domains.get(domain.lower())
-
- def get_domains_for_bucket(self, bucket: str) -> List[str]:
- with self._lock:
- self._maybe_reload()
- return [d for d, b in self._domains.items() if b == bucket]
-
- def set_mapping(self, domain: str, bucket: str) -> None:
- with self._lock:
- self._domains[domain.lower()] = bucket
- self._save()
-
- def delete_mapping(self, domain: str) -> bool:
- with self._lock:
- key = domain.lower()
- if key not in self._domains:
- return False
- del self._domains[key]
- self._save()
- return True
diff --git a/python/docker-entrypoint.sh b/python/docker-entrypoint.sh
deleted file mode 100644
index 100eb45..0000000
--- a/python/docker-entrypoint.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-set -e
-
-exec python run.py --prod
diff --git a/python/myfsio_core/Cargo.toml b/python/myfsio_core/Cargo.toml
deleted file mode 100644
index 6f900e0..0000000
--- a/python/myfsio_core/Cargo.toml
+++ /dev/null
@@ -1,24 +0,0 @@
-[package]
-name = "myfsio_core"
-version = "0.1.0"
-edition = "2021"
-
-[lib]
-name = "myfsio_core"
-crate-type = ["cdylib"]
-
-[dependencies]
-pyo3 = { version = "0.28", features = ["extension-module"] }
-hmac = "0.12"
-sha2 = "0.10"
-md-5 = "0.10"
-hex = "0.4"
-unicode-normalization = "0.1"
-serde_json = "1"
-regex = "1"
-lru = "0.14"
-parking_lot = "0.12"
-percent-encoding = "2"
-aes-gcm = "0.10"
-hkdf = "0.12"
-uuid = { version = "1", features = ["v4"] }
diff --git a/python/myfsio_core/pyproject.toml b/python/myfsio_core/pyproject.toml
deleted file mode 100644
index fbea25c..0000000
--- a/python/myfsio_core/pyproject.toml
+++ /dev/null
@@ -1,11 +0,0 @@
-[build-system]
-requires = ["maturin>=1.0,<2.0"]
-build-backend = "maturin"
-
-[project]
-name = "myfsio_core"
-version = "0.1.0"
-requires-python = ">=3.10"
-
-[tool.maturin]
-features = ["pyo3/extension-module"]
diff --git a/python/myfsio_core/src/crypto.rs b/python/myfsio_core/src/crypto.rs
deleted file mode 100644
index 082814d..0000000
--- a/python/myfsio_core/src/crypto.rs
+++ /dev/null
@@ -1,192 +0,0 @@
-use aes_gcm::aead::Aead;
-use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
-use hkdf::Hkdf;
-use pyo3::exceptions::{PyIOError, PyValueError};
-use pyo3::prelude::*;
-use sha2::Sha256;
-use std::fs::File;
-use std::io::{Read, Seek, SeekFrom, Write};
-
-const DEFAULT_CHUNK_SIZE: usize = 65536;
-const HEADER_SIZE: usize = 4;
-
-fn read_exact_chunk(reader: &mut impl Read, buf: &mut [u8]) -> std::io::Result {
- let mut filled = 0;
- while filled < buf.len() {
- match reader.read(&mut buf[filled..]) {
- Ok(0) => break,
- Ok(n) => filled += n,
- Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
- Err(e) => return Err(e),
- }
- }
- Ok(filled)
-}
-
-fn derive_chunk_nonce(base_nonce: &[u8], chunk_index: u32) -> Result<[u8; 12], String> {
- let hkdf = Hkdf::::new(Some(base_nonce), b"chunk_nonce");
- let mut okm = [0u8; 12];
- hkdf.expand(&chunk_index.to_be_bytes(), &mut okm)
- .map_err(|e| format!("HKDF expand failed: {}", e))?;
- Ok(okm)
-}
-
-#[pyfunction]
-#[pyo3(signature = (input_path, output_path, key, base_nonce, chunk_size=DEFAULT_CHUNK_SIZE))]
-pub fn encrypt_stream_chunked(
- py: Python<'_>,
- input_path: &str,
- output_path: &str,
- key: &[u8],
- base_nonce: &[u8],
- chunk_size: usize,
-) -> PyResult {
- if key.len() != 32 {
- return Err(PyValueError::new_err(format!(
- "Key must be 32 bytes, got {}",
- key.len()
- )));
- }
- if base_nonce.len() != 12 {
- return Err(PyValueError::new_err(format!(
- "Base nonce must be 12 bytes, got {}",
- base_nonce.len()
- )));
- }
-
- let chunk_size = if chunk_size == 0 {
- DEFAULT_CHUNK_SIZE
- } else {
- chunk_size
- };
-
- let inp = input_path.to_owned();
- let out = output_path.to_owned();
- let key_arr: [u8; 32] = key.try_into().unwrap();
- let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
-
- py.detach(move || {
- let cipher = Aes256Gcm::new(&key_arr.into());
-
- let mut infile = File::open(&inp)
- .map_err(|e| PyIOError::new_err(format!("Failed to open input: {}", e)))?;
- let mut outfile = File::create(&out)
- .map_err(|e| PyIOError::new_err(format!("Failed to create output: {}", e)))?;
-
- outfile
- .write_all(&[0u8; 4])
- .map_err(|e| PyIOError::new_err(format!("Failed to write header: {}", e)))?;
-
- let mut buf = vec![0u8; chunk_size];
- let mut chunk_index: u32 = 0;
-
- loop {
- let n = read_exact_chunk(&mut infile, &mut buf)
- .map_err(|e| PyIOError::new_err(format!("Failed to read: {}", e)))?;
- if n == 0 {
- break;
- }
-
- let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)
- .map_err(|e| PyValueError::new_err(e))?;
- let nonce = Nonce::from_slice(&nonce_bytes);
-
- let encrypted = cipher
- .encrypt(nonce, &buf[..n])
- .map_err(|e| PyValueError::new_err(format!("Encrypt failed: {}", e)))?;
-
- let size = encrypted.len() as u32;
- outfile
- .write_all(&size.to_be_bytes())
- .map_err(|e| PyIOError::new_err(format!("Failed to write chunk size: {}", e)))?;
- outfile
- .write_all(&encrypted)
- .map_err(|e| PyIOError::new_err(format!("Failed to write chunk: {}", e)))?;
-
- chunk_index += 1;
- }
-
- outfile
- .seek(SeekFrom::Start(0))
- .map_err(|e| PyIOError::new_err(format!("Failed to seek: {}", e)))?;
- outfile
- .write_all(&chunk_index.to_be_bytes())
- .map_err(|e| PyIOError::new_err(format!("Failed to write chunk count: {}", e)))?;
-
- Ok(chunk_index)
- })
-}
-
-#[pyfunction]
-pub fn decrypt_stream_chunked(
- py: Python<'_>,
- input_path: &str,
- output_path: &str,
- key: &[u8],
- base_nonce: &[u8],
-) -> PyResult {
- if key.len() != 32 {
- return Err(PyValueError::new_err(format!(
- "Key must be 32 bytes, got {}",
- key.len()
- )));
- }
- if base_nonce.len() != 12 {
- return Err(PyValueError::new_err(format!(
- "Base nonce must be 12 bytes, got {}",
- base_nonce.len()
- )));
- }
-
- let inp = input_path.to_owned();
- let out = output_path.to_owned();
- let key_arr: [u8; 32] = key.try_into().unwrap();
- let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
-
- py.detach(move || {
- let cipher = Aes256Gcm::new(&key_arr.into());
-
- let mut infile = File::open(&inp)
- .map_err(|e| PyIOError::new_err(format!("Failed to open input: {}", e)))?;
- let mut outfile = File::create(&out)
- .map_err(|e| PyIOError::new_err(format!("Failed to create output: {}", e)))?;
-
- let mut header = [0u8; HEADER_SIZE];
- infile
- .read_exact(&mut header)
- .map_err(|e| PyIOError::new_err(format!("Failed to read header: {}", e)))?;
- let chunk_count = u32::from_be_bytes(header);
-
- let mut size_buf = [0u8; HEADER_SIZE];
- for chunk_index in 0..chunk_count {
- infile
- .read_exact(&mut size_buf)
- .map_err(|e| {
- PyIOError::new_err(format!(
- "Failed to read chunk {} size: {}",
- chunk_index, e
- ))
- })?;
- let chunk_size = u32::from_be_bytes(size_buf) as usize;
-
- let mut encrypted = vec![0u8; chunk_size];
- infile.read_exact(&mut encrypted).map_err(|e| {
- PyIOError::new_err(format!("Failed to read chunk {}: {}", chunk_index, e))
- })?;
-
- let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)
- .map_err(|e| PyValueError::new_err(e))?;
- let nonce = Nonce::from_slice(&nonce_bytes);
-
- let decrypted = cipher.decrypt(nonce, encrypted.as_ref()).map_err(|e| {
- PyValueError::new_err(format!("Decrypt chunk {} failed: {}", chunk_index, e))
- })?;
-
- outfile.write_all(&decrypted).map_err(|e| {
- PyIOError::new_err(format!("Failed to write chunk {}: {}", chunk_index, e))
- })?;
- }
-
- Ok(chunk_count)
- })
-}
diff --git a/python/myfsio_core/src/hashing.rs b/python/myfsio_core/src/hashing.rs
deleted file mode 100644
index aec3fa4..0000000
--- a/python/myfsio_core/src/hashing.rs
+++ /dev/null
@@ -1,90 +0,0 @@
-use md5::{Digest, Md5};
-use pyo3::exceptions::PyIOError;
-use pyo3::prelude::*;
-use sha2::Sha256;
-use std::fs::File;
-use std::io::Read;
-
-const CHUNK_SIZE: usize = 65536;
-
-#[pyfunction]
-pub fn md5_file(py: Python<'_>, path: &str) -> PyResult {
- let path = path.to_owned();
- py.detach(move || {
- let mut file = File::open(&path)
- .map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
- let mut hasher = Md5::new();
- let mut buf = vec![0u8; CHUNK_SIZE];
- loop {
- let n = file
- .read(&mut buf)
- .map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
- if n == 0 {
- break;
- }
- hasher.update(&buf[..n]);
- }
- Ok(format!("{:x}", hasher.finalize()))
- })
-}
-
-#[pyfunction]
-pub fn md5_bytes(data: &[u8]) -> String {
- let mut hasher = Md5::new();
- hasher.update(data);
- format!("{:x}", hasher.finalize())
-}
-
-#[pyfunction]
-pub fn sha256_file(py: Python<'_>, path: &str) -> PyResult {
- let path = path.to_owned();
- py.detach(move || {
- let mut file = File::open(&path)
- .map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
- let mut hasher = Sha256::new();
- let mut buf = vec![0u8; CHUNK_SIZE];
- loop {
- let n = file
- .read(&mut buf)
- .map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
- if n == 0 {
- break;
- }
- hasher.update(&buf[..n]);
- }
- Ok(format!("{:x}", hasher.finalize()))
- })
-}
-
-#[pyfunction]
-pub fn sha256_bytes(data: &[u8]) -> String {
- let mut hasher = Sha256::new();
- hasher.update(data);
- format!("{:x}", hasher.finalize())
-}
-
-#[pyfunction]
-pub fn md5_sha256_file(py: Python<'_>, path: &str) -> PyResult<(String, String)> {
- let path = path.to_owned();
- py.detach(move || {
- let mut file = File::open(&path)
- .map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
- let mut md5_hasher = Md5::new();
- let mut sha_hasher = Sha256::new();
- let mut buf = vec![0u8; CHUNK_SIZE];
- loop {
- let n = file
- .read(&mut buf)
- .map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
- if n == 0 {
- break;
- }
- md5_hasher.update(&buf[..n]);
- sha_hasher.update(&buf[..n]);
- }
- Ok((
- format!("{:x}", md5_hasher.finalize()),
- format!("{:x}", sha_hasher.finalize()),
- ))
- })
-}
diff --git a/python/myfsio_core/src/lib.rs b/python/myfsio_core/src/lib.rs
deleted file mode 100644
index f10dde3..0000000
--- a/python/myfsio_core/src/lib.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-mod crypto;
-mod hashing;
-mod metadata;
-mod sigv4;
-mod storage;
-mod streaming;
-mod validation;
-
-use pyo3::prelude::*;
-
-#[pymodule]
-mod myfsio_core {
- use super::*;
-
- #[pymodule_init]
- fn init(m: &Bound<'_, PyModule>) -> PyResult<()> {
- m.add_function(wrap_pyfunction!(sigv4::verify_sigv4_signature, m)?)?;
- m.add_function(wrap_pyfunction!(sigv4::derive_signing_key, m)?)?;
- m.add_function(wrap_pyfunction!(sigv4::compute_signature, m)?)?;
- m.add_function(wrap_pyfunction!(sigv4::build_string_to_sign, m)?)?;
- m.add_function(wrap_pyfunction!(sigv4::constant_time_compare, m)?)?;
- m.add_function(wrap_pyfunction!(sigv4::clear_signing_key_cache, m)?)?;
-
- m.add_function(wrap_pyfunction!(hashing::md5_file, m)?)?;
- m.add_function(wrap_pyfunction!(hashing::md5_bytes, m)?)?;
- m.add_function(wrap_pyfunction!(hashing::sha256_file, m)?)?;
- m.add_function(wrap_pyfunction!(hashing::sha256_bytes, m)?)?;
- m.add_function(wrap_pyfunction!(hashing::md5_sha256_file, m)?)?;
-
- m.add_function(wrap_pyfunction!(validation::validate_object_key, m)?)?;
- m.add_function(wrap_pyfunction!(validation::validate_bucket_name, m)?)?;
-
- m.add_function(wrap_pyfunction!(metadata::read_index_entry, m)?)?;
-
- m.add_function(wrap_pyfunction!(storage::write_index_entry, m)?)?;
- m.add_function(wrap_pyfunction!(storage::delete_index_entry, m)?)?;
- m.add_function(wrap_pyfunction!(storage::check_bucket_contents, m)?)?;
- m.add_function(wrap_pyfunction!(storage::shallow_scan, m)?)?;
- m.add_function(wrap_pyfunction!(storage::bucket_stats_scan, m)?)?;
- m.add_function(wrap_pyfunction!(storage::search_objects_scan, m)?)?;
- m.add_function(wrap_pyfunction!(storage::build_object_cache, m)?)?;
-
- m.add_function(wrap_pyfunction!(streaming::stream_to_file_with_md5, m)?)?;
- m.add_function(wrap_pyfunction!(streaming::assemble_parts_with_md5, m)?)?;
-
- m.add_function(wrap_pyfunction!(crypto::encrypt_stream_chunked, m)?)?;
- m.add_function(wrap_pyfunction!(crypto::decrypt_stream_chunked, m)?)?;
-
- Ok(())
- }
-}
diff --git a/python/myfsio_core/src/metadata.rs b/python/myfsio_core/src/metadata.rs
deleted file mode 100644
index 67d09f8..0000000
--- a/python/myfsio_core/src/metadata.rs
+++ /dev/null
@@ -1,71 +0,0 @@
-use pyo3::exceptions::PyValueError;
-use pyo3::prelude::*;
-use pyo3::types::{PyDict, PyList, PyString};
-use serde_json::Value;
-use std::fs;
-
-const MAX_DEPTH: u32 = 64;
-
-fn value_to_py(py: Python<'_>, v: &Value, depth: u32) -> PyResult> {
- if depth > MAX_DEPTH {
- return Err(PyValueError::new_err("JSON nesting too deep"));
- }
- match v {
- Value::Null => Ok(py.None()),
- Value::Bool(b) => Ok((*b).into_pyobject(py)?.to_owned().into_any().unbind()),
- Value::Number(n) => {
- if let Some(i) = n.as_i64() {
- Ok(i.into_pyobject(py)?.into_any().unbind())
- } else if let Some(f) = n.as_f64() {
- Ok(f.into_pyobject(py)?.into_any().unbind())
- } else {
- Ok(py.None())
- }
- }
- Value::String(s) => Ok(PyString::new(py, s).into_any().unbind()),
- Value::Array(arr) => {
- let list = PyList::empty(py);
- for item in arr {
- list.append(value_to_py(py, item, depth + 1)?)?;
- }
- Ok(list.into_any().unbind())
- }
- Value::Object(map) => {
- let dict = PyDict::new(py);
- for (k, val) in map {
- dict.set_item(k, value_to_py(py, val, depth + 1)?)?;
- }
- Ok(dict.into_any().unbind())
- }
- }
-}
-
-#[pyfunction]
-pub fn read_index_entry(
- py: Python<'_>,
- path: &str,
- entry_name: &str,
-) -> PyResult