25 Commits

Author SHA1 Message Date
fdd068feee MyFSIO v0.3.3 Release
Reviewed-on: #26
2026-02-27 04:49:32 +00:00
66b7677d2c MyFSIO v0.3.2 Release
Reviewed-on: #25
2026-02-26 10:10:19 +00:00
4d90ead816 Merge pull request 'Fix incorrect Upgrading & Updates section in Docs' (#24) from next into main
Reviewed-on: #24
2026-02-26 09:50:17 +00:00
b37a51ed1d MyFSIO v0.3.1 Release
Reviewed-on: #23
2026-02-26 09:42:37 +00:00
0462a7b62e MyFSIO v0.3.0 Release
Reviewed-on: #22
2026-02-22 10:22:35 +00:00
52660570c1 Merge pull request 'MyFSIO v0.2.9 Release' (#21) from next into main
Reviewed-on: #21
2026-02-15 14:24:14 +00:00
35f61313e0 MyFSIO v0.2.8 Release
Reviewed-on: #20
2026-02-10 14:16:22 +00:00
c470cfb576 MyFSIO v0.2.7 Release
Reviewed-on: #19
2026-02-09 12:22:37 +00:00
jun
d96955deee MyFSIO v0.2.6 Release
Reviewed-on: #18
2026-02-05 16:18:03 +00:00
85181f0be6 Merge pull request 'MyFSIO v0.2.5 Release' (#17) from next into main
Reviewed-on: #17
2026-02-02 05:32:02 +00:00
d5ca7a8be1 Merge pull request 'MyFSIO v0.2.4 Release' (#16) from next into main
Reviewed-on: #16
2026-02-01 10:27:11 +00:00
476dc79e42 MyFSIO v0.2.3 Release
Reviewed-on: #15
2026-01-25 06:05:53 +00:00
bb6590fc5e Merge pull request 'MyFSIO v0.2.2 Release' (#14) from next into main
Reviewed-on: #14
2026-01-19 07:12:15 +00:00
899db3421b Merge pull request 'MyFSIO v0.2.1 Release' (#13) from next into main
Reviewed-on: #13
2026-01-12 08:03:29 +00:00
caf01d6ada Merge pull request 'MyFSIO v0.2.0 Release' (#12) from next into main
Reviewed-on: #12
2026-01-05 15:48:03 +00:00
bb366cb4cd Merge pull request 'MyFSIO v0.1.9 Release' (#10) from next into main
Reviewed-on: #10
2025-12-29 06:49:48 +00:00
a2745ff2ee Merge pull request 'MyFSIO v0.1.8 Release' (#9) from next into main
Reviewed-on: #9
2025-12-23 06:01:32 +00:00
28cb656d94 Merge pull request 'MyFSIO v0.1.7 Release' (#8) from next into main
Reviewed-on: #8
2025-12-22 03:10:35 +00:00
3c44152fc6 Merge pull request 'MyFSIO v0.1.6 Release' (#7) from next into main
Reviewed-on: #7
2025-12-21 06:30:21 +00:00
397515edce Merge pull request 'MyFSIO v0.1.5 Release' (#6) from next into main
Reviewed-on: #6
2025-12-13 15:41:03 +00:00
980fced7e4 Merge pull request 'MyFSIO v0.1.4 Release' (#5) from next into main
Reviewed-on: #5
2025-12-13 08:22:43 +00:00
bae5009ec4 Merge pull request 'Release v0.1.3' (#4) from next into main
Reviewed-on: #4
2025-12-03 04:14:57 +00:00
233780617f Merge pull request 'Release V0.1.2' (#3) from next into main
Reviewed-on: #3
2025-11-26 04:59:15 +00:00
fd8fb21517 Merge pull request 'Prepare for binary release' (#2) from next into main
Reviewed-on: #2
2025-11-22 12:33:38 +00:00
c6cbe822e1 Merge pull request 'Release v0.1.1' (#1) from next into main
Reviewed-on: #1
2025-11-22 12:31:27 +00:00
204 changed files with 3877 additions and 65559 deletions

View File

@@ -11,7 +11,3 @@ htmlcov
logs
data
tmp
tests
myfsio_core/target
Dockerfile
.dockerignore

7
.gitignore vendored
View File

@@ -27,11 +27,8 @@ dist/
.eggs/
# Rust / maturin build artifacts
python/myfsio_core/target/
python/myfsio_core/Cargo.lock
# Rust engine build artifacts
rust/myfsio-engine/target/
myfsio_core/target/
myfsio_core/Cargo.lock
# Local runtime artifacts
logs/

View File

@@ -1,9 +1,9 @@
FROM python:3.14.3-slim AS builder
FROM python:3.14.3-slim
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1
WORKDIR /build
WORKDIR /app
RUN apt-get update \
&& apt-get install -y --no-install-recommends build-essential curl \
@@ -12,35 +12,24 @@ RUN apt-get update \
ENV PATH="/root/.cargo/bin:${PATH}"
RUN pip install --no-cache-dir maturin
COPY myfsio_core ./myfsio_core
RUN cd myfsio_core \
&& maturin build --release --out /wheels
FROM python:3.14.3-slim
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1
WORKDIR /app
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY --from=builder /wheels/*.whl /tmp/
RUN pip install --no-cache-dir /tmp/*.whl && rm /tmp/*.whl
COPY . .
COPY app ./app
COPY templates ./templates
COPY static ./static
COPY run.py ./
COPY docker-entrypoint.sh ./
RUN pip install --no-cache-dir maturin \
&& cd myfsio_core \
&& maturin build --release \
&& pip install target/wheels/*.whl \
&& cd .. \
&& rm -rf myfsio_core/target \
&& pip uninstall -y maturin \
&& rustup self uninstall -y
RUN chmod +x docker-entrypoint.sh \
&& mkdir -p /app/data \
&& useradd -m -u 1000 myfsio \
RUN chmod +x docker-entrypoint.sh
RUN mkdir -p /app/data \
&& useradd -m -u 1000 myfsio \
&& chown -R myfsio:myfsio /app
USER myfsio

390
README.md
View File

@@ -1,212 +1,250 @@
# MyFSIO
MyFSIO is an S3-compatible object storage server with a Rust runtime and a filesystem-backed storage engine. The active server lives under `rust/myfsio-engine` and serves both the S3 API and the built-in web UI from a single process.
The repository still contains a `python/` tree, but you do not need Python to run the current server.
A lightweight, S3-compatible object storage system built with Flask. MyFSIO implements core AWS S3 REST API operations with filesystem-backed storage, making it ideal for local development, testing, and self-hosted storage scenarios.
## Features
- S3-compatible REST API with Signature Version 4 authentication
- Browser UI for buckets, objects, IAM users, policies, replication, metrics, and site administration
- Filesystem-backed storage rooted at `data/`
- Bucket versioning, multipart uploads, presigned URLs, CORS, object and bucket tagging
- Server-side encryption and built-in KMS support
- Optional background services for lifecycle, garbage collection, integrity scanning, operation metrics, and system metrics history
- Replication, site sync, and static website hosting support
**Core Storage**
- S3-compatible REST API with AWS Signature Version 4 authentication
- Bucket and object CRUD operations
- Object versioning with version history
- Multipart uploads for large files
- Presigned URLs (1 second to 7 days validity)
## Runtime Model
**Security & Access Control**
- IAM users with access key management and rotation
- Bucket policies (AWS Policy Version 2012-10-17)
- Server-side encryption (SSE-S3 and SSE-KMS)
- Built-in Key Management Service (KMS)
- Rate limiting per endpoint
MyFSIO now runs as one Rust process:
**Advanced Features**
- Cross-bucket replication to remote S3-compatible endpoints
- Hot-reload for bucket policies (no restart required)
- CORS configuration per bucket
- API listener on `HOST` + `PORT` (default `127.0.0.1:5000`)
- UI listener on `HOST` + `UI_PORT` (default `127.0.0.1:5100`)
- Shared state for storage, IAM, policies, sessions, metrics, and background workers
**Management UI**
- Web console for bucket and object management
- IAM dashboard for user administration
- Inline JSON policy editor with presets
- Object browser with folder navigation and bulk operations
- Dark mode support
If you want API-only mode, set `UI_ENABLED=false`. There is no separate "UI-only" runtime anymore.
## Architecture
```
+------------------+ +------------------+
| API Server | | UI Server |
| (port 5000) | | (port 5100) |
| | | |
| - S3 REST API |<------->| - Web Console |
| - SigV4 Auth | | - IAM Dashboard |
| - Presign URLs | | - Bucket Editor |
+--------+---------+ +------------------+
|
v
+------------------+ +------------------+
| Object Storage | | System Metadata |
| (filesystem) | | (.myfsio.sys/) |
| | | |
| data/<bucket>/ | | - IAM config |
| <objects> | | - Bucket policies|
| | | - Encryption keys|
+------------------+ +------------------+
```
## Quick Start
From the repository root:
```bash
cd rust/myfsio-engine
cargo run -p myfsio-server --
# Clone and setup
git clone https://gitea.jzwsite.com/kqjy/MyFSIO
cd s3
python -m venv .venv
# Activate virtual environment
# Windows PowerShell:
.\.venv\Scripts\Activate.ps1
# Windows CMD:
.venv\Scripts\activate.bat
# Linux/macOS:
source .venv/bin/activate
# Install dependencies
pip install -r requirements.txt
# Start both servers
python run.py
# Or start individually
python run.py --mode api # API only (port 5000)
python run.py --mode ui # UI only (port 5100)
```
Useful URLs:
**Default Credentials:** `localadmin` / `localadmin`
- UI: `http://127.0.0.1:5100/ui`
- API: `http://127.0.0.1:5000/`
- Health: `http://127.0.0.1:5000/myfsio/health`
On first boot, MyFSIO creates `data/.myfsio.sys/config/iam.json` and prints the generated admin access key and secret key to the console.
### Common CLI commands
```bash
# Show resolved configuration
cargo run -p myfsio-server -- --show-config
# Validate configuration and exit non-zero on critical issues
cargo run -p myfsio-server -- --check-config
# Reset admin credentials
cargo run -p myfsio-server -- --reset-cred
# API only
UI_ENABLED=false cargo run -p myfsio-server --
```
## Building a Binary
```bash
cd rust/myfsio-engine
cargo build --release -p myfsio-server
```
Binary locations:
- Linux/macOS: `rust/myfsio-engine/target/release/myfsio-server`
- Windows: `rust/myfsio-engine/target/release/myfsio-server.exe`
Run the built binary directly:
```bash
./target/release/myfsio-server
```
- **Web Console:** http://127.0.0.1:5100/ui
- **API Endpoint:** http://127.0.0.1:5000
## Configuration
The server reads environment variables from the process environment and also loads, when present:
- `/opt/myfsio/myfsio.env`
- `.env`
- `myfsio.env`
Core settings:
| Variable | Default | Description |
| --- | --- | --- |
| `HOST` | `127.0.0.1` | Bind address for API and UI listeners |
| `PORT` | `5000` | API port |
| `UI_PORT` | `5100` | UI port |
| `UI_ENABLED` | `true` | Disable to run API-only |
| `STORAGE_ROOT` | `./data` | Root directory for buckets and system metadata |
| `IAM_CONFIG` | `<STORAGE_ROOT>/.myfsio.sys/config/iam.json` | IAM config path |
| `API_BASE_URL` | unset | Public API base used by the UI and presigned URL generation |
| `AWS_REGION` | `us-east-1` | Region used in SigV4 scope |
| `SIGV4_TIMESTAMP_TOLERANCE_SECONDS` | `900` | Allowed request time skew |
| `PRESIGNED_URL_MIN_EXPIRY_SECONDS` | `1` | Minimum presigned URL expiry |
| `PRESIGNED_URL_MAX_EXPIRY_SECONDS` | `604800` | Maximum presigned URL expiry |
| `SECRET_KEY` | loaded from `.myfsio.sys/config/.secret` if present | Session signing key and IAM-at-rest encryption key |
| `ADMIN_ACCESS_KEY` | unset | Optional first-run or reset access key |
| `ADMIN_SECRET_KEY` | unset | Optional first-run or reset secret key |
Feature toggles:
| Variable | Default |
| --- | --- |
| `ENCRYPTION_ENABLED` | `false` |
| `KMS_ENABLED` | `false` |
| `GC_ENABLED` | `false` |
| `INTEGRITY_ENABLED` | `false` |
| `LIFECYCLE_ENABLED` | `false` |
| `METRICS_HISTORY_ENABLED` | `false` |
| `OPERATION_METRICS_ENABLED` | `false` |
| `WEBSITE_HOSTING_ENABLED` | `false` |
| `SITE_SYNC_ENABLED` | `false` |
Metrics and replication tuning:
| Variable | Default |
| --- | --- |
| `OPERATION_METRICS_INTERVAL_MINUTES` | `5` |
| `OPERATION_METRICS_RETENTION_HOURS` | `24` |
| `METRICS_HISTORY_INTERVAL_MINUTES` | `5` |
| `METRICS_HISTORY_RETENTION_HOURS` | `24` |
| `REPLICATION_CONNECT_TIMEOUT_SECONDS` | `5` |
| `REPLICATION_READ_TIMEOUT_SECONDS` | `30` |
| `REPLICATION_MAX_RETRIES` | `2` |
| `REPLICATION_STREAMING_THRESHOLD_BYTES` | `10485760` |
| `REPLICATION_MAX_FAILURES_PER_BUCKET` | `50` |
| `SITE_SYNC_INTERVAL_SECONDS` | `60` |
| `SITE_SYNC_BATCH_SIZE` | `100` |
| `SITE_SYNC_CONNECT_TIMEOUT_SECONDS` | `10` |
| `SITE_SYNC_READ_TIMEOUT_SECONDS` | `120` |
| `SITE_SYNC_MAX_RETRIES` | `2` |
| `SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS` | `1.0` |
UI asset overrides:
| Variable | Default |
| --- | --- |
| `TEMPLATES_DIR` | built-in crate templates directory |
| `STATIC_DIR` | built-in crate static directory |
See [docs.md](./docs.md) for the full Rust-side operations guide.
|----------|---------|-------------|
| `STORAGE_ROOT` | `./data` | Filesystem root for bucket storage |
| `IAM_CONFIG` | `.myfsio.sys/config/iam.json` | IAM user and policy store |
| `BUCKET_POLICY_PATH` | `.myfsio.sys/config/bucket_policies.json` | Bucket policy store |
| `API_BASE_URL` | `http://127.0.0.1:5000` | API endpoint for UI calls |
| `MAX_UPLOAD_SIZE` | `1073741824` | Maximum upload size in bytes (1 GB) |
| `MULTIPART_MIN_PART_SIZE` | `5242880` | Minimum multipart part size (5 MB) |
| `UI_PAGE_SIZE` | `100` | Default page size for listings |
| `SECRET_KEY` | `dev-secret-key` | Flask session secret |
| `AWS_REGION` | `us-east-1` | Region for SigV4 signing |
| `AWS_SERVICE` | `s3` | Service name for SigV4 signing |
| `ENCRYPTION_ENABLED` | `false` | Enable server-side encryption |
| `KMS_ENABLED` | `false` | Enable Key Management Service |
| `LOG_LEVEL` | `INFO` | Logging verbosity |
| `SIGV4_TIMESTAMP_TOLERANCE_SECONDS` | `900` | Max time skew for SigV4 requests |
| `PRESIGNED_URL_MAX_EXPIRY_SECONDS` | `604800` | Max presigned URL expiry (7 days) |
| `REPLICATION_CONNECT_TIMEOUT_SECONDS` | `5` | Replication connection timeout |
| `SITE_SYNC_ENABLED` | `false` | Enable bi-directional site sync |
| `OBJECT_TAG_LIMIT` | `50` | Maximum tags per object |
## Data Layout
```text
data/
<bucket>/
.myfsio.sys/
config/
iam.json
bucket_policies.json
connections.json
operation_metrics.json
metrics_history.json
buckets/<bucket>/
meta/
versions/
multipart/
keys/
```
data/
├── <bucket>/ # User buckets with objects
└── .myfsio.sys/ # System metadata
├── config/
│ ├── iam.json # IAM users and policies
│ ├── bucket_policies.json # Bucket policies
│ ├── replication_rules.json
│ └── connections.json # Remote S3 connections
├── buckets/<bucket>/
│ ├── meta/ # Object metadata (.meta.json)
│ ├── versions/ # Archived object versions
│ └── .bucket.json # Bucket config (versioning, CORS)
├── multipart/ # Active multipart uploads
└── keys/ # Encryption keys (SSE-S3/KMS)
```
## API Reference
All endpoints require AWS Signature Version 4 authentication unless using presigned URLs or public bucket policies.
### Bucket Operations
| Method | Endpoint | Description |
|--------|----------|-------------|
| `GET` | `/` | List all buckets |
| `PUT` | `/<bucket>` | Create bucket |
| `DELETE` | `/<bucket>` | Delete bucket (must be empty) |
| `HEAD` | `/<bucket>` | Check bucket exists |
### Object Operations
| Method | Endpoint | Description |
|--------|----------|-------------|
| `GET` | `/<bucket>` | List objects (supports `list-type=2`) |
| `PUT` | `/<bucket>/<key>` | Upload object |
| `GET` | `/<bucket>/<key>` | Download object |
| `DELETE` | `/<bucket>/<key>` | Delete object |
| `HEAD` | `/<bucket>/<key>` | Get object metadata |
| `POST` | `/<bucket>/<key>?uploads` | Initiate multipart upload |
| `PUT` | `/<bucket>/<key>?partNumber=N&uploadId=X` | Upload part |
| `POST` | `/<bucket>/<key>?uploadId=X` | Complete multipart upload |
| `DELETE` | `/<bucket>/<key>?uploadId=X` | Abort multipart upload |
### Bucket Policies (S3-compatible)
| Method | Endpoint | Description |
|--------|----------|-------------|
| `GET` | `/<bucket>?policy` | Get bucket policy |
| `PUT` | `/<bucket>?policy` | Set bucket policy |
| `DELETE` | `/<bucket>?policy` | Delete bucket policy |
### Versioning
| Method | Endpoint | Description |
|--------|----------|-------------|
| `GET` | `/<bucket>/<key>?versionId=X` | Get specific version |
| `DELETE` | `/<bucket>/<key>?versionId=X` | Delete specific version |
| `GET` | `/<bucket>?versions` | List object versions |
### Health Check
| Method | Endpoint | Description |
|--------|----------|-------------|
| `GET` | `/myfsio/health` | Health check endpoint |
## IAM & Access Control
### Users and Access Keys
On first run, MyFSIO creates a default admin user (`localadmin`/`localadmin`). Use the IAM dashboard to:
- Create and delete users
- Generate and rotate access keys
- Attach inline policies to users
- Control IAM management permissions
### Bucket Policies
Bucket policies follow AWS policy grammar (Version `2012-10-17`) with support for:
- Principal-based access (`*` for anonymous, specific users)
- Action-based permissions (`s3:GetObject`, `s3:PutObject`, etc.)
- Resource patterns (`arn:aws:s3:::bucket/*`)
- Condition keys
**Policy Presets:**
- **Public:** Grants anonymous read access (`s3:GetObject`, `s3:ListBucket`)
- **Private:** Removes bucket policy (IAM-only access)
- **Custom:** Manual policy editing with draft preservation
Policies hot-reload when the JSON file changes.
## Server-Side Encryption
MyFSIO supports two encryption modes:
- **SSE-S3:** Server-managed keys with automatic key rotation
- **SSE-KMS:** Customer-managed keys via built-in KMS
Enable encryption with:
```bash
ENCRYPTION_ENABLED=true python run.py
```
## Cross-Bucket Replication
Replicate objects to remote S3-compatible endpoints:
1. Configure remote connections in the UI
2. Create replication rules specifying source/destination
3. Objects are automatically replicated on upload
## Docker
Build the Rust image from the `rust/` directory:
```bash
docker build -t myfsio ./rust
docker run --rm -p 5000:5000 -p 5100:5100 -v "${PWD}/data:/app/data" myfsio
docker build -t myfsio .
docker run -p 5000:5000 -p 5100:5100 -v ./data:/app/data myfsio
```
If the instance sits behind a reverse proxy, set `API_BASE_URL` to the public S3 endpoint.
## Linux Installation
The repository includes `scripts/install.sh` for systemd-style Linux installs. Build the Rust binary first, then pass it to the installer:
```bash
cd rust/myfsio-engine
cargo build --release -p myfsio-server
cd ../..
sudo ./scripts/install.sh --binary ./rust/myfsio-engine/target/release/myfsio-server
```
The installer copies the binary into `/opt/myfsio/myfsio`, writes `/opt/myfsio/myfsio.env`, and can register a `myfsio.service` unit.
## Testing
Run the Rust test suite from the workspace:
```bash
cd rust/myfsio-engine
cargo test
# Run all tests
pytest tests/ -v
# Run specific test file
pytest tests/test_api.py -v
# Run with coverage
pytest tests/ --cov=app --cov-report=html
```
## Health Check
## References
`GET /myfsio/health` returns:
```json
{
"status": "ok",
"version": "0.5.0"
}
```
The `version` field comes from the Rust crate version in `rust/myfsio-engine/crates/myfsio-server/Cargo.toml`.
- [Amazon S3 Documentation](https://docs.aws.amazon.com/s3/)
- [AWS Signature Version 4](https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html)
- [S3 Bucket Policy Examples](https://docs.aws.amazon.com/AmazonS3/latest/userguide/example-bucket-policies.html)

View File

@@ -1,13 +1,13 @@
from __future__ import annotations
import html as html_module
import itertools
import logging
import mimetypes
import os
import shutil
import sys
import time
import uuid
from logging.handlers import RotatingFileHandler
from pathlib import Path
from datetime import timedelta
@@ -18,8 +18,6 @@ from flask_cors import CORS
from flask_wtf.csrf import CSRFError
from werkzeug.middleware.proxy_fix import ProxyFix
import io
from .access_logging import AccessLoggingService
from .operation_metrics import OperationMetricsCollector, classify_endpoint
from .compression import GzipMiddleware
@@ -31,8 +29,6 @@ from .encryption import EncryptionManager
from .extensions import limiter, csrf
from .iam import IamService
from .kms import KMSManager
from .gc import GarbageCollector
from .integrity import IntegrityChecker
from .lifecycle import LifecycleManager
from .notifications import NotificationService
from .object_lock import ObjectLockService
@@ -43,66 +39,6 @@ from .storage import ObjectStorage, StorageError
from .version import get_version
from .website_domains import WebsiteDomainStore
_request_counter = itertools.count(1)
class _ChunkedTransferMiddleware:
def __init__(self, app):
self.app = app
def __call__(self, environ, start_response):
if environ.get("REQUEST_METHOD") not in ("PUT", "POST"):
return self.app(environ, start_response)
transfer_encoding = environ.get("HTTP_TRANSFER_ENCODING", "")
content_length = environ.get("CONTENT_LENGTH")
if "chunked" in transfer_encoding.lower():
if content_length:
del environ["HTTP_TRANSFER_ENCODING"]
else:
raw = environ.get("wsgi.input")
if raw:
try:
if hasattr(raw, "seek"):
raw.seek(0)
body = raw.read()
except Exception:
body = b""
if body:
environ["wsgi.input"] = io.BytesIO(body)
environ["CONTENT_LENGTH"] = str(len(body))
del environ["HTTP_TRANSFER_ENCODING"]
content_length = environ.get("CONTENT_LENGTH")
if not content_length or content_length == "0":
sha256 = environ.get("HTTP_X_AMZ_CONTENT_SHA256", "")
decoded_len = environ.get("HTTP_X_AMZ_DECODED_CONTENT_LENGTH", "")
content_encoding = environ.get("HTTP_CONTENT_ENCODING", "")
if ("STREAMING" in sha256.upper() or decoded_len
or "aws-chunked" in content_encoding.lower()):
raw = environ.get("wsgi.input")
if raw:
try:
if hasattr(raw, "seek"):
raw.seek(0)
body = raw.read()
except Exception:
body = b""
if body:
environ["wsgi.input"] = io.BytesIO(body)
environ["CONTENT_LENGTH"] = str(len(body))
raw = environ.get("wsgi.input")
if raw and hasattr(raw, "seek"):
try:
raw.seek(0)
except Exception:
pass
return self.app(environ, start_response)
def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
"""Migrate config file from legacy locations to the active path.
@@ -167,11 +103,10 @@ def create_app(
)
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=num_proxies, x_proto=num_proxies, x_host=num_proxies, x_prefix=num_proxies)
# Enable gzip compression for responses (10-20x smaller JSON payloads)
if app.config.get("ENABLE_GZIP", True):
app.wsgi_app = GzipMiddleware(app.wsgi_app, compression_level=6)
app.wsgi_app = _ChunkedTransferMiddleware(app.wsgi_app)
_configure_cors(app)
_configure_logging(app)
@@ -184,7 +119,6 @@ def create_app(
object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100),
bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0),
object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024),
meta_read_cache_max=app.config.get("META_READ_CACHE_MAX", 2048),
)
if app.config.get("WARM_CACHE_ON_STARTUP", True) and not app.config.get("TESTING"):
@@ -194,7 +128,6 @@ def create_app(
Path(app.config["IAM_CONFIG"]),
auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5),
auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15),
encryption_key=app.config.get("SECRET_KEY"),
)
bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))
@@ -285,31 +218,6 @@ def create_app(
)
lifecycle_manager.start()
gc_collector = None
if app.config.get("GC_ENABLED", False):
gc_collector = GarbageCollector(
storage_root=storage_root,
interval_hours=app.config.get("GC_INTERVAL_HOURS", 6.0),
temp_file_max_age_hours=app.config.get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0),
multipart_max_age_days=app.config.get("GC_MULTIPART_MAX_AGE_DAYS", 7),
lock_file_max_age_hours=app.config.get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0),
dry_run=app.config.get("GC_DRY_RUN", False),
io_throttle_ms=app.config.get("GC_IO_THROTTLE_MS", 10),
)
gc_collector.start()
integrity_checker = None
if app.config.get("INTEGRITY_ENABLED", False):
integrity_checker = IntegrityChecker(
storage_root=storage_root,
interval_hours=app.config.get("INTEGRITY_INTERVAL_HOURS", 24.0),
batch_size=app.config.get("INTEGRITY_BATCH_SIZE", 1000),
auto_heal=app.config.get("INTEGRITY_AUTO_HEAL", False),
dry_run=app.config.get("INTEGRITY_DRY_RUN", False),
io_throttle_ms=app.config.get("INTEGRITY_IO_THROTTLE_MS", 10),
)
integrity_checker.start()
app.extensions["object_storage"] = storage
app.extensions["iam"] = iam
app.extensions["bucket_policies"] = bucket_policies
@@ -321,8 +229,6 @@ def create_app(
app.extensions["kms"] = kms_manager
app.extensions["acl"] = acl_service
app.extensions["lifecycle"] = lifecycle_manager
app.extensions["gc"] = gc_collector
app.extensions["integrity"] = integrity_checker
app.extensions["object_lock"] = object_lock_service
app.extensions["notifications"] = notification_service
app.extensions["access_logging"] = access_logging_service
@@ -575,9 +481,13 @@ def _configure_logging(app: Flask) -> None:
@app.before_request
def _log_request_start() -> None:
g.request_id = f"{os.getpid():x}{next(_request_counter):012x}"
g.request_id = uuid.uuid4().hex
g.request_started_at = time.perf_counter()
g.request_bytes_in = request.content_length or 0
app.logger.info(
"Request started",
extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
)
@app.before_request
def _maybe_serve_website():
@@ -626,57 +536,30 @@ def _configure_logging(app: Flask) -> None:
is_encrypted = "x-amz-server-side-encryption" in metadata
except (StorageError, OSError):
pass
if request.method == "HEAD":
response = Response(status=200)
if is_encrypted and hasattr(storage, "get_object_data"):
try:
data, _ = storage.get_object_data(bucket, object_key)
response.headers["Content-Length"] = len(data)
except (StorageError, OSError):
return _website_error_response(500, "Internal Server Error")
else:
try:
stat = obj_path.stat()
response.headers["Content-Length"] = stat.st_size
except OSError:
return _website_error_response(500, "Internal Server Error")
response.headers["Content-Type"] = content_type
return response
if is_encrypted and hasattr(storage, "get_object_data"):
try:
data, _ = storage.get_object_data(bucket, object_key)
file_size = len(data)
response = Response(data, mimetype=content_type)
response.headers["Content-Length"] = len(data)
return response
except (StorageError, OSError):
return _website_error_response(500, "Internal Server Error")
else:
data = None
try:
stat = obj_path.stat()
file_size = stat.st_size
except OSError:
return _website_error_response(500, "Internal Server Error")
if request.method == "HEAD":
response = Response(status=200)
response.headers["Content-Length"] = file_size
response.headers["Content-Type"] = content_type
response.headers["Accept-Ranges"] = "bytes"
return response
from .s3_api import _parse_range_header
range_header = request.headers.get("Range")
if range_header:
ranges = _parse_range_header(range_header, file_size)
if ranges is None:
return Response(status=416, headers={"Content-Range": f"bytes */{file_size}"})
start, end = ranges[0]
length = end - start + 1
if data is not None:
partial_data = data[start:end + 1]
response = Response(partial_data, status=206, mimetype=content_type)
else:
def _stream_range(file_path, start_pos, length_to_read):
with file_path.open("rb") as f:
f.seek(start_pos)
remaining = length_to_read
while remaining > 0:
chunk = f.read(min(262144, remaining))
if not chunk:
break
remaining -= len(chunk)
yield chunk
response = Response(_stream_range(obj_path, start, length), status=206, mimetype=content_type, direct_passthrough=True)
response.headers["Content-Range"] = f"bytes {start}-{end}/{file_size}"
response.headers["Content-Length"] = length
response.headers["Accept-Ranges"] = "bytes"
return response
if data is not None:
response = Response(data, mimetype=content_type)
response.headers["Content-Length"] = file_size
response.headers["Accept-Ranges"] = "bytes"
return response
def _stream(file_path):
with file_path.open("rb") as f:
while True:
@@ -684,10 +567,13 @@ def _configure_logging(app: Flask) -> None:
if not chunk:
break
yield chunk
response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
response.headers["Content-Length"] = file_size
response.headers["Accept-Ranges"] = "bytes"
return response
try:
stat = obj_path.stat()
response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
response.headers["Content-Length"] = stat.st_size
return response
except OSError:
return _website_error_response(500, "Internal Server Error")
def _serve_website_error(storage, bucket, error_doc_key, status_code):
if not error_doc_key:
@@ -730,19 +616,17 @@ def _configure_logging(app: Flask) -> None:
duration_ms = 0.0
if hasattr(g, "request_started_at"):
duration_ms = (time.perf_counter() - g.request_started_at) * 1000
request_id = getattr(g, "request_id", f"{os.getpid():x}{next(_request_counter):012x}")
request_id = getattr(g, "request_id", uuid.uuid4().hex)
response.headers.setdefault("X-Request-ID", request_id)
if app.logger.isEnabledFor(logging.INFO):
app.logger.info(
"Request completed",
extra={
"path": request.path,
"method": request.method,
"remote_addr": request.remote_addr,
},
)
app.logger.info(
"Request completed",
extra={
"path": request.path,
"method": request.method,
"remote_addr": request.remote_addr,
},
)
response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
response.headers["Server"] = "MyFSIO"
operation_metrics = app.extensions.get("operation_metrics")
if operation_metrics:

View File

@@ -14,8 +14,6 @@ from flask import Blueprint, Response, current_app, jsonify, request
from .connections import ConnectionStore
from .extensions import limiter
from .gc import GarbageCollector
from .integrity import IntegrityChecker
from .iam import IamError, Principal
from .replication import ReplicationManager
from .site_registry import PeerSite, SiteInfo, SiteRegistry
@@ -686,107 +684,6 @@ def _storage():
return current_app.extensions["object_storage"]
def _require_iam_action(action: str):
principal, error = _require_principal()
if error:
return None, error
try:
_iam().authorize(principal, None, action)
return principal, None
except IamError:
return None, _json_error("AccessDenied", f"Requires {action} permission", 403)
@admin_api_bp.route("/iam/users", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def iam_list_users():
principal, error = _require_iam_action("iam:list_users")
if error:
return error
return jsonify({"users": _iam().list_users()})
@admin_api_bp.route("/iam/users/<identifier>", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def iam_get_user(identifier):
principal, error = _require_iam_action("iam:get_user")
if error:
return error
try:
user_id = _iam().resolve_user_id(identifier)
return jsonify(_iam().get_user_by_id(user_id))
except IamError as exc:
return _json_error("NotFound", str(exc), 404)
@admin_api_bp.route("/iam/users/<identifier>/policies", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def iam_get_user_policies(identifier):
principal, error = _require_iam_action("iam:get_policy")
if error:
return error
try:
return jsonify({"policies": _iam().get_user_policies(identifier)})
except IamError as exc:
return _json_error("NotFound", str(exc), 404)
@admin_api_bp.route("/iam/users/<identifier>/keys", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def iam_create_access_key(identifier):
principal, error = _require_iam_action("iam:create_key")
if error:
return error
try:
result = _iam().create_access_key(identifier)
logger.info("Access key created for %s by %s", identifier, principal.access_key)
return jsonify(result), 201
except IamError as exc:
return _json_error("InvalidRequest", str(exc), 400)
@admin_api_bp.route("/iam/users/<identifier>/keys/<access_key>", methods=["DELETE"])
@limiter.limit(lambda: _get_admin_rate_limit())
def iam_delete_access_key(identifier, access_key):
principal, error = _require_iam_action("iam:delete_key")
if error:
return error
try:
_iam().delete_access_key(access_key)
logger.info("Access key %s deleted by %s", access_key, principal.access_key)
return "", 204
except IamError as exc:
return _json_error("InvalidRequest", str(exc), 400)
@admin_api_bp.route("/iam/users/<identifier>/disable", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def iam_disable_user(identifier):
principal, error = _require_iam_action("iam:disable_user")
if error:
return error
try:
_iam().disable_user(identifier)
logger.info("User %s disabled by %s", identifier, principal.access_key)
return jsonify({"status": "disabled"})
except IamError as exc:
return _json_error("InvalidRequest", str(exc), 400)
@admin_api_bp.route("/iam/users/<identifier>/enable", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def iam_enable_user(identifier):
principal, error = _require_iam_action("iam:disable_user")
if error:
return error
try:
_iam().enable_user(identifier)
logger.info("User %s enabled by %s", identifier, principal.access_key)
return jsonify({"status": "enabled"})
except IamError as exc:
return _json_error("InvalidRequest", str(exc), 400)
@admin_api_bp.route("/website-domains", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def list_website_domains():
@@ -879,106 +776,3 @@ def delete_website_domain(domain: str):
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
logger.info("Website domain mapping deleted: %s", domain)
return Response(status=204)
def _gc() -> Optional[GarbageCollector]:
return current_app.extensions.get("gc")
@admin_api_bp.route("/gc/status", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def gc_status():
principal, error = _require_admin()
if error:
return error
gc = _gc()
if not gc:
return jsonify({"enabled": False, "message": "GC is not enabled. Set GC_ENABLED=true to enable."})
return jsonify(gc.get_status())
@admin_api_bp.route("/gc/run", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def gc_run_now():
principal, error = _require_admin()
if error:
return error
gc = _gc()
if not gc:
return _json_error("InvalidRequest", "GC is not enabled", 400)
payload = request.get_json(silent=True) or {}
started = gc.run_async(dry_run=payload.get("dry_run"))
logger.info("GC manual run by %s", principal.access_key)
if not started:
return _json_error("Conflict", "GC is already in progress", 409)
return jsonify({"status": "started"})
@admin_api_bp.route("/gc/history", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def gc_history():
principal, error = _require_admin()
if error:
return error
gc = _gc()
if not gc:
return jsonify({"executions": []})
limit = min(int(request.args.get("limit", 50)), 200)
offset = int(request.args.get("offset", 0))
records = gc.get_history(limit=limit, offset=offset)
return jsonify({"executions": records})
def _integrity() -> Optional[IntegrityChecker]:
return current_app.extensions.get("integrity")
@admin_api_bp.route("/integrity/status", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def integrity_status():
principal, error = _require_admin()
if error:
return error
checker = _integrity()
if not checker:
return jsonify({"enabled": False, "message": "Integrity checker is not enabled. Set INTEGRITY_ENABLED=true to enable."})
return jsonify(checker.get_status())
@admin_api_bp.route("/integrity/run", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def integrity_run_now():
principal, error = _require_admin()
if error:
return error
checker = _integrity()
if not checker:
return _json_error("InvalidRequest", "Integrity checker is not enabled", 400)
payload = request.get_json(silent=True) or {}
override_dry_run = payload.get("dry_run")
override_auto_heal = payload.get("auto_heal")
started = checker.run_async(
auto_heal=override_auto_heal if override_auto_heal is not None else None,
dry_run=override_dry_run if override_dry_run is not None else None,
)
logger.info("Integrity manual run by %s", principal.access_key)
if not started:
return _json_error("Conflict", "A scan is already in progress", 409)
return jsonify({"status": "started"})
@admin_api_bp.route("/integrity/history", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def integrity_history():
principal, error = _require_admin()
if error:
return error
checker = _integrity()
if not checker:
return jsonify({"executions": []})
limit = min(int(request.args.get("limit", 50)), 200)
offset = int(request.args.get("offset", 0))
records = checker.get_history(limit=limit, offset=offset)
return jsonify({"executions": records})

View File

@@ -2,7 +2,6 @@ from __future__ import annotations
import ipaddress
import json
import os
import re
import time
from dataclasses import dataclass, field
@@ -269,7 +268,7 @@ class BucketPolicyStore:
self._last_mtime = self._current_mtime()
# Performance: Avoid stat() on every request
self._last_stat_check = 0.0
self._stat_check_interval = float(os.environ.get("BUCKET_POLICY_STAT_CHECK_INTERVAL_SECONDS", "2.0"))
self._stat_check_interval = 1.0 # Only check mtime every 1 second
def maybe_reload(self) -> None:
# Performance: Skip stat check if we checked recently

View File

@@ -25,7 +25,7 @@ def _calculate_auto_connection_limit() -> int:
def _calculate_auto_backlog(connection_limit: int) -> int:
return max(128, min(connection_limit * 2, 4096))
return max(64, min(connection_limit * 2, 4096))
def _validate_rate_limit(value: str) -> str:
@@ -115,7 +115,6 @@ class AppConfig:
server_connection_limit: int
server_backlog: int
server_channel_timeout: int
server_max_buffer_size: int
server_threads_auto: bool
server_connection_limit_auto: bool
server_backlog_auto: bool
@@ -136,7 +135,6 @@ class AppConfig:
site_sync_clock_skew_tolerance_seconds: float
object_key_max_length_bytes: int
object_cache_max_size: int
meta_read_cache_max: int
bucket_config_cache_ttl_seconds: float
object_tag_limit: int
encryption_chunk_size_bytes: int
@@ -152,19 +150,6 @@ class AppConfig:
allowed_redirect_hosts: list[str]
allow_internal_endpoints: bool
website_hosting_enabled: bool
gc_enabled: bool
gc_interval_hours: float
gc_temp_file_max_age_hours: float
gc_multipart_max_age_days: int
gc_lock_file_max_age_hours: float
gc_dry_run: bool
gc_io_throttle_ms: int
integrity_enabled: bool
integrity_interval_hours: float
integrity_batch_size: int
integrity_auto_heal: bool
integrity_dry_run: bool
integrity_io_throttle_ms: int
@classmethod
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
@@ -297,7 +282,6 @@ class AppConfig:
server_backlog_auto = False
server_channel_timeout = int(_get("SERVER_CHANNEL_TIMEOUT", 120))
server_max_buffer_size = int(_get("SERVER_MAX_BUFFER_SIZE", 1024 * 1024 * 128))
site_sync_enabled = str(_get("SITE_SYNC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
site_sync_interval_seconds = int(_get("SITE_SYNC_INTERVAL_SECONDS", 60))
site_sync_batch_size = int(_get("SITE_SYNC_BATCH_SIZE", 100))
@@ -316,7 +300,6 @@ class AppConfig:
site_sync_clock_skew_tolerance_seconds = float(_get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0))
object_key_max_length_bytes = int(_get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024))
object_cache_max_size = int(_get("OBJECT_CACHE_MAX_SIZE", 100))
meta_read_cache_max = int(_get("META_READ_CACHE_MAX", 2048))
bucket_config_cache_ttl_seconds = float(_get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0))
object_tag_limit = int(_get("OBJECT_TAG_LIMIT", 50))
encryption_chunk_size_bytes = int(_get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024))
@@ -336,19 +319,6 @@ class AppConfig:
allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
gc_enabled = str(_get("GC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
gc_interval_hours = float(_get("GC_INTERVAL_HOURS", 6.0))
gc_temp_file_max_age_hours = float(_get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0))
gc_multipart_max_age_days = int(_get("GC_MULTIPART_MAX_AGE_DAYS", 7))
gc_lock_file_max_age_hours = float(_get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0))
gc_dry_run = str(_get("GC_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
gc_io_throttle_ms = int(_get("GC_IO_THROTTLE_MS", 10))
integrity_enabled = str(_get("INTEGRITY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
integrity_interval_hours = float(_get("INTEGRITY_INTERVAL_HOURS", 24.0))
integrity_batch_size = int(_get("INTEGRITY_BATCH_SIZE", 1000))
integrity_auto_heal = str(_get("INTEGRITY_AUTO_HEAL", "0")).lower() in {"1", "true", "yes", "on"}
integrity_dry_run = str(_get("INTEGRITY_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
integrity_io_throttle_ms = int(_get("INTEGRITY_IO_THROTTLE_MS", 10))
return cls(storage_root=storage_root,
max_upload_size=max_upload_size,
@@ -402,7 +372,6 @@ class AppConfig:
server_connection_limit=server_connection_limit,
server_backlog=server_backlog,
server_channel_timeout=server_channel_timeout,
server_max_buffer_size=server_max_buffer_size,
server_threads_auto=server_threads_auto,
server_connection_limit_auto=server_connection_limit_auto,
server_backlog_auto=server_backlog_auto,
@@ -423,7 +392,6 @@ class AppConfig:
site_sync_clock_skew_tolerance_seconds=site_sync_clock_skew_tolerance_seconds,
object_key_max_length_bytes=object_key_max_length_bytes,
object_cache_max_size=object_cache_max_size,
meta_read_cache_max=meta_read_cache_max,
bucket_config_cache_ttl_seconds=bucket_config_cache_ttl_seconds,
object_tag_limit=object_tag_limit,
encryption_chunk_size_bytes=encryption_chunk_size_bytes,
@@ -438,20 +406,7 @@ class AppConfig:
num_trusted_proxies=num_trusted_proxies,
allowed_redirect_hosts=allowed_redirect_hosts,
allow_internal_endpoints=allow_internal_endpoints,
website_hosting_enabled=website_hosting_enabled,
gc_enabled=gc_enabled,
gc_interval_hours=gc_interval_hours,
gc_temp_file_max_age_hours=gc_temp_file_max_age_hours,
gc_multipart_max_age_days=gc_multipart_max_age_days,
gc_lock_file_max_age_hours=gc_lock_file_max_age_hours,
gc_dry_run=gc_dry_run,
gc_io_throttle_ms=gc_io_throttle_ms,
integrity_enabled=integrity_enabled,
integrity_interval_hours=integrity_interval_hours,
integrity_batch_size=integrity_batch_size,
integrity_auto_heal=integrity_auto_heal,
integrity_dry_run=integrity_dry_run,
integrity_io_throttle_ms=integrity_io_throttle_ms)
website_hosting_enabled=website_hosting_enabled)
def validate_and_report(self) -> list[str]:
"""Validate configuration and return a list of warnings/issues.
@@ -516,12 +471,10 @@ class AppConfig:
issues.append(f"CRITICAL: SERVER_THREADS={self.server_threads} is outside valid range (1-64). Server cannot start.")
if not (10 <= self.server_connection_limit <= 1000):
issues.append(f"CRITICAL: SERVER_CONNECTION_LIMIT={self.server_connection_limit} is outside valid range (10-1000). Server cannot start.")
if not (128 <= self.server_backlog <= 4096):
issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (128-4096). Server cannot start.")
if not (64 <= self.server_backlog <= 4096):
issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (64-4096). Server cannot start.")
if not (10 <= self.server_channel_timeout <= 300):
issues.append(f"CRITICAL: SERVER_CHANNEL_TIMEOUT={self.server_channel_timeout} is outside valid range (10-300). Server cannot start.")
if self.server_max_buffer_size < 1024 * 1024:
issues.append(f"WARNING: SERVER_MAX_BUFFER_SIZE={self.server_max_buffer_size} is less than 1MB. Large uploads will fail.")
if sys.platform != "win32":
try:
@@ -567,7 +520,6 @@ class AppConfig:
print(f" CONNECTION_LIMIT: {self.server_connection_limit}{_auto(self.server_connection_limit_auto)}")
print(f" BACKLOG: {self.server_backlog}{_auto(self.server_backlog_auto)}")
print(f" CHANNEL_TIMEOUT: {self.server_channel_timeout}s")
print(f" MAX_BUFFER_SIZE: {self.server_max_buffer_size // (1024 * 1024)}MB")
print("=" * 60)
issues = self.validate_and_report()
@@ -633,7 +585,6 @@ class AppConfig:
"SERVER_CONNECTION_LIMIT": self.server_connection_limit,
"SERVER_BACKLOG": self.server_backlog,
"SERVER_CHANNEL_TIMEOUT": self.server_channel_timeout,
"SERVER_MAX_BUFFER_SIZE": self.server_max_buffer_size,
"SITE_SYNC_ENABLED": self.site_sync_enabled,
"SITE_SYNC_INTERVAL_SECONDS": self.site_sync_interval_seconds,
"SITE_SYNC_BATCH_SIZE": self.site_sync_batch_size,
@@ -651,7 +602,6 @@ class AppConfig:
"SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS": self.site_sync_clock_skew_tolerance_seconds,
"OBJECT_KEY_MAX_LENGTH_BYTES": self.object_key_max_length_bytes,
"OBJECT_CACHE_MAX_SIZE": self.object_cache_max_size,
"META_READ_CACHE_MAX": self.meta_read_cache_max,
"BUCKET_CONFIG_CACHE_TTL_SECONDS": self.bucket_config_cache_ttl_seconds,
"OBJECT_TAG_LIMIT": self.object_tag_limit,
"ENCRYPTION_CHUNK_SIZE_BYTES": self.encryption_chunk_size_bytes,
@@ -667,17 +617,4 @@ class AppConfig:
"ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
"ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
"WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled,
"GC_ENABLED": self.gc_enabled,
"GC_INTERVAL_HOURS": self.gc_interval_hours,
"GC_TEMP_FILE_MAX_AGE_HOURS": self.gc_temp_file_max_age_hours,
"GC_MULTIPART_MAX_AGE_DAYS": self.gc_multipart_max_age_days,
"GC_LOCK_FILE_MAX_AGE_HOURS": self.gc_lock_file_max_age_hours,
"GC_DRY_RUN": self.gc_dry_run,
"GC_IO_THROTTLE_MS": self.gc_io_throttle_ms,
"INTEGRITY_ENABLED": self.integrity_enabled,
"INTEGRITY_INTERVAL_HOURS": self.integrity_interval_hours,
"INTEGRITY_BATCH_SIZE": self.integrity_batch_size,
"INTEGRITY_AUTO_HEAL": self.integrity_auto_heal,
"INTEGRITY_DRY_RUN": self.integrity_dry_run,
"INTEGRITY_IO_THROTTLE_MS": self.integrity_io_throttle_ms,
}

View File

@@ -193,9 +193,6 @@ class EncryptedObjectStorage:
def list_objects_shallow(self, bucket_name: str, **kwargs):
return self.storage.list_objects_shallow(bucket_name, **kwargs)
def iter_objects_shallow(self, bucket_name: str, **kwargs):
return self.storage.iter_objects_shallow(bucket_name, **kwargs)
def search_objects(self, bucket_name: str, query: str, **kwargs):
return self.storage.search_objects(bucket_name, query, **kwargs)

View File

@@ -19,17 +19,6 @@ from cryptography.hazmat.primitives import hashes
if sys.platform != "win32":
import fcntl
try:
import myfsio_core as _rc
if not all(hasattr(_rc, f) for f in (
"encrypt_stream_chunked", "decrypt_stream_chunked",
)):
raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
_HAS_RUST = True
except ImportError:
_rc = None
_HAS_RUST = False
logger = logging.getLogger(__name__)
@@ -349,69 +338,6 @@ class StreamingEncryptor:
output.seek(0)
return output
def encrypt_file(self, input_path: str, output_path: str) -> EncryptionMetadata:
data_key, encrypted_data_key = self.provider.generate_data_key()
base_nonce = secrets.token_bytes(12)
if _HAS_RUST:
_rc.encrypt_stream_chunked(
input_path, output_path, data_key, base_nonce, self.chunk_size
)
else:
with open(input_path, "rb") as stream:
aesgcm = AESGCM(data_key)
with open(output_path, "wb") as out:
out.write(b"\x00\x00\x00\x00")
chunk_index = 0
while True:
chunk = stream.read(self.chunk_size)
if not chunk:
break
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None)
out.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big"))
out.write(encrypted_chunk)
chunk_index += 1
out.seek(0)
out.write(chunk_index.to_bytes(4, "big"))
return EncryptionMetadata(
algorithm="AES256",
key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local",
nonce=base_nonce,
encrypted_data_key=encrypted_data_key,
)
def decrypt_file(self, input_path: str, output_path: str,
metadata: EncryptionMetadata) -> None:
data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id)
base_nonce = metadata.nonce
if _HAS_RUST:
_rc.decrypt_stream_chunked(input_path, output_path, data_key, base_nonce)
else:
with open(input_path, "rb") as stream:
chunk_count_bytes = stream.read(4)
if len(chunk_count_bytes) < 4:
raise EncryptionError("Invalid encrypted stream: missing header")
chunk_count = int.from_bytes(chunk_count_bytes, "big")
aesgcm = AESGCM(data_key)
with open(output_path, "wb") as out:
for chunk_index in range(chunk_count):
size_bytes = stream.read(self.HEADER_SIZE)
if len(size_bytes) < self.HEADER_SIZE:
raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}")
chunk_size = int.from_bytes(size_bytes, "big")
encrypted_chunk = stream.read(chunk_size)
if len(encrypted_chunk) < chunk_size:
raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}")
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
try:
decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None)
out.write(decrypted_chunk)
except Exception as exc:
raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc
class EncryptionManager:
"""Manages encryption providers and operations."""

View File

@@ -175,21 +175,13 @@ def handle_app_error(error: AppError) -> Response:
def handle_rate_limit_exceeded(e: RateLimitExceeded) -> Response:
g.s3_error_code = "SlowDown"
if request.path.startswith("/ui") or request.path.startswith("/buckets"):
wants_json = (
request.is_json or
request.headers.get("X-Requested-With") == "XMLHttpRequest" or
"application/json" in request.accept_mimetypes.values()
)
if wants_json:
return jsonify({"success": False, "error": {"code": "SlowDown", "message": "Please reduce your request rate."}}), 429
error = Element("Error")
SubElement(error, "Code").text = "SlowDown"
SubElement(error, "Message").text = "Please reduce your request rate."
SubElement(error, "Resource").text = request.path
SubElement(error, "RequestId").text = getattr(g, "request_id", "")
xml_bytes = tostring(error, encoding="utf-8")
return Response(xml_bytes, status="429 Too Many Requests", mimetype="application/xml")
return Response(xml_bytes, status=429, mimetype="application/xml")
def register_error_handlers(app):

615
app/iam.py Normal file
View File

@@ -0,0 +1,615 @@
from __future__ import annotations
import hashlib
import hmac
import json
import math
import os
import secrets
import threading
import time
from collections import deque
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple
class IamError(RuntimeError):
"""Raised when authentication or authorization fails."""
S3_ACTIONS = {"list", "read", "write", "delete", "share", "policy", "replication", "lifecycle", "cors"}
IAM_ACTIONS = {
"iam:list_users",
"iam:create_user",
"iam:delete_user",
"iam:rotate_key",
"iam:update_policy",
}
ALLOWED_ACTIONS = (S3_ACTIONS | IAM_ACTIONS) | {"iam:*"}
ACTION_ALIASES = {
"list": "list",
"s3:listbucket": "list",
"s3:listallmybuckets": "list",
"s3:listbucketversions": "list",
"s3:listmultipartuploads": "list",
"s3:listparts": "list",
"read": "read",
"s3:getobject": "read",
"s3:getobjectversion": "read",
"s3:getobjecttagging": "read",
"s3:getobjectversiontagging": "read",
"s3:getobjectacl": "read",
"s3:getbucketversioning": "read",
"s3:headobject": "read",
"s3:headbucket": "read",
"write": "write",
"s3:putobject": "write",
"s3:createbucket": "write",
"s3:putobjecttagging": "write",
"s3:putbucketversioning": "write",
"s3:createmultipartupload": "write",
"s3:uploadpart": "write",
"s3:completemultipartupload": "write",
"s3:abortmultipartupload": "write",
"s3:copyobject": "write",
"delete": "delete",
"s3:deleteobject": "delete",
"s3:deleteobjectversion": "delete",
"s3:deletebucket": "delete",
"s3:deleteobjecttagging": "delete",
"share": "share",
"s3:putobjectacl": "share",
"s3:putbucketacl": "share",
"s3:getbucketacl": "share",
"policy": "policy",
"s3:putbucketpolicy": "policy",
"s3:getbucketpolicy": "policy",
"s3:deletebucketpolicy": "policy",
"replication": "replication",
"s3:getreplicationconfiguration": "replication",
"s3:putreplicationconfiguration": "replication",
"s3:deletereplicationconfiguration": "replication",
"s3:replicateobject": "replication",
"s3:replicatetags": "replication",
"s3:replicatedelete": "replication",
"lifecycle": "lifecycle",
"s3:getlifecycleconfiguration": "lifecycle",
"s3:putlifecycleconfiguration": "lifecycle",
"s3:deletelifecycleconfiguration": "lifecycle",
"s3:getbucketlifecycle": "lifecycle",
"s3:putbucketlifecycle": "lifecycle",
"cors": "cors",
"s3:getbucketcors": "cors",
"s3:putbucketcors": "cors",
"s3:deletebucketcors": "cors",
"iam:listusers": "iam:list_users",
"iam:createuser": "iam:create_user",
"iam:deleteuser": "iam:delete_user",
"iam:rotateaccesskey": "iam:rotate_key",
"iam:putuserpolicy": "iam:update_policy",
"iam:*": "iam:*",
}
@dataclass
class Policy:
bucket: str
actions: Set[str]
@dataclass
class Principal:
access_key: str
display_name: str
policies: List[Policy]
class IamService:
"""Loads IAM configuration, manages users, and evaluates policies."""
def __init__(self, config_path: Path, auth_max_attempts: int = 5, auth_lockout_minutes: int = 15) -> None:
self.config_path = Path(config_path)
self.auth_max_attempts = auth_max_attempts
self.auth_lockout_window = timedelta(minutes=auth_lockout_minutes)
self.config_path.parent.mkdir(parents=True, exist_ok=True)
if not self.config_path.exists():
self._write_default()
self._users: Dict[str, Dict[str, Any]] = {}
self._raw_config: Dict[str, Any] = {}
self._failed_attempts: Dict[str, Deque[datetime]] = {}
self._last_load_time = 0.0
self._principal_cache: Dict[str, Tuple[Principal, float]] = {}
self._secret_key_cache: Dict[str, Tuple[str, float]] = {}
self._cache_ttl = float(os.environ.get("IAM_CACHE_TTL_SECONDS", "5.0"))
self._last_stat_check = 0.0
self._stat_check_interval = 1.0
self._sessions: Dict[str, Dict[str, Any]] = {}
self._session_lock = threading.Lock()
self._load()
self._load_lockout_state()
def _maybe_reload(self) -> None:
"""Reload configuration if the file has changed on disk."""
now = time.time()
if now - self._last_stat_check < self._stat_check_interval:
return
self._last_stat_check = now
try:
if self.config_path.stat().st_mtime > self._last_load_time:
self._load()
self._principal_cache.clear()
self._secret_key_cache.clear()
except OSError:
pass
def authenticate(self, access_key: str, secret_key: str) -> Principal:
self._maybe_reload()
access_key = (access_key or "").strip()
secret_key = (secret_key or "").strip()
if not access_key or not secret_key:
raise IamError("Missing access credentials")
if self._is_locked_out(access_key):
seconds = self._seconds_until_unlock(access_key)
raise IamError(
f"Access temporarily locked. Try again in {seconds} seconds."
)
record = self._users.get(access_key)
stored_secret = record["secret_key"] if record else secrets.token_urlsafe(24)
if not record or not hmac.compare_digest(stored_secret, secret_key):
self._record_failed_attempt(access_key)
raise IamError("Invalid credentials")
self._clear_failed_attempts(access_key)
return self._build_principal(access_key, record)
_MAX_LOCKOUT_KEYS = 10000
def _record_failed_attempt(self, access_key: str) -> None:
if not access_key:
return
if access_key not in self._failed_attempts and len(self._failed_attempts) >= self._MAX_LOCKOUT_KEYS:
oldest_key = min(self._failed_attempts, key=lambda k: self._failed_attempts[k][0] if self._failed_attempts[k] else datetime.min.replace(tzinfo=timezone.utc))
del self._failed_attempts[oldest_key]
attempts = self._failed_attempts.setdefault(access_key, deque())
self._prune_attempts(attempts)
attempts.append(datetime.now(timezone.utc))
self._save_lockout_state()
def _clear_failed_attempts(self, access_key: str) -> None:
if not access_key:
return
if self._failed_attempts.pop(access_key, None) is not None:
self._save_lockout_state()
def _lockout_file(self) -> Path:
return self.config_path.parent / "lockout_state.json"
def _load_lockout_state(self) -> None:
"""Load lockout state from disk."""
try:
if self._lockout_file().exists():
data = json.loads(self._lockout_file().read_text(encoding="utf-8"))
cutoff = datetime.now(timezone.utc) - self.auth_lockout_window
for key, timestamps in data.get("failed_attempts", {}).items():
valid = []
for ts in timestamps:
try:
dt = datetime.fromisoformat(ts)
if dt > cutoff:
valid.append(dt)
except (ValueError, TypeError):
continue
if valid:
self._failed_attempts[key] = deque(valid)
except (OSError, json.JSONDecodeError):
pass
def _save_lockout_state(self) -> None:
"""Persist lockout state to disk."""
data: Dict[str, Any] = {"failed_attempts": {}}
for key, attempts in self._failed_attempts.items():
data["failed_attempts"][key] = [ts.isoformat() for ts in attempts]
try:
self._lockout_file().write_text(json.dumps(data), encoding="utf-8")
except OSError:
pass
def _prune_attempts(self, attempts: Deque[datetime]) -> None:
cutoff = datetime.now(timezone.utc) - self.auth_lockout_window
while attempts and attempts[0] < cutoff:
attempts.popleft()
def _is_locked_out(self, access_key: str) -> bool:
if not access_key:
return False
attempts = self._failed_attempts.get(access_key)
if not attempts:
return False
self._prune_attempts(attempts)
return len(attempts) >= self.auth_max_attempts
def _seconds_until_unlock(self, access_key: str) -> int:
attempts = self._failed_attempts.get(access_key)
if not attempts:
return 0
self._prune_attempts(attempts)
if len(attempts) < self.auth_max_attempts:
return 0
oldest = attempts[0]
elapsed = (datetime.now(timezone.utc) - oldest).total_seconds()
return int(max(0, self.auth_lockout_window.total_seconds() - elapsed))
def create_session_token(self, access_key: str, duration_seconds: int = 3600) -> str:
"""Create a temporary session token for an access key."""
self._maybe_reload()
record = self._users.get(access_key)
if not record:
raise IamError("Unknown access key")
self._cleanup_expired_sessions()
token = secrets.token_urlsafe(32)
expires_at = time.time() + duration_seconds
self._sessions[token] = {
"access_key": access_key,
"expires_at": expires_at,
}
return token
def validate_session_token(self, access_key: str, session_token: str) -> bool:
"""Validate a session token for an access key (thread-safe, constant-time)."""
dummy_key = secrets.token_urlsafe(16)
dummy_token = secrets.token_urlsafe(32)
with self._session_lock:
session = self._sessions.get(session_token)
if not session:
hmac.compare_digest(access_key, dummy_key)
hmac.compare_digest(session_token, dummy_token)
return False
key_match = hmac.compare_digest(session["access_key"], access_key)
if not key_match:
hmac.compare_digest(session_token, dummy_token)
return False
if time.time() > session["expires_at"]:
self._sessions.pop(session_token, None)
return False
return True
def _cleanup_expired_sessions(self) -> None:
"""Remove expired session tokens."""
now = time.time()
expired = [token for token, data in self._sessions.items() if now > data["expires_at"]]
for token in expired:
del self._sessions[token]
def principal_for_key(self, access_key: str) -> Principal:
now = time.time()
cached = self._principal_cache.get(access_key)
if cached:
principal, cached_time = cached
if now - cached_time < self._cache_ttl:
return principal
self._maybe_reload()
record = self._users.get(access_key)
if not record:
raise IamError("Unknown access key")
principal = self._build_principal(access_key, record)
self._principal_cache[access_key] = (principal, now)
return principal
def secret_for_key(self, access_key: str) -> str:
self._maybe_reload()
record = self._users.get(access_key)
if not record:
raise IamError("Unknown access key")
return record["secret_key"]
def authorize(self, principal: Principal, bucket_name: str | None, action: str) -> None:
action = self._normalize_action(action)
if action not in ALLOWED_ACTIONS:
raise IamError(f"Unknown action '{action}'")
bucket_name = bucket_name or "*"
normalized = bucket_name.lower() if bucket_name != "*" else bucket_name
if not self._is_allowed(principal, normalized, action):
raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'")
def check_permissions(self, principal: Principal, bucket_name: str | None, actions: Iterable[str]) -> Dict[str, bool]:
self._maybe_reload()
bucket_name = (bucket_name or "*").lower() if bucket_name != "*" else (bucket_name or "*")
normalized_actions = {a: self._normalize_action(a) for a in actions}
results: Dict[str, bool] = {}
for original, canonical in normalized_actions.items():
if canonical not in ALLOWED_ACTIONS:
results[original] = False
else:
results[original] = self._is_allowed(principal, bucket_name, canonical)
return results
def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]:
return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")]
def _is_allowed(self, principal: Principal, bucket_name: str, action: str) -> bool:
bucket_name = bucket_name.lower()
for policy in principal.policies:
if policy.bucket not in {"*", bucket_name}:
continue
if "*" in policy.actions or action in policy.actions:
return True
if "iam:*" in policy.actions and action.startswith("iam:"):
return True
return False
def list_users(self) -> List[Dict[str, Any]]:
listing: List[Dict[str, Any]] = []
for access_key, record in self._users.items():
listing.append(
{
"access_key": access_key,
"display_name": record["display_name"],
"policies": [
{"bucket": policy.bucket, "actions": sorted(policy.actions)}
for policy in record["policies"]
],
}
)
return listing
def create_user(
self,
*,
display_name: str,
policies: Optional[Sequence[Dict[str, Any]]] = None,
access_key: str | None = None,
secret_key: str | None = None,
) -> Dict[str, str]:
access_key = (access_key or self._generate_access_key()).strip()
if not access_key:
raise IamError("Access key cannot be empty")
if access_key in self._users:
raise IamError("Access key already exists")
secret_key = secret_key or self._generate_secret_key()
sanitized_policies = self._prepare_policy_payload(policies)
record = {
"access_key": access_key,
"secret_key": secret_key,
"display_name": display_name or access_key,
"policies": sanitized_policies,
}
self._raw_config.setdefault("users", []).append(record)
self._save()
self._load()
return {"access_key": access_key, "secret_key": secret_key}
def rotate_secret(self, access_key: str) -> str:
user = self._get_raw_user(access_key)
new_secret = self._generate_secret_key()
user["secret_key"] = new_secret
self._save()
self._principal_cache.pop(access_key, None)
self._secret_key_cache.pop(access_key, None)
from .s3_api import clear_signing_key_cache
clear_signing_key_cache()
self._load()
return new_secret
def update_user(self, access_key: str, display_name: str) -> None:
user = self._get_raw_user(access_key)
user["display_name"] = display_name
self._save()
self._load()
def delete_user(self, access_key: str) -> None:
users = self._raw_config.get("users", [])
if len(users) <= 1:
raise IamError("Cannot delete the only user")
remaining = [user for user in users if user["access_key"] != access_key]
if len(remaining) == len(users):
raise IamError("User not found")
self._raw_config["users"] = remaining
self._save()
self._principal_cache.pop(access_key, None)
self._secret_key_cache.pop(access_key, None)
from .s3_api import clear_signing_key_cache
clear_signing_key_cache()
self._load()
def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None:
user = self._get_raw_user(access_key)
user["policies"] = self._prepare_policy_payload(policies)
self._save()
self._load()
def _load(self) -> None:
try:
self._last_load_time = self.config_path.stat().st_mtime
content = self.config_path.read_text(encoding='utf-8')
raw = json.loads(content)
except FileNotFoundError:
raise IamError(f"IAM config not found: {self.config_path}")
except json.JSONDecodeError as e:
raise IamError(f"Corrupted IAM config (invalid JSON): {e}")
except PermissionError as e:
raise IamError(f"Cannot read IAM config (permission denied): {e}")
except (OSError, ValueError) as e:
raise IamError(f"Failed to load IAM config: {e}")
users: Dict[str, Dict[str, Any]] = {}
for user in raw.get("users", []):
policies = self._build_policy_objects(user.get("policies", []))
users[user["access_key"]] = {
"secret_key": user["secret_key"],
"display_name": user.get("display_name", user["access_key"]),
"policies": policies,
}
if not users:
raise IamError("IAM configuration contains no users")
self._users = users
self._raw_config = {
"users": [
{
"access_key": entry["access_key"],
"secret_key": entry["secret_key"],
"display_name": entry.get("display_name", entry["access_key"]),
"policies": entry.get("policies", []),
}
for entry in raw.get("users", [])
]
}
def _save(self) -> None:
try:
temp_path = self.config_path.with_suffix('.json.tmp')
temp_path.write_text(json.dumps(self._raw_config, indent=2), encoding='utf-8')
temp_path.replace(self.config_path)
except (OSError, PermissionError) as e:
raise IamError(f"Cannot save IAM config: {e}")
def config_summary(self) -> Dict[str, Any]:
return {
"path": str(self.config_path),
"user_count": len(self._users),
"allowed_actions": sorted(ALLOWED_ACTIONS),
}
def export_config(self, mask_secrets: bool = True) -> Dict[str, Any]:
payload: Dict[str, Any] = {"users": []}
for user in self._raw_config.get("users", []):
record = dict(user)
if mask_secrets and "secret_key" in record:
record["secret_key"] = "••••••••••"
payload["users"].append(record)
return payload
def _build_policy_objects(self, policies: Sequence[Dict[str, Any]]) -> List[Policy]:
entries: List[Policy] = []
for policy in policies:
bucket = str(policy.get("bucket", "*")).lower()
raw_actions = policy.get("actions", [])
if isinstance(raw_actions, str):
raw_actions = [raw_actions]
action_set: Set[str] = set()
for action in raw_actions:
canonical = self._normalize_action(action)
if canonical == "*":
action_set = set(ALLOWED_ACTIONS)
break
if canonical:
action_set.add(canonical)
if action_set:
entries.append(Policy(bucket=bucket, actions=action_set))
return entries
def _prepare_policy_payload(self, policies: Optional[Sequence[Dict[str, Any]]]) -> List[Dict[str, Any]]:
if not policies:
policies = (
{
"bucket": "*",
"actions": ["list", "read", "write", "delete", "share", "policy"],
},
)
sanitized: List[Dict[str, Any]] = []
for policy in policies:
bucket = str(policy.get("bucket", "*")).lower()
raw_actions = policy.get("actions", [])
if isinstance(raw_actions, str):
raw_actions = [raw_actions]
action_set: Set[str] = set()
for action in raw_actions:
canonical = self._normalize_action(action)
if canonical == "*":
action_set = set(ALLOWED_ACTIONS)
break
if canonical:
action_set.add(canonical)
if not action_set:
continue
sanitized.append({"bucket": bucket, "actions": sorted(action_set)})
if not sanitized:
raise IamError("At least one policy with valid actions is required")
return sanitized
def _build_principal(self, access_key: str, record: Dict[str, Any]) -> Principal:
return Principal(
access_key=access_key,
display_name=record["display_name"],
policies=record["policies"],
)
def _normalize_action(self, action: str) -> str:
if not action:
return ""
lowered = action.strip().lower()
if lowered == "*":
return "*"
candidate = ACTION_ALIASES.get(lowered, lowered)
return candidate if candidate in ALLOWED_ACTIONS else ""
def _write_default(self) -> None:
access_key = secrets.token_hex(12)
secret_key = secrets.token_urlsafe(32)
default = {
"users": [
{
"access_key": access_key,
"secret_key": secret_key,
"display_name": "Local Admin",
"policies": [
{"bucket": "*", "actions": list(ALLOWED_ACTIONS)}
],
}
]
}
self.config_path.write_text(json.dumps(default, indent=2))
print(f"\n{'='*60}")
print("MYFSIO FIRST RUN - ADMIN CREDENTIALS GENERATED")
print(f"{'='*60}")
print(f"Access Key: {access_key}")
print(f"Secret Key: {secret_key}")
print(f"{'='*60}")
print(f"Missed this? Check: {self.config_path}")
print(f"{'='*60}\n")
def _generate_access_key(self) -> str:
return secrets.token_hex(8)
def _generate_secret_key(self) -> str:
return secrets.token_urlsafe(24)
def _get_raw_user(self, access_key: str) -> Dict[str, Any]:
for user in self._raw_config.get("users", []):
if user["access_key"] == access_key:
return user
raise IamError("User not found")
def get_secret_key(self, access_key: str) -> str | None:
now = time.time()
cached = self._secret_key_cache.get(access_key)
if cached:
secret_key, cached_time = cached
if now - cached_time < self._cache_ttl:
return secret_key
self._maybe_reload()
record = self._users.get(access_key)
if record:
secret_key = record["secret_key"]
self._secret_key_cache[access_key] = (secret_key, now)
return secret_key
return None
def get_principal(self, access_key: str) -> Principal | None:
now = time.time()
cached = self._principal_cache.get(access_key)
if cached:
principal, cached_time = cached
if now - cached_time < self._cache_ttl:
return principal
self._maybe_reload()
record = self._users.get(access_key)
if record:
principal = self._build_principal(access_key, record)
self._principal_cache[access_key] = (principal, now)
return principal
return None

View File

@@ -5,7 +5,6 @@ import logging
import random
import threading
import time
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
@@ -139,8 +138,8 @@ class OperationMetricsCollector:
self.interval_seconds = interval_minutes * 60
self.retention_hours = retention_hours
self._lock = threading.Lock()
self._by_method: Dict[str, OperationStats] = defaultdict(OperationStats)
self._by_endpoint: Dict[str, OperationStats] = defaultdict(OperationStats)
self._by_method: Dict[str, OperationStats] = {}
self._by_endpoint: Dict[str, OperationStats] = {}
self._by_status_class: Dict[str, int] = {}
self._error_codes: Dict[str, int] = {}
self._totals = OperationStats()
@@ -212,8 +211,8 @@ class OperationMetricsCollector:
self._prune_old_snapshots()
self._save_history()
self._by_method = defaultdict(OperationStats)
self._by_endpoint = defaultdict(OperationStats)
self._by_method.clear()
self._by_endpoint.clear()
self._by_status_class.clear()
self._error_codes.clear()
self._totals = OperationStats()
@@ -233,7 +232,12 @@ class OperationMetricsCollector:
status_class = f"{status_code // 100}xx"
with self._lock:
if method not in self._by_method:
self._by_method[method] = OperationStats()
self._by_method[method].record(latency_ms, success, bytes_in, bytes_out)
if endpoint_type not in self._by_endpoint:
self._by_endpoint[endpoint_type] = OperationStats()
self._by_endpoint[endpoint_type].record(latency_ms, success, bytes_in, bytes_out)
self._by_status_class[status_class] = self._by_status_class.get(status_class, 0) + 1

View File

@@ -19,10 +19,6 @@ from defusedxml.ElementTree import fromstring
try:
import myfsio_core as _rc
if not all(hasattr(_rc, f) for f in (
"verify_sigv4_signature", "derive_signing_key", "clear_signing_key_cache",
)):
raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
_HAS_RUST = True
except ImportError:
_rc = None
@@ -89,9 +85,6 @@ def _bucket_policies() -> BucketPolicyStore:
def _build_policy_context() -> Dict[str, Any]:
cached = getattr(g, "_policy_context", None)
if cached is not None:
return cached
ctx: Dict[str, Any] = {}
if request.headers.get("Referer"):
ctx["aws:Referer"] = request.headers.get("Referer")
@@ -105,7 +98,6 @@ def _build_policy_context() -> Dict[str, Any]:
ctx["aws:SecureTransport"] = str(request.is_secure).lower()
if request.headers.get("User-Agent"):
ctx["aws:UserAgent"] = request.headers.get("User-Agent")
g._policy_context = ctx
return ctx
@@ -205,11 +197,6 @@ _SIGNING_KEY_CACHE_LOCK = threading.Lock()
_SIGNING_KEY_CACHE_TTL = 60.0
_SIGNING_KEY_CACHE_MAX_SIZE = 256
_SIGV4_HEADER_RE = re.compile(
r"AWS4-HMAC-SHA256 Credential=([^/]+)/([^/]+)/([^/]+)/([^/]+)/aws4_request, SignedHeaders=([^,]+), Signature=(.+)"
)
_SIGV4_REQUIRED_HEADERS = frozenset({'host', 'x-amz-date'})
def clear_signing_key_cache() -> None:
if _HAS_RUST:
@@ -268,7 +255,10 @@ def _get_canonical_uri(req: Any) -> str:
def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
match = _SIGV4_HEADER_RE.match(auth_header)
match = re.match(
r"AWS4-HMAC-SHA256 Credential=([^/]+)/([^/]+)/([^/]+)/([^/]+)/aws4_request, SignedHeaders=([^,]+), Signature=(.+)",
auth_header,
)
if not match:
return None
@@ -292,22 +282,24 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
if time_diff > tolerance:
raise IamError("Request timestamp too old or too far in the future")
required_headers = {'host', 'x-amz-date'}
signed_headers_set = set(signed_headers_str.split(';'))
if not _SIGV4_REQUIRED_HEADERS.issubset(signed_headers_set):
if not ({'host', 'date'}.issubset(signed_headers_set)):
if not required_headers.issubset(signed_headers_set):
if 'date' in signed_headers_set:
required_headers.remove('x-amz-date')
required_headers.add('date')
if not required_headers.issubset(signed_headers_set):
raise IamError("Required headers not signed")
canonical_uri = _get_canonical_uri(req)
payload_hash = req.headers.get("X-Amz-Content-Sha256") or "UNSIGNED-PAYLOAD"
payload_hash = req.headers.get("X-Amz-Content-Sha256")
if not payload_hash:
payload_hash = hashlib.sha256(req.get_data()).hexdigest()
if _HAS_RUST:
query_params = list(req.args.items(multi=True))
header_values = []
for h in signed_headers_str.split(";"):
val = req.headers.get(h) or ""
if h.lower() == "expect" and val == "":
val = "100-continue"
header_values.append((h, val))
header_values = [(h, req.headers.get(h) or "") for h in signed_headers_str.split(";")]
if not _rc.verify_sigv4_signature(
req.method, canonical_uri, query_params, signed_headers_str,
header_values, payload_hash, amz_date, date_stamp, region,
@@ -396,12 +388,7 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
if _HAS_RUST:
query_params = [(k, v) for k, v in req.args.items(multi=True) if k != "X-Amz-Signature"]
header_values = []
for h in signed_headers_str.split(";"):
val = req.headers.get(h) or ""
if h.lower() == "expect" and val == "":
val = "100-continue"
header_values.append((h, val))
header_values = [(h, req.headers.get(h) or "") for h in signed_headers_str.split(";")]
if not _rc.verify_sigv4_signature(
req.method, canonical_uri, query_params, signed_headers_str,
header_values, "UNSIGNED-PAYLOAD", amz_date, date_stamp, region,
@@ -499,7 +486,7 @@ def _authorize_action(principal: Principal | None, bucket_name: str | None, acti
iam_error: IamError | None = None
if principal is not None:
try:
_iam().authorize(principal, bucket_name, action, object_key=object_key)
_iam().authorize(principal, bucket_name, action)
iam_allowed = True
except IamError as exc:
iam_error = exc
@@ -534,6 +521,21 @@ def _authorize_action(principal: Principal | None, bucket_name: str | None, acti
raise iam_error or IamError("Access denied")
def _enforce_bucket_policy(principal: Principal | None, bucket_name: str | None, object_key: str | None, action: str) -> None:
if not bucket_name:
return
policy_context = _build_policy_context()
decision = _bucket_policies().evaluate(
principal.access_key if principal else None,
bucket_name,
object_key,
action,
policy_context,
)
if decision == "deny":
raise IamError("Access denied by bucket policy")
def _object_principal(action: str, bucket_name: str, object_key: str):
principal, error = _require_principal()
try:
@@ -542,7 +544,121 @@ def _object_principal(action: str, bucket_name: str, object_key: str):
except IamError as exc:
if not error:
return None, _error_response("AccessDenied", str(exc), 403)
if not _has_presign_params():
return None, error
try:
principal = _validate_presigned_request(action, bucket_name, object_key)
_enforce_bucket_policy(principal, bucket_name, object_key, action)
return principal, None
except IamError as exc:
return None, _error_response("AccessDenied", str(exc), 403)
def _has_presign_params() -> bool:
return bool(request.args.get("X-Amz-Algorithm"))
def _validate_presigned_request(action: str, bucket_name: str, object_key: str) -> Principal:
algorithm = request.args.get("X-Amz-Algorithm")
credential = request.args.get("X-Amz-Credential")
amz_date = request.args.get("X-Amz-Date")
signed_headers = request.args.get("X-Amz-SignedHeaders")
expires = request.args.get("X-Amz-Expires")
signature = request.args.get("X-Amz-Signature")
if not all([algorithm, credential, amz_date, signed_headers, expires, signature]):
raise IamError("Malformed presigned URL")
if algorithm != "AWS4-HMAC-SHA256":
raise IamError("Unsupported signing algorithm")
parts = credential.split("/")
if len(parts) != 5:
raise IamError("Invalid credential scope")
access_key, date_stamp, region, service, terminal = parts
if terminal != "aws4_request":
raise IamError("Invalid credential scope")
config_region = current_app.config["AWS_REGION"]
config_service = current_app.config["AWS_SERVICE"]
if region != config_region or service != config_service:
raise IamError("Credential scope mismatch")
try:
expiry = int(expires)
except ValueError as exc:
raise IamError("Invalid expiration") from exc
min_expiry = current_app.config.get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1)
max_expiry = current_app.config.get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800)
if expiry < min_expiry or expiry > max_expiry:
raise IamError(f"Expiration must be between {min_expiry} second(s) and {max_expiry} seconds")
try:
request_time = datetime.strptime(amz_date, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc)
except ValueError as exc:
raise IamError("Invalid X-Amz-Date") from exc
now = datetime.now(timezone.utc)
tolerance = timedelta(seconds=current_app.config.get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900))
if request_time > now + tolerance:
raise IamError("Request date is too far in the future")
if now > request_time + timedelta(seconds=expiry):
raise IamError("Presigned URL expired")
signed_headers_list = [header.strip().lower() for header in signed_headers.split(";") if header]
signed_headers_list.sort()
canonical_headers = _canonical_headers_from_request(signed_headers_list)
canonical_query = _canonical_query_from_request()
payload_hash = request.args.get("X-Amz-Content-Sha256", "UNSIGNED-PAYLOAD")
canonical_request = "\n".join(
[
request.method,
_canonical_uri(bucket_name, object_key),
canonical_query,
canonical_headers,
";".join(signed_headers_list),
payload_hash,
]
)
hashed_request = hashlib.sha256(canonical_request.encode()).hexdigest()
scope = f"{date_stamp}/{region}/{service}/aws4_request"
string_to_sign = "\n".join([
"AWS4-HMAC-SHA256",
amz_date,
scope,
hashed_request,
])
secret = _iam().secret_for_key(access_key)
signing_key = _derive_signing_key(secret, date_stamp, region, service)
expected = hmac.new(signing_key, string_to_sign.encode(), hashlib.sha256).hexdigest()
if not hmac.compare_digest(expected, signature):
raise IamError("Signature mismatch")
return _iam().principal_for_key(access_key)
def _canonical_query_from_request() -> str:
parts = []
for key in sorted(request.args.keys()):
if key == "X-Amz-Signature":
continue
values = request.args.getlist(key)
encoded_key = quote(str(key), safe="-_.~")
for value in sorted(values):
encoded_value = quote(str(value), safe="-_.~")
parts.append(f"{encoded_key}={encoded_value}")
return "&".join(parts)
def _canonical_headers_from_request(headers: list[str]) -> str:
lines = []
for header in headers:
if header == "host":
api_base = current_app.config.get("API_BASE_URL")
if api_base:
value = urlparse(api_base).netloc
else:
value = request.host
else:
value = request.headers.get(header, "")
canonical_value = " ".join(value.strip().split()) if value else ""
lines.append(f"{header}:{canonical_value}")
return "\n".join(lines) + "\n"
def _canonical_uri(bucket_name: str, object_key: str | None) -> str:
@@ -608,8 +724,8 @@ def _generate_presigned_url(
host = parsed.netloc
scheme = parsed.scheme
else:
host = request.host
scheme = request.scheme or "http"
host = request.headers.get("X-Forwarded-Host", request.host)
scheme = request.headers.get("X-Forwarded-Proto", request.scheme or "http")
canonical_headers = f"host:{host}\n"
canonical_request = "\n".join(
@@ -882,7 +998,7 @@ def _render_encryption_document(config: dict[str, Any]) -> Element:
return root
def _stream_file(path, chunk_size: int = 1024 * 1024):
def _stream_file(path, chunk_size: int = 256 * 1024):
with path.open("rb") as handle:
while True:
chunk = handle.read(chunk_size)
@@ -901,73 +1017,17 @@ def _method_not_allowed(allowed: list[str]) -> Response:
return response
def _check_conditional_headers(etag: str, last_modified: float | None) -> Response | None:
from email.utils import parsedate_to_datetime
if_match = request.headers.get("If-Match")
if if_match:
if if_match.strip() != "*":
match_etags = [e.strip().strip('"') for e in if_match.split(",")]
if etag not in match_etags:
return Response(status=412)
if_unmodified = request.headers.get("If-Unmodified-Since")
if not if_match and if_unmodified and last_modified is not None:
try:
dt = parsedate_to_datetime(if_unmodified)
obj_dt = datetime.fromtimestamp(last_modified, timezone.utc)
if obj_dt > dt:
return Response(status=412)
except (TypeError, ValueError):
pass
if_none_match = request.headers.get("If-None-Match")
if if_none_match:
if if_none_match.strip() == "*":
resp = Response(status=304)
resp.headers["ETag"] = f'"{etag}"'
if last_modified is not None:
resp.headers["Last-Modified"] = http_date(last_modified)
return resp
none_match_etags = [e.strip().strip('"') for e in if_none_match.split(",")]
if etag in none_match_etags:
resp = Response(status=304)
resp.headers["ETag"] = f'"{etag}"'
if last_modified is not None:
resp.headers["Last-Modified"] = http_date(last_modified)
return resp
if_modified = request.headers.get("If-Modified-Since")
if not if_none_match and if_modified and last_modified is not None:
try:
dt = parsedate_to_datetime(if_modified)
obj_dt = datetime.fromtimestamp(last_modified, timezone.utc)
if obj_dt <= dt:
resp = Response(status=304)
resp.headers["ETag"] = f'"{etag}"'
resp.headers["Last-Modified"] = http_date(last_modified)
return resp
except (TypeError, ValueError):
pass
return None
def _apply_object_headers(
response: Response,
*,
file_stat,
metadata: Dict[str, str] | None,
etag: str,
size_override: int | None = None,
mtime_override: float | None = None,
) -> None:
effective_size = size_override if size_override is not None else (file_stat.st_size if file_stat is not None else None)
effective_mtime = mtime_override if mtime_override is not None else (file_stat.st_mtime if file_stat is not None else None)
if effective_size is not None and response.status_code != 206:
response.headers["Content-Length"] = str(effective_size)
if effective_mtime is not None:
response.headers["Last-Modified"] = http_date(effective_mtime)
if file_stat is not None:
if response.status_code != 206:
response.headers["Content-Length"] = str(file_stat.st_size)
response.headers["Last-Modified"] = http_date(file_stat.st_mtime)
response.headers["ETag"] = f'"{etag}"'
response.headers["Accept-Ranges"] = "bytes"
for key, value in (metadata or {}).items():
@@ -1017,7 +1077,7 @@ def _bucket_versioning_handler(bucket_name: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "versioning")
_authorize_action(principal, bucket_name, "policy")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
storage = _storage()
@@ -1064,7 +1124,7 @@ def _bucket_tagging_handler(bucket_name: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "tagging")
_authorize_action(principal, bucket_name, "policy")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
storage = _storage()
@@ -1229,7 +1289,7 @@ def _bucket_cors_handler(bucket_name: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "cors")
_authorize_action(principal, bucket_name, "policy")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
storage = _storage()
@@ -1282,7 +1342,7 @@ def _bucket_encryption_handler(bucket_name: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "encryption")
_authorize_action(principal, bucket_name, "policy")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
storage = _storage()
@@ -1357,7 +1417,7 @@ def _bucket_acl_handler(bucket_name: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "share")
_authorize_action(principal, bucket_name, "policy")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
storage = _storage()
@@ -1600,12 +1660,12 @@ def _bucket_lifecycle_handler(bucket_name: str) -> Response:
"""Handle bucket lifecycle configuration (GET/PUT/DELETE /<bucket>?lifecycle)."""
if request.method not in {"GET", "PUT", "DELETE"}:
return _method_not_allowed(["GET", "PUT", "DELETE"])
principal, error = _require_principal()
if error:
return error
try:
_authorize_action(principal, bucket_name, "lifecycle")
_authorize_action(principal, bucket_name, "policy")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
@@ -1764,12 +1824,12 @@ def _bucket_quota_handler(bucket_name: str) -> Response:
"""Handle bucket quota configuration (GET/PUT/DELETE /<bucket>?quota)."""
if request.method not in {"GET", "PUT", "DELETE"}:
return _method_not_allowed(["GET", "PUT", "DELETE"])
principal, error = _require_principal()
if error:
return error
try:
_authorize_action(principal, bucket_name, "quota")
_authorize_action(principal, bucket_name, "policy")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
@@ -1846,7 +1906,7 @@ def _bucket_object_lock_handler(bucket_name: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "object_lock")
_authorize_action(principal, bucket_name, "policy")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
@@ -1892,7 +1952,7 @@ def _bucket_notification_handler(bucket_name: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "notification")
_authorize_action(principal, bucket_name, "policy")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
@@ -1988,7 +2048,7 @@ def _bucket_logging_handler(bucket_name: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "logging")
_authorize_action(principal, bucket_name, "policy")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
@@ -2130,7 +2190,7 @@ def _object_retention_handler(bucket_name: str, object_key: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "object_lock", object_key=object_key)
_authorize_action(principal, bucket_name, "write" if request.method == "PUT" else "read", object_key=object_key)
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
@@ -2206,7 +2266,7 @@ def _object_legal_hold_handler(bucket_name: str, object_key: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "object_lock", object_key=object_key)
_authorize_action(principal, bucket_name, "write" if request.method == "PUT" else "read", object_key=object_key)
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
@@ -2539,7 +2599,7 @@ def bucket_handler(bucket_name: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "create_bucket")
_authorize_action(principal, bucket_name, "write")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
try:
@@ -2556,7 +2616,7 @@ def bucket_handler(bucket_name: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "delete_bucket")
_authorize_action(principal, bucket_name, "delete")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
try:
@@ -2762,8 +2822,6 @@ def object_handler(bucket_name: str, object_key: str):
if validation_error:
return _error_response("InvalidArgument", validation_error, 400)
metadata["__content_type__"] = content_type or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
try:
meta = storage.put_object(
bucket_name,
@@ -2778,23 +2836,10 @@ def object_handler(bucket_name: str, object_key: str):
if "Bucket" in message:
return _error_response("NoSuchBucket", message, 404)
return _error_response("InvalidArgument", message, 400)
content_md5 = request.headers.get("Content-MD5")
if content_md5 and meta.etag:
try:
expected_md5 = base64.b64decode(content_md5).hex()
except Exception:
storage.delete_object(bucket_name, object_key)
return _error_response("InvalidDigest", "Content-MD5 header is not valid base64", 400)
if expected_md5 != meta.etag:
storage.delete_object(bucket_name, object_key)
return _error_response("BadDigest", "The Content-MD5 you specified did not match what we received", 400)
if current_app.logger.isEnabledFor(logging.INFO):
current_app.logger.info(
"Object uploaded",
extra={"bucket": bucket_name, "key": object_key, "size": meta.size},
)
current_app.logger.info(
"Object uploaded",
extra={"bucket": bucket_name, "key": object_key, "size": meta.size},
)
response = Response(status=200)
if meta.etag:
response.headers["ETag"] = f'"{meta.etag}"'
@@ -2828,30 +2873,10 @@ def object_handler(bucket_name: str, object_key: str):
except StorageError as exc:
return _error_response("NoSuchKey", str(exc), 404)
metadata = storage.get_object_metadata(bucket_name, object_key)
mimetype = metadata.get("__content_type__") or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
mimetype = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
is_encrypted = "x-amz-server-side-encryption" in metadata
cond_etag = metadata.get("__etag__")
_etag_was_healed = False
if not cond_etag and not is_encrypted:
try:
cond_etag = storage._compute_etag(path)
_etag_was_healed = True
storage.heal_missing_etag(bucket_name, object_key, cond_etag)
except OSError:
cond_etag = None
if cond_etag:
cond_mtime = float(metadata["__last_modified__"]) if "__last_modified__" in metadata else None
if cond_mtime is None:
try:
cond_mtime = path.stat().st_mtime
except OSError:
pass
cond_resp = _check_conditional_headers(cond_etag, cond_mtime)
if cond_resp:
return cond_resp
if request.method == "GET":
range_header = request.headers.get("Range")
@@ -2884,7 +2909,7 @@ def object_handler(bucket_name: str, object_key: str):
try:
stat = path.stat()
file_size = stat.st_size
etag = cond_etag or storage._compute_etag(path)
etag = metadata.get("__etag__") or storage._compute_etag(path)
except PermissionError:
return _error_response("AccessDenied", "Permission denied accessing object", 403)
except OSError as exc:
@@ -2932,7 +2957,7 @@ def object_handler(bucket_name: str, object_key: str):
try:
stat = path.stat()
response = Response(status=200)
etag = cond_etag or storage._compute_etag(path)
etag = metadata.get("__etag__") or storage._compute_etag(path)
except PermissionError:
return _error_response("AccessDenied", "Permission denied accessing object", 403)
except OSError as exc:
@@ -2940,7 +2965,10 @@ def object_handler(bucket_name: str, object_key: str):
response.headers["Content-Type"] = mimetype
logged_bytes = 0
file_stat = stat if not is_encrypted else None
try:
file_stat = path.stat() if not is_encrypted else None
except (PermissionError, OSError):
file_stat = None
_apply_object_headers(response, file_stat=file_stat, metadata=metadata, etag=etag)
if request.method == "GET":
@@ -2957,9 +2985,8 @@ def object_handler(bucket_name: str, object_key: str):
if value:
response.headers[header] = _sanitize_header_value(value)
if current_app.logger.isEnabledFor(logging.INFO):
action = "Object read" if request.method == "GET" else "Object head"
current_app.logger.info(action, extra={"bucket": bucket_name, "key": object_key, "bytes": logged_bytes})
action = "Object read" if request.method == "GET" else "Object head"
current_app.logger.info(action, extra={"bucket": bucket_name, "key": object_key, "bytes": logged_bytes})
return response
if "uploadId" in request.args:
@@ -2977,8 +3004,7 @@ def object_handler(bucket_name: str, object_key: str):
storage.delete_object(bucket_name, object_key)
lock_service.delete_object_lock_metadata(bucket_name, object_key)
if current_app.logger.isEnabledFor(logging.INFO):
current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key})
current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key})
principal, _ = _require_principal()
_notifications().emit_object_removed(
@@ -3114,7 +3140,7 @@ def _bucket_replication_handler(bucket_name: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "replication")
_authorize_action(principal, bucket_name, "policy")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
storage = _storage()
@@ -3197,7 +3223,7 @@ def _bucket_website_handler(bucket_name: str) -> Response:
if error:
return error
try:
_authorize_action(principal, bucket_name, "website")
_authorize_action(principal, bucket_name, "policy")
except IamError as exc:
return _error_response("AccessDenied", str(exc), 403)
storage = _storage()
@@ -3317,36 +3343,14 @@ def head_object(bucket_name: str, object_key: str) -> Response:
return error
try:
_authorize_action(principal, bucket_name, "read", object_key=object_key)
storage = _storage()
path = storage.get_object_path(bucket_name, object_key)
metadata = storage.get_object_metadata(bucket_name, object_key)
etag = metadata.get("__etag__")
if not etag:
etag = storage._compute_etag(path)
storage.heal_missing_etag(bucket_name, object_key, etag)
head_mtime = float(metadata["__last_modified__"]) if "__last_modified__" in metadata else None
if head_mtime is None:
try:
head_mtime = path.stat().st_mtime
except OSError:
pass
cond_resp = _check_conditional_headers(etag, head_mtime)
if cond_resp:
return cond_resp
cached_size = metadata.get("__size__")
cached_mtime = metadata.get("__last_modified__")
if cached_size is not None and cached_mtime is not None:
size_val = int(cached_size)
mtime_val = float(cached_mtime)
response = Response(status=200)
_apply_object_headers(response, file_stat=None, metadata=metadata, etag=etag, size_override=size_val, mtime_override=mtime_val)
else:
stat = path.stat()
response = Response(status=200)
_apply_object_headers(response, file_stat=stat, metadata=metadata, etag=etag)
response.headers["Content-Type"] = metadata.get("__content_type__") or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
path = _storage().get_object_path(bucket_name, object_key)
metadata = _storage().get_object_metadata(bucket_name, object_key)
stat = path.stat()
etag = metadata.get("__etag__") or _storage()._compute_etag(path)
response = Response(status=200)
_apply_object_headers(response, file_stat=stat, metadata=metadata, etag=etag)
response.headers["Content-Type"] = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
return response
except (StorageError, FileNotFoundError):
return _error_response("NoSuchKey", "Object not found", 404)
@@ -3576,12 +3580,10 @@ def _initiate_multipart_upload(bucket_name: str, object_key: str) -> Response:
return error
metadata = _extract_request_metadata()
content_type = request.headers.get("Content-Type")
metadata["__content_type__"] = content_type or mimetypes.guess_type(object_key)[0] or "application/octet-stream"
try:
upload_id = _storage().initiate_multipart_upload(
bucket_name,
object_key,
bucket_name,
object_key,
metadata=metadata or None
)
except StorageError as exc:
@@ -3630,15 +3632,6 @@ def _upload_part(bucket_name: str, object_key: str) -> Response:
return _error_response("NoSuchUpload", str(exc), 404)
return _error_response("InvalidArgument", str(exc), 400)
content_md5 = request.headers.get("Content-MD5")
if content_md5 and etag:
try:
expected_md5 = base64.b64decode(content_md5).hex()
except Exception:
return _error_response("InvalidDigest", "Content-MD5 header is not valid base64", 400)
if expected_md5 != etag:
return _error_response("BadDigest", "The Content-MD5 you specified did not match what we received", 400)
response = Response(status=200)
response.headers["ETag"] = f'"{etag}"'
return response

View File

@@ -1,8 +1,8 @@
from __future__ import annotations
import copy
import hashlib
import json
import logging
import os
import re
import shutil
@@ -21,21 +21,12 @@ from typing import Any, BinaryIO, Dict, Generator, List, Optional
try:
import myfsio_core as _rc
if not all(hasattr(_rc, f) for f in (
"validate_bucket_name", "validate_object_key", "md5_file",
"shallow_scan", "bucket_stats_scan", "search_objects_scan",
"stream_to_file_with_md5", "assemble_parts_with_md5",
"build_object_cache", "read_index_entry", "write_index_entry",
"delete_index_entry", "check_bucket_contents",
)):
raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
_HAS_RUST = True
except ImportError:
_rc = None
_HAS_RUST = False
logger = logging.getLogger(__name__)
# Platform-specific file locking
if os.name == "nt":
import msvcrt
@@ -200,15 +191,12 @@ class ObjectStorage:
object_cache_max_size: int = 100,
bucket_config_cache_ttl: float = 30.0,
object_key_max_length_bytes: int = 1024,
meta_read_cache_max: int = 2048,
) -> None:
self.root = Path(root)
self.root.mkdir(parents=True, exist_ok=True)
self._ensure_system_roots()
self._object_cache: OrderedDict[str, tuple[Dict[str, ObjectMeta], float, float]] = OrderedDict()
self._obj_cache_lock = threading.Lock()
self._meta_cache_lock = threading.Lock()
self._registry_lock = threading.Lock()
self._cache_lock = threading.Lock()
self._bucket_locks: Dict[str, threading.Lock] = {}
self._cache_version: Dict[str, int] = {}
self._bucket_config_cache: Dict[str, tuple[dict[str, Any], float]] = {}
@@ -219,20 +207,12 @@ class ObjectStorage:
self._sorted_key_cache: Dict[str, tuple[list[str], int]] = {}
self._meta_index_locks: Dict[str, threading.Lock] = {}
self._meta_read_cache: OrderedDict[tuple, Optional[Dict[str, Any]]] = OrderedDict()
self._meta_read_cache_max = meta_read_cache_max
self._meta_read_cache_max = 2048
self._cleanup_executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ParentCleanup")
self._stats_mem: Dict[str, Dict[str, int]] = {}
self._stats_serial: Dict[str, int] = {}
self._stats_mem_time: Dict[str, float] = {}
self._stats_lock = threading.Lock()
self._stats_dirty: set[str] = set()
self._stats_flush_timer: Optional[threading.Timer] = None
self._etag_index_dirty: set[str] = set()
self._etag_index_flush_timer: Optional[threading.Timer] = None
self._etag_index_mem: Dict[str, tuple[Dict[str, str], float]] = {}
def _get_bucket_lock(self, bucket_id: str) -> threading.Lock:
with self._registry_lock:
"""Get or create a lock for a specific bucket. Reduces global lock contention."""
with self._cache_lock:
if bucket_id not in self._bucket_locks:
self._bucket_locks[bucket_id] = threading.Lock()
return self._bucket_locks[bucket_id]
@@ -280,24 +260,26 @@ class ObjectStorage:
self._system_bucket_root(bucket_path.name).mkdir(parents=True, exist_ok=True)
def bucket_stats(self, bucket_name: str, cache_ttl: int = 60) -> dict[str, int]:
"""Return object count and total size for the bucket (cached).
Args:
bucket_name: Name of the bucket
cache_ttl: Cache time-to-live in seconds (default 60)
"""
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise BucketNotFoundError("Bucket does not exist")
with self._stats_lock:
if bucket_name in self._stats_mem:
cached_at = self._stats_mem_time.get(bucket_name, 0.0)
if (time.monotonic() - cached_at) < cache_ttl:
return dict(self._stats_mem[bucket_name])
self._stats_mem.pop(bucket_name, None)
self._stats_mem_time.pop(bucket_name, None)
cache_path = self._system_bucket_root(bucket_name) / "stats.json"
cached_stats = None
cache_fresh = False
if cache_path.exists():
try:
cache_fresh = time.time() - cache_path.stat().st_mtime < cache_ttl
cached_stats = json.loads(cache_path.read_text(encoding="utf-8"))
if cache_fresh:
return cached_stats
except (OSError, json.JSONDecodeError):
pass
@@ -366,25 +348,16 @@ class ObjectStorage:
"_cache_serial": existing_serial,
}
with self._stats_lock:
self._stats_mem[bucket_name] = stats
self._stats_mem_time[bucket_name] = time.monotonic()
self._stats_serial[bucket_name] = existing_serial
try:
cache_path.parent.mkdir(parents=True, exist_ok=True)
self._atomic_write_json(cache_path, stats)
cache_path.write_text(json.dumps(stats), encoding="utf-8")
except OSError:
pass
return stats
def _invalidate_bucket_stats_cache(self, bucket_id: str) -> None:
with self._stats_lock:
self._stats_mem.pop(bucket_id, None)
self._stats_mem_time.pop(bucket_id, None)
self._stats_serial[bucket_id] = self._stats_serial.get(bucket_id, 0) + 1
self._stats_dirty.discard(bucket_id)
"""Invalidate the cached bucket statistics."""
cache_path = self._system_bucket_root(bucket_id) / "stats.json"
try:
cache_path.unlink(missing_ok=True)
@@ -400,56 +373,29 @@ class ObjectStorage:
version_bytes_delta: int = 0,
version_count_delta: int = 0,
) -> None:
with self._stats_lock:
if bucket_id not in self._stats_mem:
self._stats_mem[bucket_id] = {
"objects": 0, "bytes": 0, "version_count": 0,
"version_bytes": 0, "total_objects": 0, "total_bytes": 0,
"_cache_serial": 0,
}
data = self._stats_mem[bucket_id]
data["objects"] = max(0, data["objects"] + objects_delta)
data["bytes"] = max(0, data["bytes"] + bytes_delta)
data["version_count"] = max(0, data["version_count"] + version_count_delta)
data["version_bytes"] = max(0, data["version_bytes"] + version_bytes_delta)
data["total_objects"] = max(0, data["total_objects"] + objects_delta + version_count_delta)
data["total_bytes"] = max(0, data["total_bytes"] + bytes_delta + version_bytes_delta)
data["_cache_serial"] = data["_cache_serial"] + 1
self._stats_serial[bucket_id] = self._stats_serial.get(bucket_id, 0) + 1
self._stats_mem_time[bucket_id] = time.monotonic()
self._stats_dirty.add(bucket_id)
needs_immediate = data["objects"] == 0 and objects_delta < 0
if needs_immediate:
self._flush_stats()
else:
self._schedule_stats_flush()
"""Incrementally update cached bucket statistics instead of invalidating.
def _schedule_stats_flush(self) -> None:
if self._stats_flush_timer is None or not self._stats_flush_timer.is_alive():
self._stats_flush_timer = threading.Timer(3.0, self._flush_stats)
self._stats_flush_timer.daemon = True
self._stats_flush_timer.start()
def _flush_stats(self) -> None:
with self._stats_lock:
dirty = list(self._stats_dirty)
self._stats_dirty.clear()
snapshots = {b: dict(self._stats_mem[b]) for b in dirty if b in self._stats_mem}
for bucket_id, data in snapshots.items():
cache_path = self._system_bucket_root(bucket_id) / "stats.json"
try:
cache_path.parent.mkdir(parents=True, exist_ok=True)
self._atomic_write_json(cache_path, data, sync=False)
except OSError:
pass
def shutdown_stats(self) -> None:
if self._stats_flush_timer is not None:
self._stats_flush_timer.cancel()
self._flush_stats()
if self._etag_index_flush_timer is not None:
self._etag_index_flush_timer.cancel()
self._flush_etag_indexes()
This avoids expensive full directory scans on every PUT/DELETE by
adjusting the cached values directly. Also signals cross-process cache
invalidation by incrementing _cache_serial.
"""
cache_path = self._system_bucket_root(bucket_id) / "stats.json"
try:
cache_path.parent.mkdir(parents=True, exist_ok=True)
if cache_path.exists():
data = json.loads(cache_path.read_text(encoding="utf-8"))
else:
data = {"objects": 0, "bytes": 0, "version_count": 0, "version_bytes": 0, "total_objects": 0, "total_bytes": 0, "_cache_serial": 0}
data["objects"] = max(0, data.get("objects", 0) + objects_delta)
data["bytes"] = max(0, data.get("bytes", 0) + bytes_delta)
data["version_count"] = max(0, data.get("version_count", 0) + version_count_delta)
data["version_bytes"] = max(0, data.get("version_bytes", 0) + version_bytes_delta)
data["total_objects"] = max(0, data.get("total_objects", 0) + objects_delta + version_count_delta)
data["total_bytes"] = max(0, data.get("total_bytes", 0) + bytes_delta + version_bytes_delta)
data["_cache_serial"] = data.get("_cache_serial", 0) + 1
cache_path.write_text(json.dumps(data), encoding="utf-8")
except (OSError, json.JSONDecodeError):
pass
def delete_bucket(self, bucket_name: str) -> None:
bucket_path = self._bucket_path(bucket_name)
@@ -467,20 +413,13 @@ class ObjectStorage:
self._remove_tree(self._system_bucket_root(bucket_id))
self._remove_tree(self._multipart_bucket_root(bucket_id))
self._bucket_config_cache.pop(bucket_id, None)
with self._obj_cache_lock:
with self._cache_lock:
self._object_cache.pop(bucket_id, None)
self._cache_version.pop(bucket_id, None)
self._sorted_key_cache.pop(bucket_id, None)
with self._meta_cache_lock:
stale = [k for k in self._meta_read_cache if k[0] == bucket_id]
for k in stale:
del self._meta_read_cache[k]
with self._stats_lock:
self._stats_mem.pop(bucket_id, None)
self._stats_mem_time.pop(bucket_id, None)
self._stats_serial.pop(bucket_id, None)
self._stats_dirty.discard(bucket_id)
self._etag_index_dirty.discard(bucket_id)
def list_objects(
self,
@@ -614,7 +553,14 @@ class ObjectStorage:
is_truncated=False, next_continuation_token=None,
)
meta_cache: Dict[str, str] = self._get_etag_index(bucket_id)
etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
meta_cache: Dict[str, str] = {}
if etag_index_path.exists():
try:
with open(etag_index_path, 'r', encoding='utf-8') as f:
meta_cache = json.load(f)
except (OSError, json.JSONDecodeError):
pass
entries_files: list[tuple[str, int, float, Optional[str]]] = []
entries_dirs: list[str] = []
@@ -719,73 +665,6 @@ class ObjectStorage:
next_continuation_token=next_token,
)
def iter_objects_shallow(
self,
bucket_name: str,
*,
prefix: str = "",
delimiter: str = "/",
) -> Generator[tuple[str, ObjectMeta | str], None, None]:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
raise BucketNotFoundError("Bucket does not exist")
bucket_id = bucket_path.name
target_dir = bucket_path
if prefix:
safe_prefix_path = Path(prefix.rstrip("/"))
if ".." in safe_prefix_path.parts:
return
target_dir = bucket_path / safe_prefix_path
try:
resolved = target_dir.resolve()
bucket_resolved = bucket_path.resolve()
if not str(resolved).startswith(str(bucket_resolved) + os.sep) and resolved != bucket_resolved:
return
except (OSError, ValueError):
return
if not target_dir.exists() or not target_dir.is_dir():
return
etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
meta_cache: Dict[str, str] = {}
if etag_index_path.exists():
try:
with open(etag_index_path, 'r', encoding='utf-8') as f:
meta_cache = json.load(f)
except (OSError, json.JSONDecodeError):
pass
try:
with os.scandir(str(target_dir)) as it:
for entry in it:
name = entry.name
if name in self.INTERNAL_FOLDERS:
continue
if entry.is_dir(follow_symlinks=False):
yield ("folder", prefix + name + delimiter)
elif entry.is_file(follow_symlinks=False):
key = prefix + name
try:
st = entry.stat()
etag = meta_cache.get(key)
if etag is None:
safe_key = PurePosixPath(key)
meta = self._read_metadata(bucket_id, Path(safe_key))
etag = meta.get("__etag__") if meta else None
yield ("object", ObjectMeta(
key=key,
size=st.st_size,
last_modified=datetime.fromtimestamp(st.st_mtime, timezone.utc),
etag=etag,
metadata=None,
))
except OSError:
pass
except OSError:
return
def _shallow_via_full_scan(
self,
bucket_name: str,
@@ -955,102 +834,51 @@ class ObjectStorage:
is_overwrite = destination.exists()
existing_size = destination.stat().st_size if is_overwrite else 0
archived_version_size = 0
if self._is_versioning_enabled(bucket_path) and is_overwrite:
archived_version_size = existing_size
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
tmp_dir = self._system_root_path() / self.SYSTEM_TMP_DIR
tmp_dir.mkdir(parents=True, exist_ok=True)
if _HAS_RUST:
tmp_path = None
try:
tmp_path_str, etag, new_size = _rc.stream_to_file_with_md5(
stream, str(tmp_dir)
)
tmp_path = Path(tmp_path_str)
size_delta = new_size - existing_size
object_delta = 0 if is_overwrite else 1
if enforce_quota:
quota_check = self.check_quota(
bucket_name,
additional_bytes=max(0, size_delta),
additional_objects=object_delta,
)
if not quota_check["allowed"]:
raise QuotaExceededError(
quota_check["message"] or "Quota exceeded",
quota_check["quota"],
quota_check["usage"],
)
except BaseException:
if tmp_path:
try:
tmp_path.unlink(missing_ok=True)
except OSError:
pass
raise
else:
tmp_path = tmp_dir / f"{uuid.uuid4().hex}.tmp"
try:
checksum = hashlib.md5()
with tmp_path.open("wb") as target:
shutil.copyfileobj(_HashingReader(stream, checksum), target)
target.flush()
os.fsync(target.fileno())
new_size = tmp_path.stat().st_size
size_delta = new_size - existing_size
object_delta = 0 if is_overwrite else 1
if enforce_quota:
quota_check = self.check_quota(
bucket_name,
additional_bytes=max(0, size_delta),
additional_objects=object_delta,
)
if not quota_check["allowed"]:
raise QuotaExceededError(
quota_check["message"] or "Quota exceeded",
quota_check["quota"],
quota_check["usage"],
)
etag = checksum.hexdigest()
except BaseException:
try:
tmp_path.unlink(missing_ok=True)
except OSError:
pass
raise
lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock"
tmp_path = tmp_dir / f"{uuid.uuid4().hex}.tmp"
try:
with _atomic_lock_file(lock_file_path):
archived_version_size = 0
if self._is_versioning_enabled(bucket_path) and is_overwrite:
archived_version_size = existing_size
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
checksum = hashlib.md5()
with tmp_path.open("wb") as target:
shutil.copyfileobj(_HashingReader(stream, checksum), target)
new_size = tmp_path.stat().st_size
size_delta = new_size - existing_size
object_delta = 0 if is_overwrite else 1
shutil.move(str(tmp_path), str(destination))
tmp_path = None
if enforce_quota:
quota_check = self.check_quota(
bucket_name,
additional_bytes=max(0, size_delta),
additional_objects=object_delta,
)
if not quota_check["allowed"]:
raise QuotaExceededError(
quota_check["message"] or "Quota exceeded",
quota_check["quota"],
quota_check["usage"],
)
stat = destination.stat()
internal_meta = {"__etag__": etag, "__size__": str(stat.st_size), "__last_modified__": str(stat.st_mtime)}
combined_meta = {**internal_meta, **(metadata or {})}
self._write_metadata(bucket_id, safe_key, combined_meta)
except BlockingIOError:
shutil.move(str(tmp_path), str(destination))
finally:
try:
if tmp_path:
tmp_path.unlink(missing_ok=True)
tmp_path.unlink(missing_ok=True)
except OSError:
pass
raise StorageError("Another upload to this key is in progress")
finally:
if tmp_path:
try:
tmp_path.unlink(missing_ok=True)
except OSError:
pass
stat = destination.stat()
etag = checksum.hexdigest()
internal_meta = {"__etag__": etag, "__size__": str(stat.st_size)}
combined_meta = {**internal_meta, **(metadata or {})}
self._write_metadata(bucket_id, safe_key, combined_meta)
self._update_bucket_stats_cache(
bucket_id,
@@ -1084,30 +912,6 @@ class ObjectStorage:
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
return self._read_metadata(bucket_path.name, safe_key) or {}
def heal_missing_etag(self, bucket_name: str, object_key: str, etag: str) -> None:
"""Persist a computed ETag back to metadata (self-heal on read)."""
try:
bucket_path = self._bucket_path(bucket_name)
if not bucket_path.exists():
return
bucket_id = bucket_path.name
safe_key = self._sanitize_object_key(object_key, self._object_key_max_length_bytes)
existing = self._read_metadata(bucket_id, safe_key) or {}
if existing.get("__etag__"):
return
existing["__etag__"] = etag
self._write_metadata(bucket_id, safe_key, existing)
with self._obj_cache_lock:
cached = self._object_cache.get(bucket_id)
if cached:
obj = cached[0].get(safe_key.as_posix())
if obj and not obj.etag:
obj.etag = etag
self._etag_index_dirty.add(bucket_id)
self._schedule_etag_index_flush()
except Exception:
logger.warning("Failed to heal missing ETag for %s/%s", bucket_name, object_key)
def _cleanup_empty_parents(self, path: Path, stop_at: Path) -> None:
"""Remove empty parent directories in a background thread.
@@ -1661,22 +1465,14 @@ class ObjectStorage:
if not upload_root.exists():
raise StorageError("Multipart upload not found")
checksum = hashlib.md5()
part_filename = f"part-{part_number:05d}.part"
part_path = upload_root / part_filename
temp_path = upload_root / f".{part_filename}.tmp"
try:
if _HAS_RUST:
with temp_path.open("wb") as target:
shutil.copyfileobj(stream, target)
part_etag = _rc.md5_file(str(temp_path))
else:
checksum = hashlib.md5()
with temp_path.open("wb") as target:
shutil.copyfileobj(_HashingReader(stream, checksum), target)
target.flush()
os.fsync(target.fileno())
part_etag = checksum.hexdigest()
with temp_path.open("wb") as target:
shutil.copyfileobj(_HashingReader(stream, checksum), target)
temp_path.replace(part_path)
except OSError:
try:
@@ -1686,7 +1482,7 @@ class ObjectStorage:
raise
record = {
"etag": part_etag,
"etag": checksum.hexdigest(),
"size": part_path.stat().st_size,
"filename": part_filename,
}
@@ -1709,7 +1505,7 @@ class ObjectStorage:
parts = manifest.setdefault("parts", {})
parts[str(part_number)] = record
self._atomic_write_json(manifest_path, manifest)
manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
break
except OSError as exc:
if attempt < max_retries - 1:
@@ -1802,7 +1598,7 @@ class ObjectStorage:
parts = manifest.setdefault("parts", {})
parts[str(part_number)] = record
self._atomic_write_json(manifest_path, manifest)
manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
break
except OSError as exc:
if attempt < max_retries - 1:
@@ -1886,31 +1682,19 @@ class ObjectStorage:
if versioning_enabled and destination.exists():
archived_version_size = destination.stat().st_size
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
if _HAS_RUST:
part_paths = []
checksum = hashlib.md5()
with destination.open("wb") as target:
for _, record in validated:
pp = upload_root / record["filename"]
if not pp.exists():
part_path = upload_root / record["filename"]
if not part_path.exists():
raise StorageError(f"Missing part file {record['filename']}")
part_paths.append(str(pp))
checksum_hex = _rc.assemble_parts_with_md5(part_paths, str(destination))
else:
checksum = hashlib.md5()
with destination.open("wb") as target:
for _, record in validated:
part_path = upload_root / record["filename"]
if not part_path.exists():
raise StorageError(f"Missing part file {record['filename']}")
with part_path.open("rb") as chunk:
while True:
data = chunk.read(1024 * 1024)
if not data:
break
checksum.update(data)
target.write(data)
target.flush()
os.fsync(target.fileno())
checksum_hex = checksum.hexdigest()
with part_path.open("rb") as chunk:
while True:
data = chunk.read(1024 * 1024)
if not data:
break
checksum.update(data)
target.write(data)
except BlockingIOError:
raise StorageError("Another upload to this key is in progress")
@@ -1925,10 +1709,10 @@ class ObjectStorage:
)
stat = destination.stat()
etag = checksum_hex
etag = checksum.hexdigest()
metadata = manifest.get("metadata")
internal_meta = {"__etag__": etag, "__size__": str(stat.st_size), "__last_modified__": str(stat.st_mtime)}
internal_meta = {"__etag__": etag, "__size__": str(stat.st_size)}
combined_meta = {**internal_meta, **(metadata or {})}
self._write_metadata(bucket_id, safe_key, combined_meta)
@@ -2117,7 +1901,6 @@ class ObjectStorage:
etag_index_path.parent.mkdir(parents=True, exist_ok=True)
with open(etag_index_path, 'w', encoding='utf-8') as f:
json.dump(raw["etag_cache"], f)
self._etag_index_mem[bucket_id] = (dict(raw["etag_cache"]), etag_index_path.stat().st_mtime)
except OSError:
pass
for key, size, mtime, etag in raw["objects"]:
@@ -2241,7 +2024,6 @@ class ObjectStorage:
etag_index_path.parent.mkdir(parents=True, exist_ok=True)
with open(etag_index_path, 'w', encoding='utf-8') as f:
json.dump(meta_cache, f)
self._etag_index_mem[bucket_id] = (dict(meta_cache), etag_index_path.stat().st_mtime)
except OSError:
pass
@@ -2291,19 +2073,19 @@ class ObjectStorage:
now = time.time()
current_stats_mtime = self._get_cache_marker_mtime(bucket_id)
with self._obj_cache_lock:
with self._cache_lock:
cached = self._object_cache.get(bucket_id)
if cached:
objects, timestamp, cached_stats_mtime = cached
if now - timestamp < self._cache_ttl and current_stats_mtime == cached_stats_mtime:
self._object_cache.move_to_end(bucket_id)
return objects
cache_version = self._cache_version.get(bucket_id, 0)
bucket_lock = self._get_bucket_lock(bucket_id)
with bucket_lock:
now = time.time()
current_stats_mtime = self._get_cache_marker_mtime(bucket_id)
with self._obj_cache_lock:
with self._cache_lock:
cached = self._object_cache.get(bucket_id)
if cached:
objects, timestamp, cached_stats_mtime = cached
@@ -2314,23 +2096,31 @@ class ObjectStorage:
objects = self._build_object_cache(bucket_path)
new_stats_mtime = self._get_cache_marker_mtime(bucket_id)
with self._obj_cache_lock:
with self._cache_lock:
current_version = self._cache_version.get(bucket_id, 0)
if current_version != cache_version:
objects = self._build_object_cache(bucket_path)
new_stats_mtime = self._get_cache_marker_mtime(bucket_id)
while len(self._object_cache) >= self._object_cache_max_size:
self._object_cache.popitem(last=False)
self._object_cache[bucket_id] = (objects, time.time(), new_stats_mtime)
self._object_cache.move_to_end(bucket_id)
self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1
self._cache_version[bucket_id] = current_version + 1
self._sorted_key_cache.pop(bucket_id, None)
return objects
def _invalidate_object_cache(self, bucket_id: str) -> None:
with self._obj_cache_lock:
"""Invalidate the object cache and etag index for a bucket.
Increments version counter to signal stale reads.
Cross-process invalidation is handled by checking stats.json mtime.
"""
with self._cache_lock:
self._object_cache.pop(bucket_id, None)
self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1
self._etag_index_dirty.discard(bucket_id)
etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
try:
etag_index_path.unlink(missing_ok=True)
@@ -2338,10 +2128,22 @@ class ObjectStorage:
pass
def _get_cache_marker_mtime(self, bucket_id: str) -> float:
return float(self._stats_serial.get(bucket_id, 0))
"""Get a cache marker combining serial and object count for cross-process invalidation.
Returns a combined value that changes if either _cache_serial or object count changes.
This handles cases where the serial was reset but object count differs.
"""
stats_path = self._system_bucket_root(bucket_id) / "stats.json"
try:
data = json.loads(stats_path.read_text(encoding="utf-8"))
serial = data.get("_cache_serial", 0)
count = data.get("objects", 0)
return float(serial * 1000000 + count)
except (OSError, json.JSONDecodeError):
return 0
def _update_object_cache_entry(self, bucket_id: str, key: str, meta: Optional[ObjectMeta]) -> None:
with self._obj_cache_lock:
with self._cache_lock:
cached = self._object_cache.get(bucket_id)
if cached:
objects, timestamp, stats_mtime = cached
@@ -2352,50 +2154,23 @@ class ObjectStorage:
self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1
self._sorted_key_cache.pop(bucket_id, None)
self._etag_index_dirty.add(bucket_id)
self._schedule_etag_index_flush()
self._update_etag_index(bucket_id, key, meta.etag if meta else None)
def _get_etag_index(self, bucket_id: str) -> Dict[str, str]:
def _update_etag_index(self, bucket_id: str, key: str, etag: Optional[str]) -> None:
etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
try:
current_mtime = etag_index_path.stat().st_mtime
except OSError:
return {}
cached = self._etag_index_mem.get(bucket_id)
if cached:
cache_dict, cached_mtime = cached
if current_mtime == cached_mtime:
return cache_dict
if not etag_index_path.exists():
return
try:
with open(etag_index_path, 'r', encoding='utf-8') as f:
data = json.load(f)
self._etag_index_mem[bucket_id] = (data, current_mtime)
return data
index = json.load(f)
if etag is None:
index.pop(key, None)
else:
index[key] = etag
with open(etag_index_path, 'w', encoding='utf-8') as f:
json.dump(index, f)
except (OSError, json.JSONDecodeError):
return {}
def _schedule_etag_index_flush(self) -> None:
if self._etag_index_flush_timer is None or not self._etag_index_flush_timer.is_alive():
self._etag_index_flush_timer = threading.Timer(5.0, self._flush_etag_indexes)
self._etag_index_flush_timer.daemon = True
self._etag_index_flush_timer.start()
def _flush_etag_indexes(self) -> None:
dirty = set(self._etag_index_dirty)
self._etag_index_dirty.clear()
for bucket_id in dirty:
with self._obj_cache_lock:
cached = self._object_cache.get(bucket_id)
if not cached:
continue
objects = cached[0]
index = {k: v.etag for k, v in objects.items() if v.etag}
etag_index_path = self._system_bucket_root(bucket_id) / "etag_index.json"
try:
self._atomic_write_json(etag_index_path, index, sync=False)
self._etag_index_mem[bucket_id] = (index, etag_index_path.stat().st_mtime)
except OSError:
logger.warning("Failed to flush etag index for bucket %s", bucket_id)
pass
def warm_cache(self, bucket_names: Optional[List[str]] = None) -> None:
"""Pre-warm the object cache for specified buckets or all buckets.
@@ -2436,24 +2211,6 @@ class ObjectStorage:
):
path.mkdir(parents=True, exist_ok=True)
@staticmethod
def _atomic_write_json(path: Path, data: Any, *, sync: bool = True) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
tmp_path = path.with_suffix(".tmp")
try:
with tmp_path.open("w", encoding="utf-8") as f:
json.dump(data, f)
if sync:
f.flush()
os.fsync(f.fileno())
tmp_path.replace(path)
except BaseException:
try:
tmp_path.unlink(missing_ok=True)
except OSError:
pass
raise
def _multipart_dir(self, bucket_name: str, upload_id: str) -> Path:
return self._multipart_bucket_root(bucket_name) / upload_id
@@ -2470,7 +2227,12 @@ class ObjectStorage:
if cached:
config, cached_time, cached_mtime = cached
if now - cached_time < self._bucket_config_cache_ttl:
return config.copy()
try:
current_mtime = config_path.stat().st_mtime if config_path.exists() else 0.0
except OSError:
current_mtime = 0.0
if current_mtime == cached_mtime:
return config.copy()
if not config_path.exists():
self._bucket_config_cache[bucket_name] = ({}, now, 0.0)
@@ -2488,7 +2250,7 @@ class ObjectStorage:
def _write_bucket_config(self, bucket_name: str, payload: dict[str, Any]) -> None:
config_path = self._bucket_config_path(bucket_name)
config_path.parent.mkdir(parents=True, exist_ok=True)
self._atomic_write_json(config_path, payload)
config_path.write_text(json.dumps(payload), encoding="utf-8")
try:
mtime = config_path.stat().st_mtime
except OSError:
@@ -2522,7 +2284,8 @@ class ObjectStorage:
def _write_multipart_manifest(self, upload_root: Path, manifest: dict[str, Any]) -> None:
manifest_path = upload_root / self.MULTIPART_MANIFEST
self._atomic_write_json(manifest_path, manifest)
manifest_path.parent.mkdir(parents=True, exist_ok=True)
manifest_path.write_text(json.dumps(manifest), encoding="utf-8")
def _metadata_file(self, bucket_name: str, key: Path) -> Path:
meta_root = self._bucket_meta_root(bucket_name)
@@ -2538,19 +2301,19 @@ class ObjectStorage:
return meta_root / parent / "_index.json", entry_name
def _get_meta_index_lock(self, index_path: str) -> threading.Lock:
with self._registry_lock:
with self._cache_lock:
if index_path not in self._meta_index_locks:
self._meta_index_locks[index_path] = threading.Lock()
return self._meta_index_locks[index_path]
def _read_index_entry(self, bucket_name: str, key: Path) -> Optional[Dict[str, Any]]:
cache_key = (bucket_name, str(key))
with self._meta_cache_lock:
with self._cache_lock:
hit = self._meta_read_cache.get(cache_key)
if hit is not None:
self._meta_read_cache.move_to_end(cache_key)
cached = hit[0]
return dict(cached) if cached is not None else None
return copy.deepcopy(cached) if cached is not None else None
index_path, entry_name = self._index_file_for_key(bucket_name, key)
if _HAS_RUST:
@@ -2565,16 +2328,16 @@ class ObjectStorage:
except (OSError, json.JSONDecodeError):
result = None
with self._meta_cache_lock:
with self._cache_lock:
while len(self._meta_read_cache) >= self._meta_read_cache_max:
self._meta_read_cache.popitem(last=False)
self._meta_read_cache[cache_key] = (dict(result) if result is not None else None,)
self._meta_read_cache[cache_key] = (copy.deepcopy(result) if result is not None else None,)
return result
def _invalidate_meta_read_cache(self, bucket_name: str, key: Path) -> None:
cache_key = (bucket_name, str(key))
with self._meta_cache_lock:
with self._cache_lock:
self._meta_read_cache.pop(cache_key, None)
def _write_index_entry(self, bucket_name: str, key: Path, entry: Dict[str, Any]) -> None:
@@ -2592,7 +2355,7 @@ class ObjectStorage:
except (OSError, json.JSONDecodeError):
pass
index_data[entry_name] = entry
self._atomic_write_json(index_path, index_data)
index_path.write_text(json.dumps(index_data), encoding="utf-8")
self._invalidate_meta_read_cache(bucket_name, key)
def _delete_index_entry(self, bucket_name: str, key: Path) -> None:
@@ -2613,7 +2376,7 @@ class ObjectStorage:
if entry_name in index_data:
del index_data[entry_name]
if index_data:
self._atomic_write_json(index_path, index_data)
index_path.write_text(json.dumps(index_data), encoding="utf-8")
else:
try:
index_path.unlink()
@@ -2662,7 +2425,7 @@ class ObjectStorage:
"reason": reason,
}
manifest_path = version_dir / f"{version_id}.json"
self._atomic_write_json(manifest_path, record)
manifest_path.write_text(json.dumps(record), encoding="utf-8")
def _read_metadata(self, bucket_name: str, key: Path) -> Dict[str, str]:
entry = self._read_index_entry(bucket_name, key)

View File

@@ -225,10 +225,10 @@ def _policy_allows_public_read(policy: dict[str, Any]) -> bool:
def _bucket_access_descriptor(policy: dict[str, Any] | None) -> tuple[str, str]:
if not policy:
return ("IAM only", "bg-secondary-subtle text-secondary-emphasis")
return ("IAM only", "text-bg-secondary")
if _policy_allows_public_read(policy):
return ("Public read", "bg-warning-subtle text-warning-emphasis")
return ("Custom policy", "bg-info-subtle text-info-emphasis")
return ("Public read", "text-bg-warning")
return ("Custom policy", "text-bg-info")
def _current_principal():
@@ -618,77 +618,20 @@ def stream_bucket_objects(bucket_name: str):
prefix = request.args.get("prefix") or None
delimiter = request.args.get("delimiter") or None
storage = _storage()
try:
versioning_enabled = storage.is_versioning_enabled(bucket_name)
except StorageError:
versioning_enabled = False
client = get_session_s3_client()
except (PermissionError, RuntimeError) as exc:
return jsonify({"error": str(exc)}), 403
versioning_enabled = get_versioning_via_s3(client, bucket_name)
url_templates = build_url_templates(bucket_name)
display_tz = current_app.config.get("DISPLAY_TIMEZONE", "UTC")
def generate():
yield json.dumps({
"type": "meta",
"versioning_enabled": versioning_enabled,
"url_templates": url_templates,
}) + "\n"
yield json.dumps({"type": "count", "total_count": 0}) + "\n"
running_count = 0
try:
if delimiter:
for item_type, item in storage.iter_objects_shallow(
bucket_name, prefix=prefix or "", delimiter=delimiter,
):
if item_type == "folder":
yield json.dumps({"type": "folder", "prefix": item}) + "\n"
else:
last_mod = item.last_modified
yield json.dumps({
"type": "object",
"key": item.key,
"size": item.size,
"last_modified": last_mod.isoformat(),
"last_modified_display": _format_datetime_display(last_mod, display_tz),
"last_modified_iso": _format_datetime_iso(last_mod, display_tz),
"etag": item.etag or "",
}) + "\n"
running_count += 1
if running_count % 1000 == 0:
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
else:
continuation_token = None
while True:
result = storage.list_objects(
bucket_name,
max_keys=1000,
continuation_token=continuation_token,
prefix=prefix,
)
for obj in result.objects:
last_mod = obj.last_modified
yield json.dumps({
"type": "object",
"key": obj.key,
"size": obj.size,
"last_modified": last_mod.isoformat(),
"last_modified_display": _format_datetime_display(last_mod, display_tz),
"last_modified_iso": _format_datetime_iso(last_mod, display_tz),
"etag": obj.etag or "",
}) + "\n"
running_count += len(result.objects)
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
if not result.is_truncated:
break
continuation_token = result.next_continuation_token
except StorageError as exc:
yield json.dumps({"type": "error", "error": str(exc)}) + "\n"
return
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
yield json.dumps({"type": "done"}) + "\n"
return Response(
generate(),
stream_objects_ndjson(
client, bucket_name, prefix, url_templates, display_tz, versioning_enabled,
delimiter=delimiter,
),
mimetype='application/x-ndjson',
headers={
'Cache-Control': 'no-cache',
@@ -1063,27 +1006,6 @@ def bulk_delete_objects(bucket_name: str):
return _respond(False, f"A maximum of {MAX_KEYS} objects can be deleted per request", status_code=400)
unique_keys = list(dict.fromkeys(cleaned))
folder_prefixes = [k for k in unique_keys if k.endswith("/")]
if folder_prefixes:
try:
client = get_session_s3_client()
for prefix in folder_prefixes:
unique_keys.remove(prefix)
paginator = client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
for obj in page.get("Contents", []):
if obj["Key"] not in unique_keys:
unique_keys.append(obj["Key"])
except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
if isinstance(exc, ClientError):
err, status = handle_client_error(exc)
return _respond(False, err["error"], status_code=status)
return _respond(False, "S3 API server is unreachable", status_code=502)
if not unique_keys:
return _respond(False, "No objects found under the selected folders", status_code=400)
try:
_authorize_ui(principal, bucket_name, "delete")
except IamError as exc:
@@ -1114,17 +1036,13 @@ def bulk_delete_objects(bucket_name: str):
else:
try:
client = get_session_s3_client()
deleted = []
errors = []
for i in range(0, len(unique_keys), 1000):
batch = unique_keys[i:i + 1000]
objects_to_delete = [{"Key": k} for k in batch]
resp = client.delete_objects(
Bucket=bucket_name,
Delete={"Objects": objects_to_delete, "Quiet": False},
)
deleted.extend(d["Key"] for d in resp.get("Deleted", []))
errors.extend({"key": e["Key"], "error": e.get("Message", e.get("Code", "Unknown error"))} for e in resp.get("Errors", []))
objects_to_delete = [{"Key": k} for k in unique_keys]
resp = client.delete_objects(
Bucket=bucket_name,
Delete={"Objects": objects_to_delete, "Quiet": False},
)
deleted = [d["Key"] for d in resp.get("Deleted", [])]
errors = [{"key": e["Key"], "error": e.get("Message", e.get("Code", "Unknown error"))} for e in resp.get("Errors", [])]
for key in deleted:
_replication_manager().trigger_replication(bucket_name, key, action="delete")
except (ClientError, EndpointConnectionError, ConnectionClosedError) as exc:
@@ -1836,10 +1754,6 @@ def iam_dashboard():
users = iam_service.list_users() if not locked else []
config_summary = iam_service.config_summary()
config_document = json.dumps(iam_service.export_config(mask_secrets=True), indent=2)
from datetime import datetime as _dt, timedelta as _td, timezone as _tz
_now = _dt.now(_tz.utc)
now_iso = _now.isoformat()
soon_iso = (_now + _td(days=7)).isoformat()
return render_template(
"iam.html",
users=users,
@@ -1849,8 +1763,6 @@ def iam_dashboard():
config_summary=config_summary,
config_document=config_document,
disclosed_secret=disclosed_secret,
now_iso=now_iso,
soon_iso=soon_iso,
)
@@ -1870,8 +1782,6 @@ def create_iam_user():
return jsonify({"error": "Display name must be 64 characters or fewer"}), 400
flash("Display name must be 64 characters or fewer", "danger")
return redirect(url_for("ui.iam_dashboard"))
custom_access_key = request.form.get("access_key", "").strip() or None
custom_secret_key = request.form.get("secret_key", "").strip() or None
policies_text = request.form.get("policies", "").strip()
policies = None
if policies_text:
@@ -1882,21 +1792,8 @@ def create_iam_user():
return jsonify({"error": f"Invalid JSON: {exc}"}), 400
flash(f"Invalid JSON: {exc}", "danger")
return redirect(url_for("ui.iam_dashboard"))
expires_at = request.form.get("expires_at", "").strip() or None
if expires_at:
try:
from datetime import datetime as _dt, timezone as _tz
exp_dt = _dt.fromisoformat(expires_at)
if exp_dt.tzinfo is None:
exp_dt = exp_dt.replace(tzinfo=_tz.utc)
expires_at = exp_dt.isoformat()
except (ValueError, TypeError):
if _wants_json():
return jsonify({"error": "Invalid expiry date format"}), 400
flash("Invalid expiry date format", "danger")
return redirect(url_for("ui.iam_dashboard"))
try:
created = _iam().create_user(display_name=display_name, policies=policies, access_key=custom_access_key, secret_key=custom_secret_key, expires_at=expires_at)
created = _iam().create_user(display_name=display_name, policies=policies)
except IamError as exc:
if _wants_json():
return jsonify({"error": str(exc)}), 400
@@ -2070,45 +1967,6 @@ def update_iam_policies(access_key: str):
return redirect(url_for("ui.iam_dashboard"))
@ui_bp.post("/iam/users/<access_key>/expiry")
def update_iam_expiry(access_key: str):
principal = _current_principal()
try:
_iam().authorize(principal, None, "iam:update_policy")
except IamError as exc:
if _wants_json():
return jsonify({"error": str(exc)}), 403
flash(str(exc), "danger")
return redirect(url_for("ui.iam_dashboard"))
expires_at = request.form.get("expires_at", "").strip() or None
if expires_at:
try:
from datetime import datetime as _dt, timezone as _tz
exp_dt = _dt.fromisoformat(expires_at)
if exp_dt.tzinfo is None:
exp_dt = exp_dt.replace(tzinfo=_tz.utc)
expires_at = exp_dt.isoformat()
except (ValueError, TypeError):
if _wants_json():
return jsonify({"error": "Invalid expiry date format"}), 400
flash("Invalid expiry date format", "danger")
return redirect(url_for("ui.iam_dashboard"))
try:
_iam().update_user_expiry(access_key, expires_at)
if _wants_json():
return jsonify({"success": True, "message": f"Updated expiry for {access_key}", "expires_at": expires_at})
label = expires_at if expires_at else "never"
flash(f"Expiry for {access_key} set to {label}", "success")
except IamError as exc:
if _wants_json():
return jsonify({"error": str(exc)}), 400
flash(str(exc), "danger")
return redirect(url_for("ui.iam_dashboard"))
@ui_bp.post("/connections")
def create_connection():
principal = _current_principal()
@@ -4123,182 +3981,6 @@ def get_peer_sync_stats(site_id: str):
return jsonify(stats)
@ui_bp.get("/system")
def system_dashboard():
principal = _current_principal()
try:
_iam().authorize(principal, None, "iam:*")
except IamError:
flash("Access denied: System page requires admin permissions", "danger")
return redirect(url_for("ui.buckets_overview"))
import platform as _platform
import sys
from app.version import APP_VERSION
try:
import myfsio_core as _rc
has_rust = True
except ImportError:
has_rust = False
gc = current_app.extensions.get("gc")
gc_status = gc.get_status() if gc else {"enabled": False}
gc_history_records = []
if gc:
raw = gc.get_history(limit=10, offset=0)
for rec in raw:
r = rec.get("result", {})
total_freed = r.get("temp_bytes_freed", 0) + r.get("multipart_bytes_freed", 0) + r.get("orphaned_version_bytes_freed", 0)
rec["bytes_freed_display"] = _format_bytes(total_freed)
rec["timestamp_display"] = _format_datetime_display(datetime.fromtimestamp(rec["timestamp"], tz=dt_timezone.utc))
gc_history_records.append(rec)
checker = current_app.extensions.get("integrity")
integrity_status = checker.get_status() if checker else {"enabled": False}
integrity_history_records = []
if checker:
raw = checker.get_history(limit=10, offset=0)
for rec in raw:
rec["timestamp_display"] = _format_datetime_display(datetime.fromtimestamp(rec["timestamp"], tz=dt_timezone.utc))
integrity_history_records.append(rec)
features = [
{"label": "Encryption (SSE-S3)", "enabled": current_app.config.get("ENCRYPTION_ENABLED", False)},
{"label": "KMS", "enabled": current_app.config.get("KMS_ENABLED", False)},
{"label": "Versioning Lifecycle", "enabled": current_app.config.get("LIFECYCLE_ENABLED", False)},
{"label": "Metrics History", "enabled": current_app.config.get("METRICS_HISTORY_ENABLED", False)},
{"label": "Operation Metrics", "enabled": current_app.config.get("OPERATION_METRICS_ENABLED", False)},
{"label": "Site Sync", "enabled": current_app.config.get("SITE_SYNC_ENABLED", False)},
{"label": "Website Hosting", "enabled": current_app.config.get("WEBSITE_HOSTING_ENABLED", False)},
{"label": "Garbage Collection", "enabled": current_app.config.get("GC_ENABLED", False)},
{"label": "Integrity Scanner", "enabled": current_app.config.get("INTEGRITY_ENABLED", False)},
]
return render_template(
"system.html",
principal=principal,
app_version=APP_VERSION,
storage_root=current_app.config.get("STORAGE_ROOT", "./data"),
platform=_platform.platform(),
python_version=sys.version.split()[0],
has_rust=has_rust,
features=features,
gc_status=gc_status,
gc_history=gc_history_records,
integrity_status=integrity_status,
integrity_history=integrity_history_records,
display_timezone=current_app.config.get("DISPLAY_TIMEZONE", "UTC"),
)
@ui_bp.post("/system/gc/run")
def system_gc_run():
principal = _current_principal()
try:
_iam().authorize(principal, None, "iam:*")
except IamError:
return jsonify({"error": "Access denied"}), 403
gc = current_app.extensions.get("gc")
if not gc:
return jsonify({"error": "GC is not enabled"}), 400
payload = request.get_json(silent=True) or {}
started = gc.run_async(dry_run=payload.get("dry_run"))
if not started:
return jsonify({"error": "GC is already in progress"}), 409
return jsonify({"status": "started"})
@ui_bp.get("/system/gc/status")
def system_gc_status():
principal = _current_principal()
try:
_iam().authorize(principal, None, "iam:*")
except IamError:
return jsonify({"error": "Access denied"}), 403
gc = current_app.extensions.get("gc")
if not gc:
return jsonify({"error": "GC is not enabled"}), 400
return jsonify(gc.get_status())
@ui_bp.get("/system/gc/history")
def system_gc_history():
principal = _current_principal()
try:
_iam().authorize(principal, None, "iam:*")
except IamError:
return jsonify({"error": "Access denied"}), 403
gc = current_app.extensions.get("gc")
if not gc:
return jsonify({"executions": []})
limit = min(int(request.args.get("limit", 10)), 200)
offset = int(request.args.get("offset", 0))
records = gc.get_history(limit=limit, offset=offset)
return jsonify({"executions": records})
@ui_bp.post("/system/integrity/run")
def system_integrity_run():
principal = _current_principal()
try:
_iam().authorize(principal, None, "iam:*")
except IamError:
return jsonify({"error": "Access denied"}), 403
checker = current_app.extensions.get("integrity")
if not checker:
return jsonify({"error": "Integrity checker is not enabled"}), 400
payload = request.get_json(silent=True) or {}
started = checker.run_async(
auto_heal=payload.get("auto_heal"),
dry_run=payload.get("dry_run"),
)
if not started:
return jsonify({"error": "A scan is already in progress"}), 409
return jsonify({"status": "started"})
@ui_bp.get("/system/integrity/status")
def system_integrity_status():
principal = _current_principal()
try:
_iam().authorize(principal, None, "iam:*")
except IamError:
return jsonify({"error": "Access denied"}), 403
checker = current_app.extensions.get("integrity")
if not checker:
return jsonify({"error": "Integrity checker is not enabled"}), 400
return jsonify(checker.get_status())
@ui_bp.get("/system/integrity/history")
def system_integrity_history():
principal = _current_principal()
try:
_iam().authorize(principal, None, "iam:*")
except IamError:
return jsonify({"error": "Access denied"}), 403
checker = current_app.extensions.get("integrity")
if not checker:
return jsonify({"executions": []})
limit = min(int(request.args.get("limit", 10)), 200)
offset = int(request.args.get("offset", 0))
records = checker.get_history(limit=limit, offset=offset)
return jsonify({"executions": records})
@ui_bp.app_errorhandler(404)
def ui_not_found(error): # type: ignore[override]
prefix = ui_bp.url_prefix or ""

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
APP_VERSION = "0.4.3"
APP_VERSION = "0.3.3"
def get_version() -> str:

5
docker-entrypoint.sh Normal file
View File

@@ -0,0 +1,5 @@
#!/bin/sh
set -e
# Run both services using the python runner in production mode
exec python run.py --prod

2551
docs.md

File diff suppressed because it is too large Load Diff

View File

@@ -19,6 +19,3 @@ regex = "1"
lru = "0.14"
parking_lot = "0.12"
percent-encoding = "2"
aes-gcm = "0.10"
hkdf = "0.12"
uuid = { version = "1", features = ["v4"] }

View File

@@ -1,9 +1,7 @@
mod crypto;
mod hashing;
mod metadata;
mod sigv4;
mod storage;
mod streaming;
mod validation;
use pyo3::prelude::*;
@@ -40,12 +38,6 @@ mod myfsio_core {
m.add_function(wrap_pyfunction!(storage::search_objects_scan, m)?)?;
m.add_function(wrap_pyfunction!(storage::build_object_cache, m)?)?;
m.add_function(wrap_pyfunction!(streaming::stream_to_file_with_md5, m)?)?;
m.add_function(wrap_pyfunction!(streaming::assemble_parts_with_md5, m)?)?;
m.add_function(wrap_pyfunction!(crypto::encrypt_stream_chunked, m)?)?;
m.add_function(wrap_pyfunction!(crypto::decrypt_stream_chunked, m)?)?;
Ok(())
}
}

View File

@@ -125,7 +125,7 @@ pub fn delete_index_entry(py: Python<'_>, path: &str, entry_name: &str) -> PyRes
fs::write(&path_owned, serialized)
.map_err(|e| PyIOError::new_err(format!("Failed to write index: {}", e)))?;
Ok(true)
Ok(false)
})
}

View File

@@ -1,596 +0,0 @@
from __future__ import annotations
import json
import logging
import os
import shutil
import threading
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class GCResult:
temp_files_deleted: int = 0
temp_bytes_freed: int = 0
multipart_uploads_deleted: int = 0
multipart_bytes_freed: int = 0
lock_files_deleted: int = 0
orphaned_metadata_deleted: int = 0
orphaned_versions_deleted: int = 0
orphaned_version_bytes_freed: int = 0
empty_dirs_removed: int = 0
errors: List[str] = field(default_factory=list)
execution_time_seconds: float = 0.0
def to_dict(self) -> dict:
return {
"temp_files_deleted": self.temp_files_deleted,
"temp_bytes_freed": self.temp_bytes_freed,
"multipart_uploads_deleted": self.multipart_uploads_deleted,
"multipart_bytes_freed": self.multipart_bytes_freed,
"lock_files_deleted": self.lock_files_deleted,
"orphaned_metadata_deleted": self.orphaned_metadata_deleted,
"orphaned_versions_deleted": self.orphaned_versions_deleted,
"orphaned_version_bytes_freed": self.orphaned_version_bytes_freed,
"empty_dirs_removed": self.empty_dirs_removed,
"errors": self.errors,
"execution_time_seconds": self.execution_time_seconds,
}
@property
def total_bytes_freed(self) -> int:
return self.temp_bytes_freed + self.multipart_bytes_freed + self.orphaned_version_bytes_freed
@property
def has_work(self) -> bool:
return (
self.temp_files_deleted > 0
or self.multipart_uploads_deleted > 0
or self.lock_files_deleted > 0
or self.orphaned_metadata_deleted > 0
or self.orphaned_versions_deleted > 0
or self.empty_dirs_removed > 0
)
@dataclass
class GCExecutionRecord:
timestamp: float
result: dict
dry_run: bool
def to_dict(self) -> dict:
return {
"timestamp": self.timestamp,
"result": self.result,
"dry_run": self.dry_run,
}
@classmethod
def from_dict(cls, data: dict) -> GCExecutionRecord:
return cls(
timestamp=data["timestamp"],
result=data["result"],
dry_run=data.get("dry_run", False),
)
class GCHistoryStore:
def __init__(self, storage_root: Path, max_records: int = 50) -> None:
self.storage_root = storage_root
self.max_records = max_records
self._lock = threading.Lock()
def _get_path(self) -> Path:
return self.storage_root / ".myfsio.sys" / "config" / "gc_history.json"
def load(self) -> List[GCExecutionRecord]:
path = self._get_path()
if not path.exists():
return []
try:
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
return [GCExecutionRecord.from_dict(d) for d in data.get("executions", [])]
except (OSError, ValueError, KeyError) as e:
logger.error("Failed to load GC history: %s", e)
return []
def save(self, records: List[GCExecutionRecord]) -> None:
path = self._get_path()
path.parent.mkdir(parents=True, exist_ok=True)
data = {"executions": [r.to_dict() for r in records[: self.max_records]]}
try:
with open(path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2)
except OSError as e:
logger.error("Failed to save GC history: %s", e)
def add(self, record: GCExecutionRecord) -> None:
with self._lock:
records = self.load()
records.insert(0, record)
self.save(records)
def get_history(self, limit: int = 50, offset: int = 0) -> List[GCExecutionRecord]:
return self.load()[offset : offset + limit]
def _dir_size(path: Path) -> int:
total = 0
try:
for f in path.rglob("*"):
if f.is_file():
try:
total += f.stat().st_size
except OSError:
pass
except OSError:
pass
return total
def _file_age_hours(path: Path) -> float:
try:
mtime = path.stat().st_mtime
return (time.time() - mtime) / 3600.0
except OSError:
return 0.0
class GarbageCollector:
SYSTEM_ROOT = ".myfsio.sys"
SYSTEM_TMP_DIR = "tmp"
SYSTEM_MULTIPART_DIR = "multipart"
SYSTEM_BUCKETS_DIR = "buckets"
BUCKET_META_DIR = "meta"
BUCKET_VERSIONS_DIR = "versions"
INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
def __init__(
self,
storage_root: Path,
interval_hours: float = 6.0,
temp_file_max_age_hours: float = 24.0,
multipart_max_age_days: int = 7,
lock_file_max_age_hours: float = 1.0,
dry_run: bool = False,
max_history: int = 50,
io_throttle_ms: int = 10,
) -> None:
self.storage_root = Path(storage_root)
self.interval_seconds = interval_hours * 3600.0
self.temp_file_max_age_hours = temp_file_max_age_hours
self.multipart_max_age_days = multipart_max_age_days
self.lock_file_max_age_hours = lock_file_max_age_hours
self.dry_run = dry_run
self._timer: Optional[threading.Timer] = None
self._shutdown = False
self._lock = threading.Lock()
self._scanning = False
self._scan_start_time: Optional[float] = None
self._io_throttle = max(0, io_throttle_ms) / 1000.0
self.history_store = GCHistoryStore(storage_root, max_records=max_history)
def start(self) -> None:
if self._timer is not None:
return
self._shutdown = False
self._schedule_next()
logger.info(
"GC started: interval=%.1fh, temp_max_age=%.1fh, multipart_max_age=%dd, lock_max_age=%.1fh, dry_run=%s",
self.interval_seconds / 3600.0,
self.temp_file_max_age_hours,
self.multipart_max_age_days,
self.lock_file_max_age_hours,
self.dry_run,
)
def stop(self) -> None:
self._shutdown = True
if self._timer:
self._timer.cancel()
self._timer = None
logger.info("GC stopped")
def _schedule_next(self) -> None:
if self._shutdown:
return
self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
self._timer.daemon = True
self._timer.start()
def _run_cycle(self) -> None:
if self._shutdown:
return
try:
self.run_now()
except Exception as e:
logger.error("GC cycle failed: %s", e)
finally:
self._schedule_next()
def run_now(self, dry_run: Optional[bool] = None) -> GCResult:
if not self._lock.acquire(blocking=False):
raise RuntimeError("GC is already in progress")
effective_dry_run = dry_run if dry_run is not None else self.dry_run
try:
self._scanning = True
self._scan_start_time = time.time()
start = self._scan_start_time
result = GCResult()
original_dry_run = self.dry_run
self.dry_run = effective_dry_run
try:
self._clean_temp_files(result)
self._clean_orphaned_multipart(result)
self._clean_stale_locks(result)
self._clean_orphaned_metadata(result)
self._clean_orphaned_versions(result)
self._clean_empty_dirs(result)
finally:
self.dry_run = original_dry_run
result.execution_time_seconds = time.time() - start
if result.has_work or result.errors:
logger.info(
"GC completed in %.2fs: temp=%d (%.1f MB), multipart=%d (%.1f MB), "
"locks=%d, meta=%d, versions=%d (%.1f MB), dirs=%d, errors=%d%s",
result.execution_time_seconds,
result.temp_files_deleted,
result.temp_bytes_freed / (1024 * 1024),
result.multipart_uploads_deleted,
result.multipart_bytes_freed / (1024 * 1024),
result.lock_files_deleted,
result.orphaned_metadata_deleted,
result.orphaned_versions_deleted,
result.orphaned_version_bytes_freed / (1024 * 1024),
result.empty_dirs_removed,
len(result.errors),
" (dry run)" if effective_dry_run else "",
)
record = GCExecutionRecord(
timestamp=time.time(),
result=result.to_dict(),
dry_run=effective_dry_run,
)
self.history_store.add(record)
return result
finally:
self._scanning = False
self._scan_start_time = None
self._lock.release()
def run_async(self, dry_run: Optional[bool] = None) -> bool:
if self._scanning:
return False
t = threading.Thread(target=self.run_now, args=(dry_run,), daemon=True)
t.start()
return True
def _system_path(self) -> Path:
return self.storage_root / self.SYSTEM_ROOT
def _throttle(self) -> bool:
if self._shutdown:
return True
if self._io_throttle > 0:
time.sleep(self._io_throttle)
return self._shutdown
def _list_bucket_names(self) -> List[str]:
names = []
try:
for entry in self.storage_root.iterdir():
if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
names.append(entry.name)
except OSError:
pass
return names
def _clean_temp_files(self, result: GCResult) -> None:
tmp_dir = self._system_path() / self.SYSTEM_TMP_DIR
if not tmp_dir.exists():
return
try:
for entry in tmp_dir.iterdir():
if self._throttle():
return
if not entry.is_file():
continue
age = _file_age_hours(entry)
if age < self.temp_file_max_age_hours:
continue
try:
size = entry.stat().st_size
if not self.dry_run:
entry.unlink()
result.temp_files_deleted += 1
result.temp_bytes_freed += size
except OSError as e:
result.errors.append(f"temp file {entry.name}: {e}")
except OSError as e:
result.errors.append(f"scan tmp dir: {e}")
def _clean_orphaned_multipart(self, result: GCResult) -> None:
cutoff_hours = self.multipart_max_age_days * 24.0
bucket_names = self._list_bucket_names()
for bucket_name in bucket_names:
if self._shutdown:
return
for multipart_root in (
self._system_path() / self.SYSTEM_MULTIPART_DIR / bucket_name,
self.storage_root / bucket_name / ".multipart",
):
if not multipart_root.exists():
continue
try:
for upload_dir in multipart_root.iterdir():
if self._throttle():
return
if not upload_dir.is_dir():
continue
self._maybe_clean_upload(upload_dir, cutoff_hours, result)
except OSError as e:
result.errors.append(f"scan multipart {bucket_name}: {e}")
def _maybe_clean_upload(self, upload_dir: Path, cutoff_hours: float, result: GCResult) -> None:
manifest_path = upload_dir / "manifest.json"
age = _file_age_hours(manifest_path) if manifest_path.exists() else _file_age_hours(upload_dir)
if age < cutoff_hours:
return
dir_bytes = _dir_size(upload_dir)
try:
if not self.dry_run:
shutil.rmtree(upload_dir, ignore_errors=True)
result.multipart_uploads_deleted += 1
result.multipart_bytes_freed += dir_bytes
except OSError as e:
result.errors.append(f"multipart {upload_dir.name}: {e}")
def _clean_stale_locks(self, result: GCResult) -> None:
buckets_root = self._system_path() / self.SYSTEM_BUCKETS_DIR
if not buckets_root.exists():
return
try:
for bucket_dir in buckets_root.iterdir():
if self._shutdown:
return
if not bucket_dir.is_dir():
continue
locks_dir = bucket_dir / "locks"
if not locks_dir.exists():
continue
try:
for lock_file in locks_dir.iterdir():
if self._throttle():
return
if not lock_file.is_file() or not lock_file.name.endswith(".lock"):
continue
age = _file_age_hours(lock_file)
if age < self.lock_file_max_age_hours:
continue
try:
if not self.dry_run:
lock_file.unlink(missing_ok=True)
result.lock_files_deleted += 1
except OSError as e:
result.errors.append(f"lock {lock_file.name}: {e}")
except OSError as e:
result.errors.append(f"scan locks {bucket_dir.name}: {e}")
except OSError as e:
result.errors.append(f"scan buckets for locks: {e}")
def _clean_orphaned_metadata(self, result: GCResult) -> None:
bucket_names = self._list_bucket_names()
for bucket_name in bucket_names:
if self._shutdown:
return
legacy_meta = self.storage_root / bucket_name / ".meta"
if legacy_meta.exists():
self._clean_legacy_metadata(bucket_name, legacy_meta, result)
new_meta = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
if new_meta.exists():
self._clean_index_metadata(bucket_name, new_meta, result)
def _clean_legacy_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
bucket_path = self.storage_root / bucket_name
try:
for meta_file in meta_root.rglob("*.meta.json"):
if self._throttle():
return
if not meta_file.is_file():
continue
try:
rel = meta_file.relative_to(meta_root)
object_key = rel.as_posix().removesuffix(".meta.json")
object_path = bucket_path / object_key
if not object_path.exists():
if not self.dry_run:
meta_file.unlink(missing_ok=True)
result.orphaned_metadata_deleted += 1
except (OSError, ValueError) as e:
result.errors.append(f"legacy meta {bucket_name}/{meta_file.name}: {e}")
except OSError as e:
result.errors.append(f"scan legacy meta {bucket_name}: {e}")
def _clean_index_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
bucket_path = self.storage_root / bucket_name
try:
for index_file in meta_root.rglob("_index.json"):
if self._throttle():
return
if not index_file.is_file():
continue
try:
with open(index_file, "r", encoding="utf-8") as f:
index_data = json.load(f)
except (OSError, json.JSONDecodeError):
continue
keys_to_remove = []
for key in index_data:
rel_dir = index_file.parent.relative_to(meta_root)
if rel_dir == Path("."):
full_key = key
else:
full_key = rel_dir.as_posix() + "/" + key
object_path = bucket_path / full_key
if not object_path.exists():
keys_to_remove.append(key)
if keys_to_remove:
if not self.dry_run:
for k in keys_to_remove:
index_data.pop(k, None)
if index_data:
try:
with open(index_file, "w", encoding="utf-8") as f:
json.dump(index_data, f)
except OSError as e:
result.errors.append(f"write index {bucket_name}: {e}")
continue
else:
try:
index_file.unlink(missing_ok=True)
except OSError:
pass
result.orphaned_metadata_deleted += len(keys_to_remove)
except OSError as e:
result.errors.append(f"scan index meta {bucket_name}: {e}")
def _clean_orphaned_versions(self, result: GCResult) -> None:
bucket_names = self._list_bucket_names()
for bucket_name in bucket_names:
if self._shutdown:
return
bucket_path = self.storage_root / bucket_name
for versions_root in (
self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR,
self.storage_root / bucket_name / ".versions",
):
if not versions_root.exists():
continue
try:
for key_dir in versions_root.iterdir():
if self._throttle():
return
if not key_dir.is_dir():
continue
self._clean_versions_for_key(bucket_path, versions_root, key_dir, result)
except OSError as e:
result.errors.append(f"scan versions {bucket_name}: {e}")
def _clean_versions_for_key(
self, bucket_path: Path, versions_root: Path, key_dir: Path, result: GCResult
) -> None:
try:
rel = key_dir.relative_to(versions_root)
except ValueError:
return
object_path = bucket_path / rel
if object_path.exists():
return
version_files = list(key_dir.glob("*.bin")) + list(key_dir.glob("*.json"))
if not version_files:
return
for vf in version_files:
try:
size = vf.stat().st_size if vf.suffix == ".bin" else 0
if not self.dry_run:
vf.unlink(missing_ok=True)
if vf.suffix == ".bin":
result.orphaned_version_bytes_freed += size
result.orphaned_versions_deleted += 1
except OSError as e:
result.errors.append(f"version file {vf.name}: {e}")
def _clean_empty_dirs(self, result: GCResult) -> None:
targets = [
self._system_path() / self.SYSTEM_TMP_DIR,
self._system_path() / self.SYSTEM_MULTIPART_DIR,
self._system_path() / self.SYSTEM_BUCKETS_DIR,
]
for bucket_name in self._list_bucket_names():
targets.append(self.storage_root / bucket_name / ".meta")
targets.append(self.storage_root / bucket_name / ".versions")
targets.append(self.storage_root / bucket_name / ".multipart")
for root in targets:
if not root.exists():
continue
self._remove_empty_dirs_recursive(root, root, result)
def _remove_empty_dirs_recursive(self, path: Path, stop_at: Path, result: GCResult) -> bool:
if self._shutdown:
return False
if not path.is_dir():
return False
try:
children = list(path.iterdir())
except OSError:
return False
all_empty = True
for child in children:
if self._throttle():
return False
if child.is_dir():
if not self._remove_empty_dirs_recursive(child, stop_at, result):
all_empty = False
else:
all_empty = False
if all_empty and path != stop_at:
try:
if not self.dry_run:
path.rmdir()
result.empty_dirs_removed += 1
return True
except OSError:
return False
return all_empty
def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
records = self.history_store.get_history(limit, offset)
return [r.to_dict() for r in records]
def get_status(self) -> dict:
status: Dict[str, Any] = {
"enabled": not self._shutdown or self._timer is not None,
"running": self._timer is not None and not self._shutdown,
"scanning": self._scanning,
"interval_hours": self.interval_seconds / 3600.0,
"temp_file_max_age_hours": self.temp_file_max_age_hours,
"multipart_max_age_days": self.multipart_max_age_days,
"lock_file_max_age_hours": self.lock_file_max_age_hours,
"dry_run": self.dry_run,
"io_throttle_ms": round(self._io_throttle * 1000),
}
if self._scanning and self._scan_start_time:
status["scan_elapsed_seconds"] = time.time() - self._scan_start_time
return status

File diff suppressed because it is too large Load Diff

View File

@@ -1,995 +0,0 @@
from __future__ import annotations
import hashlib
import json
import logging
import os
import threading
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
try:
import myfsio_core as _rc
if not hasattr(_rc, "md5_file"):
raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
_HAS_RUST = True
except ImportError:
_HAS_RUST = False
logger = logging.getLogger(__name__)
def _compute_etag(path: Path) -> str:
if _HAS_RUST:
return _rc.md5_file(str(path))
checksum = hashlib.md5()
with path.open("rb") as handle:
for chunk in iter(lambda: handle.read(8192), b""):
checksum.update(chunk)
return checksum.hexdigest()
@dataclass
class IntegrityIssue:
issue_type: str
bucket: str
key: str
detail: str
healed: bool = False
heal_action: str = ""
def to_dict(self) -> dict:
return {
"issue_type": self.issue_type,
"bucket": self.bucket,
"key": self.key,
"detail": self.detail,
"healed": self.healed,
"heal_action": self.heal_action,
}
@dataclass
class IntegrityResult:
corrupted_objects: int = 0
orphaned_objects: int = 0
phantom_metadata: int = 0
stale_versions: int = 0
etag_cache_inconsistencies: int = 0
legacy_metadata_drifts: int = 0
issues_healed: int = 0
issues: List[IntegrityIssue] = field(default_factory=list)
errors: List[str] = field(default_factory=list)
objects_scanned: int = 0
buckets_scanned: int = 0
execution_time_seconds: float = 0.0
def to_dict(self) -> dict:
return {
"corrupted_objects": self.corrupted_objects,
"orphaned_objects": self.orphaned_objects,
"phantom_metadata": self.phantom_metadata,
"stale_versions": self.stale_versions,
"etag_cache_inconsistencies": self.etag_cache_inconsistencies,
"legacy_metadata_drifts": self.legacy_metadata_drifts,
"issues_healed": self.issues_healed,
"issues": [i.to_dict() for i in self.issues],
"errors": self.errors,
"objects_scanned": self.objects_scanned,
"buckets_scanned": self.buckets_scanned,
"execution_time_seconds": self.execution_time_seconds,
}
@property
def total_issues(self) -> int:
return (
self.corrupted_objects
+ self.orphaned_objects
+ self.phantom_metadata
+ self.stale_versions
+ self.etag_cache_inconsistencies
+ self.legacy_metadata_drifts
)
@property
def has_issues(self) -> bool:
return self.total_issues > 0
@dataclass
class IntegrityExecutionRecord:
timestamp: float
result: dict
dry_run: bool
auto_heal: bool
def to_dict(self) -> dict:
return {
"timestamp": self.timestamp,
"result": self.result,
"dry_run": self.dry_run,
"auto_heal": self.auto_heal,
}
@classmethod
def from_dict(cls, data: dict) -> IntegrityExecutionRecord:
return cls(
timestamp=data["timestamp"],
result=data["result"],
dry_run=data.get("dry_run", False),
auto_heal=data.get("auto_heal", False),
)
class IntegrityHistoryStore:
def __init__(self, storage_root: Path, max_records: int = 50) -> None:
self.storage_root = storage_root
self.max_records = max_records
self._lock = threading.Lock()
def _get_path(self) -> Path:
return self.storage_root / ".myfsio.sys" / "config" / "integrity_history.json"
def load(self) -> List[IntegrityExecutionRecord]:
path = self._get_path()
if not path.exists():
return []
try:
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
return [IntegrityExecutionRecord.from_dict(d) for d in data.get("executions", [])]
except (OSError, ValueError, KeyError) as e:
logger.error("Failed to load integrity history: %s", e)
return []
def save(self, records: List[IntegrityExecutionRecord]) -> None:
path = self._get_path()
path.parent.mkdir(parents=True, exist_ok=True)
data = {"executions": [r.to_dict() for r in records[: self.max_records]]}
try:
with open(path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2)
except OSError as e:
logger.error("Failed to save integrity history: %s", e)
def add(self, record: IntegrityExecutionRecord) -> None:
with self._lock:
records = self.load()
records.insert(0, record)
self.save(records)
def get_history(self, limit: int = 50, offset: int = 0) -> List[IntegrityExecutionRecord]:
return self.load()[offset : offset + limit]
class IntegrityCursorStore:
def __init__(self, storage_root: Path) -> None:
self.storage_root = storage_root
self._lock = threading.Lock()
def _get_path(self) -> Path:
return self.storage_root / ".myfsio.sys" / "config" / "integrity_cursor.json"
def load(self) -> Dict[str, Any]:
path = self._get_path()
if not path.exists():
return {"buckets": {}}
try:
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data.get("buckets"), dict):
return {"buckets": {}}
return data
except (OSError, ValueError, KeyError):
return {"buckets": {}}
def save(self, data: Dict[str, Any]) -> None:
path = self._get_path()
path.parent.mkdir(parents=True, exist_ok=True)
try:
with open(path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2)
except OSError as e:
logger.error("Failed to save integrity cursor: %s", e)
def update_bucket(
self,
bucket_name: str,
timestamp: float,
last_key: Optional[str] = None,
completed: bool = False,
) -> None:
with self._lock:
data = self.load()
entry = data["buckets"].get(bucket_name, {})
if completed:
entry["last_scanned"] = timestamp
entry.pop("last_key", None)
entry["completed"] = True
else:
entry["last_scanned"] = timestamp
if last_key is not None:
entry["last_key"] = last_key
entry["completed"] = False
data["buckets"][bucket_name] = entry
self.save(data)
def clean_stale(self, existing_buckets: List[str]) -> None:
with self._lock:
data = self.load()
existing_set = set(existing_buckets)
stale_keys = [k for k in data["buckets"] if k not in existing_set]
if stale_keys:
for k in stale_keys:
del data["buckets"][k]
self.save(data)
def get_last_key(self, bucket_name: str) -> Optional[str]:
data = self.load()
entry = data.get("buckets", {}).get(bucket_name)
if entry is None:
return None
return entry.get("last_key")
def get_bucket_order(self, bucket_names: List[str]) -> List[str]:
data = self.load()
buckets_info = data.get("buckets", {})
incomplete = []
complete = []
for name in bucket_names:
entry = buckets_info.get(name)
if entry is None:
incomplete.append((name, 0.0))
elif entry.get("last_key") is not None:
incomplete.append((name, entry.get("last_scanned", 0.0)))
else:
complete.append((name, entry.get("last_scanned", 0.0)))
incomplete.sort(key=lambda x: x[1])
complete.sort(key=lambda x: x[1])
return [n for n, _ in incomplete] + [n for n, _ in complete]
def get_info(self) -> Dict[str, Any]:
data = self.load()
buckets = data.get("buckets", {})
return {
"tracked_buckets": len(buckets),
"buckets": {
name: {
"last_scanned": info.get("last_scanned"),
"last_key": info.get("last_key"),
"completed": info.get("completed", False),
}
for name, info in buckets.items()
},
}
MAX_ISSUES = 500
class IntegrityChecker:
SYSTEM_ROOT = ".myfsio.sys"
SYSTEM_BUCKETS_DIR = "buckets"
BUCKET_META_DIR = "meta"
BUCKET_VERSIONS_DIR = "versions"
INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
def __init__(
self,
storage_root: Path,
interval_hours: float = 24.0,
batch_size: int = 1000,
auto_heal: bool = False,
dry_run: bool = False,
max_history: int = 50,
io_throttle_ms: int = 10,
) -> None:
self.storage_root = Path(storage_root)
self.interval_seconds = interval_hours * 3600.0
self.batch_size = batch_size
self.auto_heal = auto_heal
self.dry_run = dry_run
self._timer: Optional[threading.Timer] = None
self._shutdown = False
self._lock = threading.Lock()
self._scanning = False
self._scan_start_time: Optional[float] = None
self._io_throttle = max(0, io_throttle_ms) / 1000.0
self.history_store = IntegrityHistoryStore(storage_root, max_records=max_history)
self.cursor_store = IntegrityCursorStore(self.storage_root)
def start(self) -> None:
if self._timer is not None:
return
self._shutdown = False
self._schedule_next()
logger.info(
"Integrity checker started: interval=%.1fh, batch_size=%d, auto_heal=%s, dry_run=%s",
self.interval_seconds / 3600.0,
self.batch_size,
self.auto_heal,
self.dry_run,
)
def stop(self) -> None:
self._shutdown = True
if self._timer:
self._timer.cancel()
self._timer = None
logger.info("Integrity checker stopped")
def _schedule_next(self) -> None:
if self._shutdown:
return
self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
self._timer.daemon = True
self._timer.start()
def _run_cycle(self) -> None:
if self._shutdown:
return
try:
self.run_now()
except Exception as e:
logger.error("Integrity check cycle failed: %s", e)
finally:
self._schedule_next()
def run_now(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> IntegrityResult:
if not self._lock.acquire(blocking=False):
raise RuntimeError("Integrity scan is already in progress")
try:
self._scanning = True
self._scan_start_time = time.time()
effective_auto_heal = auto_heal if auto_heal is not None else self.auto_heal
effective_dry_run = dry_run if dry_run is not None else self.dry_run
start = self._scan_start_time
result = IntegrityResult()
bucket_names = self._list_bucket_names()
self.cursor_store.clean_stale(bucket_names)
ordered_buckets = self.cursor_store.get_bucket_order(bucket_names)
for bucket_name in ordered_buckets:
if self._batch_exhausted(result):
break
result.buckets_scanned += 1
cursor_key = self.cursor_store.get_last_key(bucket_name)
key_corrupted = self._check_corrupted_objects(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
key_orphaned = self._check_orphaned_objects(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
key_phantom = self._check_phantom_metadata(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
self._check_stale_versions(bucket_name, result, effective_auto_heal, effective_dry_run)
self._check_etag_cache(bucket_name, result, effective_auto_heal, effective_dry_run)
self._check_legacy_metadata(bucket_name, result, effective_auto_heal, effective_dry_run)
returned_keys = [k for k in (key_corrupted, key_orphaned, key_phantom) if k is not None]
bucket_exhausted = self._batch_exhausted(result)
if bucket_exhausted and returned_keys:
self.cursor_store.update_bucket(bucket_name, time.time(), last_key=min(returned_keys))
else:
self.cursor_store.update_bucket(bucket_name, time.time(), completed=True)
result.execution_time_seconds = time.time() - start
if result.has_issues or result.errors:
logger.info(
"Integrity check completed in %.2fs: corrupted=%d, orphaned=%d, phantom=%d, "
"stale_versions=%d, etag_cache=%d, legacy_drift=%d, healed=%d, errors=%d%s",
result.execution_time_seconds,
result.corrupted_objects,
result.orphaned_objects,
result.phantom_metadata,
result.stale_versions,
result.etag_cache_inconsistencies,
result.legacy_metadata_drifts,
result.issues_healed,
len(result.errors),
" (dry run)" if effective_dry_run else "",
)
record = IntegrityExecutionRecord(
timestamp=time.time(),
result=result.to_dict(),
dry_run=effective_dry_run,
auto_heal=effective_auto_heal,
)
self.history_store.add(record)
return result
finally:
self._scanning = False
self._scan_start_time = None
self._lock.release()
def run_async(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> bool:
if self._scanning:
return False
t = threading.Thread(target=self.run_now, args=(auto_heal, dry_run), daemon=True)
t.start()
return True
def _system_path(self) -> Path:
return self.storage_root / self.SYSTEM_ROOT
def _list_bucket_names(self) -> List[str]:
names = []
try:
for entry in self.storage_root.iterdir():
if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
names.append(entry.name)
except OSError:
pass
return names
def _throttle(self) -> bool:
if self._shutdown:
return True
if self._io_throttle > 0:
time.sleep(self._io_throttle)
return self._shutdown
def _batch_exhausted(self, result: IntegrityResult) -> bool:
return self._shutdown or result.objects_scanned >= self.batch_size
def _add_issue(self, result: IntegrityResult, issue: IntegrityIssue) -> None:
if len(result.issues) < MAX_ISSUES:
result.issues.append(issue)
def _collect_index_keys(
self, meta_root: Path, cursor_key: Optional[str] = None,
) -> Dict[str, Dict[str, Any]]:
all_keys: Dict[str, Dict[str, Any]] = {}
if not meta_root.exists():
return all_keys
try:
for index_file in meta_root.rglob("_index.json"):
if not index_file.is_file():
continue
rel_dir = index_file.parent.relative_to(meta_root)
dir_prefix = "" if rel_dir == Path(".") else rel_dir.as_posix()
if cursor_key is not None and dir_prefix:
full_prefix = dir_prefix + "/"
if not cursor_key.startswith(full_prefix) and cursor_key > full_prefix:
continue
try:
index_data = json.loads(index_file.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError):
continue
for key_name, entry in index_data.items():
full_key = (dir_prefix + "/" + key_name) if dir_prefix else key_name
if cursor_key is not None and full_key <= cursor_key:
continue
all_keys[full_key] = {
"entry": entry,
"index_file": index_file,
"key_name": key_name,
}
except OSError:
pass
return all_keys
def _walk_bucket_files_sorted(
self, bucket_path: Path, cursor_key: Optional[str] = None,
):
def _walk(dir_path: Path, prefix: str):
try:
entries = list(os.scandir(dir_path))
except OSError:
return
def _sort_key(e):
if e.is_dir(follow_symlinks=False):
return e.name + "/"
return e.name
entries.sort(key=_sort_key)
for entry in entries:
if entry.is_dir(follow_symlinks=False):
if not prefix and entry.name in self.INTERNAL_FOLDERS:
continue
new_prefix = (prefix + "/" + entry.name) if prefix else entry.name
if cursor_key is not None:
full_prefix = new_prefix + "/"
if not cursor_key.startswith(full_prefix) and cursor_key > full_prefix:
continue
yield from _walk(Path(entry.path), new_prefix)
elif entry.is_file(follow_symlinks=False):
full_key = (prefix + "/" + entry.name) if prefix else entry.name
if cursor_key is not None and full_key <= cursor_key:
continue
yield full_key
yield from _walk(bucket_path, "")
def _check_corrupted_objects(
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
cursor_key: Optional[str] = None,
) -> Optional[str]:
if self._batch_exhausted(result):
return None
bucket_path = self.storage_root / bucket_name
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
if not meta_root.exists():
return None
last_key = None
try:
all_keys = self._collect_index_keys(meta_root, cursor_key)
sorted_keys = sorted(all_keys.keys())
for full_key in sorted_keys:
if self._throttle():
return last_key
if self._batch_exhausted(result):
return last_key
info = all_keys[full_key]
entry = info["entry"]
index_file = info["index_file"]
key_name = info["key_name"]
object_path = bucket_path / full_key
if not object_path.exists():
continue
result.objects_scanned += 1
last_key = full_key
meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
stored_etag = meta.get("__etag__")
if not stored_etag:
continue
try:
actual_etag = _compute_etag(object_path)
except OSError:
continue
if actual_etag != stored_etag:
result.corrupted_objects += 1
issue = IntegrityIssue(
issue_type="corrupted_object",
bucket=bucket_name,
key=full_key,
detail=f"stored_etag={stored_etag} actual_etag={actual_etag}",
)
if auto_heal and not dry_run:
try:
stat = object_path.stat()
meta["__etag__"] = actual_etag
meta["__size__"] = str(stat.st_size)
meta["__last_modified__"] = str(stat.st_mtime)
try:
index_data = json.loads(index_file.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError):
index_data = {}
index_data[key_name] = {"metadata": meta}
self._atomic_write_index(index_file, index_data)
issue.healed = True
issue.heal_action = "updated etag in index"
result.issues_healed += 1
except OSError as e:
result.errors.append(f"heal corrupted {bucket_name}/{full_key}: {e}")
self._add_issue(result, issue)
except OSError as e:
result.errors.append(f"check corrupted {bucket_name}: {e}")
return last_key
def _check_orphaned_objects(
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
cursor_key: Optional[str] = None,
) -> Optional[str]:
if self._batch_exhausted(result):
return None
bucket_path = self.storage_root / bucket_name
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
last_key = None
try:
for full_key in self._walk_bucket_files_sorted(bucket_path, cursor_key):
if self._throttle():
return last_key
if self._batch_exhausted(result):
return last_key
result.objects_scanned += 1
last_key = full_key
key_path = Path(full_key)
key_name = key_path.name
parent = key_path.parent
if parent == Path("."):
index_path = meta_root / "_index.json"
else:
index_path = meta_root / parent / "_index.json"
has_entry = False
if index_path.exists():
try:
index_data = json.loads(index_path.read_text(encoding="utf-8"))
has_entry = key_name in index_data
except (OSError, json.JSONDecodeError):
pass
if not has_entry:
result.orphaned_objects += 1
issue = IntegrityIssue(
issue_type="orphaned_object",
bucket=bucket_name,
key=full_key,
detail="file exists without metadata entry",
)
if auto_heal and not dry_run:
try:
object_path = bucket_path / full_key
etag = _compute_etag(object_path)
stat = object_path.stat()
meta = {
"__etag__": etag,
"__size__": str(stat.st_size),
"__last_modified__": str(stat.st_mtime),
}
index_data = {}
if index_path.exists():
try:
index_data = json.loads(index_path.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError):
pass
index_data[key_name] = {"metadata": meta}
self._atomic_write_index(index_path, index_data)
issue.healed = True
issue.heal_action = "created metadata entry"
result.issues_healed += 1
except OSError as e:
result.errors.append(f"heal orphaned {bucket_name}/{full_key}: {e}")
self._add_issue(result, issue)
except OSError as e:
result.errors.append(f"check orphaned {bucket_name}: {e}")
return last_key
def _check_phantom_metadata(
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
cursor_key: Optional[str] = None,
) -> Optional[str]:
if self._batch_exhausted(result):
return None
bucket_path = self.storage_root / bucket_name
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
if not meta_root.exists():
return None
last_key = None
try:
all_keys = self._collect_index_keys(meta_root, cursor_key)
sorted_keys = sorted(all_keys.keys())
heal_by_index: Dict[Path, List[str]] = {}
for full_key in sorted_keys:
if self._batch_exhausted(result):
break
result.objects_scanned += 1
last_key = full_key
object_path = bucket_path / full_key
if not object_path.exists():
result.phantom_metadata += 1
info = all_keys[full_key]
issue = IntegrityIssue(
issue_type="phantom_metadata",
bucket=bucket_name,
key=full_key,
detail="metadata entry without file on disk",
)
if auto_heal and not dry_run:
index_file = info["index_file"]
heal_by_index.setdefault(index_file, []).append(info["key_name"])
issue.healed = True
issue.heal_action = "removed stale index entry"
result.issues_healed += 1
self._add_issue(result, issue)
if heal_by_index and auto_heal and not dry_run:
for index_file, keys_to_remove in heal_by_index.items():
try:
index_data = json.loads(index_file.read_text(encoding="utf-8"))
for k in keys_to_remove:
index_data.pop(k, None)
if index_data:
self._atomic_write_index(index_file, index_data)
else:
index_file.unlink(missing_ok=True)
except OSError as e:
result.errors.append(f"heal phantom {bucket_name}: {e}")
except OSError as e:
result.errors.append(f"check phantom {bucket_name}: {e}")
return last_key
def _check_stale_versions(
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
) -> None:
if self._batch_exhausted(result):
return
versions_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR
if not versions_root.exists():
return
try:
for key_dir in versions_root.rglob("*"):
if self._throttle():
return
if self._batch_exhausted(result):
return
if not key_dir.is_dir():
continue
bin_files = {f.stem: f for f in key_dir.glob("*.bin")}
json_files = {f.stem: f for f in key_dir.glob("*.json")}
for stem, bin_file in bin_files.items():
if self._batch_exhausted(result):
return
result.objects_scanned += 1
if stem not in json_files:
result.stale_versions += 1
issue = IntegrityIssue(
issue_type="stale_version",
bucket=bucket_name,
key=f"{key_dir.relative_to(versions_root).as_posix()}/{bin_file.name}",
detail="version data without manifest",
)
if auto_heal and not dry_run:
try:
bin_file.unlink(missing_ok=True)
issue.healed = True
issue.heal_action = "removed orphaned version data"
result.issues_healed += 1
except OSError as e:
result.errors.append(f"heal stale version {bin_file}: {e}")
self._add_issue(result, issue)
for stem, json_file in json_files.items():
if self._batch_exhausted(result):
return
result.objects_scanned += 1
if stem not in bin_files:
result.stale_versions += 1
issue = IntegrityIssue(
issue_type="stale_version",
bucket=bucket_name,
key=f"{key_dir.relative_to(versions_root).as_posix()}/{json_file.name}",
detail="version manifest without data",
)
if auto_heal and not dry_run:
try:
json_file.unlink(missing_ok=True)
issue.healed = True
issue.heal_action = "removed orphaned version manifest"
result.issues_healed += 1
except OSError as e:
result.errors.append(f"heal stale version {json_file}: {e}")
self._add_issue(result, issue)
except OSError as e:
result.errors.append(f"check stale versions {bucket_name}: {e}")
def _check_etag_cache(
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
) -> None:
if self._batch_exhausted(result):
return
etag_index_path = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / "etag_index.json"
if not etag_index_path.exists():
return
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
if not meta_root.exists():
return
try:
etag_cache = json.loads(etag_index_path.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError):
return
found_mismatch = False
for full_key, cached_etag in etag_cache.items():
if self._batch_exhausted(result):
break
result.objects_scanned += 1
key_path = Path(full_key)
key_name = key_path.name
parent = key_path.parent
if parent == Path("."):
index_path = meta_root / "_index.json"
else:
index_path = meta_root / parent / "_index.json"
if not index_path.exists():
continue
try:
index_data = json.loads(index_path.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError):
continue
entry = index_data.get(key_name)
if not entry:
continue
meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
stored_etag = meta.get("__etag__")
if stored_etag and cached_etag != stored_etag:
result.etag_cache_inconsistencies += 1
found_mismatch = True
issue = IntegrityIssue(
issue_type="etag_cache_inconsistency",
bucket=bucket_name,
key=full_key,
detail=f"cached_etag={cached_etag} index_etag={stored_etag}",
)
self._add_issue(result, issue)
if found_mismatch and auto_heal and not dry_run:
try:
etag_index_path.unlink(missing_ok=True)
for issue in result.issues:
if issue.issue_type == "etag_cache_inconsistency" and issue.bucket == bucket_name and not issue.healed:
issue.healed = True
issue.heal_action = "deleted etag_index.json"
result.issues_healed += 1
except OSError as e:
result.errors.append(f"heal etag cache {bucket_name}: {e}")
def _check_legacy_metadata(
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
) -> None:
if self._batch_exhausted(result):
return
legacy_meta_root = self.storage_root / bucket_name / ".meta"
if not legacy_meta_root.exists():
return
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
try:
for meta_file in legacy_meta_root.rglob("*.meta.json"):
if self._throttle():
return
if self._batch_exhausted(result):
return
if not meta_file.is_file():
continue
result.objects_scanned += 1
try:
rel = meta_file.relative_to(legacy_meta_root)
except ValueError:
continue
full_key = rel.as_posix().removesuffix(".meta.json")
key_path = Path(full_key)
key_name = key_path.name
parent = key_path.parent
if parent == Path("."):
index_path = meta_root / "_index.json"
else:
index_path = meta_root / parent / "_index.json"
try:
legacy_data = json.loads(meta_file.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError):
continue
index_entry = None
if index_path.exists():
try:
index_data = json.loads(index_path.read_text(encoding="utf-8"))
index_entry = index_data.get(key_name)
except (OSError, json.JSONDecodeError):
pass
if index_entry is None:
result.legacy_metadata_drifts += 1
issue = IntegrityIssue(
issue_type="legacy_metadata_drift",
bucket=bucket_name,
key=full_key,
detail="unmigrated legacy .meta.json",
)
if auto_heal and not dry_run:
try:
index_data = {}
if index_path.exists():
try:
index_data = json.loads(index_path.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError):
pass
index_data[key_name] = {"metadata": legacy_data}
self._atomic_write_index(index_path, index_data)
meta_file.unlink(missing_ok=True)
issue.healed = True
issue.heal_action = "migrated to index and deleted legacy file"
result.issues_healed += 1
except OSError as e:
result.errors.append(f"heal legacy {bucket_name}/{full_key}: {e}")
self._add_issue(result, issue)
else:
index_meta = index_entry.get("metadata", {}) if isinstance(index_entry, dict) else {}
if legacy_data != index_meta:
result.legacy_metadata_drifts += 1
issue = IntegrityIssue(
issue_type="legacy_metadata_drift",
bucket=bucket_name,
key=full_key,
detail="legacy .meta.json differs from index entry",
)
if auto_heal and not dry_run:
try:
meta_file.unlink(missing_ok=True)
issue.healed = True
issue.heal_action = "deleted legacy file (index is authoritative)"
result.issues_healed += 1
except OSError as e:
result.errors.append(f"heal legacy drift {bucket_name}/{full_key}: {e}")
self._add_issue(result, issue)
except OSError as e:
result.errors.append(f"check legacy meta {bucket_name}: {e}")
@staticmethod
def _atomic_write_index(index_path: Path, data: Dict[str, Any]) -> None:
index_path.parent.mkdir(parents=True, exist_ok=True)
tmp_path = index_path.with_suffix(".tmp")
try:
with open(tmp_path, "w", encoding="utf-8") as f:
json.dump(data, f)
os.replace(str(tmp_path), str(index_path))
except BaseException:
try:
tmp_path.unlink(missing_ok=True)
except OSError:
pass
raise
def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
records = self.history_store.get_history(limit, offset)
return [r.to_dict() for r in records]
def get_status(self) -> dict:
status: Dict[str, Any] = {
"enabled": not self._shutdown or self._timer is not None,
"running": self._timer is not None and not self._shutdown,
"scanning": self._scanning,
"interval_hours": self.interval_seconds / 3600.0,
"batch_size": self.batch_size,
"auto_heal": self.auto_heal,
"dry_run": self.dry_run,
"io_throttle_ms": round(self._io_throttle * 1000),
}
if self._scanning and self._scan_start_time is not None:
status["scan_elapsed_seconds"] = round(time.time() - self._scan_start_time, 1)
status["cursor"] = self.cursor_store.get_info()
return status

View File

@@ -1,4 +0,0 @@
#!/bin/sh
set -e
exec python run.py --prod

View File

@@ -1,192 +0,0 @@
use aes_gcm::aead::Aead;
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
use hkdf::Hkdf;
use pyo3::exceptions::{PyIOError, PyValueError};
use pyo3::prelude::*;
use sha2::Sha256;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom, Write};
const DEFAULT_CHUNK_SIZE: usize = 65536;
const HEADER_SIZE: usize = 4;
fn read_exact_chunk(reader: &mut impl Read, buf: &mut [u8]) -> std::io::Result<usize> {
let mut filled = 0;
while filled < buf.len() {
match reader.read(&mut buf[filled..]) {
Ok(0) => break,
Ok(n) => filled += n,
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
}
}
Ok(filled)
}
fn derive_chunk_nonce(base_nonce: &[u8], chunk_index: u32) -> Result<[u8; 12], String> {
let hkdf = Hkdf::<Sha256>::new(Some(base_nonce), b"chunk_nonce");
let mut okm = [0u8; 12];
hkdf.expand(&chunk_index.to_be_bytes(), &mut okm)
.map_err(|e| format!("HKDF expand failed: {}", e))?;
Ok(okm)
}
#[pyfunction]
#[pyo3(signature = (input_path, output_path, key, base_nonce, chunk_size=DEFAULT_CHUNK_SIZE))]
pub fn encrypt_stream_chunked(
py: Python<'_>,
input_path: &str,
output_path: &str,
key: &[u8],
base_nonce: &[u8],
chunk_size: usize,
) -> PyResult<u32> {
if key.len() != 32 {
return Err(PyValueError::new_err(format!(
"Key must be 32 bytes, got {}",
key.len()
)));
}
if base_nonce.len() != 12 {
return Err(PyValueError::new_err(format!(
"Base nonce must be 12 bytes, got {}",
base_nonce.len()
)));
}
let chunk_size = if chunk_size == 0 {
DEFAULT_CHUNK_SIZE
} else {
chunk_size
};
let inp = input_path.to_owned();
let out = output_path.to_owned();
let key_arr: [u8; 32] = key.try_into().unwrap();
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
py.detach(move || {
let cipher = Aes256Gcm::new(&key_arr.into());
let mut infile = File::open(&inp)
.map_err(|e| PyIOError::new_err(format!("Failed to open input: {}", e)))?;
let mut outfile = File::create(&out)
.map_err(|e| PyIOError::new_err(format!("Failed to create output: {}", e)))?;
outfile
.write_all(&[0u8; 4])
.map_err(|e| PyIOError::new_err(format!("Failed to write header: {}", e)))?;
let mut buf = vec![0u8; chunk_size];
let mut chunk_index: u32 = 0;
loop {
let n = read_exact_chunk(&mut infile, &mut buf)
.map_err(|e| PyIOError::new_err(format!("Failed to read: {}", e)))?;
if n == 0 {
break;
}
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)
.map_err(|e| PyValueError::new_err(e))?;
let nonce = Nonce::from_slice(&nonce_bytes);
let encrypted = cipher
.encrypt(nonce, &buf[..n])
.map_err(|e| PyValueError::new_err(format!("Encrypt failed: {}", e)))?;
let size = encrypted.len() as u32;
outfile
.write_all(&size.to_be_bytes())
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk size: {}", e)))?;
outfile
.write_all(&encrypted)
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk: {}", e)))?;
chunk_index += 1;
}
outfile
.seek(SeekFrom::Start(0))
.map_err(|e| PyIOError::new_err(format!("Failed to seek: {}", e)))?;
outfile
.write_all(&chunk_index.to_be_bytes())
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk count: {}", e)))?;
Ok(chunk_index)
})
}
#[pyfunction]
pub fn decrypt_stream_chunked(
py: Python<'_>,
input_path: &str,
output_path: &str,
key: &[u8],
base_nonce: &[u8],
) -> PyResult<u32> {
if key.len() != 32 {
return Err(PyValueError::new_err(format!(
"Key must be 32 bytes, got {}",
key.len()
)));
}
if base_nonce.len() != 12 {
return Err(PyValueError::new_err(format!(
"Base nonce must be 12 bytes, got {}",
base_nonce.len()
)));
}
let inp = input_path.to_owned();
let out = output_path.to_owned();
let key_arr: [u8; 32] = key.try_into().unwrap();
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
py.detach(move || {
let cipher = Aes256Gcm::new(&key_arr.into());
let mut infile = File::open(&inp)
.map_err(|e| PyIOError::new_err(format!("Failed to open input: {}", e)))?;
let mut outfile = File::create(&out)
.map_err(|e| PyIOError::new_err(format!("Failed to create output: {}", e)))?;
let mut header = [0u8; HEADER_SIZE];
infile
.read_exact(&mut header)
.map_err(|e| PyIOError::new_err(format!("Failed to read header: {}", e)))?;
let chunk_count = u32::from_be_bytes(header);
let mut size_buf = [0u8; HEADER_SIZE];
for chunk_index in 0..chunk_count {
infile
.read_exact(&mut size_buf)
.map_err(|e| {
PyIOError::new_err(format!(
"Failed to read chunk {} size: {}",
chunk_index, e
))
})?;
let chunk_size = u32::from_be_bytes(size_buf) as usize;
let mut encrypted = vec![0u8; chunk_size];
infile.read_exact(&mut encrypted).map_err(|e| {
PyIOError::new_err(format!("Failed to read chunk {}: {}", chunk_index, e))
})?;
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)
.map_err(|e| PyValueError::new_err(e))?;
let nonce = Nonce::from_slice(&nonce_bytes);
let decrypted = cipher.decrypt(nonce, encrypted.as_ref()).map_err(|e| {
PyValueError::new_err(format!("Decrypt chunk {} failed: {}", chunk_index, e))
})?;
outfile.write_all(&decrypted).map_err(|e| {
PyIOError::new_err(format!("Failed to write chunk {}: {}", chunk_index, e))
})?;
}
Ok(chunk_count)
})
}

View File

@@ -1,112 +0,0 @@
use md5::{Digest, Md5};
use pyo3::exceptions::{PyIOError, PyValueError};
use pyo3::prelude::*;
use std::fs::{self, File};
use std::io::{Read, Write};
use uuid::Uuid;
const DEFAULT_CHUNK_SIZE: usize = 262144;
#[pyfunction]
#[pyo3(signature = (stream, tmp_dir, chunk_size=DEFAULT_CHUNK_SIZE))]
pub fn stream_to_file_with_md5(
py: Python<'_>,
stream: &Bound<'_, PyAny>,
tmp_dir: &str,
chunk_size: usize,
) -> PyResult<(String, String, u64)> {
let chunk_size = if chunk_size == 0 {
DEFAULT_CHUNK_SIZE
} else {
chunk_size
};
fs::create_dir_all(tmp_dir)
.map_err(|e| PyIOError::new_err(format!("Failed to create tmp dir: {}", e)))?;
let tmp_name = format!("{}.tmp", Uuid::new_v4().as_hyphenated());
let tmp_path_buf = std::path::PathBuf::from(tmp_dir).join(&tmp_name);
let tmp_path = tmp_path_buf.to_string_lossy().into_owned();
let mut file = File::create(&tmp_path)
.map_err(|e| PyIOError::new_err(format!("Failed to create temp file: {}", e)))?;
let mut hasher = Md5::new();
let mut total_bytes: u64 = 0;
let result: PyResult<()> = (|| {
loop {
let chunk: Vec<u8> = stream.call_method1("read", (chunk_size,))?.extract()?;
if chunk.is_empty() {
break;
}
hasher.update(&chunk);
file.write_all(&chunk)
.map_err(|e| PyIOError::new_err(format!("Failed to write: {}", e)))?;
total_bytes += chunk.len() as u64;
py.check_signals()?;
}
file.sync_all()
.map_err(|e| PyIOError::new_err(format!("Failed to fsync: {}", e)))?;
Ok(())
})();
if let Err(e) = result {
drop(file);
let _ = fs::remove_file(&tmp_path);
return Err(e);
}
drop(file);
let md5_hex = format!("{:x}", hasher.finalize());
Ok((tmp_path, md5_hex, total_bytes))
}
#[pyfunction]
pub fn assemble_parts_with_md5(
py: Python<'_>,
part_paths: Vec<String>,
dest_path: &str,
) -> PyResult<String> {
if part_paths.is_empty() {
return Err(PyValueError::new_err("No parts to assemble"));
}
let dest = dest_path.to_owned();
let parts = part_paths;
py.detach(move || {
if let Some(parent) = std::path::Path::new(&dest).parent() {
fs::create_dir_all(parent)
.map_err(|e| PyIOError::new_err(format!("Failed to create dest dir: {}", e)))?;
}
let mut target = File::create(&dest)
.map_err(|e| PyIOError::new_err(format!("Failed to create dest file: {}", e)))?;
let mut hasher = Md5::new();
let mut buf = vec![0u8; 1024 * 1024];
for part_path in &parts {
let mut part = File::open(part_path)
.map_err(|e| PyIOError::new_err(format!("Failed to open part {}: {}", part_path, e)))?;
loop {
let n = part
.read(&mut buf)
.map_err(|e| PyIOError::new_err(format!("Failed to read part: {}", e)))?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
target
.write_all(&buf[..n])
.map_err(|e| PyIOError::new_err(format!("Failed to write: {}", e)))?;
}
}
target.sync_all()
.map_err(|e| PyIOError::new_err(format!("Failed to fsync: {}", e)))?;
Ok(format!("{:x}", hasher.finalize()))
})
}

View File

@@ -1,308 +0,0 @@
"""Helper script to run the API server, UI server, or both."""
from __future__ import annotations
import argparse
import atexit
import os
import signal
import sys
import warnings
import multiprocessing
from multiprocessing import Process
from pathlib import Path
from dotenv import load_dotenv
for _env_file in [
Path("/opt/myfsio/myfsio.env"),
Path.cwd() / ".env",
Path.cwd() / "myfsio.env",
]:
if _env_file.exists():
load_dotenv(_env_file, override=True)
from typing import Optional
from app import create_api_app, create_ui_app
from app.config import AppConfig
from app.iam import IamService, IamError, ALLOWED_ACTIONS, _derive_fernet_key
from app.version import get_version
def _server_host() -> str:
"""Return the bind host for API and UI servers."""
return os.getenv("APP_HOST", "0.0.0.0")
def _is_debug_enabled() -> bool:
return os.getenv("FLASK_DEBUG", "0").lower() in ("1", "true", "yes")
def _is_frozen() -> bool:
"""Check if running as a compiled binary (PyInstaller/Nuitka)."""
return getattr(sys, 'frozen', False) or '__compiled__' in globals()
def _serve_granian(target: str, port: int, config: Optional[AppConfig] = None) -> None:
from granian import Granian
from granian.constants import Interfaces
from granian.http import HTTP1Settings
kwargs: dict = {
"target": target,
"address": _server_host(),
"port": port,
"interface": Interfaces.WSGI,
"factory": True,
"workers": 1,
}
if config:
kwargs["blocking_threads"] = config.server_threads
kwargs["backlog"] = config.server_backlog
kwargs["backpressure"] = config.server_connection_limit
kwargs["http1_settings"] = HTTP1Settings(
header_read_timeout=config.server_channel_timeout * 1000,
max_buffer_size=config.server_max_buffer_size,
)
else:
kwargs["http1_settings"] = HTTP1Settings(
max_buffer_size=1024 * 1024 * 128,
)
server = Granian(**kwargs)
server.serve()
def serve_api(port: int, prod: bool = False, config: Optional[AppConfig] = None) -> None:
if prod:
_serve_granian("app:create_api_app", port, config)
else:
app = create_api_app()
debug = _is_debug_enabled()
if debug:
warnings.warn("DEBUG MODE ENABLED - DO NOT USE IN PRODUCTION", RuntimeWarning)
app.run(host=_server_host(), port=port, debug=debug)
def serve_ui(port: int, prod: bool = False, config: Optional[AppConfig] = None) -> None:
if prod:
_serve_granian("app:create_ui_app", port, config)
else:
app = create_ui_app()
debug = _is_debug_enabled()
if debug:
warnings.warn("DEBUG MODE ENABLED - DO NOT USE IN PRODUCTION", RuntimeWarning)
app.run(host=_server_host(), port=port, debug=debug)
def reset_credentials() -> None:
import json
import secrets
from cryptography.fernet import Fernet
config = AppConfig.from_env()
iam_path = config.iam_config_path
encryption_key = config.secret_key
access_key = os.environ.get("ADMIN_ACCESS_KEY", "").strip() or secrets.token_hex(12)
secret_key = os.environ.get("ADMIN_SECRET_KEY", "").strip() or secrets.token_urlsafe(32)
custom_keys = bool(os.environ.get("ADMIN_ACCESS_KEY", "").strip())
fernet = Fernet(_derive_fernet_key(encryption_key)) if encryption_key else None
raw_config = None
if iam_path.exists():
try:
raw_bytes = iam_path.read_bytes()
from app.iam import _IAM_ENCRYPTED_PREFIX
if raw_bytes.startswith(_IAM_ENCRYPTED_PREFIX):
if fernet:
try:
content = fernet.decrypt(raw_bytes[len(_IAM_ENCRYPTED_PREFIX):]).decode("utf-8")
raw_config = json.loads(content)
except Exception:
print("WARNING: Could not decrypt existing IAM config. Creating fresh config.")
else:
print("WARNING: IAM config is encrypted but no SECRET_KEY available. Creating fresh config.")
else:
try:
raw_config = json.loads(raw_bytes.decode("utf-8"))
except json.JSONDecodeError:
print("WARNING: Existing IAM config is corrupted. Creating fresh config.")
except OSError:
pass
if raw_config and raw_config.get("users"):
is_v2 = raw_config.get("version", 1) >= 2
admin_user = None
for user in raw_config["users"]:
policies = user.get("policies", [])
for p in policies:
actions = p.get("actions", [])
if "iam:*" in actions or "*" in actions:
admin_user = user
break
if admin_user:
break
if not admin_user:
admin_user = raw_config["users"][0]
if is_v2:
admin_keys = admin_user.get("access_keys", [])
if admin_keys:
admin_keys[0]["access_key"] = access_key
admin_keys[0]["secret_key"] = secret_key
else:
from datetime import datetime as _dt, timezone as _tz
admin_user["access_keys"] = [{
"access_key": access_key,
"secret_key": secret_key,
"status": "active",
"created_at": _dt.now(_tz.utc).isoformat(),
}]
else:
admin_user["access_key"] = access_key
admin_user["secret_key"] = secret_key
else:
from datetime import datetime as _dt, timezone as _tz
raw_config = {
"version": 2,
"users": [
{
"user_id": f"u-{secrets.token_hex(8)}",
"display_name": "Local Admin",
"enabled": True,
"access_keys": [
{
"access_key": access_key,
"secret_key": secret_key,
"status": "active",
"created_at": _dt.now(_tz.utc).isoformat(),
}
],
"policies": [
{"bucket": "*", "actions": list(ALLOWED_ACTIONS)}
],
}
]
}
json_text = json.dumps(raw_config, indent=2)
iam_path.parent.mkdir(parents=True, exist_ok=True)
temp_path = iam_path.with_suffix(".json.tmp")
if fernet:
from app.iam import _IAM_ENCRYPTED_PREFIX
encrypted = fernet.encrypt(json_text.encode("utf-8"))
temp_path.write_bytes(_IAM_ENCRYPTED_PREFIX + encrypted)
else:
temp_path.write_text(json_text, encoding="utf-8")
temp_path.replace(iam_path)
print(f"\n{'='*60}")
print("MYFSIO - ADMIN CREDENTIALS RESET")
print(f"{'='*60}")
if custom_keys:
print(f"Access Key: {access_key} (from ADMIN_ACCESS_KEY)")
print(f"Secret Key: {'(from ADMIN_SECRET_KEY)' if os.environ.get('ADMIN_SECRET_KEY', '').strip() else secret_key}")
else:
print(f"Access Key: {access_key}")
print(f"Secret Key: {secret_key}")
print(f"{'='*60}")
if fernet:
print("IAM config saved (encrypted).")
else:
print(f"IAM config saved to: {iam_path}")
print(f"{'='*60}\n")
if __name__ == "__main__":
multiprocessing.freeze_support()
if _is_frozen():
multiprocessing.set_start_method("spawn", force=True)
parser = argparse.ArgumentParser(description="Run the S3 clone services.")
parser.add_argument("--mode", choices=["api", "ui", "both", "reset-cred"], default="both")
parser.add_argument("--api-port", type=int, default=5000)
parser.add_argument("--ui-port", type=int, default=5100)
parser.add_argument("--prod", action="store_true", help="Run in production mode using Granian")
parser.add_argument("--dev", action="store_true", help="Force development mode (Flask dev server)")
parser.add_argument("--check-config", action="store_true", help="Validate configuration and exit")
parser.add_argument("--show-config", action="store_true", help="Show configuration summary and exit")
parser.add_argument("--reset-cred", action="store_true", help="Reset admin credentials and exit")
parser.add_argument("--version", action="version", version=f"MyFSIO {get_version()}")
args = parser.parse_args()
if args.reset_cred or args.mode == "reset-cred":
reset_credentials()
sys.exit(0)
if args.check_config or args.show_config:
config = AppConfig.from_env()
config.print_startup_summary()
if args.check_config:
issues = config.validate_and_report()
critical = [i for i in issues if i.startswith("CRITICAL:")]
sys.exit(1 if critical else 0)
sys.exit(0)
prod_mode = args.prod or (_is_frozen() and not args.dev)
config = AppConfig.from_env()
first_run_marker = config.storage_root / ".myfsio.sys" / ".initialized"
is_first_run = not first_run_marker.exists()
if is_first_run:
config.print_startup_summary()
issues = config.validate_and_report()
critical_issues = [i for i in issues if i.startswith("CRITICAL:")]
if critical_issues:
print("ABORTING: Critical configuration issues detected. Please fix them before starting.")
sys.exit(1)
try:
first_run_marker.parent.mkdir(parents=True, exist_ok=True)
first_run_marker.write_text(f"Initialized on {__import__('datetime').datetime.now().isoformat()}\n")
except OSError:
pass
if prod_mode:
print("Running in production mode (Granian)")
issues = config.validate_and_report()
critical_issues = [i for i in issues if i.startswith("CRITICAL:")]
if critical_issues:
for issue in critical_issues:
print(f" {issue}")
print("ABORTING: Critical configuration issues detected. Please fix them before starting.")
sys.exit(1)
else:
print("Running in development mode (Flask dev server)")
if args.mode in {"api", "both"}:
print(f"Starting API server on port {args.api_port}...")
api_proc = Process(target=serve_api, args=(args.api_port, prod_mode, config))
api_proc.start()
else:
api_proc = None
def _cleanup_api():
if api_proc and api_proc.is_alive():
api_proc.terminate()
api_proc.join(timeout=5)
if api_proc.is_alive():
api_proc.kill()
if api_proc:
atexit.register(_cleanup_api)
signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
if args.mode in {"ui", "both"}:
print(f"Starting UI server on port {args.ui_port}...")
serve_ui(args.ui_port, prod_mode, config)
elif api_proc:
try:
api_proc.join()
except KeyboardInterrupt:
pass

File diff suppressed because it is too large Load Diff

View File

@@ -1,750 +0,0 @@
{% extends "base.html" %}
{% block title %}System - MyFSIO Console{% endblock %}
{% block content %}
<div class="page-header d-flex justify-content-between align-items-center mb-4">
<div>
<p class="text-uppercase text-muted small mb-1">Administration</p>
<h1 class="h3 mb-1 d-flex align-items-center gap-2">
<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
<path d="M9.405 1.05c-.413-1.4-2.397-1.4-2.81 0l-.1.34a1.464 1.464 0 0 1-2.105.872l-.31-.17c-1.283-.698-2.686.705-1.987 1.987l.169.311c.446.82.023 1.841-.872 2.105l-.34.1c-1.4.413-1.4 2.397 0 2.81l.34.1a1.464 1.464 0 0 1 .872 2.105l-.17.31c-.698 1.283.705 2.686 1.987 1.987l.311-.169a1.464 1.464 0 0 1 2.105.872l.1.34c.413 1.4 2.397 1.4 2.81 0l.1-.34a1.464 1.464 0 0 1 2.105-.872l.31.17c1.283.698 2.686-.705 1.987-1.987l-.169-.311a1.464 1.464 0 0 1 .872-2.105l.34-.1c1.4-.413 1.4-2.397 0-2.81l-.34-.1a1.464 1.464 0 0 1-.872-2.105l.17-.31c.698-1.283-.705-2.686-1.987-1.987l-.311.169a1.464 1.464 0 0 1-2.105-.872l-.1-.34zM8 10.93a2.929 2.929 0 1 1 0-5.86 2.929 2.929 0 0 1 0 5.858z"/>
</svg>
System
</h1>
<p class="text-muted mb-0 mt-1">Server information, feature flags, and maintenance tools.</p>
</div>
<div class="d-none d-md-block">
<span class="badge bg-primary bg-opacity-10 text-primary fs-6 px-3 py-2">v{{ app_version }}</span>
</div>
</div>
<div class="row g-4 mb-4">
<div class="col-lg-6">
<div class="card shadow-sm border-0" style="border-radius: 1rem;">
<div class="card-header bg-transparent border-0 pt-4 pb-0 px-4">
<h5 class="fw-semibold d-flex align-items-center gap-2 mb-1">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
<path d="M5 0a.5.5 0 0 1 .5.5V2h1V.5a.5.5 0 0 1 1 0V2h1V.5a.5.5 0 0 1 1 0V2h1V.5a.5.5 0 0 1 1 0V2A2.5 2.5 0 0 1 14 4.5h1.5a.5.5 0 0 1 0 1H14v1h1.5a.5.5 0 0 1 0 1H14v1h1.5a.5.5 0 0 1 0 1H14v1h1.5a.5.5 0 0 1 0 1H14a2.5 2.5 0 0 1-2.5 2.5v1.5a.5.5 0 0 1-1 0V14h-1v1.5a.5.5 0 0 1-1 0V14h-1v1.5a.5.5 0 0 1-1 0V14h-1v1.5a.5.5 0 0 1-1 0V14A2.5 2.5 0 0 1 2 11.5H.5a.5.5 0 0 1 0-1H2v-1H.5a.5.5 0 0 1 0-1H2v-1H.5a.5.5 0 0 1 0-1H2v-1H.5a.5.5 0 0 1 0-1H2A2.5 2.5 0 0 1 4.5 2V.5A.5.5 0 0 1 5 0zm-.5 3A1.5 1.5 0 0 0 3 4.5v7A1.5 1.5 0 0 0 4.5 13h7a1.5 1.5 0 0 0 1.5-1.5v-7A1.5 1.5 0 0 0 11.5 3h-7zM5 6.5A1.5 1.5 0 0 1 6.5 5h3A1.5 1.5 0 0 1 11 6.5v3A1.5 1.5 0 0 1 9.5 11h-3A1.5 1.5 0 0 1 5 9.5v-3zM6.5 6a.5.5 0 0 0-.5.5v3a.5.5 0 0 0 .5.5h3a.5.5 0 0 0 .5-.5v-3a.5.5 0 0 0-.5-.5h-3z"/>
</svg>
Server Information
</h5>
<p class="text-muted small mb-0">Runtime environment and configuration</p>
</div>
<div class="card-body px-4 pb-4">
<table class="table table-sm mb-0">
<tbody>
<tr><td class="text-muted" style="width:40%">Version</td><td class="fw-medium">{{ app_version }}</td></tr>
<tr><td class="text-muted">Storage Root</td><td><code>{{ storage_root }}</code></td></tr>
<tr><td class="text-muted">Platform</td><td>{{ platform }}</td></tr>
<tr><td class="text-muted">Python</td><td>{{ python_version }}</td></tr>
<tr><td class="text-muted">Rust Extension</td><td>
{% if has_rust %}
<span class="badge bg-success bg-opacity-10 text-success">Loaded</span>
{% else %}
<span class="badge bg-secondary bg-opacity-10 text-secondary">Not loaded</span>
{% endif %}
</td></tr>
</tbody>
</table>
</div>
</div>
</div>
<div class="col-lg-6">
<div class="card shadow-sm border-0" style="border-radius: 1rem;">
<div class="card-header bg-transparent border-0 pt-4 pb-0 px-4">
<h5 class="fw-semibold d-flex align-items-center gap-2 mb-1">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M11.5 2a1.5 1.5 0 1 0 0 3 1.5 1.5 0 0 0 0-3zM9.05 3a2.5 2.5 0 0 1 4.9 0H16v1h-2.05a2.5 2.5 0 0 1-4.9 0H0V3h9.05zM4.5 7a1.5 1.5 0 1 0 0 3 1.5 1.5 0 0 0 0-3zM2.05 8a2.5 2.5 0 0 1 4.9 0H16v1H6.95a2.5 2.5 0 0 1-4.9 0H0V8h2.05zm9.45 4a1.5 1.5 0 1 0 0 3 1.5 1.5 0 0 0 0-3zm-2.45 1a2.5 2.5 0 0 1 4.9 0H16v1h-2.05a2.5 2.5 0 0 1-4.9 0H0v-1h9.05z"/>
</svg>
Feature Flags
</h5>
<p class="text-muted small mb-0">Features configured via environment variables</p>
</div>
<div class="card-body px-4 pb-4">
<table class="table table-sm mb-0">
<tbody>
{% for feat in features %}
<tr>
<td class="text-muted" style="width:55%">{{ feat.label }}</td>
<td class="text-end">
{% if feat.enabled %}
<span class="badge bg-success bg-opacity-10 text-success">Enabled</span>
{% else %}
<span class="badge bg-secondary bg-opacity-10 text-secondary">Disabled</span>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
</div>
<div class="row g-4 mb-4">
<div class="col-lg-6">
<div class="card shadow-sm border-0" style="border-radius: 1rem;">
<div class="card-header bg-transparent border-0 pt-4 pb-0 px-4">
<div class="d-flex justify-content-between align-items-start">
<div>
<h5 class="fw-semibold d-flex align-items-center gap-2 mb-1">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
<path d="M2.5 1a1 1 0 0 0-1 1v1a1 1 0 0 0 1 1H3v9a2 2 0 0 0 2 2h6a2 2 0 0 0 2-2V4h.5a1 1 0 0 0 1-1V2a1 1 0 0 0-1-1H10a1 1 0 0 0-1-1H7a1 1 0 0 0-1 1H2.5zm3 4a.5.5 0 0 1 .5.5v7a.5.5 0 0 1-1 0v-7a.5.5 0 0 1 .5-.5zM8 5a.5.5 0 0 1 .5.5v7a.5.5 0 0 1-1 0v-7A.5.5 0 0 1 8 5zm3 .5v7a.5.5 0 0 1-1 0v-7a.5.5 0 0 1 1 0z"/>
</svg>
Garbage Collection
</h5>
<p class="text-muted small mb-0">Clean up temporary files, orphaned uploads, and stale locks</p>
</div>
<div>
{% if gc_status.enabled %}
<span class="badge bg-success bg-opacity-10 text-success">Active</span>
{% else %}
<span class="badge bg-secondary bg-opacity-10 text-secondary">Disabled</span>
{% endif %}
</div>
</div>
</div>
<div class="card-body px-4 pb-4">
{% if gc_status.enabled %}
<div class="d-flex gap-2 mb-3">
<button class="btn btn-primary btn-sm d-inline-flex align-items-center" id="gcRunBtn" onclick="runGC(false)">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1 flex-shrink-0" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M8 3a5 5 0 1 0 4.546 2.914.5.5 0 0 1 .908-.417A6 6 0 1 1 8 2v1z"/>
<path d="M8 4.466V.534a.25.25 0 0 1 .41-.192l2.36 1.966c.12.1.12.284 0 .384L8.41 4.658A.25.25 0 0 1 8 4.466z"/>
</svg>
Run Now
</button>
<button class="btn btn-outline-secondary btn-sm" id="gcDryRunBtn" onclick="runGC(true)">
Dry Run
</button>
</div>
<div id="gcScanningBanner" class="mb-3 {% if not gc_status.scanning %}d-none{% endif %}">
<div class="alert alert-info mb-0 small d-flex align-items-center gap-2">
<div class="spinner-border spinner-border-sm text-info" role="status"></div>
<span>GC in progress<span id="gcScanElapsed"></span></span>
</div>
</div>
<div id="gcResult" class="mb-3 d-none">
<div class="alert mb-0 small" id="gcResultAlert">
<div class="d-flex justify-content-between align-items-start">
<div class="fw-semibold mb-1" id="gcResultTitle"></div>
<button type="button" class="btn-close btn-close-sm" style="font-size:0.65rem" onclick="document.getElementById('gcResult').classList.add('d-none')"></button>
</div>
<div id="gcResultBody"></div>
</div>
</div>
<div class="border rounded p-3 mb-3" style="background: var(--bs-tertiary-bg, #f8f9fa);">
<div class="d-flex align-items-center gap-2 mb-2">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="text-muted" viewBox="0 0 16 16">
<path d="M9.405 1.05c-.413-1.4-2.397-1.4-2.81 0l-.1.34a1.464 1.464 0 0 1-2.105.872l-.31-.17c-1.283-.698-2.686.705-1.987 1.987l.169.311c.446.82.023 1.841-.872 2.105l-.34.1c-1.4.413-1.4 2.397 0 2.81l.34.1a1.464 1.464 0 0 1 .872 2.105l-.17.31c-.698 1.283.705 2.686 1.987 1.987l.311-.169a1.464 1.464 0 0 1 2.105.872l.1.34c.413 1.4 2.397 1.4 2.81 0l.1-.34a1.464 1.464 0 0 1 2.105-.872l.31.17c1.283.698 2.686-.705 1.987-1.987l-.169-.311a1.464 1.464 0 0 1 .872-2.105l.34-.1c1.4-.413 1.4-2.397 0-2.81l-.34-.1a1.464 1.464 0 0 1-.872-2.105l.17-.31c.698-1.283-.705-2.686-1.987-1.987l-.311.169a1.464 1.464 0 0 1-2.105-.872l-.1-.34zM8 10.93a2.929 2.929 0 1 1 0-5.86 2.929 2.929 0 0 1 0 5.858z"/>
</svg>
<span class="small fw-semibold text-muted">Configuration</span>
</div>
<div class="row small">
<div class="col-6 mb-1"><span class="text-muted">Interval:</span> {{ gc_status.interval_hours }}h</div>
<div class="col-6 mb-1"><span class="text-muted">Dry run:</span> {{ "Yes" if gc_status.dry_run else "No" }}</div>
<div class="col-6 mb-1"><span class="text-muted">Temp max age:</span> {{ gc_status.temp_file_max_age_hours }}h</div>
<div class="col-6 mb-1"><span class="text-muted">Lock max age:</span> {{ gc_status.lock_file_max_age_hours }}h</div>
<div class="col-6"><span class="text-muted">Multipart max age:</span> {{ gc_status.multipart_max_age_days }}d</div>
</div>
</div>
<div id="gcHistoryContainer">
{% if gc_history %}
<h6 class="fw-semibold small text-muted mb-2 d-flex align-items-center gap-2">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
<path d="M8.515 1.019A7 7 0 0 0 8 1V0a8 8 0 0 1 .589.022l-.074.997zm2.004.45a7.003 7.003 0 0 0-.985-.299l.219-.976c.383.086.76.2 1.126.342l-.36.933zm1.37.71a7.01 7.01 0 0 0-.439-.27l.493-.87a8.025 8.025 0 0 1 .979.654l-.615.789a6.996 6.996 0 0 0-.418-.302zm1.834 1.79a6.99 6.99 0 0 0-.653-.796l.724-.69c.27.285.52.59.747.91l-.818.576zm.744 1.352a7.08 7.08 0 0 0-.214-.468l.893-.45a7.976 7.976 0 0 1 .45 1.088l-.95.313a7.023 7.023 0 0 0-.179-.483zm.53 2.507a6.991 6.991 0 0 0-.1-1.025l.985-.17c.067.386.106.778.116 1.17l-1 .025zm-.131 1.538c.033-.17.06-.339.081-.51l.993.123a7.957 7.957 0 0 1-.23 1.155l-.964-.267c.046-.165.086-.332.12-.501zm-.952 2.379c.184-.29.346-.594.486-.908l.914.405c-.16.36-.345.706-.555 1.038l-.845-.535zm-.964 1.205c.122-.122.239-.248.35-.378l.758.653a8.073 8.073 0 0 1-.401.432l-.707-.707z"/>
<path d="M8 1a7 7 0 1 0 4.95 11.95l.707.707A8.001 8.001 0 1 1 8 0v1z"/>
<path d="M7.5 3a.5.5 0 0 1 .5.5v5.21l3.248 1.856a.5.5 0 0 1-.496.868l-3.5-2A.5.5 0 0 1 7 8V3.5a.5.5 0 0 1 .5-.5z"/>
</svg>
Recent Executions
</h6>
<div class="table-responsive">
<table class="table table-sm small mb-0">
<thead class="table-light">
<tr>
<th>Time</th>
<th class="text-center">Cleaned</th>
<th class="text-center">Freed</th>
<th class="text-center">Mode</th>
</tr>
</thead>
<tbody>
{% for exec in gc_history %}
<tr>
<td class="text-nowrap">{{ exec.timestamp_display }}</td>
<td class="text-center">
{% set r = exec.result %}
{{ (r.temp_files_deleted|d(0)) + (r.multipart_uploads_deleted|d(0)) + (r.lock_files_deleted|d(0)) + (r.orphaned_metadata_deleted|d(0)) + (r.orphaned_versions_deleted|d(0)) + (r.empty_dirs_removed|d(0)) }}
</td>
<td class="text-center">{{ exec.bytes_freed_display }}</td>
<td class="text-center">
{% if exec.dry_run %}
<span class="badge bg-warning bg-opacity-10 text-warning">Dry run</span>
{% else %}
<span class="badge bg-primary bg-opacity-10 text-primary">Live</span>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% else %}
<div class="text-center py-2">
<p class="text-muted small mb-0">No executions recorded yet.</p>
</div>
{% endif %}
</div>
{% else %}
<div class="text-center py-4">
<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" fill="currentColor" class="text-muted mb-2 opacity-50" viewBox="0 0 16 16">
<path d="M2.5 1a1 1 0 0 0-1 1v1a1 1 0 0 0 1 1H3v9a2 2 0 0 0 2 2h6a2 2 0 0 0 2-2V4h.5a1 1 0 0 0 1-1V2a1 1 0 0 0-1-1H10a1 1 0 0 0-1-1H7a1 1 0 0 0-1 1H2.5zm3 4a.5.5 0 0 1 .5.5v7a.5.5 0 0 1-1 0v-7a.5.5 0 0 1 .5-.5zM8 5a.5.5 0 0 1 .5.5v7a.5.5 0 0 1-1 0v-7A.5.5 0 0 1 8 5zm3 .5v7a.5.5 0 0 1-1 0v-7a.5.5 0 0 1 1 0z"/>
</svg>
<p class="text-muted mb-1">Garbage collection is not enabled.</p>
<p class="text-muted small mb-0">Set <code>GC_ENABLED=true</code> to enable automatic cleanup.</p>
</div>
{% endif %}
</div>
</div>
</div>
<div class="col-lg-6">
<div class="card shadow-sm border-0" style="border-radius: 1rem;">
<div class="card-header bg-transparent border-0 pt-4 pb-0 px-4">
<div class="d-flex justify-content-between align-items-start">
<div>
<h5 class="fw-semibold d-flex align-items-center gap-2 mb-1">
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="text-primary" viewBox="0 0 16 16">
<path d="M5.338 1.59a61.44 61.44 0 0 0-2.837.856.481.481 0 0 0-.328.39c-.554 4.157.726 7.19 2.253 9.188a10.725 10.725 0 0 0 2.287 2.233c.346.244.652.42.893.533.12.057.218.095.293.118a.55.55 0 0 0 .101.025.615.615 0 0 0 .1-.025c.076-.023.174-.061.294-.118.24-.113.547-.29.893-.533a10.726 10.726 0 0 0 2.287-2.233c1.527-1.997 2.807-5.031 2.253-9.188a.48.48 0 0 0-.328-.39c-.651-.213-1.75-.56-2.837-.855C9.552 1.29 8.531 1.067 8 1.067c-.53 0-1.552.223-2.662.524zM5.072.56C6.157.265 7.31 0 8 0s1.843.265 2.928.56c1.11.3 2.229.655 2.887.87a1.54 1.54 0 0 1 1.044 1.262c.596 4.477-.787 7.795-2.465 9.99a11.775 11.775 0 0 1-2.517 2.453 7.159 7.159 0 0 1-1.048.625c-.28.132-.581.24-.829.24s-.548-.108-.829-.24a7.158 7.158 0 0 1-1.048-.625 11.777 11.777 0 0 1-2.517-2.453C1.928 10.487.545 7.169 1.141 2.692A1.54 1.54 0 0 1 2.185 1.43 62.456 62.456 0 0 1 5.072.56z"/>
<path d="M10.854 5.146a.5.5 0 0 1 0 .708l-3 3a.5.5 0 0 1-.708 0l-1.5-1.5a.5.5 0 1 1 .708-.708L7.5 7.793l2.646-2.647a.5.5 0 0 1 .708 0z"/>
</svg>
Integrity Scanner
</h5>
<p class="text-muted small mb-0">Detect and heal corrupted objects, orphaned files, and metadata drift</p>
</div>
<div>
{% if integrity_status.enabled %}
<span class="badge bg-success bg-opacity-10 text-success">Active</span>
{% else %}
<span class="badge bg-secondary bg-opacity-10 text-secondary">Disabled</span>
{% endif %}
</div>
</div>
</div>
<div class="card-body px-4 pb-4">
{% if integrity_status.enabled %}
<div class="d-flex gap-2 flex-wrap mb-3">
<button class="btn btn-primary btn-sm d-inline-flex align-items-center" id="integrityRunBtn" onclick="runIntegrity(false, false)" {% if integrity_status.scanning %}disabled{% endif %}>
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="me-1 flex-shrink-0" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M8 3a5 5 0 1 0 4.546 2.914.5.5 0 0 1 .908-.417A6 6 0 1 1 8 2v1z"/>
<path d="M8 4.466V.534a.25.25 0 0 1 .41-.192l2.36 1.966c.12.1.12.284 0 .384L8.41 4.658A.25.25 0 0 1 8 4.466z"/>
</svg>
Scan Now
</button>
<button class="btn btn-outline-warning btn-sm" id="integrityHealBtn" onclick="runIntegrity(false, true)" {% if integrity_status.scanning %}disabled{% endif %}>
Scan &amp; Heal
</button>
<button class="btn btn-outline-secondary btn-sm" id="integrityDryRunBtn" onclick="runIntegrity(true, false)" {% if integrity_status.scanning %}disabled{% endif %}>
Dry Run
</button>
</div>
<div id="integrityScanningBanner" class="mb-3 {% if not integrity_status.scanning %}d-none{% endif %}">
<div class="alert alert-info mb-0 small d-flex align-items-center gap-2">
<div class="spinner-border spinner-border-sm text-info" role="status"></div>
<span>Scan in progress<span id="integrityScanElapsed"></span></span>
</div>
</div>
<div id="integrityResult" class="mb-3 d-none">
<div class="alert mb-0 small" id="integrityResultAlert">
<div class="d-flex justify-content-between align-items-start">
<div class="fw-semibold mb-1" id="integrityResultTitle"></div>
<button type="button" class="btn-close btn-close-sm" style="font-size:0.65rem" onclick="document.getElementById('integrityResult').classList.add('d-none')"></button>
</div>
<div id="integrityResultBody"></div>
</div>
</div>
<div class="border rounded p-3 mb-3" style="background: var(--bs-tertiary-bg, #f8f9fa);">
<div class="d-flex align-items-center gap-2 mb-2">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" class="text-muted" viewBox="0 0 16 16">
<path d="M9.405 1.05c-.413-1.4-2.397-1.4-2.81 0l-.1.34a1.464 1.464 0 0 1-2.105.872l-.31-.17c-1.283-.698-2.686.705-1.987 1.987l.169.311c.446.82.023 1.841-.872 2.105l-.34.1c-1.4.413-1.4 2.397 0 2.81l.34.1a1.464 1.464 0 0 1 .872 2.105l-.17.31c-.698 1.283.705 2.686 1.987 1.987l.311-.169a1.464 1.464 0 0 1 2.105.872l.1.34c.413 1.4 2.397 1.4 2.81 0l.1-.34a1.464 1.464 0 0 1 2.105-.872l.31.17c1.283.698 2.686-.705 1.987-1.987l-.169-.311a1.464 1.464 0 0 1 .872-2.105l.34-.1c1.4-.413 1.4-2.397 0-2.81l-.34-.1a1.464 1.464 0 0 1-.872-2.105l.17-.31c.698-1.283-.705-2.686-1.987-1.987l-.311.169a1.464 1.464 0 0 1-2.105-.872l-.1-.34zM8 10.93a2.929 2.929 0 1 1 0-5.86 2.929 2.929 0 0 1 0 5.858z"/>
</svg>
<span class="small fw-semibold text-muted">Configuration</span>
</div>
<div class="row small">
<div class="col-6 mb-1"><span class="text-muted">Interval:</span> {{ integrity_status.interval_hours }}h</div>
<div class="col-6 mb-1"><span class="text-muted">Dry run:</span> {{ "Yes" if integrity_status.dry_run else "No" }}</div>
<div class="col-6"><span class="text-muted">Batch size:</span> {{ integrity_status.batch_size }}</div>
<div class="col-6"><span class="text-muted">Auto-heal:</span> {{ "Yes" if integrity_status.auto_heal else "No" }}</div>
</div>
</div>
<div id="integrityHistoryContainer">
{% if integrity_history %}
<h6 class="fw-semibold small text-muted mb-2 d-flex align-items-center gap-2">
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
<path d="M8.515 1.019A7 7 0 0 0 8 1V0a8 8 0 0 1 .589.022l-.074.997zm2.004.45a7.003 7.003 0 0 0-.985-.299l.219-.976c.383.086.76.2 1.126.342l-.36.933zm1.37.71a7.01 7.01 0 0 0-.439-.27l.493-.87a8.025 8.025 0 0 1 .979.654l-.615.789a6.996 6.996 0 0 0-.418-.302zm1.834 1.79a6.99 6.99 0 0 0-.653-.796l.724-.69c.27.285.52.59.747.91l-.818.576zm.744 1.352a7.08 7.08 0 0 0-.214-.468l.893-.45a7.976 7.976 0 0 1 .45 1.088l-.95.313a7.023 7.023 0 0 0-.179-.483zm.53 2.507a6.991 6.991 0 0 0-.1-1.025l.985-.17c.067.386.106.778.116 1.17l-1 .025zm-.131 1.538c.033-.17.06-.339.081-.51l.993.123a7.957 7.957 0 0 1-.23 1.155l-.964-.267c.046-.165.086-.332.12-.501zm-.952 2.379c.184-.29.346-.594.486-.908l.914.405c-.16.36-.345.706-.555 1.038l-.845-.535zm-.964 1.205c.122-.122.239-.248.35-.378l.758.653a8.073 8.073 0 0 1-.401.432l-.707-.707z"/>
<path d="M8 1a7 7 0 1 0 4.95 11.95l.707.707A8.001 8.001 0 1 1 8 0v1z"/>
<path d="M7.5 3a.5.5 0 0 1 .5.5v5.21l3.248 1.856a.5.5 0 0 1-.496.868l-3.5-2A.5.5 0 0 1 7 8V3.5a.5.5 0 0 1 .5-.5z"/>
</svg>
Recent Scans
</h6>
<div class="table-responsive">
<table class="table table-sm small mb-0">
<thead class="table-light">
<tr>
<th>Time</th>
<th class="text-center">Scanned</th>
<th class="text-center">Issues</th>
<th class="text-center">Healed</th>
<th class="text-center">Mode</th>
</tr>
</thead>
<tbody>
{% for exec in integrity_history %}
<tr>
<td class="text-nowrap">{{ exec.timestamp_display }}</td>
<td class="text-center">{{ exec.result.objects_scanned|d(0) }}</td>
<td class="text-center">
{% set total_issues = (exec.result.corrupted_objects|d(0)) + (exec.result.orphaned_objects|d(0)) + (exec.result.phantom_metadata|d(0)) + (exec.result.stale_versions|d(0)) + (exec.result.etag_cache_inconsistencies|d(0)) + (exec.result.legacy_metadata_drifts|d(0)) %}
{% if total_issues > 0 %}
<span class="text-danger fw-medium">{{ total_issues }}</span>
{% else %}
<span class="text-success">0</span>
{% endif %}
</td>
<td class="text-center">{{ exec.result.issues_healed|d(0) }}</td>
<td class="text-center">
{% if exec.dry_run %}
<span class="badge bg-warning bg-opacity-10 text-warning">Dry</span>
{% elif exec.auto_heal %}
<span class="badge bg-success bg-opacity-10 text-success">Heal</span>
{% else %}
<span class="badge bg-primary bg-opacity-10 text-primary">Scan</span>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% else %}
<div class="text-center py-2">
<p class="text-muted small mb-0">No scans recorded yet.</p>
</div>
{% endif %}
</div>
{% else %}
<div class="text-center py-4">
<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" fill="currentColor" class="text-muted mb-2 opacity-50" viewBox="0 0 16 16">
<path d="M5.338 1.59a61.44 61.44 0 0 0-2.837.856.481.481 0 0 0-.328.39c-.554 4.157.726 7.19 2.253 9.188a10.725 10.725 0 0 0 2.287 2.233c.346.244.652.42.893.533.12.057.218.095.293.118a.55.55 0 0 0 .101.025.615.615 0 0 0 .1-.025c.076-.023.174-.061.294-.118.24-.113.547-.29.893-.533a10.726 10.726 0 0 0 2.287-2.233c1.527-1.997 2.807-5.031 2.253-9.188a.48.48 0 0 0-.328-.39c-.651-.213-1.75-.56-2.837-.855C9.552 1.29 8.531 1.067 8 1.067c-.53 0-1.552.223-2.662.524zM5.072.56C6.157.265 7.31 0 8 0s1.843.265 2.928.56c1.11.3 2.229.655 2.887.87a1.54 1.54 0 0 1 1.044 1.262c.596 4.477-.787 7.795-2.465 9.99a11.775 11.775 0 0 1-2.517 2.453 7.159 7.159 0 0 1-1.048.625c-.28.132-.581.24-.829.24s-.548-.108-.829-.24a7.158 7.158 0 0 1-1.048-.625 11.777 11.777 0 0 1-2.517-2.453C1.928 10.487.545 7.169 1.141 2.692A1.54 1.54 0 0 1 2.185 1.43 62.456 62.456 0 0 1 5.072.56z"/>
<path d="M10.854 5.146a.5.5 0 0 1 0 .708l-3 3a.5.5 0 0 1-.708 0l-1.5-1.5a.5.5 0 1 1 .708-.708L7.5 7.793l2.646-2.647a.5.5 0 0 1 .708 0z"/>
</svg>
<p class="text-muted mb-1">Integrity scanner is not enabled.</p>
<p class="text-muted small mb-0">Set <code>INTEGRITY_ENABLED=true</code> to enable automatic scanning.</p>
</div>
{% endif %}
</div>
</div>
</div>
</div>
{% endblock %}
{% block extra_scripts %}
<script>
(function () {
var csrfToken = document.querySelector('meta[name="csrf-token"]')?.getAttribute('content') || '';
function setLoading(btnId, loading, spinnerOnly) {
var btn = document.getElementById(btnId);
if (!btn) return;
btn.disabled = loading;
if (loading && !spinnerOnly) {
btn.dataset.originalHtml = btn.innerHTML;
btn.innerHTML = '<span class="spinner-border spinner-border-sm me-1" role="status"></span>Running...';
} else if (!loading && btn.dataset.originalHtml) {
btn.innerHTML = btn.dataset.originalHtml;
}
}
function formatBytes(bytes) {
if (!bytes || bytes === 0) return '0 B';
var units = ['B', 'KB', 'MB', 'GB'];
var i = 0;
var b = bytes;
while (b >= 1024 && i < units.length - 1) { b /= 1024; i++; }
return (i === 0 ? b : b.toFixed(1)) + ' ' + units[i];
}
var _displayTimezone = {{ display_timezone|tojson }};
function formatTimestamp(ts) {
var d = new Date(ts * 1000);
try {
var opts = {year: 'numeric', month: 'short', day: '2-digit', hour: '2-digit', minute: '2-digit', hour12: false, timeZone: _displayTimezone, timeZoneName: 'short'};
return d.toLocaleString('en-US', opts);
} catch (e) {
var pad = function (n) { return n < 10 ? '0' + n : '' + n; };
return d.getUTCFullYear() + '-' + pad(d.getUTCMonth() + 1) + '-' + pad(d.getUTCDate()) +
' ' + pad(d.getUTCHours()) + ':' + pad(d.getUTCMinutes()) + ' UTC';
}
}
var _gcHistoryIcon = '<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">' +
'<path d="M8.515 1.019A7 7 0 0 0 8 1V0a8 8 0 0 1 .589.022l-.074.997zm2.004.45a7.003 7.003 0 0 0-.985-.299l.219-.976c.383.086.76.2 1.126.342l-.36.933zm1.37.71a7.01 7.01 0 0 0-.439-.27l.493-.87a8.025 8.025 0 0 1 .979.654l-.615.789a6.996 6.996 0 0 0-.418-.302zm1.834 1.79a6.99 6.99 0 0 0-.653-.796l.724-.69c.27.285.52.59.747.91l-.818.576zm.744 1.352a7.08 7.08 0 0 0-.214-.468l.893-.45a7.976 7.976 0 0 1 .45 1.088l-.95.313a7.023 7.023 0 0 0-.179-.483zm.53 2.507a6.991 6.991 0 0 0-.1-1.025l.985-.17c.067.386.106.778.116 1.17l-1 .025zm-.131 1.538c.033-.17.06-.339.081-.51l.993.123a7.957 7.957 0 0 1-.23 1.155l-.964-.267c.046-.165.086-.332.12-.501zm-.952 2.379c.184-.29.346-.594.486-.908l.914.405c-.16.36-.345.706-.555 1.038l-.845-.535zm-.964 1.205c.122-.122.239-.248.35-.378l.758.653a8.073 8.073 0 0 1-.401.432l-.707-.707z"/>' +
'<path d="M8 1a7 7 0 1 0 4.95 11.95l.707.707A8.001 8.001 0 1 1 8 0v1z"/>' +
'<path d="M7.5 3a.5.5 0 0 1 .5.5v5.21l3.248 1.856a.5.5 0 0 1-.496.868l-3.5-2A.5.5 0 0 1 7 8V3.5a.5.5 0 0 1 .5-.5z"/></svg>';
function _gcRefreshHistory() {
fetch('{{ url_for("ui.system_gc_history") }}?limit=10', {
headers: {'X-CSRFToken': csrfToken}
})
.then(function (r) { return r.json(); })
.then(function (hist) {
var container = document.getElementById('gcHistoryContainer');
if (!container) return;
var execs = hist.executions || [];
if (execs.length === 0) {
container.innerHTML = '<div class="text-center py-2"><p class="text-muted small mb-0">No executions recorded yet.</p></div>';
return;
}
var html = '<h6 class="fw-semibold small text-muted mb-2 d-flex align-items-center gap-2">' +
_gcHistoryIcon + ' Recent Executions</h6>' +
'<div class="table-responsive"><table class="table table-sm small mb-0">' +
'<thead class="table-light"><tr><th>Time</th><th class="text-center">Cleaned</th>' +
'<th class="text-center">Freed</th><th class="text-center">Mode</th></tr></thead><tbody>';
execs.forEach(function (exec) {
var r = exec.result || {};
var cleaned = (r.temp_files_deleted || 0) + (r.multipart_uploads_deleted || 0) +
(r.lock_files_deleted || 0) + (r.orphaned_metadata_deleted || 0) +
(r.orphaned_versions_deleted || 0) + (r.empty_dirs_removed || 0);
var freed = (r.temp_bytes_freed || 0) + (r.multipart_bytes_freed || 0) +
(r.orphaned_version_bytes_freed || 0);
var mode = exec.dry_run
? '<span class="badge bg-warning bg-opacity-10 text-warning">Dry run</span>'
: '<span class="badge bg-primary bg-opacity-10 text-primary">Live</span>';
html += '<tr><td class="text-nowrap">' + formatTimestamp(exec.timestamp) + '</td>' +
'<td class="text-center">' + cleaned + '</td>' +
'<td class="text-center">' + formatBytes(freed) + '</td>' +
'<td class="text-center">' + mode + '</td></tr>';
});
html += '</tbody></table></div>';
container.innerHTML = html;
})
.catch(function () {});
}
function _integrityRefreshHistory() {
fetch('{{ url_for("ui.system_integrity_history") }}?limit=10', {
headers: {'X-CSRFToken': csrfToken}
})
.then(function (r) { return r.json(); })
.then(function (hist) {
var container = document.getElementById('integrityHistoryContainer');
if (!container) return;
var execs = hist.executions || [];
if (execs.length === 0) {
container.innerHTML = '<div class="text-center py-2"><p class="text-muted small mb-0">No scans recorded yet.</p></div>';
return;
}
var html = '<h6 class="fw-semibold small text-muted mb-2 d-flex align-items-center gap-2">' +
_gcHistoryIcon + ' Recent Scans</h6>' +
'<div class="table-responsive"><table class="table table-sm small mb-0">' +
'<thead class="table-light"><tr><th>Time</th><th class="text-center">Scanned</th>' +
'<th class="text-center">Issues</th><th class="text-center">Healed</th>' +
'<th class="text-center">Mode</th></tr></thead><tbody>';
execs.forEach(function (exec) {
var r = exec.result || {};
var issues = (r.corrupted_objects || 0) + (r.orphaned_objects || 0) +
(r.phantom_metadata || 0) + (r.stale_versions || 0) +
(r.etag_cache_inconsistencies || 0) + (r.legacy_metadata_drifts || 0);
var issueHtml = issues > 0
? '<span class="text-danger fw-medium">' + issues + '</span>'
: '<span class="text-success">0</span>';
var mode = exec.dry_run
? '<span class="badge bg-warning bg-opacity-10 text-warning">Dry</span>'
: (exec.auto_heal
? '<span class="badge bg-success bg-opacity-10 text-success">Heal</span>'
: '<span class="badge bg-primary bg-opacity-10 text-primary">Scan</span>');
html += '<tr><td class="text-nowrap">' + formatTimestamp(exec.timestamp) + '</td>' +
'<td class="text-center">' + (r.objects_scanned || 0) + '</td>' +
'<td class="text-center">' + issueHtml + '</td>' +
'<td class="text-center">' + (r.issues_healed || 0) + '</td>' +
'<td class="text-center">' + mode + '</td></tr>';
});
html += '</tbody></table></div>';
container.innerHTML = html;
})
.catch(function () {});
}
var _gcPollTimer = null;
var _gcLastDryRun = false;
function _gcSetScanning(scanning) {
var banner = document.getElementById('gcScanningBanner');
var btns = ['gcRunBtn', 'gcDryRunBtn'];
if (scanning) {
banner.classList.remove('d-none');
btns.forEach(function (id) {
var el = document.getElementById(id);
if (el) el.disabled = true;
});
} else {
banner.classList.add('d-none');
document.getElementById('gcScanElapsed').textContent = '';
btns.forEach(function (id) {
var el = document.getElementById(id);
if (el) el.disabled = false;
});
}
}
function _gcShowResult(data, dryRun) {
var container = document.getElementById('gcResult');
var alert = document.getElementById('gcResultAlert');
var title = document.getElementById('gcResultTitle');
var body = document.getElementById('gcResultBody');
container.classList.remove('d-none');
var totalItems = (data.temp_files_deleted || 0) + (data.multipart_uploads_deleted || 0) +
(data.lock_files_deleted || 0) + (data.orphaned_metadata_deleted || 0) +
(data.orphaned_versions_deleted || 0) + (data.empty_dirs_removed || 0);
var totalFreed = (data.temp_bytes_freed || 0) + (data.multipart_bytes_freed || 0) +
(data.orphaned_version_bytes_freed || 0);
alert.className = totalItems > 0 ? 'alert alert-success mb-0 small' : 'alert alert-info mb-0 small';
title.textContent = (dryRun ? '[Dry Run] ' : '') + 'Completed in ' + (data.execution_time_seconds || 0).toFixed(2) + 's';
var lines = [];
if (data.temp_files_deleted) lines.push('Temp files: ' + data.temp_files_deleted + ' (' + formatBytes(data.temp_bytes_freed) + ')');
if (data.multipart_uploads_deleted) lines.push('Multipart uploads: ' + data.multipart_uploads_deleted + ' (' + formatBytes(data.multipart_bytes_freed) + ')');
if (data.lock_files_deleted) lines.push('Lock files: ' + data.lock_files_deleted);
if (data.orphaned_metadata_deleted) lines.push('Orphaned metadata: ' + data.orphaned_metadata_deleted);
if (data.orphaned_versions_deleted) lines.push('Orphaned versions: ' + data.orphaned_versions_deleted + ' (' + formatBytes(data.orphaned_version_bytes_freed) + ')');
if (data.empty_dirs_removed) lines.push('Empty directories: ' + data.empty_dirs_removed);
if (totalItems === 0) lines.push('Nothing to clean up.');
if (totalFreed > 0) lines.push('Total freed: ' + formatBytes(totalFreed));
if (data.errors && data.errors.length > 0) lines.push('Errors: ' + data.errors.join(', '));
body.innerHTML = lines.join('<br>');
}
function _gcPoll() {
fetch('{{ url_for("ui.system_gc_status") }}', {
headers: {'X-CSRFToken': csrfToken}
})
.then(function (r) { return r.json(); })
.then(function (status) {
if (status.scanning) {
var elapsed = status.scan_elapsed_seconds || 0;
document.getElementById('gcScanElapsed').textContent = ' (' + elapsed.toFixed(0) + 's)';
_gcPollTimer = setTimeout(_gcPoll, 2000);
} else {
_gcSetScanning(false);
_gcRefreshHistory();
fetch('{{ url_for("ui.system_gc_history") }}?limit=1', {
headers: {'X-CSRFToken': csrfToken}
})
.then(function (r) { return r.json(); })
.then(function (hist) {
if (hist.executions && hist.executions.length > 0) {
var latest = hist.executions[0];
_gcShowResult(latest.result, latest.dry_run);
}
})
.catch(function () {});
}
})
.catch(function () {
_gcPollTimer = setTimeout(_gcPoll, 3000);
});
}
window.runGC = function (dryRun) {
_gcLastDryRun = dryRun;
document.getElementById('gcResult').classList.add('d-none');
_gcSetScanning(true);
fetch('{{ url_for("ui.system_gc_run") }}', {
method: 'POST',
headers: {'Content-Type': 'application/json', 'X-CSRFToken': csrfToken},
body: JSON.stringify({dry_run: dryRun})
})
.then(function (r) { return r.json(); })
.then(function (data) {
if (data.error) {
_gcSetScanning(false);
var container = document.getElementById('gcResult');
var alert = document.getElementById('gcResultAlert');
var title = document.getElementById('gcResultTitle');
var body = document.getElementById('gcResultBody');
container.classList.remove('d-none');
alert.className = 'alert alert-danger mb-0 small';
title.textContent = 'Error';
body.textContent = data.error;
return;
}
_gcPollTimer = setTimeout(_gcPoll, 2000);
})
.catch(function (err) {
_gcSetScanning(false);
var container = document.getElementById('gcResult');
var alert = document.getElementById('gcResultAlert');
var title = document.getElementById('gcResultTitle');
var body = document.getElementById('gcResultBody');
container.classList.remove('d-none');
alert.className = 'alert alert-danger mb-0 small';
title.textContent = 'Error';
body.textContent = err.message;
});
};
{% if gc_status.scanning %}
_gcSetScanning(true);
_gcPollTimer = setTimeout(_gcPoll, 2000);
{% endif %}
var _integrityPollTimer = null;
var _integrityLastMode = {dryRun: false, autoHeal: false};
function _integritySetScanning(scanning) {
var banner = document.getElementById('integrityScanningBanner');
var btns = ['integrityRunBtn', 'integrityHealBtn', 'integrityDryRunBtn'];
if (scanning) {
banner.classList.remove('d-none');
btns.forEach(function (id) {
var el = document.getElementById(id);
if (el) el.disabled = true;
});
} else {
banner.classList.add('d-none');
document.getElementById('integrityScanElapsed').textContent = '';
btns.forEach(function (id) {
var el = document.getElementById(id);
if (el) el.disabled = false;
});
}
}
function _integrityShowResult(data, dryRun, autoHeal) {
var container = document.getElementById('integrityResult');
var alert = document.getElementById('integrityResultAlert');
var title = document.getElementById('integrityResultTitle');
var body = document.getElementById('integrityResultBody');
container.classList.remove('d-none');
var totalIssues = (data.corrupted_objects || 0) + (data.orphaned_objects || 0) +
(data.phantom_metadata || 0) + (data.stale_versions || 0) +
(data.etag_cache_inconsistencies || 0) + (data.legacy_metadata_drifts || 0);
var prefix = dryRun ? '[Dry Run] ' : (autoHeal ? '[Heal] ' : '');
alert.className = totalIssues > 0 ? 'alert alert-warning mb-0 small' : 'alert alert-success mb-0 small';
title.textContent = prefix + 'Completed in ' + (data.execution_time_seconds || 0).toFixed(2) + 's';
var lines = [];
lines.push('Scanned: ' + (data.objects_scanned || 0) + ' objects in ' + (data.buckets_scanned || 0) + ' buckets');
if (totalIssues === 0) {
lines.push('No issues found.');
} else {
if (data.corrupted_objects) lines.push('Corrupted objects: ' + data.corrupted_objects);
if (data.orphaned_objects) lines.push('Orphaned objects: ' + data.orphaned_objects);
if (data.phantom_metadata) lines.push('Phantom metadata: ' + data.phantom_metadata);
if (data.stale_versions) lines.push('Stale versions: ' + data.stale_versions);
if (data.etag_cache_inconsistencies) lines.push('ETag inconsistencies: ' + data.etag_cache_inconsistencies);
if (data.legacy_metadata_drifts) lines.push('Legacy metadata drifts: ' + data.legacy_metadata_drifts);
if (data.issues_healed) lines.push('Issues healed: ' + data.issues_healed);
}
if (data.errors && data.errors.length > 0) lines.push('Errors: ' + data.errors.join(', '));
body.innerHTML = lines.join('<br>');
}
function _integrityPoll() {
fetch('{{ url_for("ui.system_integrity_status") }}', {
headers: {'X-CSRFToken': csrfToken}
})
.then(function (r) { return r.json(); })
.then(function (status) {
if (status.scanning) {
var elapsed = status.scan_elapsed_seconds || 0;
document.getElementById('integrityScanElapsed').textContent = ' (' + elapsed.toFixed(0) + 's)';
_integrityPollTimer = setTimeout(_integrityPoll, 2000);
} else {
_integritySetScanning(false);
_integrityRefreshHistory();
fetch('{{ url_for("ui.system_integrity_history") }}?limit=1', {
headers: {'X-CSRFToken': csrfToken}
})
.then(function (r) { return r.json(); })
.then(function (hist) {
if (hist.executions && hist.executions.length > 0) {
var latest = hist.executions[0];
_integrityShowResult(latest.result, latest.dry_run, latest.auto_heal);
}
})
.catch(function () {});
}
})
.catch(function () {
_integrityPollTimer = setTimeout(_integrityPoll, 3000);
});
}
window.runIntegrity = function (dryRun, autoHeal) {
_integrityLastMode = {dryRun: dryRun, autoHeal: autoHeal};
document.getElementById('integrityResult').classList.add('d-none');
_integritySetScanning(true);
fetch('{{ url_for("ui.system_integrity_run") }}', {
method: 'POST',
headers: {'Content-Type': 'application/json', 'X-CSRFToken': csrfToken},
body: JSON.stringify({dry_run: dryRun, auto_heal: autoHeal})
})
.then(function (r) { return r.json(); })
.then(function (data) {
if (data.error) {
_integritySetScanning(false);
var container = document.getElementById('integrityResult');
var alert = document.getElementById('integrityResultAlert');
var title = document.getElementById('integrityResultTitle');
var body = document.getElementById('integrityResultBody');
container.classList.remove('d-none');
alert.className = 'alert alert-danger mb-0 small';
title.textContent = 'Error';
body.textContent = data.error;
return;
}
_integrityPollTimer = setTimeout(_integrityPoll, 2000);
})
.catch(function (err) {
_integritySetScanning(false);
var container = document.getElementById('integrityResult');
var alert = document.getElementById('integrityResultAlert');
var title = document.getElementById('integrityResultTitle');
var body = document.getElementById('integrityResultBody');
container.classList.remove('d-none');
alert.className = 'alert alert-danger mb-0 small';
title.textContent = 'Error';
body.textContent = err.message;
});
};
{% if integrity_status.scanning %}
_integritySetScanning(true);
_integrityPollTimer = setTimeout(_integrityPoll, 2000);
{% endif %}
})();
</script>
{% endblock %}

View File

@@ -1,156 +0,0 @@
import hashlib
import time
import pytest
@pytest.fixture()
def bucket(client, signer):
headers = signer("PUT", "/cond-test")
client.put("/cond-test", headers=headers)
return "cond-test"
@pytest.fixture()
def uploaded(client, signer, bucket):
body = b"hello conditional"
etag = hashlib.md5(body).hexdigest()
headers = signer("PUT", f"/{bucket}/obj.txt", body=body)
resp = client.put(f"/{bucket}/obj.txt", headers=headers, data=body)
last_modified = resp.headers.get("Last-Modified")
return {"etag": etag, "last_modified": last_modified}
class TestIfMatch:
def test_get_matching_etag(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": f'"{uploaded["etag"]}"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_get_non_matching_etag(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": '"wrongetag"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
def test_head_matching_etag(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Match": f'"{uploaded["etag"]}"'})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_head_non_matching_etag(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Match": '"wrongetag"'})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
def test_wildcard_match(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": "*"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_multiple_etags_one_matches(self, client, signer, bucket, uploaded):
etag_list = f'"bad1", "{uploaded["etag"]}", "bad2"'
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": etag_list})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_multiple_etags_none_match(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": '"bad1", "bad2"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
class TestIfNoneMatch:
def test_get_matching_etag_returns_304(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-None-Match": f'"{uploaded["etag"]}"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
assert uploaded["etag"] in resp.headers.get("ETag", "")
def test_get_non_matching_etag_returns_200(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-None-Match": '"wrongetag"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_head_matching_etag_returns_304(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-None-Match": f'"{uploaded["etag"]}"'})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
def test_head_non_matching_etag_returns_200(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-None-Match": '"wrongetag"'})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_wildcard_returns_304(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-None-Match": "*"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
class TestIfModifiedSince:
def test_not_modified_returns_304(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Modified-Since": "Sun, 01 Jan 2034 00:00:00 GMT"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
assert "ETag" in resp.headers
def test_modified_returns_200(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Modified-Since": "Sun, 01 Jan 2000 00:00:00 GMT"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_head_not_modified(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Modified-Since": "Sun, 01 Jan 2034 00:00:00 GMT"})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
def test_if_none_match_takes_precedence(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={
"If-None-Match": '"wrongetag"',
"If-Modified-Since": "Sun, 01 Jan 2034 00:00:00 GMT",
})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
class TestIfUnmodifiedSince:
def test_unmodified_returns_200(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Unmodified-Since": "Sun, 01 Jan 2034 00:00:00 GMT"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_modified_returns_412(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Unmodified-Since": "Sun, 01 Jan 2000 00:00:00 GMT"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
def test_head_modified_returns_412(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Unmodified-Since": "Sun, 01 Jan 2000 00:00:00 GMT"})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
def test_if_match_takes_precedence(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={
"If-Match": f'"{uploaded["etag"]}"',
"If-Unmodified-Since": "Sun, 01 Jan 2000 00:00:00 GMT",
})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
class TestConditionalWithRange:
def test_if_match_with_range(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={
"If-Match": f'"{uploaded["etag"]}"',
"Range": "bytes=0-4",
})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 206
def test_if_match_fails_with_range(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={
"If-Match": '"wrongetag"',
"Range": "bytes=0-4",
})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412

View File

@@ -1,356 +0,0 @@
import json
import os
import time
from pathlib import Path
import pytest
from app.gc import GarbageCollector, GCResult
@pytest.fixture
def storage_root(tmp_path):
root = tmp_path / "data"
root.mkdir()
sys_root = root / ".myfsio.sys"
sys_root.mkdir()
(sys_root / "config").mkdir(parents=True)
(sys_root / "tmp").mkdir()
(sys_root / "multipart").mkdir()
(sys_root / "buckets").mkdir()
return root
@pytest.fixture
def gc(storage_root):
return GarbageCollector(
storage_root=storage_root,
interval_hours=1.0,
temp_file_max_age_hours=1.0,
multipart_max_age_days=1,
lock_file_max_age_hours=0.5,
dry_run=False,
)
def _make_old(path, hours=48):
old_time = time.time() - hours * 3600
os.utime(path, (old_time, old_time))
class TestTempFileCleanup:
def test_old_temp_files_deleted(self, storage_root, gc):
tmp_dir = storage_root / ".myfsio.sys" / "tmp"
old_file = tmp_dir / "abc123.tmp"
old_file.write_bytes(b"x" * 1000)
_make_old(old_file, hours=48)
result = gc.run_now()
assert result.temp_files_deleted == 1
assert result.temp_bytes_freed == 1000
assert not old_file.exists()
def test_recent_temp_files_kept(self, storage_root, gc):
tmp_dir = storage_root / ".myfsio.sys" / "tmp"
new_file = tmp_dir / "recent.tmp"
new_file.write_bytes(b"data")
result = gc.run_now()
assert result.temp_files_deleted == 0
assert new_file.exists()
def test_dry_run_keeps_files(self, storage_root, gc):
gc.dry_run = True
tmp_dir = storage_root / ".myfsio.sys" / "tmp"
old_file = tmp_dir / "stale.tmp"
old_file.write_bytes(b"x" * 500)
_make_old(old_file, hours=48)
result = gc.run_now()
assert result.temp_files_deleted == 1
assert result.temp_bytes_freed == 500
assert old_file.exists()
class TestMultipartCleanup:
def test_old_orphaned_multipart_deleted(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
mp_root = storage_root / ".myfsio.sys" / "multipart" / "test-bucket"
mp_root.mkdir(parents=True)
upload_dir = mp_root / "upload-123"
upload_dir.mkdir()
manifest = upload_dir / "manifest.json"
manifest.write_text(json.dumps({"upload_id": "upload-123", "object_key": "foo.txt"}))
part = upload_dir / "part-00001.part"
part.write_bytes(b"x" * 2000)
_make_old(manifest, hours=200)
_make_old(part, hours=200)
_make_old(upload_dir, hours=200)
result = gc.run_now()
assert result.multipart_uploads_deleted == 1
assert result.multipart_bytes_freed > 0
assert not upload_dir.exists()
def test_recent_multipart_kept(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
mp_root = storage_root / ".myfsio.sys" / "multipart" / "test-bucket"
mp_root.mkdir(parents=True)
upload_dir = mp_root / "upload-new"
upload_dir.mkdir()
manifest = upload_dir / "manifest.json"
manifest.write_text(json.dumps({"upload_id": "upload-new", "object_key": "bar.txt"}))
result = gc.run_now()
assert result.multipart_uploads_deleted == 0
assert upload_dir.exists()
def test_legacy_multipart_cleaned(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
legacy_mp = bucket / ".multipart" / "upload-old"
legacy_mp.mkdir(parents=True)
part = legacy_mp / "part-00001.part"
part.write_bytes(b"y" * 500)
_make_old(part, hours=200)
_make_old(legacy_mp, hours=200)
result = gc.run_now()
assert result.multipart_uploads_deleted == 1
class TestLockFileCleanup:
def test_stale_lock_files_deleted(self, storage_root, gc):
locks_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "locks"
locks_dir.mkdir(parents=True)
lock = locks_dir / "some_key.lock"
lock.write_text("")
_make_old(lock, hours=2)
result = gc.run_now()
assert result.lock_files_deleted == 1
assert not lock.exists()
def test_recent_lock_kept(self, storage_root, gc):
locks_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "locks"
locks_dir.mkdir(parents=True)
lock = locks_dir / "active.lock"
lock.write_text("")
result = gc.run_now()
assert result.lock_files_deleted == 0
assert lock.exists()
class TestOrphanedMetadataCleanup:
def test_legacy_orphaned_metadata_deleted(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
meta_dir = bucket / ".meta"
meta_dir.mkdir()
orphan = meta_dir / "deleted_file.txt.meta.json"
orphan.write_text(json.dumps({"etag": "abc"}))
result = gc.run_now()
assert result.orphaned_metadata_deleted == 1
assert not orphan.exists()
def test_valid_metadata_kept(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
obj = bucket / "exists.txt"
obj.write_text("hello")
meta_dir = bucket / ".meta"
meta_dir.mkdir()
meta = meta_dir / "exists.txt.meta.json"
meta.write_text(json.dumps({"etag": "abc"}))
result = gc.run_now()
assert result.orphaned_metadata_deleted == 0
assert meta.exists()
def test_index_orphaned_entries_cleaned(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
obj = bucket / "keep.txt"
obj.write_text("hello")
meta_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "meta"
meta_dir.mkdir(parents=True)
index = meta_dir / "_index.json"
index.write_text(json.dumps({"keep.txt": {"etag": "a"}, "gone.txt": {"etag": "b"}}))
result = gc.run_now()
assert result.orphaned_metadata_deleted == 1
updated = json.loads(index.read_text())
assert "keep.txt" in updated
assert "gone.txt" not in updated
class TestOrphanedVersionsCleanup:
def test_orphaned_versions_deleted(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
versions_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "versions" / "deleted_obj.txt"
versions_dir.mkdir(parents=True)
v_bin = versions_dir / "v1.bin"
v_json = versions_dir / "v1.json"
v_bin.write_bytes(b"old data" * 100)
v_json.write_text(json.dumps({"version_id": "v1", "size": 800}))
result = gc.run_now()
assert result.orphaned_versions_deleted == 2
assert result.orphaned_version_bytes_freed == 800
def test_active_versions_kept(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
obj = bucket / "active.txt"
obj.write_text("current")
versions_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "versions" / "active.txt"
versions_dir.mkdir(parents=True)
v_bin = versions_dir / "v1.bin"
v_bin.write_bytes(b"old version")
result = gc.run_now()
assert result.orphaned_versions_deleted == 0
assert v_bin.exists()
class TestEmptyDirCleanup:
def test_empty_dirs_removed(self, storage_root, gc):
empty = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "locks" / "sub"
empty.mkdir(parents=True)
result = gc.run_now()
assert result.empty_dirs_removed > 0
assert not empty.exists()
class TestHistory:
def test_history_recorded(self, storage_root, gc):
gc.run_now()
history = gc.get_history()
assert len(history) == 1
assert "result" in history[0]
assert "timestamp" in history[0]
def test_multiple_runs(self, storage_root, gc):
gc.run_now()
gc.run_now()
gc.run_now()
history = gc.get_history()
assert len(history) == 3
assert history[0]["timestamp"] >= history[1]["timestamp"]
class TestStatus:
def test_get_status(self, storage_root, gc):
status = gc.get_status()
assert status["interval_hours"] == 1.0
assert status["dry_run"] is False
assert status["temp_file_max_age_hours"] == 1.0
assert status["multipart_max_age_days"] == 1
assert status["lock_file_max_age_hours"] == 0.5
class TestGCResult:
def test_total_bytes_freed(self):
r = GCResult(temp_bytes_freed=100, multipart_bytes_freed=200, orphaned_version_bytes_freed=300)
assert r.total_bytes_freed == 600
def test_has_work(self):
assert not GCResult().has_work
assert GCResult(temp_files_deleted=1).has_work
assert GCResult(lock_files_deleted=1).has_work
assert GCResult(empty_dirs_removed=1).has_work
class TestAdminAPI:
@pytest.fixture
def gc_app(self, tmp_path):
from app import create_api_app
storage_root = tmp_path / "data"
iam_config = tmp_path / "iam.json"
bucket_policies = tmp_path / "bucket_policies.json"
iam_payload = {
"users": [
{
"access_key": "admin",
"secret_key": "adminsecret",
"display_name": "Admin",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy", "iam:*"]}],
}
]
}
iam_config.write_text(json.dumps(iam_payload))
flask_app = create_api_app({
"TESTING": True,
"SECRET_KEY": "testing",
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"GC_ENABLED": True,
"GC_INTERVAL_HOURS": 1.0,
})
yield flask_app
gc = flask_app.extensions.get("gc")
if gc:
gc.stop()
def test_gc_status(self, gc_app):
client = gc_app.test_client()
resp = client.get("/admin/gc/status", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"})
assert resp.status_code == 200
data = resp.get_json()
assert data["enabled"] is True
def test_gc_run(self, gc_app):
client = gc_app.test_client()
resp = client.post(
"/admin/gc/run",
headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"},
content_type="application/json",
)
assert resp.status_code == 200
data = resp.get_json()
assert data["status"] == "started"
def test_gc_dry_run(self, gc_app):
client = gc_app.test_client()
resp = client.post(
"/admin/gc/run",
headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"},
data=json.dumps({"dry_run": True}),
content_type="application/json",
)
assert resp.status_code == 200
data = resp.get_json()
assert data["status"] == "started"
def test_gc_history(self, gc_app):
import time
client = gc_app.test_client()
client.post("/admin/gc/run", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"})
for _ in range(50):
time.sleep(0.1)
status = client.get("/admin/gc/status", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"}).get_json()
if not status.get("scanning"):
break
resp = client.get("/admin/gc/history", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"})
assert resp.status_code == 200
data = resp.get_json()
assert len(data["executions"]) >= 1
def test_gc_requires_admin(self, gc_app):
iam = gc_app.extensions["iam"]
user = iam.create_user(display_name="Regular")
client = gc_app.test_client()
resp = client.get(
"/admin/gc/status",
headers={"X-Access-Key": user["access_key"], "X-Secret-Key": user["secret_key"]},
)
assert resp.status_code == 403

View File

@@ -1,788 +0,0 @@
import hashlib
import json
import os
import sys
import time
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from app.integrity import IntegrityChecker, IntegrityCursorStore, IntegrityResult
def _wait_scan_done(client, headers, timeout=10):
deadline = time.time() + timeout
while time.time() < deadline:
resp = client.get("/admin/integrity/status", headers=headers)
data = resp.get_json()
if not data.get("scanning"):
return
time.sleep(0.1)
raise TimeoutError("scan did not complete")
def _md5(data: bytes) -> str:
return hashlib.md5(data).hexdigest()
def _setup_bucket(storage_root: Path, bucket_name: str, objects: dict[str, bytes]) -> None:
bucket_path = storage_root / bucket_name
bucket_path.mkdir(parents=True, exist_ok=True)
meta_root = storage_root / ".myfsio.sys" / "buckets" / bucket_name / "meta"
meta_root.mkdir(parents=True, exist_ok=True)
bucket_json = storage_root / ".myfsio.sys" / "buckets" / bucket_name / ".bucket.json"
bucket_json.write_text(json.dumps({"created": "2025-01-01"}))
for key, data in objects.items():
obj_path = bucket_path / key
obj_path.parent.mkdir(parents=True, exist_ok=True)
obj_path.write_bytes(data)
etag = _md5(data)
stat = obj_path.stat()
meta = {
"__etag__": etag,
"__size__": str(stat.st_size),
"__last_modified__": str(stat.st_mtime),
}
key_path = Path(key)
parent = key_path.parent
key_name = key_path.name
if parent == Path("."):
index_path = meta_root / "_index.json"
else:
index_path = meta_root / parent / "_index.json"
index_path.parent.mkdir(parents=True, exist_ok=True)
index_data = {}
if index_path.exists():
index_data = json.loads(index_path.read_text())
index_data[key_name] = {"metadata": meta}
index_path.write_text(json.dumps(index_data))
def _issues_of_type(result, issue_type):
return [i for i in result.issues if i.issue_type == issue_type]
@pytest.fixture
def storage_root(tmp_path):
root = tmp_path / "data"
root.mkdir()
(root / ".myfsio.sys" / "config").mkdir(parents=True, exist_ok=True)
return root
@pytest.fixture
def checker(storage_root):
return IntegrityChecker(
storage_root=storage_root,
interval_hours=24.0,
batch_size=1000,
auto_heal=False,
dry_run=False,
)
class TestCorruptedObjects:
def test_detect_corrupted(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello world"})
(storage_root / "mybucket" / "file.txt").write_bytes(b"corrupted data")
result = checker.run_now()
assert result.corrupted_objects == 1
issues = _issues_of_type(result, "corrupted_object")
assert len(issues) == 1
assert issues[0].bucket == "mybucket"
assert issues[0].key == "file.txt"
assert not issues[0].healed
def test_heal_corrupted(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello world"})
(storage_root / "mybucket" / "file.txt").write_bytes(b"corrupted data")
result = checker.run_now(auto_heal=True)
assert result.corrupted_objects == 1
assert result.issues_healed == 1
issues = _issues_of_type(result, "corrupted_object")
assert issues[0].healed
result2 = checker.run_now()
assert result2.corrupted_objects == 0
def test_valid_objects_pass(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello world"})
result = checker.run_now()
assert result.corrupted_objects == 0
assert result.objects_scanned >= 1
def test_corrupted_nested_key(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"sub/dir/file.txt": b"nested content"})
(storage_root / "mybucket" / "sub" / "dir" / "file.txt").write_bytes(b"bad")
result = checker.run_now()
assert result.corrupted_objects == 1
issues = _issues_of_type(result, "corrupted_object")
assert issues[0].key == "sub/dir/file.txt"
class TestOrphanedObjects:
def test_detect_orphaned(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {})
(storage_root / "mybucket" / "orphan.txt").write_bytes(b"orphan data")
result = checker.run_now()
assert result.orphaned_objects == 1
issues = _issues_of_type(result, "orphaned_object")
assert len(issues) == 1
def test_heal_orphaned(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {})
(storage_root / "mybucket" / "orphan.txt").write_bytes(b"orphan data")
result = checker.run_now(auto_heal=True)
assert result.orphaned_objects == 1
assert result.issues_healed == 1
issues = _issues_of_type(result, "orphaned_object")
assert issues[0].healed
result2 = checker.run_now()
assert result2.orphaned_objects == 0
assert result2.objects_scanned >= 1
class TestPhantomMetadata:
def test_detect_phantom(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
(storage_root / "mybucket" / "file.txt").unlink()
result = checker.run_now()
assert result.phantom_metadata == 1
issues = _issues_of_type(result, "phantom_metadata")
assert len(issues) == 1
def test_heal_phantom(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
(storage_root / "mybucket" / "file.txt").unlink()
result = checker.run_now(auto_heal=True)
assert result.phantom_metadata == 1
assert result.issues_healed == 1
result2 = checker.run_now()
assert result2.phantom_metadata == 0
class TestStaleVersions:
def test_manifest_without_data(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
versions_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "versions" / "file.txt"
versions_root.mkdir(parents=True)
(versions_root / "v1.json").write_text(json.dumps({"etag": "abc"}))
result = checker.run_now()
assert result.stale_versions == 1
issues = _issues_of_type(result, "stale_version")
assert "manifest without data" in issues[0].detail
def test_data_without_manifest(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
versions_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "versions" / "file.txt"
versions_root.mkdir(parents=True)
(versions_root / "v1.bin").write_bytes(b"old data")
result = checker.run_now()
assert result.stale_versions == 1
issues = _issues_of_type(result, "stale_version")
assert "data without manifest" in issues[0].detail
def test_heal_stale_versions(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
versions_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "versions" / "file.txt"
versions_root.mkdir(parents=True)
(versions_root / "v1.json").write_text(json.dumps({"etag": "abc"}))
(versions_root / "v2.bin").write_bytes(b"old data")
result = checker.run_now(auto_heal=True)
assert result.stale_versions == 2
assert result.issues_healed == 2
assert not (versions_root / "v1.json").exists()
assert not (versions_root / "v2.bin").exists()
def test_valid_versions_pass(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
versions_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "versions" / "file.txt"
versions_root.mkdir(parents=True)
(versions_root / "v1.json").write_text(json.dumps({"etag": "abc"}))
(versions_root / "v1.bin").write_bytes(b"old data")
result = checker.run_now()
assert result.stale_versions == 0
class TestEtagCache:
def test_detect_mismatch(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
etag_path = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "etag_index.json"
etag_path.write_text(json.dumps({"file.txt": "wrong_etag"}))
result = checker.run_now()
assert result.etag_cache_inconsistencies == 1
issues = _issues_of_type(result, "etag_cache_inconsistency")
assert len(issues) == 1
def test_heal_mismatch(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
etag_path = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "etag_index.json"
etag_path.write_text(json.dumps({"file.txt": "wrong_etag"}))
result = checker.run_now(auto_heal=True)
assert result.etag_cache_inconsistencies == 1
assert result.issues_healed == 1
assert not etag_path.exists()
class TestLegacyMetadata:
def test_detect_unmigrated(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
legacy_meta = storage_root / "mybucket" / ".meta" / "file.txt.meta.json"
legacy_meta.parent.mkdir(parents=True)
legacy_meta.write_text(json.dumps({"__etag__": "different_value"}))
meta_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "meta"
index_path = meta_root / "_index.json"
index_path.unlink()
result = checker.run_now()
assert result.legacy_metadata_drifts == 1
issues = _issues_of_type(result, "legacy_metadata_drift")
assert len(issues) == 1
assert issues[0].detail == "unmigrated legacy .meta.json"
def test_detect_drift(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
legacy_meta = storage_root / "mybucket" / ".meta" / "file.txt.meta.json"
legacy_meta.parent.mkdir(parents=True)
legacy_meta.write_text(json.dumps({"__etag__": "different_value"}))
result = checker.run_now()
assert result.legacy_metadata_drifts == 1
issues = _issues_of_type(result, "legacy_metadata_drift")
assert "differs from index" in issues[0].detail
def test_heal_unmigrated(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
legacy_meta = storage_root / "mybucket" / ".meta" / "file.txt.meta.json"
legacy_meta.parent.mkdir(parents=True)
legacy_data = {"__etag__": _md5(b"hello"), "__size__": "5"}
legacy_meta.write_text(json.dumps(legacy_data))
meta_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "meta"
index_path = meta_root / "_index.json"
index_path.unlink()
result = checker.run_now(auto_heal=True)
assert result.legacy_metadata_drifts == 1
legacy_issues = _issues_of_type(result, "legacy_metadata_drift")
assert len(legacy_issues) == 1
assert legacy_issues[0].healed
assert not legacy_meta.exists()
index_data = json.loads(index_path.read_text())
assert "file.txt" in index_data
assert index_data["file.txt"]["metadata"]["__etag__"] == _md5(b"hello")
def test_heal_drift(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
legacy_meta = storage_root / "mybucket" / ".meta" / "file.txt.meta.json"
legacy_meta.parent.mkdir(parents=True)
legacy_meta.write_text(json.dumps({"__etag__": "different_value"}))
result = checker.run_now(auto_heal=True)
assert result.legacy_metadata_drifts == 1
legacy_issues = _issues_of_type(result, "legacy_metadata_drift")
assert legacy_issues[0].healed
assert not legacy_meta.exists()
class TestDryRun:
def test_dry_run_no_changes(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
(storage_root / "mybucket" / "file.txt").write_bytes(b"corrupted")
(storage_root / "mybucket" / "orphan.txt").write_bytes(b"orphan")
result = checker.run_now(auto_heal=True, dry_run=True)
assert result.corrupted_objects == 1
assert result.orphaned_objects == 1
assert result.issues_healed == 0
meta_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "meta"
index_data = json.loads((meta_root / "_index.json").read_text())
assert "orphan.txt" not in index_data
class TestBatchSize:
def test_batch_limits_scan(self, storage_root):
objects = {f"file{i}.txt": f"data{i}".encode() for i in range(10)}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(
storage_root=storage_root,
batch_size=3,
)
result = checker.run_now()
assert result.objects_scanned <= 3
class TestHistory:
def test_history_recorded(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
checker.run_now()
history = checker.get_history()
assert len(history) == 1
assert "corrupted_objects" in history[0]["result"]
def test_history_multiple(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
checker.run_now()
checker.run_now()
checker.run_now()
history = checker.get_history()
assert len(history) == 3
def test_history_pagination(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
for _ in range(5):
checker.run_now()
history = checker.get_history(limit=2, offset=1)
assert len(history) == 2
AUTH_HEADERS = {"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"}
class TestAdminAPI:
@pytest.fixture
def integrity_app(self, tmp_path):
from app import create_api_app
storage_root = tmp_path / "data"
iam_config = tmp_path / "iam.json"
bucket_policies = tmp_path / "bucket_policies.json"
iam_payload = {
"users": [
{
"access_key": "admin",
"secret_key": "adminsecret",
"display_name": "Admin",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy", "iam:*"]}],
}
]
}
iam_config.write_text(json.dumps(iam_payload))
flask_app = create_api_app({
"TESTING": True,
"SECRET_KEY": "testing",
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"API_BASE_URL": "http://testserver",
"INTEGRITY_ENABLED": True,
"INTEGRITY_AUTO_HEAL": False,
"INTEGRITY_DRY_RUN": False,
})
yield flask_app
storage = flask_app.extensions.get("object_storage")
if storage:
base = getattr(storage, "storage", storage)
if hasattr(base, "shutdown_stats"):
base.shutdown_stats()
ic = flask_app.extensions.get("integrity")
if ic:
ic.stop()
def test_status_endpoint(self, integrity_app):
client = integrity_app.test_client()
resp = client.get("/admin/integrity/status", headers=AUTH_HEADERS)
assert resp.status_code == 200
data = resp.get_json()
assert data["enabled"] is True
assert "interval_hours" in data
def test_run_endpoint(self, integrity_app):
client = integrity_app.test_client()
resp = client.post("/admin/integrity/run", headers=AUTH_HEADERS, json={})
assert resp.status_code == 200
data = resp.get_json()
assert data["status"] == "started"
_wait_scan_done(client, AUTH_HEADERS)
resp = client.get("/admin/integrity/history?limit=1", headers=AUTH_HEADERS)
hist = resp.get_json()
assert len(hist["executions"]) >= 1
assert "corrupted_objects" in hist["executions"][0]["result"]
assert "objects_scanned" in hist["executions"][0]["result"]
def test_run_with_overrides(self, integrity_app):
client = integrity_app.test_client()
resp = client.post(
"/admin/integrity/run",
headers=AUTH_HEADERS,
json={"dry_run": True, "auto_heal": True},
)
assert resp.status_code == 200
_wait_scan_done(client, AUTH_HEADERS)
def test_history_endpoint(self, integrity_app):
client = integrity_app.test_client()
client.post("/admin/integrity/run", headers=AUTH_HEADERS, json={})
_wait_scan_done(client, AUTH_HEADERS)
resp = client.get("/admin/integrity/history", headers=AUTH_HEADERS)
assert resp.status_code == 200
data = resp.get_json()
assert "executions" in data
assert len(data["executions"]) >= 1
def test_auth_required(self, integrity_app):
client = integrity_app.test_client()
resp = client.get("/admin/integrity/status")
assert resp.status_code in (401, 403)
def test_disabled_status(self, tmp_path):
from app import create_api_app
storage_root = tmp_path / "data2"
iam_config = tmp_path / "iam2.json"
bucket_policies = tmp_path / "bp2.json"
iam_payload = {
"users": [
{
"access_key": "admin",
"secret_key": "adminsecret",
"display_name": "Admin",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy", "iam:*"]}],
}
]
}
iam_config.write_text(json.dumps(iam_payload))
flask_app = create_api_app({
"TESTING": True,
"SECRET_KEY": "testing",
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"API_BASE_URL": "http://testserver",
"INTEGRITY_ENABLED": False,
})
c = flask_app.test_client()
resp = c.get("/admin/integrity/status", headers=AUTH_HEADERS)
assert resp.status_code == 200
data = resp.get_json()
assert data["enabled"] is False
storage = flask_app.extensions.get("object_storage")
if storage:
base = getattr(storage, "storage", storage)
if hasattr(base, "shutdown_stats"):
base.shutdown_stats()
class TestMultipleBuckets:
def test_scans_multiple_buckets(self, storage_root, checker):
_setup_bucket(storage_root, "bucket1", {"a.txt": b"aaa"})
_setup_bucket(storage_root, "bucket2", {"b.txt": b"bbb"})
result = checker.run_now()
assert result.buckets_scanned == 2
assert result.objects_scanned >= 2
assert result.corrupted_objects == 0
class TestGetStatus:
def test_status_fields(self, checker):
status = checker.get_status()
assert "enabled" in status
assert "running" in status
assert "interval_hours" in status
assert "batch_size" in status
assert "auto_heal" in status
assert "dry_run" in status
def test_status_includes_cursor(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
checker.run_now()
status = checker.get_status()
assert "cursor" in status
assert status["cursor"]["tracked_buckets"] == 1
assert "mybucket" in status["cursor"]["buckets"]
class TestUnifiedBatchCounter:
def test_orphaned_objects_count_toward_batch(self, storage_root):
_setup_bucket(storage_root, "mybucket", {})
for i in range(10):
(storage_root / "mybucket" / f"orphan{i}.txt").write_bytes(f"data{i}".encode())
checker = IntegrityChecker(storage_root=storage_root, batch_size=3)
result = checker.run_now()
assert result.objects_scanned <= 3
def test_phantom_metadata_counts_toward_batch(self, storage_root):
objects = {f"file{i}.txt": f"data{i}".encode() for i in range(10)}
_setup_bucket(storage_root, "mybucket", objects)
for i in range(10):
(storage_root / "mybucket" / f"file{i}.txt").unlink()
checker = IntegrityChecker(storage_root=storage_root, batch_size=5)
result = checker.run_now()
assert result.objects_scanned <= 5
def test_all_check_types_contribute(self, storage_root):
_setup_bucket(storage_root, "mybucket", {"valid.txt": b"hello"})
(storage_root / "mybucket" / "orphan.txt").write_bytes(b"orphan")
checker = IntegrityChecker(storage_root=storage_root, batch_size=1000)
result = checker.run_now()
assert result.objects_scanned > 2
class TestCursorRotation:
def test_oldest_bucket_scanned_first(self, storage_root):
_setup_bucket(storage_root, "bucket-a", {"a.txt": b"aaa"})
_setup_bucket(storage_root, "bucket-b", {"b.txt": b"bbb"})
_setup_bucket(storage_root, "bucket-c", {"c.txt": b"ccc"})
checker = IntegrityChecker(storage_root=storage_root, batch_size=5)
checker.cursor_store.update_bucket("bucket-a", 1000.0)
checker.cursor_store.update_bucket("bucket-b", 3000.0)
checker.cursor_store.update_bucket("bucket-c", 2000.0)
ordered = checker.cursor_store.get_bucket_order(["bucket-a", "bucket-b", "bucket-c"])
assert ordered[0] == "bucket-a"
assert ordered[1] == "bucket-c"
assert ordered[2] == "bucket-b"
def test_never_scanned_buckets_first(self, storage_root):
_setup_bucket(storage_root, "bucket-old", {"a.txt": b"aaa"})
_setup_bucket(storage_root, "bucket-new", {"b.txt": b"bbb"})
checker = IntegrityChecker(storage_root=storage_root, batch_size=1000)
checker.cursor_store.update_bucket("bucket-old", time.time())
ordered = checker.cursor_store.get_bucket_order(["bucket-old", "bucket-new"])
assert ordered[0] == "bucket-new"
def test_rotation_covers_all_buckets(self, storage_root):
for name in ["bucket-a", "bucket-b", "bucket-c"]:
_setup_bucket(storage_root, name, {f"{name}.txt": name.encode()})
checker = IntegrityChecker(storage_root=storage_root, batch_size=4)
result1 = checker.run_now()
scanned_buckets_1 = set()
for issue_bucket in [storage_root]:
pass
assert result1.buckets_scanned >= 1
result2 = checker.run_now()
result3 = checker.run_now()
cursor_info = checker.cursor_store.get_info()
assert cursor_info["tracked_buckets"] == 3
def test_cursor_persistence(self, storage_root):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
checker1 = IntegrityChecker(storage_root=storage_root, batch_size=1000)
checker1.run_now()
cursor1 = checker1.cursor_store.get_info()
assert cursor1["tracked_buckets"] == 1
assert "mybucket" in cursor1["buckets"]
checker2 = IntegrityChecker(storage_root=storage_root, batch_size=1000)
cursor2 = checker2.cursor_store.get_info()
assert cursor2["tracked_buckets"] == 1
assert "mybucket" in cursor2["buckets"]
def test_stale_cursor_cleanup(self, storage_root):
_setup_bucket(storage_root, "bucket-a", {"a.txt": b"aaa"})
_setup_bucket(storage_root, "bucket-b", {"b.txt": b"bbb"})
checker = IntegrityChecker(storage_root=storage_root, batch_size=1000)
checker.run_now()
import shutil
shutil.rmtree(storage_root / "bucket-b")
meta_b = storage_root / ".myfsio.sys" / "buckets" / "bucket-b"
if meta_b.exists():
shutil.rmtree(meta_b)
checker.run_now()
cursor_info = checker.cursor_store.get_info()
assert "bucket-b" not in cursor_info["buckets"]
assert "bucket-a" in cursor_info["buckets"]
def test_cursor_updates_after_scan(self, storage_root):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
checker = IntegrityChecker(storage_root=storage_root, batch_size=1000)
before = time.time()
checker.run_now()
after = time.time()
cursor_info = checker.cursor_store.get_info()
entry = cursor_info["buckets"]["mybucket"]
assert before <= entry["last_scanned"] <= after
assert entry["completed"] is True
class TestIntraBucketCursor:
def test_resumes_from_cursor_key(self, storage_root):
objects = {f"file_{chr(ord('a') + i)}.txt": f"data{i}".encode() for i in range(10)}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(storage_root=storage_root, batch_size=3)
result1 = checker.run_now()
assert result1.objects_scanned == 3
cursor_info = checker.cursor_store.get_info()
entry = cursor_info["buckets"]["mybucket"]
assert entry["last_key"] is not None
assert entry["completed"] is False
result2 = checker.run_now()
assert result2.objects_scanned == 3
cursor_after = checker.cursor_store.get_info()["buckets"]["mybucket"]
assert cursor_after["last_key"] > entry["last_key"]
def test_cursor_resets_after_full_pass(self, storage_root):
objects = {f"file_{i}.txt": f"data{i}".encode() for i in range(3)}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(storage_root=storage_root, batch_size=100)
checker.run_now()
cursor_info = checker.cursor_store.get_info()
entry = cursor_info["buckets"]["mybucket"]
assert entry["last_key"] is None
assert entry["completed"] is True
def test_full_coverage_across_cycles(self, storage_root):
objects = {f"obj_{chr(ord('a') + i)}.txt": f"data{i}".encode() for i in range(10)}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(storage_root=storage_root, batch_size=4)
all_scanned = 0
for _ in range(10):
result = checker.run_now()
all_scanned += result.objects_scanned
if checker.cursor_store.get_info()["buckets"]["mybucket"]["completed"]:
break
assert all_scanned >= 10
def test_deleted_cursor_key_skips_gracefully(self, storage_root):
objects = {f"file_{chr(ord('a') + i)}.txt": f"data{i}".encode() for i in range(6)}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(storage_root=storage_root, batch_size=3)
checker.run_now()
cursor_info = checker.cursor_store.get_info()
cursor_key = cursor_info["buckets"]["mybucket"]["last_key"]
assert cursor_key is not None
obj_path = storage_root / "mybucket" / cursor_key
meta_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "meta"
key_path = Path(cursor_key)
index_path = meta_root / key_path.parent / "_index.json" if key_path.parent != Path(".") else meta_root / "_index.json"
if key_path.parent == Path("."):
index_path = meta_root / "_index.json"
else:
index_path = meta_root / key_path.parent / "_index.json"
if obj_path.exists():
obj_path.unlink()
if index_path.exists():
index_data = json.loads(index_path.read_text())
index_data.pop(key_path.name, None)
index_path.write_text(json.dumps(index_data))
result2 = checker.run_now()
assert result2.objects_scanned > 0
def test_incomplete_buckets_prioritized(self, storage_root):
_setup_bucket(storage_root, "bucket-a", {f"a{i}.txt": b"a" for i in range(5)})
_setup_bucket(storage_root, "bucket-b", {f"b{i}.txt": b"b" for i in range(5)})
checker = IntegrityChecker(storage_root=storage_root, batch_size=3)
checker.run_now()
cursor_info = checker.cursor_store.get_info()
incomplete = [
name for name, info in cursor_info["buckets"].items()
if info.get("last_key") is not None
]
assert len(incomplete) >= 1
result2 = checker.run_now()
assert result2.objects_scanned > 0
def test_cursor_skips_nested_directories(self, storage_root):
objects = {
"aaa/file1.txt": b"a1",
"aaa/file2.txt": b"a2",
"bbb/file1.txt": b"b1",
"bbb/file2.txt": b"b2",
"ccc/file1.txt": b"c1",
"ccc/file2.txt": b"c2",
}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(storage_root=storage_root, batch_size=4)
result1 = checker.run_now()
assert result1.objects_scanned == 4
cursor_info = checker.cursor_store.get_info()
cursor_key = cursor_info["buckets"]["mybucket"]["last_key"]
assert cursor_key is not None
assert cursor_key.startswith("aaa/") or cursor_key.startswith("bbb/")
result2 = checker.run_now()
assert result2.objects_scanned >= 2
all_scanned = result1.objects_scanned + result2.objects_scanned
for _ in range(10):
if checker.cursor_store.get_info()["buckets"]["mybucket"]["completed"]:
break
r = checker.run_now()
all_scanned += r.objects_scanned
assert all_scanned >= 6
def test_sorted_walk_order(self, storage_root):
objects = {
"bar.txt": b"bar",
"bar/inner.txt": b"inner",
"abc.txt": b"abc",
"zzz/deep.txt": b"deep",
}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(storage_root=storage_root, batch_size=100)
result = checker.run_now()
assert result.objects_scanned >= 4
assert result.total_issues == 0

View File

@@ -1,350 +0,0 @@
import hashlib
import io
import os
import secrets
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
try:
import myfsio_core as _rc
HAS_RUST = True
except ImportError:
_rc = None
HAS_RUST = False
pytestmark = pytest.mark.skipif(not HAS_RUST, reason="myfsio_core not available")
class TestStreamToFileWithMd5:
def test_basic_write(self, tmp_path):
data = b"hello world" * 1000
stream = io.BytesIO(data)
tmp_dir = str(tmp_path / "tmp")
tmp_path_str, md5_hex, size = _rc.stream_to_file_with_md5(stream, tmp_dir)
assert size == len(data)
assert md5_hex == hashlib.md5(data).hexdigest()
assert Path(tmp_path_str).exists()
assert Path(tmp_path_str).read_bytes() == data
def test_empty_stream(self, tmp_path):
stream = io.BytesIO(b"")
tmp_dir = str(tmp_path / "tmp")
tmp_path_str, md5_hex, size = _rc.stream_to_file_with_md5(stream, tmp_dir)
assert size == 0
assert md5_hex == hashlib.md5(b"").hexdigest()
assert Path(tmp_path_str).read_bytes() == b""
def test_large_data(self, tmp_path):
data = os.urandom(1024 * 1024 * 2)
stream = io.BytesIO(data)
tmp_dir = str(tmp_path / "tmp")
tmp_path_str, md5_hex, size = _rc.stream_to_file_with_md5(stream, tmp_dir)
assert size == len(data)
assert md5_hex == hashlib.md5(data).hexdigest()
def test_custom_chunk_size(self, tmp_path):
data = b"x" * 10000
stream = io.BytesIO(data)
tmp_dir = str(tmp_path / "tmp")
tmp_path_str, md5_hex, size = _rc.stream_to_file_with_md5(
stream, tmp_dir, chunk_size=128
)
assert size == len(data)
assert md5_hex == hashlib.md5(data).hexdigest()
class TestAssemblePartsWithMd5:
def test_basic_assembly(self, tmp_path):
parts = []
combined = b""
for i in range(3):
data = f"part{i}data".encode() * 100
combined += data
p = tmp_path / f"part{i}"
p.write_bytes(data)
parts.append(str(p))
dest = str(tmp_path / "output")
md5_hex = _rc.assemble_parts_with_md5(parts, dest)
assert md5_hex == hashlib.md5(combined).hexdigest()
assert Path(dest).read_bytes() == combined
def test_single_part(self, tmp_path):
data = b"single part data"
p = tmp_path / "part0"
p.write_bytes(data)
dest = str(tmp_path / "output")
md5_hex = _rc.assemble_parts_with_md5([str(p)], dest)
assert md5_hex == hashlib.md5(data).hexdigest()
assert Path(dest).read_bytes() == data
def test_empty_parts_list(self):
with pytest.raises(ValueError, match="No parts"):
_rc.assemble_parts_with_md5([], "dummy")
def test_missing_part_file(self, tmp_path):
with pytest.raises(OSError):
_rc.assemble_parts_with_md5(
[str(tmp_path / "nonexistent")], str(tmp_path / "out")
)
def test_large_parts(self, tmp_path):
parts = []
combined = b""
for i in range(5):
data = os.urandom(512 * 1024)
combined += data
p = tmp_path / f"part{i}"
p.write_bytes(data)
parts.append(str(p))
dest = str(tmp_path / "output")
md5_hex = _rc.assemble_parts_with_md5(parts, dest)
assert md5_hex == hashlib.md5(combined).hexdigest()
assert Path(dest).read_bytes() == combined
class TestEncryptDecryptStreamChunked:
def _python_derive_chunk_nonce(self, base_nonce, chunk_index):
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
from cryptography.hazmat.primitives import hashes
hkdf = HKDF(
algorithm=hashes.SHA256(),
length=12,
salt=base_nonce,
info=chunk_index.to_bytes(4, "big"),
)
return hkdf.derive(b"chunk_nonce")
def test_encrypt_decrypt_roundtrip(self, tmp_path):
data = b"Hello, encryption!" * 500
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "plaintext")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
chunk_count = _rc.encrypt_stream_chunked(
input_path, encrypted_path, key, base_nonce
)
assert chunk_count > 0
chunk_count_dec = _rc.decrypt_stream_chunked(
encrypted_path, decrypted_path, key, base_nonce
)
assert chunk_count_dec == chunk_count
assert Path(decrypted_path).read_bytes() == data
def test_empty_file(self, tmp_path):
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "empty")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(b"")
chunk_count = _rc.encrypt_stream_chunked(
input_path, encrypted_path, key, base_nonce
)
assert chunk_count == 0
chunk_count_dec = _rc.decrypt_stream_chunked(
encrypted_path, decrypted_path, key, base_nonce
)
assert chunk_count_dec == 0
assert Path(decrypted_path).read_bytes() == b""
def test_custom_chunk_size(self, tmp_path):
data = os.urandom(10000)
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "plaintext")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
chunk_count = _rc.encrypt_stream_chunked(
input_path, encrypted_path, key, base_nonce, chunk_size=1024
)
assert chunk_count == 10
_rc.decrypt_stream_chunked(encrypted_path, decrypted_path, key, base_nonce)
assert Path(decrypted_path).read_bytes() == data
def test_invalid_key_length(self, tmp_path):
input_path = str(tmp_path / "in")
Path(input_path).write_bytes(b"data")
with pytest.raises(ValueError, match="32 bytes"):
_rc.encrypt_stream_chunked(
input_path, str(tmp_path / "out"), b"short", secrets.token_bytes(12)
)
def test_invalid_nonce_length(self, tmp_path):
input_path = str(tmp_path / "in")
Path(input_path).write_bytes(b"data")
with pytest.raises(ValueError, match="12 bytes"):
_rc.encrypt_stream_chunked(
input_path, str(tmp_path / "out"), secrets.token_bytes(32), b"short"
)
def test_wrong_key_fails_decrypt(self, tmp_path):
data = b"sensitive data"
key = secrets.token_bytes(32)
wrong_key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "plaintext")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
_rc.encrypt_stream_chunked(input_path, encrypted_path, key, base_nonce)
with pytest.raises((ValueError, OSError)):
_rc.decrypt_stream_chunked(
encrypted_path, decrypted_path, wrong_key, base_nonce
)
def test_cross_compat_python_encrypt_rust_decrypt(self, tmp_path):
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
data = b"cross compat test data" * 100
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
chunk_size = 1024
encrypted_path = str(tmp_path / "py_encrypted")
with open(encrypted_path, "wb") as f:
f.write(b"\x00\x00\x00\x00")
aesgcm = AESGCM(key)
chunk_index = 0
offset = 0
while offset < len(data):
chunk = data[offset:offset + chunk_size]
nonce = self._python_derive_chunk_nonce(base_nonce, chunk_index)
enc = aesgcm.encrypt(nonce, chunk, None)
f.write(len(enc).to_bytes(4, "big"))
f.write(enc)
chunk_index += 1
offset += chunk_size
f.seek(0)
f.write(chunk_index.to_bytes(4, "big"))
decrypted_path = str(tmp_path / "rust_decrypted")
_rc.decrypt_stream_chunked(encrypted_path, decrypted_path, key, base_nonce)
assert Path(decrypted_path).read_bytes() == data
def test_cross_compat_rust_encrypt_python_decrypt(self, tmp_path):
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
data = b"cross compat reverse test" * 100
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
chunk_size = 1024
input_path = str(tmp_path / "plaintext")
encrypted_path = str(tmp_path / "rust_encrypted")
Path(input_path).write_bytes(data)
chunk_count = _rc.encrypt_stream_chunked(
input_path, encrypted_path, key, base_nonce, chunk_size=chunk_size
)
aesgcm = AESGCM(key)
with open(encrypted_path, "rb") as f:
count_bytes = f.read(4)
assert int.from_bytes(count_bytes, "big") == chunk_count
decrypted = b""
for i in range(chunk_count):
size = int.from_bytes(f.read(4), "big")
enc_chunk = f.read(size)
nonce = self._python_derive_chunk_nonce(base_nonce, i)
decrypted += aesgcm.decrypt(nonce, enc_chunk, None)
assert decrypted == data
def test_large_file_roundtrip(self, tmp_path):
data = os.urandom(1024 * 1024)
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "large")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
_rc.encrypt_stream_chunked(input_path, encrypted_path, key, base_nonce)
_rc.decrypt_stream_chunked(encrypted_path, decrypted_path, key, base_nonce)
assert Path(decrypted_path).read_bytes() == data
class TestStreamingEncryptorFileMethods:
def test_encrypt_file_decrypt_file_roundtrip(self, tmp_path):
from app.encryption import LocalKeyEncryption, StreamingEncryptor
master_key_path = tmp_path / "master.key"
provider = LocalKeyEncryption(master_key_path)
encryptor = StreamingEncryptor(provider, chunk_size=512)
data = b"file method test data" * 200
input_path = str(tmp_path / "input")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
metadata = encryptor.encrypt_file(input_path, encrypted_path)
assert metadata.algorithm == "AES256"
encryptor.decrypt_file(encrypted_path, decrypted_path, metadata)
assert Path(decrypted_path).read_bytes() == data
def test_encrypt_file_matches_encrypt_stream(self, tmp_path):
from app.encryption import LocalKeyEncryption, StreamingEncryptor
master_key_path = tmp_path / "master.key"
provider = LocalKeyEncryption(master_key_path)
encryptor = StreamingEncryptor(provider, chunk_size=512)
data = b"stream vs file comparison" * 100
input_path = str(tmp_path / "input")
Path(input_path).write_bytes(data)
file_encrypted_path = str(tmp_path / "file_enc")
metadata_file = encryptor.encrypt_file(input_path, file_encrypted_path)
file_decrypted_path = str(tmp_path / "file_dec")
encryptor.decrypt_file(file_encrypted_path, file_decrypted_path, metadata_file)
assert Path(file_decrypted_path).read_bytes() == data
stream_enc, metadata_stream = encryptor.encrypt_stream(io.BytesIO(data))
stream_dec = encryptor.decrypt_stream(stream_enc, metadata_stream)
assert stream_dec.read() == data

View File

@@ -1,4 +1,4 @@
Flask>=3.1.3
Flask>=3.1.2
Flask-Limiter>=4.1.1
Flask-Cors>=6.0.2
Flask-WTF>=1.2.2
@@ -6,8 +6,8 @@ python-dotenv>=1.2.1
pytest>=9.0.2
requests>=2.32.5
boto3>=1.42.14
granian>=2.7.2
psutil>=7.2.2
cryptography>=46.0.5
waitress>=3.0.2
psutil>=7.1.3
cryptography>=46.0.3
defusedxml>=0.7.1
duckdb>=1.5.1
duckdb>=1.4.4

162
run.py Normal file
View File

@@ -0,0 +1,162 @@
"""Helper script to run the API server, UI server, or both."""
from __future__ import annotations
import argparse
import os
import sys
import warnings
import multiprocessing
from multiprocessing import Process
from pathlib import Path
from dotenv import load_dotenv
for _env_file in [
Path("/opt/myfsio/myfsio.env"),
Path.cwd() / ".env",
Path.cwd() / "myfsio.env",
]:
if _env_file.exists():
load_dotenv(_env_file, override=True)
from typing import Optional
from app import create_api_app, create_ui_app
from app.config import AppConfig
def _server_host() -> str:
"""Return the bind host for API and UI servers."""
return os.getenv("APP_HOST", "0.0.0.0")
def _is_debug_enabled() -> bool:
return os.getenv("FLASK_DEBUG", "0").lower() in ("1", "true", "yes")
def _is_frozen() -> bool:
"""Check if running as a compiled binary (PyInstaller/Nuitka)."""
return getattr(sys, 'frozen', False) or '__compiled__' in globals()
def serve_api(port: int, prod: bool = False, config: Optional[AppConfig] = None) -> None:
app = create_api_app()
if prod:
from waitress import serve
if config:
serve(
app,
host=_server_host(),
port=port,
ident="MyFSIO",
threads=config.server_threads,
connection_limit=config.server_connection_limit,
backlog=config.server_backlog,
channel_timeout=config.server_channel_timeout,
)
else:
serve(app, host=_server_host(), port=port, ident="MyFSIO")
else:
debug = _is_debug_enabled()
if debug:
warnings.warn("DEBUG MODE ENABLED - DO NOT USE IN PRODUCTION", RuntimeWarning)
app.run(host=_server_host(), port=port, debug=debug)
def serve_ui(port: int, prod: bool = False, config: Optional[AppConfig] = None) -> None:
app = create_ui_app()
if prod:
from waitress import serve
if config:
serve(
app,
host=_server_host(),
port=port,
ident="MyFSIO",
threads=config.server_threads,
connection_limit=config.server_connection_limit,
backlog=config.server_backlog,
channel_timeout=config.server_channel_timeout,
)
else:
serve(app, host=_server_host(), port=port, ident="MyFSIO")
else:
debug = _is_debug_enabled()
if debug:
warnings.warn("DEBUG MODE ENABLED - DO NOT USE IN PRODUCTION", RuntimeWarning)
app.run(host=_server_host(), port=port, debug=debug)
if __name__ == "__main__":
multiprocessing.freeze_support()
if _is_frozen():
multiprocessing.set_start_method("spawn", force=True)
parser = argparse.ArgumentParser(description="Run the S3 clone services.")
parser.add_argument("--mode", choices=["api", "ui", "both"], default="both")
parser.add_argument("--api-port", type=int, default=5000)
parser.add_argument("--ui-port", type=int, default=5100)
parser.add_argument("--prod", action="store_true", help="Run in production mode using Waitress")
parser.add_argument("--dev", action="store_true", help="Force development mode (Flask dev server)")
parser.add_argument("--check-config", action="store_true", help="Validate configuration and exit")
parser.add_argument("--show-config", action="store_true", help="Show configuration summary and exit")
args = parser.parse_args()
if args.check_config or args.show_config:
config = AppConfig.from_env()
config.print_startup_summary()
if args.check_config:
issues = config.validate_and_report()
critical = [i for i in issues if i.startswith("CRITICAL:")]
sys.exit(1 if critical else 0)
sys.exit(0)
prod_mode = args.prod or (_is_frozen() and not args.dev)
config = AppConfig.from_env()
first_run_marker = config.storage_root / ".myfsio.sys" / ".initialized"
is_first_run = not first_run_marker.exists()
if is_first_run:
config.print_startup_summary()
issues = config.validate_and_report()
critical_issues = [i for i in issues if i.startswith("CRITICAL:")]
if critical_issues:
print("ABORTING: Critical configuration issues detected. Please fix them before starting.")
sys.exit(1)
try:
first_run_marker.parent.mkdir(parents=True, exist_ok=True)
first_run_marker.write_text(f"Initialized on {__import__('datetime').datetime.now().isoformat()}\n")
except OSError:
pass
if prod_mode:
print("Running in production mode (Waitress)")
issues = config.validate_and_report()
critical_issues = [i for i in issues if i.startswith("CRITICAL:")]
if critical_issues:
for issue in critical_issues:
print(f" {issue}")
print("ABORTING: Critical configuration issues detected. Please fix them before starting.")
sys.exit(1)
else:
print("Running in development mode (Flask dev server)")
if args.mode in {"api", "both"}:
print(f"Starting API server on port {args.api_port}...")
api_proc = Process(target=serve_api, args=(args.api_port, prod_mode, config), daemon=True)
api_proc.start()
else:
api_proc = None
if args.mode in {"ui", "both"}:
print(f"Starting UI server on port {args.ui_port}...")
serve_ui(args.ui_port, prod_mode, config)
elif api_proc:
try:
api_proc.join()
except KeyboardInterrupt:
pass

View File

@@ -1,9 +0,0 @@
.git
.gitignore
logs
data
tmp
myfsio-engine/target
myfsio-engine/tests
Dockerfile
.dockerignore

View File

@@ -1,45 +0,0 @@
FROM rust:1-slim-bookworm AS builder
WORKDIR /build
RUN apt-get update \
&& apt-get install -y --no-install-recommends pkg-config libssl-dev \
&& rm -rf /var/lib/apt/lists/*
COPY myfsio-engine ./myfsio-engine
RUN cd myfsio-engine \
&& cargo build --release --bin myfsio-server \
&& strip target/release/myfsio-server
FROM debian:bookworm-slim
WORKDIR /app
RUN apt-get update \
&& apt-get install -y --no-install-recommends ca-certificates curl \
&& rm -rf /var/lib/apt/lists/* \
&& mkdir -p /app/data \
&& useradd -m -u 1000 myfsio \
&& chown -R myfsio:myfsio /app
COPY --from=builder /build/myfsio-engine/target/release/myfsio-server /usr/local/bin/myfsio-server
COPY --from=builder /build/myfsio-engine/templates /app/templates
COPY docker-entrypoint.sh /app/docker-entrypoint.sh
RUN chmod +x /app/docker-entrypoint.sh \
&& chown -R myfsio:myfsio /app
USER myfsio
EXPOSE 5000
ENV HOST=0.0.0.0 \
PORT=5000 \
STORAGE_ROOT=/app/data \
RUST_LOG=info
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD curl -fsS "http://localhost:${PORT}/myfsio/health" || exit 1
CMD ["/app/docker-entrypoint.sh"]

View File

@@ -1,4 +0,0 @@
#!/bin/sh
set -e
exec /usr/local/bin/myfsio-server

File diff suppressed because it is too large Load Diff

View File

@@ -1,61 +0,0 @@
[workspace]
resolver = "2"
members = [
"crates/myfsio-common",
"crates/myfsio-auth",
"crates/myfsio-crypto",
"crates/myfsio-storage",
"crates/myfsio-xml",
"crates/myfsio-server",
]
[workspace.package]
version = "0.4.3"
edition = "2021"
[workspace.dependencies]
tokio = { version = "1", features = ["full"] }
axum = { version = "0.8" }
tower = { version = "0.5" }
tower-http = { version = "0.6", features = ["cors", "trace", "fs", "compression-gzip"] }
hyper = { version = "1" }
bytes = "1"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
quick-xml = { version = "0.37", features = ["serialize"] }
hmac = "0.12"
sha2 = "0.10"
md-5 = "0.10"
hex = "0.4"
aes = "0.8"
aes-gcm = "0.10"
cbc = { version = "0.1", features = ["alloc"] }
hkdf = "0.12"
uuid = { version = "1", features = ["v4"] }
parking_lot = "0.12"
lru = "0.14"
percent-encoding = "2"
regex = "1"
unicode-normalization = "0.1"
tracing = "0.1"
tracing-subscriber = "0.3"
thiserror = "2"
chrono = { version = "0.4", features = ["serde"] }
base64 = "0.22"
tokio-util = { version = "0.7", features = ["io"] }
futures = "0.3"
dashmap = "6"
crc32fast = "1"
duckdb = { version = "1", features = ["bundled"] }
reqwest = { version = "0.12", default-features = false, features = ["stream", "rustls-tls", "json"] }
aws-sdk-s3 = { version = "1", features = ["behavior-version-latest", "rt-tokio"] }
aws-config = { version = "1", features = ["behavior-version-latest"] }
aws-credential-types = "1"
aws-smithy-runtime-api = "1"
aws-smithy-types = "1"
async-trait = "0.1"
tera = "1"
cookie = "0.18"
subtle = "2"
clap = { version = "4", features = ["derive"] }
dotenvy = "0.15"

View File

@@ -1,27 +0,0 @@
[package]
name = "myfsio-auth"
version.workspace = true
edition.workspace = true
[dependencies]
myfsio-common = { path = "../myfsio-common" }
hmac = { workspace = true }
sha2 = { workspace = true }
hex = { workspace = true }
aes = { workspace = true }
cbc = { workspace = true }
base64 = { workspace = true }
pbkdf2 = "0.12"
rand = "0.8"
lru = { workspace = true }
parking_lot = { workspace = true }
percent-encoding = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
chrono = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true }
[dev-dependencies]
tempfile = "3"

View File

@@ -1,118 +0,0 @@
use aes::cipher::{block_padding::Pkcs7, BlockDecryptMut, BlockEncryptMut, KeyIvInit};
use base64::{engine::general_purpose::URL_SAFE, Engine};
use hmac::{Hmac, Mac};
use rand::RngCore;
use sha2::Sha256;
type Aes128CbcDec = cbc::Decryptor<aes::Aes128>;
type Aes128CbcEnc = cbc::Encryptor<aes::Aes128>;
type HmacSha256 = Hmac<Sha256>;
pub fn derive_fernet_key(secret: &str) -> String {
let mut derived = [0u8; 32];
pbkdf2::pbkdf2_hmac::<Sha256>(
secret.as_bytes(),
b"myfsio-iam-encryption",
100_000,
&mut derived,
);
URL_SAFE.encode(derived)
}
pub fn decrypt(key_b64: &str, token: &str) -> Result<Vec<u8>, &'static str> {
let key_bytes = URL_SAFE
.decode(key_b64)
.map_err(|_| "invalid fernet key base64")?;
if key_bytes.len() != 32 {
return Err("fernet key must be 32 bytes");
}
let signing_key = &key_bytes[..16];
let encryption_key = &key_bytes[16..];
let token_bytes = URL_SAFE
.decode(token)
.map_err(|_| "invalid fernet token base64")?;
if token_bytes.len() < 57 {
return Err("fernet token too short");
}
if token_bytes[0] != 0x80 {
return Err("invalid fernet version");
}
let hmac_offset = token_bytes.len() - 32;
let payload = &token_bytes[..hmac_offset];
let expected_hmac = &token_bytes[hmac_offset..];
let mut mac = HmacSha256::new_from_slice(signing_key).map_err(|_| "hmac key error")?;
mac.update(payload);
mac.verify_slice(expected_hmac)
.map_err(|_| "HMAC verification failed")?;
let iv = &token_bytes[9..25];
let ciphertext = &token_bytes[25..hmac_offset];
let plaintext = Aes128CbcDec::new(encryption_key.into(), iv.into())
.decrypt_padded_vec_mut::<Pkcs7>(ciphertext)
.map_err(|_| "AES-CBC decryption failed")?;
Ok(plaintext)
}
pub fn encrypt(key_b64: &str, plaintext: &[u8]) -> Result<String, &'static str> {
let key_bytes = URL_SAFE
.decode(key_b64)
.map_err(|_| "invalid fernet key base64")?;
if key_bytes.len() != 32 {
return Err("fernet key must be 32 bytes");
}
let signing_key = &key_bytes[..16];
let encryption_key = &key_bytes[16..];
let mut iv = [0u8; 16];
rand::thread_rng().fill_bytes(&mut iv);
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map_err(|_| "system time error")?
.as_secs();
let ciphertext = Aes128CbcEnc::new(encryption_key.into(), (&iv).into())
.encrypt_padded_vec_mut::<Pkcs7>(plaintext);
let mut payload = Vec::with_capacity(1 + 8 + 16 + ciphertext.len());
payload.push(0x80);
payload.extend_from_slice(&timestamp.to_be_bytes());
payload.extend_from_slice(&iv);
payload.extend_from_slice(&ciphertext);
let mut mac = HmacSha256::new_from_slice(signing_key).map_err(|_| "hmac key error")?;
mac.update(&payload);
let tag = mac.finalize().into_bytes();
let mut token_bytes = payload;
token_bytes.extend_from_slice(&tag);
Ok(URL_SAFE.encode(&token_bytes))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_derive_fernet_key_format() {
let key = derive_fernet_key("test-secret");
let decoded = URL_SAFE.decode(&key).unwrap();
assert_eq!(decoded.len(), 32);
}
#[test]
fn test_roundtrip_with_python_compat() {
let key = derive_fernet_key("dev-secret-key");
let decoded = URL_SAFE.decode(&key).unwrap();
assert_eq!(decoded.len(), 32);
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +0,0 @@
mod fernet;
pub mod iam;
pub mod principal;
pub mod sigv4;

View File

@@ -1 +0,0 @@
pub use myfsio_common::types::Principal;

View File

@@ -1,287 +0,0 @@
use hmac::{Hmac, Mac};
use lru::LruCache;
use parking_lot::Mutex;
use percent_encoding::{percent_encode, AsciiSet, NON_ALPHANUMERIC};
use sha2::{Digest, Sha256};
use std::num::NonZeroUsize;
use std::sync::LazyLock;
use std::time::Instant;
type HmacSha256 = Hmac<Sha256>;
struct CacheEntry {
key: Vec<u8>,
created: Instant,
}
static SIGNING_KEY_CACHE: LazyLock<Mutex<LruCache<(String, String, String, String), CacheEntry>>> =
LazyLock::new(|| Mutex::new(LruCache::new(NonZeroUsize::new(256).unwrap())));
const CACHE_TTL_SECS: u64 = 60;
const AWS_ENCODE_SET: &AsciiSet = &NON_ALPHANUMERIC
.remove(b'-')
.remove(b'_')
.remove(b'.')
.remove(b'~');
fn hmac_sha256(key: &[u8], msg: &[u8]) -> Vec<u8> {
let mut mac = HmacSha256::new_from_slice(key).expect("HMAC key length is always valid");
mac.update(msg);
mac.finalize().into_bytes().to_vec()
}
fn sha256_hex(data: &[u8]) -> String {
let mut hasher = Sha256::new();
hasher.update(data);
hex::encode(hasher.finalize())
}
fn aws_uri_encode(input: &str) -> String {
percent_encode(input.as_bytes(), AWS_ENCODE_SET).to_string()
}
pub fn derive_signing_key_cached(
secret_key: &str,
date_stamp: &str,
region: &str,
service: &str,
) -> Vec<u8> {
let cache_key = (
secret_key.to_owned(),
date_stamp.to_owned(),
region.to_owned(),
service.to_owned(),
);
{
let mut cache = SIGNING_KEY_CACHE.lock();
if let Some(entry) = cache.get(&cache_key) {
if entry.created.elapsed().as_secs() < CACHE_TTL_SECS {
return entry.key.clone();
}
cache.pop(&cache_key);
}
}
let k_date = hmac_sha256(
format!("AWS4{}", secret_key).as_bytes(),
date_stamp.as_bytes(),
);
let k_region = hmac_sha256(&k_date, region.as_bytes());
let k_service = hmac_sha256(&k_region, service.as_bytes());
let k_signing = hmac_sha256(&k_service, b"aws4_request");
{
let mut cache = SIGNING_KEY_CACHE.lock();
cache.put(
cache_key,
CacheEntry {
key: k_signing.clone(),
created: Instant::now(),
},
);
}
k_signing
}
fn constant_time_compare_inner(a: &[u8], b: &[u8]) -> bool {
if a.len() != b.len() {
return false;
}
let mut result: u8 = 0;
for (x, y) in a.iter().zip(b.iter()) {
result |= x ^ y;
}
result == 0
}
pub fn verify_sigv4_signature(
method: &str,
canonical_uri: &str,
query_params: &[(String, String)],
signed_headers_str: &str,
header_values: &[(String, String)],
payload_hash: &str,
amz_date: &str,
date_stamp: &str,
region: &str,
service: &str,
secret_key: &str,
provided_signature: &str,
) -> bool {
let mut sorted_params = query_params.to_vec();
sorted_params.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
let canonical_query_string = sorted_params
.iter()
.map(|(k, v)| format!("{}={}", aws_uri_encode(k), aws_uri_encode(v)))
.collect::<Vec<_>>()
.join("&");
let mut canonical_headers = String::new();
for (name, value) in header_values {
let lower_name = name.to_lowercase();
let normalized = value.split_whitespace().collect::<Vec<_>>().join(" ");
let final_value = if lower_name == "expect" && normalized.is_empty() {
"100-continue"
} else {
&normalized
};
canonical_headers.push_str(&lower_name);
canonical_headers.push(':');
canonical_headers.push_str(final_value);
canonical_headers.push('\n');
}
let canonical_request = format!(
"{}\n{}\n{}\n{}\n{}\n{}",
method,
canonical_uri,
canonical_query_string,
canonical_headers,
signed_headers_str,
payload_hash
);
let credential_scope = format!("{}/{}/{}/aws4_request", date_stamp, region, service);
let cr_hash = sha256_hex(canonical_request.as_bytes());
let string_to_sign = format!(
"AWS4-HMAC-SHA256\n{}\n{}\n{}",
amz_date, credential_scope, cr_hash
);
let signing_key = derive_signing_key_cached(secret_key, date_stamp, region, service);
let calculated = hmac_sha256(&signing_key, string_to_sign.as_bytes());
let calculated_hex = hex::encode(&calculated);
constant_time_compare_inner(calculated_hex.as_bytes(), provided_signature.as_bytes())
}
pub fn derive_signing_key(
secret_key: &str,
date_stamp: &str,
region: &str,
service: &str,
) -> Vec<u8> {
derive_signing_key_cached(secret_key, date_stamp, region, service)
}
pub fn compute_signature(signing_key: &[u8], string_to_sign: &str) -> String {
let sig = hmac_sha256(signing_key, string_to_sign.as_bytes());
hex::encode(sig)
}
pub fn compute_post_policy_signature(signing_key: &[u8], policy_b64: &str) -> String {
let sig = hmac_sha256(signing_key, policy_b64.as_bytes());
hex::encode(sig)
}
pub fn build_string_to_sign(
amz_date: &str,
credential_scope: &str,
canonical_request: &str,
) -> String {
let cr_hash = sha256_hex(canonical_request.as_bytes());
format!(
"AWS4-HMAC-SHA256\n{}\n{}\n{}",
amz_date, credential_scope, cr_hash
)
}
pub fn constant_time_compare(a: &str, b: &str) -> bool {
constant_time_compare_inner(a.as_bytes(), b.as_bytes())
}
pub fn clear_signing_key_cache() {
SIGNING_KEY_CACHE.lock().clear();
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_derive_signing_key() {
let key = derive_signing_key(
"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
"20130524",
"us-east-1",
"s3",
);
assert_eq!(key.len(), 32);
}
#[test]
fn test_derive_signing_key_cached() {
let key1 = derive_signing_key("secret", "20240101", "us-east-1", "s3");
let key2 = derive_signing_key("secret", "20240101", "us-east-1", "s3");
assert_eq!(key1, key2);
}
#[test]
fn test_constant_time_compare() {
assert!(constant_time_compare("abc", "abc"));
assert!(!constant_time_compare("abc", "abd"));
assert!(!constant_time_compare("abc", "abcd"));
}
#[test]
fn test_build_string_to_sign() {
let result = build_string_to_sign(
"20130524T000000Z",
"20130524/us-east-1/s3/aws4_request",
"GET\n/\n\nhost:example.com\n\nhost\nUNSIGNED-PAYLOAD",
);
assert!(result.starts_with("AWS4-HMAC-SHA256\n"));
assert!(result.contains("20130524T000000Z"));
}
#[test]
fn test_aws_uri_encode() {
assert_eq!(aws_uri_encode("hello world"), "hello%20world");
assert_eq!(aws_uri_encode("test-file_name.txt"), "test-file_name.txt");
assert_eq!(aws_uri_encode("a/b"), "a%2Fb");
}
#[test]
fn test_verify_sigv4_roundtrip() {
let secret = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY";
let date_stamp = "20130524";
let region = "us-east-1";
let service = "s3";
let amz_date = "20130524T000000Z";
let signing_key = derive_signing_key(secret, date_stamp, region, service);
let canonical_request =
"GET\n/\n\nhost:examplebucket.s3.amazonaws.com\n\nhost\nUNSIGNED-PAYLOAD";
let string_to_sign = build_string_to_sign(
amz_date,
&format!("{}/{}/{}/aws4_request", date_stamp, region, service),
canonical_request,
);
let signature = compute_signature(&signing_key, &string_to_sign);
let result = verify_sigv4_signature(
"GET",
"/",
&[],
"host",
&[(
"host".to_string(),
"examplebucket.s3.amazonaws.com".to_string(),
)],
"UNSIGNED-PAYLOAD",
amz_date,
date_stamp,
region,
service,
secret,
&signature,
);
assert!(result);
}
}

View File

@@ -1,11 +0,0 @@
[package]
name = "myfsio-common"
version.workspace = true
edition.workspace = true
[dependencies]
thiserror = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
chrono = { workspace = true }
uuid = { workspace = true }

View File

@@ -1,20 +0,0 @@
pub const SYSTEM_ROOT: &str = ".myfsio.sys";
pub const SYSTEM_BUCKETS_DIR: &str = "buckets";
pub const SYSTEM_MULTIPART_DIR: &str = "multipart";
pub const BUCKET_META_DIR: &str = "meta";
pub const BUCKET_VERSIONS_DIR: &str = "versions";
pub const BUCKET_CONFIG_FILE: &str = ".bucket.json";
pub const STATS_FILE: &str = "stats.json";
pub const ETAG_INDEX_FILE: &str = "etag_index.json";
pub const INDEX_FILE: &str = "_index.json";
pub const MANIFEST_FILE: &str = "manifest.json";
pub const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
pub const DEFAULT_REGION: &str = "us-east-1";
pub const AWS_SERVICE: &str = "s3";
pub const DEFAULT_MAX_KEYS: usize = 1000;
pub const DEFAULT_OBJECT_KEY_MAX_BYTES: usize = 1024;
pub const DEFAULT_CHUNK_SIZE: usize = 65536;
pub const STREAM_CHUNK_SIZE: usize = 1_048_576;

View File

@@ -1,225 +0,0 @@
use std::fmt;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum S3ErrorCode {
AccessDenied,
BucketAlreadyExists,
BucketNotEmpty,
EntityTooLarge,
InternalError,
InvalidAccessKeyId,
InvalidArgument,
InvalidBucketName,
InvalidKey,
InvalidPolicyDocument,
InvalidRange,
InvalidRequest,
MalformedXML,
MethodNotAllowed,
NoSuchBucket,
NoSuchKey,
NoSuchUpload,
NoSuchVersion,
NoSuchTagSet,
PreconditionFailed,
NotModified,
QuotaExceeded,
SignatureDoesNotMatch,
SlowDown,
}
impl S3ErrorCode {
pub fn http_status(&self) -> u16 {
match self {
Self::AccessDenied => 403,
Self::BucketAlreadyExists => 409,
Self::BucketNotEmpty => 409,
Self::EntityTooLarge => 413,
Self::InternalError => 500,
Self::InvalidAccessKeyId => 403,
Self::InvalidArgument => 400,
Self::InvalidBucketName => 400,
Self::InvalidKey => 400,
Self::InvalidPolicyDocument => 400,
Self::InvalidRange => 416,
Self::InvalidRequest => 400,
Self::MalformedXML => 400,
Self::MethodNotAllowed => 405,
Self::NoSuchBucket => 404,
Self::NoSuchKey => 404,
Self::NoSuchUpload => 404,
Self::NoSuchVersion => 404,
Self::NoSuchTagSet => 404,
Self::PreconditionFailed => 412,
Self::NotModified => 304,
Self::QuotaExceeded => 403,
Self::SignatureDoesNotMatch => 403,
Self::SlowDown => 429,
}
}
pub fn as_str(&self) -> &'static str {
match self {
Self::AccessDenied => "AccessDenied",
Self::BucketAlreadyExists => "BucketAlreadyExists",
Self::BucketNotEmpty => "BucketNotEmpty",
Self::EntityTooLarge => "EntityTooLarge",
Self::InternalError => "InternalError",
Self::InvalidAccessKeyId => "InvalidAccessKeyId",
Self::InvalidArgument => "InvalidArgument",
Self::InvalidBucketName => "InvalidBucketName",
Self::InvalidKey => "InvalidKey",
Self::InvalidPolicyDocument => "InvalidPolicyDocument",
Self::InvalidRange => "InvalidRange",
Self::InvalidRequest => "InvalidRequest",
Self::MalformedXML => "MalformedXML",
Self::MethodNotAllowed => "MethodNotAllowed",
Self::NoSuchBucket => "NoSuchBucket",
Self::NoSuchKey => "NoSuchKey",
Self::NoSuchUpload => "NoSuchUpload",
Self::NoSuchVersion => "NoSuchVersion",
Self::NoSuchTagSet => "NoSuchTagSet",
Self::PreconditionFailed => "PreconditionFailed",
Self::NotModified => "NotModified",
Self::QuotaExceeded => "QuotaExceeded",
Self::SignatureDoesNotMatch => "SignatureDoesNotMatch",
Self::SlowDown => "SlowDown",
}
}
pub fn default_message(&self) -> &'static str {
match self {
Self::AccessDenied => "Access Denied",
Self::BucketAlreadyExists => "The requested bucket name is not available",
Self::BucketNotEmpty => "The bucket you tried to delete is not empty",
Self::EntityTooLarge => "Your proposed upload exceeds the maximum allowed size",
Self::InternalError => "We encountered an internal error. Please try again.",
Self::InvalidAccessKeyId => "The access key ID you provided does not exist",
Self::InvalidArgument => "Invalid argument",
Self::InvalidBucketName => "The specified bucket is not valid",
Self::InvalidKey => "The specified key is not valid",
Self::InvalidPolicyDocument => "The content of the form does not meet the conditions specified in the policy document",
Self::InvalidRange => "The requested range is not satisfiable",
Self::InvalidRequest => "Invalid request",
Self::MalformedXML => "The XML you provided was not well-formed",
Self::MethodNotAllowed => "The specified method is not allowed against this resource",
Self::NoSuchBucket => "The specified bucket does not exist",
Self::NoSuchKey => "The specified key does not exist",
Self::NoSuchUpload => "The specified multipart upload does not exist",
Self::NoSuchVersion => "The specified version does not exist",
Self::NoSuchTagSet => "The TagSet does not exist",
Self::PreconditionFailed => "At least one of the preconditions you specified did not hold",
Self::NotModified => "Not Modified",
Self::QuotaExceeded => "The bucket quota has been exceeded",
Self::SignatureDoesNotMatch => "The request signature we calculated does not match the signature you provided",
Self::SlowDown => "Please reduce your request rate",
}
}
}
impl fmt::Display for S3ErrorCode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.as_str())
}
}
#[derive(Debug, Clone)]
pub struct S3Error {
pub code: S3ErrorCode,
pub message: String,
pub resource: String,
pub request_id: String,
}
impl S3Error {
pub fn new(code: S3ErrorCode, message: impl Into<String>) -> Self {
Self {
code,
message: message.into(),
resource: String::new(),
request_id: String::new(),
}
}
pub fn from_code(code: S3ErrorCode) -> Self {
Self::new(code, code.default_message())
}
pub fn with_resource(mut self, resource: impl Into<String>) -> Self {
self.resource = resource.into();
self
}
pub fn with_request_id(mut self, request_id: impl Into<String>) -> Self {
self.request_id = request_id.into();
self
}
pub fn http_status(&self) -> u16 {
self.code.http_status()
}
pub fn to_xml(&self) -> String {
format!(
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\
<Error>\
<Code>{}</Code>\
<Message>{}</Message>\
<Resource>{}</Resource>\
<RequestId>{}</RequestId>\
</Error>",
self.code.as_str(),
xml_escape(&self.message),
xml_escape(&self.resource),
xml_escape(&self.request_id),
)
}
}
impl fmt::Display for S3Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}: {}", self.code, self.message)
}
}
impl std::error::Error for S3Error {}
fn xml_escape(s: &str) -> String {
s.replace('&', "&amp;")
.replace('<', "&lt;")
.replace('>', "&gt;")
.replace('"', "&quot;")
.replace('\'', "&apos;")
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_error_codes() {
assert_eq!(S3ErrorCode::NoSuchKey.http_status(), 404);
assert_eq!(S3ErrorCode::AccessDenied.http_status(), 403);
assert_eq!(S3ErrorCode::NoSuchBucket.as_str(), "NoSuchBucket");
}
#[test]
fn test_error_to_xml() {
let err = S3Error::from_code(S3ErrorCode::NoSuchKey)
.with_resource("/test-bucket/test-key")
.with_request_id("abc123");
let xml = err.to_xml();
assert!(xml.contains("<Code>NoSuchKey</Code>"));
assert!(xml.contains("<Resource>/test-bucket/test-key</Resource>"));
assert!(xml.contains("<RequestId>abc123</RequestId>"));
}
#[test]
fn test_xml_escape() {
let err = S3Error::new(S3ErrorCode::InvalidArgument, "key <test> & \"value\"")
.with_resource("/bucket/key&amp");
let xml = err.to_xml();
assert!(xml.contains("&lt;test&gt;"));
assert!(xml.contains("&amp;"));
}
}

View File

@@ -1,3 +0,0 @@
pub mod constants;
pub mod error;
pub mod types;

View File

@@ -1,176 +0,0 @@
use std::collections::HashMap;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ObjectMeta {
pub key: String,
pub size: u64,
pub last_modified: DateTime<Utc>,
pub etag: Option<String>,
pub content_type: Option<String>,
pub storage_class: Option<String>,
pub metadata: HashMap<String, String>,
}
impl ObjectMeta {
pub fn new(key: String, size: u64, last_modified: DateTime<Utc>) -> Self {
Self {
key,
size,
last_modified,
etag: None,
content_type: None,
storage_class: Some("STANDARD".to_string()),
metadata: HashMap::new(),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BucketMeta {
pub name: String,
pub creation_date: DateTime<Utc>,
}
#[derive(Debug, Clone, Default)]
pub struct BucketStats {
pub objects: u64,
pub bytes: u64,
pub version_count: u64,
pub version_bytes: u64,
}
impl BucketStats {
pub fn total_objects(&self) -> u64 {
self.objects + self.version_count
}
pub fn total_bytes(&self) -> u64 {
self.bytes + self.version_bytes
}
}
#[derive(Debug, Clone)]
pub struct ListObjectsResult {
pub objects: Vec<ObjectMeta>,
pub is_truncated: bool,
pub next_continuation_token: Option<String>,
}
#[derive(Debug, Clone)]
pub struct ShallowListResult {
pub objects: Vec<ObjectMeta>,
pub common_prefixes: Vec<String>,
pub is_truncated: bool,
pub next_continuation_token: Option<String>,
}
#[derive(Debug, Clone, Default)]
pub struct ListParams {
pub max_keys: usize,
pub continuation_token: Option<String>,
pub prefix: Option<String>,
pub start_after: Option<String>,
}
#[derive(Debug, Clone, Default)]
pub struct ShallowListParams {
pub prefix: String,
pub delimiter: String,
pub max_keys: usize,
pub continuation_token: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PartMeta {
pub part_number: u32,
pub etag: String,
pub size: u64,
pub last_modified: Option<DateTime<Utc>>,
}
#[derive(Debug, Clone)]
pub struct PartInfo {
pub part_number: u32,
pub etag: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MultipartUploadInfo {
pub upload_id: String,
pub key: String,
pub initiated: DateTime<Utc>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VersionInfo {
pub version_id: String,
pub key: String,
pub size: u64,
pub last_modified: DateTime<Utc>,
pub etag: Option<String>,
pub is_latest: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tag {
pub key: String,
pub value: String,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BucketConfig {
#[serde(default)]
pub versioning_enabled: bool,
#[serde(default)]
pub tags: Vec<Tag>,
#[serde(default)]
pub cors: Option<serde_json::Value>,
#[serde(default)]
pub encryption: Option<serde_json::Value>,
#[serde(default)]
pub lifecycle: Option<serde_json::Value>,
#[serde(default)]
pub website: Option<serde_json::Value>,
#[serde(default)]
pub quota: Option<QuotaConfig>,
#[serde(default)]
pub acl: Option<serde_json::Value>,
#[serde(default)]
pub notification: Option<serde_json::Value>,
#[serde(default)]
pub logging: Option<serde_json::Value>,
#[serde(default)]
pub object_lock: Option<serde_json::Value>,
#[serde(default)]
pub policy: Option<serde_json::Value>,
#[serde(default)]
pub replication: Option<serde_json::Value>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuotaConfig {
pub max_bytes: Option<u64>,
pub max_objects: Option<u64>,
}
#[derive(Debug, Clone)]
pub struct Principal {
pub access_key: String,
pub user_id: String,
pub display_name: String,
pub is_admin: bool,
}
impl Principal {
pub fn new(access_key: String, user_id: String, display_name: String, is_admin: bool) -> Self {
Self {
access_key,
user_id,
display_name,
is_admin,
}
}
}

View File

@@ -1,24 +0,0 @@
[package]
name = "myfsio-crypto"
version.workspace = true
edition.workspace = true
[dependencies]
myfsio-common = { path = "../myfsio-common" }
md-5 = { workspace = true }
sha2 = { workspace = true }
hex = { workspace = true }
aes-gcm = { workspace = true }
hkdf = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }
base64 = { workspace = true }
rand = "0.8"
[dev-dependencies]
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
tempfile = "3"

View File

@@ -1,253 +0,0 @@
use aes_gcm::aead::Aead;
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
use hkdf::Hkdf;
use sha2::Sha256;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::Path;
use thiserror::Error;
const DEFAULT_CHUNK_SIZE: usize = 65536;
const HEADER_SIZE: usize = 4;
#[derive(Debug, Error)]
pub enum CryptoError {
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
#[error("Invalid key size: expected 32 bytes, got {0}")]
InvalidKeySize(usize),
#[error("Invalid nonce size: expected 12 bytes, got {0}")]
InvalidNonceSize(usize),
#[error("Encryption failed: {0}")]
EncryptionFailed(String),
#[error("Decryption failed at chunk {0}")]
DecryptionFailed(u32),
#[error("HKDF expand failed: {0}")]
HkdfFailed(String),
}
fn read_exact_chunk(reader: &mut impl Read, buf: &mut [u8]) -> std::io::Result<usize> {
let mut filled = 0;
while filled < buf.len() {
match reader.read(&mut buf[filled..]) {
Ok(0) => break,
Ok(n) => filled += n,
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
}
}
Ok(filled)
}
fn derive_chunk_nonce(base_nonce: &[u8], chunk_index: u32) -> Result<[u8; 12], CryptoError> {
let hkdf = Hkdf::<Sha256>::new(Some(base_nonce), b"chunk_nonce");
let mut okm = [0u8; 12];
hkdf.expand(&chunk_index.to_be_bytes(), &mut okm)
.map_err(|e| CryptoError::HkdfFailed(e.to_string()))?;
Ok(okm)
}
pub fn encrypt_stream_chunked(
input_path: &Path,
output_path: &Path,
key: &[u8],
base_nonce: &[u8],
chunk_size: Option<usize>,
) -> Result<u32, CryptoError> {
if key.len() != 32 {
return Err(CryptoError::InvalidKeySize(key.len()));
}
if base_nonce.len() != 12 {
return Err(CryptoError::InvalidNonceSize(base_nonce.len()));
}
let chunk_size = chunk_size.unwrap_or(DEFAULT_CHUNK_SIZE);
let key_arr: [u8; 32] = key.try_into().unwrap();
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
let cipher = Aes256Gcm::new(&key_arr.into());
let mut infile = File::open(input_path)?;
let mut outfile = File::create(output_path)?;
outfile.write_all(&[0u8; 4])?;
let mut buf = vec![0u8; chunk_size];
let mut chunk_index: u32 = 0;
loop {
let n = read_exact_chunk(&mut infile, &mut buf)?;
if n == 0 {
break;
}
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)?;
let nonce = Nonce::from_slice(&nonce_bytes);
let encrypted = cipher
.encrypt(nonce, &buf[..n])
.map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?;
let size = encrypted.len() as u32;
outfile.write_all(&size.to_be_bytes())?;
outfile.write_all(&encrypted)?;
chunk_index += 1;
}
outfile.seek(SeekFrom::Start(0))?;
outfile.write_all(&chunk_index.to_be_bytes())?;
Ok(chunk_index)
}
pub fn decrypt_stream_chunked(
input_path: &Path,
output_path: &Path,
key: &[u8],
base_nonce: &[u8],
) -> Result<u32, CryptoError> {
if key.len() != 32 {
return Err(CryptoError::InvalidKeySize(key.len()));
}
if base_nonce.len() != 12 {
return Err(CryptoError::InvalidNonceSize(base_nonce.len()));
}
let key_arr: [u8; 32] = key.try_into().unwrap();
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
let cipher = Aes256Gcm::new(&key_arr.into());
let mut infile = File::open(input_path)?;
let mut outfile = File::create(output_path)?;
let mut header = [0u8; HEADER_SIZE];
infile.read_exact(&mut header)?;
let chunk_count = u32::from_be_bytes(header);
let mut size_buf = [0u8; HEADER_SIZE];
for chunk_index in 0..chunk_count {
infile.read_exact(&mut size_buf)?;
let chunk_size = u32::from_be_bytes(size_buf) as usize;
let mut encrypted = vec![0u8; chunk_size];
infile.read_exact(&mut encrypted)?;
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)?;
let nonce = Nonce::from_slice(&nonce_bytes);
let decrypted = cipher
.decrypt(nonce, encrypted.as_ref())
.map_err(|_| CryptoError::DecryptionFailed(chunk_index))?;
outfile.write_all(&decrypted)?;
}
Ok(chunk_count)
}
pub async fn encrypt_stream_chunked_async(
input_path: &Path,
output_path: &Path,
key: &[u8],
base_nonce: &[u8],
chunk_size: Option<usize>,
) -> Result<u32, CryptoError> {
let input_path = input_path.to_owned();
let output_path = output_path.to_owned();
let key = key.to_vec();
let base_nonce = base_nonce.to_vec();
tokio::task::spawn_blocking(move || {
encrypt_stream_chunked(&input_path, &output_path, &key, &base_nonce, chunk_size)
})
.await
.map_err(|e| CryptoError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))?
}
pub async fn decrypt_stream_chunked_async(
input_path: &Path,
output_path: &Path,
key: &[u8],
base_nonce: &[u8],
) -> Result<u32, CryptoError> {
let input_path = input_path.to_owned();
let output_path = output_path.to_owned();
let key = key.to_vec();
let base_nonce = base_nonce.to_vec();
tokio::task::spawn_blocking(move || {
decrypt_stream_chunked(&input_path, &output_path, &key, &base_nonce)
})
.await
.map_err(|e| CryptoError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))?
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write as IoWrite;
#[test]
fn test_encrypt_decrypt_roundtrip() {
let dir = tempfile::tempdir().unwrap();
let input = dir.path().join("input.bin");
let encrypted = dir.path().join("encrypted.bin");
let decrypted = dir.path().join("decrypted.bin");
let data = b"Hello, this is a test of AES-256-GCM chunked encryption!";
std::fs::File::create(&input)
.unwrap()
.write_all(data)
.unwrap();
let key = [0x42u8; 32];
let nonce = [0x01u8; 12];
let chunks = encrypt_stream_chunked(&input, &encrypted, &key, &nonce, Some(16)).unwrap();
assert!(chunks > 0);
let chunks2 = decrypt_stream_chunked(&encrypted, &decrypted, &key, &nonce).unwrap();
assert_eq!(chunks, chunks2);
let result = std::fs::read(&decrypted).unwrap();
assert_eq!(result, data);
}
#[test]
fn test_invalid_key_size() {
let dir = tempfile::tempdir().unwrap();
let input = dir.path().join("input.bin");
std::fs::File::create(&input)
.unwrap()
.write_all(b"test")
.unwrap();
let result = encrypt_stream_chunked(
&input,
&dir.path().join("out"),
&[0u8; 16],
&[0u8; 12],
None,
);
assert!(matches!(result, Err(CryptoError::InvalidKeySize(16))));
}
#[test]
fn test_wrong_key_fails_decrypt() {
let dir = tempfile::tempdir().unwrap();
let input = dir.path().join("input.bin");
let encrypted = dir.path().join("encrypted.bin");
let decrypted = dir.path().join("decrypted.bin");
std::fs::File::create(&input)
.unwrap()
.write_all(b"secret data")
.unwrap();
let key = [0x42u8; 32];
let nonce = [0x01u8; 12];
encrypt_stream_chunked(&input, &encrypted, &key, &nonce, None).unwrap();
let wrong_key = [0x43u8; 32];
let result = decrypt_stream_chunked(&encrypted, &decrypted, &wrong_key, &nonce);
assert!(matches!(result, Err(CryptoError::DecryptionFailed(_))));
}
}

View File

@@ -1,377 +0,0 @@
use base64::engine::general_purpose::STANDARD as B64;
use base64::Engine;
use rand::RngCore;
use std::collections::HashMap;
use std::path::Path;
use crate::aes_gcm::{decrypt_stream_chunked, encrypt_stream_chunked, CryptoError};
use crate::kms::KmsService;
#[derive(Debug, Clone, PartialEq)]
pub enum SseAlgorithm {
Aes256,
AwsKms,
CustomerProvided,
}
impl SseAlgorithm {
pub fn as_str(&self) -> &'static str {
match self {
SseAlgorithm::Aes256 => "AES256",
SseAlgorithm::AwsKms => "aws:kms",
SseAlgorithm::CustomerProvided => "AES256",
}
}
}
#[derive(Debug, Clone)]
pub struct EncryptionContext {
pub algorithm: SseAlgorithm,
pub kms_key_id: Option<String>,
pub customer_key: Option<Vec<u8>>,
}
#[derive(Debug, Clone)]
pub struct EncryptionMetadata {
pub algorithm: String,
pub nonce: String,
pub encrypted_data_key: Option<String>,
pub kms_key_id: Option<String>,
}
impl EncryptionMetadata {
pub fn to_metadata_map(&self) -> HashMap<String, String> {
let mut map = HashMap::new();
map.insert(
"x-amz-server-side-encryption".to_string(),
self.algorithm.clone(),
);
map.insert("x-amz-encryption-nonce".to_string(), self.nonce.clone());
if let Some(ref dk) = self.encrypted_data_key {
map.insert("x-amz-encrypted-data-key".to_string(), dk.clone());
}
if let Some(ref kid) = self.kms_key_id {
map.insert("x-amz-encryption-key-id".to_string(), kid.clone());
}
map
}
pub fn from_metadata(meta: &HashMap<String, String>) -> Option<Self> {
let algorithm = meta.get("x-amz-server-side-encryption")?;
let nonce = meta.get("x-amz-encryption-nonce")?;
Some(Self {
algorithm: algorithm.clone(),
nonce: nonce.clone(),
encrypted_data_key: meta.get("x-amz-encrypted-data-key").cloned(),
kms_key_id: meta.get("x-amz-encryption-key-id").cloned(),
})
}
pub fn is_encrypted(meta: &HashMap<String, String>) -> bool {
meta.contains_key("x-amz-server-side-encryption")
}
pub fn clean_metadata(meta: &mut HashMap<String, String>) {
meta.remove("x-amz-server-side-encryption");
meta.remove("x-amz-encryption-nonce");
meta.remove("x-amz-encrypted-data-key");
meta.remove("x-amz-encryption-key-id");
}
}
pub struct EncryptionService {
master_key: [u8; 32],
kms: Option<std::sync::Arc<KmsService>>,
}
impl EncryptionService {
pub fn new(master_key: [u8; 32], kms: Option<std::sync::Arc<KmsService>>) -> Self {
Self { master_key, kms }
}
pub fn generate_data_key(&self) -> ([u8; 32], [u8; 12]) {
let mut data_key = [0u8; 32];
let mut nonce = [0u8; 12];
rand::thread_rng().fill_bytes(&mut data_key);
rand::thread_rng().fill_bytes(&mut nonce);
(data_key, nonce)
}
pub fn wrap_data_key(&self, data_key: &[u8; 32]) -> Result<String, CryptoError> {
use aes_gcm::aead::Aead;
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
let cipher = Aes256Gcm::new((&self.master_key).into());
let mut nonce_bytes = [0u8; 12];
rand::thread_rng().fill_bytes(&mut nonce_bytes);
let nonce = Nonce::from_slice(&nonce_bytes);
let encrypted = cipher
.encrypt(nonce, data_key.as_slice())
.map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?;
let mut combined = Vec::with_capacity(12 + encrypted.len());
combined.extend_from_slice(&nonce_bytes);
combined.extend_from_slice(&encrypted);
Ok(B64.encode(&combined))
}
pub fn unwrap_data_key(&self, wrapped_b64: &str) -> Result<[u8; 32], CryptoError> {
use aes_gcm::aead::Aead;
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
let combined = B64.decode(wrapped_b64).map_err(|e| {
CryptoError::EncryptionFailed(format!("Bad wrapped key encoding: {}", e))
})?;
if combined.len() < 12 {
return Err(CryptoError::EncryptionFailed(
"Wrapped key too short".to_string(),
));
}
let (nonce_bytes, ciphertext) = combined.split_at(12);
let cipher = Aes256Gcm::new((&self.master_key).into());
let nonce = Nonce::from_slice(nonce_bytes);
let plaintext = cipher
.decrypt(nonce, ciphertext)
.map_err(|_| CryptoError::DecryptionFailed(0))?;
if plaintext.len() != 32 {
return Err(CryptoError::InvalidKeySize(plaintext.len()));
}
let mut key = [0u8; 32];
key.copy_from_slice(&plaintext);
Ok(key)
}
pub async fn encrypt_object(
&self,
input_path: &Path,
output_path: &Path,
ctx: &EncryptionContext,
) -> Result<EncryptionMetadata, CryptoError> {
let (data_key, nonce) = self.generate_data_key();
let (encrypted_data_key, kms_key_id) = match ctx.algorithm {
SseAlgorithm::Aes256 => {
let wrapped = self.wrap_data_key(&data_key)?;
(Some(wrapped), None)
}
SseAlgorithm::AwsKms => {
let kms = self
.kms
.as_ref()
.ok_or_else(|| CryptoError::EncryptionFailed("KMS not available".into()))?;
let kid = ctx
.kms_key_id
.as_ref()
.ok_or_else(|| CryptoError::EncryptionFailed("No KMS key ID".into()))?;
let ciphertext = kms.encrypt_data(kid, &data_key).await?;
(Some(B64.encode(&ciphertext)), Some(kid.clone()))
}
SseAlgorithm::CustomerProvided => (None, None),
};
let actual_key = if ctx.algorithm == SseAlgorithm::CustomerProvided {
let ck = ctx
.customer_key
.as_ref()
.ok_or_else(|| CryptoError::EncryptionFailed("No customer key provided".into()))?;
if ck.len() != 32 {
return Err(CryptoError::InvalidKeySize(ck.len()));
}
let mut k = [0u8; 32];
k.copy_from_slice(ck);
k
} else {
data_key
};
let ip = input_path.to_owned();
let op = output_path.to_owned();
let ak = actual_key;
let n = nonce;
tokio::task::spawn_blocking(move || encrypt_stream_chunked(&ip, &op, &ak, &n, None))
.await
.map_err(|e| CryptoError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))??;
Ok(EncryptionMetadata {
algorithm: ctx.algorithm.as_str().to_string(),
nonce: B64.encode(nonce),
encrypted_data_key,
kms_key_id,
})
}
pub async fn decrypt_object(
&self,
input_path: &Path,
output_path: &Path,
enc_meta: &EncryptionMetadata,
customer_key: Option<&[u8]>,
) -> Result<(), CryptoError> {
let nonce_bytes = B64
.decode(&enc_meta.nonce)
.map_err(|e| CryptoError::EncryptionFailed(format!("Bad nonce encoding: {}", e)))?;
if nonce_bytes.len() != 12 {
return Err(CryptoError::InvalidNonceSize(nonce_bytes.len()));
}
let data_key: [u8; 32] = if let Some(ck) = customer_key {
if ck.len() != 32 {
return Err(CryptoError::InvalidKeySize(ck.len()));
}
let mut k = [0u8; 32];
k.copy_from_slice(ck);
k
} else if enc_meta.algorithm == "aws:kms" {
let kms = self
.kms
.as_ref()
.ok_or_else(|| CryptoError::EncryptionFailed("KMS not available".into()))?;
let kid = enc_meta
.kms_key_id
.as_ref()
.ok_or_else(|| CryptoError::EncryptionFailed("No KMS key ID in metadata".into()))?;
let encrypted_dk = enc_meta.encrypted_data_key.as_ref().ok_or_else(|| {
CryptoError::EncryptionFailed("No encrypted data key in metadata".into())
})?;
let ct = B64.decode(encrypted_dk).map_err(|e| {
CryptoError::EncryptionFailed(format!("Bad data key encoding: {}", e))
})?;
let dk = kms.decrypt_data(kid, &ct).await?;
if dk.len() != 32 {
return Err(CryptoError::InvalidKeySize(dk.len()));
}
let mut k = [0u8; 32];
k.copy_from_slice(&dk);
k
} else {
let wrapped = enc_meta.encrypted_data_key.as_ref().ok_or_else(|| {
CryptoError::EncryptionFailed("No encrypted data key in metadata".into())
})?;
self.unwrap_data_key(wrapped)?
};
let ip = input_path.to_owned();
let op = output_path.to_owned();
let nb: [u8; 12] = nonce_bytes.try_into().unwrap();
tokio::task::spawn_blocking(move || decrypt_stream_chunked(&ip, &op, &data_key, &nb))
.await
.map_err(|e| CryptoError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))??;
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
fn test_master_key() -> [u8; 32] {
[0x42u8; 32]
}
#[test]
fn test_wrap_unwrap_data_key() {
let svc = EncryptionService::new(test_master_key(), None);
let dk = [0xAAu8; 32];
let wrapped = svc.wrap_data_key(&dk).unwrap();
let unwrapped = svc.unwrap_data_key(&wrapped).unwrap();
assert_eq!(dk, unwrapped);
}
#[tokio::test]
async fn test_encrypt_decrypt_object_sse_s3() {
let dir = tempfile::tempdir().unwrap();
let input = dir.path().join("plain.bin");
let encrypted = dir.path().join("enc.bin");
let decrypted = dir.path().join("dec.bin");
let data = b"SSE-S3 encrypted content for testing!";
std::fs::File::create(&input)
.unwrap()
.write_all(data)
.unwrap();
let svc = EncryptionService::new(test_master_key(), None);
let ctx = EncryptionContext {
algorithm: SseAlgorithm::Aes256,
kms_key_id: None,
customer_key: None,
};
let meta = svc.encrypt_object(&input, &encrypted, &ctx).await.unwrap();
assert_eq!(meta.algorithm, "AES256");
assert!(meta.encrypted_data_key.is_some());
svc.decrypt_object(&encrypted, &decrypted, &meta, None)
.await
.unwrap();
let result = std::fs::read(&decrypted).unwrap();
assert_eq!(result, data);
}
#[tokio::test]
async fn test_encrypt_decrypt_object_sse_c() {
let dir = tempfile::tempdir().unwrap();
let input = dir.path().join("plain.bin");
let encrypted = dir.path().join("enc.bin");
let decrypted = dir.path().join("dec.bin");
let data = b"SSE-C encrypted content!";
std::fs::File::create(&input)
.unwrap()
.write_all(data)
.unwrap();
let customer_key = [0xBBu8; 32];
let svc = EncryptionService::new(test_master_key(), None);
let ctx = EncryptionContext {
algorithm: SseAlgorithm::CustomerProvided,
kms_key_id: None,
customer_key: Some(customer_key.to_vec()),
};
let meta = svc.encrypt_object(&input, &encrypted, &ctx).await.unwrap();
assert!(meta.encrypted_data_key.is_none());
svc.decrypt_object(&encrypted, &decrypted, &meta, Some(&customer_key))
.await
.unwrap();
let result = std::fs::read(&decrypted).unwrap();
assert_eq!(result, data);
}
#[test]
fn test_encryption_metadata_roundtrip() {
let meta = EncryptionMetadata {
algorithm: "AES256".to_string(),
nonce: "dGVzdG5vbmNlMTI=".to_string(),
encrypted_data_key: Some("c29tZWtleQ==".to_string()),
kms_key_id: None,
};
let map = meta.to_metadata_map();
let restored = EncryptionMetadata::from_metadata(&map).unwrap();
assert_eq!(restored.algorithm, "AES256");
assert_eq!(restored.nonce, meta.nonce);
assert_eq!(restored.encrypted_data_key, meta.encrypted_data_key);
}
#[test]
fn test_is_encrypted() {
let mut meta = HashMap::new();
assert!(!EncryptionMetadata::is_encrypted(&meta));
meta.insert(
"x-amz-server-side-encryption".to_string(),
"AES256".to_string(),
);
assert!(EncryptionMetadata::is_encrypted(&meta));
}
}

View File

@@ -1,138 +0,0 @@
use md5::{Digest, Md5};
use sha2::Sha256;
use std::io::Read;
use std::path::Path;
const CHUNK_SIZE: usize = 65536;
pub fn md5_file(path: &Path) -> std::io::Result<String> {
let mut file = std::fs::File::open(path)?;
let mut hasher = Md5::new();
let mut buf = vec![0u8; CHUNK_SIZE];
loop {
let n = file.read(&mut buf)?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(format!("{:x}", hasher.finalize()))
}
pub fn md5_bytes(data: &[u8]) -> String {
let mut hasher = Md5::new();
hasher.update(data);
format!("{:x}", hasher.finalize())
}
pub fn sha256_file(path: &Path) -> std::io::Result<String> {
let mut file = std::fs::File::open(path)?;
let mut hasher = Sha256::new();
let mut buf = vec![0u8; CHUNK_SIZE];
loop {
let n = file.read(&mut buf)?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(format!("{:x}", hasher.finalize()))
}
pub fn sha256_bytes(data: &[u8]) -> String {
let mut hasher = Sha256::new();
hasher.update(data);
format!("{:x}", hasher.finalize())
}
pub fn md5_sha256_file(path: &Path) -> std::io::Result<(String, String)> {
let mut file = std::fs::File::open(path)?;
let mut md5_hasher = Md5::new();
let mut sha_hasher = Sha256::new();
let mut buf = vec![0u8; CHUNK_SIZE];
loop {
let n = file.read(&mut buf)?;
if n == 0 {
break;
}
md5_hasher.update(&buf[..n]);
sha_hasher.update(&buf[..n]);
}
Ok((
format!("{:x}", md5_hasher.finalize()),
format!("{:x}", sha_hasher.finalize()),
))
}
pub async fn md5_file_async(path: &Path) -> std::io::Result<String> {
let path = path.to_owned();
tokio::task::spawn_blocking(move || md5_file(&path))
.await
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
}
pub async fn sha256_file_async(path: &Path) -> std::io::Result<String> {
let path = path.to_owned();
tokio::task::spawn_blocking(move || sha256_file(&path))
.await
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
}
pub async fn md5_sha256_file_async(path: &Path) -> std::io::Result<(String, String)> {
let path = path.to_owned();
tokio::task::spawn_blocking(move || md5_sha256_file(&path))
.await
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
#[test]
fn test_md5_bytes() {
assert_eq!(md5_bytes(b""), "d41d8cd98f00b204e9800998ecf8427e");
assert_eq!(md5_bytes(b"hello"), "5d41402abc4b2a76b9719d911017c592");
}
#[test]
fn test_sha256_bytes() {
let hash = sha256_bytes(b"hello");
assert_eq!(
hash,
"2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
);
}
#[test]
fn test_md5_file() {
let mut tmp = tempfile::NamedTempFile::new().unwrap();
tmp.write_all(b"hello").unwrap();
tmp.flush().unwrap();
let hash = md5_file(tmp.path()).unwrap();
assert_eq!(hash, "5d41402abc4b2a76b9719d911017c592");
}
#[test]
fn test_md5_sha256_file() {
let mut tmp = tempfile::NamedTempFile::new().unwrap();
tmp.write_all(b"hello").unwrap();
tmp.flush().unwrap();
let (md5, sha) = md5_sha256_file(tmp.path()).unwrap();
assert_eq!(md5, "5d41402abc4b2a76b9719d911017c592");
assert_eq!(
sha,
"2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
);
}
#[tokio::test]
async fn test_md5_file_async() {
let mut tmp = tempfile::NamedTempFile::new().unwrap();
tmp.write_all(b"hello").unwrap();
tmp.flush().unwrap();
let hash = md5_file_async(tmp.path()).await.unwrap();
assert_eq!(hash, "5d41402abc4b2a76b9719d911017c592");
}
}

View File

@@ -1,451 +0,0 @@
use aes_gcm::aead::Aead;
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
use base64::engine::general_purpose::STANDARD as B64;
use base64::Engine;
use chrono::{DateTime, Utc};
use rand::RngCore;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use tokio::sync::RwLock;
use crate::aes_gcm::CryptoError;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KmsKey {
#[serde(rename = "KeyId")]
pub key_id: String,
#[serde(rename = "Arn")]
pub arn: String,
#[serde(rename = "Description")]
pub description: String,
#[serde(rename = "CreationDate")]
pub creation_date: DateTime<Utc>,
#[serde(rename = "Enabled")]
pub enabled: bool,
#[serde(rename = "KeyState")]
pub key_state: String,
#[serde(rename = "KeyUsage")]
pub key_usage: String,
#[serde(rename = "KeySpec")]
pub key_spec: String,
#[serde(rename = "EncryptedKeyMaterial")]
pub encrypted_key_material: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
struct KmsStore {
keys: Vec<KmsKey>,
}
pub struct KmsService {
keys_path: PathBuf,
master_key: Arc<RwLock<[u8; 32]>>,
keys: Arc<RwLock<Vec<KmsKey>>>,
}
impl KmsService {
pub async fn new(keys_dir: &Path) -> Result<Self, CryptoError> {
std::fs::create_dir_all(keys_dir).map_err(CryptoError::Io)?;
let keys_path = keys_dir.join("kms_keys.json");
let master_key = Self::load_or_create_master_key(&keys_dir.join("kms_master.key"))?;
let keys = if keys_path.exists() {
let data = std::fs::read_to_string(&keys_path).map_err(CryptoError::Io)?;
let store: KmsStore = serde_json::from_str(&data)
.map_err(|e| CryptoError::EncryptionFailed(format!("Bad KMS store: {}", e)))?;
store.keys
} else {
Vec::new()
};
Ok(Self {
keys_path,
master_key: Arc::new(RwLock::new(master_key)),
keys: Arc::new(RwLock::new(keys)),
})
}
fn load_or_create_master_key(path: &Path) -> Result<[u8; 32], CryptoError> {
if path.exists() {
let encoded = std::fs::read_to_string(path).map_err(CryptoError::Io)?;
let decoded = B64.decode(encoded.trim()).map_err(|e| {
CryptoError::EncryptionFailed(format!("Bad master key encoding: {}", e))
})?;
if decoded.len() != 32 {
return Err(CryptoError::InvalidKeySize(decoded.len()));
}
let mut key = [0u8; 32];
key.copy_from_slice(&decoded);
Ok(key)
} else {
let mut key = [0u8; 32];
rand::thread_rng().fill_bytes(&mut key);
let encoded = B64.encode(key);
std::fs::write(path, &encoded).map_err(CryptoError::Io)?;
Ok(key)
}
}
fn encrypt_key_material(
master_key: &[u8; 32],
plaintext_key: &[u8],
) -> Result<String, CryptoError> {
let cipher = Aes256Gcm::new(master_key.into());
let mut nonce_bytes = [0u8; 12];
rand::thread_rng().fill_bytes(&mut nonce_bytes);
let nonce = Nonce::from_slice(&nonce_bytes);
let ciphertext = cipher
.encrypt(nonce, plaintext_key)
.map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?;
let mut combined = Vec::with_capacity(12 + ciphertext.len());
combined.extend_from_slice(&nonce_bytes);
combined.extend_from_slice(&ciphertext);
Ok(B64.encode(&combined))
}
fn decrypt_key_material(
master_key: &[u8; 32],
encrypted_b64: &str,
) -> Result<Vec<u8>, CryptoError> {
let combined = B64.decode(encrypted_b64).map_err(|e| {
CryptoError::EncryptionFailed(format!("Bad key material encoding: {}", e))
})?;
if combined.len() < 12 {
return Err(CryptoError::EncryptionFailed(
"Encrypted key material too short".to_string(),
));
}
let (nonce_bytes, ciphertext) = combined.split_at(12);
let cipher = Aes256Gcm::new(master_key.into());
let nonce = Nonce::from_slice(nonce_bytes);
cipher
.decrypt(nonce, ciphertext)
.map_err(|_| CryptoError::DecryptionFailed(0))
}
async fn save(&self) -> Result<(), CryptoError> {
let keys = self.keys.read().await;
let store = KmsStore { keys: keys.clone() };
let json = serde_json::to_string_pretty(&store)
.map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?;
std::fs::write(&self.keys_path, json).map_err(CryptoError::Io)?;
Ok(())
}
pub async fn create_key(&self, description: &str) -> Result<KmsKey, CryptoError> {
let key_id = uuid::Uuid::new_v4().to_string();
let arn = format!("arn:aws:kms:local:000000000000:key/{}", key_id);
let mut plaintext_key = [0u8; 32];
rand::thread_rng().fill_bytes(&mut plaintext_key);
let master = self.master_key.read().await;
let encrypted = Self::encrypt_key_material(&master, &plaintext_key)?;
let kms_key = KmsKey {
key_id: key_id.clone(),
arn,
description: description.to_string(),
creation_date: Utc::now(),
enabled: true,
key_state: "Enabled".to_string(),
key_usage: "ENCRYPT_DECRYPT".to_string(),
key_spec: "SYMMETRIC_DEFAULT".to_string(),
encrypted_key_material: encrypted,
};
self.keys.write().await.push(kms_key.clone());
self.save().await?;
Ok(kms_key)
}
pub async fn list_keys(&self) -> Vec<KmsKey> {
self.keys.read().await.clone()
}
pub async fn get_key(&self, key_id: &str) -> Option<KmsKey> {
let keys = self.keys.read().await;
keys.iter()
.find(|k| k.key_id == key_id || k.arn == key_id)
.cloned()
}
pub async fn delete_key(&self, key_id: &str) -> Result<bool, CryptoError> {
let mut keys = self.keys.write().await;
let len_before = keys.len();
keys.retain(|k| k.key_id != key_id && k.arn != key_id);
let removed = keys.len() < len_before;
drop(keys);
if removed {
self.save().await?;
}
Ok(removed)
}
pub async fn enable_key(&self, key_id: &str) -> Result<bool, CryptoError> {
let mut keys = self.keys.write().await;
if let Some(key) = keys.iter_mut().find(|k| k.key_id == key_id) {
key.enabled = true;
key.key_state = "Enabled".to_string();
drop(keys);
self.save().await?;
Ok(true)
} else {
Ok(false)
}
}
pub async fn disable_key(&self, key_id: &str) -> Result<bool, CryptoError> {
let mut keys = self.keys.write().await;
if let Some(key) = keys.iter_mut().find(|k| k.key_id == key_id) {
key.enabled = false;
key.key_state = "Disabled".to_string();
drop(keys);
self.save().await?;
Ok(true)
} else {
Ok(false)
}
}
pub async fn decrypt_data_key(&self, key_id: &str) -> Result<Vec<u8>, CryptoError> {
let keys = self.keys.read().await;
let key = keys
.iter()
.find(|k| k.key_id == key_id || k.arn == key_id)
.ok_or_else(|| CryptoError::EncryptionFailed("KMS key not found".to_string()))?;
if !key.enabled {
return Err(CryptoError::EncryptionFailed(
"KMS key is disabled".to_string(),
));
}
let master = self.master_key.read().await;
Self::decrypt_key_material(&master, &key.encrypted_key_material)
}
pub async fn encrypt_data(
&self,
key_id: &str,
plaintext: &[u8],
) -> Result<Vec<u8>, CryptoError> {
let data_key = self.decrypt_data_key(key_id).await?;
if data_key.len() != 32 {
return Err(CryptoError::InvalidKeySize(data_key.len()));
}
let key_arr: [u8; 32] = data_key.try_into().unwrap();
let cipher = Aes256Gcm::new(&key_arr.into());
let mut nonce_bytes = [0u8; 12];
rand::thread_rng().fill_bytes(&mut nonce_bytes);
let nonce = Nonce::from_slice(&nonce_bytes);
let ciphertext = cipher
.encrypt(nonce, plaintext)
.map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?;
let mut result = Vec::with_capacity(12 + ciphertext.len());
result.extend_from_slice(&nonce_bytes);
result.extend_from_slice(&ciphertext);
Ok(result)
}
pub async fn decrypt_data(
&self,
key_id: &str,
ciphertext: &[u8],
) -> Result<Vec<u8>, CryptoError> {
if ciphertext.len() < 12 {
return Err(CryptoError::EncryptionFailed(
"Ciphertext too short".to_string(),
));
}
let data_key = self.decrypt_data_key(key_id).await?;
if data_key.len() != 32 {
return Err(CryptoError::InvalidKeySize(data_key.len()));
}
let key_arr: [u8; 32] = data_key.try_into().unwrap();
let (nonce_bytes, ct) = ciphertext.split_at(12);
let cipher = Aes256Gcm::new(&key_arr.into());
let nonce = Nonce::from_slice(nonce_bytes);
cipher
.decrypt(nonce, ct)
.map_err(|_| CryptoError::DecryptionFailed(0))
}
pub async fn generate_data_key(
&self,
key_id: &str,
num_bytes: usize,
) -> Result<(Vec<u8>, Vec<u8>), CryptoError> {
let kms_key = self.decrypt_data_key(key_id).await?;
if kms_key.len() != 32 {
return Err(CryptoError::InvalidKeySize(kms_key.len()));
}
let mut plaintext_key = vec![0u8; num_bytes];
rand::thread_rng().fill_bytes(&mut plaintext_key);
let key_arr: [u8; 32] = kms_key.try_into().unwrap();
let cipher = Aes256Gcm::new(&key_arr.into());
let mut nonce_bytes = [0u8; 12];
rand::thread_rng().fill_bytes(&mut nonce_bytes);
let nonce = Nonce::from_slice(&nonce_bytes);
let encrypted = cipher
.encrypt(nonce, plaintext_key.as_slice())
.map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?;
let mut wrapped = Vec::with_capacity(12 + encrypted.len());
wrapped.extend_from_slice(&nonce_bytes);
wrapped.extend_from_slice(&encrypted);
Ok((plaintext_key, wrapped))
}
}
pub async fn load_or_create_master_key(keys_dir: &Path) -> Result<[u8; 32], CryptoError> {
std::fs::create_dir_all(keys_dir).map_err(CryptoError::Io)?;
let path = keys_dir.join("master.key");
if path.exists() {
let encoded = std::fs::read_to_string(&path).map_err(CryptoError::Io)?;
let decoded = B64.decode(encoded.trim()).map_err(|e| {
CryptoError::EncryptionFailed(format!("Bad master key encoding: {}", e))
})?;
if decoded.len() != 32 {
return Err(CryptoError::InvalidKeySize(decoded.len()));
}
let mut key = [0u8; 32];
key.copy_from_slice(&decoded);
Ok(key)
} else {
let mut key = [0u8; 32];
rand::thread_rng().fill_bytes(&mut key);
let encoded = B64.encode(key);
std::fs::write(&path, &encoded).map_err(CryptoError::Io)?;
Ok(key)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn test_create_and_list_keys() {
let dir = tempfile::tempdir().unwrap();
let kms = KmsService::new(dir.path()).await.unwrap();
let key = kms.create_key("test key").await.unwrap();
assert!(key.enabled);
assert_eq!(key.description, "test key");
assert!(key.key_id.len() > 0);
let keys = kms.list_keys().await;
assert_eq!(keys.len(), 1);
assert_eq!(keys[0].key_id, key.key_id);
}
#[tokio::test]
async fn test_enable_disable_key() {
let dir = tempfile::tempdir().unwrap();
let kms = KmsService::new(dir.path()).await.unwrap();
let key = kms.create_key("toggle").await.unwrap();
assert!(key.enabled);
kms.disable_key(&key.key_id).await.unwrap();
let k = kms.get_key(&key.key_id).await.unwrap();
assert!(!k.enabled);
kms.enable_key(&key.key_id).await.unwrap();
let k = kms.get_key(&key.key_id).await.unwrap();
assert!(k.enabled);
}
#[tokio::test]
async fn test_delete_key() {
let dir = tempfile::tempdir().unwrap();
let kms = KmsService::new(dir.path()).await.unwrap();
let key = kms.create_key("doomed").await.unwrap();
assert!(kms.delete_key(&key.key_id).await.unwrap());
assert!(kms.get_key(&key.key_id).await.is_none());
assert_eq!(kms.list_keys().await.len(), 0);
}
#[tokio::test]
async fn test_encrypt_decrypt_data() {
let dir = tempfile::tempdir().unwrap();
let kms = KmsService::new(dir.path()).await.unwrap();
let key = kms.create_key("enc-key").await.unwrap();
let plaintext = b"Hello, KMS!";
let ciphertext = kms.encrypt_data(&key.key_id, plaintext).await.unwrap();
assert_ne!(&ciphertext, plaintext);
let decrypted = kms.decrypt_data(&key.key_id, &ciphertext).await.unwrap();
assert_eq!(decrypted, plaintext);
}
#[tokio::test]
async fn test_generate_data_key() {
let dir = tempfile::tempdir().unwrap();
let kms = KmsService::new(dir.path()).await.unwrap();
let key = kms.create_key("data-key-gen").await.unwrap();
let (plaintext, wrapped) = kms.generate_data_key(&key.key_id, 32).await.unwrap();
assert_eq!(plaintext.len(), 32);
assert!(wrapped.len() > 32);
}
#[tokio::test]
async fn test_disabled_key_cannot_encrypt() {
let dir = tempfile::tempdir().unwrap();
let kms = KmsService::new(dir.path()).await.unwrap();
let key = kms.create_key("disabled").await.unwrap();
kms.disable_key(&key.key_id).await.unwrap();
let result = kms.encrypt_data(&key.key_id, b"test").await;
assert!(result.is_err());
}
#[tokio::test]
async fn test_persistence_across_reload() {
let dir = tempfile::tempdir().unwrap();
let key_id = {
let kms = KmsService::new(dir.path()).await.unwrap();
let key = kms.create_key("persistent").await.unwrap();
key.key_id
};
let kms2 = KmsService::new(dir.path()).await.unwrap();
let key = kms2.get_key(&key_id).await;
assert!(key.is_some());
assert_eq!(key.unwrap().description, "persistent");
}
#[tokio::test]
async fn test_master_key_roundtrip() {
let dir = tempfile::tempdir().unwrap();
let key1 = load_or_create_master_key(dir.path()).await.unwrap();
let key2 = load_or_create_master_key(dir.path()).await.unwrap();
assert_eq!(key1, key2);
}
}

View File

@@ -1,4 +0,0 @@
pub mod aes_gcm;
pub mod encryption;
pub mod hashing;
pub mod kms;

View File

@@ -1,54 +0,0 @@
[package]
name = "myfsio-server"
version.workspace = true
edition.workspace = true
[dependencies]
myfsio-common = { path = "../myfsio-common" }
myfsio-auth = { path = "../myfsio-auth" }
myfsio-crypto = { path = "../myfsio-crypto" }
myfsio-storage = { path = "../myfsio-storage" }
myfsio-xml = { path = "../myfsio-xml" }
base64 = { workspace = true }
axum = { workspace = true }
tokio = { workspace = true }
tower = { workspace = true }
tower-http = { workspace = true }
hyper = { workspace = true }
bytes = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
tokio-util = { workspace = true }
chrono = { workspace = true }
uuid = { workspace = true }
futures = { workspace = true }
http-body-util = "0.1"
percent-encoding = { workspace = true }
quick-xml = { workspace = true }
mime_guess = "2"
crc32fast = { workspace = true }
duckdb = { workspace = true }
roxmltree = "0.20"
parking_lot = { workspace = true }
regex = "1"
multer = "3"
reqwest = { workspace = true }
aws-sdk-s3 = { workspace = true }
aws-config = { workspace = true }
aws-credential-types = { workspace = true }
aws-smithy-types = { workspace = true }
async-trait = { workspace = true }
rand = "0.8"
tera = { workspace = true }
cookie = { workspace = true }
subtle = { workspace = true }
clap = { workspace = true }
dotenvy = { workspace = true }
sysinfo = "0.32"
aes-gcm = { workspace = true }
[dev-dependencies]
tempfile = "3"
tower = { workspace = true, features = ["util"] }

View File

@@ -1,245 +0,0 @@
use std::net::SocketAddr;
use std::path::PathBuf;
#[derive(Debug, Clone)]
pub struct ServerConfig {
pub bind_addr: SocketAddr,
pub ui_bind_addr: SocketAddr,
pub storage_root: PathBuf,
pub region: String,
pub iam_config_path: PathBuf,
pub sigv4_timestamp_tolerance_secs: u64,
pub presigned_url_min_expiry: u64,
pub presigned_url_max_expiry: u64,
pub secret_key: Option<String>,
pub encryption_enabled: bool,
pub kms_enabled: bool,
pub gc_enabled: bool,
pub integrity_enabled: bool,
pub metrics_enabled: bool,
pub metrics_history_enabled: bool,
pub metrics_interval_minutes: u64,
pub metrics_retention_hours: u64,
pub metrics_history_interval_minutes: u64,
pub metrics_history_retention_hours: u64,
pub lifecycle_enabled: bool,
pub website_hosting_enabled: bool,
pub replication_connect_timeout_secs: u64,
pub replication_read_timeout_secs: u64,
pub replication_max_retries: u32,
pub replication_streaming_threshold_bytes: u64,
pub replication_max_failures_per_bucket: usize,
pub site_sync_enabled: bool,
pub site_sync_interval_secs: u64,
pub site_sync_batch_size: usize,
pub site_sync_connect_timeout_secs: u64,
pub site_sync_read_timeout_secs: u64,
pub site_sync_max_retries: u32,
pub site_sync_clock_skew_tolerance: f64,
pub ui_enabled: bool,
pub templates_dir: PathBuf,
pub static_dir: PathBuf,
}
impl ServerConfig {
pub fn from_env() -> Self {
let host = std::env::var("HOST").unwrap_or_else(|_| "127.0.0.1".to_string());
let port: u16 = std::env::var("PORT")
.unwrap_or_else(|_| "5000".to_string())
.parse()
.unwrap_or(5000);
let ui_port: u16 = std::env::var("UI_PORT")
.unwrap_or_else(|_| "5100".to_string())
.parse()
.unwrap_or(5100);
let storage_root = std::env::var("STORAGE_ROOT").unwrap_or_else(|_| "./data".to_string());
let region = std::env::var("AWS_REGION").unwrap_or_else(|_| "us-east-1".to_string());
let storage_path = PathBuf::from(&storage_root);
let iam_config_path = std::env::var("IAM_CONFIG")
.map(PathBuf::from)
.unwrap_or_else(|_| {
storage_path
.join(".myfsio.sys")
.join("config")
.join("iam.json")
});
let sigv4_timestamp_tolerance_secs: u64 =
std::env::var("SIGV4_TIMESTAMP_TOLERANCE_SECONDS")
.unwrap_or_else(|_| "900".to_string())
.parse()
.unwrap_or(900);
let presigned_url_min_expiry: u64 = std::env::var("PRESIGNED_URL_MIN_EXPIRY_SECONDS")
.unwrap_or_else(|_| "1".to_string())
.parse()
.unwrap_or(1);
let presigned_url_max_expiry: u64 = std::env::var("PRESIGNED_URL_MAX_EXPIRY_SECONDS")
.unwrap_or_else(|_| "604800".to_string())
.parse()
.unwrap_or(604800);
let secret_key = {
let env_key = std::env::var("SECRET_KEY").ok();
match env_key {
Some(k) if !k.is_empty() && k != "dev-secret-key" => Some(k),
_ => {
let secret_file = storage_path
.join(".myfsio.sys")
.join("config")
.join(".secret");
std::fs::read_to_string(&secret_file)
.ok()
.map(|s| s.trim().to_string())
}
}
};
let encryption_enabled = std::env::var("ENCRYPTION_ENABLED")
.unwrap_or_else(|_| "false".to_string())
.to_lowercase()
== "true";
let kms_enabled = std::env::var("KMS_ENABLED")
.unwrap_or_else(|_| "false".to_string())
.to_lowercase()
== "true";
let gc_enabled = std::env::var("GC_ENABLED")
.unwrap_or_else(|_| "false".to_string())
.to_lowercase()
== "true";
let integrity_enabled = std::env::var("INTEGRITY_ENABLED")
.unwrap_or_else(|_| "false".to_string())
.to_lowercase()
== "true";
let metrics_enabled = std::env::var("OPERATION_METRICS_ENABLED")
.unwrap_or_else(|_| "false".to_string())
.to_lowercase()
== "true";
let metrics_history_enabled = std::env::var("METRICS_HISTORY_ENABLED")
.unwrap_or_else(|_| "false".to_string())
.to_lowercase()
== "true";
let metrics_interval_minutes = parse_u64_env("OPERATION_METRICS_INTERVAL_MINUTES", 5);
let metrics_retention_hours = parse_u64_env("OPERATION_METRICS_RETENTION_HOURS", 24);
let metrics_history_interval_minutes = parse_u64_env("METRICS_HISTORY_INTERVAL_MINUTES", 5);
let metrics_history_retention_hours = parse_u64_env("METRICS_HISTORY_RETENTION_HOURS", 24);
let lifecycle_enabled = std::env::var("LIFECYCLE_ENABLED")
.unwrap_or_else(|_| "false".to_string())
.to_lowercase()
== "true";
let website_hosting_enabled = std::env::var("WEBSITE_HOSTING_ENABLED")
.unwrap_or_else(|_| "false".to_string())
.to_lowercase()
== "true";
let replication_connect_timeout_secs =
parse_u64_env("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5);
let replication_read_timeout_secs = parse_u64_env("REPLICATION_READ_TIMEOUT_SECONDS", 30);
let replication_max_retries = parse_u64_env("REPLICATION_MAX_RETRIES", 2) as u32;
let replication_streaming_threshold_bytes =
parse_u64_env("REPLICATION_STREAMING_THRESHOLD_BYTES", 10_485_760);
let replication_max_failures_per_bucket =
parse_u64_env("REPLICATION_MAX_FAILURES_PER_BUCKET", 50) as usize;
let site_sync_enabled = std::env::var("SITE_SYNC_ENABLED")
.unwrap_or_else(|_| "false".to_string())
.to_lowercase()
== "true";
let site_sync_interval_secs = parse_u64_env("SITE_SYNC_INTERVAL_SECONDS", 60);
let site_sync_batch_size = parse_u64_env("SITE_SYNC_BATCH_SIZE", 100) as usize;
let site_sync_connect_timeout_secs = parse_u64_env("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10);
let site_sync_read_timeout_secs = parse_u64_env("SITE_SYNC_READ_TIMEOUT_SECONDS", 120);
let site_sync_max_retries = parse_u64_env("SITE_SYNC_MAX_RETRIES", 2) as u32;
let site_sync_clock_skew_tolerance: f64 =
std::env::var("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(1.0);
let ui_enabled = std::env::var("UI_ENABLED")
.unwrap_or_else(|_| "true".to_string())
.to_lowercase()
== "true";
let templates_dir = std::env::var("TEMPLATES_DIR")
.map(PathBuf::from)
.unwrap_or_else(|_| default_templates_dir());
let static_dir = std::env::var("STATIC_DIR")
.map(PathBuf::from)
.unwrap_or_else(|_| default_static_dir());
let host_ip: std::net::IpAddr = host.parse().unwrap();
Self {
bind_addr: SocketAddr::new(host_ip, port),
ui_bind_addr: SocketAddr::new(host_ip, ui_port),
storage_root: storage_path,
region,
iam_config_path,
sigv4_timestamp_tolerance_secs,
presigned_url_min_expiry,
presigned_url_max_expiry,
secret_key,
encryption_enabled,
kms_enabled,
gc_enabled,
integrity_enabled,
metrics_enabled,
metrics_history_enabled,
metrics_interval_minutes,
metrics_retention_hours,
metrics_history_interval_minutes,
metrics_history_retention_hours,
lifecycle_enabled,
website_hosting_enabled,
replication_connect_timeout_secs,
replication_read_timeout_secs,
replication_max_retries,
replication_streaming_threshold_bytes,
replication_max_failures_per_bucket,
site_sync_enabled,
site_sync_interval_secs,
site_sync_batch_size,
site_sync_connect_timeout_secs,
site_sync_read_timeout_secs,
site_sync_max_retries,
site_sync_clock_skew_tolerance,
ui_enabled,
templates_dir,
static_dir,
}
}
}
fn default_templates_dir() -> PathBuf {
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
manifest_dir.join("templates")
}
fn default_static_dir() -> PathBuf {
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
for candidate in [
manifest_dir.join("static"),
manifest_dir.join("..").join("..").join("..").join("static"),
] {
if candidate.exists() {
return candidate;
}
}
manifest_dir.join("static")
}
fn parse_u64_env(key: &str, default: u64) -> u64 {
std::env::var(key)
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(default)
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,184 +0,0 @@
use std::pin::Pin;
use std::task::{Context, Poll};
use bytes::{Buf, BytesMut};
use tokio::io::{AsyncRead, ReadBuf};
enum State {
ReadSize,
ReadData(u64),
ReadTrailer,
Finished,
}
pub struct AwsChunkedStream<S> {
inner: S,
buffer: BytesMut,
state: State,
pending: BytesMut,
eof: bool,
}
impl<S> AwsChunkedStream<S> {
pub fn new(inner: S) -> Self {
Self {
inner,
buffer: BytesMut::with_capacity(8192),
state: State::ReadSize,
pending: BytesMut::new(),
eof: false,
}
}
fn find_crlf(&self) -> Option<usize> {
for i in 0..self.buffer.len().saturating_sub(1) {
if self.buffer[i] == b'\r' && self.buffer[i + 1] == b'\n' {
return Some(i);
}
}
None
}
fn parse_chunk_size(line: &[u8]) -> std::io::Result<u64> {
let text = std::str::from_utf8(line).map_err(|_| {
std::io::Error::new(
std::io::ErrorKind::InvalidData,
"invalid chunk size encoding",
)
})?;
let head = text.split(';').next().unwrap_or("").trim();
u64::from_str_radix(head, 16).map_err(|_| {
std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!("invalid chunk size: {}", head),
)
})
}
fn try_advance(&mut self, out: &mut ReadBuf<'_>) -> std::io::Result<bool> {
loop {
if out.remaining() == 0 {
return Ok(true);
}
if !self.pending.is_empty() {
let take = std::cmp::min(self.pending.len(), out.remaining());
out.put_slice(&self.pending[..take]);
self.pending.advance(take);
continue;
}
match self.state {
State::Finished => return Ok(true),
State::ReadSize => {
let idx = match self.find_crlf() {
Some(i) => i,
None => return Ok(false),
};
let line = self.buffer.split_to(idx);
self.buffer.advance(2);
let size = Self::parse_chunk_size(&line)?;
if size == 0 {
self.state = State::ReadTrailer;
} else {
self.state = State::ReadData(size);
}
}
State::ReadData(remaining) => {
if self.buffer.is_empty() {
return Ok(false);
}
let avail = std::cmp::min(self.buffer.len() as u64, remaining) as usize;
let take = std::cmp::min(avail, out.remaining());
out.put_slice(&self.buffer[..take]);
self.buffer.advance(take);
let new_remaining = remaining - take as u64;
if new_remaining == 0 {
if self.buffer.len() < 2 {
self.state = State::ReadData(0);
return Ok(false);
}
if &self.buffer[..2] != b"\r\n" {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"malformed chunk terminator",
));
}
self.buffer.advance(2);
self.state = State::ReadSize;
} else {
self.state = State::ReadData(new_remaining);
}
}
State::ReadTrailer => {
let idx = match self.find_crlf() {
Some(i) => i,
None => return Ok(false),
};
if idx == 0 {
self.buffer.advance(2);
self.state = State::Finished;
} else {
self.buffer.advance(idx + 2);
}
}
}
}
}
}
impl<S> AsyncRead for AwsChunkedStream<S>
where
S: AsyncRead + Unpin,
{
fn poll_read(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &mut ReadBuf<'_>,
) -> Poll<std::io::Result<()>> {
loop {
let before = buf.filled().len();
let done = match self.try_advance(buf) {
Ok(v) => v,
Err(e) => return Poll::Ready(Err(e)),
};
if buf.filled().len() > before {
return Poll::Ready(Ok(()));
}
if done {
return Poll::Ready(Ok(()));
}
if self.eof {
return Poll::Ready(Err(std::io::Error::new(
std::io::ErrorKind::UnexpectedEof,
"unexpected EOF in aws-chunked stream",
)));
}
let mut tmp = [0u8; 8192];
let mut rb = ReadBuf::new(&mut tmp);
match Pin::new(&mut self.inner).poll_read(cx, &mut rb) {
Poll::Ready(Ok(())) => {
let n = rb.filled().len();
if n == 0 {
self.eof = true;
continue;
}
self.buffer.extend_from_slice(rb.filled());
}
Poll::Ready(Err(e)) => return Poll::Ready(Err(e)),
Poll::Pending => return Poll::Pending,
}
}
}
}
pub fn decode_body(body: axum::body::Body) -> impl AsyncRead + Send + Unpin {
use futures::TryStreamExt;
let stream = tokio_util::io::StreamReader::new(
http_body_util::BodyStream::new(body)
.map_ok(|frame| frame.into_data().unwrap_or_default())
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)),
);
AwsChunkedStream::new(stream)
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,541 +0,0 @@
use aes_gcm::aead::Aead;
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
use axum::body::Body;
use axum::extract::State;
use axum::http::StatusCode;
use axum::response::{IntoResponse, Response};
use base64::engine::general_purpose::STANDARD as B64;
use base64::Engine;
use rand::RngCore;
use serde_json::{json, Value};
use crate::state::AppState;
fn json_ok(value: Value) -> Response {
(
StatusCode::OK,
[("content-type", "application/json")],
value.to_string(),
)
.into_response()
}
fn json_err(status: StatusCode, msg: &str) -> Response {
(
status,
[("content-type", "application/json")],
json!({"error": msg}).to_string(),
)
.into_response()
}
async fn read_json(body: Body) -> Result<Value, Response> {
let body_bytes = http_body_util::BodyExt::collect(body)
.await
.map_err(|_| json_err(StatusCode::BAD_REQUEST, "Invalid request body"))?
.to_bytes();
if body_bytes.is_empty() {
Ok(json!({}))
} else {
serde_json::from_slice(&body_bytes)
.map_err(|_| json_err(StatusCode::BAD_REQUEST, "Invalid JSON"))
}
}
fn require_kms(
state: &AppState,
) -> Result<&std::sync::Arc<myfsio_crypto::kms::KmsService>, Response> {
state
.kms
.as_ref()
.ok_or_else(|| json_err(StatusCode::SERVICE_UNAVAILABLE, "KMS not enabled"))
}
fn decode_b64(value: &str, field: &str) -> Result<Vec<u8>, Response> {
B64.decode(value).map_err(|_| {
json_err(
StatusCode::BAD_REQUEST,
&format!("Invalid base64 {}", field),
)
})
}
fn require_str<'a>(value: &'a Value, names: &[&str], message: &str) -> Result<&'a str, Response> {
for name in names {
if let Some(found) = value.get(*name).and_then(|v| v.as_str()) {
return Ok(found);
}
}
Err(json_err(StatusCode::BAD_REQUEST, message))
}
pub async fn list_keys(State(state): State<AppState>) -> Response {
let kms = match require_kms(&state) {
Ok(kms) => kms,
Err(response) => return response,
};
let keys = kms.list_keys().await;
let keys_json: Vec<Value> = keys
.iter()
.map(|k| {
json!({
"KeyId": k.key_id,
"Arn": k.arn,
"Description": k.description,
"CreationDate": k.creation_date.to_rfc3339(),
"Enabled": k.enabled,
"KeyState": k.key_state,
"KeyUsage": k.key_usage,
"KeySpec": k.key_spec,
})
})
.collect();
json_ok(json!({"keys": keys_json}))
}
pub async fn create_key(State(state): State<AppState>, body: Body) -> Response {
let kms = match require_kms(&state) {
Ok(kms) => kms,
Err(response) => return response,
};
let req = match read_json(body).await {
Ok(req) => req,
Err(response) => return response,
};
let description = req
.get("Description")
.or_else(|| req.get("description"))
.and_then(|d| d.as_str())
.unwrap_or("");
match kms.create_key(description).await {
Ok(key) => json_ok(json!({
"KeyId": key.key_id,
"Arn": key.arn,
"Description": key.description,
"CreationDate": key.creation_date.to_rfc3339(),
"Enabled": key.enabled,
"KeyState": key.key_state,
})),
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
}
}
pub async fn get_key(
State(state): State<AppState>,
axum::extract::Path(key_id): axum::extract::Path<String>,
) -> Response {
let kms = match require_kms(&state) {
Ok(kms) => kms,
Err(response) => return response,
};
match kms.get_key(&key_id).await {
Some(key) => json_ok(json!({
"KeyId": key.key_id,
"Arn": key.arn,
"Description": key.description,
"CreationDate": key.creation_date.to_rfc3339(),
"Enabled": key.enabled,
"KeyState": key.key_state,
"KeyUsage": key.key_usage,
"KeySpec": key.key_spec,
})),
None => json_err(StatusCode::NOT_FOUND, "Key not found"),
}
}
pub async fn delete_key(
State(state): State<AppState>,
axum::extract::Path(key_id): axum::extract::Path<String>,
) -> Response {
let kms = match require_kms(&state) {
Ok(kms) => kms,
Err(response) => return response,
};
match kms.delete_key(&key_id).await {
Ok(true) => StatusCode::NO_CONTENT.into_response(),
Ok(false) => json_err(StatusCode::NOT_FOUND, "Key not found"),
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
}
}
pub async fn enable_key(
State(state): State<AppState>,
axum::extract::Path(key_id): axum::extract::Path<String>,
) -> Response {
let kms = match require_kms(&state) {
Ok(kms) => kms,
Err(response) => return response,
};
match kms.enable_key(&key_id).await {
Ok(true) => json_ok(json!({"status": "enabled"})),
Ok(false) => json_err(StatusCode::NOT_FOUND, "Key not found"),
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
}
}
pub async fn disable_key(
State(state): State<AppState>,
axum::extract::Path(key_id): axum::extract::Path<String>,
) -> Response {
let kms = match require_kms(&state) {
Ok(kms) => kms,
Err(response) => return response,
};
match kms.disable_key(&key_id).await {
Ok(true) => json_ok(json!({"status": "disabled"})),
Ok(false) => json_err(StatusCode::NOT_FOUND, "Key not found"),
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
}
}
pub async fn encrypt(State(state): State<AppState>, body: Body) -> Response {
let kms = match require_kms(&state) {
Ok(kms) => kms,
Err(response) => return response,
};
let req = match read_json(body).await {
Ok(req) => req,
Err(response) => return response,
};
let key_id = match require_str(&req, &["KeyId", "key_id"], "Missing KeyId") {
Ok(value) => value,
Err(response) => return response,
};
let plaintext_b64 = match require_str(&req, &["Plaintext", "plaintext"], "Missing Plaintext") {
Ok(value) => value,
Err(response) => return response,
};
let plaintext = match decode_b64(plaintext_b64, "Plaintext") {
Ok(value) => value,
Err(response) => return response,
};
match kms.encrypt_data(key_id, &plaintext).await {
Ok(ct) => json_ok(json!({
"KeyId": key_id,
"CiphertextBlob": B64.encode(&ct),
})),
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
}
}
pub async fn decrypt(State(state): State<AppState>, body: Body) -> Response {
let kms = match require_kms(&state) {
Ok(kms) => kms,
Err(response) => return response,
};
let req = match read_json(body).await {
Ok(req) => req,
Err(response) => return response,
};
let key_id = match require_str(&req, &["KeyId", "key_id"], "Missing KeyId") {
Ok(value) => value,
Err(response) => return response,
};
let ciphertext_b64 = match require_str(
&req,
&["CiphertextBlob", "ciphertext_blob"],
"Missing CiphertextBlob",
) {
Ok(value) => value,
Err(response) => return response,
};
let ciphertext = match decode_b64(ciphertext_b64, "CiphertextBlob") {
Ok(value) => value,
Err(response) => return response,
};
match kms.decrypt_data(key_id, &ciphertext).await {
Ok(pt) => json_ok(json!({
"KeyId": key_id,
"Plaintext": B64.encode(&pt),
})),
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
}
}
pub async fn generate_data_key(State(state): State<AppState>, body: Body) -> Response {
generate_data_key_inner(state, body, true).await
}
pub async fn generate_data_key_without_plaintext(
State(state): State<AppState>,
body: Body,
) -> Response {
generate_data_key_inner(state, body, false).await
}
async fn generate_data_key_inner(state: AppState, body: Body, include_plaintext: bool) -> Response {
let kms = match require_kms(&state) {
Ok(kms) => kms,
Err(response) => return response,
};
let req = match read_json(body).await {
Ok(req) => req,
Err(response) => return response,
};
let key_id = match require_str(&req, &["KeyId", "key_id"], "Missing KeyId") {
Ok(value) => value,
Err(response) => return response,
};
let num_bytes = req
.get("NumberOfBytes")
.and_then(|v| v.as_u64())
.unwrap_or(32) as usize;
if !(1..=1024).contains(&num_bytes) {
return json_err(StatusCode::BAD_REQUEST, "NumberOfBytes must be 1-1024");
}
match kms.generate_data_key(key_id, num_bytes).await {
Ok((plaintext, wrapped)) => {
let mut value = json!({
"KeyId": key_id,
"CiphertextBlob": B64.encode(&wrapped),
});
if include_plaintext {
value["Plaintext"] = json!(B64.encode(&plaintext));
}
json_ok(value)
}
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
}
}
pub async fn re_encrypt(State(state): State<AppState>, body: Body) -> Response {
let kms = match require_kms(&state) {
Ok(kms) => kms,
Err(response) => return response,
};
let req = match read_json(body).await {
Ok(req) => req,
Err(response) => return response,
};
let ciphertext_b64 = match require_str(
&req,
&["CiphertextBlob", "ciphertext_blob"],
"CiphertextBlob is required",
) {
Ok(value) => value,
Err(response) => return response,
};
let destination_key_id = match require_str(
&req,
&["DestinationKeyId", "destination_key_id"],
"DestinationKeyId is required",
) {
Ok(value) => value,
Err(response) => return response,
};
let ciphertext = match decode_b64(ciphertext_b64, "CiphertextBlob") {
Ok(value) => value,
Err(response) => return response,
};
let keys = kms.list_keys().await;
let mut source_key_id: Option<String> = None;
let mut plaintext: Option<Vec<u8>> = None;
for key in keys {
if !key.enabled {
continue;
}
if let Ok(value) = kms.decrypt_data(&key.key_id, &ciphertext).await {
source_key_id = Some(key.key_id);
plaintext = Some(value);
break;
}
}
let Some(source_key_id) = source_key_id else {
return json_err(
StatusCode::BAD_REQUEST,
"Could not determine source key for CiphertextBlob",
);
};
let plaintext = plaintext.unwrap_or_default();
match kms.encrypt_data(destination_key_id, &plaintext).await {
Ok(new_ciphertext) => json_ok(json!({
"CiphertextBlob": B64.encode(&new_ciphertext),
"SourceKeyId": source_key_id,
"KeyId": destination_key_id,
})),
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
}
}
pub async fn generate_random(State(state): State<AppState>, body: Body) -> Response {
if let Err(response) = require_kms(&state) {
return response;
}
let req = match read_json(body).await {
Ok(req) => req,
Err(response) => return response,
};
let num_bytes = req
.get("NumberOfBytes")
.and_then(|v| v.as_u64())
.unwrap_or(32) as usize;
if !(1..=1024).contains(&num_bytes) {
return json_err(StatusCode::BAD_REQUEST, "NumberOfBytes must be 1-1024");
}
let mut bytes = vec![0u8; num_bytes];
rand::thread_rng().fill_bytes(&mut bytes);
json_ok(json!({
"Plaintext": B64.encode(bytes),
}))
}
pub async fn client_generate_key(State(state): State<AppState>) -> Response {
let _ = state;
let mut key = [0u8; 32];
rand::thread_rng().fill_bytes(&mut key);
json_ok(json!({
"Key": B64.encode(key),
"Algorithm": "AES-256-GCM",
"KeySize": 32,
}))
}
pub async fn client_encrypt(State(state): State<AppState>, body: Body) -> Response {
let _ = state;
let req = match read_json(body).await {
Ok(req) => req,
Err(response) => return response,
};
let plaintext_b64 =
match require_str(&req, &["Plaintext", "plaintext"], "Plaintext is required") {
Ok(value) => value,
Err(response) => return response,
};
let key_b64 = match require_str(&req, &["Key", "key"], "Key is required") {
Ok(value) => value,
Err(response) => return response,
};
let plaintext = match decode_b64(plaintext_b64, "Plaintext") {
Ok(value) => value,
Err(response) => return response,
};
let key_bytes = match decode_b64(key_b64, "Key") {
Ok(value) => value,
Err(response) => return response,
};
if key_bytes.len() != 32 {
return json_err(StatusCode::BAD_REQUEST, "Key must decode to 32 bytes");
}
let cipher = match Aes256Gcm::new_from_slice(&key_bytes) {
Ok(cipher) => cipher,
Err(_) => return json_err(StatusCode::BAD_REQUEST, "Invalid encryption key"),
};
let mut nonce_bytes = [0u8; 12];
rand::thread_rng().fill_bytes(&mut nonce_bytes);
let nonce = Nonce::from_slice(&nonce_bytes);
match cipher.encrypt(nonce, plaintext.as_ref()) {
Ok(ciphertext) => json_ok(json!({
"Ciphertext": B64.encode(ciphertext),
"Nonce": B64.encode(nonce_bytes),
"Algorithm": "AES-256-GCM",
})),
Err(e) => json_err(StatusCode::BAD_REQUEST, &e.to_string()),
}
}
pub async fn client_decrypt(State(state): State<AppState>, body: Body) -> Response {
let _ = state;
let req = match read_json(body).await {
Ok(req) => req,
Err(response) => return response,
};
let ciphertext_b64 = match require_str(
&req,
&["Ciphertext", "ciphertext"],
"Ciphertext is required",
) {
Ok(value) => value,
Err(response) => return response,
};
let nonce_b64 = match require_str(&req, &["Nonce", "nonce"], "Nonce is required") {
Ok(value) => value,
Err(response) => return response,
};
let key_b64 = match require_str(&req, &["Key", "key"], "Key is required") {
Ok(value) => value,
Err(response) => return response,
};
let ciphertext = match decode_b64(ciphertext_b64, "Ciphertext") {
Ok(value) => value,
Err(response) => return response,
};
let nonce_bytes = match decode_b64(nonce_b64, "Nonce") {
Ok(value) => value,
Err(response) => return response,
};
let key_bytes = match decode_b64(key_b64, "Key") {
Ok(value) => value,
Err(response) => return response,
};
if key_bytes.len() != 32 {
return json_err(StatusCode::BAD_REQUEST, "Key must decode to 32 bytes");
}
if nonce_bytes.len() != 12 {
return json_err(StatusCode::BAD_REQUEST, "Nonce must decode to 12 bytes");
}
let cipher = match Aes256Gcm::new_from_slice(&key_bytes) {
Ok(cipher) => cipher,
Err(_) => return json_err(StatusCode::BAD_REQUEST, "Invalid encryption key"),
};
let nonce = Nonce::from_slice(&nonce_bytes);
match cipher.decrypt(nonce, ciphertext.as_ref()) {
Ok(plaintext) => json_ok(json!({
"Plaintext": B64.encode(plaintext),
})),
Err(e) => json_err(StatusCode::BAD_REQUEST, &e.to_string()),
}
}
pub async fn materials(
State(state): State<AppState>,
axum::extract::Path(key_id): axum::extract::Path<String>,
body: Body,
) -> Response {
let kms = match require_kms(&state) {
Ok(kms) => kms,
Err(response) => return response,
};
let _ = match read_json(body).await {
Ok(req) => req,
Err(response) => return response,
};
match kms.generate_data_key(&key_id, 32).await {
Ok((plaintext, wrapped)) => json_ok(json!({
"PlaintextKey": B64.encode(plaintext),
"EncryptedKey": B64.encode(wrapped),
"KeyId": key_id,
"Algorithm": "AES-256-GCM",
"KeyWrapAlgorithm": "kms",
})),
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,578 +0,0 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use axum::body::Body;
use axum::http::{HeaderMap, HeaderName, StatusCode};
use axum::response::{IntoResponse, Response};
use base64::Engine;
use bytes::Bytes;
use crc32fast::Hasher;
use duckdb::types::ValueRef;
use duckdb::Connection;
use futures::stream;
use http_body_util::BodyExt;
use myfsio_common::error::{S3Error, S3ErrorCode};
use myfsio_storage::traits::StorageEngine;
use crate::state::AppState;
#[cfg(target_os = "windows")]
#[link(name = "Rstrtmgr")]
extern "system" {}
const CHUNK_SIZE: usize = 65_536;
pub async fn post_select_object_content(
state: &AppState,
bucket: &str,
key: &str,
headers: &HeaderMap,
body: Body,
) -> Response {
if let Some(resp) = require_xml_content_type(headers) {
return resp;
}
let body_bytes = match body.collect().await {
Ok(collected) => collected.to_bytes(),
Err(_) => {
return s3_error_response(S3Error::new(
S3ErrorCode::MalformedXML,
"Unable to parse XML document",
));
}
};
let request = match parse_select_request(&body_bytes) {
Ok(r) => r,
Err(err) => return s3_error_response(err),
};
let object_path = match state.storage.get_object_path(bucket, key).await {
Ok(path) => path,
Err(_) => {
return s3_error_response(S3Error::new(S3ErrorCode::NoSuchKey, "Object not found"));
}
};
let join_res =
tokio::task::spawn_blocking(move || execute_select_query(object_path, request)).await;
let chunks = match join_res {
Ok(Ok(chunks)) => chunks,
Ok(Err(message)) => {
return s3_error_response(S3Error::new(S3ErrorCode::InvalidRequest, message));
}
Err(_) => {
return s3_error_response(S3Error::new(
S3ErrorCode::InternalError,
"SelectObjectContent execution failed",
));
}
};
let bytes_returned: usize = chunks.iter().map(|c| c.len()).sum();
let mut events: Vec<Bytes> = Vec::with_capacity(chunks.len() + 2);
for chunk in chunks {
events.push(Bytes::from(encode_select_event("Records", &chunk)));
}
let stats_payload = build_stats_xml(0, bytes_returned);
events.push(Bytes::from(encode_select_event(
"Stats",
stats_payload.as_bytes(),
)));
events.push(Bytes::from(encode_select_event("End", b"")));
let stream = stream::iter(events.into_iter().map(Ok::<Bytes, std::io::Error>));
let body = Body::from_stream(stream);
let mut response = (StatusCode::OK, body).into_response();
response.headers_mut().insert(
HeaderName::from_static("content-type"),
"application/octet-stream".parse().unwrap(),
);
response.headers_mut().insert(
HeaderName::from_static("x-amz-request-charged"),
"requester".parse().unwrap(),
);
response
}
#[derive(Clone)]
struct SelectRequest {
expression: String,
input_format: InputFormat,
output_format: OutputFormat,
}
#[derive(Clone)]
enum InputFormat {
Csv(CsvInputConfig),
Json(JsonInputConfig),
Parquet,
}
#[derive(Clone)]
struct CsvInputConfig {
file_header_info: String,
field_delimiter: String,
quote_character: String,
}
#[derive(Clone)]
struct JsonInputConfig {
json_type: String,
}
#[derive(Clone)]
enum OutputFormat {
Csv(CsvOutputConfig),
Json(JsonOutputConfig),
}
#[derive(Clone)]
struct CsvOutputConfig {
field_delimiter: String,
record_delimiter: String,
quote_character: String,
}
#[derive(Clone)]
struct JsonOutputConfig {
record_delimiter: String,
}
fn parse_select_request(payload: &[u8]) -> Result<SelectRequest, S3Error> {
let xml = String::from_utf8_lossy(payload);
let doc = roxmltree::Document::parse(&xml)
.map_err(|_| S3Error::new(S3ErrorCode::MalformedXML, "Unable to parse XML document"))?;
let root = doc.root_element();
if root.tag_name().name() != "SelectObjectContentRequest" {
return Err(S3Error::new(
S3ErrorCode::MalformedXML,
"Root element must be SelectObjectContentRequest",
));
}
let expression = child_text(&root, "Expression")
.filter(|v| !v.is_empty())
.ok_or_else(|| S3Error::new(S3ErrorCode::InvalidRequest, "Expression is required"))?;
let expression_type = child_text(&root, "ExpressionType").unwrap_or_else(|| "SQL".to_string());
if !expression_type.eq_ignore_ascii_case("SQL") {
return Err(S3Error::new(
S3ErrorCode::InvalidRequest,
"Only SQL expression type is supported",
));
}
let input_node = child(&root, "InputSerialization").ok_or_else(|| {
S3Error::new(
S3ErrorCode::InvalidRequest,
"InputSerialization is required",
)
})?;
let output_node = child(&root, "OutputSerialization").ok_or_else(|| {
S3Error::new(
S3ErrorCode::InvalidRequest,
"OutputSerialization is required",
)
})?;
let input_format = parse_input_format(&input_node)?;
let output_format = parse_output_format(&output_node)?;
Ok(SelectRequest {
expression,
input_format,
output_format,
})
}
fn parse_input_format(node: &roxmltree::Node<'_, '_>) -> Result<InputFormat, S3Error> {
if let Some(csv_node) = child(node, "CSV") {
return Ok(InputFormat::Csv(CsvInputConfig {
file_header_info: child_text(&csv_node, "FileHeaderInfo")
.unwrap_or_else(|| "NONE".to_string())
.to_ascii_uppercase(),
field_delimiter: child_text(&csv_node, "FieldDelimiter")
.unwrap_or_else(|| ",".to_string()),
quote_character: child_text(&csv_node, "QuoteCharacter")
.unwrap_or_else(|| "\"".to_string()),
}));
}
if let Some(json_node) = child(node, "JSON") {
return Ok(InputFormat::Json(JsonInputConfig {
json_type: child_text(&json_node, "Type")
.unwrap_or_else(|| "DOCUMENT".to_string())
.to_ascii_uppercase(),
}));
}
if child(node, "Parquet").is_some() {
return Ok(InputFormat::Parquet);
}
Err(S3Error::new(
S3ErrorCode::InvalidRequest,
"InputSerialization must specify CSV, JSON, or Parquet",
))
}
fn parse_output_format(node: &roxmltree::Node<'_, '_>) -> Result<OutputFormat, S3Error> {
if let Some(csv_node) = child(node, "CSV") {
return Ok(OutputFormat::Csv(CsvOutputConfig {
field_delimiter: child_text(&csv_node, "FieldDelimiter")
.unwrap_or_else(|| ",".to_string()),
record_delimiter: child_text(&csv_node, "RecordDelimiter")
.unwrap_or_else(|| "\n".to_string()),
quote_character: child_text(&csv_node, "QuoteCharacter")
.unwrap_or_else(|| "\"".to_string()),
}));
}
if let Some(json_node) = child(node, "JSON") {
return Ok(OutputFormat::Json(JsonOutputConfig {
record_delimiter: child_text(&json_node, "RecordDelimiter")
.unwrap_or_else(|| "\n".to_string()),
}));
}
Err(S3Error::new(
S3ErrorCode::InvalidRequest,
"OutputSerialization must specify CSV or JSON",
))
}
fn child<'a, 'input>(
node: &'a roxmltree::Node<'a, 'input>,
name: &str,
) -> Option<roxmltree::Node<'a, 'input>> {
node.children()
.find(|n| n.is_element() && n.tag_name().name() == name)
}
fn child_text(node: &roxmltree::Node<'_, '_>, name: &str) -> Option<String> {
child(node, name)
.and_then(|n| n.text())
.map(|s| s.to_string())
}
fn execute_select_query(path: PathBuf, request: SelectRequest) -> Result<Vec<Vec<u8>>, String> {
let conn =
Connection::open_in_memory().map_err(|e| format!("DuckDB connection error: {}", e))?;
load_input_table(&conn, &path, &request.input_format)?;
let expression = request
.expression
.replace("s3object", "data")
.replace("S3Object", "data");
let mut stmt = conn
.prepare(&expression)
.map_err(|e| format!("SQL execution error: {}", e))?;
let mut rows = stmt
.query([])
.map_err(|e| format!("SQL execution error: {}", e))?;
let stmt_ref = rows
.as_ref()
.ok_or_else(|| "SQL execution error: statement metadata unavailable".to_string())?;
let col_count = stmt_ref.column_count();
let mut columns: Vec<String> = Vec::with_capacity(col_count);
for i in 0..col_count {
let name = stmt_ref
.column_name(i)
.map(|s| s.to_string())
.unwrap_or_else(|_| format!("_{}", i));
columns.push(name);
}
match request.output_format {
OutputFormat::Csv(cfg) => collect_csv_chunks(&mut rows, col_count, cfg),
OutputFormat::Json(cfg) => collect_json_chunks(&mut rows, col_count, &columns, cfg),
}
}
fn load_input_table(conn: &Connection, path: &Path, input: &InputFormat) -> Result<(), String> {
let path_str = path.to_string_lossy().replace('\\', "/");
match input {
InputFormat::Csv(cfg) => {
let header = cfg.file_header_info == "USE" || cfg.file_header_info == "IGNORE";
let delimiter = normalize_single_char(&cfg.field_delimiter, ',');
let quote = normalize_single_char(&cfg.quote_character, '"');
let sql = format!(
"CREATE TABLE data AS SELECT * FROM read_csv('{}', header={}, delim='{}', quote='{}')",
sql_escape(&path_str),
if header { "true" } else { "false" },
sql_escape(&delimiter),
sql_escape(&quote)
);
conn.execute_batch(&sql)
.map_err(|e| format!("Failed loading CSV data: {}", e))?;
}
InputFormat::Json(cfg) => {
let format = if cfg.json_type == "LINES" {
"newline_delimited"
} else {
"array"
};
let sql = format!(
"CREATE TABLE data AS SELECT * FROM read_json_auto('{}', format='{}')",
sql_escape(&path_str),
format
);
conn.execute_batch(&sql)
.map_err(|e| format!("Failed loading JSON data: {}", e))?;
}
InputFormat::Parquet => {
let sql = format!(
"CREATE TABLE data AS SELECT * FROM read_parquet('{}')",
sql_escape(&path_str)
);
conn.execute_batch(&sql)
.map_err(|e| format!("Failed loading Parquet data: {}", e))?;
}
}
Ok(())
}
fn sql_escape(value: &str) -> String {
value.replace('\'', "''")
}
fn normalize_single_char(value: &str, default_char: char) -> String {
value.chars().next().unwrap_or(default_char).to_string()
}
fn collect_csv_chunks(
rows: &mut duckdb::Rows<'_>,
col_count: usize,
cfg: CsvOutputConfig,
) -> Result<Vec<Vec<u8>>, String> {
let delimiter = cfg.field_delimiter;
let record_delimiter = cfg.record_delimiter;
let quote = cfg.quote_character;
let mut chunks: Vec<Vec<u8>> = Vec::new();
let mut buffer = String::new();
while let Some(row) = rows
.next()
.map_err(|e| format!("SQL execution error: {}", e))?
{
let mut fields: Vec<String> = Vec::with_capacity(col_count);
for i in 0..col_count {
let value = row
.get_ref(i)
.map_err(|e| format!("SQL execution error: {}", e))?;
if matches!(value, ValueRef::Null) {
fields.push(String::new());
continue;
}
let mut text = value_ref_to_string(value);
if text.contains(&delimiter)
|| text.contains(&quote)
|| text.contains(&record_delimiter)
{
text = text.replace(&quote, &(quote.clone() + &quote));
text = format!("{}{}{}", quote, text, quote);
}
fields.push(text);
}
buffer.push_str(&fields.join(&delimiter));
buffer.push_str(&record_delimiter);
while buffer.len() >= CHUNK_SIZE {
let rest = buffer.split_off(CHUNK_SIZE);
chunks.push(buffer.into_bytes());
buffer = rest;
}
}
if !buffer.is_empty() {
chunks.push(buffer.into_bytes());
}
Ok(chunks)
}
fn collect_json_chunks(
rows: &mut duckdb::Rows<'_>,
col_count: usize,
columns: &[String],
cfg: JsonOutputConfig,
) -> Result<Vec<Vec<u8>>, String> {
let record_delimiter = cfg.record_delimiter;
let mut chunks: Vec<Vec<u8>> = Vec::new();
let mut buffer = String::new();
while let Some(row) = rows
.next()
.map_err(|e| format!("SQL execution error: {}", e))?
{
let mut record: HashMap<String, serde_json::Value> = HashMap::with_capacity(col_count);
for i in 0..col_count {
let value = row
.get_ref(i)
.map_err(|e| format!("SQL execution error: {}", e))?;
let key = columns.get(i).cloned().unwrap_or_else(|| format!("_{}", i));
record.insert(key, value_ref_to_json(value));
}
let line = serde_json::to_string(&record)
.map_err(|e| format!("JSON output encoding failed: {}", e))?;
buffer.push_str(&line);
buffer.push_str(&record_delimiter);
while buffer.len() >= CHUNK_SIZE {
let rest = buffer.split_off(CHUNK_SIZE);
chunks.push(buffer.into_bytes());
buffer = rest;
}
}
if !buffer.is_empty() {
chunks.push(buffer.into_bytes());
}
Ok(chunks)
}
fn value_ref_to_string(value: ValueRef<'_>) -> String {
match value {
ValueRef::Null => String::new(),
ValueRef::Boolean(v) => v.to_string(),
ValueRef::TinyInt(v) => v.to_string(),
ValueRef::SmallInt(v) => v.to_string(),
ValueRef::Int(v) => v.to_string(),
ValueRef::BigInt(v) => v.to_string(),
ValueRef::UTinyInt(v) => v.to_string(),
ValueRef::USmallInt(v) => v.to_string(),
ValueRef::UInt(v) => v.to_string(),
ValueRef::UBigInt(v) => v.to_string(),
ValueRef::Float(v) => v.to_string(),
ValueRef::Double(v) => v.to_string(),
ValueRef::Decimal(v) => v.to_string(),
ValueRef::Text(v) => String::from_utf8_lossy(v).into_owned(),
ValueRef::Blob(v) => base64::engine::general_purpose::STANDARD.encode(v),
_ => format!("{:?}", value),
}
}
fn value_ref_to_json(value: ValueRef<'_>) -> serde_json::Value {
match value {
ValueRef::Null => serde_json::Value::Null,
ValueRef::Boolean(v) => serde_json::Value::Bool(v),
ValueRef::TinyInt(v) => serde_json::json!(v),
ValueRef::SmallInt(v) => serde_json::json!(v),
ValueRef::Int(v) => serde_json::json!(v),
ValueRef::BigInt(v) => serde_json::json!(v),
ValueRef::UTinyInt(v) => serde_json::json!(v),
ValueRef::USmallInt(v) => serde_json::json!(v),
ValueRef::UInt(v) => serde_json::json!(v),
ValueRef::UBigInt(v) => serde_json::json!(v),
ValueRef::Float(v) => serde_json::json!(v),
ValueRef::Double(v) => serde_json::json!(v),
ValueRef::Decimal(v) => serde_json::Value::String(v.to_string()),
ValueRef::Text(v) => serde_json::Value::String(String::from_utf8_lossy(v).into_owned()),
ValueRef::Blob(v) => {
serde_json::Value::String(base64::engine::general_purpose::STANDARD.encode(v))
}
_ => serde_json::Value::String(format!("{:?}", value)),
}
}
fn require_xml_content_type(headers: &HeaderMap) -> Option<Response> {
let value = headers
.get("content-type")
.and_then(|v| v.to_str().ok())
.unwrap_or("")
.trim();
if value.is_empty() {
return None;
}
let lowered = value.to_ascii_lowercase();
if lowered.starts_with("application/xml") || lowered.starts_with("text/xml") {
return None;
}
Some(s3_error_response(S3Error::new(
S3ErrorCode::InvalidRequest,
"Content-Type must be application/xml or text/xml",
)))
}
fn s3_error_response(err: S3Error) -> Response {
let status =
StatusCode::from_u16(err.http_status()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR);
let resource = if err.resource.is_empty() {
"/".to_string()
} else {
err.resource.clone()
};
let body = err
.with_resource(resource)
.with_request_id(uuid::Uuid::new_v4().simple().to_string())
.to_xml();
(status, [("content-type", "application/xml")], body).into_response()
}
fn build_stats_xml(bytes_scanned: usize, bytes_returned: usize) -> String {
format!(
"<Stats><BytesScanned>{}</BytesScanned><BytesProcessed>{}</BytesProcessed><BytesReturned>{}</BytesReturned></Stats>",
bytes_scanned,
bytes_scanned,
bytes_returned
)
}
fn encode_select_event(event_type: &str, payload: &[u8]) -> Vec<u8> {
let mut headers = Vec::new();
headers.extend(encode_select_header(":event-type", event_type));
if event_type == "Records" {
headers.extend(encode_select_header(
":content-type",
"application/octet-stream",
));
} else if event_type == "Stats" {
headers.extend(encode_select_header(":content-type", "text/xml"));
}
headers.extend(encode_select_header(":message-type", "event"));
let headers_len = headers.len() as u32;
let total_len = 4 + 4 + 4 + headers.len() + payload.len() + 4;
let mut message = Vec::with_capacity(total_len);
let mut prelude = Vec::with_capacity(8);
prelude.extend((total_len as u32).to_be_bytes());
prelude.extend(headers_len.to_be_bytes());
let prelude_crc = crc32(&prelude);
message.extend(prelude);
message.extend(prelude_crc.to_be_bytes());
message.extend(headers);
message.extend(payload);
let msg_crc = crc32(&message);
message.extend(msg_crc.to_be_bytes());
message
}
fn encode_select_header(name: &str, value: &str) -> Vec<u8> {
let name_bytes = name.as_bytes();
let value_bytes = value.as_bytes();
let mut header = Vec::with_capacity(1 + name_bytes.len() + 1 + 2 + value_bytes.len());
header.push(name_bytes.len() as u8);
header.extend(name_bytes);
header.push(7);
header.extend((value_bytes.len() as u16).to_be_bytes());
header.extend(value_bytes);
header
}
fn crc32(data: &[u8]) -> u32 {
let mut hasher = Hasher::new();
hasher.update(data);
hasher.finalize()
}

View File

@@ -1,210 +0,0 @@
use std::collections::HashMap;
use std::error::Error as StdError;
use axum::extract::{Extension, Form, State};
use axum::http::{header, HeaderMap, StatusCode};
use axum::response::{IntoResponse, Redirect, Response};
use tera::Context;
use crate::middleware::session::SessionHandle;
use crate::session::FlashMessage;
use crate::state::AppState;
pub async fn login_page(
State(state): State<AppState>,
Extension(session): Extension<SessionHandle>,
) -> Response {
if session.read(|s| s.is_authenticated()) {
return Redirect::to("/ui/buckets").into_response();
}
let mut ctx = base_context(&session, None);
let flashed = session.write(|s| s.take_flash());
inject_flash(&mut ctx, flashed);
render(&state, "login.html", &ctx)
}
#[derive(serde::Deserialize)]
pub struct LoginForm {
pub access_key: String,
pub secret_key: String,
#[serde(default)]
pub csrf_token: String,
#[serde(default)]
pub next: Option<String>,
}
pub async fn login_submit(
State(state): State<AppState>,
Extension(session): Extension<SessionHandle>,
Form(form): Form<LoginForm>,
) -> Response {
let access_key = form.access_key.trim();
let secret_key = form.secret_key.trim();
match state.iam.get_secret_key(access_key) {
Some(expected) if constant_time_eq_str(&expected, secret_key) => {
let display = state
.iam
.get_user(access_key)
.await
.and_then(|v| {
v.get("display_name")
.and_then(|d| d.as_str())
.map(|s| s.to_string())
})
.unwrap_or_else(|| access_key.to_string());
session.write(|s| {
s.user_id = Some(access_key.to_string());
s.display_name = Some(display);
s.rotate_csrf();
s.push_flash("success", "Signed in successfully.");
});
let next = form
.next
.as_deref()
.filter(|n| n.starts_with("/ui/") || *n == "/ui")
.unwrap_or("/ui/buckets")
.to_string();
Redirect::to(&next).into_response()
}
_ => {
session.write(|s| {
s.push_flash("danger", "Invalid access key or secret key.");
});
Redirect::to("/login").into_response()
}
}
}
pub async fn logout(Extension(session): Extension<SessionHandle>) -> Response {
session.write(|s| {
s.user_id = None;
s.display_name = None;
s.flash.clear();
s.rotate_csrf();
s.push_flash("info", "Signed out.");
});
Redirect::to("/login").into_response()
}
pub async fn csrf_error_page(
State(state): State<AppState>,
Extension(session): Extension<SessionHandle>,
) -> Response {
let ctx = base_context(&session, None);
let mut resp = render(&state, "csrf_error.html", &ctx);
*resp.status_mut() = StatusCode::FORBIDDEN;
resp
}
pub async fn root_redirect() -> Response {
Redirect::to("/ui/buckets").into_response()
}
pub async fn not_found_page(
State(state): State<AppState>,
Extension(session): Extension<SessionHandle>,
) -> Response {
let ctx = base_context(&session, None);
let mut resp = render(&state, "404.html", &ctx);
*resp.status_mut() = StatusCode::NOT_FOUND;
resp
}
pub async fn require_login(
Extension(session): Extension<SessionHandle>,
req: axum::extract::Request,
next: axum::middleware::Next,
) -> Response {
if session.read(|s| s.is_authenticated()) {
return next.run(req).await;
}
let path = req.uri().path().to_string();
let query = req
.uri()
.query()
.map(|q| format!("?{}", q))
.unwrap_or_default();
let next_url = format!("{}{}", path, query);
let encoded =
percent_encoding::utf8_percent_encode(&next_url, percent_encoding::NON_ALPHANUMERIC)
.to_string();
let target = format!("/login?next={}", encoded);
Redirect::to(&target).into_response()
}
pub fn render(state: &AppState, template: &str, ctx: &Context) -> Response {
let engine = match &state.templates {
Some(e) => e,
None => {
return (
StatusCode::INTERNAL_SERVER_ERROR,
"Templates not configured",
)
.into_response();
}
};
match engine.render(template, ctx) {
Ok(html) => {
let mut headers = HeaderMap::new();
headers.insert(
header::CONTENT_TYPE,
"text/html; charset=utf-8".parse().unwrap(),
);
(StatusCode::OK, headers, html).into_response()
}
Err(e) => {
let mut detail = format!("{}", e);
let mut src = StdError::source(&e);
while let Some(s) = src {
detail.push_str(" | ");
detail.push_str(&s.to_string());
src = s.source();
}
tracing::error!("Template render failed ({}): {}", template, detail);
let fallback_ctx = Context::new();
let body = if template != "500.html" {
engine
.render("500.html", &fallback_ctx)
.unwrap_or_else(|_| "Internal Server Error".to_string())
} else {
"Internal Server Error".to_string()
};
let mut headers = HeaderMap::new();
headers.insert(
header::CONTENT_TYPE,
"text/html; charset=utf-8".parse().unwrap(),
);
(StatusCode::INTERNAL_SERVER_ERROR, headers, body).into_response()
}
}
}
pub fn base_context(session: &SessionHandle, endpoint: Option<&str>) -> Context {
let mut ctx = Context::new();
let snapshot = session.snapshot();
ctx.insert("csrf_token_value", &snapshot.csrf_token);
ctx.insert("is_authenticated", &snapshot.user_id.is_some());
ctx.insert("current_user", &snapshot.user_id);
ctx.insert("current_user_display_name", &snapshot.display_name);
ctx.insert("current_endpoint", &endpoint.unwrap_or(""));
ctx.insert("request_args", &HashMap::<String, String>::new());
ctx.insert("null", &serde_json::Value::Null);
ctx.insert("none", &serde_json::Value::Null);
ctx
}
pub fn inject_flash(ctx: &mut Context, flashed: Vec<FlashMessage>) {
ctx.insert("flashed_messages", &flashed);
}
fn constant_time_eq_str(a: &str, b: &str) -> bool {
if a.len() != b.len() {
return false;
}
subtle::ConstantTimeEq::ct_eq(a.as_bytes(), b.as_bytes()).into()
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,504 +0,0 @@
pub mod config;
pub mod handlers;
pub mod middleware;
pub mod services;
pub mod session;
pub mod state;
pub mod stores;
pub mod templates;
use axum::Router;
pub const SERVER_HEADER: &str = "MyFSIO";
pub fn create_ui_router(state: state::AppState) -> Router {
use axum::routing::{delete, get, post, put};
use handlers::ui;
use handlers::ui_api;
use handlers::ui_pages;
let protected = Router::new()
.route("/", get(ui::root_redirect))
.route("/ui", get(ui::root_redirect))
.route("/ui/", get(ui::root_redirect))
.route("/ui/buckets", get(ui_pages::buckets_overview))
.route("/ui/buckets/create", post(ui_pages::create_bucket))
.route("/ui/buckets/{bucket_name}", get(ui_pages::bucket_detail))
.route(
"/ui/buckets/{bucket_name}/delete",
post(ui_pages::delete_bucket),
)
.route(
"/ui/buckets/{bucket_name}/versioning",
post(ui_pages::update_bucket_versioning),
)
.route(
"/ui/buckets/{bucket_name}/quota",
post(ui_pages::update_bucket_quota),
)
.route(
"/ui/buckets/{bucket_name}/encryption",
post(ui_pages::update_bucket_encryption),
)
.route(
"/ui/buckets/{bucket_name}/policy",
post(ui_pages::update_bucket_policy),
)
.route(
"/ui/buckets/{bucket_name}/replication",
post(ui_pages::update_bucket_replication),
)
.route(
"/ui/buckets/{bucket_name}/website",
post(ui_pages::update_bucket_website),
)
.route(
"/ui/buckets/{bucket_name}/upload",
post(ui_api::upload_object),
)
.route(
"/ui/buckets/{bucket_name}/multipart/initiate",
post(ui_api::initiate_multipart_upload),
)
.route(
"/ui/buckets/{bucket_name}/multipart/{upload_id}/part",
put(ui_api::upload_multipart_part),
)
.route(
"/ui/buckets/{bucket_name}/multipart/{upload_id}/complete",
post(ui_api::complete_multipart_upload),
)
.route(
"/ui/buckets/{bucket_name}/multipart/{upload_id}/abort",
delete(ui_api::abort_multipart_upload),
)
.route(
"/ui/buckets/{bucket_name}/objects",
get(ui_api::list_bucket_objects),
)
.route(
"/ui/buckets/{bucket_name}/objects/stream",
get(ui_api::stream_bucket_objects),
)
.route(
"/ui/buckets/{bucket_name}/folders",
get(ui_api::list_bucket_folders),
)
.route(
"/ui/buckets/{bucket_name}/copy-targets",
get(ui_api::list_copy_targets),
)
.route(
"/ui/buckets/{bucket_name}/objects/{*rest}",
get(ui_api::object_get_dispatch).post(ui_api::object_post_dispatch),
)
.route(
"/ui/buckets/{bucket_name}/acl",
get(ui_api::bucket_acl).post(ui_api::update_bucket_acl),
)
.route(
"/ui/buckets/{bucket_name}/cors",
get(ui_api::bucket_cors).post(ui_api::update_bucket_cors),
)
.route(
"/ui/buckets/{bucket_name}/lifecycle",
get(ui_api::bucket_lifecycle).post(ui_api::update_bucket_lifecycle),
)
.route(
"/ui/buckets/{bucket_name}/lifecycle/history",
get(ui_api::lifecycle_history_stub),
)
.route(
"/ui/buckets/{bucket_name}/replication/status",
get(ui_api::replication_status),
)
.route(
"/ui/buckets/{bucket_name}/replication/failures",
get(ui_api::replication_failures).delete(ui_api::clear_replication_failures),
)
.route(
"/ui/buckets/{bucket_name}/replication/failures/retry",
post(ui_api::retry_replication_failure),
)
.route(
"/ui/buckets/{bucket_name}/replication/failures/retry-all",
post(ui_api::retry_all_replication_failures),
)
.route(
"/ui/buckets/{bucket_name}/replication/failures/dismiss",
delete(ui_api::dismiss_replication_failure),
)
.route(
"/ui/buckets/{bucket_name}/replication/failures/clear",
delete(ui_api::clear_replication_failures),
)
.route(
"/ui/buckets/{bucket_name}/bulk-delete",
post(ui_api::bulk_delete_objects),
)
.route(
"/ui/buckets/{bucket_name}/bulk-download",
post(ui_api::bulk_download_objects),
)
.route(
"/ui/buckets/{bucket_name}/archived",
get(ui_api::archived_objects),
)
.route(
"/ui/buckets/{bucket_name}/archived/{*rest}",
post(ui_api::archived_post_dispatch),
)
.route("/ui/iam", get(ui_pages::iam_dashboard))
.route("/ui/iam/users", post(ui_pages::create_iam_user))
.route("/ui/iam/users/{user_id}", post(ui_pages::update_iam_user))
.route(
"/ui/iam/users/{user_id}/delete",
post(ui_pages::delete_iam_user),
)
.route(
"/ui/iam/users/{user_id}/policies",
post(ui_pages::update_iam_policies),
)
.route(
"/ui/iam/users/{user_id}/expiry",
post(ui_pages::update_iam_expiry),
)
.route(
"/ui/iam/users/{user_id}/rotate-secret",
post(ui_pages::rotate_iam_secret),
)
.route("/ui/connections/create", post(ui_pages::create_connection))
.route("/ui/connections/test", post(ui_api::test_connection))
.route(
"/ui/connections/{connection_id}",
post(ui_pages::update_connection),
)
.route(
"/ui/connections/{connection_id}/delete",
post(ui_pages::delete_connection),
)
.route(
"/ui/connections/{connection_id}/health",
get(ui_api::connection_health),
)
.route("/ui/sites", get(ui_pages::sites_dashboard))
.route("/ui/sites/local", post(ui_pages::update_local_site))
.route("/ui/sites/peers", post(ui_pages::add_peer_site))
.route(
"/ui/sites/peers/{site_id}/update",
post(ui_pages::update_peer_site),
)
.route(
"/ui/sites/peers/{site_id}/delete",
post(ui_pages::delete_peer_site),
)
.route("/ui/sites/peers/{site_id}/health", get(ui_api::peer_health))
.route(
"/ui/sites/peers/{site_id}/sync-stats",
get(ui_api::peer_sync_stats),
)
.route(
"/ui/sites/peers/{site_id}/bidirectional-status",
get(ui_api::peer_bidirectional_status),
)
.route("/ui/connections", get(ui_pages::connections_dashboard))
.route("/ui/metrics", get(ui_pages::metrics_dashboard))
.route(
"/ui/metrics/settings",
get(ui_api::metrics_settings).put(ui_api::update_metrics_settings),
)
.route("/ui/metrics/api", get(ui_api::metrics_api))
.route("/ui/metrics/history", get(ui_api::metrics_history))
.route("/ui/metrics/operations", get(ui_api::metrics_operations))
.route(
"/ui/metrics/operations/history",
get(ui_api::metrics_operations_history),
)
.route("/ui/system", get(ui_pages::system_dashboard))
.route("/ui/system/gc/status", get(ui_api::gc_status_ui))
.route("/ui/system/gc/run", post(ui_api::gc_run_ui))
.route("/ui/system/gc/history", get(ui_api::gc_history_ui))
.route(
"/ui/system/integrity/status",
get(ui_api::integrity_status_ui),
)
.route("/ui/system/integrity/run", post(ui_api::integrity_run_ui))
.route(
"/ui/system/integrity/history",
get(ui_api::integrity_history_ui),
)
.route(
"/ui/website-domains",
get(ui_pages::website_domains_dashboard),
)
.route(
"/ui/website-domains/create",
post(ui_pages::create_website_domain),
)
.route(
"/ui/website-domains/{domain}",
post(ui_pages::update_website_domain),
)
.route(
"/ui/website-domains/{domain}/delete",
post(ui_pages::delete_website_domain),
)
.route("/ui/replication/new", get(ui_pages::replication_wizard))
.route(
"/ui/replication/create",
post(ui_pages::create_peer_replication_rules_from_query),
)
.route(
"/ui/sites/peers/{site_id}/replication-rules",
post(ui_pages::create_peer_replication_rules),
)
.route("/ui/docs", get(ui_pages::docs_page))
.layer(axum::middleware::from_fn(ui::require_login));
let public = Router::new()
.route("/login", get(ui::login_page).post(ui::login_submit))
.route("/logout", post(ui::logout).get(ui::logout))
.route("/csrf-error", get(ui::csrf_error_page));
let session_state = middleware::SessionLayerState {
store: state.sessions.clone(),
secure: false,
};
let static_service = tower_http::services::ServeDir::new(&state.config.static_dir);
protected
.merge(public)
.fallback(ui::not_found_page)
.layer(axum::middleware::from_fn(middleware::csrf_layer))
.layer(axum::middleware::from_fn_with_state(
session_state,
middleware::session_layer,
))
.layer(axum::middleware::from_fn_with_state(
state.clone(),
middleware::ui_metrics_layer,
))
.with_state(state)
.nest_service("/static", static_service)
.layer(axum::middleware::from_fn(middleware::server_header))
.layer(tower_http::compression::CompressionLayer::new())
}
pub fn create_router(state: state::AppState) -> Router {
let mut router = Router::new()
.route("/myfsio/health", axum::routing::get(handlers::health_check))
.route("/", axum::routing::get(handlers::list_buckets))
.route(
"/{bucket}",
axum::routing::put(handlers::create_bucket)
.get(handlers::get_bucket)
.delete(handlers::delete_bucket)
.head(handlers::head_bucket)
.post(handlers::post_bucket),
)
.route(
"/{bucket}/",
axum::routing::put(handlers::create_bucket)
.get(handlers::get_bucket)
.delete(handlers::delete_bucket)
.head(handlers::head_bucket)
.post(handlers::post_bucket),
)
.route(
"/{bucket}/{*key}",
axum::routing::put(handlers::put_object)
.get(handlers::get_object)
.delete(handlers::delete_object)
.head(handlers::head_object)
.post(handlers::post_object),
);
if state.config.kms_enabled {
router = router
.route(
"/kms/keys",
axum::routing::get(handlers::kms::list_keys).post(handlers::kms::create_key),
)
.route(
"/kms/keys/{key_id}",
axum::routing::get(handlers::kms::get_key).delete(handlers::kms::delete_key),
)
.route(
"/kms/keys/{key_id}/enable",
axum::routing::post(handlers::kms::enable_key),
)
.route(
"/kms/keys/{key_id}/disable",
axum::routing::post(handlers::kms::disable_key),
)
.route("/kms/encrypt", axum::routing::post(handlers::kms::encrypt))
.route("/kms/decrypt", axum::routing::post(handlers::kms::decrypt))
.route(
"/kms/generate-data-key",
axum::routing::post(handlers::kms::generate_data_key),
)
.route(
"/kms/generate-data-key-without-plaintext",
axum::routing::post(handlers::kms::generate_data_key_without_plaintext),
)
.route(
"/kms/re-encrypt",
axum::routing::post(handlers::kms::re_encrypt),
)
.route(
"/kms/generate-random",
axum::routing::post(handlers::kms::generate_random),
)
.route(
"/kms/client/generate-key",
axum::routing::post(handlers::kms::client_generate_key),
)
.route(
"/kms/client/encrypt",
axum::routing::post(handlers::kms::client_encrypt),
)
.route(
"/kms/client/decrypt",
axum::routing::post(handlers::kms::client_decrypt),
)
.route(
"/kms/materials/{key_id}",
axum::routing::post(handlers::kms::materials),
);
}
router = router
.route(
"/admin/site",
axum::routing::get(handlers::admin::get_local_site)
.put(handlers::admin::update_local_site),
)
.route(
"/admin/sites",
axum::routing::get(handlers::admin::list_all_sites)
.post(handlers::admin::register_peer_site),
)
.route(
"/admin/sites/{site_id}",
axum::routing::get(handlers::admin::get_peer_site)
.put(handlers::admin::update_peer_site)
.delete(handlers::admin::delete_peer_site),
)
.route(
"/admin/sites/{site_id}/health",
axum::routing::get(handlers::admin::check_peer_health)
.post(handlers::admin::check_peer_health),
)
.route(
"/admin/sites/{site_id}/bidirectional-status",
axum::routing::get(handlers::admin::check_bidirectional_status),
)
.route(
"/admin/topology",
axum::routing::get(handlers::admin::get_topology),
)
.route(
"/admin/site/local",
axum::routing::get(handlers::admin::get_local_site)
.put(handlers::admin::update_local_site),
)
.route(
"/admin/site/all",
axum::routing::get(handlers::admin::list_all_sites),
)
.route(
"/admin/site/peers",
axum::routing::post(handlers::admin::register_peer_site),
)
.route(
"/admin/site/peers/{site_id}",
axum::routing::get(handlers::admin::get_peer_site)
.put(handlers::admin::update_peer_site)
.delete(handlers::admin::delete_peer_site),
)
.route(
"/admin/site/peers/{site_id}/health",
axum::routing::post(handlers::admin::check_peer_health),
)
.route(
"/admin/site/topology",
axum::routing::get(handlers::admin::get_topology),
)
.route(
"/admin/site/peers/{site_id}/bidirectional-status",
axum::routing::get(handlers::admin::check_bidirectional_status),
)
.route(
"/admin/iam/users",
axum::routing::get(handlers::admin::iam_list_users),
)
.route(
"/admin/iam/users/{identifier}",
axum::routing::get(handlers::admin::iam_get_user),
)
.route(
"/admin/iam/users/{identifier}/policies",
axum::routing::get(handlers::admin::iam_get_user_policies),
)
.route(
"/admin/iam/users/{identifier}/access-keys",
axum::routing::post(handlers::admin::iam_create_access_key),
)
.route(
"/admin/iam/users/{identifier}/access-keys/{access_key}",
axum::routing::delete(handlers::admin::iam_delete_access_key),
)
.route(
"/admin/iam/users/{identifier}/disable",
axum::routing::post(handlers::admin::iam_disable_user),
)
.route(
"/admin/iam/users/{identifier}/enable",
axum::routing::post(handlers::admin::iam_enable_user),
)
.route(
"/admin/website-domains",
axum::routing::get(handlers::admin::list_website_domains)
.post(handlers::admin::create_website_domain),
)
.route(
"/admin/website-domains/{domain}",
axum::routing::get(handlers::admin::get_website_domain)
.put(handlers::admin::update_website_domain)
.delete(handlers::admin::delete_website_domain),
)
.route(
"/admin/gc/status",
axum::routing::get(handlers::admin::gc_status),
)
.route(
"/admin/gc/run",
axum::routing::post(handlers::admin::gc_run),
)
.route(
"/admin/gc/history",
axum::routing::get(handlers::admin::gc_history),
)
.route(
"/admin/integrity/status",
axum::routing::get(handlers::admin::integrity_status),
)
.route(
"/admin/integrity/run",
axum::routing::post(handlers::admin::integrity_run),
)
.route(
"/admin/integrity/history",
axum::routing::get(handlers::admin::integrity_history),
);
router
.layer(axum::middleware::from_fn_with_state(
state.clone(),
middleware::auth_layer,
))
.layer(axum::middleware::from_fn(middleware::server_header))
.layer(tower_http::compression::CompressionLayer::new())
.with_state(state)
}

View File

@@ -1,425 +0,0 @@
use clap::{Parser, Subcommand};
use myfsio_server::config::ServerConfig;
use myfsio_server::state::AppState;
#[derive(Parser)]
#[command(
name = "myfsio",
version,
about = "MyFSIO S3-compatible storage engine"
)]
struct Cli {
#[arg(long, help = "Validate configuration and exit")]
check_config: bool,
#[arg(long, help = "Show configuration summary and exit")]
show_config: bool,
#[arg(long, help = "Reset admin credentials and exit")]
reset_cred: bool,
#[command(subcommand)]
command: Option<Command>,
}
#[derive(Subcommand)]
enum Command {
Serve,
Version,
}
#[tokio::main]
async fn main() {
load_env_files();
tracing_subscriber::fmt::init();
let cli = Cli::parse();
let config = ServerConfig::from_env();
if cli.reset_cred {
reset_admin_credentials(&config);
return;
}
if cli.check_config || cli.show_config {
print_config_summary(&config);
if cli.check_config {
let issues = validate_config(&config);
for issue in &issues {
println!("{issue}");
}
if issues.iter().any(|issue| issue.starts_with("CRITICAL:")) {
std::process::exit(1);
}
}
return;
}
match cli.command.unwrap_or(Command::Serve) {
Command::Version => {
println!("myfsio {}", env!("CARGO_PKG_VERSION"));
return;
}
Command::Serve => {}
}
ensure_iam_bootstrap(&config);
let bind_addr = config.bind_addr;
let ui_bind_addr = config.ui_bind_addr;
tracing::info!("MyFSIO Rust Engine starting — API on {}", bind_addr);
if config.ui_enabled {
tracing::info!("UI will bind on {}", ui_bind_addr);
}
tracing::info!("Storage root: {}", config.storage_root.display());
tracing::info!("Region: {}", config.region);
tracing::info!(
"Encryption: {}, KMS: {}, GC: {}, Lifecycle: {}, Integrity: {}, Metrics History: {}, Operation Metrics: {}, UI: {}",
config.encryption_enabled,
config.kms_enabled,
config.gc_enabled,
config.lifecycle_enabled,
config.integrity_enabled,
config.metrics_history_enabled,
config.metrics_enabled,
config.ui_enabled
);
let state = if config.encryption_enabled || config.kms_enabled {
AppState::new_with_encryption(config.clone()).await
} else {
AppState::new(config.clone())
};
let mut bg_handles: Vec<tokio::task::JoinHandle<()>> = Vec::new();
if let Some(ref gc) = state.gc {
bg_handles.push(gc.clone().start_background());
tracing::info!("GC background service started");
}
if let Some(ref integrity) = state.integrity {
bg_handles.push(integrity.clone().start_background());
tracing::info!("Integrity checker background service started");
}
if let Some(ref metrics) = state.metrics {
bg_handles.push(metrics.clone().start_background());
tracing::info!("Metrics collector background service started");
}
if let Some(ref system_metrics) = state.system_metrics {
bg_handles.push(system_metrics.clone().start_background());
tracing::info!("System metrics history collector started");
}
if config.lifecycle_enabled {
let lifecycle =
std::sync::Arc::new(myfsio_server::services::lifecycle::LifecycleService::new(
state.storage.clone(),
myfsio_server::services::lifecycle::LifecycleConfig::default(),
));
bg_handles.push(lifecycle.start_background());
tracing::info!("Lifecycle manager background service started");
}
if let Some(ref site_sync) = state.site_sync {
let worker = site_sync.clone();
bg_handles.push(tokio::spawn(async move {
worker.run().await;
}));
tracing::info!("Site sync worker started");
}
let ui_enabled = config.ui_enabled;
let api_app = myfsio_server::create_router(state.clone());
let ui_app = if ui_enabled {
Some(myfsio_server::create_ui_router(state.clone()))
} else {
None
};
let api_listener = match tokio::net::TcpListener::bind(bind_addr).await {
Ok(listener) => listener,
Err(err) => {
if err.kind() == std::io::ErrorKind::AddrInUse {
tracing::error!("API port already in use: {}", bind_addr);
} else {
tracing::error!("Failed to bind API {}: {}", bind_addr, err);
}
for handle in bg_handles {
handle.abort();
}
std::process::exit(1);
}
};
tracing::info!("API listening on {}", bind_addr);
let ui_listener = if let Some(ref app) = ui_app {
let _ = app;
match tokio::net::TcpListener::bind(ui_bind_addr).await {
Ok(listener) => {
tracing::info!("UI listening on {}", ui_bind_addr);
Some(listener)
}
Err(err) => {
if err.kind() == std::io::ErrorKind::AddrInUse {
tracing::error!("UI port already in use: {}", ui_bind_addr);
} else {
tracing::error!("Failed to bind UI {}: {}", ui_bind_addr, err);
}
for handle in bg_handles {
handle.abort();
}
std::process::exit(1);
}
}
} else {
None
};
let shutdown = shutdown_signal_shared();
let api_shutdown = shutdown.clone();
let api_task = tokio::spawn(async move {
axum::serve(api_listener, api_app)
.with_graceful_shutdown(async move {
api_shutdown.notified().await;
})
.await
});
let ui_task = if let (Some(listener), Some(app)) = (ui_listener, ui_app) {
let ui_shutdown = shutdown.clone();
Some(tokio::spawn(async move {
axum::serve(listener, app)
.with_graceful_shutdown(async move {
ui_shutdown.notified().await;
})
.await
}))
} else {
None
};
tokio::signal::ctrl_c()
.await
.expect("Failed to listen for Ctrl+C");
tracing::info!("Shutdown signal received");
shutdown.notify_waiters();
if let Err(err) = api_task.await.unwrap_or(Ok(())) {
tracing::error!("API server exited with error: {}", err);
}
if let Some(task) = ui_task {
if let Err(err) = task.await.unwrap_or(Ok(())) {
tracing::error!("UI server exited with error: {}", err);
}
}
for handle in bg_handles {
handle.abort();
}
}
fn print_config_summary(config: &ServerConfig) {
println!("MyFSIO Rust Configuration");
println!("Version: {}", env!("CARGO_PKG_VERSION"));
println!("API bind: {}", config.bind_addr);
println!("UI bind: {}", config.ui_bind_addr);
println!("UI enabled: {}", config.ui_enabled);
println!("Storage root: {}", config.storage_root.display());
println!("IAM config: {}", config.iam_config_path.display());
println!("Region: {}", config.region);
println!("Encryption enabled: {}", config.encryption_enabled);
println!("KMS enabled: {}", config.kms_enabled);
println!("GC enabled: {}", config.gc_enabled);
println!("Integrity enabled: {}", config.integrity_enabled);
println!("Lifecycle enabled: {}", config.lifecycle_enabled);
println!(
"Website hosting enabled: {}",
config.website_hosting_enabled
);
println!("Site sync enabled: {}", config.site_sync_enabled);
println!(
"Metrics history enabled: {}",
config.metrics_history_enabled
);
println!("Operation metrics enabled: {}", config.metrics_enabled);
}
fn validate_config(config: &ServerConfig) -> Vec<String> {
let mut issues = Vec::new();
if config.ui_enabled && config.bind_addr == config.ui_bind_addr {
issues.push(
"CRITICAL: API and UI bind addresses cannot be identical when UI is enabled."
.to_string(),
);
}
if config.presigned_url_min_expiry > config.presigned_url_max_expiry {
issues.push("CRITICAL: PRESIGNED_URL_MIN_EXPIRY_SECONDS cannot exceed PRESIGNED_URL_MAX_EXPIRY_SECONDS.".to_string());
}
if let Err(err) = std::fs::create_dir_all(&config.storage_root) {
issues.push(format!(
"CRITICAL: Cannot create storage root {}: {}",
config.storage_root.display(),
err
));
}
if let Some(parent) = config.iam_config_path.parent() {
if let Err(err) = std::fs::create_dir_all(parent) {
issues.push(format!(
"CRITICAL: Cannot create IAM config directory {}: {}",
parent.display(),
err
));
}
}
if config.encryption_enabled && config.secret_key.is_none() {
issues.push(
"WARNING: ENCRYPTION_ENABLED=true but SECRET_KEY is not configured; secure-at-rest config encryption is unavailable.".to_string(),
);
}
if config.site_sync_enabled && !config.website_hosting_enabled {
issues.push(
"INFO: SITE_SYNC_ENABLED=true without WEBSITE_HOSTING_ENABLED; this is valid but unrelated.".to_string(),
);
}
issues
}
fn shutdown_signal_shared() -> std::sync::Arc<tokio::sync::Notify> {
std::sync::Arc::new(tokio::sync::Notify::new())
}
fn load_env_files() {
let cwd = std::env::current_dir().ok();
let mut candidates: Vec<std::path::PathBuf> = Vec::new();
candidates.push(std::path::PathBuf::from("/opt/myfsio/myfsio.env"));
if let Some(ref dir) = cwd {
candidates.push(dir.join(".env"));
candidates.push(dir.join("myfsio.env"));
for ancestor in dir.ancestors().skip(1).take(4) {
candidates.push(ancestor.join(".env"));
candidates.push(ancestor.join("myfsio.env"));
}
}
let mut seen = std::collections::HashSet::new();
for path in candidates {
if !seen.insert(path.clone()) {
continue;
}
if path.is_file() {
match dotenvy::from_path_override(&path) {
Ok(()) => eprintln!("Loaded env file: {}", path.display()),
Err(e) => eprintln!("Failed to load env file {}: {}", path.display(), e),
}
}
}
}
fn ensure_iam_bootstrap(config: &ServerConfig) {
let iam_path = &config.iam_config_path;
if iam_path.exists() {
return;
}
let access_key = std::env::var("ADMIN_ACCESS_KEY")
.ok()
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.unwrap_or_else(|| format!("AK{}", uuid::Uuid::new_v4().simple()));
let secret_key = std::env::var("ADMIN_SECRET_KEY")
.ok()
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.unwrap_or_else(|| format!("SK{}", uuid::Uuid::new_v4().simple()));
let user_id = format!("u-{}", &uuid::Uuid::new_v4().simple().to_string()[..16]);
let created_at = chrono::Utc::now().to_rfc3339();
let body = serde_json::json!({
"version": 2,
"users": [{
"user_id": user_id,
"display_name": "Local Admin",
"enabled": true,
"access_keys": [{
"access_key": access_key,
"secret_key": secret_key,
"status": "active",
"created_at": created_at,
}],
"policies": [{
"bucket": "*",
"actions": ["*"],
"prefix": "*",
}]
}]
});
let json = match serde_json::to_string_pretty(&body) {
Ok(s) => s,
Err(e) => {
tracing::error!("Failed to serialize IAM bootstrap config: {}", e);
return;
}
};
if let Some(parent) = iam_path.parent() {
if let Err(e) = std::fs::create_dir_all(parent) {
tracing::error!(
"Failed to create IAM config dir {}: {}",
parent.display(),
e
);
return;
}
}
if let Err(e) = std::fs::write(iam_path, json) {
tracing::error!(
"Failed to write IAM bootstrap config {}: {}",
iam_path.display(),
e
);
return;
}
tracing::info!("============================================================");
tracing::info!("MYFSIO - ADMIN CREDENTIALS INITIALIZED");
tracing::info!("============================================================");
tracing::info!("Access Key: {}", access_key);
tracing::info!("Secret Key: {}", secret_key);
tracing::info!("Saved to: {}", iam_path.display());
tracing::info!("============================================================");
}
fn reset_admin_credentials(config: &ServerConfig) {
if let Some(parent) = config.iam_config_path.parent() {
if let Err(err) = std::fs::create_dir_all(parent) {
eprintln!(
"Failed to create IAM config directory {}: {}",
parent.display(),
err
);
std::process::exit(1);
}
}
if config.iam_config_path.exists() {
let backup = config
.iam_config_path
.with_extension(format!("bak-{}", chrono::Utc::now().timestamp()));
if let Err(err) = std::fs::rename(&config.iam_config_path, &backup) {
eprintln!(
"Failed to back up existing IAM config {}: {}",
config.iam_config_path.display(),
err
);
std::process::exit(1);
}
println!("Backed up existing IAM config to {}", backup.display());
}
ensure_iam_bootstrap(config);
println!("Admin credentials reset.");
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,85 +0,0 @@
mod auth;
pub mod session;
pub use auth::auth_layer;
pub use session::{csrf_layer, session_layer, SessionHandle, SessionLayerState};
use axum::extract::{Request, State};
use axum::middleware::Next;
use axum::response::Response;
use std::time::Instant;
use crate::state::AppState;
pub async fn server_header(req: Request, next: Next) -> Response {
let mut resp = next.run(req).await;
resp.headers_mut()
.insert("server", crate::SERVER_HEADER.parse().unwrap());
resp
}
pub async fn ui_metrics_layer(State(state): State<AppState>, req: Request, next: Next) -> Response {
let metrics = match state.metrics.clone() {
Some(m) => m,
None => return next.run(req).await,
};
let start = Instant::now();
let method = req.method().clone();
let path = req.uri().path().to_string();
let endpoint_type = classify_ui_endpoint(&path);
let bytes_in = req
.headers()
.get(axum::http::header::CONTENT_LENGTH)
.and_then(|v| v.to_str().ok())
.and_then(|v| v.parse::<u64>().ok())
.unwrap_or(0);
let response = next.run(req).await;
let latency_ms = start.elapsed().as_secs_f64() * 1000.0;
let status = response.status().as_u16();
let bytes_out = response
.headers()
.get(axum::http::header::CONTENT_LENGTH)
.and_then(|v| v.to_str().ok())
.and_then(|v| v.parse::<u64>().ok())
.unwrap_or(0);
let error_code = if status >= 400 { Some("UIError") } else { None };
metrics.record_request(
method.as_str(),
endpoint_type,
status,
latency_ms,
bytes_in,
bytes_out,
error_code,
);
response
}
fn classify_ui_endpoint(path: &str) -> &'static str {
if path.contains("/upload") {
"ui_upload"
} else if path.starts_with("/ui/buckets/") {
"ui_bucket"
} else if path.starts_with("/ui/iam") {
"ui_iam"
} else if path.starts_with("/ui/sites") {
"ui_sites"
} else if path.starts_with("/ui/connections") {
"ui_connections"
} else if path.starts_with("/ui/metrics") {
"ui_metrics"
} else if path.starts_with("/ui/system") {
"ui_system"
} else if path.starts_with("/ui/website-domains") {
"ui_website_domains"
} else if path.starts_with("/ui/replication") {
"ui_replication"
} else if path.starts_with("/login") || path.starts_with("/logout") {
"ui_auth"
} else {
"ui_other"
}
}

View File

@@ -1,228 +0,0 @@
use std::sync::Arc;
use axum::extract::{Request, State};
use axum::http::{header, HeaderValue, StatusCode};
use axum::middleware::Next;
use axum::response::{IntoResponse, Response};
use cookie::{Cookie, SameSite};
use parking_lot::Mutex;
use crate::session::{
csrf_tokens_match, SessionData, SessionStore, CSRF_FIELD_NAME, CSRF_HEADER_NAME,
SESSION_COOKIE_NAME,
};
#[derive(Clone)]
pub struct SessionLayerState {
pub store: Arc<SessionStore>,
pub secure: bool,
}
#[derive(Clone)]
pub struct SessionHandle {
pub id: String,
inner: Arc<Mutex<SessionData>>,
dirty: Arc<Mutex<bool>>,
}
impl SessionHandle {
pub fn new(id: String, data: SessionData) -> Self {
Self {
id,
inner: Arc::new(Mutex::new(data)),
dirty: Arc::new(Mutex::new(false)),
}
}
pub fn read<R>(&self, f: impl FnOnce(&SessionData) -> R) -> R {
let guard = self.inner.lock();
f(&guard)
}
pub fn write<R>(&self, f: impl FnOnce(&mut SessionData) -> R) -> R {
let mut guard = self.inner.lock();
let out = f(&mut guard);
*self.dirty.lock() = true;
out
}
pub fn snapshot(&self) -> SessionData {
self.inner.lock().clone()
}
pub fn is_dirty(&self) -> bool {
*self.dirty.lock()
}
}
pub async fn session_layer(
State(state): State<SessionLayerState>,
mut req: Request,
next: Next,
) -> Response {
let cookie_id = extract_session_cookie(&req);
let (session_id, session_data, is_new) =
match cookie_id.and_then(|id| state.store.get(&id).map(|data| (id.clone(), data))) {
Some((id, data)) => (id, data, false),
None => {
let (id, data) = state.store.create();
(id, data, true)
}
};
let handle = SessionHandle::new(session_id.clone(), session_data);
req.extensions_mut().insert(handle.clone());
let mut resp = next.run(req).await;
if handle.is_dirty() {
state.store.save(&handle.id, handle.snapshot());
}
if is_new {
let cookie = build_session_cookie(&session_id, state.secure);
if let Ok(value) = HeaderValue::from_str(&cookie.to_string()) {
resp.headers_mut().append(header::SET_COOKIE, value);
}
}
resp
}
pub async fn csrf_layer(req: Request, next: Next) -> Response {
const CSRF_HEADER_ALIAS: &str = "x-csrftoken";
let method = req.method().clone();
let needs_check = matches!(
method,
axum::http::Method::POST
| axum::http::Method::PUT
| axum::http::Method::PATCH
| axum::http::Method::DELETE
);
if !needs_check {
return next.run(req).await;
}
let is_ui = req.uri().path().starts_with("/ui/")
|| req.uri().path() == "/ui"
|| req.uri().path() == "/login"
|| req.uri().path() == "/logout";
if !is_ui {
return next.run(req).await;
}
let handle = match req.extensions().get::<SessionHandle>() {
Some(h) => h.clone(),
None => return (StatusCode::FORBIDDEN, "Missing session").into_response(),
};
let expected = handle.read(|s| s.csrf_token.clone());
let header_token = req
.headers()
.get(CSRF_HEADER_NAME)
.or_else(|| req.headers().get(CSRF_HEADER_ALIAS))
.and_then(|v| v.to_str().ok())
.map(|s| s.to_string());
if let Some(token) = header_token.as_deref() {
if csrf_tokens_match(&expected, token) {
return next.run(req).await;
}
}
let content_type = req
.headers()
.get(header::CONTENT_TYPE)
.and_then(|v| v.to_str().ok())
.unwrap_or("")
.to_string();
let (parts, body) = req.into_parts();
let bytes = match axum::body::to_bytes(body, usize::MAX).await {
Ok(b) => b,
Err(_) => return (StatusCode::BAD_REQUEST, "Body read failed").into_response(),
};
let form_token = if content_type.starts_with("application/x-www-form-urlencoded") {
extract_form_token(&bytes)
} else if content_type.starts_with("multipart/form-data") {
extract_multipart_token(&content_type, &bytes)
} else {
None
};
if let Some(token) = form_token {
if csrf_tokens_match(&expected, &token) {
let req = Request::from_parts(parts, axum::body::Body::from(bytes));
return next.run(req).await;
}
}
tracing::warn!(
path = %parts.uri.path(),
content_type = %content_type,
expected_len = expected.len(),
header_present = header_token.is_some(),
"CSRF token mismatch"
);
(StatusCode::FORBIDDEN, "Invalid CSRF token").into_response()
}
fn extract_multipart_token(content_type: &str, body: &[u8]) -> Option<String> {
let boundary = multer::parse_boundary(content_type).ok()?;
let prefix = format!("--{}", boundary);
let text = std::str::from_utf8(body).ok()?;
let needle = "name=\"csrf_token\"";
let idx = text.find(needle)?;
let after = &text[idx + needle.len()..];
let body_start = after.find("\r\n\r\n")? + 4;
let tail = &after[body_start..];
let end = tail
.find(&format!("\r\n--{}", prefix.trim_start_matches("--")))
.or_else(|| tail.find("\r\n--"))
.unwrap_or(tail.len());
Some(tail[..end].trim().to_string())
}
fn extract_session_cookie(req: &Request) -> Option<String> {
let raw = req.headers().get(header::COOKIE)?.to_str().ok()?;
for pair in raw.split(';') {
if let Ok(cookie) = Cookie::parse(pair.trim().to_string()) {
if cookie.name() == SESSION_COOKIE_NAME {
return Some(cookie.value().to_string());
}
}
}
None
}
fn build_session_cookie(id: &str, secure: bool) -> Cookie<'static> {
let mut cookie = Cookie::new(SESSION_COOKIE_NAME, id.to_string());
cookie.set_http_only(true);
cookie.set_same_site(SameSite::Lax);
cookie.set_secure(secure);
cookie.set_path("/");
cookie
}
fn extract_form_token(body: &[u8]) -> Option<String> {
let text = std::str::from_utf8(body).ok()?;
let prefix = format!("{}=", CSRF_FIELD_NAME);
for pair in text.split('&') {
if let Some(rest) = pair.strip_prefix(&prefix) {
return urldecode(rest);
}
}
None
}
fn urldecode(s: &str) -> Option<String> {
percent_encoding::percent_decode_str(&s.replace('+', " "))
.decode_utf8()
.ok()
.map(|c| c.into_owned())
}

View File

@@ -1,105 +0,0 @@
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LoggingConfiguration {
pub target_bucket: String,
#[serde(default)]
pub target_prefix: String,
#[serde(default = "default_enabled")]
pub enabled: bool,
}
fn default_enabled() -> bool {
true
}
#[derive(Serialize, Deserialize)]
struct StoredLoggingFile {
#[serde(rename = "LoggingEnabled")]
logging_enabled: Option<StoredLoggingEnabled>,
}
#[derive(Serialize, Deserialize)]
struct StoredLoggingEnabled {
#[serde(rename = "TargetBucket")]
target_bucket: String,
#[serde(rename = "TargetPrefix", default)]
target_prefix: String,
}
pub struct AccessLoggingService {
storage_root: PathBuf,
cache: RwLock<HashMap<String, Option<LoggingConfiguration>>>,
}
impl AccessLoggingService {
pub fn new(storage_root: &Path) -> Self {
Self {
storage_root: storage_root.to_path_buf(),
cache: RwLock::new(HashMap::new()),
}
}
fn config_path(&self, bucket: &str) -> PathBuf {
self.storage_root
.join(".myfsio.sys")
.join("buckets")
.join(bucket)
.join("logging.json")
}
pub fn get(&self, bucket: &str) -> Option<LoggingConfiguration> {
if let Some(cached) = self.cache.read().get(bucket).cloned() {
return cached;
}
let path = self.config_path(bucket);
let config = if path.exists() {
std::fs::read_to_string(&path)
.ok()
.and_then(|s| serde_json::from_str::<StoredLoggingFile>(&s).ok())
.and_then(|f| f.logging_enabled)
.map(|e| LoggingConfiguration {
target_bucket: e.target_bucket,
target_prefix: e.target_prefix,
enabled: true,
})
} else {
None
};
self.cache
.write()
.insert(bucket.to_string(), config.clone());
config
}
pub fn set(&self, bucket: &str, config: LoggingConfiguration) -> std::io::Result<()> {
let path = self.config_path(bucket);
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent)?;
}
let stored = StoredLoggingFile {
logging_enabled: Some(StoredLoggingEnabled {
target_bucket: config.target_bucket.clone(),
target_prefix: config.target_prefix.clone(),
}),
};
let json = serde_json::to_string_pretty(&stored)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
std::fs::write(&path, json)?;
self.cache.write().insert(bucket.to_string(), Some(config));
Ok(())
}
pub fn delete(&self, bucket: &str) {
let path = self.config_path(bucket);
if path.exists() {
let _ = std::fs::remove_file(&path);
}
self.cache.write().insert(bucket.to_string(), None);
}
}

Some files were not shown because too many files have changed in this diff Show More