Compare commits
26 Commits
v0.2.5
...
4c661477d5
| Author | SHA1 | Date | |
|---|---|---|---|
| 4c661477d5 | |||
| f3f52f14a5 | |||
| d19ba3e305 | |||
| c627f41f53 | |||
| bcad0cd3da | |||
| 67f057ca1c | |||
| 01e79e6993 | |||
| 1e3c4b545f | |||
| 4ecd32a554 | |||
| aa6d7c4d28 | |||
| 6e6d6d32bf | |||
| 54705ab9c4 | |||
| 77a46d0725 | |||
| 0f750b9d89 | |||
| e0dee9db36 | |||
| 126657c99f | |||
| 07fb1ac773 | |||
| 147962e1dd | |||
| 2643a79121 | |||
| e9a035827b | |||
| 033b8a82be | |||
| e76c311231 | |||
| cbdf1a27c8 | |||
| 4a60cb269a | |||
| ebe7f6222d | |||
| 70b61fd8e6 |
22
Dockerfile
22
Dockerfile
@@ -1,25 +1,33 @@
|
|||||||
# syntax=docker/dockerfile:1.7
|
FROM python:3.14.3-slim
|
||||||
FROM python:3.12.12-slim
|
|
||||||
|
|
||||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||||
PYTHONUNBUFFERED=1
|
PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install build deps for any wheels that need compilation, then clean up
|
RUN apt-get update \
|
||||||
RUN apt-get update \
|
&& apt-get install -y --no-install-recommends build-essential curl \
|
||||||
&& apt-get install -y --no-install-recommends build-essential \
|
&& curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||||
|
|
||||||
COPY requirements.txt ./
|
COPY requirements.txt ./
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
# Make entrypoint executable
|
RUN pip install --no-cache-dir maturin \
|
||||||
|
&& cd myfsio_core \
|
||||||
|
&& maturin build --release \
|
||||||
|
&& pip install target/wheels/*.whl \
|
||||||
|
&& cd .. \
|
||||||
|
&& rm -rf myfsio_core/target \
|
||||||
|
&& pip uninstall -y maturin \
|
||||||
|
&& rustup self uninstall -y
|
||||||
|
|
||||||
RUN chmod +x docker-entrypoint.sh
|
RUN chmod +x docker-entrypoint.sh
|
||||||
|
|
||||||
# Create data directory and set permissions
|
|
||||||
RUN mkdir -p /app/data \
|
RUN mkdir -p /app/data \
|
||||||
&& useradd -m -u 1000 myfsio \
|
&& useradd -m -u 1000 myfsio \
|
||||||
&& chown -R myfsio:myfsio /app
|
&& chown -R myfsio:myfsio /app
|
||||||
|
|||||||
172
app/__init__.py
172
app/__init__.py
@@ -1,6 +1,8 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import html as html_module
|
||||||
import logging
|
import logging
|
||||||
|
import mimetypes
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
@@ -10,7 +12,7 @@ from pathlib import Path
|
|||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from flask import Flask, g, has_request_context, redirect, render_template, request, url_for
|
from flask import Flask, Response, g, has_request_context, redirect, render_template, request, url_for
|
||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
from flask_wtf.csrf import CSRFError
|
from flask_wtf.csrf import CSRFError
|
||||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||||
@@ -32,8 +34,9 @@ from .object_lock import ObjectLockService
|
|||||||
from .replication import ReplicationManager
|
from .replication import ReplicationManager
|
||||||
from .secret_store import EphemeralSecretStore
|
from .secret_store import EphemeralSecretStore
|
||||||
from .site_registry import SiteRegistry, SiteInfo
|
from .site_registry import SiteRegistry, SiteInfo
|
||||||
from .storage import ObjectStorage
|
from .storage import ObjectStorage, StorageError
|
||||||
from .version import get_version
|
from .version import get_version
|
||||||
|
from .website_domains import WebsiteDomainStore
|
||||||
|
|
||||||
|
|
||||||
def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
|
def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
|
||||||
@@ -223,6 +226,19 @@ def create_app(
|
|||||||
app.extensions["access_logging"] = access_logging_service
|
app.extensions["access_logging"] = access_logging_service
|
||||||
app.extensions["site_registry"] = site_registry
|
app.extensions["site_registry"] = site_registry
|
||||||
|
|
||||||
|
website_domains_store = None
|
||||||
|
if app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||||
|
website_domains_path = config_dir / "website_domains.json"
|
||||||
|
website_domains_store = WebsiteDomainStore(website_domains_path)
|
||||||
|
app.extensions["website_domains"] = website_domains_store
|
||||||
|
|
||||||
|
from .s3_client import S3ProxyClient
|
||||||
|
api_base = app.config.get("API_BASE_URL") or "http://127.0.0.1:5000"
|
||||||
|
app.extensions["s3_proxy"] = S3ProxyClient(
|
||||||
|
api_base_url=api_base,
|
||||||
|
region=app.config.get("AWS_REGION", "us-east-1"),
|
||||||
|
)
|
||||||
|
|
||||||
operation_metrics_collector = None
|
operation_metrics_collector = None
|
||||||
if app.config.get("OPERATION_METRICS_ENABLED", False):
|
if app.config.get("OPERATION_METRICS_ENABLED", False):
|
||||||
operation_metrics_collector = OperationMetricsCollector(
|
operation_metrics_collector = OperationMetricsCollector(
|
||||||
@@ -263,11 +279,37 @@ def create_app(
|
|||||||
|
|
||||||
@app.errorhandler(500)
|
@app.errorhandler(500)
|
||||||
def internal_error(error):
|
def internal_error(error):
|
||||||
return render_template('500.html'), 500
|
wants_html = request.accept_mimetypes.accept_html
|
||||||
|
path = request.path or ""
|
||||||
|
if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
|
||||||
|
return render_template('500.html'), 500
|
||||||
|
error_xml = (
|
||||||
|
'<?xml version="1.0" encoding="UTF-8"?>'
|
||||||
|
'<Error>'
|
||||||
|
'<Code>InternalError</Code>'
|
||||||
|
'<Message>An internal server error occurred</Message>'
|
||||||
|
f'<Resource>{path}</Resource>'
|
||||||
|
f'<RequestId>{getattr(g, "request_id", "-")}</RequestId>'
|
||||||
|
'</Error>'
|
||||||
|
)
|
||||||
|
return error_xml, 500, {'Content-Type': 'application/xml'}
|
||||||
|
|
||||||
@app.errorhandler(CSRFError)
|
@app.errorhandler(CSRFError)
|
||||||
def handle_csrf_error(e):
|
def handle_csrf_error(e):
|
||||||
return render_template('csrf_error.html', reason=e.description), 400
|
wants_html = request.accept_mimetypes.accept_html
|
||||||
|
path = request.path or ""
|
||||||
|
if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
|
||||||
|
return render_template('csrf_error.html', reason=e.description), 400
|
||||||
|
error_xml = (
|
||||||
|
'<?xml version="1.0" encoding="UTF-8"?>'
|
||||||
|
'<Error>'
|
||||||
|
'<Code>CSRFError</Code>'
|
||||||
|
f'<Message>{e.description}</Message>'
|
||||||
|
f'<Resource>{path}</Resource>'
|
||||||
|
f'<RequestId>{getattr(g, "request_id", "-")}</RequestId>'
|
||||||
|
'</Error>'
|
||||||
|
)
|
||||||
|
return error_xml, 400, {'Content-Type': 'application/xml'}
|
||||||
|
|
||||||
@app.template_filter("filesizeformat")
|
@app.template_filter("filesizeformat")
|
||||||
def filesizeformat(value: int) -> str:
|
def filesizeformat(value: int) -> str:
|
||||||
@@ -439,6 +481,128 @@ def _configure_logging(app: Flask) -> None:
|
|||||||
extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
|
extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@app.before_request
|
||||||
|
def _maybe_serve_website():
|
||||||
|
if not app.config.get("WEBSITE_HOSTING_ENABLED"):
|
||||||
|
return None
|
||||||
|
if request.method not in {"GET", "HEAD"}:
|
||||||
|
return None
|
||||||
|
host = request.host
|
||||||
|
if ":" in host:
|
||||||
|
host = host.rsplit(":", 1)[0]
|
||||||
|
host = host.lower()
|
||||||
|
store = app.extensions.get("website_domains")
|
||||||
|
if not store:
|
||||||
|
return None
|
||||||
|
bucket = store.get_bucket(host)
|
||||||
|
if not bucket:
|
||||||
|
return None
|
||||||
|
storage = app.extensions["object_storage"]
|
||||||
|
if not storage.bucket_exists(bucket):
|
||||||
|
return _website_error_response(404, "Not Found")
|
||||||
|
website_config = storage.get_bucket_website(bucket)
|
||||||
|
if not website_config:
|
||||||
|
return _website_error_response(404, "Not Found")
|
||||||
|
index_doc = website_config.get("index_document", "index.html")
|
||||||
|
error_doc = website_config.get("error_document")
|
||||||
|
req_path = request.path.lstrip("/")
|
||||||
|
if not req_path or req_path.endswith("/"):
|
||||||
|
object_key = req_path + index_doc
|
||||||
|
else:
|
||||||
|
object_key = req_path
|
||||||
|
try:
|
||||||
|
obj_path = storage.get_object_path(bucket, object_key)
|
||||||
|
except (StorageError, OSError):
|
||||||
|
if object_key == req_path:
|
||||||
|
try:
|
||||||
|
obj_path = storage.get_object_path(bucket, req_path + "/" + index_doc)
|
||||||
|
object_key = req_path + "/" + index_doc
|
||||||
|
except (StorageError, OSError):
|
||||||
|
return _serve_website_error(storage, bucket, error_doc, 404)
|
||||||
|
else:
|
||||||
|
return _serve_website_error(storage, bucket, error_doc, 404)
|
||||||
|
content_type = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
|
||||||
|
is_encrypted = False
|
||||||
|
try:
|
||||||
|
metadata = storage.get_object_metadata(bucket, object_key)
|
||||||
|
is_encrypted = "x-amz-server-side-encryption" in metadata
|
||||||
|
except (StorageError, OSError):
|
||||||
|
pass
|
||||||
|
if request.method == "HEAD":
|
||||||
|
response = Response(status=200)
|
||||||
|
if is_encrypted and hasattr(storage, "get_object_data"):
|
||||||
|
try:
|
||||||
|
data, _ = storage.get_object_data(bucket, object_key)
|
||||||
|
response.headers["Content-Length"] = len(data)
|
||||||
|
except (StorageError, OSError):
|
||||||
|
return _website_error_response(500, "Internal Server Error")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
stat = obj_path.stat()
|
||||||
|
response.headers["Content-Length"] = stat.st_size
|
||||||
|
except OSError:
|
||||||
|
return _website_error_response(500, "Internal Server Error")
|
||||||
|
response.headers["Content-Type"] = content_type
|
||||||
|
return response
|
||||||
|
if is_encrypted and hasattr(storage, "get_object_data"):
|
||||||
|
try:
|
||||||
|
data, _ = storage.get_object_data(bucket, object_key)
|
||||||
|
response = Response(data, mimetype=content_type)
|
||||||
|
response.headers["Content-Length"] = len(data)
|
||||||
|
return response
|
||||||
|
except (StorageError, OSError):
|
||||||
|
return _website_error_response(500, "Internal Server Error")
|
||||||
|
def _stream(file_path):
|
||||||
|
with file_path.open("rb") as f:
|
||||||
|
while True:
|
||||||
|
chunk = f.read(65536)
|
||||||
|
if not chunk:
|
||||||
|
break
|
||||||
|
yield chunk
|
||||||
|
try:
|
||||||
|
stat = obj_path.stat()
|
||||||
|
response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
|
||||||
|
response.headers["Content-Length"] = stat.st_size
|
||||||
|
return response
|
||||||
|
except OSError:
|
||||||
|
return _website_error_response(500, "Internal Server Error")
|
||||||
|
|
||||||
|
def _serve_website_error(storage, bucket, error_doc_key, status_code):
|
||||||
|
if not error_doc_key:
|
||||||
|
return _website_error_response(status_code, "Not Found" if status_code == 404 else "Error")
|
||||||
|
try:
|
||||||
|
obj_path = storage.get_object_path(bucket, error_doc_key)
|
||||||
|
except (StorageError, OSError):
|
||||||
|
return _website_error_response(status_code, "Not Found")
|
||||||
|
content_type = mimetypes.guess_type(error_doc_key)[0] or "text/html"
|
||||||
|
is_encrypted = False
|
||||||
|
try:
|
||||||
|
metadata = storage.get_object_metadata(bucket, error_doc_key)
|
||||||
|
is_encrypted = "x-amz-server-side-encryption" in metadata
|
||||||
|
except (StorageError, OSError):
|
||||||
|
pass
|
||||||
|
if is_encrypted and hasattr(storage, "get_object_data"):
|
||||||
|
try:
|
||||||
|
data, _ = storage.get_object_data(bucket, error_doc_key)
|
||||||
|
response = Response(data, status=status_code, mimetype=content_type)
|
||||||
|
response.headers["Content-Length"] = len(data)
|
||||||
|
return response
|
||||||
|
except (StorageError, OSError):
|
||||||
|
return _website_error_response(status_code, "Not Found")
|
||||||
|
try:
|
||||||
|
data = obj_path.read_bytes()
|
||||||
|
response = Response(data, status=status_code, mimetype=content_type)
|
||||||
|
response.headers["Content-Length"] = len(data)
|
||||||
|
return response
|
||||||
|
except OSError:
|
||||||
|
return _website_error_response(status_code, "Not Found")
|
||||||
|
|
||||||
|
def _website_error_response(status_code, message):
|
||||||
|
safe_msg = html_module.escape(str(message))
|
||||||
|
safe_code = html_module.escape(str(status_code))
|
||||||
|
body = f"<html><head><title>{safe_code} {safe_msg}</title></head><body><h1>{safe_code} {safe_msg}</h1></body></html>"
|
||||||
|
return Response(body, status=status_code, mimetype="text/html")
|
||||||
|
|
||||||
@app.after_request
|
@app.after_request
|
||||||
def _log_request_end(response):
|
def _log_request_end(response):
|
||||||
duration_ms = 0.0
|
duration_ms = 0.0
|
||||||
|
|||||||
108
app/admin_api.py
108
app/admin_api.py
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import ipaddress
|
import ipaddress
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
@@ -16,6 +17,7 @@ from .extensions import limiter
|
|||||||
from .iam import IamError, Principal
|
from .iam import IamError, Principal
|
||||||
from .replication import ReplicationManager
|
from .replication import ReplicationManager
|
||||||
from .site_registry import PeerSite, SiteInfo, SiteRegistry
|
from .site_registry import PeerSite, SiteInfo, SiteRegistry
|
||||||
|
from .website_domains import WebsiteDomainStore, normalize_domain, is_valid_domain
|
||||||
|
|
||||||
|
|
||||||
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
||||||
@@ -354,6 +356,10 @@ def update_peer_site(site_id: str):
|
|||||||
if region_error:
|
if region_error:
|
||||||
return _json_error("ValidationError", region_error, 400)
|
return _json_error("ValidationError", region_error, 400)
|
||||||
|
|
||||||
|
if "connection_id" in payload:
|
||||||
|
if payload["connection_id"] and not _connections().get(payload["connection_id"]):
|
||||||
|
return _json_error("ValidationError", f"Connection '{payload['connection_id']}' not found", 400)
|
||||||
|
|
||||||
peer = PeerSite(
|
peer = PeerSite(
|
||||||
site_id=site_id,
|
site_id=site_id,
|
||||||
endpoint=payload.get("endpoint", existing.endpoint),
|
endpoint=payload.get("endpoint", existing.endpoint),
|
||||||
@@ -668,3 +674,105 @@ def check_bidirectional_status(site_id: str):
|
|||||||
result["is_fully_configured"] = len(error_issues) == 0 and len(local_bidir_rules) > 0
|
result["is_fully_configured"] = len(error_issues) == 0 and len(local_bidir_rules) > 0
|
||||||
|
|
||||||
return jsonify(result)
|
return jsonify(result)
|
||||||
|
|
||||||
|
|
||||||
|
def _website_domains() -> WebsiteDomainStore:
|
||||||
|
return current_app.extensions["website_domains"]
|
||||||
|
|
||||||
|
|
||||||
|
def _storage():
|
||||||
|
return current_app.extensions["object_storage"]
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/website-domains", methods=["GET"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def list_website_domains():
|
||||||
|
principal, error = _require_admin()
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||||
|
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||||
|
return jsonify(_website_domains().list_all())
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/website-domains", methods=["POST"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def create_website_domain():
|
||||||
|
principal, error = _require_admin()
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||||
|
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||||
|
payload = request.get_json(silent=True) or {}
|
||||||
|
domain = normalize_domain(payload.get("domain") or "")
|
||||||
|
bucket = (payload.get("bucket") or "").strip()
|
||||||
|
if not domain:
|
||||||
|
return _json_error("ValidationError", "domain is required", 400)
|
||||||
|
if not is_valid_domain(domain):
|
||||||
|
return _json_error("ValidationError", f"Invalid domain: '{domain}'", 400)
|
||||||
|
if not bucket:
|
||||||
|
return _json_error("ValidationError", "bucket is required", 400)
|
||||||
|
storage = _storage()
|
||||||
|
if not storage.bucket_exists(bucket):
|
||||||
|
return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)
|
||||||
|
store = _website_domains()
|
||||||
|
existing = store.get_bucket(domain)
|
||||||
|
if existing:
|
||||||
|
return _json_error("Conflict", f"Domain '{domain}' is already mapped to bucket '{existing}'", 409)
|
||||||
|
store.set_mapping(domain, bucket)
|
||||||
|
logger.info("Website domain mapping created: %s -> %s", domain, bucket)
|
||||||
|
return jsonify({"domain": domain, "bucket": bucket}), 201
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/website-domains/<domain>", methods=["GET"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def get_website_domain(domain: str):
|
||||||
|
principal, error = _require_admin()
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||||
|
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||||
|
domain = normalize_domain(domain)
|
||||||
|
bucket = _website_domains().get_bucket(domain)
|
||||||
|
if not bucket:
|
||||||
|
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
|
||||||
|
return jsonify({"domain": domain, "bucket": bucket})
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/website-domains/<domain>", methods=["PUT"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def update_website_domain(domain: str):
|
||||||
|
principal, error = _require_admin()
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||||
|
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||||
|
domain = normalize_domain(domain)
|
||||||
|
payload = request.get_json(silent=True) or {}
|
||||||
|
bucket = (payload.get("bucket") or "").strip()
|
||||||
|
if not bucket:
|
||||||
|
return _json_error("ValidationError", "bucket is required", 400)
|
||||||
|
storage = _storage()
|
||||||
|
if not storage.bucket_exists(bucket):
|
||||||
|
return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)
|
||||||
|
store = _website_domains()
|
||||||
|
if not store.get_bucket(domain):
|
||||||
|
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
|
||||||
|
store.set_mapping(domain, bucket)
|
||||||
|
logger.info("Website domain mapping updated: %s -> %s", domain, bucket)
|
||||||
|
return jsonify({"domain": domain, "bucket": bucket})
|
||||||
|
|
||||||
|
|
||||||
|
@admin_api_bp.route("/website-domains/<domain>", methods=["DELETE"])
|
||||||
|
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||||
|
def delete_website_domain(domain: str):
|
||||||
|
principal, error = _require_admin()
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||||
|
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||||
|
domain = normalize_domain(domain)
|
||||||
|
if not _website_domains().delete_mapping(domain):
|
||||||
|
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
|
||||||
|
logger.info("Website domain mapping deleted: %s", domain)
|
||||||
|
return Response(status=204)
|
||||||
|
|||||||
@@ -36,10 +36,11 @@ class GzipMiddleware:
|
|||||||
content_type = None
|
content_type = None
|
||||||
content_length = None
|
content_length = None
|
||||||
should_compress = False
|
should_compress = False
|
||||||
|
passthrough = False
|
||||||
exc_info_holder = [None]
|
exc_info_holder = [None]
|
||||||
|
|
||||||
def custom_start_response(status: str, headers: List[Tuple[str, str]], exc_info=None):
|
def custom_start_response(status: str, headers: List[Tuple[str, str]], exc_info=None):
|
||||||
nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress
|
nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress, passthrough
|
||||||
response_started = True
|
response_started = True
|
||||||
status_code = int(status.split(' ', 1)[0])
|
status_code = int(status.split(' ', 1)[0])
|
||||||
response_headers = list(headers)
|
response_headers = list(headers)
|
||||||
@@ -50,18 +51,32 @@ class GzipMiddleware:
|
|||||||
if name_lower == 'content-type':
|
if name_lower == 'content-type':
|
||||||
content_type = value.split(';')[0].strip().lower()
|
content_type = value.split(';')[0].strip().lower()
|
||||||
elif name_lower == 'content-length':
|
elif name_lower == 'content-length':
|
||||||
content_length = int(value)
|
try:
|
||||||
|
content_length = int(value)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
elif name_lower == 'content-encoding':
|
elif name_lower == 'content-encoding':
|
||||||
should_compress = False
|
passthrough = True
|
||||||
|
return start_response(status, headers, exc_info)
|
||||||
|
elif name_lower == 'x-stream-response':
|
||||||
|
passthrough = True
|
||||||
return start_response(status, headers, exc_info)
|
return start_response(status, headers, exc_info)
|
||||||
|
|
||||||
if content_type and content_type in COMPRESSIBLE_MIMES:
|
if content_type and content_type in COMPRESSIBLE_MIMES:
|
||||||
if content_length is None or content_length >= self.min_size:
|
if content_length is None or content_length >= self.min_size:
|
||||||
should_compress = True
|
should_compress = True
|
||||||
|
else:
|
||||||
|
passthrough = True
|
||||||
|
return start_response(status, headers, exc_info)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
response_body = b''.join(self.app(environ, custom_start_response))
|
app_iter = self.app(environ, custom_start_response)
|
||||||
|
|
||||||
|
if passthrough:
|
||||||
|
return app_iter
|
||||||
|
|
||||||
|
response_body = b''.join(app_iter)
|
||||||
|
|
||||||
if not response_started:
|
if not response_started:
|
||||||
return [response_body]
|
return [response_body]
|
||||||
|
|||||||
@@ -149,6 +149,7 @@ class AppConfig:
|
|||||||
num_trusted_proxies: int
|
num_trusted_proxies: int
|
||||||
allowed_redirect_hosts: list[str]
|
allowed_redirect_hosts: list[str]
|
||||||
allow_internal_endpoints: bool
|
allow_internal_endpoints: bool
|
||||||
|
website_hosting_enabled: bool
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
|
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
|
||||||
@@ -317,6 +318,7 @@ class AppConfig:
|
|||||||
allowed_redirect_hosts_raw = _get("ALLOWED_REDIRECT_HOSTS", "")
|
allowed_redirect_hosts_raw = _get("ALLOWED_REDIRECT_HOSTS", "")
|
||||||
allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
|
allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
|
||||||
allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
|
allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
|
||||||
|
website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||||
|
|
||||||
return cls(storage_root=storage_root,
|
return cls(storage_root=storage_root,
|
||||||
max_upload_size=max_upload_size,
|
max_upload_size=max_upload_size,
|
||||||
@@ -403,7 +405,8 @@ class AppConfig:
|
|||||||
ratelimit_admin=ratelimit_admin,
|
ratelimit_admin=ratelimit_admin,
|
||||||
num_trusted_proxies=num_trusted_proxies,
|
num_trusted_proxies=num_trusted_proxies,
|
||||||
allowed_redirect_hosts=allowed_redirect_hosts,
|
allowed_redirect_hosts=allowed_redirect_hosts,
|
||||||
allow_internal_endpoints=allow_internal_endpoints)
|
allow_internal_endpoints=allow_internal_endpoints,
|
||||||
|
website_hosting_enabled=website_hosting_enabled)
|
||||||
|
|
||||||
def validate_and_report(self) -> list[str]:
|
def validate_and_report(self) -> list[str]:
|
||||||
"""Validate configuration and return a list of warnings/issues.
|
"""Validate configuration and return a list of warnings/issues.
|
||||||
@@ -509,6 +512,8 @@ class AppConfig:
|
|||||||
print(f" ENCRYPTION: Enabled (Master key: {self.encryption_master_key_path})")
|
print(f" ENCRYPTION: Enabled (Master key: {self.encryption_master_key_path})")
|
||||||
if self.kms_enabled:
|
if self.kms_enabled:
|
||||||
print(f" KMS: Enabled (Keys: {self.kms_keys_path})")
|
print(f" KMS: Enabled (Keys: {self.kms_keys_path})")
|
||||||
|
if self.website_hosting_enabled:
|
||||||
|
print(f" WEBSITE_HOSTING: Enabled")
|
||||||
def _auto(flag: bool) -> str:
|
def _auto(flag: bool) -> str:
|
||||||
return " (auto)" if flag else ""
|
return " (auto)" if flag else ""
|
||||||
print(f" SERVER_THREADS: {self.server_threads}{_auto(self.server_threads_auto)}")
|
print(f" SERVER_THREADS: {self.server_threads}{_auto(self.server_threads_auto)}")
|
||||||
@@ -611,4 +616,5 @@ class AppConfig:
|
|||||||
"NUM_TRUSTED_PROXIES": self.num_trusted_proxies,
|
"NUM_TRUSTED_PROXIES": self.num_trusted_proxies,
|
||||||
"ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
|
"ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
|
||||||
"ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
|
"ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
|
||||||
|
"WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -270,9 +270,15 @@ class EncryptedObjectStorage:
|
|||||||
|
|
||||||
def get_bucket_quota(self, bucket_name: str):
|
def get_bucket_quota(self, bucket_name: str):
|
||||||
return self.storage.get_bucket_quota(bucket_name)
|
return self.storage.get_bucket_quota(bucket_name)
|
||||||
|
|
||||||
def set_bucket_quota(self, bucket_name: str, *, max_bytes=None, max_objects=None):
|
def set_bucket_quota(self, bucket_name: str, *, max_bytes=None, max_objects=None):
|
||||||
return self.storage.set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects)
|
return self.storage.set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects)
|
||||||
|
|
||||||
|
def get_bucket_website(self, bucket_name: str):
|
||||||
|
return self.storage.get_bucket_website(bucket_name)
|
||||||
|
|
||||||
|
def set_bucket_website(self, bucket_name: str, website_config):
|
||||||
|
return self.storage.set_bucket_website(bucket_name, website_config)
|
||||||
|
|
||||||
def _compute_etag(self, path: Path) -> str:
|
def _compute_etag(self, path: Path) -> str:
|
||||||
return self.storage._compute_etag(path)
|
return self.storage._compute_etag(path)
|
||||||
|
|||||||
26
app/iam.py
26
app/iam.py
@@ -309,6 +309,18 @@ class IamService:
|
|||||||
if not self._is_allowed(principal, normalized, action):
|
if not self._is_allowed(principal, normalized, action):
|
||||||
raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'")
|
raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'")
|
||||||
|
|
||||||
|
def check_permissions(self, principal: Principal, bucket_name: str | None, actions: Iterable[str]) -> Dict[str, bool]:
|
||||||
|
self._maybe_reload()
|
||||||
|
bucket_name = (bucket_name or "*").lower() if bucket_name != "*" else (bucket_name or "*")
|
||||||
|
normalized_actions = {a: self._normalize_action(a) for a in actions}
|
||||||
|
results: Dict[str, bool] = {}
|
||||||
|
for original, canonical in normalized_actions.items():
|
||||||
|
if canonical not in ALLOWED_ACTIONS:
|
||||||
|
results[original] = False
|
||||||
|
else:
|
||||||
|
results[original] = self._is_allowed(principal, bucket_name, canonical)
|
||||||
|
return results
|
||||||
|
|
||||||
def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]:
|
def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]:
|
||||||
return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")]
|
return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")]
|
||||||
|
|
||||||
@@ -529,11 +541,13 @@ class IamService:
|
|||||||
return candidate if candidate in ALLOWED_ACTIONS else ""
|
return candidate if candidate in ALLOWED_ACTIONS else ""
|
||||||
|
|
||||||
def _write_default(self) -> None:
|
def _write_default(self) -> None:
|
||||||
|
access_key = secrets.token_hex(12)
|
||||||
|
secret_key = secrets.token_urlsafe(32)
|
||||||
default = {
|
default = {
|
||||||
"users": [
|
"users": [
|
||||||
{
|
{
|
||||||
"access_key": "localadmin",
|
"access_key": access_key,
|
||||||
"secret_key": "localadmin",
|
"secret_key": secret_key,
|
||||||
"display_name": "Local Admin",
|
"display_name": "Local Admin",
|
||||||
"policies": [
|
"policies": [
|
||||||
{"bucket": "*", "actions": list(ALLOWED_ACTIONS)}
|
{"bucket": "*", "actions": list(ALLOWED_ACTIONS)}
|
||||||
@@ -542,6 +556,14 @@ class IamService:
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
self.config_path.write_text(json.dumps(default, indent=2))
|
self.config_path.write_text(json.dumps(default, indent=2))
|
||||||
|
print(f"\n{'='*60}")
|
||||||
|
print("MYFSIO FIRST RUN - ADMIN CREDENTIALS GENERATED")
|
||||||
|
print(f"{'='*60}")
|
||||||
|
print(f"Access Key: {access_key}")
|
||||||
|
print(f"Secret Key: {secret_key}")
|
||||||
|
print(f"{'='*60}")
|
||||||
|
print(f"Missed this? Check: {self.config_path}")
|
||||||
|
print(f"{'='*60}\n")
|
||||||
|
|
||||||
def _generate_access_key(self) -> str:
|
def _generate_access_key(self) -> str:
|
||||||
return secrets.token_hex(8)
|
return secrets.token_hex(8)
|
||||||
|
|||||||
30
app/kms.py
30
app/kms.py
@@ -160,6 +160,7 @@ class KMSManager:
|
|||||||
self.generate_data_key_max_bytes = generate_data_key_max_bytes
|
self.generate_data_key_max_bytes = generate_data_key_max_bytes
|
||||||
self._keys: Dict[str, KMSKey] = {}
|
self._keys: Dict[str, KMSKey] = {}
|
||||||
self._master_key: bytes | None = None
|
self._master_key: bytes | None = None
|
||||||
|
self._master_aesgcm: AESGCM | None = None
|
||||||
self._loaded = False
|
self._loaded = False
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -191,6 +192,7 @@ class KMSManager:
|
|||||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
|
msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
|
||||||
else:
|
else:
|
||||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
|
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
|
||||||
|
self._master_aesgcm = AESGCM(self._master_key)
|
||||||
return self._master_key
|
return self._master_key
|
||||||
|
|
||||||
def _load_keys(self) -> None:
|
def _load_keys(self) -> None:
|
||||||
@@ -231,18 +233,16 @@ class KMSManager:
|
|||||||
_set_secure_file_permissions(self.keys_path)
|
_set_secure_file_permissions(self.keys_path)
|
||||||
|
|
||||||
def _encrypt_key_material(self, key_material: bytes) -> bytes:
|
def _encrypt_key_material(self, key_material: bytes) -> bytes:
|
||||||
"""Encrypt key material with the master key."""
|
_ = self.master_key
|
||||||
aesgcm = AESGCM(self.master_key)
|
|
||||||
nonce = secrets.token_bytes(12)
|
nonce = secrets.token_bytes(12)
|
||||||
ciphertext = aesgcm.encrypt(nonce, key_material, None)
|
ciphertext = self._master_aesgcm.encrypt(nonce, key_material, None)
|
||||||
return nonce + ciphertext
|
return nonce + ciphertext
|
||||||
|
|
||||||
def _decrypt_key_material(self, encrypted: bytes) -> bytes:
|
def _decrypt_key_material(self, encrypted: bytes) -> bytes:
|
||||||
"""Decrypt key material with the master key."""
|
_ = self.master_key
|
||||||
aesgcm = AESGCM(self.master_key)
|
|
||||||
nonce = encrypted[:12]
|
nonce = encrypted[:12]
|
||||||
ciphertext = encrypted[12:]
|
ciphertext = encrypted[12:]
|
||||||
return aesgcm.decrypt(nonce, ciphertext, None)
|
return self._master_aesgcm.decrypt(nonce, ciphertext, None)
|
||||||
|
|
||||||
def create_key(self, description: str = "", key_id: str | None = None) -> KMSKey:
|
def create_key(self, description: str = "", key_id: str | None = None) -> KMSKey:
|
||||||
"""Create a new KMS key."""
|
"""Create a new KMS key."""
|
||||||
@@ -404,22 +404,6 @@ class KMSManager:
|
|||||||
plaintext, _ = self.decrypt(encrypted_key, context)
|
plaintext, _ = self.decrypt(encrypted_key, context)
|
||||||
return plaintext
|
return plaintext
|
||||||
|
|
||||||
def get_provider(self, key_id: str | None = None) -> KMSEncryptionProvider:
|
|
||||||
"""Get an encryption provider for a specific key."""
|
|
||||||
self._load_keys()
|
|
||||||
|
|
||||||
if key_id is None:
|
|
||||||
if not self._keys:
|
|
||||||
key = self.create_key("Default KMS Key")
|
|
||||||
key_id = key.key_id
|
|
||||||
else:
|
|
||||||
key_id = next(iter(self._keys.keys()))
|
|
||||||
|
|
||||||
if key_id not in self._keys:
|
|
||||||
raise EncryptionError(f"Key not found: {key_id}")
|
|
||||||
|
|
||||||
return KMSEncryptionProvider(self, key_id)
|
|
||||||
|
|
||||||
def re_encrypt(self, ciphertext: bytes, destination_key_id: str,
|
def re_encrypt(self, ciphertext: bytes, destination_key_id: str,
|
||||||
source_context: Dict[str, str] | None = None,
|
source_context: Dict[str, str] | None = None,
|
||||||
destination_context: Dict[str, str] | None = None) -> bytes:
|
destination_context: Dict[str, str] | None = None) -> bytes:
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import random
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
@@ -9,6 +10,8 @@ from datetime import datetime, timezone
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
MAX_LATENCY_SAMPLES = 5000
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -22,6 +25,17 @@ class OperationStats:
|
|||||||
latency_max_ms: float = 0.0
|
latency_max_ms: float = 0.0
|
||||||
bytes_in: int = 0
|
bytes_in: int = 0
|
||||||
bytes_out: int = 0
|
bytes_out: int = 0
|
||||||
|
latency_samples: List[float] = field(default_factory=list)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _compute_percentile(sorted_data: List[float], p: float) -> float:
|
||||||
|
if not sorted_data:
|
||||||
|
return 0.0
|
||||||
|
k = (len(sorted_data) - 1) * (p / 100.0)
|
||||||
|
f = int(k)
|
||||||
|
c = min(f + 1, len(sorted_data) - 1)
|
||||||
|
d = k - f
|
||||||
|
return sorted_data[f] + d * (sorted_data[c] - sorted_data[f])
|
||||||
|
|
||||||
def record(self, latency_ms: float, success: bool, bytes_in: int = 0, bytes_out: int = 0) -> None:
|
def record(self, latency_ms: float, success: bool, bytes_in: int = 0, bytes_out: int = 0) -> None:
|
||||||
self.count += 1
|
self.count += 1
|
||||||
@@ -36,10 +50,17 @@ class OperationStats:
|
|||||||
self.latency_max_ms = latency_ms
|
self.latency_max_ms = latency_ms
|
||||||
self.bytes_in += bytes_in
|
self.bytes_in += bytes_in
|
||||||
self.bytes_out += bytes_out
|
self.bytes_out += bytes_out
|
||||||
|
if len(self.latency_samples) < MAX_LATENCY_SAMPLES:
|
||||||
|
self.latency_samples.append(latency_ms)
|
||||||
|
else:
|
||||||
|
j = random.randint(0, self.count - 1)
|
||||||
|
if j < MAX_LATENCY_SAMPLES:
|
||||||
|
self.latency_samples[j] = latency_ms
|
||||||
|
|
||||||
def to_dict(self) -> Dict[str, Any]:
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
avg_latency = self.latency_sum_ms / self.count if self.count > 0 else 0.0
|
avg_latency = self.latency_sum_ms / self.count if self.count > 0 else 0.0
|
||||||
min_latency = self.latency_min_ms if self.latency_min_ms != float("inf") else 0.0
|
min_latency = self.latency_min_ms if self.latency_min_ms != float("inf") else 0.0
|
||||||
|
sorted_latencies = sorted(self.latency_samples)
|
||||||
return {
|
return {
|
||||||
"count": self.count,
|
"count": self.count,
|
||||||
"success_count": self.success_count,
|
"success_count": self.success_count,
|
||||||
@@ -47,6 +68,9 @@ class OperationStats:
|
|||||||
"latency_avg_ms": round(avg_latency, 2),
|
"latency_avg_ms": round(avg_latency, 2),
|
||||||
"latency_min_ms": round(min_latency, 2),
|
"latency_min_ms": round(min_latency, 2),
|
||||||
"latency_max_ms": round(self.latency_max_ms, 2),
|
"latency_max_ms": round(self.latency_max_ms, 2),
|
||||||
|
"latency_p50_ms": round(self._compute_percentile(sorted_latencies, 50), 2),
|
||||||
|
"latency_p95_ms": round(self._compute_percentile(sorted_latencies, 95), 2),
|
||||||
|
"latency_p99_ms": round(self._compute_percentile(sorted_latencies, 99), 2),
|
||||||
"bytes_in": self.bytes_in,
|
"bytes_in": self.bytes_in,
|
||||||
"bytes_out": self.bytes_out,
|
"bytes_out": self.bytes_out,
|
||||||
}
|
}
|
||||||
@@ -62,6 +86,11 @@ class OperationStats:
|
|||||||
self.latency_max_ms = other.latency_max_ms
|
self.latency_max_ms = other.latency_max_ms
|
||||||
self.bytes_in += other.bytes_in
|
self.bytes_in += other.bytes_in
|
||||||
self.bytes_out += other.bytes_out
|
self.bytes_out += other.bytes_out
|
||||||
|
combined = self.latency_samples + other.latency_samples
|
||||||
|
if len(combined) > MAX_LATENCY_SAMPLES:
|
||||||
|
random.shuffle(combined)
|
||||||
|
combined = combined[:MAX_LATENCY_SAMPLES]
|
||||||
|
self.latency_samples = combined
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
@@ -176,11 +176,12 @@ class ReplicationFailureStore:
|
|||||||
self.storage_root = storage_root
|
self.storage_root = storage_root
|
||||||
self.max_failures_per_bucket = max_failures_per_bucket
|
self.max_failures_per_bucket = max_failures_per_bucket
|
||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
|
self._cache: Dict[str, List[ReplicationFailure]] = {}
|
||||||
|
|
||||||
def _get_failures_path(self, bucket_name: str) -> Path:
|
def _get_failures_path(self, bucket_name: str) -> Path:
|
||||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "replication_failures.json"
|
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "replication_failures.json"
|
||||||
|
|
||||||
def load_failures(self, bucket_name: str) -> List[ReplicationFailure]:
|
def _load_from_disk(self, bucket_name: str) -> List[ReplicationFailure]:
|
||||||
path = self._get_failures_path(bucket_name)
|
path = self._get_failures_path(bucket_name)
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
return []
|
return []
|
||||||
@@ -192,7 +193,7 @@ class ReplicationFailureStore:
|
|||||||
logger.error(f"Failed to load replication failures for {bucket_name}: {e}")
|
logger.error(f"Failed to load replication failures for {bucket_name}: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
|
def _save_to_disk(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
|
||||||
path = self._get_failures_path(bucket_name)
|
path = self._get_failures_path(bucket_name)
|
||||||
path.parent.mkdir(parents=True, exist_ok=True)
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
data = {"failures": [f.to_dict() for f in failures[:self.max_failures_per_bucket]]}
|
data = {"failures": [f.to_dict() for f in failures[:self.max_failures_per_bucket]]}
|
||||||
@@ -202,6 +203,18 @@ class ReplicationFailureStore:
|
|||||||
except OSError as e:
|
except OSError as e:
|
||||||
logger.error(f"Failed to save replication failures for {bucket_name}: {e}")
|
logger.error(f"Failed to save replication failures for {bucket_name}: {e}")
|
||||||
|
|
||||||
|
def load_failures(self, bucket_name: str) -> List[ReplicationFailure]:
|
||||||
|
if bucket_name in self._cache:
|
||||||
|
return list(self._cache[bucket_name])
|
||||||
|
failures = self._load_from_disk(bucket_name)
|
||||||
|
self._cache[bucket_name] = failures
|
||||||
|
return list(failures)
|
||||||
|
|
||||||
|
def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
|
||||||
|
trimmed = failures[:self.max_failures_per_bucket]
|
||||||
|
self._cache[bucket_name] = trimmed
|
||||||
|
self._save_to_disk(bucket_name, trimmed)
|
||||||
|
|
||||||
def add_failure(self, bucket_name: str, failure: ReplicationFailure) -> None:
|
def add_failure(self, bucket_name: str, failure: ReplicationFailure) -> None:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
failures = self.load_failures(bucket_name)
|
failures = self.load_failures(bucket_name)
|
||||||
@@ -227,6 +240,7 @@ class ReplicationFailureStore:
|
|||||||
|
|
||||||
def clear_failures(self, bucket_name: str) -> None:
|
def clear_failures(self, bucket_name: str) -> None:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
|
self._cache.pop(bucket_name, None)
|
||||||
path = self._get_failures_path(bucket_name)
|
path = self._get_failures_path(bucket_name)
|
||||||
if path.exists():
|
if path.exists():
|
||||||
path.unlink()
|
path.unlink()
|
||||||
|
|||||||
156
app/s3_api.py
156
app/s3_api.py
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|||||||
import base64
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
import hmac
|
import hmac
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import re
|
import re
|
||||||
@@ -16,6 +17,13 @@ from urllib.parse import quote, urlencode, urlparse, unquote
|
|||||||
from xml.etree.ElementTree import Element, SubElement, tostring, ParseError
|
from xml.etree.ElementTree import Element, SubElement, tostring, ParseError
|
||||||
from defusedxml.ElementTree import fromstring
|
from defusedxml.ElementTree import fromstring
|
||||||
|
|
||||||
|
try:
|
||||||
|
import myfsio_core as _rc
|
||||||
|
_HAS_RUST = True
|
||||||
|
except ImportError:
|
||||||
|
_rc = None
|
||||||
|
_HAS_RUST = False
|
||||||
|
|
||||||
from flask import Blueprint, Response, current_app, jsonify, request, g
|
from flask import Blueprint, Response, current_app, jsonify, request, g
|
||||||
from werkzeug.http import http_date
|
from werkzeug.http import http_date
|
||||||
|
|
||||||
@@ -191,11 +199,16 @@ _SIGNING_KEY_CACHE_MAX_SIZE = 256
|
|||||||
|
|
||||||
|
|
||||||
def clear_signing_key_cache() -> None:
|
def clear_signing_key_cache() -> None:
|
||||||
|
if _HAS_RUST:
|
||||||
|
_rc.clear_signing_key_cache()
|
||||||
with _SIGNING_KEY_CACHE_LOCK:
|
with _SIGNING_KEY_CACHE_LOCK:
|
||||||
_SIGNING_KEY_CACHE.clear()
|
_SIGNING_KEY_CACHE.clear()
|
||||||
|
|
||||||
|
|
||||||
def _get_signature_key(key: str, date_stamp: str, region_name: str, service_name: str) -> bytes:
|
def _get_signature_key(key: str, date_stamp: str, region_name: str, service_name: str) -> bytes:
|
||||||
|
if _HAS_RUST:
|
||||||
|
return bytes(_rc.derive_signing_key(key, date_stamp, region_name, service_name))
|
||||||
|
|
||||||
cache_key = (key, date_stamp, region_name, service_name)
|
cache_key = (key, date_stamp, region_name, service_name)
|
||||||
now = time.time()
|
now = time.time()
|
||||||
|
|
||||||
@@ -313,9 +326,13 @@ def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
|
|||||||
raise IamError("Required headers not signed")
|
raise IamError("Required headers not signed")
|
||||||
|
|
||||||
credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
|
credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
|
||||||
string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}"
|
|
||||||
signing_key = _get_signature_key(secret_key, date_stamp, region, service)
|
signing_key = _get_signature_key(secret_key, date_stamp, region, service)
|
||||||
calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
|
if _HAS_RUST:
|
||||||
|
string_to_sign = _rc.build_string_to_sign(amz_date, credential_scope, canonical_request)
|
||||||
|
calculated_signature = _rc.compute_signature(signing_key, string_to_sign)
|
||||||
|
else:
|
||||||
|
string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}"
|
||||||
|
calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||||
|
|
||||||
if not hmac.compare_digest(calculated_signature, signature):
|
if not hmac.compare_digest(calculated_signature, signature):
|
||||||
if current_app.config.get("DEBUG_SIGV4"):
|
if current_app.config.get("DEBUG_SIGV4"):
|
||||||
@@ -399,18 +416,15 @@ def _verify_sigv4_query(req: Any) -> Principal | None:
|
|||||||
payload_hash
|
payload_hash
|
||||||
])
|
])
|
||||||
|
|
||||||
algorithm = "AWS4-HMAC-SHA256"
|
|
||||||
credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
|
credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
|
||||||
hashed_request = hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()
|
|
||||||
string_to_sign = "\n".join([
|
|
||||||
algorithm,
|
|
||||||
amz_date,
|
|
||||||
credential_scope,
|
|
||||||
hashed_request
|
|
||||||
])
|
|
||||||
|
|
||||||
signing_key = _get_signature_key(secret_key, date_stamp, region, service)
|
signing_key = _get_signature_key(secret_key, date_stamp, region, service)
|
||||||
calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
|
if _HAS_RUST:
|
||||||
|
string_to_sign = _rc.build_string_to_sign(amz_date, credential_scope, canonical_request)
|
||||||
|
calculated_signature = _rc.compute_signature(signing_key, string_to_sign)
|
||||||
|
else:
|
||||||
|
hashed_request = hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()
|
||||||
|
string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashed_request}"
|
||||||
|
calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||||
|
|
||||||
if not hmac.compare_digest(calculated_signature, signature):
|
if not hmac.compare_digest(calculated_signature, signature):
|
||||||
raise IamError("SignatureDoesNotMatch")
|
raise IamError("SignatureDoesNotMatch")
|
||||||
@@ -999,12 +1013,14 @@ def _apply_object_headers(
|
|||||||
etag: str,
|
etag: str,
|
||||||
) -> None:
|
) -> None:
|
||||||
if file_stat is not None:
|
if file_stat is not None:
|
||||||
response.headers["Content-Length"] = str(file_stat.st_size)
|
if response.status_code != 206:
|
||||||
|
response.headers["Content-Length"] = str(file_stat.st_size)
|
||||||
response.headers["Last-Modified"] = http_date(file_stat.st_mtime)
|
response.headers["Last-Modified"] = http_date(file_stat.st_mtime)
|
||||||
response.headers["ETag"] = f'"{etag}"'
|
response.headers["ETag"] = f'"{etag}"'
|
||||||
response.headers["Accept-Ranges"] = "bytes"
|
response.headers["Accept-Ranges"] = "bytes"
|
||||||
for key, value in (metadata or {}).items():
|
for key, value in (metadata or {}).items():
|
||||||
response.headers[f"X-Amz-Meta-{key}"] = value
|
safe_value = _sanitize_header_value(str(value))
|
||||||
|
response.headers[f"X-Amz-Meta-{key}"] = safe_value
|
||||||
|
|
||||||
|
|
||||||
def _maybe_handle_bucket_subresource(bucket_name: str) -> Response | None:
|
def _maybe_handle_bucket_subresource(bucket_name: str) -> Response | None:
|
||||||
@@ -1024,6 +1040,7 @@ def _maybe_handle_bucket_subresource(bucket_name: str) -> Response | None:
|
|||||||
"uploads": _bucket_uploads_handler,
|
"uploads": _bucket_uploads_handler,
|
||||||
"policy": _bucket_policy_handler,
|
"policy": _bucket_policy_handler,
|
||||||
"replication": _bucket_replication_handler,
|
"replication": _bucket_replication_handler,
|
||||||
|
"website": _bucket_website_handler,
|
||||||
}
|
}
|
||||||
requested = [key for key in handlers if key in request.args]
|
requested = [key for key in handlers if key in request.args]
|
||||||
if not requested:
|
if not requested:
|
||||||
@@ -2342,10 +2359,12 @@ def _post_object(bucket_name: str) -> Response:
|
|||||||
success_action_redirect = request.form.get("success_action_redirect")
|
success_action_redirect = request.form.get("success_action_redirect")
|
||||||
if success_action_redirect:
|
if success_action_redirect:
|
||||||
allowed_hosts = current_app.config.get("ALLOWED_REDIRECT_HOSTS", [])
|
allowed_hosts = current_app.config.get("ALLOWED_REDIRECT_HOSTS", [])
|
||||||
|
if not allowed_hosts:
|
||||||
|
allowed_hosts = [request.host]
|
||||||
parsed = urlparse(success_action_redirect)
|
parsed = urlparse(success_action_redirect)
|
||||||
if parsed.scheme not in ("http", "https"):
|
if parsed.scheme not in ("http", "https"):
|
||||||
return _error_response("InvalidArgument", "Redirect URL must use http or https", 400)
|
return _error_response("InvalidArgument", "Redirect URL must use http or https", 400)
|
||||||
if allowed_hosts and parsed.netloc not in allowed_hosts:
|
if parsed.netloc not in allowed_hosts:
|
||||||
return _error_response("InvalidArgument", "Redirect URL host not allowed", 400)
|
return _error_response("InvalidArgument", "Redirect URL host not allowed", 400)
|
||||||
redirect_url = f"{success_action_redirect}?bucket={bucket_name}&key={quote(object_key)}&etag={meta.etag}"
|
redirect_url = f"{success_action_redirect}?bucket={bucket_name}&key={quote(object_key)}&etag={meta.etag}"
|
||||||
return Response(status=303, headers={"Location": redirect_url})
|
return Response(status=303, headers={"Location": redirect_url})
|
||||||
@@ -2773,9 +2792,14 @@ def object_handler(bucket_name: str, object_key: str):
|
|||||||
except StorageError as exc:
|
except StorageError as exc:
|
||||||
return _error_response("InternalError", str(exc), 500)
|
return _error_response("InternalError", str(exc), 500)
|
||||||
else:
|
else:
|
||||||
stat = path.stat()
|
try:
|
||||||
file_size = stat.st_size
|
stat = path.stat()
|
||||||
etag = storage._compute_etag(path)
|
file_size = stat.st_size
|
||||||
|
etag = metadata.get("__etag__") or storage._compute_etag(path)
|
||||||
|
except PermissionError:
|
||||||
|
return _error_response("AccessDenied", "Permission denied accessing object", 403)
|
||||||
|
except OSError as exc:
|
||||||
|
return _error_response("InternalError", f"Failed to access object: {exc}", 500)
|
||||||
|
|
||||||
if range_header:
|
if range_header:
|
||||||
try:
|
try:
|
||||||
@@ -2816,13 +2840,22 @@ def object_handler(bucket_name: str, object_key: str):
|
|||||||
except StorageError as exc:
|
except StorageError as exc:
|
||||||
return _error_response("InternalError", str(exc), 500)
|
return _error_response("InternalError", str(exc), 500)
|
||||||
else:
|
else:
|
||||||
stat = path.stat()
|
try:
|
||||||
response = Response(status=200)
|
stat = path.stat()
|
||||||
etag = storage._compute_etag(path)
|
response = Response(status=200)
|
||||||
|
etag = metadata.get("__etag__") or storage._compute_etag(path)
|
||||||
|
except PermissionError:
|
||||||
|
return _error_response("AccessDenied", "Permission denied accessing object", 403)
|
||||||
|
except OSError as exc:
|
||||||
|
return _error_response("InternalError", f"Failed to access object: {exc}", 500)
|
||||||
response.headers["Content-Type"] = mimetype
|
response.headers["Content-Type"] = mimetype
|
||||||
logged_bytes = 0
|
logged_bytes = 0
|
||||||
|
|
||||||
_apply_object_headers(response, file_stat=path.stat() if not is_encrypted else None, metadata=metadata, etag=etag)
|
try:
|
||||||
|
file_stat = path.stat() if not is_encrypted else None
|
||||||
|
except (PermissionError, OSError):
|
||||||
|
file_stat = None
|
||||||
|
_apply_object_headers(response, file_stat=file_stat, metadata=metadata, etag=etag)
|
||||||
|
|
||||||
if request.method == "GET":
|
if request.method == "GET":
|
||||||
response_overrides = {
|
response_overrides = {
|
||||||
@@ -2945,7 +2978,11 @@ def _bucket_policy_handler(bucket_name: str) -> Response:
|
|||||||
store.delete_policy(bucket_name)
|
store.delete_policy(bucket_name)
|
||||||
current_app.logger.info("Bucket policy removed", extra={"bucket": bucket_name})
|
current_app.logger.info("Bucket policy removed", extra={"bucket": bucket_name})
|
||||||
return Response(status=204)
|
return Response(status=204)
|
||||||
payload = request.get_json(silent=True)
|
raw_body = request.get_data(cache=False) or b""
|
||||||
|
try:
|
||||||
|
payload = json.loads(raw_body)
|
||||||
|
except (json.JSONDecodeError, ValueError):
|
||||||
|
return _error_response("MalformedPolicy", "Policy document must be JSON", 400)
|
||||||
if not payload:
|
if not payload:
|
||||||
return _error_response("MalformedPolicy", "Policy document must be JSON", 400)
|
return _error_response("MalformedPolicy", "Policy document must be JSON", 400)
|
||||||
try:
|
try:
|
||||||
@@ -3037,6 +3074,79 @@ def _parse_replication_config(bucket_name: str, payload: bytes):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _bucket_website_handler(bucket_name: str) -> Response:
|
||||||
|
if request.method not in {"GET", "PUT", "DELETE"}:
|
||||||
|
return _method_not_allowed(["GET", "PUT", "DELETE"])
|
||||||
|
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||||
|
return _error_response("InvalidRequest", "Website hosting is not enabled", 400)
|
||||||
|
principal, error = _require_principal()
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
try:
|
||||||
|
_authorize_action(principal, bucket_name, "policy")
|
||||||
|
except IamError as exc:
|
||||||
|
return _error_response("AccessDenied", str(exc), 403)
|
||||||
|
storage = _storage()
|
||||||
|
if request.method == "GET":
|
||||||
|
try:
|
||||||
|
config = storage.get_bucket_website(bucket_name)
|
||||||
|
except StorageError as exc:
|
||||||
|
return _error_response("NoSuchBucket", str(exc), 404)
|
||||||
|
if not config:
|
||||||
|
return _error_response("NoSuchWebsiteConfiguration", "The specified bucket does not have a website configuration", 404)
|
||||||
|
root = Element("WebsiteConfiguration")
|
||||||
|
root.set("xmlns", S3_NS)
|
||||||
|
index_doc = config.get("index_document")
|
||||||
|
if index_doc:
|
||||||
|
idx_el = SubElement(root, "IndexDocument")
|
||||||
|
SubElement(idx_el, "Suffix").text = index_doc
|
||||||
|
error_doc = config.get("error_document")
|
||||||
|
if error_doc:
|
||||||
|
err_el = SubElement(root, "ErrorDocument")
|
||||||
|
SubElement(err_el, "Key").text = error_doc
|
||||||
|
return _xml_response(root)
|
||||||
|
if request.method == "DELETE":
|
||||||
|
try:
|
||||||
|
storage.set_bucket_website(bucket_name, None)
|
||||||
|
except StorageError as exc:
|
||||||
|
return _error_response("NoSuchBucket", str(exc), 404)
|
||||||
|
current_app.logger.info("Bucket website config deleted", extra={"bucket": bucket_name})
|
||||||
|
return Response(status=204)
|
||||||
|
ct_error = _require_xml_content_type()
|
||||||
|
if ct_error:
|
||||||
|
return ct_error
|
||||||
|
payload = request.get_data(cache=False) or b""
|
||||||
|
if not payload.strip():
|
||||||
|
return _error_response("MalformedXML", "Request body is required", 400)
|
||||||
|
try:
|
||||||
|
root = _parse_xml_with_limit(payload)
|
||||||
|
except ParseError:
|
||||||
|
return _error_response("MalformedXML", "Unable to parse XML document", 400)
|
||||||
|
if _strip_ns(root.tag) != "WebsiteConfiguration":
|
||||||
|
return _error_response("MalformedXML", "Root element must be WebsiteConfiguration", 400)
|
||||||
|
index_el = _find_element(root, "IndexDocument")
|
||||||
|
if index_el is None:
|
||||||
|
return _error_response("InvalidArgument", "IndexDocument is required", 400)
|
||||||
|
suffix_el = _find_element(index_el, "Suffix")
|
||||||
|
if suffix_el is None or not (suffix_el.text or "").strip():
|
||||||
|
return _error_response("InvalidArgument", "IndexDocument Suffix is required", 400)
|
||||||
|
index_suffix = suffix_el.text.strip()
|
||||||
|
if "/" in index_suffix:
|
||||||
|
return _error_response("InvalidArgument", "IndexDocument Suffix must not contain '/'", 400)
|
||||||
|
website_config: Dict[str, Any] = {"index_document": index_suffix}
|
||||||
|
error_el = _find_element(root, "ErrorDocument")
|
||||||
|
if error_el is not None:
|
||||||
|
key_el = _find_element(error_el, "Key")
|
||||||
|
if key_el is not None and (key_el.text or "").strip():
|
||||||
|
website_config["error_document"] = key_el.text.strip()
|
||||||
|
try:
|
||||||
|
storage.set_bucket_website(bucket_name, website_config)
|
||||||
|
except StorageError as exc:
|
||||||
|
return _error_response("NoSuchBucket", str(exc), 404)
|
||||||
|
current_app.logger.info("Bucket website config updated", extra={"bucket": bucket_name, "index": index_suffix})
|
||||||
|
return Response(status=200)
|
||||||
|
|
||||||
|
|
||||||
def _parse_destination_arn(arn: str) -> tuple:
|
def _parse_destination_arn(arn: str) -> tuple:
|
||||||
if not arn.startswith("arn:aws:s3:::"):
|
if not arn.startswith("arn:aws:s3:::"):
|
||||||
raise ValueError(f"Invalid ARN format: {arn}")
|
raise ValueError(f"Invalid ARN format: {arn}")
|
||||||
|
|||||||
284
app/s3_client.py
Normal file
284
app/s3_client.py
Normal file
@@ -0,0 +1,284 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from typing import Any, Generator, Optional
|
||||||
|
|
||||||
|
import boto3
|
||||||
|
from botocore.config import Config
|
||||||
|
from botocore.exceptions import ClientError, EndpointConnectionError, ConnectionClosedError
|
||||||
|
from flask import current_app, session
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
UI_PROXY_USER_AGENT = "MyFSIO-UIProxy/1.0"
|
||||||
|
|
||||||
|
_BOTO_ERROR_MAP = {
|
||||||
|
"NoSuchBucket": 404,
|
||||||
|
"NoSuchKey": 404,
|
||||||
|
"NoSuchUpload": 404,
|
||||||
|
"BucketAlreadyExists": 409,
|
||||||
|
"BucketAlreadyOwnedByYou": 409,
|
||||||
|
"BucketNotEmpty": 409,
|
||||||
|
"AccessDenied": 403,
|
||||||
|
"InvalidAccessKeyId": 403,
|
||||||
|
"SignatureDoesNotMatch": 403,
|
||||||
|
"InvalidBucketName": 400,
|
||||||
|
"InvalidArgument": 400,
|
||||||
|
"MalformedXML": 400,
|
||||||
|
"EntityTooLarge": 400,
|
||||||
|
"QuotaExceeded": 403,
|
||||||
|
}
|
||||||
|
|
||||||
|
_UPLOAD_REGISTRY_MAX_AGE = 86400
|
||||||
|
_UPLOAD_REGISTRY_CLEANUP_INTERVAL = 3600
|
||||||
|
|
||||||
|
|
||||||
|
class UploadRegistry:
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self._entries: dict[str, tuple[str, str, float]] = {}
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
self._last_cleanup = time.monotonic()
|
||||||
|
|
||||||
|
def register(self, upload_id: str, bucket_name: str, object_key: str) -> None:
|
||||||
|
with self._lock:
|
||||||
|
self._entries[upload_id] = (bucket_name, object_key, time.monotonic())
|
||||||
|
self._maybe_cleanup()
|
||||||
|
|
||||||
|
def get_key(self, upload_id: str, bucket_name: str) -> Optional[str]:
|
||||||
|
with self._lock:
|
||||||
|
entry = self._entries.get(upload_id)
|
||||||
|
if entry is None:
|
||||||
|
return None
|
||||||
|
stored_bucket, key, created_at = entry
|
||||||
|
if stored_bucket != bucket_name:
|
||||||
|
return None
|
||||||
|
if time.monotonic() - created_at > _UPLOAD_REGISTRY_MAX_AGE:
|
||||||
|
del self._entries[upload_id]
|
||||||
|
return None
|
||||||
|
return key
|
||||||
|
|
||||||
|
def remove(self, upload_id: str) -> None:
|
||||||
|
with self._lock:
|
||||||
|
self._entries.pop(upload_id, None)
|
||||||
|
|
||||||
|
def _maybe_cleanup(self) -> None:
|
||||||
|
now = time.monotonic()
|
||||||
|
if now - self._last_cleanup < _UPLOAD_REGISTRY_CLEANUP_INTERVAL:
|
||||||
|
return
|
||||||
|
self._last_cleanup = now
|
||||||
|
cutoff = now - _UPLOAD_REGISTRY_MAX_AGE
|
||||||
|
stale = [uid for uid, (_, _, ts) in self._entries.items() if ts < cutoff]
|
||||||
|
for uid in stale:
|
||||||
|
del self._entries[uid]
|
||||||
|
|
||||||
|
|
||||||
|
class S3ProxyClient:
|
||||||
|
def __init__(self, api_base_url: str, region: str = "us-east-1") -> None:
|
||||||
|
if not api_base_url:
|
||||||
|
raise ValueError("api_base_url is required for S3ProxyClient")
|
||||||
|
self._api_base_url = api_base_url.rstrip("/")
|
||||||
|
self._region = region
|
||||||
|
self.upload_registry = UploadRegistry()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def api_base_url(self) -> str:
|
||||||
|
return self._api_base_url
|
||||||
|
|
||||||
|
def get_client(self, access_key: str, secret_key: str) -> Any:
|
||||||
|
if not access_key or not secret_key:
|
||||||
|
raise ValueError("Both access_key and secret_key are required")
|
||||||
|
config = Config(
|
||||||
|
user_agent_extra=UI_PROXY_USER_AGENT,
|
||||||
|
connect_timeout=5,
|
||||||
|
read_timeout=30,
|
||||||
|
retries={"max_attempts": 0},
|
||||||
|
signature_version="s3v4",
|
||||||
|
s3={"addressing_style": "path"},
|
||||||
|
request_checksum_calculation="when_required",
|
||||||
|
response_checksum_validation="when_required",
|
||||||
|
)
|
||||||
|
return boto3.client(
|
||||||
|
"s3",
|
||||||
|
endpoint_url=self._api_base_url,
|
||||||
|
aws_access_key_id=access_key,
|
||||||
|
aws_secret_access_key=secret_key,
|
||||||
|
region_name=self._region,
|
||||||
|
config=config,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_proxy() -> S3ProxyClient:
|
||||||
|
proxy = current_app.extensions.get("s3_proxy")
|
||||||
|
if proxy is None:
|
||||||
|
raise RuntimeError(
|
||||||
|
"S3 proxy not configured. Set API_BASE_URL or run both API and UI servers."
|
||||||
|
)
|
||||||
|
return proxy
|
||||||
|
|
||||||
|
|
||||||
|
def _get_session_creds() -> tuple[str, str]:
|
||||||
|
secret_store = current_app.extensions["secret_store"]
|
||||||
|
secret_store.purge_expired()
|
||||||
|
token = session.get("cred_token")
|
||||||
|
if not token:
|
||||||
|
raise PermissionError("Not authenticated")
|
||||||
|
creds = secret_store.peek(token)
|
||||||
|
if not creds:
|
||||||
|
raise PermissionError("Session expired")
|
||||||
|
access_key = creds.get("access_key", "")
|
||||||
|
secret_key = creds.get("secret_key", "")
|
||||||
|
if not access_key or not secret_key:
|
||||||
|
raise PermissionError("Invalid session credentials")
|
||||||
|
return access_key, secret_key
|
||||||
|
|
||||||
|
|
||||||
|
def get_session_s3_client() -> Any:
|
||||||
|
proxy = _get_proxy()
|
||||||
|
access_key, secret_key = _get_session_creds()
|
||||||
|
return proxy.get_client(access_key, secret_key)
|
||||||
|
|
||||||
|
|
||||||
|
def get_upload_registry() -> UploadRegistry:
    """Return the upload registry owned by the configured S3 proxy."""
    proxy = _get_proxy()
    return proxy.upload_registry
|
||||||
|
|
||||||
|
|
||||||
|
def handle_client_error(exc: ClientError) -> tuple[dict[str, str], int]:
    """Translate a botocore ClientError into an ``({"error": ...}, status)`` pair.

    The HTTP status comes from the module's error-code map when the S3 error
    code is known; otherwise the status reported by the S3 response metadata
    is reused (falling back to 500).
    """
    err = exc.response.get("Error", {})
    message = err.get("Message") or "S3 operation failed"
    status = _BOTO_ERROR_MAP.get(err.get("Code", "InternalError"))
    if status is None:
        meta = exc.response.get("ResponseMetadata", {})
        status = meta.get("HTTPStatusCode", 500)
    return {"error": message}, status
|
||||||
|
|
||||||
|
|
||||||
|
def handle_connection_error(exc: Exception) -> tuple[dict[str, str], int]:
    """Map a transport-level failure to a 502 JSON error payload, logging it."""
    logger.error("S3 API connection failed: %s", exc)
    body = {"error": "S3 API server is unreachable. Ensure the API server is running."}
    return body, 502
|
||||||
|
|
||||||
|
|
||||||
|
def format_datetime_display(dt: Any, display_tz: str = "UTC") -> str:
    """Render *dt* for human display; delegates to the ui module's formatter."""
    # Imported lazily to avoid a circular import with .ui at module load time.
    from .ui import _format_datetime_display as _fmt
    return _fmt(dt, display_tz)
|
||||||
|
|
||||||
|
|
||||||
|
def format_datetime_iso(dt: Any, display_tz: str = "UTC") -> str:
    """Render *dt* in ISO form; delegates to the ui module's formatter."""
    # Imported lazily to avoid a circular import with .ui at module load time.
    from .ui import _format_datetime_iso as _fmt
    return _fmt(dt, display_tz)
|
||||||
|
|
||||||
|
|
||||||
|
def build_url_templates(bucket_name: str) -> dict[str, str]:
    """Pre-render per-object UI routes using placeholder tokens.

    The client substitutes ``KEY_PLACEHOLDER`` (and, for restore,
    ``VERSION_ID_PLACEHOLDER``) with real values, so url_for is called once
    per route instead of once per object row.
    """
    from flask import url_for

    def _object_route(endpoint: str) -> str:
        # All per-object endpoints share the same bucket/key signature.
        return url_for(endpoint, bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")

    preview = _object_route("ui.object_preview")
    restore = url_for(
        "ui.restore_object_version",
        bucket_name=bucket_name,
        object_key="KEY_PLACEHOLDER",
        version_id="VERSION_ID_PLACEHOLDER",
    )
    return {
        "preview": preview,
        "download": preview + "?download=1",
        "presign": _object_route("ui.object_presign"),
        "delete": _object_route("ui.delete_object"),
        "versions": _object_route("ui.object_versions"),
        "restore": restore,
        "tags": _object_route("ui.object_tags"),
        "copy": _object_route("ui.copy_object"),
        "move": _object_route("ui.move_object"),
        "metadata": _object_route("ui.object_metadata"),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def translate_list_objects(
    boto3_response: dict[str, Any],
    url_templates: dict[str, str],
    display_tz: str = "UTC",
    versioning_enabled: bool = False,
) -> dict[str, Any]:
    """Reshape a boto3 list_objects_v2 response into the UI's JSON contract.

    Each S3 ``Contents`` entry becomes a flat row with pre-formatted
    timestamps; pagination fields are copied through unchanged.
    """
    def _row(entry: dict[str, Any]) -> dict[str, Any]:
        modified = entry["LastModified"]
        return {
            "key": entry["Key"],
            "size": entry["Size"],
            "last_modified": modified.isoformat(),
            "last_modified_display": format_datetime_display(modified, display_tz),
            "last_modified_iso": format_datetime_iso(modified, display_tz),
            # S3 wraps ETags in literal double quotes; strip them for the UI.
            "etag": entry.get("ETag", "").strip('"'),
        }

    rows = [_row(entry) for entry in boto3_response.get("Contents", [])]
    return {
        "objects": rows,
        "is_truncated": boto3_response.get("IsTruncated", False),
        "next_continuation_token": boto3_response.get("NextContinuationToken"),
        "total_count": boto3_response.get("KeyCount", len(rows)),
        "versioning_enabled": versioning_enabled,
        "url_templates": url_templates,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def get_versioning_via_s3(client: Any, bucket_name: str) -> bool:
    """Return True when the bucket's versioning Status is "Enabled".

    Any S3 error is treated as "not enabled"; errors other than
    NoSuchBucket are logged at warning level.
    """
    try:
        status = client.get_bucket_versioning(Bucket=bucket_name).get("Status")
    except ClientError as exc:
        code = exc.response.get("Error", {}).get("Code", "")
        if code != "NoSuchBucket":
            logger.warning("Failed to check versioning for %s: %s", bucket_name, code)
        return False
    return status == "Enabled"
|
||||||
|
|
||||||
|
|
||||||
|
def stream_objects_ndjson(
    client: Any,
    bucket_name: str,
    prefix: Optional[str],
    url_templates: dict[str, str],
    display_tz: str = "UTC",
    versioning_enabled: bool = False,
) -> Generator[str, None, None]:
    """Yield an NDJSON stream of bucket objects for progressive UI rendering.

    Record types emitted, in order: one "meta" line, one "count" placeholder
    line, zero or more "object" lines, then either a terminal "done" line or
    a terminal "error" line if listing fails mid-stream.
    """
    def _line(record: dict[str, Any]) -> str:
        return json.dumps(record) + "\n"

    yield _line({
        "type": "meta",
        "versioning_enabled": versioning_enabled,
        "url_templates": url_templates,
    })

    # Placeholder total; the client counts "object" lines as they arrive.
    yield _line({"type": "count", "total_count": 0})

    list_kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": 1000}
    if prefix:
        list_kwargs["Prefix"] = prefix

    try:
        for page in client.get_paginator("list_objects_v2").paginate(**list_kwargs):
            for entry in page.get("Contents", []):
                modified = entry["LastModified"]
                yield _line({
                    "type": "object",
                    "key": entry["Key"],
                    "size": entry["Size"],
                    "last_modified": modified.isoformat(),
                    "last_modified_display": format_datetime_display(modified, display_tz),
                    "last_modified_iso": format_datetime_iso(modified, display_tz),
                    "etag": entry.get("ETag", "").strip('"'),
                })
    except ClientError as exc:
        message = exc.response.get("Error", {}).get("Message", "S3 operation failed")
        yield _line({"type": "error", "error": message})
        return
    except (EndpointConnectionError, ConnectionClosedError):
        yield _line({"type": "error", "error": "S3 API server is unreachable"})
        return

    yield _line({"type": "done"})
|
||||||
@@ -18,6 +18,18 @@ class EphemeralSecretStore:
|
|||||||
self._store[token] = (payload, expires_at)
|
self._store[token] = (payload, expires_at)
|
||||||
return token
|
return token
|
||||||
|
|
||||||
|
def peek(self, token: str | None) -> Any | None:
|
||||||
|
if not token:
|
||||||
|
return None
|
||||||
|
entry = self._store.get(token)
|
||||||
|
if not entry:
|
||||||
|
return None
|
||||||
|
payload, expires_at = entry
|
||||||
|
if expires_at < time.time():
|
||||||
|
self._store.pop(token, None)
|
||||||
|
return None
|
||||||
|
return payload
|
||||||
|
|
||||||
def pop(self, token: str | None) -> Any | None:
|
def pop(self, token: str | None) -> Any | None:
|
||||||
if not token:
|
if not token:
|
||||||
return None
|
return None
|
||||||
|
|||||||
475
app/storage.py
475
app/storage.py
@@ -11,12 +11,20 @@ import time
|
|||||||
import unicodedata
|
import unicodedata
|
||||||
import uuid
|
import uuid
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, BinaryIO, Dict, Generator, List, Optional
|
from typing import Any, BinaryIO, Dict, Generator, List, Optional
|
||||||
|
|
||||||
|
try:
|
||||||
|
import myfsio_core as _rc
|
||||||
|
_HAS_RUST = True
|
||||||
|
except ImportError:
|
||||||
|
_rc = None
|
||||||
|
_HAS_RUST = False
|
||||||
|
|
||||||
# Platform-specific file locking
|
# Platform-specific file locking
|
||||||
if os.name == "nt":
|
if os.name == "nt":
|
||||||
import msvcrt
|
import msvcrt
|
||||||
@@ -177,7 +185,7 @@ class ObjectStorage:
|
|||||||
self.root = Path(root)
|
self.root = Path(root)
|
||||||
self.root.mkdir(parents=True, exist_ok=True)
|
self.root.mkdir(parents=True, exist_ok=True)
|
||||||
self._ensure_system_roots()
|
self._ensure_system_roots()
|
||||||
self._object_cache: OrderedDict[str, tuple[Dict[str, ObjectMeta], float]] = OrderedDict()
|
self._object_cache: OrderedDict[str, tuple[Dict[str, ObjectMeta], float, float]] = OrderedDict()
|
||||||
self._cache_lock = threading.Lock()
|
self._cache_lock = threading.Lock()
|
||||||
self._bucket_locks: Dict[str, threading.Lock] = {}
|
self._bucket_locks: Dict[str, threading.Lock] = {}
|
||||||
self._cache_version: Dict[str, int] = {}
|
self._cache_version: Dict[str, int] = {}
|
||||||
@@ -186,6 +194,9 @@ class ObjectStorage:
|
|||||||
self._cache_ttl = cache_ttl
|
self._cache_ttl = cache_ttl
|
||||||
self._object_cache_max_size = object_cache_max_size
|
self._object_cache_max_size = object_cache_max_size
|
||||||
self._object_key_max_length_bytes = object_key_max_length_bytes
|
self._object_key_max_length_bytes = object_key_max_length_bytes
|
||||||
|
self._sorted_key_cache: Dict[str, tuple[list[str], int]] = {}
|
||||||
|
self._meta_index_locks: Dict[str, threading.Lock] = {}
|
||||||
|
self._cleanup_executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="ParentCleanup")
|
||||||
|
|
||||||
def _get_bucket_lock(self, bucket_id: str) -> threading.Lock:
|
def _get_bucket_lock(self, bucket_id: str) -> threading.Lock:
|
||||||
"""Get or create a lock for a specific bucket. Reduces global lock contention."""
|
"""Get or create a lock for a specific bucket. Reduces global lock contention."""
|
||||||
@@ -216,6 +227,11 @@ class ObjectStorage:
|
|||||||
raise BucketNotFoundError("Bucket does not exist")
|
raise BucketNotFoundError("Bucket does not exist")
|
||||||
|
|
||||||
def _validate_bucket_name(self, bucket_name: str) -> None:
|
def _validate_bucket_name(self, bucket_name: str) -> None:
|
||||||
|
if _HAS_RUST:
|
||||||
|
error = _rc.validate_bucket_name(bucket_name)
|
||||||
|
if error:
|
||||||
|
raise StorageError(error)
|
||||||
|
return
|
||||||
if len(bucket_name) < 3 or len(bucket_name) > 63:
|
if len(bucket_name) < 3 or len(bucket_name) > 63:
|
||||||
raise StorageError("Bucket name must be between 3 and 63 characters")
|
raise StorageError("Bucket name must be between 3 and 63 characters")
|
||||||
if not re.match(r"^[a-z0-9][a-z0-9.-]*[a-z0-9]$", bucket_name):
|
if not re.match(r"^[a-z0-9][a-z0-9.-]*[a-z0-9]$", bucket_name):
|
||||||
@@ -243,10 +259,15 @@ class ObjectStorage:
|
|||||||
raise BucketNotFoundError("Bucket does not exist")
|
raise BucketNotFoundError("Bucket does not exist")
|
||||||
|
|
||||||
cache_path = self._system_bucket_root(bucket_name) / "stats.json"
|
cache_path = self._system_bucket_root(bucket_name) / "stats.json"
|
||||||
|
cached_stats = None
|
||||||
|
cache_fresh = False
|
||||||
|
|
||||||
if cache_path.exists():
|
if cache_path.exists():
|
||||||
try:
|
try:
|
||||||
if time.time() - cache_path.stat().st_mtime < cache_ttl:
|
cache_fresh = time.time() - cache_path.stat().st_mtime < cache_ttl
|
||||||
return json.loads(cache_path.read_text(encoding="utf-8"))
|
cached_stats = json.loads(cache_path.read_text(encoding="utf-8"))
|
||||||
|
if cache_fresh:
|
||||||
|
return cached_stats
|
||||||
except (OSError, json.JSONDecodeError):
|
except (OSError, json.JSONDecodeError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -255,40 +276,50 @@ class ObjectStorage:
|
|||||||
version_count = 0
|
version_count = 0
|
||||||
version_bytes = 0
|
version_bytes = 0
|
||||||
|
|
||||||
for path in bucket_path.rglob("*"):
|
try:
|
||||||
if path.is_file():
|
for path in bucket_path.rglob("*"):
|
||||||
rel = path.relative_to(bucket_path)
|
|
||||||
if not rel.parts:
|
|
||||||
continue
|
|
||||||
top_folder = rel.parts[0]
|
|
||||||
if top_folder not in self.INTERNAL_FOLDERS:
|
|
||||||
stat = path.stat()
|
|
||||||
object_count += 1
|
|
||||||
total_bytes += stat.st_size
|
|
||||||
|
|
||||||
versions_root = self._bucket_versions_root(bucket_name)
|
|
||||||
if versions_root.exists():
|
|
||||||
for path in versions_root.rglob("*.bin"):
|
|
||||||
if path.is_file():
|
if path.is_file():
|
||||||
stat = path.stat()
|
rel = path.relative_to(bucket_path)
|
||||||
version_count += 1
|
if not rel.parts:
|
||||||
version_bytes += stat.st_size
|
continue
|
||||||
|
top_folder = rel.parts[0]
|
||||||
|
if top_folder not in self.INTERNAL_FOLDERS:
|
||||||
|
stat = path.stat()
|
||||||
|
object_count += 1
|
||||||
|
total_bytes += stat.st_size
|
||||||
|
|
||||||
|
versions_root = self._bucket_versions_root(bucket_name)
|
||||||
|
if versions_root.exists():
|
||||||
|
for path in versions_root.rglob("*.bin"):
|
||||||
|
if path.is_file():
|
||||||
|
stat = path.stat()
|
||||||
|
version_count += 1
|
||||||
|
version_bytes += stat.st_size
|
||||||
|
except OSError:
|
||||||
|
if cached_stats is not None:
|
||||||
|
return cached_stats
|
||||||
|
raise
|
||||||
|
|
||||||
|
existing_serial = 0
|
||||||
|
if cached_stats is not None:
|
||||||
|
existing_serial = cached_stats.get("_cache_serial", 0)
|
||||||
|
|
||||||
stats = {
|
stats = {
|
||||||
"objects": object_count,
|
"objects": object_count,
|
||||||
"bytes": total_bytes,
|
"bytes": total_bytes,
|
||||||
"version_count": version_count,
|
"version_count": version_count,
|
||||||
"version_bytes": version_bytes,
|
"version_bytes": version_bytes,
|
||||||
"total_objects": object_count + version_count,
|
"total_objects": object_count + version_count,
|
||||||
"total_bytes": total_bytes + version_bytes,
|
"total_bytes": total_bytes + version_bytes,
|
||||||
|
"_cache_serial": existing_serial,
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cache_path.parent.mkdir(parents=True, exist_ok=True)
|
cache_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
cache_path.write_text(json.dumps(stats), encoding="utf-8")
|
cache_path.write_text(json.dumps(stats), encoding="utf-8")
|
||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
def _invalidate_bucket_stats_cache(self, bucket_id: str) -> None:
|
def _invalidate_bucket_stats_cache(self, bucket_id: str) -> None:
|
||||||
@@ -299,6 +330,39 @@ class ObjectStorage:
|
|||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def _update_bucket_stats_cache(
    self,
    bucket_id: str,
    *,
    bytes_delta: int = 0,
    objects_delta: int = 0,
    version_bytes_delta: int = 0,
    version_count_delta: int = 0,
) -> None:
    """Incrementally update cached bucket statistics instead of invalidating.

    This avoids expensive full directory scans on every PUT/DELETE by
    adjusting the cached values directly. Also signals cross-process cache
    invalidation by incrementing _cache_serial.

    All deltas are keyword-only and default to 0, so callers pass only the
    counters they actually changed. Best-effort: failures leave the cache
    file untouched and are ignored.
    """
    cache_path = self._system_bucket_root(bucket_id) / "stats.json"
    try:
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        if cache_path.exists():
            data = json.loads(cache_path.read_text(encoding="utf-8"))
        else:
            # No cache yet: start from an all-zero stats record.
            data = {"objects": 0, "bytes": 0, "version_count": 0, "version_bytes": 0, "total_objects": 0, "total_bytes": 0, "_cache_serial": 0}
        # Clamp each counter at zero so a stale/rebuilt cache can never go
        # negative when deltas are replayed against it.
        data["objects"] = max(0, data.get("objects", 0) + objects_delta)
        data["bytes"] = max(0, data.get("bytes", 0) + bytes_delta)
        data["version_count"] = max(0, data.get("version_count", 0) + version_count_delta)
        data["version_bytes"] = max(0, data.get("version_bytes", 0) + version_bytes_delta)
        data["total_objects"] = max(0, data.get("total_objects", 0) + objects_delta + version_count_delta)
        data["total_bytes"] = max(0, data.get("total_bytes", 0) + bytes_delta + version_bytes_delta)
        # Bump the serial so other processes can detect the change.
        data["_cache_serial"] = data.get("_cache_serial", 0) + 1
        cache_path.write_text(json.dumps(data), encoding="utf-8")
    except (OSError, json.JSONDecodeError):
        # The stats cache is advisory; never let cache maintenance fail a
        # storage operation.
        pass
|
||||||
|
|
||||||
def delete_bucket(self, bucket_name: str) -> None:
|
def delete_bucket(self, bucket_name: str) -> None:
|
||||||
bucket_path = self._bucket_path(bucket_name)
|
bucket_path = self._bucket_path(bucket_name)
|
||||||
if not bucket_path.exists():
|
if not bucket_path.exists():
|
||||||
@@ -333,22 +397,35 @@ class ObjectStorage:
|
|||||||
Returns:
|
Returns:
|
||||||
ListObjectsResult with objects, truncation status, and continuation token
|
ListObjectsResult with objects, truncation status, and continuation token
|
||||||
"""
|
"""
|
||||||
|
import bisect
|
||||||
|
|
||||||
bucket_path = self._bucket_path(bucket_name)
|
bucket_path = self._bucket_path(bucket_name)
|
||||||
if not bucket_path.exists():
|
if not bucket_path.exists():
|
||||||
raise BucketNotFoundError("Bucket does not exist")
|
raise BucketNotFoundError("Bucket does not exist")
|
||||||
bucket_id = bucket_path.name
|
bucket_id = bucket_path.name
|
||||||
|
|
||||||
object_cache = self._get_object_cache(bucket_id, bucket_path)
|
object_cache = self._get_object_cache(bucket_id, bucket_path)
|
||||||
|
|
||||||
all_keys = sorted(object_cache.keys())
|
cache_version = self._cache_version.get(bucket_id, 0)
|
||||||
|
cached_entry = self._sorted_key_cache.get(bucket_id)
|
||||||
|
if cached_entry and cached_entry[1] == cache_version:
|
||||||
|
all_keys = cached_entry[0]
|
||||||
|
else:
|
||||||
|
all_keys = sorted(object_cache.keys())
|
||||||
|
self._sorted_key_cache[bucket_id] = (all_keys, cache_version)
|
||||||
|
|
||||||
if prefix:
|
if prefix:
|
||||||
all_keys = [k for k in all_keys if k.startswith(prefix)]
|
lo = bisect.bisect_left(all_keys, prefix)
|
||||||
|
hi = len(all_keys)
|
||||||
|
for i in range(lo, len(all_keys)):
|
||||||
|
if not all_keys[i].startswith(prefix):
|
||||||
|
hi = i
|
||||||
|
break
|
||||||
|
all_keys = all_keys[lo:hi]
|
||||||
|
|
||||||
total_count = len(all_keys)
|
total_count = len(all_keys)
|
||||||
start_index = 0
|
start_index = 0
|
||||||
if continuation_token:
|
if continuation_token:
|
||||||
import bisect
|
|
||||||
start_index = bisect.bisect_right(all_keys, continuation_token)
|
start_index = bisect.bisect_right(all_keys, continuation_token)
|
||||||
if start_index >= total_count:
|
if start_index >= total_count:
|
||||||
return ListObjectsResult(
|
return ListObjectsResult(
|
||||||
@@ -356,8 +433,8 @@ class ObjectStorage:
|
|||||||
is_truncated=False,
|
is_truncated=False,
|
||||||
next_continuation_token=None,
|
next_continuation_token=None,
|
||||||
total_count=total_count,
|
total_count=total_count,
|
||||||
)
|
)
|
||||||
|
|
||||||
end_index = start_index + max_keys
|
end_index = start_index + max_keys
|
||||||
keys_slice = all_keys[start_index:end_index]
|
keys_slice = all_keys[start_index:end_index]
|
||||||
is_truncated = end_index < total_count
|
is_truncated = end_index < total_count
|
||||||
@@ -403,7 +480,9 @@ class ObjectStorage:
|
|||||||
is_overwrite = destination.exists()
|
is_overwrite = destination.exists()
|
||||||
existing_size = destination.stat().st_size if is_overwrite else 0
|
existing_size = destination.stat().st_size if is_overwrite else 0
|
||||||
|
|
||||||
|
archived_version_size = 0
|
||||||
if self._is_versioning_enabled(bucket_path) and is_overwrite:
|
if self._is_versioning_enabled(bucket_path) and is_overwrite:
|
||||||
|
archived_version_size = existing_size
|
||||||
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
|
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
|
||||||
|
|
||||||
tmp_dir = self._system_root_path() / self.SYSTEM_TMP_DIR
|
tmp_dir = self._system_root_path() / self.SYSTEM_TMP_DIR
|
||||||
@@ -416,11 +495,10 @@ class ObjectStorage:
|
|||||||
shutil.copyfileobj(_HashingReader(stream, checksum), target)
|
shutil.copyfileobj(_HashingReader(stream, checksum), target)
|
||||||
|
|
||||||
new_size = tmp_path.stat().st_size
|
new_size = tmp_path.stat().st_size
|
||||||
|
size_delta = new_size - existing_size
|
||||||
|
object_delta = 0 if is_overwrite else 1
|
||||||
|
|
||||||
if enforce_quota:
|
if enforce_quota:
|
||||||
size_delta = new_size - existing_size
|
|
||||||
object_delta = 0 if is_overwrite else 1
|
|
||||||
|
|
||||||
quota_check = self.check_quota(
|
quota_check = self.check_quota(
|
||||||
bucket_name,
|
bucket_name,
|
||||||
additional_bytes=max(0, size_delta),
|
additional_bytes=max(0, size_delta),
|
||||||
@@ -432,7 +510,7 @@ class ObjectStorage:
|
|||||||
quota_check["quota"],
|
quota_check["quota"],
|
||||||
quota_check["usage"],
|
quota_check["usage"],
|
||||||
)
|
)
|
||||||
|
|
||||||
shutil.move(str(tmp_path), str(destination))
|
shutil.move(str(tmp_path), str(destination))
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
@@ -448,7 +526,13 @@ class ObjectStorage:
|
|||||||
combined_meta = {**internal_meta, **(metadata or {})}
|
combined_meta = {**internal_meta, **(metadata or {})}
|
||||||
self._write_metadata(bucket_id, safe_key, combined_meta)
|
self._write_metadata(bucket_id, safe_key, combined_meta)
|
||||||
|
|
||||||
self._invalidate_bucket_stats_cache(bucket_id)
|
self._update_bucket_stats_cache(
|
||||||
|
bucket_id,
|
||||||
|
bytes_delta=size_delta,
|
||||||
|
objects_delta=object_delta,
|
||||||
|
version_bytes_delta=archived_version_size,
|
||||||
|
version_count_delta=1 if archived_version_size > 0 else 0,
|
||||||
|
)
|
||||||
|
|
||||||
obj_meta = ObjectMeta(
|
obj_meta = ObjectMeta(
|
||||||
key=safe_key.as_posix(),
|
key=safe_key.as_posix(),
|
||||||
@@ -463,7 +547,7 @@ class ObjectStorage:
|
|||||||
|
|
||||||
def get_object_path(self, bucket_name: str, object_key: str) -> Path:
|
def get_object_path(self, bucket_name: str, object_key: str) -> Path:
|
||||||
path = self._object_path(bucket_name, object_key)
|
path = self._object_path(bucket_name, object_key)
|
||||||
if not path.exists():
|
if not path.is_file():
|
||||||
raise ObjectNotFoundError("Object not found")
|
raise ObjectNotFoundError("Object not found")
|
||||||
return path
|
return path
|
||||||
|
|
||||||
@@ -475,11 +559,14 @@ class ObjectStorage:
|
|||||||
return self._read_metadata(bucket_path.name, safe_key) or {}
|
return self._read_metadata(bucket_path.name, safe_key) or {}
|
||||||
|
|
||||||
def _cleanup_empty_parents(self, path: Path, stop_at: Path) -> None:
|
def _cleanup_empty_parents(self, path: Path, stop_at: Path) -> None:
|
||||||
"""Remove empty parent directories up to (but not including) stop_at.
|
"""Remove empty parent directories in a background thread.
|
||||||
|
|
||||||
On Windows/OneDrive, directories may be locked briefly after file deletion.
|
On Windows/OneDrive, directories may be locked briefly after file deletion.
|
||||||
This method retries with a small delay to handle that case.
|
Running this in the background avoids blocking the request thread with retries.
|
||||||
"""
|
"""
|
||||||
|
self._cleanup_executor.submit(self._do_cleanup_empty_parents, path, stop_at)
|
||||||
|
|
||||||
|
def _do_cleanup_empty_parents(self, path: Path, stop_at: Path) -> None:
|
||||||
for parent in path.parents:
|
for parent in path.parents:
|
||||||
if parent == stop_at:
|
if parent == stop_at:
|
||||||
break
|
break
|
||||||
@@ -487,7 +574,7 @@ class ObjectStorage:
|
|||||||
try:
|
try:
|
||||||
if parent.exists() and not any(parent.iterdir()):
|
if parent.exists() and not any(parent.iterdir()):
|
||||||
parent.rmdir()
|
parent.rmdir()
|
||||||
break
|
break
|
||||||
except OSError:
|
except OSError:
|
||||||
if attempt < 2:
|
if attempt < 2:
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
@@ -498,15 +585,24 @@ class ObjectStorage:
|
|||||||
path = self._object_path(bucket_name, object_key)
|
path = self._object_path(bucket_name, object_key)
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
return
|
return
|
||||||
|
deleted_size = path.stat().st_size
|
||||||
safe_key = path.relative_to(bucket_path)
|
safe_key = path.relative_to(bucket_path)
|
||||||
bucket_id = bucket_path.name
|
bucket_id = bucket_path.name
|
||||||
|
archived_version_size = 0
|
||||||
if self._is_versioning_enabled(bucket_path):
|
if self._is_versioning_enabled(bucket_path):
|
||||||
|
archived_version_size = deleted_size
|
||||||
self._archive_current_version(bucket_id, safe_key, reason="delete")
|
self._archive_current_version(bucket_id, safe_key, reason="delete")
|
||||||
rel = path.relative_to(bucket_path)
|
rel = path.relative_to(bucket_path)
|
||||||
self._safe_unlink(path)
|
self._safe_unlink(path)
|
||||||
self._delete_metadata(bucket_id, rel)
|
self._delete_metadata(bucket_id, rel)
|
||||||
|
|
||||||
self._invalidate_bucket_stats_cache(bucket_id)
|
self._update_bucket_stats_cache(
|
||||||
|
bucket_id,
|
||||||
|
bytes_delta=-deleted_size,
|
||||||
|
objects_delta=-1,
|
||||||
|
version_bytes_delta=archived_version_size,
|
||||||
|
version_count_delta=1 if archived_version_size > 0 else 0,
|
||||||
|
)
|
||||||
self._update_object_cache_entry(bucket_id, safe_key.as_posix(), None)
|
self._update_object_cache_entry(bucket_id, safe_key.as_posix(), None)
|
||||||
self._cleanup_empty_parents(path, bucket_path)
|
self._cleanup_empty_parents(path, bucket_path)
|
||||||
|
|
||||||
@@ -604,10 +700,19 @@ class ObjectStorage:
|
|||||||
return lifecycle if isinstance(lifecycle, list) else None
|
return lifecycle if isinstance(lifecycle, list) else None
|
||||||
|
|
||||||
def set_bucket_lifecycle(self, bucket_name: str, rules: Optional[List[Dict[str, Any]]]) -> None:
|
def set_bucket_lifecycle(self, bucket_name: str, rules: Optional[List[Dict[str, Any]]]) -> None:
|
||||||
"""Set lifecycle configuration for bucket."""
|
|
||||||
bucket_path = self._require_bucket_path(bucket_name)
|
bucket_path = self._require_bucket_path(bucket_name)
|
||||||
self._set_bucket_config_entry(bucket_path.name, "lifecycle", rules)
|
self._set_bucket_config_entry(bucket_path.name, "lifecycle", rules)
|
||||||
|
|
||||||
|
def get_bucket_website(self, bucket_name: str) -> Optional[Dict[str, Any]]:
    """Return the bucket's website configuration, or None when unset or malformed."""
    bucket_id = self._require_bucket_path(bucket_name).name
    entry = self._read_bucket_config(bucket_id).get("website")
    # Only a dict is a valid website config; anything else is treated as unset.
    return entry if isinstance(entry, dict) else None
|
||||||
|
|
||||||
|
def set_bucket_website(self, bucket_name: str, website_config: Optional[Dict[str, Any]]) -> None:
    """Persist the bucket's website configuration (None clears the entry)."""
    bucket_id = self._require_bucket_path(bucket_name).name
    self._set_bucket_config_entry(bucket_id, "website", website_config)
|
||||||
|
|
||||||
def get_bucket_quota(self, bucket_name: str) -> Dict[str, Any]:
|
def get_bucket_quota(self, bucket_name: str) -> Dict[str, Any]:
|
||||||
"""Get quota configuration for bucket.
|
"""Get quota configuration for bucket.
|
||||||
|
|
||||||
@@ -733,6 +838,10 @@ class ObjectStorage:
|
|||||||
if not object_path.exists():
|
if not object_path.exists():
|
||||||
raise ObjectNotFoundError("Object does not exist")
|
raise ObjectNotFoundError("Object does not exist")
|
||||||
|
|
||||||
|
entry = self._read_index_entry(bucket_path.name, safe_key)
|
||||||
|
if entry is not None:
|
||||||
|
tags = entry.get("tags")
|
||||||
|
return tags if isinstance(tags, list) else []
|
||||||
for meta_file in (self._metadata_file(bucket_path.name, safe_key), self._legacy_metadata_file(bucket_path.name, safe_key)):
|
for meta_file in (self._metadata_file(bucket_path.name, safe_key), self._legacy_metadata_file(bucket_path.name, safe_key)):
|
||||||
if not meta_file.exists():
|
if not meta_file.exists():
|
||||||
continue
|
continue
|
||||||
@@ -756,30 +865,31 @@ class ObjectStorage:
|
|||||||
if not object_path.exists():
|
if not object_path.exists():
|
||||||
raise ObjectNotFoundError("Object does not exist")
|
raise ObjectNotFoundError("Object does not exist")
|
||||||
|
|
||||||
meta_file = self._metadata_file(bucket_path.name, safe_key)
|
bucket_id = bucket_path.name
|
||||||
|
existing_entry = self._read_index_entry(bucket_id, safe_key) or {}
|
||||||
existing_payload: Dict[str, Any] = {}
|
if not existing_entry:
|
||||||
if meta_file.exists():
|
meta_file = self._metadata_file(bucket_id, safe_key)
|
||||||
try:
|
if meta_file.exists():
|
||||||
existing_payload = json.loads(meta_file.read_text(encoding="utf-8"))
|
try:
|
||||||
except (OSError, json.JSONDecodeError):
|
existing_entry = json.loads(meta_file.read_text(encoding="utf-8"))
|
||||||
pass
|
except (OSError, json.JSONDecodeError):
|
||||||
|
pass
|
||||||
|
|
||||||
if tags:
|
if tags:
|
||||||
existing_payload["tags"] = tags
|
existing_entry["tags"] = tags
|
||||||
else:
|
else:
|
||||||
existing_payload.pop("tags", None)
|
existing_entry.pop("tags", None)
|
||||||
|
|
||||||
if existing_payload.get("metadata") or existing_payload.get("tags"):
|
if existing_entry.get("metadata") or existing_entry.get("tags"):
|
||||||
meta_file.parent.mkdir(parents=True, exist_ok=True)
|
self._write_index_entry(bucket_id, safe_key, existing_entry)
|
||||||
meta_file.write_text(json.dumps(existing_payload), encoding="utf-8")
|
else:
|
||||||
elif meta_file.exists():
|
self._delete_index_entry(bucket_id, safe_key)
|
||||||
meta_file.unlink()
|
old_meta = self._metadata_file(bucket_id, safe_key)
|
||||||
parent = meta_file.parent
|
try:
|
||||||
meta_root = self._bucket_meta_root(bucket_path.name)
|
if old_meta.exists():
|
||||||
while parent != meta_root and parent.exists() and not any(parent.iterdir()):
|
old_meta.unlink()
|
||||||
parent.rmdir()
|
except OSError:
|
||||||
parent = parent.parent
|
pass
|
||||||
|
|
||||||
def delete_object_tags(self, bucket_name: str, object_key: str) -> None:
|
def delete_object_tags(self, bucket_name: str, object_key: str) -> None:
|
||||||
"""Delete all tags from an object."""
|
"""Delete all tags from an object."""
|
||||||
@@ -828,7 +938,12 @@ class ObjectStorage:
|
|||||||
if not isinstance(metadata, dict):
|
if not isinstance(metadata, dict):
|
||||||
metadata = {}
|
metadata = {}
|
||||||
destination = bucket_path / safe_key
|
destination = bucket_path / safe_key
|
||||||
if self._is_versioning_enabled(bucket_path) and destination.exists():
|
restored_size = data_path.stat().st_size
|
||||||
|
is_overwrite = destination.exists()
|
||||||
|
existing_size = destination.stat().st_size if is_overwrite else 0
|
||||||
|
archived_version_size = 0
|
||||||
|
if self._is_versioning_enabled(bucket_path) and is_overwrite:
|
||||||
|
archived_version_size = existing_size
|
||||||
self._archive_current_version(bucket_id, safe_key, reason="restore-overwrite")
|
self._archive_current_version(bucket_id, safe_key, reason="restore-overwrite")
|
||||||
destination.parent.mkdir(parents=True, exist_ok=True)
|
destination.parent.mkdir(parents=True, exist_ok=True)
|
||||||
shutil.copy2(data_path, destination)
|
shutil.copy2(data_path, destination)
|
||||||
@@ -837,7 +952,13 @@ class ObjectStorage:
|
|||||||
else:
|
else:
|
||||||
self._delete_metadata(bucket_id, safe_key)
|
self._delete_metadata(bucket_id, safe_key)
|
||||||
stat = destination.stat()
|
stat = destination.stat()
|
||||||
self._invalidate_bucket_stats_cache(bucket_id)
|
self._update_bucket_stats_cache(
|
||||||
|
bucket_id,
|
||||||
|
bytes_delta=restored_size - existing_size,
|
||||||
|
objects_delta=0 if is_overwrite else 1,
|
||||||
|
version_bytes_delta=archived_version_size,
|
||||||
|
version_count_delta=1 if archived_version_size > 0 else 0,
|
||||||
|
)
|
||||||
return ObjectMeta(
|
return ObjectMeta(
|
||||||
key=safe_key.as_posix(),
|
key=safe_key.as_posix(),
|
||||||
size=stat.st_size,
|
size=stat.st_size,
|
||||||
@@ -861,6 +982,7 @@ class ObjectStorage:
|
|||||||
meta_path = legacy_version_dir / f"{version_id}.json"
|
meta_path = legacy_version_dir / f"{version_id}.json"
|
||||||
if not data_path.exists() and not meta_path.exists():
|
if not data_path.exists() and not meta_path.exists():
|
||||||
raise StorageError(f"Version {version_id} not found")
|
raise StorageError(f"Version {version_id} not found")
|
||||||
|
deleted_version_size = data_path.stat().st_size if data_path.exists() else 0
|
||||||
if data_path.exists():
|
if data_path.exists():
|
||||||
data_path.unlink()
|
data_path.unlink()
|
||||||
if meta_path.exists():
|
if meta_path.exists():
|
||||||
@@ -868,6 +990,12 @@ class ObjectStorage:
|
|||||||
parent = data_path.parent
|
parent = data_path.parent
|
||||||
if parent.exists() and not any(parent.iterdir()):
|
if parent.exists() and not any(parent.iterdir()):
|
||||||
parent.rmdir()
|
parent.rmdir()
|
||||||
|
if deleted_version_size > 0:
|
||||||
|
self._update_bucket_stats_cache(
|
||||||
|
bucket_id,
|
||||||
|
version_bytes_delta=-deleted_version_size,
|
||||||
|
version_count_delta=-1,
|
||||||
|
)
|
||||||
|
|
||||||
def list_orphaned_objects(self, bucket_name: str) -> List[Dict[str, Any]]:
|
def list_orphaned_objects(self, bucket_name: str) -> List[Dict[str, Any]]:
|
||||||
bucket_path = self._bucket_path(bucket_name)
|
bucket_path = self._bucket_path(bucket_name)
|
||||||
@@ -1164,14 +1292,14 @@ class ObjectStorage:
|
|||||||
|
|
||||||
safe_key = self._sanitize_object_key(manifest["object_key"], self._object_key_max_length_bytes)
|
safe_key = self._sanitize_object_key(manifest["object_key"], self._object_key_max_length_bytes)
|
||||||
destination = bucket_path / safe_key
|
destination = bucket_path / safe_key
|
||||||
|
|
||||||
is_overwrite = destination.exists()
|
is_overwrite = destination.exists()
|
||||||
existing_size = destination.stat().st_size if is_overwrite else 0
|
existing_size = destination.stat().st_size if is_overwrite else 0
|
||||||
|
size_delta = total_size - existing_size
|
||||||
|
object_delta = 0 if is_overwrite else 1
|
||||||
|
versioning_enabled = self._is_versioning_enabled(bucket_path)
|
||||||
|
|
||||||
if enforce_quota:
|
if enforce_quota:
|
||||||
size_delta = total_size - existing_size
|
|
||||||
object_delta = 0 if is_overwrite else 1
|
|
||||||
|
|
||||||
quota_check = self.check_quota(
|
quota_check = self.check_quota(
|
||||||
bucket_name,
|
bucket_name,
|
||||||
additional_bytes=max(0, size_delta),
|
additional_bytes=max(0, size_delta),
|
||||||
@@ -1183,14 +1311,16 @@ class ObjectStorage:
|
|||||||
quota_check["quota"],
|
quota_check["quota"],
|
||||||
quota_check["usage"],
|
quota_check["usage"],
|
||||||
)
|
)
|
||||||
|
|
||||||
destination.parent.mkdir(parents=True, exist_ok=True)
|
destination.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock"
|
lock_file_path = self._system_bucket_root(bucket_id) / "locks" / f"{safe_key.as_posix().replace('/', '_')}.lock"
|
||||||
|
|
||||||
|
archived_version_size = 0
|
||||||
try:
|
try:
|
||||||
with _atomic_lock_file(lock_file_path):
|
with _atomic_lock_file(lock_file_path):
|
||||||
if self._is_versioning_enabled(bucket_path) and destination.exists():
|
if versioning_enabled and destination.exists():
|
||||||
|
archived_version_size = destination.stat().st_size
|
||||||
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
|
self._archive_current_version(bucket_id, safe_key, reason="overwrite")
|
||||||
checksum = hashlib.md5()
|
checksum = hashlib.md5()
|
||||||
with destination.open("wb") as target:
|
with destination.open("wb") as target:
|
||||||
@@ -1210,7 +1340,13 @@ class ObjectStorage:
|
|||||||
|
|
||||||
shutil.rmtree(upload_root, ignore_errors=True)
|
shutil.rmtree(upload_root, ignore_errors=True)
|
||||||
|
|
||||||
self._invalidate_bucket_stats_cache(bucket_id)
|
self._update_bucket_stats_cache(
|
||||||
|
bucket_id,
|
||||||
|
bytes_delta=size_delta,
|
||||||
|
objects_delta=object_delta,
|
||||||
|
version_bytes_delta=archived_version_size,
|
||||||
|
version_count_delta=1 if archived_version_size > 0 else 0,
|
||||||
|
)
|
||||||
|
|
||||||
stat = destination.stat()
|
stat = destination.stat()
|
||||||
etag = checksum.hexdigest()
|
etag = checksum.hexdigest()
|
||||||
@@ -1420,7 +1556,7 @@ class ObjectStorage:
|
|||||||
if entry.is_dir(follow_symlinks=False):
|
if entry.is_dir(follow_symlinks=False):
|
||||||
if check_newer(entry.path):
|
if check_newer(entry.path):
|
||||||
return True
|
return True
|
||||||
elif entry.is_file(follow_symlinks=False) and entry.name.endswith('.meta.json'):
|
elif entry.is_file(follow_symlinks=False) and (entry.name.endswith('.meta.json') or entry.name == '_index.json'):
|
||||||
if entry.stat().st_mtime > index_mtime:
|
if entry.stat().st_mtime > index_mtime:
|
||||||
return True
|
return True
|
||||||
except OSError:
|
except OSError:
|
||||||
@@ -1434,22 +1570,50 @@ class ObjectStorage:
|
|||||||
meta_str = str(meta_root)
|
meta_str = str(meta_root)
|
||||||
meta_len = len(meta_str) + 1
|
meta_len = len(meta_str) + 1
|
||||||
meta_files: list[tuple[str, str]] = []
|
meta_files: list[tuple[str, str]] = []
|
||||||
|
index_files: list[str] = []
|
||||||
|
|
||||||
def collect_meta_files(dir_path: str) -> None:
|
def collect_meta_files(dir_path: str) -> None:
|
||||||
try:
|
try:
|
||||||
with os.scandir(dir_path) as it:
|
with os.scandir(dir_path) as it:
|
||||||
for entry in it:
|
for entry in it:
|
||||||
if entry.is_dir(follow_symlinks=False):
|
if entry.is_dir(follow_symlinks=False):
|
||||||
collect_meta_files(entry.path)
|
collect_meta_files(entry.path)
|
||||||
elif entry.is_file(follow_symlinks=False) and entry.name.endswith('.meta.json'):
|
elif entry.is_file(follow_symlinks=False):
|
||||||
rel = entry.path[meta_len:]
|
if entry.name == '_index.json':
|
||||||
key = rel[:-10].replace(os.sep, '/')
|
index_files.append(entry.path)
|
||||||
meta_files.append((key, entry.path))
|
elif entry.name.endswith('.meta.json'):
|
||||||
|
rel = entry.path[meta_len:]
|
||||||
|
key = rel[:-10].replace(os.sep, '/')
|
||||||
|
meta_files.append((key, entry.path))
|
||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
collect_meta_files(meta_str)
|
collect_meta_files(meta_str)
|
||||||
|
|
||||||
|
meta_cache = {}
|
||||||
|
|
||||||
|
for idx_path in index_files:
|
||||||
|
try:
|
||||||
|
with open(idx_path, 'r', encoding='utf-8') as f:
|
||||||
|
idx_data = json.load(f)
|
||||||
|
rel_dir = idx_path[meta_len:]
|
||||||
|
rel_dir = rel_dir.replace(os.sep, '/')
|
||||||
|
if rel_dir.endswith('/_index.json'):
|
||||||
|
dir_prefix = rel_dir[:-len('/_index.json')]
|
||||||
|
else:
|
||||||
|
dir_prefix = ''
|
||||||
|
for entry_name, entry_data in idx_data.items():
|
||||||
|
if dir_prefix:
|
||||||
|
key = f"{dir_prefix}/{entry_name}"
|
||||||
|
else:
|
||||||
|
key = entry_name
|
||||||
|
meta = entry_data.get("metadata", {})
|
||||||
|
etag = meta.get("__etag__")
|
||||||
|
if etag:
|
||||||
|
meta_cache[key] = etag
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
pass
|
||||||
|
|
||||||
def read_meta_file(item: tuple[str, str]) -> tuple[str, str | None]:
|
def read_meta_file(item: tuple[str, str]) -> tuple[str, str | None]:
|
||||||
key, path = item
|
key, path = item
|
||||||
try:
|
try:
|
||||||
@@ -1466,15 +1630,16 @@ class ObjectStorage:
|
|||||||
return key, None
|
return key, None
|
||||||
except (OSError, UnicodeDecodeError):
|
except (OSError, UnicodeDecodeError):
|
||||||
return key, None
|
return key, None
|
||||||
|
|
||||||
if meta_files:
|
legacy_meta_files = [(k, p) for k, p in meta_files if k not in meta_cache]
|
||||||
meta_cache = {}
|
if legacy_meta_files:
|
||||||
max_workers = min((os.cpu_count() or 4) * 2, len(meta_files), 16)
|
max_workers = min((os.cpu_count() or 4) * 2, len(legacy_meta_files), 16)
|
||||||
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||||
for key, etag in executor.map(read_meta_file, meta_files):
|
for key, etag in executor.map(read_meta_file, legacy_meta_files):
|
||||||
if etag:
|
if etag:
|
||||||
meta_cache[key] = etag
|
meta_cache[key] = etag
|
||||||
|
|
||||||
|
if meta_cache:
|
||||||
try:
|
try:
|
||||||
etag_index_path.parent.mkdir(parents=True, exist_ok=True)
|
etag_index_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
with open(etag_index_path, 'w', encoding='utf-8') as f:
|
with open(etag_index_path, 'w', encoding='utf-8') as f:
|
||||||
@@ -1523,38 +1688,46 @@ class ObjectStorage:
|
|||||||
|
|
||||||
Uses LRU eviction to prevent unbounded cache growth.
|
Uses LRU eviction to prevent unbounded cache growth.
|
||||||
Thread-safe with per-bucket locks to reduce contention.
|
Thread-safe with per-bucket locks to reduce contention.
|
||||||
|
Checks stats.json for cross-process cache invalidation.
|
||||||
"""
|
"""
|
||||||
now = time.time()
|
now = time.time()
|
||||||
|
current_stats_mtime = self._get_cache_marker_mtime(bucket_id)
|
||||||
|
|
||||||
with self._cache_lock:
|
with self._cache_lock:
|
||||||
cached = self._object_cache.get(bucket_id)
|
cached = self._object_cache.get(bucket_id)
|
||||||
if cached:
|
if cached:
|
||||||
objects, timestamp = cached
|
objects, timestamp, cached_stats_mtime = cached
|
||||||
if now - timestamp < self._cache_ttl:
|
if now - timestamp < self._cache_ttl and current_stats_mtime == cached_stats_mtime:
|
||||||
self._object_cache.move_to_end(bucket_id)
|
self._object_cache.move_to_end(bucket_id)
|
||||||
return objects
|
return objects
|
||||||
cache_version = self._cache_version.get(bucket_id, 0)
|
cache_version = self._cache_version.get(bucket_id, 0)
|
||||||
|
|
||||||
bucket_lock = self._get_bucket_lock(bucket_id)
|
bucket_lock = self._get_bucket_lock(bucket_id)
|
||||||
with bucket_lock:
|
with bucket_lock:
|
||||||
|
current_stats_mtime = self._get_cache_marker_mtime(bucket_id)
|
||||||
with self._cache_lock:
|
with self._cache_lock:
|
||||||
cached = self._object_cache.get(bucket_id)
|
cached = self._object_cache.get(bucket_id)
|
||||||
if cached:
|
if cached:
|
||||||
objects, timestamp = cached
|
objects, timestamp, cached_stats_mtime = cached
|
||||||
if now - timestamp < self._cache_ttl:
|
if now - timestamp < self._cache_ttl and current_stats_mtime == cached_stats_mtime:
|
||||||
self._object_cache.move_to_end(bucket_id)
|
self._object_cache.move_to_end(bucket_id)
|
||||||
return objects
|
return objects
|
||||||
|
|
||||||
objects = self._build_object_cache(bucket_path)
|
objects = self._build_object_cache(bucket_path)
|
||||||
|
new_stats_mtime = self._get_cache_marker_mtime(bucket_id)
|
||||||
|
|
||||||
with self._cache_lock:
|
with self._cache_lock:
|
||||||
current_version = self._cache_version.get(bucket_id, 0)
|
current_version = self._cache_version.get(bucket_id, 0)
|
||||||
if current_version != cache_version:
|
if current_version != cache_version:
|
||||||
objects = self._build_object_cache(bucket_path)
|
objects = self._build_object_cache(bucket_path)
|
||||||
|
new_stats_mtime = self._get_cache_marker_mtime(bucket_id)
|
||||||
while len(self._object_cache) >= self._object_cache_max_size:
|
while len(self._object_cache) >= self._object_cache_max_size:
|
||||||
self._object_cache.popitem(last=False)
|
self._object_cache.popitem(last=False)
|
||||||
|
|
||||||
self._object_cache[bucket_id] = (objects, time.time())
|
self._object_cache[bucket_id] = (objects, time.time(), new_stats_mtime)
|
||||||
self._object_cache.move_to_end(bucket_id)
|
self._object_cache.move_to_end(bucket_id)
|
||||||
|
self._cache_version[bucket_id] = current_version + 1
|
||||||
|
self._sorted_key_cache.pop(bucket_id, None)
|
||||||
|
|
||||||
return objects
|
return objects
|
||||||
|
|
||||||
@@ -1562,6 +1735,7 @@ class ObjectStorage:
|
|||||||
"""Invalidate the object cache and etag index for a bucket.
|
"""Invalidate the object cache and etag index for a bucket.
|
||||||
|
|
||||||
Increments version counter to signal stale reads.
|
Increments version counter to signal stale reads.
|
||||||
|
Cross-process invalidation is handled by checking stats.json mtime.
|
||||||
"""
|
"""
|
||||||
with self._cache_lock:
|
with self._cache_lock:
|
||||||
self._object_cache.pop(bucket_id, None)
|
self._object_cache.pop(bucket_id, None)
|
||||||
@@ -1573,19 +1747,37 @@ class ObjectStorage:
|
|||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def _get_cache_marker_mtime(self, bucket_id: str) -> float:
|
||||||
|
"""Get a cache marker combining serial and object count for cross-process invalidation.
|
||||||
|
|
||||||
|
Returns a combined value that changes if either _cache_serial or object count changes.
|
||||||
|
This handles cases where the serial was reset but object count differs.
|
||||||
|
"""
|
||||||
|
stats_path = self._system_bucket_root(bucket_id) / "stats.json"
|
||||||
|
try:
|
||||||
|
data = json.loads(stats_path.read_text(encoding="utf-8"))
|
||||||
|
serial = data.get("_cache_serial", 0)
|
||||||
|
count = data.get("objects", 0)
|
||||||
|
return float(serial * 1000000 + count)
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
return 0
|
||||||
|
|
||||||
def _update_object_cache_entry(self, bucket_id: str, key: str, meta: Optional[ObjectMeta]) -> None:
|
def _update_object_cache_entry(self, bucket_id: str, key: str, meta: Optional[ObjectMeta]) -> None:
|
||||||
"""Update a single entry in the object cache instead of invalidating the whole cache.
|
"""Update a single entry in the object cache instead of invalidating the whole cache.
|
||||||
|
|
||||||
This is a performance optimization - lazy update instead of full invalidation.
|
This is a performance optimization - lazy update instead of full invalidation.
|
||||||
|
Cross-process invalidation is handled by checking stats.json mtime.
|
||||||
"""
|
"""
|
||||||
with self._cache_lock:
|
with self._cache_lock:
|
||||||
cached = self._object_cache.get(bucket_id)
|
cached = self._object_cache.get(bucket_id)
|
||||||
if cached:
|
if cached:
|
||||||
objects, timestamp = cached
|
objects, timestamp, stats_mtime = cached
|
||||||
if meta is None:
|
if meta is None:
|
||||||
objects.pop(key, None)
|
objects.pop(key, None)
|
||||||
else:
|
else:
|
||||||
objects[key] = meta
|
objects[key] = meta
|
||||||
|
self._cache_version[bucket_id] = self._cache_version.get(bucket_id, 0) + 1
|
||||||
|
self._sorted_key_cache.pop(bucket_id, None)
|
||||||
|
|
||||||
def warm_cache(self, bucket_names: Optional[List[str]] = None) -> None:
|
def warm_cache(self, bucket_names: Optional[List[str]] = None) -> None:
|
||||||
"""Pre-warm the object cache for specified buckets or all buckets.
|
"""Pre-warm the object cache for specified buckets or all buckets.
|
||||||
@@ -1697,6 +1889,64 @@ class ObjectStorage:
|
|||||||
meta_rel = Path(key.as_posix() + ".meta.json")
|
meta_rel = Path(key.as_posix() + ".meta.json")
|
||||||
return meta_root / meta_rel
|
return meta_root / meta_rel
|
||||||
|
|
||||||
|
def _index_file_for_key(self, bucket_name: str, key: Path) -> tuple[Path, str]:
|
||||||
|
meta_root = self._bucket_meta_root(bucket_name)
|
||||||
|
parent = key.parent
|
||||||
|
entry_name = key.name
|
||||||
|
if parent == Path("."):
|
||||||
|
return meta_root / "_index.json", entry_name
|
||||||
|
return meta_root / parent / "_index.json", entry_name
|
||||||
|
|
||||||
|
def _get_meta_index_lock(self, index_path: str) -> threading.Lock:
|
||||||
|
with self._cache_lock:
|
||||||
|
if index_path not in self._meta_index_locks:
|
||||||
|
self._meta_index_locks[index_path] = threading.Lock()
|
||||||
|
return self._meta_index_locks[index_path]
|
||||||
|
|
||||||
|
def _read_index_entry(self, bucket_name: str, key: Path) -> Optional[Dict[str, Any]]:
|
||||||
|
index_path, entry_name = self._index_file_for_key(bucket_name, key)
|
||||||
|
if not index_path.exists():
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||||
|
return index_data.get(entry_name)
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _write_index_entry(self, bucket_name: str, key: Path, entry: Dict[str, Any]) -> None:
|
||||||
|
index_path, entry_name = self._index_file_for_key(bucket_name, key)
|
||||||
|
lock = self._get_meta_index_lock(str(index_path))
|
||||||
|
with lock:
|
||||||
|
index_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
index_data: Dict[str, Any] = {}
|
||||||
|
if index_path.exists():
|
||||||
|
try:
|
||||||
|
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
pass
|
||||||
|
index_data[entry_name] = entry
|
||||||
|
index_path.write_text(json.dumps(index_data), encoding="utf-8")
|
||||||
|
|
||||||
|
def _delete_index_entry(self, bucket_name: str, key: Path) -> None:
|
||||||
|
index_path, entry_name = self._index_file_for_key(bucket_name, key)
|
||||||
|
if not index_path.exists():
|
||||||
|
return
|
||||||
|
lock = self._get_meta_index_lock(str(index_path))
|
||||||
|
with lock:
|
||||||
|
try:
|
||||||
|
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
return
|
||||||
|
if entry_name in index_data:
|
||||||
|
del index_data[entry_name]
|
||||||
|
if index_data:
|
||||||
|
index_path.write_text(json.dumps(index_data), encoding="utf-8")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
index_path.unlink()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
def _normalize_metadata(self, metadata: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
|
def _normalize_metadata(self, metadata: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
|
||||||
if not metadata:
|
if not metadata:
|
||||||
return None
|
return None
|
||||||
@@ -1708,9 +1958,13 @@ class ObjectStorage:
|
|||||||
if not clean:
|
if not clean:
|
||||||
self._delete_metadata(bucket_name, key)
|
self._delete_metadata(bucket_name, key)
|
||||||
return
|
return
|
||||||
meta_file = self._metadata_file(bucket_name, key)
|
self._write_index_entry(bucket_name, key, {"metadata": clean})
|
||||||
meta_file.parent.mkdir(parents=True, exist_ok=True)
|
old_meta = self._metadata_file(bucket_name, key)
|
||||||
meta_file.write_text(json.dumps({"metadata": clean}), encoding="utf-8")
|
try:
|
||||||
|
if old_meta.exists():
|
||||||
|
old_meta.unlink()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
def _archive_current_version(self, bucket_name: str, key: Path, *, reason: str) -> None:
|
def _archive_current_version(self, bucket_name: str, key: Path, *, reason: str) -> None:
|
||||||
bucket_path = self._bucket_path(bucket_name)
|
bucket_path = self._bucket_path(bucket_name)
|
||||||
@@ -1737,6 +1991,10 @@ class ObjectStorage:
|
|||||||
manifest_path.write_text(json.dumps(record), encoding="utf-8")
|
manifest_path.write_text(json.dumps(record), encoding="utf-8")
|
||||||
|
|
||||||
def _read_metadata(self, bucket_name: str, key: Path) -> Dict[str, str]:
|
def _read_metadata(self, bucket_name: str, key: Path) -> Dict[str, str]:
|
||||||
|
entry = self._read_index_entry(bucket_name, key)
|
||||||
|
if entry is not None:
|
||||||
|
data = entry.get("metadata")
|
||||||
|
return data if isinstance(data, dict) else {}
|
||||||
for meta_file in (self._metadata_file(bucket_name, key), self._legacy_metadata_file(bucket_name, key)):
|
for meta_file in (self._metadata_file(bucket_name, key), self._legacy_metadata_file(bucket_name, key)):
|
||||||
if not meta_file.exists():
|
if not meta_file.exists():
|
||||||
continue
|
continue
|
||||||
@@ -1767,6 +2025,7 @@ class ObjectStorage:
|
|||||||
raise StorageError(message) from last_error
|
raise StorageError(message) from last_error
|
||||||
|
|
||||||
def _delete_metadata(self, bucket_name: str, key: Path) -> None:
|
def _delete_metadata(self, bucket_name: str, key: Path) -> None:
|
||||||
|
self._delete_index_entry(bucket_name, key)
|
||||||
locations = (
|
locations = (
|
||||||
(self._metadata_file(bucket_name, key), self._bucket_meta_root(bucket_name)),
|
(self._metadata_file(bucket_name, key), self._bucket_meta_root(bucket_name)),
|
||||||
(self._legacy_metadata_file(bucket_name, key), self._legacy_meta_root(bucket_name)),
|
(self._legacy_metadata_file(bucket_name, key), self._legacy_meta_root(bucket_name)),
|
||||||
@@ -1886,6 +2145,18 @@ class ObjectStorage:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _sanitize_object_key(object_key: str, max_length_bytes: int = 1024) -> Path:
|
def _sanitize_object_key(object_key: str, max_length_bytes: int = 1024) -> Path:
|
||||||
|
if _HAS_RUST:
|
||||||
|
error = _rc.validate_object_key(object_key, max_length_bytes, os.name == "nt")
|
||||||
|
if error:
|
||||||
|
raise StorageError(error)
|
||||||
|
normalized = unicodedata.normalize("NFC", object_key)
|
||||||
|
candidate = Path(normalized)
|
||||||
|
if candidate.is_absolute():
|
||||||
|
raise StorageError("Absolute object keys are not allowed")
|
||||||
|
if getattr(candidate, "drive", ""):
|
||||||
|
raise StorageError("Object key cannot include a drive letter")
|
||||||
|
return Path(*candidate.parts) if candidate.parts else candidate
|
||||||
|
|
||||||
if not object_key:
|
if not object_key:
|
||||||
raise StorageError("Object key required")
|
raise StorageError("Object key required")
|
||||||
if "\x00" in object_key:
|
if "\x00" in object_key:
|
||||||
@@ -1899,7 +2170,7 @@ class ObjectStorage:
|
|||||||
candidate = Path(object_key)
|
candidate = Path(object_key)
|
||||||
if ".." in candidate.parts:
|
if ".." in candidate.parts:
|
||||||
raise StorageError("Object key contains parent directory references")
|
raise StorageError("Object key contains parent directory references")
|
||||||
|
|
||||||
if candidate.is_absolute():
|
if candidate.is_absolute():
|
||||||
raise StorageError("Absolute object keys are not allowed")
|
raise StorageError("Absolute object keys are not allowed")
|
||||||
if getattr(candidate, "drive", ""):
|
if getattr(candidate, "drive", ""):
|
||||||
@@ -1927,6 +2198,8 @@ class ObjectStorage:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _compute_etag(path: Path) -> str:
|
def _compute_etag(path: Path) -> str:
|
||||||
|
if _HAS_RUST:
|
||||||
|
return _rc.md5_file(str(path))
|
||||||
checksum = hashlib.md5()
|
checksum = hashlib.md5()
|
||||||
with path.open("rb") as handle:
|
with path.open("rb") as handle:
|
||||||
for chunk in iter(lambda: handle.read(8192), b""):
|
for chunk in iter(lambda: handle.read(8192), b""):
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
APP_VERSION = "0.2.5"
|
APP_VERSION = "0.3.0"
|
||||||
|
|
||||||
|
|
||||||
def get_version() -> str:
|
def get_version() -> str:
|
||||||
|
|||||||
79
app/website_domains.py
Normal file
79
app/website_domains.py
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import threading
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
_DOMAIN_RE = re.compile(
|
||||||
|
r"^(?!-)[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_domain(raw: str) -> str:
|
||||||
|
raw = raw.strip().lower()
|
||||||
|
for prefix in ("https://", "http://"):
|
||||||
|
if raw.startswith(prefix):
|
||||||
|
raw = raw[len(prefix):]
|
||||||
|
raw = raw.split("/", 1)[0]
|
||||||
|
raw = raw.split("?", 1)[0]
|
||||||
|
raw = raw.split("#", 1)[0]
|
||||||
|
if ":" in raw:
|
||||||
|
raw = raw.rsplit(":", 1)[0]
|
||||||
|
return raw
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_domain(domain: str) -> bool:
|
||||||
|
if not domain or len(domain) > 253:
|
||||||
|
return False
|
||||||
|
return bool(_DOMAIN_RE.match(domain))
|
||||||
|
|
||||||
|
|
||||||
|
class WebsiteDomainStore:
|
||||||
|
def __init__(self, config_path: Path) -> None:
|
||||||
|
self.config_path = config_path
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
self._domains: Dict[str, str] = {}
|
||||||
|
self.reload()
|
||||||
|
|
||||||
|
def reload(self) -> None:
|
||||||
|
if not self.config_path.exists():
|
||||||
|
self._domains = {}
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||||
|
data = json.load(f)
|
||||||
|
if isinstance(data, dict):
|
||||||
|
self._domains = {k.lower(): v for k, v in data.items()}
|
||||||
|
else:
|
||||||
|
self._domains = {}
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
self._domains = {}
|
||||||
|
|
||||||
|
def _save(self) -> None:
|
||||||
|
self.config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
with open(self.config_path, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(self._domains, f, indent=2)
|
||||||
|
|
||||||
|
def list_all(self) -> List[Dict[str, str]]:
|
||||||
|
with self._lock:
|
||||||
|
return [{"domain": d, "bucket": b} for d, b in self._domains.items()]
|
||||||
|
|
||||||
|
def get_bucket(self, domain: str) -> Optional[str]:
|
||||||
|
with self._lock:
|
||||||
|
return self._domains.get(domain.lower())
|
||||||
|
|
||||||
|
def set_mapping(self, domain: str, bucket: str) -> None:
|
||||||
|
with self._lock:
|
||||||
|
self._domains[domain.lower()] = bucket
|
||||||
|
self._save()
|
||||||
|
|
||||||
|
def delete_mapping(self, domain: str) -> bool:
|
||||||
|
with self._lock:
|
||||||
|
key = domain.lower()
|
||||||
|
if key not in self._domains:
|
||||||
|
return False
|
||||||
|
del self._domains[key]
|
||||||
|
self._save()
|
||||||
|
return True
|
||||||
128
docs.md
128
docs.md
@@ -7,7 +7,7 @@ This document expands on the README to describe the full workflow for running, c
|
|||||||
MyFSIO ships two Flask entrypoints that share the same storage, IAM, and bucket-policy state:
|
MyFSIO ships two Flask entrypoints that share the same storage, IAM, and bucket-policy state:
|
||||||
|
|
||||||
- **API server** – Implements the S3-compatible REST API, policy evaluation, and Signature Version 4 presign service.
|
- **API server** – Implements the S3-compatible REST API, policy evaluation, and Signature Version 4 presign service.
|
||||||
- **UI server** – Provides the browser console for buckets, IAM, and policies. It proxies to the API for presign operations.
|
- **UI server** – Provides the browser console for buckets, IAM, and policies. It proxies all storage operations through the S3 API via boto3 (SigV4-signed), mirroring the architecture used by MinIO and Garage.
|
||||||
|
|
||||||
Both servers read `AppConfig`, so editing JSON stores on disk instantly affects both surfaces.
|
Both servers read `AppConfig`, so editing JSON stores on disk instantly affects both surfaces.
|
||||||
|
|
||||||
@@ -136,7 +136,7 @@ All configuration is done via environment variables. The table below lists every
|
|||||||
| `MAX_UPLOAD_SIZE` | `1073741824` (1 GiB) | Bytes. Caps incoming uploads in both API + UI. |
|
| `MAX_UPLOAD_SIZE` | `1073741824` (1 GiB) | Bytes. Caps incoming uploads in both API + UI. |
|
||||||
| `UI_PAGE_SIZE` | `100` | `MaxKeys` hint shown in listings. |
|
| `UI_PAGE_SIZE` | `100` | `MaxKeys` hint shown in listings. |
|
||||||
| `SECRET_KEY` | Auto-generated | Flask session key. Auto-generates and persists if not set. **Set explicitly in production.** |
|
| `SECRET_KEY` | Auto-generated | Flask session key. Auto-generates and persists if not set. **Set explicitly in production.** |
|
||||||
| `API_BASE_URL` | `None` | Public URL for presigned URLs. Required behind proxies. |
|
| `API_BASE_URL` | `http://127.0.0.1:5000` | Internal S3 API URL used by the web UI proxy. Also used for presigned URL generation. Set to your public URL if running behind a reverse proxy. |
|
||||||
| `AWS_REGION` | `us-east-1` | Region embedded in SigV4 credential scope. |
|
| `AWS_REGION` | `us-east-1` | Region embedded in SigV4 credential scope. |
|
||||||
| `AWS_SERVICE` | `s3` | Service string for SigV4. |
|
| `AWS_SERVICE` | `s3` | Service string for SigV4. |
|
||||||
|
|
||||||
@@ -619,13 +619,15 @@ MyFSIO implements a comprehensive Identity and Access Management (IAM) system th
|
|||||||
|
|
||||||
### Getting Started
|
### Getting Started
|
||||||
|
|
||||||
1. On first boot, `data/.myfsio.sys/config/iam.json` is seeded with `localadmin / localadmin` that has wildcard access.
|
1. On first boot, `data/.myfsio.sys/config/iam.json` is created with a randomly generated admin user. The access key and secret key are printed to the console during first startup. If you miss it, check the `iam.json` file directly—credentials are stored in plaintext.
|
||||||
2. Sign into the UI using those credentials, then open **IAM**:
|
2. Sign into the UI using the generated credentials, then open **IAM**:
|
||||||
- **Create user**: supply a display name and optional JSON inline policy array.
|
- **Create user**: supply a display name and optional JSON inline policy array.
|
||||||
- **Rotate secret**: generates a new secret key; the UI surfaces it once.
|
- **Rotate secret**: generates a new secret key; the UI surfaces it once.
|
||||||
- **Policy editor**: select a user, paste an array of objects (`{"bucket": "*", "actions": ["list", "read"]}`), and submit. Alias support includes AWS-style verbs (e.g., `s3:GetObject`).
|
- **Policy editor**: select a user, paste an array of objects (`{"bucket": "*", "actions": ["list", "read"]}`), and submit. Alias support includes AWS-style verbs (e.g., `s3:GetObject`).
|
||||||
3. Wildcard action `iam:*` is supported for admin user definitions.
|
3. Wildcard action `iam:*` is supported for admin user definitions.
|
||||||
|
|
||||||
|
> **Breaking Change (v0.2.0+):** Previous versions used fixed default credentials (`localadmin/localadmin`). If upgrading from an older version, your existing credentials remain unchanged, but new installations will generate random credentials.
|
||||||
|
|
||||||
### Authentication
|
### Authentication
|
||||||
|
|
||||||
The API expects every request to include authentication headers. The UI persists them in the Flask session after login.
|
The API expects every request to include authentication headers. The UI persists them in the Flask session after login.
|
||||||
@@ -1550,6 +1552,9 @@ GET /<bucket>?notification # Get event notifications
|
|||||||
PUT /<bucket>?notification # Set event notifications (webhooks)
|
PUT /<bucket>?notification # Set event notifications (webhooks)
|
||||||
GET /<bucket>?object-lock # Get object lock configuration
|
GET /<bucket>?object-lock # Get object lock configuration
|
||||||
PUT /<bucket>?object-lock # Set object lock configuration
|
PUT /<bucket>?object-lock # Set object lock configuration
|
||||||
|
GET /<bucket>?website # Get website configuration
|
||||||
|
PUT /<bucket>?website # Set website configuration
|
||||||
|
DELETE /<bucket>?website # Delete website configuration
|
||||||
GET /<bucket>?uploads # List active multipart uploads
|
GET /<bucket>?uploads # List active multipart uploads
|
||||||
GET /<bucket>?versions # List object versions
|
GET /<bucket>?versions # List object versions
|
||||||
GET /<bucket>?location # Get bucket location/region
|
GET /<bucket>?location # Get bucket location/region
|
||||||
@@ -1594,6 +1599,11 @@ PUT /admin/sites/<site_id> # Update peer site
|
|||||||
DELETE /admin/sites/<site_id> # Unregister peer site
|
DELETE /admin/sites/<site_id> # Unregister peer site
|
||||||
GET /admin/sites/<site_id>/health # Check peer health
|
GET /admin/sites/<site_id>/health # Check peer health
|
||||||
GET /admin/topology # Get cluster topology
|
GET /admin/topology # Get cluster topology
|
||||||
|
GET /admin/website-domains # List domain mappings
|
||||||
|
POST /admin/website-domains # Create domain mapping
|
||||||
|
GET /admin/website-domains/<domain> # Get domain mapping
|
||||||
|
PUT /admin/website-domains/<domain> # Update domain mapping
|
||||||
|
DELETE /admin/website-domains/<domain> # Delete domain mapping
|
||||||
|
|
||||||
# KMS API
|
# KMS API
|
||||||
GET /kms/keys # List KMS keys
|
GET /kms/keys # List KMS keys
|
||||||
@@ -2227,3 +2237,113 @@ curl "http://localhost:5000/my-bucket?list-type=2&start-after=photos/2024/" \
|
|||||||
| `start-after` | Start listing after this key |
|
| `start-after` | Start listing after this key |
|
||||||
| `fetch-owner` | Include owner info in response |
|
| `fetch-owner` | Include owner info in response |
|
||||||
| `encoding-type` | Set to `url` for URL-encoded keys |
|
| `encoding-type` | Set to `url` for URL-encoded keys
|
||||||
|
|
||||||
|
## 26. Static Website Hosting
|
||||||
|
|
||||||
|
MyFSIO can serve S3 buckets as static websites via custom domain mappings. When a request arrives with a `Host` header matching a mapped domain, MyFSIO resolves the bucket and serves objects directly.
|
||||||
|
|
||||||
|
### Enabling
|
||||||
|
|
||||||
|
Set the environment variable:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
WEBSITE_HOSTING_ENABLED=true
|
||||||
|
```
|
||||||
|
|
||||||
|
When disabled, all website hosting endpoints return 400 and domain-based serving is skipped.
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
| Variable | Default | Description |
|
||||||
|
|----------|---------|-------------|
|
||||||
|
| `WEBSITE_HOSTING_ENABLED` | `false` | Master switch for website hosting |
|
||||||
|
|
||||||
|
### Setting Up a Website
|
||||||
|
|
||||||
|
**Step 1: Configure the bucket website settings**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X PUT "http://localhost:5000/my-site?website" \
|
||||||
|
-H "Authorization: ..." \
|
||||||
|
-d '<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<WebsiteConfiguration>
|
||||||
|
<IndexDocument><Suffix>index.html</Suffix></IndexDocument>
|
||||||
|
<ErrorDocument><Key>404.html</Key></ErrorDocument>
|
||||||
|
</WebsiteConfiguration>'
|
||||||
|
```
|
||||||
|
|
||||||
|
- `IndexDocument` with `Suffix` is required (must not contain `/`)
|
||||||
|
- `ErrorDocument` is optional
|
||||||
|
|
||||||
|
**Step 2: Map a domain to the bucket**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST "http://localhost:5000/admin/website-domains" \
|
||||||
|
-H "Authorization: ..." \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"domain": "example.com", "bucket": "my-site"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Step 3: Point your domain to MyFSIO**
|
||||||
|
|
||||||
|
For HTTP-only (direct access), point DNS to the MyFSIO host on port 5000.
|
||||||
|
|
||||||
|
For HTTPS (recommended), use a reverse proxy. The critical requirement is passing the original `Host` header so MyFSIO can match the domain to a bucket.
|
||||||
|
|
||||||
|
**nginx example:**
|
||||||
|
|
||||||
|
```nginx
|
||||||
|
server {
|
||||||
|
server_name example.com;
|
||||||
|
listen 443 ssl;
|
||||||
|
|
||||||
|
ssl_certificate /etc/ssl/certs/example.com.pem;
|
||||||
|
ssl_certificate_key /etc/ssl/private/example.com.key;
|
||||||
|
|
||||||
|
location / {
|
||||||
|
proxy_pass http://127.0.0.1:5000;
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
`proxy_set_header Host $host;` is required — without it, MyFSIO cannot match the incoming domain to a bucket. You do not need any path-based routing rules; MyFSIO handles all object resolution internally.
|
||||||
|
|
||||||
|
### How Domain Routing Works
|
||||||
|
|
||||||
|
1. A request arrives with `Host: example.com`
|
||||||
|
2. MyFSIO's `before_request` hook strips the port and looks up the domain in the `WebsiteDomainStore`
|
||||||
|
3. If a match is found, it loads the bucket's website config (index/error documents)
|
||||||
|
4. Object key resolution:
|
||||||
|
- `/` or trailing `/` → append `index_document` (e.g., `index.html`)
|
||||||
|
- `/path` → try exact match, then try `path/index_document`
|
||||||
|
- Not found → serve `error_document` with 404 status
|
||||||
|
5. If no domain match is found, the request falls through to normal S3 API / UI routing
|
||||||
|
|
||||||
|
### Domain Mapping Admin API
|
||||||
|
|
||||||
|
All endpoints require admin (`iam:*`) permissions.
|
||||||
|
|
||||||
|
| Method | Route | Body | Description |
|
||||||
|
|--------|-------|------|-------------|
|
||||||
|
| `GET` | `/admin/website-domains` | — | List all mappings |
|
||||||
|
| `POST` | `/admin/website-domains` | `{"domain": "...", "bucket": "..."}` | Create mapping |
|
||||||
|
| `GET` | `/admin/website-domains/<domain>` | — | Get single mapping |
|
||||||
|
| `PUT` | `/admin/website-domains/<domain>` | `{"bucket": "..."}` | Update mapping |
|
||||||
|
| `DELETE` | `/admin/website-domains/<domain>` | — | Delete mapping |
|
||||||
|
|
||||||
|
### Bucket Website API
|
||||||
|
|
||||||
|
| Method | Route | Description |
|
||||||
|
|--------|-------|-------------|
|
||||||
|
| `PUT` | `/<bucket>?website` | Set website config (XML body) |
|
||||||
|
| `GET` | `/<bucket>?website` | Get website config (XML response) |
|
||||||
|
| `DELETE` | `/<bucket>?website` | Remove website config |
|
||||||
|
|
||||||
|
### Web UI
|
||||||
|
|
||||||
|
- **Per-bucket config:** Bucket Details → Properties tab → "Static Website Hosting" card
|
||||||
|
- **Domain management:** Sidebar → "Domains" (visible when hosting is enabled and user is admin)
|
||||||
|
|||||||
421
myfsio_core/Cargo.lock
generated
Normal file
421
myfsio_core/Cargo.lock
generated
Normal file
@@ -0,0 +1,421 @@
|
|||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "1.1.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "allocator-api2"
|
||||||
|
version = "0.2.21"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bitflags"
|
||||||
|
version = "2.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "block-buffer"
|
||||||
|
version = "0.10.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
|
||||||
|
dependencies = [
|
||||||
|
"generic-array",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg-if"
|
||||||
|
version = "1.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cpufeatures"
|
||||||
|
version = "0.2.17"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crypto-common"
|
||||||
|
version = "0.1.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
|
||||||
|
dependencies = [
|
||||||
|
"generic-array",
|
||||||
|
"typenum",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "digest"
|
||||||
|
version = "0.10.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
||||||
|
dependencies = [
|
||||||
|
"block-buffer",
|
||||||
|
"crypto-common",
|
||||||
|
"subtle",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "equivalent"
|
||||||
|
version = "1.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "foldhash"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "generic-array"
|
||||||
|
version = "0.14.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
|
||||||
|
dependencies = [
|
||||||
|
"typenum",
|
||||||
|
"version_check",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.15.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
|
||||||
|
dependencies = [
|
||||||
|
"allocator-api2",
|
||||||
|
"equivalent",
|
||||||
|
"foldhash",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heck"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hex"
|
||||||
|
version = "0.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hmac"
|
||||||
|
version = "0.12.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
|
||||||
|
dependencies = [
|
||||||
|
"digest",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.182"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lock_api"
|
||||||
|
version = "0.4.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
|
||||||
|
dependencies = [
|
||||||
|
"scopeguard",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lru"
|
||||||
|
version = "0.14.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198"
|
||||||
|
dependencies = [
|
||||||
|
"hashbrown",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "md-5"
|
||||||
|
version = "0.10.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"digest",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "myfsio_core"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"hex",
|
||||||
|
"hmac",
|
||||||
|
"lru",
|
||||||
|
"md-5",
|
||||||
|
"parking_lot",
|
||||||
|
"pyo3",
|
||||||
|
"regex",
|
||||||
|
"sha2",
|
||||||
|
"unicode-normalization",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell"
|
||||||
|
version = "1.21.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parking_lot"
|
||||||
|
version = "0.12.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
|
||||||
|
dependencies = [
|
||||||
|
"lock_api",
|
||||||
|
"parking_lot_core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parking_lot_core"
|
||||||
|
version = "0.9.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"libc",
|
||||||
|
"redox_syscall",
|
||||||
|
"smallvec",
|
||||||
|
"windows-link",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "portable-atomic"
|
||||||
|
version = "1.13.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.106"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyo3"
|
||||||
|
version = "0.28.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "14c738662e2181be11cb82487628404254902bb3225d8e9e99c31f3ef82a405c"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"once_cell",
|
||||||
|
"portable-atomic",
|
||||||
|
"pyo3-build-config",
|
||||||
|
"pyo3-ffi",
|
||||||
|
"pyo3-macros",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyo3-build-config"
|
||||||
|
version = "0.28.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f9ca0864a7dd3c133a7f3f020cbff2e12e88420da854c35540fd20ce2d60e435"
|
||||||
|
dependencies = [
|
||||||
|
"target-lexicon",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyo3-ffi"
|
||||||
|
version = "0.28.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9dfc1956b709823164763a34cc42bbfd26b8730afa77809a3df8b94a3ae3b059"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"pyo3-build-config",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyo3-macros"
|
||||||
|
version = "0.28.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "29dc660ad948bae134d579661d08033fbb1918f4529c3bbe3257a68f2009ddf2"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"pyo3-macros-backend",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyo3-macros-backend"
|
||||||
|
version = "0.28.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e78cd6c6d718acfcedf26c3d21fe0f053624368b0d44298c55d7138fde9331f7"
|
||||||
|
dependencies = [
|
||||||
|
"heck",
|
||||||
|
"proc-macro2",
|
||||||
|
"pyo3-build-config",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.44"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "redox_syscall"
|
||||||
|
version = "0.5.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.12.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-automata",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-automata"
|
||||||
|
version = "0.4.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.8.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "scopeguard"
|
||||||
|
version = "1.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sha2"
|
||||||
|
version = "0.10.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"cpufeatures",
|
||||||
|
"digest",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "smallvec"
|
||||||
|
version = "1.15.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "subtle"
|
||||||
|
version = "2.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.116"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "target-lexicon"
|
||||||
|
version = "0.13.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tinyvec"
|
||||||
|
version = "1.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
|
||||||
|
dependencies = [
|
||||||
|
"tinyvec_macros",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tinyvec_macros"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typenum"
|
||||||
|
version = "1.19.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-ident"
|
||||||
|
version = "1.0.24"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-normalization"
|
||||||
|
version = "0.1.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
|
||||||
|
dependencies = [
|
||||||
|
"tinyvec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "version_check"
|
||||||
|
version = "0.9.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-link"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||||
19
myfsio_core/Cargo.toml
Normal file
19
myfsio_core/Cargo.toml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
[package]
|
||||||
|
name = "myfsio_core"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "myfsio_core"
|
||||||
|
crate-type = ["cdylib"]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
pyo3 = { version = "0.28", features = ["extension-module"] }
|
||||||
|
hmac = "0.12"
|
||||||
|
sha2 = "0.10"
|
||||||
|
md-5 = "0.10"
|
||||||
|
hex = "0.4"
|
||||||
|
unicode-normalization = "0.1"
|
||||||
|
regex = "1"
|
||||||
|
lru = "0.14"
|
||||||
|
parking_lot = "0.12"
|
||||||
11
myfsio_core/pyproject.toml
Normal file
11
myfsio_core/pyproject.toml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
[build-system]
|
||||||
|
requires = ["maturin>=1.0,<2.0"]
|
||||||
|
build-backend = "maturin"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "myfsio_core"
|
||||||
|
version = "0.1.0"
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
|
||||||
|
[tool.maturin]
|
||||||
|
features = ["pyo3/extension-module"]
|
||||||
90
myfsio_core/src/hashing.rs
Normal file
90
myfsio_core/src/hashing.rs
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
use md5::{Digest, Md5};
|
||||||
|
use pyo3::exceptions::PyIOError;
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use sha2::Sha256;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::Read;
|
||||||
|
|
||||||
|
const CHUNK_SIZE: usize = 65536;
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn md5_file(py: Python<'_>, path: &str) -> PyResult<String> {
|
||||||
|
let path = path.to_owned();
|
||||||
|
py.detach(move || {
|
||||||
|
let mut file = File::open(&path)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
|
||||||
|
let mut hasher = Md5::new();
|
||||||
|
let mut buf = vec![0u8; CHUNK_SIZE];
|
||||||
|
loop {
|
||||||
|
let n = file
|
||||||
|
.read(&mut buf)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
|
||||||
|
if n == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
hasher.update(&buf[..n]);
|
||||||
|
}
|
||||||
|
Ok(format!("{:x}", hasher.finalize()))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn md5_bytes(data: &[u8]) -> String {
|
||||||
|
let mut hasher = Md5::new();
|
||||||
|
hasher.update(data);
|
||||||
|
format!("{:x}", hasher.finalize())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn sha256_file(py: Python<'_>, path: &str) -> PyResult<String> {
|
||||||
|
let path = path.to_owned();
|
||||||
|
py.detach(move || {
|
||||||
|
let mut file = File::open(&path)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
|
||||||
|
let mut hasher = Sha256::new();
|
||||||
|
let mut buf = vec![0u8; CHUNK_SIZE];
|
||||||
|
loop {
|
||||||
|
let n = file
|
||||||
|
.read(&mut buf)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
|
||||||
|
if n == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
hasher.update(&buf[..n]);
|
||||||
|
}
|
||||||
|
Ok(format!("{:x}", hasher.finalize()))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn sha256_bytes(data: &[u8]) -> String {
|
||||||
|
let mut hasher = Sha256::new();
|
||||||
|
hasher.update(data);
|
||||||
|
format!("{:x}", hasher.finalize())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn md5_sha256_file(py: Python<'_>, path: &str) -> PyResult<(String, String)> {
|
||||||
|
let path = path.to_owned();
|
||||||
|
py.detach(move || {
|
||||||
|
let mut file = File::open(&path)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
|
||||||
|
let mut md5_hasher = Md5::new();
|
||||||
|
let mut sha_hasher = Sha256::new();
|
||||||
|
let mut buf = vec![0u8; CHUNK_SIZE];
|
||||||
|
loop {
|
||||||
|
let n = file
|
||||||
|
.read(&mut buf)
|
||||||
|
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
|
||||||
|
if n == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
md5_hasher.update(&buf[..n]);
|
||||||
|
sha_hasher.update(&buf[..n]);
|
||||||
|
}
|
||||||
|
Ok((
|
||||||
|
format!("{:x}", md5_hasher.finalize()),
|
||||||
|
format!("{:x}", sha_hasher.finalize()),
|
||||||
|
))
|
||||||
|
})
|
||||||
|
}
|
||||||
30
myfsio_core/src/lib.rs
Normal file
30
myfsio_core/src/lib.rs
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
mod hashing;
|
||||||
|
mod sigv4;
|
||||||
|
mod validation;
|
||||||
|
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
|
||||||
|
/// Python extension module `myfsio_core`.
///
/// Exposes the Rust-accelerated helpers (SigV4 signing, file/byte hashing,
/// and key/bucket-name validation) to Python via pyo3.
#[pymodule]
mod myfsio_core {
    use super::*;

    /// Register every exported function on the module object at import time.
    #[pymodule_init]
    fn init(m: &Bound<'_, PyModule>) -> PyResult<()> {
        // AWS Signature Version 4 helpers.
        m.add_function(wrap_pyfunction!(sigv4::derive_signing_key, m)?)?;
        m.add_function(wrap_pyfunction!(sigv4::compute_signature, m)?)?;
        m.add_function(wrap_pyfunction!(sigv4::build_string_to_sign, m)?)?;
        m.add_function(wrap_pyfunction!(sigv4::constant_time_compare, m)?)?;
        m.add_function(wrap_pyfunction!(sigv4::clear_signing_key_cache, m)?)?;

        // MD5 / SHA-256 hashing of files and byte buffers.
        m.add_function(wrap_pyfunction!(hashing::md5_file, m)?)?;
        m.add_function(wrap_pyfunction!(hashing::md5_bytes, m)?)?;
        m.add_function(wrap_pyfunction!(hashing::sha256_file, m)?)?;
        m.add_function(wrap_pyfunction!(hashing::sha256_bytes, m)?)?;
        m.add_function(wrap_pyfunction!(hashing::md5_sha256_file, m)?)?;

        // Object-key and bucket-name validation.
        m.add_function(wrap_pyfunction!(validation::validate_object_key, m)?)?;
        m.add_function(wrap_pyfunction!(validation::validate_bucket_name, m)?)?;

        Ok(())
    }
}
|
||||||
108
myfsio_core/src/sigv4.rs
Normal file
108
myfsio_core/src/sigv4.rs
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
use hmac::{Hmac, Mac};
|
||||||
|
use lru::LruCache;
|
||||||
|
use parking_lot::Mutex;
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use sha2::{Digest, Sha256};
|
||||||
|
use std::num::NonZeroUsize;
|
||||||
|
use std::sync::LazyLock;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
type HmacSha256 = Hmac<Sha256>;
|
||||||
|
|
||||||
|
/// One cached derived signing key plus its creation time, so entries can be
/// expired by the TTL below even while still resident in the LRU.
struct CacheEntry {
    // Raw bytes of the derived SigV4 signing key.
    key: Vec<u8>,
    // When the key was derived; compared against CACHE_TTL_SECS on lookup.
    created: Instant,
}

// Process-wide LRU cache of derived signing keys, keyed by
// (secret_key, date_stamp, region, service). Capacity is 256 entries;
// access is serialized by a parking_lot Mutex.
static SIGNING_KEY_CACHE: LazyLock<Mutex<LruCache<(String, String, String, String), CacheEntry>>> =
    LazyLock::new(|| Mutex::new(LruCache::new(NonZeroUsize::new(256).unwrap())));

// Entries older than this are discarded and re-derived on lookup.
const CACHE_TTL_SECS: u64 = 60;
|
||||||
|
|
||||||
|
fn hmac_sha256(key: &[u8], msg: &[u8]) -> Vec<u8> {
|
||||||
|
let mut mac = HmacSha256::new_from_slice(key).expect("HMAC key length is always valid");
|
||||||
|
mac.update(msg);
|
||||||
|
mac.finalize().into_bytes().to_vec()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Derive the AWS Signature Version 4 signing key for
/// (`secret_key`, `date_stamp`, `region`, `service`), caching the result.
///
/// The derivation is the standard SigV4 HMAC chain:
/// HMAC("AWS4" + secret, date) -> region -> service -> "aws4_request".
/// Results are memoized in `SIGNING_KEY_CACHE` with a TTL of
/// `CACHE_TTL_SECS`; expired entries are evicted and re-derived.
///
/// NOTE(review): the lock is released between the lookup and the final
/// `put`, so two threads may derive the same key concurrently — harmless,
/// since the derivation is deterministic.
#[pyfunction]
pub fn derive_signing_key(
    secret_key: &str,
    date_stamp: &str,
    region: &str,
    service: &str,
) -> Vec<u8> {
    let cache_key = (
        secret_key.to_owned(),
        date_stamp.to_owned(),
        region.to_owned(),
        service.to_owned(),
    );

    // Fast path: return a cached, still-fresh key. Scoped so the lock is
    // dropped before the (comparatively slow) HMAC chain below.
    {
        let mut cache = SIGNING_KEY_CACHE.lock();
        if let Some(entry) = cache.get(&cache_key) {
            if entry.created.elapsed().as_secs() < CACHE_TTL_SECS {
                return entry.key.clone();
            }
            // Stale entry: evict so it is re-derived and re-inserted below.
            cache.pop(&cache_key);
        }
    }

    // Standard SigV4 key-derivation chain.
    let k_date = hmac_sha256(format!("AWS4{}", secret_key).as_bytes(), date_stamp.as_bytes());
    let k_region = hmac_sha256(&k_date, region.as_bytes());
    let k_service = hmac_sha256(&k_region, service.as_bytes());
    let k_signing = hmac_sha256(&k_service, b"aws4_request");

    // Insert the freshly derived key (LRU evicts the oldest if full).
    {
        let mut cache = SIGNING_KEY_CACHE.lock();
        cache.put(
            cache_key,
            CacheEntry {
                key: k_signing.clone(),
                created: Instant::now(),
            },
        );
    }

    k_signing
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn compute_signature(signing_key: &[u8], string_to_sign: &str) -> String {
|
||||||
|
let sig = hmac_sha256(signing_key, string_to_sign.as_bytes());
|
||||||
|
hex::encode(sig)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sha256_hex(data: &[u8]) -> String {
|
||||||
|
let mut hasher = Sha256::new();
|
||||||
|
hasher.update(data);
|
||||||
|
hex::encode(hasher.finalize())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn build_string_to_sign(
|
||||||
|
amz_date: &str,
|
||||||
|
credential_scope: &str,
|
||||||
|
canonical_request: &str,
|
||||||
|
) -> String {
|
||||||
|
let cr_hash = sha256_hex(canonical_request.as_bytes());
|
||||||
|
format!("AWS4-HMAC-SHA256\n{}\n{}\n{}", amz_date, credential_scope, cr_hash)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn constant_time_compare(a: &str, b: &str) -> bool {
|
||||||
|
if a.len() != b.len() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let mut result: u8 = 0;
|
||||||
|
for (x, y) in a.bytes().zip(b.bytes()) {
|
||||||
|
result |= x ^ y;
|
||||||
|
}
|
||||||
|
result == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn clear_signing_key_cache() {
|
||||||
|
SIGNING_KEY_CACHE.lock().clear();
|
||||||
|
}
|
||||||
149
myfsio_core/src/validation.rs
Normal file
149
myfsio_core/src/validation.rs
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
use pyo3::prelude::*;
|
||||||
|
use std::sync::LazyLock;
|
||||||
|
use unicode_normalization::UnicodeNormalization;
|
||||||
|
|
||||||
|
// Filename stems Windows refuses regardless of extension (device names).
const WINDOWS_RESERVED: &[&str] = &[
    "CON", "PRN", "AUX", "NUL", "COM0", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
    "COM8", "COM9", "LPT0", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
    "LPT9",
];

// Characters NTFS/FAT reject inside a path segment.
const WINDOWS_ILLEGAL_CHARS: &[char] = &['<', '>', ':', '"', '/', '\\', '|', '?', '*'];

// Top-level folder names reserved for this application's own bookkeeping.
const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
const SYSTEM_ROOT: &str = ".myfsio.sys";

// Matches dotted-quad shapes (no octet range check — e.g. "999.1.1.1" also
// matches, which is fine for rejecting IP-lookalike bucket names).
static IP_REGEX: LazyLock<regex::Regex> =
    LazyLock::new(|| regex::Regex::new(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$").unwrap());
|
||||||
|
|
||||||
|
/// Validate an S3-style object key against length, traversal, control-character,
/// Windows-filesystem, and reserved-prefix rules.
///
/// Returns `Ok(None)` when the key is acceptable, or `Ok(Some(message))` with a
/// human-readable rejection reason. Checks run in a fixed order, so a key with
/// several problems reports only the first one hit.
#[pyfunction]
#[pyo3(signature = (object_key, max_length_bytes=1024, is_windows=false, reserved_prefixes=None))]
pub fn validate_object_key(
    object_key: &str,
    max_length_bytes: usize,
    is_windows: bool,
    reserved_prefixes: Option<Vec<String>>,
) -> PyResult<Option<String>> {
    if object_key.is_empty() {
        return Ok(Some("Object key required".to_string()));
    }

    if object_key.contains('\0') {
        return Ok(Some("Object key contains null bytes".to_string()));
    }

    // Canonicalize to NFC so visually identical keys compare and measure the same.
    let normalized: String = object_key.nfc().collect();

    // `len()` is UTF-8 byte length, matching the "bytes" wording of the message.
    if normalized.len() > max_length_bytes {
        return Ok(Some(format!(
            "Object key exceeds maximum length of {} bytes",
            max_length_bytes
        )));
    }

    if normalized.starts_with('/') || normalized.starts_with('\\') {
        return Ok(Some("Object key cannot start with a slash".to_string()));
    }

    // Split on both separators when the host is Windows (cfg!) or the caller
    // says the target filesystem is (is_windows); otherwise '/' only.
    // NOTE(review): later per-segment Windows checks gate on `is_windows` alone,
    // not `cfg!(windows)` — confirm that asymmetry is intentional.
    let parts: Vec<&str> = if cfg!(windows) || is_windows {
        normalized.split(['/', '\\']).collect()
    } else {
        normalized.split('/').collect()
    };

    for part in &parts {
        // Empty segments (leading/trailing/double slashes) are tolerated.
        if part.is_empty() {
            continue;
        }

        // Path-traversal guard.
        if *part == ".." {
            return Ok(Some(
                "Object key contains parent directory references".to_string(),
            ));
        }

        if *part == "." {
            return Ok(Some("Object key contains invalid segments".to_string()));
        }

        // Rejects C0 controls (< 0x20); DEL (0x7F) is deliberately not caught here.
        if part.chars().any(|c| (c as u32) < 32) {
            return Ok(Some(
                "Object key contains control characters".to_string(),
            ));
        }

        if is_windows {
            if part.chars().any(|c| WINDOWS_ILLEGAL_CHARS.contains(&c)) {
                return Ok(Some(
                    "Object key contains characters not supported on Windows filesystems"
                        .to_string(),
                ));
            }
            // Windows silently strips trailing spaces/periods, which would
            // alias distinct keys onto one file.
            if part.ends_with(' ') || part.ends_with('.') {
                return Ok(Some(
                    "Object key segments cannot end with spaces or periods on Windows".to_string(),
                ));
            }
            // Reserved device names match case-insensitively and even with a
            // trailing-dot/space suffix, hence trim + uppercase before lookup.
            let trimmed = part.trim_end_matches(['.', ' ']).to_uppercase();
            if WINDOWS_RESERVED.contains(&trimmed.as_str()) {
                return Ok(Some(format!("Invalid filename segment: {}", part)));
            }
        }
    }

    // Reserved-prefix checks look at the first *non-empty* segment so that
    // keys like "//.meta/x" cannot sneak past via an empty leading segment.
    let non_empty_parts: Vec<&str> = parts.iter().filter(|p| !p.is_empty()).copied().collect();
    if let Some(top) = non_empty_parts.first() {
        if INTERNAL_FOLDERS.contains(top) || *top == SYSTEM_ROOT {
            return Ok(Some("Object key uses a reserved prefix".to_string()));
        }

        // Caller-supplied reserved prefixes, compared exactly (case-sensitive).
        if let Some(ref prefixes) = reserved_prefixes {
            for prefix in prefixes {
                if *top == prefix.as_str() {
                    return Ok(Some("Object key uses a reserved prefix".to_string()));
                }
            }
        }
    }

    Ok(None)
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
pub fn validate_bucket_name(bucket_name: &str) -> Option<String> {
|
||||||
|
let len = bucket_name.len();
|
||||||
|
if len < 3 || len > 63 {
|
||||||
|
return Some("Bucket name must be between 3 and 63 characters".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
let bytes = bucket_name.as_bytes();
|
||||||
|
if !bytes[0].is_ascii_lowercase() && !bytes[0].is_ascii_digit() {
|
||||||
|
return Some(
|
||||||
|
"Bucket name must start and end with a lowercase letter or digit".to_string(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if !bytes[len - 1].is_ascii_lowercase() && !bytes[len - 1].is_ascii_digit() {
|
||||||
|
return Some(
|
||||||
|
"Bucket name must start and end with a lowercase letter or digit".to_string(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
for &b in bytes {
|
||||||
|
if !b.is_ascii_lowercase() && !b.is_ascii_digit() && b != b'.' && b != b'-' {
|
||||||
|
return Some(
|
||||||
|
"Bucket name can only contain lowercase letters, digits, dots, and hyphens"
|
||||||
|
.to_string(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if bucket_name.contains("..") {
|
||||||
|
return Some("Bucket name must not contain consecutive periods".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
if IP_REGEX.is_match(bucket_name) {
|
||||||
|
return Some("Bucket name must not be formatted as an IP address".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
1
myfsio_core/target/.rustc_info.json
Normal file
1
myfsio_core/target/.rustc_info.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{"rustc_fingerprint":13172970000770725120,"outputs":{"7971740275564407648":{"success":true,"status":"","code":0,"stdout":"___.exe\nlib___.rlib\n___.dll\n___.dll\n___.lib\n___.dll\nC:\\Users\\jun\\.rustup\\toolchains\\stable-x86_64-pc-windows-msvc\npacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"msvc\"\ntarget_family=\"windows\"\ntarget_feature=\"cmpxchg16b\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_feature=\"sse3\"\ntarget_has_atomic=\"128\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"windows\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"pc\"\nwindows\n","stderr":""},"17747080675513052775":{"success":true,"status":"","code":0,"stdout":"rustc 1.93.1 (01f6ddf75 2026-02-11)\nbinary: rustc\ncommit-hash: 01f6ddf7588f42ae2d7eb0a2f21d44e8e96674cf\ncommit-date: 2026-02-11\nhost: x86_64-pc-windows-msvc\nrelease: 1.93.1\nLLVM version: 21.1.8\n","stderr":""}},"successes":{}}
|
||||||
3
myfsio_core/target/CACHEDIR.TAG
Normal file
3
myfsio_core/target/CACHEDIR.TAG
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
Signature: 8a477f597d28d172789f06886806bc55
|
||||||
|
# This file is a cache directory tag created by cargo.
|
||||||
|
# For information about cache directory tags see https://bford.info/cachedir/
|
||||||
0
myfsio_core/target/release/.cargo-lock
Normal file
0
myfsio_core/target/release/.cargo-lock
Normal file
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
801af22cf202da8e
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[\"perf-literal\", \"std\"]","declared_features":"[\"default\", \"logging\", \"perf-literal\", \"std\"]","target":7534583537114156500,"profile":2040997289075261528,"path":6364296192483896971,"deps":[[1363051979936526615,"memchr",false,11090220145123168660]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\aho-corasick-45694771b543be75\\dep-lib-aho_corasick","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
435555ec2fb592e3
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[\"alloc\"]","declared_features":"[\"alloc\", \"default\", \"fresh-rust\", \"nightly\", \"serde\", \"std\"]","target":5388200169723499962,"profile":4067574213046180398,"path":10654049299693593327,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\allocator-api2-db7934dbe96de5b4\\dep-lib-allocator_api2","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
d28af275d001c358
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":6962977057026645649,"profile":1369601567987815722,"path":9853093265219907461,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\autocfg-1c4fb7a37cc3df69\\dep-lib-autocfg","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
1fbf4ba9542edced
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":4098124618827574291,"profile":2040997289075261528,"path":3658007358608479489,"deps":[[10520923840501062997,"generic_array",false,11555283918993371487]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\block-buffer-95b0ac364bec72f9\\dep-lib-block_buffer","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
37923e6f5f9687ab
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[]","declared_features":"[\"core\", \"rustc-dep-of-std\"]","target":13840298032947503755,"profile":2040997289075261528,"path":4093486168504982869,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\cfg-if-be2711f84a777e73\\dep-lib-cfg_if","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
603e28136cf5763c
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":2330704043955282025,"profile":2040997289075261528,"path":13200428550696548327,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\cpufeatures-980094f8735c42d1\\dep-lib-cpufeatures","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
896672d759b5299c
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[\"std\"]","declared_features":"[\"getrandom\", \"rand_core\", \"std\"]","target":12082577455412410174,"profile":2040997289075261528,"path":14902376638882023040,"deps":[[857979250431893282,"typenum",false,7416411392359930020],[10520923840501062997,"generic_array",false,11555283918993371487]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\crypto-common-289a508abdda3048\\dep-lib-crypto_common","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
914a617b9f05c9d8
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[\"alloc\", \"block-buffer\", \"core-api\", \"default\", \"mac\", \"std\", \"subtle\"]","declared_features":"[\"alloc\", \"blobby\", \"block-buffer\", \"const-oid\", \"core-api\", \"default\", \"dev\", \"mac\", \"oid\", \"rand_core\", \"std\", \"subtle\"]","target":7510122432137863311,"profile":2040997289075261528,"path":11503432597517024930,"deps":[[6039282458970808711,"crypto_common",false,11252724541433210505],[10626340395483396037,"block_buffer",false,17139625223017709343],[17003143334332120809,"subtle",false,8597342066671925934]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\digest-a91458bfa5613332\\dep-lib-digest","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
3b95cf48bbd7dc53
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":1524667692659508025,"profile":2040997289075261528,"path":17534356223679657546,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\equivalent-943ac856871c0988\\dep-lib-equivalent","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
b7ba5182ce570398
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[]","declared_features":"[\"default\", \"std\"]","target":18077926938045032029,"profile":2040997289075261528,"path":9869209539952544870,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\foldhash-b8a92f8c10d550f7\\dep-lib-foldhash","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
f0a5af4d8a8c7106
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[\"more_lengths\"]","declared_features":"[\"more_lengths\", \"serde\", \"zeroize\"]","target":12318548087768197662,"profile":1369601567987815722,"path":13853454403963664247,"deps":[[5398981501050481332,"version_check",false,16419025953046340415]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\generic-array-2462daa120fe5936\\dep-build-script-build-script-build","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
5f316276809d5ca0
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[\"more_lengths\"]","declared_features":"[\"more_lengths\", \"serde\", \"zeroize\"]","target":13084005262763373425,"profile":2040997289075261528,"path":12463275850883329568,"deps":[[857979250431893282,"typenum",false,7416411392359930020],[10520923840501062997,"build_script_build",false,16977603856295925732]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\generic-array-62216349963f3a3c\\dep-lib-generic_array","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
e417d28fc1909ceb
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"","declared_features":"","target":0,"profile":0,"path":0,"deps":[[10520923840501062997,"build_script_build",false,464306762232604144]],"local":[{"Precalculated":"0.14.7"}],"rustflags":[],"config":0,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
aec88a641c5288e3
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[\"allocator-api2\", \"default\", \"default-hasher\", \"equivalent\", \"inline-more\", \"raw-entry\"]","declared_features":"[\"alloc\", \"allocator-api2\", \"core\", \"default\", \"default-hasher\", \"equivalent\", \"inline-more\", \"nightly\", \"raw-entry\", \"rayon\", \"rustc-dep-of-std\", \"rustc-internal-api\", \"serde\"]","target":13796197676120832388,"profile":2040997289075261528,"path":12448322139402656924,"deps":[[5230392855116717286,"equivalent",false,6042941999404782907],[9150530836556604396,"allocator_api2",false,16398368410642502979],[10842263908529601448,"foldhash",false,10953695263156452023]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\hashbrown-510d641b592c306b\\dep-lib-hashbrown","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
ddc0b590ff80762b
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":17886154901722686619,"profile":1369601567987815722,"path":8608102977929876445,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\heck-b47c94fd2a7e00cb\\dep-lib-heck","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
41890ebff4143fa5
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[\"alloc\", \"default\", \"std\"]","declared_features":"[\"alloc\", \"default\", \"serde\", \"std\"]","target":4242469766639956503,"profile":2040997289075261528,"path":6793865871540733919,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\hex-253414d2260adcdf\\dep-lib-hex","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
3f45b8d062d94ba4
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[]","declared_features":"[\"reset\", \"std\"]","target":12991177224612424488,"profile":2040997289075261528,"path":17893893568771568113,"deps":[[17475753849556516473,"digest",false,15621022965039188625]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\hmac-3297e61b9effb758\\dep-lib-hmac","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
9896adc8892b3fe4
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":8726396592336845528,"profile":1369601567987815722,"path":18304219166357541938,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\indoc-0c686c3f403a2566\\dep-lib-indoc","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}
|
||||||
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
This file has an mtime of when this was started.
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user