Separate Python and Rust into python/ and rust/ with per-stack Dockerfiles

2026-04-19 14:01:05 +08:00
parent be8e030940
commit c2ef37b84e
184 changed files with 96 additions and 85 deletions
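A plausible layout after the split (a sketch inferred from the paths in this diff; the exact per-stack contents are assumptions):

python/
    Dockerfile
    app/        # the Flask S3-compatible API package imported by the tests below
    tests/
rust/
    Dockerfile
    src/        # native acceleration crate (cf. the _HAS_RUST flag in app.storage)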

python/tests/conftest.py

@@ -0,0 +1,176 @@
import json
import sys
import threading
import time
from datetime import datetime, timezone
from pathlib import Path
from urllib.parse import quote, urlparse
import hashlib
import hmac
import pytest
from werkzeug.serving import make_server
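# Make the application package one directory up importable without installing it.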
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from app import create_api_app
@pytest.fixture()
def app(tmp_path: Path):
storage_root = tmp_path / "data"
iam_config = tmp_path / "iam.json"
bucket_policies = tmp_path / "bucket_policies.json"
iam_payload = {
"users": [
{
"access_key": "test",
"secret_key": "secret",
"display_name": "Test User",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy",
"create_bucket", "delete_bucket", "share", "versioning", "tagging",
"encryption", "cors", "lifecycle", "replication", "quota",
"object_lock", "notification", "logging", "website"]}],
}
]
}
iam_config.write_text(json.dumps(iam_payload))
flask_app = create_api_app(
{
"TESTING": True,
"SECRET_KEY": "testing",
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"API_BASE_URL": "http://testserver",
}
)
yield flask_app
storage = flask_app.extensions.get("object_storage")
if storage:
base = getattr(storage, "storage", storage)
if hasattr(base, "shutdown_stats"):
base.shutdown_stats()
@pytest.fixture()
def client(app):
return app.test_client()
@pytest.fixture()
def live_server(app):
server = make_server("127.0.0.1", 0, app)
host, port = server.server_address
thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start()
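# Give the server thread a moment to start accepting connections.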
time.sleep(0.05)
try:
yield f"http://{host}:{port}"
finally:
server.shutdown()
thread.join(timeout=1)
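# SigV4 key derivation: HMAC-chain the secret key through date, region, and
# service, finishing with the literal "aws4_request" (per the AWS SigV4 spec).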
def _sign(key, msg):
return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
def _get_signature_key(key, date_stamp, region_name, service_name):
k_date = _sign(("AWS4" + key).encode("utf-8"), date_stamp)
k_region = _sign(k_date, region_name)
k_service = _sign(k_region, service_name)
k_signing = _sign(k_service, "aws4_request")
return k_signing
@pytest.fixture
def signer():
def _signer(
method,
path,
headers=None,
body=None,
access_key="test",
secret_key="secret",
region="us-east-1",
service="s3",
):
if headers is None:
headers = {}
now = datetime.now(timezone.utc)
amz_date = now.strftime("%Y%m%dT%H%M%SZ")
date_stamp = now.strftime("%Y%m%d")
headers["X-Amz-Date"] = amz_date
# Host header is required for SigV4
if "Host" not in headers:
headers["Host"] = "localhost" # Default for Flask test client
# Payload hash
if body is None:
body = b""
elif isinstance(body, str):
body = body.encode("utf-8")
payload_hash = hashlib.sha256(body).hexdigest()
headers["X-Amz-Content-Sha256"] = payload_hash
# Canonical Request
canonical_uri = quote(path.split("?")[0])
# Query string
parsed = urlparse(path)
query_args = []
if parsed.query:
for pair in parsed.query.split("&"):
if "=" in pair:
k, v = pair.split("=", 1)
else:
k, v = pair, ""
query_args.append((k, v))
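# SigV4 requires the canonical query string sorted by parameter name, then value.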
query_args.sort(key=lambda x: (x[0], x[1]))
canonical_query_parts = []
for k, v in query_args:
canonical_query_parts.append(f"{quote(k, safe='')}={quote(v, safe='')}")
canonical_query_string = "&".join(canonical_query_parts)
# Canonical Headers
canonical_headers_parts = []
signed_headers_parts = []
for k, v in sorted(headers.items(), key=lambda x: x[0].lower()):
k_lower = k.lower()
v_trim = " ".join(str(v).split())
canonical_headers_parts.append(f"{k_lower}:{v_trim}\n")
signed_headers_parts.append(k_lower)
canonical_headers = "".join(canonical_headers_parts)
signed_headers = ";".join(signed_headers_parts)
canonical_request = (
f"{method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers}\n{payload_hash}"
)
# String to Sign
credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
string_to_sign = (
f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}"
)
# Signature
signing_key = _get_signature_key(secret_key, date_stamp, region, service)
signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
authorization = (
f"AWS4-HMAC-SHA256 Credential={access_key}/{credential_scope}, SignedHeaders={signed_headers}, Signature={signature}"
)
headers["Authorization"] = authorization
return headers
return _signer
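A minimal sketch of how these fixtures combine in a test (mirroring the request flow used throughout test_api.py below):

def test_list_buckets(client, signer):
    headers = signer("GET", "/")
    response = client.get("/", headers=headers)
    assert response.status_code == 200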


@@ -0,0 +1,339 @@
import io
import json
import time
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from app.access_logging import (
AccessLogEntry,
AccessLoggingService,
LoggingConfiguration,
)
from app.storage import ObjectStorage
class TestAccessLogEntry:
def test_default_values(self):
entry = AccessLogEntry()
assert entry.bucket_owner == "-"
assert entry.bucket == "-"
assert entry.remote_ip == "-"
assert entry.requester == "-"
assert entry.operation == "-"
assert entry.http_status == 200
assert len(entry.request_id) == 16
def test_to_log_line(self):
entry = AccessLogEntry(
bucket_owner="owner123",
bucket="my-bucket",
remote_ip="192.168.1.1",
requester="user456",
request_id="REQ123456789012",
operation="REST.PUT.OBJECT",
key="test/key.txt",
request_uri="PUT /my-bucket/test/key.txt HTTP/1.1",
http_status=200,
bytes_sent=1024,
object_size=2048,
total_time_ms=150,
referrer="http://example.com",
user_agent="aws-cli/2.0",
version_id="v1",
)
log_line = entry.to_log_line()
assert "owner123" in log_line
assert "my-bucket" in log_line
assert "192.168.1.1" in log_line
assert "user456" in log_line
assert "REST.PUT.OBJECT" in log_line
assert "test/key.txt" in log_line
assert "200" in log_line
def test_to_dict(self):
entry = AccessLogEntry(
bucket_owner="owner",
bucket="bucket",
remote_ip="10.0.0.1",
requester="admin",
request_id="ABC123",
operation="REST.GET.OBJECT",
key="file.txt",
request_uri="GET /bucket/file.txt HTTP/1.1",
http_status=200,
bytes_sent=512,
object_size=512,
total_time_ms=50,
)
result = entry.to_dict()
assert result["bucket_owner"] == "owner"
assert result["bucket"] == "bucket"
assert result["remote_ip"] == "10.0.0.1"
assert result["requester"] == "admin"
assert result["operation"] == "REST.GET.OBJECT"
assert result["key"] == "file.txt"
assert result["http_status"] == 200
assert result["bytes_sent"] == 512
class TestLoggingConfiguration:
def test_default_values(self):
config = LoggingConfiguration(target_bucket="log-bucket")
assert config.target_bucket == "log-bucket"
assert config.target_prefix == ""
assert config.enabled is True
def test_to_dict(self):
config = LoggingConfiguration(
target_bucket="logs",
target_prefix="access-logs/",
enabled=True,
)
result = config.to_dict()
assert "LoggingEnabled" in result
assert result["LoggingEnabled"]["TargetBucket"] == "logs"
assert result["LoggingEnabled"]["TargetPrefix"] == "access-logs/"
def test_from_dict(self):
data = {
"LoggingEnabled": {
"TargetBucket": "my-logs",
"TargetPrefix": "bucket-logs/",
}
}
config = LoggingConfiguration.from_dict(data)
assert config is not None
assert config.target_bucket == "my-logs"
assert config.target_prefix == "bucket-logs/"
assert config.enabled is True
def test_from_dict_no_logging(self):
data = {}
config = LoggingConfiguration.from_dict(data)
assert config is None
@pytest.fixture
def storage(tmp_path: Path):
storage_root = tmp_path / "data"
storage_root.mkdir(parents=True)
return ObjectStorage(storage_root)
@pytest.fixture
def logging_service(tmp_path: Path, storage):
service = AccessLoggingService(
tmp_path,
flush_interval=3600,
max_buffer_size=10,
)
service.set_storage(storage)
yield service
service.shutdown()
class TestAccessLoggingService:
def test_get_bucket_logging_not_configured(self, logging_service):
result = logging_service.get_bucket_logging("unconfigured-bucket")
assert result is None
def test_set_and_get_bucket_logging(self, logging_service):
config = LoggingConfiguration(
target_bucket="log-bucket",
target_prefix="logs/",
)
logging_service.set_bucket_logging("source-bucket", config)
retrieved = logging_service.get_bucket_logging("source-bucket")
assert retrieved is not None
assert retrieved.target_bucket == "log-bucket"
assert retrieved.target_prefix == "logs/"
def test_delete_bucket_logging(self, logging_service):
config = LoggingConfiguration(target_bucket="logs")
logging_service.set_bucket_logging("to-delete", config)
assert logging_service.get_bucket_logging("to-delete") is not None
logging_service.delete_bucket_logging("to-delete")
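# Clear the in-memory config cache so the next lookup re-reads from disk.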
logging_service._configs.clear()
assert logging_service.get_bucket_logging("to-delete") is None
def test_log_request_no_config(self, logging_service):
logging_service.log_request(
"no-config-bucket",
operation="REST.GET.OBJECT",
key="test.txt",
)
stats = logging_service.get_stats()
assert stats["buffered_entries"] == 0
def test_log_request_with_config(self, logging_service, storage):
storage.create_bucket("log-target")
config = LoggingConfiguration(
target_bucket="log-target",
target_prefix="access/",
)
logging_service.set_bucket_logging("source-bucket", config)
logging_service.log_request(
"source-bucket",
operation="REST.PUT.OBJECT",
key="uploaded.txt",
remote_ip="192.168.1.100",
requester="test-user",
http_status=200,
bytes_sent=1024,
)
stats = logging_service.get_stats()
assert stats["buffered_entries"] == 1
def test_log_request_disabled_config(self, logging_service):
config = LoggingConfiguration(
target_bucket="logs",
enabled=False,
)
logging_service.set_bucket_logging("disabled-bucket", config)
logging_service.log_request(
"disabled-bucket",
operation="REST.GET.OBJECT",
key="test.txt",
)
stats = logging_service.get_stats()
assert stats["buffered_entries"] == 0
def test_flush_buffer(self, logging_service, storage):
storage.create_bucket("flush-target")
config = LoggingConfiguration(
target_bucket="flush-target",
target_prefix="logs/",
)
logging_service.set_bucket_logging("flush-source", config)
for i in range(3):
logging_service.log_request(
"flush-source",
operation="REST.GET.OBJECT",
key=f"file{i}.txt",
)
logging_service.flush()
objects = storage.list_objects_all("flush-target")
assert len(objects) >= 1
def test_auto_flush_on_buffer_size(self, logging_service, storage):
storage.create_bucket("auto-flush-target")
config = LoggingConfiguration(
target_bucket="auto-flush-target",
target_prefix="",
)
logging_service.set_bucket_logging("auto-source", config)
for i in range(15):
logging_service.log_request(
"auto-source",
operation="REST.GET.OBJECT",
key=f"file{i}.txt",
)
objects = storage.list_objects_all("auto-flush-target")
assert len(objects) >= 1
def test_get_stats(self, logging_service, storage):
storage.create_bucket("stats-target")
config = LoggingConfiguration(target_bucket="stats-target")
logging_service.set_bucket_logging("stats-bucket", config)
logging_service.log_request(
"stats-bucket",
operation="REST.GET.OBJECT",
key="test.txt",
)
stats = logging_service.get_stats()
assert "buffered_entries" in stats
assert "target_buckets" in stats
assert stats["buffered_entries"] >= 1
def test_shutdown_flushes_buffer(self, tmp_path, storage):
storage.create_bucket("shutdown-target")
service = AccessLoggingService(tmp_path, flush_interval=3600, max_buffer_size=100)
service.set_storage(storage)
config = LoggingConfiguration(target_bucket="shutdown-target")
service.set_bucket_logging("shutdown-source", config)
service.log_request(
"shutdown-source",
operation="REST.PUT.OBJECT",
key="final.txt",
)
service.shutdown()
objects = storage.list_objects_all("shutdown-target")
assert len(objects) >= 1
def test_logging_caching(self, logging_service):
config = LoggingConfiguration(target_bucket="cached-logs")
logging_service.set_bucket_logging("cached-bucket", config)
logging_service.get_bucket_logging("cached-bucket")
assert "cached-bucket" in logging_service._configs
def test_log_request_all_fields(self, logging_service, storage):
storage.create_bucket("detailed-target")
config = LoggingConfiguration(target_bucket="detailed-target", target_prefix="detailed/")
logging_service.set_bucket_logging("detailed-source", config)
logging_service.log_request(
"detailed-source",
operation="REST.PUT.OBJECT",
key="detailed/file.txt",
remote_ip="10.0.0.1",
requester="admin-user",
request_uri="PUT /detailed-source/detailed/file.txt HTTP/1.1",
http_status=201,
error_code="",
bytes_sent=2048,
object_size=2048,
total_time_ms=100,
referrer="http://admin.example.com",
user_agent="curl/7.68.0",
version_id="v1.0",
request_id="CUSTOM_REQ_ID",
)
stats = logging_service.get_stats()
assert stats["buffered_entries"] == 1
def test_failed_flush_returns_to_buffer(self, logging_service):
config = LoggingConfiguration(target_bucket="nonexistent-target")
logging_service.set_bucket_logging("fail-source", config)
logging_service.log_request(
"fail-source",
operation="REST.GET.OBJECT",
key="test.txt",
)
initial_count = logging_service.get_stats()["buffered_entries"]
logging_service.flush()
final_count = logging_service.get_stats()["buffered_entries"]
assert final_count >= initial_count

python/tests/test_acl.py

@@ -0,0 +1,284 @@
import json
from pathlib import Path
import pytest
from app.acl import (
Acl,
AclGrant,
AclService,
ACL_PERMISSION_FULL_CONTROL,
ACL_PERMISSION_READ,
ACL_PERMISSION_WRITE,
ACL_PERMISSION_READ_ACP,
ACL_PERMISSION_WRITE_ACP,
GRANTEE_ALL_USERS,
GRANTEE_AUTHENTICATED_USERS,
PERMISSION_TO_ACTIONS,
create_canned_acl,
CANNED_ACLS,
)
class TestAclGrant:
def test_to_dict(self):
grant = AclGrant(grantee="user123", permission=ACL_PERMISSION_READ)
result = grant.to_dict()
assert result == {"grantee": "user123", "permission": "READ"}
def test_from_dict(self):
data = {"grantee": "admin", "permission": "FULL_CONTROL"}
grant = AclGrant.from_dict(data)
assert grant.grantee == "admin"
assert grant.permission == ACL_PERMISSION_FULL_CONTROL
class TestAcl:
def test_to_dict(self):
acl = Acl(
owner="owner-user",
grants=[
AclGrant(grantee="owner-user", permission=ACL_PERMISSION_FULL_CONTROL),
AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ),
],
)
result = acl.to_dict()
assert result["owner"] == "owner-user"
assert len(result["grants"]) == 2
assert result["grants"][0]["grantee"] == "owner-user"
assert result["grants"][1]["grantee"] == "*"
def test_from_dict(self):
data = {
"owner": "the-owner",
"grants": [
{"grantee": "the-owner", "permission": "FULL_CONTROL"},
{"grantee": "authenticated", "permission": "READ"},
],
}
acl = Acl.from_dict(data)
assert acl.owner == "the-owner"
assert len(acl.grants) == 2
assert acl.grants[0].grantee == "the-owner"
assert acl.grants[1].grantee == GRANTEE_AUTHENTICATED_USERS
def test_from_dict_empty_grants(self):
data = {"owner": "solo-owner"}
acl = Acl.from_dict(data)
assert acl.owner == "solo-owner"
assert len(acl.grants) == 0
def test_get_allowed_actions_owner(self):
acl = Acl(owner="owner123", grants=[])
actions = acl.get_allowed_actions("owner123", is_authenticated=True)
assert actions == PERMISSION_TO_ACTIONS[ACL_PERMISSION_FULL_CONTROL]
def test_get_allowed_actions_all_users(self):
acl = Acl(
owner="owner",
grants=[AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ)],
)
actions = acl.get_allowed_actions(None, is_authenticated=False)
assert "read" in actions
assert "list" in actions
assert "write" not in actions
def test_get_allowed_actions_authenticated_users(self):
acl = Acl(
owner="owner",
grants=[AclGrant(grantee=GRANTEE_AUTHENTICATED_USERS, permission=ACL_PERMISSION_WRITE)],
)
actions_authenticated = acl.get_allowed_actions("some-user", is_authenticated=True)
assert "write" in actions_authenticated
assert "delete" in actions_authenticated
actions_anonymous = acl.get_allowed_actions(None, is_authenticated=False)
assert "write" not in actions_anonymous
def test_get_allowed_actions_specific_grantee(self):
acl = Acl(
owner="owner",
grants=[
AclGrant(grantee="user-abc", permission=ACL_PERMISSION_READ),
AclGrant(grantee="user-xyz", permission=ACL_PERMISSION_WRITE),
],
)
abc_actions = acl.get_allowed_actions("user-abc", is_authenticated=True)
assert "read" in abc_actions
assert "list" in abc_actions
assert "write" not in abc_actions
xyz_actions = acl.get_allowed_actions("user-xyz", is_authenticated=True)
assert "write" in xyz_actions
assert "read" not in xyz_actions
def test_get_allowed_actions_combined(self):
acl = Acl(
owner="owner",
grants=[
AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ),
AclGrant(grantee="special-user", permission=ACL_PERMISSION_WRITE),
],
)
actions = acl.get_allowed_actions("special-user", is_authenticated=True)
assert "read" in actions
assert "list" in actions
assert "write" in actions
assert "delete" in actions
class TestCannedAcls:
def test_private_acl(self):
acl = create_canned_acl("private", "the-owner")
assert acl.owner == "the-owner"
assert len(acl.grants) == 1
assert acl.grants[0].grantee == "the-owner"
assert acl.grants[0].permission == ACL_PERMISSION_FULL_CONTROL
def test_public_read_acl(self):
acl = create_canned_acl("public-read", "owner")
assert acl.owner == "owner"
has_owner_full_control = any(
g.grantee == "owner" and g.permission == ACL_PERMISSION_FULL_CONTROL for g in acl.grants
)
has_public_read = any(
g.grantee == GRANTEE_ALL_USERS and g.permission == ACL_PERMISSION_READ for g in acl.grants
)
assert has_owner_full_control
assert has_public_read
def test_public_read_write_acl(self):
acl = create_canned_acl("public-read-write", "owner")
assert acl.owner == "owner"
has_public_read = any(
g.grantee == GRANTEE_ALL_USERS and g.permission == ACL_PERMISSION_READ for g in acl.grants
)
has_public_write = any(
g.grantee == GRANTEE_ALL_USERS and g.permission == ACL_PERMISSION_WRITE for g in acl.grants
)
assert has_public_read
assert has_public_write
def test_authenticated_read_acl(self):
acl = create_canned_acl("authenticated-read", "owner")
has_authenticated_read = any(
g.grantee == GRANTEE_AUTHENTICATED_USERS and g.permission == ACL_PERMISSION_READ for g in acl.grants
)
assert has_authenticated_read
def test_unknown_canned_acl_defaults_to_private(self):
acl = create_canned_acl("unknown-acl", "owner")
private_acl = create_canned_acl("private", "owner")
assert acl.to_dict() == private_acl.to_dict()
@pytest.fixture
def acl_service(tmp_path: Path):
return AclService(tmp_path)
class TestAclService:
def test_get_bucket_acl_not_exists(self, acl_service):
result = acl_service.get_bucket_acl("nonexistent-bucket")
assert result is None
def test_set_and_get_bucket_acl(self, acl_service):
acl = Acl(
owner="bucket-owner",
grants=[AclGrant(grantee="bucket-owner", permission=ACL_PERMISSION_FULL_CONTROL)],
)
acl_service.set_bucket_acl("my-bucket", acl)
retrieved = acl_service.get_bucket_acl("my-bucket")
assert retrieved is not None
assert retrieved.owner == "bucket-owner"
assert len(retrieved.grants) == 1
def test_bucket_acl_caching(self, acl_service):
acl = Acl(owner="cached-owner", grants=[])
acl_service.set_bucket_acl("cached-bucket", acl)
acl_service.get_bucket_acl("cached-bucket")
assert "cached-bucket" in acl_service._bucket_acl_cache
retrieved = acl_service.get_bucket_acl("cached-bucket")
assert retrieved.owner == "cached-owner"
def test_set_bucket_canned_acl(self, acl_service):
result = acl_service.set_bucket_canned_acl("new-bucket", "public-read", "the-owner")
assert result.owner == "the-owner"
retrieved = acl_service.get_bucket_acl("new-bucket")
assert retrieved is not None
has_public_read = any(
g.grantee == GRANTEE_ALL_USERS and g.permission == ACL_PERMISSION_READ for g in retrieved.grants
)
assert has_public_read
def test_delete_bucket_acl(self, acl_service):
acl = Acl(owner="to-delete-owner", grants=[])
acl_service.set_bucket_acl("delete-me", acl)
assert acl_service.get_bucket_acl("delete-me") is not None
acl_service.delete_bucket_acl("delete-me")
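# Drop the ACL cache so the next lookup reflects the on-disk deletion.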
acl_service._bucket_acl_cache.clear()
assert acl_service.get_bucket_acl("delete-me") is None
def test_evaluate_bucket_acl_allowed(self, acl_service):
acl = Acl(
owner="owner",
grants=[AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ)],
)
acl_service.set_bucket_acl("public-bucket", acl)
result = acl_service.evaluate_bucket_acl("public-bucket", None, "read", is_authenticated=False)
assert result is True
def test_evaluate_bucket_acl_denied(self, acl_service):
acl = Acl(
owner="owner",
grants=[AclGrant(grantee="owner", permission=ACL_PERMISSION_FULL_CONTROL)],
)
acl_service.set_bucket_acl("private-bucket", acl)
result = acl_service.evaluate_bucket_acl("private-bucket", "other-user", "write", is_authenticated=True)
assert result is False
def test_evaluate_bucket_acl_no_acl(self, acl_service):
result = acl_service.evaluate_bucket_acl("no-acl-bucket", "anyone", "read")
assert result is False
def test_get_object_acl_from_metadata(self, acl_service):
metadata = {
"__acl__": {
"owner": "object-owner",
"grants": [{"grantee": "object-owner", "permission": "FULL_CONTROL"}],
}
}
result = acl_service.get_object_acl("bucket", "key", metadata)
assert result is not None
assert result.owner == "object-owner"
def test_get_object_acl_no_acl_in_metadata(self, acl_service):
metadata = {"Content-Type": "text/plain"}
result = acl_service.get_object_acl("bucket", "key", metadata)
assert result is None
def test_create_object_acl_metadata(self, acl_service):
acl = Acl(owner="obj-owner", grants=[])
result = acl_service.create_object_acl_metadata(acl)
assert "__acl__" in result
assert result["__acl__"]["owner"] == "obj-owner"
def test_evaluate_object_acl(self, acl_service):
metadata = {
"__acl__": {
"owner": "obj-owner",
"grants": [{"grantee": "*", "permission": "READ"}],
}
}
result = acl_service.evaluate_object_acl(metadata, None, "read", is_authenticated=False)
assert result is True
result = acl_service.evaluate_object_acl(metadata, None, "write", is_authenticated=False)
assert result is False

python/tests/test_api.py

@@ -0,0 +1,558 @@
import hashlib
import hmac
import json
from datetime import datetime, timezone
from urllib.parse import quote
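# Build a SigV4 presigned GET query string (UNSIGNED-PAYLOAD, only the host
# header signed), mirroring the header-based signer in conftest.py.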
def _build_presigned_query(path: str, *, access_key: str = "test", secret_key: str = "secret", expires: int = 60) -> str:
now = datetime.now(timezone.utc)
amz_date = now.strftime("%Y%m%dT%H%M%SZ")
date_stamp = now.strftime("%Y%m%d")
region = "us-east-1"
service = "s3"
credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
query_items = [
("X-Amz-Algorithm", "AWS4-HMAC-SHA256"),
("X-Amz-Content-Sha256", "UNSIGNED-PAYLOAD"),
("X-Amz-Credential", f"{access_key}/{credential_scope}"),
("X-Amz-Date", amz_date),
("X-Amz-Expires", str(expires)),
("X-Amz-SignedHeaders", "host"),
]
canonical_query = "&".join(
f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}" for k, v in sorted(query_items)
)
canonical_request = "\n".join([
"GET",
quote(path, safe="/-_.~"),
canonical_query,
"host:localhost\n",
"host",
"UNSIGNED-PAYLOAD",
])
hashed_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
string_to_sign = "\n".join([
"AWS4-HMAC-SHA256",
amz_date,
credential_scope,
hashed_request,
])
def _sign(key: bytes, msg: str) -> bytes:
return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
k_date = _sign(("AWS4" + secret_key).encode("utf-8"), date_stamp)
k_region = _sign(k_date, region)
k_service = _sign(k_region, service)
signing_key = _sign(k_service, "aws4_request")
signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
return canonical_query + f"&X-Amz-Signature={signature}"
def test_bucket_and_object_lifecycle(client, signer):
headers = signer("PUT", "/photos")
response = client.put("/photos", headers=headers)
assert response.status_code == 200
headers = signer("GET", "/")
response = client.get("/", headers=headers)
assert response.status_code == 200
assert b"photos" in response.data
data = b"hello world"
headers = signer("PUT", "/photos/image.txt", body=data)
response = client.put("/photos/image.txt", headers=headers, data=data)
assert response.status_code == 200
assert "ETag" in response.headers
headers = signer("GET", "/photos")
response = client.get("/photos", headers=headers)
assert response.status_code == 200
assert b"image.txt" in response.data
headers = signer("GET", "/photos/image.txt")
response = client.get("/photos/image.txt", headers=headers)
assert response.status_code == 200
assert response.data == b"hello world"
headers = signer("DELETE", "/photos/image.txt")
response = client.delete("/photos/image.txt", headers=headers)
assert response.status_code == 204
headers = signer("DELETE", "/photos")
response = client.delete("/photos", headers=headers)
assert response.status_code == 204
def test_bulk_delete_objects(client, signer):
headers = signer("PUT", "/bulk")
assert client.put("/bulk", headers=headers).status_code == 200
headers = signer("PUT", "/bulk/first.txt", body=b"first")
assert client.put("/bulk/first.txt", headers=headers, data=b"first").status_code == 200
headers = signer("PUT", "/bulk/second.txt", body=b"second")
assert client.put("/bulk/second.txt", headers=headers, data=b"second").status_code == 200
delete_xml = b"""
<Delete>
<Object><Key>first.txt</Key></Object>
<Object><Key>missing.txt</Key></Object>
</Delete>
"""
# Note: query_string is part of the path for signing
headers = signer("POST", "/bulk?delete", headers={"Content-Type": "application/xml"}, body=delete_xml)
response = client.post(
"/bulk",
headers=headers,
query_string={"delete": ""},
data=delete_xml,
)
assert response.status_code == 200
assert b"<DeleteResult>" in response.data
headers = signer("GET", "/bulk")
listing = client.get("/bulk", headers=headers)
assert b"first.txt" not in listing.data
assert b"missing.txt" not in listing.data
assert b"second.txt" in listing.data
def test_bulk_delete_rejects_version_ids(client, signer):
headers = signer("PUT", "/bulkv")
assert client.put("/bulkv", headers=headers).status_code == 200
headers = signer("PUT", "/bulkv/keep.txt", body=b"keep")
assert client.put("/bulkv/keep.txt", headers=headers, data=b"keep").status_code == 200
delete_xml = b"""
<Delete>
<Object><Key>keep.txt</Key><VersionId>123</VersionId></Object>
</Delete>
"""
headers = signer("POST", "/bulkv?delete", headers={"Content-Type": "application/xml"}, body=delete_xml)
response = client.post(
"/bulkv",
headers=headers,
query_string={"delete": ""},
data=delete_xml,
)
assert response.status_code == 200
assert b"InvalidRequest" in response.data
headers = signer("GET", "/bulkv")
listing = client.get("/bulkv", headers=headers)
assert b"keep.txt" in listing.data
def test_request_id_header_present(client, signer):
headers = signer("GET", "/")
response = client.get("/", headers=headers)
assert response.status_code == 200
assert response.headers.get("X-Request-ID")
def test_healthcheck_returns_status(client):
response = client.get("/myfsio/health")
data = response.get_json()
assert response.status_code == 200
assert data["status"] == "ok"
assert "version" not in data
def test_missing_credentials_denied(client):
response = client.get("/")
assert response.status_code == 403
def test_presigned_url_denied_for_disabled_user(client, signer):
headers = signer("PUT", "/secure")
assert client.put("/secure", headers=headers).status_code == 200
payload = b"hello"
headers = signer("PUT", "/secure/file.txt", body=payload)
assert client.put("/secure/file.txt", headers=headers, data=payload).status_code == 200
iam = client.application.extensions["iam"]
iam.disable_user("test")
query = _build_presigned_query("/secure/file.txt")
response = client.get(f"/secure/file.txt?{query}", headers={"Host": "localhost"})
assert response.status_code == 403
assert b"User account is disabled" in response.data
def test_presigned_url_denied_for_inactive_key(client, signer):
headers = signer("PUT", "/secure2")
assert client.put("/secure2", headers=headers).status_code == 200
payload = b"hello"
headers = signer("PUT", "/secure2/file.txt", body=payload)
assert client.put("/secure2/file.txt", headers=headers, data=payload).status_code == 200
iam = client.application.extensions["iam"]
for user in iam._raw_config.get("users", []):
for key_info in user.get("access_keys", []):
if key_info.get("access_key") == "test":
key_info["status"] = "inactive"
iam._save()
iam._load()
query = _build_presigned_query("/secure2/file.txt")
response = client.get(f"/secure2/file.txt?{query}", headers={"Host": "localhost"})
assert response.status_code == 403
assert b"Access key is inactive" in response.data
def test_bucket_policies_deny_reads(client, signer):
headers = signer("PUT", "/docs")
assert client.put("/docs", headers=headers).status_code == 200
headers = signer("PUT", "/docs/readme.txt", body=b"content")
assert client.put("/docs/readme.txt", headers=headers, data=b"content").status_code == 200
headers = signer("GET", "/docs/readme.txt")
response = client.get("/docs/readme.txt", headers=headers)
assert response.status_code == 200
assert response.data == b"content"
policy = {
"Version": "2012-10-17",
"Statement": [
{
"Sid": "DenyReads",
"Effect": "Deny",
"Principal": "*",
"Action": ["s3:GetObject"],
"Resource": ["arn:aws:s3:::docs/*"],
}
],
}
policy_bytes = json.dumps(policy).encode("utf-8")
headers = signer("PUT", "/docs?policy", headers={"Content-Type": "application/json"}, body=policy_bytes)
assert client.put("/docs?policy", headers=headers, json=policy).status_code == 204
headers = signer("GET", "/docs?policy")
fetched = client.get("/docs?policy", headers=headers)
assert fetched.status_code == 200
assert fetched.get_json()["Version"] == "2012-10-17"
headers = signer("GET", "/docs/readme.txt")
denied = client.get("/docs/readme.txt", headers=headers)
assert denied.status_code == 403
headers = signer("DELETE", "/docs?policy")
assert client.delete("/docs?policy", headers=headers).status_code == 204
headers = signer("DELETE", "/docs/readme.txt")
assert client.delete("/docs/readme.txt", headers=headers).status_code == 204
headers = signer("DELETE", "/docs")
assert client.delete("/docs", headers=headers).status_code == 204
def test_trailing_slash_returns_xml(client):
response = client.get("/ghost/")
assert response.status_code == 403
assert response.mimetype == "application/xml"
assert b"<Error>" in response.data
def test_public_policy_allows_anonymous_list_and_read(client, signer):
headers = signer("PUT", "/public")
assert client.put("/public", headers=headers).status_code == 200
headers = signer("PUT", "/public/hello.txt", body=b"hi")
assert client.put("/public/hello.txt", headers=headers, data=b"hi").status_code == 200
assert client.get("/public").status_code == 403
assert client.get("/public/hello.txt").status_code == 403
policy = {
"Version": "2012-10-17",
"Statement": [
{
"Sid": "AllowList",
"Effect": "Allow",
"Principal": "*",
"Action": ["s3:ListBucket"],
"Resource": ["arn:aws:s3:::public"],
},
{
"Sid": "AllowRead",
"Effect": "Allow",
"Principal": "*",
"Action": ["s3:GetObject"],
"Resource": ["arn:aws:s3:::public/*"],
},
],
}
policy_bytes = json.dumps(policy).encode("utf-8")
headers = signer("PUT", "/public?policy", headers={"Content-Type": "application/json"}, body=policy_bytes)
assert client.put("/public?policy", headers=headers, json=policy).status_code == 204
list_response = client.get("/public")
assert list_response.status_code == 200
assert b"hello.txt" in list_response.data
obj_response = client.get("/public/hello.txt")
assert obj_response.status_code == 200
assert obj_response.data == b"hi"
headers = signer("DELETE", "/public/hello.txt")
assert client.delete("/public/hello.txt", headers=headers).status_code == 204
headers = signer("DELETE", "/public?policy")
assert client.delete("/public?policy", headers=headers).status_code == 204
headers = signer("DELETE", "/public")
assert client.delete("/public", headers=headers).status_code == 204
def test_principal_dict_with_object_get_only(client, signer):
headers = signer("PUT", "/mixed")
assert client.put("/mixed", headers=headers).status_code == 200
headers = signer("PUT", "/mixed/only.txt", body=b"ok")
assert client.put("/mixed/only.txt", headers=headers, data=b"ok").status_code == 200
policy = {
"Version": "2012-10-17",
"Statement": [
{
"Sid": "AllowObjects",
"Effect": "Allow",
"Principal": {"AWS": ["*"]},
"Action": ["s3:GetObject"],
"Resource": ["arn:aws:s3:::mixed/*"],
},
{
"Sid": "DenyList",
"Effect": "Deny",
"Principal": "*",
"Action": ["s3:ListBucket"],
"Resource": ["arn:aws:s3:::mixed"],
},
],
}
policy_bytes = json.dumps(policy).encode("utf-8")
headers = signer("PUT", "/mixed?policy", headers={"Content-Type": "application/json"}, body=policy_bytes)
assert client.put("/mixed?policy", headers=headers, json=policy).status_code == 204
assert client.get("/mixed").status_code == 403
allowed = client.get("/mixed/only.txt")
assert allowed.status_code == 200
assert allowed.data == b"ok"
headers = signer("DELETE", "/mixed/only.txt")
assert client.delete("/mixed/only.txt", headers=headers).status_code == 204
headers = signer("DELETE", "/mixed?policy")
assert client.delete("/mixed?policy", headers=headers).status_code == 204
headers = signer("DELETE", "/mixed")
assert client.delete("/mixed", headers=headers).status_code == 204
def test_bucket_policy_wildcard_resource_allows_object_get(client, signer):
headers = signer("PUT", "/test")
assert client.put("/test", headers=headers).status_code == 200
headers = signer("PUT", "/test/vid.mp4", body=b"video")
assert client.put("/test/vid.mp4", headers=headers, data=b"video").status_code == 200
policy = {
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {"AWS": ["*"]},
"Action": ["s3:GetObject"],
"Resource": ["arn:aws:s3:::*/*"],
},
{
"Effect": "Deny",
"Principal": {"AWS": ["*"]},
"Action": ["s3:ListBucket"],
"Resource": ["arn:aws:s3:::*"],
},
],
}
policy_bytes = json.dumps(policy).encode("utf-8")
headers = signer("PUT", "/test?policy", headers={"Content-Type": "application/json"}, body=policy_bytes)
assert client.put("/test?policy", headers=headers, json=policy).status_code == 204
listing = client.get("/test")
assert listing.status_code == 403
payload = client.get("/test/vid.mp4")
assert payload.status_code == 200
assert payload.data == b"video"
headers = signer("DELETE", "/test/vid.mp4")
assert client.delete("/test/vid.mp4", headers=headers).status_code == 204
headers = signer("DELETE", "/test?policy")
assert client.delete("/test?policy", headers=headers).status_code == 204
headers = signer("DELETE", "/test")
assert client.delete("/test", headers=headers).status_code == 204
def test_head_object_returns_metadata(client, signer):
headers = signer("PUT", "/media")
assert client.put("/media", headers=headers).status_code == 200
payload = b"metadata"
upload_headers = {"X-Amz-Meta-Test": "demo"}
# The signer must see custom metadata headers so they are included in SignedHeaders
headers = signer("PUT", "/media/info.txt", headers=upload_headers, body=payload)
assert client.put("/media/info.txt", headers=headers, data=payload).status_code == 200
headers = signer("HEAD", "/media/info.txt")
head = client.head("/media/info.txt", headers=headers)
assert head.status_code == 200
assert head.data == b""
assert head.headers["Content-Length"] == str(len(payload))
assert head.headers["X-Amz-Meta-Test"] == "demo"
def test_bucket_versioning_endpoint(client, signer):
headers = signer("PUT", "/history")
assert client.put("/history", headers=headers).status_code == 200
headers = signer("GET", "/history?versioning")
response = client.get("/history", headers=headers, query_string={"versioning": ""})
assert response.status_code == 200
assert b"<Status>Suspended</Status>" in response.data
storage = client.application.extensions["object_storage"]
storage.set_bucket_versioning("history", True)
headers = signer("GET", "/history?versioning")
enabled = client.get("/history", headers=headers, query_string={"versioning": ""})
assert enabled.status_code == 200
assert b"<Status>Enabled</Status>" in enabled.data
def test_bucket_tagging_cors_and_encryption_round_trip(client, signer):
headers = signer("PUT", "/config")
assert client.put("/config", headers=headers).status_code == 200
headers = signer("GET", "/config?tagging")
missing_tags = client.get("/config", headers=headers, query_string={"tagging": ""})
assert missing_tags.status_code == 404
tagging_xml = b"""
<Tagging>
<TagSet>
<Tag><Key>env</Key><Value>dev</Value></Tag>
<Tag><Key>team</Key><Value>platform</Value></Tag>
</TagSet>
</Tagging>
"""
headers = signer("PUT", "/config?tagging", headers={"Content-Type": "application/xml"}, body=tagging_xml)
assert (
client.put(
"/config",
headers=headers,
query_string={"tagging": ""},
data=tagging_xml,
content_type="application/xml",
).status_code
== 204
)
headers = signer("GET", "/config?tagging")
tags = client.get("/config", headers=headers, query_string={"tagging": ""})
assert tags.status_code == 200
assert b"<Key>env</Key>" in tags.data
assert b"<Value>platform</Value>" in tags.data
headers = signer("GET", "/config?cors")
missing_cors = client.get("/config", headers=headers, query_string={"cors": ""})
assert missing_cors.status_code == 404
cors_xml = b"""
<CORSConfiguration>
<CORSRule>
<AllowedOrigin>*</AllowedOrigin>
<AllowedMethod>GET</AllowedMethod>
<AllowedHeader>*</AllowedHeader>
<ExposeHeader>X-Test</ExposeHeader>
<MaxAgeSeconds>600</MaxAgeSeconds>
</CORSRule>
</CORSConfiguration>
"""
headers = signer("PUT", "/config?cors", headers={"Content-Type": "application/xml"}, body=cors_xml)
assert (
client.put(
"/config",
headers=headers,
query_string={"cors": ""},
data=cors_xml,
content_type="application/xml",
).status_code
== 204
)
headers = signer("GET", "/config?cors")
cors = client.get("/config", headers=headers, query_string={"cors": ""})
assert cors.status_code == 200
assert b"<AllowedOrigin>*</AllowedOrigin>" in cors.data
assert b"<AllowedMethod>GET</AllowedMethod>" in cors.data
# Clearing CORS rules with an empty payload removes the configuration
headers = signer("PUT", "/config?cors", body=b"")
assert (
client.put(
"/config",
headers=headers,
query_string={"cors": ""},
data=b"",
).status_code
== 204
)
headers = signer("GET", "/config?cors")
cleared_cors = client.get("/config", headers=headers, query_string={"cors": ""})
assert cleared_cors.status_code == 404
headers = signer("GET", "/config?encryption")
missing_enc = client.get("/config", headers=headers, query_string={"encryption": ""})
assert missing_enc.status_code == 404
encryption_xml = b"""
<ServerSideEncryptionConfiguration>
<Rule>
<ApplyServerSideEncryptionByDefault>
<SSEAlgorithm>AES256</SSEAlgorithm>
</ApplyServerSideEncryptionByDefault>
</Rule>
</ServerSideEncryptionConfiguration>
"""
headers = signer("PUT", "/config?encryption", headers={"Content-Type": "application/xml"}, body=encryption_xml)
assert (
client.put(
"/config",
headers=headers,
query_string={"encryption": ""},
data=encryption_xml,
content_type="application/xml",
).status_code
== 204
)
headers = signer("GET", "/config?encryption")
encryption = client.get("/config", headers=headers, query_string={"encryption": ""})
assert encryption.status_code == 200
assert b"AES256" in encryption.data


@@ -0,0 +1,88 @@
import pytest
from xml.etree.ElementTree import fromstring
@pytest.fixture
def client(app):
return app.test_client()
@pytest.fixture
def auth_headers(app):
return {
"X-Access-Key": "test",
"X-Secret-Key": "secret"
}
def test_multipart_upload_flow(client, auth_headers):
# 1. Create bucket
client.put("/test-bucket", headers=auth_headers)
# 2. Initiate Multipart Upload
resp = client.post("/test-bucket/large-file.txt?uploads", headers=auth_headers)
assert resp.status_code == 200
root = fromstring(resp.data)
upload_id = root.find("UploadId").text
assert upload_id
# 3. Upload Part 1
resp = client.put(
f"/test-bucket/large-file.txt?partNumber=1&uploadId={upload_id}",
headers=auth_headers,
data=b"part1"
)
assert resp.status_code == 200
etag1 = resp.headers["ETag"]
assert etag1
# 4. Upload Part 2
resp = client.put(
f"/test-bucket/large-file.txt?partNumber=2&uploadId={upload_id}",
headers=auth_headers,
data=b"part2"
)
assert resp.status_code == 200
etag2 = resp.headers["ETag"]
assert etag2
# 5. Complete Multipart Upload
xml_body = f"""
<CompleteMultipartUpload>
<Part>
<PartNumber>1</PartNumber>
<ETag>{etag1}</ETag>
</Part>
<Part>
<PartNumber>2</PartNumber>
<ETag>{etag2}</ETag>
</Part>
</CompleteMultipartUpload>
"""
resp = client.post(
f"/test-bucket/large-file.txt?uploadId={upload_id}",
headers=auth_headers,
data=xml_body
)
assert resp.status_code == 200
root = fromstring(resp.data)
assert root.find("Key").text == "large-file.txt"
# 6. Verify object content
resp = client.get("/test-bucket/large-file.txt", headers=auth_headers)
assert resp.status_code == 200
assert resp.data == b"part1part2"
def test_abort_multipart_upload(client, auth_headers):
client.put("/abort-bucket", headers=auth_headers)
resp = client.post("/abort-bucket/file.txt?uploads", headers=auth_headers)
upload_id = fromstring(resp.data).find("UploadId").text
resp = client.delete(f"/abort-bucket/file.txt?uploadId={upload_id}", headers=auth_headers)
assert resp.status_code == 204
resp = client.put(
f"/abort-bucket/file.txt?partNumber=1&uploadId={upload_id}",
headers=auth_headers,
data=b"data"
)
assert resp.status_code == 404


@@ -0,0 +1,46 @@
import uuid
import boto3
import pytest
from botocore.client import Config
@pytest.mark.integration
def test_boto3_basic_operations(live_server):
bucket_name = f"boto3-test-{uuid.uuid4().hex[:8]}"
object_key = "folder/hello.txt"
s3 = boto3.client(
"s3",
endpoint_url=live_server,
aws_access_key_id="test",
aws_secret_access_key="secret",
region_name="us-east-1",
use_ssl=False,
config=Config(
signature_version="s3v4",
retries={"max_attempts": 1},
s3={"addressing_style": "path"},
),
)
s3.create_bucket(Bucket=bucket_name)
try:
put_response = s3.put_object(Bucket=bucket_name, Key=object_key, Body=b"hello from boto3")
assert "ETag" in put_response
obj = s3.get_object(Bucket=bucket_name, Key=object_key)
assert obj["Body"].read() == b"hello from boto3"
listing = s3.list_objects_v2(Bucket=bucket_name)
contents = listing.get("Contents", [])
assert contents, "list_objects_v2 should return at least the object we uploaded"
keys = {entry["Key"] for entry in contents}
assert object_key in keys
s3.delete_object(Bucket=bucket_name, Key=object_key)
post_delete = s3.list_objects_v2(Bucket=bucket_name)
assert not post_delete.get("Contents"), "Object should be removed before deleting bucket"
finally:
s3.delete_bucket(Bucket=bucket_name)
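The boto3 round-trips in this file and the next are tagged with the integration marker; assuming the marker is registered in the project's pytest configuration, they can be selected with pytest -m integration and excluded with pytest -m "not integration".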


@@ -0,0 +1,28 @@
import uuid
import pytest
import boto3
from botocore.client import Config
@pytest.mark.integration
def test_boto3_multipart_upload(live_server):
bucket_name = f'mp-test-{uuid.uuid4().hex[:8]}'
object_key = 'large-file.bin'
s3 = boto3.client(
    's3',
    endpoint_url=live_server,
    aws_access_key_id='test',
    aws_secret_access_key='secret',
    region_name='us-east-1',
    use_ssl=False,
    config=Config(
        signature_version='s3v4',
        retries={'max_attempts': 1},
        s3={'addressing_style': 'path'},
    ),
)
s3.create_bucket(Bucket=bucket_name)
try:
response = s3.create_multipart_upload(Bucket=bucket_name, Key=object_key)
upload_id = response['UploadId']
parts = []
part1_data = b'A' * 1024
part2_data = b'B' * 1024
resp1 = s3.upload_part(Bucket=bucket_name, Key=object_key, PartNumber=1, UploadId=upload_id, Body=part1_data)
parts.append({'PartNumber': 1, 'ETag': resp1['ETag']})
resp2 = s3.upload_part(Bucket=bucket_name, Key=object_key, PartNumber=2, UploadId=upload_id, Body=part2_data)
parts.append({'PartNumber': 2, 'ETag': resp2['ETag']})
s3.complete_multipart_upload(Bucket=bucket_name, Key=object_key, UploadId=upload_id, MultipartUpload={'Parts': parts})
obj = s3.get_object(Bucket=bucket_name, Key=object_key)
content = obj['Body'].read()
assert content == part1_data + part2_data
s3.delete_object(Bucket=bucket_name, Key=object_key)
finally:
s3.delete_bucket(Bucket=bucket_name)


@@ -0,0 +1,156 @@
import hashlib
import time
import pytest
@pytest.fixture()
def bucket(client, signer):
headers = signer("PUT", "/cond-test")
client.put("/cond-test", headers=headers)
return "cond-test"
@pytest.fixture()
def uploaded(client, signer, bucket):
body = b"hello conditional"
etag = hashlib.md5(body).hexdigest()
headers = signer("PUT", f"/{bucket}/obj.txt", body=body)
resp = client.put(f"/{bucket}/obj.txt", headers=headers, data=body)
last_modified = resp.headers.get("Last-Modified")
return {"etag": etag, "last_modified": last_modified}
class TestIfMatch:
def test_get_matching_etag(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": f'"{uploaded["etag"]}"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_get_non_matching_etag(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": '"wrongetag"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
def test_head_matching_etag(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Match": f'"{uploaded["etag"]}"'})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_head_non_matching_etag(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Match": '"wrongetag"'})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
def test_wildcard_match(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": "*"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_multiple_etags_one_matches(self, client, signer, bucket, uploaded):
etag_list = f'"bad1", "{uploaded["etag"]}", "bad2"'
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": etag_list})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_multiple_etags_none_match(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Match": '"bad1", "bad2"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
class TestIfNoneMatch:
def test_get_matching_etag_returns_304(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-None-Match": f'"{uploaded["etag"]}"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
assert uploaded["etag"] in resp.headers.get("ETag", "")
def test_get_non_matching_etag_returns_200(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-None-Match": '"wrongetag"'})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_head_matching_etag_returns_304(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-None-Match": f'"{uploaded["etag"]}"'})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
def test_head_non_matching_etag_returns_200(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-None-Match": '"wrongetag"'})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_wildcard_returns_304(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-None-Match": "*"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
class TestIfModifiedSince:
def test_not_modified_returns_304(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Modified-Since": "Sun, 01 Jan 2034 00:00:00 GMT"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
assert "ETag" in resp.headers
def test_modified_returns_200(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Modified-Since": "Sun, 01 Jan 2000 00:00:00 GMT"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_head_not_modified(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Modified-Since": "Sun, 01 Jan 2034 00:00:00 GMT"})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 304
def test_if_none_match_takes_precedence(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={
"If-None-Match": '"wrongetag"',
"If-Modified-Since": "Sun, 01 Jan 2034 00:00:00 GMT",
})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
class TestIfUnmodifiedSince:
def test_unmodified_returns_200(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Unmodified-Since": "Sun, 01 Jan 2034 00:00:00 GMT"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
def test_modified_returns_412(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={"If-Unmodified-Since": "Sun, 01 Jan 2000 00:00:00 GMT"})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
def test_head_modified_returns_412(self, client, signer, bucket, uploaded):
headers = signer("HEAD", f"/{bucket}/obj.txt", headers={"If-Unmodified-Since": "Sun, 01 Jan 2000 00:00:00 GMT"})
resp = client.head(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412
def test_if_match_takes_precedence(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={
"If-Match": f'"{uploaded["etag"]}"',
"If-Unmodified-Since": "Sun, 01 Jan 2000 00:00:00 GMT",
})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 200
class TestConditionalWithRange:
def test_if_match_with_range(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={
"If-Match": f'"{uploaded["etag"]}"',
"Range": "bytes=0-4",
})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 206
def test_if_match_fails_with_range(self, client, signer, bucket, uploaded):
headers = signer("GET", f"/{bucket}/obj.txt", headers={
"If-Match": '"wrongetag"',
"Range": "bytes=0-4",
})
resp = client.get(f"/{bucket}/obj.txt", headers=headers)
assert resp.status_code == 412


@@ -0,0 +1,69 @@
import io
import pytest
from pathlib import Path
from app.storage import ObjectStorage, StorageError
def test_concurrent_bucket_deletion(tmp_path: Path):
# True concurrency is hard to reproduce deterministically here, so this
# simplified test only verifies that a second delete of the same bucket
# raises StorageError
storage = ObjectStorage(tmp_path)
storage.create_bucket("race")
storage.delete_bucket("race")
with pytest.raises(StorageError, match="Bucket does not exist"):
storage.delete_bucket("race")
def test_maximum_object_key_length(tmp_path: Path):
storage = ObjectStorage(tmp_path)
storage.create_bucket("maxkey")
# AWS S3 allows keys up to 1024 UTF-8 bytes. Since this implementation maps
# keys onto the filesystem, OS path limits may bite before 1024, so test a
# reasonably long key that should always work
long_key = "a" * 200
storage.put_object("maxkey", long_key, io.BytesIO(b"data"))
assert storage.get_object_path("maxkey", long_key).exists()
def test_unicode_bucket_and_object_names(tmp_path: Path):
storage = ObjectStorage(tmp_path)
# Bucket names may contain only lowercase letters, digits, hyphens, and periods,
# so unicode in bucket names is NOT allowed by our validation
with pytest.raises(StorageError):
storage.create_bucket("café")
storage.create_bucket("unicode-test")
# Unicode in object keys IS allowed
key = "café/image.jpg"
storage.put_object("unicode-test", key, io.BytesIO(b"data"))
assert storage.get_object_path("unicode-test", key).exists()
# Verify listing
objects = storage.list_objects_all("unicode-test")
assert any(o.key == key for o in objects)
def test_special_characters_in_metadata(tmp_path: Path):
storage = ObjectStorage(tmp_path)
storage.create_bucket("meta-test")
metadata = {"key": "value with spaces", "special": "!@#$%^&*()"}
storage.put_object("meta-test", "obj", io.BytesIO(b"data"), metadata=metadata)
meta = storage.get_object_metadata("meta-test", "obj")
assert meta["key"] == "value with spaces"
assert meta["special"] == "!@#$%^&*()"
def test_disk_full_scenario(tmp_path: Path, monkeypatch):
import app.storage as _storage_mod
monkeypatch.setattr(_storage_mod, "_HAS_RUST", False)
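# With the Rust fast path disabled, writes presumably fall back to the pure-Python
# copy via shutil.copyfileobj, which is patched below to simulate ENOSPC.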
storage = ObjectStorage(tmp_path)
storage.create_bucket("full")
def mock_copyfileobj(*args, **kwargs):
raise OSError(28, "No space left on device")
import shutil
monkeypatch.setattr(shutil, "copyfileobj", mock_copyfileobj)
with pytest.raises(OSError, match="No space left on device"):
storage.put_object("full", "file", io.BytesIO(b"data"))


@@ -0,0 +1,726 @@
"""Tests for encryption functionality."""
from __future__ import annotations
import base64
import io
import json
import os
import secrets
import tempfile
from pathlib import Path
import pytest
class TestLocalKeyEncryption:
"""Tests for LocalKeyEncryption provider."""
def test_create_master_key(self, tmp_path):
"""Test that master key is created if it doesn't exist."""
from app.encryption import LocalKeyEncryption
key_path = tmp_path / "keys" / "master.key"
provider = LocalKeyEncryption(key_path)
key = provider.master_key
assert key_path.exists()
assert len(key) == 32
def test_load_existing_master_key(self, tmp_path):
"""Test loading an existing master key."""
from app.encryption import LocalKeyEncryption
key_path = tmp_path / "master.key"
original_key = secrets.token_bytes(32)
key_path.write_text(base64.b64encode(original_key).decode())
provider = LocalKeyEncryption(key_path)
loaded_key = provider.master_key
assert loaded_key == original_key
def test_encrypt_decrypt_roundtrip(self, tmp_path):
"""Test that data can be encrypted and decrypted correctly."""
from app.encryption import LocalKeyEncryption
key_path = tmp_path / "master.key"
provider = LocalKeyEncryption(key_path)
plaintext = b"Hello, World! This is a test message."
result = provider.encrypt(plaintext)
assert result.ciphertext != plaintext
assert result.key_id == "local"
assert len(result.nonce) == 12
assert len(result.encrypted_data_key) > 0
decrypted = provider.decrypt(
result.ciphertext,
result.nonce,
result.encrypted_data_key,
result.key_id,
)
assert decrypted == plaintext
def test_different_data_keys_per_encryption(self, tmp_path):
"""Test that each encryption uses a different data key."""
from app.encryption import LocalKeyEncryption
key_path = tmp_path / "master.key"
provider = LocalKeyEncryption(key_path)
plaintext = b"Same message"
result1 = provider.encrypt(plaintext)
result2 = provider.encrypt(plaintext)
assert result1.encrypted_data_key != result2.encrypted_data_key
assert result1.nonce != result2.nonce
assert result1.ciphertext != result2.ciphertext
def test_generate_data_key(self, tmp_path):
"""Test data key generation."""
from app.encryption import LocalKeyEncryption
key_path = tmp_path / "master.key"
provider = LocalKeyEncryption(key_path)
plaintext_key, encrypted_key = provider.generate_data_key()
assert len(plaintext_key) == 32
assert len(encrypted_key) > 32
decrypted_key = provider._decrypt_data_key(encrypted_key)
assert decrypted_key == plaintext_key
def test_decrypt_with_wrong_key_fails(self, tmp_path):
"""Test that decryption fails with wrong master key."""
from app.encryption import LocalKeyEncryption, EncryptionError
key_path1 = tmp_path / "master1.key"
key_path2 = tmp_path / "master2.key"
provider1 = LocalKeyEncryption(key_path1)
provider2 = LocalKeyEncryption(key_path2)
plaintext = b"Secret message"
result = provider1.encrypt(plaintext)
with pytest.raises(EncryptionError):
provider2.decrypt(
result.ciphertext,
result.nonce,
result.encrypted_data_key,
result.key_id,
)
class TestEncryptionMetadata:
"""Tests for EncryptionMetadata class."""
def test_to_dict(self):
"""Test converting metadata to dictionary."""
from app.encryption import EncryptionMetadata
nonce = secrets.token_bytes(12)
encrypted_key = secrets.token_bytes(60)
metadata = EncryptionMetadata(
algorithm="AES256",
key_id="local",
nonce=nonce,
encrypted_data_key=encrypted_key,
)
result = metadata.to_dict()
assert result["x-amz-server-side-encryption"] == "AES256"
assert result["x-amz-encryption-key-id"] == "local"
assert base64.b64decode(result["x-amz-encryption-nonce"]) == nonce
assert base64.b64decode(result["x-amz-encrypted-data-key"]) == encrypted_key
def test_from_dict(self):
"""Test creating metadata from dictionary."""
from app.encryption import EncryptionMetadata
nonce = secrets.token_bytes(12)
encrypted_key = secrets.token_bytes(60)
data = {
"x-amz-server-side-encryption": "AES256",
"x-amz-encryption-key-id": "local",
"x-amz-encryption-nonce": base64.b64encode(nonce).decode(),
"x-amz-encrypted-data-key": base64.b64encode(encrypted_key).decode(),
}
metadata = EncryptionMetadata.from_dict(data)
assert metadata is not None
assert metadata.algorithm == "AES256"
assert metadata.key_id == "local"
assert metadata.nonce == nonce
assert metadata.encrypted_data_key == encrypted_key
def test_from_dict_returns_none_for_unencrypted(self):
"""Test that from_dict returns None for unencrypted objects."""
from app.encryption import EncryptionMetadata
data = {"some-other-key": "value"}
metadata = EncryptionMetadata.from_dict(data)
assert metadata is None
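# EncryptionMetadata round-trips through plain string metadata: the binary
# fields (nonce, wrapped data key) travel base64-encoded under x-amz-* keys,
# and from_dict() returning None for dictionaries without the
# x-amz-server-side-encryption marker is how callers distinguish encrypted
# objects from plain ones.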
class TestStreamingEncryptor:
"""Tests for streaming encryption."""
def test_encrypt_decrypt_stream(self, tmp_path):
"""Test streaming encryption and decryption."""
from app.encryption import LocalKeyEncryption, StreamingEncryptor
key_path = tmp_path / "master.key"
provider = LocalKeyEncryption(key_path)
encryptor = StreamingEncryptor(provider, chunk_size=1024)
original_data = b"A" * 5000 + b"B" * 5000 + b"C" * 5000
stream = io.BytesIO(original_data)
encrypted_stream, metadata = encryptor.encrypt_stream(stream)
encrypted_data = encrypted_stream.read()
assert encrypted_data != original_data
assert metadata.algorithm == "AES256"
encrypted_stream = io.BytesIO(encrypted_data)
decrypted_stream = encryptor.decrypt_stream(encrypted_stream, metadata)
decrypted_data = decrypted_stream.read()
assert decrypted_data == original_data
def test_encrypt_small_data(self, tmp_path):
"""Test encrypting data smaller than chunk size."""
from app.encryption import LocalKeyEncryption, StreamingEncryptor
key_path = tmp_path / "master.key"
provider = LocalKeyEncryption(key_path)
encryptor = StreamingEncryptor(provider, chunk_size=1024)
original_data = b"Small data"
stream = io.BytesIO(original_data)
encrypted_stream, metadata = encryptor.encrypt_stream(stream)
encrypted_stream.seek(0)
decrypted_stream = encryptor.decrypt_stream(encrypted_stream, metadata)
decrypted_data = decrypted_stream.read()
assert decrypted_data == original_data
def test_encrypt_empty_data(self, tmp_path):
"""Test encrypting empty data."""
from app.encryption import LocalKeyEncryption, StreamingEncryptor
key_path = tmp_path / "master.key"
provider = LocalKeyEncryption(key_path)
encryptor = StreamingEncryptor(provider)
stream = io.BytesIO(b"")
encrypted_stream, metadata = encryptor.encrypt_stream(stream)
encrypted_stream.seek(0)
decrypted_stream = encryptor.decrypt_stream(encrypted_stream, metadata)
decrypted_data = decrypted_stream.read()
assert decrypted_data == b""
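# Streaming encryption is exercised at three sizes (multi-chunk, sub-chunk,
# and empty) because chunked AEAD framing fails most easily at the
# boundaries: the encryptor must still produce a decryptable stream when the
# source yields fewer bytes than chunk_size, or none at all.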
class TestEncryptionManager:
"""Tests for EncryptionManager."""
def test_encryption_disabled_by_default(self, tmp_path):
"""Test that encryption is disabled by default."""
from app.encryption import EncryptionManager
config = {
"encryption_enabled": False,
"encryption_master_key_path": str(tmp_path / "master.key"),
}
manager = EncryptionManager(config)
assert not manager.enabled
def test_encryption_enabled(self, tmp_path):
"""Test enabling encryption."""
from app.encryption import EncryptionManager
config = {
"encryption_enabled": True,
"encryption_master_key_path": str(tmp_path / "master.key"),
"default_encryption_algorithm": "AES256",
}
manager = EncryptionManager(config)
assert manager.enabled
assert manager.default_algorithm == "AES256"
def test_encrypt_decrypt_object(self, tmp_path):
"""Test encrypting and decrypting an object."""
from app.encryption import EncryptionManager
config = {
"encryption_enabled": True,
"encryption_master_key_path": str(tmp_path / "master.key"),
}
manager = EncryptionManager(config)
plaintext = b"Object data to encrypt"
ciphertext, metadata = manager.encrypt_object(plaintext)
assert ciphertext != plaintext
assert metadata.algorithm == "AES256"
decrypted = manager.decrypt_object(ciphertext, metadata)
assert decrypted == plaintext
class TestClientEncryptionHelper:
"""Tests for client-side encryption helpers."""
def test_generate_client_key(self):
"""Test generating a client encryption key."""
from app.encryption import ClientEncryptionHelper
key_info = ClientEncryptionHelper.generate_client_key()
assert "key" in key_info
assert key_info["algorithm"] == "AES-256-GCM"
assert "created_at" in key_info
key = base64.b64decode(key_info["key"])
assert len(key) == 32
def test_encrypt_with_key(self):
"""Test encrypting data with a client key."""
from app.encryption import ClientEncryptionHelper
key = base64.b64encode(secrets.token_bytes(32)).decode()
plaintext = b"Client-side encrypted data"
result = ClientEncryptionHelper.encrypt_with_key(plaintext, key)
assert "ciphertext" in result
assert "nonce" in result
assert result["algorithm"] == "AES-256-GCM"
def test_encrypt_decrypt_with_key(self):
"""Test round-trip client-side encryption."""
from app.encryption import ClientEncryptionHelper
key = base64.b64encode(secrets.token_bytes(32)).decode()
plaintext = b"Client-side encrypted data"
encrypted = ClientEncryptionHelper.encrypt_with_key(plaintext, key)
decrypted = ClientEncryptionHelper.decrypt_with_key(
encrypted["ciphertext"],
encrypted["nonce"],
key,
)
assert decrypted == plaintext
def test_wrong_key_fails(self):
"""Test that decryption with wrong key fails."""
from app.encryption import ClientEncryptionHelper, EncryptionError
key1 = base64.b64encode(secrets.token_bytes(32)).decode()
key2 = base64.b64encode(secrets.token_bytes(32)).decode()
plaintext = b"Secret data"
encrypted = ClientEncryptionHelper.encrypt_with_key(plaintext, key1)
with pytest.raises(EncryptionError):
ClientEncryptionHelper.decrypt_with_key(
encrypted["ciphertext"],
encrypted["nonce"],
key2,
)
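# KMSManager persists state in two files: kms_keys.json (key metadata plus
# wrapped key material) and master.key (the wrapping key). That split is
# what lets test_keys_persist_across_instances below decrypt with a second
# manager pointed at the same paths.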
class TestKMSManager:
"""Tests for KMS key management."""
def test_create_key(self, tmp_path):
"""Test creating a KMS key."""
from app.kms import KMSManager
keys_path = tmp_path / "kms_keys.json"
master_key_path = tmp_path / "master.key"
kms = KMSManager(keys_path, master_key_path)
key = kms.create_key("Test key", key_id="test-key-1")
assert key.key_id == "test-key-1"
assert key.description == "Test key"
assert key.enabled
assert keys_path.exists()
def test_list_keys(self, tmp_path):
"""Test listing KMS keys."""
from app.kms import KMSManager
keys_path = tmp_path / "kms_keys.json"
master_key_path = tmp_path / "master.key"
kms = KMSManager(keys_path, master_key_path)
kms.create_key("Key 1", key_id="key-1")
kms.create_key("Key 2", key_id="key-2")
keys = kms.list_keys()
assert len(keys) == 2
key_ids = {k.key_id for k in keys}
assert "key-1" in key_ids
assert "key-2" in key_ids
def test_get_key(self, tmp_path):
"""Test getting a specific key."""
from app.kms import KMSManager
keys_path = tmp_path / "kms_keys.json"
master_key_path = tmp_path / "master.key"
kms = KMSManager(keys_path, master_key_path)
kms.create_key("Test key", key_id="test-key")
key = kms.get_key("test-key")
assert key is not None
assert key.key_id == "test-key"
assert kms.get_key("non-existent") is None
def test_enable_disable_key(self, tmp_path):
"""Test enabling and disabling keys."""
from app.kms import KMSManager
keys_path = tmp_path / "kms_keys.json"
master_key_path = tmp_path / "master.key"
kms = KMSManager(keys_path, master_key_path)
kms.create_key("Test key", key_id="test-key")
assert kms.get_key("test-key").enabled
kms.disable_key("test-key")
assert not kms.get_key("test-key").enabled
kms.enable_key("test-key")
assert kms.get_key("test-key").enabled
def test_delete_key(self, tmp_path):
"""Test deleting a key."""
from app.kms import KMSManager
keys_path = tmp_path / "kms_keys.json"
master_key_path = tmp_path / "master.key"
kms = KMSManager(keys_path, master_key_path)
kms.create_key("Test key", key_id="test-key")
assert kms.get_key("test-key") is not None
kms.delete_key("test-key")
assert kms.get_key("test-key") is None
def test_encrypt_decrypt(self, tmp_path):
"""Test KMS encrypt and decrypt."""
from app.kms import KMSManager
keys_path = tmp_path / "kms_keys.json"
master_key_path = tmp_path / "master.key"
kms = KMSManager(keys_path, master_key_path)
key = kms.create_key("Test key", key_id="test-key")
plaintext = b"Secret data to encrypt"
ciphertext = kms.encrypt("test-key", plaintext)
assert ciphertext != plaintext
decrypted, key_id = kms.decrypt(ciphertext)
assert decrypted == plaintext
assert key_id == "test-key"
def test_encrypt_with_context(self, tmp_path):
"""Test encryption with encryption context."""
from app.kms import KMSManager, EncryptionError
keys_path = tmp_path / "kms_keys.json"
master_key_path = tmp_path / "master.key"
kms = KMSManager(keys_path, master_key_path)
kms.create_key("Test key", key_id="test-key")
plaintext = b"Secret data"
context = {"bucket": "test-bucket", "key": "test-key"}
ciphertext = kms.encrypt("test-key", plaintext, context)
decrypted, _ = kms.decrypt(ciphertext, context)
assert decrypted == plaintext
with pytest.raises(EncryptionError):
kms.decrypt(ciphertext, {"different": "context"})
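# The encryption context acts like AEAD associated data: bound at encrypt
# time and required verbatim at decrypt, so a mismatched context fails
# authentication instead of yielding wrong bytes. One plausible realization
# (hypothetical, not necessarily app.kms's internals):
#
#   aad = json.dumps(context, sort_keys=True).encode()
#   ciphertext = AESGCM(key).encrypt(nonce, plaintext, aad)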
def test_generate_data_key(self, tmp_path):
"""Test generating a data key."""
from app.kms import KMSManager
keys_path = tmp_path / "kms_keys.json"
master_key_path = tmp_path / "master.key"
kms = KMSManager(keys_path, master_key_path)
kms.create_key("Test key", key_id="test-key")
plaintext_key, encrypted_key = kms.generate_data_key("test-key")
assert len(plaintext_key) == 32
assert len(encrypted_key) > 0
decrypted_key = kms.decrypt_data_key("test-key", encrypted_key)
assert decrypted_key == plaintext_key
def test_disabled_key_cannot_encrypt(self, tmp_path):
"""Test that disabled keys cannot be used for encryption."""
from app.kms import KMSManager, EncryptionError
keys_path = tmp_path / "kms_keys.json"
master_key_path = tmp_path / "master.key"
kms = KMSManager(keys_path, master_key_path)
kms.create_key("Test key", key_id="test-key")
kms.disable_key("test-key")
with pytest.raises(EncryptionError, match="disabled"):
kms.encrypt("test-key", b"data")
def test_re_encrypt(self, tmp_path):
"""Test re-encrypting data with a different key."""
from app.kms import KMSManager
keys_path = tmp_path / "kms_keys.json"
master_key_path = tmp_path / "master.key"
kms = KMSManager(keys_path, master_key_path)
kms.create_key("Key 1", key_id="key-1")
kms.create_key("Key 2", key_id="key-2")
plaintext = b"Data to re-encrypt"
ciphertext1 = kms.encrypt("key-1", plaintext)
ciphertext2 = kms.re_encrypt(ciphertext1, "key-2")
decrypted, key_id = kms.decrypt(ciphertext2)
assert decrypted == plaintext
assert key_id == "key-2"
def test_generate_random(self, tmp_path):
"""Test generating random bytes."""
from app.kms import KMSManager
keys_path = tmp_path / "kms_keys.json"
master_key_path = tmp_path / "master.key"
kms = KMSManager(keys_path, master_key_path)
random1 = kms.generate_random(32)
random2 = kms.generate_random(32)
assert len(random1) == 32
assert len(random2) == 32
assert random1 != random2
def test_keys_persist_across_instances(self, tmp_path):
"""Test that keys persist and can be loaded by new instances."""
from app.kms import KMSManager
keys_path = tmp_path / "kms_keys.json"
master_key_path = tmp_path / "master.key"
kms1 = KMSManager(keys_path, master_key_path)
kms1.create_key("Test key", key_id="test-key")
plaintext = b"Persistent encryption test"
ciphertext = kms1.encrypt("test-key", plaintext)
kms2 = KMSManager(keys_path, master_key_path)
decrypted, key_id = kms2.decrypt(ciphertext)
assert decrypted == plaintext
assert key_id == "test-key"
class TestKMSEncryptionProvider:
"""Tests for KMS encryption provider."""
def test_kms_encryption_provider(self, tmp_path):
"""Test using KMS as an encryption provider."""
from app.kms import KMSManager
keys_path = tmp_path / "kms_keys.json"
master_key_path = tmp_path / "master.key"
kms = KMSManager(keys_path, master_key_path)
kms.create_key("Test key", key_id="test-key")
provider = kms.get_provider("test-key")
plaintext = b"Data encrypted with KMS provider"
result = provider.encrypt(plaintext)
assert result.key_id == "test-key"
assert result.ciphertext != plaintext
decrypted = provider.decrypt(
result.ciphertext,
result.nonce,
result.encrypted_data_key,
result.key_id,
)
assert decrypted == plaintext
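# EncryptedObjectStorage wraps the plain ObjectStorage: objects are encrypted
# when the bucket carries a default-encryption rule or the caller passes
# server_side_encryption explicitly, and stored verbatim otherwise. The
# tests below verify all three paths by reading the backing file directly.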
class TestEncryptedStorage:
"""Tests for encrypted storage layer."""
def test_put_and_get_encrypted_object(self, tmp_path):
"""Test storing and retrieving an encrypted object."""
from app.storage import ObjectStorage
from app.encryption import EncryptionManager
from app.encrypted_storage import EncryptedObjectStorage
storage_root = tmp_path / "storage"
storage = ObjectStorage(storage_root)
config = {
"encryption_enabled": True,
"encryption_master_key_path": str(tmp_path / "master.key"),
"default_encryption_algorithm": "AES256",
}
encryption = EncryptionManager(config)
encrypted_storage = EncryptedObjectStorage(storage, encryption)
storage.create_bucket("test-bucket")
storage.set_bucket_encryption("test-bucket", {
"Rules": [{"SSEAlgorithm": "AES256"}]
})
original_data = b"This is secret data that should be encrypted"
stream = io.BytesIO(original_data)
meta = encrypted_storage.put_object(
"test-bucket",
"secret.txt",
stream,
)
assert meta is not None
file_path = storage_root / "test-bucket" / "secret.txt"
stored_data = file_path.read_bytes()
assert stored_data != original_data
data, metadata = encrypted_storage.get_object_data("test-bucket", "secret.txt")
assert data == original_data
def test_no_encryption_without_config(self, tmp_path):
"""Test that objects are not encrypted without bucket config."""
from app.storage import ObjectStorage
from app.encryption import EncryptionManager
from app.encrypted_storage import EncryptedObjectStorage
storage_root = tmp_path / "storage"
storage = ObjectStorage(storage_root)
config = {
"encryption_enabled": True,
"encryption_master_key_path": str(tmp_path / "master.key"),
}
encryption = EncryptionManager(config)
encrypted_storage = EncryptedObjectStorage(storage, encryption)
storage.create_bucket("test-bucket")
original_data = b"Unencrypted data"
stream = io.BytesIO(original_data)
encrypted_storage.put_object("test-bucket", "plain.txt", stream)
file_path = storage_root / "test-bucket" / "plain.txt"
stored_data = file_path.read_bytes()
assert stored_data == original_data
def test_explicit_encryption_request(self, tmp_path):
"""Test explicitly requesting encryption."""
from app.storage import ObjectStorage
from app.encryption import EncryptionManager
from app.encrypted_storage import EncryptedObjectStorage
storage_root = tmp_path / "storage"
storage = ObjectStorage(storage_root)
config = {
"encryption_enabled": True,
"encryption_master_key_path": str(tmp_path / "master.key"),
}
encryption = EncryptionManager(config)
encrypted_storage = EncryptedObjectStorage(storage, encryption)
storage.create_bucket("test-bucket")
original_data = b"Explicitly encrypted data"
stream = io.BytesIO(original_data)
encrypted_storage.put_object(
"test-bucket",
"encrypted.txt",
stream,
server_side_encryption="AES256",
)
file_path = storage_root / "test-bucket" / "encrypted.txt"
stored_data = file_path.read_bytes()
assert stored_data != original_data
data, _ = encrypted_storage.get_object_data("test-bucket", "encrypted.txt")
assert data == original_data

356
python/tests/test_gc.py Normal file
@@ -0,0 +1,356 @@
import json
import os
import time
from pathlib import Path
import pytest
from app.gc import GarbageCollector, GCResult
@pytest.fixture
def storage_root(tmp_path):
root = tmp_path / "data"
root.mkdir()
sys_root = root / ".myfsio.sys"
sys_root.mkdir()
(sys_root / "config").mkdir(parents=True)
(sys_root / "tmp").mkdir()
(sys_root / "multipart").mkdir()
(sys_root / "buckets").mkdir()
return root
@pytest.fixture
def gc(storage_root):
return GarbageCollector(
storage_root=storage_root,
interval_hours=1.0,
temp_file_max_age_hours=1.0,
multipart_max_age_days=1,
lock_file_max_age_hours=0.5,
dry_run=False,
)
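# GC age checks are mtime-based, so instead of sleeping the tests backdate
# files by rewinding both atime and mtime with os.utime.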
def _make_old(path, hours=48):
old_time = time.time() - hours * 3600
os.utime(path, (old_time, old_time))
class TestTempFileCleanup:
def test_old_temp_files_deleted(self, storage_root, gc):
tmp_dir = storage_root / ".myfsio.sys" / "tmp"
old_file = tmp_dir / "abc123.tmp"
old_file.write_bytes(b"x" * 1000)
_make_old(old_file, hours=48)
result = gc.run_now()
assert result.temp_files_deleted == 1
assert result.temp_bytes_freed == 1000
assert not old_file.exists()
def test_recent_temp_files_kept(self, storage_root, gc):
tmp_dir = storage_root / ".myfsio.sys" / "tmp"
new_file = tmp_dir / "recent.tmp"
new_file.write_bytes(b"data")
result = gc.run_now()
assert result.temp_files_deleted == 0
assert new_file.exists()
def test_dry_run_keeps_files(self, storage_root, gc):
gc.dry_run = True
tmp_dir = storage_root / ".myfsio.sys" / "tmp"
old_file = tmp_dir / "stale.tmp"
old_file.write_bytes(b"x" * 500)
_make_old(old_file, hours=48)
result = gc.run_now()
assert result.temp_files_deleted == 1
assert result.temp_bytes_freed == 500
assert old_file.exists()
class TestMultipartCleanup:
def test_old_orphaned_multipart_deleted(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
mp_root = storage_root / ".myfsio.sys" / "multipart" / "test-bucket"
mp_root.mkdir(parents=True)
upload_dir = mp_root / "upload-123"
upload_dir.mkdir()
manifest = upload_dir / "manifest.json"
manifest.write_text(json.dumps({"upload_id": "upload-123", "object_key": "foo.txt"}))
part = upload_dir / "part-00001.part"
part.write_bytes(b"x" * 2000)
_make_old(manifest, hours=200)
_make_old(part, hours=200)
_make_old(upload_dir, hours=200)
result = gc.run_now()
assert result.multipart_uploads_deleted == 1
assert result.multipart_bytes_freed > 0
assert not upload_dir.exists()
def test_recent_multipart_kept(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
mp_root = storage_root / ".myfsio.sys" / "multipart" / "test-bucket"
mp_root.mkdir(parents=True)
upload_dir = mp_root / "upload-new"
upload_dir.mkdir()
manifest = upload_dir / "manifest.json"
manifest.write_text(json.dumps({"upload_id": "upload-new", "object_key": "bar.txt"}))
result = gc.run_now()
assert result.multipart_uploads_deleted == 0
assert upload_dir.exists()
def test_legacy_multipart_cleaned(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
legacy_mp = bucket / ".multipart" / "upload-old"
legacy_mp.mkdir(parents=True)
part = legacy_mp / "part-00001.part"
part.write_bytes(b"y" * 500)
_make_old(part, hours=200)
_make_old(legacy_mp, hours=200)
result = gc.run_now()
assert result.multipart_uploads_deleted == 1
class TestLockFileCleanup:
def test_stale_lock_files_deleted(self, storage_root, gc):
locks_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "locks"
locks_dir.mkdir(parents=True)
lock = locks_dir / "some_key.lock"
lock.write_text("")
_make_old(lock, hours=2)
result = gc.run_now()
assert result.lock_files_deleted == 1
assert not lock.exists()
def test_recent_lock_kept(self, storage_root, gc):
locks_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "locks"
locks_dir.mkdir(parents=True)
lock = locks_dir / "active.lock"
lock.write_text("")
result = gc.run_now()
assert result.lock_files_deleted == 0
assert lock.exists()
class TestOrphanedMetadataCleanup:
def test_legacy_orphaned_metadata_deleted(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
meta_dir = bucket / ".meta"
meta_dir.mkdir()
orphan = meta_dir / "deleted_file.txt.meta.json"
orphan.write_text(json.dumps({"etag": "abc"}))
result = gc.run_now()
assert result.orphaned_metadata_deleted == 1
assert not orphan.exists()
def test_valid_metadata_kept(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
obj = bucket / "exists.txt"
obj.write_text("hello")
meta_dir = bucket / ".meta"
meta_dir.mkdir()
meta = meta_dir / "exists.txt.meta.json"
meta.write_text(json.dumps({"etag": "abc"}))
result = gc.run_now()
assert result.orphaned_metadata_deleted == 0
assert meta.exists()
def test_index_orphaned_entries_cleaned(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
obj = bucket / "keep.txt"
obj.write_text("hello")
meta_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "meta"
meta_dir.mkdir(parents=True)
index = meta_dir / "_index.json"
index.write_text(json.dumps({"keep.txt": {"etag": "a"}, "gone.txt": {"etag": "b"}}))
result = gc.run_now()
assert result.orphaned_metadata_deleted == 1
updated = json.loads(index.read_text())
assert "keep.txt" in updated
assert "gone.txt" not in updated
class TestOrphanedVersionsCleanup:
def test_orphaned_versions_deleted(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
versions_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "versions" / "deleted_obj.txt"
versions_dir.mkdir(parents=True)
v_bin = versions_dir / "v1.bin"
v_json = versions_dir / "v1.json"
v_bin.write_bytes(b"old data" * 100)
v_json.write_text(json.dumps({"version_id": "v1", "size": 800}))
result = gc.run_now()
assert result.orphaned_versions_deleted == 2
assert result.orphaned_version_bytes_freed == 800
def test_active_versions_kept(self, storage_root, gc):
bucket = storage_root / "test-bucket"
bucket.mkdir()
obj = bucket / "active.txt"
obj.write_text("current")
versions_dir = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "versions" / "active.txt"
versions_dir.mkdir(parents=True)
v_bin = versions_dir / "v1.bin"
v_bin.write_bytes(b"old version")
result = gc.run_now()
assert result.orphaned_versions_deleted == 0
assert v_bin.exists()
class TestEmptyDirCleanup:
def test_empty_dirs_removed(self, storage_root, gc):
empty = storage_root / ".myfsio.sys" / "buckets" / "test-bucket" / "locks" / "sub"
empty.mkdir(parents=True)
result = gc.run_now()
assert result.empty_dirs_removed > 0
assert not empty.exists()
class TestHistory:
def test_history_recorded(self, storage_root, gc):
gc.run_now()
history = gc.get_history()
assert len(history) == 1
assert "result" in history[0]
assert "timestamp" in history[0]
def test_multiple_runs(self, storage_root, gc):
gc.run_now()
gc.run_now()
gc.run_now()
history = gc.get_history()
assert len(history) == 3
assert history[0]["timestamp"] >= history[1]["timestamp"]
class TestStatus:
def test_get_status(self, storage_root, gc):
status = gc.get_status()
assert status["interval_hours"] == 1.0
assert status["dry_run"] is False
assert status["temp_file_max_age_hours"] == 1.0
assert status["multipart_max_age_days"] == 1
assert status["lock_file_max_age_hours"] == 0.5
class TestGCResult:
def test_total_bytes_freed(self):
r = GCResult(temp_bytes_freed=100, multipart_bytes_freed=200, orphaned_version_bytes_freed=300)
assert r.total_bytes_freed == 600
def test_has_work(self):
assert not GCResult().has_work
assert GCResult(temp_files_deleted=1).has_work
assert GCResult(lock_files_deleted=1).has_work
assert GCResult(empty_dirs_removed=1).has_work
class TestAdminAPI:
@pytest.fixture
def gc_app(self, tmp_path):
from app import create_api_app
storage_root = tmp_path / "data"
iam_config = tmp_path / "iam.json"
bucket_policies = tmp_path / "bucket_policies.json"
iam_payload = {
"users": [
{
"access_key": "admin",
"secret_key": "adminsecret",
"display_name": "Admin",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy", "iam:*"]}],
}
]
}
iam_config.write_text(json.dumps(iam_payload))
flask_app = create_api_app({
"TESTING": True,
"SECRET_KEY": "testing",
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"GC_ENABLED": True,
"GC_INTERVAL_HOURS": 1.0,
})
yield flask_app
gc = flask_app.extensions.get("gc")
if gc:
gc.stop()
def test_gc_status(self, gc_app):
client = gc_app.test_client()
resp = client.get("/admin/gc/status", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"})
assert resp.status_code == 200
data = resp.get_json()
assert data["enabled"] is True
def test_gc_run(self, gc_app):
client = gc_app.test_client()
resp = client.post(
"/admin/gc/run",
headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"},
content_type="application/json",
)
assert resp.status_code == 200
data = resp.get_json()
assert data["status"] == "started"
def test_gc_dry_run(self, gc_app):
client = gc_app.test_client()
resp = client.post(
"/admin/gc/run",
headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"},
data=json.dumps({"dry_run": True}),
content_type="application/json",
)
assert resp.status_code == 200
data = resp.get_json()
assert data["status"] == "started"
def test_gc_history(self, gc_app):
client = gc_app.test_client()
client.post("/admin/gc/run", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"})
for _ in range(50):
time.sleep(0.1)
status = client.get("/admin/gc/status", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"}).get_json()
if not status.get("scanning"):
break
resp = client.get("/admin/gc/history", headers={"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"})
assert resp.status_code == 200
data = resp.get_json()
assert len(data["executions"]) >= 1
def test_gc_requires_admin(self, gc_app):
iam = gc_app.extensions["iam"]
user = iam.create_user(display_name="Regular")
client = gc_app.test_client()
resp = client.get(
"/admin/gc/status",
headers={"X-Access-Key": user["access_key"], "X-Secret-Key": user["secret_key"]},
)
assert resp.status_code == 403

@@ -0,0 +1,58 @@
import json
import time
from datetime import timedelta
import pytest
from app.iam import IamError, IamService
def _make_service(tmp_path, *, max_attempts=3, lockout_seconds=2):
config = tmp_path / "iam.json"
payload = {
"users": [
{
"access_key": "test",
"secret_key": "secret",
"display_name": "Test User",
"policies": [
{
"bucket": "*",
"actions": ["list", "read", "write", "delete", "policy"],
}
],
}
]
}
config.write_text(json.dumps(payload))
service = IamService(config, auth_max_attempts=max_attempts, auth_lockout_minutes=lockout_seconds/60)
return service
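# Lockout accounting: auth_max_attempts consecutive failures trip a timed
# lock, during which even the correct secret is rejected; the window is
# configured in minutes, hence the seconds/60 conversion above.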
def test_lockout_triggers_after_failed_attempts(tmp_path):
service = _make_service(tmp_path, max_attempts=3, lockout_seconds=30)
for _ in range(service.auth_max_attempts):
with pytest.raises(IamError) as exc:
service.authenticate("test", "bad-secret")
assert "Invalid credentials" in str(exc.value)
with pytest.raises(IamError) as exc:
service.authenticate("test", "bad-secret")
assert "Access temporarily locked" in str(exc.value)
def test_lockout_expires_and_allows_auth(tmp_path):
service = _make_service(tmp_path, max_attempts=2, lockout_seconds=1)
for _ in range(service.auth_max_attempts):
with pytest.raises(IamError):
service.authenticate("test", "bad-secret")
with pytest.raises(IamError) as exc:
service.authenticate("test", "secret")
assert "Access temporarily locked" in str(exc.value)
time.sleep(1.1)
principal = service.authenticate("test", "secret")
assert principal.access_key == "test"

@@ -0,0 +1,788 @@
import hashlib
import json
import os
import sys
import time
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from app.integrity import IntegrityChecker, IntegrityCursorStore, IntegrityResult
def _wait_scan_done(client, headers, timeout=10):
deadline = time.time() + timeout
while time.time() < deadline:
resp = client.get("/admin/integrity/status", headers=headers)
data = resp.get_json()
if not data.get("scanning"):
return
time.sleep(0.1)
raise TimeoutError("scan did not complete")
def _md5(data: bytes) -> str:
return hashlib.md5(data).hexdigest()
def _setup_bucket(storage_root: Path, bucket_name: str, objects: dict[str, bytes]) -> None:
bucket_path = storage_root / bucket_name
bucket_path.mkdir(parents=True, exist_ok=True)
meta_root = storage_root / ".myfsio.sys" / "buckets" / bucket_name / "meta"
meta_root.mkdir(parents=True, exist_ok=True)
bucket_json = storage_root / ".myfsio.sys" / "buckets" / bucket_name / ".bucket.json"
bucket_json.write_text(json.dumps({"created": "2025-01-01"}))
for key, data in objects.items():
obj_path = bucket_path / key
obj_path.parent.mkdir(parents=True, exist_ok=True)
obj_path.write_bytes(data)
etag = _md5(data)
stat = obj_path.stat()
meta = {
"__etag__": etag,
"__size__": str(stat.st_size),
"__last_modified__": str(stat.st_mtime),
}
key_path = Path(key)
parent = key_path.parent
key_name = key_path.name
if parent == Path("."):
index_path = meta_root / "_index.json"
else:
index_path = meta_root / parent / "_index.json"
index_path.parent.mkdir(parents=True, exist_ok=True)
index_data = {}
if index_path.exists():
index_data = json.loads(index_path.read_text())
index_data[key_name] = {"metadata": meta}
index_path.write_text(json.dumps(index_data))
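# _setup_bucket fabricates the on-disk layout the checker walks: object bytes
# under <root>/<bucket>/, a .bucket.json marker, and per-directory
# _index.json files (carrying __etag__/__size__ entries) under
# .myfsio.sys/buckets/<bucket>/meta. Each test then breaks exactly one side
# of that object/metadata pairing.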
def _issues_of_type(result, issue_type):
return [i for i in result.issues if i.issue_type == issue_type]
@pytest.fixture
def storage_root(tmp_path):
root = tmp_path / "data"
root.mkdir()
(root / ".myfsio.sys" / "config").mkdir(parents=True, exist_ok=True)
return root
@pytest.fixture
def checker(storage_root):
return IntegrityChecker(
storage_root=storage_root,
interval_hours=24.0,
batch_size=1000,
auto_heal=False,
dry_run=False,
)
class TestCorruptedObjects:
def test_detect_corrupted(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello world"})
(storage_root / "mybucket" / "file.txt").write_bytes(b"corrupted data")
result = checker.run_now()
assert result.corrupted_objects == 1
issues = _issues_of_type(result, "corrupted_object")
assert len(issues) == 1
assert issues[0].bucket == "mybucket"
assert issues[0].key == "file.txt"
assert not issues[0].healed
def test_heal_corrupted(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello world"})
(storage_root / "mybucket" / "file.txt").write_bytes(b"corrupted data")
result = checker.run_now(auto_heal=True)
assert result.corrupted_objects == 1
assert result.issues_healed == 1
issues = _issues_of_type(result, "corrupted_object")
assert issues[0].healed
result2 = checker.run_now()
assert result2.corrupted_objects == 0
def test_valid_objects_pass(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello world"})
result = checker.run_now()
assert result.corrupted_objects == 0
assert result.objects_scanned >= 1
def test_corrupted_nested_key(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"sub/dir/file.txt": b"nested content"})
(storage_root / "mybucket" / "sub" / "dir" / "file.txt").write_bytes(b"bad")
result = checker.run_now()
assert result.corrupted_objects == 1
issues = _issues_of_type(result, "corrupted_object")
assert issues[0].key == "sub/dir/file.txt"
class TestOrphanedObjects:
def test_detect_orphaned(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {})
(storage_root / "mybucket" / "orphan.txt").write_bytes(b"orphan data")
result = checker.run_now()
assert result.orphaned_objects == 1
issues = _issues_of_type(result, "orphaned_object")
assert len(issues) == 1
def test_heal_orphaned(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {})
(storage_root / "mybucket" / "orphan.txt").write_bytes(b"orphan data")
result = checker.run_now(auto_heal=True)
assert result.orphaned_objects == 1
assert result.issues_healed == 1
issues = _issues_of_type(result, "orphaned_object")
assert issues[0].healed
result2 = checker.run_now()
assert result2.orphaned_objects == 0
assert result2.objects_scanned >= 1
class TestPhantomMetadata:
def test_detect_phantom(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
(storage_root / "mybucket" / "file.txt").unlink()
result = checker.run_now()
assert result.phantom_metadata == 1
issues = _issues_of_type(result, "phantom_metadata")
assert len(issues) == 1
def test_heal_phantom(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
(storage_root / "mybucket" / "file.txt").unlink()
result = checker.run_now(auto_heal=True)
assert result.phantom_metadata == 1
assert result.issues_healed == 1
result2 = checker.run_now()
assert result2.phantom_metadata == 0
class TestStaleVersions:
def test_manifest_without_data(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
versions_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "versions" / "file.txt"
versions_root.mkdir(parents=True)
(versions_root / "v1.json").write_text(json.dumps({"etag": "abc"}))
result = checker.run_now()
assert result.stale_versions == 1
issues = _issues_of_type(result, "stale_version")
assert "manifest without data" in issues[0].detail
def test_data_without_manifest(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
versions_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "versions" / "file.txt"
versions_root.mkdir(parents=True)
(versions_root / "v1.bin").write_bytes(b"old data")
result = checker.run_now()
assert result.stale_versions == 1
issues = _issues_of_type(result, "stale_version")
assert "data without manifest" in issues[0].detail
def test_heal_stale_versions(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
versions_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "versions" / "file.txt"
versions_root.mkdir(parents=True)
(versions_root / "v1.json").write_text(json.dumps({"etag": "abc"}))
(versions_root / "v2.bin").write_bytes(b"old data")
result = checker.run_now(auto_heal=True)
assert result.stale_versions == 2
assert result.issues_healed == 2
assert not (versions_root / "v1.json").exists()
assert not (versions_root / "v2.bin").exists()
def test_valid_versions_pass(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
versions_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "versions" / "file.txt"
versions_root.mkdir(parents=True)
(versions_root / "v1.json").write_text(json.dumps({"etag": "abc"}))
(versions_root / "v1.bin").write_bytes(b"old data")
result = checker.run_now()
assert result.stale_versions == 0
class TestEtagCache:
def test_detect_mismatch(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
etag_path = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "etag_index.json"
etag_path.write_text(json.dumps({"file.txt": "wrong_etag"}))
result = checker.run_now()
assert result.etag_cache_inconsistencies == 1
issues = _issues_of_type(result, "etag_cache_inconsistency")
assert len(issues) == 1
def test_heal_mismatch(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
etag_path = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "etag_index.json"
etag_path.write_text(json.dumps({"file.txt": "wrong_etag"}))
result = checker.run_now(auto_heal=True)
assert result.etag_cache_inconsistencies == 1
assert result.issues_healed == 1
assert not etag_path.exists()
class TestLegacyMetadata:
def test_detect_unmigrated(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
legacy_meta = storage_root / "mybucket" / ".meta" / "file.txt.meta.json"
legacy_meta.parent.mkdir(parents=True)
legacy_meta.write_text(json.dumps({"__etag__": "different_value"}))
meta_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "meta"
index_path = meta_root / "_index.json"
index_path.unlink()
result = checker.run_now()
assert result.legacy_metadata_drifts == 1
issues = _issues_of_type(result, "legacy_metadata_drift")
assert len(issues) == 1
assert issues[0].detail == "unmigrated legacy .meta.json"
def test_detect_drift(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
legacy_meta = storage_root / "mybucket" / ".meta" / "file.txt.meta.json"
legacy_meta.parent.mkdir(parents=True)
legacy_meta.write_text(json.dumps({"__etag__": "different_value"}))
result = checker.run_now()
assert result.legacy_metadata_drifts == 1
issues = _issues_of_type(result, "legacy_metadata_drift")
assert "differs from index" in issues[0].detail
def test_heal_unmigrated(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
legacy_meta = storage_root / "mybucket" / ".meta" / "file.txt.meta.json"
legacy_meta.parent.mkdir(parents=True)
legacy_data = {"__etag__": _md5(b"hello"), "__size__": "5"}
legacy_meta.write_text(json.dumps(legacy_data))
meta_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "meta"
index_path = meta_root / "_index.json"
index_path.unlink()
result = checker.run_now(auto_heal=True)
assert result.legacy_metadata_drifts == 1
legacy_issues = _issues_of_type(result, "legacy_metadata_drift")
assert len(legacy_issues) == 1
assert legacy_issues[0].healed
assert not legacy_meta.exists()
index_data = json.loads(index_path.read_text())
assert "file.txt" in index_data
assert index_data["file.txt"]["metadata"]["__etag__"] == _md5(b"hello")
def test_heal_drift(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
legacy_meta = storage_root / "mybucket" / ".meta" / "file.txt.meta.json"
legacy_meta.parent.mkdir(parents=True)
legacy_meta.write_text(json.dumps({"__etag__": "different_value"}))
result = checker.run_now(auto_heal=True)
assert result.legacy_metadata_drifts == 1
legacy_issues = _issues_of_type(result, "legacy_metadata_drift")
assert legacy_issues[0].healed
assert not legacy_meta.exists()
class TestDryRun:
def test_dry_run_no_changes(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
(storage_root / "mybucket" / "file.txt").write_bytes(b"corrupted")
(storage_root / "mybucket" / "orphan.txt").write_bytes(b"orphan")
result = checker.run_now(auto_heal=True, dry_run=True)
assert result.corrupted_objects == 1
assert result.orphaned_objects == 1
assert result.issues_healed == 0
meta_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "meta"
index_data = json.loads((meta_root / "_index.json").read_text())
assert "orphan.txt" not in index_data
class TestBatchSize:
def test_batch_limits_scan(self, storage_root):
objects = {f"file{i}.txt": f"data{i}".encode() for i in range(10)}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(
storage_root=storage_root,
batch_size=3,
)
result = checker.run_now()
assert result.objects_scanned <= 3
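# batch_size caps how many objects a single run_now() pass visits; full
# coverage comes from the persisted cursor carrying the scan position across
# passes (see the cursor test classes below).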
class TestHistory:
def test_history_recorded(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
checker.run_now()
history = checker.get_history()
assert len(history) == 1
assert "corrupted_objects" in history[0]["result"]
def test_history_multiple(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
checker.run_now()
checker.run_now()
checker.run_now()
history = checker.get_history()
assert len(history) == 3
def test_history_pagination(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
for _ in range(5):
checker.run_now()
history = checker.get_history(limit=2, offset=1)
assert len(history) == 2
AUTH_HEADERS = {"X-Access-Key": "admin", "X-Secret-Key": "adminsecret"}
class TestAdminAPI:
@pytest.fixture
def integrity_app(self, tmp_path):
from app import create_api_app
storage_root = tmp_path / "data"
iam_config = tmp_path / "iam.json"
bucket_policies = tmp_path / "bucket_policies.json"
iam_payload = {
"users": [
{
"access_key": "admin",
"secret_key": "adminsecret",
"display_name": "Admin",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy", "iam:*"]}],
}
]
}
iam_config.write_text(json.dumps(iam_payload))
flask_app = create_api_app({
"TESTING": True,
"SECRET_KEY": "testing",
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"API_BASE_URL": "http://testserver",
"INTEGRITY_ENABLED": True,
"INTEGRITY_AUTO_HEAL": False,
"INTEGRITY_DRY_RUN": False,
})
yield flask_app
storage = flask_app.extensions.get("object_storage")
if storage:
base = getattr(storage, "storage", storage)
if hasattr(base, "shutdown_stats"):
base.shutdown_stats()
ic = flask_app.extensions.get("integrity")
if ic:
ic.stop()
def test_status_endpoint(self, integrity_app):
client = integrity_app.test_client()
resp = client.get("/admin/integrity/status", headers=AUTH_HEADERS)
assert resp.status_code == 200
data = resp.get_json()
assert data["enabled"] is True
assert "interval_hours" in data
def test_run_endpoint(self, integrity_app):
client = integrity_app.test_client()
resp = client.post("/admin/integrity/run", headers=AUTH_HEADERS, json={})
assert resp.status_code == 200
data = resp.get_json()
assert data["status"] == "started"
_wait_scan_done(client, AUTH_HEADERS)
resp = client.get("/admin/integrity/history?limit=1", headers=AUTH_HEADERS)
hist = resp.get_json()
assert len(hist["executions"]) >= 1
assert "corrupted_objects" in hist["executions"][0]["result"]
assert "objects_scanned" in hist["executions"][0]["result"]
def test_run_with_overrides(self, integrity_app):
client = integrity_app.test_client()
resp = client.post(
"/admin/integrity/run",
headers=AUTH_HEADERS,
json={"dry_run": True, "auto_heal": True},
)
assert resp.status_code == 200
_wait_scan_done(client, AUTH_HEADERS)
def test_history_endpoint(self, integrity_app):
client = integrity_app.test_client()
client.post("/admin/integrity/run", headers=AUTH_HEADERS, json={})
_wait_scan_done(client, AUTH_HEADERS)
resp = client.get("/admin/integrity/history", headers=AUTH_HEADERS)
assert resp.status_code == 200
data = resp.get_json()
assert "executions" in data
assert len(data["executions"]) >= 1
def test_auth_required(self, integrity_app):
client = integrity_app.test_client()
resp = client.get("/admin/integrity/status")
assert resp.status_code in (401, 403)
def test_disabled_status(self, tmp_path):
from app import create_api_app
storage_root = tmp_path / "data2"
iam_config = tmp_path / "iam2.json"
bucket_policies = tmp_path / "bp2.json"
iam_payload = {
"users": [
{
"access_key": "admin",
"secret_key": "adminsecret",
"display_name": "Admin",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy", "iam:*"]}],
}
]
}
iam_config.write_text(json.dumps(iam_payload))
flask_app = create_api_app({
"TESTING": True,
"SECRET_KEY": "testing",
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"API_BASE_URL": "http://testserver",
"INTEGRITY_ENABLED": False,
})
c = flask_app.test_client()
resp = c.get("/admin/integrity/status", headers=AUTH_HEADERS)
assert resp.status_code == 200
data = resp.get_json()
assert data["enabled"] is False
storage = flask_app.extensions.get("object_storage")
if storage:
base = getattr(storage, "storage", storage)
if hasattr(base, "shutdown_stats"):
base.shutdown_stats()
class TestMultipleBuckets:
def test_scans_multiple_buckets(self, storage_root, checker):
_setup_bucket(storage_root, "bucket1", {"a.txt": b"aaa"})
_setup_bucket(storage_root, "bucket2", {"b.txt": b"bbb"})
result = checker.run_now()
assert result.buckets_scanned == 2
assert result.objects_scanned >= 2
assert result.corrupted_objects == 0
class TestGetStatus:
def test_status_fields(self, checker):
status = checker.get_status()
assert "enabled" in status
assert "running" in status
assert "interval_hours" in status
assert "batch_size" in status
assert "auto_heal" in status
assert "dry_run" in status
def test_status_includes_cursor(self, storage_root, checker):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
checker.run_now()
status = checker.get_status()
assert "cursor" in status
assert status["cursor"]["tracked_buckets"] == 1
assert "mybucket" in status["cursor"]["buckets"]
class TestUnifiedBatchCounter:
def test_orphaned_objects_count_toward_batch(self, storage_root):
_setup_bucket(storage_root, "mybucket", {})
for i in range(10):
(storage_root / "mybucket" / f"orphan{i}.txt").write_bytes(f"data{i}".encode())
checker = IntegrityChecker(storage_root=storage_root, batch_size=3)
result = checker.run_now()
assert result.objects_scanned <= 3
def test_phantom_metadata_counts_toward_batch(self, storage_root):
objects = {f"file{i}.txt": f"data{i}".encode() for i in range(10)}
_setup_bucket(storage_root, "mybucket", objects)
for i in range(10):
(storage_root / "mybucket" / f"file{i}.txt").unlink()
checker = IntegrityChecker(storage_root=storage_root, batch_size=5)
result = checker.run_now()
assert result.objects_scanned <= 5
def test_all_check_types_contribute(self, storage_root):
_setup_bucket(storage_root, "mybucket", {"valid.txt": b"hello"})
(storage_root / "mybucket" / "orphan.txt").write_bytes(b"orphan")
checker = IntegrityChecker(storage_root=storage_root, batch_size=1000)
result = checker.run_now()
assert result.objects_scanned > 2
class TestCursorRotation:
def test_oldest_bucket_scanned_first(self, storage_root):
_setup_bucket(storage_root, "bucket-a", {"a.txt": b"aaa"})
_setup_bucket(storage_root, "bucket-b", {"b.txt": b"bbb"})
_setup_bucket(storage_root, "bucket-c", {"c.txt": b"ccc"})
checker = IntegrityChecker(storage_root=storage_root, batch_size=5)
checker.cursor_store.update_bucket("bucket-a", 1000.0)
checker.cursor_store.update_bucket("bucket-b", 3000.0)
checker.cursor_store.update_bucket("bucket-c", 2000.0)
ordered = checker.cursor_store.get_bucket_order(["bucket-a", "bucket-b", "bucket-c"])
assert ordered[0] == "bucket-a"
assert ordered[1] == "bucket-c"
assert ordered[2] == "bucket-b"
def test_never_scanned_buckets_first(self, storage_root):
_setup_bucket(storage_root, "bucket-old", {"a.txt": b"aaa"})
_setup_bucket(storage_root, "bucket-new", {"b.txt": b"bbb"})
checker = IntegrityChecker(storage_root=storage_root, batch_size=1000)
checker.cursor_store.update_bucket("bucket-old", time.time())
ordered = checker.cursor_store.get_bucket_order(["bucket-old", "bucket-new"])
assert ordered[0] == "bucket-new"
def test_rotation_covers_all_buckets(self, storage_root):
for name in ["bucket-a", "bucket-b", "bucket-c"]:
_setup_bucket(storage_root, name, {f"{name}.txt": name.encode()})
checker = IntegrityChecker(storage_root=storage_root, batch_size=4)
result1 = checker.run_now()
assert result1.buckets_scanned >= 1
checker.run_now()
checker.run_now()
cursor_info = checker.cursor_store.get_info()
assert cursor_info["tracked_buckets"] == 3
def test_cursor_persistence(self, storage_root):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
checker1 = IntegrityChecker(storage_root=storage_root, batch_size=1000)
checker1.run_now()
cursor1 = checker1.cursor_store.get_info()
assert cursor1["tracked_buckets"] == 1
assert "mybucket" in cursor1["buckets"]
checker2 = IntegrityChecker(storage_root=storage_root, batch_size=1000)
cursor2 = checker2.cursor_store.get_info()
assert cursor2["tracked_buckets"] == 1
assert "mybucket" in cursor2["buckets"]
def test_stale_cursor_cleanup(self, storage_root):
_setup_bucket(storage_root, "bucket-a", {"a.txt": b"aaa"})
_setup_bucket(storage_root, "bucket-b", {"b.txt": b"bbb"})
checker = IntegrityChecker(storage_root=storage_root, batch_size=1000)
checker.run_now()
import shutil
shutil.rmtree(storage_root / "bucket-b")
meta_b = storage_root / ".myfsio.sys" / "buckets" / "bucket-b"
if meta_b.exists():
shutil.rmtree(meta_b)
checker.run_now()
cursor_info = checker.cursor_store.get_info()
assert "bucket-b" not in cursor_info["buckets"]
assert "bucket-a" in cursor_info["buckets"]
def test_cursor_updates_after_scan(self, storage_root):
_setup_bucket(storage_root, "mybucket", {"file.txt": b"hello"})
checker = IntegrityChecker(storage_root=storage_root, batch_size=1000)
before = time.time()
checker.run_now()
after = time.time()
cursor_info = checker.cursor_store.get_info()
entry = cursor_info["buckets"]["mybucket"]
assert before <= entry["last_scanned"] <= after
assert entry["completed"] is True
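# Intra-bucket resumption: a batch that ends mid-bucket records last_key with
# completed=False; the next pass resumes after that key, and last_key resets
# to None once a full pass over the bucket finishes.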
class TestIntraBucketCursor:
def test_resumes_from_cursor_key(self, storage_root):
objects = {f"file_{chr(ord('a') + i)}.txt": f"data{i}".encode() for i in range(10)}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(storage_root=storage_root, batch_size=3)
result1 = checker.run_now()
assert result1.objects_scanned == 3
cursor_info = checker.cursor_store.get_info()
entry = cursor_info["buckets"]["mybucket"]
assert entry["last_key"] is not None
assert entry["completed"] is False
result2 = checker.run_now()
assert result2.objects_scanned == 3
cursor_after = checker.cursor_store.get_info()["buckets"]["mybucket"]
assert cursor_after["last_key"] > entry["last_key"]
def test_cursor_resets_after_full_pass(self, storage_root):
objects = {f"file_{i}.txt": f"data{i}".encode() for i in range(3)}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(storage_root=storage_root, batch_size=100)
checker.run_now()
cursor_info = checker.cursor_store.get_info()
entry = cursor_info["buckets"]["mybucket"]
assert entry["last_key"] is None
assert entry["completed"] is True
def test_full_coverage_across_cycles(self, storage_root):
objects = {f"obj_{chr(ord('a') + i)}.txt": f"data{i}".encode() for i in range(10)}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(storage_root=storage_root, batch_size=4)
all_scanned = 0
for _ in range(10):
result = checker.run_now()
all_scanned += result.objects_scanned
if checker.cursor_store.get_info()["buckets"]["mybucket"]["completed"]:
break
assert all_scanned >= 10
def test_deleted_cursor_key_skips_gracefully(self, storage_root):
objects = {f"file_{chr(ord('a') + i)}.txt": f"data{i}".encode() for i in range(6)}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(storage_root=storage_root, batch_size=3)
checker.run_now()
cursor_info = checker.cursor_store.get_info()
cursor_key = cursor_info["buckets"]["mybucket"]["last_key"]
assert cursor_key is not None
obj_path = storage_root / "mybucket" / cursor_key
meta_root = storage_root / ".myfsio.sys" / "buckets" / "mybucket" / "meta"
key_path = Path(cursor_key)
if key_path.parent == Path("."):
index_path = meta_root / "_index.json"
else:
index_path = meta_root / key_path.parent / "_index.json"
if obj_path.exists():
obj_path.unlink()
if index_path.exists():
index_data = json.loads(index_path.read_text())
index_data.pop(key_path.name, None)
index_path.write_text(json.dumps(index_data))
result2 = checker.run_now()
assert result2.objects_scanned > 0
def test_incomplete_buckets_prioritized(self, storage_root):
_setup_bucket(storage_root, "bucket-a", {f"a{i}.txt": b"a" for i in range(5)})
_setup_bucket(storage_root, "bucket-b", {f"b{i}.txt": b"b" for i in range(5)})
checker = IntegrityChecker(storage_root=storage_root, batch_size=3)
checker.run_now()
cursor_info = checker.cursor_store.get_info()
incomplete = [
name for name, info in cursor_info["buckets"].items()
if info.get("last_key") is not None
]
assert len(incomplete) >= 1
result2 = checker.run_now()
assert result2.objects_scanned > 0
def test_cursor_skips_nested_directories(self, storage_root):
objects = {
"aaa/file1.txt": b"a1",
"aaa/file2.txt": b"a2",
"bbb/file1.txt": b"b1",
"bbb/file2.txt": b"b2",
"ccc/file1.txt": b"c1",
"ccc/file2.txt": b"c2",
}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(storage_root=storage_root, batch_size=4)
result1 = checker.run_now()
assert result1.objects_scanned == 4
cursor_info = checker.cursor_store.get_info()
cursor_key = cursor_info["buckets"]["mybucket"]["last_key"]
assert cursor_key is not None
assert cursor_key.startswith("aaa/") or cursor_key.startswith("bbb/")
result2 = checker.run_now()
assert result2.objects_scanned >= 2
all_scanned = result1.objects_scanned + result2.objects_scanned
for _ in range(10):
if checker.cursor_store.get_info()["buckets"]["mybucket"]["completed"]:
break
r = checker.run_now()
all_scanned += r.objects_scanned
assert all_scanned >= 6
def test_sorted_walk_order(self, storage_root):
objects = {
"bar.txt": b"bar",
"bar/inner.txt": b"inner",
"abc.txt": b"abc",
"zzz/deep.txt": b"deep",
}
_setup_bucket(storage_root, "mybucket", objects)
checker = IntegrityChecker(storage_root=storage_root, batch_size=100)
result = checker.run_now()
assert result.objects_scanned >= 4
assert result.total_issues == 0

@@ -0,0 +1,480 @@
"""Tests for KMS API endpoints."""
from __future__ import annotations
import base64
import json
import secrets
import pytest
@pytest.fixture
def kms_client(tmp_path):
"""Create a test client with KMS enabled."""
from app import create_app
# Write the IAM config before creating the app so the IAM service can load
# it at startup.
iam_config = {
"users": [
{
"access_key": "test-access-key",
"secret_key": "test-secret-key",
"display_name": "Test User",
"permissions": ["*"]
}
]
}
(tmp_path / "iam.json").write_text(json.dumps(iam_config))
app = create_app({
"TESTING": True,
"SECRET_KEY": "testing",
"STORAGE_ROOT": str(tmp_path / "storage"),
"IAM_CONFIG": str(tmp_path / "iam.json"),
"BUCKET_POLICY_PATH": str(tmp_path / "policies.json"),
"ENCRYPTION_ENABLED": True,
"KMS_ENABLED": True,
"ENCRYPTION_MASTER_KEY_PATH": str(tmp_path / "master.key"),
"KMS_KEYS_PATH": str(tmp_path / "kms_keys.json"),
})
return app.test_client()
@pytest.fixture
def auth_headers():
"""Get authentication headers."""
return {
"X-Access-Key": "test-access-key",
"X-Secret-Key": "test-secret-key",
}
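# These endpoint tests authenticate with the simple X-Access-Key/X-Secret-Key
# header scheme rather than SigV4, and the response shapes they assert
# (KeyMetadata, Keys, CiphertextBlob, Plaintext) echo the AWS KMS JSON API.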
class TestKMSKeyManagement:
"""Tests for KMS key management endpoints."""
def test_create_key(self, kms_client, auth_headers):
"""Test creating a KMS key."""
response = kms_client.post(
"/kms/keys",
json={"Description": "Test encryption key"},
headers=auth_headers,
)
assert response.status_code == 200
data = response.get_json()
assert "KeyMetadata" in data
assert data["KeyMetadata"]["Description"] == "Test encryption key"
assert data["KeyMetadata"]["Enabled"] is True
assert "KeyId" in data["KeyMetadata"]
def test_create_key_with_custom_id(self, kms_client, auth_headers):
"""Test creating a key with a custom ID."""
response = kms_client.post(
"/kms/keys",
json={"KeyId": "my-custom-key", "Description": "Custom key"},
headers=auth_headers,
)
assert response.status_code == 200
data = response.get_json()
assert data["KeyMetadata"]["KeyId"] == "my-custom-key"
def test_list_keys(self, kms_client, auth_headers):
"""Test listing KMS keys."""
kms_client.post("/kms/keys", json={"Description": "Key 1"}, headers=auth_headers)
kms_client.post("/kms/keys", json={"Description": "Key 2"}, headers=auth_headers)
response = kms_client.get("/kms/keys", headers=auth_headers)
assert response.status_code == 200
data = response.get_json()
assert "Keys" in data
assert len(data["Keys"]) == 2
def test_get_key(self, kms_client, auth_headers):
"""Test getting a specific key."""
create_response = kms_client.post(
"/kms/keys",
json={"KeyId": "test-key", "Description": "Test key"},
headers=auth_headers,
)
assert create_response.status_code == 200
response = kms_client.get("/kms/keys/test-key", headers=auth_headers)
assert response.status_code == 200
data = response.get_json()
assert data["KeyMetadata"]["KeyId"] == "test-key"
assert data["KeyMetadata"]["Description"] == "Test key"
def test_get_nonexistent_key(self, kms_client, auth_headers):
"""Test getting a key that doesn't exist."""
response = kms_client.get("/kms/keys/nonexistent", headers=auth_headers)
assert response.status_code == 404
def test_delete_key(self, kms_client, auth_headers):
"""Test deleting a key."""
kms_client.post("/kms/keys", json={"KeyId": "test-key"}, headers=auth_headers)
response = kms_client.delete("/kms/keys/test-key", headers=auth_headers)
assert response.status_code == 204
get_response = kms_client.get("/kms/keys/test-key", headers=auth_headers)
assert get_response.status_code == 404
def test_enable_disable_key(self, kms_client, auth_headers):
"""Test enabling and disabling a key."""
kms_client.post("/kms/keys", json={"KeyId": "test-key"}, headers=auth_headers)
response = kms_client.post("/kms/keys/test-key/disable", headers=auth_headers)
assert response.status_code == 200
get_response = kms_client.get("/kms/keys/test-key", headers=auth_headers)
assert get_response.get_json()["KeyMetadata"]["Enabled"] is False
response = kms_client.post("/kms/keys/test-key/enable", headers=auth_headers)
assert response.status_code == 200
get_response = kms_client.get("/kms/keys/test-key", headers=auth_headers)
assert get_response.get_json()["KeyMetadata"]["Enabled"] is True
class TestKMSEncryption:
"""Tests for KMS encryption operations."""
def test_encrypt_decrypt(self, kms_client, auth_headers):
"""Test encrypting and decrypting data."""
kms_client.post("/kms/keys", json={"KeyId": "test-key"}, headers=auth_headers)
plaintext = b"Hello, World!"
plaintext_b64 = base64.b64encode(plaintext).decode()
encrypt_response = kms_client.post(
"/kms/encrypt",
json={"KeyId": "test-key", "Plaintext": plaintext_b64},
headers=auth_headers,
)
assert encrypt_response.status_code == 200
encrypt_data = encrypt_response.get_json()
assert "CiphertextBlob" in encrypt_data
assert encrypt_data["KeyId"] == "test-key"
decrypt_response = kms_client.post(
"/kms/decrypt",
json={"CiphertextBlob": encrypt_data["CiphertextBlob"]},
headers=auth_headers,
)
assert decrypt_response.status_code == 200
decrypt_data = decrypt_response.get_json()
decrypted = base64.b64decode(decrypt_data["Plaintext"])
assert decrypted == plaintext
def test_encrypt_with_context(self, kms_client, auth_headers):
"""Test encryption with encryption context."""
kms_client.post("/kms/keys", json={"KeyId": "test-key"}, headers=auth_headers)
plaintext = b"Contextualized data"
plaintext_b64 = base64.b64encode(plaintext).decode()
context = {"purpose": "testing", "bucket": "my-bucket"}
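# The same EncryptionContext must accompany the decrypt call; a mismatched context is rejected with 400 below.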
encrypt_response = kms_client.post(
"/kms/encrypt",
json={
"KeyId": "test-key",
"Plaintext": plaintext_b64,
"EncryptionContext": context,
},
headers=auth_headers,
)
assert encrypt_response.status_code == 200
ciphertext = encrypt_response.get_json()["CiphertextBlob"]
decrypt_response = kms_client.post(
"/kms/decrypt",
json={
"CiphertextBlob": ciphertext,
"EncryptionContext": context,
},
headers=auth_headers,
)
assert decrypt_response.status_code == 200
wrong_context_response = kms_client.post(
"/kms/decrypt",
json={
"CiphertextBlob": ciphertext,
"EncryptionContext": {"wrong": "context"},
},
headers=auth_headers,
)
assert wrong_context_response.status_code == 400
def test_encrypt_missing_key_id(self, kms_client, auth_headers):
"""Test encryption without KeyId."""
response = kms_client.post(
"/kms/encrypt",
json={"Plaintext": base64.b64encode(b"data").decode()},
headers=auth_headers,
)
assert response.status_code == 400
assert "KeyId is required" in response.get_json()["message"]
def test_encrypt_missing_plaintext(self, kms_client, auth_headers):
"""Test encryption without Plaintext."""
kms_client.post("/kms/keys", json={"KeyId": "test-key"}, headers=auth_headers)
response = kms_client.post(
"/kms/encrypt",
json={"KeyId": "test-key"},
headers=auth_headers,
)
assert response.status_code == 400
assert "Plaintext is required" in response.get_json()["message"]
class TestKMSDataKey:
"""Tests for KMS data key generation."""
def test_generate_data_key(self, kms_client, auth_headers):
"""Test generating a data key."""
kms_client.post("/kms/keys", json={"KeyId": "test-key"}, headers=auth_headers)
response = kms_client.post(
"/kms/generate-data-key",
json={"KeyId": "test-key"},
headers=auth_headers,
)
assert response.status_code == 200
data = response.get_json()
assert "Plaintext" in data
assert "CiphertextBlob" in data
assert data["KeyId"] == "test-key"
# Verify plaintext key is 256 bits (32 bytes)
plaintext_key = base64.b64decode(data["Plaintext"])
assert len(plaintext_key) == 32
def test_generate_data_key_aes_128(self, kms_client, auth_headers):
"""Test generating an AES-128 data key."""
kms_client.post("/kms/keys", json={"KeyId": "test-key"}, headers=auth_headers)
response = kms_client.post(
"/kms/generate-data-key",
json={"KeyId": "test-key", "KeySpec": "AES_128"},
headers=auth_headers,
)
assert response.status_code == 200
data = response.get_json()
# Verify plaintext key is 128 bits (16 bytes)
plaintext_key = base64.b64decode(data["Plaintext"])
assert len(plaintext_key) == 16
def test_generate_data_key_without_plaintext(self, kms_client, auth_headers):
"""Test generating a data key without plaintext."""
kms_client.post("/kms/keys", json={"KeyId": "test-key"}, headers=auth_headers)
response = kms_client.post(
"/kms/generate-data-key-without-plaintext",
json={"KeyId": "test-key"},
headers=auth_headers,
)
assert response.status_code == 200
data = response.get_json()
assert "CiphertextBlob" in data
assert "Plaintext" not in data
class TestKMSReEncrypt:
"""Tests for KMS re-encryption."""
def test_re_encrypt(self, kms_client, auth_headers):
"""Test re-encrypting data with a different key."""
kms_client.post("/kms/keys", json={"KeyId": "key-1"}, headers=auth_headers)
kms_client.post("/kms/keys", json={"KeyId": "key-2"}, headers=auth_headers)
plaintext = b"Data to re-encrypt"
encrypt_response = kms_client.post(
"/kms/encrypt",
json={
"KeyId": "key-1",
"Plaintext": base64.b64encode(plaintext).decode(),
},
headers=auth_headers,
)
ciphertext = encrypt_response.get_json()["CiphertextBlob"]
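# Re-encryption is performed server-side: ciphertext under key-1 goes in, ciphertext under key-2 comes out.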
re_encrypt_response = kms_client.post(
"/kms/re-encrypt",
json={
"CiphertextBlob": ciphertext,
"DestinationKeyId": "key-2",
},
headers=auth_headers,
)
assert re_encrypt_response.status_code == 200
data = re_encrypt_response.get_json()
assert data["SourceKeyId"] == "key-1"
assert data["KeyId"] == "key-2"
decrypt_response = kms_client.post(
"/kms/decrypt",
json={"CiphertextBlob": data["CiphertextBlob"]},
headers=auth_headers,
)
decrypted = base64.b64decode(decrypt_response.get_json()["Plaintext"])
assert decrypted == plaintext
class TestKMSRandom:
"""Tests for random number generation."""
def test_generate_random(self, kms_client, auth_headers):
"""Test generating random bytes."""
response = kms_client.post(
"/kms/generate-random",
json={"NumberOfBytes": 64},
headers=auth_headers,
)
assert response.status_code == 200
data = response.get_json()
random_bytes = base64.b64decode(data["Plaintext"])
assert len(random_bytes) == 64
def test_generate_random_default_size(self, kms_client, auth_headers):
"""Test generating random bytes with default size."""
response = kms_client.post(
"/kms/generate-random",
json={},
headers=auth_headers,
)
assert response.status_code == 200
data = response.get_json()
random_bytes = base64.b64decode(data["Plaintext"])
assert len(random_bytes) == 32
class TestClientSideEncryption:
"""Tests for client-side encryption helpers."""
def test_generate_client_key(self, kms_client, auth_headers):
"""Test generating a client encryption key."""
response = kms_client.post(
"/kms/client/generate-key",
headers=auth_headers,
)
assert response.status_code == 200
data = response.get_json()
assert "key" in data
assert data["algorithm"] == "AES-256-GCM"
key = base64.b64decode(data["key"])
assert len(key) == 32
def test_client_encrypt_decrypt(self, kms_client, auth_headers):
"""Test client-side encryption and decryption."""
key_response = kms_client.post("/kms/client/generate-key", headers=auth_headers)
key = key_response.get_json()["key"]
plaintext = b"Client-side encrypted data"
encrypt_response = kms_client.post(
"/kms/client/encrypt",
json={
"Plaintext": base64.b64encode(plaintext).decode(),
"Key": key,
},
headers=auth_headers,
)
assert encrypt_response.status_code == 200
encrypted = encrypt_response.get_json()
decrypt_response = kms_client.post(
"/kms/client/decrypt",
json={
"Ciphertext": encrypted["ciphertext"],
"Nonce": encrypted["nonce"],
"Key": key,
},
headers=auth_headers,
)
assert decrypt_response.status_code == 200
decrypted = base64.b64decode(decrypt_response.get_json()["Plaintext"])
assert decrypted == plaintext
class TestEncryptionMaterials:
"""Tests for S3 encryption materials endpoint."""
def test_get_encryption_materials(self, kms_client, auth_headers):
"""Test getting encryption materials for client-side S3 encryption."""
kms_client.post("/kms/keys", json={"KeyId": "s3-key"}, headers=auth_headers)
response = kms_client.post(
"/kms/materials/s3-key",
json={},
headers=auth_headers,
)
assert response.status_code == 200
data = response.get_json()
assert "PlaintextKey" in data
assert "EncryptedKey" in data
assert data["KeyId"] == "s3-key"
assert data["Algorithm"] == "AES-256-GCM"
key = base64.b64decode(data["PlaintextKey"])
assert len(key) == 32
class TestKMSAuthentication:
"""Tests for KMS authentication requirements."""
def test_unauthenticated_request_fails(self, kms_client):
"""Test that unauthenticated requests are rejected."""
response = kms_client.get("/kms/keys")
assert response.status_code == 403
def test_invalid_credentials_fail(self, kms_client):
"""Test that invalid credentials are rejected."""
response = kms_client.get(
"/kms/keys",
headers={
"X-Access-Key": "wrong-key",
"X-Secret-Key": "wrong-secret",
},
)
assert response.status_code == 403

View File

@@ -0,0 +1,238 @@
import io
import time
from datetime import datetime, timedelta, timezone
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from app.lifecycle import LifecycleManager, LifecycleResult
from app.storage import ObjectStorage
@pytest.fixture
def storage(tmp_path: Path):
storage_root = tmp_path / "data"
storage_root.mkdir(parents=True)
return ObjectStorage(storage_root)
@pytest.fixture
def lifecycle_manager(storage):
manager = LifecycleManager(storage, interval_seconds=3600)
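# A long interval keeps the background timer from firing mid-test; stop() cancels it on teardown.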
yield manager
manager.stop()
class TestLifecycleResult:
def test_default_values(self):
result = LifecycleResult(bucket_name="test-bucket")
assert result.bucket_name == "test-bucket"
assert result.objects_deleted == 0
assert result.versions_deleted == 0
assert result.uploads_aborted == 0
assert result.errors == []
assert result.execution_time_seconds == 0.0
class TestLifecycleManager:
def test_start_and_stop(self, lifecycle_manager):
lifecycle_manager.start()
assert lifecycle_manager._timer is not None
assert lifecycle_manager._shutdown is False
lifecycle_manager.stop()
assert lifecycle_manager._shutdown is True
assert lifecycle_manager._timer is None
def test_start_only_once(self, lifecycle_manager):
lifecycle_manager.start()
first_timer = lifecycle_manager._timer
lifecycle_manager.start()
assert lifecycle_manager._timer is first_timer
def test_enforce_rules_no_lifecycle(self, lifecycle_manager, storage):
storage.create_bucket("no-lifecycle-bucket")
result = lifecycle_manager.enforce_rules("no-lifecycle-bucket")
assert result.bucket_name == "no-lifecycle-bucket"
assert result.objects_deleted == 0
def test_enforce_rules_disabled_rule(self, lifecycle_manager, storage):
storage.create_bucket("disabled-bucket")
storage.set_bucket_lifecycle("disabled-bucket", [
{
"ID": "disabled-rule",
"Status": "Disabled",
"Prefix": "",
"Expiration": {"Days": 1},
}
])
storage.put_object(
"disabled-bucket",
"old-file.txt",
io.BytesIO(b"old content"),
)
result = lifecycle_manager.enforce_rules("disabled-bucket")
assert result.objects_deleted == 0
def test_enforce_expiration_by_days(self, lifecycle_manager, storage):
storage.create_bucket("expire-bucket")
storage.set_bucket_lifecycle("expire-bucket", [
{
"ID": "expire-30-days",
"Status": "Enabled",
"Prefix": "",
"Expiration": {"Days": 30},
}
])
storage.put_object(
"expire-bucket",
"recent-file.txt",
io.BytesIO(b"recent content"),
)
result = lifecycle_manager.enforce_rules("expire-bucket")
assert result.objects_deleted == 0
def test_enforce_expiration_with_prefix(self, lifecycle_manager, storage):
storage.create_bucket("prefix-bucket")
storage.set_bucket_lifecycle("prefix-bucket", [
{
"ID": "expire-logs",
"Status": "Enabled",
"Prefix": "logs/",
"Expiration": {"Days": 1},
}
])
storage.put_object("prefix-bucket", "logs/old.log", io.BytesIO(b"log data"))
storage.put_object("prefix-bucket", "data/keep.txt", io.BytesIO(b"keep this"))
result = lifecycle_manager.enforce_rules("prefix-bucket")
# Both objects were just written, so neither exceeds the 1-day expiration yet.
assert result.objects_deleted == 0
def test_enforce_all_buckets(self, lifecycle_manager, storage):
storage.create_bucket("bucket1")
storage.create_bucket("bucket2")
results = lifecycle_manager.enforce_all_buckets()
assert isinstance(results, dict)
def test_run_now_single_bucket(self, lifecycle_manager, storage):
storage.create_bucket("run-now-bucket")
results = lifecycle_manager.run_now("run-now-bucket")
assert "run-now-bucket" in results
def test_run_now_all_buckets(self, lifecycle_manager, storage):
storage.create_bucket("all-bucket-1")
storage.create_bucket("all-bucket-2")
results = lifecycle_manager.run_now()
assert isinstance(results, dict)
def test_enforce_abort_multipart(self, lifecycle_manager, storage):
storage.create_bucket("multipart-bucket")
storage.set_bucket_lifecycle("multipart-bucket", [
{
"ID": "abort-old-uploads",
"Status": "Enabled",
"Prefix": "",
"AbortIncompleteMultipartUpload": {"DaysAfterInitiation": 7},
}
])
upload_id = storage.initiate_multipart_upload("multipart-bucket", "large-file.bin")
assert upload_id
result = lifecycle_manager.enforce_rules("multipart-bucket")
# The upload was just initiated, so the 7-day abort window has not elapsed.
assert result.uploads_aborted == 0
def test_enforce_noncurrent_version_expiration(self, lifecycle_manager, storage):
storage.create_bucket("versioned-bucket")
storage.set_bucket_versioning("versioned-bucket", True)
storage.set_bucket_lifecycle("versioned-bucket", [
{
"ID": "expire-old-versions",
"Status": "Enabled",
"Prefix": "",
"NoncurrentVersionExpiration": {"NoncurrentDays": 30},
}
])
storage.put_object("versioned-bucket", "file.txt", io.BytesIO(b"v1"))
storage.put_object("versioned-bucket", "file.txt", io.BytesIO(b"v2"))
result = lifecycle_manager.enforce_rules("versioned-bucket")
assert result.bucket_name == "versioned-bucket"
def test_execution_time_tracking(self, lifecycle_manager, storage):
storage.create_bucket("timed-bucket")
storage.set_bucket_lifecycle("timed-bucket", [
{
"ID": "timer-test",
"Status": "Enabled",
"Expiration": {"Days": 1},
}
])
result = lifecycle_manager.enforce_rules("timed-bucket")
assert result.execution_time_seconds >= 0
def test_enforce_rules_with_error(self, lifecycle_manager, storage):
result = lifecycle_manager.enforce_rules("nonexistent-bucket")
assert len(result.errors) > 0 or result.objects_deleted == 0
def test_lifecycle_with_date_expiration(self, lifecycle_manager, storage):
storage.create_bucket("date-bucket")
past_date = (datetime.now(timezone.utc) - timedelta(days=1)).strftime("%Y-%m-%dT00:00:00Z")
storage.set_bucket_lifecycle("date-bucket", [
{
"ID": "expire-by-date",
"Status": "Enabled",
"Prefix": "",
"Expiration": {"Date": past_date},
}
])
storage.put_object("date-bucket", "should-expire.txt", io.BytesIO(b"content"))
result = lifecycle_manager.enforce_rules("date-bucket")
assert result.bucket_name == "date-bucket"
def test_enforce_with_filter_prefix(self, lifecycle_manager, storage):
storage.create_bucket("filter-bucket")
storage.set_bucket_lifecycle("filter-bucket", [
{
"ID": "filter-prefix-rule",
"Status": "Enabled",
"Filter": {"Prefix": "archive/"},
"Expiration": {"Days": 1},
}
])
result = lifecycle_manager.enforce_rules("filter-bucket")
assert result.bucket_name == "filter-bucket"
class TestLifecycleManagerScheduling:
def test_schedule_next_respects_shutdown(self, storage):
manager = LifecycleManager(storage, interval_seconds=1)
manager._shutdown = True
manager._schedule_next()
assert manager._timer is None
@patch.object(LifecycleManager, "enforce_all_buckets")
def test_run_enforcement_catches_exceptions(self, mock_enforce, storage):
mock_enforce.side_effect = Exception("Test error")
manager = LifecycleManager(storage, interval_seconds=3600)
manager._shutdown = True
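# _run_enforcement must swallow the mocked exception rather than propagate it.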
manager._run_enforcement()
def test_shutdown_flag_prevents_scheduling(self, storage):
manager = LifecycleManager(storage, interval_seconds=1)
manager.start()
manager.stop()
assert manager._shutdown is True

View File

@@ -0,0 +1,266 @@
"""Tests for newly implemented S3 API endpoints."""
import io
import pytest
from xml.etree.ElementTree import fromstring
def _stream(data: bytes):
return io.BytesIO(data)
@pytest.fixture
def storage(app):
"""Get the storage instance from the app."""
return app.extensions["object_storage"]
class TestListObjectsV2:
"""Tests for ListObjectsV2 endpoint."""
def test_list_objects_v2_basic(self, client, signer, storage):
storage.create_bucket("v2-test")
storage.put_object("v2-test", "file1.txt", _stream(b"hello"))
storage.put_object("v2-test", "file2.txt", _stream(b"world"))
storage.put_object("v2-test", "folder/file3.txt", _stream(b"nested"))
headers = signer("GET", "/v2-test?list-type=2")
resp = client.get("/v2-test", query_string={"list-type": "2"}, headers=headers)
assert resp.status_code == 200
root = fromstring(resp.data)
assert root.find("KeyCount").text == "3"
assert root.find("IsTruncated").text == "false"
keys = [el.find("Key").text for el in root.findall("Contents")]
assert "file1.txt" in keys
assert "file2.txt" in keys
assert "folder/file3.txt" in keys
def test_list_objects_v2_with_prefix_and_delimiter(self, client, signer, storage):
storage.create_bucket("prefix-test")
storage.put_object("prefix-test", "photos/2023/jan.jpg", _stream(b"jan"))
storage.put_object("prefix-test", "photos/2023/feb.jpg", _stream(b"feb"))
storage.put_object("prefix-test", "photos/2024/mar.jpg", _stream(b"mar"))
storage.put_object("prefix-test", "docs/readme.md", _stream(b"readme"))
headers = signer("GET", "/prefix-test?list-type=2&prefix=photos/&delimiter=/")
resp = client.get(
"/prefix-test",
query_string={"list-type": "2", "prefix": "photos/", "delimiter": "/"},
headers=headers
)
assert resp.status_code == 200
root = fromstring(resp.data)
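# With delimiter=/, keys under photos/ roll up into CommonPrefixes and no Contents entries are returned.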
prefixes = [el.find("Prefix").text for el in root.findall("CommonPrefixes")]
assert "photos/2023/" in prefixes
assert "photos/2024/" in prefixes
assert len(root.findall("Contents")) == 0
class TestPutBucketVersioning:
"""Tests for PutBucketVersioning endpoint."""
def test_put_versioning_enabled(self, client, signer, storage):
storage.create_bucket("version-test")
payload = b"""<?xml version="1.0" encoding="UTF-8"?>
<VersioningConfiguration>
<Status>Enabled</Status>
</VersioningConfiguration>"""
headers = signer("PUT", "/version-test?versioning", body=payload)
resp = client.put("/version-test", query_string={"versioning": ""}, data=payload, headers=headers)
assert resp.status_code == 200
headers = signer("GET", "/version-test?versioning")
resp = client.get("/version-test", query_string={"versioning": ""}, headers=headers)
root = fromstring(resp.data)
assert root.find("Status").text == "Enabled"
def test_put_versioning_suspended(self, client, signer, storage):
storage.create_bucket("suspend-test")
storage.set_bucket_versioning("suspend-test", True)
payload = b"""<?xml version="1.0" encoding="UTF-8"?>
<VersioningConfiguration>
<Status>Suspended</Status>
</VersioningConfiguration>"""
headers = signer("PUT", "/suspend-test?versioning", body=payload)
resp = client.put("/suspend-test", query_string={"versioning": ""}, data=payload, headers=headers)
assert resp.status_code == 200
headers = signer("GET", "/suspend-test?versioning")
resp = client.get("/suspend-test", query_string={"versioning": ""}, headers=headers)
root = fromstring(resp.data)
assert root.find("Status").text == "Suspended"
class TestDeleteBucketTagging:
"""Tests for DeleteBucketTagging endpoint."""
def test_delete_bucket_tags(self, client, signer, storage):
storage.create_bucket("tag-delete-test")
storage.set_bucket_tags("tag-delete-test", [{"Key": "env", "Value": "test"}])
headers = signer("DELETE", "/tag-delete-test?tagging")
resp = client.delete("/tag-delete-test", query_string={"tagging": ""}, headers=headers)
assert resp.status_code == 204
headers = signer("GET", "/tag-delete-test?tagging")
resp = client.get("/tag-delete-test", query_string={"tagging": ""}, headers=headers)
assert resp.status_code == 404
class TestDeleteBucketCors:
"""Tests for DeleteBucketCors endpoint."""
def test_delete_bucket_cors(self, client, signer, storage):
storage.create_bucket("cors-delete-test")
storage.set_bucket_cors("cors-delete-test", [
{"AllowedOrigins": ["*"], "AllowedMethods": ["GET"]}
])
headers = signer("DELETE", "/cors-delete-test?cors")
resp = client.delete("/cors-delete-test", query_string={"cors": ""}, headers=headers)
assert resp.status_code == 204
headers = signer("GET", "/cors-delete-test?cors")
resp = client.get("/cors-delete-test", query_string={"cors": ""}, headers=headers)
assert resp.status_code == 404
class TestGetBucketLocation:
"""Tests for GetBucketLocation endpoint."""
def test_get_bucket_location(self, client, signer, storage):
storage.create_bucket("location-test")
headers = signer("GET", "/location-test?location")
resp = client.get("/location-test", query_string={"location": ""}, headers=headers)
assert resp.status_code == 200
root = fromstring(resp.data)
assert root.tag == "LocationConstraint"
class TestBucketAcl:
"""Tests for Bucket ACL operations."""
def test_get_bucket_acl(self, client, signer, storage):
storage.create_bucket("acl-test")
headers = signer("GET", "/acl-test?acl")
resp = client.get("/acl-test", query_string={"acl": ""}, headers=headers)
assert resp.status_code == 200
root = fromstring(resp.data)
assert root.tag == "AccessControlPolicy"
assert root.find("Owner/ID") is not None
assert root.find(".//Permission").text == "FULL_CONTROL"
def test_put_bucket_acl(self, client, signer, storage):
storage.create_bucket("acl-put-test")
headers = signer("PUT", "/acl-put-test?acl")
headers["x-amz-acl"] = "public-read"
resp = client.put("/acl-put-test", query_string={"acl": ""}, headers=headers)
assert resp.status_code == 200
class TestCopyObject:
"""Tests for CopyObject operation."""
def test_copy_object_basic(self, client, signer, storage):
storage.create_bucket("copy-src")
storage.create_bucket("copy-dst")
storage.put_object("copy-src", "original.txt", _stream(b"original content"))
headers = signer("PUT", "/copy-dst/copied.txt")
headers["x-amz-copy-source"] = "/copy-src/original.txt"
resp = client.put("/copy-dst/copied.txt", headers=headers)
assert resp.status_code == 200
root = fromstring(resp.data)
assert root.tag == "CopyObjectResult"
assert root.find("ETag") is not None
assert root.find("LastModified") is not None
path = storage.get_object_path("copy-dst", "copied.txt")
assert path.read_bytes() == b"original content"
def test_copy_object_with_metadata_replace(self, client, signer, storage):
storage.create_bucket("meta-src")
storage.create_bucket("meta-dst")
storage.put_object("meta-src", "source.txt", _stream(b"data"), metadata={"old": "value"})
headers = signer("PUT", "/meta-dst/target.txt")
headers["x-amz-copy-source"] = "/meta-src/source.txt"
headers["x-amz-metadata-directive"] = "REPLACE"
headers["x-amz-meta-new"] = "metadata"
resp = client.put("/meta-dst/target.txt", headers=headers)
assert resp.status_code == 200
meta = storage.get_object_metadata("meta-dst", "target.txt")
assert "New" in meta or "new" in meta
assert "old" not in meta and "Old" not in meta
class TestObjectTagging:
"""Tests for Object tagging operations."""
def test_put_get_delete_object_tags(self, client, signer, storage):
storage.create_bucket("obj-tag-test")
storage.put_object("obj-tag-test", "tagged.txt", _stream(b"content"))
payload = b"""<?xml version="1.0" encoding="UTF-8"?>
<Tagging>
<TagSet>
<Tag><Key>project</Key><Value>demo</Value></Tag>
<Tag><Key>env</Key><Value>test</Value></Tag>
</TagSet>
</Tagging>"""
headers = signer("PUT", "/obj-tag-test/tagged.txt?tagging", body=payload)
resp = client.put(
"/obj-tag-test/tagged.txt",
query_string={"tagging": ""},
data=payload,
headers=headers
)
assert resp.status_code == 204
headers = signer("GET", "/obj-tag-test/tagged.txt?tagging")
resp = client.get("/obj-tag-test/tagged.txt", query_string={"tagging": ""}, headers=headers)
assert resp.status_code == 200
root = fromstring(resp.data)
tags = {el.find("Key").text: el.find("Value").text for el in root.findall(".//Tag")}
assert tags["project"] == "demo"
assert tags["env"] == "test"
headers = signer("DELETE", "/obj-tag-test/tagged.txt?tagging")
resp = client.delete("/obj-tag-test/tagged.txt", query_string={"tagging": ""}, headers=headers)
assert resp.status_code == 204
headers = signer("GET", "/obj-tag-test/tagged.txt?tagging")
resp = client.get("/obj-tag-test/tagged.txt", query_string={"tagging": ""}, headers=headers)
root = fromstring(resp.data)
assert len(root.findall(".//Tag")) == 0
def test_object_tags_limit(self, client, signer, storage):
storage.create_bucket("tag-limit")
storage.put_object("tag-limit", "file.txt", _stream(b"x"))
tags = "".join(f"<Tag><Key>key{i}</Key><Value>val{i}</Value></Tag>" for i in range(11))
payload = f"<Tagging><TagSet>{tags}</TagSet></Tagging>".encode()
headers = signer("PUT", "/tag-limit/file.txt?tagging", body=payload)
resp = client.put(
"/tag-limit/file.txt",
query_string={"tagging": ""},
data=payload,
headers=headers
)
assert resp.status_code == 400

View File

@@ -0,0 +1,376 @@
import json
import time
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from app.notifications import (
NotificationConfiguration,
NotificationEvent,
NotificationService,
WebhookDestination,
)
class TestNotificationEvent:
def test_default_values(self):
event = NotificationEvent(
event_name="s3:ObjectCreated:Put",
bucket_name="test-bucket",
object_key="test/key.txt",
)
assert event.event_name == "s3:ObjectCreated:Put"
assert event.bucket_name == "test-bucket"
assert event.object_key == "test/key.txt"
assert event.object_size == 0
assert event.etag == ""
assert event.version_id is None
assert event.request_id != ""
def test_to_s3_event(self):
event = NotificationEvent(
event_name="s3:ObjectCreated:Put",
bucket_name="my-bucket",
object_key="my/object.txt",
object_size=1024,
etag="abc123",
version_id="v1",
source_ip="192.168.1.1",
user_identity="user123",
)
result = event.to_s3_event()
assert "Records" in result
assert len(result["Records"]) == 1
record = result["Records"][0]
assert record["eventVersion"] == "2.1"
assert record["eventSource"] == "myfsio:s3"
assert record["eventName"] == "s3:ObjectCreated:Put"
assert record["s3"]["bucket"]["name"] == "my-bucket"
assert record["s3"]["object"]["key"] == "my/object.txt"
assert record["s3"]["object"]["size"] == 1024
assert record["s3"]["object"]["eTag"] == "abc123"
assert record["s3"]["object"]["versionId"] == "v1"
assert record["userIdentity"]["principalId"] == "user123"
assert record["requestParameters"]["sourceIPAddress"] == "192.168.1.1"
class TestWebhookDestination:
def test_default_values(self):
dest = WebhookDestination(url="http://example.com/webhook")
assert dest.url == "http://example.com/webhook"
assert dest.headers == {}
assert dest.timeout_seconds == 30
assert dest.retry_count == 3
assert dest.retry_delay_seconds == 1
def test_to_dict(self):
dest = WebhookDestination(
url="http://example.com/webhook",
headers={"X-Custom": "value"},
timeout_seconds=60,
retry_count=5,
retry_delay_seconds=2,
)
result = dest.to_dict()
assert result["url"] == "http://example.com/webhook"
assert result["headers"] == {"X-Custom": "value"}
assert result["timeout_seconds"] == 60
assert result["retry_count"] == 5
assert result["retry_delay_seconds"] == 2
def test_from_dict(self):
data = {
"url": "http://hook.example.com",
"headers": {"Authorization": "Bearer token"},
"timeout_seconds": 45,
"retry_count": 2,
"retry_delay_seconds": 5,
}
dest = WebhookDestination.from_dict(data)
assert dest.url == "http://hook.example.com"
assert dest.headers == {"Authorization": "Bearer token"}
assert dest.timeout_seconds == 45
assert dest.retry_count == 2
assert dest.retry_delay_seconds == 5
class TestNotificationConfiguration:
def test_matches_event_exact_match(self):
config = NotificationConfiguration(
id="config1",
events=["s3:ObjectCreated:Put"],
destination=WebhookDestination(url="http://example.com"),
)
assert config.matches_event("s3:ObjectCreated:Put", "any/key.txt") is True
assert config.matches_event("s3:ObjectCreated:Post", "any/key.txt") is False
def test_matches_event_wildcard(self):
config = NotificationConfiguration(
id="config1",
events=["s3:ObjectCreated:*"],
destination=WebhookDestination(url="http://example.com"),
)
assert config.matches_event("s3:ObjectCreated:Put", "key.txt") is True
assert config.matches_event("s3:ObjectCreated:Copy", "key.txt") is True
assert config.matches_event("s3:ObjectRemoved:Delete", "key.txt") is False
def test_matches_event_with_prefix_filter(self):
config = NotificationConfiguration(
id="config1",
events=["s3:ObjectCreated:*"],
destination=WebhookDestination(url="http://example.com"),
prefix_filter="logs/",
)
assert config.matches_event("s3:ObjectCreated:Put", "logs/app.log") is True
assert config.matches_event("s3:ObjectCreated:Put", "data/file.txt") is False
def test_matches_event_with_suffix_filter(self):
config = NotificationConfiguration(
id="config1",
events=["s3:ObjectCreated:*"],
destination=WebhookDestination(url="http://example.com"),
suffix_filter=".jpg",
)
assert config.matches_event("s3:ObjectCreated:Put", "photos/image.jpg") is True
assert config.matches_event("s3:ObjectCreated:Put", "photos/image.png") is False
def test_matches_event_with_both_filters(self):
config = NotificationConfiguration(
id="config1",
events=["s3:ObjectCreated:*"],
destination=WebhookDestination(url="http://example.com"),
prefix_filter="images/",
suffix_filter=".png",
)
assert config.matches_event("s3:ObjectCreated:Put", "images/photo.png") is True
assert config.matches_event("s3:ObjectCreated:Put", "images/photo.jpg") is False
assert config.matches_event("s3:ObjectCreated:Put", "documents/file.png") is False
def test_to_dict(self):
config = NotificationConfiguration(
id="my-config",
events=["s3:ObjectCreated:Put", "s3:ObjectRemoved:Delete"],
destination=WebhookDestination(url="http://example.com"),
prefix_filter="logs/",
suffix_filter=".log",
)
result = config.to_dict()
assert result["Id"] == "my-config"
assert result["Events"] == ["s3:ObjectCreated:Put", "s3:ObjectRemoved:Delete"]
assert "Destination" in result
assert result["Filter"]["Key"]["FilterRules"][0]["Value"] == "logs/"
assert result["Filter"]["Key"]["FilterRules"][1]["Value"] == ".log"
def test_from_dict(self):
data = {
"Id": "parsed-config",
"Events": ["s3:ObjectCreated:*"],
"Destination": {"url": "http://hook.example.com"},
"Filter": {
"Key": {
"FilterRules": [
{"Name": "prefix", "Value": "data/"},
{"Name": "suffix", "Value": ".csv"},
]
}
},
}
config = NotificationConfiguration.from_dict(data)
assert config.id == "parsed-config"
assert config.events == ["s3:ObjectCreated:*"]
assert config.destination.url == "http://hook.example.com"
assert config.prefix_filter == "data/"
assert config.suffix_filter == ".csv"
@pytest.fixture
def notification_service(tmp_path: Path):
service = NotificationService(tmp_path, worker_count=1)
yield service
service.shutdown()
class TestNotificationService:
def test_get_bucket_notifications_empty(self, notification_service):
result = notification_service.get_bucket_notifications("nonexistent-bucket")
assert result == []
def test_set_and_get_bucket_notifications(self, notification_service):
configs = [
NotificationConfiguration(
id="config1",
events=["s3:ObjectCreated:*"],
destination=WebhookDestination(url="http://example.com/webhook1"),
),
NotificationConfiguration(
id="config2",
events=["s3:ObjectRemoved:*"],
destination=WebhookDestination(url="http://example.com/webhook2"),
),
]
notification_service.set_bucket_notifications("my-bucket", configs)
retrieved = notification_service.get_bucket_notifications("my-bucket")
assert len(retrieved) == 2
assert retrieved[0].id == "config1"
assert retrieved[1].id == "config2"
def test_delete_bucket_notifications(self, notification_service):
configs = [
NotificationConfiguration(
id="to-delete",
events=["s3:ObjectCreated:*"],
destination=WebhookDestination(url="http://example.com"),
),
]
notification_service.set_bucket_notifications("delete-bucket", configs)
assert len(notification_service.get_bucket_notifications("delete-bucket")) == 1
notification_service.delete_bucket_notifications("delete-bucket")
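# Drop the in-memory cache so the follow-up read reflects the persisted deletion, not stale state.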
notification_service._configs.clear()
assert len(notification_service.get_bucket_notifications("delete-bucket")) == 0
def test_emit_event_no_config(self, notification_service):
event = NotificationEvent(
event_name="s3:ObjectCreated:Put",
bucket_name="no-config-bucket",
object_key="test.txt",
)
notification_service.emit_event(event)
assert notification_service._stats["events_queued"] == 0
def test_emit_event_matching_config(self, notification_service):
configs = [
NotificationConfiguration(
id="match-config",
events=["s3:ObjectCreated:*"],
destination=WebhookDestination(url="http://example.com/webhook"),
),
]
notification_service.set_bucket_notifications("event-bucket", configs)
event = NotificationEvent(
event_name="s3:ObjectCreated:Put",
bucket_name="event-bucket",
object_key="test.txt",
)
notification_service.emit_event(event)
assert notification_service._stats["events_queued"] == 1
def test_emit_event_non_matching_config(self, notification_service):
configs = [
NotificationConfiguration(
id="delete-only",
events=["s3:ObjectRemoved:*"],
destination=WebhookDestination(url="http://example.com/webhook"),
),
]
notification_service.set_bucket_notifications("delete-bucket", configs)
event = NotificationEvent(
event_name="s3:ObjectCreated:Put",
bucket_name="delete-bucket",
object_key="test.txt",
)
notification_service.emit_event(event)
assert notification_service._stats["events_queued"] == 0
def test_emit_object_created(self, notification_service):
configs = [
NotificationConfiguration(
id="create-config",
events=["s3:ObjectCreated:Put"],
destination=WebhookDestination(url="http://example.com/webhook"),
),
]
notification_service.set_bucket_notifications("create-bucket", configs)
notification_service.emit_object_created(
"create-bucket",
"new-file.txt",
size=1024,
etag="abc123",
operation="Put",
)
assert notification_service._stats["events_queued"] == 1
def test_emit_object_removed(self, notification_service):
configs = [
NotificationConfiguration(
id="remove-config",
events=["s3:ObjectRemoved:Delete"],
destination=WebhookDestination(url="http://example.com/webhook"),
),
]
notification_service.set_bucket_notifications("remove-bucket", configs)
notification_service.emit_object_removed(
"remove-bucket",
"deleted-file.txt",
operation="Delete",
)
assert notification_service._stats["events_queued"] == 1
def test_get_stats(self, notification_service):
stats = notification_service.get_stats()
assert "events_queued" in stats
assert "events_sent" in stats
assert "events_failed" in stats
@patch("app.notifications._pinned_post")
@patch("app.notifications._resolve_and_check_url", return_value="93.184.216.34")
def test_send_notification_success(self, mock_resolve, mock_post, notification_service):
mock_response = MagicMock()
mock_response.status_code = 200
mock_post.return_value = mock_response
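# Both network helpers are patched, so the test performs no real DNS resolution or HTTP request.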
event = NotificationEvent(
event_name="s3:ObjectCreated:Put",
bucket_name="test-bucket",
object_key="test.txt",
)
destination = WebhookDestination(url="http://example.com/webhook")
notification_service._send_notification(event, destination)
mock_post.assert_called_once()
@patch("app.notifications._pinned_post")
@patch("app.notifications._resolve_and_check_url", return_value="93.184.216.34")
def test_send_notification_retry_on_failure(self, mock_resolve, mock_post, notification_service):
mock_response = MagicMock()
mock_response.status_code = 500
mock_response.text = "Internal Server Error"
mock_post.return_value = mock_response
event = NotificationEvent(
event_name="s3:ObjectCreated:Put",
bucket_name="test-bucket",
object_key="test.txt",
)
destination = WebhookDestination(
url="http://example.com/webhook",
retry_count=2,
retry_delay_seconds=0,
)
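# retry_delay_seconds=0 keeps the test fast; after retry_count failed attempts the send raises.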
with pytest.raises(RuntimeError) as exc_info:
notification_service._send_notification(event, destination)
assert "Failed after 2 attempts" in str(exc_info.value)
assert mock_post.call_count == 2
def test_notification_caching(self, notification_service):
configs = [
NotificationConfiguration(
id="cached-config",
events=["s3:ObjectCreated:*"],
destination=WebhookDestination(url="http://example.com"),
),
]
notification_service.set_bucket_notifications("cached-bucket", configs)
notification_service.get_bucket_notifications("cached-bucket")
assert "cached-bucket" in notification_service._configs

View File

@@ -0,0 +1,332 @@
import json
from datetime import datetime, timedelta, timezone
from pathlib import Path
import pytest
from app.object_lock import (
ObjectLockConfig,
ObjectLockError,
ObjectLockRetention,
ObjectLockService,
RetentionMode,
)
class TestRetentionMode:
def test_governance_mode(self):
assert RetentionMode.GOVERNANCE.value == "GOVERNANCE"
def test_compliance_mode(self):
assert RetentionMode.COMPLIANCE.value == "COMPLIANCE"
class TestObjectLockRetention:
def test_to_dict(self):
retain_until = datetime(2025, 12, 31, 23, 59, 59, tzinfo=timezone.utc)
retention = ObjectLockRetention(
mode=RetentionMode.GOVERNANCE,
retain_until_date=retain_until,
)
result = retention.to_dict()
assert result["Mode"] == "GOVERNANCE"
assert "2025-12-31" in result["RetainUntilDate"]
def test_from_dict(self):
data = {
"Mode": "COMPLIANCE",
"RetainUntilDate": "2030-06-15T12:00:00+00:00",
}
retention = ObjectLockRetention.from_dict(data)
assert retention is not None
assert retention.mode == RetentionMode.COMPLIANCE
assert retention.retain_until_date.year == 2030
def test_from_dict_empty(self):
result = ObjectLockRetention.from_dict({})
assert result is None
def test_from_dict_missing_mode(self):
data = {"RetainUntilDate": "2030-06-15T12:00:00+00:00"}
result = ObjectLockRetention.from_dict(data)
assert result is None
def test_from_dict_missing_date(self):
data = {"Mode": "GOVERNANCE"}
result = ObjectLockRetention.from_dict(data)
assert result is None
def test_is_expired_future_date(self):
future = datetime.now(timezone.utc) + timedelta(days=30)
retention = ObjectLockRetention(
mode=RetentionMode.GOVERNANCE,
retain_until_date=future,
)
assert retention.is_expired() is False
def test_is_expired_past_date(self):
past = datetime.now(timezone.utc) - timedelta(days=30)
retention = ObjectLockRetention(
mode=RetentionMode.GOVERNANCE,
retain_until_date=past,
)
assert retention.is_expired() is True
class TestObjectLockConfig:
def test_to_dict_enabled(self):
config = ObjectLockConfig(enabled=True)
result = config.to_dict()
assert result["ObjectLockEnabled"] == "Enabled"
def test_to_dict_disabled(self):
config = ObjectLockConfig(enabled=False)
result = config.to_dict()
assert result["ObjectLockEnabled"] == "Disabled"
def test_from_dict_enabled(self):
data = {"ObjectLockEnabled": "Enabled"}
config = ObjectLockConfig.from_dict(data)
assert config.enabled is True
def test_from_dict_disabled(self):
data = {"ObjectLockEnabled": "Disabled"}
config = ObjectLockConfig.from_dict(data)
assert config.enabled is False
def test_from_dict_with_default_retention_days(self):
data = {
"ObjectLockEnabled": "Enabled",
"Rule": {
"DefaultRetention": {
"Mode": "GOVERNANCE",
"Days": 30,
}
},
}
config = ObjectLockConfig.from_dict(data)
assert config.enabled is True
assert config.default_retention is not None
assert config.default_retention.mode == RetentionMode.GOVERNANCE
def test_from_dict_with_default_retention_years(self):
data = {
"ObjectLockEnabled": "Enabled",
"Rule": {
"DefaultRetention": {
"Mode": "COMPLIANCE",
"Years": 1,
}
},
}
config = ObjectLockConfig.from_dict(data)
assert config.enabled is True
assert config.default_retention is not None
assert config.default_retention.mode == RetentionMode.COMPLIANCE
@pytest.fixture
def lock_service(tmp_path: Path):
return ObjectLockService(tmp_path)
class TestObjectLockService:
def test_get_bucket_lock_config_default(self, lock_service):
config = lock_service.get_bucket_lock_config("nonexistent-bucket")
assert config.enabled is False
assert config.default_retention is None
def test_set_and_get_bucket_lock_config(self, lock_service):
config = ObjectLockConfig(enabled=True)
lock_service.set_bucket_lock_config("my-bucket", config)
retrieved = lock_service.get_bucket_lock_config("my-bucket")
assert retrieved.enabled is True
def test_enable_bucket_lock(self, lock_service):
lock_service.enable_bucket_lock("lock-bucket")
config = lock_service.get_bucket_lock_config("lock-bucket")
assert config.enabled is True
def test_is_bucket_lock_enabled(self, lock_service):
assert lock_service.is_bucket_lock_enabled("new-bucket") is False
lock_service.enable_bucket_lock("new-bucket")
assert lock_service.is_bucket_lock_enabled("new-bucket") is True
def test_get_object_retention_not_set(self, lock_service):
result = lock_service.get_object_retention("bucket", "key.txt")
assert result is None
def test_set_and_get_object_retention(self, lock_service):
future = datetime.now(timezone.utc) + timedelta(days=30)
retention = ObjectLockRetention(
mode=RetentionMode.GOVERNANCE,
retain_until_date=future,
)
lock_service.set_object_retention("bucket", "key.txt", retention)
retrieved = lock_service.get_object_retention("bucket", "key.txt")
assert retrieved is not None
assert retrieved.mode == RetentionMode.GOVERNANCE
def test_cannot_modify_compliance_retention(self, lock_service):
future = datetime.now(timezone.utc) + timedelta(days=30)
retention = ObjectLockRetention(
mode=RetentionMode.COMPLIANCE,
retain_until_date=future,
)
lock_service.set_object_retention("bucket", "locked.txt", retention)
new_retention = ObjectLockRetention(
mode=RetentionMode.GOVERNANCE,
retain_until_date=future + timedelta(days=10),
)
with pytest.raises(ObjectLockError) as exc_info:
lock_service.set_object_retention("bucket", "locked.txt", new_retention)
assert "COMPLIANCE" in str(exc_info.value)
def test_cannot_modify_governance_without_bypass(self, lock_service):
future = datetime.now(timezone.utc) + timedelta(days=30)
retention = ObjectLockRetention(
mode=RetentionMode.GOVERNANCE,
retain_until_date=future,
)
lock_service.set_object_retention("bucket", "gov.txt", retention)
new_retention = ObjectLockRetention(
mode=RetentionMode.GOVERNANCE,
retain_until_date=future + timedelta(days=10),
)
with pytest.raises(ObjectLockError) as exc_info:
lock_service.set_object_retention("bucket", "gov.txt", new_retention)
assert "GOVERNANCE" in str(exc_info.value)
def test_can_modify_governance_with_bypass(self, lock_service):
future = datetime.now(timezone.utc) + timedelta(days=30)
retention = ObjectLockRetention(
mode=RetentionMode.GOVERNANCE,
retain_until_date=future,
)
lock_service.set_object_retention("bucket", "bypassable.txt", retention)
new_retention = ObjectLockRetention(
mode=RetentionMode.GOVERNANCE,
retain_until_date=future + timedelta(days=10),
)
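# bypass_governance mirrors S3's x-amz-bypass-governance-retention flag for GOVERNANCE-mode locks.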
lock_service.set_object_retention("bucket", "bypassable.txt", new_retention, bypass_governance=True)
retrieved = lock_service.get_object_retention("bucket", "bypassable.txt")
assert retrieved.retain_until_date > future
def test_can_modify_expired_retention(self, lock_service):
past = datetime.now(timezone.utc) - timedelta(days=30)
retention = ObjectLockRetention(
mode=RetentionMode.COMPLIANCE,
retain_until_date=past,
)
lock_service.set_object_retention("bucket", "expired.txt", retention)
future = datetime.now(timezone.utc) + timedelta(days=30)
new_retention = ObjectLockRetention(
mode=RetentionMode.GOVERNANCE,
retain_until_date=future,
)
lock_service.set_object_retention("bucket", "expired.txt", new_retention)
retrieved = lock_service.get_object_retention("bucket", "expired.txt")
assert retrieved.mode == RetentionMode.GOVERNANCE
def test_get_legal_hold_not_set(self, lock_service):
result = lock_service.get_legal_hold("bucket", "key.txt")
assert result is False
def test_set_and_get_legal_hold(self, lock_service):
lock_service.set_legal_hold("bucket", "held.txt", True)
assert lock_service.get_legal_hold("bucket", "held.txt") is True
lock_service.set_legal_hold("bucket", "held.txt", False)
assert lock_service.get_legal_hold("bucket", "held.txt") is False
def test_can_delete_object_no_lock(self, lock_service):
can_delete, reason = lock_service.can_delete_object("bucket", "unlocked.txt")
assert can_delete is True
assert reason == ""
def test_cannot_delete_object_with_legal_hold(self, lock_service):
lock_service.set_legal_hold("bucket", "held.txt", True)
can_delete, reason = lock_service.can_delete_object("bucket", "held.txt")
assert can_delete is False
assert "legal hold" in reason.lower()
def test_cannot_delete_object_with_compliance_retention(self, lock_service):
future = datetime.now(timezone.utc) + timedelta(days=30)
retention = ObjectLockRetention(
mode=RetentionMode.COMPLIANCE,
retain_until_date=future,
)
lock_service.set_object_retention("bucket", "compliant.txt", retention)
can_delete, reason = lock_service.can_delete_object("bucket", "compliant.txt")
assert can_delete is False
assert "COMPLIANCE" in reason
def test_cannot_delete_governance_without_bypass(self, lock_service):
future = datetime.now(timezone.utc) + timedelta(days=30)
retention = ObjectLockRetention(
mode=RetentionMode.GOVERNANCE,
retain_until_date=future,
)
lock_service.set_object_retention("bucket", "governed.txt", retention)
can_delete, reason = lock_service.can_delete_object("bucket", "governed.txt")
assert can_delete is False
assert "GOVERNANCE" in reason
def test_can_delete_governance_with_bypass(self, lock_service):
future = datetime.now(timezone.utc) + timedelta(days=30)
retention = ObjectLockRetention(
mode=RetentionMode.GOVERNANCE,
retain_until_date=future,
)
lock_service.set_object_retention("bucket", "governed.txt", retention)
can_delete, reason = lock_service.can_delete_object("bucket", "governed.txt", bypass_governance=True)
assert can_delete is True
assert reason == ""
def test_can_delete_expired_retention(self, lock_service):
past = datetime.now(timezone.utc) - timedelta(days=30)
retention = ObjectLockRetention(
mode=RetentionMode.COMPLIANCE,
retain_until_date=past,
)
lock_service.set_object_retention("bucket", "expired.txt", retention)
can_delete, reason = lock_service.can_delete_object("bucket", "expired.txt")
assert can_delete is True
def test_can_overwrite_is_same_as_delete(self, lock_service):
future = datetime.now(timezone.utc) + timedelta(days=30)
retention = ObjectLockRetention(
mode=RetentionMode.GOVERNANCE,
retain_until_date=future,
)
lock_service.set_object_retention("bucket", "overwrite.txt", retention)
can_overwrite, _ = lock_service.can_overwrite_object("bucket", "overwrite.txt")
can_delete, _ = lock_service.can_delete_object("bucket", "overwrite.txt")
assert can_overwrite == can_delete
def test_delete_object_lock_metadata(self, lock_service):
lock_service.set_legal_hold("bucket", "cleanup.txt", True)
lock_service.delete_object_lock_metadata("bucket", "cleanup.txt")
assert lock_service.get_legal_hold("bucket", "cleanup.txt") is False
def test_config_caching(self, lock_service):
config = ObjectLockConfig(enabled=True)
lock_service.set_bucket_lock_config("cached-bucket", config)
lock_service.get_bucket_lock_config("cached-bucket")
assert "cached-bucket" in lock_service._config_cache

View File

@@ -0,0 +1,297 @@
import threading
import time
from pathlib import Path
import pytest
from app.operation_metrics import (
OperationMetricsCollector,
OperationStats,
classify_endpoint,
)
class TestOperationStats:
def test_initial_state(self):
stats = OperationStats()
assert stats.count == 0
assert stats.success_count == 0
assert stats.error_count == 0
assert stats.latency_sum_ms == 0.0
assert stats.bytes_in == 0
assert stats.bytes_out == 0
def test_record_success(self):
stats = OperationStats()
stats.record(latency_ms=50.0, success=True, bytes_in=100, bytes_out=200)
assert stats.count == 1
assert stats.success_count == 1
assert stats.error_count == 0
assert stats.latency_sum_ms == 50.0
assert stats.latency_min_ms == 50.0
assert stats.latency_max_ms == 50.0
assert stats.bytes_in == 100
assert stats.bytes_out == 200
def test_record_error(self):
stats = OperationStats()
stats.record(latency_ms=100.0, success=False, bytes_in=50, bytes_out=0)
assert stats.count == 1
assert stats.success_count == 0
assert stats.error_count == 1
def test_latency_min_max(self):
stats = OperationStats()
stats.record(latency_ms=50.0, success=True)
stats.record(latency_ms=10.0, success=True)
stats.record(latency_ms=100.0, success=True)
assert stats.latency_min_ms == 10.0
assert stats.latency_max_ms == 100.0
assert stats.latency_sum_ms == 160.0
def test_to_dict(self):
stats = OperationStats()
stats.record(latency_ms=50.0, success=True, bytes_in=100, bytes_out=200)
stats.record(latency_ms=100.0, success=False, bytes_in=50, bytes_out=0)
result = stats.to_dict()
assert result["count"] == 2
assert result["success_count"] == 1
assert result["error_count"] == 1
assert result["latency_avg_ms"] == 75.0
assert result["latency_min_ms"] == 50.0
assert result["latency_max_ms"] == 100.0
assert result["bytes_in"] == 150
assert result["bytes_out"] == 200
def test_to_dict_empty(self):
stats = OperationStats()
result = stats.to_dict()
assert result["count"] == 0
assert result["latency_avg_ms"] == 0.0
assert result["latency_min_ms"] == 0.0
def test_merge(self):
stats1 = OperationStats()
stats1.record(latency_ms=50.0, success=True, bytes_in=100, bytes_out=200)
stats2 = OperationStats()
stats2.record(latency_ms=10.0, success=True, bytes_in=50, bytes_out=100)
stats2.record(latency_ms=100.0, success=False, bytes_in=25, bytes_out=50)
stats1.merge(stats2)
assert stats1.count == 3
assert stats1.success_count == 2
assert stats1.error_count == 1
assert stats1.latency_min_ms == 10.0
assert stats1.latency_max_ms == 100.0
assert stats1.bytes_in == 175
assert stats1.bytes_out == 350
class TestClassifyEndpoint:
def test_root_path(self):
assert classify_endpoint("/") == "service"
assert classify_endpoint("") == "service"
def test_ui_paths(self):
assert classify_endpoint("/ui") == "ui"
assert classify_endpoint("/ui/buckets") == "ui"
assert classify_endpoint("/ui/metrics") == "ui"
def test_kms_paths(self):
assert classify_endpoint("/kms") == "kms"
assert classify_endpoint("/kms/keys") == "kms"
def test_service_paths(self):
assert classify_endpoint("/myfsio/health") == "service"
def test_bucket_paths(self):
assert classify_endpoint("/mybucket") == "bucket"
assert classify_endpoint("/mybucket/") == "bucket"
def test_object_paths(self):
assert classify_endpoint("/mybucket/mykey") == "object"
assert classify_endpoint("/mybucket/folder/nested/key.txt") == "object"
class TestOperationMetricsCollector:
def test_record_and_get_stats(self, tmp_path: Path):
collector = OperationMetricsCollector(
storage_root=tmp_path,
interval_minutes=60,
retention_hours=24,
)
try:
collector.record_request(
method="GET",
endpoint_type="bucket",
status_code=200,
latency_ms=50.0,
bytes_in=0,
bytes_out=1000,
)
collector.record_request(
method="PUT",
endpoint_type="object",
status_code=201,
latency_ms=100.0,
bytes_in=500,
bytes_out=0,
)
collector.record_request(
method="GET",
endpoint_type="object",
status_code=404,
latency_ms=25.0,
bytes_in=0,
bytes_out=0,
error_code="NoSuchKey",
)
stats = collector.get_current_stats()
assert stats["totals"]["count"] == 3
assert stats["totals"]["success_count"] == 2
assert stats["totals"]["error_count"] == 1
assert "GET" in stats["by_method"]
assert stats["by_method"]["GET"]["count"] == 2
assert "PUT" in stats["by_method"]
assert stats["by_method"]["PUT"]["count"] == 1
assert "bucket" in stats["by_endpoint"]
assert "object" in stats["by_endpoint"]
assert stats["by_endpoint"]["object"]["count"] == 2
assert stats["by_status_class"]["2xx"] == 2
assert stats["by_status_class"]["4xx"] == 1
assert stats["error_codes"]["NoSuchKey"] == 1
finally:
collector.shutdown()
def test_thread_safety(self, tmp_path: Path):
collector = OperationMetricsCollector(
storage_root=tmp_path,
interval_minutes=60,
retention_hours=24,
)
try:
num_threads = 5
requests_per_thread = 100
threads = []
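# Hammer the collector from several threads; the final count must equal every recorded request.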
def record_requests():
for _ in range(requests_per_thread):
collector.record_request(
method="GET",
endpoint_type="object",
status_code=200,
latency_ms=10.0,
)
for _ in range(num_threads):
t = threading.Thread(target=record_requests)
threads.append(t)
t.start()
for t in threads:
t.join()
stats = collector.get_current_stats()
assert stats["totals"]["count"] == num_threads * requests_per_thread
finally:
collector.shutdown()
def test_status_class_categorization(self, tmp_path: Path):
collector = OperationMetricsCollector(
storage_root=tmp_path,
interval_minutes=60,
retention_hours=24,
)
try:
collector.record_request("GET", "object", 200, 10.0)
collector.record_request("GET", "object", 204, 10.0)
collector.record_request("GET", "object", 301, 10.0)
collector.record_request("GET", "object", 304, 10.0)
collector.record_request("GET", "object", 400, 10.0)
collector.record_request("GET", "object", 403, 10.0)
collector.record_request("GET", "object", 404, 10.0)
collector.record_request("GET", "object", 500, 10.0)
collector.record_request("GET", "object", 503, 10.0)
stats = collector.get_current_stats()
assert stats["by_status_class"]["2xx"] == 2
assert stats["by_status_class"]["3xx"] == 2
assert stats["by_status_class"]["4xx"] == 3
assert stats["by_status_class"]["5xx"] == 2
finally:
collector.shutdown()
def test_error_code_tracking(self, tmp_path: Path):
collector = OperationMetricsCollector(
storage_root=tmp_path,
interval_minutes=60,
retention_hours=24,
)
try:
collector.record_request("GET", "object", 404, 10.0, error_code="NoSuchKey")
collector.record_request("GET", "object", 404, 10.0, error_code="NoSuchKey")
collector.record_request("GET", "bucket", 403, 10.0, error_code="AccessDenied")
collector.record_request("PUT", "object", 500, 10.0, error_code="InternalError")
stats = collector.get_current_stats()
assert stats["error_codes"]["NoSuchKey"] == 2
assert stats["error_codes"]["AccessDenied"] == 1
assert stats["error_codes"]["InternalError"] == 1
finally:
collector.shutdown()
def test_history_persistence(self, tmp_path: Path):
collector = OperationMetricsCollector(
storage_root=tmp_path,
interval_minutes=60,
retention_hours=24,
)
try:
collector.record_request("GET", "object", 200, 10.0)
collector._take_snapshot()
history = collector.get_history()
assert len(history) == 1
assert history[0]["totals"]["count"] == 1
config_path = tmp_path / ".myfsio.sys" / "config" / "operation_metrics.json"
assert config_path.exists()
finally:
collector.shutdown()
def test_get_history_with_hours_filter(self, tmp_path: Path):
collector = OperationMetricsCollector(
storage_root=tmp_path,
interval_minutes=60,
retention_hours=24,
)
try:
collector.record_request("GET", "object", 200, 10.0)
collector._take_snapshot()
history_all = collector.get_history()
history_recent = collector.get_history(hours=1)
assert len(history_all) >= len(history_recent)
finally:
collector.shutdown()

View File

@@ -0,0 +1,287 @@
import json
import time
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from app.connections import ConnectionStore, RemoteConnection
from app.replication import (
ReplicationManager,
ReplicationRule,
ReplicationStats,
REPLICATION_MODE_ALL,
REPLICATION_MODE_NEW_ONLY,
_create_s3_client,
)
from app.storage import ObjectStorage
@pytest.fixture
def storage(tmp_path: Path):
storage_root = tmp_path / "data"
storage_root.mkdir(parents=True)
return ObjectStorage(storage_root)
@pytest.fixture
def connections(tmp_path: Path):
connections_path = tmp_path / "connections.json"
store = ConnectionStore(connections_path)
conn = RemoteConnection(
id="test-conn",
name="Test Remote",
endpoint_url="http://localhost:9000",
access_key="remote-access",
secret_key="remote-secret",
region="us-east-1",
)
store.add(conn)
return store
@pytest.fixture
def replication_manager(storage, connections, tmp_path):
rules_path = tmp_path / "replication_rules.json"
storage_root = tmp_path / "data"
storage_root.mkdir(exist_ok=True)
manager = ReplicationManager(storage, connections, rules_path, storage_root)
yield manager
manager.shutdown(wait=False)
class TestReplicationStats:
def test_to_dict(self):
stats = ReplicationStats(
objects_synced=10,
objects_pending=5,
objects_orphaned=2,
bytes_synced=1024,
last_sync_at=1234567890.0,
last_sync_key="test/key.txt",
)
result = stats.to_dict()
assert result["objects_synced"] == 10
assert result["objects_pending"] == 5
assert result["objects_orphaned"] == 2
assert result["bytes_synced"] == 1024
assert result["last_sync_at"] == 1234567890.0
assert result["last_sync_key"] == "test/key.txt"
def test_from_dict(self):
data = {
"objects_synced": 15,
"objects_pending": 3,
"objects_orphaned": 1,
"bytes_synced": 2048,
"last_sync_at": 9876543210.0,
"last_sync_key": "another/key.txt",
}
stats = ReplicationStats.from_dict(data)
assert stats.objects_synced == 15
assert stats.objects_pending == 3
assert stats.objects_orphaned == 1
assert stats.bytes_synced == 2048
assert stats.last_sync_at == 9876543210.0
assert stats.last_sync_key == "another/key.txt"
def test_from_dict_with_defaults(self):
stats = ReplicationStats.from_dict({})
assert stats.objects_synced == 0
assert stats.objects_pending == 0
assert stats.objects_orphaned == 0
assert stats.bytes_synced == 0
assert stats.last_sync_at is None
assert stats.last_sync_key is None
class TestReplicationRule:
def test_to_dict(self):
rule = ReplicationRule(
bucket_name="source-bucket",
target_connection_id="test-conn",
target_bucket="dest-bucket",
enabled=True,
mode=REPLICATION_MODE_ALL,
created_at=1234567890.0,
)
result = rule.to_dict()
assert result["bucket_name"] == "source-bucket"
assert result["target_connection_id"] == "test-conn"
assert result["target_bucket"] == "dest-bucket"
assert result["enabled"] is True
assert result["mode"] == REPLICATION_MODE_ALL
assert result["created_at"] == 1234567890.0
assert "stats" in result
def test_from_dict(self):
data = {
"bucket_name": "my-bucket",
"target_connection_id": "conn-123",
"target_bucket": "remote-bucket",
"enabled": False,
"mode": REPLICATION_MODE_NEW_ONLY,
"created_at": 1111111111.0,
"stats": {"objects_synced": 5},
}
rule = ReplicationRule.from_dict(data)
assert rule.bucket_name == "my-bucket"
assert rule.target_connection_id == "conn-123"
assert rule.target_bucket == "remote-bucket"
assert rule.enabled is False
assert rule.mode == REPLICATION_MODE_NEW_ONLY
assert rule.created_at == 1111111111.0
assert rule.stats.objects_synced == 5
def test_from_dict_defaults_mode(self):
data = {
"bucket_name": "my-bucket",
"target_connection_id": "conn-123",
"target_bucket": "remote-bucket",
}
rule = ReplicationRule.from_dict(data)
assert rule.mode == REPLICATION_MODE_NEW_ONLY
assert rule.created_at is None
class TestReplicationManager:
def test_get_rule_not_exists(self, replication_manager):
rule = replication_manager.get_rule("nonexistent-bucket")
assert rule is None
def test_set_and_get_rule(self, replication_manager):
rule = ReplicationRule(
bucket_name="my-bucket",
target_connection_id="test-conn",
target_bucket="remote-bucket",
enabled=True,
mode=REPLICATION_MODE_NEW_ONLY,
created_at=time.time(),
)
replication_manager.set_rule(rule)
retrieved = replication_manager.get_rule("my-bucket")
assert retrieved is not None
assert retrieved.bucket_name == "my-bucket"
assert retrieved.target_connection_id == "test-conn"
assert retrieved.target_bucket == "remote-bucket"
def test_delete_rule(self, replication_manager):
rule = ReplicationRule(
bucket_name="to-delete",
target_connection_id="test-conn",
target_bucket="remote-bucket",
)
replication_manager.set_rule(rule)
assert replication_manager.get_rule("to-delete") is not None
replication_manager.delete_rule("to-delete")
assert replication_manager.get_rule("to-delete") is None
def test_save_and_reload_rules(self, replication_manager, tmp_path):
rule = ReplicationRule(
bucket_name="persistent-bucket",
target_connection_id="test-conn",
target_bucket="remote-bucket",
enabled=True,
)
replication_manager.set_rule(rule)
rules_path = tmp_path / "replication_rules.json"
assert rules_path.exists()
data = json.loads(rules_path.read_text())
assert "persistent-bucket" in data
@patch("app.replication._create_s3_client")
def test_check_endpoint_health_success(self, mock_create_client, replication_manager, connections):
mock_client = MagicMock()
mock_client.list_buckets.return_value = {"Buckets": []}
mock_create_client.return_value = mock_client
conn = connections.get("test-conn")
result = replication_manager.check_endpoint_health(conn)
assert result is True
mock_client.list_buckets.assert_called_once()
@patch("app.replication._create_s3_client")
def test_check_endpoint_health_failure(self, mock_create_client, replication_manager, connections):
mock_client = MagicMock()
mock_client.list_buckets.side_effect = Exception("Connection refused")
mock_create_client.return_value = mock_client
conn = connections.get("test-conn")
result = replication_manager.check_endpoint_health(conn)
assert result is False
    def test_trigger_replication_no_rule(self, replication_manager):
        # No rule is configured for this bucket, so the call must be a silent no-op.
        replication_manager.trigger_replication("no-such-bucket", "test.txt", "write")
def test_trigger_replication_disabled_rule(self, replication_manager):
rule = ReplicationRule(
bucket_name="disabled-bucket",
target_connection_id="test-conn",
target_bucket="remote-bucket",
enabled=False,
)
replication_manager.set_rule(rule)
replication_manager.trigger_replication("disabled-bucket", "test.txt", "write")
def test_trigger_replication_missing_connection(self, replication_manager):
rule = ReplicationRule(
bucket_name="orphan-bucket",
target_connection_id="missing-conn",
target_bucket="remote-bucket",
enabled=True,
)
replication_manager.set_rule(rule)
replication_manager.trigger_replication("orphan-bucket", "test.txt", "write")
def test_replicate_task_path_traversal_blocked(self, replication_manager, connections):
rule = ReplicationRule(
bucket_name="secure-bucket",
target_connection_id="test-conn",
target_bucket="remote-bucket",
enabled=True,
)
replication_manager.set_rule(rule)
conn = connections.get("test-conn")
replication_manager._replicate_task("secure-bucket", "../../../etc/passwd", rule, conn, "write")
replication_manager._replicate_task("secure-bucket", "/root/secret", rule, conn, "write")
replication_manager._replicate_task("secure-bucket", "..\\..\\windows\\system32", rule, conn, "write")
class TestCreateS3Client:
@patch("app.replication.boto3.client")
def test_creates_client_with_correct_config(self, mock_boto_client):
conn = RemoteConnection(
id="test",
name="Test",
endpoint_url="http://localhost:9000",
access_key="access",
secret_key="secret",
region="eu-west-1",
)
_create_s3_client(conn)
mock_boto_client.assert_called_once()
call_kwargs = mock_boto_client.call_args[1]
assert call_kwargs["endpoint_url"] == "http://localhost:9000"
assert call_kwargs["aws_access_key_id"] == "access"
assert call_kwargs["aws_secret_access_key"] == "secret"
assert call_kwargs["region_name"] == "eu-west-1"
@patch("app.replication.boto3.client")
def test_health_check_mode_minimal_retries(self, mock_boto_client):
conn = RemoteConnection(
id="test",
name="Test",
endpoint_url="http://localhost:9000",
access_key="access",
secret_key="secret",
)
_create_s3_client(conn, health_check=True)
call_kwargs = mock_boto_client.call_args[1]
config = call_kwargs["config"]
assert config.retries["max_attempts"] == 1
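# The assertions above pin the keyword arguments _create_s3_client forwards to
# boto3.client. A hedged sketch of a factory that satisfies them; the service
# name, the non-health-check retry count, and any extra Config options are
# assumptions here, not the real implementation:
import boto3
from botocore.config import Config
def _make_client_sketch(conn, health_check=False):
    retries = {"max_attempts": 1} if health_check else {"max_attempts": 3}
    return boto3.client(
        "s3",
        endpoint_url=conn.endpoint_url,
        aws_access_key_id=conn.access_key,
        aws_secret_access_key=conn.secret_key,
        region_name=conn.region,
        config=Config(retries=retries),
    )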

View File

@@ -0,0 +1,350 @@
import hashlib
import io
import os
import secrets
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
try:
import myfsio_core as _rc
HAS_RUST = True
except ImportError:
_rc = None
HAS_RUST = False
pytestmark = pytest.mark.skipif(not HAS_RUST, reason="myfsio_core not available")
class TestStreamToFileWithMd5:
def test_basic_write(self, tmp_path):
data = b"hello world" * 1000
stream = io.BytesIO(data)
tmp_dir = str(tmp_path / "tmp")
tmp_path_str, md5_hex, size = _rc.stream_to_file_with_md5(stream, tmp_dir)
assert size == len(data)
assert md5_hex == hashlib.md5(data).hexdigest()
assert Path(tmp_path_str).exists()
assert Path(tmp_path_str).read_bytes() == data
def test_empty_stream(self, tmp_path):
stream = io.BytesIO(b"")
tmp_dir = str(tmp_path / "tmp")
tmp_path_str, md5_hex, size = _rc.stream_to_file_with_md5(stream, tmp_dir)
assert size == 0
assert md5_hex == hashlib.md5(b"").hexdigest()
assert Path(tmp_path_str).read_bytes() == b""
def test_large_data(self, tmp_path):
data = os.urandom(1024 * 1024 * 2)
stream = io.BytesIO(data)
tmp_dir = str(tmp_path / "tmp")
tmp_path_str, md5_hex, size = _rc.stream_to_file_with_md5(stream, tmp_dir)
assert size == len(data)
assert md5_hex == hashlib.md5(data).hexdigest()
def test_custom_chunk_size(self, tmp_path):
data = b"x" * 10000
stream = io.BytesIO(data)
tmp_dir = str(tmp_path / "tmp")
tmp_path_str, md5_hex, size = _rc.stream_to_file_with_md5(
stream, tmp_dir, chunk_size=128
)
assert size == len(data)
assert md5_hex == hashlib.md5(data).hexdigest()
class TestAssemblePartsWithMd5:
def test_basic_assembly(self, tmp_path):
parts = []
combined = b""
for i in range(3):
data = f"part{i}data".encode() * 100
combined += data
p = tmp_path / f"part{i}"
p.write_bytes(data)
parts.append(str(p))
dest = str(tmp_path / "output")
md5_hex = _rc.assemble_parts_with_md5(parts, dest)
assert md5_hex == hashlib.md5(combined).hexdigest()
assert Path(dest).read_bytes() == combined
def test_single_part(self, tmp_path):
data = b"single part data"
p = tmp_path / "part0"
p.write_bytes(data)
dest = str(tmp_path / "output")
md5_hex = _rc.assemble_parts_with_md5([str(p)], dest)
assert md5_hex == hashlib.md5(data).hexdigest()
assert Path(dest).read_bytes() == data
def test_empty_parts_list(self):
with pytest.raises(ValueError, match="No parts"):
_rc.assemble_parts_with_md5([], "dummy")
def test_missing_part_file(self, tmp_path):
with pytest.raises(OSError):
_rc.assemble_parts_with_md5(
[str(tmp_path / "nonexistent")], str(tmp_path / "out")
)
def test_large_parts(self, tmp_path):
parts = []
combined = b""
for i in range(5):
data = os.urandom(512 * 1024)
combined += data
p = tmp_path / f"part{i}"
p.write_bytes(data)
parts.append(str(p))
dest = str(tmp_path / "output")
md5_hex = _rc.assemble_parts_with_md5(parts, dest)
assert md5_hex == hashlib.md5(combined).hexdigest()
assert Path(dest).read_bytes() == combined
class TestEncryptDecryptStreamChunked:
def _python_derive_chunk_nonce(self, base_nonce, chunk_index):
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
from cryptography.hazmat.primitives import hashes
hkdf = HKDF(
algorithm=hashes.SHA256(),
length=12,
salt=base_nonce,
info=chunk_index.to_bytes(4, "big"),
)
return hkdf.derive(b"chunk_nonce")
def test_encrypt_decrypt_roundtrip(self, tmp_path):
data = b"Hello, encryption!" * 500
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "plaintext")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
chunk_count = _rc.encrypt_stream_chunked(
input_path, encrypted_path, key, base_nonce
)
assert chunk_count > 0
chunk_count_dec = _rc.decrypt_stream_chunked(
encrypted_path, decrypted_path, key, base_nonce
)
assert chunk_count_dec == chunk_count
assert Path(decrypted_path).read_bytes() == data
def test_empty_file(self, tmp_path):
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "empty")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(b"")
chunk_count = _rc.encrypt_stream_chunked(
input_path, encrypted_path, key, base_nonce
)
assert chunk_count == 0
chunk_count_dec = _rc.decrypt_stream_chunked(
encrypted_path, decrypted_path, key, base_nonce
)
assert chunk_count_dec == 0
assert Path(decrypted_path).read_bytes() == b""
def test_custom_chunk_size(self, tmp_path):
data = os.urandom(10000)
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "plaintext")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
chunk_count = _rc.encrypt_stream_chunked(
input_path, encrypted_path, key, base_nonce, chunk_size=1024
)
assert chunk_count == 10
_rc.decrypt_stream_chunked(encrypted_path, decrypted_path, key, base_nonce)
assert Path(decrypted_path).read_bytes() == data
def test_invalid_key_length(self, tmp_path):
input_path = str(tmp_path / "in")
Path(input_path).write_bytes(b"data")
with pytest.raises(ValueError, match="32 bytes"):
_rc.encrypt_stream_chunked(
input_path, str(tmp_path / "out"), b"short", secrets.token_bytes(12)
)
def test_invalid_nonce_length(self, tmp_path):
input_path = str(tmp_path / "in")
Path(input_path).write_bytes(b"data")
with pytest.raises(ValueError, match="12 bytes"):
_rc.encrypt_stream_chunked(
input_path, str(tmp_path / "out"), secrets.token_bytes(32), b"short"
)
def test_wrong_key_fails_decrypt(self, tmp_path):
data = b"sensitive data"
key = secrets.token_bytes(32)
wrong_key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "plaintext")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
_rc.encrypt_stream_chunked(input_path, encrypted_path, key, base_nonce)
with pytest.raises((ValueError, OSError)):
_rc.decrypt_stream_chunked(
encrypted_path, decrypted_path, wrong_key, base_nonce
)
def test_cross_compat_python_encrypt_rust_decrypt(self, tmp_path):
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
data = b"cross compat test data" * 100
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
chunk_size = 1024
encrypted_path = str(tmp_path / "py_encrypted")
with open(encrypted_path, "wb") as f:
f.write(b"\x00\x00\x00\x00")
aesgcm = AESGCM(key)
chunk_index = 0
offset = 0
while offset < len(data):
chunk = data[offset:offset + chunk_size]
nonce = self._python_derive_chunk_nonce(base_nonce, chunk_index)
enc = aesgcm.encrypt(nonce, chunk, None)
f.write(len(enc).to_bytes(4, "big"))
f.write(enc)
chunk_index += 1
offset += chunk_size
f.seek(0)
f.write(chunk_index.to_bytes(4, "big"))
decrypted_path = str(tmp_path / "rust_decrypted")
_rc.decrypt_stream_chunked(encrypted_path, decrypted_path, key, base_nonce)
assert Path(decrypted_path).read_bytes() == data
def test_cross_compat_rust_encrypt_python_decrypt(self, tmp_path):
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
data = b"cross compat reverse test" * 100
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
chunk_size = 1024
input_path = str(tmp_path / "plaintext")
encrypted_path = str(tmp_path / "rust_encrypted")
Path(input_path).write_bytes(data)
chunk_count = _rc.encrypt_stream_chunked(
input_path, encrypted_path, key, base_nonce, chunk_size=chunk_size
)
aesgcm = AESGCM(key)
with open(encrypted_path, "rb") as f:
count_bytes = f.read(4)
assert int.from_bytes(count_bytes, "big") == chunk_count
decrypted = b""
for i in range(chunk_count):
size = int.from_bytes(f.read(4), "big")
enc_chunk = f.read(size)
nonce = self._python_derive_chunk_nonce(base_nonce, i)
decrypted += aesgcm.decrypt(nonce, enc_chunk, None)
assert decrypted == data
def test_large_file_roundtrip(self, tmp_path):
data = os.urandom(1024 * 1024)
key = secrets.token_bytes(32)
base_nonce = secrets.token_bytes(12)
input_path = str(tmp_path / "large")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
_rc.encrypt_stream_chunked(input_path, encrypted_path, key, base_nonce)
_rc.decrypt_stream_chunked(encrypted_path, decrypted_path, key, base_nonce)
assert Path(decrypted_path).read_bytes() == data
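# The two cross-compat tests above fix the on-disk container format shared by
# the Python and Rust code paths: a 4-byte big-endian chunk count, then one
# record per chunk consisting of a 4-byte big-endian ciphertext length and the
# AES-GCM ciphertext (auth tag included), with each chunk nonce derived via
# HKDF-SHA256(salt=base_nonce, info=chunk_index as 4 big-endian bytes).
# A minimal reference reader for that layout (illustrative only, not the
# production decryptor):
def _read_chunked_container(path):
    with open(path, "rb") as f:
        chunk_count = int.from_bytes(f.read(4), "big")
        records = []
        for _ in range(chunk_count):
            size = int.from_bytes(f.read(4), "big")
            records.append(f.read(size))
    return records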
class TestStreamingEncryptorFileMethods:
def test_encrypt_file_decrypt_file_roundtrip(self, tmp_path):
from app.encryption import LocalKeyEncryption, StreamingEncryptor
master_key_path = tmp_path / "master.key"
provider = LocalKeyEncryption(master_key_path)
encryptor = StreamingEncryptor(provider, chunk_size=512)
data = b"file method test data" * 200
input_path = str(tmp_path / "input")
encrypted_path = str(tmp_path / "encrypted")
decrypted_path = str(tmp_path / "decrypted")
Path(input_path).write_bytes(data)
metadata = encryptor.encrypt_file(input_path, encrypted_path)
assert metadata.algorithm == "AES256"
encryptor.decrypt_file(encrypted_path, decrypted_path, metadata)
assert Path(decrypted_path).read_bytes() == data
def test_encrypt_file_matches_encrypt_stream(self, tmp_path):
from app.encryption import LocalKeyEncryption, StreamingEncryptor
master_key_path = tmp_path / "master.key"
provider = LocalKeyEncryption(master_key_path)
encryptor = StreamingEncryptor(provider, chunk_size=512)
data = b"stream vs file comparison" * 100
input_path = str(tmp_path / "input")
Path(input_path).write_bytes(data)
file_encrypted_path = str(tmp_path / "file_enc")
metadata_file = encryptor.encrypt_file(input_path, file_encrypted_path)
file_decrypted_path = str(tmp_path / "file_dec")
encryptor.decrypt_file(file_encrypted_path, file_decrypted_path, metadata_file)
assert Path(file_decrypted_path).read_bytes() == data
stream_enc, metadata_stream = encryptor.encrypt_stream(io.BytesIO(data))
stream_dec = encryptor.decrypt_stream(stream_enc, metadata_stream)
assert stream_dec.read() == data

View File

@@ -0,0 +1,460 @@
import io
import json
import time
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from app.connections import ConnectionStore, RemoteConnection
from app.replication import (
ReplicationManager,
ReplicationRule,
REPLICATION_MODE_BIDIRECTIONAL,
REPLICATION_MODE_NEW_ONLY,
)
from app.site_sync import (
SiteSyncWorker,
SyncState,
SyncedObjectInfo,
SiteSyncStats,
RemoteObjectMeta,
)
from app.storage import ObjectStorage
@pytest.fixture
def storage(tmp_path: Path):
storage_root = tmp_path / "data"
storage_root.mkdir(parents=True)
return ObjectStorage(storage_root)
@pytest.fixture
def connections(tmp_path: Path):
connections_path = tmp_path / "connections.json"
store = ConnectionStore(connections_path)
conn = RemoteConnection(
id="test-conn",
name="Test Remote",
endpoint_url="http://localhost:9000",
access_key="remote-access",
secret_key="remote-secret",
region="us-east-1",
)
store.add(conn)
return store
@pytest.fixture
def replication_manager(storage, connections, tmp_path):
rules_path = tmp_path / "replication_rules.json"
storage_root = tmp_path / "data"
storage_root.mkdir(exist_ok=True)
manager = ReplicationManager(storage, connections, rules_path, storage_root)
yield manager
manager.shutdown(wait=False)
@pytest.fixture
def site_sync_worker(storage, connections, replication_manager, tmp_path):
storage_root = tmp_path / "data"
worker = SiteSyncWorker(
storage=storage,
connections=connections,
replication_manager=replication_manager,
storage_root=storage_root,
interval_seconds=60,
batch_size=100,
)
yield worker
worker.shutdown()
class TestSyncedObjectInfo:
def test_to_dict(self):
info = SyncedObjectInfo(
last_synced_at=1234567890.0,
remote_etag="abc123",
source="remote",
)
result = info.to_dict()
assert result["last_synced_at"] == 1234567890.0
assert result["remote_etag"] == "abc123"
assert result["source"] == "remote"
def test_from_dict(self):
data = {
"last_synced_at": 9876543210.0,
"remote_etag": "def456",
"source": "local",
}
info = SyncedObjectInfo.from_dict(data)
assert info.last_synced_at == 9876543210.0
assert info.remote_etag == "def456"
assert info.source == "local"
class TestSyncState:
def test_to_dict(self):
state = SyncState(
synced_objects={
"test.txt": SyncedObjectInfo(
last_synced_at=1000.0,
remote_etag="etag1",
source="remote",
)
},
last_full_sync=2000.0,
)
result = state.to_dict()
assert "test.txt" in result["synced_objects"]
assert result["synced_objects"]["test.txt"]["remote_etag"] == "etag1"
assert result["last_full_sync"] == 2000.0
def test_from_dict(self):
data = {
"synced_objects": {
"file.txt": {
"last_synced_at": 3000.0,
"remote_etag": "etag2",
"source": "remote",
}
},
"last_full_sync": 4000.0,
}
state = SyncState.from_dict(data)
assert "file.txt" in state.synced_objects
assert state.synced_objects["file.txt"].remote_etag == "etag2"
assert state.last_full_sync == 4000.0
def test_from_dict_empty(self):
state = SyncState.from_dict({})
assert state.synced_objects == {}
assert state.last_full_sync is None
class TestSiteSyncStats:
def test_to_dict(self):
stats = SiteSyncStats(
last_sync_at=1234567890.0,
objects_pulled=10,
objects_skipped=5,
conflicts_resolved=2,
deletions_applied=1,
errors=0,
)
result = stats.to_dict()
assert result["objects_pulled"] == 10
assert result["objects_skipped"] == 5
assert result["conflicts_resolved"] == 2
assert result["deletions_applied"] == 1
assert result["errors"] == 0
class TestRemoteObjectMeta:
def test_from_s3_object(self):
obj = {
"Key": "test/file.txt",
"Size": 1024,
"LastModified": datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc),
"ETag": '"abc123def456"',
}
meta = RemoteObjectMeta.from_s3_object(obj)
assert meta.key == "test/file.txt"
assert meta.size == 1024
assert meta.last_modified == datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
assert meta.etag == "abc123def456"
class TestReplicationRuleBidirectional:
def test_rule_with_bidirectional_mode(self):
rule = ReplicationRule(
bucket_name="sync-bucket",
target_connection_id="test-conn",
target_bucket="remote-bucket",
enabled=True,
mode=REPLICATION_MODE_BIDIRECTIONAL,
sync_deletions=True,
)
assert rule.mode == REPLICATION_MODE_BIDIRECTIONAL
assert rule.sync_deletions is True
assert rule.last_pull_at is None
def test_rule_to_dict_includes_new_fields(self):
rule = ReplicationRule(
bucket_name="sync-bucket",
target_connection_id="test-conn",
target_bucket="remote-bucket",
mode=REPLICATION_MODE_BIDIRECTIONAL,
sync_deletions=False,
last_pull_at=1234567890.0,
)
result = rule.to_dict()
assert result["mode"] == REPLICATION_MODE_BIDIRECTIONAL
assert result["sync_deletions"] is False
assert result["last_pull_at"] == 1234567890.0
def test_rule_from_dict_with_new_fields(self):
data = {
"bucket_name": "sync-bucket",
"target_connection_id": "test-conn",
"target_bucket": "remote-bucket",
"mode": REPLICATION_MODE_BIDIRECTIONAL,
"sync_deletions": False,
"last_pull_at": 1234567890.0,
}
rule = ReplicationRule.from_dict(data)
assert rule.mode == REPLICATION_MODE_BIDIRECTIONAL
assert rule.sync_deletions is False
assert rule.last_pull_at == 1234567890.0
def test_rule_from_dict_defaults_new_fields(self):
data = {
"bucket_name": "sync-bucket",
"target_connection_id": "test-conn",
"target_bucket": "remote-bucket",
}
rule = ReplicationRule.from_dict(data)
assert rule.sync_deletions is True
assert rule.last_pull_at is None
class TestSiteSyncWorker:
def test_start_and_shutdown(self, site_sync_worker):
site_sync_worker.start()
assert site_sync_worker._sync_thread is not None
assert site_sync_worker._sync_thread.is_alive()
site_sync_worker.shutdown()
assert not site_sync_worker._sync_thread.is_alive()
def test_trigger_sync_no_rule(self, site_sync_worker):
result = site_sync_worker.trigger_sync("nonexistent-bucket")
assert result is None
def test_trigger_sync_wrong_mode(self, site_sync_worker, replication_manager):
rule = ReplicationRule(
bucket_name="new-only-bucket",
target_connection_id="test-conn",
target_bucket="remote-bucket",
mode=REPLICATION_MODE_NEW_ONLY,
enabled=True,
)
replication_manager.set_rule(rule)
result = site_sync_worker.trigger_sync("new-only-bucket")
assert result is None
def test_trigger_sync_disabled_rule(self, site_sync_worker, replication_manager):
rule = ReplicationRule(
bucket_name="disabled-bucket",
target_connection_id="test-conn",
target_bucket="remote-bucket",
mode=REPLICATION_MODE_BIDIRECTIONAL,
enabled=False,
)
replication_manager.set_rule(rule)
result = site_sync_worker.trigger_sync("disabled-bucket")
assert result is None
def test_get_stats_no_sync(self, site_sync_worker):
stats = site_sync_worker.get_stats("nonexistent")
assert stats is None
def test_resolve_conflict_remote_newer(self, site_sync_worker):
local_meta = MagicMock()
local_meta.last_modified = datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
local_meta.etag = "local123"
remote_meta = RemoteObjectMeta(
key="test.txt",
size=100,
last_modified=datetime(2025, 1, 2, 12, 0, 0, tzinfo=timezone.utc),
etag="remote456",
)
result = site_sync_worker._resolve_conflict(local_meta, remote_meta)
assert result == "pull"
def test_resolve_conflict_local_newer(self, site_sync_worker):
local_meta = MagicMock()
local_meta.last_modified = datetime(2025, 1, 2, 12, 0, 0, tzinfo=timezone.utc)
local_meta.etag = "local123"
remote_meta = RemoteObjectMeta(
key="test.txt",
size=100,
last_modified=datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc),
etag="remote456",
)
result = site_sync_worker._resolve_conflict(local_meta, remote_meta)
assert result == "keep"
def test_resolve_conflict_same_time_same_etag(self, site_sync_worker):
ts = datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
local_meta = MagicMock()
local_meta.last_modified = ts
local_meta.etag = "same123"
remote_meta = RemoteObjectMeta(
key="test.txt",
size=100,
last_modified=ts,
etag="same123",
)
result = site_sync_worker._resolve_conflict(local_meta, remote_meta)
assert result == "skip"
def test_resolve_conflict_same_time_different_etag(self, site_sync_worker):
ts = datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
local_meta = MagicMock()
local_meta.last_modified = ts
local_meta.etag = "aaa"
remote_meta = RemoteObjectMeta(
key="test.txt",
size=100,
last_modified=ts,
etag="zzz",
)
result = site_sync_worker._resolve_conflict(local_meta, remote_meta)
assert result == "pull"
def test_sync_state_persistence(self, site_sync_worker, tmp_path):
bucket_name = "test-bucket"
state = SyncState(
synced_objects={
"file1.txt": SyncedObjectInfo(
last_synced_at=time.time(),
remote_etag="etag1",
source="remote",
)
},
last_full_sync=time.time(),
)
site_sync_worker._save_sync_state(bucket_name, state)
loaded = site_sync_worker._load_sync_state(bucket_name)
assert "file1.txt" in loaded.synced_objects
assert loaded.synced_objects["file1.txt"].remote_etag == "etag1"
def test_load_sync_state_nonexistent(self, site_sync_worker):
state = site_sync_worker._load_sync_state("nonexistent-bucket")
assert state.synced_objects == {}
assert state.last_full_sync is None
@patch("app.site_sync._create_sync_client")
def test_list_remote_objects(self, mock_create_client, site_sync_worker, connections, replication_manager):
mock_client = MagicMock()
mock_paginator = MagicMock()
mock_paginator.paginate.return_value = [
{
"Contents": [
{
"Key": "file1.txt",
"Size": 100,
"LastModified": datetime(2025, 1, 1, tzinfo=timezone.utc),
"ETag": '"etag1"',
},
{
"Key": "file2.txt",
"Size": 200,
"LastModified": datetime(2025, 1, 2, tzinfo=timezone.utc),
"ETag": '"etag2"',
},
]
}
]
mock_client.get_paginator.return_value = mock_paginator
mock_create_client.return_value = mock_client
rule = ReplicationRule(
bucket_name="local-bucket",
target_connection_id="test-conn",
target_bucket="remote-bucket",
mode=REPLICATION_MODE_BIDIRECTIONAL,
)
conn = connections.get("test-conn")
result = site_sync_worker._list_remote_objects(rule, conn)
assert "file1.txt" in result
assert "file2.txt" in result
assert result["file1.txt"].size == 100
assert result["file2.txt"].size == 200
def test_list_local_objects(self, site_sync_worker, storage):
storage.create_bucket("test-bucket")
storage.put_object("test-bucket", "file1.txt", io.BytesIO(b"content1"))
storage.put_object("test-bucket", "file2.txt", io.BytesIO(b"content2"))
result = site_sync_worker._list_local_objects("test-bucket")
assert "file1.txt" in result
assert "file2.txt" in result
@patch("app.site_sync._create_sync_client")
def test_sync_bucket_connection_not_found(self, mock_create_client, site_sync_worker, replication_manager):
rule = ReplicationRule(
bucket_name="test-bucket",
target_connection_id="missing-conn",
target_bucket="remote-bucket",
mode=REPLICATION_MODE_BIDIRECTIONAL,
enabled=True,
)
replication_manager.set_rule(rule)
stats = site_sync_worker._sync_bucket(rule)
assert stats.errors == 1
class TestSiteSyncIntegration:
@patch("app.site_sync._create_sync_client")
def test_full_sync_cycle(self, mock_create_client, site_sync_worker, storage, connections, replication_manager):
storage.create_bucket("sync-bucket")
storage.put_object("sync-bucket", "local-only.txt", io.BytesIO(b"local content"))
mock_client = MagicMock()
mock_paginator = MagicMock()
mock_paginator.paginate.return_value = [
{
"Contents": [
{
"Key": "remote-only.txt",
"Size": 100,
"LastModified": datetime(2025, 1, 15, tzinfo=timezone.utc),
"ETag": '"remoteetag"',
},
]
}
]
mock_client.get_paginator.return_value = mock_paginator
mock_client.head_object.return_value = {"Metadata": {}}
def mock_download(bucket, key, path):
Path(path).write_bytes(b"remote content")
mock_client.download_file.side_effect = mock_download
mock_create_client.return_value = mock_client
rule = ReplicationRule(
bucket_name="sync-bucket",
target_connection_id="test-conn",
target_bucket="remote-bucket",
mode=REPLICATION_MODE_BIDIRECTIONAL,
enabled=True,
)
replication_manager.set_rule(rule)
stats = site_sync_worker._sync_bucket(rule)
assert stats.objects_pulled == 1
assert stats.errors == 0
objects = site_sync_worker._list_local_objects("sync-bucket")
assert "local-only.txt" in objects
assert "remote-only.txt" in objects

View File

@@ -0,0 +1,234 @@
import io
import os
from pathlib import Path
import pytest
from app.storage import ObjectStorage, StorageError
def test_multipart_upload_round_trip(tmp_path):
storage = ObjectStorage(tmp_path)
storage.create_bucket("media")
upload_id = storage.initiate_multipart_upload("media", "large.bin", metadata={"env": "test"})
first_etag = storage.upload_multipart_part("media", upload_id, 1, io.BytesIO(b"hello "))
second_etag = storage.upload_multipart_part("media", upload_id, 2, io.BytesIO(b"world"))
meta = storage.complete_multipart_upload(
"media",
upload_id,
[
{"part_number": 1, "etag": first_etag},
{"part_number": 2, "etag": second_etag},
],
)
assert meta.key == "large.bin"
assert meta.size == len(b"hello world")
assert meta.metadata == {"env": "test"}
assert (tmp_path / "media" / "large.bin").read_bytes() == b"hello world"
def test_abort_multipart_upload(tmp_path):
storage = ObjectStorage(tmp_path)
storage.create_bucket("docs")
upload_id = storage.initiate_multipart_upload("docs", "draft.txt")
storage.abort_multipart_upload("docs", upload_id)
with pytest.raises(StorageError):
storage.upload_multipart_part("docs", upload_id, 1, io.BytesIO(b"data"))
def test_bucket_versioning_toggle_and_restore(tmp_path):
storage = ObjectStorage(tmp_path)
storage.create_bucket("history")
assert storage.is_versioning_enabled("history") is False
storage.set_bucket_versioning("history", True)
assert storage.is_versioning_enabled("history") is True
storage.put_object("history", "note.txt", io.BytesIO(b"v1"))
storage.put_object("history", "note.txt", io.BytesIO(b"v2"))
versions = storage.list_object_versions("history", "note.txt")
assert versions
assert versions[0]["size"] == len(b"v1")
storage.delete_object("history", "note.txt")
versions = storage.list_object_versions("history", "note.txt")
assert len(versions) >= 2
target_version = versions[-1]["version_id"]
storage.restore_object_version("history", "note.txt", target_version)
restored = (tmp_path / "history" / "note.txt").read_bytes()
assert restored == b"v1"
def test_bucket_configuration_helpers(tmp_path):
storage = ObjectStorage(tmp_path)
storage.create_bucket("cfg")
assert storage.get_bucket_tags("cfg") == []
storage.set_bucket_tags("cfg", [{"Key": "env", "Value": "dev"}])
tags = storage.get_bucket_tags("cfg")
assert tags == [{"Key": "env", "Value": "dev"}]
storage.set_bucket_tags("cfg", None)
assert storage.get_bucket_tags("cfg") == []
assert storage.get_bucket_cors("cfg") == []
cors_rules = [{"AllowedOrigins": ["*"], "AllowedMethods": ["GET"], "AllowedHeaders": ["*"]}]
storage.set_bucket_cors("cfg", cors_rules)
assert storage.get_bucket_cors("cfg") == cors_rules
storage.set_bucket_cors("cfg", None)
assert storage.get_bucket_cors("cfg") == []
assert storage.get_bucket_encryption("cfg") == {}
encryption = {"Rules": [{"SSEAlgorithm": "AES256"}]}
storage.set_bucket_encryption("cfg", encryption)
assert storage.get_bucket_encryption("cfg") == encryption
storage.set_bucket_encryption("cfg", None)
assert storage.get_bucket_encryption("cfg") == {}
def test_delete_object_retries_when_locked(tmp_path, monkeypatch):
storage = ObjectStorage(tmp_path)
storage.create_bucket("demo")
storage.put_object("demo", "video.mp4", io.BytesIO(b"data"))
target_path = tmp_path / "demo" / "video.mp4"
original_unlink = Path.unlink
attempts = {"count": 0}
def flaky_unlink(self, missing_ok=False):
if self == target_path and attempts["count"] < 1:
attempts["count"] += 1
raise PermissionError("locked")
return original_unlink(self, missing_ok=missing_ok)
monkeypatch.setattr(Path, "unlink", flaky_unlink)
storage.delete_object("demo", "video.mp4")
assert attempts["count"] == 1
def test_delete_bucket_handles_metadata_residue(tmp_path):
storage = ObjectStorage(tmp_path)
storage.create_bucket("demo")
storage.put_object("demo", "file.txt", io.BytesIO(b"data"), metadata={"env": "test"})
storage.delete_object("demo", "file.txt")
meta_dir = tmp_path / ".myfsio.sys" / "buckets" / "demo" / "meta"
assert meta_dir.exists()
storage.delete_bucket("demo")
assert not (tmp_path / "demo").exists()
assert not (tmp_path / ".myfsio.sys" / "buckets" / "demo").exists()
def test_delete_bucket_requires_archives_removed(tmp_path):
storage = ObjectStorage(tmp_path)
storage.create_bucket("demo")
storage.set_bucket_versioning("demo", True)
storage.put_object("demo", "file.txt", io.BytesIO(b"data"))
storage.delete_object("demo", "file.txt")
versions_dir = tmp_path / ".myfsio.sys" / "buckets" / "demo" / "versions"
assert versions_dir.exists()
with pytest.raises(StorageError):
storage.delete_bucket("demo")
storage.purge_object("demo", "file.txt")
storage.delete_bucket("demo")
assert not (tmp_path / "demo").exists()
assert not (tmp_path / ".myfsio.sys" / "buckets" / "demo").exists()
def test_delete_bucket_handles_multipart_residue(tmp_path):
storage = ObjectStorage(tmp_path)
storage.create_bucket("demo")
upload_id = storage.initiate_multipart_upload("demo", "file.txt")
# Leave upload incomplete so the system multipart directory sticks around.
multipart_dir = tmp_path / ".myfsio.sys" / "multipart" / "demo"
assert multipart_dir.exists()
assert (multipart_dir / upload_id).exists()
with pytest.raises(StorageError):
storage.delete_bucket("demo")
storage.abort_multipart_upload("demo", upload_id)
storage.delete_bucket("demo")
assert not (tmp_path / "demo").exists()
assert not multipart_dir.exists()
def test_purge_object_raises_when_file_in_use(tmp_path, monkeypatch):
storage = ObjectStorage(tmp_path)
storage.create_bucket("demo")
storage.put_object("demo", "clip.mp4", io.BytesIO(b"data"))
target_path = tmp_path / "demo" / "clip.mp4"
original_unlink = Path.unlink
    def always_locked(self, missing_ok=False):
        # Accept the same signature as Path.unlink so unrelated unlink calls
        # (e.g. with missing_ok=True) still pass through correctly.
        if self == target_path:
            raise PermissionError("still locked")
        return original_unlink(self, missing_ok=missing_ok)
monkeypatch.setattr(Path, "unlink", always_locked)
with pytest.raises(StorageError) as exc:
storage.purge_object("demo", "clip.mp4")
assert "in use" in str(exc.value)
@pytest.mark.parametrize(
"object_key",
[
"../secret.txt",
"folder/../secret.txt",
"/absolute.txt",
"\\backslash.txt",
"bad\x00key",
],
)
def test_object_key_sanitization_blocks_traversal(object_key):
with pytest.raises(StorageError):
ObjectStorage._sanitize_object_key(object_key)
def test_object_key_length_limit_enforced():
key = "a" * 1025
with pytest.raises(StorageError):
ObjectStorage._sanitize_object_key(key)
@pytest.mark.parametrize(
"object_key",
[
".meta/data.bin",
".versions/foo.bin",
".multipart/upload.part",
".myfsio.sys/system.bin",
],
)
def test_object_key_blocks_reserved_paths(object_key):
with pytest.raises(StorageError):
ObjectStorage._sanitize_object_key(object_key)
def test_bucket_config_filename_allowed(tmp_path):
storage = ObjectStorage(tmp_path)
storage.create_bucket("demo")
storage.put_object("demo", ".bucket.json", io.BytesIO(b"{}"))
objects = storage.list_objects_all("demo")
assert any(meta.key == ".bucket.json" for meta in objects)
@pytest.mark.skipif(os.name != "nt", reason="Windows-specific filename rules")
def test_windows_filename_rules_enforced():
with pytest.raises(StorageError):
ObjectStorage._sanitize_object_key("CON/file.txt")
with pytest.raises(StorageError):
ObjectStorage._sanitize_object_key("folder/spaces ")
with pytest.raises(StorageError):
ObjectStorage._sanitize_object_key("C:drivepath.txt")

View File

@@ -0,0 +1,125 @@
import io
import json
import threading
from pathlib import Path
from werkzeug.serving import make_server
from app import create_app
from app.s3_client import S3ProxyClient
def _build_app(tmp_path: Path):
storage_root = tmp_path / "data"
iam_config = tmp_path / "iam.json"
bucket_policies = tmp_path / "bucket_policies.json"
iam_payload = {
"users": [
{
"access_key": "test",
"secret_key": "secret",
"display_name": "Bulk Tester",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy"]}],
}
]
}
iam_config.write_text(json.dumps(iam_payload))
app = create_app(
{
"TESTING": True,
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"API_BASE_URL": "http://127.0.0.1:0",
"SECRET_KEY": "testing",
"WTF_CSRF_ENABLED": False,
}
)
server = make_server("127.0.0.1", 0, app)
host, port = server.server_address
api_url = f"http://{host}:{port}"
app.config["API_BASE_URL"] = api_url
app.extensions["s3_proxy"] = S3ProxyClient(api_base_url=api_url)
thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start()
app._test_server = server
app._test_thread = thread
return app
def _shutdown_app(app):
if hasattr(app, "_test_server"):
app._test_server.shutdown()
app._test_thread.join(timeout=2)
def _login(client):
return client.post(
"/ui/login",
data={"access_key": "test", "secret_key": "secret"},
follow_redirects=True,
)
def test_bulk_delete_json_route(tmp_path: Path):
app = _build_app(tmp_path)
try:
storage = app.extensions["object_storage"]
storage.create_bucket("demo")
storage.put_object("demo", "first.txt", io.BytesIO(b"first"))
storage.put_object("demo", "second.txt", io.BytesIO(b"second"))
client = app.test_client()
assert _login(client).status_code == 200
response = client.post(
"/ui/buckets/demo/objects/bulk-delete",
json={"keys": ["first.txt", "missing.txt"]},
headers={"X-Requested-With": "XMLHttpRequest"},
)
assert response.status_code == 200
payload = response.get_json()
assert payload["status"] == "ok"
assert set(payload["deleted"]) == {"first.txt", "missing.txt"}
assert payload["errors"] == []
listing = storage.list_objects_all("demo")
assert {meta.key for meta in listing} == {"second.txt"}
finally:
_shutdown_app(app)
def test_bulk_delete_validation(tmp_path: Path):
app = _build_app(tmp_path)
try:
storage = app.extensions["object_storage"]
storage.create_bucket("demo")
storage.put_object("demo", "keep.txt", io.BytesIO(b"keep"))
client = app.test_client()
assert _login(client).status_code == 200
bad_response = client.post(
"/ui/buckets/demo/objects/bulk-delete",
json={"keys": []},
headers={"X-Requested-With": "XMLHttpRequest"},
)
assert bad_response.status_code == 400
assert bad_response.get_json()["status"] == "error"
too_many = [f"obj-{index}.txt" for index in range(501)]
limit_response = client.post(
"/ui/buckets/demo/objects/bulk-delete",
json={"keys": too_many},
headers={"X-Requested-With": "XMLHttpRequest"},
)
assert limit_response.status_code == 400
assert limit_response.get_json()["status"] == "error"
still_there = storage.list_objects_all("demo")
assert {meta.key for meta in still_there} == {"keep.txt"}
finally:
_shutdown_app(app)
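# Together these tests pin the bulk-delete contract: the endpoint accepts a
# JSON body of the form {"keys": [...]}, rejects an empty list and a list of
# 501 keys with HTTP 400 (suggesting a 500-key cap), and on success returns
# {"status": "ok", "deleted": [...], "errors": [...]}, with keys that were
# already absent still reported as deleted (deletion is idempotent).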

View File

@@ -0,0 +1,56 @@
import json
from pathlib import Path
from app import create_app
def _build_ui_app(tmp_path: Path):
storage_root = tmp_path / "data"
iam_config = tmp_path / "iam.json"
bucket_policies = tmp_path / "bucket_policies.json"
iam_payload = {
"users": [
{
"access_key": "test",
"secret_key": "secret",
"display_name": "Test User",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy"]}],
}
]
}
iam_config.write_text(json.dumps(iam_payload))
return create_app(
{
"TESTING": True,
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"API_BASE_URL": "http://example.test:9000",
"SECRET_KEY": "testing",
}
)
def test_docs_requires_login(tmp_path: Path):
app = _build_ui_app(tmp_path)
client = app.test_client()
response = client.get("/ui/docs")
assert response.status_code == 302
assert response.headers["Location"].endswith("/ui/login")
def test_docs_render_for_authenticated_user(tmp_path: Path):
app = _build_ui_app(tmp_path)
client = app.test_client()
# Prime session by signing in
login_response = client.post(
"/ui/login",
data={"access_key": "test", "secret_key": "secret"},
follow_redirects=True,
)
assert login_response.status_code == 200
response = client.get("/ui/docs")
assert response.status_code == 200
assert b"Your guide to MyFSIO" in response.data
assert b"http://example.test:9000" in response.data

View File

@@ -0,0 +1,263 @@
"""Tests for UI-based encryption configuration."""
import json
import threading
from pathlib import Path
import pytest
from werkzeug.serving import make_server
from app import create_app
from app.s3_client import S3ProxyClient
import re
def get_csrf_token(response):
    """Extract CSRF token from response HTML."""
    html = response.data.decode("utf-8")
    match = re.search(r'name="csrf_token"\s+value="([^"]+)"', html)
    return match.group(1) if match else None
def _make_encryption_app(tmp_path: Path, *, kms_enabled: bool = True):
"""Create an app with encryption enabled."""
storage_root = tmp_path / "data"
iam_config = tmp_path / "iam.json"
bucket_policies = tmp_path / "bucket_policies.json"
iam_payload = {
"users": [
{
"access_key": "test",
"secret_key": "secret",
"display_name": "Test User",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy"]}],
},
{
"access_key": "readonly",
"secret_key": "secret",
"display_name": "Read Only User",
"policies": [{"bucket": "*", "actions": ["list", "read"]}],
},
]
}
iam_config.write_text(json.dumps(iam_payload))
config = {
"TESTING": True,
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"API_BASE_URL": "http://127.0.0.1:0",
"SECRET_KEY": "testing",
"ENCRYPTION_ENABLED": True,
"WTF_CSRF_ENABLED": False,
}
if kms_enabled:
config["KMS_ENABLED"] = True
config["KMS_KEYS_PATH"] = str(tmp_path / "kms_keys.json")
config["ENCRYPTION_MASTER_KEY_PATH"] = str(tmp_path / "master.key")
app = create_app(config)
server = make_server("127.0.0.1", 0, app)
host, port = server.server_address
api_url = f"http://{host}:{port}"
app.config["API_BASE_URL"] = api_url
app.extensions["s3_proxy"] = S3ProxyClient(api_base_url=api_url)
thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start()
app._test_server = server
app._test_thread = thread
storage = app.extensions["object_storage"]
storage.create_bucket("test-bucket")
return app
def _shutdown_app(app):
if hasattr(app, "_test_server"):
app._test_server.shutdown()
app._test_thread.join(timeout=2)
class TestUIBucketEncryption:
"""Test bucket encryption configuration via UI."""
def test_bucket_detail_shows_encryption_card(self, tmp_path):
"""Encryption card should be visible on bucket detail page."""
app = _make_encryption_app(tmp_path)
try:
client = app.test_client()
client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True)
response = client.get("/ui/buckets/test-bucket?tab=properties")
assert response.status_code == 200
html = response.data.decode("utf-8")
assert "Default Encryption" in html
assert "Encryption Algorithm" in html or "Default encryption disabled" in html
finally:
_shutdown_app(app)
def test_enable_aes256_encryption(self, tmp_path):
"""Should be able to enable AES-256 encryption."""
app = _make_encryption_app(tmp_path)
try:
client = app.test_client()
client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True)
response = client.post(
"/ui/buckets/test-bucket/encryption",
data={
"action": "enable",
"algorithm": "AES256",
},
follow_redirects=True,
)
assert response.status_code == 200
html = response.data.decode("utf-8")
assert "AES-256" in html or "encryption enabled" in html.lower()
finally:
_shutdown_app(app)
def test_enable_kms_encryption(self, tmp_path):
"""Should be able to enable KMS encryption."""
app = _make_encryption_app(tmp_path, kms_enabled=True)
try:
with app.app_context():
kms = app.extensions.get("kms")
if kms:
key = kms.create_key("test-key")
key_id = key.key_id
else:
pytest.skip("KMS not available")
client = app.test_client()
client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True)
response = client.post(
"/ui/buckets/test-bucket/encryption",
data={
"action": "enable",
"algorithm": "aws:kms",
"kms_key_id": key_id,
},
follow_redirects=True,
)
assert response.status_code == 200
html = response.data.decode("utf-8")
assert "KMS" in html or "encryption enabled" in html.lower()
finally:
_shutdown_app(app)
def test_disable_encryption(self, tmp_path):
"""Should be able to disable encryption."""
app = _make_encryption_app(tmp_path)
try:
client = app.test_client()
client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True)
client.post(
"/ui/buckets/test-bucket/encryption",
data={
"action": "enable",
"algorithm": "AES256",
},
)
response = client.post(
"/ui/buckets/test-bucket/encryption",
data={
"action": "disable",
},
follow_redirects=True,
)
assert response.status_code == 200
html = response.data.decode("utf-8")
assert "disabled" in html.lower() or "Default encryption disabled" in html
finally:
_shutdown_app(app)
def test_invalid_algorithm_rejected(self, tmp_path):
"""Invalid encryption algorithm should be rejected."""
app = _make_encryption_app(tmp_path)
try:
client = app.test_client()
client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True)
response = client.post(
"/ui/buckets/test-bucket/encryption",
data={
"action": "enable",
"algorithm": "INVALID",
},
follow_redirects=True,
)
assert response.status_code == 200
html = response.data.decode("utf-8")
assert "Invalid" in html or "danger" in html
finally:
_shutdown_app(app)
def test_encryption_persists_in_config(self, tmp_path):
"""Encryption config should persist in bucket config."""
app = _make_encryption_app(tmp_path)
try:
client = app.test_client()
client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True)
client.post(
"/ui/buckets/test-bucket/encryption",
data={
"action": "enable",
"algorithm": "AES256",
},
)
with app.app_context():
storage = app.extensions["object_storage"]
config = storage.get_bucket_encryption("test-bucket")
assert "Rules" in config
assert len(config["Rules"]) == 1
assert config["Rules"][0]["SSEAlgorithm"] == "AES256"
finally:
_shutdown_app(app)
class TestUIEncryptionWithoutPermission:
"""Test encryption UI when user lacks permissions."""
def test_readonly_user_cannot_change_encryption(self, tmp_path):
"""Read-only user should not be able to change encryption settings."""
app = _make_encryption_app(tmp_path)
try:
client = app.test_client()
client.post("/ui/login", data={"access_key": "readonly", "secret_key": "secret"}, follow_redirects=True)
response = client.post(
"/ui/buckets/test-bucket/encryption",
data={
"action": "enable",
"algorithm": "AES256",
},
follow_redirects=True,
)
assert response.status_code == 200
html = response.data.decode("utf-8")
assert "Access denied" in html or "permission" in html.lower() or "not authorized" in html.lower()
finally:
_shutdown_app(app)
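# The persistence test above shows the stored shape of a bucket's default
# encryption configuration: {"Rules": [{"SSEAlgorithm": "AES256"}]} for
# AES-256. For KMS the algorithm is "aws:kms" and presumably a key reference
# is stored alongside it; the exact field name is not asserted here and is an
# assumption.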

View File

@@ -0,0 +1,212 @@
"""Tests for UI pagination of bucket objects."""
import json
import threading
from io import BytesIO
from pathlib import Path
import pytest
from werkzeug.serving import make_server
from app import create_app
from app.s3_client import S3ProxyClient
def _make_app(tmp_path: Path):
"""Create an app for testing with a live API server."""
storage_root = tmp_path / "data"
iam_config = tmp_path / "iam.json"
bucket_policies = tmp_path / "bucket_policies.json"
iam_payload = {
"users": [
{
"access_key": "test",
"secret_key": "secret",
"display_name": "Test User",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy"]}],
},
]
}
iam_config.write_text(json.dumps(iam_payload))
flask_app = create_app(
{
"TESTING": True,
"SECRET_KEY": "testing",
"WTF_CSRF_ENABLED": False,
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"API_BASE_URL": "http://127.0.0.1:0",
}
)
server = make_server("127.0.0.1", 0, flask_app)
host, port = server.server_address
api_url = f"http://{host}:{port}"
flask_app.config["API_BASE_URL"] = api_url
flask_app.extensions["s3_proxy"] = S3ProxyClient(api_base_url=api_url)
thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start()
flask_app._test_server = server
flask_app._test_thread = thread
return flask_app
def _shutdown_app(app):
if hasattr(app, "_test_server"):
app._test_server.shutdown()
app._test_thread.join(timeout=2)
class TestPaginatedObjectListing:
"""Test paginated object listing API."""
def test_objects_api_returns_paginated_results(self, tmp_path):
"""Objects API should return paginated results."""
app = _make_app(tmp_path)
try:
storage = app.extensions["object_storage"]
storage.create_bucket("test-bucket")
for i in range(10):
storage.put_object("test-bucket", f"file{i:02d}.txt", BytesIO(b"content"))
with app.test_client() as client:
client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True)
resp = client.get("/ui/buckets/test-bucket/objects?max_keys=3")
assert resp.status_code == 200
data = resp.get_json()
assert len(data["objects"]) == 3
assert data["is_truncated"] is True
assert data["next_continuation_token"] is not None
finally:
_shutdown_app(app)
def test_objects_api_pagination_continuation(self, tmp_path):
"""Objects API should support continuation tokens."""
app = _make_app(tmp_path)
try:
storage = app.extensions["object_storage"]
storage.create_bucket("test-bucket")
for i in range(5):
storage.put_object("test-bucket", f"file{i:02d}.txt", BytesIO(b"content"))
with app.test_client() as client:
client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True)
resp = client.get("/ui/buckets/test-bucket/objects?max_keys=2")
assert resp.status_code == 200
data = resp.get_json()
first_page_keys = [obj["key"] for obj in data["objects"]]
assert len(first_page_keys) == 2
assert data["is_truncated"] is True
token = data["next_continuation_token"]
resp = client.get(f"/ui/buckets/test-bucket/objects?max_keys=2&continuation_token={token}")
assert resp.status_code == 200
data = resp.get_json()
second_page_keys = [obj["key"] for obj in data["objects"]]
assert len(second_page_keys) == 2
assert set(first_page_keys).isdisjoint(set(second_page_keys))
finally:
_shutdown_app(app)
def test_objects_api_prefix_filter(self, tmp_path):
"""Objects API should support prefix filtering."""
app = _make_app(tmp_path)
try:
storage = app.extensions["object_storage"]
storage.create_bucket("test-bucket")
storage.put_object("test-bucket", "logs/access.log", BytesIO(b"log"))
storage.put_object("test-bucket", "logs/error.log", BytesIO(b"log"))
storage.put_object("test-bucket", "data/file.txt", BytesIO(b"data"))
with app.test_client() as client:
client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True)
resp = client.get("/ui/buckets/test-bucket/objects?prefix=logs/")
assert resp.status_code == 200
data = resp.get_json()
keys = [obj["key"] for obj in data["objects"]]
assert all(k.startswith("logs/") for k in keys)
assert len(keys) == 2
finally:
_shutdown_app(app)
def test_objects_api_requires_authentication(self, tmp_path):
"""Objects API should require login."""
app = _make_app(tmp_path)
try:
storage = app.extensions["object_storage"]
storage.create_bucket("test-bucket")
with app.test_client() as client:
resp = client.get("/ui/buckets/test-bucket/objects")
assert resp.status_code == 302
assert "/ui/login" in resp.headers.get("Location", "")
finally:
_shutdown_app(app)
def test_objects_api_returns_object_metadata(self, tmp_path):
"""Objects API should return complete object metadata."""
app = _make_app(tmp_path)
try:
storage = app.extensions["object_storage"]
storage.create_bucket("test-bucket")
storage.put_object("test-bucket", "test.txt", BytesIO(b"test content"))
with app.test_client() as client:
client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True)
resp = client.get("/ui/buckets/test-bucket/objects")
assert resp.status_code == 200
data = resp.get_json()
assert len(data["objects"]) == 1
obj = data["objects"][0]
assert obj["key"] == "test.txt"
assert obj["size"] == 12
assert "last_modified" in obj
assert "last_modified_display" in obj
assert "etag" in obj
assert "url_templates" in data
templates = data["url_templates"]
assert "preview" in templates
assert "download" in templates
assert "delete" in templates
assert "KEY_PLACEHOLDER" in templates["preview"]
finally:
_shutdown_app(app)
def test_bucket_detail_page_loads_without_objects(self, tmp_path):
"""Bucket detail page should load even with many objects."""
app = _make_app(tmp_path)
try:
storage = app.extensions["object_storage"]
storage.create_bucket("test-bucket")
for i in range(100):
storage.put_object("test-bucket", f"file{i:03d}.txt", BytesIO(b"x"))
with app.test_client() as client:
client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True)
resp = client.get("/ui/buckets/test-bucket")
assert resp.status_code == 200
html = resp.data.decode("utf-8")
assert "bucket-detail-main.js" in html
finally:
_shutdown_app(app)
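# Collectively these tests fix the JSON contract of the objects endpoint:
# a top-level {"objects": [...], "is_truncated": bool,
# "next_continuation_token": str | None, "url_templates": {...}} payload,
# where each object carries key, size, last_modified, last_modified_display,
# and etag, the url_templates entries (preview, download, delete) embed the
# literal marker "KEY_PLACEHOLDER" for client-side substitution, and listing
# accepts max_keys, prefix, and continuation_token query parameters.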

View File

@@ -0,0 +1,158 @@
import io
import json
import threading
from pathlib import Path
import pytest
from werkzeug.serving import make_server
from app import create_app
from app.s3_client import S3ProxyClient
DENY_LIST_ALLOW_GET_POLICY = {
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {"AWS": ["*"]},
"Action": ["s3:GetObject"],
"Resource": ["arn:aws:s3:::testbucket/*"],
},
{
"Effect": "Deny",
"Principal": {"AWS": ["*"]},
"Action": ["s3:ListBucket"],
"Resource": ["arn:aws:s3:::testbucket"],
},
],
}
def _make_ui_app(tmp_path: Path, *, enforce_policies: bool):
storage_root = tmp_path / "data"
iam_config = tmp_path / "iam.json"
bucket_policies = tmp_path / "bucket_policies.json"
iam_payload = {
"users": [
{
"access_key": "test",
"secret_key": "secret",
"display_name": "Test User",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy"]}],
}
]
}
iam_config.write_text(json.dumps(iam_payload))
app = create_app(
{
"TESTING": True,
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"API_BASE_URL": "http://127.0.0.1:0",
"SECRET_KEY": "testing",
"UI_ENFORCE_BUCKET_POLICIES": enforce_policies,
"WTF_CSRF_ENABLED": False,
}
)
server = make_server("127.0.0.1", 0, app)
host, port = server.server_address
api_url = f"http://{host}:{port}"
app.config["API_BASE_URL"] = api_url
app.extensions["s3_proxy"] = S3ProxyClient(api_base_url=api_url)
thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start()
app._test_server = server
app._test_thread = thread
storage = app.extensions["object_storage"]
storage.create_bucket("testbucket")
storage.put_object("testbucket", "vid.mp4", io.BytesIO(b"video"))
policy_store = app.extensions["bucket_policies"]
policy_store.set_policy("testbucket", DENY_LIST_ALLOW_GET_POLICY)
return app
def _shutdown_app(app):
if hasattr(app, "_test_server"):
app._test_server.shutdown()
app._test_thread.join(timeout=2)
@pytest.mark.parametrize("enforce", [True, False])
def test_ui_bucket_policy_enforcement_toggle(tmp_path: Path, enforce: bool):
app = _make_ui_app(tmp_path, enforce_policies=enforce)
try:
client = app.test_client()
client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True)
response = client.get("/ui/buckets/testbucket", follow_redirects=True)
if enforce:
assert b"Access denied by bucket policy" in response.data
else:
assert response.status_code == 200
assert b"Access denied by bucket policy" not in response.data
objects_response = client.get("/ui/buckets/testbucket/objects")
assert objects_response.status_code == 403
finally:
_shutdown_app(app)
def test_ui_bucket_policy_disabled_by_default(tmp_path: Path):
storage_root = tmp_path / "data"
iam_config = tmp_path / "iam.json"
bucket_policies = tmp_path / "bucket_policies.json"
iam_payload = {
"users": [
{
"access_key": "test",
"secret_key": "secret",
"display_name": "Test User",
"policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy"]}],
}
]
}
iam_config.write_text(json.dumps(iam_payload))
app = create_app(
{
"TESTING": True,
"STORAGE_ROOT": storage_root,
"IAM_CONFIG": iam_config,
"BUCKET_POLICY_PATH": bucket_policies,
"API_BASE_URL": "http://127.0.0.1:0",
"SECRET_KEY": "testing",
"WTF_CSRF_ENABLED": False,
}
)
server = make_server("127.0.0.1", 0, app)
host, port = server.server_address
api_url = f"http://{host}:{port}"
app.config["API_BASE_URL"] = api_url
app.extensions["s3_proxy"] = S3ProxyClient(api_base_url=api_url)
thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start()
app._test_server = server
app._test_thread = thread
try:
storage = app.extensions["object_storage"]
storage.create_bucket("testbucket")
storage.put_object("testbucket", "vid.mp4", io.BytesIO(b"video"))
policy_store = app.extensions["bucket_policies"]
policy_store.set_policy("testbucket", DENY_LIST_ALLOW_GET_POLICY)
client = app.test_client()
client.post("/ui/login", data={"access_key": "test", "secret_key": "secret"}, follow_redirects=True)
response = client.get("/ui/buckets/testbucket", follow_redirects=True)
assert response.status_code == 200
assert b"Access denied by bucket policy" not in response.data
objects_response = client.get("/ui/buckets/testbucket/objects")
assert objects_response.status_code == 403
finally:
_shutdown_app(app)

View File

@@ -0,0 +1,442 @@
import io
import json
from pathlib import Path
from xml.etree.ElementTree import fromstring

import pytest

from app import create_api_app
from app.website_domains import WebsiteDomainStore


def _stream(data: bytes):
    return io.BytesIO(data)


@pytest.fixture()
def website_app(tmp_path: Path):
    storage_root = tmp_path / "data"
    iam_config = tmp_path / "iam.json"
    bucket_policies = tmp_path / "bucket_policies.json"
    iam_payload = {
        "users": [
            {
                "access_key": "test",
                "secret_key": "secret",
                "display_name": "Test User",
                "policies": [{"bucket": "*", "actions": ["list", "read", "write", "delete", "policy", "iam:*"]}],
            }
        ]
    }
    iam_config.write_text(json.dumps(iam_payload))
    flask_app = create_api_app(
        {
            "TESTING": True,
            "SECRET_KEY": "testing",
            "STORAGE_ROOT": storage_root,
            "IAM_CONFIG": iam_config,
            "BUCKET_POLICY_PATH": bucket_policies,
            "API_BASE_URL": "http://testserver",
            "WEBSITE_HOSTING_ENABLED": True,
        }
    )
    yield flask_app
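
# WEBSITE_HOSTING_ENABLED gates both the S3 ?website subresource and the
# /admin/website-domains endpoints (see test_website_disabled and
# TestAdminWebsiteDomains.test_disabled below). The extra "iam:*" action in
# this fixture's policy appears to be what grants the test user access to
# the admin routes; that reading is inferred from the tests, not stated in
# this diff.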


@pytest.fixture()
def website_client(website_app):
    return website_app.test_client()


@pytest.fixture()
def storage(website_app):
    return website_app.extensions["object_storage"]


class TestWebsiteDomainStore:
    def test_empty_store(self, tmp_path):
        store = WebsiteDomainStore(tmp_path / "domains.json")
        assert store.list_all() == []
        assert store.get_bucket("example.com") is None

    def test_set_and_get_mapping(self, tmp_path):
        store = WebsiteDomainStore(tmp_path / "domains.json")
        store.set_mapping("example.com", "my-site")
        assert store.get_bucket("example.com") == "my-site"

    def test_case_insensitive(self, tmp_path):
        store = WebsiteDomainStore(tmp_path / "domains.json")
        store.set_mapping("Example.COM", "my-site")
        assert store.get_bucket("example.com") == "my-site"
        assert store.get_bucket("EXAMPLE.COM") == "my-site"

    def test_list_all(self, tmp_path):
        store = WebsiteDomainStore(tmp_path / "domains.json")
        store.set_mapping("a.com", "bucket-a")
        store.set_mapping("b.com", "bucket-b")
        result = store.list_all()
        domains = {item["domain"] for item in result}
        assert domains == {"a.com", "b.com"}

    def test_delete_mapping(self, tmp_path):
        store = WebsiteDomainStore(tmp_path / "domains.json")
        store.set_mapping("example.com", "my-site")
        assert store.delete_mapping("example.com") is True
        assert store.get_bucket("example.com") is None

    def test_delete_nonexistent(self, tmp_path):
        store = WebsiteDomainStore(tmp_path / "domains.json")
        assert store.delete_mapping("nope.com") is False

    def test_overwrite_mapping(self, tmp_path):
        store = WebsiteDomainStore(tmp_path / "domains.json")
        store.set_mapping("example.com", "old-bucket")
        store.set_mapping("example.com", "new-bucket")
        assert store.get_bucket("example.com") == "new-bucket"

    def test_persistence(self, tmp_path):
        path = tmp_path / "domains.json"
        store1 = WebsiteDomainStore(path)
        store1.set_mapping("example.com", "my-site")
        store2 = WebsiteDomainStore(path)
        assert store2.get_bucket("example.com") == "my-site"

    def test_corrupt_file(self, tmp_path):
        path = tmp_path / "domains.json"
        path.write_text("not json")
        store = WebsiteDomainStore(path)
        assert store.list_all() == []

    def test_non_dict_file(self, tmp_path):
        path = tmp_path / "domains.json"
        path.write_text('["not", "a", "dict"]')
        store = WebsiteDomainStore(path)
        assert store.list_all() == []
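
# The on-disk format is only exercised indirectly above. From test_persistence
# and test_non_dict_file, a plausible layout -- assumed here, not shown in
# this diff -- is a flat JSON object keyed by lower-cased domain:
#
#     {"example.com": "my-site", "b.com": "bucket-b"}
#
# which is consistent with corrupt or non-dict files being treated as empty.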


class TestStorageWebsiteConfig:
    def test_get_website_no_config(self, storage):
        storage.create_bucket("test-bucket")
        assert storage.get_bucket_website("test-bucket") is None

    def test_set_and_get_website(self, storage):
        storage.create_bucket("test-bucket")
        config = {"index_document": "index.html", "error_document": "error.html"}
        storage.set_bucket_website("test-bucket", config)
        result = storage.get_bucket_website("test-bucket")
        assert result["index_document"] == "index.html"
        assert result["error_document"] == "error.html"

    def test_delete_website_config(self, storage):
        storage.create_bucket("test-bucket")
        storage.set_bucket_website("test-bucket", {"index_document": "index.html"})
        storage.set_bucket_website("test-bucket", None)
        assert storage.get_bucket_website("test-bucket") is None

    def test_nonexistent_bucket(self, storage):
        with pytest.raises(Exception):
            storage.get_bucket_website("no-such-bucket")


class TestS3WebsiteAPI:
    def test_put_website_config(self, website_client, signer):
        headers = signer("PUT", "/site-bucket")
        assert website_client.put("/site-bucket", headers=headers).status_code == 200
        xml_body = b"""<WebsiteConfiguration>
            <IndexDocument><Suffix>index.html</Suffix></IndexDocument>
            <ErrorDocument><Key>404.html</Key></ErrorDocument>
        </WebsiteConfiguration>"""
        headers = signer("PUT", "/site-bucket?website",
                         headers={"Content-Type": "application/xml"}, body=xml_body)
        resp = website_client.put("/site-bucket", query_string={"website": ""},
                                  headers=headers, data=xml_body, content_type="application/xml")
        assert resp.status_code == 200

    def test_get_website_config(self, website_client, signer, storage):
        storage.create_bucket("site-bucket")
        storage.set_bucket_website("site-bucket", {
            "index_document": "index.html",
            "error_document": "error.html",
        })
        headers = signer("GET", "/site-bucket?website")
        resp = website_client.get("/site-bucket", query_string={"website": ""}, headers=headers)
        assert resp.status_code == 200
        root = fromstring(resp.data)
        suffix = root.find(".//{http://s3.amazonaws.com/doc/2006-03-01/}Suffix")
        if suffix is None:
            suffix = root.find(".//Suffix")
        assert suffix is not None
        assert suffix.text == "index.html"

    def test_get_website_config_not_set(self, website_client, signer, storage):
        storage.create_bucket("no-website")
        headers = signer("GET", "/no-website?website")
        resp = website_client.get("/no-website", query_string={"website": ""}, headers=headers)
        assert resp.status_code == 404

    def test_delete_website_config(self, website_client, signer, storage):
        storage.create_bucket("site-bucket")
        storage.set_bucket_website("site-bucket", {"index_document": "index.html"})
        headers = signer("DELETE", "/site-bucket?website")
        resp = website_client.delete("/site-bucket", query_string={"website": ""}, headers=headers)
        assert resp.status_code == 204
        assert storage.get_bucket_website("site-bucket") is None

    def test_put_website_missing_index(self, website_client, signer, storage):
        storage.create_bucket("site-bucket")
        xml_body = b"""<WebsiteConfiguration>
            <ErrorDocument><Key>error.html</Key></ErrorDocument>
        </WebsiteConfiguration>"""
        headers = signer("PUT", "/site-bucket?website",
                         headers={"Content-Type": "application/xml"}, body=xml_body)
        resp = website_client.put("/site-bucket", query_string={"website": ""},
                                  headers=headers, data=xml_body, content_type="application/xml")
        assert resp.status_code == 400

    def test_put_website_slash_in_suffix(self, website_client, signer, storage):
        storage.create_bucket("site-bucket")
        xml_body = b"""<WebsiteConfiguration>
            <IndexDocument><Suffix>path/index.html</Suffix></IndexDocument>
        </WebsiteConfiguration>"""
        headers = signer("PUT", "/site-bucket?website",
                         headers={"Content-Type": "application/xml"}, body=xml_body)
        resp = website_client.put("/site-bucket", query_string={"website": ""},
                                  headers=headers, data=xml_body, content_type="application/xml")
        assert resp.status_code == 400

    def test_put_website_malformed_xml(self, website_client, signer, storage):
        storage.create_bucket("site-bucket")
        xml_body = b"not xml at all"
        headers = signer("PUT", "/site-bucket?website",
                         headers={"Content-Type": "application/xml"}, body=xml_body)
        resp = website_client.put("/site-bucket", query_string={"website": ""},
                                  headers=headers, data=xml_body, content_type="application/xml")
        assert resp.status_code == 400

    def test_website_disabled(self, client, signer):
        headers = signer("PUT", "/test-bucket")
        assert client.put("/test-bucket", headers=headers).status_code == 200
        headers = signer("GET", "/test-bucket?website")
        resp = client.get("/test-bucket", query_string={"website": ""}, headers=headers)
        assert resp.status_code == 400
        assert b"not enabled" in resp.data


class TestAdminWebsiteDomains:
    def _admin_headers(self, signer):
        return signer("GET", "/admin/website-domains")

    def test_list_empty(self, website_client, signer):
        headers = self._admin_headers(signer)
        resp = website_client.get("/admin/website-domains", headers=headers)
        assert resp.status_code == 200
        assert resp.get_json() == []

    def test_create_mapping(self, website_client, signer, storage):
        storage.create_bucket("my-site")
        headers = signer("POST", "/admin/website-domains",
                         headers={"Content-Type": "application/json"},
                         body=json.dumps({"domain": "example.com", "bucket": "my-site"}).encode())
        resp = website_client.post("/admin/website-domains",
                                   headers=headers,
                                   json={"domain": "example.com", "bucket": "my-site"})
        assert resp.status_code == 201
        data = resp.get_json()
        assert data["domain"] == "example.com"
        assert data["bucket"] == "my-site"

    def test_create_duplicate(self, website_client, signer, storage):
        storage.create_bucket("my-site")
        body = json.dumps({"domain": "dup.com", "bucket": "my-site"}).encode()
        headers = signer("POST", "/admin/website-domains",
                         headers={"Content-Type": "application/json"}, body=body)
        website_client.post("/admin/website-domains", headers=headers,
                            json={"domain": "dup.com", "bucket": "my-site"})
        headers = signer("POST", "/admin/website-domains",
                         headers={"Content-Type": "application/json"}, body=body)
        resp = website_client.post("/admin/website-domains", headers=headers,
                                   json={"domain": "dup.com", "bucket": "my-site"})
        assert resp.status_code == 409

    def test_create_missing_domain(self, website_client, signer, storage):
        storage.create_bucket("my-site")
        body = json.dumps({"bucket": "my-site"}).encode()
        headers = signer("POST", "/admin/website-domains",
                         headers={"Content-Type": "application/json"}, body=body)
        resp = website_client.post("/admin/website-domains", headers=headers,
                                   json={"bucket": "my-site"})
        assert resp.status_code == 400

    def test_create_nonexistent_bucket(self, website_client, signer):
        body = json.dumps({"domain": "x.com", "bucket": "no-such"}).encode()
        headers = signer("POST", "/admin/website-domains",
                         headers={"Content-Type": "application/json"}, body=body)
        resp = website_client.post("/admin/website-domains", headers=headers,
                                   json={"domain": "x.com", "bucket": "no-such"})
        assert resp.status_code == 404

    def test_get_mapping(self, website_client, signer, storage):
        storage.create_bucket("my-site")
        body = json.dumps({"domain": "get.com", "bucket": "my-site"}).encode()
        headers = signer("POST", "/admin/website-domains",
                         headers={"Content-Type": "application/json"}, body=body)
        website_client.post("/admin/website-domains", headers=headers,
                            json={"domain": "get.com", "bucket": "my-site"})
        headers = signer("GET", "/admin/website-domains/get.com")
        resp = website_client.get("/admin/website-domains/get.com", headers=headers)
        assert resp.status_code == 200
        assert resp.get_json()["bucket"] == "my-site"

    def test_get_nonexistent(self, website_client, signer):
        headers = signer("GET", "/admin/website-domains/nope.com")
        resp = website_client.get("/admin/website-domains/nope.com", headers=headers)
        assert resp.status_code == 404

    def test_update_mapping(self, website_client, signer, storage):
        storage.create_bucket("old-bucket")
        storage.create_bucket("new-bucket")
        body = json.dumps({"domain": "upd.com", "bucket": "old-bucket"}).encode()
        headers = signer("POST", "/admin/website-domains",
                         headers={"Content-Type": "application/json"}, body=body)
        website_client.post("/admin/website-domains", headers=headers,
                            json={"domain": "upd.com", "bucket": "old-bucket"})
        body = json.dumps({"bucket": "new-bucket"}).encode()
        headers = signer("PUT", "/admin/website-domains/upd.com",
                         headers={"Content-Type": "application/json"}, body=body)
        resp = website_client.put("/admin/website-domains/upd.com", headers=headers,
                                  json={"bucket": "new-bucket"})
        assert resp.status_code == 200
        assert resp.get_json()["bucket"] == "new-bucket"

    def test_delete_mapping(self, website_client, signer, storage):
        storage.create_bucket("del-bucket")
        body = json.dumps({"domain": "del.com", "bucket": "del-bucket"}).encode()
        headers = signer("POST", "/admin/website-domains",
                         headers={"Content-Type": "application/json"}, body=body)
        website_client.post("/admin/website-domains", headers=headers,
                            json={"domain": "del.com", "bucket": "del-bucket"})
        headers = signer("DELETE", "/admin/website-domains/del.com")
        resp = website_client.delete("/admin/website-domains/del.com", headers=headers)
        assert resp.status_code == 204

    def test_delete_nonexistent(self, website_client, signer):
        headers = signer("DELETE", "/admin/website-domains/nope.com")
        resp = website_client.delete("/admin/website-domains/nope.com", headers=headers)
        assert resp.status_code == 404

    def test_disabled(self, website_client, signer):
        # Flipping app config needs no request context; try/finally restores
        # the flag even if the assertion fails.
        app = website_client.application
        app.config["WEBSITE_HOSTING_ENABLED"] = False
        try:
            headers = signer("GET", "/admin/website-domains")
            resp = website_client.get("/admin/website-domains", headers=headers)
            assert resp.status_code == 400
        finally:
            app.config["WEBSITE_HOSTING_ENABLED"] = True


class TestWebsiteServing:
    def _setup_website(self, storage, website_app):
        storage.create_bucket("my-site")
        storage.put_object("my-site", "index.html", _stream(b"<h1>Home</h1>"))
        storage.put_object("my-site", "about.html", _stream(b"<h1>About</h1>"))
        storage.put_object("my-site", "assets/style.css", _stream(b"body { color: red; }"))
        storage.put_object("my-site", "sub/index.html", _stream(b"<h1>Sub</h1>"))
        storage.put_object("my-site", "404.html", _stream(b"<h1>Not Found</h1>"))
        storage.set_bucket_website("my-site", {
            "index_document": "index.html",
            "error_document": "404.html",
        })
        store = website_app.extensions["website_domains"]
        store.set_mapping("mysite.example.com", "my-site")

    def test_serve_index(self, website_client, storage, website_app):
        self._setup_website(storage, website_app)
        resp = website_client.get("/", headers={"Host": "mysite.example.com"})
        assert resp.status_code == 200
        assert b"<h1>Home</h1>" in resp.data
        assert "text/html" in resp.content_type

    def test_serve_specific_file(self, website_client, storage, website_app):
        self._setup_website(storage, website_app)
        resp = website_client.get("/about.html", headers={"Host": "mysite.example.com"})
        assert resp.status_code == 200
        assert b"<h1>About</h1>" in resp.data

    def test_serve_css(self, website_client, storage, website_app):
        self._setup_website(storage, website_app)
        resp = website_client.get("/assets/style.css", headers={"Host": "mysite.example.com"})
        assert resp.status_code == 200
        assert b"body { color: red; }" in resp.data
        assert "text/css" in resp.content_type

    def test_serve_subdirectory_index(self, website_client, storage, website_app):
        self._setup_website(storage, website_app)
        resp = website_client.get("/sub/", headers={"Host": "mysite.example.com"})
        assert resp.status_code == 200
        assert b"<h1>Sub</h1>" in resp.data

    def test_serve_subdirectory_no_trailing_slash(self, website_client, storage, website_app):
        self._setup_website(storage, website_app)
        resp = website_client.get("/sub", headers={"Host": "mysite.example.com"})
        assert resp.status_code == 200
        assert b"<h1>Sub</h1>" in resp.data

    def test_serve_error_document(self, website_client, storage, website_app):
        self._setup_website(storage, website_app)
        resp = website_client.get("/nonexistent.html", headers={"Host": "mysite.example.com"})
        assert resp.status_code == 404
        assert b"<h1>Not Found</h1>" in resp.data

    def test_unmapped_host_passes_through(self, website_client, storage, website_app):
        self._setup_website(storage, website_app)
        resp = website_client.get("/", headers={"Host": "unknown.example.com"})
        assert resp.status_code != 200 or b"<h1>Home</h1>" not in resp.data

    def test_head_request(self, website_client, storage, website_app):
        self._setup_website(storage, website_app)
        resp = website_client.head("/index.html", headers={"Host": "mysite.example.com"})
        assert resp.status_code == 200
        assert "Content-Length" in resp.headers
        assert resp.data == b""

    def test_post_not_intercepted(self, website_client, storage, website_app):
        self._setup_website(storage, website_app)
        resp = website_client.post("/index.html", headers={"Host": "mysite.example.com"})
        assert resp.status_code != 200 or b"<h1>Home</h1>" not in resp.data

    def test_bucket_deleted(self, website_client, storage, website_app):
        self._setup_website(storage, website_app)
        for obj in storage.list_objects_all("my-site"):
            storage.delete_object("my-site", obj.key)
        storage.delete_bucket("my-site")
        resp = website_client.get("/", headers={"Host": "mysite.example.com"})
        assert resp.status_code == 404

    def test_no_website_config(self, website_client, storage, website_app):
        storage.create_bucket("bare-bucket")
        store = website_app.extensions["website_domains"]
        store.set_mapping("bare.example.com", "bare-bucket")
        resp = website_client.get("/", headers={"Host": "bare.example.com"})
        assert resp.status_code == 404

    def test_host_with_port(self, website_client, storage, website_app):
        self._setup_website(storage, website_app)
        resp = website_client.get("/", headers={"Host": "mysite.example.com:5000"})
        assert resp.status_code == 200
        assert b"<h1>Home</h1>" in resp.data

    def test_no_error_document(self, website_client, storage, website_app):
        storage.create_bucket("no-err")
        storage.put_object("no-err", "index.html", _stream(b"<h1>Home</h1>"))
        storage.set_bucket_website("no-err", {"index_document": "index.html"})
        store = website_app.extensions["website_domains"]
        store.set_mapping("noerr.example.com", "no-err")
        resp = website_client.get("/missing.html", headers={"Host": "noerr.example.com"})
        assert resp.status_code == 404
        assert b"Not Found" in resp.data