"""Flask blueprint exposing a subset of the S3 REST API."""
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import hmac
|
|
import mimetypes
|
|
import re
|
|
import uuid
|
|
from datetime import datetime, timedelta, timezone
|
|
from typing import Any, Dict
|
|
from urllib.parse import quote, urlencode, urlparse
|
|
from xml.etree.ElementTree import Element, SubElement, tostring, fromstring, ParseError
|
|
|
|
from flask import Blueprint, Response, current_app, jsonify, request, g
|
|
from werkzeug.http import http_date
|
|
|
|
from .bucket_policies import BucketPolicyStore
|
|
from .extensions import limiter
|
|
from .iam import IamError, Principal
|
|
from .replication import ReplicationManager
|
|
from .storage import ObjectStorage, StorageError
|
|
|
|
s3_api_bp = Blueprint("s3_api", __name__)
|
|
|
|
|
|
# ---------------------- helpers ----------------------
def _storage() -> ObjectStorage:
    return current_app.extensions["object_storage"]


def _iam():
    return current_app.extensions["iam"]


def _replication_manager() -> ReplicationManager:
    return current_app.extensions["replication"]


def _bucket_policies() -> BucketPolicyStore:
    store: BucketPolicyStore = current_app.extensions["bucket_policies"]
    store.maybe_reload()
    return store


def _xml_response(element: Element, status: int = 200) -> Response:
    xml_bytes = tostring(element, encoding="utf-8")
    return Response(xml_bytes, status=status, mimetype="application/xml")


def _error_response(code: str, message: str, status: int) -> Response:
    error = Element("Error")
    SubElement(error, "Code").text = code
    SubElement(error, "Message").text = message
    SubElement(error, "Resource").text = request.path
    SubElement(error, "RequestId").text = uuid.uuid4().hex
    return _xml_response(error, status)


def _sign(key: bytes, msg: str) -> bytes:
    return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
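

# SigV4 signing keys are derived by chaining HMAC-SHA256 over the credential
# scope components: kDate = HMAC("AWS4" + secret, date), then region, then
# service, then the literal "aws4_request"; the result signs the string-to-sign.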
def _get_signature_key(key: str, date_stamp: str, region_name: str, service_name: str) -> bytes:
    k_date = _sign(("AWS4" + key).encode("utf-8"), date_stamp)
    k_region = _sign(k_date, region_name)
    k_service = _sign(k_region, service_name)
    k_signing = _sign(k_service, "aws4_request")
    return k_signing
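

# Header-based SigV4 verification: parse the Authorization header, rebuild the
# canonical request and string-to-sign from the incoming request, re-derive the
# signature with the caller's secret, and compare with hmac.compare_digest so
# the check runs in constant time.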
def _verify_sigv4_header(req: Any, auth_header: str) -> Principal | None:
    # Parse Authorization header:
    # AWS4-HMAC-SHA256 Credential=AKIA.../20230101/us-east-1/s3/aws4_request, SignedHeaders=host;x-amz-date, Signature=...
    match = re.match(
        r"AWS4-HMAC-SHA256 Credential=([^/]+)/([^/]+)/([^/]+)/([^/]+)/aws4_request, SignedHeaders=([^,]+), Signature=(.+)",
        auth_header,
    )
    if not match:
        return None

    access_key, date_stamp, region, service, signed_headers_str, signature = match.groups()

    # Get secret key
    secret_key = _iam().get_secret_key(access_key)
    if not secret_key:
        raise IamError("Invalid access key")

    # Canonical request
    method = req.method
    canonical_uri = quote(req.path, safe="/-_.~")

    # Canonical query string
    query_args = []
    for key, value in req.args.items(multi=True):
        query_args.append((key, value))
    query_args.sort(key=lambda x: (x[0], x[1]))

    canonical_query_parts = []
    for k, v in query_args:
        canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}")
    canonical_query_string = "&".join(canonical_query_parts)

    # Canonical headers
    signed_headers_list = signed_headers_str.split(";")
    canonical_headers_parts = []
    for header in signed_headers_list:
        header_val = req.headers.get(header)
        if header_val is None:
            header_val = ""
        header_val = " ".join(header_val.split())
        canonical_headers_parts.append(f"{header.lower()}:{header_val}\n")
    canonical_headers = "".join(canonical_headers_parts)

    # Payload hash
    payload_hash = req.headers.get("X-Amz-Content-Sha256")
    if not payload_hash:
        payload_hash = hashlib.sha256(req.get_data()).hexdigest()

    canonical_request = f"{method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers_str}\n{payload_hash}"

    # String to sign
    amz_date = req.headers.get("X-Amz-Date")
    if not amz_date:
        amz_date = req.headers.get("Date")

    if not amz_date:
        raise IamError("Missing Date header")

    try:
        request_time = datetime.strptime(amz_date, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc)
    except ValueError:
        raise IamError("Invalid X-Amz-Date format")

    now = datetime.now(timezone.utc)
    time_diff = abs((now - request_time).total_seconds())
    if time_diff > 900:  # 15 minutes
        raise IamError("Request timestamp too old or too far in the future")

    required_headers = {"host", "x-amz-date"}
    signed_headers_set = set(signed_headers_str.split(";"))
    if not required_headers.issubset(signed_headers_set):
        # Some clients might sign 'date' instead of 'x-amz-date'
        if "date" in signed_headers_set:
            required_headers.remove("x-amz-date")
            required_headers.add("date")

    if not required_headers.issubset(signed_headers_set):
        raise IamError("Required headers not signed")

    credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
    string_to_sign = f"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}"
    signing_key = _get_signature_key(secret_key, date_stamp, region, service)
    calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()

    if not hmac.compare_digest(calculated_signature, signature):
        raise IamError("SignatureDoesNotMatch")

    return _iam().get_principal(access_key)
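

# Query-string (presigned URL) variant of SigV4: all signing inputs travel as
# X-Amz-* query parameters, the payload hash is always UNSIGNED-PAYLOAD, and
# the X-Amz-Signature parameter itself is excluded from the canonical query.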
def _verify_sigv4_query(req: Any) -> Principal | None:
    credential = req.args.get("X-Amz-Credential")
    signed_headers_str = req.args.get("X-Amz-SignedHeaders")
    signature = req.args.get("X-Amz-Signature")
    amz_date = req.args.get("X-Amz-Date")
    expires = req.args.get("X-Amz-Expires")

    if not (credential and signed_headers_str and signature and amz_date and expires):
        return None

    try:
        access_key, date_stamp, region, service, _ = credential.split("/")
    except ValueError:
        raise IamError("Invalid Credential format")

    try:
        req_time = datetime.strptime(amz_date, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc)
    except ValueError:
        raise IamError("Invalid Date format")

    now = datetime.now(timezone.utc)
    if now > req_time + timedelta(seconds=int(expires)):
        raise IamError("Request expired")

    secret_key = _iam().get_secret_key(access_key)
    if not secret_key:
        raise IamError("Invalid access key")

    # Canonical request
    method = req.method
    canonical_uri = quote(req.path, safe="/-_.~")

    # Canonical query string (the signature parameter is never part of it)
    query_args = []
    for key, value in req.args.items(multi=True):
        if key != "X-Amz-Signature":
            query_args.append((key, value))
    query_args.sort(key=lambda x: (x[0], x[1]))

    canonical_query_parts = []
    for k, v in query_args:
        canonical_query_parts.append(f"{quote(k, safe='-_.~')}={quote(v, safe='-_.~')}")
    canonical_query_string = "&".join(canonical_query_parts)

    # Canonical headers
    signed_headers_list = signed_headers_str.split(";")
    canonical_headers_parts = []
    for header in signed_headers_list:
        val = req.headers.get(header, "").strip()
        val = " ".join(val.split())
        canonical_headers_parts.append(f"{header}:{val}\n")
    canonical_headers = "".join(canonical_headers_parts)

    # Payload hash
    payload_hash = "UNSIGNED-PAYLOAD"

    canonical_request = "\n".join([
        method,
        canonical_uri,
        canonical_query_string,
        canonical_headers,
        signed_headers_str,
        payload_hash,
    ])

    # String to sign
    algorithm = "AWS4-HMAC-SHA256"
    credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
    hashed_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
    string_to_sign = "\n".join([
        algorithm,
        amz_date,
        credential_scope,
        hashed_request,
    ])

    # Signature
    signing_key = _get_signature_key(secret_key, date_stamp, region, service)
    calculated_signature = hmac.new(signing_key, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()

    if not hmac.compare_digest(calculated_signature, signature):
        raise IamError("SignatureDoesNotMatch")

    return _iam().get_principal(access_key)


def _verify_sigv4(req: Any) -> Principal | None:
    auth_header = req.headers.get("Authorization")
    if auth_header and auth_header.startswith("AWS4-HMAC-SHA256"):
        return _verify_sigv4_header(req, auth_header)

    if req.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256":
        return _verify_sigv4_query(req)

    return None
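

# Authentication order: SigV4 (Authorization header or presigned query string)
# is tried first; otherwise the simple X-Access-Key / X-Secret-Key header
# scheme is checked.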
def _require_principal():
    if ("Authorization" in request.headers and request.headers["Authorization"].startswith("AWS4-HMAC-SHA256")) or \
            (request.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256"):
        try:
            principal = _verify_sigv4(request)
            if principal:
                return principal, None
        except IamError as exc:
            return None, _error_response("AccessDenied", str(exc), 403)
        except (ValueError, TypeError):
            return None, _error_response("AccessDenied", "Signature verification failed", 403)

    access_key = request.headers.get("X-Access-Key")
    secret_key = request.headers.get("X-Secret-Key")
    if not access_key or not secret_key:
        return None, _error_response("AccessDenied", "Missing credentials", 403)
    try:
        principal = _iam().authenticate(access_key, secret_key)
        return principal, None
    except IamError as exc:
        return None, _error_response("AccessDenied", str(exc), 403)
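

# Combined decision: an explicit bucket-policy deny always wins; otherwise the
# request is allowed if either IAM or the bucket policy grants the action.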
def _authorize_action(principal: Principal | None, bucket_name: str | None, action: str, *, object_key: str | None = None) -> None:
    iam_allowed = False
    iam_error: IamError | None = None
    if principal is not None:
        try:
            _iam().authorize(principal, bucket_name, action)
            iam_allowed = True
        except IamError as exc:
            iam_error = exc
    else:
        iam_error = IamError("Missing credentials")

    policy_decision = None
    access_key = principal.access_key if principal else None
    if bucket_name:
        policy_decision = _bucket_policies().evaluate(access_key, bucket_name, object_key, action)
        if policy_decision == "deny":
            raise IamError("Access denied by bucket policy")

    if iam_allowed:
        return
    if policy_decision == "allow":
        return
    raise iam_error or IamError("Access denied")


def _enforce_bucket_policy(principal: Principal | None, bucket_name: str | None, object_key: str | None, action: str) -> None:
    if not bucket_name:
        return
    decision = _bucket_policies().evaluate(
        principal.access_key if principal else None,
        bucket_name,
        object_key,
        action,
    )
    if decision == "deny":
        raise IamError("Access denied by bucket policy")


def _object_principal(action: str, bucket_name: str, object_key: str):
    principal, error = _require_principal()
    try:
        _authorize_action(principal, bucket_name, action, object_key=object_key)
        return principal, None
    except IamError as exc:
        if not error:
            return None, _error_response("AccessDenied", str(exc), 403)
    if not _has_presign_params():
        return None, error
    try:
        principal = _validate_presigned_request(action, bucket_name, object_key)
        _enforce_bucket_policy(principal, bucket_name, object_key, action)
        return principal, None
    except IamError as exc:
        return None, _error_response("AccessDenied", str(exc), 403)


def _has_presign_params() -> bool:
    return bool(request.args.get("X-Amz-Algorithm"))
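

# Presigned-URL validation walks the same SigV4 steps as generation below:
# check credential scope and expiry, rebuild the canonical request from the
# query string, then compare signatures before resolving the principal.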
def _validate_presigned_request(action: str, bucket_name: str, object_key: str) -> Principal:
    algorithm = request.args.get("X-Amz-Algorithm")
    credential = request.args.get("X-Amz-Credential")
    amz_date = request.args.get("X-Amz-Date")
    signed_headers = request.args.get("X-Amz-SignedHeaders")
    expires = request.args.get("X-Amz-Expires")
    signature = request.args.get("X-Amz-Signature")
    if not all([algorithm, credential, amz_date, signed_headers, expires, signature]):
        raise IamError("Malformed presigned URL")
    if algorithm != "AWS4-HMAC-SHA256":
        raise IamError("Unsupported signing algorithm")

    parts = credential.split("/")
    if len(parts) != 5:
        raise IamError("Invalid credential scope")
    access_key, date_stamp, region, service, terminal = parts
    if terminal != "aws4_request":
        raise IamError("Invalid credential scope")
    config_region = current_app.config["AWS_REGION"]
    config_service = current_app.config["AWS_SERVICE"]
    if region != config_region or service != config_service:
        raise IamError("Credential scope mismatch")

    try:
        expiry = int(expires)
    except ValueError as exc:
        raise IamError("Invalid expiration") from exc
    if expiry < 1 or expiry > 7 * 24 * 3600:
        raise IamError("Expiration must be between 1 second and 7 days")

    try:
        request_time = datetime.strptime(amz_date, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc)
    except ValueError as exc:
        raise IamError("Invalid X-Amz-Date") from exc
    if datetime.now(timezone.utc) > request_time + timedelta(seconds=expiry):
        raise IamError("Presigned URL expired")

    signed_headers_list = [header.strip().lower() for header in signed_headers.split(";") if header]
    signed_headers_list.sort()
    canonical_headers = _canonical_headers_from_request(signed_headers_list)
    canonical_query = _canonical_query_from_request()
    payload_hash = request.args.get("X-Amz-Content-Sha256", "UNSIGNED-PAYLOAD")
    canonical_request = "\n".join(
        [
            request.method,
            _canonical_uri(bucket_name, object_key),
            canonical_query,
            canonical_headers,
            ";".join(signed_headers_list),
            payload_hash,
        ]
    )
    hashed_request = hashlib.sha256(canonical_request.encode()).hexdigest()
    scope = f"{date_stamp}/{region}/{service}/aws4_request"
    string_to_sign = "\n".join([
        "AWS4-HMAC-SHA256",
        amz_date,
        scope,
        hashed_request,
    ])
    secret = _iam().secret_for_key(access_key)
    signing_key = _derive_signing_key(secret, date_stamp, region, service)
    expected = hmac.new(signing_key, string_to_sign.encode(), hashlib.sha256).hexdigest()
    if not hmac.compare_digest(expected, signature):
        raise IamError("Signature mismatch")
    return _iam().principal_for_key(access_key)


def _canonical_query_from_request() -> str:
    parts = []
    for key in sorted(request.args.keys()):
        if key == "X-Amz-Signature":
            continue
        values = request.args.getlist(key)
        encoded_key = quote(str(key), safe="-_.~")
        for value in sorted(values):
            encoded_value = quote(str(value), safe="-_.~")
            parts.append(f"{encoded_key}={encoded_value}")
    return "&".join(parts)


def _canonical_headers_from_request(headers: list[str]) -> str:
    lines = []
    for header in headers:
        if header == "host":
            api_base = current_app.config.get("API_BASE_URL")
            if api_base:
                value = urlparse(api_base).netloc
            else:
                value = request.host
        else:
            value = request.headers.get(header, "")
        canonical_value = " ".join(value.strip().split()) if value else ""
        lines.append(f"{header}:{canonical_value}")
    return "\n".join(lines) + "\n"
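

# Each path segment is percent-encoded separately, e.g.
# _canonical_uri("logs", "2024/01 report.txt") -> "/logs/2024/01%20report.txt".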
def _canonical_uri(bucket_name: str, object_key: str | None) -> str:
    segments = [bucket_name]
    if object_key:
        segments.extend(object_key.split("/"))
    encoded = [quote(segment, safe="-_.~") for segment in segments]
    return "/" + "/".join(encoded)


def _extract_request_metadata() -> Dict[str, str]:
    metadata: Dict[str, str] = {}
    for header, value in request.headers.items():
        if header.lower().startswith("x-amz-meta-"):
            key = header[11:]
            if key:
                metadata[key] = value
    return metadata


def _derive_signing_key(secret: str, date_stamp: str, region: str, service: str) -> bytes:
    def _sign(key: bytes, msg: str) -> bytes:
        return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()

    k_date = _sign(("AWS4" + secret).encode("utf-8"), date_stamp)
    k_region = _sign(k_date, region)
    k_service = _sign(k_region, service)
    return _sign(k_service, "aws4_request")
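

# URL generation is the mirror image of _validate_presigned_request: both must
# produce byte-identical canonical requests (host-only signed headers and an
# UNSIGNED-PAYLOAD hash) or the signatures will not match.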
def _generate_presigned_url(
    *,
    principal: Principal,
    secret_key: str,
    method: str,
    bucket_name: str,
    object_key: str,
    expires_in: int,
) -> str:
    region = current_app.config["AWS_REGION"]
    service = current_app.config["AWS_SERVICE"]
    algorithm = "AWS4-HMAC-SHA256"
    now = datetime.now(timezone.utc)
    amz_date = now.strftime("%Y%m%dT%H%M%SZ")
    date_stamp = now.strftime("%Y%m%d")
    credential_scope = f"{date_stamp}/{region}/{service}/aws4_request"
    credential = f"{principal.access_key}/{credential_scope}"

    query_params = {
        "X-Amz-Algorithm": algorithm,
        "X-Amz-Credential": credential,
        "X-Amz-Date": amz_date,
        "X-Amz-Expires": str(expires_in),
        "X-Amz-SignedHeaders": "host",
        "X-Amz-Content-Sha256": "UNSIGNED-PAYLOAD",
    }
    canonical_query = _encode_query_params(query_params)

    # Determine host and scheme from config or request
    api_base = current_app.config.get("API_BASE_URL")
    if api_base:
        parsed = urlparse(api_base)
        host = parsed.netloc
        scheme = parsed.scheme
    else:
        host = request.headers.get("X-Forwarded-Host", request.host)
        scheme = request.headers.get("X-Forwarded-Proto", request.scheme or "http")

    canonical_headers = f"host:{host}\n"
    canonical_request = "\n".join(
        [
            method,
            _canonical_uri(bucket_name, object_key),
            canonical_query,
            canonical_headers,
            "host",
            "UNSIGNED-PAYLOAD",
        ]
    )
    hashed_request = hashlib.sha256(canonical_request.encode()).hexdigest()
    string_to_sign = "\n".join(
        [
            algorithm,
            amz_date,
            credential_scope,
            hashed_request,
        ]
    )
    signing_key = _derive_signing_key(secret_key, date_stamp, region, service)
    signature = hmac.new(signing_key, string_to_sign.encode(), hashlib.sha256).hexdigest()
    query_with_sig = canonical_query + f"&X-Amz-Signature={signature}"
    return f"{scheme}://{host}{_canonical_uri(bucket_name, object_key)}?{query_with_sig}"


def _encode_query_params(params: dict[str, str]) -> str:
    parts = []
    for key in sorted(params.keys()):
        value = params[key]
        encoded_key = quote(str(key), safe="-_.~")
        encoded_value = quote(str(value), safe="-_.~")
        parts.append(f"{encoded_key}={encoded_value}")
    return "&".join(parts)


def _strip_ns(tag: str | None) -> str:
    if not tag:
        return ""
    return tag.split("}")[-1]


def _parse_tagging_document(payload: bytes) -> list[dict[str, str]]:
    try:
        root = fromstring(payload)
    except ParseError as exc:
        raise ValueError("Malformed XML") from exc
    if _strip_ns(root.tag) != "Tagging":
        raise ValueError("Root element must be Tagging")
    tagset = root.find(".//{*}TagSet")
    if tagset is None:
        tagset = root.find("TagSet")
    if tagset is None:
        return []
    tags: list[dict[str, str]] = []
    for tag_el in list(tagset):
        if _strip_ns(tag_el.tag) != "Tag":
            continue
        key_el = tag_el.find("{*}Key")
        if key_el is None:
            key_el = tag_el.find("Key")
        value_el = tag_el.find("{*}Value")
        if value_el is None:
            value_el = tag_el.find("Value")
        key = (key_el.text or "").strip() if key_el is not None else ""
        if not key:
            continue
        value = value_el.text if value_el is not None else ""
        tags.append({"Key": key, "Value": value or ""})
    return tags


def _render_tagging_document(tags: list[dict[str, str]]) -> Element:
    root = Element("Tagging")
    tagset_el = SubElement(root, "TagSet")
    for tag in tags:
        tag_el = SubElement(tagset_el, "Tag")
        SubElement(tag_el, "Key").text = tag.get("Key", "")
        SubElement(tag_el, "Value").text = tag.get("Value", "")
    return root
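

# Content types that a browser may execute in the service's origin; accepting
# them unchecked would turn any shared object URL into a stored-XSS vector.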
DANGEROUS_CONTENT_TYPES = frozenset([
    "text/html",
    "application/xhtml+xml",
    "application/javascript",
    "text/javascript",
    "application/x-javascript",
    "text/ecmascript",
    "application/ecmascript",
    "image/svg+xml",
])

SAFE_EXTENSION_MAP = {
    ".txt": ["text/plain"],
    ".json": ["application/json"],
    ".xml": ["application/xml", "text/xml"],
    ".csv": ["text/csv"],
    ".pdf": ["application/pdf"],
    ".png": ["image/png"],
    ".jpg": ["image/jpeg"],
    ".jpeg": ["image/jpeg"],
    ".gif": ["image/gif"],
    ".webp": ["image/webp"],
    ".mp4": ["video/mp4"],
    ".mp3": ["audio/mpeg"],
    ".zip": ["application/zip"],
    ".gz": ["application/gzip"],
    ".tar": ["application/x-tar"],
}


def _validate_content_type(object_key: str, content_type: str | None) -> str | None:
    """Validate the Content-Type header for security.

    Returns an error message if validation fails, None otherwise.

    Rules:
    1. Block dangerous MIME types that can execute scripts, unless the file
       extension explicitly matches the declared type.
    2. Other mismatches between extension and Content-Type are tolerated.
    """
    if not content_type:
        return None

    base_type = content_type.split(";")[0].strip().lower()

    if base_type in DANGEROUS_CONTENT_TYPES:
        ext = "." + object_key.rsplit(".", 1)[-1].lower() if "." in object_key else ""

        allowed_dangerous = {
            ".svg": "image/svg+xml",
            ".html": "text/html",
            ".htm": "text/html",
            ".xhtml": "application/xhtml+xml",
            ".js": "application/javascript",
            ".mjs": "application/javascript",
        }

        if ext in allowed_dangerous and base_type == allowed_dangerous[ext]:
            return None

        return (
            f"Content-Type '{content_type}' is potentially dangerous and not allowed "
            f"for object key '{object_key}'. Use a safe Content-Type or rename the file "
            f"with an appropriate extension."
        )

    return None


def _parse_cors_document(payload: bytes) -> list[dict[str, Any]]:
    try:
        root = fromstring(payload)
    except ParseError as exc:
        raise ValueError("Malformed XML") from exc
    if _strip_ns(root.tag) != "CORSConfiguration":
        raise ValueError("Root element must be CORSConfiguration")
    rules: list[dict[str, Any]] = []
    for rule_el in list(root):
        if _strip_ns(rule_el.tag) != "CORSRule":
            continue
        rule: dict[str, Any] = {
            "AllowedOrigins": [],
            "AllowedMethods": [],
            "AllowedHeaders": [],
            "ExposeHeaders": [],
        }
        for child in list(rule_el):
            name = _strip_ns(child.tag)
            if name == "AllowedOrigin":
                rule["AllowedOrigins"].append(child.text or "")
            elif name == "AllowedMethod":
                rule["AllowedMethods"].append(child.text or "")
            elif name == "AllowedHeader":
                rule["AllowedHeaders"].append(child.text or "")
            elif name == "ExposeHeader":
                rule["ExposeHeaders"].append(child.text or "")
            elif name == "MaxAgeSeconds":
                try:
                    rule["MaxAgeSeconds"] = int(child.text or 0)
                except ValueError:
                    raise ValueError("MaxAgeSeconds must be an integer") from None
        rules.append(rule)
    return rules


def _render_cors_document(rules: list[dict[str, Any]]) -> Element:
    root = Element("CORSConfiguration")
    for rule in rules:
        rule_el = SubElement(root, "CORSRule")
        for origin in rule.get("AllowedOrigins", []):
            SubElement(rule_el, "AllowedOrigin").text = origin
        for method in rule.get("AllowedMethods", []):
            SubElement(rule_el, "AllowedMethod").text = method
        for header in rule.get("AllowedHeaders", []):
            SubElement(rule_el, "AllowedHeader").text = header
        for header in rule.get("ExposeHeaders", []):
            SubElement(rule_el, "ExposeHeader").text = header
        if "MaxAgeSeconds" in rule and rule["MaxAgeSeconds"] is not None:
            SubElement(rule_el, "MaxAgeSeconds").text = str(rule["MaxAgeSeconds"])
    return root


def _parse_encryption_document(payload: bytes) -> dict[str, Any]:
    try:
        root = fromstring(payload)
    except ParseError as exc:
        raise ValueError("Malformed XML") from exc
    if _strip_ns(root.tag) != "ServerSideEncryptionConfiguration":
        raise ValueError("Root element must be ServerSideEncryptionConfiguration")
    rules: list[dict[str, Any]] = []
    for rule_el in list(root):
        if _strip_ns(rule_el.tag) != "Rule":
            continue
        default_el = None
        bucket_key_el = None
        for child in list(rule_el):
            name = _strip_ns(child.tag)
            if name == "ApplyServerSideEncryptionByDefault":
                default_el = child
            elif name == "BucketKeyEnabled":
                bucket_key_el = child
        if default_el is None:
            continue
        algo_el = default_el.find("{*}SSEAlgorithm")
        if algo_el is None:
            algo_el = default_el.find("SSEAlgorithm")
        if algo_el is None or not (algo_el.text or "").strip():
            raise ValueError("SSEAlgorithm is required")
        rule: dict[str, Any] = {"SSEAlgorithm": algo_el.text.strip()}
        kms_el = default_el.find("{*}KMSMasterKeyID")
        if kms_el is None:
            kms_el = default_el.find("KMSMasterKeyID")
        if kms_el is not None and kms_el.text:
            rule["KMSMasterKeyID"] = kms_el.text.strip()
        if bucket_key_el is not None and bucket_key_el.text:
            rule["BucketKeyEnabled"] = bucket_key_el.text.strip().lower() in {"true", "1"}
        rules.append(rule)
    if not rules:
        raise ValueError("At least one Rule is required")
    return {"Rules": rules}


def _render_encryption_document(config: dict[str, Any]) -> Element:
    root = Element("ServerSideEncryptionConfiguration")
    for rule in config.get("Rules", []):
        rule_el = SubElement(root, "Rule")
        default_el = SubElement(rule_el, "ApplyServerSideEncryptionByDefault")
        SubElement(default_el, "SSEAlgorithm").text = rule.get("SSEAlgorithm", "")
        if rule.get("KMSMasterKeyID"):
            SubElement(default_el, "KMSMasterKeyID").text = rule["KMSMasterKeyID"]
        if "BucketKeyEnabled" in rule:
            SubElement(rule_el, "BucketKeyEnabled").text = "true" if rule["BucketKeyEnabled"] else "false"
    return root


def _stream_file(path, chunk_size: int = 64 * 1024):
    with path.open("rb") as handle:
        while True:
            chunk = handle.read(chunk_size)
            if not chunk:
                break
            yield chunk


def _method_not_allowed(allowed: list[str]) -> Response:
    response = _error_response(
        "MethodNotAllowed",
        "The specified method is not allowed for this resource",
        405,
    )
    response.headers["Allow"] = ", ".join(sorted({method.upper() for method in allowed}))
    return response


def _apply_object_headers(
    response: Response,
    *,
    file_stat,
    metadata: Dict[str, str] | None,
    etag: str,
) -> None:
    response.headers["Content-Length"] = str(file_stat.st_size)
    response.headers["Last-Modified"] = http_date(file_stat.st_mtime)
    response.headers["ETag"] = f'"{etag}"'
    response.headers["Accept-Ranges"] = "bytes"
    for key, value in (metadata or {}).items():
        response.headers[f"X-Amz-Meta-{key}"] = value
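

# Bucket subresources are selected by bare query flags, e.g. GET /my-bucket?versioning
# or PUT /my-bucket?cors; exactly one flag may be present per request.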
def _maybe_handle_bucket_subresource(bucket_name: str) -> Response | None:
    handlers = {
        "versioning": _bucket_versioning_handler,
        "tagging": _bucket_tagging_handler,
        "cors": _bucket_cors_handler,
        "encryption": _bucket_encryption_handler,
        "location": _bucket_location_handler,
        "acl": _bucket_acl_handler,
    }
    requested = [key for key in handlers if key in request.args]
    if not requested:
        return None
    if len(requested) > 1:
        return _error_response(
            "InvalidRequest",
            "Only a single bucket subresource can be requested at a time",
            400,
        )
    handler = handlers[requested[0]]
    return handler(bucket_name)


def _bucket_versioning_handler(bucket_name: str) -> Response:
    if request.method not in {"GET", "PUT"}:
        return _method_not_allowed(["GET", "PUT"])
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "policy")
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
    storage = _storage()

    if request.method == "PUT":
        payload = request.get_data(cache=False) or b""
        if not payload.strip():
            return _error_response("MalformedXML", "Request body is required", 400)
        try:
            root = fromstring(payload)
        except ParseError:
            return _error_response("MalformedXML", "Unable to parse XML document", 400)
        if _strip_ns(root.tag) != "VersioningConfiguration":
            return _error_response("MalformedXML", "Root element must be VersioningConfiguration", 400)
        status_el = root.find("{*}Status")
        if status_el is None:
            status_el = root.find("Status")
        status = (status_el.text or "").strip() if status_el is not None else ""
        if status not in {"Enabled", "Suspended", ""}:
            return _error_response("MalformedXML", "Status must be Enabled or Suspended", 400)
        try:
            storage.set_bucket_versioning(bucket_name, status == "Enabled")
        except StorageError as exc:
            return _error_response("NoSuchBucket", str(exc), 404)
        current_app.logger.info("Bucket versioning updated", extra={"bucket": bucket_name, "status": status})
        return Response(status=200)

    # GET
    try:
        enabled = storage.is_versioning_enabled(bucket_name)
    except StorageError as exc:
        return _error_response("NoSuchBucket", str(exc), 404)
    root = Element("VersioningConfiguration")
    SubElement(root, "Status").text = "Enabled" if enabled else "Suspended"
    return _xml_response(root)


def _bucket_tagging_handler(bucket_name: str) -> Response:
    if request.method not in {"GET", "PUT", "DELETE"}:
        return _method_not_allowed(["GET", "PUT", "DELETE"])
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "policy")
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
    storage = _storage()
    if request.method == "GET":
        try:
            tags = storage.get_bucket_tags(bucket_name)
        except StorageError as exc:
            return _error_response("NoSuchBucket", str(exc), 404)
        if not tags:
            return _error_response("NoSuchTagSet", "No tags are configured for this bucket", 404)
        return _xml_response(_render_tagging_document(tags))
    if request.method == "DELETE":
        try:
            storage.set_bucket_tags(bucket_name, None)
        except StorageError as exc:
            return _error_response("NoSuchBucket", str(exc), 404)
        current_app.logger.info("Bucket tags deleted", extra={"bucket": bucket_name})
        return Response(status=204)
    # PUT
    payload = request.get_data(cache=False) or b""
    try:
        tags = _parse_tagging_document(payload)
    except ValueError as exc:
        return _error_response("MalformedXML", str(exc), 400)
    if len(tags) > 50:
        return _error_response("InvalidTag", "A maximum of 50 tags is supported", 400)
    try:
        storage.set_bucket_tags(bucket_name, tags)
    except StorageError as exc:
        return _error_response("NoSuchBucket", str(exc), 404)
    current_app.logger.info("Bucket tags updated", extra={"bucket": bucket_name, "tags": len(tags)})
    return Response(status=204)


def _object_tagging_handler(bucket_name: str, object_key: str) -> Response:
    """Handle object tagging operations (GET/PUT/DELETE /<bucket>/<key>?tagging)."""
    if request.method not in {"GET", "PUT", "DELETE"}:
        return _method_not_allowed(["GET", "PUT", "DELETE"])

    principal, error = _require_principal()
    if error:
        return error

    # For tagging, we use read permission for GET, write for PUT/DELETE
    action = "read" if request.method == "GET" else "write"
    try:
        _authorize_action(principal, bucket_name, action, object_key=object_key)
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)

    storage = _storage()

    if request.method == "GET":
        try:
            tags = storage.get_object_tags(bucket_name, object_key)
        except StorageError as exc:
            message = str(exc)
            if "Bucket" in message:
                return _error_response("NoSuchBucket", message, 404)
            return _error_response("NoSuchKey", message, 404)
        return _xml_response(_render_tagging_document(tags))

    if request.method == "DELETE":
        try:
            storage.delete_object_tags(bucket_name, object_key)
        except StorageError as exc:
            message = str(exc)
            if "Bucket" in message:
                return _error_response("NoSuchBucket", message, 404)
            return _error_response("NoSuchKey", message, 404)
        current_app.logger.info("Object tags deleted", extra={"bucket": bucket_name, "key": object_key})
        return Response(status=204)

    # PUT
    payload = request.get_data(cache=False) or b""
    try:
        tags = _parse_tagging_document(payload)
    except ValueError as exc:
        return _error_response("MalformedXML", str(exc), 400)
    if len(tags) > 10:
        return _error_response("InvalidTag", "A maximum of 10 tags is supported for objects", 400)
    try:
        storage.set_object_tags(bucket_name, object_key, tags)
    except StorageError as exc:
        message = str(exc)
        if "Bucket" in message:
            return _error_response("NoSuchBucket", message, 404)
        return _error_response("NoSuchKey", message, 404)
    current_app.logger.info("Object tags updated", extra={"bucket": bucket_name, "key": object_key, "tags": len(tags)})
    return Response(status=204)


def _sanitize_cors_rules(rules: list[dict[str, Any]]) -> list[dict[str, Any]]:
    sanitized: list[dict[str, Any]] = []
    for rule in rules:
        allowed_origins = [origin.strip() for origin in rule.get("AllowedOrigins", []) if origin and origin.strip()]
        allowed_methods = [method.strip().upper() for method in rule.get("AllowedMethods", []) if method and method.strip()]
        allowed_headers = [header.strip() for header in rule.get("AllowedHeaders", []) if header and header.strip()]
        expose_headers = [header.strip() for header in rule.get("ExposeHeaders", []) if header and header.strip()]
        if not allowed_origins or not allowed_methods:
            raise ValueError("Each CORSRule must include AllowedOrigin and AllowedMethod entries")
        sanitized_rule: dict[str, Any] = {
            "AllowedOrigins": allowed_origins,
            "AllowedMethods": allowed_methods,
        }
        if allowed_headers:
            sanitized_rule["AllowedHeaders"] = allowed_headers
        if expose_headers:
            sanitized_rule["ExposeHeaders"] = expose_headers
        if "MaxAgeSeconds" in rule and rule["MaxAgeSeconds"] is not None:
            sanitized_rule["MaxAgeSeconds"] = int(rule["MaxAgeSeconds"])
        sanitized.append(sanitized_rule)
    return sanitized


def _bucket_cors_handler(bucket_name: str) -> Response:
    if request.method not in {"GET", "PUT", "DELETE"}:
        return _method_not_allowed(["GET", "PUT", "DELETE"])
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "policy")
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
    storage = _storage()
    if request.method == "GET":
        try:
            rules = storage.get_bucket_cors(bucket_name)
        except StorageError as exc:
            return _error_response("NoSuchBucket", str(exc), 404)
        if not rules:
            return _error_response("NoSuchCORSConfiguration", "No CORS configuration found", 404)
        return _xml_response(_render_cors_document(rules))
    if request.method == "DELETE":
        try:
            storage.set_bucket_cors(bucket_name, None)
        except StorageError as exc:
            return _error_response("NoSuchBucket", str(exc), 404)
        current_app.logger.info("Bucket CORS deleted", extra={"bucket": bucket_name})
        return Response(status=204)
    # PUT
    payload = request.get_data(cache=False) or b""
    if not payload.strip():
        try:
            storage.set_bucket_cors(bucket_name, None)
        except StorageError as exc:
            return _error_response("NoSuchBucket", str(exc), 404)
        current_app.logger.info("Bucket CORS cleared", extra={"bucket": bucket_name})
        return Response(status=204)
    try:
        rules = _parse_cors_document(payload)
        sanitized = _sanitize_cors_rules(rules)
    except ValueError as exc:
        return _error_response("MalformedXML", str(exc), 400)
    if not sanitized:
        return _error_response("InvalidRequest", "At least one CORSRule must be supplied", 400)
    try:
        storage.set_bucket_cors(bucket_name, sanitized)
    except StorageError as exc:
        return _error_response("NoSuchBucket", str(exc), 404)
    current_app.logger.info("Bucket CORS updated", extra={"bucket": bucket_name, "rules": len(sanitized)})
    return Response(status=204)


def _bucket_encryption_handler(bucket_name: str) -> Response:
    if request.method not in {"GET", "PUT"}:
        return _method_not_allowed(["GET", "PUT"])
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "policy")
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
    storage = _storage()
    if request.method == "GET":
        try:
            config = storage.get_bucket_encryption(bucket_name)
        except StorageError as exc:
            return _error_response("NoSuchBucket", str(exc), 404)
        if not config:
            return _error_response(
                "ServerSideEncryptionConfigurationNotFoundError",
                "No server-side encryption configuration found",
                404,
            )
        return _xml_response(_render_encryption_document(config))
    payload = request.get_data(cache=False) or b""
    if not payload.strip():
        try:
            storage.set_bucket_encryption(bucket_name, None)
        except StorageError as exc:
            return _error_response("NoSuchBucket", str(exc), 404)
        current_app.logger.info("Bucket encryption cleared", extra={"bucket": bucket_name})
        return Response(status=204)
    try:
        config = _parse_encryption_document(payload)
    except ValueError as exc:
        return _error_response("MalformedXML", str(exc), 400)
    try:
        storage.set_bucket_encryption(bucket_name, config)
    except StorageError as exc:
        return _error_response("NoSuchBucket", str(exc), 404)
    current_app.logger.info("Bucket encryption updated", extra={"bucket": bucket_name})
    return Response(status=204)


def _bucket_location_handler(bucket_name: str) -> Response:
    if request.method != "GET":
        return _method_not_allowed(["GET"])
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "list")
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
    storage = _storage()
    if not storage.bucket_exists(bucket_name):
        return _error_response("NoSuchBucket", "Bucket does not exist", 404)

    # Return the configured AWS_REGION
    region = current_app.config.get("AWS_REGION", "us-east-1")
    root = Element("LocationConstraint")
    # AWS returns an empty LocationConstraint for us-east-1; mirror that here
    root.text = region if region != "us-east-1" else None
    return _xml_response(root)


def _bucket_acl_handler(bucket_name: str) -> Response:
    if request.method not in {"GET", "PUT"}:
        return _method_not_allowed(["GET", "PUT"])
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "policy")
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
    storage = _storage()
    if not storage.bucket_exists(bucket_name):
        return _error_response("NoSuchBucket", "Bucket does not exist", 404)

    if request.method == "PUT":
        # ACLs are not fully implemented; accept the request for compatibility
        # and record the canned ACL header if one was sent.
        canned_acl = request.headers.get("x-amz-acl", "private")
        current_app.logger.info("Bucket ACL set (canned)", extra={"bucket": bucket_name, "acl": canned_acl})
        return Response(status=200)

    # GET - return a basic ACL document granting the owner full control
    root = Element("AccessControlPolicy")
    owner = SubElement(root, "Owner")
    SubElement(owner, "ID").text = principal.access_key if principal else "anonymous"
    SubElement(owner, "DisplayName").text = principal.display_name if principal else "Anonymous"

    acl = SubElement(root, "AccessControlList")
    grant = SubElement(acl, "Grant")
    grantee = SubElement(grant, "Grantee")
    grantee.set("{http://www.w3.org/2001/XMLSchema-instance}type", "CanonicalUser")
    SubElement(grantee, "ID").text = principal.access_key if principal else "anonymous"
    SubElement(grantee, "DisplayName").text = principal.display_name if principal else "Anonymous"
    SubElement(grant, "Permission").text = "FULL_CONTROL"

    return _xml_response(root)
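

# Bulk delete expects the standard S3 Delete document, e.g.
#   <Delete><Quiet>true</Quiet><Object><Key>a.txt</Key></Object>...</Delete>
# with at most 1000 Object entries per request.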
def _bulk_delete_handler(bucket_name: str) -> Response:
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "delete")
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)

    payload = request.get_data(cache=False) or b""
    if not payload.strip():
        return _error_response("MalformedXML", "Request body must include a Delete specification", 400)
    try:
        root = fromstring(payload)
    except ParseError:
        return _error_response("MalformedXML", "Unable to parse XML document", 400)
    if _strip_ns(root.tag) != "Delete":
        return _error_response("MalformedXML", "Root element must be Delete", 400)

    quiet = False
    objects: list[dict[str, str | None]] = []
    for child in list(root):
        name = _strip_ns(child.tag)
        if name == "Quiet":
            quiet = (child.text or "").strip().lower() in {"true", "1"}
            continue
        if name != "Object":
            continue
        key_text = ""
        version_text: str | None = None
        for entry in list(child):
            entry_name = _strip_ns(entry.tag)
            if entry_name == "Key":
                key_text = (entry.text or "").strip()
            elif entry_name == "VersionId":
                version_text = (entry.text or "").strip() or None
        if not key_text:
            continue
        objects.append({"Key": key_text, "VersionId": version_text})

    if not objects:
        return _error_response("MalformedXML", "At least one Object entry is required", 400)
    if len(objects) > 1000:
        return _error_response("MalformedXML", "A maximum of 1000 objects can be deleted per request", 400)

    storage = _storage()
    deleted: list[str] = []
    errors: list[dict[str, str]] = []
    for entry in objects:
        key = entry["Key"] or ""
        version_id = entry.get("VersionId")
        if version_id:
            errors.append({
                "Key": key,
                "Code": "InvalidRequest",
                "Message": "VersionId is not supported for bulk deletes",
            })
            continue
        try:
            storage.delete_object(bucket_name, key)
            deleted.append(key)
        except StorageError as exc:
            errors.append({"Key": key, "Code": "InvalidRequest", "Message": str(exc)})

    result = Element("DeleteResult")
    if not quiet:
        for key in deleted:
            deleted_el = SubElement(result, "Deleted")
            SubElement(deleted_el, "Key").text = key
    for err in errors:
        error_el = SubElement(result, "Error")
        SubElement(error_el, "Key").text = err.get("Key", "")
        SubElement(error_el, "Code").text = err.get("Code", "InvalidRequest")
        SubElement(error_el, "Message").text = err.get("Message", "Request failed")

    current_app.logger.info(
        "Bulk object delete",
        extra={"bucket": bucket_name, "deleted": len(deleted), "errors": len(errors)},
    )
    return _xml_response(result, status=200)


# ---------------------- routes ----------------------
@s3_api_bp.get("/")
@limiter.limit("60 per minute")
def list_buckets() -> Response:
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, None, "list")
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)
    root = Element("ListAllMyBucketsResult")
    owner = SubElement(root, "Owner")
    SubElement(owner, "ID").text = principal.access_key
    SubElement(owner, "DisplayName").text = principal.display_name
    buckets_el = SubElement(root, "Buckets")

    storage_buckets = _storage().list_buckets()
    allowed = set(_iam().buckets_for_principal(principal, [b.name for b in storage_buckets]))
    for bucket in storage_buckets:
        if bucket.name not in allowed:
            continue
        bucket_el = SubElement(buckets_el, "Bucket")
        SubElement(bucket_el, "Name").text = bucket.name
        SubElement(bucket_el, "CreationDate").text = bucket.created_at.isoformat()

    return _xml_response(root)


@s3_api_bp.route("/<bucket_name>", methods=["PUT", "DELETE", "GET", "POST"], strict_slashes=False)
@limiter.limit("120 per minute")
def bucket_handler(bucket_name: str) -> Response:
    storage = _storage()
    subresource_response = _maybe_handle_bucket_subresource(bucket_name)
    if subresource_response is not None:
        return subresource_response

    if request.method == "POST":
        if "delete" not in request.args:
            return _method_not_allowed(["GET", "PUT", "DELETE"])
        return _bulk_delete_handler(bucket_name)

    if request.method == "PUT":
        principal, error = _require_principal()
        if error:
            return error
        try:
            _authorize_action(principal, bucket_name, "write")
        except IamError as exc:
            return _error_response("AccessDenied", str(exc), 403)
        try:
            storage.create_bucket(bucket_name)
        except FileExistsError:
            return _error_response("BucketAlreadyExists", "Bucket exists", 409)
        except StorageError as exc:
            return _error_response("InvalidBucketName", str(exc), 400)
        current_app.logger.info("Bucket created", extra={"bucket": bucket_name})
        return Response(status=200)

    if request.method == "DELETE":
        principal, error = _require_principal()
        if error:
            return error
        try:
            _authorize_action(principal, bucket_name, "delete")
        except IamError as exc:
            return _error_response("AccessDenied", str(exc), 403)
        try:
            storage.delete_bucket(bucket_name)
            _bucket_policies().delete_policy(bucket_name)
            _replication_manager().delete_rule(bucket_name)
        except StorageError as exc:
            code = "BucketNotEmpty" if "not empty" in str(exc) else "NoSuchBucket"
            status = 409 if code == "BucketNotEmpty" else 404
            return _error_response(code, str(exc), status)
        current_app.logger.info("Bucket deleted", extra={"bucket": bucket_name})
        return Response(status=204)

    # GET - list objects (supports both ListObjects and ListObjectsV2)
    principal, error = _require_principal()
    try:
        _authorize_action(principal, bucket_name, "list")
    except IamError as exc:
        if error:
            return error
        return _error_response("AccessDenied", str(exc), 403)
    try:
        objects = storage.list_objects(bucket_name)
    except StorageError as exc:
        return _error_response("NoSuchBucket", str(exc), 404)

    # Check if this is ListObjectsV2 (list-type=2)
    list_type = request.args.get("list-type")
    prefix = request.args.get("prefix", "")
    delimiter = request.args.get("delimiter", "")
    try:
        max_keys = min(int(request.args.get("max-keys", current_app.config["UI_PAGE_SIZE"])), 1000)
    except ValueError:
        return _error_response("InvalidArgument", "max-keys must be an integer", 400)

    # Pagination markers
    marker = request.args.get("marker", "")  # ListObjects v1
    continuation_token = request.args.get("continuation-token", "")  # ListObjectsV2
    start_after = request.args.get("start-after", "")  # ListObjectsV2

    # For ListObjectsV2, continuation-token takes precedence, then start-after.
    # For ListObjects v1, use marker. Continuation tokens are the last returned
    # key, base64url-encoded.
    effective_start = ""
    if list_type == "2":
        if continuation_token:
            try:
                effective_start = base64.urlsafe_b64decode(continuation_token.encode()).decode("utf-8")
            except Exception:
                effective_start = continuation_token
        elif start_after:
            effective_start = start_after
    else:
        effective_start = marker

    if prefix:
        objects = [obj for obj in objects if obj.key.startswith(prefix)]

    if effective_start:
        objects = [obj for obj in objects if obj.key > effective_start]

    common_prefixes: list[str] = []
    filtered_objects: list = []
    if delimiter:
        seen_prefixes: set[str] = set()
        for obj in objects:
            key_after_prefix = obj.key[len(prefix):] if prefix else obj.key
            if delimiter in key_after_prefix:
                # This is a "folder" - extract the common prefix
                common_prefix = prefix + key_after_prefix.split(delimiter)[0] + delimiter
                if common_prefix not in seen_prefixes:
                    seen_prefixes.add(common_prefix)
                    common_prefixes.append(common_prefix)
            else:
                filtered_objects.append(obj)
        objects = filtered_objects
        common_prefixes = sorted(common_prefixes)

    total_items = len(objects) + len(common_prefixes)
    is_truncated = total_items > max_keys

    if len(objects) >= max_keys:
        objects = objects[:max_keys]
        common_prefixes = []
    else:
        remaining = max_keys - len(objects)
        common_prefixes = common_prefixes[:remaining]

    next_marker = ""
    next_continuation_token = ""
    if is_truncated:
        if objects:
            next_marker = objects[-1].key
        elif common_prefixes:
            next_marker = common_prefixes[-1].rstrip(delimiter) if delimiter else common_prefixes[-1]

        if list_type == "2" and next_marker:
            next_continuation_token = base64.urlsafe_b64encode(next_marker.encode()).decode("utf-8")

    if list_type == "2":
        root = Element("ListBucketResult")
        SubElement(root, "Name").text = bucket_name
        SubElement(root, "Prefix").text = prefix
        SubElement(root, "MaxKeys").text = str(max_keys)
        SubElement(root, "KeyCount").text = str(len(objects) + len(common_prefixes))
        SubElement(root, "IsTruncated").text = "true" if is_truncated else "false"
        if delimiter:
            SubElement(root, "Delimiter").text = delimiter

        if continuation_token:
            SubElement(root, "ContinuationToken").text = continuation_token
        if start_after:
            SubElement(root, "StartAfter").text = start_after

        if is_truncated and next_continuation_token:
            SubElement(root, "NextContinuationToken").text = next_continuation_token

        for meta in objects:
            obj_el = SubElement(root, "Contents")
            SubElement(obj_el, "Key").text = meta.key
            SubElement(obj_el, "LastModified").text = meta.last_modified.isoformat()
            SubElement(obj_el, "ETag").text = f'"{meta.etag}"'
            SubElement(obj_el, "Size").text = str(meta.size)
            SubElement(obj_el, "StorageClass").text = "STANDARD"

        for cp in common_prefixes:
            cp_el = SubElement(root, "CommonPrefixes")
            SubElement(cp_el, "Prefix").text = cp
    else:
        root = Element("ListBucketResult")
        SubElement(root, "Name").text = bucket_name
        SubElement(root, "Prefix").text = prefix
        SubElement(root, "Marker").text = marker
        SubElement(root, "MaxKeys").text = str(max_keys)
        SubElement(root, "IsTruncated").text = "true" if is_truncated else "false"
        if delimiter:
            SubElement(root, "Delimiter").text = delimiter

        if is_truncated and delimiter and next_marker:
            SubElement(root, "NextMarker").text = next_marker

        for meta in objects:
            obj_el = SubElement(root, "Contents")
            SubElement(obj_el, "Key").text = meta.key
            SubElement(obj_el, "LastModified").text = meta.last_modified.isoformat()
            SubElement(obj_el, "ETag").text = f'"{meta.etag}"'
            SubElement(obj_el, "Size").text = str(meta.size)

        for cp in common_prefixes:
            cp_el = SubElement(root, "CommonPrefixes")
            SubElement(cp_el, "Prefix").text = cp

    return _xml_response(root)
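

# Object routes multiplex several S3 operations on one URL: ?tagging, multipart
# (?uploads / ?uploadId / ?partNumber), x-amz-copy-source copies, and plain
# GET/PUT/HEAD/DELETE on the object itself.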
@s3_api_bp.route("/<bucket_name>/<path:object_key>", methods=["PUT", "GET", "DELETE", "HEAD", "POST"], strict_slashes=False)
|
|
@limiter.limit("240 per minute")
|
|
def object_handler(bucket_name: str, object_key: str):
|
|
storage = _storage()
|
|
|
|
if "tagging" in request.args:
|
|
return _object_tagging_handler(bucket_name, object_key)
|
|
|
|
# Multipart Uploads
|
|
if request.method == "POST":
|
|
if "uploads" in request.args:
|
|
return _initiate_multipart_upload(bucket_name, object_key)
|
|
if "uploadId" in request.args:
|
|
return _complete_multipart_upload(bucket_name, object_key)
|
|
return _method_not_allowed(["GET", "PUT", "DELETE", "HEAD", "POST"])
|
|
|
|
if request.method == "PUT":
|
|
if "partNumber" in request.args and "uploadId" in request.args:
|
|
return _upload_part(bucket_name, object_key)
|
|
|
|
copy_source = request.headers.get("x-amz-copy-source")
|
|
if copy_source:
|
|
return _copy_object(bucket_name, object_key, copy_source)
|
|
|
|
_, error = _object_principal("write", bucket_name, object_key)
|
|
if error:
|
|
return error
|
|
|
|
stream = request.stream
|
|
content_encoding = request.headers.get("Content-Encoding", "").lower()
|
|
if "aws-chunked" in content_encoding:
|
|
stream = AwsChunkedDecoder(stream)
|
|
|
|
metadata = _extract_request_metadata()
|
|
|
|
content_type = request.headers.get("Content-Type")
|
|
validation_error = _validate_content_type(object_key, content_type)
|
|
if validation_error:
|
|
return _error_response("InvalidArgument", validation_error, 400)
|
|
|
|
try:
|
|
meta = storage.put_object(
|
|
bucket_name,
|
|
object_key,
|
|
stream,
|
|
metadata=metadata or None,
|
|
)
|
|
except StorageError as exc:
|
|
message = str(exc)
|
|
if "Bucket" in message:
|
|
return _error_response("NoSuchBucket", message, 404)
|
|
return _error_response("InvalidArgument", message, 400)
|
|
current_app.logger.info(
|
|
"Object uploaded",
|
|
extra={"bucket": bucket_name, "key": object_key, "size": meta.size},
|
|
)
|
|
response = Response(status=200)
|
|
response.headers["ETag"] = f'"{meta.etag}"'
|
|
|
|
# Trigger replication if not a replication request
|
|
user_agent = request.headers.get("User-Agent", "")
|
|
if "S3ReplicationAgent" not in user_agent:
|
|
_replication_manager().trigger_replication(bucket_name, object_key, action="write")
|
|
|
|
return response
|
|
|
|
if request.method in {"GET", "HEAD"}:
|
|
if request.method == "GET" and "uploadId" in request.args:
|
|
return _list_parts(bucket_name, object_key)
|
|
|
|
_, error = _object_principal("read", bucket_name, object_key)
|
|
if error:
|
|
return error
|
|
try:
|
|
path = storage.get_object_path(bucket_name, object_key)
|
|
except StorageError as exc:
|
|
return _error_response("NoSuchKey", str(exc), 404)
|
|
metadata = storage.get_object_metadata(bucket_name, object_key)
|
|
stat = path.stat()
|
|
mimetype = mimetypes.guess_type(path.name)[0] or "application/octet-stream"
|
|
etag = storage._compute_etag(path)
|
|
|
|
if request.method == "GET":
|
|
response = Response(_stream_file(path), mimetype=mimetype, direct_passthrough=True)
|
|
logged_bytes = stat.st_size
|
|
else:
|
|
response = Response(status=200)
|
|
response.headers["Content-Type"] = mimetype
|
|
logged_bytes = 0
|
|
|
|
_apply_object_headers(response, file_stat=stat, metadata=metadata, etag=etag)
|
|
action = "Object read" if request.method == "GET" else "Object head"
|
|
current_app.logger.info(action, extra={"bucket": bucket_name, "key": object_key, "bytes": logged_bytes})
|
|
return response
|
|
|
|
if "uploadId" in request.args:
|
|
return _abort_multipart_upload(bucket_name, object_key)
|
|
|
|
_, error = _object_principal("delete", bucket_name, object_key)
|
|
if error:
|
|
return error
|
|
storage.delete_object(bucket_name, object_key)
|
|
current_app.logger.info("Object deleted", extra={"bucket": bucket_name, "key": object_key})
|
|
|
|
# Trigger replication if not a replication request
|
|
user_agent = request.headers.get("User-Agent", "")
|
|
if "S3ReplicationAgent" not in user_agent:
|
|
_replication_manager().trigger_replication(bucket_name, object_key, action="delete")
|
|
|
|
return Response(status=204)
|
|
|
|
|
|
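

# ListParts returns every recorded part in one page; IsTruncated is always
# "false" because this implementation does not paginate part listings.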
def _list_parts(bucket_name: str, object_key: str) -> Response:
    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, bucket_name, "read", object_key=object_key)
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)

    upload_id = request.args.get("uploadId")
    if not upload_id:
        return _error_response("InvalidArgument", "uploadId is required", 400)

    try:
        parts = _storage().list_multipart_parts(bucket_name, upload_id)
    except StorageError as exc:
        return _error_response("NoSuchUpload", str(exc), 404)

    root = Element("ListPartsResult")
    SubElement(root, "Bucket").text = bucket_name
    SubElement(root, "Key").text = object_key
    SubElement(root, "UploadId").text = upload_id

    initiator = SubElement(root, "Initiator")
    SubElement(initiator, "ID").text = principal.access_key
    SubElement(initiator, "DisplayName").text = principal.display_name

    owner = SubElement(root, "Owner")
    SubElement(owner, "ID").text = principal.access_key
    SubElement(owner, "DisplayName").text = principal.display_name

    SubElement(root, "StorageClass").text = "STANDARD"
    SubElement(root, "PartNumberMarker").text = "0"
    SubElement(root, "NextPartNumberMarker").text = str(parts[-1]["PartNumber"]) if parts else "0"
    SubElement(root, "MaxParts").text = "1000"
    SubElement(root, "IsTruncated").text = "false"

    for part in parts:
        p = SubElement(root, "Part")
        SubElement(p, "PartNumber").text = str(part["PartNumber"])
        SubElement(p, "LastModified").text = part["LastModified"].isoformat()
        SubElement(p, "ETag").text = f'"{part["ETag"]}"'
        SubElement(p, "Size").text = str(part["Size"])

    return _xml_response(root)


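# Bucket policies are stored as JSON documents via BucketPolicyStore. An
# illustrative document (the exact schema is whatever
# BucketPolicyStore.set_policy validates) might look like:
#
#   {
#       "Version": "2012-10-17",
#       "Statement": [
#           {"Effect": "Allow",
#            "Principal": "*",
#            "Action": "s3:GetObject",
#            "Resource": "arn:aws:s3:::my-bucket/*"}
#       ]
#   }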
@s3_api_bp.route("/bucket-policy/<bucket_name>", methods=["GET", "PUT", "DELETE"])
|
|
@limiter.limit("30 per minute")
|
|
def bucket_policy_handler(bucket_name: str) -> Response:
|
|
principal, error = _require_principal()
|
|
if error:
|
|
return error
|
|
try:
|
|
_authorize_action(principal, bucket_name, "policy")
|
|
except IamError as exc:
|
|
return _error_response("AccessDenied", str(exc), 403)
|
|
storage = _storage()
|
|
if not storage.bucket_exists(bucket_name):
|
|
return _error_response("NoSuchBucket", "Bucket does not exist", 404)
|
|
store = _bucket_policies()
|
|
if request.method == "GET":
|
|
policy = store.get_policy(bucket_name)
|
|
if not policy:
|
|
return _error_response("NoSuchBucketPolicy", "No bucket policy attached", 404)
|
|
return jsonify(policy)
|
|
if request.method == "DELETE":
|
|
store.delete_policy(bucket_name)
|
|
current_app.logger.info("Bucket policy removed", extra={"bucket": bucket_name})
|
|
return Response(status=204)
|
|
payload = request.get_json(silent=True)
|
|
if not payload:
|
|
return _error_response("MalformedPolicy", "Policy document must be JSON", 400)
|
|
try:
|
|
store.set_policy(bucket_name, payload)
|
|
current_app.logger.info("Bucket policy updated", extra={"bucket": bucket_name})
|
|
except ValueError as exc:
|
|
return _error_response("MalformedPolicy", str(exc), 400)
|
|
return Response(status=204)
|
|
|
|
|
|
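# POST /presign/<bucket>/<key> issues a SigV4 presigned URL for GET, PUT, or
# DELETE. Illustrative request (hypothetical host and credentials; the mount
# prefix depends on how the blueprint is registered):
#
#   curl -X POST http://localhost:5000/presign/my-bucket/report.pdf \
#        -H "X-Access-Key: AKIA..." -H "X-Secret-Key: ..." \
#        -H "Content-Type: application/json" \
#        -d '{"method": "GET", "expires_in": 3600}'
#
# The response is {"url": ..., "method": ..., "expires_in": ...}; expires_in
# is clamped to at most seven days, matching SigV4's presigned-URL maximum.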
@s3_api_bp.post("/presign/<bucket_name>/<path:object_key>")
|
|
@limiter.limit("45 per minute")
|
|
def presign_object(bucket_name: str, object_key: str):
|
|
payload = request.get_json(silent=True) or {}
|
|
method = str(payload.get("method", "GET")).upper()
|
|
allowed_methods = {"GET", "PUT", "DELETE"}
|
|
if method not in allowed_methods:
|
|
return _error_response("InvalidRequest", "Method must be GET, PUT, or DELETE", 400)
|
|
try:
|
|
expires = int(payload.get("expires_in", 900))
|
|
except (TypeError, ValueError):
|
|
return _error_response("InvalidRequest", "expires_in must be an integer", 400)
|
|
expires = max(1, min(expires, 7 * 24 * 3600))
|
|
action = "read" if method == "GET" else ("delete" if method == "DELETE" else "write")
|
|
principal, error = _require_principal()
|
|
if error:
|
|
return error
|
|
try:
|
|
_authorize_action(principal, bucket_name, action, object_key=object_key)
|
|
except IamError as exc:
|
|
return _error_response("AccessDenied", str(exc), 403)
|
|
storage = _storage()
|
|
if not storage.bucket_exists(bucket_name):
|
|
return _error_response("NoSuchBucket", "Bucket does not exist", 404)
|
|
if action != "write":
|
|
try:
|
|
storage.get_object_path(bucket_name, object_key)
|
|
except StorageError:
|
|
return _error_response("NoSuchKey", "Object not found", 404)
|
|
secret = _iam().secret_for_key(principal.access_key)
|
|
url = _generate_presigned_url(
|
|
principal=principal,
|
|
secret_key=secret,
|
|
method=method,
|
|
bucket_name=bucket_name,
|
|
object_key=object_key,
|
|
expires_in=expires,
|
|
)
|
|
current_app.logger.info(
|
|
"Presigned URL generated",
|
|
extra={"bucket": bucket_name, "key": object_key, "method": method},
|
|
)
|
|
return jsonify({"url": url, "method": method, "expires_in": expires})
|
|
|
|
|
|
@s3_api_bp.route("/<bucket_name>", methods=["HEAD"])
|
|
@limiter.limit("100 per minute")
|
|
def head_bucket(bucket_name: str) -> Response:
|
|
principal, error = _require_principal()
|
|
if error:
|
|
return error
|
|
try:
|
|
_authorize_action(principal, bucket_name, "list")
|
|
if not _storage().bucket_exists(bucket_name):
|
|
return _error_response("NoSuchBucket", "Bucket not found", 404)
|
|
return Response(status=200)
|
|
except IamError as exc:
|
|
return _error_response("AccessDenied", str(exc), 403)
|
|
|
|
|
|
@s3_api_bp.route("/<bucket_name>/<path:object_key>", methods=["HEAD"])
|
|
@limiter.limit("100 per minute")
|
|
def head_object(bucket_name: str, object_key: str) -> Response:
|
|
principal, error = _require_principal()
|
|
if error:
|
|
return error
|
|
try:
|
|
_authorize_action(principal, bucket_name, "read", object_key=object_key)
|
|
path = _storage().get_object_path(bucket_name, object_key)
|
|
metadata = _storage().get_object_metadata(bucket_name, object_key)
|
|
stat = path.stat()
|
|
etag = _storage()._compute_etag(path)
|
|
|
|
response = Response(status=200)
|
|
_apply_object_headers(response, file_stat=stat, metadata=metadata, etag=etag)
|
|
response.headers["Content-Type"] = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
|
|
return response
|
|
except (StorageError, FileNotFoundError):
|
|
return _error_response("NoSuchKey", "Object not found", 404)
|
|
except IamError as exc:
|
|
return _error_response("AccessDenied", str(exc), 403)
|
|
|
|
|
|
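# CopyObject: the x-amz-copy-source header names the source as
# "/<bucket>/<key>" (URL-encoded, leading slash optional). The
# x-amz-metadata-directive header follows S3 semantics: COPY (the default)
# carries the source object's metadata over, REPLACE takes metadata and
# Content-Type from the copy request itself.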
def _copy_object(dest_bucket: str, dest_key: str, copy_source: str) -> Response:
    """Handle S3 CopyObject operation."""
    from urllib.parse import unquote

    copy_source = unquote(copy_source)
    if copy_source.startswith("/"):
        copy_source = copy_source[1:]

    parts = copy_source.split("/", 1)
    if len(parts) != 2:
        return _error_response("InvalidArgument", "Invalid x-amz-copy-source format", 400)

    source_bucket, source_key = parts
    if not source_bucket or not source_key:
        return _error_response("InvalidArgument", "Invalid x-amz-copy-source format", 400)

    principal, error = _require_principal()
    if error:
        return error
    try:
        _authorize_action(principal, source_bucket, "read", object_key=source_key)
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)

    try:
        _authorize_action(principal, dest_bucket, "write", object_key=dest_key)
    except IamError as exc:
        return _error_response("AccessDenied", str(exc), 403)

    storage = _storage()

    try:
        source_path = storage.get_object_path(source_bucket, source_key)
    except StorageError:
        return _error_response("NoSuchKey", "Source object not found", 404)

    source_metadata = storage.get_object_metadata(source_bucket, source_key)

    metadata_directive = request.headers.get("x-amz-metadata-directive", "COPY").upper()
    if metadata_directive == "REPLACE":
        metadata = _extract_request_metadata()
        content_type = request.headers.get("Content-Type")
        validation_error = _validate_content_type(dest_key, content_type)
        if validation_error:
            return _error_response("InvalidArgument", validation_error, 400)
    else:
        metadata = source_metadata

    try:
        with source_path.open("rb") as stream:
            meta = storage.put_object(
                dest_bucket,
                dest_key,
                stream,
                metadata=metadata or None,
            )
    except StorageError as exc:
        message = str(exc)
        if "Bucket" in message:
            return _error_response("NoSuchBucket", message, 404)
        return _error_response("InvalidArgument", message, 400)

    current_app.logger.info(
        "Object copied",
        extra={
            "source_bucket": source_bucket,
            "source_key": source_key,
            "dest_bucket": dest_bucket,
            "dest_key": dest_key,
            "size": meta.size,
        },
    )

    user_agent = request.headers.get("User-Agent", "")
    if "S3ReplicationAgent" not in user_agent:
        _replication_manager().trigger_replication(dest_bucket, dest_key, action="write")

    root = Element("CopyObjectResult")
    SubElement(root, "LastModified").text = meta.last_modified.isoformat()
    SubElement(root, "ETag").text = f'"{meta.etag}"'
    return _xml_response(root)


class AwsChunkedDecoder:
    """Decodes aws-chunked encoded streams."""
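    # Wire format (AWS SigV4 streaming payloads): each chunk is
    #   <hex-size>[;chunk-signature=<sig>]\r\n<data>\r\n
    # and the stream ends with a zero-size chunk followed by optional
    # trailer lines and a terminating blank line, e.g.:
    #   b"5;chunk-signature=abc...\r\nhello\r\n0;chunk-signature=def...\r\n\r\n"
    # The decoder strips the framing and yields only the data bytes;
    # chunk signatures are skipped, not verified.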
    def __init__(self, stream):
        self.stream = stream
        self.buffer = b""
        self.chunk_remaining = 0
        self.finished = False

    def read(self, size=-1):
        if self.finished:
            return b""

        result = b""
        while size == -1 or len(result) < size:
            if self.chunk_remaining > 0:
                to_read = self.chunk_remaining
                if size != -1:
                    to_read = min(to_read, size - len(result))

                chunk = self.stream.read(to_read)
                if not chunk:
                    raise IOError("Unexpected EOF in chunk data")

                result += chunk
                self.chunk_remaining -= len(chunk)

                if self.chunk_remaining == 0:
                    # Read CRLF after chunk data
                    crlf = self.stream.read(2)
                    if crlf != b"\r\n":
                        raise IOError("Malformed chunk: missing CRLF")
            else:
                # Read the next chunk-size line, byte by byte, up to CRLF.
                line = b""
                while True:
                    char = self.stream.read(1)
                    if not char:
                        if not line:
                            self.finished = True
                            return result
                        raise IOError("Unexpected EOF in chunk size")
                    line += char
                    if line.endswith(b"\r\n"):
                        break

                try:
                    line_str = line.decode("ascii").strip()
                    # Handle chunk-signature extension if present (e.g. "1000;chunk-signature=...")
                    if ";" in line_str:
                        line_str = line_str.split(";")[0]
                    chunk_size = int(line_str, 16)
                except ValueError:
                    raise IOError(f"Invalid chunk size: {line}")

                if chunk_size == 0:
                    # Final chunk: drain optional trailer lines up to the
                    # terminating blank line (or EOF), then stop.
                    self.finished = True
                    while True:
                        line = b""
                        while True:
                            char = self.stream.read(1)
                            if not char:
                                break
                            line += char
                            if line.endswith(b"\r\n"):
                                break
                        if line == b"\r\n" or not line:
                            break
                    return result

                self.chunk_remaining = chunk_size

        return result


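# Multipart upload flow, mirroring S3: initiate hands out an uploadId,
# upload-part stores each numbered part under that id, and complete/abort
# finish or discard the upload. The helpers below implement each step.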
def _initiate_multipart_upload(bucket_name: str, object_key: str) -> Response:
    principal, error = _object_principal("write", bucket_name, object_key)
    if error:
        return error

    metadata = _extract_request_metadata()
    try:
        upload_id = _storage().initiate_multipart_upload(
            bucket_name,
            object_key,
            metadata=metadata or None,
        )
    except StorageError as exc:
        return _error_response("NoSuchBucket", str(exc), 404)

    root = Element("InitiateMultipartUploadResult")
    SubElement(root, "Bucket").text = bucket_name
    SubElement(root, "Key").text = object_key
    SubElement(root, "UploadId").text = upload_id
    return _xml_response(root)


def _upload_part(bucket_name: str, object_key: str) -> Response:
    principal, error = _object_principal("write", bucket_name, object_key)
    if error:
        return error

    upload_id = request.args.get("uploadId")
    part_number_str = request.args.get("partNumber")
    if not upload_id or not part_number_str:
        return _error_response("InvalidArgument", "uploadId and partNumber are required", 400)

    try:
        part_number = int(part_number_str)
    except ValueError:
        return _error_response("InvalidArgument", "partNumber must be an integer", 400)

    stream = request.stream
    content_encoding = request.headers.get("Content-Encoding", "").lower()
    if "aws-chunked" in content_encoding:
        stream = AwsChunkedDecoder(stream)

    try:
        etag = _storage().upload_multipart_part(bucket_name, upload_id, part_number, stream)
    except StorageError as exc:
        if "NoSuchBucket" in str(exc):
            return _error_response("NoSuchBucket", str(exc), 404)
        if "Multipart upload not found" in str(exc):
            return _error_response("NoSuchUpload", str(exc), 404)
        return _error_response("InvalidArgument", str(exc), 400)

    response = Response(status=200)
    response.headers["ETag"] = f'"{etag}"'
    return response


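# CompleteMultipartUpload expects an XML body listing every part, e.g.:
#
#   <CompleteMultipartUpload>
#     <Part><PartNumber>1</PartNumber><ETag>"etag1"</ETag></Part>
#     <Part><PartNumber>2</PartNumber><ETag>"etag2"</ETag></Part>
#   </CompleteMultipartUpload>
#
# The parser below accepts the elements with or without an XML namespace.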
def _complete_multipart_upload(bucket_name: str, object_key: str) -> Response:
    principal, error = _object_principal("write", bucket_name, object_key)
    if error:
        return error

    upload_id = request.args.get("uploadId")
    if not upload_id:
        return _error_response("InvalidArgument", "uploadId is required", 400)

    payload = request.get_data(cache=False) or b""
    try:
        root = fromstring(payload)
    except ParseError:
        return _error_response("MalformedXML", "Unable to parse XML document", 400)

    if _strip_ns(root.tag) != "CompleteMultipartUpload":
        return _error_response("MalformedXML", "Root element must be CompleteMultipartUpload", 400)

    parts = []
    for part_el in list(root):
        if _strip_ns(part_el.tag) != "Part":
            continue
        part_number_el = part_el.find("{*}PartNumber")
        if part_number_el is None:
            part_number_el = part_el.find("PartNumber")

        etag_el = part_el.find("{*}ETag")
        if etag_el is None:
            etag_el = part_el.find("ETag")

        if part_number_el is not None and etag_el is not None:
            parts.append({
                "PartNumber": int(part_number_el.text or 0),
                "ETag": (etag_el.text or "").strip('"'),
            })

    try:
        meta = _storage().complete_multipart_upload(bucket_name, upload_id, parts)
    except StorageError as exc:
        if "NoSuchBucket" in str(exc):
            return _error_response("NoSuchBucket", str(exc), 404)
        if "Multipart upload not found" in str(exc):
            return _error_response("NoSuchUpload", str(exc), 404)
        return _error_response("InvalidPart", str(exc), 400)

    user_agent = request.headers.get("User-Agent", "")
    if "S3ReplicationAgent" not in user_agent:
        _replication_manager().trigger_replication(bucket_name, object_key, action="write")

    root = Element("CompleteMultipartUploadResult")
    location = f"{request.host_url}{bucket_name}/{object_key}"
    SubElement(root, "Location").text = location
    SubElement(root, "Bucket").text = bucket_name
    SubElement(root, "Key").text = object_key
    SubElement(root, "ETag").text = f'"{meta.etag}"'

    return _xml_response(root)


def _abort_multipart_upload(bucket_name: str, object_key: str) -> Response:
    principal, error = _object_principal("delete", bucket_name, object_key)
    if error:
        return error

    upload_id = request.args.get("uploadId")
    if not upload_id:
        return _error_response("InvalidArgument", "uploadId is required", 400)

    try:
        _storage().abort_multipart_upload(bucket_name, upload_id)
    except StorageError as exc:
        # Abort is idempotent: an unknown uploadId is silently ignored, but a
        # missing bucket is still reported to the caller.
        if "Bucket does not exist" in str(exc):
            return _error_response("NoSuchBucket", str(exc), 404)

    return Response(status=204)


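# Request authentication: prefer SigV4 (Authorization header or presigned
# X-Amz-* query parameters), then fall back to the simple X-Access-Key /
# X-Secret-Key headers used by internal tooling and tests. g.principal is
# left as None when nothing matches; per-route authorization then decides
# whether the request is allowed.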
@s3_api_bp.before_request
def resolve_principal():
    g.principal = None
    # Try SigV4
    try:
        if ("Authorization" in request.headers and request.headers["Authorization"].startswith("AWS4-HMAC-SHA256")) or \
                (request.args.get("X-Amz-Algorithm") == "AWS4-HMAC-SHA256"):
            g.principal = _verify_sigv4(request)
            return
    except Exception:
        pass

    # Try simple auth headers (internal/testing)
    access_key = request.headers.get("X-Access-Key")
    secret_key = request.headers.get("X-Secret-Key")
    if access_key and secret_key:
        try:
            g.principal = _iam().authenticate(access_key, secret_key)
        except Exception:
            pass