Apply max-keys to the combined current + archived ListObjectVersions output and report truncation

This commit is contained in:
2026-04-22 00:12:22 +08:00
parent 8935188c8f
commit 9ec5797919
19 changed files with 1750 additions and 117 deletions

View File

@@ -3,6 +3,7 @@ use std::fmt;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum S3ErrorCode {
AccessDenied,
BadDigest,
BucketAlreadyExists,
BucketNotEmpty,
EntityTooLarge,
@@ -14,6 +15,7 @@ pub enum S3ErrorCode {
InvalidPolicyDocument,
InvalidRange,
InvalidRequest,
InvalidTag,
MalformedXML,
MethodNotAllowed,
NoSuchBucket,
@@ -32,6 +34,7 @@ impl S3ErrorCode {
pub fn http_status(&self) -> u16 {
match self {
Self::AccessDenied => 403,
Self::BadDigest => 400,
Self::BucketAlreadyExists => 409,
Self::BucketNotEmpty => 409,
Self::EntityTooLarge => 413,
@@ -43,6 +46,7 @@ impl S3ErrorCode {
Self::InvalidPolicyDocument => 400,
Self::InvalidRange => 416,
Self::InvalidRequest => 400,
Self::InvalidTag => 400,
Self::MalformedXML => 400,
Self::MethodNotAllowed => 405,
Self::NoSuchBucket => 404,
@@ -61,6 +65,7 @@ impl S3ErrorCode {
pub fn as_str(&self) -> &'static str {
match self {
Self::AccessDenied => "AccessDenied",
Self::BadDigest => "BadDigest",
Self::BucketAlreadyExists => "BucketAlreadyExists",
Self::BucketNotEmpty => "BucketNotEmpty",
Self::EntityTooLarge => "EntityTooLarge",
@@ -72,6 +77,7 @@ impl S3ErrorCode {
Self::InvalidPolicyDocument => "InvalidPolicyDocument",
Self::InvalidRange => "InvalidRange",
Self::InvalidRequest => "InvalidRequest",
Self::InvalidTag => "InvalidTag",
Self::MalformedXML => "MalformedXML",
Self::MethodNotAllowed => "MethodNotAllowed",
Self::NoSuchBucket => "NoSuchBucket",
@@ -90,6 +96,7 @@ impl S3ErrorCode {
pub fn default_message(&self) -> &'static str {
match self {
Self::AccessDenied => "Access Denied",
Self::BadDigest => "The Content-MD5 or checksum value you specified did not match what we received",
Self::BucketAlreadyExists => "The requested bucket name is not available",
Self::BucketNotEmpty => "The bucket you tried to delete is not empty",
Self::EntityTooLarge => "Your proposed upload exceeds the maximum allowed size",
@@ -101,6 +108,7 @@ impl S3ErrorCode {
Self::InvalidPolicyDocument => "The content of the form does not meet the conditions specified in the policy document",
Self::InvalidRange => "The requested range is not satisfiable",
Self::InvalidRequest => "Invalid request",
Self::InvalidTag => "The Tagging header is invalid",
Self::MalformedXML => "The XML you provided was not well-formed",
Self::MethodNotAllowed => "The specified method is not allowed against this resource",
Self::NoSuchBucket => "The specified bucket does not exist",

View File

@@ -112,6 +112,8 @@ pub struct VersionInfo {
pub last_modified: DateTime<Utc>,
pub etag: Option<String>,
pub is_latest: bool,
#[serde(default)]
pub is_delete_marker: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -10,6 +10,7 @@ myfsio-crypto = { path = "../myfsio-crypto" }
myfsio-storage = { path = "../myfsio-storage" }
myfsio-xml = { path = "../myfsio-xml" }
base64 = { workspace = true }
md-5 = { workspace = true }
axum = { workspace = true }
tokio = { workspace = true }
tower = { workspace = true }
@@ -29,6 +30,7 @@ percent-encoding = { workspace = true }
quick-xml = { workspace = true }
mime_guess = "2"
crc32fast = { workspace = true }
sha2 = { workspace = true }
duckdb = { workspace = true }
roxmltree = "0.20"
parking_lot = { workspace = true }

View File

@@ -1038,7 +1038,12 @@ fn s3_error_response(code: S3ErrorCode, message: &str, status: StatusCode) -> Re
(status, [("content-type", "application/xml")], err.to_xml()).into_response()
}
pub async fn list_object_versions(state: &AppState, bucket: &str) -> Response {
pub async fn list_object_versions(
state: &AppState,
bucket: &str,
prefix: Option<&str>,
max_keys: usize,
) -> Response {
match state.storage.list_buckets().await {
Ok(buckets) => {
if !buckets.iter().any(|b| b.name == bucket) {
@@ -1050,13 +1055,24 @@ pub async fn list_object_versions(state: &AppState, bucket: &str) -> Response {
Err(e) => return storage_err(e),
}
let fetch_limit = max_keys.saturating_add(1).max(1);
let params = myfsio_common::types::ListParams {
max_keys: 1000,
max_keys: fetch_limit,
prefix: prefix.map(ToOwned::to_owned),
..Default::default()
};
let objects = match state.storage.list_objects(bucket, &params).await {
Ok(result) => result.objects,
let object_result = match state.storage.list_objects(bucket, &params).await {
Ok(result) => result,
Err(e) => return storage_err(e),
};
let objects = object_result.objects;
let archived_versions = match state
.storage
.list_bucket_object_versions(bucket, prefix)
.await
{
Ok(versions) => versions,
Err(e) => return storage_err(e),
};
@@ -1064,11 +1080,24 @@ pub async fn list_object_versions(state: &AppState, bucket: &str) -> Response {
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\
<ListVersionsResult xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">",
);
xml.push_str(&format!("<Name>{}</Name>", bucket));
xml.push_str(&format!("<Name>{}</Name>", xml_escape(bucket)));
xml.push_str(&format!(
"<Prefix>{}</Prefix>",
xml_escape(prefix.unwrap_or(""))
));
xml.push_str(&format!("<MaxKeys>{}</MaxKeys>", max_keys));
for obj in &objects {
let current_count = objects.len().min(max_keys);
let remaining = max_keys.saturating_sub(current_count);
let archived_count = archived_versions.len().min(remaining);
let is_truncated = object_result.is_truncated
|| objects.len() > current_count
|| archived_versions.len() > archived_count;
xml.push_str(&format!("<IsTruncated>{}</IsTruncated>", is_truncated));
for obj in objects.iter().take(current_count) {
xml.push_str("<Version>");
xml.push_str(&format!("<Key>{}</Key>", obj.key));
xml.push_str(&format!("<Key>{}</Key>", xml_escape(&obj.key)));
xml.push_str("<VersionId>null</VersionId>");
xml.push_str("<IsLatest>true</IsLatest>");
xml.push_str(&format!(
@@ -1076,9 +1105,32 @@ pub async fn list_object_versions(state: &AppState, bucket: &str) -> Response {
myfsio_xml::response::format_s3_datetime(&obj.last_modified)
));
if let Some(ref etag) = obj.etag {
xml.push_str(&format!("<ETag>\"{}\"</ETag>", etag));
xml.push_str(&format!("<ETag>\"{}\"</ETag>", xml_escape(etag)));
}
xml.push_str(&format!("<Size>{}</Size>", obj.size));
xml.push_str(&format!(
"<StorageClass>{}</StorageClass>",
xml_escape(obj.storage_class.as_deref().unwrap_or("STANDARD"))
));
xml.push_str("</Version>");
}
for version in archived_versions.iter().take(archived_count) {
xml.push_str("<Version>");
xml.push_str(&format!("<Key>{}</Key>", xml_escape(&version.key)));
xml.push_str(&format!(
"<VersionId>{}</VersionId>",
xml_escape(&version.version_id)
));
xml.push_str("<IsLatest>false</IsLatest>");
xml.push_str(&format!(
"<LastModified>{}</LastModified>",
myfsio_xml::response::format_s3_datetime(&version.last_modified)
));
if let Some(ref etag) = version.etag {
xml.push_str(&format!("<ETag>\"{}\"</ETag>", xml_escape(etag)));
}
xml.push_str(&format!("<Size>{}</Size>", version.size));
xml.push_str("<StorageClass>STANDARD</StorageClass>");
xml.push_str("</Version>");
}

File diff suppressed because it is too large Load Diff

View File

@@ -117,6 +117,7 @@ fn storage_status(err: &StorageError) -> StatusCode {
match err {
StorageError::BucketNotFound(_)
| StorageError::ObjectNotFound { .. }
| StorageError::VersionNotFound { .. }
| StorageError::UploadNotFound(_) => StatusCode::NOT_FOUND,
StorageError::InvalidBucketName(_)
| StorageError::InvalidObjectKey(_)

View File

@@ -9,7 +9,7 @@ pub mod templates;
use axum::Router;
pub const SERVER_HEADER: &str = "MyFSIO";
pub const SERVER_HEADER: &str = concat!("MyFSIO-Rust/", env!("CARGO_PKG_VERSION"));
pub fn create_ui_router(state: state::AppState) -> Router {
use axum::routing::{delete, get, post, put};

View File

@@ -1,5 +1,5 @@
use axum::extract::{Request, State};
use axum::http::{header, HeaderMap, Method, StatusCode};
use axum::http::{header, HeaderMap, Method, StatusCode, Uri};
use axum::middleware::Next;
use axum::response::{IntoResponse, Response};
@@ -15,6 +15,9 @@ use tokio::io::AsyncReadExt;
use crate::services::acl::acl_from_bucket_config;
use crate::state::AppState;
/// Request extension carrying the request path as it was before a
/// virtual-host rewrite of the URI. `sigv4_canonical_path` reads it back so
/// that signature checks can use the pre-rewrite path.
#[derive(Clone, Debug)]
struct OriginalCanonicalPath(String);
fn website_error_response(
status: StatusCode,
body: Option<Vec<u8>>,
@@ -45,7 +48,7 @@ fn website_error_response(
fn default_website_error_body(status: StatusCode) -> String {
let code = status.as_u16();
if status == StatusCode::NOT_FOUND {
"404 page not found".to_string()
"<h1>404 page not found</h1>".to_string()
} else {
let reason = status.canonical_reason().unwrap_or("Error");
format!("{code} {reason}")
@@ -324,6 +327,67 @@ async fn maybe_serve_website(
.await
}
/// Extracts a bucket-name candidate from the first dot-separated label of
/// the `Host` header value.
///
/// Returns `None` when:
/// - `host` is an IPv4 literal, with or without a `:port` suffix. Without
///   this guard `192.168.1.10:9000` would yield the candidate `192`, which
///   is shaped like a legal bucket name, and plain path-style requests to an
///   IP endpoint could be misrouted as virtual-host requests;
/// - `host` contains no `.`, or its first label is empty;
/// - the first label is one of the reserved service names
///   (`www`, `s3`, `api`, `admin`, `kms`);
/// - `validate_bucket_name` returns `Some(..)` for the label (treated here
///   as a rejection).
fn virtual_host_candidate(host: &str) -> Option<String> {
    // Drop an optional `:port` before testing for an IP literal.
    let hostname = host.rsplit_once(':').map_or(host, |(name, _port)| name);
    if hostname.parse::<std::net::Ipv4Addr>().is_ok() {
        return None;
    }

    let (candidate, _) = host.split_once('.')?;
    if candidate.is_empty() || matches!(candidate, "www" | "s3" | "api" | "admin" | "kms") {
        return None;
    }
    if myfsio_storage::validation::validate_bucket_name(candidate).is_some() {
        return None;
    }
    Some(candidate.to_string())
}
/// Resolves the bucket addressed through the `Host` header, if the request
/// should be handled as a virtual-host-style S3 request.
///
/// Yields `None` for paths under the reserved prefixes (`/ui`, `/admin`,
/// `/kms`, `/myfsio`), for hosts that produce no bucket candidate, and for
/// paths that already start with the bucket name (path-style addressing).
/// Otherwise the bucket is returned when storage reports that it exists, or
/// when the request is a `PUT /` (bucket creation through the virtual host).
/// Storage errors are treated as "no virtual host".
async fn virtual_host_bucket(
    state: &AppState,
    host: &str,
    path: &str,
    method: &Method,
) -> Option<String> {
    const RESERVED_PREFIXES: [&str; 4] = ["/ui", "/admin", "/kms", "/myfsio"];
    if RESERVED_PREFIXES
        .iter()
        .any(|reserved| path.starts_with(*reserved))
    {
        return None;
    }

    let bucket = virtual_host_candidate(host)?;

    // `/bucket` or `/bucket/...` means the client is already using path-style.
    let already_path_style = path
        .strip_prefix('/')
        .and_then(|rest| rest.strip_prefix(bucket.as_str()))
        .is_some_and(|tail| tail.is_empty() || tail.starts_with('/'));
    if already_path_style {
        return None;
    }

    let is_bucket_create = *method == Method::PUT && path == "/";
    match state.storage.bucket_exists(&bucket).await {
        Ok(exists) if exists || is_bucket_create => Some(bucket),
        _ => None,
    }
}
/// Builds the path-style equivalent of `uri` by prefixing its path with
/// `/{bucket}`; a bare `/` becomes `/{bucket}/`. The query string is carried
/// over unchanged and every other URI part is preserved.
///
/// Returns `None` if the rewritten path-and-query fails to parse or the
/// parts cannot be reassembled into a `Uri`.
fn rewrite_uri_for_virtual_host(uri: &Uri, bucket: &str) -> Option<Uri> {
    let mut target = match uri.path() {
        "/" => format!("/{}/", bucket),
        other => format!("/{}{}", bucket, other),
    };
    if let Some(query) = uri.query() {
        target.push('?');
        target.push_str(query);
    }

    let mut parts = uri.clone().into_parts();
    parts.path_and_query = Some(target.parse().ok()?);
    Uri::from_parts(parts).ok()
}
fn sigv4_canonical_path(req: &Request) -> &str {
req.extensions()
.get::<OriginalCanonicalPath>()
.map(|path| path.0.as_str())
.unwrap_or_else(|| req.uri().path())
}
pub async fn auth_layer(State(state): State<AppState>, mut req: Request, next: Next) -> Response {
let start = Instant::now();
let uri = req.uri().clone();
@@ -360,7 +424,7 @@ pub async fn auth_layer(State(state): State<AppState>, mut req: Request, next: N
} else if let Some(response) = maybe_serve_website(
&state,
method.clone(),
host.unwrap_or_default(),
host.clone().unwrap_or_default(),
path.clone(),
range_header,
)
@@ -368,38 +432,53 @@ pub async fn auth_layer(State(state): State<AppState>, mut req: Request, next: N
{
response
} else {
let auth_path = if let Some(bucket) =
virtual_host_bucket(&state, host.as_deref().unwrap_or_default(), &path, &method).await
{
if let Some(rewritten) = rewrite_uri_for_virtual_host(req.uri(), &bucket) {
req.extensions_mut()
.insert(OriginalCanonicalPath(path.clone()));
*req.uri_mut() = rewritten;
req.uri().path().to_string()
} else {
path.clone()
}
} else {
path.clone()
};
match try_auth(&state, &req) {
AuthResult::NoAuth => match authorize_request(
&state,
None,
&method,
&path,
&auth_path,
&query,
copy_source.as_deref(),
)
.await
{
Ok(()) => next.run(req).await,
Err(err) => error_response(err, &path),
Err(err) => error_response(err, &auth_path),
},
AuthResult::Ok(principal) => {
if let Err(err) = authorize_request(
&state,
Some(&principal),
&method,
&path,
&auth_path,
&query,
copy_source.as_deref(),
)
.await
{
error_response(err, &path)
error_response(err, &auth_path)
} else {
req.extensions_mut().insert(principal);
next.run(req).await
}
}
AuthResult::Denied(err) => error_response(err, &path),
AuthResult::Denied(err) => error_response(err, &auth_path),
}
};
@@ -1078,7 +1157,7 @@ fn verify_sigv4_header(state: &AppState, req: &Request, auth_str: &str) -> AuthR
};
let method = req.method().as_str();
let canonical_uri = req.uri().path();
let canonical_uri = sigv4_canonical_path(req);
let query_params = parse_query_params(req.uri().query().unwrap_or(""));
@@ -1234,7 +1313,7 @@ fn verify_sigv4_query(state: &AppState, req: &Request) -> AuthResult {
};
let method = req.method().as_str();
let canonical_uri = req.uri().path();
let canonical_uri = sigv4_canonical_path(req);
let query_params_no_sig: Vec<(String, String)> = params
.iter()

View File

@@ -2121,6 +2121,445 @@ async fn test_bucket_versioning() {
assert!(body.contains("<Status>Enabled</Status>"));
}
// End-to-end check of versionId addressing: an archived version can be
// listed, fetched, is not reachable through a traversal-style versionId,
// is subject to max-keys truncation in the listing, and can be deleted.
#[tokio::test]
async fn test_versioned_object_can_be_read_and_deleted_by_version_id() {
let (app, _tmp) = test_app();
// Create the bucket.
app.clone()
.oneshot(signed_request(
Method::PUT,
"/versions-bucket",
Body::empty(),
))
.await
.unwrap();
// Turn versioning on for the bucket.
app.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/versions-bucket?versioning")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.body(Body::from(
"<VersioningConfiguration><Status>Enabled</Status></VersioningConfiguration>",
))
.unwrap(),
)
.await
.unwrap();
// Write the same key twice; the body "first" is later expected to be
// retrievable through a non-null version id.
app.clone()
.oneshot(signed_request(
Method::PUT,
"/versions-bucket/doc.txt",
Body::from("first"),
))
.await
.unwrap();
app.clone()
.oneshot(signed_request(
Method::PUT,
"/versions-bucket/doc.txt",
Body::from("second"),
))
.await
.unwrap();
// List versions and pull out the first VersionId that is not "null".
let list_resp = app
.clone()
.oneshot(signed_request(
Method::GET,
"/versions-bucket?versions",
Body::empty(),
))
.await
.unwrap();
assert_eq!(list_resp.status(), StatusCode::OK);
let list_body = String::from_utf8(
list_resp
.into_body()
.collect()
.await
.unwrap()
.to_bytes()
.to_vec(),
)
.unwrap();
let archived_version_id = list_body
.split("<VersionId>")
.filter_map(|part| part.split_once("</VersionId>").map(|(id, _)| id))
.find(|id| *id != "null")
.expect("archived version id")
.to_string();
// Fetching by that version id must return the original body and echo the id.
let version_resp = app
.clone()
.oneshot(signed_request(
Method::GET,
&format!("/versions-bucket/doc.txt?versionId={}", archived_version_id),
Body::empty(),
))
.await
.unwrap();
assert_eq!(version_resp.status(), StatusCode::OK);
assert_eq!(
version_resp.headers()["x-amz-version-id"].to_str().unwrap(),
archived_version_id
);
let version_body = version_resp.into_body().collect().await.unwrap().to_bytes();
assert_eq!(&version_body[..], b"first");
// A versionId containing "../" must be answered with 404, not resolved.
let traversal_resp = app
.clone()
.oneshot(signed_request(
Method::GET,
&format!(
"/versions-bucket/doc.txt?versionId=../other/{}",
archived_version_id
),
Body::empty(),
))
.await
.unwrap();
assert_eq!(traversal_resp.status(), StatusCode::NOT_FOUND);
// Third write so the listing has more entries than max-keys=1 allows.
app.clone()
.oneshot(signed_request(
Method::PUT,
"/versions-bucket/doc.txt",
Body::from("third"),
))
.await
.unwrap();
// With max-keys=1 exactly one <Version> is returned and truncation is flagged.
let limited_resp = app
.clone()
.oneshot(signed_request(
Method::GET,
"/versions-bucket?versions&max-keys=1",
Body::empty(),
))
.await
.unwrap();
assert_eq!(limited_resp.status(), StatusCode::OK);
let limited_body = String::from_utf8(
limited_resp
.into_body()
.collect()
.await
.unwrap()
.to_bytes()
.to_vec(),
)
.unwrap();
assert_eq!(limited_body.matches("<Version>").count(), 1);
assert!(limited_body.contains("<IsTruncated>true</IsTruncated>"));
// Delete the archived version by id, then confirm it is gone.
let delete_resp = app
.clone()
.oneshot(signed_request(
Method::DELETE,
&format!("/versions-bucket/doc.txt?versionId={}", archived_version_id),
Body::empty(),
))
.await
.unwrap();
assert_eq!(delete_resp.status(), StatusCode::NO_CONTENT);
let missing_resp = app
.oneshot(signed_request(
Method::GET,
&format!("/versions-bucket/doc.txt?versionId={}", archived_version_id),
Body::empty(),
))
.await
.unwrap();
assert_eq!(missing_resp.status(), StatusCode::NOT_FOUND);
}
// Deleting an archived version that was written with GOVERNANCE retention
// must be refused (403) unless the bypass-governance header is sent.
#[tokio::test]
async fn test_retention_is_enforced_when_deleting_archived_version() {
let (app, _tmp) = test_app();
// Create the bucket and enable versioning on it.
app.clone()
.oneshot(signed_request(
Method::PUT,
"/locked-versions",
Body::empty(),
))
.await
.unwrap();
app.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/locked-versions?versioning")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.body(Body::from(
"<VersioningConfiguration><Status>Enabled</Status></VersioningConfiguration>",
))
.unwrap(),
)
.await
.unwrap();
// First write carries a GOVERNANCE lock with a retain-until date in 2099.
app.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/locked-versions/doc.txt")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.header("x-amz-object-lock-mode", "GOVERNANCE")
.header(
"x-amz-object-lock-retain-until-date",
"2099-01-01T00:00:00Z",
)
.body(Body::from("locked"))
.unwrap(),
)
.await
.unwrap();
// Second write to the same key is sent with the bypass header.
app.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/locked-versions/doc.txt")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.header("x-amz-bypass-governance-retention", "true")
.body(Body::from("replacement"))
.unwrap(),
)
.await
.unwrap();
// Find the first non-"null" version id in the versions listing.
let list_resp = app
.clone()
.oneshot(signed_request(
Method::GET,
"/locked-versions?versions",
Body::empty(),
))
.await
.unwrap();
assert_eq!(list_resp.status(), StatusCode::OK);
let list_body = String::from_utf8(
list_resp
.into_body()
.collect()
.await
.unwrap()
.to_bytes()
.to_vec(),
)
.unwrap();
let archived_version_id = list_body
.split("<VersionId>")
.filter_map(|part| part.split_once("</VersionId>").map(|(id, _)| id))
.find(|id| *id != "null")
.expect("archived version id")
.to_string();
// Without the bypass header the delete must be forbidden.
let denied = app
.clone()
.oneshot(signed_request(
Method::DELETE,
&format!("/locked-versions/doc.txt?versionId={}", archived_version_id),
Body::empty(),
))
.await
.unwrap();
assert_eq!(denied.status(), StatusCode::FORBIDDEN);
// With the bypass header the same delete succeeds.
let allowed = app
.oneshot(
Request::builder()
.method(Method::DELETE)
.uri(format!(
"/locked-versions/doc.txt?versionId={}",
archived_version_id
))
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.header("x-amz-bypass-governance-retention", "true")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(allowed.status(), StatusCode::NO_CONTENT);
}
// PutObject must reject a Content-MD5 that does not match the body with
// 400 BadDigest, and accept the upload when the digest is correct.
#[tokio::test]
async fn test_put_object_validates_content_md5() {
let (app, _tmp) = test_app();
app.clone()
.oneshot(signed_request(Method::PUT, "/md5-bucket", Body::empty()))
.await
.unwrap();
// All-zero digest: well-formed base64 but not the MD5 of "hello".
let bad_resp = app
.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/md5-bucket/object.txt")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.header("content-md5", "AAAAAAAAAAAAAAAAAAAAAA==")
.body(Body::from("hello"))
.unwrap(),
)
.await
.unwrap();
assert_eq!(bad_resp.status(), StatusCode::BAD_REQUEST);
let bad_body = String::from_utf8(
bad_resp
.into_body()
.collect()
.await
.unwrap()
.to_bytes()
.to_vec(),
)
.unwrap();
assert!(bad_body.contains("<Code>BadDigest</Code>"));
// "XUFAKrxLKna5cZ2REBfFkg==" is the base64-encoded MD5 of "hello".
let good_resp = app
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/md5-bucket/object.txt")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.header("content-md5", "XUFAKrxLKna5cZ2REBfFkg==")
.body(Body::from("hello"))
.unwrap(),
)
.await
.unwrap();
assert_eq!(good_resp.status(), StatusCode::OK);
}
// Headers supplied on PutObject (x-amz-tagging, cache-control,
// content-disposition, content-language, x-amz-storage-class) must be
// visible afterwards via HEAD and GET ?tagging.
#[tokio::test]
async fn test_put_object_tagging_and_standard_headers_are_persisted() {
let (app, _tmp) = test_app();
app.clone()
.oneshot(signed_request(
Method::PUT,
"/headers-bucket",
Body::empty(),
))
.await
.unwrap();
// Upload with a URL-encoded tag set; "%20" is expected to decode to a space.
let put_resp = app
.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/headers-bucket/report.txt")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.header("x-amz-tagging", "env=prod&name=quarter%201")
.header("cache-control", "max-age=60")
.header("content-disposition", "attachment")
.header("content-language", "en-US")
.header("x-amz-storage-class", "STANDARD_IA")
.body(Body::from("report"))
.unwrap(),
)
.await
.unwrap();
assert_eq!(put_resp.status(), StatusCode::OK);
// HEAD must echo the standard headers and the storage class.
let head_resp = app
.clone()
.oneshot(signed_request(
Method::HEAD,
"/headers-bucket/report.txt",
Body::empty(),
))
.await
.unwrap();
assert_eq!(head_resp.status(), StatusCode::OK);
assert_eq!(head_resp.headers()["cache-control"], "max-age=60");
assert_eq!(head_resp.headers()["content-disposition"], "attachment");
assert_eq!(head_resp.headers()["content-language"], "en-US");
assert_eq!(head_resp.headers()["x-amz-storage-class"], "STANDARD_IA");
// The tagging subresource must list both tags with decoded values.
let tags_resp = app
.oneshot(signed_request(
Method::GET,
"/headers-bucket/report.txt?tagging",
Body::empty(),
))
.await
.unwrap();
assert_eq!(tags_resp.status(), StatusCode::OK);
let tags_body = String::from_utf8(
tags_resp
.into_body()
.collect()
.await
.unwrap()
.to_bytes()
.to_vec(),
)
.unwrap();
assert!(tags_body.contains("<Key>env</Key>"));
assert!(tags_body.contains("<Value>prod</Value>"));
assert!(tags_body.contains("<Key>name</Key>"));
assert!(tags_body.contains("<Value>quarter 1</Value>"));
}
// Requests whose Host header is "<bucket>.localhost" and whose path omits
// the bucket must reach the object handlers for that bucket.
#[tokio::test]
async fn test_virtual_host_bucket_routes_to_s3_object_handlers() {
let (app, _tmp) = test_app();
// The bucket is created path-style so that it exists before the
// virtual-host requests are made.
app.clone()
.oneshot(signed_request(Method::PUT, "/vh-bucket", Body::empty()))
.await
.unwrap();
// PUT /hello.txt with the bucket named only in the Host header.
let put_resp = app
.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/hello.txt")
.header("host", "vh-bucket.localhost")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.body(Body::from("virtual host body"))
.unwrap(),
)
.await
.unwrap();
assert_eq!(put_resp.status(), StatusCode::OK);
// GET through the same virtual host must return what was stored.
let get_resp = app
.oneshot(
Request::builder()
.method(Method::GET)
.uri("/hello.txt")
.header("host", "vh-bucket.localhost")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(get_resp.status(), StatusCode::OK);
let body = get_resp.into_body().collect().await.unwrap().to_bytes();
assert_eq!(&body[..], b"virtual host body");
}
#[tokio::test]
async fn test_bucket_tagging() {
let (app, _tmp) = test_app();
@@ -3323,7 +3762,7 @@ async fn test_static_website_default_404_returns_html_body() {
)
.unwrap();
assert_eq!(body.len(), content_length);
assert_eq!(body, "404 page not found");
assert_eq!(body, "<h1>404 page not found</h1>");
let head_resp = app
.oneshot(website_request(Method::HEAD, "/missing.html"))

View File

@@ -11,6 +11,12 @@ pub enum StorageError {
BucketNotEmpty(String),
#[error("Object not found: {bucket}/{key}")]
ObjectNotFound { bucket: String, key: String },
#[error("Object version not found: {bucket}/{key}?versionId={version_id}")]
VersionNotFound {
bucket: String,
key: String,
version_id: String,
},
#[error("Invalid bucket name: {0}")]
InvalidBucketName(String),
#[error("Invalid object key: {0}")]
@@ -46,6 +52,12 @@ impl From<StorageError> for S3Error {
S3Error::from_code(S3ErrorCode::NoSuchKey)
.with_resource(format!("/{}/{}", bucket, key))
}
StorageError::VersionNotFound {
bucket,
key,
version_id,
} => S3Error::from_code(S3ErrorCode::NoSuchVersion)
.with_resource(format!("/{}/{}?versionId={}", bucket, key, version_id)),
StorageError::InvalidBucketName(msg) => {
S3Error::new(S3ErrorCode::InvalidBucketName, msg)
}

View File

@@ -605,6 +605,144 @@ impl FsStorageBackend {
Ok(source_size)
}
/// Returns the `(manifest, data)` file paths for one archived version:
/// `<version_dir>/<version_id>.json` and `<version_dir>/<version_id>.bin`.
///
/// No validation is done here; callers are expected to have vetted
/// `version_id` before joining it onto the directory.
fn version_record_paths(
    &self,
    bucket_name: &str,
    key: &str,
    version_id: &str,
) -> (PathBuf, PathBuf) {
    let dir = self.version_dir(bucket_name, key);
    let manifest = dir.join(format!("{}.json", version_id));
    let data = dir.join(format!("{}.bin", version_id));
    (manifest, data)
}
/// Rejects version ids that are empty or could escape the version
/// directory when used as a file name (contain `/`, `\` or `..`).
///
/// # Errors
/// Returns `StorageError::VersionNotFound` for any rejected id, so that a
/// malformed id is indistinguishable from a missing version.
fn validate_version_id(bucket_name: &str, key: &str, version_id: &str) -> StorageResult<()> {
    let has_separator = version_id.contains('/') || version_id.contains('\\');
    let acceptable = !version_id.is_empty() && !has_separator && !version_id.contains("..");
    if acceptable {
        return Ok(());
    }
    Err(StorageError::VersionNotFound {
        bucket: bucket_name.to_string(),
        key: key.to_string(),
        version_id: version_id.to_string(),
    })
}
/// Loads the JSON manifest of one archived version and returns it together
/// with the path of the version's data file.
///
/// # Errors
/// - whatever `require_bucket` / `validate_key` report for the bucket or key;
/// - `StorageError::VersionNotFound` when the id is rejected or either the
///   manifest or the data file is missing;
/// - `StorageError::Io` / `StorageError::Json` when the manifest cannot be
///   read or parsed.
fn read_version_record_sync(
    &self,
    bucket_name: &str,
    key: &str,
    version_id: &str,
) -> StorageResult<(Value, PathBuf)> {
    self.require_bucket(bucket_name)?;
    self.validate_key(key)?;
    Self::validate_version_id(bucket_name, key, version_id)?;

    let (manifest_path, data_path) = self.version_record_paths(bucket_name, key, version_id);
    // A version only counts as present when both halves are on disk.
    let complete = manifest_path.is_file() && data_path.is_file();
    if !complete {
        return Err(StorageError::VersionNotFound {
            bucket: bucket_name.to_string(),
            key: key.to_string(),
            version_id: version_id.to_string(),
        });
    }

    let raw = std::fs::read_to_string(&manifest_path).map_err(StorageError::Io)?;
    let record: Value = serde_json::from_str(&raw).map_err(StorageError::Json)?;
    Ok((record, data_path))
}
/// Collects the string-valued entries of the record's `"metadata"` object.
/// Non-string values are dropped; a missing or non-object `"metadata"`
/// field produces an empty map.
fn version_metadata_from_record(record: &Value) -> HashMap<String, String> {
    let Some(entries) = record.get("metadata").and_then(Value::as_object) else {
        return HashMap::new();
    };

    let mut collected = HashMap::new();
    for (name, value) in entries {
        if let Some(text) = value.as_str() {
            collected.insert(name.clone(), text.to_string());
        }
    }
    collected
}
/// Builds an `ObjectMeta` for an archived version from its manifest record.
///
/// Field sources:
/// - size: record `"size"`, else the data file's length on disk (0 if that
///   cannot be read);
/// - last_modified: record `"archived_at"` parsed as RFC 3339, else "now";
/// - etag: record `"etag"`, else the `__etag__` metadata entry;
/// - content_type / storage_class: the `__content_type__` and
///   `__storage_class__` metadata entries (storage class defaults to
///   `"STANDARD"`);
/// - metadata: every metadata entry whose key does not start with `__`.
fn object_meta_from_version_record(
    &self,
    key: &str,
    record: &Value,
    data_path: &Path,
) -> StorageResult<ObjectMeta> {
    let metadata = Self::version_metadata_from_record(record);

    let on_disk_len = match std::fs::metadata(data_path) {
        Ok(stat) => stat.len(),
        Err(_) => 0,
    };
    let size = match record.get("size").and_then(Value::as_u64) {
        Some(recorded) => recorded,
        None => on_disk_len,
    };

    let archived_at = record
        .get("archived_at")
        .and_then(Value::as_str)
        .and_then(|raw| DateTime::parse_from_rfc3339(raw).ok());
    let last_modified = match archived_at {
        Some(stamp) => stamp.with_timezone(&Utc),
        None => Utc::now(),
    };

    let etag = match record.get("etag").and_then(Value::as_str) {
        Some(tag) => Some(tag.to_owned()),
        None => metadata.get("__etag__").cloned(),
    };
    let content_type = metadata.get("__content_type__").cloned();
    let storage_class = metadata
        .get("__storage_class__")
        .cloned()
        .unwrap_or_else(|| "STANDARD".to_string());

    let mut obj = ObjectMeta::new(key.to_string(), size, last_modified);
    obj.etag = etag;
    obj.content_type = content_type;
    obj.storage_class = Some(storage_class);
    // Double-underscore keys are internal bookkeeping and are not exposed.
    obj.metadata = metadata
        .into_iter()
        .filter(|(name, _)| !name.starts_with("__"))
        .collect();
    Ok(obj)
}
/// Converts a version manifest record into a `VersionInfo`.
///
/// Missing fields fall back to: empty `version_id`, `fallback_key` for the
/// key, size 0, "now" for the timestamp, and no etag. The result is always
/// marked as neither latest nor a delete marker.
fn version_info_from_record(&self, fallback_key: &str, record: &Value) -> VersionInfo {
    /// Reads a string field from the record, if present.
    fn text<'a>(record: &'a Value, field: &str) -> Option<&'a str> {
        record.get(field).and_then(Value::as_str)
    }

    let last_modified = match text(record, "archived_at")
        .and_then(|raw| DateTime::parse_from_rfc3339(raw).ok())
    {
        Some(stamp) => stamp.with_timezone(&Utc),
        None => Utc::now(),
    };

    VersionInfo {
        version_id: text(record, "version_id").unwrap_or("").to_string(),
        key: text(record, "key").unwrap_or(fallback_key).to_string(),
        size: record.get("size").and_then(Value::as_u64).unwrap_or(0),
        last_modified,
        etag: text(record, "etag").map(|tag| tag.to_string()),
        is_latest: false,
        is_delete_marker: false,
    }
}
fn bucket_stats_sync(&self, bucket_name: &str) -> StorageResult<BucketStats> {
let bucket_path = self.require_bucket(bucket_name)?;
@@ -1241,6 +1379,10 @@ impl crate::traits::StorageEngine for FsStorageBackend {
let mut obj = ObjectMeta::new(key.to_string(), meta.len(), lm);
obj.etag = stored_meta.get("__etag__").cloned();
obj.content_type = stored_meta.get("__content_type__").cloned();
obj.storage_class = stored_meta
.get("__storage_class__")
.cloned()
.or_else(|| Some("STANDARD".to_string()));
obj.metadata = stored_meta
.into_iter()
.filter(|(k, _)| !k.starts_with("__"))
@@ -1289,6 +1431,10 @@ impl crate::traits::StorageEngine for FsStorageBackend {
let mut obj = ObjectMeta::new(key.to_string(), meta.len(), lm);
obj.etag = stored_meta.get("__etag__").cloned();
obj.content_type = stored_meta.get("__content_type__").cloned();
obj.storage_class = stored_meta
.get("__storage_class__")
.cloned()
.or_else(|| Some("STANDARD".to_string()));
obj.metadata = stored_meta
.into_iter()
.filter(|(k, _)| !k.starts_with("__"))
@@ -1296,6 +1442,51 @@ impl crate::traits::StorageEngine for FsStorageBackend {
Ok(obj)
}
/// Opens one archived version for reading and returns its metadata along
/// with a stream over the version's data file.
///
/// # Errors
/// Propagates the errors of `read_version_record_sync`, and
/// `StorageError::Io` if the data file cannot be opened.
async fn get_object_version(
    &self,
    bucket: &str,
    key: &str,
    version_id: &str,
) -> StorageResult<(ObjectMeta, AsyncReadStream)> {
    let (record, data_path) = self.read_version_record_sync(bucket, key, version_id)?;
    let meta = self.object_meta_from_version_record(key, &record, &data_path)?;
    let reader = tokio::fs::File::open(&data_path)
        .await
        .map_err(StorageError::Io)?;
    let body: AsyncReadStream = Box::pin(reader);
    Ok((meta, body))
}
/// Returns the on-disk path of an archived version's data file, after the
/// same existence and validity checks as `read_version_record_sync`.
async fn get_object_version_path(
    &self,
    bucket: &str,
    key: &str,
    version_id: &str,
) -> StorageResult<PathBuf> {
    self.read_version_record_sync(bucket, key, version_id)
        .map(|(_manifest, data_path)| data_path)
}
/// Returns the metadata of one archived version without opening its data
/// for reading.
async fn head_object_version(
    &self,
    bucket: &str,
    key: &str,
    version_id: &str,
) -> StorageResult<ObjectMeta> {
    let (manifest, data_file) = self.read_version_record_sync(bucket, key, version_id)?;
    self.object_meta_from_version_record(key, &manifest, &data_file)
}
/// Returns the raw string metadata stored in an archived version's
/// manifest, including internal `__…__` entries (no filtering is applied).
async fn get_object_version_metadata(
    &self,
    bucket: &str,
    key: &str,
    version_id: &str,
) -> StorageResult<HashMap<String, String>> {
    let (manifest, _data_file) = self.read_version_record_sync(bucket, key, version_id)?;
    let metadata = Self::version_metadata_from_record(&manifest);
    Ok(metadata)
}
async fn delete_object(&self, bucket: &str, key: &str) -> StorageResult<()> {
let bucket_path = self.require_bucket(bucket)?;
let path = self.object_path(bucket, key)?;
@@ -1317,6 +1508,32 @@ impl crate::traits::StorageEngine for FsStorageBackend {
Ok(())
}
/// Permanently removes one archived version (data file and manifest).
///
/// The version counts as missing only when neither file exists, so a
/// half-written record can still be cleaned up. After removal, empty parent
/// directories up to the bucket's versions root are pruned and the bucket's
/// cached stats are dropped.
///
/// # Errors
/// `StorageError::VersionNotFound` for a rejected id or a missing version;
/// `StorageError::Io` if unlinking either file fails.
async fn delete_object_version(
    &self,
    bucket: &str,
    key: &str,
    version_id: &str,
) -> StorageResult<()> {
    self.require_bucket(bucket)?;
    self.validate_key(key)?;
    Self::validate_version_id(bucket, key, version_id)?;

    let (manifest_path, data_path) = self.version_record_paths(bucket, key, version_id);
    let anything_stored = manifest_path.is_file() || data_path.is_file();
    if !anything_stored {
        return Err(StorageError::VersionNotFound {
            bucket: bucket.to_string(),
            key: key.to_string(),
            version_id: version_id.to_string(),
        });
    }

    // Data first, manifest second.
    Self::safe_unlink(&data_path).map_err(StorageError::Io)?;
    Self::safe_unlink(&manifest_path).map_err(StorageError::Io)?;

    let versions_root = self.bucket_versions_root(bucket);
    Self::cleanup_empty_parents(&manifest_path, &versions_root);
    self.stats_cache.remove(bucket);
    Ok(())
}
async fn copy_object(
&self,
src_bucket: &str,
@@ -1817,40 +2034,73 @@ impl crate::traits::StorageEngine for FsStorageBackend {
}
if let Ok(content) = std::fs::read_to_string(entry.path()) {
if let Ok(record) = serde_json::from_str::<Value>(&content) {
let version_id = record
.get("version_id")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let size = record.get("size").and_then(|v| v.as_u64()).unwrap_or(0);
let archived_at = record
.get("archived_at")
.and_then(|v| v.as_str())
.and_then(|s| DateTime::parse_from_rfc3339(s).ok())
.map(|d| d.with_timezone(&Utc))
.unwrap_or_else(Utc::now);
let etag = record
.get("etag")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
versions.push(VersionInfo {
version_id,
key: key.to_string(),
size,
last_modified: archived_at,
etag,
is_latest: false,
});
versions.push(self.version_info_from_record(key, &record));
}
}
}
versions.sort_by(|a, b| b.last_modified.cmp(&a.last_modified));
if let Some(first) = versions.first_mut() {
first.is_latest = true;
Ok(versions)
}
/// Lists every archived version stored under the bucket's versions root,
/// optionally keeping only keys that start with `prefix`.
///
/// The directory tree is walked depth-first; each `*.json` file is read as
/// a version manifest. Unreadable directories, unreadable files and
/// manifests that fail to parse are skipped rather than reported. When a
/// manifest has no `"key"` field, the key falls back to the manifest's
/// directory path relative to the versions root (with `\` normalised to
/// `/`).
///
/// Results are ordered by key ascending, then newest first within a key.
async fn list_bucket_object_versions(
    &self,
    bucket: &str,
    prefix: Option<&str>,
) -> StorageResult<Vec<VersionInfo>> {
    self.require_bucket(bucket)?;
    let root = self.bucket_versions_root(bucket);
    if !root.exists() {
        return Ok(Vec::new());
    }

    let mut versions = Vec::new();
    let mut pending = vec![root.clone()];
    while let Some(dir) = pending.pop() {
        let Ok(listing) = std::fs::read_dir(&dir) else {
            continue;
        };
        for entry in listing.flatten() {
            let entry_path = entry.path();
            let Ok(kind) = entry.file_type() else {
                continue;
            };
            if kind.is_dir() {
                pending.push(entry_path);
                continue;
            }

            let is_manifest = kind.is_file()
                && entry_path.extension().and_then(|ext| ext.to_str()) == Some("json");
            if !is_manifest {
                continue;
            }

            let Ok(raw) = std::fs::read_to_string(&entry_path) else {
                continue;
            };
            let Ok(record) = serde_json::from_str::<Value>(&raw) else {
                continue;
            };

            let fallback_key = match entry_path
                .parent()
                .and_then(|parent| parent.strip_prefix(&root).ok())
            {
                Some(relative) => relative.to_string_lossy().replace('\\', "/"),
                None => String::new(),
            };
            let info = self.version_info_from_record(&fallback_key, &record);

            let excluded = match prefix {
                Some(wanted) => !info.key.starts_with(wanted),
                None => false,
            };
            if excluded {
                continue;
            }
            versions.push(info);
        }
    }

    versions.sort_by(|lhs, rhs| match lhs.key.cmp(&rhs.key) {
        std::cmp::Ordering::Equal => rhs.last_modified.cmp(&lhs.last_modified),
        by_key => by_key,
    });
    Ok(versions)
}
@@ -2271,6 +2521,12 @@ mod tests {
.unwrap();
assert_eq!(versions.len(), 1);
assert_eq!(versions[0].size, 8);
let invalid_version = format!("../other/{}", versions[0].version_id);
let result = backend
.get_object_version("test-bucket", "file.txt", &invalid_version)
.await;
assert!(matches!(result, Err(StorageError::VersionNotFound { .. })));
}
#[tokio::test]

View File

@@ -34,8 +34,43 @@ pub trait StorageEngine: Send + Sync {
async fn head_object(&self, bucket: &str, key: &str) -> StorageResult<ObjectMeta>;
/// Returns the metadata and a readable stream for one specific stored
/// version of `key`, identified by `version_id`.
///
/// In the filesystem backend, an unknown or malformed `version_id` is
/// reported as `StorageError::VersionNotFound`.
async fn get_object_version(
&self,
bucket: &str,
key: &str,
version_id: &str,
) -> StorageResult<(ObjectMeta, AsyncReadStream)>;
/// Returns the path of the file holding the data of one specific stored
/// version of `key`.
async fn get_object_version_path(
&self,
bucket: &str,
key: &str,
version_id: &str,
) -> StorageResult<PathBuf>;
/// Returns only the metadata of one specific stored version of `key`.
async fn head_object_version(
&self,
bucket: &str,
key: &str,
version_id: &str,
) -> StorageResult<ObjectMeta>;
/// Returns the string key/value metadata recorded for one specific stored
/// version of `key`.
async fn get_object_version_metadata(
&self,
bucket: &str,
key: &str,
version_id: &str,
) -> StorageResult<HashMap<String, String>>;
async fn delete_object(&self, bucket: &str, key: &str) -> StorageResult<()>;
/// Removes one specific stored version of `key`, identified by
/// `version_id`.
///
/// In the filesystem backend, a version that does not exist is reported as
/// `StorageError::VersionNotFound`.
async fn delete_object_version(
&self,
bucket: &str,
key: &str,
version_id: &str,
) -> StorageResult<()>;
async fn copy_object(
&self,
src_bucket: &str,
@@ -120,6 +155,12 @@ pub trait StorageEngine: Send + Sync {
key: &str,
) -> StorageResult<Vec<VersionInfo>>;
/// Lists stored (archived) versions across all keys in `bucket`, optionally
/// limited to keys starting with `prefix`.
///
/// The filesystem backend returns entries sorted by key, newest first
/// within a key, all with `is_latest` set to `false`.
async fn list_bucket_object_versions(
&self,
bucket: &str,
prefix: Option<&str>,
) -> StorageResult<Vec<VersionInfo>>;
async fn get_object_tags(&self, bucket: &str, key: &str) -> StorageResult<Vec<Tag>>;
async fn set_object_tags(&self, bucket: &str, key: &str, tags: &[Tag]) -> StorageResult<()>;