Apply max-keys to the combined current + archived ListObjectVersions output and report truncation

This commit is contained in:
2026-04-22 00:12:22 +08:00
parent 8935188c8f
commit 9ec5797919
19 changed files with 1750 additions and 117 deletions

View File

@@ -10,6 +10,7 @@ myfsio-crypto = { path = "../myfsio-crypto" }
myfsio-storage = { path = "../myfsio-storage" }
myfsio-xml = { path = "../myfsio-xml" }
base64 = { workspace = true }
md-5 = { workspace = true }
axum = { workspace = true }
tokio = { workspace = true }
tower = { workspace = true }
@@ -29,6 +30,7 @@ percent-encoding = { workspace = true }
quick-xml = { workspace = true }
mime_guess = "2"
crc32fast = { workspace = true }
sha2 = { workspace = true }
duckdb = { workspace = true }
roxmltree = "0.20"
parking_lot = { workspace = true }

View File

@@ -1038,7 +1038,12 @@ fn s3_error_response(code: S3ErrorCode, message: &str, status: StatusCode) -> Re
(status, [("content-type", "application/xml")], err.to_xml()).into_response()
}
pub async fn list_object_versions(state: &AppState, bucket: &str) -> Response {
pub async fn list_object_versions(
state: &AppState,
bucket: &str,
prefix: Option<&str>,
max_keys: usize,
) -> Response {
match state.storage.list_buckets().await {
Ok(buckets) => {
if !buckets.iter().any(|b| b.name == bucket) {
@@ -1050,13 +1055,24 @@ pub async fn list_object_versions(state: &AppState, bucket: &str) -> Response {
Err(e) => return storage_err(e),
}
let fetch_limit = max_keys.saturating_add(1).max(1);
let params = myfsio_common::types::ListParams {
max_keys: 1000,
max_keys: fetch_limit,
prefix: prefix.map(ToOwned::to_owned),
..Default::default()
};
let objects = match state.storage.list_objects(bucket, &params).await {
Ok(result) => result.objects,
let object_result = match state.storage.list_objects(bucket, &params).await {
Ok(result) => result,
Err(e) => return storage_err(e),
};
let objects = object_result.objects;
let archived_versions = match state
.storage
.list_bucket_object_versions(bucket, prefix)
.await
{
Ok(versions) => versions,
Err(e) => return storage_err(e),
};
@@ -1064,11 +1080,24 @@ pub async fn list_object_versions(state: &AppState, bucket: &str) -> Response {
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\
<ListVersionsResult xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">",
);
xml.push_str(&format!("<Name>{}</Name>", bucket));
xml.push_str(&format!("<Name>{}</Name>", xml_escape(bucket)));
xml.push_str(&format!(
"<Prefix>{}</Prefix>",
xml_escape(prefix.unwrap_or(""))
));
xml.push_str(&format!("<MaxKeys>{}</MaxKeys>", max_keys));
for obj in &objects {
let current_count = objects.len().min(max_keys);
let remaining = max_keys.saturating_sub(current_count);
let archived_count = archived_versions.len().min(remaining);
let is_truncated = object_result.is_truncated
|| objects.len() > current_count
|| archived_versions.len() > archived_count;
xml.push_str(&format!("<IsTruncated>{}</IsTruncated>", is_truncated));
for obj in objects.iter().take(current_count) {
xml.push_str("<Version>");
xml.push_str(&format!("<Key>{}</Key>", obj.key));
xml.push_str(&format!("<Key>{}</Key>", xml_escape(&obj.key)));
xml.push_str("<VersionId>null</VersionId>");
xml.push_str("<IsLatest>true</IsLatest>");
xml.push_str(&format!(
@@ -1076,9 +1105,32 @@ pub async fn list_object_versions(state: &AppState, bucket: &str) -> Response {
myfsio_xml::response::format_s3_datetime(&obj.last_modified)
));
if let Some(ref etag) = obj.etag {
xml.push_str(&format!("<ETag>\"{}\"</ETag>", etag));
xml.push_str(&format!("<ETag>\"{}\"</ETag>", xml_escape(etag)));
}
xml.push_str(&format!("<Size>{}</Size>", obj.size));
xml.push_str(&format!(
"<StorageClass>{}</StorageClass>",
xml_escape(obj.storage_class.as_deref().unwrap_or("STANDARD"))
));
xml.push_str("</Version>");
}
for version in archived_versions.iter().take(archived_count) {
xml.push_str("<Version>");
xml.push_str(&format!("<Key>{}</Key>", xml_escape(&version.key)));
xml.push_str(&format!(
"<VersionId>{}</VersionId>",
xml_escape(&version.version_id)
));
xml.push_str("<IsLatest>false</IsLatest>");
xml.push_str(&format!(
"<LastModified>{}</LastModified>",
myfsio_xml::response::format_s3_datetime(&version.last_modified)
));
if let Some(ref etag) = version.etag {
xml.push_str(&format!("<ETag>\"{}\"</ETag>", xml_escape(etag)));
}
xml.push_str(&format!("<Size>{}</Size>", version.size));
xml.push_str("<StorageClass>STANDARD</StorageClass>");
xml.push_str("</Version>");
}

File diff suppressed because it is too large Load Diff

View File

@@ -117,6 +117,7 @@ fn storage_status(err: &StorageError) -> StatusCode {
match err {
StorageError::BucketNotFound(_)
| StorageError::ObjectNotFound { .. }
| StorageError::VersionNotFound { .. }
| StorageError::UploadNotFound(_) => StatusCode::NOT_FOUND,
StorageError::InvalidBucketName(_)
| StorageError::InvalidObjectKey(_)

View File

@@ -9,7 +9,7 @@ pub mod templates;
use axum::Router;
pub const SERVER_HEADER: &str = "MyFSIO";
pub const SERVER_HEADER: &str = concat!("MyFSIO-Rust/", env!("CARGO_PKG_VERSION"));
pub fn create_ui_router(state: state::AppState) -> Router {
use axum::routing::{delete, get, post, put};

View File

@@ -1,5 +1,5 @@
use axum::extract::{Request, State};
use axum::http::{header, HeaderMap, Method, StatusCode};
use axum::http::{header, HeaderMap, Method, StatusCode, Uri};
use axum::middleware::Next;
use axum::response::{IntoResponse, Response};
@@ -15,6 +15,9 @@ use tokio::io::AsyncReadExt;
use crate::services::acl::acl_from_bucket_config;
use crate::state::AppState;
/// Request path as it was received, captured before the URI is rewritten for
/// virtual-host-style addressing.
///
/// `auth_layer` stores this in the request extensions right before replacing
/// the URI, and `sigv4_canonical_path` reads it back so that signature
/// verification uses the pre-rewrite path instead of the rewritten one.
#[derive(Clone, Debug)]
struct OriginalCanonicalPath(String);
fn website_error_response(
status: StatusCode,
body: Option<Vec<u8>>,
@@ -45,7 +48,7 @@ fn website_error_response(
fn default_website_error_body(status: StatusCode) -> String {
let code = status.as_u16();
if status == StatusCode::NOT_FOUND {
"404 page not found".to_string()
"<h1>404 page not found</h1>".to_string()
} else {
let reason = status.canonical_reason().unwrap_or("Error");
format!("{code} {reason}")
@@ -324,6 +327,67 @@ async fn maybe_serve_website(
.await
}
/// Extracts a bucket-name candidate from the first DNS label of `host`.
///
/// `host` is the raw `Host` header value and may carry a `:port` suffix.
///
/// Returns `None` when:
/// - `host` is an IPv4 literal (with or without a port),
/// - `host` contains no `.`,
/// - the first label is empty or one of the reserved service labels
///   (`www`, `s3`, `api`, `admin`, `kms`),
/// - the first label is rejected by `validate_bucket_name`.
fn virtual_host_candidate(host: &str) -> Option<String> {
    // An IPv4 literal is an address, not a virtual-host name. Without this
    // guard `127.0.0.1:9000` would yield the candidate `127`, which is a
    // syntactically valid bucket name, and path-style requests sent to an IP
    // endpoint could be rewritten as if they addressed that bucket.
    let host_without_port = match host.rsplit_once(':') {
        Some((name, port)) if !port.is_empty() && port.bytes().all(|b| b.is_ascii_digit()) => {
            name
        }
        _ => host,
    };
    if host_without_port.parse::<std::net::Ipv4Addr>().is_ok() {
        return None;
    }

    let (candidate, _) = host.split_once('.')?;
    if candidate.is_empty() || matches!(candidate, "www" | "s3" | "api" | "admin" | "kms") {
        return None;
    }
    // `validate_bucket_name` returns `Some(..)` when the name is NOT acceptable.
    if myfsio_storage::validation::validate_bucket_name(candidate).is_some() {
        return None;
    }
    Some(candidate.to_string())
}
/// Resolves the bucket addressed by a virtual-host-style request, if any.
///
/// Returns `None` (leave the request untouched) when:
/// - `path` starts with one of the reserved prefixes `/ui`, `/admin`, `/kms`
///   or `/myfsio`,
/// - `host` yields no bucket candidate,
/// - `path` is already `/{bucket}` or starts with `/{bucket}/`,
/// - the existence check fails, or the bucket does not exist and the request
///   is not `PUT /`.
///
/// A `PUT /` for a bucket that does not exist yet is still resolved to the
/// candidate (presumably so the bucket can be created via its host name —
/// confirm against the bucket-creation handler).
async fn virtual_host_bucket(
    state: &AppState,
    host: &str,
    path: &str,
    method: &Method,
) -> Option<String> {
    const RESERVED_PREFIXES: [&str; 4] = ["/ui", "/admin", "/kms", "/myfsio"];
    if RESERVED_PREFIXES
        .iter()
        .any(|reserved| path.starts_with(reserved))
    {
        return None;
    }

    let bucket = virtual_host_candidate(host)?;

    // True when the path is exactly `/{bucket}` or begins with `/{bucket}/`.
    let names_bucket_in_path = path
        .strip_prefix('/')
        .and_then(|rest| rest.strip_prefix(bucket.as_str()))
        .map_or(false, |tail| tail.is_empty() || tail.starts_with('/'));
    if names_bucket_in_path {
        return None;
    }

    // A failed existence check is treated the same as "no virtual host".
    let exists = state.storage.bucket_exists(&bucket).await.ok()?;
    let is_root_put = *method == Method::PUT && path == "/";
    (exists || is_root_put).then_some(bucket)
}
/// Builds the path-style equivalent of `uri` by prefixing its path with
/// `/{bucket}`.
///
/// A root path (`/`) becomes `/{bucket}/`; any other path becomes
/// `/{bucket}{path}`. The query string, if present, is carried over unchanged,
/// as are the remaining URI parts.
///
/// Returns `None` if the rewritten path-and-query fails to parse or the URI
/// cannot be reassembled from its parts.
fn rewrite_uri_for_virtual_host(uri: &Uri, bucket: &str) -> Option<Uri> {
    let mut target = match uri.path() {
        "/" => format!("/{}/", bucket),
        object_path => format!("/{}{}", bucket, object_path),
    };
    if let Some(query) = uri.query() {
        target.push('?');
        target.push_str(query);
    }

    let mut parts = uri.clone().into_parts();
    parts.path_and_query = Some(target.parse().ok()?);
    Uri::from_parts(parts).ok()
}
fn sigv4_canonical_path(req: &Request) -> &str {
req.extensions()
.get::<OriginalCanonicalPath>()
.map(|path| path.0.as_str())
.unwrap_or_else(|| req.uri().path())
}
pub async fn auth_layer(State(state): State<AppState>, mut req: Request, next: Next) -> Response {
let start = Instant::now();
let uri = req.uri().clone();
@@ -360,7 +424,7 @@ pub async fn auth_layer(State(state): State<AppState>, mut req: Request, next: N
} else if let Some(response) = maybe_serve_website(
&state,
method.clone(),
host.unwrap_or_default(),
host.clone().unwrap_or_default(),
path.clone(),
range_header,
)
@@ -368,38 +432,53 @@ pub async fn auth_layer(State(state): State<AppState>, mut req: Request, next: N
{
response
} else {
let auth_path = if let Some(bucket) =
virtual_host_bucket(&state, host.as_deref().unwrap_or_default(), &path, &method).await
{
if let Some(rewritten) = rewrite_uri_for_virtual_host(req.uri(), &bucket) {
req.extensions_mut()
.insert(OriginalCanonicalPath(path.clone()));
*req.uri_mut() = rewritten;
req.uri().path().to_string()
} else {
path.clone()
}
} else {
path.clone()
};
match try_auth(&state, &req) {
AuthResult::NoAuth => match authorize_request(
&state,
None,
&method,
&path,
&auth_path,
&query,
copy_source.as_deref(),
)
.await
{
Ok(()) => next.run(req).await,
Err(err) => error_response(err, &path),
Err(err) => error_response(err, &auth_path),
},
AuthResult::Ok(principal) => {
if let Err(err) = authorize_request(
&state,
Some(&principal),
&method,
&path,
&auth_path,
&query,
copy_source.as_deref(),
)
.await
{
error_response(err, &path)
error_response(err, &auth_path)
} else {
req.extensions_mut().insert(principal);
next.run(req).await
}
}
AuthResult::Denied(err) => error_response(err, &path),
AuthResult::Denied(err) => error_response(err, &auth_path),
}
};
@@ -1078,7 +1157,7 @@ fn verify_sigv4_header(state: &AppState, req: &Request, auth_str: &str) -> AuthR
};
let method = req.method().as_str();
let canonical_uri = req.uri().path();
let canonical_uri = sigv4_canonical_path(req);
let query_params = parse_query_params(req.uri().query().unwrap_or(""));
@@ -1234,7 +1313,7 @@ fn verify_sigv4_query(state: &AppState, req: &Request) -> AuthResult {
};
let method = req.method().as_str();
let canonical_uri = req.uri().path();
let canonical_uri = sigv4_canonical_path(req);
let query_params_no_sig: Vec<(String, String)> = params
.iter()

View File

@@ -2121,6 +2121,445 @@ async fn test_bucket_versioning() {
assert!(body.contains("<Status>Enabled</Status>"));
}
// End-to-end check of version-addressed reads, listing limits and deletes:
// an archived version must be readable via `?versionId=`, a version id that
// contains path components must not resolve, `max-keys` must cap the version
// listing and flag truncation, and a deleted version must no longer resolve.
#[tokio::test]
async fn test_versioned_object_can_be_read_and_deleted_by_version_id() {
let (app, _tmp) = test_app();
// Setup: create the bucket and enable versioning on it.
app.clone()
.oneshot(signed_request(
Method::PUT,
"/versions-bucket",
Body::empty(),
))
.await
.unwrap();
app.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/versions-bucket?versioning")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.body(Body::from(
"<VersioningConfiguration><Status>Enabled</Status></VersioningConfiguration>",
))
.unwrap(),
)
.await
.unwrap();
// Write the same key twice; the body "first" is later expected to be
// retrievable through the non-"null" version id.
app.clone()
.oneshot(signed_request(
Method::PUT,
"/versions-bucket/doc.txt",
Body::from("first"),
))
.await
.unwrap();
app.clone()
.oneshot(signed_request(
Method::PUT,
"/versions-bucket/doc.txt",
Body::from("second"),
))
.await
.unwrap();
let list_resp = app
.clone()
.oneshot(signed_request(
Method::GET,
"/versions-bucket?versions",
Body::empty(),
))
.await
.unwrap();
assert_eq!(list_resp.status(), StatusCode::OK);
let list_body = String::from_utf8(
list_resp
.into_body()
.collect()
.await
.unwrap()
.to_bytes()
.to_vec(),
)
.unwrap();
// Pull the first <VersionId> whose value is not the literal "null" out of
// the raw XML by plain string splitting (no XML parser involved).
let archived_version_id = list_body
.split("<VersionId>")
.filter_map(|part| part.split_once("</VersionId>").map(|(id, _)| id))
.find(|id| *id != "null")
.expect("archived version id")
.to_string();
let version_resp = app
.clone()
.oneshot(signed_request(
Method::GET,
&format!("/versions-bucket/doc.txt?versionId={}", archived_version_id),
Body::empty(),
))
.await
.unwrap();
assert_eq!(version_resp.status(), StatusCode::OK);
// The response must echo the requested version id and carry the first body.
assert_eq!(
version_resp.headers()["x-amz-version-id"].to_str().unwrap(),
archived_version_id
);
let version_body = version_resp.into_body().collect().await.unwrap().to_bytes();
assert_eq!(&version_body[..], b"first");
// A version id prefixed with `../other/` must be answered with 404.
let traversal_resp = app
.clone()
.oneshot(signed_request(
Method::GET,
&format!(
"/versions-bucket/doc.txt?versionId=../other/{}",
archived_version_id
),
Body::empty(),
))
.await
.unwrap();
assert_eq!(traversal_resp.status(), StatusCode::NOT_FOUND);
// Third write, so that the listing below holds more entries than
// `max-keys=1` allows.
app.clone()
.oneshot(signed_request(
Method::PUT,
"/versions-bucket/doc.txt",
Body::from("third"),
))
.await
.unwrap();
let limited_resp = app
.clone()
.oneshot(signed_request(
Method::GET,
"/versions-bucket?versions&max-keys=1",
Body::empty(),
))
.await
.unwrap();
assert_eq!(limited_resp.status(), StatusCode::OK);
let limited_body = String::from_utf8(
limited_resp
.into_body()
.collect()
.await
.unwrap()
.to_bytes()
.to_vec(),
)
.unwrap();
// Exactly one <Version> element and an explicit truncation flag.
assert_eq!(limited_body.matches("<Version>").count(), 1);
assert!(limited_body.contains("<IsTruncated>true</IsTruncated>"));
// Delete the archived version, then confirm it is gone.
let delete_resp = app
.clone()
.oneshot(signed_request(
Method::DELETE,
&format!("/versions-bucket/doc.txt?versionId={}", archived_version_id),
Body::empty(),
))
.await
.unwrap();
assert_eq!(delete_resp.status(), StatusCode::NO_CONTENT);
let missing_resp = app
.oneshot(signed_request(
Method::GET,
&format!("/versions-bucket/doc.txt?versionId={}", archived_version_id),
Body::empty(),
))
.await
.unwrap();
assert_eq!(missing_resp.status(), StatusCode::NOT_FOUND);
}
// Verifies that object-lock retention also protects archived versions:
// deleting a GOVERNANCE-locked version by id is refused with 403 unless the
// request carries `x-amz-bypass-governance-retention: true`.
#[tokio::test]
async fn test_retention_is_enforced_when_deleting_archived_version() {
let (app, _tmp) = test_app();
// Setup: create the bucket and enable versioning on it.
app.clone()
.oneshot(signed_request(
Method::PUT,
"/locked-versions",
Body::empty(),
))
.await
.unwrap();
app.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/locked-versions?versioning")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.body(Body::from(
"<VersioningConfiguration><Status>Enabled</Status></VersioningConfiguration>",
))
.unwrap(),
)
.await
.unwrap();
// First write carries a GOVERNANCE lock with a retain-until date in 2099.
app.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/locked-versions/doc.txt")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.header("x-amz-object-lock-mode", "GOVERNANCE")
.header(
"x-amz-object-lock-retain-until-date",
"2099-01-01T00:00:00Z",
)
.body(Body::from("locked"))
.unwrap(),
)
.await
.unwrap();
// Overwrite the key with the bypass header set; the listing below is then
// expected to expose a non-"null" version id.
app.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/locked-versions/doc.txt")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.header("x-amz-bypass-governance-retention", "true")
.body(Body::from("replacement"))
.unwrap(),
)
.await
.unwrap();
let list_resp = app
.clone()
.oneshot(signed_request(
Method::GET,
"/locked-versions?versions",
Body::empty(),
))
.await
.unwrap();
assert_eq!(list_resp.status(), StatusCode::OK);
let list_body = String::from_utf8(
list_resp
.into_body()
.collect()
.await
.unwrap()
.to_bytes()
.to_vec(),
)
.unwrap();
// First <VersionId> in the raw XML whose value is not the literal "null".
let archived_version_id = list_body
.split("<VersionId>")
.filter_map(|part| part.split_once("</VersionId>").map(|(id, _)| id))
.find(|id| *id != "null")
.expect("archived version id")
.to_string();
// Without the bypass header the delete must be refused.
let denied = app
.clone()
.oneshot(signed_request(
Method::DELETE,
&format!("/locked-versions/doc.txt?versionId={}", archived_version_id),
Body::empty(),
))
.await
.unwrap();
assert_eq!(denied.status(), StatusCode::FORBIDDEN);
// With the bypass header the same delete must succeed.
let allowed = app
.oneshot(
Request::builder()
.method(Method::DELETE)
.uri(format!(
"/locked-versions/doc.txt?versionId={}",
archived_version_id
))
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.header("x-amz-bypass-governance-retention", "true")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(allowed.status(), StatusCode::NO_CONTENT);
}
// Verifies Content-MD5 validation on PutObject: a digest that does not match
// the body is rejected with 400 / `BadDigest`, a matching digest is accepted.
#[tokio::test]
async fn test_put_object_validates_content_md5() {
let (app, _tmp) = test_app();
app.clone()
.oneshot(signed_request(Method::PUT, "/md5-bucket", Body::empty()))
.await
.unwrap();
// "AAAAAAAAAAAAAAAAAAAAAA==" is base64 for 16 zero bytes, which is not the
// MD5 of "hello".
let bad_resp = app
.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/md5-bucket/object.txt")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.header("content-md5", "AAAAAAAAAAAAAAAAAAAAAA==")
.body(Body::from("hello"))
.unwrap(),
)
.await
.unwrap();
assert_eq!(bad_resp.status(), StatusCode::BAD_REQUEST);
let bad_body = String::from_utf8(
bad_resp
.into_body()
.collect()
.await
.unwrap()
.to_bytes()
.to_vec(),
)
.unwrap();
assert!(bad_body.contains("<Code>BadDigest</Code>"));
// "XUFAKrxLKna5cZ2REBfFkg==" is the base64-encoded MD5 digest of "hello".
let good_resp = app
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/md5-bucket/object.txt")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.header("content-md5", "XUFAKrxLKna5cZ2REBfFkg==")
.body(Body::from("hello"))
.unwrap(),
)
.await
.unwrap();
assert_eq!(good_resp.status(), StatusCode::OK);
}
// Verifies that metadata supplied on PutObject is stored and served back:
// cache-control, content-disposition, content-language and storage class
// must reappear on HEAD, and the `x-amz-tagging` header must be readable
// through `?tagging`.
#[tokio::test]
async fn test_put_object_tagging_and_standard_headers_are_persisted() {
let (app, _tmp) = test_app();
app.clone()
.oneshot(signed_request(
Method::PUT,
"/headers-bucket",
Body::empty(),
))
.await
.unwrap();
// The tag value "quarter%201" is percent-encoded; the tagging response
// below is expected to contain the decoded form "quarter 1".
let put_resp = app
.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/headers-bucket/report.txt")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.header("x-amz-tagging", "env=prod&name=quarter%201")
.header("cache-control", "max-age=60")
.header("content-disposition", "attachment")
.header("content-language", "en-US")
.header("x-amz-storage-class", "STANDARD_IA")
.body(Body::from("report"))
.unwrap(),
)
.await
.unwrap();
assert_eq!(put_resp.status(), StatusCode::OK);
let head_resp = app
.clone()
.oneshot(signed_request(
Method::HEAD,
"/headers-bucket/report.txt",
Body::empty(),
))
.await
.unwrap();
assert_eq!(head_resp.status(), StatusCode::OK);
// Each header sent on PUT must come back unchanged on HEAD.
assert_eq!(head_resp.headers()["cache-control"], "max-age=60");
assert_eq!(head_resp.headers()["content-disposition"], "attachment");
assert_eq!(head_resp.headers()["content-language"], "en-US");
assert_eq!(head_resp.headers()["x-amz-storage-class"], "STANDARD_IA");
let tags_resp = app
.oneshot(signed_request(
Method::GET,
"/headers-bucket/report.txt?tagging",
Body::empty(),
))
.await
.unwrap();
assert_eq!(tags_resp.status(), StatusCode::OK);
let tags_body = String::from_utf8(
tags_resp
.into_body()
.collect()
.await
.unwrap()
.to_bytes()
.to_vec(),
)
.unwrap();
// Both tags from the header must be present, with the value decoded.
assert!(tags_body.contains("<Key>env</Key>"));
assert!(tags_body.contains("<Value>prod</Value>"));
assert!(tags_body.contains("<Key>name</Key>"));
assert!(tags_body.contains("<Value>quarter 1</Value>"));
}
// Verifies virtual-host-style addressing: with `Host: vh-bucket.localhost`,
// a request for `/hello.txt` must be handled as object `hello.txt` in bucket
// `vh-bucket`, for both PUT and GET.
#[tokio::test]
async fn test_virtual_host_bucket_routes_to_s3_object_handlers() {
let (app, _tmp) = test_app();
// The bucket itself is created with an ordinary path-style request.
app.clone()
.oneshot(signed_request(Method::PUT, "/vh-bucket", Body::empty()))
.await
.unwrap();
// The URI carries only the object key; the bucket comes from the Host header.
let put_resp = app
.clone()
.oneshot(
Request::builder()
.method(Method::PUT)
.uri("/hello.txt")
.header("host", "vh-bucket.localhost")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.body(Body::from("virtual host body"))
.unwrap(),
)
.await
.unwrap();
assert_eq!(put_resp.status(), StatusCode::OK);
let get_resp = app
.oneshot(
Request::builder()
.method(Method::GET)
.uri("/hello.txt")
.header("host", "vh-bucket.localhost")
.header("x-access-key", TEST_ACCESS_KEY)
.header("x-secret-key", TEST_SECRET_KEY)
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(get_resp.status(), StatusCode::OK);
// The body read back must be exactly what was written via the virtual host.
let body = get_resp.into_body().collect().await.unwrap().to_bytes();
assert_eq!(&body[..], b"virtual host body");
}
#[tokio::test]
async fn test_bucket_tagging() {
let (app, _tmp) = test_app();
@@ -3323,7 +3762,7 @@ async fn test_static_website_default_404_returns_html_body() {
)
.unwrap();
assert_eq!(body.len(), content_length);
assert_eq!(body, "404 page not found");
assert_eq!(body, "<h1>404 page not found</h1>");
let head_resp = app
.oneshot(website_request(Method::HEAD, "/missing.html"))