Separate Python and Rust into python/ and rust/ with per-stack Dockerfiles

This commit is contained in:
2026-04-19 14:01:05 +08:00
parent be8e030940
commit c2ef37b84e
184 changed files with 96 additions and 85 deletions

View File

@@ -0,0 +1,26 @@
[package]
name = "myfsio-storage"
version = "0.1.0"
edition = "2021"
[dependencies]
myfsio-common = { path = "../myfsio-common" }
myfsio-crypto = { path = "../myfsio-crypto" }
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
dashmap = { workspace = true }
parking_lot = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
regex = { workspace = true }
unicode-normalization = { workspace = true }
md-5 = { workspace = true }
sha2 = { workspace = true }
hex = { workspace = true }
[dev-dependencies]
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
tempfile = "3"

View File

@@ -0,0 +1,62 @@
use myfsio_common::error::{S3Error, S3ErrorCode};
use thiserror::Error;
#[derive(Debug, Error)]
pub enum StorageError {
#[error("Bucket not found: {0}")]
BucketNotFound(String),
#[error("Bucket already exists: {0}")]
BucketAlreadyExists(String),
#[error("Bucket not empty: {0}")]
BucketNotEmpty(String),
#[error("Object not found: {bucket}/{key}")]
ObjectNotFound { bucket: String, key: String },
#[error("Invalid bucket name: {0}")]
InvalidBucketName(String),
#[error("Invalid object key: {0}")]
InvalidObjectKey(String),
#[error("Upload not found: {0}")]
UploadNotFound(String),
#[error("Quota exceeded: {0}")]
QuotaExceeded(String),
#[error("Invalid range")]
InvalidRange,
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
#[error("JSON error: {0}")]
Json(#[from] serde_json::Error),
#[error("Internal error: {0}")]
Internal(String),
}
impl From<StorageError> for S3Error {
fn from(err: StorageError) -> Self {
match err {
StorageError::BucketNotFound(name) => {
S3Error::from_code(S3ErrorCode::NoSuchBucket).with_resource(format!("/{}", name))
}
StorageError::BucketAlreadyExists(name) => {
S3Error::from_code(S3ErrorCode::BucketAlreadyExists)
.with_resource(format!("/{}", name))
}
StorageError::BucketNotEmpty(name) => {
S3Error::from_code(S3ErrorCode::BucketNotEmpty)
.with_resource(format!("/{}", name))
}
StorageError::ObjectNotFound { bucket, key } => {
S3Error::from_code(S3ErrorCode::NoSuchKey)
.with_resource(format!("/{}/{}", bucket, key))
}
StorageError::InvalidBucketName(msg) => S3Error::new(S3ErrorCode::InvalidBucketName, msg),
StorageError::InvalidObjectKey(msg) => S3Error::new(S3ErrorCode::InvalidKey, msg),
StorageError::UploadNotFound(id) => {
S3Error::new(S3ErrorCode::NoSuchUpload, format!("Upload {} not found", id))
}
StorageError::QuotaExceeded(msg) => S3Error::new(S3ErrorCode::QuotaExceeded, msg),
StorageError::InvalidRange => S3Error::from_code(S3ErrorCode::InvalidRange),
StorageError::Io(e) => S3Error::new(S3ErrorCode::InternalError, e.to_string()),
StorageError::Json(e) => S3Error::new(S3ErrorCode::InternalError, e.to_string()),
StorageError::Internal(msg) => S3Error::new(S3ErrorCode::InternalError, msg),
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,4 @@
pub mod validation;
pub mod traits;
pub mod error;
pub mod fs_backend;

View File

@@ -0,0 +1,135 @@
use crate::error::StorageError;
use myfsio_common::types::*;
use std::collections::HashMap;
use std::path::PathBuf;
use std::pin::Pin;
use tokio::io::AsyncRead;
pub type StorageResult<T> = Result<T, StorageError>;
pub type AsyncReadStream = Pin<Box<dyn AsyncRead + Send>>;
#[allow(async_fn_in_trait)]
pub trait StorageEngine: Send + Sync {
async fn list_buckets(&self) -> StorageResult<Vec<BucketMeta>>;
async fn create_bucket(&self, name: &str) -> StorageResult<()>;
async fn delete_bucket(&self, name: &str) -> StorageResult<()>;
async fn bucket_exists(&self, name: &str) -> StorageResult<bool>;
async fn bucket_stats(&self, name: &str) -> StorageResult<BucketStats>;
async fn put_object(
&self,
bucket: &str,
key: &str,
stream: AsyncReadStream,
metadata: Option<HashMap<String, String>>,
) -> StorageResult<ObjectMeta>;
async fn get_object(&self, bucket: &str, key: &str) -> StorageResult<(ObjectMeta, AsyncReadStream)>;
async fn get_object_path(&self, bucket: &str, key: &str) -> StorageResult<PathBuf>;
async fn head_object(&self, bucket: &str, key: &str) -> StorageResult<ObjectMeta>;
async fn delete_object(&self, bucket: &str, key: &str) -> StorageResult<()>;
async fn copy_object(
&self,
src_bucket: &str,
src_key: &str,
dst_bucket: &str,
dst_key: &str,
) -> StorageResult<ObjectMeta>;
async fn get_object_metadata(
&self,
bucket: &str,
key: &str,
) -> StorageResult<HashMap<String, String>>;
async fn put_object_metadata(
&self,
bucket: &str,
key: &str,
metadata: &HashMap<String, String>,
) -> StorageResult<()>;
async fn list_objects(&self, bucket: &str, params: &ListParams) -> StorageResult<ListObjectsResult>;
async fn list_objects_shallow(
&self,
bucket: &str,
params: &ShallowListParams,
) -> StorageResult<ShallowListResult>;
async fn initiate_multipart(
&self,
bucket: &str,
key: &str,
metadata: Option<HashMap<String, String>>,
) -> StorageResult<String>;
async fn upload_part(
&self,
bucket: &str,
upload_id: &str,
part_number: u32,
stream: AsyncReadStream,
) -> StorageResult<String>;
async fn upload_part_copy(
&self,
bucket: &str,
upload_id: &str,
part_number: u32,
src_bucket: &str,
src_key: &str,
range: Option<(u64, u64)>,
) -> StorageResult<(String, chrono::DateTime<chrono::Utc>)>;
async fn complete_multipart(
&self,
bucket: &str,
upload_id: &str,
parts: &[PartInfo],
) -> StorageResult<ObjectMeta>;
async fn abort_multipart(&self, bucket: &str, upload_id: &str) -> StorageResult<()>;
async fn list_parts(&self, bucket: &str, upload_id: &str) -> StorageResult<Vec<PartMeta>>;
async fn list_multipart_uploads(
&self,
bucket: &str,
) -> StorageResult<Vec<MultipartUploadInfo>>;
async fn get_bucket_config(&self, bucket: &str) -> StorageResult<BucketConfig>;
async fn set_bucket_config(&self, bucket: &str, config: &BucketConfig) -> StorageResult<()>;
async fn is_versioning_enabled(&self, bucket: &str) -> StorageResult<bool>;
async fn set_versioning(&self, bucket: &str, enabled: bool) -> StorageResult<()>;
async fn list_object_versions(
&self,
bucket: &str,
key: &str,
) -> StorageResult<Vec<VersionInfo>>;
async fn get_object_tags(
&self,
bucket: &str,
key: &str,
) -> StorageResult<Vec<Tag>>;
async fn set_object_tags(
&self,
bucket: &str,
key: &str,
tags: &[Tag],
) -> StorageResult<()>;
async fn delete_object_tags(
&self,
bucket: &str,
key: &str,
) -> StorageResult<()>;
}

View File

@@ -0,0 +1,194 @@
use std::sync::LazyLock;
use unicode_normalization::UnicodeNormalization;
const WINDOWS_RESERVED: &[&str] = &[
"CON", "PRN", "AUX", "NUL", "COM0", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
"COM8", "COM9", "LPT0", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
"LPT9",
];
const WINDOWS_ILLEGAL_CHARS: &[char] = &['<', '>', ':', '"', '/', '\\', '|', '?', '*'];
const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
const SYSTEM_ROOT: &str = ".myfsio.sys";
static IP_REGEX: LazyLock<regex::Regex> =
LazyLock::new(|| regex::Regex::new(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$").unwrap());
pub fn validate_object_key(
object_key: &str,
max_length_bytes: usize,
is_windows: bool,
reserved_prefixes: Option<&[&str]>,
) -> Option<String> {
if object_key.is_empty() {
return Some("Object key required".to_string());
}
if object_key.contains('\0') {
return Some("Object key contains null bytes".to_string());
}
let normalized: String = object_key.nfc().collect();
if normalized.len() > max_length_bytes {
return Some(format!(
"Object key exceeds maximum length of {} bytes",
max_length_bytes
));
}
if normalized.starts_with('/') || normalized.starts_with('\\') {
return Some("Object key cannot start with a slash".to_string());
}
let parts: Vec<&str> = if cfg!(windows) || is_windows {
normalized.split(['/', '\\']).collect()
} else {
normalized.split('/').collect()
};
for part in &parts {
if part.is_empty() {
continue;
}
if *part == ".." {
return Some("Object key contains parent directory references".to_string());
}
if *part == "." {
return Some("Object key contains invalid segments".to_string());
}
if part.chars().any(|c| (c as u32) < 32) {
return Some("Object key contains control characters".to_string());
}
if is_windows {
if part.chars().any(|c| WINDOWS_ILLEGAL_CHARS.contains(&c)) {
return Some(
"Object key contains characters not supported on Windows filesystems"
.to_string(),
);
}
if part.ends_with(' ') || part.ends_with('.') {
return Some(
"Object key segments cannot end with spaces or periods on Windows".to_string(),
);
}
let trimmed = part.trim_end_matches(['.', ' ']).to_uppercase();
if WINDOWS_RESERVED.contains(&trimmed.as_str()) {
return Some(format!("Invalid filename segment: {}", part));
}
}
}
let non_empty_parts: Vec<&str> = parts.iter().filter(|p| !p.is_empty()).copied().collect();
if let Some(top) = non_empty_parts.first() {
if INTERNAL_FOLDERS.contains(top) || *top == SYSTEM_ROOT {
return Some("Object key uses a reserved prefix".to_string());
}
if let Some(prefixes) = reserved_prefixes {
for prefix in prefixes {
if *top == *prefix {
return Some("Object key uses a reserved prefix".to_string());
}
}
}
}
None
}
pub fn validate_bucket_name(bucket_name: &str) -> Option<String> {
let len = bucket_name.len();
if len < 3 || len > 63 {
return Some("Bucket name must be between 3 and 63 characters".to_string());
}
let bytes = bucket_name.as_bytes();
if !bytes[0].is_ascii_lowercase() && !bytes[0].is_ascii_digit() {
return Some(
"Bucket name must start and end with a lowercase letter or digit".to_string(),
);
}
if !bytes[len - 1].is_ascii_lowercase() && !bytes[len - 1].is_ascii_digit() {
return Some(
"Bucket name must start and end with a lowercase letter or digit".to_string(),
);
}
for &b in bytes {
if !b.is_ascii_lowercase() && !b.is_ascii_digit() && b != b'.' && b != b'-' {
return Some(
"Bucket name can only contain lowercase letters, digits, dots, and hyphens"
.to_string(),
);
}
}
if bucket_name.contains("..") {
return Some("Bucket name must not contain consecutive periods".to_string());
}
if IP_REGEX.is_match(bucket_name) {
return Some("Bucket name must not be formatted as an IP address".to_string());
}
None
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_valid_bucket_names() {
assert!(validate_bucket_name("my-bucket").is_none());
assert!(validate_bucket_name("test123").is_none());
assert!(validate_bucket_name("my.bucket.name").is_none());
}
#[test]
fn test_invalid_bucket_names() {
assert!(validate_bucket_name("ab").is_some());
assert!(validate_bucket_name("My-Bucket").is_some());
assert!(validate_bucket_name("-bucket").is_some());
assert!(validate_bucket_name("bucket-").is_some());
assert!(validate_bucket_name("my..bucket").is_some());
assert!(validate_bucket_name("192.168.1.1").is_some());
}
#[test]
fn test_valid_object_keys() {
assert!(validate_object_key("file.txt", 1024, false, None).is_none());
assert!(validate_object_key("path/to/file.txt", 1024, false, None).is_none());
assert!(validate_object_key("a", 1024, false, None).is_none());
}
#[test]
fn test_invalid_object_keys() {
assert!(validate_object_key("", 1024, false, None).is_some());
assert!(validate_object_key("/leading-slash", 1024, false, None).is_some());
assert!(validate_object_key("path/../escape", 1024, false, None).is_some());
assert!(validate_object_key(".myfsio.sys/secret", 1024, false, None).is_some());
assert!(validate_object_key(".meta/data", 1024, false, None).is_some());
}
#[test]
fn test_object_key_max_length() {
let long_key = "a".repeat(1025);
assert!(validate_object_key(&long_key, 1024, false, None).is_some());
let ok_key = "a".repeat(1024);
assert!(validate_object_key(&ok_key, 1024, false, None).is_none());
}
#[test]
fn test_windows_validation() {
assert!(validate_object_key("CON", 1024, true, None).is_some());
assert!(validate_object_key("file<name", 1024, true, None).is_some());
assert!(validate_object_key("file.txt ", 1024, true, None).is_some());
}
}