Separate Python and Rust into python/ and rust/ with per-stack Dockerfiles
This commit is contained in:
26
rust/myfsio-engine/crates/myfsio-storage/Cargo.toml
Normal file
26
rust/myfsio-engine/crates/myfsio-storage/Cargo.toml
Normal file
@@ -0,0 +1,26 @@
|
||||
[package]
|
||||
name = "myfsio-storage"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
myfsio-common = { path = "../myfsio-common" }
|
||||
myfsio-crypto = { path = "../myfsio-crypto" }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
dashmap = { workspace = true }
|
||||
parking_lot = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
unicode-normalization = { workspace = true }
|
||||
md-5 = { workspace = true }
|
||||
sha2 = { workspace = true }
|
||||
hex = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
|
||||
tempfile = "3"
|
||||
62
rust/myfsio-engine/crates/myfsio-storage/src/error.rs
Normal file
62
rust/myfsio-engine/crates/myfsio-storage/src/error.rs
Normal file
@@ -0,0 +1,62 @@
|
||||
use myfsio_common::error::{S3Error, S3ErrorCode};
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum StorageError {
|
||||
#[error("Bucket not found: {0}")]
|
||||
BucketNotFound(String),
|
||||
#[error("Bucket already exists: {0}")]
|
||||
BucketAlreadyExists(String),
|
||||
#[error("Bucket not empty: {0}")]
|
||||
BucketNotEmpty(String),
|
||||
#[error("Object not found: {bucket}/{key}")]
|
||||
ObjectNotFound { bucket: String, key: String },
|
||||
#[error("Invalid bucket name: {0}")]
|
||||
InvalidBucketName(String),
|
||||
#[error("Invalid object key: {0}")]
|
||||
InvalidObjectKey(String),
|
||||
#[error("Upload not found: {0}")]
|
||||
UploadNotFound(String),
|
||||
#[error("Quota exceeded: {0}")]
|
||||
QuotaExceeded(String),
|
||||
#[error("Invalid range")]
|
||||
InvalidRange,
|
||||
#[error("IO error: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
#[error("JSON error: {0}")]
|
||||
Json(#[from] serde_json::Error),
|
||||
#[error("Internal error: {0}")]
|
||||
Internal(String),
|
||||
}
|
||||
|
||||
impl From<StorageError> for S3Error {
|
||||
fn from(err: StorageError) -> Self {
|
||||
match err {
|
||||
StorageError::BucketNotFound(name) => {
|
||||
S3Error::from_code(S3ErrorCode::NoSuchBucket).with_resource(format!("/{}", name))
|
||||
}
|
||||
StorageError::BucketAlreadyExists(name) => {
|
||||
S3Error::from_code(S3ErrorCode::BucketAlreadyExists)
|
||||
.with_resource(format!("/{}", name))
|
||||
}
|
||||
StorageError::BucketNotEmpty(name) => {
|
||||
S3Error::from_code(S3ErrorCode::BucketNotEmpty)
|
||||
.with_resource(format!("/{}", name))
|
||||
}
|
||||
StorageError::ObjectNotFound { bucket, key } => {
|
||||
S3Error::from_code(S3ErrorCode::NoSuchKey)
|
||||
.with_resource(format!("/{}/{}", bucket, key))
|
||||
}
|
||||
StorageError::InvalidBucketName(msg) => S3Error::new(S3ErrorCode::InvalidBucketName, msg),
|
||||
StorageError::InvalidObjectKey(msg) => S3Error::new(S3ErrorCode::InvalidKey, msg),
|
||||
StorageError::UploadNotFound(id) => {
|
||||
S3Error::new(S3ErrorCode::NoSuchUpload, format!("Upload {} not found", id))
|
||||
}
|
||||
StorageError::QuotaExceeded(msg) => S3Error::new(S3ErrorCode::QuotaExceeded, msg),
|
||||
StorageError::InvalidRange => S3Error::from_code(S3ErrorCode::InvalidRange),
|
||||
StorageError::Io(e) => S3Error::new(S3ErrorCode::InternalError, e.to_string()),
|
||||
StorageError::Json(e) => S3Error::new(S3ErrorCode::InternalError, e.to_string()),
|
||||
StorageError::Internal(msg) => S3Error::new(S3ErrorCode::InternalError, msg),
|
||||
}
|
||||
}
|
||||
}
|
||||
2148
rust/myfsio-engine/crates/myfsio-storage/src/fs_backend.rs
Normal file
2148
rust/myfsio-engine/crates/myfsio-storage/src/fs_backend.rs
Normal file
File diff suppressed because it is too large
Load Diff
4
rust/myfsio-engine/crates/myfsio-storage/src/lib.rs
Normal file
4
rust/myfsio-engine/crates/myfsio-storage/src/lib.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
pub mod validation;
|
||||
pub mod traits;
|
||||
pub mod error;
|
||||
pub mod fs_backend;
|
||||
135
rust/myfsio-engine/crates/myfsio-storage/src/traits.rs
Normal file
135
rust/myfsio-engine/crates/myfsio-storage/src/traits.rs
Normal file
@@ -0,0 +1,135 @@
|
||||
use crate::error::StorageError;
|
||||
use myfsio_common::types::*;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::pin::Pin;
|
||||
use tokio::io::AsyncRead;
|
||||
|
||||
pub type StorageResult<T> = Result<T, StorageError>;
|
||||
pub type AsyncReadStream = Pin<Box<dyn AsyncRead + Send>>;
|
||||
|
||||
#[allow(async_fn_in_trait)]
|
||||
pub trait StorageEngine: Send + Sync {
|
||||
async fn list_buckets(&self) -> StorageResult<Vec<BucketMeta>>;
|
||||
async fn create_bucket(&self, name: &str) -> StorageResult<()>;
|
||||
async fn delete_bucket(&self, name: &str) -> StorageResult<()>;
|
||||
async fn bucket_exists(&self, name: &str) -> StorageResult<bool>;
|
||||
async fn bucket_stats(&self, name: &str) -> StorageResult<BucketStats>;
|
||||
|
||||
async fn put_object(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
stream: AsyncReadStream,
|
||||
metadata: Option<HashMap<String, String>>,
|
||||
) -> StorageResult<ObjectMeta>;
|
||||
|
||||
async fn get_object(&self, bucket: &str, key: &str) -> StorageResult<(ObjectMeta, AsyncReadStream)>;
|
||||
|
||||
async fn get_object_path(&self, bucket: &str, key: &str) -> StorageResult<PathBuf>;
|
||||
|
||||
async fn head_object(&self, bucket: &str, key: &str) -> StorageResult<ObjectMeta>;
|
||||
|
||||
async fn delete_object(&self, bucket: &str, key: &str) -> StorageResult<()>;
|
||||
|
||||
async fn copy_object(
|
||||
&self,
|
||||
src_bucket: &str,
|
||||
src_key: &str,
|
||||
dst_bucket: &str,
|
||||
dst_key: &str,
|
||||
) -> StorageResult<ObjectMeta>;
|
||||
|
||||
async fn get_object_metadata(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
) -> StorageResult<HashMap<String, String>>;
|
||||
|
||||
async fn put_object_metadata(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
metadata: &HashMap<String, String>,
|
||||
) -> StorageResult<()>;
|
||||
|
||||
async fn list_objects(&self, bucket: &str, params: &ListParams) -> StorageResult<ListObjectsResult>;
|
||||
|
||||
async fn list_objects_shallow(
|
||||
&self,
|
||||
bucket: &str,
|
||||
params: &ShallowListParams,
|
||||
) -> StorageResult<ShallowListResult>;
|
||||
|
||||
async fn initiate_multipart(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
metadata: Option<HashMap<String, String>>,
|
||||
) -> StorageResult<String>;
|
||||
|
||||
async fn upload_part(
|
||||
&self,
|
||||
bucket: &str,
|
||||
upload_id: &str,
|
||||
part_number: u32,
|
||||
stream: AsyncReadStream,
|
||||
) -> StorageResult<String>;
|
||||
|
||||
async fn upload_part_copy(
|
||||
&self,
|
||||
bucket: &str,
|
||||
upload_id: &str,
|
||||
part_number: u32,
|
||||
src_bucket: &str,
|
||||
src_key: &str,
|
||||
range: Option<(u64, u64)>,
|
||||
) -> StorageResult<(String, chrono::DateTime<chrono::Utc>)>;
|
||||
|
||||
async fn complete_multipart(
|
||||
&self,
|
||||
bucket: &str,
|
||||
upload_id: &str,
|
||||
parts: &[PartInfo],
|
||||
) -> StorageResult<ObjectMeta>;
|
||||
|
||||
async fn abort_multipart(&self, bucket: &str, upload_id: &str) -> StorageResult<()>;
|
||||
|
||||
async fn list_parts(&self, bucket: &str, upload_id: &str) -> StorageResult<Vec<PartMeta>>;
|
||||
|
||||
async fn list_multipart_uploads(
|
||||
&self,
|
||||
bucket: &str,
|
||||
) -> StorageResult<Vec<MultipartUploadInfo>>;
|
||||
|
||||
async fn get_bucket_config(&self, bucket: &str) -> StorageResult<BucketConfig>;
|
||||
async fn set_bucket_config(&self, bucket: &str, config: &BucketConfig) -> StorageResult<()>;
|
||||
|
||||
async fn is_versioning_enabled(&self, bucket: &str) -> StorageResult<bool>;
|
||||
async fn set_versioning(&self, bucket: &str, enabled: bool) -> StorageResult<()>;
|
||||
|
||||
async fn list_object_versions(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
) -> StorageResult<Vec<VersionInfo>>;
|
||||
|
||||
async fn get_object_tags(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
) -> StorageResult<Vec<Tag>>;
|
||||
|
||||
async fn set_object_tags(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
tags: &[Tag],
|
||||
) -> StorageResult<()>;
|
||||
|
||||
async fn delete_object_tags(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
) -> StorageResult<()>;
|
||||
}
|
||||
194
rust/myfsio-engine/crates/myfsio-storage/src/validation.rs
Normal file
194
rust/myfsio-engine/crates/myfsio-storage/src/validation.rs
Normal file
@@ -0,0 +1,194 @@
|
||||
use std::sync::LazyLock;
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
/// Windows device names. Key segments are upper-cased before being compared
/// against this list, so every entry must stay upper-case.
const WINDOWS_RESERVED: &[&str] = &[
    "CON", "PRN", "AUX", "NUL",
    "COM0", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
    "LPT0", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
];

/// Characters rejected inside a key segment when Windows rules are requested.
const WINDOWS_ILLEGAL_CHARS: &[char] = &['<', '>', ':', '"', '/', '\\', '|', '?', '*'];

/// Folder names that may not be the first segment of an object key.
const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];

/// Additional name that may not be the first segment of an object key.
const SYSTEM_ROOT: &str = ".myfsio.sys";
|
||||
|
||||
static IP_REGEX: LazyLock<regex::Regex> =
|
||||
LazyLock::new(|| regex::Regex::new(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$").unwrap());
|
||||
|
||||
pub fn validate_object_key(
|
||||
object_key: &str,
|
||||
max_length_bytes: usize,
|
||||
is_windows: bool,
|
||||
reserved_prefixes: Option<&[&str]>,
|
||||
) -> Option<String> {
|
||||
if object_key.is_empty() {
|
||||
return Some("Object key required".to_string());
|
||||
}
|
||||
|
||||
if object_key.contains('\0') {
|
||||
return Some("Object key contains null bytes".to_string());
|
||||
}
|
||||
|
||||
let normalized: String = object_key.nfc().collect();
|
||||
|
||||
if normalized.len() > max_length_bytes {
|
||||
return Some(format!(
|
||||
"Object key exceeds maximum length of {} bytes",
|
||||
max_length_bytes
|
||||
));
|
||||
}
|
||||
|
||||
if normalized.starts_with('/') || normalized.starts_with('\\') {
|
||||
return Some("Object key cannot start with a slash".to_string());
|
||||
}
|
||||
|
||||
let parts: Vec<&str> = if cfg!(windows) || is_windows {
|
||||
normalized.split(['/', '\\']).collect()
|
||||
} else {
|
||||
normalized.split('/').collect()
|
||||
};
|
||||
|
||||
for part in &parts {
|
||||
if part.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if *part == ".." {
|
||||
return Some("Object key contains parent directory references".to_string());
|
||||
}
|
||||
|
||||
if *part == "." {
|
||||
return Some("Object key contains invalid segments".to_string());
|
||||
}
|
||||
|
||||
if part.chars().any(|c| (c as u32) < 32) {
|
||||
return Some("Object key contains control characters".to_string());
|
||||
}
|
||||
|
||||
if is_windows {
|
||||
if part.chars().any(|c| WINDOWS_ILLEGAL_CHARS.contains(&c)) {
|
||||
return Some(
|
||||
"Object key contains characters not supported on Windows filesystems"
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
if part.ends_with(' ') || part.ends_with('.') {
|
||||
return Some(
|
||||
"Object key segments cannot end with spaces or periods on Windows".to_string(),
|
||||
);
|
||||
}
|
||||
let trimmed = part.trim_end_matches(['.', ' ']).to_uppercase();
|
||||
if WINDOWS_RESERVED.contains(&trimmed.as_str()) {
|
||||
return Some(format!("Invalid filename segment: {}", part));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let non_empty_parts: Vec<&str> = parts.iter().filter(|p| !p.is_empty()).copied().collect();
|
||||
if let Some(top) = non_empty_parts.first() {
|
||||
if INTERNAL_FOLDERS.contains(top) || *top == SYSTEM_ROOT {
|
||||
return Some("Object key uses a reserved prefix".to_string());
|
||||
}
|
||||
|
||||
if let Some(prefixes) = reserved_prefixes {
|
||||
for prefix in prefixes {
|
||||
if *top == *prefix {
|
||||
return Some("Object key uses a reserved prefix".to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub fn validate_bucket_name(bucket_name: &str) -> Option<String> {
|
||||
let len = bucket_name.len();
|
||||
if len < 3 || len > 63 {
|
||||
return Some("Bucket name must be between 3 and 63 characters".to_string());
|
||||
}
|
||||
|
||||
let bytes = bucket_name.as_bytes();
|
||||
if !bytes[0].is_ascii_lowercase() && !bytes[0].is_ascii_digit() {
|
||||
return Some(
|
||||
"Bucket name must start and end with a lowercase letter or digit".to_string(),
|
||||
);
|
||||
}
|
||||
if !bytes[len - 1].is_ascii_lowercase() && !bytes[len - 1].is_ascii_digit() {
|
||||
return Some(
|
||||
"Bucket name must start and end with a lowercase letter or digit".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
for &b in bytes {
|
||||
if !b.is_ascii_lowercase() && !b.is_ascii_digit() && b != b'.' && b != b'-' {
|
||||
return Some(
|
||||
"Bucket name can only contain lowercase letters, digits, dots, and hyphens"
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if bucket_name.contains("..") {
|
||||
return Some("Bucket name must not contain consecutive periods".to_string());
|
||||
}
|
||||
|
||||
if IP_REGEX.is_match(bucket_name) {
|
||||
return Some("Bucket name must not be formatted as an IP address".to_string());
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Validates `key` with a 1024-byte limit and no extra reserved prefixes.
    fn check_key(key: &str, is_windows: bool) -> Option<String> {
        validate_object_key(key, 1024, is_windows, None)
    }

    #[test]
    fn test_valid_bucket_names() {
        for name in ["my-bucket", "test123", "my.bucket.name"] {
            assert!(
                validate_bucket_name(name).is_none(),
                "bucket name {name:?} should be accepted"
            );
        }
    }

    #[test]
    fn test_invalid_bucket_names() {
        let rejected = [
            "ab",
            "My-Bucket",
            "-bucket",
            "bucket-",
            "my..bucket",
            "192.168.1.1",
        ];
        for name in rejected {
            assert!(
                validate_bucket_name(name).is_some(),
                "bucket name {name:?} should be rejected"
            );
        }
    }

    #[test]
    fn test_valid_object_keys() {
        for key in ["file.txt", "path/to/file.txt", "a"] {
            assert!(
                check_key(key, false).is_none(),
                "object key {key:?} should be accepted"
            );
        }
    }

    #[test]
    fn test_invalid_object_keys() {
        let rejected = [
            "",
            "/leading-slash",
            "path/../escape",
            ".myfsio.sys/secret",
            ".meta/data",
        ];
        for key in rejected {
            assert!(
                check_key(key, false).is_some(),
                "object key {key:?} should be rejected"
            );
        }
    }

    #[test]
    fn test_object_key_max_length() {
        // One byte over the limit is rejected; exactly at the limit is accepted.
        let too_long = "a".repeat(1025);
        assert!(check_key(&too_long, false).is_some());

        let at_limit = "a".repeat(1024);
        assert!(check_key(&at_limit, false).is_none());
    }

    #[test]
    fn test_windows_validation() {
        for key in ["CON", "file<name", "file.txt "] {
            assert!(
                check_key(key, true).is_some(),
                "object key {key:?} should be rejected under Windows rules"
            );
        }
    }
}
|
||||
Reference in New Issue
Block a user