Add Rust extension module (myfsio_core) for SigV4, hashing, and validation hot paths
This commit is contained in:
90
myfsio_core/src/hashing.rs
Normal file
90
myfsio_core/src/hashing.rs
Normal file
@@ -0,0 +1,90 @@
|
||||
use md5::{Digest, Md5};
|
||||
use pyo3::exceptions::PyIOError;
|
||||
use pyo3::prelude::*;
|
||||
use sha2::Sha256;
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
|
||||
const CHUNK_SIZE: usize = 65536;
|
||||
|
||||
#[pyfunction]
|
||||
pub fn md5_file(py: Python<'_>, path: &str) -> PyResult<String> {
|
||||
let path = path.to_owned();
|
||||
py.detach(move || {
|
||||
let mut file = File::open(&path)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
|
||||
let mut hasher = Md5::new();
|
||||
let mut buf = vec![0u8; CHUNK_SIZE];
|
||||
loop {
|
||||
let n = file
|
||||
.read(&mut buf)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
hasher.update(&buf[..n]);
|
||||
}
|
||||
Ok(format!("{:x}", hasher.finalize()))
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn md5_bytes(data: &[u8]) -> String {
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(data);
|
||||
format!("{:x}", hasher.finalize())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn sha256_file(py: Python<'_>, path: &str) -> PyResult<String> {
|
||||
let path = path.to_owned();
|
||||
py.detach(move || {
|
||||
let mut file = File::open(&path)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
|
||||
let mut hasher = Sha256::new();
|
||||
let mut buf = vec![0u8; CHUNK_SIZE];
|
||||
loop {
|
||||
let n = file
|
||||
.read(&mut buf)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
hasher.update(&buf[..n]);
|
||||
}
|
||||
Ok(format!("{:x}", hasher.finalize()))
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn sha256_bytes(data: &[u8]) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data);
|
||||
format!("{:x}", hasher.finalize())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn md5_sha256_file(py: Python<'_>, path: &str) -> PyResult<(String, String)> {
|
||||
let path = path.to_owned();
|
||||
py.detach(move || {
|
||||
let mut file = File::open(&path)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
|
||||
let mut md5_hasher = Md5::new();
|
||||
let mut sha_hasher = Sha256::new();
|
||||
let mut buf = vec![0u8; CHUNK_SIZE];
|
||||
loop {
|
||||
let n = file
|
||||
.read(&mut buf)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
md5_hasher.update(&buf[..n]);
|
||||
sha_hasher.update(&buf[..n]);
|
||||
}
|
||||
Ok((
|
||||
format!("{:x}", md5_hasher.finalize()),
|
||||
format!("{:x}", sha_hasher.finalize()),
|
||||
))
|
||||
})
|
||||
}
|
||||
30
myfsio_core/src/lib.rs
Normal file
30
myfsio_core/src/lib.rs
Normal file
@@ -0,0 +1,30 @@
|
||||
mod hashing;
|
||||
mod sigv4;
|
||||
mod validation;
|
||||
|
||||
use pyo3::prelude::*;
|
||||
|
||||
#[pymodule]
|
||||
mod myfsio_core {
|
||||
use super::*;
|
||||
|
||||
#[pymodule_init]
|
||||
fn init(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
m.add_function(wrap_pyfunction!(sigv4::derive_signing_key, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(sigv4::compute_signature, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(sigv4::build_string_to_sign, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(sigv4::constant_time_compare, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(sigv4::clear_signing_key_cache, m)?)?;
|
||||
|
||||
m.add_function(wrap_pyfunction!(hashing::md5_file, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(hashing::md5_bytes, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(hashing::sha256_file, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(hashing::sha256_bytes, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(hashing::md5_sha256_file, m)?)?;
|
||||
|
||||
m.add_function(wrap_pyfunction!(validation::validate_object_key, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(validation::validate_bucket_name, m)?)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
108
myfsio_core/src/sigv4.rs
Normal file
108
myfsio_core/src/sigv4.rs
Normal file
@@ -0,0 +1,108 @@
|
||||
use hmac::{Hmac, Mac};
|
||||
use lru::LruCache;
|
||||
use parking_lot::Mutex;
|
||||
use pyo3::prelude::*;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::num::NonZeroUsize;
|
||||
use std::sync::LazyLock;
|
||||
use std::time::Instant;
|
||||
|
||||
type HmacSha256 = Hmac<Sha256>;
|
||||
|
||||
struct CacheEntry {
|
||||
key: Vec<u8>,
|
||||
created: Instant,
|
||||
}
|
||||
|
||||
static SIGNING_KEY_CACHE: LazyLock<Mutex<LruCache<(String, String, String, String), CacheEntry>>> =
|
||||
LazyLock::new(|| Mutex::new(LruCache::new(NonZeroUsize::new(256).unwrap())));
|
||||
|
||||
const CACHE_TTL_SECS: u64 = 60;
|
||||
|
||||
fn hmac_sha256(key: &[u8], msg: &[u8]) -> Vec<u8> {
|
||||
let mut mac = HmacSha256::new_from_slice(key).expect("HMAC key length is always valid");
|
||||
mac.update(msg);
|
||||
mac.finalize().into_bytes().to_vec()
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn derive_signing_key(
|
||||
secret_key: &str,
|
||||
date_stamp: &str,
|
||||
region: &str,
|
||||
service: &str,
|
||||
) -> Vec<u8> {
|
||||
let cache_key = (
|
||||
secret_key.to_owned(),
|
||||
date_stamp.to_owned(),
|
||||
region.to_owned(),
|
||||
service.to_owned(),
|
||||
);
|
||||
|
||||
{
|
||||
let mut cache = SIGNING_KEY_CACHE.lock();
|
||||
if let Some(entry) = cache.get(&cache_key) {
|
||||
if entry.created.elapsed().as_secs() < CACHE_TTL_SECS {
|
||||
return entry.key.clone();
|
||||
}
|
||||
cache.pop(&cache_key);
|
||||
}
|
||||
}
|
||||
|
||||
let k_date = hmac_sha256(format!("AWS4{}", secret_key).as_bytes(), date_stamp.as_bytes());
|
||||
let k_region = hmac_sha256(&k_date, region.as_bytes());
|
||||
let k_service = hmac_sha256(&k_region, service.as_bytes());
|
||||
let k_signing = hmac_sha256(&k_service, b"aws4_request");
|
||||
|
||||
{
|
||||
let mut cache = SIGNING_KEY_CACHE.lock();
|
||||
cache.put(
|
||||
cache_key,
|
||||
CacheEntry {
|
||||
key: k_signing.clone(),
|
||||
created: Instant::now(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
k_signing
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn compute_signature(signing_key: &[u8], string_to_sign: &str) -> String {
|
||||
let sig = hmac_sha256(signing_key, string_to_sign.as_bytes());
|
||||
hex::encode(sig)
|
||||
}
|
||||
|
||||
fn sha256_hex(data: &[u8]) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data);
|
||||
hex::encode(hasher.finalize())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn build_string_to_sign(
|
||||
amz_date: &str,
|
||||
credential_scope: &str,
|
||||
canonical_request: &str,
|
||||
) -> String {
|
||||
let cr_hash = sha256_hex(canonical_request.as_bytes());
|
||||
format!("AWS4-HMAC-SHA256\n{}\n{}\n{}", amz_date, credential_scope, cr_hash)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn constant_time_compare(a: &str, b: &str) -> bool {
|
||||
if a.len() != b.len() {
|
||||
return false;
|
||||
}
|
||||
let mut result: u8 = 0;
|
||||
for (x, y) in a.bytes().zip(b.bytes()) {
|
||||
result |= x ^ y;
|
||||
}
|
||||
result == 0
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn clear_signing_key_cache() {
|
||||
SIGNING_KEY_CACHE.lock().clear();
|
||||
}
|
||||
149
myfsio_core/src/validation.rs
Normal file
149
myfsio_core/src/validation.rs
Normal file
@@ -0,0 +1,149 @@
|
||||
use pyo3::prelude::*;
|
||||
use std::sync::LazyLock;
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
const WINDOWS_RESERVED: &[&str] = &[
|
||||
"CON", "PRN", "AUX", "NUL", "COM0", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
|
||||
"COM8", "COM9", "LPT0", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
|
||||
"LPT9",
|
||||
];
|
||||
|
||||
const WINDOWS_ILLEGAL_CHARS: &[char] = &['<', '>', ':', '"', '/', '\\', '|', '?', '*'];
|
||||
|
||||
const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
|
||||
const SYSTEM_ROOT: &str = ".myfsio.sys";
|
||||
|
||||
static IP_REGEX: LazyLock<regex::Regex> =
|
||||
LazyLock::new(|| regex::Regex::new(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$").unwrap());
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (object_key, max_length_bytes=1024, is_windows=false, reserved_prefixes=None))]
|
||||
pub fn validate_object_key(
|
||||
object_key: &str,
|
||||
max_length_bytes: usize,
|
||||
is_windows: bool,
|
||||
reserved_prefixes: Option<Vec<String>>,
|
||||
) -> PyResult<Option<String>> {
|
||||
if object_key.is_empty() {
|
||||
return Ok(Some("Object key required".to_string()));
|
||||
}
|
||||
|
||||
if object_key.contains('\0') {
|
||||
return Ok(Some("Object key contains null bytes".to_string()));
|
||||
}
|
||||
|
||||
let normalized: String = object_key.nfc().collect();
|
||||
|
||||
if normalized.len() > max_length_bytes {
|
||||
return Ok(Some(format!(
|
||||
"Object key exceeds maximum length of {} bytes",
|
||||
max_length_bytes
|
||||
)));
|
||||
}
|
||||
|
||||
if normalized.starts_with('/') || normalized.starts_with('\\') {
|
||||
return Ok(Some("Object key cannot start with a slash".to_string()));
|
||||
}
|
||||
|
||||
let parts: Vec<&str> = if cfg!(windows) || is_windows {
|
||||
normalized.split(['/', '\\']).collect()
|
||||
} else {
|
||||
normalized.split('/').collect()
|
||||
};
|
||||
|
||||
for part in &parts {
|
||||
if part.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if *part == ".." {
|
||||
return Ok(Some(
|
||||
"Object key contains parent directory references".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if *part == "." {
|
||||
return Ok(Some("Object key contains invalid segments".to_string()));
|
||||
}
|
||||
|
||||
if part.chars().any(|c| (c as u32) < 32) {
|
||||
return Ok(Some(
|
||||
"Object key contains control characters".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if is_windows {
|
||||
if part.chars().any(|c| WINDOWS_ILLEGAL_CHARS.contains(&c)) {
|
||||
return Ok(Some(
|
||||
"Object key contains characters not supported on Windows filesystems"
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
if part.ends_with(' ') || part.ends_with('.') {
|
||||
return Ok(Some(
|
||||
"Object key segments cannot end with spaces or periods on Windows".to_string(),
|
||||
));
|
||||
}
|
||||
let trimmed = part.trim_end_matches(['.', ' ']).to_uppercase();
|
||||
if WINDOWS_RESERVED.contains(&trimmed.as_str()) {
|
||||
return Ok(Some(format!("Invalid filename segment: {}", part)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let non_empty_parts: Vec<&str> = parts.iter().filter(|p| !p.is_empty()).copied().collect();
|
||||
if let Some(top) = non_empty_parts.first() {
|
||||
if INTERNAL_FOLDERS.contains(top) || *top == SYSTEM_ROOT {
|
||||
return Ok(Some("Object key uses a reserved prefix".to_string()));
|
||||
}
|
||||
|
||||
if let Some(ref prefixes) = reserved_prefixes {
|
||||
for prefix in prefixes {
|
||||
if *top == prefix.as_str() {
|
||||
return Ok(Some("Object key uses a reserved prefix".to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn validate_bucket_name(bucket_name: &str) -> Option<String> {
|
||||
let len = bucket_name.len();
|
||||
if len < 3 || len > 63 {
|
||||
return Some("Bucket name must be between 3 and 63 characters".to_string());
|
||||
}
|
||||
|
||||
let bytes = bucket_name.as_bytes();
|
||||
if !bytes[0].is_ascii_lowercase() && !bytes[0].is_ascii_digit() {
|
||||
return Some(
|
||||
"Bucket name must start and end with a lowercase letter or digit".to_string(),
|
||||
);
|
||||
}
|
||||
if !bytes[len - 1].is_ascii_lowercase() && !bytes[len - 1].is_ascii_digit() {
|
||||
return Some(
|
||||
"Bucket name must start and end with a lowercase letter or digit".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
for &b in bytes {
|
||||
if !b.is_ascii_lowercase() && !b.is_ascii_digit() && b != b'.' && b != b'-' {
|
||||
return Some(
|
||||
"Bucket name can only contain lowercase letters, digits, dots, and hyphens"
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if bucket_name.contains("..") {
|
||||
return Some("Bucket name must not contain consecutive periods".to_string());
|
||||
}
|
||||
|
||||
if IP_REGEX.is_match(bucket_name) {
|
||||
return Some("Bucket name must not be formatted as an IP address".to_string());
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
Reference in New Issue
Block a user