Add generic AWS S3 domain support (#39)

Signed-off-by: Bala.FA <bala@minio.io>
Author: Bala FA
Date: 2023-09-26 05:58:32 +05:30 (committed by GitHub)
parent 1f5ccb3113
commit 526b2a81ab
4 changed files with 277 additions and 82 deletions

.github/workflows (GitHub Actions CI workflow)

@@ -13,9 +13,10 @@ jobs:
   build:
     runs-on: ubuntu-latest
+    timeout-minutes: 5
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Build
         run: |
           cargo fmt --all -- --check

src/s3/client.rs

@@ -2373,7 +2373,7 @@ impl<'a> Client<'a> {
         &self,
         args: &ListenBucketNotificationArgs<'_>,
     ) -> Result<ListenBucketNotificationResponse, Error> {
-        if self.base_url.aws_host {
+        if self.base_url.is_aws_host() {
             return Err(Error::UnsupportedApi(String::from(
                 "ListenBucketNotification",
             )));
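This is the only call-site change outside the URL module: the public aws_host flag is gone, so callers now ask the BaseUrl through the new is_aws_host() accessor. A minimal standalone sketch of the idea (pared-down local types, not the crate's definitions):

// Sketch only: a reduced BaseUrl with the accessor this commit introduces.
struct BaseUrl {
    aws_domain_suffix: String, // empty for non-AWS endpoints such as play.min.io
}

impl BaseUrl {
    fn is_aws_host(&self) -> bool {
        !self.aws_domain_suffix.is_empty()
    }
}

fn main() {
    let minio = BaseUrl { aws_domain_suffix: String::new() };
    let aws = BaseUrl { aws_domain_suffix: "amazonaws.com".to_string() };
    assert!(!minio.is_aws_host()); // ListenBucketNotification allowed
    assert!(aws.is_aws_host());    // rejected with Error::UnsupportedApi
    println!("is_aws_host() distinguishes AWS from non-AWS base URLs");
}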

src/s3/http.rs

@@ -16,12 +16,23 @@
 //! HTTP URL definitions
 
 use crate::s3::error::Error;
+use crate::s3::utils::match_hostname;
 use crate::s3::utils::{to_query_string, Multimap};
 use derivative::Derivative;
 use hyper::http::Method;
 use hyper::Uri;
+use lazy_static::lazy_static;
+use regex::Regex;
 use std::fmt;
 
+const AWS_S3_PREFIX: &str = r"^(((bucket\.|accesspoint\.)vpce(-[a-z_\d]+)+\.s3\.)|([a-z_\d-]{1,63}\.)s3-control(-[a-z_\d]+)*\.|(s3(-[a-z_\d]+)*\.))";
+
+lazy_static! {
+    static ref AWS_ELB_ENDPOINT_REGEX: Regex =
+        Regex::new(r"^[a-z_\d-]{1,63}\.[a-z_\d-]{1,63}\.elb\.amazonaws\.com$").unwrap();
+    static ref AWS_S3_PREFIX_REGEX: Regex = Regex::new(AWS_S3_PREFIX).unwrap();
+}
+
 #[derive(Derivative)]
 #[derivative(Clone, Debug, Default)]
 /// Represents HTTP URL
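For orientation, here is my reading of what the new AWS_S3_PREFIX pattern accepts: plain and variant S3 prefixes (s3., s3-fips., s3-accelerate.), access-point VPC endpoint hosts, and S3 Control hosts. The sample hosts and assertions below are illustrative, not tests from the commit:

use regex::Regex;

// The prefix pattern introduced above, checked against a few representative hosts.
const AWS_S3_PREFIX: &str = r"^(((bucket\.|accesspoint\.)vpce(-[a-z_\d]+)+\.s3\.)|([a-z_\d-]{1,63}\.)s3-control(-[a-z_\d]+)*\.|(s3(-[a-z_\d]+)*\.))";

fn main() {
    let re = Regex::new(AWS_S3_PREFIX).unwrap();
    assert!(re.is_match("s3.us-east-1.amazonaws.com"));
    assert!(re.is_match("s3-fips.dualstack.us-east-1.amazonaws.com"));
    assert!(re.is_match("s3-accelerate.amazonaws.com"));
    assert!(re.is_match("bucket.vpce-0a1b2c3d-x1.s3.us-west-2.vpce.amazonaws.com"));
    assert!(re.is_match("123456789012.s3-control.us-west-2.amazonaws.com"));
    assert!(!re.is_match("play.min.io"));
    println!("AWS_S3_PREFIX matches the expected endpoint shapes");
}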
@@ -75,20 +86,120 @@ impl fmt::Display for Url {
     }
 }
 
-fn extract_region(host: &str) -> String {
-    let tokens: Vec<&str> = host.split('.').collect();
-    let region = match tokens.get(1) {
-        Some(r) => match *r {
-            "dualstack" => match tokens.get(2) {
-                Some(t) => t,
-                _ => "",
-            },
-            "amazonaws" => "",
-            _ => r,
-        },
-        _ => "",
-    };
-    region.to_string()
-}
+pub fn match_aws_endpoint(value: &str) -> bool {
+    lazy_static! {
+        static ref AWS_ENDPOINT_REGEX: Regex = Regex::new(r".*\.amazonaws\.com(|\.cn)$").unwrap();
+    }
+
+    AWS_ENDPOINT_REGEX.is_match(value.to_lowercase().as_str())
+}
+
+pub fn match_aws_s3_endpoint(value: &str) -> bool {
+    lazy_static! {
+        static ref AWS_S3_ENDPOINT_REGEX: Regex = Regex::new(
+            &(AWS_S3_PREFIX.to_string() + r"([a-z_\d-]{1,63}\.)*amazonaws\.com(|\.cn)$")
+        )
+        .unwrap();
+    }
+
+    let binding = value.to_lowercase();
+    let lvalue = binding.as_str();
+    if !AWS_S3_ENDPOINT_REGEX.is_match(lvalue) {
+        return false;
+    }
+
+    for token in lvalue.split('.') {
+        if token.starts_with('-')
+            || token.starts_with('_')
+            || token.ends_with('-')
+            || token.ends_with('_')
+            || token.starts_with("vpce-_")
+            || token.starts_with("s3-control-_")
+            || token.starts_with("s3-_")
+        {
+            return false;
+        }
+    }
+
+    true
+}
+
+fn get_aws_info(
+    host: &String,
+    https: bool,
+    region: &mut String,
+    aws_s3_prefix: &mut String,
+    aws_domain_suffix: &mut String,
+    dualstack: &mut bool,
+) -> Result<(), Error> {
+    if !match_hostname(host.as_str()) {
+        return Ok(());
+    }
+
+    if AWS_ELB_ENDPOINT_REGEX.is_match(host.as_str()) {
+        let token = host
+            .get(..host.rfind(".elb.amazonaws.com").unwrap() - 1)
+            .unwrap();
+        *region = token
+            .get(token.rfind('.').unwrap() + 1..)
+            .unwrap()
+            .to_string();
+        return Ok(());
+    }
+
+    if !match_aws_endpoint(host.as_str()) {
+        return Ok(());
+    }
+
+    if !match_aws_s3_endpoint(host.as_str()) {
+        return Err(Error::UrlBuildError(
+            String::from("invalid Amazon AWS host ") + host,
+        ));
+    }
+
+    let matcher = AWS_S3_PREFIX_REGEX.find(host.as_str()).unwrap();
+    let s3_prefix = host.get(..matcher.end()).unwrap();
+    if s3_prefix.contains("s3-accesspoint") && !https {
+        return Err(Error::UrlBuildError(
+            String::from("use HTTPS scheme for host ") + host,
+        ));
+    }
+
+    let mut tokens: Vec<_> = host.get(matcher.len()..).unwrap().split('.').collect();
+
+    *dualstack = tokens[0] == "dualstack";
+    if *dualstack {
+        tokens.remove(0);
+    }
+
+    let mut region_in_host = String::new();
+    if tokens[0] != "vpce" && tokens[0] != "amazonaws" {
+        region_in_host = tokens[0].to_string();
+        tokens.remove(0);
+    }
+
+    let domain_suffix = tokens.join(".");
+
+    if host == "s3-external-1.amazonaws.com" {
+        region_in_host = "us-east-1".to_string();
+    }
+
+    if host == "s3-us-gov-west-1.amazonaws.com" || host == "s3-fips-us-gov-west-1.amazonaws.com" {
+        region_in_host = "us-gov-west-1".to_string();
+    }
+
+    if domain_suffix.ends_with(".cn") && !s3_prefix.ends_with("s3-accelerate.") && region.is_empty()
+    {
+        return Err(Error::UrlBuildError(
+            String::from("region missing in Amazon S3 China endpoint ") + host,
+        ));
+    }
+
+    *region = region_in_host;
+    *aws_s3_prefix = s3_prefix.to_string();
+    *aws_domain_suffix = domain_suffix;
+
+    Ok(())
+}
 
 #[derive(Derivative)]
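To see what get_aws_info() extracts, here is a standalone replay of its token walk for one dualstack host, after the prefix regex has stripped the leading "s3.": an optional dualstack label, then an optional region label, then the remaining labels as the domain suffix. This is an illustration of the algorithm, not the crate's code:

fn main() {
    // Host as the commit's get_aws_info() would see it.
    let host = "s3.dualstack.eu-west-1.amazonaws.com";
    // The prefix regex matches "s3." here, so the remainder is split on '.'.
    let s3_prefix = "s3.";
    let mut tokens: Vec<&str> = host[s3_prefix.len()..].split('.').collect();

    // 1. An optional leading "dualstack" label sets the dualstack flag.
    let dualstack = tokens[0] == "dualstack";
    if dualstack {
        tokens.remove(0);
    }
    // 2. The next label is taken as the region, unless it is "vpce" or "amazonaws".
    let mut region = String::new();
    if tokens[0] != "vpce" && tokens[0] != "amazonaws" {
        region = tokens[0].to_string();
        tokens.remove(0);
    }
    // 3. Whatever remains becomes the AWS domain suffix.
    let domain_suffix = tokens.join(".");

    assert!(dualstack);
    assert_eq!(region, "eu-west-1");
    assert_eq!(domain_suffix, "amazonaws.com");
    println!("prefix={s3_prefix} dualstack={dualstack} region={region} suffix={domain_suffix}");
}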
@@ -100,13 +211,89 @@ pub struct BaseUrl {
     host: String,
     port: u16,
     pub region: String,
-    pub aws_host: bool,
-    accelerate_host: bool,
-    dualstack_host: bool,
-    virtual_style: bool,
+    aws_s3_prefix: String,
+    aws_domain_suffix: String,
+    pub dualstack: bool,
+    pub virtual_style: bool,
 }
 
 impl BaseUrl {
+    /// Checks base URL is AWS host
+    pub fn is_aws_host(&self) -> bool {
+        !self.aws_domain_suffix.is_empty()
+    }
+
+    fn build_aws_url(
+        &self,
+        url: &mut Url,
+        bucket_name: &str,
+        enforce_path_style: bool,
+        region: &str,
+    ) -> Result<(), Error> {
+        let mut host = String::from(&self.aws_s3_prefix);
+        host.push_str(&self.aws_domain_suffix);
+        if host == "s3-external-1.amazonaws.com"
+            || host == "s3-us-gov-west-1.amazonaws.com"
+            || host == "s3-fips-us-gov-west-1.amazonaws.com"
+        {
+            url.host = host;
+            return Ok(());
+        }
+
+        host = String::from(&self.aws_s3_prefix);
+        if self.aws_s3_prefix.contains("s3-accelerate") {
+            if bucket_name.contains('.') {
+                return Err(Error::UrlBuildError(String::from(
+                    "bucket name with '.' is not allowed for accelerate endpoint",
+                )));
+            }
+
+            if enforce_path_style {
+                host = host.replacen("-accelerate", "", 1);
+            }
+        }
+
+        if self.dualstack {
+            host.push_str("dualstack.");
+        }
+        if !self.aws_s3_prefix.contains("s3-accelerate") {
+            host.push_str(region);
+            host.push('.');
+        }
+        host.push_str(&self.aws_domain_suffix);
+        url.host = host;
+
+        Ok(())
+    }
+
+    fn build_list_buckets_url(&self, url: &mut Url, region: &String) {
+        if self.aws_domain_suffix.is_empty() {
+            return;
+        }
+
+        let mut host = String::from(&self.aws_s3_prefix);
+        host.push_str(&self.aws_domain_suffix);
+        if host == "s3-external-1.amazonaws.com"
+            || host == "s3-us-gov-west-1.amazonaws.com"
+            || host == "s3-fips-us-gov-west-1.amazonaws.com"
+        {
+            url.host = host;
+            return;
+        }
+
+        let mut s3_prefix = String::from(&self.aws_s3_prefix);
+        let mut domain_suffix = String::from(&self.aws_domain_suffix);
+        if s3_prefix.starts_with("s3.") || s3_prefix.starts_with("s3-") {
+            s3_prefix = "s3.".to_string();
+            domain_suffix = "amazonaws.com".to_string();
+            if self.aws_domain_suffix.ends_with(".cn") {
+                domain_suffix.push_str(".cn");
+            }
+        }
+
+        url.host = s3_prefix + region + "." + &domain_suffix;
+    }
+
     /// Builds URL from base URL for given parameters for S3 operation
     pub fn build_url(
         &self,
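In the common (non-accelerate, non-legacy) case, build_aws_url() assembles the host as prefix + optional "dualstack." + region + "." + suffix. A standalone replay of that branch with made-up values:

fn main() {
    // Fields as get_aws_info() would have populated them.
    let aws_s3_prefix = "s3.";
    let aws_domain_suffix = "amazonaws.com";
    let dualstack = true;
    let region = "ap-southeast-2";

    // Non-accelerate branch of build_aws_url().
    let mut host = String::from(aws_s3_prefix);
    if dualstack {
        host.push_str("dualstack.");
    }
    host.push_str(region);
    host.push('.');
    host.push_str(aws_domain_suffix);

    assert_eq!(host, "s3.dualstack.ap-southeast-2.amazonaws.com");
    println!("{host}");
}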
@@ -127,15 +314,13 @@ impl BaseUrl {
             https: self.https,
             host: self.host.clone(),
             port: self.port,
+            path: String::from("/"),
             query: query.clone(),
             ..Default::default()
         };
 
         if bucket_name.is_none() {
-            url.path.push('/');
-            if self.aws_host {
-                url.host = format!("s3.{}.{}", region, self.host);
-            }
+            self.build_list_buckets_url(&mut url, region);
             return Ok(url);
         }
@@ -151,45 +336,31 @@ impl BaseUrl {
             // SSL certificate validation error.
             (bucket.contains('.') && self.https);
 
-        if self.aws_host {
-            let mut s3_domain = "s3.".to_string();
-            if self.accelerate_host {
-                if bucket.contains('.') {
-                    return Err(Error::UrlBuildError(String::from(
-                        "bucket name with '.' is not allowed for accelerate endpoint",
-                    )));
-                }
-
-                if !enforce_path_style {
-                    s3_domain = "s3-accelerate.".to_string();
-                }
-            }
-
-            if self.dualstack_host {
-                s3_domain.push_str("dualstack.");
-            }
-            if enforce_path_style || !self.accelerate_host {
-                s3_domain.push_str(region);
-                s3_domain.push('.');
-            }
-
-            url.host = s3_domain + &url.host;
+        if !self.aws_domain_suffix.is_empty() {
+            self.build_aws_url(&mut url, bucket, enforce_path_style, region)?;
         }
 
+        let mut host = String::from(&url.host);
+        let mut path = String::new();
         if enforce_path_style || !self.virtual_style {
-            url.path.push('/');
-            url.path.push_str(bucket);
+            path.push('/');
+            path.push_str(bucket);
         } else {
-            url.host = format!("{}.{}", bucket, url.host);
+            host = format!("{}.{}", bucket, url.host);
         }
 
         if let Some(v) = object_name {
             if !v.starts_with('/') {
-                url.path.push('/');
+                path.push('/');
             }
             // FIXME: urlencode path
-            url.path.push_str(v);
+            path.push_str(v);
         }
 
+        url.host = host;
+        url.path = path;
+
         Ok(url)
     }
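The tail of build_url() now accumulates the result into local host/path variables before assigning them, but the path-style versus virtual-host outcome is unchanged. A standalone sketch with made-up bucket and object names:

fn main() {
    let bucket = "my-bucket";
    let object = "photos/cat.png";
    let base_host = "s3.us-east-1.amazonaws.com";

    for enforce_path_style in [true, false] {
        let mut host = String::from(base_host);
        let mut path = String::new();
        if enforce_path_style {
            // Path style: the bucket goes into the path.
            path.push('/');
            path.push_str(bucket);
        } else {
            // Virtual style: the bucket becomes a host label.
            host = format!("{}.{}", bucket, base_host);
        }
        if !object.starts_with('/') {
            path.push('/');
        }
        path.push_str(object);
        println!("path_style={enforce_path_style}: {host}{path}");
    }
}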
@@ -259,42 +430,28 @@ impl BaseUrl {
             )));
         }
 
-        let mut accelerate_host = host.starts_with("s3-accelerate.");
-        let aws_host = (host.starts_with("s3.") || accelerate_host)
-            && (host.ends_with(".amazonaws.com") || host.ends_with(".amazonaws.com.cn"));
-        let virtual_style = aws_host || host.ends_with("aliyuncs.com");
-
         let mut region = String::new();
-        let mut dualstack_host = false;
-
-        if aws_host {
-            let mut aws_domain = "amazonaws.com";
-            region = extract_region(host);
-
-            let is_aws_china_host = host.ends_with(".cn");
-            if is_aws_china_host {
-                aws_domain = "amazonaws.com.cn";
-                if region.is_empty() {
-                    return Err(Error::InvalidBaseUrl(String::from(
-                        "region must be provided in Amazon S3 China endpoint",
-                    )));
-                }
-            }
-
-            dualstack_host = host.contains(".dualstack.");
-            host = aws_domain;
-        } else {
-            accelerate_host = false;
-        }
+        let mut aws_s3_prefix = String::new();
+        let mut aws_domain_suffix = String::new();
+        let mut dualstack: bool = false;
+        get_aws_info(
+            &host.to_string(),
+            https,
+            &mut region,
+            &mut aws_s3_prefix,
+            &mut aws_domain_suffix,
+            &mut dualstack,
+        )?;
+        let virtual_style = !aws_domain_suffix.is_empty() || host.ends_with("aliyuncs.com");
 
         Ok(BaseUrl {
             https,
             host: host.to_string(),
             port,
             region,
-            aws_host,
-            accelerate_host,
-            dualstack_host,
+            aws_s3_prefix,
+            aws_domain_suffix,
+            dualstack,
             virtual_style,
         })
     }
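The net effect of this constructor change: AWS S3 hosts are no longer limited to the literal "s3." / "s3-accelerate." prefixes the removed code looked for. A rough before/after comparison, with the old rule inlined and the new one approximated by the endpoint regex from match_aws_s3_endpoint() (helper names are mine):

use regex::Regex;

const AWS_S3_PREFIX: &str = r"^(((bucket\.|accesspoint\.)vpce(-[a-z_\d]+)+\.s3\.)|([a-z_\d-]{1,63}\.)s3-control(-[a-z_\d]+)*\.|(s3(-[a-z_\d]+)*\.))";

// Old detection, as removed by this commit.
fn old_aws_host(host: &str) -> bool {
    (host.starts_with("s3.") || host.starts_with("s3-accelerate."))
        && (host.ends_with(".amazonaws.com") || host.ends_with(".amazonaws.com.cn"))
}

// New detection, approximated by the full endpoint regex built in match_aws_s3_endpoint().
fn new_aws_s3_host(host: &str) -> bool {
    Regex::new(&(AWS_S3_PREFIX.to_string() + r"([a-z_\d-]{1,63}\.)*amazonaws\.com(|\.cn)$"))
        .unwrap()
        .is_match(host)
}

fn main() {
    for host in [
        "s3.us-east-1.amazonaws.com",                 // accepted by both
        "s3-fips.us-gov-east-1.amazonaws.com",        // only the new check accepts
        "123456789012.s3-control.us-west-2.amazonaws.com", // only the new check accepts
    ] {
        println!("{host}: old={} new={}", old_aws_host(host), new_aws_s3_host(host));
    }
}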

src/s3/utils.rs

@@ -244,6 +244,43 @@ pub fn get_canonical_headers(map: &Multimap) -> (String, String) {
     (signed_headers, canonical_headers)
 }
 
+/// Checks if given hostname is valid or not
+pub fn match_hostname(value: &str) -> bool {
+    lazy_static! {
+        static ref HOSTNAME_REGEX: Regex =
+            Regex::new(r"^([a-z_\d-]{1,63}\.)*([a-z_\d-]{1,63})$").unwrap();
+    }
+
+    if !HOSTNAME_REGEX.is_match(value.to_lowercase().as_str()) {
+        return false;
+    }
+
+    for token in value.split('.') {
+        if token.starts_with('-')
+            || token.starts_with('_')
+            || token.ends_with('-')
+            || token.ends_with('_')
+        {
+            return false;
+        }
+    }
+
+    true
+}
+
+/// Checks if given region is valid or not
+pub fn match_region(value: &str) -> bool {
+    lazy_static! {
+        static ref REGION_REGEX: Regex = Regex::new(r"^([a-z_\d-]{1,63})$").unwrap();
+    }
+
+    !REGION_REGEX.is_match(value.to_lowercase().as_str())
+        || value.starts_with('-')
+        || value.starts_with('_')
+        || value.ends_with('-')
+        || value.ends_with('_')
+}
+
 /// Validates given bucket name
 pub fn check_bucket_name(bucket_name: &str, strict: bool) -> Result<(), Error> {
     if bucket_name.trim().is_empty() {
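A condensed restatement of the new match_hostname() check, with a few illustrative inputs (the helper name and sample values are mine): the hostname must be dot-separated labels of allowed characters, and no label may begin or end with '-' or '_'.

use regex::Regex;

// Condensed restatement of match_hostname() for illustration.
fn is_valid_hostname(value: &str) -> bool {
    let re = Regex::new(r"^([a-z_\d-]{1,63}\.)*([a-z_\d-]{1,63})$").unwrap();
    re.is_match(&value.to_lowercase())
        && value.split('.').all(|t| {
            !t.starts_with('-') && !t.starts_with('_') && !t.ends_with('-') && !t.ends_with('_')
        })
}

fn main() {
    assert!(is_valid_hostname("play.min.io"));
    assert!(is_valid_hostname("s3.us-east-1.amazonaws.com"));
    assert!(!is_valid_hostname("-bad.example.com")); // label starts with '-'
    assert!(!is_valid_hostname("bad-.example.com")); // label ends with '-'
    assert!(!is_valid_hostname("exa mple.com"));     // space not allowed
    println!("hostname checks behave as expected");
}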
@@ -265,14 +302,14 @@ pub fn check_bucket_name(bucket_name: &str, strict: bool) -> Result<(), Error> {
     }
 
     lazy_static! {
-        static ref VALID_IP_ADDR_REGEX: Regex = Regex::new("^(\\d+\\.){3}\\d+$").unwrap();
+        static ref IPV4_REGEX: Regex = Regex::new(r"^((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.){3}(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$").unwrap();
         static ref VALID_BUCKET_NAME_REGEX: Regex =
             Regex::new("^[A-Za-z0-9][A-Za-z0-9\\.\\-_:]{1,61}[A-Za-z0-9]$").unwrap();
         static ref VALID_BUCKET_NAME_STRICT_REGEX: Regex =
             Regex::new("^[a-z0-9][a-z0-9\\.\\-]{1,61}[a-z0-9]$").unwrap();
     }
 
-    if VALID_IP_ADDR_REGEX.is_match(bucket_name) {
+    if IPV4_REGEX.is_match(bucket_name) {
         return Err(Error::InvalidBucketName(String::from(
             "bucket name cannot be an IP address",
         )));
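Why the IPv4 pattern got stricter: the old ^(\d+\.){3}\d+$ flagged any four dot-separated digit runs as an IP address, so a bucket name like 999.1.1.1 was rejected even though it is not a real address. The new pattern only matches octets in 0-255. A quick comparison of the two regexes (sample values are mine):

use regex::Regex;

fn main() {
    // Old pattern: any four runs of digits separated by dots.
    let old = Regex::new(r"^(\d+\.){3}\d+$").unwrap();
    // New pattern from this commit: each octet restricted to 0-255.
    let new = Regex::new(
        r"^((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.){3}(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$",
    )
    .unwrap();

    assert!(old.is_match("192.168.1.1") && new.is_match("192.168.1.1"));
    // "999.1.1.1" is not a real IPv4 address; only the old pattern flagged it.
    assert!(old.is_match("999.1.1.1"));
    assert!(!new.is_match("999.1.1.1"));
    println!("IPv4 detection now matches only real dotted-quad addresses");
}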