Skip to content

Commit da8f742

Browse files
authored
Fix S3 query canonicalization (#2800) (#2801)
* Fix S3 query canonicalization (#2800) * Disable listing with spaces on azurite and localstack
1 parent f845d6e commit da8f742

File tree

6 files changed

+95
-26
lines changed

6 files changed

+95
-26
lines changed

object_store/src/aws/client.rs

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
// under the License.
1717

1818
use crate::aws::credential::{AwsCredential, CredentialExt, CredentialProvider};
19+
use crate::aws::STRICT_PATH_ENCODE_SET;
1920
use crate::client::pagination::stream_paginated;
2021
use crate::client::retry::RetryExt;
2122
use crate::multipart::UploadPart;
@@ -26,26 +27,13 @@ use crate::{
2627
};
2728
use bytes::{Buf, Bytes};
2829
use chrono::{DateTime, Utc};
29-
use percent_encoding::{utf8_percent_encode, AsciiSet, PercentEncode, NON_ALPHANUMERIC};
30+
use percent_encoding::{utf8_percent_encode, PercentEncode};
3031
use reqwest::{Client as ReqwestClient, Method, Response, StatusCode};
3132
use serde::{Deserialize, Serialize};
3233
use snafu::{ResultExt, Snafu};
3334
use std::ops::Range;
3435
use std::sync::Arc;
3536

36-
// http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
37-
//
38-
// Do not URI-encode any of the unreserved characters that RFC 3986 defines:
39-
// A-Z, a-z, 0-9, hyphen ( - ), underscore ( _ ), period ( . ), and tilde ( ~ ).
40-
const STRICT_ENCODE_SET: AsciiSet = NON_ALPHANUMERIC
41-
.remove(b'-')
42-
.remove(b'.')
43-
.remove(b'_')
44-
.remove(b'~');
45-
46-
/// This struct is used to maintain the URI path encoding
47-
const STRICT_PATH_ENCODE_SET: AsciiSet = STRICT_ENCODE_SET.remove(b'/');
48-
4937
/// A specialized `Error` for object store-related errors
5038
#[derive(Debug, Snafu)]
5139
#[allow(missing_docs)]

object_store/src/aws/credential.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,23 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use crate::aws::STRICT_ENCODE_SET;
1819
use crate::client::retry::RetryExt;
1920
use crate::client::token::{TemporaryToken, TokenCache};
2021
use crate::util::hmac_sha256;
2122
use crate::{Result, RetryConfig};
2223
use bytes::Buf;
2324
use chrono::{DateTime, Utc};
2425
use futures::TryFutureExt;
26+
use percent_encoding::utf8_percent_encode;
2527
use reqwest::header::{HeaderMap, HeaderValue};
2628
use reqwest::{Client, Method, Request, RequestBuilder, StatusCode};
2729
use serde::Deserialize;
2830
use std::collections::BTreeMap;
2931
use std::sync::Arc;
3032
use std::time::Instant;
3133
use tracing::warn;
34+
use url::Url;
3235

3336
type StdError = Box<dyn std::error::Error + Send + Sync>;
3437

@@ -103,13 +106,14 @@ impl<'a> RequestSigner<'a> {
103106
request.headers_mut().insert(HASH_HEADER, header_digest);
104107

105108
let (signed_headers, canonical_headers) = canonicalize_headers(request.headers());
109+
let canonical_query = canonicalize_query(request.url());
106110

107111
// https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
108112
let canonical_request = format!(
109113
"{}\n{}\n{}\n{}\n{}\n{}",
110114
request.method().as_str(),
111115
request.url().path(), // S3 doesn't percent encode this like other services
112-
request.url().query().unwrap_or(""), // This assumes the query pairs are in order
116+
canonical_query,
113117
canonical_headers,
114118
signed_headers,
115119
digest
@@ -207,6 +211,37 @@ fn hex_encode(bytes: &[u8]) -> String {
207211
out
208212
}
209213

214+
/// Canonicalizes query parameters into the AWS canonical form
215+
///
216+
/// <https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html>
217+
fn canonicalize_query(url: &Url) -> String {
218+
use std::fmt::Write;
219+
220+
let capacity = match url.query() {
221+
Some(q) if !q.is_empty() => q.len(),
222+
_ => return String::new(),
223+
};
224+
let mut encoded = String::with_capacity(capacity + 1);
225+
226+
let mut headers = url.query_pairs().collect::<Vec<_>>();
227+
headers.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
228+
229+
let mut first = true;
230+
for (k, v) in headers {
231+
if !first {
232+
encoded.push('&');
233+
}
234+
first = false;
235+
let _ = write!(
236+
encoded,
237+
"{}={}",
238+
utf8_percent_encode(k.as_ref(), &STRICT_ENCODE_SET),
239+
utf8_percent_encode(v.as_ref(), &STRICT_ENCODE_SET)
240+
);
241+
}
242+
encoded
243+
}
244+
210245
/// Canonicalizes headers into the AWS Canonical Form.
211246
///
212247
/// <https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html>

object_store/src/aws/mod.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,20 @@ use crate::{
5858
mod client;
5959
mod credential;
6060

61+
// http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
62+
//
63+
// Do not URI-encode any of the unreserved characters that RFC 3986 defines:
64+
// A-Z, a-z, 0-9, hyphen ( - ), underscore ( _ ), period ( . ), and tilde ( ~ ).
65+
pub(crate) const STRICT_ENCODE_SET: percent_encoding::AsciiSet =
66+
percent_encoding::NON_ALPHANUMERIC
67+
.remove(b'-')
68+
.remove(b'.')
69+
.remove(b'_')
70+
.remove(b'~');
71+
72+
/// The set of characters to percent-encode in URI paths: `STRICT_ENCODE_SET`, but leaving `/` unencoded
73+
const STRICT_PATH_ENCODE_SET: percent_encoding::AsciiSet = STRICT_ENCODE_SET.remove(b'/');
74+
6175
/// A specialized `Error` for object store-related errors
6276
#[derive(Debug, Snafu)]
6377
#[allow(missing_docs)]
@@ -551,7 +565,7 @@ mod tests {
551565
use super::*;
552566
use crate::tests::{
553567
get_nonexistent_object, list_uses_directories_correctly, list_with_delimiter,
554-
put_get_delete_list, rename_and_copy, stream_get,
568+
put_get_delete_list_opts, rename_and_copy, stream_get,
555569
};
556570
use bytes::Bytes;
557571
use std::env;
@@ -677,9 +691,11 @@ mod tests {
677691
#[tokio::test]
678692
async fn s3_test() {
679693
let config = maybe_skip_integration!();
694+
let is_local = matches!(&config.endpoint, Some(e) if e.starts_with("http://"));
680695
let integration = config.build().unwrap();
681696

682-
put_get_delete_list(&integration).await;
697+
// Localstack doesn't support listing with spaces https://github.com/localstack/localstack/issues/6328
698+
put_get_delete_list_opts(&integration, is_local).await;
683699
list_uses_directories_correctly(&integration).await;
684700
list_with_delimiter(&integration).await;
685701
rename_and_copy(&integration).await;

object_store/src/azure/mod.rs

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ mod tests {
595595
use super::*;
596596
use crate::tests::{
597597
copy_if_not_exists, list_uses_directories_correctly, list_with_delimiter,
598-
put_get_delete_list, rename_and_copy, stream_get,
598+
put_get_delete_list, put_get_delete_list_opts, rename_and_copy, stream_get,
599599
};
600600
use std::env;
601601

@@ -663,9 +663,10 @@ mod tests {
663663

664664
#[tokio::test]
665665
async fn azure_blob_test() {
666+
let use_emulator = env::var("AZURE_USE_EMULATOR").is_ok();
666667
let integration = maybe_skip_integration!().build().unwrap();
667-
668-
put_get_delete_list(&integration).await;
668+
// Azurite doesn't support listing with spaces - https://github.com/localstack/localstack/issues/6328
669+
put_get_delete_list_opts(&integration, use_emulator).await;
669670
list_uses_directories_correctly(&integration).await;
670671
list_with_delimiter(&integration).await;
671672
rename_and_copy(&integration).await;
@@ -687,13 +688,9 @@ mod tests {
687688
.with_container_name(
688689
env::var("OBJECT_STORE_BUCKET").expect("must be set OBJECT_STORE_BUCKET"),
689690
)
690-
.with_client_secret_authorization(
691-
env::var("AZURE_STORAGE_CLIENT_ID")
691+
.with_access_key(
692+
env::var("AZURE_STORAGE_ACCESS_KEY")
692693
.expect("must be set AZURE_STORAGE_CLIENT_ID"),
693-
env::var("AZURE_STORAGE_CLIENT_SECRET")
694-
.expect("must be set AZURE_STORAGE_CLIENT_SECRET"),
695-
env::var("AZURE_STORAGE_TENANT_ID")
696-
.expect("must be set AZURE_STORAGE_TENANT_ID"),
697694
);
698695
let integration = builder.build().unwrap();
699696

object_store/src/lib.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,13 @@ mod tests {
506506
use tokio::io::AsyncWriteExt;
507507

508508
pub(crate) async fn put_get_delete_list(storage: &DynObjectStore) {
509+
put_get_delete_list_opts(storage, false).await
510+
}
511+
512+
pub(crate) async fn put_get_delete_list_opts(
513+
storage: &DynObjectStore,
514+
skip_list_with_spaces: bool,
515+
) {
509516
delete_fixtures(storage).await;
510517

511518
let content_list = flatten_list_stream(storage, None).await.unwrap();
@@ -701,6 +708,21 @@ mod tests {
701708
assert_eq!(files, vec![path.clone()]);
702709

703710
storage.delete(&path).await.unwrap();
711+
712+
let path = Path::parse("foo bar/I contain spaces.parquet").unwrap();
713+
storage.put(&path, Bytes::from(vec![0, 1])).await.unwrap();
714+
storage.head(&path).await.unwrap();
715+
716+
if !skip_list_with_spaces {
717+
let files = flatten_list_stream(storage, Some(&Path::from("foo bar")))
718+
.await
719+
.unwrap();
720+
assert_eq!(files, vec![path.clone()]);
721+
}
722+
storage.delete(&path).await.unwrap();
723+
724+
let files = flatten_list_stream(storage, None).await.unwrap();
725+
assert!(files.is_empty(), "{:?}", files);
704726
}
705727

706728
fn get_vec_of_bytes(chunk_length: usize, num_chunks: usize) -> Vec<Bytes> {

object_store/src/path/mod.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,4 +534,15 @@ mod tests {
534534
needle
535535
);
536536
}
537+
538+
#[test]
539+
fn path_containing_spaces() {
540+
let a = Path::from_iter(["foo bar", "baz"]);
541+
let b = Path::from("foo bar/baz");
542+
let c = Path::parse("foo bar/baz").unwrap();
543+
544+
assert_eq!(a.raw, "foo bar/baz");
545+
assert_eq!(a.raw, b.raw);
546+
assert_eq!(b.raw, c.raw);
547+
}
537548
}

0 commit comments

Comments
 (0)