Skip to content

Commit a349447

Browse files
authored
Fix Copy from percent-encoded path (#2353) (apache#2354)
1 parent f1d744e commit a349447

File tree

3 files changed

+35
-9
lines changed

3 files changed

+35
-9
lines changed

src/aws.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ use futures::{
4848
Future, Stream, StreamExt, TryStreamExt,
4949
};
5050
use hyper::client::Builder as HyperBuilder;
51+
use percent_encoding::{percent_encode, AsciiSet, NON_ALPHANUMERIC};
5152
use rusoto_core::ByteStream;
5253
use rusoto_credential::{InstanceMetadataProvider, StaticProvider};
5354
use rusoto_s3::S3;
@@ -62,6 +63,17 @@ use tokio::io::AsyncWrite;
6263
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
6364
use tracing::{debug, warn};
6465

66+
// Do not URI-encode any of the unreserved characters that RFC 3986 defines:
67+
// A-Z, a-z, 0-9, hyphen ( - ), underscore ( _ ), period ( . ), and tilde ( ~ ).
68+
const STRICT_ENCODE_SET: AsciiSet = NON_ALPHANUMERIC
69+
.remove(b'-')
70+
.remove(b'.')
71+
.remove(b'_')
72+
.remove(b'~');
73+
74+
/// The set of characters to percent-encode in a URI path: the same as `STRICT_ENCODE_SET`, except that `/` is left unencoded
75+
const STRICT_PATH_ENCODE_SET: AsciiSet = STRICT_ENCODE_SET.remove(b'/');
76+
6577
/// The maximum number of times a request will be retried in the case of an AWS server error
6678
pub const MAX_NUM_RETRIES: u32 = 3;
6779

@@ -541,9 +553,15 @@ impl ObjectStore for AmazonS3 {
541553
let to = to.as_ref();
542554
let bucket_name = self.bucket_name.clone();
543555

556+
let copy_source = format!(
557+
"{}/{}",
558+
&bucket_name,
559+
percent_encode(from.as_ref(), &STRICT_PATH_ENCODE_SET)
560+
);
561+
544562
let request_factory = move || rusoto_s3::CopyObjectRequest {
545563
bucket: bucket_name.clone(),
546-
copy_source: format!("{}/{}", &bucket_name, from),
564+
copy_source,
547565
key: to.to_string(),
548566
..Default::default()
549567
};

src/azure.rs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -470,14 +470,15 @@ impl ObjectStore for MicrosoftAzure {
470470

471471
impl MicrosoftAzure {
472472
/// Helper function that builds the source URL used by the copy function
473-
fn get_copy_from_url(&self, from: &Path) -> Result<reqwest::Url> {
474-
Ok(reqwest::Url::parse(&format!(
475-
"{}/{}/{}",
476-
&self.blob_base_url, self.container_name, from
477-
))
478-
.context(UnableToParseUrlSnafu {
479-
container: &self.container_name,
480-
})?)
473+
fn get_copy_from_url(&self, from: &Path) -> Result<Url> {
474+
let mut url =
475+
Url::parse(&format!("{}/{}", &self.blob_base_url, self.container_name))
476+
.context(UnableToParseUrlSnafu {
477+
container: &self.container_name,
478+
})?;
479+
480+
url.path_segments_mut().unwrap().extend(from.parts());
481+
Ok(url)
481482
}
482483

483484
async fn list_impl(

src/lib.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,7 +635,14 @@ mod tests {
635635

636636
assert_eq!(files, vec![emoji_file.clone()]);
637637

638+
let dst = Path::from("foo.parquet");
639+
storage.copy(&emoji_file, &dst).await.unwrap();
640+
let mut files = flatten_list_stream(storage, None).await.unwrap();
641+
files.sort_unstable();
642+
assert_eq!(files, vec![emoji_file.clone(), dst.clone()]);
643+
638644
storage.delete(&emoji_file).await.unwrap();
645+
storage.delete(&dst).await.unwrap();
639646
let files = flatten_list_stream(storage, Some(&emoji_prefix))
640647
.await
641648
.unwrap();

0 commit comments

Comments
 (0)