Skip to content

Commit c60ce14

Browse files
beneschtustvold
andauthored
object-store: remove S3ConditionalPut::ETagPutIfNotExists (#6802)
* Support real S3's If-Match semantics As of today [0] S3 now supports the If-Match for in-place conditional writes. This commit adjusts the existing support for S3ConditionalPut::Etag mode for compatibility with real S3's particular semantics, which vary slightly from MinIO and R2. Specifically: * Real S3 can occasionally return 409 Conflict when concurrent If-Match requests are in progress. These requests need to be retried. * Real S3 returns 404 Not Found instead of 412 Precondition Failed when issuing an If-Match request against an object that does not exist. Fix #6799. [0]: https://aws.amazon.com/about-aws/whats-new/2024/11/amazon-s3-functionality-conditional-writes/ * object-store: remove S3ConditionalPut::ETagPutIfNotExists Now that real S3 supports `If-Match`, we no longer need this special conditional put mode for real S3. * [XXX put in real release version] Upgrade localstack * Update .github/workflows/object_store.yml --------- Co-authored-by: Raphael Taylor-Davies <[email protected]>
1 parent e2a2beb commit c60ce14

File tree

5 files changed

+48
-25
lines changed

5 files changed

+48
-25
lines changed

.github/workflows/object_store.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ jobs:
138138

139139
- name: Setup LocalStack (AWS emulation)
140140
run: |
141-
echo "LOCALSTACK_CONTAINER=$(docker run -d -p 4566:4566 localstack/localstack:3.8.1)" >> $GITHUB_ENV
141+
echo "LOCALSTACK_CONTAINER=$(docker run -d -p 4566:4566 localstack/localstack:4.0.3)" >> $GITHUB_ENV
142142
echo "EC2_METADATA_CONTAINER=$(docker run -d -p 1338:1338 amazon/amazon-ec2-metadata-mock:v1.9.2 --imdsv2)" >> $GITHUB_ENV
143143
aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket
144144
aws --endpoint-url=http://localhost:4566 dynamodb create-table --table-name test-table --key-schema AttributeName=path,KeyType=HASH AttributeName=etag,KeyType=RANGE --attribute-definitions AttributeName=path,AttributeType=S AttributeName=etag,AttributeType=S --provisioned-throughput ReadCapacityUnits=5,WriteCapacityUnits=5
@@ -164,7 +164,7 @@ jobs:
164164
- name: Run object_store tests (AWS native conditional put)
165165
run: cargo test --features=aws
166166
env:
167-
AWS_CONDITIONAL_PUT: etag-put-if-not-exists
167+
AWS_CONDITIONAL_PUT: etag
168168
AWS_COPY_IF_NOT_EXISTS: multipart
169169

170170
- name: GCS Output

object_store/src/aws/client.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ pub(crate) struct Request<'a> {
290290
payload: Option<PutPayload>,
291291
use_session_creds: bool,
292292
idempotent: bool,
293+
retry_on_conflict: bool,
293294
retry_error_body: bool,
294295
}
295296

@@ -317,6 +318,13 @@ impl<'a> Request<'a> {
317318
Self { idempotent, ..self }
318319
}
319320

321+
pub(crate) fn retry_on_conflict(self, retry_on_conflict: bool) -> Self {
322+
Self {
323+
retry_on_conflict,
324+
..self
325+
}
326+
}
327+
320328
pub(crate) fn retry_error_body(self, retry_error_body: bool) -> Self {
321329
Self {
322330
retry_error_body,
@@ -412,6 +420,7 @@ impl<'a> Request<'a> {
412420
self.builder
413421
.with_aws_sigv4(credential.authorizer(), sha)
414422
.retryable(&self.config.retry_config)
423+
.retry_on_conflict(self.retry_on_conflict)
415424
.idempotent(self.idempotent)
416425
.retry_error_body(self.retry_error_body)
417426
.payload(self.payload)
@@ -448,6 +457,7 @@ impl S3Client {
448457
config: &self.config,
449458
use_session_creds: true,
450459
idempotent: false,
460+
retry_on_conflict: false,
451461
retry_error_body: false,
452462
}
453463
}

object_store/src/aws/mod.rs

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -170,10 +170,7 @@ impl ObjectStore for AmazonS3 {
170170
match (opts.mode, &self.client.config.conditional_put) {
171171
(PutMode::Overwrite, _) => request.idempotent(true).do_put().await,
172172
(PutMode::Create | PutMode::Update(_), None) => Err(Error::NotImplemented),
173-
(
174-
PutMode::Create,
175-
Some(S3ConditionalPut::ETagMatch | S3ConditionalPut::ETagPutIfNotExists),
176-
) => {
173+
(PutMode::Create, Some(S3ConditionalPut::ETagMatch)) => {
177174
match request.header(&IF_NONE_MATCH, "*").do_put().await {
178175
// Technically If-None-Match should return NotModified but some stores,
179176
// such as R2, instead return PreconditionFailed
@@ -197,9 +194,26 @@ impl ObjectStore for AmazonS3 {
197194
source: "ETag required for conditional put".to_string().into(),
198195
})?;
199196
match put {
200-
S3ConditionalPut::ETagPutIfNotExists => Err(Error::NotImplemented),
201197
S3ConditionalPut::ETagMatch => {
202-
request.header(&IF_MATCH, etag.as_str()).do_put().await
198+
match request
199+
.header(&IF_MATCH, etag.as_str())
200+
// Real S3 will occasionally report 409 Conflict
201+
// if there are concurrent `If-Match` requests
202+
// in flight, so we need to be prepared to retry
203+
// 409 responses.
204+
.retry_on_conflict(true)
205+
.do_put()
206+
.await
207+
{
208+
// Real S3 reports NotFound rather than PreconditionFailed when the
209+
// object doesn't exist. Convert to PreconditionFailed for
210+
// consistency with R2. This also matches what the HTTP spec
211+
// says the behavior should be.
212+
Err(Error::NotFound { path, source }) => {
213+
Err(Error::Precondition { path, source })
214+
}
215+
r => r,
216+
}
203217
}
204218
S3ConditionalPut::Dynamo(d) => {
205219
d.conditional_op(&self.client, location, Some(&etag), move || {
@@ -487,6 +501,7 @@ mod tests {
487501
let integration = config.build().unwrap();
488502
let config = &integration.client.config;
489503
let test_not_exists = config.copy_if_not_exists.is_some();
504+
let test_conditional_put = config.conditional_put.is_some();
490505

491506
put_get_delete_list(&integration).await;
492507
get_opts(&integration).await;
@@ -517,9 +532,8 @@ mod tests {
517532
if test_not_exists {
518533
copy_if_not_exists(&integration).await;
519534
}
520-
if let Some(conditional_put) = &config.conditional_put {
521-
let supports_update = !matches!(conditional_put, S3ConditionalPut::ETagPutIfNotExists);
522-
put_opts(&integration, supports_update).await;
535+
if test_conditional_put {
536+
put_opts(&integration, true).await;
523537
}
524538

525539
// run integration test with unsigned payload enabled

object_store/src/aws/precondition.rs

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -138,17 +138,6 @@ pub enum S3ConditionalPut {
138138
/// [HTTP precondition]: https://datatracker.ietf.org/doc/html/rfc9110#name-preconditions
139139
ETagMatch,
140140

141-
/// Like `ETagMatch`, but with support for `PutMode::Create` and not
142-
/// `PutMode::Option`.
143-
///
144-
/// This is the limited form of conditional put supported by Amazon S3
145-
/// as of August 2024 ([announcement]).
146-
///
147-
/// Encoded as `etag-put-if-not-exists` ignoring whitespace.
148-
///
149-
/// [announcement]: https://aws.amazon.com/about-aws/whats-new/2024/08/amazon-s3-conditional-writes/
150-
ETagPutIfNotExists,
151-
152141
/// The name of a DynamoDB table to use for coordination
153142
///
154143
/// Encoded as either `dynamo:<TABLE_NAME>` or `dynamo:<TABLE_NAME>:<TIMEOUT_MILLIS>`
@@ -164,7 +153,6 @@ impl std::fmt::Display for S3ConditionalPut {
164153
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
165154
match self {
166155
Self::ETagMatch => write!(f, "etag"),
167-
Self::ETagPutIfNotExists => write!(f, "etag-put-if-not-exists"),
168156
Self::Dynamo(lock) => write!(f, "dynamo: {}", lock.table_name()),
169157
}
170158
}
@@ -174,7 +162,6 @@ impl S3ConditionalPut {
174162
fn from_str(s: &str) -> Option<Self> {
175163
match s.trim() {
176164
"etag" => Some(Self::ETagMatch),
177-
"etag-put-if-not-exists" => Some(Self::ETagPutIfNotExists),
178165
trimmed => match trimmed.split_once(':')? {
179166
("dynamo", s) => Some(Self::Dynamo(DynamoCommit::from_str(s)?)),
180167
_ => None,

object_store/src/client/retry.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ pub(crate) struct RetryableRequest {
200200

201201
sensitive: bool,
202202
idempotent: Option<bool>,
203+
retry_on_conflict: bool,
203204
payload: Option<PutPayload>,
204205

205206
retry_error_body: bool,
@@ -217,6 +218,15 @@ impl RetryableRequest {
217218
}
218219
}
219220

221+
/// Set whether this request should be retried on a 409 Conflict response.
222+
#[cfg(feature = "aws")]
223+
pub(crate) fn retry_on_conflict(self, retry_on_conflict: bool) -> Self {
224+
Self {
225+
retry_on_conflict,
226+
..self
227+
}
228+
}
229+
220230
/// Set whether this request contains sensitive data
221231
///
222232
/// This will avoid printing out the URL in error messages
@@ -340,7 +350,8 @@ impl RetryableRequest {
340350
let status = r.status();
341351
if retries == max_retries
342352
|| now.elapsed() > retry_timeout
343-
|| !status.is_server_error()
353+
|| !(status.is_server_error()
354+
|| (self.retry_on_conflict && status == StatusCode::CONFLICT))
344355
{
345356
return Err(match status.is_client_error() {
346357
true => match r.text().await {
@@ -467,6 +478,7 @@ impl RetryExt for reqwest::RequestBuilder {
467478
idempotent: None,
468479
payload: None,
469480
sensitive: false,
481+
retry_on_conflict: false,
470482
retry_error_body: false,
471483
}
472484
}

0 commit comments

Comments
 (0)