Commit f2785f3

KAFKA-12754: Improve endOffsets for TaskMetadata (#10634)
Improve endOffsets for TaskMetadata by updating immediately after polling a new batch.

Reviewers: Anna Sophie Blee-Goldman <[email protected]>
1 parent db3e5e2 commit f2785f3
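
The upshot of the change: TaskMetadata#endOffsets() is now refreshed with the highest offset per input partition immediately after each poll, rather than lagging behind. As a hedged sketch (not part of the commit), an application could surface those values through the public KafkaStreams#localThreadsMetadata() API, the same route the new integration test below takes; the EndOffsetProbe class and dump method are invented for illustration:

```java
import java.util.Map;

import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.processor.TaskMetadata;
import org.apache.kafka.streams.processor.ThreadMetadata;

// Illustrative helper, not part of the commit: print the highest end offset
// seen so far for every input partition of every active task on this instance.
public final class EndOffsetProbe {
    public static void dump(final KafkaStreams streams) {
        for (final ThreadMetadata thread : streams.localThreadsMetadata()) {
            for (final TaskMetadata task : thread.activeTasks()) {
                for (final Map.Entry<TopicPartition, Long> entry : task.endOffsets().entrySet()) {
                    // -1 is the sentinel for "no batch polled yet" (see StreamTask below)
                    System.out.println(task.taskId() + " " + entry.getKey() + " -> " + entry.getValue());
                }
            }
        }
    }
}
```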

File tree: 9 files changed, +254 −35 lines changed

streams/src/main/java/org/apache/kafka/streams/processor/TaskMetadata.java (+9)

```diff
@@ -60,14 +60,23 @@ public Set<TopicPartition> topicPartitions() {
         return topicPartitions;
     }
 
+    /**
+     * This function will return a map of TopicPartitions and the highest committed offset seen so far
+     */
     public Map<TopicPartition, Long> committedOffsets() {
         return committedOffsets;
     }
 
+    /**
+     * This function will return a map of TopicPartitions and the highest offset seen so far in the Topic
+     */
    public Map<TopicPartition, Long> endOffsets() {
         return endOffsets;
     }
 
+    /**
+     * This function will return the time task idling started, if the task is not currently idling it will return empty
+     */
     public Optional<Long> timeCurrentIdlingStarted() {
         return timeCurrentIdlingStarted;
     }
```

streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java (−5)

```diff
@@ -297,11 +297,6 @@ public Optional<Long> timeCurrentIdlingStarted() {
         return Optional.empty();
     }
 
-    @Override
-    public void updateCommittedOffsets(final TopicPartition topicPartition, final Long offset) {
-
-    }
-
     @Override
     public void addRecords(final TopicPartition partition, final Iterable<ConsumerRecord<byte[], byte[]>> records) {
         throw new IllegalStateException("Attempted to add records to task " + id() + " for invalid input partition " + partition);
```
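
The removed override was an empty no-op: a standby task restores from its changelog topics via the restore consumer and never sees main-consumer commits, so committed-offset bookkeeping now lives only on StreamTask (see the Task.java and TaskManager.java changes below).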

streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java (+11 −5)

```diff
@@ -196,9 +196,13 @@ public StreamTask(final TaskId id,
         );
 
         stateMgr.registerGlobalStateStores(topology.globalStateStores());
-        this.committedOffsets = new HashMap<>();
-        this.highWatermark = new HashMap<>();
-        this.timeCurrentIdlingStarted = Optional.empty();
+        committedOffsets = new HashMap<>();
+        highWatermark = new HashMap<>();
+        for (final TopicPartition topicPartition: inputPartitions) {
+            committedOffsets.put(topicPartition, -1L);
+            highWatermark.put(topicPartition, -1L);
+        }
+        timeCurrentIdlingStarted = Optional.empty();
     }
 
     // create queues for each assigned partition and associate them
@@ -1173,7 +1177,6 @@ public Map<TopicPartition, Long> committedOffsets() {
 
     @Override
     public Map<TopicPartition, Long> highWaterMark() {
-        highWatermark.putAll(recordCollector.offsets());
         return Collections.unmodifiableMap(highWatermark);
     }
 
@@ -1188,11 +1191,14 @@ public Optional<Long> timeCurrentIdlingStarted() {
         return timeCurrentIdlingStarted;
     }
 
-    @Override
     public void updateCommittedOffsets(final TopicPartition topicPartition, final Long offset) {
         committedOffsets.put(topicPartition, offset);
     }
 
+    public void updateEndOffsets(final TopicPartition topicPartition, final Long offset) {
+        highWatermark.put(topicPartition, offset);
+    }
+
     public boolean hasRecordsQueued() {
         return numBuffered() > 0;
     }
```
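
A behavioral consequence of the constructor change: until the first batch is polled and the first commit lands, both maps now hold a -1 sentinel for every input partition instead of being empty. A hedged caller-side sketch of respecting that sentinel when deriving a rough lag figure from the two TaskMetadata maps (the TaskLag class is invented for illustration):

```java
import java.util.Map;

import org.apache.kafka.common.TopicPartition;

// Hypothetical helper, not part of the commit: derive an approximate per-partition
// lag from the TaskMetadata maps, treating -1 as "nothing observed yet".
public final class TaskLag {
    public static long approximateLag(final Map<TopicPartition, Long> endOffsets,
                                      final Map<TopicPartition, Long> committedOffsets,
                                      final TopicPartition partition) {
        final long end = endOffsets.getOrDefault(partition, -1L);
        final long committed = committedOffsets.getOrDefault(partition, -1L);
        if (end < 0 || committed < 0) {
            return 0L; // the -1 sentinel from the constructor above: no data seen yet
        }
        return Math.max(0L, end - committed);
    }
}
```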

streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java (+10)

```diff
@@ -20,6 +20,7 @@
 import org.apache.kafka.clients.consumer.Consumer;
 import org.apache.kafka.clients.consumer.ConsumerConfig;
 import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
 import org.apache.kafka.clients.consumer.ConsumerRecords;
 import org.apache.kafka.clients.consumer.InvalidOffsetException;
 import org.apache.kafka.common.KafkaException;
@@ -52,6 +53,7 @@
 import java.time.Duration;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -900,6 +902,14 @@ private long pollPhase() {
 
         final int numRecords = records.count();
 
+        for (final TopicPartition topicPartition: records.partitions()) {
+            records
+                .records(topicPartition)
+                .stream()
+                .max(Comparator.comparing(ConsumerRecord::offset))
+                .ifPresent(t -> taskManager.updateTaskEndMetadata(topicPartition, t.offset()));
+        }
+
         log.debug("Main Consumer poll completed in {} ms and fetched {} records", pollLatency, numRecords);
 
         pollSensor.record(pollLatency, now);
```
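
This loop is the heart of the commit: immediately after each poll, the thread takes the largest offset per partition in the batch and pushes it into the task metadata via updateTaskEndMetadata. Since records of a partition arrive in offset order, the max is simply the batch's last record for that partition; a self-contained sketch of the same idiom against a plain consumer batch (names invented, consumer setup assumed elsewhere):

```java
import java.util.Comparator;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.common.TopicPartition;

// Illustrative only: report the highest offset contained in a freshly polled
// batch, per partition, mirroring the per-partition max taken in pollPhase().
final class BatchHighOffsets {
    static void print(final ConsumerRecords<byte[], byte[]> records) {
        for (final TopicPartition partition : records.partitions()) {
            records.records(partition)
                   .stream()
                   .max(Comparator.comparingLong(ConsumerRecord::offset))
                   .ifPresent(r -> System.out.println(partition + " -> " + r.offset()));
        }
    }
}
```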

streams/src/main/java/org/apache/kafka/streams/processor/internals/Task.java (−7)

```diff
@@ -248,11 +248,4 @@ default boolean commitRequested() {
      * @return This returns the time the task started idling. If it is not idling it returns empty.
      */
     Optional<Long> timeCurrentIdlingStarted();
-
-    /**
-     * Update the committed offsets in the Task
-     * @param topicPartition
-     * @param offset
-     */
-    void updateCommittedOffsets(final TopicPartition topicPartition, final Long offset);
 }
```

streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskManager.java (+19 −7)

```diff
@@ -1091,7 +1091,7 @@ private void commitOffsetsOrTransaction(final Map<Task, Map<TopicPartition, Offs
                 try {
                     tasks.streamsProducerForTask(task.id())
                         .commitTransaction(taskToCommit.getValue(), mainConsumer.groupMetadata());
-                    updateTaskMetadata(taskToCommit.getValue());
+                    updateTaskCommitMetadata(taskToCommit.getValue());
                 } catch (final TimeoutException timeoutException) {
                     log.error(
                         String.format("Committing task %s failed.", task.id()),
@@ -1107,7 +1107,7 @@ private void commitOffsetsOrTransaction(final Map<Task, Map<TopicPartition, Offs
             if (processingMode == EXACTLY_ONCE_V2) {
                 try {
                     tasks.threadProducer().commitTransaction(allOffsets, mainConsumer.groupMetadata());
-                    updateTaskMetadata(allOffsets);
+                    updateTaskCommitMetadata(allOffsets);
                 } catch (final TimeoutException timeoutException) {
                     log.error(
                         String.format("Committing task(s) %s failed.",
@@ -1125,7 +1125,7 @@ private void commitOffsetsOrTransaction(final Map<Task, Map<TopicPartition, Offs
             } else {
                 try {
                     mainConsumer.commitSync(allOffsets);
-                    updateTaskMetadata(allOffsets);
+                    updateTaskCommitMetadata(allOffsets);
                 } catch (final CommitFailedException error) {
                     throw new TaskMigratedException("Consumer committing offsets failed, " +
                         "indicating the corresponding thread is no longer part of the group", error);
@@ -1152,11 +1152,23 @@ private void commitOffsetsOrTransaction(final Map<Task, Map<TopicPartition, Offs
         }
     }
 
-    private void updateTaskMetadata(final Map<TopicPartition, OffsetAndMetadata> allOffsets) {
+    private void updateTaskCommitMetadata(final Map<TopicPartition, OffsetAndMetadata> allOffsets) {
         for (final Task task: tasks.activeTasks()) {
-            for (final TopicPartition topicPartition: task.inputPartitions()) {
-                if (allOffsets.containsKey(topicPartition)) {
-                    task.updateCommittedOffsets(topicPartition, allOffsets.get(topicPartition).offset());
+            if (task instanceof StreamTask) {
+                for (final TopicPartition topicPartition : task.inputPartitions()) {
+                    if (allOffsets.containsKey(topicPartition)) {
+                        ((StreamTask) task).updateCommittedOffsets(topicPartition, allOffsets.get(topicPartition).offset());
+                    }
+                }
+            }
+        }
+    }
+
+    public void updateTaskEndMetadata(final TopicPartition topicPartition, final Long offset) {
+        for (final Task task: tasks.activeTasks()) {
+            if (task instanceof StreamTask) {
+                if (task.inputPartitions().contains(topicPartition)) {
+                    ((StreamTask) task).updateEndOffsets(topicPartition, offset);
                 }
             }
         }
```
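
With updateCommittedOffsets gone from the Task interface and StandbyTask, the manager narrows to StreamTask via instanceof before invoking either setter. A purely illustrative restatement of that filter-then-cast dispatch, with the task types stubbed so the sketch compiles on its own:

```java
import java.util.Collection;
import java.util.function.Consumer;

// Illustrative only: the filter-then-cast dispatch TaskManager applies, with
// Task/StreamTask stubbed out so the sketch is self-contained.
final class StreamTaskDispatch {
    interface Task { }
    static final class StreamTask implements Task { }

    static void forEachStreamTask(final Collection<Task> activeTasks,
                                  final Consumer<StreamTask> action) {
        for (final Task task : activeTasks) {
            if (task instanceof StreamTask) { // standbys and other Task kinds are skipped
                action.accept((StreamTask) task);
            }
        }
    }
}
```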
streams/src/test/java/org/apache/kafka/streams/integration/TaskMetadataIntegrationTest.java (+200, new file; path inferred from the package and class name)

```java
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kafka.streams.integration;

import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.processor.AbstractProcessor;
import org.apache.kafka.streams.processor.TaskMetadata;
import org.apache.kafka.test.IntegrationTest;
import org.apache.kafka.test.TestUtils;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;

import java.io.IOException;
import java.time.Duration;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;

import static org.apache.kafka.common.utils.Utils.mkEntry;
import static org.apache.kafka.common.utils.Utils.mkMap;
import static org.apache.kafka.common.utils.Utils.mkObjectProperties;
import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.purgeLocalStreamsState;
import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.safeUniqueTestName;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.MatcherAssert.assertThat;

@Category(IntegrationTest.class)
public class TaskMetadataIntegrationTest {

    public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1, new Properties(), 0L, 0L);

    @BeforeClass
    public static void startCluster() throws IOException {
        CLUSTER.start();
    }

    @AfterClass
    public static void closeCluster() {
        CLUSTER.stop();
    }
    public static final Duration DEFAULT_DURATION = Duration.ofSeconds(30);

    @Rule
    public TestName testName = new TestName();

    private String inputTopic;
    private static StreamsBuilder builder;
    private static Properties properties;
    private static String appId = "TaskMetadataTest_";
    private AtomicBoolean process;
    private AtomicBoolean commit;

    @Before
    public void setup() {
        final String testId = safeUniqueTestName(getClass(), testName);
        appId = appId + testId;
        inputTopic = "input" + testId;
        IntegrationTestUtils.cleanStateBeforeTest(CLUSTER, inputTopic);

        builder = new StreamsBuilder();

        process = new AtomicBoolean(true);
        commit = new AtomicBoolean(true);

        final KStream<String, String> stream = builder.stream(inputTopic);
        stream.process(PauseProcessor::new);

        properties = mkObjectProperties(
            mkMap(
                mkEntry(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()),
                mkEntry(StreamsConfig.APPLICATION_ID_CONFIG, appId),
                mkEntry(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath()),
                mkEntry(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 2),
                mkEntry(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.StringSerde.class),
                mkEntry(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.StringSerde.class),
                mkEntry(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1L)
            )
        );
    }

    @Test
    public void shouldReportCorrectCommittedOffsetInformation() {
        try (final KafkaStreams kafkaStreams = new KafkaStreams(builder.build(), properties)) {
            IntegrationTestUtils.startApplicationAndWaitUntilRunning(Collections.singletonList(kafkaStreams), DEFAULT_DURATION);
            final TaskMetadata taskMetadata = getTaskMetadata(kafkaStreams);
            assertThat(taskMetadata.committedOffsets().size(), equalTo(1));
            final TopicPartition topicPartition = new TopicPartition(inputTopic, 0);

            produceMessages(0L, inputTopic, "test");
            TestUtils.waitForCondition(() -> !process.get(), "The record was not processed");
            TestUtils.waitForCondition(() -> taskMetadata.committedOffsets().get(topicPartition) == 1L, "the record was processed");
            process.set(true);

            produceMessages(0L, inputTopic, "test1");
            TestUtils.waitForCondition(() -> !process.get(), "The record was not processed");
            TestUtils.waitForCondition(() -> taskMetadata.committedOffsets().get(topicPartition) == 2L, "the record was processed");
            process.set(true);

            produceMessages(0L, inputTopic, "test1");
            TestUtils.waitForCondition(() -> !process.get(), "The record was not processed");
            TestUtils.waitForCondition(() -> taskMetadata.committedOffsets().get(topicPartition) == 3L, "the record was processed");
        } catch (final Exception e) {
            e.printStackTrace();
        }
    }

    @Test
    public void shouldReportCorrectEndOffsetInformation() {
        try (final KafkaStreams kafkaStreams = new KafkaStreams(builder.build(), properties)) {
            IntegrationTestUtils.startApplicationAndWaitUntilRunning(Collections.singletonList(kafkaStreams), DEFAULT_DURATION);
            final TaskMetadata taskMetadata = getTaskMetadata(kafkaStreams);
            assertThat(taskMetadata.endOffsets().size(), equalTo(1));
            final TopicPartition topicPartition = new TopicPartition(inputTopic, 0);
            commit.set(false);

            for (int i = 0; i < 10; i++) {
                produceMessages(0L, inputTopic, "test");
                TestUtils.waitForCondition(() -> !process.get(), "The record was not processed");
                process.set(true);
            }
            assertThat(taskMetadata.endOffsets().get(topicPartition), equalTo(9L));

        } catch (final Exception e) {
            e.printStackTrace();
        }
    }

    private TaskMetadata getTaskMetadata(final KafkaStreams kafkaStreams) {
        final List<TaskMetadata> taskMetadataList = kafkaStreams.localThreadsMetadata().stream().flatMap(t -> t.activeTasks().stream()).collect(Collectors.toList());
        assertThat("only one task", taskMetadataList.size() == 1);
        return taskMetadataList.get(0);
    }

    @After
    public void teardown() throws IOException {
        purgeLocalStreamsState(properties);
    }

    private void produceMessages(final long timestamp, final String streamOneInput, final String msg) {
        IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(
            streamOneInput,
            Collections.singletonList(new KeyValue<>("1", msg)),
            TestUtils.producerConfig(
                CLUSTER.bootstrapServers(),
                StringSerializer.class,
                StringSerializer.class,
                new Properties()),
            timestamp);
    }

    private class PauseProcessor extends AbstractProcessor<String, String> {
        @Override
        public void process(final String key, final String value) {
            while (!process.get()) {
                try {
                    wait(100);
                } catch (final InterruptedException e) {

                }
            }
            context().forward(key, value);
            if (commit.get()) {
                context().commit();
            }
            process.set(false);
        }
    }
}
```
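
A note on the end-offset assertion in shouldReportCorrectEndOffsetInformation: with commits disabled, the ten produced records land at offsets 0 through 9 of partition 0, and because endOffsets() reports the highest record offset seen in a polled batch (not the broker's log-end offset, which would be 10), the metadata settles at 9L.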
