[SPARK-51717][SS][RocksDB] Fix SST mismatch corruption that can happen for second snapshot created for a new query #50512
@@ -885,6 +885,10 @@ class RocksDB(
      val (dfsFileSuffix, immutableFileMapping) = rocksDBFileMapping.createSnapshotFileMapping(
        fileManager, checkpointDir, version)
+     logInfo(log"RocksDB file mapping after creating snapshot file mapping for version " +
+       log"${MDC(LogKeys.VERSION_NUM, version)}:\n" +
+       log"${MDC(LogKeys.ROCKS_DB_FILE_MAPPING, rocksDBFileMapping)}")

      val newSnapshot = Some(RocksDBSnapshot(
        checkpointDir,
        version,
@@ -1344,6 +1348,16 @@ class RocksDBFileMapping {
  // from reusing SST files which have not been yet persisted to DFS,
  val snapshotsPendingUpload: Set[RocksDBVersionSnapshotInfo] = ConcurrentHashMap.newKeySet()

+  /**
+   * Clear everything stored in the file mapping.
+   */
+  def clear(): Unit = {
+    localFileMappings.clear()
+    snapshotsPendingUpload.clear()
+  }
+
+  override def toString: String = localFileMappings.toString()
+
  /**
   * Get the mapped DFS file for the given local file for a DFS load operation.
   * If the currently mapped DFS file was mapped in the same or newer version as the version we
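For orientation, here is a minimal standalone sketch, not the actual Spark class, of the state RocksDBFileMapping holds and what the new clear() wipes; the map shape and the DfsFileSketch type are simplified assumptions inferred from the diff above.

// Sketch only: models "local SST file name -> (version mapped in, DFS file)".
// The real class also tracks snapshotsPendingUpload, which clear() wipes as well.
import java.util.concurrent.ConcurrentHashMap

object FileMappingSketch {
  // Hypothetical, simplified stand-in for RocksDBImmutableFile.
  case class DfsFileSketch(dfsFileName: String, sizeBytes: Long)

  private val localFileMappings = new ConcurrentHashMap[String, (Long, DfsFileSketch)]()

  def clear(): Unit = localFileMappings.clear()

  override def toString: String = localFileMappings.toString

  def main(args: Array[String]): Unit = {
    localFileMappings.put("000010.sst", (5L, DfsFileSketch("000010-uuid.sst", 1024L)))
    println(FileMappingSketch) // roughly what the new logInfo lines render via toString
    clear()
    println(FileMappingSketch) // empty after clear(), e.g. after reloading version 0
  }
}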
@@ -1360,14 +1374,21 @@ class RocksDBFileMapping {
      fileManager: RocksDBFileManager,
      localFileName: String,
      versionToLoad: Long): Option[RocksDBImmutableFile] = {
-    getDfsFileWithVersionCheck(fileManager, localFileName, _ >= versionToLoad)
+    getDfsFileWithIncompatibilityCheck(
+      fileManager,
+      localFileName,
+      // We can't reuse the current local file since it was added in the same or newer version
+      // as the version we want to load
+      (fileVersion, _) => fileVersion >= versionToLoad
+    )
  }

  /**
   * Get the mapped DFS file for the given local file for a DFS save (i.e. checkpoint) operation.
   * If the currently mapped DFS file was mapped in the same or newer version as the version we
-   * want to save (or was generated in a version which has not been uploaded to DFS yet),
-   * the mapped DFS file is ignored. In this scenario, the local mapping to this DFS file
+   * want to save (or was generated in a version which has not been uploaded to DFS yet)
+   * or the mapped dfs file isn't the same size as the local file,
+   * then the mapped DFS file is ignored. In this scenario, the local mapping to this DFS file
   * will be cleared, and function will return None.
   *
   * @note If the file was added in current version (i.e. versionToSave - 1), we can reuse it.
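Expressed on its own, the load-side rule is just a predicate over the version in which the mapping entry was created. A tiny standalone sketch with simplified names (the real check also consults snapshotsPendingUpload, shown further below):

// Sketch of the load-side check: an entry mapped in the same or a newer version
// than the version being loaded cannot be reused and is dropped from the mapping.
def isIncompatibleForLoad(mappedVersion: Long, versionToLoad: Long): Boolean =
  mappedVersion >= versionToLoad

// Hypothetical examples:
//   isIncompatibleForLoad(mappedVersion = 9L, versionToLoad = 10L)  // false -> reuse allowed
//   isIncompatibleForLoad(mappedVersion = 10L, versionToLoad = 10L) // true  -> entry removed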
@@ -1378,19 +1399,26 @@ class RocksDBFileMapping {
   */
  private def getDfsFileForSave(
      fileManager: RocksDBFileManager,
-      localFileName: String,
+      localFile: File,
      versionToSave: Long): Option[RocksDBImmutableFile] = {
-    getDfsFileWithVersionCheck(fileManager, localFileName, _ >= versionToSave)
+    getDfsFileWithIncompatibilityCheck(
+      fileManager,
+      localFile.getName,
+      (dfsFileVersion, dfsFile) =>
+        // The DFS file is not the same as the file we want to save, either if
+        // the DFS file was added in the same or higher version, or the file size is different
+        dfsFileVersion >= versionToSave || dfsFile.sizeBytes != localFile.length()
+    )
  }

-  private def getDfsFileWithVersionCheck(
+  private def getDfsFileWithIncompatibilityCheck(
      fileManager: RocksDBFileManager,
      localFileName: String,
-      isIncompatibleVersion: Long => Boolean): Option[RocksDBImmutableFile] = {
+      isIncompatible: (Long, RocksDBImmutableFile) => Boolean): Option[RocksDBImmutableFile] = {
    localFileMappings.get(localFileName).map { case (dfsFileMappedVersion, dfsFile) =>
      val dfsFileSuffix = fileManager.dfsFileSuffix(dfsFile)
      val versionSnapshotInfo = RocksDBVersionSnapshotInfo(dfsFileMappedVersion, dfsFileSuffix)
-      if (isIncompatibleVersion(dfsFileMappedVersion) ||
+      if (isIncompatible(dfsFileMappedVersion, dfsFile) ||
          snapshotsPendingUpload.contains(versionSnapshotInfo)) {
        // the mapped dfs file cannot be used, delete from mapping
        remove(localFileName)

Inline review comment on the size check: Just to make clear: would …
Reply: Already contains the info in dfsFile. See …
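The size comparison above is the substance of the fix: a mapped DFS file is no longer reused for a checkpoint when its recorded size differs from the local SST file currently on disk. Below is a minimal standalone sketch of that predicate using simplified stand-in types rather than the actual Spark classes.

import java.io.File

// Stand-in for RocksDBImmutableFile; only the recorded size matters here.
case class DfsFileSketch(dfsFileName: String, sizeBytes: Long)

// Mirrors the new save-side check in getDfsFileForSave: the mapped DFS file is
// rejected if it was mapped in the same or a newer version than the version being
// saved, or if its recorded size no longer matches the local file on disk.
def isIncompatibleForSave(
    dfsFileVersion: Long,
    dfsFile: DfsFileSketch,
    localFile: File,
    versionToSave: Long): Boolean = {
  dfsFileVersion >= versionToSave || dfsFile.sizeBytes != localFile.length()
}

When the predicate holds, the entry is removed from the mapping and createSnapshotFileMapping falls back to generating a fresh DFS file name for upload, which keeps a rewritten local SST from being attributed to a stale DFS file in the new snapshot.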
@@ -1432,7 +1460,7 @@ class RocksDBFileMapping {
    val dfsFilesSuffix = UUID.randomUUID().toString
    val snapshotFileMapping = localImmutableFiles.map { f =>
      val localFileName = f.getName
-      val existingDfsFile = getDfsFileForSave(fileManager, localFileName, version)
+      val existingDfsFile = getDfsFileForSave(fileManager, f, version)
      val dfsFile = existingDfsFile.getOrElse {
        val newDfsFileName = fileManager.newDFSFileName(localFileName, dfsFilesSuffix)
        val newDfsFile = RocksDBImmutableFile(localFileName, newDfsFileName, sizeBytes = f.length())
@@ -302,6 +302,8 @@ class RocksDBFileManager(
    val metadata = if (version == 0) {
      if (localDir.exists) Utils.deleteRecursively(localDir)
      localDir.mkdirs()
+      // Since we cleared the local dir, we should also clear the local file mapping
+      rocksDBFileMapping.clear()
      RocksDBCheckpointMetadata(Seq.empty, 0)
    } else {
      // Delete all non-immutable files in local dir, and unzip new ones from DFS commit file
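As a small hedged sketch (simplified names; Utils.deleteRecursively and the file-manager wiring replaced with stand-ins), the version-0 load path now looks roughly like this:

import java.io.File
import scala.collection.mutable

// Sketch only: loading version 0 starts from an empty local working directory,
// so mapping entries left over from a previous run must be dropped as well;
// otherwise a later snapshot could reuse a DFS file whose contents no longer
// match a freshly written local SST of the same name.
def loadVersionZeroSketch(
    localDir: File,
    localFileMappings: mutable.Map[String, (Long, String)]): Unit = {
  if (localDir.exists()) {
    Option(localDir.listFiles()).foreach(_.foreach(_.delete())) // stand-in for Utils.deleteRecursively
  }
  localDir.mkdirs()
  localFileMappings.clear() // corresponds to the rocksDBFileMapping.clear() call added above
}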
@@ -320,6 +322,10 @@ class RocksDBFileManager(
    }
    logFilesInDir(localDir, log"Loaded checkpoint files " +
      log"for version ${MDC(LogKeys.VERSION_NUM, version)}")
+    logInfo(log"RocksDB file mapping after loading checkpoint version " +
+      log"${MDC(LogKeys.VERSION_NUM, version)} from DFS:\n" +
+      log"${MDC(LogKeys.ROCKS_DB_FILE_MAPPING, rocksDBFileMapping)}")

    metadata
  }

Inline review comment on this log line: ditto about the length of log message
Reply: Same as my other comment above. For this one, we would only log it on …
Review comment:
@micheal-o How large is this message in general? We had a PR which reduced the logging for the RocksDB state store provider, and I wouldn't like to re-introduce that issue. If this is considerably huge, we'd need to make it debug level.
cc. @anishshri-db

Reply:
@HeartSaVioR this is logged only when we create a snapshot (e.g. 1 per 10 batches), hence I don't expect it to be noisy or a lot. The content is just the list of local SST files and what DFS files they are mapped to. From past experience this is also not typically a lot; on average it should be less than 10-20 SST files.
This would really make debugging this type of issue easier and faster.
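For a rough sense of volume, here is a hypothetical example of the mapping content behind that log line (file names, versions, and formatting are illustrative only; the real output is whatever the underlying map's toString produces):

// Hypothetical example: each local SST file maps to the version it was uploaded
// in and the DFS file name it was uploaded as.
val exampleMapping = Map(
  "000008.sst" -> (11L, "000008-6f2a1b.sst"),
  "000009.sst" -> (12L, "000009-9c4d2e.sst")
)
println(s"RocksDB file mapping after creating snapshot file mapping for version 12:\n$exampleMapping")
// With a snapshot roughly every 10 batches and typically 10-20 SST files, one
// such line per snapshot adds little log volume.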