diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ccf4cf98..ea6e24800 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Unreleased +**Features**: + +- Added debug IDs to source bundle JavaScript files and source maps. ([#762](https://github.com/getsentry/symbolic/pull/762)) + **Breaking changes**: - Change `DebugSession::source_by_path()` to return `SourceCode` enum with either file content or a URL to fetch it from. ([#758](https://github.com/getsentry/symbolic/pull/758)) diff --git a/symbolic-debuginfo/Cargo.toml b/symbolic-debuginfo/Cargo.toml index e06f27a60..88da29d34 100644 --- a/symbolic-debuginfo/Cargo.toml +++ b/symbolic-debuginfo/Cargo.toml @@ -70,6 +70,7 @@ sourcebundle = [ "regex", "serde_json", "zip", + "debugid/serde" ] # WASM processing wasm = ["bitvec", "dwarf", "wasmparser"] @@ -77,6 +78,7 @@ wasm = ["bitvec", "dwarf", "wasmparser"] [dependencies] bitvec = { version = "1.0.0", optional = true, features = ["alloc"] } dmsort = "1.0.1" +debugid = { version = "0.8.0" } elementtree = { version = "1.2.2", optional = true } elsa = { version = "1.4.0", optional = true } fallible-iterator = "0.2.0" diff --git a/symbolic-debuginfo/src/base.rs b/symbolic-debuginfo/src/base.rs index a21a04bff..f49c02ee3 100644 --- a/symbolic-debuginfo/src/base.rs +++ b/symbolic-debuginfo/src/base.rs @@ -672,7 +672,7 @@ pub type DynIterator<'a, T> = Box + 'a>; /// Represents a source file referenced by a debug information object file. #[non_exhaustive] -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum SourceCode<'a> { /// Verbatim source code/file contents. Content(Cow<'a, str>), @@ -681,6 +681,12 @@ pub enum SourceCode<'a> { Url(Cow<'a, str>), } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SourceDescriptor<'source> { + pub source_code: SourceCode<'source>, + pub debug_id: Option, +} + /// A stateful session for interfacing with debug information. /// /// Debug sessions can be obtained via [`ObjectLike::debug_session`]. Since computing a session may @@ -729,7 +735,7 @@ pub trait DebugSession<'session> { /// Looks up a file's source by its full canonicalized path. /// Returns either source contents, if it was embedded, or a source link. - fn source_by_path(&self, path: &str) -> Result>, Self::Error>; + fn source_by_path(&self, path: &str) -> Result>, Self::Error>; } /// An object containing debug information. diff --git a/symbolic-debuginfo/src/breakpad.rs b/symbolic-debuginfo/src/breakpad.rs index 0e6ccb2ab..416c31453 100644 --- a/symbolic-debuginfo/src/breakpad.rs +++ b/symbolic-debuginfo/src/breakpad.rs @@ -1277,7 +1277,10 @@ impl<'data> BreakpadDebugSession<'data> { } /// See [DebugSession::source_by_path] for more information. - pub fn source_by_path(&self, _path: &str) -> Result>, BreakpadError> { + pub fn source_by_path( + &self, + _path: &str, + ) -> Result>, BreakpadError> { Ok(None) } } @@ -1295,7 +1298,7 @@ impl<'data, 'session> DebugSession<'session> for BreakpadDebugSession<'data> { self.files() } - fn source_by_path(&self, path: &str) -> Result>, Self::Error> { + fn source_by_path(&self, path: &str) -> Result>, Self::Error> { self.source_by_path(path) } } diff --git a/symbolic-debuginfo/src/dwarf.rs b/symbolic-debuginfo/src/dwarf.rs index 17ab0e165..7fe1d04c6 100644 --- a/symbolic-debuginfo/src/dwarf.rs +++ b/symbolic-debuginfo/src/dwarf.rs @@ -1332,7 +1332,7 @@ impl<'data> DwarfDebugSession<'data> { } /// See [DebugSession::source_by_path] for more information. - pub fn source_by_path(&self, _path: &str) -> Result>, DwarfError> { + pub fn source_by_path(&self, _path: &str) -> Result>, DwarfError> { Ok(None) } } @@ -1350,7 +1350,7 @@ impl<'data, 'session> DebugSession<'session> for DwarfDebugSession<'data> { self.files() } - fn source_by_path(&self, path: &str) -> Result>, Self::Error> { + fn source_by_path(&self, path: &str) -> Result>, Self::Error> { self.source_by_path(path) } } diff --git a/symbolic-debuginfo/src/object.rs b/symbolic-debuginfo/src/object.rs index c9419248d..d87b26e48 100644 --- a/symbolic-debuginfo/src/object.rs +++ b/symbolic-debuginfo/src/object.rs @@ -484,7 +484,7 @@ impl<'d> ObjectDebugSession<'d> { /// Looks up a file's source by its full canonicalized path. /// Returns either source contents, if it was embedded, or a source link. - pub fn source_by_path(&self, path: &str) -> Result>, ObjectError> { + pub fn source_by_path(&self, path: &str) -> Result>, ObjectError> { match *self { ObjectDebugSession::Breakpad(ref s) => { s.source_by_path(path).map_err(ObjectError::transparent) @@ -518,7 +518,7 @@ impl<'session> DebugSession<'session> for ObjectDebugSession<'_> { self.files() } - fn source_by_path(&self, path: &str) -> Result>, Self::Error> { + fn source_by_path(&self, path: &str) -> Result>, Self::Error> { self.source_by_path(path) } } diff --git a/symbolic-debuginfo/src/pdb.rs b/symbolic-debuginfo/src/pdb.rs index bc906915e..3e538b6c6 100644 --- a/symbolic-debuginfo/src/pdb.rs +++ b/symbolic-debuginfo/src/pdb.rs @@ -638,7 +638,7 @@ impl<'d> PdbDebugSession<'d> { } /// See [DebugSession::source_by_path] for more information. - pub fn source_by_path(&self, _path: &str) -> Result>, PdbError> { + pub fn source_by_path(&self, _path: &str) -> Result>, PdbError> { Ok(None) } } @@ -656,7 +656,7 @@ impl<'session> DebugSession<'session> for PdbDebugSession<'_> { self.files() } - fn source_by_path(&self, path: &str) -> Result>, Self::Error> { + fn source_by_path(&self, path: &str) -> Result>, Self::Error> { self.source_by_path(path) } } diff --git a/symbolic-debuginfo/src/ppdb.rs b/symbolic-debuginfo/src/ppdb.rs index e7b9362a0..894a63acb 100644 --- a/symbolic-debuginfo/src/ppdb.rs +++ b/symbolic-debuginfo/src/ppdb.rs @@ -191,9 +191,9 @@ impl<'data> PortablePdbDebugSession<'data> { } /// See [DebugSession::source_by_path] for more information. - pub fn source_by_path(&self, path: &str) -> Result>, FormatError> { + pub fn source_by_path(&self, path: &str) -> Result>, FormatError> { let sources = self.sources.borrow_with(|| self.init_sources()); - match sources.get(path) { + let source = match sources.get(path) { None => Ok(None), Some(PPDBSource::Embedded(source)) => source .get_contents() @@ -201,7 +201,13 @@ impl<'data> PortablePdbDebugSession<'data> { Some(PPDBSource::Link(document)) => { Ok(self.ppdb.get_source_link(document).map(SourceCode::Url)) } - } + }?; + + Ok(source.map(|source_code| SourceDescriptor { + source_code, + // TODO: Should we use the pdb id for this? + debug_id: None, + })) } } @@ -218,7 +224,7 @@ impl<'data, 'session> DebugSession<'session> for PortablePdbDebugSession<'data> self.files() } - fn source_by_path(&self, path: &str) -> Result>, Self::Error> { + fn source_by_path(&self, path: &str) -> Result>, Self::Error> { self.source_by_path(path) } } diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index 9291f7f97..a6334ffd1 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -33,6 +33,14 @@ //! [`code_id`]: struct.SourceBundle.html#method.code_id //! [`SourceBundle::debug_session`]: struct.SourceBundle.html#method.debug_session //! [`SourceBundleWriter`]: struct.SourceBundleWriter.html +//! +//! ## Artifact Bundles +//! +//! Source bundles share the format with a related concept, called an "artifact bundle". Artifact +//! bundles are essentially source bundles but they typically contain sources referred to by +//! JavaScript source maps and source maps themselves. For instance in an artifact +//! bundle a file entry has a `url` and might carry `headers` or individual debug IDs +//! per source file. use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, HashMap}; @@ -46,7 +54,7 @@ use std::sync::Arc; use lazycell::LazyCell; use parking_lot::Mutex; use regex::Regex; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize}; use thiserror::Error; use zip::{write::FileOptions, ZipWriter}; @@ -145,7 +153,7 @@ where } /// The type of a [`SourceFileInfo`](struct.SourceFileInfo.html). -#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Serialize, Deserialize, Hash)] #[serde(rename_all = "snake_case")] pub enum SourceFileType { /// Regular source file. @@ -161,6 +169,41 @@ pub enum SourceFileType { IndexedRamBundle, } +#[derive(Debug, Clone, PartialEq, Eq)] +struct SourceDescriptorInternal { + zip_path: String, + debug_id: Option, +} + +impl SourceDescriptorInternal { + fn new(zip_path: String, file_info: &SourceFileInfo) -> Self { + Self { + zip_path, + debug_id: file_info.debug_id(), + } + } + + fn load( + &self, + archive: Arc>>>, + ) -> Result, SourceBundleError> { + let mut source_content = String::new(); + + let mut archive = archive.lock(); + let mut file = archive + .by_name(&self.zip_path) + .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?; + + file.read_to_string(&mut source_content) + .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?; + + Ok(SourceDescriptor { + source_code: SourceCode::Content(Cow::Owned(source_content)), + debug_id: self.debug_id, + }) + } +} + /// Meta data information of a file in a [`SourceBundle`](struct.SourceBundle.html). #[derive(Clone, Debug, Default, Serialize, Deserialize)] pub struct SourceFileInfo { @@ -173,10 +216,34 @@ pub struct SourceFileInfo { #[serde(default, skip_serializing_if = "String::is_empty")] url: String, - #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + #[serde( + default, + skip_serializing_if = "BTreeMap::is_empty", + deserialize_with = "deserialize_headers" + )] headers: BTreeMap, } +/// Helper to ensure that header keys are normalized to lowercase +fn deserialize_headers<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let rv: BTreeMap = Deserialize::deserialize(deserializer)?; + if rv.is_empty() + || rv + .keys() + .all(|x| !x.chars().any(|c| c.is_ascii_uppercase())) + { + Ok(rv) + } else { + Ok(rv + .into_iter() + .map(|(k, v)| (k.to_ascii_lowercase(), v)) + .collect()) + } +} + impl SourceFileInfo { /// Creates default file information. pub fn new() -> Self { @@ -226,14 +293,58 @@ impl SourceFileInfo { /// Retrieves the specified header, if it exists. pub fn header(&self, header: &str) -> Option<&str> { - self.headers.get(header).map(String::as_str) + if header.chars().any(|x| x.is_ascii_uppercase()) { + self.headers.get(header).map(String::as_str) + } else { + self.headers.iter().find_map(|(k, v)| { + if k.eq_ignore_ascii_case(header) { + Some(v.as_str()) + } else { + None + } + }) + } } /// Adds a custom attribute following header conventions. + /// + /// Header keys are converted to lowercase before writing as this is + /// the canonical format for headers. However, the file format does + /// support headers to be case insensitive and they will be lower cased + /// upon reading. + /// + /// Headers on files are primarily be used to add auxiliary information + /// to files. The following headers are known and processed: + /// + /// - `debug-id`: see [`debug_id`](Self::debug_id) + /// - `sourcemap` (and `x-sourcemap`): see [`source_mapping_url`](Self::source_mapping_url) pub fn add_header(&mut self, header: String, value: String) { + let mut header = header; + if !header.chars().any(|x| x.is_ascii_uppercase()) { + header = header.to_ascii_lowercase(); + } self.headers.insert(header, value); } + /// The debug ID of this minified source or sourcemap if it has any. + /// + /// Files have a debug ID if they have a header with the key `debug-id`. + /// At present debug IDs in source bundles are only ever given to minified + /// source files. + pub fn debug_id(&self) -> Option { + self.header("debug-id").and_then(|x| x.parse().ok()) + } + + /// The source mapping URL of the given minified source. + /// + /// Files have a source mapping URL if they have a header with the + /// key `sourcemap` (or the `x-sourcemap` legacy header) as part the + /// source map specification. + pub fn source_mapping_url(&self) -> Option<&str> { + self.header("sourcemap") + .or_else(|| self.header("x-sourcemap")) + } + /// Returns `true` if this instance does not carry any information. pub fn is_empty(&self) -> bool { self.path.is_empty() && self.ty.is_none() && self.headers.is_empty() @@ -481,6 +592,7 @@ impl<'data> SourceBundle<'data> { manifest: self.manifest.clone(), archive: self.archive.clone(), files_by_path: LazyCell::new(), + files_by_debug_id: LazyCell::new(), }) } @@ -599,7 +711,8 @@ pub type SourceBundleSymbolIterator<'data> = std::iter::Empty>; pub struct SourceBundleDebugSession<'data> { manifest: Arc, archive: Arc>>>, - files_by_path: LazyCell>, + files_by_path: LazyCell>, + files_by_debug_id: LazyCell>, } impl<'data> SourceBundleDebugSession<'data> { @@ -615,50 +728,77 @@ impl<'data> SourceBundleDebugSession<'data> { std::iter::empty() } - /// Create a reverse mapping of source paths to ZIP paths. - fn get_files_by_path(&self) -> HashMap { - let files = &self.manifest.files; - let mut files_by_path = HashMap::with_capacity(files.len()); + /// Get a reverse mapping of source paths to ZIP paths. + fn files_by_path(&self) -> &HashMap { + self.files_by_path.borrow_with(|| { + let files = &self.manifest.files; + let mut files_by_path = HashMap::with_capacity(files.len()); - for (zip_path, file_info) in files { - if !file_info.path.is_empty() { - files_by_path.insert(file_info.path.clone(), zip_path.clone()); + for (zip_path, file_info) in files { + if !file_info.path.is_empty() { + let sd = SourceDescriptorInternal::new(zip_path.clone(), file_info); + files_by_path.insert(file_info.path.clone(), sd); + } } - } - files_by_path + files_by_path + }) } - /// Get the path of a file in this bundle by its logical path. - fn zip_path_by_source_path(&self, path: &str) -> Option<&str> { - self.files_by_path - .borrow_with(|| self.get_files_by_path()) - .get(path) - .map(|zip_path| zip_path.as_str()) - } + /// Get a reverse mapping of debug ID to ZIP paths. + fn files_by_debug_id(&self) -> &HashMap<(DebugId, SourceFileType), SourceDescriptorInternal> { + self.files_by_debug_id.borrow_with(|| { + let files = &self.manifest.files; + let mut files_by_debug_id = HashMap::new(); - /// Get source by the path of a file in the bundle. - fn source_by_zip_path(&self, zip_path: &str) -> Result, SourceBundleError> { - let mut archive = self.archive.lock(); - let mut file = archive - .by_name(zip_path) - .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?; - let mut source_content = String::new(); + for (zip_path, file_info) in files { + if let (Some(debug_id), Some(ty)) = (file_info.debug_id(), file_info.ty()) { + let sd = SourceDescriptorInternal::new(zip_path.clone(), file_info); + files_by_debug_id.insert((debug_id, ty), sd); + } + } - file.read_to_string(&mut source_content) - .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?; - Ok(Some(source_content)) + files_by_debug_id + }) } /// See [DebugSession::source_by_path] for more information. - pub fn source_by_path(&self, path: &str) -> Result>, SourceBundleError> { - let zip_path = match self.zip_path_by_source_path(path) { + pub fn source_by_path( + &self, + path: &str, + ) -> Result>, SourceBundleError> { + let sd_internal = match self.files_by_path().get(path) { + Some(sd) => sd, + None => return Ok(None), + }; + + sd_internal.load(Arc::clone(&self.archive)).map(Some) + } + + /// Looks up some source by debug ID and file type. + /// + /// Lookups by [`DebugId`] require knowledge of the file that is supposed to be + /// looked up as multiple files (one per type) can share the same debug ID. + /// Special care needs to be taken about [`SourceFileType::IndexedRamBundle`] + /// and [`SourceFileType::SourceMap`] which are different file types despite + /// the name of it. + /// + /// # Note on Abstractions + /// + /// This method is currently not exposed via a standardized debug session + /// as it's primarily used for the JavaScript processing system which uses + /// different abstractions. + pub fn source_by_debug_id( + &self, + debug_id: DebugId, + ty: SourceFileType, + ) -> Result>, SourceBundleError> { + let zip_path = match self.files_by_debug_id().get(&(debug_id, ty)) { Some(zip_path) => zip_path, None => return Ok(None), }; - let content = self.source_by_zip_path(zip_path)?; - Ok(content.map(|opt| SourceCode::Content(Cow::Owned(opt)))) + zip_path.load(Arc::clone(&self.archive)).map(Some) } } @@ -675,7 +815,7 @@ impl<'data, 'session> DebugSession<'session> for SourceBundleDebugSession<'data> self.files() } - fn source_by_path(&self, path: &str) -> Result>, Self::Error> { + fn source_by_path(&self, path: &str) -> Result>, Self::Error> { self.source_by_path(path) } } @@ -1106,6 +1246,49 @@ mod tests { Ok(()) } + #[test] + fn test_debug_id() -> Result<(), SourceBundleError> { + let mut writer = Cursor::new(Vec::new()); + let mut bundle = SourceBundleWriter::start(&mut writer)?; + + let mut info = SourceFileInfo::default(); + info.set_ty(SourceFileType::MinifiedSource); + info.add_header( + "debug-id".into(), + "5e618b9f-54a9-4389-b196-519819dd7c47".into(), + ); + info.add_header("sourcemap".into(), "bar.js.min".into()); + bundle.add_file("bar.js", &b"filecontents"[..], info)?; + assert!(bundle.has_file("bar.js")); + + bundle.finish()?; + let bundle_bytes = writer.into_inner(); + let bundle = SourceBundle::parse(&bundle_bytes)?; + + let sess = bundle.debug_session().unwrap(); + let f = sess + .source_by_debug_id( + "5e618b9f-54a9-4389-b196-519819dd7c47".parse().unwrap(), + SourceFileType::MinifiedSource, + ) + .unwrap() + .expect("should exist"); + assert_eq!( + f.source_code, + SourceCode::Content(Cow::Borrowed("filecontents")) + ); + + assert!(sess + .source_by_debug_id( + "5e618b9f-54a9-4389-b196-519819dd7c47".parse().unwrap(), + SourceFileType::Source + ) + .unwrap() + .is_none()); + + Ok(()) + } + #[test] fn test_il2cpp_reference() -> Result<(), Box> { let mut cpp_file = NamedTempFile::new()?; @@ -1151,7 +1334,7 @@ mod tests { .flat_map(|f| { let path = f.abs_path_str(); session.source_by_path(&path).ok().flatten().map(|source| { - let SourceCode::Content(text) = source else { + let SourceCode::Content(text) = source.source_code else { unreachable!(); }; (path, text.into_owned()) diff --git a/symbolic-debuginfo/tests/test_objects.rs b/symbolic-debuginfo/tests/test_objects.rs index 2346013af..a37c99c28 100644 --- a/symbolic-debuginfo/tests/test_objects.rs +++ b/symbolic-debuginfo/tests/test_objects.rs @@ -776,7 +776,7 @@ fn test_ppdb_source_by_path() -> Result<(), Error> { "C:\\dev\\sentry-dotnet\\samples\\Sentry.Samples.Console.Basic\\Program.cs", ) .unwrap(); - match source.unwrap() { + match source.unwrap().source_code { SourceCode::Content(text) => assert_eq!(text.len(), 204), _ => panic!(), } @@ -803,7 +803,12 @@ fn test_ppdb_source_links() -> Result<(), Error> { for file in session.files() { let file = file.unwrap(); - match session.source_by_path(&file.path_str()).unwrap().unwrap() { + match session + .source_by_path(&file.path_str()) + .unwrap() + .unwrap() + .source_code + { SourceCode::Content(text) => { assert!(known_embedded_sources.contains(&file.name_str().as_ref())); assert!(!text.is_empty());