From 988ba56284cad064501fb1efd8488bec931a5249 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 20 Feb 2023 13:34:04 +0100 Subject: [PATCH 01/14] feat: Added debug IDs to source bundle javascript files --- CHANGELOG.md | 4 + symbolic-debuginfo/Cargo.toml | 2 + symbolic-debuginfo/src/base.rs | 2 +- symbolic-debuginfo/src/sourcebundle.rs | 195 +++++++++++++++++++++++-- 4 files changed, 186 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c78548c4..bee2e56f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Unreleased +**Features**: + +- Added debug IDs to source bundle JavaScript files and source maps. ([#762](https://github.com/getsentry/symbolic/pull/762)) + **Breaking changes**: - Change `DebugSession::source_by_path()` to return `SourceCode` enum with either file content or a URL to fetch it from. ([#758](https://github.com/getsentry/symbolic/pull/758)) diff --git a/symbolic-debuginfo/Cargo.toml b/symbolic-debuginfo/Cargo.toml index e06f27a60..88da29d34 100644 --- a/symbolic-debuginfo/Cargo.toml +++ b/symbolic-debuginfo/Cargo.toml @@ -70,6 +70,7 @@ sourcebundle = [ "regex", "serde_json", "zip", + "debugid/serde" ] # WASM processing wasm = ["bitvec", "dwarf", "wasmparser"] @@ -77,6 +78,7 @@ wasm = ["bitvec", "dwarf", "wasmparser"] [dependencies] bitvec = { version = "1.0.0", optional = true, features = ["alloc"] } dmsort = "1.0.1" +debugid = { version = "0.8.0" } elementtree = { version = "1.2.2", optional = true } elsa = { version = "1.4.0", optional = true } fallible-iterator = "0.2.0" diff --git a/symbolic-debuginfo/src/base.rs b/symbolic-debuginfo/src/base.rs index a21a04bff..9f061208d 100644 --- a/symbolic-debuginfo/src/base.rs +++ b/symbolic-debuginfo/src/base.rs @@ -672,7 +672,7 @@ pub type DynIterator<'a, T> = Box + 'a>; /// Represents a source file referenced by a debug information object file. #[non_exhaustive] -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum SourceCode<'a> { /// Verbatim source code/file contents. Content(Cow<'a, str>), diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index 5bbeb87df..a204f9f71 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -33,6 +33,14 @@ //! [`code_id`]: struct.SourceBundle.html#method.code_id //! [`SourceBundle::debug_session`]: struct.SourceBundle.html#method.debug_session //! [`SourceBundleWriter`]: struct.SourceBundleWriter.html +//! +//! ## Artifact Bundles +//! +//! Source bundles share the format with a related concept, called an "artifact bundle". Artifact +//! bundles are essentially source bundles but they typically contain sources referred to by +//! JavaScript source maps and source maps themselves. For instance in an artifact +//! bundle a file entry has a `url` and might carry `headers` or individual debug IDs +//! per source file. use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, HashMap}; @@ -46,7 +54,7 @@ use std::sync::Arc; use lazycell::LazyCell; use parking_lot::Mutex; use regex::Regex; -use serde::{Deserialize, Serialize}; +use serde::{de, Deserialize, Serialize}; use thiserror::Error; use zip::{write::FileOptions, ZipWriter}; @@ -145,7 +153,7 @@ where } /// The type of a [`SourceFileInfo`](struct.SourceFileInfo.html). -#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Serialize, Deserialize, Hash)] #[serde(rename_all = "snake_case")] pub enum SourceFileType { /// Regular source file. @@ -173,10 +181,30 @@ pub struct SourceFileInfo { #[serde(default, skip_serializing_if = "String::is_empty")] url: String, - #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + #[serde( + default, + skip_serializing_if = "BTreeMap::is_empty", + deserialize_with = "deserialize_headers" + )] headers: BTreeMap, } +/// Helper to ensure that header keys are normalized to lowercase +fn deserialize_headers<'de, D>(deserializer: D) -> Result, D::Error> +where + D: de::Deserializer<'de>, +{ + let rv: BTreeMap = de::Deserialize::deserialize(deserializer)?; + if rv.is_empty() || rv.keys().all(|x| x.chars().all(|c| c.is_ascii_lowercase())) { + Ok(rv) + } else { + Ok(rv + .into_iter() + .map(|(k, v)| (k.to_ascii_lowercase(), v)) + .collect()) + } +} + impl SourceFileInfo { /// Creates default file information. pub fn new() -> Self { @@ -226,14 +254,58 @@ impl SourceFileInfo { /// Retrieves the specified header, if it exists. pub fn header(&self, header: &str) -> Option<&str> { - self.headers.get(header).map(String::as_str) + if header.chars().all(|x| x.is_ascii_lowercase()) { + self.headers.get(header).map(String::as_str) + } else { + self.headers.iter().find_map(|(k, v)| { + if k.eq_ignore_ascii_case(header) { + Some(v.as_str()) + } else { + None + } + }) + } } /// Adds a custom attribute following header conventions. + /// + /// Header keys are converted to lowercase before writing as this is + /// the canonical format for headers however the file format does + /// support headers to be case insensitive and they will be lower cased + /// upon reading. + /// + /// Headers on files are primarily be used to add auxiliary information + /// to files. The following headers are known and processed: + /// + /// - `debug-id`: see [`debug_id`](Self::debug_id) + /// - `sourcemap` (and `x-sourcemap`): see [`source_mapping_url`](Self::source_mapping_url) pub fn add_header(&mut self, header: String, value: String) { + let mut header = header; + if !header.chars().all(|x| x.is_ascii_lowercase()) { + header = header.to_ascii_uppercase(); + } self.headers.insert(header, value); } + /// The debug ID of this minified source or sourcemap if it has any. + /// + /// Files have a debug ID if they have a header with the key `debug-id`. + /// At present debug IDs in source bundles are only ever given to minified + /// source files. + pub fn debug_id(&self) -> Option { + self.header("debug-id").and_then(|x| x.parse().ok()) + } + + /// The source mapping URL of the given minified source. + /// + /// Files have a source mapping URL if they have a header with the + /// key `sourcemap` (or the `x-sourcemap` legacy header) as part the + /// source map specification. + pub fn source_mapping_url(&self) -> Option<&str> { + self.header("sourcemap") + .or_else(|| self.header("x-sourcemap")) + } + /// Returns `true` if this instance does not carry any information. pub fn is_empty(&self) -> bool { self.path.is_empty() && self.ty.is_none() && self.headers.is_empty() @@ -309,7 +381,6 @@ struct SourceBundleManifest { pub files: BTreeMap, /// Arbitrary attributes to include in the bundle. - #[serde(flatten)] pub attributes: BTreeMap, } @@ -481,6 +552,7 @@ impl<'data> SourceBundle<'data> { manifest: self.manifest.clone(), archive: self.archive.clone(), files_by_path: LazyCell::new(), + files_by_debug_id: LazyCell::new(), }) } @@ -600,6 +672,7 @@ pub struct SourceBundleDebugSession<'data> { manifest: Arc, archive: Arc>>>, files_by_path: LazyCell>, + files_by_debug_id: LazyCell>, } impl<'data> SourceBundleDebugSession<'data> { @@ -615,28 +688,52 @@ impl<'data> SourceBundleDebugSession<'data> { std::iter::empty() } - /// Create a reverse mapping of source paths to ZIP paths. - fn get_files_by_path(&self) -> HashMap { - let files = &self.manifest.files; - let mut files_by_path = HashMap::with_capacity(files.len()); + /// Get a reverse mapping of source paths to ZIP paths. + fn files_by_path(&self) -> &HashMap { + self.files_by_path.borrow_with(|| { + let files = &self.manifest.files; + let mut files_by_path = HashMap::with_capacity(files.len()); - for (zip_path, file_info) in files { - if !file_info.path.is_empty() { - files_by_path.insert(file_info.path.clone(), zip_path.clone()); + for (zip_path, file_info) in files { + if !file_info.path.is_empty() { + files_by_path.insert(file_info.path.clone(), zip_path.clone()); + } } - } - files_by_path + files_by_path + }) + } + + /// Get a reverse mapping of debug ID to ZIP paths. + fn files_by_debug_id(&self) -> &HashMap<(DebugId, SourceFileType), String> { + self.files_by_debug_id.borrow_with(|| { + let files = &self.manifest.files; + let mut files_by_debug_id = HashMap::new(); + + for (zip_path, file_info) in files { + if let (Some(debug_id), Some(ty)) = (file_info.debug_id(), file_info.ty()) { + files_by_debug_id.insert((debug_id, ty), zip_path.clone()); + } + } + + files_by_debug_id + }) } /// Get the path of a file in this bundle by its logical path. fn zip_path_by_source_path(&self, path: &str) -> Option<&str> { - self.files_by_path - .borrow_with(|| self.get_files_by_path()) + self.files_by_path() .get(path) .map(|zip_path| zip_path.as_str()) } + /// Get the path of a file in this bundle by its Debug ID and source file type. + fn zip_path_by_debug_id(&self, debug_id: DebugId, ty: SourceFileType) -> Option<&str> { + self.files_by_debug_id() + .get(&(debug_id, ty)) + .map(|zip_path| zip_path.as_str()) + } + /// Get source by the path of a file in the bundle. fn source_by_zip_path(&self, zip_path: &str) -> Result, SourceBundleError> { let mut archive = self.archive.lock(); @@ -660,6 +757,32 @@ impl<'data> SourceBundleDebugSession<'data> { let content = self.source_by_zip_path(zip_path)?; Ok(content.map(|opt| SourceCode::Content(Cow::Owned(opt)))) } + + /// Looks up some source by debug ID and file type. + /// + /// Lookups by [`DebugId`] require knowledge of the file that is supposed to be + /// looked up as multiple files (one per type) can share the same debug ID. + /// Special care needs to be taken about [`SourceFileType::IndexedRamBundle`] + /// and [`SourceFileType::SourceMap`] which are different file types despite + /// the name of it. + /// + /// # Note on Abstractions + /// + /// This method is currently not exposed via a standardized debug session + /// as it's primarily used for the JavaScript processing system which uses + /// different abstractions. + pub fn source_by_debug_id( + &self, + debug_id: DebugId, + ty: SourceFileType, + ) -> Result>, SourceBundleError> { + let zip_path = match self.zip_path_by_debug_id(debug_id, ty) { + Some(zip_path) => zip_path, + None => return Ok(None), + }; + let content = self.source_by_zip_path(zip_path)?; + Ok(content.map(|opt| SourceCode::Content(Cow::Owned(opt)))) + } } impl<'data, 'session> DebugSession<'session> for SourceBundleDebugSession<'data> { @@ -1114,6 +1237,46 @@ mod tests { Ok(()) } + #[test] + fn test_debug_id() -> Result<(), SourceBundleError> { + let mut writer = Cursor::new(Vec::new()); + let mut bundle = SourceBundleWriter::start(&mut writer)?; + + let mut info = SourceFileInfo::default(); + info.set_ty(SourceFileType::MinifiedSource); + info.add_header( + "debug-id".into(), + "5e618b9f-54a9-4389-b196-519819dd7c47".into(), + ); + info.add_header("sourcemap".into(), "bar.js.min".into()); + bundle.add_file("bar.js", &b"filecontents"[..], info)?; + assert!(bundle.has_file("bar.js")); + + bundle.finish()?; + let bundle_bytes = writer.into_inner(); + let bundle = SourceBundle::parse(&bundle_bytes)?; + + let sess = bundle.debug_session().unwrap(); + let f = sess + .source_by_debug_id( + "5e618b9f-54a9-4389-b196-519819dd7c47".parse().unwrap(), + SourceFileType::MinifiedSource, + ) + .unwrap() + .expect("should exist"); + assert_eq!(f, SourceCode::Content(Cow::Borrowed("filecontents"))); + + assert!(sess + .source_by_debug_id( + "5e618b9f-54a9-4389-b196-519819dd7c47".parse().unwrap(), + SourceFileType::Source + ) + .unwrap() + .is_none()); + + Ok(()) + } + #[test] fn test_il2cpp_reference() -> Result<(), Box> { let mut cpp_file = NamedTempFile::new()?; From 4da17241507a8601796c6c812d7e3f81f345b685 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 20 Feb 2023 14:21:58 +0100 Subject: [PATCH 02/14] Update symbolic-debuginfo/src/sourcebundle.rs Co-authored-by: Sebastian Zivota --- symbolic-debuginfo/src/sourcebundle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index a204f9f71..74336bd73 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -270,7 +270,7 @@ impl SourceFileInfo { /// Adds a custom attribute following header conventions. /// /// Header keys are converted to lowercase before writing as this is - /// the canonical format for headers however the file format does + /// the canonical format for headers. However, the file format does /// support headers to be case insensitive and they will be lower cased /// upon reading. /// From 2d7279a3f65c9dc9bee9199dc4bbc497e4f235eb Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 20 Feb 2023 14:22:41 +0100 Subject: [PATCH 03/14] Fix header --- symbolic-debuginfo/src/sourcebundle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index 74336bd73..df9d50d0e 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -282,7 +282,7 @@ impl SourceFileInfo { pub fn add_header(&mut self, header: String, value: String) { let mut header = header; if !header.chars().all(|x| x.is_ascii_lowercase()) { - header = header.to_ascii_uppercase(); + header = header.to_ascii_lowercase(); } self.headers.insert(header, value); } From 10b090942f3166145c68c053dc24350ed9dc5064 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 20 Feb 2023 16:35:55 +0100 Subject: [PATCH 04/14] Fix fastpath --- symbolic-debuginfo/src/sourcebundle.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index df9d50d0e..3092f9605 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -195,7 +195,11 @@ where D: de::Deserializer<'de>, { let rv: BTreeMap = de::Deserialize::deserialize(deserializer)?; - if rv.is_empty() || rv.keys().all(|x| x.chars().all(|c| c.is_ascii_lowercase())) { + if rv.is_empty() + || rv + .keys() + .all(|x| !x.chars().any(|c| c.is_ascii_uppercase())) + { Ok(rv) } else { Ok(rv @@ -254,7 +258,7 @@ impl SourceFileInfo { /// Retrieves the specified header, if it exists. pub fn header(&self, header: &str) -> Option<&str> { - if header.chars().all(|x| x.is_ascii_lowercase()) { + if header.chars().any(|x| x.is_ascii_uppercase()) { self.headers.get(header).map(String::as_str) } else { self.headers.iter().find_map(|(k, v)| { @@ -281,7 +285,7 @@ impl SourceFileInfo { /// - `sourcemap` (and `x-sourcemap`): see [`source_mapping_url`](Self::source_mapping_url) pub fn add_header(&mut self, header: String, value: String) { let mut header = header; - if !header.chars().all(|x| x.is_ascii_lowercase()) { + if !header.chars().any(|x| x.is_ascii_uppercase()) { header = header.to_ascii_lowercase(); } self.headers.insert(header, value); @@ -381,6 +385,7 @@ struct SourceBundleManifest { pub files: BTreeMap, /// Arbitrary attributes to include in the bundle. + #[serde(flatten)] pub attributes: BTreeMap, } From 9840449364d877f551846cca72e261d1afaff646 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 20 Feb 2023 16:37:31 +0100 Subject: [PATCH 05/14] Avoid alternative import --- symbolic-debuginfo/src/sourcebundle.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index 3092f9605..adb4cff4b 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -54,7 +54,7 @@ use std::sync::Arc; use lazycell::LazyCell; use parking_lot::Mutex; use regex::Regex; -use serde::{de, Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize}; use thiserror::Error; use zip::{write::FileOptions, ZipWriter}; @@ -192,9 +192,9 @@ pub struct SourceFileInfo { /// Helper to ensure that header keys are normalized to lowercase fn deserialize_headers<'de, D>(deserializer: D) -> Result, D::Error> where - D: de::Deserializer<'de>, + D: Deserializer<'de>, { - let rv: BTreeMap = de::Deserialize::deserialize(deserializer)?; + let rv: BTreeMap = Deserialize::deserialize(deserializer)?; if rv.is_empty() || rv .keys() From b4395399e6bc35afca5cc473491f8880ea6c3d45 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 20 Feb 2023 18:32:08 +0100 Subject: [PATCH 06/14] Expose source file types from source file descriptor --- symbolic-debuginfo/src/base.rs | 19 ++-- symbolic-debuginfo/src/breakpad.rs | 8 +- symbolic-debuginfo/src/dwarf.rs | 8 +- symbolic-debuginfo/src/object.rs | 7 +- symbolic-debuginfo/src/pdb.rs | 8 +- symbolic-debuginfo/src/ppdb.rs | 24 +++-- symbolic-debuginfo/src/sourcebundle.rs | 107 ++++++++++++++++++++--- symbolic-debuginfo/tests/test_objects.rs | 35 ++++---- 8 files changed, 156 insertions(+), 60 deletions(-) diff --git a/symbolic-debuginfo/src/base.rs b/symbolic-debuginfo/src/base.rs index 9f061208d..8561c4151 100644 --- a/symbolic-debuginfo/src/base.rs +++ b/symbolic-debuginfo/src/base.rs @@ -6,6 +6,8 @@ use std::str::FromStr; use symbolic_common::{clean_path, join_path, Arch, CodeId, DebugId, Name}; +use crate::sourcebundle::SourceFileDescriptor; + pub(crate) trait Parse<'data>: Sized { type Error; @@ -670,17 +672,6 @@ impl fmt::Debug for Function<'_> { /// A dynamically dispatched iterator over items with the given lifetime. pub type DynIterator<'a, T> = Box + 'a>; -/// Represents a source file referenced by a debug information object file. -#[non_exhaustive] -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum SourceCode<'a> { - /// Verbatim source code/file contents. - Content(Cow<'a, str>), - - /// Url (usually https) where the content can be fetched from. - Url(Cow<'a, str>), -} - /// A stateful session for interfacing with debug information. /// /// Debug sessions can be obtained via [`ObjectLike::debug_session`]. Since computing a session may @@ -728,8 +719,10 @@ pub trait DebugSession<'session> { fn files(&'session self) -> Self::FileIterator; /// Looks up a file's source by its full canonicalized path. - /// Returns either source contents, if it was embedded, or a source link. - fn source_by_path(&self, path: &str) -> Result>, Self::Error>; + /// + /// Returns a descriptor that has all the information available of the source. It can + /// either contain the source contents directly, if it was embedded, or a source link. + fn source_by_path(&self, path: &str) -> Result>, Self::Error>; } /// An object containing debug information. diff --git a/symbolic-debuginfo/src/breakpad.rs b/symbolic-debuginfo/src/breakpad.rs index 0e6ccb2ab..a3c119c5f 100644 --- a/symbolic-debuginfo/src/breakpad.rs +++ b/symbolic-debuginfo/src/breakpad.rs @@ -13,6 +13,7 @@ use symbolic_common::{Arch, AsSelf, CodeId, DebugId, Language, Name, NameManglin use crate::base::*; use crate::function_builder::FunctionBuilder; +use crate::sourcebundle::SourceFileDescriptor; use crate::Parse; #[derive(Clone, Debug)] @@ -1277,7 +1278,10 @@ impl<'data> BreakpadDebugSession<'data> { } /// See [DebugSession::source_by_path] for more information. - pub fn source_by_path(&self, _path: &str) -> Result>, BreakpadError> { + pub fn source_by_path( + &self, + _path: &str, + ) -> Result>, BreakpadError> { Ok(None) } } @@ -1295,7 +1299,7 @@ impl<'data, 'session> DebugSession<'session> for BreakpadDebugSession<'data> { self.files() } - fn source_by_path(&self, path: &str) -> Result>, Self::Error> { + fn source_by_path(&self, path: &str) -> Result>, Self::Error> { self.source_by_path(path) } } diff --git a/symbolic-debuginfo/src/dwarf.rs b/symbolic-debuginfo/src/dwarf.rs index 17ab0e165..d41a335ec 100644 --- a/symbolic-debuginfo/src/dwarf.rs +++ b/symbolic-debuginfo/src/dwarf.rs @@ -29,6 +29,7 @@ use crate::base::*; use crate::function_builder::FunctionBuilder; #[cfg(feature = "macho")] use crate::macho::BcSymbolMap; +use crate::sourcebundle::SourceFileDescriptor; /// This is a fake BcSymbolMap used when macho support is turned off since they are unfortunately /// part of the dwarf interface @@ -1332,7 +1333,10 @@ impl<'data> DwarfDebugSession<'data> { } /// See [DebugSession::source_by_path] for more information. - pub fn source_by_path(&self, _path: &str) -> Result>, DwarfError> { + pub fn source_by_path( + &self, + _path: &str, + ) -> Result>, DwarfError> { Ok(None) } } @@ -1350,7 +1354,7 @@ impl<'data, 'session> DebugSession<'session> for DwarfDebugSession<'data> { self.files() } - fn source_by_path(&self, path: &str) -> Result>, Self::Error> { + fn source_by_path(&self, path: &str) -> Result>, Self::Error> { self.source_by_path(path) } } diff --git a/symbolic-debuginfo/src/object.rs b/symbolic-debuginfo/src/object.rs index c9419248d..e3281ce7f 100644 --- a/symbolic-debuginfo/src/object.rs +++ b/symbolic-debuginfo/src/object.rs @@ -484,7 +484,10 @@ impl<'d> ObjectDebugSession<'d> { /// Looks up a file's source by its full canonicalized path. /// Returns either source contents, if it was embedded, or a source link. - pub fn source_by_path(&self, path: &str) -> Result>, ObjectError> { + pub fn source_by_path( + &self, + path: &str, + ) -> Result>, ObjectError> { match *self { ObjectDebugSession::Breakpad(ref s) => { s.source_by_path(path).map_err(ObjectError::transparent) @@ -518,7 +521,7 @@ impl<'session> DebugSession<'session> for ObjectDebugSession<'_> { self.files() } - fn source_by_path(&self, path: &str) -> Result>, Self::Error> { + fn source_by_path(&self, path: &str) -> Result>, Self::Error> { self.source_by_path(path) } } diff --git a/symbolic-debuginfo/src/pdb.rs b/symbolic-debuginfo/src/pdb.rs index bc906915e..a0ddb24b6 100644 --- a/symbolic-debuginfo/src/pdb.rs +++ b/symbolic-debuginfo/src/pdb.rs @@ -24,6 +24,7 @@ use symbolic_common::{ use crate::base::*; use crate::function_stack::FunctionStack; +use crate::sourcebundle::SourceFileDescriptor; use crate::Parse; type Pdb<'data> = pdb::PDB<'data, Cursor<&'data [u8]>>; @@ -638,7 +639,10 @@ impl<'d> PdbDebugSession<'d> { } /// See [DebugSession::source_by_path] for more information. - pub fn source_by_path(&self, _path: &str) -> Result>, PdbError> { + pub fn source_by_path( + &self, + _path: &str, + ) -> Result>, PdbError> { Ok(None) } } @@ -656,7 +660,7 @@ impl<'session> DebugSession<'session> for PdbDebugSession<'_> { self.files() } - fn source_by_path(&self, path: &str) -> Result>, Self::Error> { + fn source_by_path(&self, path: &str) -> Result>, Self::Error> { self.source_by_path(path) } } diff --git a/symbolic-debuginfo/src/ppdb.rs b/symbolic-debuginfo/src/ppdb.rs index e7b9362a0..17fefb842 100644 --- a/symbolic-debuginfo/src/ppdb.rs +++ b/symbolic-debuginfo/src/ppdb.rs @@ -10,6 +10,7 @@ use symbolic_ppdb::EmbeddedSource; use symbolic_ppdb::{Document, FormatError, PortablePdb}; use crate::base::*; +use crate::sourcebundle::SourceFileDescriptor; /// An iterator over symbols in a [`PortablePdbObject`]. pub type PortablePdbSymbolIterator<'data> = iter::Empty>; @@ -191,16 +192,23 @@ impl<'data> PortablePdbDebugSession<'data> { } /// See [DebugSession::source_by_path] for more information. - pub fn source_by_path(&self, path: &str) -> Result>, FormatError> { + pub fn source_by_path( + &self, + path: &str, + ) -> Result>, FormatError> { let sources = self.sources.borrow_with(|| self.init_sources()); match sources.get(path) { None => Ok(None), - Some(PPDBSource::Embedded(source)) => source - .get_contents() - .map(|bytes| Some(SourceCode::Content(from_utf8_cow_lossy(&bytes)))), - Some(PPDBSource::Link(document)) => { - Ok(self.ppdb.get_source_link(document).map(SourceCode::Url)) - } + Some(PPDBSource::Embedded(source)) => source.get_contents().map(|bytes| { + Some(SourceFileDescriptor::new_embedded( + from_utf8_cow_lossy(&bytes), + None, + )) + }), + Some(PPDBSource::Link(document)) => Ok(self + .ppdb + .get_source_link(document) + .map(SourceFileDescriptor::new_remote)), } } } @@ -218,7 +226,7 @@ impl<'data, 'session> DebugSession<'session> for PortablePdbDebugSession<'data> self.files() } - fn source_by_path(&self, path: &str) -> Result>, Self::Error> { + fn source_by_path(&self, path: &str) -> Result>, Self::Error> { self.source_by_path(path) } } diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index adb4cff4b..0a5e8899a 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -189,6 +189,79 @@ pub struct SourceFileInfo { headers: BTreeMap, } +/// A descriptor that provides information about a source file. +/// +/// This descriptor is returned from [`source_by_path`](DebugSession::source_by_path) +/// and friends. +/// +/// This descriptor holds information that can be used to retrieve information +/// about the source file. A descriptor has to have at least one of the following +/// to be valid: +/// +/// - [`contents`](Self::contents) +/// - [`remote_url`](Self::remote_url) +/// - [`debug_id`](Self::debug_id) +/// +/// Debug sessions are not permitted to return invalid source file descriptors. +pub struct SourceFileDescriptor<'a> { + contents: Option>, + remote_url: Option>, + file_info: Option<&'a SourceFileInfo>, +} + +impl<'a> SourceFileDescriptor<'a> { + /// Creates an embedded source file descriptor. + pub(crate) fn new_embedded( + content: Cow<'a, str>, + file_info: Option<&'a SourceFileInfo>, + ) -> SourceFileDescriptor<'a> { + SourceFileDescriptor { + contents: Some(content), + remote_url: None, + file_info, + } + } + + /// Creates an remote source file descriptor. + pub(crate) fn new_remote(remote_url: Cow<'a, str>) -> SourceFileDescriptor<'a> { + SourceFileDescriptor { + contents: None, + remote_url: Some(remote_url), + file_info: None, + } + } + + /// The type of the file the descriptor points to. + pub fn ty(&self) -> SourceFileType { + self.file_info + .and_then(|x| x.ty()) + .unwrap_or(SourceFileType::Source) + } + + /// The contents of the source file as string, if it's available. + pub fn contents(&self) -> Option<&str> { + self.contents.as_deref() + } + + /// The remote URL that contains the source. + /// + /// There are cases where the descriptor itself is a reference in which case + /// an external service needs to be consulted to retrieve the information. + pub fn remote_url(&self) -> Option<&str> { + self.remote_url.as_deref() + } + + /// The debug ID of the file if available. + pub fn debug_id(&self) -> Option { + self.file_info.and_then(|x| x.debug_id()) + } + + /// The source mapping URL reference of the file. + pub fn source_mapping_url(&self) -> Option<&str> { + self.file_info.and_then(|x| x.source_mapping_url()) + } +} + /// Helper to ensure that header keys are normalized to lowercase fn deserialize_headers<'de, D>(deserializer: D) -> Result, D::Error> where @@ -752,15 +825,24 @@ impl<'data> SourceBundleDebugSession<'data> { Ok(Some(source_content)) } + /// File info by zip path. + fn file_info_by_zip_path(&self, zip_path: &str) -> Option<&SourceFileInfo> { + self.manifest.files.get(zip_path) + } + /// See [DebugSession::source_by_path] for more information. - pub fn source_by_path(&self, path: &str) -> Result>, SourceBundleError> { + pub fn source_by_path( + &self, + path: &str, + ) -> Result>, SourceBundleError> { let zip_path = match self.zip_path_by_source_path(path) { Some(zip_path) => zip_path, None => return Ok(None), }; let content = self.source_by_zip_path(zip_path)?; - Ok(content.map(|opt| SourceCode::Content(Cow::Owned(opt)))) + let info = self.file_info_by_zip_path(zip_path); + Ok(content.map(|opt| SourceFileDescriptor::new_embedded(Cow::Owned(opt), info))) } /// Looks up some source by debug ID and file type. @@ -780,13 +862,14 @@ impl<'data> SourceBundleDebugSession<'data> { &self, debug_id: DebugId, ty: SourceFileType, - ) -> Result>, SourceBundleError> { + ) -> Result>, SourceBundleError> { let zip_path = match self.zip_path_by_debug_id(debug_id, ty) { Some(zip_path) => zip_path, None => return Ok(None), }; let content = self.source_by_zip_path(zip_path)?; - Ok(content.map(|opt| SourceCode::Content(Cow::Owned(opt)))) + let info = self.file_info_by_zip_path(zip_path); + Ok(content.map(|opt| SourceFileDescriptor::new_embedded(Cow::Owned(opt), info))) } } @@ -803,7 +886,7 @@ impl<'data, 'session> DebugSession<'session> for SourceBundleDebugSession<'data> self.files() } - fn source_by_path(&self, path: &str) -> Result>, Self::Error> { + fn source_by_path(&self, path: &str) -> Result>, Self::Error> { self.source_by_path(path) } } @@ -1269,7 +1352,8 @@ mod tests { ) .unwrap() .expect("should exist"); - assert_eq!(f, SourceCode::Content(Cow::Borrowed("filecontents"))); + assert_eq!(f.contents(), Some("filecontents")); + assert_eq!(f.ty(), SourceFileType::MinifiedSource); assert!(sess .source_by_debug_id( @@ -1326,12 +1410,11 @@ mod tests { .flatten() .flat_map(|f| { let path = f.abs_path_str(); - session.source_by_path(&path).ok().flatten().map(|source| { - let SourceCode::Content(text) = source else { - unreachable!(); - }; - (path, text.into_owned()) - }) + session + .source_by_path(&path) + .ok() + .flatten() + .map(|source| (path, source.contents().unwrap().to_string())) }) .collect(); diff --git a/symbolic-debuginfo/tests/test_objects.rs b/symbolic-debuginfo/tests/test_objects.rs index 2346013af..55151494a 100644 --- a/symbolic-debuginfo/tests/test_objects.rs +++ b/symbolic-debuginfo/tests/test_objects.rs @@ -2,7 +2,7 @@ use std::{env, ffi::CString, fmt, io::BufWriter}; use symbolic_common::ByteView; use symbolic_debuginfo::{ - elf::ElfObject, pe::PeObject, FileEntry, Function, LineInfo, Object, SourceCode, SymbolMap, + elf::ElfObject, pe::PeObject, FileEntry, Function, LineInfo, Object, SymbolMap, }; use symbolic_testutils::fixture; @@ -776,10 +776,8 @@ fn test_ppdb_source_by_path() -> Result<(), Error> { "C:\\dev\\sentry-dotnet\\samples\\Sentry.Samples.Console.Basic\\Program.cs", ) .unwrap(); - match source.unwrap() { - SourceCode::Content(text) => assert_eq!(text.len(), 204), - _ => panic!(), - } + let source = source.unwrap(); + assert_eq!(source.contents().unwrap().len(), 204); } Ok(()) @@ -803,20 +801,19 @@ fn test_ppdb_source_links() -> Result<(), Error> { for file in session.files() { let file = file.unwrap(); - match session.source_by_path(&file.path_str()).unwrap().unwrap() { - SourceCode::Content(text) => { - assert!(known_embedded_sources.contains(&file.name_str().as_ref())); - assert!(!text.is_empty()); - } - SourceCode::Url(url) => { - // testing this is simple because there's just one prefix rule in this PPDB. - let expected = file - .path_str() - .replace(src_prefix, url_prefix) - .replace('\\', "/"); - assert_eq!(url, expected); - } - _ => panic!(), + let source = session.source_by_path(&file.path_str()).unwrap().unwrap(); + if let Some(text) = source.contents() { + assert!(known_embedded_sources.contains(&file.name_str().as_ref())); + assert!(!text.is_empty()); + } else if let Some(url) = source.remote_url() { + // testing this is simple because there's just one prefix rule in this PPDB. + let expected = file + .path_str() + .replace(src_prefix, url_prefix) + .replace('\\', "/"); + assert_eq!(url, expected); + } else { + unreachable!(); } } From 483f001b478f4523b20d025a4be45af8fe5a85ae Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 20 Feb 2023 22:41:04 +0100 Subject: [PATCH 07/14] Added URL based lookup and sourceMappingURL parsing --- symbolic-debuginfo/src/sourcebundle.rs | 274 ++++++++++++++++------- symbolic-debuginfo/tests/test_objects.rs | 2 +- 2 files changed, 200 insertions(+), 76 deletions(-) diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index 0a5e8899a..f73d64963 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -189,79 +189,6 @@ pub struct SourceFileInfo { headers: BTreeMap, } -/// A descriptor that provides information about a source file. -/// -/// This descriptor is returned from [`source_by_path`](DebugSession::source_by_path) -/// and friends. -/// -/// This descriptor holds information that can be used to retrieve information -/// about the source file. A descriptor has to have at least one of the following -/// to be valid: -/// -/// - [`contents`](Self::contents) -/// - [`remote_url`](Self::remote_url) -/// - [`debug_id`](Self::debug_id) -/// -/// Debug sessions are not permitted to return invalid source file descriptors. -pub struct SourceFileDescriptor<'a> { - contents: Option>, - remote_url: Option>, - file_info: Option<&'a SourceFileInfo>, -} - -impl<'a> SourceFileDescriptor<'a> { - /// Creates an embedded source file descriptor. - pub(crate) fn new_embedded( - content: Cow<'a, str>, - file_info: Option<&'a SourceFileInfo>, - ) -> SourceFileDescriptor<'a> { - SourceFileDescriptor { - contents: Some(content), - remote_url: None, - file_info, - } - } - - /// Creates an remote source file descriptor. - pub(crate) fn new_remote(remote_url: Cow<'a, str>) -> SourceFileDescriptor<'a> { - SourceFileDescriptor { - contents: None, - remote_url: Some(remote_url), - file_info: None, - } - } - - /// The type of the file the descriptor points to. - pub fn ty(&self) -> SourceFileType { - self.file_info - .and_then(|x| x.ty()) - .unwrap_or(SourceFileType::Source) - } - - /// The contents of the source file as string, if it's available. - pub fn contents(&self) -> Option<&str> { - self.contents.as_deref() - } - - /// The remote URL that contains the source. - /// - /// There are cases where the descriptor itself is a reference in which case - /// an external service needs to be consulted to retrieve the information. - pub fn remote_url(&self) -> Option<&str> { - self.remote_url.as_deref() - } - - /// The debug ID of the file if available. - pub fn debug_id(&self) -> Option { - self.file_info.and_then(|x| x.debug_id()) - } - - /// The source mapping URL reference of the file. - pub fn source_mapping_url(&self) -> Option<&str> { - self.file_info.and_then(|x| x.source_mapping_url()) - } -} - /// Helper to ensure that header keys are normalized to lowercase fn deserialize_headers<'de, D>(deserializer: D) -> Result, D::Error> where @@ -389,6 +316,128 @@ impl SourceFileInfo { } } +/// A descriptor that provides information about a source file. +/// +/// This descriptor is returned from [`source_by_path`](DebugSession::source_by_path) +/// and friends. +/// +/// This descriptor holds information that can be used to retrieve information +/// about the source file. A descriptor has to have at least one of the following +/// to be valid: +/// +/// - [`contents`](Self::contents) +/// - [`url`](Self::url) +/// - [`debug_id`](Self::debug_id) +/// +/// Debug sessions are not permitted to return invalid source file descriptors. +pub struct SourceFileDescriptor<'a> { + contents: Option>, + remote_url: Option>, + file_info: Option<&'a SourceFileInfo>, +} + +impl<'a> SourceFileDescriptor<'a> { + /// Creates an embedded source file descriptor. + pub(crate) fn new_embedded( + content: Cow<'a, str>, + file_info: Option<&'a SourceFileInfo>, + ) -> SourceFileDescriptor<'a> { + SourceFileDescriptor { + contents: Some(content), + remote_url: None, + file_info, + } + } + + /// Creates an remote source file descriptor. + pub(crate) fn new_remote(remote_url: Cow<'a, str>) -> SourceFileDescriptor<'a> { + SourceFileDescriptor { + contents: None, + remote_url: Some(remote_url), + file_info: None, + } + } + + /// The type of the file the descriptor points to. + pub fn ty(&self) -> SourceFileType { + self.file_info + .and_then(|x| x.ty()) + .unwrap_or(SourceFileType::Source) + } + + /// The contents of the source file as string, if it's available. + /// + /// Portable PDBs for instance will often have source information, but rely on + /// remote file fetching via Sourcelink to get to the contents. In that case + /// a file descriptor is created, but the contents are missing and instead the + /// [`url`](Self::url) can be used. + pub fn contents(&self) -> Option<&str> { + self.contents.as_deref() + } + + /// If available returns the URL of this source. + /// + /// For certain files this is the canoncial URL of where the file is placed. This + /// for instance is the case for minified JavaScript files or source maps which might + /// have a canonical URL. In case of portable PDBs this is also where you would fetch + /// the source code from if source links are used. + pub fn url(&self) -> Option<&str> { + if let Some(ref url) = self.remote_url { + Some(url) + } else { + self.file_info.and_then(|x| x.url()) + } + } + + /// If available returns the file path of this source. + /// + /// For source bundles that are a companion file to a debug file, this is the canonical + /// path of the source file. + pub fn path(&self) -> Option<&str> { + self.file_info.and_then(|x| x.path()) + } + + /// The debug ID of the file if available. + /// + /// For source maps or minified source files symbolic supports embedded debug IDs. If they + /// are in use, the debug ID is returned from here. + pub fn debug_id(&self) -> Option { + self.file_info.and_then(|x| x.debug_id()) + } + + /// The source mapping URL reference of the file. + /// + /// This is used to refer to a source map from a minified file. Only minified source files + /// will have a relationship to a source map. + pub fn source_mapping_url(&self) -> Option<&str> { + if let Some(file_info) = self.file_info { + if let Some(url) = file_info.source_mapping_url() { + return Some(url); + } + } + if let Some(ref contents) = self.contents { + if let Some(url) = discover_sourcemaps_location(contents) { + return Some(url); + } + } + None + } +} + +/// Parses a sourceMappingURL comment in a file to discover a sourcemap reference. +fn discover_sourcemaps_location(contents: &str) -> Option<&str> { + for line in contents.lines().rev() { + if line.starts_with("//# sourceMappingURL=") || line.starts_with("//@ sourceMappingURL=") { + let possible_sourcemap = line[21..].trim(); + if possible_sourcemap.starts_with("data:application/json") { + return None; + } + return Some(possible_sourcemap); + } + } + None +} + /// Version number of a [`SourceBundle`](struct.SourceBundle.html). #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] pub struct SourceBundleVersion(pub u32); @@ -630,6 +679,7 @@ impl<'data> SourceBundle<'data> { manifest: self.manifest.clone(), archive: self.archive.clone(), files_by_path: LazyCell::new(), + files_by_url: LazyCell::new(), files_by_debug_id: LazyCell::new(), }) } @@ -750,6 +800,7 @@ pub struct SourceBundleDebugSession<'data> { manifest: Arc, archive: Arc>>>, files_by_path: LazyCell>, + files_by_url: LazyCell>, files_by_debug_id: LazyCell>, } @@ -782,6 +833,22 @@ impl<'data> SourceBundleDebugSession<'data> { }) } + /// Get a reverse mapping of URLs to ZIP paths. + fn files_by_url(&self) -> &HashMap { + self.files_by_url.borrow_with(|| { + let files = &self.manifest.files; + let mut files_by_url = HashMap::with_capacity(files.len()); + + for (zip_path, file_info) in files { + if !file_info.url.is_empty() { + files_by_url.insert(file_info.url.clone(), zip_path.clone()); + } + } + + files_by_url + }) + } + /// Get a reverse mapping of debug ID to ZIP paths. fn files_by_debug_id(&self) -> &HashMap<(DebugId, SourceFileType), String> { self.files_by_debug_id.borrow_with(|| { @@ -805,6 +872,13 @@ impl<'data> SourceBundleDebugSession<'data> { .map(|zip_path| zip_path.as_str()) } + /// Get the path of a file in this bundle by its url + fn zip_path_by_url(&self, url: &str) -> Option<&str> { + self.files_by_url() + .get(url) + .map(|zip_path| zip_path.as_str()) + } + /// Get the path of a file in this bundle by its Debug ID and source file type. fn zip_path_by_debug_id(&self, debug_id: DebugId, ty: SourceFileType) -> Option<&str> { self.files_by_debug_id() @@ -845,6 +919,21 @@ impl<'data> SourceBundleDebugSession<'data> { Ok(content.map(|opt| SourceFileDescriptor::new_embedded(Cow::Owned(opt), info))) } + /// Like [`source_by_path`](Self::source_by_path) but looks up by URL. + pub fn source_by_url( + &self, + url: &str, + ) -> Result>, SourceBundleError> { + let zip_path = match self.zip_path_by_url(url) { + Some(zip_path) => zip_path, + None => return Ok(None), + }; + + let content = self.source_by_zip_path(zip_path)?; + let info = self.file_info_by_zip_path(zip_path); + Ok(content.map(|opt| SourceFileDescriptor::new_embedded(Cow::Owned(opt), info))) + } + /// Looks up some source by debug ID and file type. /// /// Lookups by [`DebugId`] require knowledge of the file that is supposed to be @@ -1326,17 +1415,19 @@ mod tests { } #[test] - fn test_debug_id() -> Result<(), SourceBundleError> { + fn test_source_descriptor() -> Result<(), SourceBundleError> { let mut writer = Cursor::new(Vec::new()); let mut bundle = SourceBundleWriter::start(&mut writer)?; let mut info = SourceFileInfo::default(); + info.set_url("https://example.com/bar.js.min".into()); + info.set_path("/files/bar.js.min".into()); info.set_ty(SourceFileType::MinifiedSource); info.add_header( "debug-id".into(), "5e618b9f-54a9-4389-b196-519819dd7c47".into(), ); - info.add_header("sourcemap".into(), "bar.js.min".into()); + info.add_header("sourcemap".into(), "bar.js.map".into()); bundle.add_file("bar.js", &b"filecontents"[..], info)?; assert!(bundle.has_file("bar.js")); @@ -1354,6 +1445,9 @@ mod tests { .expect("should exist"); assert_eq!(f.contents(), Some("filecontents")); assert_eq!(f.ty(), SourceFileType::MinifiedSource); + assert_eq!(f.url(), Some("https://example.com/bar.js.min")); + assert_eq!(f.path(), Some("/files/bar.js.min")); + assert_eq!(f.source_mapping_url(), Some("bar.js.map")); assert!(sess .source_by_debug_id( @@ -1366,6 +1460,36 @@ mod tests { Ok(()) } + #[test] + fn test_source_mapping_url() -> Result<(), SourceBundleError> { + let mut writer = Cursor::new(Vec::new()); + let mut bundle = SourceBundleWriter::start(&mut writer)?; + + let mut info = SourceFileInfo::default(); + info.set_url("https://example.com/bar.min.js".into()); + info.set_ty(SourceFileType::MinifiedSource); + bundle.add_file( + "bar.js", + &b"filecontents\n//@ sourceMappingURL=bar.js.map"[..], + info, + )?; + + bundle.finish()?; + let bundle_bytes = writer.into_inner(); + let bundle = SourceBundle::parse(&bundle_bytes)?; + + let sess = bundle.debug_session().unwrap(); + let f = sess + .source_by_url("https://example.com/bar.min.js") + .unwrap() + .expect("should exist"); + assert_eq!(f.ty(), SourceFileType::MinifiedSource); + assert_eq!(f.url(), Some("https://example.com/bar.min.js")); + assert_eq!(f.source_mapping_url(), Some("bar.js.map")); + + Ok(()) + } + #[test] fn test_il2cpp_reference() -> Result<(), Box> { let mut cpp_file = NamedTempFile::new()?; diff --git a/symbolic-debuginfo/tests/test_objects.rs b/symbolic-debuginfo/tests/test_objects.rs index 55151494a..5607a4284 100644 --- a/symbolic-debuginfo/tests/test_objects.rs +++ b/symbolic-debuginfo/tests/test_objects.rs @@ -805,7 +805,7 @@ fn test_ppdb_source_links() -> Result<(), Error> { if let Some(text) = source.contents() { assert!(known_embedded_sources.contains(&file.name_str().as_ref())); assert!(!text.is_empty()); - } else if let Some(url) = source.remote_url() { + } else if let Some(url) = source.url() { // testing this is simple because there's just one prefix rule in this PPDB. let expected = file .path_str() From 217ad68dcbdafcf3c3d572db3cce0a6b7e38b74a Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 20 Feb 2023 22:41:55 +0100 Subject: [PATCH 08/14] Update symbolic-debuginfo/src/sourcebundle.rs Co-authored-by: Arpad Borsos --- symbolic-debuginfo/src/sourcebundle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index f73d64963..7b336de0c 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -258,7 +258,7 @@ impl SourceFileInfo { /// Retrieves the specified header, if it exists. pub fn header(&self, header: &str) -> Option<&str> { - if header.chars().any(|x| x.is_ascii_uppercase()) { + if !header.chars().any(|x| x.is_ascii_uppercase()) { self.headers.get(header).map(String::as_str) } else { self.headers.iter().find_map(|(k, v)| { From 45c5d7b450cc70230a40e8fc0fc7881b0c1a57a1 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 20 Feb 2023 22:42:02 +0100 Subject: [PATCH 09/14] Update symbolic-debuginfo/src/sourcebundle.rs Co-authored-by: Arpad Borsos --- symbolic-debuginfo/src/sourcebundle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index 7b336de0c..99ebd571b 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -285,7 +285,7 @@ impl SourceFileInfo { /// - `sourcemap` (and `x-sourcemap`): see [`source_mapping_url`](Self::source_mapping_url) pub fn add_header(&mut self, header: String, value: String) { let mut header = header; - if !header.chars().any(|x| x.is_ascii_uppercase()) { + if header.chars().any(|x| x.is_ascii_uppercase()) { header = header.to_ascii_lowercase(); } self.headers.insert(header, value); From e1609bf7c45611b23ae67b33458b1c8081b16268 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 21 Feb 2023 09:53:20 +0100 Subject: [PATCH 10/14] Reuse one large index for source bundle lookups --- symbolic-debuginfo/src/sourcebundle.rs | 88 +++++++------------------- 1 file changed, 24 insertions(+), 64 deletions(-) diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index 99ebd571b..c4a220d61 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -678,9 +678,7 @@ impl<'data> SourceBundle<'data> { Ok(SourceBundleDebugSession { manifest: self.manifest.clone(), archive: self.archive.clone(), - files_by_path: LazyCell::new(), - files_by_url: LazyCell::new(), - files_by_debug_id: LazyCell::new(), + indexed_files: LazyCell::new(), }) } @@ -795,13 +793,18 @@ impl<'data: 'object, 'object> ObjectLike<'data, 'object> for SourceBundle<'data> /// An iterator yielding symbols from a source bundle. pub type SourceBundleSymbolIterator<'data> = std::iter::Empty>; +#[derive(Debug, Hash, PartialEq, Eq)] +enum FileKey<'a> { + Path(Cow<'a, str>), + Url(Cow<'a, str>), + DebugId(DebugId, SourceFileType), +} + /// Debug session for SourceBundle objects. pub struct SourceBundleDebugSession<'data> { manifest: Arc, archive: Arc>>>, - files_by_path: LazyCell>, - files_by_url: LazyCell>, - files_by_debug_id: LazyCell>, + indexed_files: LazyCell, Arc>>, } impl<'data> SourceBundleDebugSession<'data> { @@ -817,75 +820,32 @@ impl<'data> SourceBundleDebugSession<'data> { std::iter::empty() } - /// Get a reverse mapping of source paths to ZIP paths. - fn files_by_path(&self) -> &HashMap { - self.files_by_path.borrow_with(|| { + /// Get the indexed file mapping. + fn indexed_files(&self) -> &HashMap> { + self.indexed_files.borrow_with(|| { let files = &self.manifest.files; - let mut files_by_path = HashMap::with_capacity(files.len()); + let mut rv = HashMap::with_capacity(files.len()); for (zip_path, file_info) in files { + let zip_path = Arc::new(zip_path.clone()); if !file_info.path.is_empty() { - files_by_path.insert(file_info.path.clone(), zip_path.clone()); + rv.insert( + FileKey::Path(file_info.path.clone().into()), + zip_path.clone(), + ); } - } - - files_by_path - }) - } - - /// Get a reverse mapping of URLs to ZIP paths. - fn files_by_url(&self) -> &HashMap { - self.files_by_url.borrow_with(|| { - let files = &self.manifest.files; - let mut files_by_url = HashMap::with_capacity(files.len()); - - for (zip_path, file_info) in files { if !file_info.url.is_empty() { - files_by_url.insert(file_info.url.clone(), zip_path.clone()); + rv.insert(FileKey::Url(file_info.url.clone().into()), zip_path.clone()); } - } - - files_by_url - }) - } - - /// Get a reverse mapping of debug ID to ZIP paths. - fn files_by_debug_id(&self) -> &HashMap<(DebugId, SourceFileType), String> { - self.files_by_debug_id.borrow_with(|| { - let files = &self.manifest.files; - let mut files_by_debug_id = HashMap::new(); - - for (zip_path, file_info) in files { if let (Some(debug_id), Some(ty)) = (file_info.debug_id(), file_info.ty()) { - files_by_debug_id.insert((debug_id, ty), zip_path.clone()); + rv.insert(FileKey::DebugId(debug_id, ty), zip_path.clone()); } } - files_by_debug_id + rv }) } - /// Get the path of a file in this bundle by its logical path. - fn zip_path_by_source_path(&self, path: &str) -> Option<&str> { - self.files_by_path() - .get(path) - .map(|zip_path| zip_path.as_str()) - } - - /// Get the path of a file in this bundle by its url - fn zip_path_by_url(&self, url: &str) -> Option<&str> { - self.files_by_url() - .get(url) - .map(|zip_path| zip_path.as_str()) - } - - /// Get the path of a file in this bundle by its Debug ID and source file type. - fn zip_path_by_debug_id(&self, debug_id: DebugId, ty: SourceFileType) -> Option<&str> { - self.files_by_debug_id() - .get(&(debug_id, ty)) - .map(|zip_path| zip_path.as_str()) - } - /// Get source by the path of a file in the bundle. fn source_by_zip_path(&self, zip_path: &str) -> Result, SourceBundleError> { let mut archive = self.archive.lock(); @@ -909,7 +869,7 @@ impl<'data> SourceBundleDebugSession<'data> { &self, path: &str, ) -> Result>, SourceBundleError> { - let zip_path = match self.zip_path_by_source_path(path) { + let zip_path = match self.indexed_files().get(&FileKey::Path(path.into())) { Some(zip_path) => zip_path, None => return Ok(None), }; @@ -924,7 +884,7 @@ impl<'data> SourceBundleDebugSession<'data> { &self, url: &str, ) -> Result>, SourceBundleError> { - let zip_path = match self.zip_path_by_url(url) { + let zip_path = match self.indexed_files().get(&FileKey::Url(url.into())) { Some(zip_path) => zip_path, None => return Ok(None), }; @@ -952,7 +912,7 @@ impl<'data> SourceBundleDebugSession<'data> { debug_id: DebugId, ty: SourceFileType, ) -> Result>, SourceBundleError> { - let zip_path = match self.zip_path_by_debug_id(debug_id, ty) { + let zip_path = match self.indexed_files().get(&FileKey::DebugId(debug_id, ty)) { Some(zip_path) => zip_path, None => return Ok(None), }; From d6fa0d7dc14041272fbf1655acc71cf6d9d2c88e Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 21 Feb 2023 10:29:03 +0100 Subject: [PATCH 11/14] Kill duplicate code --- symbolic-debuginfo/src/sourcebundle.rs | 32 +++++++++++--------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index c4a220d61..91e2462d9 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -864,12 +864,11 @@ impl<'data> SourceBundleDebugSession<'data> { self.manifest.files.get(zip_path) } - /// See [DebugSession::source_by_path] for more information. - pub fn source_by_path( + fn lookup_source( &self, - path: &str, + key: FileKey, ) -> Result>, SourceBundleError> { - let zip_path = match self.indexed_files().get(&FileKey::Path(path.into())) { + let zip_path = match self.indexed_files().get(&key) { Some(zip_path) => zip_path, None => return Ok(None), }; @@ -879,19 +878,20 @@ impl<'data> SourceBundleDebugSession<'data> { Ok(content.map(|opt| SourceFileDescriptor::new_embedded(Cow::Owned(opt), info))) } + /// See [DebugSession::source_by_path] for more information. + pub fn source_by_path( + &self, + path: &str, + ) -> Result>, SourceBundleError> { + self.lookup_source(FileKey::Path(path.into())) + } + /// Like [`source_by_path`](Self::source_by_path) but looks up by URL. pub fn source_by_url( &self, url: &str, ) -> Result>, SourceBundleError> { - let zip_path = match self.indexed_files().get(&FileKey::Url(url.into())) { - Some(zip_path) => zip_path, - None => return Ok(None), - }; - - let content = self.source_by_zip_path(zip_path)?; - let info = self.file_info_by_zip_path(zip_path); - Ok(content.map(|opt| SourceFileDescriptor::new_embedded(Cow::Owned(opt), info))) + self.lookup_source(FileKey::Url(url.into())) } /// Looks up some source by debug ID and file type. @@ -912,13 +912,7 @@ impl<'data> SourceBundleDebugSession<'data> { debug_id: DebugId, ty: SourceFileType, ) -> Result>, SourceBundleError> { - let zip_path = match self.indexed_files().get(&FileKey::DebugId(debug_id, ty)) { - Some(zip_path) => zip_path, - None => return Ok(None), - }; - let content = self.source_by_zip_path(zip_path)?; - let info = self.file_info_by_zip_path(zip_path); - Ok(content.map(|opt| SourceFileDescriptor::new_embedded(Cow::Owned(opt), info))) + self.lookup_source(FileKey::DebugId(debug_id, ty)) } } From a9df5e1677a4aec2efe8e7b684f75baeb09b23fc Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 21 Feb 2023 11:41:27 +0100 Subject: [PATCH 12/14] Always return the source mapping URL --- symbolic-debuginfo/src/sourcebundle.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index 91e2462d9..722fe915e 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -428,11 +428,7 @@ impl<'a> SourceFileDescriptor<'a> { fn discover_sourcemaps_location(contents: &str) -> Option<&str> { for line in contents.lines().rev() { if line.starts_with("//# sourceMappingURL=") || line.starts_with("//@ sourceMappingURL=") { - let possible_sourcemap = line[21..].trim(); - if possible_sourcemap.starts_with("data:application/json") { - return None; - } - return Some(possible_sourcemap); + return Some(line[21..].trim()); } } None From fba2b6981fd91ad13d287ccc06e78cb8a830035f Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 21 Feb 2023 11:47:53 +0100 Subject: [PATCH 13/14] Rename confusing function --- symbolic-debuginfo/src/sourcebundle.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index 722fe915e..e5c721c57 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -855,22 +855,18 @@ impl<'data> SourceBundleDebugSession<'data> { Ok(Some(source_content)) } - /// File info by zip path. - fn file_info_by_zip_path(&self, zip_path: &str) -> Option<&SourceFileInfo> { - self.manifest.files.get(zip_path) - } - - fn lookup_source( + /// Looks up a source file descriptor. + fn get_source_file_descriptor( &self, key: FileKey, ) -> Result>, SourceBundleError> { let zip_path = match self.indexed_files().get(&key) { - Some(zip_path) => zip_path, + Some(zip_path) => zip_path.as_str(), None => return Ok(None), }; let content = self.source_by_zip_path(zip_path)?; - let info = self.file_info_by_zip_path(zip_path); + let info = self.manifest.files.get(zip_path); Ok(content.map(|opt| SourceFileDescriptor::new_embedded(Cow::Owned(opt), info))) } @@ -879,7 +875,7 @@ impl<'data> SourceBundleDebugSession<'data> { &self, path: &str, ) -> Result>, SourceBundleError> { - self.lookup_source(FileKey::Path(path.into())) + self.get_source_file_descriptor(FileKey::Path(path.into())) } /// Like [`source_by_path`](Self::source_by_path) but looks up by URL. @@ -887,7 +883,7 @@ impl<'data> SourceBundleDebugSession<'data> { &self, url: &str, ) -> Result>, SourceBundleError> { - self.lookup_source(FileKey::Url(url.into())) + self.get_source_file_descriptor(FileKey::Url(url.into())) } /// Looks up some source by debug ID and file type. @@ -908,7 +904,7 @@ impl<'data> SourceBundleDebugSession<'data> { debug_id: DebugId, ty: SourceFileType, ) -> Result>, SourceBundleError> { - self.lookup_source(FileKey::DebugId(debug_id, ty)) + self.get_source_file_descriptor(FileKey::DebugId(debug_id, ty)) } } From 9172abc8eb17b4907dbb3e3a35749293a48ec4c5 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 21 Feb 2023 13:07:16 +0100 Subject: [PATCH 14/14] Fix bad merge --- symbolic-debuginfo/src/sourcebundle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/symbolic-debuginfo/src/sourcebundle.rs b/symbolic-debuginfo/src/sourcebundle.rs index e5c721c57..82f4052b8 100644 --- a/symbolic-debuginfo/src/sourcebundle.rs +++ b/symbolic-debuginfo/src/sourcebundle.rs @@ -1159,7 +1159,7 @@ where where O: ObjectLike<'data, 'object, Error = E>, E: std::error::Error + Send + Sync + 'static, - F: FnMut(&FileEntry, &Option>) -> bool, + F: FnMut(&FileEntry, &Option>) -> bool, { let mut files_handled = BTreeSet::new(); let mut referenced_files = BTreeSet::new();