Skip to content

Commit 835a85c

Browse files
committed
feat: Added debug IDs to source bundle javascript files
1 parent 6394b02 commit 835a85c

File tree

4 files changed

+186
-17
lines changed

4 files changed

+186
-17
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
## Unreleased
44

5+
**Features**:
6+
7+
- Added debug IDs to source bundle JavaScript files and source maps. ([#762](https://github.com/getsentry/symbolic/pull/762))
8+
59
**Breaking changes**:
610

711
- Change `DebugSession::source_by_path()` to return `SourceCode` enum with either file content or a URL to fetch it from. ([#758](https://github.com/getsentry/symbolic/pull/758))

symbolic-debuginfo/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,15 @@ sourcebundle = [
7070
"regex",
7171
"serde_json",
7272
"zip",
73+
"debugid/serde"
7374
]
7475
# WASM processing
7576
wasm = ["bitvec", "dwarf", "wasmparser"]
7677

7778
[dependencies]
7879
bitvec = { version = "1.0.0", optional = true, features = ["alloc"] }
7980
dmsort = "1.0.1"
81+
debugid = { version = "0.8.0" }
8082
elementtree = { version = "1.2.2", optional = true }
8183
elsa = { version = "1.4.0", optional = true }
8284
fallible-iterator = "0.2.0"

symbolic-debuginfo/src/base.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ pub type DynIterator<'a, T> = Box<dyn Iterator<Item = T> + 'a>;
672672

673673
/// Represents a source file referenced by a debug information object file.
674674
#[non_exhaustive]
675-
#[derive(Debug, Clone)]
675+
#[derive(Debug, Clone, PartialEq, Eq)]
676676
pub enum SourceCode<'a> {
677677
/// Verbatim source code/file contents.
678678
Content(Cow<'a, str>),

symbolic-debuginfo/src/sourcebundle.rs

Lines changed: 179 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@
3333
//! [`code_id`]: struct.SourceBundle.html#method.code_id
3434
//! [`SourceBundle::debug_session`]: struct.SourceBundle.html#method.debug_session
3535
//! [`SourceBundleWriter`]: struct.SourceBundleWriter.html
36+
//!
37+
//! ## Artifact Bundles
38+
//!
39+
//! Source bundles share the format with a related concept, called an "artifact bundle". Artifact
40+
//! bundles are essentially source bundles but they typically contain sources referred to by
41+
//! JavaScript source maps and source maps themselves. For instance in an artifact
42+
//! bundle a file entry has a `url` and might carry `headers` or individual debug IDs
43+
//! per source file.
3644
3745
use std::borrow::Cow;
3846
use std::collections::{BTreeMap, BTreeSet, HashMap};
@@ -46,7 +54,7 @@ use std::sync::Arc;
4654
use lazycell::LazyCell;
4755
use parking_lot::Mutex;
4856
use regex::Regex;
49-
use serde::{Deserialize, Serialize};
57+
use serde::{de, Deserialize, Serialize};
5058
use thiserror::Error;
5159
use zip::{write::FileOptions, ZipWriter};
5260

@@ -145,7 +153,7 @@ where
145153
}
146154

147155
/// The type of a [`SourceFileInfo`](struct.SourceFileInfo.html).
148-
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Serialize, Deserialize)]
156+
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
149157
#[serde(rename_all = "snake_case")]
150158
pub enum SourceFileType {
151159
/// Regular source file.
@@ -173,10 +181,30 @@ pub struct SourceFileInfo {
173181
#[serde(default, skip_serializing_if = "String::is_empty")]
174182
url: String,
175183

176-
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
184+
#[serde(
185+
default,
186+
skip_serializing_if = "BTreeMap::is_empty",
187+
deserialize_with = "deserialize_headers"
188+
)]
177189
headers: BTreeMap<String, String>,
178190
}
179191

192+
/// Helper to ensure that header keys are normalized to lowercase
193+
fn deserialize_headers<'de, D>(deserializer: D) -> Result<BTreeMap<String, String>, D::Error>
194+
where
195+
D: de::Deserializer<'de>,
196+
{
197+
let rv: BTreeMap<String, String> = de::Deserialize::deserialize(deserializer)?;
198+
if rv.is_empty() || rv.keys().all(|x| x.chars().all(|c| c.is_ascii_lowercase())) {
199+
Ok(rv)
200+
} else {
201+
Ok(rv
202+
.into_iter()
203+
.map(|(k, v)| (k.to_ascii_lowercase(), v))
204+
.collect())
205+
}
206+
}
207+
180208
impl SourceFileInfo {
181209
/// Creates default file information.
182210
pub fn new() -> Self {
@@ -226,14 +254,58 @@ impl SourceFileInfo {
226254

227255
/// Retrieves the specified header, if it exists.
228256
pub fn header(&self, header: &str) -> Option<&str> {
229-
self.headers.get(header).map(String::as_str)
257+
if header.chars().all(|x| x.is_ascii_lowercase()) {
258+
self.headers.get(header).map(String::as_str)
259+
} else {
260+
self.headers.iter().find_map(|(k, v)| {
261+
if k.eq_ignore_ascii_case(header) {
262+
Some(v.as_str())
263+
} else {
264+
None
265+
}
266+
})
267+
}
230268
}
231269

232270
/// Adds a custom attribute following header conventions.
271+
///
272+
/// Header keys are converted to lowercase before writing as this is
273+
/// the canonical format for headers however the file format does
274+
/// support headers to be case insensitive and they will be lower cased
275+
/// upon reading.
276+
///
277+
/// Headers on files are primarily be used to add auxiliary information
278+
/// to files. The following headers are known and processed:
279+
///
280+
/// - `debug-id`: see [`debug_id`](Self::debug_id)
281+
/// - `sourcemap` (and `x-sourcemap`): see [`source_mapping_url`](Self::source_mapping_url)
233282
pub fn add_header(&mut self, header: String, value: String) {
283+
let mut header = header;
284+
if !header.chars().all(|x| x.is_ascii_lowercase()) {
285+
header = header.to_ascii_uppercase();
286+
}
234287
self.headers.insert(header, value);
235288
}
236289

290+
/// The debug ID of this minified source or sourcemap if it has any.
291+
///
292+
/// Files have a debug ID if they have a header with the key `debug-id`.
293+
/// At present debug IDs in source bundles are only ever given to minified
294+
/// source files.
295+
pub fn debug_id(&self) -> Option<DebugId> {
296+
self.header("debug-id").and_then(|x| x.parse().ok())
297+
}
298+
299+
/// The source mapping URL of the given minified source.
300+
///
301+
/// Files have a source mapping URL if they have a header with the
302+
/// key `sourcemap` (or the `x-sourcemap` legacy header) as part the
303+
/// source map specification.
304+
pub fn source_mapping_url(&self) -> Option<&str> {
305+
self.header("sourcemap")
306+
.or_else(|| self.header("x-sourcemap"))
307+
}
308+
237309
/// Returns `true` if this instance does not carry any information.
238310
pub fn is_empty(&self) -> bool {
239311
self.path.is_empty() && self.ty.is_none() && self.headers.is_empty()
@@ -309,7 +381,6 @@ struct SourceBundleManifest {
309381
pub files: BTreeMap<String, SourceFileInfo>,
310382

311383
/// Arbitrary attributes to include in the bundle.
312-
#[serde(flatten)]
313384
pub attributes: BTreeMap<String, String>,
314385
}
315386

@@ -481,6 +552,7 @@ impl<'data> SourceBundle<'data> {
481552
manifest: self.manifest.clone(),
482553
archive: self.archive.clone(),
483554
files_by_path: LazyCell::new(),
555+
files_by_debug_id: LazyCell::new(),
484556
})
485557
}
486558

@@ -600,6 +672,7 @@ pub struct SourceBundleDebugSession<'data> {
600672
manifest: Arc<SourceBundleManifest>,
601673
archive: Arc<Mutex<zip::read::ZipArchive<std::io::Cursor<&'data [u8]>>>>,
602674
files_by_path: LazyCell<HashMap<String, String>>,
675+
files_by_debug_id: LazyCell<HashMap<(DebugId, SourceFileType), String>>,
603676
}
604677

605678
impl<'data> SourceBundleDebugSession<'data> {
@@ -615,28 +688,52 @@ impl<'data> SourceBundleDebugSession<'data> {
615688
std::iter::empty()
616689
}
617690

618-
/// Create a reverse mapping of source paths to ZIP paths.
619-
fn get_files_by_path(&self) -> HashMap<String, String> {
620-
let files = &self.manifest.files;
621-
let mut files_by_path = HashMap::with_capacity(files.len());
691+
/// Get a reverse mapping of source paths to ZIP paths.
692+
fn files_by_path(&self) -> &HashMap<String, String> {
693+
self.files_by_path.borrow_with(|| {
694+
let files = &self.manifest.files;
695+
let mut files_by_path = HashMap::with_capacity(files.len());
622696

623-
for (zip_path, file_info) in files {
624-
if !file_info.path.is_empty() {
625-
files_by_path.insert(file_info.path.clone(), zip_path.clone());
697+
for (zip_path, file_info) in files {
698+
if !file_info.path.is_empty() {
699+
files_by_path.insert(file_info.path.clone(), zip_path.clone());
700+
}
626701
}
627-
}
628702

629-
files_by_path
703+
files_by_path
704+
})
705+
}
706+
707+
/// Get a reverse mapping of debug ID to ZIP paths.
708+
fn files_by_debug_id(&self) -> &HashMap<(DebugId, SourceFileType), String> {
709+
self.files_by_debug_id.borrow_with(|| {
710+
let files = &self.manifest.files;
711+
let mut files_by_debug_id = HashMap::new();
712+
713+
for (zip_path, file_info) in files {
714+
if let (Some(debug_id), Some(ty)) = (file_info.debug_id(), file_info.ty()) {
715+
files_by_debug_id.insert((debug_id, ty), zip_path.clone());
716+
}
717+
}
718+
719+
files_by_debug_id
720+
})
630721
}
631722

632723
/// Get the path of a file in this bundle by its logical path.
633724
fn zip_path_by_source_path(&self, path: &str) -> Option<&str> {
634-
self.files_by_path
635-
.borrow_with(|| self.get_files_by_path())
725+
self.files_by_path()
636726
.get(path)
637727
.map(|zip_path| zip_path.as_str())
638728
}
639729

730+
/// Get the path of a file in this bundle by its Debug ID and source file type.
731+
fn zip_path_by_debug_id(&self, debug_id: DebugId, ty: SourceFileType) -> Option<&str> {
732+
self.files_by_debug_id()
733+
.get(&(debug_id, ty))
734+
.map(|zip_path| zip_path.as_str())
735+
}
736+
640737
/// Get source by the path of a file in the bundle.
641738
fn source_by_zip_path(&self, zip_path: &str) -> Result<Option<String>, SourceBundleError> {
642739
let mut archive = self.archive.lock();
@@ -660,6 +757,32 @@ impl<'data> SourceBundleDebugSession<'data> {
660757
let content = self.source_by_zip_path(zip_path)?;
661758
Ok(content.map(|opt| SourceCode::Content(Cow::Owned(opt))))
662759
}
760+
761+
/// Looks up some source by debug ID and file type.
762+
///
763+
/// Lookups by [`DebugId`] require knowledge of the file that is supposed to be
764+
/// looked up as multiple files (one per type) can share the same debug ID.
765+
/// Special care needs to be taken about [`SourceFileType::IndexedRamBundle`]
766+
/// and [`SourceFileType::SourceMap`] which are different file types despite
767+
/// the name of it.
768+
///
769+
/// # Note on Abstractions
770+
///
771+
/// This method is currently not exposed via a standardized debug session
772+
/// as it's primarily used for the JavaScript processing system which uses
773+
/// different abstractions.
774+
pub fn source_by_debug_id(
775+
&self,
776+
debug_id: DebugId,
777+
ty: SourceFileType,
778+
) -> Result<Option<SourceCode<'_>>, SourceBundleError> {
779+
let zip_path = match self.zip_path_by_debug_id(debug_id, ty) {
780+
Some(zip_path) => zip_path,
781+
None => return Ok(None),
782+
};
783+
let content = self.source_by_zip_path(zip_path)?;
784+
Ok(content.map(|opt| SourceCode::Content(Cow::Owned(opt))))
785+
}
663786
}
664787

665788
impl<'data, 'session> DebugSession<'session> for SourceBundleDebugSession<'data> {
@@ -1106,6 +1229,46 @@ mod tests {
11061229
Ok(())
11071230
}
11081231

1232+
#[test]
1233+
fn test_debug_id() -> Result<(), SourceBundleError> {
1234+
let mut writer = Cursor::new(Vec::new());
1235+
let mut bundle = SourceBundleWriter::start(&mut writer)?;
1236+
1237+
let mut info = SourceFileInfo::default();
1238+
info.set_ty(SourceFileType::MinifiedSource);
1239+
info.add_header(
1240+
"debug-id".into(),
1241+
"5e618b9f-54a9-4389-b196-519819dd7c47".into(),
1242+
);
1243+
info.add_header("sourcemap".into(), "bar.js.min".into());
1244+
bundle.add_file("bar.js", &b"filecontents"[..], info)?;
1245+
assert!(bundle.has_file("bar.js"));
1246+
1247+
bundle.finish()?;
1248+
let bundle_bytes = writer.into_inner();
1249+
let bundle = SourceBundle::parse(&bundle_bytes)?;
1250+
1251+
let sess = bundle.debug_session().unwrap();
1252+
let f = sess
1253+
.source_by_debug_id(
1254+
"5e618b9f-54a9-4389-b196-519819dd7c47".parse().unwrap(),
1255+
SourceFileType::MinifiedSource,
1256+
)
1257+
.unwrap()
1258+
.expect("should exist");
1259+
assert_eq!(f, SourceCode::Content(Cow::Borrowed("filecontents")));
1260+
1261+
assert!(sess
1262+
.source_by_debug_id(
1263+
"5e618b9f-54a9-4389-b196-519819dd7c47".parse().unwrap(),
1264+
SourceFileType::Source
1265+
)
1266+
.unwrap()
1267+
.is_none());
1268+
1269+
Ok(())
1270+
}
1271+
11091272
#[test]
11101273
fn test_il2cpp_reference() -> Result<(), Box<dyn std::error::Error>> {
11111274
let mut cpp_file = NamedTempFile::new()?;

0 commit comments

Comments
 (0)