Skip to content

Commit 26654cd

Browse files
authored
arrow2-convert migration 5: sunset arrow2-convert (#3917)
The end of our wonderful journey.

- `NumInstances` control column now has an actual dedicated component type.
- `EntityPath` is now a component.
- `Into<Cow<Self>>` impls have been cleaned up to generate way less code.
- `arrow2_convert` is fully gone.

---

`arrow2-convert` migration PR series:

- #3853
- #3855
- #3897
- #3902
- #3917
1 parent e537874 commit 26654cd

File tree

146 files changed

+870
-1825
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

146 files changed

+870
-1825
lines changed

Cargo.lock

Lines changed: 1 addition & 76 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/re_arrow_store/src/store.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,8 @@ impl DataStore {
265265
/// dataframes.
266266
///
267267
/// See [`DataStoreConfig::store_insert_ids`].
268-
pub fn insert_id_key() -> ComponentName {
269-
"rerun.insert_id".into()
268+
pub fn insert_id_component_name() -> ComponentName {
269+
"rerun.controls.InsertId".into()
270270
}
271271

272272
/// Return the current `StoreGeneration`. This can be used to determine whether the

crates/re_arrow_store/src/store_arrow.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,7 @@ use std::collections::BTreeMap;
22

33
use arrow2::{array::Array, chunk::Chunk, datatypes::Schema};
44
use nohash_hasher::IntMap;
5-
use re_log_types::{
6-
DataCellColumn, DataTable, DataTableResult, RowId, Timeline, COLUMN_INSERT_ID,
7-
COLUMN_NUM_INSTANCES,
8-
};
5+
use re_log_types::{DataCellColumn, DataTable, DataTableResult, NumInstances, RowId, Timeline};
96
use re_types_core::ComponentName;
107

118
use crate::store::{IndexedBucket, IndexedBucketInner, PersistentIndexedTable};
@@ -93,7 +90,7 @@ fn serialize(
9390
col_time: Option<(Timeline, &[i64])>,
9491
col_insert_id: &[u64],
9592
col_row_id: &[RowId],
96-
col_num_instances: &[u32],
93+
col_num_instances: &[NumInstances],
9794
table: &IntMap<ComponentName, DataCellColumn>,
9895
) -> DataTableResult<(Schema, Chunk<Box<dyn Array>>)> {
9996
re_tracing::profile_function!();
@@ -128,7 +125,7 @@ fn serialize_control_columns(
128125
col_time: Option<(Timeline, &[i64])>,
129126
col_insert_id: &[u64],
130127
col_row_id: &[RowId],
131-
col_num_instances: &[u32],
128+
col_num_instances: &[NumInstances],
132129
) -> DataTableResult<(Schema, Vec<Box<dyn Array>>)> {
133130
re_tracing::profile_function!();
134131

@@ -143,8 +140,11 @@ fn serialize_control_columns(
143140

144141
// NOTE: Optional column, so make sure it's actually there:
145142
if !col_insert_id.is_empty() {
146-
let (insert_id_field, insert_id_column) =
147-
DataTable::serialize_primitive_column(COLUMN_INSERT_ID, col_insert_id, None);
143+
let (insert_id_field, insert_id_column) = DataTable::serialize_primitive_column(
144+
&crate::DataStore::insert_id_component_name(),
145+
col_insert_id,
146+
None,
147+
);
148148
schema.fields.push(insert_id_field);
149149
columns.push(insert_id_column);
150150
}
@@ -164,7 +164,7 @@ fn serialize_control_columns(
164164
}
165165

166166
let (num_instances_field, num_instances_column) =
167-
DataTable::serialize_primitive_column(COLUMN_NUM_INSTANCES, col_num_instances, None);
167+
DataTable::serialize_control_column(col_num_instances)?;
168168
schema.fields.push(num_instances_field);
169169
columns.push(num_instances_column);
170170

crates/re_arrow_store/src/store_gc.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ impl DataStore {
373373
self.timeless_tables.retain(|_, table| {
374374
// If any column is non-empty, we need to keep this table
375375
for num in &table.col_num_instances {
376-
if num != &0 {
376+
if num.get() != 0 {
377377
return true;
378378
}
379379
}
@@ -395,7 +395,7 @@ impl DataStore {
395395
for bucket in table.buckets.values() {
396396
let inner = bucket.inner.read();
397397
for num in &inner.col_num_instances {
398-
if num != &0 {
398+
if num.get() != 0 {
399399
return true;
400400
}
401401
}

crates/re_arrow_store/src/store_polars.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ impl DataStore {
119119
let df = sort_df_columns(&df, self.config.store_insert_ids, &timelines);
120120

121121
let has_timeless = df.column(TIMELESS_COL).is_ok();
122-
let insert_id_col = DataStore::insert_id_key();
122+
let insert_id_col = DataStore::insert_id_component_name();
123123

124124
const ASCENDING: bool = false;
125125
const DESCENDING: bool = true;
@@ -264,7 +264,7 @@ fn insert_ids_as_series(col_insert_id: &InsertIdVec) -> Series {
264264

265265
let insert_ids = arrow2::array::UInt64Array::from_slice(col_insert_id.as_slice());
266266
new_infallible_series(
267-
DataStore::insert_id_key().as_ref(),
267+
DataStore::insert_id_component_name().as_ref(),
268268
&insert_ids,
269269
insert_ids.len(),
270270
)

crates/re_arrow_store/src/store_sanity.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use re_log_types::{DataCellColumn, RowId, TimeRange, COLUMN_NUM_INSTANCES, COLUMN_TIMEPOINT};
1+
use re_log_types::{DataCellColumn, NumInstances, RowId, TimeRange};
22
use re_types_core::{ComponentName, Loggable, SizeBytes as _};
33

44
use crate::{DataStore, IndexedBucket, IndexedBucketInner, IndexedTable, PersistentIndexedTable};
@@ -184,14 +184,16 @@ impl IndexedBucket {
184184

185185
// All columns should be `Self::num_rows` long.
186186
{
187+
const COLUMN_TIMEPOINT: &str = "rerun.controls.TimePoint";
188+
187189
let num_rows = self.num_rows();
188190

189191
let column_lengths = [
190192
(!col_insert_id.is_empty())
191-
.then(|| (DataStore::insert_id_key(), col_insert_id.len())), //
193+
.then(|| (DataStore::insert_id_component_name(), col_insert_id.len())), //
192194
Some((COLUMN_TIMEPOINT.into(), col_time.len())),
193195
Some((RowId::name(), col_row_id.len())),
194-
Some((COLUMN_NUM_INSTANCES.into(), col_num_instances.len())),
196+
Some((NumInstances::name(), col_num_instances.len())),
195197
]
196198
.into_iter()
197199
.flatten()
@@ -270,9 +272,9 @@ impl PersistentIndexedTable {
270272

271273
let column_lengths = [
272274
(!col_insert_id.is_empty())
273-
.then(|| (DataStore::insert_id_key(), col_insert_id.len())), //
275+
.then(|| (DataStore::insert_id_component_name(), col_insert_id.len())), //
274276
Some((RowId::name(), col_row_id.len())),
275-
Some((COLUMN_NUM_INSTANCES.into(), col_num_instances.len())),
277+
Some((NumInstances::name(), col_num_instances.len())),
276278
]
277279
.into_iter()
278280
.flatten()

crates/re_arrow_store/src/store_write.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ impl DataStore {
163163
// one… unless we've already generated one of this exact length in the past,
164164
// in which case we can simply re-use that cell.
165165

166-
Some(self.generate_cluster_cell(num_instances))
166+
Some(self.generate_cluster_cell(num_instances.into()))
167167
};
168168

169169
let insert_id = self.config.store_insert_ids.then_some(self.insert_id);

crates/re_arrow_store/tests/data_store.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -810,7 +810,7 @@ fn joint_df(cluster_key: ComponentName, rows: &[(ComponentName, &DataRow)]) -> D
810810
let num_instances = row.num_instances();
811811
Series::try_from((
812812
cluster_key.as_ref(),
813-
DataCell::from_component::<InstanceKey>(0..num_instances as u64)
813+
DataCell::from_component::<InstanceKey>(0..num_instances.get() as u64)
814814
.to_arrow_monolist(),
815815
))
816816
.unwrap()

crates/re_log_types/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ arrow2 = { workspace = true, features = [
5050
"io_print",
5151
"compute_concatenate",
5252
] }
53-
arrow2_convert.workspace = true
5453
backtrace.workspace = true
54+
bytemuck.workspace = true
5555
document-features.workspace = true
5656
fixed = { version = "1.17", default-features = false, features = ["serde"] }
5757
# `fixed` depends on `half`, so even though `half` is not directly used in this crate,

crates/re_log_types/src/data_cell.rs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ pub type DataCellResult<T> = ::std::result::Result<T, DataCellError>;
7171
/// ## Example
7272
///
7373
/// ```rust
74-
/// # use arrow2_convert::field::ArrowField as _;
7574
/// # use itertools::Itertools as _;
7675
/// #
7776
/// # use re_log_types::DataCell;
@@ -97,7 +96,7 @@ pub type DataCellResult<T> = ::std::result::Result<T, DataCellError>;
9796
/// #
9897
/// # assert_eq!(MyPoint::name(), cell.component_name());
9998
/// # assert_eq!(3, cell.num_instances());
100-
/// # assert_eq!(cell.datatype(), &MyPoint::data_type());
99+
/// # assert_eq!(cell.datatype(), &MyPoint::arrow_datatype());
101100
/// #
102101
/// # assert_eq!(points, cell.to_native().as_slice());
103102
/// ```
@@ -157,7 +156,6 @@ pub struct DataCellInner {
157156
pub(crate) values: Box<dyn arrow2::array::Array>,
158157
}
159158

160-
// TODO(cmc): We should be able to build a cell from non-reference types.
161159
// TODO(#1696): We shouldn't have to specify the component name separately, this should be
162160
// part of the metadata by using an extension.
163161
// TODO(#1696): Check that the array is indeed a leaf / component type when building a cell from an
@@ -232,6 +230,7 @@ impl DataCell {
232230
}
233231

234232
/// Builds a cell from an iterable of items that can be turned into a [`Component`].
233+
#[inline]
235234
pub fn from_component<'a, C>(values: impl IntoIterator<Item = impl Into<C>>) -> Self
236235
where
237236
C: Component + Clone + 'a,
@@ -241,10 +240,7 @@ impl DataCell {
241240
}
242241

243242
/// Builds a cell from an iterable of items that can be turned into a [`Component`].
244-
///
245-
/// ⚠ Due to quirks in `arrow2-convert`, this requires consuming and collecting the passed-in
246-
/// iterator into a vector first.
247-
/// Prefer [`Self::from_native`] when performance matters.
243+
#[inline]
248244
pub fn from_component_sparse<'a, C>(
249245
values: impl IntoIterator<Item = Option<impl Into<C>>>,
250246
) -> Self

crates/re_log_types/src/data_row.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use smallvec::SmallVec;
44

55
use re_types_core::{AsComponents, ComponentName, SizeBytes};
66

7-
use crate::{DataCell, DataCellError, DataTable, EntityPath, TableId, TimePoint};
7+
use crate::{DataCell, DataCellError, DataTable, EntityPath, NumInstances, TableId, TimePoint};
88

99
// ---
1010

@@ -249,7 +249,7 @@ pub struct DataRow {
249249
/// - 0 instance (clear),
250250
/// - 1 instance (splat),
251251
/// - `num_instances` instances (standard).
252-
pub num_instances: u32,
252+
pub num_instances: NumInstances,
253253

254254
/// The actual cells (== columns, == components).
255255
pub cells: DataCellRow,
@@ -344,7 +344,7 @@ impl DataRow {
344344
row_id,
345345
entity_path,
346346
timepoint,
347-
num_instances,
347+
num_instances: num_instances.into(),
348348
cells,
349349
})
350350
}
@@ -401,7 +401,7 @@ impl DataRow {
401401
}
402402

403403
#[inline]
404-
pub fn num_instances(&self) -> u32 {
404+
pub fn num_instances(&self) -> NumInstances {
405405
self.num_instances
406406
}
407407

0 commit comments

Comments
 (0)