1use std::fmt;
10use std::num::{ParseFloatError, ParseIntError, TryFromIntError};
11use std::path::Path;
12
13use nano_core::{BranchSchema, Event};
14#[cfg(feature = "http")]
15pub use nano_rootio::HttpSourceOptions;
16
17pub mod datacard;
18pub mod samples;
19
/// Result alias used throughout this crate; the error type is always [`RootError`].
pub type Result<T> = std::result::Result<T, RootError>;
21
/// Errors produced by this crate while reading or writing ROOT files.
#[derive(Debug)]
pub enum RootError {
    /// An underlying I/O operation failed.
    Io(std::io::Error),
    /// Input could not be parsed; the payload is the human-readable message.
    Parse(String),
    /// A formatting operation failed.
    Format(fmt::Error),
    /// A payload could not be decompressed; the payload is the message.
    Decompression(String),
    /// The compression algorithm is not supported; the payload is the identifier
    /// echoed in the `Display` message.
    UnsupportedCompression(String),
    /// An integer did not fit into the target integer type.
    IntConversion(TryFromIntError),
    /// Text could not be parsed as a floating-point number.
    ParseFloat(ParseFloatError),
    /// Text could not be parsed as an integer.
    ParseInt(ParseIntError),
    /// Any other failure, described by a free-form message.
    Other(String),
}
34
35impl RootError {
36 pub fn parse(message: impl Into<String>) -> Self {
37 Self::Parse(message.into())
38 }
39
40 pub fn decompression(message: impl Into<String>) -> Self {
41 Self::Decompression(message.into())
42 }
43
44 pub fn other(message: impl Into<String>) -> Self {
45 Self::Other(message.into())
46 }
47}
48
49impl fmt::Display for RootError {
50 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51 match self {
52 Self::Io(err) => write!(f, "{err}"),
53 Self::Parse(message) => write!(f, "{message}"),
54 Self::Format(err) => write!(f, "{err}"),
55 Self::Decompression(message) => write!(f, "{message}"),
56 Self::UnsupportedCompression(magic) => {
57 write!(f, "unsupported ROOT compression algorithm `{magic}`")
58 }
59 Self::IntConversion(err) => write!(f, "{err}"),
60 Self::ParseFloat(err) => write!(f, "{err}"),
61 Self::ParseInt(err) => write!(f, "{err}"),
62 Self::Other(message) => write!(f, "{message}"),
63 }
64 }
65}
66
67impl std::error::Error for RootError {
68 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
69 match self {
70 Self::Io(err) => Some(err),
71 Self::Format(err) => Some(err),
72 Self::IntConversion(err) => Some(err),
73 Self::ParseFloat(err) => Some(err),
74 Self::ParseInt(err) => Some(err),
75 Self::Parse(_)
76 | Self::Decompression(_)
77 | Self::UnsupportedCompression(_)
78 | Self::Other(_) => None,
79 }
80 }
81}
82
83impl From<std::io::Error> for RootError {
84 fn from(err: std::io::Error) -> Self {
85 Self::Io(err)
86 }
87}
88
89impl From<fmt::Error> for RootError {
90 fn from(err: fmt::Error) -> Self {
91 Self::Format(err)
92 }
93}
94
95impl From<TryFromIntError> for RootError {
96 fn from(err: TryFromIntError) -> Self {
97 Self::IntConversion(err)
98 }
99}
100
101impl From<ParseFloatError> for RootError {
102 fn from(err: ParseFloatError) -> Self {
103 Self::ParseFloat(err)
104 }
105}
106
107impl From<ParseIntError> for RootError {
108 fn from(err: ParseIntError) -> Self {
109 Self::ParseInt(err)
110 }
111}
112
113impl From<nano_rootio::Error> for RootError {
114 fn from(err: nano_rootio::Error) -> Self {
115 match err {
116 nano_rootio::Error::Io(err) => Self::Io(err),
117 nano_rootio::Error::Parse { message, .. } => Self::Parse(message),
118 nano_rootio::Error::Decompression(message) => Self::Decompression(message),
119 nano_rootio::Error::UnsupportedCompression(magic) => {
120 Self::UnsupportedCompression(magic)
121 }
122 other => Self::Other(other.to_string()),
123 }
124 }
125}
126
127pub fn read_events(path: &Path, schema: BranchSchema) -> Result<Vec<Event>> {
129 events(path, &schema)?.collect()
130}
131
132pub fn read_events_from_tree(
134 path: &Path,
135 tree_name: &str,
136 schema: BranchSchema,
137) -> Result<Vec<Event>> {
138 events_from_tree(path, tree_name, &schema)?.collect()
139}
140
141pub fn events(path: &Path, schema: &BranchSchema) -> Result<impl Iterator<Item = Result<Event>>> {
143 events_from_tree(path, "Events", schema)
144}
145
146pub fn events_from_tree(
148 path: &Path,
149 tree_name: &str,
150 schema: &BranchSchema,
151) -> Result<impl Iterator<Item = Result<Event>>> {
152 events_chunked_from_tree(path, tree_name, schema, reader::DEFAULT_CHUNK_SIZE)
153}
154
155pub fn events_chunked(
157 path: &Path,
158 schema: &BranchSchema,
159 chunk_size: usize,
160) -> Result<impl Iterator<Item = Result<Event>>> {
161 events_chunked_from_tree(path, "Events", schema, chunk_size)
162}
163
164pub fn events_chunked_from_tree(
166 path: &Path,
167 tree_name: &str,
168 schema: &BranchSchema,
169 chunk_size: usize,
170) -> Result<impl Iterator<Item = Result<Event>>> {
171 reader::EventIterator::new(path, tree_name, schema, chunk_size)
172}
173
/// Iterates over the events of the `Events` tree of the ROOT file served at `url`.
///
/// Uses [`reader::DEFAULT_CHUNK_SIZE`] and HTTP options taken from
/// `HttpSourceOptions::from_env()`.
///
/// # Errors
///
/// Fails if the remote file cannot be opened or the iterator cannot be built.
#[cfg(feature = "http")]
pub fn events_url(url: &str, schema: &BranchSchema) -> Result<reader::EventIterator> {
    // Same defaults `events_url_from_tree` would apply, spelled out in one call.
    events_url_chunked_from_tree(url, "Events", schema, reader::DEFAULT_CHUNK_SIZE)
}
179
/// Iterates over the events of the tree `tree_name` of the ROOT file served at
/// `url`.
///
/// Uses [`reader::DEFAULT_CHUNK_SIZE`] and HTTP options taken from
/// `HttpSourceOptions::from_env()`.
///
/// # Errors
///
/// Fails if the remote file cannot be opened or the iterator cannot be built.
#[cfg(feature = "http")]
pub fn events_url_from_tree(
    url: &str,
    tree_name: &str,
    schema: &BranchSchema,
) -> Result<reader::EventIterator> {
    let chunk_size = reader::DEFAULT_CHUNK_SIZE;
    events_url_chunked_from_tree(url, tree_name, schema, chunk_size)
}
189
/// Iterates over the events of the `Events` tree of the ROOT file served at `url`,
/// loading `chunk_size` entries at a time.
///
/// HTTP options are taken from `HttpSourceOptions::from_env()`.
///
/// # Errors
///
/// Fails if the remote file cannot be opened or the iterator cannot be built.
#[cfg(feature = "http")]
pub fn events_url_chunked(
    url: &str,
    schema: &BranchSchema,
    chunk_size: usize,
) -> Result<reader::EventIterator> {
    let tree_name = "Events";
    events_url_chunked_from_tree(url, tree_name, schema, chunk_size)
}
199
/// Iterates over the events of the tree `tree_name` of the ROOT file served at
/// `url`, loading `chunk_size` entries at a time.
///
/// HTTP options are taken from `HttpSourceOptions::from_env()`; use
/// [`events_url_chunked_from_tree_with_options`] to supply them explicitly.
///
/// # Errors
///
/// Fails if the remote file cannot be opened or the iterator cannot be built.
#[cfg(feature = "http")]
pub fn events_url_chunked_from_tree(
    url: &str,
    tree_name: &str,
    schema: &BranchSchema,
    chunk_size: usize,
) -> Result<reader::EventIterator> {
    let options = HttpSourceOptions::from_env();
    events_url_chunked_from_tree_with_options(url, tree_name, schema, chunk_size, options)
}
217
/// Opens the ROOT file served at `url` with explicit HTTP `options` and iterates
/// over the events of the tree `tree_name`, loading `chunk_size` entries at a time.
///
/// `url` doubles as the source label passed to
/// [`reader::EventIterator::new_remote_file`].
///
/// # Errors
///
/// Fails if `nano_rootio::RootFile::open_url_with_options` fails or if the
/// iterator cannot be built from the opened file.
#[cfg(feature = "http")]
pub fn events_url_chunked_from_tree_with_options(
    url: &str,
    tree_name: &str,
    schema: &BranchSchema,
    chunk_size: usize,
    options: HttpSourceOptions,
) -> Result<reader::EventIterator> {
    let remote_file = nano_rootio::RootFile::open_url_with_options(url, options)?;
    let iterator =
        reader::EventIterator::new_remote_file(remote_file, url, tree_name, schema, chunk_size)?;
    Ok(iterator)
}
230
231pub mod reader {
232 use std::path::Path;
233 use std::sync::Arc;
234
235 use nano_core::{
236 BranchColumn, BranchSchema, BranchSpec, BranchType, Event, EventColumns, JaggedColumn,
237 };
238 use nano_rootio::{BasketPayloadCache, RootFile, Tree};
239
240 use crate::{Result, RootError};
241
    /// Number of tree entries loaded per chunk when the caller does not choose a size.
    pub const DEFAULT_CHUNK_SIZE: usize = 65_536;
243
    /// Where an [`EventIterator`] reads its tree data from.
    enum EventIteratorBackend {
        /// Tree obtained from a file opened by path; only the tree is retained.
        Local {
            tree: Tree,
        },
        /// Tree obtained from a file opened by the caller (see `new_remote_file`);
        /// the file is retained so `bytes_fetched` can query it.
        #[cfg(feature = "http")]
        Remote {
            file: RootFile,
            tree: Tree,
        },
    }
254
    /// Iterator over the events of one tree, loading columns one chunk at a time.
    pub struct EventIterator {
        // Size reported by the file when the iterator was created.
        file_size: u64,
        // Source of the tree data (local or remote).
        backend: EventIteratorBackend,
        // Shared schema handed to every produced `Event`.
        schema: Arc<BranchSchema>,
        // Maximum number of entries per chunk; constructors clamp it to at least 1.
        chunk_size: usize,
        // Number of entries in the tree.
        total_entries: usize,
        // Index of the first entry that has not been loaded into a chunk yet.
        next_entry: usize,
        // Tree entry index of row 0 of the current chunk.
        chunk_start: usize,
        // Number of rows in the current chunk.
        chunk_len: usize,
        // Next row of the current chunk to yield.
        chunk_row: usize,
        // Columns of the current chunk; `None` before the first load and once the
        // tree is exhausted.
        columns: Option<Arc<EventColumns>>,
    }
267
268 impl EventIterator {
269 pub fn new(
270 path: &Path,
271 tree_name: &str,
272 schema: &BranchSchema,
273 chunk_size: usize,
274 ) -> Result<Self> {
275 let file = RootFile::open(path)?;
276 let file_size = file.file_size();
277 let tree = tree_by_name_or_first(&file, &path.display().to_string(), tree_name)?;
278 let total_entries = usize::try_from(tree.entries())?;
279 Ok(Self {
280 file_size,
281 backend: EventIteratorBackend::Local { tree },
282 schema: Arc::new(schema.clone()),
283 chunk_size: chunk_size.max(1),
284 total_entries,
285 next_entry: 0,
286 chunk_start: 0,
287 chunk_len: 0,
288 chunk_row: 0,
289 columns: None,
290 })
291 }
292
293 #[cfg(feature = "http")]
294 pub fn new_remote_file(
295 file: RootFile,
296 source_label: &str,
297 tree_name: &str,
298 schema: &BranchSchema,
299 chunk_size: usize,
300 ) -> Result<Self> {
301 let file_size = file.file_size();
302 let tree = tree_by_name_or_first(&file, source_label, tree_name)?;
303 let total_entries = usize::try_from(tree.entries()).map_err(RootError::from)?;
304 Ok(Self {
305 file_size,
306 backend: EventIteratorBackend::Remote { file, tree },
307 schema: Arc::new(schema.clone()),
308 chunk_size: chunk_size.max(1),
309 total_entries,
310 next_entry: 0,
311 chunk_start: 0,
312 chunk_len: 0,
313 chunk_row: 0,
314 columns: None,
315 })
316 }
317
318 pub fn bytes_fetched(&self) -> u64 {
319 match &self.backend {
320 EventIteratorBackend::Local { .. } => 0,
321 #[cfg(feature = "http")]
322 EventIteratorBackend::Remote { file, .. } => file.bytes_fetched(),
323 }
324 }
325
326 pub fn file_size(&self) -> u64 {
327 self.file_size
328 }
329
330 fn load_next_chunk(&mut self) -> Result<bool> {
331 if self.next_entry >= self.total_entries {
332 self.columns = None;
333 self.chunk_len = 0;
334 self.chunk_row = 0;
335 return Ok(false);
336 }
337
338 let start = self.next_entry;
339 let len = self.chunk_size.min(self.total_entries - start);
340 let columns = match &self.backend {
341 EventIteratorBackend::Local { tree } => {
342 read_columns_window(tree, self.schema.specs(), start, len)?
343 }
344 #[cfg(feature = "http")]
345 EventIteratorBackend::Remote { tree, .. } => {
346 read_columns_window(tree, self.schema.specs(), start, len)?
347 }
348 };
349 Event::validate_event_columns(&self.schema, &columns, len - 1)
350 .map_err(|err| RootError::other(err.to_string()))?;
351 self.columns = Some(Arc::new(columns));
352 self.chunk_start = start;
353 self.chunk_len = len;
354 self.chunk_row = 0;
355 self.next_entry += len;
356 Ok(true)
357 }
358 }
359
360 impl Iterator for EventIterator {
361 type Item = Result<Event>;
362
363 fn next(&mut self) -> Option<Self::Item> {
364 if self.chunk_row >= self.chunk_len {
365 match self.load_next_chunk() {
366 Ok(true) => {}
367 Ok(false) => return None,
368 Err(err) => return Some(Err(err)),
369 }
370 }
371
372 let columns = self.columns.as_ref()?.clone();
373 let row_index = self.chunk_row;
374 let entry = self.chunk_start + row_index;
375 self.chunk_row += 1;
376
377 Some(Ok(Event::from_validated_event_columns_at(
378 self.schema.clone(),
379 columns,
380 entry,
381 row_index,
382 )))
383 }
384 }
385
386 fn tree_by_name_or_first(file: &RootFile, source_label: &str, tree_name: &str) -> Result<Tree> {
387 if file
388 .objects()
389 .iter()
390 .any(|item| item.name() == tree_name && item.class() == "TTree")
391 {
392 return file.tree(tree_name).map_err(RootError::from);
393 }
394
395 let object = file
396 .objects()
397 .into_iter()
398 .find(|item| item.class() == "TTree")
399 .ok_or_else(|| RootError::other(format!("No TTree found in {source_label}")))?;
400 file.tree(object.name()).map_err(RootError::from)
401 }
402
403 fn read_columns_window(
404 tree: &Tree,
405 specs: &[BranchSpec],
406 start: usize,
407 len: usize,
408 ) -> Result<EventColumns> {
409 let mut columns = Vec::with_capacity(specs.len());
410 let mut cache = BasketPayloadCache::new();
411
412 for spec in specs {
413 let read_result = if spec.branch_type.is_vector() {
414 read_vector_column_window(tree, spec, start, len, &mut cache)
415 } else {
416 read_scalar_column_window(tree, spec, start, len, &mut cache)
417 };
418 match read_result {
419 Ok(column) => {
420 columns.push((spec.name.clone(), column));
421 }
422 Err(err) if spec.optional => {
423 let _ = err;
424 }
425 Err(err) => return Err(err),
426 }
427 }
428
429 Ok(EventColumns::from_ordered(columns))
430 }
431
432 fn read_scalar_column_window(
433 tree: &Tree,
434 spec: &BranchSpec,
435 start: usize,
436 len: usize,
437 cache: &mut BasketPayloadCache,
438 ) -> Result<BranchColumn> {
439 let start = i64::try_from(start)?;
440 let column = match spec.branch_type {
441 BranchType::Bool => {
442 BranchColumn::Bool(tree.read_scalar_range_cached(&spec.name, start, len, cache)?)
443 }
444 BranchType::I8 => {
445 BranchColumn::I8(tree.read_scalar_range_cached(&spec.name, start, len, cache)?)
446 }
447 BranchType::U8 => {
448 BranchColumn::U8(tree.read_scalar_range_cached(&spec.name, start, len, cache)?)
449 }
450 BranchType::I16 => {
451 BranchColumn::I16(tree.read_scalar_range_cached(&spec.name, start, len, cache)?)
452 }
453 BranchType::U16 => {
454 BranchColumn::U16(tree.read_scalar_range_cached(&spec.name, start, len, cache)?)
455 }
456 BranchType::I32 => {
457 BranchColumn::I32(tree.read_scalar_range_cached(&spec.name, start, len, cache)?)
458 }
459 BranchType::U32 => {
460 BranchColumn::U32(tree.read_scalar_range_cached(&spec.name, start, len, cache)?)
461 }
462 BranchType::I64 => {
463 BranchColumn::I64(tree.read_scalar_range_cached(&spec.name, start, len, cache)?)
464 }
465 BranchType::U64 => {
466 BranchColumn::U64(tree.read_scalar_range_cached(&spec.name, start, len, cache)?)
467 }
468 BranchType::F32 => {
469 BranchColumn::F32(tree.read_scalar_range_cached(&spec.name, start, len, cache)?)
470 }
471 branch_type => {
472 return Err(RootError::other(format!(
473 "branch `{}` has non-scalar type {:?}",
474 spec.name, branch_type
475 )));
476 }
477 };
478 Ok(column)
479 }
480
481 fn read_vector_column_window(
482 tree: &Tree,
483 spec: &BranchSpec,
484 start: usize,
485 len: usize,
486 cache: &mut BasketPayloadCache,
487 ) -> Result<BranchColumn> {
488 let count_branch = count_branch_name(&spec.name)?;
489 let start = i64::try_from(start)?;
490
491 let column = match spec.branch_type {
492 BranchType::VecBool => BranchColumn::VecBool(tree.read_jagged_range_cached(
493 &spec.name,
494 &count_branch,
495 start,
496 len,
497 cache,
498 )?),
499 BranchType::VecI8 => BranchColumn::VecI8(tree.read_jagged_range_cached(
500 &spec.name,
501 &count_branch,
502 start,
503 len,
504 cache,
505 )?),
506 BranchType::VecU8 => BranchColumn::VecU8(tree.read_jagged_range_cached(
507 &spec.name,
508 &count_branch,
509 start,
510 len,
511 cache,
512 )?),
513 BranchType::VecI16 => BranchColumn::VecI16(tree.read_jagged_range_cached(
514 &spec.name,
515 &count_branch,
516 start,
517 len,
518 cache,
519 )?),
520 BranchType::VecU16 => BranchColumn::VecU16(tree.read_jagged_range_cached(
521 &spec.name,
522 &count_branch,
523 start,
524 len,
525 cache,
526 )?),
527 BranchType::VecI32 => BranchColumn::VecI32(tree.read_jagged_range_cached(
528 &spec.name,
529 &count_branch,
530 start,
531 len,
532 cache,
533 )?),
534 BranchType::VecU32 => BranchColumn::VecU32(tree.read_jagged_range_cached(
535 &spec.name,
536 &count_branch,
537 start,
538 len,
539 cache,
540 )?),
541 BranchType::VecI64 => BranchColumn::VecI64(tree.read_jagged_range_cached(
542 &spec.name,
543 &count_branch,
544 start,
545 len,
546 cache,
547 )?),
548 BranchType::VecU64 => BranchColumn::VecU64(tree.read_jagged_range_cached(
549 &spec.name,
550 &count_branch,
551 start,
552 len,
553 cache,
554 )?),
555 BranchType::VecF32 => {
556 let values = tree.read_jagged_flat_range_cached(
557 &spec.name,
558 &count_branch,
559 start,
560 len,
561 cache,
562 )?;
563 BranchColumn::FlatVecF32(JaggedColumn::new(values.offsets, values.values))
564 }
565 branch_type => {
566 return Err(RootError::other(format!(
567 "branch `{}` has non-vector type {:?}",
568 spec.name, branch_type
569 )));
570 }
571 };
572 Ok(column)
573 }
574
575 fn count_branch_name(branch_name: &str) -> Result<String> {
576 let (object_name, _) = branch_name.split_once('_').ok_or_else(|| {
577 RootError::other(format!(
578 "cannot infer NanoAOD count branch for vector branch `{branch_name}`"
579 ))
580 })?;
581 Ok(format!("n{object_name}"))
582 }
583}
584
585pub mod writer {
586 use std::fmt::Display;
587 use std::path::Path;
588
589 use nano_analysis::{Hist1D, HistSet1D};
590 use nano_rootio::write::{
591 write_histograms as write_root_histograms, write_tree, Branch, HistogramAxis, Th1F,
592 };
593
594 use crate::Result;
595
    /// A named column of values to be written as one branch of an output tree.
    ///
    /// Each variant pairs the branch name with its values.
    #[derive(Debug, Clone, PartialEq)]
    pub enum OutputBranch {
        /// Branch of `bool` values.
        Bool(String, Vec<bool>),
        /// Branch of `i32` values.
        I32(String, Vec<i32>),
        /// Branch of `u32` values.
        U32(String, Vec<u32>),
        /// Branch of `u64` values.
        U64(String, Vec<u64>),
        /// Branch of `f32` values.
        F32(String, Vec<f32>),
        /// Branch holding a `Vec<f32>` per element.
        VecF32(String, Vec<Vec<f32>>),
    }
606
607 impl OutputBranch {
608 pub fn bool(name: impl Into<String>, values: Vec<bool>) -> Self {
609 Self::Bool(name.into(), values)
610 }
611
612 pub fn i32(name: impl Into<String>, values: Vec<i32>) -> Self {
613 Self::I32(name.into(), values)
614 }
615
616 pub fn u32(name: impl Into<String>, values: Vec<u32>) -> Self {
617 Self::U32(name.into(), values)
618 }
619
620 pub fn u64(name: impl Into<String>, values: Vec<u64>) -> Self {
621 Self::U64(name.into(), values)
622 }
623
624 pub fn f32(name: impl Into<String>, values: Vec<f32>) -> Self {
625 Self::F32(name.into(), values)
626 }
627
628 pub fn vec_f32(name: impl Into<String>, values: Vec<Vec<f32>>) -> Self {
629 Self::VecF32(name.into(), values)
630 }
631
632 fn to_root_branch(&self) -> Branch {
633 match self {
634 Self::Bool(name, values) => Branch::bool(name, values.clone()),
635 Self::I32(name, values) => Branch::i32(name, values.clone()),
636 Self::U32(name, values) => Branch::u32(name, values.clone()),
637 Self::U64(name, values) => Branch::u64(name, values.clone()),
638 Self::F32(name, values) => Branch::f32(name, values.clone()),
639 Self::VecF32(name, values) => Branch::vec_f32(name, values.clone()),
640 }
641 }
642 }
643
644 pub fn write_events(path: &Path, branches: &[OutputBranch]) -> Result<()> {
646 let root_branches = branches
647 .iter()
648 .map(OutputBranch::to_root_branch)
649 .collect::<Vec<_>>();
650 Ok(write_tree(path, "Events", &root_branches)?)
651 }
652
653 pub fn write_histograms(path: &Path, histograms: &[(&str, &Hist1D)]) -> Result<()> {
655 let root_histograms = histograms
656 .iter()
657 .map(|(name, hist)| to_root_histogram(name, hist))
658 .collect::<Vec<_>>();
659 Ok(write_root_histograms(path, &root_histograms)?)
660 }
661
662 pub fn write_histogram_sets<S>(path: &Path, histograms: &[(&str, &HistSet1D<S>)]) -> Result<()>
666 where
667 S: Ord + Display,
668 {
669 let root_histograms = histograms
670 .iter()
671 .flat_map(|(base_name, set)| {
672 set.iter().map(move |(variation, hist)| {
673 let name = format!("{base_name}_{variation}");
674 to_root_histogram(&name, hist)
675 })
676 })
677 .collect::<Vec<_>>();
678 Ok(write_root_histograms(path, &root_histograms)?)
679 }
680
681 fn to_root_histogram(name: &str, hist: &Hist1D) -> Th1F {
682 let contents = std::iter::once(hist.underflow())
683 .chain(hist.bins().iter().copied())
684 .chain(std::iter::once(hist.overflow()))
685 .collect::<Vec<_>>();
686 let sumw2 = std::iter::once(hist.underflow_sumw2())
687 .chain(hist.bin_sumw2().iter().copied())
688 .chain(std::iter::once(hist.overflow_sumw2()))
689 .collect::<Vec<_>>();
690 Th1F::new(
691 name,
692 name,
693 HistogramAxis::Fixed {
694 bins: hist.nbins(),
695 low: hist.low(),
696 high: hist.high(),
697 },
698 contents,
699 sumw2,
700 hist.entries(),
701 )
702 .with_weighted_x_stats(hist.sumwx(), hist.sumwx2())
703 }
704}
705
706pub mod read {
707 use std::path::Path;
708
709 use nano_rootio::RootFile;
710
711 use crate::{Result, RootError};
712
713 pub fn read_i32_branch(path: &Path, branch_name: &str) -> Result<Vec<i32>> {
715 let file = RootFile::open(path)?;
716 let tree_name = file
717 .objects()
718 .into_iter()
719 .find(|item| item.class() == "TTree")
720 .map(|item| item.name().to_string())
721 .ok_or_else(|| RootError::other(format!("No TTree found in {}", path.display())))?;
722 let tree = file.tree(&tree_name)?;
723 Ok(tree.read_scalar(branch_name)?)
724 }
725
726 pub async fn read_i32_branch_async(path: &Path, branch_name: &str) -> Result<Vec<i32>> {
728 read_i32_branch(path, branch_name)
729 }
730}
731
#[cfg(test)]
mod tests {
    use std::path::Path;

    use super::read::read_i32_branch;

    /// The `one` branch of the bundled `simple.root` fixture holds 1 through 4.
    #[test]
    fn reads_simple_root_i32_branch() {
        let fixture = Path::new("../root-io/src/test_data/simple.root");
        let expected = vec![1, 2, 3, 4];
        let actual = read_i32_branch(fixture, "one").unwrap();
        assert_eq!(actual, expected);
    }
}