diff --git a/MIR/collect-wavenumber-metadata.R b/MIR/collect-wavenumber-metadata.R
index c2b7ebd..9a5674f 100644
--- a/MIR/collect-wavenumber-metadata.R
+++ b/MIR/collect-wavenumber-metadata.R
@@ -8,27 +8,35 @@ source('../code/snapshot-preparation/snapshot-functions.R')
 base.path <- 'E:/temp/MIR_work/processed-collections'
 
 ## paths to full collection
-# as of 2023-01-16 there are 1594 collections
+# as of 2023-01-16 there are 1594 collections (including some non-public)
+# as of 2024-02-14: 1387 public collections
 f <- list.files(base.path, full.names = TRUE)
 
-# 2023-01-07: 1594 collections
-length(f)
-
 ## testing: ok
 # z <- wavenumberMetadata(f[1])
 # str(z)
 
+## TODO: not robust to NULL list elements
 
 ## collection metadata by collection/sample/integer wn-sequence
 # ~ 2 minutes
 plan(multisession)
 
 system.time(
-  z <- future_map(f, .progress = TRUE, .f = wavenumberMetadata)
+  z <- future_map(f, .progress = TRUE, .f = safely(wavenumberMetadata))
 )
 
 plan(sequential)
 
+# test for error conditions: safely() wraps each element of z as list(result, error)
+e <- lapply(z, '[[', 'error')
+which(!vapply(e, is.null, logical(1)))
+
+f[1054]
+
+
+
+
 # flatten
 z <- do.call('rbind', z)
 row.names(z) <- NULL
@@ -113,6 +121,9 @@ nrow(x <- z[z$wnID == 3, ])
 knitr::kable(x[, 1:2], row.names = FALSE)
 range(as.numeric(strsplit(x$wn[1], split = ',', fixed = TRUE)[[1]]))
 
+## cleanup
+rm(list = ls())
+gc(reset = TRUE)
 
 
 
diff --git a/MIR/main.R b/MIR/main.R
index 875f0dc..13f84cd 100644
--- a/MIR/main.R
+++ b/MIR/main.R
@@ -3,6 +3,8 @@
 ##
 
 
+# WD
+setwd('MIR')
 
 # build a set of RDS, one per spectra collection
 # save to temporary folder for later use
diff --git a/MIR/pre-process-collections.R b/MIR/pre-process-collections.R
index 19c1982..0df21cd 100644
--- a/MIR/pre-process-collections.R
+++ b/MIR/pre-process-collections.R
@@ -17,33 +17,41 @@ dir.create(proc.path, recursive = TRUE)
 
 
 ## paths to full collection
-# as of 2023-01-16 there are 1594 collections
+# as of 2023-01-16 there are 1594 collections (including some non-public)
+# as of 2024-02-14: 1387 public collections
 p <- list.dirs(file.path(base.path, 'MIR_Library'), recursive = TRUE, full.names = TRUE)
 
 # remove the top-level directory
-p <- p[-1]
+# also remove the _INSTRUCTIONS dir
+p <- p[-c(1:2)]
+
 
 
 ## pre-process OPUS files
 # result is a set of RDS files, by spectra collection
-# ~ 15 minutes
+# ~ 13 minutes
 # WD is the bottle-neck
-# files cannot be open in OPUS software
 
 # test working as expected: OK
 # processOpusCollection(p[1], .output = proc.path)
 
+# missing `ab` list element: 'e:/MIR/MIR_Library/C2019USNJ085/'
 
 plan(multisession)
 
-# writes RDS, no output here
+# writes RDS, result is a list of collections with no usable data
 system.time(
-  .trash <- future_map(p, .progress = TRUE, .f = processOpusCollection, .output = proc.path)
+  e <- future_map(p, .progress = TRUE, .f = processOpusCollection, .output = proc.path)
 )
 
 plan(sequential)
 
-# done
+# keep track of errors
+
+
+## cleanup
+rm(list = ls())
+gc(reset = TRUE)
 
 
 
diff --git a/code/snapshot-preparation/snapshot-functions.R b/code/snapshot-preparation/snapshot-functions.R
index df85b49..ed2de8b 100644
--- a/code/snapshot-preparation/snapshot-functions.R
+++ b/code/snapshot-preparation/snapshot-functions.R
@@ -10,21 +10,45 @@ processOpusCollection <- function(.collection, .output) {
   # spectra ID
   .sID <- gsub(pattern = '.0', replacement = '', x = basename(.files), fixed = TRUE)
   
+  ## TODO: data_only = TRUE bug
+  ##       -> https://github.com/spectral-cockpit/opusreader2/issues/104
+  
+  ## TODO: warnings:
+  ##       -> In get_meta_utc_datetime(timestamp) : NAs introduced by coercion
+  
   # load all spectra objects in collection
   # there may be cases with no usable data (why?)
   # result is an empty list
-  x <- opusreader2::read_opus(.files, data_only = TRUE, parallel = FALSE, progress_bar = FALSE)
+  x <- opusreader2::read_opus(.files, data_only = FALSE, parallel = FALSE, progress_bar = FALSE)
+  
+  ## TODO: review with data_only = FALSE
+  ## TODO: something wrong with C2019USNJ085/*
   
   # find bad files / parse error (?)
   idx <- which(sapply(x, length) < 1)
   if(length(idx) > 0) {
+    # keep track / warn
+    .msg <- sprintf("unusable .0 file: %s [%s]", .sID[idx], .cID)
+    message(.msg)
+    
     # remove spectra + sample ID
     x <- x[-idx]
     .sID <- .sID[-idx]
-    
+  }
+  
+  # find spectra missing 'ab' element
+  idx <- which(sapply(x, function(i) {
+    is.null(i$ab)
+  }))
+  
+  if(length(idx) > 0) {
     # keep track / warn
-    .msg <- sprintf("unusable .0 file: %s [%s]", .sID[idx], .cID)
-    message(.msg)
+    .msg <- sprintf("missing `ab` file: %s [%s]", .sID[idx], .cID)
+    message(paste(.msg, collapse = '\n'))
+    
+    # remove spectra + sample ID
+    x <- x[-idx]
+    .sID <- .sID[-idx]
   }
   
   # extract components from OPUS object
@@ -37,18 +61,29 @@ processOpusCollection <- function(.collection, .output) {
     
   })
   
+  # keep track of collection ID
+  attr(.res, 'collection') <- .cID
   
   # keep track of sample IDs in the spectra list
   names(.res) <- .sID
   
-  # keep track of collection ID
-  attr(.res, 'collection') <- .cID
-  
-  # save to RDS
-  .file <- sprintf('%s.rds', file.path(.output, .cID))
-  saveRDS(.res, file = .file)
+  # test for empty set
+  # all files in collection are invalid
+  # result is NULL
+  if(length(.res) < 1) {
+    .res <- NULL
+    
+    # return the collection ID to the calling function
+    return(.cID)
+    
+    # do not save RDS
+  } else {
+    # everything is fine
+    # save to RDS
+    .file <- sprintf('%s.rds', file.path(.output, .cID))
+    saveRDS(.res, file = .file)
+  }
   
-  ## TODO: return error status
 }