Skip to content

Commit

Permalink
reserve some extra builtins, add encoding uniqueness test
Browse files Browse the repository at this point in the history
  • Loading branch information
lwwmanning committed Sep 19, 2024
1 parent cfdfc8a commit 2e1ceb3
Showing 1 changed file with 84 additions and 20 deletions.
104 changes: 84 additions & 20 deletions vortex-array/src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ use crate::{Array, ArrayDef, ArrayTrait};
/// EncodingId is a unique name and numerical code of the array encoding.
///
/// The `&'static str` field holds the name and the `u16` field holds the
/// numerical code. The 16-bit code space is partitioned into
/// non-overlapping ranges:
///
/// - 0x0000 - reserved marker encoding
/// - 0x0001 - 0x0400 - vortex internal encodings (1 - 1024)
/// - 0x0401 - 0x7FFF - well known extension encodings (1025 - 32767)
/// - 0x8000 - 0xFFFF - custom extension encodings (32768 - 65535)
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub struct EncodingId(&'static str, u16);

Expand Down Expand Up @@ -98,27 +98,91 @@ pub trait ArrayEncodingRef {

#[doc = "Encoding ID constants for all Vortex-provided encodings"]
pub mod ids {
    // Each constant is declared exactly once and every value is distinct.
    // All values stay within the vortex-internal range (0x0001 - 0x0400),
    // apart from the reserved marker at 0x0000.

    // reserved marker encoding - 0x0000
    pub const RESERVED: u16 = 0;

    // Vortex built-in encodings (1 - 10)
    pub const NULL: u16 = 1;
    pub const BOOL: u16 = 2;
    pub const PRIMITIVE: u16 = 3;
    pub const STRUCT: u16 = 4;
    pub const VAR_BIN: u16 = 5;
    pub const VAR_BIN_VIEW: u16 = 6;
    pub const EXTENSION: u16 = 7;
    pub const SPARSE: u16 = 8;
    pub const CONSTANT: u16 = 9;
    pub const CHUNKED: u16 = 10;

    // currently unused, saved for future built-ins (11 - 16)
    // e.g., List, FixedList, Union, Tensor, etc.
    pub const RESERVED_11: u16 = 11;
    pub const RESERVED_12: u16 = 12;
    pub const RESERVED_13: u16 = 13;
    pub const RESERVED_14: u16 = 14;
    pub const RESERVED_15: u16 = 15;
    pub const RESERVED_16: u16 = 16;

    // bundled extensions (17 - 29), in alphabetical order
    pub const ALP: u16 = 17;
    pub const BYTE_BOOL: u16 = 18;
    pub const DATE_TIME_PARTS: u16 = 19;
    pub const DICT: u16 = 20;
    pub const FL_BITPACKED: u16 = 21;
    pub const FL_DELTA: u16 = 22;
    pub const FL_FOR: u16 = 23;
    pub const FSST: u16 = 24;
    pub const ROARING_BOOL: u16 = 25;
    pub const ROARING_INT: u16 = 26;
    pub const RUN_END: u16 = 27;
    pub const RUN_END_BOOL: u16 = 28;
    pub const ZIGZAG: u16 = 29;
}

#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use super::ids;

    /// Every Vortex-provided encoding id must be distinct, and none may
    /// exceed the upper bound of the vortex-internal range (0x0400).
    #[test]
    fn test_encoding_id() {
        let all_ids = [
            // reserved marker
            ids::RESERVED,
            // built-ins
            ids::NULL,
            ids::BOOL,
            ids::PRIMITIVE,
            ids::STRUCT,
            ids::VAR_BIN,
            ids::VAR_BIN_VIEW,
            ids::EXTENSION,
            ids::SPARSE,
            ids::CONSTANT,
            ids::CHUNKED,
            // held back for future built-ins
            ids::RESERVED_11,
            ids::RESERVED_12,
            ids::RESERVED_13,
            ids::RESERVED_14,
            ids::RESERVED_15,
            ids::RESERVED_16,
            // bundled extensions
            ids::ALP,
            ids::BYTE_BOOL,
            ids::DATE_TIME_PARTS,
            ids::DICT,
            ids::FL_BITPACKED,
            ids::FL_DELTA,
            ids::FL_FOR,
            ids::FSST,
            ids::ROARING_BOOL,
            ids::ROARING_INT,
            ids::RUN_END,
            ids::RUN_END_BOOL,
            ids::ZIGZAG,
        ];

        // Collapsing into a set drops duplicates, so equal sizes mean
        // that no two constants share a value.
        let distinct: HashSet<u16> = all_ids.iter().copied().collect();
        assert_eq!(distinct.len(), all_ids.len());

        // The largest id bounds all the others.
        let largest = distinct.iter().copied().max().unwrap();
        assert!(largest <= 0x0400);
    }
}

0 comments on commit 2e1ceb3

Please sign in to comment.