Skip to content

Commit

Permalink
switch to ahash for perf (#543)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonmmease committed Nov 16, 2024
1 parent 5bb9dc2 commit a5cbc64
Show file tree
Hide file tree
Showing 9 changed files with 19 additions and 36 deletions.
12 changes: 2 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ resolver = "2"
members = [ "vegafusion-common", "vegafusion-core", "vegafusion-runtime", "vegafusion-python", "vegafusion-wasm", "vegafusion-server", "examples/rust-examples",]

[workspace.dependencies]
deterministic-hash = "1.0.1"
async-trait = "0.1.73"
futures = "0.3.21"
url = "2.3.1"
Expand Down Expand Up @@ -39,6 +38,9 @@ version = "0.51.0"
version = "0.4.35"
default-features = false

[workspace.dependencies.ahash]
version = "0.8.11"

[workspace.dependencies.chrono-tz]
version = "0.9.0"
features = [ "case-insensitive", "filter-by-regex",]
Expand Down
6 changes: 3 additions & 3 deletions vegafusion-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ proto = ["datafusion-proto", "datafusion-proto-common", "prost"]
[dependencies]
thiserror = "^1.0.29"

[dependencies.deterministic-hash]
workspace = true

[dependencies.chrono]
workspace = true
optional = true
Expand All @@ -25,6 +22,9 @@ optional = true
workspace = true
optional = true

[dependencies.ahash]
workspace = true

[dependencies.serde_json]
workspace = true
default-features = false
Expand Down
5 changes: 3 additions & 2 deletions vegafusion-common/src/data/table.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
use datafusion_common::ScalarValue;

use ahash::RandomState;
use arrow::{
array::{ArrayData, ArrayRef, StructArray, UInt32Array},
compute::concat_batches,
datatypes::{DataType, Field, Schema, SchemaRef},
ipc::{reader::StreamReader, writer::StreamWriter},
record_batch::RecordBatch,
};
use std::hash::BuildHasher;

use crate::{
data::{ORDER_COL, ORDER_COL_DTYPE},
Expand All @@ -19,7 +21,6 @@ use arrow::array::{
};
#[cfg(feature = "prettyprint")]
use arrow::util::pretty::pretty_format_batches;
use std::hash::DefaultHasher;
use std::{
hash::{Hash, Hasher},
io::Cursor,
Expand Down Expand Up @@ -434,7 +435,7 @@ impl VegaFusionTable {
}

/// Returns a 64-bit hash of this table: `self` is fed into a hasher via `Hash::hash`
/// and the hasher's `finish()` value is returned.
///
/// NOTE(review): this is a rendered diff hunk whose +/- markers were stripped, so the
/// two `let mut hasher` lines below are the removed and the added form of one statement.
/// Presumably only the `RandomState` line exists after the commit; confirm in the repo.
pub fn get_hash(&self) -> u64 {
// Removed by this commit: std `DefaultHasher` wrapped in `DeterministicHasher`.
let mut hasher = deterministic_hash::DeterministicHasher::new(DefaultHasher::new());
// Added by this commit: hasher built from `ahash::RandomState` with the constant seed 123.
let mut hasher = RandomState::with_seed(123).build_hasher();
self.hash(&mut hasher);
hasher.finish()
}
Expand Down
2 changes: 1 addition & 1 deletion vegafusion-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ log = "0.4.22"
[dependencies.lazy_static]
workspace = true

[dependencies.deterministic-hash]
[dependencies.ahash]
workspace = true

[dependencies.prost]
Expand Down
14 changes: 4 additions & 10 deletions vegafusion-core/src/data/dataset.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
use crate::error::Result;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use vegafusion_common::data::table::VegaFusionTable;
use vegafusion_common::datafusion_expr::LogicalPlan;

Expand All @@ -14,11 +12,9 @@ impl VegaFusionDataset {
/// Returns a string fingerprint for this dataset.
///
/// * `Table`: the stored `hash` field converted with `to_string()`.
/// * `Plan`: a hash of `plan`, converted with `to_string()`.
///
/// NOTE(review): this is a rendered diff hunk whose +/- markers were stripped, so two
/// `Plan` arms appear. The block-bodied arm is the removed version and the `ahash` arm is
/// the added one. Read literally, the first arm would match and the second would be
/// unreachable; presumably only the `ahash` arm exists after the commit. Confirm in the repo.
pub fn fingerprint(&self) -> String {
match self {
VegaFusionDataset::Table { hash, .. } => hash.to_string(),
// Removed by this commit: manual hasher construction, `hash`, then `finish`.
VegaFusionDataset::Plan { plan } => {
let mut hasher = deterministic_hash::DeterministicHasher::new(DefaultHasher::new());
plan.hash(&mut hasher);
hasher.finish().to_string()
}
// Added by this commit: one-shot `hash_one` on a `RandomState` seeded with 123.
VegaFusionDataset::Plan { plan } => ahash::RandomState::with_seed(123)
.hash_one(plan)
.to_string(),
}
}

Expand All @@ -29,9 +25,7 @@ impl VegaFusionDataset {

/// Builds a `Table` dataset from IPC bytes.
///
/// The raw `ipc_bytes` are hashed, then decoded with `VegaFusionTable::from_ipc_bytes`;
/// the result is `Self::Table { table, hash }`.
///
/// # Errors
/// Propagates (via `?`) any error returned by `VegaFusionTable::from_ipc_bytes`.
///
/// NOTE(review): this is a rendered diff hunk whose +/- markers were stripped. The three
/// `deterministic_hash` lines are the removed code and the `hash_one` line is the added
/// replacement; presumably only the latter exists after the commit. Confirm in the repo.
pub fn from_table_ipc_bytes(ipc_bytes: &[u8]) -> Result<Self> {
// Hash ipc bytes
// Removed by this commit: manual hasher construction, `hash`, then `finish`.
let mut hasher = deterministic_hash::DeterministicHasher::new(DefaultHasher::new());
ipc_bytes.hash(&mut hasher);
let hash = hasher.finish();
// Added by this commit: one-shot `hash_one` on a `RandomState` seeded with 123.
let hash = ahash::RandomState::with_seed(123).hash_one(&ipc_bytes);
let table = VegaFusionTable::from_ipc_bytes(ipc_bytes)?;
Ok(Self::Table { table, hash })
}
Expand Down
8 changes: 3 additions & 5 deletions vegafusion-core/src/task_graph/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@ use crate::task_graph::task_value::TaskValue;
use crate::proto::gen::tasks::task::TaskKind;
use crate::proto::gen::tasks::task_value::Data;
use crate::proto::gen::tasks::TaskValue as ProtoTaskValue;
use std::collections::hash_map::DefaultHasher;
use std::convert::TryFrom;
use std::hash::{Hash, Hasher};
use std::hash::{BuildHasher, Hash, Hasher};

struct PetgraphEdge {
output_var: Option<Variable>,
Expand Down Expand Up @@ -208,8 +207,7 @@ impl TaskGraph {
let mut id_fingerprints: Vec<u64> = Vec::with_capacity(self.nodes.len());
for (i, node) in self.nodes.iter().enumerate() {
let task = node.task();
let mut hasher = deterministic_hash::DeterministicHasher::new(DefaultHasher::new());

let mut hasher = ahash::RandomState::with_seed(123).build_hasher();
if let TaskKind::Value(value) = task.task_kind() {
// Only hash the distinction between Scalar and Table, not the value itself.
// The state fingerprint takes the value into account.
Expand Down Expand Up @@ -249,7 +247,7 @@ impl TaskGraph {
let mut state_fingerprints: Vec<u64> = Vec::with_capacity(self.nodes.len());
for (i, node) in self.nodes.iter().enumerate() {
let task = node.task();
let mut hasher = deterministic_hash::DeterministicHasher::new(DefaultHasher::new());
let mut hasher = ahash::RandomState::with_seed(123).build_hasher();

if matches!(task.task_kind(), TaskKind::Value(_)) {
// Hash the task with inline TaskValue
Expand Down
3 changes: 0 additions & 3 deletions vegafusion-python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@ workspace = true
workspace = true
features = ["tls"]

[dependencies.deterministic-hash]
version = "1.0.1"

[dependencies.serde]
version = "1.0.137"
features = ["derive"]
Expand Down
1 change: 0 additions & 1 deletion vegafusion-runtime/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ async-lock = "2.8.0"
tempfile = "3.3.0"
futures-util = "0.3.21"
bytes = "1.1.0"
deterministic-hash = "1.0.1"
log = "0.4.17"
env_logger = "0.10.0"
ordered-float = "3.6.0"
Expand Down

0 comments on commit a5cbc64

Please sign in to comment.