use crate::attribute::OwnedAttribute; use crate::name::OwnedName; use std::collections::hash_map::RandomState; use std::collections::HashSet; use std::hash::BuildHasher; use std::hash::Hash; use std::hash::Hasher; /// An ordered set pub(crate) struct AttributesSet { vec: Vec, /// Uses a no-op hasher, because these u64s are hashes already may_contain: HashSet, /// This is real hasher for the `OwnedName` hasher: RandomState, } /// Use linear search and don't allocate `HashSet` if there are few attributes, /// because allocation costs more than a few comparisons. const HASH_THRESHOLD: usize = 8; impl AttributesSet { pub fn new() -> Self { Self { vec: Vec::new(), hasher: RandomState::new(), may_contain: HashSet::default(), } } fn hash(&self, val: &OwnedName) -> u64 { let mut h = self.hasher.build_hasher(); val.hash(&mut h); h.finish() } pub fn len(&self) -> usize { self.vec.len() } pub fn contains(&self, name: &OwnedName) -> bool { // fall back to linear search only on duplicate or hash collision (self.vec.len() < HASH_THRESHOLD || self.may_contain.contains(&self.hash(name))) && self.vec.iter().any(move |a| &a.name == name) } pub fn push(&mut self, attr: OwnedAttribute) { if self.vec.len() >= HASH_THRESHOLD { if self.vec.len() == HASH_THRESHOLD { self.may_contain.reserve(HASH_THRESHOLD * 2); for attr in &self.vec { self.may_contain.insert(self.hash(&attr.name)); } } self.may_contain.insert(self.hash(&attr.name)); } self.vec.push(attr); } pub fn into_vec(self) -> Vec { self.vec } } #[test] fn indexset() { let mut s = AttributesSet::new(); let not_here = OwnedName { local_name: "attr1000".into(), namespace: Some("test".into()), prefix: None, }; // this test will take a lot of time if the `contains()` is linear, and the loop is quadratic for i in 0..50000 { let name = OwnedName { local_name: format!("attr{i}"), namespace: None, prefix: None, }; assert!(!s.contains(&name)); s.push(OwnedAttribute { name, value: String::new() }); assert!(!s.contains(¬_here)); } assert!(s.contains(&OwnedName { local_name: "attr1234".into(), namespace: None, prefix: None, })); assert!(s.contains(&OwnedName { local_name: "attr0".into(), namespace: None, prefix: None, })); assert!(s.contains(&OwnedName { local_name: "attr49999".into(), namespace: None, prefix: None, })); } /// Hashser that does nothing except passing u64 through struct U64Hasher(u64); impl Hasher for U64Hasher { fn finish(&self) -> u64 { self.0 } fn write(&mut self, slice: &[u8]) { for &v in slice { self.0 ^= u64::from(v) } // unused in practice } fn write_u64(&mut self, i: u64) { self.0 ^= i; } } #[derive(Default)] struct U64HasherBuilder; impl BuildHasher for U64HasherBuilder { type Hasher = U64Hasher; fn build_hasher(&self) -> U64Hasher { U64Hasher(0) } }