1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// Copyright (C) 2024 The Software Heritage developers
// See the AUTHORS file at the top-level directory of this distribution
// License: GNU General Public License version 3, or any later version
// See top-level LICENSE file for more information

#![doc = include_str!("../README.md")]

use std::path::Path;

use cxx::Exception;

pub mod build;
pub use build::*;

mod backends;

pub mod encoders;
pub use encoders::*;

pub mod hashing;
pub use hashing::*;

pub mod minimality;
pub use minimality::*;

mod partitioned_phf;
pub use partitioned_phf::*;

mod structs;

mod single_phf;
pub use single_phf::*;

mod utils;
#[allow(unused_imports)] // check() is feature-gated
pub use utils::*;

/// A [perfect-hash function](https://en.wikipedia.org/wiki/Perfect_hash_function)
/// implemented with the [PTHash algorithm](https://dl.acm.org/doi/10.1145/3404835.3462849)
pub trait Phf: Sized + Send + Sync {
    /// Whether instances of this function have their values in the range `[0; num_keys)`.
    const MINIMAL: bool;

    /// Builds the function from a set of keys
    ///
    /// In plain English, this function's trait bound on keys is that they should be
    /// a collection that can provide cloneable iterators of hashable values.
    fn build_in_internal_memory_from_bytes<Keys: IntoIterator>(
        &mut self,
        keys: Keys,
        config: &BuildConfiguration,
    ) -> Result<BuildTimings, Exception>
    where
        <Keys as IntoIterator>::IntoIter: ExactSizeIterator + Clone,
        <<Keys as IntoIterator>::IntoIter as Iterator>::Item: Hashable;

    /// Returns the hash of the given key
    ///
    /// If the `key` was not one of the keys passed to
    /// [`build_in_internal_memory_from_bytes`](Self::build_in_internal_memory_from_bytes)
    /// when building the function, the hash will collide with another key's
    fn hash(&self, key: impl Hashable) -> u64;

    /// Returns the number of bits needed to represent this perfect-hash function
    fn num_bits(&self) -> usize;
    /// Returns the number of keys used to build this perfect-hash function
    fn num_keys(&self) -> u64;
    /// Largest value returned by [`Self::hash`] plus 1
    fn table_size(&self) -> u64;

    /// Dump this function to disk
    fn save(&mut self, path: impl AsRef<Path>) -> Result<usize, Exception>;
    /// Load this function from disk
    fn load(path: impl AsRef<Path>) -> Result<Self, Exception>;
}