1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
// Copyright (C) 2023-2024 The Software Heritage developers
// See the AUTHORS file at the top-level directory of this distribution
// License: GNU General Public License version 3, or any later version
// See top-level LICENSE file for more information
//! Node properties
//!
//! [`SwhGraphProperties`] is populated by the `load_properties` and `load_all_properties` methods
//! of [`SwhUnidirectionalGraph`](crate::graph::SwhUnidirectionalGraph) and
//! [`SwhBidirectionalGraph`](crate::graph::SwhBidirectionalGraph) and returned by
//! their `properties` method.
//!
//! ```no_run
//! # use std::path::PathBuf;
//! use swh_graph::graph::SwhGraphWithProperties;
//! use swh_graph::java_compat::mph::gov::GOVMPH;
//! use swh_graph::SwhGraphProperties;
//!
//! let properties: &SwhGraphProperties<_, _, _, _, _, _> =
//! swh_graph::graph::load_unidirectional(PathBuf::from("./graph"))
//! .expect("Could not load graph")
//! .load_all_properties::<GOVMPH>()
//! .expect("Could not load properties")
//! .properties();
//! ```
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use byteorder::BigEndian;
use mmap_rs::Mmap;
use crate::mph::SwhidMphf;
use crate::utils::mmap::NumberMmap;
use crate::utils::GetIndex;
use crate::OutOfBoundError;
/// File name suffixes of the files backing each property.
///
/// NOTE(review): presumably each suffix is appended to the graph's path prefix to
/// obtain the name of the file to load; confirm against the loaders in the submodules.
/// The group headings below are derived from the constant names only.
pub(crate) mod suffixes {
// Node maps
pub const NODE2SWHID: &str = ".node2swhid.bin";
pub const NODE2TYPE: &str = ".node2type.bin";
// Timestamps and their offsets
pub const AUTHOR_TIMESTAMP: &str = ".property.author_timestamp.bin";
pub const AUTHOR_TIMESTAMP_OFFSET: &str = ".property.author_timestamp_offset.bin";
pub const COMMITTER_TIMESTAMP: &str = ".property.committer_timestamp.bin";
pub const COMMITTER_TIMESTAMP_OFFSET: &str = ".property.committer_timestamp_offset.bin";
// Person ids
pub const AUTHOR_ID: &str = ".property.author_id.bin";
pub const COMMITTER_ID: &str = ".property.committer_id.bin";
// Content properties (note that `is_skipped` uses a `.bits` extension, unlike the others)
pub const CONTENT_IS_SKIPPED: &str = ".property.content.is_skipped.bits";
pub const CONTENT_LENGTH: &str = ".property.content.length.bin";
// Strings: each data file comes with a companion `.offset.bin` file
pub const MESSAGE: &str = ".property.message.bin";
pub const MESSAGE_OFFSET: &str = ".property.message.offset.bin";
pub const TAG_NAME: &str = ".property.tag_name.bin";
pub const TAG_NAME_OFFSET: &str = ".property.tag_name.offset.bin";
// Label names
pub const LABEL_NAME: &str = ".labels.fcl";
}
/// Properties on graph nodes
///
/// This structure has many type parameters, to allow loading only some properties,
/// and checking at compile time that only loaded properties are accessed.
///
/// Each type parameter corresponds to one group of properties. A freshly created
/// instance uses the `No*` marker type for every group (see [`SwhGraphProperties::new`]).
///
/// Extra properties can be loaded, following the builder pattern on the owning graph.
/// For example, this does not compile:
///
/// ```compile_fail
/// # use std::path::PathBuf;
/// use swh_graph::graph::SwhGraphWithProperties;
/// use swh_graph::java_compat::mph::gov::GOVMPH;
/// use swh_graph::SwhGraphProperties;
///
/// swh_graph::graph::load_unidirectional(PathBuf::from("./graph"))
/// .expect("Could not load graph")
/// .init_properties()
/// .properties()
/// .author_timestamp(42);
/// ```
///
/// but this does:
///
/// ```no_run
/// # use std::path::PathBuf;
/// use swh_graph::graph::SwhGraphWithProperties;
/// use swh_graph::java_compat::mph::gov::GOVMPH;
/// use swh_graph::SwhGraphProperties;
///
/// swh_graph::graph::load_unidirectional(PathBuf::from("./graph"))
/// .expect("Could not load graph")
/// .init_properties()
/// .load_properties(SwhGraphProperties::load_timestamps)
/// .expect("Could not load timestamp properties")
/// .properties()
/// .author_timestamp(42);
/// ```
pub struct SwhGraphProperties<
MAPS: MaybeMaps,
TIMESTAMPS: MaybeTimestamps,
PERSONS: MaybePersons,
CONTENTS: MaybeContents,
STRINGS: MaybeStrings,
LABELNAMES: MaybeLabelNames,
> {
/// Path prefix given to [`SwhGraphProperties::new`], from which properties are loaded
path: PathBuf,
/// Number of nodes, as given to [`SwhGraphProperties::new`]
num_nodes: usize,
/// Maps slot; [`NoMaps`] on a freshly created instance
maps: MAPS,
/// Timestamps slot; [`NoTimestamps`] on a freshly created instance
timestamps: TIMESTAMPS,
/// Persons slot; [`NoPersons`] on a freshly created instance
persons: PERSONS,
/// Contents slot; [`NoContents`] on a freshly created instance
contents: CONTENTS,
/// Strings slot; [`NoStrings`] on a freshly created instance
strings: STRINGS,
/// Label names slot; [`NoLabelNames`] on a freshly created instance
label_names: LABELNAMES,
}
/// Alias for [`SwhGraphProperties`] with every type parameter set to its `Mapped*` type.
///
/// This is the type returned by `SwhGraphProperties::load_all`; `MPHF` is forwarded
/// to [`MappedMaps`].
pub type AllSwhGraphProperties<MPHF> = SwhGraphProperties<
MappedMaps<MPHF>,
MappedTimestamps,
MappedPersons,
MappedContents,
MappedStrings,
MappedLabelNames,
>;
fn mmap(path: &Path) -> Result<Mmap> {
let file_len = path
.metadata()
.with_context(|| format!("Could not stat {}", path.display()))?
.len();
let file =
std::fs::File::open(path).with_context(|| format!("Could not open {}", path.display()))?;
let data = unsafe {
mmap_rs::MmapOptions::new(file_len as _)
.with_context(|| format!("Could not initialize mmap of size {}", file_len))?
.with_flags(mmap_rs::MmapFlags::TRANSPARENT_HUGE_PAGES)
.with_file(file, 0)
.map()
.with_context(|| format!("Could not mmap {}", path.display()))?
};
#[cfg(target_os = "linux")]
unsafe {
libc::madvise(data.as_ptr() as *mut _, data.len(), libc::MADV_RANDOM)
};
Ok(data)
}
impl SwhGraphProperties<NoMaps, NoTimestamps, NoPersons, NoContents, NoStrings, NoLabelNames> {
    /// Creates an empty [`SwhGraphProperties`] instance, which will load properties
    /// from the given path prefix.
    ///
    /// Every property group starts out as its `No*` marker; `num_nodes` is stored as given.
    pub fn new(path: impl AsRef<Path>, num_nodes: usize) -> Self {
        Self {
            path: path.as_ref().to_owned(),
            num_nodes,
            maps: NoMaps,
            timestamps: NoTimestamps,
            persons: NoPersons,
            contents: NoContents,
            strings: NoStrings,
            label_names: NoLabelNames,
        }
    }

    /// Consumes an empty [`SwhGraphProperties`] instance and returns a new one
    /// with all properties loaded and all methods available.
    ///
    /// ```no_run
    /// # use std::path::PathBuf;
    /// use swh_graph::graph::SwhGraphWithProperties;
    /// use swh_graph::java_compat::mph::gov::GOVMPH;
    /// use swh_graph::SwhGraphProperties;
    ///
    /// SwhGraphProperties::new(PathBuf::from("./graph"), 123)
    ///     .load_all::<GOVMPH>()
    ///     .expect("Could not load properties");
    /// ```
    ///
    /// is equivalent to:
    ///
    /// ```no_run
    /// # use std::path::PathBuf;
    /// use swh_graph::java_compat::mph::gov::GOVMPH;
    /// use swh_graph::SwhGraphProperties;
    ///
    /// SwhGraphProperties::new(PathBuf::from("./graph"), 123)
    ///     .load_maps::<GOVMPH>()
    ///     .expect("Could not load node2swhid/swhid2node")
    ///     .load_timestamps()
    ///     .expect("Could not load timestamp properties")
    ///     .load_persons()
    ///     .expect("Could not load person properties")
    ///     .load_contents()
    ///     .expect("Could not load content properties")
    ///     .load_strings()
    ///     .expect("Could not load string properties")
    ///     .load_label_names()
    ///     .expect("Could not load label names");
    /// ```
    ///
    /// # Errors
    ///
    /// Returns the error of the first loading step that fails; later steps are not run.
    pub fn load_all<MPHF: SwhidMphf>(self) -> Result<AllSwhGraphProperties<MPHF>> {
        self.load_maps()?
            .load_timestamps()?
            .load_persons()?
            .load_contents()?
            .load_strings()?
            .load_label_names()
    }
}
// One private submodule per property group; each one's public items are re-exported
// here so that they are reachable directly from this module.
mod maps;
pub use maps::{MappedMaps, Maps, MaybeMaps, NoMaps, NodeIdFromSwhidError, VecMaps};
mod timestamps;
pub use timestamps::{MappedTimestamps, MaybeTimestamps, NoTimestamps, Timestamps, VecTimestamps};
mod persons;
pub use persons::{MappedPersons, MaybePersons, NoPersons, Persons, VecPersons};
mod contents;
pub use contents::{Contents, MappedContents, MaybeContents, NoContents, VecContents};
mod strings;
pub use strings::{MappedStrings, MaybeStrings, NoStrings, Strings, VecStrings};
mod label_names;
pub use label_names::{
LabelIdFromNameError, LabelNames, MappedLabelNames, MaybeLabelNames, NoLabelNames,
VecLabelNames,
};