1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
// Copyright (C) 2023-2024  The Software Heritage developers
// See the AUTHORS file at the top-level directory of this distribution
// License: GNU General Public License version 3, or any later version
// See top-level LICENSE file for more information

//! Node labels
//!
//! [`SwhGraphProperties`] is populated by the `load_properties` and `load_all_properties`
//! of [`SwhUnidirectionalGraph`](crate::graph::SwhUnidirectionalGraph) and
//! [`SwhBidirectionalGraph`](crate::graph::SwhBidirectionalGraph) and returned by
//! their `properties` method.
//!
//! ```no_run
//! # use std::path::PathBuf;
//! use swh_graph::graph::SwhGraphWithProperties;
//! use swh_graph::java_compat::mph::gov::GOVMPH;
//! use swh_graph::SwhGraphProperties;
//!
//! let properties: &SwhGraphProperties<_, _, _, _, _, _> =
//!     swh_graph::graph::load_unidirectional(PathBuf::from("./graph"))
//!     .expect("Could not load graph")
//!     .load_all_properties::<GOVMPH>()
//!     .expect("Could not load properties")
//!     .properties();
//! ```

use std::path::{Path, PathBuf};

use anyhow::{Context, Result};
use byteorder::BigEndian;
use mmap_rs::Mmap;

use crate::mph::SwhidMphf;
use crate::utils::mmap::NumberMmap;
use crate::utils::GetIndex;
use crate::OutOfBoundError;

pub(crate) mod suffixes {
    pub const NODE2SWHID: &str = ".node2swhid.bin";
    pub const NODE2TYPE: &str = ".node2type.bin";
    pub const AUTHOR_TIMESTAMP: &str = ".property.author_timestamp.bin";
    pub const AUTHOR_TIMESTAMP_OFFSET: &str = ".property.author_timestamp_offset.bin";
    pub const COMMITTER_TIMESTAMP: &str = ".property.committer_timestamp.bin";
    pub const COMMITTER_TIMESTAMP_OFFSET: &str = ".property.committer_timestamp_offset.bin";
    pub const AUTHOR_ID: &str = ".property.author_id.bin";
    pub const COMMITTER_ID: &str = ".property.committer_id.bin";
    pub const CONTENT_IS_SKIPPED: &str = ".property.content.is_skipped.bits";
    pub const CONTENT_LENGTH: &str = ".property.content.length.bin";
    pub const MESSAGE: &str = ".property.message.bin";
    pub const MESSAGE_OFFSET: &str = ".property.message.offset.bin";
    pub const TAG_NAME: &str = ".property.tag_name.bin";
    pub const TAG_NAME_OFFSET: &str = ".property.tag_name.offset.bin";
    pub const LABEL_NAME: &str = ".labels.fcl";
}

/// Properties on graph nodes
///
/// This structures has many type parameters, to allow loading only some properties,
/// and checking at compile time that only loaded properties are accessed.
///
/// Extra properties can be loaded, following the builder pattern on the owning graph.
/// For example, this does not compile:
///
/// ```compile_fail
/// # use std::path::PathBuf;
/// use swh_graph::graph::SwhGraphWithProperties;
/// use swh_graph::java_compat::mph::gov::GOVMPH;
/// use swh_graph::SwhGraphProperties;
///
/// swh_graph::graph::load_unidirectional(PathBuf::from("./graph"))
///     .expect("Could not load graph")
///     .init_properties()
///     .properties()
///     .author_timestamp(42);
/// ```
///
/// but this does:
///
/// ```no_run
/// # use std::path::PathBuf;
/// use swh_graph::graph::SwhGraphWithProperties;
/// use swh_graph::java_compat::mph::gov::GOVMPH;
/// use swh_graph::SwhGraphProperties;
///
/// swh_graph::graph::load_unidirectional(PathBuf::from("./graph"))
///     .expect("Could not load graph")
///     .init_properties()
///     .load_properties(SwhGraphProperties::load_timestamps)
///     .expect("Could not load timestamp properties")
///     .properties()
///     .author_timestamp(42);
/// ```
pub struct SwhGraphProperties<
    MAPS: MaybeMaps,
    TIMESTAMPS: MaybeTimestamps,
    PERSONS: MaybePersons,
    CONTENTS: MaybeContents,
    STRINGS: MaybeStrings,
    LABELNAMES: MaybeLabelNames,
> {
    path: PathBuf,
    num_nodes: usize,
    maps: MAPS,
    timestamps: TIMESTAMPS,
    persons: PERSONS,
    contents: CONTENTS,
    strings: STRINGS,
    label_names: LABELNAMES,
}

pub type AllSwhGraphProperties<MPHF> = SwhGraphProperties<
    MappedMaps<MPHF>,
    MappedTimestamps,
    MappedPersons,
    MappedContents,
    MappedStrings,
    MappedLabelNames,
>;

fn mmap(path: &Path) -> Result<Mmap> {
    let file_len = path
        .metadata()
        .with_context(|| format!("Could not stat {}", path.display()))?
        .len();
    let file =
        std::fs::File::open(path).with_context(|| format!("Could not open {}", path.display()))?;
    let data = unsafe {
        mmap_rs::MmapOptions::new(file_len as _)
            .with_context(|| format!("Could not initialize mmap of size {}", file_len))?
            .with_flags(mmap_rs::MmapFlags::TRANSPARENT_HUGE_PAGES)
            .with_file(file, 0)
            .map()
            .with_context(|| format!("Could not mmap {}", path.display()))?
    };
    #[cfg(target_os = "linux")]
    unsafe {
        libc::madvise(data.as_ptr() as *mut _, data.len(), libc::MADV_RANDOM)
    };
    Ok(data)
}

impl SwhGraphProperties<NoMaps, NoTimestamps, NoPersons, NoContents, NoStrings, NoLabelNames> {
    /// Creates an empty [`SwhGraphProperties`] instance, which will load properties
    /// from the given path prefix.
    pub fn new(path: impl AsRef<Path>, num_nodes: usize) -> Self {
        SwhGraphProperties {
            path: path.as_ref().to_owned(),
            num_nodes,
            maps: NoMaps,
            timestamps: NoTimestamps,
            persons: NoPersons,
            contents: NoContents,
            strings: NoStrings,
            label_names: NoLabelNames,
        }
    }

    /// Consumes an empty [`SwhGraphProperties`] instance and returns a new one
    /// with all properties loaded and all methods available.
    ///
    /// ```no_run
    /// # use std::path::PathBuf;
    ///  use swh_graph::graph::SwhGraphWithProperties;
    /// use swh_graph::java_compat::mph::gov::GOVMPH;
    /// use swh_graph::SwhGraphProperties;
    ///
    /// SwhGraphProperties::new(PathBuf::from("./graph"), 123)
    ///     .load_all::<GOVMPH>()
    ///     .expect("Could not load properties");
    /// ```
    ///
    /// is equivalent to:
    ///
    /// ```no_run
    /// # use std::path::PathBuf;
    /// use swh_graph::java_compat::mph::gov::GOVMPH;
    /// use swh_graph::SwhGraphProperties;
    ///
    /// SwhGraphProperties::new(PathBuf::from("./graph"), 123)
    ///     .load_maps::<GOVMPH>()
    ///     .expect("Could not load node2swhid/swhid2node")
    ///     .load_timestamps()
    ///     .expect("Could not load timestamp properties")
    ///     .load_persons()
    ///     .expect("Could not load person properties")
    ///     .load_contents()
    ///     .expect("Could not load content properties")
    ///     .load_strings()
    ///     .expect("Could not load string properties");
    /// ```
    pub fn load_all<MPHF: SwhidMphf>(self) -> Result<AllSwhGraphProperties<MPHF>> {
        self.load_maps()?
            .load_timestamps()?
            .load_persons()?
            .load_contents()?
            .load_strings()?
            .load_label_names()
    }
}

mod maps;
pub use maps::{MappedMaps, Maps, MaybeMaps, NoMaps, NodeIdFromSwhidError, VecMaps};

mod timestamps;
pub use timestamps::{MappedTimestamps, MaybeTimestamps, NoTimestamps, Timestamps, VecTimestamps};

mod persons;
pub use persons::{MappedPersons, MaybePersons, NoPersons, Persons, VecPersons};

mod contents;
pub use contents::{Contents, MappedContents, MaybeContents, NoContents, VecContents};

mod strings;
pub use strings::{MappedStrings, MaybeStrings, NoStrings, Strings, VecStrings};

mod label_names;
pub use label_names::{
    LabelIdFromNameError, LabelNames, MappedLabelNames, MaybeLabelNames, NoLabelNames,
    VecLabelNames,
};