1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
// Copyright (C) 2023-2024  The Software Heritage developers
// See the AUTHORS file at the top-level directory of this distribution
// License: GNU General Public License version 3, or any later version
// See top-level LICENSE file for more information

use anyhow::{ensure, Context, Result};
use mmap_rs::Mmap;

use super::suffixes::*;
use super::*;
use crate::graph::NodeId;
use crate::utils::suffix_path;

/// Trait implemented by both [`NoContents`] and all implementors of [`Contents`],
/// to allow loading content properties only if needed.
///
/// This is a marker trait: it declares no methods of its own.
pub trait MaybeContents {}

/// [`Contents`] backend whose two arrays are memory-mapped big-endian `u64` files.
///
/// Built by [`SwhGraphProperties::load_contents`].
pub struct MappedContents {
    /// Bit vector packed into `u64` cells: one bit per node, 64 nodes per cell.
    is_skipped_content: NumberMmap<BigEndian, u64, Mmap>,
    /// One `u64` per node; `u64::MAX` is read back as "no length".
    content_length: NumberMmap<BigEndian, u64, Mmap>,
}
// Blanket impl: every `Contents` backend is automatically a `MaybeContents`.
impl<C: Contents> MaybeContents for C {}

/// Placeholder for when "contents" properties are not loaded.
///
/// It implements [`MaybeContents`] but not [`Contents`], so the accessors that
/// require a [`Contents`] backend are unavailable while it is in place.
pub struct NoContents;
impl MaybeContents for NoContents {}

/// Trait for backend storage of content properties (either in-memory or memory-mapped)
pub trait Contents {
    /// Borrowed, indexable view over one of the property arrays; indexing yields `u64` values.
    type Data<'a>: GetIndex<Output = u64> + 'a
    where
        Self: 'a;

    /// Returns the "is skipped content" bit vector, packed into `u64` cells.
    ///
    /// The flag of node `i` is bit `i % 64` (counting from the least significant bit)
    /// of cell `i / 64`, as read by [`SwhGraphProperties::try_is_skipped_content`].
    fn is_skipped_content(&self) -> Self::Data<'_>;
    /// Returns the content lengths, indexed by node id.
    ///
    /// A stored `u64::MAX` is interpreted as "no length" by
    /// [`SwhGraphProperties::try_content_length`].
    fn content_length(&self) -> Self::Data<'_>;
}

impl Contents for MappedContents {
    type Data<'a> = &'a NumberMmap<BigEndian, u64, Mmap> where Self: 'a;

    /// Borrows the memory-mapped "is skipped content" bit vector.
    #[inline(always)]
    fn is_skipped_content(&self) -> Self::Data<'_> {
        let Self {
            is_skipped_content: skipped_bits,
            ..
        } = self;
        skipped_bits
    }

    /// Borrows the memory-mapped array of content lengths.
    #[inline(always)]
    fn content_length(&self) -> Self::Data<'_> {
        let Self {
            content_length: lengths,
            ..
        } = self;
        lengths
    }
}

/// In-memory [`Contents`] backend, built with [`VecContents::new`].
pub struct VecContents {
    /// Bit vector packed into `u64` cells: one bit per node, 64 nodes per cell.
    is_skipped_content: Vec<u64>,
    /// One `u64` per node; `u64::MAX` stands for a missing length.
    content_length: Vec<u64>,
}

impl VecContents {
    /// Builds the in-memory content properties from one `(is_skipped, length)` pair per node.
    ///
    /// The position of a pair in `data` is the id of the node it describes.
    /// A `None` length is stored as `u64::MAX`.
    ///
    /// # Errors
    ///
    /// Fails if any length is `Some(u64::MAX)`, as that value is reserved to
    /// encode a missing length.
    pub fn new(data: Vec<(bool, Option<u64>)>) -> Result<Self> {
        const CELL_BITS: usize = u64::BITS as usize;

        let num_nodes = data.len();
        // One bit per node, rounded up to a whole number of 64-bit cells.
        let mut skipped_bits = vec![0u64; num_nodes.div_ceil(CELL_BITS)];
        let mut lengths = Vec::with_capacity(num_nodes);

        for (index, (skipped, maybe_length)) in data.into_iter().enumerate() {
            ensure!(
                maybe_length != Some(u64::MAX),
                "content length may not be {}",
                u64::MAX
            );
            lengths.push(match maybe_length {
                Some(length) => length,
                None => u64::MAX,
            });
            if skipped {
                skipped_bits[index / CELL_BITS] |= 1u64 << (index % CELL_BITS);
            }
        }

        Ok(Self {
            is_skipped_content: skipped_bits,
            content_length: lengths,
        })
    }
}

impl Contents for VecContents {
    type Data<'a> = &'a [u64] where Self: 'a;

    /// Borrows the in-memory "is skipped content" bit vector as a slice.
    #[inline(always)]
    fn is_skipped_content(&self) -> Self::Data<'_> {
        &self.is_skipped_content[..]
    }

    /// Borrows the in-memory content lengths as a slice.
    #[inline(always)]
    fn content_length(&self) -> Self::Data<'_> {
        &self.content_length[..]
    }
}

impl<
        MAPS: MaybeMaps,
        TIMESTAMPS: MaybeTimestamps,
        PERSONS: MaybePersons,
        STRINGS: MaybeStrings,
        LABELNAMES: MaybeLabelNames,
    > SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, NoContents, STRINGS, LABELNAMES>
{
    /// Consumes a [`SwhGraphProperties`] and returns a new one with these methods
    /// available:
    ///
    /// * [`SwhGraphProperties::is_skipped_content`]
    /// * [`SwhGraphProperties::content_length`]
    ///
    /// # Errors
    ///
    /// Fails if either of the two content property files cannot be loaded.
    pub fn load_contents(
        self,
    ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, MappedContents, STRINGS, LABELNAMES>>
    {
        // One bit per node, packed into 64-bit cells.
        let num_cells = self.num_nodes.div_ceil(u64::BITS as usize);

        let is_skipped_content =
            NumberMmap::new(suffix_path(&self.path, CONTENT_IS_SKIPPED), num_cells)
                .context("Could not load is_skipped_content")?;
        let content_length =
            NumberMmap::new(suffix_path(&self.path, CONTENT_LENGTH), self.num_nodes)
                .context("Could not load content_length")?;

        self.with_contents(MappedContents {
            is_skipped_content,
            content_length,
        })
    }

    /// Alternative to [`load_contents`](Self::load_contents) that allows using arbitrary
    /// contents implementations
    ///
    /// All other properties are carried over unchanged.
    pub fn with_contents<CONTENTS: Contents>(
        self,
        contents: CONTENTS,
    ) -> Result<SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>> {
        // Take `self` apart, discarding the `NoContents` placeholder.
        let SwhGraphProperties {
            maps,
            timestamps,
            persons,
            contents: _,
            strings,
            label_names,
            path,
            num_nodes,
        } = self;

        Ok(SwhGraphProperties {
            maps,
            timestamps,
            persons,
            contents,
            strings,
            label_names,
            path,
            num_nodes,
        })
    }
}

/// Functions to access properties of `content` nodes
///
/// Only available after calling [`load_contents`](SwhGraphProperties::load_contents)
/// or [`load_all_properties`](crate::graph::SwhBidirectionalGraph::load_all_properties)
impl<
        MAPS: MaybeMaps,
        TIMESTAMPS: MaybeTimestamps,
        PERSONS: MaybePersons,
        CONTENTS: Contents,
        STRINGS: MaybeStrings,
        LABELNAMES: MaybeLabelNames,
    > SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
{
    /// Returns whether the node is a skipped content
    ///
    /// Non-content objects get a `false` value, like non-skipped contents.
    ///
    /// # Panics
    ///
    /// If the node id does not exist, or if the contents backend holds fewer
    /// bits than there are nodes.
    #[inline]
    pub fn is_skipped_content(&self, node_id: NodeId) -> bool {
        self.try_is_skipped_content(node_id)
            .unwrap_or_else(|e| panic!("Cannot get is_skipped_content bit of node: {}", e))
    }

    /// Returns whether the node is a skipped content, or `Err` if the node id does not exist
    ///
    /// Non-content objects get a `false` value, like non-skipped contents.
    ///
    /// # Errors
    ///
    /// Returns [`OutOfBoundError`] if `node_id` is not lower than the number of nodes,
    /// or if the contents backend is too short to hold the bit of that node.
    #[inline]
    pub fn try_is_skipped_content(&self, node_id: NodeId) -> Result<bool, OutOfBoundError> {
        const CELL_BITS: usize = u64::BITS as usize;

        if node_id >= self.num_nodes {
            return Err(OutOfBoundError {
                index: node_id,
                len: self.num_nodes,
            });
        }

        let cells = self.contents.is_skipped_content();
        let mask = 1u64 << (node_id % CELL_BITS);

        // Checked access on purpose: backends supplied through `with_contents`
        // are not validated against `num_nodes`, so an unchecked read here could
        // go out of bounds from safe code.
        match cells.get(node_id / CELL_BITS) {
            Some(cell) => Ok((cell & mask) != 0),
            None => Err(OutOfBoundError {
                index: node_id,
                // Number of nodes the backend actually covers. This cannot
                // overflow: the cell count is at most `node_id / CELL_BITS` here.
                len: cells.len() * CELL_BITS,
            }),
        }
    }

    /// Returns the length of the given content.
    ///
    /// May be `None` for skipped contents
    ///
    /// # Panics
    ///
    /// If the node id does not exist.
    #[inline]
    pub fn content_length(&self, node_id: NodeId) -> Option<u64> {
        self.try_content_length(node_id)
            .unwrap_or_else(|e| panic!("Cannot get content length: {}", e))
    }

    /// Returns the length of the given content, or `Err` if the node id does not exist
    ///
    /// May be `Ok(None)` for skipped contents
    #[inline]
    pub fn try_content_length(&self, node_id: NodeId) -> Result<Option<u64>, OutOfBoundError> {
        match self.contents.content_length().get(node_id) {
            None => Err(OutOfBoundError {
                // id does not exist
                index: node_id,
                len: self.contents.content_length().len(),
            }),
            Some(u64::MAX) => Ok(None), // Skipped content with no length
            Some(length) => Ok(Some(length)),
        }
    }
}