1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
// Copyright (C) 2023  The Software Heritage developers
// See the AUTHORS file at the top-level directory of this distribution
// License: GNU General Public License version 3, or any later version
// See top-level LICENSE file for more information

use std::fs::File;
use std::marker::PhantomData;
use std::path::Path;

use anyhow::{bail, Context, Result};
use byteorder::ByteOrder;
use mmap_rs::{Mmap, MmapFlags};

/// Typed view over a byte buffer (typically an [`Mmap`]) holding a packed
/// array of integers, so that items can be read as numbers instead of
/// slices of `u8`.
///
/// * `E` is the byte order the items are encoded with,
/// * `N` is the integer type of the items,
/// * `B` is the underlying byte buffer.
pub struct NumberMmap<E, N, B>
where
    E: ByteOrder,
    N: common_traits::AsBytes,
{
    /// Backing bytes
    data: B,
    /// Number of items of type `N`, as reported by `len()`
    len: usize,
    /// Number of bytes to skip in `data` before the first item
    offset: usize,
    _number: PhantomData<N>,
    _endianness: PhantomData<E>,
}

impl<E: ByteOrder, N: common_traits::AsBytes> NumberMmap<E, N, Mmap> {
    /// Memory-maps the file at `path` as an array of `len` items of type `N`.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be stat'ed or opened, if it holds
    /// fewer than `len` items, if `len * N::BYTES` does not fit in a `usize`,
    /// or if the mapping itself fails.
    pub fn new<P: AsRef<Path>>(path: P, len: usize) -> Result<NumberMmap<E, N, Mmap>> {
        let path = path.as_ref();
        let file_len = path
            .metadata()
            .with_context(|| format!("Could not stat {}", path.display()))?
            .len();
        // Checked, because a wrapped product would make the size check below
        // pass for a file that is actually far too short.
        let expected_len = len.checked_mul(N::BYTES).with_context(|| {
            format!(
                "Size of {} items of {} bytes each overflows usize",
                len,
                N::BYTES
            )
        })?;
        if file_len < expected_len as u64 {
            // Only a *lower* bound is enforced: files longer than expected
            // must be accepted because graphs compressed with the Java
            // implementation used zero padding at the end
            bail!(
                "{} is too short: expected at least {} bytes ({} items), got {}",
                path.display(),
                expected_len,
                len,
                file_len,
            );
        }
        let file =
            File::open(path).with_context(|| format!("Could not open {}", path.display()))?;
        Self::new_with_file_and_offset(path, len, file, 0)
    }

    /// Memory-maps an already opened `file` as an array of `len` items of
    /// type `N`, the first of which starts `offset` bytes into the file.
    ///
    /// The file is always mapped from its very beginning; `offset` is added
    /// to the byte position of every item when it is read. The mapping
    /// therefore spans `offset + len * N::BYTES` bytes, so that the last item
    /// is covered too.
    ///
    /// `path` is only used to build error messages. This function does not
    /// check that `file` is large enough to back the whole mapping.
    ///
    /// # Errors
    ///
    /// Returns an error if the size of the mapping overflows `usize`, or if
    /// the mapping cannot be created.
    pub fn new_with_file_and_offset<P: AsRef<Path>>(
        path: P,
        len: usize,
        file: File,
        offset: usize,
    ) -> Result<NumberMmap<E, N, Mmap>> {
        let path = path.as_ref();
        // Items live in `offset..offset + len * N::BYTES`; mapping only
        // `len * N::BYTES` bytes would leave the trailing items outside of
        // the mapping whenever `offset` is non-zero.
        let map_len = len
            .checked_mul(N::BYTES)
            .and_then(|items_len| items_len.checked_add(offset))
            .with_context(|| {
                format!(
                    "Size of {} items of {} bytes each at offset {} overflows usize",
                    len,
                    N::BYTES,
                    offset
                )
            })?;
        // SAFETY: NOTE(review): soundness of a file-backed mapping relies on
        // the file not being truncated or modified while it is mapped; this
        // is not enforced here and is left to the caller -- confirm.
        let data = unsafe {
            mmap_rs::MmapOptions::new(map_len as _)
                .with_context(|| format!("Could not initialize mmap of size {}", map_len))?
                .with_flags(MmapFlags::TRANSPARENT_HUGE_PAGES)
                .with_file(file, 0)
                .map()
                .with_context(|| format!("Could not mmap {}", path.display()))?
        };
        // Access-pattern hint only; its return value is deliberately ignored.
        // SAFETY: the pointer and length describe the mapping created just
        // above, which is still alive.
        #[cfg(target_os = "linux")]
        unsafe {
            libc::madvise(data.as_ptr() as *mut _, data.len(), libc::MADV_RANDOM)
        };

        // Sanity check on the region holding the items, i.e. the mapping
        // minus the leading `offset` bytes.
        if data.len().saturating_sub(offset) % N::BYTES != 0 {
            bail!(
                "Cannot interpret mmap of size {} as array of {}",
                data.len(),
                std::any::type_name::<N>()
            );
        }
        Ok(NumberMmap {
            data,
            len,
            offset,
            _number: PhantomData,
            _endianness: PhantomData,
        })
    }

    /// Returns the number of items (not bytes) in the array.
    #[allow(clippy::len_without_is_empty)]
    pub fn len(&self) -> usize {
        self.len
    }
}

impl<E: ByteOrder, N: common_traits::AsBytes> NumberMmap<E, N, Mmap> {
    /// Returns the raw bytes of the item at `index`, or `None` if `index` is
    /// not lower than `len()` or its bytes are not within the mapped data.
    fn get_slice(&self, index: usize) -> Option<&[u8]> {
        if index >= self.len {
            return None;
        }
        // Checked arithmetic: in release builds a wrapped product could land
        // back inside the mapping and silently return the bytes of an
        // unrelated item instead of `None`.
        let start = index.checked_mul(N::BYTES)?.checked_add(self.offset)?;
        let end = start.checked_add(N::BYTES)?;
        self.data.get(start..end)
    }

    /// Returns the raw bytes of the item at `index`, without bounds checks.
    ///
    /// # Safety
    ///
    /// `index` must be lower than `len()`, and the `N::BYTES` bytes starting
    /// at `index * N::BYTES + offset` must lie within the mapped data.
    unsafe fn get_slice_unchecked(&self, index: usize) -> &[u8] {
        let start = (index * N::BYTES) + self.offset;
        // SAFETY: the caller guarantees that `start..start + N::BYTES` is
        // within the bounds of `self.data`.
        unsafe { self.data.get_unchecked(start..(start + N::BYTES)) }
    }
}

// Implements `crate::utils::GetIndex` for `&NumberMmap<E, $int, Mmap>`.
//
// `$int` is the item type and `$decode` the name of the `ByteOrder` function
// turning the bytes of one item into a `$int`.
macro_rules! impl_number_mmap {
    ($int:ty, $decode:ident) => {
        impl<E: ByteOrder> crate::utils::GetIndex for &NumberMmap<E, $int, Mmap> {
            type Output = $int;

            fn len(&self) -> usize {
                NumberMmap::len(self)
            }

            /// Returns the item at `index`, or `None` if no bytes are
            /// available for it
            fn get(&self, index: usize) -> Option<$int> {
                let bytes = self.get_slice(index)?;
                Some(E::$decode(bytes))
            }

            /// Returns the item at `index` without any bounds check
            ///
            /// # Safety
            ///
            /// Undefined behavior if `index >= len()`
            unsafe fn get_unchecked(&self, index: usize) -> $int {
                let bytes = self.get_slice_unchecked(index);
                E::$decode(bytes)
            }
        }
    };
}

// Generate the `GetIndex` implementations of `&NumberMmap<E, _, Mmap>` for
// each supported item type, paired with the `ByteOrder` function decoding it.
impl_number_mmap!(i16, read_i16);
impl_number_mmap!(u32, read_u32);
impl_number_mmap!(i64, read_i64);
impl_number_mmap!(u64, read_u64);