1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
// Copyright (C) 2024  The Software Heritage developers
// See the AUTHORS file at the top-level directory of this distribution
// License: GNU General Public License version 3, or any later version
// See top-level LICENSE file for more information

use swh_graph::collections::{AdaptiveNodeSet, NodeSet};
use swh_graph::graph::*;
use swh_graph::SWHType;

#[derive(Debug, PartialEq, Eq)]
pub struct EarliestRevision {
    pub node: usize,
    pub ts: i64,
    pub rev_occurrences: u64,
}

/// Given a content/directory id, returns the id of the oldest revision that contains it
pub fn find_earliest_revision<G>(graph: &G, src: usize) -> Option<EarliestRevision>
where
    G: SwhBackwardGraph + SwhGraphWithProperties,
    <G as SwhGraphWithProperties>::Maps: swh_graph::properties::Maps,
    <G as SwhGraphWithProperties>::Timestamps: swh_graph::properties::Timestamps,
{
    // Initialize the DFS
    let mut stack = Vec::new();
    let mut visited = AdaptiveNodeSet::new(graph.num_nodes());
    stack.push(src);
    visited.insert(src);

    let mut visited_revisions = 0u64;
    // pair of (earliest_rev, earliest_ts)
    let mut earliest_rev: Option<(usize, i64)> = None;

    while let Some(node) = stack.pop() {
        let node_type = graph.properties().node_type(node);
        if node_type == SWHType::Revision {
            visited_revisions += 1;
            let Some(committer_ts) = graph.properties().committer_timestamp(node) else {
                continue;
            };
            if committer_ts != i64::MIN && committer_ts != 0 {
                // exclude missing and zero (= epoch) as plausible earliest timestamps
                // as they are almost certainly bogus values

                // Update earliest_rev if the current node has a lower timestamp
                match earliest_rev {
                    None => earliest_rev = Some((node, committer_ts)),
                    Some((_, earliest_ts)) if committer_ts < earliest_ts => {
                        earliest_rev = Some((node, committer_ts))
                    }
                    _ => (),
                }
            }
        } else {
            // push ancestors to the stack
            for pred in graph.predecessors(node) {
                use swh_graph::SWHType::*;

                if visited.contains(pred) {
                    continue;
                }

                let pred_type = graph.properties().node_type(pred);

                // Only arcs with type cnt:dir,dir:dir,dir:rev
                match (node_type, pred_type) {
                    (Content, Directory) | (Directory, Directory) | (Directory, Revision) => {
                        stack.push(pred);
                        visited.insert(pred);
                    }
                    _ => {}
                }
            }
        }
    }

    earliest_rev.map(|(earliest_rev_id, earliest_ts)| EarliestRevision {
        node: earliest_rev_id,
        ts: earliest_ts,
        rev_occurrences: visited_revisions,
    })
}