1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
// Copyright (C) 2024  The Software Heritage developers
// See the AUTHORS file at the top-level directory of this distribution
// License: GNU General Public License version 3, or any later version
// See top-level LICENSE file for more information

use anyhow::{bail, Result};
use sux::prelude::BitVec;

use swh_graph::collections::PathStack;
use swh_graph::collections::{AdaptiveNodeSet, NodeSet};
use swh_graph::graph::*;
use swh_graph::labels::FilenameId;
use swh_graph::SWHType;

/// Yielded by `dfs_with_path` to allow building a path as a `Vec<u8>` only when needed
pub struct PathParts<'a> {
    parts: &'a [FilenameId],
    path_to_directory: bool,
}

impl<'a> PathParts<'a> {
    pub fn build_path<G>(&self, graph: &G) -> Vec<u8>
    where
        G: SwhGraphWithProperties,
        <G as SwhGraphWithProperties>::LabelNames: swh_graph::properties::LabelNames,
    {
        let mut path = Vec::with_capacity(self.parts.len() * 2 + 1);
        for &part in self.parts.iter() {
            path.extend(graph.properties().label_name(part));
            path.push(b'/');
        }
        if !self.path_to_directory {
            path.pop();
        }
        path
    }
}

/// Traverses backward from a directory or content, calling the callbacks on any directory
/// or revision node found.
///
/// `reachable_nodes` is the set of all nodes reachable from a head revision/release
/// (nodes not in the set are ignored).
///
/// If `on_directory` returns `false`, the directory's predecessors are ignored.
///
/// FIXME: `on_directory` is always called on the `root`, even if the `root` is a content
pub fn backward_dfs_with_path<G>(
    graph: &G,
    reachable_nodes: &BitVec,
    mut on_directory: impl FnMut(NodeId, PathParts) -> Result<bool>,
    mut on_revrel: impl FnMut(NodeId, PathParts) -> Result<()>,
    root: NodeId,
) -> Result<()>
where
    G: SwhLabelledBackwardGraph + SwhGraphWithProperties,
    <G as SwhGraphWithProperties>::LabelNames: swh_graph::properties::LabelNames,
    <G as SwhGraphWithProperties>::Maps: swh_graph::properties::Maps,
{
    if !reachable_nodes.get(root) {
        return Ok(());
    }

    let mut visited = AdaptiveNodeSet::new(graph.num_nodes());
    let mut stack = Vec::new();
    let mut path_stack = PathStack::new();

    let root_is_directory = match graph.properties().node_type(root) {
        SWHType::Content => false,
        SWHType::Directory => true,
        _ => panic!(
            "backward_dfs_with_path called started from {}",
            graph.properties().swhid(root)
        ),
    };

    stack.push(root);
    path_stack.push([]);
    visited.insert(root);

    while let Some(node) = stack.pop() {
        let path_parts: Vec<_> = path_stack.pop().unwrap().collect();

        let should_recurse = on_directory(
            node,
            PathParts {
                parts: &path_parts,
                path_to_directory: root_is_directory,
            },
        )?;

        if should_recurse {
            // Look for frontiers in subdirectories
            for (pred, labels) in graph.labelled_predecessors(node) {
                if !reachable_nodes.get(pred) || visited.contains(pred) {
                    continue;
                }

                visited.insert(pred);

                match graph.properties().node_type(pred) {
                    SWHType::Directory => {
                        // If the same subdir/file is present in a directory twice under the same name,
                        // pick any name to represent both.
                        let Some(first_label) = labels.into_iter().next() else {
                            bail!(
                                "{} <- {} has no labels",
                                graph.properties().swhid(node),
                                graph.properties().swhid(pred),
                            )
                        };

                        // This is a dir->* arc, so its label is necessarily a DirEntry
                        let first_label: swh_graph::labels::DirEntry = first_label.into();

                        stack.push(pred);
                        path_stack.push(path_parts.iter().copied());
                        path_stack.push_filename(first_label.filename_id());
                    }

                    SWHType::Revision | SWHType::Release => {
                        on_revrel(
                            pred,
                            PathParts {
                                parts: &path_parts,
                                path_to_directory: root_is_directory,
                            },
                        )?;

                        if graph.properties().node_type(pred) == SWHType::Revision {
                            for predpred in graph.predecessors(pred) {
                                if graph.properties().node_type(predpred) == SWHType::Release {
                                    on_revrel(
                                        predpred,
                                        PathParts {
                                            parts: &path_parts,
                                            path_to_directory: root_is_directory,
                                        },
                                    )?;
                                }
                            }
                        }
                    }
                    _ => (),
                }
            }
        }
    }

    Ok(())
}