paralegal_flow/
discover.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
//! MIR visitor ([`CollectingVisitor`]) that populates the [`MarkerDatabase`]
//! and discovers functions marked for analysis.
//!
//! Essentially this discovers all local `paralegal_flow::*` annotations.

use std::rc::Rc;

use crate::{
    ana::{InlineJudge, SPDGGenerator},
    desc::*,
    stats::Stats,
    sym_vec,
    utils::*,
};

use flowistry_pdg_construction::body_cache::BodyCache;

use rustc_hir::{
    def_id::LocalDefId,
    intravisit::{self, FnKind},
    BodyId,
};
use rustc_middle::{hir::nested_filter::OnlyBodies, ty::TyCtxt};
use rustc_span::{symbol::Ident, Span, Symbol};

use anyhow::Result;

use self::resolve::expect_resolve_string_to_def_id;

/// Values of this type can be matched against Rust attributes.
///
/// It is a path given as a sequence of interned symbols, e.g. the segments
/// `paralegal_flow` and `analyze` for the attribute
/// `#[paralegal_flow::analyze]`.
pub type AttrMatchT = Vec<Symbol>;

/// This visitor traverses the items in the analyzed crate to discover
/// annotations and analysis targets and store them in this struct. After the
/// discovery phase the visitor is turned into an [`SPDGGenerator`], whose
/// `analyze` method drives the actual analysis. All of this is conveniently
/// encapsulated in the [`Self::run`] method.
pub struct CollectingVisitor<'tcx> {
    /// Reference to rust compiler queries.
    pub tcx: TyCtxt<'tcx>,
    /// Command line arguments.
    pub opts: &'static crate::Args,
    /// Functions that are annotated with `#[paralegal_flow::analyze]`, plus
    /// the targets selected explicitly via the command line options. For
    /// these we will later perform the analysis.
    pub functions_to_analyze: Vec<FnToAnalyze>,

    /// Statistics handle. The visitor only stores it and hands it on to the
    /// [`SPDGGenerator`].
    stats: Stats,

    /// Built in [`Self::new`] from the compiler context, the body cache and
    /// the options; handed on to the [`SPDGGenerator`].
    inline_judge: InlineJudge<'tcx>,

    /// Body cache shared (via `Rc`) between `inline_judge` and the
    /// [`SPDGGenerator`].
    body_cache: Rc<BodyCache<'tcx>>,
    /// This will match the annotation `#[paralegal_flow::analyze]` when using
    /// [`MetaItemMatch::match_extract`](crate::utils::MetaItemMatch::match_extract)
    analyze_marker: AttrMatchT,
}

/// A function we will be targeting for analysis.
///
/// Instances are collected by [`CollectingVisitor`] and passed on to the
/// `analyze` method of the [`SPDGGenerator`].
pub struct FnToAnalyze {
    /// Identifier (name and span) of the function item.
    pub name: Ident,
    /// Id of the function definition; targets are always local to the
    /// analyzed crate.
    pub def_id: LocalDefId,
}

impl FnToAnalyze {
    /// Give me a name that describes this function.
    pub fn name(&self) -> Symbol {
        self.name.name
    }
}

impl<'tcx> CollectingVisitor<'tcx> {
    /// Create a visitor and seed [`Self::functions_to_analyze`] with the
    /// targets selected explicitly in `opts`.
    ///
    /// Each selected path is resolved to a definition. Paths for which the
    /// resolver yields nothing are skipped. A target that is not local to the
    /// analyzed crate, or that has no identifier, is reported as a compiler
    /// error and skipped instead of aborting with a panic.
    pub(crate) fn new(tcx: TyCtxt<'tcx>, opts: &'static crate::Args, stats: Stats) -> Self {
        let functions_to_analyze = opts
            .anactrl()
            .selected_targets()
            .iter()
            .filter_map(|path| {
                let def_id = expect_resolve_string_to_def_id(tcx, path, opts.relaxed())?;
                // `as_local` doubles as the locality check, so there is no
                // separate `is_local` test that could get out of sync.
                let Some(local_def_id) = def_id.as_local() else {
                    tcx.dcx().span_err(
                        tcx.def_span(def_id),
                        format!("found an external function {def_id:?} as analysis target. Analysis targets are required to be local."),
                    );
                    return None;
                };
                // The path comes from user input, so a definition without an
                // identifier must produce a diagnostic rather than a panic.
                let Some(name) = tcx.opt_item_ident(def_id) else {
                    tcx.dcx().span_err(
                        tcx.def_span(def_id),
                        format!("analysis target {def_id:?} has no name. Analysis targets are required to be named functions."),
                    );
                    return None;
                };
                Some(FnToAnalyze {
                    def_id: local_def_id,
                    name,
                })
            })
            .collect();
        let body_cache = Rc::new(BodyCache::new(tcx));
        let inline_judge = InlineJudge::new(tcx, body_cache.clone(), opts);
        Self {
            tcx,
            opts,
            functions_to_analyze,
            inline_judge,
            stats,
            analyze_marker: sym_vec!["paralegal_flow", "analyze"],
            body_cache,
        }
    }

    /// After running the discovery with `visit_all_item_likes_in_crate`, create
    /// the [`SPDGGenerator`] upon which the analysis will run. Consumes the
    /// visitor; the collected targets are not carried over and must be taken
    /// out beforehand.
    fn into_generator(self) -> SPDGGenerator<'tcx> {
        SPDGGenerator::new(
            self.inline_judge,
            self.opts,
            self.tcx,
            self.body_cache,
            self.stats,
        )
    }

    /// Driver function. Performs the data collection via visit, then converts
    /// the visitor into an [`SPDGGenerator`] and calls its `analyze` method on
    /// the collected targets to construct the [`ProgramDescription`] of all
    /// endpoints.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the generator's `analyze` method.
    pub fn run(mut self) -> Result<(ProgramDescription, AnalyzerStats)> {
        let tcx = self.tcx;
        tcx.hir().visit_all_item_likes_in_crate(&mut self);
        // Move the targets out first: `into_generator` consumes `self`.
        let targets = std::mem::take(&mut self.functions_to_analyze);
        self.into_generator().analyze(targets)
    }

    /// Does the function named by this id have the `paralegal_flow::analyze` annotation
    fn should_analyze_function(&self, ident: LocalDefId) -> bool {
        self.tcx
            .hir()
            .attrs(self.tcx.local_def_id_to_hir_id(ident))
            .iter()
            .any(|a| a.matches_path(&self.analyze_marker))
    }
}

impl<'tcx> intravisit::Visitor<'tcx> for CollectingVisitor<'tcx> {
    type NestedFilter = OnlyBodies;

    fn nested_visit_map(&mut self) -> Self::Map {
        self.tcx.hir()
    }

    /// Records every free function or method carrying the analyze annotation
    /// as an analysis target, then continues the walk into the function.
    fn visit_fn(
        &mut self,
        kind: FnKind<'tcx>,
        declaration: &'tcx rustc_hir::FnDecl<'tcx>,
        body_id: BodyId,
        _s: Span,
        id: LocalDefId,
    ) {
        // Only named functions (items and methods) can be targets; any other
        // kind is never checked for the annotation.
        let candidate = match &kind {
            FnKind::ItemFn(ident, ..) | FnKind::Method(ident, ..) => Some(*ident),
            _ => None,
        };
        if let Some(name) = candidate {
            if self.should_analyze_function(id) {
                let target = FnToAnalyze { name, def_id: id };
                self.functions_to_analyze.push(target);
            }
        }

        // Keep descending. Nothing here relies on nested items yet, but the
        // recursive walk keeps that option open.
        intravisit::walk_fn(self, kind, declaration, body_id, id)
    }
}