flowistry_pdg/
pdg.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
//! The representation of the PDG.

use std::{collections::HashMap, fmt};

use allocative::{ident_key, Allocative};
use internment::Intern;
use serde::{Deserialize, Serialize};

use crate::rustc_portable::*;
#[cfg(feature = "rustc")]
use crate::rustc_proxies;

/// Extends a MIR body's `Location` with `Start` (before the first instruction) and `End` (after all returns).
#[derive(PartialEq, Eq, Hash, Clone, Copy, Debug, Serialize, Deserialize)]
pub enum RichLocation {
    /// The point *after* a location in a body.
    #[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::Location"))]
    Location(Location),

    /// The start of the body.
    ///
    /// Note that [`Location::START`] is different from [`RichLocation::Start`]!
    /// The latter is *before* the former in time.
    Start,

    /// The end of the body, after all possible return statements.
    End,
}

impl Allocative for RichLocation {
    fn visit<'a, 'b: 'a>(&self, visitor: &'a mut allocative::Visitor<'b>) {
        visitor.visit_simple_sized::<Self>();
    }
}

impl RichLocation {
    /// Returns true if this is a `Start` location.
    pub fn is_start(self) -> bool {
        matches!(self, RichLocation::Start)
    }

    /// Returns true if this is an `End` location.
    pub fn is_end(self) -> bool {
        matches!(self, RichLocation::End)
    }

    pub fn is_real(self) -> bool {
        matches!(self, RichLocation::Location(_))
    }

    /// Returns the [`Location`] in `self`, panicking otherwise.
    pub fn unwrap_location(self) -> Location {
        self.as_location()
            .expect("RichLocation was unexpectedly Start")
    }

    /// Returns the [`Location`] in `self`, returning `None` otherwise.
    pub fn as_location(self) -> Option<Location> {
        match self {
            RichLocation::Location(location) => Some(location),
            RichLocation::Start | RichLocation::End => None,
        }
    }
}

impl fmt::Display for RichLocation {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            RichLocation::Location(loc) => write!(f, "{loc:?}"),
            RichLocation::Start => write!(f, "start"),
            RichLocation::End => write!(f, "end"),
        }
    }
}

impl From<Location> for RichLocation {
    fn from(value: Location) -> Self {
        RichLocation::Location(value)
    }
}

/// A [`RichLocation`] within a specific point in a codebase.
#[derive(PartialEq, Eq, Hash, Clone, Copy, Debug, Serialize, Deserialize, Allocative)]
pub struct GlobalLocation {
    /// The function containing the location.
    #[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::DefId"))]
    #[allocative(visit = allocative_visit_simple_sized)]
    pub function: DefId,

    /// The location of an instruction in the function, or the function's start.
    pub location: RichLocation,
}

#[cfg(not(feature = "rustc"))]

impl fmt::Display for GlobalLocation {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?}::{}", self.function, self.location)
    }
}

/// A location within the global call-graph.
///
/// The first location is the root of the call-graph.
/// The last location is the currently-called function.
///
/// Invariant: a call string should never be empty, i.e.,
/// there should always be at least one [`GlobalLocation`] in a call-string.
///
/// Note: This type is copyable due to interning.
#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
pub struct CallString(Intern<CallStringInner>);

impl Allocative for CallString {
    fn visit<'a, 'b: 'a>(&self, visitor: &'a mut allocative::Visitor<'b>) {
        let mut visitor = visitor.enter_self_sized::<Self>();
        allocative_visit_intern_t(self.0, &mut visitor);
        visitor.exit();
    }
}

thread_local! {
    static TRACK_INTERN_AS_UNIQUE: bool = {
        if let Ok(val) = std::env::var("ALLOCATIVE_TRACK_INTERN_AS_UNIQUE") {
            val == "true" || val == "1"
        } else {
            false
        }
    }
}

pub fn allocative_visit_intern_t<T: Allocative + ?Sized>(
    intern: Intern<T>,
    visitor: &mut allocative::Visitor<'_>,
) {
    let track_as_unique = TRACK_INTERN_AS_UNIQUE.with(|v| *v);
    let ptr_size = std::mem::size_of::<*const T>();
    if !track_as_unique {
        let mut visitor = visitor.enter_self_sized::<Intern<T>>();
        {
            let ptr: &T = intern.as_ref();
            let as_ptr = ptr as *const T as *const ();

            let inner_visitor = visitor.enter_shared(ident_key!(intern_value), ptr_size, as_ptr);
            if let Some(mut visitor) = inner_visitor {
                ptr.visit(&mut visitor);
                visitor.exit();
            }
        }
        visitor.exit();
    } else {
        let mut visitor = visitor.enter_self_sized::<Intern<T>>();
        let inner: &T = intern.as_ref();
        {
            let mut visitor = visitor.enter_unique(ident_key!(pointee), ptr_size);
            inner.visit(&mut visitor);
            visitor.exit();
        }
        visitor.exit();
    }
}

impl Serialize for CallString {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        self.0.as_ref().serialize(serializer)
        // let mut seq = serializer.serialize_seq(Some(self.0.len()))?;
        // for loc in self.0.iter() {
        //     seq.serialize_element(loc)?;
        // }
        // seq.end()
    }
}

impl<'de> Deserialize<'de> for CallString {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let locs = <Box<[GlobalLocation]>>::deserialize(deserializer)?;
        Ok(CallString::new(locs.as_ref()))
    }
}

type CallStringInner = [GlobalLocation];

impl CallString {
    /// Create a new call string from a list of global locations.
    pub fn new(locs: &CallStringInner) -> Self {
        CallString(Intern::from(locs))
    }

    /// Split the leaf (the current instruction) from the caller for the
    /// function (if any) and return both. Same as `(self.leaf(), self.caller())`.
    pub fn pop(self) -> (GlobalLocation, Option<CallString>) {
        let (last, rest) = self
            .0
            .split_last()
            .expect("Invariant broken, call strings must have at least length 1");

        (*last, (!rest.is_empty()).then(|| CallString::new(rest)))
    }

    /// Create an initial call string for the single location `loc`.
    pub fn single(loc: GlobalLocation) -> Self {
        Self::new(&[loc])
    }

    /// Returns the leaf of the call string (the currently-called function).
    pub fn leaf(self) -> GlobalLocation {
        *self.0.last().unwrap()
    }

    /// Returns the call string minus the leaf. Returns `None` if this location
    /// is at the root.
    pub fn caller(self) -> Option<Self> {
        self.pop().1
    }

    /// Returns an iterator over the locations in the call string, starting at
    /// the leaf and going to the root.
    pub fn iter(&self) -> impl DoubleEndedIterator<Item = GlobalLocation> + '_ {
        self.0.iter().rev().copied()
    }

    /// Adds a new call site to the end of the call string.
    pub fn push(self, loc: GlobalLocation) -> Self {
        let string = self.0.iter().copied().chain(Some(loc)).collect::<Box<_>>();
        CallString::new(&string)
    }

    pub fn push_front(self, loc: GlobalLocation) -> Self {
        CallString::new(
            [loc]
                .into_iter()
                .chain(self.0.iter().copied())
                .collect::<Box<_>>()
                .as_ref(),
        )
    }

    pub fn is_at_root(self) -> bool {
        self.0.len() == 1
    }

    pub fn root(self) -> GlobalLocation {
        *self.0.first().unwrap()
    }

    pub fn stable_id(self) -> usize {
        let r: &'static CallStringInner = self.0.as_ref();
        r.as_ptr() as usize
    }

    /// Returns an iterator over the locations in the call string, starting at
    /// the root and going to the leaf.
    pub fn iter_from_root(&self) -> impl DoubleEndedIterator<Item = GlobalLocation> + '_ {
        self.0.iter().copied()
    }

    pub fn len(self) -> usize {
        self.0.len()
    }

    pub fn is_empty(self) -> bool {
        self.0.is_empty()
    }
}

impl fmt::Display for CallString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for (i, loc) in self.0.iter().enumerate() {
            if i > 0 {
                write!(f, "←")?;
            }
            loc.fmt(f)?;
        }
        Ok(())
    }
}

/// Additional information about the source of data.
///
/// If the operation is a function call this contains the argument index
#[derive(
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    Hash,
    Clone,
    Copy,
    Debug,
    Serialize,
    Deserialize,
    strum::EnumIs,
    Allocative,
)]
pub enum SourceUse {
    Operand,
    Argument(u8),
}

/// Additional information about this mutation.
#[derive(
    PartialEq, Eq, Hash, Clone, Copy, Debug, Serialize, Deserialize, strum::EnumIs, Allocative,
)]
pub enum TargetUse {
    /// A function returned, assigning to it's return destination
    Return,
    /// This mutation is a non-function assign
    Assign,
    /// A mutable argument was modified by a function call
    MutArg(u8),
}

/// A function that is fit to be handed to `#[allocative(visit = "...")]` for a
/// map where the key does not implement [`Allocative`], but also has no
/// attached heap data.
///
/// Uses [`SimpleSizedAllocativeWrapper`] under the hood, see its documentation
/// for more information.
pub fn allocative_visit_map_coerce_key<'a, K, V: Allocative>(
    map: &'a HashMap<K, V>,
    visitor: &mut allocative::Visitor<'a>,
) {
    let coerced: &HashMap<SimpleSizedAllocativeWrapper<K>, V> = unsafe { std::mem::transmute(map) };
    coerced.visit(visitor);
}

/// A function fit to be handed to `#[allocative(visit = "...")]`, if the type
/// does not have any attached heap data.
pub fn allocative_visit_simple_sized<T>(_: &T, visitor: &mut allocative::Visitor<'_>) {
    visitor.visit_simple_sized::<T>();
}

/// A wrapper that guarantees to be the same size as `T` and implements the
/// [`Allocative`] trait.
///
/// This uses [`allocative::Visitor::visit_simple_sized`] on `T` for sizing,
/// meaning that `T` will only be recorded with the size of the object itself,
/// *not* with its size in the heap.
///
/// The idea is that you can transmute any object of type `T` or any object that
/// *contains* `T` into one that is (or contains) `SimpleSizedAllocativeWrapper<T>`.
///
/// For example if you want the size of `HashMap<Unsupported, usize>` you can
/// `std::mem::transmute` it to
/// `HashMap<SimpleSizedAllocativeWrapper<Unsupported>, usize>` and then simply
/// ask for the size. However if there is attached heap data, liek in
/// `HashMap<UnsuportedString, usize>`, then the size reported will be incorrect.
#[repr(transparent)]
pub struct SimpleSizedAllocativeWrapper<T>(T);

impl<T> Allocative for SimpleSizedAllocativeWrapper<T> {
    fn visit<'a, 'b: 'a>(&self, visitor: &'a mut allocative::Visitor<'b>) {
        visitor.visit_simple_sized::<T>();
    }
}