1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
//! The SmartNoise rust validator contains methods for evaluating and constructing
//! differentially private analyses.
//!
//! The validator defines a set of statically checkable properties that are
//! necessary for a differentially private analysis, and then checks that the submitted analysis
//! satisfies the properties.
//!
//! The validator also takes simple components from the SmartNoise runtime and combines them
//! into more complex mechanisms.
//!
//! - [Top-level documentation](https://opendp.github.io/smartnoise-core/)

#![warn(unused_extern_crates)]
#![forbid(unsafe_code)]
#![allow(clippy::implicit_hasher)]

// `error_chain!` can recurse deeply
#![recursion_limit = "1024"]
#[macro_use]
extern crate error_chain;
#[macro_use] extern crate indexmap;

use std::collections::{HashMap, HashSet};
use std::iter::FromIterator;

use indexmap::map::IndexMap;

#[doc(hidden)]
pub use errors::*;

use crate::base::{IndexKey, Release, Value, ValueProperties};
// import all trait implementations
use crate::components::*;
use crate::utilities::get_public_arguments;
use crate::utilities::privacy::compute_graph_privacy_usage;

#[doc(hidden)]
pub mod errors {
    // Create the Error, ErrorKind, ResultExt, and Result types
    error_chain! {}
}

#[derive(Debug)]
pub struct Warnable<T: std::fmt::Debug>(T, Vec<Error>);
impl<T: std::fmt::Debug> Warnable<T> {
    pub fn new(value: T) -> Self {
        Warnable(value, Vec::new())
    }
}
impl<T: std::fmt::Debug> From<T> for Warnable<T> {
    fn from(value: T) -> Self {
        Warnable::new(value)
    }
}

// trait which holds `display_chain`

pub mod base;
pub mod bindings;
pub mod utilities;
pub mod components;
pub mod docs;

// include protobuf-generated traits
pub mod proto {
    include!(concat!(env!("OUT_DIR"), "/smartnoise.rs"));
}

// define the useful macro for building hashmaps globally
#[macro_export]
#[doc(hidden)]
macro_rules! hashmap {
    ($( $key: expr => $val: expr ),*) => {{
         #[allow(unused_mut)]
         let mut map = ::std::collections::HashMap::new();
         $( map.insert($key, $val); )*
         map
    }}
}

pub type Float = f64;
pub type Integer = i64;

/// Validate if an analysis is well-formed.
///
/// Checks that the graph is a DAG.
/// Checks that static properties are met on all components.
///
/// Useful for static validation of an analysis.
/// Since some components require public arguments, mechanisms that depend on other mechanisms cannot be verified until the components they depend on have been validated.
///
/// The system may also be run dynamically- prior to expanding each node, calling the expand_component endpoint will also validate the component being expanded.
/// NOTE: Evaluating the graph dynamically opens up additional potential timing attacks.
pub fn validate_analysis(
    privacy_definition: Option<proto::PrivacyDefinition>,
    mut computation_graph: HashMap<u32, proto::Component>,
    mut release: base::Release
) -> Result<()> {
    utilities::propagate_properties(
        &privacy_definition,
        &mut computation_graph,
        &mut release,
        None,
        false)?;
    Ok(())
}


/// Compute overall privacy usage of an analysis.
///
/// The privacy usage is sum of the privacy usages for each node.
/// The Release's actual privacy usage, if defined, takes priority over the maximum allowable privacy usage defined in the Analysis.
pub fn compute_privacy_usage(
    privacy_definition: proto::PrivacyDefinition,
    mut computation_graph: HashMap<u32, proto::Component>,
    mut release: base::Release
) -> Result<proto::PrivacyUsage> {

    let properties = utilities::propagate_properties(
        &Some(privacy_definition.clone()),
        &mut computation_graph,
        &mut release, None, false)?.0;

    let privacy_usage = compute_graph_privacy_usage(
        &computation_graph, &privacy_definition, &properties, &release)?;

    utilities::privacy::privacy_usage_check(&privacy_usage, None, false)?;

    Ok(privacy_usage)
}


/// Generate a json string with a summary/report of the Analysis and Release
pub fn generate_report(
    privacy_definition: proto::PrivacyDefinition,
    computation_graph: HashMap<u32, proto::Component>,
    mut release: base::Release
) -> Result<String> {

    let graph_properties = utilities::propagate_properties(
        &Some(privacy_definition),
        &mut computation_graph.clone(),
        &mut release, None, false)?.0;

    // variable names
    let mut nodes_varnames: HashMap<u32, Vec<IndexKey>> = HashMap::new();

    utilities::get_traversal(&computation_graph)?.iter().for_each(|node_id| {
        let component: proto::Component = computation_graph.get(&node_id).unwrap().to_owned();
        let public_arguments = match utilities::get_public_arguments(&component, &release) {
            Ok(v) => v, Err(_) => return
        };

        // variable names for argument nodes
        let mut arguments_vars: IndexMap<base::IndexKey, Vec<IndexKey>> = IndexMap::new();

        // iterate through argument nodes
        for (field_id, field) in &component.arguments() {
            // get variable names corresponding to that argument
            if let Some(arg_vars) = nodes_varnames.get(field) {
                arguments_vars.insert(field_id.clone(), arg_vars.clone());
            }
        }

        // get variable names for this node
        let node_vars = component.get_names(
            public_arguments,
            arguments_vars,
            release.get(node_id).map(|v| v.value.clone()).as_ref());

        // update names in indexmap
        node_vars.map(|v| nodes_varnames.insert(*node_id, v)).ok();

    });

    // generate summaries for any component that has a release, and has summarize implemented on it
    let release_schemas = computation_graph.iter()
        .map(|(node_id, component)| {
            let public_arguments = utilities::get_public_arguments(&component, &release)?;
            let input_properties = utilities::get_input_properties(&component, &graph_properties)?;
            let variable_names = nodes_varnames.get(&node_id);
            // ignore nodes without released values
            let node_release = match release.get(node_id) {
                Some(node_release) => node_release.value.clone(),
                None => return Ok(None)
            };
            component.summarize(
                *node_id,
                &component,
                public_arguments,
                input_properties,
                &node_release,
                variable_names,
            )
        })
        .collect::<Result<Vec<Option<Vec<utilities::json::JSONRelease>>>>>()?.into_iter()
        .filter_map(|v| v).flat_map(|v| v)
        .collect::<Vec<utilities::json::JSONRelease>>();

    match serde_json::to_string(&release_schemas) {
        Ok(serialized) => Ok(serialized),
        Err(_) => Err("unable to parse report into json".into())
    }
}


/// Estimate the privacy usage necessary to bound accuracy to a given value.
///
/// No context about the analysis is necessary, just the privacy definition and properties of the arguments of the component.
pub fn accuracy_to_privacy_usage(
    component: proto::Component,
    privacy_definition: proto::PrivacyDefinition,
    mut properties: IndexMap<IndexKey, base::ValueProperties>,
    accuracies: proto::Accuracies,
    mut public_arguments: IndexMap<IndexKey, base::ReleaseNode>
) -> Result<proto::PrivacyUsages> {

    let proto_properties = component.arguments().iter()
        .filter_map(|(name, idx)| Some((*idx, properties.remove(name)?)))
        .collect::<HashMap<u32, base::ValueProperties>>();

    let mut release = component.arguments().iter()
        .filter_map(|(name, idx)| Some((*idx, public_arguments.remove(name)?)))
        .collect::<Release>();

    let mut computation_graph = hashmap![
        component.arguments().values().max().cloned().unwrap_or(0) + 1 => component
    ];

    utilities::propagate_properties(
        &Some(privacy_definition.clone()),
        &mut computation_graph,
        &mut release,
        Some(proto_properties),
        true,
    )?;

    let privacy_usages = computation_graph.iter().map(|(idx, component)| {
        Ok(component.accuracy_to_privacy_usage(
            &accuracies,
            get_public_arguments(&component, &release)?)?
            .and_then(|usages| Some((*idx, usages))))
    })
        .collect::<Result<Vec<Option<(u32, Vec<proto::PrivacyUsage>)>>>>()?
        .into_iter().filter_map(|v| v)
        .collect::<HashMap<u32, Vec<proto::PrivacyUsage>>>();

    Ok(proto::PrivacyUsages {
        values: privacy_usages.into_iter().map(|(_, v)| v).collect::<Vec<Vec<proto::PrivacyUsage>>>()
            .first()
            .ok_or_else(|| Error::from("privacy usage is not defined"))?.clone()
    })
}


/// Estimate the accuracy of the release of a component, based on a privacy usage.
///
/// No context about the analysis is necessary, just the properties of the arguments of the component.
pub fn privacy_usage_to_accuracy(
    component: proto::Component,
    privacy_definition: proto::PrivacyDefinition,
    properties: IndexMap<IndexKey, base::ValueProperties>,
    mut public_arguments: IndexMap<IndexKey, base::ReleaseNode>,
    alpha: f64
) -> Result<proto::Accuracies> {

    let proto_properties = component.arguments().iter()
        .filter_map(|(name, idx)| Some((*idx, properties.get(name)?.clone())))
        .collect::<HashMap<u32, base::ValueProperties>>();

    let mut release: Release = component.arguments().iter()
        .filter_map(|(name, idx)| Some((*idx, public_arguments.remove(name)?)))
        .collect();

    let mut computation_graph = hashmap![
        component.arguments().values().max().cloned().unwrap_or(0) + 1 => component
    ];

    utilities::propagate_properties(
        &Some(privacy_definition.clone()),
        &mut computation_graph,
        &mut release,
        Some(proto_properties),
        false,
    )?;

    let accuracies = computation_graph.iter().map(|(idx, component)| {
        Ok(component.privacy_usage_to_accuracy(
            get_public_arguments(&component, &release)?,
            alpha,
        )?
            .and_then(|accuracies| Some((*idx, accuracies))))
    })
        .collect::<Result<Vec<Option<(u32, Vec<proto::Accuracy>)>>>>()?
        .into_iter().filter_map(|v| v)
        .collect::<HashMap<u32, Vec<proto::Accuracy>>>();

    Ok(proto::Accuracies {
        values: accuracies.into_iter().map(|(_, v)| v).collect::<Vec<Vec<proto::Accuracy>>>()
            // TODO: propagate/combine accuracies, don't just take the first
            .first()
            .ok_or_else(|| Error::from("accuracy is not defined"))?.clone()
    })
}

/// Expand a component that may be representable as smaller components, and propagate its properties.
///
/// This is function may be called interactively from the runtime as the runtime executes the computational graph, to allow for dynamic graph validation.
/// This is opposed to statically validating a graph, where the nodes in the graph that are dependent on the releases of mechanisms cannot be known and validated until the first release is made.
pub fn expand_component(
    component: proto::Component,
    mut properties: IndexMap<IndexKey, ValueProperties>,
    public_arguments: IndexMap<IndexKey, base::ReleaseNode>,
    privacy_definition: Option<proto::PrivacyDefinition>,
    component_id: u32,
    maximum_id: u32,
) -> Result<base::ComponentExpansion> {
    let argument_ids = component.arguments();

    for (k, v) in &public_arguments {
        if !v.public {
            return Err("private data should not be sent to the validator".into())
        }
        let node_id = argument_ids.get(k)
            .ok_or_else(|| Error::from(format!("unrecognized argument: {:?}", k)))?;

        properties.insert(
            k.clone(),
            utilities::inference::infer_property(&v.value, properties.get(k), *node_id)?);
    }

    let public_values = public_arguments.iter()
        .map(|(name, release_node)| (name.clone(), &release_node.value))
        .collect::<IndexMap<IndexKey, &Value>>();

    let mut result = component.expand_component(
        &privacy_definition,
        &component,
        &public_values,
        &properties,
        component_id,
        maximum_id,
    ).chain_err(|| format!("at node_id {:?}", component_id))?;

    // annoying block to ensure that
    //    component, properties and public values are updated after the expansion
    let component = if let Some(v) = result.computation_graph
        .get(&component_id) { v.clone() } else { component };
    let argument_indices = component.arguments();
    let properties = argument_indices.iter()
        .filter_map(|(k, id)| Some((k.clone(), result.properties.get(&id).or_else(|| properties.get(k))?.clone())))
        .collect::<IndexMap<_, _>>();
    let public_values = argument_indices.iter()
        .filter_map(|(k, id)| Some((k.clone(), result.releases.get(&id)
            .or_else(|| public_arguments.get(k))
            .map(|v| &v.value)?)))
        .collect::<IndexMap<IndexKey, &Value>>();

    if result.traversal.is_empty() {
        let Warnable(propagated_property, propagation_warnings) = component
            .propagate_property(&privacy_definition, public_values, properties, component_id)
            .chain_err(|| format!("at node_id {:?}", component_id))?;

        result.warnings.extend(propagation_warnings.into_iter()
            .map(|err| err.chain_err(|| format!("at node_id {:?}", component_id))));
        result.properties.insert(component_id.to_owned(), propagated_property);
    }

    Ok(result)
}


/// Retrieve the static properties from every reachable node on the graph.
pub fn get_properties(
    privacy_definition: Option<proto::PrivacyDefinition>,
    mut computation_graph: HashMap<u32, proto::Component>,
    mut release: base::Release,
    node_ids: Vec<u32>
) -> Result<(HashMap<u32, ValueProperties>, Vec<Error>)> {

    if !node_ids.is_empty() {
        let mut ancestors = HashSet::<u32>::new();
        let mut traversal = Vec::from_iter(node_ids.into_iter());
        while !traversal.is_empty() {
            let node_id = traversal.pop().unwrap();
            if let Some(component) = computation_graph.get(&node_id){
                component.arguments().values().for_each(|v| traversal.push(*v))
            }
            ancestors.insert(node_id);
        }
        computation_graph = computation_graph.iter()
            .filter(|(idx, _)| ancestors.contains(idx))
            .map(|(idx, component)| (*idx, component.clone()))
            .collect::<HashMap<u32, proto::Component>>();
        release = release.iter()
            .filter(|(idx, _)| ancestors.contains(idx))
            .map(|(idx, release_node)|
                (*idx, release_node.clone()))
            .collect();
    }

    // don't return all properties- only those in the original graph
    let keep_ids = HashSet::<u32>::from_iter(computation_graph.keys().cloned());

    let (mut properties, warnings) = utilities::propagate_properties(
        &privacy_definition, &mut computation_graph,
        &mut release, None, true,
    )?;

    properties.retain(|node_id, _| keep_ids.contains(node_id));
    Ok((properties, warnings))
}