1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
use smartnoise_validator::errors::*;

use ndarray::prelude::*;
use crate::NodeArguments;
use smartnoise_validator::base::{Value, Array, ReleaseNode, IndexKey};
use crate::components::Evaluable;

use smartnoise_validator::proto;
use smartnoise_validator::utilities::{array::get_ith_column, take_argument};
use crate::utilities::standardize_columns;
use indexmap::map::IndexMap;

impl Evaluable for proto::ToDataframe {
    fn evaluate(&self, _privacy_definition: &Option<proto::PrivacyDefinition>, mut arguments: NodeArguments) -> Result<ReleaseNode> {
        // force the input to be an array- reject indexmap and jagged
        let data = take_argument(&mut arguments, "data")?.array()?;

        let column_names  = take_argument(&mut arguments, "names")?
            .array()?.string()?;

        // num columns is sufficient shared information to build the dataframe
        let num_columns = match column_names.clone().into_dimensionality::<Ix1>() {
            Ok(column_names) => column_names,
            Err(_) => return Err("column names must be one-dimensional".into())
        }.to_vec().len();

        // split each column name into its own column
        Ok(ReleaseNode::new(Value::Dataframe(match data {
            Array::Float(array) => {
                let standardized = standardize_columns(array, num_columns)?;
                column_names.into_iter().enumerate()
                    .map(|(idx, name)| Ok((name.to_string().into(), get_ith_column(&standardized, idx)?.into())))
                    .collect::<Result<IndexMap<IndexKey, Value>>>()?
            }
            Array::Int(array) => {
                let standardized = standardize_columns(array, num_columns)?;
                column_names.into_iter().enumerate()
                    .map(|(idx, name)| Ok((name.to_string().into(), get_ith_column(&standardized, idx)?.into())))
                    .collect::<Result<IndexMap<IndexKey, Value>>>()?
            }
            Array::Bool(array) => {
                let standardized = standardize_columns(array, num_columns)?;
                column_names.into_iter().enumerate()
                    .map(|(idx, name)| Ok((name.to_string().into(), get_ith_column(&standardized, idx)?.into())))
                    .collect::<Result<IndexMap<IndexKey, Value>>>()?
            }
            Array::Str(array) => {
                let standardized = standardize_columns(array, num_columns)?;
                column_names.into_iter().enumerate()
                    .map(|(idx, name)| Ok((name.to_string().into(), get_ith_column(&standardized, idx)?.into())))
                    .collect::<Result<IndexMap<IndexKey, Value>>>()?
            }
        })))
    }
}