1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
use crate::errors::*;

use crate::base::{Array, Value, ValueProperties, IndexKey};

use crate::{proto, base, Warnable};
use crate::components::{Component, Named};

use ndarray::ArrayD;
use ndarray::prelude::*;
use crate::utilities::{get_argument};
use indexmap::map::IndexMap;
use crate::utilities::properties::{select_properties, stack_properties};

impl Component for proto::Index {
    /// Derives the properties of the value produced by indexing into `data`.
    ///
    /// The `data` property decides which selectors are honoured:
    ///
    /// * `Dataframe` - columns are selected by the public `names` argument,
    ///   else by `indices` (positional), else by a boolean `mask`.
    /// * `Partitions` - the public `names` argument is turned into a single
    ///   `IndexKey` and the properties of that partition are returned as-is.
    /// * `Array` - columns are selected by `indices`, else by a boolean `mask`.
    /// * `Jagged` / `Function` - indexing is rejected.
    ///
    /// For the dataframe and array cases the per-column properties that were
    /// selected are combined with `stack_properties`.
    ///
    /// # Errors
    ///
    /// Returns an error when `data` has no properties, when no applicable
    /// selector is supplied, when a selector is malformed (float names,
    /// a mask that is not 1-dimensional or has the wrong length), or when a
    /// selector refers to a column/partition that does not exist.
    fn propagate_property(
        &self,
        _privacy_definition: &Option<proto::PrivacyDefinition>,
        mut public_arguments: IndexMap<base::IndexKey, &Value>,
        properties: base::NodeProperties,
        node_id: u32
    ) -> Result<Warnable<ValueProperties>> {
        let data_property = properties.get::<IndexKey>(&"data".into())
            .ok_or("data: missing")?.clone();

        // Handed to `stack_properties` below: one more than the number of axes of
        // the names/indices argument, or always 2 when a mask is used.
        let mut dimensionality = None;

        let properties = match data_property {
            ValueProperties::Dataframe(data_property) => {
                if let Some(column_names) = public_arguments.get::<IndexKey>(&"names".into()) {
                    let column_names = column_names.ref_array()?;
                    dimensionality = Some(column_names.shape().len() as i64 + 1);

                    // Convert every supported name type to an `IndexKey` once, so that
                    // the child lookup is written a single time.
                    let keys: Vec<IndexKey> = match column_names.to_owned() {
                        Array::Float(_) => return Err("floats are not valid indexes".into()),
                        Array::Int(names) => to_name_vec(names)?.into_iter()
                            .map(Into::<IndexKey>::into)
                            .collect(),
                        Array::Str(names) => to_name_vec(names)?.into_iter()
                            .map(Into::<IndexKey>::into)
                            .collect(),
                        Array::Bool(names) => to_name_vec(names)?.into_iter()
                            .map(Into::<IndexKey>::into)
                            .collect(),
                    };

                    // Collecting into `Option` yields `None` as soon as one name is unknown.
                    keys.iter()
                        .map(|key| data_property.children.get::<IndexKey>(key).cloned())
                        .collect::<Option<Vec<ValueProperties>>>()
                        .ok_or_else(|| Error::from("columns: unknown column in index"))

                } else if let Some(indices) = public_arguments.remove::<IndexKey>(&"indices".into()) {
                    let indices = indices.clone().array()?.int()?;
                    dimensionality = Some(indices.shape().len() as i64 + 1);

                    // NOTE(review): `as usize` does not reject negative indices; presumably
                    // the converted value is out of range and `get_index` returns `None` - confirm.
                    to_name_vec(indices)?.into_iter()
                        .map(|idx| data_property.children.get_index(idx as usize).map(|v| v.1.clone()))
                        .collect::<Option<Vec<ValueProperties>>>()
                        .ok_or_else(|| Error::from("index does not exist"))

                } else if let Some(mask) = public_arguments.remove::<IndexKey>(&"mask".into()) {
                    let mask = mask.clone().array()?.bool()?;
                    if mask.shape().len() != 1 {
                        return Err("mask: must be 1-dimensional".into())
                    }
                    dimensionality = Some(2);
                    let mask = to_name_vec(mask)?;
                    if mask.len() != data_property.children.len() {
                        return Err("mask: must be same length as the number of columns".into())
                    }
                    // Keep the properties of exactly those columns whose mask entry is true.
                    Ok(data_property.children.into_iter().zip(mask)
                        .filter(|(_, m)| *m).map(|(v, _)| v.1)
                        .collect::<Vec<ValueProperties>>())
                } else {
                    return Err("one of names, indices or mask must be supplied".into())
                }
            },

            ValueProperties::Partitions(data_property) => {
                let names = get_argument(&public_arguments, "names")?
                    .to_owned().array()?;

                let partition_key = IndexKey::new(names)?;
                let part_properties = data_property.children.get::<IndexKey>(&partition_key)
                    .ok_or_else(|| format!("unknown partition index: {:?}", partition_key))?.clone();

                // A single partition is returned directly; nothing is stacked.
                return Ok(Warnable::new(part_properties))
            },

            ValueProperties::Array(data_property) => {
                // Arrays that are not releasable must pass the aggregation check first.
                if !data_property.releasable {
                    data_property.assert_is_not_aggregated()?;
                }

                if let Some(indices) = public_arguments.remove::<IndexKey>(&"indices".into()) {
                    let indices = indices.clone().array()?.int()?;
                    dimensionality = Some(indices.shape().len() as i64 + 1);

                    // NOTE(review): negative indices are not rejected by `as usize`;
                    // presumably `select_properties` fails on the out-of-range value - confirm.
                    to_name_vec(indices)?.into_iter()
                        .map(|idx| select_properties(&data_property, idx as usize))
                        .collect::<Result<Vec<ValueProperties>>>()

                } else if let Some(mask) = public_arguments.remove::<IndexKey>(&"mask".into()) {
                    let mask = mask.clone().array()?.bool()?;
                    if mask.shape().len() != 1 {
                        return Err("mask: must be 1-dimensional".into())
                    }
                    dimensionality = Some(2);
                    let mask = to_name_vec(mask)?;
                    if mask.len() != data_property.num_columns()? as usize {
                        return Err("mask: must be same length as the number of columns".into())
                    }
                    // Select the properties of every column position whose mask entry is true.
                    mask.into_iter().enumerate().filter(|(_, mask)| *mask)
                        .map(|(idx, _)| select_properties(&data_property, idx))
                        .collect::<Result<Vec<ValueProperties>>>()
                } else {
                    return Err("either indices or mask must be supplied".into())
                }
            },
            ValueProperties::Jagged(_) => Err("indexing is not supported on vectors".into()),
            ValueProperties::Function(_) => Err("indexing is not supported on functions".into())
        }?;

        stack_properties(&properties, dimensionality, node_id).map(Warnable::new)
    }
}

impl Named for proto::Index {
    fn get_names(
        &self,
        public_arguments: IndexMap<base::IndexKey, &Value>,
        argument_variables: IndexMap<base::IndexKey, Vec<IndexKey>>,
        _release: Option<&Value>
    ) -> Result<Vec<IndexKey>> {
        if let Some(names) = public_arguments.get::<IndexKey>(&"names".into()) {
            return Ok(match names.ref_array()? {
                Array::Int(names) => names.iter()
                    .map(|n| n.clone().into())
                    .collect(),
                Array::Bool(names) => names.iter()
                    .map(|n| n.clone().into())
                    .collect(),
                Array::Str(names) => names.iter()
                    .map(|n| n.clone().into())
                    .collect(),
                _ => return Err("column names may not be floats".into())
            })
        }
        let input_names = argument_variables.get::<IndexKey>(&"data".into())
            .ok_or_else(|| Error::from("column names on data must be known"))?;

        if let Some(indices) = public_arguments.get::<IndexKey>(&"indices".into()) {
            indices.ref_array()?.ref_int()?.iter()
                .map(|idx| input_names.get(*idx as usize).cloned())
                .collect::<Option<Vec<IndexKey>>>()
                .ok_or_else(|| Error::from("attempted to retrieve an out-of-bounds name"))
        } else if let Some(mask) = public_arguments.get::<IndexKey>(&"mask".into()) {
            Ok(mask.ref_array()?.ref_bool()?.iter()
                .zip(input_names.iter())
                .filter(|(&mask, _)| mask)
                .map(|(_, name)| name.clone())
                .collect::<Vec<IndexKey>>())
        } else {
            Err("one of names, indices or mask must be supplied".into())
        }
    }
}

pub fn to_name_vec<T: Clone>(columns: ArrayD<T>) -> Result<Vec<T>> {
    match columns.ndim() {
        0 => Ok(vec![columns.first()
            .ok_or_else(|| Error::from("At least one column name must be supplied"))?.clone()]),
        1 => match columns.into_dimensionality::<Ix1>() {
            Ok(columns) => Ok(columns.to_vec()),
            Err(_) => Err("column names must be 1-dimensional".into())
        },
        _ => Err("dimensionality of column names must be less than 2".into())
    }
}