Function smartnoise_runtime::components::clamp::clamp_categorical

pub fn clamp_categorical<T: Ord + Hash + Clone>(
    data: ArrayD<T>,
    categories: Vec<Vec<T>>,
    null_value: ArrayD<T>
) -> Result<ArrayD<T>> where
    T: Clone,
    T: PartialEq,
    T: Default

Clamps each column of categorical data to a desired set of categories.

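Clamping for categorical data is not as obvious a concept as clamping for numeric data. Here, clamping takes every element that is not included in its column's `categories` and maps it to that column's `null_value`; elements that are already in `categories` are left unchanged.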

This is useful in the library because having a well-defined set of categories (and a default way to refer to elements outside of this set) is important for common procedures like a differentially private histogram release.

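Arguments

- `data` - Data to be clamped.
- `categories` - For each column, the set of categories that should be kept.
- `null_value` - For each column, the value to which elements not included in `categories` are mapped.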

Return

Data clamped to the desired categories: every element outside its column's category set is replaced by that column's null value.

Example

use ndarray::{ArrayD, arr2, arr1};
use smartnoise_runtime::components::clamp::clamp_categorical;
let data: ArrayD<String> = arr2(&[["a".to_string(), "b".to_string(), "3".to_string()],
                                  ["a".to_string(), "2".to_string(), "b".to_string()]]).into_dyn();
let categories: Vec<Vec<String>> = vec![vec!["a".to_string(), "b".to_string()],
                                                vec!["a".to_string(), "b".to_string()],
                                                vec!["a".to_string(), "b".to_string()]];
let null_value: ArrayD<String> = arr1(&["not_a_letter".to_string(),
                                        "not_a_letter".to_string(),
                                        "not_a_letter".to_string()]).into_dyn();

let clamped_data = clamp_categorical(data, categories, null_value).unwrap();
assert_eq!(clamped_data, arr2(&[["a".to_string(), "b".to_string(), "not_a_letter".to_string()],
                               ["a".to_string(), "not_a_letter".to_string(), "b".to_string()]]).into_dyn());