use std::cmp::Ordering;
use std::ops::AddAssign;
use ieee754::Ieee754;
use ndarray::{ArrayD, Axis, Zip};
use ndarray::prelude::IxDyn;
use openssl::rand::rand_bytes;
use smartnoise_validator::errors::*;
use smartnoise_validator::utilities::array::{slow_select, slow_stack};
pub mod mechanisms;
pub mod noise;
pub fn get_num_columns<T>(data: &ArrayD<T>) -> Result<i64> {
match data.ndim() {
0 | 1 => Ok(1),
2 => Ok(data.len_of(Axis(1)) as i64),
_ => Err("data may be at most 2-dimensional".into())
}
}
pub fn get_num_rows<T>(data: &ArrayD<T>) -> Result<i64> {
match data.ndim() {
0 => Ok(1),
1 | 2 => Ok(data.len_of(Axis(0)) as i64),
_ => Err("data may be at most 2-dimensional".into())
}
}
pub fn broadcast_map<T, U>(
left: ArrayD<T>,
right: ArrayD<T>,
operator: &dyn Fn(&T, &T) -> U) -> Result<ArrayD<U>> where T: std::clone::Clone, U: Default {
let shape = match left.ndim().cmp(&right.ndim()) {
Ordering::Less => right.shape(),
Ordering::Equal => if left.len() > right.len() { left.shape() } else { right.shape() },
Ordering::Greater => left.shape()
}.to_vec();
let left = to_nd(left, shape.len())?;
let right = to_nd(right, shape.len())?;
let mut output: ArrayD<U> = ndarray::Array::default(shape.clone());
Zip::from(&mut output)
.and(left.broadcast(shape.clone()).ok_or("could not broadcast left argument")?)
.and(right.broadcast(shape).ok_or("could not broadcast right argument")?)
.apply(|acc, l, r| *acc = operator(&l, &r));
Ok(output)
}
#[cfg(test)]
mod test_broadcast_map {
use ndarray::{arr0, arr1, arr2};
use crate::utilities::broadcast_map;
#[test]
fn test_broadcasting() {
let data0d = arr0(2.).into_dyn();
let data1d = arr1(&[2., 3., 5.]).into_dyn();
let data2d = arr2(&[[2., 4.], [3., 7.], [5., 2.]]).into_dyn();
assert_eq!(
broadcast_map(data0d.clone(), data1d.clone(), &|l, r| l * r).unwrap(),
arr1(&[4., 6., 10.]).into_dyn());
assert_eq!(
broadcast_map(data1d.clone(), data2d.clone(), &|l, r| l / r).unwrap(),
arr2(&[[1., 2. / 4.], [1., 3. / 7.], [1., 5. / 2.]]).into_dyn());
assert_eq!(
broadcast_map(data2d, data0d, &|l, r| l + r).unwrap(),
arr2(&[[4., 6.], [5., 9.], [7., 4.]]).into_dyn());
}
#[test]
fn non_conformable() {
let left = arr1(&[2., 3., 5.]).into_dyn();
let right = arr1(&[2., 3., 5., 6.]).into_dyn();
assert!(broadcast_map(
left, right, &|l, r| l * r,
).is_err());
}
#[test]
#[should_panic]
fn arraynd_left_broadcast() {
let left = arr0(2.).into_dyn();
let right = arr1(&[2., 3., 5., 6.]).into_dyn();
let _broadcast = left / right;
}
}
pub fn to_nd<T>(mut array: ArrayD<T>, ndim: usize) -> Result<ArrayD<T>> {
match (ndim as i32) - (array.ndim() as i32) {
0 => {}
i if i < 0 => {
(0..-(i as i32)).try_for_each(|_| match array.shape().last()
.ok_or_else(|| Error::from("ndim may not be negative"))? {
1 => {
array.index_axis_inplace(Axis(array.ndim() - 1), 0);
Ok(())
},
_ => Err(Error::from("cannot remove non-singleton trailing axis"))
})?
}
i if i > 0 => (0..i).for_each(|_| array.insert_axis_inplace(Axis(array.ndim()))),
_ => return Err("invalid dimensionality".into())
};
Ok(array)
}
pub fn standardize_columns<T: Default + Clone>(array: ArrayD<T>, column_len: usize) -> Result<ArrayD<T>> {
Ok(match array.ndim() {
0 => return Err("dataset may not be a scalar".into()),
1 => match column_len {
0 => slow_select(&array, Axis(1), &[]),
1 => array,
_ => slow_stack(
Axis(1),
&[array.view(), ndarray::Array::<T, IxDyn>::default(IxDyn(&[array.len(), column_len])).view()])?
},
2 => match array.len_of(Axis(1)).cmp(&column_len) {
Ordering::Less => slow_stack(
Axis(1),
&[array.view(), ndarray::Array::<T, IxDyn>::default(IxDyn(&[
array.len_of(Axis(0)),
column_len - array.len_of(Axis(1))])).view()],
)?,
Ordering::Equal => array,
Ordering::Greater => slow_select(&array, Axis(1), &(0..column_len).collect::<Vec<_>>())
},
_ => return Err("array must be 1 or 2-dimensional".into())
})
}
pub fn get_bytes(n_bytes: usize) -> Result<String> {
let mut buffer = vec!(0_u8; n_bytes);
fill_bytes(&mut buffer)?;
let new_buffer = buffer.into_iter()
.map(|v| format!("{:08b}", v))
.collect::<Vec<String>>();
Ok(new_buffer.concat())
}
pub fn fill_bytes(mut buffer: &mut [u8]) -> Result<()> {
if let Err(e) = rand_bytes(&mut buffer) {
Err(format!("OpenSSL Error: {}", e).into())
} else { Ok(()) }
}
pub fn f64_to_binary(num: f64) -> String {
let (sign, exponent, mantissa) = num.decompose_raw();
let sign_string = (sign as i64).to_string();
let mantissa_string = format!("{:052b}", mantissa);
let exponent_string = format!("{:011b}", exponent);
vec![sign_string, exponent_string, mantissa_string].concat()
}
pub fn binary_to_f64(binary_string: &str) -> Result<f64> {
let sign = &binary_string[0..1];
let sign_bool = sign.parse::<i32>()? != 0;
let exponent = &binary_string[1..12];
let exponent_int = u16::from_str_radix(exponent, 2)?;
let mantissa = &binary_string[12..];
let mantissa_int = u64::from_str_radix(mantissa, 2)?;
Ok(f64::recompose_raw(sign_bool, exponent_int, mantissa_int))
}
pub fn split_ieee_into_components(binary_string: String) -> (String, String, String) {
return (binary_string[0..1].to_string(), binary_string[1..12].to_string(), binary_string[12..].to_string());
}
pub fn combine_components_into_ieee(
(sign, exponent, mantissa): (String, String, String)
) -> String {
vec![sign, exponent, mantissa].concat()
}
#[cfg(feature="use-mpfr")]
pub fn sample_from_set<T>(
candidate_set: &[T], weights: &[smartnoise_validator::Float],
_enforce_constant_time: bool
) -> Result<T> where T: Clone {
macro_rules! to_rug {($v:expr) => {rug::Float::with_val(53, $v)}}
let weights_rug: Vec<rug::Float> = weights.iter().map(|w| to_rug!(w)).collect();
let sample: rug::Float = noise::sample_uniform_mpfr(
0.,
to_rug!(rug::Float::sum(weights_rug.iter())).to_f64())?;
let mut cum_prob = to_rug!(0.);
for (i, weight) in weights_rug.into_iter().enumerate() {
cum_prob.add_assign(weight);
if cum_prob >= sample {
return Ok(candidate_set[i].clone())
}
}
return Ok(candidate_set[candidate_set.len() - 1].clone())
}
#[cfg(not(feature="use-mpfr"))]
pub fn sample_from_set<T>(
candidate_set: &[T], weights: &[smartnoise_validator::Float],
enforce_constant_time: bool
) -> Result<T> where T: Clone {
let sample: f64 = noise::sample_uniform(0., weights.iter().sum(), enforce_constant_time)?;
let mut cumulative = 0.;
let mut return_index: usize = 0;
loop {
cumulative += weights[return_index];
if cumulative >= sample { break }
return_index += 1;
}
Ok(candidate_set[return_index].clone())
}
#[cfg(feature="use-mpfr")]
pub fn create_subset<T>(
set: &[T], weights: &[f64], k: usize,
_enforce_constant_time: bool
) -> Result<Vec<T>> where T: Clone {
if k > set.len() { return Err("k must be less than the set length".into()); }
use rug::Float;
use rug::ops::Pow;
let weights_rug: Vec<rug::Float> = weights.iter().map(|w| Float::with_val(53, w)).collect();
let weights_sum: rug::Float = Float::with_val(53, Float::sum(weights_rug.iter()));
let probabilities: Vec<rug::Float> = weights_rug.iter().map(|w| w / weights_sum.clone()).collect();
let mut key_vec = probabilities.into_iter()
.take(set.len()).enumerate()
.map(|(i, prob)| Ok((noise::sample_uniform_mpfr(0., 1.)?.pow(1. / prob), i)))
.collect::<Result<Vec<(rug::Float, usize)>>>()?;
let mut top_indices: Vec<usize> = Vec::with_capacity(k);
key_vec.sort_by(|a, b|
b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
top_indices.extend(key_vec.iter()
.take(k).map(|v| v.1));
let mut subset: Vec<T> = Vec::with_capacity(k);
for value in top_indices.iter().map(|&index| set[index].clone()) {
subset.push(value);
}
Ok(subset)
}
#[cfg(not(feature="use-mpfr"))]
pub fn create_subset<T>(
set: &[T], weights: &[f64], k: usize,
enforce_constant_time: bool
) -> Result<Vec<T>> where T: Clone {
if k > set.len() { return Err("k must be less than the set length".into()); }
let weights_sum: f64 = weights.iter().sum();
let probabilities: Vec<f64> = weights.iter().map(|w| w / weights_sum).collect();
let mut key_vec = (0..set.len())
.map(|i| Ok((
noise::sample_uniform(0., 1., enforce_constant_time)?
.powf(1. / probabilities[i]),
i
)))
.collect::<Result<Vec<(f64, usize)>>>()?;
key_vec.sort_by(|a, b|
b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
Ok(key_vec.iter().take(k).map(|v| set[v.1].clone()).collect())
}
pub fn get_closest_multiple_of_lambda(x: f64, m: i16) -> Result<f64> {
let (sign, mut exponent, mantissa) = x.decompose();
exponent -= m;
let (sign, mut exponent, mantissa) = match exponent {
exponent if exponent >= 52 => (sign, exponent, mantissa),
exponent if exponent == -1 => (sign, 0, 0),
exponent if exponent < -1 => (sign, -1023 - m, 0),
_ => {
let integer_mask: u64 = ((1u64 << exponent) - 1) << (52 - exponent);
let integer_mantissa: u64 = mantissa & integer_mask;
if mantissa & (1u64 << (52 - (exponent + 1))) == 0u64 {
(sign, exponent, integer_mantissa)
} else {
if integer_mantissa == integer_mask {
(sign, exponent + 1, 0)
} else {
(sign, exponent, integer_mantissa + (1u64 << (52 - exponent)))
}
}
}
};
exponent += m;
Ok(f64::recompose(sign, exponent, mantissa))
}
#[cfg(test)]
mod test_get_closest_multiple_of_lambda {
use smartnoise_validator::hashmap;
use crate::utilities::get_closest_multiple_of_lambda;
#[test]
fn test_get_closest_multiple_of_lambda_range() {
(0..100).for_each(|i| {
let x = 1. - 0.01 * (i as f64);
println!("{}: {}", x, get_closest_multiple_of_lambda(x, -1).unwrap())
});
}
#[test]
fn test_get_closest_multiple_of_lambda() {
let input = vec![-30.01, -2.51, -1.01, -0.76, -0.51, -0.26, 0.0, 0.26, 0.51, 0.76, 1.01, 2.51, 30.01];
hashmap![
-2 => vec![-30., -2.5, -1.0, -0.75, -0.5, -0.25, 0.0, 0.25, 0.5, 0.75, 1.0, 2.5, 30.0],
-1 => vec![-30., -2.5, -1.0, -1.0, -0.5, -0.5, 0.0, 0.5, 0.5, 1.0, 1.0, 2.5, 30.0],
0 => vec![-30., -3.0, -1.0, -1.0, -1.0, -0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 3.0, 30.0],
1 => vec![-30., -2.0, -2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 30.0],
2 => vec![-32., -4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 32.0]
].into_iter().for_each(|(m, outputs)| {
input.iter().copied().zip(outputs.into_iter())
.for_each(|(input, expected)| {
let actual = get_closest_multiple_of_lambda(input, m).unwrap();
println!("m: {:?}, input: {:?}, actual: {:?}, expected: {:?}",
m, input, actual, expected);
assert_eq!(actual, expected)
})
});
}
}