use super::super::interpolate::Nearest;
use super::super::{Quantile1dExt, QuantileExt};
use super::errors::BinsBuildError;
use super::{Bins, Edges};
use ndarray::prelude::*;
use ndarray::Data;
use noisy_float::types::n64;
use num_traits::{FromPrimitive, NumOps, Zero};
pub trait BinsBuildingStrategy {
type Elem: Ord;
fn from_array<S>(array: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
Self: std::marker::Sized;
fn build(&self) -> Bins<Self::Elem>;
fn n_bins(&self) -> usize;
}
#[derive(Debug)]
struct EquiSpaced<T> {
bin_width: T,
min: T,
max: T,
}
#[derive(Debug)]
pub struct Sqrt<T> {
builder: EquiSpaced<T>,
}
#[derive(Debug)]
pub struct Rice<T> {
builder: EquiSpaced<T>,
}
#[derive(Debug)]
pub struct Sturges<T> {
builder: EquiSpaced<T>,
}
#[derive(Debug)]
pub struct FreedmanDiaconis<T> {
builder: EquiSpaced<T>,
}
#[derive(Debug)]
enum SturgesOrFD<T> {
Sturges(Sturges<T>),
FreedmanDiaconis(FreedmanDiaconis<T>),
}
#[derive(Debug)]
pub struct Auto<T> {
builder: SturgesOrFD<T>,
}
impl<T> EquiSpaced<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
fn new(bin_width: T, min: T, max: T) -> Result<Self, BinsBuildError> {
if (bin_width <= T::zero()) || (min >= max) {
Err(BinsBuildError::Strategy)
} else {
Ok(Self {
bin_width,
min,
max,
})
}
}
fn build(&self) -> Bins<T> {
let n_bins = self.n_bins();
let mut edges: Vec<T> = vec![];
for i in 0..(n_bins + 1) {
let edge = self.min.clone() + T::from_usize(i).unwrap() * self.bin_width.clone();
edges.push(edge);
}
Bins::new(Edges::from(edges))
}
fn n_bins(&self) -> usize {
let mut max_edge = self.min.clone();
let mut n_bins = 0;
while max_edge <= self.max {
max_edge = max_edge + self.bin_width.clone();
n_bins += 1;
}
return n_bins;
}
fn bin_width(&self) -> T {
self.bin_width.clone()
}
}
impl<T> BinsBuildingStrategy for Sqrt<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
type Elem = T;
fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
{
let n_elems = a.len();
let n_bins = (n_elems as f64).sqrt().round() as usize;
let min = a.min()?;
let max = a.max()?;
let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
Ok(Self { builder })
}
fn build(&self) -> Bins<T> {
self.builder.build()
}
fn n_bins(&self) -> usize {
self.builder.n_bins()
}
}
impl<T> Sqrt<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
pub fn bin_width(&self) -> T {
self.builder.bin_width()
}
}
impl<T> BinsBuildingStrategy for Rice<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
type Elem = T;
fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
{
let n_elems = a.len();
let n_bins = (2. * (n_elems as f64).powf(1. / 3.)).round() as usize;
let min = a.min()?;
let max = a.max()?;
let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
Ok(Self { builder })
}
fn build(&self) -> Bins<T> {
self.builder.build()
}
fn n_bins(&self) -> usize {
self.builder.n_bins()
}
}
impl<T> Rice<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
pub fn bin_width(&self) -> T {
self.builder.bin_width()
}
}
impl<T> BinsBuildingStrategy for Sturges<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
type Elem = T;
fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
{
let n_elems = a.len();
let n_bins = (n_elems as f64).log2().round() as usize + 1;
let min = a.min()?;
let max = a.max()?;
let bin_width = compute_bin_width(min.clone(), max.clone(), n_bins);
let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
Ok(Self { builder })
}
fn build(&self) -> Bins<T> {
self.builder.build()
}
fn n_bins(&self) -> usize {
self.builder.n_bins()
}
}
impl<T> Sturges<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
pub fn bin_width(&self) -> T {
self.builder.bin_width()
}
}
impl<T> BinsBuildingStrategy for FreedmanDiaconis<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
type Elem = T;
fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
{
let n_points = a.len();
if n_points == 0 {
return Err(BinsBuildError::EmptyInput);
}
let mut a_copy = a.to_owned();
let first_quartile = a_copy.quantile_mut(n64(0.25), &Nearest).unwrap();
let third_quartile = a_copy.quantile_mut(n64(0.75), &Nearest).unwrap();
let iqr = third_quartile - first_quartile;
let bin_width = FreedmanDiaconis::compute_bin_width(n_points, iqr);
let min = a.min()?;
let max = a.max()?;
let builder = EquiSpaced::new(bin_width, min.clone(), max.clone())?;
Ok(Self { builder })
}
fn build(&self) -> Bins<T> {
self.builder.build()
}
fn n_bins(&self) -> usize {
self.builder.n_bins()
}
}
impl<T> FreedmanDiaconis<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
fn compute_bin_width(n_bins: usize, iqr: T) -> T {
let denominator = (n_bins as f64).powf(1. / 3.);
let bin_width = T::from_usize(2).unwrap() * iqr / T::from_f64(denominator).unwrap();
bin_width
}
pub fn bin_width(&self) -> T {
self.builder.bin_width()
}
}
impl<T> BinsBuildingStrategy for Auto<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
type Elem = T;
fn from_array<S>(a: &ArrayBase<S, Ix1>) -> Result<Self, BinsBuildError>
where
S: Data<Elem = Self::Elem>,
{
let fd_builder = FreedmanDiaconis::from_array(&a);
let sturges_builder = Sturges::from_array(&a);
match (fd_builder, sturges_builder) {
(Err(_), Ok(sturges_builder)) => {
let builder = SturgesOrFD::Sturges(sturges_builder);
Ok(Self { builder })
}
(Ok(fd_builder), Err(_)) => {
let builder = SturgesOrFD::FreedmanDiaconis(fd_builder);
Ok(Self { builder })
}
(Ok(fd_builder), Ok(sturges_builder)) => {
let builder = if fd_builder.bin_width() > sturges_builder.bin_width() {
SturgesOrFD::Sturges(sturges_builder)
} else {
SturgesOrFD::FreedmanDiaconis(fd_builder)
};
Ok(Self { builder })
}
(Err(err), Err(_)) => Err(err),
}
}
fn build(&self) -> Bins<T> {
match &self.builder {
SturgesOrFD::FreedmanDiaconis(b) => b.build(),
SturgesOrFD::Sturges(b) => b.build(),
}
}
fn n_bins(&self) -> usize {
match &self.builder {
SturgesOrFD::FreedmanDiaconis(b) => b.n_bins(),
SturgesOrFD::Sturges(b) => b.n_bins(),
}
}
}
impl<T> Auto<T>
where
T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
pub fn bin_width(&self) -> T {
match &self.builder {
SturgesOrFD::FreedmanDiaconis(b) => b.bin_width(),
SturgesOrFD::Sturges(b) => b.bin_width(),
}
}
}
fn compute_bin_width<T>(min: T, max: T, n_bins: usize) -> T
where
T: Ord + Clone + FromPrimitive + NumOps + Zero,
{
let range = max.clone() - min.clone();
let bin_width = range / T::from_usize(n_bins).unwrap();
bin_width
}
#[cfg(test)]
mod equispaced_tests {
use super::*;
#[test]
fn bin_width_has_to_be_positive() {
assert!(EquiSpaced::new(0, 0, 200).is_err());
}
#[test]
fn min_has_to_be_strictly_smaller_than_max() {
assert!(EquiSpaced::new(10, 0, 0).is_err());
}
}
#[cfg(test)]
mod sqrt_tests {
use super::*;
use ndarray::array;
#[test]
fn constant_array_are_bad() {
assert!(Sqrt::from_array(&array![1, 1, 1, 1, 1, 1, 1])
.unwrap_err()
.is_strategy());
}
#[test]
fn empty_arrays_are_bad() {
assert!(Sqrt::<usize>::from_array(&array![])
.unwrap_err()
.is_empty_input());
}
}
#[cfg(test)]
mod rice_tests {
use super::*;
use ndarray::array;
#[test]
fn constant_array_are_bad() {
assert!(Rice::from_array(&array![1, 1, 1, 1, 1, 1, 1])
.unwrap_err()
.is_strategy());
}
#[test]
fn empty_arrays_are_bad() {
assert!(Rice::<usize>::from_array(&array![])
.unwrap_err()
.is_empty_input());
}
}
#[cfg(test)]
mod sturges_tests {
use super::*;
use ndarray::array;
#[test]
fn constant_array_are_bad() {
assert!(Sturges::from_array(&array![1, 1, 1, 1, 1, 1, 1])
.unwrap_err()
.is_strategy());
}
#[test]
fn empty_arrays_are_bad() {
assert!(Sturges::<usize>::from_array(&array![])
.unwrap_err()
.is_empty_input());
}
}
#[cfg(test)]
mod fd_tests {
use super::*;
use ndarray::array;
#[test]
fn constant_array_are_bad() {
assert!(FreedmanDiaconis::from_array(&array![1, 1, 1, 1, 1, 1, 1])
.unwrap_err()
.is_strategy());
}
#[test]
fn zero_iqr_is_bad() {
assert!(
FreedmanDiaconis::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20])
.unwrap_err()
.is_strategy()
);
}
#[test]
fn empty_arrays_are_bad() {
assert!(FreedmanDiaconis::<usize>::from_array(&array![])
.unwrap_err()
.is_empty_input());
}
}
#[cfg(test)]
mod auto_tests {
use super::*;
use ndarray::array;
#[test]
fn constant_array_are_bad() {
assert!(Auto::from_array(&array![1, 1, 1, 1, 1, 1, 1])
.unwrap_err()
.is_strategy());
}
#[test]
fn zero_iqr_is_handled_by_sturged() {
assert!(Auto::from_array(&array![-20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20]).is_ok());
}
#[test]
fn empty_arrays_are_bad() {
assert!(Auto::<usize>::from_array(&array![])
.unwrap_err()
.is_empty_input());
}
}