1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
use super::bins::Bins;
use super::errors::BinsBuildError;
use super::strategies::BinsBuildingStrategy;
use itertools::izip;
use ndarray::{ArrayBase, Axis, Data, Ix1, Ix2};
use std::ops::Range;

/// A `Grid` is a partition of a rectangular region of an *n*-dimensional
/// space—e.g. [*a*<sub>0</sub>, *b*<sub>0</sub>) × ⋯ × [*a*<sub>*n*−1</sub>,
/// *b*<sub>*n*−1</sub>)—into a collection of rectangular *n*-dimensional bins.
///
/// The grid is **fully determined by its 1-dimensional projections** on the
/// coordinate axes. For example, this is a partition that can be represented
/// as a `Grid` struct:
/// ```text
/// +---+-------+-+
/// |   |       | |
/// +---+-------+-+
/// |   |       | |
/// |   |       | |
/// |   |       | |
/// |   |       | |
/// +---+-------+-+
/// ```
/// while the next one can't:
/// ```text
/// +---+-------+-+
/// |   |       | |
/// |   +-------+-+
/// |   |         |
/// |   |         |
/// |   |         |
/// |   |         |
/// +---+-------+-+
/// ```
///
/// # Example:
///
/// ```
/// use ndarray::{Array, array};
/// use ndarray_stats::{HistogramExt,
///                     histogram::{Histogram, Grid, GridBuilder,
///                                 Edges, Bins, strategies::Auto}};
/// use noisy_float::types::{N64, n64};
///
/// // 1-dimensional observations, as a (n_observations, 1) 2-d matrix
/// let observations = Array::from_shape_vec(
///     (12, 1),
///     vec![1, 4, 5, 2, 100, 20, 50, 65, 27, 40, 45, 23],
/// ).unwrap();
///
/// // The optimal grid layout is inferred from the data,
/// // specifying a strategy (Auto in this case)
/// let grid = GridBuilder::<Auto<usize>>::from_array(&observations).unwrap().build();
/// let expected_grid = Grid::from(vec![Bins::new(Edges::from(vec![1, 20, 39, 58, 77, 96, 115]))]);
/// assert_eq!(grid, expected_grid);
///
/// let histogram = observations.histogram(grid);
///
/// let histogram_matrix = histogram.counts();
/// // Bins are left inclusive, right exclusive!
/// let expected = array![4, 3, 3, 1, 0, 1];
/// assert_eq!(histogram_matrix, expected.into_dyn());
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Grid<A: Ord> {
    projections: Vec<Bins<A>>,
}

impl<A: Ord> From<Vec<Bins<A>>> for Grid<A> {
    /// Get a `Grid` instance from a `Vec<Bins<A>>`.
    ///
    /// The `i`-th element in `Vec<Bins<A>>` represents the 1-dimensional
    /// projection of the bin grid on the `i`-th axis.
    ///
    /// Alternatively, a `Grid` can be built directly from data using a
    /// [`GridBuilder`].
    ///
    /// [`GridBuilder`]: struct.GridBuilder.html
    fn from(projections: Vec<Bins<A>>) -> Self {
        Grid { projections }
    }
}

impl<A: Ord> Grid<A> {
    /// Returns `n`, the number of dimensions of the region partitioned by the grid.
    pub fn ndim(&self) -> usize {
        self.projections.len()
    }

    /// Returns the number of bins along each coordinate axis.
    pub fn shape(&self) -> Vec<usize> {
        self.projections.iter().map(|e| e.len()).collect()
    }

    /// Returns the grid projections on the coordinate axes as a slice of immutable references.
    pub fn projections(&self) -> &[Bins<A>] {
        &self.projections
    }

    /// Returns the index of the *n*-dimensional bin containing the point, if
    /// one exists.
    ///
    /// Returns `None` if the point is outside the grid.
    ///
    /// **Panics** if `point.len()` does not equal `self.ndim()`.
    pub fn index_of<S>(&self, point: &ArrayBase<S, Ix1>) -> Option<Vec<usize>>
    where
        S: Data<Elem = A>,
    {
        assert_eq!(
            point.len(),
            self.ndim(),
            "Dimension mismatch: the point has {:?} dimensions, the grid \
             expected {:?} dimensions.",
            point.len(),
            self.ndim()
        );
        point
            .iter()
            .zip(self.projections.iter())
            .map(|(v, e)| e.index_of(v))
            .collect()
    }
}

impl<A: Ord + Clone> Grid<A> {
    /// Given `i=(i_0, ..., i_{n-1})`, an `n`-dimensional index, it returns
    /// `I_{i_0}x...xI_{i_{n-1}}`, an `n`-dimensional bin, where `I_{i_j}` is
    /// the `i_j`-th interval on the `j`-th projection of the grid on the coordinate axes.
    ///
    /// **Panics** if at least one among `(i_0, ..., i_{n-1})` is out of bounds on the respective
    /// coordinate axis - i.e. if there exists `j` such that `i_j >= self.projections[j].len()`.
    pub fn index(&self, index: &[usize]) -> Vec<Range<A>> {
        assert_eq!(
            index.len(),
            self.ndim(),
            "Dimension mismatch: the index has {0:?} dimensions, the grid \
             expected {1:?} dimensions.",
            index.len(),
            self.ndim()
        );
        izip!(&self.projections, index)
            .map(|(bins, &i)| bins.index(i))
            .collect()
    }
}

/// `GridBuilder`, given a [`strategy`] and some observations, returns a [`Grid`]
/// instance for [`histogram`] computation.
///
/// [`Grid`]: struct.Grid.html
/// [`histogram`]: trait.HistogramExt.html
/// [`strategy`]: strategies/index.html
pub struct GridBuilder<B: BinsBuildingStrategy> {
    bin_builders: Vec<B>,
}

impl<A, B> GridBuilder<B>
where
    A: Ord,
    B: BinsBuildingStrategy<Elem = A>,
{
    /// Given some observations in a 2-dimensional array with shape `(n_observations, n_dimension)`
    /// it returns a `GridBuilder` instance that has learned the required parameter
    /// to build a [`Grid`] according to the specified [`strategy`].
    ///
    /// It returns `Err` if it is not possible to build a [`Grid`] given
    /// the observed data according to the chosen [`strategy`].
    ///
    /// [`Grid`]: struct.Grid.html
    /// [`strategy`]: strategies/index.html
    pub fn from_array<S>(array: &ArrayBase<S, Ix2>) -> Result<Self, BinsBuildError>
    where
        S: Data<Elem = A>,
    {
        let bin_builders = array
            .axis_iter(Axis(1))
            .map(|data| B::from_array(&data))
            .collect::<Result<Vec<B>, BinsBuildError>>()?;
        Ok(Self { bin_builders })
    }

    /// Returns a [`Grid`] instance, built accordingly to the specified [`strategy`]
    /// using the parameters inferred from observations in [`from_array`].
    ///
    /// [`Grid`]: struct.Grid.html
    /// [`strategy`]: strategies/index.html
    /// [`from_array`]: #method.from_array.html
    pub fn build(&self) -> Grid<A> {
        let projections: Vec<_> = self.bin_builders.iter().map(|b| b.build()).collect();
        Grid::from(projections)
    }
}