aboutsummaryrefslogtreecommitdiff
path: root/v_windows/v/vlib/math/stats
diff options
context:
space:
mode:
Diffstat (limited to 'v_windows/v/vlib/math/stats')
-rw-r--r--v_windows/v/vlib/math/stats/stats.v249
-rw-r--r--v_windows/v/vlib/math/stats/stats_test.v269
2 files changed, 518 insertions, 0 deletions
diff --git a/v_windows/v/vlib/math/stats/stats.v b/v_windows/v/vlib/math/stats/stats.v
new file mode 100644
index 0000000..d7317bf
--- /dev/null
+++ b/v_windows/v/vlib/math/stats/stats.v
@@ -0,0 +1,249 @@
+module stats
+
+import math
+
+// TODO: Implement all of them with generics
+
+// This module defines the following statistical operations on f64 arrays
+// ---------------------------
+// | Summary of Functions |
+// ---------------------------
+// -----------------------------------------------------------------------
+// freq - Frequency
+// mean - Mean
+// geometric_mean - Geometric Mean
+// harmonic_mean - Harmonic Mean
+// median - Median
+// mode - Mode
+// rms - Root Mean Square
+// population_variance - Population Variance
+// sample_variance - Sample Variance
+// population_stddev - Population Standard Deviation
+// sample_stddev - Sample Standard Deviation
+// mean_absdev - Mean Absolute Deviation
+// min - Minimum of the Array
+// max - Maximum of the Array
+// range - Range of the Array ( max - min )
+// -----------------------------------------------------------------------
+
+// freq counts how many elements of `arr` compare equal (`==`) to `val`.
+// An empty array contains no matches, so the result is 0.
+// Reference:
+// https://www.mathsisfun.com/data/frequency-distribution.html
+pub fn freq(arr []f64, val f64) int {
+	mut hits := 0
+	for i in 0 .. arr.len {
+		// Only exact floating-point equality counts as a match.
+		if arr[i] != val {
+			continue
+		}
+		hits++
+	}
+	// With no elements the loop never runs and 0 is returned.
+	return hits
+}
+
+// mean returns the arithmetic mean of `arr`: the sum of all elements
+// divided by the element count. An empty array yields 0.
+// Reference:
+// https://www.mathsisfun.com/data/central-measures.html
+pub fn mean(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut total := f64(0)
+	for i in 0 .. arr.len {
+		total += arr[i]
+	}
+	return total / f64(arr.len)
+}
+
+// geometric_mean returns the n-th root of the product of the n elements
+// of `arr`. An empty array yields 0.
+// Reference:
+// https://www.mathsisfun.com/numbers/geometric-mean.html
+pub fn geometric_mean(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut product := f64(1)
+	for i in 0 .. arr.len {
+		product *= arr[i]
+	}
+	return math.pow(product, f64(1) / f64(arr.len))
+}
+
+// harmonic_mean returns the element count of `arr` divided by the sum of
+// the reciprocals of its elements. An empty array yields 0.
+// Reference:
+// https://www.mathsisfun.com/numbers/harmonic-mean.html
+pub fn harmonic_mean(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut reciprocal_sum := f64(0)
+	for i in 0 .. arr.len {
+		reciprocal_sum += f64(1) / arr[i]
+	}
+	return f64(arr.len) / reciprocal_sum
+}
+
+// median returns the middle value of `arr`, which is assumed to be
+// sorted already; the array is neither sorted nor checked here.
+// Even length: average of the two central elements. Empty: 0.
+// Reference: https://www.mathsisfun.com/data/central-measures.html
+pub fn median(arr []f64) f64 {
+	n := arr.len
+	if n == 0 {
+		return f64(0)
+	}
+	half := n / 2
+	if n % 2 != 0 {
+		return arr[half]
+	}
+	return (arr[half - 1] + arr[half]) / f64(2)
+}
+
+// mode returns the most frequently occurring value in `arr`.
+// When several values share the highest frequency, the one that appears
+// first in `arr` wins. An empty array yields 0.
+// Reference:
+// https://www.mathsisfun.com/data/central-measures.html
+pub fn mode(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut best := arr[0]
+	mut best_count := 0
+	for v in arr {
+		count := freq(arr, v)
+		if count > best_count {
+			best = v
+			best_count = count
+		}
+	}
+	return best
+}
+
+// rms returns the root mean square of `arr`: the square root of the mean
+// of the squared elements. An empty array yields 0.
+// Reference: https://en.wikipedia.org/wiki/Root_mean_square
+pub fn rms(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut squares := f64(0)
+	for i in 0 .. arr.len {
+		squares += math.pow(arr[i], 2)
+	}
+	return math.sqrt(squares / f64(arr.len))
+}
+
+// population_variance returns the mean of the squared deviations of the
+// elements of `arr` from their mean (divisor: element count).
+// An empty array yields 0.
+// Reference: https://www.mathsisfun.com/data/standard-deviation.html
+pub fn population_variance(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	center := mean(arr)
+	mut squared_dev := f64(0)
+	for i in 0 .. arr.len {
+		squared_dev += math.pow(arr[i] - center, 2)
+	}
+	return squared_dev / f64(arr.len)
+}
+
+// sample_variance returns the sum of the squared deviations of the elements
+// of `arr` from their mean, divided by `arr.len - 1` (Bessel's correction).
+// Fewer than two elements yield 0: the divisor would otherwise be zero.
+// Reference: https://www.mathsisfun.com/data/standard-deviation.html
+pub fn sample_variance(arr []f64) f64 {
+	if arr.len < 2 {
+		return f64(0)
+	}
+	m := mean(arr)
+	mut sum := f64(0)
+	for v in arr {
+		sum += math.pow(v - m, 2)
+	}
+	return sum / f64(arr.len - 1)
+}
+
+// population_stddev returns the square root of the population variance
+// of `arr`, as computed by `population_variance`.
+// An empty array yields 0.
+// Reference: https://www.mathsisfun.com/data/standard-deviation.html
+pub fn population_stddev(arr []f64) f64 {
+	if arr.len > 0 {
+		return math.sqrt(population_variance(arr))
+	}
+	return f64(0)
+}
+
+// sample_stddev returns the square root of the sample variance of `arr`,
+// as computed by `sample_variance`.
+// Fewer than two elements yield 0 (the n - 1 divisor would be zero).
+// Reference: https://www.mathsisfun.com/data/standard-deviation.html
+pub fn sample_stddev(arr []f64) f64 {
+	if arr.len < 2 {
+		return f64(0)
+	}
+	return math.sqrt(sample_variance(arr))
+}
+
+// mean_absdev returns the mean of the absolute deviations of the elements
+// of `arr` from their mean.
+// An empty array yields 0.
+// Reference: https://en.wikipedia.org/wiki/Average_absolute_deviation
+pub fn mean_absdev(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	center := mean(arr)
+	mut abs_total := f64(0)
+	for i in 0 .. arr.len {
+		abs_total += math.abs(arr[i] - center)
+	}
+	return abs_total / f64(arr.len)
+}
+
+// min returns the smallest element of `arr`; an empty array yields 0.
+pub fn min(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut smallest := arr[0]
+	for i in 1 .. arr.len {
+		if arr[i] < smallest {
+			smallest = arr[i]
+		}
+	}
+	return smallest
+}
+
+// max returns the largest element of `arr`; an empty array yields 0.
+pub fn max(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut largest := arr[0]
+	for i in 1 .. arr.len {
+		if arr[i] > largest {
+			largest = arr[i]
+		}
+	}
+	return largest
+}
+
+// range returns the spread of `arr`: its largest element minus its
+// smallest element (`max(arr) - min(arr)`).
+// An empty array yields 0.
+// Reference: https://www.mathsisfun.com/data/range.html
+pub fn range(arr []f64) f64 {
+	if arr.len > 0 {
+		return max(arr) - min(arr)
+	}
+	return f64(0)
+}
diff --git a/v_windows/v/vlib/math/stats/stats_test.v b/v_windows/v/vlib/math/stats/stats_test.v
new file mode 100644
index 0000000..c18daff
--- /dev/null
+++ b/v_windows/v/vlib/math/stats/stats_test.v
@@ -0,0 +1,269 @@
+import math.stats
+import math
+
+fn test_freq() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	samples := [f64(10.0), f64(10.0), f64(5.9), f64(2.7)]
+	// Value present twice.
+	assert stats.freq(samples, 10.0) == 2
+	// Value present once.
+	assert stats.freq(samples, 2.7) == 1
+	// Value absent from the data.
+	assert stats.freq(samples, 15) == 0
+}
+
+// tst_res parses both strings as f64 and reports whether the two values
+// differ by less than 1e-5 in absolute terms.
+fn tst_res(str1 string, str2 string) bool {
+	delta := math.abs(str1.f64() - str2.f64())
+	return delta < 1e-5
+}
+
+fn test_mean() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// Set A: positive values only; compared via tst_res (tolerance 1e-5), not `==`.
+	set_a := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
+	got_a := stats.mean(set_a)
+	assert tst_res(got_a.str(), '5.762500')
+	// Set B: includes a negative value.
+	set_b := [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
+	got_b := stats.mean(set_b)
+	assert tst_res(got_b.str(), '17.650000')
+	// Set C: positive values spread over a wider interval.
+	set_c := [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
+	got_c := stats.mean(set_c)
+	assert tst_res(got_c.str(), '37.708000')
+}
+
+fn test_geometric_mean() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// Finite results are compared via tst_res (tolerance 1e-5), not `==`.
+
+	positive := stats.geometric_mean([f64(10.0), f64(4.45), f64(5.9), f64(2.7)])
+	assert tst_res(positive.str(), '5.15993')
+	// The product of this data is negative, so its real 4th root does not exist
+	// (mathematically it is complex): accept any platform NaN spelling, or 0.
+	negative := stats.geometric_mean([f64(-3.0), f64(67.31), f64(4.4), f64(1.89)])
+	text := negative.str()
+	assert text == 'nan' || text == '-nan' || text == '-1.#IND00' || negative == f64(0)
+		|| text == '-nan(ind)'
+
+	wide := stats.geometric_mean([f64(12.0), f64(7.88), f64(76.122), f64(54.83)])
+	assert tst_res(wide.str(), '25.064496')
+}
+
+fn test_harmonic_mean() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// Set A: positive values only; compared via tst_res (tolerance 1e-5), not `==`.
+	set_a := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
+	got_a := stats.harmonic_mean(set_a)
+	assert tst_res(got_a.str(), '4.626519')
+	// Set B: includes a negative value.
+	set_b := [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
+	got_b := stats.harmonic_mean(set_b)
+	assert tst_res(got_b.str(), '9.134577')
+	// Set C: positive values spread over a wider interval.
+	set_c := [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
+	got_c := stats.harmonic_mean(set_c)
+	assert tst_res(got_c.str(), '16.555477')
+}
+
+fn test_median() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// stats.median assumes sorted input, so every fixture is in ascending order.
+
+	// Even length: the median is the average of the two middle elements.
+	mut data := [f64(2.7), f64(4.45), f64(5.9), f64(10.0)]
+	mut o := stats.median(data)
+	// Compared via tst_res (tolerance 1e-5) rather than `==`.
+	assert tst_res(o.str(), '5.175000')
+	data = [f64(-3.0), f64(1.89), f64(4.4), f64(67.31)]
+	o = stats.median(data)
+	// Compared via tst_res (tolerance 1e-5) rather than `==`.
+	assert tst_res(o.str(), '3.145000')
+	data = [f64(7.88), f64(12.0), f64(54.83), f64(76.122)]
+	o = stats.median(data)
+	// Compared via tst_res (tolerance 1e-5) rather than `==`.
+	assert tst_res(o.str(), '33.415000')
+
+	// Odd length: the median is the single middle element, returned as-is.
+	data = [f64(2.7), f64(4.45), f64(5.9), f64(10.0), f64(22)]
+	o = stats.median(data)
+	assert o == f64(5.9)
+	data = [f64(-3.0), f64(1.89), f64(4.4), f64(9), f64(67.31)]
+	o = stats.median(data)
+	assert o == f64(4.4)
+	data = [f64(3.3), f64(7.88), f64(12.0), f64(54.83), f64(76.122)]
+	o = stats.median(data)
+	assert o == f64(12.0)
+}
+
+fn test_mode() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// A single repeated value at the front of the data.
+	leading := [f64(2.7), f64(2.7), f64(4.45), f64(5.9), f64(10.0)]
+	assert stats.mode(leading) == f64(2.7)
+	// 1.89 occurs three times; 9 and 4.4 occur twice each.
+	repeated := [f64(-3.0), f64(1.89), f64(1.89), f64(1.89), f64(9), f64(4.4), f64(4.4), f64(9),
+		f64(67.31),
+	]
+	assert stats.mode(repeated) == f64(1.89)
+	// Tie between 2.0 and 4.0 (two occurrences each): the value that
+	// appears first in the data is expected.
+	tied := [f64(2.0), f64(4.0), f64(2.0), f64(4.0)]
+	assert stats.mode(tied) == f64(2.0)
+}
+
+fn test_rms() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// Set A: positive values only; compared via tst_res (tolerance 1e-5), not `==`.
+	set_a := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
+	got_a := stats.rms(set_a)
+	assert tst_res(got_a.str(), '6.362046')
+	// Set B: includes a negative value.
+	set_b := [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
+	got_b := stats.rms(set_b)
+	assert tst_res(got_b.str(), '33.773393')
+	// Set C: positive values spread over a wider interval.
+	set_c := [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
+	got_c := stats.rms(set_c)
+	assert tst_res(got_c.str(), '47.452561')
+}
+
+fn test_population_variance() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// Set A: positive values only; compared via tst_res (tolerance 1e-5), not `==`.
+	set_a := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
+	got_a := stats.population_variance(set_a)
+	assert tst_res(got_a.str(), '7.269219')
+	// Set B: includes a negative value.
+	set_b := [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
+	got_b := stats.population_variance(set_b)
+	assert tst_res(got_b.str(), '829.119550')
+	// Set C: positive values spread over a wider interval.
+	set_c := [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
+	got_c := stats.population_variance(set_c)
+	assert tst_res(got_c.str(), '829.852282')
+}
+
+fn test_sample_variance() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// Set A: positive values only; compared via tst_res (tolerance 1e-5), not `==`.
+	set_a := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
+	got_a := stats.sample_variance(set_a)
+	assert tst_res(got_a.str(), '9.692292')
+	// Set B: includes a negative value.
+	set_b := [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
+	got_b := stats.sample_variance(set_b)
+	assert tst_res(got_b.str(), '1105.492733')
+	// Set C: positive values spread over a wider interval.
+	set_c := [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
+	got_c := stats.sample_variance(set_c)
+	assert tst_res(got_c.str(), '1106.469709')
+}
+
+fn test_population_stddev() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// Set A: positive values only; compared via tst_res (tolerance 1e-5), not `==`.
+	set_a := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
+	got_a := stats.population_stddev(set_a)
+	assert tst_res(got_a.str(), '2.696149')
+	// Set B: includes a negative value.
+	set_b := [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
+	got_b := stats.population_stddev(set_b)
+	assert tst_res(got_b.str(), '28.794436')
+	// Set C: positive values spread over a wider interval.
+	set_c := [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
+	got_c := stats.population_stddev(set_c)
+	assert tst_res(got_c.str(), '28.807157')
+}
+
+fn test_sample_stddev() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// Set A: positive values only; compared via tst_res (tolerance 1e-5), not `==`.
+	set_a := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
+	got_a := stats.sample_stddev(set_a)
+	assert tst_res(got_a.str(), '3.113245')
+	// Set B: includes a negative value.
+	set_b := [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
+	got_b := stats.sample_stddev(set_b)
+	assert tst_res(got_b.str(), '33.248951')
+	// Set C: positive values spread over a wider interval.
+	set_c := [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
+	got_c := stats.sample_stddev(set_c)
+	assert tst_res(got_c.str(), '33.263639')
+}
+
+fn test_mean_absdev() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// Set A: positive values only; compared via tst_res (tolerance 1e-5), not `==`.
+	set_a := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
+	got_a := stats.mean_absdev(set_a)
+	assert tst_res(got_a.str(), '2.187500')
+	// Set B: includes a negative value.
+	set_b := [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
+	got_b := stats.mean_absdev(set_b)
+	assert tst_res(got_b.str(), '24.830000')
+	// Set C: positive values spread over a wider interval.
+	set_c := [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
+	got_c := stats.mean_absdev(set_c)
+	assert tst_res(got_c.str(), '27.768000')
+}
+
+fn test_min() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// Minimum is the last element.
+	trailing := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
+	assert stats.min(trailing) == f64(2.7)
+	// Minimum is negative and is the first element.
+	leading := [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
+	assert stats.min(leading) == f64(-3.0)
+	// Minimum sits in the interior of the data.
+	interior := [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
+	assert stats.min(interior) == f64(7.88)
+}
+
+fn test_max() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// Maximum is the first element.
+	leading := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
+	assert stats.max(leading) == f64(10.0)
+	// Maximum follows a negative first element.
+	after_negative := [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
+	assert stats.max(after_negative) == f64(67.31)
+	// Maximum sits in the interior of the data.
+	interior := [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
+	assert stats.max(interior) == f64(76.122)
+}
+
+fn test_range() {
+	// Expected values were cross-checked on Wolfram Alpha.
+	// 10.0 - 2.7
+	set_a := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
+	assert stats.range(set_a) == f64(7.3)
+	// 67.31 - (-3.0)
+	set_b := [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
+	assert stats.range(set_b) == f64(70.31)
+	// 76.122 - 7.88
+	set_c := [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
+	assert stats.range(set_c) == f64(68.242)
+}
+
+fn test_passing_empty() {
+	// Every function in the module is expected to return zero for an empty array.
+	assert stats.freq([]f64{}, 0) == 0
+	assert stats.mean([]f64{}) == f64(0)
+	assert stats.geometric_mean([]f64{}) == f64(0)
+	assert stats.harmonic_mean([]f64{}) == f64(0)
+	assert stats.median([]f64{}) == f64(0)
+	assert stats.mode([]f64{}) == f64(0)
+	assert stats.rms([]f64{}) == f64(0)
+	assert stats.population_variance([]f64{}) == f64(0)
+	assert stats.sample_variance([]f64{}) == f64(0)
+	assert stats.population_stddev([]f64{}) == f64(0)
+	assert stats.sample_stddev([]f64{}) == f64(0)
+	assert stats.mean_absdev([]f64{}) == f64(0)
+	assert stats.min([]f64{}) == f64(0)
+	assert stats.max([]f64{}) == f64(0)
+	assert stats.range([]f64{}) == f64(0)
+}