diff options
author | Indrajith K L | 2022-12-03 17:00:20 +0530 |
---|---|---|
committer | Indrajith K L | 2022-12-03 17:00:20 +0530 |
commit | f5c4671bfbad96bf346bd7e9a21fc4317b4959df (patch) | |
tree | 2764fc62da58f2ba8da7ed341643fc359873142f /v_windows/v/vlib/math/stats | |
download | cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.gz cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.tar.bz2 cli-tools-windows-f5c4671bfbad96bf346bd7e9a21fc4317b4959df.zip |
Diffstat (limited to 'v_windows/v/vlib/math/stats')
-rw-r--r-- | v_windows/v/vlib/math/stats/stats.v | 249 | ||||
-rw-r--r-- | v_windows/v/vlib/math/stats/stats_test.v | 269 |
2 files changed, 518 insertions, 0 deletions
// ===========================================================================
// File: v_windows/v/vlib/math/stats/stats.v (new file)
// ===========================================================================
module stats

import math

// TODO: Implement all of them with generics

// This module defines the following statistical operations on f64 arrays.
// Every function returns 0 when it is given an empty array.
// ---------------------------
// |   Summary of Functions  |
// ---------------------------
// -----------------------------------------------------------------------
// freq                - Frequency
// mean                - Mean
// geometric_mean      - Geometric Mean
// harmonic_mean       - Harmonic Mean
// median              - Median
// mode                - Mode
// rms                 - Root Mean Square
// population_variance - Population Variance
// sample_variance     - Sample Variance
// population_stddev   - Population Standard Deviation
// sample_stddev       - Sample Standard Deviation
// mean_absdev         - Mean Absolute Deviation
// min                 - Minimum of the Array
// max                 - Maximum of the Array
// range               - Range of the Array ( max - min )
// -----------------------------------------------------------------------

// freq returns how many elements of `arr` compare equal (`==`) to `val`.
// Measure of Occurrence.
// Based on
// https://www.mathsisfun.com/data/frequency-distribution.html
pub fn freq(arr []f64, val f64) int {
	// An empty array simply never enters the loop, so the count stays 0.
	mut count := 0
	for v in arr {
		if v == val {
			count++
		}
	}
	return count
}

// mean returns the arithmetic mean (sum divided by element count) of `arr`.
// Measure of Central Tendency.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mean(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut sum := f64(0)
	for v in arr {
		sum += v
	}
	return sum / f64(arr.len)
}

// geometric_mean returns the n-th root of the product of the n elements of `arr`.
// Measure of Central Tendency.
// When the product is negative the result is whatever `math.pow` yields for a
// negative base with a fractional exponent; the accompanying test accepts
// either NaN or 0 for that case.
// Based on
// https://www.mathsisfun.com/numbers/geometric-mean.html
pub fn geometric_mean(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut product := f64(1)
	for v in arr {
		product *= v
	}
	return math.pow(product, f64(1) / f64(arr.len))
}

// harmonic_mean returns the element count divided by the sum of the
// reciprocals of the elements of `arr`.
// Measure of Central Tendency.
// Based on
// https://www.mathsisfun.com/numbers/harmonic-mean.html
pub fn harmonic_mean(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut reciprocal_sum := f64(0)
	for v in arr {
		reciprocal_sum += f64(1) / v
	}
	return f64(arr.len) / reciprocal_sum
}

// median returns the middle element of `arr`, or the average of the two
// middle elements when the length is even.
// Measure of Central Tendency.
// The input array is assumed to be sorted; this function does not sort it.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn median(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	// Integer division: for an odd length this is the exact middle index,
	// for an even length it is the upper of the two middle indices.
	mid := arr.len / 2
	if arr.len % 2 == 0 {
		return (arr[mid - 1] + arr[mid]) / f64(2)
	}
	return arr[mid]
}

// mode returns the most frequent element of `arr`.
// Measure of Central Tendency.
// When several values share the highest frequency, the one that appears
// first in `arr` wins (the comparison below is strict).
// Each element is counted with `freq`, so the cost is quadratic in `arr.len`.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mode(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut best_idx := 0
	mut best_count := 0
	for i, v in arr {
		count := freq(arr, v)
		if count > best_count {
			best_idx = i
			best_count = count
		}
	}
	return arr[best_idx]
}

// rms returns the root mean square of `arr`: the square root of the mean of
// the squared elements.
// Based on
// https://en.wikipedia.org/wiki/Root_mean_square
pub fn rms(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut square_sum := f64(0)
	for v in arr {
		square_sum += v * v
	}
	return math.sqrt(square_sum / f64(arr.len))
}

// sum_squared_deviations returns the sum of `(v - center)^2` over all `v` in `arr`.
// It is the shared numerator of the population and sample variance.
fn sum_squared_deviations(arr []f64, center f64) f64 {
	mut sum := f64(0)
	for v in arr {
		deviation := v - center
		sum += deviation * deviation
	}
	return sum
}

// population_variance returns the mean of the squared deviations from the
// mean of `arr` (divisor: `arr.len`).
// Measure of Dispersion / Spread.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn population_variance(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	return sum_squared_deviations(arr, mean(arr)) / f64(arr.len)
}

// sample_variance returns the sum of the squared deviations from the mean of
// `arr`, divided by `arr.len - 1`.
// Measure of Dispersion / Spread.
// Arrays with fewer than two elements return 0: with a single element the
// divisor would be 0 and the division would produce NaN.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn sample_variance(arr []f64) f64 {
	if arr.len < 2 {
		return f64(0)
	}
	return sum_squared_deviations(arr, mean(arr)) / f64(arr.len - 1)
}

// population_stddev returns the square root of `population_variance(arr)`.
// Measure of Dispersion / Spread.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn population_stddev(arr []f64) f64 {
	// population_variance already returns 0 for an empty array.
	return math.sqrt(population_variance(arr))
}

// sample_stddev returns the square root of `sample_variance(arr)`.
// Measure of Dispersion / Spread.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn sample_stddev(arr []f64) f64 {
	// sample_variance already returns 0 for arrays with fewer than two elements.
	return math.sqrt(sample_variance(arr))
}

// mean_absdev returns the mean of the absolute deviations of the elements of
// `arr` from their arithmetic mean.
// Measure of Dispersion / Spread.
// Based on
// https://en.wikipedia.org/wiki/Average_absolute_deviation
pub fn mean_absdev(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	center := mean(arr)
	mut sum := f64(0)
	for v in arr {
		sum += math.abs(v - center)
	}
	return sum / f64(arr.len)
}

// min returns the smallest element of `arr`.
pub fn min(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut smallest := arr[0]
	for v in arr {
		if v < smallest {
			smallest = v
		}
	}
	return smallest
}

// max returns the largest element of `arr`.
pub fn max(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut largest := arr[0]
	for v in arr {
		if v > largest {
			largest = v
		}
	}
	return largest
}

// range returns the difference between the largest and the smallest element
// of `arr` ( Maximum - Minimum ).
// Measure of Dispersion / Spread.
// Based on
// https://www.mathsisfun.com/data/range.html
pub fn range(arr []f64) f64 {
	// For an empty array both max and min return 0, so the difference is 0.
	return max(arr) - min(arr)
}

// ===========================================================================
// File: v_windows/v/vlib/math/stats/stats_test.v (new file)
// ===========================================================================
import math.stats
import math

// approx_eq reports whether `actual` is within 1e-5 of `expected`.
// Computed f64 results are compared with a tolerance because exact `==`
// comparison is unreliable for them.
fn approx_eq(actual f64, expected f64) bool {
	return math.abs(actual - expected) < 1e-5
}

fn test_freq() {
	// Tests were also verified on Wolfram Alpha
	data := [f64(10.0), f64(10.0), f64(5.9), f64(2.7)]
	assert stats.freq(data, 10.0) == 2
	assert stats.freq(data, 2.7) == 1
	assert stats.freq(data, 15) == 0
}

fn test_mean() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	assert approx_eq(stats.mean(data), 5.762500)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	assert approx_eq(stats.mean(data), 17.650000)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	assert approx_eq(stats.mean(data), 37.708000)
}

fn test_geometric_mean() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	assert approx_eq(stats.geometric_mean(data), 5.15993)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o := stats.geometric_mean(data)
	// The product is negative, so in math the result is a complex number;
	// accept NaN (however the platform prints it) or 0.
	assert math.is_nan(o) || o == f64(0)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	assert approx_eq(stats.geometric_mean(data), 25.064496)
}

fn test_harmonic_mean() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	assert approx_eq(stats.harmonic_mean(data), 4.626519)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	assert approx_eq(stats.harmonic_mean(data), 9.134577)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	assert approx_eq(stats.harmonic_mean(data), 16.555477)
}

fn test_median() {
	// Tests were also verified on Wolfram Alpha
	// median assumes a sorted array, so every dataset below is sorted.

	// Even
	mut data := [f64(2.7), f64(4.45), f64(5.9), f64(10.0)]
	assert approx_eq(stats.median(data), 5.175000)
	data = [f64(-3.0), f64(1.89), f64(4.4), f64(67.31)]
	assert approx_eq(stats.median(data), 3.145000)
	data = [f64(7.88), f64(12.0), f64(54.83), f64(76.122)]
	assert approx_eq(stats.median(data), 33.415000)

	// Odd: the middle element is returned untouched, so exact equality is safe.
	data = [f64(2.7), f64(4.45), f64(5.9), f64(10.0), f64(22)]
	assert stats.median(data) == f64(5.9)
	data = [f64(-3.0), f64(1.89), f64(4.4), f64(9), f64(67.31)]
	assert stats.median(data) == f64(4.4)
	data = [f64(3.3), f64(7.88), f64(12.0), f64(54.83), f64(76.122)]
	assert stats.median(data) == f64(12.0)
}

fn test_mode() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(2.7), f64(2.7), f64(4.45), f64(5.9), f64(10.0)]
	assert stats.mode(data) == f64(2.7)
	data = [f64(-3.0), f64(1.89), f64(1.89), f64(1.89), f64(9), f64(4.4), f64(4.4), f64(9),
		f64(67.31),
	]
	assert stats.mode(data) == f64(1.89)
	// Testing greedy nature: on a tie the first value in the array wins.
	data = [f64(2.0), f64(4.0), f64(2.0), f64(4.0)]
	assert stats.mode(data) == f64(2.0)
}

fn test_rms() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	assert approx_eq(stats.rms(data), 6.362046)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	assert approx_eq(stats.rms(data), 33.773393)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	assert approx_eq(stats.rms(data), 47.452561)
}

fn test_population_variance() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	assert approx_eq(stats.population_variance(data), 7.269219)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	assert approx_eq(stats.population_variance(data), 829.119550)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	assert approx_eq(stats.population_variance(data), 829.852282)
}

fn test_sample_variance() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	assert approx_eq(stats.sample_variance(data), 9.692292)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	assert approx_eq(stats.sample_variance(data), 1105.492733)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	assert approx_eq(stats.sample_variance(data), 1106.469709)
}

fn test_population_stddev() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	assert approx_eq(stats.population_stddev(data), 2.696149)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	assert approx_eq(stats.population_stddev(data), 28.794436)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	assert approx_eq(stats.population_stddev(data), 28.807157)
}

fn test_sample_stddev() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	assert approx_eq(stats.sample_stddev(data), 3.113245)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	assert approx_eq(stats.sample_stddev(data), 33.248951)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	assert approx_eq(stats.sample_stddev(data), 33.263639)
}

fn test_mean_absdev() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	assert approx_eq(stats.mean_absdev(data), 2.187500)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	assert approx_eq(stats.mean_absdev(data), 24.830000)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	assert approx_eq(stats.mean_absdev(data), 27.768000)
}

fn test_min() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	assert stats.min(data) == f64(2.7)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	assert stats.min(data) == f64(-3.0)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	assert stats.min(data) == f64(7.88)
}

fn test_max() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	assert stats.max(data) == f64(10.0)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	assert stats.max(data) == f64(67.31)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	assert stats.max(data) == f64(76.122)
}

fn test_range() {
	// Tests were also verified on Wolfram Alpha
	// The range is a computed difference, so it is compared with a tolerance.
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	assert approx_eq(stats.range(data), 7.3)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	assert approx_eq(stats.range(data), 70.31)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	assert approx_eq(stats.range(data), 68.242)
}

fn test_single_element() {
	data := [f64(4.2)]
	assert stats.freq(data, 4.2) == 1
	assert stats.mean(data) == f64(4.2)
	assert stats.median(data) == f64(4.2)
	assert stats.mode(data) == f64(4.2)
	assert stats.population_variance(data) == f64(0)
	assert stats.population_stddev(data) == f64(0)
	// With one element the sample variance divisor (len - 1) is 0; the module
	// returns 0 instead of NaN.
	assert stats.sample_variance(data) == f64(0)
	assert stats.sample_stddev(data) == f64(0)
	assert stats.mean_absdev(data) == f64(0)
	assert stats.min(data) == f64(4.2)
	assert stats.max(data) == f64(4.2)
	assert stats.range(data) == f64(0)
}

fn test_passing_empty() {
	data := []f64{}
	assert stats.freq(data, 0) == 0
	assert stats.mean(data) == f64(0)
	assert stats.geometric_mean(data) == f64(0)
	assert stats.harmonic_mean(data) == f64(0)
	assert stats.median(data) == f64(0)
	assert stats.mode(data) == f64(0)
	assert stats.rms(data) == f64(0)
	assert stats.population_variance(data) == f64(0)
	assert stats.sample_variance(data) == f64(0)
	assert stats.population_stddev(data) == f64(0)
	assert stats.sample_stddev(data) == f64(0)
	assert stats.mean_absdev(data) == f64(0)
	assert stats.min(data) == f64(0)
	assert stats.max(data) == f64(0)
	assert stats.range(data) == f64(0)
}