diff options
Diffstat (limited to 'v_windows/v/vlib/math/stats')
| -rw-r--r-- | v_windows/v/vlib/math/stats/stats.v | 249 | ||||
| -rw-r--r-- | v_windows/v/vlib/math/stats/stats_test.v | 269 | 
2 files changed, 518 insertions, 0 deletions
// ===== File: v_windows/v/vlib/math/stats/stats.v (new file) =====
module stats

import math

// TODO: Implement all of them with generics

// This module defines the following statistical operations on f64 arrays.
// Every function returns 0 when the requested statistic is undefined for
// the given input (an empty array, or fewer than two samples for the
// sample variance / sample standard deviation).
//  ---------------------------
// |   Summary of Functions    |
//  ---------------------------
// -----------------------------------------------------------------------
// freq - Frequency
// mean - Mean
// geometric_mean - Geometric Mean
// harmonic_mean - Harmonic Mean
// median - Median
// mode - Mode
// rms - Root Mean Square
// population_variance - Population Variance
// sample_variance - Sample Variance
// population_stddev - Population Standard Deviation
// sample_stddev - Sample Standard Deviation
// mean_absdev - Mean Absolute Deviation
// min - Minimum of the Array
// max - Maximum of the Array
// range - Range of the Array ( max - min )
// -----------------------------------------------------------------------

// sum_sq_dev returns the sum of the squared differences between every
// element of `arr` and `center`. Shared by the two variance functions.
fn sum_sq_dev(arr []f64, center f64) f64 {
	mut total := f64(0)
	for v in arr {
		d := v - center
		total += d * d
	}
	return total
}

// freq returns the number of elements of `arr` that compare equal (`==`)
// to `val`. An empty array yields 0.
// Measure of Occurrence
// Based on
// https://www.mathsisfun.com/data/frequency-distribution.html
pub fn freq(arr []f64, val f64) int {
	mut count := 0
	for v in arr {
		if v == val {
			count++
		}
	}
	return count
}

// mean returns the arithmetic mean of `arr`, or 0 for an empty array.
// Measure of Central Tendency
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mean(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut sum := f64(0)
	for v in arr {
		sum += v
	}
	return sum / f64(arr.len)
}

// geometric_mean returns the n-th root of the product of the n elements of
// `arr`, or 0 for an empty array.
// The product is accumulated directly in an f64, so extreme inputs can
// overflow or underflow before the root is taken. When the product is
// negative the result is whatever `math.pow` yields for a negative base
// with a fractional exponent (the tests accept NaN or 0).
// Measure of Central Tendency
// Based on
// https://www.mathsisfun.com/numbers/geometric-mean.html
pub fn geometric_mean(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut product := f64(1)
	for v in arr {
		product *= v
	}
	return math.pow(product, f64(1) / f64(arr.len))
}

// harmonic_mean returns n divided by the sum of the reciprocals of the n
// elements of `arr`, or 0 for an empty array.
// Measure of Central Tendency
// Based on
// https://www.mathsisfun.com/numbers/harmonic-mean.html
pub fn harmonic_mean(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut reciprocal_sum := f64(0)
	for v in arr {
		reciprocal_sum += f64(1) / v
	}
	return f64(arr.len) / reciprocal_sum
}

// median returns the middle element of `arr` (odd length) or the average
// of the two middle elements (even length), or 0 for an empty array.
// The input array is assumed to be sorted; it is not sorted here.
// Measure of Central Tendency
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn median(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	if arr.len % 2 == 0 {
		mid := (arr.len / 2) - 1
		return (arr[mid] + arr[mid + 1]) / f64(2)
	}
	return arr[(arr.len - 1) / 2]
}

// mode returns the element of `arr` with the highest frequency, or 0 for
// an empty array. When several values share the highest frequency, the one
// that appears first in `arr` is returned.
// Measure of Central Tendency
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mode(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut best_idx := 0
	mut best_count := freq(arr, arr[0])
	for i in 1 .. arr.len {
		count := freq(arr, arr[i])
		// Strict `>` keeps the earliest element on ties.
		if count > best_count {
			best_count = count
			best_idx = i
		}
	}
	return arr[best_idx]
}

// rms returns the root mean square of `arr`, or 0 for an empty array.
// Based on
// https://en.wikipedia.org/wiki/Root_mean_square
pub fn rms(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut sum := f64(0)
	for v in arr {
		sum += v * v
	}
	return math.sqrt(sum / f64(arr.len))
}

// population_variance returns the sum of squared deviations from the mean
// divided by n, or 0 for an empty array.
// Measure of Dispersion / Spread
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn population_variance(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	return sum_sq_dev(arr, mean(arr)) / f64(arr.len)
}

// sample_variance returns the sum of squared deviations from the mean
// divided by n - 1. It returns 0 when `arr` has fewer than two elements:
// with a single sample the divisor would be 0 and the result NaN.
// Measure of Dispersion / Spread
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn sample_variance(arr []f64) f64 {
	if arr.len < 2 {
		return f64(0)
	}
	return sum_sq_dev(arr, mean(arr)) / f64(arr.len - 1)
}

// population_stddev returns the square root of `population_variance(arr)`.
// An empty array yields 0 because the variance is 0 in that case.
// Measure of Dispersion / Spread
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn population_stddev(arr []f64) f64 {
	return math.sqrt(population_variance(arr))
}

// sample_stddev returns the square root of `sample_variance(arr)`.
// Arrays with fewer than two elements yield 0 because the sample variance
// is 0 in that case.
// Measure of Dispersion / Spread
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn sample_stddev(arr []f64) f64 {
	return math.sqrt(sample_variance(arr))
}

// mean_absdev returns the mean of the absolute deviations of the elements
// of `arr` from their arithmetic mean, or 0 for an empty array.
// Measure of Dispersion / Spread
// Based on
// https://en.wikipedia.org/wiki/Average_absolute_deviation
pub fn mean_absdev(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	center := mean(arr)
	mut sum := f64(0)
	for v in arr {
		sum += math.abs(v - center)
	}
	return sum / f64(arr.len)
}

// min returns the smallest element of `arr`, or 0 for an empty array.
pub fn min(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut lowest := arr[0]
	for v in arr {
		if v < lowest {
			lowest = v
		}
	}
	return lowest
}

// max returns the largest element of `arr`, or 0 for an empty array.
pub fn max(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut highest := arr[0]
	for v in arr {
		if v > highest {
			highest = v
		}
	}
	return highest
}

// range returns `max(arr) - min(arr)`. An empty array yields 0 because
// both `max` and `min` return 0 in that case.
// Measure of Dispersion / Spread
// Based on
// https://www.mathsisfun.com/data/range.html
pub fn range(arr []f64) f64 {
	return max(arr) - min(arr)
}

// ===== File: v_windows/v/vlib/math/stats/stats_test.v (new file) =====
import math.stats
import math

// tst_res reports whether the two strings, parsed with `.f64()`, differ by
// less than 1e-5. The tests below compare floating point results through
// this helper instead of `==`, because exact f64 equality is too strict
// for computed values.
fn tst_res(str1 string, str2 string) bool {
	return math.abs(str1.f64() - str2.f64()) < 1e-5
}

fn test_freq() {
	// Tests were also verified on Wolfram Alpha
	data := [f64(10.0), f64(10.0), f64(5.9), f64(2.7)]
	mut o := stats.freq(data, 10.0)
	assert o == 2
	o = stats.freq(data, 2.7)
	assert o == 1
	o = stats.freq(data, 15)
	assert o == 0
}

fn test_mean() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.mean(data)
	assert tst_res(o.str(), '5.762500')
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.mean(data)
	assert tst_res(o.str(), '17.650000')
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.mean(data)
	assert tst_res(o.str(), '37.708000')
}

fn test_geometric_mean() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.geometric_mean(data)
	assert tst_res(o.str(), '5.15993')
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.geometric_mean(data)
	// The product is negative, so the platform's NaN spelling (or 0) is accepted.
	assert o.str() == 'nan' || o.str() == '-nan' || o.str() == '-1.#IND00' || o == f64(0)
		|| o.str() == '-nan(ind)' // Because in math it yields a complex number
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.geometric_mean(data)
	assert tst_res(o.str(), '25.064496')
}

fn test_harmonic_mean() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.harmonic_mean(data)
	assert tst_res(o.str(), '4.626519')
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.harmonic_mean(data)
	assert tst_res(o.str(), '9.134577')
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.harmonic_mean(data)
	assert tst_res(o.str(), '16.555477')
}

fn test_median() {
	// Tests were also verified on Wolfram Alpha
	// Assumes sorted array

	// Even
	mut data := [f64(2.7), f64(4.45), f64(5.9), f64(10.0)]
	mut o := stats.median(data)
	assert tst_res(o.str(), '5.175000')
	data = [f64(-3.0), f64(1.89), f64(4.4), f64(67.31)]
	o = stats.median(data)
	assert tst_res(o.str(), '3.145000')
	data = [f64(7.88), f64(12.0), f64(54.83), f64(76.122)]
	o = stats.median(data)
	assert tst_res(o.str(), '33.415000')

	// Odd
	data = [f64(2.7), f64(4.45), f64(5.9), f64(10.0), f64(22)]
	o = stats.median(data)
	assert o == f64(5.9)
	data = [f64(-3.0), f64(1.89), f64(4.4), f64(9), f64(67.31)]
	o = stats.median(data)
	assert o == f64(4.4)
	// Kept in ascending order so the input honours median's sorted-input contract.
	data = [f64(3.3), f64(7.88), f64(12.0), f64(54.83), f64(76.122)]
	o = stats.median(data)
	assert o == f64(12.0)
}

fn test_mode() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(2.7), f64(2.7), f64(4.45), f64(5.9), f64(10.0)]
	mut o := stats.mode(data)
	assert o == f64(2.7)
	data = [f64(-3.0), f64(1.89), f64(1.89), f64(1.89), f64(9), f64(4.4), f64(4.4), f64(9),
		f64(67.31),
	]
	o = stats.mode(data)
	assert o == f64(1.89)
	// Testing greedy nature: on a tie the first value wins
	data = [f64(2.0), f64(4.0), f64(2.0), f64(4.0)]
	o = stats.mode(data)
	assert o == f64(2.0)
}

fn test_rms() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.rms(data)
	assert tst_res(o.str(), '6.362046')
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.rms(data)
	assert tst_res(o.str(), '33.773393')
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.rms(data)
	assert tst_res(o.str(), '47.452561')
}

fn test_population_variance() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.population_variance(data)
	assert tst_res(o.str(), '7.269219')
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.population_variance(data)
	assert tst_res(o.str(), '829.119550')
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.population_variance(data)
	assert tst_res(o.str(), '829.852282')
}

fn test_sample_variance() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.sample_variance(data)
	assert tst_res(o.str(), '9.692292')
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.sample_variance(data)
	assert tst_res(o.str(), '1105.492733')
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.sample_variance(data)
	assert tst_res(o.str(), '1106.469709')
}

fn test_population_stddev() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.population_stddev(data)
	assert tst_res(o.str(), '2.696149')
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.population_stddev(data)
	assert tst_res(o.str(), '28.794436')
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.population_stddev(data)
	assert tst_res(o.str(), '28.807157')
}

fn test_sample_stddev() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.sample_stddev(data)
	assert tst_res(o.str(), '3.113245')
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.sample_stddev(data)
	assert tst_res(o.str(), '33.248951')
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.sample_stddev(data)
	assert tst_res(o.str(), '33.263639')
}

fn test_mean_absdev() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.mean_absdev(data)
	assert tst_res(o.str(), '2.187500')
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.mean_absdev(data)
	assert tst_res(o.str(), '24.830000')
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.mean_absdev(data)
	assert tst_res(o.str(), '27.768000')
}

fn test_min() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.min(data)
	assert o == f64(2.7)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.min(data)
	assert o == f64(-3.0)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.min(data)
	assert o == f64(7.88)
}

fn test_max() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.max(data)
	assert o == f64(10.0)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.max(data)
	assert o == f64(67.31)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.max(data)
	assert o == f64(76.122)
}

fn test_range() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.range(data)
	assert o == f64(7.3)
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.range(data)
	assert o == f64(70.31)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.range(data)
	assert o == f64(68.242)
}

fn test_passing_empty() {
	data := []f64{}
	assert stats.freq(data, 0) == 0
	assert stats.mean(data) == f64(0)
	assert stats.geometric_mean(data) == f64(0)
	assert stats.harmonic_mean(data) == f64(0)
	assert stats.median(data) == f64(0)
	assert stats.mode(data) == f64(0)
	assert stats.rms(data) == f64(0)
	assert stats.population_variance(data) == f64(0)
	assert stats.sample_variance(data) == f64(0)
	assert stats.population_stddev(data) == f64(0)
	assert stats.sample_stddev(data) == f64(0)
	assert stats.mean_absdev(data) == f64(0)
	assert stats.min(data) == f64(0)
	assert stats.max(data) == f64(0)
	assert stats.range(data) == f64(0)
}

fn test_passing_single_element() {
	// One observation has no spread; the sample statistics must be 0, not NaN.
	data := [f64(42.0)]
	assert stats.sample_variance(data) == f64(0)
	assert stats.sample_stddev(data) == f64(0)
	assert stats.population_variance(data) == f64(0)
	assert stats.mode(data) == f64(42.0)
	assert stats.median(data) == f64(42.0)
}
