-
Notifications
You must be signed in to change notification settings - Fork 11
/
stats.js
210 lines (182 loc) · 5.04 KB
/
stats.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
"use strict";
// Public API of the module.
//
// These assignments appear before the functions are written out in the file;
// that works because function declarations are hoisted.
//
// `variance` and `stdev` are aliases for the population forms
// (populationVariance / populationStdev). The sample* names expose the
// variants that divide by N - 1.
module.exports.numbers = numbers
module.exports.sum = sum
module.exports.mean = mean
module.exports.median = median
module.exports.mode = mode
module.exports.variance = populationVariance
module.exports.sampleVariance = sampleVariance
module.exports.populationVariance = populationVariance
module.exports.stdev = populationStdev
module.exports.sampleStdev = sampleStdev
module.exports.populationStdev = populationStdev
module.exports.percentile = percentile
module.exports.histogram = histogram
// Third-party predicate; numbers() uses it to decide which entries to keep.
var isNumber = require("isnumber")
/**
 * Collect the numeric entries of an array-like into a new array of numbers.
 *
 * Each entry accepted by `isNumber` is coerced with unary plus and kept in its
 * original position order; every other entry is dropped. The input is only
 * read, never modified.
 *
 * @param {ArrayLike<*>} vals - Candidate values; `null`/`undefined` is allowed.
 * @returns {number[]} A fresh array of the accepted values (empty when `vals`
 *   is `null` or `undefined`).
 */
function numbers(vals) {
  const kept = []
  if (vals == null) return kept

  for (let idx = 0; idx < vals.length; idx += 1) {
    const candidate = vals[idx]
    if (!isNumber(candidate)) continue
    kept.push(+candidate)
  }
  return kept
}
/**
 * Sort an array of numbers into ascending numeric order.
 *
 * The sort happens in place: the array passed in is reordered and that same
 * array is returned.
 *
 * @param {number[]} vals - Array to sort.
 * @returns {number[]} The same array, now sorted ascending.
 */
function nsort(vals) {
  const ascending = (left, right) => left - right
  vals.sort(ascending)
  return vals
}
/**
 * Add up the numeric entries of `vals`.
 *
 * Entries are first filtered through `numbers`, then accumulated left to right
 * starting from 0.
 *
 * @param {ArrayLike<*>} vals - Values to total.
 * @returns {number} The total; 0 when nothing numeric is present.
 */
function sum(vals) {
  return numbers(vals).reduce(function accumulate(total, value) {
    return total + value
  }, 0)
}
/**
 * Arithmetic mean of the numeric entries of `vals`.
 *
 * @param {ArrayLike<*>} vals - Values to average.
 * @returns {number} Total divided by the count of numeric entries, or NaN when
 *   there are none.
 */
function mean(vals) {
  const nums = numbers(vals)
  const count = nums.length
  return count === 0 ? NaN : sum(nums) / count
}
/**
 * Median of the numeric entries of `vals`.
 *
 * The numeric entries are sorted ascending; an odd count yields the middle
 * element, an even count yields the average of the two middle elements.
 *
 * @param {ArrayLike<*>} vals - Values to examine.
 * @returns {number} The median, or NaN when there are no numeric entries.
 */
function median(vals) {
  const sorted = nsort(numbers(vals))
  const count = sorted.length
  if (count === 0) return NaN

  const upperMiddle = Math.floor(count / 2)
  if (count % 2 === 1) {
    // Odd count: a single true middle element exists.
    return sorted[upperMiddle]
  }
  // Even count: split the difference between the two central elements.
  return (sorted[upperMiddle - 1] + sorted[upperMiddle]) / 2
}
/**
 * Most frequently occurring value(s) among the numeric entries of `vals`.
 *
 * The return shape depends on the data:
 *  - no numeric entries: NaN
 *  - one value is strictly the most frequent: that number
 *  - several values tie for most frequent: a Set of those numbers
 *  - several values and every one occurs exactly once: the array of numeric
 *    entries itself (all of them are modes)
 *
 * @param {ArrayLike<*>} vals - Values to examine.
 * @returns {number|Set<number>|number[]} See the list above.
 */
function mode(vals) {
  const nums = numbers(vals)
  if (nums.length === 0) return NaN

  // Occurrence count per distinct value (object keys are the values as strings).
  const tally = {}
  nums.forEach(function count(value) {
    tally[value] = (tally[value] || 0) + 1
  })

  // Distinct values converted back to numbers, most frequent first.
  const byFrequency = (a, b) => tally[b] - tally[a]
  const ranked = numbers(Object.keys(tally).sort(byFrequency))
  const top = ranked[0]
  const topCount = tally[top]

  // The runner-up occurs less often (or does not exist): a single mode.
  if (tally[ranked[1]] !== topCount) return top

  // Tied at the top with no repeats anywhere: every value is a mode.
  if (ranked.length === nums.length) return nums

  // Multi-modal: gather the leading run of values sharing the top count.
  const modes = new Set()
  for (const value of ranked) {
    if (tally[value] !== topCount) break
    modes.add(value)
  }
  return modes
}
/**
 * Squared deviation from the mean for every numeric entry of `vals`.
 *
 * This is the shared core of the variance functions; they differ only in
 * whether the total of these squares is divided by N or by N - 1.
 *
 * @param {ArrayLike<*>} vals - Values to examine.
 * @returns {number[]} One squared deviation per numeric entry, in input order
 *   (empty when there are no numeric entries).
 */
function valuesMinusMeanSquared(vals) {
  const nums = numbers(vals)
  const center = mean(nums)
  return nums.map(function squaredDeviation(value) {
    return Math.pow(value - center, 2)
  })
}
/**
 * Population variance: the mean of the squared deviations from the mean.
 *
 * @param {ArrayLike<*>} vals - Values to examine.
 * @returns {number} The variance, or NaN when there are no numeric entries.
 */
function populationVariance(vals) {
  const squaredDeviations = valuesMinusMeanSquared(vals)
  return mean(squaredDeviations)
}
/**
 * Sample variance: the total of the squared deviations from the mean divided
 * by (count - 1).
 *
 * @param {ArrayLike<*>} vals - Values to examine.
 * @returns {number} The variance, or NaN when fewer than two numeric entries
 *   are present.
 */
function sampleVariance(vals) {
  const squaredDeviations = valuesMinusMeanSquared(vals)
  const count = squaredDeviations.length
  return count > 1 ? sum(squaredDeviations) / (count - 1) : NaN
}
/**
 * Population standard deviation: the square root of the population variance.
 *
 * @param {ArrayLike<*>} vals - Values to examine.
 * @returns {number} The standard deviation; NaN whenever the population
 *   variance is NaN.
 */
function populationStdev(vals) {
  const variance = populationVariance(vals)
  return Math.sqrt(variance)
}
/**
 * Sample standard deviation: the square root of the sample variance.
 *
 * @param {ArrayLike<*>} vals - Values to examine.
 * @returns {number} The standard deviation; NaN whenever the sample variance
 *   is NaN.
 */
function sampleStdev(vals) {
  const variance = sampleVariance(vals)
  return Math.sqrt(variance)
}
/**
 * Estimate the value found at a given percentile of the numeric entries.
 *
 * The numeric entries are sorted ascending and the sample at 0-based index k
 * is treated as sitting at rank position k. The requested percentile maps to
 * position `count * ptile - 0.5`. A position between two samples is linearly
 * interpolated; a position at or beyond either end resolves to the nearest
 * sample, so the result always lies between the minimum and the maximum.
 *
 * @param {ArrayLike<*>} vals - Values to examine.
 * @param {number} ptile - Percentile as a fraction (0.5 is the 50th
 *   percentile). Anything greater than 1 is treated as 1.
 * @returns {number} The estimated value, or NaN when there are no numeric
 *   entries or when `ptile` is null, undefined or negative.
 */
function percentile(vals, ptile) {
  var nums = numbers(vals)
  if (nums.length === 0 || ptile == null || ptile < 0) return NaN

  // Anything above 1 (for example 95 passed instead of 0.95) counts as 1.
  var fraction = ptile > 1 ? 1 : ptile

  nums = nsort(nums)
  var last = nums.length - 1
  var position = (nums.length * fraction) - 0.5

  // Positions before the first sample must not be interpolated: the weight
  // would go negative and the result would fall below the minimum.
  if (position <= 0) return nums[0]
  // Same at the top end: nothing lies beyond the last sample.
  if (position >= last) return nums[last]

  var lower = Math.floor(position)
  if (lower === position) return nums[lower]

  // Linear interpolation between the two neighbouring samples.
  var weight = position - lower
  return (1 - weight) * nums[lower] + weight * nums[lower + 1]
}
/**
 * Bucket the numeric entries of `vals` into equal-width bins.
 *
 * The bins are centred on the midpoint of the data and together span the data
 * range widened by 5%, so the extreme values land safely inside the outer
 * bins despite floating-point rounding. A value lying exactly on a boundary
 * between two bins is counted in the lower bin.
 *
 * @param {ArrayLike<*>} vals - Values to bucket.
 * @param {number} [bins] - Desired bin count; rounded to the nearest integer
 *   and raised to 1 if smaller. Defaults to the square root of the number of
 *   numeric entries.
 * @returns {?{values: number[], bins: number, binWidth: number, binLimits: number[]}}
 *   `values[k]` is the count in bin k, `binLimits` holds the left edge of the
 *   first bin and the right edge of the last. Returns null when `vals` is
 *   null/undefined or contains no numeric entries.
 */
function histogram (vals, bins) {
  if (vals == null) return null

  const sorted = nsort(numbers(vals))
  const total = sorted.length
  if (total === 0) return null

  // No bin count requested: fall back to the square-root rule.
  let binCount = Math.round(bins == null ? Math.sqrt(total) : bins)
  if (binCount < 1) binCount = 1

  let low = sorted[0]
  let high = sorted[total - 1]
  if (low === high) {
    // All values identical: invent a unit-wide range around that value.
    low -= 0.5
    high += 0.5
  }

  // Widen the covered range by 5% before dividing it into bins.
  const span = high - low
  const binWidth = (span + (span * 0.05)) / binCount
  const center = (low + high) / 2
  const halfCount = Math.floor(binCount / 2)

  let leftEdge
  if (binCount % 2 !== 0) {
    // Odd bin count: the middle bin is centred on the midpoint.
    leftEdge = (center - (binWidth / 2)) - (binWidth * halfCount)
  } else {
    // Even bin count: the midpoint is itself a bin edge.
    leftEdge = center - (binWidth * halfCount)
  }

  const result = {
    values: new Array(binCount).fill(0),
    bins: binCount,
    binWidth: binWidth,
    binLimits: [leftEdge, leftEdge + (binWidth * binCount)]
  }

  // The values are sorted, so the current bin index only ever moves forward.
  let slot = 0
  sorted.forEach(function place(value) {
    while (value > (((slot + 1) * binWidth) + leftEdge)) {
      slot += 1
    }
    result.values[slot] += 1
  })

  return result
}