@@ -421,36 +421,101 @@ realXcY(x::Real, y::Real) = x*y
421
421
# Complex method: sum of the products of the real parts and of the imaginary
# parts of `x` and `y` (mathematically, the real part of `x * conj(y)`).
function realXcY(x::Complex, y::Complex)
    xre, xim = reim(x)
    yre, yim = reim(y)
    return xre * yre + xim * yim
end
422
422
423
423
"""
424
- sem(x)
424
+ sem(x; mean=nothing)
425
+ sem(x::AbstractArray[, weights::AbstractWeights]; mean=nothing)
425
426
426
- Return the standard error of the mean of collection `x`,
427
- i.e. `sqrt(var(x, corrected=true) / length(x))`.
427
+ Return the standard error of the mean for a collection `x`.
428
+ A pre-computed `mean` may be provided.
429
+
430
+ When not using weights, this is the (sample) standard deviation
431
+ divided by the square root of the sample size. If weights are used, the
432
+ variance of the sample mean is calculated as follows:
433
+
434
+ * `AnalyticWeights`: Not implemented.
435
+ * `FrequencyWeights`: ``\\frac{\\sum_{i=1}^n w_i (x_i - \\bar{x})^2}{(\\sum w_i) (\\sum w_i - 1)}``
436
+ * `ProbabilityWeights`: ``\\frac{n}{n-1} \\frac{\\sum_{i=1}^n w_i^2 (x_i - \\bar{x})^2}{\\left( \\sum w_i \\right)^2}``
437
+
438
+ The standard error is then the square root of the above quantities.
439
+
440
+ # References
441
+
442
+ Carl-Erik Särndal, Bengt Swensson, Jan Wretman (1992). Model Assisted Survey Sampling.
443
+ New York: Springer. pp. 51-53.
428
444
"""
429
function sem(x; mean=nothing)
    # Generic method for arbitrary iterables. `x` is traversed exactly once via
    # the `iterate` protocol. Emptiness is decided from the first `iterate`
    # call rather than from `isempty(x)`, because the generic `isempty`
    # fallback may itself call `iterate` and would then swallow the first
    # element of a one-shot (stateful) iterator.
    #
    # Keyword `mean`: an optional pre-computed mean of `x`. When it is
    # `nothing`, the mean is accumulated together with the sum of squared
    # errors in the same pass.
    #
    # Returns `sqrt(variance / n)`, where `variance = sse / (n - 1)`.
    y = iterate(x)
    if y === nothing
        # Return the NaN of the type that we would get for a nonempty x
        T = eltype(x)
        _mean = mean === nothing ? zero(T) / 1 : mean
        z = abs2(zero(T) - _mean)
        return oftype((z + z) / 2, NaN)
    end
    if mean === nothing
        n = 0
        value, state = y
        # Use Welford algorithm as seen in (among other places)
        # Knuth's TAOCP, Vol 2, page 232, 3rd edition.
        _mean = value / 1
        sse = real(zero(_mean))
        # The first pass through the loop re-processes the element already
        # fetched above (bringing `n` to 1) before advancing the iterator.
        while y !== nothing
            value, state = y
            y = iterate(x, state)
            n += 1
            new_mean = _mean + (value - _mean) / n
            sse += realXcY(value - _mean, value - new_mean)
            _mean = new_mean
        end
    else
        # Known mean: accumulate squared deviations from it directly.
        n = 1
        value, state = y
        sse = abs2(value - mean)
        while (y = iterate(x, state)) !== nothing
            value, state = y
            n += 1
            sse += abs2(value - mean)
        end
    end
    variance = sse / (n - 1)
    return sqrt(variance / n)
end
482
+
483
function sem(x::AbstractArray; mean=nothing)
    # Array method: delegates the variance computation to `var`, optionally
    # reusing a pre-computed `mean`.
    if !isempty(x)
        sample_var = var(x; mean=mean, corrected=true)
        return sqrt(sample_var / length(x))
    end
    # Empty array: return the NaN of the type that we would get for a
    # nonempty x.
    T = eltype(x)
    center = mean === nothing ? zero(T) / 1 : mean
    sq = abs2(zero(T) - center)
    return oftype((sq + sq) / 2, NaN)
end
493
+
494
function sem(x::AbstractArray, weights::UnitWeights; mean=nothing)
    # With unit weights the result is that of the unweighted method; the
    # weights are only used to validate that the lengths agree.
    length(x) == length(weights) ||
        throw(DimensionMismatch("array and weights do not have the same length"))
    return sem(x; mean=mean)
end
500
+
501
+
502
# Weighted methods for the above

# Frequency weights: the corrected weighted variance divided by the sum of the
# weights, then square-rooted.
function sem(x::AbstractArray, weights::FrequencyWeights; mean=nothing)
    weighted_var = var(x, weights; mean=mean, corrected=true)
    return sqrt(weighted_var / sum(weights))
end
505
+
506
function sem(x::AbstractArray, weights::ProbabilityWeights; mean=nothing)
    # Empty input: return the NaN of the type that we would get for a
    # nonempty x.
    isempty(x) && return var(x, weights; mean=mean, corrected=true) / 0

    # Use the supplied mean if there is one, otherwise the weighted mean.
    center = mean === nothing ? Statistics.mean(x, weights) : mean

    # Sum of squared weighted errors, evaluated lazily through a broadcast
    # object so that no intermediate array is materialized.
    weighted_sq_error(x_i, w) = abs2(w * (x_i - center))
    sse = sum(Broadcast.instantiate(Broadcast.broadcasted(weighted_sq_error, x, weights)))

    # `n` counts the observations carrying a nonzero weight.
    n = count(!iszero, weights)
    return sqrt(sse * n / (n - 1)) / sum(weights)
end
455
520
456
521
# Median absolute deviation
0 commit comments