File tree: 15 files changed, +40 -40 lines changed
Original file line number | Diff line number | Diff line change
1
- (defproject sample " 2.1.0"
1
+ (defproject sampling " 2.1.0"
2
2
:description " Random Sampling in Clojure"
3
- :url " https://github.com/bigmlcom/sample "
3
+ :url " https://github.com/bigmlcom/sampling "
4
4
:license {:name " Apache License, Version 2.0"
5
5
:url " http://www.apache.org/licenses/LICENSE-2.0" }
6
6
:dependencies [[org.clojure/clojure " 1.4.0" ]
7
7
[incanter/parallelcolt " 0.9.4" ]
8
8
[org.clojure/data.finger-tree " 0.0.1" ]]
9
- :aot [bigml.sample .reservoir.mergeable])
9
+ :aot [bigml.sampling .reservoir.mergeable])
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .occurrence
5
+ (ns bigml.sampling .occurrence
6
6
" Provides functions for computing the number of occurrences to be
7
7
expected for an item in a population when sampled with
8
8
replacement."
9
9
(:import (cern.jet.math.tdouble DoubleArithmetic))
10
- (:require (bigml.sample [random :as random])))
10
+ (:require (bigml.sampling [random :as random])))
11
11
12
12
(def default-probability-cutoff
13
13
" The cumulative-probabilities fn will stop calculating occurrence
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .random
5
+ (ns bigml.sampling .random
6
6
" Functions for creating and using a random number generator."
7
7
(:import (cern.jet.random.tdouble.engine MersenneTwister64)
8
8
(java.util Random)))
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .reservoir
5
+ (ns bigml.sampling .reservoir
6
6
" Provides random sampling using reservoirs. This is useful when the
7
7
original population can't be kept in memory but the sample set
8
8
can."
9
- (:require (bigml.sample .reservoir [efraimidis :as efraimidis]
10
- [insertion :as insertion]))
11
- (:import (bigml.sample .reservoir.mergeable MergeableReservoir))
9
+ (:require (bigml.sampling .reservoir [efraimidis :as efraimidis]
10
+ [insertion :as insertion]))
11
+ (:import (bigml.sampling .reservoir.mergeable MergeableReservoir))
12
12
(:refer-clojure :exclude [merge]))
13
13
14
14
(def ^:private implementations
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .reservoir.efraimidis
5
+ (ns bigml.sampling .reservoir.efraimidis
6
6
" Provides weighted random sampling using reservoirs as described by
7
7
Efraimidis and Spirakis.
8
8
http://utopia.duth.gr/~pefraimi/research/data/2007EncOfAlg.pdf"
9
- (:require (bigml.sample [random :as random]
10
- [util :as util])
9
+ (:require (bigml.sampling [random :as random]
10
+ [util :as util])
11
11
(clojure.data [finger-tree :as tree]))
12
- (:import (bigml.sample .reservoir.mergeable MergeableReservoir)))
12
+ (:import (bigml.sampling .reservoir.mergeable MergeableReservoir)))
13
13
14
14
(def ^:private compare-k
15
15
#(compare (:k %1 ) (:k %2 )))
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .reservoir.insertion
5
+ (ns bigml.sampling .reservoir.insertion
6
6
" Provides random sampling using reservoirs. Uses an insertion
7
7
method that might originally be from Chao's 'A general purpose
8
8
unequal probability sampling plan'. It's behind a paywall,
9
9
however, so that remains a mystery to me."
10
- (:require (bigml.sample [simple :as simple]
11
- [random :as random]
12
- [occurrence :as occurrence]))
13
- (:import (bigml.sample .reservoir.mergeable MergeableReservoir)))
10
+ (:require (bigml.sampling [simple :as simple]
11
+ [random :as random]
12
+ [occurrence :as occurrence]))
13
+ (:import (bigml.sampling .reservoir.mergeable MergeableReservoir)))
14
14
15
15
(defmulti ^:private insert
16
16
(fn [reservoir _]
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .reservoir.mergeable
5
+ (ns bigml.sampling .reservoir.mergeable
6
6
" Provides the definition for mergeable reservoirs." )
7
7
8
8
(defprotocol MergeableReservoir
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .simple
5
+ (ns bigml.sampling .simple
6
6
" Provides simple random sampling. The original population is kept in
7
7
memory but the resulting sample set is produced as a lazy
8
8
sequence."
9
- (:require (bigml.sample [random :as random]
10
- [util :as util])))
9
+ (:require (bigml.sampling [random :as random]
10
+ [util :as util])))
11
11
12
12
(defn- with-replacement [coll rnd]
13
13
(when-not (empty? coll)
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .stream
5
+ (ns bigml.sampling .stream
6
6
" Provides streaming sampling. Neither the input population or the
7
7
resulting sample are kept in memory. The order of the sample is
8
8
not randomized, but will be in the order of the input population."
9
- (:require (bigml.sample [random :as random]
10
- [occurrence :as occurrence])))
9
+ (:require (bigml.sampling [random :as random]
10
+ [occurrence :as occurrence])))
11
11
12
12
(defn- rate-distribution [sample-size pop-size]
13
13
(apply sorted-map
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .util
5
+ (ns bigml.sampling .util
6
6
" Provides utility functions." )
7
7
8
8
(defn validated-weigh
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .test.occurrence
5
+ (ns bigml.sampling .test.occurrence
6
6
(:use clojure.test)
7
- (:require (bigml.sample [occurrence :as occurrence])))
7
+ (:require (bigml.sampling [occurrence :as occurrence])))
8
8
9
9
(def big-result
10
10
1498231660179642550080525374062985229379154060073454416056804436265250417504978421344703666672011193783194306251922106632531575096104465752579970958417306283423558722428981480592122380206679550814874547016793880384420005011964284022150602938812288536154567998961655336231440060094535026560416077739589623596000N )
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .test.reservoir
5
+ (ns bigml.sampling .test.reservoir
6
6
(:use clojure.test
7
- bigml.sample .test.util)
8
- (:require (bigml.sample [reservoir :as reservoir])))
7
+ bigml.sampling .test.util)
8
+ (:require (bigml.sampling [reservoir :as reservoir])))
9
9
10
10
(deftest sample
11
11
(is (about-eq (reduce + (reservoir/sample (range 1000 ) 500 ))
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .test.simple
5
+ (ns bigml.sampling .test.simple
6
6
(:use clojure.test
7
- bigml.sample .test.util)
8
- (:require (bigml.sample [simple :as simple]
9
- [random :as random])))
7
+ bigml.sampling .test.util)
8
+ (:require (bigml.sampling [simple :as simple]
9
+ [random :as random])))
10
10
11
11
(deftest sample
12
12
(is (about-eq (reduce + (take 500 (simple/sample (range 1000 ))))
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .test.stream
5
+ (ns bigml.sampling .test.stream
6
6
(:use clojure.test
7
- bigml.sample .test.util)
8
- (:require (bigml.sample [stream :as stream])))
7
+ bigml.sampling .test.util)
8
+ (:require (bigml.sampling [stream :as stream])))
9
9
10
10
(deftest sample
11
11
(is (about-eq (reduce + (stream/sample (range 1000 ) 500 1000 ))
Original file line number Diff line number Diff line change 2
2
; ; Licensed under the Apache License, Version 2.0
3
3
; ; http://www.apache.org/licenses/LICENSE-2.0
4
4
5
- (ns bigml.sample .test.util )
5
+ (ns bigml.sampling .test.util )
6
6
7
7
(defn about-eq
8
8
" Returns true if the absolute value of the difference
You can’t perform that action at this time.
0 commit comments