Build and tests upgrade #18

Open
wants to merge 4 commits into develop
5 changes: 5 additions & 0 deletions .gitignore
@@ -11,6 +11,11 @@ lib_managed/
src_managed/
project/boot/
project/plugins/project/
.bsp/
project/.boot/
project/.ivy/
project/.sbtboot/
.idea/

# Scala-IDE specific
.scala_dependencies
8 changes: 4 additions & 4 deletions build.sbt
@@ -20,9 +20,9 @@ limitations under the License.
// publish isarn-sketches-java for exactly one scala version:
// sbt isarn_sketches_java/publish

scalaVersion := "2.12.8"
scalaVersion := "2.12.14"

crossScalaVersions := Seq("2.11.12", "2.12.8")
crossScalaVersions := Seq("2.11.12", "2.12.14")

// these do not "inherit" when defined at top level, so
// define them here for inclusion in each subproject.
@@ -95,7 +95,7 @@ previewFixedPort := Some(4444)

lazy val isarn_sketches_java = (project in file("isarn-sketches-java"))
.settings(name := "isarn-sketches-java")
.enablePlugins(GenJavadocPlugin, PublishJavadocPlugin)
//.enablePlugins(GenJavadocPlugin, PublishJavadocPlugin)
.settings(siteSubProjectSettings :_*)
.settings(
crossPaths := false, // drop off Scala suffix from artifact names
@@ -114,7 +114,7 @@ lazy val isarn_sketches = (project in file("."))
"org.isarnproject" %% "isarn-algebra-api" % "0.0.3",
"org.isarnproject" %% "isarn-collections" % "0.0.4",
"org.isarnproject" %% "isarn-scalatest" % "0.0.3" % Test,
"org.scalatest" %% "scalatest" % "3.0.5" % Test,
"org.scalatest" %% "scalatest" % "3.2.5" % Test,
"org.apache.commons" % "commons-math3" % "3.6.1" % Test)
)
.settings(publishSettings :_*)
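
For reference, the crossScalaVersions change above means the build now targets both 2.11.12 and 2.12.14. A minimal sketch of cross-building with sbt's "+" prefix (standard sbt behavior, not commands taken from this PR):

sbt +test                          # run the test suite once per entry in crossScalaVersions
sbt +publishLocal                  # stage cross-built artifacts for both Scala versions
sbt isarn_sketches_java/publish    # publish the Java-only module for a single Scala version, as noted in the build comment
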
2 changes: 1 addition & 1 deletion project/build.properties
@@ -1 +1 @@
sbt.version=1.3.12
sbt.version=1.5.4
15 changes: 7 additions & 8 deletions project/plugins.sbt
@@ -1,11 +1,10 @@
resolvers += Resolver.url(
"bintray-sbt-plugin-releases",
url("http://dl.bintray.com/content/sbt/sbt-plugin-releases"))(
Resolver.ivyStylePatterns)

resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/"

resolvers += "jgit-repo" at "http://download.eclipse.org/jgit/maven"
resolvers ++= Seq(
"jgit-repo".at("https://download.eclipse.org/jgit/maven"),
//"sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/",
//Resolver.url("bintray-sbt-plugin-releases", url("https://dl.bintray.com/content/sbt/sbt-plugin-releases"))(
// Resolver.ivyStylePatterns
//)
)

addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.6.3")

156 changes: 81 additions & 75 deletions src/test/scala/org/isarnproject/sketches/TDigestTest.scala
@@ -16,14 +16,17 @@ limitations under the License.

package org.isarnproject.sketches

import org.scalatest._

import org.isarnproject.scalatest.matchers.seq._
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AsyncWordSpec


class TDigestTest extends AsyncWordSpec with Matchers {

class TDigestTest extends FlatSpec with Matchers {
import org.apache.commons.math3.distribution.RealDistribution
import org.apache.commons.math3.distribution.IntegerDistribution


val seed = 235711L
scala.util.Random.setSeed(seed)

@@ -41,7 +44,7 @@ class TDigestTest extends FlatSpec with Matchers {
.map(x => math.abs(td.cdf(x) - dist.cumulativeProbability(x))).max

val dInv = (0.01 to 0.99 by 0.01).iterator
.map(x => math.abs(td.cdfInverse(x) - dist.inverseCumulativeProbability(x))).max / stdv
.map(x => math.abs(td.cdfInverse(x) - dist.inverseCumulativeProbability(x))).max / stdv

val pass = d <= maxD && dInv <= maxDI
if (!pass) Console.err.println(s"testTDvsDist failure: d= $d dInv= $dInv")
@@ -59,7 +62,7 @@ class TDigestTest extends FlatSpec with Matchers {
}

def testSamplingPMF(td: TDigest, dist: IntegerDistribution): Boolean = {
td.nclusters should be <=(td.maxDiscrete)
td.nclusters should be <= (td.maxDiscrete)
val tdSamples = Array.fill(10000) { td.samplePMF }
val distSamples = Array.fill(10000) { dist.sample.toDouble }
val kst = new org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest()
@@ -103,91 +106,94 @@ class TDigestTest extends FlatSpec with Matchers {
testMonotoneCDF(dist) && testMonotoneCDFI(dist)
}

it should "sketch a uniform distribution" in {
import org.apache.commons.math3.distribution.UniformRealDistribution
val dist = new UniformRealDistribution()
testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true)
}
it should {

it should "sketch a normal distribution" in {
import org.apache.commons.math3.distribution.NormalDistribution
val dist = new NormalDistribution()
testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true)
}
"sketch a uniform distribution" in {
import org.apache.commons.math3.distribution.UniformRealDistribution
val dist = new UniformRealDistribution()
testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true)
}

it should "sketch an exponential distribution" in {
import org.apache.commons.math3.distribution.ExponentialDistribution
val dist = new ExponentialDistribution(1.0)
testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true)
}
"sketch a normal distribution" in {
import org.apache.commons.math3.distribution.NormalDistribution
val dist = new NormalDistribution()
testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true)
}

it should "aggregate with another t-digest using ++" in {
import org.apache.commons.math3.distribution.NormalDistribution
val dist = new NormalDistribution()
dist.reseedRandomGenerator(seed)
"sketch an exponential distribution" in {
import org.apache.commons.math3.distribution.ExponentialDistribution
val dist = new ExponentialDistribution(1.0)
testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true)
}

"aggregate with another t-digest using ++" in {
import org.apache.commons.math3.distribution.NormalDistribution
val dist = new NormalDistribution()
dist.reseedRandomGenerator(seed)

val td1 = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta)
val td2 = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta)

testTDvsDist(td1 ++ td2, dist, math.sqrt(dist.getNumericalVariance())) should be (true)
}

it should "respect monotonic cdf and inverse" in {
import org.apache.commons.math3.distribution.ExponentialDistribution
import org.apache.commons.math3.distribution.NormalDistribution
import org.apache.commons.math3.distribution.UniformRealDistribution

testMonotone(new UniformRealDistribution()) should be (true)
testMonotone(new ExponentialDistribution(1.0)) should be (true)
testMonotone(new NormalDistribution(0.0, 0.1)) should be (true)
}

it should "respect maxDiscrete parameter" in {
import org.apache.commons.math3.distribution.GeometricDistribution
val gd = new GeometricDistribution(0.33)
val data = gd.sample(1000000)
val dataUniq = data.distinct.sorted
val kt = dataUniq.map(_.toDouble).toSet
val td = TDigest.sketch(data, maxDiscrete = 50)
val clust = td.clusters
clust.keys.toSet should be (kt)
val D = clust.keys.map { x => td.cdfDiscrete(x) }
.zip(dataUniq.map { k => gd.cumulativeProbability(k) })
.map { case (p1, p2) => math.abs(p1 - p2) }
.max
(D <= 0.01) should be (true)
testSamplingPMF(td, gd) should be (true)
}

it should "respect maxDiscrete parameter over ++" in {
import org.apache.commons.math3.distribution.GeometricDistribution
val gd = new GeometricDistribution(0.33)
testTDvsDist(td1 ++ td2, dist, math.sqrt(dist.getNumericalVariance())) should be(true)
}

"respect monotonic cdf and inverse" in {
import org.apache.commons.math3.distribution.ExponentialDistribution
import org.apache.commons.math3.distribution.NormalDistribution
import org.apache.commons.math3.distribution.UniformRealDistribution

testMonotone(new UniformRealDistribution()) should be(true)
testMonotone(new ExponentialDistribution(1.0)) should be(true)
testMonotone(new NormalDistribution(0.0, 0.1)) should be(true)
}

"respect maxDiscrete parameter" in {
import org.apache.commons.math3.distribution.GeometricDistribution
val gd = new GeometricDistribution(0.33)
val data = gd.sample(1000000)
val dataUniq = data.distinct.sorted
val kt = dataUniq.map(_.toDouble).toSet
val td = TDigest.sketch(data, maxDiscrete = 50)
val clust = td.clusters
clust.keys.toSet should be(kt)
val D = clust.keys.map { x => td.cdfDiscrete(x) }
.zip(dataUniq.map { k => gd.cumulativeProbability(k) })
.map { case (p1, p2) => math.abs(p1 - p2) }
.max
(D <= 0.01) should be(true)
testSamplingPMF(td, gd) should be(true)
}

"respect maxDiscrete parameter over ++" in {
import org.apache.commons.math3.distribution.GeometricDistribution
val gd = new GeometricDistribution(0.33)
val tdvec = Vector.fill(10) { TDigest.sketch(gd.sample(100000), maxDiscrete = 50) }
val td = tdvec.reduce(_ ++ _)
val clust = td.clusters
clust.keys.map(_.toInt).map(_.toDouble) should beEqSeq(clust.keys)
val D = clust.keys.map { x => td.cdfDiscrete(x) }
.zip(clust.keys.map(_.toInt).map { k => gd.cumulativeProbability(k) })
.map { case (p1, p2) => math.abs(p1 - p2) }
.max
(D <= 0.01) should be (true)
testSamplingPMF(td, gd) should be (true)
}
val td = tdvec.reduce(_ ++ _)
val clust = td.clusters
clust.keys.map(_.toInt).map(_.toDouble) should beEqSeq(clust.keys)
val D = clust.keys.map { x => td.cdfDiscrete(x) }
.zip(clust.keys.map(_.toInt).map { k => gd.cumulativeProbability(k) })
.map { case (p1, p2) => math.abs(p1 - p2) }
.max
(D <= 0.01) should be(true)
testSamplingPMF(td, gd) should be(true)
}

it should "serialize and deserialize" in {
import org.apache.commons.math3.distribution.NormalDistribution
"serialize and deserialize" in {
import org.apache.commons.math3.distribution.NormalDistribution

import org.isarnproject.scalatest.serde.roundTripSerDe
import org.isarnproject.scalatest.serde.roundTripSerDe

val dist = new NormalDistribution()
dist.reseedRandomGenerator(seed)
val dist = new NormalDistribution()
dist.reseedRandomGenerator(seed)

val tdo = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta)

val tdi = roundTripSerDe(tdo)
val tdi = roundTripSerDe(tdo)

(tdi == tdo) should be (true)
(tdi == tdo) should be(true)

testTDvsDist(tdi, dist, math.sqrt(dist.getNumericalVariance())) should be (true)
testTDvsDist(tdi, dist, math.sqrt(dist.getNumericalVariance())) should be(true)
}
}
}
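
For context on the test migration: ScalaTest 3.2 moved the style traits into style-specific packages (e.g. org.scalatest.wordspec), which is why the wildcard org.scalatest._ import is replaced above. A minimal standalone sketch of the new style (class and subject names here are illustrative only, not part of this PR):

import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AsyncWordSpec

// Example of the ScalaTest 3.2 WordSpec style: a subject string groups the tests,
// and each assertion-returning body is lifted to a Future[Assertion] by AsyncWordSpec.
class ExampleSpec extends AsyncWordSpec with Matchers {
  "an example subject" should {
    "satisfy a basic assertion" in {
      (1 + 1) should be(2)
    }
  }
}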