🔖 Release v1.2.0
DavidBakerEffendi committed Mar 31, 2022
1 parent c3e3df6 commit e0398c5
Showing 7 changed files with 89 additions and 45 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

+## [1.2.0] - 2022-03-31
+
+### Changed
+
+- `OverflowDbDriver` now takes a `DataFlowCacheConfig` argument that specifies
+  data flow engine specific configurations.
+- `OverflowDbDriver::nodesReachableBy` renamed to `flowsBetween` and now takes functions
+  as `sources` and `sinks` parameters.

## [1.1.19] - 2022-03-30

### Changed
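The two [1.2.0] entries above change both how the driver is constructed and how it is queried. Below is a minimal sketch of the new surface, not taken from the commit itself: it assumes a program has already been loaded into the driver's CPG, and the `"a"`/`"println"` queries are illustrative values borrowed from the tests further down.

```scala
import com.github.plume.oss.domain.DataFlowCacheConfig
import com.github.plume.oss.drivers.OverflowDbDriver
import io.shiftleft.semanticcpg.language._

// Cache tuning now travels in a single case class rather than loose
// constructor arguments (these are the defaults, spelled out).
val driver = OverflowDbDriver(
  cacheConfig = DataFlowCacheConfig(maxCallDepth = 2, maxCachedPaths = 1_000)
)

// Sources and sinks are now passed as thunks, () => Traversal[CfgNode],
// so the driver can evaluate them more than once internally.
val flows = driver.flowsBetween(
  () => driver.cpg.parameter("a"),
  () => driver.cpg.call("println")
)
```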
13 changes: 7 additions & 6 deletions build.sbt
@@ -3,22 +3,23 @@ name := "Plume"
inThisBuild(
List(
organization := "com.github.plume-oss",
version := "1.1.19",
version := "1.2.0",
scalaVersion := "2.13.8",
crossScalaVersions := Seq("2.13.8", "3.1.1"),
resolvers ++= Seq(
Resolver.mavenLocal,
Resolver.mavenCentral,
-      Resolver.JCenterRepository
+      Resolver.JCenterRepository,
+      "Gradle Tooling" at "https://repo.gradle.org/gradle/libs-releases-local/"
)
)
)

val cpgVersion = "1.3.521"
val joernVersion = "1.1.661"
val cpgVersion = "1.3.523"
val joernVersion = "1.1.678"
val sootVersion = "4.3.0"
val tinkerGraphVersion = "3.4.11"
val neo4jVersion = "4.4.3"
val neo4jVersion = "4.4.5"
val apacheCodecVersion = "1.15"
val apacheIoVersion = "2.11.0"
val apacheLangVersion = "3.12.0"
@@ -43,7 +44,7 @@ Test / parallelExecution := false

libraryDependencies ++= Seq(
"io.shiftleft" %% "codepropertygraph" % cpgVersion,
"io.shiftleft" %% "semanticcpg" % cpgVersion,
"io.joern" %% "semanticcpg" % joernVersion,
"io.joern" %% "dataflowengineoss" % joernVersion,
"io.joern" %% "x2cpg" % joernVersion,
"io.joern" %% "jimple2cpg" % joernVersion,
9 changes: 8 additions & 1 deletion src/main/scala/com/github/plume/oss/domain/package.scala
@@ -12,7 +12,7 @@ import org.slf4j.LoggerFactory
import overflowdb.traversal.jIteratortoTraversal

import java.io.{File, FileInputStream, FileOutputStream}
-import java.nio.file.Path
+import java.nio.file.{Path, Paths}
import java.util.concurrent.ConcurrentHashMap
import scala.jdk.CollectionConverters
import scala.jdk.CollectionConverters.ConcurrentMapHasAsScala
@@ -28,6 +28,13 @@ package object domain {
.addModule(DefaultScalaModule)
.build()

+  case class DataFlowCacheConfig(
+    dataFlowCacheFile: Option[Path] = Some(Paths.get("dataFlowCache.cbor")),
+    compressDataFlowCache: Boolean = true,
+    maxCallDepth: Int = 2,
+    maxCachedPaths: Int = 1_000
+  )

/** Given an object and a path, will serialize the object to the given path.
* @param o object to serialize.
* @param p path to write serialized data to.
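Every field of the `DataFlowCacheConfig` case class added above has a default, so callers override only what they need. A hedged sketch of two configurations (the values are illustrative, not recommendations): passing `dataFlowCacheFile = None` disables persistence of data-flow results entirely, the same pattern the updated test fixture at the bottom of this commit uses.

```scala
import com.github.plume.oss.domain.DataFlowCacheConfig
import com.github.plume.oss.drivers.OverflowDbDriver

// No cache file: nothing is serialized between runs, keeping disk and
// memory use low at the cost of recomputing paths each session.
val ephemeralDriver = OverflowDbDriver(
  cacheConfig = DataFlowCacheConfig(dataFlowCacheFile = None)
)

// Keep the default "dataFlowCache.cbor" target (written LZ4-compressed
// while compressDataFlowCache = true, hence the ".lz4" artifact the
// tests below clean up) but widen the search budget.
val thoroughDriver = OverflowDbDriver(
  cacheConfig = DataFlowCacheConfig(
    maxCallDepth = 4,
    maxCachedPaths = 10_000
  )
)
```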
77 changes: 50 additions & 27 deletions src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala
@@ -1,12 +1,7 @@
package com.github.plume.oss.drivers

import com.github.plume.oss.PlumeStatistics
-import com.github.plume.oss.domain.{
-  SerialReachableByResult,
-  deserializeCache,
-  deserializeResultTable,
-  serializeCache
-}
+import com.github.plume.oss.domain._
import com.github.plume.oss.drivers.OverflowDbDriver.newOverflowGraph
import com.github.plume.oss.passes.callgraph.PlumeDynamicCallLinker
import com.github.plume.oss.util.BatchedUpdateUtil._
@@ -25,7 +20,7 @@ import overflowdb.traversal.{Traversal, jIteratortoTraversal}
import overflowdb.{BatchedUpdate, Config, DetachedNodeData, Edge, Node}

import java.io.{FileOutputStream, OutputStreamWriter, File => JFile}
-import java.nio.file.{Files, Path, Paths}
+import java.nio.file.Files
import java.util.concurrent.ConcurrentHashMap
import scala.collection.mutable
import scala.io.{BufferedSource, Source}
@@ -36,18 +31,15 @@ import scala.util.{Failure, Success, Try, Using}
* @param storageLocation where the database will serialize to and deserialize from.
* @param heapPercentageThreshold the percentage of the JVM heap from when the database will begin swapping to disk.
* @param serializationStatsEnabled enables saving of serialization statistics.
-  * @param dataFlowCacheFile the path to the cache file where data-flow paths are saved to. If None then data flow
-  *                          results will not be saved.
-  * @param compressDataFlowCache whether to compress the serialized data-flow cache specified at dataFlowCacheFile.
+  * @param cacheConfig contains various configurations for using the data flow tracking capabilities of OverflowDB.
*/
final case class OverflowDbDriver(
storageLocation: Option[String] = Option(
JFile.createTempFile("plume-", ".odb").getAbsolutePath
),
heapPercentageThreshold: Int = 80,
serializationStatsEnabled: Boolean = false,
-  dataFlowCacheFile: Option[Path] = Some(Paths.get("dataFlowCache.cbor")),
-  compressDataFlowCache: Boolean = true
+  cacheConfig: DataFlowCacheConfig = DataFlowCacheConfig()
) extends IDriver {

private val logger = LoggerFactory.getLogger(classOf[OverflowDbDriver])
@@ -74,13 +66,13 @@ final case class OverflowDbDriver(
/** Reads the saved cache on the disk and retrieves it as a serializable object
*/
private def fetchCacheFromDisk: Option[ConcurrentHashMap[Long, Vector[SerialReachableByResult]]] =
-    dataFlowCacheFile match {
+    cacheConfig.dataFlowCacheFile match {
case Some(filePath) =>
if (Files.isRegularFile(filePath))
Some(
PlumeStatistics.time(
PlumeStatistics.TIME_RETRIEVING_CACHE,
-            { deserializeCache(filePath, compressDataFlowCache) }
+            { deserializeCache(filePath, cacheConfig.compressDataFlowCache) }
)
)
else
@@ -96,10 +88,13 @@ final case class OverflowDbDriver(
private implicit var context: EngineContext =
EngineContext(
Semantics.fromList(List()),
-      EngineConfig(initialTable = resultTable)
+      EngineConfig(
+        maxCallDepth = cacheConfig.maxCallDepth,
+        initialTable = resultTable
+      )
)

-  private def saveDataflowCache(): Unit = dataFlowCacheFile match {
+  private def saveDataflowCache(): Unit = cacheConfig.dataFlowCacheFile match {
case Some(filePath) if resultTable.isDefined && resultTable.get.table.nonEmpty =>
PlumeStatistics.time(
PlumeStatistics.TIME_STORING_CACHE, {
@@ -110,13 +105,13 @@ final case class OverflowDbDriver(
t.put(n.id(), v.map(SerialReachableByResult.apply))
}
// Write to disk
-          serializeCache(t, filePath, compressDataFlowCache)
+          serializeCache(t, filePath, cacheConfig.compressDataFlowCache)
}
)
case _ => // Do nothing
}

-  /** Sets the context for the data-flow engine when performing [[nodesReachableBy]] queries.
+  /** Sets the context for the data-flow engine when performing [[flowsBetween]] queries.
*
* @param maxCallDepth the new method call depth.
* @param methodSemantics the file containing method semantics for external methods.
@@ -179,7 +174,11 @@

override def clear(): Unit = {
cpg.graph.nodes.asScala.foreach(safeRemove)
-    dataFlowCacheFile match {
+    resultTable match {
+      case Some(table) => table.table.clear()
+      case None        =>
+    }
+    cacheConfig.dataFlowCacheFile match {
case Some(filePath) => filePath.toFile.delete()
case None => // Do nothing
}
@@ -378,17 +377,42 @@
* @param sanitizers a set of full method names to filter paths out with.
* @return the source nodes whose data flows to the given sinks uninterrupted.
*/
-  def nodesReachableBy(
-      source: Traversal[CfgNode],
-      sink: Traversal[CfgNode],
+  def flowsBetween(
+      source: () => Traversal[CfgNode],
+      sink: () => Traversal[CfgNode],
sanitizers: Set[String] = Set.empty[String]
): List[ReachableByResult] =
PlumeStatistics.time(
PlumeStatistics.TIME_REACHABLE_BY_QUERYING, {
import io.shiftleft.semanticcpg.language._
+        // Strip the cache of only nodes that will be used the most in this query to get fast starts/finishes
+        cacheConfig.dataFlowCacheFile match {
+          case Some(_) =>
+            val newCache = new ResultTable
+            val oldCache = resultTable.getOrElse(new ResultTable)
+            var currPathsInCache = 0
+            scala.util.Random
+              .shuffle(source().l ++ sink().l)
+              .flatMap { x =>
+                oldCache.get(x) match {
+                  case Some(paths) => Some((x, paths))
+                  case None        => None
+                }
+              }
+              .foreach { case (startOrEndNode, paths) =>
+                if (currPathsInCache + paths.size <= cacheConfig.maxCachedPaths) {
+                  currPathsInCache += paths.size
+                  newCache.add(startOrEndNode, paths)
+                }
+              }
+            oldCache.table.clear()
+            resultTable = Some(newCache)
+            setDataflowContext(context.config.maxCallDepth, context.semantics, resultTable)
+          case _ =>
+        }

-        val results: List[ReachableByResult] = sink
-          .reachableByDetailed(source)(context)
+        val results: List[ReachableByResult] = sink()
+          .reachableByDetailed(source())(context)
captureDataflowCache(results)
results
// Remove a source/sink arguments referring to itself
@@ -408,16 +432,15 @@
)

private def captureDataflowCache(results: List[ReachableByResult]): Unit = {
-    dataFlowCacheFile match {
+    cacheConfig.dataFlowCacheFile match {
case Some(_) =>
-        // Reload latest results to the query engine context
+        // Capture latest results
resultTable = (results
.map(_.table) ++ List(resultTable).flatten).distinct
.reduceOption((a: ResultTable, b: ResultTable) => {
b.table.foreach { case (k, v) => a.add(k, v) }
a
})
-        setDataflowContext(context.config.maxCallDepth, context.semantics, resultTable)
case None => // Do nothing since no table means we aren't saving data and instead keeping memory low
}
}
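A note on the new `flowsBetween` signature: an OverflowDB `Traversal` is consumed as it is iterated, and the method now needs its sources and sinks twice, once to trim the cache down to at most `maxCachedPaths` paths relevant to the query and once for the `reachableByDetailed` call itself. Taking thunks rather than traversals sidesteps the exhaustion problem; plain Scala iterators show the same effect:

```scala
// A single-use iterator is exhausted after its first full traversal.
val once: Iterator[Int] = Iterator(1, 2, 3)
println(once.size) // 3
println(once.size) // 0 -- already consumed

// A factory hands out a fresh iterator per call, so every use starts over.
val fresh: () => Iterator[Int] = () => Iterator(1, 2, 3)
println(fresh().size) // 3
println(fresh().size) // 3
```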
10 changes: 5 additions & 5 deletions src/test/scala/com/github/plume/oss/DiffTests.scala
@@ -64,7 +64,7 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll {
driver.clear()
driver.close()
Paths.get(storage.get).toFile.delete()
-      driver.dataFlowCacheFile match {
+      driver.cacheConfig.dataFlowCacheFile match {
case Some(jsonFile) => new File(jsonFile.toFile.getAbsolutePath + ".lz4").delete()
case None =>
}
@@ -102,12 +102,12 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll {
val sinkNodesId1 = driver.cpg.call(Operators.addition).id.l

val r1 = driver
-        .nodesReachableBy(driver.cpg.parameter("a"), driver.cpg.call(Operators.addition))
+        .flowsBetween(() => driver.cpg.parameter("a"), () => driver.cpg.call(Operators.addition))
.map(_.path.map(_.node.id()))
val cH1 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_HITS)
val cM1 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_MISSES)
val hitRatio1 = cH1.toDouble / (cH1 + cM1) * 100
logger.info(s"Cache hit ratio $hitRatio1%")
logger.info(s"Cache hit ratio $hitRatio1% ($cH1 vs $cM1)")
cH1 should be <= cM1
QueryEngineStatistics.reset()

@@ -119,7 +119,7 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll {
val sinkNodesId2 = driver.cpg.call(Operators.addition).id.l

val r2 = driver
-        .nodesReachableBy(driver.cpg.parameter("a"), driver.cpg.call(Operators.addition))
+        .flowsBetween(() => driver.cpg.parameter("a"), () => driver.cpg.call(Operators.addition))
.map(_.path.map(_.node.id()))
val cH2 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_HITS)
val cM2 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_MISSES)
@@ -131,7 +131,7 @@
r1 shouldBe r2
// The cache should have a higher number of hits now from re-using the first query
val hitRatio2 = cH2.toDouble / (cH2 + cM2) * 100
logger.info(s"Cache hit ratio $hitRatio2%")
logger.info(s"Cache hit ratio $hitRatio2% ($cH2 vs $cM2)")
hitRatio2 should be >= hitRatio1
}

13 changes: 8 additions & 5 deletions src/test/scala/com/github/plume/oss/querying/DataFlowTests.scala
@@ -47,7 +47,7 @@ class DataFlowTests extends Jimple2CpgFixture(Some(new OverflowDbDriver())) {
val cpg = CPG(driver.cpg.graph)

val r = driver
.nodesReachableBy(cpg.parameter("a"), cpg.call("<operator>.*"))
.flowsBetween(() => cpg.parameter("a"), () => cpg.call("<operator>.*"))
val List(v1) = r.map(r => r.path.map(x => (x.node.method.name, x.node.code)))

v1.head shouldBe ("foo", "int a")
@@ -58,7 +58,7 @@ class DataFlowTests extends Jimple2CpgFixture(Some(new OverflowDbDriver())) {
val cpg = CPG(driver.cpg.graph)

val r = driver
.nodesReachableBy(cpg.parameter("a"), cpg.call("bar"))
.flowsBetween(() => cpg.parameter("a"), () => cpg.call("bar"))
val List(v1) = r.map(r => r.path.map(x => (x.node.method.name, x.node.code)))

v1.head shouldBe ("foo", "int a")
@@ -69,7 +69,9 @@ class DataFlowTests extends Jimple2CpgFixture(Some(new OverflowDbDriver())) {
val cpg = CPG(driver.cpg.graph)

val r = driver
.nodesReachableBy(cpg.parameter("a"), cpg.call("println"))
.flowsBetween(() => cpg.parameter("a"), () => cpg.call("println"))

r.map(r => r.path.map(x => (x.node.method.name, x.node.code))).foreach(println)

r.size shouldBe 2

@@ -87,10 +89,11 @@ class DataFlowTests extends Jimple2CpgFixture(Some(new OverflowDbDriver())) {
def source = cpg.call("taint").argument
def sink = cpg.call("baz")

-    val r1 = driver.nodesReachableBy(source, sink)
+    val r1 = driver.flowsBetween(() => source, () => sink)
+    r1.map(r => r.path.map(x => (x.node.method.name, x.node.code))).foreach(println)
r1.size shouldBe 1

-    val r2 = driver.nodesReachableBy(source, sink, Set("Foo.falseClean:int(int)"))
+    val r2 = driver.flowsBetween(() => source, () => sink, Set("Foo.falseClean:int(int)"))
r2.size shouldBe 0
}

@@ -3,6 +3,7 @@ package com.github.plume.oss.testfixtures
import com.github.plume.oss.{Jimple2Cpg, PlumeStatistics}
import com.github.plume.oss.drivers.OverflowDbDriver
import com.github.plume.oss.JavaCompiler.compileJava
+import com.github.plume.oss.domain.DataFlowCacheConfig
import io.joern.x2cpg.testfixtures.{CodeToCpgFixture, LanguageFrontend}
import io.shiftleft.codepropertygraph.Cpg
import org.slf4j.LoggerFactory
@@ -16,7 +17,7 @@ class PlumeFrontend(val _driver: Option[OverflowDbDriver]) extends LanguageFrontend {
private val logger = LoggerFactory.getLogger(classOf[PlumeFrontend])
val driver: OverflowDbDriver = _driver match {
case Some(d) => d
-    case None => new OverflowDbDriver(dataFlowCacheFile = None)
+    case None => new OverflowDbDriver(cacheConfig = DataFlowCacheConfig(dataFlowCacheFile = None))
}
override val fileSuffix: String = ".java"

