Skip to content

Commit

Permalink
flatgraph (#1769)
Browse files Browse the repository at this point in the history
flatgraph port WIP

squashed commits from
michael/flatgraph.backup*
  • Loading branch information
mpollmeier authored Jul 16, 2024
1 parent 231816e commit 88d9845
Show file tree
Hide file tree
Showing 348 changed files with 86,768 additions and 86,000 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
target/
codepropertygraph/project/
/codepropertygraph/src/main/resources/cpg.json
/codepropertygraph/src/test/resources/cpg.odb.fg
private-key.pem
travis_wait_*
**/*.pyc
Expand All @@ -14,7 +15,7 @@ project/.bloop
.project
*.class
/.classpath
/project/project/
/project/project/target
/project/target/
/target
/foo.c
Expand Down
3 changes: 1 addition & 2 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
name := "codepropertygraph"

// parsed by project/Versions.scala, updated by updateDependencies.sh
val overflowdbVersion = "1.193"
val overflowdbCodegenVersion = "2.112"
val flatgraphVersion = "0.0.81"

inThisBuild(
List(
Expand Down
5 changes: 3 additions & 2 deletions codepropertygraph/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ name := "codepropertygraph"
dependsOn(Projects.protoBindings, Projects.domainClasses)

libraryDependencies ++= Seq(
"io.shiftleft" %% "overflowdb-traversal" % Versions.overflowdb,
"io.shiftleft" %% "overflowdb-formats" % Versions.overflowdb,
"io.joern" %% "flatgraph-formats" % Versions.flatgraph,
"io.joern" %% "flatgraph-help" % Versions.flatgraph,
"io.joern" %% "flatgraph-odb-convert" % Versions.flatgraph,
"com.github.scopt" %% "scopt" % "4.0.1",
"com.github.pathikrit" %% "better-files" % "3.9.2",
"org.slf4j" % "slf4j-api" % "2.0.6",
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
package io.shiftleft.codepropertygraph

import overflowdb.Graph
import overflowdb.traversal.help.DocSearchPackages
import flatgraph.Graph
import flatgraph.help.DocSearchPackages
import io.shiftleft.codepropertygraph.generated

/** TODO: this is now being generated as well. For now we just forward calls to `generated.Cpg`; the next step is
* to remove this class and drop the `generated` part from the generated package name.
*/
object Cpg {
implicit val docSearchPackages: DocSearchPackages =
DocSearchPackages("io.shiftleft", "io.joern")
val defaultDocSearchPackage: DocSearchPackages = generated.Cpg.defaultDocSearchPackage

/** Syntactic sugar for `new Cpg(graph)`. Usage: `Cpg(graph)` or simply `Cpg` if you have an `implicit Graph` in scope
*/
def apply(implicit graph: Graph) = new Cpg(graph)
def apply(implicit graph: Graph) = generated.Cpg(graph)

/** Create an empty code property graph
*/
def emptyCpg: Cpg =
new Cpg(emptyGraph)
def emptyCpg: generated.Cpg =
generated.Cpg(emptyGraph)

/** Instantiate cpg with storage. If the storage file already exists, it will load (a subset of) the data into memory.
* Otherwise it will create an empty cpg. In either case, configuring storage means that OverflowDb will be stored to
Expand All @@ -26,13 +26,10 @@ object Cpg {
* @param path
* to the storage file, e.g. /home/user1/overflowdb.bin
*/
def withStorage(path: String): Cpg =
new Cpg(generated.Cpg.withStorage(path).graph)

def withConfig(config: overflowdb.Config): Cpg =
Cpg(generated.Cpg.withConfig(config).graph)
def withStorage(path: String, deserializeOnClose: Boolean = true): generated.Cpg =
generated.Cpg.withStorage(java.nio.file.Paths.get(path), deserializeOnClose)

def emptyGraph: Graph =
generated.Cpg.emptyGraph
generated.Cpg.empty.graph

}
Original file line number Diff line number Diff line change
@@ -1,91 +1,117 @@
package io.shiftleft.codepropertygraph.cpgloading

import better.files.File
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.PropertyNames
import org.slf4j.{Logger, LoggerFactory}

import scala.util.Try
import java.io.FileNotFoundException
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Path, Paths}
import scala.util.Using

object CpgLoader {
private val logger: Logger = LoggerFactory.getLogger(getClass)

private val logger: Logger = LoggerFactory.getLogger(classOf[CpgLoader])
/** Load a Code Property Graph from the file with the given name.
  *
  * Convenience overload: the name is turned into a `Path` and handed to the `Path`-based `load`.
  *
  * @param filename
  *   name of the file that stores the code property graph
  */
def load(filename: String): Cpg = {
  val inputPath = Paths.get(filename)
  load(inputPath)
}

/** Load a Code Property Graph
*
* @param filename
* name of file that stores the code property graph
* @param config
* loader configuration
/** Load a Code Property Graph from the given file and persist it in the given second file, i.e. the given input
* file will not be modified; all changes will be written to the given 'persistTo' file.
*/
def load(filename: String, config: CpgLoaderConfig = CpgLoaderConfig()): Cpg =
new CpgLoader().load(filename, config)
def load(from: String, persistTo: String): Cpg = {
  // string-based convenience overload: convert both names and delegate to the `Path`-based variant
  val inputPath  = Paths.get(from)
  val outputPath = Paths.get(persistTo)
  load(inputPath, outputPath)
}

/** Load Code Property Graph from an overflow DB file
*
* @param config
* loader config
/** Load a Code Property Graph from the given file
*
* This methods loads the CPG from an existing overflow DB file, specified in config.overflowDbConfig. In particular,
* this config specifies the filename. For example, to load the database at "foo.db", you can issue the following:
*
* val odbConfig = Config.withDefaults().withStorageLocation(config.spPath) val config =
* CpgLoaderConfig().withOverflowConfig(odbConfig) CpgLoader.loadFromOverflowDb(config)
* Notes:
* - detects the format as either flatgraph, overflowdb or proto
* - a flatgraph storage is opened straight away
* - OverflowDb and proto formats are first converted to flatgraph; the result is written to a new flatgraph
* storage file next to the input (`<input file name>.fg`), whose path can be obtained via
* `cpg.graph.storagePathMaybe`
*/
def loadFromOverflowDb(config: CpgLoaderConfig = CpgLoaderConfig()): Cpg = {
new CpgLoader().loadFromOverflowDb(config)
// Loads a CPG from `path`, probing the file's first bytes to decide how to open it.
// Throws FileNotFoundException if the file does not exist.
def load(path: Path): Cpg = {
  val absolutePath = path.toAbsolutePath

  // Target for converted graphs: a sibling of the input with `.fg` appended to its file name.
  // Derived from the absolute path in every branch so that both conversions behave the same.
  def convertedStoragePath: Path =
    absolutePath.resolveSibling(s"${absolutePath.getFileName}.fg")

  if (!Files.exists(absolutePath)) {
    throw new FileNotFoundException(s"given input file $absolutePath does not exist")
  } else if (isProtoFormat(absolutePath) || isOverflowDbFormat(absolutePath)) {
    // proto and overflowdb inputs are handled by the two-path overload, which writes to `persistTo`
    load(absolutePath, persistTo = convertedStoragePath)
  } else {
    // assuming it's flatgraph format
    Cpg.withStorage(absolutePath)
  }
}

/** Create any indexes necessary for quick access.
*
* @param cpg
* the CPG to create indexes in
*/
def createIndexes(cpg: Cpg): Unit =
new CpgLoader().createIndexes(cpg)

/** Determine whether the CPG is a legacy (proto) CPG
/** Load a Code Property Graph from the given file, but persist it in the given second file. I.e. the given input file
* will not be modified, all changes will be written to the given 'persistTo' file.
*
* @param filename
* name of the file to probe
* Notes:
* - if the given 'persistTo' file already exists, it will be overwritten
* - detects the format as either flatgraph, overflowdb or proto
* - a flatgraph storage is copied to the `persistTo` file and then opened straight away
* - OverflowDb and proto formats are first converted to flatgraph, which is written to the `persistTo` file
*/
def isLegacyCpg(filename: String): Boolean =
isLegacyCpg(File(filename))

/** Determine whether the CPG is a legacy (proto) CPG
*
* @param file
* file to probe
*/
def isLegacyCpg(file: File): Boolean = {
val bytes = file.bytes
Try {
bytes.next() == 'P' && bytes.next() == 'K'
}.getOrElse(false)
// Loads a CPG from `from` and persists it in `persistTo`; an existing `persistTo` file is replaced.
// Throws FileNotFoundException if `from` does not exist, and AssertionError if none of the known
// formats (proto.zip, flatgraph, overflowdb) is detected.
def load(from: Path, persistTo: Path): Cpg = {
  val absolutePath = from.toAbsolutePath

  // Validate the input before touching `persistTo`, so that a missing input never costs the caller an
  // existing `persistTo` file.
  if (!Files.exists(absolutePath)) {
    throw new FileNotFoundException(s"given input file $absolutePath does not exist")
  }

  // Compare normalized absolute paths rather than the raw arguments: a relative and an absolute spelling
  // of the same file are different `Path` values, and deleting `persistTo` then would delete the input.
  val persistToIsInput = persistTo.toAbsolutePath.normalize == absolutePath.normalize
  if (!persistToIsInput)
    Files.deleteIfExists(persistTo)

  if (isProtoFormat(absolutePath)) {
    logger.debug(s"Converting $from from proto cpg into new flatgraph storage: $persistTo")
    ProtoCpgLoader.loadFromProtoZip(absolutePath.toString, Option(persistTo))
  } else if (isOverflowDbFormat(absolutePath)) {
    loadFromOverflowDb(absolutePath, persistTo)
  } else if (isFlatgraphFormat(absolutePath)) {
    // when source and target are the same file, `Files.copy` completes without copying
    Files.copy(absolutePath, persistTo)
    Cpg.withStorage(persistTo)
  } else {
    throw new AssertionError(
      "unknown file format - we probed the first bytes but it didn't look like one of our known formats (proto.zip, flatgraph, overflowdb)"
    )
  }
}

}
/** Determine whether the file at `path` is a legacy (proto) CPG.
  *
  * The check only probes the first bytes of the file for the marker "PK".
  *
  * @param path
  *   file to probe
  */
def isProtoFormat(path: Path): Boolean = {
  val protoMarker = "PK"
  probeFirstBytes(path, protoMarker)
}

private class CpgLoader {
/** Determine whether the file with the given name is a proto CPG.
  *
  * @param filename
  *   name of the file to probe
  */
def isProtoFormat(filename: String): Boolean = {
  val candidate = Paths.get(filename)
  isProtoFormat(candidate)
}

import CpgLoader.logger
/** Determine whether the file at `path` starts with the marker "H:2", which this loader treats as the
  * signature of an OverflowDb file.
  */
def isOverflowDbFormat(path: Path): Boolean = {
  val overflowDbMarker = "H:2"
  probeFirstBytes(path, overflowDbMarker)
}

def load(filename: String, config: CpgLoaderConfig = CpgLoaderConfig.withoutOverflow): Cpg = {
logger.debug("Loading " + filename)
/** Determine whether the file at `path` starts with the flatgraph magic bytes. */
def isFlatgraphFormat(path: Path): Boolean = {
  // same value as flatgraph.storage.MagicBytesString
  val flatgraphMarker = "FLT GRPH"
  probeFirstBytes(path, flatgraphMarker)
}

val cpg =
ProtoCpgLoader.loadFromProtoZip(filename, config.overflowDbConfig)
if (config.createIndexes) { createIndexes(cpg) }
cpg
/** Convert an OverflowDb file into a flatgraph binary and open the result as a Code Property Graph.
  *
  * @param path
  *   the OverflowDb file to convert
  * @param persistTo
  *   file the converted flatgraph storage is written to; the returned Cpg is opened on this file
  */
def loadFromOverflowDb(path: Path, persistTo: Path): Cpg = {
  val conversionNotice = s"Converting $path from overflowdb to new flatgraph storage: $persistTo"
  logger.info(conversionNotice)

  flatgraph.convert.Convert.convertOdbToFlatgraph(overflowDbFile = path, outputFile = persistTo)

  val convertedCpg = Cpg.withStorage(persistTo)
  convertedCpg
}

def loadFromOverflowDb(config: CpgLoaderConfig = CpgLoaderConfig()): Cpg = {
val cpg = Cpg.withConfig(config.overflowDbConfig)
if (config.createIndexes) { createIndexes(cpg) }
cpg
/** Determine whether the CPG is a legacy (proto) CPG.
  *
  * Kept for backwards compatibility only; forwards to `isProtoFormat`.
  *
  * @param filename
  *   name of the file to probe
  */
@deprecated("use `isProtoFormat` instead")
def isLegacyCpg(filename: String): Boolean =
  isProtoFormat(Paths.get(filename))

/** Determine whether the CPG is a legacy (proto) CPG.
  *
  * Kept for backwards compatibility only; forwards to `isProtoFormat`.
  *
  * @param path
  *   file to probe
  */
@deprecated("use `isProtoFormat` instead")
def isLegacyCpg(path: Path): Boolean =
  isProtoFormat(path)

/** Check whether the file at `path` starts with the UTF-8 encoding of `probeFor`.
  *
  * @param path
  *   file to probe
  * @param probeFor
  *   marker expected at the very beginning of the file
  * @return
  *   true if the leading bytes match; false if they differ, the file is shorter than the marker, or the
  *   file cannot be opened or read
  */
private def probeFirstBytes(path: Path, probeFor: String): Boolean = {
  // compare bytes, not decoded text: the buffer must be sized by the encoded length, which differs from
  // the character count for non-ASCII markers
  val expected = probeFor.getBytes(StandardCharsets.UTF_8)
  Using(Files.newInputStream(path)) { is =>
    val actual = new Array[Byte](expected.length)
    // a plain `read` may return fewer bytes than requested; `readNBytes` blocks until the buffer is full
    // or the end of the stream is reached
    val bytesRead = is.readNBytes(actual, 0, actual.length)
    bytesRead == expected.length && java.util.Arrays.equals(actual, expected)
  }.getOrElse(false) // any I/O failure (missing file, directory, ...) counts as "no match"
}

def createIndexes(cpg: Cpg): Unit =
cpg.graph.indexManager.createNodePropertyIndex(PropertyNames.FULL_NAME)

}

This file was deleted.

This file was deleted.

Loading

0 comments on commit 88d9845

Please sign in to comment.