diff --git a/.github/workflows/sbt_test.yml b/.github/workflows/sbt_test.yml new file mode 100644 index 0000000..5d6831f --- /dev/null +++ b/.github/workflows/sbt_test.yml @@ -0,0 +1,21 @@ +name: sbt test + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up JDK 1.17 + uses: actions/setup-java@v1 + with: + java-version: 1.17 + - name: Run tests + run: sbt stage scalafmtCheck diff --git a/.github/workflows/sbt_test.yml.deprecated b/.github/workflows/sbt_test.yml.deprecated new file mode 100644 index 0000000..f82a593 --- /dev/null +++ b/.github/workflows/sbt_test.yml.deprecated @@ -0,0 +1,32 @@ +name: sbt test + +on: + push: + branches: [ master ] + pull_request: + branches: [ master, polishingForFullAutomation] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up JDK 1.8 + uses: actions/setup-java@v1 + with: + java-version: 1.8 + - name: Set up PHP 7.4 + uses: shivammathur/setup-php@v2 + with: + php-version: '7.4' + extensions: 'xdebug, opcache' + - name: create composer vendor dir + run: mkdir -p $HOME/.config/composer/vendor/ + - name: Setup php-parse + run: COMPOSER_VENDOR_DIR=$HOME/.config/composer/vendor/ composer require nikic/php-parser + - name: Setup config + run : mv main.conf.default main.conf + - name: Run tests + run: sbt test scalafmtCheck diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6f1a7a3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +*.bin.zip +*.class +*.log +*.cpg +*~ +target +/.classpath +/.idea +/.settings +.antlr/ +/bin/ +/build/ +/.bsp/ +.project +main.conf +/resources/docker/multilayer-php-cpg/privateCreate.sh diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..58e4074 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,2 @@ +Simon Koch +Fabian Yamaguchi diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0bfde9a --- 
/dev/null +++ b/LICENSE @@ -0,0 +1,213 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2019 ShiftLeft, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +======================================================================== +MIT Licenses +======================================================================== + +The Apache TinkerPop project bundles the following components under the MIT License: + + bootstrap/carousel 3.3.5 (http://getbootstrap.com/) - for details, see license/bootstrap + jquery 1.11.0 (https://jquery.com/) - for details, see license/jquery + normalize.css 2.1.2 (http://necolas.github.io/normalize.css/) - for details, see licenses/normalize + prism.css/js (http://prismjs.com) - for details, see licenses/prism diff --git a/README.md b/README.md new file mode 100644 index 0000000..32a9aa6 --- /dev/null +++ b/README.md @@ -0,0 +1,97 @@ +# php-cpg +A (Bytecode) CPG creator for PHP + +# Status +![Scala CI](https://github.com/simkoc/php-cpg/workflows/Scala%20CI/badge.svg) + +# Dependencies + +- [patched](https://github.com/simkoc/php-src) PHP + - PHP 7.4.27 from branch [`PHP-7.4.27`](https://github.com/simkoc/php-src/tree/PHP-7.4.27) + - PHP 8.2.2 from branch [`PHP-8.2.2`](https://github.com/simkoc/php-src/tree/PHP-8.2.2) + - we recommend building them locally for dev purposes + - consider using the Dockerfiles in resources/docker for production use +- sbt +- scala +- php-parse (only for the source code layer) + +# Building and Running + +## Dependencies + +You need to download and install our [patched](https://github.com/simkoc/php-src/) PHP versions. + +Create `main.conf` by copying `main.conf.default` and adjust the parameters according to your system. + +## Building the project +The project can be built and run with the subsequent steps, yet it will only work with the `bytecode` argument. The source code support relies on `php-parse`; you can find the install instructions in the subsection `Source code support`.
+``` +sbt stage +./php2cpg -h +no more arguments and a subaction has still to be chosen + +usage: PHP Cpg Creator [rootFolder] {-c,-o,-e,-h,-f,-s,-l,-v} log-evaluation bytecode + +create a cpg from PHP + + [rootFolder] the root folder of the project + + -c/--config the config file to use (def:./main.conf) + + -o/--output the destination file into which to store the cpg (def:) + + -e/--endings the file endings included for analysis (def:) + + -h/--help prints this help message + + -f/--forced when set files are simply overwritten + + -s/--strictParsing if set each project file has to be parsed successfully + + -l/--permissiveLinking linking strategy for call graph set to permissive + + -v/--verbose output some progress indicators + + log-evaluation + + bytecode create a bytecode CPG +``` + +## Source code support +Source code is analyzed using `php-parse`. The following install steps are taken from the official install instructions: https://getcomposer.org/download/. +``` +php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" +php -r "if (hash_file('sha384', 'composer-setup.php') === '795f976fe0ebd8b75f26a6dd68f78fd3453ce79f32ecb33e7fd087d39bfeb978342fb73ac986cd4f54edd0dc902601dc') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" +php composer-setup.php +php -r "unlink('composer-setup.php');" + +# Location is expected by php2cpg +mkdir -p $HOME/.config/composer/vendor/ && COMPOSER_VENDOR_DIR=$HOME/.config/composer/vendor/ php composer.phar require nikic/php-parser +``` + + +# Documentation + +[Opcodes](./documentation/README.md) + +# Using Docker + +Build the image +``` +docker build -t phpcpg /your/path/to/php-cpg/ --build-arg GH_TOKEN= +``` + +Run the image +``` +➜ ~ docker run -it phpcpg /bin/bash +root@f43557f5be46:/php-cpg# ./php2cpg +The first parameter has to be to determine which cpg layer to create +usage: +./php2cpg sourcecode main.php +./php2cpg bytecode main.php +``` + +## 
Token
+
+Create the github token at https://github.com/settings/tokens. The token needs to have `repo` permissions.
+
diff --git a/build.sbt b/build.sbt
new file mode 100644
index 0000000..c0289ec
--- /dev/null
+++ b/build.sbt
@@ -0,0 +1,183 @@
+name := "multilayer php cpg generator"
+ThisBuild / scalaVersion := "2.13.12"
+organization := "io.joern"
+licenses += "Apache-2.0" -> url("http://www.apache.org/licenses/LICENSE-2.0")
+// NOTE(review): the browse URL and the SCM connection point at different GitHub owners — confirm which one is current.
+scmInfo := Some(ScmInfo(url("https://github.com/simkoc/php-cpg"),
+                        "scm:git@github.com:ShiftLeftSecurity/php-cpg.git"))
+exportJars := true
+Global / onChangedBuildSource := ReloadOnSourceChanges
+// The root project bundles both CPG layers plus the shared config/reporting module.
+dependsOn(
+  bytecode,
+  sourcecode,
+  configAndLogging
+)
+enablePlugins(JavaAppPackaging, GitVersioning)
+// Projects
+
+import sbtrelease.ReleasePlugin.autoImport.ReleaseTransformations._
+releasePublishArtifactsAction := PgpKeys.publishSigned.value
+// releaseVersionBump is configured once, next to releaseProcess at the end of this file.
+// The earlier assignment that used to sit here was overridden by that later one and had no effect.
+
+
+// Shared command line/config-file handling and pass reporting used by both layers.
+lazy val configAndLogging = project
+  .in(file("configAndReporting"))
+  .settings(
+    organization := "io.joern",
+    name := "config-and-reporting",
+    libraryDependencies ++= Seq(
+      dependencies.argparse,
+      dependencies.typesafeConfig,
+      dependencies.json,
+      dependencies.airframelog,
+      dependencies.codepropertygraph
+    ),
+    settings,
+    exportJars := false
+  )
+
+// Bytecode layer of the CPG generator.
+lazy val bytecode = project
+  .in(file("layerByteCode"))
+  .settings(
+    organization := "io.joern",
+    name := "PHPCPG-ByteCode-Layer",
+    libraryDependencies ++= commonDependencies ++ Seq(
+      dependencies.fastparse,
+    ),
+    settings,
+    exportJars := true,
+  )
+  .dependsOn(configAndLogging)
+  .enablePlugins(JavaAppPackaging)
+
+// Source code layer of the CPG generator.
+lazy val sourcecode = project
+  .in(file("layerSourceCode"))
+  .settings(
+    organization := "io.joern",
+    name := "PHPCPG-SourceCode-Layer",
+    libraryDependencies ++= commonDependencies ++ Seq(
+      dependencies.scalaParserCombinators,
+    ),
+    settings,
+    exportJars := true,
+  )
+  .dependsOn(configAndLogging)
+  .enablePlugins(JavaAppPackaging)
+
+// Single version shared by all codepropertygraph artifacts below.
+val cpgVersion = "1.3.493"
+
+// Central catalogue of every library coordinate used by the sub-projects.
+lazy val dependencies =
+  new {
+    val scopt = "com.github.scopt" %% "scopt" % "3.7.1"
+    val codepropertygraph = "io.shiftleft" %% "codepropertygraph" % cpgVersion
+    val codepropertygraphProtos = "io.shiftleft" %% "codepropertygraph-protos" % cpgVersion
+    val semanticcpg = "io.shiftleft" %% "semanticcpg" % cpgVersion
+    val semanticcpgtests = "io.shiftleft" %% "semanticcpg" % cpgVersion % Test classifier "tests"
+    val slf4jnop = "org.slf4j" % "slf4j-nop" % "1.7.32"
+    val airframelog = "org.wvlet.airframe" %% "airframe-log" % "21.12.0"
+    val betterFiles = "com.github.pathikrit" %% "better-files" % "3.8.0"
+    val scalaParallelCollection = "org.scala-lang.modules" %% "scala-parallel-collections" % "0.2.0"
+    val scalatest = "org.scalatest" %% "scalatest" % "3.1.0" % Test
+    val fastparse = "com.lihaoyi" %% "fastparse" % "2.2.2"
+    val scalaParserCombinators = "org.scala-lang.modules" %% "scala-parser-combinators" % "1.1.2"
+    val commonsio = "commons-io" % "commons-io" % "2.8.0"
+    val json = "io.spray" %% "spray-json" % "1.3.6"
+    val typesafeConfig = "com.typesafe" % "config" % "1.4.1"
+    val argparse = "de.halcony" %% "scala-argparse" % "1.1.11"
+  }
+
+
+// Dependencies pulled in by both the bytecode and the source code layer.
+lazy val commonDependencies = Seq(
+  dependencies.scopt,
+  dependencies.codepropertygraph,
+  dependencies.codepropertygraphProtos,
+  dependencies.semanticcpg,
+  dependencies.semanticcpgtests,
+  dependencies.airframelog,
+  dependencies.slf4jnop,
+  dependencies.betterFiles,
+  dependencies.scalaParallelCollection,
+  dependencies.scalatest,
+  dependencies.commonsio,
+  dependencies.argparse,
+  dependencies.json
+)
+
+// settings
+
+lazy val settings =
+  commonSettings
+
+lazy val commonSettings = Seq(
+  scalacOptions ++= compilerOptions,
+  resolvers += "Sonatype OSS" at "https://oss.sonatype.org/content/repositories/public",
+  testOptions += Tests.Argument(TestFrameworks.JUnit, "-a", "-v"),
+  Test / fork := false,
+  compile / javacOptions ++= Seq("-Xlint:all", "-Xlint:-cast", "-g"),
+  ThisBuild / resolvers ++= Seq(
+    Resolver.mavenLocal,
+    // NOTE(review): Bintray was shut down in 2021, so the two bintrayRepo resolvers presumably
+    // no longer serve anything — confirm and drop them.
+    Resolver.bintrayRepo("shiftleft", "maven"),
+    Resolver.bintrayRepo("mpollmeier", "maven"),
+    "Artifactory release local" at "https://shiftleft.jfrog.io/shiftleft/libs-release-local",
+    "Apache public" at "https://repository.apache.org/content/groups/public/",
+    "Sonatype OSS" at "https://oss.sonatype.org/content/repositories/public",
+    "Bedatadriven for SOOT dependencies" at "https://nexus.bedatadriven.com/content/groups/public"
+  ),
+  checkstyleConfigLocation := CheckstyleConfigLocation.File("config/checkstyle/google_checks.xml"),
+  checkstyleSeverityLevel := Some(CheckstyleSeverityLevel.Info)
+)
+
+lazy val compilerOptions = Seq(
+  "-deprecation", // Emit warning and location for usages of deprecated APIs.
+  "-encoding", "utf-8", // Specify character encoding used by source files.
+  "-explaintypes", // Explain type errors in more detail.
+  "-feature", // Emit warning and location for usages of features that should be imported explicitly.
+  "-language:existentials", // Existential types (besides wildcard types) can be written and inferred
+  "-language:experimental.macros", // Allow macro definition (besides implementation and application)
+  "-language:higherKinds", // Allow higher-kinded types
+  "-language:implicitConversions", // Allow definition of implicit functions called views
+  "-unchecked", // Enable additional warnings where generated code depends on assumptions.
+  "-Xcheckinit", // Wrap field accessors to throw an exception on uninitialized access.
+  // "-Xfatal-warnings", // Fail the compilation if there are any warnings.
+  "-Xlint:adapted-args", // Warn if an argument list is modified to match the receiver.
+  "-Xlint:constant", // Evaluation of a constant arithmetic expression results in an error.
+  "-Xlint:delayedinit-select", // Selecting member of DelayedInit.
+  "-Xlint:doc-detached", // A Scaladoc comment appears to be detached from its element.
+  "-Xlint:inaccessible", // Warn about inaccessible types in method signatures.
+  "-Xlint:infer-any", // Warn when a type argument is inferred to be `Any`.
+  // Malte: We don't want warnings for php-code with '$' :)
+  // "-Xlint:missing-interpolator", // A string literal appears to be missing an interpolator id.
+  "-Xlint:option-implicit", // Option.apply used implicit view.
+  "-Xlint:package-object-classes", // Class or object defined in package object.
+  "-Xlint:poly-implicit-overload", // Parameterized overloaded implicit methods are not visible as view bounds.
+  "-Xlint:private-shadow", // A private field (or class parameter) shadows a superclass field.
+  "-Xlint:stars-align", // Pattern sequence wildcard must align with sequence component.
+  "-Xlint:type-parameter-shadow", // A local type parameter shadows a type already in scope.
+  "-Ywarn-dead-code", // Warn when dead code is identified.
+  "-Ywarn-extra-implicit", // Warn when more than one implicit parameter section is defined.
+  //"-Xlint:nullary-override", // Warn when non-nullary def f() overrides nullary def f.
+  "-Xlint:nullary-unit", // Warn when nullary methods return Unit.
+  "-Ywarn-numeric-widen", // Warn when numerics are widened.
+  "-Ywarn-unused:implicits", // Warn if an implicit parameter is unused.
+  "-Ywarn-unused:imports", // Warn if an import selector is not referenced.
+  "-Ywarn-unused:locals", // Warn if a local definition is unused.
+  "-Ywarn-unused:params", // Warn if a value parameter is unused.
+  "-Ywarn-unused:patvars", // Warn if a variable bound in a pattern is unused.
+  "-Ywarn-unused:privates", // Warn if a private member is unused.
+  // "-Ywarn-value-discard" // Warn when non-Unit expression results are unused.
+)
+// skip scaladoc
+Compile / doc / sources := Seq.empty
+Compile / packageDoc / publishArtifact := false
+releaseVersionBump := sbtrelease.Version.Bump.Next
+
+releaseProcess := Seq[ReleaseStep](
+  runClean,
+  runTest,
+  inquireVersions,
+  setReleaseVersion,
+  releaseStepCommand("publishLocal"),
+  releaseStepCommand("stage"),
+  setNextVersion,
+  commitNextVersion,
+  pushChanges,
+)
diff --git a/configAndReporting/src/main/scala/io/joern/config/CPGConfig.scala b/configAndReporting/src/main/scala/io/joern/config/CPGConfig.scala
new file mode 100644
index 0000000..b416530
--- /dev/null
+++ b/configAndReporting/src/main/scala/io/joern/config/CPGConfig.scala
@@ -0,0 +1,162 @@
+package io.joern.config
+
+import com.typesafe.config.ConfigFactory
+import de.halcony.argparse.{FlagValue, OptionalValue, Parser, ParsingResult}
+import io.joern.reporting.LogEvaluation
+
+import java.io.File
+import scala.jdk.CollectionConverters.ListHasAsScala
+import scala.sys.exit
+import scala.util.control.NonFatal
+
+/** A PHP interpreter together with the php.ini it is run with.
+  *
+  * @param interpreter the php interpreter used to create bytecode
+  * @param phpini      the php.ini handed to that interpreter
+  */
+case class PHPInterpreter(interpreter: String, phpini: String)
+
+/** Effective configuration of a CPG creation run.
+  *
+  * Values come from the config file and, where given, from the command line
+  * (see [[CPGConfig.initializeConfig(argsv:Array[String])*]]).
+  *
+  * @param layer         the selected sub action (e.g. "bytecode"); `None` when read from the config file only
+  * @param output        destination file into which the cpg is stored
+  * @param rootFolder    root folder of the project to analyze
+  * @param files         file endings included for analysis
+  * @param strictLinking `false` when permissive call graph linking was requested
+  * @param strictParsing if set each project file has to be parsed successfully
+  * @param php7          interpreter settings for PHP 7
+  * @param php8          interpreter settings for PHP 8
+  * @param phpversion    the php version given on the command line
+  * @param report        the `cpg.report` flag of the config file
+  * @param forced        when set files are simply overwritten
+  * @param passes        lower-cased names from `cpg.activePasses`
+  * @param verbose       the `--verbose` command line flag
+  */
+case class CPGConfig(layer: Option[String],
+                     output: String,
+                     rootFolder: String,
+                     files: List[String],
+                     strictLinking: Boolean,
+                     strictParsing: Boolean,
+                     php7: PHPInterpreter,
+                     php8: PHPInterpreter,
+                     phpversion: String,
+                     report: Boolean,
+                     forced: Boolean,
+                     passes: Set[String],
+                     verbose: Boolean)
+
+object CPGConfig {
+
+  private val defaultConfigFile = "./main.conf"
+
+  // Command line layout: global options, then one of the sub actions
+  // `log-evaluation` or `bytecode`.
+  private val parser = Parser("PHP Cpg Creator", "create a cpg from PHP")
+    .addOptional("config",
+                 "c",
+                 "config",
+                 Some(defaultConfigFile),
+                 "the config file to use")
+    .addPositional("rootFolder", "the root folder of the project")
+    .addOptional(
+      "output",
+      "o",
+      "output",
+      None,
+      "the destination file into which to store the cpg (def:)")
+    .addFlag("forced", "f", "forced", "when set files are simply overwritten")
+    .addFlag("strictParsing",
+             "s",
+             "strictParsing",
+             "if set each project file has to be parsed successfully")
+    .addFlag("permissiveLinking",
+             "l",
+             "permissiveLinking",
+             "linking strategy for call graph set to permissive")
+    .addOptional("fileEndings",
+                 "e",
+                 "endings",
+                 None,
+                 "the file endings included for analysis (def:)")
+    .addFlag("verbose", "v", "verbose", "whether or not CPG log spam is active")
+    .addSubparser(LogEvaluation.parser)
+    .addSubparser(Parser("bytecode", "create a bytecode CPG")
+      .addDefault[String]("layer", "bytecode")
+      .addPositional("version", "the php version to use {7,8}")
+      .addOptional(
+        "php",
+        "p",
+        "php",
+        None,
+        "the php interpreter to use to create bytecode (def:)")
+      .addOptional("phpini", "i", "phpini", None, "the php.ini (def:)"))
+
+  /** Builds the configuration from the command line, falling back to the config file.
+    *
+    * Terminates the JVM with exit code 1 when the arguments cannot be parsed.
+    *
+    * @param argsv the raw command line arguments
+    * @return the effective configuration, or `null` when the `log-evaluation profile`
+    *         sub action was selected (the profiler is run instead and no CPG is configured)
+    */
+  def initializeConfig(argsv: Array[String]): CPGConfig = {
+    val pargs = try {
+      parser.parse(argsv)
+    } catch {
+      case _: de.halcony.argparse.ParsingException =>
+        exit(1)
+      // only non-fatal problems are reported here; fatal VM errors propagate
+      case NonFatal(x) =>
+        println(s"unable to initialize: ${x.getMessage}")
+        exit(1)
+    }
+    if (pargs.getValue[String]("layer") == "log-profiler") {
+      LogEvaluation.profiler(pargs)
+      null // existing contract: the profiler run yields no configuration
+    } else {
+      fromArguments(pargs)
+    }
+  }
+
+  /** Merges the parsed command line of the `bytecode` sub action with the config file. */
+  private def fromArguments(pargs: ParsingResult): CPGConfig = {
+    val preConfig = initializeConfig(pargs.getValue[String]("config"))
+    val version = pargs.getValue[String]("version")
+
+    // Picks the config-file interpreter matching the requested version; anything
+    // but 7 or 8 is reported and ends the run instead of crashing with a MatchError.
+    def configuredInterpreter: PHPInterpreter = version match {
+      case "7" => preConfig.php7
+      case "8" => preConfig.php8
+      case other =>
+        println(s"unsupported php version '$other', expected 7 or 8")
+        exit(1)
+    }
+
+    val phpOverride = pargs.get[OptionalValue[String]]("php").value
+    val iniOverride = pargs.get[OptionalValue[String]]("phpini").value
+    // A partial override is completed with the config-file value of the selected version.
+    val interpreter: Option[PHPInterpreter] = (phpOverride, iniOverride) match {
+      case (Some(php), Some(ini)) => Some(PHPInterpreter(php, ini))
+      case (Some(php), None) =>
+        Some(PHPInterpreter(php, configuredInterpreter.phpini))
+      case (None, Some(ini)) =>
+        Some(PHPInterpreter(configuredInterpreter.interpreter, ini))
+      case (None, None) => None
+    }
+
+    val permissiveLinking = pargs.get[FlagValue]("permissiveLinking")
+    val strictParsing = pargs.get[FlagValue]("strictParsing")
+
+    CPGConfig(
+      layer = Some(pargs.getValue[String]("layer")),
+      output = pargs
+        .get[OptionalValue[String]]("output")
+        .value
+        .getOrElse(preConfig.output),
+      rootFolder = pargs.getValue[String]("rootFolder"),
+      files = pargs
+        .get[OptionalValue[String]]("fileEndings")
+        .value
+        .getOrElse(preConfig.files.mkString(","))
+        .split(",")
+        .toList,
+      strictLinking =
+        if (permissiveLinking.provided) !permissiveLinking.value
+        else preConfig.strictLinking,
+      strictParsing =
+        if (strictParsing.provided) strictParsing.value
+        else preConfig.strictParsing,
+      // NOTE(review): a command line override is stored in both version slots — confirm
+      // that consumers only read the slot matching `phpversion`.
+      php7 = interpreter.getOrElse(preConfig.php7),
+      php8 = interpreter.getOrElse(preConfig.php8),
+      phpversion = version,
+      report = preConfig.report,
+      // NOTE(review): unlike the other flags, `cpg.forced` from the config file is not
+      // consulted here — confirm this is intended.
+      forced = pargs.get[FlagValue]("forced").value,
+      passes = preConfig.passes,
+      verbose = pargs.get[FlagValue]("verbose").value
+    )
+  }
+
+  /** Reads the configuration from a config file only.
+    *
+    * `rootFolder` and `phpversion` are not part of the file and are set to the
+    * placeholder "CONFIGHASNONE"; `layer` is `None` and `verbose` is `false`.
+    *
+    * @param configFile path of the config file to parse
+    */
+  def initializeConfig(configFile: String = defaultConfigFile): CPGConfig = {
+    val conf = ConfigFactory.parseFile(new File(configFile))
+    CPGConfig(
+      layer = None,
+      output = conf.getString("cpg.output"),
+      rootFolder = "CONFIGHASNONE",
+      files = conf.getStringList("cpg.files").asScala.toList,
+      strictLinking = conf.getBoolean("cpg.strictLinking"),
+      strictParsing = conf.getBoolean("cpg.strictParsing"),
+      php7 = PHPInterpreter(conf.getString("cpg.php.7.interpreter"),
+                            conf.getString("cpg.php.7.phpini")),
+      php8 = PHPInterpreter(conf.getString("cpg.php.8.interpreter"),
+                            conf.getString("cpg.php.8.phpini")),
+      phpversion = "CONFIGHASNONE",
+      report = conf.getBoolean("cpg.report"),
+      forced = conf.getBoolean("cpg.forced"),
+      verbose = false,
+      passes = conf
+        .getStringList("cpg.activePasses")
+        .asScala
+        .map(_.toLowerCase)
+        .toSet
+    )
+  }
+
+}
diff --git a/configAndReporting/src/main/scala/io/joern/reporting/AbortPass.scala b/configAndReporting/src/main/scala/io/joern/reporting/AbortPass.scala
new file mode 100644
index 0000000..541b670
--- /dev/null
+++ b/configAndReporting/src/main/scala/io/joern/reporting/AbortPass.scala
@@ -0,0 +1,3 @@
+package io.joern.reporting
+
+// Singleton throwable; presumably thrown to abort a running pass — usages are outside this file, confirm.
+object AbortPass extends Throwable
diff --git a/configAndReporting/src/main/scala/io/joern/reporting/JSONConverter.scala b/configAndReporting/src/main/scala/io/joern/reporting/JSONConverter.scala
new file
mode 100644
index 0000000..b90e915
--- /dev/null
+++ b/configAndReporting/src/main/scala/io/joern/reporting/JSONConverter.scala
@@ -0,0 +1,71 @@
+package io.joern.reporting
+
+import spray.json.{
+  DefaultJsonProtocol,
+  JsArray,
+  JsBoolean,
+  JsObject,
+  JsString,
+  JsValue,
+  RootJsonFormat,
+  deserializationError,
+  enrichAny
+}
+
+import java.time.LocalDateTime
+import java.time.format.DateTimeFormatter
+import scala.collection.mutable.ListBuffer
+
+/** spray-json formats for the reporting data model.
+  *
+  * Malformed input is reported through spray's deserialization error instead of
+  * a `ClassCastException`.
+  */
+object JSONConverter extends DefaultJsonProtocol {
+
+  implicit val passWarningFormat: RootJsonFormat[PassWarning] = jsonFormat5(
+    PassWarning)
+
+  implicit val passErrorFormat: RootJsonFormat[PassError] = jsonFormat6(
+    PassError)
+
+  /** Stores a `LocalDateTime` as a string in the pattern `yyyy-MM-dd'T'HH:mm:ss.SSS`. */
+  implicit object LocalDateTimeJSONFormat
+      extends RootJsonFormat[LocalDateTime] {
+    val sdf2: DateTimeFormatter =
+      DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS")
+
+    override def write(obj: LocalDateTime): JsValue =
+      JsString(sdf2.format(obj))
+
+    override def read(json: JsValue): LocalDateTime = json match {
+      case JsString(text) => LocalDateTime.parse(text, sdf2)
+      case other =>
+        deserializationError(s"expected a timestamp string but got $other")
+    }
+
+  }
+
+  implicit val passReportFormat: RootJsonFormat[PassReport] = jsonFormat5(
+    PassReport)
+
+  implicit val linkingReportFormat: RootJsonFormat[LinkingReport] = jsonFormat5(
+    LinkingReport)
+
+  /** Format of a whole [[Report]]: success flag, pass reports and the linking report. */
+  implicit object ReportFormat extends RootJsonFormat[Report] {
+    override def write(obj: Report): JsValue =
+      JsObject(
+        "success" -> JsBoolean(obj.successWithinSpecs),
+        "passes" -> JsArray(obj.passReports.map(_.toJson).toVector),
+        "linker" -> obj.linkingReport.toJson
+      )
+
+    /** Rebuilds a report from its "passes" and "linker" fields; "success" is not read back. */
+    override def read(json: JsValue): Report = {
+      val content = json.asJsObject.fields
+      val passes = content.get("passes") match {
+        case Some(JsArray(elements)) => elements.map(_.convertTo[PassReport])
+        case other =>
+          deserializationError(
+            s"expected an array in field 'passes' but got $other")
+      }
+      // An absent or null "linker" field is read as "no linking report".
+      val linker =
+        content.get("linker").flatMap(_.convertTo[Option[LinkingReport]])
+      new Report(ListBuffer.from(passes), linker)
+    }
+  }
+
+}
diff --git a/configAndReporting/src/main/scala/io/joern/reporting/Linking.scala
b/configAndReporting/src/main/scala/io/joern/reporting/Linking.scala new file mode 100644 index 0000000..ce3a32c --- /dev/null +++ b/configAndReporting/src/main/scala/io/joern/reporting/Linking.scala @@ -0,0 +1,49 @@ +package io.joern.reporting + +import scala.collection.mutable.ListBuffer + +trait Linking { + + private val internalFunctions: ListBuffer[String] = new ListBuffer[String]() + private val linkedReferencedFunctions: ListBuffer[String] = + new ListBuffer[String]() + private val unlinkedReferencedFunctions: ListBuffer[String] = + new ListBuffer[String]() + private val linkedReferencedMethods: ListBuffer[String] = + new ListBuffer[String]() + private val unlinkedReferencedMethod: ListBuffer[String] = + new ListBuffer[String]() + + def unlinkedInternalFunction(str: String): Unit = + internalFunctions.synchronized { + internalFunctions.addOne(str) + } + + def linkedFunction(str: String): Unit = + linkedReferencedFunctions.synchronized { + linkedReferencedFunctions.addOne(str) + } + + def unableToLinkFunction(str: String): Unit = + unlinkedReferencedFunctions.synchronized { + unlinkedReferencedFunctions.addOne(str) + } + + def linkedMethod(str: String): Unit = linkedReferencedMethods.synchronized { + linkedReferencedMethods.addOne(str) + } + + def unableToLinkMethod(str: String): Unit = + unlinkedReferencedMethod.synchronized { + unlinkedReferencedMethod.addOne(str) + } + + def getLinkingReport: LinkingReport = LinkingReport( + internalFunctions.toSet.toList, + linkedReferencedFunctions.toSet.toList, + unlinkedReferencedFunctions.toSet.toList, + linkedReferencedMethods.toSet.toList, + unlinkedReferencedMethod.toSet.toList + ) + +} diff --git a/configAndReporting/src/main/scala/io/joern/reporting/LinkingReport.scala b/configAndReporting/src/main/scala/io/joern/reporting/LinkingReport.scala new file mode 100644 index 0000000..bf8dc43 --- /dev/null +++ b/configAndReporting/src/main/scala/io/joern/reporting/LinkingReport.scala @@ -0,0 +1,7 @@ +package 
io.joern.reporting + +case class LinkingReport(internalFunctions: List[String], + linkedReferencedFunctions: List[String], + unlinkedDefinedFunctions: List[String], + linkedReferencedMethods: List[String], + unlinkedReferencedMethods: List[String]) diff --git a/configAndReporting/src/main/scala/io/joern/reporting/LogEvaluation.scala b/configAndReporting/src/main/scala/io/joern/reporting/LogEvaluation.scala new file mode 100644 index 0000000..3bf46a4 --- /dev/null +++ b/configAndReporting/src/main/scala/io/joern/reporting/LogEvaluation.scala @@ -0,0 +1,63 @@ +package io.joern.reporting + +import de.halcony.argparse.{Parser, ParsingResult} + +import java.io.File +import java.time.temporal.ChronoUnit +import scala.collection.mutable.{ListBuffer, Map => MMap} +import scala.util.{Failure, Success, Try} + +object LogEvaluation { + + val parser: Parser = Parser("log-evaluation") + .addSubparser( + Parser("profile") + .addDefault[String]("layer", "log-profiler")) + + def profiler(pargs: ParsingResult): Unit = { + val folder = pargs.getValue[String]("rootFolder") + if (!new File(folder).isDirectory) { + throw new RuntimeException("You need to provide a proper folder") + } else { + val deltas: MMap[String, ListBuffer[Long]] = MMap() + new File(folder) + .listFiles() + //.map{elem => println(elem.getAbsolutePath); elem} + .filter(_.isFile) + //.map{elem => println(elem.getAbsolutePath); elem} + .filter(_.getPath.endsWith(".report")) + //.map{elem => println(elem.getAbsolutePath); elem} + .map(file => Try(Report.apply(file.getAbsolutePath))) + .filter { + case Failure(_) => false + case Success(_) => true + } + .map(_.get) + .foreach { report => + report.passReports.foreach { pass => + if (deltas.contains(pass.name)) { + deltas(pass.name) + .addOne(pass.start.until(pass.end, ChronoUnit.SECONDS)) + } else { + deltas.addOne( + pass.name -> ListBuffer( + pass.start.until(pass.end, ChronoUnit.SECONDS))) + } + } + } + deltas.foreach { + case (str, value) => + val avg = 
value.sum.toDouble / value.length.toDouble + val max = value.max + val sigma = Math.sqrt( + value + .map(num => (num.toDouble - avg) * (num.toDouble - avg)) + .sum / Math.max(value.length - 1, 1)) // single sample: avoid 0/0 = NaN + println( + s"$str ~ %.2f seconds (+/- %.2f/max %.2f)" + .format(avg, sigma, max.toDouble)) + } + } + } + +} diff --git a/configAndReporting/src/main/scala/io/joern/reporting/PassReport.scala b/configAndReporting/src/main/scala/io/joern/reporting/PassReport.scala new file mode 100644 index 0000000..61c70b2 --- /dev/null +++ b/configAndReporting/src/main/scala/io/joern/reporting/PassReport.scala @@ -0,0 +1,32 @@ +package io.joern.reporting + +import java.time.LocalDateTime + +case class PassWarning(file: String, + line: String, + method: String, + bytecode: String, + description: String) { + + override def toString: String = { + s"{$file:$line}{$method}{$bytecode} $description" + } + +} + +case class PassError(file: String, + line: String, + method: String, + bytecode: String, + description: String, + stacktrace: String) { + override def toString: String = { + s"{$file:$line}{$method}{$bytecode} $description \n $stacktrace" + } +} + +case class PassReport(name: String, + errors: Seq[PassError], + warnings: Seq[PassWarning], + start: LocalDateTime, + end: LocalDateTime = LocalDateTime.now()) diff --git a/configAndReporting/src/main/scala/io/joern/reporting/Report.scala b/configAndReporting/src/main/scala/io/joern/reporting/Report.scala new file mode 100644 index 0000000..ac6628b --- /dev/null +++ b/configAndReporting/src/main/scala/io/joern/reporting/Report.scala @@ -0,0 +1,73 @@ +package io.joern.reporting + +import io.joern.reporting.JSONConverter._ +import spray.json._ + +import java.io.{File, FileWriter} +import scala.collection.mutable.ListBuffer + +class Report(val passReports: ListBuffer[PassReport] = ListBuffer(), + var linkingReport: Option[LinkingReport]) { + + def success: Boolean = !passReports.exists(_.errors.nonEmpty) + + /** tests whether the cpg creation was
successfull within specs + * + * this test allows for parsing issues as well as {closure} errors in + * + * @return + */ + def successWithinSpecs: Boolean = { + passReports + .filter(_.name != "FileParser") // parsing errors are expected in large projects + .flatMap(_.errors) + .isEmpty + } + + def addReport(report: PassReport): Unit = { + passReports.synchronized { + passReports.addOne(report) + } + } + + def addReport(report: LinkingReport): Unit = { + linkingReport = Some(report) + } + + def writeToFile(file: String): Unit = { + val fileWriter = new FileWriter(new File(file)) + try { + fileWriter.write(this.toJson.prettyPrint) + } finally { + fileWriter.flush() + fileWriter.close() + } + } + + def prettyPrintErrors: String = { + val sb = new StringBuilder() + passReports.filter(_.errors.nonEmpty).foreach { report => + sb.append(report.name + "\n") + report.errors.foreach { err => + sb.append(err.toString + "\n") + } + } + sb.toString() + } + +} + +object Report { + + def apply(): Report = { + new Report(ListBuffer(), None) + } + + def apply(file: String): Report = { + val source = scala.io.Source.fromFile(file) + val lines = try source.mkString + finally source.close() + spray.json.JsonParser(lines).convertTo[Report] + } + +} diff --git a/configAndReporting/src/main/scala/io/joern/reporting/Reporting.scala b/configAndReporting/src/main/scala/io/joern/reporting/Reporting.scala new file mode 100644 index 0000000..87534e2 --- /dev/null +++ b/configAndReporting/src/main/scala/io/joern/reporting/Reporting.scala @@ -0,0 +1,157 @@ +package io.joern.reporting + +import io.shiftleft.codepropertygraph.generated.nodes.{CfgNode, Method} +import wvlet.log.LogSupport + +import java.io.{OutputStream, PrintStream} +import java.time.LocalDateTime +import scala.collection.mutable.ListBuffer + +case class ReportableError(file: String, + line: Int, + method: String, + bytecode: String, + description: String) + extends Throwable {} + +object ReportableError { + + def apply(method: 
Method, + cfgNode: CfgNode, + description: String): ReportableError = { + val lineNumber: Int = + cfgNode.lineNumber.getOrElse(Integer.valueOf(-1)).toInt // -1 marks a missing line number + ReportableError(method.filename, + lineNumber, + method.fullName, + cfgNode.code, + description) + } + +} + +object Reporting { + + def stringifyErrorStack(x: Throwable): String = { + val str: StringBuilder = new StringBuilder() + val printTo = new PrintStream(new OutputStream { + override def write(b: Int): Unit = { + str.addOne(b.toChar) + } + override def toString: String = str.toString() + }) + x.printStackTrace(printTo) + str.toString() + } + +} + +trait Reporting extends LogSupport { + + val name: String + val start: LocalDateTime = LocalDateTime.now() + + val elb: ListBuffer[PassError] = new ListBuffer[PassError]() + val wlb: ListBuffer[PassWarning] = new ListBuffer[PassWarning]() + + private def withErrorReporting(method: Option[Method])( + func: => Unit): Unit = { + try { + func + } catch { + case err: ReportableError => reportError(err) + case thr: Throwable => + method match { + case Some(value) => + reportError(value.filename, + "NA", + value.fullName, + "NA", + thr.getMessage, + thr) + case None => + reportError("NA", "NA", "NA", "NA", thr.getMessage, thr) + } + } + } + + protected def withErrorReporting(method: Method)(func: => Unit): Unit = { + withErrorReporting(Some(method))(func) + } + + protected def withErrorReporting()(func: => Unit): Unit = { + withErrorReporting(None)(func) + } + + protected def reportWarning(file: String, + line: String, + method: String, + bytecode: String, + description: String): Unit = { + wlb.synchronized { + val warning = PassWarning(file, line, method, bytecode, description) + warn(warning) + wlb.addOne(warning) + } + } + + protected def reportWarning(method: Method, + cfgNode: CfgNode, + description: String): Unit = { + reportWarning( + method.filename, + cfgNode.lineNumber.getOrElse(Integer.valueOf(-1)).toString, // "-1" when the node has no line number + method.fullName, + cfgNode.code, +
description) + } + + @deprecated( + "Pass the underlying throwable as well as last argument! Otherwise debugging is hard!") + protected def reportError(file: String, + line: String, + method: String, + bytecode: String, + description: String): Unit = { + reportError(file, line, method, bytecode, description, None) + } + + protected def reportError(file: String, + line: String, + method: String, + bytecode: String, + description: String, + e: Throwable): Unit = { + reportError(file, line, method, bytecode, description, Some(e)) + } + + private def reportError(file: String, + line: String, + method: String, + bytecode: String, + description: String, + e: Option[Throwable]): Unit = { + elb.synchronized { + val err = PassError(file, line, method, bytecode, description, e match { + case Some(value) => Reporting.stringifyErrorStack(value) + case None => "no error provided" + }) + error(err) + elb.addOne(err) + } + } + + protected def reportError(err: ReportableError): Unit = { + reportError(err.file, + err.line.toString, + err.method, + err.bytecode, + err.description, + Some(err)) + } + + def getReport: PassReport = { + PassReport(name, elb.toList, wlb.toList, start) + } + +} diff --git a/documentation/INIT_DYNAMIC_CALL.md b/documentation/INIT_DYNAMIC_CALL.md new file mode 100644 index 0000000..e4082d0 --- /dev/null +++ b/documentation/INIT_DYNAMIC_CALL.md @@ -0,0 +1,8 @@ +# INIT_DYNAMIC_CALL + +``` +INIT_DYNAMIC_CALL +``` + +Initiates the call to the function stored in ``. `` defines the amount of arguments passed to the +called function. \ No newline at end of file diff --git a/documentation/INIT_FCALL.md b/documentation/INIT_FCALL.md new file mode 100644 index 0000000..cf7213b --- /dev/null +++ b/documentation/INIT_FCALL.md @@ -0,0 +1,8 @@ +# INIT_FCALL + +``` +INIT_FCALL +``` + +Starts preparing a function call. `` defines the amount of arguments passed on. The use of `` is +unclear so far [TODO]. `` is the name of the function to be called. 
\ No newline at end of file diff --git a/documentation/INIT_FS_FCALL_BY_NAME.md b/documentation/INIT_FS_FCALL_BY_NAME.md new file mode 100644 index 0000000..4e7e433 --- /dev/null +++ b/documentation/INIT_FS_FCALL_BY_NAME.md @@ -0,0 +1,8 @@ +# INIT_FS_FCALL_BY_NAME + +``` +INIT_FS_FCALL_BY_NAME +``` + +Initiates a call to the function referenced by `` which is a fully qualified name including namespace. +`` defines the amount of arguments passed to the function. \ No newline at end of file diff --git a/documentation/INIT_METHOD_CALL.md b/documentation/INIT_METHOD_CALL.md new file mode 100644 index 0000000..a4ebf6b --- /dev/null +++ b/documentation/INIT_METHOD_CALL.md @@ -0,0 +1,8 @@ +# INIT_METHOD_CALL + +``` +INIT_METHOD_CALL (THIS | ) +``` + +Initiates a method call to the method named in `` on the object provided either by `THIS` or by ``. +`` defines the amount of arguments passed to the call. \ No newline at end of file diff --git a/documentation/INIT_STATIC_METHOD_CALL.md b/documentation/INIT_STATIC_METHOD_CALL.md new file mode 100644 index 0000000..73f2f16 --- /dev/null +++ b/documentation/INIT_STATIC_METHOD_CALL.md @@ -0,0 +1,20 @@ +# INIT_STATIC_METHOD_CALL + +``` +INIT_STATIC_METHOD_CALL +``` + +Initiates a call to the static method `` of class named `` with `` passed arguments. + +``` +INIT_STATIC_METHOD_CALL +``` + +Initiates a call to the static method `` with `` arguments passed on. TODO: the meaning of `` +and `` is unknown. + +``` +INIT_STATIC_METHOD_CALL CONSTRUCTOR +``` + +TODO: this call is not yet understood \ No newline at end of file diff --git a/documentation/NEW.md b/documentation/NEW.md new file mode 100644 index 0000000..e93198c --- /dev/null +++ b/documentation/NEW.md @@ -0,0 +1,9 @@ +# NEW + +``` +NEW ( | ) +``` + +Initiates the creation of a new object and basically acts like a function call start. The `` designates the +amount of arguments passed on to the constructor.
`` or `` designates the class or classname that +is to be instantiated. \ No newline at end of file diff --git a/documentation/README.md b/documentation/README.md new file mode 100644 index 0000000..3a87b1a --- /dev/null +++ b/documentation/README.md @@ -0,0 +1,193 @@ +# Opcodes +The PHP bytecode CPG is based on PHP opcodes. Recent PHP versions are compiled from the PHP source code to an intermediate bytecode used by the PHP virtual machine. Check out [nikic's great blog](https://www.npopov.com) for a better introduction and deep-dive into the topic. + +This documentation is a **work in progress**. + +## Resources and sources + +- https://www.npopov.com/2017/04/14/PHP-7-Virtual-machine.html +- https://stackoverflow.com/questions/36031490/what-is-ext-nop-and-ext-stmt-php-opcode +- https://www.npopov.com/2022/05/22/The-opcache-optimizer.html + - includes an explanation of the bitflags used below +- https://3v4l.org/ also offers opcode dumps + - If you want to find out when a particular opcode is generated, Google it with `site:3v4l.org` to find usages. +- PHP src + - e.g. list of opcodes of PHP 7.4.28: https://github.com/php/php-src/blob/b3c6c35570283c457cd5ee6d56301a1bc7f2f8e6/Zend/zend_vm_opcodes.h +- PHP doc + - documents some opcodes, but tends to be inconclusive + +## Dumping the opcodes + +```shell +php -d zend_extension=opcache -d opcache.enable_cli=1 -d opcache.opt_debug_level=0x10000 --syntax-check +``` +Can be used to dump the opcodes. This is handy to get a feeling for PHP opcodes. + +## CPG limitations +Since the CPG is built from bytecode it cannot implement everything up-to-spec.
+ +### Example: + +```php +<?php +$x = $_GET["x"]; +if ($x > 10) { + print("a"); +} +``` + +converts to: + +``` +$_main: ; (lines=7, args=0, vars=1, tmps=4) + ; (before optimizer) + ; if.php:1-6 +L0 (2): T1 = FETCH_R (global) string("_GET") +L1 (2): T2 = FETCH_DIM_R T1 string("x") +L2 (2): ASSIGN CV0($x) T2 +L3 (3): T4 = IS_SMALLER int(10) CV0($x) +L4 (3): JMPZ T4 L6 +L5 (4): ECHO string("a") +L6 (6): RETURN int(1) +``` + +which is then converted to a cpg. The bytecode instructions are modeled as calls. +The CPG only implements basic functionality from the spec, e.g. it doesn't support constructs like `cpg.assignment`. + +```scala +joern> cpg.assignment.l +res0: List[operatorextension.OpNodes.Assignment] = List() +``` + +But that functionality can be queried via calls: +```scala +joern> cpg.call("ASSIGN").l +res1: List[Call] = List( + Call( + id -> 512409557603043114L, + argumentIndex -> -1, + argumentName -> None, + code -> "ASSIGN CV($x) T2", + columnNumber -> None, + dispatchType -> "STATIC_DISPATCH", + dynamicTypeHintFullName -> ArraySeq(), + lineNumber -> Some(value = 1), + methodFullName -> "", + name -> "ASSIGN", + order -> 2, + signature -> "", + typeFullName -> "" + ) +) +``` + +## Opcodes + +This is a WIP list of documented bytecodes.
+ +| opcode | argc | coflo | daflo | call | doc | comment | +|----------------------------|------|-------|-------|------|-------------------------------------|----------------------------------------------------------------------------| +| NEW | 2 | x | x | x | [yes](./NEW.md) | init creates object | +| INIT_FCALL | 3 | x | x | x | [yes](./INIT_FCALL.md) | init call function | +| INIT_METHOD_CALL | 3 | x | x | x | [yes](./INIT_METHOD_CALL.md) | init call method | +| INIT_NS_FCALL_BY_NAME | 2 | x | x | x | [yes](./INIT_FS_FCALL_BY_NAME.md) | init call a function referenced by namespace | +| INIT_DYNAMIC_CALL | 2 | x | x | x | [yes](./INIT_DYNAMIC_CALL.md) | init call a function saved in a variable | +| INIT_FCALL_BY_NAME | 2 | x | x | x | [todo] | init call a function | +| INIT_STATIC_METHOD_CALL | 3/4 | x | x | x | [yes](./INIT_STATIC_METHOD_CALL.md) | init call a static method | +| DO_FCALL | 0 | x | x | y | [todo] | perform latest call init | +| DO_ICALL | 0 | x | x | y | [todo] | perform latest call init | +| DO_UCALL | 0 | x | x | y | [todo] | perform latest call init | +| DO_FCALL_BY_NAME | 0 | x | x | y | [todo] | perform latest call for `INIT_FCALL_BY_NAME` | +| SEND_VAL | 2 | x | y | x | [todo] | send value (const, tmp) to function call | +| SEND_VAR | 2 | x | y | x | [todo] | send variable (CV, var) to function call | +| SEND_REF | | | | | | send argument by reference | +| SEND_VAR_EX | 2 | x | y | x | [todo] | variant of SEND_VAR but by-value or by-reference is determined at runtime | +| SEND_VAL_EX | 2 | x | y | x | [todo] | variant of SEND_VAL, but by-value or by-reference is determined statically | +| SEND_VAR_NO_REF_EX | 2 | x | y | x | [todo] | send pseudo variable, e.g.
function call results | +| SEND_FUNC_ARG | 2 | x | y | x | [todo] | [todo] | +| ISSET_ISEMPTY_DIM_OBJ | 3 | x | x | x | no | checks if the dimension of obj is set | +| ISSET_ISEMPTY_CV | 2 | x | x | x | [todo] | [todo] | +| ISSET_ISEMPTY_PROP_OBJ | 3 | x | x | x | no | checks if the property of the object is set | +| FETCH_OBJ_FUNC_ARG | 2/3 | x | y | x | [todo] | [todo] | +| FETCH_STATIC_PROP_R | 3 | x | y | x | [todo] | [todo] | +| FETCH_STATIC_PROP_W | 4 | x | y | x | [todo] | [todo] | +| FETCH_STATIC_PROP_FUNC_ARG | 3 | x | y | x | [todo] | [todo] | +| FETCH_OBJ_R | 2 | x | y | x | [todo] | [todo] | +| FETCH_OBJ_W | 3 | x | y | x | [todo] | [todo] | +| FETCH_OBJ_IS | 2 | x | y | x | [todo] | [todo] | +| FETCH_DIM_R | 2 | x | y | x | [todo] | get value in array x1 at index x2 | +| FETCH_DIM_FUNC_ARG | 2 | x | y | x | [todo] | [todo] | +| FETCH_LIST_R | 2 | x | y | x | [todo] | [todo] | +| FETCH_CLASS_CONSTANT | 2/3 | x | y | x | [todo] | [todo] | +| FETCH_CLASS | 2 | x | y | x | [todo] | [todo] | +| FETCH_CONSTANT | 3 | x | y | x | [todo] | [todo] | +| FETCH_THIS | 0 | x | y | x | [todo] | [todo] | +| INIT_ARRAY | 3/4 | x | x | x | no | inits a new array | +| ADD_ARRAY_ELEMENT | 2 | x | y | x | [todo] | [todo] | +| CHECK_FUNC_ARG | 1 | x | x | x | [todo] | [todo] | +| RECV | 1 | x | y | x | [todo] | callsite opcode to receive arguments | +| RECV_INIT | 2 | x | y | x | [todo] | [todo] | +| JMPNZ | 2 | y | x | x | [todo] | jump to on non zero | +| JMPNZ_EX | 2 | y | x | x | [todo] | [todo] | +| JMPZ_EX | 2 | y | x | x | [todo] | [todo] | +| JMPZ | 2 | y | x | x | [todo] | jump to on zero | +| JMP | 1 | y | x | x | [todo] | jump to | +| JMPZNZ | 2 | y | x | x | [todo] | [todo] | +| FE_FETCH_R | 3 | n | y | x | [todo] | [todo] | +| FE_RESET_R | 2 | x | y | x | [todo] | [todo] | +| ASSIGN_DIM | 2 | x | y | x | [todo] | [todo] | +| ASSIGN | 2 | x | y | x | [todo] | [todo] | +| ASSIGN_OP | 3 | x | y | x | [todo] | [todo] | +| ASSIGN_STATIC_PROP | 1 | x | y | x | [todo] 
| [todo] | +| ASSIGN_OBJ | 2 | x | y | x | [todo] | [todo] | +| ROPE_INIT | 2 | x | x | x | [yes](./ROPE.md) | init rope string concatenation | +| ROPE_ADD | 3 | x | y | x | [yes](./ROPE.md) | concat string to rope | +| ROPE_END | 3 | x | y | x | [yes](./ROPE.md) | concat string to rope and end rope concatenation | +| CAST | 2 | x | x | x | no | casts the second value to the provided type | +| NOP | 0 | x | x | x | no | does nothing | +| BEGIN_SILENCE | 0 | x | x | x | [todo] | [todo] | +| EXT_STMT | 0 | x | x | x | [todo] | hook point for debuggers | +| EXT_NOP | 0 | x | x | x | [todo] | [todo] | +| RETURN | 1 | y | y | x | no | returns the given value | +| ECHO | 1 | x | x | x | no | echoes the given value | +| BW_NOT | 1 | x | x | x | [todo] | bitwise not | +| BOOL_NOT | 1 | x | x | x | no | inverts the given boolean value | +| QM_ASSIGN | 1 | x | y | x | [todo] | implements ternary operator (`? :`) | +| PRE_INC | 1 | x | y | x | no | increments given value | +| POST_INC | 1 | x | y | x | no | increments given value | +| PRE_DEC | 1 | x | y | x | no | decrements given value | +| POST_DEC | 1 | x | y | x | no | decrements given value | +| FREE | 1 | x | y | x | no | frees the given reference | +| PRINT | 1 | x | x | x | no | prints the given value | +| FE_FREE | 1 | x | y | x | [todo] | [todo] | +| END_SILENCE | 1 | x | x | x | [todo] | [todo] | +| BOOL | 1 | x | x | x | no | [todo] | +| OP_DATA | 1 | x | y | x | [todo] | carries additional param for previous bytecode | +| CATCH | 1 | x | x | x | no | catches the given type of exception | +| THROW | 1 | y | y | x | no | throws the given error object | +| CONCAT | 2 | x | x | x | [todo] | [todo] | +| FAST_CONCAT | 2 | x | x | x | [todo] | [todo] | +| ADD | 2 | x | x | x | no | add the two given values | +| SUB | 2 | x | x | x | no | subtract the two given values | +| MUL | 2 | x | x | x | no | multiply the two given values | +| DIV | 2 | x | x | x | no | divide the two given values | +| MOD | 2 | x | x | x | no |
modulo the two given values | +| SL | 2 | x | x | x | no | shift bits left value2 by value1 count | +| SR | 2 | x | x | x | no | shift bits right value2 by value1 count | +| BW_OR | 2 | x | x | x | no | bitwise or the two values | +| BW_AND | 2 | x | x | x | no | bitwise and the two given values | +| BW_XOR | 2 | x | x | x | no | bitwise xor the two given values | +| BOOL_OR | 2 | x | x | x | no | or the two given values | +| IS_EQUAL | 2 | x | x | x | no | check if the two given values are equal | +| IS_NOT_EQUAL | 2 | x | x | x | no | check if the two given values are not equal | +| IS_IDENTICAL | 2 | x | x | x | no | check if the two given values are identical | +| IS_NOT_IDENTICAL | 2 | x | x | x | no | check if the two given values are not identical | +| IS_SMALLER | 2 | x | x | x | no | check if the first value is smaller | +| IS_SMALLER_OR_EQUAL | 2 | x | x | x | no | check if the first value is smaller or equal | +| BIND_GLOBAL | 2 | x | y | x | [todo] | create a global value | +| DECLARE_CLASS_DELAYED | 2 | x | x | x | [todo] | [todo] | +| UNSET_DIM | 2 | x | y | x | [todo] | unset the dimension value | +| INSTANCEOF | 2 | x | x | x | no | check if value2 is an instance of value1 | +| COALESCE | | | | | | implements null coalescing operator (`??`) | +| SPACESHIP | | | | | | implements spaceship operator (`<=>`) | + diff --git a/documentation/ROPE.md b/documentation/ROPE.md new file mode 100644 index 0000000..a1b221b --- /dev/null +++ b/documentation/ROPE.md @@ -0,0 +1,10 @@ +```php +echo ("https://${bar}$a"); +``` + +``` +T3 = ROPE_INIT 3 string("https://") +T3 = ROPE_ADD 1 T3 CV0($bar) +T2 = ROPE_END 2 T3 CV1($a) +ECHO T2 +``` diff --git a/layerByteCode/resources/byte-code-gen.py b/layerByteCode/resources/byte-code-gen.py new file mode 100755 index 0000000..668e768 --- /dev/null +++ b/layerByteCode/resources/byte-code-gen.py @@ -0,0 +1,46 @@ +#!/bin/env python3 + +from os import listdir, walk, mkdir +from os.path import isfile, join, isdir +from subprocess
import Popen +import sys + +PHP_PATH = "/usr/bin/php" + +def createByteCodeFile(filePath, shadowFilePath): + with open(shadowFilePath,'w') as f: + process = Popen(["/usr/bin/php7", + "-d","opcache.enable_cli=1", + "-d","opcache.opt_debug_level=0x50000", + "-d","opcache.log_verbosity_level=0", + "--syntax-check", filePath], stdout=f, stderr=f) + exitCode = process.wait() + if exitCode != 0: + print("Converting file {} resulted in exit code {}".format(filePath, exitCode)) + + +def traverseFolder(baseFolder, shadowFolder): + print("traversing folder {}".format(baseFolder)) + for elems in listdir(baseFolder): + if isfile(join(baseFolder,elems)): + if join(baseFolder,elems).endswith(".php"): + createByteCodeFile(join(baseFolder, elems), + join(shadowFolder, "{}.byte".format(elems))) + if isdir(join(baseFolder,elems)): + if(shadowFolder != baseFolder): + mkdir(join(shadowFolder, elems)) + traverseFolder(join(baseFolder, elems), + join(shadowFolder, elems)) + + +def main(baseFolder, shadowFolder): + traverseFolder(baseFolder, shadowFolder) + + +if(len(sys.argv) != 3): + print("./byte-code-gen.py [projectFolder] [shadowFolder]") + print(" [projectFolder] the folder of the project to be converted to bytecode") + print(" [shadowFolder] the folder in which the byte code files should be saved") + print(" and imitates the folder structure of [projectFolder]") +else: + main(sys.argv[1], sys.argv[2]) diff --git a/layerByteCode/resources/fileProcessor.py b/layerByteCode/resources/fileProcessor.py new file mode 100755 index 0000000..61f47d4 --- /dev/null +++ b/layerByteCode/resources/fileProcessor.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 + +import argparse +import os + +def __parse_file(file_name): + data = [] + with open(file_name, "r") as file: + for line in file: + data.append(line) + return data + +def __write_file(data, file_name): + with open(file_name, "w") as file: + file.writelines(data) + file.flush() + os.system("sync") + +def __replace(data, constant, file): + 
processed_data = [] + for line in data: + # Check if a line contains an uneven number of single quotes + if line.count("\'") % 2 != 0: + #exit(1) + print("WARNING! There is a uneven number of single quotes in this line of file {}. We leave this line as it is and don't process it".format(file)) + processed_data.append(line) + continue + + # Process line + second = -1 + while line[second +1: len(line)].find("\'") != -1: + # Find first single quote + first = line.find("\'", second+1) + # Find second single quote + second = line.find("\'", first+1) + # Search for double quotes in between the first and second single quote and replace them with the constant + line = line[:first] + line[first:second].replace("\"", constant) + line[second:] + processed_data.append(line) + return processed_data + + + +def process(file_name, constant): + """ + Replaces all double quotes which are wrapped in single code by a constant.\n + Parameters + ---------- + file_name : str\n + File to be processed\n + constant : str\n + Constant by which double quotes in single quotes shall be replaced\n + Returns + ---------- + str\n + Path to the processed file + """ + #print("processing") + data = __parse_file(file_name) + #print(data) + data = __replace(data, constant, file_name) + #print(data) + + #__write_file(data, file_name + ".processed") + __write_file(data, file_name) + #return file_name + ".processed" + + +def unprocess(file_name,constant): + # print('sed -i "s/{}/\\"/g" {}'.format(constant,file_name)) + os.system('sed -i "s/{}/\\"/g" {}'.format(constant,file_name)) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("file", help="file to be processed") + parser.add_argument("constant", help="constant by which double quotes in single quotes shall be replaced") + parser.add_argument("--revert",action="store_true") + args = parser.parse_args() + #print("processing {}".format(args.file)) + if args.revert : + unprocess(args.file, args.constant) + else : + 
process(args.file, args.constant) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/layerByteCode/resources/integrationtesting/php8_1.php b/layerByteCode/resources/integrationtesting/php8_1.php new file mode 100644 index 0000000..f815ad7 --- /dev/null +++ b/layerByteCode/resources/integrationtesting/php8_1.php @@ -0,0 +1,16 @@ + $o?->f(), + 2 => htmlspecialchars($string, double_encode: false), + 3 => $c("a"), + }; + match ($a + 1) { + 2 => print(1), + }; + exit(1); +} +f(); diff --git a/layerByteCode/resources/semantic/default.semantics b/layerByteCode/resources/semantic/default.semantics new file mode 100644 index 0000000..e09919a --- /dev/null +++ b/layerByteCode/resources/semantic/default.semantics @@ -0,0 +1,8 @@ +"ASSIGN" 2->1 +"=" 2->1 +ASSIGN_DIM ?->1 +ASSIGN_OP 3->2 +POST_DEC 1->1 +PRE_DEC 1->1 +POST_INC 1->1 +PRE_INC 1->1 \ No newline at end of file diff --git a/layerByteCode/resources/unittesting/testprojects/basicConditional/main.php b/layerByteCode/resources/unittesting/testprojects/basicConditional/main.php new file mode 100644 index 0000000..6de4f4d --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/basicConditional/main.php @@ -0,0 +1,8 @@ + \ No newline at end of file diff --git a/layerByteCode/resources/unittesting/testprojects/functionDefinedTwice/bar.php b/layerByteCode/resources/unittesting/testprojects/functionDefinedTwice/bar.php new file mode 100644 index 0000000..515498e --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/functionDefinedTwice/bar.php @@ -0,0 +1,11 @@ +getEmitter(); + } +} diff --git a/layerByteCode/resources/unittesting/testprojects/guzzleunknown/HasEmitterTrait.php b/layerByteCode/resources/unittesting/testprojects/guzzleunknown/HasEmitterTrait.php new file mode 100644 index 0000000..395f256 --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/guzzleunknown/HasEmitterTrait.php @@ -0,0 +1,12 @@ +getEmitter(); + } +} diff --git 
a/layerByteCode/resources/unittesting/testprojects/inheritance/classA.php b/layerByteCode/resources/unittesting/testprojects/inheritance/classA.php new file mode 100644 index 0000000..474890b --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/inheritance/classA.php @@ -0,0 +1,9 @@ + \ No newline at end of file diff --git a/layerByteCode/resources/unittesting/testprojects/inheritance/classB.php b/layerByteCode/resources/unittesting/testprojects/inheritance/classB.php new file mode 100644 index 0000000..da42581 --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/inheritance/classB.php @@ -0,0 +1,9 @@ + \ No newline at end of file diff --git a/layerByteCode/resources/unittesting/testprojects/multipleFilesProject/first.php b/layerByteCode/resources/unittesting/testprojects/multipleFilesProject/first.php new file mode 100644 index 0000000..3d4bac8 --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/multipleFilesProject/first.php @@ -0,0 +1,3 @@ + \ No newline at end of file diff --git a/layerByteCode/resources/unittesting/testprojects/multipleFilesProject/second.php b/layerByteCode/resources/unittesting/testprojects/multipleFilesProject/second.php new file mode 100644 index 0000000..6e8e529 --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/multipleFilesProject/second.php @@ -0,0 +1,3 @@ + \ No newline at end of file diff --git a/layerByteCode/resources/unittesting/testprojects/nameCollision/a.php b/layerByteCode/resources/unittesting/testprojects/nameCollision/a.php new file mode 100644 index 0000000..756e487 --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/nameCollision/a.php @@ -0,0 +1,16 @@ +foo(); + } +} diff --git a/layerByteCode/resources/unittesting/testprojects/nameCollision/b.php b/layerByteCode/resources/unittesting/testprojects/nameCollision/b.php new file mode 100644 index 0000000..8b4375b --- /dev/null +++ 
b/layerByteCode/resources/unittesting/testprojects/nameCollision/b.php @@ -0,0 +1,8 @@ +foo(); diff --git a/layerByteCode/resources/unittesting/testprojects/onlyMainCreation/trivial-php.php b/layerByteCode/resources/unittesting/testprojects/onlyMainCreation/trivial-php.php new file mode 100644 index 0000000..e05fdd3 --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/onlyMainCreation/trivial-php.php @@ -0,0 +1,3 @@ + \ No newline at end of file diff --git a/layerByteCode/resources/unittesting/testprojects/parsingSideCases/main.php b/layerByteCode/resources/unittesting/testprojects/parsingSideCases/main.php new file mode 100644 index 0000000..5420348 --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/parsingSideCases/main.php @@ -0,0 +1,3 @@ + \ No newline at end of file diff --git a/layerByteCode/resources/unittesting/testprojects/singleClassProject/main.php b/layerByteCode/resources/unittesting/testprojects/singleClassProject/main.php new file mode 100644 index 0000000..868fb51 --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/singleClassProject/main.php @@ -0,0 +1,22 @@ + test($rest); + test($rest); + } + } + + $var = new Basic("value"); + $var->test2("other"); + ?> \ No newline at end of file diff --git a/layerByteCode/resources/unittesting/testprojects/trivialNameSpaceUsage/main.php b/layerByteCode/resources/unittesting/testprojects/trivialNameSpaceUsage/main.php new file mode 100644 index 0000000..9f109e8 --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/trivialNameSpaceUsage/main.php @@ -0,0 +1,36 @@ + \ No newline at end of file diff --git a/layerByteCode/resources/unittesting/testprojects/twoFunctionsAndMain/main.php b/layerByteCode/resources/unittesting/testprojects/twoFunctionsAndMain/main.php new file mode 100644 index 0000000..abd21bf --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/twoFunctionsAndMain/main.php @@ -0,0 +1,12 @@ + \ No newline at end of file diff 
--git a/layerByteCode/resources/unittesting/testprojects/twoInternalFunctionsOneNamespace/a.php b/layerByteCode/resources/unittesting/testprojects/twoInternalFunctionsOneNamespace/a.php new file mode 100644 index 0000000..786df43 --- /dev/null +++ b/layerByteCode/resources/unittesting/testprojects/twoInternalFunctionsOneNamespace/a.php @@ -0,0 +1,3 @@ + + (new FileParser7(files, config.php7.interpreter, config.php7.phpini), + config.php7.interpreter) + case io.joern.bytecode.parser.PHPVersion.V8 => + (new FileParser8(files, + config.php8.interpreter, + config.php8.phpini, + config.strictParsing), + config.php8.interpreter) + } + } + // check if interpreter exists + try { + s"$interpreter --version".!(ProcessLogger(_ => (), err => println(err))) + } catch { + case _: Exception => + println( + s"Couldn't run `$interpreter --version`. Did you configure the interpreter path correctly?") + sys.exit(1) + } + + // check if it is the right version + val version_check_output = s"${version match { + case PHPVersion.V7 => config.php7.interpreter + case PHPVersion.V8 => config.php8.interpreter + }} --version".!! 
+ val is_expected_version = version match { + case PHPVersion.V7 => + version_check_output.startsWith(PHP7_EXPECTED_VSTRING) + case PHPVersion.V8 => + version_check_output.startsWith(PHP8_EXPECTED_VSTRING) + } + if (!is_expected_version) { + error("Unexpected PHP version detected!") + version match { + case PHPVersion.V7 => + error( + "expected " + PHP7_EXPECTED_VSTRING + " but got: " + version_check_output + .slice(0, 10)) + case PHPVersion.V8 => + error( + "expected " + PHP8_EXPECTED_VSTRING + " but got " + version_check_output + .slice(0, 10)) + } + throw new RuntimeException("Unexpected PHP version") + } + + val filesMethodDefinitionPairs = fileParser.run() + report.addReport(fileParser.getReport) + if (config.verbose) info("FileParser done.") + + if (config.passes.contains("metadata")) { + val mdp = new MetaDataPass(cpg, keyPools.next()) + mdp.createAndApply() + report.addReport(mdp.getReport) + } else { + warn("MetaDataPass has been deactivated") + } + + // creating the basic source code AST, in method packages + if (config.passes.contains("methodscreation")) { + val mcp = + new MethodsCreationPass(filesMethodDefinitionPairs, + cpg, + keyPools.next()) + mcp.createAndApply() + report.addReport(mcp.getReport) + if (config.verbose) info("Method creation pass done.") + } else { + warn("MethodsCreationPass has been deactivated") + } + + // create the newly introduced different types + if (config.passes.contains("typecreation")) { + val tcp = + new TypeCreationPass(TypeCreationPass.getTypeIndicatingNodes(cpg), cpg) + tcp.createAndApply() + report.addReport(tcp.getReport) + } else { + warn("TypeCreationPass has been deactivated") + } + + if (config.passes.contains("cfgintrabbcreation")) { + val cintrabcp = new CfgIntraBBCreationPass(filesMethodDefinitionPairs, + cpg, + keyPools.next()) + cintrabcp.createAndApply() + report.addReport(cintrabcp.getReport) + } else { + warn("CfgIntraBBCreationPass has been deactivated") + } + + if 
(config.passes.contains("cfginterbbcreation")) { + val cinterbcp = new CfgInterBBCreationPass(filesMethodDefinitionPairs, + cpg, + keyPools.next()) + cinterbcp.createAndApply() + report.addReport(cinterbcp.getReport) + } else { + warn("CfgInterBBCreationPass has been deactivated") + } + if (config.verbose && config.passes.contains("cfgintrabbcreation")) + info("CFG creation done.") + + if (config.passes.contains("deleteunreachablecode")) { + val ducp = + new DeleteUnreachableCodePass(cpg.method.l, cpg, keyPools.next()) + ducp.createAndApply() + report.addReport(ducp.getReport) + if (config.verbose) info("DCE done.") + } else { + warn("DeleteUnreachableCodePass has been deactivated") + } + + if (config.passes.contains("dominator")) { + val dp = new DominatorPass(cpg.method.l, cpg, keyPools.next()) + dp.createAndApply() + report.addReport(dp.getReport) + } else { + warn("DominatorPass has been deactivated") + } + + if (config.passes.contains("postdominator")) { + val dpp = new DominatorPass(cpg.method.l, cpg, keyPools.next(), true) + dpp.createAndApply() + report.addReport(dpp.getReport) + } else { + warn("PostDominatorPass has been deactivated") + } + + if (config.verbose && config.passes.contains("dominator") || config.passes + .contains("postdominator")) info("Dominator(s) done.") + + if (config.passes.contains("datadependency")) { + val ddp = new DataDependencyPass(cpg.method.l, cpg, keyPools.next()) + ddp.createAndApply() + report.addReport(ddp.getReport) + info("DDG done.") + val aaep = new AddArrayElementPass(cpg.method.l, cpg, keyPools.next()) + aaep.createAndApply() + report.addReport(aaep.getReport) + info("DDG-AAE done.") + } else { + warn("DataDependencyPass has been deactivated") + } + + // this is basically a fixpoint iteration until all empty opcodes are deleted + // horribly bad in terms of performance and style ... but it works .. hopefully + // @Fabian: Is this smart? 
I am not really sure what the keypool does + if (config.passes.contains("deleteemptyopcodes")) { + val deleteKeyPool = keyPools.next() + var somethingChanged = false + do { + val deop = new DeleteEmptyOpcodesPass( + DeleteEmptyOpcodesPass.getMethodDeclarations(cpg), + cpg, + deleteKeyPool) + deop.createAndApply() + somethingChanged = deop.somethingChanged + report.addReport(deop.getReport) + } while (somethingChanged) + if (config.verbose) info("Empty opcode deletion done.") + } else { + warn("DeleteEmptyOpcodesPass has been deactivated") + } + + if (config.passes.contains("deleteemptymethods")) { + val demd = new DeleteEmptyMethodDeclarations( + DeleteEmptyMethodDeclarations.getMethodDeclarations(cpg), + cpg, + keyPools.next()) + demd.createAndApply() + } else { + warn("DeleteEmptyMethodsPass has been deactivated") + } + + if (config.passes.contains("inheritance")) { + val ip = new InheritancePass( + InheritancePass.getInheritanceIndicatingCalls(cpg), + cpg, + keyPools.next()) + ip.createAndApply() + report.addReport(ip.getReport) + } else { + warn("InheritancePass has been deactivated") + } + + // we first need to create possible stub methods + if (config.passes.contains("createstubmethods")) { + val stub = + new CreateStubMethodNodesPass(cpg, + keyPools.next(), + strict = config.strictLinking, + interpreter) + stub.createAndApply() + report.addReport(stub.getReport) + report.addReport(stub.getLinkingReport) + info("Stub creation done.") + } else { + warn("CreateStubMethodsPass has been deactivated") + } + + // for call linking to work properly + if (config.passes.contains("callfinishing")) { + val call = + new CallFinishingPass(cpg.method.l, + cpg, + keyPools.next(), + strict = config.strictLinking) + call.createAndApply() + report.addReport(call.getReport) + info("Call finishing pass done.") + } else { + warn("CallFinishingPass has been deactivated") + } + + if (config.passes.contains("namespacemembercreation")) { + val nmcp = new NamespaceMemberCreationPass( + 
NamespaceMemberCreationPass.getNamespaceMemberRelevantFunctions(cpg), + cpg, + keyPools.next()) + nmcp.createAndApply() + report.addReport(nmcp.getReport) + } else { + warn("NamespaceMemberCreationPass has been deactivated") + } + + if (config.passes.contains("localidentification")) { + val lip = new LocalIdentificationPass( + LocalIdentificationPass.getRelevantMethodDeclarations(cpg), + cpg, + keyPools.next()) + lip.createAndApply() + report.addReport(lip.getReport) + } else { + warn("LocalIdentificationPass has been deactivated") + } + + // Enhancements: + val context = new LayerCreatorContext(cpg) + val phpscpg = new PhpScpg() + phpscpg.run(context) + report.addReport(phpscpg.getReport) + if (config.verbose) info("Finished CPG population.") + cpg + } + + def getFinalReport: Report = { + report.addReport(this.getReport) + report + } + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/layers/PhpScpg.scala b/layerByteCode/src/main/scala/io/joern/bytecode/layers/PhpScpg.scala new file mode 100644 index 0000000..6e718a2 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/layers/PhpScpg.scala @@ -0,0 +1,71 @@ +package io.joern.bytecode.layers + +import io.joern.bytecode.passes.ParamInterpretPass +import io.joern.reporting.Reporting +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.passes.CpgPassBase +import io.shiftleft.semanticcpg.layers.{ + LayerCreator, + LayerCreatorContext, + LayerCreatorOptions +} +import io.shiftleft.semanticcpg.passes.base.{ContainsEdgePass, NamespaceCreator} + +object PhpScpg { + val overlayName: String = "phpsemanticcpg" + val description: String = "linked code property graph (OSS-PHP)" + + def defaultOpts = new LayerCreatorOptions() +} + +class PhpScpg(optionsUnused: LayerCreatorOptions = null) + extends LayerCreator + with Reporting { + + override val name = "PhpScpg" + override val overlayName: String = PhpScpg.overlayName + override val description: String = PhpScpg.description + + override def 
create(context: LayerCreatorContext, + serializeInverse: Boolean): Unit = { + withErrorReporting() { + val cpg = context.cpg + val enhancementExecList = createEnhancementExecList(cpg) + enhancementExecList.zipWithIndex.foreach { + case (pass, index) => + val serializedCpg = + initSerializedCpg(context.outputDir, pass.name, index) + pass.createApplySerializeAndStore(serializedCpg, serializeInverse) + serializedCpg.close() + } + } + } + + private def createEnhancementExecList(cpg: Cpg): Iterator[CpgPassBase] = { + Iterator( + //new MethodDecoratorPass(cpg), + // new CapturingLinker(cpg), // was removed in https://github.com/ShiftLeftSecurity/codepropertygraph/pull/1255 + // new Linker(cpg), + // caller linker does not work as call start (with the name) and call end + // are not the same and each opcode is an unlinkable call + //new StaticCallLinker(cpg), + //new DynamicCallLinker(cpg), + //new MethodRefLinker(cpg), + // new MemberAccessLinker(cpg), + // new MethodExternalDecoratorPass(cpg), // removed in https://github.com/ShiftLeftSecurity/codepropertygraph/pull/1489/ + new ContainsEdgePass(cpg), + new NamespaceCreator(cpg), + // I rather implement this on my own to get a grip on the underlying algorithm + // and to ease debugging in case something does not work (which doesn't as I am getting errors) + // new CfgDominatorPass(cpg), + // no idea what this is doing + // new CdgPass(cpg), + new ParamInterpretPass(cpg), + ) + } + + /* 2021-26-11: probe was removed in https://github.com/ShiftLeftSecurity/codepropertygraph/pull/1488 + override def probe(cpg: Cpg): Boolean = { + cpg.graph.nodes(NodeTypes.METHOD_PARAMETER_OUT).hasNext + }*/ +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/FileParser.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/FileParser.scala new file mode 100644 index 0000000..67b456b --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/FileParser.scala @@ -0,0 +1,10 @@ +package 
io.joern.bytecode.parser + +import io.joern.bytecode.parser.constructs.MethodDefinitionPair +import io.joern.reporting.Reporting + +trait FileParser extends Reporting { + + def run(): List[Seq[MethodDefinitionPair]] + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/PHPVersion.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/PHPVersion.scala new file mode 100644 index 0000000..019cd8a --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/PHPVersion.scala @@ -0,0 +1,14 @@ +package io.joern.bytecode.parser + +import io.joern.bytecode.parser.PHPVersion.{V7, V8} + +object PHPVersion extends Enumeration { + type PHPVersion = Value + val V7, V8 = Value +} + +trait PHPVersions { + + def getPhpVersions = Set(V7, V8) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/constructs/ByteCodeDumpBlocks.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/constructs/ByteCodeDumpBlocks.scala new file mode 100644 index 0000000..b614570 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/constructs/ByteCodeDumpBlocks.scala @@ -0,0 +1,75 @@ +package io.joern.bytecode.parser.constructs + +case class ExceptionTable(tableEntry: Seq[ExceptionTableLine]) + +case class LiveRanges(rangesEntry: Seq[LiveRangeLine]) + +abstract class MethodDefinition(name: String, + classname: Option[String], + namespace: Option[String]) { + + val lines: Int + val args: Int + val vars: Int + val tmps: Int + val parsingMetaInfo: Seq[String] + val fileName: String + val lineStart: Int + val lineEnd: Int + + def fullyQualifiedName: String = { + val nspace = namespace match { + case Some(name) => name + "\\" + case None => "" + } + val cname = classname match { + case Some(name) => name + "::" + case None => "" + } + s"$nspace$cname$name" + } +} + +case class ByteCodeDefinitionsBlock(name: String, + classname: Option[String], + namespace: Option[String], + lines: Int, + args: Int, + vars: Int, + tmps: Int, + 
parsingMetaInfo: Seq[String], + fileName: String, + lineStart: Int, + lineEnd: Int, + rangeLine: Option[String], + instructions: Seq[InstructionLine], + liveRanges: Option[LiveRanges], + exceptionTable: Option[ExceptionTable]) + extends MethodDefinition(name, classname, namespace) + +case class BasicBlock(number: Int, + attributes: Seq[String], + firstInstruction: Int, + lastInstruction: Int, + instructions: Seq[InstructionLine], + followedBy: Option[Seq[Int]]) + +case class ControlFlowDefinitionsBlock(name: String, + classname: Option[String], + namespace: Option[String], + lines: Int, + args: Int, + vars: Int, + tmps: Int, + parsingMetaInfo: Seq[String], + fileName: String, + lineStart: Int, + lineEnd: Int, + rangeLine: Option[String], + blocks: Seq[BasicBlock], + liveRanges: Option[LiveRanges], + exceptionTable: Option[ExceptionTable]) + extends MethodDefinition(name, classname, namespace) + +case class MethodDefinitionPair(byteCodeBlock: ByteCodeDefinitionsBlock, + controlFlowBlock: ControlFlowDefinitionsBlock) diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/constructs/InstructionLine.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/constructs/InstructionLine.scala new file mode 100644 index 0000000..9245647 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/constructs/InstructionLine.scala @@ -0,0 +1,82 @@ +package io.joern.bytecode.parser.constructs + +//import ByteCodeParser._ +sealed trait ByteCodeDumpLine + +case class ExceptionTableLine(content: List[String]) extends ByteCodeDumpLine {} + +case class LiveRangeLine(varNum: Int, start: Int, end: Int, rangeType: String) + extends ByteCodeDumpLine + +case class MethodHeader(var name: String, + var classname: Option[String], + var namespace: Option[String], + lines: Int, + args: Int, + vars: Int, + tmps: Int) + extends ByteCodeDumpLine { + // PHP treats class names and namespace names case insensitive + classname match { + case Some(name) => classname = 
Some(name.toLowerCase) + case None => + } + namespace match { + case Some(name) => namespace = Some(name.toLowerCase) + case None => + } + //this is required as the path traversals for the cpg use regexp and $ is a regexp special character + if (name == "$_main") { + name = "DLR_main" + } else { + assert(name != "DLR_main") + } + name = name.toLowerCase +} + +case class MethodHeaderMetaParsingInfo(metaInfo: Seq[String]) + extends ByteCodeDumpLine + +case class MethodHeaderMetaFileInfo(fileName: String, + lineStart: Int, + lineEnd: Int) + extends ByteCodeDumpLine + +object MethodHeaderMetaFileInfo { + + def apply(line: String): MethodHeaderMetaFileInfo = { + val split: Array[String] = line.split(":") + val (lstart, lend) = split.last.split("-").toList match { + case start :: end :: Nil => + (start.toInt, end.toInt) + case x => + throw new RuntimeException(s"bad method header meta file info $x") + } + val file = split.slice(0, split.length - 1).mkString(":") + MethodHeaderMetaFileInfo(file, lstart, lend) + } + +} + +case class MethodHeaderRangeLine(string: String) extends ByteCodeDumpLine + +case class BBDefinitionLine(number: Int, + attributes: Seq[String], + firstInstruction: Int, + lastInstruction: Int) + extends ByteCodeDumpLine + +sealed trait Instruction + +case class InstructionLine(opNumber: Option[Integer], + var fileLine: Option[Integer], + instruction: Instruction) + extends ByteCodeDumpLine + +case class Assignment(lhs: Variable, rhs: Opcode) extends Instruction { + override def toString: String = "ASSIGN" +} + +case class Operation(op: Opcode) extends Instruction { + override def toString: String = op.code +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/constructs/Opcodes.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/constructs/Opcodes.scala new file mode 100644 index 0000000..e607cda --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/constructs/Opcodes.scala @@ -0,0 +1,105 @@ +package 
io.joern.bytecode.parser.constructs + +sealed trait Opcode { + val code: String +} + +case class NoValueOperation(code: String) extends Opcode + +case class SingleValueOperation(code: String, value: Value) extends Opcode + +case class DualValueOperation(code: String, lhs: Value, rhs: Value) + extends Opcode + +case class TripleValueOperation(code: String, + first: Value, + second: Value, + third: Value) + extends Opcode + +case class QuadrupleValueOperation(code: String, + first: Value, + second: Value, + third: Value, + fourth: Value) + extends Opcode + +case class QuintupleValueOperation(code: String, + first: Value, + second: Value, + third: Value, + fourth: Value, + fifth: Value) + extends Opcode + + +case class MatchOpcode(code: String, + matchee: Variable, + values: Seq[KeyValuePair], + default: String) + extends Opcode + + +case class INIT_FCALL(paramCount: Int, var2: Int, var function: StringLiteral) + extends Opcode { + val code = "INIT_FCALL" + function = StringLiteral(function.value.toLowerCase()) +} + +case class INIT_DYNAMIC_CALL(paramCount: Int, variable: Variable) + extends Opcode { + val code = "INIT_DYNAMIC_CALL" +} + +case class INIT_FCALL_BY_NAME(paramCount: Int, var function: String) + extends Opcode { + val code = "INIT_FCALL_BY_NAME" + function = function.toLowerCase +} + +case class INIT_METHOD_CALL(paramCount: Int, + objectVar: Variable, + var method: Value) + extends Opcode { + val code = "INIT_METHOD_CALL" + method = method match { + case StringLiteral(x) => StringLiteral(x.toLowerCase()) + case x: Variable => x + case _ => + throw new RuntimeException(s"unexpected value as method $method") + } +} + +case class INIT_NS_FCALL_BY_NAME(paramCount: Int, var function: String) + extends Opcode { + val code = "INIT_FS_FCALL_BY_NAME" + function = function.toLowerCase +} + +case class INIT_STATIC_METHOD_CALL(paramCount: Int, + firstKeyWord: Option[Value], + secondKeyword: Option[Value], + var baseClass: Option[Value], + var method: Value) + extends 
Opcode { + val code = "INIT_STATIC_METHOD_CALL" + baseClass = baseClass match { + case Some(name: StringLiteral) => + Some(StringLiteral(name.value.toLowerCase)) + case x => x + } + method = method match { + case StringLiteral(value) => StringLiteral(value.toLowerCase()) + case x => x + } +} + +case class INIT_USER_CALL(param_count: Int, + func_type: StringLiteral, + act_on: Value) + extends Opcode { + val code = "INIT_USER_CALL" +} + +case class SWITCH(code: String, value: Value, switches: Seq[(String, Int)]) + extends Opcode diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/constructs/Values.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/constructs/Values.scala new file mode 100644 index 0000000..bad9f66 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/constructs/Values.scala @@ -0,0 +1,97 @@ +package io.joern.bytecode.parser.constructs + +import spray.json._ +import spray.json.{DefaultJsonProtocol, RootJsonFormat} +import spray.json.DefaultJsonProtocol._ + +sealed trait Value + +case class Variable(name: String, tmp: Boolean, reference: Boolean = false) + extends Value + +case class ArrayValue(content: Option[List[ArrayKeyValuePair]]) extends Value + +case class AssignOpLiteral(value: String) extends Value + +case class StringLiteral(value: String) extends Value + +case class IntegerLiteral(value: Long) extends Value + +case class FloatLiteral(value: Float) extends Value + +case class BooleanLiteral(value: Boolean) extends Value + +case class TryCatchLiteral(value: Int) extends Value + +case class Null() extends Value + +case class Zval(ttype: Int) extends Value + +case class ByteCodeKeyword(value: String) extends Value + +case class ByteCodePlaceIndicator(value: String) extends Value + +case class ByteCodeConstructor() extends Value + +case class KeyValuePair(key: Either[Int, String], value: String) extends Value + +case class ArrayKeyValuePair(key: Either[Int, String], value: Value) + extends Value + 
+case class DefaultKeyValuePair(value: String) extends Value + +object MyJsonProtocol extends DefaultJsonProtocol { + + implicit object keyFormat extends RootJsonFormat[Either[Int, String]] { + override def write(obj: Either[Int, String]): JsValue = obj match { + case Left(value) => value.toString.toJson + case Right(value) => value.toJson + } + + override def read(json: JsValue): Either[Int, String] = ??? + } + + implicit object valueFormat extends RootJsonFormat[Value] { + override def write(obj: Value): JsValue = obj match { + case Variable(name, tmp, ref) => + (if (ref || tmp) s"$name" else "CV($" + name + ")").toJson + case av: ArrayValue => arrayValueFormat.write(av) + case AssignOpLiteral(value) => value.toJson + case StringLiteral(value) => value.toJson + case IntegerLiteral(value) => value.toJson + case FloatLiteral(value) => value.toJson + case BooleanLiteral(value) => value.toJson + case TryCatchLiteral(value) => value.toJson + case Null() => JsNull + case Zval(ttype) => ttype.toJson + case ByteCodeKeyword(value) => value.toJson + case ByteCodePlaceIndicator(value) => value.toJson + case ByteCodeConstructor() => JsString("CONSTRUCTOR") + case KeyValuePair(key, value) => { + key -> value + }.toJson + case _: ArrayKeyValuePair => ??? + case DefaultKeyValuePair(value) => value.toJson + } + + override def read(json: JsValue): Value = ??? + } + + implicit object arrayValueFormat extends RootJsonFormat[ArrayValue] { + override def write(obj: ArrayValue): JsValue = { + obj.content match { + case Some(value) => + value + .flatMap(x => + Map { + x.key -> x.value + }) + .toMap + .toJson + case None => "PHP2CPG-NESTED-ARRAY-LIMITATION".toJson + } + } + + override def read(json: JsValue): ArrayValue = ??? 
+ } +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/Basics.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/Basics.scala new file mode 100644 index 0000000..2de4c4a --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/Basics.scala @@ -0,0 +1,37 @@ +package io.joern.bytecode.parser.php7 + +import fastparse.NoWhitespace._ +import fastparse._ + +object Basics { + + def anyNonCapitalLetter[_: P]: P[Unit] = CharIn("a-z") + + def anyCapitalLetter[_: P]: P[Unit] = CharIn("A-Z") + + def anyLetter[_: P]: P[Unit] = P(anyNonCapitalLetter | anyCapitalLetter) + + def anyNumber[_: P]: P[Unit] = CharIn("0-9") + + //def legalIdentifier[_: P]: P[Unit] = + // P("_".? ~ (anyLetter | anyLetter | anyNumber | "_").rep) + def legalIdentifier[_: P]: P[Unit] = + P((!(" " | ")" | "(") ~ AnyChar).rep) + + def legalFileIdentifier[_: P]: P[Unit] = + P((anyLetter | anyNumber | " " | "_" | "-" | ".").rep) + + def nonEscapedAnyChar[_: P]: P[Unit] = P(!"\\" ~ AnyChar) + + def escapedQuotation[_: P]: P[Unit] = P("\\\"") + + def escapedSlash[_: P]: P[Unit] = P("\\") + + def whiteSpaceAndWhiteSpaceControl[_: P]: P[Unit] = + P('\n'.toString | '\t'.toString | CharIn(" ")) + + def legalString[_: P]: P[Unit] = + P("\"" ~ + (!"\"" ~ (escapedQuotation | whiteSpaceAndWhiteSpaceControl | escapedSlash | nonEscapedAnyChar)).rep + ~ "\"") +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/ByteCodeBlock.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/ByteCodeBlock.scala new file mode 100644 index 0000000..ed64e77 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/ByteCodeBlock.scala @@ -0,0 +1,60 @@ +package io.joern.bytecode.parser.php7 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php7.Basics._ +import io.joern.bytecode.parser.php7.ExceptionTableBlock._ +import 
io.joern.bytecode.parser.php7.HeaderBlock._ +import io.joern.bytecode.parser.php7.Instructions._ +import io.joern.bytecode.parser.php7.LiveRangesBlock._ + +object ByteCodeBlock { + + def parseLineNumber[_: P]: P[String] = P("L" ~ anyNumber.rep.!) + def getLineNumber[_: P]: P[Integer] = + P(parseLineNumber.map(x => Integer.valueOf(x))) + + def parseFileLineNumber[_: P]: P[String] = P("(" ~ anyNumber.rep.! ~ ")") + def getFileLineNUmber[_: P]: P[Integer] = + P(parseFileLineNumber.map(x => Integer.valueOf(x))) + + def parseDefiningInstructionLine[_: P]: P[(Integer, Integer, Instruction)] = + P(getLineNumber ~ " " ~ getFileLineNUmber ~ ":" ~/ " ".rep ~ getInstruction) + def getDefiningInstructionLine[_: P]: P[InstructionLine] = + P(parseDefiningInstructionLine.map(x => + InstructionLine(Some(x._1), Some(x._2), x._3))) + + def parseByteCodeBlock[_: P]: P[(MethodHeader, + MethodHeaderMetaParsingInfo, + MethodHeaderMetaFileInfo, + Seq[InstructionLine], + Option[LiveRanges], + Option[ExceptionTable])] = + P( + getHeaderBlock ~ + (&("L" ~ anyNumber) ~ getDefiningInstructionLine ~/ "\n").rep ~ + (&("LIVE RANGES:") ~ getLiveRangesBlock).? ~ + (&("EXCEPTION TABLE:") ~ getExceptionTableBlock).?) 
+ def getByteCodeBlock[_: P]: P[ByteCodeDefinitionsBlock] = + P( + parseByteCodeBlock.map(x => + ByteCodeDefinitionsBlock( + x._1.name, + x._1.classname, + x._1.namespace, + x._1.lines, + x._1.args, + x._1.vars, + x._1.tmps, + x._2.metaInfo, + x._3.fileName, + x._3.lineStart, + x._3.lineEnd, + None, // php7 has no range line + x._4, + x._5, + x._6 + ))) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/ControlFlowBlock.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/ControlFlowBlock.scala new file mode 100644 index 0000000..9ca9a96 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/ControlFlowBlock.scala @@ -0,0 +1,86 @@ +package io.joern.bytecode.parser.php7 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php7.Basics._ +import io.joern.bytecode.parser.php7.ExceptionTableBlock.getExceptionTableBlock +import io.joern.bytecode.parser.php7.HeaderBlock._ +import io.joern.bytecode.parser.php7.Instructions._ +import io.joern.bytecode.parser.php7.LiveRangesBlock.getLiveRangesBlock + +object ControlFlowBlock { + + def parseBBInstructionLine[_: P]: P[Instruction] = P(" ".rep ~ getInstruction) + def getBBInstructionLine[_: P]: P[InstructionLine] = + P(parseBBInstructionLine.map(x => InstructionLine(None, None, x))) + + def parseBBIdent[_: P]: P[String] = P("BB" ~ anyNumber.rep.!) + def getBBIdent[_: P]: P[Int] = P(parseBBIdent.map(Integer.parseInt)) + + def parseBBAttribute[_: P]: P[Unit] = + P("start" | "exit" | "target" | "follow" | "unreachable_free" | "unreachable" | "catch" | "try" | "finally_end" | "finally") + + def parseBBInstructionLines[_: P]: P[(String, String)] = + P("lines=[" ~ anyNumber.rep.! ~ "-" ~ anyNumber.rep.! 
~ "]") + def getBBInstructionLines[_: P]: P[(Int, Int)] = + P(parseBBInstructionLines.map(x => + (Integer.parseInt(x._1), Integer.parseInt(x._2)))) + + def parseBBDefinitionLine[_: P]: P[(Int, Seq[String], (Int, Int))] = + P(getBBIdent ~ ": " ~ (parseBBAttribute.! ~ " ").rep ~ getBBInstructionLines) + def getBBDefinitionLine[_: P]: P[BBDefinitionLine] = + P(parseBBDefinitionLine.map(x => + BBDefinitionLine(x._1, x._2, x._3._1, x._3._2))) + + def parseBBToLine[_: P]: P[Seq[String]] = + P(" ".rep ~ ";" ~ " to=(" ~ ("BB" ~ anyNumber.rep.! ~ ", ".?).rep ~ ")") + def getBBToLine[_: P]: P[Seq[Int]] = + P(parseBBToLine.map(x => x.map(Integer.parseInt))) + + def parseBasicBlock[_: P] + : P[(BBDefinitionLine, Option[Seq[Int]], Seq[InstructionLine])] = + P(getBBDefinitionLine ~ ("\n" ~ getBBToLine).? ~ "\n" ~ (getBBInstructionLine ~ "\n").rep) + def getBasicBlock[_: P]: P[BasicBlock] = + P( + parseBasicBlock.map( + x => + BasicBlock(x._1.number, + x._1.attributes, + x._1.firstInstruction, + x._1.lastInstruction, + x._3, + x._2))) + + def parseControlFlowBlock[_: P]: P[(MethodHeader, + MethodHeaderMetaParsingInfo, + MethodHeaderMetaFileInfo, + Seq[BasicBlock], + Option[LiveRanges], + Option[ExceptionTable])] = + P( + getHeaderBlock ~ getBasicBlock.rep ~ + (&("LIVE RANGES:") ~ getLiveRangesBlock).? ~ + (&("EXCEPTION TABLE:") ~ getExceptionTableBlock).?) 
+ def getControlFlowBlock[_: P]: P[ControlFlowDefinitionsBlock] = + P( + parseControlFlowBlock.map(x => + ControlFlowDefinitionsBlock( + x._1.name, + x._1.classname, + x._1.namespace, + x._1.lines, + x._1.args, + x._1.vars, + x._1.tmps, + x._2.metaInfo, + x._3.fileName, + x._3.lineStart, + x._3.lineEnd, + None, // php7 has no range line + x._4, + x._5, + x._6 + ))) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/ExceptionTableBlock.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/ExceptionTableBlock.scala new file mode 100644 index 0000000..d9a546a --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/ExceptionTableBlock.scala @@ -0,0 +1,28 @@ +package io.joern.bytecode.parser.php7 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs.{ExceptionTable, ExceptionTableLine} +import io.joern.bytecode.parser.php7.instructions.Utility.parseTarget + +object ExceptionTableBlock { + + def parseExceptionTableLine[_: P]: P[Seq[String]] = + P( + " ".rep + ~ ((parseTarget | "-".!) ~ ", ").rep(1) + ~ (parseTarget | "-".!) 
+ ).map(x => x._1 :+ x._2) + + def getExceptionTableLine[_: P]: P[ExceptionTableLine] = + P(parseExceptionTableLine.map { x => + ExceptionTableLine(x.toList) + }) + + def parseExceptionTableBlock[_: P]: P[Seq[ExceptionTableLine]] = + P("EXCEPTION TABLE:\n" ~ (getExceptionTableLine ~ "\n").rep) + + def getExceptionTableBlock[_: P]: P[ExceptionTable] = + P(parseExceptionTableBlock.map(x => ExceptionTable(x))) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/FileParser7.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/FileParser7.scala new file mode 100644 index 0000000..110b0bd --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/FileParser7.scala @@ -0,0 +1,164 @@ +package io.joern.bytecode.parser.php7 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.FileParser +import io.joern.bytecode.parser.constructs.MethodDefinitionPair +import io.joern.bytecode.parser.php7.MethodDefinition.getFullMethodDefinitionBlock +import io.joern.reporting.Reporting + +import java.io.{ByteArrayOutputStream, PrintWriter, File => JFile} +import java.nio.charset.StandardCharsets +import java.text.SimpleDateFormat +import scala.concurrent.duration.Duration +import scala.concurrent.{Await, ExecutionContext, Future} +import scala.sys.process._ + +object FileParser7 { + + trait PHPParseError extends Throwable { + def getMessage: String + } + + case class PHPSyntaxError(msg: String) extends PHPParseError { + override def getMessage: String = s"Syntax Error: $msg" + } + + case class PHPFatalError(msg: String) extends PHPParseError { + override def getMessage: String = s"PHP Error: $msg" + } + + case class PHPWarning(msg: String) extends PHPParseError { + override def getMessage: String = s"PHP Warn: $msg" + } + + case class BytecodeParseError(msg: String) extends Throwable { + override def getMessage: String = msg + } + + val PREPROCESSING = false + + def parsePossibleParseError[_: P]: 
P[String] = + P("PHP Parse error: " ~ AnyChar.rep.!) + + def parsePossibleFatalError[_: P]: P[String] = + P("PHP Fatal error: " ~ AnyChar.rep.!) + + def parsePossibleWarning[_: P]: P[String] = + P("PHP Warning: " ~ AnyChar.rep.!) + + def parseLastLine[_: P]: P[Unit] = + P("No syntax errors " ~/ "detected in " ~/ AnyChar.rep) + + def parseByteCodeDump[_: P]: P[Seq[MethodDefinitionPair]] = + P(("\n" ~ getFullMethodDefinitionBlock).rep) + + def actualParse(input: String, file: String): Seq[MethodDefinitionPair] = { + //Some(new PrintWriter("input_dump.txt")).foreach { p => + // p.write(input); p.close() + //} + fastparse.parse(input, parsePossibleParseError(_)) match { + case Parsed.Success(errorMessage, _) => throw PHPSyntaxError(errorMessage) + case _ => + } + fastparse.parse(input, parsePossibleFatalError(_)) match { + case Parsed.Success(errorMessage, _) => throw PHPFatalError(errorMessage) + case _ => + } + fastparse.parse(input, parsePossibleWarning(_)) match { + case Parsed.Success(message, _) => throw PHPWarning(message) + case _ => + } + try { + fastparse.parse(input, parseByteCodeDump(_)) match { + case Parsed.Success(value, length) => + if (length != input.length) { + val delta = if (input.length < length + 1000) { + input.length + } else { + length + 1000 + } + throw BytecodeParseError( + s"remaining code is:\n>>${input.substring(length, delta)} [...]<<") + } + value + case x: Parsed.Failure => + val t = x.trace(true) + throw BytecodeParseError(s"${x.toString()}\n${t.longMsg}") + } + } catch { + case x: Throwable => + throw BytecodeParseError( + s"In file $file encountered:${x.toString}\n${Reporting.stringifyErrorStack(x)}\n") + } + } + + def setTimestamp(filePath: String): Boolean = { + val file = new JFile(filePath) + val date = new SimpleDateFormat("MM/dd/yyyy") + val last = date.parse("10/03/1990") + file.setLastModified(last.getTime) + } + + def generatePhpByteCodeDump(filePath: String, + phpInterpreter: String, + phpini: String): String = { + val 
stderrStream = new ByteArrayOutputStream()
+    val stdoutStream = new ByteArrayOutputStream()
+    val stdoutWriter =
+      new PrintWriter(stdoutStream, true, StandardCharsets.UTF_8)
+    val stderrWriter =
+      new PrintWriter(stderrStream, true, StandardCharsets.UTF_8)
+    val command = s"$phpInterpreter -c $phpini -d opcache.enable_cli=1 -d opcache.opt_debug_level=0x50000 -d opcache.log_verbosity_level=0 --syntax-check ${"\""}" + filePath + s"${"\""}"
+    command.!(ProcessLogger(stdoutWriter.println, stderrWriter.println))
+    stderrWriter.close()
+    stdoutWriter.close()
+    // Only the captured stderr text is returned; stdout is collected but never read.
+    // Both writers encode as UTF-8, so decode as UTF-8 too instead of using the
+    // JVM default charset, which garbles non-ASCII output when it is not UTF-8.
+    stderrStream.toString(StandardCharsets.UTF_8)
+  }
+
+  /** Produces the dump for `file` via [[generatePhpByteCodeDump]] and parses it.
+    *
+    * @param file           PHP source file handed to the interpreter
+    * @param phpInterpreter interpreter command used to build the command line
+    * @param phpini         value passed to the interpreter's `-c` option
+    * @return the method definitions parsed from the dump
+    */
+  def parseFromFile(file: java.io.File,
+                    phpInterpreter: String,
+                    phpini: String): Seq[MethodDefinitionPair] = {
+    val string =
+      generatePhpByteCodeDump(file.getPath, phpInterpreter, phpini)
+    actualParse(string, file.getPath)
+  }
+
+  /** Parses an already produced dump; the file name used in error messages is empty.
+    *
+    * @param dump   textual bytecode dump
+    * @param strict accepted for interface compatibility; not read by this method
+    */
+  def parseFromByteCodeDump(
+      dump: String,
+      strict: Boolean = true): Seq[MethodDefinitionPair] = {
+    actualParse(dump, "")
+  }
+
+}
+
+/** Parses a list of PHP files, one `Future` per file. */
+class FileParser7(files: List[JFile], phpInterpreter: String, phpini: String)
+    extends FileParser {
+
+  val name = "FileParser"
+
+  /** Parses all files on the global execution context and blocks until every
+    * future has completed. Files for which nothing was stored (empty result, or
+    * the parse did not complete inside `withErrorReporting`) are dropped.
+    */
+  def run(): List[Seq[MethodDefinitionPair]] = {
+    implicit val ec: ExecutionContext = ExecutionContext.global
+
+    val list_of_futures: Seq[Future[Option[Seq[MethodDefinitionPair]]]] =
+      files.map(file =>
+        Future {
+          // stays None unless the parse below yields at least one method
+          var to_return: Option[Seq[MethodDefinitionPair]] = None
+          withErrorReporting() {
+            FileParser7.parseFromFile(file, phpInterpreter, phpini) match {
+              case Nil =>
+                reportWarning(file.getPath,
+                              "",
+                              "",
+                              "",
+                              "no methods extracted - usually a bad sign")
+              case x => to_return = Some(x)
+            }
+          }
+          to_return
+      })
+    val nested_res =
+      Await.result(Future.sequence(list_of_futures), Duration.Inf)
+    nested_res.flatten.toList
+  }
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/HeaderBlock.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/HeaderBlock.scala new file mode 100644 index 0000000..4d30f6b --- /dev/null +++
b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/HeaderBlock.scala @@ -0,0 +1,130 @@ +package io.joern.bytecode.parser.php7 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs.{ + MethodHeader, + MethodHeaderMetaFileInfo, + MethodHeaderMetaParsingInfo, + MethodHeaderRangeLine +} +import io.joern.bytecode.parser.php7.Basics.{anyLetter, anyNumber} + +object HeaderBlock { + + def parseValidNameSpace[_: P]: P[String] = P((anyLetter | "\\").rep.! ~ "\\") + + def parseValidClassname[_: P]: P[String] = + P((("_" | anyLetter) ~ ("_" | anyLetter | anyNumber).rep).! ~ "::") + + def parseHeaderBlockMethodIdentifier[_: P]: P[String] = + P(("::" | !(" " | ":") ~ AnyChar).rep.!) + + def getHeaderBlockMethodIdentifier[_: P] + : P[(Option[String], Option[String], String)] = + P(parseHeaderBlockMethodIdentifier.map(x => { + x.split("::").toList match { + case classString :: methodString :: Nil => + val lastSlash = classString.lastIndexOf("\\") + if (lastSlash == -1) { + (None, Some(classString), methodString) + } else { + val (namespace, classname) = classString.splitAt(lastSlash) + (if (namespace == "") None else Some(namespace), + Some(classname.substring(1)), + methodString) + } + case methodString :: Nil => + val lastSlash = methodString.lastIndexOf("\\") + if (lastSlash == -1) { + (None, None, methodString) + } else { + val (namespace, methodname) = methodString.splitAt(lastSlash) + (if (namespace == "") None else Some(namespace), + None, + methodname.substring(1)) + } + case _ => throw new RuntimeException("unexpected") + } + })) + + def parseHeaderBlockLinesValue[_: P]: P[String] = + P("lines" ~ "=" ~ anyNumber.rep.!) + + def getHeaderBLockLinesValue[_: P]: P[Int] = + P(parseHeaderBlockLinesValue.map(_.toInt)) + + def parseHeaderBlockArgsValue[_: P]: P[String] = + P("args" ~ "=" ~ anyNumber.rep.!) 
+ + def getHeaderBlocArgsValue[_: P]: P[Int] = + P(parseHeaderBlockArgsValue.map(_.toInt)) + + def parseHeaderBlockVarsValue[_: P]: P[String] = + P("vars" ~ "=" ~ anyNumber.rep.!) + + def getHeaderBlockVarsValue[_: P]: P[Int] = + P(parseHeaderBlockVarsValue.map(_.toInt)) + + def parseHeaderBlockTmpsValue[_: P]: P[String] = + P("tmps" ~ "=" ~ anyNumber.rep.!) + + def getHeaderBlockTmpsValue[_: P]: P[Int] = + P(parseHeaderBlockTmpsValue.map(_.toInt)) + + def parseHeaderBlockMethodMetaBlock[_: P]: P[(Int, Int, Int, Int)] = + P( + "(" ~ getHeaderBLockLinesValue ~ + ", " ~ getHeaderBlocArgsValue ~ + ", " ~ getHeaderBlockVarsValue ~ + ", " ~ getHeaderBlockTmpsValue ~ ")") + + def parseHeaderBlockMethodDefinitionLine[_: P] + : P[(Option[String], Option[String], String, (Int, Int, Int, Int))] = + P(getHeaderBlockMethodIdentifier ~ ":" ~ " ; " ~ parseHeaderBlockMethodMetaBlock) + + def getHeaderBLockMethodDefinitionLine[_: P]: P[MethodHeader] = + P( + parseHeaderBlockMethodDefinitionLine.map( + result => + MethodHeader(result._3, + result._2, + result._1, + result._4._1, + result._4._2, + result._4._3, + result._4._4))) + + // time definition + def parseHeaderBlockMetaLineParsingWord[_: P]: P[Unit] = + P("before" | "optimizer" | "block" | "pass") + + def parseHeaderBlockMetaLineParsing[_: P]: P[Seq[String]] = + P(" ".rep ~ ";" ~ " " ~ "(" ~ (parseHeaderBlockMetaLineParsingWord.! ~ " ".?).rep ~ ")") + + def getHeaderBlockMetaLineParsing[_: P]: P[MethodHeaderMetaParsingInfo] = + P(parseHeaderBlockMetaLineParsing.map(x => MethodHeaderMetaParsingInfo(x))) + + def newline[_: P]: P[Unit] = P("\n" | "\r\n" | "\r" | "\f") + + def parseHeaderBlockMetaLineFileInfo[_: P]: P[String] = + P(" ".rep ~ ";" ~ " " ~ (!newline ~ AnyChar).rep.!) + def getHeaderBlockMetaLineFileInfo[_: P]: P[MethodHeaderMetaFileInfo] = + P(parseHeaderBlockMetaLineFileInfo.map(x => MethodHeaderMetaFileInfo(x))) + + def parseHeaderBlockRangeLine[_: P]: P[(String, String)] = + P(" ".rep ~ "; " ~ "return".! 
~ (!"\n" ~ AnyChar).rep.!) + + def getHeaderBlockRangeLine[_: P]: P[MethodHeaderRangeLine] = + parseHeaderBlockRangeLine + .map(x => x._1 + x._2) + .map(MethodHeaderRangeLine) + + def getHeaderBlock[_: P] + : P[(MethodHeader, MethodHeaderMetaParsingInfo, MethodHeaderMetaFileInfo)] = + P( + getHeaderBLockMethodDefinitionLine ~ "\n" ~ + getHeaderBlockMetaLineParsing ~ "\n" ~ + getHeaderBlockMetaLineFileInfo ~ "\n") + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/Instructions.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/Instructions.scala new file mode 100644 index 0000000..6b53e83 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/Instructions.scala @@ -0,0 +1,48 @@ +package io.joern.bytecode.parser.php7 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php7.Variables.getAnyVariable +import io.joern.bytecode.parser.php7.instructions.Array._ +import io.joern.bytecode.parser.php7.instructions.Assign._ +import io.joern.bytecode.parser.php7.instructions.Bind._ +import io.joern.bytecode.parser.php7.instructions.CallRelated._ +import io.joern.bytecode.parser.php7.instructions.ClassRelated._ +import io.joern.bytecode.parser.php7.instructions.ControlConstructs._ +import io.joern.bytecode.parser.php7.instructions.Fe._ +import io.joern.bytecode.parser.php7.instructions.Fetch.parseFetchCommand +import io.joern.bytecode.parser.php7.instructions.Generic._ +import io.joern.bytecode.parser.php7.instructions.IncDecStaticProp._ +import io.joern.bytecode.parser.php7.instructions.Isset._ +import io.joern.bytecode.parser.php7.instructions.Jump.getJmpCommand +import io.joern.bytecode.parser.php7.instructions.LambdaRelated._ +import io.joern.bytecode.parser.php7.instructions.Rope._ +import io.joern.bytecode.parser.php7.instructions.TypeRelated._ + +object Instructions { + + // here we 
may extend the available options each time we create a new one + def parseOperation[_: P]: P[Opcode] = + P(parseInitCallCommands | getNew | getInitMethodCall | getSwitchStatement | //order is important as INIT_FCALL is substring of INIT_FCALL_BY_NAME + parseAssignCommand | getCheckFuncArg | + getSendCommand | getRecv | getRecvInit | getRecvVariadic | getTicks | getFuncGetArgs | getIssetIsEmptyStaticProp | + getFeReset | parseFetchCommand | getAssignDim | getReturnCommand | getBindLexical | getInstanceOf | getGetClass | + getRopeInit | getRopeAdd | getRopeEnd | getCast | getFeResetRw | getFeFetch | getBindStatic | getFastCall | + getJmpCommand | getInitArray | getAddArrayElement | getInArray | getExit | getCatch | getCoalesce | getYield | getFastRet | + getIssetCommand | getQuadrupleValueCommand | getVerifyReturnType | getDeclareClass | getIncDecStaticProp | getAddArrayUnpack | + getTripleValueCommand | getDualValueCommand | getSingleValueCommand | getNoValueCommand | getDeclareAnonClass) //jumps + + def getOperation[_: P]: P[Operation] = + P(parseOperation.map(x => constructs.Operation(x))) + + def parseAssignment[_: P]: P[(Variable, Operation)] = + P(getAnyVariable ~ " = " ~ getOperation) + + def getAssignment[_: P]: P[Assignment] = + P(parseAssignment.map(x => constructs.Assignment(x._1, x._2.op))) + + def getInstruction[_: P]: P[Instruction] = P(getAssignment | getOperation) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/Literals.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/Literals.scala new file mode 100644 index 0000000..73387ae --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/Literals.scala @@ -0,0 +1,148 @@ +package io.joern.bytecode.parser.php7 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php7.Basics.anyNumber +import io.joern.bytecode.parser.utils.decodeBase64 + +import 
java.nio.charset.StandardCharsets
+import java.util.Base64
+
+object Literals {
+
+  /** Captures one character, provided the input does not continue with `")`. */
+  def parseCharacterNotExiting[_: P]: P[String] =
+    P(!"\")" ~ AnyChar.!)
+
+  /** Captures the raw payload between `string("` and the next `")`. */
+  def parseStringLiteral[_: P]: P[String] =
+    P("string(\"" ~ parseCharacterNotExiting.rep.! ~ "\")")
+
+  /** String literal whose payload is passed through `decodeBase64`. */
+  def getStringLiteral[_: P]: P[StringLiteral] =
+    P(parseStringLiteral.map(x => StringLiteral(decodeBase64(x))))
+
+  /** Optional minus sign followed by digits. */
+  def parseValidNumber[_: P]: P[Unit] = P("-".? ~ anyNumber.rep)
+
+  /** Captures the number inside `int(...)`. */
+  def parseIntLiteral[_: P]: P[String] = P("int(" ~ parseValidNumber.! ~ ")")
+  def getIntLiteral[_: P]: P[IntegerLiteral] =
+    P(parseIntLiteral.map(x => IntegerLiteral(x.toLong)))
+
+  /** Float in exponent notation: mantissa, `e`, mandatory sign, exponent digits. */
+  def parseFLoatEString[_: P]: P[String] =
+    P(("-".? ~ parseFloatNumberString ~ "e" ~ ("-" | "+") ~ anyNumber.rep).!)
+
+  /** Float in plain notation: optional sign, digits, optional `.`, digits. */
+  def parseFloatNumberString[_: P]: P[String] =
+    P(("-".? ~ anyNumber.rep ~ ".".? ~ anyNumber.rep).!)
+
+  /** Captures the content of `float(...)`; the special names are tried first. */
+  def parseFloatLiteral[_: P]: P[String] =
+    P("float(" ~/ ("nan".! | "-inf".! | "inf".! | parseFLoatEString | parseFloatNumberString) ~ ")")
+
+  /** Maps the captured text to a `Float`, translating `inf`, `-inf` and `nan`. */
+  def getFloatLiteral[_: P]: P[FloatLiteral] =
+    P(parseFloatLiteral.map(x =>
+      FloatLiteral(x match {
+        case "inf"  => Float.PositiveInfinity
+        case "-inf" => Float.NegativeInfinity
+        case "nan"  => Float.NaN
+        case x      => x.toFloat
+      })))
+
+  /** Captures `true` or `false` inside `bool(...)`. */
+  def parseBooleanLiteral[_: P]: P[String] =
+    P("bool(" ~/ ("true" | "false").! ~/ ")")
+
+  def getBooleanLiteral[_: P]: P[BooleanLiteral] =
+    P(parseBooleanLiteral.map(x => BooleanLiteral(x == "true")))
+
+  /** Captures the index inside `try-catch(...)`.
+    *
+    * One or more digits are accepted, matching how every other numeric field
+    * in this object is read with a repetition; capturing a bare `anyNumber`
+    * would stop after a single digit (assuming `anyNumber` matches one digit,
+    * as its use with `.rep` elsewhere suggests) and reject indices above 9.
+    */
+  def parseTryCatch[_: P]: P[String] =
+    P("try-catch(" ~ anyNumber.rep(1).! ~ ")")
+
+  def getTryCatch[_: P]: P[TryCatchLiteral] =
+    P(parseTryCatch.map(x => TryCatchLiteral(x.toInt)))
+
+  /** Captures the numeric type tag inside `zval(type=...)`. */
+  def parseZvalLiteral[_: P]: P[String] =
+    P("zval(type=" ~ anyNumber.rep.! ~ ")")
+
+  def getZvalLiteral[_: P]: P[Zval] =
+    P(parseZvalLiteral.map(x => Zval(x.toInt)))
+
+  /** Matches the literal text `null`. */
+  def parseNull[_: P]: P[Unit] = P("null")
+
+  def getNull[_: P]: P[Null] = P(parseNull.map(_ => Null()))
+
+  //def parseType[_: P]: P[String] = P("(" ~ ("long" | "int").!
~ ")") + //def getType[_: P]: P[Type] = P(parseType.map(x => Type(x))) + + def parseArray[_: P]: P[Unit] = P("array(...)") + + def getArray[_: P]: P[ArrayValue] = P(parseArray.map(_ => ArrayValue(None))) + + def parseBytecodeKeyword[_: P]: P[String] = + P( + "(" ~ + ("self" | + "parent" | + "ref" | + "array" | + "double" | + "string" | + "long" | + "int" | + "object" | + "bool" | + "no-autolod" | + "function" | + "null" | + "resource" | + "isset" | + "unqualified" | + "in-namespace" | + "empty" | + "packed" | + "exception" | + "require_once" | + "require" | + "include_once" | + "include" | + "obj write" | + "dim write" | + "global+lock" | + "global" | + "value" | + "eval" | + "local" | + "static").! + ~ ")") + + def getByteCodeKeyword[_: P]: P[ByteCodeKeyword] = + P(parseBytecodeKeyword.map(x => ByteCodeKeyword(x))) + + def parseByteCodeConstructor[_: P]: P[Unit] = { + P("CONSTRUCTOR") + } + + def getByteCodeConstructor[_: P]: P[Value] = { + parseByteCodeConstructor.map { _ => + ByteCodeConstructor() + } + } + + def parseByteCodePlaceIndicator[_: P]: P[String] = P( + "NEXT".! | + "THIS".! + ) + + def getByteCodePlaceIndicator[_: P]: P[ByteCodePlaceIndicator] = P( + parseByteCodePlaceIndicator.map(x => ByteCodePlaceIndicator(x)) + ) + + def parseAssignOpCmdString[_: P]: P[String] = + P("(" ~ + ("ADD" | "SUB" | "DIV" | "MUL" | "MOD" | "POW" | "SL" | "SR" | "CONCAT" | "BW_OR" | "BW_AND" | "BW_XOR").! 
~ + ")") + + def getAssignOpCmd[_: P]: P[AssignOpLiteral] = { + P(parseAssignOpCmdString.map(x => AssignOpLiteral(x))) + } + + def getAnyLiteral[_: P]: P[Value] = + P(getByteCodePlaceIndicator | getStringLiteral | getIntLiteral | getFloatLiteral + | getBooleanLiteral | getByteCodeConstructor | + getNull | getArray | getZvalLiteral | getByteCodeKeyword | getAssignOpCmd | getTryCatch) +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/LiveRangesBlock.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/LiveRangesBlock.scala new file mode 100644 index 0000000..7ebc92d --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/LiveRangesBlock.scala @@ -0,0 +1,30 @@ +package io.joern.bytecode.parser.php7 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs.{LiveRangeLine, LiveRanges} +import io.joern.bytecode.parser.php7.Basics.anyNumber + +object LiveRangesBlock { + + def parseLiveRangeType[_: P]: P[String] = + P("(" ~ ("loop" | "new" | "rope" | "silence" | "tmp/var").! ~ ")") + + def parseLiveRangeLine[_: P]: P[(String, String, String, String)] = + P( + " ".rep ~ + anyNumber.rep.! ~ ": " ~ + "L" ~ anyNumber.rep.! ~ " - " ~ + "L" ~ anyNumber.rep.! 
~ " " ~/ + parseLiveRangeType) + def getLiveRangeLine[_: P]: P[LiveRangeLine] = + P(parseLiveRangeLine.map(x => + LiveRangeLine(x._1.toInt, x._2.toInt, x._3.toInt, x._4))) + + def parseLiveRangesBlock[_: P]: P[Seq[LiveRangeLine]] = + P("LIVE RANGES:\n" ~/ (getLiveRangeLine ~ "\n").rep) + + def getLiveRangesBlock[_: P]: P[LiveRanges] = + P(parseLiveRangesBlock.map(x => LiveRanges(x))) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/MethodDefinition.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/MethodDefinition.scala new file mode 100644 index 0000000..ef7d18f --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/MethodDefinition.scala @@ -0,0 +1,24 @@ +package io.joern.bytecode.parser.php7 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs.{ + ByteCodeDefinitionsBlock, + ControlFlowDefinitionsBlock, + MethodDefinitionPair +} +import io.joern.bytecode.parser.php7.ByteCodeBlock.getByteCodeBlock +import io.joern.bytecode.parser.php7.ControlFlowBlock.getControlFlowBlock + +object MethodDefinition { + + def parseFullMethodDefinitionBlock[_: P] + : P[(ByteCodeDefinitionsBlock, ControlFlowDefinitionsBlock)] = + P(getByteCodeBlock ~/ "\n" ~/ getControlFlowBlock) + def getFullMethodDefinitionBlock[_: P]: P[MethodDefinitionPair] = + P( + parseFullMethodDefinitionBlock.map( + x => MethodDefinitionPair(x._1, x._2) + )) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/Variables.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/Variables.scala new file mode 100644 index 0000000..1a3c90e --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/Variables.scala @@ -0,0 +1,29 @@ +package io.joern.bytecode.parser.php7 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs.Variable +import io.joern.bytecode.parser.php7.Basics.{anyNumber, legalIdentifier} + 
+object Variables { + + def parseVariable[_: P]: P[String] = + P("CV" ~ anyNumber.rep ~/ "($" ~ legalIdentifier.! ~ ")") + + def getVariable[_: P]: P[Variable] = + P(parseVariable.map(Variable(_, tmp = false))) + + def parseTemporary[_: P]: P[String] = P(("T" ~ anyNumber.rep).!) + + def getTemporary[_: P]: P[Variable] = + P(parseTemporary.map(Variable(_, tmp = true))) + + def parseReference[_: P]: P[String] = P(("V" ~ anyNumber.rep).!) + + def getReference[_: P]: P[Variable] = + P(parseReference.map(x => Variable(x, tmp = true, reference = true))) + + def getAnyVariable[_: P]: P[Variable] = + P(getVariable | getTemporary | getReference) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Array.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Array.scala new file mode 100644 index 0000000..b3fd428 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Array.scala @@ -0,0 +1,60 @@ +package io.joern.bytecode.parser.php7.instructions + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php7.Basics.anyNumber +import io.joern.bytecode.parser.php7.instructions.Utility.getAnyValue + +object Array { + + def parseInitArray[_: P]: P[(String, Seq[Value])] = + P("INIT_ARRAY " ~ anyNumber.rep.! 
~ (" " ~ getAnyValue).rep)
+
+  /** Builds the INIT_ARRAY opcode from the leading number and 2 to 4 operands.
+    *
+    * Any other operand count raises `UnexpectedArgumentCount` instead of an
+    * uninformative `MatchError`.
+    */
+  def getInitArray[_: P]: P[Opcode] =
+    P(parseInitArray.map { x =>
+      val pos = IntegerLiteral(x._1.toLong)
+      x._2 match {
+        case first :: second :: Nil =>
+          TripleValueOperation("INIT_ARRAY", pos, first, second)
+        case first :: second :: third :: Nil =>
+          QuadrupleValueOperation("INIT_ARRAY", pos, first, second, third)
+        case first :: second :: third :: fourth :: Nil =>
+          QuintupleValueOperation("INIT_ARRAY",
+                                  pos,
+                                  first,
+                                  second,
+                                  third,
+                                  fourth)
+        case list =>
+          throw new UnexpectedArgumentCount("INIT_ARRAY",
+                                            Seq(2, 3, 4),
+                                            list.length)
+      }
+    })
+
+  /** Reads `ADD_ARRAY_ELEMENT` followed by space separated operands. */
+  def parseAddArrayElement[_: P]: P[Seq[Value]] =
+    P("ADD_ARRAY_ELEMENT" ~ (" " ~ getAnyValue).rep)
+
+  /** Builds the ADD_ARRAY_ELEMENT opcode from 2 or 3 operands; any other
+    * count raises `UnexpectedArgumentCount`.
+    */
+  def getAddArrayElement[_: P]: P[Opcode] =
+    P(parseAddArrayElement.map {
+      case first :: second :: Nil =>
+        DualValueOperation("ADD_ARRAY_ELEMENT", first, second)
+      case first :: second :: third :: Nil =>
+        TripleValueOperation("ADD_ARRAY_ELEMENT", first, second, third)
+      case list =>
+        throw new UnexpectedArgumentCount("ADD_ARRAY_ELEMENT",
+                                          Seq(2, 3),
+                                          list.length)
+    })
+
+  /** Reads `IN_ARRAY`, a number and exactly two operands. */
+  def parseInArray[_: P]: P[(String, Value, Value)] =
+    P("IN_ARRAY " ~/ anyNumber.rep.! ~ " " ~ getAnyValue ~ " " ~ getAnyValue)
+  def getInArray[_: P]: P[TripleValueOperation] =
+    P(
+      parseInArray.map(
+        x =>
+          TripleValueOperation("IN_ARRAY",
+                               IntegerLiteral(x._1.toLong),
+                               x._2,
+                               x._3)))
+
+  /** Reads `ADD_ARRAY_UNPACK` and its single operand. */
+  def parseAddArrayUnpack[_: P]: P[Value] =
+    P("ADD_ARRAY_UNPACK" ~ " " ~ getAnyValue)
+  def getAddArrayUnpack[_: P]: P[Opcode] =
+    P(
+      parseAddArrayUnpack.map(x => SingleValueOperation("ADD_ARRAY_UNPACK", x))
+    )
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Assign.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Assign.scala new file mode 100644 index 0000000..481e82b --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Assign.scala @@ -0,0 +1,117 @@
+package io.joern.bytecode.parser.php7.instructions
+
+import fastparse.NoWhitespace._
+import fastparse._
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php7.Literals.getAssignOpCmd
+import
io.joern.bytecode.parser.php7.Variables.getAnyVariable +import io.joern.bytecode.parser.php7.instructions.Utility.getAnyValue + +object Assign { + + def parseAssignDim[_: P]: P[(Variable, Value)] = + P("ASSIGN_DIM " ~/ getAnyVariable ~ " " ~ getAnyValue) + def getAssignDim[_: P]: P[DualValueOperation] = + P( + parseAssignDim.map( + x => + DualValueOperation( + "ASSIGN_DIM", + x._1, + x._2 + ))) + + def parseAssign[_: P]: P[(Value, Value)] = + P("ASSIGN " ~/ getAnyValue ~ " " ~/ getAnyValue) + def getAssign[_: P]: P[DualValueOperation] = + P(parseAssign.map(x => DualValueOperation("ASSIGN", x._1, x._2))) + + def parseAssignOp[_: P]: P[(Value, Value, Value)] = + P( + "ASSIGN_OP " ~/ + getAssignOpCmd ~/ " " ~ + getAnyValue ~ " " ~ + getAnyValue) + def getAssignOp[_: P]: P[TripleValueOperation] = + P(parseAssignOp.map(x => + TripleValueOperation("ASSIGN_OP", x._1, x._2, x._3))) + + def parseAssignStaticProp[_: P]: P[Seq[Value]] = + P("ASSIGN_STATIC_PROP" ~/ (" " ~ getAnyValue).rep ~ &("\n" | End)) + def getAssignStaticProp[_: P]: P[Opcode] = + P(parseAssignStaticProp.map { + case first :: Nil => SingleValueOperation("ASSIGN_STATIC_PROP_1", first) + case first :: second :: Nil => + DualValueOperation("ASSIGN_STATIC_PROP_2", first, second) + case list => + throw new UnexpectedArgumentCount("ASSIGN_STATIC_PROP", + Seq(1, 2), + list.length) + }) + + def parseAssignStaticPropOp[_: P]: P[Seq[Value]] = + P(&("ASSIGN_STATIC_PROP_OP ") ~ "ASSIGN_STATIC_PROP_OP" ~ (" " ~ getAnyValue).rep) + def getAssignStaticPropOp[_: P]: P[Opcode] = + P(parseAssignStaticPropOp.map { + case first :: second :: Nil => + DualValueOperation("ASSIGN_STATIC_PROP_OP", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("ASSIGN_STATIC_PROP_OP", first, second, third) + case list => + throw new UnexpectedArgumentCount("ASSIGN_STATIC_PROP_OP", + Seq(2, 3), + list.length) + }) + + def parseAssignStaticPropRef[_: P]: P[Seq[Value]] = + P(&("ASSIGN_STATIC_PROP_REF ") ~ 
"ASSIGN_STATIC_PROP_REF" ~ (" " ~ getAnyValue).rep)
+
+  /** Builds ASSIGN_STATIC_PROP_REF from 1 or 2 operands.
+    *
+    * The error for any other count now lists both accepted counts; it used to
+    * claim that only a single operand was valid.
+    */
+  def getAssignStaticPropRef[_: P]: P[Opcode] =
+    P(parseAssignStaticPropRef.map {
+      case first :: Nil =>
+        SingleValueOperation("ASSIGN_STATIC_PROP_REF", first)
+      case first :: second :: Nil =>
+        DualValueOperation("ASSIGN_STATIC_PROP_REF", first, second)
+      case list =>
+        throw new UnexpectedArgumentCount("ASSIGN_STATIC_PROP_REF",
+                                          Seq(1, 2),
+                                          list.length)
+    })
+
+  /** Reads `ASSIGN_OBJ` and exactly two operands. */
+  def parseAssignObj[_: P]: P[(Value, Value)] =
+    P("ASSIGN_OBJ " ~ getAnyValue ~ " " ~ getAnyValue)
+  def getAssignObj[_: P]: P[DualValueOperation] =
+    P(parseAssignObj.map(x => DualValueOperation("ASSIGN_OBJ", x._1, x._2)))
+
+  def parseAssignRef[_: P]: P[Seq[Value]] =
+    P("ASSIGN_REF" ~ (" " ~ getAnyValue).rep ~ &("\n" | End)) // the End is needed to ensure that unit tests work
+
+  /** Builds ASSIGN_REF_2 / ASSIGN_REF_3 from 2 or 3 operands; any other count
+    * raises `UnexpectedArgumentCount` rather than a bare `MatchError`.
+    */
+  def getAssignRef[_: P]: P[Opcode] =
+    P(parseAssignRef.map {
+      case first :: second :: Nil =>
+        DualValueOperation("ASSIGN_REF_2", first, second)
+      case first :: second :: third :: Nil =>
+        TripleValueOperation("ASSIGN_REF_3", first, second, third)
+      case list =>
+        throw new UnexpectedArgumentCount("ASSIGN_REF", Seq(2, 3), list.length)
+    })
+
+  /** Reads `ASSIGN_OBJ_REF` and its operands up to a newline or end of input. */
+  def parseAssignObjRef[_: P]: P[Seq[Value]] =
+    P("ASSIGN_OBJ_REF" ~ (" " ~ getAnyValue).rep ~ &("\n" | End))
+
+  /** Builds ASSIGN_OBJ_REF_2 / ASSIGN_OBJ_REF_3 from 2 or 3 operands; any
+    * other count raises `UnexpectedArgumentCount`.
+    */
+  def getAssignObjRef[_: P]: P[Opcode] =
+    P(parseAssignObjRef.map {
+      case first :: second :: Nil =>
+        DualValueOperation("ASSIGN_OBJ_REF_2", first, second)
+      case first :: second :: third :: Nil =>
+        TripleValueOperation("ASSIGN_OBJ_REF_3", first, second, third)
+      case list =>
+        throw new UnexpectedArgumentCount("ASSIGN_OBJ_REF",
+                                          Seq(2, 3),
+                                          list.length)
+    })
+
+  /** Dispatches on the opcode name; each lookahead includes the trailing space
+    * so that a name which is a prefix of another cannot match the longer one.
+    */
+  def parseAssignCommand[_: P]: P[Opcode] = P(
+    &("ASSIGN ") ~ getAssign |
+      &("ASSIGN_OBJ ") ~ getAssignObj |
+      &("ASSIGN_DIM ") ~ getAssignDim |
+      &("ASSIGN_STATIC_PROP ") ~ getAssignStaticProp |
+      &("ASSIGN_STATIC_PROP_OP ") ~ getAssignStaticPropOp |
+      &("ASSIGN_STATIC_PROP_REF ") ~ getAssignStaticPropRef |
+      &("ASSIGN_OP ") ~ getAssignOp |
+      &("ASSIGN_REF ") ~ getAssignRef |
+      &("ASSIGN_OBJ_REF ") ~ getAssignObjRef
+  )
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Bind.scala
b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Bind.scala new file mode 100644 index 0000000..63eeb76 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Bind.scala @@ -0,0 +1,30 @@
+package io.joern.bytecode.parser.php7.instructions
+
+import fastparse.NoWhitespace._
+import fastparse._
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php7.instructions.Utility.getAnyValue
+
+/** Parsers for the BIND_STATIC and BIND_LEXICAL instructions. */
+object Bind {
+
+  /** Reads `BIND_STATIC` (which must be followed by a space) and its operands. */
+  def parseBindStatic[_: P]: P[Seq[Value]] =
+    P(&("BIND_STATIC ") ~ "BIND_STATIC" ~ (" " ~ getAnyValue).rep)
+
+  /** Builds BIND_STATIC from 1 or 2 operands; any other count raises
+    * `UnexpectedArgumentCount` rather than a bare `MatchError`.
+    */
+  def getBindStatic[_: P]: P[Opcode] =
+    P(parseBindStatic.map {
+      case first :: second :: Nil =>
+        DualValueOperation("BIND_STATIC", first, second)
+      case first :: Nil => SingleValueOperation("BIND_STATIC", first)
+      case list =>
+        throw new UnexpectedArgumentCount("BIND_STATIC", Seq(1, 2), list.length)
+    })
+
+  /** Reads `BIND_LEXICAL` (which must be followed by a space) and its operands. */
+  def parseBindLexical[_: P]: P[Seq[Value]] =
+    P(&("BIND_LEXICAL ") ~ "BIND_LEXICAL" ~ (" " ~ getAnyValue).rep)
+
+  /** Builds BIND_LEXICAL from 1 to 3 operands; any other count raises
+    * `UnexpectedArgumentCount`.
+    */
+  def getBindLexical[_: P]: P[Opcode] =
+    P(parseBindLexical.map {
+      case first :: second :: third :: Nil =>
+        TripleValueOperation("BIND_LEXICAL", first, second, third)
+      case first :: second :: Nil =>
+        DualValueOperation("BIND_LEXICAL", first, second)
+      case first :: Nil => SingleValueOperation("BIND_LEXICAL", first)
+      case list =>
+        throw new UnexpectedArgumentCount("BIND_LEXICAL",
+                                          Seq(1, 2, 3),
+                                          list.length)
+    })
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/CallRelated.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/CallRelated.scala new file mode 100644 index 0000000..59db839 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/CallRelated.scala @@ -0,0 +1,294 @@
+package io.joern.bytecode.parser.php7.instructions
+
+import fastparse.NoWhitespace._
+import fastparse._
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php7.Basics.anyNumber
+import io.joern.bytecode.parser.php7.Literals.{
+  getAnyLiteral,
+  getByteCodeKeyword,
+  getStringLiteral
+}
+import
io.joern.bytecode.parser.php7.Variables.getAnyVariable +import io.joern.bytecode.parser.php7.instructions.Utility.getAnyValue +import io.joern.reporting.ReportableError + +object CallRelated { + + def parseNew[_: P]: P[(String, Seq[Value])] = + P("NEW " ~ anyNumber.rep.! ~ (" " ~ getAnyValue).rep) + def getNew[_: P]: P[Opcode] = + P(parseNew.map { + case (args, first :: second :: Nil) => + TripleValueOperation("NEW", IntegerLiteral(args.toLong), first, second) + case (args, first :: Nil) => + DualValueOperation("NEW", IntegerLiteral(args.toLong), first) + case x => + throw ReportableError("", + -1, + "", + "", + s"when parsing New unexpected result tuple $x") + }) + + def parseInitFcall[_: P]: P[(String, String, StringLiteral)] = + P("INIT_FCALL" ~/ " " ~ anyNumber.rep.! ~ " " ~ anyNumber.rep.! ~ " " ~ getStringLiteral) + def getInitFcall[_: P]: P[INIT_FCALL] = + P(parseInitFcall.map(x => INIT_FCALL(x._1.toInt, x._2.toInt, x._3))) + + def parseInitMethodCall[_: P]: P[(String, Object, Value)] = + P("INIT_METHOD_CALL " ~/ anyNumber.rep.! ~ " " ~ ("THIS".! | getAnyVariable) ~ " " ~ getAnyValue) + def getInitMethodCall[_: P]: P[INIT_METHOD_CALL] = + P( + parseInitMethodCall.map(x => + INIT_METHOD_CALL( + x._1.toInt, + x._2 match { + case x: Variable => x + case x: String => Variable(x, tmp = false, reference = true) + case _ => + throw new RuntimeException("unknown object reference in call") + }, + x._3 + ))) + + def parseInitNsFcallByName[_: P]: P[(String, StringLiteral)] = + P("INIT_NS_FCALL_BY_NAME " ~/ anyNumber.rep.! ~ " " ~ getStringLiteral) + def getInitNsFcallByName[_: P]: P[INIT_NS_FCALL_BY_NAME] = + P(parseInitNsFcallByName.map(x => + INIT_NS_FCALL_BY_NAME(x._1.toInt, x._2.value))) + + def parseInitDynamicCall[_: P]: P[(String, Variable)] = + P("INIT_DYNAMIC_CALL " ~/ anyNumber.rep.! 
~ " " ~ getAnyVariable) + def getInitDynamicCall[_: P]: P[INIT_DYNAMIC_CALL] = + P(parseInitDynamicCall.map(x => INIT_DYNAMIC_CALL(x._1.toInt, x._2))) + + def parseInitFcallByName[_: P]: P[(String, StringLiteral)] = + P("INIT_FCALL_BY_NAME " ~/ anyNumber.rep.! ~ " " ~ getStringLiteral) + def getInitFcallByName[_: P]: P[INIT_FCALL_BY_NAME] = + P(parseInitFcallByName.map(x => INIT_FCALL_BY_NAME(x._1.toInt, x._2.value))) + + def parseInitStaticMethodCallA[_: P]: P[(String, Seq[Value])] = + P("INIT_STATIC_METHOD_CALL " ~/ anyNumber.rep.! ~ (" " ~ getAnyValue).rep) + def getInitStaticMethodCallA[_: P]: P[INIT_STATIC_METHOD_CALL] = + P(parseInitStaticMethodCallA.map { + case (args, first :: second :: Nil) => + INIT_STATIC_METHOD_CALL(args.toInt, None, None, Some(first), second) + case (args, first :: second :: third :: Nil) => + INIT_STATIC_METHOD_CALL(args.toInt, + Some(first), + None, + Some(second), + third) + case (args, first :: second :: third :: fourth :: Nil) => + INIT_STATIC_METHOD_CALL(args.toInt, + Some(first), + Some(second), + Some(third), + fourth) + case x => + throw ReportableError( + "", + -1, + "", + "", + s"when parsing InitStaticMethodCall unexpected result tuple $x") + }) + + def parseInitStaticMethodCallB[_: P] + : P[(String, ByteCodeKeyword, ByteCodeKeyword)] = + P("INIT_STATIC_METHOD_CALL " ~ + anyNumber.rep.! ~ " " ~ getByteCodeKeyword ~ " " ~ getByteCodeKeyword ~ " " ~ "CONSTRUCTOR") + def getInitStaticMethodCallB[_: P]: P[QuadrupleValueOperation] = + P( + parseInitStaticMethodCallB.map( + x => + QuadrupleValueOperation("INIT_STATIC_METHOD_CALL", + IntegerLiteral(x._1.toLong), + x._2, + x._3, + StringLiteral("CONSTRUCTOR")) + )) + + def getInitStaticMethodCall[_: P]: P[Opcode] = + P(getInitStaticMethodCallB | getInitStaticMethodCallA) + + def parseInitUserCall[_: P]: P[(String, StringLiteral, Value)] = + P("INIT_USER_CALL " ~/ anyNumber.rep.! 
~ " " ~ getStringLiteral ~ " " ~ getAnyValue) + + def getInitUserCall[_: P]: P[Opcode] = + P(parseInitUserCall.map(x => INIT_USER_CALL(x._1.toInt, x._2, x._3))) + + def parseInitCallCommands[_: P]: P[Opcode] = P( + &("INIT_FCALL ") ~ getInitFcall | + &("INIT_METHOD_CALL ") ~ getInitMethodCall | + &("INIT_NS_FCALL_BY_NAME ") ~ getInitNsFcallByName | + &("INIT_DYNAMIC_CALL ") ~ getInitDynamicCall | + &("INIT_FCALL_BY_NAME ") ~ getInitFcallByName | + &("INIT_STATIC_METHOD_CALL ") ~ getInitStaticMethodCall | + &("INIT_USER_CALL ") ~ getInitUserCall + ) + + def getPayloadReference[_: P]: P[Value] = + P(anyNumber.rep(1).!.map(x => IntegerLiteral(x.toLong)) | getStringLiteral) + + def parseSendVarEx[_: P]: P[(Value, Value)] = + P(getAnyValue ~ " " ~ getPayloadReference) + def getSendVarEx[_: P]: P[DualValueOperation] = + P(parseSendVarEx.map(x => DualValueOperation("SEND_VAR_EX", x._1, x._2))) + + def parseSendValEx[_: P]: P[(Value, Value)] = + P(getAnyValue ~ " " ~ getPayloadReference) + def getSendValEx[_: P]: P[DualValueOperation] = + P(parseSendVarEx.map { x => + DualValueOperation("SEND_VAL_EX", x._1, x._2) + }) + + def parseSendVal[_: P]: P[(Value, Value)] = + P(getAnyValue ~ " " ~ getPayloadReference) + def getSendVal[_: P]: P[DualValueOperation] = + P(parseSendVal.map { x => + //assert(x._2 != "", s"after parsing ${x._1} we encounter an empty integer string") + DualValueOperation("SEND_VAL", x._1, x._2) + }) + + def parseSendVar[_: P]: P[(Value, Value)] = + P(getAnyValue ~ " " ~ getPayloadReference) + def getSendVar[_: P]: P[DualValueOperation] = { + P(parseSendVar.map(x => DualValueOperation("SEND_VAR", x._1, x._2))) + } + + def parseSendVarNoRefEx[_: P]: P[(Variable, Value)] = + P(getAnyVariable ~ " " ~ getPayloadReference) + def getSendVarNoRefEx[_: P]: P[DualValueOperation] = + P(parseSendVarNoRefEx.map(x => + DualValueOperation("SEND_VAR_NO_REF_EX", x._1, x._2))) + + def parseSendVarNoRef[_: P]: P[(Variable, Value)] = + P(getAnyVariable ~ " " ~ 
getPayloadReference)
+  /** Builds the `SEND_VAR_NO_REF` operation from the operands read by parseSendVarNoRef. */
+  def getSendVarNoRef[_: P]: P[DualValueOperation] =
+    P(parseSendVarNoRef.map(x =>
+      DualValueOperation("SEND_VAR_NO_REF", x._1, x._2)))
+
+  /** Operands of `SEND_FUNC_ARG`: a variable, one space, a payload reference.
+    * The opcode prefix itself is consumed by getSendCommand. */
+  def parseSendFuncVar[_: P]: P[(Variable, Value)] =
+    P(getAnyVariable ~ " " ~ getPayloadReference)
+  /** Builds the operation; the emitted opcode name is `SEND_FUNC_ARG`. */
+  def getSendFuncVar[_: P]: P[DualValueOperation] =
+    P(parseSendFuncVar.map(x =>
+      DualValueOperation("SEND_FUNC_ARG", x._1, x._2)))
+
+  /** Operands of `SEND_USER`: any value, one space, a payload reference. */
+  def parseSendUser[_: P]: P[(Value, Value)] =
+    P(getAnyValue ~ " " ~ getPayloadReference)
+  /** Builds the `SEND_USER` operation from the two parsed operands. */
+  def getSendUser[_: P]: P[DualValueOperation] =
+    P(parseSendUser.map(x => DualValueOperation("SEND_USER", x._1, x._2)))
+
+  /** Unlike the operand parsers above, this one consumes its own `SEND_REF ` prefix. */
+  def parseSendRef[_: P]: P[(Value, Value)] = {
+    P("SEND_REF " ~ getAnyValue ~ " " ~ getPayloadReference)
+  }
+  /** Builds the `SEND_REF` operation from the two parsed operands. */
+  def getSendRef[_: P]: P[DualValueOperation] = P(
+    parseSendRef.map(x => DualValueOperation("SEND_REF", x._1, x._2))
+  )
+
+  // 2021-12-14: not sure if this should also use getPayloadReference /Malte
+  /** Reads `SEND_ARRAY `, the text matched by `anyNumber.rep`, and any number of
+    * space-separated values. The cut after the prefix disables backtracking. */
+  def parseSendArray[_: P]: P[(String, Seq[Value])] =
+    P("SEND_ARRAY " ~/ anyNumber.rep.! ~ (" " ~ getAnyValue).rep)
+
+  /** Builds the `SEND_ARRAY` operation for one or two operands.
+    *
+    * Any other operand count is reported as a ReportableError (same shape as
+    * getNew) instead of surfacing as an anonymous scala.MatchError.
+    */
+  def getSendArray[_: P]: P[Opcode] =
+    P(parseSendArray.map {
+      case (count, first :: Nil) =>
+        DualValueOperation("SEND_ARRAY", IntegerLiteral(count.toLong), first)
+      case (count, first :: second :: Nil) =>
+        TripleValueOperation("SEND_ARRAY",
+                             IntegerLiteral(count.toLong),
+                             first,
+                             second)
+      case x =>
+        throw ReportableError(
+          "",
+          -1,
+          "",
+          "",
+          s"when parsing SendArray unexpected result tuple $x")
+    })
+
+  /** Dispatches on the SEND_* prefix.
+    *
+    * The first eight alternatives consume the prefix here and cut, because their
+    * operand parsers do not contain it. SEND_REF and SEND_ARRAY only look ahead,
+    * because parseSendRef / parseSendArray consume the prefix themselves.
+    */
+  def getSendCommand[_: P]: P[Opcode] =
+    P(
+      "SEND_VAR_NO_REF_EX " ~/ getSendVarNoRefEx |
+        "SEND_VAR_NO_REF " ~/ getSendVarNoRef |
+        "SEND_VAL_EX " ~/ getSendValEx |
+        "SEND_VAR_EX " ~/ getSendVarEx |
+        "SEND_VAL " ~/ getSendVal |
+        "SEND_VAR " ~/ getSendVar |
+        "SEND_USER " ~/ getSendUser |
+        "SEND_FUNC_ARG " ~/ getSendFuncVar |
+        &("SEND_REF ") ~/ getSendRef |
+        &("SEND_ARRAY ") ~/ getSendArray)
+
+  /** Reads `RECV ` and captures the text matched by `anyNumber.rep`. */
+  def parseRecv[_: P]: P[String] = P("RECV " ~/ anyNumber.rep.!)
+ def getRecv[_: P]: P[SingleValueOperation] = + P(parseRecv.map(x => + SingleValueOperation("RECV", IntegerLiteral(x.toLong)))) + + def parseRecvInit[_: P]: P[(String, Value)] = + P("RECV_INIT " ~/ anyNumber.rep.! ~ " " ~/ getAnyLiteral) + def getRecvInit[_: P]: P[DualValueOperation] = + P(parseRecvInit.map(x => + DualValueOperation("RECV_INIT", IntegerLiteral(x._1.toLong), x._2))) + + def parseRecvVariadic[_: P]: P[String] = + P("RECV_VARIADIC" ~ " " ~ anyNumber.rep.!) + def getRecvVariadic[_: P]: P[Opcode] = + P(parseRecvVariadic.map(x => + SingleValueOperation("RECV_VARIADIC", IntegerLiteral(x.toLong)))) + + def parseCheckFuncArg[_: P]: P[String] = + P("CHECK_FUNC_ARG " ~/ anyNumber.rep.!) + def getCheckFuncArg[_: P]: P[SingleValueOperation] = + P(parseCheckFuncArg.map(x => + SingleValueOperation("CHECK_FUNC_ARG", IntegerLiteral(x.toLong)))) + + def parseFuncGetArgs[_: P]: P[Seq[Value]] = + P(&("FUNC_GET_ARGS") ~ "FUNC_GET_ARGS" ~ (" " ~ getAnyValue).rep) + def getFuncGetArgs[_: P]: P[Opcode] = + P(parseFuncGetArgs.map { + case Nil => NoValueOperation("FUNC_GET_ARGS") + case single :: Nil => SingleValueOperation("FUNC_GET_ARGS", single) + }) + + def parseReturnByRef[_: P]: P[Seq[Value]] = + P("RETURN_BY_REF" ~ (" " ~ getAnyValue).rep) + def getReturnByRef[_: P]: P[Opcode] = + P(parseReturnByRef.map { + case first :: Nil => SingleValueOperation("RETURN_BY_REF", first) + case first :: second :: Nil => + DualValueOperation("RETURN_BY_REF", first, second) + }) + + def parseReturn[_: P]: P[Value] = + P("RETURN " ~ getAnyValue) + def getReturn[_: P]: P[Opcode] = + P(parseReturn.map(SingleValueOperation("RETURN", _))) + + def getReturnCommand[_: P]: P[Opcode] = + P( + &("RETURN_BY_REF ") ~ getReturnByRef | + &("RETURN ") ~ getReturn) + + def parseVerifyReturnType[_: P]: P[Seq[Value]] = + P("VERIFY_RETURN_TYPE" ~ (" " ~ getAnyValue).rep ~ &("\n" | End)) + def getVerifyReturnType[_: P]: P[Opcode] = + P(parseVerifyReturnType.map { + case Nil => 
NoValueOperation("VERIFY_RETURN_TYPE")
+      case single :: Nil => SingleValueOperation("VERIFY_RETURN_TYPE", single)
+      case list =>
+        // The cases above accept 0 or 1 operands, so those are the counts to
+        // report (presumably the Seq lists the expected counts, as in getFastRet).
+        throw new UnexpectedArgumentCount("VERIFY_RETURN_TYPE",
+                                          Seq(0, 1),
+                                          list.length)
+    })
+
+  /** Reads `FAST_RET` and its space-separated operands. */
+  def parseFastRet[_: P]: P[Seq[Value]] =
+    P(&("FAST_RET") ~ "FAST_RET" ~ (" " ~ getAnyValue).rep)
+  /** Builds the `FAST_RET` operation; exactly one or two operands are accepted. */
+  def getFastRet[_: P]: P[Opcode] =
+    P(parseFastRet.map {
+      case first :: Nil => SingleValueOperation("FAST_RET", first)
+      case first :: second :: Nil =>
+        DualValueOperation("FAST_RET", first, second)
+      case list =>
+        throw new UnexpectedArgumentCount("FAST_RET", Seq(1, 2), list.length)
+    })
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/ClassRelated.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/ClassRelated.scala
new file mode 100644
index 0000000..8097fbc
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/ClassRelated.scala
@@ -0,0 +1,40 @@
+package io.joern.bytecode.parser.php7.instructions
+
+import fastparse.NoWhitespace._
+import fastparse.{P, _}
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php7.instructions.Utility.getAnyValue
+
+object ClassRelated {
+
+  /** Reads `GET_CLASS` and its space-separated operands. */
+  def parseGetClass[_: P]: P[Seq[Value]] =
+    P(&("GET_CLASS") ~ "GET_CLASS" ~ (" " ~ getAnyValue).rep)
+  /** Builds the `GET_CLASS` operation for zero or one operand.
+    * NOTE(review): other operand counts fall through as a scala.MatchError. */
+  def getGetClass[_: P]: P[Opcode] =
+    P(parseGetClass.map {
+      case Nil           => NoValueOperation("GET_CLASS")
+      case single :: Nil => SingleValueOperation("GET_CLASS", single)
+    })
+
+  /** Reads `DECLARE_ANON_CLASS` and its space-separated operands. */
+  def parseDeclareAnonClass[_: P]: P[Seq[Value]] =
+    P(&("DECLARE_ANON_CLASS ") ~ "DECLARE_ANON_CLASS" ~ (" " ~ getAnyValue).rep)
+  /** Builds the `DECLARE_ANON_CLASS` operation for one or two operands. */
+  def getDeclareAnonClass[_: P]: P[Opcode] =
+    P(
+      parseDeclareAnonClass.map {
+        case first :: Nil => SingleValueOperation("DECLARE_ANON_CLASS", first)
+        case first :: second :: Nil =>
+          DualValueOperation("DECLARE_ANON_CLASS", first, second)
+      }
+    )
+
+  /** Reads `DECLARE_CLASS` and its operands; the instruction must be followed by
+    * a newline or the end of input (checked by lookahead, not consumed). */
+  def parseDeclareClass[_: P]: P[Seq[Value]] =
+    P(
+      &("DECLARE_CLASS ") ~ "DECLARE_CLASS" ~ (" " ~ getAnyValue).rep ~ &(
+        "\n" | End))
+  /** Builds the `DECLARE_CLASS` operation for one or two operands.
+    *
+    * The two-operand form previously carried the opcode name
+    * "VERIFY_RETURN_TYPE"; both forms now report "DECLARE_CLASS".
+    */
+  def getDeclareClass[_: P]: P[Opcode] =
+    P(parseDeclareClass.map {
+      case single :: Nil => SingleValueOperation("DECLARE_CLASS", single)
+      case first :: second :: Nil =>
+        DualValueOperation("DECLARE_CLASS", first, second)
+    })
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/ControlConstructs.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/ControlConstructs.scala
new file mode 100644
index 0000000..69c803c
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/ControlConstructs.scala
@@ -0,0 +1,67 @@
+package io.joern.bytecode.parser.php7.instructions
+
+import fastparse.NoWhitespace._
+import fastparse._
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php7.Basics.anyNumber
+import io.joern.bytecode.parser.php7.instructions.Utility.{
+  getAnyValue,
+  parseStringInQuotes,
+  parseTarget
+}
+import io.joern.bytecode.parser.utils.decodeBase64
+
+object ControlConstructs {
+
+  /** One switch arm: a quoted string passed through `decodeBase64`, or the
+    * literal `default`, followed by `: ` and a jump target. */
+  def parseStringDestinationPattern[_: P]: P[(String, String)] =
+    P(
+      (parseStringInQuotes
+        .map(decodeBase64) | "default".!) ~/ ": " ~ parseTarget)
+  /** Reads `SWITCH_STRING `, the switched value and one or more `, `-separated arms. */
+  def parseSwitchString[_: P]
+    : P[(Value, (String, String), Seq[(String, String)])] =
+    P("SWITCH_STRING " ~/ getAnyValue ~ " " ~ parseStringDestinationPattern ~ ("," ~ " " ~ parseStringDestinationPattern).rep)
+  /** Builds the SWITCH construct; every arm's target text is converted with `toInt`. */
+  def getSwitchString[_: P]: P[Opcode] =
+    P(
+      parseSwitchString.map(
+        x =>
+          SWITCH("SWITCH_STRING",
+                 x._1,
+                 (Seq(x._2) ++ x._3).map(x => (x._1, x._2.toInt)))))
+
+  def parseNumberDestinationPattern[_: P]: P[(String, String)] =
+    P(("default".! | ("-".? ~ anyNumber.rep).!)
~ ": " ~ parseTarget) + def parseSwitchLong[_: P] + : P[(Value, (String, String), Seq[(String, String)])] = + P("SWITCH_LONG " ~/ getAnyValue ~ " " ~ parseNumberDestinationPattern ~ ("," ~ " " ~ parseNumberDestinationPattern).rep) + def getSwitchLong[_: P]: P[Opcode] = + P( + parseSwitchLong.map( + x => + SWITCH("SWITCH_LONG", + x._1, + (Seq(x._2) ++ x._3).map(x => (x._1, x._2.toInt))))) + + def getSwitchStatement[_: P]: P[Opcode] = + P( + &("SWITCH_STRING ") ~ getSwitchString | + &("SWITCH_LONG ") ~ getSwitchLong + ) + + def parseExit[_: P]: P[Option[Value]] = + P("EXIT" ~ (" " ~ getAnyValue).?) + + def getExit[_: P]: P[Opcode] = + P(parseExit.map { + case Some(x) => SingleValueOperation("EXIT", x) + case None => NoValueOperation("EXIT") + }) + + def parseCatch[_: P]: P[(Value, Option[String])] = + P("CATCH " ~ getAnyValue ~ (" " ~ parseTarget).?) + def getCatch[_: P]: P[Opcode] = + P(parseCatch.map { + case (value, Some(x)) => + DualValueOperation("CATCH", value, IntegerLiteral(x.toLong)) + case (value, None) => SingleValueOperation("CATCH", value) + }) +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Fe.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Fe.scala new file mode 100644 index 0000000..3769369 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Fe.scala @@ -0,0 +1,60 @@ +package io.joern.bytecode.parser.php7.instructions + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php7.Literals.getArray +import io.joern.bytecode.parser.php7.Variables.getAnyVariable +import io.joern.bytecode.parser.php7.instructions.Utility.{ + getAnyValue, + parseTarget +} + +object Fe { + + def parseFeFetchR[_: P]: P[(Variable, Variable, String)] = + P("FE_FETCH_R " ~/ getAnyVariable ~ " " ~/ getAnyVariable ~ " " ~/ parseTarget) + def getFeFetchR[_: P]: P[TripleValueOperation] = + P( + 
parseFeFetchR.map( + x => + TripleValueOperation("FE_FETCH_R", + x._1, + x._2, + IntegerLiteral(x._3.toLong)))) + + def parseFeFetchRw[_: P]: P[(Variable, Variable, String)] = + P("FE_FETCH_RW " ~/ getAnyVariable ~ " " ~/ getAnyVariable ~ " " ~/ parseTarget) + def getFeFetchRw[_: P]: P[TripleValueOperation] = + P( + parseFeFetchRw.map( + x => + TripleValueOperation("FE_FETCH_RW", + x._1, + x._2, + IntegerLiteral(x._3.toLong)))) + + def getFeFetch[_: P]: P[Opcode] = + P( + &("FE_FETCH_RW ") ~/ getFeFetchRw | + &("FE_FETCH_R ") ~/ getFeFetchR) + + def parseFeResetRw[_: P]: P[(Value, String)] = + P("FE_RESET_RW " ~/ (getAnyVariable | getArray) ~ " " ~ parseTarget) + def getFeResetRw[_: P]: P[DualValueOperation] = + P(parseFeResetRw.map(x => + DualValueOperation("FE_RESET_RW", x._1, IntegerLiteral(x._2.toLong)))) + + def parseFeResetR[_: P]: P[(Value, String)] = + P("FE_RESET_R " ~/ getAnyValue ~ " " ~/ parseTarget) + def getFeResetR[_: P]: P[DualValueOperation] = + P(parseFeResetR.map(x => + DualValueOperation("FE_RESET_R", x._1, IntegerLiteral(x._2.toLong)))) + + def getFeReset[_: P]: P[Opcode] = + P( + &("FE_RESET_RW ") ~ getFeResetRw | + &("FE_RESET_R ") ~ getFeResetR + ) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Fetch.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Fetch.scala new file mode 100644 index 0000000..17755c0 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Fetch.scala @@ -0,0 +1,221 @@ +package io.joern.bytecode.parser.php7.instructions + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php7.Literals.{ + getByteCodeKeyword, + getStringLiteral +} +import io.joern.bytecode.parser.php7.instructions.Utility.getAnyValue + +object Fetch { + + def parseFetchObjFuncArgA[_: P]: P[(Option[ByteCodeKeyword], Value, Value)] = + P("FETCH_OBJ_FUNC_ARG " ~ 
(getByteCodeKeyword ~ " ").? ~ getAnyValue ~ " " ~ getAnyValue) + def getFetchObjFuncArgA[_: P]: P[Opcode] = + P(parseFetchObjFuncArgA.map(x => + x._1 match { + case Some(keyword) => + TripleValueOperation("FETCH_OBJ_FUNC_ARG", keyword, x._2, x._3) + case None => DualValueOperation("FETCH_OBJ_FUNC_ARG", x._2, x._3) + })) + + def parseFetchObjFuncArgB[_: P]: P[(ByteCodeKeyword, StringLiteral)] = + P("FETCH_OBJ_FUNC_ARG " ~ getByteCodeKeyword ~ " " ~ "THIS" ~ " " ~ getStringLiteral) + def getFetchObjFuncArgB[_: P]: P[TripleValueOperation] = + P( + parseFetchObjFuncArgB.map( + x => + TripleValueOperation("FETCH_OBJ_FUNC_ARG", + x._1, + StringLiteral("THIS"), + x._2))) + + def getFetchObjFuncArg[_: P]: P[Opcode] = + P(getFetchObjFuncArgA | getFetchObjFuncArgB) + + def parseStaticPropR[_: P]: P[Seq[Value]] = + P("FETCH_STATIC_PROP_R" ~/ (" " ~ getAnyValue).rep) + def getStaticPropR[_: P]: P[Opcode] = + P(parseStaticPropR.map { + case first :: second :: Nil => + DualValueOperation("FETCH_STATIC_PROP_R", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_STATIC_PROP_R", first, second, third) + }) + + def parseStaticPropW[_: P]: P[Seq[Value]] = + P("FETCH_STATIC_PROP_W" ~/ (" " ~ getAnyValue).rep) + def getStaticPropW[_: P]: P[Opcode] = + parseStaticPropW.map { + case first :: second :: third :: fourth :: Nil => + QuadrupleValueOperation("FETCH_STATIC_PROP_W", + first, + second, + third, + fourth) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_STATIC_PROP_W", first, second, third) + //case first :: second :: Nil => + //DualValueOperation("FETCH_STATIC_PROP_W",first,second) + } + + def parseStaticPropFuncArg[_: P]: P[Seq[Value]] = + P(&("FETCH_STATIC_PROP_FUNC_ARG ") ~ "FETCH_STATIC_PROP_FUNC_ARG" ~/ (" " ~ getAnyValue).rep) + def getStaticPropFuncArg[_: P]: P[Opcode] = + P(parseStaticPropFuncArg.map { + case first :: second :: Nil => + DualValueOperation("FETCH_STATIC_PROP_FUNC_ARG", first, second) + case first 
:: second :: third :: Nil => + TripleValueOperation("FETCH_STATIC_PROP_FUNC_ARG", first, second, third) + case first :: second :: third :: fourth :: Nil => + QuadrupleValueOperation("FETCH_STATIC_PROP_FUNC_ARG", + first, + second, + third, + fourth) + }) + + def parseFetchStaticPropIs[_: P]: P[Seq[Value]] = + P("FETCH_STATIC_PROP_IS" ~/ (" " ~ getAnyValue).rep) + def getFetchStaticPropIs[_: P]: P[Opcode] = + P(parseFetchStaticPropIs.map { + case first :: second :: Nil => + DualValueOperation("FETCH_STATIC_PROP_IS", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_STATIC_PROP_IS", first, second, third) + }) + + def parseFetchDimR[_: P]: P[(Value, Value)] = + P("FETCH_DIM_R " ~/ getAnyValue ~ " " ~/ getAnyValue) + def getFetchDimR[_: P]: P[DualValueOperation] = + P(parseFetchDimR.map(x => DualValueOperation("FETCH_DIM_R", x._1, x._2))) + + def parseFetchListR[_: P]: P[(Value, Value)] = + P("FETCH_LIST_R " ~/ getAnyValue ~ " " ~ getAnyValue) + def getFetchListR[_: P]: P[DualValueOperation] = + P(parseFetchListR.map(x => DualValueOperation("FETCH_LIST_R", x._1, x._2))) + + def parseFetchClassConstantA[_: P] + : P[(ByteCodeKeyword, ByteCodeKeyword, StringLiteral)] = + P("FETCH_CLASS_CONSTANT " ~ getByteCodeKeyword ~ " " ~ getByteCodeKeyword ~ " " ~ getStringLiteral) + def getFetchClassConstantA[_: P]: P[TripleValueOperation] = + P(parseFetchClassConstantA.map(x => + TripleValueOperation("FETCH_CLASS_CONSTANT", x._1, x._2, x._3))) + + def parseFetchClassConstantB[_: P]: P[(Value, Value)] = + P("FETCH_CLASS_CONSTANT " ~ getAnyValue ~ " " ~ getAnyValue) + def getFetchClassConstantB[_: P]: P[DualValueOperation] = + P(parseFetchClassConstantB.map(x => + DualValueOperation("FETCH_CLASS_CONSTANT", x._1, x._2))) + + def parseFetchClass[_: P]: P[Seq[Value]] = + P("FETCH_CLASS" ~ (" " ~ getAnyValue).rep) + def getFetchClass[_: P]: P[Opcode] = + P(parseFetchClass.map { + case first :: second :: Nil => + DualValueOperation("FETCH_CLASS", first, 
second) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_CLASS", first, second, third) + }) + + def parseFetchClassName[_: P]: P[Value] = + P("FETCH_CLASS_NAME" ~ " " ~ getAnyValue) + def getFetchClassName[_: P]: P[Opcode] = + P(parseFetchClassName.map(x => SingleValueOperation("FETCH_CLASS_NAME", x))) + + def getFetchClassConstant[_: P]: P[Opcode] = + P(getFetchClassConstantA | getFetchClassConstantB) + + def parseFetchDimFuncArg[_: P]: P[(Value, Value)] = + P("FETCH_DIM_FUNC_ARG " ~/ getAnyValue ~ " " ~ getAnyValue) + def getFetchDimFuncArg[_: P]: P[DualValueOperation] = + P(parseFetchDimFuncArg.map(x => + DualValueOperation("FETCH_DIM_FUNC_ARG", x._1, x._2))) + + def parseFetchConstant[_: P]: P[Seq[Value]] = + P("FETCH_CONSTANT" ~ (" " ~ getAnyValue).rep) + def getFetchConstant[_: P]: P[Opcode] = P( + parseFetchConstant.map { + case first :: Nil => SingleValueOperation("FETCH_CONSTANT", first) + case first :: second :: Nil => + DualValueOperation("FETCH_CONSTANT", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_CONSTANT", first, second, third) + } + ) + + def parseFetchR[_: P]: P[(ByteCodeKeyword, Value)] = + P( + "FETCH_R " ~ getByteCodeKeyword ~ " " ~ getAnyValue + ) + def getFetchR[_: P]: P[DualValueOperation] = + P( + parseFetchR.map( + x => DualValueOperation("FETCH_R", x._1, x._2) + )) + + def parseFetchIs[_: P]: P[(ByteCodeKeyword, Value)] = + P( + "FETCH_IS " ~ getByteCodeKeyword ~ " " ~ getAnyValue + ) + def getFetchIs[_: P]: P[DualValueOperation] = + P( + parseFetchIs.map( + x => DualValueOperation("FETCH_IS", x._1, x._2) + ) + ) + + def parseFetchObjW[_: P]: P[Seq[Value]] = + P("FETCH_OBJ_W" ~ (" " ~ getAnyValue).rep ~ &("\n" | End)) + def getFetchObjW[_: P]: P[Opcode] = + P(parseFetchObjW.map { + case first :: second :: Nil => + DualValueOperation("FETCH_OBJ_W_2", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_OBJ_W_3", first, second, third) + }) + + 
def parseFetchStaticPropUnset[_: P]: P[Seq[Value]] = + P("FETCH_STATIC_PROP_UNSET" ~ (" " ~ getAnyValue).rep) + def getFetchStaticPropUnset[_: P]: P[Opcode] = + P(parseFetchStaticPropUnset.map { + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_STATIC_PROP_UNSET", first, second, third) + case first :: second :: Nil => + DualValueOperation("FETCH_STATIC_PROP_UNSET", first, second) + }) + + def parseFetchStaticPropRw[_: P]: P[Seq[Value]] = + P("FETCH_STATIC_PROP_RW" ~ (" " ~ getAnyValue).rep) + def getFetchStaticPropRw[_: P]: P[Opcode] = + P(parseFetchStaticPropRw.map { + case first :: second :: Nil => + DualValueOperation("FETCH_STATIC_PROP_RW", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_STATIC_PROP_RW", first, second, third) + }) + + def parseFetchCommand[_: P]: P[Opcode] = P( + &("FETCH_OBJ_FUNC_ARG ") ~ getFetchObjFuncArg | + &("FETCH_DIM_FUNC_ARG") ~ getFetchDimFuncArg | + &("FETCH_STATIC_PROP_FUNC_ARG ") ~ getStaticPropFuncArg | + &("FETCH_STATIC_PROP_R ") ~ getStaticPropR | + &("FETCH_STATIC_PROP_W ") ~ getStaticPropW | + &("FETCH_STATIC_PROP_IS ") ~ getFetchStaticPropIs | + &("FETCH_STATIC_PROP_UNSET ") ~ getFetchStaticPropUnset | + &("FETCH_STATIC_PROP_RW ") ~ getFetchStaticPropRw | + //&("FETCH_OBJ_R ") ~ getFetchObjR | + &("FETCH_CLASS_NAME ") ~ getFetchClassName | + &("FETCH_DIM_R ") ~ getFetchDimR | + &("FETCH_LIST_R ") ~ getFetchListR | + &("FETCH_CLASS ") ~ getFetchClass | + &("FETCH_CLASS_CONSTANT ") ~ getFetchClassConstant | + &("FETCH_CONSTANT ") ~ getFetchConstant | + &("FETCH_R ") ~ getFetchR | + &("FETCH_IS ") ~ getFetchIs | + &("FETCH_OBJ_W ") ~ getFetchObjW + ) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Generic.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Generic.scala new file mode 100644 index 0000000..6d312a0 --- /dev/null +++ 
b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Generic.scala
@@ -0,0 +1,97 @@
+package io.joern.bytecode.parser.php7.instructions
+
+import fastparse.NoWhitespace._
+import fastparse.{P, _}
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php7.instructions.Utility.getAnyValue
+
+object Generic {
+
+  /** Mnemonics of opcodes that are printed without operands.
+    *
+    * `|` is ordered choice, so `DO_FCALL_BY_NAME` has to stay ahead of its
+    * prefix `DO_FCALL`.
+    */
+  def parseNoValueCommandString[_: P]: P[Unit] = P(
+    "DO_FCALL_BY_NAME" | "DO_ICALL" | "DO_UCALL" | "DO_FCALL" | "NOP" | "BEGIN_SILENCE" | "EXT_STMT" | "EXT_NOP" |
+      "FETCH_THIS" | "GENERATOR_CREATE" | "GET_CALLED_CLASS" | "FUNC_NUM_ARGS" | "CHECK_UNDEF_ARGS" |
+      "FETCH_GLOBALS" |
+      "VERIFY_NEVER_TYPE" |
+      "CALLABLE_CONVERT"
+  )
+
+  /** Captures the matched operand-less mnemonic as a String. */
+  def parseNoValueCommand[_: P]: P[String] = P(parseNoValueCommandString.!)
+
+  /** Wraps the captured mnemonic in a NoValueOperation. */
+  def getNoValueCommand[_: P]: P[NoValueOperation] =
+    P(parseNoValueCommand.map(x => NoValueOperation(x)))
+
+  /** Mnemonics of opcodes that take exactly one operand.
+    *
+    * Every literal must end with the separating space: the operand parser starts
+    * directly after the match, and getSingleValueCommand drops the final
+    * character of the capture to recover the opcode name. `MATCH_ERROR` was
+    * missing that space, so its operand could not be parsed and its name would
+    * have been truncated.
+    */
+  def parseSingleValueCommandString[_: P]: P[Unit] = P(
+    "ECHO " | "BW_NOT " | "BOOL_NOT " | "QM_ASSIGN " | "PRE_INC " | "POST_INC " | "PRE_DEC " | "POST_DEC " | "FREE " |
+      "PRINT " | "FE_FREE " | "END_SILENCE " | "BOOL " | "OP_DATA " | "THROW " | "STRLEN " | "SEND_UNPACK " |
+      "COUNT " | "DEFINED " | "DECLARE_FUNCTION " | "GET_TYPE " | "UNSET_CV " | "COPY_TMP " |
+      "CLONE " | "MAKE_REF " | "SEPARATE " | "DECLARE_LAMBDA_FUNCTION " | "GENERATOR_RETURN " | "DISCARD_EXCEPTION " |
+      "MATCH_ERROR " | "CHECK_VAR "
+  )
+
+  def parseSingleValueCommand[_: P]: P[(String, Value)] = P(
+    parseSingleValueCommandString.!
~/ getAnyValue + ) + + def getSingleValueCommand[_: P]: P[SingleValueOperation] = P( + parseSingleValueCommand.map(x => + SingleValueOperation(x._1.substring(0, x._1.length - 1), x._2)) + ) + + def parseDualValueCommandString[_: P]: P[Unit] = P( + "CONCAT " | "FAST_CONCAT " | "ADD " | "SUB " | "MUL " | "DIV " | "MOD " | "SL " | "SR " | "BW_OR " | + "BW_AND " | "BW_XOR " | "BOOL_OR " | "IS_EQUAL " | "IS_NOT_EQUAL " | "IS_IDENTICAL " | "IS_NOT_IDENTICAL " | + "IS_SMALLER " | "IS_SMALLER_OR_EQUAL " | "BIND_GLOBAL " | "DECLARE_CLASS_DELAYED " | + "DECLARE_CONST " | "INCLUDE_OR_EVAL " | "FETCH_FUNC_ARG " | "FETCH_DIM_FUNC_ARG " | "POW " | + "FETCH_DIM_R " | "FETCH_W " | "FETCH_DIM_W " | "ARRAY_KEY_EXISTS " | "FETCH_OBJ_RW " | + "FETCH_OBJ_R " | "FETCH_RW " | "FETCH_OBJ_IS " | "FETCH_DIM_IS " | "TYPE_CHECK " | "FETCH_DIM_RW " | + "UNSET_OBJ " | "FETCH_UNSET " | "UNSET_DIM " | "FETCH_DIM_UNSET " | "CASE " | "FETCH_OBJ_UNSET " | "UNSET_STATIC_PROP " | + "POST_INC_OBJ " | "PRE_INC_OBJ " | "POST_DEC_OBJ " | "PRE_DEC_OBJ " | "BOOL_XOR " | "SPACESHIP " | "UNSET_VAR " | + "CASE_STRICT " + ) + + def parseDualValueCommand[_: P]: P[(String, Value, Value)] = + P( + parseDualValueCommandString.! ~/ + getAnyValue ~ + " " ~ + getAnyValue) + + def getDualValueCommand[_: P]: P[DualValueOperation] = + P(parseDualValueCommand.map(x => + DualValueOperation(x._1.substring(0, x._1.length - 1), x._2, x._3))) + + def parseTripleValueCommandString[_: P]: P[Unit] = P( + "ASSIGN_DIM_OP " | "ASSIGN_OBJ_OP " | "ISSET_ISEMPTY_VAR " + ) + + def parseTripleValueCommand[_: P]: P[(String, Value, Value, Value)] = + P(parseTripleValueCommandString.! 
~/ getAnyValue ~ " " ~ getAnyValue ~ " " ~ getAnyValue) + + def getTripleValueCommand[_: P]: P[TripleValueOperation] = + P( + parseTripleValueCommand.map( + x => + TripleValueOperation(x._1.substring(0, x._1.length - 1), + x._2, + x._3, + x._4))) + + def parseQuadrupleValueCommandString[_: P]: P[Unit] = P( + "DOESNOTEXIST " + ) + + def parseQuadrupleValueCommand[_: P] + : P[(String, Value, Value, Value, Value)] = P( + parseQuadrupleValueCommandString.! ~/ getAnyValue ~ " " ~ getAnyValue ~ " " ~ getAnyValue ~ " " ~ getAnyValue + ) + + def getQuadrupleValueCommand[_: P]: P[QuadrupleValueOperation] = { + parseQuadrupleValueCommand.map( + x => + QuadrupleValueOperation(x._1.substring(0, x._1.length - 1), + x._2, + x._3, + x._4, + x._5)) + } +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/IncDecStaticProp.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/IncDecStaticProp.scala new file mode 100644 index 0000000..110f414 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/IncDecStaticProp.scala @@ -0,0 +1,27 @@ +package io.joern.bytecode.parser.php7.instructions + +import fastparse.NoWhitespace._ +import fastparse.{P, _} +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php7.instructions.Utility.getAnyValue +import io.joern.reporting.ReportableError + +object IncDecStaticProp { + + def parseIncDecStaticProp[_: P]: P[(String, Seq[Value])] = + P(&(("POST_INC" | "PRE_INC" | "POST_DEC" | "PRE_DEC") ~ "_STATIC_PROP ") ~ (("POST_INC" | "PRE_INC" | "POST_DEC" | "PRE_DEC") ~ "_STATIC_PROP").! 
~ (" " ~ getAnyValue).rep) + def getIncDecStaticProp[_: P]: P[Opcode] = + P(parseIncDecStaticProp.map { + case (opString, first :: Nil) => SingleValueOperation(opString, first) + case (opString, first :: second :: Nil) => + DualValueOperation(opString, first, second) + case x => + throw ReportableError( + "", + -1, + "", + "", + s"when parsing IncDecStaticProp unexpected result tuple $x") + }) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Isset.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Isset.scala new file mode 100644 index 0000000..31c5e31 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Isset.scala @@ -0,0 +1,72 @@ +package io.joern.bytecode.parser.php7.instructions + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php7.Literals.{ + getByteCodeKeyword, + getStringLiteral +} +import io.joern.bytecode.parser.php7.Variables.getAnyVariable +import io.joern.bytecode.parser.php7.instructions.Utility.getAnyValue + +object Isset { + + def parseIssetIsemptyVar[_: P]: P[(ByteCodeKeyword, ByteCodeKeyword, Value)] = + P( + "ISSET_ISEMPTY_VAR " ~/ getByteCodeKeyword ~ " " ~ getByteCodeKeyword ~ + " " ~ (getAnyVariable | getStringLiteral)) + def getIssetIsemptyVar[_: P]: P[TripleValueOperation] = + P(parseIssetIsemptyVar.map(x => + TripleValueOperation("ISSET_ISEMPTY_VAR", x._1, x._2, x._3))) + + def parseIssetIsemptyDimObj[_: P]: P[(ByteCodeKeyword, Value, Value)] = + P("ISSET_ISEMPTY_DIM_OBJ " ~/ getByteCodeKeyword ~ " " ~ getAnyValue ~ " " ~ getAnyValue) + def getIssetIsemptyDimObj[_: P]: P[TripleValueOperation] = + P(parseIssetIsemptyDimObj.map(x => + TripleValueOperation("ISSET_ISEMPTY_DIM_OBJ", x._1, x._2, x._3))) + + def parseIssetIsemptyCv[_: P]: P[(ByteCodeKeyword, Variable)] = + P("ISSET_ISEMPTY_CV " ~/ getByteCodeKeyword ~ " " ~ getAnyVariable) + def 
getIssetIsemptyCv[_: P]: P[DualValueOperation] = + P(parseIssetIsemptyCv.map(x => + DualValueOperation("ISSET_ISEMPTY_CV", x._1, x._2))) + + def parseIssetIsemptyPropObj[_: P]: P[(ByteCodeKeyword, Value, Value)] = + P("ISSET_ISEMPTY_PROP_OBJ " ~ getByteCodeKeyword ~ " " ~ getAnyValue ~ " " ~ getAnyValue) + def getIssetIsemptyPropObj[_: P]: P[TripleValueOperation] = + P( + parseIssetIsemptyPropObj.map( + x => TripleValueOperation("ISSET_ISEMPTY_PROP_OBJ", x._1, x._2, x._3) + )) + + def parseIssetIsEmptyStaticProp[_: P]: P[Seq[Value]] = + P(&("ISSET_ISEMPTY_STATIC_PROP ") ~ "ISSET_ISEMPTY_STATIC_PROP" ~ (" " ~ getAnyValue).rep) + def getIssetIsEmptyStaticProp[_: P]: P[Opcode] = + P(parseIssetIsEmptyStaticProp.map { + case first :: second :: third :: Nil => + TripleValueOperation("ISSET_ISEMPTY_STATIC_PROP", first, second, third) + case first :: second :: third :: fourth :: Nil => + QuadrupleValueOperation("ISSET_ISEMPTY_STATIC_PROP", + first, + second, + third, + fourth) + }) + + def parseIssetIsemptyThis[_: P]: P[String] = P("ISSET_ISEMPTY_THIS".!) 
+  /** Turns a parsed `ISSET_ISEMPTY_THIS` into its operand-less operation. */
+  def getIssetIsemptyThis[_: P]: P[NoValueOperation] =
+    P(parseIssetIsemptyThis.map(_ => NoValueOperation("ISSET_ISEMPTY_THIS")))
+
+  /** Tries the ISSET_ISEMPTY_* parsers in a fixed order; the first success wins. */
+  def getIssetCommand[_: P]: P[Opcode] = P(
+    getIssetIsemptyVar
+      | getIssetIsemptyCv
+      | getIssetIsemptyDimObj
+      | getIssetIsemptyPropObj
+      | getIssetIsEmptyStaticProp
+      | getIssetIsemptyThis
+  )
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Jump.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Jump.scala
new file mode 100644
index 0000000..bf6c325
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Jump.scala
@@ -0,0 +1,79 @@
+package io.joern.bytecode.parser.php7.instructions
+
+import fastparse.NoWhitespace._
+import fastparse._
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php7.Basics.anyNumber
+import io.joern.bytecode.parser.php7.instructions.Utility.getAnyValue
+
+object Jump {
+
+  /** Reads `JMPNZ `, the tested value, one space and the jump target text. */
+  def parseJmpnz[_: P]: P[(Value, String)] = P(
+    "JMPNZ " ~ getAnyValue ~ " " ~ Utility.parseTarget
+  )
+
+  /** Builds the `JMPNZ` operation; the target text becomes an IntegerLiteral. */
+  def getJmpnz[_: P]: P[DualValueOperation] =
+    P(parseJmpnz.map {
+      case (condition, target) =>
+        DualValueOperation("JMPNZ", condition, IntegerLiteral(target.toLong))
+    })
+
+  /** Reads `JMPNZ_EX `, the tested value, one space, an `L` or `BB` marker and
+    * the text matched by `anyNumber.rep` (only that text is captured). */
+  def parseJmpnzEx[_: P]: P[(Value, String)] = P(
+    "JMPNZ_EX " ~ getAnyValue ~ " " ~ ("L" | "BB") ~ anyNumber.rep.!
+  )
+  /** Builds the `JMPNZ_EX` operation; the target text becomes an IntegerLiteral. */
+  def getJmpnzEx[_: P]: P[DualValueOperation] =
+    P(parseJmpnzEx.map(x =>
+      DualValueOperation("JMPNZ_EX", x._1, IntegerLiteral(x._2.toLong))))
+
+  /** Reads `JMPZ `, the tested value, one space and the jump target text. */
+  def parseJmpz[_: P]: P[(Value, String)] =
+    P("JMPZ " ~ getAnyValue ~ " " ~ Utility.parseTarget)
+  /** Builds the `JMPZ` operation. */
+  def getJmpz[_: P]: P[DualValueOperation] =
+    P(parseJmpz.map(x =>
+      DualValueOperation("JMPZ", x._1, IntegerLiteral(x._2.toLong))))
+
+  /** Reads `JMP ` and the jump target text. */
+  def parseJmp[_: P]: P[String] = P("JMP " ~ Utility.parseTarget)
+  /** Builds the unconditional `JMP` operation. */
+  def getJmp[_: P]: P[SingleValueOperation] =
+    P(parseJmp.map(x => SingleValueOperation("JMP", IntegerLiteral(x.toLong))))
+
+  /** Reads `JMPZ_EX `, the tested value, one space, an `L` or `BB` marker and
+    * the text matched by `anyNumber.rep` (only that text is captured). */
+  def parseJmpzEx[_: P]: P[(Value, String)] =
+    P("JMPZ_EX " ~ getAnyValue ~ " " ~ ("L" | "BB") ~ anyNumber.rep.!)
+  /** Builds the `JMPZ_EX` operation. */
+  def getJmpzEx[_: P]: P[DualValueOperation] =
+    P(parseJmpzEx.map(x =>
+      DualValueOperation("JMPZ_EX", x._1, IntegerLiteral(x._2.toLong))))
+
+  /** Reads `JMPZNZ `, the tested value and two space-separated jump targets. */
+  def parseJmpZnz[_: P]: P[(Value, String, String)] = P(
+    "JMPZNZ " ~ getAnyValue ~ " " ~
+      Utility.parseTarget ~ " " ~
+      Utility.parseTarget
+  )
+  /** Builds the `JMPZNZ` operation carrying both targets as IntegerLiterals. */
+  def getJmpZnz[_: P]: P[TripleValueOperation] =
+    P(
+      parseJmpZnz.map(
+        x =>
+          TripleValueOperation("JMPZNZ",
+                               x._1,
+                               IntegerLiteral(x._2.toLong),
+                               IntegerLiteral(x._3.toLong))
+      ))
+
+  /** Reads `JMP_SET `, the tested value, one space and the jump target text. */
+  def parseJmpSet[_: P]: P[(Value, String)] =
+    P("JMP_SET" ~ " " ~ getAnyValue ~ " " ~ Utility.parseTarget)
+  /** Builds the `JMP_SET` operation. */
+  def getJmpSet[_: P]: P[DualValueOperation] =
+    P(parseJmpSet.map(x =>
+      DualValueOperation("JMP_SET", x._1, IntegerLiteral(x._2.toLong))))
+
+  /** Reads `JMP_NULL `, the tested value, one space and the jump target text. */
+  def parseJmpNull[_: P]: P[(Value, String)] =
+    P("JMP_NULL " ~ getAnyValue ~ " " ~ Utility.parseTarget)
+
+  /** Builds the `JMP_NULL` operation. */
+  def getJmpNull[_: P]: P[DualValueOperation] =
+    P(parseJmpNull.map(x =>
+      DualValueOperation("JMP_NULL", x._1, IntegerLiteral(x._2.toLong))))
+
+  /** Dispatches on the jump mnemonic.
+    *
+    * Each alternative is guarded by a lookahead on the mnemonic including its
+    * trailing space; the guarded parser then consumes the mnemonic itself. The
+    * `JMP_NULL` guard now carries the trailing space like its siblings and like
+    * the literal in parseJmpNull.
+    */
+  def getJmpCommand[_: P]: P[Opcode] =
+    P(
+      &("JMPZ_EX ") ~ getJmpzEx |
+        &("JMPNZ_EX ") ~ getJmpnzEx |
+        &("JMPNZ ") ~ getJmpnz |
+        &("JMPZ ") ~ getJmpz |
+        &("JMPZNZ ") ~ getJmpZnz |
+        &("JMP_SET ") ~ getJmpSet |
+        &("JMP_NULL ") ~ getJmpNull |
+        &("JMP ") ~ getJmp
+    )
+
+}
diff --git
a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/LambdaRelated.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/LambdaRelated.scala
new file mode 100644
index 0000000..40dc8c9
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/LambdaRelated.scala
@@ -0,0 +1,65 @@
+package io.joern.bytecode.parser.php7.instructions
+
+import fastparse.NoWhitespace._
+import fastparse.{P, _}
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php7.Basics.anyNumber
+import io.joern.bytecode.parser.php7.instructions.Utility.getAnyValue
+
+/** Parsers for the YIELD, YIELD_FROM, TICKS and FAST_CALL instructions. */
+object LambdaRelated {
+
+  def parseYield[_: P]: P[Seq[Value]] =
+    P("YIELD" ~ (" " ~ getAnyValue).rep)
+
+  /** Builds the YIELD operation for zero, one or two operands.
+    *
+    * The operands are matched with `Seq(...)` extractors so the result does not
+    * depend on the concrete collection `rep` produces, and any other operand
+    * count raises [[UnexpectedArgumentCount]] instead of a bare `MatchError`.
+    */
+  def getYieldOpcode[_: P]: P[Opcode] =
+    P(parseYield.map {
+      case Seq() => NoValueOperation("YIELD")
+      case Seq(single) => SingleValueOperation("YIELD", single)
+      case Seq(first, second) => DualValueOperation("YIELD", first, second)
+      case other =>
+        throw new UnexpectedArgumentCount("YIELD", Seq(0, 1, 2), other.length)
+    })
+
+  def parseYieldFrom[_: P]: P[Seq[Value]] =
+    P("YIELD_FROM" ~ (" " ~ getAnyValue).rep)
+
+  /** Builds the YIELD_FROM operation, which takes exactly one operand. */
+  def getYieldFrom[_: P]: P[Opcode] =
+    P(parseYieldFrom.map {
+      case Seq(single) => SingleValueOperation("YIELD_FROM", single)
+      case other =>
+        throw new UnexpectedArgumentCount("YIELD_FROM", Seq(1), other.length)
+    })
+
+  def getYield[_: P]: P[Opcode] =
+    P(
+      &("YIELD_FROM ") ~ getYieldFrom |
+        &("YIELD " | "YIELD") ~ getYieldOpcode
+    )
+
+  // TICKS and FAST_CALL need at least one digit; an empty capture would make
+  // `toLong` throw a NumberFormatException instead of failing the parse.
+  def parseTicks[_: P]: P[String] =
+    P(&("TICKS ") ~ "TICKS" ~ " " ~ anyNumber.rep(1).!)
+  def getTicks[_: P]: P[Opcode] =
+    P(parseTicks.map(x =>
+      SingleValueOperation("TICKS", IntegerLiteral(x.toLong))))
+
+  def parseFastCall[_: P]: P[(String, Option[Value])] =
+    P("FAST_CALL " ~ ("BB" | "L") ~ anyNumber.rep(1).! ~ (" " ~ getAnyValue).?)
+  def getFastCall[_: P]: P[Opcode] =
+    P(parseFastCall.map {
+      case (line, Some(value)) =>
+        DualValueOperation("FAST_CALL", IntegerLiteral(line.toLong), value)
+      case (line, None) =>
+        SingleValueOperation("FAST_CALL", IntegerLiteral(line.toLong))
+    })
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Rope.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Rope.scala
new file mode 100644
index 0000000..7728a03
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Rope.scala
@@ -0,0 +1,43 @@
+package io.joern.bytecode.parser.php7.instructions
+
+import fastparse.NoWhitespace._
+import fastparse.{P, _}
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php7.Basics.anyNumber
+import io.joern.bytecode.parser.php7.Variables.getAnyVariable
+import io.joern.bytecode.parser.php7.instructions.Utility.getAnyValue
+
+/** Parsers for the ROPE_INIT, ROPE_ADD and ROPE_END instructions. */
+object Rope {
+
+  // The leading number needs at least one digit so that a missing number is a
+  // parse failure rather than a NumberFormatException from `"".toLong`.
+  def parseRopeInit[_: P]: P[(String, Value)] =
+    P("ROPE_INIT " ~/ anyNumber.rep(1).! ~ " " ~ getAnyValue)
+  def getRopeInit[_: P]: P[DualValueOperation] =
+    P(parseRopeInit.map(x =>
+      DualValueOperation("ROPE_INIT", IntegerLiteral(x._1.toLong), x._2)))
+
+  def parseRopeAdd[_: P]: P[(String, Variable, Value)] =
+    P("ROPE_ADD " ~/ anyNumber.rep(1).! ~ " " ~ getAnyVariable ~ " " ~ getAnyValue)
+  def getRopeAdd[_: P]: P[TripleValueOperation] =
+    P(
+      parseRopeAdd.map(
+        x =>
+          TripleValueOperation("ROPE_ADD",
+                               IntegerLiteral(x._1.toLong),
+                               x._2,
+                               x._3)))
+
+  def parseRopeEnd[_: P]: P[(String, Variable, Value)] =
+    P("ROPE_END " ~/ anyNumber.rep(1).! ~ " " ~ getAnyVariable ~ " " ~ getAnyValue)
+  def getRopeEnd[_: P]: P[TripleValueOperation] =
+    P(
+      parseRopeEnd.map(
+        x =>
+          TripleValueOperation("ROPE_END",
+                               IntegerLiteral(x._1.toLong),
+                               x._2,
+                               x._3)))
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/TypeRelated.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/TypeRelated.scala
new file mode 100644
index 0000000..0fe1168
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/TypeRelated.scala
@@ -0,0 +1,48 @@
+package io.joern.bytecode.parser.php7.instructions
+
+import fastparse.NoWhitespace._
+import fastparse._
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php7.Basics.anyLetter
+import io.joern.bytecode.parser.php7.instructions.Utility.{
+  getAnyValue,
+  parseTarget
+}
+
+/** Parsers for the CAST, INSTANCEOF and COALESCE instructions. */
+object TypeRelated {
+
+  def parseCastTypeString[_: P]: P[String] = P("(" ~ anyLetter.rep.! ~ ")")
+  def parseCast[_: P]: P[(Object, Value)] =
+    P("CAST " ~ (parseCastTypeString | getAnyValue) ~ " " ~ getAnyValue)
+  def getCast[_: P]: P[DualValueOperation] =
+    P(parseCast.map(x =>
+      x._1 match {
+        case value: String =>
+          DualValueOperation("CAST", StringLiteral(value), x._2)
+        case value: Value => DualValueOperation("CAST", value, x._2)
+      }))
+
+  def parseInstanceOf[_: P]: P[Seq[Value]] =
+    P(&("INSTANCEOF ") ~ "INSTANCEOF" ~ (" " ~ getAnyValue).rep)
+
+  /** Builds the INSTANCEOF operation for two or four operands; any other count
+    * raises [[UnexpectedArgumentCount]] instead of a bare `MatchError`.
+    */
+  def getInstanceOf[_: P]: P[Opcode] =
+    P(parseInstanceOf.map {
+      case Seq(first, second) =>
+        DualValueOperation("INSTANCEOF", first, second)
+      case Seq(first, second, third, fourth) =>
+        QuadrupleValueOperation("INSTANCEOF", first, second, third, fourth)
+      case other =>
+        throw new UnexpectedArgumentCount("INSTANCEOF", Seq(2, 4), other.length)
+    })
+
+  def parseCoalesce[_: P]: P[(Value, String)] =
+    P("COALESCE " ~/ getAnyValue ~ " " ~ parseTarget)
+  def getCoalesce[_: P]: P[Opcode] =
+    P(parseCoalesce.map(x =>
+      DualValueOperation("COALESCE", x._1, IntegerLiteral(x._2.toLong))))
+
+}
diff --git
a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/UnexpectedArgumentCount.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/UnexpectedArgumentCount.scala
new file mode 100644
index 0000000..e8541f1
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/UnexpectedArgumentCount.scala
@@ -0,0 +1,18 @@
+package io.joern.bytecode.parser.php7.instructions
+
+/** Raised when an opcode is met with an operand count that is not supported.
+  *
+  * It derives from `Exception` rather than directly from `Throwable`, so
+  * handlers written as `case e: Exception` see it as well.
+  *
+  * @param opcode      name of the offending opcode
+  * @param expected    operand counts that are supported
+  * @param encountered operand count that was actually found
+  */
+class UnexpectedArgumentCount(opcode: String,
+                              expected: Seq[Int],
+                              encountered: Int)
+    extends Exception {
+  override def getMessage: String =
+    s"$opcode was encountered with $encountered args but only {${expected.mkString(",")}} are supported"
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Utility.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Utility.scala
new file mode 100644
index 0000000..fb6f22b
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php7/instructions/Utility.scala
@@ -0,0 +1,27 @@
+package io.joern.bytecode.parser.php7.instructions
+
+import fastparse.NoWhitespace._
+import fastparse.{P, _}
+import io.joern.bytecode.parser.constructs.Value
+import io.joern.bytecode.parser.php7.Literals.getAnyLiteral
+import io.joern.bytecode.parser.php7.Variables.getAnyVariable
+// NOTE(review): sibling php7 parsers import anyNumber from php7.Basics;
+// confirm that depending on the php8 package here is intended.
+import io.joern.bytecode.parser.php8.Basics.anyNumber
+
+/** Small parsers shared by the php7 instruction parsers. */
+object Utility {
+
+  /** Any operand: a literal or, failing that, a variable. */
+  def getAnyValue[_: P]: P[Value] = P(getAnyLiteral | getAnyVariable)
+
+  /** Captures the text between two double quotes; the text itself cannot
+    * contain a double quote because no escape sequence is recognised.
+    */
+  def parseStringInQuotes[_: P]: P[String] =
+    P("\"" ~ (!"\"" ~ AnyChar).rep.! ~ "\"")
+
+  /** Captures the digits of a jump target written as `BB<n>` or `L<n>`. */
+  def parseTarget[_: P]: P[String] = P(("BB" | "L") ~ anyNumber.rep(1).!)
+ +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/Basics.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/Basics.scala new file mode 100644 index 0000000..9c38adb --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/Basics.scala @@ -0,0 +1,37 @@ +package io.joern.bytecode.parser.php8 + +import fastparse.NoWhitespace._ +import fastparse._ + +object Basics { + + def anyNonCapitalLetter[_: P]: P[Unit] = CharIn("a-z") + + def anyCapitalLetter[_: P]: P[Unit] = CharIn("A-Z") + + def anyLetter[_: P]: P[Unit] = P(anyNonCapitalLetter | anyCapitalLetter) + + def anyNumber[_: P]: P[Unit] = CharIn("0-9") + + //def legalIdentifier[_: P]: P[Unit] = + // P("_".? ~ (anyLetter | anyLetter | anyNumber | "_").rep) + def legalIdentifier[_: P]: P[Unit] = + P((!(" " | ")" | "(") ~ AnyChar).rep) + + def legalFileIdentifier[_: P]: P[Unit] = + P((anyLetter | anyNumber | " " | "_" | "-" | ".").rep) + + def nonEscapedAnyChar[_: P]: P[Unit] = P(!"\\" ~ AnyChar) + + def escapedQuotation[_: P]: P[Unit] = P("\\\"") + + def escapedSlash[_: P]: P[Unit] = P("\\") + + def whiteSpaceAndWhiteSpaceControl[_: P]: P[Unit] = + P('\n'.toString | '\t'.toString | CharIn(" ")) + + def legalString[_: P]: P[Unit] = + P("\"" ~ + (!"\"" ~ (escapedQuotation | whiteSpaceAndWhiteSpaceControl | escapedSlash | nonEscapedAnyChar)).rep + ~ "\"") +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/ByteCodeBlock.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/ByteCodeBlock.scala new file mode 100644 index 0000000..670a3e4 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/ByteCodeBlock.scala @@ -0,0 +1,65 @@ +package io.joern.bytecode.parser.php8 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.Basics.anyNumber +import io.joern.bytecode.parser.php8.ExceptionTableBlock.getExceptionTableBlock 
+import io.joern.bytecode.parser.php8.HeaderBlock.getHeaderBlock +import io.joern.bytecode.parser.php8.Instructions.getInstruction +import io.joern.bytecode.parser.php8.LiveRangesBlock.getLiveRangesBlock + +object ByteCodeBlock { + + def parseLineNumber[_: P]: P[String] = P(anyNumber.rep.!) + def getLineNumber[_: P]: P[Integer] = + P(parseLineNumber.map(x => Integer.valueOf(x))) + + def parseFileLineNumber[_: P]: P[String] = P("(" ~ anyNumber.rep.! ~ ")") + def getFileLineNUmber[_: P]: P[Integer] = + P(parseFileLineNumber.map(x => Integer.valueOf(x))) + + def parseDefiningInstructionLine[_: P]: P[(Integer, Instruction)] = + P(getLineNumber ~/ " ".rep ~ getInstruction) + def getDefiningInstructionLine[_: P]: P[InstructionLine] = + P(parseDefiningInstructionLine.map(x => + InstructionLine(Some(x._1), None, x._2))) + + def parseByteCodeBlock[_: P]: P[(MethodHeader, + MethodHeaderMetaParsingInfo, + MethodHeaderMetaFileInfo, + MethodHeaderRangeLine, + Seq[InstructionLine], + Option[LiveRanges], + Option[ExceptionTable])] = + P( + getHeaderBlock ~ + (&(anyNumber) ~ getDefiningInstructionLine ~/ "\n").rep ~ + (&("LIVE RANGES:") ~ getLiveRangesBlock).? ~ + (&("EXCEPTION TABLE:") ~ getExceptionTableBlock).?) 
+ def getByteCodeBlock[_: P]: P[ByteCodeDefinitionsBlock] = + P( + parseByteCodeBlock.map(x => + ByteCodeDefinitionsBlock( + x._1.name, + x._1.classname, + x._1.namespace, + x._1.lines, + x._1.args, + x._1.vars, + x._1.tmps, + x._2.metaInfo, + x._3.fileName, + x._3.lineStart, + x._3.lineEnd, + Some(x._4.string), + x._5.map(it => { + val tmp = it; + tmp.fileLine = Some(x._3.lineStart); + tmp + }), + x._6, + x._7 + ))) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/ControlFlowBlock.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/ControlFlowBlock.scala new file mode 100644 index 0000000..04053c0 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/ControlFlowBlock.scala @@ -0,0 +1,98 @@ +package io.joern.bytecode.parser.php8 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.Basics.anyNumber +import io.joern.bytecode.parser.php8.ExceptionTableBlock.getExceptionTableBlock +import io.joern.bytecode.parser.php8.HeaderBlock.getHeaderBlock +import io.joern.bytecode.parser.php8.Instructions.getInstruction +import io.joern.bytecode.parser.php8.LiveRangesBlock.getLiveRangesBlock + +object ControlFlowBlock { + + def parseBBInstructionLine[_: P]: P[Instruction] = + P((" " | anyNumber).rep ~ getInstruction) + def getBBInstructionLine[_: P]: P[InstructionLine] = + P(parseBBInstructionLine.map(x => InstructionLine(None, None, x))) + + def parseBBIdent[_: P]: P[String] = P("BB" ~ anyNumber.rep(1).!) + + def getBBIdent[_: P]: P[Int] = P(parseBBIdent.map(Integer.parseInt)) + + def parseBBAttribute[_: P]: P[Unit] = + P("start" | "exit" | "target" | "follow" | "unreachable_free" | "unreachable" | "catch" | "try" | "finally_end" | "finally") + + def parseBBInstructionLines[_: P]: P[(String, String)] = + P("lines=[" ~ anyNumber.rep.! ~ "-" ~ anyNumber.rep.! 
~ "]") + def getBBInstructionLines[_: P]: P[(Int, Int)] = + P(parseBBInstructionLines.map(x => + (Integer.parseInt(x._1), Integer.parseInt(x._2)))) + + def parseBBDefinitionLine[_: P]: P[(Int, Seq[String], (Int, Int))] = + P(getBBIdent ~ ":\n" ~ " ".rep ~ "; " ~ (parseBBAttribute.! ~ " ").rep ~ getBBInstructionLines) + def getBBDefinitionLine[_: P]: P[BBDefinitionLine] = + P(parseBBDefinitionLine.map(x => + BBDefinitionLine(x._1, x._2, x._3._1, x._3._2))) + + def parseBBToLine[_: P]: P[Seq[String]] = + P(" ".rep ~ ";" ~ " to=(" ~ ("BB" ~ anyNumber.rep.! ~ ", ".?).rep ~ ")") + def getBBToLine[_: P]: P[Seq[Int]] = + P(parseBBToLine.map(x => x.map(Integer.parseInt))) + + def parseBasicBlock[_: P] + : P[(BBDefinitionLine, Option[Seq[Int]], Seq[InstructionLine])] = + P(getBBDefinitionLine ~ ("\n" ~ getBBToLine).? ~ "\n" ~ (getBBInstructionLine ~ "\n").rep) + + def getBasicBlock[_: P]: P[BasicBlock] = + P( + parseBasicBlock.map( + x => + BasicBlock(x._1.number, + x._1.attributes, + x._1.firstInstruction, + x._1.lastInstruction, + x._3, + x._2))) + + def parseBBSeq[_: P]: P[(BasicBlock, Seq[BasicBlock])] = + P(getBasicBlock ~ ("\n" ~ getBasicBlock).rep) + + def getBBSeq[_: P]: P[Seq[BasicBlock]] = { + parseBBSeq.map(x => x._1 +: x._2) + } + + def parseControlFlowBlock[_: P]: P[(MethodHeader, + MethodHeaderMetaParsingInfo, + MethodHeaderMetaFileInfo, + MethodHeaderRangeLine, + Seq[BasicBlock], + Option[LiveRanges], + Option[ExceptionTable])] = + P( + getHeaderBlock ~ getBBSeq ~ + (&("LIVE RANGES:") ~ getLiveRangesBlock).? ~ + (&("EXCEPTION TABLE:") ~ getExceptionTableBlock).?) 
+ + def getControlFlowBlock[_: P]: P[ControlFlowDefinitionsBlock] = + P( + parseControlFlowBlock.map(x => + ControlFlowDefinitionsBlock( + x._1.name, + x._1.classname, + x._1.namespace, + x._1.lines, + x._1.args, + x._1.vars, + x._1.tmps, + x._2.metaInfo, + x._3.fileName, + x._3.lineStart, + x._3.lineEnd, + Some(x._4.string), + x._5, + x._6, + x._7 + ))) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/ExceptionTableBlock.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/ExceptionTableBlock.scala new file mode 100644 index 0000000..1bccb01 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/ExceptionTableBlock.scala @@ -0,0 +1,26 @@ +package io.joern.bytecode.parser.php8 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs.{ExceptionTable, ExceptionTableLine} +import io.joern.bytecode.parser.php8.instructions.Utility.parseTarget + +object ExceptionTableBlock { + + def parseExceptionTableLine[_: P]: P[Seq[String]] = + P( + " ".rep ~ (" ".rep ~ (parseTarget | "-".!) 
~ ", ".?).rep(1) ~ "\n" + ) + + def getExceptionTableLine[_: P]: P[ExceptionTableLine] = + P(parseExceptionTableLine.map { x => + ExceptionTableLine(x.toList) + }) + + def parseExceptionTableBlock[_: P]: P[Seq[ExceptionTableLine]] = + P("EXCEPTION TABLE:\n" ~/ getExceptionTableLine.rep(1)) + + def getExceptionTableBlock[_: P]: P[ExceptionTable] = + P(parseExceptionTableBlock.map(x => ExceptionTable(x))) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/FileParser8.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/FileParser8.scala new file mode 100644 index 0000000..69f9efc --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/FileParser8.scala @@ -0,0 +1,187 @@ +package io.joern.bytecode.parser.php8 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.FileParser +import io.joern.bytecode.parser.constructs.MethodDefinitionPair +import io.joern.bytecode.parser.php8.MethodDefinition.getFullMethodDefinitionBlock +import io.joern.reporting.Reporting + +import java.io.{ByteArrayOutputStream, PrintWriter, File => JFile} +import java.nio.charset.StandardCharsets +import java.text.SimpleDateFormat +import scala.concurrent.duration.Duration +import scala.concurrent.{Await, ExecutionContext, Future} +import scala.sys.process._ + +object FileParser8 { + + trait PHPParseError extends Throwable { + def getMessage: String + } + + case class PHPSyntaxError(msg: String) extends PHPParseError { + override def getMessage: String = s"Syntax Error: $msg" + } + + case class PHPFatalError(msg: String) extends PHPParseError { + override def getMessage: String = s"PHP Error: $msg" + } + + case class PHPWarning(msg: String) extends PHPParseError { + override def getMessage: String = s"PHP Warn: $msg" + } + + case class BytecodeParseError(msg: String) extends Throwable { + override def getMessage: String = msg + } + + val PREPROCESSING = false + val MAX_BYTECODE_OUTPUT = 1000 + + def 
parsePossibleParseError[_: P]: P[String] = // captures the rest of the input after a leading "PHP Parse error: "
+    P("PHP Parse error: " ~ AnyChar.rep.!)
+  // Same as parsePossibleParseError for a leading "PHP Fatal error: ".
+  def parsePossibleFatalError[_: P]: P[String] =
+    P("PHP Fatal error: " ~ AnyChar.rep.!)
+  // Same as parsePossibleParseError for a leading "PHP Warning: ".
+  def parsePossibleWarning[_: P]: P[String] =
+    P("PHP Warning: " ~ AnyChar.rep.!)
+
+  def parseLastLine[_: P]: P[Unit] =
+    P("No syntax errors " ~/ "detected in " ~/ AnyChar.rep)
+  // A dump is a run of newline-prefixed method definition blocks reaching the end of the input.
+  def parseByteCodeDump[_: P]: P[Seq[MethodDefinitionPair]] =
+    P(("\n" ~ getFullMethodDefinitionBlock).rep ~ End)
+  // Consumes PHP reports and newlines without throwing.
+  def parsePHPEventsWithoutThrowing[_: P]: P[Unit] =
+    P((parsePossibleParseError | parsePossibleFatalError | parsePossibleWarning | "\n").rep)
+  // Throws PHPSyntaxError, PHPFatalError or PHPWarning when `input` starts with the matching PHP report.
+  def parsePHPEvents(input: String): Unit = {
+    fastparse.parse(input, parsePossibleParseError(_)) match {
+      case Parsed.Success(errorMessage, _) => throw PHPSyntaxError(errorMessage)
+      case _ =>
+    }
+    fastparse.parse(input, parsePossibleFatalError(_)) match {
+      case Parsed.Success(errorMessage, _) => throw PHPFatalError(errorMessage)
+      case _ =>
+    }
+    fastparse.parse(input, parsePossibleWarning(_)) match {
+      case Parsed.Success(message, _) => throw PHPWarning(message)
+      case _ =>
+    }
+  }
+  // Rejects input that starts with a PHP report, then parses the dump; failures while parsing are rethrown as BytecodeParseError.
+  def actualParse(input: String,
+                  file: String,
+                  strict: Boolean = true): Seq[MethodDefinitionPair] = {
+    //Some(new PrintWriter("input_dump.txt")).foreach { p =>
+    //  p.write(input); p.close()
+    //}
+    parsePHPEvents(input)
+    try {
+      fastparse.parse(input, parseByteCodeDump(_)) match {
+        case Parsed.Success(value, length) =>
+          if (length != input.length) { // NOTE(review): parseByteCodeDump ends with End, so a success should always reach input.length - confirm this branch is reachable
+            val remainder = input.substring(length, input.length)
+            if (strict) {
+              parsePHPEvents(remainder)
+            } else {
+              fastparse.parse(remainder, parsePHPEventsWithoutThrowing(_)) match {
+                case Parsed.Success(_, length) =>
+                  if (length != remainder.length) {
+                    throw BytecodeParseError(
+                      s"remaining code is:\n>>${remainder.substring(length, remainder.length.min(length + MAX_BYTECODE_OUTPUT))} [...]<<")
+                  }
+                case x: Parsed.Failure =>
+                  val t = x.trace(true)
+                  throw BytecodeParseError(s"${x.toString()}\n${t.longMsg}")
+              }
+            }
+          }
+          value
+        case x: Parsed.Failure =>
+          val t = x.trace(true)
+          throw BytecodeParseError(s"${x.toString()}\n${t.longMsg}")
+      }
+    } catch {
+      case scala.util.control.NonFatal(x) => // fatal JVM errors are left to propagate unwrapped
+        throw BytecodeParseError(
+          s"In file $file encountered:${x.toString}\n${Reporting.stringifyErrorStack(x)}\n")
+    }
+  }
+  // Sets the file's modification time to the fixed date 10/03/1990 (pattern MM/dd/yyyy); returns setLastModified's result.
+  def setTimestamp(filePath: String): Boolean = {
+    val file = new JFile(filePath)
+    val date = new SimpleDateFormat("MM/dd/yyyy")
+    val last = date.parse("10/03/1990")
+    file.setLastModified(last.getTime)
+  }
+  // Runs the interpreter's syntax check with the opcache debug flags below and returns what it wrote to stderr.
+  def generatePhpByteCodeDump(filePath: String,
+                              phpInterpreter: String,
+                              phpini: String): String = {
+    val stderrStream = new ByteArrayOutputStream()
+    val stdoutStream = new ByteArrayOutputStream()
+    val stdoutWriter =
+      new PrintWriter(stdoutStream, true, StandardCharsets.UTF_8)
+    val stderrWriter =
+      new PrintWriter(stderrStream, true, StandardCharsets.UTF_8)
+    val command = s"$phpInterpreter -c $phpini -d opcache.enable_cli=1 -d opcache.opt_debug_level=0x50000 -d opcache.log_verbosity_level=0 --syntax-check ${"\""}" + filePath + s"${"\""}"
+    command.!(ProcessLogger(stdoutWriter.println, stderrWriter.println)) // NOTE(review): how the quoted path is tokenised depends on scala.sys.process - verify paths with blanks or quotes
+    stderrWriter.close()
+    stdoutWriter.close()
+    val ret = new String(stderrStream.toByteArray, StandardCharsets.UTF_8) // decode with the charset the writer encoded with
+    ret
+  }
+  // Dumps the bytecode of `file` with the given interpreter and ini file, then parses the dump.
+  def parseFromFile(file: java.io.File,
+                    phpInterpreter: String,
+                    phpini: String,
+                    strict: Boolean = true): Seq[MethodDefinitionPair] = {
+    val string =
+      generatePhpByteCodeDump(file.getPath, phpInterpreter, phpini: String)
+    actualParse(string, file.getPath, strict)
+  }
+  // Parses an already generated dump; the file name reported in errors is empty.
+  def parseFromByteCodeDump(
+      dump: String,
+      strict: Boolean = true): Seq[MethodDefinitionPair] = {
+    actualParse(dump, "", strict)
+  }
+}
+
+class FileParser8(files: Seq[JFile],
+                  phpInterpreter: String,
+                  phpini: String,
+                  strict: Boolean)
+    extends FileParser {
+
+  val name = "FileParser"
+
+  def run(): List[Seq[MethodDefinitionPair]] = {
+    implicit val ec: ExecutionContext = ExecutionContext.global
+
+    val list_of_futures: Seq[Future[Option[Seq[MethodDefinitionPair]]]] =
+      files.map(file =>
Future { + var to_return: Option[Seq[MethodDefinitionPair]] = None + withErrorReporting() { + FileParser8 + .parseFromFile(file, phpInterpreter, phpini, strict) match { + case Nil => + reportWarning(file.getPath, + "", + "", + "", + "no methods extracted - usually a bad sign") + case x => to_return = Some(x) + } + } + to_return + }) + val nested_res = + Await.result(Future.sequence(list_of_futures), Duration.Inf) + nested_res.flatten.toList + } +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/HeaderBlock.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/HeaderBlock.scala new file mode 100644 index 0000000..6bbcaf8 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/HeaderBlock.scala @@ -0,0 +1,134 @@ +package io.joern.bytecode.parser.php8 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs.{ + MethodHeader, + MethodHeaderMetaFileInfo, + MethodHeaderMetaParsingInfo, + MethodHeaderRangeLine +} +import io.joern.bytecode.parser.php8.Basics.{anyLetter, anyNumber} + +object HeaderBlock { + + def parseValidNameSpace[_: P]: P[String] = P((anyLetter | "\\").rep.! ~ "\\") + + def parseValidClassname[_: P]: P[String] = + P((("_" | anyLetter) ~ ("_" | anyLetter | anyNumber).rep).! ~ "::") + + def parseHeaderBlockMethodIdentifier[_: P]: P[String] = + P(("::" | !(" " | ":") ~ AnyChar).rep.!) 
+ + def getHeaderBlockMethodIdentifier[_: P] + : P[(Option[String], Option[String], String)] = + P(parseHeaderBlockMethodIdentifier.map(x => { + x.split("::").toList match { + case classString :: methodString :: Nil => + val lastSlash = classString.lastIndexOf("\\") + if (lastSlash == -1) { + (None, Some(classString), methodString) + } else { + val (namespace, classname) = classString.splitAt(lastSlash) + (if (namespace == "") None else Some(namespace), + Some(classname.substring(1)), + methodString) + } + case methodString :: Nil => + val lastSlash = methodString.lastIndexOf("\\") + if (lastSlash == -1) { + (None, None, methodString) + } else { + val (namespace, methodname) = methodString.splitAt(lastSlash) + (if (namespace == "") None else Some(namespace), + None, + methodname.substring(1)) + } + case _ => throw new RuntimeException("unexpected") + } + })) + + def parseHeaderBlockLinesValue[_: P]: P[String] = + P("lines" ~ "=" ~ anyNumber.rep.!) + + def getHeaderBLockLinesValue[_: P]: P[Int] = + P(parseHeaderBlockLinesValue.map(_.toInt)) + + def parseHeaderBlockArgsValue[_: P]: P[String] = + P("args" ~ "=" ~ anyNumber.rep.!) + + def getHeaderBlocArgsValue[_: P]: P[Int] = + P(parseHeaderBlockArgsValue.map(_.toInt)) + + def parseHeaderBlockVarsValue[_: P]: P[String] = + P("vars" ~ "=" ~ anyNumber.rep.!) + + def getHeaderBlockVarsValue[_: P]: P[Int] = + P(parseHeaderBlockVarsValue.map(_.toInt)) + + def parseHeaderBlockTmpsValue[_: P]: P[String] = + P("tmps" ~ "=" ~ anyNumber.rep.!) 
+ + def getHeaderBlockTmpsValue[_: P]: P[Int] = + P(parseHeaderBlockTmpsValue.map(_.toInt)) + + def parseHeaderBlockMethodMetaBlock[_: P]: P[(Int, Int, Int, Int)] = + P( + "(" ~ getHeaderBLockLinesValue ~ + ", " ~ getHeaderBlocArgsValue ~ + ", " ~ getHeaderBlockVarsValue ~ + ", " ~ getHeaderBlockTmpsValue ~ ")") + + def parseHeaderBlockMethodDefinitionLine[_: P] + : P[(Option[String], Option[String], String, (Int, Int, Int, Int))] = + P(getHeaderBlockMethodIdentifier ~ ":\n" ~ " ".rep ~ "; " ~ parseHeaderBlockMethodMetaBlock) + + def getHeaderBLockMethodDefinitionLine[_: P]: P[MethodHeader] = + P( + parseHeaderBlockMethodDefinitionLine.map( + result => + MethodHeader(result._3, + result._2, + result._1, + result._4._1, + result._4._2, + result._4._3, + result._4._4))) + + // time definition + def parseHeaderBlockMetaLineParsingWord[_: P]: P[Unit] = + P("before" | "optimizer" | "block" | "pass") + + def parseHeaderBlockMetaLineParsing[_: P]: P[Seq[String]] = + P(" ".rep ~ ";" ~ " " ~ "(" ~ (parseHeaderBlockMetaLineParsingWord.! ~ " ".?).rep ~ ")") + + def getHeaderBlockMetaLineParsing[_: P]: P[MethodHeaderMetaParsingInfo] = + P(parseHeaderBlockMetaLineParsing.map(x => MethodHeaderMetaParsingInfo(x))) + + def newline[_: P]: P[Unit] = P("\n" | "\r\n" | "\r" | "\f") + + def parseHeaderBlockMetaLineFileInfo[_: P]: P[String] = + P(" ".rep ~ ";" ~ " " ~ (!newline ~ AnyChar).rep.!) + + def getHeaderBlockMetaLineFileInfo[_: P]: P[MethodHeaderMetaFileInfo] = + P(parseHeaderBlockMetaLineFileInfo.map(x => MethodHeaderMetaFileInfo(x))) + + def parseHeaderBlockRangeLine[_: P]: P[(String, String)] = + P(" ".rep ~ "; " ~ "return".! ~ (!"\n" ~ AnyChar).rep.!) 
+ + def getHeaderBlockRangeLine[_: P]: P[MethodHeaderRangeLine] = + parseHeaderBlockRangeLine + .map(x => x._1 + x._2) + .map(MethodHeaderRangeLine) + + def getHeaderBlock[_: P]: P[(MethodHeader, + MethodHeaderMetaParsingInfo, + MethodHeaderMetaFileInfo, + MethodHeaderRangeLine)] = + P( + getHeaderBLockMethodDefinitionLine ~ "\n" ~ + getHeaderBlockMetaLineParsing ~ "\n" ~ + getHeaderBlockMetaLineFileInfo ~ "\n" ~ + getHeaderBlockRangeLine ~ "\n") + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/Instructions.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/Instructions.scala new file mode 100644 index 0000000..7d91f88 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/Instructions.scala @@ -0,0 +1,67 @@ +package io.joern.bytecode.parser.php8 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.Basics.anyNumber +import io.joern.bytecode.parser.php8.Variables.getAnyVariable +import io.joern.bytecode.parser.php8.instructions.Array._ +import io.joern.bytecode.parser.php8.instructions.Assign._ +import io.joern.bytecode.parser.php8.instructions.Bind._ +import io.joern.bytecode.parser.php8.instructions.CallRelated._ +import io.joern.bytecode.parser.php8.instructions.ClassRelated._ +import io.joern.bytecode.parser.php8.instructions.ControlConstructs._ +import io.joern.bytecode.parser.php8.instructions.Fe._ +import io.joern.bytecode.parser.php8.instructions.Fetch._ +import io.joern.bytecode.parser.php8.instructions.Generic._ +import io.joern.bytecode.parser.php8.instructions.IncDecStaticProp._ +import io.joern.bytecode.parser.php8.instructions.Isset._ +import io.joern.bytecode.parser.php8.instructions.Jump._ +import io.joern.bytecode.parser.php8.instructions.LambdaRelated._ +import io.joern.bytecode.parser.php8.instructions.MatchRelated._ +import 
io.joern.bytecode.parser.php8.instructions.Rope._ +import io.joern.bytecode.parser.php8.instructions.TypeRelated._ +import io.joern.bytecode.parser.php8.instructions.Utility.getAnyValue + +object Instructions { + + // here we may extend the available options each time we create a new one + def parseOperation[_: P]: P[Opcode] = + P(parseInitCallCommands | getNew | getInitMethodCall | getSwitchStatement | //order is important as INIT_FCALL is substring of INIT_FCALL_BY_NAME + parseAssignCommand | getCheckFuncArg | + getSendCommand | getRecv | getRecvInit | getRecvVariadic | getTicks | getFuncGetArgs | getIssetIsEmptyStaticProp | + getFeReset | parseFetchCommand | getAssignDim | getReturnCommand | getBindLexical | getInstanceOf | getGetClass | + getRopeInit | getRopeAdd | getRopeEnd | getCast | getFeResetRw | getFeFetch | getBindStatic | getFastCall | + getJmpCommand | getInitArray | getAddArrayElement | getInArray | getExit | getCatch | getCoalesce | getYield | getFastRet | + getIssetCommand | getQuadrupleValueCommand | getVerifyReturnType | getDeclareClass | getIncDecStaticProp | getAddArrayUnpack | + getDeclareFunction | getDeclareLambdaFunction | getMatch | getTypeCheck | + getTripleValueCommand | getDualValueCommand | getSingleValueCommand | getNoValueCommand | getDeclareAnonClass) //jumps + + def getOperation[_: P]: P[Operation] = + P(parseOperation.map(x => constructs.Operation(x))) + + def parseAssignment[_: P]: P[(Variable, Operation)] = + P(getAnyVariable ~ " = " ~ getOperation) + + def getAssignment[_: P]: P[Assignment] = + P(parseAssignment.map(x => constructs.Assignment(x._1, x._2.op))) + + def getInstruction[_: P]: P[Instruction] = P(getAssignment | getOperation) + + def getDeclareFunction[_: P]: P[DualValueOperation] = + P("DECLARE_FUNCTION".! ~/ " " ~ getAnyValue ~ " " ~ anyNumber.rep.!) 
+ .map(x => DualValueOperation(x._1, x._2, IntegerLiteral(x._3.toLong))) + + def parseDeclareLambdaFunction[_: P]: P[Value] = { + P( + "DECLARE_LAMBDA_FUNCTION" ~/ " " ~ (getAnyValue | anyNumber + .rep(1) + .! + .map(x => IntegerLiteral(x.toLong)))) + } + def getDeclareLambdaFunction[_: P]: P[SingleValueOperation] = { + P(parseDeclareLambdaFunction).map( + SingleValueOperation("DECLARE_LAMBDA_FUNCTION", _)) + } +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/Literals.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/Literals.scala new file mode 100644 index 0000000..49a93c2 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/Literals.scala @@ -0,0 +1,160 @@ +package io.joern.bytecode.parser.php8 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.Basics.anyNumber +import io.joern.bytecode.parser.utils.decodeBase64 + +object Literals { + + def parseCharacterNotExiting[_: P]: P[String] = + P(!"\")" ~ AnyChar.!) + + def parseStringLiteral[_: P]: P[String] = + P("string(\"" ~ parseCharacterNotExiting.rep.! ~ "\")") + + def getStringLiteral[_: P]: P[StringLiteral] = + P(parseStringLiteral.map(x => StringLiteral(decodeBase64(x)))) + + def parseValidNumber[_: P]: P[Unit] = P("-".? ~ anyNumber.rep) + + def parseIntLiteral[_: P]: P[String] = P("int(" ~ parseValidNumber.! ~ ")") + + def getIntLiteral[_: P]: P[IntegerLiteral] = + P(parseIntLiteral.map(x => IntegerLiteral(x.toLong))) + + def parseFLoatEString[_: P]: P[String] = + P(("-".? ~ parseFloatNumberString ~ "e" ~ ("-" | "+") ~ anyNumber.rep).!) + + def parseFloatNumberString[_: P]: P[String] = + P(("-".? ~ anyNumber.rep ~ ".".? ~ anyNumber.rep).!) + + def parseFloatLiteral[_: P]: P[String] = + P("float(" ~/ ("nan".! | "-inf".! | "inf".! 
| parseFLoatEString | parseFloatNumberString) ~ ")")
+  // Maps the captured float text to a FloatLiteral; "inf", "-inf" and "nan" are special-cased.
+  def getFloatLiteral[_: P]: P[FloatLiteral] =
+    P(parseFloatLiteral.map(x =>
+      FloatLiteral(x match {
+        case "inf" => Float.PositiveInfinity
+        case "-inf" => Float.NegativeInfinity
+        case "nan" => Float.NaN
+        case x => x.toFloat
+      })))
+  // Captures "true" or "false" from `(true)` / `bool(true)`; the `bool` prefix is optional.
+  def parseBooleanLiteral[_: P]: P[String] =
+    P("bool".? ~ "(" ~ ("true" | "false").! ~/ ")")
+
+  def getBooleanLiteral[_: P]: P[BooleanLiteral] =
+    P(parseBooleanLiteral.map(x => BooleanLiteral(x == "true")))
+  // try-catch(<n>): takes one or more digits; a single-digit capture rejected `try-catch(10)`.
+  def parseTryCatch[_: P]: P[String] =
+    P("try-catch(" ~ anyNumber.rep(1).! ~ ")")
+
+  def getTryCatch[_: P]: P[TryCatchLiteral] =
+    P(parseTryCatch.map(x => TryCatchLiteral(x.toInt)))
+  // zval(type=<n>): at least one digit, so an empty capture never reaches `toInt`.
+  def parseZvalLiteral[_: P]: P[String] =
+    P("zval(type=" ~ anyNumber.rep(1).! ~ ")")
+
+  def getZvalLiteral[_: P]: P[Zval] =
+    P(parseZvalLiteral.map(x => Zval(x.toInt)))
+
+  def parseNull[_: P]: P[Unit] = P("null")
+
+  def getNull[_: P]: P[Null] = P(parseNull.map(_ => Null()))
+
+  //def parseType[_: P]: P[String] = P("(" ~ ("long" | "int").!
~ ")") + //def getType[_: P]: P[Type] = P(parseType.map(x => Type(x))) + + def parseArrayKeyValuePair[_: P]: P[ArrayKeyValuePair] = + P( + ((("N:" ~ getStringLiteral) + .map(x => Right(x.value)) | ("P:" ~ getIntLiteral).map(x => + Left(x.value.toInt))) ~ " " ~ getAnyLiteral ~ "|").map(x => + ArrayKeyValuePair(key = x._1, value = x._2))) + + def parseArray[_: P]: P[Seq[ArrayKeyValuePair]] = + "array(" ~ parseArrayKeyValuePair.rep(0) ~ ")" + + def parseUnknownArray[_: P]: P[Unit] = P("array(...)") + + def getArray[_: P]: P[ArrayValue] = + P(parseUnknownArray.map(_ => ArrayValue(None)) | parseArray.map(x => + ArrayValue(Some(x.toList)))) + + def parseBytecodeKeyword[_: P]: P[String] = + P( + "(" ~ + ("self" | + "parent" | + "ref" | + "array" | + "double" | + "string" | + "long" | + "int" | + "object" | + "bool" | + "function" | + "null" | + "resource" | + "isset" | + "unqualified-in-namespace" | + "unqualified" | + "in-namespace" | + "empty" | + "packed" | + "exception" | + "require_once" | + "require" | + "include_once" | + "include" | + "obj write" | + "dim write" | + "global+lock" | + "global" | + "value" | + "eval" | + "local" | + "static" | + "no-autoload" | + "silent").! + ~ ")") + + def getByteCodeKeyword[_: P]: P[ByteCodeKeyword] = + P(parseBytecodeKeyword.map(x => ByteCodeKeyword(x))) + + def parseByteCodeConstructor[_: P]: P[Unit] = { + P("CONSTRUCTOR") + } + + def getByteCodeConstructor[_: P]: P[Value] = { + parseByteCodeConstructor.map { _ => + ByteCodeConstructor() + } + } + + def parseByteCodePlaceIndicator[_: P]: P[String] = P( + "NEXT".! | + "THIS".! + ) + + def getByteCodePlaceIndicator[_: P]: P[ByteCodePlaceIndicator] = P( + parseByteCodePlaceIndicator.map(x => ByteCodePlaceIndicator(x)) + ) + + def parseAssignOpCmdString[_: P]: P[String] = + P("(" ~ + ("ADD" | "SUB" | "DIV" | "MUL" | "MOD" | "POW" | "SL" | "SR" | "CONCAT" | "BW_OR" | "BW_AND" | "BW_XOR").! 
~ + ")") + + def getAssignOpCmd[_: P]: P[AssignOpLiteral] = { + P(parseAssignOpCmdString.map(x => AssignOpLiteral(x))) + } + + def getAnyLiteral[_: P]: P[Value] = + P(getByteCodePlaceIndicator | getStringLiteral | getIntLiteral | getFloatLiteral + | getBooleanLiteral | getByteCodeConstructor | + getNull | getArray | getZvalLiteral | getByteCodeKeyword | getAssignOpCmd | getTryCatch) +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/LiveRangesBlock.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/LiveRangesBlock.scala new file mode 100644 index 0000000..0db6261 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/LiveRangesBlock.scala @@ -0,0 +1,31 @@ +package io.joern.bytecode.parser.php8 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs.{LiveRangeLine, LiveRanges} +import io.joern.bytecode.parser.php8.Basics.anyNumber + +object LiveRangesBlock { + + def parseLiveRangeType[_: P]: P[String] = + P("(" ~ ("loop" | "new" | "rope" | "silence" | "tmp/var").! ~ ")") + + def parseLiveRangeLine[_: P]: P[(String, String, String, String)] = + P( + " ".rep ~ + anyNumber.rep.! ~ ": " ~ + anyNumber.rep.! ~ " - " ~ + anyNumber.rep.! 
~ " " ~/ + parseLiveRangeType) + + def getLiveRangeLine[_: P]: P[LiveRangeLine] = + P(parseLiveRangeLine.map(x => + LiveRangeLine(x._1.toInt, x._2.toInt, x._3.toInt, x._4))) + + def parseLiveRangesBlock[_: P]: P[Seq[LiveRangeLine]] = + P("LIVE RANGES:\n" ~/ (getLiveRangeLine ~ "\n").rep) + + def getLiveRangesBlock[_: P]: P[LiveRanges] = + P(parseLiveRangesBlock.map(x => LiveRanges(x))) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/MethodDefinition.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/MethodDefinition.scala new file mode 100644 index 0000000..c77939a --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/MethodDefinition.scala @@ -0,0 +1,24 @@ +package io.joern.bytecode.parser.php8 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs.{ + ByteCodeDefinitionsBlock, + ControlFlowDefinitionsBlock, + MethodDefinitionPair +} +import io.joern.bytecode.parser.php8.ByteCodeBlock.getByteCodeBlock +import io.joern.bytecode.parser.php8.ControlFlowBlock.getControlFlowBlock + +object MethodDefinition { + + def parseFullMethodDefinitionBlock[_: P] + : P[(ByteCodeDefinitionsBlock, ControlFlowDefinitionsBlock)] = + P(getByteCodeBlock ~/ "\n".? 
~/ getControlFlowBlock) + def getFullMethodDefinitionBlock[_: P]: P[MethodDefinitionPair] = + P( + parseFullMethodDefinitionBlock.map( + x => MethodDefinitionPair(x._1, x._2) + )) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/Variables.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/Variables.scala new file mode 100644 index 0000000..50866d5 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/Variables.scala @@ -0,0 +1,29 @@ +package io.joern.bytecode.parser.php8 + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs.Variable +import io.joern.bytecode.parser.php8.Basics.{anyNumber, legalIdentifier} + +object Variables { + + def parseVariable[_: P]: P[String] = + P("CV" ~ anyNumber.rep ~/ "($" ~ legalIdentifier.! ~ ")") + + def getVariable[_: P]: P[Variable] = + P(parseVariable.map(Variable(_, tmp = false))) + + def parseTemporary[_: P]: P[String] = P(("T" ~ anyNumber.rep).!) + + def getTemporary[_: P]: P[Variable] = + P(parseTemporary.map(Variable(_, tmp = true))) + + def parseReference[_: P]: P[String] = P(("V" ~ anyNumber.rep).!) 
+ + def getReference[_: P]: P[Variable] = + P(parseReference.map(x => Variable(x, tmp = true, reference = true))) + + def getAnyVariable[_: P]: P[Variable] = + P(getVariable | getTemporary | getReference) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Array.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Array.scala new file mode 100644 index 0000000..bbdc28a --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Array.scala @@ -0,0 +1,62 @@ +package io.joern.bytecode.parser.php8.instructions + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.Basics.anyNumber +import io.joern.bytecode.parser.php8.instructions.Utility.getAnyValue + +object Array { + + def parseInitArray[_: P]: P[(String, Seq[Value])] = + P("INIT_ARRAY " ~ anyNumber.rep.! ~ (" " ~ getAnyValue).rep) + def getInitArray[_: P]: P[Opcode] = + P(parseInitArray.map { x => + val pos = IntegerLiteral(x._1.toLong) + x._2 match { + case first :: Nil => + DualValueOperation("INIT_ARRAY", pos, first) + case first :: second :: Nil => + TripleValueOperation("INIT_ARRAY", pos, first, second) + case first :: second :: third :: Nil => + QuadrupleValueOperation("INIT_ARRAY", pos, first, second, third) + case first :: second :: third :: fourth :: Nil => + QuintupleValueOperation("INIT_ARRAY", + pos, + first, + second, + third, + fourth) + } + }) + + def parseAddArrayElement[_: P]: P[Seq[Value]] = + P("ADD_ARRAY_ELEMENT" ~ (" " ~ getAnyValue).rep) + + def getAddArrayElement[_: P]: P[Opcode] = + P(parseAddArrayElement.map { + case first :: second :: Nil => + DualValueOperation("ADD_ARRAY_ELEMENT", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("ADD_ARRAY_ELEMENT", first, second, third) + }) + + def parseInArray[_: P]: P[(String, Value, Value)] = + P("IN_ARRAY " ~/ anyNumber.rep.! 
~ " " ~ getAnyValue ~ " " ~ getAnyValue) + def getInArray[_: P]: P[TripleValueOperation] = + P( + parseInArray.map( + x => + TripleValueOperation("IN_ARRAY", + IntegerLiteral(x._1.toLong), + x._2, + x._3))) + + def parseAddArrayUnpack[_: P]: P[Value] = + P("ADD_ARRAY_UNPACK" ~ " " ~ getAnyValue) + def getAddArrayUnpack[_: P]: P[Opcode] = + P( + parseAddArrayUnpack.map(x => SingleValueOperation("ADD_ARRAY_UNPACK", x)) + ) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Assign.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Assign.scala new file mode 100644 index 0000000..e67f728 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Assign.scala @@ -0,0 +1,117 @@ +package io.joern.bytecode.parser.php8.instructions + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php7.Literals.getAssignOpCmd +import io.joern.bytecode.parser.php7.Variables.getAnyVariable +import io.joern.bytecode.parser.php8.instructions.Utility.getAnyValue + +object Assign { + + def parseAssignDim[_: P]: P[(Variable, Value)] = + P("ASSIGN_DIM " ~/ getAnyVariable ~ " " ~ getAnyValue) + def getAssignDim[_: P]: P[DualValueOperation] = + P( + parseAssignDim.map( + x => + DualValueOperation( + "ASSIGN_DIM", + x._1, + x._2 + ))) + + def parseAssign[_: P]: P[(Value, Value)] = + P("ASSIGN " ~/ getAnyValue ~ " " ~/ getAnyValue) + def getAssign[_: P]: P[DualValueOperation] = + P(parseAssign.map(x => DualValueOperation("ASSIGN", x._1, x._2))) + + def parseAssignOp[_: P]: P[(Value, Value, Value)] = + P( + "ASSIGN_OP " ~/ + getAssignOpCmd ~/ " " ~ + getAnyValue ~ " " ~ + getAnyValue) + def getAssignOp[_: P]: P[TripleValueOperation] = + P(parseAssignOp.map(x => + TripleValueOperation("ASSIGN_OP", x._1, x._2, x._3))) + + def parseAssignStaticProp[_: P]: P[Seq[Value]] = + P("ASSIGN_STATIC_PROP" ~/ (" " ~ getAnyValue).rep ~ 
&("\n" | End)) + def getAssignStaticProp[_: P]: P[Opcode] = + P(parseAssignStaticProp.map { + case first :: Nil => SingleValueOperation("ASSIGN_STATIC_PROP_1", first) + case first :: second :: Nil => + DualValueOperation("ASSIGN_STATIC_PROP_2", first, second) + case list => + throw new UnexpectedArgumentCount("ASSIGN_STATIC_PROP", + Seq(1, 2), + list.length) + }) + + def parseAssignStaticPropOp[_: P]: P[Seq[Value]] = + P(&("ASSIGN_STATIC_PROP_OP ") ~ "ASSIGN_STATIC_PROP_OP" ~ (" " ~ getAnyValue).rep) + def getAssignStaticPropOp[_: P]: P[Opcode] = + P(parseAssignStaticPropOp.map { + case first :: second :: Nil => + DualValueOperation("ASSIGN_STATIC_PROP_OP", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("ASSIGN_STATIC_PROP_OP", first, second, third) + case list => + throw new UnexpectedArgumentCount("ASSIGN_STATIC_PROP_OP", + Seq(2, 3), + list.length) + }) + + def parseAssignStaticPropRef[_: P]: P[Seq[Value]] = + P(&("ASSIGN_STATIC_PROP_REF ") ~ "ASSIGN_STATIC_PROP_REF" ~ (" " ~ getAnyValue).rep) + def getAssignStaticPropRef[_: P]: P[Opcode] = + P(parseAssignStaticPropRef.map { + case first :: Nil => + SingleValueOperation("ASSIGN_STATIC_PROP_REF", first) + case first :: second :: Nil => + DualValueOperation("ASSIGN_STATIC_PROP_REF", first, second) + case list => + throw new UnexpectedArgumentCount("ASSIGN_STATIC_PROP_REF", + Seq(1, 2), + list.length) + }) + + def parseAssignObj[_: P]: P[(Value, Value)] = + P("ASSIGN_OBJ " ~ getAnyValue ~ " " ~ getAnyValue) + def getAssignObj[_: P]: P[DualValueOperation] = + P(parseAssignObj.map(x => DualValueOperation("ASSIGN_OBJ", x._1, x._2))) + + def parseAssignRef[_: P]: P[Seq[Value]] = + P("ASSIGN_REF" ~ (" " ~ getAnyValue).rep ~ &("\n" | End)) // the End is needed to ensure that unit tests work + def getAssignRef[_: P]: P[Opcode] = + P(parseAssignRef.map { + case first :: second :: Nil => + DualValueOperation("ASSIGN_REF_2", first, second) + case first :: second :: third :: Nil => +
TripleValueOperation("ASSIGN_REF_3", first, second, third) + }) + + def parseAssignObjRef[_: P]: P[Seq[Value]] = + P("ASSIGN_OBJ_REF" ~ (" " ~ getAnyValue).rep ~ &("\n" | End)) + def getAssignObjRef[_: P]: P[Opcode] = + P(parseAssignObjRef.map { + case first :: second :: Nil => + DualValueOperation("ASSIGN_OBJ_REF_2", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("ASSIGN_OBJ_REF_3", first, second, third) + }) + + def parseAssignCommand[_: P]: P[Opcode] = P( + &("ASSIGN ") ~ getAssign | + &("ASSIGN_OBJ ") ~ getAssignObj | + &("ASSIGN_DIM ") ~ getAssignDim | + &("ASSIGN_STATIC_PROP ") ~ getAssignStaticProp | + &("ASSIGN_STATIC_PROP_OP ") ~ getAssignStaticPropOp | + &("ASSIGN_STATIC_PROP_REF ") ~ getAssignStaticPropRef | + &("ASSIGN_OP ") ~ getAssignOp | + &("ASSIGN_REF ") ~ getAssignRef | + &("ASSIGN_OBJ_REF ") ~ getAssignObjRef + ) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Bind.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Bind.scala new file mode 100644 index 0000000..424cfff --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Bind.scala @@ -0,0 +1,30 @@ +package io.joern.bytecode.parser.php8.instructions + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.instructions.Utility.getAnyValue + +object Bind { + + def parseBindStatic[_: P]: P[Seq[Value]] = + P(&("BIND_STATIC ") ~ "BIND_STATIC" ~ (" " ~ getAnyValue).rep) + def getBindStatic[_: P]: P[Opcode] = + P(parseBindStatic.map { + case first :: second :: Nil => + DualValueOperation("BIND_STATIC", first, second) + case first :: Nil => SingleValueOperation("BIND_STATIC", first) + }) + + def parseBindLexical[_: P]: P[Seq[Value]] = + P(&("BIND_LEXICAL ") ~ "BIND_LEXICAL" ~ (" " ~ getAnyValue).rep) + def getBindLexical[_: P]: P[Opcode] = + P(parseBindLexical.map { + case first :: 
second :: third :: Nil => + TripleValueOperation("BIND_LEXICAL", first, second, third) + case first :: second :: Nil => + DualValueOperation("BIND_LEXICAL", first, second) + case first :: Nil => SingleValueOperation("BIND_LEXICAL", first) + }) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/CallRelated.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/CallRelated.scala new file mode 100644 index 0000000..97032ea --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/CallRelated.scala @@ -0,0 +1,298 @@ +package io.joern.bytecode.parser.php8.instructions + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.Basics.anyNumber +import io.joern.bytecode.parser.php8.Literals.{ + getAnyLiteral, + getByteCodeKeyword, + getStringLiteral +} +import io.joern.bytecode.parser.php8.Variables.getAnyVariable +import io.joern.bytecode.parser.php8.instructions.Utility.getAnyValue +import io.joern.reporting.ReportableError + +object CallRelated { + + def parseNew[_: P]: P[(String, Seq[Value])] = + P("NEW " ~ anyNumber.rep.! ~ (" " ~ getAnyValue).rep) + def getNew[_: P]: P[Opcode] = + P(parseNew.map { + case (args, first :: second :: Nil) => + TripleValueOperation("NEW", IntegerLiteral(args.toLong), first, second) + case (args, first :: Nil) => + DualValueOperation("NEW", IntegerLiteral(args.toLong), first) + case x => + throw ReportableError("", + -1, + "", + "", + s"when parsing New unexpected result tuple $x") + }) + + def parseInitFcall[_: P]: P[(String, String, StringLiteral)] = + P("INIT_FCALL" ~/ " " ~ anyNumber.rep.! ~ " " ~ anyNumber.rep.! ~ " " ~ getStringLiteral) + def getInitFcall[_: P]: P[INIT_FCALL] = + P(parseInitFcall.map(x => INIT_FCALL(x._1.toInt, x._2.toInt, x._3))) + + def parseInitMethodCall[_: P]: P[(String, Object, Value)] = + P("INIT_METHOD_CALL " ~/ anyNumber.rep.! 
~ " " ~ ("THIS".! | getAnyVariable) ~ " " ~ getAnyValue) + def getInitMethodCall[_: P]: P[INIT_METHOD_CALL] = + P( + parseInitMethodCall.map(x => + INIT_METHOD_CALL( + x._1.toInt, + x._2 match { + case x: Variable => x + case x: String => Variable(x, tmp = false, reference = true) + case _ => + throw new RuntimeException("unknown object reference in call") + }, + x._3 + ))) + + def parseInitNsFcallByName[_: P]: P[(String, StringLiteral)] = + P("INIT_NS_FCALL_BY_NAME " ~/ anyNumber.rep.! ~ " " ~ getStringLiteral) + def getInitNsFcallByName[_: P]: P[INIT_NS_FCALL_BY_NAME] = + P(parseInitNsFcallByName.map(x => + INIT_NS_FCALL_BY_NAME(x._1.toInt, x._2.value))) + + def parseInitDynamicCall[_: P]: P[(String, Variable)] = + P("INIT_DYNAMIC_CALL " ~/ anyNumber.rep.! ~ " " ~ getAnyVariable) + def getInitDynamicCall[_: P]: P[INIT_DYNAMIC_CALL] = + P(parseInitDynamicCall.map(x => INIT_DYNAMIC_CALL(x._1.toInt, x._2))) + + def parseInitFcallByName[_: P]: P[(String, StringLiteral)] = + P("INIT_FCALL_BY_NAME " ~/ anyNumber.rep.! ~ " " ~ getStringLiteral) + def getInitFcallByName[_: P]: P[INIT_FCALL_BY_NAME] = + P(parseInitFcallByName.map(x => INIT_FCALL_BY_NAME(x._1.toInt, x._2.value))) + + def parseInitStaticMethodCallA[_: P]: P[(String, Seq[Value])] = + P("INIT_STATIC_METHOD_CALL " ~/ anyNumber.rep.! 
~ (" " ~ getAnyValue).rep) + def getInitStaticMethodCallA[_: P]: P[INIT_STATIC_METHOD_CALL] = + P(parseInitStaticMethodCallA.map { + case (args, first :: second :: Nil) => + INIT_STATIC_METHOD_CALL(args.toInt, None, None, Some(first), second) + case (args, first :: second :: third :: Nil) => + INIT_STATIC_METHOD_CALL(args.toInt, + Some(first), + None, + Some(second), + third) + case (args, first :: second :: third :: fourth :: Nil) => + INIT_STATIC_METHOD_CALL(args.toInt, + Some(first), + Some(second), + Some(third), + fourth) + case x => + throw ReportableError( + "", + -1, + "", + "", + s"when parsing InitStaticMethodCall unexpected result tuple $x") + }) + + def parseInitStaticMethodCallB[_: P] + : P[(String, ByteCodeKeyword, ByteCodeKeyword)] = + P("INIT_STATIC_METHOD_CALL " ~ + anyNumber.rep.! ~ " " ~ getByteCodeKeyword ~ " " ~ getByteCodeKeyword ~ " " ~ "CONSTRUCTOR") + + def getInitStaticMethodCallB[_: P]: P[QuadrupleValueOperation] = + P( + parseInitStaticMethodCallB.map( + x => + QuadrupleValueOperation("INIT_STATIC_METHOD_CALL", + IntegerLiteral(x._1.toLong), + x._2, + x._3, + StringLiteral("CONSTRUCTOR")) + )) + + def getInitStaticMethodCall[_: P]: P[Opcode] = + P(getInitStaticMethodCallB | getInitStaticMethodCallA) + + def parseInitUserCall[_: P]: P[(String, StringLiteral, Value)] = + P("INIT_USER_CALL " ~/ anyNumber.rep.! 
~ " " ~ getStringLiteral ~ " " ~ getAnyValue) + + def getInitUserCall[_: P]: P[Opcode] = + P(parseInitUserCall.map(x => INIT_USER_CALL(x._1.toInt, x._2, x._3))) + + def parseInitCallCommands[_: P]: P[Opcode] = P( + &("INIT_FCALL ") ~ getInitFcall | + &("INIT_METHOD_CALL ") ~ getInitMethodCall | + &("INIT_NS_FCALL_BY_NAME ") ~ getInitNsFcallByName | + &("INIT_DYNAMIC_CALL ") ~ getInitDynamicCall | + &("INIT_FCALL_BY_NAME ") ~ getInitFcallByName | + &("INIT_STATIC_METHOD_CALL ") ~ getInitStaticMethodCall | + &("INIT_USER_CALL ") ~ getInitUserCall + ) + + def getPayloadReference[_: P]: P[Value] = + P(anyNumber.rep(1).!.map(x => IntegerLiteral(x.toLong)) | getStringLiteral) + + def parseSendVarEx[_: P]: P[(Value, Value)] = + P(getAnyValue ~ " " ~ getPayloadReference) + def getSendVarEx[_: P]: P[DualValueOperation] = + P(parseSendVarEx.map(x => DualValueOperation("SEND_VAR_EX", x._1, x._2))) + + def parseSendValEx[_: P]: P[(Value, Value)] = + P(getAnyValue ~ " " ~ getPayloadReference) + def getSendValEx[_: P]: P[DualValueOperation] = + P(parseSendValEx.map { x => + DualValueOperation("SEND_VAL_EX", x._1, x._2) + }) + + def parseSendVal[_: P]: P[(Value, Value)] = + P(getAnyValue ~ " " ~ getPayloadReference) + def getSendVal[_: P]: P[DualValueOperation] = + P(parseSendVal.map { x => + //assert(x._2 != "", s"after parsing ${x._1} we encounter an empty integer string") + DualValueOperation("SEND_VAL", x._1, x._2) + }) + + def parseSendVar[_: P]: P[(Value, Value)] = + P(getAnyValue ~ " " ~ getPayloadReference) + def getSendVar[_: P]: P[DualValueOperation] = { + P(parseSendVar.map(x => DualValueOperation("SEND_VAR", x._1, x._2))) + } + + def parseSendVarNoRefEx[_: P]: P[(Variable, Value)] = + P(getAnyVariable ~ " " ~ getPayloadReference) + def getSendVarNoRefEx[_: P]: P[DualValueOperation] = + P(parseSendVarNoRefEx.map(x => + DualValueOperation("SEND_VAR_NO_REF_EX", x._1, x._2))) + + def parseSendVarNoRef[_: P]: P[(Variable, Value)] = + P(getAnyVariable ~ " " ~
getPayloadReference) + def getSendVarNoRef[_: P]: P[DualValueOperation] = + P(parseSendVarNoRef.map(x => + DualValueOperation("SEND_VAR_NO_REF", x._1, x._2))) + + def parseSendFuncVar[_: P]: P[(Variable, Value)] = + P(getAnyVariable ~ " " ~ getPayloadReference) + def getSendFuncVar[_: P]: P[DualValueOperation] = + P(parseSendFuncVar.map(x => + DualValueOperation("SEND_FUNC_ARG", x._1, x._2))) + + def parseSendUser[_: P]: P[(Value, Value)] = + P(getAnyValue ~ " " ~ getPayloadReference) + def getSendUser[_: P]: P[DualValueOperation] = + P(parseSendUser.map(x => DualValueOperation("SEND_USER", x._1, x._2))) + + def parseSendRef[_: P]: P[(Value, Value)] = { + P("SEND_REF " ~ getAnyValue ~ " " ~ getPayloadReference) + } + def getSendRef[_: P]: P[DualValueOperation] = P( + parseSendRef.map(x => DualValueOperation("SEND_REF", x._1, x._2)) + ) + + // 2021-12-14: not sure if this should also use getPayloadReference /Malte + def parseSendArray[_: P]: P[(String, Seq[Value])] = + P("SEND_ARRAY " ~/ anyNumber.rep.! ~ (" " ~ getAnyValue).rep) + + def getSendArray[_: P]: P[Opcode] = + P(parseSendArray.map { x => + x._2 match { + case first :: Nil => + DualValueOperation("SEND_ARRAY", IntegerLiteral(x._1.toLong), first) + case first :: second :: Nil => + TripleValueOperation("SEND_ARRAY", + IntegerLiteral(x._1.toLong), + first, + second) + } + }) + + def getSendCommand[_: P]: P[Opcode] = + P( + "SEND_VAR_NO_REF_EX " ~/ getSendVarNoRefEx | + "SEND_VAR_NO_REF " ~/ getSendVarNoRef | + "SEND_VAL_EX " ~/ getSendValEx | + "SEND_VAR_EX " ~/ getSendVarEx | + "SEND_VAL " ~/ getSendVal | + "SEND_VAR " ~/ getSendVar | + "SEND_USER " ~/ getSendUser | + "SEND_FUNC_ARG " ~/ getSendFuncVar | + &("SEND_REF ") ~/ getSendRef | + &("SEND_ARRAY ") ~/ getSendArray) + + def parseRecv[_: P]: P[String] = P("RECV " ~/ anyNumber.rep.!) 
+ def getRecv[_: P]: P[SingleValueOperation] = + P(parseRecv.map(x => + SingleValueOperation("RECV", IntegerLiteral(x.toLong)))) + + def parseRecvInit[_: P]: P[(String, Value)] = + P("RECV_INIT " ~/ anyNumber.rep.! ~ " " ~/ getAnyLiteral) + def getRecvInit[_: P]: P[DualValueOperation] = + P(parseRecvInit.map(x => + DualValueOperation("RECV_INIT", IntegerLiteral(x._1.toLong), x._2))) + + def parseRecvVariadic[_: P]: P[String] = + P("RECV_VARIADIC" ~ " " ~ anyNumber.rep.!) + def getRecvVariadic[_: P]: P[Opcode] = + P(parseRecvVariadic.map(x => + SingleValueOperation("RECV_VARIADIC", IntegerLiteral(x.toLong)))) + + def parseCheckFuncArg[_: P]: P[Value] = + P( + "CHECK_FUNC_ARG " ~/ (anyNumber + .rep(1) + .! + .map(x => IntegerLiteral(x.toLong)) | getStringLiteral)) + def getCheckFuncArg[_: P]: P[SingleValueOperation] = + P(parseCheckFuncArg.map(x => SingleValueOperation("CHECK_FUNC_ARG", x))) + + def parseFuncGetArgs[_: P]: P[Seq[Value]] = + P(&("FUNC_GET_ARGS") ~ "FUNC_GET_ARGS" ~ (" " ~ getAnyValue).rep) + def getFuncGetArgs[_: P]: P[Opcode] = + P(parseFuncGetArgs.map { + case Nil => NoValueOperation("FUNC_GET_ARGS") + case single :: Nil => SingleValueOperation("FUNC_GET_ARGS", single) + }) + + def parseReturnByRef[_: P]: P[Seq[Value]] = + P("RETURN_BY_REF" ~ (" " ~ getAnyValue).rep) + def getReturnByRef[_: P]: P[Opcode] = + P(parseReturnByRef.map { + case first :: Nil => SingleValueOperation("RETURN_BY_REF", first) + case first :: second :: Nil => + DualValueOperation("RETURN_BY_REF", first, second) + }) + + def parseReturn[_: P]: P[Value] = + P("RETURN " ~ getAnyValue) + def getReturn[_: P]: P[Opcode] = + P(parseReturn.map(SingleValueOperation("RETURN", _))) + + def getReturnCommand[_: P]: P[Opcode] = + P( + &("RETURN_BY_REF ") ~ getReturnByRef | + &("RETURN ") ~ getReturn) + + def parseVerifyReturnType[_: P]: P[Seq[Value]] = + P("VERIFY_RETURN_TYPE" ~ (" " ~ getAnyValue).rep ~ &("\n" | End)) + def getVerifyReturnType[_: P]: P[Opcode] = + 
P(parseVerifyReturnType.map { + case Nil => NoValueOperation("VERIFY_RETURN_TYPE") + case single :: Nil => SingleValueOperation("VERIFY_RETURN_TYPE", single) + case list => + throw new UnexpectedArgumentCount("VERIFY_RETURN_TYPE", + Seq(0, 1), + list.length) + }) + + def parseFastRet[_: P]: P[Seq[Value]] = + P(&("FAST_RET") ~ "FAST_RET" ~ (" " ~ getAnyValue).rep) + def getFastRet[_: P]: P[Opcode] = + P(parseFastRet.map { + case first :: Nil => SingleValueOperation("FAST_RET", first) + case first :: second :: Nil => + DualValueOperation("FAST_RET", first, second) + case list => + throw new UnexpectedArgumentCount("FAST_RET", Seq(1, 2), list.length) + }) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/ClassRelated.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/ClassRelated.scala new file mode 100644 index 0000000..65321f4 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/ClassRelated.scala @@ -0,0 +1,40 @@ +package io.joern.bytecode.parser.php8.instructions + +import fastparse.NoWhitespace._ +import fastparse.{P, _} +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.instructions.Utility.getAnyValue + +object ClassRelated { + + def parseGetClass[_: P]: P[Seq[Value]] = + P(&("GET_CLASS") ~ "GET_CLASS" ~ (" " ~ getAnyValue).rep) + def getGetClass[_: P]: P[Opcode] = + P(parseGetClass.map { + case Nil => NoValueOperation("GET_CLASS") + case single :: Nil => SingleValueOperation("GET_CLASS", single) + }) + + def parseDeclareAnonClass[_: P]: P[Seq[Value]] = + P(&("DECLARE_ANON_CLASS ") ~ "DECLARE_ANON_CLASS" ~ (" " ~ getAnyValue).rep) + def getDeclareAnonClass[_: P]: P[Opcode] = + P( + parseDeclareAnonClass.map { + case first :: Nil => SingleValueOperation("DECLARE_ANON_CLASS", first) + case first :: second :: Nil => + DualValueOperation("DECLARE_ANON_CLASS", first, second) + } + ) + + def parseDeclareClass[_: P]:
P[Seq[Value]] = + P( + &("DECLARE_CLASS ") ~ "DECLARE_CLASS" ~ (" " ~ getAnyValue).rep ~ &( + "\n" | End)) + def getDeclareClass[_: P]: P[Opcode] = + P(parseDeclareClass.map { + case single :: Nil => SingleValueOperation("DECLARE_CLASS", single) + case first :: second :: Nil => + DualValueOperation("DECLARE_CLASS", first, second) + }) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/ControlConstructs.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/ControlConstructs.scala new file mode 100644 index 0000000..5ca2bb6 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/ControlConstructs.scala @@ -0,0 +1,67 @@ +package io.joern.bytecode.parser.php8.instructions + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.Basics.anyNumber +import io.joern.bytecode.parser.php8.instructions.Utility.{ + getAnyValue, + parseStringInQuotes, + parseTarget +} +import io.joern.bytecode.parser.utils.decodeBase64 + +object ControlConstructs { + + def parseStringDestinationPattern[_: P]: P[(String, String)] = + P( + (parseStringInQuotes + .map(decodeBase64) | "default".!) ~/ ": " ~ parseTarget) + def parseSwitchString[_: P] + : P[(Value, (String, String), Seq[(String, String)])] = + P("SWITCH_STRING " ~/ getAnyValue ~ " " ~ parseStringDestinationPattern ~ ("," ~ " " ~ parseStringDestinationPattern).rep) + def getSwitchString[_: P]: P[Opcode] = + P( + parseSwitchString.map( + x => + SWITCH("SWITCH_STRING", + x._1, + (Seq(x._2) ++ x._3).map(x => (x._1, x._2.toInt))))) + + def parseNumberDestinationPattern[_: P]: P[(String, String)] = + P(("default".! | ("-".? ~ anyNumber.rep).!)
~ ": " ~ parseTarget) + def parseSwitchLong[_: P] + : P[(Value, (String, String), Seq[(String, String)])] = + P("SWITCH_LONG " ~/ getAnyValue ~ " " ~ parseNumberDestinationPattern ~ ("," ~ " " ~ parseNumberDestinationPattern).rep) + def getSwitchLong[_: P]: P[Opcode] = + P( + parseSwitchLong.map( + x => + SWITCH("SWITCH_LONG", + x._1, + (Seq(x._2) ++ x._3).map(x => (x._1, x._2.toInt))))) + + def getSwitchStatement[_: P]: P[Opcode] = + P( + &("SWITCH_STRING ") ~ getSwitchString | + &("SWITCH_LONG ") ~ getSwitchLong + ) + + def parseExit[_: P]: P[Option[Value]] = + P("EXIT" ~ (" " ~ getAnyValue).?) + + def getExit[_: P]: P[Opcode] = + P(parseExit.map { + case Some(x) => SingleValueOperation("EXIT", x) + case None => NoValueOperation("EXIT") + }) + + def parseCatch[_: P]: P[(Value, Option[String])] = + P("CATCH " ~ getAnyValue ~ (" " ~ parseTarget).?) + def getCatch[_: P]: P[Opcode] = + P(parseCatch.map { + case (value, Some(x)) => + DualValueOperation("CATCH", value, IntegerLiteral(x.toLong)) + case (value, None) => SingleValueOperation("CATCH", value) + }) +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Fe.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Fe.scala new file mode 100644 index 0000000..3a4b3c9 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Fe.scala @@ -0,0 +1,60 @@ +package io.joern.bytecode.parser.php8.instructions + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.Literals.getArray +import io.joern.bytecode.parser.php8.Variables.getAnyVariable +import io.joern.bytecode.parser.php8.instructions.Utility.{ + getAnyValue, + parseTarget +} + +object Fe { + + def parseFeFetchR[_: P]: P[(Variable, Variable, String)] = + P("FE_FETCH_R " ~/ getAnyVariable ~ " " ~/ getAnyVariable ~ " " ~/ parseTarget) + def getFeFetchR[_: P]: P[TripleValueOperation] = + P( + 
parseFeFetchR.map( + x => + TripleValueOperation("FE_FETCH_R", + x._1, + x._2, + IntegerLiteral(x._3.toLong)))) + + def parseFeFetchRw[_: P]: P[(Variable, Variable, String)] = + P("FE_FETCH_RW " ~/ getAnyVariable ~ " " ~/ getAnyVariable ~ " " ~/ parseTarget) + def getFeFetchRw[_: P]: P[TripleValueOperation] = + P( + parseFeFetchRw.map( + x => + TripleValueOperation("FE_FETCH_RW", + x._1, + x._2, + IntegerLiteral(x._3.toLong)))) + + def getFeFetch[_: P]: P[Opcode] = + P( + &("FE_FETCH_RW ") ~/ getFeFetchRw | + &("FE_FETCH_R ") ~/ getFeFetchR) + + def parseFeResetRw[_: P]: P[(Value, String)] = + P("FE_RESET_RW " ~/ (getAnyVariable | getArray) ~ " " ~ parseTarget) + def getFeResetRw[_: P]: P[DualValueOperation] = + P(parseFeResetRw.map(x => + DualValueOperation("FE_RESET_RW", x._1, IntegerLiteral(x._2.toLong)))) + + def parseFeResetR[_: P]: P[(Value, String)] = + P("FE_RESET_R " ~/ getAnyValue ~ " " ~/ parseTarget) + def getFeResetR[_: P]: P[DualValueOperation] = + P(parseFeResetR.map(x => + DualValueOperation("FE_RESET_R", x._1, IntegerLiteral(x._2.toLong)))) + + def getFeReset[_: P]: P[Opcode] = + P( + &("FE_RESET_RW ") ~ getFeResetRw | + &("FE_RESET_R ") ~ getFeResetR + ) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Fetch.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Fetch.scala new file mode 100644 index 0000000..d3797bb --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Fetch.scala @@ -0,0 +1,224 @@ +package io.joern.bytecode.parser.php8.instructions + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.Literals.{ + getByteCodeKeyword, + getStringLiteral +} +import io.joern.bytecode.parser.php8.instructions.Utility.getAnyValue + +object Fetch { + + def parseFetchObjFuncArgA[_: P]: P[(Option[ByteCodeKeyword], Value, Value)] = + P("FETCH_OBJ_FUNC_ARG " ~ 
(getByteCodeKeyword ~ " ").? ~ getAnyValue ~ " " ~ getAnyValue) + def getFetchObjFuncArgA[_: P]: P[Opcode] = + P(parseFetchObjFuncArgA.map(x => + x._1 match { + case Some(keyword) => + TripleValueOperation("FETCH_OBJ_FUNC_ARG", keyword, x._2, x._3) + case None => DualValueOperation("FETCH_OBJ_FUNC_ARG", x._2, x._3) + })) + + def parseFetchObjFuncArgB[_: P]: P[(ByteCodeKeyword, StringLiteral)] = + P("FETCH_OBJ_FUNC_ARG " ~ getByteCodeKeyword ~ " " ~ "THIS" ~ " " ~ getStringLiteral) + def getFetchObjFuncArgB[_: P]: P[TripleValueOperation] = + P( + parseFetchObjFuncArgB.map( + x => + TripleValueOperation("FETCH_OBJ_FUNC_ARG", + x._1, + StringLiteral("THIS"), + x._2))) + + def getFetchObjFuncArg[_: P]: P[Opcode] = + P(getFetchObjFuncArgA | getFetchObjFuncArgB) + + def parseStaticPropR[_: P]: P[Seq[Value]] = + P("FETCH_STATIC_PROP_R" ~/ (" " ~ getAnyValue).rep) + def getStaticPropR[_: P]: P[Opcode] = + P(parseStaticPropR.map { + case first :: second :: Nil => + DualValueOperation("FETCH_STATIC_PROP_R", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_STATIC_PROP_R", first, second, third) + }) + + def parseStaticPropW[_: P]: P[Seq[Value]] = + P("FETCH_STATIC_PROP_W" ~/ (" " ~ getAnyValue).rep) + def getStaticPropW[_: P]: P[Opcode] = + parseStaticPropW.map { + case first :: second :: third :: fourth :: Nil => + QuadrupleValueOperation("FETCH_STATIC_PROP_W", + first, + second, + third, + fourth) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_STATIC_PROP_W", first, second, third) + case first :: second :: Nil => + DualValueOperation("FETCH_STATIC_PROP_W", first, second) + } + + def parseStaticPropFuncArg[_: P]: P[Seq[Value]] = + P(&("FETCH_STATIC_PROP_FUNC_ARG ") ~ "FETCH_STATIC_PROP_FUNC_ARG" ~/ (" " ~ getAnyValue).rep) + def getStaticPropFuncArg[_: P]: P[Opcode] = + P(parseStaticPropFuncArg.map { + case first :: second :: Nil => + DualValueOperation("FETCH_STATIC_PROP_FUNC_ARG", first, second) + case first :: 
second :: third :: Nil => + TripleValueOperation("FETCH_STATIC_PROP_FUNC_ARG", first, second, third) + case first :: second :: third :: fourth :: Nil => + QuadrupleValueOperation("FETCH_STATIC_PROP_FUNC_ARG", + first, + second, + third, + fourth) + }) + + def parseFetchStaticPropIs[_: P]: P[Seq[Value]] = + P("FETCH_STATIC_PROP_IS" ~/ (" " ~ getAnyValue).rep) + def getFetchStaticPropIs[_: P]: P[Opcode] = + P(parseFetchStaticPropIs.map { + case first :: second :: Nil => + DualValueOperation("FETCH_STATIC_PROP_IS", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_STATIC_PROP_IS", first, second, third) + }) + + def parseFetchDimR[_: P]: P[(Value, Value)] = + P("FETCH_DIM_R " ~/ getAnyValue ~ " " ~/ getAnyValue) + def getFetchDimR[_: P]: P[DualValueOperation] = + P(parseFetchDimR.map(x => DualValueOperation("FETCH_DIM_R", x._1, x._2))) + + def parseFetchListR[_: P]: P[(Value, Value)] = + P("FETCH_LIST_R " ~/ getAnyValue ~ " " ~ getAnyValue) + def getFetchListR[_: P]: P[DualValueOperation] = + P(parseFetchListR.map(x => DualValueOperation("FETCH_LIST_R", x._1, x._2))) + + def parseFetchClassConstantA[_: P] + : P[(ByteCodeKeyword, ByteCodeKeyword, StringLiteral)] = + P("FETCH_CLASS_CONSTANT " ~ getByteCodeKeyword ~ " " ~ getByteCodeKeyword ~ " " ~ getStringLiteral) + def getFetchClassConstantA[_: P]: P[TripleValueOperation] = + P(parseFetchClassConstantA.map(x => + TripleValueOperation("FETCH_CLASS_CONSTANT", x._1, x._2, x._3))) + + def parseFetchClassConstantB[_: P]: P[(Value, Value)] = + P("FETCH_CLASS_CONSTANT " ~ getAnyValue ~ " " ~ getAnyValue) + def getFetchClassConstantB[_: P]: P[DualValueOperation] = + P(parseFetchClassConstantB.map(x => + DualValueOperation("FETCH_CLASS_CONSTANT", x._1, x._2))) + + def parseFetchClass[_: P]: P[Seq[Value]] = + P("FETCH_CLASS" ~ (" " ~ getAnyValue).rep) + def getFetchClass[_: P]: P[Opcode] = + P(parseFetchClass.map { + case first :: second :: Nil => + DualValueOperation("FETCH_CLASS", first, 
second) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_CLASS", first, second, third) + case first :: second :: third :: fourth :: Nil => + QuadrupleValueOperation("FETCH_CLASS", first, second, third, fourth) + }) + + def parseFetchClassName[_: P]: P[Value] = + P("FETCH_CLASS_NAME" ~ " " ~ getAnyValue) + def getFetchClassName[_: P]: P[Opcode] = + P(parseFetchClassName.map(x => SingleValueOperation("FETCH_CLASS_NAME", x))) + + def getFetchClassConstant[_: P]: P[Opcode] = + P(getFetchClassConstantA | getFetchClassConstantB) + + def parseFetchDimFuncArg[_: P]: P[(Value, Value)] = + P("FETCH_DIM_FUNC_ARG " ~/ getAnyValue ~ " " ~ getAnyValue) + def getFetchDimFuncArg[_: P]: P[DualValueOperation] = + P(parseFetchDimFuncArg.map(x => + DualValueOperation("FETCH_DIM_FUNC_ARG", x._1, x._2))) + + def parseFetchConstant[_: P]: P[Seq[Value]] = + P("FETCH_CONSTANT" ~/ (" " ~ getAnyValue).rep(1)) + + def getFetchConstant[_: P]: P[Opcode] = P( + parseFetchConstant.map { + case first :: Nil => SingleValueOperation("FETCH_CONSTANT", first) + case first :: second :: Nil => + DualValueOperation("FETCH_CONSTANT", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_CONSTANT", first, second, third) + } + ) + + def parseFetchR[_: P]: P[(ByteCodeKeyword, Value)] = + P( + "FETCH_R " ~ getByteCodeKeyword ~ " " ~ getAnyValue + ) + def getFetchR[_: P]: P[DualValueOperation] = + P( + parseFetchR.map( + x => DualValueOperation("FETCH_R", x._1, x._2) + )) + + def parseFetchIs[_: P]: P[(ByteCodeKeyword, Value)] = + P( + "FETCH_IS " ~ getByteCodeKeyword ~ " " ~ getAnyValue + ) + def getFetchIs[_: P]: P[DualValueOperation] = + P( + parseFetchIs.map( + x => DualValueOperation("FETCH_IS", x._1, x._2) + ) + ) + + def parseFetchObjW[_: P]: P[Seq[Value]] = + P("FETCH_OBJ_W" ~ (" " ~ getAnyValue).rep ~ &("\n" | End)) + def getFetchObjW[_: P]: P[Opcode] = + P(parseFetchObjW.map { + case first :: second :: Nil => + 
DualValueOperation("FETCH_OBJ_W_2", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_OBJ_W_3", first, second, third) + }) + + def parseFetchStaticPropUnset[_: P]: P[Seq[Value]] = + P("FETCH_STATIC_PROP_UNSET" ~ (" " ~ getAnyValue).rep) + def getFetchStaticPropUnset[_: P]: P[Opcode] = + P(parseFetchStaticPropUnset.map { + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_STATIC_PROP_UNSET", first, second, third) + case first :: second :: Nil => + DualValueOperation("FETCH_STATIC_PROP_UNSET", first, second) + }) + + def parseFetchStaticPropRw[_: P]: P[Seq[Value]] = + P("FETCH_STATIC_PROP_RW" ~ (" " ~ getAnyValue).rep) + def getFetchStaticPropRw[_: P]: P[Opcode] = + P(parseFetchStaticPropRw.map { + case first :: second :: Nil => + DualValueOperation("FETCH_STATIC_PROP_RW", first, second) + case first :: second :: third :: Nil => + TripleValueOperation("FETCH_STATIC_PROP_RW", first, second, third) + }) + + def parseFetchCommand[_: P]: P[Opcode] = P( + &("FETCH_OBJ_FUNC_ARG ") ~ getFetchObjFuncArg | + &("FETCH_DIM_FUNC_ARG") ~ getFetchDimFuncArg | + &("FETCH_STATIC_PROP_FUNC_ARG ") ~ getStaticPropFuncArg | + &("FETCH_STATIC_PROP_R ") ~ getStaticPropR | + &("FETCH_STATIC_PROP_W ") ~ getStaticPropW | + &("FETCH_STATIC_PROP_IS ") ~ getFetchStaticPropIs | + &("FETCH_STATIC_PROP_UNSET ") ~ getFetchStaticPropUnset | + &("FETCH_STATIC_PROP_RW ") ~ getFetchStaticPropRw | + //&("FETCH_OBJ_R ") ~ getFetchObjR | + &("FETCH_CLASS_NAME ") ~ getFetchClassName | + &("FETCH_DIM_R ") ~ getFetchDimR | + &("FETCH_LIST_R ") ~ getFetchListR | + &("FETCH_CLASS ") ~ getFetchClass | + &("FETCH_CLASS_CONSTANT ") ~ getFetchClassConstant | + &("FETCH_CONSTANT ") ~ getFetchConstant | + &("FETCH_R ") ~ getFetchR | + &("FETCH_IS ") ~ getFetchIs | + &("FETCH_OBJ_W ") ~ getFetchObjW + ) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Generic.scala 
b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Generic.scala new file mode 100644 index 0000000..c5e658f --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Generic.scala @@ -0,0 +1,97 @@ +package io.joern.bytecode.parser.php8.instructions + +import fastparse.NoWhitespace._ +import fastparse.{P, _} +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.instructions.Utility.getAnyValue + +object Generic { + + def parseNoValueCommandString[_: P]: P[Unit] = P( + "DO_FCALL_BY_NAME" | "DO_ICALL" | "DO_UCALL" | "DO_FCALL" | "NOP" | "BEGIN_SILENCE" | "EXT_STMT" | "EXT_NOP" | + "FETCH_THIS" | "GENERATOR_CREATE" | "GET_CALLED_CLASS" | "FUNC_NUM_ARGS" | "CHECK_UNDEF_ARGS" | + "FETCH_GLOBALS" | + "VERIFY_NEVER_TYPE" | + "CALLABLE_CONVERT" + ) + + def parseNoValueCommand[_: P]: P[String] = P(parseNoValueCommandString.!) + + def getNoValueCommand[_: P]: P[NoValueOperation] = + P(parseNoValueCommand.map(x => NoValueOperation(x))) + + def parseSingleValueCommandString[_: P]: P[Unit] = P( + "ECHO " | "BW_NOT " | "BOOL_NOT " | "QM_ASSIGN " | "PRE_INC " | "POST_INC " | "PRE_DEC " | "POST_DEC " | "FREE " | + "PRINT " | "FE_FREE " | "END_SILENCE " | "BOOL " | "OP_DATA " | "THROW " | "STRLEN " | "SEND_UNPACK " | + "COUNT " | "DEFINED " | "GET_TYPE " | "UNSET_CV " | "COPY_TMP " | + "CLONE " | "MAKE_REF " | "SEPARATE " | "DECLARE_LAMBDA_FUNCTION " | "GENERATOR_RETURN " | "DISCARD_EXCEPTION " | + "CHECK_VAR " | "MATCH_ERROR " + ) + + def parseSingleValueCommand[_: P]: P[(String, Value)] = P( + parseSingleValueCommandString.! 
~/ getAnyValue + ) + + def getSingleValueCommand[_: P]: P[SingleValueOperation] = P( + parseSingleValueCommand.map(x => + SingleValueOperation(x._1.substring(0, x._1.length - 1), x._2)) + ) + + def parseDualValueCommandString[_: P]: P[Unit] = P( + "CONCAT " | "FAST_CONCAT " | "ADD " | "SUB " | "MUL " | "DIV " | "MOD " | "SL " | "SR " | "BW_OR " | + "BW_AND " | "BW_XOR " | "BOOL_OR " | "IS_EQUAL " | "IS_NOT_EQUAL " | "IS_IDENTICAL " | "IS_NOT_IDENTICAL " | + "IS_SMALLER " | "IS_SMALLER_OR_EQUAL " | "BIND_GLOBAL " | "DECLARE_CLASS_DELAYED " | + "DECLARE_CONST " | "INCLUDE_OR_EVAL " | "FETCH_FUNC_ARG " | "FETCH_DIM_FUNC_ARG " | "POW " | + "FETCH_DIM_R " | "FETCH_W " | "FETCH_DIM_W " | "ARRAY_KEY_EXISTS " | "FETCH_OBJ_RW " | + "FETCH_OBJ_R " | "FETCH_RW " | "FETCH_OBJ_IS " | "FETCH_DIM_IS " | "FETCH_DIM_RW " | + "UNSET_OBJ " | "FETCH_UNSET " | "UNSET_DIM " | "FETCH_DIM_UNSET " | "CASE " | "FETCH_OBJ_UNSET " | "UNSET_STATIC_PROP " | + "POST_INC_OBJ " | "PRE_INC_OBJ " | "POST_DEC_OBJ " | "PRE_DEC_OBJ " | "BOOL_XOR " | "SPACESHIP " | "UNSET_VAR " | + "CASE_STRICT " + ) + + def parseDualValueCommand[_: P]: P[(String, Value, Value)] = + P( + parseDualValueCommandString.! ~/ + getAnyValue ~ + " " ~ + getAnyValue) + + def getDualValueCommand[_: P]: P[DualValueOperation] = + P(parseDualValueCommand.map(x => + DualValueOperation(x._1.substring(0, x._1.length - 1), x._2, x._3))) + + def parseTripleValueCommandString[_: P]: P[Unit] = P( + "ASSIGN_DIM_OP " | "ASSIGN_OBJ_OP " | "ISSET_ISEMPTY_VAR " + ) + + def parseTripleValueCommand[_: P]: P[(String, Value, Value, Value)] = + P(parseTripleValueCommandString.! 
~/ getAnyValue ~ " " ~ getAnyValue ~ " " ~ getAnyValue) + + def getTripleValueCommand[_: P]: P[TripleValueOperation] = + P( + parseTripleValueCommand.map( + x => + TripleValueOperation(x._1.substring(0, x._1.length - 1), + x._2, + x._3, + x._4))) + + def parseQuadrupleValueCommandString[_: P]: P[Unit] = P( + "DOESNOTEXIST " + ) + + def parseQuadrupleValueCommand[_: P] + : P[(String, Value, Value, Value, Value)] = P( + parseQuadrupleValueCommandString.! ~/ getAnyValue ~ " " ~ getAnyValue ~ " " ~ getAnyValue ~ " " ~ getAnyValue + ) + + def getQuadrupleValueCommand[_: P]: P[QuadrupleValueOperation] = { + parseQuadrupleValueCommand.map( + x => + QuadrupleValueOperation(x._1.substring(0, x._1.length - 1), + x._2, + x._3, + x._4, + x._5)) + } +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/IncDecStaticProp.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/IncDecStaticProp.scala new file mode 100644 index 0000000..34f9744 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/IncDecStaticProp.scala @@ -0,0 +1,27 @@ +package io.joern.bytecode.parser.php8.instructions + +import fastparse.NoWhitespace._ +import fastparse.{P, _} +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.instructions.Utility.getAnyValue +import io.joern.reporting.ReportableError + +object IncDecStaticProp { + + def parseIncDecStaticProp[_: P]: P[(String, Seq[Value])] = + P(&(("POST_INC" | "PRE_INC" | "POST_DEC" | "PRE_DEC") ~ "_STATIC_PROP ") ~ (("POST_INC" | "PRE_INC" | "POST_DEC" | "PRE_DEC") ~ "_STATIC_PROP").! 
~ (" " ~ getAnyValue).rep) + def getIncDecStaticProp[_: P]: P[Opcode] = + P(parseIncDecStaticProp.map { + case (opString, first :: Nil) => SingleValueOperation(opString, first) + case (opString, first :: second :: Nil) => + DualValueOperation(opString, first, second) + case x => + throw ReportableError( + "", + -1, + "", + "", + s"when parsing IncDecStaticProp unexpected result tuple $x") + }) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Isset.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Isset.scala new file mode 100644 index 0000000..512eead --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Isset.scala @@ -0,0 +1,72 @@ +package io.joern.bytecode.parser.php8.instructions + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.Literals.{ + getByteCodeKeyword, + getStringLiteral +} +import io.joern.bytecode.parser.php8.Variables.getAnyVariable +import io.joern.bytecode.parser.php8.instructions.Utility.getAnyValue + +object Isset { + + def parseIssetIsemptyVar[_: P]: P[(ByteCodeKeyword, ByteCodeKeyword, Value)] = + P( + "ISSET_ISEMPTY_VAR " ~/ getByteCodeKeyword ~ " " ~ getByteCodeKeyword ~ + " " ~ (getAnyVariable | getStringLiteral)) + def getIssetIsemptyVar[_: P]: P[TripleValueOperation] = + P(parseIssetIsemptyVar.map(x => + TripleValueOperation("ISSET_ISEMPTY_VAR", x._1, x._2, x._3))) + + def parseIssetIsemptyDimObj[_: P]: P[(ByteCodeKeyword, Value, Value)] = + P("ISSET_ISEMPTY_DIM_OBJ " ~/ getByteCodeKeyword ~ " " ~ getAnyValue ~ " " ~ getAnyValue) + def getIssetIsemptyDimObj[_: P]: P[TripleValueOperation] = + P(parseIssetIsemptyDimObj.map(x => + TripleValueOperation("ISSET_ISEMPTY_DIM_OBJ", x._1, x._2, x._3))) + + def parseIssetIsemptyCv[_: P]: P[(ByteCodeKeyword, Variable)] = + P("ISSET_ISEMPTY_CV " ~/ getByteCodeKeyword ~ " " ~ getAnyVariable) + def 
getIssetIsemptyCv[_: P]: P[DualValueOperation] = + P(parseIssetIsemptyCv.map(x => + DualValueOperation("ISSET_ISEMPTY_CV", x._1, x._2))) + + def parseIssetIsemptyPropObj[_: P]: P[(ByteCodeKeyword, Value, Value)] = + P("ISSET_ISEMPTY_PROP_OBJ " ~ getByteCodeKeyword ~ " " ~ getAnyValue ~ " " ~ getAnyValue) + def getIssetIsemptyPropObj[_: P]: P[TripleValueOperation] = + P( + parseIssetIsemptyPropObj.map( + x => TripleValueOperation("ISSET_ISEMPTY_PROP_OBJ", x._1, x._2, x._3) + )) + + def parseIssetIsEmptyStaticProp[_: P]: P[Seq[Value]] = + P(&("ISSET_ISEMPTY_STATIC_PROP ") ~ "ISSET_ISEMPTY_STATIC_PROP" ~ (" " ~ getAnyValue).rep) + def getIssetIsEmptyStaticProp[_: P]: P[Opcode] = + P(parseIssetIsEmptyStaticProp.map { + case first :: second :: third :: Nil => + TripleValueOperation("ISSET_ISEMPTY_STATIC_PROP", first, second, third) + case first :: second :: third :: fourth :: Nil => + QuadrupleValueOperation("ISSET_ISEMPTY_STATIC_PROP", + first, + second, + third, + fourth) + }) + + def parseIssetIsemptyThis[_: P]: P[String] = P("ISSET_ISEMPTY_THIS".!) 
+ def getIssetIsemptyThis[_: P]: P[NoValueOperation] = + P(parseIssetIsemptyThis.map { _ => + NoValueOperation("ISSET_ISEMPTY_THIS") + }) + + def getIssetCommand[_: P]: P[Opcode] = + P( + getIssetIsemptyVar | + getIssetIsemptyCv | + getIssetIsemptyDimObj | + getIssetIsemptyPropObj | + getIssetIsEmptyStaticProp | + getIssetIsemptyThis) + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Jump.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Jump.scala new file mode 100644 index 0000000..61dddd3 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Jump.scala @@ -0,0 +1,78 @@ +package io.joern.bytecode.parser.php8.instructions + +import fastparse.NoWhitespace._ +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php8.instructions.Utility.getAnyValue + +object Jump { + + def parseJmpnz[_: P]: P[(Value, String)] = + P("JMPNZ " ~ getAnyValue ~ " " ~ Utility.parseTarget) + def getJmpnz[_: P]: P[DualValueOperation] = + P(parseJmpnz.map(x => + DualValueOperation("JMPNZ", x._1, IntegerLiteral(x._2.toLong)))) + + def parseJmpnzEx[_: P]: P[(Value, String)] = + P("JMPNZ_EX " ~ getAnyValue ~ " " ~ Utility.parseTarget) + def getJmpnzEx[_: P]: P[DualValueOperation] = + P(parseJmpnzEx.map(x => + DualValueOperation("JMPNZ_EX", x._1, IntegerLiteral(x._2.toLong)))) + + def parseJmpz[_: P]: P[(Value, String)] = + P("JMPZ " ~ getAnyValue ~ " " ~ Utility.parseTarget) + def getJmpz[_: P]: P[DualValueOperation] = + P(parseJmpz.map(x => + DualValueOperation("JMPZ", x._1, IntegerLiteral(x._2.toLong)))) + + def parseJmp[_: P]: P[String] = P("JMP " ~ Utility.parseTarget) + def getJmp[_: P]: P[SingleValueOperation] = + P(parseJmp.map(x => SingleValueOperation("JMP", IntegerLiteral(x.toLong)))) + + def parseJmpzEx[_: P]: P[(Value, String)] = + P("JMPZ_EX " ~ getAnyValue ~ " " ~ Utility.parseTarget) + def getJmpzEx[_: P]: P[DualValueOperation] = 
+    P(parseJmpzEx.map(x =>
+      DualValueOperation("JMPZ_EX", x._1, IntegerLiteral(x._2.toLong))))
+
+  def parseJmpZnz[_: P]: P[(Value, String, String)] = P(
+    "JMPZNZ " ~ getAnyValue ~ " " ~
+      Utility.parseTarget ~ " " ~
+      Utility.parseTarget
+  )
+  def getJmpZnz[_: P]: P[TripleValueOperation] =
+    P(
+      parseJmpZnz.map(
+        x =>
+          TripleValueOperation("JMPZNZ",
+                               x._1,
+                               IntegerLiteral(x._2.toLong),
+                               IntegerLiteral(x._3.toLong))
+      ))
+
+  def parseJmpSet[_: P]: P[(Value, String)] =
+    P("JMP_SET" ~ " " ~ getAnyValue ~ " " ~ Utility.parseTarget)
+  def getJmpSet[_: P]: P[DualValueOperation] =
+    P(parseJmpSet.map(x =>
+      DualValueOperation("JMP_SET", x._1, IntegerLiteral(x._2.toLong))))
+
+  def parseJmpNull[_: P]: P[(Value, String)] =
+    P("JMP_NULL " ~ getAnyValue ~ " " ~ Utility.parseTarget)
+
+  def getJmpNull[_: P]: P[DualValueOperation] =
+    P(parseJmpNull.map(x =>
+      DualValueOperation("JMP_NULL", x._1, IntegerLiteral(x._2.toLong))))
+
+  /** Dispatches on the jump mnemonic. Every lookahead includes the trailing
+    * space so that a mnemonic never matches as a prefix of a longer one
+    * (e.g. `JMP ` vs. `JMP_SET `).
+    */
+  def getJmpCommand[_: P]: P[Opcode] =
+    P(
+      &("JMPZ_EX ") ~ getJmpzEx |
+        &("JMPNZ_EX ") ~ getJmpnzEx |
+        &("JMPNZ ") ~ getJmpnz |
+        &("JMPZ ") ~ getJmpz |
+        &("JMPZNZ ") ~ getJmpZnz |
+        &("JMP_SET ") ~ getJmpSet |
+        &("JMP_NULL ") ~ getJmpNull |
+        &("JMP ") ~ getJmp
+    )
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/LambdaRelated.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/LambdaRelated.scala
new file mode 100644
index 0000000..b2d8ead
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/LambdaRelated.scala
@@ -0,0 +1,52 @@
+package io.joern.bytecode.parser.php8.instructions
+
+import fastparse.NoWhitespace._
+import fastparse.{P, _}
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php8.Basics.anyNumber
+import io.joern.bytecode.parser.php8.instructions.Utility.{
+  getAnyValue,
+  parseTarget
+}
+
+/** Parsers for the YIELD, YIELD_FROM, TICKS and FAST_CALL opcodes. */
+object LambdaRelated {
+
+  /** Parses `YIELD` followed by zero or more space-separated values. */
+  def parseYield[_: P]: P[Seq[Value]] =
+    P("YIELD" ~ (" " ~ getAnyValue).rep)
+
+  /** Turns a parsed `YIELD` into a no-, single- or dual-value operation,
+    * depending on how many operands were found.
+    */
+  def getYieldOpcode[_: P]: P[Opcode] =
+    P(parseYield.map {
+      case Nil => NoValueOperation("YIELD")
+      case single :: Nil => SingleValueOperation("YIELD", single)
+      case first :: second :: Nil => DualValueOperation("YIELD", first, second)
+    })
+
+  /** Parses `YIELD_FROM` followed by its space-separated values. */
+  def parseYieldFrom[_: P]: P[Seq[Value]] =
+    P("YIELD_FROM" ~ (" " ~ getAnyValue).rep)
+
+  /** Turns a parsed `YIELD_FROM` (exactly one operand) into an operation. */
+  def getYieldFrom[_: P]: P[Opcode] =
+    P(parseYieldFrom.map {
+      case single :: Nil => SingleValueOperation("YIELD_FROM", single)
+    })
+
+  /** Entry point for both yield flavours; `YIELD_FROM ` is tried first
+    * because plain `YIELD` is a prefix of it.
+    */
+  def getYield[_: P]: P[Opcode] =
+    P(
+      &("YIELD_FROM ") ~ getYieldFrom |
+        // "YIELD " | "YIELD" accepted exactly the inputs that "YIELD" accepts
+        &("YIELD") ~ getYieldOpcode
+    )
+
+  /** Parses `TICKS <number>` and captures the digits as a string. */
+  def parseTicks[_: P]: P[String] =
+    // rep(1): with a bare rep an empty capture reached `"".toLong` in getTicks
+    // and threw NumberFormatException instead of producing a parse failure
+    P(&("TICKS ") ~ "TICKS" ~ " " ~ anyNumber.rep(1).!)
+
+  /** Wraps the parsed tick count into a single-value `TICKS` operation. */
+  def getTicks[_: P]: P[Opcode] =
+    P(parseTicks.map(x =>
+      SingleValueOperation("TICKS", IntegerLiteral(x.toLong))))
+
+  /** Parses `FAST_CALL <target>` with an optional trailing value. */
+  def parseFastCall[_: P]: P[(String, Option[Value])] =
+    P("FAST_CALL " ~ parseTarget ~ (" " ~ getAnyValue).?)
+
+  /** Builds a dual-value operation when the optional value is present,
+    * otherwise a single-value operation holding only the target.
+    */
+  def getFastCall[_: P]: P[Opcode] =
+    P(parseFastCall.map {
+      case (line, Some(value)) =>
+        DualValueOperation("FAST_CALL", IntegerLiteral(line.toLong), value)
+      case (line, None) =>
+        SingleValueOperation("FAST_CALL", IntegerLiteral(line.toLong))
+    })
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/MatchRelated.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/MatchRelated.scala
new file mode 100644
index 0000000..674b3d1
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/MatchRelated.scala
@@ -0,0 +1,49 @@
+package io.joern.bytecode.parser.php8.instructions
+
+import fastparse.NoWhitespace._
+import fastparse.{P, _}
+import io.joern.bytecode.parser.constructs.{
+  DefaultKeyValuePair,
+  KeyValuePair,
+  MatchOpcode,
+  Variable
+}
+import io.joern.bytecode.parser.php8.Basics.anyNumber
+import io.joern.bytecode.parser.php8.Variables.getAnyVariable
+import io.joern.bytecode.parser.php8.instructions.Utility.{
+  parseStringInQuotes,
+  parseTarget
+}
+import io.joern.bytecode.parser.utils.decodeBase64
+
+object MatchRelated {
+
+  def getMatchDefaultTuple[_: P]: P[DefaultKeyValuePair] =
+    P("default: " ~ parseTarget).map(x => DefaultKeyValuePair(x))
+
+  // example: >>"apple": 0005<<
+  //          >>1: 0005<<
+  def parseMatchValueTuple[_: P]: P[(Either[Int, String], String)] = {
+    P(
+      (parseStringInQuotes.map(x => Right(decodeBase64(x))) | anyNumber
+        .rep(1)
+        .!
+        .map(_.toInt)
+        .map(Left(_))) ~ ": " ~ parseTarget)
+  }
+
+  def getMatchValueTuple[_: P]: P[KeyValuePair] =
+    P(parseMatchValueTuple).map(x => KeyValuePair(x._1, x._2))
+
+  // example: MATCH CV0($food) "apple": 0005, "bar": 0007, "cake": 0009, "apple2": 0011, "bar2": 0013, "cake2": 0015, default: 0004
+
+  def parseMatch[_: P]
+    : P[(String, Variable, Seq[KeyValuePair], DefaultKeyValuePair)] = {
+    // 2021-12-14: it was a conscious decision to not cut after the MATCH, as we also have MATCH_ERROR
+    P("MATCH".! ~ " " ~ getAnyVariable ~ " " ~ (getMatchValueTuple ~ ", ").rep ~ getMatchDefaultTuple)
+  }
+
+  def getMatch[_: P]: P[MatchOpcode] =
+    P(parseMatch).map(x => MatchOpcode(x._1, x._2, x._3, x._4.value))
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Rope.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Rope.scala
new file mode 100644
index 0000000..9c4c407
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Rope.scala
@@ -0,0 +1,40 @@
+package io.joern.bytecode.parser.php8.instructions
+
+import fastparse.NoWhitespace._
+import fastparse.{P, _}
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php8.Basics.anyNumber
+import io.joern.bytecode.parser.php8.Variables.getAnyVariable
+import io.joern.bytecode.parser.php8.instructions.Utility.getAnyValue
+
+/** Parsers for the ROPE_INIT / ROPE_ADD / ROPE_END opcodes.
+  *
+  * All three capture their leading number with `rep(1)`: the capture is
+  * later converted with `toLong`, and an empty capture would throw
+  * NumberFormatException from inside the parser instead of failing the parse.
+  */
+object Rope {
+
+  /** Parses `ROPE_INIT <number> <value>`. */
+  def parseRopeInit[_: P]: P[(String, Value)] =
+    P("ROPE_INIT " ~/ anyNumber.rep(1).! ~ " " ~ getAnyValue)
+
+  /** Builds the dual-value `ROPE_INIT` operation (number, value). */
+  def getRopeInit[_: P]: P[DualValueOperation] =
+    P(parseRopeInit.map(x =>
+      DualValueOperation("ROPE_INIT", IntegerLiteral(x._1.toLong), x._2)))
+
+  /** Parses `ROPE_ADD <number> <variable> <value>`. */
+  def parseRopeAdd[_: P]: P[(String, Variable, Value)] =
+    P("ROPE_ADD " ~/ anyNumber.rep(1).! ~ " " ~ getAnyVariable ~ " " ~ getAnyValue)
+
+  /** Builds the triple-value `ROPE_ADD` operation (number, variable, value). */
+  def getRopeAdd[_: P]: P[TripleValueOperation] =
+    P(
+      parseRopeAdd.map(
+        x =>
+          TripleValueOperation("ROPE_ADD",
+                               IntegerLiteral(x._1.toLong),
+                               x._2,
+                               x._3)))
+
+  /** Parses `ROPE_END <number> <variable> <value>`. */
+  def parseRopeEnd[_: P]: P[(String, Variable, Value)] =
+    P("ROPE_END " ~/ anyNumber.rep(1).! ~ " " ~ getAnyVariable ~ " " ~ getAnyValue)
+
+  /** Builds the triple-value `ROPE_END` operation (number, variable, value). */
+  def getRopeEnd[_: P]: P[TripleValueOperation] =
+    P(
+      parseRopeEnd.map(
+        x =>
+          TripleValueOperation("ROPE_END",
+                               IntegerLiteral(x._1.toLong),
+                               x._2,
+                               x._3)))
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/TypeRelated.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/TypeRelated.scala
new file mode 100644
index 0000000..fcce7d5
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/TypeRelated.scala
@@ -0,0 +1,56 @@
+package io.joern.bytecode.parser.php8.instructions
+
+import fastparse.NoWhitespace._
+import fastparse._
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php8.Basics.anyLetter
+import io.joern.bytecode.parser.php8.instructions.Utility.{
+  getAnyValue,
+  parseTarget
+}
+
+object TypeRelated {
+
+  def parseCastTypeString[_: P]: P[String] = P("(" ~ anyLetter.rep.! ~ ")")
+  def parseCast[_: P]: P[(Object, Value)] =
+    P("CAST " ~ (parseCastTypeString | getAnyValue) ~ " " ~ getAnyValue)
+  def getCast[_: P]: P[DualValueOperation] =
+    P(parseCast.map(x =>
+      x._1 match {
+        case value: String =>
+          DualValueOperation("CAST", StringLiteral(value), x._2)
+        case value: Value => DualValueOperation("CAST", value, x._2)
+      }))
+
+  def parseInstanceOf[_: P]: P[Seq[Value]] =
+    P(&("INSTANCEOF ") ~ "INSTANCEOF" ~ (" " ~ getAnyValue).rep)
+  def getInstanceOf[_: P]: P[Opcode] =
+    P(parseInstanceOf.map {
+      case first :: second :: Nil =>
+        DualValueOperation("INSTANCEOF", first, second)
+      case first :: second :: third :: fourth :: Nil =>
+        QuadrupleValueOperation("INSTANCEOF", first, second, third, fourth)
+      case first :: second :: third :: fourth :: fifth :: Nil =>
+        QuintupleValueOperation("INSTANCEOF",
+                                first,
+                                second,
+                                third,
+                                fourth,
+                                fifth)
+    })
+
+  def parseCoalesce[_: P]: P[(Value, String)] =
+    P("COALESCE " ~/ getAnyValue ~ " " ~ parseTarget)
+  def getCoalesce[_: P]: P[Opcode] =
+    P(parseCoalesce.map(x =>
+      DualValueOperation("COALESCE", x._1, IntegerLiteral(x._2.toLong))))
+
+  def parseTypeCheck[_: P]: P[(Value, Value)] = {
+    P(
+      "TYPE_CHECK " ~/ ((!"TYPE" ~ getAnyValue ~ " " ~ getAnyValue) | (("TYPE [" ~ (!"]" ~ AnyChar).rep ~ "]").!.map(
+        StringLiteral) ~ " " ~ getAnyValue)))
+  }
+  def getTypeCheck[_: P]: P[Opcode] = {
+    P(parseTypeCheck.map(x => DualValueOperation("TYPE_CHECK", x._1, x._2)))
+  }
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/UnexpectedArgumentCount.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/UnexpectedArgumentCount.scala
new file mode 100644
index 0000000..00dd89c
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/UnexpectedArgumentCount.scala
@@ -0,0 +1,9 @@
+package io.joern.bytecode.parser.php8.instructions
+
+/** Raised when an opcode is seen with a number of arguments no parser branch
+  * handles.
+  *
+  * Extends RuntimeException rather than Throwable directly, so handlers
+  * written as `case e: Exception` see it too. It is still a Throwable, so
+  * existing `Throwable` handlers are unaffected.
+  *
+  * @param opcode      name of the offending opcode
+  * @param expected    the argument counts that are supported
+  * @param encountered the argument count that was actually found
+  */
+class UnexpectedArgumentCount(opcode: String,
+                              expected: Seq[Int],
+                              encountered: Int)
+    extends RuntimeException(
+      s"$opcode was encountered with $encountered args but only {${expected.mkString(",")}} are supported")
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Utility.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Utility.scala
new file mode 100644
index 0000000..9dc1c56
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/php8/instructions/Utility.scala
@@ -0,0 +1,19 @@
+package io.joern.bytecode.parser.php8.instructions
+
+import fastparse.NoWhitespace._
+import fastparse.{P, _}
+import io.joern.bytecode.parser.constructs.Value
+import io.joern.bytecode.parser.php8.Basics.anyNumber
+import io.joern.bytecode.parser.php8.Literals.getAnyLiteral
+import io.joern.bytecode.parser.php8.Variables.getAnyVariable
+
+object Utility {
+
+  def getAnyValue[_: P]: P[Value] = P(getAnyLiteral | getAnyVariable)
+
+  def parseStringInQuotes[_: P]: P[String] =
+    P("\"" ~ (!"\"" ~ AnyChar).rep.! ~ "\"")
+
+  // replaces "("L" | "BB") ~ anyNumber.rep.!"
+  def parseTarget[_: P]: P[String] = P("BB".? ~ anyNumber.rep(1).!)
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/parser/utils.scala b/layerByteCode/src/main/scala/io/joern/bytecode/parser/utils.scala
new file mode 100644
index 0000000..45a3681
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/parser/utils.scala
@@ -0,0 +1,24 @@
+package io.joern.bytecode.parser
+
+import java.nio.charset.StandardCharsets
+import java.util.Base64
+
+import scala.util.control.NonFatal
+
+/** Base64 helpers used by the bytecode parsers. */
+object utils {
+
+  /** Decodes a Base64 string into UTF-8 text.
+    *
+    * @param str the Base64-encoded input
+    * @return the decoded text
+    * @throws RuntimeException if decoding fails; the original failure is
+    *                          attached as the cause
+    */
+  def decodeBase64(str: String): String =
+    try {
+      new String(Base64.getDecoder.decode(str), StandardCharsets.UTF_8)
+    } catch {
+      // NonFatal lets fatal VM errors propagate instead of being re-wrapped
+      case NonFatal(x) =>
+        throw new RuntimeException(
+          s"unable to base64 decode $str: ${x.getMessage}",
+          x)
+    }
+
+  /** Encodes text as Base64 over its UTF-8 bytes.
+    *
+    * @param str the text to encode
+    * @return the Base64 representation
+    * @throws RuntimeException if encoding fails; the original failure is
+    *                          attached as the cause
+    */
+  def encodeBase64(str: String): String =
+    try {
+      Base64.getEncoder.encodeToString(str.getBytes(StandardCharsets.UTF_8))
+    } catch {
+      case NonFatal(x) =>
+        // the message previously said "decode" here
+        throw new RuntimeException(
+          s"unable to base64 encode $str: ${x.getMessage}",
+          x)
+    }
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/CallFinishingPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/CallFinishingPass.scala
new file mode 100644
index 0000000..39217f8
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/CallFinishingPass.scala
@@ -0,0 +1,168 @@
+package io.joern.bytecode.passes
+
+import io.joern.bytecode.Defines
+import io.joern.bytecode.passes.utility.MethodDetectionAndAssociation._
+import io.joern.reporting.Reporting
+import io.shiftleft.codepropertygraph.Cpg
+import io.shiftleft.codepropertygraph.generated.nodes.{Call, CfgNode, Method}
+import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes}
+import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass}
+import io.shiftleft.semanticcpg.language._
+
+import scala.collection.mutable.{Set => MSet}
+import scala.jdk.CollectionConverters._
+
+class CallFinishingPass(methods: Seq[nodes.Method],
+                        cpg: Cpg,
+                        keyPool: IntervalKeyPool,
+                        strict: Boolean = true)
+    extends ParallelCpgPass[nodes.Method](cpg,
+                                          keyPools =
+                                            Some(keyPool.split(methods.size)))
+    with Reporting {
+
+  override val name: String = "CallFinishingPass"
+
+  // one part per method handed to the constructor
+  override def partIterator: Iterator[Method] = methods.iterator
+
+  // Sort key for a call: the order of its AST parent when that parent is
+  // itself a Call, otherwise the call's own order.
+  private def inMethodStructureOrder(node: Call): Int = {
+    val order = if (node.astParent.isInstanceOf[Call]) {
+      node.astParent.order
+    } else {
+      node.order
+    }
+    order
+  }
+
+  // All calls in the method's AST whose name is in KNOWN_FUNCTION_STARTS,
+  // sorted by inMethodStructureOrder.
+  private def getInitCallsInMethodOrder(node: Method): List[Call] = {
+    node.ast.isCall
+      .filter(call => KNOWN_FUNCTION_STARTS.contains(call.name))
+      .l
+      .sortBy(inMethodStructureOrder)
+  }
+
+  // Walks the CFG from every call start that has not been finished yet and
+  // returns the single diff graph built for this method.
+  override def runOnPart(part: Method): Iterator[DiffGraph] = {
+    implicit val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder
+    withErrorReporting(part) {
+      val remainingFunctionCallStarts = getInitCallsInMethodOrder(part)
+      // filled by processFunctionStart, so starts already handled while
+      // walking from an earlier start are skipped below
+      val finished: MSet[Call] = MSet()
+      remainingFunctionCallStarts.foreach { call =>
+        if (!finished.contains(call)) {
+          processFunctionStart(call, List(), Map(), finished)
+        }
+      }
+    }
+    //println(s"${part.name}/${part.id()} -> " + diffGraph.build().iterator.toList)
+    Iterator(diffGraph.build())
+  }
+
+  // True iff the node is a Call whose name is a known function start,
+  // send-val or function end.
+  private def isCallRelated(cfgNode: CfgNode): Boolean = {
+    cfgNode match {
+      case x: Call =>
+        KNOWN_FUNCTION_STARTS.contains(x.name) ||
+          KNOWN_SEND_VALS.contains(x.name) ||
+          KNOWN_FUNCTION_ENDS.contains(x.name)
+      case _ => false
+    }
+  }
+
+  // Recursive walk over CFG successors starting at `current`.
+  //   activeInits - stack (head = innermost) of call starts not yet ended
+  //   activeSends - sends collected so far, keyed by the start they belong to
+  //   finished    - starts already passed to finishUpCall (mutated here)
+  // A function start pushes onto the stack, a send-val is appended to the
+  // sends of the innermost start, and a function end pops the innermost
+  // start and finishes it. The walk follows CFG edges only while the stack
+  // is non-empty.
+  private def processFunctionStart(
+      current: CfgNode,
+      activeInits: List[Call],
+      activeSends: Map[Call, List[Call]],
+      finished: MSet[Call])(implicit graph: DiffGraph.Builder): Unit = {
+    //println(s"processing on ${current.code}")
+    var actualActiveInits = activeInits
+    var actualActiveSends = activeSends
+    // if it is call related
+    if (isCallRelated(current)) {
+      // cast current to an actual call as it has to be
+      val currentCall = current.asInstanceOf[Call]
+      // if it is a known function start
+      if (KNOWN_FUNCTION_STARTS.contains(currentCall.name)) {
+        // push it on top of the active function calls
+        actualActiveInits = List(currentCall) ++ actualActiveInits
+      } else if (KNOWN_SEND_VALS.contains(currentCall.name)) {
+        // NOTE(review): .head relies on the stack being non-empty here; that
+        // holds as long as every walk begins at a function start - confirm
+        val activeInit: Call = actualActiveInits.head
+        val activeSends: List[Call] =
+          actualActiveSends.getOrElse(activeInit, List[Call]())
+        actualActiveSends = actualActiveSends + (activeInit -> (activeSends ++ List[
+          Call](currentCall)))
+      } else {
+        assert(KNOWN_FUNCTION_ENDS.contains(current.asInstanceOf[Call].name),
+               s"there can only be a send_val left for $current")
+        // pop head of active inits
+        val activeInit: Call = actualActiveInits.head
+        actualActiveInits = actualActiveInits.tail
+        // get active related sends
+        val activeSends: List[Call] =
+          actualActiveSends.getOrElse(activeInit, List())
+        // if we did not already handle the current
+        if (!finished.contains(activeInit)) {
+          // handle the call
+          finishUpCall(activeInit, activeSends, currentCall)
+          // add finished call to finished
+          finished.addOne(activeInit)
+        }
+      }
+    }
+    // if there are still active calls
+    if (actualActiveInits.nonEmpty) {
+      // we are assuming no loops
+      current.out(EdgeTypes.CFG).asScala.foreach { next =>
+        processFunctionStart(next.asInstanceOf[CfgNode],
+                             actualActiveInits,
+                             actualActiveSends,
+                             finished)
+      }
+    } else {
+      // if not we are done
+    }
+  }
+
+  // Links one finished call: adds an ARGUMENT edge from `docall` to every
+  // send, sets the NAME property of `docall` to the called name, and adds
+  // CALL edge(s) to the method(s) found by getCallCorrespondingMethod.
+  // Throws RuntimeException when no usable target exists.
+  private def finishUpCall(init: Call, sends: List[Call], docall: Call)(
+      implicit graph: DiffGraph.Builder): Unit = {
+    val nameOfCalledFunction = getCalledMethod(init)
+    //val unknownNameOfCalledFunction = nameOfCalledFunction.replace("*","UNKNOWN")
+    // given that we have different send vals (i.e., possibly with named parameter in PHP8) we are now linking to the SEND_VAL
+    // and do not give an order
+    sends.foreach { send =>
+      graph.addEdge(docall, send, EdgeTypes.ARGUMENT)
+    }
+    graph.addNodeProperty(docall, "NAME", nameOfCalledFunction)
+    getCallCorrespondingMethod(cpg, nameOfCalledFunction) match {
+      case Nil =>
+        // no match for the full name: retry with the part after the last
+        // backslash and accept only a single INTERNAL_FUNCTION stub
+        getCallCorrespondingMethod(cpg, nameOfCalledFunction.split("\\\\").last) match {
+          case single :: Nil if single.code == Defines.INTERNAL_FUNCTION =>
+            graph.addEdge(docall, single, EdgeTypes.CALL)
+          case _ =>
+            throw new RuntimeException(
+              s"Missing linking target for $nameOfCalledFunction. Either stub creation was not run or failed")
+        }
+      case single :: Nil =>
+        graph.addEdge(docall, single, EdgeTypes.CALL)
+      case multiple if strict =>
+        // strict mode: link only to the one stub whose full name is the
+        // called name with ".*" replaced by "UNKNOWN"
+        val actualUnknownName = nameOfCalledFunction.replace(".*", "UNKNOWN")
+        multiple.filter(
+          x =>
+            x.fullName == actualUnknownName && List(
+              Defines.MULTIPLE_TARGETS,
+              Defines.UNKNOWN_FUNCTION,
+              Defines.UNKNOWN_METHOD).contains(x.code)) match {
+          case Nil =>
+            throw new RuntimeException(
+              s"Multiple targets but no $actualUnknownName stub. Either stub creation was not run or failed. Available were ${multiple
+                .map(elem => s"${elem.fullName}/${elem.code}")}")
+          case target :: Nil =>
+            graph.addEdge(docall, target, EdgeTypes.CALL)
+          case _ =>
+            throw new RuntimeException(
+              s"Multiple targets but multiple $actualUnknownName stub. Stub creation failed")
+        }
+      case multiple if !strict =>
+        // non-strict mode: link to every candidate
+        multiple.foreach { target =>
+          graph.addEdge(docall, target, EdgeTypes.CALL)
+        }
+    }
+  }
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/CfgInterBBCreationPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/CfgInterBBCreationPass.scala
new file mode 100644
index 0000000..33f1acf
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/CfgInterBBCreationPass.scala
@@ -0,0 +1,185 @@
+package io.joern.bytecode.passes
+
+import io.joern.bytecode.Defines
+import io.joern.bytecode.parser.constructs.{
+  BasicBlock,
+  ControlFlowDefinitionsBlock,
+  MethodDefinitionPair
+}
+import io.joern.bytecode.passes.utility.MethodIdentification
+import io.joern.reporting.{ReportableError, Reporting}
+import io.shiftleft.codepropertygraph.Cpg
+import io.shiftleft.codepropertygraph.generated.{EdgeTypes, NodeTypes, nodes}
+import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass}
+import io.shiftleft.semanticcpg.language._
+import
overflowdb.traversal.NodeOps
+
+/** Adds the CFG edges that enter or leave a basic block:
+  *  - for a block flagged "start": method -> body block -> entry of its first instruction,
+  *  - for a block flagged "exit": last instruction -> METHOD_RETURN child of the method,
+  *  - for every follower listed in `followedBy`: last instruction -> entry of the
+  *    follower's first instruction.
+  *
+  * One part is one entry of `filesMethodDefinitionPairs`; the file name of the first
+  * pair of a part is used to resolve all methods of that part.
+  */
+class CfgInterBBCreationPass(
+    filesMethodDefinitionPairs: List[Seq[MethodDefinitionPair]],
+    cpg: Cpg,
+    keyPool: IntervalKeyPool)
+    extends ParallelCpgPass[Seq[MethodDefinitionPair]](
+      cpg,
+      keyPools = Some(keyPool.split(filesMethodDefinitionPairs.size)))
+    with Reporting {
+
+  override val name = "CfgInterBBCreationPass"
+  implicit val codePropertyGraph: Cpg = cpg
+
+  /** One part per input entry, with all `{closure}` definitions filtered out. */
+  override def partIterator: Iterator[Seq[MethodDefinitionPair]] =
+    filesMethodDefinitionPairs
+      .map(_.filterNot(_.byteCodeBlock.name == "{closure}"))
+      .iterator
+
+  /** Builds the inter basic block CFG edges for every method of the part.
+    *
+    * Errors are reported through [[Reporting]] instead of being thrown; the edges
+    * collected before the error are still returned.
+    *
+    * @param fileMethodDefinitionPairs the method definitions of one part (may be empty)
+    * @return a single diff graph containing the new CFG edges
+    */
+  override def runOnPart(fileMethodDefinitionPairs: Seq[MethodDefinitionPair])
+    : Iterator[DiffGraph] = {
+    implicit val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder
+    // partIterator may leave a part empty after dropping closures. Resolving the
+    // file name once via headOption avoids calling `.head` on an empty Seq, both
+    // here and inside the error handler (where it previously threw a second time
+    // and escaped the handler).
+    fileMethodDefinitionPairs.headOption.foreach { firstPair =>
+      val fileName = firstPair.byteCodeBlock.fileName
+      try {
+        val absolutePath = MethodIdentification.getAbsolutePath(fileName)
+        fileMethodDefinitionPairs.foreach {
+          case MethodDefinitionPair(_, cfgBlock) =>
+            createControlFlow(cfgBlock, getMethod(absolutePath, cfgBlock))
+        }
+      } catch {
+        case x: ReportableError =>
+          reportError(x)
+        // intentionally broad: failed asserts (AssertionError) must be reported too
+        case x: Throwable =>
+          reportError(fileName, "", "", "", x.getMessage, x)
+      }
+    }
+    Iterator(diffGraph.build())
+  }
+
+  /** Looks up the METHOD node for `cfgBlock`; a missing namespace is replaced by
+    * `Defines.GLOBAL_NAMESPACE_NAME`.
+    */
+  private def getMethod(fileName: String,
+                        cfgBlock: ControlFlowDefinitionsBlock): nodes.Method = {
+    MethodIdentification.getUniqueMethod(
+      Some(fileName),
+      cfgBlock.namespace.orElse(Some(Defines.GLOBAL_NAMESPACE_NAME)),
+      cfgBlock.classname,
+      cfgBlock.name,
+      Some(cfgBlock.lineStart),
+      Some(cfgBlock.lineEnd)
+    )
+  }
+
+  /** Adds a single CFG edge `start -> end` to the diff graph. */
+  def addCfgEdge(start: nodes.CfgNode, end: nodes.CfgNode)(
+      implicit diffGraph: DiffGraph.Builder): Unit = {
+    diffGraph.addEdge(start, end, EdgeTypes.CFG)
+  }
+
+  /** Creates the inter basic block edges for all basic blocks of one method.
+    *
+    * @param cfgBlock the parsed control flow definition of the method
+    * @param method   the METHOD node the definition belongs to
+    * @throws ReportableError if the method has no BLOCK child
+    */
+  def createControlFlow(
+      cfgBlock: ControlFlowDefinitionsBlock,
+      method: nodes.Method)(implicit diffGraph: DiffGraph.Builder): Unit = {
+    // the calls directly below the method's body block, indexed by ascending order
+    val methodBodyCalls: Array[nodes.Call] =
+      method.astChildren.isBlock.headOption
+        .map { node =>
+          node.astChildren.isCall.l
+            .sortWith((a, b) => a.order < b.order)
+            .toArray
+        }
+        .getOrElse(
+          throw ReportableError(cfgBlock.fileName,
+                                cfgBlock.lineStart,
+                                cfgBlock.name,
+                                "",
+                                s"The method does not have a code block"))
+    // basic blocks indexed by their number so followers can be looked up by index
+    val basicBlocks =
+      cfgBlock.blocks.sortWith((lhs, rhs) => lhs.number < rhs.number).toArray
+
+    basicBlocks.foreach { basicBlock =>
+      createControlFlowInterBasicBlock(basicBlock,
+                                       basicBlocks,
+                                       methodBodyCalls,
+                                       method,
+                                       diffGraph)
+    }
+  }
+
+  /** Returns the CFG node at which evaluation of `call` starts.
+    *
+    * For an `=` call this is the entry of the child with order 1 (the right hand
+    * side, which is cast to a call); for a call without arguments the call itself;
+    * otherwise the AST child with order 0.
+    */
+  def getEntryOfCall(call: nodes.Call): nodes.CfgNode = {
+    if (call.name == "=") {
+      getEntryOfCall(call.astChildren.order(1).head.asInstanceOf[nodes.Call])
+    } else if (call.start.argument.l.isEmpty) {
+      call
+    } else {
+      call.start.astChildren.order(0).head.asInstanceOf[nodes.CfgNode]
+    }
+  }
+
+  /** Adds the entry, exit and follower edges of a single basic block.
+    *
+    * @param basicBlock      the block whose outgoing/incoming edges are created
+    * @param basicBlocks     all blocks of the method, indexed by block number
+    * @param methodBodyCalls the calls of the method body, indexed by instruction number
+    * @param method          the METHOD node owning the blocks
+    * @param graph           the diff graph receiving the edges
+    */
+  def createControlFlowInterBasicBlock(basicBlock: BasicBlock,
+                                       basicBlocks: Array[BasicBlock],
+                                       methodBodyCalls: Array[nodes.Call],
+                                       method: nodes.Method,
+                                       graph: DiffGraph.Builder): Unit = {
+    // entry block: method -> body block -> first instruction
+    if (basicBlock.attributes.exists(attr => attr == "start")) {
+      val block = method.astChildren.isBlock.l
+      assert(block.length == 1, "any method should only have one block child")
+      graph.addEdge(method, block.head, EdgeTypes.CFG)
+      graph.addEdge(
+        block.head,
+        getEntryOfCall(methodBodyCalls(basicBlock.firstInstruction)),
+        EdgeTypes.CFG)
+    }
+    // exit block: last instruction -> METHOD_RETURN
+    if (basicBlock.attributes.exists(attr => attr == "exit")) {
+      val lastInstruction = methodBodyCalls(basicBlock.lastInstruction)
+      val methodReturn = method.astChildren
+        .find(child => child.label == NodeTypes.METHOD_RETURN)
+        .getOrElse(throw new RuntimeException(
+          s"method ${method.fullName} has no ${NodeTypes.METHOD_RETURN} child"))
+      graph.addEdge(lastInstruction, methodReturn, EdgeTypes.CFG)
+    }
+    // follower blocks: every follower gets the same bounds checks, no matter how
+    // many there are; an empty follower list simply adds no edge
+    basicBlock.followedBy.foreach { followers =>
+      val previousInstruction = methodBodyCalls(basicBlock.lastInstruction)
+      followers.foreach { follower =>
+        assert(
+          follower < basicBlocks.length,
+          s"jumping to BB$follower impossible as there are only ${basicBlocks.length} BBs")
+        assert(
+          basicBlocks(follower).firstInstruction < methodBodyCalls.length,
+          s"selecting instruction ${basicBlocks(follower).firstInstruction} impossible as there are only ${methodBodyCalls.length} instructions"
+        )
+        val targetInstruction = getEntryOfCall(
+          methodBodyCalls(basicBlocks(follower).firstInstruction))
+        addCfgEdge(previousInstruction, targetInstruction)(graph)
+      }
+    }
+  }
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/CfgIntraBBCreationPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/CfgIntraBBCreationPass.scala
new file mode 100644
index 0000000..7af90b2
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/CfgIntraBBCreationPass.scala
@@ -0,0 +1,172 @@
+package io.joern.bytecode.passes
+
+import io.joern.bytecode.Defines
+import io.joern.bytecode.parser.constructs.{
+  BasicBlock,
+  ControlFlowDefinitionsBlock,
+  MethodDefinitionPair
+}
+import io.joern.bytecode.passes.utility.MethodIdentification
+import io.joern.reporting.{ReportableError, Reporting}
+import io.shiftleft.codepropertygraph.Cpg
+import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes}
+import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass}
+import
io.shiftleft.semanticcpg.language._
+import overflowdb.traversal.NodeOps
+
+
+/** Adds the CFG edges inside basic blocks: the edges between the arguments of an
+  * instruction and the instruction itself, and the edges chaining consecutive
+  * instructions of one basic block.
+  *
+  * One part is one entry of `filesMethodDefinitionPairs`; the file name of the first
+  * pair of a part is used to resolve all methods of that part.
+  */
+class CfgIntraBBCreationPass(
+    filesMethodDefinitionPairs: List[Seq[MethodDefinitionPair]],
+    cpg: Cpg,
+    keyPool: IntervalKeyPool)
+    extends ParallelCpgPass[Seq[MethodDefinitionPair]](
+      cpg,
+      keyPools = Some(keyPool.split(filesMethodDefinitionPairs.size)))
+    with Reporting {
+
+  //println(filesMethodDefinitionPairs.map(_.map(_.byteCodeBlock)))
+
+  /** One part per input entry, with all `{closure}` definitions filtered out. */
+  override def partIterator: Iterator[Seq[MethodDefinitionPair]] =
+    filesMethodDefinitionPairs
+      .map(_.filterNot(_.byteCodeBlock.name == "{closure}"))
+      .iterator
+  implicit val codePropertyGraph: Cpg = cpg
+  override val name = "CfgIntraBBCreationPass"
+
+  /** Builds the intra basic block CFG edges for every method of the part.
+    *
+    * A failure in one method is reported and does not stop the remaining methods
+    * of the part from being processed.
+    *
+    * @param fileMethodDefinitionPairs the method definitions of one part (may be empty)
+    * @return a single diff graph containing the new CFG edges
+    */
+  override def runOnPart(fileMethodDefinitionPairs: Seq[MethodDefinitionPair])
+    : Iterator[DiffGraph] = {
+    implicit val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder
+    // partIterator may leave a part empty after dropping closures; `.head` on such
+    // a part used to throw out of runOnPart, so go through headOption instead
+    fileMethodDefinitionPairs.headOption.foreach { firstPair =>
+      //getting the absolute path to the file
+      val absolutePath = MethodIdentification.getAbsolutePath(
+        firstPair.byteCodeBlock.fileName)
+
+      fileMethodDefinitionPairs.foreach { pair =>
+        try {
+          val MethodDefinitionPair(_, cfgBlock) = pair
+          val method = getMethod(absolutePath, cfgBlock)
+          createControlFlow(cfgBlock, method)
+        } catch {
+          case x: ReportableError =>
+            reportError(x)
+          // intentionally broad: every failure is reported per method
+          case x: Throwable =>
+            reportError(absolutePath,
+                        pair.byteCodeBlock.lineStart.toString,
+                        pair.byteCodeBlock.name,
+                        "",
+                        x.getMessage,
+                        x)
+        }
+      }
+    }
+    Iterator(diffGraph.build())
+  }
+
+  /** Looks up the METHOD node for `cfgBlock`; a missing namespace is replaced by
+    * `Defines.GLOBAL_NAMESPACE_NAME`.
+    */
+  private def getMethod(fileName: String,
+                        cfgBlock: ControlFlowDefinitionsBlock): nodes.Method = {
+    MethodIdentification.getUniqueMethod(
+      Some(fileName),
+      cfgBlock.namespace.orElse(Some(Defines.GLOBAL_NAMESPACE_NAME)),
+      cfgBlock.classname,
+      cfgBlock.name,
+      Some(cfgBlock.lineStart),
+      Some(cfgBlock.lineEnd)
+    )
+  }
+
+  /** Adds a single CFG edge `start -> end` to the diff graph. */
+  def addCfgEdge(start: nodes.CfgNode, end: nodes.CfgNode)(
+      implicit diffGraph: DiffGraph.Builder): Unit = {
+    diffGraph.addEdge(start, end, EdgeTypes.CFG)
+  }
+
+  /** Creates the intra basic block edges for all basic blocks of one method.
+    *
+    * @param cfgBlock the parsed control flow definition of the method
+    * @param method   the METHOD node the definition belongs to
+    * @throws ReportableError if the method has no BLOCK child
+    */
+  def createControlFlow(
+      cfgBlock: ControlFlowDefinitionsBlock,
+      method: nodes.Method)(implicit diffGraph: DiffGraph.Builder): Unit = {
+
+    // the calls directly below the method's body block, indexed by ascending order
+    val methodBodyCalls: Array[nodes.Call] =
+      method.astChildren.isBlock.headOption
+        .map { node =>
+          node.astChildren.isCall.l
+            .sortWith((a, b) => a.order < b.order)
+            .toArray
+        }
+        .getOrElse(
+          throw ReportableError(cfgBlock.fileName,
+                                cfgBlock.lineStart,
+                                cfgBlock.name,
+                                "",
+                                "the method does not have a code block"))
+
+    // creating the in BB control flow
+    val basicBlocks =
+      cfgBlock.blocks.sortWith((lhs, rhs) => lhs.number < rhs.number).toArray
+
+    basicBlocks.foreach { basicBlock =>
+      // create the control flow within the basic block
+      createControlFlowIntraBasicBlock(basicBlock, methodBodyCalls, diffGraph)
+    }
+  }
+
+  /** Chains the instructions `firstInstruction .. lastInstruction` of one basic block:
+    * the exit node of each instruction is connected to the entry node of the next.
+    *
+    * @param basicBlock     the block whose instruction range is connected
+    * @param bodyBlockCalls the calls of the method body, indexed by instruction number
+    * @param graph          the diff graph receiving the edges
+    */
+  def createControlFlowIntraBasicBlock(basicBlock: BasicBlock,
+                                       bodyBlockCalls: Array[nodes.Call],
+                                       graph: DiffGraph.Builder): Unit = {
+    implicit val diffGraph: DiffGraph.Builder = graph
+    // (entry, exit) of the instruction handled last
+    var previous = createCfgForCall(bodyBlockCalls(basicBlock.firstInstruction))
+    //go through the given instruction range and connect each instruction in the BB with a CFG edge
+    for (instruction <- Range.inclusive(basicBlock.firstInstruction + 1,
+                                        basicBlock.lastInstruction)) {
+      val next = createCfgForCall(bodyBlockCalls(instruction))
+      addCfgEdge(previous._2, next._1) //, List(("condition","always")))
+      previous = next
+    }
+  }
+
+  /** Creates the CFG edges inside a single instruction.
+    *
+    * For an `=` call: CFG of the child with order 1 (cast to a call) -> child with
+    * order 0 -> the `=` call. For any other call: its arguments in ascending order,
+    * followed by the call itself.
+    *
+    * @return the (entry, exit) CFG nodes of the instruction
+    */
+  private def createCfgForCall(call: nodes.Call)(
+      implicit diffGraph: DiffGraph.Builder): (nodes.CfgNode, nodes.CfgNode) = {
+    if (call.name == "=") {
+      // recurse on the rhs call first, it is evaluated before the assignment target
+      val (start, end) = createCfgForCall(
+        call.astChildren.order(1).head.asInstanceOf[nodes.Call])
+      // look the lhs up once; it is both the target and the source of an edge
+      val lhs = call.astChildren.order(0).head.asInstanceOf[nodes.CfgNode]
+      addCfgEdge(end, lhs)
+      addCfgEdge(lhs, call)
+      // the rhs entry is the entry of the assignment, the call itself is the exit
+      (start, call)
+    } else {
+      val arguments =
+        call.start.argument.l.sortWith((lhs, rhs) => lhs.order < rhs.order)
+      arguments match {
+        case Nil =>
+          // no argument: the call is both entry and exit
+          (call, call)
+        case _ =>
+          // argument_1 -> ... -> argument_n -> call; for a single argument the
+          // zip is empty and only the argument -> call edge is added
+          arguments.zip(arguments.tail).foreach {
+            case (from, to) => addCfgEdge(from, to)
+          }
+          addCfgEdge(arguments.last, call)
+          (arguments.head, call)
+      }
+    }
+  }
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/CreateStubMethodNodesPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/CreateStubMethodNodesPass.scala
new file mode 100644
index 0000000..d43b736
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/CreateStubMethodNodesPass.scala
@@ -0,0 +1,211 @@
+package io.joern.bytecode.passes
+
+import io.joern.bytecode.Defines
+import io.joern.bytecode.passes.utility.MethodDetectionAndAssociation._
+import io.joern.reporting.{Linking, Reporting}
+import io.shiftleft.codepropertygraph.Cpg
+import io.shiftleft.codepropertygraph.generated.EdgeTypes
+import io.shiftleft.codepropertygraph.generated.nodes.{
+  Call,
+  Method,
+  NewMethod,
+  NewMethodReturn
+}
+import io.shiftleft.passes.{CpgPass, DiffGraph, IntervalKeyPool}
+import io.shiftleft.semanticcpg.language._
+
+import scala.collection.mutable.{Map => MMap}
+
+class
CreateStubMethodNodesPass(cpg: Cpg,
+                                keyPool: IntervalKeyPool,
+                                strict: Boolean,
+                                interpreter: String)
+    extends CpgPass(cpg, keyPool = Some(keyPool))
+    with Reporting
+    with Linking {
+
+  // Creates external stub METHOD nodes (each with exactly one METHOD_RETURN) for
+  // the targets of function-start calls that cannot be linked to exactly one
+  // method of the CPG. With `strict` set, ambiguous targets get a stub as well.
+
+  override val name: String = "CreateStubMethodNodesPass"
+
+  /** Stubs created so far, keyed by the name they were registered under. */
+  private val handledDanglingMethods: MMap[String, NewMethod] = MMap()
+
+  val userlandMethods: List[Method] = cpg.method.l
+
+  /** Names of all methods present in the CPG, for constant-time shadowing checks. */
+  private val userlandMethodNames: Set[String] =
+    userlandMethods.map(_.name).toSet
+
+  /** All calls whose name is one of `KNOWN_FUNCTION_STARTS`. */
+  private def getAllMethodInitializations: List[Call] = {
+    KNOWN_FUNCTION_STARTS.toList.flatMap { startName =>
+      cpg.call.nameExact(startName).l
+    }
+  }
+
+  private def callStartsWithWildcard(c: Call) =
+    getCalledMethod(c).startsWith(".*")
+
+  /** Processes every function-start call and returns the resulting diff graph. */
+  override def run(): Iterator[DiffGraph] = {
+    val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder
+    withErrorReporting() {
+      /*
+        Order the calls in such a way that all non-wildcard calls come first.
+        This should prevent the following error:
+         - Wildcard `.*::foo` comes first, matches one existing thing
+         - Call `qualified::foo` comes second, gets new stub
+
+        Finish-up pass:
+         - Wildcard is processed, matches both the existing thing and the qualified thing -> error
+
+        The key (isWildcard, id) sorts non-wildcards (false) before wildcards (true)
+        and by ascending id within each group. sortBy is used instead of a `<=`
+        based sortWith, which is not a strict ordering, and the wildcard check is
+        evaluated once per call instead of once per comparison.
+       */
+      val sorted = getAllMethodInitializations.sortBy { call =>
+        (callStartsWithWildcard(call), call.id)
+      }
+
+      sorted.foreach { functionStart =>
+        processFunctionStart(functionStart, diffGraph)
+      }
+    }
+    Iterator(diffGraph.build())
+  }
+
+  /** Creates a new external stub named `stubName`, remembers it under that name and
+    * adds it together with its METHOD_RETURN to the graph.
+    *
+    * The METHOD_RETURN is created here, i.e. only when the stub itself is created,
+    * so a stub that is reused for several calls keeps a single METHOD_RETURN.
+    */
+  private def registerStub(stubName: String,
+                           code: String,
+                           graph: DiffGraph.Builder): NewMethod = {
+    val stub = NewMethod()
+      .name(stubName)
+      .fullName(stubName)
+      .code(code)
+      .isExternal(true)
+    handledDanglingMethods.addOne(stubName -> stub)
+    graph.addNode(stub)
+    createStubMethodReturn(stub, graph)
+    stub
+  }
+
+  /** Returns the stub for `name`, creating it if none exists yet.
+    *
+    * An existing stub is reused if it is registered under `name`, or under the name
+    * without namespace with code `Defines.INTERNAL_FUNCTION`. Otherwise the kind of
+    * stub is chosen from the name: PHP internal function (possibly shadowed by a
+    * method of the CPG), dynamic function, constructor, method (`::`) or function.
+    *
+    * @param multiple whether the stub stands for multiple possible targets
+    */
+  private def createStubMethodNode(name: String,
+                                   graph: DiffGraph.Builder,
+                                   multiple: Boolean): NewMethod = {
+    val noNamespaceName = name.split("\\\\").last
+    def codeFor(singleTargetCode: String): String =
+      if (multiple) Defines.MULTIPLE_TARGETS else singleTargetCode
+
+    val existingStub = handledDanglingMethods
+      .get(name)
+      .orElse(
+        handledDanglingMethods
+          .get(noNamespaceName)
+          .filter(_.code == Defines.INTERNAL_FUNCTION))
+
+    existingStub.getOrElse {
+      if (getPhpInternalFunctions(interpreter).contains(noNamespaceName)) {
+        if (userlandMethodNames.contains(noNamespaceName)) {
+          // a user defined method exists, that shadows a built-in
+          // this occurs for example with polyfills
+          unableToLinkMethod(name)
+          registerStub(name, Defines.MULTIPLE_TARGETS, graph)
+        } else {
+          unlinkedInternalFunction(noNamespaceName)
+          registerStub(noNamespaceName, Defines.INTERNAL_FUNCTION, graph)
+        }
+      } else if (name == Defines.DYNAMIC_FUNCTION) {
+        // the method is dynamic, we cannot do anything here
+        unableToLinkFunction(name)
+        registerStub(name, Defines.DYNAMIC_FUNCTION, graph)
+      } else if (name.contains("__construct")) {
+        // the method name indicates a constructor
+        unableToLinkMethod(name)
+        registerStub(name, codeFor(Defines.CONSTRUCTOR), graph)
+      } else if (name.contains("::")) {
+        // the method name indicates another method
+        unableToLinkMethod(name)
+        registerStub(name, codeFor(Defines.UNKNOWN_METHOD), graph)
+      } else {
+        // none of the above: an unknown function
+        unableToLinkFunction(name)
+        registerStub(name, codeFor(Defines.UNKNOWN_FUNCTION), graph)
+      }
+    }
+  }
+
+  /** Adds a METHOD_RETURN for `stub`, attached with an AST and a CFG edge. */
+  private def createStubMethodReturn(stub: NewMethod,
+                                     graph: DiffGraph.Builder): Unit = {
+    val method_return = NewMethodReturn()
+      .lineNumber(stub.lineNumber)
+      .columnNumber(stub.columnNumber)
+    graph.addNode(method_return)
+    graph.addEdge(stub, method_return, EdgeTypes.AST)
+    graph.addEdge(stub, method_return, EdgeTypes.CFG)
+  }
+
+  /** Decides for one function-start call whether a stub is needed.
+    *
+    *  - no method in the CPG matches: a stub is needed,
+    *  - exactly one matches and no stub competes with it: the call is linkable,
+    *  - anything else: a multiple-target stub if `strict`, otherwise nothing.
+    */
+  private def processFunctionStart(functionStart: Call,
+                                   graph: DiffGraph.Builder): Unit = {
+
+    val name = getCalledMethod(functionStart)
+    val unknownMethodName = name.replace(".*", "UNKNOWN")
+    // compiled on first use only, and once instead of once per registered stub
+    // NOTE(review): the called name is used as a regex verbatim; presumably a
+    // wildcard name contains no other regex metacharacters - confirm
+    lazy val wildcardPattern = name.r
+    def stubAlreadyRegistered: Boolean =
+      if (name.startsWith(".*")) {
+        handledDanglingMethods.keysIterator.exists(key =>
+          wildcardPattern.matches(key))
+      } else {
+        handledDanglingMethods.contains(name)
+      }
+
+    getCallCorrespondingMethod(cpg, name) match {
+      // if there is no method we do need to add a dangling method and note that we cannot link
+      case Nil =>
+        createStubMethodNode(unknownMethodName, graph, multiple = false)
+        ()
+      // a single corresponding method, and no stub already claims the name
+      case _ :: Nil if !stubAlreadyRegistered =>
+        // if there is a unique called method IN THE CPG we do not need to add a dangling method node as we know we can link
+        if (name.contains("::")) {
+          linkedMethod(name)
+        } else {
+          linkedFunction(name)
+        }
+      // if there are multiple methods it depends on the config - if strict we need to add a dangling method
+      case _ if strict =>
+        createStubMethodNode(unknownMethodName, graph, multiple = true)
+        ()
+      // if not strict no action is required
+      case _ =>
+    }
+  }
+
+}
diff --git
a/layerByteCode/src/main/scala/io/joern/bytecode/passes/DeleteEmptyMethodDeclarations.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/DeleteEmptyMethodDeclarations.scala
new file mode 100644
index 0000000..4baa5db
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/DeleteEmptyMethodDeclarations.scala
@@ -0,0 +1,48 @@
+package io.joern.bytecode.passes
+
+import io.joern.reporting.Reporting
+import io.shiftleft.codepropertygraph.Cpg
+import io.shiftleft.codepropertygraph.generated.nodes.Method
+import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes}
+import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass}
+import io.shiftleft.semanticcpg.language._
+
+import scala.jdk.CollectionConverters._
+
+/** Removes methods whose body block has exactly one AST child.
+  *
+  * For such a method the outgoing AST edges, the nodes they point to and the method
+  * node itself are scheduled for removal.
+  */
+class DeleteEmptyMethodDeclarations(methods: Seq[nodes.Method],
+                                    cpg: Cpg,
+                                    keyPool: IntervalKeyPool)
+    extends ParallelCpgPass[nodes.Method](cpg,
+                                          keyPools =
+                                            Some(keyPool.split(methods.size)))
+    with Reporting {
+
+  override val name = "DeleteEmptyMethodDeclaration"
+  override def partIterator: Iterator[nodes.Method] = methods.iterator
+
+  /** Schedules `part` for deletion if its body block holds a single child.
+    *
+    * @param part the method to inspect
+    * @return one diff graph, empty when the method is kept
+    */
+  override def runOnPart(part: Method): Iterator[DiffGraph] = {
+    implicit val changes: DiffGraph.Builder = DiffGraph.newBuilder
+    withErrorReporting(part) {
+      // a body with one child is treated as empty: the original author notes that
+      // a return always exists
+      val bodyChildren = part.astChildren.isBlock.astChildren.l
+      val bodyIsEmpty = bodyChildren.length == 1
+      if (bodyIsEmpty) {
+        // drop each AST edge of the method together with the node it leads to
+        // NOTE(review): only direct AST children are removed here - confirm that
+        // deeper descendants do not need explicit removal
+        for (astEdge <- part.outE(EdgeTypes.AST).asScala) {
+          changes.removeEdge(astEdge)
+          changes.removeNode(astEdge.inNode().asInstanceOf[nodes.StoredNode])
+        }
+        // the method node goes last
+        changes.removeNode(part)
+      }
+    }
+    Iterator(changes.build())
+  }
+}
+
+object DeleteEmptyMethodDeclarations {
+
+  /** All method nodes of the graph, i.e. the candidate parts for this pass. */
+  def getMethodDeclarations(cpg: Cpg): Seq[nodes.Method] =
+    cpg.method.l
+
+}
diff --git
a/layerByteCode/src/main/scala/io/joern/bytecode/passes/DeleteEmptyOpcodesPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/DeleteEmptyOpcodesPass.scala
new file mode 100644
index 0000000..ddc83a8
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/DeleteEmptyOpcodesPass.scala
@@ -0,0 +1,66 @@
+package io.joern.bytecode.passes
+
+import io.joern.reporting.Reporting
+import io.shiftleft.codepropertygraph.Cpg
+import io.shiftleft.codepropertygraph.generated.nodes.Method
+import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes}
+import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass}
+import io.shiftleft.semanticcpg.language._
+
+import scala.jdk.CollectionConverters._
+
+/** Removes one empty opcode call (`EXT_NOP`, `EXT_STMT` or `NOP`) per method and
+  * run, reconnecting its CFG predecessors with its CFG successors.
+  *
+  * `somethingChanged` is set once any part removed an opcode.
+  * NOTE(review): presumably the caller re-runs the pass until the flag stays
+  * false - confirm against the call site.
+  */
+class DeleteEmptyOpcodesPass(methods: Seq[nodes.Method],
+                             cpg: Cpg,
+                             keyPool: IntervalKeyPool)
+    extends ParallelCpgPass[nodes.Method](cpg: Cpg,
+                                          keyPools =
+                                            Some(keyPool.split(methods.length)))
+    with Reporting {
+
+  // call names that are treated as empty opcodes, checked in this order
+  private val emptyOpcodeNames = List("EXT_NOP", "EXT_STMT", "NOP")
+  override val name = "DeleteEmptyOpcodePass"
+  override def partIterator: scala.Iterator[nodes.Method] = methods.iterator
+
+  // set to true (under this.synchronized) by any part that removed an opcode
+  var somethingChanged: Boolean = false
+
+  //todo: this is a horrible way of deleting empty opcodes as this logic needs to be called n times for n being
+  // the max amount of opcodes over the set of all methods
+  /** Removes at most one empty opcode from `part`.
+    *
+    * @param part the method to inspect
+    * @return one diff graph, empty when the method contains no empty opcode
+    */
+  override def runOnPart(part: Method): Iterator[DiffGraph] = {
+    implicit val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder
+    withErrorReporting(part) {
+      // first opcode name for which the method still contains a call
+      emptyOpcodeNames.find(elem => part.astMinusRoot.isCall(elem).hasNext) match {
+        case Some(emptyCallString) =>
+          // only the first matching call is handled in this run
+          val emptyOpcode = part.astMinusRoot.isCall(emptyCallString).next()
+          val previousNodes = emptyOpcode
+            .in(EdgeTypes.CFG)
+            .asScala
+            .toList
+            .map(_.asInstanceOf[nodes.CfgNode])
+          val nextNodes = emptyOpcode
+            .out(EdgeTypes.CFG)
+            .asScala
+            .toList
+            .map(_.asInstanceOf[nodes.CfgNode])
+          // bypass the opcode: connect every predecessor with every successor
+          for (previous <- previousNodes) {
+            for (next <- nextNodes) {
+              diffGraph.addEdge(previous, next, EdgeTypes.CFG)
+            }
+          }
+          diffGraph.removeNode(emptyOpcode)
+          this.synchronized {
+            somethingChanged = true
+          }
+        case None => None
+      }
+    }
+    Iterator(diffGraph.build())
+  }
+}
+
+object DeleteEmptyOpcodesPass {
+
+  /** All method nodes of the graph. */
+  def getMethodDeclarations(cpg: Cpg): Seq[nodes.Method] = {
+    cpg.method.l
+  }
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/DeleteUnreachableCodePass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/DeleteUnreachableCodePass.scala
new file mode 100644
index 0000000..fc05c5b
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/DeleteUnreachableCodePass.scala
@@ -0,0 +1,65 @@
+package io.joern.bytecode.passes
+
+import io.joern.reporting.{ReportableError, Reporting}
+import io.shiftleft.codepropertygraph.Cpg
+import io.shiftleft.codepropertygraph.generated.nodes
+import io.shiftleft.codepropertygraph.generated.nodes.{AstNode, Call, Method}
+import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass}
+import io.shiftleft.semanticcpg.language._
+import overflowdb.Edge
+
+import java.util.function._
+import scala.jdk.CollectionConverters._
+
+/** Removes calls that have neither an incoming nor an outgoing CFG edge, together
+  * with all edges attached to them. Calls whose code is `VERIFY_NEVER_TYPE` are kept.
+  */
+class DeleteUnreachableCodePass(methods: Seq[nodes.Method],
+                                cpg: Cpg,
+                                keyPool: IntervalKeyPool)
+    extends ParallelCpgPass[nodes.Method](cpg,
+                                          keyPools =
+                                            Some(keyPool.split(methods.size)))
+    with Reporting {
+
+  override val name = "DeleteUnreachableCodePass"
+
+  override def partIterator: Iterator[Method] = methods.iterator
+
+  /** Collects the dead calls of `part` and schedules them and their edges for removal.
+    *
+    * Errors are reported through [[Reporting]] instead of being thrown.
+    *
+    * @param part the method to inspect
+    * @return one diff graph holding the scheduled removals
+    */
+  override def runOnPart(part: Method): Iterator[DiffGraph] = {
+    implicit val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder
+    // collected in sets first so that nothing is scheduled for removal twice
+    val edgesToBeRemoved = collection.mutable.Set[Edge]()
+    val callsToBeRemoved = collection.mutable.Set[Call]()
+
+    // java Consumer, as required by forEachRemaining on the edge iterators
+    val markEdgeForDeletionConsumer: Consumer[Edge] = (t: Edge) =>
+      edgesToBeRemoved.add(t)
+    try {
+      part.astMinusRoot.isCall
+        .filter { call =>
+          //unless we have dead code there is always an incoming edge and the outgoing edge to the RETURN node
+          //or a edge to the next instruction in the BB
+          call._cfgIn.asScala.toList.isEmpty && call._cfgOut.asScala.toList.isEmpty && call.code != "VERIFY_NEVER_TYPE"
+        }
+        .foreach { deadCall: Call =>
+          callsToBeRemoved.add(deadCall)
+          //remove all possible outgoing edges (this can happen, i.e., with a dead jump)
+          deadCall.outE().forEachRemaining(markEdgeForDeletionConsumer)
+          //remove all incoming edges (well it is still part of the AST and we do not want dangling edges)
+          deadCall.inE().forEachRemaining(markEdgeForDeletionConsumer)
+        }
+      // edges are scheduled before the nodes they are attached to
+      edgesToBeRemoved.toList.foreach(diffGraph.removeEdge)
+      callsToBeRemoved.toList.foreach(diffGraph.removeNode(_))
+    } catch {
+      case x: ReportableError => reportError(x)
+      case x: Throwable =>
+        reportError(part.filename, "", part.fullName, "", x.getMessage, x)
+    }
+    Iterator(diffGraph.build())
+  }
+
+  /** Schedules `node` for removal and feeds all its outgoing and incoming edges to
+    * `edgeRemover`. Not called from within this class.
+    */
+  def removeNode(node: AstNode,
+                 diff: DiffGraph.Builder,
+                 edgeRemover: Consumer[Edge]): Unit = {
+    diff.removeNode(node)
+    node.outE().forEachRemaining(edgeRemover)
+    node.inE().forEachRemaining(edgeRemover)
+  }
+
+}
diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/DominatorPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/DominatorPass.scala
new file mode 100644
index 0000000..08738c8
--- /dev/null
+++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/DominatorPass.scala
@@ -0,0 +1,236 @@
+/**
+  * This algorithm is based on Compiler Bau II by Alfred V. Aho, Ravi Sethi, and Jeffrey D.
Ullmann + * Chapter 10.9 Algorithm 10.16 + */ +package io.joern.bytecode.passes + +import io.joern.bytecode.util.extensions.NodeExtension._ +import io.joern.reporting.{ReportableError, Reporting} +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.nodes.{ + CfgNode, + Method, + MethodReturn +} +import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes} +import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass} +import io.shiftleft.semanticcpg.language._ +import overflowdb.traversal.jIteratortoTraversal + +import scala.collection.mutable.{Map => MMap, Set => MSet} +import scala.collection.{Iterator, Seq} + +class DominatorPass(methods: Seq[nodes.Method], + cpg: Cpg, + keyPool: IntervalKeyPool, + postDomination: Boolean = false) + extends ParallelCpgPass[nodes.Method](cpg: Cpg, + keyPools = + Some(keyPool.split(methods.size))) + with Reporting { + + override val name: String = + if (!postDomination) "DominatorPass" else "PostDominatorPass" + + override def partIterator: Iterator[Method] = methods.iterator + + //private val debugPass = "PostDominatorPass" + //private val debugType = Method.Label + + override def runOnPart(part: Method): Iterator[DiffGraph] = { + //if(name == debugPass) { + // println(s"######## $name + ${part.name} #########") + // println() + //} + val diffGraph = DiffGraph.newBuilder + withErrorReporting(part) { + if (!postDomination) { + calculateDominatorTree(part, diffGraph) + } else { + val method_return = part.ast.filter(_.isInstanceOf[MethodReturn]).l + calculateDominatorTree(method_return.head.asInstanceOf[CfgNode], + diffGraph) + } + } + Iterator(diffGraph.build()) + } + + /** + * + * only stuff belonging to the coherent CFG starting from start belong to the domination + * calculation + * + * @param start the cfg start + * @return the map (and implicitly also the order/involved nodes) + */ + private def initiateDominatorMap( + start: CfgNode): MMap[CfgNode, Set[CfgNode]] = { + 
var frontier: Set[CfgNode] = Set(start) + val collection: MSet[CfgNode] = MSet() + while (frontier.nonEmpty) { + val newFrontier: MSet[CfgNode] = MSet() + frontier.foreach { node => + // follow the outgoing CFG edges and add their target nodes to the new frontier + newFrontier.addAll( + node + .out(EdgeTypes.CFG) + .cast[CfgNode] + ) + // follow the incoming CFG edges and add their source nodes to the new frontier + newFrontier.addAll( + node + .in(EdgeTypes.CFG) + .cast[CfgNode] + ) + // add the current node to the collection as we handled it + collection.addOne(node) + } + // make the frontier the new frontier minus the already collected nodes so no double visiting + frontier = newFrontier.toSet -- collection.toSet + } + val ret: MMap[CfgNode, Set[CfgNode]] = MMap() + collection.foreach { node => + ret.addOne(node -> collection.toSet) + } + ret.addOne(start -> Set(start)) + ret + } + + /*private def initiateDominatorMap( + start: CfgNode): MMap[CfgNode, Set[CfgNode]] = { + val methodNode = start.getParentMethod.getOrElse( + throw new RuntimeException( + s"given start node $start has no parent method")) + val dominationMap = MMap[CfgNode, Set[CfgNode]]() + // for the start node only the start node is dominator + val allNodesList: Seq[CfgNode] = methodNode.ast.isCfgNode.l ++ List(start) + methodNode.ast.isCfgNode.foreach { node => + // creating new instance + val allNodes: Set[CfgNode] = Set.from(allNodesList) + // initially all nodes are dominators + dominationMap.addOne((node, allNodes)) + } + dominationMap.addOne((start, Set(start))) + dominationMap + }*/ + + /*private def calculateOrder(part: CfgNode): List[CfgNode] = { + // order is only relevant for optimization and the below will trivially work + // though not necessarily fast O(n^2) whereas it is possible to achieve O(n) + val methodNode = part.getParentMethod.getOrElse( + throw new ReportableError("na",-1,"na","na", + s"given start node $start has no parent method")) + methodNode.ast.isCfgNode.l + }*/ + + private def
calculateDominationSet( + node: CfgNode, + dominationMap: MMap[CfgNode, Set[CfgNode]]): Set[CfgNode] = { + val next: Seq[Set[CfgNode]] = + name match { + case "DominatorPass" => + node + // pred nodes of the given node + .in(EdgeTypes.CFG) + .cast[CfgNode] + .toList + .map(dominationMap) + case "PostDominatorPass" => + node + // post nodes of the node + .out(EdgeTypes.CFG) + .cast[CfgNode] + .toList + .map(dominationMap) + } + + //if(node.label == debugType && name == debugPass) { + // println(s"prev (${node.code}/${node.id()}):" + next.map(_.map(_.code))) + //} + + val intersection: Set[CfgNode] = if (next.nonEmpty) { + next.reduce((lhs, rhs) => lhs.intersect(rhs)) + } else { + Set() + } + intersection ++ Set(node) + } + + private def calculateDominatorTree(start: CfgNode, + diffGraph: DiffGraph.Builder): Unit = { + val dominationMap: MMap[CfgNode, Set[CfgNode]] = initiateDominatorMap(start) + /*name match { + case "DominatorPass" => + initiateDominatorMap(start) + case "PostDominatorPass" => + initiateDominatorMap( + start.getParentMethod.getOrElse( + throw new RuntimeException("given node has no parent method"))) + }*/ + val preOrder: Seq[CfgNode] = dominationMap.keySet.toList + if (preOrder.toSet != dominationMap.keySet) { + val correspondingMethod = start.getParentMethod.get + val setString = preOrder.toList + .sortBy(_.id()) + .map { elem: CfgNode => + s"${elem.label}/${elem.code}" + } + .mkString(",") + val mapString = dominationMap.keySet.toList + .sortBy(_.id()) + .map(elem => s"${elem.label}/${elem.code}") + .mkString(",") + val msg: String = s"""the order set and the domination map are different: + | $setString + | $mapString""".stripMargin + throw ReportableError(correspondingMethod, start, msg) + } + var changed = false + do { + changed = false + preOrder.foreach { currentNode => + //if(currentNode.label == debugType && name == debugPass) { + // println() + // println(s"${currentNode.code}") + //} + val newDominationSet = + 
calculateDominationSet(currentNode, dominationMap) + + //if(currentNode.label == debugType && name == debugPass) { + // println(s"old set ${dominationMap(currentNode).map(_.code).mkString(",")}") + // println(s"new set ${newDominationSet.map(_.code).mkString(",")}") + // println() + //} + + if (newDominationSet != dominationMap(currentNode)) { + dominationMap.addOne((currentNode, newDominationSet)) + changed = true + } + } + } while (changed) + dominationMap.foreach { + case (node, dominators) => + dominators.foreach(dom => addDominationEdge(dom, node, diffGraph)) + } + } + + private def addDominationEdge(start: CfgNode, + end: CfgNode, + diffGraph: DiffGraph.Builder): Unit = { + if (start != end) { + name match { + case "DominatorPass" => + //if(start.label == debugType && name == debugPass) { + // println(s"${start.label} - DOMINATE-> ${end.label}") + //} + diffGraph.addEdge(start, end, EdgeTypes.DOMINATE) + case "PostDominatorPass" => + //if (start.label == debugType && name == debugPass) { + // println(s"${start.label} -POST_DOMINATE-> ${end.label}/${end.code}") + //} + diffGraph.addEdge(start, end, EdgeTypes.POST_DOMINATE) + } + } + } + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/InheritancePass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/InheritancePass.scala new file mode 100644 index 0000000..15ba0e3 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/InheritancePass.scala @@ -0,0 +1,77 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.util.extensions.NodeExtension.ExtendedCFG +import io.joern.reporting.{ReportableError, Reporting} +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes} +import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass} +import io.shiftleft.semanticcpg.language._ + +class InheritancePass(methods: Seq[nodes.Call], + cpg: Cpg, + keyPool: IntervalKeyPool) + extends 
ParallelCpgPass[nodes.Call](cpg, + keyPools = + Some(keyPool.split(methods.size))) + with Reporting { + + override val name = "InheritancePass" + override def partIterator: Iterator[nodes.Call] = methods.iterator + + override def runOnPart(part: nodes.Call): Iterator[DiffGraph] = { + implicit val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder + withErrorReporting(part.getParentMethod.get) { + part.code match { + case "DECLARE_CLASS_DELAYED" => + val List(lhs, rhs) = + part.astChildren.isLiteral.l.sortWith((lhs, rhs) => + lhs.order <= rhs.order) match { + case x if x.length != 2 => + throw ReportableError( + part.getParentMethod.get, + part, + s"only has ${x.length} of the expected 2 children") + case lhs :: rhs :: Nil => + List(lhs, rhs) + } + addInheritanceEdge(lhs, rhs, cpg) + case _ => + throw ReportableError(part.getParentMethod.get, + part, + "given bytecode does not support inheritance") + } + } + Iterator(diffGraph.build()) + } + + def addInheritanceEdge( + childClass: nodes.Literal, + parentClass: nodes.Literal, + cpg: Cpg)(implicit diffGraph: DiffGraph.Builder): Unit = { + val childType = + cpg.typeDecl(childClass.code.replace("\\", "\\\\")).l match { // double each namespace backslash before the lookup + case Nil => + return + case hit :: Nil => hit + case _ :: _ => + return + } + val parentType = + cpg.typeDecl(parentClass.code.replace("\\", "\\\\")).l match { // same backslash escaping as for childType + case Nil => + return + case hit :: Nil => hit + case _ :: _ => + return + } + diffGraph.addEdge(childType, parentType, EdgeTypes.INHERITS_FROM) + } +} + +object InheritancePass { + + def getInheritanceIndicatingCalls(cpg: Cpg): Seq[nodes.Call] = { + cpg.call.code("DECLARE_CLASS_DELAYED").l + } + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/LocalIdentificationPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/LocalIdentificationPass.scala new file mode 100644 index 0000000..e6bf8d6 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/LocalIdentificationPass.scala @@ -0,0 +1,84 @@
+package io.joern.bytecode.passes + +import io.joern.reporting.{ReportableError, Reporting} +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.nodes.Method +import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes} +import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass} +import io.shiftleft.semanticcpg.language._ +import overflowdb.traversal.Traversal + +class LocalIdentificationPass(methods: Seq[nodes.Method], + cpg: Cpg, + keyPool: IntervalKeyPool) + extends ParallelCpgPass[nodes.Method](cpg, + keyPools = + Some(keyPool.split(methods.size))) + with Reporting { + + override val name = "LocalIdentificationPass" + override def partIterator: Iterator[Method] = methods.iterator + + override def runOnPart(part: Method): Iterator[DiffGraph] = { + val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder + withErrorReporting(part) { + val variables: Traversal[nodes.Identifier] = + (part.astMinusRoot.isCall.name("ASSIGN") + ++ part.astMinusRoot.isCall.name("=")).map(node => + node.astChildren.order(0).head match { + case node: nodes.Identifier => node + case x => + throw ReportableError( + part, + part, + s"unexpected assign element for $part and element $x") + }) + val globalDecl: List[String] = part.astMinusRoot.isCall + .name("BIND_GLOBAL") + .map { node => + node.astChildren.order(0).l match { + case Nil => + throw ReportableError(part, + part, + "BIND_GLOBAL does not have first child") + case single :: Nil => + single match { + case string: nodes.Identifier => string.name + } + case _ :: _ => + throw ReportableError(part, + part, + "BIND_GLOBAL has multiple first children") + } + } + .toList + if (part.astChildren.nonEmpty) { + var count = part.astChildren.map(_.order).toList.max + variables + .filter { variable => + !globalDecl.contains(variable.name) + } + .foreach { variable => + count = count + 1 + val local = + nodes + .NewLocal() + .code(variable.name) + .name(variable.name) + .order(count) +
diffGraph.addNode(local) + diffGraph.addEdge(part, local, EdgeTypes.AST) + } + } + } + Iterator(diffGraph.build()) + } +} + +object LocalIdentificationPass { + + def getRelevantMethodDeclarations(cpg: Cpg): Seq[nodes.Method] = { + cpg.method.l + } + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/MetaDataPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/MetaDataPass.scala new file mode 100644 index 0000000..d99f725 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/MetaDataPass.scala @@ -0,0 +1,28 @@ +package io.joern.bytecode.passes + +import io.joern.reporting.Reporting +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.nodes +import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass} +import io.shiftleft.semanticcpg.language.types.structure.FileTraversal + +class MetaDataPass(cpg: Cpg, keyPool: IntervalKeyPool) + extends ParallelCpgPass[String]( + cpg, + keyPools = Some(keyPool.split(1)) + ) + with Reporting { + + override val name = "MetaDataPass" + + // don't care at the moment + override def partIterator: Iterator[String] = List("").iterator + + override def runOnPart(part: String): Iterator[DiffGraph] = { + implicit val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder + val metaDataNode = nodes.NewMetaData().language("PHP").version("1.0") + diffGraph.addNode(metaDataNode) + diffGraph.addNode(nodes.NewFile().name(FileTraversal.UNKNOWN).order(0)) + Iterator(diffGraph.build()) + } +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/MethodsCreationPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/MethodsCreationPass.scala new file mode 100644 index 0000000..0345168 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/MethodsCreationPass.scala @@ -0,0 +1,289 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.Defines +import io.joern.bytecode.parser.constructs._ +import 
io.joern.bytecode.passes.utility.MethodIdentification +import io.joern.bytecode.passes.utility.OperationCreation.addOperation +import io.joern.reporting.{ReportableError, Reporting} +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.{ + DispatchTypes, + EdgeTypes, + nodes +} +import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass} + +import java.io.File + +class MethodsCreationPass( + filesMethodDefinitionPairs: List[Seq[MethodDefinitionPair]], + cpg: Cpg, + keyPool: IntervalKeyPool) + extends ParallelCpgPass[Seq[MethodDefinitionPair]]( + cpg, + keyPools = Some(keyPool.split(filesMethodDefinitionPairs.size))) + with Reporting { + + override val name = "MethodsCreationPass" + override def partIterator: Iterator[Seq[MethodDefinitionPair]] = + filesMethodDefinitionPairs.iterator + + override def runOnPart(fileMethodDefinitionPairs: Seq[MethodDefinitionPair]) + : Iterator[DiffGraph] = { + implicit val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder + try { + // creating and adding the file node + val fileNode = createFileNode( + MethodIdentification.getAbsolutePath( + fileMethodDefinitionPairs.head.byteCodeBlock.fileName)) + diffGraph.addNode(fileNode) + // creating and adding the namespace nodes + createFileNamespaceBlocks(fileNode, fileMethodDefinitionPairs) + // creating mapping for type declarations + implicit val typeDeclarations + : collection.mutable.Map[String, nodes.NewTypeDecl] = + collection.mutable.Map() + var counter = 0 + for (MethodDefinitionPair(definitionBlock, _) <- fileMethodDefinitionPairs) { + createMethod(definitionBlock, counter, fileNode) + counter = counter + 1 + } + } catch { + case x: ReportableError => + reportError(x) + case x: Throwable => + if (fileMethodDefinitionPairs.nonEmpty) { + reportError(fileMethodDefinitionPairs.head.byteCodeBlock.fileName, + "", + "", + "", + x.getMessage, + x) + } else { + reportWarning("", + "", + "", + "", + "we got an empty set of method 
definitions") + } + } + Iterator(diffGraph.build()) + } + + def createFileNode(path: String): nodes.NewFile = { + val absolutePath = new File(path).toPath.toAbsolutePath + .normalize() + .toString + nodes.NewFile().name(absolutePath).order(0) + } + + def createFileNamespaceBlocks( + fileNode: nodes.NewFile, + methodDefinitionPairs: Seq[MethodDefinitionPair])( + implicit graph: DiffGraph.Builder): Unit = { + var createdNamespaces = Set[String]() + methodDefinitionPairs.foreach { definitionPair => + val namespaceName = definitionPair.byteCodeBlock.namespace match { + case Some(namespaceName) => namespaceName + case None => Defines.GLOBAL_NAMESPACE_NAME + } + if (!createdNamespaces.contains(namespaceName)) { + createdNamespaces += namespaceName + val namespaceBlock = nodes + .NewNamespaceBlock() + .name(namespaceName) + .fullName(s"${fileNode.name}:$namespaceName") + .filename(fileNode.name) + graph.addNode(namespaceBlock) + graph.addEdge(fileNode, namespaceBlock, EdgeTypes.AST) + } + } + } + + def getTypeDeclaration(definition: ByteCodeDefinitionsBlock)( + implicit diffGraph: DiffGraph.Builder, + typeDeclMap: collection.mutable.Map[String, nodes.NewTypeDecl]) + : nodes.NewTypeDecl = { + assert(definition.classname.nonEmpty) + val fullName = definition.namespace match { + case Some(namespace) => s"$namespace\\${definition.classname.get}" + case None => s"${definition.classname.get}" + } + if (!typeDeclMap.contains(fullName)) { + val typeDecl = nodes + .NewTypeDecl() + .name(definition.classname.get) + .fullName(fullName) + .astParentType("NAMESPACE_BLOCK") + .astParentFullName(definition.namespace match { + case Some(namespace) => s"${definition.fileName}:$namespace" + case None => + s"${definition.fileName}:${Defines.GLOBAL_NAMESPACE_NAME}" + }) + diffGraph.addNode(typeDecl) + typeDeclMap.addOne((fullName, typeDecl)) + } + typeDeclMap(fullName) + } + + def createMethod(methodDefinition: ByteCodeDefinitionsBlock, + order: Integer, + file: nodes.NewFile)( + implicit 
graph: DiffGraph.Builder, + typeDecl: collection.mutable.Map[String, nodes.NewTypeDecl]): Unit = { + val code = methodDefinition.name + + Range + .inclusive(1, methodDefinition.args) + .map(c => "$" + s"param$c") + .mkString("(", ", ", ")") + // creating and adding method as well as possible type decl node and connection + val fileName: String = + MethodIdentification.getAbsolutePath(methodDefinition.fileName) + val start: Integer = methodDefinition.lineStart + val end: Integer = methodDefinition.lineEnd + /*methodDefinition.instructions.head.fileLine match { + case Some(value) => value + case None => + throw ReportableError( + methodDefinition.fileName, + methodDefinition.lineStart, + methodDefinition.name, + methodDefinition.instructions.head.instruction.toString, + "there should be a file line" + ) + } + val end: Integer = methodDefinition.instructions.last.fileLine match { + case Some(value) => value + case None => + throw ReportableError( + methodDefinition.fileName, + methodDefinition.lineStart, + methodDefinition.name, + methodDefinition.instructions.last.instruction.toString, + "there should be a file line" + ) + }*/ + val method = methodDefinition.classname match { + case Some(_) => + val parentType = getTypeDeclaration(methodDefinition) + val method = nodes + .NewMethod() + .name(methodDefinition.name) + .code(code) + .fullName(methodDefinition.fullyQualifiedName) + .order(order) + .astParentType("TYPE_DECL") + .astParentFullName(parentType.fullName) + .filename(fileName) + .lineNumber(start) + .lineNumberEnd(end) + graph.addNode(method) + graph.addEdge(parentType, method, EdgeTypes.AST) + method + case None => + methodDefinition.namespace match { + case Some(namespace) => + val method = nodes + .NewMethod() + .name(methodDefinition.name) + .code(code) + .fullName(methodDefinition.fullyQualifiedName) + .order(order) + .astParentType("NAMESPACE_BLOCK") + .astParentFullName(s"${file.name}:$namespace") + .filename(fileName) + .lineNumber(start) + 
.lineNumberEnd(end) + graph.addNode(method) + method + case None => + val method = nodes + .NewMethod() + .name(methodDefinition.name) + .code(code) + .fullName(methodDefinition.fullyQualifiedName) + .order(order) + .astParentType("NAMESPACE_BLOCK") + .astParentFullName( + s"${file.name}:${Defines.GLOBAL_NAMESPACE_NAME}") + .filename(fileName) + .lineNumber(start) + .lineNumberEnd(end) + graph.addNode(method) + method + } + } + // creating parameter nodes + var orderCounter = 0 + for (i <- Range.inclusive(1, methodDefinition.args)) { + val parameterNode = + nodes.NewMethodParameterIn().name(s"$i").order(orderCounter) + graph.addNode(parameterNode) + graph.addEdge(method, parameterNode, EdgeTypes.AST) + orderCounter = orderCounter + 1 + } + // creating the method block node + val blockNode = nodes.NewBlock().code("METHOD BLOCK").order(orderCounter) + orderCounter = orderCounter + 1 + graph.addNode(blockNode) + graph.addEdge(method, blockNode, EdgeTypes.AST) + // adding the return node + val returnNode = nodes.NewMethodReturn().order(orderCounter) + graph.addNode(returnNode) + graph.addEdge(method, returnNode, EdgeTypes.AST) + // adding the instructions + //var blockOrderCounter = 0 + for (instruction <- methodDefinition.instructions) { + addMethodInstructionLine(instruction, blockNode, methodDefinition) + //blockOrderCounter = blockOrderCounter + 1 + } + } + + def addMethodInstructionLine( + line: InstructionLine, + block: nodes.NewBlock, + byteCodeDefinitionBlock: ByteCodeDefinitionsBlock)( + implicit graph: DiffGraph.Builder): Unit = { + + line.instruction match { + case instruction: Assignment => + val lhs = nodes + .NewIdentifier() + .name(instruction.lhs.name) + .order(0) + .code(s"${instruction.lhs.name}") + val rhs = addOperation( + instruction.rhs, + order = 1, + line.fileLine.getOrElse(-1)) + val assignment = nodes + .NewCall() + .code(s"${lhs.code} = ${rhs.code}") + .lineNumber(line.fileLine) + .dispatchType(DispatchTypes.STATIC_DISPATCH) + .name("=") + 
.order(line.opNumber.get) + graph.addNode(assignment) + graph.addNode(lhs) + graph.addEdge(block, assignment, EdgeTypes.AST) + graph.addEdge(assignment, lhs, EdgeTypes.AST) + graph.addEdge(assignment, rhs, EdgeTypes.AST) + graph.addEdge(assignment, lhs, EdgeTypes.ARGUMENT) + graph.addEdge(assignment, rhs, EdgeTypes.ARGUMENT) + case instruction: Operation => + val call = + addOperation(instruction.op, + line.opNumber.get, + line.fileLine.getOrElse(-1)) + graph.addEdge(block, call, EdgeTypes.AST) + case _ => + throw ReportableError( + byteCodeDefinitionBlock.fileName, + line.fileLine.getOrElse(-1).toString.toInt, + byteCodeDefinitionBlock.name, + line.instruction.toString, + "the instruction is not yet supported" + ) + } + } +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/NamespaceMemberCreationPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/NamespaceMemberCreationPass.scala new file mode 100644 index 0000000..6a2e55a --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/NamespaceMemberCreationPass.scala @@ -0,0 +1,96 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.Defines +import io.joern.bytecode.util.extensions.NodeExtension._ +import io.joern.reporting.Reporting +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.nodes.Method +import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes} +import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass} +import io.shiftleft.semanticcpg.language._ + +import scala.jdk.CollectionConverters._ + +class NamespaceMemberCreationPass(methods: Seq[nodes.Method], + cpg: Cpg, + keyPool: IntervalKeyPool) + extends ParallelCpgPass[nodes.Method](cpg, + keyPools = + Some(keyPool.split(methods.size))) + with Reporting { + + override val name = "NamespaceMemberCreationPass" + override def partIterator: Iterator[Method] = methods.iterator + + override def runOnPart(part: nodes.Method): 
Iterator[DiffGraph] = { + val (file, _) = part.getLocation + implicit val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder + withErrorReporting(part) { + travelAddAndDelete(part, file) + } + Iterator(diffGraph.build()) + } + + def travelAddAndDelete(cfgNode: nodes.CfgNode, file: String)( + implicit diffGraph: DiffGraph.Builder, + visited: scala.collection.mutable.Set[nodes.CfgNode] = + scala.collection.mutable.Set[nodes.CfgNode]()): Unit = { + if (!visited.contains(cfgNode) && !cfgNode.isInstanceOf[nodes.Return]) { + visited.add(cfgNode) + if (cfgNode.code.length >= "DECLARE_CONST".length && + cfgNode.code.substring(0, "DECLARE_CONST".length) == "DECLARE_CONST") { + cfgNode.astChildren.order(0).toList.head match { + case name: nodes.Literal => + assert(name.typeFullName == "String") + val varNameSplit: Array[String] = name.code.split("\\\\") + val varName = varNameSplit.last.toLowerCase + val nameSpaceName = + varNameSplit.reverse.tail.reverse + .mkString("\\") + .toLowerCase match { + case "" => Defines.GLOBAL_NAMESPACE_NAME + case x => x + } + cpg.namespaceBlock + .nameExact(nameSpaceName.replace("\\", "\\\\")) + .filename(file) + .toList match { + case Nil => + reportWarning( + cfgNode.getParentMethod.get, + cfgNode, + s"there is no namespace named '$nameSpaceName' requested by ${cfgNode.code} with '{${varNameSplit + .mkString(",")}}' in file $file" + ) + + case single :: Nil => + val member = + nodes.NewMember().code(cfgNode.code).name(varName) + diffGraph.addNode(member) + diffGraph.addEdge(single, member, EdgeTypes.AST) + case _ => + reportWarning( + cfgNode.getParentMethod.get, + cfgNode, + s"there are multiple namespaces named '$nameSpaceName' requested by ${cfgNode.code} with '{${varNameSplit + .mkString(",")}}' in file $file" + ) + } + case _ => throw new RuntimeException() + } + } + cfgNode._cfgOut.asScala.toList.foreach { node => + travelAddAndDelete(node.asInstanceOf[nodes.CfgNode], file) + } + } + } + +} + +object NamespaceMemberCreationPass { + + 
def getNamespaceMemberRelevantFunctions(cpg: Cpg): Seq[nodes.Method] = { + cpg.method("dlr_main").l + } + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/OpDataPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/OpDataPass.scala new file mode 100644 index 0000000..4cf2991 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/OpDataPass.scala @@ -0,0 +1,58 @@ +package io.joern.bytecode.passes + +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.nodes.Method +import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes} +import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass} +import io.shiftleft.semanticcpg.language._ + +import scala.jdk.CollectionConverters._ + +class OpDataPass(methods: Seq[nodes.Method], cpg: Cpg, keyPool: IntervalKeyPool) + extends ParallelCpgPass[nodes.Method](cpg: Cpg, + keyPools = + Some(keyPool.split(methods.size))) { + + val knownOpDataUser = + List("ASSIGN_OBJ", + "ASSIGN_OBJ_OP", + "ASSIGN_OBJ_REF", + "ASSIGN_DIM", + "ASSIGN_DIM_OP") + + override def partIterator: Iterator[Method] = methods.iterator + + override def runOnPart(part: Method): Iterator[DiffGraph] = { + implicit val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder + val opDataNodes = part.astMinusRoot.isCall.nameExact("OP_DATA") + opDataNodes.foreach { opDataNode => + handleOpDataCall(opDataNode) + } + Iterator(diffGraph.build()) + } + + def getCorrespondingPreviousOperation( + call: nodes.CfgNode): Option[nodes.CfgNode] = { + if (call.isCall && knownOpDataUser.contains( + call.asInstanceOf[nodes.Call].name)) { + Some(call) + } else { + val previous = call._cfgIn.asScala.toList.collect { case node: nodes.CfgNode => node } // every CFG predecessor of this node + previous.foldLeft(None: Option[nodes.CfgNode]) { + case (Some(prev), _) => Some(prev) + case (None, current) => getCorrespondingPreviousOperation(current) + } + } + } + + def handleOpDataCall(opData: nodes.Call)( + implicit graph: DiffGraph.Builder):
Unit = { + assert(opData.astChildren.order(0).l.length == 1) + val argument = opData.astChildren.order(0).head + val correspondingOpCall = getCorrespondingPreviousOperation(opData) + .getOrElse( + throw new RuntimeException( + s"Did not find previous operation for OP_DATA ${opData.toString}")) + graph.addEdge(correspondingOpCall, argument, EdgeTypes.ARGUMENT) + } +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/ParamInterpretPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/ParamInterpretPass.scala new file mode 100644 index 0000000..12c5ff3 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/ParamInterpretPass.scala @@ -0,0 +1,31 @@ +package io.joern.bytecode.passes + +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.passes.{CpgPass, DiffGraph} +import io.shiftleft.semanticcpg.language._ + +class ParamInterpretPass(cpg: Cpg) extends CpgPass(cpg) { + override def run(): Iterator[DiffGraph] = { + val diffGraph = DiffGraph.newBuilder + cpg.method.l.foreach { method => + val params = method.parameter.sortBy(_.order) + val recvCalls = method.call.name("RECV(_INIT)?").l + + params.filter(_.name.forall(Character.isDigit)).foreach { param => + val index = param.name.toInt - 1 + val name = recvCalls + .lift(index) + .toList + .flatten + .inCall + .argument + .order(0) + .code + .headOption + + diffGraph.addNodeProperty(param, "NAME", name.getOrElse("")) + } + } + Iterator(diffGraph.build()) + } +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/TypeCreationPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/TypeCreationPass.scala new file mode 100644 index 0000000..61ab9f1 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/TypeCreationPass.scala @@ -0,0 +1,63 @@ +package io.joern.bytecode.passes + +import io.joern.reporting.{AbortPass, Reporting} +import io.shiftleft.codepropertygraph.Cpg +import 
io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes} +import io.shiftleft.passes.{CpgPass, DiffGraph} +import io.shiftleft.semanticcpg.language._ + +class TypeCreationPass(typeIndicatingNodes: Seq[nodes.StoredNode], cpg: Cpg) + extends CpgPass(cpg) + with Reporting { + + override val name = "TypeCreationPass" + + override def run(): Iterator[DiffGraph] = { + val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder + try { + val knownTypes = collection.mutable.ListBuffer[nodes.NewType]() + typeIndicatingNodes.foreach { + case part: nodes.TypeDecl => + knownTypes.find(typ => typ.fullName == part.fullName) match { + case Some(x) => x + case None => + val typeNode = nodes + .NewType() + .name(part.name) + .fullName(part.fullName) + knownTypes.append(typeNode) + diffGraph.addNode(typeNode) + diffGraph.addEdge(typeNode, part, EdgeTypes.REF) + } + case node: nodes.Literal => + node.typeFullName match { + case "Integer" | "Float" | "String" | "Boolean" => + knownTypes.find(typ => typ.fullName == node.typeFullName) match { + case Some(x) => x + case None => + val typeNode = nodes + .NewType() + .name(node.typeFullName) + .fullName(node.typeFullName) + knownTypes.append(typeNode) + diffGraph.addNode(typeNode) + } + case _ => + } + } + } catch { + case AbortPass => + case x: Throwable => + reportError("unknown", "", "", "", x.getMessage, x) + } + Iterator(diffGraph.build()) + } +} + +object TypeCreationPass { + + def getTypeIndicatingNodes(cpg: Cpg): List[nodes.StoredNode] = { + (cpg.typeDecl ++ cpg.literal).l + } + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/dataDependencyPasses/AddArrayElementPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/dataDependencyPasses/AddArrayElementPass.scala new file mode 100644 index 0000000..48e877c --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/dataDependencyPasses/AddArrayElementPass.scala @@ -0,0 +1,107 @@ +package io.joern.bytecode.passes.dataDependencyPasses + 
+import io.joern.reporting.{ReportableError, Reporting} +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.EdgeTypes +import io.shiftleft.codepropertygraph.generated.nodes.{Call, Identifier, Method} +import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass} +import io.shiftleft.semanticcpg.language._ + +import scala.collection.mutable.{ListBuffer, Map => MMap} + +class AddArrayElementPass(methods: Seq[Method], + cpg: Cpg, + keyPool: IntervalKeyPool) + extends ParallelCpgPass[Method]( + cpg: Cpg, + keyPools = Some(keyPool.split(methods.size))) + with Reporting { + + override val name = "AddArrayElementPass" + + override def partIterator: Iterator[Method] = methods.iterator + + override def runOnPart(part: Method): Iterator[DiffGraph] = { + implicit val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder + try { + generateTmpVarMap( + part.ast.filter(_.isInstanceOf[Call]).toList.map(_.asInstanceOf[Call])) + .foreach { + case (variable, nodes) => + assert( + nodes.count(_.name == "INIT_ARRAY") == 1, + s"there must only be a single INIT_ARRAY for $variable but there are ${nodes + .count(_.name == "INIT_ARRAY")}") + nodes.sortBy(_.astParent.order).sliding(2, 1).foreach { + case _ :: Nil => // if we only have INIT_ARRAY nothing needs to be done + case from :: to :: Nil => + diffGraph.addEdge(from, + to, + EdgeTypes.REACHING_DEF, + List(("VARIABLE", variable))) + } + } + } catch { + case err: ReportableError => reportError(err) + case thr: Throwable => + reportError(part.filename, + "NA", + part.fullName, + "NA", + thr.toString + thr.getStackTrace.mkString("\n"), + thr) + } + Iterator(diffGraph.build()) + } + + private def generateTmpVarMap(nodes: List[Call]): Map[String, List[Call]] = { + val tmpVarMap: MMap[String, ListBuffer[Call]] = MMap() + nodes + .filter(_.name == "INIT_ARRAY") + .foreach { // iterate over all init array calls + initArray: Call => + // get the parent node which has to be an = assign + val
assignment = initArray.astParent.asInstanceOf[Call] + // ensure that this is the case + assert(assignment.name == "=", + "the parent of INIT_ARRAY has to be an = assign") + // get the name of the variable it is assigned to + val tmpVariable = assignment.astChildren + .sortBy(_.order) + .head + .asInstanceOf[Identifier] + .name + // ensure that our assumption of it being a tmp variable holds + assert( + tmpVariable.startsWith("T"), + s"the assigned to variable should be a temporary internal one but is $tmpVariable") + assert(!tmpVarMap.contains(tmpVariable), + "the variable must not have been used previously") + tmpVarMap.addOne(tmpVariable -> ListBuffer(initArray)) + } + nodes + .filter(_.name == "ADD_ARRAY_ELEMENT") + .foreach { // iterate over all add array element calls + addArrayElement: Call => + // get the parent node which has to be an = assign + val assignment = addArrayElement.astParent.asInstanceOf[Call] + // ensure that this is the case + assert(assignment.name == "=", + "the parent of ADD_ARRAY_ELEMENT has to be an = assign") + // get the name of the variable it is assigned to + val tmpVariable = assignment.astChildren + .sortBy(_.order) + .head + .asInstanceOf[Identifier] + .name + // ensure that our assumption of it being a tmp variable holds + assert(tmpVariable.startsWith("T"), + "the assigned to variable should be a temporary internal one") + assert(tmpVarMap.contains(tmpVariable), + "the variable must already exist") + tmpVarMap(tmpVariable).addOne(addArrayElement) + } + tmpVarMap.map(pair => pair._1 -> pair._2.toList).toMap + } + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/dataDependencyPasses/DataDependencyPass.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/dataDependencyPasses/DataDependencyPass.scala new file mode 100644 index 0000000..db3dae5 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/dataDependencyPasses/DataDependencyPass.scala @@ -0,0 +1,462 @@ +package
io.joern.bytecode.passes.dataDependencyPasses + +import io.joern.bytecode.passes.dataDependencyPasses.WriteOps.{ + IdentifierWrite, + requiredParamsSatisfied +} +import io.joern.bytecode.util.extensions.NodeExtension._ +import io.joern.bytecode.util.implicits.OneableSeq +import io.joern.reporting.{ReportableError, Reporting} +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.nodes.Method +import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes} +import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass} +import io.shiftleft.semanticcpg.language._ + +import scala.collection.immutable.{Map, Set} +import scala.jdk.CollectionConverters.IteratorHasAsScala + +// this could be outsourced into a file to make this pass generic and dependent on a definition file +// that can be adjusted for each CPG +object WriteOps { + + /** + * + * @param operation the name of the bytecode operation + * @param position position of the identifier + * @param reading + * @param required_parameters only use this definition, if the required parameters (pos: Int, value: String) match + */ + case class IdentifierWrite(operation: String, + position: Option[Int], + reading: Boolean, + required_parameters: Option[Map[Int, String]] = + None) + + val writes: Set[IdentifierWrite] = Set( + IdentifierWrite("ASSIGN", Some(0), reading = false), + IdentifierWrite("ASSIGN_OP", Some(1), reading = true), + IdentifierWrite("BIND_GLOBAL", Some(0), reading = false), + IdentifierWrite("FE_FETCH_R", Some(1), reading = false), + IdentifierWrite("FE_FREE", Some(0), reading = true), + IdentifierWrite("FREE", Some(0), reading = true), + IdentifierWrite("=", Some(0), reading = false), + IdentifierWrite("ASSIGN_DIM", Some(0), reading = true), + IdentifierWrite("ASSIGN_REF_2", Some(0), reading = false), + IdentifierWrite("ASSIGN_REF_3", Some(1), reading = false), + IdentifierWrite("ASSIGN_DIM_OP", Some(1), reading = true), + IdentifierWrite("PRE_INC", 
Some(0), reading = true), + IdentifierWrite("PRE_DEC", Some(0), reading = true), + IdentifierWrite("POST_INC", Some(0), reading = true), + IdentifierWrite("POST_DEC", Some(0), reading = true), + IdentifierWrite("ASSIGN_OBJ", + Some(1), + reading = false, + required_parameters = Some(Map(0 -> "THIS"))) + //IdentifierWrite("UNSET_VAR",Some(1), reading = false) + ) + + /** + * Does the provided call satisfy the required parameters? If the parameters are None it returns true. + * + * @param call : the call in question + * @param required_parameters see above + */ + def requiredParamsSatisfied( + call: nodes.Call, + required_parameters: Option[Map[Int, String]]): Boolean = { + if (call.name == "ASSIGN_OBJ" && call.astChildren + .order(1) + .isLiteral + .isEmpty) return false + required_parameters match { + case None => true + case Some(value) => + value.forall(x => + call.argument.filter(_.order == x._1).isLiteral.headOption match { + case Some(value) => value.code == x._2 + case None => false + }) + } + } +} + +class DataDependencyPass(methods: Seq[nodes.Method], + cpg: Cpg, + keyPool: IntervalKeyPool) + extends ParallelCpgPass[nodes.Method](cpg: Cpg, + keyPools = + Some(keyPool.split(methods.size))) + with Reporting { + + val PREFIX_IDENTIFIER: String = "IDENTIFIER-" + val PREFIX_THISLITERAL: String = "THISLITERAL-" + case class PseudoIdentifier(name: String, position: Int) { + def rawName: String = name match { + case x if x.startsWith(PREFIX_IDENTIFIER) => + x.splitAt(PREFIX_IDENTIFIER.length)._2 + case x if x.startsWith(PREFIX_THISLITERAL) => + x.splitAt(PREFIX_THISLITERAL.length)._2 + } + } + type DDGCalcSet = Set[(PseudoIdentifier, Long)] + + override val name = "DataDependencyPass" + + override def partIterator: Iterator[Method] = methods.iterator + + override def runOnPart(part: Method): Iterator[DiffGraph] = { + implicit val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder + try { + + // here we get all the cfg nodes in depth first spanning tree order which 
is supposed to optimize runtime + val cfgNodes = generateDepthFirstSpanningTreeOrder(part) + // here we generate the initial out set and the final gen, and kill sets for each cfg node (returned in the order out, gen, kill) + val (out, gen, kill) = genOutGenKillPreparation(cfgNodes) + // here we calculate the final in and out set for each cfg node + val in = inOutSetCalculation(cfgNodes, out, gen, kill) + //prettyPrintMap("in", in) + // here we create the reaching definition edges based on the in set finalized before + createReachingEdge(in) + //handleOpDataCalls(cfgNodes) + } catch { + case err: ReportableError => reportError(err) + case thr: Throwable => + reportError(part.filename, + "NA", + part.fullName, + "NA", + thr.toString + thr.getStackTrace.mkString("\n"), + thr) + } + Iterator(diffGraph.build()) + } + + /** prepare the sets for out gen and kill for each cfg node, returned as the tuple (out, gen, kill) + * + * This is done by checking whether the given cfg node is a known writing call and if so + * gen, kill and out are set to Set((Identifier, id of the call)); every other node gets empty sets + * + * @param cfgNodes - all the cfg nodes of a given method + */ + private def genOutGenKillPreparation( + cfgNodes: List[nodes.CfgNode]): (Map[nodes.CfgNode, DDGCalcSet], + Map[nodes.CfgNode, DDGCalcSet], + Map[nodes.CfgNode, DDGCalcSet]) = { + var out: Map[nodes.CfgNode, DDGCalcSet] = Map() + var gen: Map[nodes.CfgNode, DDGCalcSet] = Map() + var kill: Map[nodes.CfgNode, DDGCalcSet] = Map() + cfgNodes.foreach { + case call: nodes.Call => + WriteOps.writes.find(_.operation == call.name) match { + case Some(IdentifierWrite(_, position, _, required_parameters)) + if requiredParamsSatisfied(call, required_parameters) => + try { + val pos: Int = position match { + case Some(value) => value + case None => + val parentMethod = call.getParentMethod.get + throw ReportableError( + parentMethod.filename, + call.lineNumber.getOrElse(Integer.valueOf(-1)).toInt, // -1 marks an unknown line; Integer.getInteger would read a system property and yield null + parentMethod.name, + call.code, + "no expected position of identifier write" + ) + } + val identifier = call.name match { + case
"ASSIGN_OBJ" => + assert( + call.astChildren + .order(0) + .isLiteral + .exists(_.code == "THIS")) + val tmp_literal = call.astChildren.order(1).isLiteral.l.head + PseudoIdentifier(PREFIX_THISLITERAL + tmp_literal.code, 1) + case _ => + val tmp_identifier = + call.astChildren + .order(pos) + .head + .asInstanceOf[nodes.Identifier] + + PseudoIdentifier(PREFIX_IDENTIFIER + tmp_identifier.name, + tmp_identifier.order) + } + + val newElement: DDGCalcSet = + Set((identifier, call.id())) + gen = gen + (call -> (gen.getOrElse( + call, + Set[(PseudoIdentifier, Long)]()) ++ newElement)) + kill = kill + (call -> (kill.getOrElse(call, Set()) ++ newElement)) + out = out + (call -> (out.getOrElse(call, Set()) ++ newElement)) + } catch { + case x: Throwable => + val parentMethod = call.getParentMethod.get + throw ReportableError( + parentMethod.filename, + call.lineNumber.getOrElse(Integer.valueOf(-1)).toInt, // -1 marks an unknown line + parentMethod.name, + call.code, + x.getMessage) + } + case _ => + gen = gen + (call -> Set()) + kill = kill + (call -> Set()) + out = out + (call -> Set()) + } + case node: nodes.CfgNode => + gen = gen + (node -> Set()) + kill = kill + (node -> Set()) + out = out + (node -> Set()) + } + (out, gen, kill) + } + + /** + * + * Algorithm based on Compiler Bau II by Alfred V. Aho, Ravi Sethi, and Jeffrey D.
Ullman + * Chapter 10.6 Algorithm 10.2 + * Returns the in set of every cfg node once no out set gains new elements any more. + * @param cfgNodes - all the cfg nodes of a given method + * @param out - the out set for each cfgNode to be generated/worked on + * @param gen - the gen set for each cfgNode to be worked with + * @param kill - the kill set for each cfgNode to be worked with + */ + private def inOutSetCalculation( + cfgNodes: List[nodes.CfgNode], + out: Map[nodes.CfgNode, DDGCalcSet], + gen: Map[nodes.CfgNode, DDGCalcSet], + kill: Map[nodes.CfgNode, DDGCalcSet]): Map[nodes.CfgNode, DDGCalcSet] = { + var counter = cfgNodes.length + var currentIn: Map[nodes.CfgNode, DDGCalcSet] = + Map() + var currentOut = out + var change: Boolean = true + while (change) { + if (counter == 0) { + val parentMethod = cfgNodes.head.getParentMethod.get + throw ReportableError( + parentMethod.filename, + cfgNodes.head.lineNumber.getOrElse(Integer.valueOf(-1)).toInt, // -1 marks an unknown line; Integer.getInteger would read a system property and yield null + parentMethod.fullName, + "NA", + "too many iterations to generate in-out-set" + ) + } else { + counter -= 1 + } + change = false + cfgNodes.foreach { call => + val toUnioninSets = call.cfgPrev + .map(incoming => currentOut(incoming)) + val mInUnionSet = + new collection.mutable.HashSet[(PseudoIdentifier, Long)]() + toUnioninSets.foreach(mInUnionSet ++= _) + currentIn = currentIn + (call -> mInUnionSet.toSet) + //prettyPrintSet("new incoming", call, currentIn) + val oldOut = currentOut(call) + //prettyPrintSet("old outgoing", call, currentOut) + val killedVariableList = kill(call).toList + assert(killedVariableList.length <= 1) + var newOut = Set[(PseudoIdentifier, Long)]() + // if there is no variable killed + if (killedVariableList.isEmpty) { + // then all incoming variables plus the generated variables are outgoing + newOut = gen(call) union currentIn(call) + } else { + // if stuff is killed then we have to filter the incoming set for those variables before + // creating the union with the generated variables + newOut = gen(call) union currentIn(call).filter( + _._1.name !=
killedVariableList.head._1.name) + } + if ((newOut &~ oldOut).nonEmpty) { + change = true + currentOut = currentOut + (call -> newOut) + } + // after updating the set for the call is either the same or larger + assert(currentOut(call).size >= oldOut.size) + // the size has to be the size of the newOut as it is either the same or has been larger + assert(currentOut(call).size == newOut.size) + } + } + currentIn + } + + def addReachingEdge( + from: nodes.CfgNode, + to: nodes.CfgNode, + identifier: String)(implicit diffGraph: DiffGraph.Builder): Unit = { + diffGraph.addEdge(from, + to, + EdgeTypes.REACHING_DEF, + List(("VARIABLE", identifier))) + } + + def getUsedIdentifiers(call: nodes.Call): Set[PseudoIdentifier] = { + call.name match { + // if we have an = it means on the right had side is a call and thus only the lhs has a valid identifier child + case "=" => + assert( + call.astChildren.order(1).next().isInstanceOf[nodes.Call], + s"the rhs of a = should be a call but we encountered ${call.code}") + val tmp_identifier = + call.astChildren.order(0).next().asInstanceOf[nodes.Identifier] + Set( + PseudoIdentifier(PREFIX_IDENTIFIER + tmp_identifier.name, + tmp_identifier.order)) + case "ASSIGN_OBJ" + if call.astChildren + .order(0) + .isLiteral + .exists(_.code == "THIS") && call.astChildren + .order(1) + .isLiteral + .nonEmpty => + Set(PseudoIdentifier( + PREFIX_THISLITERAL + call.astChildren.order(1).isLiteral.l.head.code, + 1)) + case "FETCH_OBJ_R" + if call.astChildren + .order(0) + .isLiteral + .exists(_.code == "THIS") && call.astChildren + .order(1) + .isLiteral + .nonEmpty => + Set(PseudoIdentifier( + PREFIX_THISLITERAL + call.astChildren.order(1).isLiteral.l.head.code, + 1)) + case _ => + call.astMinusRoot.isIdentifier + .map(x => PseudoIdentifier(PREFIX_IDENTIFIER + x.name, x.order)) + .toSet + .toSet + } + } + + /** Based on a calculated in set create corresponding reaching edges + * + * @param in - a map giving you the incoming identifier,definingCall 
pairs for a given call + * @param diffGraph - the diffGraph to generate which is later merged into a preexisting cpg + */ + def createReachingEdge(in: Map[nodes.CfgNode, DDGCalcSet])( + implicit diffGraph: DiffGraph.Builder): Unit = { + in.filter(_._1.isInstanceOf[nodes.Call]).foreach { + case (callNode, set) => + // this are the identifiers used in the current expression + val usedIdentifiers = + getUsedIdentifiers(callNode.asInstanceOf[nodes.Call]) + // this are the identifier used in the current expression intersected with the incoming identifiers + val inIntersection = + usedIdentifiers.filter(ident => + set.exists(elem => elem._1.name == ident.name)) + // this is the write op the current callNode corresponds with + val writeOp = WriteOps.writes.find( + _.operation == callNode.asInstanceOf[nodes.Call].name) + // now we go over the intersecting identifier + inIntersection.foreach { identifier => + // and based on whether we have a write op we create reaching edges + writeOp match { + // if it is a write op call we have to check that it is either a reading write op or the identifier is not at the writing position + case Some(IdentifierWrite(_, position, reading, _)) => + if (reading || identifier.position != position.get) { + val definitions = + set.filter(elem => elem._1.name == identifier.name) + definitions.foreach { + case (identifier, definedAt) => + addReachingEdge( + cpg.all.id(definedAt).collectAll[nodes.Call].l.one, + callNode, + identifier.rawName) + } + } else {} + // if it is not a write op we can simply add the edge + case None => + val definitions = + set.filter(elem => elem._1.name == identifier.name) + definitions.foreach { + case (identifier, definedAt) => + addReachingEdge( + cpg.all.id(definedAt).collectAll[nodes.Call].l.one, + callNode, + identifier.rawName) + } + } + + } + } + } + + /** returns all cfg nodes of a method in depth first spanning tree order + * + * algorithm based on Compiler Bau II by Alfred V. Aho, Ravi Sethis, and Jeffrey D. 
Ullmann + * Chapter 10.9, Algorithm 10.14 + * + * @param start the start of the cfg i.e., the method itself + * @return returns the list of all contained cfg nodes in depth first spanning tree order + */ + def generateDepthFirstSpanningTreeOrder( + start: Method): List[nodes.CfgNode] = { + val order: collection.mutable.ListBuffer[(Int, nodes.CfgNode)] = + collection.mutable.ListBuffer() + val cfgNodeCount = start.ast.isCfgNode.l.length + def populateOrder(current: nodes.CfgNode, i: Int, recursionDepth: Int = 0)( + implicit visited: collection.mutable.Set[nodes.CfgNode]): Int = { + assert( + recursionDepth <= cfgNodeCount, + s"the recursion depth exceeded the maximum amount of cfg nodes of $cfgNodeCount") + var currentI = i + current._cfgOut.asScala.foreach { next => + if (!visited.contains(next.asInstanceOf[nodes.CfgNode])) { + visited.addOne(next.asInstanceOf[nodes.CfgNode]) + currentI = populateOrder(next.asInstanceOf[nodes.CfgNode], + currentI, + recursionDepth + 1) + } + } + order.append((currentI, current)) + currentI - 1 + } + populateOrder(start, cfgNodeCount)(collection.mutable.Set()) + augmentDFSTOrderList(start, order.sortBy(_._1).map(_._2).toList) + } + + def augmentDFSTOrderList(start: Method, + dfst: List[nodes.CfgNode]): List[nodes.CfgNode] = { + val dfstSet = dfst.toSet + dfst ++ start.ast.isCfgNode + .map { node => + if (!dfstSet.contains(node)) { + Some(node) + } else { + None + } + } + .filter(_.isDefined) + .map(_.get) + } + + /*def getCorrespondingCall(identifier: Identifier): nodes.Call = { + assert(identifier._astIn.asScala.toList.length == 1, + s"$identifier has not exactly one AST parent") + identifier._astIn.next() match { + case parent: nodes.Call => + assert(parent._astIn.asScala.toList.length == 1, + s"$parent has not exactly one AST parent") + parent + case weird => + val parentMethod = identifier.getParentMethod.get + val lineNumber: Int = + identifier.lineNumber.getOrElse(Integer.getInteger("-1")).toInt + throw ReportableError( + 
parentMethod.filename, + lineNumber, + parentMethod.fullName, + identifier.code, + s"$identifier has a non-call parent $weird" + ) + } + }*/ +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/utility/AstTraversal.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/utility/AstTraversal.scala new file mode 100644 index 0000000..dd51300 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/utility/AstTraversal.scala @@ -0,0 +1,20 @@ +package io.joern.bytecode.passes.utility + +import io.shiftleft.codepropertygraph.generated.nodes +import io.shiftleft.codepropertygraph.generated.nodes.AstNode +import io.shiftleft.semanticcpg.language._ + +import scala.annotation.tailrec + +object AstTraversal { + + @tailrec + def getParentMethod(node: AstNode): nodes.Method = { + node match { + case x: nodes.Method => + x + case x => getParentMethod(x.astParent.next()) + } + } + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/utility/MethodDetectionAndAssociation.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/utility/MethodDetectionAndAssociation.scala new file mode 100644 index 0000000..890f8b5 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/utility/MethodDetectionAndAssociation.scala @@ -0,0 +1,181 @@ +package io.joern.bytecode.passes.utility + +import io.joern.bytecode.Defines +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.nodes +import io.shiftleft.codepropertygraph.generated.nodes.{Call, Method} +import io.shiftleft.semanticcpg.language._ +import overflowdb.traversal.Traversal + +import java.io.{ByteArrayOutputStream, PrintWriter} +import scala.sys.process.{ProcessLogger, _} + +object MethodDetectionAndAssociation { + + val KNOWN_FUNCTION_STARTS: Set[String] = Set("INIT_FCALL", + "INIT_METHOD_CALL", + "INIT_DYNAMIC_CALL", + "INIT_FCALL_BY_NAME", + "INIT_NS_FCALL_BY_NAME", + "INIT_STATIC_METHOD_CALL", + "NEW", +
"INIT_USER_CALL") + + val KNOWN_FUNCTION_ENDS: Set[String] = Set( + "DO_FCALL_BY_NAME", + "DO_ICALL", + "DO_UCALL", + "DO_FCALL" + ) + val KNOWN_SEND_VALS: Set[String] = Set( + "SEND_VAR_NO_REF_EX", + "SEND_VAR_NO_REF", + "SEND_VAR_EX", + "SEND_VAL_EX", + "SEND_FUNC_ARG", + "SEND_VAL", + "SEND_VAR", + "SEND_REF", + "SEND_ARRAY", + "SEND_USER" + ) + + var phpInternalFunctions: Option[Set[String]] = None + + def getPhpInternalFunctions(interpreter: String): Set[String] = { // asks the interpreter once and caches the answer; later calls return the cache regardless of `interpreter` + phpInternalFunctions match { + case Some(x) => x + case None => + val stderrStream = new ByteArrayOutputStream() + val stdoutStream = new ByteArrayOutputStream() + val stdoutWriter = new PrintWriter(stdoutStream) + val stderrWriter = new PrintWriter(stderrStream) + val command = + s"""$interpreter -r "echo implode(',',get_defined_functions()['internal']);"""" + command.!(ProcessLogger(stdoutWriter.println, stderrWriter.println)) + stderrWriter.close() + stdoutWriter.close() + phpInternalFunctions = Some(stdoutStream.toString.split(",").map(_.trim).filter(_.nonEmpty).toSet) // println appended a line separator after the last name; trim it and drop empty entries + phpInternalFunctions.get + } + } + + def getSendOrderLiteral(node: nodes.Call): Int = { + val pos = node.name match { + case "SEND_ARRAY" => 0 + case x if KNOWN_SEND_VALS.contains(x) => 1 + } + node.astChildren.order(pos).head.asInstanceOf[nodes.Literal].code.toInt + } + + def getSendValuePos(node: nodes.Call): Int = { + val pos = node.name match { + case "SEND_ARRAY" => 1 + case x if KNOWN_SEND_VALS.contains(x) => 0 + } + pos + } + + private def resolveThisAndSelf(call: Call): Option[String] = { + val split = AstTraversal + .getParentMethod(call) + .fullName + .split("::") + if (split.length == 2) { + Some(split.head) + } else { + None + } + } + + def handleDynamicCall(call: Call): String = { + "DYNAMIC" + } + + def handleUserCall(call: Call): String = { + "DYNAMIC" + } + + def handleCallByName(call: Call): String = { + call.astChildren.order(1).head.asInstanceOf[nodes.Literal].code + } + + def handleStaticMethodCall(call: Call): String = { + val callSize
= call.astChildren.l.length + val tclass = call.astChildren.order(1).next() match { + case x: nodes.Literal if x.code == "self" => resolveThisAndSelf(call) + case x: nodes.Literal => Some(x.code) + case _ => None + } + val target = + call.astChildren.order(if (callSize == 3) 2 else 3).next() match { + case x: nodes.Literal => Some(x.code) + case _ => None + } + (tclass, target) match { + case (Some(className), Some(methodName)) => + s"$className::$methodName" + case (_, Some(methodName)) => s".*::$methodName" + case _ => Defines.DYNAMIC_FUNCTION + } + } + + def handleFcall(call: Call): String = { + call.astChildren.order(2).head.asInstanceOf[nodes.Literal].code + } + + def handleMethodCall(call: Call): String = { + (call.astChildren.order(1).head, call.astChildren.order(2).head) match { + case (target: nodes.Identifier, node: nodes.Literal) => + target.code match { + case "THIS" => + val parentClass = resolveThisAndSelf(call) + parentClass match { + case Some(value) => + if (value.contains("{closure}")) { + s".*::${node.code}" + } else { + s"$value::${node.code}" + } + case None => s".*::${node.code}" + } + + case _ => s".*::${node.code}" + } + case (_, _) => Defines.DYNAMIC_FUNCTION + } + } + + def handleNew(call: Call): String = { + call.astChildren.order(1).head match { + case name: nodes.Literal => s"${name.code}::__construct" + case _: nodes.Identifier => s"UNKNOWN::__construct" + } + } + + def getCalledMethod(initCall: Call): String = { + val ret: String = initCall.name match { + case "INIT_DYNAMIC_CALL" => handleDynamicCall(initCall) + case "INIT_USER_CALL" => handleUserCall(initCall) + case "INIT_FCALL_BY_NAME" | "INIT_NS_FCALL_BY_NAME" => + handleCallByName(initCall) + case "INIT_STATIC_METHOD_CALL" => handleStaticMethodCall(initCall) + case "INIT_FCALL" => handleFcall(initCall) + case "INIT_METHOD_CALL" => handleMethodCall(initCall) + case "NEW" => handleNew(initCall) + } + ret.toLowerCase + } + + def getCallCorrespondingMethod(cpg: Cpg, name: String): 
List[Method] = { + name.toLowerCase.replace("\\", "\\\\") match { + case "dynamic" => Nil // the name was lower-cased above, so the DYNAMIC sentinel has to be matched in lower case + case calledFunctionName: String => + // we actually want the regexp feature here! + val traversal: Traversal[Method] = + cpg.method.fullName(calledFunctionName) + traversal.l + } + } + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/utility/MethodIdentification.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/utility/MethodIdentification.scala new file mode 100644 index 0000000..eb65e18 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/utility/MethodIdentification.scala @@ -0,0 +1,222 @@ +package io.joern.bytecode.passes.utility + +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.nodes +import io.shiftleft.codepropertygraph.generated.nodes.Method +import io.shiftleft.semanticcpg.language._ +import overflowdb.traversal.Traversal + +import java.io.File + +class FileNotFoundException(file: String, results: List[nodes.AstNode]) + extends Exception { + + override def toString: String = { + s"Identifying unique file $file lead to results $results" + } + +} + +class NamespaceNotFoundException(namespace: String, + file: Option[nodes.File], + results: List[nodes.AstNode]) + extends Exception { + + override def getMessage: String = { + val str = new StringBuilder + str.append(s"Identifying unique namespace $namespace") + str.append(file match { + case Some(file) => s" in specified file $file" + case None => "" + }) + str.append(s" lead to $results") + str.toString() + } + +} + +class TypeDeclNotFoundException(typeDecl: String, + namespace: Option[nodes.NamespaceBlock], + file: Option[nodes.File], + results: List[nodes.AstNode]) + extends Exception { + + override def getMessage: String = { + val str = new StringBuilder + str.append(s"Identifying unique TypeDecl $typeDecl") + str.append(namespace match { + case Some(namespace) => s" in specified namespace ${namespace.name}" + case None
=> "" + }) + str.append(file match { + case Some(file) => s" in specified file ${file.name}" + case None => "" + }) + str.append(s" lead to results $results") + str.toString() + } + +} + +class MethodNotFoundException(method: String, + typeDecl: Option[nodes.TypeDecl], + namespace: Option[nodes.NamespaceBlock], + file: Option[nodes.File], + results: List[nodes.AstNode]) + extends Exception { + + override def getMessage: String = { + val str = new StringBuilder + str.append(s"Identifying unique method $method") + str.append(typeDecl match { + case Some(typeDecl) => s" in specified TypeDecl ${typeDecl.name}" + case None => "" + }) + str.append(namespace match { + case Some(namespace) => s" in specified namespace ${namespace.name}" + case None => "" + }) + str.append(file match { + case Some(file) => s" in specified file ${file.name}" + case None => "" + }) + str.append(s" lead to results $results") + str.append(s" with Names: ${results + .filter(_.isInstanceOf[Method]) + .map(_.asInstanceOf[Method]) + .map(x => (x.name, x.fullName, x.lineNumber, x.lineNumberEnd))}") + str.append(s" amount of unique results: ${results.toSet.size}") + str.toString() + } + +} + +object MethodIdentification { + + def getUniqueMethod( + file: Option[String], + namespace: Option[String], + typeDeclName: Option[String], + name: String, + start: Option[Int] = None, + end: Option[Int] = None)(implicit cpg: Cpg): nodes.Method = { + val fileNode: Option[nodes.File] = file match { + case Some(filePath) => Some(getUniqueFile(filePath)) + case None => None + } + val namespaceNode: Option[nodes.NamespaceBlock] = namespace match { + case Some(namespaceName) => + Some(getUniqueNamespace(namespaceName, fileNode)) + case None => None + } + val typeDecl: Option[nodes.TypeDecl] = typeDeclName match { + case Some(typeName) => + Some(getUniqueTypeDeclaration(typeName, namespaceNode, fileNode)) + case None => None + } + ((namespaceNode, typeDecl) match { + case (_, Some(typeDecl)) => + 
typeDecl.astChildren.isMethod.nameExact(name).l + case (Some(namespace), None) => + cpg.method + .astParentTypeExact("NAMESPACE_BLOCK") + .astParentFullNameExact(namespace.fullName) + .nameExact(name) + .l + case (None, None) => cpg.method.nameExact(name).l + }) match { + case Nil => + throw new MethodNotFoundException(name, + typeDecl, + namespaceNode, + fileNode, + Nil) + case single :: Nil => single + case multiple => + (start, end) match { + case (Some(start), Some(_)) => + multiple + .find(method => method.lineNumber.contains(start)) // && method.lineNumberEnd.contains(end) + .getOrElse( + throw new MethodNotFoundException(name, + typeDecl, + namespaceNode, + fileNode, + multiple)) + case _ => + throw new MethodNotFoundException(name, + typeDecl, + namespaceNode, + fileNode, + multiple) + } + + } + } + + def getUniqueFile(file: String)(implicit cpg: Cpg): nodes.File = { + cpg.file.nameExact(file).l match { + case Nil => throw new FileNotFoundException(file, Nil) + case file :: Nil => file + case result => throw new FileNotFoundException(file, result) + } + } + + def getUniqueNamespace(name: String, fileNode: Option[nodes.File])( + implicit cpg: Cpg): nodes.NamespaceBlock = { + (fileNode match { + case Some(fileNode) => + fileNode.astChildren.isNamespaceBlock.nameExact(name).l + case None => cpg.namespaceBlock.nameExact(name).l + }) match { + case Nil => throw new NamespaceNotFoundException(name, fileNode, Nil) + case namespace :: Nil => namespace + case results => + throw new NamespaceNotFoundException(name, fileNode, results) + } + } + + def getUniqueTypeDeclaration( + typeName: String, + namespaceNode: Option[nodes.NamespaceBlock], + fileNode: Option[nodes.File])(implicit cpg: Cpg): nodes.TypeDecl = { + ((fileNode, namespaceNode) match { + case (_, Some(namespace)) => + cpg.typeDecl + .astParentTypeExact("NAMESPACE_BLOCK") + .astParentFullNameExact(namespace.fullName) + .nameExact(typeName) + .l + case (Some(file), None) => + 
file.astChildren.isNamespaceBlock + .map { namespace => + cpg.typeDecl + .astParentTypeExact("NAMESPACE_BLOCK") + .astParentFullNameExact(namespace.fullName) + } + .foldLeft(Traversal[nodes.TypeDecl]())(_ ++ _) + .astChildren + .isTypeDecl + .nameExact(typeName) + .l + case (None, None) => cpg.typeDecl.nameExact(typeName).l + }) match { + case Nil => + throw new TypeDeclNotFoundException(typeName, + namespaceNode, + fileNode, + Nil) + case typeDecl :: Nil => typeDecl + case results => + throw new TypeDeclNotFoundException(typeName, + namespaceNode, + fileNode, + results) + } + } + + def getAbsolutePath(path: String): String = { + new File(path).toPath.toAbsolutePath.normalize().toString + } + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/passes/utility/OperationCreation.scala b/layerByteCode/src/main/scala/io/joern/bytecode/passes/utility/OperationCreation.scala new file mode 100644 index 0000000..64e27a3 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/passes/utility/OperationCreation.scala @@ -0,0 +1,506 @@ +package io.joern.bytecode.passes.utility + +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.passes.utility.ValueCreation.createValueNode +import io.joern.reporting.ReportableError +import io.shiftleft.codepropertygraph.generated.nodes.NewCall +import io.shiftleft.codepropertygraph.generated.{ + DispatchTypes, + EdgeTypes, + nodes +} +import io.shiftleft.passes.DiffGraph + +object OperationCreation { + + def addOperation(opcode: Opcode, order: Integer, lineNumber: Integer)( + implicit graph: DiffGraph.Builder): nodes.NewCall = { + opcode match { + case op: NoValueOperation => addNoValueOperation(op, order, lineNumber) + case op: SingleValueOperation => + addSingleValueOperation(op, order, lineNumber) + case op: DualValueOperation => + addDualValueOperation(op, order, lineNumber) + case op: TripleValueOperation => + addTripleValueOperation(op, order, lineNumber) + case op: QuadrupleValueOperation => + 
addQuadrupleValueOperation(op, order, lineNumber) + case op: QuintupleValueOperation => + addQuintupleValueOperation(op, order, lineNumber) + case op: INIT_FCALL => addOperationInitFcall(op, order, lineNumber) + case op: INIT_FCALL_BY_NAME => + addOperationInitFcallByName(op, order, lineNumber) + case op: INIT_DYNAMIC_CALL => + addOperationInitDynamicCall(op, order, lineNumber) + case op: INIT_METHOD_CALL => + addOperationInitMethodCall(op, order, lineNumber) + case op: INIT_NS_FCALL_BY_NAME => + addOperationInitFsFcallByName(op, order, lineNumber) + case op: INIT_STATIC_METHOD_CALL => + addOperationInitStaticMethodCall(op, order, lineNumber) + case op: INIT_USER_CALL => + addOperationInitUserCall(op, order, lineNumber) + case op: SWITCH => + addOperationSwitch(op, order, lineNumber) + case op: MatchOpcode => + addOperationMatch(op, order, lineNumber) + } + } + + def addQuintupleValueOperation( + op: QuintupleValueOperation, + order: Integer, + lineNumber: Integer)(implicit graph: DiffGraph.Builder): nodes.NewCall = { + val first = createValueNode(op.first, Integer.valueOf(0)) + val second = createValueNode(op.second, Integer.valueOf(1)) + val third = createValueNode(op.third, Integer.valueOf(2)) + val fourth = createValueNode(op.fourth, Integer.valueOf(3)) + val fifth = createValueNode(op.fifth, Integer.valueOf(4)) + val code = + s"${op.code} ${first._2} ${second._2} ${third._2} ${fourth._2} ${fifth._2}" + val operation = nodes + .NewCall() + .name(op.code) + .code(code) + .dispatchType(DispatchTypes.STATIC_DISPATCH) + .order(order) + .lineNumber(Some(lineNumber)) + graph.addNode(operation) + graph.addNode(first._1) + graph.addEdge(operation, first._1, EdgeTypes.ARGUMENT) + graph.addEdge(operation, first._1, EdgeTypes.AST) + graph.addNode(second._1) + graph.addEdge(operation, second._1, EdgeTypes.ARGUMENT) + graph.addEdge(operation, second._1, EdgeTypes.AST) + graph.addNode(third._1) + graph.addEdge(operation, third._1, EdgeTypes.ARGUMENT) + 
graph.addEdge(operation, third._1, EdgeTypes.AST) + graph.addNode(fourth._1) + graph.addEdge(operation, fourth._1, EdgeTypes.ARGUMENT) + graph.addEdge(operation, fourth._1, EdgeTypes.AST) + graph.addNode(fifth._1) + graph.addEdge(operation, fifth._1, EdgeTypes.ARGUMENT) + graph.addEdge(operation, fifth._1, EdgeTypes.AST) + operation + } + + def addQuadrupleValueOperation( + op: QuadrupleValueOperation, + order: Integer, + lineNumber: Integer)(implicit graph: DiffGraph.Builder): nodes.NewCall = { + val first = createValueNode(op.first, Integer.valueOf(0)) + val second = createValueNode(op.second, Integer.valueOf(1)) + val third = createValueNode(op.third, Integer.valueOf(2)) + val fourth = createValueNode(op.fourth, Integer.valueOf(3)) + val code = s"${op.code} ${first._2} ${second._2} ${third._2} ${fourth._2}" + val operation = nodes + .NewCall() + .name(op.code) + .code(code) + .dispatchType(DispatchTypes.STATIC_DISPATCH) + .order(order) + .lineNumber(Some(lineNumber)) + graph.addNode(operation) + graph.addNode(first._1) + graph.addEdge(operation, first._1, EdgeTypes.ARGUMENT) + graph.addEdge(operation, first._1, EdgeTypes.AST) + graph.addNode(second._1) + graph.addEdge(operation, second._1, EdgeTypes.ARGUMENT) + graph.addEdge(operation, second._1, EdgeTypes.AST) + graph.addNode(third._1) + graph.addEdge(operation, third._1, EdgeTypes.ARGUMENT) + graph.addEdge(operation, third._1, EdgeTypes.AST) + graph.addNode(fourth._1) + graph.addEdge(operation, fourth._1, EdgeTypes.ARGUMENT) + graph.addEdge(operation, fourth._1, EdgeTypes.AST) + operation + } + + def addTripleValueOperation( + op: TripleValueOperation, + order: Integer, + lineNumber: Integer)(implicit graph: DiffGraph.Builder): nodes.NewCall = { + val first = createValueNode(op.first, Integer.valueOf(0)) + val second = createValueNode(op.second, Integer.valueOf(1)) + val third = createValueNode(op.third, Integer.valueOf(2)) + val code = s"${op.code} ${first._2} ${second._2} ${third._2}" + val operation = nodes 
/** Builds the call node for an operation with a left-hand and a right-hand operand.
  *
  * Both operands become value nodes (argument positions 0 and 1) that are linked
  * to the call node with an ARGUMENT edge followed by an AST edge. The call's code
  * is the opcode followed by both rendered operands, separated by single spaces.
  *
  * @param op         the two-operand operation to translate
  * @param order      order value stored on the created call node
  * @param lineNumber line number stored on the created call node
  * @param graph      diff graph receiving the new nodes and edges
  * @return the created call node
  */
def addDualValueOperation(
    op: DualValueOperation,
    order: Integer,
    lineNumber: Integer)(implicit graph: DiffGraph.Builder): nodes.NewCall = {
  val (lhsNode, lhsText) = createValueNode(op.lhs, Integer.valueOf(0))
  val (rhsNode, rhsText) = createValueNode(op.rhs, Integer.valueOf(1))
  val callNode = nodes
    .NewCall()
    .name(op.code)
    .code(s"${op.code} $lhsText $rhsText")
    .dispatchType(DispatchTypes.STATIC_DISPATCH)
    .order(order)
    .lineNumber(Some(lineNumber))
  graph.addNode(callNode)
  for (operandNode <- Seq(lhsNode, rhsNode)) {
    graph.addNode(operandNode)
    graph.addEdge(callNode, operandNode, EdgeTypes.ARGUMENT)
    graph.addEdge(callNode, operandNode, EdgeTypes.AST)
  }
  callNode
}
/** Builds the call node for a switch operation.
  *
  * The switched-on value becomes argument 0. Each entry of `switchstring.switches`
  * then contributes two consecutive arguments: a string literal built from the
  * entry's first component and an integer literal built from its second component.
  * Every argument node is linked to the call with an AST edge followed by an
  * ARGUMENT edge.
  *
  * @param switchstring the switch operation to translate
  * @param order        order value stored on the created call node
  * @param lineNumber   line number stored on the created call node
  * @param graph        diff graph receiving the new nodes and edges
  * @return the created call node
  */
def addOperationSwitch(
    switchstring: SWITCH,
    order: Integer,
    lineNumber: Integer)(implicit graph: DiffGraph.Builder): nodes.NewCall = {
  val (subjectNode, subjectText) = createValueNode(switchstring.value, 0)
  val renderedCases = switchstring.switches
    .map(entry => "\"" + entry._1 + "\": " + entry._2)
    .mkString(", ")
  val switchCall = nodes
    .NewCall()
    .name(switchstring.code)
    .code(s"${switchstring.code} $subjectText $renderedCases")
    .order(order)
    .dispatchType(DispatchTypes.STATIC_DISPATCH)
    .lineNumber(Some(lineNumber))

  // Registers an argument node and wires it to the switch call.
  def attach(argument: nodes.NewNode): Unit = {
    graph.addNode(argument)
    graph.addEdge(switchCall, argument, EdgeTypes.AST)
    graph.addEdge(switchCall, argument, EdgeTypes.ARGUMENT)
  }

  graph.addNode(switchCall)
  attach(subjectNode)
  switchstring.switches.zipWithIndex.foreach {
    case ((label, target), index) =>
      // Position 0 is the subject, so entry i occupies positions 2i+1 and 2i+2.
      val labelPosition = 2 * index + 1
      attach(createValueNode(StringLiteral(label), labelPosition)._1)
      attach(
        createValueNode(IntegerLiteral(target.toLong), labelPosition + 1)._1)
  }
  switchCall
}
/** Builds the call node for an `INIT_USER_CALL` operation.
  *
  * Three arguments are attached, each with an AST edge followed by an ARGUMENT
  * edge: the parameter count as an integer literal (position 0), the value of
  * `op.func_type` (position 1) and the value of `op.act_on` (position 2).
  *
  * @param op         the INIT_USER_CALL operation to translate
  * @param order      order value stored on the created call node
  * @param lineNumber line number stored on the created call node
  * @param graph      diff graph receiving the new nodes and edges
  * @return the created call node
  */
def addOperationInitUserCall(
    op: INIT_USER_CALL,
    order: Integer,
    lineNumber: Integer)(implicit graph: DiffGraph.Builder): NewCall = {
  val (countNode, _) =
    createValueNode(IntegerLiteral(op.param_count.toLong), 0)
  val (funcTypeNode, funcTypeText) = createValueNode(op.func_type, 1)
  val (actOnNode, actOnText) = createValueNode(op.act_on, 2)
  val initCall = nodes
    .NewCall()
    .name("INIT_USER_CALL")
    .code(s"INIT_USER_CALL ${op.param_count} $funcTypeText $actOnText")
    .dispatchType(DispatchTypes.STATIC_DISPATCH)
    .order(order)
    .lineNumber(Some(lineNumber))
  graph.addNode(initCall)
  Seq(countNode, funcTypeNode, actOnNode).foreach { argument =>
    graph.addNode(argument)
    graph.addEdge(initCall, argument, EdgeTypes.AST)
    graph.addEdge(initCall, argument, EdgeTypes.ARGUMENT)
  }
  initCall
}
/** Builds the call node for an `INIT_FCALL` operation.
  *
  * Three arguments are attached, each with an AST edge followed by an ARGUMENT
  * edge: the parameter count (position 0) and `op.var2` (position 1) as integer
  * literals, and the value of `op.function` (position 2).
  *
  * The dispatch type is set to STATIC_DISPATCH, matching the other call builders
  * defined alongside this one; previously this builder was the only one that left
  * the property unset.
  *
  * @param op         the INIT_FCALL operation to translate
  * @param order      order value stored on the created call node
  * @param lineNumber line number stored on the created call node
  * @param graph      diff graph receiving the new nodes and edges
  * @return the created call node
  */
def addOperationInitFcall(
    op: INIT_FCALL,
    order: Integer,
    lineNumber: Integer)(implicit graph: DiffGraph.Builder): nodes.NewCall = {
  val paramCount = createValueNode(IntegerLiteral(op.paramCount.toLong), 0)
  val var2 = createValueNode(IntegerLiteral(op.var2.toLong), 1)
  val function = createValueNode(op.function, 2)
  val init_fcall = nodes
    .NewCall()
    .code(s"INIT_FCALL ${op.paramCount} ${op.var2} ${function._2}")
    .name("INIT_FCALL")
    .order(order)
    .dispatchType(DispatchTypes.STATIC_DISPATCH)
    .lineNumber(Some(lineNumber))
  graph.addNode(init_fcall)
  Seq(paramCount._1, var2._1, function._1).foreach { argument =>
    graph.addNode(argument)
    graph.addEdge(init_fcall, argument, EdgeTypes.AST)
    graph.addEdge(init_fcall, argument, EdgeTypes.ARGUMENT)
  }
  init_fcall
}
/** Builds the call node for an `INIT_NS_FCALL_BY_NAME` operation.
  *
  * Two arguments are attached, each with an AST edge followed by an ARGUMENT
  * edge: the parameter count as an integer literal (position 0) and the function
  * name as a string literal (position 1). The call's code contains the raw
  * function name rather than the rendered literal.
  *
  * @param op         the INIT_NS_FCALL_BY_NAME operation to translate
  * @param order      order value stored on the created call node
  * @param lineNumber line number stored on the created call node
  * @param graph      diff graph receiving the new nodes and edges
  * @return the created call node
  */
def addOperationInitFsFcallByName(
    op: INIT_NS_FCALL_BY_NAME,
    order: Integer,
    lineNumber: Integer)(implicit graph: DiffGraph.Builder): nodes.NewCall = {
  val (countNode, _) = createValueNode(IntegerLiteral(op.paramCount.toLong), 0)
  val (nameNode, _) = createValueNode(StringLiteral(op.function), 1)
  val initCall = nodes
    .NewCall()
    .name(s"INIT_NS_FCALL_BY_NAME")
    .code(s"INIT_NS_FCALL_BY_NAME ${op.paramCount} ${op.function}")
    .order(order)
    .dispatchType(DispatchTypes.STATIC_DISPATCH)
    .lineNumber(Some(lineNumber))
  graph.addNode(initCall)
  for (argument <- Seq(countNode, nameNode)) {
    graph.addNode(argument)
    graph.addEdge(initCall, argument, EdgeTypes.AST)
    graph.addEdge(initCall, argument, EdgeTypes.ARGUMENT)
  }
  initCall
}
/** Helpers that turn parsed bytecode values into graph nodes. */
object ValueCreation {

  /** Creates the graph node for a parsed bytecode value together with its textual rendering.
    *
    * Variables become identifier nodes; every other supported value becomes a
    * literal node whose `typeFullName` names the kind of value. The node is only
    * built here, it is not added to any graph.
    *
    * @param value the parsed value to convert
    * @param order order stored on the created node; must not be negative
    * @return the created node and the string used for the value inside a call's code
    * @throws RuntimeException if the value is of a kind that is not handled here
    */
  def createValueNode(value: Value, order: Integer): (nodes.NewNode, String) = {
    assert(order >= 0)
    value match {
      case Variable(name, ref, tmp) =>
        // Reference/temporary variables are rendered by bare name, all others as CV($name).
        // The identifier's code and the returned rendering are always the same string.
        val rendered = if (ref || tmp) s"$name" else "CV($" + name + ")"
        (nodes
           .NewIdentifier()
           .name(name)
           .code(rendered)
           .order(order),
         rendered)
      case ByteCodeConstructor() =>
        (nodes
           .NewLiteral()
           .typeFullName("ByteCodeConstructor")
           .code("CONSTRUCTOR")
           .order(order),
         s"CONSTRUCTOR")
      case AssignOpLiteral(value) =>
        (nodes
           .NewLiteral()
           .typeFullName("AssignOp")
           .code(value)
           .order(order),
         s"($value)")
      case StringLiteral(value) =>
        (nodes.NewLiteral().typeFullName("String").code(value).order(order),
         "string(\"" + value + "\")")
      case IntegerLiteral(value) =>
        (nodes
           .NewLiteral()
           .typeFullName("Integer")
           .code(s"$value")
           .order(order),
         s"int($value)")
      case Null() =>
        // NOTE(review): the code carries a leading space (" null") - confirm whether
        // consumers rely on it before normalising.
        (nodes.NewLiteral().typeFullName("NULL").code(" null").order(order),
         "NULL")
      case FloatLiteral(value) =>
        (nodes
           .NewLiteral()
           .typeFullName("Float")
           .code(s"$value")
           .order(order),
         s"float($value)")
      case BooleanLiteral(value) =>
        (nodes
           .NewLiteral()
           .typeFullName("Boolean")
           .code(s"$value")
           .order(order),
         s"bool($value)")
      case TryCatchLiteral(value) =>
        (nodes
           .NewLiteral()
           .typeFullName("try-catch")
           .code(s"$value")
           .order(order),
         s"try-catch($value)")
      case ArrayValue(None) =>
        // Array without known content: no code is set on the node.
        (nodes.NewLiteral().typeFullName("Array").order(order), "array(..)")
      case av @ ArrayValue(Some(content)) =>
        // Non-empty arrays embed their JSON form, base64 encoded; empty ones render as array().
        val code = content match {
          case Nil    => s"array()"
          case _ :: _ => s"array(${encodeBase64(av.toJson.toString())})"
        }
        (nodes
           .NewLiteral()
           .typeFullName("Array")
           .order(order)
           .code(code),
         code)
      case Zval(ttype) =>
        // Render the wrapped type. The previous version interpolated the enclosing
        // `value` parameter here, which produced "zval(Zval(..))".
        (nodes
           .NewLiteral()
           .typeFullName("zval")
           .code(s"$ttype")
           .order(order),
         s"zval($ttype)")
      case ByteCodeKeyword(value) =>
        (nodes
           .NewLiteral()
           .typeFullName("ByteCodeKeyword")
           .order(order)
           .code(s"$value"),
         s"($value)")
      case ByteCodePlaceIndicator(value) =>
        (nodes
           .NewLiteral()
           .typeFullName("ByteCodePlaceIndicator")
           .order(order)
           .code(s"$value"),
         s"$value")
      case x =>
        throw new RuntimeException(
          s"unexpected matching in control flow block $x")
    }
  }

}
/**
  * Retrieve a parameter of a call by its position.
  *
  * Per the original note, the index is taken directly from the bytecode, i.e. it is
  * 1-indexed for f(1,2); but if send_array is used it is in pos 0.
  * A candidate matches when the code of its position literal parses as an integer
  * equal to `pos`; literals that are not integers never match.
  *
  * @param pos the position to look for, as recorded in the bytecode
  * @return the matching argument node, or None if no argument has that position
  * @throws RuntimeException if more than one argument matches the position
  */
def getParameter(pos: Int): Option[CfgNode] = {
  // Only non-fatal failures (such as a non-numeric literal) count as "no match";
  // fatal JVM errors are no longer swallowed as they were by the former
  // `catch { case _: Throwable => false }`.
  def castAndCompare(lit: Literal): Boolean =
    scala.util.Try(lit.code.toInt).toOption.contains(pos)

  parameterMap(call, castAndCompare) match {
    case Nil           => None
    case single :: Nil => Some(single)
    case multiple =>
      throw new RuntimeException(
        s"multiple arguments matching the description ${multiple.map(_.code)}")
  }
}
/** Implicit helpers shared across the bytecode layer. */
object implicits {
  implicit class OneableSeq[T](l: Seq[T]) {

    /**
      * Assert that the Seq only contains one element and return it.
      *
      * The size check uses `lengthCompare`, so a long linear sequence is not walked
      * to its end just to learn that it has more than one element. The failure
      * message, including the actual size, is only computed when the assertion fails.
      *
      * @return The only element in the Seq.
      * @throws AssertionError if the Seq is empty or holds more than one element
      */
    def one: T = {
      assert(l.lengthCompare(1) == 0,
             s"expected exactly one element but found ${l.length}")
      l.head
    }
  }
}
/** Dispatches a bytecode call to the handler responsible for its opcode.
  *
  * The handler is selected by `call.name`. Some opcodes appear under several names
  * carrying a numeric suffix (for example `ASSIGN_REF_2` / `ASSIGN_REF_3`) and share
  * one handler. Names not listed are retried against `call.code` for the `DO_*`
  * call opcodes before giving up.
  *
  * @param call the bytecode call
  * @return whatever the implementation shall return
  * @throws ByteCodeTraversalHandlingError if neither the name nor the code of the
  *                                        call identifies a known opcode
  */
private def traverseBytecodeCall(call: Call): T = {
  call.name match {
    //Array Stuff
    case "INIT_ARRAY" => INIT_ARRAY(call)
    case "ADD_ARRAY_ELEMENT" => ADD_ARRAY_ELEMENT(call)
    case "IN_ARRAY" => IN_ARRAY(call)
    case "ADD_ARRAY_UNPACK" => ADD_ARRAY_UNPACK(call)
    // Assign
    case "ASSIGN_DIM" => ASSIGN_DIM(call)
    case "ASSIGN" => ASSIGN(call)
    case "ASSIGN_OP" => ASSIGN_OP(call)
    case "ASSIGN_STATIC_PROP_1" | "ASSIGN_STATIC_PROP_2" =>
      ASSIGN_STATIC_PROP(call)
    case "ASSIGN_STATIC_PROP_OP" => ASSIGN_STATIC_PROP_OP(call)
    case "ASSIGN_STATIC_PROP_REF" => ASSIGN_STATIC_PROP_REF(call)
    case "ASSIGN_OBJ" => ASSIGN_OBJ(call)
    case "ASSIGN_REF_2" | "ASSIGN_REF_3" => ASSIGN_REF(call)
    case "ASSIGN_OBJ_REF_2" | "ASSIGN_OBJ_REF_3" => ASSIGN_OBJ_REF(call)
    case "=" => ASSIGN_EQUALSIGN(call)
    //Bind
    // The key used to be spelled "BIND_STAIC" only, so BIND_STATIC calls ended up in
    // the error branch. The misspelled key is still accepted so nothing that matched
    // before stops matching.
    case "BIND_STATIC" | "BIND_STAIC" => BIND_STATIC(call)
    case "BIND_LEXICAL" => BIND_LEXICAL(call)
    //Call Related
    case "NEW" => NEW(call)
    case "INIT_FCALL" => INIT_FCALL(call)
    case "INIT_METHOD_CALL" => INIT_METHOD_CALL(call)
    case "INIT_NS_FCALL_BY_NAME" => INIT_NS_FCALL_BY_NAME(call)
    case "INIT_DYNAMIC_CALL" => INIT_DYNAMIC_CALL(call)
    case "INIT_FCALL_BY_NAME" => INIT_FCALL_BY_NAME(call)
    case "INIT_STATIC_METHOD_CALL" => INIT_STATIC_METHOD_CALL(call)
    case "INIT_USER_CALL" => INIT_USER_CALL(call)
    case "SEND_VAR_EX" => SEND_VAR_EX(call)
    case "SEND_VAL_EX" => SEND_VAL_EX(call)
    case "SEND_VAL" => SEND_VAL(call)
    case "SEND_VAR" => SEND_VAR(call)
    case "SEND_VAR_NO_REF" => SEND_VAR_NO_REF(call)
    case "SEND_VAR_NO_REF_EX" => SEND_VAR_NO_REF_EX(call)
    case "SEND_FUNC_ARG" => SEND_FUNC_ARG(call)
    case "SEND_USER" => SEND_USER(call)
    case "SEND_REF" => SEND_REF(call)
    case "SEND_ARRAY" => SEND_ARRAY(call)
    case "RECV" => RECV(call)
    case "RECV_INIT" => RECV_INIT(call)
    case "RECV_VARIADIC" => RECV_VARIADIC(call)
    case "CHECK_FUNC_ARG" => CHECK_FUNC_ARG(call)
    case "CHECK_UNDEF_ARGS" => CHECK_UNDEF_ARGS(call)
    case "FUNC_GET_ARG" => FUNC_GET_ARG(call)
    case "FUNC_GET_ARGS" => FUNC_GET_ARGS(call)
    case "RETURN_BY_REF" => RETURN_BY_REF(call)
    case "RETURN" => RETURN(call)
    case "VERIFY_RETURN_TYPE" => VERIFY_RETURN_TYPE(call)
    case "VERIFY_NEVER_TYPE" => VERIFY_NEVER_TYPE(call)
    case "FAST_RET" => FAST_RET(call)
    //Class Related
    case "GET_CLASS" => GET_CLASS(call)
    case "DECLARE_ANON_CLASS" => DECLARE_ANON_CLASS(call)
    case "DECLARE_CLASS" => DECLARE_CLASS(call)
    //Control Constructs
    case "SWITCH_STRING" => SWITCH_STRING(call)
    case "SWITCH_LONG" => SWITCH_LONG(call)
    case "MATCH" => MATCH(call)
    case "CASE_STRICT" => CASE_STRICT(call)
    case "MATCH_ERROR" => MATCH_ERROR(call)
    case "EXIT" => EXIT(call)
    case "CATCH" => CATCH(call)
    //Fe
    case "FE_FETCH_R" => FE_FETCH_R(call)
    case "FE_FETCH_RW" => FE_FETCH_RW(call)
    case "FE_RESET_RW" => FE_RESET_RW(call)
    case "FE_RESET_R" => FE_RESET_R(call)
    //Fetch
    case "FETCH_OBJ_FUNC_ARG" => FETCH_OBJ_FUNC_ARG(call)
    case "FETCH_STATIC_PROP_R" => FETCH_STATIC_PROP_R(call)
    case "FETCH_STATIC_PROP_W" => FETCH_STATIC_PROP_W(call)
    case "FETCH_STATIC_PROP_FUNC_ARG" => FETCH_STATIC_PROP_FUNC_ARG(call)
    case "FETCH_STATIC_PROP_IS" => FETCH_STATIC_PROP_IS(call)
    case "FETCH_DIM_R" => FETCH_DIM_R(call)
    case "FETCH_LIST_R" => FETCH_LIST_R(call)
    case "FETCH_CLASS_CONSTANT" => FETCH_CLASS_CONSTANT(call)
    case "FETCH_CLASS" => FETCH_CLASS(call)
    case "FETCH_CLASS_NAME" => FETCH_CLASS_NAME(call)
    case "FETCH_DIM_FUNC_ARG" => FETCH_DIM_FUNC_ARG(call)
    case "FETCH_CONSTANT" => FETCH_CONSTANT(call)
    case "FETCH_R" => FETCH_R(call)
    case "FETCH_IS" => FETCH_IS(call)
    case "FETCH_OBJ_W_2" | "FETCH_OBJ_W_3" => FETCH_OBJ_W(call)
    case "FETCH_STATIC_PROP_UNSET" => FETCH_STATIC_PROP_UNSET(call)
    case "FETCH_STATIC_PROP_RW" => FETCH_STATIC_PROP_RW(call)
    case "FETCH_W" => FETCH_W(call)
    case "FETCH_DIM_W" => FETCH_DIM_W(call)
    case "FETCH_OBJ_RW" => FETCH_OBJ_RW(call)
    case "FETCH_OBJ_R" => FETCH_OBJ_R(call)
    case "FETCH_RW" => FETCH_RW(call)
    case "FETCH_OBJ_IS" => FETCH_OBJ_IS(call)
    case "FETCH_DIM_IS" => FETCH_DIM_IS(call)
    case "FETCH_DIM_UNSET" => FETCH_DIM_UNSET(call)
    case "FETCH_FUNC_ARG" => FETCH_FUNC_ARG(call)
    //Generic
    case "NOP" => NOP(call)
    case "BEGIN_SILENCE" => BEGIN_SILENCE(call)
    case "EXT_STMT" => EXT_STMT(call)
    case "EXT_NOP" => EXT_NOP(call)
    case "FETCH_THIS" => FETCH_THIS(call)
    case "GENERATOR_CREATE" => GENERATOR_CREATE(call)
    case "GET_CALLED_CLASS" => GET_CALLED_CLASS(call)
    case "FUNC_NUM_ARGS" => FUNC_NUM_ARGS(call)
    case "ECHO" => ECHO(call)
    case "BW_NOT" => BW_NOT(call)
    case "BOOL_NOT" => BOOL_NOT(call)
    case "QM_ASSIGN" => QM_ASSIGN(call)
    case "PRE_INC" => PRE_INC(call)
    case "POST_INC" => POST_INC(call)
    case "PRE_DEC" => PRE_DEC(call)
    case "POST_DEC" => POST_DEC(call)
    case "FREE" => FREE(call)
    case "PRINT" => PRINT(call)
    case "FE_FREE" => FE_FREE(call)
    case "END_SILENCE" => END_SILENCE(call)
    case "BOOL" => BOOL(call)
    case "OP_DATA" => OP_DATA(call)
    case "THROW" => THROW(call)
    case "STRLEN" => STRLEN(call)
    case "SEND_UNPACK" => SEND_UNPACK(call)
    case "COUNT" => COUNT(call)
    case "DEFINED" => DEFINED(call)
    case "DECLARE_FUNCTION" => DECLARE_FUNCTION(call)
    case "GET_TYPE" => GET_TYPE(call)
    case "UNSET_CV" => UNSET_CV(call)
    case "CLONE" => CLONE(call)
    case "MAKE_REF" => MAKE_REF(call)
    case "SEPARATE" => SEPARATE(call)
    case "DECLARE_LAMBDA_FUNCTION" => DECLARE_LAMBDA_FUNCTION(call)
    case "GENERATOR_RETURN" => GENERATOR_RETURN(call)
    case "DISCARD_EXCEPTION" => DISCARD_EXCEPTION(call)
    case "CONCAT" => CONCAT(call)
    case "FAST_CONCAT" => FAST_CONCAT(call)
    case "ADD" => ADD(call)
    case "SUB" => SUB(call)
    case "MUL" => MUL(call)
    case "DIV" => DIV(call)
    case "MOD" => MOD(call)
    case "SL" => SL(call)
    case "SR" => SR(call)
    case "BW_OR" => BW_OR(call)
    case "BW_AND" => BW_AND(call)
    case "BW_XOR" => BW_XOR(call)
    case "BOOL_OR" => BOOL_OR(call)
    case "IS_EQUAL" => IS_EQUAL(call)
    case "IS_NOT_EQUAL" => IS_NOT_EQUAL(call)
    case "IS_IDENTICAL" => IS_IDENTICAL(call)
    case "IS_NOT_IDENTICAL" => IS_NOT_IDENTICAL(call)
    case "IS_SMALLER" => IS_SMALLER(call)
    case "IS_SMALLER_OR_EQUAL" => IS_SMALLER_OR_EQUAL(call)
    case "BIND_GLOBAL" => BIND_GLOBAL(call)
    case "DECLARE_CLASS_DELAYED" => DECLARE_CLASS_DELAYED(call)
    case "DECLARE_CONST" => DECLARE_CONST(call)
    case "INCLUDE_OR_EVAL" => INCLUDE_OR_EVAL(call)
    case "POW" => POW(call)
    case "ARRAY_KEY_EXISTS" => ARRAY_KEY_EXISTS(call)
    case "TYPE_CHECK" => TYPE_CHECK(call)
    case "FETCH_DIM_RW" => FETCH_DIM_RW(call)
    case "UNSET_OBJ" => UNSET_OBJ(call)
    case "FETCH_UNSET" => FETCH_UNSET(call)
    case "UNSET_DIM" => UNSET_DIM(call)
    case "CASE" => CASE(call)
    case "FETCH_OBJ_UNSET" => FETCH_OBJ_UNSET(call)
    case "FETCH_GLOBALS" => FETCH_GLOBALS(call)
    case "UNSET_STATIC_PROP" => UNSET_STATIC_PROP(call)
    case "POST_INC_OBJ" => POST_INC_OBJ(call)
    case "PRE_INC_OBJ" => PRE_INC_OBJ(call)
    case "POST_DEC_OBJ" => POST_DEC_OBJ(call)
    case "PRE_DEC_OBJ" => PRE_DEC_OBJ(call)
    case "BOOL_XOR" => BOOL_XOR(call)
    case "SPACESHIP" => SPACESHIP(call)
    case "UNSET_VAR" => UNSET_VAR(call)
    case "ASSIGN_DIM_OP" => ASSIGN_DIM_OP(call)
    case "ASSIGN_OBJ_OP" => ASSIGN_OBJ_OP(call)
    //INC DEC STATIC PROP
    case "POST_INC_STATIC_PROP" => POST_INC_STATIC_PROP(call)
    case "PRE_INC_STATIC_PROP" => PRE_INC_STATIC_PROP(call)
    case "POST_DEC_STATIC_PROP" => POST_DEC_STATIC_PROP(call)
    case "PRE_DEC_STATIC_PROP" => PRE_DEC_STATIC_PROP(call)
    //ISSET
    case "ISSET_ISEMPTY_VAR" => ISSET_ISEMPTY_VAR(call)
    case "ISSET_ISEMPTY_DIM_OBJ" => ISSET_ISEMPTY_DIM_OBJ(call)
    case "ISSET_ISEMPTY_CV" => ISSET_ISEMPTY_CV(call)
    case "ISSET_ISEMPTY_PROP_OBJ" => ISSET_ISEMPTY_PROP_OBJ(call)
    case "ISSET_ISEMPTY_STATIC_PROP" => ISSET_ISEMPTY_STATIC_PROP(call)
    case "ISSET_ISEMPTY_THIS" => ISSET_ISEMPTY_THIS(call)
    //JUMP
    case "JMPNZ" => JMPNZ(call)
    case "JMPNZ_EX" => JMPNZ_EX(call)
    case "JMPZ" => JMPZ(call)
    case "JMP" => JMP(call)
    case "JMPZ_EX" => JMPZ_EX(call)
    case "JMPZNZ" => JMPZNZ(call)
    case "JMP_SET" => JMP_SET(call)
    case "JMP_NULL" => JMP_NULL(call)
    //Lambda related
    case "YIELD" => YIELD(call)
    case "YIELD_FROM" => YIELD_FROM(call)
    case "TICKS" => TICKS(call)
    case "FAST_CALL" => FAST_CALL(call)
    //Rope
    case "ROPE_INIT" => ROPE_INIT(call)
    case "ROPE_ADD" => ROPE_ADD(call)
    case "ROPE_END" => ROPE_END(call)
    //Type Related
    case "CAST" => CAST(call)
    case "INSTANCEOF" => INSTANCEOF(call)
    case "COALESCE" => COALESCE(call)
    case "CALLABLE_CONVERT" => CALLABLE_CONVERT(call)
    case _ =>
      // Last resort: identify the DO_* call opcodes by the call's code.
      call.code match {
        case "DO_FCALL_BY_NAME" => DO_FCALL_BY_NAME(call)
        case "DO_ICALL" => DO_ICALL(call)
        case "DO_UCALL" => DO_UCALL(call)
        case "DO_FCALL" => DO_FCALL(call)
        case x =>
          throw new ByteCodeTraversalHandlingError(
            s"unable to handle bytecode $x")
      }
  }
}
protected def ADD_ARRAY_ELEMENT(call: Call): T + protected def IN_ARRAY(call: Call): T + protected def DD_ARRAY_UNPACK(call: Call): T + protected def ADD_ARRAY_UNPACK(call: Call): T + // Assign + protected def ASSIGN_DIM(call: Call): T + protected def ASSIGN(call: Call): T + protected def ASSIGN_OP(call: Call): T + protected def ASSIGN_STATIC_PROP(call: Call): T + protected def ASSIGN_STATIC_PROP_OP(call: Call): T + protected def ASSIGN_STATIC_PROP_REF(call: Call): T + protected def ASSIGN_OBJ(call: Call): T + protected def ASSIGN_REF(call: Call): T + protected def ASSIGN_OBJ_REF(call: Call): T + protected def ASSIGN_EQUALSIGN(call: Call): T + //Bind + protected def BIND_STATIC(call: Call): T + protected def BIND_LEXICAL(call: Call): T + //Call Related + protected def NEW(call: Call): T + protected def INIT_FCALL(call: Call): T + protected def INIT_METHOD_CALL(call: Call): T + protected def INIT_NS_FCALL_BY_NAME(call: Call): T + protected def INIT_DYNAMIC_CALL(call: Call): T + protected def INIT_FCALL_BY_NAME(call: Call): T + protected def INIT_STATIC_METHOD_CALL(call: Call): T + protected def INIT_USER_CALL(call: Call): T + protected def SEND_VAR_EX(call: Call): T + protected def SEND_VAL_EX(call: Call): T + protected def SEND_VAL(call: Call): T + protected def SEND_VAR(call: Call): T + protected def SEND_VAR_NO_REF(call: Call): T + protected def SEND_VAR_NO_REF_EX(call: Call): T + protected def SEND_FUNC_ARG(call: Call): T + protected def SEND_USER(call: Call): T + protected def SEND_REF(call: Call): T + protected def SEND_ARRAY(call: Call): T + protected def RECV(call: Call): T + protected def RECV_INIT(call: Call): T + protected def RECV_VARIADIC(call: Call): T + protected def CHECK_FUNC_ARG(call: Call): T + protected def CHECK_UNDEF_ARGS(call: Call): T + protected def FUNC_GET_ARG(call: Call): T + protected def FUNC_GET_ARGS(call: Call): T + protected def RETURN_BY_REF(call: Call): T + protected def RETURN(call: Call): T + protected def 
VERIFY_RETURN_TYPE(call: Call): T + protected def VERIFY_NEVER_TYPE(call: Call): T + protected def FAST_RET(call: Call): T + //Class Related + protected def GET_CLASS(call: Call): T + protected def DECLARE_ANON_CLASS(call: Call): T + protected def DECLARE_CLASS(call: Call): T + //Control Constructs + protected def SWITCH_STRING(call: Call): T + protected def SWITCH_LONG(call: Call): T + protected def MATCH(call: Call): T + protected def CASE_STRICT(call: Call): T + protected def MATCH_ERROR(call: Call): T + protected def EXIT(call: Call): T + protected def CATCH(call: Call): T + //Fe + protected def FE_FETCH_R(call: Call): T + protected def FE_FETCH_RW(call: Call): T + protected def FE_RESET_RW(call: Call): T + protected def FE_RESET_R(call: Call): T + //Fetch + protected def FETCH_OBJ_FUNC_ARG(call: Call): T + protected def FETCH_STATIC_PROP_R(call: Call): T + protected def FETCH_STATIC_PROP_W(call: Call): T + protected def FETCH_STATIC_PROP_FUNC_ARG(call: Call): T + protected def FETCH_STATIC_PROP_IS(call: Call): T + protected def FETCH_LIST_R(call: Call): T + protected def FETCH_CLASS_CONSTANT(call: Call): T + protected def FETCH_CLASS(call: Call): T + protected def FETCH_CLASS_NAME(call: Call): T + protected def FETCH_CONSTANT(call: Call): T + protected def FETCH_R(call: Call): T + protected def FETCH_IS(call: Call): T + protected def FETCH_OBJ_W(call: Call): T + protected def FETCH_STATIC_PROP_UNSET(call: Call): T + protected def FETCH_STATIC_PROP_RW(call: Call): T + protected def FETCH_GLOBALS(call: Call): T + //Generic + protected def DO_FCALL_BY_NAME(call: Call): T + protected def DO_ICALL(call: Call): T + protected def DO_UCALL(call: Call): T + protected def DO_FCALL(call: Call): T + protected def NOP(call: Call): T + protected def BEGIN_SILENCE(call: Call): T + protected def EXT_STMT(call: Call): T + protected def EXT_NOP(call: Call): T + protected def FETCH_THIS(call: Call): T + protected def GENERATOR_CREATE(call: Call): T + protected def 
GET_CALLED_CLASS(call: Call): T + protected def FUNC_NUM_ARGS(call: Call): T + protected def ECHO(call: Call): T + protected def BW_NOT(call: Call): T + protected def BOOL_NOT(call: Call): T + protected def QM_ASSIGN(call: Call): T + protected def PRE_INC(call: Call): T + protected def POST_INC(call: Call): T + protected def PRE_DEC(call: Call): T + protected def POST_DEC(call: Call): T + protected def FREE(call: Call): T + protected def PRINT(call: Call): T + protected def FE_FREE(call: Call): T + protected def END_SILENCE(call: Call): T + protected def BOOL(call: Call): T + protected def OP_DATA(call: Call): T + protected def THROW(call: Call): T + protected def STRLEN(call: Call): T + protected def SEND_UNPACK(call: Call): T + protected def COUNT(call: Call): T + protected def DEFINED(call: Call): T + protected def DECLARE_FUNCTION(call: Call): T + protected def GET_TYPE(call: Call): T + protected def UNSET_CV(call: Call): T + protected def CLONE(call: Call): T + protected def MAKE_REF(call: Call): T + protected def SEPARATE(call: Call): T + protected def DECLARE_LAMBDA_FUNCTION(call: Call): T + protected def GENERATOR_RETURN(call: Call): T + protected def DISCARD_EXCEPTION(call: Call): T + protected def CONCAT(call: Call): T + protected def FAST_CONCAT(call: Call): T + protected def ADD(call: Call): T + protected def SUB(call: Call): T + protected def MUL(call: Call): T + protected def DIV(call: Call): T + protected def MOD(call: Call): T + protected def SL(call: Call): T + protected def SR(call: Call): T + protected def BW_OR(call: Call): T + protected def BW_AND(call: Call): T + protected def BW_XOR(call: Call): T + protected def BOOL_OR(call: Call): T + protected def IS_EQUAL(call: Call): T + protected def IS_NOT_EQUAL(call: Call): T + protected def IS_IDENTICAL(call: Call): T + protected def IS_NOT_IDENTICAL(call: Call): T + protected def IS_SMALLER(call: Call): T + protected def IS_SMALLER_OR_EQUAL(call: Call): T + protected def BIND_GLOBAL(call: Call): T 
+ protected def DECLARE_CLASS_DELAYED(call: Call): T + protected def DECLARE_CONST(call: Call): T + protected def INCLUDE_OR_EVAL(call: Call): T + protected def FETCH_FUNC_ARG(call: Call): T + protected def FETCH_DIM_FUNC_ARG(call: Call): T + protected def POW(call: Call): T + protected def FETCH_DIM_R(call: Call): T + protected def FETCH_W(call: Call): T + protected def FETCH_DIM_W(call: Call): T + protected def ARRAY_KEY_EXISTS(call: Call): T + protected def FETCH_OBJ_RW(call: Call): T + protected def FETCH_OBJ_R(call: Call): T + protected def FETCH_RW(call: Call): T + protected def FETCH_OBJ_IS(call: Call): T + protected def FETCH_DIM_IS(call: Call): T + protected def TYPE_CHECK(call: Call): T + protected def FETCH_DIM_RW(call: Call): T + protected def UNSET_OBJ(call: Call): T + protected def FETCH_UNSET(call: Call): T + protected def UNSET_DIM(call: Call): T + protected def FETCH_DIM_UNSET(call: Call): T + protected def CASE(call: Call): T + protected def FETCH_OBJ_UNSET(call: Call): T + protected def UNSET_STATIC_PROP(call: Call): T + protected def POST_INC_OBJ(call: Call): T + protected def PRE_INC_OBJ(call: Call): T + protected def POST_DEC_OBJ(call: Call): T + protected def PRE_DEC_OBJ(call: Call): T + protected def BOOL_XOR(call: Call): T + protected def SPACESHIP(call: Call): T + protected def UNSET_VAR(call: Call): T + protected def ASSIGN_DIM_OP(call: Call): T + protected def ASSIGN_OBJ_OP(call: Call): T + //INC DEC STATIC PROP + protected def POST_INC_STATIC_PROP(call: Call): T + protected def PRE_INC_STATIC_PROP(call: Call): T + protected def POST_DEC_STATIC_PROP(call: Call): T + protected def PRE_DEC_STATIC_PROP(call: Call): T + //ISSET + protected def ISSET_ISEMPTY_VAR(call: Call): T + protected def ISSET_ISEMPTY_DIM_OBJ(call: Call): T + protected def ISSET_ISEMPTY_CV(call: Call): T + protected def ISSET_ISEMPTY_PROP_OBJ(call: Call): T + protected def ISSET_ISEMPTY_STATIC_PROP(call: Call): T + protected def ISSET_ISEMPTY_THIS(call: Call): T + //JUMP 
+ protected def JMPNZ(call: Call): T + protected def JMPNZ_EX(call: Call): T + protected def JMPZ(call: Call): T + protected def JMP(call: Call): T + protected def JMPZ_EX(call: Call): T + protected def JMPZNZ(call: Call): T + protected def JMP_SET(call: Call): T + protected def JMP_NULL(call: Call): T + //Lambda related + protected def YIELD(call: Call): T + protected def YIELD_FROM(call: Call): T + protected def TICKS(call: Call): T + protected def FAST_CALL(call: Call): T + //Rope + protected def ROPE_INIT(call: Call): T + protected def ROPE_ADD(call: Call): T + protected def ROPE_END(call: Call): T + //Type Related + protected def CAST(call: Call): T + protected def INSTANCEOF(call: Call): T + protected def COALESCE(call: Call): T + + protected def CALLABLE_CONVERT(call: Call): T + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/util/traversing/TraversalUtility.scala b/layerByteCode/src/main/scala/io/joern/bytecode/util/traversing/TraversalUtility.scala new file mode 100644 index 0000000..0fafc58 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/util/traversing/TraversalUtility.scala @@ -0,0 +1,108 @@ +package io.joern.bytecode.util.traversing + +import io.shiftleft.codepropertygraph.generated.EdgeTypes +import io.shiftleft.codepropertygraph.generated.nodes._ +import io.shiftleft.semanticcpg.language._ +import overflowdb.traversal.{Traversal, jIteratortoTraversal} + +import scala.annotation.tailrec +import scala.jdk.CollectionConverters._ + +object TraversalUtility { + + @tailrec + def getParentMethod(node: AstNode): Method = { + node match { + case x: Method => x + case x => + getParentMethod(x.astParent) + } + } + + /** Returns the node reached from `call` over an outgoing ARGUMENT edge whose AST parent is a SEND_* call carrying the position literal `nth` (child of order 1, or order 0 for SEND_ARRAY); throws a RuntimeException when there is none. */ + def getNthArgument(call: Call, nth: Int): CfgNode = { + call + .out(EdgeTypes.ARGUMENT) + .asScala + .find { + case node: CfgNode => + node.astParent match { + case node: Call => + node.name match { + case "SEND_VAR_NO_REF_EX" | "SEND_VAR_NO_REF" | "SEND_VAR_EX" | + "SEND_VAL_EX" | "SEND_FUNC_ARG" | "SEND_VAL" |
"SEND_VAR" | + "SEND_REF" | "SEND_USER" => + node.astChildren + .order(1) + .next() + .asInstanceOf[Literal] + .code + .toInt == nth + case "SEND_ARRAY" => + node.astChildren + .order(0) + .next() + .asInstanceOf[Literal] + .code + .toInt == nth + case _ => false + } + case _ => false + } + } + .getOrElse(throw new RuntimeException( + s"call ${call.code} has no $nth argument")) + .asInstanceOf[CfgNode] + } + + def followReachingForward(call: Call, + variable: String): Traversal[CfgNode] = { + call + .outE(EdgeTypes.REACHING_DEF) + .asScala + .filter { edge => + edge.property("VARIABLE").asInstanceOf[String] == variable + } + .map(_.inNode().asInstanceOf[CfgNode]) + } + + def followReachingBackwards(call: Call, + variable: String): Traversal[CfgNode] = { + call + .inE(EdgeTypes.REACHING_DEF) + .asScala + .filter { edge => + edge.property("VARIABLE").asInstanceOf[String] == variable + } + .map(_.outNode().asInstanceOf[CfgNode]) + } + + def followControlFlowForward(node: CfgNode): Traversal[CfgNode] = { + node.out(EdgeTypes.CFG).asScala.map(_.asInstanceOf[CfgNode]) + } + + def followControlFlowBackwards(node: CfgNode): Traversal[CfgNode] = { + node.in(EdgeTypes.CFG).asScala.map(_.asInstanceOf[CfgNode]) + } + + def followCallForward(call: CfgNode): Traversal[Method] = { + call.out(EdgeTypes.CALL).asScala.map(_.asInstanceOf[Method]) + } + + def followCallBackwards(method: Method): Traversal[CfgNode] = { + method.in(EdgeTypes.CALL).asScala.map(_.asInstanceOf[CfgNode]) + } + + def followBackParameter(method: Method, + parameter: Int): Traversal[CfgNode] = { + followCallBackwards(method).map { callSite => + getNthArgument(callSite.asInstanceOf[Call], parameter) + } + } + + /** Returns the nodes at the other end of the incoming ARGUMENT edges of `call`, or None when it has no such edge. */ + def ifIsArgGetCaller(call: CfgNode): Option[List[CfgNode]] = { + if (call.inE(EdgeTypes.ARGUMENT).nonEmpty) { + Some(call.in(EdgeTypes.ARGUMENT).map(_.asInstanceOf[CfgNode]).l) + } else { + None + } + } +} diff --git
a/layerByteCode/src/main/scala/io/joern/bytecode/util/unittesting/AbstractCpgTestFixture.scala b/layerByteCode/src/main/scala/io/joern/bytecode/util/unittesting/AbstractCpgTestFixture.scala new file mode 100644 index 0000000..a19b789 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/util/unittesting/AbstractCpgTestFixture.scala @@ -0,0 +1,225 @@ +package io.joern.bytecode.util.unittesting + +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.nodes.CfgNode +import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes} +import io.shiftleft.semanticcpg.language._ +import io.shiftleft.semanticcpg.language.dotextension.ImageViewer +import io.shiftleft.semanticcpg.passes.controlflow.cfgcreation.Cfg.CfgEdgeType +import overflowdb.traversal.{NodeOps, Traversal} + +import scala.jdk.CollectionConverters._ +import scala.sys.process.Process +import scala.util.Try + +/** NOTE + * + * We do not want to use regexp matching. Regexp matching is a bad fit for unit testing as it may lead to FP (or FN) + * if the given code contains regexp characters or the regexp is over inclusive. + * For testing purposes only exact code matching shall be used. + * + * Some people, when confronted with a problem, think : “I know, I'll use regular expressions.” + * Now they have two problems. 
+ * + */ +abstract class AbstractCpgTestFixture { + + implicit var cpg: Cpg + + /** Maps the code of every CFG node to the node itself: over the AST of `method` when parentCall is None, otherwise over the AST below the node(s) whose code equals parentCall. Nodes sharing the same code collapse into one entry (the last one wins in toMap). Asserts that the method, and the parent node if one is requested, exists. */ + def codeToNode(parentCall: Option[String] = None)( + implicit method: String): Map[String, nodes.CfgNode] = { + assert( + cpg.method.fullNameExact(method).hasNext, + s"the method $method does not exist only ${cpg.method.map(_.fullName).l}") + val ret: Map[String, nodes.CfgNode] = parentCall match { + case None => + assert( + cpg.method.fullNameExact(method).hasNext, + s"method $method does not exist there are only ${cpg.method.map(_.name).l}") + cpg.method + .fullNameExact(method) + .head + .ast + .isCfgNode + .l + .map { node => + node.code -> node + } + .toMap + case Some(parentCode) => + val nodes = cpg.method.fullNameExact(method).ast.isCfgNode.l + assert(nodes.nonEmpty, s"there are no cfg children for $method") + val parent: Traversal[CfgNode] = nodes.codeExact(parentCode) + assert( + parent.hasNext, + s"there is no node '$parentCode' for method $method. Only:\n ${nodes.map(_.code).mkString("\n")}") + parent.ast.isCfgNode.l.map { node => + node.code -> node + }.toMap + } + ret + } + + def expectedCfg(pairs: (String, CfgEdgeType)*)( + implicit method: String): Set[String] = { + expectedCfg(None, pairs: _*) + } + + def expectedCfg(parentCall: String, pairs: (String, CfgEdgeType)*)( + implicit method: String): Set[String] = { + expectedCfg(Some(parentCall), pairs: _*) + } + + private def expectedCfg( + parentCall: Option[String], + pairs: (String, CfgEdgeType)*)(implicit method: String): Set[String] = { + pairs.map { + case (code, _) => + val map = codeToNode(parentCall) + map + .getOrElse( + code, + throw new RuntimeException( + s"$code with parent $parentCall is unknown for method $method.
Map contained #${map.size} nodes :${map.keys + .mkString("|")}") + ) + .start + .code + .head + }.toSet + } + + def expectedDdg(pairs: (String, String)*)( + implicit method: String): Set[(String, String)] = { + expectedDdg(None, pairs: _*) + } + + def expectedDdg(parentCall: String, pairs: (String, String)*)( + implicit method: String): Set[(String, String)] = { + expectedDdg(Some(parentCall), pairs: _*) + } + + private def expectedDdg(parentCall: Option[String], pairs: (String, String)*)( + implicit method: String): Set[(String, String)] = { + pairs.map { + case (code, variable) => + val map = codeToNode(parentCall) + assert(map.contains(code), s"there is no node with code $code") + val node = map(code).start.code + //assert(node.nonEmpty, s"there is no $pairs for parent $parentCall") + (node.head, variable) + }.toSet + } + + def cfgSuccOf(code: String)(implicit method: String): Set[String] = { + cfgSuccOf(None, code) + } + + def cfgSuccOf(parentCall: String, code: String)( + implicit method: String): Set[String] = { + cfgSuccOf(Some(parentCall), code) + } + + private def cfgSuccOf(parentCall: Option[String], code: String)( + implicit method: String): Set[String] = { + val map = codeToNode(parentCall) + val buff = map + .getOrElse( + code, + throw new RuntimeException( + s"did not find code >>$code<< of parent call $parentCall in method $method, available: (${map.keys})")) + ._cfgOut + .asScala + .map(_.asInstanceOf[nodes.CfgNode]) + .toSet + buff.map[String](_.code) + } + + def cfgPredsOf(code: String)(implicit method: String): Set[String] = { + cfgPredsOf(None, code) + } + + def cfgPredsOf(parentCall: String, code: String)( + implicit method: String): Set[String] = { + cfgPredsOf(Some(parentCall), code) + } + + /** Returns the code of every CFG predecessor of the node whose code is `code`; an unknown code still raises NoSuchElementException, now with the method, parent and available keys in the message. */ + private def cfgPredsOf(parentCall: Option[String], code: String)( + implicit method: String): Set[String] = { + val map = codeToNode(parentCall) + map + .getOrElse( + code, + throw new NoSuchElementException( + s"did not find code >>$code<< of parent call $parentCall in method $method, available: (${map.keys})")) + ._cfgIn + .asScala + .map(_.asInstanceOf[nodes.CfgNode]) + .toSet + .map[String](_.code) + } + + def
ddgSuccOf(code: String)( + implicit method: String): Set[(String, String)] = { + ddgSuccOf(None, code) + } + + def ddgSuccOf(parentCall: String, code: String)( + implicit method: String): Set[(String, String)] = { + ddgSuccOf(Some(parentCall), code) + } + + private def ddgSuccOf(parentCall: Option[String], code: String)( + implicit method: String): Set[(String, String)] = { + val map = codeToNode(parentCall) + map + .getOrElse( + code, + throw new RuntimeException( + s"the code segment $code of method $method with specific parent $parentCall does not exist")) + .outE(EdgeTypes.REACHING_DEF) + .asScala + .map(edge => + (edge.inNode().asInstanceOf[nodes.CfgNode].code, + edge.property("VARIABLE").toString)) + .toSet + } + + def ddgPredOf(code: String)( + implicit method: String): Set[(String, String)] = { + ddgPredOf(None, code) + } + + def ddgPredOf(parentCall: String, code: String)( + implicit method: String): Set[(String, String)] = { + ddgPredOf(Some(parentCall), code) + } + + private def ddgPredOf(parentCall: Option[String], code: String)( + implicit method: String): Set[(String, String)] = { + val map = codeToNode(parentCall) + map + .getOrElse( + code, + throw new RuntimeException( + s"the code segment $code of method $method with specific parent $parentCall does not exist")) + .inE(EdgeTypes.REACHING_DEF) + .asScala + .map(edge => + (edge.outNode().asInstanceOf[nodes.CfgNode].code, + edge.property("VARIABLE").toString)) + .toSet + } + + implicit val viewer: ImageViewer = (pathStr: String) => + Try { + Process(Seq("xdg-open", pathStr)).!! 
+ } + +} diff --git a/layerByteCode/src/main/scala/io/joern/bytecode/util/unittesting/CpgFromCodeTestFixture.scala b/layerByteCode/src/main/scala/io/joern/bytecode/util/unittesting/CpgFromCodeTestFixture.scala new file mode 100644 index 0000000..c9b6694 --- /dev/null +++ b/layerByteCode/src/main/scala/io/joern/bytecode/util/unittesting/CpgFromCodeTestFixture.scala @@ -0,0 +1,81 @@ +package io.joern.bytecode.util.unittesting + +import better.files.File +import io.joern.bytecode.PhpToCpg +import io.joern.bytecode.parser.PHPVersion.PHPVersion +import io.joern.bytecode.util.FilterFiles.filterFiles +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture.{ + cpgForDir, + setTimestamp +} +import io.joern.config.CPGConfig +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.x2cpg.SourceFiles +import overflowdb.Node + +import java.io.{File => JFile} +import java.nio.charset.StandardCharsets +import java.text.SimpleDateFormat +import scala.jdk.CollectionConverters._ + +object CpgFromCodeTestFixture { + def cpgForDir(files: List[JFile], configFile: Option[String] = None)( + implicit version: PHPVersion): Cpg = { + val cpg: Cpg = Cpg.emptyCpg + val config = configFile match { + case Some(value) => CPGConfig.initializeConfig(value) + case None => CPGConfig.initializeConfig() + } + val cpgCreator = new PhpToCpg() + try { + cpgCreator.populateCpg(files, cpg, config) + } finally { + val finalReport = cpgCreator.getFinalReport + if (!finalReport.success) { + throw new RuntimeException( + s"Creation of CPG was not successful with fromCodeFixture \n ${finalReport.prettyPrintErrors}") + } + } + } + + // Workaround for a bug in PHP: + // we set the timestamp to a date way in the past here + // because there seems to be a bug in `php` which causes + // the error stream to be empty for freshly created files. 
+ def setTimestamp(filePath: String): Boolean = { + val file = new JFile(filePath) + val date = new SimpleDateFormat("MM/dd/yyyy") + val last = date.parse("10/03/1990") + file.setLastModified(last.getTime) + } +} + +/** Test fixture that writes `code` to test.php in a temporary directory (behind an opening PHP tag when insertPhpTags is set), back-dates the file via setTimestamp and builds `cpg` from it with cpgForDir. */ +case class CpgFromCodeTestFixture( + code: String, + insertPhpTags: Boolean = true, + configFile: Option[String] = None)(implicit val version: PHPVersion) + extends AbstractCpgTestFixture { + override implicit var cpg: Cpg = _ + + File.usingTemporaryDirectory("php2cpg") { tmpFolder => + val filePath = s"$tmpFolder/test.php" + if (insertPhpTags) { + // NOTE(review): the template here was an empty string, so `code` was never written to the file. + // Placing the opening tag on its own line is an assumption - confirm against the line numbers the tests expect. + File(filePath).write(s"<?php\n$code\n")( + charset = StandardCharsets.UTF_8) + } else { + File(filePath).write(code)(charset = StandardCharsets.UTF_8) + } + setTimestamp(filePath) + val files: List[JFile] = filterFiles( + SourceFiles + .determine(Set(tmpFolder.path.toString), Set(".php")) + .distinct + .map(x => new JFile(x))) + cpg = cpgForDir(files, configFile) + } + + def V: Iterator[Node] = cpg.graph.V.asScala + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/EasyBase64.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/EasyBase64.scala new file mode 100644 index 0000000..a9ff6ce --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/EasyBase64.scala @@ -0,0 +1,12 @@ +package io.joern.bytecode.parser + +import java.nio.charset.StandardCharsets +import java.util.Base64 + +object EasyBase64 { + + def encode(str : String) : String = { + Base64.getEncoder.encodeToString(str.getBytes(StandardCharsets.UTF_8)) + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/MethodDefinitionTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/MethodDefinitionTest.scala new file mode 100644 index 0000000..e69de29 diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/BasicsTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/BasicsTest.scala new file mode 100644 index 0000000..e713d1f --- /dev/null
+++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/BasicsTest.scala @@ -0,0 +1,70 @@ +package io.joern.bytecode.parser.php7 + +import fastparse._ +import io.joern.bytecode.parser.php7.Basics._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class BasicsTest extends AnyWordSpec with Matchers { + + "parser escapedSlash" should { + "be able to parse \\" in { + val str = "\\" + val Parsed.Success(_, length) = parse(str, escapedSlash(_)) + length shouldBe str.length + } + } + + "parser legalString" should { + "be able to parse single line string \"test\"" in { + val singleLineString = "\"test\"" + val Parsed.Success(_, length) = parse(singleLineString, legalString(_)) + length shouldBe singleLineString.length + } + "be able to parse multi line string" in { + val multiLineString = + """"this is + a multi line + string"""" + val Parsed.Success(_, length) = parse(multiLineString, legalString(_)) + length shouldBe multiLineString.length + } + "be able to parse a bunch of spaces" in { + val bunchOfSpaces = "\" \"" + val Parsed.Success(_, length) = parse(bunchOfSpaces, legalString(_)) + length shouldBe bunchOfSpaces.length + } + "be able to parse multi line string with \"" in { + val multiLineString = + "\"this is \n a multi line string with \\\" \n in the middle\"" + val Parsed.Success(_, length) = parse(multiLineString, legalString(_)) + length shouldBe multiLineString.length + } + "be able to parse string with escaped slash" in { + val str = "\"f\\s\"" + val Parsed.Success(_, length) = parse(str, legalString(_)) + length shouldBe str.length + } + "be able to parse fully qualified names" in { + val str = "\"fully\\qualified\\name\"" + val Parsed.Success(_, length) = parse(str, legalString(_)) + length shouldBe str.length + } + "be able to parse PleskX\\Api\\strtolower" in { + val str = "\"PleskX\\Api\\strtolower\"" + val Parsed.Success(_, length) = parse(str, legalString(_)) + length shouldBe str.length + } + "be 
able to parse <" in { + val str = "\"<\"" + val Parsed.Success(_, length) = parse(str, legalString(_)) + length shouldBe str.length + } + "be able to parse /^[a-z]/" in { + val str = "\"/^[a-z]/\"" + val Parsed.Success(_, length) = parse(str, legalString(_)) + length shouldBe str.length + } + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/ByteCodeBlockTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/ByteCodeBlockTest.scala new file mode 100644 index 0000000..415fa84 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/ByteCodeBlockTest.scala @@ -0,0 +1,218 @@ +package io.joern.bytecode.parser.php7 + +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php7.ByteCodeBlock._ +import io.joern.bytecode.parser.php7.EasyBase64.encode +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class ByteCodeBlockTest extends AnyWordSpec with Matchers { + + "parser getLineNumber" should { + "be able to parse L42" in { + val Parsed.Success(result, _) = parse(input = "L42", getLineNumber(_)) + assert(result == 42) + } + } + + "parser getFileLineNumber" should { + "be able to parse (43)" in { + val Parsed.Success(result, _) = + parse(input = "(43)", getFileLineNUmber(_)) + assert(result == 43) + } + } + + "parser getDefiningInstructionLine" should { + "be able to parse L1 (42): DO_ICALL" in { + val Parsed.Success(result, count) = + parse(input = "L1 (42): DO_ICALL", getDefiningInstructionLine(_)) + assert(count == 17) + assert(result.fileLine.contains(42)) + assert(result.opNumber.contains(1)) + result.instruction match { + case instruction: Operation => + instruction.op match { + case NoValueOperation("DO_ICALL") => //passing the test + case _ => fail(message = "the operation is not a NoValueOperation") + } + case _ => fail() + } + } + "be able to parse L0 (20): V1 = NEW 1 string(\"Basic\")" in { + val line = s"""L0 
(20): V1 = NEW 1 string("${encode("Basic")}")""" + val Parsed.Success(result, count) = + parse(line, getDefiningInstructionLine(_)) + count shouldBe line.length + result.opNumber shouldBe Some(0) + result.fileLine shouldBe Some(20) + } + } + + "parser getMethodBlockByteCode" should { + "be able to parse single instruction code block" in { + val singleLineByteCodeBlock = + """$_main: ; (lines=42, args=3, vars=3, tmps=2) + | ; (before optimizer) + | ; main:23-42 + |L0 (23): CONCAT T1 T2 + |""".stripMargin + val Parsed.Success(result, count) = + parse(singleLineByteCodeBlock, getByteCodeBlock(_)) + assert(count == singleLineByteCodeBlock.length) + assert(result.name == "dlr_main") + assert(result.lines == 42) + assert(result.args == 3) + assert(result.vars == 3) + assert(result.tmps == 2) + assert(result.parsingMetaInfo.head == "before") + assert(result.parsingMetaInfo(1) == "optimizer") + assert(result.fileName == "main") + assert(result.lineStart == 23) + assert(result.lineEnd == 42) + assert(result.instructions.length == 1) + result.instructions.head match { + case instr: InstructionLine => + assert(instr.opNumber.contains(0)) + assert(instr.fileLine.contains(23)) + instr.instruction match { + case instr: Operation => + instr.op match { + case DualValueOperation("CONCAT", _, _) => + case _ => fail() + } + case _ => fail() + } + case _ => fail() + } + } + "be able to parse multi instructions code block" in { + val singleLineByteCodeBlock = + """$_main: ; (lines=42, args=3, vars=3, tmps=2) + | ; (before optimizer) + | ; main:23-42 + |L0 (23): CONCAT T1 T2 + |L1 (24): T3 = CONCAT T1 T2 + |""".stripMargin + val Parsed.Success(result, count) = + parse(singleLineByteCodeBlock, getByteCodeBlock(_)) + assert(count == singleLineByteCodeBlock.length) + assert(result.name == "dlr_main") + assert(result.lines == 42) + assert(result.args == 3) + assert(result.vars == 3) + assert(result.tmps == 2) + assert(result.parsingMetaInfo.head == "before") + 
assert(result.parsingMetaInfo(1) == "optimizer") + assert(result.fileName == "main") + assert(result.lineStart == 23) + assert(result.lineEnd == 42) + assert(result.instructions.length == 2) + result.instructions.head match { + case instr: InstructionLine => + assert(instr.opNumber.contains(0)) + assert(instr.fileLine.contains(23)) + instr.instruction match { + case instr: Operation => + instr.op match { + case DualValueOperation("CONCAT", _, _) => + case _ => fail() + } + case _ => fail() + } + case _ => fail() + } + result.instructions(1) match { + case instr: InstructionLine => + assert(instr.opNumber.contains(1)) + assert(instr.fileLine.contains(24)) + instr.instruction match { + case _ : Assignment => + case _ => fail() + } + case _ => fail() + } + } + "be able to parse the beginning of a proper dump" in { + val fullDump = + s"""$$_main: ; (lines=6, args=0, vars=1, tmps=3) + ; (before optimizer) + ; trivial-main.php:1-4 +L0 (2): INIT_FCALL 1 96 string("${encode("phpinfo")}") +L1 (2): T1 = CONCAT string("${encode("conca")}") CV0($$var) +L2 (2): T2 = CONCAT T1 string("${encode("tenation")}") +L3 (2): SEND_VAL T2 1 +L4 (2): DO_ICALL +L5 (4): RETURN int(1) + +$$_main: ; (lines=6, args=0, vars=1, tmps=3) + ; (before block pass) + ; trivial-main.php:1-4 +BB0: start exit lines=[0-5] + INIT_FCALL 1 96 string("${encode("phpinfo")}") + T1 = CONCAT string("${encode("conca")}") CV0($$var) + T2 = CONCAT T1 string("${encode("tenation")}") + SEND_VAL T2 1 + DO_ICALL + RETURN int(1)""".stripMargin + val Parsed.Success(_, count) = parse(fullDump, getByteCodeBlock(_)) + assert(count > 10) //not the best test but I was to lazy to count + } + } + + "be able to parse a dump containing a live range" in { + val dump = + s"""$$_main: ; (lines=8, args=0, vars=1, tmps=4) + ; (before optimizer) + ; /home/simon/tmp/bytecode-cpg/basicOOP.php:1-23 +L0 (20): V1 = NEW 1 string("${encode("Basic")}") +L1 (20): SEND_VAL_EX string("${encode("value")}") 1 +L2 (20): DO_FCALL +L3 (20): ASSIGN 
CV0($$var) V1 +L4 (21): INIT_METHOD_CALL 1 CV0($$var) string("${encode("test2")}") +L5 (21): SEND_VAL_EX string("${encode("other")}") 1 +L6 (21): DO_FCALL +L7 (23): RETURN int(1) +LIVE RANGES: + 1: L1 - L3 (new) +""" + val Parsed.Success(_, count) = parse(dump, getByteCodeBlock(_)) + count shouldBe dump.length + } + "be able to parse a dump containing live ranges and exception table" in { + val dump = + s"""PleskXTest\\TestCase::tearDownAfterClass: ; (lines=22, args=0, vars=2, tmps=6) + | ; (before optimizer) + | ; /home/simon/tmp/api-php-lib/tests/TestCase.php:32-40 + |L0 (32): EXT_NOP + |L1 (34): EXT_STMT + |L2 (34): T2 = FETCH_STATIC_PROP_R string("${encode("webspaces")}") (self) (exception) + |L3 (34): V3 = FE_RESET_R T2 L19 + |L4 (34): FE_FETCH_R V3 CV0($$webspace) L19 + |L5 (35): NOP + |L6 (36): EXT_STMT + |L7 (36): T4 = FETCH_STATIC_PROP_R string("${encode("_client")}") (static) (exception) + |L8 (36): INIT_METHOD_CALL 0 T4 string("${encode("webspace")}") + |L9 (36): V5 = DO_FCALL + |L10 (36): INIT_METHOD_CALL 2 V5 string("${encode("delete")}") + |L11 (36): SEND_VAL_EX string("${encode("id")}") 1 + |L12 (36): CHECK_FUNC_ARG 2 + |L13 (36): V6 = FETCH_OBJ_FUNC_ARG (ref) CV0($$webspace) string("${encode("id")}") + |L14 (36): SEND_FUNC_ARG V6 2 + |L15 (36): DO_FCALL + |L16 (36): JMP L18 + |L17 (37): CV1($$e) = CATCH string("${encode("Exception")}") + |L18 (34): JMP L4 + |L19 (34): FE_FREE V3 + |L20 (40): EXT_STMT + |L21 (40): RETURN null + |LIVE RANGES: + | 3: L4 - L19 (loop) + |EXCEPTION TABLE: + | L6, L17, -, - + |""".stripMargin + val Parsed.Success(_, count) = parse(dump, getByteCodeBlock(_)) + count shouldBe dump.length + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/ControlFlowBlockTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/ControlFlowBlockTest.scala new file mode 100644 index 0000000..1e9ecc1 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/ControlFlowBlockTest.scala 
@@ -0,0 +1,332 @@ +package io.joern.bytecode.parser.php7 + +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php7.ControlFlowBlock._ +import io.joern.bytecode.parser.php7.EasyBase64.encode +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class ControlFlowBlockTest extends AnyWordSpec with Matchers { + + "parser getBBInstructionLine" should { + "be able to parse DO_ICALL" in { + val Parsed.Success(result, count) = + parse(" DO_ICALL", getBBInstructionLine(_)) + assert(count == 16) + assert(result.fileLine.isEmpty) + assert(result.opNumber.isEmpty) + result.instruction match { + case instruction: Operation => + instruction.op match { + case NoValueOperation("DO_ICALL") => + case _ => fail() + } + case _ => fail() + } + } + "be able to parse JMPZ T2 BB2" in { + val instr = " JMPZ T2 BB2" + val Parsed.Success(result, length) = parse(instr, getBBInstructionLine(_)) + length shouldBe instr.length + val InstructionLine(_, _, instruction) = result + instruction match { + case Operation(op) => + op match { + case DualValueOperation(command, condition, target) => + command shouldBe "JMPZ" + condition match { + case Variable(name, tmp, _) => + name shouldBe "T2" + tmp shouldBe true + case _ => fail() + } + target match { + case IntegerLiteral(value) => value shouldBe 2 + case _ => fail() + } + case _ => fail() + } + case _ => fail() + } + } + } + + "parser getBBDefinitionLine" should { + "be able to parse BB32: follow exit finally_end lines=[272-272]" in { + val line = "BB32: follow exit finally_end lines=[272-272]" + val Parsed.Success(result, length) = parse(line, getBBDefinitionLine(_)) + length shouldBe line.length + result shouldBe BBDefinitionLine(32,Seq("follow","exit","finally_end"),272,272) + } + "be able to parse BB0: start exit lines=[0-5]" in { + val Parsed.Success(result, _) = + parse("BB0: start exit lines=[0-5]", getBBDefinitionLine(_)) + assert(result.number == 0) + 
assert(result.attributes.length == 2) + assert(result.attributes.head == "start") + assert(result.attributes(1) == "exit") + assert(result.firstInstruction == 0) + assert(result.lastInstruction == 5) + } + "be able to parse BB4: catch lines=[17-17]" in { + val line = "BB4: catch lines=[17-17]" + val Parsed.Success(result, length) = + parse(line, getBBDefinitionLine(_)) + length shouldBe line.length + result.attributes shouldBe List("catch") + result.firstInstruction shouldBe 17 + result.lastInstruction shouldBe 17 + } + "be able to parse BB2: unreachable unreachable_free lines=[12-13]" in { + val line = "BB2: unreachable unreachable_free lines=[12-13]" + val Parsed.Success(result, length) = parse(line, getBBDefinitionLine(_)) + length shouldBe line.length + result.attributes shouldBe List("unreachable", "unreachable_free") + result.firstInstruction shouldBe 12 + result.lastInstruction shouldBe 13 + } + } + + "parser getBBToLine" should { + "be able to parse ; to=(BB0)" in { + val Parsed.Success(result, _) = parse(" ; to=(BB0)", getBBToLine(_)) + assert(result.length == 1) + assert(result.head == 0) + } + "be able to parse ; to=(BB0, BB1)" in { + val Parsed.Success(result, _) = parse(" ; to=(BB0, BB1)", getBBToLine(_)) + assert(result.length == 2) + assert(result.head == 0) + assert(result(1) == 1) + } + } + + "parser getBasicBlock" should { + "be able to parse a BasicBlock with JMPZ at the end but not including the next basic block" in { + val bbstring = + """BB0: start lines=[0-2] + ; to=(BB2, BB1) + ASSIGN CV0($x) int(42) + T2 = IS_EQUAL CV0($x) int(43) + JMPZ T2 BB2 +""" + val nextBBStart = "BB1: follow lines[3-4]" + val fullTestString = bbstring + nextBBStart + val Parsed.Success(result, length) = + parse(fullTestString, getBasicBlock(_)) + length shouldBe bbstring.length + result.firstInstruction shouldBe 0 + result.lastInstruction shouldBe 2 + result.number shouldBe 0 + result.followedBy.get.length shouldBe 2 + result.followedBy.get.head shouldBe 2 + 
result.followedBy.get(1) shouldBe 1 + result.instructions.length shouldBe 3 + } + "be able to parse basic block with catch as its only keyword" in { + val bb = + s"""BB4: catch lines=[17-17] + | ; to=(BB5) + | CV1($$e) = CATCH string("${encode("Exception")}") + |""".stripMargin + val Parsed.Success(result, length) = parse(bb, getBasicBlock(_)) + length shouldBe bb.length + result.attributes shouldBe List("catch") + result.followedBy shouldBe Some(List(5)) + result.firstInstruction shouldBe 17 + result.lastInstruction shouldBe 17 + } + } + + "parser getMethodBlockControlFlow" should { + "be able to parse single BB single instruction" in { + val singleLineByteCodeBlock = + """$_main: ; (lines=42, args=3, vars=3, tmps=2) + | ; (before block pass) + | ; main:23-42 + |BB0: start exit lines=[0-1] + | CONCAT T1 T2 + |""".stripMargin + val Parsed.Success(result, count) = + parse(singleLineByteCodeBlock, getControlFlowBlock(_)) + assert(count == singleLineByteCodeBlock.length) + assert(result.parsingMetaInfo.length == 3) + assert(result.parsingMetaInfo.head == "before") + assert(result.parsingMetaInfo(1) == "block") + assert(result.parsingMetaInfo(2) == "pass") + assert(result.blocks.length == 1) + assert(result.blocks.head.number == 0) + assert(result.blocks.head.attributes.length == 2) + assert(result.blocks.head.firstInstruction == 0) + assert(result.blocks.head.lastInstruction == 1) + assert(result.blocks.head.instructions.length == 1) + } + "be able to parse single BB multiple instructions" in { + val singleLineByteCodeBlock = + """$_main: ; (lines=42, args=3, vars=3, tmps=2) + | ; (before block pass) + | ; main:23-42 + |BB0: start exit lines=[0-1] + | CONCAT T1 T2 + | DO_ICALL + |""".stripMargin + val Parsed.Success(result, count) = + parse(singleLineByteCodeBlock, getControlFlowBlock(_)) + assert(count == singleLineByteCodeBlock.length) + assert(result.blocks.length == 1) + assert(result.blocks.head.instructions.length == 2) + } + "be able to parse #247" in { + val 
dump = """$_main: ; (lines=9, args=0, vars=0, tmps=3) + | ; (before block pass) + | ; ../testproject/tmp.php:1-3 + |BB0: start lines=[0-3] + | ; to=(BB2, BB1) + | NOP + | NOP + | NOP + | T1 = JMPZ_EX bool(false) BB2 + |BB1: follow lines=[4-7] + | ; to=(BB2) + | INIT_FCALL 1 96 string("aW5pX2dldA==") + | SEND_VAL string("bWJzdHJpbmcuZnVuY19vdmVybG9hZA==") 1 + | V2 = DO_ICALL + | T1 = BOOL V2 + |BB2: follow target exit lines=[8-8] + | RETURN int(1) + |""".stripMargin + val Parsed.Success(result, count) = parse(dump, getControlFlowBlock(_)) + count shouldBe dump.length + result.blocks.length shouldBe 3 + result.blocks.head.instructions.length shouldBe 4 + } + "be able to parse multiple BB" in { + val singleLineByteCodeBlock = + """$_main: ; (lines=42, args=3, vars=3, tmps=2) + | ; (before block pass) + | ; main:23-42 + |BB0: start exit lines=[0-1] + | ; to=(BB1) + | CONCAT T1 T2 + | DO_ICALL + |BB1: exit lines=[0-2] + | CONCAT T1 T2 + | DO_ICALL + |""".stripMargin + val Parsed.Success(result, count) = + parse(singleLineByteCodeBlock, getControlFlowBlock(_)) + assert(count == singleLineByteCodeBlock.length) + assert(result.blocks.length == 2) + assert(result.blocks.head.followedBy.get.length == 1) + assert(result.blocks.head.followedBy.get.head == 1) + assert(result.blocks(1).followedBy.isEmpty) + } + "be able to parse proper (partial) dump" in { + val fullDump = + s"""$$_main: ; (lines=6, args=0, vars=1, tmps=3) + ; (before block pass) + ; trivial-main.php:1-4 +BB0: start exit lines=[0-5] + INIT_FCALL 1 96 string("${encode("phpinfo")}") + T1 = CONCAT string("${encode("conca")}") CV0($$var) + T2 = CONCAT T1 string("${encode("tenation")}") + SEND_VAL T2 1 + DO_ICALL + RETURN int(1) +""".stripMargin + val Parsed.Success(result, count) = + parse(fullDump, getControlFlowBlock(_)) + assert(count == fullDump.length) + assert(result.blocks.length == 1) + } + "be able to parse proper long (partial) dump" in { + val fullDump = + s"""PleskXTest\\TestCase::tearDownAfterClass: ; 
(lines=22, args=0, vars=2, tmps=6) + | ; (before block pass) + | ; /home/simon/tmp/api-php-lib/tests/TestCase.php:32-40 + |BB0: start lines=[0-3] + | ; to=(BB6, BB1) + | EXT_NOP + | EXT_STMT + | T2 = FETCH_STATIC_PROP_R string("${encode("webspaces")}") (self) (exception) + | V3 = FE_RESET_R T2 BB6 + |BB1: follow target lines=[4-4] + | ; to=(BB6, BB2) + | FE_FETCH_R V3 CV0($$webspace) BB6 + |BB2: follow lines=[5-5] + | ; to=(BB3) + | NOP + |BB3: follow try lines=[6-16] + | ; to=(BB1) + | EXT_STMT + | T4 = FETCH_STATIC_PROP_R string("${encode("_client")}") (static) (exception) + | INIT_METHOD_CALL 0 T4 string("${encode("webspace")}") + | V5 = DO_FCALL + | INIT_METHOD_CALL 2 V5 string("${encode("delete")}") + | SEND_VAL_EX string("id") 1 + | CHECK_FUNC_ARG 2 + | V6 = FETCH_OBJ_FUNC_ARG (ref) CV0($$webspace) string("${encode("id")}") + | SEND_FUNC_ARG V6 2 + | DO_FCALL + | JMP BB1 + |BB4: catch lines=[17-17] + | ; to=(BB5) + | CV1($$e) = CATCH string("${encode("Exception")}") + |BB5: follow lines=[18-18] + | ; to=(BB1) + | JMP BB1 + |BB6: target exit lines=[19-21] + | FE_FREE V3 + | EXT_STMT + | RETURN null + |EXCEPTION TABLE: + | BB3, BB4, -, - + |""".stripMargin + val Parsed.Success(result, length) = + parse(fullDump, getControlFlowBlock(_)) + length shouldBe fullDump.length + result.blocks.length shouldBe 7 + result.exceptionTable match { + case Some(_) => + case None => fail(message = "there should be an exception table block") + } + } + "be able to parse partial (longer) dump II" in { + val dump = + s"""PleskXTest\\Utility\\KeyLimitChecker::checkByType: ; (lines=69, args=3, vars=4, tmps=20) + | ; (before block pass) + | ; /home/simon/tmp/api-php-lib/tests/Utility/KeyLimitChecker.php:21-45 + |BB0: start lines=[0-9] + | ; to=(BB4, BB1) + | EXT_NOP + | CV0($$keyInfo) = RECV 1 + | CV1($$type) = RECV 2 + | CV2($$minimalRequirement) = RECV 3 + | EXT_STMT + | ASSIGN CV3($$field) null + | EXT_STMT + | NOP + | T5 = IS_EQUAL CV1($$type) string("${encode("limit_clients")}") 
+ | JMPNZ T5 BB4 + |BB1: follow lines=[10-12] + | ; to=(BB7, BB2) + | NOP + | T5 = IS_EQUAL CV1($$type) string("${encode("limit_resellers")}") + | JMPNZ T5 BB7 + |BB2: follow lines=[13-15] + | ; to=(BB11, BB10) + | NOP + | T5 = IS_EQUAL CV1($$type) string("${encode("limit_domains")}") + | JMPZNZ T5 BB11 BB10 + |BB3: unreachable lines=[16-16] + | ; to=(BB11) + | JMP BB11 + |""".stripMargin + val Parsed.Success(result, length) = parse(dump, getControlFlowBlock(_)) + length shouldBe dump.length + result.blocks.length shouldBe 4 + } + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/EasyBase64.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/EasyBase64.scala new file mode 100644 index 0000000..5922ea2 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/EasyBase64.scala @@ -0,0 +1,13 @@ +package io.joern.bytecode.parser.php7 + +import java.nio.charset.StandardCharsets +import java.util.Base64 + + +object EasyBase64 { + + def encode(str: String): String = { + Base64.getEncoder.encodeToString(str.getBytes(StandardCharsets.UTF_8)) + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/ExceptionTableBlockTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/ExceptionTableBlockTest.scala new file mode 100644 index 0000000..52071a1 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/ExceptionTableBlockTest.scala @@ -0,0 +1,59 @@ +package io.joern.bytecode.parser.php7 + +import fastparse._ +import io.joern.bytecode.parser.php7.ExceptionTableBlock._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class ExceptionTableBlockTest extends AnyWordSpec with Matchers { + + "parser getExceptionTableLine" should { + "be able to parse ' L1, L2, -, -'" in { + val line = " L1, L2, -, -" + val Parsed.Success(result, length) = parse(line, getExceptionTableLine(_)) + assert(length == line.length) + 
assert(result.content.head == "1") + assert(result.content(1) == "2") + assert(result.content(2) == "-") + assert(result.content(3) == "-") + } + "be able to parse ' BB1, BB2, -, -'" in { + val line = " BB1, BB2, -, -" + val Parsed.Success(result, length) = parse(line, getExceptionTableLine(_)) + assert(length == line.length) + assert(result.content.head == "1") + assert(result.content(1) == "2") + assert(result.content(2) == "-") + assert(result.content(3) == "-") + } + } + + "parser getExceptionTableBlock" should { + "be able to parse a 00XX exception table block" in { + val block = + """EXCEPTION TABLE: + L6, L4, -, - + L11, L33, -, - +""" + val Parsed.Success(result, length) = + parse(block, getExceptionTableBlock(_)) + assert(length == block.length) + assert(result.tableEntry.length == 2) + assert(result.tableEntry.head.content.head == "6") + assert(result.tableEntry.head.content(1) == "4") + assert(result.tableEntry(1).content(0) == "11") + assert(result.tableEntry(1).content(1) == "33") + } + + "be able to parse this example exception table block" in { + val block = + """EXCEPTION TABLE: + | L6, L17, -, - +""".stripMargin + val Parsed.Success(result, length) = + parse(block, getExceptionTableBlock(_)) + length shouldBe block.length + result.tableEntry.length shouldBe 1 + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/FileParserTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/FileParserTest.scala new file mode 100644 index 0000000..35023ec --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/FileParserTest.scala @@ -0,0 +1,131 @@ +package io.joern.bytecode.parser.php7 + +import fastparse._ +import io.joern.bytecode.parser.constructs.MethodDefinitionPair +import io.joern.bytecode.parser.php7.EasyBase64.encode +import io.joern.config.CPGConfig +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import java.io.File +import 
scala.sys.process._ + +class FileParserTest extends AnyWordSpec with Matchers { + + val config: CPGConfig = CPGConfig.initializeConfig() + + "parser parseLastLine" should { + "be able to consume 'No syntax errors detected in garbage'" in { + val Parsed.Success(_, count) = + parse("No syntax errors detected in garbage", + FileParser7.parseLastLine(_)) + assert(count == "No syntax errors detected in garbage".length) + } + } + + "actualParse" should { + "detect a PHP Parser Error" in { + val fullDump : String = "PHP Parse error: syntax error, unexpected new T_NEW" + an [FileParser7.PHPParseError] should be thrownBy FileParser7.actualParse(fullDump, "file") + } + "detect a PHP Fatal Error" in { + val fullDump : String = "PHP Fatal error: Cannot declare class MyClass," + an [FileParser7.PHPFatalError] should be thrownBy FileParser7.actualParse(fullDump, "file") + } + "detect an internal error" in { + val fullDump : String = "$_main: ; (lines=6, args=0, vars=1, tmps=3)" + an [FileParser7.BytecodeParseError] should be thrownBy FileParser7.actualParse(fullDump, "file") + } + } + + "parser parseByteCodeDump" should { + "be able to parse single function single BB" in { + val fullDump = + s""" +$$_main: ; (lines=6, args=0, vars=1, tmps=3) + ; (before optimizer) + ; trivial-main.php:1-4 +L0 (2): INIT_FCALL 1 96 string("${encode("phpinfo")}") +L1 (2): T1 = CONCAT string("${encode("conca")}") CV0($$var) +L2 (2): T2 = CONCAT T1 string("${encode("tenation")}") +L3 (2): SEND_VAL T2 1 +L4 (2): DO_ICALL +L5 (4): RETURN int(1) + +$$_main: ; (lines=6, args=0, vars=1, tmps=3) + ; (before block pass) + ; trivial-main.php:1-4 +BB0: start exit lines=[0-5] + INIT_FCALL 1 96 string("${encode("phpinfo")}") + T1 = CONCAT string("${encode("conca")}") CV0($$var) + T2 = CONCAT T1 string("${encode("tenation")}") + SEND_VAL T2 1 + DO_ICALL + RETURN int(1) +""" + val Parsed.Success(results, count) = + parse(fullDump, FileParser7.parseByteCodeDump(_)) + assert(results.length == 1) + val 
MethodDefinitionPair(byteCode, controlFlow) = results.head + val result = (byteCode, controlFlow) + assert(count == fullDump.length) + assert(result._1.instructions.length == 6) + assert(result._2.blocks.length == 1) + assert(result._2.blocks.head.instructions.length == 6) + } + } + + "ByteCodeParser.parse" should { + "be able to directly deal with a php file" in { + val cwd = "pwd".!!.stripMargin.trim + val methodDefinitionPairs = + FileParser7.parseFromFile(new File( + cwd + "/layerByteCode/resources/unittesting/testprojects/onlyMainCreation/trivial-php.php"), config.php7.interpreter, config.php7.phpini) + assert(methodDefinitionPairs.length == 1) + assert(methodDefinitionPairs.head.byteCodeBlock.instructions.length == 6) + } + "be able to extract multiple methods from the same file if they exist" in { + val cwd = "pwd".!!.stripMargin.trim + val multipleMethodsPairs = + FileParser7.parseFromFile(new File( + cwd + "/layerByteCode/resources/unittesting/testprojects/twoFunctionsAndMain/main.php"), config.php7.interpreter, config.php7.phpini) + assert(multipleMethodsPairs.length == 3) + } + "be able to extract multiple BB in basicConditional project" in { + val cwd = "pwd".!!.stripMargin.trim + val multipleMethodPairs = FileParser7.parseFromFile(new File( + cwd + "/layerByteCode/resources/unittesting/testprojects/basicConditional/main.php"), config.php7.interpreter, config.php7.phpini) + multipleMethodPairs.length shouldBe 1 + multipleMethodPairs.head.controlFlowBlock.blocks.length shouldBe 4 + } + "be ebale to extract multiple defintion pairs in project" in { + val cwd = "pwd".!!.stripMargin.trim + val multipleMethodPairs = FileParser7.parseFromFile(new File( + cwd + "/layerByteCode/resources/unittesting/testprojects/singleClassProject/main.php"), config.php7.interpreter, config.php7.phpini) + multipleMethodPairs.length shouldBe 4 + } + "be able to deal with unicode" when { + "in identifier" in { + val cwd = "pwd".!!.stripMargin.trim + try { + 
FileParser7.parseFromFile(new File( + cwd + "/layerByteCode/resources/unittesting/testprojects/unicode/main.php"), config.php7.interpreter, config.php7.phpini) + } catch { + case x : Throwable => + fail(x.getMessage) + } + } + "in function name" in { + val cwd = "pwd".!!.stripMargin.trim + try { + val res = FileParser7.parseFromFile(new File( + cwd + "/layerByteCode/resources/unittesting/testprojects/unicode/hiragana.php"), config.php7.interpreter, config.php7.phpini) + res.map(_.byteCodeBlock.name).toSet shouldBe Set("dlr_main", "rさ") + } catch { + case x: Throwable => + fail(x.getMessage) + } + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/HeaderBlockTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/HeaderBlockTest.scala new file mode 100644 index 0000000..89c5c0d --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/HeaderBlockTest.scala @@ -0,0 +1,251 @@ +package io.joern.bytecode.parser.php7 + +import fastparse._ +import io.joern.bytecode.parser.php7.HeaderBlock._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class HeaderBlockTest extends AnyWordSpec with Matchers { + + "parsing '$_main:\n ; (lines=42, args=43, vars=44, tmps=45)'" should { + "be parsable only considering name results" in { + val Parsed.Success(_, success) = + parse("$_main", parseHeaderBlockMethodIdentifier(_)) + assert(success == 6) //check that all 6 letters are consumed + } + "be parsable only considering meta variable results in" in { + val Parsed.Success(result, _) = + parse("(lines=42, args=43, vars=44, tmps=45)", + parseHeaderBlockMethodMetaBlock(_)) + assert(result._1 == 42) + assert(result._2 == 43) + assert(result._3 == 44) + assert(result._4 == 45) + } + "be completely parsable" in { + val Parsed.Success(result, _) = + parse("$_main: ; (lines=42, args=43, vars=44, tmps=45)", + getHeaderBLockMethodDefinitionLine(_)) + assert(result.name == "dlr_main") + 
assert(result.lines == 42) + assert(result.args == 43) + assert(result.vars == 44) + assert(result.tmps == 45) + } + } + + "parser getHeaderBlockMethodIdentifier" should { + "be able to parse {closure}" in { + val name = "{closure}" + val Parsed.Success(result, length) = + parse(name, getHeaderBlockMethodIdentifier(_)) + length shouldBe name.length + result._1 shouldBe None + result._2 shouldBe None + result._3 shouldBe "{closure}" + } + "be able to parse $_main" in { + val name = "$_main" + val Parsed.Success(result, length) = + parse(name, getHeaderBlockMethodIdentifier(_)) + length shouldBe name.length + result._1 shouldBe None + result._2 shouldBe None + result._3 shouldBe "$_main" + } + "be able to parse testFunction" in { + val name = "testFunction" + val Parsed.Success(result, length) = + parse(name, getHeaderBlockMethodIdentifier(_)) + length shouldBe name.length + result._1 shouldBe None + result._2 shouldBe None + result._3 shouldBe "testFunction" + } + "be able to parse namespace\\testFunction" in { + val name = "namespace\\testFunction" + val Parsed.Success(result, length) = + parse(name, getHeaderBlockMethodIdentifier(_)) + length shouldBe name.length + result._1 shouldBe Some("namespace") + result._2 shouldBe None + result._3 shouldBe "testFunction" + } + "be able to parse weird\\namespace\\testFunction" in { + val name = "weird\\namespace\\testFunction" + val Parsed.Success(result, length) = + parse(name, getHeaderBlockMethodIdentifier(_)) + length shouldBe name.length + result._1 shouldBe Some("weird\\namespace") + result._2 shouldBe None + result._3 shouldBe "testFunction" + } + "be able to parse className::testFunction" in { + val name = "className::testFunction" + val Parsed.Success(result, length) = + parse(name, getHeaderBlockMethodIdentifier(_)) + length shouldBe name.length + result._1 shouldBe None + result._2 shouldBe Some("className") + result._3 shouldBe "testFunction" + } + "be able to parse namespace\\className::testFunction" in { + val 
name = "namespace\\className::testFunction" + val Parsed.Success(result, length) = + parse(name, getHeaderBlockMethodIdentifier(_)) + length shouldBe name.length + result._1 shouldBe Some("namespace") + result._2 shouldBe Some("className") + result._3 shouldBe "testFunction" + } + "be able to parse weird\\namespace\\className::testFunction" in { + val name = "weird\\namespace\\className::testFunction" + val Parsed.Success(result, length) = + parse(name, getHeaderBlockMethodIdentifier(_)) + length shouldBe name.length + result._1 shouldBe Some("weird\\namespace") + result._2 shouldBe Some("className") + result._3 shouldBe "testFunction" + } + "be able to parse long\\weird\\namespace\\className::testFunction" in { + val name = "long\\weird\\namespace\\className::testFunction" + val Parsed.Success(result, length) = + parse(name, getHeaderBlockMethodIdentifier(_)) + length shouldBe name.length + result._1 shouldBe Some("long\\weird\\namespace") + result._2 shouldBe Some("className") + result._3 shouldBe "testFunction" + } + "be able to parse \\strlen" in { + val name = "\\strlen" + val Parsed.Success(result, length) = + parse(name, getHeaderBlockMethodIdentifier(_)) + length shouldBe name.length + result._1 shouldBe None + result._2 shouldBe None + result._3 shouldBe "strlen" + } + } + + "parser getMetaLineParsing " should { + "be able to parse '; (before optimizer)'" in { + val Parsed.Success(result, _) = + parse("; (before optimizer)", getHeaderBlockMetaLineParsing(_)) + assert(result.metaInfo.length == 2) + assert(result.metaInfo.head == "before") + assert(result.metaInfo(1) == "optimizer") + } + } + + "parser getMetaLineFileInfo" should { + "be able to pare '; (main: |1-3)'" in { + val Parsed.Success(result, _) = + parse("; main:1-3", getHeaderBlockMetaLineFileInfo(_)) + assert(result.fileName == "main") + assert(result.lineStart == 1) + assert(result.lineEnd == 3) + } + } + + "parseHeaderBlockMetaLineFileInfo" should { + "be able to handle a path with a colon" in 
{ + val line = "; /home/simon/tmp/bytecode-cpg/trivial:try-catch.php:1-23" + val Parsed.Success(result,length) = parse(line,getHeaderBlockMetaLineFileInfo(_)) + length shouldBe line.length + result.fileName shouldBe "/home/simon/tmp/bytecode-cpg/trivial:try-catch.php" + } + "be able to handle path without colon" in { + val line = " ; /tmp/php2cpg17511137408163283203/test.php:1-9" + val Parsed.Success(result,length) = parse(line,getHeaderBlockMetaLineFileInfo(_)) + length shouldBe line.length + result.fileName shouldBe "/tmp/php2cpg17511137408163283203/test.php" + result.lineStart shouldBe 1 + result.lineEnd shouldBe 9 + } + } + + "parser parseHeaderBlock" should { + "be able to parse a valid header block with colon in path" in { + val headerBlock = + """$_main: ; (lines=23, args=0, vars=2, tmps=8) + ; (before optimizer) + ; /home/simon/tmp/bytecode-cpg/trivial:try-catch.php:1-23 +""".stripMargin + val Parsed.Success(result, length) = parse(headerBlock, getHeaderBlock(_)) + length shouldBe headerBlock.length + result._3.fileName shouldBe "/home/simon/tmp/bytecode-cpg/trivial:try-catch.php" + result._3.lineStart shouldBe 1 + result._3.lineEnd shouldBe 23 + } + "be able to parse a valid header block with class definition" in { + val headerBlock = + """Basic::__init__: ; (lines=23, args=0, vars=2, tmps=8) + ; (before optimizer) + ; /home/simon/tmp/bytecode-cpg/trivial-try-catch.php:1-23 +""" + val Parsed.Success(result, length) = parse(headerBlock, getHeaderBlock(_)) + length shouldBe headerBlock.length + result._1.name shouldBe "__init__" + result._1.classname shouldBe Some("basic") + } + "be able to parse a valid header block with namespace and class" in { + val headerBlock = + """namespace\Basic::__init__: ; (lines=23, args=0, vars=2, tmps=8) + ; (before optimizer) + ; /home/simon/tmp/bytecode-cpg/trivial-try-catch.php:1-23 +""" + val Parsed.Success(result, length) = parse(headerBlock, getHeaderBlock(_)) + length shouldBe headerBlock.length + result._1.name shouldBe 
"__init__" + result._1.classname shouldBe Some("basic") + result._1.namespace shouldBe Some("namespace") + } + "be able to parse a valid header block with namespace" in { + val headerBlock = + """namespace\__init__: ; (lines=23, args=0, vars=2, tmps=8) + ; (before optimizer) + ; /home/simon/tmp/bytecode-cpg/trivial-try-catch.php:1-23 +""" + val Parsed.Success(result, length) = parse(headerBlock, getHeaderBlock(_)) + length shouldBe headerBlock.length + result._1.name shouldBe "__init__" + result._1.classname shouldBe None + result._1.namespace shouldBe Some("namespace") + } + "be able to parse a valid header block with deep namespace" in { + val headerBlock = + """start\namespace\__init__: ; (lines=23, args=0, vars=2, tmps=8) + ; (before optimizer) + ; /home/simon/tmp/bytecode-cpg/trivial-try-catch.php:1-23 +""" + val Parsed.Success(result, length) = parse(headerBlock, getHeaderBlock(_)) + length shouldBe headerBlock.length + result._1.name shouldBe "__init__" + result._1.classname shouldBe None + result._1.namespace shouldBe Some("start\\namespace") + } + "be able to parse a valid head block with spaces in file name" in { + val headerBlock = + """$_main: ; (lines=3, args=0, vars=0, tmps=0) + | ; (before optimizer) + | ; /home/simon/tmp/cpgIssues/PoC/People Weird(@#.php:1-3 + |""".stripMargin + val Parsed.Success(result, length) = parse(headerBlock, getHeaderBlock(_)) + length shouldBe headerBlock.length + result._1.name shouldBe "dlr_main" + result._3.fileName shouldBe "/home/simon/tmp/cpgIssues/PoC/People Weird(@#.php" + result._3.lineStart shouldBe 1 + result._3.lineEnd shouldBe 3 + } + "be able to parse valid header block from other unit test" in { + val headerBlock = + """$_main: ; (lines=3, args=0, vars=0, tmps=1) + | ; (before optimizer) + | ; /tmp/php2cpg17511137408163283203/test.php:1-9 + |""".stripMargin + val Parsed.Success(_,length) = parse(headerBlock, getHeaderBlock(_)) + length shouldBe headerBlock.length + } + } + +} diff --git 
a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/InstructionsTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/InstructionsTest.scala new file mode 100644 index 0000000..080546f --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/InstructionsTest.scala @@ -0,0 +1,1843 @@ +package io.joern.bytecode.parser.php7 + +import fastparse._ +import io.joern.bytecode.parser.constructs._ +import io.joern.bytecode.parser.php7.EasyBase64.encode +import io.joern.bytecode.parser.php7.Instructions._ +import io.joern.bytecode.parser.php7.instructions.ControlConstructs.{parseNumberDestinationPattern, parseStringDestinationPattern} +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class InstructionsTest extends AnyWordSpec with Matchers { + + + + "parser getOperation" should { + "be able to parse NEW 1 string(\"Basic\")" in { + val operation = s"""NEW 1 string("${encode("Basic")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "NEW" + lhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "lhs is not of type IntegerLiteral") + } + rhs match { + case StringLiteral(value) => value shouldBe "Basic" + case _ => fail(message = "rhs is not of type StringLiteral") + } + + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse INIT_NS_FCALL_BY_NAME 2 string(\"some\\qualified\\name\")" in { + val operation = + s"""INIT_NS_FCALL_BY_NAME 2 string("${encode("some\\qualified\\name")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case INIT_NS_FCALL_BY_NAME(paramCount, function) => + paramCount shouldBe 2 + function shouldBe "some\\qualified\\name" + case _ => fail(message = "return is 
not of type INIT_NS_FCALL_BY_NAME") + } + } + "be able to parse INIT_NS_FCALL_BY_NAME 1 string(\"PleskX\\Api\\get_class\")" in { + val operation = + s"""INIT_NS_FCALL_BY_NAME 1 string("${encode("PleskX\\Api\\get_class")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case INIT_NS_FCALL_BY_NAME(paramCount, function) => + paramCount shouldBe 1 + function shouldBe "pleskx\\api\\get_class" + case _ => fail(message = "return is not of type INIT_NS_FCALL_BY_NAME") + } + } + "be able to parse INIT_METHOD_CALL 1 CV0($var) string(\"test\")" in { + val operation = s"""INIT_METHOD_CALL 1 CV0($$var) string("${encode("test")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case INIT_METHOD_CALL(paramCount, objectVar, method) => + paramCount shouldBe 1 + val Variable(name, tmp, ref) = objectVar + name shouldBe "var" + tmp shouldBe false + ref shouldBe false + method shouldBe StringLiteral("test") + case _ => fail(message = "result is not of type INIT_METHOD_CALL") + } + } + "be able to parse INIT_METHOD_CALL 1 THIS string(\"test\")" in { + val operation = s"""INIT_METHOD_CALL 1 THIS string("${encode("test")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case INIT_METHOD_CALL(paramCount, objectVar, method) => + paramCount shouldBe 1 + val Variable(name, tmp, ref) = objectVar + name shouldBe "THIS" + tmp shouldBe false + ref shouldBe true + method shouldBe StringLiteral("test") + case _ => fail(message = "result is not of type INIT_METHOD_CALL") + } + } + "be able to parse INIT_STATIC_METHOD_CALL 3 string(\"test\") CONSTRUCTOR" in { + val operation = s"""INIT_STATIC_METHOD_CALL 3 string("${encode("test")}") CONSTRUCTOR""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe 
operation.length + result.op shouldBe INIT_STATIC_METHOD_CALL(3, None, None, Some(StringLiteral("test")), ByteCodeConstructor()) + } + "be able to parse INIT_FCALL 2 42 string(\"phpinfo\")" in { + val operation = s"""INIT_FCALL 2 42 string("${encode("phpinfo")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case INIT_FCALL(parameter, unknown, name) => + parameter shouldBe 2 + unknown shouldBe 42 + name match { + case StringLiteral(value) => value shouldBe "phpinfo" + case _ => fail(message = "the name is not of type StringLiteral") + } + + case _ => fail(message = "the result is not of type INIT_FCALL") + } + } + "be able to parse INIT_DYNAMIC_CALL 2 CV($x)" in { + val operation = "INIT_DYNAMIC_CALL 2 CV($x)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case INIT_DYNAMIC_CALL(paramCount, variable) => + paramCount shouldBe 2 + variable match { + case Variable(name, tmp, reference) => + name shouldBe "x" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "the name is not of type StringLiteral") + } + + case _ => fail(message = "the result is not of type INIT_FCALL") + } + } + "be able to parse INIT_ARRAY 1 (packed) CV0($request) CV2($wrapped)" in { + val operation = "INIT_ARRAY 1 (packed) CV0($request) CV2($wrapped)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case QuadrupleValueOperation(code, first, second, third, fourth) => + code shouldBe "INIT_ARRAY" + first shouldBe IntegerLiteral(1) + second shouldBe ByteCodeKeyword("packed") + third shouldBe Variable("request", tmp = false) + fourth shouldBe Variable("wrapped", tmp = false) + case x => fail(s"unexpected operation $x") + } + } + "be able to parse INIT_FCALL_BY_NAME 2 string(\"phpinfo\")" in { + val operation = s"""INIT_FCALL_BY_NAME 
2 string("${encode("phpinfo")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case INIT_FCALL_BY_NAME(paramCount, name) => + paramCount shouldBe 2 + name shouldBe "phpinfo" + case _ => fail(message = "the result is not of type INIT_FCALL_BY_NAME") + } + } + /* SEND_VAR_EX and SEND_VAL_EX are expected as DualValueOperation(opcode, Variable, IntegerLiteral) */ "be able to parse SEND_VAR_EX CV($var) 2" in { + val operation = "SEND_VAR_EX CV($var) 2" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "SEND_VAR_EX" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 2 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse SEND_VAL_EX CV($var) 2" in { + val operation = "SEND_VAL_EX CV($var) 2" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "SEND_VAL_EX" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 2 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse CHECK_UNDEF_ARGS" in { + val operation = "CHECK_UNDEF_ARGS" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op shouldBe NoValueOperation("CHECK_UNDEF_ARGS") + } + + "be able to
parse FETCH_CLASS_CONSTANT string(\"PleskX\\Api\\Client\") string(\"RESPONSE_FULL\")" in { + val operation = + s"""FETCH_CLASS_CONSTANT string("${encode("PleskX\\Api\\Client")}") string("${encode("RESPONSE_FULL")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FETCH_CLASS_CONSTANT" + lhs match { + case StringLiteral(value) => value shouldBe "PleskX\\Api\\Client" + case x => fail(s"unexpected value $x") + } + rhs match { + case StringLiteral(value) => value shouldBe "RESPONSE_FULL" + case x => fail(s"unexpected value $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse RECV 2" in { + val operation = "RECV 2" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case SingleValueOperation(code, value) => + code shouldBe "RECV" + value match { + case IntegerLiteral(value) => value shouldBe 2 + case _ => fail(message = "the value is not of type IntegerLiteral") + } + case _ => + fail(message = "the result is not of type SingleValueOperation") + } + } + /* RECV_INIT is expected as DualValueOperation(code, IntegerLiteral, StringLiteral) */ "be able to parse RECV_INIT 4 string(\"test\")" in { + val operation = s"""RECV_INIT 4 string("${encode("test")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "RECV_INIT" + lhs match { + case IntegerLiteral(value) => value shouldBe 4 + case _ => fail(message = "lhs is not of type IntegerLiteral") + } + rhs match { + case StringLiteral(value) => value shouldBe "test" + case _ => fail(message = "rhs is not of type StringLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse SEND_VAL string(\"string\") 1" in { + val operation = s"""SEND_VAL string("${encode("string")}") 1""" +
val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "SEND_VAL" + lhs match { + case StringLiteral(value) => value shouldBe "string" + case _ => fail(message = "lhs is not of type StringLiteral") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + /* JMP has a single operand; jump targets written as Ln or BBn are both expected as IntegerLiteral(n) */ "be able to parse JMP L1" in { + val operation = "JMP L1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case SingleValueOperation(code, value) => + code shouldBe "JMP" + value match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "value is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type SingleValueOperation") + } + } + "be able to parse JMP BB1" in { + val operation = "JMP BB1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case SingleValueOperation(code, value) => + code shouldBe "JMP" + value match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "value is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type SingleValueOperation") + } + } + "be able to parse JMPZ CV($var) L1" in { + val operation = "JMPZ CV($var) L1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMPZ" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value
shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + /* same opcode with a BBn target: the expected result is identical to the Ln form, IntegerLiteral(1) */ "be able to parse JMPZ CV($var) BB1" in { + val operation = "JMPZ CV($var) BB1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMPZ" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + /* JMPNZ is asserted with the same shape as JMPZ: DualValueOperation(code, Variable, IntegerLiteral) */ "be able to parse JMPNZ CV($var) L1" in { + val operation = "JMPNZ CV($var) L1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMPNZ" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse JMPNZ CV($var) BB1" in { + val operation = "JMPNZ CV($var) BB1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMPNZ" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case
IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + /* JMPNZ_EX is checked in both target spellings (L1 and BB1); each is expected as IntegerLiteral(1) */ "be able to parse JMPNZ_EX CV($var) L1" in { + val operation = "JMPNZ_EX CV($var) L1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMPNZ_EX" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse JMPNZ_EX CV($var) BB1" in { + val operation = "JMPNZ_EX CV($var) BB1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMPNZ_EX" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + /* the numbered form CV0($a) is expected to yield the plain variable name "a" */ "be able to parse JMP_NULL CV0($a) L4" in { + val operation = "JMP_NULL CV0($a) L4" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMP_NULL" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "a" + tmp shouldBe false + reference shouldBe false + case _ => fail(message =
"lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 4 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + /* Vn operands are expected as Variable(name = "Vn", tmp = true, reference = true) */ "be able to parse FE_RESET_R V1 L1" in { + val operation = "FE_RESET_R V1 L1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FE_RESET_R" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "V1" + tmp shouldBe true + reference shouldBe true + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse FE_RESET_R V1 BB1" in { + val operation = "FE_RESET_R V1 BB1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FE_RESET_R" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "V1" + tmp shouldBe true + reference shouldBe true + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + /* the title quotes the exact input parsed below */ "be able to parse FE_FETCH_R V1 CV($var) L1" in { + val operation = "FE_FETCH_R V1 CV($var) L1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "FE_FETCH_R" + first match { + case Variable(name, tmp, reference) => + name shouldBe "V1" + tmp shouldBe
true + reference shouldBe true + case _ => fail(message = "first value is not of type Variable") + } + second match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "second value is not of type Variable") + } + third match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => + fail(message = "third value is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type TripleValueOperation") + } + } + /* the title quotes the exact input parsed below; the BB1 target is expected as IntegerLiteral(1) */ "be able to parse FE_FETCH_R V1 CV($var) BB1" in { + val operation = "FE_FETCH_R V1 CV($var) BB1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "FE_FETCH_R" + first match { + case Variable(name, tmp, reference) => + name shouldBe "V1" + tmp shouldBe true + reference shouldBe true + case _ => fail(message = "first value is not of type Variable") + } + second match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "second value is not of type Variable") + } + third match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => + fail(message = "third value is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type TripleValueOperation") + } + } + "be able to parse FETCH_DIM_R V1 string(\"a\")" in { + val operation = s"""FETCH_DIM_R V1 string("${encode("a")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FETCH_DIM_R" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "V1" + tmp shouldBe true + reference shouldBe true + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case
StringLiteral(value) => value shouldBe "a" + case _ => fail(message = "rhs is not of type StringLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + /* NEXT is expected as ByteCodePlaceIndicator("NEXT"), not as a literal */ "be able to parse ASSIGN_DIM CV($array) NEXT" in { + val operation = "ASSIGN_DIM CV($array) NEXT" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ASSIGN_DIM" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "array" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case ByteCodePlaceIndicator(value) => value shouldBe "NEXT" + case _ => fail(message = "rhs is not of type ByteCodePlaceIndicator") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse ASSIGN_DIM CV($array) int(1)" in { + val operation = "ASSIGN_DIM CV($array) int(1)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ASSIGN_DIM" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "array" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse ASSIGN CV($x) int(42)" in { + val operation = "ASSIGN CV($x) int(42)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ASSIGN" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "x" + tmp shouldBe false +
reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 42 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + /* only the first two operands are asserted; the third (int(42)) is ignored by the wildcard in the pattern */ "be able to parse ASSIGN_OBJ_OP (CONCAT) V11 int(42)" in { + val operation = "ASSIGN_OBJ_OP (CONCAT) V11 int(42)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, _) => + code shouldBe "ASSIGN_OBJ_OP" + first shouldBe AssignOpLiteral("CONCAT") + second shouldBe Variable("V11", tmp = true, reference = true) + case x => fail(s"unexpected operation $x") + } + } + /* ROPE_INIT is expected as DualValueOperation(code, IntegerLiteral, StringLiteral) */ "be able to parse ROPE_INIT 3 string(\"a\")" in { + val operation = s"""ROPE_INIT 3 string("${encode("a")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ROPE_INIT" + lhs match { + case IntegerLiteral(value) => value shouldBe 3 + case _ => fail(message = "lhs is not of type IntegerLiteral") + } + rhs match { + case StringLiteral(value) => value shouldBe "a" + case _ => fail(message = "rhs is not of type StringLiteral") + } + case _ => fail(message = "the result is not of type DualValueOperation") + } + } + "be able to parse ROPE_ADD 3 CV($a) T1" in { + val operation = "ROPE_ADD 3 CV($a) T1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "ROPE_ADD" + first match { + case IntegerLiteral(value) => value shouldBe 3 + case _ => fail(message = "first is not of type IntegerLiteral") + } + second match { + case Variable(name, tmp, reference) => + name shouldBe "a" + tmp shouldBe false + reference
shouldBe false + case _ => fail(message = "second is not of type Variable") + } + third match { + case Variable(name, tmp, reference) => + name shouldBe "T1" + tmp shouldBe true + reference shouldBe false + case _ => fail(message = "third is not of type Variable") + } + case _ => fail(message = "result is not of type TripleValueOperation") + } + } + /* Tn operands are expected as Variable(name = "Tn", tmp = true, reference = false) */ "be able to parse ROPE_END 2 T1 T2" in { + val operation = "ROPE_END 2 T1 T2" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "ROPE_END" + first match { + case IntegerLiteral(value) => value shouldBe 2 + case _ => fail(message = "first is not of type IntegerLiteral") + } + second match { + case Variable(name, tmp, reference) => + name shouldBe "T1" + tmp shouldBe true + reference shouldBe false + case _ => fail(message = "second is not of type Variable") + } + third match { + case Variable(name, tmp, reference) => + name shouldBe "T2" + tmp shouldBe true + reference shouldBe false + case _ => fail(message = "third is not of type Variable") + } + case _ => fail(message = "result is not of type TripleValueOperation") + } + } + /* the parenthesised operator of ASSIGN_OP is expected as AssignOpLiteral */ "be able to parse ASSIGN_OP (ADD) CV($a) CV($b)" in { + val operation = "ASSIGN_OP (ADD) CV($a) CV($b)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "ASSIGN_OP" + first match { + case AssignOpLiteral(value) => value shouldBe "ADD" + case _ => fail(message = "first is not of type AssignOpLiteral") + } + second match { + case Variable(name, tmp, reference) => + name shouldBe "a" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "second is not of type Variable") + } + third match { + case Variable(name, tmp, reference) => + name shouldBe "b" + tmp shouldBe false +
reference shouldBe false + case _ => fail(message = "third is not of type Variable") + } + case _ => fail(message = "result is not of type TripleValueOperation") + } + } + /* smoke test: each variant only has to be consumed completely by the parser. NOTE(review): ADD is listed twice in both variant lists below; possibly another operator was intended, confirm against the parser */ "be able to parse all ASSIGN_OP OPs" in { + def variant(x: String): Unit = { + val Parsed.Success(_, length) = parse(x, getOperation(_)) + length shouldBe x.length + } + variant("ASSIGN_OP (SUB) CV0($a) int(2)") + variant("ASSIGN_OP (MUL) CV0($a) int(2)") + variant("ASSIGN_OP (ADD) CV0($a) int(2)") + variant("ASSIGN_OP (DIV) CV0($a) int(2)") + variant("ASSIGN_OP (MOD) CV0($a) int(2)") + variant("ASSIGN_OP (POW) CV0($a) int(2)") + variant("ASSIGN_OP (ADD) CV0($a) int(2)") + variant("ASSIGN_OP (BW_AND) CV0($a) int(2)") + variant("ASSIGN_OP (BW_OR) CV0($a) int(2)") + variant("ASSIGN_OP (BW_XOR) CV0($a) int(2)") + variant("ASSIGN_OP (SL) CV0($a) int(2)") + variant("ASSIGN_OP (SR) CV0($a) int(2)") + } + + "be able to parse all ASSIGN_DIM_OP OPs" in { + def variant(x: String): Unit = { + val Parsed.Success(_, length) = parse(x, getOperation(_)) + length shouldBe x.length + } + variant("ASSIGN_DIM_OP (SUB) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (MUL) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (ADD) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (DIV) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (MOD) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (POW) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (ADD) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (BW_AND) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (BW_OR) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (BW_XOR) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (SL) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (SR) CV0($a) int(0)") + } + // below is the representative test for no value operations + "be able to parse DO_ICALL" in { + val operation = "DO_ICALL" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case NoValueOperation(code) => code shouldBe "DO_ICALL" + case _ => fail(message = "the result is not
of type NoValueOperation") + } + } + // below is the representative test for single value operations + "be able to parse ECHO int(32)" in { + val operation = "ECHO int(32)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case SingleValueOperation(code, value) => + code shouldBe "ECHO" + value match { + case IntegerLiteral(value) => value shouldBe 32 + case _ => fail(message = "value is not of type IntegerLiteral") + } + case x => fail(s"unexpected operation $x") + } + } + // below is the representative test for dual value operations + "be able to parse CONCAT string(\"a\") string(\"b\")" in { + val operation = s"""CONCAT string("${encode("a")}") string("${encode("b")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "CONCAT" + length shouldBe operation.length + lhs match { + case StringLiteral(value) => value shouldBe "a" + case _ => fail(message = "lhs is not of type StringLiteral") + } + rhs match { + case StringLiteral(value) => value shouldBe "b" + case _ => fail(message = "rhs is not of type StringLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + /* same opcode with a temporary (T1) as right operand, expected as Variable(tmp = true, reference = false) */ "be able to parse CONCAT string(\"a\") T1" in { + val operation = s"""CONCAT string("${encode("a")}") T1""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "CONCAT" + length shouldBe operation.length + lhs match { + case StringLiteral(value) => value shouldBe "a" + case _ => fail(message = "lhs was not of type StringLiteral") + } + rhs match { + case Variable(name, tmp, reference) => + name shouldBe "T1" + tmp shouldBe true + reference shouldBe false + case _ => fail(message = "rhs was not of type Variable") + } + case _ => + fail(message = "the result was not of type
DualValueOperation") + } + } + /* $$ in the interpolated input is the escape for a literal dollar sign, so the parser sees CV($var) */ "be able to parse CONCAT string(\"a\") CV($var)" in { + val operation = s"""CONCAT string("${encode("a")}") CV($$var)""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "CONCAT" + length shouldBe operation.length + lhs match { + case StringLiteral(value) => value shouldBe "a" + case _ => fail(message = "lhs was not of type StringLiteral") + } + rhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "rhs was not of type Variable") + } + case _ => fail(message = "result was not of type DualValueOperation") + } + } + "be able to parse CONCAT T1 T2" in { + val operation = "CONCAT T1 T2" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "CONCAT" + length shouldBe operation.length + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "T1" + tmp shouldBe true + reference shouldBe false + case _ => fail(message = "lhs was not of type variable") + } + rhs match { + case Variable(name, tmp, reference) => + name shouldBe "T2" + tmp shouldBe true + reference shouldBe false + case _ => fail(message = "rhs was not of type variable") + } + case _ => fail(message = "result was not of type DualValueOperation") + } + } + "be able to parse CONCAT CV($var1) CV($var2)" in { + val operation = "CONCAT CV($var1) CV($var2)" + val Parsed.Success(result, length) = + parse(input = operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "CONCAT" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var1" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs was not of type variable") + } + rhs match { +
case Variable(name, tmp, reference) => + name shouldBe "var2" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "rhs was not of type variable") + } + case _ => fail(message = "result was not of type DualValueOperation") + } + } + /* parenthesised words such as (static) and (exception) are expected as ByteCodeKeyword values */ "be able to parse FETCH_STATIC_PROP_FUNC_ARG string(\"test\") (static) (exception)" in { + val operation = + s"""FETCH_STATIC_PROP_FUNC_ARG string("${encode("test")}") (static) (exception)""" + val Parsed.Success(result, length) = + parse(input = operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "FETCH_STATIC_PROP_FUNC_ARG" + first match { + case StringLiteral(value) => value shouldBe "test" + case x => fail(s"unexpected value $x") + } + second match { + case ByteCodeKeyword(value) => value shouldBe "static" + case x => fail(s"unexpected value $x") + } + third match { + case ByteCodeKeyword(value) => value shouldBe "exception" + case x => fail(s"unexpected value $x") + } + case x => fail(s"unexpected op $x") + } + } + "be able to parse INIT_ARRAY 3 (packed) T19 NEXT" in { + val operation = "INIT_ARRAY 3 (packed) T19 NEXT" + val Parsed.Success(result, length) = + parse(input = operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case QuadrupleValueOperation(code, first, second, third, fourth) => + code shouldBe "INIT_ARRAY" + first match { + case IntegerLiteral(value) => value shouldBe 3 + case x => fail(s"unexpected value $x") + } + second match { + case ByteCodeKeyword(value) => value shouldBe "packed" + case x => fail(s"unexpected value $x") + } + third match { + case Variable(name, tmp, reference) => + name shouldBe "T19" + tmp shouldBe true + reference shouldBe false + case x => fail(s"unexpected value $x") + } + fourth match { + case ByteCodePlaceIndicator(value) => value shouldBe "NEXT" + case x => fail(s"unexpected value $x") + } + case x => fail(s"unexpected op $x") + }
+ } + "be able to parse FETCH_STATIC_PROP_R string(\"test\") (self) (exception)" in { + val operation = s"""FETCH_STATIC_PROP_R string("${encode("test")}") (self) (exception)""" + val Parsed.Success(result, length) = + parse(input = operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "FETCH_STATIC_PROP_R" + first match { + case StringLiteral(value) => value shouldBe "test" + case x => fail(s"unexpected value $x") + } + second match { + case ByteCodeKeyword(value) => value shouldBe "self" + case x => fail(s"unexpected value $x") + } + third match { + case ByteCodeKeyword(value) => value shouldBe "exception" + case x => fail(s"unexpected value $x") + } + case x => fail(s"unexpected op $x") + } + } + /* THIS is expected as ByteCodePlaceIndicator("THIS") */ "be able to parse FETCH_OBJ_R THIS string(\"test\")" in { + val operation = s"""FETCH_OBJ_R THIS string("${encode("test")}")""" + val Parsed.Success(result, length) = + parse(input = operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FETCH_OBJ_R" + lhs shouldBe ByteCodePlaceIndicator("THIS") + rhs shouldBe StringLiteral("test") + case x => fail(s"unexpected op $x") + } + } + + /* only checks that the whole input is consumed; the parsed result itself is not inspected */ "be able to parse INSTANCEOF CV0($instance) (static) (no-autolod) (exception)" in { + val op = "INSTANCEOF CV0($instance) (static) (no-autolod) (exception)" + val Parsed.Success(_, length) = parse(op, getOperation(_)) + length shouldBe op.length + } + + "be able to parse FAST_RET T10 try-catch(0)" in { + val op = "FAST_RET T10 try-catch(0)" + val Parsed.Success(result, length) = parse(op, getOperation(_)) + length shouldBe op.length + result.op shouldBe DualValueOperation("FAST_RET", Variable("T10", tmp = true), TryCatchLiteral(0)) + } + } + + "parser getInstruction" should { + "be able to correctly parse OP_DATA int(42)" in { + val instruction = "OP_DATA int(42)" + val Parsed.Success(result, length) =
+ parse(input = instruction, getInstruction(_)) + length shouldBe instruction.length + result match { + case Operation(op) => + op match { + case SingleValueOperation(name, value) => + name shouldBe "OP_DATA" + value match { + case IntegerLiteral(value) => + value shouldBe 42 + case x => fail(s"unexpected value $x") + } + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + /* input of the form "var = operation" is expected as Assignment(lhs, rhs), with the operation in rhs */ "be able to parse V1 = NEW 1 string(\"Basic\")" in { + val instruction = s"""V1 = NEW 1 string("${encode("Basic")}")""" + val Parsed.Success(result, length) = + parse(input = instruction, getInstruction(_)) + length shouldBe instruction.length + result match { + case Assignment(lhs, rhs) => + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "V1" + tmp shouldBe true + reference shouldBe true + } + rhs match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "NEW" + lhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "lhs is not of type IntegerLiteral") + } + rhs match { + case StringLiteral(value) => value shouldBe "Basic" + case _ => fail(message = "rhs is not of type StringLiteral") + } + case x => fail(s"unexpected operation $x") + } + case _ => fail(message = "result is not of type Assignment") + } + } + /* the expected function name is all lower case although the input is mixed case */ "be able to parse INIT_NS_FCALL_BY_NAME 1 string(\"PleskX\\Api\\get_class\")" in { + val operation = + s"""INIT_NS_FCALL_BY_NAME 1 string("${encode("PleskX\\Api\\get_class")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case INIT_NS_FCALL_BY_NAME(paramCount, function) => + paramCount shouldBe 1 + function shouldBe "pleskx\\api\\get_class" + case _ => + fail(message = "return is not of type INIT_NS_FCALL_BY_NAME") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse INCLUDE_OR_EVAL (require_once) string(\"../class.phpmailer.php\")" in
{ + val operation = + s"""INCLUDE_OR_EVAL (require_once) string("${encode("../class.phpmailer.php")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "INCLUDE_OR_EVAL" + lhs shouldBe ByteCodeKeyword("require_once") + rhs shouldBe StringLiteral("../class.phpmailer.php") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + /* same input shape as the getOperation test for FETCH_OBJ_R, here routed through getInstruction and unwrapped from Operation */ "be able to parse FETCH_OBJ_R THIS string(\"Mail\")" in { + val operation = s"""FETCH_OBJ_R THIS string("${encode("Mail")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FETCH_OBJ_R" + lhs shouldBe ByteCodePlaceIndicator("THIS") + rhs shouldBe StringLiteral("Mail") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + /* SWITCH is expected to carry (opcode, switched value, Seq of (case label, target number)) */ "be able to parse SWITCH_STRING CV2($sType) \"to\": L13" in { + val operation = s"""SWITCH_STRING CV2($$sType) "${encode("to")}": L13""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SWITCH(code, variable, switches) => + code shouldBe "SWITCH_STRING" + variable shouldBe Variable("sType", tmp = false) + switches shouldBe Seq(("to", 13)) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse SWITCH_STRING CV2($sType) \"to\": BB3" in { + val operation = s"""SWITCH_STRING CV2($$sType) "${encode("to")}": BB3""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SWITCH(code, variable,
switches) => + code shouldBe "SWITCH_STRING" + variable shouldBe Variable("sType", tmp = false) + switches shouldBe Seq(("to", 3)) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse SWITCH_STRING CV2($sType) \"to\": L13, \"bcc\": L14" in { + val operation = s"""SWITCH_STRING CV2($$sType) "${encode("to")}": L13, "${encode("bcc")}": L14""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SWITCH(code, variable, switches) => + code shouldBe "SWITCH_STRING" + variable shouldBe Variable("sType", tmp = false) + switches shouldBe Seq(("to", 13), ("bcc", 14)) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse SWITCH_STRING string(\"cl\") \"cgi\": L51, \"cgi-fcgi\": L51, default: L69" in { + val operation = s"""SWITCH_STRING string("${encode("cl")}") "${encode("cgi")}": L51, "${encode("cgi-fcgi")}": L51, default: L69""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SWITCH(code, value, switches) => + code shouldBe "SWITCH_STRING" + value shouldBe StringLiteral("cl") + switches shouldBe Seq(("cgi",51),("cgi-fcgi",51),("default",69)) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse SWITCH_STRING CV2($sType) \"to\": 0013, \"bcc\": L14, default: L16" in { + val operation = + s"""SWITCH_STRING CV2($$sType) "${encode("to")}": L13, "${encode("bcc")}": L14, default: L16""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SWITCH(code, variable, switches) => + code shouldBe "SWITCH_STRING" + variable shouldBe 
Variable("sType", tmp = false) + switches shouldBe Seq(("to", 13), ("bcc", 14), ("default", 16)) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse SWITCH_LONG CV($test) 33: L13, 44: L14, default: L23" in { + val operation = "SWITCH_LONG CV($test) 33: L13, 44: L14, default: L23" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SWITCH(code, variable, switches) => + code shouldBe "SWITCH_LONG" + variable shouldBe Variable("test", tmp = false) + switches shouldBe Seq(("33", 13), ("44", 14), ("default", 23)) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse RETURN_BY_REF (value) bool(false)" in { + val operation = "RETURN_BY_REF (value) bool(false)" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op : Operation => + op.op shouldBe DualValueOperation("RETURN_BY_REF",ByteCodeKeyword("value"),BooleanLiteral(false)) + case x => fail(s"unexpected operation $x") + } + } + "be able to parse RETURN_BY_REF null" in { + val operation = "RETURN_BY_REF null" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op : Operation => + op.op shouldBe SingleValueOperation("RETURN_BY_REF",Null()) + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ECHO of multi line string" in { + val encoded = encode(""" + | + |Joomla! derives from copyrighted works licensed under the GNU General + |Public License. This version has been modified pursuant to the + |GNU General Public License as of September 15, 2005, and as distributed, + |it includes or is derivative of works licensed under the GNU General + |Public License or other free or open source software licenses. 
Please + |see the CREDITS.php for a non-exhaustive list of contributors and + |copyright holders. A full text version of the GNU GPL version 2 can be + |found in the LICENSE.php file. A full text version of the other licenses + |that Joomla! is derivative of or includes can be found in LICENSES.php. + | + |""".stripMargin) + val operation = + s"""ECHO string("$encoded")""".stripMargin + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SingleValueOperation(code, value) => + code shouldBe "ECHO" + value shouldBe StringLiteral( + """ + | + |Joomla! derives from copyrighted works licensed under the GNU General + |Public License. This version has been modified pursuant to the + |GNU General Public License as of September 15, 2005, and as distributed, + |it includes or is derivative of works licensed under the GNU General + |Public License or other free or open source software licenses. Please + |see the CREDITS.php for a non-exhaustive list of contributors and + |copyright holders. A full text version of the GNU GPL version 2 can be + |found in the LICENSE.php file. A full text version of the other licenses + |that Joomla! is derivative of or includes can be found in LICENSES.php. 
+ | + |""".stripMargin) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse FETCH_OBJ_IS CV1($display) string(\"panels\")" in { + val operation = s"""FETCH_OBJ_IS CV1($$display) string("${encode("panels")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FETCH_OBJ_IS" + lhs shouldBe Variable("display", tmp = false) + rhs shouldBe StringLiteral("panels") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse IN_ARRAY 0 CV0($type) array(...)" in { + val operation = "IN_ARRAY 0 CV0($type) array(...)" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "IN_ARRAY" + first shouldBe IntegerLiteral(0) + second shouldBe Variable("type", tmp = false) + third shouldBe ArrayValue(None) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse INIT_ARRAY 2 (packed) (ref) V11 NEXT" in { + val operation = "INIT_ARRAY 2 (packed) (ref) V11 NEXT" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case QuintupleValueOperation(code, + first, + second, + third, + fourth, + fifth) => + code shouldBe "INIT_ARRAY" + first shouldBe IntegerLiteral(2) + second shouldBe ByteCodeKeyword("packed") + third shouldBe ByteCodeKeyword("ref") + fourth shouldBe Variable("V11", tmp = true, reference = true) + fifth shouldBe ByteCodePlaceIndicator("NEXT") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + 
} + } + "be able to parse SEND_VAR_NO_REF_EX V30 1" in { + val operation = "SEND_VAR_NO_REF_EX V30 1" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "SEND_VAR_NO_REF_EX" + lhs shouldBe Variable("V30", tmp = true, reference = true) + rhs shouldBe IntegerLiteral(1) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse CATCH string(\"Exception\")" in { + val operation = s"""CATCH string("${encode("Exception")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SingleValueOperation(code, value) => + code shouldBe "CATCH" + value shouldBe StringLiteral("Exception") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse CATCH string(\"Exception\") L66" in { + val operation = s"""CATCH string("${encode("Exception")}") L66""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "CATCH" + lhs shouldBe StringLiteral("Exception") + rhs shouldBe IntegerLiteral(66) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ASSIGN_OBJ THIS string(\"auth_username\")" in { + val operation = s"""ASSIGN_OBJ THIS string("${encode("auth_username")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ASSIGN_OBJ" + lhs shouldBe ByteCodePlaceIndicator("THIS") + 
rhs shouldBe StringLiteral("auth_username") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ASSIGN_OBJ_OP (CONCAT) THIS string(\"response\")" in { + val operation = s"""ASSIGN_OBJ_OP (CONCAT) THIS string("${encode("response")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "ASSIGN_OBJ_OP" + first shouldBe AssignOpLiteral("CONCAT") + second shouldBe ByteCodePlaceIndicator("THIS") + third shouldBe StringLiteral("response") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse FE_FETCH_RW V6 CV2($frameLine) L29" in { + val operation = "FE_FETCH_RW V6 CV2($frameLine) L29" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "FE_FETCH_RW" + first shouldBe Variable("V6", tmp = true, reference = true) + second shouldBe Variable("frameLine", tmp = false) + third shouldBe IntegerLiteral(29) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse SEND_VAL string(\"\") 2" in { + val operation = "SEND_VAL string(\"\") 2" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "SEND_VAL" + lhs shouldBe StringLiteral("") + rhs shouldBe IntegerLiteral(2) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ASSIGN_REF (function) CV0($backref_view) V2" in { + val operation = "ASSIGN_REF 
(function) CV0($backref_view) V2" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "ASSIGN_REF_3" + first shouldBe ByteCodeKeyword("function") + second shouldBe Variable("backref_view", tmp = false) + third shouldBe Variable("V2", tmp = true, reference = true) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse RETURN null" in { + val operation = "RETURN null" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SingleValueOperation(code, value) => + code shouldBe "RETURN" + value shouldBe Null() + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse RETURN_BY_REF (function) null" in { + val operation = "RETURN_BY_REF (function) null" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "RETURN_BY_REF" + lhs shouldBe ByteCodeKeyword("function") + rhs shouldBe Null() + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ADD_ARRAY_ELEMENT array(...) string(\"libraries\")" in { + val operation = s"""ADD_ARRAY_ELEMENT array(...) 
string("${encode("libraries")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ADD_ARRAY_ELEMENT" + lhs shouldBe ArrayValue(None) + rhs shouldBe StringLiteral("libraries") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse FE_RESET_R array(...) L267" in { + val operation = "FE_RESET_R array(...) L267" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FE_RESET_R" + lhs shouldBe ArrayValue(None) + rhs shouldBe IntegerLiteral(267) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ASSIGN_OBJ_REF (function) CV6($cache) string(\"display\")" in { + val operation = + s"""ASSIGN_OBJ_REF (function) CV6($$cache) string("${encode("display")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "ASSIGN_OBJ_REF_3" + first shouldBe ByteCodeKeyword("function") + second shouldBe Variable("cache", tmp = false) + third shouldBe StringLiteral("display") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse FETCH_OBJ_W CV0($account) string(\"content\")" in { + val operation = s"""FETCH_OBJ_W CV0($$account) string("${encode("content")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + 
code shouldBe "FETCH_OBJ_W_2" + lhs shouldBe Variable("account", tmp = false) + rhs shouldBe StringLiteral("content") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be bale to parse FETCH_OBJ_W (ref) V3 string(\"content\")" in { + val operation = s"""FETCH_OBJ_W (ref) V3 string("${encode("content")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "FETCH_OBJ_W_3" + first shouldBe ByteCodeKeyword("ref") + second shouldBe Variable("V3", tmp = true, reference = true) + third shouldBe StringLiteral("content") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ASSIGN_STATIC_PROP string(\"leeway\") string(\"Firebase\\JWT\\JWT\")" in { + val operation = + s"""ASSIGN_STATIC_PROP string("${encode("leeway")}") string("${encode("Firebase\\JWT\\JWT")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ASSIGN_STATIC_PROP_2" + lhs shouldBe StringLiteral("leeway") + rhs shouldBe StringLiteral("Firebase\\JWT\\JWT") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse TICKS 1" in { + val operation = "TICKS 1" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation(SingleValueOperation("TICKS", IntegerLiteral(1))) + } + "be able to parse ADD_ARRAY_ELEMENT float(inf) string(\"depth_min_leaf\")" in { + val operation = s"""ADD_ARRAY_ELEMENT float(inf) string("${encode("depth_min_leaf")}")""" + val Parsed.Success(result, length) = parse(operation, 
getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("ADD_ARRAY_ELEMENT", FloatLiteral(Float.PositiveInfinity), StringLiteral("depth_min_leaf"))) + } + "be able to parse ASSIGN_STATIC_PROP_OP (CONCAT) string(\"extraHeaderHTML\")" in { + val operation = s"""ASSIGN_STATIC_PROP_OP (CONCAT) string("${encode("extraHeaderHTML")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("ASSIGN_STATIC_PROP_OP", AssignOpLiteral("CONCAT"), StringLiteral("extraHeaderHTML")) + ) + } + "be able to parse FUNC_GET_ARGS" in { + val operation = "FUNC_GET_ARGS" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + NoValueOperation("FUNC_GET_ARGS") + ) + } + "be able to parse FUNC_GET_ARGS int(1)" in { + val operation = "FUNC_GET_ARGS int(1)" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + SingleValueOperation("FUNC_GET_ARGS", IntegerLiteral(1)) + ) + } + "be able to parse POST_INC_STATIC_PROP string(\"writes\")" in { + val operation = s"""POST_INC_STATIC_PROP string("${encode("writes")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + SingleValueOperation("POST_INC_STATIC_PROP", StringLiteral("writes")) + ) + } + "be able to parse INIT_FCALL 1 96 string(\"mb_strlen\")" in { + val operation = s"""INIT_FCALL 1 96 string("${encode("mb_strlen")}")" + otherStuff""" + val Parsed.Success(result, _) = parse(operation, getInstruction(_)) + result shouldBe Operation( + INIT_FCALL(1, 96, StringLiteral("mb_strlen")) + ) + } + "be able to parse SEND_USER string(\"spc\") 3" in { + val operation = s"""SEND_USER string("${encode("spc")}") 3""" + val 
Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("SEND_USER", StringLiteral("spc"), IntegerLiteral(3)) + ) + } + "be able to parse PRE_INC_STATIC_PROP string(\"value\") string(\"other\")" in { + val operation = s"""PRE_INC_STATIC_PROP string("${encode("value")}") string("${encode("other")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("PRE_INC_STATIC_PROP", StringLiteral("value"), StringLiteral("other")) + ) + } + "be able to parse FETCH_STATIC_PROP_IS string(\"documents\") string(\"phpQuery\")" in { + val operation = s"""FETCH_STATIC_PROP_IS string("${encode("documents")}") string("${encode("phpQuery")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("FETCH_STATIC_PROP_IS", StringLiteral("documents"), StringLiteral("phpQuery")) + ) + } + "be able to parse DECLARE_ANON_CLASS string(\"class@anonymous\") string(\"vanilla\\models\\model\")" in { + val operation = s"""DECLARE_ANON_CLASS string("${encode("class@anonymous")}") string("${encode("vanilla\\models\\model")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("DECLARE_ANON_CLASS", StringLiteral("class@anonymous"), StringLiteral("vanilla\\models\\model")) + ) + } + "be able to process weird string stuff" in { + val operation = s"""ADD_ARRAY_ELEMENT string("${encode("$G")}") string("${encode("^G")}")""" + val Parsed.Success(result,length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("ADD_ARRAY_ELEMENT", StringLiteral("$G"), StringLiteral("^G")) + ) + } + "be able to parse FETCH_OBJ_R THIS 
string(\"b\")" in { + val operation = s"""FETCH_OBJ_R THIS string("${encode("b")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("FETCH_OBJ_R", ByteCodePlaceIndicator("THIS"), StringLiteral("b")) + ) + } + "be able to parse PRE_DEC_STATIC_PROP string(\"active\")" in { + val operation = s"""PRE_DEC_STATIC_PROP string("${encode("active")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + SingleValueOperation("PRE_DEC_STATIC_PROP", StringLiteral("active")) + ) + } + "be able to parse FETCH_GLOBALS" in { + val op = "FETCH_GLOBALS" + val Parsed.Success(result, length) = parse(op, getInstruction(_)) + length shouldBe op.length + result shouldBe Operation( + NoValueOperation("FETCH_GLOBALS") + ) + } + "be able to parse VERIFY_NEVER_TYPE" in { + val op = "VERIFY_NEVER_TYPE" + val Parsed.Success(result, length) = parse(op, getInstruction(_)) + length shouldBe op.length + result shouldBe Operation( + NoValueOperation("VERIFY_NEVER_TYPE") + ) + } + "be able to parse CALLABLE_CONVERT" in { + val op = "CALLABLE_CONVERT" + val Parsed.Success(result, length) = parse(op, getInstruction(_)) + length shouldBe op.length + result shouldBe Operation( + NoValueOperation("CALLABLE_CONVERT") + ) + } + "be able to parse CASE_STRICT T1 int(2)" in { + val op = "CASE_STRICT T1 int(2)" + val Parsed.Success(result, length) = parse(op, getInstruction(_)) + length shouldBe op.length + result shouldBe Operation(DualValueOperation("CASE_STRICT", Variable("T1", tmp= true), IntegerLiteral(2))) + } + "be able to parse DECLARE_LAMBDA_FUNCTION 0" in{ + val op = "DECLARE_LAMBDA_FUNCTION int(0)" + val Parsed.Success(result, length) = parse(op, getInstruction(_)) + length shouldBe op.length + result shouldBe Operation(SingleValueOperation("DECLARE_LAMBDA_FUNCTION", IntegerLiteral(0))) + } + "be 
able to parse TYPE_CHECK bool(false) V2" in { + val op = "TYPE_CHECK bool(false) V2" + val Parsed.Success(result, length) = parse(op, getInstruction(_)) + length shouldBe op.length + result shouldBe Operation(DualValueOperation("TYPE_CHECK", BooleanLiteral(false), Variable("V2", tmp = true, reference = true))) + } + "be able to parse CHECK_VAR" in { + val string = "CHECK_VAR CV0($undef)" + val Parsed.Success(result, length) = parse(string, getOperation(_)) + length shouldBe string.length + result.op match { + case SingleValueOperation(code, value) => code shouldBe "CHECK_VAR" + value shouldBe Variable("undef", tmp= false) + case _ => fail() + } + } + } + + "be able to parse named parameters" should { + "be able to parse SEND_VAL[_X] bool(false) string(\"double_encode\")" in { + // I've only seen SEND_VAL in the wild so far, the rest is extrapolated. /Malte + for (x <- Seq("SEND_VAL", "SEND_VAL_EX", "SEND_USER", "SEND_REF")) { + val operation = x + s""" bool(false) string("${encode("double_encode")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_), verboseFailures = true) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe x + lhs shouldBe BooleanLiteral(false) + rhs shouldBe StringLiteral("double_encode") + case x => fail(s"unexpected operation $x") + } + case x => fail(s"unexpected operation $x") + } + }} + "be able to parse SEND_VAR[_X] bool(false) string(\"double_encode\")" in { + for (x <- Seq("SEND_VAR", "SEND_VAR_EX", "SEND_VAR_NO_REF_EX", "SEND_VAR_NO_REF", "SEND_FUNC_ARG")) { + val operation = x + s""" CV($$a) string("${encode("double_encode")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_), verboseFailures = true) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe x + lhs shouldBe Variable("a", 
tmp = false) + rhs shouldBe StringLiteral("double_encode") + case x => fail(s"unexpected operation $x") + } + case x => fail(s"unexpected operation $x") + } + } + } + } + + "be able to parse COPY_TMP T2" in { + val operation = "COPY_TMP T2" + val Parsed.Success(result, length) = parse(operation, parseOperation(_), verboseFailures = true) + length shouldBe operation.length + result match { + case SingleValueOperation(code, value) => + code shouldBe "COPY_TMP" + value shouldBe Variable("T2", tmp = true) + } + } + + "parseDestinationPattern" should { + "parse \"to\": L13" in { + val string = s""""${encode("to")}": L13""" + val Parsed.Success(result, length) = + parse(string, parseStringDestinationPattern(_)) + length shouldBe string.length + result shouldBe (("to", "13")) + } + "parse default: L13" in { + val string = "default: L13" + val Parsed.Success(result, length) = + parse(string, parseStringDestinationPattern(_)) + length shouldBe string.length + result shouldBe (("default", "13")) + } + "parse 44: L13" in { + val string = "44: L13" + val Parsed.Success(result, length) = + parse(string, parseNumberDestinationPattern(_)) + length shouldBe string.length + result shouldBe (("44", "13")) + } + "parse default: L16" in { + val string = "default: L16" + val Parsed.Success(result, length) = + parse(string, parseNumberDestinationPattern(_)) + length shouldBe string.length + result shouldBe (("default", "16")) + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/LiteralsTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/LiteralsTest.scala new file mode 100644 index 0000000..1f268d8 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/LiteralsTest.scala @@ -0,0 +1,189 @@ +package io.joern.bytecode.parser.php7 + +import fastparse._ +import io.joern.bytecode.parser.constructs.{ByteCodeKeyword, FloatLiteral} +import io.joern.bytecode.parser.php7.EasyBase64.encode +import 
io.joern.bytecode.parser.php7.Literals._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class LiteralsTest extends AnyWordSpec with Matchers { + + "parser parseStringLiteral" should { + "be able to parse string(\"value\")" in { + val Parsed.Success(result, _) = + parse(s"""string("${encode("value")}")""", getStringLiteral(_)) + assert(result.value == "value") + } + "be able to parse a string(\"value\") 42" in { + val Parsed.Success(result, _) = parse(s"""string("${encode("value")}") 42""", getStringLiteral(_)) + result.value shouldBe "value" + } + "be able to parse string(\"a\")" in { + val Parsed.Success(result, _) = + parse(s"""string("${encode("a")}")""", getStringLiteral(_)) + assert(result.value == "a") + } + "be able to parse multi line strings without quotations" in { + val multiLineString = + """I am a + a multi line string without + quotation that should be parsable""" + val stringLiteral = "string(\"" + encode(multiLineString) + "\")" + val Parsed.Success(result, length) = + parse(stringLiteral, getStringLiteral(_)) + assert(length == stringLiteral.length) + assert(result.value == multiLineString) + } + "be able to parse multi line strings with escaped quotations" in { + val multiLineString = + "I am a \n multi line string with \\\" \n that should be parsable" + val stringLiteral = "string(\"" + encode(multiLineString) + "\")" + val Parsed.Success(result, length) = + parse(stringLiteral, getStringLiteral(_)) + assert(length == stringLiteral.length) + assert(result.value == multiLineString) + } + "be able to parse valid qualifed namespace path" in { + val string = s"""string("${encode("PleskX\\Api\\strtolower")}")""" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "PleskX\\Api\\strtolower" + } + "be able to parse string with underscore" in { + val string = s"""string("${encode("some_text")}")""" + val Parsed.Success(result, 
length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "some_text" + } + "be able to parse string with leading underscore" in { + val string = s"""string("${encode("_leading")}")""" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "_leading" + } + "be able to parse string with numbers" in { + val string = s"""string("${encode("Copyright3_6_56")}")""" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "Copyright3_6_56" + } + "be bale to parse string that is empty" in { + val string = s"""string("${encode("")}")""" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "" + } + "be able to parse string string(\"/[\\s]+/\")" in { + val string = s"""string("${encode("/[\\s]+/")}")""" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "/[\\s]+/" + } + "be able to parse string string(\"Firebase\\JWT\\JWT\")" in { + val string = s"""string("${encode("Firebase\\JWT\\JWT")}")""" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "Firebase\\JWT\\JWT" + } + "be able to parse escaped backslash" in { + val string = s"""string("${encode("\\\\")}")""" + val value = "\\\\" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe value + } + } + + "parser parseIntLiteral" should { + "be able to parse int(42)" in { + val Parsed.Success(result, _) = parse("int(42)", getIntLiteral(_)) + assert(result.value == 42) + } + "be able to parse int(-42)" in { + val Parsed.Success(result, _) = parse("int(-42)", getIntLiteral(_)) + result.value shouldBe -42 + } + "be bale to parse int(0)" in 
{ + val Parsed.Success(result, _) = parse("int(0)", getIntLiteral(_)) + result.value shouldBe 0 + } + } + + "parser parseFloatLiteral" should { + "be able to parse float(1.2)" in { + val Parsed.Success(result, _) = parse("float(1.2)", getFloatLiteral(_)) + result shouldBe FloatLiteral("1.2".toFloat) + } + "be bale to parse float(1)" in { + val Parsed.Success(result, _) = parse("float(1)", getFloatLiteral(_)) + result shouldBe FloatLiteral("1".toFloat) + } + "be able to parse float(-1)" in { + val Parsed.Success(result, _) = parse("float(-1)", getFloatLiteral(_)) + result shouldBe FloatLiteral("-1".toFloat) + } + "be able to parse float(-1.22)" in { + val Parsed.Success(result, _) = parse("float(-1.22)", getFloatLiteral(_)) + result shouldBe FloatLiteral("-1.22".toFloat) + } + "be able to parse float(1e-06)" in { + val Parsed.Success(result, _) = parse("float(1e-06)", getFloatLiteral(_)) + result shouldBe FloatLiteral("1e-06".toFloat) + } + "be able to parse float(1e+06)" in { + val Parsed.Success(result, _) = parse("float(1e+06)", getFloatLiteral(_)) + result shouldBe FloatLiteral("1e+06".toFloat) + } + "be able to parse float(6.367e+06)" in { + val Parsed.Success(result, _) = parse("float(6.367e+06)", getFloatLiteral(_)) + result shouldBe FloatLiteral("6.367e+06".toFloat) + } + "be able to process float(-inf)" in { + val Parsed.Success(result, _) = parse("float(-inf)", getFloatLiteral(_)) + result shouldBe FloatLiteral(Float.NegativeInfinity) + } + } + + "(require_once)" should { + "be able to parsed by getByteCodeKeyword" in { + val string = "(require_once)" + val Parsed.Success(result, length) = parse(string, getByteCodeKeyword(_)) + length shouldBe string.length + result match { + case ByteCodeKeyword(value) => value shouldBe "require_once" + } + } + } + + "(require)" should { + "be able to parsed by getByteCodeKeyword" in { + val string = "(require)" + val Parsed.Success(result, length) = parse(string, getByteCodeKeyword(_)) + length shouldBe string.length + 
result match {
+        case ByteCodeKeyword(value) => value shouldBe "require"
+      }
+    }
+  }
+
+  "getByteCodeKeyword" should {
+    "be able to parse (require_once)" in {
+      val string = "(require_once)"
+      val Parsed.Success(result, length) = parse(string, getByteCodeKeyword(_))
+      length shouldBe string.length
+      result match {
+        case ByteCodeKeyword(value) => value shouldBe "require_once"
+      }
+    }
+    "be able to parse (require)" in {
+      val string = "(require)"
+      val Parsed.Success(result, length) = parse(string, getByteCodeKeyword(_))
+      length shouldBe string.length
+      result match {
+        case ByteCodeKeyword(value) => value shouldBe "require"
+      }
+    }
+  }
+
+}
diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/LiveRangesBlockTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/LiveRangesBlockTest.scala
new file mode 100644
index 0000000..d002581
--- /dev/null
+++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/LiveRangesBlockTest.scala
@@ -0,0 +1,39 @@
+package io.joern.bytecode.parser.php7
+
+import fastparse._
+import io.joern.bytecode.parser.php7.LiveRangesBlock.getLiveRangesBlock
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+
+/** Specs for `getLiveRangesBlock`, the parser for `LIVE RANGES:` sections. */
+class LiveRangesBlockTest extends AnyWordSpec with Matchers {
+
+  "parser getLiveRangesBlock" should {
+    "be able to parse a full live ranges block" in {
+      val input =
+        """LIVE RANGES:
+ 4: L2 - L5 (loop)
+"""
+      val Parsed.Success(parsed, consumed) = parse(input, getLiveRangesBlock(_))
+      // the parser has to consume the complete block
+      consumed shouldBe input.length
+      val entries = parsed.rangesEntry
+      entries.length shouldBe 1
+      // the single entry carries variable number, start, end and range type
+      val entry = entries.head
+      entry.varNum shouldBe 4
+      entry.start shouldBe 2
+      entry.end shouldBe 5
+      entry.rangeType shouldBe "loop"
+    }
+    "be able to parse valid block" in {
+      val input =
+        """LIVE RANGES:
+          | 3: L4 - L19 (loop)
+          |""".stripMargin
+      val Parsed.Success(_, consumed) = parse(input, getLiveRangesBlock(_))
+      consumed shouldBe input.length
+    }
+  }
+
+}
diff
--git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/MethodDefinitionTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/MethodDefinitionTest.scala
new file mode 100644
index 0000000..1e68eba
--- /dev/null
+++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/MethodDefinitionTest.scala
@@ -0,0 +1,54 @@
+package io.joern.bytecode.parser.php7
+
+import fastparse._
+import io.joern.bytecode.parser.php7.EasyBase64.encode
+import io.joern.bytecode.parser.php7.MethodDefinition.getFullMethodDefinitionBlock
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+
+/** Specs for `getFullMethodDefinitionBlock`.
+  *
+  * The fixture is a dump of `$_main` holding two sections back to back: the
+  * `(before optimizer)` instruction listing with its live ranges and the
+  * `(before block pass)` basic-block listing.
+  */
+class MethodDefinitionTest extends AnyWordSpec with Matchers {
+
+  "parser getFullMethodDefinitionBlock" should {
+    "be able to parse a single MethodDefinitionBlock fully" in {
+      // string operands are passed through `encode` from `EasyBase64` rather than written as plain text
+      val block = s"""$$_main: ; (lines=8, args=0, vars=1, tmps=4)
+                     | ; (before optimizer)
+                     | ; /home/simon/tmp/bytecode-cpg/basicOOP.php:1-23
+                     |L0 (20): V1 = NEW 1 string("${encode("Basic")}")
+                     |L1 (20): SEND_VAL_EX string("${encode("value")}") 1
+                     |L2 (20): DO_FCALL
+                     |L3 (20): ASSIGN CV0($$var) V1
+                     |L4 (21): INIT_METHOD_CALL 1 CV0($$var) string("${encode("test2")}")
+                     |L5 (21): SEND_VAL_EX string("${encode("other")}") 1
+                     |L6 (21): DO_FCALL
+                     |L7 (23): RETURN int(1)
+                     |LIVE RANGES:
+                     | 1: L1 - L3 (new)
+                     |
+                     |$$_main: ; (lines=8, args=0, vars=1, tmps=4)
+                     | ; (before block pass)
+                     | ; /home/simon/tmp/bytecode-cpg/basicOOP.php:1-23
+                     |BB0: start exit lines=[0-7]
+                     | V1 = NEW 1 string("${encode("Basic")}")
+                     | SEND_VAL_EX string("${encode("value")}") 1
+                     | DO_FCALL
+                     | ASSIGN CV0($$var) V1
+                     | INIT_METHOD_CALL 1 CV0($$var) string("${encode("test2")}")
+                     | SEND_VAL_EX string("${encode("other")}") 1
+                     | DO_FCALL
+                     | RETURN int(1)
+                     |""".stripMargin
+      val Parsed.Success(_, length) =
+        parse(block, getFullMethodDefinitionBlock(_))
+      // both sections have to be consumed by a single parser run
+      length shouldBe block.length
+    }
+  }
+
+}
diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/VariablesTest.scala
b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/VariablesTest.scala
new file mode 100644
index 0000000..a6540fc
--- /dev/null
+++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php7/VariablesTest.scala
@@ -0,0 +1,31 @@
+package io.joern.bytecode.parser.php7
+
+import fastparse._
+import io.joern.bytecode.parser.php7.Variables._
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+
+/** Specs for the variable parsers `getVariable` and `getTemporary`. */
+class VariablesTest extends AnyWordSpec with Matchers {
+
+  "parser getVariable" should {
+    "be able to parse 'CV($variable_name)'" in {
+      val Parsed.Success(result, _) =
+        parse("CV($variable_name)", getVariable(_))
+      // the name is reported without the `CV(...)` wrapper and the leading `$`
+      result.name shouldBe "variable_name"
+      result.tmp shouldBe false
+    }
+  }
+
+  "parser getTemporary" should {
+    // the subject clause above already contributes "should" to the full test name
+    "be able to parse 'T1'" in {
+      val Parsed.Success(result, _) = parse("T1", getTemporary(_))
+      // a temporary keeps its whole token as name and is flagged as tmp
+      result.name shouldBe "T1"
+      result.tmp shouldBe true
+    }
+  }
+
+}
diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/BasicsTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/BasicsTest.scala
new file mode 100644
index 0000000..e80f028
--- /dev/null
+++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/BasicsTest.scala
@@ -0,0 +1,70 @@
+package io.joern.bytecode.parser.php8
+
+import fastparse._
+import io.joern.bytecode.parser.php8.Basics._
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+
+class BasicsTest extends AnyWordSpec with Matchers {
+
+  "parser escapedSlash" should {
+    "be able to parse \\" in {
+      val str = "\\"
+      val Parsed.Success(_, length) = parse(str, escapedSlash(_))
+      length shouldBe str.length
+    }
+  }
+
+  "parser legalString" should {
+    "be able to parse single line string \"test\"" in {
+      val singleLineString = "\"test\""
+      val Parsed.Success(_, length) = parse(singleLineString, legalString(_))
+      length shouldBe singleLineString.length
+    }
+    "be able to parse multi line string" in { +
val multiLineString =
+        """"this is
+ a multi line
+ string""""
+      val Parsed.Success(_, length) = parse(multiLineString, legalString(_))
+      length shouldBe multiLineString.length
+    }
+    "be able to parse a bunch of spaces" in {
+      val bunchOfSpaces = "\" \""
+      val Parsed.Success(_, length) = parse(bunchOfSpaces, legalString(_))
+      length shouldBe bunchOfSpaces.length
+    }
+    "be able to parse multi line string with \"" in {
+      val multiLineString =
+        "\"this is \n a multi line string with \\\" \n in the middle\""
+      val Parsed.Success(_, length) = parse(multiLineString, legalString(_))
+      length shouldBe multiLineString.length
+    }
+    "be able to parse string with escaped slash" in {
+      val str = "\"f\\s\""
+      val Parsed.Success(_, length) = parse(str, legalString(_))
+      length shouldBe str.length
+    }
+    "be able to parse fully qualified names" in {
+      val str = "\"fully\\qualified\\name\""
+      val Parsed.Success(_, length) = parse(str, legalString(_))
+      length shouldBe str.length
+    }
+    "be able to parse PleskX\\Api\\strtolower" in {
+      val str = "\"PleskX\\Api\\strtolower\""
+      val Parsed.Success(_, length) = parse(str, legalString(_))
+      length shouldBe str.length
+    }
+    "be able to parse <" in {
+      val str = "\"<\""
+      val Parsed.Success(_, length) = parse(str, legalString(_))
+      length shouldBe str.length
+    }
+    "be able to parse /^[a-z]/" in {
+      val str = "\"/^[a-z]/\""
+      val Parsed.Success(_, length) = parse(str, legalString(_))
+      length shouldBe str.length
+    }
+  }
+
+}
diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/ByteCodeBlockTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/ByteCodeBlockTest.scala
new file mode 100644
index 0000000..d0424f0
--- /dev/null
+++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/ByteCodeBlockTest.scala
@@ -0,0 +1,244 @@
+package io.joern.bytecode.parser.php8
+
+import fastparse._
+import io.joern.bytecode.parser.EasyBase64.encode
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php8.ByteCodeBlock._
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+
+/** Specs for the PHP 8 `ByteCodeBlock` parsers.
+  *
+  * `getDefiningInstructionLine` is exercised on single numbered instruction
+  * lines, `getByteCodeBlock` on whole method dumps, including dumps that end in
+  * a `LIVE RANGES:` and an `EXCEPTION TABLE:` section.
+  */
+class ByteCodeBlockTest extends AnyWordSpec with Matchers {
+
+  /*"parser getLineNumber" should {
+    "be able to parse L42" in {
+      val Parsed.Success(result, _) = parse(input = "L42", getLineNumber(_))
+      assert(result == 42)
+    }
+  }*/
+
+  /*"parser getFileLineNumber" should {
+    "be able to parse (43)" in {
+      val Parsed.Success(result, _) =
+        parse(input = "(43)", getFileLineNUmber(_))
+      assert(result == 43)
+    }
+  }*/
+
+  "parser getDefiningInstructionLine" should {
+    "be able to parse 0042 DO_ICALL" in {
+      val Parsed.Success(result, count) =
+        parse(input = "0042 DO_ICALL", getDefiningInstructionLine(_))
+      assert(count == 13)
+      assert(result.fileLine.isEmpty)
+      assert(result.opNumber.contains(42))
+      result.instruction match {
+        case instruction: Operation =>
+          instruction.op match {
+            case NoValueOperation("DO_ICALL") => //passing the test
+            case _ => fail(message = "the operation is not a NoValueOperation")
+          }
+        case _ => fail()
+      }
+    }
+    "be able to parse 0000 V1 = NEW 0 string(\"Basic\")" in {
+      val line = s"""0000 V1 = NEW 0 string("${encode("Basic")}")"""
+      val Parsed.Success(result, count) =
+        parse(line, getDefiningInstructionLine(_))
+      count shouldBe line.length
+      result.opNumber shouldBe Some(0)
+      // result.fileLine shouldBe Some(20)
+    }
+  }
+
+  "parser getMethodBlockByteCode" should {
+    "be able to parse single instruction code block" in {
+      val singleLineByteCodeBlock =
+        """$_main:
+          | ; (lines=42, args=3, vars=3, tmps=2)
+          | ; (before optimizer)
+          | ; main:23-42
+          | ; return [] RANGE[0..0]
+          |0000 CONCAT T1 T2
+          |""".stripMargin
+      val Parsed.Success(result, count) =
+        parse(singleLineByteCodeBlock, getByteCodeBlock(_))
+      assert(count == singleLineByteCodeBlock.length)
+      // NOTE(review): "$_main" is expected back as "dlr_main" — presumably the parser rewrites "$"; confirm
+      assert(result.name == "dlr_main")
+      assert(result.lines == 42)
+      assert(result.args == 3)
+      assert(result.vars == 3)
+      assert(result.tmps == 2)
+      assert(result.parsingMetaInfo.head == "before")
+      assert(result.parsingMetaInfo(1) == "optimizer")
+      assert(result.fileName == "main")
+      assert(result.lineStart == 23)
+      assert(result.lineEnd == 42)
+      assert(result.instructions.length == 1)
+      result.instructions.head match {
+        case instr: InstructionLine =>
+          assert(instr.opNumber.contains(0))
+          // assert(instr.fileLine == Some(23))
+          instr.instruction match {
+            case instr: Operation =>
+              instr.op match {
+                case DualValueOperation("CONCAT", _, _) =>
+                case _ => fail()
+              }
+            case _ => fail()
+          }
+        case _ => fail()
+      }
+    }
+    "be able to parse multi instructions code block" in {
+      val singleLineByteCodeBlock =
+        """$_main:
+          | ; (lines=42, args=3, vars=3, tmps=2)
+          | ; (before optimizer)
+          | ; main:23-42
+          | ; return [] RANGE[0..0]
+          |0000 CONCAT T1 T2
+          |0001 T3 = CONCAT T1 T2
+          |""".stripMargin
+      val Parsed.Success(result, count) =
+        parse(singleLineByteCodeBlock, getByteCodeBlock(_))
+      assert(count == singleLineByteCodeBlock.length)
+      assert(result.name == "dlr_main")
+      assert(result.lines == 42)
+      assert(result.args == 3)
+      assert(result.vars == 3)
+      assert(result.tmps == 2)
+      assert(result.parsingMetaInfo.head == "before")
+      assert(result.parsingMetaInfo(1) == "optimizer")
+      assert(result.fileName == "main")
+      assert(result.lineStart == 23)
+      assert(result.lineEnd == 42)
+      assert(result.instructions.length == 2)
+      result.instructions.head match {
+        case instr: InstructionLine =>
+          assert(instr.opNumber.contains(0))
+          //assert(instr.fileLine.contains(23))
+          instr.instruction match {
+            case instr: Operation =>
+              instr.op match {
+                case DualValueOperation("CONCAT", _, _) =>
+                case _ => fail()
+              }
+            case _ => fail()
+          }
+        case _ => fail()
+      }
+      result.instructions(1) match {
+        case instr: InstructionLine =>
+          assert(instr.opNumber.contains(1))
+          //assert(instr.fileLine.contains(24))
+          instr.instruction match {
+            case _ : Assignment =>
+            case _ => fail()
+          }
+        case _ => fail()
+      }
+    }
+    // ${encode("Basic")}
+    "be able to parse the beginning of a proper dump" in {
+      val fullDump =
+        s"""$$_main:
+           | ; (lines=7, args=0, vars=1, tmps=3)
+           | ; (before optimizer)
+           | ; trivial-main.php:1-4
+           | ; return [] RANGE[0..0]
+           |0000 EXT_STMT
+           |0001 INIT_FCALL 1 96 string("${encode("phpinfo")}")
+           |0002 T1 = CONCAT string("${encode("conca")}") CV0($$var)
+           |0003 T2 = CONCAT T1 string("${encode("tenation")}")
+           |0004 SEND_VAL T2 1
+           |0005 DO_FCALL
+           |0006 RETURN int(1)
+           |
+           |$$_main:
+           | ; (lines=7, args=0, vars=1, tmps=3)
+           | ; (before block pass)
+           | ; trivial-main.php:1-4
+           | ; return [] RANGE[0..0]
+           |BB0:
+           | ; start exit lines=[0-6]
+           |0000 EXT_STMT
+           |0001 INIT_FCALL 1 96 string("${encode("phpinfo")}")
+           |0002 T1 = CONCAT string("${encode("conca")}") CV0($$var)
+           |0003 T2 = CONCAT T1 string("${encode("tenation")}")
+           |0004 SEND_VAL T2 1
+           |0005 DO_FCALL
+           |0006 RETURN int(1)
+           |""".stripMargin
+      val Parsed.Success(_, count) = parse(fullDump, getByteCodeBlock(_))
+      assert(count > 10) //not the best test but I was too lazy to count
+    }
+    "be able to parse a dump containing a live range" in {
+      val dump =
+        s"""$$_main:
+           | ; (lines=10, args=0, vars=1, tmps=4)
+           | ; (before optimizer)
+           | ; /home/malte/coding/uni/master/testproject/tests/new.php:1-4
+           | ; return [] RANGE[0..0]
+           |0000 EXT_STMT
+           |0001 V1 = NEW 1 string("${encode("Basic")}")
+           |0002 SEND_VAL_EX string("${encode("value")}") 1
+           |0003 DO_FCALL
+           |0004 ASSIGN CV0($$ar) V1
+           |0005 EXT_STMT
+           |0006 INIT_METHOD_CALL 1 CV0($$var) string("${encode("test2")}")
+           |0007 SEND_VAL_EX string("${encode("other")}") 1
+           |0008 DO_FCALL
+           |0009 RETURN int(1)
+           |LIVE RANGES:
+           | 1: 0002 - 0004 (new)
+           |""".stripMargin
+      val Parsed.Success(_, count) = parse(dump, getByteCodeBlock(_))
+      count shouldBe dump.length
+    }
+    "be able to parse a dump containing live ranges and exception table" in {
+      // file from https://github.com/plesk/api-php-lib/blob/47f5f2e7b03bd088f35c84c6c10187bcc4698bcc/tests/TestCase.php
+      val dump =
+        s"""PleskXTest\\TestCase::tearDownAfterClass:
+           | ; (lines=21, args=0, vars=2, tmps=6)
+           | ; (before optimizer)
+           | ; pleskTC.php:32-40
+           | ; return [] RANGE[0..0]
+           |0000 EXT_STMT
+           |0001 T2 = FETCH_STATIC_PROP_R string("${encode("webspaces")}") (self) (exception)
+           |0002 V3 = FE_RESET_R T2 0018
+           |0003 FE_FETCH_R V3 CV0($$webspace) 0018
+           |0004 NOP
+           |0005 EXT_STMT
+           |0006 T4 = FETCH_STATIC_PROP_R string("${encode("_client")}") (static) (exception)
+           |0007 INIT_METHOD_CALL 0 T4 string("${encode("webspace")}")
+           |0008 V5 = DO_FCALL
+           |0009 INIT_METHOD_CALL 2 V5 string("${encode("delete")}")
+           |0010 SEND_VAL_EX string("${encode("id")}") 1
+           |0011 CHECK_FUNC_ARG 2
+           |0012 V6 = FETCH_OBJ_FUNC_ARG (ref) CV0($$webspace) string("${encode("id")}")
+           |0013 SEND_FUNC_ARG V6 2
+           |0014 DO_FCALL
+           |0015 JMP 0017
+           |0016 CV1($$e) = CATCH string("${encode("Exception")}")
+           |0017 JMP 0003
+           |0018 FE_FREE V3
+           |0019 EXT_STMT
+           |0020 RETURN null
+           |LIVE RANGES:
+           | 3: 0003 - 0018 (loop)
+           |EXCEPTION TABLE:
+           | 0005, 0016, -, -
+           |""".stripMargin
+      val Parsed.Success(_, count) = parse(dump, getByteCodeBlock(_))
+      count shouldBe dump.length
+    }
+  }
+
+}
diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/ControlFlowBlockTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/ControlFlowBlockTest.scala
new file mode 100644
index 0000000..f214135
--- /dev/null
+++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/ControlFlowBlockTest.scala
@@ -0,0 +1,386 @@
+package io.joern.bytecode.parser.php8
+
+import fastparse._
+import io.joern.bytecode.parser.EasyBase64.encode
+import io.joern.bytecode.parser.constructs._
+import io.joern.bytecode.parser.php8.ControlFlowBlock._
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+
+class ControlFlowBlockTest extends AnyWordSpec with Matchers {
+
+  "parser getBBInstructionLine" should {
+    "be able to parse DO_ICALL" in {
+      val Parsed.Success(result, count) = +
parse(" DO_ICALL", getBBInstructionLine(_)) + assert(count == 16) + assert(result.fileLine.isEmpty) + assert(result.opNumber.isEmpty) + result.instruction match { + case instruction: Operation => + instruction.op match { + case NoValueOperation("DO_ICALL") => + case _ => fail() + } + case _ => fail() + } + } + "be able to parse JMPZ T2 BB2" in { + val instr = " JMPZ T2 BB2" + val Parsed.Success(result, length) = parse(instr, getBBInstructionLine(_)) + length shouldBe instr.length + val InstructionLine(_, _, instruction) = result + instruction match { + case Operation(op) => + op match { + case DualValueOperation(command, condition, target) => + command shouldBe "JMPZ" + condition match { + case Variable(name, tmp, _) => + name shouldBe "T2" + tmp shouldBe true + case _ => fail() + } + target match { + case IntegerLiteral(value) => value shouldBe 2 + case _ => fail() + } + case _ => fail() + } + case _ => fail() + } + } + } + + "parser getBBDefinitionLine" should { + "be able to parse BB32: follow exit finally_end lines=[272-272]" in { + val line = + """BB32: + | ; follow exit finally_end lines=[272-272]""".stripMargin + val Parsed.Success(result, length) = parse(line, getBBDefinitionLine(_)) + length shouldBe line.length + result shouldBe BBDefinitionLine(32,Seq("follow","exit","finally_end"),272,272) + } + "be able to parse BB0 start exit lines=[0-5]" in { + val Parsed.Success(result, _) = + parse("BB0:\n ; start exit lines=[0-5]", getBBDefinitionLine(_)) + assert(result.number == 0) + assert(result.attributes.length == 2) + assert(result.attributes.head == "start") + assert(result.attributes(1) == "exit") + assert(result.firstInstruction == 0) + assert(result.lastInstruction == 5) + } + "be able to parse BB4 catch lines=[17-17]" in { + val line = """BB4: + | ; catch lines=[17-17]""".stripMargin + val Parsed.Success(result, length) = + parse(line, getBBDefinitionLine(_)) + length shouldBe line.length + result.attributes shouldBe List("catch") + 
result.firstInstruction shouldBe 17 + result.lastInstruction shouldBe 17 + } + "be able to parse BB2: unreachable unreachable_free lines=[12-13]" in { + val line = + """BB2: + | ; unreachable unreachable_free lines=[12-13]""".stripMargin + val Parsed.Success(result, length) = parse(line, getBBDefinitionLine(_)) + length shouldBe line.length + result.attributes shouldBe List("unreachable", "unreachable_free") + result.firstInstruction shouldBe 12 + result.lastInstruction shouldBe 13 + } + } + + "parser getBBToLine" should { + "be able to parse ; to=(BB0)" in { + val Parsed.Success(result, _) = parse(" ; to=(BB0)", getBBToLine(_)) + assert(result.length == 1) + assert(result.head == 0) + } + "be able to parse ; to=(BB0, BB1)" in { + val Parsed.Success(result, _) = parse(" ; to=(BB0, BB1)", getBBToLine(_)) + assert(result.length == 2) + assert(result.head == 0) + assert(result(1) == 1) + } + } + + "parser getBasicBlock" should { + "be able to parse a BasicBlock with JMPZ at the end but not including the next basic block" in { + val bbstring = + """BB0: + ; start lines=[0-2] + ; to=(BB2, BB1) +0001 ASSIGN CV0($x) int(42) +0002 T2 = IS_EQUAL CV0($x) int(43) +0003 JMPZ T2 BB2 +""" + val nextBBStart = """BB1: + | ; follow lines[3-4]""".stripMargin + val fullTestString = bbstring + nextBBStart + val Parsed.Success(result, length) = + parse(fullTestString, getBasicBlock(_)) + length shouldBe bbstring.length + result.firstInstruction shouldBe 0 + result.lastInstruction shouldBe 2 + result.number shouldBe 0 + result.followedBy.get.length shouldBe 2 + result.followedBy.get.head shouldBe 2 + result.followedBy.get(1) shouldBe 1 + result.instructions.length shouldBe 3 + } + "be able to parse basic block with catch as its only keyword" in { + val bb = + s"""BB4: + | ; catch lines=[17-17] + | ; to=(BB5) + |0001 CV1($$e) = CATCH string("${encode("Exception")}") + |""".stripMargin + val Parsed.Success(result, length) = parse(bb, getBasicBlock(_)) + length shouldBe bb.length + 
result.attributes shouldBe List("catch") + result.followedBy shouldBe Some(List(5)) + result.firstInstruction shouldBe 17 + result.lastInstruction shouldBe 17 + } + } + + "parser getMethodBlockControlFlow" should { + "be able to parse single BB single instruction" in { + val singleLineByteCodeBlock = + """$_main: + | ; (lines=42, args=3, vars=3, tmps=2) + | ; (before block pass) + | ; main:23-42 + | ; return [] RANGE[0..0] + |BB0: + | ; start exit lines=[0-1] + |0000 CONCAT T1 T2 + |""".stripMargin + val Parsed.Success(result, count) = + parse(singleLineByteCodeBlock, getControlFlowBlock(_)) + assert(count == singleLineByteCodeBlock.length) + assert(result.parsingMetaInfo.length == 3) + assert(result.parsingMetaInfo.head == "before") + assert(result.parsingMetaInfo(1) == "block") + assert(result.parsingMetaInfo(2) == "pass") + assert(result.blocks.length == 1) + assert(result.blocks.head.number == 0) + assert(result.blocks.head.attributes.length == 2) + assert(result.blocks.head.firstInstruction == 0) + assert(result.blocks.head.lastInstruction == 1) + assert(result.blocks.head.instructions.length == 1) + } + "be able to parse single BB multiple instructions" in { + val singleLineByteCodeBlock = + """$_main: + | ; (lines=42, args=3, vars=3, tmps=2) + | ; (before block pass) + | ; main:23-42 + | ; return [] RANGE[0..0] + |BB0: + | ; start exit lines=[0-1] + |0000 CONCAT T1 T2 + |0001 DO_ICALL + |""".stripMargin + val Parsed.Success(result, count) = + parse(singleLineByteCodeBlock, getControlFlowBlock(_)) + assert(count == singleLineByteCodeBlock.length) + assert(result.blocks.length == 1) + assert(result.blocks.head.instructions.length == 2) + } + "be able to parse #247" in { + val dump = """$_main: + | ; (lines=9, args=0, vars=0, tmps=3) + | ; (before block pass) + | ; ../testproject/tmp.php:1-3 + | ; return [] RANGE[0..0] + |BB0: + | ; start lines=[0-3] + | ; to=(BB2, BB1) + |0000 NOP + |0001 NOP + |0002 NOP + |0003 T1 = JMPZ_EX bool(false) BB2 + | + |BB1: + | ; 
follow lines=[4-7] + | ; to=(BB2) + |0004 INIT_FCALL 1 96 string("aW5pX2dldA==") + |0005 SEND_VAL string("bWJzdHJpbmcuZnVuY19vdmVybG9hZA==") 1 + |0006 V2 = DO_ICALL + |0007 T1 = BOOL V2 + | + |BB2: + | ; follow target exit lines=[8-8] + |0008 RETURN int(1) + |""".stripMargin + val Parsed.Success(result, count) = parse(dump, getControlFlowBlock(_)) + count shouldBe dump.length + result.blocks.length shouldBe 3 + result.blocks.head.instructions.length shouldBe 4 + } + "be able to parse multiple BB" in { + val singleLineByteCodeBlock = + """$_main: + | ; (lines=42, args=3, vars=3, tmps=2) + | ; (before block pass) + | ; main:23-42 + | ; return [] RANGE[0..0] + |BB0: + | ; start exit lines=[0-1] + | ; to=(BB1) + |0000 CONCAT T1 T2 + |0001 DO_ICALL + | + |BB1: + | ; exit lines=[0-2] + |0002 CONCAT T1 T2 + |0002 DO_ICALL + |""".stripMargin + val Parsed.Success(result, count) = + parse(singleLineByteCodeBlock, getControlFlowBlock(_)) + assert(count == singleLineByteCodeBlock.length) + assert(result.blocks.length == 2) + assert(result.blocks.head.followedBy.get.length == 1) + assert(result.blocks.head.followedBy.get.head == 1) + assert(result.blocks(1).followedBy.isEmpty) + } + "be able to parse proper (partial) dump" in { + val fullDump = + s"""$$_main: + | ; (lines=7, args=0, vars=1, tmps=3) + | ; (before block pass) + | ; trivial-main.php:1-4 + | ; return [] RANGE[0..0] + |BB0: + | ; start exit lines=[0-6] + |0000 EXT_STMT + |0001 INIT_FCALL 1 96 string("${encode("phpinfo")}") + |0002 T1 = CONCAT string("${encode("conca")}") CV0($$var) + |0003 T2 = CONCAT T1 string("${encode("tentation")}") + |0004 SEND_VAL T2 1 + |0005 DO_FCALL + |0006 RETURN int(1) + |""".stripMargin + val Parsed.Success(result, count) = + parse(fullDump, getControlFlowBlock(_)) + assert(count == fullDump.length) + assert(result.blocks.length == 1) + } + "be able to parse proper long (partial) dump" in { + val fullDump = + s"""PleskXTest\\TestCase::tearDownAfterClass: + | ; (lines=21, args=0, vars=2, 
tmps=6) + | ; (before block pass) + | ; /home/malte/coding/uni/master/testproject/tests/plestc.php:32-40 + | ; return [] RANGE[0..0] + |BB0: + | ; start lines=[0-2] + | ; to=(BB6, BB1) + |0000 EXT_STMT + |0001 T2 = FETCH_STATIC_PROP_R string("${encode("webspaces")}") (self) (exception) + |0002 V3 = FE_RESET_R T2 BB6 + | + |BB1: + | ; follow target lines=[3-3] + | ; to=(BB6, BB2) + |0003 FE_FETCH_R V3 CV0($$webspace) BB6 + | + |BB2: + | ; follow lines=[4-4] + | ; to=(BB3) + |0004 NOP + | + |BB3: + | ; follow try lines=[5-15] + | ; to=(BB1) + |0005 EXT_STMT + |0006 T4 = FETCH_STATIC_PROP_R string("${encode("_client")}") (static) (exception) + |0007 INIT_METHOD_CALL 0 T4 string("${encode("webspace")}") + |0008 V5 = DO_FCALL + |0009 INIT_METHOD_CALL 2 V5 string("${encode("delete")}") + |0010 SEND_VAL_EX string("${encode("id")}") 1 + |0011 CHECK_FUNC_ARG 2 + |0012 V6 = FETCH_OBJ_FUNC_ARG (ref) CV0($$webspace) string("${encode("id")}") + |0013 SEND_FUNC_ARG V6 2 + |0014 DO_FCALL + |0015 JMP BB1 + | + |BB4: + | ; catch lines=[16-16] + | ; to=(BB5) + |0016 CV1($$e) = CATCH string("${encode("Exception")}") + | + |BB5: + | ; follow lines=[17-17] + | ; to=(BB1) + |0017 JMP BB1 + | + |BB6: + | ; target exit lines=[18-20] + |0018 FE_FREE V3 + |0019 EXT_STMT + |0020 RETURN null + |EXCEPTION TABLE: + | BB3, BB4, -, - + |""".stripMargin + val Parsed.Success(result, length) = + parse(fullDump, getControlFlowBlock(_)) + length shouldBe fullDump.length + result.blocks.length shouldBe 7 + result.exceptionTable match { + case Some(_) => + case None => fail(message = "there should be an exception table block") + } + } + "be able to parse partial (longer) dump II" in { + val dump = + s"""PleskXTest\\Utility\\KeyLimitChecker::checkByType: + | ; (lines=66, args=3, vars=4, tmps=17) + | ; (before block pass) + | ; KeyLimitChecker.php:21-45 + | ; return [] RANGE[0..0] + |BB0: + | ; start lines=[0-6] + | ; to=(BB5, BB8, BB11, BB12, BB1) + |0000 CV0($$keyInfo) = RECV 1 + |0001 CV1($$type) = 
RECV 2 + |0002 CV2($$minimalRequirement) = RECV 3 + |0003 EXT_STMT + |0004 ASSIGN CV3($$field) null + |0005 EXT_STMT + |0006 SWITCH_STRING CV1($$type) "${encode("limit_clients")}": BB5, "${encode("limit_resellers")}": BB8, "${encode("limit_domains")}": BB11, default: BB12 + | + |BB1: + | ; follow target lines=[7-8] + | ; to=(BB5, BB2) + |0007 T5 = IS_EQUAL CV1($$type) string("${encode("limit_clients")}") + |0008 JMPNZ T5 BB5 + | + |BB2: + | ; follow lines=[9-10] + | ; to=(BB8, BB3) + |0009 T5 = IS_EQUAL CV1($$type) string("${encode("limit_resellers")}") + |0010 JMPNZ T5 BB8 + | + |BB3: + | ; follow lines=[11-12] + | ; to=(BB12, BB11) + |0011 T5 = IS_EQUAL CV1($$type) string("${encode("limit_domains")}") + |0012 JMPZNZ T5 BB12 BB11 + | + |BB4: + | ; unreachable lines=[13-13] + | ; to=(BB12) + |0013 JMP BB12 + |""".stripMargin + val Parsed.Success(result, length) = parse(dump, getControlFlowBlock(_)) + length shouldBe dump.length + result.blocks.length shouldBe 5 + } + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/ExceptionTableBlockTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/ExceptionTableBlockTest.scala new file mode 100644 index 0000000..33cc4a6 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/ExceptionTableBlockTest.scala @@ -0,0 +1,80 @@ +package io.joern.bytecode.parser.php8 + +import fastparse._ +import io.joern.bytecode.parser.php8.ExceptionTableBlock._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class ExceptionTableBlockTest extends AnyWordSpec with Matchers { + + "parser getExceptionTableLine" should { + "be able to parse ' 0001, 0002, -, -'" in { + val line = " 0001, 0002, -, -\n" + val Parsed.Success(result, length) = parse(line, getExceptionTableLine(_)) + assert(length == line.length) + assert(result.content.head == "0001") + assert(result.content(1) == "0002") + assert(result.content(2) == "-") + 
assert(result.content(3) == "-") + } + "be able to parse ' BB1, BB2, -, -'" in { + val line = " BB1, BB2, -, -\n" + val Parsed.Success(result, length) = parse(line, getExceptionTableLine(_)) + assert(length == line.length) + assert(result.content.head == "1") + assert(result.content(1) == "2") + assert(result.content(2) == "-") + assert(result.content(3) == "-") + } + } + + "parser getExceptionTableBlock" should { + "be able to parse a 00XX exception table block" in { + val block = + """EXCEPTION TABLE: + 0006, 0004, -, - + 0011, 0033, -, - +""" + val Parsed.Success(result, length) = + parse(block, getExceptionTableBlock(_)) + assert(length == block.length) + assert(result.tableEntry.length == 2) + assert(result.tableEntry.head.content.head == "0006") + assert(result.tableEntry.head.content(1) == "0004") + assert(result.tableEntry(1).content.head == "0011") + assert(result.tableEntry(1).content(1) == "0033") + } + + "be able to parse this example exception table block" in { + val block = + """EXCEPTION TABLE: + | 0006, 0017, -, - +""".stripMargin + val Parsed.Success(result, length) = + parse(block, getExceptionTableBlock(_)) + length shouldBe block.length + result.tableEntry.length shouldBe 1 + } + "be able to parse this nested exception table block" in { + val block = + """EXCEPTION TABLE: + | 0000, -, 0008, 0009 0001, 0003, -, - +""".stripMargin + val Parsed.Success(result, length) = + parse(block, getExceptionTableBlock(_), verboseFailures = true) + length shouldBe block.length + result.tableEntry.length shouldBe 1 + } + "be able to parse this triple nested exception table block" in { + val block = + """EXCEPTION TABLE: + | 0000, -, 0012, 0013 0001, 0007, -, - + | 0001, 0003, -, - +""".stripMargin + val Parsed.Success(result, length) = + parse(block, getExceptionTableBlock(_), verboseFailures = true) + length shouldBe block.length + result.tableEntry.length shouldBe 2 + } + } +} diff --git 
a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/FileParserTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/FileParserTest.scala new file mode 100644 index 0000000..61ce00e --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/FileParserTest.scala @@ -0,0 +1,612 @@ +package io.joern.bytecode.parser.php8 + +import fastparse._ +import io.joern.bytecode.parser.EasyBase64.encode +import io.joern.bytecode.parser.constructs.MethodDefinitionPair +import io.joern.config.CPGConfig +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import java.io.File +import scala.sys.process._ + +class FileParserTest extends AnyWordSpec with Matchers { + + val config: CPGConfig = CPGConfig.initializeConfig() + + "parser parseLastLine" should { + "be able to consume 'No syntax errors detected in garbage'" in { + val Parsed.Success(_, count) = + parse("No syntax errors detected in garbage", + FileParser8.parseLastLine(_)) + assert(count == "No syntax errors detected in garbage".length) + } + } + + "actualParse" should { + "detect a PHP Parser Error" in { + val fullDump : String = "PHP Parse error: syntax error, unexpected new T_NEW" + an [FileParser8.PHPParseError] should be thrownBy FileParser8.actualParse(fullDump, "file") + } + "detect a PHP Fatal Error" in { + val fullDump : String = "PHP Fatal error: Cannot declare class MyClass," + an [FileParser8.PHPFatalError] should be thrownBy FileParser8.actualParse(fullDump, "file") + } + "detect an internal error" in { + val fullDump : String = "$_main: ; (lines=6, args=0, vars=1, tmps=3)" + an [FileParser8.BytecodeParseError] should be thrownBy FileParser8.actualParse(fullDump, "file") + } + } + + "parser parseByteCodeDump" should { + "be able to parse single function single BB" in { + val fullDump = + s""" + |$$_main: + | ; (lines=7, args=0, vars=1, tmps=3) + | ; (before optimizer) + | ; trivial-main.php:1-4 + | ; return [] RANGE[0..0] + |0000 
EXT_STMT + |0001 INIT_FCALL 1 96 string("${encode("phpinfo")}") + |0002 T1 = CONCAT string("${encode("conca")}") CV0($$var) + |0003 T2 = CONCAT T1 string("${encode("tenation")}") + |0004 SEND_VAL T2 1 + |0005 DO_FCALL + |0006 RETURN int(1) + | + |$$_main: + | ; (lines=7, args=0, vars=1, tmps=3) + | ; (before block pass) + | ; trivial-main.php:1-4 + | ; return [] RANGE[0..0] + |BB0: + | ; start exit lines=[0-6] + |0000 EXT_STMT + |0001 INIT_FCALL 1 96 string("${encode("phpinfo")}") + |0002 T1 = CONCAT string("${encode("conca")}") CV0($$var) + |0003 T2 = CONCAT T1 string("${encode("tenation")}") + |0004 SEND_VAL T2 1 + |0005 DO_FCALL + |0006 RETURN int(1) + |""".stripMargin + val Parsed.Success(results, count) = + parse(fullDump, FileParser8.parseByteCodeDump(_)) + assert(results.length == 1) + val MethodDefinitionPair(byteCode, controlFlow) = results.head + val result = (byteCode, controlFlow) + assert(count == fullDump.length) + assert(result._1.instructions.length == 7) + assert(result._2.blocks.length == 1) + assert(result._2.blocks.head.instructions.length == 7) + } + "be able to parse BB block with missing newline before CFG" in { + /** + * generated from https://github.com/symfony/finder/blob/5.4/SplFileInfo.php + */ + val fullDump = """ + |$_main: + | ; (lines=2, args=0, vars=0, tmps=0) + | ; (before optimizer) + | ; finder/SplFileInfo.php:1-89 + | ; return [] RANGE[0..0] + |0000 DECLARE_CLASS_DELAYED string("c3ltZm9ueVxjb21wb25lbnRcZmluZGVyXHNwbGZpbGVpbmZv") string("c3BsZmlsZWluZm8=") + |0001 RETURN int(1) + | + |$_main: + | ; (lines=2, args=0, vars=0, tmps=0) + | ; (before block pass) + | ; finder/SplFileInfo.php:1-89 + | ; return [] RANGE[0..0] + |BB0: + | ; start exit lines=[0-1] + |0000 DECLARE_CLASS_DELAYED string("c3ltZm9ueVxjb21wb25lbnRcZmluZGVyXHNwbGZpbGVpbmZv") string("c3BsZmlsZWluZm8=") + |0001 RETURN int(1) + | + |Symfony\Component\Finder\SplFileInfo::__construct: + | ; (lines=11, args=3, vars=3, tmps=3) + | ; (before optimizer) + | ; 
finder/SplFileInfo.php:29-34 + | ; return [] RANGE[0..0] + |0000 CV0($file) = RECV 1 + |0001 CV1($relativePath) = RECV 2 + |0002 CV2($relativePathname) = RECV 3 + |0003 INIT_STATIC_METHOD_CALL 1 (parent) (exception) CONSTRUCTOR + |0004 SEND_VAR_EX CV0($file) 1 + |0005 DO_FCALL + |0006 ASSIGN_OBJ THIS string("cmVsYXRpdmVQYXRo") + |0007 OP_DATA CV1($relativePath) + |0008 ASSIGN_OBJ THIS string("cmVsYXRpdmVQYXRobmFtZQ==") + |0009 OP_DATA CV2($relativePathname) + |0010 RETURN null + | + |Symfony\Component\Finder\SplFileInfo::__construct: + | ; (lines=11, args=3, vars=3, tmps=3) + | ; (before block pass) + | ; finder/SplFileInfo.php:29-34 + | ; return [] RANGE[0..0] + |BB0: + | ; start exit lines=[0-10] + |0000 CV0($file) = RECV 1 + |0001 CV1($relativePath) = RECV 2 + |0002 CV2($relativePathname) = RECV 3 + |0003 INIT_STATIC_METHOD_CALL 1 (parent) (exception) CONSTRUCTOR + |0004 SEND_VAR_EX CV0($file) 1 + |0005 DO_FCALL + |0006 ASSIGN_OBJ THIS string("cmVsYXRpdmVQYXRo") + |0007 OP_DATA CV1($relativePath) + |0008 ASSIGN_OBJ THIS string("cmVsYXRpdmVQYXRobmFtZQ==") + |0009 OP_DATA CV2($relativePathname) + |0010 RETURN null + | + |Symfony\Component\Finder\SplFileInfo::getRelativePath: + | ; (lines=3, args=0, vars=0, tmps=1) + | ; (before optimizer) + | ; finder/SplFileInfo.php:43-46 + | ; return [] RANGE[0..0] + |0000 T0 = FETCH_OBJ_R THIS string("cmVsYXRpdmVQYXRo") + |0001 RETURN T0 + |0002 RETURN null + | + |Symfony\Component\Finder\SplFileInfo::getRelativePath: + | ; (lines=3, args=0, vars=0, tmps=1) + | ; (before block pass) + | ; finder/SplFileInfo.php:43-46 + | ; return [] RANGE[0..0] + |BB0: + | ; start exit lines=[0-1] + |0000 T0 = FETCH_OBJ_R THIS string("cmVsYXRpdmVQYXRo") + |0001 RETURN T0 + | + |BB1: + | ; unreachable lines=[2-2] + |0002 RETURN null + | + |Symfony\Component\Finder\SplFileInfo::getRelativePathname: + | ; (lines=3, args=0, vars=0, tmps=1) + | ; (before optimizer) + | ; finder/SplFileInfo.php:55-58 + | ; return [] RANGE[0..0] + |0000 T0 = 
FETCH_OBJ_R THIS string("cmVsYXRpdmVQYXRobmFtZQ==") + |0001 RETURN T0 + |0002 RETURN null + | + |Symfony\Component\Finder\SplFileInfo::getRelativePathname: + | ; (lines=3, args=0, vars=0, tmps=1) + | ; (before block pass) + | ; finder/SplFileInfo.php:55-58 + | ; return [] RANGE[0..0] + |BB0: + | ; start exit lines=[0-1] + |0000 T0 = FETCH_OBJ_R THIS string("cmVsYXRpdmVQYXRobmFtZQ==") + |0001 RETURN T0 + | + |BB1: + | ; unreachable lines=[2-2] + |0002 RETURN null + | + |Symfony\Component\Finder\SplFileInfo::getFilenameWithoutExtension: + | ; (lines=11, args=0, vars=1, tmps=3) + | ; (before optimizer) + | ; finder/SplFileInfo.php:60-65 + | ; return [] RANGE[0..0] + |0000 INIT_METHOD_CALL 0 THIS string("Z2V0RmlsZW5hbWU=") + |0001 V1 = DO_FCALL + |0002 ASSIGN CV0($filename) V1 + |0003 INIT_NS_FCALL_BY_NAME 2 string("U3ltZm9ueVxDb21wb25lbnRcRmluZGVyXHBhdGhpbmZv") + |0004 SEND_VAR_EX CV0($filename) 1 + |0005 SEND_VAL_EX int(8) 2 + |0006 V3 = DO_FCALL_BY_NAME + |0007 VERIFY_RETURN_TYPE V3 + |0008 RETURN V3 + |0009 VERIFY_RETURN_TYPE + |0010 RETURN null + |LIVE RANGES: + | 3: 0007 - 0008 (tmp/var) + | + |Symfony\Component\Finder\SplFileInfo::getFilenameWithoutExtension: + | ; (lines=11, args=0, vars=1, tmps=3) + | ; (before block pass) + | ; finder/SplFileInfo.php:60-65 + | ; return [] RANGE[0..0] + |BB0: + | ; start exit lines=[0-8] + |0000 INIT_METHOD_CALL 0 THIS string("Z2V0RmlsZW5hbWU=") + |0001 V1 = DO_FCALL + |0002 ASSIGN CV0($filename) V1 + |0003 INIT_NS_FCALL_BY_NAME 2 string("U3ltZm9ueVxDb21wb25lbnRcRmluZGVyXHBhdGhpbmZv") + |0004 SEND_VAR_EX CV0($filename) 1 + |0005 SEND_VAL_EX int(8) 2 + |0006 V3 = DO_FCALL_BY_NAME + |0007 VERIFY_RETURN_TYPE V3 + |0008 RETURN V3 + | + |BB1: + | ; unreachable lines=[9-10] + |0009 VERIFY_RETURN_TYPE + |0010 RETURN null + | + |Symfony\Component\Finder\SplFileInfo::getContents: + | ; (lines=24, args=0, vars=2, tmps=10) + | ; (before optimizer) + | ; finder/SplFileInfo.php:74-87 + | ; return [] RANGE[0..0] + |0000 
INIT_NS_FCALL_BY_NAME 1 string("U3ltZm9ueVxDb21wb25lbnRcRmluZGVyXHNldF9lcnJvcl9oYW5kbGVy") + |0001 T2 = DECLARE_LAMBDA_FUNCTION 0 + |0002 BIND_LEXICAL (ref) T2 CV0($error) + |0003 SEND_VAL_EX T2 1 + |0004 DO_FCALL_BY_NAME + |0005 INIT_NS_FCALL_BY_NAME 1 string("U3ltZm9ueVxDb21wb25lbnRcRmluZGVyXGZpbGVfZ2V0X2NvbnRlbnRz") + |0006 INIT_METHOD_CALL 0 THIS string("Z2V0UGF0aG5hbWU=") + |0007 V5 = DO_FCALL + |0008 SEND_VAR_NO_REF_EX V5 1 + |0009 V6 = DO_FCALL_BY_NAME + |0010 ASSIGN CV1($content) V6 + |0011 T4 = FAST_CALL 0013 + |0012 JMP 0016 + |0013 INIT_NS_FCALL_BY_NAME 0 string("U3ltZm9ueVxDb21wb25lbnRcRmluZGVyXHJlc3RvcmVfZXJyb3JfaGFuZGxlcg==") + |0014 DO_FCALL_BY_NAME + |0015 FAST_RET T4 + |0016 T9 = TYPE_CHECK (false) CV1($content) + |0017 JMPZ T9 0022 + |0018 V10 = NEW 1 string("UnVudGltZUV4Y2VwdGlvbg==") + |0019 SEND_VAR_EX CV0($error) 1 + |0020 DO_FCALL + |0021 THROW V10 + |0022 RETURN CV1($content) + |0023 RETURN null + |LIVE RANGES: + | 2: 0002 - 0003 (tmp/var) + | 10: 0019 - 0021 (new) + |EXCEPTION TABLE: + | 0005, -, 0013, 0015 + |Symfony\Component\Finder\SplFileInfo::getContents: + | ; (lines=24, args=0, vars=2, tmps=10) + | ; (before block pass) + | ; finder/SplFileInfo.php:74-87 + | ; return [] RANGE[0..0] + |BB0: + | ; start lines=[0-4] + | ; to=(BB1) + |0000 INIT_NS_FCALL_BY_NAME 1 string("U3ltZm9ueVxDb21wb25lbnRcRmluZGVyXHNldF9lcnJvcl9oYW5kbGVy") + |0001 T2 = DECLARE_LAMBDA_FUNCTION 0 + |0002 BIND_LEXICAL (ref) T2 CV0($error) + |0003 SEND_VAL_EX T2 1 + |0004 DO_FCALL_BY_NAME + | + |BB1: + | ; follow try lines=[5-11] + | ; to=(BB3, BB2) + |0005 INIT_NS_FCALL_BY_NAME 1 string("U3ltZm9ueVxDb21wb25lbnRcRmluZGVyXGZpbGVfZ2V0X2NvbnRlbnRz") + |0006 INIT_METHOD_CALL 0 THIS string("Z2V0UGF0aG5hbWU=") + |0007 V5 = DO_FCALL + |0008 SEND_VAR_NO_REF_EX V5 1 + |0009 V6 = DO_FCALL_BY_NAME + |0010 ASSIGN CV1($content) V6 + |0011 T4 = FAST_CALL BB3 + | + |BB2: + | ; follow lines=[12-12] + | ; to=(BB5) + |0012 JMP BB5 + | + |BB3: + | ; target finally lines=[13-14] + | ; 
to=(BB4) + |0013 INIT_NS_FCALL_BY_NAME 0 string("U3ltZm9ueVxDb21wb25lbnRcRmluZGVyXHJlc3RvcmVfZXJyb3JfaGFuZGxlcg==") + |0014 DO_FCALL_BY_NAME + | + |BB4: + | ; follow exit finally_end lines=[15-15] + |0015 FAST_RET T4 + | + |BB5: + | ; target lines=[16-17] + | ; to=(BB7, BB6) + |0016 T9 = TYPE_CHECK (false) CV1($content) + |0017 JMPZ T9 BB7 + | + |BB6: + | ; follow exit lines=[18-21] + |0018 V10 = NEW 1 string("UnVudGltZUV4Y2VwdGlvbg==") + |0019 SEND_VAR_EX CV0($error) 1 + |0020 DO_FCALL + |0021 THROW V10 + | + |BB7: + | ; target exit lines=[22-22] + |0022 RETURN CV1($content) + | + |BB8: + | ; unreachable lines=[23-23] + |0023 RETURN null + |EXCEPTION TABLE: + | BB1, -, BB3, BB4 + | + |Symfony\Component\Finder\{closure}: + | ; (lines=5, args=2, vars=3, tmps=1) + | ; (before optimizer) + | ; finder/SplFileInfo.php:76-76 + | ; return [] RANGE[0..0] + |0000 CV0($type) = RECV 1 + |0001 CV1($msg) = RECV 2 + |0002 BIND_STATIC (ref) CV2($error) + |0003 ASSIGN CV2($error) CV1($msg) + |0004 RETURN null + | + |Symfony\Component\Finder\{closure}: + | ; (lines=5, args=2, vars=3, tmps=1) + | ; (before block pass) + | ; finder/SplFileInfo.php:76-76 + | ; return [] RANGE[0..0] + |BB0: + | ; start exit lines=[0-4] + |0000 CV0($type) = RECV 1 + |0001 CV1($msg) = RECV 2 + |0002 BIND_STATIC (ref) CV2($error) + |0003 ASSIGN CV2($error) CV1($msg) + |0004 RETURN null + |""".stripMargin + val Parsed.Success(_, length) = parse(fullDump, FileParser8.parseByteCodeDump(_)) + length shouldBe fullDump.length + } + + "support php8 stuff" should { + + "nullsafe operator" in { + /* + generated from: + f(); + */ + val fullDump = s""" + |$$_main: + | ; (lines=9, args=0, vars=1, tmps=1) + | ; (before optimizer) + | ; /home/malte/coding/uni/master/testproject/tests/null.php:1-7 + | ; return [] RANGE[0..0] + |0000 EXT_STMT + |0001 V1 = JMP_NULL CV0($$a) 0004 + |0002 INIT_METHOD_CALL 0 CV0($$a) string("${encode("f")}") + |0003 DO_FCALL + |0004 EXT_STMT + |0005 JMPZ CV0($$a) 0008 + |0006 EXT_STMT + 
|0007 ECHO int(1) + |0008 RETURN int(1) + | + |$$_main: + | ; (lines=9, args=0, vars=1, tmps=1) + | ; (before block pass) + | ; /home/malte/coding/uni/master/testproject/tests/null.php:1-7 + | ; return [] RANGE[0..0] + |BB0: + | ; start lines=[0-1] + | ; to=(BB2, BB1) + |0000 EXT_STMT + |0001 V1 = JMP_NULL CV0($$a) BB2 + | + |BB1: + | ; follow lines=[2-3] + | ; to=(BB2) + |0002 INIT_METHOD_CALL 0 CV0($$a) string("${encode("f")}") + |0003 DO_FCALL + | + |BB2: + | ; follow target lines=[4-5] + | ; to=(BB4, BB3) + |0004 EXT_STMT + |0005 JMPZ CV0($$a) BB4 + | + |BB3: + | ; follow lines=[6-7] + | ; to=(BB4) + |0006 EXT_STMT + |0007 ECHO int(1) + | + |BB4: + | ; follow target exit lines=[8-8] + |0008 RETURN int(1) + |""".stripMargin + val Parsed.Success(result, count) = parse(fullDump, FileParser8.parseByteCodeDump(_), verboseFailures = true) + Console.out.flush() + assert(count == fullDump.length) // parsed everything + assert(result.length == 1) // resulted in one method + } + + /** + * generated from https://www.php.net/manual/en/control-structures.match.php: + */ + // 'This food is an apple', + // 'bar' => 'This food is a bar', + // 'cake' => 'This food is a cake', + // }; + // + // var_dump($return_value); + // ?> + "match expression" in { + val fullDump = + s""" + |$$_main: + | ; (lines=17, args=0, vars=2, tmps=5) + | ; (before optimizer) + | ; /home/malte/coding/uni/master/testproject/tests/match.php:1-11 + | ; return [] RANGE[0..0] + |0000 EXT_STMT + |0001 ASSIGN CV0($$food) string("${encode("cake")}") + |0002 EXT_STMT + |0003 MATCH CV0($$food) "${encode("apple")}": 0005, "${encode("bar")}": 0007, "${encode("cake")}": 0009, default: 0004 + |0004 MATCH_ERROR CV0($$food) + |0005 T4 = QM_ASSIGN string("${encode("apple")}") + |0006 JMP 0011 + |0007 T4 = QM_ASSIGN string("${encode("bar")}") + |0008 JMP 0011 + |0009 T4 = QM_ASSIGN string("${encode("cake")}") + |0010 JMP 0011 + |0011 ASSIGN CV1($$return_value) T4 + |0012 EXT_STMT + |0013 INIT_FCALL 1 96 
string("${encode("var_dump")}") + |0014 SEND_VAR CV1($$return_value) 1 + |0015 DO_FCALL + |0016 RETURN int(1) + |LIVE RANGES: + | 4: 0010 - 0011 (tmp/var) + | + |$$_main: + | ; (lines=17, args=0, vars=2, tmps=5) + | ; (before block pass) + | ; /home/malte/coding/uni/master/testproject/tests/match.php:1-11 + | ; return [] RANGE[0..0] + |BB0: + | ; start lines=[0-3] + | ; to=(BB2, BB3, BB4, BB1) + |0000 EXT_STMT + |0001 ASSIGN CV0($$food) string("${encode("cake")}") + |0002 EXT_STMT + |0003 MATCH CV0($$food) "${encode("apple")}": BB2, "${encode("bar")}": BB3, "${encode("cake")}": BB4, default: BB1 + | + |BB1: + | ; target exit lines=[4-4] + |0004 MATCH_ERROR CV0($$food) + | + |BB2: + | ; target lines=[5-6] + | ; to=(BB5) + |0005 T4 = QM_ASSIGN string("${encode("apple")}") + |0006 JMP BB5 + | + |BB3: + | ; target lines=[7-8] + | ; to=(BB5) + |0007 T4 = QM_ASSIGN string("${encode("bar")}") + |0008 JMP BB5 + | + |BB4: + | ; target lines=[9-10] + | ; to=(BB5) + |0009 T4 = QM_ASSIGN string("${encode("cake")}") + |0010 NOP + | + |BB5: + | ; follow target exit lines=[11-16] + |0011 ASSIGN CV1($$return_value) T4 + |0012 EXT_STMT + |0013 INIT_FCALL 1 96 string("${encode("var_dump")}") + |0014 SEND_VAR CV1($$return_value) 1 + |0015 DO_FCALL + |0016 RETURN int(1) + |""".stripMargin + val Parsed.Success(result, count) = parse(fullDump, FileParser8.parseByteCodeDump(_), verboseFailures = true) + Console.out.flush() + assert(count == fullDump.length) // parsed everything + assert(result.length == 1) // resulted in one method + } + "undef arguments" in { + // from https://wiki.php.net/rfc/named_params + // gen from + // + fail(x.getMessage) + } + } + "in function name" in { + val cwd = "pwd".!!.stripMargin.trim + try { + val res = FileParser8.parseFromFile(new File( + cwd + "/layerByteCode/resources/unittesting/testprojects/unicode/hiragana.php"), config.php8.interpreter, config.php8.phpini) + res.map(_.byteCodeBlock.name).toSet shouldBe Set("dlr_main", "rさ") + } catch { + case x: 
package io.joern.bytecode.parser.php8

import fastparse._
import io.joern.bytecode.parser.php8.HeaderBlock._
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

/** Specification for the parsers imported from `HeaderBlock`.
  *
  * Covers the method identifier (optional namespace, optional class, method name),
  * the `(lines=…, args=…, vars=…, tmps=…)` meta block, the free-form meta line,
  * the `file:start-end` line and the complete header block built from all of them.
  */
class HeaderBlockTest extends AnyWordSpec with Matchers {

  /** Runs `getHeaderBlockMethodIdentifier` on `identifier`, checks that the whole
    * input was consumed and hands back the parsed parts for further assertions.
    */
  private def identifierPartsOf(identifier: String) = {
    val Parsed.Success(parts, consumed) =
      parse(identifier, getHeaderBlockMethodIdentifier(_))
    consumed shouldBe identifier.length
    parts
  }

  /** Runs `getHeaderBlockMetaLineFileInfo` on `metaLine`, checks that the whole
    * input was consumed and hands back the parsed file information.
    */
  private def fileInfoOf(metaLine: String) = {
    val Parsed.Success(info, consumed) =
      parse(metaLine, getHeaderBlockMetaLineFileInfo(_))
    consumed shouldBe metaLine.length
    info
  }

  /** Runs `getHeaderBlock` on `block`, checks that the whole input was consumed
    * and hands back the parsed header for further assertions.
    */
  private def headerOf(block: String) = {
    val Parsed.Success(header, consumed) = parse(block, getHeaderBlock(_))
    consumed shouldBe block.length
    header
  }

  "parsing '$_main:\n ; (lines=42, args=43, vars=44, tmps=45)'" should {
    "be parsable only considering name results" in {
      val Parsed.Success(_, consumed) =
        parse("$_main", parseHeaderBlockMethodIdentifier(_))
      // all 6 characters of "$_main" have to be consumed
      consumed shouldBe 6
    }
    "be parsable only considering meta variable results in" in {
      val Parsed.Success(meta, _) =
        parse("(lines=42, args=43, vars=44, tmps=45)",
              parseHeaderBlockMethodMetaBlock(_))
      meta._1 shouldBe 42
      meta._2 shouldBe 43
      meta._3 shouldBe 44
      meta._4 shouldBe 45
    }
    "be completely parsable" in {
      val Parsed.Success(definition, _) =
        parse("$_main:\n ; (lines=42, args=43, vars=44, tmps=45)",
              getHeaderBLockMethodDefinitionLine(_))
      // the expected name of "$_main" is "dlr_main"
      definition.name shouldBe "dlr_main"
      definition.lines shouldBe 42
      definition.args shouldBe 43
      definition.vars shouldBe 44
      definition.tmps shouldBe 45
    }
  }

  "parser getHeaderBlockMethodIdentifier" should {
    "be able to parse {closure}" in {
      val parts = identifierPartsOf("{closure}")
      parts._1 shouldBe None
      parts._2 shouldBe None
      parts._3 shouldBe "{closure}"
    }
    "be able to parse $_main" in {
      val parts = identifierPartsOf("$_main")
      parts._1 shouldBe None
      parts._2 shouldBe None
      parts._3 shouldBe "$_main"
    }
    "be able to parse testFunction" in {
      val parts = identifierPartsOf("testFunction")
      parts._1 shouldBe None
      parts._2 shouldBe None
      parts._3 shouldBe "testFunction"
    }
    "be able to parse namespace\\testFunction" in {
      val parts = identifierPartsOf("namespace\\testFunction")
      parts._1 shouldBe Some("namespace")
      parts._2 shouldBe None
      parts._3 shouldBe "testFunction"
    }
    "be able to parse weird\\namespace\\testFunction" in {
      val parts = identifierPartsOf("weird\\namespace\\testFunction")
      parts._1 shouldBe Some("weird\\namespace")
      parts._2 shouldBe None
      parts._3 shouldBe "testFunction"
    }
    "be able to parse className::testFunction" in {
      val parts = identifierPartsOf("className::testFunction")
      parts._1 shouldBe None
      parts._2 shouldBe Some("className")
      parts._3 shouldBe "testFunction"
    }
    "be able to parse namespace\\className::testFunction" in {
      val parts = identifierPartsOf("namespace\\className::testFunction")
      parts._1 shouldBe Some("namespace")
      parts._2 shouldBe Some("className")
      parts._3 shouldBe "testFunction"
    }
    "be able to parse weird\\namespace\\className::testFunction" in {
      val parts = identifierPartsOf("weird\\namespace\\className::testFunction")
      parts._1 shouldBe Some("weird\\namespace")
      parts._2 shouldBe Some("className")
      parts._3 shouldBe "testFunction"
    }
    "be able to parse long\\weird\\namespace\\className::testFunction" in {
      val parts =
        identifierPartsOf("long\\weird\\namespace\\className::testFunction")
      parts._1 shouldBe Some("long\\weird\\namespace")
      parts._2 shouldBe Some("className")
      parts._3 shouldBe "testFunction"
    }
    "be able to parse \\strlen" in {
      // the expected method name has no leading backslash
      val parts = identifierPartsOf("\\strlen")
      parts._1 shouldBe None
      parts._2 shouldBe None
      parts._3 shouldBe "strlen"
    }
  }

  "parser getMetaLineParsing " should {
    "be able to parse '; (before optimizer)'" in {
      val Parsed.Success(metaLine, _) =
        parse("; (before optimizer)", getHeaderBlockMetaLineParsing(_))
      metaLine.metaInfo.length shouldBe 2
      metaLine.metaInfo(0) shouldBe "before"
      metaLine.metaInfo(1) shouldBe "optimizer"
    }
  }

  "parser getMetaLineFileInfo" should {
    "be able to pare '; (main: |1-3)'" in {
      val Parsed.Success(info, _) =
        parse("; main:1-3", getHeaderBlockMetaLineFileInfo(_))
      info.fileName shouldBe "main"
      info.lineStart shouldBe 1
      info.lineEnd shouldBe 3
    }
  }

  "parseHeaderBlockMetaLineFileInfo" should {
    "be able to handle a path with a colon" in {
      val info =
        fileInfoOf("; /home/simon/tmp/bytecode-cpg/trivial:try-catch.php:1-23")
      info.fileName shouldBe "/home/simon/tmp/bytecode-cpg/trivial:try-catch.php"
    }
    "be able to handle path without colon" in {
      val info = fileInfoOf(" ; /tmp/php2cpg17511137408163283203/test.php:1-9")
      info.fileName shouldBe "/tmp/php2cpg17511137408163283203/test.php"
      info.lineStart shouldBe 1
      info.lineEnd shouldBe 9
    }
  }

  "parser parseHeaderBlock" should {
    "be able to parse a valid header block with colon in path" in {
      val header = headerOf(
        """$_main:
          | ; (lines=23, args=0, vars=2, tmps=8)
          | ; (before optimizer)
          | ; /home/simon/tmp/bytecode-cpg/trivial:try-catch.php:1-23
          | ; return [] RANGE[0..0]
          |""".stripMargin)
      header._3.fileName shouldBe "/home/simon/tmp/bytecode-cpg/trivial:try-catch.php"
      header._3.lineStart shouldBe 1
      header._3.lineEnd shouldBe 23
    }
    "be able to parse a valid header block" in {
      val header = headerOf(
        """$_main:
          | ; (lines=23, args=0, vars=2, tmps=8)
          | ; (before optimizer)
          | ; /home/simon/tmp/bytecode-cpg/trivial-try-catch.php:1-23
          | ; return [] RANGE[0..0]
          |""".stripMargin)
      // method definition line
      header._1.name shouldBe "dlr_main"
      header._1.lines shouldBe 23
      header._1.args shouldBe 0
      header._1.vars shouldBe 2
      header._1.tmps shouldBe 8
      // free-form meta line
      header._2.metaInfo.length shouldBe 2
      header._2.metaInfo(0) shouldBe "before"
      header._2.metaInfo(1) shouldBe "optimizer"
      // file information line
      header._3.fileName shouldBe "/home/simon/tmp/bytecode-cpg/trivial-try-catch.php"
      header._3.lineStart shouldBe 1
      header._3.lineEnd shouldBe 23
      // trailing return/range line
      header._4.string shouldBe "return [] RANGE[0..0]"
    }
    "be able to parse a valid header block with class definition" in {
      val header = headerOf(
        """Basic::__init__:
          | ; (lines=23, args=0, vars=2, tmps=8)
          | ; (before optimizer)
          | ; /home/simon/tmp/bytecode-cpg/trivial-try-catch.php:1-23
          | ; return [] RANGE[0..0]
          |""".stripMargin)
      header._1.name shouldBe "__init__"
      // the expected class name is the lower-cased "Basic"
      header._1.classname shouldBe Some("basic")
    }
    "be able to parse a valid header block with namespace and class" in {
      val header = headerOf(
        """namespace\Basic::__init__:
          | ; (lines=23, args=0, vars=2, tmps=8)
          | ; (before optimizer)
          | ; /home/simon/tmp/bytecode-cpg/trivial-try-catch.php:1-23
          | ; return [] RANGE[0..0]
          |""".stripMargin)
      header._1.name shouldBe "__init__"
      header._1.classname shouldBe Some("basic")
      header._1.namespace shouldBe Some("namespace")
    }
    "be able to parse a valid header block with namespace" in {
      val header = headerOf(
        """namespace\__init__:
          | ; (lines=23, args=0, vars=2, tmps=8)
          | ; (before optimizer)
          | ; /home/simon/tmp/bytecode-cpg/trivial-try-catch.php:1-23
          | ; return [] RANGE[0..0]
          |""".stripMargin)
      header._1.name shouldBe "__init__"
      header._1.classname shouldBe None
      header._1.namespace shouldBe Some("namespace")
    }
    "be able to parse a valid header block with deep namespace" in {
      val header = headerOf(
        """start\namespace\__init__:
          | ; (lines=23, args=0, vars=2, tmps=8)
          | ; (before optimizer)
          | ; /home/simon/tmp/bytecode-cpg/trivial-try-catch.php:1-23
          | ; return [] RANGE[0..0]
          |""".stripMargin)
      header._1.name shouldBe "__init__"
      header._1.classname shouldBe None
      header._1.namespace shouldBe Some("start\\namespace")
    }
    "be able to parse a valid head block with spaces in file name" in {
      val header = headerOf(
        """$_main:
          | ; (lines=3, args=0, vars=0, tmps=0)
          | ; (before optimizer)
          | ; /home/simon/tmp/cpgIssues/PoC/People Weird(@#.php:1-3
          | ; return [] RANGE[0..0]
          |""".stripMargin)
      header._1.name shouldBe "dlr_main"
      header._3.fileName shouldBe "/home/simon/tmp/cpgIssues/PoC/People Weird(@#.php"
      header._3.lineStart shouldBe 1
      header._3.lineEnd shouldBe 3
    }
  }

}
INIT_NS_FCALL_BY_NAME(paramCount, function) => + paramCount shouldBe 2 + function shouldBe "some\\qualified\\name" + case _ => fail(message = "return is not of type INIT_NS_FCALL_BY_NAME") + } + } + "be able to parse INIT_NS_FCALL_BY_NAME 1 string(\"PleskX\\Api\\get_class\")" in { + val operation = + s"""INIT_NS_FCALL_BY_NAME 1 string("${encode("PleskX\\Api\\get_class")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case INIT_NS_FCALL_BY_NAME(paramCount, function) => + paramCount shouldBe 1 + function shouldBe "pleskx\\api\\get_class" + case _ => fail(message = "return is not of type INIT_NS_FCALL_BY_NAME") + } + } + "be able to parse INIT_METHOD_CALL 1 CV0($var) string(\"test\")" in { + val operation = s"""INIT_METHOD_CALL 1 CV0($$var) string("${encode("test")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case INIT_METHOD_CALL(paramCount, objectVar, method) => + paramCount shouldBe 1 + val Variable(name, tmp, ref) = objectVar + name shouldBe "var" + tmp shouldBe false + ref shouldBe false + method shouldBe StringLiteral("test") + case _ => fail(message = "result is not of type INIT_METHOD_CALL") + } + } + "be able to parse INIT_METHOD_CALL 1 THIS string(\"test\")" in { + val operation = s"""INIT_METHOD_CALL 1 THIS string("${encode("test")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case INIT_METHOD_CALL(paramCount, objectVar, method) => + paramCount shouldBe 1 + val Variable(name, tmp, ref) = objectVar + name shouldBe "THIS" + tmp shouldBe false + ref shouldBe true + method shouldBe StringLiteral("test") + case _ => fail(message = "result is not of type INIT_METHOD_CALL") + } + } + "be able to parse INIT_STATIC_METHOD_CALL 3 string(\"test\") CONSTRUCTOR" in { + val operation = 
s"""INIT_STATIC_METHOD_CALL 3 string("${encode("test")}") CONSTRUCTOR""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op shouldBe INIT_STATIC_METHOD_CALL(3, None, None, Some(StringLiteral("test")), ByteCodeConstructor()) + } + "be able to parse INIT_FCALL 2 42 string(\"phpinfo\")" in { + val operation = s"""INIT_FCALL 2 42 string("${encode("phpinfo")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case INIT_FCALL(parameter, unknown, name) => + parameter shouldBe 2 + unknown shouldBe 42 + name match { + case StringLiteral(value) => value shouldBe "phpinfo" + case _ => fail(message = "the name is not of type StringLiteral") + } + + case _ => fail(message = "the result is not of type INIT_FCALL") + } + } + "be able to parse INIT_DYNAMIC_CALL 2 CV($x)" in { + val operation = "INIT_DYNAMIC_CALL 2 CV($x)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case INIT_DYNAMIC_CALL(paramCount, variable) => + paramCount shouldBe 2 + variable match { + case Variable(name, tmp, reference) => + name shouldBe "x" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "the name is not of type StringLiteral") + } + + case _ => fail(message = "the result is not of type INIT_FCALL") + } + } + "be able to parse INIT_ARRAY 0 NEXT" in { + val operation = "INIT_ARRAY 0 NEXT" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, pos, first) => + code shouldBe "INIT_ARRAY" + pos shouldBe IntegerLiteral(0) + first shouldBe ByteCodePlaceIndicator("NEXT") + case x => fail(s"unexpected operation $x") + } + } + "be able to parse INIT_ARRAY 1 (packed) CV0($request) CV2($wrapped)" in { + val operation = "INIT_ARRAY 1 (packed) 
CV0($request) CV2($wrapped)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case QuadrupleValueOperation(code, first, second, third, fourth) => + code shouldBe "INIT_ARRAY" + first shouldBe IntegerLiteral(1) + second shouldBe ByteCodeKeyword("packed") + third shouldBe Variable("request", tmp = false) + fourth shouldBe Variable("wrapped", tmp = false) + case x => fail(s"unexpected operation $x") + } + } + "be able to parse INIT_FCALL_BY_NAME 2 string(\"phpinfo\")" in { + val operation = s"""INIT_FCALL_BY_NAME 2 string("${encode("phpinfo")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case INIT_FCALL_BY_NAME(paramCount, name) => + paramCount shouldBe 2 + name shouldBe "phpinfo" + case _ => fail(message = "the result is not of type INIT_FCALL") + } + } + "be able to parse SEND_VAR_EX CV($var) 2" in { + val operation = "SEND_VAR_EX CV($var) 2" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "SEND_VAR_EX" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 2 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse SEND_VAL_EX CV($var) 2" in { + val operation = "SEND_VAL_EX CV($var) 2" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "SEND_VAL_EX" + lhs match { + case Variable(name, tmp, reference) => + name 
shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 2 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse CHECK_UNDEF_ARGS" in { + val operation = "CHECK_UNDEF_ARGS" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op shouldBe NoValueOperation("CHECK_UNDEF_ARGS") + } + + "be able to parse FETCH_CLASS_CONSTANT string(\"PleskX\\Api\\Client\") string(\"RESPONSE_FULL\")" in { + val operation = + s"""FETCH_CLASS_CONSTANT string("${encode("PleskX\\Api\\Client")}") string("${encode("RESPONSE_FULL")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FETCH_CLASS_CONSTANT" + lhs match { + case StringLiteral(value) => value shouldBe "PleskX\\Api\\Client" + case x => fail(s"unexpected value $x") + } + rhs match { + case StringLiteral(value) => value shouldBe "RESPONSE_FULL" + case x => fail(s"unexpected value $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse FETCH_CONSTANT (unqualified-in-namespace) string(\"Workerman\\Protocols\\PHP_URL_QUERY\")" in { + val op = "FETCH_CONSTANT (unqualified-in-namespace) string(\"" + encode("""Workerman\Protocols\PHP_URL_QUERY""") + "\")" + val Parsed.Success(result, length) = parse(op, getOperation(_)) + length shouldBe op.length + result.op shouldBe DualValueOperation("FETCH_CONSTANT", + ByteCodeKeyword("unqualified-in-namespace"), + StringLiteral("Workerman\\Protocols\\PHP_URL_QUERY") + ) + } + "be able to parse FETCH_STATIC_PROP_W string(\"app\") string(\"Yii\")" in { + val op = "FETCH_STATIC_PROP_W string(\"" + encode("app") + "\") string(\"" + 
encode("Yii") + "\")" + val Parsed.Success(result, length) = parse(op, getOperation(_)) + length shouldBe op.length + result.op shouldBe DualValueOperation("FETCH_STATIC_PROP_W", StringLiteral("app"), StringLiteral("Yii")) + } + + "be able to parse RECV 2" in { + val operation = "RECV 2" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case SingleValueOperation(code, value) => + code shouldBe "RECV" + value match { + case IntegerLiteral(value) => value shouldBe 2 + case _ => fail(message = "the value is not of type IntegerLiteral") + } + case _ => + fail(message = "the result is not of type SingleValueOperation") + } + } + "be able to parse RECV_INIT 4 string(\"test\")" in { + val operation = s"""RECV_INIT 4 string("${encode("test")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "RECV_INIT" + lhs match { + case IntegerLiteral(value) => value shouldBe 4 + case _ => fail(message = "lhs is not of type IntegerLiteral") + } + rhs match { + case StringLiteral(value) => value shouldBe "test" + case _ => fail(message = "rhs is not of type StringLiteral") + } + case _ => fail(message = "result is not of type DualOperation") + } + } + "be able to parse SEND_VAL string(\"string\") 1" in { + val operation = s"""SEND_VAL string("${encode("string")}") 1""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "SEND_VAL" + lhs match { + case StringLiteral(value) => value shouldBe "string" + case _ => fail(message = "lhs is not of type StringLiteral") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is 
not of type DualValueOperation") + } + } + "be able to parse JMP 0001" in { + val operation = "JMP 0001" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case SingleValueOperation(code, value) => + code shouldBe "JMP" + value match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse JMP BB1" in { + val operation = "JMP BB1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case SingleValueOperation(code, value) => + code shouldBe "JMP" + value match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse JMPZ CV($var) 0001" in { + val operation = "JMPZ CV($var) 0001" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMPZ" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse JMPZ CV($var) BB1" in { + val operation = "JMPZ CV($var) BB1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMPZ" + lhs match { + case Variable(name, tmp, reference) => + name 
shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse JMPNZ CV($var) 0001" in { + val operation = "JMPNZ CV($var) 0001" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMPNZ" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse JMPNZ CV($var) BB1" in { + val operation = "JMPNZ CV($var) BB1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMPNZ" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse JMPNZ_EX CV($var) 0001" in { + val operation = "JMPNZ_EX CV($var) 0001" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMPNZ_EX" + lhs match { 
+ case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse JMPNZ_EX CV($var) BB1" in { + val operation = "JMPNZ_EX CV($var) BB1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMPNZ_EX" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse JMP_NULL CV0($a) 0004" in { + val operation = "JMP_NULL CV0($a) 0004" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "JMP_NULL" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "a" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 4 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse FE_RESET_R V1 0001" in { + val operation = "FE_RESET_R V1 0001" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, 
lhs, rhs) => + code shouldBe "FE_RESET_R" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "V1" + tmp shouldBe true + reference shouldBe true + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualOperation") + } + } + "be able to parse FE_RESET_R V1 BB1" in { + val operation = "FE_RESET_R V1 BB1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FE_RESET_R" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "V1" + tmp shouldBe true + reference shouldBe true + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualOperation") + } + } + "be able to parse FE_FETCH_R V1 CV($array) 0001" in { + val operation = "FE_FETCH_R V1 CV($var) 0001" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "FE_FETCH_R" + first match { + case Variable(name, tmp, reference) => + name shouldBe "V1" + tmp shouldBe true + reference shouldBe true + case _ => fail(message = "first value is not of type Variable") + } + second match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "second value is not of type Variable") + } + third match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => + fail(message = "third value is not of type IntegerLiteral") + } + case _ => fail(message = "result is 
not of type TripleValueOperation") + } + } + "be able to parse FE_FETCH_R V1 CV($array) BB1" in { + val operation = "FE_FETCH_R V1 CV($var) BB1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "FE_FETCH_R" + first match { + case Variable(name, tmp, reference) => + name shouldBe "V1" + tmp shouldBe true + reference shouldBe true + case _ => fail(message = "first value is not of type Variable") + } + second match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "second value is not of type Variable") + } + third match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => + fail(message = "third value is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type TripleValueOperation") + } + } + "be able to parse FETCH_DIM_R V1 string(\"a\")" in { + val operation = s"""FETCH_DIM_R V1 string("${encode("a")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FETCH_DIM_R" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "V1" + tmp shouldBe true + reference shouldBe true + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case StringLiteral(value) => value shouldBe "a" + case _ => fail(message = "rhs is not of type StringLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse ASSIGN_DIM CV($array) NEXT" in { + val operation = "ASSIGN_DIM CV($array) NEXT" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ASSIGN_DIM" + 
lhs match { + case Variable(name, tmp, reference) => + name shouldBe "array" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case ByteCodePlaceIndicator(value) => value shouldBe "NEXT" + case _ => fail(message = "rhs is not of type StringLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse ASSIGN_DIM CV($array) int(1)" in { + val operation = "ASSIGN_DIM CV($array) int(1)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ASSIGN_DIM" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "array" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse ASSIGN CV($x) int(42)" in { + val operation = "ASSIGN CV($x) int(42)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ASSIGN" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "x" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case IntegerLiteral(value) => value shouldBe 42 + case _ => fail(message = "rhs is not of type IntegerLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse ASSIGN_OBJ_OP (CONCAT) V11 int(42)" in { + val operation = "ASSIGN_OBJ_OP (CONCAT) V11 int(42)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length 
shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, _) => + code shouldBe "ASSIGN_OBJ_OP" + first shouldBe AssignOpLiteral("CONCAT") + second shouldBe Variable("V11", tmp = true, reference = true) + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ROPE_INIT 3 string(\"a\")" in { + val operation = s"""ROPE_INIT 3 string("${encode("a")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ROPE_INIT" + lhs match { + case IntegerLiteral(value) => value shouldBe 3 + case _ => fail(message = "lhs is not of type IntegerLiteral") + } + rhs match { + case StringLiteral(value) => value shouldBe "a" + case _ => fail(message = "rhs is not of type StringLiteral") + } + case _ => fail(message = "the result is not of type DualValueOperation") + } + } + "be able to parse ROPE_ADD 3 CV($a) T1" in { + val operation = "ROPE_ADD 3 CV($a) T1" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "ROPE_ADD" + first match { + case IntegerLiteral(value) => value shouldBe 3 + case _ => fail(message = "first is not of type IntegerLiteral") + } + second match { + case Variable(name, tmp, reference) => + name shouldBe "a" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "second is not of type Variable") + } + third match { + case Variable(name, tmp, reference) => + name shouldBe "T1" + tmp shouldBe true + reference shouldBe false + case _ => fail(message = "third is not of type Variable") + } + case _ => fail(message = "result is not of type TripleValueOperation") + } + } + "be able to parse ROPE_END 2 T1 T2" in { + val operation = "ROPE_END 2 T1 T2" + val Parsed.Success(result, length) = parse(operation, 
getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "ROPE_END" + first match { + case IntegerLiteral(value) => value shouldBe 2 + case _ => fail(message = "first is not of type IntegerLiteral") + } + second match { + case Variable(name, tmp, reference) => + name shouldBe "T1" + tmp shouldBe true + reference shouldBe false + case _ => fail(message = "second is not of type Variable") + } + third match { + case Variable(name, tmp, reference) => + name shouldBe "T2" + tmp shouldBe true + reference shouldBe false + case _ => fail(message = "second is not of type Variable") + } + case _ => fail(message = "result is not of type TripleValueOperation") + } + } + "be able to parse ASSIGN_OP (ADD) CV($a) CV($b)" in { + val operation = "ASSIGN_OP (ADD) CV($a) CV($b)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "ASSIGN_OP" + first match { + case AssignOpLiteral(value) => value shouldBe "ADD" + case _ => fail(message = "first is not of type StringLiteral") + } + second match { + case Variable(name, tmp, reference) => + name shouldBe "a" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "second is not of type Variable") + } + third match { + case Variable(name, tmp, reference) => + name shouldBe "b" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "second is not of type Variable") + } + case _ => fail(message = "result is not of type TripleValueOperation") + } + } + "be able to parse all ASSIGN_OP OPs" in { + def variant(x: String): Unit = { + val Parsed.Success(_, length) = parse(x, getOperation(_)) + length shouldBe x.length + } + + variant("ASSIGN_OP (SUB) CV0($a) int(2)") + variant("ASSIGN_OP (MUL) CV0($a) int(2)") + variant("ASSIGN_OP (ADD) CV0($a) int(2)") + 
variant("ASSIGN_OP (DIV) CV0($a) int(2)") + variant("ASSIGN_OP (MOD) CV0($a) int(2)") + variant("ASSIGN_OP (POW) CV0($a) int(2)") + variant("ASSIGN_OP (ADD) CV0($a) int(2)") + variant("ASSIGN_OP (BW_AND) CV0($a) int(2)") + variant("ASSIGN_OP (BW_OR) CV0($a) int(2)") + variant("ASSIGN_OP (BW_XOR) CV0($a) int(2)") + variant("ASSIGN_OP (SL) CV0($a) int(2)") + variant("ASSIGN_OP (SR) CV0($a) int(2)") + } + + "be able to parse all ASSIGN_DIM_OP OPs" in { + def variant(x: String): Unit = { + val Parsed.Success(_, length) = parse(x, getOperation(_)) + length shouldBe x.length + } + + variant("ASSIGN_DIM_OP (SUB) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (MUL) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (ADD) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (DIV) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (MOD) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (POW) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (ADD) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (BW_AND) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (BW_OR) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (BW_XOR) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (SL) CV0($a) int(0)") + variant("ASSIGN_DIM_OP (SR) CV0($a) int(0)") + } + // below is the representative test for no value operations + "be able to parse DO_ICALL" in { + val operation = "DO_ICALL" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case NoValueOperation(code) => code shouldBe "DO_ICALL" + case _ => fail(message = "the result is not of type NoValueOperation") + } + } + // below is the representative test for single value operations + "be able to parse ECHO int(32)" in { + val operation = "ECHO int(32)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case SingleValueOperation(code, value) => + code shouldBe "ECHO" + value match { + case IntegerLiteral(value) => value shouldBe 32 + case _ => fail(message = "value is 
not of type IntegerLiteral") + } + case x => fail(s"unexpected operation $x") + } + } + // below is the representative test for dual value operations + "be able to parse CONCAT string(\"a\") string(\"b\")" in { + val operation = s"""CONCAT string("${encode("a")}") string("${encode("b")}")""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "CONCAT" + length shouldBe operation.length + lhs match { + case StringLiteral(value) => value shouldBe "a" + case _ => fail(message = "lhs is not of type StringLiteral") + } + rhs match { + case StringLiteral(value) => value shouldBe "b" + case _ => fail(message = "rhs is not of type StringLiteral") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "be able to parse CONCAT string(\"a\") T1" in { + val operation = s"""CONCAT string("${encode("a")}") T1""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "CONCAT" + length shouldBe operation.length + lhs match { + case StringLiteral(value) => value shouldBe "a" + case _ => fail(message = "lhs was not of type StringLiteral") + } + rhs match { + case Variable(name, tmp, reference) => + name shouldBe "T1" + tmp shouldBe true + reference shouldBe false + case _ => fail(message = "rhs was not of type Variable") + } + case _ => + fail(message = "the result was not of type DualValueOperation") + } + } + "be able to parse CONCAT string(\"a\") CV($var)" in { + val operation = s"""CONCAT string("${encode("a")}") CV($$var)""" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "CONCAT" + length shouldBe operation.length + lhs match { + case StringLiteral(value) => value shouldBe "a" + case _ => fail(message = "lhs was not of type 
StringLiteral") + } + rhs match { + case Variable(name, tmp, reference) => + name shouldBe "var" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs was not of type Variable") + } + case _ => fail(message = "lhs was not of type DualValueOperation") + } + } + "be able to parse CONCAT T1 T2" in { + val operation = "CONCAT T1 T2" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "CONCAT" + length shouldBe operation.length + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "T1" + tmp shouldBe true + reference shouldBe false + case _ => fail(message = "lhs was not of type variable") + } + rhs match { + case Variable(name, tmp, reference) => + name shouldBe "T2" + tmp shouldBe true + reference shouldBe false + case _ => fail(message = "lhs was not of type variable") + } + case _ => fail(message = "result was not of type DualValueOperation") + } + } + "be able to parse CONCAT CV($var1) CV($var2)" in { + val operation = "CONCAT CV($var1) CV($var2)" + val Parsed.Success(result, length) = + parse(input = operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(command, lhs, rhs) => + command shouldBe "CONCAT" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "var1" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs was not of type variable") + } + rhs match { + case Variable(name, tmp, reference) => + name shouldBe "var2" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs was not of type variable") + } + case _ => fail(message = "result was not of type DualValueOperation") + } + } + "be able to parse FETCH_STATIC_PROP_FUNC_ARG string(\"test\") (static) (exception)" in { + val operation = + s"""FETCH_STATIC_PROP_FUNC_ARG string("${encode("test")}") (static) (exception)""" + val 
Parsed.Success(result, length) = + parse(input = operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "FETCH_STATIC_PROP_FUNC_ARG" + first match { + case StringLiteral(value) => value shouldBe "test" + case x => fail(s"unexpected value $x") + } + second match { + case ByteCodeKeyword(value) => value shouldBe "static" + case x => fail(s"unexpected value $x") + } + third match { + case ByteCodeKeyword(value) => value shouldBe "exception" + case x => fail(s"unexpected value $x") + } + case x => fail(s"unexpected op $x") + } + } + "be able to parse INIT_ARRAY 3 (packed) T19 NEXT" in { + val operation = "INIT_ARRAY 3 (packed) T19 NEXT" + val Parsed.Success(result, length) = + parse(input = operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case QuadrupleValueOperation(code, first, second, third, fourth) => + code shouldBe "INIT_ARRAY" + first match { + case IntegerLiteral(value) => value shouldBe 3 + case x => fail(s"unexpected value $x") + } + second match { + case ByteCodeKeyword(value) => value shouldBe "packed" + case x => fail(s"unexpected value $x") + } + third match { + case Variable(name, tmp, reference) => + name shouldBe "T19" + tmp shouldBe true + reference shouldBe false + case x => fail(s"unexpected value $x") + } + fourth match { + case ByteCodePlaceIndicator(value) => value shouldBe "NEXT" + case x => fail(s"unexpected value $x") + } + case x => fail(s"unexpected op $x") + } + } + "be able to parse FETCH_STATIC_PROP_R string(\"test\") (self) (exception)" in { + val operation = s"""FETCH_STATIC_PROP_R string("${encode("test")}") (self) (exception)""" + val Parsed.Success(result, length) = + parse(input = operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "FETCH_STATIC_PROP_R" + first match { + case 
StringLiteral(value) => value shouldBe "test" + case x => fail(s"unexpected value $x") + } + second match { + case ByteCodeKeyword(value) => value shouldBe "self" + case x => fail(s"unexpected value $x") + } + third match { + case ByteCodeKeyword(value) => value shouldBe "exception" + case x => fail(s"unexpected value $x") + } + case x => fail(s"unexpected op $x") + } + } + "be able to parse FETCH_OBJ_R THIS string(\"test\")" in { + val operation = s"""FETCH_OBJ_R THIS string("${encode("test")}")""" + val Parsed.Success(result, length) = + parse(input = operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FETCH_OBJ_R" + lhs shouldBe ByteCodePlaceIndicator("THIS") + rhs shouldBe StringLiteral("test") + case x => fail(s"unexpected op $x") + } + } + + "be able to parse INSTANCEOF CV0($instance) (static) (no-autoload) (silent) (exception)" in { + val op = "INSTANCEOF CV0($instance) (static) (no-autoload) (silent) (exception)" + val Parsed.Success(_, length) = parse(op, getOperation(_)) + length shouldBe op.length + } + "be able to parse FAST_CALL 0016" in { + val op = "FAST_CALL 0016" + val Parsed.Success(result, length) = parse(op, getOperation(_)) + length shouldBe op.length + result.op shouldBe SingleValueOperation("FAST_CALL", IntegerLiteral(16)) + } + "be able to parse FAST_RET T10 try-catch(0)" in { + val op = "FAST_RET T10 try-catch(0)" + val Parsed.Success(result, length) = parse(op, getOperation(_)) + length shouldBe op.length + result.op shouldBe DualValueOperation("FAST_RET", Variable("T10", tmp = true), TryCatchLiteral(0)) + } + "be able to parse IS_IDENTICAL CV0($a) string(SINGLEBACKSLASH)" in { + val op = "IS_IDENTICAL CV0($a) string(\"" + encode("""\""") + "\")" + val Parsed.Success(result, length) = parse(op, getOperation(_)) + length shouldBe op.length + result.op shouldBe DualValueOperation("IS_IDENTICAL", Variable("a", tmp = false), StringLiteral("""\""")) + } 
+ } + + "parser getInstruction" should { + "be able to correctly parse OP_DATA int(42)" in { + val instruction = "OP_DATA int(42)" + val Parsed.Success(result, length) = + parse(input = instruction, getInstruction(_)) + length shouldBe instruction.length + result match { + case Operation(op) => + op match { + case SingleValueOperation(name, value) => + name shouldBe "OP_DATA" + value match { + case IntegerLiteral(value) => + value shouldBe 42 + case x => fail(s"unexpected value $x") + } + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse V1 = NEW 1 string(\"Basic\")" in { + val instruction = s"""V1 = NEW 1 string("${encode("Basic")}")""" + val Parsed.Success(result, length) = + parse(input = instruction, getInstruction(_)) + length shouldBe instruction.length + result match { + case Assignment(lhs, rhs) => + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "V1" + tmp shouldBe true + reference shouldBe true + } + rhs match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "NEW" + lhs match { + case IntegerLiteral(value) => value shouldBe 1 + case _ => fail(message = "lhs is not of type IntegerLiteral") + } + rhs match { + case StringLiteral(value) => value shouldBe "Basic" + case _ => fail(message = "rhs is not of type StringLiteral") + } + case x => fail(s"unexpected operation $x") + } + case _ => fail(message = "result is not of type Assignment") + } + } + "be able to parse INIT_NS_FCALL_BY_NAME 1 string(\"PleskX\\Api\\get_class\")" in { + val operation = + s"""INIT_NS_FCALL_BY_NAME 1 string("${encode("PleskX\\Api\\get_class")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case INIT_NS_FCALL_BY_NAME(paramCount, function) => + paramCount shouldBe 1 + function shouldBe "pleskx\\api\\get_class" + case _ => + fail(message = "return is not of type 
INIT_NS_FCALL_BY_NAME") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse INCLUDE_OR_EVAL (require_once) string(\"../class.phpmailer.php\")" in { + val operation = + s"""INCLUDE_OR_EVAL (require_once) string("${encode("../class.phpmailer.php")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "INCLUDE_OR_EVAL" + lhs shouldBe ByteCodeKeyword("require_once") + rhs shouldBe StringLiteral("../class.phpmailer.php") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse FETCH_OBJ_R THIS string(\"Mail\")" in { + val operation = s"""FETCH_OBJ_R THIS string("${encode("Mail")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FETCH_OBJ_R" + lhs shouldBe ByteCodePlaceIndicator("THIS") + rhs shouldBe StringLiteral("Mail") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse SWITCH_STRING CV2($sType) \"to\": 0013" in { + val operation = s"""SWITCH_STRING CV2($$sType) "${encode("to")}": 0013""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SWITCH(code, variable, switches) => + code shouldBe "SWITCH_STRING" + variable shouldBe Variable("sType", tmp = false) + switches shouldBe Seq(("to", 13)) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse SWITCH_STRING CV2($sType) \"to\": BB3" in { + val operation = s"""SWITCH_STRING CV2($$sType) "${encode("to")}": BB3""" + val 
Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SWITCH(code, variable, switches) => + code shouldBe "SWITCH_STRING" + variable shouldBe Variable("sType", tmp = false) + switches shouldBe Seq(("to", 3)) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse SWITCH_STRING CV2($sType) \"to\": 0013, \"bcc\": 0014" in { + val operation = s"""SWITCH_STRING CV2($$sType) "${encode("to")}": 0013, "${encode("bcc")}": 0014""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SWITCH(code, variable, switches) => + code shouldBe "SWITCH_STRING" + variable shouldBe Variable("sType", tmp = false) + switches shouldBe Seq(("to", 13), ("bcc", 14)) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse SWITCH_STRING string(\"cl\") \"cgi\": 0051, \"cgi-fcgi\": 0051, default: 0069" in { + val operation = s"""SWITCH_STRING string("${encode("cl")}") "${encode("cgi")}": 0051, "${encode("cgi-fcgi")}": 0051, default: 0069""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SWITCH(code, value, switches) => + code shouldBe "SWITCH_STRING" + value shouldBe StringLiteral("cl") + switches shouldBe Seq(("cgi", 51), ("cgi-fcgi", 51), ("default", 69)) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse SWITCH_STRING CV2($sType) \"to\": 0013, \"bcc\": 0014, default: 0016" in { + val operation = + s"""SWITCH_STRING CV2($$sType) "${encode("to")}": 0013, "${encode("bcc")}": 0014, default: 0016""" + val Parsed.Success(result, length) = parse(operation, 
getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SWITCH(code, variable, switches) => + code shouldBe "SWITCH_STRING" + variable shouldBe Variable("sType", tmp = false) + switches shouldBe Seq(("to", 13), ("bcc", 14), ("default", 16)) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse SWITCH_LONG CV($test) 33: 0013, 44: 0014, default: 0023" in { + val operation = "SWITCH_LONG CV($test) 33: 0013, 44: 0014, default: 0023" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SWITCH(code, variable, switches) => + code shouldBe "SWITCH_LONG" + variable shouldBe Variable("test", tmp = false) + switches shouldBe Seq(("33", 13), ("44", 14), ("default", 23)) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse RETURN_BY_REF (value) bool(false)" in { + val operation = "RETURN_BY_REF (value) bool(false)" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op shouldBe DualValueOperation("RETURN_BY_REF", ByteCodeKeyword("value"), BooleanLiteral(false)) + case x => fail(s"unexpected operation $x") + } + } + "be able to parse RETURN_BY_REF null" in { + val operation = "RETURN_BY_REF null" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op shouldBe SingleValueOperation("RETURN_BY_REF", Null()) + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ECHO of multi line string" in { + val encoded = encode( + """ + | + |Joomla! derives from copyrighted works licensed under the GNU General + |Public License. 
This version has been modified pursuant to the + |GNU General Public License as of September 15, 2005, and as distributed, + |it includes or is derivative of works licensed under the GNU General + |Public License or other free or open source software licenses. Please + |see the CREDITS.php for a non-exhaustive list of contributors and + |copyright holders. A full text version of the GNU GPL version 2 can be + |found in the LICENSE.php file. A full text version of the other licenses + |that Joomla! is derivative of or includes can be found in LICENSES.php. + | + |""".stripMargin) + val operation = + s"""ECHO string("$encoded")""".stripMargin + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SingleValueOperation(code, value) => + code shouldBe "ECHO" + value shouldBe StringLiteral( + """ + | + |Joomla! derives from copyrighted works licensed under the GNU General + |Public License. This version has been modified pursuant to the + |GNU General Public License as of September 15, 2005, and as distributed, + |it includes or is derivative of works licensed under the GNU General + |Public License or other free or open source software licenses. Please + |see the CREDITS.php for a non-exhaustive list of contributors and + |copyright holders. A full text version of the GNU GPL version 2 can be + |found in the LICENSE.php file. A full text version of the other licenses + |that Joomla! is derivative of or includes can be found in LICENSES.php. 
+ | + |""".stripMargin) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse FETCH_OBJ_IS CV1($display) string(\"panels\")" in { + val operation = s"""FETCH_OBJ_IS CV1($$display) string("${encode("panels")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FETCH_OBJ_IS" + lhs shouldBe Variable("display", tmp = false) + rhs shouldBe StringLiteral("panels") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse IN_ARRAY 0 CV0($type) array(...)" in { + val operation = "IN_ARRAY 0 CV0($type) array(...)" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "IN_ARRAY" + first shouldBe IntegerLiteral(0) + second shouldBe Variable("type", tmp = false) + third shouldBe ArrayValue(None) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse INIT_ARRAY 2 (packed) (ref) V11 NEXT" in { + val operation = "INIT_ARRAY 2 (packed) (ref) V11 NEXT" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case QuintupleValueOperation(code, + first, + second, + third, + fourth, + fifth) => + code shouldBe "INIT_ARRAY" + first shouldBe IntegerLiteral(2) + second shouldBe ByteCodeKeyword("packed") + third shouldBe ByteCodeKeyword("ref") + fourth shouldBe Variable("V11", tmp = true, reference = true) + fifth shouldBe ByteCodePlaceIndicator("NEXT") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + 
} + } + "be able to parse SEND_VAR_NO_REF_EX V30 1" in { + val operation = "SEND_VAR_NO_REF_EX V30 1" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "SEND_VAR_NO_REF_EX" + lhs shouldBe Variable("V30", tmp = true, reference = true) + rhs shouldBe IntegerLiteral(1) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse CATCH string(\"Exception\")" in { + val operation = s"""CATCH string("${encode("Exception")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SingleValueOperation(code, value) => + code shouldBe "CATCH" + value shouldBe StringLiteral("Exception") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse CATCH string(\"Exception\") 0066" in { + val operation = s"""CATCH string("${encode("Exception")}") 0066""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "CATCH" + lhs shouldBe StringLiteral("Exception") + rhs shouldBe IntegerLiteral(66) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ASSIGN_OBJ THIS string(\"auth_username\")" in { + val operation = s"""ASSIGN_OBJ THIS string("${encode("auth_username")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ASSIGN_OBJ" + lhs shouldBe ByteCodePlaceIndicator("THIS") 
+ rhs shouldBe StringLiteral("auth_username") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ASSIGN_OBJ_OP (CONCAT) THIS string(\"response\")" in { + val operation = s"""ASSIGN_OBJ_OP (CONCAT) THIS string("${encode("response")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "ASSIGN_OBJ_OP" + first shouldBe AssignOpLiteral("CONCAT") + second shouldBe ByteCodePlaceIndicator("THIS") + third shouldBe StringLiteral("response") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse FE_FETCH_RW V6 CV2($frameLine) 0029" in { + val operation = "FE_FETCH_RW V6 CV2($frameLine) 0029" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "FE_FETCH_RW" + first shouldBe Variable("V6", tmp = true, reference = true) + second shouldBe Variable("frameLine", tmp = false) + third shouldBe IntegerLiteral(29) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse SEND_VAL string(\"\") 2" in { + val operation = "SEND_VAL string(\"\") 2" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "SEND_VAL" + lhs shouldBe StringLiteral("") + rhs shouldBe IntegerLiteral(2) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ASSIGN_REF (function) CV0($backref_view) V2" in { + val operation = 
"ASSIGN_REF (function) CV0($backref_view) V2" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "ASSIGN_REF_3" + first shouldBe ByteCodeKeyword("function") + second shouldBe Variable("backref_view", tmp = false) + third shouldBe Variable("V2", tmp = true, reference = true) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse RETURN null" in { + val operation = "RETURN null" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case SingleValueOperation(code, value) => + code shouldBe "RETURN" + value shouldBe Null() + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse RETURN_BY_REF (function) null" in { + val operation = "RETURN_BY_REF (function) null" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "RETURN_BY_REF" + lhs shouldBe ByteCodeKeyword("function") + rhs shouldBe Null() + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ADD_ARRAY_ELEMENT array(...) string(\"libraries\")" in { + val operation = s"""ADD_ARRAY_ELEMENT array(...) 
string("${encode("libraries")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ADD_ARRAY_ELEMENT" + lhs shouldBe ArrayValue(None) + rhs shouldBe StringLiteral("libraries") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse FE_RESET_R array(...) 0267" in { + val operation = "FE_RESET_R array(...) 0267" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "FE_RESET_R" + lhs shouldBe ArrayValue(None) + rhs shouldBe IntegerLiteral(267) + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ASSIGN_OBJ_REF (function) CV6($cache) string(\"display\")" in { + val operation = + s"""ASSIGN_OBJ_REF (function) CV6($$cache) string("${encode("display")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "ASSIGN_OBJ_REF_3" + first shouldBe ByteCodeKeyword("function") + second shouldBe Variable("cache", tmp = false) + third shouldBe StringLiteral("display") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse FETCH_OBJ_W CV0($account) string(\"content\")" in { + val operation = s"""FETCH_OBJ_W CV0($$account) string("${encode("content")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + 
code shouldBe "FETCH_OBJ_W_2" + lhs shouldBe Variable("account", tmp = false) + rhs shouldBe StringLiteral("content") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse FETCH_OBJ_W (ref) V3 string(\"content\")" in { + val operation = s"""FETCH_OBJ_W (ref) V3 string("${encode("content")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case TripleValueOperation(code, first, second, third) => + code shouldBe "FETCH_OBJ_W_3" + first shouldBe ByteCodeKeyword("ref") + second shouldBe Variable("V3", tmp = true, reference = true) + third shouldBe StringLiteral("content") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse ASSIGN_STATIC_PROP string(\"leeway\") string(\"Firebase\\JWT\\JWT\")" in { + val operation = + s"""ASSIGN_STATIC_PROP string("${encode("leeway")}") string("${encode("Firebase\\JWT\\JWT")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ASSIGN_STATIC_PROP_2" + lhs shouldBe StringLiteral("leeway") + rhs shouldBe StringLiteral("Firebase\\JWT\\JWT") + case x => fail(s"unexpected op $x") + } + case x => fail(s"unexpected operation $x") + } + } + "be able to parse TICKS 1" in { + val operation = "TICKS 1" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation(SingleValueOperation("TICKS", IntegerLiteral(1))) + } + "be able to parse ADD_ARRAY_ELEMENT float(inf) string(\"depth_min_leaf\")" in { + val operation = s"""ADD_ARRAY_ELEMENT float(inf) string("${encode("depth_min_leaf")}")""" + val Parsed.Success(result, length) = parse(operation, 
getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("ADD_ARRAY_ELEMENT", FloatLiteral(Float.PositiveInfinity), StringLiteral("depth_min_leaf"))) + } + "be able to parse ASSIGN_STATIC_PROP_OP (CONCAT) string(\"extraHeaderHTML\")" in { + val operation = s"""ASSIGN_STATIC_PROP_OP (CONCAT) string("${encode("extraHeaderHTML")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("ASSIGN_STATIC_PROP_OP", AssignOpLiteral("CONCAT"), StringLiteral("extraHeaderHTML")) + ) + } + "be able to parse FUNC_GET_ARGS" in { + val operation = "FUNC_GET_ARGS" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + NoValueOperation("FUNC_GET_ARGS") + ) + } + "be able to parse FUNC_GET_ARGS int(1)" in { + val operation = "FUNC_GET_ARGS int(1)" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + SingleValueOperation("FUNC_GET_ARGS", IntegerLiteral(1)) + ) + } + "be able to parse POST_INC_STATIC_PROP string(\"writes\")" in { + val operation = s"""POST_INC_STATIC_PROP string("${encode("writes")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + SingleValueOperation("POST_INC_STATIC_PROP", StringLiteral("writes")) + ) + } + "be able to parse INIT_FCALL 1 96 string(\"mb_strlen\")" in { + val operation = s"""INIT_FCALL 1 96 string("${encode("mb_strlen")}")" + otherStuff""" + val Parsed.Success(result, _) = parse(operation, getInstruction(_)) + result shouldBe Operation( + INIT_FCALL(1, 96, StringLiteral("mb_strlen")) + ) + } + "be able to parse SEND_USER string(\"spc\") 3" in { + val operation = s"""SEND_USER string("${encode("spc")}") 3""" + val 
Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("SEND_USER", StringLiteral("spc"), IntegerLiteral(3)) + ) + } + "be able to parse PRE_INC_STATIC_PROP string(\"value\") string(\"other\")" in { + val operation = s"""PRE_INC_STATIC_PROP string("${encode("value")}") string("${encode("other")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("PRE_INC_STATIC_PROP", StringLiteral("value"), StringLiteral("other")) + ) + } + "be able to parse FETCH_STATIC_PROP_IS string(\"documents\") string(\"phpQuery\")" in { + val operation = s"""FETCH_STATIC_PROP_IS string("${encode("documents")}") string("${encode("phpQuery")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("FETCH_STATIC_PROP_IS", StringLiteral("documents"), StringLiteral("phpQuery")) + ) + } + "be able to parse DECLARE_ANON_CLASS string(\"class@anonymous\") string(\"vanilla\\models\\model\")" in { + val operation = s"""DECLARE_ANON_CLASS string("${encode("class@anonymous")}") string("${encode("vanilla\\models\\model")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("DECLARE_ANON_CLASS", StringLiteral("class@anonymous"), StringLiteral("vanilla\\models\\model")) + ) + } + "be able to process weird string stuff" in { + val operation = s"""ADD_ARRAY_ELEMENT string("${encode("$G")}") string("${encode("^G")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("ADD_ARRAY_ELEMENT", StringLiteral("$G"), StringLiteral("^G")) + ) + } + "be able to parse FETCH_OBJ_R THIS 
string(\"b\")" in { + val operation = s"""FETCH_OBJ_R THIS string("${encode("b")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + DualValueOperation("FETCH_OBJ_R", ByteCodePlaceIndicator("THIS"), StringLiteral("b")) + ) + } + "be able to parse PRE_DEC_STATIC_PROP string(\"active\")" in { + val operation = s"""PRE_DEC_STATIC_PROP string("${encode("active")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_)) + length shouldBe operation.length + result shouldBe Operation( + SingleValueOperation("PRE_DEC_STATIC_PROP", StringLiteral("active")) + ) + } + "be able to parse FETCH_GLOBALS" in { + val op = "FETCH_GLOBALS" + val Parsed.Success(result, length) = parse(op, getInstruction(_)) + length shouldBe op.length + result shouldBe Operation( + NoValueOperation("FETCH_GLOBALS") + ) + } + "be able to parse VERIFY_NEVER_TYPE" in { + val op = "VERIFY_NEVER_TYPE" + val Parsed.Success(result, length) = parse(op, getInstruction(_)) + length shouldBe op.length + result shouldBe Operation( + NoValueOperation("VERIFY_NEVER_TYPE") + ) + } + "be able to parse CALLABLE_CONVERT" in { + val op = "CALLABLE_CONVERT" + val Parsed.Success(result, length) = parse(op, getInstruction(_)) + length shouldBe op.length + result shouldBe Operation( + NoValueOperation("CALLABLE_CONVERT") + ) + } + "be able to parse CASE_STRICT T1 int(2)" in { + val op = "CASE_STRICT T1 int(2)" + val Parsed.Success(result, length) = parse(op, getInstruction(_)) + length shouldBe op.length + result shouldBe Operation(DualValueOperation("CASE_STRICT", Variable("T1", tmp = true), IntegerLiteral(2))) + } + "be able to parse DECLARE_LAMBDA_FUNCTION 0" in { + val op = "DECLARE_LAMBDA_FUNCTION 0" + val Parsed.Success(result, length) = parse(op, getInstruction(_)) + length shouldBe op.length + result shouldBe Operation(SingleValueOperation("DECLARE_LAMBDA_FUNCTION", IntegerLiteral(0))) + } + "be able 
to parse TYPE_CHECK (false) V2" in { + val op = "TYPE_CHECK (false) V2" + val Parsed.Success(result, length) = parse(op, getInstruction(_)) + length shouldBe op.length + result shouldBe Operation(DualValueOperation("TYPE_CHECK", BooleanLiteral(false), Variable("V2", tmp = true, reference = true))) + } + "be able to parse TYPE_CHECK TYPE [bool, long, double, string, array, object, resource] T6" in { + val op = "TYPE_CHECK TYPE [bool, long, double, string, array, object, resource] T6" + val Parsed.Success(result, length) = parse(op, getInstruction(_)) + length shouldBe op.length + result shouldBe Operation(DualValueOperation("TYPE_CHECK", + StringLiteral("TYPE [bool, long, double, string, array, object, resource]"), + Variable("T6", tmp = true))) + } + "be able to parse CHECK_VAR" in { + val string = "CHECK_VAR CV0($undef)" + val Parsed.Success(result, length) = parse(string, getOperation(_)) + length shouldBe string.length + result.op match { + case SingleValueOperation(code, value) => code shouldBe "CHECK_VAR" + value shouldBe Variable("undef", tmp = false) + case _ => fail() + } + } + } + + "be able to parse named parameters" should { + "be able to parse SEND_VAL[_X] bool(false) string(\"double_encode\")" in { + // I've only seen SEND_VAL in the wild so far, the rest is extrapolated. 
/Malte + for (x <- Seq("SEND_VAL", "SEND_VAL_EX", "SEND_USER", "SEND_REF")) { + val operation = x + s""" bool(false) string("${encode("double_encode")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_), verboseFailures = true) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe x + lhs shouldBe BooleanLiteral(false) + rhs shouldBe StringLiteral("double_encode") + case x => fail(s"unexpected operation $x") + } + case x => fail(s"unexpected operation $x") + } + } + } + "be able to parse SEND_VAR[_X] CV($a) string(\"double_encode\")" in { + for (x <- Seq("SEND_VAR", "SEND_VAR_EX", "SEND_VAR_NO_REF_EX", "SEND_VAR_NO_REF", "SEND_FUNC_ARG")) { + val operation = x + s""" CV($$a) string("${encode("double_encode")}")""" + val Parsed.Success(result, length) = parse(operation, getInstruction(_), verboseFailures = true) + length shouldBe operation.length + result match { + case op: Operation => + op.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe x + lhs shouldBe Variable("a", tmp = false) + rhs shouldBe StringLiteral("double_encode") + case x => fail(s"unexpected operation $x") + } + case x => fail(s"unexpected operation $x") + } + } + } + } + + "be able to parse COPY_TMP T2" in { + val operation = "COPY_TMP T2" + val Parsed.Success(result, length) = parse(operation, parseOperation(_), verboseFailures = true) + length shouldBe operation.length + result match { + case SingleValueOperation(code, value) => + code shouldBe "COPY_TMP" + value shouldBe Variable("T2", tmp = true) + } + } + + "be able to parse match instructions" should { + "complicated usage with fake default" in { + val operation = s"""MATCH CV0($$food) "${encode("apple")}": BB2, "${encode("bar")}": BB3, "${encode("default")}": BB4, "${encode("apple2")}": BB5, "${encode("bar2")}": BB6, "${encode("cake2")}": BB7, default: BB1""" + val Parsed.Success(result, 
length) = parse(operation, parseOperation(_)) + length shouldBe operation.length + val rc = result.asInstanceOf[MatchOpcode] + rc.code shouldBe "MATCH" + rc.default shouldBe "1" + rc.values shouldBe Seq( + KeyValuePair(Right("apple"), "2"), + KeyValuePair(Right("bar"), "3"), + KeyValuePair(Right("default"), "4"), + KeyValuePair(Right("apple2"), "5"), + KeyValuePair(Right("bar2"), "6"), + KeyValuePair(Right("cake2"), "7")) + } + + "int and string" in { + val operation = s"""MATCH CV0($$food) 1: 0005, "${encode("42")}": 0007, default: 0004""" + val Parsed.Success(result, length) = parse(operation, parseOperation(_), verboseFailures = true) + length shouldBe operation.length + val rc = result.asInstanceOf[MatchOpcode] + rc.code shouldBe "MATCH" + rc.default shouldBe "0004" + rc.values shouldBe Seq(KeyValuePair(Left(1), "0005"), + KeyValuePair(Right("42"), "0007")) + } + } + + "be able to parse const array initialization" when { + "empty array" in { + val operation = + "ASSIGN CV0($x) array()" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ASSIGN" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "x" + tmp shouldBe false + reference shouldBe false + } + rhs match { + case ArrayValue(content) => content shouldBe Some(List()) + } + } + } + + "named and index values" in { + val operation = + "ASSIGN CV0($x) array(N:string(\"Zm9v\") string(\"YmFy\")|P:int(0) int(1)|P:int(1) int(2)|P:int(2) int(3)|)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ASSIGN" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "x" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case ArrayValue(arr) 
=> arr match { + case Some(value) => value shouldBe List( + ArrayKeyValuePair(Right("foo"), StringLiteral("bar")), + ArrayKeyValuePair(Left(0), IntegerLiteral(1)), + ArrayKeyValuePair(Left(1), IntegerLiteral(2)), + ArrayKeyValuePair(Left(2), IntegerLiteral(3)), + ) + case None => fail("array init missing") + } + case _ => fail(message = "rhs is not of type ArrayValue") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + "nested arrays" in { + val operation = "ASSIGN CV0($x) array(P:int(0) array(...)|P:int(1) string(\"Zm9vYmFy\")|)" + val Parsed.Success(result, length) = parse(operation, getOperation(_)) + length shouldBe operation.length + result.op match { + case DualValueOperation(code, lhs, rhs) => + code shouldBe "ASSIGN" + lhs match { + case Variable(name, tmp, reference) => + name shouldBe "x" + tmp shouldBe false + reference shouldBe false + case _ => fail(message = "lhs is not of type Variable") + } + rhs match { + case ArrayValue(arr) => arr match { + case Some(value) => value shouldBe List( + ArrayKeyValuePair(Left(0), ArrayValue(None)), + ArrayKeyValuePair(Left(1), StringLiteral("foobar")), + ) + case None => fail("array init missing") + } + case _ => fail(message = "rhs is not of type ArrayValue") + } + case _ => fail(message = "result is not of type DualValueOperation") + } + } + } + + "parseDestinationPattern" should { + "parse \"to\": 0013" in { + val string = s""""${encode("to")}": 0013""" + val Parsed.Success(result, length) = + parse(string, parseStringDestinationPattern(_)) + length shouldBe string.length + result shouldBe (("to", "0013")) + } + "parse default: 0013" in { + val string = "default: 0013" + val Parsed.Success(result, length) = + parse(string, parseStringDestinationPattern(_)) + length shouldBe string.length + result shouldBe (("default", "0013")) + } + "parse 44: 0013" in { + val string = "44: 0013" + val Parsed.Success(result, length) = + parse(string, parseNumberDestinationPattern(_)) + length 
shouldBe string.length + result shouldBe (("44", "0013")) + } + "parse default: 0016" in { + val string = "default: 0016" + val Parsed.Success(result, length) = + parse(string, parseNumberDestinationPattern(_)) + length shouldBe string.length + result shouldBe (("default", "0016")) + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/LiteralsTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/LiteralsTest.scala new file mode 100644 index 0000000..d369f08 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/LiteralsTest.scala @@ -0,0 +1,197 @@ +package io.joern.bytecode.parser.php8 + +import fastparse._ +import io.joern.bytecode.parser.EasyBase64.encode +import io.joern.bytecode.parser.constructs.{ArrayValue, ByteCodeKeyword, FloatLiteral} +import io.joern.bytecode.parser.php8.Literals._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class LiteralsTest extends AnyWordSpec with Matchers { + + "parser parseStringLiteral" should { + "be able to parse string(\"value\")" in { + val Parsed.Success(result, _) = + parse(s"""string("${encode("value")}")""", getStringLiteral(_)) + assert(result.value == "value") + } + "be able to parse a string(\"value\") 42" in { + val Parsed.Success(result, _) = parse(s"""string("${encode("value")}") 42""", getStringLiteral(_)) + result.value shouldBe "value" + } + "be able to parse string(\"a\")" in { + val Parsed.Success(result, _) = + parse(s"""string("${encode("a")}")""", getStringLiteral(_)) + assert(result.value == "a") + } + "be able to parse multi line strings without quotations" in { + val multiLineString = + """I am a + a multi line string without + quotation that should be parsable""" + val stringLiteral = "string(\"" + encode(multiLineString) + "\")" + val Parsed.Success(result, length) = + parse(stringLiteral, getStringLiteral(_)) + assert(length == stringLiteral.length) + assert(result.value == 
multiLineString) + } + "be able to parse multi line strings with escaped quotations" in { + val multiLineString = + "I am a \n multi line string with \\\" \n that should be parsable" + val stringLiteral = "string(\"" + encode(multiLineString) + "\")" + val Parsed.Success(result, length) = + parse(stringLiteral, getStringLiteral(_)) + assert(length == stringLiteral.length) + assert(result.value == multiLineString) + } + "be able to parse valid qualified namespace path" in { + val string = s"""string("${encode("PleskX\\Api\\strtolower")}")""" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "PleskX\\Api\\strtolower" + } + "be able to parse string with underscore" in { + val string = s"""string("${encode("some_text")}")""" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "some_text" + } + "be able to parse string with leading underscore" in { + val string = s"""string("${encode("_leading")}")""" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "_leading" + } + "be able to parse string with numbers" in { + val string = s"""string("${encode("Copyright3_6_56")}")""" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "Copyright3_6_56" + } + "be able to parse string that is empty" in { + val string = s"""string("${encode("")}")""" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "" + } + "be able to parse string string(\"/[\\s]+/\")" in { + val string = s"""string("${encode("/[\\s]+/")}")""" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "/[\\s]+/" + } + "be able to parse string 
string(\"Firebase\\JWT\\JWT\")" in { + val string = s"""string("${encode("Firebase\\JWT\\JWT")}")""" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe "Firebase\\JWT\\JWT" + } + "be able to parse escaped backslash" in { + val string = s"""string("${encode("\\\\")}")""" + val value = "\\\\" + val Parsed.Success(result, length) = parse(string, getStringLiteral(_)) + length shouldBe string.length + result.value shouldBe value + } + } + + "parser parseIntLiteral" should { + "be able to parse int(42)" in { + val Parsed.Success(result, _) = parse("int(42)", getIntLiteral(_)) + assert(result.value == 42) + } + "be able to parse int(-42)" in { + val Parsed.Success(result, _) = parse("int(-42)", getIntLiteral(_)) + result.value shouldBe -42 + } + "be able to parse int(0)" in { + val Parsed.Success(result, _) = parse("int(0)", getIntLiteral(_)) + result.value shouldBe 0 + } + } + + "parser parseFloatLiteral" should { + "be able to parse float(1.2)" in { + val Parsed.Success(result, _) = parse("float(1.2)", getFloatLiteral(_)) + result shouldBe FloatLiteral("1.2".toFloat) + } + "be able to parse float(1)" in { + val Parsed.Success(result, _) = parse("float(1)", getFloatLiteral(_)) + result shouldBe FloatLiteral("1".toFloat) + } + "be able to parse float(-1)" in { + val Parsed.Success(result, _) = parse("float(-1)", getFloatLiteral(_)) + result shouldBe FloatLiteral("-1".toFloat) + } + "be able to parse float(-1.22)" in { + val Parsed.Success(result, _) = parse("float(-1.22)", getFloatLiteral(_)) + result shouldBe FloatLiteral("-1.22".toFloat) + } + "be able to parse float(1e-06)" in { + val Parsed.Success(result, _) = parse("float(1e-06)", getFloatLiteral(_)) + result shouldBe FloatLiteral("1e-06".toFloat) + } + "be able to parse float(1e+06)" in { + val Parsed.Success(result, _) = parse("float(1e+06)", getFloatLiteral(_)) + result shouldBe FloatLiteral("1e+06".toFloat) + } + "be able to parse 
float(6.367e+06)" in { + val Parsed.Success(result, _) = parse("float(6.367e+06)", getFloatLiteral(_)) + result shouldBe FloatLiteral("6.367e+06".toFloat) + } + "be able to process float(-inf)" in { + val Parsed.Success(result, _) = parse("float(-inf)", getFloatLiteral(_)) + result shouldBe FloatLiteral(Float.NegativeInfinity) + } + } + + "parser array" should { + "be able to parse empty arrays" in { + val Parsed.Success(result, length) = parse("array()", getAnyLiteral(_)) + length shouldBe "array()".length + result shouldBe ArrayValue(Some(List())) + } + } + + "(require_once)" should { + "be able to parsed by getByteCodeKeyword" in { + val string = "(require_once)" + val Parsed.Success(result, length) = parse(string, getByteCodeKeyword(_)) + length shouldBe string.length + result match { + case ByteCodeKeyword(value) => value shouldBe "require_once" + } + } + } + + "(require)" should { + "be able to parsed by getByteCodeKeyword" in { + val string = "(require)" + val Parsed.Success(result, length) = parse(string, getByteCodeKeyword(_)) + length shouldBe string.length + result match { + case ByteCodeKeyword(value) => value shouldBe "require" + } + } + } + + "getByteCodeKeyword" should { + "be able to parse (require_once)" in { + val string = "(require_once)" + val Parsed.Success(result, length) = parse(string, getByteCodeKeyword(_)) + length shouldBe string.length + result match { + case ByteCodeKeyword(value) => value shouldBe "require_once" + } + } + "be able to parse (require)" in { + val string = "(require)" + val Parsed.Success(result, length) = parse(string, getByteCodeKeyword(_)) + length shouldBe string.length + result match { + case ByteCodeKeyword(value) => value shouldBe "require" + } + } + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/LiveRangesBlockTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/LiveRangesBlockTest.scala new file mode 100644 index 0000000..17560be --- /dev/null +++ 
b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/LiveRangesBlockTest.scala @@ -0,0 +1,26 @@ +package io.joern.bytecode.parser.php8 + +import fastparse._ +import io.joern.bytecode.parser.php8.LiveRangesBlock.getLiveRangesBlock +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class LiveRangesBlockTest extends AnyWordSpec with Matchers { + + "parser getLiveRangesBlock" should { + "be able to parse a full live ranges block" in { + val block = + """LIVE RANGES: + 4: 0002 - 0005 (loop) +""" + val Parsed.Success(result, length) = parse(block, getLiveRangesBlock(_)) + length shouldBe block.length + result.rangesEntry.length shouldBe 1 + result.rangesEntry.head.varNum shouldBe 4 + result.rangesEntry.head.start shouldBe 2 + result.rangesEntry.head.end shouldBe 5 + result.rangesEntry.head.rangeType shouldBe "loop" + } + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/MethodDefinitionTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/MethodDefinitionTest.scala new file mode 100644 index 0000000..6807a80 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/MethodDefinitionTest.scala @@ -0,0 +1,56 @@ +package io.joern.bytecode.parser.php8 + +import fastparse._ +import io.joern.bytecode.parser.EasyBase64.encode +import io.joern.bytecode.parser.php8.MethodDefinition.getFullMethodDefinitionBlock +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class MethodDefinitionTest extends AnyWordSpec with Matchers { + + "parser getFullMethodDefintionBlock" should { + "be able to parse a single MethodDefintionBlock fully" in { + val block = s"""$$_main: + | ; (lines=10, args=0, vars=1, tmps=4) + | ; (before optimizer) + | ; /home/malte/coding/uni/master/testproject/tests/new.php:1-4 + | ; return [] RANGE[0..0] + |0000 EXT_STMT + |0001 V1 = NEW 1 string("${encode("Basic")}") + |0002 SEND_VAL_EX 
string("${encode("value")}") 1 + |0003 DO_FCALL + |0004 ASSIGN CV0($$var) V1 + |0005 EXT_STMT + |0006 INIT_METHOD_CALL 1 CV0($$var) string("${encode("test2")}") + |0007 SEND_VAL_EX string("${encode("Basic")}") 1 + |0008 DO_FCALL + |0009 RETURN int(1) + |LIVE RANGES: + | 1: 0002 - 0004 (new) + | + |$$_main: + | ; (lines=10, args=0, vars=1, tmps=4) + | ; (before block pass) + | ; /home/malte/coding/uni/master/testproject/tests/new.php:1-4 + | ; return [] RANGE[0..0] + |BB0: + | ; start exit lines=[0-9] + |0000 EXT_STMT + |0001 V1 = NEW 1 string("${encode("Basic")}") + |0002 SEND_VAL_EX string("${encode("value")}") 1 + |0003 DO_FCALL + |0004 ASSIGN CV0($$var) V1 + |0005 EXT_STMT + |0006 INIT_METHOD_CALL 1 CV0($$var) string("${encode("test2")}") + |0007 SEND_VAL_EX string("${encode("other")}") 1 + |0008 DO_FCALL + |0009 RETURN int(1) + |""".stripMargin + val Parsed.Success(_, length) = + parse(block, getFullMethodDefinitionBlock(_)) + length shouldBe block.length + } + } + +} + diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/VariablesTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/VariablesTest.scala new file mode 100644 index 0000000..7df8eec --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/parser/php8/VariablesTest.scala @@ -0,0 +1,27 @@ +package io.joern.bytecode.parser.php8 + +import fastparse._ +import io.joern.bytecode.parser.php8.Variables._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class VariablesTest extends AnyWordSpec with Matchers { + + "parser getVariable" should { + "be able to parse 'CV($variable_name)'" in { + val Parsed.Success(result, _) = + parse("CV($variable_name)", getVariable(_)) + assert(result.name == "variable_name") + assert(result.tmp == false) + } + } + + "parser getTemporary" should { + "be able to parse 'T1'" in { + val Parsed.Success(result, _) = parse("T1", getTemporary(_)) + assert(result.name == "T1") +
assert(result.tmp == true) + } + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/AstTraversalTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/AstTraversalTest.scala new file mode 100644 index 0000000..5ddbb38 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/AstTraversalTest.scala @@ -0,0 +1,34 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser +import io.joern.bytecode.parser.PHPVersions +import io.joern.bytecode.passes.utility.AstTraversal +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.codepropertygraph.generated.nodes.Call +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class AstTraversalTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: parser.PHPVersion.Value = v + + s"getParentMethod for PHP $version" should { + "work with closures" in new CpgFromCodeTestFixture( + """function test() { + | $stuff = 42; + | function ($param) use ($stuff) { + | echo "dostuff"; + | }; + |} + |""".stripMargin + ) { + //we do not process any closures anymore, so this unit test only checks whether there is a crash + //val echo: Call = cpg.call.name("ECHO").next() + //AstTraversal.getParentMethod(echo).name shouldBe "closure" + } + } + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/BasicDataDependencyPassTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/BasicDataDependencyPassTest.scala new file mode 100644 index 0000000..027627d --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/BasicDataDependencyPassTest.scala @@ -0,0 +1,98 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser +import io.joern.bytecode.parser.PHPVersions +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import
org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class BasicDataDependencyPassTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: parser.PHPVersion.Value = v + + s"cpg for PHP $version" should { + "have a single ddg edge from the assignment" in new CpgFromCodeTestFixture( + "$x = 5; echo $x;") { + implicit val method: String = "dlr_main" + ddgSuccOf("ASSIGN CV($x) int(5)") shouldBe expectedDdg( + ("ECHO CV($x)", "x")) + } + + "have a single ddg edge from the global declaration" in new CpgFromCodeTestFixture( + """ + |function func() { + | global $x; + | echo $x; + |} + |""".stripMargin) { + implicit val method: String = "func" + ddgSuccOf(s"""BIND_GLOBAL CV($$x) string("x")""") shouldBe expectedDdg( + ("ECHO CV($x)", "x")) + } + + "have a single ddg edge from the assign op" in new CpgFromCodeTestFixture( + """ + |function func() { + | $x += 5; + | echo $x; + |} + |""".stripMargin) { + implicit val method: String = "func" + ddgSuccOf("ASSIGN_OP (ADD) CV($x) int(5)") shouldBe expectedDdg( + ("ECHO CV($x)", "x")) + } + + "have a ddg edge between declaration and reassignment" in new CpgFromCodeTestFixture( + "$x = 5; $x += 5;") { + implicit val method: String = "dlr_main" + ddgSuccOf("ASSIGN CV($x) int(5)") shouldBe expectedDdg( + ("ASSIGN_OP (ADD) CV($x) int(5)", "x")) + } + + "have two ddg edges when reassignment" in new CpgFromCodeTestFixture( + "$x = 5; $x += 5; echo $x;") { + implicit val method: String = "dlr_main" + ddgSuccOf("ASSIGN CV($x) int(5)") shouldBe expectedDdg( + ("ASSIGN_OP (ADD) CV($x) int(5)", "x")) + ddgSuccOf("ASSIGN_OP (ADD) CV($x) int(5)") shouldBe expectedDdg( + ("ECHO CV($x)", "x")) + } + + "have a split of the ddg due to if" in new CpgFromCodeTestFixture( + """ + |$x = 5; + |if($x == 6) { + | $x += 4; + |} else { + | $x += 5; + |} + |echo $x; + |""".stripMargin) { + implicit val method: String = "dlr_main" + ddgSuccOf("ASSIGN CV($x) int(5)") 
shouldBe expectedDdg( + ("IS_EQUAL CV($x) int(6)", "x"), + ("ASSIGN_OP (ADD) CV($x) int(4)", "x"), + ("ASSIGN_OP (ADD) CV($x) int(5)", "x")) + ddgSuccOf("T2 = IS_EQUAL CV($x) int(6)") shouldBe + expectedDdg(("JMPZ T2 int(5)", "T2")) + ddgSuccOf("ASSIGN_OP (ADD) CV($x) int(4)") shouldBe expectedDdg( + ("ECHO CV($x)", "x")) + ddgSuccOf("ASSIGN_OP (ADD) CV($x) int(5)") shouldBe expectedDdg( + ("ECHO CV($x)", "x")) + } + + "be able to handle = assignment in code" in new CpgFromCodeTestFixture( + """$statements = ["DELETE FROM `gui_group` WHERE `group_id` = $group_id", + | "UPDATE `gui_account` SET `account_group` = 0 WHERE `account_group` = $group_id",]; + | foreach ($statements as $statement) { + | $db->exec($statement); + | } + |""".stripMargin + ) { + implicit val method: String = "dlr_main" + ddgSuccOf("T5 = ADD_ARRAY_ELEMENT T6 NEXT") shouldBe expectedDdg(("ASSIGN CV($statements) T5", "T5")) + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CallFinishingPassTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CallFinishingPassTest.scala new file mode 100644 index 0000000..3774448 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CallFinishingPassTest.scala @@ -0,0 +1,317 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.PHPVersions +import io.joern.bytecode.passes.utility.MethodDetectionAndAssociation +import io.joern.bytecode.util.implicits.OneableSeq +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.joern.bytecode.{Defines, parser} +import io.shiftleft.codepropertygraph.generated.nodes.{Call, Method} +import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes} +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import scala.collection.mutable +import scala.jdk.CollectionConverters._ + +class CallFinishingPassTest extends AnyWordSpec with Matchers with 
PHPVersions { + + /*"companion object function getPhpInternalFunctions" should { + "return a set greater zero" in { + CallFinishingPass.getPhpInternalFunctions.nonEmpty shouldBe true + } + "contain get_defined_constants" in { + CallFinishingPass.getPhpInternalFunctions.contains( + "get_defined_constants") shouldBe true + } + "contain pclose" in { + CallFinishingPass.getPhpInternalFunctions.contains("pclose") shouldBe true + } + }*/ + + for (v <- getPhpVersions) { + implicit val version: parser.PHPVersion.Value = v + + s"call finishing pass for PHP $version" should { + + "have the called function argument node as well as two additional arguments" in new CpgFromCodeTestFixture( + """ + |$test = mysqli_query($link,$query); + |""".stripMargin + ) { + val call: Call = cpg.call.nameExact("mysqli_query").l.head + call.argumentOut.l.map(_.code).toSet shouldBe Set("SEND_VAR_EX CV($link) int(1)", "SEND_VAR_EX CV($query) int(2)") + val calling: Seq[Method] = call.out(EdgeTypes.CALL).asScala.toList.map(_.asInstanceOf[Method]) + calling.length shouldBe 1 + calling.head.code shouldBe Defines.UNKNOWN_FUNCTION + } + + "work even if there is a exit branch inside a call" in new CpgFromCodeTestFixture( + """ + |$test = mysqli_query($link, $cond ?
$query : exit()); + |""".stripMargin + ) { + val call: List[Call] = cpg.call.nameExact("mysqli_query").l + call.length shouldBe 1 + call.head.argumentOut.l.map(_.code).toSet shouldBe Set("SEND_VAR_EX CV($link) int(1)", "SEND_VAL_EX T4 int(2)") + val calling: Seq[Method] = call.head.out(EdgeTypes.CALL).asScala.toList.map(_.asInstanceOf[Method]) + calling.length shouldBe 1 + calling.head.code shouldBe Defines.UNKNOWN_FUNCTION + } + + "be able to handle nested calls" in new CpgFromCodeTestFixture( + """ + |outer(inner($first),inner2($second)); + |""".stripMargin + ) { + val outer: Call = cpg.call.nameExact("outer").l.head + val inner: Call = cpg.call.nameExact("inner").l.head + val inner2: Call = cpg.call.nameExact("inner2").l.head + inner.argumentOut.l.map(_.code).toSet shouldBe Set("SEND_VAR_EX CV($first) int(1)") + inner2.argumentOut.l.map(_.code).toSet shouldBe Set("SEND_VAR_EX CV($second) int(1)") + outer.argumentOut.l.map(_.code).toSet shouldBe Set("SEND_VAR_NO_REF_EX V2 int(1)", "SEND_VAR_NO_REF_EX V3 int(2)") + } + + "be able to handle self defined functions" in new CpgFromCodeTestFixture( + """ + |function test() { + | echo "test"; + |} + | + |test(); + |""".stripMargin + ) { + val call: Call = cpg.call.nameExact("test").l.head + call.argumentOut.l.map(_.code).toSet shouldBe Set() + } + + "create the correct call even though it is a subcall" in new CpgFromCodeTestFixture( + """ + |echo $e->errorMessage(); + |""".stripMargin + ) { + cpg.call.nameExact(".*::errormessage").l.length shouldBe 1 + } + + "have the all the correct names" in new CpgFromCodeTestFixture( + """try { + | + |} catch (phpmailerException $e) { + | echo $e->errorMessage(); //Pretty error messages from PHPMailer + |} + |""".stripMargin + ) { + cpg.call.filter(_.out(EdgeTypes.CALL).hasNext).map(_.name).toSet shouldBe Set(".*::errormessage") + } + + "be able to handle a call within a return" in new CpgFromCodeTestFixture( + """ + |function getImageBuffer($image) { + | if ($this->diskcache AND 
isset($this->images[$image])) { + | return unserialize($this->readDiskCache($this->images[$image])); + | } elseif (isset($this->images[$image])) { + | return $this->images[$image]; + | } + | return false; + |} + |""".stripMargin + ) { + val calls: mutable.Set[String] = cpg.call.filter(_.out(EdgeTypes.CALL).hasNext).map(_.name).toSet + calls shouldBe Set("unserialize", ".*::readdiskcache") + } + + "be to handle special drupal case I" in new CpgFromCodeTestFixture( + """function ValidateAddress($address) { + | if (function_exists('filter_var')) { //Introduced in PHP 5.2 + | if(filter_var($address, FILTER_VALIDATE_EMAIL) === FALSE) { + | return false; + | } else { + | return true; + | } + | } else { + | return preg_match('regexpString', $address); + | } + |} + |""".stripMargin + ) { + val calls: mutable.Set[String] = cpg.call.filter(_.out(EdgeTypes.CALL).hasNext).map(_.name).toSet + calls shouldBe Set("function_exists", "filter_var", "preg_match") + } + + "be able to handle special drupal case II" in new CpgFromCodeTestFixture( + """ + |class Klasse { + | public function Send() { + | try { + | switch($this->Mailer) { + | default: + | return $this->MailSend($header, $body); + | } + | } catch (phpmailerException $e) { + | echo $e->getMessage()."\n"; + | } + | } + |} + |""".stripMargin + ) { + val calls: mutable.Set[String] = cpg.call.filter(_.out(EdgeTypes.CALL).hasNext).map(_.name).toSet + calls shouldBe Set("klasse::mailsend", ".*::getmessage") + } + + "be able to handle drupal case III" in new CpgFromCodeTestFixture( + """function add($name, $title = '', $body = '') { + | if (is_object($name) && is_subclass_of($name, 'views_tab')) { + | $this->add_tab($name); + | } + | elseif (is_array($name)) { + | foreach ($name as $real_tab) { + | $this->add($real_tab); + | } + | } + | else { + | $this->add_tab(new views_tab($name, $title, $body)); + | } + | } + |""".stripMargin + ) { + val calls: mutable.Set[String] = cpg.call.map(_.name).toSet + } + "be able to correctly handle 
SEND_ARRAY" in new CpgFromCodeTestFixture( + """ + |call_user_func_array($field["callback"], array($field)); + |""".stripMargin + ) { + cpg.call.code("DO_FCALL").next() + val sendArray: Call = cpg.call("SEND_ARRAY").next() + val argOfSendArray: nodes.CfgNode = sendArray.astChildren.order(MethodDetectionAndAssociation.getSendValuePos(sendArray)).next().asInstanceOf[nodes.CfgNode] + argOfSendArray.code shouldBe "T2" + + } + "be able to utilize this marker to distinguish between equal function names" in new CpgFromCodeTestFixture( + """class A { + | function bob() { + | echo "do stuff"; + | } + | + | function callBob() { + | $this->bob(); + | } + |} + | + |class B { + | function bob() { + | echo "do other stuff"; + | } + | function callBob() { + | $this->bob(); + | } + |} + |""".stripMargin) { + cpg.call.code("DO_FCALL").foreach { + node: nodes.Call => + if (!Set("a::bob", "b::bob").contains(node.name)) { + fail(s"call node name ${node.name} is unexpected") + //node.out(EdgeTypes.CALL).asScala.toList.length shouldBe 1 + } + } + } + "be able to call link static method calls" in new CpgFromCodeTestFixture( + """class A { + | public static function stafu() { + | echo "dostuff"; + | } + |} + |A::stafu(); + |""".stripMargin) { + val call: Call = cpg.call.code("DO_UCALL").next() + call.name shouldBe "a::stafu" + call.out(EdgeTypes.CALL).asScala.toList.length shouldBe 1 + } + "be able to set the name for self::randomToken() in class" in new CpgFromCodeTestFixture( + """class c + |{ + | public static function token() + | { + | return self::randomToken(); + | } + |} + |""".stripMargin) { + private val call = cpg.call.filter(_.code.startsWith("DO_")).l.one + call.name shouldBe "c::randomtoken" + } + "not set multiple call edges (none in this case)" in new CpgFromCodeTestFixture( + """interface i { + | public function foo($param); + |} + | + |class Klasse implements i { + | public function foo($param) { + | echo $param; + | } + |} + |$obj = new Klasse(); + |$obj->foo(42); + 
|""".stripMargin) { + val calls: mutable.Set[String] = cpg.call.filter(_.out(EdgeTypes.CALL).hasNext).map(_.name).toSet + calls shouldBe Set(".*::foo", "klasse::__construct") + val call: Seq[Call] = cpg.call.nameExact(".*::foo").l + call.length shouldBe 1 + call.head.out(EdgeTypes.CALL).asScala.toList.length shouldBe 1 + } + "correctly create a internal function dummy" in new CpgFromCodeTestFixture( + """ + |str_replace("test","test","test"); + |""".stripMargin + ) { + val calls: mutable.Set[String] = cpg.call.filter(_.out(EdgeTypes.CALL).hasNext).map(_.name).toSet + calls shouldBe Set("str_replace") + val methods: Seq[Method] = cpg.method.nameExact("str_replace").l + methods.length shouldBe 1 + methods.head.code shouldBe Defines.INTERNAL_FUNCTION + } + "handle function name collisions gracefully" when { + "collision in name" in new CpgTestFixture("nameCollision") { + cpg.method.fullName.l.toSet shouldEqual Set("a::foo", "a::__construct", "b::__construct", "dlr_main", "UNKNOWN::foo") + cpg.method.fullName("a::foo").size shouldBe 2 + cpg.method("a::__construct").l.one.code shouldBe Defines.CONSTRUCTOR + } + "collision via namespace and other stuff" in new CpgTestFixture("guzzleunknown") { + assert(cpg.method.nonEmpty) // for now: just dont fail + } + "function defined twice" in new CpgTestFixture("functionDefinedTwice") { + assert(cpg.method.l.nonEmpty) + } + } + "be able to handle namespace covering up one internal function but not the second" in new CpgTestFixture("twoInternalFunctionsOneNamespace") { + cpg.method.filter(_.code == Defines.INTERNAL_FUNCTION).l.length shouldBe 1 + } + "be able to ignore namespace if function in namespace does not exist but basic function outside does" in new CpgFromCodeTestFixture( + """namespace test; + | + |sin(1); + | + |""".stripMargin + ){ + val methods: Seq[Method] = cpg.method.code(Defines.INTERNAL_FUNCTION).l + methods.length shouldBe 1 + methods.head.name shouldBe "sin" + methods.head.in(EdgeTypes.CALL).hasNext shouldBe 
true + } + "be able to handle function defines that overwrite internal functions" in new CpgFromCodeTestFixture("""namespace { + | if (!is_callable('random_bytes')) { + | function random_bytes($length) + | { + | return 1; + | } + | } + |} + | + |namespace KeePassPHP { + | + | echo random_bytes(32); + |}""".stripMargin){ + cpg.method.size should be > 0 + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CallInterpretPassTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CallInterpretPassTest.scala new file mode 100644 index 0000000..5eac2d1 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CallInterpretPassTest.scala @@ -0,0 +1,30 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser +import io.joern.bytecode.parser.PHPVersions +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class CallInterpretPassTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: parser.PHPVersion.Value = v + + s"call for PHP $version" should { + "have a CALL node with name=`foo`" in new CpgFromCodeTestFixture( + """ + | function foo() { + | + | } + | + | foo(); + |""".stripMargin) { + val List(x) = cpg.call("foo").l + x.name shouldBe "foo" + x.code shouldBe "DO_UCALL" + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CfgCreationPassTests.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CfgCreationPassTests.scala new file mode 100644 index 0000000..ea16433 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CfgCreationPassTests.scala @@ -0,0 +1,132 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser +import io.joern.bytecode.parser.PHPVersions +import
io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.codepropertygraph.generated.nodes.{Call, Method} +import io.shiftleft.semanticcpg.language._ +import io.shiftleft.semanticcpg.passes.controlflow.cfgcreation.Cfg.AlwaysEdge +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import overflowdb.traversal.NodeOps + +class CfgCreationPassTests extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: parser.PHPVersion.Value = v + + s"Cfg for PHP $version" should { + + "allow retrieving arguments" in new CpgFromCodeTestFixture( + "$x = 2;") { + val call: Call = cpg.call.codeExact("ASSIGN CV($x) int(2)").head + call.start.argument.size shouldBe 2 + } + + // Empty functions are removed entirely, so, in contrast to the tests in the C frontend, + // we do not test here for the correct translation of empty functions into entry + exit node + + "be correct for assignment" in new CpgFromCodeTestFixture( + "$x = 2;") { + implicit val method: String = "dlr_main" + val main: Method = this.cpg.method.l.head + main.code shouldBe "dlr_main()" + cfgSuccOf("dlr_main()") shouldBe expectedCfg(("METHOD BLOCK", AlwaysEdge)) + cfgSuccOf("METHOD BLOCK") shouldBe expectedCfg(("CV($x)", AlwaysEdge)) + cfgSuccOf("CV($x)") shouldBe expectedCfg(("2", AlwaysEdge)) + cfgSuccOf("2") shouldBe expectedCfg(("ASSIGN CV($x) int(2)", AlwaysEdge)) + cfgSuccOf("ASSIGN CV($x) int(2)") shouldBe expectedCfg(("1", AlwaysEdge)) + cfgSuccOf("1") shouldBe expectedCfg(("RETURN int(1)", AlwaysEdge)) + } + + "be correct for `if` statement" in new CpgFromCodeTestFixture( + """ + |if ($x > 10) { + | return 1; + |} + |return 0; + |""".stripMargin) { + implicit val method: String = "dlr_main" + cfgSuccOf("dlr_main()") shouldBe expectedCfg(("METHOD BLOCK", AlwaysEdge)) + cfgSuccOf("METHOD BLOCK") shouldBe expectedCfg(("10", AlwaysEdge)) + cfgSuccOf("10") shouldBe expectedCfg(("CV($x)", AlwaysEdge)) + 
cfgSuccOf("CV($x)") shouldBe expectedCfg( + ("IS_SMALLER int(10) CV($x)", AlwaysEdge)) + } + + "be correct for `while` loop" in new CpgFromCodeTestFixture( + """ + |$x = 0; + |while($x < 10) { + | foo($x); + |} + |""".stripMargin) { + implicit val method: String = "dlr_main" + cfgSuccOf("ASSIGN CV($x) int(0)") shouldBe expectedCfg(("5", AlwaysEdge)) + cfgSuccOf("5") shouldBe expectedCfg(("JMP int(5)", AlwaysEdge)) + cfgSuccOf("JMP int(5)") shouldBe expectedCfg(("CV($x)", AlwaysEdge)) + cfgSuccOf("CV($x)") shouldBe expectedCfg(("10", AlwaysEdge)) + cfgSuccOf("10") shouldBe expectedCfg( + ("IS_SMALLER CV($x) int(10)", AlwaysEdge)) + cfgSuccOf("IS_SMALLER CV($x) int(10)") shouldBe expectedCfg( + ("T3", AlwaysEdge)) + cfgSuccOf("T3 = IS_SMALLER CV($x) int(10)", "T3") shouldBe + expectedCfg(("T3 = IS_SMALLER CV($x) int(10)", AlwaysEdge)) + cfgSuccOf("DO_FCALL_BY_NAME") shouldBe expectedCfg(("CV($x)", AlwaysEdge)) + } + + "create the correct CFG for drupal example" in new CpgFromCodeTestFixture( + """ + |class Klasse { + | public function Send() { + | try { + | switch($this->Mailer) { + | default: + | return $this->MailSend($header, $body); + | } + | } catch (phpmailerException $e) { + | echo $e->getMessage()."\n"; + | } + | } + |} + |""".stripMargin + ) { + implicit val method: String = "klasse::send" + cfgSuccOf("INIT_METHOD_CALL 0 CV($e) string(\"getmessage\")") shouldBe expectedCfg( + ("DO_FCALL", AlwaysEdge)) + } + "create a CFG edge after the JMP on true in a normal function" in new CpgFromCodeTestFixture( + """function test() { + | if(other()) { + | if(ttest()) + | echo "test"; + | } + | do { + | echo "ttest"; + | } while(true); + |} + |""".stripMargin + ) { + implicit val method: String = "test" + cfgSuccOf("JMPNZ bool(true) int(7)") shouldBe expectedCfg(("ttest", AlwaysEdge)) + } + "create a CFG edge after the JMP on true even in a class" in new CpgFromCodeTestFixture( + """class testClass { + |function test() { + | if(other()) { + | if(ttest()) + | echo "test"; 
+ | } + | do { + | echo "ttest"; + | } while(true); + |} + |} + |""".stripMargin + ) { + implicit val method: String = "testclass::test" + cfgSuccOf("JMPNZ bool(true) int(7)") shouldBe expectedCfg(("ttest", AlwaysEdge)) + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/ConditionalMethodCreationTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/ConditionalMethodCreationTest.scala new file mode 100644 index 0000000..583c3eb --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/ConditionalMethodCreationTest.scala @@ -0,0 +1,69 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser +import io.joern.bytecode.parser.PHPVersions +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class ConditionalMethodCreationTest + extends AnyWordSpec + with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: parser.PHPVersion.Value = v + + s"cpg for PHP $version" should { + "contain two imce_image_info methods" in new CpgFromCodeTestFixture( + """if (variable_get('imce_image_get_info', 0)) { + | function imce_image_info($file) { + | $mimes = array('image/jpeg' => IMAGETYPE_JPEG, 'image/gif' => IMAGETYPE_GIF, 'image/png' => IMAGETYPE_PNG); + | if (is_file($file) && ($dot = strrpos($file, '.')) && in_array(strtolower(substr($file, $dot+1)), array('jpg', 'jpeg', 'gif', 'png')) && ($info = @image_get_info($file)) && isset($mimes[$info['mime_type']]) ) { + | return array('width' => $info['width'], 'height' => $info['height'], 'type' => $mimes[$info['mime_type']], 'mime' => $info['mime_type']); + | } + | return FALSE; + | } + | } + | else { + | function imce_image_info($file) { + | if (is_file($file) && ($dot = strrpos($file, '.')) && in_array(strtolower(substr($file, $dot+1)), array('jpg', 'jpeg', 'gif', 'png')) && ($info = 
@getimagesize($file)) && in_array($info[2], array(IMAGETYPE_JPEG, IMAGETYPE_GIF, IMAGETYPE_PNG)) ) { + | return array('width' => $info[0], 'height' => $info[1], 'type' => $info[2], 'mime' => $info['mime']); + | } + | return FALSE; + | } + |} + |""".stripMargin) { + cpg.method("imce_image_info").toList.length shouldBe 2 + } + "poc for rc" in new CpgFromCodeTestFixture( + """if (variable_get('imce_image_get_info', 0)) { + | function c($file) { + | b(); + | a(); + | } + | } + | else { + | function c($file) { + | b(); + | a(); + | } + |} + |""".stripMargin) { + // no crash means passing + } + "work with two equally named conditional functions" in new CpgFromCodeTestFixture( + """if($cond) { + | function test($var) { + | funcall(); + | } + |} else { + | function test($var) { + | funcall(); + | } + |} + |""".stripMargin + ) + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/ControlConstructsStringTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/ControlConstructsStringTest.scala new file mode 100644 index 0000000..6632a28 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/ControlConstructsStringTest.scala @@ -0,0 +1,83 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser +import io.joern.bytecode.parser.PHPVersions +import io.joern.bytecode.util.unittesting.{AbstractCpgTestFixture, CpgFromCodeTestFixture} +import io.shiftleft.semanticcpg.language._ +import io.shiftleft.semanticcpg.passes.controlflow.cfgcreation.Cfg.AlwaysEdge +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import scala.jdk.CollectionConverters._ + +class ControlConstructsStringTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: parser.PHPVersion.Value = v + + val fixture: AbstractCpgTestFixture = CpgFromCodeTestFixture( + """switch($test) { + | case "to": + | echo("first"); + | break; + | case "cc": + | 
echo("second"); + | break; + | case "bcc": + | echo("third"); + | break; + |} + |echo("done"); + |""".stripMargin + ) + + s"resulting cpg for PHP $version" should { + "have correct cfg" in { + implicit val method: String = "dlr_main" + val main = fixture.cpg.method.l.head + main.code shouldBe "dlr_main()" + //println(main.ast.isCfgNode.map(_.code).l) + val switchParent = + """SWITCH_STRING CV($test) "to": 8, "cc": 10, "bcc": 12, "default": 14""" + fixture.cfgSuccOf("dlr_main()") shouldBe fixture.expectedCfg( + ("METHOD BLOCK", AlwaysEdge)) + fixture.cfgSuccOf("METHOD BLOCK") shouldBe fixture.expectedCfg( + switchParent, + ("CV($test)", AlwaysEdge)) + fixture.cfgSuccOf(switchParent, "CV($test)") shouldBe fixture.expectedCfg( + switchParent, + ("to", AlwaysEdge)) + fixture.cfgSuccOf(switchParent, "to") shouldBe fixture.expectedCfg( + switchParent, + ("8", AlwaysEdge)) + fixture.cfgSuccOf(switchParent, "8") shouldBe fixture.expectedCfg( + switchParent, + ("cc", AlwaysEdge)) + fixture.cfgSuccOf(switchParent, "cc") shouldBe fixture.expectedCfg( + switchParent, + ("10", AlwaysEdge)) + fixture.cfgSuccOf(switchParent, "10") shouldBe fixture.expectedCfg( + switchParent, + ("bcc", AlwaysEdge)) + fixture.cfgSuccOf(switchParent, "bcc") shouldBe fixture.expectedCfg( + switchParent, + ("12", AlwaysEdge)) + fixture.cfgSuccOf(switchParent, "12") shouldBe fixture.expectedCfg( + switchParent, + ("default", AlwaysEdge)) + fixture.cfgSuccOf(switchParent, "default") shouldBe fixture.expectedCfg( + switchParent, + ("14", AlwaysEdge)) + fixture.cfgSuccOf(switchParent, "14") shouldBe fixture.expectedCfg( + (switchParent, AlwaysEdge)) + fixture.cpg.call + .codeExact(switchParent) + .head + ._cfgOut + .asScala + .toList + .length shouldBe 5 + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CorrectDDGReassignmentTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CorrectDDGReassignmentTest.scala new file mode 100644 index 0000000..845a1a4 --- 
/dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CorrectDDGReassignmentTest.scala @@ -0,0 +1,125 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser +import io.joern.bytecode.parser.PHPVersions +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.semanticcpg.passes.controlflow.cfgcreation.Cfg.{AlwaysEdge, FalseEdge, TrueEdge} +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class CorrectDDGReassignmentTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: parser.PHPVersion.Value = v + + s"cpg for PHP $version" should { + "have no DDG edge leading from the first to the third command" in new CpgFromCodeTestFixture( + """ + |$x = 42; + |$x += 33; + |echo $x; + |""".stripMargin) { + implicit val method: String = "dlr_main" + ddgSuccOf("ASSIGN CV($x) int(42)") shouldBe expectedDdg( + ("ASSIGN_OP (ADD) CV($x) int(33)", "x")) + ddgSuccOf("ASSIGN_OP (ADD) CV($x) int(33)") shouldBe expectedDdg( + ("ECHO CV($x)", "x")) + } + + "have no DDG edge leading before the if" in new CpgFromCodeTestFixture( + """ + |$x += 42; + |if($x == 33) { + | $x = 11; + |} else { + | $x = 33; + |} + |echo $x; + |""".stripMargin) { + implicit val method: String = "dlr_main" + ddgSuccOf("ASSIGN_OP (ADD) CV($x) int(42)") shouldBe expectedDdg( + ("IS_EQUAL CV($x) int(33)", "x")) + cfgSuccOf("ASSIGN_OP (ADD) CV($x) int(42)") shouldBe expectedCfg( + ("CV($x)", AlwaysEdge)) + ddgSuccOf("IS_EQUAL CV($x) int(33)") shouldBe Set() + cfgSuccOf("IS_EQUAL CV($x) int(33)") shouldBe expectedCfg( + ("T2", AlwaysEdge)) + cfgSuccOf("T2 = IS_EQUAL CV($x) int(33)", "T2") shouldBe expectedCfg( + ("T2 = IS_EQUAL CV($x) int(33)", AlwaysEdge)) + ddgSuccOf("T2 = IS_EQUAL CV($x) int(33)") shouldBe expectedDdg( + ("JMPZ T2 int(5)", "T2")) + cfgSuccOf("T2 = IS_EQUAL CV($x) int(33)") shouldBe expectedCfg( + ("T2", AlwaysEdge)) + ddgSuccOf("JMPZ T2 
int(5)") shouldBe Set() + cfgSuccOf("JMPZ T2 int(5)") shouldBe expectedCfg(("CV($x)", TrueEdge), + ("CV($x)", FalseEdge)) + ddgSuccOf("ASSIGN CV($x) int(11)") shouldBe expectedDdg( + ("ECHO CV($x)", "x")) + cfgSuccOf("ASSIGN CV($x) int(33)") shouldBe expectedCfg( + ("CV($x)", AlwaysEdge)) + } + + "have one DDG edge leading before the if" in new CpgFromCodeTestFixture( + """ + |$x += 42; + |if($x == 33) { + | $x += 11; + |} else { + | $y = 11; + |} + |echo $x; + |""".stripMargin) { + implicit val method: String = "dlr_main" + ddgSuccOf("ASSIGN_OP (ADD) CV($x) int(42)") shouldBe expectedDdg( + ("IS_EQUAL CV($x) int(33)", "x"), + ("ASSIGN_OP (ADD) CV($x) int(11)", "x"), + ("ECHO CV($x)", "x")) + ddgSuccOf("ASSIGN_OP (ADD) CV($x) int(11)") shouldBe expectedDdg( + ("ECHO CV($x)", "x")) + ddgSuccOf("ASSIGN CV($y) int(11)") shouldBe Set() + } + + "have one DDG edge leading before the loop and one into" in new CpgFromCodeTestFixture( + """ + |$x += 42; + |foreach($array as $element) { + | $x += 33; + |} + |echo $x; + |""".stripMargin) { + implicit val method: String = "dlr_main" + ddgSuccOf("ASSIGN_OP (ADD) CV($x) int(42)") shouldBe expectedDdg( + ("ASSIGN_OP (ADD) CV($x) int(33)", "x"), + ("ECHO CV($x)", "x")) + ddgSuccOf("ASSIGN_OP (ADD) CV($x) int(33)") shouldBe expectedDdg( + ("ASSIGN_OP (ADD) CV($x) int(33)", "x"), + ("ECHO CV($x)", "x")) + } + + "generate a proper string for a nesting loops where the outer one is productive" in new CpgFromCodeTestFixture( + """ + |$x = "start"; + |foreach($t as $a) { + | $x += "outerloop"; + | foreach($p as $g) { + | $x += "innerloop"; + | } + |} + |echo $x; + |""".stripMargin + ) { + implicit val method: String = "dlr_main" + ddgSuccOf("ASSIGN CV($x) string(\"start\")") shouldBe + expectedDdg(("ASSIGN_OP (ADD) CV($x) string(\"outerloop\")", "x"), + ("ECHO CV($x)", "x")) + ddgSuccOf("ASSIGN_OP (ADD) CV($x) string(\"outerloop\")") shouldBe + expectedDdg(("ASSIGN_OP (ADD) CV($x) string(\"innerloop\")", "x"), + ("ASSIGN_OP (ADD) CV($x) 
string(\"outerloop\")", "x"), + ("ECHO CV($x)", "x")) + ddgPredOf("ASSIGN_OP (ADD) CV($x) string(\"innerloop\")") shouldBe + expectedDdg(("ASSIGN_OP (ADD) CV($x) string(\"outerloop\")", "x"), + ("ASSIGN_OP (ADD) CV($x) string(\"innerloop\")", "x")) + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgClassCreationTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgClassCreationTest.scala new file mode 100644 index 0000000..88e620f --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgClassCreationTest.scala @@ -0,0 +1,105 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser +import io.joern.bytecode.parser.PHPVersions +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes} +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import scala.jdk.CollectionConverters._ + +class CpgClassCreationTest + extends AnyWordSpec + with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: parser.PHPVersion.Value = v + + val fixture: CpgFromCodeTestFixture = CpgFromCodeTestFixture( + """ + |class Basic { + | + | public $var = 42; + | + | function __construct($val) { + | $var = $val; + | } + | + | private function test($test) { + | echo $test; + | } + | + | public function test2($test) { + | $this->test($rest); + | test($rest); + | } + | } + | + | $var = new Basic("value"); + | $var->test2("other"); + |""".stripMargin + ) + + s"CPG for PHP $version" should { + + "have a single type declaration" in { + fixture.cpg.typeDecl.l match { + case typeDecl :: Nil => + typeDecl.name shouldBe "basic" + typeDecl.fullName shouldBe "basic" + typeDecl.astParentType shouldBe "NAMESPACE_BLOCK" + case x => fail(s"unexpected traversal result $x") + } + } + + "the type decl should have three methods and a main" in { + 
fixture.cpg.typeDecl.l match { + case typeDecl :: Nil => + typeDecl.astChildren.isMethod.sortBy(_.name).toList match { + case first :: second :: third :: Nil => + first.name shouldBe "__construct" + first.fullName shouldBe "basic::__construct" + second.name shouldBe "test" + second.fullName shouldBe "basic::test" + third.name shouldBe "test2" + third.fullName shouldBe "basic::test2" + case x => fail(s"unexpected traversal result $x") + } + case x => fail(s"unexpected traversal result $x") + } + fixture.cpg.method("dlr_main").l match { + case main :: Nil => + main.name shouldBe "dlr_main" + case x => fail(s"unexpected traversal result $x") + } + } + + "have 1 call edges for Basic::test2" in { + fixture.cpg.method.fullName("basic::test2").l match { + case method :: Nil => + method.in(EdgeTypes.CALL).asScala.toList match { + case (caller: nodes.Call) :: Nil => + caller.code shouldBe "DO_FCALL" + case Nil => fail("expected call edge not created") + case x => fail(s"unexpected traversal result $x") + } + case x => fail(s"unexpected traversal result $x") + } + } + + "have 1 call edge for Basic::__construct" in { + fixture.cpg.method.fullName("basic::__construct").l match { + case method :: Nil => + method.in(EdgeTypes.CALL).asScala.toList match { + case (caller: nodes.Call) :: Nil => + caller.code shouldBe "DO_FCALL" + case x => fail(s"unexpected traversal result $x") + } + case x => fail(s"unexpected traversal result $x") + } + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgConditionalTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgConditionalTest.scala new file mode 100644 index 0000000..d35f97b --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgConditionalTest.scala @@ -0,0 +1,72 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.shiftleft.codepropertygraph.generated.nodes +import io.shiftleft.semanticcpg.language._ +import 
org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import overflowdb.Node + +import java.io.File +import scala.jdk.CollectionConverters._ + +class CpgConditionalTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + val fixture: CpgTestFixture = CpgTestFixture("basicConditional") + + def getSingleFile(name: String): Node = { + fixture.cpg.file.nameExact(name).l match { + case List(x) => x + case _ => fail() + } + } + + def getMethods(name: String): List[Node] = { + fixture.cpg.method.nameExact(name).l + } + + + s"CPG layout for PHP $version" should { + "have a single file" in { + fixture.files.length shouldBe 1 + getSingleFile(fixture.files.head.getPath) + } + "have the method main" in { + getMethods("dlr_main") should have length 1 // a bare lookup asserts nothing and would pass on an empty result + } + } + + s"CFG of CPG for PHP $version" should { + "have a distinct edge pattern" in { + val method = fixture.cpg.method("dlr_main").l + method.length shouldBe 1 + var next = method.head._cfgOut.next()._cfgOut.next() + next.asInstanceOf[nodes.Identifier].name shouldBe "x" + next = next._cfgOut.next() + next.asInstanceOf[nodes.Literal].code shouldBe "42" + next = next._cfgOut.next() + next.asInstanceOf[nodes.Call].code shouldBe "ASSIGN CV($x) int(42)" + next = next._cfgOut.next() + next.asInstanceOf[nodes.Identifier].code shouldBe "CV($x)" + next = next._cfgOut.next() + next.asInstanceOf[nodes.Literal].code shouldBe "43" + next = next._cfgOut.next() + next.asInstanceOf[nodes.Call].code shouldBe "IS_EQUAL CV($x) int(43)" + next = next._cfgOut.next() + next.asInstanceOf[nodes.Identifier].code shouldBe "T2" + next = next._cfgOut.next() + next.asInstanceOf[nodes.Call].code shouldBe "T2 = IS_EQUAL CV($x) int(43)" + next = next._cfgOut.next() + next.asInstanceOf[nodes.Identifier].name shouldBe "T2" + next = next._cfgOut.next() + next.asInstanceOf[nodes.Literal].code shouldBe "5" + next = next._cfgOut.next() + next.asInstanceOf[nodes.Call].code
shouldBe "JMPZ T2 int(5)" + next._cfgOut.asScala.length shouldBe 2 + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgInheritancePassTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgInheritancePassTest.scala new file mode 100644 index 0000000..9aca546 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgInheritancePassTest.scala @@ -0,0 +1,51 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import overflowdb.Node + +class CpgInheritancePassTest + extends AnyWordSpec + with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + val fixture: CpgTestFixture = CpgTestFixture("inheritance") + + def getSingleFile(name: String): Node = { + fixture.cpg.file.nameExact(name).l match { + case List(x) => x + case _ => fail() + } + } + + s"CPG layout for PHP $version" should { + "have two files" in { + fixture.files.length shouldBe 2 + getSingleFile(fixture.files.head.getPath) + getSingleFile(fixture.files(1).getPath) + } + "have one main methods" in { + fixture.cpg.method("dlr_main").l.length shouldBe 1 + } + "have two types" in { + fixture.cpg.typeDecl.l.length shouldBe 2 + } + "have an inheritance edge from A to B" in { + val a = fixture.cpg.typeDecl("a").l + val b = fixture.cpg.typeDecl("b").l + a.length shouldBe 1 + b.length shouldBe 1 + //val targetA = in(a.head, EdgeTypes.INHERITS_FROM) + //targetA.length shouldBe 1 + //val targetB = out(a.head, EdgeTypes.INHERITS_FROM) + //targetB.length shouldBe 1 + //targetA shouldBe b.head + //targetB shouldBe a.head + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgMultipleFilesProjectTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgMultipleFilesProjectTest.scala new 
file mode 100644 index 0000000..a7ffedd --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgMultipleFilesProjectTest.scala @@ -0,0 +1,36 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import overflowdb.Node + +class CpgMultipleFilesProjectTest + extends AnyWordSpec + with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + val fixture: CpgTestFixture = CpgTestFixture("multipleFilesProject") + + def getSingleFile(name: String): Node = { + fixture.cpg.file.nameExact(name).l match { + case List(x) => x + case _ => fail() + } + } + + s"CPG layout for PHP $version" should { + "have two files" in { + fixture.files.length shouldBe 2 + getSingleFile(fixture.files.head.getPath) + getSingleFile(fixture.files(1).getPath) + } + "have two main methods" in { + fixture.cpg.method.nameExact("dlr_main").l.length shouldBe 2 + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgMultipleMethodDefinitionsTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgMultipleMethodDefinitionsTest.scala new file mode 100644 index 0000000..2c1996a --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgMultipleMethodDefinitionsTest.scala @@ -0,0 +1,45 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import overflowdb.Node + +class CpgMultipleMethodDefinitionsTest + extends AnyWordSpec + with Matchers with PHPVersions { + + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + val fixture: CpgTestFixture = CpgTestFixture("twoFunctionsAndMain") + + def 
getMethods(name: String): List[Node] = { + fixture.cpg.method.nameExact(name).l + } + + def getSingleFile(name: String): Node = { + fixture.cpg.file.nameExact(name).l match { + case List(x) => x + case _ => fail() + } + } + + s"CFG layout for PHP $version" should { + "have a single file" in { + getSingleFile(fixture.files.head.getPath) + } + "have the method main" in { + getMethods("dlr_main") should not be empty // was "DLR_main" with no assertion: exact-name lookup never matched, test still passed + } + "have the method first_function" in { + getMethods("first_function") should not be empty // assert the lookup result; a bare call cannot fail + } + "have the method second_function" in { + getMethods("second_function") should not be empty // assert the lookup result; a bare call cannot fail + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgNamespaceCreationTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgNamespaceCreationTest.scala new file mode 100644 index 0000000..165c166 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgNamespaceCreationTest.scala @@ -0,0 +1,130 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.codepropertygraph.generated.{EdgeTypes, nodes} +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import scala.jdk.CollectionConverters._ + +class CpgNamespaceCreationTest + extends AnyWordSpec + with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + val fixture: CpgFromCodeTestFixture = CpgFromCodeTestFixture( + """ + |namespace weird\testOne; + | + |function notAMethod() { + | echo("test"); + |} + | + |class test { + | + | function test() { + | echo("test"); + | } + | + |} + | + |namespace testTwo; + | + |function notAMethod() { + | echo("test"); + |} + | + |class test { + | + | function test() { + | echo("test"); + | } + | + |} + | + |use weird\testOne; + | + |testOne\notAMethod(); + |notAMethod(); + |""".stripMargin + ) + + s"CPG namespace
structure for PHP $version" should { + "have namespace weird\\testone" in { + fixture.cpg.namespaceBlock.nameExact("weird\\testone").l match { + case _ :: Nil => + case x => fail(s"unexpected traversal result $x") + } + } + "have namespace testtwo" in { + fixture.cpg.namespaceBlock.nameExact("testtwo").l match { + case _ :: Nil => + case x => fail(s"unexpected traversal result $x") + } + } + } + + s"namespace testtwo for PHP $version" should { + "have a function notamethod" in { + fixture.cpg.method.fullNameExact("testtwo\\notamethod").l match { + case (method: nodes.Method) :: Nil => + method.name shouldBe "notamethod" + method.in(EdgeTypes.CALL).asScala.toList match { + case (single: nodes.Call) :: Nil => + single.code shouldBe "DO_FCALL_BY_NAME" + case Nil => fail("expected call edge not found") + case x => fail(s"unexpected traversal result $x") + } + case x => fail(s"unexpected traversal result $x") + } + } + "have a typedecl test with method test" in { + fixture.cpg.typeDecl.fullNameExact("testtwo\\test").l match { + case (typeDecl: nodes.TypeDecl) :: Nil => + typeDecl.name shouldBe "test" + typeDecl.astChildren.isMethod.l match { + case method :: Nil => + method.fullName shouldBe "testtwo\\test::test" + method.name shouldBe "test" + case x => fail(s"unexpected traversal result $x") + } + case x => fail(s"unexpected traversal result $x") + } + } + } + + s"namespace weird\\testone for PHP $version" should { + "have a function notamethod" in { + fixture.cpg.method.fullNameExact("weird\\testone\\notamethod").l match { + case (method: nodes.Method) :: Nil => + method.name shouldBe "notamethod" + method.in(EdgeTypes.CALL).asScala.toList match { + case (single: nodes.Call) :: Nil => + single.code shouldBe "DO_UCALL" + case Nil => fail("expected call edge not found") + case x => fail(s"unexpected traversal result $x") + } + case x => fail(s"unexpected traversal result $x") + } + } + "have a typedecl test with method test" in { + 
fixture.cpg.typeDecl.fullNameExact("weird\\testone\\test").l match { + case (typeDecl: nodes.TypeDecl) :: Nil => + typeDecl.name shouldBe "test" + typeDecl.astChildren.isMethod.l match { + case method :: Nil => + method.fullName shouldBe "weird\\testone\\test::test" + method.name shouldBe "test" + case x => fail(s"unexpected traversal result $x") + } + case x => fail(s"unexpected traversal result $x") + } + } + } + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgOnlyMainCreationTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgOnlyMainCreationTest.scala new file mode 100644 index 0000000..a54bfdd --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgOnlyMainCreationTest.scala @@ -0,0 +1,95 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.shiftleft.codepropertygraph.generated.{EdgeTypes, NodeTypes, nodes} +import io.shiftleft.semanticcpg.language._ +import io.shiftleft.semanticcpg.passes.controlflow.cfgcreation.Cfg.AlwaysEdge +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import overflowdb.Node + +import java.io.File +import scala.jdk.CollectionConverters._ + +class CpgOnlyMainCreationTest + extends AnyWordSpec + with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + val fixture: CpgTestFixture = CpgTestFixture("onlyMainCreation") + + def getMethods(name: String): List[Node] = { + fixture.cpg.method.nameExact(name).l + } + + def getSingleFile(name: String): Node = { + fixture.cpg.file.nameExact(name).l match { + case List(x) => x + case _ => fail() + } + } + + + s"CFG layout for PHP $version" should { + "have a single file" in { + getSingleFile(fixture.files.head.getPath) + } + } + + s"CFG Operations Graphs for PHP $version" should { + "have a $_main method with a single block" in { + val main: List[Node] = getMethods("dlr_main") + 
main.length shouldBe 1 + val mainChildren = main.flatMap( + _.out(EdgeTypes.AST).asScala + .filter(_.label == NodeTypes.BLOCK)) + mainChildren.length shouldBe 1 + } + "have a main block with 6 instructions" in { + val main: List[Node] = getMethods("dlr_main") + val instructions = main.flatMap( + _.out(EdgeTypes.AST).asScala + .filter(_.label == NodeTypes.BLOCK) + .flatMap(_.out(EdgeTypes.AST).asScala)) + instructions.length shouldBe 6 // matcher reports expected vs. actual on failure, unlike a bare assert + } + } + + s"CPG CFG for PHP $version" should { + "have a CFG edge coming from $_main" in { + implicit val method: String = "dlr_main" + fixture.cfgSuccOf("dlr_main()") shouldBe fixture.expectedCfg( + ("METHOD BLOCK", AlwaysEdge)) + fixture.cfgSuccOf("METHOD BLOCK") shouldBe fixture.expectedCfg( + ("1", AlwaysEdge)) + } + "have a CFG edge leading into the $_main METHOD_RETURN node" in { + val main = getMethods("dlr_main") + main.length shouldBe 1 + val methodReturn = main.head.out(EdgeTypes.AST).asScala.toList.filter(vertex => + vertex.label == NodeTypes.METHOD_RETURN) + methodReturn.length shouldBe 1 + val preFlow = methodReturn.head.in(EdgeTypes.CFG).asScala.toList + preFlow.length shouldBe 1 + preFlow.head.property("CODE") shouldBe "RETURN int(1)" + } + "reach RETURN int(1) after 20 CFG steps from the first CFG successor of the method" in { + val main: List[nodes.Method] = fixture.cpg.method("dlr_main").l + main.length shouldBe 1 + var next: nodes.CfgNode = main.head + ._cfgOut + .asScala + .map(_.asInstanceOf[nodes.CfgNode]) + .toList + .head + for (_ <- Range(0, 20)) { // 20 hops; the test title previously claimed 16 + next = + next._cfgOut.asScala.map(_.asInstanceOf[nodes.CfgNode]).toList.head + } + next.code shouldBe "RETURN int(1)" + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgParsingSideCasesTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgParsingSideCasesTest.scala new file mode 100644 index 0000000..51f2cce --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgParsingSideCasesTest.scala @@ -0,0 +1,23 @@
+package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class CpgParsingSideCasesTest + extends AnyWordSpec + with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + val fixture: CpgTestFixture = CpgTestFixture("parsingSideCases") + + s"sidecases for PHP $version" should { + "be able to parse the given sidecase assortment successfully" in { + fixture.cpg.method("dlr_main").l.length shouldBe 1 + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgTestFixture.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgTestFixture.scala new file mode 100644 index 0000000..e57e951 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CpgTestFixture.scala @@ -0,0 +1,57 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.PhpToCpg +import io.joern.bytecode.parser.PHPVersion.PHPVersion +import io.joern.bytecode.util.FilterFiles.filterFiles +import io.joern.bytecode.util.unittesting.AbstractCpgTestFixture +import io.joern.config.CPGConfig +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.x2cpg.SourceFiles +import overflowdb.Node + +import java.io.{File => JFile} +import java.text.SimpleDateFormat +import scala.jdk.CollectionConverters._ + +case class CpgTestFixture(projectName: String)(implicit version : PHPVersion) extends AbstractCpgTestFixture { + import CpgTestFixture._ + + private val dirName: String = + String.format("layerByteCode/resources/unittesting/testprojects/%s", + projectName) + val files: List[JFile] = filterFiles(SourceFiles.determine(Set(dirName), Set(".php")).distinct.map(new JFile(_))) + override implicit var cpg: Cpg = cpgForDir(files) + + def V: Iterator[Node] = cpg.graph.V.asScala +} + + +object CpgTestFixture { + + def cpgForDir(files: 
List[JFile])(implicit version : PHPVersion) : Cpg = { + val cpg: Cpg = Cpg.emptyCpg + val config = CPGConfig.initializeConfig() + val cpgCreator = new PhpToCpg() + try { + cpgCreator.populateCpg(files, cpg, config) + } finally { + val finalReport = cpgCreator.getFinalReport + if (!finalReport.success) { + throw new RuntimeException( + s"Creation of CPG was not successful with fromCodeFixture \n ${finalReport.prettyPrintErrors}") + } + } + } + + // Workaround for a bug in PHP: + // we set the timestamp to a date way in the past here + // because there seems to be a bug in `php` which causes + // the error stream to be empty for freshly created files. + def setTimestamp(filePath: String): Boolean = { + val file = new JFile(filePath) + val date = new SimpleDateFormat("MM/dd/yyyy") + val last = date.parse("10/03/1990") + file.setLastModified(last.getTime) + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/CreateStubMethodPassTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CreateStubMethodPassTest.scala new file mode 100644 index 0000000..a71e25a --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/CreateStubMethodPassTest.scala @@ -0,0 +1,48 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.codepropertygraph.generated.EdgeTypes +import io.shiftleft.codepropertygraph.generated.nodes.{Method, MethodReturn} +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import overflowdb.traversal.jIteratortoTraversal + +class CreateStubMethodPassTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + + implicit val version: PHPVersion.Value = v + + s"stub creation for PHP $version" should { + s"be able to handle similarly named method of whom one class is unknown" in new 
CpgFromCodeTestFixture( + """foo::create(); + |$bar->create(); + |""".stripMargin + ){ + val specificCreate : Seq[Method] = cpg.method("foo::create").l + specificCreate.length shouldBe 1 + val unknownCreate : Seq[Method] = cpg.method("UNKNOWN::create").l + unknownCreate.length shouldBe 1 + } + "work with two internal functions" in new CpgFromCodeTestFixture( + """header("header"); + |header("header"); + |""".stripMargin + ) { + cpg.method("header").l.length shouldBe 1 + } + "set IS_EXTERNAL for internal functions" in new CpgFromCodeTestFixture("""echo strpos($url, 'pipix');"""){ + cpg.method("strpos").l.head.isExternal shouldBe true + } + "create METHOD_RETURN for stub methods" in new CpgFromCodeTestFixture("""echo strpos($url, 'pipix');"""){ + val stub: Method = cpg.method("strpos").l.head + stub.out(EdgeTypes.AST).collectAll[MethodReturn].size shouldBe 1 + stub.out(EdgeTypes.CFG).collectAll[MethodReturn].size shouldBe 1 + } + } + + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/DataDependencyPassTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/DataDependencyPassTest.scala new file mode 100644 index 0000000..558aa08 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/DataDependencyPassTest.scala @@ -0,0 +1,85 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class DataDependencyPassTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + s"DDG for PHP $version" should { + "work for simple magic string" in new CpgFromCodeTestFixture( + """ + |$x = "$a $b $c"; + |""".stripMargin + ) { + implicit val method: String = "dlr_main" + ddgSuccOf("T5 = ROPE_INIT int(5) CV($a)") shouldBe expectedDdg( + ("ROPE_ADD int(1) T5 
string(\" \")", "T5")) + ddgSuccOf("ROPE_ADD int(1) T5 string(\" \")") shouldBe Set() + ddgSuccOf("T5 = ROPE_ADD int(1) T5 string(\" \")") shouldBe expectedDdg( + ("ROPE_ADD int(2) T5 CV($b)", "T5")) + } + "$this->x" in new CpgFromCodeTestFixture( + """class A + |{ + | function foo() + | { + | $this->a = 1; + | echo $this->a; + | } + |} + |(new A())->foo();""".stripMargin)(getPhpVersions.last) { + implicit val method: String = "a::foo" + ddgSuccOf("ASSIGN_OBJ THIS string(\"a\")") shouldBe expectedDdg(("FETCH_OBJ_R THIS string(\"a\")", "a")) + ddgSuccOf("T1 = FETCH_OBJ_R THIS string(\"a\")") shouldBe expectedDdg(("ECHO T1", "T1")) + } + // we don't support this case, but we shouldn't crash :) + "don't crash with $this->$x" in new CpgFromCodeTestFixture( + """class A + |{ + | function foo() + | { + | $x = $_GET["a"]; + | $this->$x = 1; + | echo $this->$x; + | } + |} + |(new A())->foo();""".stripMargin)(getPhpVersions.last) { + implicit val method: String = "a::foo" + ddgSuccOf("T5 = FETCH_OBJ_R THIS CV($x)") shouldBe expectedDdg(("ECHO T5", "T5")) + } + "edge between ASSIGN and ASSIGN_DIM" in new CpgFromCodeTestFixture( + """$x = $array; + |$x['foo'] = 42; + |""".stripMargin) { + implicit val method: String = "dlr_main" + ddgSuccOf("ASSIGN CV($x) CV($array)") shouldBe expectedDdg( + ("""ASSIGN_DIM CV($x) string("foo")""", "x") + ) + } + "ADD_ARRAY_ELEMENT missing data dependency edge" in new CpgFromCodeTestFixture( + """$x = [ + | 'f' => 2, + | 'h' => 2, + | 'g' => (object) [ 'id' => 1 ], + | 'j' => 2, + |]; + |extract($x); + |""".stripMargin + ) { + implicit val method: String = "dlr_main" + ddgSuccOf("""INIT_ARRAY int(4) int(2) string("f")""") shouldBe expectedDdg( + ("""ADD_ARRAY_ELEMENT int(2) string("h")""", "T1") + ) + ddgSuccOf("""ADD_ARRAY_ELEMENT T2 string("g")""") shouldBe expectedDdg( + ("""ADD_ARRAY_ELEMENT int(2) string("j")""", "T1") + ) + //new CpgDotFileCreator(cpg.graph).show() + } + } + } +} diff --git 
a/layerByteCode/src/test/scala/io/joern/bytecode/passes/DeclareClassDelayedTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/DeclareClassDelayedTest.scala new file mode 100644 index 0000000..87848cd --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/DeclareClassDelayedTest.scala @@ -0,0 +1,49 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class DeclareClassDelayedTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + s"cpg for PHP $version" should { + "contain call to DECLARE_CLASS_DELAYED" in new CpgFromCodeTestFixture( + """ + |class B extends A { + | function test2() { + | echo "test2"; + | } + |} + |""".stripMargin + ) { + cpg.call("DECLARE_CLASS_DELAYED").l match { + case single :: Nil => + single.name shouldBe "DECLARE_CLASS_DELAYED" + case x => fail(s"unexpected traversal result $x") + } + } + } + + s"cpg for PHP $version" should { + "not contain call to DECLARE_CLASS_DELAYED" in new CpgFromCodeTestFixture( + """ + |class A { + | function test2() { + | echo "test2"; + | } + |} + |""".stripMargin + ) { + cpg.call("DECLARE_CLASS_DELAYED").l match { + case Nil => + case x => fail(s"unexpected traversal result $x") + } + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/DeleteEmptyOpcodesPassTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/DeleteEmptyOpcodesPassTest.scala new file mode 100644 index 0000000..6d42573 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/DeleteEmptyOpcodesPassTest.scala @@ -0,0 +1,35 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import 
io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class DeleteEmptyOpcodesPassTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + s"cpg for PHP $version" should { + + "have no more EXT_STMT nodes in trivial cfg" in new CpgFromCodeTestFixture( + "$x = 5; echo $x") { + cpg.call("EXT_STMT").toList.length shouldBe 0 + } + + "have a no more EXT_STMT nodes in complex cfg" in new CpgFromCodeTestFixture( + """ + |$x = 5; + |if($x == 6) { + | $x += 4; + |} else { + | $x += 5; + |} + |echo $x; + |""".stripMargin) { + cpg.call("EXT_STMT").toList.length shouldBe 0 + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/DominatorPassTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/DominatorPassTest.scala new file mode 100644 index 0000000..2311053 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/DominatorPassTest.scala @@ -0,0 +1,122 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser +import io.joern.bytecode.parser.PHPVersions +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.codepropertygraph.generated.nodes.Call +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class DominatorPassTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: parser.PHPVersion.Value = v + + s"cpg pre dominator for PHP $version" should { + "have trivial domination tree" in new CpgFromCodeTestFixture( + """echo "test";""".stripMargin + ) { + cpg.method("dlr_main").head.dominates.l.map(_.code).toSet shouldBe + Set("ECHO string(\"test\")", "test", "1", "RETURN int(1)", "", "METHOD BLOCK") + val echo: Call = 
cpg.call.codeExact("ECHO string(\"test\")").head + echo.dominates.l.map(_.code).toSet shouldBe + Set("1", "RETURN int(1)", "") + } + + "work with an if clause" in new CpgFromCodeTestFixture( + """if($x == true) { + | echo "true"; + |} else { + | echo "false"; + |} + |echo "finally"; + |""".stripMargin + ) { + val jmp: Call = cpg.call.codeExact("JMP int(5)").head + // jmp does not dominate anything + jmp.dominates.l.map(_.code).toSet shouldBe + Set() + val echoTrue: Call = cpg.call.codeExact("ECHO string(\"true\")").head + echoTrue.dominates.l.map(_.code).toSet shouldBe + Set("JMP int(5)", "5") + } + "work with function definitions" in new CpgFromCodeTestFixture( + """function fu($x) { + | echo $x; + |} + |fu("42"); + |""".stripMargin + ) { + cpg.method("fu").l.head.dominates.nonEmpty shouldBe true + } + "work with function definition using return stmt" in new CpgFromCodeTestFixture( + """function foo($x) { + | echo $x; + | return $x; + |} + |""".stripMargin + ) { + cpg.method.nameExact("foo").l.head.dominates.nonEmpty shouldBe true + } + "work with constructor" in new CpgFromCodeTestFixture( + """class test { + | + | function __construct() { + | $this->test = 42; + | } + | + |}""".stripMargin + ) { + cpg.method.fullNameExact("test::__construct").l.head.dominates.nonEmpty shouldBe true + } + } + + s"cpg post dominator for PHP $version" should { + "work with function definition and the method should not post dominate any node" in new CpgFromCodeTestFixture( + """function test() { + | echo "test"; + |} + |""".stripMargin + ) { + cpg.method("test").head.postDominates.l.map(_.code).toSet shouldBe Set() + } + + "have trivial post domination tree" in new CpgFromCodeTestFixture( + """echo "test";""".stripMargin + ) { + cpg.method("dlr_main").head.postDominates.l.map(_.code).toSet shouldBe + Set() + val echo: Call = cpg.call.codeExact("ECHO string(\"test\")").head + echo.dominates.l.map(_.code).toSet shouldBe + Set("1", "RETURN int(1)", "") + } + + "work with an if clause 
post" in new CpgFromCodeTestFixture( + """if($x == true) { + | echo "true"; + |} else { + | echo "false"; + |} + |echo "finally"; + |""".stripMargin + ) { + val echoTrue: Call = cpg.call.codeExact("ECHO string(\"true\")").head + // echoTrue does only post dominate its parameter as the following node is a conditional jmp + echoTrue.postDominates.map(_.code).l.toSet shouldBe Set("true") + val jmp: Call = cpg.call.codeExact("JMP int(5)").head + jmp.postDominates.map(_.code).l.toSet shouldBe + Set("true", "ECHO string(\"true\")", "5") + } + "succeed on more complex example" in new CpgFromCodeTestFixture( + """do { + | //echo "ttest"; + |} while(true); + |""".stripMargin + ) { + //implicit val method: String = "test" + //cfgSuccOf("JMPNZ bool(true) int(7)") shouldBe expectedCfg(("ttest", AlwaysEdge)) + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/DynamicCallsTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/DynamicCallsTest.scala new file mode 100644 index 0000000..a2e4b36 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/DynamicCallsTest.scala @@ -0,0 +1,30 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser +import io.joern.bytecode.parser.PHPVersions +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class DynamicCallsTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: parser.PHPVersion.Value = v + + s"cpg for PHP $version" should { + + "have a proper INIT_DYNAMIC_CALL node" in new CpgFromCodeTestFixture( + """ + |$test(); + |""".stripMargin + ) { + cpg.call("INIT_DYNAMIC_CALL").hasNext shouldBe true + cpg.call("INIT_DYNAMIC_CALL").next().argument.toList.length shouldBe 2 + cpg.call("INIT_DYNAMIC_CALL").next().code shouldBe "INIT_DYNAMIC_CALL 0 CV($test)" + } + + 
} + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/LocalIdentificationPassTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/LocalIdentificationPassTest.scala new file mode 100644 index 0000000..78b9788 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/LocalIdentificationPassTest.scala @@ -0,0 +1,37 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class LocalIdentificationPassTest extends AnyWordSpec with Matchers with PHPVersions { /* Checks which LOCAL nodes end up in the CPG for small top-level PHP snippets; every test is registered once per version returned by getPhpVersions. */ + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + s"cpgStructure for PHP $version" should { + "have no locals" in new CpgFromCodeTestFixture( + """mysql_query("BEGIN"); + |mysql_query("UPDATE ttable SET test = '32' WHERE other = '33'"); + |mysql_query("UPDATE ttable SET test = '32' WHERE other = '33'"); + |mysql_query("COMMIT"); + |""".stripMargin + ) { + cpg.local.size shouldBe 0 /* the snippet only calls functions with literal arguments, no variable is assigned */ + } + + "have exactly one local named var for a single assignment" in new CpgFromCodeTestFixture( + """$var = 42 + |""".stripMargin + ) { + cpg.local.toList match { + case elem :: Nil => elem.name shouldBe "var" /* the local's name is expected without the leading dollar sign */ + case x => fail(s"unexpected traversal result $x") + } + } + } + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/MethodCreationTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/MethodCreationTest.scala new file mode 100644 index 0000000..12a9912 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/MethodCreationTest.scala @@ -0,0 +1,74 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.Defines +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import
io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class MethodCreationTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + val fixture: CpgFromCodeTestFixture = CpgFromCodeTestFixture( + """ + |function test($first, $second) { + | echo "test"; + |} + | + |function noParam() { + | echo "no param"; + |} + |""".stripMargin) + + s"cpg for PHP $version" should { + "not have a single node with an order less than 0" in { + fixture.cpg.method.foreach( + method => + method.ast.foreach( + node => + assert( + node.order >= 0, + s"node $node has the order ${node.order} which is below zero") + ) + ) + } + "have a single test method definition" in { + val method = fixture.cpg.method("test").l + method.length shouldBe 1 + method.head.name shouldBe "test" + method.head.code shouldBe "test($param1, $param2)" + } + "have a single noParam method definition" in { + //remember function names are case insensitive and as such stored lower case in the cpg + val methods = fixture.cpg.method("noparam").l + methods match { + case method :: Nil => + method.name shouldBe "noparam" + method.code shouldBe "noparam()" + case x => fail(s"unexpected traversal result $x") + } + } + "have a single namespace node" in { + fixture.cpg.namespaceBlock(Defines.GLOBAL_NAMESPACE_NAME).l match { + case namespace :: Nil => + namespace.name shouldBe Defines.GLOBAL_NAMESPACE_NAME + case x => fail(s"unexpected traversal result $x") + } + } + + "create a method with the name of the class" in new CpgFromCodeTestFixture( + """class Klasse { + | public function Send() { + | echo "test"; + | } + |} + |""".stripMargin + ) { + cpg.method.fullName("klasse::send").toList.size shouldBe 1 + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/MissingStructuralNamespaceNodesTest.scala 
b/layerByteCode/src/test/scala/io/joern/bytecode/passes/MissingStructuralNamespaceNodesTest.scala new file mode 100644 index 0000000..8231e5a --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/MissingStructuralNamespaceNodesTest.scala @@ -0,0 +1,64 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class MissingStructuralNamespaceNodesTest + extends AnyWordSpec + with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + val fixture: CpgFromCodeTestFixture = CpgFromCodeTestFixture( + """namespace FooBarBaz; + | + |const TEST = 1; + |class Foo { + | function bar() { + | global $stuff; + | $stuff = 42; + | $baz = 1337 + $stuff; + | return $baz; + | } + |} + |""".stripMargin) + + s"cpg for PHP $version" should { + "have the namespace test" in { + // fixture.cpg.namespace.toList.length shouldBe 1 + } + "have a typeDecl named Foo" in { + fixture.cpg.typeDecl.fullName("foobarbaz\\\\foo").l.length shouldBe 1 + } + "have a method named bar" in { + fixture.cpg.method.fullName("foobarbaz\\\\foo::bar").l.length shouldBe 1 + } + "namespace member named TEST" in { + fixture.cpg + .namespaceBlock("foobarbaz") + .astChildren + .isMember + .l + .length shouldBe 1 + } + "have local named baz" in { + // should have two local has we have $baz and a temporary T2 + fixture.cpg + .method.fullName("foobarbaz\\\\foo::bar") + .head + .astChildren + .isLocal + .l + .length shouldBe 2 + } + "have the type int" in { + fixture.cpg.typ("foobarbaz\\\\foo").l.length shouldBe 1 + fixture.cpg.typ("Integer").l.length shouldBe 1 + } + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/MissingStructuralTypeNodesTest.scala 
b/layerByteCode/src/test/scala/io/joern/bytecode/passes/MissingStructuralTypeNodesTest.scala new file mode 100644 index 0000000..8c91eb4 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/MissingStructuralTypeNodesTest.scala @@ -0,0 +1,34 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class MissingStructuralTypeNodesTest + extends AnyWordSpec + with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + val fixture: CpgFromCodeTestFixture = CpgFromCodeTestFixture( + """class foo { + | function bar() { + | echo "test"; + | } + |} + |""".stripMargin) + + s"cpg for PHP $version" should { + "have the typeDecl foo" in { + fixture.cpg.typeDecl.toList.length shouldBe 1 + } + "have the type foo, string, and NULL" in { + fixture.cpg.typ.toList.length shouldBe 3 + } + } + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/ParamInterpretPassTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/passes/ParamInterpretPassTest.scala new file mode 100644 index 0000000..0a96462 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/ParamInterpretPassTest.scala @@ -0,0 +1,94 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class ParamInterpretPassTest extends AnyWordSpec with Matchers with PHPVersions { + + for (v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + s"paramInterpretPass for PHP $version" should { + "contain correct name fields for 
parameters" in new CpgFromCodeTestFixture( + """ + |echo "test"; + |function foo($x, $y, $z) { + |} + | + |foo(1,2,3); + |""".stripMargin + ) { + cpg.parameter.map(x => (x.order, x.name)).toSet shouldBe Set( + (0, "x"), + (1, "y"), + (2, "z")) + } + } + } + "PHP 8 named params" when { + "example" in new CpgFromCodeTestFixture( + """function f($m, $e = "Hi Surf", $url = "http://www.example.org") + |{ + | echo $m; + | echo $e; + | return file_get_contents($url); + |} + |echo f(m: "Hello Surf", url: $_GET["a"]); + |""".stripMargin + )(PHPVersion.V8) { + cpg.parameter.map(x => (x.order, x.name)).toSet shouldBe Set( + (0, "m"), + (1, "e"), + (2, "url")) + } + "variation: usage swapped" in new CpgFromCodeTestFixture( + """function f($m, $e = "Hi Surf", $url = "http://www.example.org") + |{ + | echo $e; + | echo $m; + | return file_get_contents($url); + |} + |echo f(m: "Hello Surf", url: $_GET["a"]); + |""".stripMargin + )(PHPVersion.V8) { + cpg.parameter.map(x => (x.order, x.name)).toSet shouldBe Set( + (0, "m"), + (1, "e"), + (2, "url")) + } + "variation: call site param order swapped" in new CpgFromCodeTestFixture( + """function f($m, $e = "Hi Surf", $url = "http://www.example.org") + |{ + | echo $m; + | echo $e; + | return file_get_contents($url); + |} + |echo f(url: $_GET["a"], m: "Hello Surf"); + |""".stripMargin + )(PHPVersion.V8) { + cpg.parameter.map(x => (x.order, x.name)).toSet shouldBe Set( + (0, "m"), + (1, "e"), + (2, "url")) + } + "variation: usage and call site param order swapped" in new CpgFromCodeTestFixture( + """function f($m, $e = "Hi Surf", $url = "http://www.example.org") + |{ + | echo $e; + | echo $m; + | return file_get_contents($url); + |} + |echo f(url: $_GET["a"], m: "Hello Surf"); + |""".stripMargin + )(PHPVersion.V8) { + cpg.parameter.map(x => (x.order, x.name)).toSet shouldBe Set( + (0, "m"), + (1, "e"), + (2, "url")) + } + + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/passes/QuoteSideCaseTest.scala 
b/layerByteCode/src/test/scala/io/joern/bytecode/passes/QuoteSideCaseTest.scala new file mode 100644 index 0000000..91d8c61 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/passes/QuoteSideCaseTest.scala @@ -0,0 +1,26 @@ +package io.joern.bytecode.passes + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.codepropertygraph.generated.nodes +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class QuoteSideCaseTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + s"quotes in quotes for PHP $version" should { + "work for basic case" in new CpgFromCodeTestFixture( + """ + |echo '") ' . '$var'; + |""".stripMargin + ) { + cpg.call("ECHO").astChildren.order(0).head.asInstanceOf[nodes.Literal].code shouldBe "\") $var" + } + } + + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/BadLineAndOrderNumbers.scala b/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/BadLineAndOrderNumbers.scala new file mode 100644 index 0000000..437ca6d --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/BadLineAndOrderNumbers.scala @@ -0,0 +1,32 @@ +package io.joern.bytecode.realWorldPocs + +import io.joern.bytecode.parser.PHPVersion +import io.joern.bytecode.parser.PHPVersion.V7 +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class BadLineAndOrderNumbers extends AnyWordSpec with Matchers { + + // line numbers only matter for PHP7 as 8 does not provide them anymore + implicit val version: PHPVersion.Value = V7 + + "in PHP7 the line numbers and order" should { + "be correct" in new 
CpgFromCodeTestFixture( + s"""mysqli_query($$link,"BEGIN"); + |mysqli_query($$link,"SELECT test FROM ttable WHERE condition = true"); + |mysqli_query($$link,"UPDATE ttable SET test = '42' WHERE condition = true"); + |mysqli_query($$link,"COMMIT"); + |""".stripMargin + ) { + /*cpg.call.code("DO_FCALL_BY_NAME").map(_.lineNumber).map(_.get).toSet shouldBe Set(2, + 3, + 4, + 5)*/ + //this result set is derived by looking at the println - might be subject to change + //relevant part of the test is that there are four different + cpg.call.code("DO_FCALL_BY_NAME").map(_.order).toSet shouldBe Set(3, 7, 11, 15) + } + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/ClosureTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/ClosureTest.scala new file mode 100644 index 0000000..fa59c93 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/ClosureTest.scala @@ -0,0 +1,23 @@ +package io.joern.bytecode.realWorldPocs + +import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class ClosureTest extends AnyWordSpec with Matchers with PHPVersions { + + for (v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + s"Closures in PHP$version" should { + "be able to be processed" in new CpgFromCodeTestFixture( + """ + array_map(fn($n) => $n, array_filter($a['b'], fn($n) => !is_null($n))); + """.stripMargin + ) + } + + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/MissingLiteralParentTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/MissingLiteralParentTest.scala new file mode 100644 index 0000000..dc03b4c --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/MissingLiteralParentTest.scala @@ -0,0 +1,39 @@ +package io.joern.bytecode.realWorldPocs + 
+import io.joern.bytecode.parser.{PHPVersion, PHPVersions} +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.codepropertygraph.generated.EdgeTypes +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import io.shiftleft.semanticcpg.language._ + +class MissingLiteralParentTest extends AnyWordSpec with Matchers with PHPVersions { + + for (v <- getPhpVersions) { + implicit val version: PHPVersion.Value = v + + s"every literal in PHP $v" should { + "have a parent " in new CpgFromCodeTestFixture( + """$outeru = 3; + |$closure = function($outerp) use ( + | $outeru + | ) { + | $inneru = 4; + | $inclo = function($innerp) use ($inneru) { + | echo "test"; + | }; + |}; + |""".stripMargin + ) { + // we do not process closures anymore this unit test only tests whether it compiles without error + //cpg.literal.foreach { + // lit => lit.in(EdgeTypes.AST).hasNext shouldBe true + // lit.astParent.next() + //} + } + + } + + } + +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/Parsing.scala b/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/Parsing.scala new file mode 100644 index 0000000..fb2c506 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/Parsing.scala @@ -0,0 +1,141 @@ +package io.joern.bytecode.realWorldPocs + + +import io.joern.bytecode.parser +import io.joern.bytecode.parser.PHPVersion.V8 +import io.joern.bytecode.parser.utils.encodeBase64 +import io.joern.bytecode.parser.{EasyBase64, PHPVersion, PHPVersions} +import io.joern.bytecode.util.implicits.OneableSeq +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.codepropertygraph.generated.nodes.{Call, Expression} +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class Parsing extends AnyWordSpec with Matchers with PHPVersions { + + for (v <- getPhpVersions) { 
+ implicit val version: parser.PHPVersion.Value = v + s"PHP $v" should { + "quotes in switch case" in new CpgFromCodeTestFixture( + """$i = $_GET["a"]; + | + |switch ($i) { + | case "'": + | echo 1; + | break; + | case "\"": + | echo 1; + | break; + | case "übel": + | echo "2"; + | default: + | break; + |}""".stripMargin) { + val switch: Call = cpg.call("SWITCH_STRING").head + val args: List[Expression] = switch.argument.l + args.head.code shouldBe "CV($i)" + args(1).code shouldBe "'" + args(2).code shouldBe "11" + args(3).code shouldBe "\"" + args(4).code shouldBe "13" + args(5).code shouldBe "übel" + args(6).code shouldBe "15" + args(7).code shouldBe "default" + args(8).code shouldBe "16" + } + "parse Null Coalescing Assignment Operator with arrays" in new CpgFromCodeTestFixture( + """ + |$x[$y['z']] ??= []; + |""".stripMargin) { + // just don't fail. + } + /* + * old bug java.lang.NumberFormatException + * because the Class begins with BB we mistook it for a building block identifier (e.g. 
BB1) and the parse took the wrong route + */ + "handle Classnames starting with BB" in new CpgFromCodeTestFixture( + """ + |class BBMessageContent { + | public function build() {} + | } + |""".stripMargin) { + assert(true) + } + + /* + old bug with exception table parsing + */ + "nested try/catch" in new CpgFromCodeTestFixture( + """ + |try { + | echo 1; + | try { + | echo 2; + | } catch (Exception $e) { + | echo 3; + | } + |} finally { + | echo 4; + |} + |""".stripMargin) { + assert(true) + } + } + } + + implicit val version: PHPVersion.Value = V8 + "quotes in match case" in new CpgFromCodeTestFixture( + """match ($test) { + | "'" => "first", + | "\"" => "second", + | "übel" => "third", + | 12 => "fourth", + |};""".stripMargin) { + val match_call: Call = cpg.call("MATCH").head + val args: List[Expression] = match_call.argument.l + args.head.code shouldBe "CV($test)" + args(1).code shouldBe "'" + args(2).code shouldBe "2" + args(3).code shouldBe "\"" + args(4).code shouldBe "4" + args(5).code shouldBe "übel" + args(6).code shouldBe "6" + args(7).code shouldBe "12" + args(8).code shouldBe "8" + args(9).code shouldBe "default" + args(10).code shouldBe "1" + } + /* + this was an old bug in the delete unreachable code pass. 
+ */ + "don't crash with nested arrow funcs" in new CpgFromCodeTestFixture("""fn($x) => fn($y) => 1;""".stripMargin) { + assert(true) + } + "content of const arrays" in new CpgFromCodeTestFixture( + """ + |$x = array(1,2,"foo" => array(42,6)); + |""".stripMargin) { + val assign: Call = cpg.call("ASSIGN").l.one + val b64: String = encodeBase64("""{"0":1,"1":2,"foo":"PHP2CPG-NESTED-ARRAY-LIMITATION"}""") /* expected payload: the nested array is represented by a fixed placeholder string, not by its contents */ + assign.code shouldBe s"ASSIGN CV($$x) array($b64)" + val args = assign.argument.l + args(0).code shouldBe "CV($x)" + args(1).code shouldBe s"array($b64)" + } + "empty array" when { + "as variable init " in new CpgFromCodeTestFixture( + """$x = [];""".stripMargin) { + cpg.call("ASSIGN").l match { + case assign :: Nil => assign.argument.code.l shouldBe List("CV($x)", "array()") + case other => fail(s"unexpected traversal result $other") /* zero or several ASSIGN calls: fail with the offending list instead of a bare fail() or a MatchError */ + } + } + "default value of function" in new CpgFromCodeTestFixture( + """function f($x = []) { }""".stripMargin) { + cpg.method("f") should not be empty + cpg.call("RECV_INIT").argument.order(1).code.head shouldBe "array()" + } + } +} + +diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/php81.scala b/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/php81.scala new file mode 100644 index 0000000..49337d7 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/realWorldPocs/php81.scala @@ -0,0 +1,61 @@ +package io.joern.bytecode.realWorldPocs + +import io.joern.bytecode.parser.PHPVersion +import io.joern.bytecode.parser.PHPVersion.V8 +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class php81 extends AnyWordSpec with Matchers { + + implicit val version: PHPVersion.Value = V8 + + "callable convert" in new CpgFromCodeTestFixture( + """ + |$c = strlen(...); + |echo c("asdasd"); + |""".stripMargin) { + cpg.call("CALLABLE_CONVERT").nonEmpty shouldBe true + } + + "never" in new
CpgFromCodeTestFixture("""function f(): never {exit();}"""){ + cpg.all.nonEmpty shouldBe true + } + + "can work with all php8.1 features at once" in new CpgFromCodeTestFixture( + """ + |function f(): never + |{ + | $g = $GLOBALS; + | $a = $_GET; + | $c = strlen(...); + | $b = match ($a["b"] + 1) { + | 1 => $o?->f(), + | 2 => htmlspecialchars($string, double_encode: false), + | 3 => $c("a"), + | }; + | match ($a + 1) { + | 2 => print(1), + | }; + | if($ba?->do()){} + | exit(1); + |} + |f(); + | + |""".stripMargin) { + cpg.call("CALLABLE_CONVERT").nonEmpty shouldBe true + cpg.call("MATCH").nonEmpty shouldBe true + cpg.call("MATCH_ERROR").nonEmpty shouldBe true + cpg.call("CHECK_UNDEF_ARGS").nonEmpty shouldBe true + cpg.call("FETCH_GLOBALS").nonEmpty shouldBe true + cpg.call("VERIFY_NEVER_TYPE").nonEmpty shouldBe true + cpg.call("JMP_NULL").nonEmpty shouldBe true + } + + "check func arg with string literal" in new CpgFromCodeTestFixture("""call_user_func_array(a: $field["callback"], b: array($field));""".stripMargin) { + // the code throws a runtime error + // but we should be able to convert it + assert(true) + } +} diff --git a/layerByteCode/src/test/scala/io/joern/bytecode/util/extensions/NodeExtensionTest.scala b/layerByteCode/src/test/scala/io/joern/bytecode/util/extensions/NodeExtensionTest.scala new file mode 100644 index 0000000..9981141 --- /dev/null +++ b/layerByteCode/src/test/scala/io/joern/bytecode/util/extensions/NodeExtensionTest.scala @@ -0,0 +1,63 @@ +package io.joern.bytecode.util.extensions + +import io.joern.bytecode.parser +import io.joern.bytecode.parser.PHPVersion._ +import io.joern.bytecode.parser.PHPVersions +import io.joern.bytecode.util.extensions.NodeExtension._ +import io.joern.bytecode.util.unittesting.CpgFromCodeTestFixture +import io.shiftleft.codepropertygraph.generated.nodes.{Call, Identifier, Literal} +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec 
+ +class NodeExtensionTest extends AnyWordSpec with Matchers with PHPVersions { + + for(v <- getPhpVersions) { + implicit val version: parser.PHPVersion.Value = v + s"call node extension in PHP $version" should { + "give me the right arguments" in new CpgFromCodeTestFixture( + """ + |f(1,$eins); + |""".stripMargin) { + val call: List[Call] = cpg.call.nameExact("f").l + call.length shouldBe 1 + call.head.getParameter(1).get.asInstanceOf[Literal].code shouldBe "1" + call.head.getParameter(2).get.asInstanceOf[Identifier].name shouldBe "eins" + } + "deal with call_user_func_array" in new CpgFromCodeTestFixture( + """call_user_func_array('file_get_contents', $args);""" + ) { + val call = cpg.call.nameExact("file_get_contents").l + call.length shouldBe 1 + call.head.getParameter(0).get.asInstanceOf[Identifier].name shouldBe "args" + } + } + + } + + { + implicit val version: parser.PHPVersion.Value = V8 + s"call node extension PHP $version specific" should { + "give me the right named parameter" in new CpgFromCodeTestFixture( + """ + |f(named : 1); + |""".stripMargin) { + val call: List[Call] = cpg.call.nameExact("f").l + call.length shouldBe 1 + call.head.getParameter("named").get.asInstanceOf[Literal].code shouldBe "1" + } + "give me the right parameter even if mixed" in new CpgFromCodeTestFixture( + """ + |f($var, 7, named : 42, other : 33); + |""".stripMargin + ) { + val call: List[Call] = cpg.call.nameExact("f").l + call.length shouldBe 1 + call.head.getParameter(1).get.asInstanceOf[Identifier].name shouldBe "var" + call.head.getParameter("named").get.asInstanceOf[Literal].code shouldBe "42" + call.head.getParameter("other").get.asInstanceOf[Literal].code shouldBe "33" + } + } + } + +} diff --git a/layerSourceCode/resources/unittesting/testprojects/onlyMainCreation/trivial-php.php b/layerSourceCode/resources/unittesting/testprojects/onlyMainCreation/trivial-php.php new file mode 100644 index 0000000..e05fdd3 --- /dev/null +++ 
/** Builds a code property graph from a set of PHP source files. */
class PhpToCpg {

  /** Creates an empty CPG at `outputPath` and applies the meta data pass followed by the
    * AST creation pass.
    *
    * @param fileNames  the PHP files handed to [[AstCreationPass]]
    * @param outputPath storage location passed to `X2Cpg.newEmptyCpg`
    * @return the populated CPG; it is returned open, closing it is left to the caller
    */
  def run(fileNames: Set[String], outputPath: String): Cpg = {
    // Two key pools are requested so that each pass allocates node ids from its own pool.
    // Previously both passes were given `keyPools.head` and the second pool was never used.
    val keyPools = KeyPoolCreator.obtain(2)
    val metaDataKeyPool = keyPools.head
    val astKeyPool = keyPools.last
    val cpg = X2Cpg.newEmptyCpg(Some(outputPath))
    new MetaDataPass(cpg, metaDataKeyPool).createAndApply()
    new AstCreationPass(fileNames.toSeq, cpg, astKeyPool).createAndApply()
    cpg
  }

}
/** Returns the textual representation used when composing the `code` of a parent node.
  *
  * Nodes that carry a `code` property yield it; blocks and type declarations yield a fixed
  * placeholder. Any other node type is not handled and results in a `MatchError`.
  */
def toCode(node: nodes.NewNode): String = node match {
  case _: nodes.NewBlock                 => "CODEBLOCK"
  case _: nodes.NewTypeDecl              => "TYPEDECL"
  case call: nodes.NewCall               => call.code
  case structure: nodes.NewControlStructure => structure.code
  case identifier: nodes.NewIdentifier   => identifier.code
  case literal: nodes.NewLiteral         => literal.code
  case parameter: nodes.NewMethodParameterIn => parameter.code
  case ret: nodes.NewReturn              => ret.code
  case method: nodes.NewMethod           => method.code
}

/** Element-wise variant of `toCode` that keeps the order of the given nodes. */
def toCode(node: Seq[nodes.NewNode]): Seq[String] =
  node.map(single => toCode(single))
"Stmt_Property" => createStmtProperty(json, childNum) + case "Stmt_ClassMethod" => createStmtClassMethod(json, childNum) + case "Stmt_Namespace" => createStmtNamespace(json, childNum) + case "Stmt_Use" => createStmtUse(json, childNum) + case "Stmt_Foreach" => createStmtForeach(json, childNum) + case "Stmt_Continue" => createStmtContinue(json, childNum) + case "Stmt_Throw" => createStmtThrow(json, childNum) + case "Stmt_TryCatch" => createStmtTryCatch(json, childNum) + case "Stmt_Catch" => createStmtCatch(json, childNum) + case "Stmt_Finally" => createStmtFinally(json, childNum) + case "Stmt_Unset" => createStmtUnset(json, childNum) + case "Expr_FuncCall" => createExprFuncCall(json, childNum) + case "Expr_Variable" | "Identifier" | "VarLikeIdentifier" => + createExprVariable(json, childNum) + case "Expr_ConstFetch" => createExprConstFetch(json, childNum) + case "Expr_Assign" => createExprAssign(json, childNum) + case "Expr_PropertyFetch" => createExprPropertyFetch(json, childNum) + case "Expr_New" => createExprNew(json, childNum) + case "Expr_MethodCall" => createExprMethodCall(json, childNum) + case "Expr_StaticCall" => createExprStaticCall(json, childNum) + case "Expr_Instanceof" => createExprInstanceOf(json, childNum) + case "Expr_ShellExec" => createExprShellExec(json, childNum) + case "Expr_Array" => createExprArray(json, childNum) + case "Expr_ArrayItem" => createExprArrayItem(json, childNum) + case "Expr_ArrayDimFetch" => createExprArrayDimFetch(json, childNum) + case "Expr_Empty" => createExprEmpty(json, childNum) + case "Expr_Isset" => createExprIsset(json, childNum) + case "Expr_Ternary" => createExprTernary(json, childNum) + case "Expr_Closure" => createExprClosure(json, childNum) + case "Expr_List" => createExprList(json, childNum) + case "Expr_StaticPropertyFetch" => + createExprStaticPropertyFetch(json, childNum) + case "Expr_ClassConstFetch" => createExprClassConstFetch(json, childNum) + case "Expr_BooleanNot" | "Expr_UnaryMinus" | "Expr_UnaryPlus" | 
/** Decodes a modifier bitmask into the list of modifier keywords it contains.
  *
  * The previous implementation was a closed lookup over eight hard-coded values and threw a
  * `MatchError` for every other combination (for instance 8 = static alone, 17 = public
  * abstract, 33 = public final). The mask is now decoded bit by bit; the eight values that
  * were supported before produce exactly the same lists in the same order.
  *
  * NOTE(review): the bit assignments (1 public, 2 protected, 4 private, 8 static,
  * 16 abstract, 32 final) are assumed to follow php-parser's modifier constants; confirm
  * against the parser version in use. Bits outside this table are ignored.
  *
  * @param flag the `flags` value of a JSON node, delivered as a Double by the JSON layer
  * @return the modifier keywords in the fixed order listed above; empty for 0
  */
def translateFlag(flag: Double): List[String] = {
  val modifierBits = List(
    1 -> "public",
    2 -> "protected",
    4 -> "private",
    8 -> "static",
    16 -> "abstract",
    32 -> "final"
  )
  // the JSON layer hands numbers over as Double; the mask itself is integral
  val mask = flag.toInt
  modifierBits.collect { case (bit, keyword) if (mask & bit) != 0 => keyword }
}
/** Creates a `ternary` control structure for an `Expr_Ternary` node.
  *
  * Children are attached in the order condition, optional "if" branch, "else" branch.
  * The short form `cond ?: else` carries no "if" branch; previously the unconditional
  * `.get` on that entry failed for such expressions. When the branch is absent the else
  * branch takes order 1 and the code is rendered as `cond ?: else`.
  *
  * NOTE(review): relies on `getJsonObject` yielding `None` for an absent/null entry, as the
  * other optional lookups in this file (`finally`, `keyVar`, `dim`) do — confirm.
  *
  * @param json     the JSON object of the ternary expression
  * @param childNum order of the created node below its parent
  * @return the control structure node, already added to the diff graph
  */
def createExprTernary(json: Map[String, Any], childNum: Integer)(
    implicit diffGraph: DiffGraph.Builder): nodes.NewNode = {
  val attributes = getJsonObject("attributes", json).get
  val condNode = create(getJsonObject("cond", json).get, 0)
  val ifNode: Option[nodes.NewNode] =
    getJsonObject("if", json).map(ifJson => create(ifJson, 1))
  // the else branch directly follows whatever was created before it
  val elseOrder: Int = if (ifNode.isDefined) 2 else 1
  val elseNode = create(getJsonObject("else", json).get, elseOrder)
  val code = ifNode match {
    case Some(node) => s"${toCode(condNode)} ? ${toCode(node)} : ${toCode(elseNode)}"
    case None       => s"${toCode(condNode)} ?: ${toCode(elseNode)}"
  }
  val ternary = nodes
    .NewControlStructure()
    .code(code)
    .parserTypeName("ternary")
    .lineNumber(getLineStart(attributes))
    .order(childNum)
  diffGraph.addNode(ternary)
  diffGraph.addEdge(ternary, condNode, EdgeTypes.AST)
  ifNode.foreach(node => diffGraph.addEdge(ternary, node, EdgeTypes.AST))
  diffGraph.addEdge(ternary, elseNode, EdgeTypes.AST)
  ternary
}
/** Creates a `tryCatch` control structure for a `Stmt_TryCatch` node.
  *
  * Children are attached as: the try block (order 0), one node per catch clause
  * (orders 1..n) and, if present, the finally node (order n + 1).
  *
  * Fixes the catch clauses being created with `childNum` (the order of the try statement
  * itself) instead of the running child counter, which gave every catch the same order.
  *
  * @param json     the JSON object of the try statement
  * @param childNum order of the created node below its parent
  * @return the control structure node, already added to the diff graph
  */
def createStmtTryCatch(json: Map[String, Any], childNum: Integer)(
    implicit diffGraph: DiffGraph.Builder): nodes.NewNode = {
  val attributes = getJsonObject("attributes", json).get
  val stmts = getJsonList("stmts", json)
  val catches = getJsonList("catches", json)
  val finallies = getJsonObject("finally", json)
  val tryCatch = nodes
    .NewControlStructure()
    .code("tryCatch")
    .parserTypeName("tryCatch")
    .lineNumber(getLineStart(attributes))
    .order(childNum)
  diffGraph.addNode(tryCatch)
  diffGraph.addEdge(tryCatch, createCodeBlock(stmts, 0), EdgeTypes.AST)
  // order 0 is taken by the try block, so the catch clauses start at 1
  catches.zipWithIndex.foreach {
    case (catchJson, index) =>
      diffGraph.addEdge(tryCatch, create(catchJson, index + 1), EdgeTypes.AST)
  }
  finallies.foreach { finallyJson =>
    diffGraph.addEdge(tryCatch,
                      create(finallyJson, catches.length + 1),
                      EdgeTypes.AST)
  }
  tryCatch
}
/** Creates an `empty` call node for an `Expr_Empty` node with the inspected expression as
  * its only child (order 0).
  *
  * Fixes the rendered code, which was missing the closing parenthesis (`empty(expr`).
  *
  * @param json     the JSON object of the `empty(...)` expression
  * @param childNum order of the created node below its parent
  * @return the call node, already added to the diff graph
  */
def createExprEmpty(json: Map[String, Any], childNum: Integer)(
    implicit diffGraph: DiffGraph.Builder): nodes.NewNode = {
  val attributes = getJsonObject("attributes", json).get
  val expr = create(getJsonObject("expr", json).get, 0)
  val node = nodes
    .NewCall()
    .code(s"empty(${toCode(expr)})")
    .name("empty")
    .order(childNum)
    .lineNumber(getLineStart(attributes))
  diffGraph.addNode(node)
  diffGraph.addEdge(node, expr, EdgeTypes.AST)
  node
}
/** Creates a `classConstFetch` call node for an `Expr_ClassConstFetch` node.
  *
  * The class part becomes child 0: a plain `Name` is turned into an identifier typed
  * `classIdentifier`, anything else is delegated to `create`. The constant name becomes
  * child 1.
  *
  * Fixes the class node being added to the diff graph a second time when it came from
  * `create`: the `create*` methods visible in this file add their result themselves, so
  * only the locally built identifier is added here.
  *
  * @param json     the JSON object of the class constant fetch
  * @param childNum order of the created node below its parent
  * @return the call node, already added to the diff graph
  */
def createExprClassConstFetch(json: Map[String, Any], childNum: Integer)(
    implicit diffGraph: DiffGraph.Builder): nodes.NewNode = {
  val attributes = getJsonObject("attributes", json).get
  val classJson = getJsonObject("class", json).get
  val classNode: nodes.NewNode =
    getJsonAtom[String]("nodeType", classJson).get match {
      case "Name" =>
        val identifier = nodes
          .NewIdentifier()
          .code(nameNodeToNameString(classJson))
          .typeFullName("classIdentifier")
          .order(0)
        diffGraph.addNode(identifier)
        identifier
      case _ => create(classJson, 0)
    }
  val nameNode = create(getJsonObject("name", json).get, 1)
  val node = nodes
    .NewCall()
    .code(s"${toCode(classNode)}::${toCode(nameNode)}")
    .name("classConstFetch")
    .order(childNum)
    .lineNumber(getLineStart(attributes))

  diffGraph.addNode(node)
  diffGraph.addEdge(node, classNode, EdgeTypes.AST)
  diffGraph.addEdge(node, nameNode, EdgeTypes.AST)
  node
}
/** Creates a `cast` call node for an `Expr_Cast_*` node with the casted expression as its
  * only child (order 0).
  *
  * The target type is the part of `nodeType` after the `Expr_Cast_` prefix and is rendered
  * as `(type)` in the node's code.
  *
  * Fixes the cast node never being added to the diff graph although an AST edge starting
  * at it was created; every other creator in this file adds its node before wiring edges.
  *
  * @param json     the JSON object of the cast expression
  * @param childNum order of the created node below its parent
  * @return the call node, already added to the diff graph
  */
def createExprCast(json: Map[String, Any], childNum: Integer)(
    implicit diffGraph: DiffGraph.Builder): nodes.NewNode = {
  val attributes = getJsonObject("attributes", json).get
  val castTo =
    getJsonAtom[String]("nodeType", json).get.substring("Expr_Cast_".length)
  val expr = getJsonObject("expr", json).get
  val cast = nodes
    .NewCall()
    .code(s"($castTo)")
    .name("cast")
    .order(childNum)
    .lineNumber(getLineStart(attributes))
  diffGraph.addNode(cast)
  diffGraph.addEdge(cast, create(expr, 0), EdgeTypes.AST)
  cast
}
/** Creates a namespace block for a `Stmt_Namespace` node.
  *
  * The namespace name is used for both `name` and `fullName`; the contained statements are
  * wrapped into a single code block attached as child 0.
  *
  * @param json     the JSON object of the namespace statement
  * @param childNum order of the created node below its parent
  * @return the namespace block, already added to the diff graph
  */
def createStmtNamespace(json: Map[String, Any], childNum: Integer)(
    implicit diffGraph: DiffGraph.Builder): nodes.NewNode = {
  val namespaceName = nameNodeToNameString(getJsonObject("name", json).get)
  val bodyJson = getJsonList("stmts", json)
  val namespaceBlock =
    nodes.NewNamespaceBlock().name(namespaceName).fullName(namespaceName).order(childNum)
  diffGraph.addNode(namespaceBlock)
  val body = createCodeBlock(bodyJson, 0)
  diffGraph.addEdge(namespaceBlock, body, EdgeTypes.AST)
  namespaceBlock
}
/** Creates a call node for a compound assignment (`Expr_AssignOp_*`).
  *
  * The operator symbol is derived from the part of `nodeType` after the `Expr_AssignOp_`
  * prefix and is used both as the call name and inside the rendered code. The assigned
  * variable is child 0, the right-hand expression child 1.
  *
  * Fixes `ShiftRight` being rendered as `<<=` (now `>>=`), adds the missing `Pow` (`**=`)
  * operator and drops the second tuple element that was computed but never used.
  * An unknown operator suffix still results in a `MatchError`.
  *
  * @param json     the JSON object of the compound assignment
  * @param childNum order of the created node below its parent
  * @return the call node, already added to the diff graph
  */
def createExprAssignOp(json: Map[String, Any], childNum: Integer)(
    implicit diffGraph: DiffGraph.Builder): nodes.NewNode = {
  val attributes = getJsonObject("attributes", json).get
  val variable = create(getJsonObject("var", json).get, 0)
  val expr = create(getJsonObject("expr", json).get, 1)
  val code = getJsonAtom[String]("nodeType", json).get
    .substring("Expr_AssignOp_".length) match {
    case "Plus"       => "+="
    case "Minus"      => "-="
    case "Mul"        => "*="
    case "Div"        => "/="
    case "Mod"        => "%="
    case "Pow"        => "**="
    case "Concat"     => ".="
    case "BitwiseAnd" => "&="
    case "BitwiseOr"  => "|="
    case "BitwiseXor" => "^="
    case "ShiftLeft"  => "<<="
    case "ShiftRight" => ">>="
    case "Coalesce"   => "??="
  }
  val assignOp = nodes
    .NewCall()
    .code(s"${toCode(variable)} $code ${toCode(expr)}")
    .name(code)
    .lineNumber(getLineStart(attributes))
    .order(childNum)
  diffGraph.addNode(assignOp)
  diffGraph.addEdge(assignOp, variable, EdgeTypes.AST)
  diffGraph.addEdge(assignOp, expr, EdgeTypes.AST)
  assignOp
}
/** Creates a method node for a `Stmt_ClassMethod` node.
  *
  * Parameters are attached first (orders starting at 0), followed by one code block holding
  * the method statements whose order equals the number of parameters. `isExternal` is set
  * when the decoded flags contain `public`.
  *
  * @param json     the JSON object of the class method
  * @param childNum order of the created node below its parent
  * @return the method node, already added to the diff graph
  */
def createStmtClassMethod(json: Map[String, Any], childNum: Integer)(
    implicit diffGraph: DiffGraph.Builder): nodes.NewNode = {
  val attributes = getJsonObject("attributes", json).get
  val methodName =
    getJsonAtom[String]("name", getJsonObject("name", json).get).get
  val modifiers = translateFlag(getJsonAtom[Double]("flags", json).get)
  val parameters: Seq[nodes.NewNode] = create(getJsonList("params", json))
  val bodyJson = getJsonList("stmts", json)
  // rendered as name(param1,param2,...)
  val signature = toCode(parameters).mkString(s"$methodName(", ",", ")")
  val methodNode = nodes
    .NewMethod()
    .code(signature)
    .name(methodName)
    .fullName(methodName)
    .isExternal(modifiers.contains("public"))
    .lineNumber(getLineStart(attributes))
    .lineNumberEnd(getLineEnd(attributes))
    .order(childNum)
  diffGraph.addNode(methodNode)
  for (parameter <- parameters) {
    diffGraph.addEdge(methodNode, parameter, EdgeTypes.AST)
  }
  val body = createCodeBlock(bodyJson, parameters.length)
  diffGraph.addEdge(methodNode, body, EdgeTypes.AST)
  methodNode
}
/** Creates a type declaration for a `Stmt_Class` node.
  *
  * The class name serves as `name` and `fullName`, an `extends` clause (if any) is recorded
  * in `inheritsFromTypeFullName`, and the class body is attached as a single code block
  * (child 0). Classes with a non-empty `implements` list are rejected by an assertion.
  *
  * @param json     the JSON object of the class statement
  * @param childNum order of the created node below its parent
  * @return the type declaration node, already added to the diff graph
  */
def createStmtClass(json: Map[String, Any], childNum: Integer)(
    implicit diffGraph: DiffGraph.Builder): nodes.NewNode = {
  val className =
    getJsonAtom[String]("name", getJsonObject("name", json).get).get
  val interfaces = getJsonAtom[List[_]]("implements", json).get
  assert(interfaces.isEmpty,
         "implements keyword in class creation is currently not supported")
  val parents: List[String] =
    getJsonObject("extends", json).map(nameNodeToNameString).toList
  val bodyJson = getJsonList("stmts", json)
  val typeDecl = nodes
    .NewTypeDecl()
    .name(className)
    .fullName(className)
    .isExternal(true)
    .inheritsFromTypeFullName(parents)
    .order(childNum)
  diffGraph.addNode(typeDecl)
  val body = createCodeBlock(bodyJson, 0)
  diffGraph.addEdge(typeDecl, body, EdgeTypes.AST)
  typeDecl
}
.lineNumber(getLineStart(attributes)) + .order(childNum) + diffGraph.addNode(node) + node + } + + def createStmtBreak(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val attributes = getJsonObject("attributes", json).get + val node = nodes + .NewControlStructure() + .code("break") + .parserTypeName("break") + .lineNumber(getLineStart(attributes)) + .order(childNum) + diffGraph.addNode(node) + node + } + + def createStmtCase(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val attributes = getJsonObject("attributes", json).get + val trueJson: Map[String, Any] = + Map( + "nodeType" -> "Expr_ConstFetch", + "name" -> Map("nodeType" -> "Name", "parts" -> List("true")), + "attributes" -> Map( + "startLine" -> getLineStart(attributes).get.toDouble) + ) + val condition = create(getJsonObject("cond", json).getOrElse(trueJson), 0) + val stmt = nodes + .NewControlStructure() + .code(s"case(${toCode(condition)}):") + .parserTypeName("case") + .lineNumber(getLineStart(attributes)) + .order(childNum) + diffGraph.addNode(stmt) + diffGraph.addEdge(stmt, condition, EdgeTypes.AST) + diffGraph.addEdge(stmt, + createCodeBlock(getJsonList("stmts", json), 1), + EdgeTypes.AST) + stmt + } + + def createStmtSwitch(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val attributes = getJsonObject("attributes", json).get + val cond = create(getJsonObject("cond", json).get, 0) + val switch = nodes + .NewControlStructure() + .code(s"switch(${toCode(cond)})") + .parserTypeName("switch") + .lineNumber(getLineStart(attributes)) + .order(childNum) + diffGraph.addNode(switch) + diffGraph.addEdge(switch, cond, EdgeTypes.AST) + create(getJsonList("cases", json), 1).foreach { node => + diffGraph.addEdge(switch, node, EdgeTypes.AST) + } + switch + } + + def createStmtReturn(json: Map[String, Any], childNum: Integer)( + implicit 
diffGraph: DiffGraph.Builder): nodes.NewNode = { + val attributes = getJsonObject("attributes", json).get + val expr = create(getJsonObject("expr", json).get, 0) + val ret = nodes + .NewReturn() + .code(s"return ${toCode(expr)}") + .lineNumber(getLineStart(attributes)) + .order(childNum) + diffGraph.addNode(ret) + diffGraph.addEdge(ret, expr, EdgeTypes.AST) + ret + } + + def createParam(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val default = getJsonObject("default", json) match { + case None => None + case Some(_) => None + } + assert(default.isEmpty, + s"default should be None but is ${default.getClass.toString}") + val ptype = getJsonObject("type", json) + val name = getJsonObject("var", json).get match { + case pvar: Map[String, Any] => getJsonAtom[String]("name", pvar).get + } + assert(getJsonDouble("flags", json).contains(0.0)) + val node = nodes + .NewMethodParameterIn() + .code("$" + name) + .order(childNum) + .name(name) + .typeFullName(ptype match { + case Some(x) => + try { + getJsonAtom[String]("name", x).get + } catch { + case _: RuntimeException => nameNodeToNameString(x) + } + case None => "" + }) + diffGraph.addNode(node) + node + } + + def createStmtFunction(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val stmts = getJsonList("stmts", json) + val attributes = getJsonObject("attributes", json).get + val params = create(getJsonList("params", json)) + val name = getJsonObject("name", json).get match { + case x: Map[String, Any] => getJsonAtom[String]("name", x).get + } + val function = nodes + .NewMethod() + .code(s"$name(${toCode(params).mkString(",")})") + .name(name) + .fullName(name) + .isExternal(true) + .lineNumber(getLineStart(attributes)) + .lineNumberEnd(getLineStart(attributes)) + .order(childNum) + diffGraph.addNode(function) + params.foreach(param => diffGraph.addEdge(function, param, EdgeTypes.AST)) + 
diffGraph.addEdge(function, + createCodeBlock(stmts, params.length), + EdgeTypes.AST) + function + } + + def createStmtExpression(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + json.getOrElse("expr", missingKey("expr", json)) match { + case expr: Map[String, Any] => create(expr, childNum) + } + } + + def createStmtEcho(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val attributes = getJsonObject("attributes", json).get + val exprs = createCodeBlock(getJsonList("exprs", json), 0) + val call = nodes + .NewCall() + .code(s"echo ${toCode(exprs)}") + .name("echo") + .order(childNum) + .lineNumber(getLineStart(attributes)) + diffGraph.addNode(call) + + diffGraph.addEdge(call, exprs, EdgeTypes.AST) + call + } + + def createStmtElseIf(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val attributes = getJsonObject("attributes", json).get + val cond = create(getJsonObject("cond", json).get, 0) + val stmts = createCodeBlock(getJsonList("stmts", json), 1) + val call = nodes + .NewControlStructure() + .code(s"elseif(${toCode(cond)})") + .parserTypeName("elseif") + .lineNumber(getLineStart(attributes)) + .order(childNum) + diffGraph.addNode(call) + diffGraph.addEdge(call, cond, EdgeTypes.AST) + diffGraph.addEdge(call, stmts, EdgeTypes.AST) + call + } + + def createStmtIf(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val attributes = getJsonObject("attributes", json).get + val cond = create(getJsonObject("cond", json).get, 0) + val stmts = createCodeBlock(getJsonList("stmts", json), 1) + val elseifs = create(getJsonList("elseifs", json), 2) + val elseNode = getJsonObject("else", json) match { + case Some(json) => Some(create(json, 2 + elseifs.length)) + case None => None + } + val call = nodes + .NewControlStructure() + .code(s"if(${toCode(cond)})") 
+ .parserTypeName("if") + .lineNumber(getLineStart(attributes)) + .order(childNum) + diffGraph.addNode(call) + // adding the condition + diffGraph.addEdge(call, cond, EdgeTypes.AST) + // adding the true branch + diffGraph.addEdge(call, stmts, EdgeTypes.AST) + // adding the elseif branch + elseifs.foreach(node => diffGraph.addEdge(call, node, EdgeTypes.AST)) + // adding the else branch + elseNode match { + case Some(x) => diffGraph.addEdge(call, x, EdgeTypes.AST) + case None => + } + call + } + + def createExprFuncCall(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val name: String = json.getOrElse("name", missingKey("name", json)) match { + case name: Map[String, Any] => + try { + nameNodeToNameString(name) + } catch { + case _: AssertionError => "VARIABLE" + } + } + val args = create(getJsonList("args", json)) + val attributes = getJsonObject("attributes", json).get + val call = nodes + .NewCall() + .code(s"$name(${toCode(args).mkString(",")}") + .name(name) + .order(childNum) + .argumentIndex(args.length) + .lineNumber(getLineStart(attributes)) + diffGraph.addNode(call) + args.foreach(node => diffGraph.addEdge(call, node, EdgeTypes.AST)) + call + } + + def createExprMethodCall(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val name: String = getJsonObject("name", json).get match { + case name: Map[String, Any] => getJsonAtom[String]("name", name).get + } + val obj = create(getJsonObject("var", json).get, 0) + val args = create(getJsonList("args", json), 1) + val attributes = getJsonObject("attributes", json).get + val call = nodes + .NewCall() + .code(s"${toCode(obj)}->$name(${toCode(args).mkString(",")})") + .name(name) + .order(childNum) + .lineNumber(getLineStart(attributes)) + diffGraph.addNode(call) + args.foreach(node => diffGraph.addEdge(call, node, EdgeTypes.AST)) + call + } + + def createExprStaticCall(json: Map[String, Any], childNum: 
Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val name: String = getJsonObject("name", json).get match { + case name: Map[String, Any] => getJsonAtom[String]("name", name).get + } + val obj = getJsonObject("class", json).get match { + case name: Map[String, Any] => + try { + nameNodeToNameString(name) + } catch { + case _: AssertionError => "WEIRDNAME" + } + } + val args = create(getJsonList("args", json)) + val attributes = getJsonObject("attributes", json).get + val call = nodes + .NewCall() + .code(s"$obj::$name(${toCode(args).mkString(",")}") + .name(name) + .order(childNum) + .lineNumber(getLineStart(attributes)) + diffGraph.addNode(call) + args.foreach(node => diffGraph.addEdge(call, node, EdgeTypes.AST)) + call + } + + def createExprBinaryOp(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val lhs = create(getJsonObject("left", json).get, 0) + val rhs = create(getJsonObject("right", json).get, 1) + val (code, _) = getJsonAtom[String]("nodeType", json).get + .substring("Expr_BinaryOp_".length) match { + case "Plus" => ("+", "BinaryOp_Plus") + case "Minus" => ("-", "BinaryOp_Minus") + case "Div" => ("/", "BinaryOp_Div") + case "Mod" => ("%", "BinaryOp_Mod") + case "Mul" => ("*", "BinaryOp_Mul") + case "Equal" => ("==", "BinaryOp_Equal") + case "Pow" => ("**", "BinaryOp_Equal") + case "BitwiseAnd" => ("&", "BinaryOp_BitwiseAnd") + case "BitwiseOr" => ("|", "BinaryOp_BitwiseOr") + case "ShiftLeft" => ("<<", "BinaryOp_ShiftLeft") + case "ShiftRight" => (">>", "BinaryOp_ShiftRight") + case "Concat" => (".", "BinaryOp_Concat") + case "Coalesce" => ("??", "BinaryOp_Coalesce") + case "Identical" => ("===", "BinaryOp_Identical") + case "NotEqual" => ("!=", "BinaryOp_NotEqual") + case "NotIdentical" => ("!==", "BinaryOp_NotIdentical") + case "Smaller" => ("<", "BinaryOp_Smaller") + case "Greater" => (">", "BinaryOp_Greater") + case "SmallerOrEqual" => ("<=", "BinaryOp_SmallerOrEqual") + 
case "GreaterOrEqual" => (">=", "BinaryOp_GreaterOrEqual") + case "Spaceship" => ("<=>", "BinaryOp_Spaceship") + case "LogicalAnd" => ("&&", "BinaryOp_LogicalAnd") + case "LogicalOr" => ("||", "BinaryOp_LogicalOr") + case "BooleanAnd" => ("and", "BinaryOp_BooleanAnd") + case "BooleanOr" => ("or", "BinaryOp_BooleanOr") + case "LogicalXor" => ("xor", "BinaryOp_LogicalXor") + } + val attributes = getJsonObject("attributes", json).get + val call = nodes + .NewCall() + .code(s"${toCode(lhs)} $code ${toCode(rhs)}") + .name(code) + .order(childNum) + .argumentIndex(2) + .lineNumber(getLineStart(attributes)) + + diffGraph.addNode(call) + diffGraph.addEdge(call, lhs, EdgeTypes.AST) + diffGraph.addEdge(call, rhs, EdgeTypes.AST) + call + } + + def createExprUnaryOp(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val attributes = getJsonObject("attributes", json).get + val (code, _) = getJsonAtom[String]("nodeType", json).get + .substring("Expr_".length) match { + case "BooleanNot" => ("!", "BooleanNot") + case "UnaryMinus" => ("-", "UnaryMinus") + case "UnaryPlus" => ("+", "UnaryPlus") + case "BitwiseNot" => ("~", "BitwiseNot") + case "PreInc" => ("--", "PreInc") + case "PostInc" => ("--", "PostInc") + case "PreDec" => ("++", "PreDec") + case "PostDec" => ("++", "PostDec") + case "ErrorSuppress" => ("@", "ErrorSuppress") + } + val expr = create(json.get("expr") match { + case Some(x) => + x match { + case expr: Map[String, Any] => expr + } + case None => getJsonObject("var", json).get + }, 0) + val op = nodes + .NewCall() + .code(s"$code${toCode(expr)}") + .name(code) + .lineNumber(getLineStart(attributes)) + .order(childNum) + diffGraph.addNode(op) + diffGraph.addEdge(op, expr, EdgeTypes.AST) + op + } + + def createExprVariable(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val name = getJsonAtom[String]("name", json).get + val attributes = 
getJsonObject("attributes", json).get + val node = nodes + .NewIdentifier() + .code("$" + name) + .name(name) + .order(childNum) + .lineNumber(getLineStart(attributes)) + diffGraph.addNode(node) + node + } + + def createArg(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + create(getJsonObject("value", json).get, childNum) + } + + def createScalar(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val stype = + getJsonAtom[String]("nodeType", json).get.substring("Scalar_".length) + if (stype == "Encapsed") { + createScalarEncapsedString(json, childNum) + } else { + val value = getJsonAtom[Any]("value", json).get + val attributes = getJsonObject("attributes", json).get + val node = nodes + .NewLiteral() + .typeFullName(stype) + .code(s"$value") + .order(childNum) + .lineNumber(getLineStart(attributes)) + diffGraph.addNode(node) + node + } + } + + def createScalarEncapsedString(json: Map[String, Any], childNum: Integer)( + implicit diffGraph: DiffGraph.Builder): nodes.NewNode = { + val attributes = getJsonObject("attributes", json).get + val parts = create(getJsonList("parts", json)) + val node = nodes + .NewCall() + .code("\"" + toCode(parts).mkString("") + "\"") + .name("EncapsedString") + .lineNumber(getLineStart(attributes)) + .order(childNum) + diffGraph.addNode(node) + parts.foreach(child => diffGraph.addEdge(node, child, EdgeTypes.AST)) + node + } + +} diff --git a/layerSourceCode/src/main/scala/io/joern/php/passes/MetaDataPass.scala b/layerSourceCode/src/main/scala/io/joern/php/passes/MetaDataPass.scala new file mode 100644 index 0000000..aa0ad38 --- /dev/null +++ b/layerSourceCode/src/main/scala/io/joern/php/passes/MetaDataPass.scala @@ -0,0 +1,21 @@ +package io.joern.php.passes + +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.nodes +import io.shiftleft.passes.{DiffGraph, IntervalKeyPool, ParallelCpgPass} 
+ +class MetaDataPass(cpg: Cpg, keyPool: IntervalKeyPool) + extends ParallelCpgPass[String]( + cpg, + keyPools = Some(keyPool.split(1)) + ) { + // don't care at the moment + override def partIterator: Iterator[String] = List("").iterator + + override def runOnPart(part: String): Iterator[DiffGraph] = { + implicit val diffGraph: DiffGraph.Builder = DiffGraph.newBuilder + val metaDataNode = nodes.NewMetaData().language("PHP").version("0.1") + diffGraph.addNode(metaDataNode) + Iterator(diffGraph.build()) + } +} diff --git a/layerSourceCode/src/main/scala/io/joern/php/passes/utility/ASTJSON.scala b/layerSourceCode/src/main/scala/io/joern/php/passes/utility/ASTJSON.scala new file mode 100644 index 0000000..28f4510 --- /dev/null +++ b/layerSourceCode/src/main/scala/io/joern/php/passes/utility/ASTJSON.scala @@ -0,0 +1,86 @@ +package io.joern.php.passes.utility + +object ASTJSON { + + def format(t: Any, i: Int = 0): String = t match { + + case o: Map[String, Any] => + o.map { + case (k, v) => + " " * (i + 1) + k + " : " + format(v, i + 1) + } + .mkString("{\n", "\n", "\n" + " " * i + "}") + case a: List[_] => + a.map { e => + " " * (i + 1) + format(e, i + 1) + } + .mkString("[\n", "\n", "\n" + " " * i + "]") + + case x => s"$x" + } + + def missingKey(key: String, json: Any): Unit = { + throw new RuntimeException(s"key $key does not exist in ${format(json)}") + } + + def getLineStart(json: Map[String, Any]): Option[Integer] = { + json.getOrElse("startLine", missingKey("startLine", json)) match { + case x: Double => Some(x.toInt) + } + } + + def getLineEnd(json: Map[String, Any]): Option[Integer] = { + json.getOrElse("endLine", missingKey("startLine", json)) match { + case x: Double => Some(x.toInt) + } + } + + def getJsonObject(attr: String, + json: Map[String, Any]): Option[Map[String, Any]] = { + json.getOrElse(attr, missingKey(attr, json)) match { + case x: Map[String, Any] => Some(x) + case x => + Option(x) match { + case None => None + case Some(_) => + throw new 
RuntimeException("there should be only a None value") + } + } + } + + def getJsonAtom[T](attr: String, json: Map[String, Any]): Option[T] = { + json.getOrElse(attr, missingKey(attr, json)) match { + case x: T => Option(x) + case x => + Option(x) match { + case None => None + case Some(_) => + throw new RuntimeException("there should be only a None value") + } + } + } + + def getJsonDouble(attr: String, json: Map[String, Any]): Option[Double] = { + json.getOrElse(attr, missingKey(attr, json)) match { + case x: Double => Some(x) + case x => + Option(x) match { + case None => None + case Some(_) => + throw new RuntimeException("there should be only a None value") + } + } + } + + def getJsonList(attr: String, + json: Map[String, Any]): List[Map[String, Any]] = { + json.getOrElse(attr, missingKey(attr, json)) match { + case x: List[Map[String, Any]] => x + case y => + println(format(json)) + throw new RuntimeException( + s"the requested attribute $attr is not a list but ${y.getClass.toString}") + } + } + +} diff --git a/layerSourceCode/src/main/scala/parser/php/FileParser.scala b/layerSourceCode/src/main/scala/parser/php/FileParser.scala new file mode 100644 index 0000000..70b7aa2 --- /dev/null +++ b/layerSourceCode/src/main/scala/parser/php/FileParser.scala @@ -0,0 +1,38 @@ +package parser.php + +import org.apache.commons.io.FileUtils + +import scala.sys.process.{ProcessLogger, _} +import scala.util.parsing.json._ + +object FileParser { + + val defaultPhpParserPath + : String = FileUtils.getUserDirectory.toString + "/.config/composer/vendor/bin/php-parse" + + def parse( + str: String, + phpParser: String = defaultPhpParserPath): List[Map[String, Any]] = { + if (new java.io.File(str).exists()) { + val cmd = s"$phpParser --json-dump $str" + val stdout = new StringBuilder + val stderr = new StringBuilder + cmd ! 
ProcessLogger(stdout append _, stderr append _) + JSON.parseFull(stdout.toString()) match { + case Some(x) => + x match { + case x: List[Map[String, Any]] => x + case _ => + throw new RuntimeException( + s"return of json parsing was unexpected ${x.getClass.toString}") + } + case None => + throw new RuntimeException(s"unable to parse file $str successfully") + } + } else { + throw new RuntimeException( + s"file $str does not exist and I only support file parsing") + } + } + +} diff --git a/layerSourceCode/src/test/scala/io/joern/AbstractCpgTestFixture.scala b/layerSourceCode/src/test/scala/io/joern/AbstractCpgTestFixture.scala new file mode 100644 index 0000000..a772d7b --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/AbstractCpgTestFixture.scala @@ -0,0 +1,9 @@ +package io.joern + +import io.shiftleft.codepropertygraph.Cpg + +abstract class AbstractCpgTestFixture { + + var cpg: Cpg + +} diff --git a/layerSourceCode/src/test/scala/io/joern/TraversalUtils.scala b/layerSourceCode/src/test/scala/io/joern/TraversalUtils.scala new file mode 100644 index 0000000..9041c77 --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/TraversalUtils.scala @@ -0,0 +1,66 @@ +package io.joern + +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import overflowdb.{Edge, Node} + +import scala.util.control.Breaks.break + +trait TraversalUtils extends Matchers { + val fixture: AbstractCpgTestFixture + + def getMethods(name: String): List[Node] = { + fixture.cpg.method.nameExact(name).l + } + + def getSingleFile(name: String): Node = { + fixture.cpg.file.nameExact(name).l match { + case List(x) => x + case _ => fail + } + } + + def followEdge(node: Node, + edge: String, + direction: String, + labels: List[(String, AnyRef)] = List()): List[Node] = { + var collection: List[Edge] = List() + + val iterator = direction match { + case "OUT" => node.outE(edge) + case "IN" => node.inE(edge) + case "BOTH" => node.bothE(edge) + } + + while 
(iterator.hasNext) { + val next = iterator.next + var fullMatch = true + for ((key, value) <- labels) { + if (next.property(key) != value) { + fullMatch = false + break + } + } + if (fullMatch) { + collection = next :: collection + } + } + direction match { + case "OUT" => collection.map(edge => edge.inNode()) + case "IN" => collection.map(edge => edge.outNode()) + } + + } + + def out(node: Node, + edge: String, + labels: List[(String, AnyRef)] = List()): List[Node] = { + followEdge(node, edge, "OUT", labels) + } + + def in(node: Node, + edge: String, + labels: List[(String, AnyRef)] = List()): List[Node] = { + followEdge(node, edge, "IN", labels) + } +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/ArrayTest.scala b/layerSourceCode/src/test/scala/io/joern/php/ArrayTest.scala new file mode 100644 index 0000000..2e5bc92 --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/ArrayTest.scala @@ -0,0 +1,17 @@ +package io.joern.php + +import io.joern.{AbstractCpgTestFixture, TraversalUtils} +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class ArrayTest extends AnyWordSpec with Matchers with TraversalUtils { + + val fixture: AbstractCpgTestFixture = CpgFromCodeTestFixture( + """$test2 = array(1,2,3); + |$test = array(); + |$test[] = 13; + |$test['test'] = 42; + |$test[2] = 33; + |""".stripMargin) + +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/AssignmentOperatorsTest.scala b/layerSourceCode/src/test/scala/io/joern/php/AssignmentOperatorsTest.scala new file mode 100644 index 0000000..ac9bbcb --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/AssignmentOperatorsTest.scala @@ -0,0 +1,57 @@ +package io.joern.php + +import io.joern.TraversalUtils +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class AssignmentOperatorsTest + extends AnyWordSpec + with Matchers + with TraversalUtils { + + override 
val fixture = CpgFromCodeTestFixture( + """$test += 1; + |$test -= 1; + |$test *= 1; + |$test %= 1; + |$test /= 1; + |$test .= "test"; + |$test &= $test; + |$test |= $test; + |$test ^= $test; + |$test <<= $test; + |$test >> $test; + |$test ??= $test; + |""".stripMargin + ) + + "cpg" should { + "have a /= call" in { + val call = fixture.cpg.call("/=").l + call.length shouldBe 1 + call.head.name shouldBe "/=" + } + "have a *= call" in { + val call = fixture.cpg.call("\\*=").l + call.length shouldBe 1 + call.head.name shouldBe "*=" + } + "have a += call" in { + val call = fixture.cpg.call("\\+=").l + call.length shouldBe 1 + call.head.name shouldBe "+=" + } + "have a -= call" in { + val call = fixture.cpg.call("-=").l + call.length shouldBe 1 + call.head.name shouldBe "-=" + } + "have a %= call" in { + val call = fixture.cpg.call("%=").l + call.length shouldBe 1 + call.head.name shouldBe "%=" + } + } + +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/BinaryOpTest.scala b/layerSourceCode/src/test/scala/io/joern/php/BinaryOpTest.scala new file mode 100644 index 0000000..de27b04 --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/BinaryOpTest.scala @@ -0,0 +1,41 @@ +package io.joern.php + +import io.joern.{AbstractCpgTestFixture, TraversalUtils} +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class BinaryOpTest extends AnyWordSpec with Matchers with TraversalUtils { + + val fixture: AbstractCpgTestFixture = CpgFromCodeTestFixture( + """$a + $b; + |$a - $b; + |$a * $b; + |$a / $b; + |$a % $b; + |$a ** $b; + |$a & $b; + |$a | $b; + |$a << $b; + |$a >> $b; + |$a . $b; + |$a ?? $b; + |$a == $b; + |$a === $b; + |$a != $b; + |$a <> $b; + |$a !== $b; + |$a < $b; + |$a > $b; + |$a <= $b; + |$a <=> $b; + |$a and $b; + |$a && $b; + |$a or $b; + |$a || $b; + |$a . 
$b; + |$a instanceof $b; + |$a xor $b; + |""".stripMargin + ) + +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/ClassAndMethodDefinitionTest.scala b/layerSourceCode/src/test/scala/io/joern/php/ClassAndMethodDefinitionTest.scala new file mode 100644 index 0000000..d834820 --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/ClassAndMethodDefinitionTest.scala @@ -0,0 +1,58 @@ +package io.joern.php + +import io.joern.TraversalUtils +import io.shiftleft.codepropertygraph.generated.nodes +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class ClassAndMethodDefinitionTest + extends AnyWordSpec + with Matchers + with TraversalUtils { + + override val fixture: CpgFromCodeTestFixture = CpgFromCodeTestFixture( + """class test extends otherClass { + | + | private static $other = 33; + | private $attribute = 42; + | + | const CONST = 'const'; + | + | public function __construct($test) { + | $this->attribute = 23; + | } + | + | public function test($default = 42) { + | echo $this->attribute; + | } + | + |} + | + |$val = new test(33); + |$val->test(); + |test::CONST; + |""".stripMargin) + + "cpg" should { + "have one type definition" in { + val types = fixture.cpg.typeDecl.l + types.length shouldBe 1 + types.head.name shouldBe "test" + } + "have two fetch calls" in { + val fetchs = fixture.cpg.call("->").l + fetchs.length shouldBe 2 + fetchs.head.astChildren.order(0).head match { + case lhs: nodes.Identifier => lhs.name shouldBe "this" + } + fetchs.head.astChildren.order(1).head match { + case rhs: nodes.Identifier => rhs.name shouldBe "attribute" + } + } + "have a single method call" in { + + } + } + +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/ConditionalTest.scala b/layerSourceCode/src/test/scala/io/joern/php/ConditionalTest.scala new file mode 100644 index 0000000..252365b --- /dev/null +++ 
b/layerSourceCode/src/test/scala/io/joern/php/ConditionalTest.scala @@ -0,0 +1,49 @@ +package io.joern.php + +import io.joern.{AbstractCpgTestFixture, TraversalUtils} +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class ConditionalTest extends AnyWordSpec with Matchers with TraversalUtils { + + val fixture: AbstractCpgTestFixture = CpgFromCodeTestFixture( + """if($test) { + | echo 'a'; + |} elseif ($test == 3) { + | echo 3; + |} elseif ($test == 4) { + | echo 4; + |} else { + | echo 'b'; + |} + |""".stripMargin) + + "cpg" should { + "have two calls" in { + // four echo and two == (equal) + fixture.cpg.call.l.length shouldBe 6 + } + "have a three control structure" in { + // one if and two elseif + fixture.cpg.controlStructure.l.length shouldBe 3 + } + "have two control structures under the if control structure" in { + fixture.cpg.controlStructure + .code("if.*") + .astChildren + .isControlStructure + .l + .length shouldBe 2 + } + "have two code blocks under the if control structure" in { + fixture.cpg.controlStructure + .code("if.*") + .astChildren + .isBlock + .l + .length shouldBe 2 + } + } + +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/CpgTestFixture.scala b/layerSourceCode/src/test/scala/io/joern/php/CpgTestFixture.scala new file mode 100644 index 0000000..b8aa306 --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/CpgTestFixture.scala @@ -0,0 +1,49 @@ +package io.joern.php + +import better.files._ +import io.joern.php.passes._ +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.passes.IntervalKeyPool +import io.shiftleft.x2cpg.SourceFiles +import overflowdb.Node + +import scala.jdk.CollectionConverters._ + +case class CpgTestFixture(projectName: String) + extends io.joern.AbstractCpgTestFixture { + import CpgTestFixture._ + + private val dirName: String = + String.format("layerSourceCode/resources/unittesting/testprojects/%s", + 
projectName) + val filenames: List[String] = SourceFiles.determine(Set(dirName), Set(".php")) + override var cpg: Cpg = cpgForDir(filenames) + + def V: Iterator[Node] = cpg.graph.V.asScala + +} + +case class CpgFromCodeTestFixture(code: String) + extends io.joern.AbstractCpgTestFixture { + import CpgTestFixture._ + + override var cpg: Cpg = _ + File.usingTemporaryDirectory("php2cpg") { dir => + (dir / "test.php").write("") + val dirname = dir.path.toAbsolutePath.toString + val filenames: List[String] = + SourceFiles.determine(Set(dirname), Set(".php")) + cpg = cpgForDir(filenames) + } +} + +object CpgTestFixture { + + def cpgForDir(filenames: List[String]): Cpg = { + val cpg: Cpg = Cpg.emptyCpg + val keyPool = new IntervalKeyPool(1001, 2000) + new AstCreationPass(filenames, cpg, keyPool).createAndApply() + cpg + } + +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/ForeachTest.scala b/layerSourceCode/src/test/scala/io/joern/php/ForeachTest.scala new file mode 100644 index 0000000..d8f3cf0 --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/ForeachTest.scala @@ -0,0 +1,19 @@ +package io.joern.php + +import io.joern.{AbstractCpgTestFixture, TraversalUtils} +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class ForeachTest extends AnyWordSpec with Matchers with TraversalUtils { + + val fixture: AbstractCpgTestFixture = CpgFromCodeTestFixture( + """ + |foreach($array as $key => $value) { + | echo $key; + |} + |foreach($array as $value) { + | echo $x; + |} + |""".stripMargin) + +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/FunctionDefinitionTest.scala b/layerSourceCode/src/test/scala/io/joern/php/FunctionDefinitionTest.scala new file mode 100644 index 0000000..bd764bf --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/FunctionDefinitionTest.scala @@ -0,0 +1,35 @@ +package io.joern.php + +import io.joern.TraversalUtils +import io.shiftleft.semanticcpg.language._ +import 
org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class FunctionDefinitionTest + extends AnyWordSpec + with Matchers + with TraversalUtils { + + override val fixture = CpgFromCodeTestFixture("""function test($var) { + | print_r($var); + | return $var + $var; + |} + |""".stripMargin) + + "cpg" should { + "contain a single method called test" in { + fixture.cpg.method("test").l.length shouldBe 1 + } + "that method should have 2 substatements" in { + fixture.cpg.method("test").astChildren.isBlock.l.length shouldBe 1 + fixture.cpg + .method("test") + .astChildren + .isBlock + .head + .astChildren + .l + .length shouldBe 2 + } + } +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/NameSpaceTest.scala b/layerSourceCode/src/test/scala/io/joern/php/NameSpaceTest.scala new file mode 100644 index 0000000..3e70a50 --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/NameSpaceTest.scala @@ -0,0 +1,126 @@ +package io.joern.php + +import io.joern.TraversalUtils +import io.shiftleft.codepropertygraph.generated.nodes +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class NameSpaceTest extends AnyWordSpec with Matchers with TraversalUtils { + + override val fixture: CpgFromCodeTestFixture = CpgFromCodeTestFixture( + """ + |namespace test\test\test; + |use otherNameSpace\test as other; + |use function name\space\func as func; + |use const name\space\constant; + |name\space\function(); + |\PleskX\Api\Client::RESPONSE_FULL; + |""".stripMargin) + + "cpg" should { + "have a single file" in { + fixture.cpg.file.l.length shouldBe 1 + } + "have a single name space with a single child with 5 children" in { + fixture.cpg + .namespaceBlock("test\\test\\test".replace("\\", "\\\\")) + .l + .length shouldBe 1 + fixture.cpg + .namespaceBlock("test\\test\\test".replace("\\", "\\\\")) + .astChildren + .l + .length shouldBe 1 + fixture.cpg + 
.namespaceBlock("test\\test\\test".replace("\\", "\\\\")) + .astChildren + .astChildren + .l + .length shouldBe 5 + } + "has three use calls" in { + fixture.cpg.call("use").l.length shouldBe 3 + } + "the first one should be a regular with alias" in { + val call = + fixture.cpg.call("use").l.sortWith((a, b) => a.order < b.order).head + call.astChildren.l.length shouldBe 3 + val children = call.astChildren.l.sortWith((a, b) => a.order < b.order) + children.head match { + case child: nodes.Literal => + child.code shouldBe "general" + child.typeFullName shouldBe "keyword" + } + children(1) match { + case child: nodes.Literal => + child.code shouldBe "otherNameSpace\\test" + child.typeFullName shouldBe "namespaceName" + } + children(2) match { + case child: nodes.Identifier => + child.name shouldBe "other" + } + } + + "the second one should be a function with alias" in { + val call = + fixture.cpg.call("use").l.sortWith((a, b) => a.order < b.order).apply(1) + call.astChildren.l.length shouldBe 3 + val children = call.astChildren.l.sortWith((a, b) => a.order < b.order) + children.head match { + case child: nodes.Literal => + child.code shouldBe "function" + child.typeFullName shouldBe "keyword" + } + children(1) match { + case child: nodes.Literal => + child.code shouldBe "name\\space\\func" + child.typeFullName shouldBe "namespaceName" + } + children(2) match { + case child: nodes.Identifier => + child.name shouldBe "func" + } + } + + "the third one should be a const without alias" in { + val call = + fixture.cpg.call("use").l.sortWith((a, b) => a.order < b.order).apply(2) + call.astChildren.l.length shouldBe 2 + val children = call.astChildren.l.sortWith((a, b) => a.order < b.order) + children.head match { + case child: nodes.Literal => + child.code shouldBe "const" + child.typeFullName shouldBe "keyword" + } + children(1) match { + case child: nodes.Literal => + child.code shouldBe "name\\space\\constant" + child.typeFullName shouldBe "namespaceName" + } + } + + "have 
a single call to name\\space\\function" in { + fixture.cpg + .call("name\\space\\function".replace("\\", "\\\\")) + .l + .length shouldBe 1 + } + + "have a single const fetch" in { + val const = fixture.cpg.call("classConstFetch").l.head + const.name shouldBe "classConstFetch" + const.astChildren.l.length shouldBe 2 + const.astChildren.order(0).l.head match { + case node: nodes.Literal => + node.code shouldBe "\\PleskX\\Api\\Client" + node.typeFullName shouldBe "fullyQualifiedName" + } + const.astChildren.order(1).head match { + case node: nodes.Identifier => + node.name shouldBe "RESPONSE_FULL" + } + } + } +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/PropertyFetchTest.scala b/layerSourceCode/src/test/scala/io/joern/php/PropertyFetchTest.scala new file mode 100644 index 0000000..9e0c4fe --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/PropertyFetchTest.scala @@ -0,0 +1,15 @@ +package io.joern.php + +import io.joern.TraversalUtils +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class PropertyFetchTest extends AnyWordSpec with Matchers with TraversalUtils { + + override val fixture: CpgFromCodeTestFixture = CpgFromCodeTestFixture(""" + |static::$value; + |static::$value->function(); + |$value::property; + |""".stripMargin) + +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/SpecialExprTest.scala b/layerSourceCode/src/test/scala/io/joern/php/SpecialExprTest.scala new file mode 100644 index 0000000..f8269d5 --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/SpecialExprTest.scala @@ -0,0 +1,22 @@ +package io.joern.php + +import io.joern.{AbstractCpgTestFixture, TraversalUtils} +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class SpecialExprTest extends AnyWordSpec with Matchers with TraversalUtils { + + val fixture: AbstractCpgTestFixture = CpgFromCodeTestFixture( + """ `ls -la`; + |empty($test); + |function def(int $test) {} + 
|"test ${var}"; + |['id' => 1, + | 'title' => 2 ]; + | function($test) use ($test) { + | echo $test; + | }; + |$a ? $b : $c; + |""".stripMargin) + +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/SwitchCaseTest.scala b/layerSourceCode/src/test/scala/io/joern/php/SwitchCaseTest.scala new file mode 100644 index 0000000..9f3aea9 --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/SwitchCaseTest.scala @@ -0,0 +1,41 @@ +package io.joern.php + +import io.joern.TraversalUtils +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class SwitchCaseTest extends AnyWordSpec with Matchers with TraversalUtils { + + override val fixture = CpgFromCodeTestFixture("""switch($i) { + | case 1: + | echo "test"; + | break; + | case 2: + | echo "otherTest"; + | case true: + | echo "True"; + | break; + | default: + | echo "default"; + |} + |""".stripMargin) + + "cpg" should { + "have no function" in { + fixture.cpg.method.l.length shouldBe 0 + } + "have a single switch" in { + fixture.cpg.controlStructure.code("switch.*").l.length shouldBe 1 + } + "the switch should have 4 case children" in { + fixture.cpg.controlStructure + .code("switch.*") + .astChildren + .isControlStructure + .code("case.*") + .l + .length shouldBe 4 + } + } +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/TryCatchTest.scala b/layerSourceCode/src/test/scala/io/joern/php/TryCatchTest.scala new file mode 100644 index 0000000..14a5e25 --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/TryCatchTest.scala @@ -0,0 +1,19 @@ +package io.joern.php + +import io.joern.TraversalUtils +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class TryCatchTest extends AnyWordSpec with Matchers with TraversalUtils { + + override val fixture: CpgFromCodeTestFixture = CpgFromCodeTestFixture(""" + |try { + | echo "20"; + |} catch (Exception $e) { + | echo $e; + |} finally 
{ + | echo "finally"; + |} + |""".stripMargin) + +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/TypeCastTest.scala b/layerSourceCode/src/test/scala/io/joern/php/TypeCastTest.scala new file mode 100644 index 0000000..9b18e11 --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/TypeCastTest.scala @@ -0,0 +1,14 @@ +package io.joern.php + +import io.joern.{AbstractCpgTestFixture, TraversalUtils} +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class TypeCastTest extends AnyWordSpec with Matchers with TraversalUtils { + + val fixture: AbstractCpgTestFixture = CpgFromCodeTestFixture(""" + |(string)42; + |(int)3.0; + |""".stripMargin) + +} diff --git a/layerSourceCode/src/test/scala/io/joern/php/UnaryOpTest.scala b/layerSourceCode/src/test/scala/io/joern/php/UnaryOpTest.scala new file mode 100644 index 0000000..8672f0e --- /dev/null +++ b/layerSourceCode/src/test/scala/io/joern/php/UnaryOpTest.scala @@ -0,0 +1,24 @@ +package io.joern.php + +import io.joern.{AbstractCpgTestFixture, TraversalUtils} +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class UnaryOpTest extends AnyWordSpec with Matchers with TraversalUtils { + + val fixture: AbstractCpgTestFixture = CpgFromCodeTestFixture( + """!$test; + |-$test; + |+$test; + |~$test; + |++$test; + |--$test; + |$test++; + |$test--; + |@test(); + |""".stripMargin + ) + + //@test() + //`ls -la` +} diff --git a/layerSourceCode/src/test/scala/parser/php/FileParserTest.scala b/layerSourceCode/src/test/scala/parser/php/FileParserTest.scala new file mode 100644 index 0000000..dc7a2de --- /dev/null +++ b/layerSourceCode/src/test/scala/parser/php/FileParserTest.scala @@ -0,0 +1,25 @@ +package parser.php + +import io.joern.TraversalUtils +import io.joern.php.CpgTestFixture +import io.shiftleft.semanticcpg.language._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class FileParserTest 
extends AnyWordSpec with Matchers with TraversalUtils { + + val fixture: CpgTestFixture = CpgTestFixture("onlyMainCreation") + + "project" should { + "have a single file" in { + fixture.cpg.file.l.length shouldBe 1 + } + "have a single namespaceblock" in { + fixture.cpg.namespaceBlock.l.length shouldBe 1 + } + "that have 1 phpinfo call" in { + fixture.cpg.call.l.length shouldBe 3 + fixture.cpg.call("phpinfo").l.length shouldBe 1 + } + } +} diff --git a/main.conf.default b/main.conf.default new file mode 100644 index 0000000..f7ddc72 --- /dev/null +++ b/main.conf.default @@ -0,0 +1,37 @@ +cpg { + output = "out.cpg", + export = "out.dot", + files = [".php", ".inc"], + strictLinking = true, + strictParsing = false, + report = true, + forced = false, + activePasses = [ + "MetaData", + "MethodsCreation", + "TypeCreation", + "CfgIntraBBCreation", + "CfgInterBBCreation", + "DeleteUnreachableCode", + "Dominator", + "PostDominator", + "DataDependency", + "DeleteEmptyOpcodes", + "DeleteEmptyMethods", + "Inheritance", + "CreateStubMethods", + "CallFinishing", + "NamespaceMemberCreation", + "LocalIdentification"], + php { + 7 { + interpreter = "/opt/php7-sp/bin/php" + phpini = "/opt/php7-sp/etc/" + } + 8 { + interpreter = "/opt/php8-sp/bin/php" + phpini = "/opt/php8-sp/etc/" + } + + } +} \ No newline at end of file diff --git a/php2cpg b/php2cpg new file mode 100755 index 0000000..30d4985 --- /dev/null +++ b/php2cpg @@ -0,0 +1,5 @@ +#!/bin/bash +# Launcher for the staged generator binary; resolves its own location so it can be invoked from any directory. +SCRIPT_ABS_PATH=$(readlink -f "$0") +SCRIPT_ABS_DIR=$(dirname "$SCRIPT_ABS_PATH") +JAVA_OPTS='-Xmx20g -Xss30m -XX:+UnlockDiagnosticVMOptions -XX:+ExitOnOutOfMemoryError -XX:AbortVMOnException=java.lang.StackOverflowError' "$SCRIPT_ABS_DIR/target/universal/stage/bin/multilayer-php-cpg-generator" -- "$@" diff --git a/project/Projects.scala b/project/Projects.scala new file mode 100644 index 0000000..0cc258e --- /dev/null +++ b/project/Projects.scala @@ -0,0 +1,6 @@ +import sbt._ + +class Projects { + lazy val sourcecode =
project.in(file("layerSourceCode")) + lazy val bytecode = project.in(file("layerByteCode")) +} diff --git a/project/build.properties b/project/build.properties new file mode 100644 index 0000000..abbbce5 --- /dev/null +++ b/project/build.properties @@ -0,0 +1 @@ +sbt.version=1.9.8 diff --git a/project/plugins.sbt b/project/plugins.sbt new file mode 100644 index 0000000..b404317 --- /dev/null +++ b/project/plugins.sbt @@ -0,0 +1,12 @@ +dependencyOverrides += "com.puppycrawl.tools" % "checkstyle" % "7.3" +addSbtPlugin("com.etsy" % "sbt-checkstyle-plugin" % "3.1.1") +//addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.1.0") +addSbtPlugin("com.github.sbt" % "sbt-release" % "1.1.0") +addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.3.5") +addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.2") +addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.8.0") +addSbtPlugin("com.github.sbt" % "sbt-findbugs" % "2.0.0") +addSbtPlugin("io.shiftleft" % "sbt-ci-release-early" % "1.2.2") +addSbtPlugin("org.foundweekends" % "sbt-bintray" % "0.5.4") +addSbtPlugin("com.geirsson" % "sbt-scalafmt" % "1.5.1") +addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.18-1") \ No newline at end of file diff --git a/resources/docker/PHP-StringPatched/Dockerfile b/resources/docker/PHP-StringPatched/Dockerfile new file mode 100644 index 0000000..a632e8c --- /dev/null +++ b/resources/docker/PHP-StringPatched/Dockerfile @@ -0,0 +1,44 @@ +FROM ubuntu:latest as patchedphp + +ENV SCALA_VERSION="2.13.6" +ENV SBT_VERSION="1.7.0" + +RUN apt-get update && apt-get -y install gnupg ubuntu-keyring && apt-key update && apt-get update && apt-get install -y \ + locales \ + pkg-config build-essential autoconf bison re2c libxml2-dev libsqlite3-dev \ + apt-transport-https curl \ + git \ + openjdk-17-jdk openjdk-17-jre \ + && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 \ + && echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | tee 
/etc/apt/sources.list.d/sbt.list \ + && echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | tee /etc/apt/sources.list.d/sbt_old.list \ + && curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | gpg --no-default-keyring --keyring gnupg-ring:/etc/apt/trusted.gpg.d/scalasbt-release.gpg --import \ + && chmod 644 /etc/apt/trusted.gpg.d/scalasbt-release.gpg \ + && apt-get update \ + && apt-get install -y sbt \ + && rm -rf /var/lib/apt/lists/* +ENV LANG=en_US.utf8 +# Build PHP 7.4.27 from the simkoc/php-src fork and install it under /opt/php-7/. +WORKDIR / +RUN git clone --depth 1 https://github.com/simkoc/php-src.git -b PHP-7.4.27 php-src-7 \ + && cd php-src-7 \ + && ./buildconf \ + && ./configure --prefix /opt/php-7/ \ + && make -j$(nproc) install \ + && mkdir /opt/php-7/etc/ \ + && cp php.ini-production /opt/php-7/etc/php.ini \ + && sed -i 's/;extension_dir = ".\/"/extension_dir = "\/opt\/php-7\/lib\/php\/extensions\/no-debug-non-zts-20190902\/"/' /opt/php-7/etc/php.ini \ + && sed -i 's/;zend_extension/zend_extension/' /opt/php-7/etc/php.ini \ + && rm -rf /php-src-7 +# Build PHP 8.2.2 from the same fork and install it under /opt/php-8/. +WORKDIR / +RUN git clone --depth 1 https://github.com/simkoc/php-src.git -b PHP-8.2.2 php-src-8 \ + && cd php-src-8 \ + && ./buildconf \ + && ./configure --prefix /opt/php-8/ \ + && make -j$(nproc) install \ + && mkdir /opt/php-8/etc/ \ + && cp php.ini-production /opt/php-8/etc/php.ini \ + && sed -i 's/;extension_dir = ".\/"/extension_dir = "\/opt\/php-8\/lib\/php\/extensions\/no-debug-non-zts-20220829\/"/' /opt/php-8/etc/php.ini \ + && sed -i 's/;zend_extension/zend_extension/' /opt/php-8/etc/php.ini \ + && rm -rf /php-src-8 diff --git a/resources/docker/PHP-StringPatched/create.sh b/resources/docker/PHP-StringPatched/create.sh new file mode 100755 index 0000000..3dc8e70 --- /dev/null +++ b/resources/docker/PHP-StringPatched/create.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +docker build -t patchedphp --no-cache .
diff --git a/resources/docker/multilayer-php-cpg/Dockerfile b/resources/docker/multilayer-php-cpg/Dockerfile new file mode 100644 index 0000000..e163606 --- /dev/null +++ b/resources/docker/multilayer-php-cpg/Dockerfile @@ -0,0 +1,16 @@ +FROM patchedphp:latest as multilayer-php-cpg +# BRANCH selects the git ref checked out below; pass it with --build-arg. +ARG BRANCH +WORKDIR / +RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts +RUN mkdir project +RUN mkdir out +ENV CLONE_URL="git@github.com:simkoc/php-cpg.git" +RUN --mount=type=ssh git clone ${CLONE_URL} +WORKDIR php-cpg +RUN git checkout ${BRANCH} +COPY main.conf . +RUN sbt "project bytecode" test +RUN sbt stage +RUN sbt publishLocal +RUN rm -rf /php-cpg/layerByteCode/src && rm -rf /php-cpg/.git \ No newline at end of file diff --git a/resources/docker/multilayer-php-cpg/clean.sh b/resources/docker/multilayer-php-cpg/clean.sh new file mode 100755 index 0000000..9f64ece --- /dev/null +++ b/resources/docker/multilayer-php-cpg/clean.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Removes stopped containers, the multilayer-cpg-php:latest image, and dangling images. +docker container prune +docker image rm multilayer-cpg-php:latest +docker image prune \ No newline at end of file diff --git a/resources/docker/multilayer-php-cpg/create.sh b/resources/docker/multilayer-php-cpg/create.sh new file mode 100755 index 0000000..db6b2c6 --- /dev/null +++ b/resources/docker/multilayer-php-cpg/create.sh @@ -0,0 +1,3 @@ +#!/bin/bash +branch=${1:-"master"} # branch checked out inside the image; defaults to master + DOCKER_BUILDKIT=1 docker build --ssh default --no-cache --build-arg BRANCH="${branch}" -t multilayer-cpg-php . \ No newline at end of file diff --git a/resources/docker/multilayer-php-cpg/publish.sh b/resources/docker/multilayer-php-cpg/publish.sh new file mode 100755 index 0000000..368e8cc --- /dev/null +++ b/resources/docker/multilayer-php-cpg/publish.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# Builds the squashed image variant (multilayer-cpg-php-squashed) for the given branch. +branch=${1:-"master"} + DOCKER_BUILDKIT=1 docker build --squash --ssh default --no-cache --build-arg BRANCH="${branch}" -t multilayer-cpg-php-squashed .
\ No newline at end of file diff --git a/resources/docker/multilayer-php-cpg/run.sh b/resources/docker/multilayer-php-cpg/run.sh new file mode 100755 index 0000000..2d7c632 --- /dev/null +++ b/resources/docker/multilayer-php-cpg/run.sh @@ -0,0 +1,17 @@ +#!/bin/bash +if [ $# -lt 2 ] # NOTE(review): $3 (version) is passed to the tool unconditionally below; confirm whether it is really optional + then + echo "./run.sh <project> <outfolder> <version> {outname}" + exit 1 +fi +# Positional arguments: host project dir, host output dir, version passed after 'bytecode', optional output file name. +project=$1 +outfolder=$2 +version=$3 +outname=${4:-"out.cpg"} +rem=$# +#remaining ${@:3} +docker run -v "${project}":/project/ \ + -v "${outfolder}":/out/ \ + multilayer-cpg-php \ + /project/ -o "/out/${outname}" bytecode ${version} diff --git a/resources/php7.ini b/resources/php7.ini new file mode 100644 index 0000000..12d84e0 --- /dev/null +++ b/resources/php7.ini @@ -0,0 +1,1949 @@ +[PHP] + +;;;;;;;;;;;;;;;;;;; +; About php.ini ; +;;;;;;;;;;;;;;;;;;; +; PHP's initialization file, generally called php.ini, is responsible for +; configuring many of the aspects of PHP's behavior. + +; PHP attempts to find and load this configuration from a number of locations. +; The following is a summary of its search order: +; 1. SAPI module specific location. +; 2. The PHPRC environment variable. (As of PHP 5.2.0) +; 3. A number of predefined registry keys on Windows (As of PHP 5.2.0) +; 4. Current working directory (except CLI) +; 5. The web server's directory (for SAPI modules), or directory of PHP +; (otherwise in Windows) +; 6. The directory from the --with-config-file-path compile time option, or the +; Windows directory (usually C:\windows) +; See the PHP docs for more specific information. +; http://php.net/configuration.file + +; The syntax of the file is extremely simple. Whitespace and lines +; beginning with a semicolon are silently ignored (as you probably guessed). +; Section headers (e.g. [Foo]) are also silently ignored, even though +; they might mean something in the future. + +; Directives following the section heading [PATH=/www/mysite] only +; apply to PHP files in the /www/mysite directory.
Directives +; following the section heading [HOST=www.example.com] only apply to +; PHP files served from www.example.com. Directives set in these +; special sections cannot be overridden by user-defined INI files or +; at runtime. Currently, [PATH=] and [HOST=] sections only work under +; CGI/FastCGI. +; http://php.net/ini.sections + +; Directives are specified using the following syntax: +; directive = value +; Directive names are *case sensitive* - foo=bar is different from FOO=bar. +; Directives are variables used to configure PHP or PHP extensions. +; There is no name validation. If PHP can't find an expected +; directive because it is not set or is mistyped, a default value will be used. + +; The value can be a string, a number, a PHP constant (e.g. E_ALL or M_PI), one +; of the INI constants (On, Off, True, False, Yes, No and None) or an expression +; (e.g. E_ALL & ~E_NOTICE), a quoted string ("bar"), or a reference to a +; previously set variable or directive (e.g. ${foo}) + +; Expressions in the INI file are limited to bitwise operators and parentheses: +; | bitwise OR +; ^ bitwise XOR +; & bitwise AND +; ~ bitwise NOT +; ! boolean NOT + +; Boolean flags can be turned on using the values 1, On, True or Yes. +; They can be turned off using the values 0, Off, False or No. + +; An empty string can be denoted by simply not writing anything after the equal +; sign, or by using the None keyword: + +; foo = ; sets foo to an empty string +; foo = None ; sets foo to an empty string +; foo = "None" ; sets foo to the string 'None' + +; If you use constants in your value, and these constants belong to a +; dynamically loaded extension (either a PHP extension or a Zend extension), +; you may only use these constants *after* the line that loads the extension. + +;;;;;;;;;;;;;;;;;;; +; About this file ; +;;;;;;;;;;;;;;;;;;; +; PHP comes packaged with two INI files. 
One that is recommended to be used +; in production environments and one that is recommended to be used in +; development environments. + +; php.ini-production contains settings which hold security, performance and +; best practices at its core. But please be aware, these settings may break +; compatibility with older or less security-conscious applications. We +; recommend using the production ini in production and testing environments. + +; php.ini-development is very similar to its production variant, except it is +; much more verbose when it comes to errors. We recommend using the +; development version only in development environments, as errors shown to +; application users can inadvertently leak otherwise secure information. + +; This is the php.ini-development INI file. + +;;;;;;;;;;;;;;;;;;; +; Quick Reference ; +;;;;;;;;;;;;;;;;;;; +; The following are all the settings which are different in either the production +; or development versions of the INIs with respect to PHP's default behavior. +; Please see the actual settings later in the document for more details as to why +; we recommend these changes in PHP's behavior.
+ +; display_errors +; Default Value: On +; Development Value: On +; Production Value: Off + +; display_startup_errors +; Default Value: Off +; Development Value: On +; Production Value: Off + +; error_reporting +; Default Value: E_ALL & ~E_NOTICE & ~E_STRICT & ~E_DEPRECATED +; Development Value: E_ALL +; Production Value: E_ALL & ~E_DEPRECATED & ~E_STRICT + +; log_errors +; Default Value: Off +; Development Value: On +; Production Value: On + +; max_input_time +; Default Value: -1 (Unlimited) +; Development Value: 60 (60 seconds) +; Production Value: 60 (60 seconds) + +; output_buffering +; Default Value: Off +; Development Value: 4096 +; Production Value: 4096 + +; register_argc_argv +; Default Value: On +; Development Value: Off +; Production Value: Off + +; request_order +; Default Value: None +; Development Value: "GP" +; Production Value: "GP" + +; session.gc_divisor +; Default Value: 100 +; Development Value: 1000 +; Production Value: 1000 + +; session.sid_bits_per_character +; Default Value: 4 +; Development Value: 5 +; Production Value: 5 + +; short_open_tag +; Default Value: On +; Development Value: Off +; Production Value: Off + +; variables_order +; Default Value: "EGPCS" +; Development Value: "GPCS" +; Production Value: "GPCS" + +;;;;;;;;;;;;;;;;;;;; +; php.ini Options ; +;;;;;;;;;;;;;;;;;;;; +; Name for user-defined php.ini (.htaccess) files. Default is ".user.ini" +;user_ini.filename = ".user.ini" + +; To disable this feature set this option to an empty value +;user_ini.filename = + +; TTL for user-defined php.ini files (time-to-live) in seconds. Default is 300 seconds (5 minutes) +;user_ini.cache_ttl = 300 + +;;;;;;;;;;;;;;;;;;;; +; Language Options ; +;;;;;;;;;;;;;;;;;;;; + +; Enable the PHP scripting language engine under Apache. +; http://php.net/engine +engine = On + +; This directive determines whether or not PHP will recognize code between +; <? and ?> tags as PHP source which should be processed as such.
It is +; generally recommended that <?php and ?> should be used and that this feature +; should be disabled, as enabling it may result in issues when generating XML +; documents, however this remains supported for backward compatibility reasons. +; Note that this directive does not control the <?= shorthand tag, which can be +; used regardless of this directive. + +; Colors for Syntax Highlighting mode. Anything that's acceptable in +; <span style="color: ???????"> would work. +; http://php.net/syntax-highlighting +;highlight.string = #DD0000 +;highlight.comment = #FF9900 +;highlight.keyword = #007700 +;highlight.default = #0000BB +;highlight.html = #000000 + +; If enabled, the request will be allowed to complete even if the user aborts +; the request. Consider enabling it if executing long requests, which may end up +; being interrupted by the user or a browser timing out. PHP's default behavior +; is to disable this feature. +; http://php.net/ignore-user-abort +;ignore_user_abort = On + +; Determines the size of the realpath cache to be used by PHP. This value should +; be increased on systems where PHP opens many files to reflect the quantity of +; the file operations performed. +; Note: if open_basedir is set, the cache is disabled +; http://php.net/realpath-cache-size +;realpath_cache_size = 4096k + +; Duration of time, in seconds for which to cache realpath information for a given +; file or directory. For systems with rarely changing files, consider increasing this +; value. +; http://php.net/realpath-cache-ttl +;realpath_cache_ttl = 120 + +; Enables or disables the circular reference collector. +; http://php.net/zend.enable-gc +zend.enable_gc = On + +; If enabled, scripts may be written in encodings that are incompatible with +; the scanner. CP936, Big5, CP949 and Shift_JIS are the examples of such +; encodings. To use this feature, mbstring extension must be enabled. +; Default: Off +;zend.multibyte = Off + +; Allows to set the default encoding for the scripts. This value will be used +; unless "declare(encoding=...)" directive appears at the top of the script. +; Only affects if zend.multibyte is set.
+; Default: "" +;zend.script_encoding = + +; Allows to include or exclude arguments from stack traces generated for exceptions. +; In production, it is recommended to turn this setting on to prohibit the output +; of sensitive information in stack traces +; Default: Off +zend.exception_ignore_args = Off + +;;;;;;;;;;;;;;;;; +; Miscellaneous ; +;;;;;;;;;;;;;;;;; + +; Decides whether PHP may expose the fact that it is installed on the server +; (e.g. by adding its signature to the Web server header). It is no security +; threat in any way, but it makes it possible to determine whether you use PHP +; on your server or not. +; http://php.net/expose-php +expose_php = On + +;;;;;;;;;;;;;;;;;;; +; Resource Limits ; +;;;;;;;;;;;;;;;;;;; + +; Maximum execution time of each script, in seconds +; http://php.net/max-execution-time +; Note: This directive is hardcoded to 0 for the CLI SAPI +max_execution_time = 30 + +; Maximum amount of time each script may spend parsing request data. It's a good +; idea to limit this time on productions servers in order to eliminate unexpectedly +; long running scripts. +; Note: This directive is hardcoded to -1 for the CLI SAPI +; Default Value: -1 (Unlimited) +; Development Value: 60 (60 seconds) +; Production Value: 60 (60 seconds) +; http://php.net/max-input-time +max_input_time = 60 + +; Maximum input variable nesting level +; http://php.net/max-input-nesting-level +;max_input_nesting_level = 64 + +; How many GET/POST/COOKIE input variables may be accepted +;max_input_vars = 1000 + +; Maximum amount of memory a script may consume +; http://php.net/memory-limit +memory_limit = 128M + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Error handling and logging ; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; This directive informs PHP of which errors, warnings and notices you would like +; it to take action for. The recommended way of setting values for this +; directive is through the use of the error level constants and bitwise +; operators. 
The error level constants are below here for convenience as well as +; some common settings and their meanings. +; By default, PHP is set to take action on all errors, notices and warnings EXCEPT +; those related to E_NOTICE and E_STRICT, which together cover best practices and +; recommended coding standards in PHP. For performance reasons, this is the +; recommended error reporting setting. Your production server shouldn't be wasting +; resources complaining about best practices and coding standards. That's what +; development servers and development settings are for. +; Note: The php.ini-development file has this setting as E_ALL. This +; means it pretty much reports everything which is exactly what you want during +; development and early testing. +; +; Error Level Constants: +; E_ALL - All errors and warnings (includes E_STRICT as of PHP 5.4.0) +; E_ERROR - fatal run-time errors +; E_RECOVERABLE_ERROR - almost fatal run-time errors +; E_WARNING - run-time warnings (non-fatal errors) +; E_PARSE - compile-time parse errors +; E_NOTICE - run-time notices (these are warnings which often result +; from a bug in your code, but it's possible that it was +; intentional (e.g., using an uninitialized variable and +; relying on the fact it is automatically initialized to an +; empty string)) +; E_STRICT - run-time notices, enable to have PHP suggest changes +; to your code which will ensure the best interoperability +; and forward compatibility of your code +; E_CORE_ERROR - fatal errors that occur during PHP's initial startup +; E_CORE_WARNING - warnings (non-fatal errors) that occur during PHP's +; initial startup +; E_COMPILE_ERROR - fatal compile-time errors +; E_COMPILE_WARNING - compile-time warnings (non-fatal errors) +; E_USER_ERROR - user-generated error message +; E_USER_WARNING - user-generated warning message +; E_USER_NOTICE - user-generated notice message +; E_DEPRECATED - warn about code that will not work in future versions +; of PHP +; E_USER_DEPRECATED -
user-generated deprecation warnings +; +; Common Values: +; E_ALL (Show all errors, warnings and notices including coding standards.) +; E_ALL & ~E_NOTICE (Show all errors, except for notices) +; E_ALL & ~E_NOTICE & ~E_STRICT (Show all errors, except for notices and coding standards warnings.) +; E_COMPILE_ERROR|E_RECOVERABLE_ERROR|E_ERROR|E_CORE_ERROR (Show only errors) +; Default Value: E_ALL & ~E_NOTICE & ~E_STRICT & ~E_DEPRECATED +; Development Value: E_ALL +; Production Value: E_ALL & ~E_DEPRECATED & ~E_STRICT +; http://php.net/error-reporting +error_reporting = E_ALL + +; This directive controls whether or not and where PHP will output errors, +; notices and warnings too. Error output is very useful during development, but +; it could be very dangerous in production environments. Depending on the code +; which is triggering the error, sensitive information could potentially leak +; out of your application such as database usernames and passwords or worse. +; For production environments, we recommend logging errors rather than +; sending them to STDOUT. +; Possible Values: +; Off = Do not display any errors +; stderr = Display errors to STDERR (affects only CGI/CLI binaries!) +; On or stdout = Display errors to STDOUT +; Default Value: On +; Development Value: On +; Production Value: Off +; http://php.net/display-errors +display_errors = On + +; The display of errors which occur during PHP's startup sequence are handled +; separately from display_errors. PHP's default behavior is to suppress those +; errors from clients. Turning the display of startup errors on can be useful in +; debugging configuration problems. We strongly recommend you +; set this to 'off' for production servers. 
+; Default Value: Off +; Development Value: On +; Production Value: Off +; http://php.net/display-startup-errors +display_startup_errors = On + +; Besides displaying errors, PHP can also log errors to locations such as a +; server-specific log, STDERR, or a location specified by the error_log +; directive found below. While errors should not be displayed on productions +; servers they should still be monitored and logging is a great way to do that. +; Default Value: Off +; Development Value: On +; Production Value: On +; http://php.net/log-errors +log_errors = On + +; Set maximum length of log_errors. In error_log information about the source is +; added. The default is 1024 and 0 allows to not apply any maximum length at all. +; http://php.net/log-errors-max-len +log_errors_max_len = 1024 + +; Do not log repeated messages. Repeated errors must occur in same file on same +; line unless ignore_repeated_source is set true. +; http://php.net/ignore-repeated-errors +ignore_repeated_errors = Off + +; Ignore source of message when ignoring repeated messages. When this setting +; is On you will not log errors with repeated messages from different files or +; source lines. +; http://php.net/ignore-repeated-source +ignore_repeated_source = Off + +; If this parameter is set to Off, then memory leaks will not be shown (on +; stdout or in the log). This is only effective in a debug compile, and if +; error reporting includes E_WARNING in the allowed list +; http://php.net/report-memleaks +report_memleaks = On + +; This setting is on by default. +;report_zend_debug = 0 + +; Store the last error/warning message in $php_errormsg (boolean). Setting this value +; to On can assist in debugging and is appropriate for development servers. It should +; however be disabled on production servers. +; This directive is DEPRECATED. 
+; Default Value: Off +; Development Value: Off +; Production Value: Off +; http://php.net/track-errors +;track_errors = Off + +; Turn off normal error reporting and emit XML-RPC error XML +; http://php.net/xmlrpc-errors +;xmlrpc_errors = 0 + +; An XML-RPC faultCode +;xmlrpc_error_number = 0 + +; When PHP displays or logs an error, it has the capability of formatting the +; error message as HTML for easier reading. This directive controls whether +; the error message is formatted as HTML or not. +; Note: This directive is hardcoded to Off for the CLI SAPI +; http://php.net/html-errors +;html_errors = On + +; If html_errors is set to On *and* docref_root is not empty, then PHP +; produces clickable error messages that direct to a page describing the error +; or function causing the error in detail. +; You can download a copy of the PHP manual from http://php.net/docs +; and change docref_root to the base URL of your local copy including the +; leading '/'. You must also specify the file extension being used including +; the dot. PHP's default behavior is to leave these settings empty, in which +; case no links to documentation are generated. +; Note: Never use this feature for production boxes. +; http://php.net/docref-root +; Examples +;docref_root = "/phpmanual/" + +; http://php.net/docref-ext +;docref_ext = .html + +; String to output before an error message. PHP's default behavior is to leave +; this setting blank. +; http://php.net/error-prepend-string +; Example: +;error_prepend_string = "" + +; String to output after an error message. PHP's default behavior is to leave +; this setting blank. +; http://php.net/error-append-string +; Example: +;error_append_string = "" + +; Log errors to specified file. PHP's default behavior is to leave this value +; empty. +; http://php.net/error-log +; Example: +;error_log = php_errors.log +; Log errors to syslog (Event Log on Windows). 
+;error_log = syslog + +; The syslog ident is a string which is prepended to every message logged +; to syslog. Only used when error_log is set to syslog. +;syslog.ident = php + +; The syslog facility is used to specify what type of program is logging +; the message. Only used when error_log is set to syslog. +;syslog.facility = user + +; Set this to disable filtering control characters (the default). +; Some loggers only accept NVT-ASCII, others accept anything that's not +; control characters. If your logger accepts everything, then no filtering +; is needed at all. +; Allowed values are: +; ascii (all printable ASCII characters and NL) +; no-ctrl (all characters except control characters) +; all (all characters) +; raw (like "all", but messages are not split at newlines) +; http://php.net/syslog.filter +;syslog.filter = ascii + +;windows.show_crt_warning +; Default value: 0 +; Development value: 0 +; Production value: 0 + +;;;;;;;;;;;;;;;;; +; Data Handling ; +;;;;;;;;;;;;;;;;; + +; The separator used in PHP generated URLs to separate arguments. +; PHP's default setting is "&". +; http://php.net/arg-separator.output +; Example: +;arg_separator.output = "&" + +; List of separator(s) used by PHP to parse input URLs into variables. +; PHP's default setting is "&". +; NOTE: Every character in this directive is considered as separator! +; http://php.net/arg-separator.input +; Example: +;arg_separator.input = ";&" + +; This directive determines which super global arrays are registered when PHP +; starts up. G,P,C,E & S are abbreviations for the following respective super +; globals: GET, POST, COOKIE, ENV and SERVER. There is a performance penalty +; paid for the registration of these arrays and because ENV is not as commonly +; used as the others, ENV is not recommended on production servers. You +; can still get access to the environment variables through getenv() should you +; need to. 
+; Default Value: "EGPCS" +; Development Value: "GPCS" +; Production Value: "GPCS"; +; http://php.net/variables-order +variables_order = "GPCS" + +; This directive determines which super global data (G,P & C) should be +; registered into the super global array REQUEST. If so, it also determines +; the order in which that data is registered. The values for this directive +; are specified in the same manner as the variables_order directive, +; EXCEPT one. Leaving this value empty will cause PHP to use the value set +; in the variables_order directive. It does not mean it will leave the super +; globals array REQUEST empty. +; Default Value: None +; Development Value: "GP" +; Production Value: "GP" +; http://php.net/request-order +request_order = "GP" + +; This directive determines whether PHP registers $argv & $argc each time it +; runs. $argv contains an array of all the arguments passed to PHP when a script +; is invoked. $argc contains an integer representing the number of arguments +; that were passed when the script was invoked. These arrays are extremely +; useful when running scripts from the command line. When this directive is +; enabled, registering these variables consumes CPU cycles and memory each time +; a script is executed. For performance reasons, this feature should be disabled +; on production servers. +; Note: This directive is hardcoded to On for the CLI SAPI +; Default Value: On +; Development Value: Off +; Production Value: Off +; http://php.net/register-argc-argv +register_argc_argv = Off + +; When enabled, the ENV, REQUEST and SERVER variables are created when they're +; first used (Just In Time) instead of when the script starts. If these +; variables are not used within a script, having this directive on will result +; in a performance gain. The PHP directive register_argc_argv must be disabled +; for this directive to have any effect. +; http://php.net/auto-globals-jit +auto_globals_jit = On + +; Whether PHP will read the POST data. 
+; This option is enabled by default. +; Most likely, you won't want to disable this option globally. It causes $_POST +; and $_FILES to always be empty; the only way you will be able to read the +; POST data will be through the php://input stream wrapper. This can be useful +; to proxy requests or to process the POST data in a memory efficient fashion. +; http://php.net/enable-post-data-reading +;enable_post_data_reading = Off + +; Maximum size of POST data that PHP will accept. +; Its value may be 0 to disable the limit. It is ignored if POST data reading +; is disabled through enable_post_data_reading. +; http://php.net/post-max-size +post_max_size = 8M + +; Automatically add files before PHP document. +; http://php.net/auto-prepend-file +auto_prepend_file = + +; Automatically add files after PHP document. +; http://php.net/auto-append-file +auto_append_file = + +; By default, PHP will output a media type using the Content-Type header. To +; disable this, simply set it to be empty. +; +; PHP's built-in default media type is set to text/html. +; http://php.net/default-mimetype +default_mimetype = "text/html" + +; PHP's default character set is set to UTF-8. +; http://php.net/default-charset +default_charset = "UTF-8" + +; PHP internal character encoding is set to empty. +; If empty, default_charset is used. +; http://php.net/internal-encoding +;internal_encoding = + +; PHP input character encoding is set to empty. +; If empty, default_charset is used. +; http://php.net/input-encoding +;input_encoding = + +; PHP output character encoding is set to empty. +; If empty, default_charset is used. +; See also output_buffer. 
+; http://php.net/output-encoding +;output_encoding = + +;;;;;;;;;;;;;;;;;;;;;;;;; +; Paths and Directories ; +;;;;;;;;;;;;;;;;;;;;;;;;; + +; UNIX: "/path1:/path2" +;include_path = ".:/php/includes" +; +; Windows: "\path1;\path2" +;include_path = ".;c:\php\includes" +; +; PHP's default setting for include_path is ".;/path/to/php/pear" +; http://php.net/include-path + +; The root of the PHP pages, used only if nonempty. +; if PHP was not compiled with FORCE_REDIRECT, you SHOULD set doc_root +; if you are running php as a CGI under any web server (other than IIS) +; see documentation for security issues. The alternate is to use the +; cgi.force_redirect configuration below +; http://php.net/doc-root +doc_root = + +; The directory under which PHP opens the script using /~username used only +; if nonempty. +; http://php.net/user-dir +user_dir = + +; Directory in which the loadable extensions (modules) reside. +; http://php.net/extension-dir +extension_dir = "/opt/php7-sp/lib/php/extensions/no-debug-non-zts-20190902/" +; On windows: +;extension_dir = "ext" + +; Directory where the temporary files should be placed. +; Defaults to the system default (see sys_get_temp_dir) +;sys_temp_dir = "/tmp" + +; Whether or not to enable the dl() function. The dl() function does NOT work +; properly in multithreaded servers, such as IIS or Zeus, and is automatically +; disabled on them. +; http://php.net/enable-dl +enable_dl = Off + +; cgi.force_redirect is necessary to provide security running PHP as a CGI under +; most web servers. Left undefined, PHP turns this on by default. You can +; turn it off here AT YOUR OWN RISK +; **You CAN safely turn this off for IIS, in fact, you MUST.** +; http://php.net/cgi.force-redirect +;cgi.force_redirect = 1 + +; if cgi.nph is enabled it will force cgi to always send Status: 200 with +; every request. PHP's default behavior is to disable this feature. 
+;cgi.nph = 1 + +; if cgi.force_redirect is turned on, and you are not running under Apache or Netscape +; (iPlanet) web servers, you MAY need to set an environment variable name that PHP +; will look for to know it is OK to continue execution. Setting this variable MAY +; cause security issues, KNOW WHAT YOU ARE DOING FIRST. +; http://php.net/cgi.redirect-status-env +;cgi.redirect_status_env = + +; cgi.fix_pathinfo provides *real* PATH_INFO/PATH_TRANSLATED support for CGI. PHP's +; previous behaviour was to set PATH_TRANSLATED to SCRIPT_FILENAME, and to not grok +; what PATH_INFO is. For more information on PATH_INFO, see the cgi specs. Setting +; this to 1 will cause PHP CGI to fix its paths to conform to the spec. A setting +; of zero causes PHP to behave as before. Default is 1. You should fix your scripts +; to use SCRIPT_FILENAME rather than PATH_TRANSLATED. +; http://php.net/cgi.fix-pathinfo +;cgi.fix_pathinfo=1 + +; if cgi.discard_path is enabled, the PHP CGI binary can safely be placed outside +; of the web tree and people will not be able to circumvent .htaccess security. +;cgi.discard_path=1 + +; FastCGI under IIS supports the ability to impersonate +; security tokens of the calling client. This allows IIS to define the +; security context that the request runs under. mod_fastcgi under Apache +; does not currently support this feature (03/17/2002) +; Set to 1 if running under IIS. Default is zero. +; http://php.net/fastcgi.impersonate +;fastcgi.impersonate = 1 + +; Disable logging through FastCGI connection. PHP's default behavior is to enable +; this feature. +;fastcgi.logging = 0 + +; cgi.rfc2616_headers configuration option tells PHP what type of headers to +; use when sending HTTP response code. If set to 0, PHP sends Status: header that +; is supported by Apache. When this option is set to 1, PHP will send +; RFC2616 compliant header. +; Default is zero. 
+; http://php.net/cgi.rfc2616-headers +;cgi.rfc2616_headers = 0 + +; cgi.check_shebang_line controls whether CGI PHP checks for a line starting with #! +; (shebang) at the top of the running script. This line might be needed if the +; script supports running both as a stand-alone script and via PHP CGI. PHP in CGI +; mode skips this line and ignores its content if this directive is turned on. +; http://php.net/cgi.check-shebang-line +;cgi.check_shebang_line=1 + +;;;;;;;;;;;;;;;; +; File Uploads ; +;;;;;;;;;;;;;;;; + +; Whether to allow HTTP file uploads. +; http://php.net/file-uploads +file_uploads = On + +; Temporary directory for HTTP uploaded files (will use system default if not +; specified). +; http://php.net/upload-tmp-dir +;upload_tmp_dir = + +; Maximum allowed size for uploaded files. +; http://php.net/upload-max-filesize +upload_max_filesize = 2M + +; Maximum number of files that can be uploaded via a single request +max_file_uploads = 20 + +;;;;;;;;;;;;;;;;;; +; Fopen wrappers ; +;;;;;;;;;;;;;;;;;; + +; Whether to allow the treatment of URLs (like http:// or ftp://) as files. +; http://php.net/allow-url-fopen +allow_url_fopen = On + +; Whether to allow include/require to open URLs (like http:// or ftp://) as files. +; http://php.net/allow-url-include +allow_url_include = Off + +; Define the anonymous ftp password (your email address). PHP's default setting +; for this is empty. +; http://php.net/from +;from="john@doe.com" + +; Define the User-Agent string. PHP's default setting for this is empty. 
+; http://php.net/user-agent +;user_agent="PHP" + +; Default timeout for socket based streams (seconds) +; http://php.net/default-socket-timeout +default_socket_timeout = 60 + +; If your scripts have to deal with files from Macintosh systems, +; or you are running on a Mac and need to deal with files from +; unix or win32 systems, setting this flag will cause PHP to +; automatically detect the EOL character in those files so that +; fgets() and file() will work regardless of the source of the file. +; http://php.net/auto-detect-line-endings +;auto_detect_line_endings = Off + +;;;;;;;;;;;;;;;;;;;;;; +; Dynamic Extensions ; +;;;;;;;;;;;;;;;;;;;;;; + +; If you wish to have an extension loaded automatically, use the following +; syntax: +; +; extension=modulename +; +; For example: +; +; extension=mysqli +; +; When the extension library to load is not located in the default extension +; directory, You may specify an absolute path to the library file: +; +; extension=/path/to/extension/mysqli.so +; +; Note : The syntax used in previous PHP versions ('extension=<ext>.so' and +; 'extension=php_<ext>.dll') is supported for legacy reasons and may be +; deprecated in a future PHP major version. So, when it is possible, please +; move to the new ('extension=<ext>') syntax. +; +; Notes for Windows environments : +; +; - Many DLL files are located in the extensions/ (PHP 4) or ext/ (PHP 5+) +; extension folders as well as the separate PECL DLL download (PHP 5+). +; Be sure to appropriately set the extension_dir directive. 
+; +zend_extension=opcache +;zend_extension=xdebug +;extension=bz2 +;extension=curl +;extension=ffi +;extension=ftp +;extension=fileinfo +;extension=gd2 +;extension=gettext +;extension=gmp +;extension=intl +;extension=imap +;extension=ldap +;extension=mbstring +;extension=exif ; Must be after mbstring as it depends on it +;extension=mysqli +;extension=oci8_12c ; Use with Oracle Database 12c Instant Client +;extension=odbc +;extension=openssl +;extension=pdo_firebird +;extension=pdo_mysql +;extension=pdo_oci +;extension=pdo_odbc +;extension=pdo_pgsql +;extension=pdo_sqlite +;extension=pgsql +;extension=shmop + +; The MIBS data available in the PHP distribution must be installed. +; See http://www.php.net/manual/en/snmp.installation.php +;extension=snmp + +;extension=soap +;extension=sockets +;extension=sodium +;extension=sqlite3 +;extension=tidy +;extension=xmlrpc +;extension=xsl + +;;;;;;;;;;;;;;;;;;; +; Module Settings ; +;;;;;;;;;;;;;;;;;;; + +[CLI Server] +; Whether the CLI web server uses ANSI color coding in its terminal output. +cli_server.color = On + +[Date] +; Defines the default timezone used by the date functions +; http://php.net/date.timezone +;date.timezone = + +; http://php.net/date.default-latitude +;date.default_latitude = 31.7667 + +; http://php.net/date.default-longitude +;date.default_longitude = 35.2333 + +; http://php.net/date.sunrise-zenith +;date.sunrise_zenith = 90.583333 + +; http://php.net/date.sunset-zenith +;date.sunset_zenith = 90.583333 + +[filter] +; http://php.net/filter.default +;filter.default = unsafe_raw + +; http://php.net/filter.default-flags +;filter.default_flags = + +[iconv] +; Use of this INI entry is deprecated, use global input_encoding instead. +; If empty, default_charset or input_encoding or iconv.input_encoding is used. +; The precedence is: default_charset < input_encoding < iconv.input_encoding +;iconv.input_encoding = + +; Use of this INI entry is deprecated, use global internal_encoding instead. 
+; If empty, default_charset or internal_encoding or iconv.internal_encoding is used. +; The precedence is: default_charset < internal_encoding < iconv.internal_encoding +;iconv.internal_encoding = + +; Use of this INI entry is deprecated, use global output_encoding instead. +; If empty, default_charset or output_encoding or iconv.output_encoding is used. +; The precedence is: default_charset < output_encoding < iconv.output_encoding +; To use an output encoding conversion, iconv's output handler must be set +; otherwise output encoding conversion cannot be performed. +;iconv.output_encoding = + +[imap] +; rsh/ssh logins are disabled by default. Use this INI entry if you want to +; enable them. Note that the IMAP library does not filter mailbox names before +; passing them to rsh/ssh command, thus passing untrusted data to this function +; with rsh/ssh enabled is insecure. +;imap.enable_insecure_rsh=0 + +[intl] +;intl.default_locale = +; This directive allows you to produce PHP errors when some error +; happens within intl functions. The value is the level of the error produced. +; Default is 0, which does not produce any errors. +;intl.error_level = E_WARNING +;intl.use_exceptions = 0 + +[sqlite3] +; Directory pointing to SQLite3 extensions +; http://php.net/sqlite3.extension-dir +;sqlite3.extension_dir = + +; SQLite defensive mode flag (only available from SQLite 3.26+) +; When the defensive flag is enabled, language features that allow ordinary +; SQL to deliberately corrupt the database file are disabled. This forbids +; writing directly to the schema, shadow tables (eg. FTS data tables), or +; the sqlite_dbpage virtual table. +; https://www.sqlite.org/c3ref/c_dbconfig_defensive.html +; (for older SQLite versions, this flag has no use) +;sqlite3.defensive = 1 + +[Pcre] +; PCRE library backtracking limit. +; http://php.net/pcre.backtrack-limit +;pcre.backtrack_limit=100000 + +; PCRE library recursion limit. 
+; Please note that if you set this value to a high number you may consume all +; the available process stack and eventually crash PHP (due to reaching the +; stack size limit imposed by the Operating System). +; http://php.net/pcre.recursion-limit +;pcre.recursion_limit=100000 + +; Enables or disables JIT compilation of patterns. This requires the PCRE +; library to be compiled with JIT support. +;pcre.jit=1 + +[Pdo] +; Whether to pool ODBC connections. Can be one of "strict", "relaxed" or "off" +; http://php.net/pdo-odbc.connection-pooling +;pdo_odbc.connection_pooling=strict + +;pdo_odbc.db2_instance_name + +[Pdo_mysql] +; Default socket name for local MySQL connects. If empty, uses the built-in +; MySQL defaults. +pdo_mysql.default_socket= + +[Phar] +; http://php.net/phar.readonly +;phar.readonly = On + +; http://php.net/phar.require-hash +;phar.require_hash = On + +;phar.cache_list = + +[mail function] +; For Win32 only. +; http://php.net/smtp +SMTP = localhost +; http://php.net/smtp-port +smtp_port = 25 + +; For Win32 only. +; http://php.net/sendmail-from +;sendmail_from = me@example.com + +; For Unix only. You may supply arguments as well (default: "sendmail -t -i"). +; http://php.net/sendmail-path +;sendmail_path = + +; Force the addition of the specified parameters to be passed as extra parameters +; to the sendmail binary. These parameters will always replace the value of +; the 5th parameter to mail(). +;mail.force_extra_parameters = + +; Add X-PHP-Originating-Script: that will include uid of the script followed by the filename +mail.add_x_header = Off + +; The path to a log file that will log all mail() calls. Log entries include +; the full path of the script, line number, To address and headers. +;mail.log = +; Log mail to syslog (Event Log on Windows). 
+;mail.log = syslog + +[ODBC] +; http://php.net/odbc.default-db +;odbc.default_db = Not yet implemented + +; http://php.net/odbc.default-user +;odbc.default_user = Not yet implemented + +; http://php.net/odbc.default-pw +;odbc.default_pw = Not yet implemented + +; Controls the ODBC cursor model. +; Default: SQL_CURSOR_STATIC (default). +;odbc.default_cursortype + +; Allow or prevent persistent links. +; http://php.net/odbc.allow-persistent +odbc.allow_persistent = On + +; Check that a connection is still valid before reuse. +; http://php.net/odbc.check-persistent +odbc.check_persistent = On + +; Maximum number of persistent links. -1 means no limit. +; http://php.net/odbc.max-persistent +odbc.max_persistent = -1 + +; Maximum number of links (persistent + non-persistent). -1 means no limit. +; http://php.net/odbc.max-links +odbc.max_links = -1 + +; Handling of LONG fields. Returns number of bytes to variables. 0 means +; passthru. +; http://php.net/odbc.defaultlrl +odbc.defaultlrl = 4096 + +; Handling of binary data. 0 means passthru, 1 return as is, 2 convert to char. +; See the documentation on odbc_binmode and odbc_longreadlen for an explanation +; of odbc.defaultlrl and odbc.defaultbinmode +; http://php.net/odbc.defaultbinmode +odbc.defaultbinmode = 1 + +[MySQLi] + +; Maximum number of persistent links. -1 means no limit. +; http://php.net/mysqli.max-persistent +mysqli.max_persistent = -1 + +; Allow accessing, from PHP's perspective, local files with LOAD DATA statements +; http://php.net/mysqli.allow_local_infile +;mysqli.allow_local_infile = On + +; Allow or prevent persistent links. +; http://php.net/mysqli.allow-persistent +mysqli.allow_persistent = On + +; Maximum number of links. -1 means no limit. +; http://php.net/mysqli.max-links +mysqli.max_links = -1 + +; Default port number for mysqli_connect(). 
If unset, mysqli_connect() will use +; the $MYSQL_TCP_PORT or the mysql-tcp entry in /etc/services or the +; compile-time value defined MYSQL_PORT (in that order). Win32 will only look +; at MYSQL_PORT. +; http://php.net/mysqli.default-port +mysqli.default_port = 3306 + +; Default socket name for local MySQL connects. If empty, uses the built-in +; MySQL defaults. +; http://php.net/mysqli.default-socket +mysqli.default_socket = + +; Default host for mysqli_connect() (doesn't apply in safe mode). +; http://php.net/mysqli.default-host +mysqli.default_host = + +; Default user for mysqli_connect() (doesn't apply in safe mode). +; http://php.net/mysqli.default-user +mysqli.default_user = + +; Default password for mysqli_connect() (doesn't apply in safe mode). +; Note that this is generally a *bad* idea to store passwords in this file. +; *Any* user with PHP access can run 'echo get_cfg_var("mysqli.default_pw") +; and reveal this password! And of course, any users with read access to this +; file will be able to reveal the password as well. +; http://php.net/mysqli.default-pw +mysqli.default_pw = + +; Allow or prevent reconnect +mysqli.reconnect = Off + +[mysqlnd] +; Enable / Disable collection of general statistics by mysqlnd which can be +; used to tune and monitor MySQL operations. +mysqlnd.collect_statistics = On + +; Enable / Disable collection of memory usage statistics by mysqlnd which can be +; used to tune and monitor MySQL operations. +mysqlnd.collect_memory_statistics = On + +; Records communication from all extensions using mysqlnd to the specified log +; file. +; http://php.net/mysqlnd.debug +;mysqlnd.debug = + +; Defines which queries will be logged. +;mysqlnd.log_mask = 0 + +; Default size of the mysqlnd memory pool, which is used by result sets. +;mysqlnd.mempool_default_size = 16000 + +; Size of a pre-allocated buffer used when sending commands to MySQL in bytes. 
+;mysqlnd.net_cmd_buffer_size = 2048 + +; Size of a pre-allocated buffer used for reading data sent by the server in +; bytes. +;mysqlnd.net_read_buffer_size = 32768 + +; Timeout for network requests in seconds. +;mysqlnd.net_read_timeout = 31536000 + +; SHA-256 Authentication Plugin related. File with the MySQL server public RSA +; key. +;mysqlnd.sha256_server_public_key = + +[OCI8] + +; Connection: Enables privileged connections using external +; credentials (OCI_SYSOPER, OCI_SYSDBA) +; http://php.net/oci8.privileged-connect +;oci8.privileged_connect = Off + +; Connection: The maximum number of persistent OCI8 connections per +; process. Using -1 means no limit. +; http://php.net/oci8.max-persistent +;oci8.max_persistent = -1 + +; Connection: The maximum number of seconds a process is allowed to +; maintain an idle persistent connection. Using -1 means idle +; persistent connections will be maintained forever. +; http://php.net/oci8.persistent-timeout +;oci8.persistent_timeout = -1 + +; Connection: The number of seconds that must pass before issuing a +; ping during oci_pconnect() to check the connection validity. When +; set to 0, each oci_pconnect() will cause a ping. Using -1 disables +; pings completely. +; http://php.net/oci8.ping-interval +;oci8.ping_interval = 60 + +; Connection: Set this to a user chosen connection class to be used +; for all pooled server requests with Oracle 11g Database Resident +; Connection Pooling (DRCP). To use DRCP, this value should be set to +; the same string for all web servers running the same application, +; the database pool must be configured, and the connection string must +; specify to use a pooled server. +;oci8.connection_class = + +; High Availability: Using On lets PHP receive Fast Application +; Notification (FAN) events generated when a database node fails. The +; database must also be configured to post FAN events. 
+;oci8.events = Off + +; Tuning: This option enables statement caching, and specifies how +; many statements to cache. Using 0 disables statement caching. +; http://php.net/oci8.statement-cache-size +;oci8.statement_cache_size = 20 + +; Tuning: Enables statement prefetching and sets the default number of +; rows that will be fetched automatically after statement execution. +; http://php.net/oci8.default-prefetch +;oci8.default_prefetch = 100 + +; Compatibility. Using On means oci_close() will not close +; oci_connect() and oci_new_connect() connections. +; http://php.net/oci8.old-oci-close-semantics +;oci8.old_oci_close_semantics = Off + +[PostgreSQL] +; Allow or prevent persistent links. +; http://php.net/pgsql.allow-persistent +pgsql.allow_persistent = On + +; Detect broken persistent links always with pg_pconnect(). +; Auto reset feature requires a little overheads. +; http://php.net/pgsql.auto-reset-persistent +pgsql.auto_reset_persistent = Off + +; Maximum number of persistent links. -1 means no limit. +; http://php.net/pgsql.max-persistent +pgsql.max_persistent = -1 + +; Maximum number of links (persistent+non persistent). -1 means no limit. +; http://php.net/pgsql.max-links +pgsql.max_links = -1 + +; Ignore PostgreSQL backends Notice message or not. +; Notice message logging require a little overheads. +; http://php.net/pgsql.ignore-notice +pgsql.ignore_notice = 0 + +; Log PostgreSQL backends Notice message or not. +; Unless pgsql.ignore_notice=0, module cannot log notice message. +; http://php.net/pgsql.log-notice +pgsql.log_notice = 0 + +[bcmath] +; Number of decimal digits for all bcmath functions. +; http://php.net/bcmath.scale +bcmath.scale = 0 + +[browscap] +; http://php.net/browscap +;browscap = extra/browscap.ini + +[Session] +; Handler used to store/retrieve data. +; http://php.net/session.save-handler +session.save_handler = files + +; Argument passed to save_handler. In the case of files, this is the path +; where data files are stored. 
Note: Windows users have to change this +; variable in order to use PHP's session functions. +; +; The path can be defined as: +; +; session.save_path = "N;/path" +; +; where N is an integer. Instead of storing all the session files in +; /path, what this will do is use subdirectories N-levels deep, and +; store the session data in those directories. This is useful if +; your OS has problems with many files in one directory, and is +; a more efficient layout for servers that handle many sessions. +; +; NOTE 1: PHP will not create this directory structure automatically. +; You can use the script in the ext/session dir for that purpose. +; NOTE 2: See the section on garbage collection below if you choose to +; use subdirectories for session storage +; +; The file storage module creates files using mode 600 by default. +; You can change that by using +; +; session.save_path = "N;MODE;/path" +; +; where MODE is the octal representation of the mode. Note that this +; does not overwrite the process's umask. +; http://php.net/session.save-path +;session.save_path = "/tmp" + +; Whether to use strict session mode. +; Strict session mode does not accept an uninitialized session ID, and +; regenerates the session ID if the browser sends an uninitialized session ID. +; Strict mode protects applications from session fixation via a session adoption +; vulnerability. It is disabled by default for maximum compatibility, but +; enabling it is encouraged. +; https://wiki.php.net/rfc/strict_sessions +session.use_strict_mode = 0 + +; Whether to use cookies. +; http://php.net/session.use-cookies +session.use_cookies = 1 + +; http://php.net/session.cookie-secure +;session.cookie_secure = + +; This option forces PHP to fetch and use a cookie for storing and maintaining +; the session id. We encourage this operation as it's very helpful in combating +; session hijacking when not specifying and managing your own session id. 
It is +; not the be-all and end-all of session hijacking defense, but it's a good start. +; http://php.net/session.use-only-cookies +session.use_only_cookies = 1 + +; Name of the session (used as cookie name). +; http://php.net/session.name +session.name = PHPSESSID + +; Initialize session on request startup. +; http://php.net/session.auto-start +session.auto_start = 0 + +; Lifetime in seconds of cookie or, if 0, until browser is restarted. +; http://php.net/session.cookie-lifetime +session.cookie_lifetime = 0 + +; The path for which the cookie is valid. +; http://php.net/session.cookie-path +session.cookie_path = / + +; The domain for which the cookie is valid. +; http://php.net/session.cookie-domain +session.cookie_domain = + +; Whether or not to add the httpOnly flag to the cookie, which makes it +; inaccessible to browser scripting languages such as JavaScript. +; http://php.net/session.cookie-httponly +session.cookie_httponly = + +; Add SameSite attribute to cookie to help mitigate Cross-Site Request Forgery (CSRF/XSRF) +; Current valid values are "Strict", "Lax" or "None". When using "None", +; make sure to include the quotes, as `none` is interpreted like `false` in ini files. +; https://tools.ietf.org/html/draft-west-first-party-cookies-07 +session.cookie_samesite = + +; Handler used to serialize data. php is the standard serializer of PHP. +; http://php.net/session.serialize-handler +session.serialize_handler = php + +; Defines the probability that the 'garbage collection' process is started on every +; session initialization. The probability is calculated by using gc_probability/gc_divisor, +; e.g. 1/100 means there is a 1% chance that the GC process starts on each request. +; Default Value: 1 +; Development Value: 1 +; Production Value: 1 +; http://php.net/session.gc-probability +session.gc_probability = 1 + +; Defines the probability that the 'garbage collection' process is started on every +; session initialization. 
The probability is calculated by using gc_probability/gc_divisor, +; e.g. 1/100 means there is a 1% chance that the GC process starts on each request. +; For high volume production servers, using a value of 1000 is a more efficient approach. +; Default Value: 100 +; Development Value: 1000 +; Production Value: 1000 +; http://php.net/session.gc-divisor +session.gc_divisor = 1000 + +; After this number of seconds, stored data will be seen as 'garbage' and +; cleaned up by the garbage collection process. +; http://php.net/session.gc-maxlifetime +session.gc_maxlifetime = 1440 + +; NOTE: If you are using the subdirectory option for storing session files +; (see session.save_path above), then garbage collection does *not* +; happen automatically. You will need to do your own garbage +; collection through a shell script, cron entry, or some other method. +; For example, the following script is the equivalent of setting +; session.gc_maxlifetime to 1440 (1440 seconds = 24 minutes): +; find /path/to/sessions -cmin +24 -type f | xargs rm + +; Check HTTP Referer to invalidate externally stored URLs containing ids. +; HTTP_REFERER has to contain this substring for the session to be +; considered as valid. +; http://php.net/session.referer-check +session.referer_check = + +; Set to {nocache,private,public,} to determine HTTP caching aspects +; or leave this empty to avoid sending anti-caching headers. +; http://php.net/session.cache-limiter +session.cache_limiter = nocache + +; Document expires after n minutes. +; http://php.net/session.cache-expire +session.cache_expire = 180 + +; trans sid support is disabled by default. +; Use of trans sid may risk your users' security. +; Use this option with caution. +; - User may send URL contains active session ID +; to other person via. email/irc/etc. +; - URL that contains active session ID may be stored +; in publicly accessible computer. 
+; - User may access your site with the same session ID +; always using URL stored in browser's history or bookmarks. +; http://php.net/session.use-trans-sid +session.use_trans_sid = 0 + +; Set session ID character length. This value could be between 22 to 256. +; Shorter length than default is supported only for compatibility reason. +; Users should use 32 or more chars. +; http://php.net/session.sid-length +; Default Value: 32 +; Development Value: 26 +; Production Value: 26 +session.sid_length = 26 + +; The URL rewriter will look for URLs in a defined set of HTML tags. +;
<form> is special; if you include them here, the rewriter will +; add a hidden <input> field with the info which is otherwise appended +; to URLs. <form> tag's action attribute URL will not be modified +; unless it is specified. +; Note that all valid entries require a "=", even if no value follows. +; Default Value: "a=href,area=href,frame=src,form=" +; Development Value: "a=href,area=href,frame=src,form=" +; Production Value: "a=href,area=href,frame=src,form=" +; http://php.net/url-rewriter.tags +session.trans_sid_tags = "a=href,area=href,frame=src,form=" + +; URL rewriter does not rewrite absolute URLs by default. +; To enable rewrites for absolute paths, target hosts must be specified +; at RUNTIME. i.e. use ini_set() +; <form> tags is special. PHP will check action attribute's URL regardless +; of session.trans_sid_tags setting. +; If no host is defined, HTTP_HOST will be used for allowed host. +; Example value: php.net,www.php.net,wiki.php.net +; Use "," for multiple hosts. No spaces are allowed. +; Default Value: "" +; Development Value: "" +; Production Value: "" +;session.trans_sid_hosts="" + +; Define how many bits are stored in each character when converting +; the binary hash data to something readable. +; Possible values: +; 4 (4 bits: 0-9, a-f) +; 5 (5 bits: 0-9, a-v) +; 6 (6 bits: 0-9, a-z, A-Z, "-", ",") +; Default Value: 4 +; Development Value: 5 +; Production Value: 5 +; http://php.net/session.hash-bits-per-character +session.sid_bits_per_character = 5 + +; Enable upload progress tracking in $_SESSION +; Default Value: On +; Development Value: On +; Production Value: On +; http://php.net/session.upload-progress.enabled +;session.upload_progress.enabled = On + +; Cleanup the progress information as soon as all POST data has been read +; (i.e. upload completed).
+; Default Value: On +; Development Value: On +; Production Value: On +; http://php.net/session.upload-progress.cleanup +;session.upload_progress.cleanup = On + +; A prefix used for the upload progress key in $_SESSION +; Default Value: "upload_progress_" +; Development Value: "upload_progress_" +; Production Value: "upload_progress_" +; http://php.net/session.upload-progress.prefix +;session.upload_progress.prefix = "upload_progress_" + +; The index name (concatenated with the prefix) in $_SESSION +; containing the upload progress information +; Default Value: "PHP_SESSION_UPLOAD_PROGRESS" +; Development Value: "PHP_SESSION_UPLOAD_PROGRESS" +; Production Value: "PHP_SESSION_UPLOAD_PROGRESS" +; http://php.net/session.upload-progress.name +;session.upload_progress.name = "PHP_SESSION_UPLOAD_PROGRESS" + +; How frequently the upload progress should be updated. +; Given either in percentages (per-file), or in bytes +; Default Value: "1%" +; Development Value: "1%" +; Production Value: "1%" +; http://php.net/session.upload-progress.freq +;session.upload_progress.freq = "1%" + +; The minimum delay between updates, in seconds +; Default Value: 1 +; Development Value: 1 +; Production Value: 1 +; http://php.net/session.upload-progress.min-freq +;session.upload_progress.min_freq = "1" + +; Only write session data when session data is changed. Enabled by default. +; http://php.net/session.lazy-write +;session.lazy_write = On + +[Assertion] +; Switch whether to compile assertions at all (to have no overhead at run-time) +; -1: Do not compile at all +; 0: Jump over assertion at run-time +; 1: Execute assertions +; Changing from or to a negative value is only possible in php.ini! (For turning assertions on and off at run-time, see assert.active, when zend.assertions = 1) +; Default Value: 1 +; Development Value: 1 +; Production Value: -1 +; http://php.net/zend.assertions +zend.assertions = 1 + +; Assert(expr); active by default. 
+; http://php.net/assert.active +;assert.active = On + +; Throw an AssertionError on failed assertions +; http://php.net/assert.exception +;assert.exception = On + +; Issue a PHP warning for each failed assertion. (Overridden by assert.exception if active) +; http://php.net/assert.warning +;assert.warning = On + +; Don't bail out by default. +; http://php.net/assert.bail +;assert.bail = Off + +; User-function to be called if an assertion fails. +; http://php.net/assert.callback +;assert.callback = 0 + +; Eval the expression with current error_reporting(). Set to true if you want +; error_reporting(0) around the eval(). +; http://php.net/assert.quiet-eval +;assert.quiet_eval = 0 + +[COM] +; path to a file containing GUIDs, IIDs or filenames of files with TypeLibs +; http://php.net/com.typelib-file +;com.typelib_file = + +; allow Distributed-COM calls +; http://php.net/com.allow-dcom +;com.allow_dcom = true + +; autoregister constants of a component's typlib on com_load() +; http://php.net/com.autoregister-typelib +;com.autoregister_typelib = true + +; register constants casesensitive +; http://php.net/com.autoregister-casesensitive +;com.autoregister_casesensitive = false + +; show warnings on duplicate constant registrations +; http://php.net/com.autoregister-verbose +;com.autoregister_verbose = true + +; The default character set code-page to use when passing strings to and from COM objects. +; Default: system ANSI code page +;com.code_page= + +[mbstring] +; language for internal character representation. +; This affects mb_send_mail() and mbstring.detect_order. +; http://php.net/mbstring.language +;mbstring.language = Japanese + +; Use of this INI entry is deprecated, use global internal_encoding instead. +; internal/script encoding. +; Some encoding cannot work as internal encoding. (e.g. SJIS, BIG5, ISO-2022-*) +; If empty, default_charset or internal_encoding or iconv.internal_encoding is used. 
+; The precedence is: default_charset < internal_encoding < iconv.internal_encoding +;mbstring.internal_encoding = + +; Use of this INI entry is deprecated, use global input_encoding instead. +; http input encoding. +; mbstring.encoding_translation = On is needed to use this setting. +; If empty, default_charset or input_encoding or mbstring.input is used. +; The precedence is: default_charset < input_encoding < mbstring.http_input +; http://php.net/mbstring.http-input +;mbstring.http_input = + +; Use of this INI entry is deprecated, use global output_encoding instead. +; http output encoding. +; mb_output_handler must be registered as output buffer to function. +; If empty, default_charset or output_encoding or mbstring.http_output is used. +; The precedence is: default_charset < output_encoding < mbstring.http_output +; To use an output encoding conversion, mbstring's output handler must be set +; otherwise output encoding conversion cannot be performed. +; http://php.net/mbstring.http-output +;mbstring.http_output = + +; enable automatic encoding translation according to +; mbstring.internal_encoding setting. Input chars are +; converted to internal encoding by setting this to On. +; Note: Do _not_ use automatic encoding translation for +; portable libs/applications. +; http://php.net/mbstring.encoding-translation +;mbstring.encoding_translation = Off + +; automatic encoding detection order. +; "auto" detect order is changed according to mbstring.language +; http://php.net/mbstring.detect-order +;mbstring.detect_order = auto + +; substitute_character used when character cannot be converted +; one from another +; http://php.net/mbstring.substitute-character +;mbstring.substitute_character = none + +; overload(replace) single byte functions by mbstring functions. +; mail(), ereg(), etc are overloaded by mb_send_mail(), mb_ereg(), +; etc. Possible values are 0,1,2,4 or combination of them. +; For example, 7 for overload everything. 
+; 0: No overload +; 1: Overload mail() function +; 2: Overload str*() functions +; 4: Overload ereg*() functions +; http://php.net/mbstring.func-overload +;mbstring.func_overload = 0 + +; enable strict encoding detection. +; Default: Off +;mbstring.strict_detection = On + +; This directive specifies the regex pattern of content types for which mb_output_handler() +; is activated. +; Default: mbstring.http_output_conv_mimetype=^(text/|application/xhtml\+xml) +;mbstring.http_output_conv_mimetype= + +; This directive specifies maximum stack depth for mbstring regular expressions. It is similar +; to the pcre.recursion_limit for PCRE. +; Default: 100000 +;mbstring.regex_stack_limit=100000 + +; This directive specifies maximum retry count for mbstring regular expressions. It is similar +; to the pcre.backtrack_limit for PCRE. +; Default: 1000000 +;mbstring.regex_retry_limit=1000000 + +[gd] +; Tell the jpeg decode to ignore warnings and try to create +; a gd image. The warning will then be displayed as notices +; disabled by default +; http://php.net/gd.jpeg-ignore-warning +;gd.jpeg_ignore_warning = 1 + +[exif] +; Exif UNICODE user comments are handled as UCS-2BE/UCS-2LE and JIS as JIS. +; With mbstring support this will automatically be converted into the encoding +; given by corresponding encode setting. When empty mbstring.internal_encoding +; is used. For the decode settings you can distinguish between motorola and +; intel byte order. A decode setting cannot be empty. 
+; http://php.net/exif.encode-unicode +;exif.encode_unicode = ISO-8859-15 + +; http://php.net/exif.decode-unicode-motorola +;exif.decode_unicode_motorola = UCS-2BE + +; http://php.net/exif.decode-unicode-intel +;exif.decode_unicode_intel = UCS-2LE + +; http://php.net/exif.encode-jis +;exif.encode_jis = + +; http://php.net/exif.decode-jis-motorola +;exif.decode_jis_motorola = JIS + +; http://php.net/exif.decode-jis-intel +;exif.decode_jis_intel = JIS + +[Tidy] +; The path to a default tidy configuration file to use when using tidy +; http://php.net/tidy.default-config +;tidy.default_config = /usr/local/lib/php/default.tcfg + +; Should tidy clean and repair output automatically? +; WARNING: Do not use this option if you are generating non-html content +; such as dynamic images +; http://php.net/tidy.clean-output +tidy.clean_output = Off + +[soap] +; Enables or disables WSDL caching feature. +; http://php.net/soap.wsdl-cache-enabled +soap.wsdl_cache_enabled=1 + +; Sets the directory name where SOAP extension will put cache files. +; http://php.net/soap.wsdl-cache-dir +soap.wsdl_cache_dir="/tmp" + +; (time to live) Sets the number of second while cached file will be used +; instead of original one. +; http://php.net/soap.wsdl-cache-ttl +soap.wsdl_cache_ttl=86400 + +; Sets the size of the cache limit. (Max. number of WSDL files to cache) +soap.wsdl_cache_limit = 5 + +[sysvshm] +; A default size of the shared memory segment +;sysvshm.init_mem = 10000 + +[ldap] +; Sets the maximum number of open links or -1 for unlimited. +ldap.max_links = -1 + +[dba] +;dba.default_handler= + +[opcache] +; Determines if Zend OPCache is enabled +;opcache.enable=1 + +; Determines if Zend OPCache is enabled for the CLI version of PHP +;opcache.enable_cli=0 + +; The OPcache shared memory storage size. +;opcache.memory_consumption=128 + +; The amount of memory for interned strings in Mbytes. 
+;opcache.interned_strings_buffer=8 + +; The maximum number of keys (scripts) in the OPcache hash table. +; Only numbers between 200 and 1000000 are allowed. +;opcache.max_accelerated_files=10000 + +; The maximum percentage of "wasted" memory until a restart is scheduled. +;opcache.max_wasted_percentage=5 + +; When this directive is enabled, the OPcache appends the current working +; directory to the script key, thus eliminating possible collisions between +; files with the same name (basename). Disabling the directive improves +; performance, but may break existing applications. +;opcache.use_cwd=1 + +; When disabled, you must reset the OPcache manually or restart the +; webserver for changes to the filesystem to take effect. +;opcache.validate_timestamps=1 + +; How often (in seconds) to check file timestamps for changes to the shared +; memory storage allocation. ("1" means validate once per second, but only +; once per request. "0" means always validate) +;opcache.revalidate_freq=2 + +; Enables or disables file search in include_path optimization +;opcache.revalidate_path=0 + +; If disabled, all PHPDoc comments are dropped from the code to reduce the +; size of the optimized code. +;opcache.save_comments=1 + +; Allow file existence override (file_exists, etc.) performance feature. +;opcache.enable_file_override=0 + +; A bitmask, where each bit enables or disables the appropriate OPcache +; passes +;opcache.optimization_level=0x7FFFBFFF + +;opcache.dups_fix=0 + +; The location of the OPcache blacklist file (wildcards allowed). +; Each OPcache blacklist file is a text file that holds the names of files +; that should not be accelerated. The file format is to add each filename +; to a new line. The filename may be a full path or just a file prefix +; (i.e., /var/www/x blacklists all the files and directories in /var/www +; that start with 'x'). Line starting with a ; are ignored (comments). 
+;opcache.blacklist_filename= + +; Allows exclusion of large files from being cached. By default all files +; are cached. +;opcache.max_file_size=0 + +; Check the cache checksum each N requests. +; The default value of "0" means that the checks are disabled. +;opcache.consistency_checks=0 + +; How long to wait (in seconds) for a scheduled restart to begin if the cache +; is not being accessed. +;opcache.force_restart_timeout=180 + +; OPcache error_log file name. Empty string assumes "stderr". +;opcache.error_log= + +; All OPcache errors go to the Web server log. +; By default, only fatal errors (level 0) or errors (level 1) are logged. +; You can also enable warnings (level 2), info messages (level 3) or +; debug messages (level 4). +;opcache.log_verbosity_level=1 + +; Preferred Shared Memory back-end. Leave empty and let the system decide. +;opcache.preferred_memory_model= + +; Protect the shared memory from unexpected writing during script execution. +; Useful for internal debugging only. +;opcache.protect_memory=0 + +; Allows calling OPcache API functions only from PHP scripts which path is +; started from specified string. The default "" means no restriction +;opcache.restrict_api= + +; Mapping base of shared memory segments (for Windows only). All the PHP +; processes have to map shared memory into the same address space. This +; directive allows to manually fix the "Unable to reattach to base address" +; errors. +;opcache.mmap_base= + +; Facilitates multiple OPcache instances per user (for Windows only). All PHP +; processes with the same cache ID and user share an OPcache instance. +;opcache.cache_id= + +; Enables and sets the second level cache directory. +; It should improve performance when SHM memory is full, at server restart or +; SHM reset. The default "" disables file based caching. +;opcache.file_cache= + +; Enables or disables opcode caching in shared memory. 
+;opcache.file_cache_only=0 + +; Enables or disables checksum validation when script loaded from file cache. +;opcache.file_cache_consistency_checks=1 + +; Implies opcache.file_cache_only=1 for a certain process that failed to +; reattach to the shared memory (for Windows only). Explicitly enabled file +; cache is required. +;opcache.file_cache_fallback=1 + +; Enables or disables copying of PHP code (text segment) into HUGE PAGES. +; This should improve performance, but requires appropriate OS configuration. +;opcache.huge_code_pages=0 + +; Validate cached file permissions. +;opcache.validate_permission=0 + +; Prevent name collisions in chroot'ed environment. +;opcache.validate_root=0 + +; If specified, it produces opcode dumps for debugging different stages of +; optimizations. +;opcache.opt_debug_level=0 + +; Specifies a PHP script that is going to be compiled and executed at server +; start-up. +; http://php.net/opcache.preload +;opcache.preload= + +; Preloading code as root is not allowed for security reasons. This directive +; facilitates to let the preloading to be run as another user. +; http://php.net/opcache.preload_user +;opcache.preload_user= + +; Prevents caching files that are less than this number of seconds old. It +; protects from caching of incompletely updated files. In case all file updates +; on your site are atomic, you may increase performance by setting it to "0". +;opcache.file_update_protection=2 + +; Absolute path used to store shared lockfiles (for *nix only). +;opcache.lockfile_path=/tmp + +[curl] +; A default value for the CURLOPT_CAINFO option. This is required to be an +; absolute path. +;curl.cainfo = + +[openssl] +; The location of a Certificate Authority (CA) file on the local filesystem +; to use when verifying the identity of SSL/TLS peers. Most users should +; not specify a value for this directive as PHP will attempt to use the +; OS-managed cert stores in its absence. 
If specified, this value may still +; be overridden on a per-stream basis via the "cafile" SSL stream context +; option. +;openssl.cafile= + +; If openssl.cafile is not specified or if the CA file is not found, the +; directory pointed to by openssl.capath is searched for a suitable +; certificate. This value must be a correctly hashed certificate directory. +; Most users should not specify a value for this directive as PHP will +; attempt to use the OS-managed cert stores in its absence. If specified, +; this value may still be overridden on a per-stream basis via the "capath" +; SSL stream context option. +;openssl.capath= + +[ffi] +; FFI API restriction. Possible values: +; "preload" - enabled in CLI scripts and preloaded files (default) +; "false" - always disabled +; "true" - always enabled +;ffi.enable=preload + +; List of headers files to preload, wildcard patterns allowed. +;ffi.preload= diff --git a/resources/php8.ini b/resources/php8.ini new file mode 100644 index 0000000..d372b9f --- /dev/null +++ b/resources/php8.ini @@ -0,0 +1,1949 @@ +[PHP] + +;;;;;;;;;;;;;;;;;;; +; About php.ini ; +;;;;;;;;;;;;;;;;;;; +; PHP's initialization file, generally called php.ini, is responsible for +; configuring many of the aspects of PHP's behavior. + +; PHP attempts to find and load this configuration from a number of locations. +; The following is a summary of its search order: +; 1. SAPI module specific location. +; 2. The PHPRC environment variable. (As of PHP 5.2.0) +; 3. A number of predefined registry keys on Windows (As of PHP 5.2.0) +; 4. Current working directory (except CLI) +; 5. The web server's directory (for SAPI modules), or directory of PHP +; (otherwise in Windows) +; 6. The directory from the --with-config-file-path compile time option, or the +; Windows directory (usually C:\windows) +; See the PHP docs for more specific information. +; http://php.net/configuration.file + +; The syntax of the file is extremely simple. 
Whitespace and lines +; beginning with a semicolon are silently ignored (as you probably guessed). +; Section headers (e.g. [Foo]) are also silently ignored, even though +; they might mean something in the future. + +; Directives following the section heading [PATH=/www/mysite] only +; apply to PHP files in the /www/mysite directory. Directives +; following the section heading [HOST=www.example.com] only apply to +; PHP files served from www.example.com. Directives set in these +; special sections cannot be overridden by user-defined INI files or +; at runtime. Currently, [PATH=] and [HOST=] sections only work under +; CGI/FastCGI. +; http://php.net/ini.sections + +; Directives are specified using the following syntax: +; directive = value +; Directive names are *case sensitive* - foo=bar is different from FOO=bar. +; Directives are variables used to configure PHP or PHP extensions. +; There is no name validation. If PHP can't find an expected +; directive because it is not set or is mistyped, a default value will be used. + +; The value can be a string, a number, a PHP constant (e.g. E_ALL or M_PI), one +; of the INI constants (On, Off, True, False, Yes, No and None) or an expression +; (e.g. E_ALL & ~E_NOTICE), a quoted string ("bar"), or a reference to a +; previously set variable or directive (e.g. ${foo}) + +; Expressions in the INI file are limited to bitwise operators and parentheses: +; | bitwise OR +; ^ bitwise XOR +; & bitwise AND +; ~ bitwise NOT +; ! boolean NOT + +; Boolean flags can be turned on using the values 1, On, True or Yes. +; They can be turned off using the values 0, Off, False or No. 
+ +; An empty string can be denoted by simply not writing anything after the equal +; sign, or by using the None keyword: + +; foo = ; sets foo to an empty string +; foo = None ; sets foo to an empty string +; foo = "None" ; sets foo to the string 'None' + +; If you use constants in your value, and these constants belong to a +; dynamically loaded extension (either a PHP extension or a Zend extension), +; you may only use these constants *after* the line that loads the extension. + +;;;;;;;;;;;;;;;;;;; +; About this file ; +;;;;;;;;;;;;;;;;;;; +; PHP comes packaged with two INI files. One that is recommended to be used +; in production environments and one that is recommended to be used in +; development environments. + +; php.ini-production contains settings which hold security, performance and +; best practices at its core. But please be aware, these settings may break +; compatibility with older or less security-conscious applications. We +; recommend using the production ini in production and testing environments. + +; php.ini-development is very similar to its production variant, except it is +; much more verbose when it comes to errors. We recommend using the +; development version only in development environments, as errors shown to +; application users can inadvertently leak otherwise secure information. + +; This is the php.ini-development INI file. + +;;;;;;;;;;;;;;;;;;; +; Quick Reference ; +;;;;;;;;;;;;;;;;;;; +; The following are all the settings which are different in either the production +; or development versions of the INIs with respect to PHP's default behavior. +; Please see the actual settings later in the document for more details as to why +; we recommend these changes in PHP's behavior.
+ +; display_errors +; Default Value: On +; Development Value: On +; Production Value: Off + +; display_startup_errors +; Default Value: Off +; Development Value: On +; Production Value: Off + +; error_reporting +; Default Value: E_ALL & ~E_NOTICE & ~E_STRICT & ~E_DEPRECATED +; Development Value: E_ALL +; Production Value: E_ALL & ~E_DEPRECATED & ~E_STRICT + +; log_errors +; Default Value: Off +; Development Value: On +; Production Value: On + +; max_input_time +; Default Value: -1 (Unlimited) +; Development Value: 60 (60 seconds) +; Production Value: 60 (60 seconds) + +; output_buffering +; Default Value: Off +; Development Value: 4096 +; Production Value: 4096 + +; register_argc_argv +; Default Value: On +; Development Value: Off +; Production Value: Off + +; request_order +; Default Value: None +; Development Value: "GP" +; Production Value: "GP" + +; session.gc_divisor +; Default Value: 100 +; Development Value: 1000 +; Production Value: 1000 + +; session.sid_bits_per_character +; Default Value: 4 +; Development Value: 5 +; Production Value: 5 + +; short_open_tag +; Default Value: On +; Development Value: Off +; Production Value: Off + +; variables_order +; Default Value: "EGPCS" +; Development Value: "GPCS" +; Production Value: "GPCS" + +;;;;;;;;;;;;;;;;;;;; +; php.ini Options ; +;;;;;;;;;;;;;;;;;;;; +; Name for user-defined php.ini (.htaccess) files. Default is ".user.ini" +;user_ini.filename = ".user.ini" + +; To disable this feature set this option to an empty value +;user_ini.filename = + +; TTL for user-defined php.ini files (time-to-live) in seconds. Default is 300 seconds (5 minutes) +;user_ini.cache_ttl = 300 + +;;;;;;;;;;;;;;;;;;;; +; Language Options ; +;;;;;;;;;;;;;;;;;;;; + +; Enable the PHP scripting language engine under Apache. +; http://php.net/engine +engine = On + +; This directive determines whether or not PHP will recognize code between +; tags as PHP source which should be processed as such. 
It is +; generally recommended that should be used and that this feature +; should be disabled, as enabling it may result in issues when generating XML +; documents, however this remains supported for backward compatibility reasons. +; Note that this directive does not control the would work. +; http://php.net/syntax-highlighting +;highlight.string = #DD0000 +;highlight.comment = #FF9900 +;highlight.keyword = #007700 +;highlight.default = #0000BB +;highlight.html = #000000 + +; If enabled, the request will be allowed to complete even if the user aborts +; the request. Consider enabling it if executing long requests, which may end up +; being interrupted by the user or a browser timing out. PHP's default behavior +; is to disable this feature. +; http://php.net/ignore-user-abort +;ignore_user_abort = On + +; Determines the size of the realpath cache to be used by PHP. This value should +; be increased on systems where PHP opens many files to reflect the quantity of +; the file operations performed. +; Note: if open_basedir is set, the cache is disabled +; http://php.net/realpath-cache-size +;realpath_cache_size = 4096k + +; Duration of time, in seconds for which to cache realpath information for a given +; file or directory. For systems with rarely changing files, consider increasing this +; value. +; http://php.net/realpath-cache-ttl +;realpath_cache_ttl = 120 + +; Enables or disables the circular reference collector. +; http://php.net/zend.enable-gc +zend.enable_gc = On + +; If enabled, scripts may be written in encodings that are incompatible with +; the scanner. CP936, Big5, CP949 and Shift_JIS are the examples of such +; encodings. To use this feature, mbstring extension must be enabled. +; Default: Off +;zend.multibyte = Off + +; Allows to set the default encoding for the scripts. This value will be used +; unless "declare(encoding=...)" directive appears at the top of the script. +; Only affects if zend.multibyte is set. 
+; Default: "" +;zend.script_encoding = + +; Allows to include or exclude arguments from stack traces generated for exceptions. +; In production, it is recommended to turn this setting on to prohibit the output +; of sensitive information in stack traces +; Default: Off +zend.exception_ignore_args = Off + +;;;;;;;;;;;;;;;;; +; Miscellaneous ; +;;;;;;;;;;;;;;;;; + +; Decides whether PHP may expose the fact that it is installed on the server +; (e.g. by adding its signature to the Web server header). It is no security +; threat in any way, but it makes it possible to determine whether you use PHP +; on your server or not. +; http://php.net/expose-php +expose_php = On + +;;;;;;;;;;;;;;;;;;; +; Resource Limits ; +;;;;;;;;;;;;;;;;;;; + +; Maximum execution time of each script, in seconds +; http://php.net/max-execution-time +; Note: This directive is hardcoded to 0 for the CLI SAPI +max_execution_time = 30 + +; Maximum amount of time each script may spend parsing request data. It's a good +; idea to limit this time on productions servers in order to eliminate unexpectedly +; long running scripts. +; Note: This directive is hardcoded to -1 for the CLI SAPI +; Default Value: -1 (Unlimited) +; Development Value: 60 (60 seconds) +; Production Value: 60 (60 seconds) +; http://php.net/max-input-time +max_input_time = 60 + +; Maximum input variable nesting level +; http://php.net/max-input-nesting-level +;max_input_nesting_level = 64 + +; How many GET/POST/COOKIE input variables may be accepted +;max_input_vars = 1000 + +; Maximum amount of memory a script may consume +; http://php.net/memory-limit +memory_limit = 128M + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Error handling and logging ; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; This directive informs PHP of which errors, warnings and notices you would like +; it to take action for. The recommended way of setting values for this +; directive is through the use of the error level constants and bitwise +; operators. 
The error level constants are below here for convenience as well as +; some common settings and their meanings. +; By default, PHP is set to take action on all errors, notices and warnings EXCEPT +; those related to E_NOTICE and E_STRICT, which together cover best practices and +; recommended coding standards in PHP. For performance reasons, this is the +; recommended error reporting setting. Your production server shouldn't be wasting +; resources complaining about best practices and coding standards. That's what +; development servers and development settings are for. +; Note: The php.ini-development file has this setting as E_ALL. This +; means it pretty much reports everything which is exactly what you want during +; development and early testing. +; +; Error Level Constants: +; E_ALL - All errors and warnings (includes E_STRICT as of PHP 5.4.0) +; E_ERROR - fatal run-time errors +; E_RECOVERABLE_ERROR - almost fatal run-time errors +; E_WARNING - run-time warnings (non-fatal errors) +; E_PARSE - compile-time parse errors +; E_NOTICE - run-time notices (these are warnings which often result +; from a bug in your code, but it's possible that it was +; intentional (e.g., using an uninitialized variable and +; relying on the fact it is automatically initialized to an +; empty string)) +; E_STRICT - run-time notices, enable to have PHP suggest changes +; to your code which will ensure the best interoperability +; and forward compatibility of your code +; E_CORE_ERROR - fatal errors that occur during PHP's initial startup +; E_CORE_WARNING - warnings (non-fatal errors) that occur during PHP's +; initial startup +; E_COMPILE_ERROR - fatal compile-time errors +; E_COMPILE_WARNING - compile-time warnings (non-fatal errors) +; E_USER_ERROR - user-generated error message +; E_USER_WARNING - user-generated warning message +; E_USER_NOTICE - user-generated notice message +; E_DEPRECATED - warn about code that will not work in future versions +; of PHP +; E_USER_DEPRECATED -
user-generated deprecation warnings +; +; Common Values: +; E_ALL (Show all errors, warnings and notices including coding standards.) +; E_ALL & ~E_NOTICE (Show all errors, except for notices) +; E_ALL & ~E_NOTICE & ~E_STRICT (Show all errors, except for notices and coding standards warnings.) +; E_COMPILE_ERROR|E_RECOVERABLE_ERROR|E_ERROR|E_CORE_ERROR (Show only errors) +; Default Value: E_ALL & ~E_NOTICE & ~E_STRICT & ~E_DEPRECATED +; Development Value: E_ALL +; Production Value: E_ALL & ~E_DEPRECATED & ~E_STRICT +; http://php.net/error-reporting +error_reporting = E_ALL + +; This directive controls whether or not and where PHP will output errors, +; notices and warnings too. Error output is very useful during development, but +; it could be very dangerous in production environments. Depending on the code +; which is triggering the error, sensitive information could potentially leak +; out of your application such as database usernames and passwords or worse. +; For production environments, we recommend logging errors rather than +; sending them to STDOUT. +; Possible Values: +; Off = Do not display any errors +; stderr = Display errors to STDERR (affects only CGI/CLI binaries!) +; On or stdout = Display errors to STDOUT +; Default Value: On +; Development Value: On +; Production Value: Off +; http://php.net/display-errors +display_errors = On + +; The display of errors which occur during PHP's startup sequence are handled +; separately from display_errors. PHP's default behavior is to suppress those +; errors from clients. Turning the display of startup errors on can be useful in +; debugging configuration problems. We strongly recommend you +; set this to 'off' for production servers. 
+; Default Value: Off +; Development Value: On +; Production Value: Off +; http://php.net/display-startup-errors +display_startup_errors = On + +; Besides displaying errors, PHP can also log errors to locations such as a +; server-specific log, STDERR, or a location specified by the error_log +; directive found below. While errors should not be displayed on productions +; servers they should still be monitored and logging is a great way to do that. +; Default Value: Off +; Development Value: On +; Production Value: On +; http://php.net/log-errors +log_errors = On + +; Set maximum length of log_errors. In error_log information about the source is +; added. The default is 1024 and 0 allows to not apply any maximum length at all. +; http://php.net/log-errors-max-len +log_errors_max_len = 1024 + +; Do not log repeated messages. Repeated errors must occur in same file on same +; line unless ignore_repeated_source is set true. +; http://php.net/ignore-repeated-errors +ignore_repeated_errors = Off + +; Ignore source of message when ignoring repeated messages. When this setting +; is On you will not log errors with repeated messages from different files or +; source lines. +; http://php.net/ignore-repeated-source +ignore_repeated_source = Off + +; If this parameter is set to Off, then memory leaks will not be shown (on +; stdout or in the log). This is only effective in a debug compile, and if +; error reporting includes E_WARNING in the allowed list +; http://php.net/report-memleaks +report_memleaks = On + +; This setting is on by default. +;report_zend_debug = 0 + +; Store the last error/warning message in $php_errormsg (boolean). Setting this value +; to On can assist in debugging and is appropriate for development servers. It should +; however be disabled on production servers. +; This directive is DEPRECATED. 
+; Default Value: Off +; Development Value: Off +; Production Value: Off +; http://php.net/track-errors +;track_errors = Off + +; Turn off normal error reporting and emit XML-RPC error XML +; http://php.net/xmlrpc-errors +;xmlrpc_errors = 0 + +; An XML-RPC faultCode +;xmlrpc_error_number = 0 + +; When PHP displays or logs an error, it has the capability of formatting the +; error message as HTML for easier reading. This directive controls whether +; the error message is formatted as HTML or not. +; Note: This directive is hardcoded to Off for the CLI SAPI +; http://php.net/html-errors +;html_errors = On + +; If html_errors is set to On *and* docref_root is not empty, then PHP +; produces clickable error messages that direct to a page describing the error +; or function causing the error in detail. +; You can download a copy of the PHP manual from http://php.net/docs +; and change docref_root to the base URL of your local copy including the +; leading '/'. You must also specify the file extension being used including +; the dot. PHP's default behavior is to leave these settings empty, in which +; case no links to documentation are generated. +; Note: Never use this feature for production boxes. +; http://php.net/docref-root +; Examples +;docref_root = "/phpmanual/" + +; http://php.net/docref-ext +;docref_ext = .html + +; String to output before an error message. PHP's default behavior is to leave +; this setting blank. +; http://php.net/error-prepend-string +; Example: +;error_prepend_string = "" + +; String to output after an error message. PHP's default behavior is to leave +; this setting blank. +; http://php.net/error-append-string +; Example: +;error_append_string = "" + +; Log errors to specified file. PHP's default behavior is to leave this value +; empty. +; http://php.net/error-log +; Example: +;error_log = php_errors.log +; Log errors to syslog (Event Log on Windows). 
+;error_log = syslog + +; The syslog ident is a string which is prepended to every message logged +; to syslog. Only used when error_log is set to syslog. +;syslog.ident = php + +; The syslog facility is used to specify what type of program is logging +; the message. Only used when error_log is set to syslog. +;syslog.facility = user + +; Set this to disable filtering control characters (the default). +; Some loggers only accept NVT-ASCII, others accept anything that's not +; control characters. If your logger accepts everything, then no filtering +; is needed at all. +; Allowed values are: +; ascii (all printable ASCII characters and NL) +; no-ctrl (all characters except control characters) +; all (all characters) +; raw (like "all", but messages are not split at newlines) +; http://php.net/syslog.filter +;syslog.filter = ascii + +;windows.show_crt_warning +; Default value: 0 +; Development value: 0 +; Production value: 0 + +;;;;;;;;;;;;;;;;; +; Data Handling ; +;;;;;;;;;;;;;;;;; + +; The separator used in PHP generated URLs to separate arguments. +; PHP's default setting is "&". +; http://php.net/arg-separator.output +; Example: +;arg_separator.output = "&" + +; List of separator(s) used by PHP to parse input URLs into variables. +; PHP's default setting is "&". +; NOTE: Every character in this directive is considered as separator! +; http://php.net/arg-separator.input +; Example: +;arg_separator.input = ";&" + +; This directive determines which super global arrays are registered when PHP +; starts up. G,P,C,E & S are abbreviations for the following respective super +; globals: GET, POST, COOKIE, ENV and SERVER. There is a performance penalty +; paid for the registration of these arrays and because ENV is not as commonly +; used as the others, ENV is not recommended on productions servers. You +; can still get access to the environment variables through getenv() should you +; need to. 
+; Default Value: "EGPCS" +; Development Value: "GPCS" +; Production Value: "GPCS" +; http://php.net/variables-order +variables_order = "GPCS" + +; This directive determines which super global data (G,P & C) should be +; registered into the super global array REQUEST. If so, it also determines +; the order in which that data is registered. The values for this directive +; are specified in the same manner as the variables_order directive, +; EXCEPT one. Leaving this value empty will cause PHP to use the value set +; in the variables_order directive. It does not mean it will leave the super +; globals array REQUEST empty. +; Default Value: None +; Development Value: "GP" +; Production Value: "GP" +; http://php.net/request-order +request_order = "GP" + +; This directive determines whether PHP registers $argv & $argc each time it +; runs. $argv contains an array of all the arguments passed to PHP when a script +; is invoked. $argc contains an integer representing the number of arguments +; that were passed when the script was invoked. These arrays are extremely +; useful when running scripts from the command line. When this directive is +; enabled, registering these variables consumes CPU cycles and memory each time +; a script is executed. For performance reasons, this feature should be disabled +; on production servers. +; Note: This directive is hardcoded to On for the CLI SAPI +; Default Value: On +; Development Value: Off +; Production Value: Off +; http://php.net/register-argc-argv +register_argc_argv = Off + +; When enabled, the ENV, REQUEST and SERVER variables are created when they're +; first used (Just In Time) instead of when the script starts. If these +; variables are not used within a script, having this directive on will result +; in a performance gain. The PHP directive register_argc_argv must be disabled +; for this directive to have any effect. +; http://php.net/auto-globals-jit +auto_globals_jit = On + +; Whether PHP will read the POST data.
+; This option is enabled by default. +; Most likely, you won't want to disable this option globally. It causes $_POST +; and $_FILES to always be empty; the only way you will be able to read the +; POST data will be through the php://input stream wrapper. This can be useful +; to proxy requests or to process the POST data in a memory efficient fashion. +; http://php.net/enable-post-data-reading +;enable_post_data_reading = Off + +; Maximum size of POST data that PHP will accept. +; Its value may be 0 to disable the limit. It is ignored if POST data reading +; is disabled through enable_post_data_reading. +; http://php.net/post-max-size +post_max_size = 8M + +; Automatically add files before PHP document. +; http://php.net/auto-prepend-file +auto_prepend_file = + +; Automatically add files after PHP document. +; http://php.net/auto-append-file +auto_append_file = + +; By default, PHP will output a media type using the Content-Type header. To +; disable this, simply set it to be empty. +; +; PHP's built-in default media type is set to text/html. +; http://php.net/default-mimetype +default_mimetype = "text/html" + +; PHP's default character set is set to UTF-8. +; http://php.net/default-charset +default_charset = "UTF-8" + +; PHP internal character encoding is set to empty. +; If empty, default_charset is used. +; http://php.net/internal-encoding +;internal_encoding = + +; PHP input character encoding is set to empty. +; If empty, default_charset is used. +; http://php.net/input-encoding +;input_encoding = + +; PHP output character encoding is set to empty. +; If empty, default_charset is used. +; See also output_buffer. 
+; http://php.net/output-encoding +;output_encoding = + +;;;;;;;;;;;;;;;;;;;;;;;;; +; Paths and Directories ; +;;;;;;;;;;;;;;;;;;;;;;;;; + +; UNIX: "/path1:/path2" +;include_path = ".:/php/includes" +; +; Windows: "\path1;\path2" +;include_path = ".;c:\php\includes" +; +; PHP's default setting for include_path is ".;/path/to/php/pear" +; http://php.net/include-path + +; The root of the PHP pages, used only if nonempty. +; if PHP was not compiled with FORCE_REDIRECT, you SHOULD set doc_root +; if you are running php as a CGI under any web server (other than IIS) +; see documentation for security issues. The alternate is to use the +; cgi.force_redirect configuration below +; http://php.net/doc-root +doc_root = + +; The directory under which PHP opens the script using /~username used only +; if nonempty. +; http://php.net/user-dir +user_dir = + +; Directory in which the loadable extensions (modules) reside. +; http://php.net/extension-dir +extension_dir = "/opt/php8-sp/lib/php/extensions/no-debug-non-zts-20220829/" +; On windows: +;extension_dir = "ext" + +; Directory where the temporary files should be placed. +; Defaults to the system default (see sys_get_temp_dir) +;sys_temp_dir = "/tmp" + +; Whether or not to enable the dl() function. The dl() function does NOT work +; properly in multithreaded servers, such as IIS or Zeus, and is automatically +; disabled on them. +; http://php.net/enable-dl +enable_dl = Off + +; cgi.force_redirect is necessary to provide security running PHP as a CGI under +; most web servers. Left undefined, PHP turns this on by default. You can +; turn it off here AT YOUR OWN RISK +; **You CAN safely turn this off for IIS, in fact, you MUST.** +; http://php.net/cgi.force-redirect +;cgi.force_redirect = 1 + +; if cgi.nph is enabled it will force cgi to always sent Status: 200 with +; every request. PHP's default behavior is to disable this feature. 
+;cgi.nph = 1 + +; if cgi.force_redirect is turned on, and you are not running under Apache or Netscape +; (iPlanet) web servers, you MAY need to set an environment variable name that PHP +; will look for to know it is OK to continue execution. Setting this variable MAY +; cause security issues, KNOW WHAT YOU ARE DOING FIRST. +; http://php.net/cgi.redirect-status-env +;cgi.redirect_status_env = + +; cgi.fix_pathinfo provides *real* PATH_INFO/PATH_TRANSLATED support for CGI. PHP's +; previous behaviour was to set PATH_TRANSLATED to SCRIPT_FILENAME, and to not grok +; what PATH_INFO is. For more information on PATH_INFO, see the cgi specs. Setting +; this to 1 will cause PHP CGI to fix its paths to conform to the spec. A setting +; of zero causes PHP to behave as before. Default is 1. You should fix your scripts +; to use SCRIPT_FILENAME rather than PATH_TRANSLATED. +; http://php.net/cgi.fix-pathinfo +;cgi.fix_pathinfo=1 + +; if cgi.discard_path is enabled, the PHP CGI binary can safely be placed outside +; of the web tree and people will not be able to circumvent .htaccess security. +;cgi.discard_path=1 + +; FastCGI under IIS supports the ability to impersonate +; security tokens of the calling client. This allows IIS to define the +; security context that the request runs under. mod_fastcgi under Apache +; does not currently support this feature (03/17/2002) +; Set to 1 if running under IIS. Default is zero. +; http://php.net/fastcgi.impersonate +;fastcgi.impersonate = 1 + +; Disable logging through FastCGI connection. PHP's default behavior is to enable +; this feature. +;fastcgi.logging = 0 + +; cgi.rfc2616_headers configuration option tells PHP what type of headers to +; use when sending HTTP response code. If set to 0, PHP sends Status: header that +; is supported by Apache. When this option is set to 1, PHP will send +; RFC2616 compliant header. +; Default is zero. 
+; http://php.net/cgi.rfc2616-headers +;cgi.rfc2616_headers = 0 + +; cgi.check_shebang_line controls whether CGI PHP checks for line starting with #! +; (shebang) at the top of the running script. This line might be needed if the +; script supports running both as stand-alone script and via PHP CGI. PHP in CGI +; mode skips this line and ignores its content if this directive is turned on. +; http://php.net/cgi.check-shebang-line +;cgi.check_shebang_line=1 + +;;;;;;;;;;;;;;;; +; File Uploads ; +;;;;;;;;;;;;;;;; + +; Whether to allow HTTP file uploads. +; http://php.net/file-uploads +file_uploads = On + +; Temporary directory for HTTP uploaded files (will use system default if not +; specified). +; http://php.net/upload-tmp-dir +;upload_tmp_dir = + +; Maximum allowed size for uploaded files. +; http://php.net/upload-max-filesize +upload_max_filesize = 2M + +; Maximum number of files that can be uploaded via a single request +max_file_uploads = 20 + +;;;;;;;;;;;;;;;;;; +; Fopen wrappers ; +;;;;;;;;;;;;;;;;;; + +; Whether to allow the treatment of URLs (like http:// or ftp://) as files. +; http://php.net/allow-url-fopen +allow_url_fopen = On + +; Whether to allow include/require to open URLs (like http:// or ftp://) as files. +; http://php.net/allow-url-include +allow_url_include = Off + +; Define the anonymous ftp password (your email address). PHP's default setting +; for this is empty. +; http://php.net/from +;from="john@doe.com" + +; Define the User-Agent string. PHP's default setting for this is empty.
+; http://php.net/user-agent +;user_agent="PHP" + +; Default timeout for socket based streams (seconds) +; http://php.net/default-socket-timeout +default_socket_timeout = 60 + +; If your scripts have to deal with files from Macintosh systems, +; or you are running on a Mac and need to deal with files from +; unix or win32 systems, setting this flag will cause PHP to +; automatically detect the EOL character in those files so that +; fgets() and file() will work regardless of the source of the file. +; http://php.net/auto-detect-line-endings +;auto_detect_line_endings = Off + +;;;;;;;;;;;;;;;;;;;;;; +; Dynamic Extensions ; +;;;;;;;;;;;;;;;;;;;;;; + +; If you wish to have an extension loaded automatically, use the following +; syntax: +; +; extension=modulename +; +; For example: +; +; extension=mysqli +; +; When the extension library to load is not located in the default extension +; directory, You may specify an absolute path to the library file: +; +; extension=/path/to/extension/mysqli.so +; +; Note : The syntax used in previous PHP versions ('extension=<ext>.so' and +; 'extension=php_<ext>.dll') is supported for legacy reasons and may be +; deprecated in a future PHP major version. So, when it is possible, please +; move to the new ('extension=<ext>') syntax. +; +; Notes for Windows environments : +; +; - Many DLL files are located in the extensions/ (PHP 4) or ext/ (PHP 5+) +; extension folders as well as the separate PECL DLL download (PHP 5+). +; Be sure to appropriately set the extension_dir directive.
+; +zend_extension=opcache +;zend_extension=xdebug +;extension=bz2 +;extension=curl +;extension=ffi +;extension=ftp +;extension=fileinfo +;extension=gd2 +;extension=gettext +;extension=gmp +;extension=intl +;extension=imap +;extension=ldap +;extension=mbstring +;extension=exif ; Must be after mbstring as it depends on it +;extension=mysqli +;extension=oci8_12c ; Use with Oracle Database 12c Instant Client +;extension=odbc +;extension=openssl +;extension=pdo_firebird +;extension=pdo_mysql +;extension=pdo_oci +;extension=pdo_odbc +;extension=pdo_pgsql +;extension=pdo_sqlite +;extension=pgsql +;extension=shmop + +; The MIBS data available in the PHP distribution must be installed. +; See http://www.php.net/manual/en/snmp.installation.php +;extension=snmp + +;extension=soap +;extension=sockets +;extension=sodium +;extension=sqlite3 +;extension=tidy +;extension=xmlrpc +;extension=xsl + +;;;;;;;;;;;;;;;;;;; +; Module Settings ; +;;;;;;;;;;;;;;;;;;; + +[CLI Server] +; Whether the CLI web server uses ANSI color coding in its terminal output. +cli_server.color = On + +[Date] +; Defines the default timezone used by the date functions +; http://php.net/date.timezone +;date.timezone = + +; http://php.net/date.default-latitude +;date.default_latitude = 31.7667 + +; http://php.net/date.default-longitude +;date.default_longitude = 35.2333 + +; http://php.net/date.sunrise-zenith +;date.sunrise_zenith = 90.583333 + +; http://php.net/date.sunset-zenith +;date.sunset_zenith = 90.583333 + +[filter] +; http://php.net/filter.default +;filter.default = unsafe_raw + +; http://php.net/filter.default-flags +;filter.default_flags = + +[iconv] +; Use of this INI entry is deprecated, use global input_encoding instead. +; If empty, default_charset or input_encoding or iconv.input_encoding is used. +; The precedence is: default_charset < input_encoding < iconv.input_encoding +;iconv.input_encoding = + +; Use of this INI entry is deprecated, use global internal_encoding instead. 
+; If empty, default_charset or internal_encoding or iconv.internal_encoding is used. +; The precedence is: default_charset < internal_encoding < iconv.internal_encoding +;iconv.internal_encoding = + +; Use of this INI entry is deprecated, use global output_encoding instead. +; If empty, default_charset or output_encoding or iconv.output_encoding is used. +; The precedence is: default_charset < output_encoding < iconv.output_encoding +; To use an output encoding conversion, iconv's output handler must be set +; otherwise output encoding conversion cannot be performed. +;iconv.output_encoding = + +[imap] +; rsh/ssh logins are disabled by default. Use this INI entry if you want to +; enable them. Note that the IMAP library does not filter mailbox names before +; passing them to rsh/ssh command, thus passing untrusted data to this function +; with rsh/ssh enabled is insecure. +;imap.enable_insecure_rsh=0 + +[intl] +;intl.default_locale = +; This directive allows you to produce PHP errors when some error +; happens within intl functions. The value is the level of the error produced. +; Default is 0, which does not produce any errors. +;intl.error_level = E_WARNING +;intl.use_exceptions = 0 + +[sqlite3] +; Directory pointing to SQLite3 extensions +; http://php.net/sqlite3.extension-dir +;sqlite3.extension_dir = + +; SQLite defensive mode flag (only available from SQLite 3.26+) +; When the defensive flag is enabled, language features that allow ordinary +; SQL to deliberately corrupt the database file are disabled. This forbids +; writing directly to the schema, shadow tables (eg. FTS data tables), or +; the sqlite_dbpage virtual table. +; https://www.sqlite.org/c3ref/c_dbconfig_defensive.html +; (for older SQLite versions, this flag has no use) +;sqlite3.defensive = 1 + +[Pcre] +; PCRE library backtracking limit. +; http://php.net/pcre.backtrack-limit +;pcre.backtrack_limit=100000 + +; PCRE library recursion limit. 
+; Please note that if you set this value to a high number you may consume all +; the available process stack and eventually crash PHP (due to reaching the +; stack size limit imposed by the Operating System). +; http://php.net/pcre.recursion-limit +;pcre.recursion_limit=100000 + +; Enables or disables JIT compilation of patterns. This requires the PCRE +; library to be compiled with JIT support. +;pcre.jit=1 + +[Pdo] +; Whether to pool ODBC connections. Can be one of "strict", "relaxed" or "off" +; http://php.net/pdo-odbc.connection-pooling +;pdo_odbc.connection_pooling=strict + +;pdo_odbc.db2_instance_name + +[Pdo_mysql] +; Default socket name for local MySQL connects. If empty, uses the built-in +; MySQL defaults. +pdo_mysql.default_socket= + +[Phar] +; http://php.net/phar.readonly +;phar.readonly = On + +; http://php.net/phar.require-hash +;phar.require_hash = On + +;phar.cache_list = + +[mail function] +; For Win32 only. +; http://php.net/smtp +SMTP = localhost +; http://php.net/smtp-port +smtp_port = 25 + +; For Win32 only. +; http://php.net/sendmail-from +;sendmail_from = me@example.com + +; For Unix only. You may supply arguments as well (default: "sendmail -t -i"). +; http://php.net/sendmail-path +;sendmail_path = + +; Force the addition of the specified parameters to be passed as extra parameters +; to the sendmail binary. These parameters will always replace the value of +; the 5th parameter to mail(). +;mail.force_extra_parameters = + +; Add X-PHP-Originating-Script: that will include uid of the script followed by the filename +mail.add_x_header = Off + +; The path to a log file that will log all mail() calls. Log entries include +; the full path of the script, line number, To address and headers. +;mail.log = +; Log mail to syslog (Event Log on Windows). 
+;mail.log = syslog + +[ODBC] +; http://php.net/odbc.default-db +;odbc.default_db = Not yet implemented + +; http://php.net/odbc.default-user +;odbc.default_user = Not yet implemented + +; http://php.net/odbc.default-pw +;odbc.default_pw = Not yet implemented + +; Controls the ODBC cursor model. +; Default: SQL_CURSOR_STATIC (default). +;odbc.default_cursortype + +; Allow or prevent persistent links. +; http://php.net/odbc.allow-persistent +odbc.allow_persistent = On + +; Check that a connection is still valid before reuse. +; http://php.net/odbc.check-persistent +odbc.check_persistent = On + +; Maximum number of persistent links. -1 means no limit. +; http://php.net/odbc.max-persistent +odbc.max_persistent = -1 + +; Maximum number of links (persistent + non-persistent). -1 means no limit. +; http://php.net/odbc.max-links +odbc.max_links = -1 + +; Handling of LONG fields. Returns number of bytes to variables. 0 means +; passthru. +; http://php.net/odbc.defaultlrl +odbc.defaultlrl = 4096 + +; Handling of binary data. 0 means passthru, 1 return as is, 2 convert to char. +; See the documentation on odbc_binmode and odbc_longreadlen for an explanation +; of odbc.defaultlrl and odbc.defaultbinmode +; http://php.net/odbc.defaultbinmode +odbc.defaultbinmode = 1 + +[MySQLi] + +; Maximum number of persistent links. -1 means no limit. +; http://php.net/mysqli.max-persistent +mysqli.max_persistent = -1 + +; Allow accessing, from PHP's perspective, local files with LOAD DATA statements +; http://php.net/mysqli.allow_local_infile +;mysqli.allow_local_infile = On + +; Allow or prevent persistent links. +; http://php.net/mysqli.allow-persistent +mysqli.allow_persistent = On + +; Maximum number of links. -1 means no limit. +; http://php.net/mysqli.max-links +mysqli.max_links = -1 + +; Default port number for mysqli_connect(). 
If unset, mysqli_connect() will use +; the $MYSQL_TCP_PORT or the mysql-tcp entry in /etc/services or the +; compile-time value defined MYSQL_PORT (in that order). Win32 will only look +; at MYSQL_PORT. +; http://php.net/mysqli.default-port +mysqli.default_port = 3306 + +; Default socket name for local MySQL connects. If empty, uses the built-in +; MySQL defaults. +; http://php.net/mysqli.default-socket +mysqli.default_socket = + +; Default host for mysqli_connect() (doesn't apply in safe mode). +; http://php.net/mysqli.default-host +mysqli.default_host = + +; Default user for mysqli_connect() (doesn't apply in safe mode). +; http://php.net/mysqli.default-user +mysqli.default_user = + +; Default password for mysqli_connect() (doesn't apply in safe mode). +; Note that this is generally a *bad* idea to store passwords in this file. +; *Any* user with PHP access can run 'echo get_cfg_var("mysqli.default_pw") +; and reveal this password! And of course, any users with read access to this +; file will be able to reveal the password as well. +; http://php.net/mysqli.default-pw +mysqli.default_pw = + +; Allow or prevent reconnect +mysqli.reconnect = Off + +[mysqlnd] +; Enable / Disable collection of general statistics by mysqlnd which can be +; used to tune and monitor MySQL operations. +mysqlnd.collect_statistics = On + +; Enable / Disable collection of memory usage statistics by mysqlnd which can be +; used to tune and monitor MySQL operations. +mysqlnd.collect_memory_statistics = On + +; Records communication from all extensions using mysqlnd to the specified log +; file. +; http://php.net/mysqlnd.debug +;mysqlnd.debug = + +; Defines which queries will be logged. +;mysqlnd.log_mask = 0 + +; Default size of the mysqlnd memory pool, which is used by result sets. +;mysqlnd.mempool_default_size = 16000 + +; Size of a pre-allocated buffer used when sending commands to MySQL in bytes. 
+;mysqlnd.net_cmd_buffer_size = 2048 + +; Size of a pre-allocated buffer used for reading data sent by the server in +; bytes. +;mysqlnd.net_read_buffer_size = 32768 + +; Timeout for network requests in seconds. +;mysqlnd.net_read_timeout = 31536000 + +; SHA-256 Authentication Plugin related. File with the MySQL server public RSA +; key. +;mysqlnd.sha256_server_public_key = + +[OCI8] + +; Connection: Enables privileged connections using external +; credentials (OCI_SYSOPER, OCI_SYSDBA) +; http://php.net/oci8.privileged-connect +;oci8.privileged_connect = Off + +; Connection: The maximum number of persistent OCI8 connections per +; process. Using -1 means no limit. +; http://php.net/oci8.max-persistent +;oci8.max_persistent = -1 + +; Connection: The maximum number of seconds a process is allowed to +; maintain an idle persistent connection. Using -1 means idle +; persistent connections will be maintained forever. +; http://php.net/oci8.persistent-timeout +;oci8.persistent_timeout = -1 + +; Connection: The number of seconds that must pass before issuing a +; ping during oci_pconnect() to check the connection validity. When +; set to 0, each oci_pconnect() will cause a ping. Using -1 disables +; pings completely. +; http://php.net/oci8.ping-interval +;oci8.ping_interval = 60 + +; Connection: Set this to a user chosen connection class to be used +; for all pooled server requests with Oracle 11g Database Resident +; Connection Pooling (DRCP). To use DRCP, this value should be set to +; the same string for all web servers running the same application, +; the database pool must be configured, and the connection string must +; specify to use a pooled server. +;oci8.connection_class = + +; High Availability: Using On lets PHP receive Fast Application +; Notification (FAN) events generated when a database node fails. The +; database must also be configured to post FAN events. 
+;oci8.events = Off + +; Tuning: This option enables statement caching, and specifies how +; many statements to cache. Using 0 disables statement caching. +; http://php.net/oci8.statement-cache-size +;oci8.statement_cache_size = 20 + +; Tuning: Enables statement prefetching and sets the default number of +; rows that will be fetched automatically after statement execution. +; http://php.net/oci8.default-prefetch +;oci8.default_prefetch = 100 + +; Compatibility. Using On means oci_close() will not close +; oci_connect() and oci_new_connect() connections. +; http://php.net/oci8.old-oci-close-semantics +;oci8.old_oci_close_semantics = Off + +[PostgreSQL] +; Allow or prevent persistent links. +; http://php.net/pgsql.allow-persistent +pgsql.allow_persistent = On + +; Detect broken persistent links always with pg_pconnect(). +; Auto reset feature requires a little overheads. +; http://php.net/pgsql.auto-reset-persistent +pgsql.auto_reset_persistent = Off + +; Maximum number of persistent links. -1 means no limit. +; http://php.net/pgsql.max-persistent +pgsql.max_persistent = -1 + +; Maximum number of links (persistent+non persistent). -1 means no limit. +; http://php.net/pgsql.max-links +pgsql.max_links = -1 + +; Ignore PostgreSQL backends Notice message or not. +; Notice message logging require a little overheads. +; http://php.net/pgsql.ignore-notice +pgsql.ignore_notice = 0 + +; Log PostgreSQL backends Notice message or not. +; Unless pgsql.ignore_notice=0, module cannot log notice message. +; http://php.net/pgsql.log-notice +pgsql.log_notice = 0 + +[bcmath] +; Number of decimal digits for all bcmath functions. +; http://php.net/bcmath.scale +bcmath.scale = 0 + +[browscap] +; http://php.net/browscap +;browscap = extra/browscap.ini + +[Session] +; Handler used to store/retrieve data. +; http://php.net/session.save-handler +session.save_handler = files + +; Argument passed to save_handler. In the case of files, this is the path +; where data files are stored. 
Note: Windows users have to change this +; variable in order to use PHP's session functions. +; +; The path can be defined as: +; +; session.save_path = "N;/path" +; +; where N is an integer. Instead of storing all the session files in +; /path, what this will do is use subdirectories N-levels deep, and +; store the session data in those directories. This is useful if +; your OS has problems with many files in one directory, and is +; a more efficient layout for servers that handle many sessions. +; +; NOTE 1: PHP will not create this directory structure automatically. +; You can use the script in the ext/session dir for that purpose. +; NOTE 2: See the section on garbage collection below if you choose to +; use subdirectories for session storage +; +; The file storage module creates files using mode 600 by default. +; You can change that by using +; +; session.save_path = "N;MODE;/path" +; +; where MODE is the octal representation of the mode. Note that this +; does not overwrite the process's umask. +; http://php.net/session.save-path +;session.save_path = "/tmp" + +; Whether to use strict session mode. +; Strict session mode does not accept an uninitialized session ID, and +; regenerates the session ID if the browser sends an uninitialized session ID. +; Strict mode protects applications from session fixation via a session adoption +; vulnerability. It is disabled by default for maximum compatibility, but +; enabling it is encouraged. +; https://wiki.php.net/rfc/strict_sessions +session.use_strict_mode = 0 + +; Whether to use cookies. +; http://php.net/session.use-cookies +session.use_cookies = 1 + +; http://php.net/session.cookie-secure +;session.cookie_secure = + +; This option forces PHP to fetch and use a cookie for storing and maintaining +; the session id. We encourage this operation as it's very helpful in combating +; session hijacking when not specifying and managing your own session id. 
It is +; not the be-all and end-all of session hijacking defense, but it's a good start. +; http://php.net/session.use-only-cookies +session.use_only_cookies = 1 + +; Name of the session (used as cookie name). +; http://php.net/session.name +session.name = PHPSESSID + +; Initialize session on request startup. +; http://php.net/session.auto-start +session.auto_start = 0 + +; Lifetime in seconds of cookie or, if 0, until browser is restarted. +; http://php.net/session.cookie-lifetime +session.cookie_lifetime = 0 + +; The path for which the cookie is valid. +; http://php.net/session.cookie-path +session.cookie_path = / + +; The domain for which the cookie is valid. +; http://php.net/session.cookie-domain +session.cookie_domain = + +; Whether or not to add the httpOnly flag to the cookie, which makes it +; inaccessible to browser scripting languages such as JavaScript. +; http://php.net/session.cookie-httponly +session.cookie_httponly = + +; Add SameSite attribute to cookie to help mitigate Cross-Site Request Forgery (CSRF/XSRF) +; Current valid values are "Strict", "Lax" or "None". When using "None", +; make sure to include the quotes, as `none` is interpreted like `false` in ini files. +; https://tools.ietf.org/html/draft-west-first-party-cookies-07 +session.cookie_samesite = + +; Handler used to serialize data. php is the standard serializer of PHP. +; http://php.net/session.serialize-handler +session.serialize_handler = php + +; Defines the probability that the 'garbage collection' process is started on every +; session initialization. The probability is calculated by using gc_probability/gc_divisor, +; e.g. 1/100 means there is a 1% chance that the GC process starts on each request. +; Default Value: 1 +; Development Value: 1 +; Production Value: 1 +; http://php.net/session.gc-probability +session.gc_probability = 1 + +; Defines the probability that the 'garbage collection' process is started on every +; session initialization. 
The probability is calculated by using gc_probability/gc_divisor, +; e.g. 1/100 means there is a 1% chance that the GC process starts on each request. +; For high volume production servers, using a value of 1000 is a more efficient approach. +; Default Value: 100 +; Development Value: 1000 +; Production Value: 1000 +; http://php.net/session.gc-divisor +session.gc_divisor = 1000 + +; After this number of seconds, stored data will be seen as 'garbage' and +; cleaned up by the garbage collection process. +; http://php.net/session.gc-maxlifetime +session.gc_maxlifetime = 1440 + +; NOTE: If you are using the subdirectory option for storing session files +; (see session.save_path above), then garbage collection does *not* +; happen automatically. You will need to do your own garbage +; collection through a shell script, cron entry, or some other method. +; For example, the following script is the equivalent of setting +; session.gc_maxlifetime to 1440 (1440 seconds = 24 minutes): +; find /path/to/sessions -cmin +24 -type f | xargs rm + +; Check HTTP Referer to invalidate externally stored URLs containing ids. +; HTTP_REFERER has to contain this substring for the session to be +; considered as valid. +; http://php.net/session.referer-check +session.referer_check = + +; Set to {nocache,private,public,} to determine HTTP caching aspects +; or leave this empty to avoid sending anti-caching headers. +; http://php.net/session.cache-limiter +session.cache_limiter = nocache + +; Document expires after n minutes. +; http://php.net/session.cache-expire +session.cache_expire = 180 + +; trans sid support is disabled by default. +; Use of trans sid may risk your users' security. +; Use this option with caution. +; - User may send URL contains active session ID +; to other person via. email/irc/etc. +; - URL that contains active session ID may be stored +; in publicly accessible computer. 
+; - User may access your site with the same session ID +; always using URL stored in browser's history or bookmarks. +; http://php.net/session.use-trans-sid +session.use_trans_sid = 0 + +; Set session ID character length. This value could be between 22 to 256. +; Shorter length than default is supported only for compatibility reason. +; Users should use 32 or more chars. +; http://php.net/session.sid-length +; Default Value: 32 +; Development Value: 26 +; Production Value: 26 +session.sid_length = 26 + +; The URL rewriter will look for URLs in a defined set of HTML tags. +; <form> is special; if you include them here, the rewriter will +; add a hidden <input> field with the info which is otherwise appended +; to URLs. <form> tag's action attribute URL will not be modified +; unless it is specified. +; Note that all valid entries require a "=", even if no value follows. +; Default Value: "a=href,area=href,frame=src,form=" +; Development Value: "a=href,area=href,frame=src,form=" +; Production Value: "a=href,area=href,frame=src,form=" +; http://php.net/url-rewriter.tags +session.trans_sid_tags = "a=href,area=href,frame=src,form=" + +; URL rewriter does not rewrite absolute URLs by default. +; To enable rewrites for absolute paths, target hosts must be specified +; at RUNTIME. i.e. use ini_set() +; <form> tags is special. PHP will check action attribute's URL regardless +; of session.trans_sid_tags setting. +; If no host is defined, HTTP_HOST will be used for allowed host. +; Example value: php.net,www.php.net,wiki.php.net +; Use "," for multiple hosts. No spaces are allowed. +; Default Value: "" +; Development Value: "" +; Production Value: "" +;session.trans_sid_hosts="" + +; Define how many bits are stored in each character when converting +; the binary hash data to something readable.
+; Possible values: +; 4 (4 bits: 0-9, a-f) +; 5 (5 bits: 0-9, a-v) +; 6 (6 bits: 0-9, a-z, A-Z, "-", ",") +; Default Value: 4 +; Development Value: 5 +; Production Value: 5 +; http://php.net/session.hash-bits-per-character +session.sid_bits_per_character = 5 + +; Enable upload progress tracking in $_SESSION +; Default Value: On +; Development Value: On +; Production Value: On +; http://php.net/session.upload-progress.enabled +;session.upload_progress.enabled = On + +; Cleanup the progress information as soon as all POST data has been read +; (i.e. upload completed). +; Default Value: On +; Development Value: On +; Production Value: On +; http://php.net/session.upload-progress.cleanup +;session.upload_progress.cleanup = On + +; A prefix used for the upload progress key in $_SESSION +; Default Value: "upload_progress_" +; Development Value: "upload_progress_" +; Production Value: "upload_progress_" +; http://php.net/session.upload-progress.prefix +;session.upload_progress.prefix = "upload_progress_" + +; The index name (concatenated with the prefix) in $_SESSION +; containing the upload progress information +; Default Value: "PHP_SESSION_UPLOAD_PROGRESS" +; Development Value: "PHP_SESSION_UPLOAD_PROGRESS" +; Production Value: "PHP_SESSION_UPLOAD_PROGRESS" +; http://php.net/session.upload-progress.name +;session.upload_progress.name = "PHP_SESSION_UPLOAD_PROGRESS" + +; How frequently the upload progress should be updated. +; Given either in percentages (per-file), or in bytes +; Default Value: "1%" +; Development Value: "1%" +; Production Value: "1%" +; http://php.net/session.upload-progress.freq +;session.upload_progress.freq = "1%" + +; The minimum delay between updates, in seconds +; Default Value: 1 +; Development Value: 1 +; Production Value: 1 +; http://php.net/session.upload-progress.min-freq +;session.upload_progress.min_freq = "1" + +; Only write session data when session data is changed. Enabled by default. 
+; http://php.net/session.lazy-write +;session.lazy_write = On + +[Assertion] +; Switch whether to compile assertions at all (to have no overhead at run-time) +; -1: Do not compile at all +; 0: Jump over assertion at run-time +; 1: Execute assertions +; Changing from or to a negative value is only possible in php.ini! (For turning assertions on and off at run-time, see assert.active, when zend.assertions = 1) +; Default Value: 1 +; Development Value: 1 +; Production Value: -1 +; http://php.net/zend.assertions +zend.assertions = 1 + +; Assert(expr); active by default. +; http://php.net/assert.active +;assert.active = On + +; Throw an AssertionError on failed assertions +; http://php.net/assert.exception +;assert.exception = On + +; Issue a PHP warning for each failed assertion. (Overridden by assert.exception if active) +; http://php.net/assert.warning +;assert.warning = On + +; Don't bail out by default. +; http://php.net/assert.bail +;assert.bail = Off + +; User-function to be called if an assertion fails. +; http://php.net/assert.callback +;assert.callback = 0 + +; Eval the expression with current error_reporting(). Set to true if you want +; error_reporting(0) around the eval(). +; http://php.net/assert.quiet-eval +;assert.quiet_eval = 0 + +[COM] +; path to a file containing GUIDs, IIDs or filenames of files with TypeLibs +; http://php.net/com.typelib-file +;com.typelib_file = + +; allow Distributed-COM calls +; http://php.net/com.allow-dcom +;com.allow_dcom = true + +; autoregister constants of a component's typlib on com_load() +; http://php.net/com.autoregister-typelib +;com.autoregister_typelib = true + +; register constants casesensitive +; http://php.net/com.autoregister-casesensitive +;com.autoregister_casesensitive = false + +; show warnings on duplicate constant registrations +; http://php.net/com.autoregister-verbose +;com.autoregister_verbose = true + +; The default character set code-page to use when passing strings to and from COM objects. 
+; Default: system ANSI code page +;com.code_page= + +[mbstring] +; language for internal character representation. +; This affects mb_send_mail() and mbstring.detect_order. +; http://php.net/mbstring.language +;mbstring.language = Japanese + +; Use of this INI entry is deprecated, use global internal_encoding instead. +; internal/script encoding. +; Some encoding cannot work as internal encoding. (e.g. SJIS, BIG5, ISO-2022-*) +; If empty, default_charset or internal_encoding or iconv.internal_encoding is used. +; The precedence is: default_charset < internal_encoding < iconv.internal_encoding +;mbstring.internal_encoding = + +; Use of this INI entry is deprecated, use global input_encoding instead. +; http input encoding. +; mbstring.encoding_translation = On is needed to use this setting. +; If empty, default_charset or input_encoding or mbstring.input is used. +; The precedence is: default_charset < input_encoding < mbstring.http_input +; http://php.net/mbstring.http-input +;mbstring.http_input = + +; Use of this INI entry is deprecated, use global output_encoding instead. +; http output encoding. +; mb_output_handler must be registered as output buffer to function. +; If empty, default_charset or output_encoding or mbstring.http_output is used. +; The precedence is: default_charset < output_encoding < mbstring.http_output +; To use an output encoding conversion, mbstring's output handler must be set +; otherwise output encoding conversion cannot be performed. +; http://php.net/mbstring.http-output +;mbstring.http_output = + +; enable automatic encoding translation according to +; mbstring.internal_encoding setting. Input chars are +; converted to internal encoding by setting this to On. +; Note: Do _not_ use automatic encoding translation for +; portable libs/applications. +; http://php.net/mbstring.encoding-translation +;mbstring.encoding_translation = Off + +; automatic encoding detection order. 
+; "auto" detect order is changed according to mbstring.language +; http://php.net/mbstring.detect-order +;mbstring.detect_order = auto + +; substitute_character used when character cannot be converted +; one from another +; http://php.net/mbstring.substitute-character +;mbstring.substitute_character = none + +; overload(replace) single byte functions by mbstring functions. +; mail(), ereg(), etc are overloaded by mb_send_mail(), mb_ereg(), +; etc. Possible values are 0,1,2,4 or combination of them. +; For example, 7 for overload everything. +; 0: No overload +; 1: Overload mail() function +; 2: Overload str*() functions +; 4: Overload ereg*() functions +; http://php.net/mbstring.func-overload +;mbstring.func_overload = 0 + +; enable strict encoding detection. +; Default: Off +;mbstring.strict_detection = On + +; This directive specifies the regex pattern of content types for which mb_output_handler() +; is activated. +; Default: mbstring.http_output_conv_mimetype=^(text/|application/xhtml\+xml) +;mbstring.http_output_conv_mimetype= + +; This directive specifies maximum stack depth for mbstring regular expressions. It is similar +; to the pcre.recursion_limit for PCRE. +; Default: 100000 +;mbstring.regex_stack_limit=100000 + +; This directive specifies maximum retry count for mbstring regular expressions. It is similar +; to the pcre.backtrack_limit for PCRE. +; Default: 1000000 +;mbstring.regex_retry_limit=1000000 + +[gd] +; Tell the jpeg decode to ignore warnings and try to create +; a gd image. The warning will then be displayed as notices +; disabled by default +; http://php.net/gd.jpeg-ignore-warning +;gd.jpeg_ignore_warning = 1 + +[exif] +; Exif UNICODE user comments are handled as UCS-2BE/UCS-2LE and JIS as JIS. +; With mbstring support this will automatically be converted into the encoding +; given by corresponding encode setting. When empty mbstring.internal_encoding +; is used. 
For the decode settings you can distinguish between motorola and +; intel byte order. A decode setting cannot be empty. +; http://php.net/exif.encode-unicode +;exif.encode_unicode = ISO-8859-15 + +; http://php.net/exif.decode-unicode-motorola +;exif.decode_unicode_motorola = UCS-2BE + +; http://php.net/exif.decode-unicode-intel +;exif.decode_unicode_intel = UCS-2LE + +; http://php.net/exif.encode-jis +;exif.encode_jis = + +; http://php.net/exif.decode-jis-motorola +;exif.decode_jis_motorola = JIS + +; http://php.net/exif.decode-jis-intel +;exif.decode_jis_intel = JIS + +[Tidy] +; The path to a default tidy configuration file to use when using tidy +; http://php.net/tidy.default-config +;tidy.default_config = /usr/local/lib/php/default.tcfg + +; Should tidy clean and repair output automatically? +; WARNING: Do not use this option if you are generating non-html content +; such as dynamic images +; http://php.net/tidy.clean-output +tidy.clean_output = Off + +[soap] +; Enables or disables WSDL caching feature. +; http://php.net/soap.wsdl-cache-enabled +soap.wsdl_cache_enabled=1 + +; Sets the directory name where SOAP extension will put cache files. +; http://php.net/soap.wsdl-cache-dir +soap.wsdl_cache_dir="/tmp" + +; (time to live) Sets the number of second while cached file will be used +; instead of original one. +; http://php.net/soap.wsdl-cache-ttl +soap.wsdl_cache_ttl=86400 + +; Sets the size of the cache limit. (Max. number of WSDL files to cache) +soap.wsdl_cache_limit = 5 + +[sysvshm] +; A default size of the shared memory segment +;sysvshm.init_mem = 10000 + +[ldap] +; Sets the maximum number of open links or -1 for unlimited. +ldap.max_links = -1 + +[dba] +;dba.default_handler= + +[opcache] +; Determines if Zend OPCache is enabled +;opcache.enable=1 + +; Determines if Zend OPCache is enabled for the CLI version of PHP +;opcache.enable_cli=0 + +; The OPcache shared memory storage size. 
+;opcache.memory_consumption=128 + +; The amount of memory for interned strings in Mbytes. +;opcache.interned_strings_buffer=8 + +; The maximum number of keys (scripts) in the OPcache hash table. +; Only numbers between 200 and 1000000 are allowed. +;opcache.max_accelerated_files=10000 + +; The maximum percentage of "wasted" memory until a restart is scheduled. +;opcache.max_wasted_percentage=5 + +; When this directive is enabled, the OPcache appends the current working +; directory to the script key, thus eliminating possible collisions between +; files with the same name (basename). Disabling the directive improves +; performance, but may break existing applications. +;opcache.use_cwd=1 + +; When disabled, you must reset the OPcache manually or restart the +; webserver for changes to the filesystem to take effect. +;opcache.validate_timestamps=1 + +; How often (in seconds) to check file timestamps for changes to the shared +; memory storage allocation. ("1" means validate once per second, but only +; once per request. "0" means always validate) +;opcache.revalidate_freq=2 + +; Enables or disables file search in include_path optimization +;opcache.revalidate_path=0 + +; If disabled, all PHPDoc comments are dropped from the code to reduce the +; size of the optimized code. +;opcache.save_comments=1 + +; Allow file existence override (file_exists, etc.) performance feature. +;opcache.enable_file_override=0 + +; A bitmask, where each bit enables or disables the appropriate OPcache +; passes +;opcache.optimization_level=0x7FFFBFFF + +;opcache.dups_fix=0 + +; The location of the OPcache blacklist file (wildcards allowed). +; Each OPcache blacklist file is a text file that holds the names of files +; that should not be accelerated. The file format is to add each filename +; to a new line. The filename may be a full path or just a file prefix +; (i.e., /var/www/x blacklists all the files and directories in /var/www +; that start with 'x'). 
Line starting with a ; are ignored (comments). +;opcache.blacklist_filename= + +; Allows exclusion of large files from being cached. By default all files +; are cached. +;opcache.max_file_size=0 + +; Check the cache checksum each N requests. +; The default value of "0" means that the checks are disabled. +;opcache.consistency_checks=0 + +; How long to wait (in seconds) for a scheduled restart to begin if the cache +; is not being accessed. +;opcache.force_restart_timeout=180 + +; OPcache error_log file name. Empty string assumes "stderr". +;opcache.error_log= + +; All OPcache errors go to the Web server log. +; By default, only fatal errors (level 0) or errors (level 1) are logged. +; You can also enable warnings (level 2), info messages (level 3) or +; debug messages (level 4). +;opcache.log_verbosity_level=1 + +; Preferred Shared Memory back-end. Leave empty and let the system decide. +;opcache.preferred_memory_model= + +; Protect the shared memory from unexpected writing during script execution. +; Useful for internal debugging only. +;opcache.protect_memory=0 + +; Allows calling OPcache API functions only from PHP scripts which path is +; started from specified string. The default "" means no restriction +;opcache.restrict_api= + +; Mapping base of shared memory segments (for Windows only). All the PHP +; processes have to map shared memory into the same address space. This +; directive allows to manually fix the "Unable to reattach to base address" +; errors. +;opcache.mmap_base= + +; Facilitates multiple OPcache instances per user (for Windows only). All PHP +; processes with the same cache ID and user share an OPcache instance. +;opcache.cache_id= + +; Enables and sets the second level cache directory. +; It should improve performance when SHM memory is full, at server restart or +; SHM reset. The default "" disables file based caching. +;opcache.file_cache= + +; Enables or disables opcode caching in shared memory. 
+;opcache.file_cache_only=0 + +; Enables or disables checksum validation when script loaded from file cache. +;opcache.file_cache_consistency_checks=1 + +; Implies opcache.file_cache_only=1 for a certain process that failed to +; reattach to the shared memory (for Windows only). Explicitly enabled file +; cache is required. +;opcache.file_cache_fallback=1 + +; Enables or disables copying of PHP code (text segment) into HUGE PAGES. +; This should improve performance, but requires appropriate OS configuration. +;opcache.huge_code_pages=0 + +; Validate cached file permissions. +;opcache.validate_permission=0 + +; Prevent name collisions in chroot'ed environment. +;opcache.validate_root=0 + +; If specified, it produces opcode dumps for debugging different stages of +; optimizations. +;opcache.opt_debug_level=0 + +; Specifies a PHP script that is going to be compiled and executed at server +; start-up. +; http://php.net/opcache.preload +;opcache.preload= + +; Preloading code as root is not allowed for security reasons. This directive +; facilitates to let the preloading to be run as another user. +; http://php.net/opcache.preload_user +;opcache.preload_user= + +; Prevents caching files that are less than this number of seconds old. It +; protects from caching of incompletely updated files. In case all file updates +; on your site are atomic, you may increase performance by setting it to "0". +;opcache.file_update_protection=2 + +; Absolute path used to store shared lockfiles (for *nix only). +;opcache.lockfile_path=/tmp + +[curl] +; A default value for the CURLOPT_CAINFO option. This is required to be an +; absolute path. +;curl.cainfo = + +[openssl] +; The location of a Certificate Authority (CA) file on the local filesystem +; to use when verifying the identity of SSL/TLS peers. Most users should +; not specify a value for this directive as PHP will attempt to use the +; OS-managed cert stores in its absence. 
If specified, this value may still +; be overridden on a per-stream basis via the "cafile" SSL stream context +; option. +;openssl.cafile= + +; If openssl.cafile is not specified or if the CA file is not found, the +; directory pointed to by openssl.capath is searched for a suitable +; certificate. This value must be a correctly hashed certificate directory. +; Most users should not specify a value for this directive as PHP will +; attempt to use the OS-managed cert stores in its absence. If specified, +; this value may still be overridden on a per-stream basis via the "capath" +; SSL stream context option. +;openssl.capath= + +[ffi] +; FFI API restriction. Possible values: +; "preload" - enabled in CLI scripts and preloaded files (default) +; "false" - always disabled +; "true" - always enabled +;ffi.enable=preload + +; List of headers files to preload, wildcard patterns allowed. +;ffi.preload= diff --git a/src/main/scala/io/joern/FileSystemInteraction.scala b/src/main/scala/io/joern/FileSystemInteraction.scala new file mode 100644 index 0000000..c7bdfbd --- /dev/null +++ b/src/main/scala/io/joern/FileSystemInteraction.scala @@ -0,0 +1,74 @@ +package io.joern + +import java.io.File +import scala.annotation.tailrec +import scala.io.StdIn.readLine + +class FileAlreadyExists(path: String) extends Error { + + override def getMessage: String = + s"the file ${path} already exists and shall not be overwritten" + +} + +object FileSystemInteraction { + + def fileExists(path: String): Boolean = { + new File(path).exists() + } + + /** Asks a yes/no question on stdin and repeats it until the answer is usable. + * + * An empty line or end of input selects the default advertised by the + * `[y/N]` prompt (No, i.e. false); any other unrecognised answer asks again. + */ + @tailrec + def askPermission(message: String): Boolean = { + print(message + " [y/N]") + readLine() match { + case "y" | "Y" => true + // readLine() returns null on end of input; asking again would loop forever + case null | "" | "n" | "N" => false + case _ => askPermission(message) + } + } + + def interactiveOverwriteCheck(path: String): String = { + if (!fileExists(path) || askPermission(s"$path already exists.
Overwrite?")) { + path + } else { + throw new FileAlreadyExists(path) + } + } + + /** Recursively deletes dir and everything below it. + * Symbolic links below dir are removed but never followed. + * Throws a RuntimeException if dir is not a directory. + */ + def deleteDirectory(dir: File): Unit = { + // if the provided file is a directory + if (dir.isDirectory) { + // listFiles() yields null on an I/O error - treat that as "no children" + val children: Array[File] = + Option(dir.listFiles()).getOrElse(Array.empty[File]) + // iterate over all children + children.foreach { child => + // if a child is a directory BUT NOT A SYMLINK + if (child.isDirectory && !java.nio.file.Files.isSymbolicLink( + child.toPath)) { + // recurse + deleteDirectory(child) + } else { + // plain files are deleted, symlinks are removed without recursing + child.delete() + } + } + // now also delete + dir.delete() + } else { + // well we expected a directory didn't we? + throw new RuntimeException(s"${dir.getAbsolutePath} is not a directory") + } + } + +} diff --git a/src/main/scala/io/joern/Main.scala b/src/main/scala/io/joern/Main.scala new file mode 100644 index 0000000..cbe3c51 --- /dev/null +++ b/src/main/scala/io/joern/Main.scala @@ -0,0 +1,77 @@ +package io.joern + +import io.joern.bytecode.parser.PHPVersion +import io.joern.bytecode.util.FilterFiles.filterFiles +import io.joern.config.CPGConfig +import io.joern.reporting.Report +import io.shiftleft.x2cpg.SourceFiles + +import java.io.File +import java.nio.file.Files +import scala.sys.exit + +object Main { + + def main(args: Array[String]): Unit = { + val config = Option(io.joern.config.CPGConfig.initializeConfig(args)) + .getOrElse(return ) + if (!config.forced) { + FileSystemInteraction.interactiveOverwriteCheck(config.output) + } + val layerReport: Option[Report] = try { + config.layer.get match { + case "bytecode" => Some(createByteCodeCPG(config)) + case "sourcecode" => throw new NotImplementedError() + case "all" => throw new NotImplementedError() + case "log-profiler" => + None + } + } catch { + case x: Throwable => + throw new RuntimeException( + s"We encountered a unmitigated error.
Please create a bug report for $x: ${x.getMessage}.", x) + } + layerReport match { + case Some(layerReport) => + if (config.report) { + val reportOut = config.output + ".report" + layerReport.writeToFile(reportOut) + } + if (layerReport.successWithinSpecs) { + exit(0) + } else { + exit(1) + } + case None => // no report was produced: nothing to write, return normally + } + } + + private def createByteCodeCPG(config: CPGConfig): Report = { + val newRootFolder = config.rootFolder + val sourceFilesNames = SourceFiles + .determine( + Set(newRootFolder), + config.files.toSet + ) + .distinct + val sourceFiles = filterFiles(sourceFilesNames.map(new File(_))) + implicit val version: PHPVersion.Value = config.phpversion match { + case "7" => PHPVersion.V7 + case "8" => PHPVersion.V8 + case x => throw new RuntimeException(s"PHP version $x is not supported") + } + val byteCodeCpg = new bytecode.PhpToCpg() + var report: Option[Report] = None + try { + byteCodeCpg + .createCpg(sourceFiles, config.output, config) + .close() + } finally { + report = Some(byteCodeCpg.getFinalReport) + } + report.getOrElse( + throw new RuntimeException( + "there is no report! Really bad sign - good luck debugging")) + } + +} diff --git a/updateDependencies.sh b/updateDependencies.sh new file mode 100755 index 0000000..cc583ee --- /dev/null +++ b/updateDependencies.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +NON_INTERACTIVE_OPTION=$1 + +function update { + local NAME=$1 + local REPO=$2 + + local HIGHEST_TAG=`git ls-remote --tags $REPO | awk -F"/" '{print $3}' | grep '^v[0-9]*\.[0-9]*\.[0-9]*' | grep -v {} | sort --version-sort | tail -n1` + # drop initial v from git tag + local VERSION=${HIGHEST_TAG:1} + local SEARCH="val ${NAME}Version\([ ]*\)= .*" + local REPLACE="val ${NAME}Version\1= \"$VERSION\"" + + if [ "$NON_INTERACTIVE_OPTION" == "--non-interactive" ]; then + echo "non-interactive mode, auto-updating all dependencies" + sed -i "s/$SEARCH/$REPLACE/" build.sbt + else + echo "set version for $NAME to $VERSION?
[Y/n]" + read -r ANSWER + if [ -z "$ANSWER" ] || [ "y" == "$ANSWER" ] || [ "Y" == "$ANSWER" ]; then # quoted so answers containing spaces cannot break the test + sed -i "s/$SEARCH/$REPLACE/" build.sbt + fi + fi +} + +update cpg https://github.com/ShiftLeftSecurity/codepropertygraph/ diff --git a/version.sbt b/version.sbt new file mode 100644 index 0000000..7a9150b --- /dev/null +++ b/version.sbt @@ -0,0 +1 @@ +ThisBuild / version := "2.5.9"