diff --git a/.clang-tidy b/.clang-tidy index 94ed69701b..3308a915db 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -29,10 +29,12 @@ Checks: '-*, -cppcoreguidelines-init-variables, -cppcoreguidelines-macro-usage, -cppcoreguidelines-avoid-do-while, + -cppcoreguidelines-avoid-c-arrays, bugprone-*, -bugprone-easily-swappable-parameters, modernize-*, -modernize-use-trailing-return-type, + -modernize-avoid-c-arrays, performance-*, clang-analyzer-* ' @@ -59,7 +61,7 @@ CheckOptions: - key: readability-identifier-naming.ParameterIgnoredRegexp value: (d|d1|d2|d3|d4|d5|eP|f|n) - key: readability-identifier-naming.FunctionIgnoredRegexp - value: (try_emplace|from_json|to_json|equal_to|to_string|DToString|NToString|FToString|LToString|hash_value) + value: (try_emplace|from_json|to_json|equal_to|to_string|DToString|NToString|FToString|LToString|hash_value|dyn_cast) - key: cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor value: 1 - key: cppcoreguidelines-special-member-functions.AllowMissingMoveFunctions diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 30b1cd00c2..2b7f22bad6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: strategy: fail-fast: true matrix: - os: [ubuntu-20.04, ubuntu-24.04-arm] + os: [ubuntu-24.04, ubuntu-24.04-arm] compiler: [ [clang++-19, clang-19, "clang-19 libclang-rt-19-dev"] ] build: [ Debug, Release, DebugLibdeps ] include: @@ -46,12 +46,7 @@ jobs: run: | ./utils/InstallAptDependencies.sh --noninteractive tzdata ${{ matrix.compiler[2] }} - - uses: swift-actions/setup-swift@v2 - if: matrix.os == 'ubuntu-20.04' - with: - swift-version: "5.8.1" - - name: Building Phasar in ${{ matrix.build }} with ${{ matrix.compiler[0] }} including swift - if: matrix.os == 'ubuntu-20.04' + - name: Building Phasar in ${{ matrix.build }} with ${{ matrix.compiler[0] }} env: CXX: ${{ matrix.compiler[0] }} CC: ${{ matrix.compiler[1] }} @@ -60,29 +55,25 @@ jobs: cmake -S . 
-B build \ -DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} \ -DBUILD_PHASAR_CLANG=OFF \ - -DBUILD_SWIFT_TESTS=ON \ -DPHASAR_USE_Z3=ON \ ${{ matrix.flags }} \ -G Ninja ninja -C build - - name: Building Phasar in ${{ matrix.build }} with ${{ matrix.compiler[0] }} - if: matrix.os != 'ubuntu-20.04' - env: - CXX: ${{ matrix.compiler[0] }} - CC: ${{ matrix.compiler[1] }} + - name: Run Unittests shell: bash run: | - cmake -S . -B build \ - -DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} \ - -DBUILD_PHASAR_CLANG=OFF \ - -DPHASAR_USE_Z3=ON \ - ${{ matrix.flags }} \ - -G Ninja - ninja -C build + cmake --build ./build --target check-phasar-unittests - - name: Run Unittests + - name: Install PhASAR and Build Examples + if: matrix.build == 'DebugLibdeps' # Circumvent conflicting ASAn flags + env: + CXX: ${{ matrix.compiler[0] }} + CC: ${{ matrix.compiler[1] }} shell: bash run: | - cd build - cmake --build . --target check-phasar-unittests + cmake -DCMAKE_INSTALL_PREFIX=./INSTALL -P ./build/cmake_install.cmake + PHASAR_ROOT_DIR=$(pwd) + cd ./examples/how-to + cmake -S . -B build -Dphasar_ROOT="$PHASAR_ROOT_DIR/INSTALL" + cmake --build ./build --target run_sample_programs diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml new file mode 100644 index 0000000000..7588c25fd2 --- /dev/null +++ b/.github/workflows/deploy-docs.yml @@ -0,0 +1,41 @@ +name: Build and Deploy Doxygen Docs +on: + push: + branches: [ development ] + # pull_request: # For testing only. Remove before merge! 
+ # branches: [ development ] +permissions: + contents: write +jobs: + build-and-deploy: + runs-on: ubuntu-24.04 + strategy: + fail-fast: true + continue-on-error: false + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + + - name: Install Phasar Dependencies + shell: bash + run: | + ./utils/InstallAptDependencies.sh --noninteractive tzdata doxygen graphviz + + - name: Build Doxygen Docs + shell: bash + env: + CXX: clang++-15 + CC: clang-15 + run: | + cmake -S . -B build -DPHASAR_BUILD_DOC=ON + cmake --build ./build --target doc_doxygen + + - name: Deploy Doxygen Docs on GitHub Pages + uses: JamesIves/github-pages-deploy-action@v4 + with: + folder: build/docs/html + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 86df3a0ad8..bce9eb49b1 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -5,14 +5,14 @@ on: jobs: pre-commit: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 - - uses: pre-commit/action@v2.0.0 + - uses: pre-commit/action@v3.0.1 with: extra_args: --from-ref origin/development --to-ref HEAD diff --git a/.github/workflows/reviewdog-clang-format.yml b/.github/workflows/reviewdog-clang-format.yml index d3eed65d9a..b6b5bacb91 100644 --- a/.github/workflows/reviewdog-clang-format.yml +++ b/.github/workflows/reviewdog-clang-format.yml @@ -6,7 +6,7 @@ on: jobs: format: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 strategy: fail-fast: false matrix: @@ -14,17 +14,15 @@ jobs: include: - tool: clang-format install: | - sudo apt-key adv --fetch-keys https://apt.llvm.org/llvm-snapshot.gpg.key - sudo add-apt-repository -y 'deb http://apt.llvm.org/focal/ llvm-toolchain-focal-14 main' sudo apt-get update - sudo apt-get -y install 
--no-install-recommends clang-format-14 + sudo apt-get -y install --no-install-recommends clang-format-19 regex: \.(h|c|hpp|cpp)$ - command: clang-format-14 --style=file -i + command: clang-format-19 --style=file -i continue-on-error: false steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: submodules: recursive diff --git a/.gitignore b/.gitignore index f86934e8b1..2aaf8ce5f8 100644 --- a/.gitignore +++ b/.gitignore @@ -30,11 +30,8 @@ doc/* log/* **/*/logs/ -# CMake build dir -build/* - # MS VS Code -.vscode/* +.vscode/ # Eclipse .cproject diff --git a/.gitmodules b/.gitmodules index 70653bf9e2..350885ac54 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,10 +2,6 @@ path = external/json url = https://github.com/nlohmann/json.git ignore = dirty -[submodule "lib/googletest"] - path = external/googletest - url = https://github.com/google/googletest.git - branch = master [submodule "external/json-schema-validator"] path = external/json-schema-validator url = https://github.com/pboettch/json-schema-validator.git diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 786e0aba36..5543f809cf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,6 +12,6 @@ repos: - id: check-added-large-files - id: requirements-txt-fixer - repo: https://github.com/pre-commit/mirrors-clang-format - rev: 'v14.0.6' + rev: 'v19.1.7' hooks: - id: clang-format diff --git a/BUILD.md b/BUILD.md new file mode 100644 index 0000000000..a98acf7cdb --- /dev/null +++ b/BUILD.md @@ -0,0 +1,130 @@ +# Building PhASAR + + +It is recommended to compile PhASAR yourself in order to get the full C++ experience and to have full control over the build mode. +However, you may also want to try out one of the pre-built versions of PhASAR or the Docker container. + +As a shortcut for the very first PhASAR build on your system, you can use our [bootstrap](./bootstrap.sh) script. 
+Please note that you must have python installed for the script to work properly. + +```bash +./bootstrap.sh +``` + +Note: If you want to make changes within PhASAR, it is recommended to build it in Debug mode: + +```bash +./bootstrap.sh -DCMAKE_BUILD_TYPE=Debug +``` + +The bootstrap script may ask for superuser permissions (to install the dependencies); however it is not recommended to start the whole script with `sudo`. + +For subsequent builds, see [Compiling PhASAR](#compiling-phasar-if-not-already-done-using-the-bootstrap-script). + +### Compiling PhASAR (if not already done using the bootstrap script) + +Set the system's variables for the C and C++ compiler to clang: + +```bash +export CC=/usr/local/bin/clang +export CXX=/usr/local/bin/clang++ +``` + +You may need to adjust the paths according to your system. If you cloned PhASAR from GitHub, you need to initialize PhASAR's submodules before building it: + +```bash +git submodule update --init +``` + +If you downloaded PhASAR as a compressed release (e.g. .zip or .tar.gz) you can use the `init-submodules-release.sh` script that manually clones the required submodules: + +```bash +utils/init-submodules-release.sh +``` + +Navigate into the PhASAR directory. The following commands will do the job and compile the PhASAR framework: + +```bash +mkdir build +cd build/ +cmake -G Ninja -DCMAKE_BUILD_TYPE=Release .. +ninja -j $(nproc) # or use a different number of cores to compile it +sudo ninja install # only if you wish to install PhASAR system wide +``` + +When you have used the `bootstrap.sh` script to install PhASAR, the above steps are already done. +Use them as a reference if you wish to modify PhASAR and recompile it. + +After compilation using cmake the following two binaries can be found in the build/tools directory: + ++ `phasar-cli` - the PhASAR command-line tool (previously called `phasar-llvm`) that provides access to analyses that are already implemented within PhASAR. 
Use this if you don't want to build your own tool on top of PhASAR. ++ `myphasartool` - an example tool that shows how tools can be built on top of PhASAR + +Please be careful and check if errors occur during the compilation. + +When using CMake to compile PhASAR the following optional parameters can be used: + +| Parameter : Type| Effect | +|-----------|--------| +| **BUILD_SHARED_LIBS** : BOOL | Build shared libraries -- Not recommended anymore. You may want to use PHASAR_BUILD_DYNLIB instead (default is OFF) | +| **PHASAR_BUILD_DYNLIB** : BOOL | Build one fat shared library (default is OFF) | +| **CMAKE_BUILD_TYPE** : STRING | Build PhASAR in 'Debug', 'RelWithDebInfo' or 'Release' mode (default is 'Debug') | +| **CMAKE_INSTALL_PREFIX** : PATH | Path where PhASAR will be installed if `ninja install` is invoked or the `install` target is built (default is /usr/local/phasar) | +| **PHASAR_CUSTOM_CONFIG_INSTALL_DIR** : PATH | If set, customizes the directory, where configuration files for PhASAR are installed (default is /usr/local/.phasar-config)| +| **PHASAR_ENABLE_DYNAMIC_LOG** : BOOL|Makes it possible to switch the logger on and off at runtime (default is ON)| +| **PHASAR_BUILD_DOC** : BOOL | Build PhASAR documentation (default is OFF) | +| **PHASAR_BUILD_UNITTESTS** : BOOL | Build PhASAR unit tests (default is ON) | +| **PHASAR_BUILD_IR** : BOOL | Build PhASAR IR (required for running the unit tests) (default is ON) | +| **PHASAR_BUILD_OPENSSL_TS_UNITTESTS** : BOOL | Build PhASAR unit tests that require OpenSSL (default is OFF) | +| **PHASAR_ENABLE_PAMM** : STRING | Enable the performance measurement mechanism ('Off', 'Core' or 'Full', default is Off) | +| **PHASAR_ENABLE_PIC** : BOOL | Build Position-Independent Code (default is ON) | +| **PHASAR_ENABLE_WARNINGS** : BOOL | Enable compiler warnings (default is ON) | +| **CMAKE_CXX_STANDARD** : INT|Build phasar in C++17 or C++20 mode (default is 17)| + +You can use these parameters either directly or modify the 
installer-script `bootstrap.sh` + +#### A Remark on Compile Time + +C++'s long compile times are always a pain. As shown above, when using cmake the compilation can easily be run in parallel, resulting in shorter compilation times. Make use of it! + +### Running a Test Solver + +To test if everything works as expected please run the following command: + +`$ phasar-cli -m test/llvm_test_code/basic/module_cpp.ll -D ifds-solvertest` + +You can find the `phasar-cli` tool in the build-tree under `tools/phasar-cli`. + +If you obtain output other than a segmentation fault or an exception terminating the program abnormally everything works as expected. + +### Building PhASAR on a MacOS System + +Due to unfortunate updates to MacOS and the handling of C++, especially on the newer M1 processors, we can't support native development on Mac. +The easiest solution to develop PhASAR on a Mac right now is to use [Docker's development environments](https://docs.docker.com/desktop/dev-environments/). Clone this repository as described in their documentation. Afterwards, you have to log in once manually, as a root user by running `docker exec -it -u root CONTAINER_NAME /bin/bash` (with `CONTAINER_NAME` replaced by the name of your container) to complete the rest of the build process as described in this readme (install submodules, run bootstrap.sh, ...). +Now you can just attach your docker container to VS Code or any other IDE, which supports remote development. + +## Installation + +PhASAR can be installed using the installer scripts as explained in the following. +However, you do not need to install PhASAR in order to use it. + +### Installing PhASAR on an Ubuntu System + +In the following, we would like to give a complete example of how to install +PhASAR using an Ubuntu or Unix-like system. + +Therefore, we provide an installation script. To install PhASAR, just navigate to the top-level +directory of PhASAR and use the following command: + +```bash +./bootstrap.sh --install +``` + +The bootstrap script may ask for superuser permissions. + +Done! 
+ +If You have already built phasar, you can just invoke +```bash +sudo ninja install +``` diff --git a/BreakingChanges.md b/BreakingChanges.md index 8c17daa8ae..af467cf67f 100644 --- a/BreakingChanges.md +++ b/BreakingChanges.md @@ -4,6 +4,20 @@ *None* +## v2510 + +- Removed some old APIs from `PhasarConfig` +- Removed the header `phasar/Config/Version.h`. Use the generated header `phasar/Config/phasar-config.h` instead. +- Removed `getAsJson()` from various classes. Use `printAsJson(llvm::raw_ostream &)` instead. +- Removed `CallGraphAnalysisType::DTA` and the `DTAResolver` (see below) +- Removed the legacy flow functions `Identity`, `LambdaFlow`, etc. Use the static functions from `FlowFunctionTemplates` instead. +- Removed getter-functions from `GeneralStatistics`. Use the corresponding public fields instead. +- Removed `LLVMAliasGraph`. Use `LLVMAliasSet` instead. +- Removed `TypeGraphs/*` as they are not used. +- Removed the namespace-scoped function `initializeLogger()`. Use the static functions in the `Logger` class instead. +- Removed `legacy::stripPointer(const llvm::Type *)` as it does not work anymore with opaque pointers. + + ## v2503 - The `DTAResolver` and the cli option `--call-graph-analysis=dta` do not work anymore (due to opaque pointers) and will be removed for the next release. Please use the `OTF` or `RTA` resolver instead. diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b8d993810..0cf3198a04 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,9 +27,10 @@ if (NOT PHASAR_IN_TREE) project (phasar LANGUAGES C CXX DESCRIPTION "A LLVM-based static analysis framework." + VERSION 2510 ) endif () -set(PHASAR_VERSION 2503) +set(PHASAR_VERSION ${PROJECT_VERSION}) # NOTE: When we require cmake >= 3.21, we can use PROJECT_IS_TOP_LEVEL instead if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) @@ -38,12 +39,6 @@ else() set(PHASAR_BUILD_OPTIONAL_TARGETS_DEFAULT OFF) endif() -option(PHASAR_EXPERIMENTAL_CXX20 "Build phasar in C++20 mode. 
This is an experimental feature" OFF) -if(PHASAR_EXPERIMENTAL_CXX20) - message(DEPRECATION "The option PHASAR_EXPERIMENTAL_CXX20 is deprecated and will be removed in a future version of PhASAR. Use CMAKE_CXX_STANDARD=20 instead.") - set(CMAKE_CXX_STANDARD 20) -endif() - set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) @@ -84,6 +79,8 @@ string(APPEND CMAKE_CXX_FLAGS_RELEASE "") option(CMAKE_VISIBILITY_INLINES_HIDDEN "Hide inlined functions from the DSO table (default ON)" ON) +option(PHASAR_BUILD_MODULES "Build C++20 modules for phasar" OFF) + include(CheckCXXCompilerFlag) # Handle memory issues with linking @@ -288,7 +285,7 @@ if (NOT PHASAR_IN_TREE) FetchContent_Declare( googletest GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG v1.16.0 + GIT_TAG v1.17.0 ) FetchContent_MakeAvailable(googletest) endif() @@ -314,6 +311,18 @@ endif() include(add_llvm) add_llvm() +# SVF +option(PHASAR_USE_SVF "Use SVF for more options in alias analysis (default is OFF)" OFF) +if(PHASAR_USE_SVF) + find_package(SVF REQUIRED CONFIG) + message(STATUS "Found SVF ${SVF_VERSION}") + + if (NOT PHASAR_USE_Z3) + message(WARNING "SVF requires Z3. 
Set PHASAR_USE_Z3=ON") + set(PHASAR_USE_Z3 ON) + endif() +endif() + # Z3 Solver if(PHASAR_IN_TREE) set (PHASAR_USE_Z3 OFF) diff --git a/Config.cmake.in b/Config.cmake.in index e531879c5d..0af87e19b9 100644 --- a/Config.cmake.in +++ b/Config.cmake.in @@ -16,6 +16,7 @@ set(PHASAR_USE_LLVM_FAT_LIB @USE_LLVM_FAT_LIB@) set(PHASAR_BUILD_DYNLIB @PHASAR_BUILD_DYNLIB@) set(PHASAR_USE_Z3 @PHASAR_USE_Z3@) set(PHASAR_HAS_SQLITE @PHASAR_HAS_SQLITE@) +set(PHASAR_BUILD_MODULES @PHASAR_BUILD_MODULES@) if (PHASAR_USE_Z3) find_dependency(Z3 REQUIRED) @@ -53,31 +54,30 @@ include("${CMAKE_CURRENT_LIST_DIR}/PhasarExports.cmake") foreach(component ${phasar_FIND_COMPONENTS}) if(NOT ${component} IN_LIST PHASAR_COMPONENTS) - set(phasar_FOUND false) + set(phasar_FOUND FALSE) set(phasar_NOT_FOUND_MESSAGE "Unsupported component: ${component}. Valid components are: ${PHASAR_COMPONENTS}") endif() list(APPEND PHASAR_NEEDED_LIBS phasar::${component}) endforeach() -if (NOT DEFINED phasar_FOUND OR phasar_FOUND EQUAL TRUE) - foreach(component ${phasar_FIND_COMPONENTS}) - # For backwards compatibility -- will be removed with next release - add_library(phasar::phasar_${component} ALIAS phasar::${component}) - endforeach() +if (NOT DEFINED phasar_FOUND) + set(phasar_FOUND TRUE) +endif() + +include(FindPackageHandleStandardArgs) +if (phasar_FOUND) if (NOT phasar_FIND_COMPONENTS) list(APPEND PHASAR_NEEDED_LIBS phasar::phasar) # Default target add_library(phasar ALIAS phasar::phasar) endif() - function(phasar_config executable) - message(DEPRECATION "The function 'phasar_config' is deprecated. 
Use target_link_libraries(${executable} PUBLIC phasar::phasar) instead!") - - target_link_libraries(${executable} - PUBLIC - ${PHASAR_NEEDED_LIBS} - ) - endfunction() + find_package_message(phasar + "Found phasar: ${PHASAR_LIBRARY_DIR} (found version \"${PHASAR_VERSION}\")" + "[${PHASAR_LIBRARY_DIR}][${PHASAR_INCLUDE_DIR}]" + ) +else() + message(STATUS "Could NOT find phasar") endif() diff --git a/README.md b/README.md index d15020fae3..684f5dffa7 100644 --- a/README.md +++ b/README.md @@ -1,35 +1,10 @@ ![PhASAR logo](img/Logo_RGB/Phasar_Logo.png) -# PhASAR a LLVM-based Static Analysis Framework +# PhASAR: A LLVM-based Static Analysis Framework [![C++ Standard](https://img.shields.io/badge/C++_Standard-C%2B%2B17-blue.svg?style=flat&logo=c%2B%2B)](https://isocpp.org/) [![GitHub license](https://img.shields.io/badge/license-MIT-blueviolet.svg)](https://raw.githubusercontent.com/secure-software-engineering/phasar/master/LICENSE.txt) - -Version 2503 - -## Secure Software Engineering Group - -PhASAR is primarily developed and maintained by the Secure Software Engineering Group at Heinz Nixdorf Institute (University of Paderborn) and Fraunhofer IEM. - -PhASAR was initially developed by Philipp Dominik Schubert (@pdschubert)(). - -Currently, PhASAR is maintained by -- Fabian Schiebel (@fabianbs96)() -- Sriteja Kummita (@sritejakv) -- Lucas Briese (@jusito) -- Martin Mory (@MMory)() -- *others* - -## Required Version of the C++ Standard - -PhASAR requires at least C++-17. - -However, building in C++20 mode is supported. You may enable this setting the cmake variable `CMAKE_CXX_STANDARD` to `20`. -Although phasar currently does not make use of C++-20 features (except for some `concept`s behind an #ifdef border), your client application that just *uses* phasar as a library may want to use C++20 ealier. 
- -## Currently Supported Version of LLVM - -PhASAR is currently set up to support LLVM-15.0.* +[![GitHub Release](https://img.shields.io/github/v/release/secure-software-engineering/phasar?label=version)](https://github.com/secure-software-engineering/phasar/releases) ## What is PhASAR? @@ -39,147 +14,52 @@ fully-automated manner on the specified LLVM IR target code. Computing points-to information, call-graph(s), etc. is done by the framework, thus you can focus on what matters. -## Breaking Changes - -To keep PhASAR in a state that it is well suited for state-of-the-art research in static analysis, as well as for productive use, we have to make breaking changes. Please refer to [Breaking Changes](./BreakingChanges.md) for detailed information on what was broken recently and how to migrate. +You can find available literature on PhASAR [here](https://github.com/secure-software-engineering/phasar/wiki/Useful-Literature#papers-on-phasar). -## How do I get started with PhASAR? +### How do I get started with PhASAR? We have some documentation on PhASAR in our [***Wiki***](https://github.com/secure-software-engineering/phasar/wiki). You probably would like to read this README first. -Please also have a look on PhASAR's project directory and notice the project directory `examples/` as well as the custom tool `tools/example-tool/myphasartool.cpp`. - -## Building PhASAR - -It is recommended to compile PhASAR yourself in order to get the full C++ experience and to have full control over the build mode. -However, you may also want to try out one of the pre-built versions of PhASAR or the Docker container. - -As a shortcut for the very first PhASAR build on your system, you can use our [bootstrap](./bootstrap.sh) script. -Please note that you must have python installed for the script to work properly. 
- -```bash -./bootstrap.sh -``` - -Note: If you want to do changes within PhASAR, it is recommended to build it in Debug mode: - -```bash -./bootstrap.sh -DCMAKE_BUILD_TYPE=Debug -``` - -The bootstrap script may ask for superuser permissions (to install the dependencies); however it is not recommended to start the whole script with `sudo`. +Please also have a look on PhASAR's project directory and notice the project directory [examples](./examples/) as well as the custom tool `tools/example-tool/myphasartool.cpp`. -For subsequent builds, see [Compiling PhASAR](#compiling-phasar-if-not-already-done-using-the-installation-scripts). - -### Compiling PhASAR (if not already done using the bootstrap script) - -Set the system's variables for the C and C++ compiler to clang: - -```bash -export CC=/usr/local/bin/clang -export CXX=/usr/local/bin/clang++ -``` +**NEW:** You can find PhASAR's API reference [here](https://secure-software-engineering.github.io/phasar/). -You may need to adjust the paths according to your system. When you cloned PhASAR from Github you need to initialize PhASAR's submodules before building it: -```bash -git submodule update --init -``` - -If you downloaded PhASAR as a compressed release (e.g. .zip or .tar.gz) you can use the `init-submodules-release.sh` script that manually clones the required submodules: - -```bash -utils/init-submodules-release.sh -``` - -Navigate into the PhASAR directory. The following commands will do the job and compile the PhASAR framework: - -```bash -mkdir build -cd build/ -cmake -G Ninja -DCMAKE_BUILD_TYPE=Release .. -ninja -j $(nproc) # or use a different number of cores to compile it -sudo ninja install # only if you wish to install PhASAR system wide -``` - -When you have used the `bootstrap.sh` script to install PhASAR, the above steps are already done. -Use them as a reference if you wish to modify PhASAR and recompile it. 
- -After compilation using cmake the following two binaries can be found in the build/tools directory: - -+ `phasar-cli` - the PhASAR command-line tool (previously called `phasar-llvm`) that provides access to analyses that are already implemented within PhASAR. Use this if you don't want to build an own tool on top of PhASAR. -+ `myphasartool` - an example tool that shows how tools can be build on top of PhASAR - -Please be careful and check if errors occur during the compilation. - -When using CMake to compile PhASAR the following optional parameters can be used: - -| Parameter : Type| Effect | -|-----------|--------| -| **BUILD_SHARED_LIBS** : BOOL | Build shared libraries -- Not recommended anymore. You may want to use PHASAR_BUILD_DYNLIB instead (default is OFF) | -| **PHASAR_BUILD_DYNLIB** : BOOL | Build one fat shared library (default is OFF) | -| **CMAKE_BUILD_TYPE** : STRING | Build PhASAR in 'Debug', 'RelWithDebInfo' or 'Release' mode (default is 'Debug') | -| **CMAKE_INSTALL_PREFIX** : PATH | Path where PhASAR will be installed if "ninja install” is invoked or the “install” target is built (default is /usr/local/phasar) | -| **PHASAR_CUSTOM_CONFIG_INSTALL_DIR** : PATH | If set, customizes the directory, where configuration files for PhASAR are installed (default is /usr/local/.phasar-config)| -| **PHASAR_ENABLE_DYNAMIC_LOG** : BOOL|Makes it possible to switch the logger on and off at runtime (default is ON)| -| **PHASAR_BUILD_DOC** : BOOL | Build PhASAR documentation (default is OFF) | -| **PHASAR_BUILD_UNITTESTS** : BOOL | Build PhASAR unit tests (default is ON) | -| **PHASAR_BUILD_IR** : BOOL | Build PhASAR IR (required for running the unit tests) (default is ON) | -| **PHASAR_BUILD_OPENSSL_TS_UNITTESTS** : BOOL | Build PhASAR unit tests that require OpenSSL (default is OFF) | -| **PHASAR_ENABLE_PAMM** : STRING | Enable the performance measurement mechanism ('Off', 'Core' or 'Full', default is Off) | -| **PHASAR_ENABLE_PIC** : BOOL | Build 
Position-Independed Code (default is ON) | -| **PHASAR_ENABLE_WARNINGS** : BOOL | Enable compiler warnings (default is ON) | -| **CMAKE_CXX_STANDARD** : INT|Build phasar in C++17 or C++20 mode (default is 17)| - -You can use these parameters either directly or modify the installer-script `bootstrap.sh` - -#### A Remark on Compile Time - -C++'s long compile times are always a pain. As shown in the above, when using cmake the compilation can easily be run in parallel, resulting in shorter compilation times. Make use of it! - -### Running a Test Solver - -To test if everything works as expected please run the following command: - -`$ phasar-cli -m test/llvm_test_code/basic/module_cpp.ll -D ifds-solvertest` +## Secure Software Engineering Group -You can find the `phasar-cli` tool in the build-tree under `tools/phasar-cli`. +PhASAR is primarily developed and maintained by the Secure Software Engineering Group at Heinz Nixdorf Institute (University of Paderborn) and Fraunhofer IEM. -If you obtain output other than a segmentation fault or an exception terminating the program abnormally everything works as expected. +PhASAR was initially developed by Philipp Dominik Schubert (@pdschubert)(). -### Building PhASAR on a MacOS System +Currently, PhASAR is maintained by +- Fabian Schiebel (@fabianbs96)() +- Sriteja Kummita (@sritejakv) +- Lucas Briese (@jusito) +- Martin Mory (@MMory)() +- *others* -Due to unfortunate updates to MacOS and the handling of C++, especially on the newer M1 processors, we can't support native development on Mac. -The easiest solution to develop PhASAR on a Mac right now is to use [dockers development environments](https://docs.docker.com/desktop/dev-environments/). Clone this repository as described in their documentation. Afterwards, you have to login once manually, as a root user by running `docker exec -it -u root /bin/bash` to complete the rest of the build process as described in this readme (install submodules, run bootstrap.sh, ...). 
-Now you can just attach your docker container to VS Code or any other IDE, which supports remote development. +## Required Version of the C++ Standard -## Installation +PhASAR requires at least C++-17. -PhASAR can be installed using the installer scripts as explained in the following. -However, you do not need to install PhASAR in order to use it. +However, building in C++20 mode is supported. You may enable this setting the cmake variable `CMAKE_CXX_STANDARD` to `20`. +Although phasar currently does not make use of C++-20 features (except for some `concept`s behind an #ifdef border), your client application that just *uses* phasar as a library may want to use C++20 earlier. -### Installing PhASAR on an Ubuntu System +**NEW**: PhASAR supports C++20 modules as an experimental feature. -In the following, we would like to give an complete example of how to install -PhASAR using an Ubuntu or Unix-like system. +## Currently Supported Version of LLVM -Therefore, we provide an installation script. To install PhASAR, just navigate to the top-level -directory of PhASAR and use the following command: +PhASAR is currently set up to support LLVM-15.0.* -```bash -./bootstrap.sh --install -``` +## Breaking Changes -The bootstrap script may ask for superuser permissions. +To keep PhASAR in a state that it is well suited for state-of-the-art research in static analysis, as well as for productive use, we have to make breaking changes. Please refer to [Breaking Changes](./BreakingChanges.md) for detailed information on what was broken recently and how to migrate. -Done! +## Building PhASAR -If You have already built phasar, you can just invoke -```bash -sudo ninja install -``` +Please refer to [BUILD.md](./BUILD.md) for instructions on how to build PhASAR. ## How to use PhASAR? 
diff --git a/cmake/phasar_macros.cmake b/cmake/phasar_macros.cmake index c991f508db..2bd952852c 100644 --- a/cmake/phasar_macros.cmake +++ b/cmake/phasar_macros.cmake @@ -241,7 +241,7 @@ endmacro(add_phasar_executable) function(add_phasar_library name) set(PHASAR_LIB_OPTIONS SHARED STATIC MODULE INTERFACE) - set(PHASAR_LIB_MULTIVAL LLVM_LINK_COMPONENTS LINKS LINK_PUBLIC LINK_PRIVATE FILES) + set(PHASAR_LIB_MULTIVAL LLVM_LINK_COMPONENTS LINKS LINK_PUBLIC LINK_PRIVATE FILES MODULE_FILES) cmake_parse_arguments(PHASAR_LIB "${PHASAR_LIB_OPTIONS}" "" "${PHASAR_LIB_MULTIVAL}" ${ARGN}) set(srcs ${PHASAR_LIB_UNPARSED_ARGUMENTS}) list(APPEND srcs ${PHASAR_LIB_FILES}) @@ -277,6 +277,28 @@ function(add_phasar_library name) target_compile_features(${name} PUBLIC cxx_std_17) + set(install_module) + if(PHASAR_LIB_MODULE_FILES) + if(PHASAR_BUILD_MODULES) + target_sources(${name} PUBLIC + FILE_SET cxx_modules + TYPE CXX_MODULES + FILES ${PHASAR_LIB_MODULE_FILES} + ) + + target_compile_features(${name} PUBLIC cxx_std_20) + + set(install_module FILE_SET cxx_modules DESTINATION ${CMAKE_INSTALL_LIBDIR}) + elseif(NOT srcs) + # Add dummy src to prevent cmake error + set(dummy_src "${CMAKE_CURRENT_BINARY_DIR}/${name}_dummysrc.cpp") + if(NOT EXISTS "${dummy_src}") + file(WRITE "${dummy_src}" "") + endif() + target_sources(${name} PRIVATE "${dummy_src}") + endif() + endif() + if(LLVM_COMMON_DEPENDS) add_dependencies(${name} ${LLVM_COMMON_DEPENDS}) endif(LLVM_COMMON_DEPENDS) @@ -316,13 +338,15 @@ function(add_phasar_library name) EXPORT LLVMExports LIBRARY DESTINATION lib ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} + ${install_module} ) else() install(TARGETS ${name} EXPORT PhasarExports - # NOTE: Library, archive and runtime destination are automatically set by # GNUInstallDirs which is included in the top-level CMakeLists.txt + + ${install_module} ) endif() diff --git a/config.h.in b/config.h.in index 97b0e8a9f6..926d69b0bf 100644 --- a/config.h.in +++ b/config.h.in @@ -13,4 +13,6 @@ 
#cmakedefine PHASAR_HAS_SQLITE +#cmakedefine PHASAR_USE_SVF + #endif /* PHASAR_CONFIG_CONFIG_H */ diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index b45f14eebd..2014d73648 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -32,19 +32,19 @@ DOXYFILE_ENCODING = UTF-8 # title of most generated pages and in a few other places. # The default value is: My Project. -PROJECT_NAME = "PhASAR" +PROJECT_NAME = PhASAR # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = @PHASAR_VERSION@ +PROJECT_NUMBER = @PHASAR_VERSION@@development # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. -PROJECT_BRIEF = "PhASAR a LLVM-based Static Analysis Framework" +PROJECT_BRIEF = "A LLVM-based Static Analysis Framework" # With the PROJECT_LOGO tag one can specify an logo or icon that is included in # the documentation. The maximum height of the logo should not exceed 55 pixels @@ -144,7 +144,7 @@ FULL_PATH_NAMES = YES # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. -STRIP_FROM_PATH = +STRIP_FROM_PATH = @PHASAR_SRC_DIR@ # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which @@ -153,8 +153,7 @@ STRIP_FROM_PATH = # specify the list of include paths that are normally passed to the compiler # using the -I flag. -STRIP_FROM_INC_PATH = - +STRIP_FROM_INC_PATH = @PHASAR_SRC_DIR@/include # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't # support long names like on DOS, Mac, or CD-ROM. 
@@ -398,7 +397,7 @@ LOOKUP_CACHE_SIZE = 0 # normally produced when WARNINGS is set to YES. # The default value is: NO. -EXTRACT_ALL = NO +EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES all private members of a class will # be included in the documentation. @@ -743,7 +742,8 @@ WARN_LOGFILE = # spaces. # Note: If this tag is empty the current directory is searched. -INPUT = @CMAKE_CURRENT_SOURCE_DIR@/include/ @CMAKE_CURRENT_SOURCE_DIR@/lib/ @CMAKE_CURRENT_SOURCE_DIR@/docs +INPUT = @CMAKE_CURRENT_SOURCE_DIR@/include/ \ + @CMAKE_CURRENT_SOURCE_DIR@/docs # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -794,7 +794,7 @@ EXCLUDE_SYMLINKS = NO # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* -EXCLUDE_PATTERNS = +EXCLUDE_PATTERNS = */external/* # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the @@ -805,7 +805,7 @@ EXCLUDE_PATTERNS = # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* -EXCLUDE_SYMBOLS = +EXCLUDE_SYMBOLS = *::detail::* # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include @@ -905,7 +905,7 @@ INLINE_SOURCES = NO # Fortran comments will always remain visible. # The default value is: YES. -STRIP_CODE_COMMENTS = YES +STRIP_CODE_COMMENTS = NO # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. @@ -917,7 +917,7 @@ REFERENCED_BY_RELATION = YES # all documented entities called/used by that function will be listed. # The default value is: NO. 
-REFERENCES_RELATION = NO +REFERENCES_RELATION = YES # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES, then the hyperlinks from functions in REFERENCES_RELATION and @@ -1407,7 +1407,7 @@ FORMULA_TRANSPARENT = YES # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. -USE_MATHJAX = NO +USE_MATHJAX = YES # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: @@ -1521,7 +1521,7 @@ SEARCHDATA_FILE = searchdata.xml # projects and redirect the results back to the right project. # This tag requires that the tag SEARCHENGINE is set to YES. -EXTERNAL_SEARCH_ID = +EXTERNAL_SEARCH_ID = phasar # The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen # projects other than the one defined by this configuration file, but that are @@ -1540,7 +1540,7 @@ EXTRA_SEARCH_MAPPINGS = # If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output. # The default value is: YES. -GENERATE_LATEX = YES +GENERATE_LATEX = NO # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of @@ -1917,7 +1917,7 @@ SEARCH_INCLUDES = YES # preprocessor. # This tag requires that the tag SEARCH_INCLUDES is set to YES. -INCLUDE_PATH = +INCLUDE_PATH = @PHASAR_BINARY_DIR@/include # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the @@ -2040,7 +2040,7 @@ DIA_PATH = # and usage relations if the target is undocumented or is not a class. # The default value is: YES. -HIDE_UNDOC_RELATIONS = YES +HIDE_UNDOC_RELATIONS = NO # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz (see: @@ -2100,7 +2100,7 @@ CLASS_GRAPH = YES # The default value is: YES. 
# This tag requires that the tag HAVE_DOT is set to YES. -COLLABORATION_GRAPH = YES +COLLABORATION_GRAPH = NO # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for # groups, showing the direct groups dependencies. @@ -2136,7 +2136,7 @@ UML_LIMIT_NUM_FIELDS = 10 # The default value is: NO. # This tag requires that the tag HAVE_DOT is set to YES. -TEMPLATE_RELATIONS = NO +TEMPLATE_RELATIONS = YES # If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to # YES then doxygen will generate a graph for each documented file showing the @@ -2145,7 +2145,7 @@ TEMPLATE_RELATIONS = NO # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. -INCLUDE_GRAPH = YES +INCLUDE_GRAPH = NO # If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are # set to YES then doxygen will generate a graph for each documented file showing @@ -2154,7 +2154,7 @@ INCLUDE_GRAPH = YES # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. -INCLUDED_BY_GRAPH = YES +INCLUDED_BY_GRAPH = NO # If the CALL_GRAPH tag is set to YES then doxygen will generate a call # dependency graph for every global function or class method. @@ -2203,7 +2203,7 @@ DIRECTORY_GRAPH = YES # The default value is: png. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_IMAGE_FORMAT = png +DOT_IMAGE_FORMAT = svg # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to # enable generation of interactive SVG images that allow zooming and panning. @@ -2276,7 +2276,7 @@ MAX_DOT_GRAPH_DEPTH = 0 # The default value is: NO. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_TRANSPARENT = NO +DOT_TRANSPARENT = YES # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). 
This diff --git a/docs/README.dox b/docs/README.dox index e00ef59771..6ae3b786c0 100644 --- a/docs/README.dox +++ b/docs/README.dox @@ -2,14 +2,27 @@ @mainpage PhASAR: A LLVM-based Static Analysis Framework -@author Philipp Schubert (E-Mail: philipp.schubert@upb.de) and others +PhASAR is a LLVM-based static analysis framework written in C++. It allows users to specify arbitrary data-flow problems which are then solved in a fully-automated manner on the specified LLVM IR target code. Computing points-to information, call-graph(s), etc. is done by the framework, thus you can focus on what matters. -\b Copyright \n - Copyright 2017 Philipp Schubert. All rights reserved. +This page contains the generated code documentation of PhASAR. +You can find the original source on GitHub: . The README should already give you a good first overview. -\b License \n - See LICENSE.txt +For further information, please checkout PhASAR's [Wiki](https://github.com/secure-software-engineering/phasar/wiki). + +@subsubsection SSEG Secure Software Engineering Group + +PhASAR is primarily developed and maintained by the [Secure Software Engineering Group](https://www.hni.uni-paderborn.de/sse) at Heinz Nixdorf Institute (University of Paderborn) and [Fraunhofer IEM](https://www.iem.fraunhofer.de/). -TODO: add detailed description. +PhASAR was initially developed by Philipp Dominik Schubert (@pdschubert)(). + +\b Currently, PhASAR is maintained by +- Fabian Schiebel (@fabianbs96)(fabian.schiebel@iem.fraunhofer.de) +- Sriteja Kummita (@sritejakv) +- Lucas Briese (@jusito) +- Martin Mory (@MMory)(martin.mory@upb.de) +- *others* + +\b License \n + PhASAR is made available under the permissive MIT License. 
See LICENSE.txt */ diff --git a/examples/how-to/00-load-llvm-ir/CMakeLists.txt b/examples/how-to/00-load-llvm-ir/CMakeLists.txt new file mode 100644 index 0000000000..9d572feff4 --- /dev/null +++ b/examples/how-to/00-load-llvm-ir/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.14...3.28) + +project(load-llvm-ir) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +find_package(phasar REQUIRED CONFIG) + +add_executable(load-llvm-ir main.cpp) +target_link_libraries(load-llvm-ir PRIVATE phasar::phasar) + +if (TARGET run_sample_programs) + add_custom_target(run_load_llvm_ir + DEPENDS load-llvm-ir + COMMAND $ "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/simple.ll" + ) + + add_dependencies(run_sample_programs run_load_llvm_ir) +endif() diff --git a/examples/how-to/00-load-llvm-ir/README.md b/examples/how-to/00-load-llvm-ir/README.md new file mode 100644 index 0000000000..e3cbaa9821 --- /dev/null +++ b/examples/how-to/00-load-llvm-ir/README.md @@ -0,0 +1,24 @@ +# Load LLVM IR + +Shows, how you can use PhASAR to load and manage a LLVM IR module. + +## Build + +This example program can be built using cmake. +It assumes, that you have installed PhASAR on your system. If you did not install PhASAR to a default location, you can specify `-Dphasar_ROOT=your/path/to/phasar` when invoking `cmake`, replacing "your/path/to/phasar" by the actual path where you have installed PhASAR. + +```bash +# Invoked from the 00-load-llvm-ir root folder: +$ mkdir -p build && cd build +$ cmake .. +$ cmake --build . +``` + +## Test + +You can test the example program on the target programs from [llvm-hello-world/target](../../llvm-hello-world/target/). 
+ +```bash +# Invoked from the 00-load-llvm-ir/build folder: +./load-llvm-ir ../../../llvm-hello-world/target/simple.ll +``` diff --git a/examples/how-to/00-load-llvm-ir/main.cpp b/examples/how-to/00-load-llvm-ir/main.cpp new file mode 100644 index 0000000000..98179ca279 --- /dev/null +++ b/examples/how-to/00-load-llvm-ir/main.cpp @@ -0,0 +1,55 @@ +#include "phasar/PhasarLLVM/DB.h" // For LLVMProjectIRDB +#include "phasar/PhasarLLVM/Passes.h" // For GeneralStatisticsAnalysis +#include "phasar/PhasarLLVM/Utils.h" // For llvmIRToString() + +#include "llvm/IR/InstIterator.h" // For llvm::instructions() + +static void printIRStats(psr::LLVMProjectIRDB &IRDB); + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: load-llvm-ir \n"; + return 1; + } + + // The LLVMProjectIRDB loads and manages an LLVM-IR module. + // You can load both .ll (human readable) and .bc (smaller, faster load-times) + // files. + // If you already have an llvm::Module*, you can also pass it here. + psr::LLVMProjectIRDB IRDB(Argv[1]); + if (!IRDB) { + // If phasar could not load the IR, we should exit. + // Phasar has already printed an error message to the terminal. + return 1; + } + + // ======== + // Now, you can work with the module + + printIRStats(IRDB); + + // Inspect the module (see also llvm-hello-world) + + auto *F = IRDB.getFunctionDefinition("main"); + if (!F) { + llvm::errs() << "error: could not find function 'main'\n"; + return 1; + } + + llvm::outs() << "--------------- Instructions of 'main' ---------------\n"; + + for (const auto &Inst : llvm::instructions(F)) { + // Phasar annotates all instructions (and global variables) with IRDB-wide + // unique integer Ids: + auto InstId = IRDB.getInstructionId(&Inst); + + llvm::outs() << '#' << InstId << ": " << psr::llvmIRToString(&Inst) << '\n'; + + // TODO: Analyze instruction 'Inst' here.
+ } +} + +static void printIRStats(psr::LLVMProjectIRDB &IRDB) { + psr::GeneralStatisticsAnalysis Stats; + llvm::outs() << Stats.runOnModule(*IRDB.getModule()) << '\n'; +} diff --git a/examples/how-to/01-build-type-hierarchy/CMakeLists.txt b/examples/how-to/01-build-type-hierarchy/CMakeLists.txt new file mode 100644 index 0000000000..f6e0303099 --- /dev/null +++ b/examples/how-to/01-build-type-hierarchy/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.14...3.28) + +project(build-type-hierarchy) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +find_package(phasar REQUIRED CONFIG) + +add_executable(build-type-hierarchy main.cpp) +target_link_libraries(build-type-hierarchy PRIVATE phasar::phasar) + +if (TARGET run_sample_programs) + add_custom_target(run_build_type_hierarchy + DEPENDS build-type-hierarchy + COMMAND $ "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/class_hierarchy.ll" + ) + + add_dependencies(run_sample_programs run_build_type_hierarchy) +endif() diff --git a/examples/how-to/01-build-type-hierarchy/README.md b/examples/how-to/01-build-type-hierarchy/README.md new file mode 100644 index 0000000000..88ade55b42 --- /dev/null +++ b/examples/how-to/01-build-type-hierarchy/README.md @@ -0,0 +1,24 @@ +# Build Type Hierarchy + +Shows, how you can use PhASAR to build and use a type hierarchy from a LLVM IR module. + +## Build + +This example program can be built using cmake. +It assumes, that you have installed PhASAR on your system. If you did not install PhASAR to a default location, you can specify `-Dphasar_ROOT=your/path/to/phasar` when invoking `cmake`, replacing "your/path/to/phasar" by the actual path where you have installed PhASAR. + +```bash +# Invoked from the 01-build-type-hierarchy root folder: +$ mkdir -p build && cd build +$ cmake .. +$ cmake --build . +``` + +## Test + +You can test the example program on the target programs from [llvm-hello-world/target](../../llvm-hello-world/target/). 
+ +```bash +# Invoked from the 01-build-type-hierarchy/build folder: +./build-type-hierarchy ../../../llvm-hello-world/target/class_hierarchy.ll +``` diff --git a/examples/how-to/01-build-type-hierarchy/main.cpp b/examples/how-to/01-build-type-hierarchy/main.cpp new file mode 100644 index 0000000000..28412a25ff --- /dev/null +++ b/examples/how-to/01-build-type-hierarchy/main.cpp @@ -0,0 +1,47 @@ +#include "phasar/PhasarLLVM/DB.h" +#include "phasar/PhasarLLVM/TypeHierarchy.h" + +#include "llvm/Demangle/Demangle.h" + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: build-type-hierarchy \n"; + return 1; + } + + // Load the IR + psr::LLVMProjectIRDB IRDB(Argv[1]); + if (!IRDB) { + return 1; + } + + // Build the type hierarchy. + // Note that this DIBasedTypeHierarchy requires debug information (DI) to be + // embedded into the LLVM IR. You can achieve this by passing -g to clang. + psr::DIBasedTypeHierarchy TH(IRDB); + + for (const auto *ClassTy : TH.getAllTypes()) { + llvm::outs() << "Found class type " << ClassTy->getName() << " (" + << TH.getTypeName(ClassTy) << ")\n"; + llvm::outs() << "> demangled name: " + << llvm::demangle(TH.getTypeName(ClassTy).str()) << '\n'; + } + llvm::outs() << '\n'; + + // Try to find class 'A' + const auto *ClassA = TH.getType("_ZTS1A"); + + // If TH does not find the type, it returns nullptr.
+ + if (ClassA != nullptr) { + // Get the (transitive) sub-types of ClassA + for (const auto *ClassTy : TH.subTypesOf(ClassA)) { + llvm::outs() << "Class " << ClassTy->getName() + << " is a (transitive) sub-type of A\n"; + + // You can also check, whether a type is a (transitive) sub-type of + // another type: + assert(TH.isSubType(ClassA, ClassTy)); + } + } +} diff --git a/examples/how-to/02-build-call-graph/CMakeLists.txt b/examples/how-to/02-build-call-graph/CMakeLists.txt new file mode 100644 index 0000000000..56d5fc25d8 --- /dev/null +++ b/examples/how-to/02-build-call-graph/CMakeLists.txt @@ -0,0 +1,23 @@ +cmake_minimum_required(VERSION 3.14...3.28) + +project(build-call-graph) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +find_package(phasar REQUIRED CONFIG) + +add_executable(build-llvm-based-icfg build_llvm_based_icfg.cpp) +target_link_libraries(build-llvm-based-icfg PRIVATE phasar::phasar) + +add_executable(build-llvm-based-call-graph build_llvm_based_call_graph.cpp) +target_link_libraries(build-llvm-based-call-graph PRIVATE phasar::phasar) + +if (TARGET run_sample_programs) + add_custom_target(run_build_call_graph + DEPENDS build-llvm-based-icfg build-llvm-based-call-graph + COMMAND $ "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/class_hierarchy.ll" + COMMAND $ "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/class_hierarchy.ll" + ) + + add_dependencies(run_sample_programs run_build_call_graph) +endif() diff --git a/examples/how-to/02-build-call-graph/README.md b/examples/how-to/02-build-call-graph/README.md new file mode 100644 index 0000000000..3d9981c109 --- /dev/null +++ b/examples/how-to/02-build-call-graph/README.md @@ -0,0 +1,38 @@ +# Build CallGraph + +Shows several ways, how you can use PhASAR to build and use a call graph from a LLVM IR module. + +You may look at the different C++ source files to see, how you can build a call graph using PhASAR. 
+You may want to start with [build_llvm_based_icfg.cpp](./build_llvm_based_icfg.cpp). + +## Build + +This example program can be built using cmake. +It assumes, that you have installed PhASAR on your system. If you did not install PhASAR to a default location, you can specify `-Dphasar_ROOT=your/path/to/phasar` when invoking `cmake`, replacing "your/path/to/phasar" by the actual path where you have installed PhASAR. + +```bash +# Invoked from the 02-build-call-graph root folder: +$ mkdir -p build && cd build +$ cmake .. +$ cmake --build . +``` + +## Test + +You can test the example program on the target programs from [llvm-hello-world/target](../../llvm-hello-world/target/). + +```bash +# Invoked from the 02-build-call-graph/build folder: +./build-llvm-based-icfg ../../../llvm-hello-world/target/class_hierarchy.ll + +./build-llvm-based-call-graph ../../../llvm-hello-world/target/class_hierarchy.ll +``` + +### Visualizing the CallGraph + +The test programs show, how you can export a call-graph to a dot-graph. +You can use the `dot` command-line tool (get this by, e.g., invoking `apt install graphviz` or similar). 
+ +The call-graph obtained from the example program on the sample `class_hierarchy.ll` should look similar to this: + +![Sample Call-Graph](./img/cg.svg) diff --git a/examples/how-to/02-build-call-graph/build_llvm_based_call_graph.cpp b/examples/how-to/02-build-call-graph/build_llvm_based_call_graph.cpp new file mode 100644 index 0000000000..5704f31eee --- /dev/null +++ b/examples/how-to/02-build-call-graph/build_llvm_based_call_graph.cpp @@ -0,0 +1,87 @@ +#include "phasar/PhasarLLVM/ControlFlow.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h" +#include "phasar/PhasarLLVM/DB.h" +#include "phasar/PhasarLLVM/TypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils.h" + +#include "llvm/Demangle/Demangle.h" + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: build-llvm-based-call-graph \n"; + return 1; + } + + // Load the IR + psr::LLVMProjectIRDB IRDB(Argv[1]); + if (!IRDB) { + return 1; + } + + if (!IRDB.getFunctionDefinition("main")) { + llvm::errs() << "Required function 'main' not found\n"; + return 1; + } + + // We may wish to use type-information for constructing the call-graph + psr::DIBasedTypeHierarchy TH(IRDB); + + // Needed to resolve indirect calls to C++ virtual functions + psr::LLVMVFTableProvider VTP(IRDB); + + // The resolver defines how the call-graph construction algorithm resolves + // indirect calls. + // This comprises calls through a function pointer, as well as virtual + // functions. Here, we select the Rapid Type Analysis that requires a + // type-hierarchy as input. + // + // You can also write your own resolver by creating a class that inherits from + // the psr::Resolver interface. + psr::RTAResolver Resolver(&IRDB, &VTP, &TH); + + // You must specify at least one function as entry-point. The + // LLVMBasedICFG will only consider those functions for the call-graph + // that are reachable from at least one of the entry-points.
+ auto CG = psr::buildLLVMBasedCallGraph(IRDB, Resolver, {"main"}); + + // Iterate over all call-sites: + for (const auto *Call : CG.getAllVertexCallSites()) { + if (Call->isDebugOrPseudoInst()) { + // We may wish to skip the auto-generated debug-intrinsics + continue; + } + + llvm::outs() << "Found call-site: " << psr::llvmIRToString(Call) << '\n'; + + // The probably most important function: getCalleesOfCallAt() + for (const auto *CalleeFun : CG.getCalleesOfCallAt(Call)) { + llvm::outs() << "> calling " + << llvm::demangle(CalleeFun->getName().str()) << '\n'; + } + llvm::outs() << '\n'; + } + + llvm::outs() << "--------------------------\n"; + + // You can also go the other way around: + for (const auto *Fun : CG.getAllVertexFunctions()) { + llvm::outs() << "Found Function: " << llvm::demangle(Fun->getName().str()) + << '\n'; + + // The probably second-most important function: getCallersOf() + for (const auto *CallSite : CG.getCallersOf(Fun)) { + llvm::outs() << "> called from " << psr::llvmIRToString(CallSite) + << '\n'; + } + llvm::outs() << '\n'; + } + + // You can create an LLVMBasedICFG from an already existing call-graph: + psr::LLVMBasedICFG ICFG(std::move(CG), &IRDB); + + llvm::outs() << "--------------------------\n"; + + // You can export the call-graph as dot, such that you can display it + // using a graphviz viewer: + ICFG.print(); +} diff --git a/examples/how-to/02-build-call-graph/build_llvm_based_icfg.cpp b/examples/how-to/02-build-call-graph/build_llvm_based_icfg.cpp new file mode 100644 index 0000000000..61a5b438bd --- /dev/null +++ b/examples/how-to/02-build-call-graph/build_llvm_based_icfg.cpp @@ -0,0 +1,81 @@ +#include "phasar/PhasarLLVM/ControlFlow.h" +#include "phasar/PhasarLLVM/DB.h" +#include "phasar/PhasarLLVM/TypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils.h" + +#include "llvm/Demangle/Demangle.h" + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: build-llvm-based-icfg \n"; + return 1; + } + + // Load
the IR + psr::LLVMProjectIRDB IRDB(Argv[1]); + if (!IRDB) { + return 1; + } + + if (!IRDB.getFunctionDefinition("main")) { + llvm::errs() << "Required function 'main' not found\n"; + return 1; + } + + // We may wish to use type-information for constructing the call-graph + psr::DIBasedTypeHierarchy TH(IRDB); + + // The easiest way of getting a call graph is by constructing an + // inter-procedural control-flow graph (ICFG): + // + // You can select the call-graph resolver algorithm using the + // CallGraphAnalysisType enum. The LLVMBasedICFG will create required + // data-structures that you don't explicitly pass in, on demand. + // Here, we select the Rapid Type Analysis that requires a type-hierarchy as + // input. + // + // You must specify at least one function as entry-point. The LLVMBasedICFG + // will only consider those functions for the call-graph that are + // (transitively) reachable from at least one of the entry-points. + psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::RTA, {"main"}, + &TH); + const auto &CG = ICFG.getCallGraph(); + + // Iterate over all call-sites: + for (const auto *Call : CG.getAllVertexCallSites()) { + if (Call->isDebugOrPseudoInst()) { + // We may wish to skip the auto-generated debug-intrinsics + continue; + } + + llvm::outs() << "Found call-site: " << psr::llvmIRToString(Call) << '\n'; + + // The probably most important function: getCalleesOfCallAt() + for (const auto *CalleeFun : CG.getCalleesOfCallAt(Call)) { + llvm::outs() << "> calling " + << llvm::demangle(CalleeFun->getName().str()) << '\n'; + } + llvm::outs() << '\n'; + } + + llvm::outs() << "--------------------------\n"; + + // You can also go the other way around: + for (const auto *Fun : CG.getAllVertexFunctions()) { + llvm::outs() << "Found Function: " << llvm::demangle(Fun->getName().str()) + << '\n'; + + // The probably second-most important function: getCallersOf() + for (const auto *CallSite : CG.getCallersOf(Fun)) { + llvm::outs() << "> called from " <<
psr::llvmIRToString(CallSite) + << '\n'; + } + llvm::outs() << '\n'; + } + + llvm::outs() << "--------------------------\n"; + + // You can also export the call-graph as dot, such that you can display it + // using a graphviz viewer: + ICFG.print(); +} diff --git a/examples/how-to/02-build-call-graph/img/cg.svg b/examples/how-to/02-build-call-graph/img/cg.svg new file mode 100644 index 0000000000..300f05cae6 --- /dev/null +++ b/examples/how-to/02-build-call-graph/img/cg.svg @@ -0,0 +1,298 @@ + + + + + + +CallGraph + + + +0 + +_ZN1BD2Ev + + + +3 + +_ZN1AD2Ev + + + +0->3 + + +call void @_ZN1AD2Ev(ptr noundef nonnull align 8 dereferenceable(8) %this1) | ID: 90 + + + +11 + +llvm.dbg.declare + + + +0->11 + + +call void @llvm.dbg.declare(metadata ptr %this.addr, metadata | ID: 88 + + + +1 + +main + + + +1->0 + + +call void @_ZN1BD2Ev(ptr noundef nonnull align 8 dereferenceable(8) %b) | ID: 47 + + + +1->0 + + +call void @_ZN1BD2Ev(ptr noundef nonnull align 8 dereferenceable(8) %b) | ID: 32 + + + +2 + +_ZN1BC2Ev + + + +1->2 + + +call void @_ZN1BC2Ev(ptr noundef nonnull align 8 dereferenceable(8) %b) | ID: 19 + + + +4 + +_ZN1CD2Ev + + + +1->4 + + +call void @_ZN1CD2Ev(ptr noundef nonnull align 8 dereferenceable(8) %c) | ID: 45 + + + +1->4 + + +call void @_ZN1CD2Ev(ptr noundef nonnull align 8 dereferenceable(8) %c) | ID: 31 + + + +5 + +_ZN1CC2Ev + + + +1->5 + + +call void @_ZN1CC2Ev(ptr noundef nonnull align 8 dereferenceable(8) %c) | ID: 22 + + + +7 + +_ZN1B3fooEv + + + +1->7 + + +invoke void @_ZN1B3fooEv(ptr noundef nonnull align 8 dereferenceable(8) %b) +          to label %invoke.cont unwind label %lpad | ID: 20 + + + +1->7 + + +invoke void %1(ptr noundef nonnull align 8 dereferenceable(8) %0) +          to label %invoke.cont3 unwind label %lpad1 | ID: 30 + + + +1->11 + + +call void @llvm.dbg.declare(metadata ptr %c, metadata | ID: 21 + + + +1->11 + + +call void @llvm.dbg.declare(metadata ptr %b, metadata | ID: 18 + + + +1->11 + + +call void @llvm.dbg.declare(metadata 
ptr %a, metadata | ID: 24 + + + +12 + +_ZN1C3fooEv + + + +1->12 + + +invoke void %1(ptr noundef nonnull align 8 dereferenceable(8) %0) +          to label %invoke.cont3 unwind label %lpad1 | ID: 30 + + + +1->12 + + +invoke void @_ZN1C3fooEv(ptr noundef nonnull align 8 dereferenceable(8) %c) +          to label %invoke.cont2 unwind label %lpad1 | ID: 23 + + + +9 + +_ZN1AC2Ev + + + +2->9 + + +call void @_ZN1AC2Ev(ptr noundef nonnull align 8 dereferenceable(8) %this1) | ID: 58 + + + +2->11 + + +call void @llvm.dbg.declare(metadata ptr %this.addr, metadata | ID: 56 + + + +3->11 + + +call void @llvm.dbg.declare(metadata ptr %this.addr, metadata | ID: 107 + + + +4->3 + + +call void @_ZN1AD2Ev(ptr noundef nonnull align 8 dereferenceable(8) %this1) | ID: 84 + + + +4->11 + + +call void @llvm.dbg.declare(metadata ptr %this.addr, metadata | ID: 82 + + + +5->9 + + +call void @_ZN1AC2Ev(ptr noundef nonnull align 8 dereferenceable(8) %this1) | ID: 71 + + + +5->11 + + +call void @llvm.dbg.declare(metadata ptr %this.addr, metadata | ID: 69 + + + +6 + +__psrCRuntimeGlobalDtorsModel + + + +8 + +puts + + + +7->8 + + +%call = call i32 @puts(ptr noundef @.str) | ID: 65 + + + +7->11 + + +call void @llvm.dbg.declare(metadata ptr %this.addr, metadata | ID: 63 + + + +9->11 + + +call void @llvm.dbg.declare(metadata ptr %this.addr, metadata | ID: 94 + + + +10 + +__psrCRuntimeGlobalCtorsModel + + + +10->1 + + +%2 = call i32 @main() | ID: 124 + + + +10->6 + + +call void @__psrCRuntimeGlobalDtorsModel() | ID: 125 + + + +12->8 + + +%call = call i32 @puts(ptr noundef @.str.1) | ID: 78 + + + +12->11 + + +call void @llvm.dbg.declare(metadata ptr %this.addr, metadata | ID: 76 + + + diff --git a/examples/how-to/03-create-alias-info/CMakeLists.txt b/examples/how-to/03-create-alias-info/CMakeLists.txt new file mode 100644 index 0000000000..d47aa2d3a2 --- /dev/null +++ b/examples/how-to/03-create-alias-info/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.14...3.28) + 
+project(create-alias-info) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +find_package(phasar REQUIRED CONFIG) + +add_executable(create-alias-info main.cpp) +target_link_libraries(create-alias-info PRIVATE phasar::phasar) + +if (TARGET run_sample_programs) + add_custom_target(run_create_alias_info + DEPENDS create-alias-info + COMMAND $ "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/pointers.ll" + ) + + add_dependencies(run_sample_programs run_create_alias_info) +endif() diff --git a/examples/how-to/03-create-alias-info/README.md b/examples/how-to/03-create-alias-info/README.md new file mode 100644 index 0000000000..e9ef4509fe --- /dev/null +++ b/examples/how-to/03-create-alias-info/README.md @@ -0,0 +1,24 @@ +# Create Alias Information + +Shows, how you can use PhASAR to generate alias information from a LLVM IR module. + +## Build + +This example program can be built using cmake. +It assumes, that you have installed PhASAR on your system. If you did not install PhASAR to a default location, you can specify `-Dphasar_ROOT=your/path/to/phasar` when invoking `cmake`, replacing "your/path/to/phasar" by the actual path where you have installed PhASAR. + +```bash +# Invoked from the 03-create-alias-info root folder: +$ mkdir -p build && cd build +$ cmake .. +$ cmake --build . +``` + +## Test + +You can test the example program on the target programs from [llvm-hello-world/target](../../llvm-hello-world/target/). 
+ +```bash +# Invoked from the 03-create-alias-info/build folder: +./create-alias-info ../../../llvm-hello-world/target/pointers.ll +``` diff --git a/examples/how-to/03-create-alias-info/main.cpp b/examples/how-to/03-create-alias-info/main.cpp new file mode 100644 index 0000000000..b45e969418 --- /dev/null +++ b/examples/how-to/03-create-alias-info/main.cpp @@ -0,0 +1,84 @@ +#include "phasar/PhasarLLVM/DB.h" +#include "phasar/PhasarLLVM/Pointer.h" +#include "phasar/PhasarLLVM/Utils.h" + +#include "llvm/IR/InstIterator.h" + +#include <cassert> + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: create-alias-info \n"; + return 1; + } + + // Load the IR + psr::LLVMProjectIRDB IRDB(Argv[1]); + if (!IRDB) { + return 1; + } + + // The easiest way of getting alias information is using the LLVMAliasSet: + psr::LLVMAliasSet AS(&IRDB, /*UseLazyEvaluation=*/false); + + // PhASAR APIs usually do not care, which alias-info implementation you use. + // They take a type-erased reference to any alias-info object. + // You can implicitly convert a pointer to any compatible alias-info object to + // an LLVMAliasInfoRef. + // Since the LLVMAliasInfoRef is a non-owning reference, you must make sure + // that the actual LLVMAliasSet object outlives any use of the references to + // it. + psr::LLVMAliasInfoRef ASRef = &AS; + + // You can print and load alias information from/to JSON: + AS.printAsJson(); + + const auto *MainF = IRDB.getFunctionDefinition("main"); + if (!MainF) { + llvm::errs() << "Required function 'main' not found\n"; + return 1; + } + + // Manually printing the alias sets: + + for (const auto &Inst : llvm::instructions(MainF)) { + if (!Inst.getType()->isPointerTy()) { + // For aliasing, we only care about pointers... + continue; + } + + // Retrieve the aliases of the result of the instruction Inst (first + // parameter) at the program location determined by Inst (second parameter). + // + // Implementations may ignore the second parameter.
+ auto AliasesOfInstAtInst = AS.getAliasSet(&Inst, &Inst); + + llvm::outs() << "For pointer " << psr::llvmIRToString(&Inst) << ":\n"; + for (const auto *Alias : *AliasesOfInstAtInst) { + llvm::outs() << "> aliasing " << psr::llvmIRToShortString(Alias) << '\n'; + + // You can also check, whether two pointers are (potentially) aliasing: + assert(AS.alias(&Inst, Alias, &Inst)); + } + + // Retrieve a filtered alias set only containing allocation-sites for the + // aliases of the result of the instruction Inst (first parameter), further + // filtered to not contain allocation-sites from other functions (second + // parameter), at the program location determined by Inst (third + // parameter). + // + // Implementations may ignore the third parameter. + auto ReachableAllocSites = + AS.getReachableAllocationSites(&Inst, /*IntraProcOnly=*/true, &Inst); + for (const auto *AllocSite : *ReachableAllocSites) { + llvm::outs() << "> reachable alloc-site " + << psr::llvmIRToShortString(AllocSite) << '\n'; + + // You can also check, whether a pointer is in the reachable + // allocation-sites of another pointer: + assert(AS.isInReachableAllocationSites(&Inst, AllocSite, + /*IntraProcOnly=*/true, &Inst)); + } + llvm::outs() << '\n'; + } +} diff --git a/examples/how-to/04-run-ifds-analysis/CMakeLists.txt b/examples/how-to/04-run-ifds-analysis/CMakeLists.txt new file mode 100644 index 0000000000..6b8c99365b --- /dev/null +++ b/examples/how-to/04-run-ifds-analysis/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required(VERSION 3.14...3.28) + +project(run-ifds-analysis) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +find_package(phasar REQUIRED CONFIG) + +add_executable(run-ifds-analysis-simple simple.cpp) +target_link_libraries(run-ifds-analysis-simple PRIVATE phasar::phasar) + +add_executable(run-ifds-analysis-helper-analyses helper-analyses.cpp) +target_link_libraries(run-ifds-analysis-helper-analyses PRIVATE phasar::phasar) + +add_executable(run-ifds-analysis-otf-reporter 
otf-reporter.cpp) +target_link_libraries(run-ifds-analysis-otf-reporter PRIVATE phasar::phasar) + +add_executable(run-ifds-analysis-ifds-solver ifds-solver.cpp) +target_link_libraries(run-ifds-analysis-ifds-solver PRIVATE phasar::phasar) + +if (TARGET run_sample_programs) + add_custom_target(run_run_ifds_analysis + DEPENDS run-ifds-analysis-simple run-ifds-analysis-helper-analyses run-ifds-analysis-otf-reporter run-ifds-analysis-ifds-solver + COMMAND $<TARGET_FILE:run-ifds-analysis-simple> "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/taint.ll" + COMMAND $<TARGET_FILE:run-ifds-analysis-helper-analyses> "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/taint.ll" + COMMAND $<TARGET_FILE:run-ifds-analysis-otf-reporter> "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/taint.ll" + COMMAND $<TARGET_FILE:run-ifds-analysis-ifds-solver> "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/taint.ll" + ) + + add_dependencies(run_sample_programs run_run_ifds_analysis) +endif() diff --git a/examples/how-to/04-run-ifds-analysis/README.md b/examples/how-to/04-run-ifds-analysis/README.md new file mode 100644 index 0000000000..d2a6cffa2b --- /dev/null +++ b/examples/how-to/04-run-ifds-analysis/README.md @@ -0,0 +1,34 @@ +# Run an IFDS Analysis + +Shows several ways, how you can use PhASAR to run an already existing IFDS analysis on a LLVM IR module. +For this example, we selected the `IFDSTaintAnalysis`. + +You may look at the different C++ source files to see, how you can run an IFDS taint analysis using PhASAR. +We suggest to start with the simplest examples [simple.cpp](./simple.cpp) and [helper-analyses.cpp](./helper-analyses.cpp). + +## Build + +This example program can be built using cmake. +It assumes, that you have installed PhASAR on your system. If you did not install PhASAR to a default location, you can specify `-Dphasar_ROOT=your/path/to/phasar` when invoking `cmake`, replacing "your/path/to/phasar" by the actual path where you have installed PhASAR. + +```bash +# Invoked from the 04-run-ifds-analysis root folder: +$ mkdir -p build && cd build +$ cmake .. +$ cmake --build . 
+``` + +## Test + +You can test the example program on the target programs from [llvm-hello-world/target](../../llvm-hello-world/target/). + +```bash +# Invoked from the 04-run-ifds-analysis/build folder: +./run-ifds-analysis-simple ../../../llvm-hello-world/target/taint.ll + +./run-ifds-analysis-helper-analyses ../../../llvm-hello-world/target/taint.ll + +./run-ifds-analysis-otf-reporter ../../../llvm-hello-world/target/taint.ll + +./run-ifds-analysis-ifds-solver ../../../llvm-hello-world/target/taint.ll +``` diff --git a/examples/how-to/04-run-ifds-analysis/helper-analyses.cpp b/examples/how-to/04-run-ifds-analysis/helper-analyses.cpp new file mode 100644 index 0000000000..1469349163 --- /dev/null +++ b/examples/how-to/04-run-ifds-analysis/helper-analyses.cpp @@ -0,0 +1,52 @@ +#include "phasar/DataFlow.h" // For solveIFDSProblem() +#include "phasar/PhasarLLVM.h" // For the HelperAnalyses +#include "phasar/PhasarLLVM/DataFlow.h" // For the IFDSTaintAnalysis +#include "phasar/PhasarLLVM/TaintConfig.h" // For the LLVMTaintConfig + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: run-ifds-analysis-helper-analyses <LLVM-IR file>\n"; + return 1; + } + + using namespace std::string_literals; + std::vector EntryPoints = {"main"s}; + + // Instead of creating all the helper analyses ourselves, we can just use the + // HelperAnalyses class. It will create the necessary information on-demand. + // + // You can customize the underlying algorithms by passing a + // HelperAnalysisConfig as third parameter + psr::HelperAnalyses HA(Argv[1], EntryPoints); + if (!HA.getProjectIRDB()) { + return 1; + } + + // Create the taint configuration + psr::LLVMTaintConfig TC(HA.getProjectIRDB()); + TC.print(); + llvm::outs() << "------------------------\n"; + + // Create the taint analysis problem: + // The utility function createAnalysisProblem() simplifies creating an + // analysis problem with a HelperAnalyses object. 
It automatically passes the + // right arguments + auto TaintProblem = psr::createAnalysisProblem( + HA, &TC, EntryPoints, /*TaintMainArgs=*/false); + + // Solving the TaintProblem. This may take some time, depending on the size of + // the ICFG + psr::solveIFDSProblem(TaintProblem, HA.getICFG()); + + // After we have solved the TaintProblem, we can now inspect the detected + // leaks: + for (const auto &[LeakInst, LeakFacts] : TaintProblem.Leaks) { + llvm::outs() << "Detected taint leak at " << psr::llvmIRToString(LeakInst) + << '\n'; + for (const auto *Fact : LeakFacts) { + llvm::outs() << "> leaking fact " << psr::llvmIRToShortString(Fact) + << '\n'; + } + llvm::outs() << '\n'; + } +} diff --git a/examples/how-to/04-run-ifds-analysis/ifds-solver.cpp b/examples/how-to/04-run-ifds-analysis/ifds-solver.cpp new file mode 100644 index 0000000000..2ea821d332 --- /dev/null +++ b/examples/how-to/04-run-ifds-analysis/ifds-solver.cpp @@ -0,0 +1,73 @@ +#include "phasar/DataFlow.h" // For the IFDSSolver +#include "phasar/PhasarLLVM/DB.h" // For the LLVMProjectIRDB +#include "phasar/PhasarLLVM/DataFlow.h" // For the IFDSTaintAnalysis +#include "phasar/PhasarLLVM/Pointer.h" // For the LLVMAliasSet +#include "phasar/PhasarLLVM/TaintConfig.h" // For the LLVMTaintConfig + +#include +#include + +static constexpr std::string_view Spinner[] = {"⠙", "⠹", "⠸", "⠼", + "⠴", "⠦", "⠇", "⠿"}; + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: run-ifds-analysis-ifds-solver \n"; + return 1; + } + + // Load the IR + psr::LLVMProjectIRDB IRDB(Argv[1]); + if (!IRDB) { + return 1; + } + + // The IFDSTaintAnalysis requires alias information, so create it here + psr::LLVMAliasSet AS(&IRDB); + + // Create the taint configuration + psr::LLVMTaintConfig TC(IRDB); + TC.print(); + llvm::outs() << "------------------------\n"; + + // Create the taint analysis problem + psr::IFDSTaintAnalysis TaintProblem(&IRDB, &AS, &TC, {"main"}, + /*TaintMainArgs=*/false); + + // Create 
the ICFG + psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}, + nullptr, &AS); + + // To solve the taint problem, we now create an instance of the IFDSSolver. + // The function psr::solveIFDSProblem() uses this solver internally as well. + // Having the solver explicitly, allows more control over the solving process: + psr::IFDSSolver Solver(&TaintProblem, &ICFG); + + // The simple solution. You don't really need an explicit solver for this: + // Solver.solve(); + + // Have more control over the solving process: + if (Solver.initialize()) { + int i = 0; + + // Perform the next 10 analysis steps, while we still have some + while (Solver.nextN(10)) { + // Perform some intermediate task *during* the solving process. + // We could also interrupt the solver at any time and continue later. + llvm::outs() << "\b\b" << Spinner[i] << ' '; + i = (i + 1) % std::size(Spinner); + + // Wait a bit, such that we have time to see the beautiful animation for + // our tiny example target programs: + using namespace std::chrono_literals; + std::this_thread::sleep_for(100ms); + } + + Solver.finalize(); + llvm::outs() << "\nSolving finished\n"; + } + + // Here, we could loop over TaintProblem.Leaks. 
Instead, we will now use + // the Solver to dump the whole raw IFDS results: + Solver.dumpResults(); +} diff --git a/examples/how-to/04-run-ifds-analysis/otf-reporter.cpp b/examples/how-to/04-run-ifds-analysis/otf-reporter.cpp new file mode 100644 index 0000000000..5cb69dbc43 --- /dev/null +++ b/examples/how-to/04-run-ifds-analysis/otf-reporter.cpp @@ -0,0 +1,70 @@ +#include "phasar/DataFlow.h" // For solveIFDSProblem() +#include "phasar/Domain/BinaryDomain.h" +#include "phasar/PhasarLLVM/DB.h" // For the LLVMProjectIRDB +#include "phasar/PhasarLLVM/DataFlow.h" // For the IFDSTaintAnalysis +#include "phasar/PhasarLLVM/Pointer.h" // For the LLVMAliasSet +#include "phasar/PhasarLLVM/TaintConfig.h" // For the LLVMTaintConfig + +namespace { +/// A listener that gets notified, whenever the taint analysis detects a leak +/// +/// Checkout the analysis-printers that are already provided by PhASAR: +/// - "phasar/Utils/OnTheFlyReporter.h" +/// - "phasar/PhasarLLVM/Utils/LLVMAnalysisPrinter.h" +/// - "phasar/PhasarLLVM/Utils/SourceMgrPrinter.h" +class LeakReporter + : public psr::AnalysisPrinterBase { + + /// This function will be called once for each detected leak, **while the + /// analysis is still running**. 
+ void doOnResult(const llvm::Instruction *LeakInst, + const llvm::Value *LeakFact, + psr::BinaryDomain /*LatticeElement*/, + psr::DataFlowAnalysisType /*AnalysisType*/) override { + llvm::outs() << "Detected taint leak at " << psr::llvmIRToString(LeakInst) + << '\n'; + llvm::outs() << "> leaking fact " << psr::llvmIRToShortString(LeakFact) + << "\n\n"; + } +}; +} // namespace + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: run-ifds-analysis-otf-reporter \n"; + return 1; + } + + // Load the IR + psr::LLVMProjectIRDB IRDB(Argv[1]); + if (!IRDB) { + return 1; + } + + // The IFDSTaintAnalysis requires alias information, so create it here + psr::LLVMAliasSet AS(&IRDB); + + // Create the taint configuration + psr::LLVMTaintConfig TC(IRDB); + TC.print(); + llvm::outs() << "------------------------\n"; + + // Create the taint analysis problem + psr::IFDSTaintAnalysis TaintProblem(&IRDB, &AS, &TC, {"main"}, + /*TaintMainArgs=*/false); + + // Create the ICFG + psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}, + nullptr, &AS); + + // We want to get notified, whenever the taint analysis detects a leak: + LeakReporter Reporter; + TaintProblem.setAnalysisPrinter(&Reporter); + + // Solving the TaintProblem. This may take some time, depending on the size of + // the ICFG + psr::solveIFDSProblem(TaintProblem, ICFG); + + // Don't need to loop over the leaks anymore. 
We have already intercepted all + // incoming leaks with our Reporter +} diff --git a/examples/how-to/04-run-ifds-analysis/simple.cpp b/examples/how-to/04-run-ifds-analysis/simple.cpp new file mode 100644 index 0000000000..f550dd171e --- /dev/null +++ b/examples/how-to/04-run-ifds-analysis/simple.cpp @@ -0,0 +1,71 @@ +#include "phasar/DataFlow.h" // For solveIFDSProblem() +#include "phasar/PhasarLLVM/DB.h" // For the LLVMProjectIRDB +#include "phasar/PhasarLLVM/DataFlow.h" // For the IFDSTaintAnalysis +#include "phasar/PhasarLLVM/Pointer.h" // For the LLVMAliasSet +#include "phasar/PhasarLLVM/TaintConfig.h" // For the LLVMTaintConfig + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: run-ifds-analysis-simple \n"; + return 1; + } + + // Load the IR + psr::LLVMProjectIRDB IRDB(Argv[1]); + if (!IRDB) { + return 1; + } + + // The IFDSTaintAnalysis requires alias information, so create it here + psr::LLVMAliasSet AS(&IRDB); + + // To tell the IFDSTaintAnalysis, which functions are actually sources, sinks + // and sanitizers, we use the LLVMTaintConfig class. + // + // There are several ways of getting a taint configuration into this class: + // - Loading a JSON file + // - Specifying call-backs + // - Annotating the target code + // + // For simplicity, we selected the annotated target code here (checkout the + // taint.cpp target program in llvm-hello-world/target) + psr::LLVMTaintConfig TC(IRDB); + TC.print(); + llvm::outs() << "------------------------\n"; + + // Here, we instantiate the taint analysis problem. + // We need to pass all information that the taint analysis requires: The IRDB, + // alias info, taint config, and the functions where the analysis should + // start. The IFDS solver will walk the inter-procedural control-flow graph + // (ICFG) to analyze all statements and functions that are reachable from the + // entrypoints. 
+ psr::IFDSTaintAnalysis TaintProblem(&IRDB, &AS, &TC, {"main"}, + /*TaintMainArgs=*/false); + + // To solve the TaintProblem, we need an ICFG. + // Checkout the example 02-build-call-graph for details. + // Here, we select the OTF call-graph algorithm, which uses alias information + // for indirect call resolution. + // + // Since we already have computed alias information, it would be wasteful to + // let the LLVMBasedICFG compute the alias info again, so we pass the AS here. + // The OTF analysis does not require a type-hierarchy. + psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}, + nullptr, &AS); + + // Solving the TaintProblem. This may take some time, depending on the size of + // the ICFG + psr::solveIFDSProblem(TaintProblem, ICFG); + + // After we have solved the TaintProblem, we can now inspect the detected + // leaks: + for (const auto &[LeakInst, LeakFacts] : TaintProblem.Leaks) { + llvm::outs() << "Detected taint leak at " << psr::llvmIRToString(LeakInst) + << '\n'; + for (const auto *Fact : LeakFacts) { + llvm::outs() << "> leaking fact " << psr::llvmIRToShortString(Fact) + << '\n'; + } + llvm::outs() << '\n'; + } +} diff --git a/examples/how-to/05-run-ide-analysis/CMakeLists.txt b/examples/how-to/05-run-ide-analysis/CMakeLists.txt new file mode 100644 index 0000000000..cc71bd81a8 --- /dev/null +++ b/examples/how-to/05-run-ide-analysis/CMakeLists.txt @@ -0,0 +1,27 @@ +cmake_minimum_required(VERSION 3.14...3.28) + +project(run-ide-analysis) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +find_package(phasar REQUIRED CONFIG) + +add_executable(run-ide-analysis-simple simple.cpp) +target_link_libraries(run-ide-analysis-simple PRIVATE phasar::phasar) + +add_executable(run-ide-analysis-helper-analyses helper-analyses.cpp) +target_link_libraries(run-ide-analysis-helper-analyses PRIVATE phasar::phasar) + +add_executable(run-ide-analysis-ide-solver ide-solver.cpp) +target_link_libraries(run-ide-analysis-ide-solver PRIVATE phasar::phasar) + 
+if (TARGET run_sample_programs) + add_custom_target(run_run_ide_analysis + DEPENDS run-ide-analysis-simple run-ide-analysis-helper-analyses run-ide-analysis-ide-solver + COMMAND $<TARGET_FILE:run-ide-analysis-simple> "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/call2.ll" + COMMAND $<TARGET_FILE:run-ide-analysis-helper-analyses> "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/call2.ll" + COMMAND $<TARGET_FILE:run-ide-analysis-ide-solver> "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/call2.ll" + ) + + add_dependencies(run_sample_programs run_run_ide_analysis) +endif() diff --git a/examples/how-to/05-run-ide-analysis/README.md b/examples/how-to/05-run-ide-analysis/README.md new file mode 100644 index 0000000000..c46f5c5bce --- /dev/null +++ b/examples/how-to/05-run-ide-analysis/README.md @@ -0,0 +1,32 @@ +# Run an IDE Analysis + +Shows several ways, how you can use PhASAR to run an already existing IDE analysis on a LLVM IR module. +For this example, we selected the `IDELinearConstantAnalysis`. + +You may look at the different C++ source files to see, how you can run an IDE linear constant analysis using PhASAR. +We suggest to start with the simplest examples [simple.cpp](./simple.cpp) and [helper-analyses.cpp](./helper-analyses.cpp). + +## Build + +This example program can be built using cmake. +It assumes, that you have installed PhASAR on your system. If you did not install PhASAR to a default location, you can specify `-Dphasar_ROOT=your/path/to/phasar` when invoking `cmake`, replacing "your/path/to/phasar" by the actual path where you have installed PhASAR. + +```bash +# Invoked from the 05-run-ide-analysis root folder: +$ mkdir -p build && cd build +$ cmake .. +$ cmake --build . +``` + +## Test + +You can test the example program on the target programs from [llvm-hello-world/target](../../llvm-hello-world/target/). 
+ +```bash +# Invoked from the 05-run-ide-analysis/build folder: +./run-ide-analysis-simple ../../../llvm-hello-world/target/call2.ll + +./run-ide-analysis-helper-analyses ../../../llvm-hello-world/target/call2.ll + +./run-ide-analysis-ide-solver ../../../llvm-hello-world/target/call2.ll +``` diff --git a/examples/how-to/05-run-ide-analysis/helper-analyses.cpp b/examples/how-to/05-run-ide-analysis/helper-analyses.cpp new file mode 100644 index 0000000000..d2e1229948 --- /dev/null +++ b/examples/how-to/05-run-ide-analysis/helper-analyses.cpp @@ -0,0 +1,65 @@ +#include "phasar/DataFlow.h" // For solveIDEProblem() +#include "phasar/PhasarLLVM.h" // For the HelperAnalyses +#include "phasar/PhasarLLVM/DB.h" // For the LLVMProjectIRDB +#include "phasar/PhasarLLVM/DataFlow.h" // For the IDELinearConstantAnalysis + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: run-ide-analysis-helper-analyses <LLVM-IR file>\n"; + return 1; + } + + // Similar to IFDS, you can also use the HelperAnalyses class to reduce some + // boilerplate code: + + using namespace std::string_literals; + std::vector EntryPoints = {"main"s}; + // Instead of creating all the helper analyses ourselves, we can just use the + // HelperAnalyses class. It will create the necessary information on-demand. + // + // You can customize the underlying algorithms by passing a + // HelperAnalysisConfig as third parameter + psr::HelperAnalyses HA(Argv[1], EntryPoints); + if (!HA.getProjectIRDB()) { + return 1; + } + + // Here, we instantiate the linear constant analysis problem. + // In contrast to the example in simple.cpp, we only need to pass the + // HelperAnalyses and the entry points (this may vary, depending on the + // analysis problem to solve) + auto LCAProblem = psr::createAnalysisProblem<psr::IDELinearConstantAnalysis>( + HA, EntryPoints); + + // Solving the LCAProblem. 
This may take some time, depending on the size of + // the ICFG + auto Results = psr::solveIDEProblem(LCAProblem, HA.getICFG()); + + // After we have solved the LCAProblem, we can now inspect the detected + // constants: + + const auto *MainF = HA.getProjectIRDB().getFunctionDefinition("main"); + if (!MainF) { + llvm::errs() << "Required function 'main' not found\n"; + return 1; + } + + const auto *ExitOfMain = psr::getAllExitPoints(MainF).front(); + + // Get the analysis results right **after** main's return statement + const auto &AllConstantsAtMainExit = Results.resultsAt(ExitOfMain); + + llvm::outs() << "Detected constants at " << psr::llvmIRToString(ExitOfMain) + << ":\n"; + for (const auto &[LLVMVar, ConstVal] : AllConstantsAtMainExit) { + llvm::outs() << " " << psr::llvmIRToString(LLVMVar) << "\n --> "; + if (ConstVal.isBottom()) { + // A "bottom" value here means that the analysis does not know the value + // at this point and that any value may be possible. + + llvm::outs() << "\n\n"; + } else { + llvm::outs() << ConstVal << "\n\n"; + } + } +} diff --git a/examples/how-to/05-run-ide-analysis/ide-solver.cpp b/examples/how-to/05-run-ide-analysis/ide-solver.cpp new file mode 100644 index 0000000000..62099d4eb0 --- /dev/null +++ b/examples/how-to/05-run-ide-analysis/ide-solver.cpp @@ -0,0 +1,73 @@ +#include "phasar/DataFlow.h" // For the IDESolver +#include "phasar/PhasarLLVM/DB.h" // For the LLVMProjectIRDB +#include "phasar/PhasarLLVM/DataFlow.h" // For the IDELinearConstantAnalysis + +#include <chrono> +#include <thread> + +static constexpr std::string_view Spinner[] = {"⠙", "⠹", "⠸", "⠼", + "⠴", "⠦", "⠇", "⠿"}; + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: run-ide-analysis-ide-solver <LLVM-IR file>\n"; + return 1; + } + + // Load the IR + psr::LLVMProjectIRDB IRDB(Argv[1]); + if (!IRDB) { + return 1; + } + + // To solve the LCAProblem, we need an ICFG. + // Checkout the example 02-build-call-graph for details. 
+ psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}); + + // Here, we instantiate the linear constant analysis problem. + // We need to pass all information that the analysis requires: The IRDB, + // the ICFG, and the functions where the analysis should start. The IDE + // solver will walk the ICFG to analyze all statements and functions that are + // reachable from the entrypoints. + psr::IDELinearConstantAnalysis LCAProblem(&IRDB, &ICFG, {"main"}); + + // To solve the linear constant analysis problem, we now create an instance of + // the IDESolver. The function psr::solveIDEProblem() uses this solver + // internally as well. Having the solver explicitly, allows more control over + // the solving process: + psr::IDESolver Solver(&LCAProblem, &ICFG); + + // The simple solution. You don't really need an explicit solver for this: + // Solver.solve(); + + // Have more control over the solving process: + if (Solver.initialize()) { + int i = 0; + + // Perform the next 10 analysis steps, while we still have some + while (Solver.nextN(10)) { + // Perform some intermediate task *during* the solving process. + // We could also interrupt the solver at any time and continue later. + llvm::outs() << "\b\b" << Spinner[i] << ' '; + i = (i + 1) % std::size(Spinner); + + // Wait a bit, such that we have time to see the beautiful animation for + // our tiny example target programs: + using namespace std::chrono_literals; + std::this_thread::sleep_for(100ms); + } + + // In contrast to the IFDSSolver, finalize may take some time with IDE. + // It will still be significantly faster than the above loop. + Solver.finalize(); + llvm::outs() << "\nSolving finished\n"; + } + + // Accessing the results: + auto Results = Solver.getSolverResults(); + + // After we have solved the LCAProblem, we can now inspect the detected + // constants. 
Instead of manually looping, we will now use + // the Solver to dump the whole raw IDE results: + Solver.dumpResults(); +} diff --git a/examples/how-to/05-run-ide-analysis/simple.cpp b/examples/how-to/05-run-ide-analysis/simple.cpp new file mode 100644 index 0000000000..98a78e658c --- /dev/null +++ b/examples/how-to/05-run-ide-analysis/simple.cpp @@ -0,0 +1,59 @@ +#include "phasar/DataFlow.h" // For solveIDEProblem() +#include "phasar/PhasarLLVM/DB.h" // For the LLVMProjectIRDB +#include "phasar/PhasarLLVM/DataFlow.h" // For the IDELinearConstantAnalysis + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: run-ide-analysis-simple <LLVM-IR file>\n"; + return 1; + } + + // Load the IR + psr::LLVMProjectIRDB IRDB(Argv[1]); + if (!IRDB) { + return 1; + } + + // To solve the LCAProblem, we need an ICFG. + // Checkout the example 02-build-call-graph for details. + psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}); + + // Here, we instantiate the linear constant analysis problem. + // We need to pass all information that the analysis requires: The IRDB, + // the ICFG, and the functions where the analysis should start. The IDE + // solver will walk the ICFG to analyze all statements and functions that are + // reachable from the entrypoints. + psr::IDELinearConstantAnalysis LCAProblem(&IRDB, &ICFG, {"main"}); + + // Solving the LCAProblem. 
This may take some time, depending on the size of + // the ICFG + auto Results = psr::solveIDEProblem(LCAProblem, ICFG); + + // After we have solved the LCAProblem, we can now inspect the detected + // constants: + + const auto *MainF = IRDB.getFunctionDefinition("main"); + if (!MainF) { + llvm::errs() << "Required function 'main' not found\n"; + return 1; + } + + const auto *ExitOfMain = psr::getAllExitPoints(MainF).front(); + + // Get the analysis results right **after** main's return statement + const auto &AllConstantsAtMainExit = Results.resultsAt(ExitOfMain); + + llvm::outs() << "Detected constants at " << psr::llvmIRToString(ExitOfMain) + << ":\n"; + for (const auto &[LLVMVar, ConstVal] : AllConstantsAtMainExit) { + llvm::outs() << " " << psr::llvmIRToString(LLVMVar) << "\n --> "; + if (ConstVal.isBottom()) { + // A "bottom" value here means that the analysis does not know the value + // at this point and that any value may be possible. + + llvm::outs() << "\n\n"; + } else { + llvm::outs() << ConstVal << "\n\n"; + } + } +} diff --git a/examples/how-to/07-write-ifds-analysis/CMakeLists.txt b/examples/how-to/07-write-ifds-analysis/CMakeLists.txt new file mode 100644 index 0000000000..fe1839e072 --- /dev/null +++ b/examples/how-to/07-write-ifds-analysis/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.14...3.28) + +project(write-ifds-analysis) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +find_package(phasar REQUIRED CONFIG) + +add_executable(write-ifds-analysis-simple simple.cpp) +target_link_libraries(write-ifds-analysis-simple PRIVATE phasar::phasar) + + +if (TARGET run_sample_programs) + add_custom_target(run_write_ifds_analysis + DEPENDS write-ifds-analysis-simple + COMMAND $<TARGET_FILE:write-ifds-analysis-simple> "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/taint.ll" + ) + + add_dependencies(run_sample_programs run_write_ifds_analysis) +endif() diff --git a/examples/how-to/07-write-ifds-analysis/README.md b/examples/how-to/07-write-ifds-analysis/README.md new file 
mode 100644 index 0000000000..f99548f5d1 --- /dev/null +++ b/examples/how-to/07-write-ifds-analysis/README.md @@ -0,0 +1,28 @@ +# Write an IFDS Analysis + +Shows, how you can use PhASAR to write an IFDS analysis to analyze LLVM IR. +For this example, we selected the versatile *taint analysis* as problem to implement. + +For more information, we suggest taking a look into PhASAR's Wiki: [Writing an IFDS Analysis](https://github.com/secure-software-engineering/phasar/wiki/Writing-an-IFDS-analysis). + +## Build + +This example program can be built using cmake. +It assumes, that you have installed PhASAR on your system. If you did not install PhASAR to a default location, you can specify `-Dphasar_ROOT=your/path/to/phasar` when invoking `cmake`, replacing "your/path/to/phasar" by the actual path where you have installed PhASAR. + +```bash +# Invoked from the 07-write-ifds-analysis root folder: +$ mkdir -p build && cd build +$ cmake .. +$ cmake --build . +``` + +## Test + +You can test the example program on the target programs from [llvm-hello-world/target](../../llvm-hello-world/target/). + +```bash +# Invoked from the 07-write-ifds-analysis/build folder: +./write-ifds-analysis-simple ../../../llvm-hello-world/target/taint.ll + +``` diff --git a/examples/how-to/07-write-ifds-analysis/simple.cpp b/examples/how-to/07-write-ifds-analysis/simple.cpp new file mode 100644 index 0000000000..34ea367b47 --- /dev/null +++ b/examples/how-to/07-write-ifds-analysis/simple.cpp @@ -0,0 +1,147 @@ +#include "phasar/DataFlow.h" // For solveIFDSProblem() +#include "phasar/PhasarLLVM/ControlFlow.h" // For the LLVMBasedICFG +#include "phasar/PhasarLLVM/DB.h" // For the LLVMProjectIRDB +#include "phasar/PhasarLLVM/DataFlow.h" // For DefaultAliasAwareIFDSProblem, etc. 
+#include "phasar/PhasarLLVM/Pointer.h" // For the LLVMAliasSet +#include "phasar/PhasarLLVM/TaintConfig.h" // For the LLVMTaintConfig + +namespace { + +void populateWithMayAliases(psr::LLVMAliasIteratorRef AS, + std::set &Facts, + const llvm::Instruction *At); + +/// To create a custom IFDS analysis, we must create a subclass of the +/// IFDSTabulationProblem. +/// The utility class DefaultAliasAwareIFDSProblem implements +/// IFDSTabulationProblem and already provides some default flow-functions and +/// handles aliasing, so that we can focus on the specifica of our analysis. +class ExampleTaintAnalysis : public psr::DefaultAliasAwareIFDSProblem { +public: + /// Constructor of the taint-analysis problem. Just forward all parameters to + /// the base-class and initialize the taint-config. + /// + /// The last parameter of the base-ctor denotes the special zero-value of the + /// IFDS problem. We use LLVMZeroValue for this. + explicit ExampleTaintAnalysis(const psr::LLVMProjectIRDB *IRDB, + psr::LLVMAliasIteratorRef AS, + const psr::LLVMTaintConfig *Config, + std::vector EntryPoints) + : psr::DefaultAliasAwareIFDSProblem(IRDB, AS, std::move(EntryPoints), + psr::LLVMZeroValue::getInstance()), + Config(&psr::assertNotNull(Config)) {} + + /// Provides the initial seeds, i.e., the pairs that are assumed + /// to hold un-conditionally at the beginning of the analysis. + /// This is the start state that the IFDS solver will use to start with. + [[nodiscard]] psr::InitialSeeds initialSeeds() override { + psr::InitialSeeds Seeds; + + psr::LLVMBasedCFG CFG; + // Here, we just say that for all entry-functions in the EntryPoints, the + // zero-value should hold at the very first statement. + addSeedsForStartingPoints(EntryPoints, IRDB, CFG, Seeds, getZeroValue()); + + return Seeds; + }; + + /// Here, we define special semantics of function-calls that are specified + /// outside of the target program. 
In the case of taint analysis, we need to + /// handle sources, sinks and sanitizers here: + [[nodiscard]] FlowFunctionPtrType + getSummaryFlowFunction(n_t CallSite, f_t DestFun) override { + const auto *CS = llvm::cast(CallSite); + + // Process the effects of source or sink functions that are called + auto Gen = psr::getGeneratedFacts(*Config, CS, DestFun); + auto Leak = psr::getLeakedFacts(*Config, CS, DestFun); + auto Kill = psr::getSanitizedFacts(*Config, CS, DestFun); + + if (Gen.empty() && Leak.empty() && Kill.empty()) { + // This CallSite apparently is not calling a special source/sink/sanitizer + // function. Fallback to the default-behavior. + return DefaultAliasAwareIFDSProblem::getSummaryFlowFunction(CS, DestFun); + } + + // Since our analysis is alias-aware, we must handle aliasing here: + populateWithMayAliases(getAliasInfo(), Gen, CallSite); + populateWithMayAliases(getAliasInfo(), Leak, CallSite); + + // We have special behavior to communicate to the analysis solver, so create + // a flow-function that captures this behavior: + return lambdaFlow([this, CS, Gen{std::move(Gen)}, Leak{std::move(Leak)}, + Kill{std::move(Kill)}](d_t Source) -> container_type { + if (isZeroValue(Source)) { + // In case of a source, we generate the new taints from zero (Source). + return Gen; + } + + if (Leak.count(Source)) { + // In case of a sink, we create a leak if one of the leaking parameters + // (Leak) is tainted (Source). + Leaks.insert(CS); + } + + if (Kill.count(Source)) { + // In case of a sanitizer, we kill tainted values (Source) that flow + // into the sanitizied parameters (Kill). 
+ return {}; + } + + // Otherwise, the taint is unaffected from the source/sink/sanitizer, so + // propagate it as identity + return {Source}; + }); + } + + // We collect the leaking sink-calls here + llvm::DenseSet Leaks{}; + +private: + const psr::LLVMTaintConfig *Config{}; +}; + +// For all given facts, we add their aliases: +void populateWithMayAliases(psr::LLVMAliasIteratorRef AS, + std::set &Facts, + const llvm::Instruction *At) { + auto Tmp = Facts; + for (const auto *Fact : Facts) { + AS.forallAliasesOf(Fact, At, [&](const auto *Alias) { Tmp.insert(Alias); }); + } + + Facts = std::move(Tmp); +} + +} // namespace + +// Invoke the analysis the same way as explained in 04-run-ifds-analysis: +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: write-ifds-analysis-simple \n"; + return 1; + } + + // Load the IR + psr::LLVMProjectIRDB IRDB(Argv[1]); + if (!IRDB) { + return 1; + } + + psr::LLVMAliasSet AS(&IRDB); + psr::LLVMTaintConfig TC(IRDB); + TC.print(); + llvm::outs() << "------------------------\n"; + + ExampleTaintAnalysis TaintProblem(&IRDB, &AS, &TC, {"main"}); + + psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}, + nullptr, &AS); + + psr::solveIFDSProblem(TaintProblem, ICFG); + + for (const auto *LeakInst : TaintProblem.Leaks) { + llvm::outs() << "Detected taint leak at " << psr::llvmIRToString(LeakInst) + << '\n'; + } +} diff --git a/examples/how-to/08-write-ide-analysis/CMakeLists.txt b/examples/how-to/08-write-ide-analysis/CMakeLists.txt new file mode 100644 index 0000000000..25d4088d49 --- /dev/null +++ b/examples/how-to/08-write-ide-analysis/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.14...3.28) + +project(write-ide-analysis) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +find_package(phasar REQUIRED CONFIG) + +add_executable(write-ide-analysis-simple simple.cpp) +target_link_libraries(write-ide-analysis-simple PRIVATE phasar::phasar) + + +if (TARGET run_sample_programs) + 
add_custom_target(run_write_ide_analysis + DEPENDS write-ide-analysis-simple + COMMAND $<TARGET_FILE:write-ide-analysis-simple> "${CMAKE_CURRENT_LIST_DIR}/../../llvm-hello-world/target/taint.ll" + ) + + add_dependencies(run_sample_programs run_write_ide_analysis) +endif() diff --git a/examples/how-to/08-write-ide-analysis/README.md b/examples/how-to/08-write-ide-analysis/README.md new file mode 100644 index 0000000000..bf67bf15e6 --- /dev/null +++ b/examples/how-to/08-write-ide-analysis/README.md @@ -0,0 +1,29 @@ +# Write an IDE Analysis + +Shows, how you can use PhASAR to write an IDE analysis to analyze LLVM IR. +For this example, we selected the linear constant analysis as problem to implement. + +The code example exactly matches the example from our Wiki: [Writing an IDE Analysis](https://github.com/secure-software-engineering/phasar/wiki/Writing-an-IDE-analysis). +So, we highly recommend taking a look there first. + +## Build + +This example program can be built using cmake. +It assumes, that you have installed PhASAR on your system. If you did not install PhASAR to a default location, you can specify `-Dphasar_ROOT=your/path/to/phasar` when invoking `cmake`, replacing "your/path/to/phasar" by the actual path where you have installed PhASAR. + +```bash +# Invoked from the 08-write-ide-analysis root folder: +$ mkdir -p build && cd build +$ cmake .. +$ cmake --build . +``` + +## Test + +You can test the example program on the target programs from [llvm-hello-world/target](../../llvm-hello-world/target/). 
+ +```bash +# Invoked from the 08-write-ide-analysis/build folder: +./write-ide-analysis-simple ../../../llvm-hello-world/target/call2.ll + +``` diff --git a/examples/how-to/08-write-ide-analysis/simple.cpp b/examples/how-to/08-write-ide-analysis/simple.cpp new file mode 100644 index 0000000000..45443ced08 --- /dev/null +++ b/examples/how-to/08-write-ide-analysis/simple.cpp @@ -0,0 +1,484 @@ +#include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" +#include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" +#include "phasar/Domain/LatticeDomain.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultNoAliasIDEProblem.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" +#include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/raw_ostream.h" + +namespace { + +/// The domain for our analysis. We specialize the edge-value type l_t, i.e., +/// the type of constant values that are assigned to constant integer +/// variables. +struct ExampleIDELinearConstantAnalysisDomain : psr::LLVMAnalysisDomainDefault { + // We want to propagate constant integers. To make this domain a lattice, we + // wrap it into psr::LatticeDomain, adding special values for TOP and BOTTOM. + using l_t = psr::LatticeDomain; +}; + +/// To create a custom IDE analysis, we must create a subclass of the +/// IDETabulationProblem. +/// The utility class DefaultNoAliasIDEProblem implements +/// IDETabulationProblem and already provides some default flow-functions, so +/// that we can focus on the specifica of our analysis. 
+/// +/// \note For simplicity, we don't handle aliasing in this example; however, you +/// can use DefaultAliasAwareIDEProblem to handle aliasing in most cases. +class ExampleLinearConstantAnalysis + : public psr::DefaultNoAliasIDEProblem< + ExampleIDELinearConstantAnalysisDomain> { +public: + /// Constructor of the constant-analysis problem. Just forward all parameters + /// to the base-class. + /// + /// The last parameter of the base-ctor denotes the special zero-value, aka. + /// Λ, of the IDE problem. We use LLVMZeroValue for this. + explicit ExampleLinearConstantAnalysis(const psr::LLVMProjectIRDB *IRDB, + std::vector EntryPoints) + : DefaultNoAliasIDEProblem(IRDB, std::move(EntryPoints), + psr::LLVMZeroValue::getInstance()) {} + + /// Provides the initial seeds, i.e., the pairs that are assumed + /// to hold un-conditionally at the beginning of the analysis. + /// Similar to IFDS, this is the start state that the IDE solver will use to + /// start with. + [[nodiscard]] psr::InitialSeeds initialSeeds() override { + psr::InitialSeeds Seeds; + + psr::LLVMBasedCFG CFG; + // Here, we just say that for all entry-functions in the EntryPoints, the + // zero-value should hold at the very first statement. + addSeedsForStartingPoints(EntryPoints, IRDB, CFG, Seeds, getZeroValue(), + bottomElement()); + + return Seeds; + }; + + FlowFunctionPtrType getNormalFlowFunction(n_t Curr, n_t Succ) override { + if (const auto *Alloca = llvm::dyn_cast(Curr)) { + // Freshly allocated variables hold no constant value + + auto *AT = Alloca->getAllocatedType(); + if (AT->isIntegerTy() || psr::isIntegerLikeType(AT)) { + return generateFromZero(Alloca); + } + } + + if (const auto *Store = llvm::dyn_cast(Curr)) { + // Storing a constant integer. 
+ if (llvm::isa(Store->getValueOperand())) { + return psr::strongUpdateStore(Store, + psr::LLVMZeroValue::isLLVMZeroValue); + } + } + + // Leave everything else defaulted + return this->DefaultNoAliasIDEProblem::getNormalFlowFunction(Curr, Succ); + } + + FlowFunctionPtrType getCallFlowFunction(n_t CallSite, f_t DestFun) override { + // We definitely want to re-use as much as possible from the default + // call-flow-function + auto DefaultFn = + this->DefaultNoAliasIDEProblem::getCallFlowFunction(CallSite, DestFun); + + // If a constant int is passed as parameter, we need to generate the + // parameter inside the callee from zero + + const auto *Call = llvm::cast(CallSite); + container_type Gen; + for (const auto &[Arg, Param] : llvm::zip(Call->args(), DestFun->args())) { + if (llvm::isa(Arg)) { + Gen.insert(&Param); + } + } + + if (Gen.empty()) { + // Nothing special, we can directly use the default call-FF + return DefaultFn; + } + + // Here, we combine both flow-functions: + + auto GenFn = + generateManyFlowsAndKillAllOthers(std::move(Gen), getZeroValue()); + return unionFlows(std::move(DefaultFn), std::move(GenFn)); + } + + FlowFunctionPtrType getRetFlowFunction(n_t CallSite, f_t CalleeFun, + n_t ExitInst, n_t RetSite) override { + + auto DefaultFn = this->DefaultNoAliasIDEProblem::getRetFlowFunction( + CallSite, CalleeFun, ExitInst, RetSite); + + const auto *RetInst = llvm::dyn_cast(ExitInst); + if (RetInst && + llvm::isa_and_present(RetInst->getReturnValue())) { + // If we return a literal constant int, we must generate the corresponding + // value at the call-site from zero, i.e., the CallSite itself in case of + // LLVM's SSA form + + auto RetFn = generateFlowAndKillAllOthers(CallSite, getZeroValue()); + return unionFlows(std::move(DefaultFn), std::move(RetFn)); + } + + return DefaultFn; + } + + // Fallback edge-function that models two composed edge-functions. We try to + // use this as little as possible for performance reasons. 
+ struct LCAEdgeFunctionComposer : psr::EdgeFunctionComposer { + static psr::EdgeFunction + join(psr::EdgeFunctionRef This, + const psr::EdgeFunction &OtherFunction) { + // Try the default join first; otherwise fall back to AllBottom. + + if (auto Default = defaultJoinOrNull(This, OtherFunction)) { + return Default; + } + return psr::AllBottom{}; + } + }; + + // The custom edge-function for binary operations + struct BinOp { + using l_t = ExampleIDELinearConstantAnalysisDomain::l_t; + + unsigned OpCode{}; + const llvm::ConstantInt *LeftConst{}; + const llvm::ConstantInt *RightConst{}; + + // Utility function to make implementing computeTarget() easier + [[nodiscard]] l_t executeBinOperation(l_t LVal, l_t RVal) const { + auto *LopPtr = LVal.getValueOrNull(); + auto *RopPtr = RVal.getValueOrNull(); + + if (!LopPtr || !RopPtr) { + return psr::Bottom{}; + } + + auto Lop = *LopPtr; + auto Rop = *RopPtr; + + // Receives the result of the overflow-checked Add/Sub/Mul below + int64_t Res; + switch (OpCode) { + case llvm::Instruction::Add: + if (llvm::AddOverflow(Lop, Rop, Res)) { + return psr::Bottom{}; + } + return Res; + + case llvm::Instruction::Sub: + if (llvm::SubOverflow(Lop, Rop, Res)) { + return psr::Bottom{}; + } + return Res; + + case llvm::Instruction::Mul: + if (llvm::MulOverflow(Lop, Rop, Res)) { + return psr::Bottom{}; + } + return Res; + + case llvm::Instruction::UDiv: + case llvm::Instruction::SDiv: + if (Lop == std::numeric_limits::min() && + Rop == -1) { // Would produce an overflow, as the negation of min + // is not representable in a signed type. 
+ return psr::Bottom{}; + } + if (Rop == 0) { // Division by zero is UB, so we return Bot + return psr::Bottom{}; + } + return Lop / Rop; + + case llvm::Instruction::URem: + case llvm::Instruction::SRem: + if (Rop == 0) { // Division by zero is UB, so we return Bot + return psr::Bottom{}; + } + return Lop % Rop; + + case llvm::Instruction::And: + return Lop & Rop; + case llvm::Instruction::Or: + return Lop | Rop; + case llvm::Instruction::Xor: + return Lop ^ Rop; + default: + PHASAR_LOG_LEVEL(DEBUG, "Operation not supported by " + "IDELinearConstantAnalysis::" + "executeBinOperation()"); + return psr::Bottom{}; + } + } + + // Utility function to aid the printing operator<< + static char opToChar(const unsigned Op) { + switch (Op) { + case llvm::Instruction::Add: + return '+'; + case llvm::Instruction::Sub: + return '-'; + case llvm::Instruction::Mul: + return '*'; + case llvm::Instruction::UDiv: + case llvm::Instruction::SDiv: + return '/'; + case llvm::Instruction::URem: + case llvm::Instruction::SRem: + return '%'; + case llvm::Instruction::And: + return '&'; + case llvm::Instruction::Or: + return '|'; + case llvm::Instruction::Xor: + return '^'; + default: + return ' '; + } + } + + // Required function that invokes the edge-function with an incoming value + // that is substituted with the non-constant operand of the modeled binary + // operation. + [[nodiscard]] l_t computeTarget(l_t Source) const { + if (LeftConst && RightConst) { // Simple constant-folding + return executeBinOperation(LeftConst->getSExtValue(), + RightConst->getSExtValue()); + } + if (Source == psr::Bottom{}) { + // Bottom is the top-value of our lattice. Whatever we do to it, it will + // always stay Bottom + return Source; + } + + // Now, perform the linear arithmetic. 
+ // First, we have to check which of the two operands is the literal + if (RightConst) { + // The right operand is the literal, so we plug in the incoming value as + // left operand + return executeBinOperation(Source, RightConst->getSExtValue()); + } + if (LeftConst) { + // The left operand is the literal, so we plug in the incoming value as + // right operand + return executeBinOperation(LeftConst->getSExtValue(), Source); + } + + llvm::report_fatal_error( + "Only linear constant propagation can be specified!"); + } + + // Optional function to expose the constant-ness of this edge-function for + // optimization purposes + [[nodiscard]] constexpr bool isConstant() const noexcept { + // If both operands of this binary operation are literal constants, this + // edge function always computes the same value + return LeftConst && RightConst; + } + + // Compose This edge-function with a different SecondFunction + static psr::EdgeFunction + compose(psr::EdgeFunctionRef This, + const psr::EdgeFunction &SecondFunction) { + // Trivial compositions can be defaulted: + if (auto Default = defaultComposeOrNull(This, SecondFunction)) { + return Default; + } + + // Here, we could for example add transformations like: + // compose(BinOp{Add, ..., Const1}, BinOp{Add, ..., Const2}) --> + // BinOp{Add, ..., Const1 + Const2} + + // Fallback for when we don't know better: + return LCAEdgeFunctionComposer{This, SecondFunction}; + } + + // Join This edge-function with a different OtherFunction, going in the + // EF-lattice towards AllBottom. 
+ static psr::EdgeFunction + join(psr::EdgeFunctionRef This, + const psr::EdgeFunction &OtherFunction) { + // Trivial joins can be defaulted: + if (auto Default = defaultJoinOrNull(This, OtherFunction)) { + return Default; + } + + // Here we could, e.g., check whether the two edge functions are + // semantically equivalent, although different, and then return one of them + + // Sound fallback in case we don't know better + return psr::AllBottom{}; + } + + constexpr bool operator==(const BinOp &Other) const noexcept { + return OpCode == Other.OpCode && LeftConst == Other.LeftConst && + RightConst == Other.RightConst; + } + + // Printing (optional): "BinOp[<lhs> <op> <rhs>]", 'x' marks a non-literal + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const BinOp &Bop) { + OS << "BinOp["; + if (Bop.LeftConst) { + OS << *Bop.LeftConst; + } else { + OS << 'x'; + } + + OS << ' ' << opToChar(Bop.OpCode) << ' '; + if (Bop.RightConst) { + OS << *Bop.RightConst; + } else { + OS << 'x'; + } + + return OS << ']'; + } + }; + + psr::EdgeFunction getNormalEdgeFunction(n_t Curr, d_t CurrNode, + n_t /*Succ*/, + d_t SuccNode) override { + if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { + // Handle the two cases, where we generate facts from zero: + + if (const auto *Alloca = llvm::dyn_cast(Curr)) { + // Freshly allocated variables hold no constant value + return psr::AllBottom{}; + } + + if (const auto *Store = llvm::dyn_cast(Curr)) { + + // Storing a constant integer. + const auto *ConstOperand = + llvm::cast(Store->getValueOperand()); + return psr::ConstantEdgeFunction{ConstOperand->getSExtValue()}; + } + } + + // Handle binary operations. The corresponding flow-function is defaulted. 
+ if (llvm::isa(Curr) && SuccNode == Curr && + CurrNode != SuccNode) { + unsigned Op = Curr->getOpcode(); + auto *Lop = Curr->getOperand(0); + auto *Rop = Curr->getOperand(1); + // For non linear constant computation we propagate bottom + if ((CurrNode == Lop && !llvm::isa(Rop)) || + (CurrNode == Rop && !llvm::isa(Lop))) { + return psr::AllBottom{}; + } + + // Attach the arithmetic transformer to this edge + return BinOp{Op, llvm::dyn_cast(Lop), + llvm::dyn_cast(Rop)}; + } + + // Pass everything else as identity + return psr::EdgeIdentity{}; + } + + psr::EdgeFunction getCallEdgeFunction(n_t CallSite, d_t SrcNode, + f_t /*DestinationFunction*/, + d_t DestNode) override { + if (isZeroValue(SrcNode) && !isZeroValue(DestNode)) { + // If a constant int is passed as parameter, we need to generate the + // parameter inside the callee from zero + const auto *DestParam = llvm::cast(DestNode); + const auto *ConstOperand = llvm::cast( + CallSite->getOperand(DestParam->getArgNo())); + return psr::ConstantEdgeFunction{ConstOperand->getSExtValue()}; + } + + // Pass everything else as identity + return psr::EdgeIdentity{}; + } + + psr::EdgeFunction + getReturnEdgeFunction(n_t CallSite, f_t /*CalleeFunction*/, n_t ExitStmt, + d_t ExitNode, n_t /*RetSite*/, d_t RetNode) override { + if (isZeroValue(ExitNode) && RetNode == CallSite) { + // If we return a literal constant int, we must generate the corresponding + // value at the call-site from zero, i.e., the CallSite itself in case of + // LLVM's SSA form + const auto *RetVal = + llvm::cast(ExitStmt)->getReturnValue(); + return psr::ConstantEdgeFunction{ + llvm::cast(RetVal)->getSExtValue()}; + } + + // Pass everything else as identity + return psr::EdgeIdentity{}; + } + + psr::EdgeFunction + getCallToRetEdgeFunction(n_t /*CallSite*/, d_t /*CallNode*/, n_t /*RetSite*/, + d_t /*RetSiteNode*/, + llvm::ArrayRef /*Callees*/) override { + // The call-to-return edge-function handles facts that are not affected by + // the call. 
This is usually the identity function. + return psr::EdgeIdentity{}; + } +}; +} // namespace + +// Invoke the analysis the same way as explained in 05-run-ide-analysis: +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: write-ide-analysis-simple \n"; + return 1; + } + + psr::LLVMProjectIRDB IRDB(Argv[1]); + if (!IRDB) { + return 1; + } + + psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}); + + ExampleLinearConstantAnalysis LCAProblem(&IRDB, {"main"}); + + auto Results = psr::solveIDEProblem(LCAProblem, ICFG); + + // After we have solved the LCAProblem, we can now inspect the detected + // constants: + + const auto *MainF = IRDB.getFunctionDefinition("main"); + if (!MainF) { + llvm::errs() << "Required function 'main' not found\n"; + return 1; + } + + const auto *ExitOfMain = psr::getAllExitPoints(MainF).front(); + + // Get the analysis results right **after** main's return statement + const auto &AllConstantsAtMainExit = Results.resultsAt(ExitOfMain); + + llvm::outs() << "Detected constants at " << psr::llvmIRToString(ExitOfMain) + << ":\n"; + for (const auto &[LLVMVar, ConstVal] : AllConstantsAtMainExit) { + llvm::outs() << " " << psr::llvmIRToString(LLVMVar) << "\n --> "; + if (ConstVal.isBottom()) { + // A "bottom" value here means that the analysis does not know the value + // at this point and that any value may be possible. 
+ + llvm::outs() << "\n\n"; + } else { + llvm::outs() << ConstVal << "\n\n"; + } + } +} diff --git a/examples/how-to/CMakeLists.txt b/examples/how-to/CMakeLists.txt new file mode 100644 index 0000000000..5a1ae5b094 --- /dev/null +++ b/examples/how-to/CMakeLists.txt @@ -0,0 +1,13 @@ +cmake_minimum_required(VERSION 3.14...3.28) + +project(phasar-how-tos) + +file(GLOB children RELATIVE ${CMAKE_CURRENT_LIST_DIR} ${CMAKE_CURRENT_LIST_DIR}/*) + +add_custom_target(run_sample_programs) + +foreach(child ${children}) + if(IS_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/${child} AND NOT "${child}" STREQUAL "build") + add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/${child}) + endif() +endforeach() diff --git a/examples/how-to/README.md b/examples/how-to/README.md new file mode 100644 index 0000000000..b84875a609 --- /dev/null +++ b/examples/how-to/README.md @@ -0,0 +1,13 @@ +# How To ... + +This folder contains various examples on how to use certain features of PhASAR. + +Currently supporting: +- [x] Working with the IR ([here](./00-load-llvm-ir/README.md)) +- [x] Build a type-hierarchy ([here](./01-build-type-hierarchy/README.md)) +- [x] Build a call-graph ([here](./02-build-call-graph/README.md)) +- [x] Create alias-information ([here](./03-create-alias-info/README.md)) +- [x] Run an IFDS analysis ([here](./04-run-ifds-analysis/README.md)) +- [x] Run an IDE analysis ([here](./05-run-ide-analysis/README.md)) +- [x] Write an IFDS analysis ([here](./07-write-ifds-analysis/README.md)) +- [x] Write an IDE analysis ([here](./08-write-ide-analysis/)) diff --git a/examples/llvm-hello-world/CMakeLists.txt b/examples/llvm-hello-world/CMakeLists.txt new file mode 100644 index 0000000000..8ed9e80124 --- /dev/null +++ b/examples/llvm-hello-world/CMakeLists.txt @@ -0,0 +1,13 @@ +cmake_minimum_required(VERSION 3.14...3.28) + +project(llvm-hello-world) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +find_package(LLVM 15 REQUIRED CONFIG) + +add_executable(main main.cpp) +target_link_libraries(main PRIVATE 
LLVMCore LLVMIRReader) +target_include_directories(main PRIVATE ${LLVM_INCLUDE_DIRS}) + +add_subdirectory(target) diff --git a/examples/llvm-hello-world/README.md b/examples/llvm-hello-world/README.md new file mode 100644 index 0000000000..23a1eb7854 --- /dev/null +++ b/examples/llvm-hello-world/README.md @@ -0,0 +1,30 @@ +# LLVM Hello World + +The "Hello, World!" program can be compiled using: + +```bash +$ make +``` + +However, we recommend using cmake: + +```bash +$ mkdir -p build && cd build +$ cmake .. +$ cmake --build . +``` + +"Hello, World!" reads a LLVM IR file (.ll or .bc) specified by the first +command-line argument. It then looks for the main function, iterates all of its +instructions and prints them to the command-line using an LLVM output stream. +Have a look at the comments within the source code in main.cpp. + +Example use: + +```bash +# Invoked from the llvm-hello-world root folder if compiled with make: +./main ./target/simple.ll + +# Invoked from the llvm-hello-world/build folder if compiled with cmake: +./main ./target/simple_cpp_dbg.ll +``` diff --git a/examples/llvm-hello-world/README.txt b/examples/llvm-hello-world/README.txt deleted file mode 100644 index 51b3fcedf6..0000000000 --- a/examples/llvm-hello-world/README.txt +++ /dev/null @@ -1,12 +0,0 @@ -The "Hello, World!" program can be compiled using: - - $ make - -The auto-generated files can be removed using: - - $ make clean - -"Hello, World!" reads a LLVM IR file (.ll or .bc) specified by the first -command-line argument. It then looks for the main function, iterates all of its -instructions and prints them to the command-line using an LLVM output stream. -Have a look at the comments within the source code in main.cpp. 
diff --git a/examples/llvm-hello-world/main.cpp b/examples/llvm-hello-world/main.cpp index 3cb439b218..ce435a7de2 100644 --- a/examples/llvm-hello-world/main.cpp +++ b/examples/llvm-hello-world/main.cpp @@ -2,18 +2,14 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include -#include int main(int argc, char **argv) { if (argc != 2) { diff --git a/examples/llvm-hello-world/target/CMakeLists.txt b/examples/llvm-hello-world/target/CMakeLists.txt new file mode 100644 index 0000000000..f8f25bef38 --- /dev/null +++ b/examples/llvm-hello-world/target/CMakeLists.txt @@ -0,0 +1,12 @@ +add_custom_target(LLFileGeneration ALL) + +# Use phasar's capabilities to automate the LLVM-IR file generation + +include(../../../cmake/phasar_macros.cmake) +set(PHASAR_LLVM_VERSION 15) + +file(GLOB target_files RELATIVE ${CMAKE_CURRENT_LIST_DIR} *.cpp) + +foreach(target_file ${target_files}) + generate_ll_file(FILE ${target_file} DEBUG) +endforeach() diff --git a/examples/llvm-hello-world/target/branching.ll b/examples/llvm-hello-world/target/branching.ll index cc4c36efa6..098eee04f0 100644 --- a/examples/llvm-hello-world/target/branching.ll +++ b/examples/llvm-hello-world/target/branching.ll @@ -1,43 +1,89 @@ ; ModuleID = 'branching.cpp' source_filename = "branching.cpp" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; Function Attrs: noinline norecurse nounwind optnone uwtable -define i32 @main(i32, i8**) #0 { - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8**, 
align 8 - %6 = alloca i32, align 4 - %7 = alloca i32, align 4 - store i32 0, i32* %3, align 4 - store i32 %0, i32* %4, align 4 - store i8** %1, i8*** %5, align 8 - store i32 10, i32* %6, align 4 - %8 = load i32, i32* %4, align 4 - %9 = sub nsw i32 %8, 1 - %10 = icmp ne i32 %9, 0 - br i1 %10, label %11, label %12 - -;