diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..321b838 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,183 @@ +name: Build + +on: + push: + branches: + - master + pull_request: + +jobs: + # ───────────────────────────────────────────────────────────────────────────── + # macOS · Apple Silicon (arm64) + # Runner: macos-15 (M-series, arm64) + # cmake 4.x is pre-installed; libomp from Homebrew patches Apple Clang OpenMP + # ───────────────────────────────────────────────────────────────────────────── + build-macos-arm64: + name: macOS · Apple Silicon (arm64) + runs-on: macos-15 + env: + CCACHE_DIR: ${{ github.workspace }}/.ccache + + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies + run: brew install ccache gsl libomp boost + + - name: Cache ccache + uses: actions/cache@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: ccache-macos-arm64-${{ github.sha }} + restore-keys: ccache-macos-arm64- + + - name: Configure + run: | + mkdir build && cd build + cmake ../src \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_PREFIX_PATH="$(brew --prefix boost)" \ + -DGSL_ROOT_DIR="$(brew --prefix gsl)" + + - name: Build + run: make -C build -j$(sysctl -n hw.logicalcpu) + + - name: Verify + run: | + ./build/pureclip --version + ./build/winextract --version + file build/pureclip | grep -q arm64 && echo "✓ native arm64 binary" + + - name: ccache stats + run: ccache --show-stats + + # ───────────────────────────────────────────────────────────────────────────── + # macOS · strict warnings (-Werror) + # Non-blocking: flags new warnings introduced by a PR without failing the + # merge gate. Only runs on macOS (AppleClang has the clearest diagnostics). + # ───────────────────────────────────────────────────────────────────────────── + warnings-macos-arm64: + name: macOS · warnings as errors (non-blocking) + runs-on: macos-15 + continue-on-error: true + env: + CCACHE_DIR: ${{ github.workspace }}/.ccache + + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies + run: brew install ccache gsl libomp boost + + - name: Cache ccache + uses: actions/cache@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: ccache-macos-arm64-werror-${{ github.sha }} + restore-keys: ccache-macos-arm64-werror- + + - name: Configure (warnings as errors) + run: | + mkdir build && cd build + cmake ../src \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_PREFIX_PATH="$(brew --prefix boost)" \ + -DGSL_ROOT_DIR="$(brew --prefix gsl)" \ + -DWERROR=ON + + - name: Build + run: make -C build -j$(sysctl -n hw.logicalcpu) + + # ───────────────────────────────────────────────────────────────────────────── + # Linux · x86_64 + # Runner: ubuntu-24.04 (standard x86_64 runner, available on all plans) + # Primary deployment target for HPC/server environments and the existing + # Bioconda package. GCC supports OpenMP natively. + # ───────────────────────────────────────────────────────────────────────────── + build-linux-x86: + name: Linux · x86_64 + runs-on: ubuntu-24.04 + env: + CCACHE_DIR: ${{ github.workspace }}/.ccache + + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies + run: | + sudo apt-get update -q + sudo apt-get install -y ccache cmake g++ libgsl-dev libboost-dev zlib1g-dev libbz2-dev + + - name: Cache ccache + uses: actions/cache@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: ccache-linux-x86-${{ github.sha }} + restore-keys: ccache-linux-x86- + + - name: Configure + run: | + mkdir build && cd build + cmake ../src \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + + - name: Build + run: make -C build -j$(nproc) + + - name: Verify + run: | + ./build/pureclip --version + ./build/winextract --version + file build/pureclip | grep -q x86-64 && echo "✓ native x86_64 binary" + + - name: ccache stats + run: ccache --show-stats + + # ───────────────────────────────────────────────────────────────────────────── + # Linux · arm64 + # Runner: ubuntu-24.04-arm (available on GitHub Pro and above) + # GCC supports OpenMP natively — no libomp workaround needed + # cmake 3.31 pre-installed — no cmake 4.x compat issues + # ───────────────────────────────────────────────────────────────────────────── + build-linux-arm64: + name: Linux · arm64 + runs-on: ubuntu-24.04-arm + env: + CCACHE_DIR: ${{ github.workspace }}/.ccache + + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies + run: | + sudo apt-get update -q + sudo apt-get install -y ccache cmake g++ libgsl-dev libboost-dev zlib1g-dev libbz2-dev + + - name: Cache ccache + uses: actions/cache@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: ccache-linux-arm64-${{ github.sha }} + restore-keys: ccache-linux-arm64- + + - name: Configure + run: | + mkdir build && cd build + cmake ../src \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + + - name: Build + run: make -C build -j$(nproc) + + - name: Verify + run: | + ./build/pureclip --version + ./build/winextract --version + file build/pureclip | grep -q aarch64 && echo "✓ native aarch64 binary" + + - name: ccache stats + run: ccache --show-stats diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 415694e..0000000 --- a/.travis.yml +++ /dev/null @@ -1,67 +0,0 @@ -language: cpp -matrix: - include: - - os: linux - dist: trusty - sudo: false - compiler: gcc-7 - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-7 - - cmake - - zlib1g-dev - - libbz2-dev - - libboost-dev - - libgsl0-dev - before_install: - - "python3 --version || :" - - "python --version || :" - install: export CXX="g++-7" - env: - - LINK_TOOL="ldd" - - DEPLOY_PATTERN="PureCLIP-*-linux64-{static,dynamic}.tar.gz" - - os: osx - osx_image: xcode9.2 - compiler: gcc-7 - before_install: - - brew ls --versions - - brew install gcc@7 gsl - - brew install python || brew upgrade python - - brew ls --versions - - softwareupdate -l - - "python3 --version || :" - - "python --version || :" - install: export CXX="g++-7" - env: - - LINK_TOOL="otool -L" - - DEPLOY_PATTERN="PureCLIP-*-mac64.tar.gz" -script: -- ./pkg_util/pkg_check.py . -- mkdir -p build && cd build -- cmake ../src -DCMAKE_BUILD_TYPE=Release -- make VERBOSE=1 -- "./pureclip --version" -- "./winextract --version" -- cmake . -DPKG_BUILD=1 -DSTATIC_BUILD=0 -- make package -- "$LINK_TOOL pureclip || :" -- cmake . -DSTATIC_BUILD=1 -- make package -- "$LINK_TOOL pureclip || :" -- ls -l -- 'for f in *.tar.gz; do echo "$f":; tar tf "$f"; done' -deploy: - skip_cleanup: true - provider: releases - draft: true - tag_name: $TRAVIS_TAG # Workaround for Travis bug - target_commitish: $TRAVIS_COMMIT # Workaround for Travis bug - api_key: - secure: PiqTCnGo/oM8hkj+RwNj47F9KcD7E26tdQ1ZoJlRCoG7UK35dnjBDNsK7TwUp1jT9VQ7J7WckD3VjybLkeDL0D+4rHLkBJWtmViIU/cH1aGg1ltksjREgeB8El5FUVXQkd3RND02aPjqKsIdXlIEo7Z8wmQtTPhAQULqQLUBQzT5BAZq7mQJYPo3n2MpicwFQ6BrjCB0mB9+v/hgKaxEepED+rA5WhUG8djLrrwJ9X5sIdj13kwrbJwgBwdzGAKdoabZ/Sh7+nbK/YNilntnNxEdhD+WUhswOxEPuDuB+TsdTB96wrbAdrJctHIWIlDdgFiyn+gCpvvitTNG/JPZKa0z06NpqRcwqpvzPnAmNeQHw5F7qgvqB5zhoT2e9Xfvem4Rg1MwIhte+qDLCS6s7ySOvW+4+FCZXgSDHOC+wo4e40f382UmmbPctLplw04H9uEsYpF2vzVr1KNbCqigGetLz3OEHoSmjtrxpsgrMAcfvy1nmSK6vY1ESljD0zVVShHw4sVzOprE2jY93z49hQMZo9D0PbJRIh5arydMcdD4B7BKXS1/zM1XtWZRzNaJH8Rzw0jWPSLbtx7igfyZ/jOEdNIziS3KdacEJHEU5dkGoF7WovcAffZNxpWwQefry5Es8ox4MsI+A22jZ9YBIqqLK2+Lf3aOiP9Zn39ByI4= - file_glob: True - file: $DEPLOY_PATTERN - on: - tags: True diff --git a/INSTALLATION.md b/INSTALLATION.md new file mode 100644 index 0000000..4775289 --- /dev/null +++ b/INSTALLATION.md @@ -0,0 +1,164 @@ +# Installing PureCLIP + +This guide covers installing PureCLIP from source on **Linux** and **macOS** +(including Apple Silicon / arm64), as well as via Bioconda. + +--- + +## Table of Contents + +- [Bioconda (easiest)](#bioconda-easiest) +- [Linux – build from source](#linux--build-from-source) +- [macOS – build from source](#macos--build-from-source) + - [Intel (x86\_64)](#intel-x86_64) + - [Apple Silicon (arm64)](#apple-silicon-arm64) +- [Verify the build](#verify-the-build) +- [Quick sample run](#quick-sample-run) + +--- + +## Bioconda (easiest) + +```bash +conda install -c bioconda pureclip +``` + +> Requires an active [Bioconda channel](https://bioconda.github.io). +> Pre-built bottles are available for Linux (x86\_64) and macOS (x86\_64). +> For Apple Silicon use the native build from source described below. + +--- + +## Linux – build from source + +### Prerequisites + +| Tool | Version | Install | +|------|---------|---------| +| CMake | ≥ 3.0 | `apt install cmake` / `dnf install cmake` | +| GCC | ≥ 5 (C++14) | `apt install g++` | +| GSL | any | `apt install libgsl-dev` | +| zlib | any | `apt install zlib1g-dev` | + +SeqAn 2.2.0 and Boost 1.64.0 are **downloaded automatically** by CMake on +first configure. + +### Build + +```bash +git clone https://github.com/skrakau/PureCLIP.git +cd PureCLIP +mkdir build && cd build +cmake ../src -DCMAKE_BUILD_TYPE=Release +make -j$(nproc) +``` + +Binaries are written to `build/pureclip` and `build/winextract`. +Optionally install system-wide: + +```bash +sudo make install # installs to /usr/local/bin by default +``` + +--- + +## macOS – build from source + +### Prerequisites (all architectures) + +Install [Homebrew](https://brew.sh) if you do not have it, then: + +```bash +brew install cmake gsl libomp +``` + +| Package | Purpose | +|---------|---------| +| `cmake` | Build system | +| `gsl` | GNU Scientific Library | +| `libomp` | OpenMP runtime for Apple Clang (both Intel and arm64) | + +### Intel (x86\_64) + +```bash +git clone https://github.com/skrakau/PureCLIP.git +cd PureCLIP +mkdir build && cd build +cmake ../src -DCMAKE_BUILD_TYPE=Release +make -j$(sysctl -n hw.logicalcpu) +``` + +### Apple Silicon (arm64) + +Apple's bundled `clang` does not ship with an OpenMP runtime. +The CMakeLists.txt in this repo detects Apple Clang automatically and picks up +`libomp` from Homebrew — **no extra flags are needed**. + +```bash +git clone https://github.com/skrakau/PureCLIP.git +cd PureCLIP +mkdir build && cd build +cmake ../src -DCMAKE_BUILD_TYPE=Release +make -j$(sysctl -n hw.logicalcpu) +``` + +> **What happens under the hood:** CMake detects `Apple Clang`, calls +> `brew --prefix libomp`, and sets `-Xpreprocessor -fopenmp` together with the +> correct include / library paths automatically. +> If `libomp` is not found cmake will abort with a clear install instruction. + +#### Troubleshooting + +| Problem | Fix | +|---------|-----| +| `brew: command not found` during cmake | Install Homebrew or set `LIBOMP_PREFIX` manually: `cmake ../src -DOpenMP_omp_LIBRARY=/path/to/libomp.dylib` | +| `ld: library 'omp' not found` | Run `brew install libomp` and re-run cmake | +| SeqAn / Boost download fails | Check internet access; or pass `-DSEQAN_ROOT=` / `-DBOOST_ROOT=` pointing to local copies | + +--- + +## Verify the build + +```bash +./pureclip --version +./winextract --version +``` + +--- + +## Quick sample run + +The following uses a preprocessed eCLIP BAM from ENCODE +([Van Nostrand et al., 2016](https://www.ncbi.nlm.nih.gov/pubmed/27018577)) +and the hg19 reference genome to run PureCLIP in basic mode. + +> **Disk space:** ~4 GB for the BAM + genome. +> **Runtime:** ~10–20 min depending on CPU count. + +```bash +# 1. Download data (requires samtools on PATH) +wget -O aligned.prepro.bam \ + https://www.encodeproject.org/files/ENCFF280ONP/@@download/ENCFF280ONP.bam +samtools view -hb -f 130 aligned.prepro.bam -o aligned.prepro.R2.bam +samtools index aligned.prepro.R2.bam + +# 2. Download reference genome +wget -O ref.hg19.fa.gz \ + https://www.encodeproject.org/files/female.hg19/@@download/female.hg19.fasta.gz +gunzip ref.hg19.fa.gz + +# 3. Run PureCLIP (learn on chr1–3, 10 threads) +./pureclip \ + -i aligned.prepro.R2.bam \ + -bai aligned.prepro.R2.bam.bai \ + -g ref.hg19.fa \ + -iv 'chr1;chr2;chr3;' \ + -nt 10 \ + -o PureCLIP.crosslink_sites.bed +``` + +Output files: +- `PureCLIP.crosslink_sites.bed` — individual crosslink sites +- `PureCLIP.crosslink_clusters.bed` — merged binding regions (generated automatically) + +For more options see the [full documentation](http://pureclip.readthedocs.io/en/latest/). diff --git a/README.md b/README.md index bc554cc..6f2f96a 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,10 @@ You can install PureCLIP from Bioconda, from the release tarballs and from sourc Using Conda with an activated [Bioconda](http://bioconda.github.io) channel is the easiest way to install PureCLIP: $ conda install pureclip - + +> **Apple Silicon (arm64):** Bioconda bottles are built for x86\_64. For a native +> arm64 binary build from source — see [INSTALLATION.md](INSTALLATION.md). + ## Release Tarballs Alternatively, you can get the source code and binaries for macOS and Linux [here](https://github.com/skrakau/PureCLIP/releases/latest). @@ -22,25 +25,29 @@ PureCLIP has also been integrated into the European Galaxy server https://usegal Thanks to the Freiburg Galaxy Team! -# Build from source +# Installation -Clone the repository +For full installation instructions — including **Apple Silicon (arm64)** native +builds, Linux, and Bioconda — see **[INSTALLATION.md](INSTALLATION.md)**. + +### Quick start (Linux / macOS) $ git clone https://github.com/skrakau/PureCLIP.git $ cd PureCLIP + $ mkdir build && cd build + $ cmake ../src -DCMAKE_BUILD_TYPE=Release + $ make -j$(nproc 2>/dev/null || sysctl -n hw.logicalcpu) -Create a build directory, configure the build and compile - - $ mkdir build - $ cd build - $ cmake ../src - $ make +> **macOS (Apple Silicon):** install `libomp` first (`brew install cmake gsl libomp`). +> The build system detects Apple Clang automatically — no extra flags needed. +> See [INSTALLATION.md](INSTALLATION.md) for details. Requirements - - C++14 compliant compiler + - C++14 compliant compiler (GCC ≥ 5 or Clang ≥ 3.4) - GSL - cmake 3.0 or newer + - OpenMP (`libomp` via Homebrew on macOS) # Documentation diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2cc515c..9f17558 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,95 +1,184 @@ # Minimum cmake version -cmake_minimum_required (VERSION 3.0.0) +cmake_minimum_required (VERSION 3.5...3.31) # Generate Release builds by default -if ( NOT CMAKE_BUILD_TYPE ) - message ( STATUS "Setting build type to Release" ) - set ( CMAKE_BUILD_TYPE "Release" CACHE STRING "Build Type" FORCE ) +if (NOT CMAKE_BUILD_TYPE) + message (STATUS "Setting build type to Release") + set (CMAKE_BUILD_TYPE "Release" CACHE STRING "Build Type" FORCE) else() - message ( STATUS "User selected build type: ${CMAKE_BUILD_TYPE}" ) + message (STATUS "User selected build type: ${CMAKE_BUILD_TYPE}") endif() # Name of project and that it is C++ only. project (PureCLIP CXX) +# C++23 — both compilers on supported platforms support it (AppleClang 17+, +# GCC 13+) and SeqAn 2.5 tests against C++23 in its own CI. +# The codebase compiles cleanly at this standard with zero errors. +# +# Note on numerical stability: PureCLIP operates entirely in IEEE 754 f64. +# The C++ standard does not change how the FPU evaluates arithmetic; if a +# result shifts by an ULP between standard versions both answers are equally +# valid approximations. A standard bump is not a regression source. +set (CMAKE_CXX_STANDARD 23) +set (CMAKE_CXX_STANDARD_REQUIRED ON) +set (CMAKE_CXX_EXTENSIONS OFF) + # ---------------------------------------------------------------------------- # Dependencies # ---------------------------------------------------------------------------- -LIST ( APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/ ) -find_package ( OpenMP REQUIRED ) -include ( SeqAn ) -include ( Boost ) +list (APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/) + +# ---------------------------------------------------------------------------- +# Apple Clang / arm64 (Apple Silicon) OpenMP support +# Apple's clang does not bundle OpenMP; use libomp installed via Homebrew. +# Install with: brew install libomp +# ---------------------------------------------------------------------------- +if (APPLE AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") + # Try to locate libomp via Homebrew (works for both arm64 and Intel). + # brew is typically not on cmake's default PATH, so probe known prefixes. + find_program (BREW_EXEC brew + PATHS /opt/homebrew/bin /usr/local/bin + NO_DEFAULT_PATH + ) + if (BREW_EXEC) + execute_process ( + COMMAND ${BREW_EXEC} --prefix libomp + OUTPUT_VARIABLE LIBOMP_PREFIX + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE LIBOMP_BREW_RESULT + ) + endif() + # Fall back to well-known Homebrew keg paths if brew lookup failed + if (NOT LIBOMP_PREFIX OR NOT EXISTS "${LIBOMP_PREFIX}/lib/libomp.dylib") + foreach (_candidate + "/opt/homebrew/opt/libomp" + "/usr/local/opt/libomp") + if (EXISTS "${_candidate}/lib/libomp.dylib") + set (LIBOMP_PREFIX "${_candidate}") + break() + endif() + endforeach() + endif() + if (EXISTS "${LIBOMP_PREFIX}/lib/libomp.dylib") + message (STATUS "Apple Clang detected -- using libomp at ${LIBOMP_PREFIX}") + set (OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${LIBOMP_PREFIX}/include") + set (OpenMP_CXX_LIB_NAMES "omp") + set (OpenMP_omp_LIBRARY "${LIBOMP_PREFIX}/lib/libomp.dylib") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${LIBOMP_PREFIX}/lib -lomp") + else() + message (FATAL_ERROR + "Apple Clang requires libomp for OpenMP support.\n" + "Install it with: brew install libomp\n" + "Then re-run cmake." + ) + endif() +endif() + +find_package (OpenMP REQUIRED) +include (SeqAn) +include (Boost) find_package (GSL REQUIRED) # ---------------------------------------------------------------------------- # Build Setup # ---------------------------------------------------------------------------- -# Add include directories. -include_directories (SYSTEM ${SEQAN_INCLUDE_DIRS}) -include_directories (SYSTEM ${GSL_INCLUDE_DIRS}) -include_directories (SYSTEM ${Boost_INCLUDE_DIRS}) +# SeqAn defaults BGZF I/O to 16 threads; PureCLIP manages its own +# threading via -nt and requires serial BAM access for correct results. +# This replicates the single change made in the original skrakau/seqan fork. +set (PURECLIP_COMPILE_DEFS + SEQAN_BGZF_NUM_THREADS=1 + ${SEQAN_DEFINITIONS} +) + +# target_compile_options requires a proper cmake list (one flag per element). +# Both SEQAN_CXX_FLAGS and OpenMP_CXX_FLAGS are space-separated strings so we +# split them before passing to target_compile_options. +separate_arguments (SEQAN_CXX_FLAGS_LIST UNIX_COMMAND "${SEQAN_CXX_FLAGS}") +separate_arguments (OpenMP_CXX_FLAGS_LIST UNIX_COMMAND "${OpenMP_CXX_FLAGS}") + +# Warnings: enable a broad set for all builds. Treated as errors only when +# the caller explicitly sets -DWERROR=ON (used by the strict CI job). +set (PURECLIP_WARNING_FLAGS -Wall -Wextra -Wpedantic) +if (WERROR) + list (APPEND PURECLIP_WARNING_FLAGS -Werror) +endif() -# Add definitions set by find_package (SeqAn). -add_definitions (${SEQAN_DEFINITIONS}) +set (PURECLIP_COMPILE_OPTIONS + ${SEQAN_CXX_FLAGS_LIST} + ${OpenMP_CXX_FLAGS_LIST} + ${PURECLIP_WARNING_FLAGS}) -# Add CXX flags found by find_package (SeqAn). -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SEQAN_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +set (PURECLIP_INCLUDE_DIRS + ${SEQAN_INCLUDE_DIRS} + ${GSL_INCLUDE_DIRS} + ${Boost_INCLUDE_DIRS} +) # PureCLIP static build for Linux set (LINK_TYPE "-dynamic") -if ( STATIC_BUILD ) +if (STATIC_BUILD) set (LINK_TYPE "-static") - if(UNIX AND NOT APPLE) - message ( STATUS "Configuring Linux static build" ) - SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a") - SET(CMAKE_EXE_LINKER_FLAGS "-static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive") + if (UNIX AND NOT APPLE) + message (STATUS "Configuring Linux static build") + set (CMAKE_FIND_LIBRARY_SUFFIXES ".a") + set (CMAKE_EXE_LINKER_FLAGS "-static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive") elseif (UNIX AND APPLE) set (LINK_TYPE "") - message ( STATUS "Configuring Mac build with static libstdc++" ) + message (STATUS "Configuring Mac build with static libstdc++") string (REPLACE "dylib" "a" GSL_LIBRARIES "${GSL_LIBRARIES}") - SET(CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++") - SET(BUILD_SHARED_LIBRARIES OFF) + set (CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++") + set (BUILD_SHARED_LIBRARIES OFF) endif() endif() -# Add executable and link against SeqAn dependencies. +# Executables add_executable (pureclip - pureclip.cpp - util.h - call_sites.h - parse_alignments.h - prepro_mle.h - hmm_1.h - density_functions.h) + pureclip.cpp + util.h + call_sites.h + parse_alignments.h + prepro_mle.h + hmm_1.h + density_functions.h) add_executable (winextract winextract.cpp) -target_link_libraries (pureclip ${Boost_LIBRARIES} ${SEQAN_LIBRARIES} ${GSL_LIBRARIES}) -target_link_libraries (winextract ${Boost_LIBRARIES} ${SEQAN_LIBRARIES} ${GSL_LIBRARIES}) +foreach (_target pureclip winextract) + target_include_directories (${_target} SYSTEM PRIVATE ${PURECLIP_INCLUDE_DIRS}) + target_compile_definitions (${_target} PRIVATE ${PURECLIP_COMPILE_DEFS}) + target_compile_options (${_target} PRIVATE ${PURECLIP_COMPILE_OPTIONS}) + target_link_libraries (${_target} PRIVATE + ${Boost_LIBRARIES} + ${SEQAN_LIBRARIES} + ${GSL_LIBRARIES}) +endforeach() # Installation -if ( PKG_BUILD ) - SET ( BINDIR "." ) - SET ( EXTRADIR "." ) +if (PKG_BUILD) + set (BINDIR ".") + set (EXTRADIR ".") else() - SET ( BINDIR "bin" ) - SET ( EXTRADIR "share/pureclip" ) + set (BINDIR "bin") + set (EXTRADIR "share/pureclip") endif() install (TARGETS pureclip winextract RUNTIME DESTINATION "${BINDIR}") install (FILES "../LICENSE.md" DESTINATION "${EXTRADIR}") # Packaging -if(UNIX AND NOT APPLE) - SET(CPACK_SYSTEM_NAME "linux64${LINK_TYPE}") +if (UNIX AND NOT APPLE) + set (CPACK_SYSTEM_NAME "linux64${LINK_TYPE}") elseif (UNIX AND APPLE) - SET(CPACK_SYSTEM_NAME "mac64${LINK_TYPE}") + set (CPACK_SYSTEM_NAME "mac64${LINK_TYPE}") endif() -SET(CPACK_PACKAGE_VERSION_MAJOR "1") -SET(CPACK_PACKAGE_VERSION_MINOR "3") -SET(CPACK_PACKAGE_VERSION_PATCH "1") -SET(CPACK_GENERATOR "TGZ") -SET(CPACK_INSTALLED_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/../util;util" "${CMAKE_CURRENT_SOURCE_DIR}/../pkg;.") -message(STATUS "Adding file ${CMAKE_CURRENT_SOURCE_DIR}/../pkg/README") -include ( CPack ) +set (CPACK_PACKAGE_VERSION_MAJOR "1") +set (CPACK_PACKAGE_VERSION_MINOR "3") +set (CPACK_PACKAGE_VERSION_PATCH "1") +set (CPACK_GENERATOR "TGZ") +set (CPACK_INSTALLED_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/../util;util" + "${CMAKE_CURRENT_SOURCE_DIR}/../pkg;.") +message (STATUS "Adding file ${CMAKE_CURRENT_SOURCE_DIR}/../pkg/README") +include (CPack) diff --git a/src/call_sites.h b/src/call_sites.h index 627defb..3653e90 100644 --- a/src/call_sites.h +++ b/src/call_sites.h @@ -261,7 +261,7 @@ bool loadBAMCovariates(Data &data, TBai &inputBaiIndex, TStore &store, bool para stop = true; continue; } - String<__uint16> truncCounts; + String truncCounts; resize(truncCounts, data.setObs[s][i].length(), 0, Exact()); if (s == 0) { diff --git a/src/call_sites_replicates.h b/src/call_sites_replicates.h index d866a90..a6af072 100644 --- a/src/call_sites_replicates.h +++ b/src/call_sites_replicates.h @@ -211,7 +211,7 @@ bool intersect_replicateIntervals(String &data_replicates) template -HMM merge_HMMs(String > &hmms_replicates, String > &modelParams, AppOptions &options) +HMM merge_HMMs(String > &hmms_replicates, String > & /*modelParams*/, AppOptions &options) { HMM mergedHmm = hmms_replicates[0]; @@ -358,7 +358,7 @@ void setUp(String &data_replicates) unsigned no_t = 10; Observations obsShort; - String<__uint16> truncs; + String truncs; resize(truncs, 100, 0); obsShort.truncCounts = infix(truncs, 10, 20); resize(obsShort.nEstimates, no_t, 0); diff --git a/src/cmake/Boost.cmake b/src/cmake/Boost.cmake index 1ff7ce8..1df819f 100644 --- a/src/cmake/Boost.cmake +++ b/src/cmake/Boost.cmake @@ -1,30 +1,26 @@ -find_package ( Boost 1.58 ) +find_package (Boost 1.58) -if (NOT Boost_FOUND ) - set ( BOOST_URL "https://boostorg.jfrog.io/artifactory/main/release/1.64.0/source/boost_1_64_0.tar.bz2") - set ( BOOST_SHA256 "7bcc5caace97baa948931d712ea5f37038dbb1c5d89b43ad4def4ed7cb683332") - set ( BOOST_ZIP_OUT ${CMAKE_CURRENT_BINARY_DIR}/boost_1_64_0.tar.bz2 ) - set ( BOOST_ROOT ${CMAKE_CURRENT_BINARY_DIR}/boost_1_64_0 ) +if (NOT Boost_FOUND) + set (BOOST_URL "https://boostorg.jfrog.io/artifactory/main/release/1.64.0/source/boost_1_64_0.tar.bz2") + set (BOOST_SHA256 "7bcc5caace97baa948931d712ea5f37038dbb1c5d89b43ad4def4ed7cb683332") + set (BOOST_ZIP_OUT ${CMAKE_CURRENT_BINARY_DIR}/boost_1_64_0.tar.bz2) + set (BOOST_ROOT ${CMAKE_CURRENT_BINARY_DIR}/boost_1_64_0) - if ( NOT EXISTS ${BOOST_ROOT} ) - # Download zip file - message ("Downloading ${BOOST_URL}") - file (DOWNLOAD ${BOOST_URL} ${BOOST_ZIP_OUT} - EXPECTED_SHA256 ${BOOST_SHA256} - SHOW_PROGRESS STATUS status) - list ( GET status 0 ret ) - list ( GET status 0 str) - if ( NOT ret EQUAL 0) - message (FATAL_ERROR "Download failed") - endif() - # Unpack zip file - message ("Unpacking ${BOOST_ZIP_OUT}") - execute_process(COMMAND cmake -E tar zxf ${BOOST_ZIP_OUT} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - # Remove zip file - if ( EXISTS ${BOOST_ZIP_OUT} ) - file (REMOVE ${BOOST_ZIP_OUT}) - endif() - # Try to find Boost again, this time REQUIRED - endif() - find_package ( Boost 1.58 REQUIRED ) + if (NOT EXISTS ${BOOST_ROOT}) + message ("Downloading ${BOOST_URL}") + file (DOWNLOAD ${BOOST_URL} ${BOOST_ZIP_OUT} + EXPECTED_HASH SHA256=${BOOST_SHA256} + SHOW_PROGRESS STATUS status) + list (GET status 0 ret) + if (NOT ret EQUAL 0) + message (FATAL_ERROR "Boost download failed") + endif() + message ("Unpacking ${BOOST_ZIP_OUT}") + execute_process (COMMAND ${CMAKE_COMMAND} -E tar xf ${BOOST_ZIP_OUT} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + if (EXISTS ${BOOST_ZIP_OUT}) + file (REMOVE ${BOOST_ZIP_OUT}) + endif() + endif() + find_package (Boost 1.58 REQUIRED) endif() diff --git a/src/cmake/SeqAn.cmake b/src/cmake/SeqAn.cmake index dd70336..a86050e 100644 --- a/src/cmake/SeqAn.cmake +++ b/src/cmake/SeqAn.cmake @@ -1,43 +1,41 @@ if (SEQAN_ROOT) - if (NOT EXISTS ${SEQAN_ROOT}/share/cmake/Modules/FindSeqAn.cmake) - message ( FATAL_ERROR "SEQAN_ROOT was specified but '${SEQAN_ROOT}/share/cmake/Modules/FindSeqAn.cmake' does not exist." ) + if (NOT EXISTS ${SEQAN_ROOT}/share/cmake/seqan/seqan-config.cmake) + message (FATAL_ERROR "SEQAN_ROOT was specified but '${SEQAN_ROOT}/share/cmake/seqan/seqan-config.cmake' does not exist.") endif() else() - set ( SEQAN_URL "https://github.com/skrakau/seqan/releases/download/seqan-v2.2.0_mod/seqan-library-2.2.0_mod.zip") - set ( SEQAN_MD5 "7758a6b8f3000e07d580cfdd4cc57f2f") - set ( SEQAN_ZIP_OUT ${CMAKE_CURRENT_BINARY_DIR}/seqan-library-2.2.0_mod.zip ) - set ( SEQAN_ROOT ${CMAKE_CURRENT_BINARY_DIR}/seqan-library-2.2.0_mod ) + set ( SEQAN_URL "https://github.com/seqan/seqan/releases/download/seqan-v2.5.3/seqan-library-2.5.3.zip") + set ( SEQAN_SHA256 "7da029319f0d0674f5aa1a3939b4e7b89d2a9a8e7a288a182492347ce3b5db3d") + set ( SEQAN_ZIP_OUT ${CMAKE_CURRENT_BINARY_DIR}/seqan-library-2.5.3.zip ) + set ( SEQAN_ROOT ${CMAKE_CURRENT_BINARY_DIR}/seqan-library-2.5.3 ) - if (NOT EXISTS ${SEQAN_ROOT}/share/cmake/Modules/FindSeqAn.cmake ) - # Download zip file + if (NOT EXISTS ${SEQAN_ROOT}/share/cmake/seqan/seqan-config.cmake) + # Download zip message ("Downloading ${SEQAN_URL}") file (DOWNLOAD ${SEQAN_URL} ${SEQAN_ZIP_OUT} - EXPECTED_MD5 ${SEQAN_MD5} + EXPECTED_HASH SHA256=${SEQAN_SHA256} SHOW_PROGRESS STATUS status) - list ( GET status 0 ret ) - list ( GET status 0 str) - if ( NOT ret EQUAL 0) - message (FATAL_ERROR "Download failed") + list (GET status 0 ret) + if (NOT ret EQUAL 0) + message (FATAL_ERROR "SeqAn download failed") endif() - # Unpack zip file + # Unpack zip message ("Unpacking ${SEQAN_ZIP_OUT}") - execute_process(COMMAND cmake -E tar zxf ${SEQAN_ZIP_OUT} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - # Remove zip file - if ( EXISTS ${SEQAN_ZIP_OUT} ) + execute_process (COMMAND ${CMAKE_COMMAND} -E tar xf ${SEQAN_ZIP_OUT} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + # Remove zip + if (EXISTS ${SEQAN_ZIP_OUT}) file (REMOVE ${SEQAN_ZIP_OUT}) endif() endif() - # Check if FindSeqAn.cmake can be found wher it should - if (NOT EXISTS ${SEQAN_ROOT}/share/cmake/Modules/FindSeqAn.cmake ) + + if (NOT EXISTS ${SEQAN_ROOT}/share/cmake/seqan/seqan-config.cmake) message (FATAL_ERROR "Failed to download and unpack '${SEQAN_URL}'") endif() endif () -LIST ( APPEND CMAKE_MODULE_PATH ${SEQAN_ROOT}/share/cmake/Modules/ ) -SET( SEQAN_INCLUDE_PATH ${SEQAN_ROOT}/include/ ) - # Search for zlib as a dependency for SeqAn. find_package (ZLIB REQUIRED) -# Load the SeqAn module and fail if not found. -find_package (SeqAn REQUIRED) +# Load SeqAn 2.5.3 via its CMake config file (uses execute_process, cmake 4.x safe). +set (SeqAn_DIR ${SEQAN_ROOT}/share/cmake/seqan) +find_package (SeqAn CONFIG REQUIRED) diff --git a/src/density_functions.h b/src/density_functions.h index 3951d50..dd49540 100644 --- a/src/density_functions.h +++ b/src/density_functions.h @@ -595,7 +595,7 @@ void printParams(TOut &out, GAMMA &gamma, int i) } -void checkOrderG1G2(GAMMA &gamma1, GAMMA &gamma2, AppOptions &options) +void checkOrderG1G2(GAMMA &gamma1, GAMMA &gamma2, AppOptions & /*options*/) { if (exp(gamma1.b0) > exp(gamma2.b0)) { diff --git a/src/density_functions_crosslink.h b/src/density_functions_crosslink.h index 27684d8..ec5120f 100644 --- a/src/density_functions_crosslink.h +++ b/src/density_functions_crosslink.h @@ -92,7 +92,7 @@ void ZTBIN::updateP(String > > &statePosteriors, // k: diagnostic events (de); n: read counts (c) -long double ZTBIN::getDensity(unsigned const &k, unsigned const &n, AppOptions const&options) +long double ZTBIN::getDensity(unsigned const &k, unsigned const &n, AppOptions const& /*options*/) { if (k == 0) return 0.0; // zero-truncated diff --git a/src/density_functions_crosslink_reg.h b/src/density_functions_crosslink_reg.h index 190e0a8..e360d37 100644 --- a/src/density_functions_crosslink_reg.h +++ b/src/density_functions_crosslink_reg.h @@ -177,7 +177,7 @@ void ZTBIN_REG::updateP(String > > &statePosteriors, // k: diagnostic events (de); n: read counts (c) -long double ZTBIN_REG::getDensity(unsigned const &k, unsigned const &n, long double const &pred, AppOptions const& options) +long double ZTBIN_REG::getDensity(unsigned const &k, unsigned const &n, long double const &pred, AppOptions const& /*options*/) { if (k == 0) return 0.0; // zero-truncated @@ -198,7 +198,7 @@ long double ZTBIN_REG::getDensity(unsigned const &k, unsigned const &n, long dou return res * (long double)(1.0/(1.0 - pow((1.0 - (long double)pred), n2))); // zero-truncated TODO ??? } -long double ZTBIN_REG::getDensity(unsigned const &k, unsigned const &n, AppOptions const&options) +long double ZTBIN_REG::getDensity(unsigned const &k, unsigned const &n, AppOptions const& /*options*/) { if (k == 0) return 0.0; // zero-truncated diff --git a/src/density_functions_reg.h b/src/density_functions_reg.h index ccc75f6..ef137ca 100644 --- a/src/density_functions_reg.h +++ b/src/density_functions_reg.h @@ -146,8 +146,6 @@ struct Fct_GSL_X_GAMMA_REG long double operator()(const gsl_vector * x) { const long double k = gsl_vector_get (x, 0); - const long double b0 = gsl_vector_get (x, 1); - const long double b1 = gsl_vector_get (x, 2); long double f = 0.0; diff --git a/src/hmm_1.h b/src/hmm_1.h index 0f01865..67690d7 100644 --- a/src/hmm_1.h +++ b/src/hmm_1.h @@ -1,7 +1,7 @@ // ====================================================================== // PureCLIP: capturing target-specific protein-RNA interaction footprints // ====================================================================== -// Copyright (C) 2017 Sabrina Krakau, Max Planck Institute for Molecular +// Copyright (C) 2017 Sabrina Krakau, Max Planck Institute for Molecular // Genetics // // This program is free software: you can redistribute it and/or modify @@ -23,7 +23,7 @@ #ifndef APPS_HMMS_HMM_1_H_ #define APPS_HMMS_HMM_1_H_ - + #include #include @@ -32,17 +32,17 @@ #include "density_functions_reg.h" #include "density_functions_crosslink.h" #include "density_functions_crosslink_reg.h" -#include +#include using namespace seqan; template -class HMM { +class HMM { public: - __uint8 K; // no. of sates + uint8_t K; // no. of sates String > > initProbs; // initial probabilities String > & setObs; // workaround for partial specialization @@ -56,7 +56,7 @@ class HMM { resize(transMatrix, K, Exact()); for (unsigned i = 0; i < K; ++i) resize(transMatrix[i], K, 0.25, Exact()); - + resize(initProbs, 2, Exact()); resize(eProbs, 2, Exact()); resize(statePosteriors, 2, Exact()); @@ -87,15 +87,15 @@ class HMM { } } } - // Copy constructor -// HMM(const HMM &hmm2) {x = p2.x; y = p2.y; } + // Copy constructor +// HMM(const HMM &hmm2) {x = p2.x; y = p2.y; } + + HMM(); + ~HMM(); - HMM(); - ~HMM(); - bool computeEmissionProbs(ModelParams &modelParams, bool learning, AppOptions &options); - bool iForward(String > &alphas_1, unsigned s, unsigned i, String > &logA, AppOptions &options); - bool iBackward(String > &betas_1, unsigned s, unsigned i, String > &logA, AppOptions &options); + bool iForward(String > &alphas_1, unsigned s, unsigned i, String > &logA, AppOptions &options); + bool iBackward(String > &betas_1, unsigned s, unsigned i, String > &logA, AppOptions &options); bool computeStatePosteriorsFB(AppOptions &options); bool computeStatePosteriorsFBupdateTrans(AppOptions &options); bool updateTransAndPostProbs(AppOptions &options); @@ -103,8 +103,8 @@ class HMM { bool updateDensityParams(ModelParams &modelParams, AppOptions &options); bool baumWelch(ModelParams &modelParams, CharString learnTag, AppOptions &options); bool applyParameters(ModelParams &modelParams, AppOptions &/*options*/); - void posteriorDecoding(String > > &states); - void rmBoarderArtifacts(String > > &states, String &data_replicates, String > &modelParams); + void posteriorDecoding(String > > &states); + void rmBoarderArtifacts(String > > &states, String &data_replicates, String > &modelParams); // for each F/R,interval,t, state .... String > > > eProbs; // emission/observation probabilities P(Y_t | S_t) -> precompute for each t given Y_t = (C_t, T_t) !!! @@ -113,7 +113,7 @@ class HMM { template -HMM::~HMM() +HMM::~HMM() { clear(this->eProbs); clear(this->statePosteriors); @@ -129,13 +129,13 @@ HMM::~HMM() long double myLog(long double x) { - if (x == 0) return std::numeric_limits::quiet_NaN(); + if (x == 0) return std::numeric_limits::quiet_NaN(); return log(x); } long double myExp(long double x) { - if (std::isnan(x)) return 0.0; + if (std::isnan(x)) return 0.0; return exp(x); } @@ -154,7 +154,7 @@ long double get_logSumExp(long double &f1, long double &f2, LogSumExp_lookupTabl // log-sum-exp trick long double get_logSumExp_states(long double f1, long double f2, long double f3, long double f4, LogSumExp_lookupTable &lookUp) { - long double sum; + long double sum; sum = get_logSumExp(f1, f2, lookUp); sum = get_logSumExp(sum, f3, lookUp); sum = get_logSumExp(sum, f4, lookUp); @@ -163,7 +163,7 @@ long double get_logSumExp_states(long double f1, long double f2, long double f3, // log-sum-exp trick for string long double get_logSumExp(String &fs, LogSumExp_lookupTable &lookUp) -{ +{ long double sum = std::numeric_limits::quiet_NaN(); for (unsigned i = 0; i < length(fs); ++i) @@ -195,10 +195,10 @@ bool computeEProb(TEProbs &eProbs, TSetObs &setObs, GAMMA &gamma1, GAMMA &gamma2 { long double gamma1_d = 1.0; long double gamma2_d = 0.0; - if (setObs.kdes[t] >= gamma1.tp) + if (setObs.kdes[t] >= gamma1.tp) { gamma1_d = gamma1.getDensity(setObs.kdes[t]); - gamma2_d = gamma2.getDensity(setObs.kdes[t]); + gamma2_d = gamma2.getDensity(setObs.kdes[t]); } long double bin1_d = 1.0; long double bin2_d = 0.0; @@ -208,7 +208,7 @@ bool computeEProb(TEProbs &eProbs, TSetObs &setObs, GAMMA &gamma1, GAMMA &gamma2 bin2_d = bin2.getDensity(setObs.truncCounts[t], setObs.nEstimates[t], options); } // log-space - eProbs[0] = myLog(gamma1_d) + myLog(bin1_d); + eProbs[0] = myLog(gamma1_d) + myLog(bin1_d); eProbs[1] = myLog(gamma1_d) + myLog(bin2_d); eProbs[2] = myLog(gamma2_d) + myLog(bin1_d); eProbs[3] = myLog(gamma2_d) + myLog(bin2_d); @@ -219,21 +219,21 @@ bool computeEProb(TEProbs &eProbs, TSetObs &setObs, GAMMA &gamma1, GAMMA &gamma2 { if (options.verbosity >= 2) { - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << "WARNING: emission probabilities 0.0!" << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " fragment coverage (kde): " << setObs.kdes[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " read start count: " << (int)setObs.truncCounts[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " estimated n: " << setObs.nEstimates[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'non-enriched' gamma: " << gamma1_d << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'enriched' gamma: " << gamma2_d << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'non-crosslink' binomial: " << bin1_d << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'crosslink' binomial: " << bin2_d << std::endl; } eProbs[0] = 0.0; @@ -241,7 +241,7 @@ bool computeEProb(TEProbs &eProbs, TSetObs &setObs, GAMMA &gamma1, GAMMA &gamma2 eProbs[2] = std::numeric_limits::quiet_NaN(); eProbs[3] = std::numeric_limits::quiet_NaN(); return false; - } + } return true; } @@ -254,10 +254,10 @@ bool computeEProb(TEProbs &eProbs, TSetObs &setObs, GAMMA_REG &gamma1, GAMMA_REG long double gamma1_d = 1.0; long double gamma2_d = 0.0; - if (setObs.kdes[t] >= gamma1.tp) + if (setObs.kdes[t] >= gamma1.tp) { gamma1_d = gamma1.getDensity(setObs.kdes[t], gamma1_pred, options); - gamma2_d = gamma2.getDensity(setObs.kdes[t], gamma2_pred, options); + gamma2_d = gamma2.getDensity(setObs.kdes[t], gamma2_pred, options); } long double bin1_d = 1.0; @@ -268,36 +268,36 @@ bool computeEProb(TEProbs &eProbs, TSetObs &setObs, GAMMA_REG &gamma1, GAMMA_REG bin2_d = bin2.getDensity(setObs.truncCounts[t], setObs.nEstimates[t], options); } // log-space - eProbs[0] = myLog(gamma1_d) + myLog(bin1_d); + eProbs[0] = myLog(gamma1_d) + myLog(bin1_d); eProbs[1] = myLog(gamma1_d) + myLog(bin2_d); eProbs[2] = myLog(gamma2_d) + myLog(bin1_d); eProbs[3] = myLog(gamma2_d) + myLog(bin2_d); - // + // if ((gamma1_d + gamma2_d == 0.0) || (bin1_d + bin2_d == 0.0) || (std::isnan(eProbs[0]) && std::isnan(eProbs[1]) && std::isnan(eProbs[2]) && std::isnan(eProbs[3])) ) { if (options.verbosity >= 2) { - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << "WARNING: emission probabilities going against 0.0!" << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " fragment coverage (kde): " << setObs.kdes[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " read start count: " << (int)setObs.truncCounts[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " estimated n: " << setObs.nEstimates[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " covariate b: " << x << " predicted mean 'non-enriched': " << gamma1_pred << " predicted mean 'enriched': " << gamma2_pred << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'non-enriched' gamma: " << gamma1_d << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'enriched' gamma: " << gamma2_d << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'non-crosslink' binomial: " << bin1_d << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'crosslink' binomial: " << bin2_d << std::endl; - } + } eProbs[0] = 0.0; eProbs[1] = std::numeric_limits::quiet_NaN(); eProbs[2] = std::numeric_limits::quiet_NaN(); @@ -312,10 +312,10 @@ bool computeEProb(TEProbs &eProbs, TSetObs &setObs, GAMMA &gamma1, GAMMA &gamma2 { long double gamma1_d = 1.0; long double gamma2_d = 0.0; - if (setObs.kdes[t] >= gamma1.tp) + if (setObs.kdes[t] >= gamma1.tp) { gamma1_d = gamma1.getDensity(setObs.kdes[t]); - gamma2_d = gamma2.getDensity(setObs.kdes[t]); + gamma2_d = gamma2.getDensity(setObs.kdes[t]); } unsigned mId = setObs.motifIds[t]; long double bin1_pred = 1.0/(1.0+exp(-bin1.b0 - bin1.regCoeffs[mId]*setObs.fimoScores[t])); @@ -340,23 +340,23 @@ bool computeEProb(TEProbs &eProbs, TSetObs &setObs, GAMMA &gamma1, GAMMA &gamma2 { if (options.verbosity >= 2) { - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << "WARNING: emission probabilities going against 0.0!" << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " fragment coverage (kde): " << setObs.kdes[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " read start count: " << (int)setObs.truncCounts[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " estimated n: " << setObs.nEstimates[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " covariate x: " << setObs.fimoScores[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'non-enriched' gamma: " << gamma1_d << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'enriched' gamma: " << gamma2_d << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'non-crosslink' binomial: " << bin1_d << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'crosslink' binomial: " << bin2_d << std::endl; } eProbs[0] = 0.0; @@ -377,10 +377,10 @@ bool computeEProb(TEProbs &eProbs, TSetObs &setObs, GAMMA_REG &gamma1, GAMMA_REG long double gamma1_d = 1.0; long double gamma2_d = 0.0; - if (setObs.kdes[t] >= gamma1.tp) + if (setObs.kdes[t] >= gamma1.tp) { gamma1_d = gamma1.getDensity(setObs.kdes[t], gamma1_pred, options); - gamma2_d = gamma2.getDensity(setObs.kdes[t], gamma2_pred, options); + gamma2_d = gamma2.getDensity(setObs.kdes[t], gamma2_pred, options); } unsigned mId = setObs.motifIds[t]; long double bin1_pred = 1.0/(1.0+exp(-bin1.b0 - bin1.regCoeffs[mId]*setObs.fimoScores[t])); @@ -399,31 +399,31 @@ bool computeEProb(TEProbs &eProbs, TSetObs &setObs, GAMMA_REG &gamma1, GAMMA_REG eProbs[2] = myLog(gamma2_d) + myLog(bin1_d); eProbs[3] = myLog(gamma2_d) + myLog(bin2_d); - // + // if ((gamma1_d + gamma2_d == 0.0) || (bin1_d + bin2_d == 0.0) || (std::isnan(eProbs[0]) && std::isnan(eProbs[1]) && std::isnan(eProbs[2]) && std::isnan(eProbs[3])) ) { if (options.verbosity >= 2) { - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << "WARNING: emission probabilities going against 0.0!" << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " fragment coverage (kde): " << setObs.kdes[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " read start count: " << (int)setObs.truncCounts[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " estimated n: " << setObs.nEstimates[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " covariate b: " << x << " predicted mean 'non-enriched': " << gamma1_pred << " predicted mean 'enriched': " << gamma2_pred << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " covariate x: " << setObs.fimoScores[t] << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'non-enriched' gamma: " << gamma1_d << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'enriched' gamma: " << gamma2_d << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'non-crosslink' binomial: " << bin1_d << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << " emission probability 'crosslink' binomial: " << bin2_d << std::endl; } eProbs[0] = 0.0; @@ -443,65 +443,65 @@ bool HMM::computeEmissionProbs(ModelParams &modelPar for (unsigned s = 0; s < 2; ++s) { #if HMM_PARALLEL - SEQAN_OMP_PRAGMA(parallel for schedule(dynamic, 1)) -#endif + SEQAN_OMP_PRAGMA(parallel for schedule(dynamic, 1)) +#endif for (unsigned i = 0; i < length(this->setObs[s]); ++i) { bool discardInterval = false; - for (unsigned t = 0; t < this->setObs[s][i].length(); ++t) + for (unsigned t = 0; t < this->setObs[s][i].length(); ++t) { if (this->setObs[s][i].kdes[t] == 0.0) { std::cerr << "ERROR: KDE is 0.0 at i " << i << " t: " << t << std::endl; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) stop = true; } if (!computeEProb(this->eProbs[s][i][t], this->setObs[s][i], modelParams.gamma1, modelParams.gamma2, modelParams.bin1, modelParams.bin2, t, options)) { - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) discardInterval = true; } } if (learning && discardInterval) { - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << "ERROR: Emission probability became 0.0! This might be due to artifacts or outliers." << std::endl; SEQAN_OMP_PRAGMA(critical) if (options.verbosity >= 2) { - if (s == 0) + if (s == 0) std::cout << " Interval: [" << (this->setPos[s][i]) << ", " << (this->setPos[s][i] + this->setObs[s][i].length()) << ") on forward strand." << std::endl; - else + else std::cout << " Interval: [" << (this->contigLength - this->setPos[s][i] - 1) << ", " << (this->contigLength - this->setPos[s][i] - 1 + this->setObs[s][i].length()) << ") on reverse strand." << std::endl; } stop = true; if (!options.useHighPrecision) // TODO ? { - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << "NOTE: Try running PureCLIP in high floating-point precision mode (long double, parameter '-ld')." << std::endl; } } - else if (!learning && discardInterval) + else if (!learning && discardInterval) { this->setObs[s][i].discard = true; - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << "Warning: discarding interval on forward strand due to emission probabilities of 0.0 (set to state 'non-enriched + non-crosslink')." << std::endl; if (options.verbosity >= 2) { - SEQAN_OMP_PRAGMA(critical) - if (s == 0) + SEQAN_OMP_PRAGMA(critical) + if (s == 0) std::cout << " Interval [" << (this->setPos[s][i]) << ", " << (this->setPos[s][i] + this->setObs[s][i].length()) << ") on forward strand. " << std::endl; - else + else std::cout << " Interval [" << (this->contigLength - this->setPos[s][i] - 1) << ", " << (this->contigLength - this->setPos[s][i] - 1 + this->setObs[s][i].length()) << ") on reverse strand." << std::endl; } if (!options.useHighPrecision) // TODO ? { - SEQAN_OMP_PRAGMA(critical) + SEQAN_OMP_PRAGMA(critical) std::cout << "NOTE: If this happens frequently, rerun PureCLIP in high floating-point precision mode (long double, parameter '-ld')." << std::endl; } } - } + } } if (stop) return false; return true; @@ -532,7 +532,7 @@ bool HMM::iForward(String > &alphas_1, unsigne for (unsigned t = 1; t < this->setObs[s][i].length(); ++t) { for (unsigned k = 0; k < this->K; ++k) - { + { long double f1 = alphas_1[t-1][0] + logA[0][k] + this->eProbs[s][i][t][k]; long double f2 = alphas_1[t-1][1] + logA[1][k] + this->eProbs[s][i][t][k]; long double f3 = alphas_1[t-1][2] + logA[2][k] + this->eProbs[s][i][t][k]; @@ -563,8 +563,8 @@ bool HMM::iBackward(String > &betas_1, unsigne unsigned T = this->setObs[s][i].length(); // for t = T for (unsigned k = 0; k < this->K; ++k) - betas_1[this->setObs[s][i].length() - 1][k] = log(1.0); - + betas_1[T - 1][k] = log(1.0); + // for t = 2:T for (int t = this->setObs[s][i].length() - 2; t >= 0; --t) { @@ -599,7 +599,7 @@ bool HMM::computeStatePosteriorsFBupdateTrans(AppOptions &options) for (unsigned k_1 = 0; k_1 < this->K; ++k_1) for (unsigned k_2 = 0; k_2 < this->K; ++k_2) logA[k_1][k_2] = log(this->transMatrix[k_1][k_2]); - + String > p; resize(p, this->K, Exact()); for (unsigned k_1 = 0; k_1 < this->K; ++k_1) @@ -610,14 +610,14 @@ bool HMM::computeStatePosteriorsFBupdateTrans(AppOptions &options) p[k_1][k_2] = 0.0; } long double p_2_2 = 0.0; // for separate learning of trans. prob from '2' -> '2' - long double p_2_3 = 0.0; // for separate learning of trans. prob from '2' -> '3' + long double p_2_3 = 0.0; // for separate learning of trans. prob from '2' -> '3' for (unsigned s = 0; s < 2; ++s) { bool stop = false; #if HMM_PARALLEL SEQAN_OMP_PRAGMA(parallel for schedule(dynamic, 1)) -#endif +#endif for (unsigned i = 0; i < length(this->setObs[s]); ++i) { unsigned T = setObs[s][i].length(); @@ -634,7 +634,7 @@ bool HMM::computeStatePosteriorsFBupdateTrans(AppOptions &options) continue; } - // backward probabilities + // backward probabilities String > betas_1; resize(betas_1, T, Exact()); for (unsigned t = 0; t < T; ++t) @@ -645,7 +645,7 @@ bool HMM::computeStatePosteriorsFBupdateTrans(AppOptions &options) stop = true; continue; } - + // compute state posterior probabilities for (unsigned t = 0; t < this->setObs[s][i].length(); ++t) { @@ -659,9 +659,9 @@ bool HMM::computeStatePosteriorsFBupdateTrans(AppOptions &options) for (unsigned k = 0; k < this->K; ++k) { this->statePosteriors[s][k][i][t] = myExp(alphas_1[t][k] + betas_1[t][k] - norm); // store not in log-space! - - if (std::isnan(this->statePosteriors[s][k][i][t]) || std::isinf(this->statePosteriors[s][k][i][t]) || - this->statePosteriors[s][k][i][t] < 0.0 || this->statePosteriors[s][k][i][t] > 1.0) + + if (std::isnan(this->statePosteriors[s][k][i][t]) || std::isinf(this->statePosteriors[s][k][i][t]) || + this->statePosteriors[s][k][i][t] < 0.0 || this->statePosteriors[s][k][i][t] > 1.0) { std::cout << "ERROR: state posterior probability is " << this->statePosteriors[s][k][i][t] << "." << std::endl; std::cout << " s: " << s << " i: " << i << " t: " << t << " k:" << k << std::endl; @@ -674,21 +674,21 @@ bool HMM::computeStatePosteriorsFBupdateTrans(AppOptions &options) // update initial probabilities for (unsigned k = 0; k < this->K; ++k) - this->initProbs[s][i][k] = this->statePosteriors[s][k][i][0]; + this->initProbs[s][i][k] = this->statePosteriors[s][k][i][0]; // compute xi values for interval in preparation for new trans. probs String > xis; resize(xis, this->K, Exact()); String > p_i; resize(p_i, this->K, Exact()); - for (unsigned k_1 = 0; k_1 < this->K; ++k_1) + for (unsigned k_1 = 0; k_1 < this->K; ++k_1) { resize(xis[k_1], this->K, 0.0, Exact()); resize(p_i[k_1], this->K, 0.0, Exact()); } long double p_2_2_i = 0.0; long double p_2_3_i = 0.0; - // + // for (unsigned t = 1; t < this->setObs[s][i].length(); ++t) { long double norm = std::numeric_limits::quiet_NaN(); @@ -712,11 +712,11 @@ bool HMM::computeStatePosteriorsFBupdateTrans(AppOptions &options) } } // add to global sum - for (unsigned k_1 = 0; k_1 < this->K; ++k_1) + for (unsigned k_1 = 0; k_1 < this->K; ++k_1) for (unsigned k_2 = 0; k_2 < this->K; ++k_2) SEQAN_OMP_PRAGMA(critical) p[k_1][k_2] += p_i[k_1][k_2]; - + SEQAN_OMP_PRAGMA(critical) p_2_2 += p_2_2_i; SEQAN_OMP_PRAGMA(critical) @@ -731,7 +731,7 @@ bool HMM::computeStatePosteriorsFBupdateTrans(AppOptions &options) { long double denumerator = 0.0; for (unsigned k_3 = 0; k_3 < this->K; ++k_3) - denumerator += p[k_1][k_3]; + denumerator += p[k_1][k_3]; for (unsigned k_2 = 0; k_2 < this->K; ++k_2) { @@ -739,7 +739,7 @@ bool HMM::computeStatePosteriorsFBupdateTrans(AppOptions &options) if (A[k_1][k_2] <= 0.0) A[k_1][k_2] = DBL_MIN; // make sure not getting zero } } - // Fix p[2->2/3] using only trans. probs. for region over nThresholdForP, while keeping sum of p[2->2] and p[2->3] constant + // Fix p[2->2/3] using only trans. probs. for region over nThresholdForP, while keeping sum of p[2->2] and p[2->3] constant if (options.nThresholdForTransP > 0) { long double sum_2_23 = A[2][2] + A[2][3]; @@ -772,8 +772,8 @@ bool HMM::computeStatePosteriorsFB(AppOptions &options) { bool stop = false; #if HMM_PARALLEL - SEQAN_OMP_PRAGMA(parallel for schedule(dynamic, 1)) -#endif + SEQAN_OMP_PRAGMA(parallel for schedule(dynamic, 1)) +#endif for (unsigned i = 0; i < length(this->setObs[s]); ++i) { unsigned T = setObs[s][i].length(); @@ -800,7 +800,7 @@ bool HMM::computeStatePosteriorsFB(AppOptions &options) stop = true; continue; } - + // compute state posterior probabilities (in log-space) for (unsigned t = 0; t < this->setObs[s][i].length(); ++t) { @@ -832,7 +832,7 @@ bool HMM::computeStatePosteriorsFB(AppOptions &options) bool updateDensityParams2(String > > &statePosteriors1, String > > &statePosteriors2, String > &setObs, GAMMA &gamma1, GAMMA &gamma2, - unsigned &iter, unsigned &trial, + unsigned & /*iter*/, unsigned & /*trial*/, AppOptions &options) { if (!gamma1.updateThetaAndK(statePosteriors1, setObs, options.g1_kMin, options.g1_kMax, options)) @@ -841,13 +841,13 @@ bool updateDensityParams2(String > > &statePosteriors1, St if (!gamma2.updateThetaAndK(statePosteriors2, setObs, options.g2_kMin, options.g2_kMax, options)) // make sure g1k <= g2k return false; - // make sure gamma1.mu < gamma2.mu + // make sure gamma1.mu < gamma2.mu checkOrderG1G2(gamma1, gamma2, options); return true; } -bool updateDensityParams2(String > > &statePosteriors1, String > > &statePosteriors2, String > &setObs, - GAMMA_REG &gamma1, GAMMA_REG &gamma2, +bool updateDensityParams2(String > > &statePosteriors1, String > > &statePosteriors2, String > &setObs, + GAMMA_REG &gamma1, GAMMA_REG &gamma2, unsigned &iter, unsigned &trial, AppOptions &options) { @@ -861,14 +861,14 @@ bool updateDensityParams2(String > > &statePosteriors1, St if (!gamma2.updateRegCoeffsAndK(statePosteriors2, setObs, g2_kMin, options.g2_kMax, options)) return false; - // make sure gamma1.mu < gamma2.mu + // make sure gamma1.mu < gamma2.mu checkOrderG1G2(gamma1, gamma2, iter, trial, options); return true; } template -bool HMM::updateDensityParams(TGAMMA &gamma1, TGAMMA &gamma2, unsigned &iter, unsigned &trial, AppOptions &options) +bool HMM::updateDensityParams(TGAMMA &gamma1, TGAMMA &gamma2, unsigned &iter, unsigned &trial, AppOptions &options) { String > > statePosteriors1; String > > statePosteriors2; @@ -890,14 +890,14 @@ bool HMM::updateDensityParams(TGAMMA &gamma1, TGAMMA &gamma2, unsi } } - updateDensityParams2(statePosteriors1, statePosteriors2, this->setObs, gamma1, gamma2, iter, trial, options); + updateDensityParams2(statePosteriors1, statePosteriors2, this->setObs, gamma1, gamma2, iter, trial, options); return true; } template -bool HMM::updateDensityParams(ModelParams &modelParams, AppOptions &options) +bool HMM::updateDensityParams(ModelParams &modelParams, AppOptions &options) { String > > statePosteriors1; String > > statePosteriors2; @@ -920,10 +920,10 @@ bool HMM::updateDensityParams(ModelParams &modelPara } // truncation counts - modelParams.bin1.updateP(statePosteriors1, this->setObs, options); + modelParams.bin1.updateP(statePosteriors1, this->setObs, options); modelParams.bin2.updateP(statePosteriors2, this->setObs, options); - // make sure bin1.p < bin2.p + // make sure bin1.p < bin2.p checkOrderBin1Bin2(modelParams.bin1, modelParams.bin2); return true; @@ -933,7 +933,7 @@ bool HMM::updateDensityParams(ModelParams &modelPara // Baum-Welch // in log-space (using log-sum-exp trick) -template +template bool HMM::baumWelch(ModelParams &modelParams, CharString learnTag, AppOptions &options) { TGAMMA prev_gamma1 = modelParams.gamma1; @@ -956,7 +956,7 @@ bool HMM::baumWelch(ModelParams &modelParams, CharSt std::cerr << "ERROR: Could not compute forward-backward algorithm! " << std::endl; return false; } - + std::cout << " updateDensityParams() " << std::endl; if (learnTag == "LEARN_BINOMIAL") @@ -981,13 +981,13 @@ bool HMM::baumWelch(ModelParams &modelParams, CharSt } } - - if (learnTag == "LEARN_GAMMA" && checkConvergence(modelParams.gamma1, prev_gamma1, options) && checkConvergence(modelParams.gamma2, prev_gamma2, options) ) + + if (learnTag == "LEARN_GAMMA" && checkConvergence(modelParams.gamma1, prev_gamma1, options) && checkConvergence(modelParams.gamma2, prev_gamma2, options) ) { std::cout << " **** Convergence ! **** " << std::endl; break; } - else if (learnTag != "LEARN_GAMMA" && checkConvergence(modelParams.bin1, prev_bin1, options) && checkConvergence(modelParams.bin2, prev_bin2, options) ) + else if (learnTag != "LEARN_GAMMA" && checkConvergence(modelParams.bin1, prev_bin1, options) && checkConvergence(modelParams.bin2, prev_bin2, options) ) { std::cout << " **** Convergence ! **** " << std::endl; break; @@ -999,7 +999,7 @@ bool HMM::baumWelch(ModelParams &modelParams, CharSt myPrint(modelParams.gamma1); myPrint(modelParams.gamma2); - + std::cout << "*** Transition probabilitites ***" << std::endl; for (unsigned k_1 = 0; k_1 < this->K; ++k_1) { @@ -1019,7 +1019,7 @@ bool HMM::baumWelch(ModelParams &modelParams, CharSt } -template +template bool HMM::applyParameters(ModelParams &modelParams, AppOptions &options) { if (!computeEmissionProbs(modelParams, false, options)) @@ -1038,8 +1038,8 @@ bool HMM::applyParameters(ModelParams &modelParams, template -void HMM::posteriorDecoding(String > > &states) -{ +void HMM::posteriorDecoding(String > > &states) +{ for (unsigned s = 0; s < 2; ++s) { resize(states[s], length(this->setObs[s]), Exact()); @@ -1068,8 +1068,8 @@ void HMM::posteriorDecoding(String > > &sta } -void rmBoarderArtifacts2(String > > &states, - String > &setObs, +void rmBoarderArtifacts2(String > > &states, + String > & /*setObs*/, String &data_replicates, String > &modelParams) { @@ -1099,8 +1099,8 @@ void rmBoarderArtifacts2(String > > &states, } } -void rmBoarderArtifacts2(String > > &states, - String > &setObs, +void rmBoarderArtifacts2(String > > &states, + String > & /*setObs*/, String &data_replicates, String > &modelParams) { @@ -1131,23 +1131,23 @@ void rmBoarderArtifacts2(String > > &states, } // do nothing -void rmBoarderArtifacts2(String > > &states, - String > &setObs, - String &data_replicates, - String > &modelParams){} +void rmBoarderArtifacts2(String > > & /*states*/, + String > & /*setObs*/, + String & /*data_replicates*/, + String > & /*modelParams*/){} -void rmBoarderArtifacts2(String > > &states, - String > &setObs, - String &data_replicates, - String > &modelParams){} +void rmBoarderArtifacts2(String > > & /*states*/, + String > & /*setObs*/, + String & /*data_replicates*/, + String > & /*modelParams*/){} -// for GLM with input signal: +// for GLM with input signal: // when using free gamma shapes, i.e. gamma1.k can be > gamma2.k // make sure sites with fragment coverage (KDE) below gamma1.mean are classified as 'non-enriched' template -void HMM::rmBoarderArtifacts(String > > &states, - String &data_replicates, +void HMM::rmBoarderArtifacts(String > > &states, + String &data_replicates, String > &modelParams) { rmBoarderArtifacts2(states, this->setObs, data_replicates, modelParams); @@ -1163,7 +1163,7 @@ double getCrosslinkSiteScore(double postProb0, double postProb1, double postProb else if (score_type == 2) // log(3/1) "enrichment focussed" { return (double)log(postProb3/std::max(postProb1, DBL_MIN)); - } + } else if (score_type == 3) // log(enriched/non-enriched) + log(crosslinked/non-crosslinked) "balanced" { return ((double)log((postProb2 + postProb3)/std::max(postProb0 + postProb1, DBL_MIN)) + (double)log((postProb1 + postProb3)/std::max(postProb0 + postProb2, DBL_MIN))); @@ -1181,10 +1181,10 @@ double getCrosslinkSiteScore(double postProb0, double postProb1, double postProb void writeStates(String > &bedRecords_sites, Data &data, - FragmentStore<> &store, + FragmentStore<> &store, unsigned contigId, - AppOptions &options) -{ + AppOptions &options) +{ for (unsigned s = 0; s < 2; ++s) { for (unsigned i = 0; i < length(data.setObs[s]); ++i) // data.states[s] @@ -1208,7 +1208,7 @@ void writeStates(String > &bedRecords_sites, } else // '-'-strand; { - if (!options.crosslinkAtTruncSite) + if (!options.crosslinkAtTruncSite) record.beginPos = length(store.contigStore[contigId].seq) - (t + data.setPos[s][i]); else record.beginPos = length(store.contigStore[contigId].seq) - (t + data.setPos[s][i]) - 1; @@ -1219,13 +1219,13 @@ void writeStates(String > &bedRecords_sites, std::stringstream ss; ss << (int)data.states[s][i][t]; record.name = ss.str(); - ss.str(""); - ss.clear(); + ss.str(""); + ss.clear(); ss << getCrosslinkSiteScore(data.statePosteriors[s][0][i][t], data.statePosteriors[s][1][i][t], data.statePosteriors[s][2][i][t], data.statePosteriors[s][3][i][t], options.score_type); record.score = ss.str(); - ss.str(""); - ss.clear(); + ss.str(""); + ss.clear(); if (s == 0) record.strand = '+'; else @@ -1241,7 +1241,7 @@ void writeStates(String > &bedRecords_sites, ss << ";"; ss << (double)data.statePosteriors[s][3][i][t]; - ss << ";"; + ss << ";"; if (options.useCov_RPKM) ss << (double)data.setObs[s][i].rpkms[t]; else @@ -1251,8 +1251,8 @@ void writeStates(String > &bedRecords_sites, ss << ";"; record.data = ss.str(); - ss.str(""); - ss.clear(); + ss.str(""); + ss.clear(); appendValue(bedRecords_sites, record); } @@ -1272,7 +1272,7 @@ void writeStates(String > &bedRecords_sites, } else // '-'-strand; { - if (!options.crosslinkAtTruncSite) + if (!options.crosslinkAtTruncSite) record.beginPos = length(store.contigStore[contigId].seq) - (t + data.setPos[s][i]); else record.beginPos = length(store.contigStore[contigId].seq) - (t + data.setPos[s][i]) - 1; @@ -1281,17 +1281,17 @@ void writeStates(String > &bedRecords_sites, } std::stringstream ss; - ss << (int)0; // assign 'non-enriched + non-crosslink' + ss << (int)0; // assign 'non-enriched + non-crosslink' record.name = ss.str(); - ss.str(""); - ss.clear(); + ss.str(""); + ss.clear(); - // log posterior prob. ratio score + // log posterior prob. ratio score ss << "NA"; record.score = ss.str(); - ss.str(""); - ss.clear(); + ss.str(""); + ss.clear(); if (s == 0) record.strand = '+'; else @@ -1307,7 +1307,7 @@ void writeStates(String > &bedRecords_sites, ss << ";"; ss << "NA"; - ss << ";"; + ss << ";"; if (options.useCov_RPKM) ss << (double)data.setObs[s][i].rpkms[t]; else @@ -1317,8 +1317,8 @@ void writeStates(String > &bedRecords_sites, ss << ";"; record.data = ss.str(); - ss.str(""); - ss.clear(); + ss.str(""); + ss.clear(); appendValue(bedRecords_sites, record); } @@ -1349,13 +1349,13 @@ void writeStates(String > &bedRecords_sites, std::stringstream ss; ss << (int)data.states[s][i][t]; record.name = ss.str(); - ss.str(""); - ss.clear(); + ss.str(""); + ss.clear(); ss << getCrosslinkSiteScore(data.statePosteriors[s][0][i][t], data.statePosteriors[s][1][i][t], data.statePosteriors[s][2][i][t], data.statePosteriors[s][3][i][t], options.score_type); record.score = ss.str(); - ss.str(""); - ss.clear(); + ss.str(""); + ss.clear(); if (s == 0) record.strand = '+'; else @@ -1367,11 +1367,11 @@ void writeStates(String > &bedRecords_sites, ss << "score_UC=" << getCrosslinkSiteScore(data.statePosteriors[s][0][i][t], data.statePosteriors[s][1][i][t], data.statePosteriors[s][2][i][t], data.statePosteriors[s][3][i][t], 0) << "]"; record.data = ss.str(); - ss.str(""); - ss.clear(); + ss.str(""); + ss.clear(); appendValue(bedRecords_sites, record); - } + } } } } @@ -1380,10 +1380,10 @@ void writeStates(String > &bedRecords_sites, void writeRegions(String > &bedRecords_regions, Data &data, - FragmentStore<> &store, + FragmentStore<> &store, unsigned contigId, - AppOptions &options) -{ + AppOptions &options) +{ for (unsigned s = 0; s < 2; ++s) { for (unsigned i = 0; i < length(data.states[s]); ++i) @@ -1400,7 +1400,7 @@ void writeRegions(String > &bedRecords_regions, record.beginPos = t + data.setPos[s][i] - 1; else record.beginPos = t + data.setPos[s][i]; - + record.endPos = record.beginPos + 1; } else @@ -1431,17 +1431,17 @@ void writeRegions(String > &bedRecords_regions, if (s == 0) // crosslink sites (not truncation site) { if (!options.crosslinkAtTruncSite) - record.endPos = t + data.setPos[s][i]; + record.endPos = t + data.setPos[s][i]; else - record.endPos = t + data.setPos[s][i] + 1; + record.endPos = t + data.setPos[s][i] + 1; } else { - if (!options.crosslinkAtTruncSite) + if (!options.crosslinkAtTruncSite) record.beginPos = length(store.contigStore[contigId].seq) - (t + data.setPos[s][i]); else record.beginPos = length(store.contigStore[contigId].seq) - (t + data.setPos[s][i]) - 1; - } + } score = getCrosslinkSiteScore(data.statePosteriors[s][0][i][t], data.statePosteriors[s][1][i][t], data.statePosteriors[s][2][i][t], data.statePosteriors[s][3][i][t], options.score_type); scoresSum += score; @@ -1450,23 +1450,23 @@ void writeRegions(String > &bedRecords_regions, } } - std::stringstream ss; + std::stringstream ss; ss << scoresSum; record.score = ss.str(); - ss.str(""); - ss.clear(); + ss.str(""); + ss.clear(); record.name = ss_indivScores.str(); - ss_indivScores.str(""); - ss_indivScores.clear(); - + ss_indivScores.str(""); + ss_indivScores.clear(); + appendValue(bedRecords_regions, record); - } + } } } } } - + template void myPrint(HMM &hmm) { diff --git a/src/parse_alignments.h b/src/parse_alignments.h index f1dea3d..812c9dd 100644 --- a/src/parse_alignments.h +++ b/src/parse_alignments.h @@ -122,7 +122,11 @@ bool parse_bamRegion(TTruncCounts &truncCounts, TBamIn &inFile, TBai &baiIndex, // If we are on the next reference if (bamRecord.rID == -1 || bamRecord.rID > rID) break; - if (bamRecord.beginPos >= (endPos + 100)) + // bamRecord.beginPos is int32_t (SeqAn); cast once to unsigned for all comparisons. + // Reads with rID != -1 always have beginPos >= 0. + const unsigned recBegin = static_cast(bamRecord.beginPos); + const unsigned recEnd = recBegin + getAlignmentLengthInRef(bamRecord); + if (recBegin >= (endPos + 100)) break; // check if read corresponds to 3' cDNA end corresponding to user parameter @@ -133,20 +137,20 @@ bool parse_bamRegion(TTruncCounts &truncCounts, TBamIn &inFile, TBai &baiIndex, if (isForward && !hasFlagRC(bamRecord)) // Forward { - if (bamRecord.beginPos >= beginPos && // already there ? - bamRecord.beginPos < endPos && // before end of interval ? - truncCounts[bamRecord.beginPos - beginPos] < options.maxTruncCount2) + if (recBegin >= beginPos && // already there ? + recBegin < endPos && // before end of interval ? + truncCounts[recBegin - beginPos] < options.maxTruncCount2) { - ++truncCounts[bamRecord.beginPos - beginPos]; + ++truncCounts[recBegin - beginPos]; } } - else if (!isForward && hasFlagRC(bamRecord)) // Reverse + else if (!isForward && hasFlagRC(bamRecord)) // Reverse { - if ((bamRecord.beginPos + getAlignmentLengthInRef(bamRecord) - 1) >= beginPos && // already there ? - (bamRecord.beginPos + getAlignmentLengthInRef(bamRecord) - 1) < endPos && // before end of interval ? - truncCounts[bamRecord.beginPos - beginPos + getAlignmentLengthInRef(bamRecord) - 1] < options.maxTruncCount2) + if ((recEnd - 1) >= beginPos && // already there ? + (recEnd - 1) < endPos && // before end of interval ? + truncCounts[recEnd - 1 - beginPos] < options.maxTruncCount2) { - ++truncCounts[bamRecord.beginPos - beginPos + getAlignmentLengthInRef(bamRecord) - 1]; + ++truncCounts[recEnd - 1 - beginPos]; } } } diff --git a/src/util.h b/src/util.h index cd32e57..018e006 100644 --- a/src/util.h +++ b/src/util.h @@ -31,9 +31,12 @@ #include +// SeqAn 2.5+ renamed namespace seqan -> seqan2; alias for source compatibility. +namespace seqan = seqan2; + using namespace seqan; -namespace seqan { +namespace seqan2 { // TODO investigate impact class LogSumExp_lookupTable @@ -48,7 +51,7 @@ namespace seqan { LogSumExp_lookupTable(unsigned size_, double minValue_) : size(size_), minValue(minValue_) { resize(lookupTable, size+1, Exact()); - for(int i = 0; i <= size; ++i) + for(unsigned i = 0; i <= size; ++i) lookupTable[i] = log1p(exp(i*(-minValue/size) + minValue)); std::cout << "Created look-up table for values from " << ((0 * -minValue/size) + minValue) << " to " << ((size * -minValue/size) + minValue) << " with step size " << (-minValue/size) << " (size: " << size<< ")." << std::endl; } @@ -285,7 +288,7 @@ namespace seqan { struct ContigObservations { - String<__uint16> truncCounts; + String truncCounts; }; void reverse(ContigObservations &contigObservations) @@ -297,10 +300,10 @@ namespace seqan { // workaround because partially specialized member function are forbidden // wrapper class for observations struct Observations { - Infix >::Type truncCounts; + Infix >::Type truncCounts; unsigned contigId; - String<__uint32> nEstimates; + String nEstimates; String kdes; // used for 'enriched'. 'non-enriched' classification String kdesN; // used to estimate the binomial n parameters (decoupled, might be useful e.g. for longer crosslink clusters) String rpkms; // change name -> e.g. bgSignal @@ -308,7 +311,7 @@ namespace seqan { String motifIds; // for each t: one motif score bool discard; // NOTE: only use for application, not for learning! - Observations(Infix >::Type _truncCounts) : truncCounts(_truncCounts), + Observations(Infix >::Type _truncCounts) : truncCounts(_truncCounts), discard(false) {} Observations() : truncCounts(), discard(false) {} @@ -316,7 +319,7 @@ namespace seqan { void estimateNs(AppOptions &options); // using raw counts void estimateNs(double b0, double b1, AppOptions /*&options*/); // using KDEs void computeKDEs(AppOptions &options); - void computeKDEs(String<__uint16> &inputTruncCounts, AppOptions &options); // input signal + void computeKDEs(String &inputTruncCounts, AppOptions &options); // input signal unsigned length(); }; @@ -347,7 +350,7 @@ namespace seqan { resize(this->nEstimates, length(), Exact()); for (unsigned t = 0; t < length(); ++t) { - this->nEstimates[t] = (__uint16)std::max((int)floor(b0 + b1*this->kdesN[t] + 0.5), 1); // avoid becoming 0 + this->nEstimates[t] = (uint16_t)std::max((int)floor(b0 + b1*this->kdesN[t] + 0.5), 1); // avoid becoming 0 } } @@ -449,7 +452,7 @@ namespace seqan { // keep in mind: same intervals used as for target (+- 2*bdw) // could cause underestimation of input KDEs at interval boarders // anyway only very low values of gaussian kernel there - void Observations::computeKDEs(String<__uint16> &truncCounts, AppOptions &options) + void Observations::computeKDEs(String &truncCounts, AppOptions &options) { resize(this->rpkms, length()); @@ -490,7 +493,7 @@ namespace seqan { String > setObs; // F/R:interval:t String > setPos; String > > > statePosteriors; // F/R:state:interval:t - String > > states; + String > > states; }; void append(Data &dataA, Data &dataB) diff --git a/src/winextract.cpp b/src/winextract.cpp index 058c93f..04e12b9 100644 --- a/src/winextract.cpp +++ b/src/winextract.cpp @@ -35,6 +35,9 @@ #include #include +// SeqAn 2.5+ renamed namespace seqan -> seqan2; alias for source compatibility. +namespace seqan = seqan2; + using namespace seqan;