# Patch summary: upgrade Dorado from 0.9.6 to 1.0.1, remove the CPU basecalling
# processes, rename BASECALL_GPU / BASECALL_GPU_DEMUX to BASECALL /
# BASECALL_DEMUX, add progress messages to the basecalling scripts, pass
# --nvccli to GPU-labelled processes, and document the NVIDIA Container Toolkit.
#
# Review fixes folded into this revision:
#   * BASECALL_DEMUX keeps its glob outputs ("*.bam" / "*.txt"). Its script
#     deletes "${id}.bam" and leaves only "${id}_<demux file>" BAMs plus one
#     summary per BAM, so outputs declared as "${id}.bam" / "${id}.txt" would
#     never match a file.
#   * .gitignore and src/modules/basecall.nf end with a trailing newline again.
#   * The README sentence about verifying GPU support was reworded.
#
# NOTE(review): this patch was reflowed from a whitespace-collapsed copy, so
# indentation and whitespace-only lines are reconstructed, and the post-image
# blob ids on the "index" lines of edited files are stale. Regenerate it from
# the working tree, or apply with `git apply --ignore-whitespace`.
# NOTE(review): `samtools sort -@ -12` (unchanged context) passes a negative
# thread count; it looks like a typo for `-@ 12` - confirm before changing.
diff --git a/.gitignore b/.gitignore
index 78a43a6ff989cfb9979a061b432a5c2fc282901e..fd17af6c52cad70d1c2df3571fa414185db62a3c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ work
 *.csv
 *.zip
 *.gz
+*.sif
diff --git a/README.md b/README.md
index ee239bc89d5fb674d3abf3ea3fbdc01bd406f096..5b85206aac649f5fec08e8fe68180003e740c3bf 100755
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ NextFlow pipeline used by the Developmental Cognitive Neuroscience Lab (DCNL) to
       sudo apt install git lz4
       ```
 
-    - Install Java: install either [OpenJRE/JDK][openjava] (**recommended, see below**) or [OracleJRE/JDK][oraclejava]. to install both openjre and openjdk using Debian/Ubuntu:
+    - Install Java: install either [OpenJRE/JDK][openjava] (**recommended, see below**) or [OracleJRE/JDK][oraclejava]. To install both openjre and openjdk using Debian/Ubuntu:
 
       ```sh
       sudo apt install default-jre default-jdk
@@ -30,6 +30,11 @@ NextFlow pipeline used by the Developmental Cognitive Neuroscience Lab (DCNL) to
 
     - Install [NextFlow][nextflow-docs-install] (skip Java installation)
     - Install [Apptainer][apptainer-docs-install-deb]
+    - Install [NVIDIA Container Toolkit][nvidia-container-toolkit]: This enables Apptainer to access your GPU during basecalling. After installing it, you can verify GPU support inside a container by pulling a test image and running:
+
+      ```sh
+      apptainer pull docker://nvidia/cuda:12.2.0-base-ubuntu22.04 && apptainer exec --nvccli cuda_12.2.0-base-ubuntu22.04.sif nvidia-smi
+      ```
 
 1. Check that all dependencies are accessible via your users `$PATH`:
 
@@ -72,6 +77,7 @@ NextFlow pipeline used by the Developmental Cognitive Neuroscience Lab (DCNL) to
 [oraclejava]:https://www.java.com/en/download/linux_manual.jsp
 [nextflow-docs-install]:https://www.nextflow.io/docs/latest/install.html#install-nextflow
 [apptainer-docs-install-deb]:https://apptainer.org/docs/admin/main/installation.html#install-debian-packages
+[nvidia-container-toolkit]:https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
 
 [top](#table-of-contents)
 
diff --git a/containers/debian-nanopore.def b/containers/debian-nanopore.def
index c48acff27c6e12ca3ec566e529558d6a242655a1..e55c1c18f56def3a5593a502f857596726eb8496 100644
--- a/containers/debian-nanopore.def
+++ b/containers/debian-nanopore.def
@@ -49,10 +49,10 @@ From: debian:12
     # Install Dorado
     cd /opt
     mkdir dorado && cd dorado
-    wget https://cdn.oxfordnanoportal.com/software/analysis/dorado-0.9.6-linux-x64.tar.gz
-    tar -xzf dorado-0.9.6-linux-x64.tar.gz
-    rm dorado-0.9.6-linux-x64.tar.gz
-    echo 'export PATH="/opt/dorado/dorado-0.9.6-linux-x64/bin/:$PATH"' >> "$SINGULARITY_ENVIRONMENT"
+    wget https://cdn.oxfordnanoportal.com/software/analysis/dorado-1.0.1-linux-x64.tar.gz
+    tar -xzf dorado-1.0.1-linux-x64.tar.gz
+    rm dorado-1.0.1-linux-x64.tar.gz
+    echo 'export PATH="/opt/dorado/dorado-1.0.1-linux-x64/bin/:$PATH"' >> "$SINGULARITY_ENVIRONMENT"
 
 %test
     # Check if installations are on path and display their versions
@@ -67,12 +67,12 @@ From: debian:12
 
 %labels
     author Joao Henrique Chrusciel
-    version v0.4.0
+    version v0.5.0
 
 %help
     Software included in the container are:
 
-    dorado==0.9.6
+    dorado==1.0.1
     modkit==0.4.4
     samtools==1.16.1
     pod5==0.3.23
diff --git a/src/modules/basecall.nf b/src/modules/basecall.nf
index e1da10c5d6c2545cd378dad47e9250126cb33b7f..18efc8b1e07a51215f27a0de6ea5a701ca963c3b 100755
--- a/src/modules/basecall.nf
+++ b/src/modules/basecall.nf
@@ -14,110 +14,7 @@ process FAST5_to_POD5 {
     """
 }
 
-process BASECALL_CPU {
-    publishDir "results/${params.out_dir}/basecalling_output/", mode: "copy", overwrite: true
-    label 'cpu'
-
-    input:
-    tuple val(id), path(pod5_dir)
-    val speed
-    val mods
-    val config
-    val trim
-    val qscore
-    val devices
-    path ref
-
-    output:
-    path ("*.bam"), emit: bam
-    path ("*.txt"), emit: txt
-
-    script:
-    """
-    if [[ "${config}" == "false" ]]; then
-        if [[ "${mods}" == "false" ]]; then
-            dorado basecaller "${speed}" . -x cpu --trim "${trim}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-        else
-            dorado basecaller "${speed},${mods}" . -x cpu --trim "${trim}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-        fi
-    else
-        if [[ "${mods}" == "false" ]]; then
-            dorado basecaller "${speed}" . -x cpu --trim "${trim}" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-        else
-            dorado basecaller "${speed},${mods}" . -x cpu --trim "${trim}" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-        fi
-    fi
-    samtools sort -@ -12 "${id}.bam" -o "${id}_sorted.bam"
-    rm "${id}.bam"
-    mv "${id}_sorted.bam" "${id}.bam"
-    dorado summary "${id}.bam" > "${id}.txt"
-    """
-}
-
-process BASECALL_CPU_DEMUX {
-    publishDir "results/${params.out_dir}/basecalling_output/", mode: "copy", overwrite: true
-    label 'cpu'
-
-    input:
-    tuple val(id), path(pod5_dir)
-    val speed
-    val mods
-    val config
-    val trim
-    val qscore
-    val trim_barcode
-    val devices
-    path ref
-
-    output:
-    path ("*.bam"), emit: bam
-    path ("*.txt"), emit: txt
-
-    script:
-    """
-    if [[ "${config}" == "false" ]]; then
-        if [[ "${mods}" == "false" ]]; then
-            dorado basecaller "${speed}" . -x cpu --trim "none" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-        else
-            dorado basecaller "${speed},${mods}" . -x cpu --trim "none" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-        fi
-    else
-        if [[ "${mods}" == "false" ]]; then
-            dorado basecaller "${speed}" . -x cpu --trim "none" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-        else
-            dorado basecaller "${speed},${mods}" . -x cpu --trim "none" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-
-        fi
-    fi
-
-    samtools sort -@ -12 "${id}.bam" -o "${id}_sorted.bam"
-    rm "${id}.bam"
-    mv "${id}_sorted.bam" "${id}.bam"
-
-    if [[ "${trim_barcode}" == "true" ]]; then
-        dorado demux --output-dir "./demux_data/" --no-classify "${id}.bam"
-    else
-        dorado demux --no-trim --output-dir "./demux_data/" --no-classify "${id}.bam"
-    fi
-
-    cd ./demux_data/
-    for file in *; do
-        samtools sort -@ -12 "\$file" -o "${id}_\${file}"
-        rm "\$file"
-    done
-
-    cd ../
-    rm "${id}.bam"
-    mv ./demux_data/* ./
-    rm -r ./demux_data/
-    for file in *.bam; do
-        new_id="\${file%%.*}"
-        dorado summary "\$file" > "\${new_id}.txt"
-    done
-    """
-}
-
-process BASECALL_GPU {
+process BASECALL {
     publishDir "results/${params.out_dir}/basecalling_output/", mode: "copy", overwrite: true
     label 'gpu'
 
@@ -132,11 +29,12 @@ process BASECALL_GPU {
     path ref
 
     output:
-    path ("*.bam"), emit: bam
-    path ("*.txt"), emit: txt
+    path ("${id}.bam"), emit: bam
+    path ("${id}.txt"), emit: txt
 
     script:
     """
+    echo "Basecalling started for: ${id}"
     if [[ "${config}" == "false" ]]; then
         if [[ "${mods}" == "false" ]]; then
             dorado basecaller "${speed}" . --trim "${trim}" --min-qscore "${qscore}" --reference "${ref}" --device "cuda:${devices}" > "${id}.bam"
@@ -151,15 +49,18 @@ process BASECALL_GPU {
         fi
     fi
 
+    echo "Basecalling completed, sorting bams..."
     samtools sort -@ -12 "${id}.bam" -o "${id}_sorted.bam"
     rm "${id}.bam"
     mv "${id}_sorted.bam" "${id}.bam"
+
+    echo "Bams sorted, generating summary with dorado..."
     dorado summary "${id}.bam" > "${id}.txt"
+    echo "Process completed for: ${id}"
     """
 }
 
-
-process BASECALL_GPU_DEMUX {
+process BASECALL_DEMUX {
     publishDir "results/${params.out_dir}/basecalling_output/", mode: "copy", overwrite: true
     label 'gpu'
 
@@ -175,11 +76,12 @@ process BASECALL_GPU_DEMUX {
     path ref
 
     output:
     path ("*.bam"), emit: bam
     path ("*.txt"), emit: txt
 
     script:
     """
+    echo "Demultiplexed basecalling started for: ${id}"
     if [[ "${config}" == "false" ]]; then
         if [[ "${mods}" == "false" ]]; then
             dorado basecaller "${speed}" . --trim "none" --min-qscore "${qscore}" --reference "${ref}" --device "cuda:${devices}" > "${id}.bam"
@@ -194,22 +96,28 @@ process BASECALL_GPU_DEMUX {
         fi
     fi
 
+    echo "Basecalling completed, sorting bams..."
     samtools sort -@ -12 "${id}.bam" -o "${id}_sorted.bam"
     rm "${id}.bam"
     mv "${id}_sorted.bam" "${id}.bam"
-    
+
+    echo "Bams sorted, demultiplexing..."
     if [[ "${trim_barcode}" == "true" ]]; then
+        echo "Demultiplexing with barcode trimming..."
         dorado demux --output-dir "./demux_data/" --no-classify "${id}.bam"
     else
+        echo "Demultiplexing without barcode trimming..."
         dorado demux --no-trim --output-dir "./demux_data/" --no-classify "${id}.bam"
     fi
-    
+
+    echo "Demultiplexing completed, sorting barcode files..."
     cd ./demux_data/
     for file in *; do
         samtools sort -@ -12 "\$file" -o "${id}_\${file}"
-        rm "\$file"  
+        rm "\$file"
     done
 
+    echo "Bams sorted, generating summary with dorado..."
     cd ../
     rm "${id}.bam"
     mv ./demux_data/* ./
@@ -218,5 +126,6 @@ process BASECALL_GPU_DEMUX {
         new_id="\${file%%.*}"
         dorado summary "\$file" > "\${new_id}.txt"
     done
+    echo "Process completed for: ${id}"
     """
 }
diff --git a/src/nextflow.config b/src/nextflow.config
index 4c73a4929bcd3e803111cc62fc3a6ea3f2e5b546..7b511f58104bbc477de5e78d7f209f99b5c335cb 100755
--- a/src/nextflow.config
+++ b/src/nextflow.config
@@ -58,7 +58,7 @@ process {
     // Define local gpu execution
     withLabel: gpu {
         executor='local'
-        containerOptions = '--nv'
+        containerOptions = '--nv --nvccli'
     }
     // Define the container for every process
     container = "./images/debian-nanopore.sif"
diff --git a/src/sub_workflows/BASECALLING.nf b/src/sub_workflows/BASECALLING.nf
index 1e0a62fb33dd1c0474f5d3c909eeabc1e86790c5..dfd2afff9607c6cc247b28bbccbe2a2082002433 100755
--- a/src/sub_workflows/BASECALLING.nf
+++ b/src/sub_workflows/BASECALLING.nf
@@ -1,4 +1,4 @@
-include { FAST5_to_POD5 ; BASECALL_CPU ; BASECALL_CPU_DEMUX ; BASECALL_GPU ; BASECALL_GPU_DEMUX } from '../modules/basecall.nf'
+include { FAST5_to_POD5 ; BASECALL ; BASECALL_DEMUX } from '../modules/basecall.nf'
 
 workflow BASECALLING {
     take:
@@ -16,28 +16,16 @@ workflow BASECALLING {
     main:
     FAST5_to_POD5(fast5_path)
     pod5_path = FAST5_to_POD5.out.mix(pod5_path)
-    if (params.basecall_compute?.equalsIgnoreCase("cpu")) {
-        if (params.basecall_demux == true) {
-            BASECALL_CPU_DEMUX(pod5_path, speed, modifications, config, trim, quality_score, trim_barcode, devices, ref)
-            bams = BASECALL_CPU_DEMUX.out.bam.toSortedList { a, b -> a[0] <=> b[0] }.flatten().buffer(size: 2)
-            txts = BASECALL_CPU_DEMUX.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
-        }
-        else {
-            BASECALL_CPU(pod5_path, speed, modifications, config, trim, quality_score, devices, ref)
-            bams = BASECALL_CPU.out.bam.toSortedList { a, b -> a[0] <=> b[0] }.flatten().buffer(size: 2)
-            txts = BASECALL_CPU.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
-        }
+
+    if (params.basecall_demux == true) {
+        BASECALL_DEMUX(pod5_path, speed, modifications, config, trim, quality_score, trim_barcode, devices, ref)
+        bams = BASECALL_DEMUX.out.bam.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
+        txts = BASECALL_DEMUX.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
     }
-    else if (params.basecall_compute?.equalsIgnoreCase("gpu")) {
-        if (params.basecall_demux == true) {
-            BASECALL_GPU_DEMUX(pod5_path, speed, modifications, config, trim, quality_score, trim_barcode, devices, ref)
-            bams = BASECALL_GPU_DEMUX.out.bam.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
-            txts = BASECALL_GPU_DEMUX.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
-        }
-        else {
-            BASECALL_GPU(pod5_path, speed, modifications, config, trim, quality_score, devices, ref)
-            bams = BASECALL_GPU.out.bam.toSortedList { a, b -> a[0] <=> b[0] }.flatten().buffer(size: 2)
-            txts = BASECALL_GPU.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
-        }
+    else {
+        BASECALL(pod5_path, speed, modifications, config, trim, quality_score, devices, ref)
+        bams = BASECALL.out.bam.toSortedList { a, b -> a[0] <=> b[0] }.flatten().buffer(size: 2)
+        txts = BASECALL.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
     }
+
 }