From 8274539259af3fb01ae4b92625d26977bcb79010 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Chrusciel?= <chruscieljoao@gmail.com>
Date: Tue, 27 May 2025 18:32:08 -0300
Subject: [PATCH 1/5] up: basecalling.nf code optimization

- removed the CPU basecalling processes (BASECALL_CPU, BASECALL_CPU_DEMUX) since they were not needed.
---
 .gitignore                       |   1 +
 src/modules/basecall.nf          | 104 -------------------------------
 src/sub_workflows/BASECALLING.nf |  34 ++++------
 3 files changed, 12 insertions(+), 127 deletions(-)

diff --git a/.gitignore b/.gitignore
index 78a43a6..fd17af6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ work
 *.csv
 *.zip
 *.gz
+*.sif
\ No newline at end of file
diff --git a/src/modules/basecall.nf b/src/modules/basecall.nf
index e1da10c..7a9408f 100755
--- a/src/modules/basecall.nf
+++ b/src/modules/basecall.nf
@@ -14,109 +14,6 @@ process FAST5_to_POD5 {
         """
 }
 
-process BASECALL_CPU {
-    publishDir "results/${params.out_dir}/basecalling_output/", mode: "copy", overwrite: true
-    label 'cpu'
-
-    input:
-        tuple val(id), path(pod5_dir)
-        val speed
-        val mods
-        val config
-        val trim
-        val qscore
-        val devices
-        path ref
-
-    output:
-        path ("*.bam"), emit: bam
-        path ("*.txt"), emit: txt
-
-    script:
-        """
-        if [[ "${config}" == "false" ]]; then
-            if [[ "${mods}" == "false" ]]; then 
-                dorado basecaller "${speed}" . -x cpu --trim "${trim}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam" 
-            else
-                dorado basecaller "${speed},${mods}" . -x cpu --trim "${trim}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-            fi
-        else
-            if [[ "${mods}" == "false" ]]; then
-                dorado basecaller "${speed}" . -x cpu --trim "${trim}" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-            else
-                dorado basecaller "${speed},${mods}" . -x cpu --trim "${trim}" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-            fi
-        fi 
-        samtools sort -@ -12 "${id}.bam" -o "${id}_sorted.bam"
-        rm "${id}.bam"
-        mv "${id}_sorted.bam" "${id}.bam"
-        dorado summary "${id}.bam" > "${id}.txt"
-        """
-}
-
-process BASECALL_CPU_DEMUX {
-    publishDir "results/${params.out_dir}/basecalling_output/", mode: "copy", overwrite: true
-    label 'cpu'
-
-    input:
-        tuple val(id), path(pod5_dir)
-        val speed
-        val mods
-        val config
-        val trim
-        val qscore
-        val trim_barcode
-        val devices
-        path ref
-
-    output:
-        path ("*.bam"), emit: bam
-        path ("*.txt"), emit: txt
-
-    script:
-        """
-        if [[ "${config}" == "false" ]]; then
-            if [[ "${mods}" == "false" ]]; then
-                dorado basecaller "${speed}" . -x cpu --trim "none" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-            else
-                dorado basecaller "${speed},${mods}" . -x cpu --trim "none" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-            fi
-        else
-            if [[ "${mods}" == "false" ]]; then
-                dorado basecaller "${speed}" . -x cpu --trim "none" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-            else
-                dorado basecaller "${speed},${mods}" . -x cpu --trim "none" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
-
-            fi
-        fi
-        
-        samtools sort -@ -12 "${id}.bam" -o "${id}_sorted.bam"
-        rm "${id}.bam"
-        mv "${id}_sorted.bam" "${id}.bam"
-       
-        if [[ "${trim_barcode}" == "true" ]]; then
-            dorado demux --output-dir "./demux_data/" --no-classify "${id}.bam"
-        else
-            dorado demux --no-trim --output-dir "./demux_data/" --no-classify "${id}.bam"
-        fi
-
-        cd ./demux_data/
-        for file in *; do
-            samtools sort -@ -12 "\$file" -o "${id}_\${file}"
-            rm "\$file"
-        done
-
-        cd ../
-        rm "${id}.bam"
-        mv ./demux_data/* ./
-        rm -r ./demux_data/
-        for file in *.bam; do
-            new_id="\${file%%.*}"
-            dorado summary "\$file" > "\${new_id}.txt"
-        done
-        """
-}
-
 process BASECALL_GPU {
     publishDir "results/${params.out_dir}/basecalling_output/", mode: "copy", overwrite: true
     label 'gpu'
@@ -158,7 +55,6 @@ process BASECALL_GPU {
         """
 }
 
-
 process BASECALL_GPU_DEMUX {
     publishDir "results/${params.out_dir}/basecalling_output/", mode: "copy", overwrite: true
     label 'gpu'
diff --git a/src/sub_workflows/BASECALLING.nf b/src/sub_workflows/BASECALLING.nf
index 1e0a62f..bc5bd31 100755
--- a/src/sub_workflows/BASECALLING.nf
+++ b/src/sub_workflows/BASECALLING.nf
@@ -1,4 +1,4 @@
-include { FAST5_to_POD5 ; BASECALL_CPU ; BASECALL_CPU_DEMUX ; BASECALL_GPU ; BASECALL_GPU_DEMUX } from '../modules/basecall.nf'
+include { FAST5_to_POD5 ; BASECALL_GPU ; BASECALL_GPU_DEMUX } from '../modules/basecall.nf'
 
 workflow BASECALLING {
     take:
@@ -16,28 +16,16 @@ workflow BASECALLING {
     main:
         FAST5_to_POD5(fast5_path)
         pod5_path = FAST5_to_POD5.out.mix(pod5_path)
-        if (params.basecall_compute?.equalsIgnoreCase("cpu")) {
-            if (params.basecall_demux == true) {
-                BASECALL_CPU_DEMUX(pod5_path, speed, modifications, config, trim, quality_score, trim_barcode, devices, ref)
-                bams = BASECALL_CPU_DEMUX.out.bam.toSortedList { a, b -> a[0] <=> b[0] }.flatten().buffer(size: 2)
-                txts = BASECALL_CPU_DEMUX.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
-            }
-            else {
-                BASECALL_CPU(pod5_path, speed, modifications, config, trim, quality_score, devices, ref)
-                bams = BASECALL_CPU.out.bam.toSortedList { a, b -> a[0] <=> b[0] }.flatten().buffer(size: 2)
-                txts = BASECALL_CPU.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
-            }
+       
+        if (params.basecall_demux == true) {
+            BASECALL_GPU_DEMUX(pod5_path, speed, modifications, config, trim, quality_score, trim_barcode, devices, ref)
+            bams = BASECALL_GPU_DEMUX.out.bam.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
+            txts = BASECALL_GPU_DEMUX.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
         }
-        else if (params.basecall_compute?.equalsIgnoreCase("gpu")) {
-            if (params.basecall_demux == true) {
-                BASECALL_GPU_DEMUX(pod5_path, speed, modifications, config, trim, quality_score, trim_barcode, devices, ref)
-                bams = BASECALL_GPU_DEMUX.out.bam.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
-                txts = BASECALL_GPU_DEMUX.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
-            }
-            else {
-                BASECALL_GPU(pod5_path, speed, modifications, config, trim, quality_score, devices, ref)
-                bams = BASECALL_GPU.out.bam.toSortedList { a, b -> a[0] <=> b[0] }.flatten().buffer(size: 2)
-                txts = BASECALL_GPU.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
-            }
+        else {
+            BASECALL_GPU(pod5_path, speed, modifications, config, trim, quality_score, devices, ref)
+            bams = BASECALL_GPU.out.bam.toSortedList { a, b -> a[0] <=> b[0] }.flatten().buffer(size: 2)
+            txts = BASECALL_GPU.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
         }
+
 }
-- 
GitLab


From fea1474769abcd6446d2213c6189badd1ded5584 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Chrusciel?= <chruscieljoao@gmail.com>
Date: Sat, 7 Jun 2025 13:47:39 -0300
Subject: [PATCH 2/5] fixed conventions in BASECALL and BASECALL_DEMUX
 processes

- added echo logs for each step
---
 src/modules/basecall.nf          | 25 +++++++++++++++++++------
 src/sub_workflows/BASECALLING.nf | 14 +++++++-------
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/src/modules/basecall.nf b/src/modules/basecall.nf
index 7a9408f..2405f67 100755
--- a/src/modules/basecall.nf
+++ b/src/modules/basecall.nf
@@ -14,7 +14,7 @@ process FAST5_to_POD5 {
         """
 }
 
-process BASECALL_GPU {
+process BASECALL {
     publishDir "results/${params.out_dir}/basecalling_output/", mode: "copy", overwrite: true
     label 'gpu'
 
@@ -34,6 +34,7 @@ process BASECALL_GPU {
 
     script:
         """
+        echo "Basecalling started for: ${id}"
         if [[ "${config}" == "false" ]]; then    
             if [[ "${mods}" == "false" ]]; then 
                 dorado basecaller "${speed}" . --trim "${trim}" --min-qscore "${qscore}" --reference "${ref}" --device "cuda:${devices}" > "${id}.bam" 
@@ -48,14 +49,18 @@ process BASECALL_GPU {
             fi
         fi
 
+        echo "Basecalling completed, sorting bams..."
         samtools sort -@ -12 "${id}.bam" -o "${id}_sorted.bam"
         rm "${id}.bam"
         mv "${id}_sorted.bam" "${id}.bam"
+
+        echo "Bams sorted, generating summary with dorado..."
         dorado summary "${id}.bam" > "${id}.txt"
+        echo "Process completed for: ${id}"
         """
 }
 
-process BASECALL_GPU_DEMUX {
+process BASECALL_DEMUX {
     publishDir "results/${params.out_dir}/basecalling_output/", mode: "copy", overwrite: true
     label 'gpu'
 
@@ -76,6 +81,7 @@ process BASECALL_GPU_DEMUX {
 
     script:
         """
+        echo "Demultiplexed basecalling started for: ${id}"
         if [[ "${config}" == "false" ]]; then
             if [[ "${mods}" == "false" ]]; then
                 dorado basecaller "${speed}" . --trim "none" --min-qscore "${qscore}" --reference "${ref}" --device "cuda:${devices}" > "${id}.bam"
@@ -90,22 +96,28 @@ process BASECALL_GPU_DEMUX {
             fi
         fi
 
+        echo "Basecalling completed, sorting bams..."
 	    samtools sort -@ -12 "${id}.bam" -o "${id}_sorted.bam"
     	rm "${id}.bam"
     	mv "${id}_sorted.bam" "${id}.bam"
-	
+
+        echo "Bams sorted, demultiplexing..."
         if [[ "${trim_barcode}" == "true" ]]; then
+            echo "Demultiplexing with barcode trimming..."
             dorado demux --output-dir "./demux_data/" --no-classify "${id}.bam"
         else
+            echo "Demultiplexing without barcode trimming..."
             dorado demux --no-trim --output-dir "./demux_data/" --no-classify "${id}.bam"
         fi
-
+        
+        echo "Demultiplexing completed, sorting barcode files..."
         cd ./demux_data/
         for file in *; do
             samtools sort -@ -12 "\$file" -o "${id}_\${file}"
-            rm "\$file"	
+            rm "\$file"
         done
         
+        echo "Bams sorted, generating summary with dorado..."
         cd ../
         rm "${id}.bam"
         mv ./demux_data/* ./
@@ -114,5 +126,6 @@ process BASECALL_GPU_DEMUX {
             new_id="\${file%%.*}"
             dorado summary "\$file" > "\${new_id}.txt"
         done
+        echo "Process completed for: ${id}"
         """
-}
+}
\ No newline at end of file
diff --git a/src/sub_workflows/BASECALLING.nf b/src/sub_workflows/BASECALLING.nf
index bc5bd31..dfd2aff 100755
--- a/src/sub_workflows/BASECALLING.nf
+++ b/src/sub_workflows/BASECALLING.nf
@@ -1,4 +1,4 @@
-include { FAST5_to_POD5 ; BASECALL_GPU ; BASECALL_GPU_DEMUX } from '../modules/basecall.nf'
+include { FAST5_to_POD5 ; BASECALL ; BASECALL_DEMUX } from '../modules/basecall.nf'
 
 workflow BASECALLING {
     take:
@@ -18,14 +18,14 @@ workflow BASECALLING {
         pod5_path = FAST5_to_POD5.out.mix(pod5_path)
        
         if (params.basecall_demux == true) {
-            BASECALL_GPU_DEMUX(pod5_path, speed, modifications, config, trim, quality_score, trim_barcode, devices, ref)
-            bams = BASECALL_GPU_DEMUX.out.bam.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
-            txts = BASECALL_GPU_DEMUX.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
+            BASECALL_DEMUX(pod5_path, speed, modifications, config, trim, quality_score, trim_barcode, devices, ref)
+            bams = BASECALL_DEMUX.out.bam.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
+            txts = BASECALL_DEMUX.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
         }
         else {
-            BASECALL_GPU(pod5_path, speed, modifications, config, trim, quality_score, devices, ref)
-            bams = BASECALL_GPU.out.bam.toSortedList { a, b -> a[0] <=> b[0] }.flatten().buffer(size: 2)
-            txts = BASECALL_GPU.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
+            BASECALL(pod5_path, speed, modifications, config, trim, quality_score, devices, ref)
+            bams = BASECALL.out.bam.toSortedList { a, b -> a[0] <=> b[0] }.flatten().buffer(size: 2)
+            txts = BASECALL.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten()
         }
 
 }
-- 
GitLab


From 6fad528590822bfcbc2534570bc3f8ff18629da5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Chrusciel?= <chruscieljoao@gmail.com>
Date: Sat, 7 Jun 2025 14:57:13 -0300
Subject: [PATCH 3/5] updated dorado to v1.0.1

---
 containers/debian-nanopore.def | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/containers/debian-nanopore.def b/containers/debian-nanopore.def
index c48acff..e55c1c1 100644
--- a/containers/debian-nanopore.def
+++ b/containers/debian-nanopore.def
@@ -49,10 +49,10 @@ From: debian:12
     # Install Dorado
     cd /opt
     mkdir dorado && cd dorado
-    wget https://cdn.oxfordnanoportal.com/software/analysis/dorado-0.9.6-linux-x64.tar.gz
-    tar -xzf dorado-0.9.6-linux-x64.tar.gz
-    rm dorado-0.9.6-linux-x64.tar.gz
-    echo 'export PATH="/opt/dorado/dorado-0.9.6-linux-x64/bin/:$PATH"' >> "$SINGULARITY_ENVIRONMENT"
+    wget https://cdn.oxfordnanoportal.com/software/analysis/dorado-1.0.1-linux-x64.tar.gz
+    tar -xzf dorado-1.0.1-linux-x64.tar.gz
+    rm dorado-1.0.1-linux-x64.tar.gz
+    echo 'export PATH="/opt/dorado/dorado-1.0.1-linux-x64/bin/:$PATH"' >> "$SINGULARITY_ENVIRONMENT"
 
 %test
     # Check if installations are on path and display their versions
@@ -67,12 +67,12 @@ From: debian:12
 
 %labels
     author Joao Henrique Chrusciel
-    version v0.4.0
+    version v0.5.0
 
 %help
     Software included in the container are:
 
-    dorado==0.9.6
+    dorado==1.0.1
     modkit==0.4.4
     samtools==1.16.1
     pod5==0.3.23
-- 
GitLab


From 7498e42e1ec518e7381a5bf4ee54a7e80aeecdf5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Chrusciel?= <chruscieljoao@gmail.com>
Date: Sun, 8 Jun 2025 18:32:10 -0300
Subject: [PATCH 4/5] fixed GPU access issue by adding --nvccli

---
 src/nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nextflow.config b/src/nextflow.config
index 4c73a49..7b511f5 100755
--- a/src/nextflow.config
+++ b/src/nextflow.config
@@ -58,7 +58,7 @@ process {
     // Define local gpu execution
     withLabel: gpu {
         executor='local'
-        containerOptions = '--nv'
+        containerOptions = '--nv --nvccli'
     }
     // Define the container for every process
     container = "./images/debian-nanopore.sif"
-- 
GitLab


From bd18ebaa36e043e4e7c8fb4558b8d863c1027cd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Chrusciel?= <chruscieljoao@gmail.com>
Date: Fri, 20 Jun 2025 09:19:16 -0300
Subject: [PATCH 5/5] up: README.md and basecall.nf

 - added the nvidia-container-toolkit dependency info in README.md
 - modified basecall.nf to improve output handling
---
 README.md               | 8 +++++++-
 src/modules/basecall.nf | 8 ++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index ee239bc..5b85206 100755
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ NextFlow pipeline used by the Developmental Cognitive Neuroscience Lab (DCNL) to
       sudo apt install git lz4
       ```
 
-    - Install Java: install either [OpenJRE/JDK][openjava] (**recommended, see below**) or [OracleJRE/JDK][oraclejava]. to install both openjre and openjdk using Debian/Ubuntu:
+    - Install Java: install either [OpenJRE/JDK][openjava] (**recommended, see below**) or [OracleJRE/JDK][oraclejava]. To install both openjre and openjdk using Debian/Ubuntu:
 
       ```sh
       sudo apt install default-jre default-jdk
@@ -30,6 +30,11 @@ NextFlow pipeline used by the Developmental Cognitive Neuroscience Lab (DCNL) to
 
     - Install [NextFlow][nextflow-docs-install] (skip Java installation)
     - Install [Apptainer][apptainer-docs-install-deb]
+    - Install [NVIDIA Container Toolkit][nvidia-container-toolkit]: this enables Apptainer to access your GPU during basecalling. After installing it, you can verify that the GPU is visible inside a container by pulling a test image and running `nvidia-smi` in it:
+
+      ```sh
+      apptainer pull docker://nvidia/cuda:12.2.0-base-ubuntu22.04 && apptainer exec --nvccli cuda_12.2.0-base-ubuntu22.04.sif nvidia-smi
+      ```
 
 1. Check that all dependencies are accessible via your users `$PATH`:
 
@@ -72,6 +77,7 @@ NextFlow pipeline used by the Developmental Cognitive Neuroscience Lab (DCNL) to
 [oraclejava]:https://www.java.com/en/download/linux_manual.jsp
 [nextflow-docs-install]:https://www.nextflow.io/docs/latest/install.html#install-nextflow
 [apptainer-docs-install-deb]:https://apptainer.org/docs/admin/main/installation.html#install-debian-packages
+[nvidia-container-toolkit]:https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
 
 [top](#table-of-contents)
 
diff --git a/src/modules/basecall.nf b/src/modules/basecall.nf
index 2405f67..18efc8b 100755
--- a/src/modules/basecall.nf
+++ b/src/modules/basecall.nf
@@ -29,8 +29,8 @@ process BASECALL {
         path ref
 
     output:
-        path ("*.bam"), emit: bam
-        path ("*.txt"), emit: txt
+        path ("${id}.bam"), emit: bam
+        path ("${id}.txt"), emit: txt
 
     script:
         """
@@ -76,8 +76,8 @@ process BASECALL_DEMUX {
         path ref
 
     output:
-        path ("*.bam"), emit: bam
-        path ("*.txt"), emit: txt
+        path ("${id}_*.bam"), emit: bam   // per-barcode BAMs; the script deletes the combined <id>.bam after demux
+        path ("${id}_*.txt"), emit: txt   // one dorado summary per demultiplexed BAM (named from the BAM name up to its first '.')
 
     script:
         """
-- 
GitLab