From 04f7456f56ffa20be7d6241af12d0e64dbf3fbd1 Mon Sep 17 00:00:00 2001 From: Nickyecen Date: Mon, 28 Jul 2025 07:49:40 -0300 Subject: [PATCH 1/4] [#13] Fix empty values Signed-off-by: Nickyecen --- src/main.nf | 8 +++---- src/modules/basecall.nf | 36 +++++++------------------------- src/nextflow.config | 14 ++++++------- src/sub_workflows/BASECALLING.nf | 14 +++++++++++-- 4 files changed, 31 insertions(+), 41 deletions(-) diff --git a/src/main.nf b/src/main.nf index d7aa89f..c9db6e6 100755 --- a/src/main.nf +++ b/src/main.nf @@ -56,16 +56,16 @@ workflow { samtools_threads = Channel.value(params.samtools_threads) // step conditionals if (params.step == 1) { - if (params.prefix == "None") { + if (params.prefix == null) { fast5_path = Channel.fromPath("${params.basecall_path}/**.fast5").map{file -> tuple(file.parent.toString().split("/")[-3] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[-3..-2].join("_"), file) }.groupTuple() pod5_path = Channel.fromPath("${params.basecall_path}/**.pod5").map{file -> tuple(file.parent.toString().split("/")[-3] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[-3..-2].join("_"), file) }.groupTuple() } else { fast5_path = Channel.fromPath("${params.basecall_path}/**.fast5").map{file -> tuple("${params.prefix}_" + file.parent.toString().split("/")[-2] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[-3..-2].join("_"), file) }.groupTuple() pod5_path = Channel.fromPath("${params.basecall_path}/**.pod5").map{file -> tuple("${params.prefix}_" + file.parent.toString().split("/")[-2] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[-3..-2].join("_"), file) }.groupTuple() } - basecall_speed = Channel.value(params.basecall_speed) - basecall_mods = Channel.value(params.basecall_mods) - basecall_config = Channel.value(params.basecall_config) + basecall_speed = params.basecall_speed + basecall_mods = params.basecall_mods + basecall_config = 
params.basecall_config basecall_trim = Channel.value(params.basecall_trim) qscore_thresh = Channel.value(params.qscore_thresh) barcoding_kit = Channel.value(params.barcoding_kit) diff --git a/src/modules/basecall.nf b/src/modules/basecall.nf index a2cad7b..9ea9c16 100755 --- a/src/modules/basecall.nf +++ b/src/modules/basecall.nf @@ -24,9 +24,7 @@ process BASECALL { input: tuple val(id), path(pod5_dir) - val basecall_speed - val basecall_mods - val basecall_config + val basecall_arg val basecall_trim val qscore_thresh val barcoding_kit @@ -42,30 +40,12 @@ process BASECALL { script: """ echo "Basecalling started for: ${id}" - if [[ "${basecall_config}" == "None" ]]; then - if [[ "${basecall_mods}" == "None" ]]; then - dorado basecaller "${basecall_speed}" . \ - ${barcoding_kit != "None" ? "--kit-name ${barcoding_kit}" : ""} \ - --trim "${basecall_trim}" \ - --min-qscore "${qscore_thresh}" \ - --reference "${reference_file}" \ - --device "cuda:${gpu_devices}" > "${id}.bam" - else - dorado basecaller "${basecall_speed},${basecall_mods}" . \ - ${barcoding_kit != "None" ? "--kit-name ${barcoding_kit}" : ""} \ - --trim "${basecall_trim}" \ - --min-qscore "${qscore_thresh}" \ - --reference "${reference_file}" \ - --device "cuda:${gpu_devices}" > "${id}.bam" - fi - else - dorado basecaller "${basecall_config}" . \ - ${barcoding_kit != "None" ? "--kit-name ${barcoding_kit}" : ""} \ - --trim "${basecall_trim}" \ - --min-qscore "${qscore_thresh}" \ - --reference "${reference_file}" \ - --device "cuda:${gpu_devices}" > "${id}.bam" - fi + dorado basecaller "${basecall_arg}" . \ + ${barcoding_kit != null ? "--kit-name ${barcoding_kit}" : ""} \ + --trim "${basecall_trim}" \ + --min-qscore "${qscore_thresh}" \ + --reference "${reference_file}" \ + --device "cuda:${gpu_devices}" > "${id}.bam" echo "Basecalling completed, sorting bams..." 
samtools sort -@ ${samtools_threads} "${id}.bam" -o "${id}_sorted.bam" @@ -73,7 +53,7 @@ process BASECALL { mv "${id}_sorted.bam" "${id}.bam" echo "Bams sorted, demultiplexing..." - if [[ "${trimmed_barcodes}" == "True" ]]; then + if [[ "${trimmed_barcodes}" == "true" ]]; then echo "Demultiplexing with barcode trimming..." dorado demux --output-dir "./demux_data/" --no-classify "${id}.bam" else diff --git a/src/nextflow.config b/src/nextflow.config index 7987397..119d5de 100755 --- a/src/nextflow.config +++ b/src/nextflow.config @@ -8,9 +8,9 @@ params { // Project name (used to identify which project you're working on) project_name = "default" // Input reference fasta file - reference_file = "None" + reference_file = null // Step of pipeline to execute - step = "None" + step = null // Output directory for pipeline results out_dir = "results_${params.project_name}/" // directory of basecalling data @@ -23,24 +23,24 @@ params { basecall_speed = "sup@latest" // Desired basecaller modifications (4mC_5mC, 5mCG_5hmCG, 5mC_5hmC, 6mA). Can't use more than one modification per nucleotide. basecall_mods = "5mC_5hmC" - // Kit name (kit used to barcode the samples (e.g. SQK-RBK114-24); Use "None" to skip --kit-name in basecalling) + // Kit name (kit used to barcode the samples (e.g. SQK-RBK114-24); Use null to skip --kit-name in basecalling) barcoding_kit = "SQK-RBK114-24" // Threshold for mapped reasds min_mapped_reads_thresh = 500 // Desired basecall model version as a path (e.g. ./models/dna_r10.4.1_e8.2_400bps_sup@v5.2.0) - basecall_config = "None" + basecall_config = null // Type of read trimming during basecalling ("all", "primers", "adapters", "none"); You should change to "none" if you don't want to trim in the basecalling basecall_trim = "all" // Basecalling demultiplexing basecall_demux = false // Barcodes were trimmed?
(if True = demux will only separate the files; if False = demux will trim after basecalling and separate them) - trimmed_barcodes = "True" + trimmed_barcodes = true // Add prefix to all output files - prefix = "None" + prefix = null // Which GPU devices to use for basecalling? gpu_devices = "all" // Previous results - steps_2_and_3_input_directory = "None" + steps_2_and_3_input_directory = null // MultiQC config multiqc_config = "./references/multiqc_config.yaml" // Are the files from MinKNOW barcoded or not diff --git a/src/sub_workflows/BASECALLING.nf b/src/sub_workflows/BASECALLING.nf index 30b7817..9c092cd 100755 --- a/src/sub_workflows/BASECALLING.nf +++ b/src/sub_workflows/BASECALLING.nf @@ -18,8 +18,18 @@ workflow BASECALLING { main: FAST5_to_POD5(fast5_path, pod5_threads) - pod5_path = FAST5_to_POD5.out.mix(pod5_path) - BASECALL(pod5_path, basecall_speed, basecall_mods, basecall_config, basecall_trim, qscore_thresh, barcoding_kit, trimmed_barcodes, gpu_devices, reference_file, samtools_threads) + pod5_path = FAST5_to_POD5.out.mix(pod5_path) + + basecall_arg = null + if(basecall_config != null) { + basecall_arg = basecall_config + } else if(basecall_mods != null) { + basecall_arg = "${basecall_speed},${basecall_mods}" + } else { + basecall_arg = basecall_speed + } + + BASECALL(pod5_path, Channel.value(basecall_arg), basecall_trim, qscore_thresh, barcoding_kit, trimmed_barcodes, gpu_devices, reference_file, samtools_threads) bams = BASECALL.out.bam.toSortedList { a, b -> a[0] <=> b[0] }.flatten().buffer(size: 2) txts = BASECALL.out.txt.toSortedList { a, b -> a.baseName <=> b.baseName }.flatten() -- GitLab From 41d2d7e829b66350fc161b5c3a076a63a013fb5f Mon Sep 17 00:00:00 2001 From: Nickyecen Date: Mon, 28 Jul 2025 07:50:30 -0300 Subject: [PATCH 2/4] Remove remaining toString call Signed-off-by: Nickyecen --- src/modules/convert_input_from_minknow.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/convert_input_from_minknow.nf 
b/src/modules/convert_input_from_minknow.nf index 4339e42..6a66a94 100755 --- a/src/modules/convert_input_from_minknow.nf +++ b/src/modules/convert_input_from_minknow.nf @@ -58,7 +58,7 @@ process CONVERT_INPUT_FROM_MINKNOW_NOT_BARCODED { script: """ # Define the input directory path - input_dir="${input.toString()}" + input_dir="${input}" # Check if the input directory exists if [ -d "\${input_dir}" ]; then echo "Input directory exists." -- GitLab From 85f6a5a85cb14c218e4eeba78cab3acaada27372 Mon Sep 17 00:00:00 2001 From: Nickyecen Date: Mon, 28 Jul 2025 07:51:24 -0300 Subject: [PATCH 3/4] Update README to accommodate null values Signed-off-by: Nickyecen --- README.md | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 9796483..91b3067 100755 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ NextFlow pipeline used by the Developmental Cognitive Neuroscience Lab (DCNL) to ## Pipeline parameters -The default values for all parameters are set in `src/nexflow.config`. Please notice that it's required to overwrite a few because they depend on the procedure you need to run (e.g., `--step`). The specifics of each are described next. +The default values for all parameters are set in `src/nextflow.config`. Please notice that it's required to overwrite a few because they depend on the procedure you need to run (e.g., `--step`). The specifics of each are described next. ### Global options @@ -99,7 +99,7 @@ The default values for all parameters are set in `src/nexflow.config`. Please no ```txt --step - + ``` ```txt @@ -111,13 +111,13 @@ The default values for all parameters are set in `src/nexflow.config`. Please no ```txt --steps_2_and_3_input_directory -". Default = "None"> +".
Default = null> ``` ```txt --prefix - + ``` ```txt @@ -157,7 +157,7 @@ Many of the parameters for this step are based on dorado basecaller, see their [ ```txt --basecall_config - + ``` ```txt @@ -169,7 +169,7 @@ Many of the parameters for this step are based on dorado basecaller, see their [ ```txt --barcoding_kit - + ``` ```txt @@ -181,13 +181,13 @@ Many of the parameters for this step are based on dorado basecaller, see their [ ```txt --basecall_demux - + ``` ```txt --trimmed_barcodes - + ``` ```txt @@ -225,7 +225,7 @@ Many of the parameters for this step are based on dorado basecaller, see their [ ```txt --is_barcoded - + ``` ### Step 3: Methylation Calling and MultiQC @@ -309,10 +309,8 @@ The following examples assume your current directory is the root directory of th --gpu_devices "all" \ --basecall_mods "5mC_5hmC" \ --qscore_thresh 9 \ - --basecall_config "False" \ --basecall_trim "none" \ --basecall_compute "gpu" \ - --basecall_demux "False" \ --queue_size 1 \ --out_dir "$OUTPUT_DIR_NAME" ``` @@ -334,7 +332,7 @@ The following examples assume your current directory is the root directory of th nextflow ./src/main.nf \ --steps_2_and_3_input_directory "./results/$OUTPUT_DIR_NAME/" \ --min_mapped_reads_thresh 500 \ - --is_barcoded "True" \ + --is_barcoded \ --qscore_thresh 9 \ --mapq 10 \ --step "2_from_step_1" -- GitLab From 20d8a4d99dfa69b597c2e6a421a69b1729c219b3 Mon Sep 17 00:00:00 2001 From: Nickyecen Date: Sun, 10 Aug 2025 16:32:21 -0300 Subject: [PATCH 4/4] Add comment explaining basecall_arg Signed-off-by: Nickyecen --- src/sub_workflows/BASECALLING.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sub_workflows/BASECALLING.nf b/src/sub_workflows/BASECALLING.nf index 9c092cd..390f0c5 100755 --- a/src/sub_workflows/BASECALLING.nf +++ b/src/sub_workflows/BASECALLING.nf @@ -20,6 +20,7 @@ workflow BASECALLING { FAST5_to_POD5(fast5_path, pod5_threads) pod5_path = FAST5_to_POD5.out.mix(pod5_path) + // Saves model, modifications and speed on a separate 
variable for the basecall basecall_arg = null if(basecall_config != null) { basecall_arg = basecall_config -- GitLab