diff --git a/README.md b/README.md index 5219af899716c193a2d76ea7b99f23939feb2cbc..a738049fe527f4bff967d22a3042ed5ade0558a3 100755 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ The default values for all parameters are set in `src/nexflow.config`. Please no ```txt --step - + ``` ```txt diff --git a/src/configs/human_blood.config b/src/configs/human_blood.config index 9560781f2cb418e78d301b1b3a81a953a9ba5d24..0f3b8688b8cb3956aaf0c8cadd9ef4d8db1b7dcc 100644 --- a/src/configs/human_blood.config +++ b/src/configs/human_blood.config @@ -25,4 +25,14 @@ profiles { out_dir = "results_human_blood/" } } -} \ No newline at end of file + human_blood { + params { + project_name = "results_human_blood" + step = "all" + basecall_path = "./data/pod5" + reference_file = "./references/Homo_sapiens.GRCh38.dna.primary_assembly.fa" + steps_2_and_3_input_directory = "./results/results_human_blood/" + out_dir = "results_human_blood/" + } + } +} diff --git a/src/main.nf b/src/main.nf index d38bd5e9a79f76d9d661ec903de132c9c37a42e1..e79677911038eb909c587dc3163e81db46a77346 100755 --- a/src/main.nf +++ b/src/main.nf @@ -6,8 +6,10 @@ include {MODKIT_AND_MULTIQC} from './sub_workflows/MODKIT_AND_MULTIQC.nf' // Main workflow logic workflow { - // Log execution parameters - if (params.step.toString() == "1") { + valid = false + if (params.step.toString() == "all" || params.step.toString() == "1") { + valid = true + // Step 1 (--step 1 or all): log the basecalling and alignment parameters log.info """ ================================================================= STEP 1 - OXFORD NANOPORE DNA SEQUENCING BASECALLING AND ALIGNMENT @@ -24,34 +26,8 @@ workflow { GPU device for submission : ${params.gpu_devices} Output directory : ${params.out_dir} ================================================================= - """ - } else if (params.step.toString() == "2_from_step_1" || params.step.toString() == "2_from_minknow") { - log.info """ - ====================================== - STEP 2 - FILTERING AND QUALITY CONTROL - 
====================================== - Input directory (output dir from step 1) : ${params.steps_2_and_3_input_directory} - Basecall quality score threshold : ${params.qscore_thresh} - MAPQ filtering threshold : ${params.mapq} - Min number of mapped reads per sample/barcode : ${params.min_mapped_reads_thresh} - BAM files barcoded? : ${params.is_barcoded} - ====================================== """ - } else if (params.step.toString() == "3") { - log.info """ - =============================================== - STEP 3 - METHYLATION CALLING AND MULTIQC REPORT - =============================================== - Input directory (input dir from step 2) : ${params.steps_2_and_3_input_directory} - MultiQC configuration file : ${params.multiqc_config} - =============================================== - """ - } else { - println "ERROR: You must set parameter --step to '1' or '2_from_step_1' or '2_from_minknow' or '3'. Please refer to documentation at: https://gmapsrv.pucrs.br/gitlab/ccd-public/nanopore" - System.exit(1) - } - // Set initial files and channels - if (params.step.toString() == "1") { + // Step 1 inputs: build the pod5/fast5 file channels and parameter values if (params.prefix == "None") { fast5_path = Channel.fromPath("${params.basecall_path}/**.fast5").map{file -> tuple(file.parent.toString().split("/")[-3] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[-3..-2].join("_"), file) }.groupTuple() pod5_path = Channel.fromPath("${params.basecall_path}/**.pod5").map{file -> tuple(file.parent.toString().split("/")[-3] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[-3..-2].join("_"), file) }.groupTuple() @@ -68,16 +44,52 @@ workflow { trimmed_barcodes = Channel.value(params.trimmed_barcodes) gpu_devices = Channel.value(params.gpu_devices) reference_file = file(params.reference_file) - } else if (params.step.toString() == "2_from_step_1") { - bam_files = Channel.fromPath("${params.steps_2_and_3_input_directory}/basecalling_output/*.bam").map {file -> 
tuple(file.baseName, file) }.toSortedList( { a, b -> a[0] <=> b[0] } ).flatten().buffer(size:2) - txt_files = Channel.fromPath("${params.steps_2_and_3_input_directory}/basecalling_output/*.txt").toSortedList( { a, b -> a.baseName <=> b.baseName } ).flatten() - mapq = Channel.value(params.mapq) - qscore_thresh = Channel.value(params.qscore_thresh) - } else if (params.step.toString() == "2_from_minknow") { - input_dir = Channel.fromPath("${params.steps_2_and_3_input_directory}/") - mapq = Channel.value(params.mapq) - qscore_thresh = Channel.value(params.qscore_thresh) - } else if (params.step.toString() == "3") { + // Run step 1 + BASECALLING(pod5_path, fast5_path, basecall_speed, basecall_mods, basecall_config, basecall_trim, qscore_thresh, barcoding_kit, trimmed_barcodes, gpu_devices, reference_file) + } + if (params.step.toString() == "all" || params.step.toString() == "2_from_step_1" || params.step.toString() == "2_from_minknow") { + valid = true + // Step 2 (--step 2_from_step_1, 2_from_minknow or all): log the filtering and QC parameters + log.info """ + ====================================== + STEP 2 - FILTERING AND QUALITY CONTROL + ====================================== + Input directory (output dir from step 1) : ${params.steps_2_and_3_input_directory} + Basecall quality score threshold : ${params.qscore_thresh} + MAPQ filtering threshold : ${params.mapq} + Min number of mapped reads per sample/barcode : ${params.min_mapped_reads_thresh} + BAM files barcoded? 
: ${params.is_barcoded} + ====================================== + """ + if (params.step.toString() == "all" || params.step.toString() == "2_from_step_1") { + // Inputs are globbed from steps_2_and_3_input_directory/basecalling_output. NOTE(review): not wired to the BASECALLING call; confirm ordering under --step all + bam_files = Channel.fromPath("${params.steps_2_and_3_input_directory}/basecalling_output/*.bam").map {file -> tuple(file.baseName, file) }.toSortedList( { a, b -> a[0] <=> b[0] } ).flatten().buffer(size:2) + txt_files = Channel.fromPath("${params.steps_2_and_3_input_directory}/basecalling_output/*.txt").toSortedList( { a, b -> a.baseName <=> b.baseName } ).flatten() + mapq = Channel.value(params.mapq) + qscore_thresh = Channel.value(params.qscore_thresh) + // Run step 2 on the BAM and TXT inputs + FILTERING_AND_QC_FROM_STEP_1(bam_files, txt_files, mapq, qscore_thresh) + } else if (params.step.toString() == "2_from_minknow") { + // Input is the steps_2_and_3_input_directory itself + input_dir = Channel.fromPath("${params.steps_2_and_3_input_directory}/") + mapq = Channel.value(params.mapq) + qscore_thresh = Channel.value(params.qscore_thresh) + // Run step 2 on the input directory + FILTERING_AND_QC_FROM_MINKNOW(input_dir, mapq, qscore_thresh) + } + } + if (params.step.toString() == "all" || params.step.toString() == "3") { + valid = true + // Step 3 (--step 3 or all): log the methylation calling and MultiQC parameters + log.info """ + =============================================== + STEP 3 - METHYLATION CALLING AND MULTIQC REPORT + =============================================== + Input directory (input dir from step 2) : ${params.steps_2_and_3_input_directory} + MultiQC configuration file : ${params.multiqc_config} + =============================================== + """ + // Step 3 inputs are globbed from steps_2_and_3_input_directory. NOTE(review): not wired to the step 2 call; confirm ordering under --step all filtered_bams = Channel.fromPath("${params.steps_2_and_3_input_directory}/bam_filtering/*-Filtered*.bam").map {file -> tuple(file.baseName, file) }.toSortedList( { a, b -> a[0] <=> b[0] } ).flatten().buffer(size:2) filtered_bais = Channel.fromPath("${params.steps_2_and_3_input_directory}/bam_filtering/*-Filtered*.bam.bai").toSortedList( { a, b -> a.baseName <=> b.baseName } ).flatten() 
num_reads = Channel.fromPath("${params.steps_2_and_3_input_directory}/intermediate_qc_reports/number_of_reads/*") @@ -85,15 +97,12 @@ workflow { quality_thresholds = Channel.fromPath("${params.steps_2_and_3_input_directory}/intermediate_qc_reports/quality_score_thresholds/*") multiqc_config = Channel.fromPath(params.multiqc_config) multiqc_input = Channel.fromPath("${params.steps_2_and_3_input_directory}/multiqc_input/**", type: "file") - } - // Run steps - if (params.step.toString() == "1") { - BASECALLING(pod5_path, fast5_path, basecall_speed, basecall_mods, basecall_config, basecall_trim, qscore_thresh, barcoding_kit, trimmed_barcodes, gpu_devices, reference_file) - } else if (params.step.toString() == "2_from_step_1") { - FILTERING_AND_QC_FROM_STEP_1(bam_files, txt_files, mapq, qscore_thresh) - } else if (params.step.toString() == "2_from_minknow") { - FILTERING_AND_QC_FROM_MINKNOW(input_dir, mapq, qscore_thresh) - } else if (params.step.toString()== "3") { + // Run step 3 MODKIT_AND_MULTIQC(filtered_bams, filtered_bais, num_reads, read_length, quality_thresholds, multiqc_config, multiqc_input) } + if (!valid) { + // No step block matched --step: report the accepted values and abort + println "ERROR: You must set parameter --step to 'all', '1' or '2_from_step_1' or '2_from_minknow' or '3'. Please refer to documentation at: https://gmapsrv.pucrs.br/gitlab/ccd-public/nanopore" + System.exit(1) + } }