diff --git a/modules/convert_input_from_minknow.nf b/modules/convert_input_from_minknow.nf
index 4506c48e37ce2f8c13f9aa184f1efca3a623f210..a906269f485b58eb9a719eb7e7824a4d76c585fd 100755
--- a/modules/convert_input_from_minknow.nf
+++ b/modules/convert_input_from_minknow.nf
@@ -1,4 +1,4 @@
-process CONVERT_INPUT_FROM_MINKNOW {
+process CONVERT_INPUT_FROM_MINKNOW_BARCODED {
 
     publishDir "results/${params.out_dir}/minknow_converted_input", mode: "copy", overwrite: true
 
@@ -46,3 +46,57 @@ process CONVERT_INPUT_FROM_MINKNOW {
     """
 }
 
+
+// Converts MinKNOW output that is not split by barcode.
+// For every directory under `input` whose name contains "pass", the BAM files found
+// below it (skipping paths that contain "iltered") are merged with samtools into
+// <parent directory name>.bam, and `dorado summary` of that BAM is written to
+// <parent directory name>.txt.
+process CONVERT_INPUT_FROM_MINKNOW_NOT_BARCODED {
+
+    publishDir "results/${params.out_dir}/minknow_converted_input", mode: "copy", overwrite: true
+
+    label 'cpu'
+
+    input:
+        path input
+    output:
+        path "*.bam", emit: bam
+        path "*.txt", emit: txt
+
+    script:
+    """
+    # Define the input directory path
+    input_dir="${input.toString()}"
+
+    # Check if the input directory exists
+    if [ -d "\${input_dir}" ]; then
+        echo "Input directory exists."
+    else
+        echo "Input directory does not exist." >&2
+        exit 1
+    fi
+
+    # Find every directory whose name contains 'pass' (this includes a directory named exactly 'pass'), follow symlinks with -L
+    find -L "\${input_dir}" -type d -name '*pass*' -print0 | while IFS= read -r -d '' pass_dir; do
+
+        ## Get the name of the input directory for file naming
+        parent_dir="\${pass_dir%/*}"; parent_dir="\${parent_dir##*/}"
+
+        # Collect the BAM files one path per line so that paths with spaces or glob characters stay intact
+        bam_files=()
+        while IFS= read -r bam_file; do
+            bam_files+=("\$bam_file")
+        done < <(find -L "\$pass_dir" -type f -name '*.bam' | grep -v 'iltered')
+        if [ \${#bam_files[@]} -gt 0 ]; then
+            # Merge BAM files using samtools
+            output_bam="./\${parent_dir}.bam"
+            samtools merge "\$output_bam" "\${bam_files[@]}"
+
+            # Generate summary with dorado
+            dorado summary "\$output_bam" > "./\${parent_dir}.txt"
+        fi
+    done
+    """
+}
+
diff --git a/sub_workflows/FILTERING_AND_QC_FROM_MINKNOW.nf b/sub_workflows/FILTERING_AND_QC_FROM_MINKNOW.nf
index f4656eefb3c302fc396dcd368d5de882763da730..e68e76caad4a43e1f405d10bf427d09415951228 100755
--- a/sub_workflows/FILTERING_AND_QC_FROM_MINKNOW.nf
+++ b/sub_workflows/FILTERING_AND_QC_FROM_MINKNOW.nf
@@ -3,7 +3,7 @@
 include {PYCOQC_NO_FILTER ; PYCOQC_FILTER} from '../modules/pycoqc.nf'
 include {FILTER_BAM} from '../modules/filter_bam.nf'
 include {MAKE_QC_REPORT} from '../modules/num_reads_report.nf'
-include {CONVERT_INPUT_FROM_MINKNOW} from '../modules/convert_input_from_minknow.nf'
+include {CONVERT_INPUT_FROM_MINKNOW_BARCODED; CONVERT_INPUT_FROM_MINKNOW_NOT_BARCODED} from '../modules/convert_input_from_minknow.nf'
 
 workflow FILTERING_AND_QC_FROM_MINKNOW {
 
@@ -13,11 +13,21 @@ workflow FILTERING_AND_QC_FROM_MINKNOW {
         quality_score
 
     main:
-
-        CONVERT_INPUT_FROM_MINKNOW(input)
-
-        FILTER_BAM(CONVERT_INPUT_FROM_MINKNOW.out.bam.flatten().map {file -> tuple(file.baseName, file) }.toSortedList( { a, b -> a[0] <=> b[0] } ).flatten().buffer(size:2),
-                   CONVERT_INPUT_FROM_MINKNOW.out.txt.toSortedList( { a, b -> a.baseName <=> b.baseName } ).flatten(), mapq)
+
+        // Pick the conversion process from params.is_barcoded; any value other than
+        // true/false stops the run here instead of leaving converted_input undefined.
+        if (params.is_barcoded == true) {
+            CONVERT_INPUT_FROM_MINKNOW_BARCODED(input)
+            converted_input = CONVERT_INPUT_FROM_MINKNOW_BARCODED.out
+        } else if (params.is_barcoded == false) {
+            CONVERT_INPUT_FROM_MINKNOW_NOT_BARCODED(input)
+            converted_input = CONVERT_INPUT_FROM_MINKNOW_NOT_BARCODED.out
+        } else {
+            error("params.is_barcoded must be true or false, but got: ${params.is_barcoded}")
+        }
+
+        FILTER_BAM(converted_input.bam.flatten().map {file -> tuple(file.baseName, file) }.toSortedList( { a, b -> a[0] <=> b[0] } ).flatten().buffer(size:2),
+                   converted_input.txt.toSortedList( { a, b -> a.baseName <=> b.baseName } ).flatten(), mapq)
 
         PYCOQC_NO_FILTER(FILTER_BAM.out.id, FILTER_BAM.out.total_bam, FILTER_BAM.out.total_bai, FILTER_BAM.out.filtered_bam, FILTER_BAM.out.filtered_bai, FILTER_BAM.out.unfiltered_flagstat, FILTER_BAM.out.filtered_flagstat, FILTER_BAM.out.txt, quality_score)
 
diff --git a/workflow/main.nf b/workflow/main.nf
index f85e809f0056c3c363a07877e1872c934a6a37e1..be8790855739d55c90f436599b90097ad922fa34 100755
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -36,6 +36,8 @@ Basecall quality score threshold for basecalling (make sure it is the same as in
 MAPQ filtering threshold : ${params.mapq}
 
 Minimum number of mapped reads per sample/barcode for a file to be included in analysis : ${params.min_mapped_reads_thresh}
+
+Are input bam files barcoded? (Only relevant to "step_2_from_minknow" option) : ${params.is_barcoded}
 ======================================================================================================================================================================================
 """
 
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
index 363ab57001c5ccf1e8c7d82b56f3ae5636412a12..b410106f4afb8792e9a61e416e2fd2b77729fc9c 100755
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -56,6 +56,11 @@ params.steps_2_and_3_input_directory = "None"
 // MultiQC config
 params.multiqc_config = "None"
 
+// Are the files from MinKNOW barcoded or not
+params.is_barcoded = true
+
+
+
 if (params.step == 1) {