diff --git a/README.md b/README.md index 47834be970c7b3e85fedd6048e2b7ff4253f5c18..76f10170ee53c2043fd778bb18cc2ba80c5e8cc4 100755 --- a/README.md +++ b/README.md @@ -138,7 +138,7 @@ Many of the parameters for this step are based on dorado basecaller, see their [ ```txt --basecall_mods - + ``` ```txt diff --git a/parameters/human_blood/modkit.yaml b/parameters/human_blood/modkit.yaml index 5149ad894bc1204d3e42e2a5579b0d4274dcaa6e..3a694f0d54cf2ad28e2bcf93b1ed0bc5fd91fab1 100644 --- a/parameters/human_blood/modkit.yaml +++ b/parameters/human_blood/modkit.yaml @@ -1,4 +1,5 @@ project_name: "results_human_blood" step: 3 steps_2_and_3_input_directory: "./results/results_human_blood/" +reference_file: "./references/Homo_sapiens.GRCh38.dna.primary_assembly.fa" out_dir: "results_human_blood/" \ No newline at end of file diff --git a/src/main.nf b/src/main.nf index d38bd5e9a79f76d9d661ec903de132c9c37a42e1..25986bb409a974a75e10d1130aff64e6663d2549 100755 --- a/src/main.nf +++ b/src/main.nf @@ -85,6 +85,7 @@ workflow { quality_thresholds = Channel.fromPath("${params.steps_2_and_3_input_directory}/intermediate_qc_reports/quality_score_thresholds/*") multiqc_config = Channel.fromPath(params.multiqc_config) multiqc_input = Channel.fromPath("${params.steps_2_and_3_input_directory}/multiqc_input/**", type: "file") + reference_file = file(params.reference_file) } // Run steps if (params.step.toString() == "1") { @@ -94,6 +95,6 @@ workflow { } else if (params.step.toString() == "2_from_minknow") { FILTERING_AND_QC_FROM_MINKNOW(input_dir, mapq, qscore_thresh) } else if (params.step.toString()== "3") { - MODKIT_AND_MULTIQC(filtered_bams, filtered_bais, num_reads, read_length, quality_thresholds, multiqc_config, multiqc_input) + MODKIT_AND_MULTIQC(filtered_bams, filtered_bais, num_reads, read_length, quality_thresholds, multiqc_config, multiqc_input, reference_file) } } diff --git a/src/modules/modkit.nf b/src/modules/modkit.nf index d5a1a9da7165b85c6b8f7384be697ae5f4ab7625..34dc67e855abbb5c5ceb539fae3fdb5408da014c 100755 --- a/src/modules/modkit.nf +++ b/src/modules/modkit.nf @@ -5,21 +5,43 @@ process MODKIT { input: tuple val(id), path(bam) path bai + path reference_file output: path "*" script: """ - echo "starting modkit" + echo "starting probabilities calculation" + ## Inspect base modification probabilities + modkit sample-probs --hist -t 4 -i 10000 \ + --out-dir "./modkit_prob/" "${bam}" + echo "modification prob. calculated" + + echo "starting modkit extract calls" ## Make Methylation TSV file - modkit extract full --bgzf --queue-size 1000 -t 4 -i 10000 --mapped-only "${bam}" "${id}_modkit_output.tsv" + modkit extract calls --bgzf --queue-size 1000 -t 4 -i 10000 \ + --reference "${reference_file}" \ + --filter-threshold 0.9 \ + --mapped-only \ + --region MT \ + "${bam}" "${id}_modkit_calls.tsv" echo "extract successful" + ## Make Methylation Table - modkit pileup -t 4 -i 10000 --chunk-size 4 "${bam}" "${id}_modkit_pileup.bed" --log-filepath "${id}_modkit_pileup.log" + modkit pileup -t 4 -i 10000 \ + --filter-threshold 0.9 \ + --region MT \ + --sample-region MT \ + "${bam}" "${id}_modkit_pileup.bed" \ + --log-filepath "${id}_modkit_pileup.log" echo "pileup successful" + ## Make Summary - modkit summary -i 10000 ${bam} > "${id}_modkit_summary.txt" + modkit summary -t 4 -i 10000 \ + --filter-threshold 0.9 \ + --region MT \ + ${bam} > "${id}_modkit_summary.txt" echo "summary successful" """ } diff --git a/src/nextflow.config b/src/nextflow.config index 41d22397149db2cd972c6cbea2069d6c26aa7051..a8d5c388ce1194666bed3b4071d4163ef1fdb335 100755 --- a/src/nextflow.config +++ b/src/nextflow.config @@ -19,7 +19,7 @@ params { // Desired basecall speed ("fast", "hac", "sup"; @latest <- latest version available) basecall_speed = "sup@latest" // Desired basecaller modifications (4mC_5mC, 5mCG_5hmCG, 5mC_5hmC, 6mA). Can't use more than one modification per nucleotide. - basecall_mods = "5mC_5hmC" + basecall_mods = "5mC_5hmC,6mA" // Kit name (kit used to barcode the samples (e.g. SQK-RBK114-24); Use "None" to skip --kit-name in basecalling) barcoding_kit = "SQK-RBK114-24" // Threshold for mapped reasds diff --git a/src/sub_workflows/MODKIT_AND_MULTIQC.nf b/src/sub_workflows/MODKIT_AND_MULTIQC.nf index 35294c78f20ee2aef614636017fef98b5f09af10..b6d77d36057abb9bd5978f5cc3fbd215ab30a13c 100755 --- a/src/sub_workflows/MODKIT_AND_MULTIQC.nf +++ b/src/sub_workflows/MODKIT_AND_MULTIQC.nf @@ -13,11 +13,12 @@ workflow MODKIT_AND_MULTIQC { quality_thresholds multiqc_config multiqc_input + reference_file main: CALCULATE_COVERAGE(filtered_bams, filtered_bais) MERGE_COVERAGE(CALCULATE_COVERAGE.out.collect()) MERGE_QC_REPORT(num_reads.collect(), read_length.collect(), quality_thresholds.collect()) MULTIQC(multiqc_input.concat(MERGE_QC_REPORT.out.flatten()).collect(), MERGE_COVERAGE.out.mqc, multiqc_config) - MODKIT(filtered_bams, filtered_bais) + MODKIT(filtered_bams, filtered_bais, reference_file) }