diff --git a/README.md b/README.md index fec96e2466dd17b76f2adaaf94f53f4ec012d9a0..c169f13e95a6ea84daedbae59460d2c365324cc3 100755 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ Many of the parameters for this step are based on dorado basecaller, see their [ ```txt --basecall_mods - + ``` ```txt diff --git a/parameters/human_blood/modkit.yaml b/parameters/human_blood/modkit.yaml index 5149ad894bc1204d3e42e2a5579b0d4274dcaa6e..3a694f0d54cf2ad28e2bcf93b1ed0bc5fd91fab1 100644 --- a/parameters/human_blood/modkit.yaml +++ b/parameters/human_blood/modkit.yaml @@ -1,4 +1,5 @@ project_name: "results_human_blood" step: 3 steps_2_and_3_input_directory: "./results/results_human_blood/" +reference_file: "./references/Homo_sapiens.GRCh38.dna.primary_assembly.fa" out_dir: "results_human_blood/" \ No newline at end of file diff --git a/src/modules/modkit.nf b/src/modules/modkit.nf index d9549373aec63153ac06499c28b6cb393f5075df..d39eddebeef9eeb96054342ea398259d4e3201e8 100755 --- a/src/modules/modkit.nf +++ b/src/modules/modkit.nf @@ -14,15 +14,48 @@ process MODKIT { script: """ - echo "starting modkit" + echo "starting probabilities calculation" + ## Inspect base modification probabilities + modkit sample-probs --hist \ + --threads ${modkit_threads} \ + --interval-size ${modkit_isize} \ + --only-mapped \ + --region MT \ + --out-dir "./modkit_prob/" "${bam}" + echo "modification prob. calculated" + echo "starting modkit extract calls" ## Make Methylation TSV file - modkit extract full --bgzf --queue-size ${modkit_extract_qsize} --threads ${modkit_threads} --interval-size ${modkit_isize} --mapped-only "${bam}" "${id}_modkit_output.tsv" + modkit extract full --bgzf \ + --queue-size ${modkit_extract_qsize} \ + --threads ${modkit_threads} \ + --interval-size ${modkit_isize} \ + --region MT \ + --mapped-only \ + "${bam}" "${id}_modkit_full.tsv.gz" echo "extract successful" ## Make Methylation Table (--chunk-size is a multiplier of threads, 1.5x by default) - modkit pileup --threads ${modkit_threads} --interval-size ${modkit_isize} "${bam}" "${id}_modkit_pileup.bed" --log-filepath "${id}_modkit_pileup.log" + modkit pileup --threads ${modkit_threads} \ + --interval-size ${modkit_isize} \ + --region MT \ + --filter-threshold C:0.75 \ + --filter-threshold A:0.75 \ + --mod-thresholds m:0.75 \ + --mod-thresholds h:0.75 \ + --mod-thresholds a:0.75 \ + "${bam}" "${id}_modkit_pileup.bed" \ + --log-filepath "${id}_modkit_pileup.log" echo "pileup successful" ## Make Summary - modkit summary --threads ${modkit_threads} --interval-size ${modkit_isize} ${bam} > "${id}_modkit_summary.txt" + modkit summary --only-mapped \ + --threads ${modkit_threads} \ + --interval-size ${modkit_isize} \ + --region MT \ + --filter-threshold C:0.75 \ + --filter-threshold A:0.75 \ + --mod-thresholds m:0.75 \ + --mod-thresholds h:0.75 \ + --mod-thresholds a:0.75 \ + ${bam} > "${id}_modkit_summary.txt" echo "summary successful" """ -} +} \ No newline at end of file diff --git a/src/nextflow.config b/src/nextflow.config index 79873970d8f2d700a3030df66701a3e7b0fe39d8..e7b10a71ebeb24a7920b34e720ea5b201a75fc2e 100755 --- a/src/nextflow.config +++ b/src/nextflow.config @@ -22,7 +22,7 @@ params { // Desired basecall speed ("fast", "hac", "sup"; @latest <- latest version available) basecall_speed = "sup@latest" // Desired basecaller modifications (4mC_5mC, 5mCG_5hmCG, 5mC_5hmC, 6mA). Can't use more than one modification per nucleotide. - basecall_mods = "5mC_5hmC" + basecall_mods = "5mC_5hmC,6mA" // Kit name (kit used to barcode the samples (e.g. SQK-RBK114-24); Use "None" to skip --kit-name in basecalling) barcoding_kit = "SQK-RBK114-24" // Threshold for mapped reasds