From 8b4e15bffd02d0f264d4b69963e022b828288852 Mon Sep 17 00:00:00 2001
From: bernardo-heberle
Date: Wed, 10 Jan 2024 17:22:36 -0500
Subject: [PATCH] big update

---
 modules/bambu.nf                                   |  89 --------
 modules/basecall.nf                                |  60 +++--
 modules/chm13_gff3_to_gtf.nf                       |  39 ----
 modules/contamination.nf                           | 117 ----------
 modules/convert_U_to_T.nf                          |  33 ---
 modules/gffcompare.nf                              |  19 --
 modules/make_contamination_report.nf               |  56 -----
 modules/make_fai.nf                                |  17 --
 modules/make_index.nf                              |  72 ------
 modules/make_transcriptome.nf                      |  19 --
 modules/minimap2.nf                                | 149 ------------
 modules/multiqc.nf                                 |  41 ----
 modules/num_reads_report.nf                        |  95 --------
 modules/pychopper.nf                               |  69 ------
 modules/pycoqc.nf                                  |  53 +----
 modules/rseqc.nf                                   |  21 --
 modules/trim_dRNA.nf                               |  22 --
 modules/unzip_and_concatenate.sh                   |  44 ----
 .../2023-12-21_ont_methylation.def                 |   0
 singularity_containers/README.md                   |   0
 ...pore_workflow_STEP_1.nf => BASECALLING.nf}      |  12 +-
 .../nanopore_cDNA_workflow_STEP_2.nf               |  86 ------
 .../nanopore_dRNA_workflow_STEP_2.nf               |  98 --------
 .../nanopore_unzip_and_concatenate.nf              |  21 --
 sub_workflows/nanopore_workflow_STEP_2_BAM.nf      |  41 ----
 sub_workflows/nanopore_workflow_STEP_3.nf          |  61 -----
 workflow/bin/bambu_discovery.R                     |  29 ---
 workflow/bin/bambu_prep.R                          |  16 --
 workflow/bin/bambu_quant.R                         |  18 --
 workflow/bin/convert_U_to_T.py                     |  38 ----
 workflow/bin/filter_by_mean_base_quality.py        |  31 ---
 .../bin/fix_sequencing_summary_porechop.py         |  75 ------
 .../bin/fix_sequencing_summary_pychopper.py        |  82 -------
 workflow/bin/gff_to_gtf.py                         |  93 --------
 workflow/bin/multiqc_config.yaml                   |  11 -
 workflow/main.nf                                   | 215 ++----------------
 workflow/nextflow.config                           | 163 +-------------
 37 files changed, 61 insertions(+), 2044 deletions(-)
 delete mode 100755 modules/bambu.nf
 delete mode 100755 modules/chm13_gff3_to_gtf.nf
 delete mode 100755 modules/contamination.nf
 delete mode 100755 modules/convert_U_to_T.nf
 delete mode 100755 modules/gffcompare.nf
 delete mode 100755 modules/make_contamination_report.nf
 delete mode 100755 modules/make_fai.nf
 delete mode 100755 modules/make_index.nf
 delete mode 100755 modules/make_transcriptome.nf
 delete mode 100755 modules/minimap2.nf
 delete mode 100755 modules/multiqc.nf
 delete mode 100755 modules/num_reads_report.nf
 delete mode 100755 modules/pychopper.nf
 delete mode 100755 modules/rseqc.nf
 delete mode 100755 modules/trim_dRNA.nf
 delete mode 100755 modules/unzip_and_concatenate.sh
 mode change 100644 => 100755 singularity_containers/2023-12-21_ont_methylation.def
 mode change 100644 => 100755 singularity_containers/README.md
 rename sub_workflows/{nanopore_workflow_STEP_1.nf => BASECALLING.nf} (80%)
 delete mode 100755 sub_workflows/nanopore_cDNA_workflow_STEP_2.nf
 delete mode 100755 sub_workflows/nanopore_dRNA_workflow_STEP_2.nf
 delete mode 100755 sub_workflows/nanopore_unzip_and_concatenate.nf
 delete mode 100755 sub_workflows/nanopore_workflow_STEP_2_BAM.nf
 delete mode 100755 sub_workflows/nanopore_workflow_STEP_3.nf
 delete mode 100755 workflow/bin/bambu_discovery.R
 delete mode 100755 workflow/bin/bambu_prep.R
 delete mode 100755 workflow/bin/bambu_quant.R
 delete mode 100755 workflow/bin/convert_U_to_T.py
 delete mode 100755 workflow/bin/filter_by_mean_base_quality.py
 delete mode 100755 workflow/bin/fix_sequencing_summary_porechop.py
 delete mode 100755 workflow/bin/fix_sequencing_summary_pychopper.py
 delete mode 100755 workflow/bin/gff_to_gtf.py
 delete mode 100755 workflow/bin/multiqc_config.yaml

diff --git a/modules/bambu.nf b/modules/bambu.nf
deleted file mode 100755
index 82a269e..0000000
--- a/modules/bambu.nf
+++ /dev/null
@@ -1,89 +0,0 @@
-process BAMBU_PREP {
-
-    publishDir "results/${params.out_dir}/", mode: "copy", overwrite: true
-
-    label 'bambu_prep_job'
-
-    input:
-        val(id)
-        val(mapq)
-        path(bam)
-        path(bai)
-        path(ref)
-        path(gtf)
-        path(fai)
-        val(track_reads)
-
-    output:
-        path("bambu_prep/*.rds")
-
-    script:
-        """
-        mkdir -p bambu_prep
-
-        bambu_prep.R $bam $ref $gtf $track_reads
-
-        mv ./bambu_prep/*.rds "./bambu_prep/${id}_mapq_${mapq}.rds"
-        """
-}
-
-process BAMBU_DISCOVERY {
-
-    publishDir "results/${params.out_dir}/", mode: "copy", overwrite: true
-
-    label 'huge_long'
-
-    input:
-        path(rc_files)
-        path(ref)
-        path(gtf)
-        path(fai)
-        val(NDR)
-        val(track_reads)
-
-
-    output:
-        path("./bambu_discovery/extended_annotations.gtf"), emit:gtf
-        path("bambu_discovery/*"), emit: outty
-
-    shell:
-        '''
-        mkdir bambu_discovery
-
-        dummy="!{rc_files}"
-
-        rc_files2="$(tr ' ' ',' <<<$dummy)"
-
-        bambu_discovery.R $rc_files2 "!{ref}" "!{gtf}" "!{NDR}" "!{track_reads}"
-        '''
-}
-
-
-process BAMBU_QUANT {
-
-    publishDir "results/${params.out_dir}/", mode: "copy", overwrite: true
-
-    label 'huge_long'
-
-    input:
-        path(rc_files)
-        path(ref)
-        path(gtf)
-        path(fai)
-
-
-    output:
-        path("./bambu_quant/extended_annotations.gtf"), emit:gtf
-        path("bambu_quant/*"), emit: outty
-
-    shell:
-        '''
-        mkdir bambu_quant
-
-        dummy="!{rc_files}"
-
-        rc_files2="$(tr ' ' ',' <<<$dummy)"
-
-        bambu_quant.R $rc_files2 "!{ref}" "!{gtf}"
-        '''
-}
diff --git a/modules/basecall.nf b/modules/basecall.nf
index 07e4d32..dc5abf3 100755
--- a/modules/basecall.nf
+++ b/modules/basecall.nf
@@ -2,7 +2,7 @@ process FAST5_to_POD5 {
 
     publishDir "results/${params.out_dir}/fast5_to_pod5/${id}/", mode: "symlink", overwrite: true
 
-    label 'large'
+    label 'cpu'
 
     input:
         tuple val(id), path(fast5)
@@ -14,7 +14,7 @@ process FAST5_to_POD5 {
 
     script:
         """
-        pod5 convert fast5 *.fast5 --output . --one-to-one . --threads 50
+        pod5 convert fast5 *.fast5 --output . --one-to-one . --threads 12
        """
 
 }
@@ -24,7 +24,7 @@ process BASECALL_CPU {
 
     publishDir "results/${params.out_dir}/basecalling_output/", mode: "copy", overwrite: true
 
-    label 'huge'
+    label 'cpu'
 
     input:
         tuple val(id), path(pod5_dir)
@@ -33,6 +33,8 @@ process BASECALL_CPU {
         val config
         val trim
         val qscore
+        val devices
+        path ref
 
     output:
         path("*")
@@ -43,20 +45,20 @@ process BASECALL_CPU {
 
     if [[ "$config" == "false" ]]; then
 
        if [[ "$mods" == "false" ]]; then
-           dorado basecaller "${speed}" . -x cpu --trim "${trim}" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed}" . -x cpu --trim "${trim}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
        else
-           dorado basecaller "${speed},${mods}" . -x cpu --trim "${trim}" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed},${mods}" . -x cpu --trim "${trim}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
        fi
 
    else
 
        if [[ "$mods" == "false" ]]; then
-           dorado basecaller "${speed}" . -x cpu --trim "${trim}" --config "${config}" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed}" . -x cpu --trim "${trim}" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
        else
-           dorado basecaller "${speed},${mods}" . -x cpu --trim "${trim}" --config "${config}" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed},${mods}" . -x cpu --trim "${trim}" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
        fi
 
    fi
@@ -64,10 +66,6 @@ process BASECALL_CPU {
 
    dorado summary "${id}.bam" > "${id}.txt"
 
-   samtools fastq -T "*" "${id}.bam" > "${id}.fastq"
-
-   rm "${id}.bam"
-
    """
 }
@@ -77,7 +75,7 @@ process BASECALL_CPU_DEMUX {
 
     publishDir "results/${params.out_dir}/basecalling_output/", mode: "copy", overwrite: true
 
-    label 'huge'
+    label 'cpu'
 
     input:
         tuple val(id), path(pod5_dir)
@@ -87,6 +85,8 @@ process BASECALL_CPU_DEMUX {
         val trim
         val qscore
         val trim_barcode
+        val devices
+        path ref
 
     output:
         path("*")
@@ -97,20 +97,20 @@ process BASECALL_CPU_DEMUX {
 
    if [[ "$config" == "false" ]]; then
 
        if [[ "$mods" == "false" ]]; then
-           dorado basecaller "${speed}" . -x cpu --trim "none" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed}" . -x cpu --trim "none" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
        else
-           dorado basecaller "${speed},${mods}" . -x cpu --trim "none" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed},${mods}" . -x cpu --trim "none" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
        fi
 
    else
 
        if [[ "$mods" == "false" ]]; then
-           dorado basecaller "${speed}" . -x cpu --trim "none" --config "${config}" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed}" . -x cpu --trim "none" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
        else
-           dorado basecaller "${speed},${mods}" . -x cpu --trim "none" --config "${config}" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed},${mods}" . -x cpu --trim "none" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" > "${id}.bam"
        fi
 
    fi
@@ -138,8 +138,6 @@ process BASECALL_CPU_DEMUX {
    for file in *.bam; do
        new_id="\${file%%.*}"
        dorado summary "\$file" > "\${new_id}.txt"
-       samtools fastq -T "*" "\$file" > "\${new_id}.fastq"
-       rm "\$file"
    done
    """
 }
@@ -158,6 +156,8 @@ process BASECALL_GPU {
         val config
         val trim
         val qscore
+        val devices
+        path ref
 
     output:
         path("*")
@@ -168,20 +168,20 @@ process BASECALL_GPU {
 
    if [[ "$config" == "false" ]]; then
 
        if [[ "$mods" == "false" ]]; then
-           dorado basecaller "${speed}" . --trim "${trim}" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed}" . --trim "${trim}" --min-qscore "${qscore}" --reference "${ref}" --device "cuda:${devices}" > "${id}.bam"
        else
-           dorado basecaller "${speed},${mods}" . --trim "${trim}" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed},${mods}" . --trim "${trim}" --min-qscore "${qscore}" --reference "${ref}" --device "cuda:${devices}" > "${id}.bam"
        fi
 
    else
 
        if [[ "$mods" == "false" ]]; then
-           dorado basecaller "${speed}" . --trim "${trim}" --config "${config}" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed}" . --trim "${trim}" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" --device "cuda:${devices}" > "${id}.bam"
        else
-           dorado basecaller "${speed},${mods}" . --trim "${trim}" --config "${config}" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed},${mods}" . --trim "${trim}" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" --device "cuda:${devices}" > "${id}.bam"
        fi
 
    fi
@@ -189,10 +189,6 @@ process BASECALL_GPU {
 
    dorado summary "${id}.bam" > "${id}.txt"
 
-   samtools fastq -T "*" "${id}.bam" > "${id}.fastq"
-
-   rm "${id}.bam"
-
    """
 }
@@ -212,6 +208,8 @@ process BASECALL_GPU_DEMUX {
         val trim
         val qscore
         val trim_barcode
+        val devices
+        path ref
 
     output:
         path("*")
@@ -222,20 +220,20 @@ process BASECALL_GPU_DEMUX {
 
    if [[ "$config" == "false" ]]; then
 
        if [[ "$mods" == "false" ]]; then
-           dorado basecaller "${speed}" . --trim "none" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed}" . --trim "none" --min-qscore "${qscore}" --reference "${ref}" --device "cuda:${devices}" > "${id}.bam"
        else
-           dorado basecaller "${speed},${mods}" . --trim "none" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed},${mods}" . --trim "none" --min-qscore "${qscore}" --reference "${ref}" --device "cuda:${devices}" > "${id}.bam"
        fi
 
    else
 
        if [[ "$mods" == "false" ]]; then
-           dorado basecaller "${speed}" . --trim "none" --config "${config}" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed}" . --trim "none" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" --device "cuda:${devices}" > "${id}.bam"
        else
-           dorado basecaller "${speed},${mods}" . --trim "none" --config "${config}" --min-qscore "${qscore}" > "${id}.bam"
+           dorado basecaller "${speed},${mods}" . --trim "none" --config "${config}" --min-qscore "${qscore}" --reference "${ref}" --device "cuda:${devices}" > "${id}.bam"
        fi
 
    fi
@@ -263,8 +261,6 @@ process BASECALL_GPU_DEMUX {
    for file in *.bam; do
        new_id="\${file%%.*}"
        dorado summary "\$file" > "\${new_id}.txt"
-       samtools fastq -T "*" "\$file" > "\${new_id}.fastq"
-       rm "\$file"
    done
    """
 }
diff --git a/modules/chm13_gff3_to_gtf.nf b/modules/chm13_gff3_to_gtf.nf
deleted file mode 100755
index fcb348f..0000000
--- a/modules/chm13_gff3_to_gtf.nf
+++ /dev/null
@@ -1,39 +0,0 @@
-process CHM13_GTF_ERCC {
-
-    publishDir "results/${params.out_dir}/CHM13_gtf/", mode: "copy", overwrite: true
-
-    label 'medium'
-
-    input:
-        path gff
-        path ercc
-
-    output:
-        path('CHM13_v2.0_ERCC.gtf')
-
-    script:
-        """
-        gff_to_gtf.py $gff CHM13_v2.0.gtf
-
-        cat CHM13_v2.0.gtf $ercc > CHM13_v2.0_ERCC.gtf
-        """
-}
-
-process CHM13_GTF {
-
-    publishDir "results/${params.out_dir}/CHM13_gtf/", mode: "copy", overwrite: true
-
-    label 'medium'
-
-    input:
-        path gff
-
-    output:
-        path('CHM13_v2.0.gtf')
-
-    script:
-        """
-        gff_to_gtf.py $gff CHM13_v2.0.gtf
-        """
-}
-
diff --git a/modules/contamination.nf b/modules/contamination.nf
deleted file mode 100755
index 22c463e..0000000
--- a/modules/contamination.nf
+++ /dev/null
@@ -1,117 +0,0 @@
-process MAP_CONTAMINATION_cDNA {
-
-    publishDir "results/${params.out_dir}/contamination_report/${id}", mode: "copy", pattern: "*"
-
-    label 'contamination'
-
-    input:
-        val(id)
-        tuple path(bam), path(index_contaminants), path(index_chm13)
-        path(bai)
-        val(num_reads)
-
-    output:
-        val("$id"), emit: id
-        val("$num_reads"), emit: num_reads
-        env(NUM_UNMAPPED_READS_BEFORE_CHM13), emit: num_unmapped_reads_before_chm13
-        env(NUM_UNMAPPED_READS_AFTER_CHM13), emit: num_unmapped_reads_after_chm13
-        env(NUM_CONTAMINANT_READS), emit: num_contaminant_reads
-        path("${id}*"), emit: outty
-
-    script:
-        """
-        samtools view -h -b -f 4 "${bam}" > "${id}_unmapped.bam"
-        samtools fastq "${id}_unmapped.bam" > "${id}_unmapped_reads.fastq"
-
-        NUM_UNMAPPED_READS_BEFORE_CHM13=\$(samtools view -F 0x40 "${id}_unmapped.bam" | cut -f1 | sort | uniq | wc -l)
-
-        minimap2 -t 50 -ax splice \
-            -uf \
-            $index_chm13 \
-            "${id}_unmapped_reads.fastq" > "${id}_chm13.bam"
-
-
-        samtools view -h -b -f 4 "${id}_chm13.bam" > "${id}_unmapped_chm13.bam"
-        samtools fastq "${id}_unmapped_chm13.bam" > "${id}_unmapped_reads_chm13.fastq"
-        NUM_UNMAPPED_READS_AFTER_CHM13=\$(samtools view -F 0x40 "${id}_unmapped_chm13.bam" | cut -f1 | sort | uniq | wc -l)
-
-        minimap2 -t 50 -ax splice \
-            --split-prefix /tmp/tmp_name \
-            -uf \
-            $index_contaminants \
-            "${id}_unmapped_reads_chm13.fastq" > "${id}_contaminants_unsorted.bam"
-
-        samtools view -b -F 260 "${id}_contaminants_unsorted.bam" > "${id}_contaminants_unsorted_primary.bam"
-        samtools sort -@ 12 "${id}_contaminants_unsorted_primary.bam" -o "${id}_contaminants_sorted_primary.bam"
-        samtools index "${id}_contaminants_sorted_primary.bam"
-
-        samtools idxstat "${id}_contaminants_sorted_primary.bam" > "tmp.tsv"
-        awk '{print \$1,\$3}' "tmp.tsv" > "tmp2.tsv"
-        sort -k2nr "tmp2.tsv" > "${id}_number_of_mapped_reads_per_contaminant.tsv"
-
-        NUM_CONTAMINANT_READS=\$(samtools view -F 0x40 "${id}_contaminants_sorted_primary.bam" | cut -f1 | sort | uniq | wc -l)
-
-        rm "${id}_contaminants_unsorted.bam" "${id}_unmapped.bam" "${id}_contaminants_unsorted_primary.bam" "tmp.tsv" "tmp2.tsv" "${id}_chm13.bam" "${id}_unmapped_chm13.bam" "${id}_unmapped_reads_chm13.fastq"
-        """
-
-}
-
-process MAP_CONTAMINATION_dRNA {
-
-    publishDir "results/${params.out_dir}/contamination_report/${id}", mode: "copy", pattern: "*"
-
-    label 'contamination'
-
-    input:
-        val(id)
-        tuple path(bam), path(index_contaminants), path(index_chm13)
-        path(bai)
-        val(num_reads)
-
-    output:
-        val("$id"), emit: id
-        val("$num_reads"), emit: num_reads
-        env(NUM_UNMAPPED_READS_BEFORE_CHM13), emit: num_unmapped_reads_before_chm13
-        env(NUM_UNMAPPED_READS_AFTER_CHM13), emit: num_unmapped_reads_after_chm13
-        env(NUM_CONTAMINANT_READS), emit: num_contaminant_reads
-        path("${id}*"), emit: outty
-
-    script:
-        """
-        samtools view -h -b -f 4 "${bam}" > "${id}_unmapped.bam"
-        samtools fastq "${id}_unmapped.bam" > "${id}_unmapped_reads.fastq"
-
-        NUM_UNMAPPED_READS_BEFORE_CHM13=\$(samtools view -F 0x40 "${id}_unmapped.bam" | cut -f1 | sort | uniq | wc -l)
-
-        minimap2 -t 50 -ax splice \
-            -k14 -uf \
-            $index_chm13 \
-            "${id}_unmapped_reads.fastq" > "${id}_chm13.bam"
-
-
-        samtools view -h -b -f 4 "${id}_chm13.bam" > "${id}_unmapped_chm13.bam"
-        samtools fastq "${id}_unmapped_chm13.bam" > "${id}_unmapped_reads_chm13.fastq"
-        NUM_UNMAPPED_READS_AFTER_CHM13=\$(samtools view -F 0x40 "${id}_unmapped_chm13.bam" | cut -f1 | sort | uniq | wc -l)
-
-        minimap2 -t 50 -ax splice \
-            --split-prefix /tmp/tmp_name \
-            -k14 -uf \
-            $index_contaminants \
-            "${id}_unmapped_reads_chm13.fastq" > "${id}_contaminants_unsorted.bam"
-
-        samtools view -b -F 260 "${id}_contaminants_unsorted.bam" > "${id}_contaminants_unsorted_primary.bam"
-        samtools sort -@ 12 "${id}_contaminants_unsorted_primary.bam" -o "${id}_contaminants_sorted_primary.bam"
-        samtools index "${id}_contaminants_sorted_primary.bam"
-
-        samtools idxstat "${id}_contaminants_sorted_primary.bam" > "tmp.tsv"
-        awk '{print \$1,\$3}' "tmp.tsv" > "tmp2.tsv"
-        sort -k2nr "tmp2.tsv" > "${id}_number_of_mapped_reads_per_contaminant.tsv"
-
-        NUM_CONTAMINANT_READS=\$(samtools view -F 0x40 "${id}_contaminants_sorted_primary.bam" | cut -f1 | sort | uniq | wc -l)
-
-        rm "${id}_contaminants_unsorted.bam" "${id}_unmapped.bam" "${id}_contaminants_unsorted_primary.bam" "tmp.tsv" "tmp2.tsv" "${id}_chm13.bam" "${id}_unmapped_chm13.bam" "${id}_unmapped_reads_chm13.fastq"
-        """
-
-
-
-}
diff --git a/modules/convert_U_to_T.nf b/modules/convert_U_to_T.nf
deleted file mode 100755
index a16d740..0000000
--- a/modules/convert_U_to_T.nf
+++ /dev/null
@@ -1,33 +0,0 @@
-process CONVERT_U_TO_T {
-
-    label "medium"
-
-    input:
-        tuple val(id), file(fastq)
-        val(txt)
-        val(qscore)
-
-    output:
-        tuple val("$id"), path("${id}_U_to_T_qscore_${qscore}.fastq"), emit: fastq
-        path "${id}.txt", emit: txt
-
-    script:
-        """
-
-        if [[ "${txt}" != "None" ]] && [[ "${txt}" != "${id}.txt" ]]; then
-            cp "${txt}" "./${id}.txt"
-        else
-            touch "./${id}.txt"
-        fi
-
-        ## convert U to T
-        convert_U_to_T.py $fastq "${id}_U_to_T.fastq"
-
-        ## Filter by mean base quality threshold
-        filter_by_mean_base_quality.py "${id}_U_to_T.fastq" "${qscore}" "${id}_U_to_T_qscore_${qscore}.fastq"
-
-        ## Delete intermediate files
-        rm "${id}_U_to_T.fastq"
-
-        """
-}
diff --git a/modules/gffcompare.nf b/modules/gffcompare.nf
deleted file mode 100755
index 2caab7b..0000000
--- a/modules/gffcompare.nf
+++ /dev/null
@@ -1,19 +0,0 @@
-process GFFCOMPARE {
-
-    publishDir "results/${params.out_dir}/gffcompare/", mode: "copy", overwrite: true
-
-    label 'small'
-
-    input:
-        path(extended_annotation)
-        path(reference_annotation)
-
-    output:
-        path "*"
-
-    script:
-        """
-        gffcompare -r $reference_annotation $extended_annotation -o "gffcompare_output"
-        """
-}
-
diff --git a/modules/make_contamination_report.nf b/modules/make_contamination_report.nf
deleted file mode 100755
index ea28040..0000000
--- a/modules/make_contamination_report.nf
+++ /dev/null
@@ -1,56 +0,0 @@
-process MAKE_CONTAMINATION_REPORT_1 {
-
-    publishDir "results/${params.out_dir}/intermediate_qc_reports/contamination/", mode: "copy", pattern: "*.tsv"
-
-    label 'small'
-
-    input:
-        val(id)
-        val(num_reads)
-        val(num_unmapped_reads_before_chm13)
-        val(num_unmapped_reads_after_chm13)
-        val(num_contaminant_reads)
-
-    output:
-        path("*.tsv")
-
-    shell:
-        '''
-
-        reads="!{num_reads}"
-        unmapped_reads_before_chm13="!{num_unmapped_reads_before_chm13}"
-        unmapped_reads_after_chm13="!{num_unmapped_reads_after_chm13}"
-        mapped_to_contaminant="!{num_contaminant_reads}"
-        ID="!{id}"
-
-        mapped_chm13=$(awk -v var1=$unmapped_reads_before_chm13 -v var2=$unmapped_reads_after_chm13 'BEGIN { print ( var1 - var2 ) }')
-        mapped_to_target=$(awk -v var1=$unmapped_reads_before_chm13 -v var2=$reads 'BEGIN { print ( var2 - var1 ) }')
-        unmapped=$(awk -v var1=$unmapped_reads_after_chm13 -v var2=$mapped_to_contaminant 'BEGIN { print ( var2 - var1 ) }')
-
-        echo "${ID}\t${mapped_to_target}\t${mapped_chm13}\t${mapped_to_contaminant}\t${unmapped}" > "${ID}.tsv"
-        '''
-
-}
-
-process MAKE_CONTAMINATION_REPORT_2 {
-
-    publishDir "results/${params.out_dir}/multiQC_input/contamination/", mode: "copy", pattern: "*"
-
-    label 'small'
-
-    input:
-        path(report)
-
-    output:
-        path("*")
-
-    script:
-        """
-
-        echo "Sample ID\tMapped to Target\tMapped to CHM13\tMapped to Contaminant\tUnmapped" >> "Percent_Contaminant_Reads_mqc.tsv"
-        cat $report >> "Percent_Contaminant_Reads_mqc.tsv"
-
-        """
-
-}
-
diff --git a/modules/make_fai.nf b/modules/make_fai.nf
deleted file mode 100755
index c468972..0000000
--- a/modules/make_fai.nf
+++ /dev/null
@@ -1,17 +0,0 @@
-process MAKE_FAI {
-
-    publishDir "results/${params.out_dir}/fai/", mode: "copy", overwrite: true
-
-    label 'tiny'
-
-    input:
-        path ref
-
-    output:
-        path '*.fai'
-
-    script:
-        """
-        samtools faidx $ref
-        """
-}
diff --git a/modules/make_index.nf b/modules/make_index.nf
deleted file mode 100755
index 1897318..0000000
--- a/modules/make_index.nf
+++ /dev/null
@@ -1,72 +0,0 @@
-process MAKE_INDEX_cDNA {
-
-    label 'large'
-
-    input:
-        path(ref)
-
-    output:
-        path("${ref}.mmi")
-
-
-    script:
-        """
-        minimap2 -t 8 -ax splice -uf -d "${ref}.mmi" $ref
-        """
-}
-
-process MAKE_INDEX_dRNA {
-
-    label 'large'
-
-    input:
-        path(ref)
-
-    output:
-        path("${ref}.mmi")
-
-
-    script:
-        """
-        minimap2 -t 8 -k14 -ax splice -uf -d "${ref}.mmi" $ref
-        """
-}
-
-
-process MAKE_INDEX_cDNA_CONTAMINATION_CHM13 {
-
-    label 'large'
-
-    output:
-        path("chm13v2.0.mmi")
-
-
-    script:
-        """
-
-        wget https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/analysis_set/chm13v2.0.fa.gz
-
-        gzip -d chm13v2.0.fa.gz
-
-        minimap2 -t 8 -ax splice -uf -d "chm13v2.0.mmi" "chm13v2.0.fa"
-        """
-}
-
-process MAKE_INDEX_dRNA_CONTAMINATION_CHM13 {
-
-    label 'large'
-
-    output:
-        path("chm13v2.0.mmi")
-
-
-    script:
-        """
-
-        wget https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/analysis_set/chm13v2.0.fa.gz
-
-        gzip -d chm13v2.0.fa.gz
-
-        minimap2 -t 8 -k14 -ax splice -uf -d "chm13v2.0.mmi" "chm13v2.0.fa"
-        """
-}
diff --git a/modules/make_transcriptome.nf b/modules/make_transcriptome.nf
deleted file mode 100755
index a6d37a1..0000000
--- a/modules/make_transcriptome.nf
+++ /dev/null
@@ -1,19 +0,0 @@
-process MAKE_TRANSCRIPTOME {
-
-    publishDir "results/${params.out_dir}/transcriptome/", mode: "copy", overwrite: true
-
-    label 'medium'
-
-    input:
-        path genome
-        path genome_index
-        path annotation
-
-    output:
-        path('transcriptome.fa')
-
-    script:
-        """
-        gffread -w transcriptome.fa -g $genome $annotation
-        """
-}
diff --git a/modules/minimap2.nf b/modules/minimap2.nf
deleted file mode 100755
index 679bc36..0000000
--- a/modules/minimap2.nf
+++ /dev/null
@@ -1,149 +0,0 @@
-process MINIMAP2_cDNA {
-
-    publishDir "results/${params.out_dir}/mapping_cDNA/", pattern: "*.ba*", mode: "copy", overwrite: true
-    publishDir "results/${params.out_dir}/multiQC_input/minimap2/", pattern: "*.*stat", mode: "copy", overwrite: true
-
-    label 'large'
-
-    input:
-        val(id)
-        path(fastq)
-        file(index)
-        val(txt)
-
-    output:
-        val("$id"), emit: id
-        path("$fastq"), emit: fastq
-        path("${id}.bam"), emit: bam
-        path("${id}.bam.bai"), emit: bai
-        path("${id}*stat"), emit: QC_out
-        val("$txt"), emit: txt
-        env(NUM_READS), emit: num_reads
-
-    script:
-        """
-        minimap2 -t 50 -ax splice \
-            -uf \
-            $index \
-            $fastq > "${id}_all.bam" \
-
-
-        samtools sort -@ -12 "${id}_all.bam" -o "${id}.bam"
-        samtools index "${id}.bam"
-        samtools flagstat "${id}.bam" > "${id}.flagstat"
-        samtools idxstats "${id}.bam" > "${id}.idxstat"
-
-        NUM_READS=\$(samtools view -F 0x40 "${id}.bam" | cut -f1 | sort | uniq | wc -l)
-
-        rm "${id}_all.bam"
-        """
-
-}
-
-process MINIMAP2_dRNA {
-
-    publishDir "results/${params.out_dir}/mapping_dRNA/", pattern: "*.ba*", mode: "copy", overwrite: true
-    publishDir "results/${params.out_dir}/multiQC_input/minimap2/", pattern: "*.*stat", mode: "copy", overwrite: true
-
-    label 'large'
-
-    input:
-        tuple val(id), path(fastq)
-        file(index)
-        val(txt)
-
-    output:
-        val("$id"), emit: id
-        path("$fastq"), emit: fastq
-        path("${id}.bam"), emit: bam
-        path("${id}.bam.bai"), emit: bai
-        path("${id}*stat"), emit: QC_out
-        val("${txt}"), emit: txt
-        env(NUM_READS), emit: num_reads
-
-    script:
-        """
-
-        minimap2 -t 50 -ax splice \
-            -k14 -uf \
-            $index \
-            $fastq > "${id}_all.bam" \
-
-
-        samtools sort -@ -12 "${id}_all.bam" -o "${id}.bam"
-        samtools index "${id}.bam"
-        samtools flagstat "${id}.bam" > "${id}.flagstat"
-        samtools idxstats "${id}.bam" > "${id}.idxstat"
-
-        NUM_READS=\$(samtools view -F 0x40 "${id}.bam" | cut -f1 | sort | uniq | wc -l)
-
-        rm "${id}_all.bam"
-        """
-
-}
-
-
-process FILTER_BAM {
-
-    publishDir "results/${params.out_dir}/bam_filtering/", mode: "copy", pattern: "*.*stat"
-
-    label 'medium_small'
-
-    input:
-        val(id)
-        val(mapq)
-        path(bam)
-        path(bai)
-
-    output:
-        val("$id"), emit: id
-        path("${id}_filtered_mapq_${mapq}.bam"), emit: bam
-        path("${id}_filtered_mapq_${mapq}.bam.bai"), emit: bai
-        path("*.*stat"), emit: QC
-
-    script:
-        """
-
-        samtools view -b -q $mapq -F 2304 -@ 12 $bam > 'intermediate.bam'
-        samtools sort -@ 12 "intermediate.bam" -o '${id}_filtered_mapq_${mapq}.bam'
-        samtools index '${id}_filtered_mapq_${mapq}.bam'
-        samtools flagstat "${id}_filtered_mapq_${mapq}.bam" > "${id}_filtered_mapq_${mapq}.flagstat"
-        samtools idxstats "${id}_filtered_mapq_${mapq}.bam" > "${id}_filtered_mapq_${mapq}.idxstat"
-
-        rm "intermediate.bam"
-        """
-
-}
-
-
-
-process FILTER_BAM_ONLY {
-
-    publishDir "results/${params.out_dir}/bam_filtering/", mode: "copy", pattern: "*.*stat"
-
-    label 'medium_small'
-
-    input:
-        tuple val(id), path(bam)
-        val(bai)
-        val(mapq)
-
-    output:
-        val("$id"), emit: id
-        path("${id}_filtered_mapq_${mapq}.bam"), emit: bam
-        path("${id}_filtered_mapq_${mapq}.bam.bai"), emit: bai
-        path("*.*stat"), emit: QC
-
-    script:
-        """
-
-        samtools view -b -q $mapq -F 2304 -@ 12 $bam > 'intermediate.bam'
-        samtools sort -@ 12 "intermediate.bam" -o '${id}_filtered_mapq_${mapq}.bam'
-        samtools index '${id}_filtered_mapq_${mapq}.bam'
-        samtools flagstat "${id}_filtered_mapq_${mapq}.bam" > "${id}_filtered_mapq_${mapq}.flagstat"
-        samtools idxstats "${id}_filtered_mapq_${mapq}.bam" > "${id}_filtered_mapq_${mapq}.idxstat"
-
-        rm "intermediate.bam"
-        """
-
-}
diff --git a/modules/multiqc.nf b/modules/multiqc.nf
deleted file mode 100755
index 0087c56..0000000
--- a/modules/multiqc.nf
+++ /dev/null
@@ -1,41 +0,0 @@
-process MULTIQC_GRCh38 {
-
-    publishDir "results/${params.out_dir}/multiQC_output", mode: "copy", overwrite: true
-
-    label 'tiny'
-
-    input:
-        path(multiqc_input)
-        path(multiqc_config)
-        path(contamination)
-        path(reads_reports)
-
-    output:
-        path "*"
-
-    script:
-        """
-        multiqc -c $multiqc_config -n multiQC_report.html .
-        """
-}
-
-process MULTIQC_CHM13 {
-
-    publishDir "results/${params.out_dir}/multiqc_output", mode: "copy", overwrite: true
-
-    label 'tiny'
-
-    input:
-        path(multiqc_input)
-        path(multiqc_config)
-        path(contamination)
-        path(reads_reports)
-
-    output:
-        path "*"
-
-    script:
-        """
-        multiqc -c $multiqc_config -n multiQC_report.html .
- """ -} diff --git a/modules/num_reads_report.nf b/modules/num_reads_report.nf deleted file mode 100755 index 83150d5..0000000 --- a/modules/num_reads_report.nf +++ /dev/null @@ -1,95 +0,0 @@ -process MAKE_QC_REPORT { - - publishDir "results/${params.out_dir}/intermediate_qc_reports/number_of_reads/", pattern: "*num_reads.tsv", mode: "copy", overwrite: true - publishDir "results/${params.out_dir}/intermediate_qc_reports/read_length/", pattern: "*length.tsv", mode: "copy", overwrite: true - publishDir "results/${params.out_dir}/intermediate_qc_reports/quality_score_thresholds/", pattern: "*thresholds.tsv", mode: "copy", overwrite: true - - label 'small' - - input: - tuple val(id), val(num_trimmed_fastq), val(mapq), path(json), path(flagstat) - val(qscore_thresh) - - output: - path("${id}_num_reads.tsv"), emit: num_reads - path("${id}_read_length.tsv"), emit: read_length - path("${id}_quality_thresholds.tsv"), emit: qscore_thresh - - script: - """ - echo "hi" - - reads_number_fastq_all=\$(jq '.["All Reads"].basecall.reads_number' "${json}") - - echo "hi" - - reads_number_aligned=\$(jq '.["All Reads"].alignment.reads_number' "${json}") - - echo "hi" - - reads_number_aligned_filtered=\$(grep "primary mapped" "${flagstat}" | awk '{print \$1}') - - echo "hi" - - N50_fastq=\$(jq '.["All Reads"].basecall.N50' "${json}") - - echo "hi" - - median_read_length_fastq=\$(jq '.["All Reads"].basecall.len_percentiles[50]' "${json}") - - echo "hi" - - N50_alignment=\$(jq '.["All Reads"].alignment.N50' "${json}") - - echo "hi" - - median_read_length_alignment=\$(jq '.["All Reads"].alignment.len_percentiles[50]' "${json}") - - echo "hi" - - echo "${id}\t\${reads_number_fastq_all}\t${num_trimmed_fastq}\t\${reads_number_aligned}\t\${reads_number_aligned_filtered}" > "${id}_num_reads.tsv" - - echo "hi" - - echo "${id}\t\${N50_fastq}\t\${median_read_length_fastq}\t\${N50_alignment}\t\${median_read_length_alignment}" > "${id}_read_length.tsv" - - echo "hi" - - echo "${id}\t${qscore_thresh}\t${mapq}" > "${id}_quality_thresholds.tsv" - - echo "hi" - """ - -} - -process MERGE_QC_REPORT { - - publishDir "results/${params.out_dir}/multiQC_input/reads_report/", pattern: "*", mode: "copy", overwrite: true - - label 'small' - - input: - path(num_reads) - path(read_length) - path(qscore_thresh) - - output: - path("*") - - script: - """ - - echo "Sample_ID\tAll_Reads\tFiltered_Reads\tAligned_Reads\tFiltered_Aligned_Reads\t" >> "Number_of_Reads_mqc.tsv" - cat $num_reads >> "Number_of_Reads_mqc.tsv" - - - echo "Sample_ID\tN50_FASTQ\tMedian_Read_Length_FASTQ\tN50_BAM\tMedian_Read_Length_BAM\t" >> "Read_Length_mqc.tsv" - cat $read_length >> "Read_Length_mqc.tsv" - - - echo "Sample_ID\tRead_Mean_Base_Quality_Score_Threshold_(PHRED)\tMapping_Quality_Threshold_(MAPQ)" >> "Quality_Thresholds_mqc.tsv" - cat $qscore_thresh >> "Quality_Thresholds_mqc.tsv" - - """ - -} diff --git a/modules/pychopper.nf b/modules/pychopper.nf deleted file mode 100755 index 3748f80..0000000 --- a/modules/pychopper.nf +++ /dev/null @@ -1,69 +0,0 @@ -process PYCHOPPER { - - publishDir "results/${params.out_dir}/multiQC_input/pychopper/", mode: 'copy', overwrite: true, pattern: "*pychopper.stats" - - label "large" - - input: - tuple val(id), path(fastq) - path(txt) - val(cdna_kit) - val(quality_score) - - output: - val "$id", emit: id - path "${id}_pychop.fq", emit: fastq - path "${id}.txt", emit: txt - path "$fastq", emit: original_fastq - path "*pychopper.stats", emit: multiQC - - script: - - """ - if [[ "${txt}" != "None" ]] && [[ "${txt}" != 
"${id}.txt" ]]; then - cp "${txt}" "./${id}.txt" - else - touch "./${id}.txt" - fi - - ## Pychopper does not have PCS114 primers yes, need to create them ## - if [[ "${cdna_kit}" == "PCS114" ]]; then - - ## Create primer config file ## - echo "+:MySSP,-MyVNP|-:MyVNP,-MySSP" > primer_config.txt - - ## Create custom primers for PCS114 ## - echo ">MyVNP" > custom_pimers.fas - echo "ACTTGCCTGTCGCTCTATCTTCAGAGGAGAGTCCGCCGCCCGCAAGTTTT" >> custom_pimers.fas - echo ">MySSP" >> custom_pimers.fas - echo "TTTCTGTTGGTGCTGATATTGCTTT" >> custom_pimers.fas - - ## Run pychopper with the custom primers and primer config ## - pychopper -m edlib -b custom_pimers.fas -c primer_config.txt \ - -t 50 \ - -Q $quality_score \ - -r "${id}_pychopper_report.pdf" \ - -u "${id}_pychopper.unclassified.fq" \ - -w "${id}_pychopper.rescued.fq" \ - -S "${id}_pychopper.stats" \ - -A "${id}_pychopper.scores" \ - "${fastq}" "${id}_pychop.fq" - - ## All other kits just use default settings ## - else - - pychopper -m edlib \ - -t 50 \ - -Q $quality_score \ - -k $cdna_kit \ - -r "${id}_pychopper_report.pdf" \ - -u "${id}_pychopper.unclassified.fq" \ - -w "${id}_pychopper.rescued.fq" \ - -S "${id}_pychopper.stats" \ - -A "${id}_pychopper.scores" \ - "${fastq}" "${id}_pychop.fq" - - fi - - """ -} diff --git a/modules/pycoqc.nf b/modules/pycoqc.nf index c4e674d..d88bcf5 100755 --- a/modules/pycoqc.nf +++ b/modules/pycoqc.nf @@ -2,68 +2,21 @@ process PYCOQC { publishDir "results/${params.out_dir}/multiQC_input/pycoqc/", mode: 'copy', overwrite: true, pattern: "*pycoqc*" - label 'huge' + label 'cpu' input: val(id) - path(fastq) path(seq_summary) path(total_bam) path(total_bai) val(quality_score) - val(mapq) - path(stats) output: - path "*pycoqc*", emit: multiQC - tuple val("${id}"), env(num_reads_trimmed), val("${mapq}"), path("${id}_pycoqc.json"), path("${id}_mapq_${mapq}_new.flagstat"), emit: num_reads_report + path "*" script: """ - num_reads_trimmed=\$(fix_sequencing_summary_pychopper.py $fastq $seq_summary "${id}_sequencing_summary_pyco.txt") - - cp *.flagstat "${id}_mapq_${mapq}_new.flagstat" - - pycoQC -f "${id}_sequencing_summary_pyco.txt" \ - -v \ - -a $total_bam \ - --min_pass_qual $quality_score \ - -o "./${id}_pycoqc.html" \ - -j "./${id}_pycoqc.json" - """ -} - -process PYCOQC_dRNA { - - - publishDir "results/${params.out_dir}/multiQC_input/pycoqc/", mode: 'copy', overwrite: true, pattern: "*pycoqc*" - - label 'huge' - - input: - val(id) - path(fastq) - path(seq_summary) - path(total_bam) - path(total_bai) - val(quality_score) - val(mapq) - path(stats) - - output: - path "*pycoqc*", emit: multiQC - tuple val("${id}"), env(num_reads_trimmed), val("${mapq}"), path("${id}_pycoqc.json"), path("${id}_mapq_${mapq}_new.flagstat"), emit: num_reads_report - - - script: - """ - - num_reads_trimmed=\$(fix_sequencing_summary_porechop.py $fastq $seq_summary "${id}_sequencing_summary_pyco.txt") - - cp *.flagstat "${id}_mapq_${mapq}_new.flagstat" - - - pycoQC -f "${id}_sequencing_summary_pyco.txt" \ + pycoQC -f "${seq_summary}" \ -v \ -a $total_bam \ --min_pass_qual $quality_score \ diff --git a/modules/rseqc.nf b/modules/rseqc.nf deleted file mode 100755 index 1c93bdd..0000000 --- a/modules/rseqc.nf +++ /dev/null @@ -1,21 +0,0 @@ -process RSEQC { - - publishDir "results/${params.out_dir}/multiQC_input/RseQC/", mode: "copy", overwrite: true, pattern: "*geneBody*" - - - label "medium_large" - - input: - val(id) - path(bam) - path(bai) - path(housekeeping) - - output: - path "*geneBody*", emit: multiQC - - script: - """ - 
geneBody_coverage.py -i $bam -r $housekeeping -o "${id}_geneBody_coverage" - """ -} diff --git a/modules/trim_dRNA.nf b/modules/trim_dRNA.nf deleted file mode 100755 index 3e95a42..0000000 --- a/modules/trim_dRNA.nf +++ /dev/null @@ -1,22 +0,0 @@ -process TRIM_dRNA { - - publishDir "results/${params.out_dir}/dRNA_adapter_trimming_stats/", mode: 'copy', overwrite: true, pattern: "*.txt" - - label "medium" - - input: - tuple val(id), file(fastq) - val(txt) - - output: - tuple val("$id"), path("${id}.trimmed.fastq"), emit: fastq - val("${txt}"), emit: txt - path("*adapter_data*.txt"), emit: outty - - script: - """ - ## Trim adapters and primers - porechop_abi -abi -i "${fastq}" -o "${id}.trimmed.fastq" > "${id}_adapter_data.txt" - - """ -} diff --git a/modules/unzip_and_concatenate.sh b/modules/unzip_and_concatenate.sh deleted file mode 100755 index 431f824..0000000 --- a/modules/unzip_and_concatenate.sh +++ /dev/null @@ -1,44 +0,0 @@ -process UNZIP_AND_CONCATENATE { - - publishDir "results/${params.out_dir}/concatenated_fastq_and_sequencing_summary_files/", mode: 'copy', overwrite: true, pattern: "*.fastq" - - label "medium" - - input: - tuple val(id), file(reads) - - output: - tuple val("$id"), path("${id}.fastq") - - script: - """ - - find -L . -maxdepth 1 -name "*.fastq.gz" | parallel -j 16 'gunzip --keep --force {}' - - find . -type f -maxdepth 1 -name "*.fastq" ! -name "${id}.fastq" -exec cat {} \\; >> "${id}.fastq" - - find . -maxdepth 1 -type f -name "*.fastq" ! -name "${id}.fastq" -exec rm {} \\; - - """ -} - -process FIX_SEQUENCING_SUMMARY_NAME { - - publishDir "results/${params.out_dir}/concatenated_fastq_and_sequencing_summary_files/", mode: 'copy', overwrite: true, pattern: "*.txt" - - label "small" - - input: - tuple val(id), file(txt) - - output: - path "${id}.txt" - - script: - """ - - mv $txt "${id}.txt" - - """ - -} diff --git a/singularity_containers/2023-12-21_ont_methylation.def b/singularity_containers/2023-12-21_ont_methylation.def old mode 100644 new mode 100755 diff --git a/singularity_containers/README.md b/singularity_containers/README.md old mode 100644 new mode 100755 diff --git a/sub_workflows/nanopore_workflow_STEP_1.nf b/sub_workflows/BASECALLING.nf similarity index 80% rename from sub_workflows/nanopore_workflow_STEP_1.nf rename to sub_workflows/BASECALLING.nf index 687bb83..b283fee 100755 --- a/sub_workflows/nanopore_workflow_STEP_1.nf +++ b/sub_workflows/BASECALLING.nf @@ -2,7 +2,7 @@ include {FAST5_to_POD5; BASECALL_CPU; BASECALL_CPU_DEMUX; BASECALL_GPU; BASECALL_GPU_DEMUX} from '../modules/basecall.nf' -workflow NANOPORE_STEP_1 { +workflow BASECALLING { take: pod5_path @@ -13,6 +13,8 @@ workflow NANOPORE_STEP_1 { trim quality_score trim_barcode + devices + ref main: @@ -25,11 +27,11 @@ workflow NANOPORE_STEP_1 { if (params.basecall_demux == true) { - BASECALL_CPU_DEMUX(pod5_path, speed, modifications, config, trim, quality_score, trim_barcode) + BASECALL_CPU_DEMUX(pod5_path, speed, modifications, config, trim, quality_score, trim_barcode, devices, ref) } else { - BASECALL_CPU(pod5_path, speed, modifications, config, trim, quality_score) + BASECALL_CPU(pod5_path, speed, modifications, config, trim, quality_score, devices, ref) } @@ -37,11 +39,11 @@ workflow NANOPORE_STEP_1 { if (params.basecall_demux == true) { - BASECALL_GPU_DEMUX(pod5_path, speed, modifications, config, trim, quality_score, trim_barcode) + BASECALL_GPU_DEMUX(pod5_path, speed, modifications, config, trim, quality_score, trim_barcode, devices, ref) } else { - BASECALL_GPU(pod5_path, speed, 
modifications, config, trim, quality_score, trim_barcode) + BASECALL_GPU(pod5_path, speed, modifications, config, trim, quality_score, trim_barcode, devices, ref) } diff --git a/sub_workflows/nanopore_cDNA_workflow_STEP_2.nf b/sub_workflows/nanopore_cDNA_workflow_STEP_2.nf deleted file mode 100755 index f56de39..0000000 --- a/sub_workflows/nanopore_cDNA_workflow_STEP_2.nf +++ /dev/null @@ -1,86 +0,0 @@ -// Import Modules -include {MAKE_FAI} from '../modules/make_fai' -include {MAKE_INDEX_cDNA ; MAKE_INDEX_cDNA_CONTAMINATION_CHM13} from '../modules/make_index' -include {MAKE_INDEX_cDNA as MAKE_INDEX_CONTAMINANTS} from '../modules/make_index' -include {CHM13_GTF; CHM13_GTF_ERCC} from '../modules/chm13_gff3_to_gtf' -include {PYCHOPPER} from '../modules/pychopper' -include {PYCOQC} from '../modules/pycoqc' -include {MINIMAP2_cDNA; FILTER_BAM} from '../modules/minimap2' -include {RSEQC} from '../modules/rseqc' -include {BAMBU_PREP} from '../modules/bambu' -include {MAP_CONTAMINATION_cDNA} from '../modules/contamination' -include {MAKE_CONTAMINATION_REPORT_1} from '../modules/make_contamination_report.nf' -include {MAKE_QC_REPORT} from '../modules/num_reads_report.nf' - - -workflow NANOPORE_cDNA_STEP_2 { - - take: - ref - annotation - housekeeping - ont_reads_txt - ont_reads_fq - ercc - cdna_kit - track_reads - mapq - contamination_ref - quality_score - - main: - MAKE_FAI(ref) - MAKE_INDEX_cDNA(ref) - PYCHOPPER(ont_reads_fq, ont_reads_txt, cdna_kit, quality_score) - MINIMAP2_cDNA(PYCHOPPER.out.id, PYCHOPPER.out.fastq, MAKE_INDEX_cDNA.out, PYCHOPPER.out.txt) - FILTER_BAM(MINIMAP2_cDNA.out.id, mapq, MINIMAP2_cDNA.out.bam, MINIMAP2_cDNA.out.bai) - - if (params.contamination_ref != "None") { - - MAKE_INDEX_CONTAMINANTS(contamination_ref) - - MAKE_INDEX_cDNA_CONTAMINATION_CHM13() - - BAM_AND_INDEX = MINIMAP2_cDNA.out.bam.combine(MAKE_INDEX_CONTAMINANTS.out).combine(MAKE_INDEX_cDNA_CONTAMINATION_CHM13.out) - - MAP_CONTAMINATION_cDNA(MINIMAP2_cDNA.out.id, BAM_AND_INDEX, MINIMAP2_cDNA.out.bai, MINIMAP2_cDNA.out.num_reads) - - MAKE_CONTAMINATION_REPORT_1(MAP_CONTAMINATION_cDNA.out.id, MAP_CONTAMINATION_cDNA.out.num_reads, MAP_CONTAMINATION_cDNA.out.num_unmapped_reads_before_chm13, - MAP_CONTAMINATION_cDNA.out.num_unmapped_reads_after_chm13, MAP_CONTAMINATION_cDNA.out.num_contaminant_reads) - - } - - - if ((params.ont_reads_txt != "None") || (params.path != "None")) { - - PYCOQC(MINIMAP2_cDNA.out.id, MINIMAP2_cDNA.out.fastq, MINIMAP2_cDNA.out.txt, MINIMAP2_cDNA.out.bam, MINIMAP2_cDNA.out.bai, quality_score, - mapq, FILTER_BAM.out.QC) - - MAKE_QC_REPORT(PYCOQC.out.num_reads_report, quality_score) - - } - - if (params.is_chm13 == true) - { - if (params.ercc == "None") - { - CHM13_GTF(annotation) - annotation = CHM13_GTF.out.collect() - } - - else - { - CHM13_GTF_ERCC(annotation, ercc) - annotation = CHM13_GTF_ERCC.out.collect() - } - } - - else - { - RSEQC(FILTER_BAM.out.id, FILTER_BAM.out.bam, FILTER_BAM.out.bai, housekeeping) - } - - - BAMBU_PREP(FILTER_BAM.out.id, mapq, FILTER_BAM.out.bam, FILTER_BAM.out.bai, ref, annotation, MAKE_FAI.out, track_reads) - -} diff --git a/sub_workflows/nanopore_dRNA_workflow_STEP_2.nf b/sub_workflows/nanopore_dRNA_workflow_STEP_2.nf deleted file mode 100755 index ab15286..0000000 --- a/sub_workflows/nanopore_dRNA_workflow_STEP_2.nf +++ /dev/null @@ -1,98 +0,0 @@ -// Import Modules -include {MAKE_FAI} from '../modules/make_fai' -include {MAKE_INDEX_dRNA ; MAKE_INDEX_dRNA_CONTAMINATION_CHM13} from '../modules/make_index' -include {MAKE_INDEX_dRNA as MAKE_INDEX_CONTAMINANTS} 
from '../modules/make_index' -include {CHM13_GTF; CHM13_GTF_ERCC} from '../modules/chm13_gff3_to_gtf' -include {PYCOQC_dRNA} from '../modules/pycoqc' -include {MINIMAP2_dRNA; FILTER_BAM} from '../modules/minimap2' -include {RSEQC} from '../modules/rseqc' -include {BAMBU_PREP} from '../modules/bambu' -include {MAP_CONTAMINATION_dRNA} from '../modules/contamination' -include {MAKE_CONTAMINATION_REPORT_1} from '../modules/make_contamination_report.nf' -include {TRIM_dRNA} from '../modules/trim_dRNA.nf' -include {CONVERT_U_TO_T} from '../modules/convert_U_to_T.nf' -include {MAKE_QC_REPORT} from '../modules/num_reads_report.nf' - -workflow NANOPORE_dRNA_STEP_2 { - - take: - ref - annotation - housekeeping - ont_reads_txt - ont_reads_fq - ercc - cdna_kit - track_reads - mapq - contamination_ref - quality_score - - main: - MAKE_FAI(ref) - MAKE_INDEX_dRNA(ref) - CONVERT_U_TO_T(ont_reads_fq, ont_reads_txt, quality_score) - - - if (params.trim_dRNA == true) { - - TRIM_dRNA(CONVERT_U_TO_T.out.fastq, CONVERT_U_TO_T.out.txt) - MINIMAP2_dRNA(TRIM_dRNA.out.fastq, MAKE_INDEX_dRNA.out, TRIM_dRNA.out.txt) - - } else { - - MINIMAP2_dRNA(CONVERT_U_TO_T.out.fastq, MAKE_INDEX_dRNA.out, CONVERT_U_TO_T.out.txt) - - } - - - FILTER_BAM(MINIMAP2_dRNA.out.id, mapq, MINIMAP2_dRNA.out.bam, MINIMAP2_dRNA.out.bai) - - - if (params.contamination_ref != "None") { - - MAKE_INDEX_CONTAMINANTS(contamination_ref) - - MAKE_INDEX_dRNA_CONTAMINATION_CHM13() - - BAM_AND_INDEX = MINIMAP2_dRNA.out.bam.combine(MAKE_INDEX_CONTAMINANTS.out).combine(MAKE_INDEX_dRNA_CONTAMINATION_CHM13.out) - - MAP_CONTAMINATION_dRNA(MINIMAP2_dRNA.out.id, BAM_AND_INDEX, MINIMAP2_dRNA.out.bai, MINIMAP2_dRNA.out.num_reads) - - MAKE_CONTAMINATION_REPORT_1(MAP_CONTAMINATION_dRNA.out.id, MAP_CONTAMINATION_dRNA.out.num_reads, MAP_CONTAMINATION_dRNA.out.num_unmapped_reads_before_chm13, - MAP_CONTAMINATION_dRNA.out.num_unmapped_reads_after_chm13, MAP_CONTAMINATION_dRNA.out.num_contaminant_reads) - - } - - if ((params.ont_reads_txt != "None") || (params.path != "None")) { - - PYCOQC_dRNA(MINIMAP2_dRNA.out.id, MINIMAP2_dRNA.out.fastq, MINIMAP2_dRNA.out.txt, MINIMAP2_dRNA.out.bam, MINIMAP2_dRNA.out.bai, - quality_score, mapq, FILTER_BAM.out.QC) - - MAKE_QC_REPORT(PYCOQC_dRNA.out.num_reads_report, quality_score) - - } - - if (params.is_chm13 == true) - { - if (params.ercc == "None") - { - CHM13_GTF(annotation) - annotation = CHM13_GTF.out.collect() - } - - else - { - CHM13_GTF_ERCC(annotation, ercc) - annotation = CHM13_GTF_ERCC.out.collect() - } - } - - else - { - RSEQC(FILTER_BAM.out.id, FILTER_BAM.out.bam, FILTER_BAM.out.bai, housekeeping) - } - - BAMBU_PREP(FILTER_BAM.out.id, mapq, FILTER_BAM.out.bam, FILTER_BAM.out.bai, ref, annotation, MAKE_FAI.out, track_reads) - -} diff --git a/sub_workflows/nanopore_unzip_and_concatenate.nf b/sub_workflows/nanopore_unzip_and_concatenate.nf deleted file mode 100755 index b0b6ed3..0000000 --- a/sub_workflows/nanopore_unzip_and_concatenate.nf +++ /dev/null @@ -1,21 +0,0 @@ -// Import Modules -include {UNZIP_AND_CONCATENATE; FIX_SEQUENCING_SUMMARY_NAME} from '../modules/unzip_and_concatenate.sh' - -workflow NANOPORE_UNZIP_AND_CONCATENATE { - - take: - fastq_path - txt_path - main: - - UNZIP_AND_CONCATENATE(fastq_path) - FIX_SEQUENCING_SUMMARY_NAME(txt_path) - - fastq_final = UNZIP_AND_CONCATENATE.out.toSortedList( { a, b -> a[0] <=> b[0] } ).flatten().buffer(size:2) - txt_final = FIX_SEQUENCING_SUMMARY_NAME.out.toSortedList( { a, b -> a.baseName <=> b.baseName } ).flatten() - - - emit: - fastq_final - txt_final -} diff --git 
a/sub_workflows/nanopore_workflow_STEP_2_BAM.nf b/sub_workflows/nanopore_workflow_STEP_2_BAM.nf deleted file mode 100755 index beebf92..0000000 --- a/sub_workflows/nanopore_workflow_STEP_2_BAM.nf +++ /dev/null @@ -1,41 +0,0 @@ -// Import Modules -include {MAKE_FAI} from '../modules/make_fai.nf' -include {CHM13_GTF; CHM13_GTF_ERCC} from '../modules/chm13_gff3_to_gtf' -include {FILTER_BAM_ONLY} from '../modules/minimap2' -include {BAMBU_PREP} from '../modules/bambu' - -workflow NANOPORE_STEP_2_BAM { - - take: - ref - annotation - bam - bai - ercc - track_reads - mapq - - main: - - MAKE_FAI(ref) - - FILTER_BAM_ONLY(bam, bai, mapq) - - if (params.is_chm13 == true) - { - if (params.ercc == "None") - { - CHM13_GTF(annotation) - annotation = CHM13_GTF.out.collect() - } - - else - { - CHM13_GTF_ERCC(annotation, ercc) - annotation = CHM13_GTF_ERCC.out.collect() - } - } - - BAMBU_PREP(FILTER_BAM_ONLY.out.id, mapq, FILTER_BAM_ONLY.out.bam, FILTER_BAM_ONLY.out.bai, ref, annotation, MAKE_FAI.out, track_reads) - -} diff --git a/sub_workflows/nanopore_workflow_STEP_3.nf b/sub_workflows/nanopore_workflow_STEP_3.nf deleted file mode 100755 index d439c93..0000000 --- a/sub_workflows/nanopore_workflow_STEP_3.nf +++ /dev/null @@ -1,61 +0,0 @@ -// Import Modules -include {BAMBU_DISCOVERY; BAMBU_QUANT} from '../modules/bambu' -include {GFFCOMPARE} from '../modules/gffcompare' -include {MAKE_TRANSCRIPTOME} from '../modules/make_transcriptome' -include {MULTIQC_GRCh38 ; MULTIQC_CHM13} from '../modules/multiqc' -include {MAKE_CONTAMINATION_REPORT_2} from '../modules/make_contamination_report.nf' -include {MERGE_QC_REPORT} from '../modules/num_reads_report.nf' - -workflow NANOPORE_STEP_3 { - - take: - ref - fai - annotation - NDR - track_reads - bambu_rds - multiqc_input - multiqc_config - contamination - num_reads - read_length - quality_thresholds - - main: - - - MAKE_CONTAMINATION_REPORT_2(contamination.collect()) - MERGE_QC_REPORT(num_reads.collect(), read_length.collect(), quality_thresholds.collect()) - - - if (params.is_chm13 == true) - { - MULTIQC_CHM13(multiqc_input.collect(), multiqc_config, MAKE_CONTAMINATION_REPORT_2.out, MERGE_QC_REPORT.out) - } - - else - { - MULTIQC_GRCh38(multiqc_input.collect(), multiqc_config, MAKE_CONTAMINATION_REPORT_2.out, MERGE_QC_REPORT.out) - } - - if (params.is_discovery == true) - { - - BAMBU_DISCOVERY(bambu_rds.collect(), ref, annotation, fai, NDR, track_reads) - new_annotation = BAMBU_DISCOVERY.out.gtf - GFFCOMPARE(new_annotation, annotation) - - } - - else - { - - BAMBU_QUANT(bambu_rds.collect(), ref, annotation, fai) - new_annotation = BAMBU_QUANT.out.gtf - - } - - MAKE_TRANSCRIPTOME(ref, fai, new_annotation) - -} diff --git a/workflow/bin/bambu_discovery.R b/workflow/bin/bambu_discovery.R deleted file mode 100755 index e6f453e..0000000 --- a/workflow/bin/bambu_discovery.R +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/Rscript - -library("bambu") - -args <- commandArgs(trailingOnly = TRUE) - -rc_files <- unlist(strsplit(args[1], ",")) -fa_file <- args[2] -gtf_file <- args[3] -NDR_input <- args[4] -track_reads_input <- args[5] == "true" - -bambuAnnotations <- prepareAnnotations(gtf_file) - - -if (NDR_input == "auto") { - se_novel <- bambu(reads=rc_files, annotations=bambuAnnotations, genome=fa_file, - lowMemory=FALSE, ncore=12, discovery=TRUE, quant=TRUE, trackReads=track_reads_input) -} else { - - NDR_input <- as.double(NDR_input) - - se_novel <- bambu(reads=rc_files, annotations=bambuAnnotations, genome=fa_file, - lowMemory=FALSE, ncore=12, NDR=NDR_input, discovery=TRUE, 
quant=TRUE, trackReads=track_reads_input) -} - -writeBambuOutput(se_novel, path = "./bambu_discovery/") - -saveRDS(se_novel, file="./bambu_discovery/final_discovery.RDS") diff --git a/workflow/bin/bambu_prep.R b/workflow/bin/bambu_prep.R deleted file mode 100755 index 8991811..0000000 --- a/workflow/bin/bambu_prep.R +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/Rscript - -library("bambu") - -args <- commandArgs(trailingOnly = TRUE) - -bam <- args[1] -fa_file <- args[2] -gtf_file <- args[3] -track_reads_input <- args[4] == "true" - -bambuAnnotations <- prepareAnnotations(gtf_file) - -se <- bambu(reads = bam, annotations = bambuAnnotations, genome = fa_file, rcOutDir = "./bambu_prep/", trackReads=track_reads_input, - ncore=12, lowMemory=FALSE, quant=FALSE, discovery=FALSE, yieldSize = 400000, verbose=TRUE) - diff --git a/workflow/bin/bambu_quant.R b/workflow/bin/bambu_quant.R deleted file mode 100755 index b9c54d7..0000000 --- a/workflow/bin/bambu_quant.R +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/Rscript - -library("bambu") - -args <- commandArgs(trailingOnly = TRUE) - -rc_files <- unlist(strsplit(args[1], ",")) -fa_file <- args[2] -gtf_file <- args[3] - -bambuAnnotations <- prepareAnnotations(gtf_file) - -se_quant <- bambu(reads=rc_files, annotations=bambuAnnotations, genome=fa_file, - ncore=12, lowMemory=TRUE, discovery=FALSE, quant=TRUE) - -writeBambuOutput(se_quant, path = "./bambu_quant/") - -saveRDS(se_quant, file="./bambu_quant/final_quant.RDS") diff --git a/workflow/bin/convert_U_to_T.py b/workflow/bin/convert_U_to_T.py deleted file mode 100755 index 82a5022..0000000 --- a/workflow/bin/convert_U_to_T.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 - -import sys - - -def convert_u_to_t_in_sequence(input_file, output_file): - """ - This function reads a FASTQ file, replaces all occurrences of 'U' with 'T' - in the sequence lines only, and writes the modified content to a new file. - """ - try: - with open(input_file, 'r') as infile, open(output_file, 'w') as outfile: - line_counter = 0 # To keep track of line numbers - - for line in infile: - line_counter += 1 - # In FASTQ format, the sequence line is every 2nd line out of sets of 4 lines - if line_counter % 4 == 2: - # Replace 'U' with 'T' in the sequence line - line = line.replace('U', 'T') - - outfile.write(line) - - # Reset the line counter after every set of 4 lines - if line_counter == 4: - line_counter = 0 - - return "Conversion successful. 
Output file created: " + output_file - except Exception as e: - return "An error occurred: " + str(e) - - -# Example usage -input_fastq = sys.argv[1] # First input from the command line, the input fastq file -output_fastq = sys.argv[2] # Second input from the command line, the name of the output fastq file - -# Call the function and print the result -convert_u_to_t_in_sequence(input_fastq, output_fastq) diff --git a/workflow/bin/filter_by_mean_base_quality.py b/workflow/bin/filter_by_mean_base_quality.py deleted file mode 100755 index c3bc323..0000000 --- a/workflow/bin/filter_by_mean_base_quality.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python3 - -import sys - -def calculate_mean_quality(quality_string): - return sum(ord(char) - 33 for char in quality_string) / len(quality_string) - -def filter_fastq(input_file, threshold, output_file): - with open(input_file, 'r') as infile, open(output_file, 'w') as outfile: - while True: - header = infile.readline().strip() - if not header: - break - sequence = infile.readline().strip() - plus = infile.readline().strip() - quality = infile.readline().strip() - - if calculate_mean_quality(quality) >= threshold: - outfile.write(f"{header}\n{sequence}\n{plus}\n{quality}\n") - -if __name__ == "__main__": - if len(sys.argv) != 4: - print("Usage: python filter_fastq.py ") - sys.exit(1) - - input_fastq = sys.argv[1] - quality_threshold = float(sys.argv[2]) - output_fastq = sys.argv[3] - - filter_fastq(input_fastq, quality_threshold, output_fastq) - print(f"Filtered FASTQ file saved to {output_fastq}") diff --git a/workflow/bin/fix_sequencing_summary_porechop.py b/workflow/bin/fix_sequencing_summary_porechop.py deleted file mode 100755 index 5aaf9b7..0000000 --- a/workflow/bin/fix_sequencing_summary_porechop.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python - -## Import libraries -import numpy as np -import pandas as pd -import sys - -## Load filenames from command lines -fastq_filename = sys.argv[1] -txt_filename = sys.argv[2] -output_name = sys.argv[3] - -## Read fastq file line by line -fastq = open(fastq_filename, 'r') -Lines = fastq.readlines() - -read_ids_list = [] -read_length_list = [] - -# Strips the newline character -for line in Lines: - - ## Get line content - line_content = line.strip() - - ## Get read_ids from fastq_file - if ((line_content[0] == "@") & (line_content.find("runid=") != -1)): - - ## Get original and pychopper read ids - read_id = line_content.split('@')[1].split(' ')[0] - - ## Append ids to their respective lists - read_ids_list.append(read_id) - is_read_id = True - - - ## If this is a sequence line - elif ((line_content[0] in ["C", "T", "G", "A"]) & (line_content.isalpha()) & (is_read_id)): - - ## Get new read size after pychopper - length = len(line_content) - read_length_list.append(length) - is_read_id = False - - -## Read txt sequencing sammary file in -df_txt = pd.read_csv(txt_filename, delimiter='\t') - -## Remove useless columns -df_txt = df_txt[["read_id", "run_id", "channel", "start_time", "mean_qscore_template", "sequence_length_template"]].copy() - -## Remove points where the txt files were joined (Headers in the middle of the data) -df_txt = df_txt.loc[df_txt["channel"]!="channel"].copy() -df_txt[["channel", "start_time", "mean_qscore_template", "sequence_length_template"]] = df_txt[["channel", "start_time", - "mean_qscore_template", "sequence_length_template"]].apply(pd.to_numeric, errors="ignore").copy() -df_txt.dropna(inplace=True, axis=0) - -## Create dataframe with pychopper info -df_read_converter 
= pd.DataFrame() -df_read_converter["read_id"] = np.asarray(read_ids_list) -df_read_converter["sequence_length_template_porechop"] = np.asarray(read_length_list).astype('int32') - - -## Merge with original txt and substitute old columns -df_txt_final = df_txt.merge(df_read_converter, on="read_id", how="outer") -df_txt_final["sequence_length_template_porechop"].fillna(df_txt_final["sequence_length_template"], inplace=True) -df_txt_final.drop(columns="sequence_length_template", inplace=True) -df_txt_final.rename(columns={'sequence_length_template_porechop': 'sequence_length_template'}, inplace=True) -df_txt_final.drop_duplicates(inplace=True) - -## Save new filtered sequencing summary file as a txt file -df_txt_final.to_csv(output_name, index=False, sep="\t") - -## Print number of reads to stdout so it can be stored in a variable -print(len(read_ids_list)) diff --git a/workflow/bin/fix_sequencing_summary_pychopper.py b/workflow/bin/fix_sequencing_summary_pychopper.py deleted file mode 100755 index 63aa748..0000000 --- a/workflow/bin/fix_sequencing_summary_pychopper.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python3 - -## Import libraries -import numpy as np -import pandas as pd -import sys - -## Load filenames from command lines -fastq_filename = sys.argv[1] -txt_filename = sys.argv[2] -output_name = sys.argv[3] - -## Read fastq file line by line -fastq = open(fastq_filename, 'r') -Lines = fastq.readlines() - -original_read_ids_list = [] -pychopper_read_ids_list = [] -read_length_list = [] - -# Strips the newline character -for line in Lines: - - ## Get line content - line_content = line.strip() - - ## Get read_ids from fastq_file - if ((line_content[0] == "@") & (line_content.find("runid=") != -1)): - - ## Get original and pychopper read ids - pychopper_read_id = line_content.split('@')[1].split(' ')[0] - original_read_id = line_content.split("|")[1].split(" ")[0] - - ## Append ids to their respective lists - original_read_ids_list.append(original_read_id) - pychopper_read_ids_list.append(pychopper_read_id) - is_read_id = True - - - ## If this is a sequence line - elif ((line_content[0] in ["C", "T", "G", "A"]) & (line_content.isalpha()) & (is_read_id)): - - ## Get new read size after pychopper - length = len(line_content) - read_length_list.append(length) - is_read_id = False - - -## Read txt sequencing sammary file in -df_txt = pd.read_csv(txt_filename, delimiter='\t') - -## Remove useless columns -df_txt = df_txt[["read_id", "run_id", "channel", "start_time", "sequence_length_template", "mean_qscore_template"]].copy() - -## Remove points where the txt files were joined (Headers in the middle of the data) -df_txt = df_txt.loc[df_txt["channel"]!="channel"].copy() -df_txt[["channel", "start_time", "sequence_length_template", - "mean_qscore_template"]] = df_txt[["channel", "start_time", "sequence_length_template", - "mean_qscore_template"]].apply(pd.to_numeric, errors="ignore").copy() -df_txt.dropna(inplace=True, axis=0) - -## Create dataframe with pychopper info -df_read_converter = pd.DataFrame() -df_read_converter["read_id"] = np.asarray(original_read_ids_list) -df_read_converter["pychopper_read_id"] = np.asarray(pychopper_read_ids_list) -df_read_converter["pychopper_sequence_length_template"] = np.asarray(read_length_list).astype('int32') - - -## Merge with original txt and substitute old columns -df_txt_final = df_txt.merge(df_read_converter, on="read_id", how="outer") -df_txt_final["pychopper_read_id"].fillna(df_txt_final["read_id"], inplace=True) 
-df_txt_final["pychopper_sequence_length_template"].fillna(df_txt_final["sequence_length_template"], inplace=True) -df_txt_final["read_id"] = df_txt_final["pychopper_read_id"].copy() -df_txt_final["sequence_length_template"] = df_txt_final["pychopper_sequence_length_template"].copy() -df_txt_final.drop(columns=["pychopper_read_id", "pychopper_sequence_length_template"], inplace=True) - - -## Save new filtered sequencing summary file as a txt file -df_txt_final.to_csv(output_name, index=False, sep="\t") - -## Print number of reads to stdout so it can be stored in a variable -print(len(pychopper_read_ids_list)) diff --git a/workflow/bin/gff_to_gtf.py b/workflow/bin/gff_to_gtf.py deleted file mode 100755 index 448b0d4..0000000 --- a/workflow/bin/gff_to_gtf.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python - -## Import libraries -import pandas as pd -import sys -import csv -import numpy as np - - -''' -function name: parse_df_columns - -purpose: parsing the last aggregate column of the gtf/gff3 into useful columns and cleaning non-relevant columns - -input: dataframe containining "raw" gtf/gff3 data - -output: dataframe containing gtf with useful columns -''' - -def parse_df_columns(df, is_ref=True): - - if is_ref: - - ## Get gene ids - df["gene_id"] = df["other"].str.split("source_gene=", expand=True)[1].str.split(';', expand=True)[0] - - ## Get transcript ids - df["transcript_id"] = df["other"].str.split("source_transcript=", expand=True)[1].str.split(';', expand=True)[0] - - ## Get CHM gene_ids - df["CHM_gene_id"] = df["other"].str.split("gene_id=", expand=True)[1].str.split(';', expand=True)[0] - - ## Get transcript ids - df["CHM_transcript_id"] = df["other"].str.split("transcript_id=", expand=True)[1].str.split(';', expand=True)[0] - - ## Only keep relevant - df.drop(columns="other", inplace=True) - - ## Drop duplicates - df.drop_duplicates(inplace=True) - - for col in df.columns: - df.loc[df[col].isnull(), col] = np.NaN - - return df - - -def main(): - - ## Define file names - gff_name = sys.argv[1] - output_name = sys.argv[2] - - ## Open gff reference file - gff = pd.read_csv(gff_name, delimiter="\t", header=1, - names = ["chr", "source", "type", "start", "end", "dot1", "strand", "dot2", "other"]) - ## Only keep transcripts - gff = gff.loc[gff["type"].isin(["transcript", "exon"])] - - ## Parse through "other" column to extract important information - gff = parse_df_columns(gff, is_ref=True) - - ## Change name of duplicate Ensembl IDs to CHM IDs - gff.loc[gff["transcript_id"] == "N/A", "transcript_id"] = gff["CHM_transcript_id"] - gff_transcripts = gff.loc[gff["type"] == "transcript"].copy() - gff_transcripts = gff_transcripts[["transcript_id", "CHM_transcript_id"]].drop_duplicates() - gff_transcripts = gff_transcripts[gff_transcripts['transcript_id'].duplicated() == True] - dup_trans = gff_transcripts["transcript_id"].dropna().values.tolist() - gff.loc[gff["transcript_id"].isin(dup_trans), "transcript_id"] = gff["transcript_id"] + "(" + gff["CHM_transcript_id"] + ")" - - ## Change name of duplicate gene ids to CHM ids - gff.loc[gff["gene_id"] == "None", "gene_id"] = gff["CHM_gene_id"] - gff_genes = gff.loc[gff["type"] == "transcript"].copy() - gff_genes = gff_genes[["gene_id", "CHM_gene_id"]].drop_duplicates() - gff_genes = gff_genes[gff_genes['gene_id'].duplicated() == True] - dup_genes = gff_genes["gene_id"].dropna().values.tolist() - gff.loc[gff["gene_id"].isin(dup_genes), "gene_id"] = gff["gene_id"] + "(" + gff["CHM_gene_id"] + ")" - - - ## Create other column with proper 
transcript ids and gene ids - gff["other"] = 'gene_id "' + gff["gene_id"] + '"; ' \ - + 'transcript_id "' + gff["transcript_id"] + '";' - - ## Drop columns that don't belong in gtf file - gff.drop(columns = ["gene_id", "transcript_id", "CHM_gene_id", "CHM_transcript_id"], inplace=True) - - ## Sort by index - gff.sort_index(inplace=True) - - ## Save file - gff.to_csv(output_name, index=False, header=False, sep="\t", quoting=csv.QUOTE_NONE) - -main() diff --git a/workflow/bin/multiqc_config.yaml b/workflow/bin/multiqc_config.yaml deleted file mode 100755 index 2a70ae9..0000000 --- a/workflow/bin/multiqc_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -sp: - pycoqc: - fn: '*pycoqc*' - - -extra_fn_clean_exts: - - type: regex_keep - pattern: "^[^_]*_[^_]*_[^_]*_" - - - diff --git a/workflow/main.nf b/workflow/main.nf index d0cd3a5..e34d171 100755 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -1,10 +1,8 @@ // Make this pipeline a nextflow 2 implementation nextflow.enable.dsl=2 -if (params.step == 1) { - log.info """ - OXFORD NANOPORE cDNA/dRNA SEQUENCING PIPELINE - STEP 1: BASECALLING - Bernardo Aguzzoli Heberle - EBBERT LAB - University of Kentucky + OXFORD NANOPORE DNA SEQUENCING BASECALLING - Bernardo Aguzzoli Heberle ====================================================================================================================================================================================== path containing samples and files to be basecalled (basecall only) : ${params.basecall_path} basecall speed (basecall only) : ${params.basecall_speed} @@ -15,147 +13,38 @@ log.info """ basecall demultiplexing : ${params.basecall_demux} trim barcodes during demultiplexing : ${params.trim_barcode} submission output file prefix : ${params.prefix} + GPU device for submission : ${params.gpu_devices} step: 1 = basecalling, 2 = mapping, 3 = quantification : ${params.step} Output directory : ${params.out_dir} ===================================================================================================================================================================================== - """ -} else if ((params.step == 2) && (params.bam == "None")) { - -log.info """ - OXFORD NANOPORE cDNA/dRNA SEQUENCING PIPELINE - STEP 2: QC, Alignment, and Bambu pre-processing - Bernardo Aguzzoli Heberle - EBBERT LAB - University of Kentucky -====================================================================================================================================================================================== - RAW unzipped nanopore fastq.gz file path : ${params.path} - - nanopore fastq files : ${params.ont_reads_fq} - nanopore sequencing summary files : ${params.ont_reads_txt} - submission output file prefix : ${params.prefix} - - reference genome : ${params.ref} - reference annotation : ${params.annotation} - housekeeping genes 3' bias assessment : ${params.housekeeping} - nanopore library prep kit (cDNA only) : ${params.cdna_kit} - reference genome is CHM13 : ${params.is_chm13} - path to ERCC annotations (CHM13 only) : ${params.ercc} - - quality score threshold for fastq reads : ${params.qscore_thresh} - MAPQ value for filtering bam file : ${params.mapq} - - Is this a direct RNAseq dataset? : ${params.is_dRNA} - Trim dRNA adapters? 
: ${params.trim_dRNA} - - Reference for contamination analysis : ${params.contamination_ref} - - step: 1 = basecalling, 2 = mapping, 3 = quantification : ${params.step} - Output directory : ${params.out_dir} - ===================================================================================================================================================================================== - -""" - -} else if ((params.step == 2) && (params.bam != "None")) { - -log.info """ - OXFORD NANOPORE cDNA/dRNA SEQUENCING PIPELINE - STEP 2 - Filtering BAM - Bernardo Aguzzoli Heberle - EBBERT LAB - University of Kentucky -====================================================================================================================================================================================== - bam files : ${params.bam} - bai files : ${params.bai} - submission output file prefix : ${params.prefix} - - reference genome : ${params.ref} - reference annotation : ${params.annotation} - - reference genome is CHM13 : ${params.is_chm13} - path to ERCC annotations (CHM13 only) : ${params.err} - - - MAPQ value for filtering bam file : ${params.mapq} - - - step: 1 = basecalling, 2 = mapping, 3 = quantification : ${params.step} - Output directory : ${params.out_dir} - ===================================================================================================================================================================================== - -""" - -} else { - -log.info """ - OXFORD NANOPORE cDNA/dRNA SEQUENCING PIPELINE - STEP 3: Transcript Quantification and/or Discovery - Bernardo Aguzzoli Heberle - EBBERT LAB - University of Kentucky -====================================================================================================================================================================================== - - reference genome : ${params.ref} - reference annotation : ${params.annotation} - reference genome is CHM13 : ${params.is_chm13} - - multiqc configuration file : ${params.multiqc_config} - multiqc input path : ${params.multiqc_input} - intermediate qc file paths : ${params.intermediate_qc} - - transcript discovery status : ${params.is_discovery} - NDR Value for Bambu (Novel Discovery Rate) : ${params.NDR} - Track read_ids with bambu? 
: ${params.track_reads} - Path to pre-processed bambu RDS files : ${params.bambu_rds} - - step: 1 = basecalling, 2 = mapping, 3 = quantification : ${params.step} - Output directory : ${params.out_dir} - ===================================================================================================================================================================================== - - - -""" - -} + """ // Import Workflows -include {NANOPORE_UNZIP_AND_CONCATENATE} from '../sub_workflows/nanopore_unzip_and_concatenate.nf' -include {NANOPORE_STEP_1} from '../sub_workflows/nanopore_workflow_STEP_1' -include {NANOPORE_cDNA_STEP_2} from '../sub_workflows/nanopore_cDNA_workflow_STEP_2' -include {NANOPORE_dRNA_STEP_2} from '../sub_workflows/nanopore_dRNA_workflow_STEP_2' -include {NANOPORE_STEP_2_BAM} from '../sub_workflows/nanopore_workflow_STEP_2_BAM' -include {NANOPORE_STEP_3} from '../sub_workflows/nanopore_workflow_STEP_3' +include {BASECALLING} from '../sub_workflows/BASECALLING' // Define initial files and channels if (params.prefix == "None") { - fastq_path = Channel.fromPath("${params.path}/**/fastq_pass/*.fastq.gz").map{file -> tuple("sample_" + file.parent.toString().split("/fastq_pass")[0].split("/")[-2] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[2..-2].join("_"), file)}.groupTuple() - txt_path = Channel.fromPath("${params.path}/**/*uencing_summary*.txt").map{file -> tuple("sample_" + file.parent.toString().split("/")[-2] + "_" + file.simpleName.split('_')[2..-1].join("_"), file)}.groupTuple() - ont_reads_fq = Channel.fromPath(params.ont_reads_fq).map { file -> tuple(file.baseName, file) } - ont_reads_txt = Channel.fromPath(file(params.ont_reads_txt)) - fast5_path = Channel.fromPath("${params.basecall_path}/**.fast5").map{file -> tuple(file.parent.toString().split("/")[-2] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[2..-2].join("_"), file) }.groupTuple() - pod5_path = Channel.fromPath("${params.basecall_path}/**.pod5").map{file -> tuple(file.parent.toString().split("/")[-2] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[2..-2].join("_"), file) }.groupTuple() - bam = Channel.fromPath(params.bam).map { file -> tuple(file.baseName, file) } - bai = Channel.fromPath(params.bai) + fast5_path = Channel.fromPath("${params.basecall_path}/**.fast5").map{file -> tuple(file.parent.toString().split("/")[-2] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[2..-2].join("_"), file) }.groupTuple() + + pod5_path = Channel.fromPath("${params.basecall_path}/**.pod5").map{file -> tuple(file.parent.toString().split("/")[-2] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[2..-2].join("_"), file) }.groupTuple() } else { - fastq_path = Channel.fromPath("${params.path}/**/fastq_pass/*.fastq.gz").map{file -> tuple("${params.prefix}_sample_" + file.parent.toString().split("/fastq_pass")[0].split("/")[-2] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[2..-2].join("_"), file)}.groupTuple() - txt_path = Channel.fromPath("${params.path}/**/*uencing_summary*.txt").map{file -> tuple("${params.prefix}_sample_" + file.parent.toString().split("/")[-2] + "_" + file.simpleName.split('_')[2..-1].join("_"), file)}.groupTuple() - ont_reads_fq = Channel.fromPath(params.ont_reads_fq).map { file -> tuple("${params.prefix}_" + file.baseName, file) } - ont_reads_txt = Channel.fromPath(file(params.ont_reads_txt)) - fast5_path = 
Channel.fromPath("${params.basecall_path}/**.fast5").map{file -> tuple("${params.prefix}_" + file.parent.toString().split("/")[-2] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[2..-2].join("_"), file) }.groupTuple() - pod5_path = Channel.fromPath("${params.basecall_path}/**.pod5").map{file -> tuple("${params.prefix}_" + file.parent.toString().split("/")[-2] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[2..-2].join("_"), file) }.groupTuple() - bam = Channel.fromPath(params.bam).map { file -> tuple("${params.prefix}_" + file.baseName, file) } - bai = Channel.fromPath(params.bai).map { file -> file.parent.resolve("${params.prefix}_${file.name}") } + fast5_path = Channel.fromPath("${params.basecall_path}/**.fast5").map{file -> tuple("${params.prefix}_" + file.parent.toString().split("/")[-2] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[2..-2].join("_"), file) }.groupTuple() + + pod5_path = Channel.fromPath("${params.basecall_path}/**.pod5").map{file -> tuple("${params.prefix}_" + file.parent.toString().split("/")[-2] + "_" + file.simpleName.split('_')[0] + "_" + file.simpleName.split('_')[2..-2].join("_"), file) }.groupTuple() } ref = file(params.ref) -housekeeping = file(params.housekeeping) -annotation = file(params.annotation) -cdna_kit = Channel.value(params.cdna_kit) -multiqc_config = Channel.fromPath(params.multiqc_config) -NDR = Channel.value(params.NDR) -track_reads = Channel.value(params.track_reads) mapq = Channel.value(params.mapq) -bambu_rds = Channel.fromPath(params.bambu_rds) -multiqc_input = Channel.fromPath(params.multiqc_input, type: "file") -fai = file(params.fai) -contamination_ref = Channel.fromPath(params.contamination_ref) quality_score = Channel.value(params.qscore_thresh) basecall_speed = Channel.value(params.basecall_speed) basecall_mods = Channel.value(params.basecall_mods) @@ -163,90 +52,12 @@ basecall_config = Channel.value(params.basecall_config) basecall_trim = Channel.value(params.basecall_trim) basecall_compute = Channel.value(params.basecall_compute) trim_barcode = Channel.value(params.trim_barcode) -contamination = Channel.fromPath("${params.intermediate_qc}/contamination/*") -num_reads = Channel.fromPath("${params.intermediate_qc}/number_of_reads/*") -read_length = Channel.fromPath("${params.intermediate_qc}/read_length/*") -quality_thresholds = Channel.fromPath("${params.intermediate_qc}/quality_score_thresholds/*") - - - - -if (params.ercc != "None") { - ercc = Channel.fromPath(params.ercc) - } -else { - ercc = params.ercc - } - -if (params.ont_reads_txt == "None") { - ont_reads_txt = Channel.value(params.ont_reads_txt) - } else { - // Make sure ONT sequencing summary and fastq files are in the same order - ont_reads_txt = ont_reads_txt.toSortedList( { a, b -> a.baseName <=> b.baseName } ).flatten() - } - -if (params.ont_reads_fq != "None") { - - // Make sure files are in same order - ont_reads_fq = ont_reads_fq.toSortedList( { a, b -> a[0] <=> b[0] } ).flatten().buffer(size:2) +devices = Channel.value(params.gpu_devices) - } -if ((params.bam != "None") && (params.bai != "None")) { - - // Make sure bam and bai files are in the correct order - bam = bam.toSortedList( { a, b -> a[0] <=> b[0] } ).flatten().buffer(size:2) - bai = bai.toSortedList( { a, b -> a.baseName <=> b.baseName } ).flatten() - - } workflow { - - if ((params.path != "None") && (params.step == 2)) { - - NANOPORE_UNZIP_AND_CONCATENATE(fastq_path, txt_path) - - if (params.is_dRNA == false) { - - 
NANOPORE_cDNA_STEP_2(ref, annotation, housekeeping, NANOPORE_UNZIP_AND_CONCATENATE.out[1], NANOPORE_UNZIP_AND_CONCATENATE.out[0], ercc, cdna_kit, track_reads, mapq, contamination_ref, quality_score) - - } else { - - NANOPORE_dRNA_STEP_2(ref, annotation, housekeeping, NANOPORE_UNZIP_AND_CONCATENATE.out[1], NANOPORE_UNZIP_AND_CONCATENATE.out[0], ercc, cdna_kit, track_reads, mapq, contamination_ref, quality_score) - - } - } - - - else if (params.step == 1){ - - NANOPORE_STEP_1(pod5_path, fast5_path, basecall_speed, basecall_mods, basecall_config, basecall_trim, quality_score, trim_barcode) - } - - else if ((params.step == 2) && (params.bam == "None") && (params.path == "None")){ - - if (params.is_dRNA == false) { - - NANOPORE_cDNA_STEP_2(ref, annotation, housekeeping, ont_reads_txt, ont_reads_fq, ercc, cdna_kit, track_reads, mapq, contamination_ref, quality_score) - } - - else if (params.is_dRNA = true) { - - NANOPORE_dRNA_STEP_2(ref, annotation, housekeeping, ont_reads_txt, ont_reads_fq, ercc, cdna_kit, track_reads, mapq, contamination_ref, quality_score) - - } - } - - - else if ((params.step == 2) && (params.bam != "None") && (params.path == "None")) { - - NANOPORE_STEP_2_BAM(ref, annotation, bam, bai, ercc, track_reads, mapq) - - } - - else if(params.step == 3){ - - NANOPORE_STEP_3(ref, fai, annotation, NDR, track_reads, bambu_rds, multiqc_input, multiqc_config, contamination, num_reads, read_length, quality_thresholds) - } + + BASECALLING(pod5_path, fast5_path, basecall_speed, basecall_mods, basecall_config, basecall_trim, quality_score, trim_barcode, devices, ref) } diff --git a/workflow/nextflow.config b/workflow/nextflow.config index 9097893..d0a9cdc 100755 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -2,84 +2,21 @@ // Pipeline parameter default values, can be modified by user when calling pipeline on command line (e.g. --ont_reads_fq sample_1.fastq) ## -// Input unzipped "raw" ONT output files -params.path = 'None' - -// Input fastq reads -params.ont_reads_fq = 'None' - -// Input sequencing summary files -params.ont_reads_txt = 'None' - // Input reference fasta file params.ref = 'None' -// Input bed file for housekeeping genes (RSEQc) -params.housekeeping = 'None' - -// Input GTF/GFF genomic annotation -params.annotation = 'None' - // Output directory for pipeline results params.out_dir = "output_directory/" -// Logical, is the reference CHM13? -params.is_chm13 = false - // directory of basecalling data params.basecall_path = 'None' -// Want to concatenate ERCC GTF to CHM13 GFF? Add the ERCC gtf file here -params.ercc = "None" - -// cDNA sequencing kit adapters for Pychopper to trim and orient reads -params.cdna_kit = "PCS111" - -// MultiQC configuration file -params.multiqc_config = "None" - -// Logical, do you want to perform discovery using Bambu? True = Yes -params.is_discovery = "None" - -// NDR value for Bambu novel discovery filtering - Leave it on Auto for most applications -params.NDR = "auto" - -// Logical, Track Bambu read assignments. True = Track. Tracking uses more memory, but allows you to extract reads that align to specific transcripts -params.track_reads = false - // MAPQ filtering threshold for bam files, 0 for no filtering params.mapq = "0" -// Which step of the pipeline to perform. 1 = Basecalling, 2 = Pre-processing, 3 = Discovery and quantification -params.step = "None" - -// Directory with MultiQC input for making report. 
-params.multiqc_input = "None" - -// Directory containing RDS files -params.bambu_rds = "None" - -// Index file for reference genome -params.fai = "None" - -// Unfiltered bam file input if you want to start from the bam filtering step -params.bam = "None" - -// Unfiltered bam index file input if you want to start from the bam filtering step -params.bai = "None" - -// Binary boolean parameter to check if user is performing Direct RNAseq analysis -params.is_dRNA = false - -// Reference file for contamination analysis -params.contamination_ref = "None" - // Quality score threshold params.qscore_thresh = "9" -// Trim dRNA adapters and primers? -params.trim_dRNA = false - // Desired basecall speed params.basecall_speed = "hac" @@ -98,116 +35,36 @@ params.basecall_demux = false // CPU vs GPU basecalling params.basecall_compute = "gpu" -// Trime barcodes (only counts if demultiplexing is enabled) +// Trim barcodes (only counts if demultiplexing is enabled) params.trim_barcode = "True" -// Intermediate QC reports -params.intermediate_qc = "None" - // Add prefix to all output files params.prefix = "None" +// Which GPU devices to use for basecalling? +params.gpu_devices = "all" -process { - - // Define job scheduler parameters for jobs that require little memory computation/memory ## - - withLabel: tiny { - executor='slurm' - clusterOptions='--partition normal --time 00:15:00 --account coa_mteb223_uksr --nodes 1 --ntasks 1 --cpus-per-task 4 --mem-per-cpu 4G --exclude rome008' - } - - - - withLabel: small { - executor='slurm' - clusterOptions='--partition normal --time 1:00:00 --account coa_mteb223_uksr --nodes 1 --ntasks 1 --cpus-per-task 8 --mem-per-cpu 4G --exclude rome008' - } - - - - // Define job scheduler parameters for jobs that require medium computation/memory ## - - withLabel: medium_small { - - executor='slurm' - clusterOptions='--partition normal --time 23:00:00 --account coa_mteb223_uksr --nodes 1 --ntasks 1 --cpus-per-task 12 --mem-per-cpu 4G --exclude rome008' - } - - - withLabel: medium { - executor='slurm' - clusterOptions='--partition normal --time 23:00:00 --account coa_mteb223_uksr --nodes 1 --ntasks 1 --cpus-per-task 16 --mem-per-cpu 4G --exclude rome008' - } - - withLabel: medium_large { - executor='slurm' - clusterOptions='--partition normal --time 23:00:00 --account coa_mteb223_uksr --nodes 1 --ntasks 1 --cpus-per-task 20 --mem-per-cpu 4G --exclude rome008' - } - - // Define job scheduler parameters for jobs that require lots of computation/memory ## - - withLabel: large { - executor='slurm' - clusterOptions='--partition normal --time 23:00:00 --account coa_mteb223_uksr --nodes 1 --ntasks 1 --cpus-per-task 50 --mem-per-cpu 4G --exclude rome008' - } - - - - withLabel: bambu_prep_job { - executor='slurm' - clusterOptions='--partition normal --time 23:00:00 --account coa_mteb223_uksr --nodes 1 --ntasks 1 --cpus-per-task 12 --mem-per-cpu 16G --exclude rome008' - } - withLabel: huge { - executor='slurm' - clusterOptions='--partition normal --time 23:00:00 --account coa_mteb223_uksr --nodes 1 --ntasks 1 --cpus-per-task 1 --mem-per-cpu 500G --exclude rome008' - } - - withLabel: contamination { - executor='slurm' - clusterOptions='--partition normal --time 23:00:00 --account coa_mteb223_uksr --nodes 1 --ntasks 1 --cpus-per-task 50 --mem-per-cpu 10G --exclude rome008' - } +process { - // Define local execution + // Define local cpu execution - withLabel: local { + withLabel: cpu { executor='local' } - + + // Define local gpu execution withLabel: gpu { - executor='slurm' - 
clusterOptions='--partition P4V12_SKY32M192_L --time 7-00:00:00 --account gol_mteb223_uksr --gres=gpu:1 --mem 16G' + executor='local' containerOptions = '--nv' } - // Define cluster options for BAMBU_DUSCOVERY and BAMBU_QUANT - withLabel: huge_long { - executor='slurm' - clusterOptions='--partition normal --time 7-00:00:00 --account coa_mteb223_uksr --nodes 1 --mem-per-cpu 41G --ntasks 1 --cpus-per-task 12' - } - // Define the singularity containers for each process, will pull containers from the cloud - // Nanopore - withName: "(TRIM_dRNA|MAKE_FAI|FIX_SEQUENCING_SUMMARY_NAME|UNZIP_AND_CONCATENATE|MAP_CONTAMINATION_dRNA|MAP_CONTAMINATION_cDNA|GFFCOMPARE|MAKE_INDEX_cDNA|MAKE_INDEX_dRNA|MINIMAP2_cDNA|MINIMAP2_dRNA|MINIMAP2_QC|FILTER_BAM|PYCHOPPER|MAKE_TRANSCRIPTOME|MAKE_INDEX_cDNA_CONTAMINATION_CHM13|MAKE_INDEX_dRNA_CONTAMINATION_CHM13)" { - container = "library://ebbertlab/nanopore_cdna/nanopore:sha256.a7ba8e994740f48afbffcfd1524d9a945a11ee6166a53c6ea19246af15e6b640" - } - - // Quality Control - withName: "(MERGE_QC_REPORT|MAKE_QC_REPORT|MULTIQC_GRCh38|MULTIQC_CHM13|RSEQC|PYCOQC|PYCOQC_dRNA|DECOMPRESS|TRIM_GALORE|CHM13_GTF_ERCC|CHM13_GTF)" { - container = "library://ebbertlab/nanopore_cdna/quality_control:sha256.0615a619e6a3ba2d24c37e95dc66dd3524e1b368c2af1eea9fec1229b16da555" - } - // Basecalling withName: "(FAST5_to_POD5|BASECALL_CPU|BASECALL_CPU_DEMUX|BASECALL_GPU|BASECALL_GPU_DEMUX)" { - container = "library://ebbertlab/nanopore_cdna/dorado:sha256.4e1548994e6e5cad392a0acd812c8fed6f93f88fb3989449c7e18ad77729cb25" - } - - // Bambu - withName: "(BAMBU_PREP|BAMBU_DISCOVERY|BAMBU_QUANT)" { - container = "library://ebbertlab/nanopore_cdna/bambu:sha256.c766826dd183d2c09be2ae4b64524954cecc66ea31557483fda249dd34c21c1d" + container = "library://bernardo-a-heberle/dcnl/ont_methylation:sha256.55c5e2c5cfbcfed11277fd8d4df272b858a8cc1e51f873c414a01d38debe2de0" } } -- GitLab
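Note on the channel definitions kept in workflow/main.nf: the fast5_path/pod5_path one-liners derive each sample ID from the directory layout, combining the run directory name two levels above the file with pieces of the underscore-separated file stem, then group files per sample with groupTuple(). For readers tracing that dense Groovy closure, here is a rough Python rendering; the sample_key helper and the example path are hypothetical, and it assumes Nextflow's file.simpleName behaves like pathlib's stem for these single-extension files:

from pathlib import Path

def sample_key(file_path, prefix=None):
    # Mirror the Groovy map{} closure: grandparent directory name,
    # first underscore token of the stem, then tokens 2..-2 joined.
    p = Path(file_path)
    run_dir = p.parent.parts[-2]   # file.parent.toString().split("/")[-2]
    tokens = p.stem.split("_")     # file.simpleName.split('_')
    # Groovy's [2..-2] range includes the second-to-last element,
    # which corresponds to Python's [2:-1] slice.
    key = run_dir + "_" + tokens[0] + "_" + "_".join(tokens[2:-1])
    return key if prefix is None else prefix + "_" + key

# Hypothetical pod5 file from one run directory:
print(sample_key("data/run01/pod5_pass/PAK12345_pass_abc123_0.pod5"))
# -> run01_PAK12345_abc123

Files that resolve to the same key end up in the same tuple, which is what lets downstream basecalling processes receive all pod5/fast5 chunks for a sample at once.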
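The surviving params.qscore_thresh = "9" appears to play the same role as the mean-base-quality cutoff computed by the deleted filter_by_mean_base_quality.py: FASTQ quality characters are Phred+33 encoded, so ord(char) - 33 recovers each base's Phred score. A minimal sketch of that rule, with a made-up quality string for illustration:

def mean_phred_quality(quality_string):
    # Phred+33 encoding: subtract the ASCII offset 33 from each character
    return sum(ord(c) - 33 for c in quality_string) / len(quality_string)

quality_line = "IIIIFFF###"   # hypothetical FASTQ quality line
threshold = 9.0               # matches params.qscore_thresh = "9"
print(mean_phred_quality(quality_line))               # 27.7
print(mean_phred_quality(quality_line) >= threshold)  # True

As in the deleted script, reads whose mean quality falls below the threshold are dropped whole rather than trimmed.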