Hi,
I have a question regarding Mutect2 output vcf. I am trying to run Mutect2 on paired-end reads (F and R reads for 1 sample at a time), tumor-only, sequenced by Illumina.
I ran the following bash script and unfortunately, I receive a vcf file that has all None values in FILTER column. The other columns look reasonable. I have run this script on a similar data previously and it worked, so I wonder what the problem might be.
My plan is then to sort the vcf file based on coverage and filter status, but I really don't know where the problem might be.
Thank you in advance.
#!/bin/bash
ROOTDIR='~/projects/patologie
cd $ROOTDIR/hs37d5
bwa index -a bwtsw $ROOTDIR/hs37d5/hs37d5.fa
samtools faidx $ROOTDIR/hs37d5/hs37d5.fa
picard CreateSequenceDictionary R=$ROOTDIR/hs37d5/hs37d5.fa O=$ROOTDIR/hs37d5/hs37d5.dict
cd $ROOTDIR/analysis2
bwa mem -M $ROOTDIR/hs37d5/hs37d5.fa $ROOTDIR/01_raw_data/BRCA1_S1_L001_R1_001.fastq $ROOTDIR/01_raw_data/BRCA1_S1_L001_R2_001.fastq > sample.sam
samtools view -bS sample.sam > sample.bam
picard SortSam I=sample.bam O=sorted_sample.bam SORT_ORDER=coordinate
samtools index sorted_sample.bam
picard MarkDuplicates I=sorted_sample.bam O=sample_marked.bam M=marked_metrics.txt ASSUME_SORT_ORDER=coordinate
samtools index sample_marked.bam
picard AddOrReplaceReadGroups I=sample_marked.bam O=sample_rg.bam RGID=1 RGLB=lib1 RGPL=illumina RGPU=unit1 RGSM=1
gatk IndexFeatureFile -F $ROOTDIR/reference/1000G_phase1.indels.b37.vcf
gatk IndexFeatureFile -F $ROOTDIR/reference/Mills_and_1000G_gold_standard.indels.b37.vcf
gatk IndexFeatureFile -F $ROOTDIR/reference/dbsnp_138.b37.vcf
gatk BaseRecalibrator -R $ROOTDIR/hs37d5/hs37d5.fa -I sample_rg.bam --known-sites $ROOTDIR/reference/1000G_phase1.indels.b37.vcf --known-sites $ROOTDIR/reference/dbsnp_138.b37.vcf --known-sites $ROOTDIR/reference/Mills_and_1000G_gold_standard.indels.b37.vcf -O recal_table
gatk ApplyBQSR -bqsr recal_table -I sample_rg.bam -R $ROOTDIR/hs37d5/hs37d5.fa -O sample_recal.bam
gatk IndexFeatureFile -F $ROOTDIR/reference/af-only-gnomad.raw.sites.b37.vcf
gatk Mutect2 -R $ROOTDIR/hs37d5/hs37d5.fa -I $ROOTDIR/analysis2/sample_recal.bam -tumor 1 -O sample_single.vcf
gatk VariantAnnotator -R $ROOTDIR/hs37d5/hs37d5.fa -V sample_single.vcf --dbsnp $ROOTDIR/reference/dbsnp_138.b37.vcf -O sample_single_id_som.vcf
grep -v '^##.*' sample_single_id_som.vcf > sample_filter.vcf
I have a question regarding Mutect2 output vcf. I am trying to run Mutect2 on paired-end reads (F and R reads for 1 sample at a time), tumor-only, sequenced by Illumina.
I ran the following bash script and unfortunately, I receive a vcf file that has all None values in FILTER column. The other columns look reasonable. I have run this script on a similar data previously and it worked, so I wonder what the problem might be.
My plan is then to sort the vcf file based on coverage and filter status, but I really don't know where the problem might be.
Thank you in advance.
#!/bin/bash
ROOTDIR='~/projects/patologie
cd $ROOTDIR/hs37d5
bwa index -a bwtsw $ROOTDIR/hs37d5/hs37d5.fa
samtools faidx $ROOTDIR/hs37d5/hs37d5.fa
picard CreateSequenceDictionary R=$ROOTDIR/hs37d5/hs37d5.fa O=$ROOTDIR/hs37d5/hs37d5.dict
cd $ROOTDIR/analysis2
bwa mem -M $ROOTDIR/hs37d5/hs37d5.fa $ROOTDIR/01_raw_data/BRCA1_S1_L001_R1_001.fastq $ROOTDIR/01_raw_data/BRCA1_S1_L001_R2_001.fastq > sample.sam
samtools view -bS sample.sam > sample.bam
picard SortSam I=sample.bam O=sorted_sample.bam SORT_ORDER=coordinate
samtools index sorted_sample.bam
picard MarkDuplicates I=sorted_sample.bam O=sample_marked.bam M=marked_metrics.txt ASSUME_SORT_ORDER=coordinate
samtools index sample_marked.bam
picard AddOrReplaceReadGroups I=sample_marked.bam O=sample_rg.bam RGID=1 RGLB=lib1 RGPL=illumina RGPU=unit1 RGSM=1
gatk IndexFeatureFile -F $ROOTDIR/reference/1000G_phase1.indels.b37.vcf
gatk IndexFeatureFile -F $ROOTDIR/reference/Mills_and_1000G_gold_standard.indels.b37.vcf
gatk IndexFeatureFile -F $ROOTDIR/reference/dbsnp_138.b37.vcf
gatk BaseRecalibrator -R $ROOTDIR/hs37d5/hs37d5.fa -I sample_rg.bam --known-sites $ROOTDIR/reference/1000G_phase1.indels.b37.vcf --known-sites $ROOTDIR/reference/dbsnp_138.b37.vcf --known-sites $ROOTDIR/reference/Mills_and_1000G_gold_standard.indels.b37.vcf -O recal_table
gatk ApplyBQSR -bqsr recal_table -I sample_rg.bam -R $ROOTDIR/hs37d5/hs37d5.fa -O sample_recal.bam
gatk IndexFeatureFile -F $ROOTDIR/reference/af-only-gnomad.raw.sites.b37.vcf
gatk Mutect2 -R $ROOTDIR/hs37d5/hs37d5.fa -I $ROOTDIR/analysis2/sample_recal.bam -tumor 1 -O sample_single.vcf
gatk VariantAnnotator -R $ROOTDIR/hs37d5/hs37d5.fa -V sample_single.vcf --dbsnp $ROOTDIR/reference/dbsnp_138.b37.vcf -O sample_single_id_som.vcf
grep -v '^##.*' sample_single_id_som.vcf > sample_filter.vcf