Dear team
Currently, I have downloaded the GATK4 (version 4.0.7.0) Docker images and the "gatk4-data-processing-master" WDL, together with the "gatk4-somatic-snvs-indels-master" WDL, for somatic mutation detection. The following commands are used for the whole workflow with the human reference genome (version hg38):
(1) The commands "java -jar cromwell-34.jar run processing-for-variant-discovery-gatk4.wdl --inputs normal.json"
and "java -jar cromwell-34.jar run processing-for-variant-discovery-gatk4.wdl --inputs tissue.json" are run to generate the BAM and BAI files for both the normal sample and the cancer tissue sample.
This step finished successfully, and the BAM and BAI files were produced.
(2) The command "java -jar cromwell-34.jar run mutect2.wdl --inputs mutect2.json" is run for somatic mutation detection, with the normal and tissue BAM and BAI files as input.
Unfortunately, the error "htsjdk.samtools.SAMException: Query asks for data past end of contig" occurs for many chromosome contigs (for example, Query contig chrX start:224664443 stop:224664487 contigLength:156040895).
Could someone help me fix these errors? Thanks a lot.
Here are the JSON files used in this issue:
(a) the normal json file
{
"##_COMMENT1": "SAMPLE NAME AND UNMAPPED BAMS",
"PreProcessingForVariantDiscovery_GATK4.sample_name": "mytestN",
"PreProcessingForVariantDiscovery_GATK4.ref_name": "hg38",
"PreProcessingForVariantDiscovery_GATK4.flowcell_unmapped_bams_list": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/test7/normal_ubam_list.txt",
"PreProcessingForVariantDiscovery_GATK4.unmapped_bam_suffix": ".bam",
"##COMMENT2": "REFERENCE FILES",
"PreProcessingForVariantDiscovery_GATK4.ref_dict": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.dict",
"PreProcessingForVariantDiscovery_GATK4.ref_fasta": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta",
"PreProcessingForVariantDiscovery_GATK4.ref_fasta_index": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.fai",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_alt": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.64.alt",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_sa": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.64.sa",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_amb": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.64.amb",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_bwt": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.64.bwt",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_ann": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.64.ann",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_pac": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.64.pac",
"##_COMMENT3": "KNOWN SITES RESOURCES",
"PreProcessingForVariantDiscovery_GATK4.dbSNP_vcf": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.dbsnp138.sort.vcf",
"PreProcessingForVariantDiscovery_GATK4.dbSNP_vcf_index": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.dbsnp138.sort.vcf.idx",
"PreProcessingForVariantDiscovery_GATK4.known_indels_sites_VCFs": [
"/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
"/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.known_indels.vcf.gz"
],
"PreProcessingForVariantDiscovery_GATK4.known_indels_sites_indices": [
"/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi",
"/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi"
],
"##_COMMENT4": "MISC PARAMETERS",
"PreProcessingForVariantDiscovery_GATK4.bwa_commandline": "bwa mem -K 100000000 -p -v 3 -t 16 -Y $bash_ref_fasta",
"PreProcessingForVariantDiscovery_GATK4.compression_level": 5,
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.num_cpu": "16",
"##_COMMENT5": "DOCKERS",
"PreProcessingForVariantDiscovery_GATK4.gotc_docker": "broadinstitute/genomes-in-the-cloud:2.3.1-1512499786",
"PreProcessingForVariantDiscovery_GATK4.gatk_docker": "broadinstitute/gatk:4.0.7.0",
"PreProcessingForVariantDiscovery_GATK4.python_docker": "python:2.7",
"##_COMMENT6": "PATHS",
"PreProcessingForVariantDiscovery_GATK4.gotc_path": "/usr/gitc/",
"PreProcessingForVariantDiscovery_GATK4.gatk_path": "/gatk/gatk",
"##_COMMENT7": "JAVA OPTIONS",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.java_opt": "-Xms3000m",
"PreProcessingForVariantDiscovery_GATK4.MergeBamAlignment.java_opt": "-Xms3000m",
"PreProcessingForVariantDiscovery_GATK4.MarkDuplicates.java_opt": "-Xms4000m",
"PreProcessingForVariantDiscovery_GATK4.SortAndFixTags.java_opt_sort": "-Xms4000m",
"PreProcessingForVariantDiscovery_GATK4.SortAndFixTags.java_opt_fix": "-Xms500m",
"PreProcessingForVariantDiscovery_GATK4.BaseRecalibrator.java_opt": "-Xms4000m",
"PreProcessingForVariantDiscovery_GATK4.GatherBqsrReports.java_opt": "-Xms3000m",
"PreProcessingForVariantDiscovery_GATK4.ApplyBQSR.java_opt": "-Xms3000m",
"PreProcessingForVariantDiscovery_GATK4.GatherBamFiles.java_opt": "-Xms2000m",
"##_COMMENT8": "MEMORY ALLOCATION",
"PreProcessingForVariantDiscovery_GATK4.GetBwaVersion.mem_size": "1 GB",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.mem_size": "14 GB",
"PreProcessingForVariantDiscovery_GATK4.MergeBamAlignment.mem_size": "3500 MB",
"PreProcessingForVariantDiscovery_GATK4.MarkDuplicates.mem_size": "7 GB",
"PreProcessingForVariantDiscovery_GATK4.SortAndFixTags.mem_size": "5000 MB",
"PreProcessingForVariantDiscovery_GATK4.CreateSequenceGroupingTSV.mem_size": "2 GB",
"PreProcessingForVariantDiscovery_GATK4.BaseRecalibrator.mem_size": "6 GB",
"PreProcessingForVariantDiscovery_GATK4.GatherBqsrReports.mem_size": "3500 MB",
"PreProcessingForVariantDiscovery_GATK4.ApplyBQSR.mem_size": "3500 MB",
"PreProcessingForVariantDiscovery_GATK4.GatherBamFiles.mem_size": "3 GB",
"##_COMMENT9": "DISK SIZE ALLOCATION",
"PreProcessingForVariantDiscovery_GATK4.agg_small_disk": 200,
"PreProcessingForVariantDiscovery_GATK4.agg_medium_disk": 300,
"PreProcessingForVariantDiscovery_GATK4.agg_large_disk": 400,
"PreProcessingForVariantDiscovery_GATK4.flowcell_small_disk": 100,
"PreProcessingForVariantDiscovery_GATK4.flowcell_medium_disk": 200,
"##_COMMENT10": "PREEMPTIBLES",
"PreProcessingForVariantDiscovery_GATK4.preemptible_tries": 3,
"PreProcessingForVariantDiscovery_GATK4.agg_preemptible_tries": 3
}
(b) the tissue json file
{
"##_COMMENT1": "SAMPLE NAME AND UNMAPPED BAMS",
"PreProcessingForVariantDiscovery_GATK4.sample_name": "mytestT",
"PreProcessingForVariantDiscovery_GATK4.ref_name": "hg38",
"PreProcessingForVariantDiscovery_GATK4.flowcell_unmapped_bams_list": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/test7/tissue_ubam_list.txt",
"PreProcessingForVariantDiscovery_GATK4.unmapped_bam_suffix": ".bam",
"##COMMENT2": "REFERENCE FILES",
"PreProcessingForVariantDiscovery_GATK4.ref_dict": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.dict",
"PreProcessingForVariantDiscovery_GATK4.ref_fasta": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta",
"PreProcessingForVariantDiscovery_GATK4.ref_fasta_index": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.fai",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_alt": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.64.alt",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_sa": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.64.sa",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_amb": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.64.amb",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_bwt": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.64.bwt",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_ann": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.64.ann",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_pac": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.64.pac",
"##_COMMENT3": "KNOWN SITES RESOURCES",
"PreProcessingForVariantDiscovery_GATK4.dbSNP_vcf": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.dbsnp138.sort.vcf",
"PreProcessingForVariantDiscovery_GATK4.dbSNP_vcf_index": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.dbsnp138.sort.vcf.idx",
"PreProcessingForVariantDiscovery_GATK4.known_indels_sites_VCFs": [
"/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
"/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.known_indels.vcf.gz"
],
"PreProcessingForVariantDiscovery_GATK4.known_indels_sites_indices": [
"/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi",
"/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi"
],
"##_COMMENT4": "MISC PARAMETERS",
"PreProcessingForVariantDiscovery_GATK4.bwa_commandline": "bwa mem -K 100000000 -p -v 3 -t 16 -Y $bash_ref_fasta",
"PreProcessingForVariantDiscovery_GATK4.compression_level": 5,
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.num_cpu": "16",
"##_COMMENT5": "DOCKERS",
"PreProcessingForVariantDiscovery_GATK4.gotc_docker": "broadinstitute/genomes-in-the-cloud:2.3.1-1512499786",
"PreProcessingForVariantDiscovery_GATK4.gatk_docker": "broadinstitute/gatk:4.0.7.0",
"PreProcessingForVariantDiscovery_GATK4.python_docker": "python:2.7",
"##_COMMENT6": "PATHS",
"PreProcessingForVariantDiscovery_GATK4.gotc_path": "/usr/gitc/",
"PreProcessingForVariantDiscovery_GATK4.gatk_path": "/gatk/gatk",
"##_COMMENT7": "JAVA OPTIONS",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.java_opt": "-Xms3000m",
"PreProcessingForVariantDiscovery_GATK4.MergeBamAlignment.java_opt": "-Xms3000m",
"PreProcessingForVariantDiscovery_GATK4.MarkDuplicates.java_opt": "-Xms4000m",
"PreProcessingForVariantDiscovery_GATK4.SortAndFixTags.java_opt_sort": "-Xms4000m",
"PreProcessingForVariantDiscovery_GATK4.SortAndFixTags.java_opt_fix": "-Xms500m",
"PreProcessingForVariantDiscovery_GATK4.BaseRecalibrator.java_opt": "-Xms4000m",
"PreProcessingForVariantDiscovery_GATK4.GatherBqsrReports.java_opt": "-Xms3000m",
"PreProcessingForVariantDiscovery_GATK4.ApplyBQSR.java_opt": "-Xms3000m",
"PreProcessingForVariantDiscovery_GATK4.GatherBamFiles.java_opt": "-Xms2000m",
"##_COMMENT8": "MEMORY ALLOCATION",
"PreProcessingForVariantDiscovery_GATK4.GetBwaVersion.mem_size": "1 GB",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.mem_size": "14 GB",
"PreProcessingForVariantDiscovery_GATK4.MergeBamAlignment.mem_size": "3500 MB",
"PreProcessingForVariantDiscovery_GATK4.MarkDuplicates.mem_size": "7 GB",
"PreProcessingForVariantDiscovery_GATK4.SortAndFixTags.mem_size": "5000 MB",
"PreProcessingForVariantDiscovery_GATK4.CreateSequenceGroupingTSV.mem_size": "2 GB",
"PreProcessingForVariantDiscovery_GATK4.BaseRecalibrator.mem_size": "6 GB",
"PreProcessingForVariantDiscovery_GATK4.GatherBqsrReports.mem_size": "3500 MB",
"PreProcessingForVariantDiscovery_GATK4.ApplyBQSR.mem_size": "3500 MB",
"PreProcessingForVariantDiscovery_GATK4.GatherBamFiles.mem_size": "3 GB",
"##_COMMENT9": "DISK SIZE ALLOCATION",
"PreProcessingForVariantDiscovery_GATK4.agg_small_disk": 200,
"PreProcessingForVariantDiscovery_GATK4.agg_medium_disk": 300,
"PreProcessingForVariantDiscovery_GATK4.agg_large_disk": 400,
"PreProcessingForVariantDiscovery_GATK4.flowcell_small_disk": 100,
"PreProcessingForVariantDiscovery_GATK4.flowcell_medium_disk": 200,
"##_COMMENT10": "PREEMPTIBLES",
"PreProcessingForVariantDiscovery_GATK4.preemptible_tries": 3,
"PreProcessingForVariantDiscovery_GATK4.agg_preemptible_tries": 3
}
(c) the mutect2 json file
{
"##_COMMENT1": "Runtime",
"##Mutect2.oncotator_docker": "(optional) String?",
"Mutect2.gatk_docker": "broadinstitute/gatk:4.0.7.0",
"##_COMMENT2": "Workflow options",
"##_Mutect2.intervals": "gs://gatk-best-practices/somatic-b37/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.baits.interval_list",
"Mutect2.scatter_count": 50,
"Mutect2.artifact_modes": ["G/T", "C/T"],
"##_Mutect2.m2_extra_args": "(optional) String?",
"##_Mutect2.m2_extra_filtering_args": "(optional) String?",
"Mutect2.run_orientation_bias_filter": "False",
"Mutect2.run_oncotator": "False",
"##_COMMENT3": "Primary inputs",
"Mutect2.ref_fasta": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta",
"Mutect2.ref_dict": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.dict",
"Mutect2.ref_fai": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/Homo_sapiens_assembly38.fasta.fai",
"Mutect2.normal_bam": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/test7/mytestN.hg38.bam",
"Mutect2.normal_bai": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/test7/mytestN.hg38.bai",
"Mutect2.tumor_bam": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/test7/mytestT.hg38.bam",
"Mutect2.tumor_bai": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/test7/mytestT.hg38.bai",
"##COMMENT4": "Primary resources",
"##_Mutect2.pon": "(optional) File?",
"##_Mutect2.pon_index": "(optional) File?",
"Mutect2.gnomad": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/somatic/af-only-gnomad.hg38.vcf.gz",
"Mutect2.gnomad_index": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/somatic/af-only-gnomad.hg38.vcf.gz.tbi",
"Mutect2.variants_for_contamination": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/somatic/small_exac_common_3.hg38.vcf.gz",
"Mutect2.variants_for_contamination_index": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/somatic/small_exac_common_3.hg38.vcf.gz.tbi",
"##Mutect2.realignment_index_bundle": "File? (optional)",
"##_COMMENT5": "Secondary resources",
"Mutect2.onco_ds_tar_gz": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/somatic/oncotator_v1_ds_April052016.tar.gz",
"Mutect2.default_config_file": "/bdp-picb/bioinfo/gyzheng/GATK_pipeline/GATK_resource/reference_data/hg38/somatic/onco_config.txt",
"##_Mutect2.sequencing_center": "(optional) String?",
"##_Mutect2.sequence_source": "(optional) String?",
"##_COMMENT6": "Secondary resources",
"##_Mutect2.MergeBamOuts.mem": "(optional) Int?",
"##_Mutect2.SplitIntervals.mem": "(optional) Int?",
"##_Mutect2.M2.mem": "(optional) Int?",
"##_Mutect2.MergeVCFs.mem": "(optional) Int?",
"##_Mutect2.oncotate_m2.mem": "(optional) Int?",
"##_COMMENT7": "Secondary resources",
"##_Mutect2.onco_ds_local_db_dir": "(optional) String?",
"##_Mutect2.sequencing_center": "(optional) String?",
"##_Mutect2.oncotate_m2.oncotator_exe": "(optional) String?",
"##_Mutect2.gatk4_override": "(optional) File?",
"##_Mutect2.CollectSequencingArtifactMetrics.mem": "(optional) Int?",
"##_COMMENT8": "Disk space",
"##_Mutect2.MergeVCFs.disk_space_gb": "(optional) Int?",
"##_Mutect2.Filter.disk_space_gb": "(optional) Int?",
"##_Mutect2.M2.disk_space_gb": "(optional) Int?",
"##_Mutect2.M2.disk_space_gb": 100,
"##_Mutect2.oncotate_m2.disk_space_gb": "(optional) Int?",
"##_Mutect2.SplitIntervals.disk_space_gb": "(optional) Int?",
"##_Mutect2.MergeBamOuts.disk_space_gb": "(optional) Int?",
"##_Mutect2.CollectSequencingArtifactMetrics.disk_space_gb": "(optional) Int?",
"##_Mutect2.emergency_extra_disk": "(optional) Int?",
"##_COMMENT9": "Preemptibles",
"##_Mutect2.MergeBamOuts.preemptible_attempts": "(optional) Int?",
"Mutect2.preemptible_attempts": 3
}