When I finished SVPreprocess, I checked the output in xxxx.discovery.vcf.gz, and there is nothing under the header:
##fileformat=VCFv4.2
##ALT=<ID=DEL,Description="Deletion">
##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
##INFO=<ID=END,Number=1,Type=Integer,Description="End coordinate of this variant">
##INFO=<ID=GSCOHERENCE,Number=1,Type=Float,Description="Value of coherence statistic">
##INFO=<ID=GSCOHFN,Number=1,Type=Float,Description="Coherence statistic per pair">
##INFO=<ID=GSCOHPVALUE,Number=1,Type=Float,Description="Coherence metric (not a true p-value)">
##INFO=<ID=GSCOORDS,Number=4,Type=Integer,Description="Original cluster coordinates">
##INFO=<ID=GSCORA6,Number=1,Type=Float,Description="Correlation with array intensity from Affy6 arrays">
##INFO=<ID=GSCORI1M,Number=1,Type=Float,Description="Correlation with array intensity from Illumina 1M arrays">
##INFO=<ID=GSCORNG,Number=1,Type=Float,Description="Correlation with array intensity from NimbleGen arrays">
##INFO=<ID=GSDEPTHCALLS,Number=.,Type=String,Description="Samples with discrepant read pairs or low read depth">
##INFO=<ID=GSDEPTHCALLTHRESHOLD,Number=1,Type=Float,Description="Read depth threshold (median read depth of samples with discrepant read pairs)">
##INFO=<ID=GSDEPTHNOBSSAMPLES,Number=1,Type=Integer,Description="Number of samples with discrepant read pairs in depth test">
##INFO=<ID=GSDEPTHNTOTALSAMPLES,Number=1,Type=Integer,Description="Total samples in depth test">
##INFO=<ID=GSDEPTHOBSSAMPLES,Number=.,Type=String,Description="Samples with discrepant read pairs in depth test">
##INFO=<ID=GSDEPTHPVALUE,Number=1,Type=Float,Description="Depth p-value using chi-squared test">
##INFO=<ID=GSDEPTHPVALUECOUNTS,Number=4,Type=Integer,Description="Depth test read counts (carrier inside event, carrier outside event, non-carrier inside, non-carrier outside)">
##INFO=<ID=GSDEPTHRANKSUMPVALUE,Number=1,Type=Float,Description="Depth p-value using rank-sum test">
##INFO=<ID=GSDEPTHRATIO,Number=1,Type=Float,Description="Read depth ratio test">
##INFO=<ID=GSDMAX,Number=1,Type=Integer,Description="Maximum value considered for DOpt">
##INFO=<ID=GSDMIN,Number=1,Type=Integer,Description="Minimum value considered for DOpt">
##INFO=<ID=GSDOPT,Number=1,Type=Integer,Description="Most likely event length">
##INFO=<ID=GSDSPAN,Number=1,Type=Integer,Description="Inner span length of read pair cluster">
##INFO=<ID=GSELENGTH,Number=1,Type=Integer,Description="Effective length">
##INFO=<ID=GSMEMBNPAIRS,Number=1,Type=Integer,Description="Number of pairs used in membership test">
##INFO=<ID=GSMEMBNSAMPLES,Number=1,Type=Integer,Description="Number of samples used in membership test">
##INFO=<ID=GSMEMBOBSSAMPLES,Number=.,Type=String,Description="Samples participating in membership test">
##INFO=<ID=GSMEMBPVALUE,Number=1,Type=Float,Description="Membership p-value">
##INFO=<ID=GSMEMBSTATISTIC,Number=1,Type=Float,Description="Value of membership statistic">
##INFO=<ID=GSNDEPTHCALLS,Number=1,Type=Integer,Description="Number of samples with discrepant read pairs or low read depth">
##INFO=<ID=GSNHET,Number=1,Type=Integer,Description="Number of heterozygous snp genotype calls inside the event">
##INFO=<ID=GSNHOM,Number=1,Type=Integer,Description="Number of homozygous snp genotype calls inside the event">
##INFO=<ID=GSNNOCALL,Number=1,Type=Integer,Description="Number of snp genotype non-calls inside the event">
##INFO=<ID=GSNPAIRS,Number=1,Type=Integer,Description="Number of discrepant read pairs">
##INFO=<ID=GSNSAMPLES,Number=1,Type=Integer,Description="Number of samples with discrepant read pairs">
##INFO=<ID=GSNSNPS,Number=1,Type=Integer,Description="Number of snps inside the event">
##INFO=<ID=GSOUTLEFT,Number=1,Type=Integer,Description="Number of outlier read pairs on left">
##INFO=<ID=GSOUTLIERS,Number=1,Type=Integer,Description="Number of outlier read pairs">
##INFO=<ID=GSOUTRIGHT,Number=1,Type=Integer,Description="Number of outlier read pairs on right">
##INFO=<ID=GSREADGROUPS,Number=.,Type=String,Description="Read groups contributing discrepant read pairs">
##INFO=<ID=GSREADNAMES,Number=.,Type=String,Description="Discrepant read pair identifiers">
##INFO=<ID=GSRPORIENTATION,Number=1,Type=String,Description="Read pair orientation">
##INFO=<ID=GSSAMPLES,Number=.,Type=String,Description="Samples contributing discrepant read pairs">
##INFO=<ID=GSSNPHET,Number=1,Type=Float,Description="Fraction of het snp genotype calls inside the event">
##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
##INFO=<ID=NOVEL,Number=0,Type=Flag,Description="Indicates a novel structural variation">
##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##fileDate=20161013
##source=GenomeSTRiP_v2.00
#CHROM POS ID REF ALT QUAL FILTER INFO
My commands are as follows:
java -cp ${classpath} ${mx} \
org.broadinstitute.gatk.queue.QCommandLine \
-S ${SV_DIR}/qscript/SVPreprocess.q \
-S ${SV_DIR}/qscript/SVQScript.q \
-gatk ${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar \
--disableJobReport \
-cp ${classpath} \
-configFile ${SV_DIR}/conf/genstrip_parameters.txt\
-tempDir ${SV_TMPDIR} \
-R /home/liyc/test_bam/1000G_phase1/human_g1k_v37.fasta \
-genomeMaskFile /home/liyc/test_bam/1000G_phase1/human_g1k_v37.svmask.fasta \
-copyNumberMaskFile /home/liyc/test_bam/1000G_phase1/human_g1k_v37.gcmask.fasta \
-genderMaskBedFile /home/liyc/test_bam/1000G_phase1/human_g1k_v37.gendermask.bed \
-runDirectory ${runDir} \
-md ${runDir}/metadata \
-disableGATKTraversal \
-useMultiStep \
-reduceInsertSizeDistributions false \
-computeGCProfiles true \
-computeReadCounts true \
-jobLogDir ${runDir}/logs \
-I ${inputFile} \
-run \
|| exit 1
# Run discovery.
java -cp ${classpath} ${mx} \
org.broadinstitute.gatk.queue.QCommandLine \
-S ${SV_DIR}/qscript/SVDiscovery.q \
-S ${SV_DIR}/qscript/SVQScript.q \
-gatk ${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar \
--disableJobReport \
-cp ${classpath} \
-configFile /home/liyc/SV/svtoolkit/installtest/conf/genstrip_installtest_parameters.txt\
-tempDir ${SV_TMPDIR} \
-R /home/liyc/test_bam/1000G_phase1/human_g1k_v37.fasta \
-genomeMaskFile /home/liyc/test_bam/1000G_phase1/human_g1k_v37.svmask.fasta \
-genderMapFile /home/liyc/test_bam/test_gender.map \
-runDirectory ${runDir} \
-md ${runDir}/metadata \
-disableGATKTraversal \
-minimumSize 100 \
-maximumSize 1000000 \
-jobLogDir ${runDir}/logs \
-suppressVCFCommandLines \
-I ${inputFile} \
-O ${sites} \
-run \
|| exit 1
During SVPreprocess I ran into 3 problems. The first is:
INFO 14:43:05,581 QGraph - Failed: samtools index test2_2_new/metadata/headers.bam
I have extracted just the failed component. I solved this by running the command by hand:
samtools index headers.bam
The second and third problems are as follows:
INFO 14:59:58,545 QGraph - Failed: 'java' '-Xmx2048m' '-XX:+UseParallelOldGC' '-XX:ParallelGCThreads=4' '-XX:GCTimeLimit=50' '-XX:GCHeapFreeLimit=10' '-Djava.io.tmpdir=/home/liyc/test_bam/test2_raw/tmpdir2_2' '-cp' '/home/liyc/SV/svtoolkit/liuc_sv/svtoolkit/lib/SVToolkit.jar:/home/liyc/SV/svtoolkit/liuc_sv/svtoolkit/lib/gatk/GenomeAnalysisTK.jar:/home/liyc/SV/svtoolkit/liuc_sv/svtoolkit/lib/gatk/Queue.jar' '-cp' '/home/liyc/SV/svtoolkit/liuc_sv/svtoolkit/lib/SVToolkit.jar:/home/liyc/SV/svtoolkit/liuc_sv/svtoolkit/lib/gatk/GenomeAnalysisTK.jar:/home/liyc/SV/svtoolkit/liuc_sv/svtoolkit/lib/gatk/Queue.jar' 'org.broadinstitute.sv.apps.ComputeDepthProfiles' '-O' '/home/liyc/test_bam/test2_raw/test2_2_new/metadata/profiles_100Kb/profile_seq_2_100000.dat.gz' '-I' 'test2_2_new/metadata/headers.bam' '-configFile' '/home/liyc/SV/svtoolkit/liuc_sv/svtoolkit/conf/genstrip_parameters.txt' '-R' '/home/liyc/test_bam/1000G_phase1/human_g1k_v37.fasta' '-L' '2:0-0' '-genomeMaskFile' '/home/liyc/test_bam/1000G_phase1/human_g1k_v37.svmask.fasta' '-md' 'test2_2_new/metadata' '-profileBinSize' '100000' '-maximumReferenceGapLength' '10000'
INFO 14:59:58,545 QGraph - Log: /home/liyc/test_bam/test2_raw/test2_2_new/logs/SVPreprocess-21.out
INFO 14:59:58,546 QCommandLine - Script failed: 2 Pend, 0 Run, 1 Fail, 186 Done
I solved them simply by rerunning Queue, and it did not report any failures.
The header of my headers.bam is:
@HD VN:1.5 GO:none SO:coordinate
@SQ SN:1 LN:249250621
@SQ SN:2 LN:243199373
@SQ SN:3 LN:198022430
@SQ SN:4 LN:191154276
@SQ SN:5 LN:180915260
@SQ SN:6 LN:171115067
@SQ SN:7 LN:159138663
@SQ SN:8 LN:146364022
@SQ SN:9 LN:141213431
@SQ SN:10 LN:135534747
@SQ SN:11 LN:135006516
@SQ SN:12 LN:133851895
@SQ SN:13 LN:115169878
@SQ SN:14 LN:107349540
@SQ SN:15 LN:102531392
@SQ SN:16 LN:90354753
@SQ SN:17 LN:81195210
@SQ SN:18 LN:78077248
@SQ SN:19 LN:59128983
@SQ SN:20 LN:63025520
@SQ SN:21 LN:48129895
@SQ SN:22 LN:51304566
@SQ SN:X LN:155270560
@SQ SN:Y LN:59373566
@SQ SN:MT LN:16569
@SQ SN:GL000207.1 LN:4262
@SQ SN:GL000226.1 LN:15008
@SQ SN:GL000229.1 LN:19913
@SQ SN:GL000231.1 LN:27386
@SQ SN:GL000210.1 LN:27682
@SQ SN:GL000239.1 LN:33824
@SQ SN:GL000235.1 LN:34474
@SQ SN:GL000201.1 LN:36148
@SQ SN:GL000247.1 LN:36422
@SQ SN:GL000245.1 LN:36651
@SQ SN:GL000197.1 LN:37175
@SQ SN:GL000203.1 LN:37498
@SQ SN:GL000246.1 LN:38154
@SQ SN:GL000249.1 LN:38502
@SQ SN:GL000196.1 LN:38914
@SQ SN:GL000248.1 LN:39786
@SQ SN:GL000244.1 LN:39929
@SQ SN:GL000238.1 LN:39939
@SQ SN:GL000202.1 LN:40103
@SQ SN:GL000234.1 LN:40531
@SQ SN:GL000232.1 LN:40652
@SQ SN:GL000206.1 LN:41001
@SQ SN:GL000240.1 LN:41933
@SQ SN:GL000236.1 LN:41934
@SQ SN:GL000241.1 LN:42152
@SQ SN:GL000243.1 LN:43341
@SQ SN:GL000242.1 LN:43523
@SQ SN:GL000230.1 LN:43691
@SQ SN:GL000237.1 LN:45867
@SQ SN:GL000233.1 LN:45941
@SQ SN:GL000204.1 LN:81310
@SQ SN:GL000198.1 LN:90085
@SQ SN:GL000208.1 LN:92689
@SQ SN:GL000191.1 LN:106433
@SQ SN:GL000227.1 LN:128374
@SQ SN:GL000228.1 LN:129120
@SQ SN:GL000214.1 LN:137718
@SQ SN:GL000221.1 LN:155397
@SQ SN:GL000209.1 LN:159169
@SQ SN:GL000218.1 LN:161147
@SQ SN:GL000220.1 LN:161802
@SQ SN:GL000213.1 LN:164239
@SQ SN:GL000211.1 LN:166566
@SQ SN:GL000199.1 LN:169874
@SQ SN:GL000217.1 LN:172149
@SQ SN:GL000216.1 LN:172294
@SQ SN:GL000215.1 LN:172545
@SQ SN:GL000205.1 LN:174588
@SQ SN:GL000219.1 LN:179198
@SQ SN:GL000224.1 LN:179693
@SQ SN:GL000223.1 LN:180455
@SQ SN:GL000195.1 LN:182896
@SQ SN:GL000212.1 LN:186858
@SQ SN:GL000222.1 LN:186861
@SQ SN:GL000200.1 LN:187035
@SQ SN:GL000193.1 LN:189789
@SQ SN:GL000194.1 LN:191469
@SQ SN:GL000225.1 LN:211173
@SQ SN:GL000192.1 LN:547496
@RG ID:ST-E0020653.1 PL:ILLUMINA PU:H5KFTCCXX.1.test LB:lib1 SM:test1
@PG ID:bwa PN:bwa VN:0.7.12-r1039 CL:bwa mem -t 10 -R @RG\tID:ST-E0020653.1\tPL:ILLUMINA\tPU:H5KFTCCXX.1.test\tLB:lib1\tSM:test1 /home/liyc/test_bam/1000G_phase1/human_g1k_v37.fasta 1_test2.fastq 2_test2.fastq
I use the bwa installed on my system, whose version is:
Program: bwa (alignment via Burrows-Wheeler transformation)
Version: 0.7.12-r1039
Contact: Heng Li <lh3@sanger.ac.uk>
Thank you very much!