11package org .broadinstitute .hellbender .tools .spark ;
22
33import htsjdk .samtools .*;
4+ import htsjdk .samtools .BAMSBIIndexer ;
5+ import htsjdk .samtools .seekablestream .SeekableFileStream ;
6+ import htsjdk .samtools .seekablestream .SeekableStream ;
7+ import htsjdk .samtools .util .BlockCompressedFilePointerUtil ;
48import org .apache .logging .log4j .LogManager ;
59import org .apache .logging .log4j .Logger ;
610import org .broadinstitute .barclay .argparser .Argument ;
1418import org .broadinstitute .hellbender .utils .io .IOUtils ;
1519import org .broadinstitute .hellbender .utils .read .ReadConstants ;
1620import org .codehaus .plexus .util .FileUtils ;
17- import org .seqdoop .hadoop_bam .SplittingBAMIndexer ;
1821import picard .cmdline .programgroups .OtherProgramGroup ;
1922
2023import java .io .*;
@@ -70,15 +73,15 @@ public final class CreateHadoopBamSplittingIndex extends CommandLineProgram {
7073
7174 @ Argument (fullName = StandardArgumentDefinitions .OUTPUT_LONG_NAME ,
7275 shortName = StandardArgumentDefinitions .OUTPUT_SHORT_NAME ,
73- doc = "The BAM splitting_index file. If this is unspecified an index will be created with the same name as " +
74- "the input file but with the additional extension " + SplittingBAMIndexer . OUTPUT_FILE_EXTENSION ,
76+ doc = "The splitting index (SBI) file. If this is unspecified an index will be created with the same name as " +
77+ "the input file but with the additional extension " + SBIIndex . FILE_EXTENSION ,
7578 optional = true )
7679 public File output ;
7780
7881 @ Argument (fullName = SPLITTING_INDEX_GRANULARITY_LONG_NAME ,
7982 doc = "Splitting index granularity, an entry is created in the index every this many reads." ,
8083 optional = true )
81- public int granularity = SplittingBAMIndexer .DEFAULT_GRANULARITY ;
84+ public long granularity = SBIIndexWriter .DEFAULT_GRANULARITY ;
8285
8386 @ Argument (fullName = CREATE_BAI_LONG_NAME ,
8487 doc = "Set this to create a bai index at the same time as creating a splitting index" ,
@@ -89,7 +92,7 @@ public final class CreateHadoopBamSplittingIndex extends CommandLineProgram {
8992 @ Override
9093 public Object doWork () {
9194 if ( granularity <= 0 ) {
92- throw new CommandLineException .BadArgumentValue (SPLITTING_INDEX_GRANULARITY_LONG_NAME , Integer .toString (granularity ), "Granularity must be > 0" );
95+ throw new CommandLineException .BadArgumentValue (SPLITTING_INDEX_GRANULARITY_LONG_NAME , Long .toString (granularity ), "Granularity must be > 0" );
9396 }
9497 final File index = getOutputFile (output , inputBam );
9598 if (createBai ){
@@ -101,19 +104,17 @@ public Object doWork() {
101104 return 0 ;
102105 }
103106
104- private static void createOnlySplittingIndex (final File inputBam , final File index , final int granularity ) {
107+ private static void createOnlySplittingIndex (final File inputBam , final File index , final long granularity ) {
105108 assertIsBam (inputBam );
106- //createBamSplittingIndex(inputBam, getOutputFile(output, inputBam), readValidationStringency, granularity);
107- try (BufferedInputStream in = new BufferedInputStream (new FileInputStream (inputBam ));
108- BufferedOutputStream out = new BufferedOutputStream (new FileOutputStream (index ))) {
109- SplittingBAMIndexer .index (in , out , inputBam .length (), granularity );
110-
109+ try (SeekableStream in = new SeekableFileStream (inputBam );
110+ BufferedOutputStream out = new BufferedOutputStream (new FileOutputStream (index ))) {
111+ BAMSBIIndexer .createIndex (in , out , granularity );
111112 } catch (final IOException e ) {
112113 throw new UserException ("Couldn't create splitting index" , e );
113114 }
114115 }
115116
116- private static void createBaiAndSplittingIndex (final File inputBam , final File index , final int granularity , final ValidationStringency readValidationStringency ) {
117+ private static void createBaiAndSplittingIndex (final File inputBam , final File index , final long granularity , final ValidationStringency readValidationStringency ) {
117118 assertIsBam (inputBam );
118119 try (SamReader reader = SamReaderFactory .makeDefault ()
119120 .validationStringency (readValidationStringency )
@@ -122,14 +123,24 @@ private static void createBaiAndSplittingIndex(final File inputBam, final File i
122123 BufferedOutputStream out = new BufferedOutputStream (new FileOutputStream (index ))) {
123124 final SAMFileHeader header = reader .getFileHeader ();
124125 assertBamIsCoordinateSorted (header );
125- final SplittingBAMIndexer indexer = new SplittingBAMIndexer (out , granularity );
126+ final SBIIndexWriter indexer = new SBIIndexWriter (out , granularity );
126127
127128 final BAMIndexer bamIndexer = new BAMIndexer (IOUtils .replaceExtension (index , BAMIndex .BAMIndexSuffix ), header );
129+ BAMFileSpan lastFilePointer = null ;
128130 for (final SAMRecord read : reader ){
129- indexer .processAlignment (read );
131+ BAMFileSpan filePointer = (BAMFileSpan ) read .getFileSource ().getFilePointer ();
132+ indexer .processRecord (filePointer .getFirstOffset ());
130133 bamIndexer .processAlignment (read );
134+ lastFilePointer = filePointer ;
135+ }
136+ long nextStart = 0 ;
137+ if (lastFilePointer != null && !lastFilePointer .getChunks ().isEmpty ()) {
138+ nextStart = lastFilePointer .getChunks ().get (0 ).getChunkEnd ();
139+ }
140+ if (nextStart == 0 ) {
141+ nextStart = BlockCompressedFilePointerUtil .makeFilePointer (inputBam .length ()); // default to file length (in case of no reads)
131142 }
132- indexer .finish (inputBam .length ());
143+ indexer .finish (nextStart , inputBam .length ()); // nextStart is start of next record that would be added
133144 bamIndexer .finish ();
134145 } catch (final IOException e ) {
135146 throw new UserException ("Couldn't create splitting index" , e );
@@ -153,11 +164,11 @@ private static void assertIsBam(final File inputBam) {
153164
154165 private static File getOutputFile (final File suggestedOutput , final File input ) {
155166 if (suggestedOutput == null ){
156- return new File (input .getPath () + SplittingBAMIndexer . OUTPUT_FILE_EXTENSION );
167+ return new File (input .getPath () + SBIIndex . FILE_EXTENSION );
157168 } else {
158- if (!suggestedOutput .getAbsolutePath ().endsWith ("bam" + SplittingBAMIndexer . OUTPUT_FILE_EXTENSION )){
159- logger .warn ("Creating a splitting index with an extension that doesn't match "
160- + "bam" +SplittingBAMIndexer . OUTPUT_FILE_EXTENSION + ". Output file: " +suggestedOutput );
169+ if (!suggestedOutput .getAbsolutePath ().endsWith ("bam" + SBIIndex . FILE_EXTENSION )){
170+ logger .warn ("Creating a splitting index (SBI) with an extension that doesn't match "
171+ + "bam" +SBIIndex . FILE_EXTENSION + ". Output file: " +suggestedOutput );
161172 }
162173 return suggestedOutput ;
163174 }
0 commit comments