Skip to content

Commit 71dd32e

Browse files
Add target similar k-mer search to prefilter
1 parent 390457d commit 71dd32e

File tree

10 files changed

+94
-48
lines changed

10 files changed

+94
-48
lines changed

src/commons/Parameters.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Parameters::Parameters():
3737
alphabetSize(NuclAA<int>(INT_MAX,INT_MAX)),
3838
PARAM_S(PARAM_S_ID, "-s", "Sensitivity", "Sensitivity: 1.0 faster; 4.0 fast; 7.5 sensitive", typeid(float), (void *) &sensitivity, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PREFILTER),
3939
PARAM_K(PARAM_K_ID, "-k", "k-mer length", "k-mer length (0: automatically set to optimum)", typeid(int), (void *) &kmerSize, "^[0-9]{1}[0-9]*$", MMseqsParameter::COMMAND_PREFILTER | MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT),
40+
PARAM_TARGET_SEARCH_MODE(PARAM_TARGET_SEARCH_MODE_ID, "--target-search-mode", "Target search mode", "target search mode (0: regular k-mer, 1: similar k-mer)", typeid(int), (void *) &targetSearchMode, "^[0-1]{1}$", MMseqsParameter::COMMAND_PREFILTER | MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT),
4041
PARAM_THREADS(PARAM_THREADS_ID, "--threads", "Threads", "Number of CPU-cores used (all by default)", typeid(int), (void *) &threads, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_COMMON),
4142
PARAM_COMPRESSED(PARAM_COMPRESSED_ID, "--compressed", "Compressed", "Write compressed output", typeid(int), (void *) &compressed, "^[0-1]{1}$", MMseqsParameter::COMMAND_COMMON),
4243
PARAM_ALPH_SIZE(PARAM_ALPH_SIZE_ID, "--alph-size", "Alphabet size", "Alphabet size (range 2-21)", typeid(MultiParam<NuclAA<int>>), (void *) &alphabetSize, "", MMseqsParameter::COMMAND_PREFILTER | MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT),
@@ -392,6 +393,7 @@ Parameters::Parameters():
392393
prefilter.push_back(&PARAM_SEED_SUB_MAT);
393394
prefilter.push_back(&PARAM_S);
394395
prefilter.push_back(&PARAM_K);
396+
prefilter.push_back(&PARAM_TARGET_SEARCH_MODE);
395397
prefilter.push_back(&PARAM_K_SCORE);
396398
prefilter.push_back(&PARAM_ALPH_SIZE);
397399
prefilter.push_back(&PARAM_MAX_SEQ_LEN);
@@ -2211,6 +2213,7 @@ void Parameters::setDefaults() {
22112213
seedScoringMatrixFile = MultiParam<NuclAA<std::string>>(NuclAA<std::string>("VTML80.out", "nucleotide.out"));
22122214

22132215
kmerSize = 0;
2216+
targetSearchMode = 0;
22142217
kmerScore.values = INT_MAX;
22152218
alphabetSize = MultiParam<NuclAA<int>>(NuclAA<int>(21,5));
22162219
maxSeqLen = MAX_SEQ_LEN; // 2^16

src/commons/Parameters.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,7 @@ class Parameters {
383383
// PREFILTER
384384
float sensitivity; // target sens
385385
int kmerSize; // kmer size for the prefilter
386+
int targetSearchMode; // target search mode
386387
MultiParam<SeqProf<int>> kmerScore; // kmer score for the prefilter
387388
MultiParam<NuclAA<int>> alphabetSize; // alphabet size for the prefilter
388389
int compBiasCorrection; // Aminoacid composiont correction
@@ -716,6 +717,7 @@ class Parameters {
716717

717718
PARAMETER(PARAM_S)
718719
PARAMETER(PARAM_K)
720+
PARAMETER(PARAM_TARGET_SEARCH_MODE)
719721
PARAMETER(PARAM_THREADS)
720722
PARAMETER(PARAM_COMPRESSED)
721723
PARAMETER(PARAM_ALPH_SIZE)

src/prefiltering/IndexBuilder.cpp

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "IndexBuilder.h"
22
#include "tantan.h"
3+
#include "ExtendedSubstitutionMatrix.h"
34

45
#ifdef OPENMP
56
#include <omp.h>
@@ -51,13 +52,14 @@ class DbInfo {
5152

5253

5354
void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedLookup,
54-
SequenceLookup **unmaskedLookup,BaseMatrix &subMat, Sequence *seq,
55+
SequenceLookup **unmaskedLookup,BaseMatrix &subMat,
56+
ScoreMatrix & three, ScoreMatrix & two, Sequence *seq,
5557
DBReader<unsigned int> *dbr, size_t dbFrom, size_t dbTo, int kmerThr,
56-
bool mask, bool maskLowerCaseMode, float maskProb) {
58+
bool mask, bool maskLowerCaseMode, float maskProb, int targetSearchMode) {
5759
Debug(Debug::INFO) << "Index table: counting k-mers\n";
5860

5961
const bool isProfile = Parameters::isEqualDbtype(seq->getSeqType(), Parameters::DBTYPE_HMM_PROFILE);
60-
62+
const bool isTargetSimiliarKmerSearch = isProfile || targetSearchMode;
6163
dbTo = std::min(dbTo, dbr->getSize());
6264
size_t dbSize = dbTo - dbFrom;
6365
DbInfo* info = new DbInfo(dbFrom, dbTo, seq->getEffectiveKmerSize(), *dbr);
@@ -101,9 +103,13 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
101103
Sequence s(seq->getMaxLen(), seq->getSeqType(), &subMat, seq->getKmerSize(), seq->isSpaced(), false, true, seq->getUserSpacedKmerPattern());
102104

103105
KmerGenerator *generator = NULL;
104-
if (isProfile) {
106+
if (isTargetSimiliarKmerSearch) {
105107
generator = new KmerGenerator(seq->getKmerSize(), indexTable->getAlphabetSize(), kmerThr);
106-
generator->setDivideStrategy(s.profile_matrix);
108+
if(isProfile){
109+
generator->setDivideStrategy(s.profile_matrix);
110+
}else{
111+
generator->setDivideStrategy(&three, &two);
112+
}
107113
}
108114

109115
unsigned int *buffer = static_cast<unsigned int*>(malloc(seq->getMaxLen() * sizeof(unsigned int)));
@@ -122,10 +128,15 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
122128
bufferSize = seq->getMaxLen();
123129
}
124130
// count similar or exact k-mers based on sequence type
125-
if (isProfile) {
131+
if (isTargetSimiliarKmerSearch) {
126132
// Find out if we should also mask profiles
127133
totalKmerCount += indexTable->addSimilarKmerCount(&s, generator);
128-
(*unmaskedLookup)->addSequence(s.numConsensusSequence, s.L, id - dbFrom, info->sequenceOffsets[id - dbFrom]);
134+
unsigned char * seq = (isProfile) ? s.numConsensusSequence : s.numSequence;
135+
if (unmaskedLookup != NULL) {
136+
(*unmaskedLookup)->addSequence(seq, s.L, id - dbFrom, info->sequenceOffsets[id - dbFrom]);
137+
} else if (maskedLookup != NULL) {
138+
(*maskedLookup)->addSequence(seq, s.L, id - dbFrom, info->sequenceOffsets[id - dbFrom]);
139+
}
129140
} else {
130141
// Do not mask if column state sequences are used
131142
if (unmaskedLookup != NULL) {
@@ -219,9 +230,13 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
219230
IndexEntryLocalTmp *buffer = static_cast<IndexEntryLocalTmp *>(malloc( seq->getMaxLen() * sizeof(IndexEntryLocalTmp)));
220231
size_t bufferSize = seq->getMaxLen();
221232
KmerGenerator *generator = NULL;
222-
if (isProfile) {
233+
if (isTargetSimiliarKmerSearch) {
223234
generator = new KmerGenerator(seq->getKmerSize(), indexTable->getAlphabetSize(), kmerThr);
224-
generator->setDivideStrategy(s.profile_matrix);
235+
if(isProfile){
236+
generator->setDivideStrategy(s.profile_matrix);
237+
}else{
238+
generator->setDivideStrategy(&three, &two);
239+
}
225240
}
226241

227242
#pragma omp for schedule(dynamic, 100)
@@ -230,7 +245,7 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
230245
progress2.updateProgress();
231246

232247
unsigned int qKey = dbr->getDbKey(id);
233-
if (isProfile) {
248+
if (isTargetSimiliarKmerSearch) {
234249
s.mapSequence(id - dbFrom, qKey, dbr->getData(id, thread_idx), dbr->getSeqLen(id));
235250
indexTable->addSimilarSequence(&s, generator, &buffer, bufferSize, &idxer);
236251
} else {

src/prefiltering/IndexBuilder.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@
22
#define MMSEQS_INDEXBUILDER_H
33

44
#include "IndexTable.h"
5+
#include "ExtendedSubstitutionMatrix.h"
56

67
class IndexBuilder {
78
public:
89
static void fillDatabase(IndexTable *indexTable, SequenceLookup **maskedLookup, SequenceLookup **unmaskedLookup,
9-
BaseMatrix &subMat, Sequence *seq,
10-
DBReader<unsigned int> *dbr, size_t dbFrom, size_t dbTo, int kmerThr, bool mask, bool maskLowerCaseMode, float maskProb);
10+
BaseMatrix &subMat,
11+
ScoreMatrix & three, ScoreMatrix & two, Sequence *seq,
12+
DBReader<unsigned int> *dbr, size_t dbFrom, size_t dbTo, int kmerThr,
13+
bool mask, bool maskLowerCaseMode, float maskProb, int targetSearchMode);
1114
};
1215

1316
#endif

src/prefiltering/IndexTable.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ class IndexTable {
101101
//idxer->reset();
102102
while(s->hasNextKmer()){
103103
const unsigned char * kmer = s->nextKmer();
104+
if(s->kmerContainsX()){
105+
continue;
106+
}
104107
const std::pair<size_t *, size_t> kmerList = kmerGenerator->generateKmerList(kmer);
105108

106109
//unsigned int kmerIdx = idxer->int2index(kmer, 0, kmerSize);
@@ -302,6 +305,9 @@ class IndexTable {
302305
size_t kmerPos = 0;
303306
while(s->hasNextKmer()){
304307
const unsigned char * kmer = s->nextKmer();
308+
if(s->kmerContainsX()){
309+
continue;
310+
}
305311
std::pair<size_t *, size_t> scoreMatrix = kmerGenerator->generateKmerList(kmer);
306312
if(kmerPos+scoreMatrix.second >= bufferSize){
307313
*buffer = static_cast<IndexEntryLocalTmp*>(realloc(*buffer, sizeof(IndexEntryLocalTmp) * bufferSize*2));

src/prefiltering/Prefiltering.cpp

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#include "Prefiltering.h"
22
#include "NucleotideMatrix.h"
33
#include "ReducedMatrix.h"
4-
#include "ExtendedSubstitutionMatrix.h"
54
#include "SubstitutionMatrixProfileStates.h"
65
#include "DBWriter.h"
76
#include "QueryMatcherTaxonomyHook.h"
@@ -42,6 +41,7 @@ Prefiltering::Prefiltering(const std::string &queryDB,
4241
scoringMatrixFile(par.scoringMatrixFile),
4342
seedScoringMatrixFile(par.seedScoringMatrixFile),
4443
targetSeqType(targetSeqType),
44+
targetSearchMode(par.targetSearchMode),
4545
maxResListLen(par.maxResListLen),
4646
sensitivity(par.sensitivity),
4747
maxSeqLen(par.maxSeqLen),
@@ -52,7 +52,8 @@ Prefiltering::Prefiltering(const std::string &queryDB,
5252
aaBiasCorrectionScale(par.compBiasCorrectionScale),
5353
covThr(par.covThr), covMode(par.covMode), includeIdentical(par.includeIdentity),
5454
preloadMode(par.preloadMode),
55-
threads(static_cast<unsigned int>(par.threads)), compressed(par.compressed) {
55+
threads(static_cast<unsigned int>(par.threads)),
56+
compressed(par.compressed) {
5657
sameQTDB = isSameQTDB();
5758

5859
// init the substitution matrices
@@ -173,7 +174,8 @@ Prefiltering::Prefiltering(const std::string &queryDB,
173174

174175
takeOnlyBestKmer = (par.exactKmerMatching==1) ||
175176
(Parameters::isEqualDbtype(targetSeqType, Parameters::DBTYPE_HMM_PROFILE) && Parameters::isEqualDbtype(querySeqType,Parameters::DBTYPE_AMINO_ACIDS)) ||
176-
(Parameters::isEqualDbtype(targetSeqType, Parameters::DBTYPE_NUCLEOTIDES) && Parameters::isEqualDbtype(querySeqType,Parameters::DBTYPE_NUCLEOTIDES));
177+
(Parameters::isEqualDbtype(targetSeqType, Parameters::DBTYPE_NUCLEOTIDES) && Parameters::isEqualDbtype(querySeqType,Parameters::DBTYPE_NUCLEOTIDES)) ||
178+
(targetSearchMode == 1);
177179

178180
// memoryLimit in bytes
179181
size_t memoryLimit=Util::computeMemory(par.splitMemoryLimit);
@@ -203,6 +205,13 @@ Prefiltering::Prefiltering(const std::string &queryDB,
203205

204206
Debug(Debug::INFO) << "Target database size: " << tdbr->getSize() << " type: " <<Parameters::getDbTypeName(targetSeqType) << "\n";
205207

208+
if (Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_AMINO_ACIDS)) {
209+
kmerSubMat->alphabetSize = kmerSubMat->alphabetSize - 1;
210+
_2merSubMatrix = getScoreMatrix(*kmerSubMat, 2);
211+
_3merSubMatrix = getScoreMatrix(*kmerSubMat, 3);
212+
kmerSubMat->alphabetSize = alphabetSize;
213+
}
214+
206215
if (splitMode == Parameters::QUERY_DB_SPLIT) {
207216
// create the whole index table
208217
getIndexTable(0, 0, tdbr->getSize());
@@ -214,12 +223,7 @@ Prefiltering::Prefiltering(const std::string &queryDB,
214223
EXIT(EXIT_FAILURE);
215224
}
216225

217-
if (Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_AMINO_ACIDS)) {
218-
kmerSubMat->alphabetSize = kmerSubMat->alphabetSize - 1;
219-
_2merSubMatrix = getScoreMatrix(*kmerSubMat, 2);
220-
_3merSubMatrix = getScoreMatrix(*kmerSubMat, 3);
221-
kmerSubMat->alphabetSize = alphabetSize;
222-
}
226+
223227

224228
if (par.taxonList.length() > 0) {
225229
taxonomyHook = new QueryMatcherTaxonomyHook(targetDB, tdbr, par.taxonList);
@@ -519,7 +523,7 @@ void Prefiltering::getIndexTable(int split, size_t dbFrom, size_t dbSize) {
519523
Sequence tseq(maxSeqLen, targetSeqType, kmerSubMat, kmerSize, spacedKmer, aaBiasCorrection, true, spacedKmerPattern);
520524
int localKmerThr = (Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_HMM_PROFILE) ||
521525
Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_NUCLEOTIDES) ||
522-
(Parameters::isEqualDbtype(targetSeqType, Parameters::DBTYPE_HMM_PROFILE) == false && takeOnlyBestKmer == true) ) ? 0 : kmerThr;
526+
(Parameters::isEqualDbtype(targetSeqType, Parameters::DBTYPE_HMM_PROFILE) == false && targetSearchMode == 0 && takeOnlyBestKmer == true) ) ? 0 : kmerThr;
523527

524528
// remove X or N for seeding
525529
int adjustAlphabetSize = (Parameters::isEqualDbtype(targetSeqType, Parameters::DBTYPE_NUCLEOTIDES) ||
@@ -530,7 +534,10 @@ void Prefiltering::getIndexTable(int split, size_t dbFrom, size_t dbSize) {
530534
SequenceLookup **maskedLookup = maskMode == 1 || maskLowerCaseMode == 1 ? &sequenceLookup : NULL;
531535

532536
Debug(Debug::INFO) << "Index table k-mer threshold: " << localKmerThr << " at k-mer size " << kmerSize << " \n";
533-
IndexBuilder::fillDatabase(indexTable, maskedLookup, unmaskedLookup, *kmerSubMat, &tseq, tdbr, dbFrom, dbFrom + dbSize, localKmerThr, maskMode, maskLowerCaseMode, maskProb);
537+
IndexBuilder::fillDatabase(indexTable, maskedLookup, unmaskedLookup, *kmerSubMat,
538+
_3merSubMatrix, _2merSubMatrix,
539+
&tseq, tdbr, dbFrom, dbFrom + dbSize,
540+
localKmerThr, maskMode, maskLowerCaseMode, maskProb, targetSearchMode);
534541

535542
// sequenceLookup has to be temporarily present to speed up masking
536543
// afterwards it's not needed anymore without diagonal scoring

src/prefiltering/Prefiltering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ class Prefiltering {
9595
MultiParam<NuclAA<std::string>> scoringMatrixFile;
9696
MultiParam<NuclAA<std::string>> seedScoringMatrixFile;
9797
int targetSeqType;
98+
int targetSearchMode;
9899
bool takeOnlyBestKmer;
99100
size_t maxResListLen;
100101

src/prefiltering/PrefilteringIndexReader.cpp

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ void PrefilteringIndexReader::createIndexFile(const std::string &outDB,
5656
BaseMatrix *subMat, int maxSeqLen,
5757
bool hasSpacedKmer, const std::string &spacedKmerPattern,
5858
bool compBiasCorrection, int alphabetSize, int kmerSize, int maskMode,
59-
int maskLowerCase, float maskProb, int kmerThr, int splits, int indexSubset) {
59+
int maskLowerCase, float maskProb, int kmerThr, int targetSearchMode, int splits,
60+
int indexSubset) {
6061

6162
const int SPLIT_META = splits > 1 ? 0 : 0;
6263
const int SPLIT_SEQS = splits > 1 ? 1 : 0;
@@ -82,27 +83,6 @@ void PrefilteringIndexReader::createIndexFile(const std::string &outDB,
8283
writer.writeData(metadataptr, sizeof(metadata), META, SPLIT_META);
8384
writer.alignToPageSize(SPLIT_META);
8485

85-
if (Parameters::isEqualDbtype(seqType, Parameters::DBTYPE_HMM_PROFILE) == false && indexSubset != Parameters::INDEX_SUBSET_NO_PREFILTER) {
86-
int alphabetSize = subMat->alphabetSize;
87-
subMat->alphabetSize = subMat->alphabetSize-1;
88-
ScoreMatrix s3 = ExtendedSubstitutionMatrix::calcScoreMatrix(*subMat, 3);
89-
ScoreMatrix s2 = ExtendedSubstitutionMatrix::calcScoreMatrix(*subMat, 2);
90-
subMat->alphabetSize = alphabetSize;
91-
92-
char* serialized3mer = ScoreMatrix::serialize(s3);
93-
Debug(Debug::INFO) << "Write SCOREMATRIX3MER (" << SCOREMATRIX3MER << ")\n";
94-
writer.writeData(serialized3mer, ScoreMatrix::size(s3), SCOREMATRIX3MER, SPLIT_META);
95-
writer.alignToPageSize(SPLIT_META);
96-
ExtendedSubstitutionMatrix::freeScoreMatrix(s3);
97-
free(serialized3mer);
98-
99-
char* serialized2mer = ScoreMatrix::serialize(s2);
100-
Debug(Debug::INFO) << "Write SCOREMATRIX2MER (" << SCOREMATRIX2MER << ")\n";
101-
writer.writeData(serialized2mer, ScoreMatrix::size(s2), SCOREMATRIX2MER, SPLIT_META);
102-
writer.alignToPageSize(SPLIT_META);
103-
ExtendedSubstitutionMatrix::freeScoreMatrix(s2);
104-
free(serialized2mer);
105-
}
10686

10787
Debug(Debug::INFO) << "Write SCOREMATRIXNAME (" << SCOREMATRIXNAME << ")\n";
10888
char* subData = BaseMatrix::serialize(subMat->matrixName, subMat->matrixData);
@@ -213,6 +193,29 @@ void PrefilteringIndexReader::createIndexFile(const std::string &outDB,
213193
if (indexSubset == Parameters::INDEX_SUBSET_NO_PREFILTER) {
214194
splits = 0;
215195
}
196+
197+
ScoreMatrix s3;
198+
ScoreMatrix s2;
199+
if (Parameters::isEqualDbtype(seqType, Parameters::DBTYPE_HMM_PROFILE) == false && indexSubset != Parameters::INDEX_SUBSET_NO_PREFILTER) {
200+
int alphabetSize = subMat->alphabetSize;
201+
subMat->alphabetSize = subMat->alphabetSize-1;
202+
s3 = ExtendedSubstitutionMatrix::calcScoreMatrix(*subMat, 3);
203+
s2 = ExtendedSubstitutionMatrix::calcScoreMatrix(*subMat, 2);
204+
subMat->alphabetSize = alphabetSize;
205+
206+
char* serialized3mer = ScoreMatrix::serialize(s3);
207+
Debug(Debug::INFO) << "Write SCOREMATRIX3MER (" << SCOREMATRIX3MER << ")\n";
208+
writer.writeData(serialized3mer, ScoreMatrix::size(s3), SCOREMATRIX3MER, SPLIT_META);
209+
writer.alignToPageSize(SPLIT_META);
210+
free(serialized3mer);
211+
212+
char* serialized2mer = ScoreMatrix::serialize(s2);
213+
Debug(Debug::INFO) << "Write SCOREMATRIX2MER (" << SCOREMATRIX2MER << ")\n";
214+
writer.writeData(serialized2mer, ScoreMatrix::size(s2), SCOREMATRIX2MER, SPLIT_META);
215+
writer.alignToPageSize(SPLIT_META);
216+
free(serialized2mer);
217+
}
218+
216219
for (int s = 0; s < splits; s++) {
217220
size_t dbFrom = 0;
218221
size_t dbSize = 0;
@@ -226,7 +229,8 @@ void PrefilteringIndexReader::createIndexFile(const std::string &outDB,
226229
IndexBuilder::fillDatabase(&indexTable,
227230
(maskMode == 1 || maskLowerCase == 1) ? &sequenceLookup : NULL,
228231
(maskMode == 0 && maskLowerCase == 0) ? &sequenceLookup : NULL,
229-
*subMat, &seq, dbr1, dbFrom, dbFrom + dbSize, kmerThr, maskMode, maskLowerCase, maskProb);
232+
*subMat, s3, s2, &seq, dbr1, dbFrom, dbFrom + dbSize, kmerThr,
233+
maskMode, maskLowerCase, maskProb, targetSearchMode);
230234
indexTable.printStatistics(subMat->num2aa);
231235

232236
if (sequenceLookup == NULL) {
@@ -282,6 +286,11 @@ void PrefilteringIndexReader::createIndexFile(const std::string &outDB,
282286
writer.alignToPageSize(SPLIT_INDX + s);
283287
}
284288

289+
if (Parameters::isEqualDbtype(seqType, Parameters::DBTYPE_HMM_PROFILE) == false && indexSubset != Parameters::INDEX_SUBSET_NO_PREFILTER) {
290+
ExtendedSubstitutionMatrix::freeScoreMatrix(s3);
291+
ExtendedSubstitutionMatrix::freeScoreMatrix(s2);
292+
}
293+
285294
writer.close(false);
286295
}
287296

src/prefiltering/PrefilteringIndexReader.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ class PrefilteringIndexReader {
6060
DBReader<unsigned int> *alndbr,
6161
BaseMatrix *seedSubMat, int maxSeqLen, bool spacedKmer, const std::string &spacedKmerPattern,
6262
bool compBiasCorrection, int alphabetSize, int kmerSize, int maskMode,
63-
int maskLowerCase, float maskProb, int kmerThr, int splits, int indexSubset = 0);
63+
int maskLowerCase, float maskProb, int kmerThr, int targetSearchMode, int splits, int indexSubset = 0);
6464

6565
static DBReader<unsigned int> *openNewHeaderReader(DBReader<unsigned int>*dbr, unsigned int dataIdx, unsigned int indexIdx, int threads, bool touchIndex, bool touchData);
6666

0 commit comments

Comments
 (0)