11#include  " Prefiltering.h" 
22#include  " NucleotideMatrix.h" 
33#include  " ReducedMatrix.h" 
4- #include  " ExtendedSubstitutionMatrix.h" 
54#include  " SubstitutionMatrixProfileStates.h" 
65#include  " DBWriter.h" 
76#include  " QueryMatcherTaxonomyHook.h" 
@@ -42,6 +41,7 @@ Prefiltering::Prefiltering(const std::string &queryDB,
4241        scoringMatrixFile(par.scoringMatrixFile),
4342        seedScoringMatrixFile(par.seedScoringMatrixFile),
4443        targetSeqType(targetSeqType),
44+         targetSearchMode(par.targetSearchMode),
4545        maxResListLen(par.maxResListLen),
4646        sensitivity(par.sensitivity),
4747        maxSeqLen(par.maxSeqLen),
@@ -52,7 +52,8 @@ Prefiltering::Prefiltering(const std::string &queryDB,
5252        aaBiasCorrectionScale(par.compBiasCorrectionScale),
5353        covThr(par.covThr), covMode(par.covMode), includeIdentical(par.includeIdentity),
5454        preloadMode(par.preloadMode),
55-         threads(static_cast <unsigned  int >(par.threads)), compressed(par.compressed) {
55+         threads(static_cast <unsigned  int >(par.threads)),
56+         compressed(par.compressed) {
5657    sameQTDB = isSameQTDB ();
5758
5859    //  init the substitution matrices
@@ -173,7 +174,8 @@ Prefiltering::Prefiltering(const std::string &queryDB,
173174
174175    takeOnlyBestKmer = (par.exactKmerMatching ==1 ) ||
175176                       (Parameters::isEqualDbtype (targetSeqType, Parameters::DBTYPE_HMM_PROFILE) && Parameters::isEqualDbtype (querySeqType,Parameters::DBTYPE_AMINO_ACIDS)) ||
176-                        (Parameters::isEqualDbtype (targetSeqType, Parameters::DBTYPE_NUCLEOTIDES) && Parameters::isEqualDbtype (querySeqType,Parameters::DBTYPE_NUCLEOTIDES));
177+                        (Parameters::isEqualDbtype (targetSeqType, Parameters::DBTYPE_NUCLEOTIDES) && Parameters::isEqualDbtype (querySeqType,Parameters::DBTYPE_NUCLEOTIDES)) ||
178+                        (targetSearchMode == 1 );
177179
178180    //  memoryLimit in bytes
179181    size_t  memoryLimit=Util::computeMemory (par.splitMemoryLimit );
@@ -203,6 +205,13 @@ Prefiltering::Prefiltering(const std::string &queryDB,
203205
204206    Debug (Debug::INFO) << " Target database size: " getSize () << "  type: " Parameters::getDbTypeName (targetSeqType) << " \n " 
205207
208+     if  (Parameters::isEqualDbtype (querySeqType, Parameters::DBTYPE_AMINO_ACIDS)) {
209+         kmerSubMat->alphabetSize  = kmerSubMat->alphabetSize  - 1 ;
210+         _2merSubMatrix = getScoreMatrix (*kmerSubMat, 2 );
211+         _3merSubMatrix = getScoreMatrix (*kmerSubMat, 3 );
212+         kmerSubMat->alphabetSize  = alphabetSize;
213+     }
214+ 
206215    if  (splitMode == Parameters::QUERY_DB_SPLIT) {
207216        //  create the whole index table
208217        getIndexTable (0 , 0 , tdbr->getSize ());
@@ -214,12 +223,7 @@ Prefiltering::Prefiltering(const std::string &queryDB,
214223        EXIT (EXIT_FAILURE);
215224    }
216225
217-     if  (Parameters::isEqualDbtype (querySeqType, Parameters::DBTYPE_AMINO_ACIDS)) {
218-         kmerSubMat->alphabetSize  = kmerSubMat->alphabetSize  - 1 ;
219-         _2merSubMatrix = getScoreMatrix (*kmerSubMat, 2 );
220-         _3merSubMatrix = getScoreMatrix (*kmerSubMat, 3 );
221-         kmerSubMat->alphabetSize  = alphabetSize;
222-     }
226+ 
223227
224228    if  (par.taxonList .length () > 0 ) {
225229        taxonomyHook = new  QueryMatcherTaxonomyHook (targetDB, tdbr, par.taxonList );
@@ -519,7 +523,7 @@ void Prefiltering::getIndexTable(int split, size_t dbFrom, size_t dbSize) {
519523        Sequence tseq (maxSeqLen, targetSeqType, kmerSubMat, kmerSize, spacedKmer, aaBiasCorrection, true , spacedKmerPattern);
520524        int  localKmerThr = (Parameters::isEqualDbtype (querySeqType, Parameters::DBTYPE_HMM_PROFILE) ||
521525                            Parameters::isEqualDbtype (querySeqType, Parameters::DBTYPE_NUCLEOTIDES) ||
522-                             (Parameters::isEqualDbtype (targetSeqType, Parameters::DBTYPE_HMM_PROFILE) == false  && takeOnlyBestKmer == true ) ) ? 0  : kmerThr;
526+                             (Parameters::isEqualDbtype (targetSeqType, Parameters::DBTYPE_HMM_PROFILE) == false  && targetSearchMode ==  0  &&  takeOnlyBestKmer == true ) ) ? 0  : kmerThr;
523527
524528        //  remove X or N for seeding
525529        int  adjustAlphabetSize = (Parameters::isEqualDbtype (targetSeqType, Parameters::DBTYPE_NUCLEOTIDES) ||
@@ -530,7 +534,10 @@ void Prefiltering::getIndexTable(int split, size_t dbFrom, size_t dbSize) {
530534        SequenceLookup **maskedLookup   = maskMode == 1  || maskLowerCaseMode == 1  ? &sequenceLookup : NULL ;
531535
532536        Debug (Debug::INFO) << " Index table k-mer threshold: " "  at k-mer size " "  \n " 
533-         IndexBuilder::fillDatabase (indexTable, maskedLookup, unmaskedLookup, *kmerSubMat,  &tseq, tdbr, dbFrom, dbFrom + dbSize, localKmerThr, maskMode, maskLowerCaseMode, maskProb);
537+         IndexBuilder::fillDatabase (indexTable, maskedLookup, unmaskedLookup, *kmerSubMat,
538+                                    _3merSubMatrix, _2merSubMatrix,
539+                                    &tseq, tdbr, dbFrom, dbFrom + dbSize,
540+                                    localKmerThr, maskMode, maskLowerCaseMode, maskProb, targetSearchMode);
534541
535542        //  sequenceLookup has to be temporarily present to speed up masking
536543        //  afterwards it is not needed anymore without diagonal scoring
0 commit comments