@@ -97,15 +97,17 @@ std::pair<hit_t*, size_t> QueryMatcher::matchQuery(Sequence *querySeq, unsigned
9797    } else  {
9898        memset (compositionBias, 0 , sizeof (float ) * querySeq->L );
9999    }
100- 
100+     if (diagonalScoring == true ){
101+         ungappedAlignment->createProfile (querySeq, compositionBias);
102+     }
101103    size_t  resultSize = match (querySeq, compositionBias);
102104    if  (hook != NULL ) {
103105        resultSize = hook->afterDiagonalMatchingHook (*this , resultSize);
104106    }
105107    std::pair<hit_t  *, size_t > queryResult;
106108    if  (diagonalScoring) {
107109        //  write diagonal scores in count value
108-         ungappedAlignment->processQuery (querySeq, compositionBias,  foundDiagonals, resultSize);
110+         ungappedAlignment->align ( foundDiagonals, resultSize);
109111        memset (scoreSizes, 0 , SCORE_RANGE * sizeof (unsigned  int ));
110112        CounterResult * resultReadPos  = foundDiagonals;
111113        CounterResult * resultWritePos = foundDiagonals + resultSize;
@@ -267,35 +269,37 @@ size_t QueryMatcher::match(Sequence *seq, float *compositionBias) {
267269            // std::cout << seq->getDbKey() << std::endl;
268270            // idx.printKmer(index[kmerPos], kmerSize, kmerSubMat->num2aa);
269271            // std::cout << "\t" << current_i << "\t"<< index[kmerPos] << std::endl;
270-             // for (size_t i = 0; i < seqListSize; i++) {
271-             //     char diag = entries[i].position_j - current_i;
272-             //     std::cout << "(" << entries[i].seqId << " " << (int) diag << ")\t";
273-             // }
272+ //             for (size_t i = 0; i < seqListSize; i++) {
273+ //                 if(23865 == entries[i].seqId ){
274+ //                     char diag = entries[i].position_j - current_i;
275+ //                     std::cout << "(" << entries[i].seqId << " " << (int) diag << ")\t";
276+ //                 }
277+ //             }
274278            // std::cout << std::endl;
275279
276280            //  detected overflow while matching
277281            if  ((sequenceHits + seqListSize) >= lastSequenceHit) {
278282                stats->diagonalOverflow  = true ;
279-                 //  realloc foundDiagonals if only 10% of memory left
280-                 if ((foundDiagonalsSize - overflowHitCount) < 0.1  * foundDiagonalsSize){
281-                     foundDiagonalsSize *= 1.5 ;
282-                     foundDiagonals = (CounterResult*) realloc (foundDiagonals, foundDiagonalsSize * sizeof (CounterResult));
283-                     if (foundDiagonals == NULL ){
284-                         Debug (Debug::ERROR) << " Out of memory in QueryMatcher::match\n " 
285-                         EXIT (EXIT_FAILURE);
286-                     }
287-                 }
288283                //  last pointer
289284                indexPointer[current_i + 1 ] = sequenceHits;
290285                // std::cout << "Overflow in i=" << indexStart << std::endl;
291286                const  size_t  hitCount = findDuplicates (indexPointer,
292287                                                       foundDiagonals + overflowHitCount,
293288                                                       foundDiagonalsSize - overflowHitCount,
294289                                                       indexStart, current_i, (diagonalScoring == false ));
295- 
290+                  //  this happens only if we have two overflows in a row 
296291                if  (overflowHitCount != 0 ) {
297-                     //  merge lists, hitCount is max. dbSize so there can be no overflow in mergeElements
298-                     overflowHitCount = mergeElements (foundDiagonals, hitCount + overflowHitCount);
292+                     if (diagonalScoring == true ){
293+                         overflowHitCount = mergeElements (foundDiagonals, hitCount + overflowHitCount, true );
294+                         //  align the new diaognals
295+                         ungappedAlignment->align (foundDiagonals, overflowHitCount);
296+                         //  We keep only the maximal diagonal scoring hit, so the max number of hits is DBsize
297+                         overflowHitCount = keepMaxScoreElementOnly (foundDiagonals, overflowHitCount);
298+                     } else  {
299+                         //  in case of scoring we just sum up in mergeElements, so the max number of hits is DBsize
300+                         //  merge lists, hitCount is max. dbSize so there can be no overflow in mergeElements
301+                         overflowHitCount = mergeElements (foundDiagonals, hitCount + overflowHitCount);
302+                     }
299303                } else  {
300304                    overflowHitCount = hitCount;
301305                }
@@ -463,11 +467,11 @@ size_t QueryMatcher::findDuplicates(IndexEntryLocal **hitsByIndex,
463467    return  localResultSize;
464468}
465469
466- size_t  QueryMatcher::mergeElements (CounterResult *foundDiagonals, size_t  hitCounter) {
470+ size_t  QueryMatcher::mergeElements (CounterResult *foundDiagonals, size_t  hitCounter,  bool  keepScoredHits ) {
467471    size_t  overflowHitCount = 0 ;
468472#define  MERGE_CASE (x ) \
469473    case  x: overflowHitCount = diagonalScoring ? \
470-                                cachedOperation##x->mergeElementsByDiagonal (foundDiagonals,hitCounter) : \
474+                                cachedOperation##x->mergeElementsByDiagonal (foundDiagonals,hitCounter, keepScoredHits ) : \
471475                               cachedOperation##x->mergeElementsByScore (foundDiagonals,hitCounter); \
472476    break ;
473477
0 commit comments