Skip to content

Commit 950342d

Browse files
Remove realloc in QueryMatch to save memory
1 parent 78ae2c5 commit 950342d

File tree

6 files changed

+89
-47
lines changed

6 files changed

+89
-47
lines changed

src/prefiltering/CacheFriendlyOperations.cpp

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ CacheFriendlyOperations<BINSIZE>::~CacheFriendlyOperations<BINSIZE>(){
3636

3737
template<unsigned int BINSIZE>
3838
size_t CacheFriendlyOperations<BINSIZE>::findDuplicates(IndexEntryLocal **input, CounterResult *output,
39-
size_t outputSize, unsigned short indexFrom, unsigned short indexTo, bool computeTotalScore) {
39+
size_t outputSize, unsigned short indexFrom, unsigned short indexTo, bool computeTotalScore) {
4040
do {
4141
setupBinPointer();
4242
CounterResult *lastPosition = (binDataFrame + BINCOUNT * binSize) - 1;
@@ -58,12 +58,16 @@ size_t CacheFriendlyOperations<BINSIZE>::mergeElementsByScore(CounterResult *inp
5858
}
5959

6060
template<unsigned int BINSIZE>
61-
size_t CacheFriendlyOperations<BINSIZE>::mergeElementsByDiagonal(CounterResult *inputOutputArray, const size_t N) {
61+
size_t CacheFriendlyOperations<BINSIZE>::mergeElementsByDiagonal(CounterResult *inputOutputArray, const size_t N, const bool keepScoredHits) {
6262
do {
6363
setupBinPointer();
6464
hashElements(inputOutputArray, N);
6565
} while(checkForOverflowAndResizeArray(false) == true); // overflowed occurred
66-
return mergeDiagonalDuplicates(inputOutputArray);
66+
if(keepScoredHits){
67+
return mergeDiagonalKeepScoredHitsDuplicates(inputOutputArray);
68+
}else{
69+
return mergeDiagonalDuplicates(inputOutputArray);
70+
}
6771
}
6872

6973
template<unsigned int BINSIZE>
@@ -93,6 +97,7 @@ size_t CacheFriendlyOperations<BINSIZE>::mergeDiagonalDuplicates(CounterResult *
9397
--n;
9498
}
9599
// combine diagonals
100+
// we keep only the last diagonal element
96101
for (size_t n = 0; n < currBinSize; n++) {
97102
const CounterResult &element = binStartPos[n];
98103
const unsigned int hashBinElement = element.id >> (MASK_0_5_BIT);
@@ -109,6 +114,40 @@ size_t CacheFriendlyOperations<BINSIZE>::mergeDiagonalDuplicates(CounterResult *
109114
return doubleElementCount;
110115
}
111116

117+
118+
template<unsigned int BINSIZE>
119+
size_t CacheFriendlyOperations<BINSIZE>::mergeDiagonalKeepScoredHitsDuplicates(CounterResult *output) {
120+
size_t doubleElementCount = 0;
121+
const CounterResult *bin_ref_pointer = binDataFrame;
122+
// duplicateBitArray is already zero'd from findDuplicates
123+
124+
for (size_t bin = 0; bin < BINCOUNT; bin++) {
125+
const CounterResult *binStartPos = (bin_ref_pointer + bin * binSize);
126+
const size_t currBinSize = (bins[bin] - binStartPos);
127+
// write diagonals + 1 in forward order in the byte array (a later element overwrites an earlier one in the same slot)
128+
for (size_t n = 0; n < currBinSize; n++) {
129+
const unsigned int element = binStartPos[n].id >> (MASK_0_5_BIT);
130+
duplicateBitArray[element] = static_cast<unsigned char>(binStartPos[n].diagonal) + 1;
131+
}
132+
// combine diagonals
133+
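// walk the bin backwards; an element is kept if it already has a score (count != 0) or if its diagonal differs from the value currently stored in its duplicateBitArray slot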
134+
size_t n = currBinSize - 1;
135+
while (n != static_cast<size_t>(-1)) {
136+
const CounterResult &element = binStartPos[n];
137+
const unsigned int hashBinElement = element.id >> (MASK_0_5_BIT);
138+
output[doubleElementCount].id = element.id;
139+
output[doubleElementCount].count = element.count;
140+
output[doubleElementCount].diagonal = element.diagonal;
141+
// std::cout << output[doubleElementCount].id << " " << (int)output[doubleElementCount].count << " " << (int)static_cast<unsigned char>(output[doubleElementCount].diagonal) << std::endl;
142+
// memory overflow can not happen since input array = output array
143+
doubleElementCount += (output[doubleElementCount].count != 0 || duplicateBitArray[hashBinElement] != static_cast<unsigned char>(binStartPos[n].diagonal)) ? 1 : 0;
144+
duplicateBitArray[hashBinElement] = static_cast<unsigned char>(element.diagonal);
145+
--n;
146+
}
147+
}
148+
return doubleElementCount;
149+
}
150+
112151
template<unsigned int BINSIZE>
113152
size_t CacheFriendlyOperations<BINSIZE>::mergeScoreDuplicates(CounterResult *output) {
114153
size_t doubleElementCount = 0;
@@ -211,12 +250,12 @@ size_t CacheFriendlyOperations<BINSIZE>::findDuplicates(CounterResult *output, s
211250
output[doubleElementCount].id = element;
212251
output[doubleElementCount].count = 0;
213252
output[doubleElementCount].diagonal = tmpElementBuffer[n].diagonal;
214-
// const unsigned char diagonal = static_cast<unsigned char>(tmpElementBuffer[n].diagonal);
253+
// const unsigned char diagonal = static_cast<unsigned char>(tmpElementBuffer[n].diagonal);
215254
// memory overflow can not happen since input array = output array
216-
// if(duplicateBitArray[hashBinElement] != tmpElementBuffer[n].diagonal){
217-
// std::cout << "seq="<< output[doubleElementCount].id << "\tDiag=" << (int) output[doubleElementCount].diagonal
218-
// << " dup.Array=" << (int)duplicateBitArray[hashBinElement] << " tmp.Arr="<< (int)tmpElementBuffer[n].diagonal << std::endl;
219-
// }
255+
// if(duplicateBitArray[hashBinElement] != tmpElementBuffer[n].diagonal){
256+
// std::cout << "seq="<< output[doubleElementCount].id << "\tDiag=" << (int) output[doubleElementCount].diagonal
257+
// << " dup.Array=" << (int)duplicateBitArray[hashBinElement] << " tmp.Arr="<< (int)tmpElementBuffer[n].diagonal << std::endl;
258+
// }
220259
doubleElementCount += (duplicateBitArray[hashBinElement] != static_cast<unsigned char>(tmpElementBuffer[n].diagonal)) ? 1 : 0;
221260
duplicateBitArray[hashBinElement] = static_cast<unsigned char>(tmpElementBuffer[n].diagonal);
222261
}

src/prefiltering/CacheFriendlyOperations.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ class CacheFriendlyOperations {
8181
size_t mergeElementsByScore(CounterResult *inputOutputArray, const size_t N);
8282

8383
// merge elements in CounterResult by diagonal, combines elements with same ids that occur after each other
84-
size_t mergeElementsByDiagonal(CounterResult *inputOutputArray, const size_t N);
84+
size_t mergeElementsByDiagonal(CounterResult *inputOutputArray, const size_t N, const bool keepScoredHits = false);
8585

8686
size_t keepMaxScoreElementOnly(CounterResult *inputOutputArray, const size_t N);
8787

@@ -124,6 +124,8 @@ class CacheFriendlyOperations {
124124

125125
size_t mergeDiagonalDuplicates(CounterResult *output);
126126

127+
size_t mergeDiagonalKeepScoredHitsDuplicates(CounterResult *output);
128+
127129
size_t keepMaxElement(CounterResult *output);
128130
};
129131

src/prefiltering/QueryMatcher.cpp

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -97,15 +97,17 @@ std::pair<hit_t*, size_t> QueryMatcher::matchQuery(Sequence *querySeq, unsigned
9797
} else {
9898
memset(compositionBias, 0, sizeof(float) * querySeq->L);
9999
}
100-
100+
if(diagonalScoring == true){
101+
ungappedAlignment->createProfile(querySeq, compositionBias);
102+
}
101103
size_t resultSize = match(querySeq, compositionBias);
102104
if (hook != NULL) {
103105
resultSize = hook->afterDiagonalMatchingHook(*this, resultSize);
104106
}
105107
std::pair<hit_t *, size_t> queryResult;
106108
if (diagonalScoring) {
107109
// write diagonal scores in count value
108-
ungappedAlignment->processQuery(querySeq, compositionBias, foundDiagonals, resultSize);
110+
ungappedAlignment->align(foundDiagonals, resultSize);
109111
memset(scoreSizes, 0, SCORE_RANGE * sizeof(unsigned int));
110112
CounterResult * resultReadPos = foundDiagonals;
111113
CounterResult * resultWritePos = foundDiagonals + resultSize;
@@ -267,35 +269,37 @@ size_t QueryMatcher::match(Sequence *seq, float *compositionBias) {
267269
//std::cout << seq->getDbKey() << std::endl;
268270
//idx.printKmer(index[kmerPos], kmerSize, kmerSubMat->num2aa);
269271
//std::cout << "\t" << current_i << "\t"<< index[kmerPos] << std::endl;
270-
//for (size_t i = 0; i < seqListSize; i++) {
271-
// char diag = entries[i].position_j - current_i;
272-
// std::cout << "(" << entries[i].seqId << " " << (int) diag << ")\t";
273-
//}
272+
// for (size_t i = 0; i < seqListSize; i++) {
273+
// if(23865 == entries[i].seqId ){
274+
// char diag = entries[i].position_j - current_i;
275+
// std::cout << "(" << entries[i].seqId << " " << (int) diag << ")\t";
276+
// }
277+
// }
274278
//std::cout << std::endl;
275279

276280
// detected overflow while matching
277281
if ((sequenceHits + seqListSize) >= lastSequenceHit) {
278282
stats->diagonalOverflow = true;
279-
// realloc foundDiagonals if only 10% of memory left
280-
if((foundDiagonalsSize - overflowHitCount) < 0.1 * foundDiagonalsSize){
281-
foundDiagonalsSize *= 1.5;
282-
foundDiagonals = (CounterResult*) realloc(foundDiagonals, foundDiagonalsSize * sizeof(CounterResult));
283-
if(foundDiagonals == NULL){
284-
Debug(Debug::ERROR) << "Out of memory in QueryMatcher::match\n";
285-
EXIT(EXIT_FAILURE);
286-
}
287-
}
288283
// last pointer
289284
indexPointer[current_i + 1] = sequenceHits;
290285
//std::cout << "Overflow in i=" << indexStart << std::endl;
291286
const size_t hitCount = findDuplicates(indexPointer,
292287
foundDiagonals + overflowHitCount,
293288
foundDiagonalsSize - overflowHitCount,
294289
indexStart, current_i, (diagonalScoring == false));
295-
290+
// this happens only if we have two overflows in a row
296291
if (overflowHitCount != 0) {
297-
// merge lists, hitCount is max. dbSize so there can be no overflow in mergeElements
298-
overflowHitCount = mergeElements(foundDiagonals, hitCount + overflowHitCount);
292+
if(diagonalScoring == true){
293+
overflowHitCount = mergeElements(foundDiagonals, hitCount + overflowHitCount, true);
294+
// align the new diagonals
295+
ungappedAlignment->align(foundDiagonals, overflowHitCount);
296+
// We keep only the maximal diagonal scoring hit, so the max number of hits is DBsize
297+
overflowHitCount = keepMaxScoreElementOnly(foundDiagonals, overflowHitCount);
298+
} else {
299+
// in case of scoring we just sum up in mergeElements, so the max number of hits is DBsize
300+
// merge lists, hitCount is max. dbSize so there can be no overflow in mergeElements
301+
overflowHitCount = mergeElements(foundDiagonals, hitCount + overflowHitCount);
302+
}
299303
} else {
300304
overflowHitCount = hitCount;
301305
}
@@ -463,11 +467,11 @@ size_t QueryMatcher::findDuplicates(IndexEntryLocal **hitsByIndex,
463467
return localResultSize;
464468
}
465469

466-
size_t QueryMatcher::mergeElements(CounterResult *foundDiagonals, size_t hitCounter) {
470+
size_t QueryMatcher::mergeElements(CounterResult *foundDiagonals, size_t hitCounter, bool keepScoredHits) {
467471
size_t overflowHitCount = 0;
468472
#define MERGE_CASE(x) \
469473
case x: overflowHitCount = diagonalScoring ? \
470-
cachedOperation##x->mergeElementsByDiagonal(foundDiagonals,hitCounter) : \
474+
cachedOperation##x->mergeElementsByDiagonal(foundDiagonals,hitCounter, keepScoredHits) : \
471475
cachedOperation##x->mergeElementsByScore(foundDiagonals,hitCounter); \
472476
break;
473477

src/prefiltering/QueryMatcher.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,8 +258,7 @@ class QueryMatcher {
258258
size_t findDuplicates(IndexEntryLocal **hitsByIndex, CounterResult *output,
259259
size_t outputSize, unsigned short indexFrom, unsigned short indexTo, bool computeTotalScore);
260260

261-
262-
size_t mergeElements(CounterResult *foundDiagonals, size_t hitCounter);
261+
size_t mergeElements(CounterResult *foundDiagonals, size_t hitCounter, bool keepHitsWithCounts = false);
263262

264263
size_t keepMaxScoreElementOnly(CounterResult *foundDiagonals, size_t resultSize);
265264

src/prefiltering/UngappedAlignment.cpp

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,8 @@ UngappedAlignment::~UngappedAlignment() {
2222
delete [] score_arr;
2323
}
2424

25-
void UngappedAlignment::processQuery(Sequence *seq,
26-
float *biasCorrection,
27-
CounterResult *results,
28-
size_t resultSize) {
29-
createProfile(seq, biasCorrection, subMatrix->subMatrix);
30-
queryLen = seq->L;
31-
computeScores(queryProfile, seq->L, results, resultSize);
25+
void UngappedAlignment::align(CounterResult *results, size_t resultSize) {
26+
computeScores(queryProfile, queryLen, results, resultSize);
3227
}
3328

3429

@@ -290,7 +285,7 @@ void UngappedAlignment::scoreDiagonalAndUpdateHits(const char * queryProfile,
290285
// update score
291286
for(size_t hitIdx = 0; hitIdx < hitSize; hitIdx++){
292287
hits[seqs[hitIdx].id]->count = static_cast<unsigned char>(std::min(static_cast<unsigned int>(255),
293-
score_arr[hitIdx]));
288+
score_arr[hitIdx]));
294289
if(seqs[hitIdx].seqLen == 1){
295290
std::pair<const unsigned char *, const unsigned int> dbSeq = sequenceLookup->getSequence(hits[hitIdx]->id);
296291
if(dbSeq.second >= 32768){
@@ -344,6 +339,10 @@ void UngappedAlignment::computeScores(const char *queryProfile,
344339
// continue;
345340
// }
346341
const unsigned short currDiag = results[i].diagonal;
342+
// skip results that already have a diagonal score
343+
if(results[i].count != 0){
344+
continue;
345+
}
347346
diagonalMatches[currDiag * DIAGONALBINSIZE + diagonalCounter[currDiag]] = &results[i];
348347
diagonalCounter[currDiag]++;
349348
if(diagonalCounter[currDiag] == DIAGONALBINSIZE) {
@@ -384,9 +383,8 @@ void UngappedAlignment::extractScores(unsigned int *score_arr, simd_int score) {
384383

385384

386385
void UngappedAlignment::createProfile(Sequence *seq,
387-
float * biasCorrection,
388-
short **subMat) {
389-
386+
float * biasCorrection) {
387+
queryLen = seq->L;
390388
if(Parameters::isEqualDbtype(seq->getSequenceType(), Parameters::DBTYPE_HMM_PROFILE)) {
391389
memset(queryProfile, 0, (Sequence::PROFILE_AA_SIZE + 1) * seq->L);
392390
}else{
@@ -409,7 +407,7 @@ void UngappedAlignment::createProfile(Sequence *seq,
409407
for (int pos = 0; pos < seq->L; pos++) {
410408
unsigned int aaIdx = seq->numSequence[pos];
411409
for (int i = 0; i < subMatrix->alphabetSize; i++) {
412-
queryProfile[pos * (Sequence::PROFILE_AA_SIZE + 1) + i] = (subMat[aaIdx][i] + aaCorrectionScore[pos]);
410+
queryProfile[pos * (Sequence::PROFILE_AA_SIZE + 1) + i] = (subMatrix->subMatrix[aaIdx][i] + aaCorrectionScore[pos]);
413411
}
414412
}
415413
}

src/prefiltering/UngappedAlignment.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@ class UngappedAlignment {
1818

1919
~UngappedAlignment();
2020

21+
void createProfile(Sequence *seq, float *biasCorrection);
22+
2123
// This function computes the diagonal score for each CounterResult object
2224
// it assigns the diagonal score to the CounterResult object
23-
void processQuery(Sequence *seq, float *compositionBias, CounterResult *results,
24-
size_t resultSize);
25+
void align(CounterResult *results,
26+
size_t resultSize);
2527

2628
int scoreSingelSequenceByCounterResult(CounterResult &result);
2729

@@ -90,8 +92,6 @@ class UngappedAlignment {
9092

9193
void extractScores(unsigned int *score_arr, simd_int score);
9294

93-
void createProfile(Sequence *seq, float *biasCorrection, short **subMat);
94-
9595
int computeSingelSequenceScores(const char *queryProfile, const unsigned int queryLen,
9696
std::pair<const unsigned char *, const unsigned int> &dbSeq,
9797
int diagonal, unsigned int minDistToDiagonal);

0 commit comments

Comments
 (0)