Skip to content

Commit 1806c0c

Browse files
Add padding module
1 parent f5d8483 commit 1806c0c

File tree

4 files changed

+41
-1
lines changed

4 files changed

+41
-1
lines changed

src/CommandDeclarations.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ extern int convertkb(int argc, const char **argv, const Command& command);
2323
extern int convertmsa(int argc, const char **argv, const Command& command);
2424
extern int convertprofiledb(int argc, const char **argv, const Command& command);
2525
extern int createdb(int argc, const char **argv, const Command& command);
26+
extern int makepaddedseqdb(int argc, const char **argv, const Command& command);
2627
extern int createindex(int argc, const char **argv, const Command& command);
2728
extern int createlinindex(int argc, const char **argv, const Command& command);
2829
extern int createseqfiledb(int argc, const char **argv, const Command& command);

src/MMseqsBase.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,14 +130,21 @@ std::vector<Command> baseCommands = {
130130
"<i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB>",
131131
CITATION_MMSEQS2, {{"fast[a|q]File[.gz|bz2]|stdin", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA | DbType::VARIADIC, &DbValidator::flatfileStdinAndGeneric },
132132
{"sequenceDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::flatfile }}},
133+
{"makepaddedseqdb", makepaddedseqdb, &par.onlyverbosity, COMMAND_HIDDEN,
134+
"Generate a padded sequence DB",
135+
"Generate a padded sequence DB",
136+
"Martin Steinegger <[email protected]>",
137+
"<i:sequenceDB> <o:sequenceDB>",
138+
CITATION_MMSEQS2, {{"sequenceDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA|DbType::NEED_HEADER, &DbValidator::sequenceDb },
139+
{"sequenceIndexDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::sequenceDb }}},
133140
{"appenddbtoindex", appenddbtoindex, &par.appenddbtoindex, COMMAND_HIDDEN,
134141
NULL,
135142
NULL,
136143
"Milot Mirdita <[email protected]>",
137144
"<i:DB1> ... <i:DBN> <o:DB>",
138145
CITATION_MMSEQS2, {{"DB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA | DbType::VARIADIC, &DbValidator::allDb },
139146
{"DB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::allDb }}},
140-
{"indexdb", indexdb, &par.indexdb, COMMAND_HIDDEN,
147+
{"indexdb", indexdb, &par.indexdb, COMMAND_HIDDEN,
141148
NULL,
142149
NULL,
143150
"Martin Steinegger <[email protected]>",

src/util/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ set(util_source_files
3131
util/filterdb.cpp
3232
util/gff2db.cpp
3333
util/renamedbkeys.cpp
34+
util/makepaddedseqdb.cpp
3435
util/masksequence.cpp
3536
util/maskbygff.cpp
3637
util/mergeclusters.cpp

src/util/makepaddedseqdb.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#include "Parameters.h"
2+
#include "DBReader.h"
3+
#include "DBWriter.h"
4+
#include "Debug.h"
5+
#include "Util.h"
6+
7+
/**
 * Entry point of the `makepaddedseqdb` module.
 *
 * Reads every entry of the input sequence DB (par.db1) and writes it to the
 * output DB (par.db2) under the same key, right-padded with spaces so that the
 * written record length is a multiple of ALIGN (4) bytes. Entries are visited
 * in reverse of the order the reader produces for SORT_BY_LENGTH.
 *
 * @param argc    number of command line arguments
 * @param argv    command line arguments
 * @param command command descriptor used for parameter parsing
 * @return EXIT_SUCCESS once the output DB has been written
 */
int makepaddedseqdb(int argc, const char **argv, const Command &command) {
    Parameters &par = Parameters::getInstance();
    par.parseParameters(argc, argv, command, true, 0, 0);

    DBReader<unsigned int> dbr(par.db1.c_str(), par.db1Index.c_str(), 1,
                               DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA);
    dbr.open(DBReader<unsigned int>::SORT_BY_LENGTH);

    DBWriter writer(par.db2.c_str(), par.db2Index.c_str(), 1, false, dbr.getDbtype());
    writer.open();

    // Record length is rounded up to a multiple of this many bytes.
    const size_t ALIGN = 4;

    // Scratch buffer for one padded record; hoisted so its capacity is reused.
    std::string result;

    // Count down with an unsigned index: `i-- > 0` stops cleanly at zero and
    // avoids narrowing the reader's size into a signed `long`.
    for (size_t id = dbr.getSize(); id-- > 0;) {
        const unsigned int key = dbr.getDbKey(id);
        const char *data = dbr.getData(id, 0);
        // NOTE(review): assumes getSeqLen() excludes any trailing newline/null
        // stored with the entry - confirm against DBReader.
        const size_t seqLen = dbr.getSeqLen(id);
        const size_t sequencepadding = (seqLen % ALIGN == 0) ? 0 : ALIGN - seqLen % ALIGN;

        // Start from a fresh record each iteration; without this the buffer
        // would accumulate all previously processed sequences.
        result.assign(data, seqLen);
        result.append(sequencepadding, ' ');

        // Write the padded buffer, not the raw entry: passing `data` with the
        // padded length would copy bytes that lie past the sequence in the
        // source DB instead of the space padding.
        writer.writeData(result.c_str(), result.size(), key, 0, false);
    }
    writer.close(true);

    // Make the input DB's files selected by DBFiles::SEQUENCE_ANCILLARY
    // available under the output DB name via soft links.
    DBReader<unsigned int>::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY);

    dbr.close();
    return EXIT_SUCCESS;
}

0 commit comments

Comments
 (0)