Skip to content

Commit 570e3ed

Browse files
committed
Add support to write compressed files in unpackdb with --unpack-suffix .gz
1 parent e379831 commit 570e3ed

File tree

2 files changed

+53
-5
lines changed

2 files changed

+53
-5
lines changed

src/commons/Parameters.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -289,7 +289,7 @@ Parameters::Parameters():
289289
PARAM_TAR_INCLUDE(PARAM_TAR_INCLUDE_ID, "--tar-include", "Tar Inclusion Regex", "Include file names based on this regex", typeid(std::string), (void *) &tarInclude, "^.*$"),
290290
PARAM_TAR_EXCLUDE(PARAM_TAR_EXCLUDE_ID, "--tar-exclude", "Tar Exclusion Regex", "Exclude file names based on this regex", typeid(std::string), (void *) &tarExclude, "^.*$"),
291291
// unpackdb
292-
PARAM_UNPACK_SUFFIX(PARAM_UNPACK_SUFFIX_ID, "--unpack-suffix", "Unpack suffix", "File suffix for unpacked files", typeid(std::string), (void *) &unpackSuffix, "^.*$"),
292+
PARAM_UNPACK_SUFFIX(PARAM_UNPACK_SUFFIX_ID, "--unpack-suffix", "Unpack suffix", "File suffix for unpacked files.\nAdd .gz suffix to write compressed files.", typeid(std::string), (void *) &unpackSuffix, "^.*$"),
293293
PARAM_UNPACK_NAME_MODE(PARAM_UNPACK_NAME_MODE_ID, "--unpack-name-mode", "Unpack name mode", "Name unpacked files by 0: DB key, 1: accession (through .lookup)", typeid(int), (void *) &unpackNameMode, "^[0-1]{1}$"),
294294
// for modules that should handle -h themselves
295295
PARAM_HELP(PARAM_HELP_ID, "-h", "Help", "Help", typeid(bool), (void *) &help, "", MMseqsParameter::COMMAND_HIDDEN),

src/util/unpackdb.cpp

Lines changed: 52 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,9 @@
44
#include "Util.h"
55
#include "FileUtil.h"
66
#include "Debug.h"
7+
#ifdef HAVE_ZLIB
8+
#include <zlib.h>
9+
#endif
710

811
#ifdef OPENMP
912
#include <omp.h>
@@ -27,7 +30,6 @@ int unpackdb(int argc, const char **argv, const Command& command) {
2730

2831
size_t entries = reader.getSize();
2932
Debug::Progress progress(entries);
30-
3133
#pragma omp parallel
3234
{
3335
unsigned int thread_idx = 0;
@@ -50,9 +52,55 @@ int unpackdb(int argc, const char **argv, const Command& command) {
5052
name.append(SSTR(key));
5153
}
5254
name.append(par.unpackSuffix);
53-
FILE* handle = FileUtil::openAndDelete(name.c_str(), "w");
54-
fwrite(reader.getData(i, thread_idx), sizeof(char), reader.getEntryLen(i) - 1, handle);
55-
fclose(handle);
55+
56+
const char* cname = name.c_str();
57+
58+
if (FileUtil::fileExists(cname) == true) {
59+
if (FileUtil::directoryExists(cname) == true) {
60+
Debug(Debug::ERROR) << "Cannot open directory " << name << " for writing\n";
61+
continue;
62+
}
63+
FileUtil::remove(cname);
64+
}
65+
66+
if (Util::endsWith(".gz", name.c_str()) == true) {
67+
#ifdef HAVE_ZLIB
68+
gzFile handle = gzopen(cname, "w");
69+
if (handle == NULL) {
70+
Debug(Debug::ERROR) << "Cannot not open " << name << " for writing\n";
71+
continue;
72+
}
73+
size_t len = reader.getEntryLen(i) - 1;
74+
int n = gzwrite(handle ,reader.getData(i, thread_idx), len * sizeof(char));
75+
if ((size_t)n != len) {
76+
Debug(Debug::ERROR) << "Cannot not write " << name << "\n";
77+
continue;
78+
}
79+
if (gzclose(handle) != 0) {
80+
Debug(Debug::ERROR) << "Cannot not close " << name << "\n";
81+
continue;
82+
}
83+
#else
84+
Debug(Debug::ERROR) << "MMseqs2 was not compiled with zlib support. Cannot write compressed output\n";
85+
EXIT(EXIT_FAILURE);
86+
#endif
87+
} else {
88+
FILE* handle = fopen(cname, "w");
89+
if (handle == NULL) {
90+
Debug(Debug::ERROR) << "Cannot not open " << name << " for writing\n";
91+
continue;
92+
}
93+
size_t len = reader.getEntryLen(i) - 1;
94+
int n = fwrite(reader.getData(i, thread_idx), sizeof(char), len, handle);
95+
if ((size_t)n != len) {
96+
Debug(Debug::ERROR) << "Cannot not write " << name << "\n";
97+
continue;
98+
}
99+
if (fclose(handle) != 0) {
100+
Debug(Debug::ERROR) << "Cannot not close " << name << "\n";
101+
continue;
102+
}
103+
}
56104
}
57105
}
58106
reader.close();

0 commit comments

Comments
 (0)