forked from BurntSushi/cablastp
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdb.go
279 lines (237 loc) · 7.08 KB
/
db.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
package cablastp
import (
"fmt"
"os"
"os/exec"
"path"
"strings"
)
// Names of files that live inside a database directory.
const (
// FileParams is the name of the configuration file. NewWriteDB creates it,
// Save writes the configuration into it, and NewReadDB loads the
// configuration back from it.
FileParams = "params"
// FileBlastCoarse is the output name handed to makeblastdb (via -out) when
// Save builds the BLAST database from the coarse FASTA file.
FileBlastCoarse = "blastdb-coarse"
// FileBlastFine is not referenced anywhere in this file.
// NOTE(review): presumably the BLAST database name used for the fine
// search stage -- confirm against its users.
FileBlastFine = "blastdb-fine"
)
// A DB represents a cablastp database, which has three main components:
// a coarse database, a compressed database and a configuration file.
//
// A DB can be opened either for writing (compression) with NewWriteDB or for
// reading (decompression) with NewReadDB. It should be released with
// WriteClose or ReadClose respectively.
type DB struct {
// An embedded configuration. NewWriteDB stores the configuration it is
// given; NewReadDB loads it from the 'params' file.
*DBConf
// The path to the directory on disk.
Path string
// The name of the database. This corresponds to the basename of the path.
Name string
// The compressed database component.
ComDB *CompressedDB
// The coarse database component.
CoarseDB *CoarseDB
// File pointers. Only 'params' is assigned and used in this file.
// NOTE(review): the remaining handles are presumably managed by the
// coarse/compressed components or unused -- confirm before relying on them.
coarseFasta, coarseSeeds, coarseLinks, compressed, index, params *os.File
}
// NewWriteDB creates a new cablastp database in the directory 'dir' and
// prepares it for writing.
//
// 'conf' should be a database configuration, typically defined (initially)
// from command line parameters. It is embedded in the returned DB and must
// not be nil.
//
// 'dir' must not exist yet; it is created by this call. An error is returned
// if 'dir' looks like an archive (ends in ".tar" or ".gz"), if it already
// exists, if 'conf' is nil, if the 'makeblastdb' executable named by the
// configuration cannot be found, or if there is a problem creating any of the
// files in the database.
func NewWriteDB(conf *DBConf, dir string) (*DB, error) {
	Vprintf("Opening database in %s...\n", dir)

	if strings.HasSuffix(dir, ".tar") || strings.HasSuffix(dir, ".gz") {
		return nil, fmt.Errorf("The CaBLASTP database you've provided does " +
			"not appear to be a directory. Please make sure you've extracted " +
			"the downloaded database with `tar zxf cablastp-xxx.tar.gz` " +
			"before using it with CaBLASTP.")
	}

	// Stat rather than Open: we only need to know whether the path exists,
	// and Stat does not leave an open file handle behind.
	_, err := os.Stat(dir)
	switch {
	case err == nil:
		return nil, fmt.Errorf("The directory '%s' already exists. A "+
			"new compressed database cannot be created in the same "+
			"directory as an existing database. If you want to append "+
			"to an existing database, use the '--append' flag.", dir)
	case !os.IsNotExist(err):
		return nil, fmt.Errorf("An error occurred when checking if '%s' "+
			"exists: %s.", dir, err)
	}

	if conf == nil {
		return nil, fmt.Errorf("A database configuration is required to "+
			"create a database in '%s'.", dir)
	}

	// Do a sanity check and make sure we can access the `makeblastdb`
	// executable. Otherwise we might do a lot of work for nothing...
	// This runs before the directory is created so that a failed check does
	// not leave an empty directory behind that would block the next attempt.
	if err = execExists(conf.BlastMakeBlastDB); err != nil {
		return nil, fmt.Errorf(
			"Could not find 'makeblastdb' executable: %s", err)
	}

	if err = os.Mkdir(dir, 0777); err != nil {
		return nil,
			fmt.Errorf("Could not create directory '%s': %s.", dir, err)
	}

	db := &DB{
		DBConf: conf,
		Name:   path.Base(dir),
		Path:   dir,
	}

	// Create the 'params' file. Nothing is written to it here; Save writes
	// the configuration into it.
	db.params, err = db.openWriteFile(FileParams)
	if err != nil {
		return nil, err
	}

	// From here on the params file is open, so release it if a later step
	// fails and the DB is never handed to the caller.
	db.ComDB, err = newWriteCompressedDB(db)
	if err != nil {
		db.params.Close()
		return nil, err
	}
	db.CoarseDB, err = newWriteCoarseDB(db)
	if err != nil {
		db.params.Close()
		return nil, err
	}

	Vprintf("Done opening database in %s.\n", dir)
	return db, nil
}
// filePath returns the location of the file called 'name' inside the
// database directory, i.e. 'name' joined onto db.Path.
func (db *DB) filePath(name string) string {
	dir := db.Path
	return path.Join(dir, name)
}
// openWriteFile creates (or truncates) the file called 'name' inside the
// database directory and returns it opened for writing. On failure the
// returned file is nil and the error from os.Create is passed through.
func (db *DB) openWriteFile(name string) (*os.File, error) {
	return os.Create(path.Join(db.Path, name))
}
// NewReadDB opens the cablastp database stored in the directory 'dir' for
// reading. The configuration is loaded from the database's 'params' file.
//
// An error is returned if 'dir' looks like an archive (ends in ".tar" or
// ".gz"), if the directory cannot be opened, or if there is a problem
// accessing any of the files on disk.
//
// Also, if the 'makeblastdb' executable named by the loaded configuration is
// not found, then an error is returned.
func NewReadDB(dir string) (*DB, error) {
	Vprintf("Opening database in %s...\n", dir)

	if strings.HasSuffix(dir, ".tar") || strings.HasSuffix(dir, ".gz") {
		return nil, fmt.Errorf("The CaBLASTP database you've provided does " +
			"not appear to be a directory. Please make sure you've extracted " +
			"the downloaded database with `tar zxf cablastp-xxx.tar.gz` " +
			"before using it with CaBLASTP.")
	}

	// Opening the directory is only a readability probe; close the handle
	// right away so it is not leaked for the lifetime of the process.
	probe, err := os.Open(dir)
	if err != nil {
		return nil, fmt.Errorf("Could not open '%s' for reading "+
			"because: %s.", dir, err)
	}
	probe.Close()

	db := &DB{
		Name: path.Base(dir),
		Path: dir,
	}

	db.params, err = db.openReadFile(FileParams)
	if err != nil {
		return nil, err
	}

	// From here on the params file is open; release it on every failure,
	// since the DB is not returned and nobody else could close it.
	fail := func(err error) (*DB, error) {
		db.params.Close()
		return nil, err
	}

	// Now try to load the configuration parameters from the 'params' file.
	db.DBConf, err = LoadDBConf(db.params)
	if err != nil {
		return fail(err)
	}

	// Do a sanity check and make sure we can access the `makeblastdb`
	// executable. Otherwise we might do a lot of work for nothing...
	if err = execExists(db.BlastMakeBlastDB); err != nil {
		return fail(fmt.Errorf(
			"Could not find 'makeblastdb' executable: %s", err))
	}

	db.ComDB, err = newReadCompressedDB(db)
	if err != nil {
		return fail(err)
	}
	db.CoarseDB, err = newReadCoarseDB(db)
	if err != nil {
		return fail(err)
	}

	Vprintf("Done opening database in %s.\n", dir)
	return db, nil
}
// openReadFile opens the file called 'name' inside the database directory
// for reading. On failure the returned file is nil and the error from
// os.Open is passed through.
func (db *DB) openReadFile(name string) (*os.File, error) {
	return os.Open(path.Join(db.Path, name))
}
// Save writes the database to disk and should be called once compression is
// complete. It performs three steps, stopping at the first error:
//
//  1. The params file is emptied and the current configuration is written
//     into it.
//  2. The coarse database is saved.
//  3. makeblastdb is run inside the database directory to build a BLAST
//     database (named by FileBlastCoarse) from the coarse FASTA file.
//
// N.B. The compressed database is not saved here. According to the original
// notes it is written as each sequence is processed, and saving the coarse
// database can take a very long time when the seeds table has to be written.
func (db *DB) Save() error {
	// Empty the params file and rewind it so the configuration written
	// below replaces any previous contents instead of following them.
	err := db.params.Truncate(0)
	if err != nil {
		return err
	}
	_, err = db.params.Seek(0, os.SEEK_SET)
	if err != nil {
		return err
	}
	err = db.DBConf.Write(db.params)
	if err != nil {
		return err
	}

	// Only the coarse database needs an explicit save.
	err = db.CoarseDB.save()
	if err != nil {
		return err
	}

	// Build the BLAST database from the coarse FASTA file, i.e. run
	// `makeblastdb -dbtype nucl -in <coarse fasta> -out blastdb-coarse`
	// with the database directory as working directory.
	makeBlastDB := exec.Command(
		db.BlastMakeBlastDB,
		"-dbtype", "nucl",
		"-in", FileCoarseFasta,
		"-out", FileBlastCoarse,
	)
	makeBlastDB.Dir = db.Path

	Vprintf("Creating %s...\n", FileBlastCoarse)
	err = Exec(makeBlastDB)
	if err != nil {
		return err
	}
	Vprintf("Done creating %s.\n", FileBlastCoarse)
	return nil
}
// CoarseFastaLocation returns the name reported by the coarse database's
// FASTA file handle (CoarseDB.FileFasta).
func (db *DB) CoarseFastaLocation() string {
	return db.CoarseDB.FileFasta.Name()
}
// ReadClose closes all appropriate files after reading from a database:
// the params file first, then the coarse and compressed components via their
// readClose methods. The error returned by closing the params file is
// discarded.
func (db *DB) ReadClose() {
db.params.Close()
db.CoarseDB.readClose()
db.ComDB.readClose()
}
// WriteClose closes all appropriate files after writing to a database:
// the params file first, then the coarse and compressed components via their
// writeClose methods. The error returned by closing the params file is
// discarded.
func (db *DB) WriteClose() {
db.params.Close()
db.CoarseDB.writeClose()
db.ComDB.writeClose()
}
// execExists reports whether the executable 'name' can be located by
// exec.LookPath. It returns nil when the lookup succeeds and the lookup
// error otherwise.
func execExists(name string) error {
	_, lookupErr := exec.LookPath(name)
	return lookupErr
}
// fileExists checks for 'name' with os.Stat. It returns the Stat error only
// when that error means the file does not exist; a successful Stat and every
// other kind of Stat error both yield nil.
func fileExists(name string) error {
	if _, statErr := os.Stat(name); os.IsNotExist(statErr) {
		return statErr
	}
	return nil
}
}