-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgulpfile.js
131 lines (102 loc) · 3.48 KB
/
gulpfile.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
'use strict';
var gulp = require('gulp'),
csv2 = require('csv2'),
through2 = require('through2'),
_ = require('lodash'),
rename = require('gulp-rename'),
process = require('child_process'),
fs = require('fs');
/**
 * Task `csv2json`: streams ./data/fcq.csv through a CSV parser and rewrites
 * the file contents as newline-delimited JSON, one object per CSV record,
 * then writes the result to ./data with a ".json" extension.
 */
gulp.task('csv2json', function() {
  var csvPath = './data/fcq.csv';
  var outputDir = './data';

  // Builds a fresh record transform for one file. The first record seen is
  // kept as the list of column names; every later record is zipped with
  // those names and emitted as a single JSON line.
  function createRecordToJson() {
    var columnNames;
    return through2.obj(function(row, encoding, next) {
      if (columnNames) {
        var entry = _.zipObject(columnNames, row);
        this.push(JSON.stringify(entry) + '\n');
      } else {
        // Header row: remember it, emit nothing.
        columnNames = row;
      }
      next();
    });
  }

  // Swaps the file's content stream for a stream of JSON strings and passes
  // the same file object downstream.
  function replaceContents(file, encoding, next) {
    file.contents = file.contents
      .pipe(csv2())
      .pipe(createRecordToJson());
    this.push(file);
    next();
  }

  function useJsonExtension(path) {
    path.extname = ".json";
  }

  // buffer: false keeps the contents as a stream, on the assumption that the
  // input data file is large.
  var source = gulp.src(csvPath, { buffer: false });

  return source
    .pipe(through2.obj(replaceContents))
    .pipe(rename(useJsonExtension))
    .pipe(gulp.dest(outputDir));
});
/**
 * Task `import`: runs the `mongoimport` command-line tool against
 * data/fcqFiltered.json, passing database `sikuli` and collection `fcq`,
 * and relays the tool's stdout/stderr to the console.
 *
 * The task is asynchronous: it completes only when the child process has
 * closed, and it fails if the process cannot be started or exits with a
 * non-zero code.
 *
 * @param {function(Error=)} done - completion callback supplied by gulp.
 */
gulp.task('import', function(done) {
  var database = 'sikuli';
  var collection = 'fcq';
  var args = [
    '--type', 'json',
    '-d', database,
    '-c', collection,
    '--file', 'data/fcqFiltered.json'
  ];

  // A failed spawn emits 'error' and may also emit 'close'; make sure gulp's
  // callback is invoked exactly once.
  var finished = false;
  var finish = function(err) {
    if (finished) {
      return;
    }
    finished = true;
    done(err);
  };

  // NOTE: `process` here is the child_process module required at the top of
  // this file, not Node's global `process` object.
  var mongoimport = process.spawn('mongoimport', args);

  mongoimport.stdout.on('data', function(data) {
    console.log(data.toString());
  });

  mongoimport.stderr.on('data', function(data) {
    console.error(data.toString());
  });

  // Without this listener a missing `mongoimport` binary would surface as an
  // unhandled 'error' event and crash the gulp process.
  mongoimport.on('error', function(err) {
    finish(err);
  });

  mongoimport.on('close', function(code, signal) {
    if (code === 0) {
      finish();
      return;
    }
    var reason = (code === null) ? 'signal ' + signal : 'code ' + code;
    finish(new Error('mongoimport exited with ' + reason));
  });
});
/**
 * Task `sanitize`: reads data/fcq.json (one JSON object per line), converts
 * every value that is a percentage string (e.g. "85%", "85.5%") into a
 * fraction (0.85, 0.855), and writes the result to data/fcqFiltered.json,
 * again as one JSON object per line.
 *
 * The output file is overwritten on every run, so running the task twice
 * does not duplicate records.
 */
gulp.task('sanitize', function() {
  var inputFile = 'data/fcq.json';
  var outputFile = 'data/fcqFiltered.json';

  // Matches a value that is entirely a percentage: optional sign, integer or
  // decimal number, optional whitespace, then "%". Group 1 is the number.
  // No /g flag, so exec() carries no state between calls.
  var PERCENTAGE = /^\s*([-+]?(?:\d+\.?\d*|\.\d+))\s*%\s*$/;

  var readFile = function(file) {
    return fs.readFileSync(file, 'utf8');
  };

  // Split on LF or CRLF and drop blank lines, so the last record is kept
  // whether or not the file ends with a trailing newline.
  var splitLines = function(data) {
    return data.split(/\r?\n/).filter(function(line) {
      return line.trim() !== '';
    });
  };

  var parseJSON = function(collection) {
    // Wrap JSON.parse so the iteration index is not passed as its reviver.
    return _.map(collection, function(line) {
      return JSON.parse(line);
    });
  };

  var sanitizeOutputs = function(collection) {
    // Coerce when the value is a percentage string, otherwise return it
    // unchanged (non-strings and strings that merely contain "%" included).
    var coerceIfPercentage = function(val) {
      if (typeof val !== 'string') {
        return val;
      }
      var match = PERCENTAGE.exec(val);
      return match ? parseFloat(match[1]) / 100 : val;
    };

    // Map over the collection and each object, coercing if percentage.
    return _.map(collection, function(obj) {
      return _.mapValues(obj, coerceIfPercentage);
    });
  };

  // Since Mongo is not very friendly with JSON arrays, we newline each entry.
  var mongoifyOutputToFile = function(collection) {
    var lines = _.map(collection, function(val) {
      return JSON.stringify(val) + "\n";
    });
    // Single overwrite instead of one append per record: re-running the task
    // replaces the previous output rather than appending to it.
    fs.writeFileSync(outputFile, lines.join(''));
    console.log('Successfully wrote to file.');
  };

  // Pipeline: read -> split into lines -> parse -> sanitize -> write.
  var records = parseJSON(splitLines(readFile(inputFile)));
  mongoifyOutputToFile(sanitizeOutputs(records));
});