Ditch geocoderDataForEach (it's a memory hog) and instead go with an async iterator method with an internal buffer, implemented around sqlite limits, to give caller more control over reads

This commit is contained in:
Andrew Pendleton
2016-03-17 17:55:34 -04:00
parent 3c7f5c8462
commit aac15a8f81

View File

@@ -615,15 +615,64 @@ MBTiles.prototype.putGeocoderData = function(type, shard, data, callback) {
});
};
// Implements carmen#geocoderDataForEach method.
//
// Walks every geocoder_data row of the given `type`, ordered by shard, and
// hands each one to `callback` as (shard, inflatedData). `completeCallback`
// is passed straight through to the database driver as its completion hook.
//
// Note the non-standard error convention: a failure is reported by calling
// `callback(err)` with the error in the first (shard) position. An
// SQLITE_ERROR with errno 1 is swallowed and reported as `callback()` with
// no arguments, as is a row callback that arrives without a row.
//
// Returns whatever `this._db.each` returns.
MBTiles.prototype.geocoderDataForEach = function(type, callback, completeCallback) {
    var sql = 'SELECT shard, data FROM geocoder_data WHERE type = ? ORDER BY shard';

    var onRow = function(err, row) {
        if (err) {
            // errno 1 / SQLITE_ERROR is treated as "nothing to report".
            var ignorable = err.code === 'SQLITE_ERROR' && err.errno === 1;
            return ignorable ? callback() : callback(err);
        }
        if (row) {
            // Stored payloads are deflated; callers receive the raw buffer.
            callback(row.shard, zlib.inflateSync(row.data));
            return;
        }
        return callback();
    };

    return this._db.each(sql, type, onRow, completeCallback);
};
// Implements carmen#geocoderDataIterator method.
//
// Returns an object with a single method, `asyncNext(callback)`. Each call
// eventually invokes `callback` exactly once with one of:
//   {value: {shard: <shard>, data: <inflated buffer>}, done: false}
//   {value: undefined, done: true}
//   {value: undefined, done: true, error: <Error>}   (query failed)
// Once a `done: true` result has been delivered, every later call receives
// the same terminal result.
//
// Rows of the given `type` are read in shard order, `chunkSize` at a time
// using LIMIT offset,count, and held in an internal buffer. A new page is
// requested when the buffer drops to 20% of `chunkSize` or below, so at most
// one query is in flight at any moment.
//
// Error handling: a database error ends the iteration. Rows buffered before
// the failure are still delivered; the terminal result then carries the
// error in its `error` property (callers should check it to tell a failure
// from a normal end). An SQLITE_ERROR with errno 1 is treated as an empty
// result set, matching geocoderDataForEach.
MBTiles.prototype.geocoderDataIterator = function(type) {
    var chunkSize = 100;
    var position = 0;
    var getNextIfBelow = 0.2 * chunkSize;
    var nextQueue = [];       // callbacks waiting for a result
    var dataQueue = [];       // buffered rows, terminated by doneSentinel once finished
    var doneSentinel = {};
    var finished = false;     // true once doneSentinel has been queued
    var failure;              // error to report on the terminal result, if any
    var refilling = false;    // true while a page query is in flight
    var _this = this;

    // Same test geocoderDataForEach uses to decide an error means "no data".
    var isIgnorable = function(err) {
        return err.code === 'SQLITE_ERROR' && err.errno === 1;
    };

    // Marks the end of the stream exactly once, remembering a real failure.
    var finish = function(err) {
        if (finished) return;
        finished = true;
        if (err && !isIgnorable(err)) failure = err;
        dataQueue.push(doneSentinel);
    };

    // Pairs waiting callbacks with buffered rows for as long as both exist.
    var sendIfAvailable = function() {
        while (nextQueue.length && dataQueue.length) {
            var nextCb = nextQueue.shift();
            if (dataQueue[0] === doneSentinel) {
                // The sentinel is never removed, so late callers also see "done".
                var last = {value: undefined, done: true};
                if (failure) last.error = failure;
                nextCb(last);
            } else {
                var data = dataQueue.shift();
                maybeRefillBuffer();
                nextCb({value: {shard: data.shard, data: zlib.inflateSync(data.data)}, done: false});
            }
        }
    };

    // Requests the next page of rows and appends them to the buffer.
    var refillBuffer = function() {
        refilling = true;
        var segmentCount = 0;
        // Claim this page's offset before issuing the query so that a
        // completion handler running before each() returns cannot request
        // the same page twice.
        var offset = position;
        position += chunkSize;
        _this._db.each('SELECT shard, data FROM geocoder_data WHERE type = ? ORDER BY shard limit ?,?', type, offset, chunkSize, function(err, row) {
            if (finished) return;
            if (err) {
                // Previously the error was ignored and `undefined` was
                // buffered, which crashed on delivery.
                finish(err);
                sendIfAvailable();
                return;
            }
            if (!row) return;
            dataQueue.push(row);
            segmentCount += 1;
            sendIfAvailable();
        }, function(err) {
            refilling = false;
            if (finished) return;
            // NOTE(review): node-sqlite3 appears to report statement
            // preparation failures to the last callback - confirm. Either
            // way, an error or an empty page means there is nothing more.
            if (err || !segmentCount) {
                finish(err);
                sendIfAvailable();
            } else {
                maybeRefillBuffer();
            }
        });
    };

    // Starts a refill when the buffer is running low and none is in flight.
    var maybeRefillBuffer = function() {
        if (!finished && !refilling && dataQueue.length <= getNextIfBelow) {
            refillBuffer();
        }
    };

    refillBuffer();

    return {asyncNext: function(callback) {
        nextQueue.push(callback);
        sendIfAvailable();
    }};
};
// Implements carmen#getIndexableDocs method.
MBTiles.prototype.getIndexableDocs = function(pointer, callback) {