From 0ed1500ac1de7b5462d9ca0eb92c594f0c0e099a Mon Sep 17 00:00:00 2001
From: Young Hahn
Date: Tue, 21 Jan 2014 23:55:30 -0500
Subject: [PATCH] Robustify geocoder indexing of legacy MBTiles sources.

---
 lib/mbtiles.js        | 199 ++++++++++++++++++++++++++++++++++--------
 test/geocoder.test.js |  46 +++++++++-
 2 files changed, 207 insertions(+), 38 deletions(-)

diff --git a/lib/mbtiles.js b/lib/mbtiles.js
index d610ac5..406fdb2 100644
--- a/lib/mbtiles.js
+++ b/lib/mbtiles.js
@@ -588,21 +588,19 @@ MBTiles.prototype.getIndexableDocs = function(pointer, callback) {
     pointer.offset = pointer.offset || 0;
     pointer.nogrids = 'nogrids' in pointer ? pointer.nogrids : false;
 
-    // Converts MBTiles native TMS coords to ZXY.
-    function tms2zxy(zxys) {
-        return zxys.split(',').map(function(tms) {
-            var zxy = tms.split('/').map(function(v) { return parseInt(v, 10); });
-            zxy[2] = (1 << zxy[0]) - 1 - zxy[2];
-            return zxy.join('/');
-        });
-    }
-
     // If 'carmen' option is passed in initial pointer, retrieve indexables from
     // carmen table. This option can be used to access the previously indexed
     // documents from an MBTiles database without having to know what search
     // field was used in the past (see comment below).
     if (pointer.table === 'carmen') {
-        return this._db.all('SELECT c.id AS id, c.text AS text, c.zxy, k.key_json FROM carmen c JOIN keymap k ON c.id = k.key_name LIMIT ? OFFSET ?', pointer.limit, pointer.offset, makedocs);
+        return this._db.all('SELECT c.id AS id, c.text AS text, c.zxy, k.key_json FROM carmen c JOIN keymap k ON c.id = k.key_name LIMIT ? OFFSET ?', pointer.limit, pointer.offset, function(err, rows) {
+            if (err) return callback(err);
+            this.geocoderMigrateDocs(rows, function(err, docs) {
+                if (err) return callback(err);
+                pointer.offset += pointer.limit;
+                return callback(null, docs, pointer);
+            });
+        }.bind(this));
     }
 
     // By default the keymap table contains all indexable documents.
@@ -616,32 +614,161 @@ MBTiles.prototype.getIndexableDocs = function(pointer, callback) {
             sql = "SELECT k.key_name AS id, k.key_json, GROUP_CONCAT(zoom_level||'/'||tile_column ||'/'||tile_row,',') AS zxy FROM keymap k JOIN grid_key g ON k.key_name = g.key_name JOIN map m ON g.grid_id = m.grid_id WHERE m.zoom_level=? GROUP BY k.key_name LIMIT ? OFFSET ?;";
             args = [info.maxzoom, pointer.limit, pointer.offset];
         }
-        this._db.all(sql, args, makedocs);
+        this._db.all(sql, args, function(err, rows) {
+            if (err) return callback(err);
+            this.geocoderMigrateDocs(rows, function(err, docs) {
+                if (err) return callback(err);
+                pointer.offset += pointer.limit;
+                return callback(null, docs, pointer);
+            });
+        }.bind(this));
     }.bind(this));
-
-    function makedocs(err, rows) {
-        if (err) return callback(err);
-        var docs = [];
-        for (var i = 0; i < rows.length; i++) {
-            var row = rows[i];
-            var doc = JSON.parse(row.key_json);
-            var text = row.text || doc.search || doc.name || '';
-            if (!('lon' in doc) || !('lat' in doc)) continue;
-            if (!text) continue;
-            doc._id = row.id;
-            doc._text = text;
-            doc._zxy = row.zxy ? tms2zxy(row.zxy) : [];
-            doc._center = [doc.lon, doc.lat];
-            if (doc.score) doc._score = parseFloat(doc.score);
-            if (doc.bounds) doc._bbox = doc.bounds.split(',').map(function(v) { return parseFloat(v) });
-            delete doc.lon;
-            delete doc.lat;
-            delete doc.score;
-            delete doc.bounds;
-            docs.push(doc);
-        }
-        pointer.offset += pointer.limit;
-        return callback(null, docs, pointer);
-    };
 };
 
+// Converts rows of the keymap/carmen queries into geocoder documents.
+//
+// - rows: array of rows providing `id`, `key_json` and, optionally, `text`
+//   and `zxy`. The array is emptied as its rows are processed.
+// - callback: function(err, docs). Converted docs accumulate on
+//   `callback.docs` so that they survive the asynchronous re-entry below;
+//   pass a fresh function for every batch.
+//
+// Rows without a `zxy` column or without any text are skipped, and so are
+// docs that have neither lon/lat nor tiles to derive a center from.
+MBTiles.prototype.geocoderMigrateDocs = function(rows, callback) {
+    // Store docs state on callback.
+    callback.docs = callback.docs || [];
+    var docs = callback.docs;
+    var source = this;
+
+    // Converts MBTiles native TMS coords to ZXY.
+    function tms2zxy(zxys) {
+        return zxys.split(',').map(function(tms) {
+            var zxy = tms.split('/').map(function(v) { return parseInt(v, 10); });
+            zxy[2] = (1 << zxy[0]) - 1 - zxy[2];
+            return zxy.join('/');
+        });
+    }
+
+    // Rows that resolve synchronously are handled in a loop rather than by
+    // recursion so that a large batch cannot exhaust the call stack.
+    while (rows.length) {
+        var row = rows.shift();
+        var doc;
+        // A malformed key_json is reported through the callback instead of
+        // being thrown from inside a database callback.
+        try {
+            doc = JSON.parse(row.key_json);
+        } catch (err) {
+            return callback(err);
+        }
+        if (!doc || typeof doc !== 'object') continue;
+
+        var text = row.text || doc.search || doc.name || '';
+        if (!('zxy' in row) || !text) continue;
+
+        // Numeric ids are kept as integers; any other id is mapped to the
+        // integer formed by the first 8 hex digits of its md5 hash. Coerce
+        // to a string first so that ids returned as numbers work as well.
+        var id = String(row.id);
+        doc._id = parseInt(id,10).toString() === id ?
+            parseInt(id,10) :
+            parseInt(crypto.createHash('md5').update(id).digest('hex').substr(0,8), 16);
+        doc._text = text;
+        doc._zxy = row.zxy ? tms2zxy(row.zxy) : [];
+        if (doc.score) doc._score = parseFloat(doc.score);
+        if (doc.bounds) doc._bbox = doc.bounds.split(',').map(function(v) { return parseFloat(v) });
+        delete doc.score;
+        delete doc.bounds;
+
+        if ('lon' in doc && 'lat' in doc) {
+            doc._center = [ doc.lon, doc.lat ];
+            delete doc.lon;
+            delete doc.lat;
+            docs.push(doc);
+            continue;
+        }
+
+        // No coordinates and no tiles: the doc cannot be located.
+        if (!doc._zxy.length) continue;
+
+        // Derive the center from the grids, then resume with the rest.
+        return source.geocoderCentroid(id, doc._zxy, function(err, center) {
+            if (err) return callback(err);
+            doc._center = center;
+            docs.push(doc);
+            source.geocoderMigrateDocs(rows, callback);
+        });
+    }
+
+    // Done.
+    return callback(null, docs);
+};
+
+// Get the [lon,lat] of a feature given an array of xyz tiles.
+// Looks up a point in the feature geometry using a point from a central grid.
+//
+// - id: key of the feature as listed in the `keys` of its grids.
+// - zxy: array of 'z/x/y' strings for the tiles covering the feature.
+// - callback: function(err, center) where center is the value returned by
+//   sm.ll() for the chosen grid cell.
+MBTiles.prototype.geocoderCentroid = function(id, zxy, callback) {
+    // Without any tile there is no grid to look the feature up in.
+    if (!zxy || !zxy.length) return callback(new Error('No tiles for feature ' + id));
+
+    var coords = [];
+    for (var i = 0; i < zxy.length; i++) {
+        var parts = zxy[i].split('/');
+        parts[0] = parts[0] | 0;
+        parts[1] = parts[1] | 0;
+        parts[2] = parts[2] | 0;
+        coords.push(parts);
+    }
+    // Order tiles by x, then y, and pick the middle one.
+    coords.sort(function(a, b) {
+        if (a[1] < b[1]) return -1;
+        if (a[1] > b[1]) return 1;
+        if (a[2] < b[2]) return -1;
+        if (a[2] > b[2]) return 1;
+        return 0;
+    });
+    var mid = coords[Math.floor(coords.length * 0.5)];
+    this.getGrid(mid[0],mid[1],mid[2],function(err, grid) {
+        if (err) return callback(err);
+        if (!grid) return callback(new Error('Grid does not exist'));
+
+        // Convert id local index in utfgrid to charactercode.
+        var key = grid.keys.indexOf(id);
+        if (key === -1) return callback(new Error('Feature ' + id + ' not found in grid'));
+        key += 32;
+        if (key >= 34) key++;
+        if (key >= 92) key++;
+        var chr = String.fromCharCode(key);
+
+        // Collect every grid cell that belongs to the feature.
+        var xy = [];
+        for (var y = 0; y < grid.grid.length; y++) {
+            if (grid.grid[y].indexOf(chr) === -1) continue;
+            for (var x = 0; x < 64; x++) {
+                if (grid.grid[y][x] === chr) xy.push([x,y]);
+            }
+        }
+        // The key can be listed without occupying any cell.
+        if (!xy.length) return callback(new Error('Feature ' + id + ' not found in grid'));
+
+        // Order cells by x, then y, and pick the middle one.
+        xy.sort(function(a, b) {
+            if (a[0] < b[0]) return -1;
+            if (a[0] > b[0]) return 1;
+            if (a[1] < b[1]) return -1;
+            if (a[1] > b[1]) return 1;
+            return 0;
+        });
+        var cxy = xy[Math.floor(xy.length * 0.5)];
+        // Assumes 64x64 grids: each cell covers 4 pixels of a 256 pixel tile.
+        callback(null, sm.ll([
+            (256*mid[1]) + (cxy[0]*4),
+            (256*mid[2]) + (cxy[1]*4)
+        ], mid[0]));
+    });
+};
diff --git a/test/geocoder.test.js b/test/geocoder.test.js
index 471cef2..4bc16f6 100644
--- a/test/geocoder.test.js
+++ b/test/geocoder.test.js
@@ -89,7 +89,7 @@ it('getIndexableDocs', function(done) {
             REGION: 19,
             SUBREGION: 29,
             UN: 533,
-            _id: 'ABW',
+            _id: 4214083313,
             _text: 'Aruba',
             _zxy: [ '4/4/7' ],
             _center: [ -69.977, 12.517 ]
@@ -108,7 +108,7 @@ it('getIndexableDocs', function(done) {
             REGION: 9,
             SUBREGION: 61,
             UN: 16,
-            _id: 'ASM',
+            _id: 2093723708,
             _text: 'American Samoa',
             _zxy: [ '4/0/8' ],
             _center: [ -170.73, -14.318 ]
@@ -118,5 +118,47 @@ it('getIndexableDocs', function(done) {
     });
 });
 
+it('geocoderCentroid ABW', function(done) {
+    from.geocoderCentroid('ABW', ['4/4/7'], function(err, center) {
+        assert.ifError(err);
+        assert.deepEqual([ -70.3125, 12.554563528593656 ], center);
+        done();
+    });
 });
+it('geocoderCentroid ASM', function(done) {
+    from.geocoderCentroid('ASM', ['4/0/8'], function(err, center) {
+        assert.ifError(err);
+        assert.deepEqual([-170.859375,-14.264383087562637], center);
+        done();
+    });
+});
+
+it('geocoderCentroid USA', function(done) {
+    from.geocoderCentroid('USA', [
+        '4/0/7',
+        '4/0/6',
+        '4/0/5',
+        '4/0/4',
+        '4/0/3',
+        '4/1/7',
+        '4/1/4',
+        '4/1/3',
+        '4/2/6',
+        '4/2/5',
+        '4/2/4',
+        '4/3/6',
+        '4/3/5',
+        '4/4/7',
+        '4/4/6',
+        '4/4/5',
+        '4/5/5',
+        '4/15/5'
+    ], function(err, center) {
+        assert.ifError(err);
+        assert.deepEqual([-118.828125,46.07323062540835], center);
+        done();
+    });
+});
+
+});
 