diff --git a/bin/cert-gen.sh b/bin/cert-gen.sh index 17235119..e0a4e956 100755 --- a/bin/cert-gen.sh +++ b/bin/cert-gen.sh @@ -65,7 +65,8 @@ openssl x509 \ rm /tmp/Lamassu_OP.csr.pem -mkdir -p $OFAC_DATA_DIR +mkdir -p $OFAC_DATA_DIR/sources +touch $OFAC_DATA_DIR/etags.json cat < $CONFIG_DIR/lamassu.json { diff --git a/lib/ofac/index.js b/lib/ofac/index.js index a7c23673..a71ec62b 100644 --- a/lib/ofac/index.js +++ b/lib/ofac/index.js @@ -1,7 +1,7 @@ const fs = require('fs') const path = require('path') const util = require('util') -const parser = require('./parsing') +const loader = require('./loading') const matcher = require('./matching') const nameUtils = require('./name-utils') const options = require('../options') @@ -9,7 +9,7 @@ const _ = require('lodash/fp') const debug_log = require('../pp')(__filename) // KOSTIS TODO: remove -const OFAC_DATA_DIR = options.ofacDataDir +const OFAC_SOURCES_DIR = path.join(options.ofacDataDir, 'sources') let structs = null @@ -18,15 +18,15 @@ const readdir = util.promisify(fs.readdir) function load () { // NOTE: Not sure how you push code updates to existing clients. This problem // might pop up if new code is pushed, without re-doing setup. - if (!OFAC_DATA_DIR) { + if (!OFAC_SOURCES_DIR) { const message = 'The ofacDataDir option has not been set in lamassu.json' return Promise.reject(new Error(message)) } - return readdir(OFAC_DATA_DIR) + return readdir(OFAC_SOURCES_DIR) .then(_.flow( - _.map(file => path.join(OFAC_DATA_DIR, file)), - parser.parse + _.map(file => path.join(OFAC_SOURCES_DIR, file)), + loader.load )) .then(result => { return (structs = result) @@ -42,7 +42,8 @@ function makeCompatible (nameParts) { return _.map(_.zipObject(['partName', 'value']), props) } -function match (nameParts, birthDateString, threshold) { +function match (nameParts, birthDateString, options) { + const {debug} = options if (!structs) { const message = 'The OFAC data sources have not been loaded yet.' 
return Promise.reject(new Error(message)) @@ -68,10 +69,10 @@ function match (nameParts, birthDateString, threshold) { ])(birthDateString) const candidate = {parts, fullName, words, birthDate} - // debug_log(candidate) + debug && debug_log(candidate) - const result = matcher.match(structs, candidate, threshold) - // debug_log(result) + const result = matcher.match(structs, candidate, options) + debug && debug_log(result) return result } diff --git a/lib/ofac/loading.js b/lib/ofac/loading.js new file mode 100644 index 00000000..84827a20 --- /dev/null +++ b/lib/ofac/loading.js @@ -0,0 +1,103 @@ +const fs = require('fs') +const ndjson = require('ndjson') +const _ = require('lodash/fp') + + +const mapAliases = _.curry((iteratee, individuals) => { + const mapIndividual = individual => { + const {id, aliases} = individual + return _.map(alias => iteratee(id, alias), aliases) + } + return _.flatMap(mapIndividual, individuals) +}) + + +const getPhoneticEntries = (individualId, alias) => { + const pairPhoneticsWithValues = word => { + const {value, phonetics} = word + const makeEntry = phonetic => ({value, phonetic, aliasId: alias.id}) + return _.map(makeEntry, phonetics) + } + return _.flatMap(pairPhoneticsWithValues, alias.words) +} + +const producePhoneticMap = _.flow( + mapAliases(getPhoneticEntries), + _.flatten, + _.groupBy(_.get('phonetic')), + _.mapValues(_.flow( + _.map(_.get('aliasId')), + _.uniq + )), + _.toPairs, + entries => new Map(entries) +) + + +const getWords = (individualId, alias) => { + const pairWordsWithIds = word => ({value: word.value, aliasId: alias.id}) + return _.map(pairWordsWithIds, alias.words) +} + +const produceWordList = _.flow( + mapAliases(getWords), + _.flatten, + _.groupBy(_.get('value')), + _.mapValues(_.map(_.get('aliasId'))), + _.toPairs, + _.map(_.zipObject(['value', 'aliasIds'])) +) + +const parseSource = source => { + const individuals = [] + + const readStream = fs.createReadStream(source) + const jsonStream = 
readStream.pipe(ndjson.parse()) + jsonStream.on('data', individual => { + individuals.push(individual) + }) + + return new Promise((resolve, reject) => { + jsonStream.on('error', reject) + jsonStream.on('end', () => { + resolve(individuals) + }) + }) +} + +const load = sources => Promise.all(_.map(parseSource, sources)) + .then(_.flow( + _.flatten, + _.compact, + _.uniqBy(_.get('id')), + individuals => { + + const individualsMap = _.flow( + _.groupBy(_.get('id')), + _.mapValues(_.first), + _.toPairs, + entries => new Map(entries) + )(individuals) + + const makeEntries = (individualId, alias) => [alias.id, alias] + const aliasesMap = new Map(mapAliases(makeEntries, individuals)) + + const getIdPairs = (individualId, alias) => [alias.id, individualId] + const idPairs = mapAliases(getIdPairs, individuals) + const aliasToIndividual = new Map(idPairs) + + const phoneticMap = producePhoneticMap(individuals) + const wordList = produceWordList(individuals) + + return { + individuals, + individualsMap, + aliasesMap, + aliasToIndividual, + phoneticMap, + wordList + } + } + )) + +module.exports = {load} diff --git a/lib/ofac/matching.js b/lib/ofac/matching.js index bcf5023e..712b2c98 100644 --- a/lib/ofac/matching.js +++ b/lib/ofac/matching.js @@ -28,7 +28,8 @@ const isBornTooLongSince = _.curry((days, dateObject, individual) => { // algorithm -function match (structs, candidate, threshold) { +function match (structs, candidate, options) { + const {threshold, ratio = 0.1, debug, verboseFor} = options const {fullName, words, birthDate} = candidate // Accept aliases who's full name matches. 
@@ -44,42 +45,57 @@ function match (structs, candidate, threshold) { )(aliases) - const aliasIds = [] - const phoneticWeight = 0.17 + const aliasIdCounts = new Map() + const phoneticWeight = ratio const stringWeight = 1 - phoneticWeight for (const word of words) { const getPhonetic = phonetic => structs.phoneticMap.get(phonetic) const phoneticMatches = new Set(_.flatMap(getPhonetic, word.phonetics)) + const aliasIds = new Set() + for (const wordEntry of structs.wordList) { const stringScore = stringSimilarity(word.value, wordEntry.value) - if (stringWeight * stringScore + phoneticWeight < threshold) continue + const verbose = _.includes(wordEntry.value, verboseFor) + + if (!verbose && stringWeight * stringScore + phoneticWeight < threshold) continue for (const aliasId of wordEntry.aliasIds) { - const phoneticScore = phoneticMatches.has(aliasId) ? 1 : 0 - const finalScore = stringWeight * stringScore + phoneticWeight * phoneticScore + const phoneticScore = phoneticMatches.has(aliasId) ? 1 : -1 + // const finalScore = stringWeight * stringScore + phoneticWeight * phoneticScore + const finalScore = stringScore + phoneticWeight * phoneticScore + + verbose && console.log(finalScore.toFixed(2), stringScore.toFixed(2), phoneticScore.toFixed(2), word.value, wordEntry.value) if (finalScore >= threshold) { - aliasIds.push(aliasId) + aliasIds.add(aliasId) } } } + + verboseFor && console.log(aliasIds) + + for (const aliasId of aliasIds.values()) { + const count = aliasIdCounts.get(aliasId) || 0 + aliasIdCounts.set(aliasId, count + 1) + } } - const aliasIdsFromNamePart = _.flow( - _.countBy(_.identity), - _.toPairs, - _.reject(_.flow( - _.last, - _.gt(2) - )), - _.map(_.first) - )(aliasIds) + verboseFor && console.log(aliasIdCounts) - // debug_log(aliasIdsFromFullName) - // debug_log(aliasIdsFromNamePart) + const aliasIdsFromNamePart = [] + + for (const [aliasId, count] of aliasIdCounts) { + const {length} = structs.aliasesMap.get(aliasId).words + if (count >= _.min([2, 
words.length, length])) { + aliasIdsFromNamePart.push(aliasId) + } + } + + debug && debug_log(aliasIdsFromFullName) + debug && debug_log(aliasIdsFromNamePart) // Get the full record for each matched id const getIndividual = aliasId => { diff --git a/lib/ofac/parsing.js b/lib/ofac/parsing.js index dc987738..65c97d87 100644 --- a/lib/ofac/parsing.js +++ b/lib/ofac/parsing.js @@ -139,111 +139,29 @@ function processProfile (profileNode) { return individual } -function promiseParseDocument (source) { - return new Promise((resolve, reject) => { - const stream = fs.createReadStream(source) - const xml = new XmlStream(stream) +const parse = (source, callback) => { + const stream = fs.createReadStream(source) + const xml = new XmlStream(stream) - xml.on('error', err => { - xml.pause() - const message = `Error while parsing OFAC data source file (${source}): ${err.message}` - reject(new Error(message)) - }) + xml.on('error', err => { + xml.pause() + const message = `Error while parsing OFAC data source file (${source}): ${err.message}` + callback(new Error(message)) + }) - xml.collect('Alias') - xml.collect('DocumentedName') - xml.collect('DocumentedNamePart') - xml.collect('Feature') - xml.collect('MasterNamePartGroup') + xml.collect('Alias') + xml.collect('DocumentedName') + xml.collect('DocumentedNamePart') + xml.collect('Feature') + xml.collect('MasterNamePartGroup') - const individuals = [] + const forwardProfile = profile => profile && callback(null, profile) - const collectResult = result => result && individuals.push(result) - xml.on('updateElement: Profile', _.flow(processProfile, collectResult)) + xml.on('updateElement: Profile', _.flow(processProfile, forwardProfile)) - xml.on('end', _.wrap(resolve, individuals)) + xml.on('end', () => { + callback(null, null) }) } -const mapAliases = _.curry((iteratee, individuals) => { - const mapIndividual = individual => { - const {id, aliases} = individual - return _.map(alias => iteratee(id, alias), aliases) - } - return 
_.flatMap(mapIndividual, individuals) -}) - - -const getPhoneticEntries = (individualId, alias) => { - const pairPhoneticsWithValues = word => { - const {value, phonetics} = word - const makeEntry = phonetic => ({value, phonetic, aliasId: alias.id}) - return _.map(makeEntry, phonetics) - } - return _.flatMap(pairPhoneticsWithValues, alias.words) -} - -const producePhoneticMap = _.flow( - mapAliases(getPhoneticEntries), - _.flatten, - _.groupBy(_.get('phonetic')), - _.mapValues(_.flow( - _.map(_.get('aliasId')), - _.uniq - )), - _.toPairs, - entries => new Map(entries) -) - - -const getWords = (individualId, alias) => { - const pairWordsWithIds = word => ({value: word.value, aliasId: alias.id}) - return _.map(pairWordsWithIds, alias.words) -} - -const produceWordList = _.flow( - mapAliases(getWords), - _.flatten, - _.groupBy(_.get('value')), - _.mapValues(_.map(_.get('aliasId'))), - _.toPairs, - _.map(_.zipObject(['value', 'aliasIds'])) -) - -function parse (sources) { - return Promise.all(_.map(promiseParseDocument, sources)) - .then(_.flow( - _.flatten, - _.compact, - _.uniqBy(_.get('id')), - individuals => { - const individualsMap = _.flow( - _.groupBy(_.get('id')), - _.mapValues(_.first), - _.toPairs, - entries => new Map(entries) - )(individuals) - - const makeEntries = (individualId, alias) => [alias.id, alias] - const aliasesMap = new Map(mapAliases(makeEntries, individuals)) - - const getIdPairs = (individualId, alias) => [alias.id, individualId] - const idPairs = mapAliases(getIdPairs, individuals) - const aliasToIndividual = new Map(idPairs) - - const phoneticMap = producePhoneticMap(individuals) - const wordList = produceWordList(individuals) - - return { - individuals, - individualsMap, - aliasesMap, - aliasToIndividual, - phoneticMap, - wordList - } - } - )) -} - module.exports = {parse} diff --git a/lib/ofac/update.js b/lib/ofac/update.js new file mode 100644 index 00000000..ad510418 --- /dev/null +++ b/lib/ofac/update.js @@ -0,0 +1,155 @@ +const 
parser = require('./parsing') +const https = require('https') +const url = require('url') +const fs = require('fs') +const path = require('path') +const util = require('util') +const options = require('../options') +const _ = require('lodash/fp') + +const OFAC_DATA_DIR = options.ofacDataDir +const OFAC_SOURCES_DIR = path.join(OFAC_DATA_DIR, 'sources') +const OFAC_SOURCES_FILE = path.join(OFAC_DATA_DIR, 'sources.json') +const OFAC_ETAGS_FILE = path.join(OFAC_DATA_DIR, 'etags.json') +const DOWNLOAD_DIR = path.resolve('/tmp') + + +const readFile = util.promisify(fs.readFile) +const writeFile = util.promisify(fs.writeFile) +const rename = util.promisify(fs.rename) +const unlink = util.promisify(fs.unlink) +const remove = file => { + console.log("remove", file) + return unlink(file) +} + + +const promiseGetEtag = (source) => { + return new Promise((resolve, reject) => { + const {url: sourceUrl} = source + const parsed = url.parse(sourceUrl) + const requestOptions = { + hostname: parsed.hostname, + path: parsed.path, + method: 'HEAD' + } + + const request = https.request(requestOptions, _.flow( + _.get(['headers', 'etag']), + resolve + )) + + request.on('error', reject) + + request.end() + }) +} + +const download = _.curry((dstDir, source) => { + console.log("download", source) + const {url: sourceUrl} = source + const fileName = path.basename(sourceUrl) + const dstFile = path.join(dstDir, fileName) + const file = fs.createWriteStream(dstFile) + + return new Promise((resolve, reject) => { + const request = https.get(sourceUrl, response => { + response.pipe(file); + file.on('finish', () => file.close(() => resolve(dstFile))) + }) + + request.on('error', reject) + }) +}) + +const parseToJson = srcFile => { + console.log("parseToJson", srcFile) + + const dstFile = srcFile.replace(/\.xml$/, '.json') + const writeStream = fs.createWriteStream(dstFile) + + return new Promise((resolve, reject) => { + parser.parse(srcFile, (err, profile) => { + console.log("callback", err, 
profile) + + if (err) { + reject(err) + return + } + + if (!profile) { + writeStream.end() + return + } + + const json = JSON.stringify(profile) + writeStream.write(json + '\n', 'utf-8') + }) + + writeStream.on('error', reject) + writeStream.on('finish', () => resolve(dstFile)) + }) +} + +const moveToSourcesDir = srcFile => { + console.log("moveToSourcesDir", srcFile) + const name = path.basename(srcFile) + const dstFile = path.join(OFAC_SOURCES_DIR, name) + return rename(srcFile, dstFile) +} + + +function update () { + const promiseOldEtags = readFile(OFAC_ETAGS_FILE, {encoding: 'utf-8'}) + .then(json => json.trim() ? JSON.parse(json) || {} : {}) + + const promiseNewEtags = readFile(OFAC_SOURCES_FILE, {encoding: 'utf-8'}) + .then(json => { + const obj = JSON.parse(json) + return obj ? obj.sources : [] + }) + .then(sources => Promise.all(_.map(promiseGetEtag, sources)) + .then(etags => _.map( + ([source, etag]) => ({...source, etag}), + _.zip(sources, etags) + )) + ) + + return Promise.all([promiseOldEtags, promiseNewEtags]) + .then(([oldEtags, newEtags]) => { + console.log("OLD", JSON.stringify(oldEtags, null, 4)) + console.log("NEW", JSON.stringify(newEtags, null, 4)) + + const hasNotChanged = ({name, etag}) => oldEtags[name] === etag + + const downloads = _.flow( + _.reject(hasNotChanged), + _.map(file => download(DOWNLOAD_DIR, file).then(parseToJson)) + )(newEtags) + + const oldFileNames = _.keys(oldEtags) + const newFileNames = _.map(_.get('name'), newEtags) + const missingFileNames = _.difference(oldFileNames, newFileNames) + const resolve = name => path.join(OFAC_SOURCES_DIR, name + '.json') + const missing = _.map(resolve, missingFileNames) + + const etagsJson = _.flow( + _.map(source => [source.name, source.etag]), + _.fromPairs, + obj => JSON.stringify(obj, null, 4) + )(newEtags) + + return Promise.all(downloads) + .then(parsed => { + console.log("finished", parsed) + + const moves = _.map(moveToSourcesDir, parsed) + const deletions = _.map(remove, missing) + const 
updateEtags = writeFile(OFAC_ETAGS_FILE, etagsJson) + + return Promise.all([updateEtags, ...moves, ...deletions]) + }) + }) +} + +module.exports = {update} diff --git a/package-lock.json b/package-lock.json index 62d8b0df..59a6e25e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5855,6 +5855,17 @@ "integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=", "dev": true }, + "ndjson": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/ndjson/-/ndjson-1.5.0.tgz", + "integrity": "sha1-rmA7NrE0vOw0e0UkIrC/mNWDLsg=", + "requires": { + "json-stringify-safe": "5.0.1", + "minimist": "1.2.0", + "split2": "2.2.0", + "through2": "2.0.3" + } + }, "negotiator": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.1.tgz", @@ -7738,6 +7749,14 @@ "through": "2.3.8" } }, + "split2": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/split2/-/split2-2.2.0.tgz", + "integrity": "sha512-RAb22TG39LhI31MbreBgIuKiIKhVsawfTgEGqKHTK87aG+ul/PB8Sqoi3I7kVdRWiCfrKxK3uo4/YUkpNvhPbw==", + "requires": { + "through2": "2.0.3" + } + }, "sprintf-js": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", @@ -8003,7 +8022,6 @@ "version": "2.0.3", "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.3.tgz", "integrity": "sha1-AARWmzfHx0ujnEPzzteNGtlBQL4=", - "dev": true, "requires": { "readable-stream": "2.3.3", "xtend": "4.0.1" diff --git a/package.json b/package.json index fb45262b..acc76cc5 100644 --- a/package.json +++ b/package.json @@ -38,6 +38,7 @@ "minimist": "^1.2.0", "moment": "^2.17.0", "morgan": "^1.8.2", + "ndjson": "^1.5.0", "node-hkdf-sync": "^1.0.0", "node-mailjet": "^3.2.1", "numeral": "^2.0.3", diff --git a/tests/ofac/matching.js b/tests/ofac/matching.js index 5690c72b..9450f4fc 100644 --- a/tests/ofac/matching.js +++ b/tests/ofac/matching.js @@ -9,6 +9,10 @@ let fullNames const rand = N => _.random(0, N - 1) +const letters = _.range('a'.charCodeAt(0), 
'z'.charCodeAt(0) + 1) +const vowels = _.map(c => c.charCodeAt(0), ['a', 'e', 'i', 'o', 'u']) +const consonants = _.difference(letters, vowels) + const duplicate = (word, index) => { const c = word[index] return _.join('', [word.slice(0, index), c, c, word.slice(index + 1)]) @@ -25,10 +29,10 @@ const transpose = (word, index) => { } const alter = (word, index) => { - const c = word.charCodeAt(index) - const o = c - 'a'.charCodeAt(0) - const oo = (o + _.random(1, 26)) % 26 - const cc = String.fromCharCode(oo + 'a'.charCodeAt(0)) + const o = word.charCodeAt(index) + const collection = _.includes(o, vowels) ? vowels : consonants + const oo = _.sample(collection) + const cc = String.fromCharCode(oo) return _.join('', [word.slice(0, index), cc, word.slice(index + 1)]) } @@ -54,15 +58,14 @@ const misspellRandomly = word => { const shiftVowel = word => { - const vowels = 'aeiou' - const indexedVowels = _.flow( _.get('length'), _.range(0), _.zip(_.split('', word)), _.map(_.zipObject(['letter', 'index'])), _.map(indexedLetter => { - const vowelIndex = _.indexOf(indexedLetter.letter, vowels) + const ord = indexedLetter.letter.charCodeAt(0) + const vowelIndex = _.indexOf(ord, vowels) return {...indexedLetter, vowelIndex} }), _.reject(_.flow( @@ -78,7 +81,8 @@ const shiftVowel = word => { : indexedVowel.vowelIndex === 4 ? 
[ -1 ] : [ -1, +1 ] const offset = _.sample(options) - const replacement = vowels[indexedVowel.vowelIndex + offset] + const replacementOrd = vowels[indexedVowel.vowelIndex + offset] + const replacement = String.fromCharCode(replacementOrd) const index = indexedVowel.index return _.join('', [word.slice(0, index), replacement, word.slice(index + 1)]) @@ -110,6 +114,8 @@ const transcribe = word => { } } +const threshold = 0.85 + describe('OFAC', function () { describe('Matching', function () { @@ -130,7 +136,9 @@ describe('OFAC', function () { this.timeout(0) for (const fullName of fullNames) { - const matches = ofac.match({firstName: fullName}, null, 1) + const matches = ofac.match({firstName: fullName}, null, { + threshold,//: 1 + }) assert.ok(!_.isEmpty(matches)) } }) @@ -145,7 +153,9 @@ describe('OFAC', function () { _.join(' ') )(fullName) - const matches = ofac.match({firstName: reversed}, null, 1) + const matches = ofac.match({firstName: reversed}, null, { + threshold,//: 1 + }) assert.ok(!_.isEmpty(matches)) } }) @@ -162,13 +172,29 @@ describe('OFAC', function () { _.join(' ') )(fullName) - const matchesA = ofac.match({firstName: lightlyMisspelled}, null, 0.85) - if (matchesA.length === 0) { console.log(1, fullName, '|', lightlyMisspelled) } - assert.ok(matchesA.length > 0) + const matchesA = ofac.match({firstName: lightlyMisspelled}, null, { + threshold,//: 0.875 + }) + if (_.isEmpty(matchesA)) { + console.log(fullName) + ofac.match({firstName: lightlyMisspelled}, null, { + threshold,//: 0.875, + debug: true + }) + } + assert.ok(!_.isEmpty(matchesA)) - const matchesB = ofac.match({firstName: heavilyMisspelled}, null, 0.75) - if (matchesB.length === 0) { console.log(2, fullName, '|', heavilyMisspelled) } - assert.ok(matchesB.length > 0) + const matchesB = ofac.match({firstName: heavilyMisspelled}, null, { + threshold: threshold - 0.1,//: 0.75 + }) + if (_.isEmpty(matchesB)) { + console.log(fullName) + ofac.match({firstName: heavilyMisspelled}, null, { + 
threshold: threshold - 0.1,//: 0.75, + debug: true + }) + } + assert.ok(!_.isEmpty(matchesB)) } }) @@ -183,8 +209,16 @@ describe('OFAC', function () { continue } - const matches = ofac.match({firstName: transcribed}, null, 0.85) - if (matches.length === 0) { console.log(fullName, '|', transcribed) } + const matches = ofac.match({firstName: transcribed}, null, { + threshold,//: 0.85 + }) + if (_.isEmpty(matches)) { + console.log(fullName) + ofac.match({firstName: transcribed}, null, { + threshold,//: 0.85, + debug: true + }) + } assert.ok(!_.isEmpty(matches)) } }) @@ -204,7 +238,9 @@ describe('OFAC', function () { )) for (const fullName of fullNames) { - const matches = ofac.match({firstName: fullName}, dateString, 1) + const matches = ofac.match({firstName: fullName}, dateString, { + threshold,//: 1 + }) assert.ok(noMatchesWithBirthDates(matches)) } }) @@ -228,16 +264,42 @@ describe('OFAC', function () { for (const lastName of lastNames.slice(0, 100)) { for (firstName of firstNamesMale.slice(0, 100)) { - const matches = ofac.match({firstName, lastName}, null, 0.85) + const matches = ofac.match({firstName, lastName}, null, { + threshold,//: 0.875 + }) + if (!_.isEmpty(matches)) { + ofac.match({firstName, lastName}, null, { + threshold,//: 0.875, + debug: true + }) + } assert.ok(_.isEmpty(matches)) } for (firstName of firstNamesFemale.slice(0, 100)) { - const matches = ofac.match({firstName, lastName}, null, 0.85) + const matches = ofac.match({firstName, lastName}, null, { + threshold,//: 0.875 + }) + if (!_.isEmpty(matches)) { + ofac.match({firstName, lastName}, null, { + threshold,//: 0.875, + debug: true + }) + } assert.ok(_.isEmpty(matches)) } } }) + + it.skip('test', function () { + const firstName = 'hian chariapaporn' + ofac.match({firstName}, null, { + threshold,//: 0.875, + debug: true, + verboseFor: ['hiran', 'chariapaporn'] + }) + }) + }) }) diff --git a/tests/ofac/parsing.js b/tests/ofac/parsing.js index 290de12a..982da6bb 100644 --- 
a/tests/ofac/parsing.js +++ b/tests/ofac/parsing.js @@ -134,148 +134,53 @@ const individualB = {id: '11', aliases: [{id: '15', } +const parseIndividuals = source => { + const individuals = [] + + return new Promise((resolve, reject) => { + parser.parse(source, (err, profile) => { + if (err) { + reject(err) + return + } + + if (!profile) { + resolve(individuals) + return + } + + individuals.push(profile) + }) + }) +} + + describe('OFAC', function () { describe('Parsing', function () { // To detect botched downloads it('should fail on malformed XML', function () { const xml = '' - return makeDataFiles([xml]).then(parser.parse) + return makeDataFiles([xml]) + .then(files => Promise.all(_.map(parseIndividuals, files))) .catch(error => { assert.ok(error instanceof Error) - return true + return 'failed' }) .then(ret => { - assert.equal(ret, true) + assert.equal(ret, 'failed') }) }) - it('should return the expected structs', function () { - const xml = makeXml([individualA]) + it('should return the expected individuals', function () { + const xml = makeXml([individualA, individualB]) - return makeDataFiles([xml]).then(parser.parse) - .then(structs => { - const {individuals} = structs - assert.ok(Array.isArray(individuals)) - assert.equal(individuals.length, 1) - assert.deepEqual(individuals[0], individualA) - - const {individualsMap} = structs - assert.ok(individualsMap instanceof Map) - assert.equal(individualsMap.size, 1) - assert.ok(individualsMap.has('9')) - assert.deepEqual(individualsMap.get('9'), individualA) - - const {aliasToIndividual} = structs - assert.ok(aliasToIndividual instanceof Map) - assert.equal(aliasToIndividual.size, 1) - assert.ok(aliasToIndividual.has('5')) - assert.strictEqual(aliasToIndividual.get('5'), '9') - - const {phoneticMap} = structs - assert.ok(phoneticMap instanceof Map) - assert.equal(phoneticMap.size, 3) - assert.ok(phoneticMap.has('JN')) - assert.deepEqual(phoneticMap.get('JN'), ['5']) - assert.ok(phoneticMap.has('AN')) - 
assert.deepEqual(phoneticMap.get('AN'), ['5']) - assert.ok(phoneticMap.has('T')) - assert.deepEqual(phoneticMap.get('T'), ['5']) - - const {wordList} = structs - assert.ok(Array.isArray(wordList)) - assert.equal(wordList.length, 2) - assert.deepEqual(wordList[0], {value: 'john', aliasIds: ['5']}) - assert.deepEqual(wordList[1], {value: 'doe', aliasIds: ['5']}) - }) - }) - - it('should be able to combine multiple sources', function () { - const xmlA = makeXml([individualA]) - const xmlB = makeXml([individualB]) - - return makeDataFiles([xmlA, xmlB]).then(parser.parse) - .then(structs => { - const {individuals} = structs + return makeDataFiles([xml]) + .then(files => Promise.all(_.map(parseIndividuals, files))) + .then(([individuals]) => { assert.ok(Array.isArray(individuals)) assert.equal(individuals.length, 2) - assert.deepEqual(individuals[0], individualA) - assert.deepEqual(individuals[1], individualB) - - const {individualsMap} = structs - assert.ok(individualsMap instanceof Map) - assert.equal(individualsMap.size, 2) - assert.ok(individualsMap.has('9')) - assert.deepEqual(individualsMap.get('9'), individualA) - assert.ok(individualsMap.has('11')) - assert.deepEqual(individualsMap.get('11'), individualB) - - const {aliasToIndividual} = structs - assert.ok(aliasToIndividual instanceof Map) - assert.equal(aliasToIndividual.size, 2) - assert.ok(aliasToIndividual.has('5')) - assert.strictEqual(aliasToIndividual.get('5'), '9') - assert.ok(aliasToIndividual.has('15')) - assert.strictEqual(aliasToIndividual.get('15'), '11') - - const {phoneticMap} = structs - assert.ok(phoneticMap instanceof Map) - assert.equal(phoneticMap.size, 4) - assert.ok(phoneticMap.has('JN')) - assert.deepEqual(phoneticMap.get('JN'), ['5', '15']) - assert.ok(phoneticMap.has('AN')) - assert.deepEqual(phoneticMap.get('AN'), ['5', '15']) - assert.ok(phoneticMap.has('T')) - assert.deepEqual(phoneticMap.get('T'), ['5', '15']) - - const {wordList} = structs - assert.ok(Array.isArray(wordList)) - 
assert.equal(wordList.length, 4) - assert.deepEqual(wordList[0], {value: 'john', aliasIds: ['5', '15']}) - assert.deepEqual(wordList[1], {value: 'doe', aliasIds: ['5']}) - assert.deepEqual(wordList[2], {value: 'de', aliasIds: ['15']}) - assert.deepEqual(wordList[3], {value: 'gaul', aliasIds: ['15']}) - }) - }) - - it('should remove duplicates from multiple sources', function () { - const xmlA1 = makeXml([individualA, individualA]) - const xmlA2 = makeXml([individualA]) - - return makeDataFiles([xmlA1, xmlA2]).then(parser.parse) - .then(structs => { - const {individuals} = structs - assert.ok(Array.isArray(individuals)) - assert.equal(individuals.length, 1) - assert.deepEqual(individuals[0], individualA) - - const {individualsMap} = structs - assert.ok(individualsMap instanceof Map) - assert.equal(individualsMap.size, 1) - assert.ok(individualsMap.has('9')) - assert.deepEqual(individualsMap.get('9'), individualA) - - const {aliasToIndividual} = structs - assert.ok(aliasToIndividual instanceof Map) - assert.equal(aliasToIndividual.size, 1) - assert.ok(aliasToIndividual.has('5')) - assert.strictEqual(aliasToIndividual.get('5'), '9') - - const {phoneticMap} = structs - assert.ok(phoneticMap instanceof Map) - assert.equal(phoneticMap.size, 3) - assert.ok(phoneticMap.has('JN')) - assert.deepEqual(phoneticMap.get('JN'), ['5']) - assert.ok(phoneticMap.has('AN')) - assert.deepEqual(phoneticMap.get('AN'), ['5']) - assert.ok(phoneticMap.has('T')) - assert.deepEqual(phoneticMap.get('T'), ['5']) - - const {wordList} = structs - assert.ok(Array.isArray(wordList)) - assert.equal(wordList.length, 2) - assert.deepEqual(wordList[0], {value: 'john', aliasIds: ['5']}) - assert.deepEqual(wordList[1], {value: 'doe', aliasIds: ['5']}) + assert.deepEqual(individuals, [individualA, individualB]) }) })