Parsing moved to downloading. Matching is being tweaked.

Konstantin Mamalakis, 2018-03-15 20:36:34 +02:00 (committed by Josh Harvey)
parent 793db0f449
commit b72f5549a5
10 changed files with 456 additions and 276 deletions


@@ -65,7 +65,8 @@ openssl x509 \
 rm /tmp/Lamassu_OP.csr.pem
-mkdir -p $OFAC_DATA_DIR
+mkdir -p $OFAC_DATA_DIR/sources
+touch $OFAC_DATA_DIR/etags.json
 cat <<EOF > $CONFIG_DIR/lamassu.json
 {
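Taken together with lib/ofac/update.js below, this setup change provisions the new on-disk layout under $OFAC_DATA_DIR: an etags.json file recording the last-seen HTTP ETag per upstream source, and a sources/ subdirectory holding the parsed .json sources that lib/ofac/loading.js reads. update() also expects a sources.json listing the upstream files there, though its provisioning is not part of this hunk.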


@@ -1,7 +1,7 @@
 const fs = require('fs')
 const path = require('path')
 const util = require('util')
-const parser = require('./parsing')
+const loader = require('./loading')
 const matcher = require('./matching')
 const nameUtils = require('./name-utils')
 const options = require('../options')
@@ -9,7 +9,7 @@ const _ = require('lodash/fp')
 const debug_log = require('../pp')(__filename) // KOSTIS TODO: remove
-const OFAC_DATA_DIR = options.ofacDataDir
+const OFAC_SOURCES_DIR = path.join(options.ofacDataDir, 'sources')
 let structs = null
@@ -18,15 +18,15 @@ const readdir = util.promisify(fs.readdir)
 function load () {
   // NOTE: Not sure how you push code updates to existing clients. This problem
   // might pop up if new code is pushed, without re-doing setup.
-  if (!OFAC_DATA_DIR) {
+  if (!OFAC_SOURCES_DIR) {
     const message = 'The ofacDataDir option has not been set in lamassu.json'
     return Promise.reject(new Error(message))
   }
-  return readdir(OFAC_DATA_DIR)
+  return readdir(OFAC_SOURCES_DIR)
     .then(_.flow(
-      _.map(file => path.join(OFAC_DATA_DIR, file)),
-      parser.parse
+      _.map(file => path.join(OFAC_SOURCES_DIR, file)),
+      loader.load
     ))
     .then(result => {
       return (structs = result)
@@ -42,7 +42,8 @@ function makeCompatible (nameParts) {
   return _.map(_.zipObject(['partName', 'value']), props)
 }
-function match (nameParts, birthDateString, threshold) {
+function match (nameParts, birthDateString, options) {
+  const {debug} = options
   if (!structs) {
     const message = 'The OFAC data sources have not been loaded yet.'
     return Promise.reject(new Error(message))
   }
@@ -68,10 +69,10 @@ function match (nameParts, birthDateString, threshold) {
   ])(birthDateString)
   const candidate = {parts, fullName, words, birthDate}
-  // debug_log(candidate)
-  const result = matcher.match(structs, candidate, threshold)
-  // debug_log(result)
+  debug && debug_log(candidate)
+  const result = matcher.match(structs, candidate, options)
+  debug && debug_log(result)
   return result
 }
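With this change the public match() takes an options object in place of the bare threshold number, and debug logging becomes opt-in. A minimal sketch of the new call shape, mirroring the test suite below (the module path, name, date, and threshold values are illustrative, not taken from this commit):

  // after load() has resolved, match() returns the matches directly
  const ofac = require('./lib/ofac')
  ofac.load().then(() => {
    const matches = ofac.match(
      {firstName: 'john', lastName: 'doe'},  // name parts to screen
      '1985-04-12',                          // birth date string, may be null
      {threshold: 0.85, debug: false}        // was: a bare numeric threshold
    )
  })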

lib/ofac/loading.js (new file, 103 lines)

@@ -0,0 +1,103 @@
const fs = require('fs')
const ndjson = require('ndjson')
const _ = require('lodash/fp')

const mapAliases = _.curry((iteratee, individuals) => {
  const mapIndividual = individual => {
    const {id, aliases} = individual
    return _.map(alias => iteratee(id, alias), aliases)
  }
  return _.flatMap(mapIndividual, individuals)
})

const getPhoneticEntries = (individualId, alias) => {
  const pairPhoneticsWithValues = word => {
    const {value, phonetics} = word
    const makeEntry = phonetic => ({value, phonetic, aliasId: alias.id})
    return _.map(makeEntry, phonetics)
  }
  return _.flatMap(pairPhoneticsWithValues, alias.words)
}

const producePhoneticMap = _.flow(
  mapAliases(getPhoneticEntries),
  _.flatten,
  _.groupBy(_.get('phonetic')),
  _.mapValues(_.flow(
    _.map(_.get('aliasId')),
    _.uniq
  )),
  _.toPairs,
  entries => new Map(entries)
)

const getWords = (individualId, alias) => {
  const pairWordsWithIds = word => ({value: word.value, aliasId: alias.id})
  return _.map(pairWordsWithIds, alias.words)
}

const produceWordList = _.flow(
  mapAliases(getWords),
  _.flatten,
  _.groupBy(_.get('value')),
  _.mapValues(_.map(_.get('aliasId'))),
  _.toPairs,
  _.map(_.zipObject(['value', 'aliasIds']))
)

const parseSource = source => {
  const individuals = []
  const readStream = fs.createReadStream(source)
  const jsonStream = readStream.pipe(ndjson.parse())
  jsonStream.on('data', individual => {
    individuals.push(individual)
  })
  return new Promise((resolve, reject) => {
    jsonStream.on('error', reject)
    jsonStream.on('end', () => {
      resolve(individuals)
    })
  })
}

const load = sources => Promise.all(_.map(parseSource, sources))
  .then(_.flow(
    _.flatten,
    _.compact,
    _.uniqBy(_.get('id')),
    individuals => {
      const individualsMap = _.flow(
        _.groupBy(_.get('id')),
        _.mapValues(_.first),
        _.toPairs,
        entries => new Map(entries)
      )(individuals)

      const makeEntries = (individualId, alias) => [alias.id, alias]
      const aliasesMap = new Map(mapAliases(makeEntries, individuals))

      const getIdPairs = (individualId, alias) => [alias.id, individualId]
      const idPairs = mapAliases(getIdPairs, individuals)
      const aliasToIndividual = new Map(idPairs)

      const phoneticMap = producePhoneticMap(individuals)
      const wordList = produceWordList(individuals)

      return {
        individuals,
        individualsMap,
        aliasesMap,
        aliasToIndividual,
        phoneticMap,
        wordList
      }
    }
  ))

module.exports = {load}
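For reference, a sketch of the value load() resolves to, with the shapes taken directly from the construction above (the file path is illustrative):

  loader.load(['/opt/ofac/sources/sdn.json']).then(structs => {
    structs.individuals        // deduplicated array of profile records
    structs.individualsMap     // Map: individual id -> record
    structs.aliasesMap         // Map: alias id -> alias record
    structs.aliasToIndividual  // Map: alias id -> owning individual id
    structs.phoneticMap        // Map: phonetic code -> unique alias ids
    structs.wordList           // [{value, aliasIds}] scanned by matching.js
  })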


@@ -28,7 +28,8 @@ const isBornTooLongSince = _.curry((days, dateObject, individual) => {
 // algorithm
-function match (structs, candidate, threshold) {
+function match (structs, candidate, options) {
+  const {threshold, ratio = 0.1, debug, verboseFor} = options
   const {fullName, words, birthDate} = candidate
   // Accept aliases who's full name matches.
@@ -44,42 +45,57 @@ function match (structs, candidate, threshold) {
   )(aliases)
-  const aliasIds = []
-  const phoneticWeight = 0.17
+  const aliasIdCounts = new Map()
+  const phoneticWeight = ratio
   const stringWeight = 1 - phoneticWeight
   for (const word of words) {
     const getPhonetic = phonetic => structs.phoneticMap.get(phonetic)
     const phoneticMatches = new Set(_.flatMap(getPhonetic, word.phonetics))
+    const aliasIds = new Set()
     for (const wordEntry of structs.wordList) {
       const stringScore = stringSimilarity(word.value, wordEntry.value)
-      if (stringWeight * stringScore + phoneticWeight < threshold) continue
+      const verbose = _.includes(wordEntry.value, verboseFor)
+      if (!verbose && stringWeight * stringScore + phoneticWeight < threshold) continue
       for (const aliasId of wordEntry.aliasIds) {
-        const phoneticScore = phoneticMatches.has(aliasId) ? 1 : 0
-        const finalScore = stringWeight * stringScore + phoneticWeight * phoneticScore
+        const phoneticScore = phoneticMatches.has(aliasId) ? 1 : -1
+        // const finalScore = stringWeight * stringScore + phoneticWeight * phoneticScore
+        const finalScore = stringScore + phoneticWeight * phoneticScore
+        verbose && console.log(finalScore.toFixed(2), stringScore.toFixed(2), phoneticScore.toFixed(2), word.value, wordEntry.value)
         if (finalScore >= threshold) {
-          aliasIds.push(aliasId)
+          aliasIds.add(aliasId)
         }
       }
     }
+    verboseFor && console.log(aliasIds)
+    for (const aliasId of aliasIds.values()) {
+      const count = aliasIdCounts.get(aliasId) || 0
+      aliasIdCounts.set(aliasId, count + 1)
+    }
   }
-  const aliasIdsFromNamePart = _.flow(
-    _.countBy(_.identity),
-    _.toPairs,
-    _.reject(_.flow(
-      _.last,
-      _.gt(2)
-    )),
-    _.map(_.first)
-  )(aliasIds)
+  verboseFor && console.log(aliasIdCounts)
+  const aliasIdsFromNamePart = []
+  for (const [aliasId, count] of aliasIdCounts) {
+    const {length} = structs.aliasesMap.get(aliasId).words
+    if (count >= _.min([2, words.length, length])) {
+      aliasIdsFromNamePart.push(aliasId)
+    }
+  }
-  // debug_log(aliasIdsFromFullName)
-  // debug_log(aliasIdsFromNamePart)
+  debug && debug_log(aliasIdsFromFullName)
+  debug && debug_log(aliasIdsFromNamePart)
   // Get the full record for each matched id
   const getIndividual = aliasId => {
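Two scoring tweaks land in this hunk: a phonetic miss now scores -1 instead of 0, and the raw string score is no longer down-weighted, so the phonetic channel acts as a bonus or penalty around the string similarity. Illustrative arithmetic with the default ratio of 0.1 (the similarity value is hypothetical):

  const phoneticWeight = 0.1  // default of the new ratio option
  const stringScore = 0.9     // e.g. 'jhon' against 'john'
  // phonetic hit:  0.9 + 0.1 * (+1) = 1.00  -> passes a 0.85 threshold
  // phonetic miss: 0.9 + 0.1 * (-1) = 0.80  -> rejected at a 0.85 threshold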


@@ -139,111 +139,29 @@ function processProfile (profileNode) {
   return individual
 }
-function promiseParseDocument (source) {
-  return new Promise((resolve, reject) => {
-    const stream = fs.createReadStream(source)
-    const xml = new XmlStream(stream)
+const parse = (source, callback) => {
+  const stream = fs.createReadStream(source)
+  const xml = new XmlStream(stream)
-    xml.on('error', err => {
-      xml.pause()
-      const message = `Error while parsing OFAC data source file (${source}): ${err.message}`
-      reject(new Error(message))
-    })
+  xml.on('error', err => {
+    xml.pause()
+    const message = `Error while parsing OFAC data source file (${source}): ${err.message}`
+    callback(new Error(message))
+  })
   xml.collect('Alias')
   xml.collect('DocumentedName')
   xml.collect('DocumentedNamePart')
   xml.collect('Feature')
   xml.collect('MasterNamePartGroup')
-  const individuals = []
-  const collectResult = result => result && individuals.push(result)
-  xml.on('updateElement: Profile', _.flow(processProfile, collectResult))
+  const forwardProfile = profile => profile && callback(null, profile)
+  xml.on('updateElement: Profile', _.flow(processProfile, forwardProfile))
-  xml.on('end', _.wrap(resolve, individuals))
-  })
+  xml.on('end', () => {
+    callback(null, null)
+  })
 }
-const mapAliases = _.curry((iteratee, individuals) => {
-  const mapIndividual = individual => {
-    const {id, aliases} = individual
-    return _.map(alias => iteratee(id, alias), aliases)
-  }
-  return _.flatMap(mapIndividual, individuals)
-})
-const getPhoneticEntries = (individualId, alias) => {
-  const pairPhoneticsWithValues = word => {
-    const {value, phonetics} = word
-    const makeEntry = phonetic => ({value, phonetic, aliasId: alias.id})
-    return _.map(makeEntry, phonetics)
-  }
-  return _.flatMap(pairPhoneticsWithValues, alias.words)
-}
-const producePhoneticMap = _.flow(
-  mapAliases(getPhoneticEntries),
-  _.flatten,
-  _.groupBy(_.get('phonetic')),
-  _.mapValues(_.flow(
-    _.map(_.get('aliasId')),
-    _.uniq
-  )),
-  _.toPairs,
-  entries => new Map(entries)
-)
-const getWords = (individualId, alias) => {
-  const pairWordsWithIds = word => ({value: word.value, aliasId: alias.id})
-  return _.map(pairWordsWithIds, alias.words)
-}
-const produceWordList = _.flow(
-  mapAliases(getWords),
-  _.flatten,
-  _.groupBy(_.get('value')),
-  _.mapValues(_.map(_.get('aliasId'))),
-  _.toPairs,
-  _.map(_.zipObject(['value', 'aliasIds']))
-)
-function parse (sources) {
-  return Promise.all(_.map(promiseParseDocument, sources))
-    .then(_.flow(
-      _.flatten,
-      _.compact,
-      _.uniqBy(_.get('id')),
-      individuals => {
-        const individualsMap = _.flow(
-          _.groupBy(_.get('id')),
-          _.mapValues(_.first),
-          _.toPairs,
-          entries => new Map(entries)
-        )(individuals)
-        const makeEntries = (individualId, alias) => [alias.id, alias]
-        const aliasesMap = new Map(mapAliases(makeEntries, individuals))
-        const getIdPairs = (individualId, alias) => [alias.id, individualId]
-        const idPairs = mapAliases(getIdPairs, individuals)
-        const aliasToIndividual = new Map(idPairs)
-        const phoneticMap = producePhoneticMap(individuals)
-        const wordList = produceWordList(individuals)
-        return {
-          individuals,
-          individualsMap,
-          aliasesMap,
-          aliasToIndividual,
-          phoneticMap,
-          wordList
-        }
-      }
-    ))
-}
 module.exports = {parse}
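parse() is now streaming: the callback fires once per processed Profile and then once with (null, null) to signal the end of the document. A minimal consumer that collects all profiles into an array (the parsing tests below use this same pattern):

  const collectProfiles = source => new Promise((resolve, reject) => {
    const profiles = []
    parser.parse(source, (err, profile) => {
      if (err) return reject(err)
      if (!profile) return resolve(profiles)  // null profile marks end of stream
      profiles.push(profile)
    })
  })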

lib/ofac/update.js (new file, 155 lines)

@@ -0,0 +1,155 @@
const parser = require('./parsing')
const https = require('https')
const url = require('url')
const fs = require('fs')
const path = require('path')
const util = require('util')
const options = require('../options')
const _ = require('lodash/fp')

const OFAC_DATA_DIR = options.ofacDataDir
const OFAC_SOURCES_DIR = path.join(OFAC_DATA_DIR, 'sources')
const OFAC_SOURCES_FILE = path.join(OFAC_DATA_DIR, 'sources.json')
const OFAC_ETAGS_FILE = path.join(OFAC_DATA_DIR, 'etags.json')
const DOWNLOAD_DIR = path.resolve('/tmp')

const readFile = util.promisify(fs.readFile)
const writeFile = util.promisify(fs.writeFile)
const rename = util.promisify(fs.rename)
const unlink = util.promisify(fs.unlink)

const remove = file => {
  console.log("remove", file)
  return unlink(file)
}

const promiseGetEtag = (source) => {
  return new Promise((resolve, reject) => {
    const {url: sourceUrl} = source
    const parsed = url.parse(sourceUrl)
    const requestOptions = {
      hostname: parsed.hostname,
      path: parsed.path,
      method: 'HEAD'
    }
    const request = https.request(requestOptions, _.flow(
      _.get(['headers', 'etag']),
      resolve
    ))
    request.on('error', reject)
    request.end()
  })
}

const download = _.curry((dstDir, source) => {
  console.log("download", source)
  const {url: sourceUrl} = source
  const fileName = path.basename(sourceUrl)
  const dstFile = path.join(dstDir, fileName)
  const file = fs.createWriteStream(dstFile)
  return new Promise((resolve, reject) => {
    const request = https.get(sourceUrl, response => {
      response.pipe(file);
      file.on('finish', () => file.close(() => resolve(dstFile)))
    })
    request.on('error', reject)
  })
})

const parseToJson = srcFile => {
  console.log("parseToJson", srcFile)
  const dstFile = srcFile.replace(/\.xml$/, '.json')
  const writeStream = fs.createWriteStream(dstFile)
  return new Promise((resolve, reject) => {
    parser.parse(srcFile, (err, profile) => {
      console.log("callback", err, profile)
      if (err) {
        reject(err)
        return
      }
      if (!profile) {
        writeStream.end()
        return
      }
      const json = JSON.stringify(profile)
      writeStream.write(json + '\n', 'utf-8')
    })
    writeStream.on('error', reject)
    writeStream.on('finish', () => resolve(dstFile))
  })
}

const moveToSourcesDir = srcFile => {
  console.log("moveToSourcesDir", srcFile)
  const name = path.basename(srcFile)
  const dstFile = path.join(OFAC_SOURCES_DIR, name)
  return rename(srcFile, dstFile)
}

function update () {
  const promiseOldEtags = readFile(OFAC_ETAGS_FILE, {encoding: 'utf-8'})
    .then(json => JSON.parse(json) || {})

  const promiseNewEtags = readFile(OFAC_SOURCES_FILE, {encoding: 'utf-8'})
    .then(json => {
      const obj = JSON.parse(json)
      return obj ? obj.sources : []
    })
    .then(sources => Promise.all(_.map(promiseGetEtag, sources))
      .then(etags => _.map(
        ([source, etag]) => ({...source, etag}),
        _.zip(sources, etags)
      ))
    )

  return Promise.all([promiseOldEtags, promiseNewEtags])
    .then(([oldEtags, newEtags]) => {
      console.log("OLD", JSON.stringify(oldEtags, null, 4))
      console.log("NEW", JSON.stringify(newEtags, null, 4))
      const hasNotChanged = ({name, etag}) => oldEtags[name] === etag

      const downloads = _.flow(
        _.reject(hasNotChanged),
        _.map(file => download(DOWNLOAD_DIR, file).then(parseToJson))
      )(newEtags)

      const oldFileNames = _.keys(oldEtags)
      const newFileNames = _.map(_.get('name'), newEtags)
      const missingFileNames = _.difference(oldFileNames, newFileNames)
      const resolve = name => path.join(OFAC_SOURCES_DIR, name + '.json')
      const missing = _.map(resolve, missingFileNames)

      const etagsJson = _.flow(
        _.map(source => [source.name, source.etag]),
        _.fromPairs,
        obj => JSON.stringify(obj, null, 4)
      )(newEtags)

      return Promise.all(downloads)
        .then(parsed => {
          console.log("finished", parsed)
          const moves = _.map(moveToSourcesDir, parsed)
          const deletions = _.map(remove, missing)
          const updateEtags = writeFile(OFAC_ETAGS_FILE, etagsJson)
          return Promise.all([updateEtags, ...moves, ...deletions])
        })
    })
}

module.exports = {update}
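update() drives everything off two small JSON files in OFAC_DATA_DIR. Their shapes below are inferred from the reads and writes above; the source name, URL, and ETag value are illustrative, not taken from this commit:

  // sources.json: the upstream files to mirror
  {"sources": [{"name": "sdn_advanced",
                "url": "https://example.com/ofac/sdn_advanced.xml"}]}

  // etags.json: written back by update(); maps source name to last-seen ETag
  {"sdn_advanced": "\"16b9-5671f2d8\""}

Only sources whose ETag differs from the stored one are re-downloaded, converted to newline-delimited JSON, and moved into sources/; sources dropped from sources.json have their parsed .json files removed.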

package-lock.json (generated, 20 lines)

@@ -5855,6 +5855,17 @@
     "integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=",
     "dev": true
   },
+  "ndjson": {
+    "version": "1.5.0",
+    "resolved": "https://registry.npmjs.org/ndjson/-/ndjson-1.5.0.tgz",
+    "integrity": "sha1-rmA7NrE0vOw0e0UkIrC/mNWDLsg=",
+    "requires": {
+      "json-stringify-safe": "5.0.1",
+      "minimist": "1.2.0",
+      "split2": "2.2.0",
+      "through2": "2.0.3"
+    }
+  },
   "negotiator": {
     "version": "0.6.1",
     "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.1.tgz",
@@ -7738,6 +7749,14 @@
       "through": "2.3.8"
     }
   },
+  "split2": {
+    "version": "2.2.0",
+    "resolved": "https://registry.npmjs.org/split2/-/split2-2.2.0.tgz",
+    "integrity": "sha512-RAb22TG39LhI31MbreBgIuKiIKhVsawfTgEGqKHTK87aG+ul/PB8Sqoi3I7kVdRWiCfrKxK3uo4/YUkpNvhPbw==",
+    "requires": {
+      "through2": "2.0.3"
+    }
+  },
   "sprintf-js": {
     "version": "1.0.3",
     "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz",
@@ -8003,7 +8022,6 @@
     "version": "2.0.3",
     "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.3.tgz",
     "integrity": "sha1-AARWmzfHx0ujnEPzzteNGtlBQL4=",
-    "dev": true,
     "requires": {
       "readable-stream": "2.3.3",
       "xtend": "4.0.1"


@@ -38,6 +38,7 @@
     "minimist": "^1.2.0",
     "moment": "^2.17.0",
     "morgan": "^1.8.2",
+    "ndjson": "^1.5.0",
     "node-hkdf-sync": "^1.0.0",
     "node-mailjet": "^3.2.1",
     "numeral": "^2.0.3",


@@ -9,6 +9,10 @@ let fullNames
 const rand = N => _.random(0, N - 1)
+const letters = _.range('a'.charCodeAt(0), 'z'.charCodeAt(0))
+const vowels = _.map(c => c.charCodeAt(0), ['a', 'e', 'i', 'o', 'u'])
+const consonants = _.difference(letters, vowels)
 const duplicate = (word, index) => {
   const c = word[index]
   return _.join('', [word.slice(0, index), c, c, word.slice(index + 1)])
@@ -25,10 +29,10 @@ const transpose = (word, index) => {
 }
 const alter = (word, index) => {
-  const c = word.charCodeAt(index)
-  const o = c - 'a'.charCodeAt(0)
-  const oo = (o + _.random(1, 26)) % 26
-  const cc = String.fromCharCode(oo + 'a'.charCodeAt(0))
+  const o = word.charCodeAt(index)
+  const collection = _.includes(o, vowels) ? vowels : consonants
+  const oo = _.sample(collection)
+  const cc = String.fromCharCode(oo)
   return _.join('', [word.slice(0, index), cc, word.slice(index + 1)])
 }
@@ -54,15 +58,14 @@ const misspellRandomly = word => {
 const shiftVowel = word => {
-  const vowels = 'aeiou'
   const indexedVowels = _.flow(
     _.get('length'),
     _.range(0),
     _.zip(_.split('', word)),
     _.map(_.zipObject(['letter', 'index'])),
     _.map(indexedLetter => {
-      const vowelIndex = _.indexOf(indexedLetter.letter, vowels)
+      const ord = indexedLetter.letter.charCodeAt(0)
+      const vowelIndex = _.indexOf(ord, vowels)
       return {...indexedLetter, vowelIndex}
     }),
     _.reject(_.flow(
@@ -78,7 +81,8 @@ const shiftVowel = word => {
     : indexedVowel.vowelIndex === 4 ? [ -1 ]
     : [ -1, +1 ]
   const offset = _.sample(options)
-  const replacement = vowels[indexedVowel.vowelIndex + offset]
+  const replacementOrd = vowels[indexedVowel.vowelIndex + offset]
+  const replacement = String.fromCharCode(replacementOrd)
   const index = indexedVowel.index
   return _.join('', [word.slice(0, index), replacement, word.slice(index + 1)])
@@ -110,6 +114,8 @@ const transcribe = word => {
   }
 }
+const threshold = 0.85
 describe('OFAC', function () {
   describe('Matching', function () {
@@ -130,7 +136,9 @@ describe('OFAC', function () {
       this.timeout(0)
       for (const fullName of fullNames) {
-        const matches = ofac.match({firstName: fullName}, null, 1)
+        const matches = ofac.match({firstName: fullName}, null, {
+          threshold,//: 1
+        })
         assert.ok(!_.isEmpty(matches))
       }
     })
@@ -145,7 +153,9 @@ describe('OFAC', function () {
          _.join(' ')
        )(fullName)
-       const matches = ofac.match({firstName: reversed}, null, 1)
+       const matches = ofac.match({firstName: reversed}, null, {
+         threshold,//: 1
+       })
        assert.ok(!_.isEmpty(matches))
      }
    })
@@ -162,13 +172,29 @@ describe('OFAC', function () {
          _.join(' ')
        )(fullName)
-       const matchesA = ofac.match({firstName: lightlyMisspelled}, null, 0.85)
-       if (matchesA.length === 0) { console.log(1, fullName, '|', lightlyMisspelled) }
-       assert.ok(matchesA.length > 0)
+       const matchesA = ofac.match({firstName: lightlyMisspelled}, null, {
+         threshold,//: 0.875
+       })
+       if (_.isEmpty(matchesA)) {
+         console.log(fullName)
+         ofac.match({firstName: lightlyMisspelled}, null, {
+           threshold,//: 0.875,
+           debug: true
+         })
+       }
+       assert.ok(!_.isEmpty(matchesA))
-       const matchesB = ofac.match({firstName: heavilyMisspelled}, null, 0.75)
-       if (matchesB.length === 0) { console.log(2, fullName, '|', heavilyMisspelled) }
-       assert.ok(matchesB.length > 0)
+       const matchesB = ofac.match({firstName: heavilyMisspelled}, null, {
+         threshold: threshold - 0.1,//: 0.75
+       })
+       if (_.isEmpty(matchesB)) {
+         console.log(fullName)
+         ofac.match({firstName: heavilyMisspelled}, null, {
+           threshold: threshold - 0.1,//: 0.75,
+           debug: true
+         })
+       }
+       assert.ok(!_.isEmpty(matchesB))
     }
   })
@@ -183,8 +209,16 @@ describe('OFAC', function () {
         continue
       }
-      const matches = ofac.match({firstName: transcribed}, null, 0.85)
-      if (matches.length === 0) { console.log(fullName, '|', transcribed) }
+      const matches = ofac.match({firstName: transcribed}, null, {
+        threshold,//: 0.85
+      })
+      if (_.isEmpty(matches)) {
+        console.log(fullName)
+        ofac.match({firstName: transcribed}, null, {
+          threshold,//: 0.85,
+          debug: true
+        })
+      }
       assert.ok(!_.isEmpty(matches))
     }
   })
@@ -204,7 +238,9 @@ describe('OFAC', function () {
      ))
      for (const fullName of fullNames) {
-       const matches = ofac.match({firstName: fullName}, dateString, 1)
+       const matches = ofac.match({firstName: fullName}, dateString, {
+         threshold,//: 1
+       })
        assert.ok(noMatchesWithBirthDates(matches))
      }
    })
@@ -228,16 +264,42 @@ describe('OFAC', function () {
     for (const lastName of lastNames.slice(0, 100)) {
       for (firstName of firstNamesMale.slice(0, 100)) {
-        const matches = ofac.match({firstName, lastName}, null, 0.85)
+        const matches = ofac.match({firstName, lastName}, null, {
+          threshold,//: 0.875
+        })
+        if (!_.isEmpty(matches)) {
+          ofac.match({firstName, lastName}, null, {
+            threshold,//: 0.875,
+            debug: true
+          })
+        }
         assert.ok(_.isEmpty(matches))
       }
       for (firstName of firstNamesFemale.slice(0, 100)) {
-        const matches = ofac.match({firstName, lastName}, null, 0.85)
+        const matches = ofac.match({firstName, lastName}, null, {
+          threshold,//: 0.875
+        })
+        if (!_.isEmpty(matches)) {
+          ofac.match({firstName, lastName}, null, {
+            threshold,//: 0.875,
+            debug: true
+          })
+        }
         assert.ok(_.isEmpty(matches))
       }
     }
   })
+  it.skip('test', function () {
+    const firstName = 'hian chariapaporn'
+    ofac.match({firstName}, null, {
+      threshold,//: 0.875,
+      debug: true,
+      verboseFor: ['hiran', 'chariapaporn']
+    })
+  })
 })
 })


@@ -134,148 +134,53 @@ const individualB = {id: '11', aliases: [{id: '15',
 }
+const parseIndividuals = source => {
+  const individuals = []
+  return new Promise((resolve, reject) => {
+    parser.parse(source, (err, profile) => {
+      if (err) {
+        reject(err)
+        return
+      }
+      if (!profile) {
+        resolve(individuals)
+        return
+      }
+      individuals.push(profile)
+    })
+  })
+}
 describe('OFAC', function () {
   describe('Parsing', function () {
     // To detect botched downloads
     it('should fail on malformed XML', function () {
       const xml = '<a><b></a>'
-      return makeDataFiles([xml]).then(parser.parse)
+      return makeDataFiles([xml])
+        .then(files => Promise.all(_.map(parseIndividuals, files)))
         .catch(error => {
           assert.ok(error instanceof Error)
-          return true
+          return 'failed'
         })
         .then(ret => {
-          assert.equal(ret, true)
+          assert.equal(ret, 'failed')
        })
     })
-    it('should return the expected structs', function () {
-      const xml = makeXml([individualA])
-      return makeDataFiles([xml]).then(parser.parse)
-        .then(structs => {
-          const {individuals} = structs
-          assert.ok(Array.isArray(individuals))
-          assert.equal(individuals.length, 1)
-          assert.deepEqual(individuals[0], individualA)
-          const {individualsMap} = structs
-          assert.ok(individualsMap instanceof Map)
-          assert.equal(individualsMap.size, 1)
-          assert.ok(individualsMap.has('9'))
-          assert.deepEqual(individualsMap.get('9'), individualA)
-          const {aliasToIndividual} = structs
-          assert.ok(aliasToIndividual instanceof Map)
-          assert.equal(aliasToIndividual.size, 1)
-          assert.ok(aliasToIndividual.has('5'))
-          assert.strictEqual(aliasToIndividual.get('5'), '9')
-          const {phoneticMap} = structs
-          assert.ok(phoneticMap instanceof Map)
-          assert.equal(phoneticMap.size, 3)
-          assert.ok(phoneticMap.has('JN'))
-          assert.deepEqual(phoneticMap.get('JN'), ['5'])
-          assert.ok(phoneticMap.has('AN'))
-          assert.deepEqual(phoneticMap.get('AN'), ['5'])
-          assert.ok(phoneticMap.has('T'))
-          assert.deepEqual(phoneticMap.get('T'), ['5'])
-          const {wordList} = structs
-          assert.ok(Array.isArray(wordList))
-          assert.equal(wordList.length, 2)
-          assert.deepEqual(wordList[0], {value: 'john', aliasIds: ['5']})
-          assert.deepEqual(wordList[1], {value: 'doe', aliasIds: ['5']})
-        })
-    })
-    it('should be able to combine multiple sources', function () {
-      const xmlA = makeXml([individualA])
-      const xmlB = makeXml([individualB])
-      return makeDataFiles([xmlA, xmlB]).then(parser.parse)
-        .then(structs => {
-          const {individuals} = structs
+    it('should return the expected individuals', function () {
+      const xml = makeXml([individualA, individualB])
+      return makeDataFiles([xml])
+        .then(files => Promise.all(_.map(parseIndividuals, files)))
+        .then(([individuals]) => {
           assert.ok(Array.isArray(individuals))
           assert.equal(individuals.length, 2)
-          assert.deepEqual(individuals[0], individualA)
-          assert.deepEqual(individuals[1], individualB)
-          const {individualsMap} = structs
-          assert.ok(individualsMap instanceof Map)
-          assert.equal(individualsMap.size, 2)
-          assert.ok(individualsMap.has('9'))
-          assert.deepEqual(individualsMap.get('9'), individualA)
-          assert.ok(individualsMap.has('11'))
-          assert.deepEqual(individualsMap.get('11'), individualB)
-          const {aliasToIndividual} = structs
-          assert.ok(aliasToIndividual instanceof Map)
-          assert.equal(aliasToIndividual.size, 2)
-          assert.ok(aliasToIndividual.has('5'))
-          assert.strictEqual(aliasToIndividual.get('5'), '9')
-          assert.ok(aliasToIndividual.has('15'))
-          assert.strictEqual(aliasToIndividual.get('15'), '11')
-          const {phoneticMap} = structs
-          assert.ok(phoneticMap instanceof Map)
-          assert.equal(phoneticMap.size, 4)
-          assert.ok(phoneticMap.has('JN'))
-          assert.deepEqual(phoneticMap.get('JN'), ['5', '15'])
-          assert.ok(phoneticMap.has('AN'))
-          assert.deepEqual(phoneticMap.get('AN'), ['5', '15'])
-          assert.ok(phoneticMap.has('T'))
-          assert.deepEqual(phoneticMap.get('T'), ['5', '15'])
-          const {wordList} = structs
-          assert.ok(Array.isArray(wordList))
-          assert.equal(wordList.length, 4)
-          assert.deepEqual(wordList[0], {value: 'john', aliasIds: ['5', '15']})
-          assert.deepEqual(wordList[1], {value: 'doe', aliasIds: ['5']})
-          assert.deepEqual(wordList[2], {value: 'de', aliasIds: ['15']})
-          assert.deepEqual(wordList[3], {value: 'gaul', aliasIds: ['15']})
+          assert.deepEqual(individuals, [individualA, individualB])
         })
     })
-    it('should remove duplicates from multiple sources', function () {
-      const xmlA1 = makeXml([individualA, individualA])
-      const xmlA2 = makeXml([individualA])
-      return makeDataFiles([xmlA1, xmlA2]).then(parser.parse)
-        .then(structs => {
-          const {individuals} = structs
-          assert.ok(Array.isArray(individuals))
-          assert.equal(individuals.length, 1)
-          assert.deepEqual(individuals[0], individualA)
-          const {individualsMap} = structs
-          assert.ok(individualsMap instanceof Map)
-          assert.equal(individualsMap.size, 1)
-          assert.ok(individualsMap.has('9'))
-          assert.deepEqual(individualsMap.get('9'), individualA)
-          const {aliasToIndividual} = structs
-          assert.ok(aliasToIndividual instanceof Map)
-          assert.equal(aliasToIndividual.size, 1)
-          assert.ok(aliasToIndividual.has('5'))
-          assert.strictEqual(aliasToIndividual.get('5'), '9')
-          const {phoneticMap} = structs
-          assert.ok(phoneticMap instanceof Map)
-          assert.equal(phoneticMap.size, 3)
-          assert.ok(phoneticMap.has('JN'))
-          assert.deepEqual(phoneticMap.get('JN'), ['5'])
-          assert.ok(phoneticMap.has('AN'))
-          assert.deepEqual(phoneticMap.get('AN'), ['5'])
-          assert.ok(phoneticMap.has('T'))
-          assert.deepEqual(phoneticMap.get('T'), ['5'])
-          const {wordList} = structs
-          assert.ok(Array.isArray(wordList))
-          assert.equal(wordList.length, 2)
-          assert.deepEqual(wordList[0], {value: 'john', aliasIds: ['5']})
-          assert.deepEqual(wordList[1], {value: 'doe', aliasIds: ['5']})
-        })
-    })
   })
 })