diff --git a/lib/ofac/index.js b/lib/ofac/index.js index b0e7691d..8db51586 100644 --- a/lib/ofac/index.js +++ b/lib/ofac/index.js @@ -29,7 +29,7 @@ function load () { parser.parse )) .then(result => { - structs = result + return (structs = result) }) } @@ -58,12 +58,17 @@ function match (nameParts, birthDateString, threshold) { const wordPhonetics = _.flatMap(_.get('phonetics'), words) // birthDateString is in YYYYMMDD format - const year = parseInt(birthDateString.slice(0, 4)) - const month = parseInt(birthDateString.slice(4, 6)) - const day = parseInt(birthDateString.slice(6, 8)) - const date = new Date(year, month - 1, day) + const birthDate = _.cond([ + [_.identity, () => { + const year = parseInt(birthDateString.slice(0, 4)) + const month = parseInt(birthDateString.slice(4, 6)) + const day = parseInt(birthDateString.slice(6, 8)) + const date = new Date(year, month - 1, day) - const birthDate = {year, month, day, date} + return {year, month, day, date} + }], + [_.stubTrue, () => null] + ])(birthDateString) const candidate = {parts, fullName, wordValues, wordPhonetics, birthDate} // debug_log(candidate) diff --git a/lib/ofac/matching.js b/lib/ofac/matching.js index b2c7f622..1f600759 100644 --- a/lib/ofac/matching.js +++ b/lib/ofac/matching.js @@ -20,6 +20,7 @@ function isDateWithinSomeDaysOfPeriod (period, date, days) { } const isBornTooLongSince = _.curry((days, dateObject, individual) => { + if (!dateObject) return false if (_.isEmpty(individual.birthDatePeriods)) return false const isWithinSomeYears = _.partialRight(isDateWithinSomeDaysOfPeriod, [dateObject.date, days]) return !_.some(isWithinSomeYears, individual.birthDatePeriods) diff --git a/lib/ofac/parsing.js b/lib/ofac/parsing.js index b2f410a4..913228c1 100644 --- a/lib/ofac/parsing.js +++ b/lib/ofac/parsing.js @@ -120,7 +120,7 @@ function processProfile (profileNode) { const birthDatePeriods = mapCompact(processFeature, profileNode.Feature) const individual = {id, aliases, birthDatePeriods} - debug_log(individual) + // debug_log(individual) return individual } diff --git a/tests/ofac/matching.js b/tests/ofac/matching.js index f73e82d4..25513ef9 100644 --- a/tests/ofac/matching.js +++ b/tests/ofac/matching.js @@ -1,10 +1,141 @@ const assert = require('assert') -const parser = require('../../lib/ofac/matching') +const ofac = require('../../lib/ofac') +const _ = require('lodash/fp') + +let structs +let fullNames + +const rand = N => _.random(0, N - 1) + +const duplicate = (word, index) => { + const c = word[index] + return _.join('', [word.slice(0, index), c, c, word.slice(index + 1)]) +} + +const remove = (word, index) => { + return _.join('', [word.slice(0, index), word.slice(index + 1)]) +} + +const transpose = (word, index) => { + const a = word[index] + const b = word[index + 1] || ' ' + return _.join('', [word.slice(0, index), b, a, word.slice(index + 2)]) +} + +const alter = (word, index) => { + const c = word.charCodeAt(index) + const o = c - 'a'.charCodeAt(0) + const oo = (o + _.random(1, 26)) % 26 + const cc = String.fromCharCode(oo + 'a'.charCodeAt(0)) + return _.join('', [word.slice(0, index), cc, word.slice(index + 1)]) +} + +const misspellOps = [ + duplicate, + remove, + transpose, + alter +] + +const misspell = word => { + const len = word.length + const index = _.random(1, len) + const operation = _.sample(misspellOps) + return operation(word, index) +} + +const misspellRandomly = word => { + const len = word.length + const num = _.random(1, Math.sqrt(len)) + return _.flow(..._.times(() => misspell, num))(word) +} describe('OFAC', function () { describe('Matching', function () { - it('should ...') + before(function () { + this.timeout(60000) + return ofac.load() + .then(result => { + structs = result + const {individuals} = structs + fullNames = _.flow( + _.flatMap('aliases'), + _.map('fullName') + )(individuals) + }) + }) + + it('should match the exact full names of suspects', function () { + this.timeout(0) + + for (const fullName of fullNames) { + const matches = ofac.match({firstName: fullName}, null, 1) + assert.ok(matches.length > 0) + } + }) + + it('should match the permutated full names of suspects', function () { + this.timeout(0) + + for (const fullName of fullNames) { + const reversed = _.flow( + _.split(' '), + _.reverse, + _.join(' ') + )(fullName) + + const matches = ofac.match({firstName: reversed}, null, 1) + assert.ok(matches.length > 0) + } + }) + + it('should match despite some misspellings', function () { + this.timeout(0) + this.retries(4) + + for (const fullName of fullNames) { + const lightlyMisspelled = misspell(fullName) + + const heavilyMisspelled = _.flow( + _.split(' '), + _.map(misspell), + _.join(' ') + )(fullName) + + console.log(fullName, '|', lightlyMisspelled, '|', heavilyMisspelled) + + const matchesA = ofac.match({firstName: lightlyMisspelled}, null, 0.95) + assert.ok(matchesA.length > 0) + + const matchesB = ofac.match({firstName: heavilyMisspelled}, null, 0.85) + assert.ok(matchesB.length > 0) + } + }) + + it('should match phonetically similar words', function () { + this.timeout(0) + this.retries(4) + + for (const fullName of fullNames) { + const lightlyMisspelled = misspell(fullName) + + const heavilyMisspelled = _.flow( + _.split(' '), + _.map(misspell), + _.join(' ') + )(fullName) + + console.log(fullName, '|', lightlyMisspelled, '|', heavilyMisspelled) + + const matchesA = ofac.match({firstName: lightlyMisspelled}, null, 1) + assert.ok(matchesA.length > 0) + } + }) + + it('should match names that are only a single word') + + it('should discard matches with inapropriate birthdates') }) }) diff --git a/tests/ofac/parsing.js b/tests/ofac/parsing.js index a18fb873..eda58e4d 100644 --- a/tests/ofac/parsing.js +++ b/tests/ofac/parsing.js @@ -86,7 +86,6 @@ const makeBirthDate = birthDate => '' + ` const makeProfile = profile => { - console.log(profile.birthDatePeriods) return '' + `