diff --git a/lib/ofac/data-parser.js b/lib/ofac/data-parser.js index bc77cca6..7a5a343c 100644 --- a/lib/ofac/data-parser.js +++ b/lib/ofac/data-parser.js @@ -7,7 +7,7 @@ const options = require('../options') const logger = require('../logger') const _ = require('lodash/fp') -const debug_log = require('./debug') // KOSTIS TODO: remove +const debug_log = require('../pp')(__filename) // KOSTIS TODO: remove const OFAC_DATA_DIR = options.ofacDataDir @@ -71,12 +71,12 @@ const processAlias = _.curry((groupTypes, aliasNode) => { const nameParts = _.map(getNamePart, namePartNodes) const parts = _.assignAll(nameParts) - const fullName = nameUtils.fullNameFromParts(parts) + const fullNames = nameUtils.makeFullNames(parts) const phoneticParts = _.mapValues(nameUtils.phonetic, parts) - const phoneticFullName = nameUtils.phonetic(fullName) + const phoneticFullNames = _.map(nameUtils.phonetic, fullNames) - return {parts, fullName, phoneticParts, phoneticFullName} + return {parts, fullNames, phoneticParts, phoneticFullNames} }) // birth date @@ -116,7 +116,7 @@ function processProfile (profileNode) { const groupTypesEntries = _.map(processMasterNamePartGroup, identityNode.NamePartGroups.MasterNamePartGroup) const groupTypes = new Map(groupTypesEntries) - const mapCompact = _.flow(_.compact, _.map) + const mapCompact = _.flow(_.map, _.compact) const getNameParts = processAlias(groupTypes) const aliases = mapCompact(getNameParts, identityNode.Alias) diff --git a/lib/ofac/debug.js b/lib/ofac/debug.js deleted file mode 100644 index 93247731..00000000 --- a/lib/ofac/debug.js +++ /dev/null @@ -1 +0,0 @@ -module.exports = (...args) => console.log(require('util').inspect(args, {depth: null, colors: true})) diff --git a/lib/ofac/index.js b/lib/ofac/index.js index 4e698ecf..c7e5c171 100644 --- a/lib/ofac/index.js +++ b/lib/ofac/index.js @@ -1,27 +1,32 @@ -const dataParser = require('./data-paraser') +const dataParser = require('./data-parser') const nameUtils = require('./name-utils') const jaroWinkler = require('talisman/metrics/distance/jaro-winkler') const _ = require('lodash/fp') -const debug_log = require('./debug') // KOSTIS TODO: remove +const debug_log = require('../pp')(__filename) // KOSTIS TODO: remove -let individuals = [] +let individuals = null function load () { - individuals = Array.from(dataParser.parseList()) + return dataParser.parseList() + .then(list => { + individuals = Array.from(list) + }) } // MATCHING +const mapMax = (iteratee, list) => _.max(_.map(iteratee, list)) + // birth date function isDateWithinSomeDaysOfPeriod (period, date, days) { const inMillisecs = 24 * 60 * 60 * 1000 - const startTime = period.from.date.getTime() - days * inMillisecs + const startTime = period.start.date.getTime() - days * inMillisecs const startDate = new Date(startTime) - const endTime = period.to.date.getTime() + days * inMillisecs + const endTime = period.end.date.getTime() + days * inMillisecs const endDate = new Date(endTime) return (startDate < date && date < endDate) @@ -33,52 +38,55 @@ function isBornTooLongSince (individual, dateObject, days) { return !_.some(isWithinSomeYears, individual.birthDatePeriods) } -// exact match +// string similarity -function calcExactMatchScore (candidateFullName) { - return function (alias) { - return jaroWinkler(alias.fullName, candidateFullName) - } -} +const stringMatch = _.curry(jaroWinkler) -// phonetic match +const bestMatchInList = _.curry((list, name) => mapMax(stringMatch(name), list)) -function calcPhoneticMatchScore (candidatePhoneticFullName) { - return function (alias) { - return jaroWinkler(alias.phoneticFullName, candidatePhoneticFullName) - } -} +const aliasStringMatch = _.curry((candidate, alias) => { + const matchWithCandidate = bestMatchInList(candidate.fullNames) + return mapMax(matchWithCandidate, alias.fullNames) +}) // algorithm -// NOTE: I'm still not 100% on what matching algorithm is the best choice. -// I just experiment with a few metrics for now. - const similarity = _.curry((candidate, individual) => { // Calculate if his birth date is within two years of the given date. // If an individual has multiple birth-date periods, return whether any are // within two years. Reject individuals who don't match this criterion. const twoYears = 365 * 2 - if (!isBornTooLongSince(individual, candidate.birthDate, twoYears)) return 0 + if (isBornTooLongSince(individual, candidate.birthDate, twoYears)) return 0 + + debug_log(individual) // Calculate the Jaro-Winkler similarity of the full name. // If an individual has multiple aliases, use the maximum score. - const exactMatchScore = _.max(_.map(calcExactMatchScore(candidate.fullName), individual.aliases)) + const scoreAgainstCandidate = aliasStringMatch(candidate) + const stringMatchScore = mapMax(scoreAgainstCandidate, individual.aliases) - // Calculate the Jaro-Winkler similarity of the phonetic representation of the full name. - // This should approximate the phonetic similarity of the two names. - // If an individual has multiple aliases, use the maximum score. - const phoneticMatchScore = _.max(_.map(calcPhoneticMatchScore(candidate.phoneticFullName), individual.aliases)) + // // Calculate the Jaro-Winkler similarity of the phonetic representation of the full name. + // // This should approximate the phonetic similarity of the two names. + // // If an individual has multiple aliases, use the maximum score. + // const phoneticMatchScore = mapMax(calcPhoneticMatchScore(candidate.phoneticFullName))(individual.aliases) - return _.max([exactMatchScore, phoneticMatchScore]) + console.log(stringMatchScore) + + return _.max([stringMatchScore]) }) -function match (parts, birthDateString) { +function match (nameParts, birthDateString) { + if (!individuals) { + const message = 'The OFAC data sources have not been loaded yet.' + return Promise.reject(new Error(message)) + } + // nameParts should be an object like {firstName: "John", lastName: "Doe", ...} - const fullName = nameUtils.fullNameFromParts(parts) + const parts = _.mapValues(_.lowerCase, nameParts) + const fullNames = nameUtils.makeFullNames(parts) const phoneticParts = _.mapValues(nameUtils.phonetic, parts) - const phoneticFullName = nameUtils.phonetic(fullName) + const phoneticFullNames = _.map(nameUtils.phonetic, fullNames) // birthDateString is in YYYYMMDD format const year = parseInt(birthDateString.slice(0, 4)) @@ -88,10 +96,12 @@ function match (parts, birthDateString) { const birthDate = {year, month, day, date} - const candidate = {parts, fullName, phoneticParts, phoneticFullName, birthDate} + const candidate = {parts, fullNames, phoneticParts, phoneticFullNames, birthDate} const similarToCandidate = similarity(candidate) - return _.max(similarToCandidate, individuals) + const result = mapMax(similarToCandidate, individuals) + debug_log(candidate) + return result } module.exports = {load, match} diff --git a/lib/ofac/name-utils.js b/lib/ofac/name-utils.js index 2da4683b..4bb0df35 100644 --- a/lib/ofac/name-utils.js +++ b/lib/ofac/name-utils.js @@ -12,14 +12,35 @@ const phoneticMethod3 = _.flow(_.split(' '), _.map(phoneticMethod2)) // Combine name-parts in a standard order. -const fullNameFromParts = _.flow( - _.toPairs, - _.sortBy(_.first), // sort by part name, - _.map(_.last), // get part value +const commonOrderings = [ + ['firstName', 'lastName'], + ['firstName', 'middleName', 'lastName'], + ['firstName', 'maidenName', 'lastName'], + ['firstName', 'patronymic', 'lastName'], + ['firstName', 'matronymic', 'lastName'] +] + +// const getFrom = _.flip() + +const getFrom = _.curry((obj, key) => obj[key]) + +const getOrderedParts = (parts, ordering) => _.map(getFrom(parts), ordering) + +const combineParts = _.curryN(2, _.flow( + getOrderedParts, + _.compact, _.join(' ') +)) + +const makeAllOrderings = parts => _.map(combineParts(parts), commonOrderings) + +const makeFullNames = _.flow( + makeAllOrderings, + _.uniq ) + module.exports = { - fullNameFromParts, + makeFullNames, phonetic: phoneticMethod3 }