lamassu-server/lib/ofac/index.js
2018-05-03 20:20:18 +03:00

140 lines
4.4 KiB
JavaScript

const dataParser = require('./data-parser')
const nameUtils = require('./name-utils')
const jaroWinkler = require('talisman/metrics/distance/jaro-winkler')
const _ = require('lodash/fp')
const debug_log = require('../pp')(__filename) // KOSTIS TODO: remove
// Module-level cache of the parsed OFAC data. It stays null until load()
// resolves; match() checks it and refuses to run while it is still null.
let structs = null
// Asks the data parser to produce the OFAC structures and caches them in the
// module-level `structs` variable. Returns the promise chain so callers can
// wait for loading to finish; the chain resolves without a value.
function load () {
  const cacheStructs = produced => {
    structs = produced
  }
  return dataParser.produceStructs().then(cacheStructs)
}
// MATCHING
// Applies `iteratee` to every element of `list` and returns the largest of
// the mapped values.
const mapMax = (iteratee, list) => {
  const mapped = _.map(iteratee, list)
  return _.max(mapped)
}
// Builds every [a, b] combination with `a` taken from aList and `b` from
// bList, as one flat list of two-element arrays (aList order is the outer
// order, bList order the inner one).
const allPairs = (aList, bList) => {
  const pairWithEveryB = a => _.map(b => [a, b], bList)
  return _.flatMap(pairWithEveryB, aList)
}
// birth date
// Checks whether `date` lies strictly inside `period` once the period has
// been widened by `days` days at both ends.
//   period: {start: {date: Date}, end: {date: Date}}
//   date:   the Date to test
//   days:   widening margin, counted in 24-hour days
// Both boundaries are exclusive.
function isDateWithinSomeDaysOfPeriod (period, date, days) {
  const msPerDay = 24 * 60 * 60 * 1000
  const margin = days * msPerDay

  const earliest = new Date(period.start.date.getTime() - margin)
  const latest = new Date(period.end.date.getTime() + margin)

  if (!(earliest < date)) return false
  return date < latest
}
// Tells whether the given birth date is too far from everything known about
// the individual's birth: true when `dateObject.date` falls outside every one
// of the individual's birth-date periods, each widened by `days` days.
// An individual with no recorded birth-date periods is never rejected (false).
function isBornTooLongSince (individual, dateObject, days) {
  const periods = individual.birthDatePeriods
  if (_.isEmpty(periods)) return false

  const birthDate = dateObject.date
  const isNearPeriod = period => isDateWithinSomeDaysOfPeriod(period, birthDate, days)
  const isNearAnyPeriod = _.some(isNearPeriod, periods)
  return !isNearAnyPeriod
}
// similarity algorithm
// Curried wrapper around the imported jaroWinkler metric, so that one string
// can be fixed up front and the result used as a one-argument scorer
// (e.g. stringSimilarity(candidate.fullName) inside similarity).
const stringSimilarity = _.curry(jaroWinkler)
// Scores how alike two words are. Each word is an object carrying a `value`
// string and a `phonetic` list (presumably phonetic encodings of the value --
// confirm against name-utils). If any entry of a.phonetic equals any entry of
// b.phonetic the words count as a perfect match (1); otherwise the score is
// the string similarity of the two values.
const wordSimilarity = (a, b) => {
  const sharesPhoneticCode = _.some(
    ([codeA, codeB]) => _.isEqual(codeA, codeB),
    allPairs(a.phonetic, b.phonetic)
  )
  return sharesPhoneticCode ? 1 : stringSimilarity(a.value, b.value)
}
// Scores how closely one listed individual matches the candidate.
//   candidate:  {fullName, fullNameWords, birthDate: {date, ...}, ...} as
//               built by match()
//   individual: an entry of structs.individuals, read here through its
//               `aliases` (each with `fullName` and `parts[].words`) and,
//               via isBornTooLongSince, its `birthDatePeriods`
// Returns 0 when the birth dates are incompatible, otherwise the larger of
// the full-name score and the word-by-word score. Curried, so
// similarity(candidate) yields a one-argument scorer for individuals.
//
// This runs once per listed individual for every match() call, so it must not
// write to the console or the debug log.
const similarity = _.curry((candidate, individual) => {
  // Reject the individual outright when birth-date periods are known and the
  // candidate's birth date is not within two years of any of them.
  const twoYears = 365 * 2
  if (isBornTooLongSince(individual, candidate.birthDate, twoYears)) return 0

  // Full-name score: Jaro-Winkler similarity between the candidate's full
  // name and each alias' full name, keeping the best alias.
  const scoreCandidateFullName = _.flow(
    _.get('fullName'),
    stringSimilarity(candidate.fullName)
  )
  const stringMatchScore = mapMax(scoreCandidateFullName, individual.aliases)

  // Word score: align each name part of an alias against a window of the
  // candidate's words and compare word by word (see wordSimilarity).
  const candidateWords = candidate.fullNameWords
  const numCandidateWords = candidateWords.length

  const scoreCandidateWords = alias => {
    // A part with more words than the candidate's whole name cannot be
    // aligned anywhere, so it is dropped.
    const tooManyWords = _.flow(
      _.get(['words', 'length']),
      _.lt(numCandidateWords)
    )
    const parts = _.reject(tooManyWords, alias.parts)

    // Score of a part placed at a given word offset: its weakest word pair.
    const scorePartAt = _.curry((part, offset) => {
      const words = _.slice(offset, offset + part.words.length, candidateWords)
      return _.min(_.map(_.spread(wordSimilarity), _.zip(words, part.words)))
    })

    // Best score of a part over every offset where it fits.
    const scorePart = part => {
      const offsets = _.range(0, (numCandidateWords - part.words.length) + 1)
      return mapMax(scorePartAt(part), offsets)
    }

    // The alias scores as its second-best part (or its only part when there
    // is just one), so a single matching part is not enough when several
    // parts are available. With no parts left this is undefined.
    const scores = _.orderBy([], 'desc', _.map(scorePart, parts))
    const thresholdIndex = _.min([2, scores.length]) - 1
    return scores[thresholdIndex]
  }
  const wordMatchScore = mapMax(scoreCandidateWords, individual.aliases)

  return _.max([stringMatchScore, wordMatchScore])
})
// nameParts should be an object like {firstName: "John", lastName: "Doe", ...}
// Converts the name-parts object into a list with one {partName, value}
// entry per own key, in key order -- the shape handed to
// nameUtils.makeFullName by match().
function makeCompatible (nameParts) {
  const toEntry = partName => ({partName, value: nameParts[partName]})
  return _.map(toEntry, _.keys(nameParts))
}
// Scores a person against every loaded OFAC individual and returns the
// highest similarity found.
//   nameParts:       object such as {firstName: "John", lastName: "Doe", ...}
//   birthDateString: birth date in YYYYMMDD format
// Returns the best score over structs.individuals, as computed by similarity.
// If load() has not populated the data yet, returns a rejected Promise.
//
// NOTE(review): the not-loaded path returns a rejected Promise while the
// normal path returns a plain value -- callers must handle both; confirm
// which contract is intended before unifying them.
//
// The candidate (name and birth date) and the result are deliberately not
// written to the console or the debug log.
function match (nameParts, birthDateString) {
  if (!structs) {
    const message = 'The OFAC data sources have not been loaded yet.'
    return Promise.reject(new Error(message))
  }

  const parts = makeCompatible(nameParts)
  const fullName = nameUtils.makeFullName(parts)
  const fullNameWords = nameUtils.makeWords(fullName)

  // birthDateString is in YYYYMMDD format. The radix is given explicitly so
  // the digits are always read as decimal.
  const year = parseInt(birthDateString.slice(0, 4), 10)
  const month = parseInt(birthDateString.slice(4, 6), 10)
  const day = parseInt(birthDateString.slice(6, 8), 10)
  // The Date constructor counts months from 0, hence month - 1.
  const date = new Date(year, month - 1, day)
  const birthDate = {year, month, day, date}

  const candidate = {parts, fullName, fullNameWords, birthDate}

  const similarToCandidate = similarity(candidate)
  return mapMax(similarToCandidate, structs.individuals)
}
// Public API: load() populates the OFAC data, match() scores a person
// against it.
module.exports = {load, match}