lamassu-server/lib/ofac/matching.js
2018-05-03 20:20:18 +03:00

98 lines
3 KiB
JavaScript

const jaroWinkler = require('talisman/metrics/distance/jaro-winkler')
const _ = require('lodash/fp')
const debug_log = require('../pp')(__filename) // KOSTIS TODO: remove
// Curried Jaro-Winkler comparison, so that `match` can pre-apply the
// candidate's full name and use the result as a one-argument pipeline step.
const stringSimilarity = _.curry(jaroWinkler)
// birth date
/**
 * Tells whether `date` lies strictly inside `period` once the period has been
 * widened by `days` on both of its ends.
 *
 * @param {{start: {date: Date}, end: {date: Date}}} period - Period whose
 *   `start.date` and `end.date` are read through `getTime()`.
 * @param {Date} date - Value compared with `<` against the widened bounds.
 * @param {number} days - Number of days added before the start and after the end.
 * @returns {boolean} True only when `date` is after the widened start and
 *   before the widened end; a date equal to either bound yields false.
 */
function isDateWithinSomeDaysOfPeriod (period, date, days) {
  const millisecsPerDay = 24 * 60 * 60 * 1000
  const margin = days * millisecsPerDay

  const lowerBound = new Date(period.start.date.getTime() - margin)
  const upperBound = new Date(period.end.date.getTime() + margin)

  // Both comparisons are strict, so the bounds themselves are excluded.
  if (!(lowerBound < date)) return false
  return date < upperBound
}
/**
 * Curried predicate deciding whether an individual can be ruled out because
 * the candidate's birth date is more than `days` away from every one of the
 * individual's birth-date periods.
 *
 * @param {number} days - Tolerance in days applied on both sides of each period.
 * @param {?{date: Date}} dateObject - Candidate birth date wrapper; only its
 *   `date` property is read. May be null/undefined when no birth date is known.
 * @param {{birthDatePeriods: Array}} individual - Record whose
 *   `birthDatePeriods` are checked.
 * @returns {boolean} True when the individual has at least one birth-date
 *   period, the candidate date is known, and the date falls within none of
 *   the periods (tolerance included). False otherwise.
 */
const isBornTooLongSince = _.curry((days, dateObject, individual) => {
  // Nothing recorded for this individual, so nothing can contradict the candidate.
  if (_.isEmpty(individual.birthDatePeriods)) return false

  // Without a candidate birth date there is no evidence to rule anyone out.
  // Guarding here avoids a TypeError on a null/undefined `dateObject`, and
  // avoids silently rejecting every suspect when `date` itself is missing
  // (every comparison against `undefined` would be false).
  const candidateDate = _.get('date', dateObject)
  if (_.isNil(candidateDate)) return false

  const isWithinTolerance = period =>
    isDateWithinSomeDaysOfPeriod(period, candidateDate, days)

  return !_.some(isWithinTolerance, individual.birthDatePeriods)
})
// algorithm
/**
 * Finds the individuals whose aliases resemble the candidate, either by full
 * name or by enough individual name parts, and drops those whose birth date
 * is incompatible with the candidate's.
 *
 * @param {Object} structs - Lookup structures. The following members are read:
 *   `individuals` (records with `aliases`), `phoneticMap`, `wordList`
 *   (entries with `value` and `aliasIds`), `aliasesMap` (aliases with
 *   `words`), `aliasToIndividual` and `individualsMap`.
 * @param {Object} candidate - Person being screened; `fullName`,
 *   `wordPhonetics`, `wordValues` and `birthDate` are read.
 * @param {number} threshold - Minimum `jaroWinkler` score for two strings to
 *   be considered a match.
 * @returns {Array} Distinct individual records that survived the birth-date
 *   filter.
 */
function match (structs, candidate, threshold) {
  const {fullName, wordPhonetics, wordValues, birthDate} = candidate

  // Accept aliases whose full name matches.
  const scoreAgainstCandidate = stringSimilarity(fullName)
  const reachesThreshold = _.lte(threshold) // threshold <= score
  const isFullNameHit = alias =>
    reachesThreshold(scoreAgainstCandidate(_.get('fullName', alias)))

  const everyAlias = _.flatMap(_.get('aliases'), structs.individuals)
  const fullNameHitIds = _.map(_.get('id'), _.filter(isFullNameHit, everyAlias))

  // Gather aliases whose name-parts match phonetically.
  // Phonetic codes unknown to the map yield undefined and are compacted away.
  const lookupPhonetic = code => structs.phoneticMap.get(code)
  const phoneticHits = _.flatten(_.compact(_.map(lookupPhonetic, wordPhonetics)))

  // Gather aliases whose name-parts match alphabetically.
  const similarWordEntries = word => {
    const isCloseEnough = entry => jaroWinkler(word, entry.value) >= threshold
    return _.filter(isCloseEnough, structs.wordList)
  }
  // A word entry lists several alias ids; emit one {value, aliasId} per id.
  const explodeByAlias = entry =>
    _.map(aliasId => ({value: entry.value, aliasId}), entry.aliasIds)

  const matchedWordEntries = _.flatMap(similarWordEntries, wordValues)
  const stringHits = _.flatMap(explodeByAlias, matchedWordEntries)

  // At least two name-parts must match per alias (or all of them, when the
  // alias has fewer than two words).
  const distinctHits = _.uniqWith(_.isEqual, [...phoneticHits, ...stringHits])
  const hitsPerAlias = _.countBy(_.identity, _.map(_.get('aliasId'), distinctHits))

  // NOTE(review): countBy yields a plain object, so the ids taken from its
  // pairs are property keys; this assumes aliasesMap is keyed compatibly.
  const hasEnoughHits = pair => {
    const alias = structs.aliasesMap.get(pair[0])
    const required = Math.min(2, alias.words.length)
    return pair[1] >= required
  }
  const namePartHitIds = _.map(
    _.first,
    _.filter(hasEnoughHits, _.toPairs(hitsPerAlias))
  )

  // Get the full record for each matched id, without duplicates.
  const toIndividual = aliasId => {
    const ownerId = structs.aliasToIndividual.get(aliasId)
    return structs.individualsMap.get(ownerId)
  }
  const suspects = _.flow(
    _.map(toIndividual),
    _.uniq
  )([...fullNameHitIds, ...namePartHitIds])

  // Reject everyone who is born two years away.
  const toleranceInDays = 365 * 2
  return _.reject(isBornTooLongSince(toleranceInDays, birthDate), suspects)
}
// Public API of this module: only `match` is exported.
module.exports = {match}