Refactored for testing
This commit is contained in:
parent
1d0aff07fe
commit
2232340f6f
7 changed files with 214 additions and 145 deletions
94
lib/ofac/matching.js
Normal file
94
lib/ofac/matching.js
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
const jaroWinkler = require('talisman/metrics/distance/jaro-winkler')
|
||||
const _ = require('lodash/fp')
|
||||
|
||||
const debug_log = require('../pp')(__filename) // KOSTIS TODO: remove
|
||||
|
||||
// Curried form of jaroWinkler, so that one string can be fixed up front and
// the string to compare against can be supplied by a later call.
const stringSimilarity = _.curry(jaroWinkler)
|
||||
|
||||
// birth date
|
||||
|
||||
/**
 * Tells whether `date` falls strictly inside `period` after the period has
 * been widened by `days` days on both ends.
 *
 * @param {{start: {date: Date}, end: {date: Date}}} period - period to test against
 * @param {Date} date - the date being checked
 * @param {number} days - how many days to extend the period at each end
 * @returns {boolean} true when widenedStart < date < widenedEnd (both bounds exclusive)
 */
function isDateWithinSomeDaysOfPeriod (period, date, days) {
  const millisecsPerDay = 24 * 60 * 60 * 1000
  const margin = days * millisecsPerDay

  const earliest = new Date(period.start.date.getTime() - margin)
  const latest = new Date(period.end.date.getTime() + margin)

  // Both comparisons are strict, so a date sitting exactly on a widened
  // boundary is NOT considered to be within the period.
  if (!(earliest < date)) return false
  return date < latest
}
|
||||
|
||||
/**
 * Curried predicate (days, dateObject, individual) => boolean.
 *
 * Returns false when the individual has no birth-date periods. Otherwise
 * returns true only if `dateObject.date` lies outside every one of the
 * individual's birth-date periods, each widened by `days` days on both ends.
 *
 * @param {number} days - tolerance, in days, applied around each period
 * @param {{date: Date}} dateObject - wrapper holding the date to check
 * @param {{birthDatePeriods: Array}} individual - record whose periods are tested
 * @returns {boolean} true when no period is close enough to the date
 */
const isBornTooLongSince = _.curry((days, dateObject, individual) => {
  const periods = individual.birthDatePeriods
  if (_.isEmpty(periods)) return false

  const dateToCheck = dateObject.date
  const isNearPeriod = period => isDateWithinSomeDaysOfPeriod(period, dateToCheck, days)
  const hasNearbyPeriod = _.some(isNearPeriod, periods)
  return !hasNearbyPeriod
})
|
||||
|
||||
// algorithm
|
||||
|
||||
// Ids of every alias whose full name scores at least `threshold` against
// `fullName`.
function aliasIdsMatchingFullName (structs, fullName, threshold) {
  const scoreAgainstCandidate = stringSimilarity(fullName)
  const doesNameMatch = alias =>
    _.lte(threshold, scoreAgainstCandidate(_.get('fullName', alias)))

  const aliases = _.flatMap(_.get('aliases'), structs.individuals)
  return _.map(_.get('id'), _.filter(doesNameMatch, aliases))
}

// Everything stored in structs.phoneticMap under any of the given phonetic
// codes; codes with no entry in the map are skipped.
function phoneticNamePartMatches (structs, wordPhonetics) {
  const getPhoneticMatches = phonetic => structs.phoneticMap.get(phonetic)
  return _.flow(
    _.map(getPhoneticMatches),
    _.compact,
    _.flatten
  )(wordPhonetics)
}

// One {value, aliasId} entry per alias id of every structs.wordList entry
// whose value scores at least `threshold` against one of `wordValues`.
function stringNamePartMatches (structs, wordValues, threshold) {
  const getStringMatches = value => _.filter(
    entry => jaroWinkler(value, entry.value) >= threshold,
    structs.wordList
  )
  const getSingleEntries = wordEntry => _.map(
    aliasId => ({value: wordEntry.value, aliasId}),
    wordEntry.aliasIds
  )
  return _.flow(
    _.flatMap(getStringMatches),
    _.flatMap(getSingleEntries)
  )(wordValues)
}

// Alias ids that occur in at least two distinct name-part matches.
function aliasIdsWithTwoNameParts (namePartMatches) {
  return _.flow(
    // The same match can be found both phonetically and alphabetically;
    // deep-equal duplicates must only be counted once.
    _.uniqWith(_.isEqual),
    _.map(_.get('aliasId')),
    // Group rather than count-by-key: object keys are always strings, and the
    // ids are used for Map lookups afterwards, so the original id values
    // have to be kept as they are.
    _.groupBy(_.identity),
    _.values,
    _.filter(sameIds => sameIds.length >= 2),
    _.map(_.first)
  )(namePartMatches)
}

/**
 * Finds the individuals that the candidate may correspond to.
 *
 * An alias is retained when either its full name is at least `threshold`
 * similar to the candidate's full name, or at least two of its name-parts
 * match the candidate's words (phonetically or alphabetically). The matched
 * aliases are resolved to their individuals, and individuals whose birth-date
 * periods are all more than two years (2 * 365 days) away from the
 * candidate's birth date are dropped.
 *
 * NOTE(review): phonetic matches are presumably {value, aliasId} entries like
 * the alphabetical ones; confirm against the code that builds phoneticMap.
 *
 * @param {Object} structs - lookup structures; reads `individuals`,
 *   `phoneticMap`, `wordList`, `aliasToIndividual` and `individualsMap`
 * @param {Object} candidate - reads `fullName`, `wordPhonetics`, `wordValues`
 *   and `birthDate`
 * @param {number} threshold - minimum similarity score for a string match
 * @returns {Array} the matching individuals, without duplicates
 */
function match (structs, candidate, threshold) {
  const {fullName, wordPhonetics, wordValues, birthDate} = candidate

  // Accept aliases whose full name matches.
  const aliasIdsFromFullName = aliasIdsMatchingFullName(structs, fullName, threshold)

  // Gather aliases whose name-parts match phonetically or alphabetically;
  // at least two name-parts must match per alias.
  const aliasIdsFromNamePart = aliasIdsWithTwoNameParts([
    ...phoneticNamePartMatches(structs, wordPhonetics),
    ...stringNamePartMatches(structs, wordValues, threshold)
  ])

  // Get the full record for each matched id.
  const getIndividual = aliasId => {
    const individualId = structs.aliasToIndividual.get(aliasId)
    return structs.individualsMap.get(individualId)
  }
  const suspects = _.uniq(_.map(getIndividual, [
    ...aliasIdsFromFullName,
    ...aliasIdsFromNamePart
  ]))

  // Reject everyone who is born more than two years away.
  const twoYears = 365 * 2
  const unqualified = isBornTooLongSince(twoYears, birthDate)
  return _.reject(unqualified, suspects)
}
|
||||
|
||||
module.exports = {match}
|
||||
Loading…
Add table
Add a link
Reference in a new issue