Some matching tests
This commit is contained in:
parent
f8ec2be6e8
commit
e7193e3948
5 changed files with 146 additions and 10 deletions
|
|
@ -29,7 +29,7 @@ function load () {
|
||||||
parser.parse
|
parser.parse
|
||||||
))
|
))
|
||||||
.then(result => {
|
.then(result => {
|
||||||
structs = result
|
return (structs = result)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -58,12 +58,17 @@ function match (nameParts, birthDateString, threshold) {
|
||||||
const wordPhonetics = _.flatMap(_.get('phonetics'), words)
|
const wordPhonetics = _.flatMap(_.get('phonetics'), words)
|
||||||
|
|
||||||
// birthDateString is in YYYYMMDD format
|
// birthDateString is in YYYYMMDD format
|
||||||
const year = parseInt(birthDateString.slice(0, 4))
|
const birthDate = _.cond([
|
||||||
const month = parseInt(birthDateString.slice(4, 6))
|
[_.identity, () => {
|
||||||
const day = parseInt(birthDateString.slice(6, 8))
|
const year = parseInt(birthDateString.slice(0, 4))
|
||||||
const date = new Date(year, month - 1, day)
|
const month = parseInt(birthDateString.slice(4, 6))
|
||||||
|
const day = parseInt(birthDateString.slice(6, 8))
|
||||||
|
const date = new Date(year, month - 1, day)
|
||||||
|
|
||||||
const birthDate = {year, month, day, date}
|
return {year, month, day, date}
|
||||||
|
}],
|
||||||
|
[_.stubTrue, () => null]
|
||||||
|
])(birthDateString)
|
||||||
|
|
||||||
const candidate = {parts, fullName, wordValues, wordPhonetics, birthDate}
|
const candidate = {parts, fullName, wordValues, wordPhonetics, birthDate}
|
||||||
// debug_log(candidate)
|
// debug_log(candidate)
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ function isDateWithinSomeDaysOfPeriod (period, date, days) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const isBornTooLongSince = _.curry((days, dateObject, individual) => {
|
const isBornTooLongSince = _.curry((days, dateObject, individual) => {
|
||||||
|
if (!dateObject) return false
|
||||||
if (_.isEmpty(individual.birthDatePeriods)) return false
|
if (_.isEmpty(individual.birthDatePeriods)) return false
|
||||||
const isWithinSomeYears = _.partialRight(isDateWithinSomeDaysOfPeriod, [dateObject.date, days])
|
const isWithinSomeYears = _.partialRight(isDateWithinSomeDaysOfPeriod, [dateObject.date, days])
|
||||||
return !_.some(isWithinSomeYears, individual.birthDatePeriods)
|
return !_.some(isWithinSomeYears, individual.birthDatePeriods)
|
||||||
|
|
|
||||||
|
|
@ -120,7 +120,7 @@ function processProfile (profileNode) {
|
||||||
const birthDatePeriods = mapCompact(processFeature, profileNode.Feature)
|
const birthDatePeriods = mapCompact(processFeature, profileNode.Feature)
|
||||||
const individual = {id, aliases, birthDatePeriods}
|
const individual = {id, aliases, birthDatePeriods}
|
||||||
|
|
||||||
debug_log(individual)
|
// debug_log(individual)
|
||||||
|
|
||||||
return individual
|
return individual
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,141 @@
|
||||||
const assert = require('assert')
|
const assert = require('assert')
|
||||||
const parser = require('../../lib/ofac/matching')
|
const ofac = require('../../lib/ofac')
|
||||||
|
const _ = require('lodash/fp')
|
||||||
|
|
||||||
|
let structs
|
||||||
|
let fullNames
|
||||||
|
|
||||||
|
const rand = N => _.random(0, N - 1)
|
||||||
|
|
||||||
|
// Misspelling helper: doubles the character found at `index`,
// e.g. duplicate('john', 1) yields 'joohn'.
const duplicate = (word, index) => {
  const head = word.slice(0, index)
  const repeated = word[index]
  const tail = word.slice(index + 1)
  return _.join('', [head, repeated, repeated, tail])
}
|
||||||
|
|
||||||
|
// Misspelling helper: drops the character found at `index`,
// e.g. remove('john', 1) yields 'jhn'.
const remove = (word, index) => {
  const before = word.slice(0, index)
  const after = word.slice(index + 1)
  return _.join('', [before, after])
}
|
||||||
|
|
||||||
|
// Misspelling helper: swaps the character at `index` with its right-hand
// neighbour. When there is no right-hand neighbour, a single space stands
// in for it.
const transpose = (word, index) => {
  const left = word[index]
  const right = word[index + 1] || ' '
  const prefix = word.slice(0, index)
  const suffix = word.slice(index + 2)
  return _.join('', [prefix, right, left, suffix])
}
|
||||||
|
|
||||||
|
// Misspelling helper: replaces the letter at `index` with a different,
// randomly chosen letter of the same case.
//
// word  - string to alter
// index - position of the character to replace
//
// Returns the altered string. When `index` does not point at an ASCII letter
// (space, digit, punctuation, out of range) the word is returned unchanged,
// because there is no alphabet offset to rotate.
const alter = (word, index) => {
  const ALPHABET_SIZE = 26
  const LOWER_A = 'a'.charCodeAt(0)
  const UPPER_A = 'A'.charCodeAt(0)

  // charCodeAt yields NaN for an out-of-range index; NaN fails both range
  // checks below, so such calls fall through to the "unchanged" return.
  const code = word.charCodeAt(index)

  let base = null
  if (code >= LOWER_A && code < LOWER_A + ALPHABET_SIZE) {
    base = LOWER_A
  } else if (code >= UPPER_A && code < UPPER_A + ALPHABET_SIZE) {
    base = UPPER_A
  }
  if (base === null) return word

  // A shift of 1..25 always lands on another letter; a shift of 26 would wrap
  // around to the original letter and leave the word untouched.
  const shift = 1 + Math.floor(Math.random() * (ALPHABET_SIZE - 1))
  const replacement = String.fromCharCode(base + ((code - base + shift) % ALPHABET_SIZE))

  return word.slice(0, index) + replacement + word.slice(index + 1)
}
|
||||||
|
|
||||||
|
// The single-character edits `misspell` picks from.
const misspellOps = [
  duplicate,
  remove,
  transpose,
  alter
]

// Applies one randomly chosen edit from `misspellOps` at a random position.
//
// word - string to misspell
//
// Returns the edited string. An empty word is returned as is, because it has
// no position to edit.
const misspell = word => {
  const len = word.length
  if (len === 0) return word

  // _.random is inclusive on both ends, so the upper bound has to be the last
  // valid index (len - 1); passing len lets the edit land one past the end of
  // the word. Index 0 is still skipped whenever the word has more than one
  // character, keeping the lower bound of 1.
  // NOTE(review): presumably this keeps the first letter intact - confirm.
  const lastIndex = len - 1
  const index = _.random(Math.min(1, lastIndex), lastIndex)
  const operation = _.sample(misspellOps)
  return operation(word, index)
}
|
||||||
|
|
||||||
|
// Applies `misspell` several times in a row. The number of edits is drawn
// between 1 and floor(sqrt(word.length)).
//
// word - string to misspell
//
// Returns the string after all edits have been applied in sequence.
const misspellRandomly = word => {
  const len = word.length

  // Floor the bound before drawing: _.random switches to floating-point
  // output when either bound is a float, and _.times truncates that value,
  // which makes the largest edit count almost unreachable.
  const maxEdits = Math.floor(Math.sqrt(len))
  const num = _.random(1, maxEdits)

  return _.flow(..._.times(() => misspell, num))(word)
}
|
||||||
|
|
||||||
describe('OFAC', function () {
|
describe('OFAC', function () {
|
||||||
describe('Matching', function () {
|
describe('Matching', function () {
|
||||||
|
|
||||||
it('should ...')
|
before(function () {
|
||||||
|
this.timeout(60000)
|
||||||
|
return ofac.load()
|
||||||
|
.then(result => {
|
||||||
|
structs = result
|
||||||
|
const {individuals} = structs
|
||||||
|
fullNames = _.flow(
|
||||||
|
_.flatMap('aliases'),
|
||||||
|
_.map('fullName')
|
||||||
|
)(individuals)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should match the exact full names of suspects', function () {
|
||||||
|
this.timeout(0)
|
||||||
|
|
||||||
|
for (const fullName of fullNames) {
|
||||||
|
const matches = ofac.match({firstName: fullName}, null, 1)
|
||||||
|
assert.ok(matches.length > 0)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should match the permutated full names of suspects', function () {
|
||||||
|
this.timeout(0)
|
||||||
|
|
||||||
|
for (const fullName of fullNames) {
|
||||||
|
const reversed = _.flow(
|
||||||
|
_.split(' '),
|
||||||
|
_.reverse,
|
||||||
|
_.join(' ')
|
||||||
|
)(fullName)
|
||||||
|
|
||||||
|
const matches = ofac.match({firstName: reversed}, null, 1)
|
||||||
|
assert.ok(matches.length > 0)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should match despite some misspellings', function () {
|
||||||
|
this.timeout(0)
|
||||||
|
this.retries(4)
|
||||||
|
|
||||||
|
for (const fullName of fullNames) {
|
||||||
|
const lightlyMisspelled = misspell(fullName)
|
||||||
|
|
||||||
|
const heavilyMisspelled = _.flow(
|
||||||
|
_.split(' '),
|
||||||
|
_.map(misspell),
|
||||||
|
_.join(' ')
|
||||||
|
)(fullName)
|
||||||
|
|
||||||
|
console.log(fullName, '|', lightlyMisspelled, '|', heavilyMisspelled)
|
||||||
|
|
||||||
|
const matchesA = ofac.match({firstName: lightlyMisspelled}, null, 0.95)
|
||||||
|
assert.ok(matchesA.length > 0)
|
||||||
|
|
||||||
|
const matchesB = ofac.match({firstName: heavilyMisspelled}, null, 0.85)
|
||||||
|
assert.ok(matchesB.length > 0)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// Every known full name, after one random single-character edit, must still
// produce at least one match at threshold 1.
// NOTE(review): the edit comes from `misspell`, which is not guaranteed to
// keep the name phonetically equivalent; the retries below absorb the runs
// where it does not. Confirm whether a phonetics-preserving edit is wanted.
it('should match phonetically similar words', function () {
  this.timeout(0)
  this.retries(4)

  for (const fullName of fullNames) {
    const lightlyMisspelled = misspell(fullName)

    const matches = ofac.match({firstName: lightlyMisspelled}, null, 1)
    assert.ok(matches.length > 0)
  }
})
|
||||||
|
|
||||||
|
it('should match names that are only a single word')
|
||||||
|
|
||||||
|
// Pending test: no callback is supplied yet.
it('should discard matches with inappropriate birthdates')
|
||||||
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@ -86,7 +86,6 @@ const makeBirthDate = birthDate => '' +
|
||||||
</Feature>`
|
</Feature>`
|
||||||
|
|
||||||
const makeProfile = profile => {
|
const makeProfile = profile => {
|
||||||
console.log(profile.birthDatePeriods)
|
|
||||||
return '' +
|
return '' +
|
||||||
` <Profile ID="${profile.id}" PartySubTypeID="4">
|
` <Profile ID="${profile.id}" PartySubTypeID="4">
|
||||||
<Identity>
|
<Identity>
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue