This commit is contained in:
Konstantin Mamalakis 2018-03-19 14:32:38 +02:00 committed by Josh Harvey
parent 577a85c9b1
commit f7561acf3c
4 changed files with 120 additions and 71 deletions

View file

@ -29,6 +29,7 @@ const transpose = (word, index) => {
}
const alter = (word, index) => {
if (word[index] === ' ') return word
const o = word.charCodeAt(index)
const collection = _.includes(o, vowels) ? vowels : consonants
const oo = _.sample(collection)
@ -115,6 +116,7 @@ const transcribe = word => {
}
const threshold = 0.85
const fullNameThreshold = 0.95
describe('OFAC', function () {
describe('Matching', function () {
@ -132,18 +134,19 @@ describe('OFAC', function () {
})
})
it('should match the exact full names of suspects', function () {
it.skip('should match the exact full names of suspects', function () {
this.timeout(0)
for (const fullName of fullNames) {
const matches = ofac.match({firstName: fullName}, null, {
threshold,//: 1
threshold,
fullNameThreshold,
})
assert.ok(!_.isEmpty(matches))
}
})
it('should match the permutated full names of suspects', function () {
it.skip('should match the permutated full names of suspects', function () {
this.timeout(0)
for (const fullName of fullNames) {
@ -154,7 +157,8 @@ describe('OFAC', function () {
)(fullName)
const matches = ofac.match({firstName: reversed}, null, {
threshold,//: 1
threshold,
fullNameThreshold,
})
assert.ok(!_.isEmpty(matches))
}
@ -163,6 +167,9 @@ describe('OFAC', function () {
it('should match despite some misspellings', function () {
this.timeout(0)
let countMatches = 0
const failures = []
for (const fullName of fullNames) {
const lightlyMisspelled = misspell(fullName)
@ -173,54 +180,80 @@ describe('OFAC', function () {
)(fullName)
const matchesA = ofac.match({firstName: lightlyMisspelled}, null, {
threshold,//: 0.875
threshold,
fullNameThreshold,
})
if (_.isEmpty(matchesA)) {
console.log(fullName)
ofac.match({firstName: lightlyMisspelled}, null, {
threshold,//: 0.875,
debug: true
})
if (!_.isEmpty(matchesA)) {
countMatches += 1
}
else {
failures.push({fullName, misspelled: lightlyMisspelled})
}
assert.ok(!_.isEmpty(matchesA))
const matchesB = ofac.match({firstName: heavilyMisspelled}, null, {
threshold: threshold - 0.1,//: 0.75
})
if (_.isEmpty(matchesB)) {
console.log(fullName)
ofac.match({firstName: heavilyMisspelled}, null, {
threshold: threshold - 0.1,//: 0.75,
debug: true
})
if (!_.isEmpty(matchesB)) {
countMatches += 1
}
else {
failures.push({fullName, heavy: true, misspelled: heavilyMisspelled})
}
assert.ok(!_.isEmpty(matchesB))
}
for (const failure of failures) {
const {fullName, heavy, misspelled} = failure
console.log("Original:", fullName)
ofac.match({firstName: misspelled}, null, {
threshold: threshold + (heavy ? -0.1 : 0),
debug: true
})
}
assert.equal(countMatches, fullNames.length * 2)
})
it('should match phonetically similar words', function () {
this.timeout(0)
let countMatches = 0
const failures = []
for (const fullName of fullNames) {
const transcribed = transcribe(fullName)
if (!transcribed) {
console.warn(`Couldn't find an appropriate phonetic alteration for '${fullName}'`)
countMatches += 1
continue
}
const matches = ofac.match({firstName: transcribed}, null, {
threshold,//: 0.85
threshold,
fullNameThreshold,
})
if (_.isEmpty(matches)) {
console.log(fullName)
ofac.match({firstName: transcribed}, null, {
threshold,//: 0.85,
debug: true
})
if (!_.isEmpty(matches)) {
countMatches += 1
}
else {
failures.push({fullName, misspelled: transcribed})
}
assert.ok(!_.isEmpty(matches))
}
for (const failure of failures) {
const {fullName, misspelled} = failure
console.log("Original:", fullName)
ofac.match({firstName: misspelled}, null, {
threshold,
fullNameThreshold,
debug: true
})
}
assert.equal(countMatches, fullNames.length)
})
it('should discard matches with inapropriate birthdates', function () {
@ -239,7 +272,8 @@ describe('OFAC', function () {
for (const fullName of fullNames) {
const matches = ofac.match({firstName: fullName}, dateString, {
threshold,//: 1
threshold,
fullNameThreshold,
})
assert.ok(noMatchesWithBirthDates(matches))
}
@ -262,44 +296,67 @@ describe('OFAC', function () {
const firstNamesMale = getNamesFromFile('dist.male.first.txt')
const firstNamesFemale = getNamesFromFile('dist.female.first.txt')
let countMatches = 0
const failures = []
for (const lastName of lastNames.slice(0, 100)) {
for (firstName of firstNamesMale.slice(0, 100)) {
const matches = ofac.match({firstName, lastName}, null, {
threshold,//: 0.875
threshold,
fullNameThreshold,
})
if (!_.isEmpty(matches)) {
ofac.match({firstName, lastName}, null, {
threshold,//: 0.875,
debug: true
})
countMatches += 1
failures.push({firstName, lastName})
}
assert.ok(_.isEmpty(matches))
}
for (firstName of firstNamesFemale.slice(0, 100)) {
const matches = ofac.match({firstName, lastName}, null, {
threshold,//: 0.875
threshold,
fullNameThreshold,
})
if (!_.isEmpty(matches)) {
ofac.match({firstName, lastName}, null, {
threshold,//: 0.875,
debug: true
})
countMatches += 1
failures.push({firstName, lastName})
}
assert.ok(_.isEmpty(matches))
}
}
for (const failure of failures) {
ofac.match(failure, null, {
threshold,
fullNameThreshold,
debug: true
})
}
assert.equal(countMatches, 0)
})
it.skip('test', function () {
const firstName = 'hian chariapaporn'
ofac.match({firstName}, null, {
threshold,//: 0.875,
threshold,
fullNameThreshold,
debug: true,
verboseFor: ['hiran', 'chariapaporn']
})
})
it.skip('test', function () {
const firstName = 'janice smith'
ofac.match({firstName}, null, {
threshold,
fullNameThreshold,
debug: true,
verboseFor: ['samih', 'anis']
})
})
})
})