chore: use monorepo organization
This commit is contained in:
parent
deaf7d6ecc
commit
a687827f7e
1099 changed files with 8184 additions and 11535 deletions
88799
packages/server/tests/ofac/dist.all.last.txt
Normal file
88799
packages/server/tests/ofac/dist.all.last.txt
Normal file
File diff suppressed because it is too large
Load diff
4275
packages/server/tests/ofac/dist.female.first.txt
Normal file
4275
packages/server/tests/ofac/dist.female.first.txt
Normal file
File diff suppressed because it is too large
Load diff
1219
packages/server/tests/ofac/dist.male.first.txt
Normal file
1219
packages/server/tests/ofac/dist.male.first.txt
Normal file
File diff suppressed because it is too large
Load diff
362
packages/server/tests/ofac/matching.js
Normal file
362
packages/server/tests/ofac/matching.js
Normal file
|
|
@ -0,0 +1,362 @@
|
|||
const assert = require('assert')
|
||||
const ofac = require('../../lib/ofac')
|
||||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
const _ = require('lodash/fp')
|
||||
|
||||
let structs
|
||||
let fullNames
|
||||
|
||||
// Draws a random value between 0 and N - 1 (both bounds inclusive) via `_.random`.
const rand = N => {
  const upperBound = N - 1
  return _.random(0, upperBound)
}
|
||||
|
||||
// Character codes of the lowercase ASCII alphabet, 'a' through 'z' INCLUSIVE.
// (A half-open range that stops at 'z' silently drops 'z' itself.)
const letters = Array.from(
  {length: 'z'.charCodeAt(0) - 'a'.charCodeAt(0) + 1},
  (unused, offset) => 'a'.charCodeAt(0) + offset
)

// Character codes of the vowels, kept in a-e-i-o-u order.
const vowels = ['a', 'e', 'i', 'o', 'u'].map(c => c.charCodeAt(0))

// Every letter code that is not a vowel code.
const consonants = letters.filter(code => !vowels.includes(code))
|
||||
|
||||
// Misspelling operation: doubles the character found at `index`,
// e.g. duplicate('abc', 1) gives 'abbc'.
const duplicate = (word, index) => {
  const head = word.slice(0, index)
  const repeated = word[index]
  const tail = word.slice(index + 1)
  return _.join('', [head, repeated, repeated, tail])
}
|
||||
|
||||
// Misspelling operation: drops the character found at `index`,
// e.g. remove('abc', 1) gives 'ac'.
const remove = (word, index) => {
  const before = word.slice(0, index)
  const after = word.slice(index + 1)
  return _.join('', [before, after])
}
|
||||
|
||||
// Misspelling operation: swaps the character at `index` with the one after it.
// When there is no following character a single space takes its place.
const transpose = (word, index) => {
  const first = word[index]
  const second = word[index + 1] || ' '
  const pieces = [
    word.slice(0, index),
    second,
    first,
    word.slice(index + 2)
  ]
  return _.join('', pieces)
}
|
||||
|
||||
// Misspelling operation: replaces the character at `index` with a different
// random character of the same class (a vowel for a vowel, a consonant for
// anything else). Spaces are left alone so word boundaries survive.
const alter = (word, index) => {
  if (word[index] === ' ') return word

  const original = word.charCodeAt(index)
  const pool = _.includes(original, vowels) ? vowels : consonants

  // Exclude the current character: sampling it would return the word
  // unchanged, i.e. not a misspelling at all.
  const candidates = _.filter(code => code !== original, pool)
  const replacement = String.fromCharCode(_.sample(candidates))

  return _.join('', [word.slice(0, index), replacement, word.slice(index + 1)])
}
|
||||
|
||||
// The single-character edits `misspell` picks from.
const misspellOps = [
  duplicate,
  remove,
  transpose,
  alter
]

// Applies one random misspelling operation at a random position of `word`.
// The first character (index 0) is never touched.
const misspell = word => {
  const len = word.length

  // Nothing past the first character to edit.
  if (len < 2) return word

  // `_.random` includes its upper bound, so the last valid index is len - 1;
  // using `len` would hand the operations a position past the end of the word.
  const index = _.random(1, len - 1)
  const operation = _.sample(misspellOps)
  return operation(word, index)
}
|
||||
|
||||
// Applies between 1 and floor(sqrt(word.length)) random misspellings in sequence.
const misspellRandomly = word => {
  // `_.random` returns a floating-point number as soon as one bound is not an
  // integer, which would skew the count once `_.times` truncates it; keep the
  // bound integral so every count is equally likely.
  const maxOperations = Math.max(1, Math.floor(Math.sqrt(word.length)))
  const num = _.random(1, maxOperations)
  return _.flow(..._.times(() => misspell, num))(word)
}
|
||||
|
||||
|
||||
// Replaces one randomly chosen vowel of `word` with a neighbouring vowel in
// the a-e-i-o-u sequence. Returns false when the word contains no vowel.
const shiftVowel = word => {
  // Walk the string by code unit so that every recorded `index` is valid for
  // `word.slice` below. (lodash's split('') is Unicode-aware and may yield
  // fewer entries than `word.length`, which broke the index/letter pairing.)
  const indexedVowels = []
  for (let index = 0; index < word.length; index++) {
    const vowelIndex = _.indexOf(word.charCodeAt(index), vowels)
    if (vowelIndex !== -1) indexedVowels.push({index, vowelIndex})
  }

  if (_.isEmpty(indexedVowels)) return false

  const indexedVowel = _.sample(indexedVowels)

  // 'a' can only move forward, 'u' only backward, the others either way.
  const options = indexedVowel.vowelIndex === 0 ? [ +1 ]
    : indexedVowel.vowelIndex === 4 ? [ -1 ]
      : [ -1, +1 ]
  const offset = _.sample(options)
  const replacementOrd = vowels[indexedVowel.vowelIndex + offset]
  const replacement = String.fromCharCode(replacementOrd)

  const index = indexedVowel.index
  return _.join('', [word.slice(0, index), replacement, word.slice(index + 1)])
}
|
||||
|
||||
// Builds a function that substitutes `a` with `b` in a word (via
// String.prototype.replace) and returns the new word, or false when the
// substitution left the word unchanged.
const makeReplacer = (a, b) => word => {
  const replaced = word.replace(a, b)
  if (replaced === word) return false
  return replaced
}
|
||||
|
||||
// Builds both directions of a substitution: [a -> b, b -> a].
const makeReplacerPair = (a, b) => {
  const forward = makeReplacer(a, b)
  const backward = makeReplacer(b, a)
  return [forward, backward]
}
|
||||
|
||||
// Alterations `transcribe` may apply: the vowel shift plus both directions
// of each letter-group substitution listed below.
const equivalences = [
  shiftVowel,
  ..._.flatMap(([a, b]) => makeReplacerPair(a, b), [
    ['v', 'f'],
    ['ph', 'f'],
    ['ck', 'k'],
    ['q', 'k']
  ])
]
|
||||
|
||||
// Tries the equivalences in random order and returns the first truthy
// result; returns undefined when none of them produces one.
const transcribe = word => {
  let transcribed
  // `some` stops at the first operation that yields a truthy result.
  _.shuffle(equivalences).some(op => {
    transcribed = op(word)
    return Boolean(transcribed)
  })
  return transcribed || undefined
}
|
||||
|
||||
// Value passed as the `threshold` option of `ofac.match` throughout these tests.
const threshold = 0.85

// Value passed as the `fullNameThreshold` option of `ofac.match`.
const fullNameThreshold = 0.95
|
||||
|
||||
describe('OFAC', function () {
|
||||
describe('Matching', function () {
|
||||
|
||||
// Loads the OFAC structures once and caches the full name of every alias
// of every individual for the tests below.
before(function () {
  this.timeout(60000)

  const collectFullNames = _.flow(
    _.flatMap('aliases'),
    _.map('fullName')
  )

  return ofac.load().then(result => {
    structs = result
    fullNames = collectFullNames(structs.individuals)
  })
})
|
||||
|
||||
// Every listed full name, passed unchanged, must yield at least one match.
it.skip('should match the exact full names of suspects', function () {
  this.timeout(0)

  const makeOptions = () => ({threshold, fullNameThreshold})

  for (const fullName of fullNames) {
    const query = {firstName: fullName}
    const matches = ofac.match(query, null, makeOptions())
    assert.ok(!_.isEmpty(matches))
  }
})
|
||||
|
||||
// Every listed full name must still match with its words in reverse order.
it.skip('should match the permutated full names of suspects', function () {
  this.timeout(0)

  const reverseWords = _.flow(
    _.split(' '),
    _.reverse,
    _.join(' ')
  )
  const makeOptions = () => ({threshold, fullNameThreshold})

  for (const fullName of fullNames) {
    const reversed = reverseWords(fullName)
    const matches = ofac.match({firstName: reversed}, null, makeOptions())
    assert.ok(!_.isEmpty(matches))
  }
})
|
||||
|
||||
// Each full name is checked twice: with a single misspelling at the regular
// thresholds, and with one misspelling per word at a threshold lowered by 0.1.
// Names that fail are re-run with `debug: true` before the final assertion.
it('should match despite some misspellings', function () {
  this.timeout(0)

  let countMatches = 0
  const failures = []

  // Counts a non-empty result as a match, otherwise records the failure.
  const tally = (matches, failure) => {
    if (_.isEmpty(matches)) {
      failures.push(failure)
    } else {
      countMatches += 1
    }
  }

  const misspellEveryWord = _.flow(
    _.split(' '),
    _.map(misspell),
    _.join(' ')
  )

  for (const fullName of fullNames) {
    const lightlyMisspelled = misspell(fullName)
    const heavilyMisspelled = misspellEveryWord(fullName)

    const matchesA = ofac.match({firstName: lightlyMisspelled}, null, {
      threshold,
      fullNameThreshold
    })
    tally(matchesA, {fullName, misspelled: lightlyMisspelled})

    const matchesB = ofac.match({firstName: heavilyMisspelled}, null, {
      threshold: threshold - 0.1 // i.e. 0.75
    })
    tally(matchesB, {fullName, heavy: true, misspelled: heavilyMisspelled})
  }

  for (const {fullName, heavy, misspelled} of failures) {
    console.log("Original:", fullName)
    ofac.match({firstName: misspelled}, null, {
      threshold: threshold + (heavy ? -0.1 : 0),
      debug: true
    })
  }

  assert.equal(countMatches, fullNames.length * 2)
})
|
||||
|
||||
// Each full name is phonetically altered with `transcribe` and must still
// match. Names for which no alteration exists count as matched (with a
// warning). Failures are re-run with `debug: true` before the assertion.
it('should match phonetically similar words', function () {
  this.timeout(0)

  const makeOptions = () => ({threshold, fullNameThreshold})

  let countMatches = 0
  const failures = []

  for (const fullName of fullNames) {
    const transcribed = transcribe(fullName)

    if (!transcribed) {
      console.warn(`Couldn't find an appropriate phonetic alteration for '${fullName}'`)
      countMatches += 1
      continue
    }

    const matches = ofac.match({firstName: transcribed}, null, makeOptions())

    if (_.isEmpty(matches)) {
      failures.push({fullName, misspelled: transcribed})
    } else {
      countMatches += 1
    }
  }

  for (const {fullName, misspelled} of failures) {
    console.log("Original:", fullName)
    ofac.match({firstName: misspelled}, null, {...makeOptions(), debug: true})
  }

  assert.equal(countMatches, fullNames.length)
})
|
||||
|
||||
// Queries every full name together with today's date (YYYYMMDD) as birth
// date and asserts that every returned match has only empty entries in
// `birthDatePeriods`.
it('should discard matches with inapropriate birthdates', function () {
  this.timeout(0)

  const today = new Date()
  const zeroPad = (width, value) => _.padCharsStart('0', width, value)
  const dateString = [
    zeroPad(4, today.getFullYear()),
    zeroPad(2, today.getMonth() + 1), // getMonth() is zero-based
    zeroPad(2, today.getDate())
  ].join('')

  const hasNoBirthDates = match =>
    _.every(_.isEmpty, _.get('birthDatePeriods', match))
  const noMatchesWithBirthDates = _.every(hasNoBirthDates)

  for (const fullName of fullNames) {
    const options = {threshold, fullNameThreshold}
    const matches = ofac.match({firstName: fullName}, dateString, options)
    assert.ok(noMatchesWithBirthDates(matches))
  }
})
|
||||
|
||||
// Combines the first 100 last names with the first 100 male and the first
// 100 female first names from the distribution files next to this test and
// asserts that none of the combinations matches. Combinations that do match
// are re-run with `debug: true` before the final assertion.
it('should not match against common names', function () {
  this.timeout(0)

  // Reads a file relative to this test's directory and keeps the first
  // space-separated token of every line.
  const getNamesFromFile = _.flow(
    name => path.resolve(__dirname, name),
    file => fs.readFileSync(file, 'utf-8'),
    _.split('\n'),
    _.map(_.flow(
      _.split(' '),
      _.first
    ))
  )

  const lastNames = getNamesFromFile('dist.all.last.txt')
  const firstNamesMale = getNamesFromFile('dist.male.first.txt')
  const firstNamesFemale = getNamesFromFile('dist.female.first.txt')

  // Male names first, then female names, for every last name.
  const firstNames = [
    ...firstNamesMale.slice(0, 100),
    ...firstNamesFemale.slice(0, 100)
  ]

  let countMatches = 0
  const failures = []

  for (const lastName of lastNames.slice(0, 100)) {
    // `const` is required here: a bare `for (firstName of ...)` assigns an
    // undeclared variable, which leaks a global (and throws in strict mode).
    for (const firstName of firstNames) {
      const matches = ofac.match({firstName, lastName}, null, {
        threshold,
        fullNameThreshold
      })

      if (!_.isEmpty(matches)) {
        countMatches += 1
        failures.push({firstName, lastName})
      }
    }
  }

  for (const failure of failures) {
    ofac.match(failure, null, {
      threshold,
      fullNameThreshold,
      debug: true
    })
  }

  assert.equal(countMatches, 0)
})
|
||||
|
||||
|
||||
// Skipped scratch case: runs a single query in debug mode with verbose
// output requested for two specific words. Makes no assertion.
it.skip('test', function () {
  const query = {firstName: 'hian chariapaporn'}
  const options = {
    threshold,
    fullNameThreshold,
    debug: true,
    verboseFor: ['hiran', 'chariapaporn']
  }
  ofac.match(query, null, options)
})
|
||||
|
||||
|
||||
// Skipped scratch case: runs a single query in debug mode with verbose
// output requested for two specific words. Makes no assertion.
it.skip('test', function () {
  const query = {firstName: 'janice smith'}
  const options = {
    threshold,
    fullNameThreshold,
    debug: true,
    verboseFor: ['samih', 'anis']
  }
  ofac.match(query, null, options)
})
|
||||
|
||||
})
|
||||
})
|
||||
188
packages/server/tests/ofac/parsing.js
Normal file
188
packages/server/tests/ofac/parsing.js
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
const assert = require('assert')
|
||||
const parser = require('../../lib/ofac/parsing')
|
||||
|
||||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
const util = require('util')
|
||||
const _ = require('lodash/fp')
|
||||
|
||||
// Returns a path under /tmp whose file name is a random number's string form.
const randomTmpFileName = () => {
  const uniquePart = String(Math.random())
  return path.join('/tmp', uniquePart)
}
|
||||
|
||||
// Promise-returning variant of fs.writeFile.
const writeFile = util.promisify(fs.writeFile)

// Writes every entry of `contents` to its own freshly named temporary file.
// Resolves with the list of file names, in the same order as `contents`.
function makeDataFiles (contents) {
  const fileNames = _.times(randomTmpFileName, contents.length)
  const pendingWrites = fileNames.map(
    (fileName, position) => writeFile(fileName, contents[position])
  )
  return Promise.all(pendingWrites).then(() => fileNames)
}
|
||||
|
||||
|
||||
// Maps `fn` over `items` and concatenates the results with no separator.
const mapLines = (fn, items) => _.join('', _.map(fn, items))
|
||||
|
||||
// Numeric id used in the generated XML for each supported name part.
const partIds = new Map(Object.entries({
  lastName: 1520,
  firstName: 1521,
  middleName: 1522,
  maidenName: 1523,
  patronymic: 91708,
  matronymic: 91709,
  nickname: 1528
}))

// Looks up the id registered for a part's `partName` (undefined if unknown).
const getId = part => {
  const {partName} = part
  return partIds.get(partName)
}
|
||||
|
||||
// Renders one name part as a <DocumentedNamePart> element whose value
// carries the part's group id.
const makePart = part => [
  ` <DocumentedNamePart>`,
  `<NamePartValue NamePartGroupID="${getId(part)}">${part.value}</NamePartValue>`,
  `</DocumentedNamePart>`
].join('\n')
|
||||
|
||||
// Renders one alias as an <Alias> element wrapping a <DocumentedName> that
// contains one <DocumentedNamePart> per entry of `alias.parts`.
const makeAlias = alias => '' +
` <Alias AliasTypeID="1403">
<DocumentedName ID="${alias.id}" DocNameStatusID="1">
${mapLines(makePart, alias.parts) /* join explicitly: interpolating the raw array would insert commas between the parts */}
</DocumentedName>
</Alias>`
|
||||
|
||||
// Renders the <MasterNamePartGroup> declaration for one name part; the
// part's id is used both as group id and as part type id.
const makePartGroup = part => {
  const id = getId(part)
  return [
    ` <MasterNamePartGroup>`,
    `<NamePartGroup`,
    `ID="${id}"`,
    `NamePartTypeID="${id}"/>`,
    `</MasterNamePartGroup>`
  ].join('\n')
}

// Renders the group declarations for every part of an alias.
const makePartGroups = alias => {
  const {parts} = alias
  return mapLines(makePartGroup, parts)
}
|
||||
|
||||
// Renders a birth-date period as a <Feature FeatureTypeID="8"> element.
// Both <From> and <To> of <Start> carry `birthDate.start`; both <From> and
// <To> of <End> carry `birthDate.end`.
const makeBirthDate = birthDate => {
  // Year/Month/Day elements for a single date.
  const dateLines = date => [
    `<Year>${date.year}</Year>`,
    `<Month>${date.month}</Month>`,
    `<Day>${date.day}</Day>`
  ]

  // <From>/<To> pair in which both ends hold the same date.
  const boundaryLines = (tag, date) => [
    `<${tag}>`,
    `<From>`,
    ...dateLines(date),
    `</From>`,
    `<To>`,
    ...dateLines(date),
    `</To>`,
    `</${tag}>`
  ]

  return [
    ` <Feature FeatureTypeID="8">`,
    `<FeatureVersion>`,
    `<DatePeriod>`,
    ...boundaryLines('Start', birthDate.start),
    ...boundaryLines('End', birthDate.end),
    `</DatePeriod>`,
    `</FeatureVersion>`,
    `</Feature>`
  ].join('\n')
}
|
||||
|
||||
// Renders one individual as a <Profile> element: its aliases and their name
// part groups inside <Identity>, followed by its birth-date features.
const makeProfile = profile => {
  const aliases = mapLines(makeAlias, profile.aliases)
  const partGroups = mapLines(makePartGroups, profile.aliases)
  const birthDates = mapLines(makeBirthDate, profile.birthDatePeriods)

  return [
    ` <Profile ID="${profile.id}" PartySubTypeID="4">`,
    `<Identity>`,
    aliases,
    `<NamePartGroups>`,
    partGroups,
    `</NamePartGroups>`,
    `</Identity>`,
    birthDates,
    `</Profile>`
  ].join('\n')
}
|
||||
|
||||
// Wraps the rendered profiles in an XML declaration and a <doc> root element.
const makeXml = profiles => {
  const body = mapLines(makeProfile, profiles)
  return [
    `<?xml version="1.0" encoding="utf-8"?>`,
    `<doc>`,
    body,
    `</doc>`
  ].join('\n')
}
|
||||
|
||||
|
||||
// Fixture: individual with a single two-part alias and one birth date.
// It is used both to generate the XML and as the expected parse result.
const individualA = {
  id: '9',
  aliases: [
    {
      id: '5',
      parts: [
        {partName: 'firstName', value: 'john'},
        {partName: 'lastName', value: 'doe'}
      ],
      fullName: 'john doe',
      words: [
        {value: 'john', phonetics: ['JN', 'AN']},
        {value: 'doe', phonetics: ['T']}
      ]
    }
  ],
  birthDatePeriods: [
    {
      start: {year: 1955, month: 10, day: 5},
      end: {year: 1955, month: 10, day: 5}
    }
  ]
}

// Fixture: individual whose single alias also has a middle name.
const individualB = {
  id: '11',
  aliases: [
    {
      id: '15',
      parts: [
        {partName: 'firstName', value: 'john'},
        {partName: 'middleName', value: 'de'},
        {partName: 'lastName', value: 'gaul'}
      ],
      fullName: 'john de gaul',
      words: [
        {value: 'john', phonetics: ['JN', 'AN']},
        {value: 'de', phonetics: ['T']},
        {value: 'gaul', phonetics: ['KL']}
      ]
    }
  ],
  birthDatePeriods: [
    {
      start: {year: 1965, month: 11, day: 20},
      end: {year: 1965, month: 11, day: 20}
    }
  ]
}
|
||||
|
||||
|
||||
// Promise wrapper around `parser.parse`. The parser's callback is invoked
// with (err, profile): an error rejects the promise, a falsy profile resolves
// it with everything collected so far, anything else is collected.
const parseIndividuals = source => new Promise((resolve, reject) => {
  const collected = []

  parser.parse(source, (err, profile) => {
    if (err) {
      reject(err)
    } else if (!profile) {
      resolve(collected)
    } else {
      collected.push(profile)
    }
  })
})
|
||||
|
||||
|
||||
describe('OFAC', function () {
|
||||
describe('Parsing', function () {
|
||||
|
||||
// To detect botched downloads: parsing must reject with an Error.
it('should fail on malformed XML', function () {
  const xml = '<a><b></a>'

  const parseAll = files => Promise.all(_.map(parseIndividuals, files))

  // Turns the expected rejection into a marker value that is checked next.
  const expectError = error => {
    assert.ok(error instanceof Error)
    return 'failed'
  }

  const expectFailureMarker = outcome => {
    assert.equal(outcome, 'failed')
  }

  return makeDataFiles([xml])
    .then(parseAll)
    .catch(expectError)
    .then(expectFailureMarker)
})
|
||||
|
||||
// Round trip: the XML generated from the two fixtures must parse back into
// exactly those fixtures, in order.
it('should return the expected individuals', function () {
  const expected = [individualA, individualB]
  const xml = makeXml(expected)

  const parseAll = files => Promise.all(_.map(parseIndividuals, files))

  return makeDataFiles([xml])
    .then(parseAll)
    .then(results => {
      const individuals = results[0]
      assert.ok(Array.isArray(individuals))
      assert.equal(individuals.length, 2)
      assert.deepEqual(individuals, [individualA, individualB])
    })
})
|
||||
|
||||
})
|
||||
})
|
||||
Loading…
Add table
Add a link
Reference in a new issue