chore: use monorepo organization

This commit is contained in:
Rafael Taranto 2025-05-12 10:52:54 +01:00
parent deaf7d6ecc
commit a687827f7e
1099 changed files with 8184 additions and 11535 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,362 @@
const assert = require('assert')
const ofac = require('../../lib/ofac')
const fs = require('fs')
const path = require('path')
const _ = require('lodash/fp')
// Result of `ofac.load()` and the flat list of alias full names derived from
// it; both are assigned by the `before` hook of the Matching suite.
let structs
let fullNames

// Random integer between 0 and N - 1 (both inclusive).
const rand = N => _.random(0, N - 1)

// Character codes of 'a'..'z'. `_.range` stops *before* its end value, so the
// upper bound must be one past 'z' or 'z' is silently dropped from the alphabet
// (and therefore from `consonants`).
const letters = _.range('a'.charCodeAt(0), 'z'.charCodeAt(0) + 1)
const vowels = _.map(c => c.charCodeAt(0), ['a', 'e', 'i', 'o', 'u'])
// Every letter code that is not a vowel code.
const consonants = _.difference(letters, vowels)
// Doubles the character at `index`, e.g. duplicate('abc', 1) -> 'abbc'.
// An out-of-range index leaves the word unchanged (the missing character
// joins as an empty string).
const duplicate = (word, index) => {
  const head = word.slice(0, index)
  const tail = word.slice(index + 1)
  const doubled = [word[index], word[index]]
  return [head, ...doubled, tail].join('')
}
// Drops the character at `index`, e.g. remove('abc', 1) -> 'ac'.
const remove = (word, index) => word.slice(0, index) + word.slice(index + 1)
// Swaps the characters at `index` and `index + 1`. When there is no following
// character a space takes its place, e.g. transpose('abc', 2) -> 'ab c'.
const transpose = (word, index) => {
  const before = word.slice(0, index)
  const after = word.slice(index + 2)
  const swapped = [word[index + 1] || ' ', word[index]]
  return [before, ...swapped, after].join('')
}
// Replaces the character at `index` with a randomly sampled one from the same
// pool: a vowel for a vowel, a consonant for anything else. Spaces are left
// untouched. The sampled character may equal the original one.
const alter = (word, index) => {
  if (word[index] === ' ') return word
  const code = word.charCodeAt(index)
  const pool = _.includes(code, vowels) ? vowels : consonants
  const substitute = String.fromCharCode(_.sample(pool))
  return [word.slice(0, index), substitute, word.slice(index + 1)].join('')
}
// Single-character edits that `misspell` picks from.
const misspellOps = [duplicate, remove, transpose, alter]

// Applies one randomly chosen edit at a random position of `word`.
// The first character (index 0) is never the edit position.
const misspell = word => {
  // `_.random` includes its upper bound, so the previous bound of `word.length`
  // could select the position one past the last character, where the edit is
  // either a no-op (duplicate/remove) or appends a character (transpose/alter).
  // Clamp to the last real index; words shorter than two characters keep the
  // original behaviour of index 1.
  const lastIndex = Math.max(1, word.length - 1)
  const index = _.random(1, lastIndex)
  const operation = _.sample(misspellOps)
  return operation(word, index)
}
// Chains several `misspell` passes over `word`; the number of passes is drawn
// with `_.random` between 1 and the square root of the word length.
const misspellRandomly = word => {
  const passCount = _.random(1, Math.sqrt(word.length))
  const passes = _.times(() => misspell, passCount)
  const applyAll = _.flow(...passes)
  return applyAll(word)
}
// Replaces one randomly chosen vowel of `word` with a neighbouring vowel in the
// a-e-i-o-u sequence. Returns false when the word contains no vowel.
const shiftVowel = word => {
  const chars = _.split('', word)
  const positions = _.range(0, word.length)

  // Pair each character with its position and keep only the vowels, remembering
  // where each one sits in the `vowels` table.
  const indexedVowels = _.zip(chars, positions)
    .map(([letter, index]) => ({
      letter,
      index,
      vowelIndex: vowels.indexOf(letter.charCodeAt(0))
    }))
    .filter(({vowelIndex}) => vowelIndex !== -1)

  if (indexedVowels.length === 0) return false

  const picked = _.sample(indexedVowels)

  // 'a' can only move forward and 'u' only backward; the rest may go either way.
  let offsets = [-1, +1]
  if (picked.vowelIndex === 0) offsets = [+1]
  else if (picked.vowelIndex === 4) offsets = [-1]

  const neighbourCode = vowels[picked.vowelIndex + _.sample(offsets)]
  const neighbour = String.fromCharCode(neighbourCode)
  return [
    word.slice(0, picked.index),
    neighbour,
    word.slice(picked.index + 1)
  ].join('')
}
// Builds a function that replaces the first occurrence of `a` with `b` in a
// word. The built function returns false when nothing was replaced.
const makeReplacer = (a, b) => word => {
  const swapped = word.replace(a, b)
  if (swapped === word) return false
  return swapped
}
// Returns two replacers: `a` -> `b` and the reverse `b` -> `a`.
const makeReplacerPair = (a, b) => {
  const forward = makeReplacer(a, b)
  const backward = makeReplacer(b, a)
  return [forward, backward]
}
// Alterations tried by `transcribe`: a vowel shift plus letter-group swaps in
// both directions. Each entry returns the altered word or a falsy value.
const equivalences = [
  shiftVowel,
  ...makeReplacerPair('v', 'f'),
  ...makeReplacerPair('ph', 'f'),
  ...makeReplacerPair('ck', 'k'),
  ...makeReplacerPair('q', 'k')
]

// Tries the alterations in random order and returns the first truthy result;
// returns undefined when none of them changes the word.
const transcribe = word => {
  const candidates = _.shuffle(equivalences)
  for (let i = 0; i < candidates.length; i += 1) {
    const attempt = candidates[i](word)
    if (attempt) return attempt
  }
  return undefined
}
// Values passed to `ofac.match` by the tests below as the `threshold` and
// `fullNameThreshold` options (the heavy-misspelling case lowers `threshold`
// by 0.1).
const threshold = 0.85
const fullNameThreshold = 0.95
describe('OFAC', function () {
describe('Matching', function () {
// Loads the OFAC data once (60 s timeout) and caches both the loaded structure
// and the full name of every alias of every individual.
before(function () {
  this.timeout(60000)
  const collectFullNames = _.flow(
    _.flatMap('aliases'),
    _.map('fullName')
  )
  return ofac.load().then(loaded => {
    structs = loaded
    fullNames = collectFullNames(structs.individuals)
  })
})
// Skipped: every alias full name, passed unchanged, must yield a match.
it.skip('should match the exact full names of suspects', function () {
  this.timeout(0)
  fullNames.forEach(fullName => {
    const query = {firstName: fullName}
    const matches = ofac.match(query, null, {threshold, fullNameThreshold})
    assert.ok(!_.isEmpty(matches))
  })
})
// Skipped: every alias full name with its words in reverse order must still
// yield a match.
it.skip('should match the permutated full names of suspects', function () {
  this.timeout(0)
  const reverseWords = name => _.join(' ', _.reverse(_.split(' ', name)))
  fullNames.forEach(fullName => {
    const query = {firstName: reverseWords(fullName)}
    const matches = ofac.match(query, null, {threshold, fullNameThreshold})
    assert.ok(!_.isEmpty(matches))
  })
})
// For every alias full name, checks that both a lightly misspelled variant (one
// edit on the whole name) and a heavily misspelled variant (one edit per word,
// matched with a threshold lowered by 0.1) still yield a match. Failing names
// are re-run with `debug: true` before the final assertion.
it('should match despite some misspellings', function () {
  this.timeout(0)
  const misspellEachWord = _.flow(
    _.split(' '),
    _.map(misspell),
    _.join(' ')
  )
  const failures = []
  let countMatches = 0

  // Counts a non-empty result as a match, otherwise remembers the failure.
  const record = (matches, failure) => {
    if (_.isEmpty(matches)) failures.push(failure)
    else countMatches += 1
  }

  for (const fullName of fullNames) {
    const lightlyMisspelled = misspell(fullName)
    const heavilyMisspelled = misspellEachWord(fullName)

    const matchesA = ofac.match({firstName: lightlyMisspelled}, null, {
      threshold,
      fullNameThreshold
    })
    record(matchesA, {fullName, misspelled: lightlyMisspelled})

    const matchesB = ofac.match({firstName: heavilyMisspelled}, null, {
      threshold: threshold - 0.1 // 0.75
    })
    record(matchesB, {fullName, heavy: true, misspelled: heavilyMisspelled})
  }

  failures.forEach(({fullName, heavy, misspelled}) => {
    console.log("Original:", fullName)
    ofac.match({firstName: misspelled}, null, {
      threshold: threshold + (heavy ? -0.1 : 0),
      debug: true
    })
  })

  assert.equal(countMatches, fullNames.length * 2)
})
// For every alias full name, applies one `transcribe` alteration and checks the
// result still yields a match. Names with no applicable alteration are warned
// about and counted as matches. Failures are re-run with `debug: true`.
it('should match phonetically similar words', function () {
  this.timeout(0)
  const failures = []
  let countMatches = 0

  for (const fullName of fullNames) {
    const transcribed = transcribe(fullName)
    if (transcribed) {
      const matches = ofac.match({firstName: transcribed}, null, {
        threshold,
        fullNameThreshold
      })
      if (_.isEmpty(matches)) failures.push({fullName, misspelled: transcribed})
      else countMatches += 1
    } else {
      console.warn(`Couldn't find an appropriate phonetic alteration for '${fullName}'`)
      countMatches += 1
    }
  }

  failures.forEach(({fullName, misspelled}) => {
    console.log("Original:", fullName)
    ofac.match({firstName: misspelled}, null, {
      threshold,
      fullNameThreshold,
      debug: true
    })
  })

  assert.equal(countMatches, fullNames.length)
})
// Matches every alias full name together with today's date (as YYYYMMDD) and
// asserts that every returned match has only empty `birthDatePeriods` entries.
it('should discard matches with inapropriate birthdates', function () {
  this.timeout(0)
  const now = new Date()
  const zeroPad = (width, value) => _.padCharsStart('0', width, value)
  const dateString = [
    zeroPad(4, now.getFullYear()),
    zeroPad(2, now.getMonth() + 1), // getMonth() is zero-based
    zeroPad(2, now.getDate())
  ].join('')

  const hasNoBirthDates = match =>
    _.every(_.isEmpty, _.get('birthDatePeriods', match))
  const noMatchesWithBirthDates = matches => _.every(hasNoBirthDates, matches)

  fullNames.forEach(fullName => {
    const matches = ofac.match({firstName: fullName}, dateString, {
      threshold,
      fullNameThreshold
    })
    assert.ok(noMatchesWithBirthDates(matches))
  })
})
// Combines the first 100 last names with the first 100 male and first 100
// female first names from the name files next to this test and asserts that
// none of the combinations yields a match. Offending combinations are re-run
// with `debug: true` before the assertion.
it('should not match against common names', function () {
  this.timeout(0)

  // Reads a file located next to this test and keeps the first
  // space-separated token of every line.
  const getNamesFromFile = _.flow(
    name => path.resolve(__dirname, name),
    file => fs.readFileSync(file, 'utf-8'),
    _.split('\n'),
    _.map(_.flow(
      _.split(' '),
      _.first
    ))
  )

  const lastNames = getNamesFromFile('dist.all.last.txt')
  const firstNamesMale = getNamesFromFile('dist.male.first.txt')
  const firstNamesFemale = getNamesFromFile('dist.female.first.txt')

  // Male names first, then female names, preserving the original query order.
  const firstNames = [
    ...firstNamesMale.slice(0, 100),
    ...firstNamesFemale.slice(0, 100)
  ]

  const failures = []
  for (const lastName of lastNames.slice(0, 100)) {
    // `const` keeps the loop variable local; the previous bare
    // `for (firstName of ...)` assigned an undeclared variable, which creates a
    // global in sloppy mode and throws a ReferenceError in strict mode.
    for (const firstName of firstNames) {
      const matches = ofac.match({firstName, lastName}, null, {
        threshold,
        fullNameThreshold
      })
      if (!_.isEmpty(matches)) failures.push({firstName, lastName})
    }
  }

  for (const failure of failures) {
    ofac.match(failure, null, {
      threshold,
      fullNameThreshold,
      debug: true
    })
  }

  assert.equal(failures.length, 0)
})
// Skipped manual probes: run a single name through `ofac.match` with debug
// output enabled and `verboseFor` set to the listed words.
it.skip('test', function () {
  const query = {firstName: 'hian chariapaporn'}
  const options = {
    threshold,
    fullNameThreshold,
    debug: true,
    verboseFor: ['hiran', 'chariapaporn']
  }
  ofac.match(query, null, options)
})

it.skip('test', function () {
  const query = {firstName: 'janice smith'}
  const options = {
    threshold,
    fullNameThreshold,
    debug: true,
    verboseFor: ['samih', 'anis']
  }
  ofac.match(query, null, options)
})
})
})

View file

@ -0,0 +1,188 @@
const assert = require('assert')
const parser = require('../../lib/ofac/parsing')
const fs = require('fs')
const path = require('path')
const util = require('util')
const _ = require('lodash/fp')
// Path under /tmp whose file name is the decimal string of a random number.
const randomTmpFileName = () => path.join('/tmp', String(Math.random()))

// Promise-returning variant of fs.writeFile.
const writeFile = util.promisify(fs.writeFile)

// Writes each entry of `contents` to its own randomly named file under /tmp and
// resolves with the file names, in the same order as `contents`.
// The files are not removed afterwards.
function makeDataFiles (contents) {
  const fileNames = _.map(randomTmpFileName, Array(contents.length))
  const writes = _.zip(fileNames, contents)
    .map(([fileName, content]) => writeFile(fileName, content))
  return Promise.all(writes).then(() => fileNames)
}
// Maps a function over a collection (lodash/fp argument order: function first)
// and concatenates the results into one string with no separator.
const mapLines = (...args) => _.join('', _.map(...args))
// Numeric ID used in the generated XML for each supported name-part kind.
const partIds = new Map(Object.entries({
  lastName: 1520,
  firstName: 1521,
  middleName: 1522,
  maidenName: 1523,
  patronymic: 91708,
  matronymic: 91709,
  nickname: 1528
}))

// ID for a part's `partName`; undefined for an unknown name.
const getId = ({partName}) => partIds.get(partName)
// XML for one name part: a <DocumentedNamePart> whose <NamePartValue> carries
// the part's group ID and value.
const makePart = part => {
  const groupId = getId(part)
  return ` <DocumentedNamePart>
<NamePartValue NamePartGroupID="${groupId}">${part.value}</NamePartValue>
</DocumentedNamePart>`
}
// XML for one alias: an <Alias> wrapping a <DocumentedName> that contains one
// <DocumentedNamePart> per entry of `alias.parts`.
// The parts are concatenated with `mapLines`, like the sibling builders do:
// interpolating the raw `_.map` array would stringify it with commas and leave
// stray "," text between the part elements.
const makeAlias = alias => '' +
` <Alias AliasTypeID="1403">
<DocumentedName ID="${alias.id}" DocNameStatusID="1">
${mapLines(makePart, alias.parts)}
</DocumentedName>
</Alias>`
// XML for one <MasterNamePartGroup>; the part's ID is used both as the group ID
// and as the name-part type ID.
const makePartGroup = part => {
  const id = getId(part)
  return ` <MasterNamePartGroup>
<NamePartGroup
ID="${id}"
NamePartTypeID="${id}"/>
</MasterNamePartGroup>`
}

// Concatenated part groups for every part of an alias.
const makePartGroups = ({parts}) => mapLines(makePartGroup, parts)
// XML <Feature FeatureTypeID="8"> for one birth-date period. Both the <Start>
// and the <End> boundary use the same date for their <From> and <To>.
const makeBirthDate = birthDate => {
  // <Year>/<Month>/<Day> lines for one date.
  const dateLines = ({year, month, day}) => [
    `<Year>${year}</Year>`,
    `<Month>${month}</Month>`,
    `<Day>${day}</Day>`
  ]

  // Lines of one boundary element whose From and To are the same date.
  const boundaryLines = (tag, date) => [
    `<${tag}>`,
    '<From>',
    ...dateLines(date),
    '</From>',
    '<To>',
    ...dateLines(date),
    '</To>',
    `</${tag}>`
  ]

  return [
    ' <Feature FeatureTypeID="8">',
    '<FeatureVersion>',
    '<DatePeriod>',
    ...boundaryLines('Start', birthDate.start),
    ...boundaryLines('End', birthDate.end),
    '</DatePeriod>',
    '</FeatureVersion>',
    '</Feature>'
  ].join('\n')
}
// XML <Profile> for one individual: its aliases and their name-part groups
// inside <Identity>, followed by its birth-date features.
const makeProfile = profile => {
  const aliases = mapLines(makeAlias, profile.aliases)
  const partGroups = mapLines(makePartGroups, profile.aliases)
  const birthDates = mapLines(makeBirthDate, profile.birthDatePeriods)
  return ` <Profile ID="${profile.id}" PartySubTypeID="4">
<Identity>
${aliases}
<NamePartGroups>
${partGroups}
</NamePartGroups>
</Identity>
${birthDates}
</Profile>`
}
// Complete XML document: the XML declaration and a <doc> root holding one
// profile element per entry of `profiles`.
const makeXml = profiles => {
  const body = mapLines(makeProfile, profiles)
  return `<?xml version="1.0" encoding="utf-8"?>
<doc>
${body}
</doc>`
}
// Fixture: used both to generate the XML fed to the parser and as the expected
// parsed result for the first profile.
const individualA = {
  id: '9',
  aliases: [
    {
      id: '5',
      parts: [
        {partName: 'firstName', value: 'john'},
        {partName: 'lastName', value: 'doe'}
      ],
      fullName: 'john doe',
      words: [
        {value: 'john', phonetics: ['JN', 'AN']},
        {value: 'doe', phonetics: ['T']}
      ]
    }
  ],
  birthDatePeriods: [
    {
      start: {year: 1955, month: 10, day: 5},
      end: {year: 1955, month: 10, day: 5}
    }
  ]
}
// Fixture: a second profile with a three-part name, used both to generate the
// XML fed to the parser and as the expected parsed result.
const individualB = {
  id: '11',
  aliases: [
    {
      id: '15',
      parts: [
        {partName: 'firstName', value: 'john'},
        {partName: 'middleName', value: 'de'},
        {partName: 'lastName', value: 'gaul'}
      ],
      fullName: 'john de gaul',
      words: [
        {value: 'john', phonetics: ['JN', 'AN']},
        {value: 'de', phonetics: ['T']},
        {value: 'gaul', phonetics: ['KL']}
      ]
    }
  ],
  birthDatePeriods: [
    {
      start: {year: 1965, month: 11, day: 20},
      end: {year: 1965, month: 11, day: 20}
    }
  ]
}
// Promise wrapper around `parser.parse`: collects every profile handed to the
// callback and resolves with the collected list when the callback is invoked
// without a profile. Rejects with the error if the callback receives one.
const parseIndividuals = source => new Promise((resolve, reject) => {
  const collected = []
  parser.parse(source, (err, profile) => {
    if (err) {
      reject(err)
    } else if (profile) {
      collected.push(profile)
    } else {
      resolve(collected)
    }
  })
})
describe('OFAC', function () {
describe('Parsing', function () {
// To detect botched downloads: parsing a file with mismatched tags must reject
// with an Error.
it('should fail on malformed XML', async function () {
  const pendingFiles = makeDataFiles(['<a><b></a>'])
  let outcome
  try {
    const files = await pendingFiles
    outcome = await Promise.all(_.map(parseIndividuals, files))
  } catch (error) {
    assert.ok(error instanceof Error)
    outcome = 'failed'
  }
  // A successful parse leaves `outcome` as the parsed data and fails here.
  assert.equal(outcome, 'failed')
})
// Round trip: XML generated from the two fixtures must parse back into exactly
// those two fixtures, in order.
it('should return the expected individuals', async function () {
  const expected = [individualA, individualB]
  const files = await makeDataFiles([makeXml(expected)])
  const [individuals] = await Promise.all(_.map(parseIndividuals, files))
  assert.ok(Array.isArray(individuals))
  assert.equal(individuals.length, 2)
  assert.deepEqual(individuals, expected)
})
})
})